diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,388117 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9999969938283295, + "eval_steps": 500, + "global_step": 166324, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": 51199.96724201283, + "learning_rate": 1.2024048096192385e-08, + "loss": 59.1458, + "step": 3 + }, + { + "epoch": 0.0, + "grad_norm": 32538.341470016014, + "learning_rate": 2.404809619238477e-08, + "loss": 63.3021, + "step": 6 + }, + { + "epoch": 0.0, + "grad_norm": 36643.38325165955, + "learning_rate": 3.607214428857716e-08, + "loss": 60.1094, + "step": 9 + }, + { + "epoch": 0.0, + "grad_norm": 40696.534162003765, + "learning_rate": 4.809619238476954e-08, + "loss": 69.3542, + "step": 12 + }, + { + "epoch": 0.0, + "grad_norm": 30042.684126029402, + "learning_rate": 6.012024048096193e-08, + "loss": 56.3698, + "step": 15 + }, + { + "epoch": 0.0, + "grad_norm": 21130.231221077207, + "learning_rate": 7.214428857715431e-08, + "loss": 60.4115, + "step": 18 + }, + { + "epoch": 0.0, + "grad_norm": 18276.413824807656, + "learning_rate": 8.416833667334669e-08, + "loss": 57.1562, + "step": 21 + }, + { + "epoch": 0.0, + "grad_norm": 17112.000969614648, + "learning_rate": 9.619238476953908e-08, + "loss": 61.0625, + "step": 24 + }, + { + "epoch": 0.0, + "grad_norm": 23225.971830434257, + "learning_rate": 1.0821643286573146e-07, + "loss": 58.3333, + "step": 27 + }, + { + "epoch": 0.0, + "grad_norm": 21715.084746789576, + "learning_rate": 1.2024048096192385e-07, + "loss": 58.8438, + "step": 30 + }, + { + "epoch": 0.0, + "grad_norm": 29022.182129643254, + "learning_rate": 1.3226452905811624e-07, + "loss": 54.9115, + "step": 33 + }, + { + "epoch": 0.0, + "grad_norm": 10499.128258127299, + "learning_rate": 1.4428857715430863e-07, + "loss": 52.2552, + "step": 36 + }, + { + "epoch": 0.0, + "grad_norm": 9066.766248524455, + "learning_rate": 1.5631262525050102e-07, + "loss": 58.4323, + "step": 39 + }, + { + "epoch": 0.0, + "grad_norm": 24611.02218543854, + "learning_rate": 1.6833667334669338e-07, + "loss": 58.349, + "step": 42 + }, + { + "epoch": 0.0, + "grad_norm": 23233.78398504978, + "learning_rate": 1.803607214428858e-07, + "loss": 57.224, + "step": 45 + }, + { + "epoch": 0.0, + "grad_norm": 38569.080813584646, + "learning_rate": 1.9238476953907815e-07, + "loss": 52.724, + "step": 48 + }, + { + "epoch": 0.0, + "grad_norm": 27418.958809669217, + "learning_rate": 2.0440881763527057e-07, + "loss": 62.2188, + "step": 51 + }, + { + "epoch": 0.0, + "grad_norm": 21117.96940866193, + "learning_rate": 2.1643286573146293e-07, + "loss": 64.8542, + "step": 54 + }, + { + "epoch": 0.0, + "grad_norm": 21240.342248890993, + "learning_rate": 2.2845691382765534e-07, + "loss": 57.0521, + "step": 57 + }, + { + "epoch": 0.0, + "grad_norm": 12734.272954662856, + "learning_rate": 2.404809619238477e-07, + "loss": 61.6562, + "step": 60 + }, + { + "epoch": 0.0, + "grad_norm": 61404.81149374131, + "learning_rate": 2.525050100200401e-07, + "loss": 58.1042, + "step": 63 + }, + { + "epoch": 0.0, + "grad_norm": 21555.034845593076, + "learning_rate": 2.645290581162325e-07, + "loss": 50.2344, + "step": 66 + }, + { + "epoch": 0.0, + "grad_norm": 16098.486822577923, + "learning_rate": 2.7655310621242487e-07, + "loss": 57.151, + "step": 69 + }, + { + "epoch": 0.0, + "grad_norm": 26671.753694437404, + "learning_rate": 2.8857715430861726e-07, + "loss": 59.7812, + "step": 72 + }, + { + "epoch": 0.0, + "grad_norm": 237994.82377223932, + "learning_rate": 3.0060120240480964e-07, + "loss": 55.5208, + "step": 75 + }, + { + "epoch": 0.0, + "grad_norm": 19918.08941418328, + "learning_rate": 3.1262525050100203e-07, + "loss": 53.0885, + "step": 78 + }, + { + "epoch": 0.0, + "grad_norm": 17504.885135614422, + "learning_rate": 3.246492985971944e-07, + "loss": 59.125, + "step": 81 + }, + { + "epoch": 0.0, + "grad_norm": 14661.741969982884, + "learning_rate": 3.3667334669338675e-07, + "loss": 50.9167, + "step": 84 + }, + { + "epoch": 0.0, + "grad_norm": 27629.292906885996, + "learning_rate": 3.486973947895792e-07, + "loss": 53.9167, + "step": 87 + }, + { + "epoch": 0.0, + "grad_norm": 21572.118277094316, + "learning_rate": 3.607214428857716e-07, + "loss": 51.151, + "step": 90 + }, + { + "epoch": 0.0, + "grad_norm": 28238.327424206836, + "learning_rate": 3.7274549098196397e-07, + "loss": 47.3906, + "step": 93 + }, + { + "epoch": 0.0, + "grad_norm": 18184.198748039766, + "learning_rate": 3.847695390781563e-07, + "loss": 50.0885, + "step": 96 + }, + { + "epoch": 0.0, + "grad_norm": 13346.263628976498, + "learning_rate": 3.967935871743487e-07, + "loss": 50.4271, + "step": 99 + }, + { + "epoch": 0.0, + "grad_norm": 20519.836570466956, + "learning_rate": 4.0881763527054113e-07, + "loss": 48.0521, + "step": 102 + }, + { + "epoch": 0.0, + "grad_norm": 10635.557469991296, + "learning_rate": 4.208416833667335e-07, + "loss": 45.3229, + "step": 105 + }, + { + "epoch": 0.0, + "grad_norm": 20757.46820127396, + "learning_rate": 4.3286573146292586e-07, + "loss": 46.4375, + "step": 108 + }, + { + "epoch": 0.0, + "grad_norm": 11209.585513309845, + "learning_rate": 4.4488977955911824e-07, + "loss": 44.901, + "step": 111 + }, + { + "epoch": 0.0, + "grad_norm": 13201.275403939371, + "learning_rate": 4.569138276553107e-07, + "loss": 43.4688, + "step": 114 + }, + { + "epoch": 0.0, + "grad_norm": 12139.185358482255, + "learning_rate": 4.6893787575150307e-07, + "loss": 45.6406, + "step": 117 + }, + { + "epoch": 0.0, + "grad_norm": 9552.273224810428, + "learning_rate": 4.809619238476954e-07, + "loss": 44.1771, + "step": 120 + }, + { + "epoch": 0.0, + "grad_norm": 21889.555201641477, + "learning_rate": 4.929859719438878e-07, + "loss": 42.9792, + "step": 123 + }, + { + "epoch": 0.0, + "grad_norm": 37976.97909927038, + "learning_rate": 5.050100200400802e-07, + "loss": 40.6146, + "step": 126 + }, + { + "epoch": 0.0, + "grad_norm": 7517.672389272942, + "learning_rate": 5.170340681362726e-07, + "loss": 40.7083, + "step": 129 + }, + { + "epoch": 0.0, + "grad_norm": 9800.338743947936, + "learning_rate": 5.29058116232465e-07, + "loss": 39.6302, + "step": 132 + }, + { + "epoch": 0.0, + "grad_norm": 31165.30400298196, + "learning_rate": 5.410821643286573e-07, + "loss": 39.6823, + "step": 135 + }, + { + "epoch": 0.0, + "grad_norm": 15661.62046106445, + "learning_rate": 5.531062124248497e-07, + "loss": 38.6615, + "step": 138 + }, + { + "epoch": 0.0, + "grad_norm": 6242.911153959725, + "learning_rate": 5.651302605210421e-07, + "loss": 40.151, + "step": 141 + }, + { + "epoch": 0.0, + "grad_norm": 2330.4456314734985, + "learning_rate": 5.771543086172345e-07, + "loss": 36.0122, + "step": 144 + }, + { + "epoch": 0.0, + "grad_norm": 2835.953567390231, + "learning_rate": 5.891783567134269e-07, + "loss": 33.2708, + "step": 147 + }, + { + "epoch": 0.0, + "grad_norm": 4347.32742142968, + "learning_rate": 6.012024048096193e-07, + "loss": 32.776, + "step": 150 + }, + { + "epoch": 0.0, + "grad_norm": 5181.772761617854, + "learning_rate": 6.132264529058117e-07, + "loss": 33.3125, + "step": 153 + }, + { + "epoch": 0.0, + "grad_norm": 2668.443872601531, + "learning_rate": 6.252505010020041e-07, + "loss": 32.3281, + "step": 156 + }, + { + "epoch": 0.0, + "grad_norm": 2103.358851527844, + "learning_rate": 6.372745490981964e-07, + "loss": 30.5625, + "step": 159 + }, + { + "epoch": 0.0, + "grad_norm": 3182.529492717381, + "learning_rate": 6.492985971943888e-07, + "loss": 29.5521, + "step": 162 + }, + { + "epoch": 0.0, + "grad_norm": 2753.330618578484, + "learning_rate": 6.613226452905812e-07, + "loss": 31.7969, + "step": 165 + }, + { + "epoch": 0.0, + "grad_norm": 5188.093938683464, + "learning_rate": 6.733466933867735e-07, + "loss": 30.6771, + "step": 168 + }, + { + "epoch": 0.0, + "grad_norm": 1777.79682346576, + "learning_rate": 6.853707414829659e-07, + "loss": 32.8802, + "step": 171 + }, + { + "epoch": 0.0, + "grad_norm": 1862.9620993293008, + "learning_rate": 6.973947895791584e-07, + "loss": 30.151, + "step": 174 + }, + { + "epoch": 0.0, + "grad_norm": 5876.430100551337, + "learning_rate": 7.094188376753508e-07, + "loss": 31.9271, + "step": 177 + }, + { + "epoch": 0.0, + "grad_norm": 3349.5899080611784, + "learning_rate": 7.214428857715432e-07, + "loss": 29.9635, + "step": 180 + }, + { + "epoch": 0.0, + "grad_norm": 2173.743258987185, + "learning_rate": 7.334669338677356e-07, + "loss": 29.2812, + "step": 183 + }, + { + "epoch": 0.0, + "grad_norm": 3832.3765127910606, + "learning_rate": 7.454909819639279e-07, + "loss": 30.125, + "step": 186 + }, + { + "epoch": 0.0, + "grad_norm": 1786.9019975233412, + "learning_rate": 7.575150300601203e-07, + "loss": 29.8698, + "step": 189 + }, + { + "epoch": 0.0, + "grad_norm": 2891.3358365571094, + "learning_rate": 7.695390781563126e-07, + "loss": 29.474, + "step": 192 + }, + { + "epoch": 0.0, + "grad_norm": 2999.5298813672325, + "learning_rate": 7.81563126252505e-07, + "loss": 29.7812, + "step": 195 + }, + { + "epoch": 0.0, + "grad_norm": 5589.48292391496, + "learning_rate": 7.935871743486974e-07, + "loss": 27.3594, + "step": 198 + }, + { + "epoch": 0.0, + "grad_norm": 2770.912341967175, + "learning_rate": 8.056112224448899e-07, + "loss": 28.4531, + "step": 201 + }, + { + "epoch": 0.0, + "grad_norm": 2429.496934937241, + "learning_rate": 8.176352705410823e-07, + "loss": 28.5938, + "step": 204 + }, + { + "epoch": 0.0, + "grad_norm": 1012.1826949250576, + "learning_rate": 8.296593186372747e-07, + "loss": 27.4115, + "step": 207 + }, + { + "epoch": 0.0, + "grad_norm": 2007.4069612188255, + "learning_rate": 8.41683366733467e-07, + "loss": 28.1562, + "step": 210 + }, + { + "epoch": 0.0, + "grad_norm": 3629.8441262335314, + "learning_rate": 8.537074148296594e-07, + "loss": 29.362, + "step": 213 + }, + { + "epoch": 0.0, + "grad_norm": 1928.3559539931841, + "learning_rate": 8.657314629258517e-07, + "loss": 28.1302, + "step": 216 + }, + { + "epoch": 0.0, + "grad_norm": 1628.721516279276, + "learning_rate": 8.777555110220441e-07, + "loss": 27.8438, + "step": 219 + }, + { + "epoch": 0.0, + "grad_norm": 5142.2430436750265, + "learning_rate": 8.897795591182365e-07, + "loss": 25.2917, + "step": 222 + }, + { + "epoch": 0.0, + "grad_norm": 1109.3033569536833, + "learning_rate": 9.01803607214429e-07, + "loss": 26.0365, + "step": 225 + }, + { + "epoch": 0.0, + "grad_norm": 3910.031717492708, + "learning_rate": 9.138276553106214e-07, + "loss": 25.5156, + "step": 228 + }, + { + "epoch": 0.0, + "grad_norm": 2200.38711438906, + "learning_rate": 9.258517034068138e-07, + "loss": 28.0469, + "step": 231 + }, + { + "epoch": 0.0, + "grad_norm": 4671.38754366574, + "learning_rate": 9.378757515030061e-07, + "loss": 26.1615, + "step": 234 + }, + { + "epoch": 0.0, + "grad_norm": 2839.0777381423586, + "learning_rate": 9.498997995991984e-07, + "loss": 25.526, + "step": 237 + }, + { + "epoch": 0.0, + "grad_norm": 1276.3141086220419, + "learning_rate": 9.619238476953908e-07, + "loss": 24.8906, + "step": 240 + }, + { + "epoch": 0.0, + "grad_norm": 4387.768477537738, + "learning_rate": 9.739478957915833e-07, + "loss": 27.0677, + "step": 243 + }, + { + "epoch": 0.0, + "grad_norm": 1163.3496870164533, + "learning_rate": 9.859719438877756e-07, + "loss": 26.2708, + "step": 246 + }, + { + "epoch": 0.0, + "grad_norm": 1154.6516460164034, + "learning_rate": 9.979959919839679e-07, + "loss": 25.651, + "step": 249 + }, + { + "epoch": 0.0, + "grad_norm": 890.2708400380467, + "learning_rate": 1.0100200400801604e-06, + "loss": 25.1198, + "step": 252 + }, + { + "epoch": 0.0, + "grad_norm": 2427.693691716473, + "learning_rate": 1.0220440881763529e-06, + "loss": 24.4948, + "step": 255 + }, + { + "epoch": 0.0, + "grad_norm": 1403.720302552582, + "learning_rate": 1.0340681362725451e-06, + "loss": 24.6823, + "step": 258 + }, + { + "epoch": 0.0, + "grad_norm": 1039.7276684882327, + "learning_rate": 1.0460921843687376e-06, + "loss": 23.6823, + "step": 261 + }, + { + "epoch": 0.0, + "grad_norm": 781.093679687151, + "learning_rate": 1.05811623246493e-06, + "loss": 24.4583, + "step": 264 + }, + { + "epoch": 0.0, + "grad_norm": 988.4566377411879, + "learning_rate": 1.0701402805611224e-06, + "loss": 23.2708, + "step": 267 + }, + { + "epoch": 0.0, + "grad_norm": 2096.2653933781353, + "learning_rate": 1.0821643286573147e-06, + "loss": 23.6406, + "step": 270 + }, + { + "epoch": 0.0, + "grad_norm": 1473.9930678301216, + "learning_rate": 1.094188376753507e-06, + "loss": 22.8646, + "step": 273 + }, + { + "epoch": 0.0, + "grad_norm": 2153.6061662355723, + "learning_rate": 1.1062124248496995e-06, + "loss": 22.0781, + "step": 276 + }, + { + "epoch": 0.0, + "grad_norm": 707.442074536556, + "learning_rate": 1.118236472945892e-06, + "loss": 23.8073, + "step": 279 + }, + { + "epoch": 0.0, + "grad_norm": 1041.084204429105, + "learning_rate": 1.1302605210420842e-06, + "loss": 22.5104, + "step": 282 + }, + { + "epoch": 0.0, + "grad_norm": 889.0515507528611, + "learning_rate": 1.1422845691382767e-06, + "loss": 24.3646, + "step": 285 + }, + { + "epoch": 0.0, + "grad_norm": 1523.6507333400723, + "learning_rate": 1.154308617234469e-06, + "loss": 22.1667, + "step": 288 + }, + { + "epoch": 0.0, + "grad_norm": 741.2497298037591, + "learning_rate": 1.1663326653306615e-06, + "loss": 21.3073, + "step": 291 + }, + { + "epoch": 0.0, + "grad_norm": 854.3058385198258, + "learning_rate": 1.1783567134268538e-06, + "loss": 22.0312, + "step": 294 + }, + { + "epoch": 0.0, + "grad_norm": 2409.8898736737615, + "learning_rate": 1.190380761523046e-06, + "loss": 21.1615, + "step": 297 + }, + { + "epoch": 0.0, + "grad_norm": 749.5130913783704, + "learning_rate": 1.2024048096192386e-06, + "loss": 21.599, + "step": 300 + }, + { + "epoch": 0.0, + "grad_norm": 2047.9658263710144, + "learning_rate": 1.2144288577154309e-06, + "loss": 20.849, + "step": 303 + }, + { + "epoch": 0.0, + "grad_norm": 4388.569814882391, + "learning_rate": 1.2264529058116233e-06, + "loss": 20.4115, + "step": 306 + }, + { + "epoch": 0.0, + "grad_norm": 676.0941563095772, + "learning_rate": 1.2384769539078158e-06, + "loss": 19.875, + "step": 309 + }, + { + "epoch": 0.0, + "grad_norm": 896.6618159161494, + "learning_rate": 1.2505010020040081e-06, + "loss": 20.0833, + "step": 312 + }, + { + "epoch": 0.0, + "grad_norm": 990.2346199178942, + "learning_rate": 1.2625250501002004e-06, + "loss": 19.5208, + "step": 315 + }, + { + "epoch": 0.0, + "grad_norm": 897.7746680521537, + "learning_rate": 1.274549098196393e-06, + "loss": 18.7448, + "step": 318 + }, + { + "epoch": 0.0, + "grad_norm": 795.7924424355208, + "learning_rate": 1.2865731462925854e-06, + "loss": 18.526, + "step": 321 + }, + { + "epoch": 0.0, + "grad_norm": 1224.7777657789804, + "learning_rate": 1.2985971943887777e-06, + "loss": 18.7344, + "step": 324 + }, + { + "epoch": 0.0, + "grad_norm": 1027.4833912636905, + "learning_rate": 1.3106212424849702e-06, + "loss": 17.1276, + "step": 327 + }, + { + "epoch": 0.0, + "grad_norm": 790.6770902016307, + "learning_rate": 1.3226452905811624e-06, + "loss": 18.276, + "step": 330 + }, + { + "epoch": 0.0, + "grad_norm": 1193.2941751726505, + "learning_rate": 1.334669338677355e-06, + "loss": 18.0, + "step": 333 + }, + { + "epoch": 0.0, + "grad_norm": 1062.246901813828, + "learning_rate": 1.346693386773547e-06, + "loss": 17.5339, + "step": 336 + }, + { + "epoch": 0.0, + "grad_norm": 1317.5030320935996, + "learning_rate": 1.3587174348697397e-06, + "loss": 17.8984, + "step": 339 + }, + { + "epoch": 0.0, + "grad_norm": 876.5808468652473, + "learning_rate": 1.3707414829659318e-06, + "loss": 16.7682, + "step": 342 + }, + { + "epoch": 0.0, + "grad_norm": 611.4316579621134, + "learning_rate": 1.3827655310621243e-06, + "loss": 17.2292, + "step": 345 + }, + { + "epoch": 0.0, + "grad_norm": 3358.3884117372168, + "learning_rate": 1.3947895791583168e-06, + "loss": 17.4062, + "step": 348 + }, + { + "epoch": 0.0, + "grad_norm": 617.6141796174542, + "learning_rate": 1.406813627254509e-06, + "loss": 16.2734, + "step": 351 + }, + { + "epoch": 0.0, + "grad_norm": 442.3322238426687, + "learning_rate": 1.4188376753507016e-06, + "loss": 15.612, + "step": 354 + }, + { + "epoch": 0.0, + "grad_norm": 1601.7626086545108, + "learning_rate": 1.4308617234468938e-06, + "loss": 16.5365, + "step": 357 + }, + { + "epoch": 0.0, + "grad_norm": 584.6840646484202, + "learning_rate": 1.4428857715430863e-06, + "loss": 15.9297, + "step": 360 + }, + { + "epoch": 0.0, + "grad_norm": 525.955066288189, + "learning_rate": 1.4549098196392786e-06, + "loss": 15.2734, + "step": 363 + }, + { + "epoch": 0.0, + "grad_norm": 519.5463033958445, + "learning_rate": 1.466933867735471e-06, + "loss": 14.6224, + "step": 366 + }, + { + "epoch": 0.0, + "grad_norm": 838.9209381791715, + "learning_rate": 1.4789579158316634e-06, + "loss": 14.7474, + "step": 369 + }, + { + "epoch": 0.0, + "grad_norm": 1240.1964394682077, + "learning_rate": 1.4909819639278559e-06, + "loss": 14.8047, + "step": 372 + }, + { + "epoch": 0.0, + "grad_norm": 1856.7437554270325, + "learning_rate": 1.5030060120240484e-06, + "loss": 14.8359, + "step": 375 + }, + { + "epoch": 0.0, + "grad_norm": 2058.0135808501914, + "learning_rate": 1.5150300601202407e-06, + "loss": 13.763, + "step": 378 + }, + { + "epoch": 0.0, + "grad_norm": 903.7862428995271, + "learning_rate": 1.5270541082164331e-06, + "loss": 15.2422, + "step": 381 + }, + { + "epoch": 0.0, + "grad_norm": 524.448729772175, + "learning_rate": 1.5390781563126252e-06, + "loss": 14.0729, + "step": 384 + }, + { + "epoch": 0.0, + "grad_norm": 1339.7755214440353, + "learning_rate": 1.5511022044088177e-06, + "loss": 13.5729, + "step": 387 + }, + { + "epoch": 0.0, + "grad_norm": 1086.5225049416317, + "learning_rate": 1.56312625250501e-06, + "loss": 13.1823, + "step": 390 + }, + { + "epoch": 0.0, + "grad_norm": 1082.4340946404118, + "learning_rate": 1.5751503006012025e-06, + "loss": 13.75, + "step": 393 + }, + { + "epoch": 0.0, + "grad_norm": 727.6503962527433, + "learning_rate": 1.5871743486973948e-06, + "loss": 12.7604, + "step": 396 + }, + { + "epoch": 0.0, + "grad_norm": 3512.4801458376205, + "learning_rate": 1.5991983967935873e-06, + "loss": 12.5625, + "step": 399 + }, + { + "epoch": 0.0, + "grad_norm": 492.0868754970134, + "learning_rate": 1.6112224448897798e-06, + "loss": 11.9948, + "step": 402 + }, + { + "epoch": 0.0, + "grad_norm": 551.1655075294925, + "learning_rate": 1.623246492985972e-06, + "loss": 12.0234, + "step": 405 + }, + { + "epoch": 0.0, + "grad_norm": 1771.0623726380338, + "learning_rate": 1.6352705410821645e-06, + "loss": 12.5234, + "step": 408 + }, + { + "epoch": 0.0, + "grad_norm": 2155.7732688887045, + "learning_rate": 1.6472945891783568e-06, + "loss": 11.7396, + "step": 411 + }, + { + "epoch": 0.0, + "grad_norm": 783.0564620340658, + "learning_rate": 1.6593186372745493e-06, + "loss": 11.401, + "step": 414 + }, + { + "epoch": 0.01, + "grad_norm": 674.8920006369478, + "learning_rate": 1.6713426853707416e-06, + "loss": 13.1641, + "step": 417 + }, + { + "epoch": 0.01, + "grad_norm": 868.8948490789962, + "learning_rate": 1.683366733466934e-06, + "loss": 11.0781, + "step": 420 + }, + { + "epoch": 0.01, + "grad_norm": 833.213058141909, + "learning_rate": 1.6953907815631262e-06, + "loss": 10.5573, + "step": 423 + }, + { + "epoch": 0.01, + "grad_norm": 1665.0122425187487, + "learning_rate": 1.7074148296593189e-06, + "loss": 10.8229, + "step": 426 + }, + { + "epoch": 0.01, + "grad_norm": 476.1356616647953, + "learning_rate": 1.7194388777555114e-06, + "loss": 11.5495, + "step": 429 + }, + { + "epoch": 0.01, + "grad_norm": 327.26609934821084, + "learning_rate": 1.7314629258517034e-06, + "loss": 10.974, + "step": 432 + }, + { + "epoch": 0.01, + "grad_norm": 854.3330990796151, + "learning_rate": 1.743486973947896e-06, + "loss": 11.1536, + "step": 435 + }, + { + "epoch": 0.01, + "grad_norm": 487.8398882557892, + "learning_rate": 1.7555110220440882e-06, + "loss": 10.1406, + "step": 438 + }, + { + "epoch": 0.01, + "grad_norm": 1626.816962687508, + "learning_rate": 1.7675350701402807e-06, + "loss": 11.4805, + "step": 441 + }, + { + "epoch": 0.01, + "grad_norm": 871.1090260798811, + "learning_rate": 1.779559118236473e-06, + "loss": 10.3893, + "step": 444 + }, + { + "epoch": 0.01, + "grad_norm": 1106.3906080793272, + "learning_rate": 1.7915831663326655e-06, + "loss": 11.1888, + "step": 447 + }, + { + "epoch": 0.01, + "grad_norm": 812.4072831399887, + "learning_rate": 1.803607214428858e-06, + "loss": 10.2656, + "step": 450 + }, + { + "epoch": 0.01, + "grad_norm": 670.4342070332576, + "learning_rate": 1.8156312625250502e-06, + "loss": 10.0391, + "step": 453 + }, + { + "epoch": 0.01, + "grad_norm": 525.6663950688338, + "learning_rate": 1.8276553106212427e-06, + "loss": 9.0599, + "step": 456 + }, + { + "epoch": 0.01, + "grad_norm": 323.4140310153801, + "learning_rate": 1.839679358717435e-06, + "loss": 9.6029, + "step": 459 + }, + { + "epoch": 0.01, + "grad_norm": 265.92662407562693, + "learning_rate": 1.8517034068136275e-06, + "loss": 9.7357, + "step": 462 + }, + { + "epoch": 0.01, + "grad_norm": 750.5561068766718, + "learning_rate": 1.8637274549098198e-06, + "loss": 9.3125, + "step": 465 + }, + { + "epoch": 0.01, + "grad_norm": 465.42445231454076, + "learning_rate": 1.8757515030060123e-06, + "loss": 9.694, + "step": 468 + }, + { + "epoch": 0.01, + "grad_norm": 272.1663901545364, + "learning_rate": 1.8877755511022044e-06, + "loss": 9.7656, + "step": 471 + }, + { + "epoch": 0.01, + "grad_norm": 213.607222155107, + "learning_rate": 1.8997995991983969e-06, + "loss": 10.2539, + "step": 474 + }, + { + "epoch": 0.01, + "grad_norm": 646.2633228789078, + "learning_rate": 1.9118236472945896e-06, + "loss": 10.2135, + "step": 477 + }, + { + "epoch": 0.01, + "grad_norm": 448.1024761731943, + "learning_rate": 1.9238476953907816e-06, + "loss": 9.0326, + "step": 480 + }, + { + "epoch": 0.01, + "grad_norm": 1226.4199550823448, + "learning_rate": 1.935871743486974e-06, + "loss": 8.8893, + "step": 483 + }, + { + "epoch": 0.01, + "grad_norm": 466.33472703773737, + "learning_rate": 1.9478957915831666e-06, + "loss": 9.4844, + "step": 486 + }, + { + "epoch": 0.01, + "grad_norm": 207.70558388485233, + "learning_rate": 1.959919839679359e-06, + "loss": 8.8867, + "step": 489 + }, + { + "epoch": 0.01, + "grad_norm": 2340.744461531173, + "learning_rate": 1.971943887775551e-06, + "loss": 9.099, + "step": 492 + }, + { + "epoch": 0.01, + "grad_norm": 2811.9844800598094, + "learning_rate": 1.9839679358717437e-06, + "loss": 8.8529, + "step": 495 + }, + { + "epoch": 0.01, + "grad_norm": 492.11553418620946, + "learning_rate": 1.9959919839679357e-06, + "loss": 9.3776, + "step": 498 + }, + { + "epoch": 0.01, + "grad_norm": 544.9522224852215, + "learning_rate": 2.0080160320641282e-06, + "loss": 9.4115, + "step": 501 + }, + { + "epoch": 0.01, + "grad_norm": 347.6748136163846, + "learning_rate": 2.0200400801603207e-06, + "loss": 8.1107, + "step": 504 + }, + { + "epoch": 0.01, + "grad_norm": 390.26867069527043, + "learning_rate": 2.0320641282565132e-06, + "loss": 8.6003, + "step": 507 + }, + { + "epoch": 0.01, + "grad_norm": 299.5973339800683, + "learning_rate": 2.0440881763527057e-06, + "loss": 8.8542, + "step": 510 + }, + { + "epoch": 0.01, + "grad_norm": 209.924462897218, + "learning_rate": 2.056112224448898e-06, + "loss": 7.6315, + "step": 513 + }, + { + "epoch": 0.01, + "grad_norm": 675.762414491488, + "learning_rate": 2.0681362725450903e-06, + "loss": 7.7982, + "step": 516 + }, + { + "epoch": 0.01, + "grad_norm": 327.24060117186247, + "learning_rate": 2.0801603206412828e-06, + "loss": 8.5352, + "step": 519 + }, + { + "epoch": 0.01, + "grad_norm": 268.46294455019785, + "learning_rate": 2.0921843687374753e-06, + "loss": 7.6882, + "step": 522 + }, + { + "epoch": 0.01, + "grad_norm": 513.4016405221716, + "learning_rate": 2.1042084168336673e-06, + "loss": 7.4095, + "step": 525 + }, + { + "epoch": 0.01, + "grad_norm": 2831.911154498119, + "learning_rate": 2.11623246492986e-06, + "loss": 7.6484, + "step": 528 + }, + { + "epoch": 0.01, + "grad_norm": 1004.6563019141578, + "learning_rate": 2.1282565130260523e-06, + "loss": 8.0339, + "step": 531 + }, + { + "epoch": 0.01, + "grad_norm": 214.55312436895892, + "learning_rate": 2.140280561122245e-06, + "loss": 9.0, + "step": 534 + }, + { + "epoch": 0.01, + "grad_norm": 207.71789014793188, + "learning_rate": 2.1523046092184373e-06, + "loss": 6.4831, + "step": 537 + }, + { + "epoch": 0.01, + "grad_norm": 520.9343313526134, + "learning_rate": 2.1643286573146294e-06, + "loss": 7.0163, + "step": 540 + }, + { + "epoch": 0.01, + "grad_norm": 351.96519288068254, + "learning_rate": 2.176352705410822e-06, + "loss": 7.5742, + "step": 543 + }, + { + "epoch": 0.01, + "grad_norm": 681.7102992391452, + "learning_rate": 2.188376753507014e-06, + "loss": 8.2982, + "step": 546 + }, + { + "epoch": 0.01, + "grad_norm": 179.30413380860665, + "learning_rate": 2.2004008016032064e-06, + "loss": 6.7331, + "step": 549 + }, + { + "epoch": 0.01, + "grad_norm": 358.50388002618365, + "learning_rate": 2.212424849699399e-06, + "loss": 7.7721, + "step": 552 + }, + { + "epoch": 0.01, + "grad_norm": 220.89374989361772, + "learning_rate": 2.2244488977955914e-06, + "loss": 7.7025, + "step": 555 + }, + { + "epoch": 0.01, + "grad_norm": 1567.0982162729529, + "learning_rate": 2.236472945891784e-06, + "loss": 7.5169, + "step": 558 + }, + { + "epoch": 0.01, + "grad_norm": 560.2606304029359, + "learning_rate": 2.248496993987976e-06, + "loss": 6.9154, + "step": 561 + }, + { + "epoch": 0.01, + "grad_norm": 216.44934647635606, + "learning_rate": 2.2605210420841685e-06, + "loss": 7.0195, + "step": 564 + }, + { + "epoch": 0.01, + "grad_norm": 199.73989790719983, + "learning_rate": 2.272545090180361e-06, + "loss": 7.0885, + "step": 567 + }, + { + "epoch": 0.01, + "grad_norm": 338.1243439859276, + "learning_rate": 2.2845691382765535e-06, + "loss": 6.6751, + "step": 570 + }, + { + "epoch": 0.01, + "grad_norm": 848.6330643925021, + "learning_rate": 2.2965931863727455e-06, + "loss": 7.5, + "step": 573 + }, + { + "epoch": 0.01, + "grad_norm": 797.4591136208757, + "learning_rate": 2.308617234468938e-06, + "loss": 8.1341, + "step": 576 + }, + { + "epoch": 0.01, + "grad_norm": 132.73941638134522, + "learning_rate": 2.3206412825651305e-06, + "loss": 7.5345, + "step": 579 + }, + { + "epoch": 0.01, + "grad_norm": 162.68481587508984, + "learning_rate": 2.332665330661323e-06, + "loss": 6.6289, + "step": 582 + }, + { + "epoch": 0.01, + "grad_norm": 256.3914825552894, + "learning_rate": 2.3446893787575155e-06, + "loss": 6.1992, + "step": 585 + }, + { + "epoch": 0.01, + "grad_norm": 189.783837667493, + "learning_rate": 2.3567134268537076e-06, + "loss": 6.75, + "step": 588 + }, + { + "epoch": 0.01, + "grad_norm": 1449.7256998421333, + "learning_rate": 2.3687374749499e-06, + "loss": 6.6211, + "step": 591 + }, + { + "epoch": 0.01, + "grad_norm": 203.49524160939626, + "learning_rate": 2.380761523046092e-06, + "loss": 6.6003, + "step": 594 + }, + { + "epoch": 0.01, + "grad_norm": 308.23996018858287, + "learning_rate": 2.3927855711422846e-06, + "loss": 7.3392, + "step": 597 + }, + { + "epoch": 0.01, + "grad_norm": 379.7784970770216, + "learning_rate": 2.404809619238477e-06, + "loss": 6.6836, + "step": 600 + }, + { + "epoch": 0.01, + "grad_norm": 1302.5661238408882, + "learning_rate": 2.4168336673346696e-06, + "loss": 5.9466, + "step": 603 + }, + { + "epoch": 0.01, + "grad_norm": 428.887943600532, + "learning_rate": 2.4288577154308617e-06, + "loss": 5.8802, + "step": 606 + }, + { + "epoch": 0.01, + "grad_norm": 224.53350453833906, + "learning_rate": 2.440881763527054e-06, + "loss": 6.6406, + "step": 609 + }, + { + "epoch": 0.01, + "grad_norm": 540.1815785645642, + "learning_rate": 2.4529058116232467e-06, + "loss": 6.2956, + "step": 612 + }, + { + "epoch": 0.01, + "grad_norm": 189.84662224074177, + "learning_rate": 2.464929859719439e-06, + "loss": 5.6667, + "step": 615 + }, + { + "epoch": 0.01, + "grad_norm": 194.09097470432795, + "learning_rate": 2.4769539078156317e-06, + "loss": 6.5703, + "step": 618 + }, + { + "epoch": 0.01, + "grad_norm": 209.07592442667797, + "learning_rate": 2.4889779559118238e-06, + "loss": 5.7891, + "step": 621 + }, + { + "epoch": 0.01, + "grad_norm": 151.8049273019808, + "learning_rate": 2.5010020040080162e-06, + "loss": 6.2539, + "step": 624 + }, + { + "epoch": 0.01, + "grad_norm": 650.1513174412233, + "learning_rate": 2.5130260521042087e-06, + "loss": 6.2142, + "step": 627 + }, + { + "epoch": 0.01, + "grad_norm": 101.99916300147194, + "learning_rate": 2.525050100200401e-06, + "loss": 5.9609, + "step": 630 + }, + { + "epoch": 0.01, + "grad_norm": 413.18131720158743, + "learning_rate": 2.5370741482965933e-06, + "loss": 5.4831, + "step": 633 + }, + { + "epoch": 0.01, + "grad_norm": 876.8075625088157, + "learning_rate": 2.549098196392786e-06, + "loss": 5.7454, + "step": 636 + }, + { + "epoch": 0.01, + "grad_norm": 143.8500075632616, + "learning_rate": 2.5611222444889783e-06, + "loss": 5.7415, + "step": 639 + }, + { + "epoch": 0.01, + "grad_norm": 336.82068275496704, + "learning_rate": 2.5731462925851708e-06, + "loss": 5.3451, + "step": 642 + }, + { + "epoch": 0.01, + "grad_norm": 896.7296018262184, + "learning_rate": 2.585170340681363e-06, + "loss": 6.7936, + "step": 645 + }, + { + "epoch": 0.01, + "grad_norm": 340.3071732629995, + "learning_rate": 2.5971943887775553e-06, + "loss": 7.0059, + "step": 648 + }, + { + "epoch": 0.01, + "grad_norm": 415.28601034027326, + "learning_rate": 2.609218436873748e-06, + "loss": 6.4616, + "step": 651 + }, + { + "epoch": 0.01, + "grad_norm": 290.5389336771159, + "learning_rate": 2.6212424849699403e-06, + "loss": 6.7663, + "step": 654 + }, + { + "epoch": 0.01, + "grad_norm": 297.0224298753228, + "learning_rate": 2.6332665330661324e-06, + "loss": 5.4408, + "step": 657 + }, + { + "epoch": 0.01, + "grad_norm": 223.31351938693774, + "learning_rate": 2.645290581162325e-06, + "loss": 5.582, + "step": 660 + }, + { + "epoch": 0.01, + "grad_norm": 232.92908327520186, + "learning_rate": 2.6573146292585174e-06, + "loss": 6.1745, + "step": 663 + }, + { + "epoch": 0.01, + "grad_norm": 115.37860328805141, + "learning_rate": 2.66933867735471e-06, + "loss": 5.248, + "step": 666 + }, + { + "epoch": 0.01, + "grad_norm": 106.46874525749196, + "learning_rate": 2.6813627254509024e-06, + "loss": 6.528, + "step": 669 + }, + { + "epoch": 0.01, + "grad_norm": 311.56048907399133, + "learning_rate": 2.693386773547094e-06, + "loss": 5.1986, + "step": 672 + }, + { + "epoch": 0.01, + "grad_norm": 177.4626661505006, + "learning_rate": 2.7054108216432865e-06, + "loss": 5.9212, + "step": 675 + }, + { + "epoch": 0.01, + "grad_norm": 194.59175225336688, + "learning_rate": 2.7174348697394794e-06, + "loss": 5.6152, + "step": 678 + }, + { + "epoch": 0.01, + "grad_norm": 431.1475325088762, + "learning_rate": 2.729458917835672e-06, + "loss": 4.9922, + "step": 681 + }, + { + "epoch": 0.01, + "grad_norm": 191.1691083048966, + "learning_rate": 2.7414829659318636e-06, + "loss": 5.1953, + "step": 684 + }, + { + "epoch": 0.01, + "grad_norm": 149.00874544669415, + "learning_rate": 2.753507014028056e-06, + "loss": 4.7995, + "step": 687 + }, + { + "epoch": 0.01, + "grad_norm": 126.84992262922864, + "learning_rate": 2.7655310621242486e-06, + "loss": 5.7754, + "step": 690 + }, + { + "epoch": 0.01, + "grad_norm": 247.75810377316432, + "learning_rate": 2.777555110220441e-06, + "loss": 5.8763, + "step": 693 + }, + { + "epoch": 0.01, + "grad_norm": 330.27100732887294, + "learning_rate": 2.7895791583166336e-06, + "loss": 5.4023, + "step": 696 + }, + { + "epoch": 0.01, + "grad_norm": 265.12033490989467, + "learning_rate": 2.8016032064128256e-06, + "loss": 6.085, + "step": 699 + }, + { + "epoch": 0.01, + "grad_norm": 186.25696583243894, + "learning_rate": 2.813627254509018e-06, + "loss": 5.0625, + "step": 702 + }, + { + "epoch": 0.01, + "grad_norm": 208.7142169826468, + "learning_rate": 2.8256513026052106e-06, + "loss": 5.6094, + "step": 705 + }, + { + "epoch": 0.01, + "grad_norm": 202.4558169853546, + "learning_rate": 2.837675350701403e-06, + "loss": 4.8548, + "step": 708 + }, + { + "epoch": 0.01, + "grad_norm": 197.74893720425686, + "learning_rate": 2.849699398797595e-06, + "loss": 4.8841, + "step": 711 + }, + { + "epoch": 0.01, + "grad_norm": 87.13417730814265, + "learning_rate": 2.8617234468937877e-06, + "loss": 4.8496, + "step": 714 + }, + { + "epoch": 0.01, + "grad_norm": 121.0728049172972, + "learning_rate": 2.87374749498998e-06, + "loss": 4.8314, + "step": 717 + }, + { + "epoch": 0.01, + "grad_norm": 89.21273475866849, + "learning_rate": 2.8857715430861727e-06, + "loss": 4.5514, + "step": 720 + }, + { + "epoch": 0.01, + "grad_norm": 126.35090232779217, + "learning_rate": 2.897795591182365e-06, + "loss": 5.1478, + "step": 723 + }, + { + "epoch": 0.01, + "grad_norm": 73.6540522295332, + "learning_rate": 2.9098196392785572e-06, + "loss": 4.8216, + "step": 726 + }, + { + "epoch": 0.01, + "grad_norm": 127.10351781480492, + "learning_rate": 2.9218436873747497e-06, + "loss": 4.6465, + "step": 729 + }, + { + "epoch": 0.01, + "grad_norm": 454.6348561100241, + "learning_rate": 2.933867735470942e-06, + "loss": 5.1393, + "step": 732 + }, + { + "epoch": 0.01, + "grad_norm": 1827.314179788961, + "learning_rate": 2.9458917835671347e-06, + "loss": 5.2135, + "step": 735 + }, + { + "epoch": 0.01, + "grad_norm": 158.80633196231645, + "learning_rate": 2.9579158316633268e-06, + "loss": 5.4525, + "step": 738 + }, + { + "epoch": 0.01, + "grad_norm": 237.5072072090655, + "learning_rate": 2.9699398797595193e-06, + "loss": 4.9466, + "step": 741 + }, + { + "epoch": 0.01, + "grad_norm": 222.80573426693644, + "learning_rate": 2.9819639278557118e-06, + "loss": 5.151, + "step": 744 + }, + { + "epoch": 0.01, + "grad_norm": 304.8595224198188, + "learning_rate": 2.9939879759519043e-06, + "loss": 4.7682, + "step": 747 + }, + { + "epoch": 0.01, + "grad_norm": 540.5896177581618, + "learning_rate": 3.0060120240480967e-06, + "loss": 5.4408, + "step": 750 + }, + { + "epoch": 0.01, + "grad_norm": 621.8084685255162, + "learning_rate": 3.018036072144289e-06, + "loss": 5.5531, + "step": 753 + }, + { + "epoch": 0.01, + "grad_norm": 462.9017992460235, + "learning_rate": 3.0300601202404813e-06, + "loss": 5.1315, + "step": 756 + }, + { + "epoch": 0.01, + "grad_norm": 74.18756188885757, + "learning_rate": 3.042084168336674e-06, + "loss": 4.1849, + "step": 759 + }, + { + "epoch": 0.01, + "grad_norm": 392.2886638611252, + "learning_rate": 3.0541082164328663e-06, + "loss": 4.6829, + "step": 762 + }, + { + "epoch": 0.01, + "grad_norm": 104.84979610508913, + "learning_rate": 3.066132264529058e-06, + "loss": 5.0781, + "step": 765 + }, + { + "epoch": 0.01, + "grad_norm": 252.18100811537096, + "learning_rate": 3.0781563126252504e-06, + "loss": 4.3307, + "step": 768 + }, + { + "epoch": 0.01, + "grad_norm": 162.79627050496416, + "learning_rate": 3.090180360721443e-06, + "loss": 4.9427, + "step": 771 + }, + { + "epoch": 0.01, + "grad_norm": 179.44190623075755, + "learning_rate": 3.1022044088176354e-06, + "loss": 4.5059, + "step": 774 + }, + { + "epoch": 0.01, + "grad_norm": 354.42803005080947, + "learning_rate": 3.1142284569138283e-06, + "loss": 4.7161, + "step": 777 + }, + { + "epoch": 0.01, + "grad_norm": 192.04840207742345, + "learning_rate": 3.12625250501002e-06, + "loss": 4.5736, + "step": 780 + }, + { + "epoch": 0.01, + "grad_norm": 203.68683332413448, + "learning_rate": 3.1382765531062125e-06, + "loss": 5.084, + "step": 783 + }, + { + "epoch": 0.01, + "grad_norm": 816.6501930445306, + "learning_rate": 3.150300601202405e-06, + "loss": 5.2617, + "step": 786 + }, + { + "epoch": 0.01, + "grad_norm": 592.9095276313924, + "learning_rate": 3.1623246492985975e-06, + "loss": 4.5632, + "step": 789 + }, + { + "epoch": 0.01, + "grad_norm": 240.9616217906008, + "learning_rate": 3.1743486973947895e-06, + "loss": 4.2067, + "step": 792 + }, + { + "epoch": 0.01, + "grad_norm": 58.95984529438687, + "learning_rate": 3.186372745490982e-06, + "loss": 3.9658, + "step": 795 + }, + { + "epoch": 0.01, + "grad_norm": 190.06069342809903, + "learning_rate": 3.1983967935871745e-06, + "loss": 5.0586, + "step": 798 + }, + { + "epoch": 0.01, + "grad_norm": 170.73958610464555, + "learning_rate": 3.210420841683367e-06, + "loss": 4.2428, + "step": 801 + }, + { + "epoch": 0.01, + "grad_norm": 151.5595908679502, + "learning_rate": 3.2224448897795595e-06, + "loss": 5.2207, + "step": 804 + }, + { + "epoch": 0.01, + "grad_norm": 191.10929463879808, + "learning_rate": 3.2344689378757516e-06, + "loss": 5.0202, + "step": 807 + }, + { + "epoch": 0.01, + "grad_norm": 71.63761936211516, + "learning_rate": 3.246492985971944e-06, + "loss": 4.4323, + "step": 810 + }, + { + "epoch": 0.01, + "grad_norm": 234.44037987238133, + "learning_rate": 3.2585170340681366e-06, + "loss": 4.6468, + "step": 813 + }, + { + "epoch": 0.01, + "grad_norm": 323.27820061320654, + "learning_rate": 3.270541082164329e-06, + "loss": 4.4775, + "step": 816 + }, + { + "epoch": 0.01, + "grad_norm": 194.32733273409116, + "learning_rate": 3.282565130260521e-06, + "loss": 3.9544, + "step": 819 + }, + { + "epoch": 0.01, + "grad_norm": 266.0184934352069, + "learning_rate": 3.2945891783567136e-06, + "loss": 4.3096, + "step": 822 + }, + { + "epoch": 0.01, + "grad_norm": 273.20352691665346, + "learning_rate": 3.306613226452906e-06, + "loss": 3.5811, + "step": 825 + }, + { + "epoch": 0.01, + "grad_norm": 142.627774717499, + "learning_rate": 3.3186372745490986e-06, + "loss": 4.5993, + "step": 828 + }, + { + "epoch": 0.01, + "grad_norm": 106.00529177112782, + "learning_rate": 3.330661322645291e-06, + "loss": 4.9154, + "step": 831 + }, + { + "epoch": 0.01, + "grad_norm": 290.65851440732797, + "learning_rate": 3.342685370741483e-06, + "loss": 4.5033, + "step": 834 + }, + { + "epoch": 0.01, + "grad_norm": 63.48576802015846, + "learning_rate": 3.3547094188376757e-06, + "loss": 4.2227, + "step": 837 + }, + { + "epoch": 0.01, + "grad_norm": 341.7131760700091, + "learning_rate": 3.366733466933868e-06, + "loss": 4.2174, + "step": 840 + }, + { + "epoch": 0.01, + "grad_norm": 152.22998996998265, + "learning_rate": 3.3787575150300607e-06, + "loss": 5.0283, + "step": 843 + }, + { + "epoch": 0.01, + "grad_norm": 282.8433271459635, + "learning_rate": 3.3907815631262523e-06, + "loss": 4.2321, + "step": 846 + }, + { + "epoch": 0.01, + "grad_norm": 159.4440807344899, + "learning_rate": 3.4028056112224452e-06, + "loss": 4.0456, + "step": 849 + }, + { + "epoch": 0.01, + "grad_norm": 1066.0790215264556, + "learning_rate": 3.4148296593186377e-06, + "loss": 3.8411, + "step": 852 + }, + { + "epoch": 0.01, + "grad_norm": 222.20463427867625, + "learning_rate": 3.4268537074148302e-06, + "loss": 4.389, + "step": 855 + }, + { + "epoch": 0.01, + "grad_norm": 284.7361143886981, + "learning_rate": 3.4388777555110227e-06, + "loss": 4.3145, + "step": 858 + }, + { + "epoch": 0.01, + "grad_norm": 342.54208737939825, + "learning_rate": 3.4509018036072144e-06, + "loss": 5.0052, + "step": 861 + }, + { + "epoch": 0.01, + "grad_norm": 878.453324253649, + "learning_rate": 3.462925851703407e-06, + "loss": 4.6364, + "step": 864 + }, + { + "epoch": 0.01, + "grad_norm": 515.346136112809, + "learning_rate": 3.4749498997995993e-06, + "loss": 4.5355, + "step": 867 + }, + { + "epoch": 0.01, + "grad_norm": 292.4610490302053, + "learning_rate": 3.486973947895792e-06, + "loss": 3.7842, + "step": 870 + }, + { + "epoch": 0.01, + "grad_norm": 1715.726406361084, + "learning_rate": 3.4989979959919843e-06, + "loss": 4.3298, + "step": 873 + }, + { + "epoch": 0.01, + "grad_norm": 680.8326756493475, + "learning_rate": 3.5110220440881764e-06, + "loss": 4.6449, + "step": 876 + }, + { + "epoch": 0.01, + "grad_norm": 305.6134070350226, + "learning_rate": 3.523046092184369e-06, + "loss": 3.915, + "step": 879 + }, + { + "epoch": 0.01, + "grad_norm": 4198.681920771147, + "learning_rate": 3.5350701402805614e-06, + "loss": 4.2279, + "step": 882 + }, + { + "epoch": 0.01, + "grad_norm": 362.4767592848994, + "learning_rate": 3.547094188376754e-06, + "loss": 3.863, + "step": 885 + }, + { + "epoch": 0.01, + "grad_norm": 535.033559567652, + "learning_rate": 3.559118236472946e-06, + "loss": 3.7905, + "step": 888 + }, + { + "epoch": 0.01, + "grad_norm": 198.17351898720946, + "learning_rate": 3.5711422845691384e-06, + "loss": 4.2227, + "step": 891 + }, + { + "epoch": 0.01, + "grad_norm": 166.06305760217765, + "learning_rate": 3.583166332665331e-06, + "loss": 3.533, + "step": 894 + }, + { + "epoch": 0.01, + "grad_norm": 467.6247974261091, + "learning_rate": 3.5951903807615234e-06, + "loss": 4.8161, + "step": 897 + }, + { + "epoch": 0.01, + "grad_norm": 390.6368853697861, + "learning_rate": 3.607214428857716e-06, + "loss": 3.5391, + "step": 900 + }, + { + "epoch": 0.01, + "grad_norm": 352.9184225730157, + "learning_rate": 3.619238476953908e-06, + "loss": 4.5007, + "step": 903 + }, + { + "epoch": 0.01, + "grad_norm": 489.7813149428034, + "learning_rate": 3.6312625250501005e-06, + "loss": 4.554, + "step": 906 + }, + { + "epoch": 0.01, + "grad_norm": 337.0175807890609, + "learning_rate": 3.643286573146293e-06, + "loss": 4.0226, + "step": 909 + }, + { + "epoch": 0.01, + "grad_norm": 1104.611702768816, + "learning_rate": 3.6553106212424855e-06, + "loss": 4.0295, + "step": 912 + }, + { + "epoch": 0.01, + "grad_norm": 350.77549169851227, + "learning_rate": 3.6673346693386775e-06, + "loss": 4.2581, + "step": 915 + }, + { + "epoch": 0.01, + "grad_norm": 140.8762148361552, + "learning_rate": 3.67935871743487e-06, + "loss": 4.0996, + "step": 918 + }, + { + "epoch": 0.01, + "grad_norm": 588.2529594520274, + "learning_rate": 3.6913827655310625e-06, + "loss": 3.8444, + "step": 921 + }, + { + "epoch": 0.01, + "grad_norm": 262.3874355797204, + "learning_rate": 3.703406813627255e-06, + "loss": 3.9626, + "step": 924 + }, + { + "epoch": 0.01, + "grad_norm": 209.19111451597874, + "learning_rate": 3.7154308617234475e-06, + "loss": 3.8462, + "step": 927 + }, + { + "epoch": 0.01, + "grad_norm": 125.36645793988166, + "learning_rate": 3.7274549098196396e-06, + "loss": 4.2259, + "step": 930 + }, + { + "epoch": 0.01, + "grad_norm": 191.30984624523, + "learning_rate": 3.739478957915832e-06, + "loss": 4.1031, + "step": 933 + }, + { + "epoch": 0.01, + "grad_norm": 150.87491892972355, + "learning_rate": 3.7515030060120246e-06, + "loss": 3.2437, + "step": 936 + }, + { + "epoch": 0.01, + "grad_norm": 45.389549610258555, + "learning_rate": 3.763527054108217e-06, + "loss": 3.6169, + "step": 939 + }, + { + "epoch": 0.01, + "grad_norm": 342.63272383857657, + "learning_rate": 3.7755511022044087e-06, + "loss": 3.4144, + "step": 942 + }, + { + "epoch": 0.01, + "grad_norm": 188.87351507202052, + "learning_rate": 3.7875751503006012e-06, + "loss": 4.0752, + "step": 945 + }, + { + "epoch": 0.01, + "grad_norm": 219.95484236771182, + "learning_rate": 3.7995991983967937e-06, + "loss": 4.0589, + "step": 948 + }, + { + "epoch": 0.01, + "grad_norm": 136.21443752096067, + "learning_rate": 3.8116232464929866e-06, + "loss": 3.7616, + "step": 951 + }, + { + "epoch": 0.01, + "grad_norm": 71.10494998821066, + "learning_rate": 3.823647294589179e-06, + "loss": 3.3418, + "step": 954 + }, + { + "epoch": 0.01, + "grad_norm": 182.44560584421978, + "learning_rate": 3.835671342685371e-06, + "loss": 3.881, + "step": 957 + }, + { + "epoch": 0.01, + "grad_norm": 95.75507988686726, + "learning_rate": 3.847695390781563e-06, + "loss": 4.0825, + "step": 960 + }, + { + "epoch": 0.01, + "grad_norm": 379.9595378282558, + "learning_rate": 3.859719438877756e-06, + "loss": 4.0977, + "step": 963 + }, + { + "epoch": 0.01, + "grad_norm": 654.5312075672116, + "learning_rate": 3.871743486973948e-06, + "loss": 4.0508, + "step": 966 + }, + { + "epoch": 0.01, + "grad_norm": 129.96796086585334, + "learning_rate": 3.883767535070141e-06, + "loss": 3.1933, + "step": 969 + }, + { + "epoch": 0.01, + "grad_norm": 241.10069130699713, + "learning_rate": 3.895791583166333e-06, + "loss": 3.9115, + "step": 972 + }, + { + "epoch": 0.01, + "grad_norm": 220.76071945567975, + "learning_rate": 3.907815631262526e-06, + "loss": 3.7233, + "step": 975 + }, + { + "epoch": 0.01, + "grad_norm": 61.58409145067496, + "learning_rate": 3.919839679358718e-06, + "loss": 4.4678, + "step": 978 + }, + { + "epoch": 0.01, + "grad_norm": 626.0475763770044, + "learning_rate": 3.931863727454911e-06, + "loss": 3.2748, + "step": 981 + }, + { + "epoch": 0.01, + "grad_norm": 189.85646023293904, + "learning_rate": 3.943887775551102e-06, + "loss": 3.3118, + "step": 984 + }, + { + "epoch": 0.01, + "grad_norm": 112.22737930025357, + "learning_rate": 3.955911823647295e-06, + "loss": 3.631, + "step": 987 + }, + { + "epoch": 0.01, + "grad_norm": 137.85820529020006, + "learning_rate": 3.967935871743487e-06, + "loss": 3.5924, + "step": 990 + }, + { + "epoch": 0.01, + "grad_norm": 193.51280032129836, + "learning_rate": 3.97995991983968e-06, + "loss": 3.6074, + "step": 993 + }, + { + "epoch": 0.01, + "grad_norm": 209.56098322996965, + "learning_rate": 3.9919839679358715e-06, + "loss": 4.4748, + "step": 996 + }, + { + "epoch": 0.01, + "grad_norm": 328.6171530708383, + "learning_rate": 4.004008016032064e-06, + "loss": 3.4784, + "step": 999 + }, + { + "epoch": 0.01, + "grad_norm": 196.13810512875986, + "learning_rate": 4.0160320641282565e-06, + "loss": 4.1445, + "step": 1002 + }, + { + "epoch": 0.01, + "grad_norm": 139.8737055631196, + "learning_rate": 4.028056112224449e-06, + "loss": 4.0553, + "step": 1005 + }, + { + "epoch": 0.01, + "grad_norm": 275.345601067187, + "learning_rate": 4.0400801603206415e-06, + "loss": 4.1263, + "step": 1008 + }, + { + "epoch": 0.01, + "grad_norm": 102.98891849274023, + "learning_rate": 4.052104208416834e-06, + "loss": 3.5479, + "step": 1011 + }, + { + "epoch": 0.01, + "grad_norm": 525.4851700805013, + "learning_rate": 4.0641282565130265e-06, + "loss": 4.258, + "step": 1014 + }, + { + "epoch": 0.01, + "grad_norm": 535.6214865009234, + "learning_rate": 4.076152304609219e-06, + "loss": 3.3586, + "step": 1017 + }, + { + "epoch": 0.01, + "grad_norm": 287.6175083008977, + "learning_rate": 4.0881763527054114e-06, + "loss": 4.641, + "step": 1020 + }, + { + "epoch": 0.01, + "grad_norm": 155.8687657222258, + "learning_rate": 4.100200400801603e-06, + "loss": 3.2314, + "step": 1023 + }, + { + "epoch": 0.01, + "grad_norm": 197.38003553790213, + "learning_rate": 4.112224448897796e-06, + "loss": 3.6953, + "step": 1026 + }, + { + "epoch": 0.01, + "grad_norm": 283.37756039767754, + "learning_rate": 4.124248496993988e-06, + "loss": 3.4596, + "step": 1029 + }, + { + "epoch": 0.01, + "grad_norm": 106.30397588327342, + "learning_rate": 4.1362725450901806e-06, + "loss": 3.0731, + "step": 1032 + }, + { + "epoch": 0.01, + "grad_norm": 142.34999870130002, + "learning_rate": 4.148296593186373e-06, + "loss": 3.4673, + "step": 1035 + }, + { + "epoch": 0.01, + "grad_norm": 107.82999024072105, + "learning_rate": 4.1603206412825656e-06, + "loss": 3.0348, + "step": 1038 + }, + { + "epoch": 0.01, + "grad_norm": 166.0371803961332, + "learning_rate": 4.172344689378758e-06, + "loss": 4.1906, + "step": 1041 + }, + { + "epoch": 0.01, + "grad_norm": 262.5435837369286, + "learning_rate": 4.1843687374749505e-06, + "loss": 3.4365, + "step": 1044 + }, + { + "epoch": 0.01, + "grad_norm": 138.49684545812437, + "learning_rate": 4.196392785571143e-06, + "loss": 3.4868, + "step": 1047 + }, + { + "epoch": 0.01, + "grad_norm": 289.2831615477376, + "learning_rate": 4.208416833667335e-06, + "loss": 4.0713, + "step": 1050 + }, + { + "epoch": 0.01, + "grad_norm": 210.53445834400122, + "learning_rate": 4.220440881763527e-06, + "loss": 3.5596, + "step": 1053 + }, + { + "epoch": 0.01, + "grad_norm": 85.89148965692374, + "learning_rate": 4.23246492985972e-06, + "loss": 2.8381, + "step": 1056 + }, + { + "epoch": 0.01, + "grad_norm": 274.160609801848, + "learning_rate": 4.244488977955912e-06, + "loss": 3.9331, + "step": 1059 + }, + { + "epoch": 0.01, + "grad_norm": 739.6844552065412, + "learning_rate": 4.256513026052105e-06, + "loss": 3.0992, + "step": 1062 + }, + { + "epoch": 0.01, + "grad_norm": 276.3219037815293, + "learning_rate": 4.268537074148297e-06, + "loss": 3.4482, + "step": 1065 + }, + { + "epoch": 0.01, + "grad_norm": 357.77208723858524, + "learning_rate": 4.28056112224449e-06, + "loss": 3.2214, + "step": 1068 + }, + { + "epoch": 0.01, + "grad_norm": 195.0179979910478, + "learning_rate": 4.292585170340682e-06, + "loss": 2.9973, + "step": 1071 + }, + { + "epoch": 0.01, + "grad_norm": 129.60781649309112, + "learning_rate": 4.304609218436875e-06, + "loss": 3.7575, + "step": 1074 + }, + { + "epoch": 0.01, + "grad_norm": 1047.7564316515802, + "learning_rate": 4.316633266533066e-06, + "loss": 4.5591, + "step": 1077 + }, + { + "epoch": 0.01, + "grad_norm": 211.7714344281244, + "learning_rate": 4.328657314629259e-06, + "loss": 3.0389, + "step": 1080 + }, + { + "epoch": 0.01, + "grad_norm": 141.7799262023561, + "learning_rate": 4.340681362725451e-06, + "loss": 3.696, + "step": 1083 + }, + { + "epoch": 0.01, + "grad_norm": 198.03905563870268, + "learning_rate": 4.352705410821644e-06, + "loss": 3.5544, + "step": 1086 + }, + { + "epoch": 0.01, + "grad_norm": 158.6886561081127, + "learning_rate": 4.364729458917836e-06, + "loss": 3.3338, + "step": 1089 + }, + { + "epoch": 0.01, + "grad_norm": 296.07743397949696, + "learning_rate": 4.376753507014028e-06, + "loss": 3.5898, + "step": 1092 + }, + { + "epoch": 0.01, + "grad_norm": 38.30156096023493, + "learning_rate": 4.38877755511022e-06, + "loss": 3.4919, + "step": 1095 + }, + { + "epoch": 0.01, + "grad_norm": 604.7443295970253, + "learning_rate": 4.400801603206413e-06, + "loss": 4.0098, + "step": 1098 + }, + { + "epoch": 0.01, + "grad_norm": 679.5123427053525, + "learning_rate": 4.412825651302605e-06, + "loss": 3.887, + "step": 1101 + }, + { + "epoch": 0.01, + "grad_norm": 628.8135447334216, + "learning_rate": 4.424849699398798e-06, + "loss": 3.0472, + "step": 1104 + }, + { + "epoch": 0.01, + "grad_norm": 557.7293415339041, + "learning_rate": 4.43687374749499e-06, + "loss": 2.9262, + "step": 1107 + }, + { + "epoch": 0.01, + "grad_norm": 287.29796054755434, + "learning_rate": 4.448897795591183e-06, + "loss": 2.9476, + "step": 1110 + }, + { + "epoch": 0.01, + "grad_norm": 140.09933214844978, + "learning_rate": 4.460921843687375e-06, + "loss": 2.9701, + "step": 1113 + }, + { + "epoch": 0.01, + "grad_norm": 457.19903528048866, + "learning_rate": 4.472945891783568e-06, + "loss": 3.1585, + "step": 1116 + }, + { + "epoch": 0.01, + "grad_norm": 338.1897306970745, + "learning_rate": 4.4849699398797595e-06, + "loss": 2.4521, + "step": 1119 + }, + { + "epoch": 0.01, + "grad_norm": 315.1696409193873, + "learning_rate": 4.496993987975952e-06, + "loss": 2.8739, + "step": 1122 + }, + { + "epoch": 0.01, + "grad_norm": 162.0761398313567, + "learning_rate": 4.5090180360721445e-06, + "loss": 3.9745, + "step": 1125 + }, + { + "epoch": 0.01, + "grad_norm": 225.31259598132505, + "learning_rate": 4.521042084168337e-06, + "loss": 3.8919, + "step": 1128 + }, + { + "epoch": 0.01, + "grad_norm": 109.98008442561827, + "learning_rate": 4.5330661322645295e-06, + "loss": 2.5875, + "step": 1131 + }, + { + "epoch": 0.01, + "grad_norm": 176.0804278494564, + "learning_rate": 4.545090180360722e-06, + "loss": 3.6328, + "step": 1134 + }, + { + "epoch": 0.01, + "grad_norm": 94.06343274867857, + "learning_rate": 4.5571142284569145e-06, + "loss": 3.5806, + "step": 1137 + }, + { + "epoch": 0.01, + "grad_norm": 484.203314059311, + "learning_rate": 4.569138276553107e-06, + "loss": 2.9951, + "step": 1140 + }, + { + "epoch": 0.01, + "grad_norm": 134.6904343647755, + "learning_rate": 4.5811623246492994e-06, + "loss": 2.7122, + "step": 1143 + }, + { + "epoch": 0.01, + "grad_norm": 161.23954657403954, + "learning_rate": 4.593186372745491e-06, + "loss": 3.7896, + "step": 1146 + }, + { + "epoch": 0.01, + "grad_norm": 475.13426545273273, + "learning_rate": 4.605210420841684e-06, + "loss": 3.7643, + "step": 1149 + }, + { + "epoch": 0.01, + "grad_norm": 87.5684700601329, + "learning_rate": 4.617234468937876e-06, + "loss": 3.3249, + "step": 1152 + }, + { + "epoch": 0.01, + "grad_norm": 58.139694113604676, + "learning_rate": 4.6292585170340686e-06, + "loss": 3.6217, + "step": 1155 + }, + { + "epoch": 0.01, + "grad_norm": 987.8161039529864, + "learning_rate": 4.641282565130261e-06, + "loss": 3.5085, + "step": 1158 + }, + { + "epoch": 0.01, + "grad_norm": 22.493945812033186, + "learning_rate": 4.6533066132264536e-06, + "loss": 3.5101, + "step": 1161 + }, + { + "epoch": 0.01, + "grad_norm": 282.70771920236353, + "learning_rate": 4.665330661322646e-06, + "loss": 3.661, + "step": 1164 + }, + { + "epoch": 0.01, + "grad_norm": 75.31684649337402, + "learning_rate": 4.6773547094188385e-06, + "loss": 2.6746, + "step": 1167 + }, + { + "epoch": 0.01, + "grad_norm": 229.8330481711537, + "learning_rate": 4.689378757515031e-06, + "loss": 3.2331, + "step": 1170 + }, + { + "epoch": 0.01, + "grad_norm": 272.7975996081335, + "learning_rate": 4.701402805611223e-06, + "loss": 3.7427, + "step": 1173 + }, + { + "epoch": 0.01, + "grad_norm": 133.78950870833228, + "learning_rate": 4.713426853707415e-06, + "loss": 3.2106, + "step": 1176 + }, + { + "epoch": 0.01, + "grad_norm": 1094.9780534112604, + "learning_rate": 4.725450901803608e-06, + "loss": 2.765, + "step": 1179 + }, + { + "epoch": 0.01, + "grad_norm": 307.84113221650375, + "learning_rate": 4.7374749498998e-06, + "loss": 3.2204, + "step": 1182 + }, + { + "epoch": 0.01, + "grad_norm": 372.6428733757822, + "learning_rate": 4.749498997995992e-06, + "loss": 3.0366, + "step": 1185 + }, + { + "epoch": 0.01, + "grad_norm": 113.23297685958784, + "learning_rate": 4.761523046092184e-06, + "loss": 3.3748, + "step": 1188 + }, + { + "epoch": 0.01, + "grad_norm": 193.6323409867233, + "learning_rate": 4.773547094188377e-06, + "loss": 3.1553, + "step": 1191 + }, + { + "epoch": 0.01, + "grad_norm": 111.18630450871069, + "learning_rate": 4.785571142284569e-06, + "loss": 4.3931, + "step": 1194 + }, + { + "epoch": 0.01, + "grad_norm": 265.338951711754, + "learning_rate": 4.797595190380762e-06, + "loss": 2.9933, + "step": 1197 + }, + { + "epoch": 0.01, + "grad_norm": 54.81590249248751, + "learning_rate": 4.809619238476954e-06, + "loss": 2.6557, + "step": 1200 + }, + { + "epoch": 0.01, + "grad_norm": 162.73488399609303, + "learning_rate": 4.821643286573147e-06, + "loss": 3.696, + "step": 1203 + }, + { + "epoch": 0.01, + "grad_norm": 123.18271511532623, + "learning_rate": 4.833667334669339e-06, + "loss": 3.1877, + "step": 1206 + }, + { + "epoch": 0.01, + "grad_norm": 96.97348345394603, + "learning_rate": 4.845691382765532e-06, + "loss": 2.7308, + "step": 1209 + }, + { + "epoch": 0.01, + "grad_norm": 75.58885473548358, + "learning_rate": 4.857715430861723e-06, + "loss": 2.5889, + "step": 1212 + }, + { + "epoch": 0.01, + "grad_norm": 77.30247003920213, + "learning_rate": 4.869739478957916e-06, + "loss": 2.5246, + "step": 1215 + }, + { + "epoch": 0.01, + "grad_norm": 118.97938905218146, + "learning_rate": 4.881763527054108e-06, + "loss": 3.5246, + "step": 1218 + }, + { + "epoch": 0.01, + "grad_norm": 107.12807152280953, + "learning_rate": 4.893787575150301e-06, + "loss": 3.3958, + "step": 1221 + }, + { + "epoch": 0.01, + "grad_norm": 137.5312068327304, + "learning_rate": 4.905811623246493e-06, + "loss": 3.9684, + "step": 1224 + }, + { + "epoch": 0.01, + "grad_norm": 90.65531863120694, + "learning_rate": 4.917835671342686e-06, + "loss": 3.3669, + "step": 1227 + }, + { + "epoch": 0.01, + "grad_norm": 150.71871642946314, + "learning_rate": 4.929859719438878e-06, + "loss": 3.5335, + "step": 1230 + }, + { + "epoch": 0.01, + "grad_norm": 90.84239537670638, + "learning_rate": 4.941883767535071e-06, + "loss": 3.0881, + "step": 1233 + }, + { + "epoch": 0.01, + "grad_norm": 170.32699286655455, + "learning_rate": 4.953907815631263e-06, + "loss": 3.772, + "step": 1236 + }, + { + "epoch": 0.01, + "grad_norm": 98.02261077977747, + "learning_rate": 4.965931863727455e-06, + "loss": 3.4358, + "step": 1239 + }, + { + "epoch": 0.01, + "grad_norm": 87.90763525938776, + "learning_rate": 4.9779559118236475e-06, + "loss": 2.4917, + "step": 1242 + }, + { + "epoch": 0.01, + "grad_norm": 135.93837744148243, + "learning_rate": 4.98997995991984e-06, + "loss": 2.9473, + "step": 1245 + }, + { + "epoch": 0.02, + "grad_norm": 58.208941547068484, + "learning_rate": 5.0020040080160325e-06, + "loss": 3.2921, + "step": 1248 + }, + { + "epoch": 0.02, + "grad_norm": 134.10330813431906, + "learning_rate": 5.014028056112224e-06, + "loss": 3.2342, + "step": 1251 + }, + { + "epoch": 0.02, + "grad_norm": 132.7853510944989, + "learning_rate": 5.0260521042084175e-06, + "loss": 3.3815, + "step": 1254 + }, + { + "epoch": 0.02, + "grad_norm": 152.04973143811927, + "learning_rate": 5.03807615230461e-06, + "loss": 3.4994, + "step": 1257 + }, + { + "epoch": 0.02, + "grad_norm": 77.85252218652438, + "learning_rate": 5.050100200400802e-06, + "loss": 2.6968, + "step": 1260 + }, + { + "epoch": 0.02, + "grad_norm": 74.85925016328052, + "learning_rate": 5.062124248496995e-06, + "loss": 2.2319, + "step": 1263 + }, + { + "epoch": 0.02, + "grad_norm": 182.59354623137418, + "learning_rate": 5.074148296593187e-06, + "loss": 2.7671, + "step": 1266 + }, + { + "epoch": 0.02, + "grad_norm": 62.147377058283524, + "learning_rate": 5.08617234468938e-06, + "loss": 2.8088, + "step": 1269 + }, + { + "epoch": 0.02, + "grad_norm": 90.90105407577688, + "learning_rate": 5.098196392785572e-06, + "loss": 2.6388, + "step": 1272 + }, + { + "epoch": 0.02, + "grad_norm": 137.07281593522973, + "learning_rate": 5.110220440881763e-06, + "loss": 3.2898, + "step": 1275 + }, + { + "epoch": 0.02, + "grad_norm": 75.50117115682987, + "learning_rate": 5.122244488977957e-06, + "loss": 3.4162, + "step": 1278 + }, + { + "epoch": 0.02, + "grad_norm": 286.7665851470943, + "learning_rate": 5.134268537074148e-06, + "loss": 3.1901, + "step": 1281 + }, + { + "epoch": 0.02, + "grad_norm": 28.66601128910053, + "learning_rate": 5.1462925851703416e-06, + "loss": 3.2131, + "step": 1284 + }, + { + "epoch": 0.02, + "grad_norm": 1500.121825052642, + "learning_rate": 5.158316633266533e-06, + "loss": 3.2843, + "step": 1287 + }, + { + "epoch": 0.02, + "grad_norm": 80.1364140359042, + "learning_rate": 5.170340681362726e-06, + "loss": 3.5378, + "step": 1290 + }, + { + "epoch": 0.02, + "grad_norm": 191.56931289780508, + "learning_rate": 5.182364729458918e-06, + "loss": 3.0766, + "step": 1293 + }, + { + "epoch": 0.02, + "grad_norm": 83.90462245869212, + "learning_rate": 5.194388777555111e-06, + "loss": 3.4303, + "step": 1296 + }, + { + "epoch": 0.02, + "grad_norm": 419.6436536413931, + "learning_rate": 5.206412825651303e-06, + "loss": 3.4026, + "step": 1299 + }, + { + "epoch": 0.02, + "grad_norm": 47.62703536381855, + "learning_rate": 5.218436873747496e-06, + "loss": 3.3901, + "step": 1302 + }, + { + "epoch": 0.02, + "grad_norm": 118.92526272717755, + "learning_rate": 5.230460921843687e-06, + "loss": 3.1895, + "step": 1305 + }, + { + "epoch": 0.02, + "grad_norm": 218.56087085775675, + "learning_rate": 5.242484969939881e-06, + "loss": 2.749, + "step": 1308 + }, + { + "epoch": 0.02, + "grad_norm": 137.09062180161862, + "learning_rate": 5.254509018036072e-06, + "loss": 2.6322, + "step": 1311 + }, + { + "epoch": 0.02, + "grad_norm": 177.41629432807036, + "learning_rate": 5.266533066132265e-06, + "loss": 3.048, + "step": 1314 + }, + { + "epoch": 0.02, + "grad_norm": 115.4149280200188, + "learning_rate": 5.278557114228457e-06, + "loss": 3.5104, + "step": 1317 + }, + { + "epoch": 0.02, + "grad_norm": 425.89826457198217, + "learning_rate": 5.29058116232465e-06, + "loss": 3.0371, + "step": 1320 + }, + { + "epoch": 0.02, + "grad_norm": 46.142747046509996, + "learning_rate": 5.302605210420842e-06, + "loss": 3.5633, + "step": 1323 + }, + { + "epoch": 0.02, + "grad_norm": 290.80504138656187, + "learning_rate": 5.314629258517035e-06, + "loss": 2.6891, + "step": 1326 + }, + { + "epoch": 0.02, + "grad_norm": 85.52007667026322, + "learning_rate": 5.3266533066132264e-06, + "loss": 3.152, + "step": 1329 + }, + { + "epoch": 0.02, + "grad_norm": 89.11044848086871, + "learning_rate": 5.33867735470942e-06, + "loss": 3.1056, + "step": 1332 + }, + { + "epoch": 0.02, + "grad_norm": 102.57786367512801, + "learning_rate": 5.350701402805611e-06, + "loss": 3.2852, + "step": 1335 + }, + { + "epoch": 0.02, + "grad_norm": 136.57432691956066, + "learning_rate": 5.362725450901805e-06, + "loss": 2.6038, + "step": 1338 + }, + { + "epoch": 0.02, + "grad_norm": 257.8217558011037, + "learning_rate": 5.374749498997996e-06, + "loss": 2.6895, + "step": 1341 + }, + { + "epoch": 0.02, + "grad_norm": 97.93830344807986, + "learning_rate": 5.386773547094188e-06, + "loss": 2.9697, + "step": 1344 + }, + { + "epoch": 0.02, + "grad_norm": 136.5648526067958, + "learning_rate": 5.398797595190381e-06, + "loss": 3.2728, + "step": 1347 + }, + { + "epoch": 0.02, + "grad_norm": 160.12171417072287, + "learning_rate": 5.410821643286573e-06, + "loss": 2.812, + "step": 1350 + }, + { + "epoch": 0.02, + "grad_norm": 124.49105680471506, + "learning_rate": 5.422845691382766e-06, + "loss": 3.1955, + "step": 1353 + }, + { + "epoch": 0.02, + "grad_norm": 86.22823408791577, + "learning_rate": 5.434869739478959e-06, + "loss": 2.5579, + "step": 1356 + }, + { + "epoch": 0.02, + "grad_norm": 104.60711214245012, + "learning_rate": 5.4468937875751505e-06, + "loss": 3.3187, + "step": 1359 + }, + { + "epoch": 0.02, + "grad_norm": 114.2604027078665, + "learning_rate": 5.458917835671344e-06, + "loss": 2.8151, + "step": 1362 + }, + { + "epoch": 0.02, + "grad_norm": 29.297701168279946, + "learning_rate": 5.4709418837675355e-06, + "loss": 2.582, + "step": 1365 + }, + { + "epoch": 0.02, + "grad_norm": 101.27833562235766, + "learning_rate": 5.482965931863727e-06, + "loss": 4.0378, + "step": 1368 + }, + { + "epoch": 0.02, + "grad_norm": 305.33954243939354, + "learning_rate": 5.4949899799599205e-06, + "loss": 2.8478, + "step": 1371 + }, + { + "epoch": 0.02, + "grad_norm": 108.46048432759764, + "learning_rate": 5.507014028056112e-06, + "loss": 3.2673, + "step": 1374 + }, + { + "epoch": 0.02, + "grad_norm": 81.4886844371639, + "learning_rate": 5.5190380761523055e-06, + "loss": 3.085, + "step": 1377 + }, + { + "epoch": 0.02, + "grad_norm": 170.31308651871637, + "learning_rate": 5.531062124248497e-06, + "loss": 3.5664, + "step": 1380 + }, + { + "epoch": 0.02, + "grad_norm": 93.31579351205848, + "learning_rate": 5.54308617234469e-06, + "loss": 3.4224, + "step": 1383 + }, + { + "epoch": 0.02, + "grad_norm": 103.63142918750007, + "learning_rate": 5.555110220440882e-06, + "loss": 2.5846, + "step": 1386 + }, + { + "epoch": 0.02, + "grad_norm": 249.45254717718737, + "learning_rate": 5.567134268537075e-06, + "loss": 2.5367, + "step": 1389 + }, + { + "epoch": 0.02, + "grad_norm": 43.59744211892179, + "learning_rate": 5.579158316633267e-06, + "loss": 3.3012, + "step": 1392 + }, + { + "epoch": 0.02, + "grad_norm": 177.08240454429415, + "learning_rate": 5.59118236472946e-06, + "loss": 3.0113, + "step": 1395 + }, + { + "epoch": 0.02, + "grad_norm": 152.39278366381998, + "learning_rate": 5.603206412825651e-06, + "loss": 2.4985, + "step": 1398 + }, + { + "epoch": 0.02, + "grad_norm": 114.44800821266158, + "learning_rate": 5.615230460921845e-06, + "loss": 2.382, + "step": 1401 + }, + { + "epoch": 0.02, + "grad_norm": 198.93207239107542, + "learning_rate": 5.627254509018036e-06, + "loss": 2.4652, + "step": 1404 + }, + { + "epoch": 0.02, + "grad_norm": 122.23249056632383, + "learning_rate": 5.6392785571142296e-06, + "loss": 2.4183, + "step": 1407 + }, + { + "epoch": 0.02, + "grad_norm": 226.83233688934982, + "learning_rate": 5.651302605210421e-06, + "loss": 3.3407, + "step": 1410 + }, + { + "epoch": 0.02, + "grad_norm": 112.19713145637408, + "learning_rate": 5.663326653306614e-06, + "loss": 3.2275, + "step": 1413 + }, + { + "epoch": 0.02, + "grad_norm": 43.12804775323655, + "learning_rate": 5.675350701402806e-06, + "loss": 2.6086, + "step": 1416 + }, + { + "epoch": 0.02, + "grad_norm": 149.73832861017993, + "learning_rate": 5.687374749498999e-06, + "loss": 2.6676, + "step": 1419 + }, + { + "epoch": 0.02, + "grad_norm": 107.09039479908697, + "learning_rate": 5.69939879759519e-06, + "loss": 3.3363, + "step": 1422 + }, + { + "epoch": 0.02, + "grad_norm": 133.89077397685392, + "learning_rate": 5.711422845691384e-06, + "loss": 2.7614, + "step": 1425 + }, + { + "epoch": 0.02, + "grad_norm": 31.872510943345944, + "learning_rate": 5.723446893787575e-06, + "loss": 3.0522, + "step": 1428 + }, + { + "epoch": 0.02, + "grad_norm": 128.44451751024442, + "learning_rate": 5.735470941883769e-06, + "loss": 2.774, + "step": 1431 + }, + { + "epoch": 0.02, + "grad_norm": 128.76662910941562, + "learning_rate": 5.74749498997996e-06, + "loss": 2.882, + "step": 1434 + }, + { + "epoch": 0.02, + "grad_norm": 115.46937556703506, + "learning_rate": 5.759519038076152e-06, + "loss": 2.9019, + "step": 1437 + }, + { + "epoch": 0.02, + "grad_norm": 193.3760101134998, + "learning_rate": 5.771543086172345e-06, + "loss": 3.0827, + "step": 1440 + }, + { + "epoch": 0.02, + "grad_norm": 47.05089718851517, + "learning_rate": 5.783567134268537e-06, + "loss": 2.5785, + "step": 1443 + }, + { + "epoch": 0.02, + "grad_norm": 69.58347313666718, + "learning_rate": 5.79559118236473e-06, + "loss": 3.2347, + "step": 1446 + }, + { + "epoch": 0.02, + "grad_norm": 52.52856425548187, + "learning_rate": 5.807615230460922e-06, + "loss": 2.7257, + "step": 1449 + }, + { + "epoch": 0.02, + "grad_norm": 76.27141045735381, + "learning_rate": 5.8196392785571144e-06, + "loss": 2.101, + "step": 1452 + }, + { + "epoch": 0.02, + "grad_norm": 155.089506544711, + "learning_rate": 5.831663326653308e-06, + "loss": 2.775, + "step": 1455 + }, + { + "epoch": 0.02, + "grad_norm": 116.80248939599109, + "learning_rate": 5.8436873747494994e-06, + "loss": 3.2373, + "step": 1458 + }, + { + "epoch": 0.02, + "grad_norm": 127.86847575714636, + "learning_rate": 5.855711422845693e-06, + "loss": 2.9593, + "step": 1461 + }, + { + "epoch": 0.02, + "grad_norm": 54.01420251300528, + "learning_rate": 5.867735470941884e-06, + "loss": 2.6455, + "step": 1464 + }, + { + "epoch": 0.02, + "grad_norm": 173.70471378014463, + "learning_rate": 5.879759519038076e-06, + "loss": 3.1888, + "step": 1467 + }, + { + "epoch": 0.02, + "grad_norm": 100.73669388603457, + "learning_rate": 5.891783567134269e-06, + "loss": 2.7118, + "step": 1470 + }, + { + "epoch": 0.02, + "grad_norm": 73.73830997770878, + "learning_rate": 5.903807615230461e-06, + "loss": 2.9598, + "step": 1473 + }, + { + "epoch": 0.02, + "grad_norm": 110.04191131176292, + "learning_rate": 5.9158316633266535e-06, + "loss": 3.3081, + "step": 1476 + }, + { + "epoch": 0.02, + "grad_norm": 63.91426320707769, + "learning_rate": 5.927855711422846e-06, + "loss": 2.6683, + "step": 1479 + }, + { + "epoch": 0.02, + "grad_norm": 60.55446514409336, + "learning_rate": 5.9398797595190385e-06, + "loss": 3.1401, + "step": 1482 + }, + { + "epoch": 0.02, + "grad_norm": 164.68195113782568, + "learning_rate": 5.951903807615231e-06, + "loss": 3.0514, + "step": 1485 + }, + { + "epoch": 0.02, + "grad_norm": 492.29506002292993, + "learning_rate": 5.9639278557114235e-06, + "loss": 2.3749, + "step": 1488 + }, + { + "epoch": 0.02, + "grad_norm": 124.18524334431103, + "learning_rate": 5.975951903807615e-06, + "loss": 2.91, + "step": 1491 + }, + { + "epoch": 0.02, + "grad_norm": 190.17083051014902, + "learning_rate": 5.9879759519038085e-06, + "loss": 3.4333, + "step": 1494 + }, + { + "epoch": 0.02, + "grad_norm": 121.23534535522892, + "learning_rate": 6e-06, + "loss": 2.5809, + "step": 1497 + }, + { + "epoch": 0.02, + "grad_norm": 138.2979051303471, + "learning_rate": 6.0120240480961935e-06, + "loss": 3.1235, + "step": 1500 + }, + { + "epoch": 0.02, + "grad_norm": 88.11065567592635, + "learning_rate": 6.024048096192385e-06, + "loss": 3.0778, + "step": 1503 + }, + { + "epoch": 0.02, + "grad_norm": 121.15471931275187, + "learning_rate": 6.036072144288578e-06, + "loss": 3.3044, + "step": 1506 + }, + { + "epoch": 0.02, + "grad_norm": 184.01052305537848, + "learning_rate": 6.04809619238477e-06, + "loss": 2.9851, + "step": 1509 + }, + { + "epoch": 0.02, + "grad_norm": 116.8555627338207, + "learning_rate": 6.060120240480963e-06, + "loss": 2.868, + "step": 1512 + }, + { + "epoch": 0.02, + "grad_norm": 1303.9392097933758, + "learning_rate": 6.072144288577155e-06, + "loss": 2.7049, + "step": 1515 + }, + { + "epoch": 0.02, + "grad_norm": 594.8922530035271, + "learning_rate": 6.084168336673348e-06, + "loss": 3.7241, + "step": 1518 + }, + { + "epoch": 0.02, + "grad_norm": 42.876350761661364, + "learning_rate": 6.096192384769539e-06, + "loss": 3.429, + "step": 1521 + }, + { + "epoch": 0.02, + "grad_norm": 141.8050999515628, + "learning_rate": 6.108216432865733e-06, + "loss": 2.7463, + "step": 1524 + }, + { + "epoch": 0.02, + "grad_norm": 191.7772464023002, + "learning_rate": 6.120240480961924e-06, + "loss": 2.8669, + "step": 1527 + }, + { + "epoch": 0.02, + "grad_norm": 38.32658780658705, + "learning_rate": 6.132264529058116e-06, + "loss": 2.3705, + "step": 1530 + }, + { + "epoch": 0.02, + "grad_norm": 37.91498526549936, + "learning_rate": 6.144288577154309e-06, + "loss": 2.9102, + "step": 1533 + }, + { + "epoch": 0.02, + "grad_norm": 142.5096245575311, + "learning_rate": 6.156312625250501e-06, + "loss": 2.796, + "step": 1536 + }, + { + "epoch": 0.02, + "grad_norm": 249.37960821312348, + "learning_rate": 6.168336673346694e-06, + "loss": 3.1439, + "step": 1539 + }, + { + "epoch": 0.02, + "grad_norm": 481.0312384843851, + "learning_rate": 6.180360721442886e-06, + "loss": 2.8179, + "step": 1542 + }, + { + "epoch": 0.02, + "grad_norm": 83.09011222249877, + "learning_rate": 6.192384769539078e-06, + "loss": 3.389, + "step": 1545 + }, + { + "epoch": 0.02, + "grad_norm": 32.189208024424836, + "learning_rate": 6.204408817635271e-06, + "loss": 2.3971, + "step": 1548 + }, + { + "epoch": 0.02, + "grad_norm": 125.72771306366275, + "learning_rate": 6.216432865731463e-06, + "loss": 2.406, + "step": 1551 + }, + { + "epoch": 0.02, + "grad_norm": 108.50142949643288, + "learning_rate": 6.228456913827657e-06, + "loss": 2.9005, + "step": 1554 + }, + { + "epoch": 0.02, + "grad_norm": 83.2791343079443, + "learning_rate": 6.240480961923848e-06, + "loss": 2.9862, + "step": 1557 + }, + { + "epoch": 0.02, + "grad_norm": 59.02132755966351, + "learning_rate": 6.25250501002004e-06, + "loss": 2.2313, + "step": 1560 + }, + { + "epoch": 0.02, + "grad_norm": 56.65769725822971, + "learning_rate": 6.264529058116233e-06, + "loss": 2.8108, + "step": 1563 + }, + { + "epoch": 0.02, + "grad_norm": 69.9709206779411, + "learning_rate": 6.276553106212425e-06, + "loss": 2.4869, + "step": 1566 + }, + { + "epoch": 0.02, + "grad_norm": 175.08394840126422, + "learning_rate": 6.288577154308618e-06, + "loss": 3.1229, + "step": 1569 + }, + { + "epoch": 0.02, + "grad_norm": 40.093621211640404, + "learning_rate": 6.30060120240481e-06, + "loss": 2.8091, + "step": 1572 + }, + { + "epoch": 0.02, + "grad_norm": 92.27710670164174, + "learning_rate": 6.3126252505010024e-06, + "loss": 3.0882, + "step": 1575 + }, + { + "epoch": 0.02, + "grad_norm": 77.41527993129141, + "learning_rate": 6.324649298597195e-06, + "loss": 2.8076, + "step": 1578 + }, + { + "epoch": 0.02, + "grad_norm": 100.14349675040837, + "learning_rate": 6.3366733466933874e-06, + "loss": 2.361, + "step": 1581 + }, + { + "epoch": 0.02, + "grad_norm": 60.06708886619837, + "learning_rate": 6.348697394789579e-06, + "loss": 3.2602, + "step": 1584 + }, + { + "epoch": 0.02, + "grad_norm": 93.26033922806253, + "learning_rate": 6.360721442885772e-06, + "loss": 2.9288, + "step": 1587 + }, + { + "epoch": 0.02, + "grad_norm": 197.96291435752832, + "learning_rate": 6.372745490981964e-06, + "loss": 2.2946, + "step": 1590 + }, + { + "epoch": 0.02, + "grad_norm": 54.32219668297205, + "learning_rate": 6.384769539078157e-06, + "loss": 3.2832, + "step": 1593 + }, + { + "epoch": 0.02, + "grad_norm": 76.18016288510032, + "learning_rate": 6.396793587174349e-06, + "loss": 2.7693, + "step": 1596 + }, + { + "epoch": 0.02, + "grad_norm": 143.8716489246768, + "learning_rate": 6.4088176352705416e-06, + "loss": 2.6569, + "step": 1599 + }, + { + "epoch": 0.02, + "grad_norm": 250.88403955165302, + "learning_rate": 6.420841683366734e-06, + "loss": 2.6335, + "step": 1602 + }, + { + "epoch": 0.02, + "grad_norm": 629.2726912787767, + "learning_rate": 6.4328657314629265e-06, + "loss": 2.3742, + "step": 1605 + }, + { + "epoch": 0.02, + "grad_norm": 70.79674285844187, + "learning_rate": 6.444889779559119e-06, + "loss": 2.5183, + "step": 1608 + }, + { + "epoch": 0.02, + "grad_norm": 283.3366554869203, + "learning_rate": 6.4569138276553115e-06, + "loss": 2.9189, + "step": 1611 + }, + { + "epoch": 0.02, + "grad_norm": 72.86293676157051, + "learning_rate": 6.468937875751503e-06, + "loss": 2.3747, + "step": 1614 + }, + { + "epoch": 0.02, + "grad_norm": 79.63256611933431, + "learning_rate": 6.4809619238476965e-06, + "loss": 2.3494, + "step": 1617 + }, + { + "epoch": 0.02, + "grad_norm": 60.38018752550871, + "learning_rate": 6.492985971943888e-06, + "loss": 2.3725, + "step": 1620 + }, + { + "epoch": 0.02, + "grad_norm": 79.75683907064473, + "learning_rate": 6.5050100200400815e-06, + "loss": 2.9727, + "step": 1623 + }, + { + "epoch": 0.02, + "grad_norm": 101.57114359216581, + "learning_rate": 6.517034068136273e-06, + "loss": 2.9793, + "step": 1626 + }, + { + "epoch": 0.02, + "grad_norm": 32.01794322071198, + "learning_rate": 6.529058116232465e-06, + "loss": 2.2819, + "step": 1629 + }, + { + "epoch": 0.02, + "grad_norm": 88.83381951506398, + "learning_rate": 6.541082164328658e-06, + "loss": 2.6642, + "step": 1632 + }, + { + "epoch": 0.02, + "grad_norm": 79.67379954979364, + "learning_rate": 6.55310621242485e-06, + "loss": 2.3831, + "step": 1635 + }, + { + "epoch": 0.02, + "grad_norm": 81.01948362320267, + "learning_rate": 6.565130260521042e-06, + "loss": 2.7762, + "step": 1638 + }, + { + "epoch": 0.02, + "grad_norm": 116.44615703272481, + "learning_rate": 6.577154308617235e-06, + "loss": 2.9351, + "step": 1641 + }, + { + "epoch": 0.02, + "grad_norm": 120.36321329167835, + "learning_rate": 6.589178356713427e-06, + "loss": 2.513, + "step": 1644 + }, + { + "epoch": 0.02, + "grad_norm": 80.64826517755635, + "learning_rate": 6.60120240480962e-06, + "loss": 2.421, + "step": 1647 + }, + { + "epoch": 0.02, + "grad_norm": 32.58676804425949, + "learning_rate": 6.613226452905812e-06, + "loss": 2.8144, + "step": 1650 + }, + { + "epoch": 0.02, + "grad_norm": 142.84852564668734, + "learning_rate": 6.625250501002004e-06, + "loss": 2.1571, + "step": 1653 + }, + { + "epoch": 0.02, + "grad_norm": 415.68414264872507, + "learning_rate": 6.637274549098197e-06, + "loss": 2.4508, + "step": 1656 + }, + { + "epoch": 0.02, + "grad_norm": 116.54755215282042, + "learning_rate": 6.649298597194389e-06, + "loss": 2.6681, + "step": 1659 + }, + { + "epoch": 0.02, + "grad_norm": 99.55229853071744, + "learning_rate": 6.661322645290582e-06, + "loss": 2.9515, + "step": 1662 + }, + { + "epoch": 0.02, + "grad_norm": 61.487893616171185, + "learning_rate": 6.673346693386774e-06, + "loss": 2.4326, + "step": 1665 + }, + { + "epoch": 0.02, + "grad_norm": 57.56551119255928, + "learning_rate": 6.685370741482966e-06, + "loss": 2.6361, + "step": 1668 + }, + { + "epoch": 0.02, + "grad_norm": 85.90768797530282, + "learning_rate": 6.697394789579159e-06, + "loss": 3.26, + "step": 1671 + }, + { + "epoch": 0.02, + "grad_norm": 62.239969036750885, + "learning_rate": 6.709418837675351e-06, + "loss": 2.7474, + "step": 1674 + }, + { + "epoch": 0.02, + "grad_norm": 78.76359035818213, + "learning_rate": 6.721442885771544e-06, + "loss": 2.8786, + "step": 1677 + }, + { + "epoch": 0.02, + "grad_norm": 37.62407924793826, + "learning_rate": 6.733466933867736e-06, + "loss": 2.3911, + "step": 1680 + }, + { + "epoch": 0.02, + "grad_norm": 123.07610740465812, + "learning_rate": 6.745490981963928e-06, + "loss": 2.4805, + "step": 1683 + }, + { + "epoch": 0.02, + "grad_norm": 61.37754531242168, + "learning_rate": 6.757515030060121e-06, + "loss": 2.6247, + "step": 1686 + }, + { + "epoch": 0.02, + "grad_norm": 58.17967743749942, + "learning_rate": 6.769539078156313e-06, + "loss": 2.5991, + "step": 1689 + }, + { + "epoch": 0.02, + "grad_norm": 120.20393948212907, + "learning_rate": 6.781563126252505e-06, + "loss": 2.3784, + "step": 1692 + }, + { + "epoch": 0.02, + "grad_norm": 111.98010651507111, + "learning_rate": 6.793587174348698e-06, + "loss": 2.6408, + "step": 1695 + }, + { + "epoch": 0.02, + "grad_norm": 33.48945534437149, + "learning_rate": 6.8056112224448905e-06, + "loss": 2.8109, + "step": 1698 + }, + { + "epoch": 0.02, + "grad_norm": 28.815477001747585, + "learning_rate": 6.817635270541083e-06, + "loss": 2.6684, + "step": 1701 + }, + { + "epoch": 0.02, + "grad_norm": 128.57289617557885, + "learning_rate": 6.8296593186372754e-06, + "loss": 2.8215, + "step": 1704 + }, + { + "epoch": 0.02, + "grad_norm": 351.3696609760529, + "learning_rate": 6.841683366733467e-06, + "loss": 2.5466, + "step": 1707 + }, + { + "epoch": 0.02, + "grad_norm": 68.60928033894297, + "learning_rate": 6.8537074148296604e-06, + "loss": 2.7964, + "step": 1710 + }, + { + "epoch": 0.02, + "grad_norm": 58.217783925010266, + "learning_rate": 6.865731462925852e-06, + "loss": 2.577, + "step": 1713 + }, + { + "epoch": 0.02, + "grad_norm": 54.77151656999984, + "learning_rate": 6.877755511022045e-06, + "loss": 2.6007, + "step": 1716 + }, + { + "epoch": 0.02, + "grad_norm": 69.26718123097524, + "learning_rate": 6.889779559118237e-06, + "loss": 2.9369, + "step": 1719 + }, + { + "epoch": 0.02, + "grad_norm": 110.28047456905756, + "learning_rate": 6.901803607214429e-06, + "loss": 2.1828, + "step": 1722 + }, + { + "epoch": 0.02, + "grad_norm": 72.53137317823149, + "learning_rate": 6.913827655310622e-06, + "loss": 2.1809, + "step": 1725 + }, + { + "epoch": 0.02, + "grad_norm": 53.53495716667007, + "learning_rate": 6.925851703406814e-06, + "loss": 2.626, + "step": 1728 + }, + { + "epoch": 0.02, + "grad_norm": 52.8105785742068, + "learning_rate": 6.937875751503007e-06, + "loss": 2.4863, + "step": 1731 + }, + { + "epoch": 0.02, + "grad_norm": 36.47283743365225, + "learning_rate": 6.949899799599199e-06, + "loss": 3.1857, + "step": 1734 + }, + { + "epoch": 0.02, + "grad_norm": 44.77718832146034, + "learning_rate": 6.961923847695391e-06, + "loss": 2.4849, + "step": 1737 + }, + { + "epoch": 0.02, + "grad_norm": 93.9843647391874, + "learning_rate": 6.973947895791584e-06, + "loss": 2.3599, + "step": 1740 + }, + { + "epoch": 0.02, + "grad_norm": 50.226659691560386, + "learning_rate": 6.985971943887776e-06, + "loss": 2.6825, + "step": 1743 + }, + { + "epoch": 0.02, + "grad_norm": 59.98255303397829, + "learning_rate": 6.997995991983969e-06, + "loss": 3.181, + "step": 1746 + }, + { + "epoch": 0.02, + "grad_norm": 173.97689329054072, + "learning_rate": 7.010020040080161e-06, + "loss": 2.8649, + "step": 1749 + }, + { + "epoch": 0.02, + "grad_norm": 36.092404515982366, + "learning_rate": 7.022044088176353e-06, + "loss": 2.5256, + "step": 1752 + }, + { + "epoch": 0.02, + "grad_norm": 39.90570707279552, + "learning_rate": 7.034068136272546e-06, + "loss": 3.0319, + "step": 1755 + }, + { + "epoch": 0.02, + "grad_norm": 66.89866600829885, + "learning_rate": 7.046092184368738e-06, + "loss": 2.304, + "step": 1758 + }, + { + "epoch": 0.02, + "grad_norm": 59.60501052062021, + "learning_rate": 7.05811623246493e-06, + "loss": 2.292, + "step": 1761 + }, + { + "epoch": 0.02, + "grad_norm": 64.8144800747144, + "learning_rate": 7.070140280561123e-06, + "loss": 2.1706, + "step": 1764 + }, + { + "epoch": 0.02, + "grad_norm": 30.37395835039448, + "learning_rate": 7.082164328657315e-06, + "loss": 2.8394, + "step": 1767 + }, + { + "epoch": 0.02, + "grad_norm": 48.71093503642437, + "learning_rate": 7.094188376753508e-06, + "loss": 2.6768, + "step": 1770 + }, + { + "epoch": 0.02, + "grad_norm": 45.78622607492507, + "learning_rate": 7.1062124248497e-06, + "loss": 2.4152, + "step": 1773 + }, + { + "epoch": 0.02, + "grad_norm": 96.61779556624855, + "learning_rate": 7.118236472945892e-06, + "loss": 2.6031, + "step": 1776 + }, + { + "epoch": 0.02, + "grad_norm": 100.38067326212682, + "learning_rate": 7.130260521042085e-06, + "loss": 2.8478, + "step": 1779 + }, + { + "epoch": 0.02, + "grad_norm": 27.55988213283687, + "learning_rate": 7.142284569138277e-06, + "loss": 2.916, + "step": 1782 + }, + { + "epoch": 0.02, + "grad_norm": 25.487501643722716, + "learning_rate": 7.15430861723447e-06, + "loss": 2.2427, + "step": 1785 + }, + { + "epoch": 0.02, + "grad_norm": 95.06487618485231, + "learning_rate": 7.166332665330662e-06, + "loss": 2.5158, + "step": 1788 + }, + { + "epoch": 0.02, + "grad_norm": 56.24421548987991, + "learning_rate": 7.1783567134268535e-06, + "loss": 2.0557, + "step": 1791 + }, + { + "epoch": 0.02, + "grad_norm": 35.49065873293199, + "learning_rate": 7.190380761523047e-06, + "loss": 2.4803, + "step": 1794 + }, + { + "epoch": 0.02, + "grad_norm": 36.7153831205022, + "learning_rate": 7.202404809619239e-06, + "loss": 1.9997, + "step": 1797 + }, + { + "epoch": 0.02, + "grad_norm": 133.24977687879615, + "learning_rate": 7.214428857715432e-06, + "loss": 3.3478, + "step": 1800 + }, + { + "epoch": 0.02, + "grad_norm": 55.005379023589214, + "learning_rate": 7.226452905811624e-06, + "loss": 2.2036, + "step": 1803 + }, + { + "epoch": 0.02, + "grad_norm": 31.11319955414505, + "learning_rate": 7.238476953907816e-06, + "loss": 2.9149, + "step": 1806 + }, + { + "epoch": 0.02, + "grad_norm": 35.63313520491819, + "learning_rate": 7.250501002004009e-06, + "loss": 2.2552, + "step": 1809 + }, + { + "epoch": 0.02, + "grad_norm": 19.94503375253597, + "learning_rate": 7.262525050100201e-06, + "loss": 2.684, + "step": 1812 + }, + { + "epoch": 0.02, + "grad_norm": 30.419988687668514, + "learning_rate": 7.274549098196393e-06, + "loss": 2.327, + "step": 1815 + }, + { + "epoch": 0.02, + "grad_norm": 49.289412592104206, + "learning_rate": 7.286573146292586e-06, + "loss": 2.6771, + "step": 1818 + }, + { + "epoch": 0.02, + "grad_norm": 27.274767933614047, + "learning_rate": 7.298597194388778e-06, + "loss": 2.5256, + "step": 1821 + }, + { + "epoch": 0.02, + "grad_norm": 30.797323138871818, + "learning_rate": 7.310621242484971e-06, + "loss": 2.7827, + "step": 1824 + }, + { + "epoch": 0.02, + "grad_norm": 147.71182877240028, + "learning_rate": 7.322645290581163e-06, + "loss": 2.3022, + "step": 1827 + }, + { + "epoch": 0.02, + "grad_norm": 36.66099386456063, + "learning_rate": 7.334669338677355e-06, + "loss": 2.7567, + "step": 1830 + }, + { + "epoch": 0.02, + "grad_norm": 49.15416544010538, + "learning_rate": 7.346693386773548e-06, + "loss": 2.4954, + "step": 1833 + }, + { + "epoch": 0.02, + "grad_norm": 39.108294969000866, + "learning_rate": 7.35871743486974e-06, + "loss": 2.616, + "step": 1836 + }, + { + "epoch": 0.02, + "grad_norm": 111.45046680474147, + "learning_rate": 7.370741482965933e-06, + "loss": 2.6297, + "step": 1839 + }, + { + "epoch": 0.02, + "grad_norm": 156.4955483468562, + "learning_rate": 7.382765531062125e-06, + "loss": 3.2179, + "step": 1842 + }, + { + "epoch": 0.02, + "grad_norm": 23.868779709507496, + "learning_rate": 7.394789579158317e-06, + "loss": 2.382, + "step": 1845 + }, + { + "epoch": 0.02, + "grad_norm": 46.94059549026152, + "learning_rate": 7.40681362725451e-06, + "loss": 2.7368, + "step": 1848 + }, + { + "epoch": 0.02, + "grad_norm": 20.096900366972424, + "learning_rate": 7.418837675350702e-06, + "loss": 2.2161, + "step": 1851 + }, + { + "epoch": 0.02, + "grad_norm": 29.676865608493088, + "learning_rate": 7.430861723446895e-06, + "loss": 2.561, + "step": 1854 + }, + { + "epoch": 0.02, + "grad_norm": 26.08536873608454, + "learning_rate": 7.442885771543087e-06, + "loss": 2.9329, + "step": 1857 + }, + { + "epoch": 0.02, + "grad_norm": 123.0880580052061, + "learning_rate": 7.454909819639279e-06, + "loss": 2.6652, + "step": 1860 + }, + { + "epoch": 0.02, + "grad_norm": 111.97554223720219, + "learning_rate": 7.466933867735472e-06, + "loss": 2.575, + "step": 1863 + }, + { + "epoch": 0.02, + "grad_norm": 56.910881286703045, + "learning_rate": 7.478957915831664e-06, + "loss": 2.4767, + "step": 1866 + }, + { + "epoch": 0.02, + "grad_norm": 14.312153863227392, + "learning_rate": 7.490981963927856e-06, + "loss": 2.3075, + "step": 1869 + }, + { + "epoch": 0.02, + "grad_norm": 10.460664437604196, + "learning_rate": 7.503006012024049e-06, + "loss": 2.6187, + "step": 1872 + }, + { + "epoch": 0.02, + "grad_norm": 47.83079556444467, + "learning_rate": 7.515030060120241e-06, + "loss": 2.6797, + "step": 1875 + }, + { + "epoch": 0.02, + "grad_norm": 13.815740631003074, + "learning_rate": 7.527054108216434e-06, + "loss": 2.2307, + "step": 1878 + }, + { + "epoch": 0.02, + "grad_norm": 21.593139313198773, + "learning_rate": 7.539078156312626e-06, + "loss": 2.9386, + "step": 1881 + }, + { + "epoch": 0.02, + "grad_norm": 24.102720612828005, + "learning_rate": 7.5511022044088174e-06, + "loss": 2.4053, + "step": 1884 + }, + { + "epoch": 0.02, + "grad_norm": 49.94840875584802, + "learning_rate": 7.563126252505011e-06, + "loss": 3.2321, + "step": 1887 + }, + { + "epoch": 0.02, + "grad_norm": 50.08980047227339, + "learning_rate": 7.5751503006012024e-06, + "loss": 2.7244, + "step": 1890 + }, + { + "epoch": 0.02, + "grad_norm": 20.29079342044745, + "learning_rate": 7.587174348697396e-06, + "loss": 2.4767, + "step": 1893 + }, + { + "epoch": 0.02, + "grad_norm": 21.234239041960915, + "learning_rate": 7.599198396793587e-06, + "loss": 2.3374, + "step": 1896 + }, + { + "epoch": 0.02, + "grad_norm": 24.19799959165333, + "learning_rate": 7.61122244488978e-06, + "loss": 2.423, + "step": 1899 + }, + { + "epoch": 0.02, + "grad_norm": 16.961640821239282, + "learning_rate": 7.623246492985973e-06, + "loss": 2.9373, + "step": 1902 + }, + { + "epoch": 0.02, + "grad_norm": 38.918608246778746, + "learning_rate": 7.635270541082164e-06, + "loss": 2.9409, + "step": 1905 + }, + { + "epoch": 0.02, + "grad_norm": 39.278366157464575, + "learning_rate": 7.647294589178358e-06, + "loss": 2.2987, + "step": 1908 + }, + { + "epoch": 0.02, + "grad_norm": 14.645378537255777, + "learning_rate": 7.659318637274549e-06, + "loss": 2.5505, + "step": 1911 + }, + { + "epoch": 0.02, + "grad_norm": 7.913854688930644, + "learning_rate": 7.671342685370742e-06, + "loss": 2.4827, + "step": 1914 + }, + { + "epoch": 0.02, + "grad_norm": 19.293311357995407, + "learning_rate": 7.683366733466934e-06, + "loss": 2.9478, + "step": 1917 + }, + { + "epoch": 0.02, + "grad_norm": 43.7748214580343, + "learning_rate": 7.695390781563127e-06, + "loss": 2.5282, + "step": 1920 + }, + { + "epoch": 0.02, + "grad_norm": 34.25310494404407, + "learning_rate": 7.707414829659319e-06, + "loss": 2.533, + "step": 1923 + }, + { + "epoch": 0.02, + "grad_norm": 50.530033382955004, + "learning_rate": 7.719438877755512e-06, + "loss": 2.8652, + "step": 1926 + }, + { + "epoch": 0.02, + "grad_norm": 16.072740709533765, + "learning_rate": 7.731462925851704e-06, + "loss": 2.8296, + "step": 1929 + }, + { + "epoch": 0.02, + "grad_norm": 98.5657975660518, + "learning_rate": 7.743486973947896e-06, + "loss": 1.7914, + "step": 1932 + }, + { + "epoch": 0.02, + "grad_norm": 133.47047797931197, + "learning_rate": 7.755511022044089e-06, + "loss": 2.1764, + "step": 1935 + }, + { + "epoch": 0.02, + "grad_norm": 19.872637790082248, + "learning_rate": 7.767535070140281e-06, + "loss": 2.5278, + "step": 1938 + }, + { + "epoch": 0.02, + "grad_norm": 36.984187238446815, + "learning_rate": 7.779559118236474e-06, + "loss": 1.9861, + "step": 1941 + }, + { + "epoch": 0.02, + "grad_norm": 25.222021253462348, + "learning_rate": 7.791583166332666e-06, + "loss": 3.0083, + "step": 1944 + }, + { + "epoch": 0.02, + "grad_norm": 22.926086709519154, + "learning_rate": 7.803607214428859e-06, + "loss": 2.4847, + "step": 1947 + }, + { + "epoch": 0.02, + "grad_norm": 19.60569528610459, + "learning_rate": 7.815631262525051e-06, + "loss": 2.385, + "step": 1950 + }, + { + "epoch": 0.02, + "grad_norm": 12.68178599098023, + "learning_rate": 7.827655310621242e-06, + "loss": 1.6833, + "step": 1953 + }, + { + "epoch": 0.02, + "grad_norm": 28.83350940654805, + "learning_rate": 7.839679358717436e-06, + "loss": 2.1615, + "step": 1956 + }, + { + "epoch": 0.02, + "grad_norm": 63.7033311114423, + "learning_rate": 7.851703406813627e-06, + "loss": 2.4338, + "step": 1959 + }, + { + "epoch": 0.02, + "grad_norm": 146.31017570294716, + "learning_rate": 7.863727454909821e-06, + "loss": 2.4724, + "step": 1962 + }, + { + "epoch": 0.02, + "grad_norm": 26.330720400222294, + "learning_rate": 7.875751503006012e-06, + "loss": 2.0874, + "step": 1965 + }, + { + "epoch": 0.02, + "grad_norm": 81.38822003508314, + "learning_rate": 7.887775551102205e-06, + "loss": 2.599, + "step": 1968 + }, + { + "epoch": 0.02, + "grad_norm": 47.54125208204312, + "learning_rate": 7.899799599198397e-06, + "loss": 2.1774, + "step": 1971 + }, + { + "epoch": 0.02, + "grad_norm": 12.296647735680924, + "learning_rate": 7.91182364729459e-06, + "loss": 2.624, + "step": 1974 + }, + { + "epoch": 0.02, + "grad_norm": 104.18219878728681, + "learning_rate": 7.923847695390782e-06, + "loss": 2.7972, + "step": 1977 + }, + { + "epoch": 0.02, + "grad_norm": 36.48302638808912, + "learning_rate": 7.935871743486975e-06, + "loss": 1.9715, + "step": 1980 + }, + { + "epoch": 0.02, + "grad_norm": 27.924700449858697, + "learning_rate": 7.947895791583167e-06, + "loss": 2.5981, + "step": 1983 + }, + { + "epoch": 0.02, + "grad_norm": 5.413268631762547, + "learning_rate": 7.95991983967936e-06, + "loss": 3.1966, + "step": 1986 + }, + { + "epoch": 0.02, + "grad_norm": 65.2758214790544, + "learning_rate": 7.971943887775552e-06, + "loss": 2.3996, + "step": 1989 + }, + { + "epoch": 0.02, + "grad_norm": 24.431697209413535, + "learning_rate": 7.983967935871743e-06, + "loss": 2.1675, + "step": 1992 + }, + { + "epoch": 0.02, + "grad_norm": 35.179592202238545, + "learning_rate": 7.995991983967937e-06, + "loss": 2.833, + "step": 1995 + }, + { + "epoch": 0.02, + "grad_norm": 22.728130706716797, + "learning_rate": 8.008016032064128e-06, + "loss": 2.3729, + "step": 1998 + }, + { + "epoch": 0.02, + "grad_norm": 31.507963826136955, + "learning_rate": 8.020040080160322e-06, + "loss": 2.848, + "step": 2001 + }, + { + "epoch": 0.02, + "grad_norm": 26.498362539900786, + "learning_rate": 8.032064128256513e-06, + "loss": 2.8231, + "step": 2004 + }, + { + "epoch": 0.02, + "grad_norm": 18.709977839506696, + "learning_rate": 8.044088176352705e-06, + "loss": 2.2727, + "step": 2007 + }, + { + "epoch": 0.02, + "grad_norm": 23.34724462215221, + "learning_rate": 8.056112224448898e-06, + "loss": 2.2848, + "step": 2010 + }, + { + "epoch": 0.02, + "grad_norm": 26.313384550775247, + "learning_rate": 8.06813627254509e-06, + "loss": 2.3586, + "step": 2013 + }, + { + "epoch": 0.02, + "grad_norm": 24.415859359781216, + "learning_rate": 8.080160320641283e-06, + "loss": 2.3984, + "step": 2016 + }, + { + "epoch": 0.02, + "grad_norm": 57.903738382328775, + "learning_rate": 8.092184368737475e-06, + "loss": 2.6196, + "step": 2019 + }, + { + "epoch": 0.02, + "grad_norm": 21.779826012321358, + "learning_rate": 8.104208416833668e-06, + "loss": 2.5449, + "step": 2022 + }, + { + "epoch": 0.02, + "grad_norm": 40.84957686167067, + "learning_rate": 8.11623246492986e-06, + "loss": 2.3947, + "step": 2025 + }, + { + "epoch": 0.02, + "grad_norm": 76.14297812809055, + "learning_rate": 8.128256513026053e-06, + "loss": 2.3597, + "step": 2028 + }, + { + "epoch": 0.02, + "grad_norm": 45.79076971167997, + "learning_rate": 8.140280561122245e-06, + "loss": 2.1917, + "step": 2031 + }, + { + "epoch": 0.02, + "grad_norm": 51.17084004849996, + "learning_rate": 8.152304609218438e-06, + "loss": 2.4274, + "step": 2034 + }, + { + "epoch": 0.02, + "grad_norm": 35.73344625952469, + "learning_rate": 8.16432865731463e-06, + "loss": 2.5916, + "step": 2037 + }, + { + "epoch": 0.02, + "grad_norm": 15.44774286789996, + "learning_rate": 8.176352705410823e-06, + "loss": 2.8019, + "step": 2040 + }, + { + "epoch": 0.02, + "grad_norm": 65.79567303951556, + "learning_rate": 8.188376753507015e-06, + "loss": 2.2708, + "step": 2043 + }, + { + "epoch": 0.02, + "grad_norm": 34.09707924498041, + "learning_rate": 8.200400801603206e-06, + "loss": 2.5903, + "step": 2046 + }, + { + "epoch": 0.02, + "grad_norm": 594.2763707490128, + "learning_rate": 8.2124248496994e-06, + "loss": 2.7979, + "step": 2049 + }, + { + "epoch": 0.02, + "grad_norm": 23.46370278640079, + "learning_rate": 8.224448897795591e-06, + "loss": 3.2326, + "step": 2052 + }, + { + "epoch": 0.02, + "grad_norm": 24.33506995983605, + "learning_rate": 8.236472945891785e-06, + "loss": 2.0026, + "step": 2055 + }, + { + "epoch": 0.02, + "grad_norm": 15.096702702384801, + "learning_rate": 8.248496993987976e-06, + "loss": 2.3529, + "step": 2058 + }, + { + "epoch": 0.02, + "grad_norm": 37.20954766656181, + "learning_rate": 8.260521042084169e-06, + "loss": 2.0137, + "step": 2061 + }, + { + "epoch": 0.02, + "grad_norm": 65.164700846758, + "learning_rate": 8.272545090180361e-06, + "loss": 2.3136, + "step": 2064 + }, + { + "epoch": 0.02, + "grad_norm": 42.7678691123045, + "learning_rate": 8.284569138276554e-06, + "loss": 2.3002, + "step": 2067 + }, + { + "epoch": 0.02, + "grad_norm": 24.10966386194746, + "learning_rate": 8.296593186372746e-06, + "loss": 2.0115, + "step": 2070 + }, + { + "epoch": 0.02, + "grad_norm": 90.29717248426715, + "learning_rate": 8.308617234468939e-06, + "loss": 2.3651, + "step": 2073 + }, + { + "epoch": 0.02, + "grad_norm": 264.319190086161, + "learning_rate": 8.320641282565131e-06, + "loss": 2.8306, + "step": 2076 + }, + { + "epoch": 0.02, + "grad_norm": 92.16796036953887, + "learning_rate": 8.332665330661324e-06, + "loss": 3.1204, + "step": 2079 + }, + { + "epoch": 0.03, + "grad_norm": 37.72576407350804, + "learning_rate": 8.344689378757516e-06, + "loss": 1.9076, + "step": 2082 + }, + { + "epoch": 0.03, + "grad_norm": 27.556132913954734, + "learning_rate": 8.356713426853707e-06, + "loss": 2.5713, + "step": 2085 + }, + { + "epoch": 0.03, + "grad_norm": 68.27364048743767, + "learning_rate": 8.368737474949901e-06, + "loss": 2.2474, + "step": 2088 + }, + { + "epoch": 0.03, + "grad_norm": 86.13867114275388, + "learning_rate": 8.380761523046092e-06, + "loss": 2.7218, + "step": 2091 + }, + { + "epoch": 0.03, + "grad_norm": 45.047818782436586, + "learning_rate": 8.392785571142286e-06, + "loss": 2.362, + "step": 2094 + }, + { + "epoch": 0.03, + "grad_norm": 19.270700787057205, + "learning_rate": 8.404809619238477e-06, + "loss": 2.2902, + "step": 2097 + }, + { + "epoch": 0.03, + "grad_norm": 16.85574692354966, + "learning_rate": 8.41683366733467e-06, + "loss": 2.4491, + "step": 2100 + }, + { + "epoch": 0.03, + "grad_norm": 42.841714879213754, + "learning_rate": 8.428857715430862e-06, + "loss": 2.2696, + "step": 2103 + }, + { + "epoch": 0.03, + "grad_norm": 33.0282692586498, + "learning_rate": 8.440881763527054e-06, + "loss": 2.2301, + "step": 2106 + }, + { + "epoch": 0.03, + "grad_norm": 48.57741899149725, + "learning_rate": 8.452905811623247e-06, + "loss": 2.6964, + "step": 2109 + }, + { + "epoch": 0.03, + "grad_norm": 63.30164209723894, + "learning_rate": 8.46492985971944e-06, + "loss": 2.5526, + "step": 2112 + }, + { + "epoch": 0.03, + "grad_norm": 6.662271457424908, + "learning_rate": 8.476953907815632e-06, + "loss": 2.0617, + "step": 2115 + }, + { + "epoch": 0.03, + "grad_norm": 28.87148733456149, + "learning_rate": 8.488977955911824e-06, + "loss": 2.6737, + "step": 2118 + }, + { + "epoch": 0.03, + "grad_norm": 14.256764678175498, + "learning_rate": 8.501002004008017e-06, + "loss": 2.0516, + "step": 2121 + }, + { + "epoch": 0.03, + "grad_norm": 35.19973255799306, + "learning_rate": 8.51302605210421e-06, + "loss": 2.6067, + "step": 2124 + }, + { + "epoch": 0.03, + "grad_norm": 47.598774179370814, + "learning_rate": 8.525050100200402e-06, + "loss": 2.0773, + "step": 2127 + }, + { + "epoch": 0.03, + "grad_norm": 99.64118050233789, + "learning_rate": 8.537074148296594e-06, + "loss": 2.8613, + "step": 2130 + }, + { + "epoch": 0.03, + "grad_norm": 85.31701917474197, + "learning_rate": 8.549098196392787e-06, + "loss": 2.389, + "step": 2133 + }, + { + "epoch": 0.03, + "grad_norm": 78.87904332265511, + "learning_rate": 8.56112224448898e-06, + "loss": 2.173, + "step": 2136 + }, + { + "epoch": 0.03, + "grad_norm": 42.23632029840123, + "learning_rate": 8.57314629258517e-06, + "loss": 2.0483, + "step": 2139 + }, + { + "epoch": 0.03, + "grad_norm": 50.99627360739335, + "learning_rate": 8.585170340681364e-06, + "loss": 2.654, + "step": 2142 + }, + { + "epoch": 0.03, + "grad_norm": 44.73330839921038, + "learning_rate": 8.597194388777555e-06, + "loss": 1.7514, + "step": 2145 + }, + { + "epoch": 0.03, + "grad_norm": 56.68689777751471, + "learning_rate": 8.60921843687375e-06, + "loss": 2.3831, + "step": 2148 + }, + { + "epoch": 0.03, + "grad_norm": 41.33374507876811, + "learning_rate": 8.62124248496994e-06, + "loss": 2.8346, + "step": 2151 + }, + { + "epoch": 0.03, + "grad_norm": 24.156923333562414, + "learning_rate": 8.633266533066133e-06, + "loss": 2.148, + "step": 2154 + }, + { + "epoch": 0.03, + "grad_norm": 18.029578714048775, + "learning_rate": 8.645290581162325e-06, + "loss": 2.2738, + "step": 2157 + }, + { + "epoch": 0.03, + "grad_norm": 56.19155460343594, + "learning_rate": 8.657314629258518e-06, + "loss": 2.6385, + "step": 2160 + }, + { + "epoch": 0.03, + "grad_norm": 59.7498620293804, + "learning_rate": 8.66933867735471e-06, + "loss": 1.7356, + "step": 2163 + }, + { + "epoch": 0.03, + "grad_norm": 49.33237849754329, + "learning_rate": 8.681362725450903e-06, + "loss": 2.0864, + "step": 2166 + }, + { + "epoch": 0.03, + "grad_norm": 22.451489160841298, + "learning_rate": 8.693386773547095e-06, + "loss": 2.1058, + "step": 2169 + }, + { + "epoch": 0.03, + "grad_norm": 17.246889458141514, + "learning_rate": 8.705410821643288e-06, + "loss": 2.3818, + "step": 2172 + }, + { + "epoch": 0.03, + "grad_norm": 37.360235583963316, + "learning_rate": 8.71743486973948e-06, + "loss": 2.0524, + "step": 2175 + }, + { + "epoch": 0.03, + "grad_norm": 79.42101520208068, + "learning_rate": 8.729458917835673e-06, + "loss": 2.2612, + "step": 2178 + }, + { + "epoch": 0.03, + "grad_norm": 12.471773914901336, + "learning_rate": 8.741482965931865e-06, + "loss": 2.2213, + "step": 2181 + }, + { + "epoch": 0.03, + "grad_norm": 39.54700097161275, + "learning_rate": 8.753507014028056e-06, + "loss": 2.1959, + "step": 2184 + }, + { + "epoch": 0.03, + "grad_norm": 64.33471290553503, + "learning_rate": 8.76553106212425e-06, + "loss": 2.2397, + "step": 2187 + }, + { + "epoch": 0.03, + "grad_norm": 42.16418599727222, + "learning_rate": 8.77755511022044e-06, + "loss": 2.7639, + "step": 2190 + }, + { + "epoch": 0.03, + "grad_norm": 33.588675670652535, + "learning_rate": 8.789579158316633e-06, + "loss": 2.4474, + "step": 2193 + }, + { + "epoch": 0.03, + "grad_norm": 33.35438655924158, + "learning_rate": 8.801603206412826e-06, + "loss": 2.4604, + "step": 2196 + }, + { + "epoch": 0.03, + "grad_norm": 28.51660334963626, + "learning_rate": 8.813627254509018e-06, + "loss": 2.5444, + "step": 2199 + }, + { + "epoch": 0.03, + "grad_norm": 20.281824612632988, + "learning_rate": 8.82565130260521e-06, + "loss": 1.8934, + "step": 2202 + }, + { + "epoch": 0.03, + "grad_norm": 29.532544157760615, + "learning_rate": 8.837675350701403e-06, + "loss": 2.2593, + "step": 2205 + }, + { + "epoch": 0.03, + "grad_norm": 48.649233225291034, + "learning_rate": 8.849699398797596e-06, + "loss": 2.4836, + "step": 2208 + }, + { + "epoch": 0.03, + "grad_norm": 16.285684355582667, + "learning_rate": 8.861723446893788e-06, + "loss": 2.3478, + "step": 2211 + }, + { + "epoch": 0.03, + "grad_norm": 35.032048419317015, + "learning_rate": 8.87374749498998e-06, + "loss": 2.8397, + "step": 2214 + }, + { + "epoch": 0.03, + "grad_norm": 30.77948419090851, + "learning_rate": 8.885771543086173e-06, + "loss": 2.5869, + "step": 2217 + }, + { + "epoch": 0.03, + "grad_norm": 22.637020424309245, + "learning_rate": 8.897795591182366e-06, + "loss": 1.7063, + "step": 2220 + }, + { + "epoch": 0.03, + "grad_norm": 38.16063213659296, + "learning_rate": 8.909819639278558e-06, + "loss": 2.2015, + "step": 2223 + }, + { + "epoch": 0.03, + "grad_norm": 55.17092662766708, + "learning_rate": 8.92184368737475e-06, + "loss": 2.4395, + "step": 2226 + }, + { + "epoch": 0.03, + "grad_norm": 49.79357645052452, + "learning_rate": 8.933867735470943e-06, + "loss": 2.4084, + "step": 2229 + }, + { + "epoch": 0.03, + "grad_norm": 34.785782063499234, + "learning_rate": 8.945891783567136e-06, + "loss": 2.1702, + "step": 2232 + }, + { + "epoch": 0.03, + "grad_norm": 41.60246450154786, + "learning_rate": 8.957915831663328e-06, + "loss": 2.3373, + "step": 2235 + }, + { + "epoch": 0.03, + "grad_norm": 51.349563664033624, + "learning_rate": 8.969939879759519e-06, + "loss": 2.0728, + "step": 2238 + }, + { + "epoch": 0.03, + "grad_norm": 19.316767421593227, + "learning_rate": 8.981963927855713e-06, + "loss": 2.5125, + "step": 2241 + }, + { + "epoch": 0.03, + "grad_norm": 97.15670205472959, + "learning_rate": 8.993987975951904e-06, + "loss": 2.3593, + "step": 2244 + }, + { + "epoch": 0.03, + "grad_norm": 46.35738210903946, + "learning_rate": 9.006012024048096e-06, + "loss": 2.355, + "step": 2247 + }, + { + "epoch": 0.03, + "grad_norm": 9.017461320041807, + "learning_rate": 9.018036072144289e-06, + "loss": 2.4181, + "step": 2250 + }, + { + "epoch": 0.03, + "grad_norm": 38.66366059999117, + "learning_rate": 9.030060120240481e-06, + "loss": 2.1646, + "step": 2253 + }, + { + "epoch": 0.03, + "grad_norm": 16.219580376759566, + "learning_rate": 9.042084168336674e-06, + "loss": 2.1721, + "step": 2256 + }, + { + "epoch": 0.03, + "grad_norm": 12.883155186035042, + "learning_rate": 9.054108216432866e-06, + "loss": 1.5972, + "step": 2259 + }, + { + "epoch": 0.03, + "grad_norm": 32.94997431810238, + "learning_rate": 9.066132264529059e-06, + "loss": 2.3164, + "step": 2262 + }, + { + "epoch": 0.03, + "grad_norm": 32.311156520890655, + "learning_rate": 9.078156312625251e-06, + "loss": 2.9849, + "step": 2265 + }, + { + "epoch": 0.03, + "grad_norm": 44.90781684385859, + "learning_rate": 9.090180360721444e-06, + "loss": 2.235, + "step": 2268 + }, + { + "epoch": 0.03, + "grad_norm": 32.28373977440762, + "learning_rate": 9.102204408817636e-06, + "loss": 1.8852, + "step": 2271 + }, + { + "epoch": 0.03, + "grad_norm": 14.89156925247211, + "learning_rate": 9.114228456913829e-06, + "loss": 2.3495, + "step": 2274 + }, + { + "epoch": 0.03, + "grad_norm": 49.73381860117651, + "learning_rate": 9.12625250501002e-06, + "loss": 2.3236, + "step": 2277 + }, + { + "epoch": 0.03, + "grad_norm": 19.899631583760158, + "learning_rate": 9.138276553106214e-06, + "loss": 2.4062, + "step": 2280 + }, + { + "epoch": 0.03, + "grad_norm": 137.67179415776778, + "learning_rate": 9.150300601202405e-06, + "loss": 2.0841, + "step": 2283 + }, + { + "epoch": 0.03, + "grad_norm": 7.697993443487628, + "learning_rate": 9.162324649298599e-06, + "loss": 1.9121, + "step": 2286 + }, + { + "epoch": 0.03, + "grad_norm": 12.772489894446679, + "learning_rate": 9.17434869739479e-06, + "loss": 2.4087, + "step": 2289 + }, + { + "epoch": 0.03, + "grad_norm": 19.99610913563403, + "learning_rate": 9.186372745490982e-06, + "loss": 2.3898, + "step": 2292 + }, + { + "epoch": 0.03, + "grad_norm": 89.93884023262908, + "learning_rate": 9.198396793587175e-06, + "loss": 2.2161, + "step": 2295 + }, + { + "epoch": 0.03, + "grad_norm": 36.1576909124104, + "learning_rate": 9.210420841683367e-06, + "loss": 2.5654, + "step": 2298 + }, + { + "epoch": 0.03, + "grad_norm": 17.109814991860617, + "learning_rate": 9.22244488977956e-06, + "loss": 2.66, + "step": 2301 + }, + { + "epoch": 0.03, + "grad_norm": 123.01405718271639, + "learning_rate": 9.234468937875752e-06, + "loss": 2.3271, + "step": 2304 + }, + { + "epoch": 0.03, + "grad_norm": 28.969346086938938, + "learning_rate": 9.246492985971945e-06, + "loss": 2.2751, + "step": 2307 + }, + { + "epoch": 0.03, + "grad_norm": 28.91118725525268, + "learning_rate": 9.258517034068137e-06, + "loss": 1.7267, + "step": 2310 + }, + { + "epoch": 0.03, + "grad_norm": 18.87431897300086, + "learning_rate": 9.27054108216433e-06, + "loss": 2.2743, + "step": 2313 + }, + { + "epoch": 0.03, + "grad_norm": 65.54104186868155, + "learning_rate": 9.282565130260522e-06, + "loss": 2.0616, + "step": 2316 + }, + { + "epoch": 0.03, + "grad_norm": 29.15708763652065, + "learning_rate": 9.294589178356715e-06, + "loss": 2.3573, + "step": 2319 + }, + { + "epoch": 0.03, + "grad_norm": 17.175479965339168, + "learning_rate": 9.306613226452907e-06, + "loss": 2.1393, + "step": 2322 + }, + { + "epoch": 0.03, + "grad_norm": 29.169728121730262, + "learning_rate": 9.3186372745491e-06, + "loss": 1.8046, + "step": 2325 + }, + { + "epoch": 0.03, + "grad_norm": 40.26386894905434, + "learning_rate": 9.330661322645292e-06, + "loss": 2.3543, + "step": 2328 + }, + { + "epoch": 0.03, + "grad_norm": 144.60506419433293, + "learning_rate": 9.342685370741483e-06, + "loss": 2.1947, + "step": 2331 + }, + { + "epoch": 0.03, + "grad_norm": 25.17469183592564, + "learning_rate": 9.354709418837677e-06, + "loss": 2.3797, + "step": 2334 + }, + { + "epoch": 0.03, + "grad_norm": 42.32557077132348, + "learning_rate": 9.366733466933868e-06, + "loss": 2.0223, + "step": 2337 + }, + { + "epoch": 0.03, + "grad_norm": 93.31609758258756, + "learning_rate": 9.378757515030062e-06, + "loss": 2.4894, + "step": 2340 + }, + { + "epoch": 0.03, + "grad_norm": 23.382335173318175, + "learning_rate": 9.390781563126253e-06, + "loss": 2.0928, + "step": 2343 + }, + { + "epoch": 0.03, + "grad_norm": 43.38776913452729, + "learning_rate": 9.402805611222445e-06, + "loss": 2.3621, + "step": 2346 + }, + { + "epoch": 0.03, + "grad_norm": 27.60924002133046, + "learning_rate": 9.414829659318638e-06, + "loss": 2.4592, + "step": 2349 + }, + { + "epoch": 0.03, + "grad_norm": 34.845098466518195, + "learning_rate": 9.42685370741483e-06, + "loss": 2.4795, + "step": 2352 + }, + { + "epoch": 0.03, + "grad_norm": 44.94422812516438, + "learning_rate": 9.438877755511023e-06, + "loss": 2.4473, + "step": 2355 + }, + { + "epoch": 0.03, + "grad_norm": 41.148737232782814, + "learning_rate": 9.450901803607215e-06, + "loss": 2.7243, + "step": 2358 + }, + { + "epoch": 0.03, + "grad_norm": 10.985311232895146, + "learning_rate": 9.462925851703408e-06, + "loss": 2.5132, + "step": 2361 + }, + { + "epoch": 0.03, + "grad_norm": 54.40959836223645, + "learning_rate": 9.4749498997996e-06, + "loss": 2.5484, + "step": 2364 + }, + { + "epoch": 0.03, + "grad_norm": 36.577366333593034, + "learning_rate": 9.486973947895793e-06, + "loss": 2.3071, + "step": 2367 + }, + { + "epoch": 0.03, + "grad_norm": 14.444013588671133, + "learning_rate": 9.498997995991984e-06, + "loss": 1.8021, + "step": 2370 + }, + { + "epoch": 0.03, + "grad_norm": 360.7920191492217, + "learning_rate": 9.511022044088178e-06, + "loss": 2.3886, + "step": 2373 + }, + { + "epoch": 0.03, + "grad_norm": 32.188063853981504, + "learning_rate": 9.523046092184369e-06, + "loss": 2.054, + "step": 2376 + }, + { + "epoch": 0.03, + "grad_norm": 39.7372886151669, + "learning_rate": 9.535070140280563e-06, + "loss": 2.0893, + "step": 2379 + }, + { + "epoch": 0.03, + "grad_norm": 109.95315488709163, + "learning_rate": 9.547094188376754e-06, + "loss": 2.2656, + "step": 2382 + }, + { + "epoch": 0.03, + "grad_norm": 50.69978677656841, + "learning_rate": 9.559118236472946e-06, + "loss": 2.2287, + "step": 2385 + }, + { + "epoch": 0.03, + "grad_norm": 31.384235138654798, + "learning_rate": 9.571142284569139e-06, + "loss": 1.9906, + "step": 2388 + }, + { + "epoch": 0.03, + "grad_norm": 26.531217726567057, + "learning_rate": 9.583166332665331e-06, + "loss": 2.4569, + "step": 2391 + }, + { + "epoch": 0.03, + "grad_norm": 24.248890156690443, + "learning_rate": 9.595190380761524e-06, + "loss": 2.0972, + "step": 2394 + }, + { + "epoch": 0.03, + "grad_norm": 488.8457970167762, + "learning_rate": 9.607214428857716e-06, + "loss": 2.1178, + "step": 2397 + }, + { + "epoch": 0.03, + "grad_norm": 55.095426264886164, + "learning_rate": 9.619238476953909e-06, + "loss": 1.8011, + "step": 2400 + }, + { + "epoch": 0.03, + "grad_norm": 35.308066277994875, + "learning_rate": 9.631262525050101e-06, + "loss": 2.3634, + "step": 2403 + }, + { + "epoch": 0.03, + "grad_norm": 34.89334754311917, + "learning_rate": 9.643286573146294e-06, + "loss": 2.3522, + "step": 2406 + }, + { + "epoch": 0.03, + "grad_norm": 31.44324685032345, + "learning_rate": 9.655310621242486e-06, + "loss": 2.716, + "step": 2409 + }, + { + "epoch": 0.03, + "grad_norm": 93.69393256644803, + "learning_rate": 9.667334669338679e-06, + "loss": 2.4084, + "step": 2412 + }, + { + "epoch": 0.03, + "grad_norm": 19.417223342049585, + "learning_rate": 9.679358717434871e-06, + "loss": 2.6271, + "step": 2415 + }, + { + "epoch": 0.03, + "grad_norm": 19.65463874377017, + "learning_rate": 9.691382765531064e-06, + "loss": 2.2601, + "step": 2418 + }, + { + "epoch": 0.03, + "grad_norm": 33.39058808379645, + "learning_rate": 9.703406813627256e-06, + "loss": 1.8843, + "step": 2421 + }, + { + "epoch": 0.03, + "grad_norm": 32.678732623042094, + "learning_rate": 9.715430861723447e-06, + "loss": 1.8364, + "step": 2424 + }, + { + "epoch": 0.03, + "grad_norm": 22.242051167595854, + "learning_rate": 9.727454909819641e-06, + "loss": 2.258, + "step": 2427 + }, + { + "epoch": 0.03, + "grad_norm": 94.2286788004078, + "learning_rate": 9.739478957915832e-06, + "loss": 2.1567, + "step": 2430 + }, + { + "epoch": 0.03, + "grad_norm": 28.53063381674307, + "learning_rate": 9.751503006012026e-06, + "loss": 1.9694, + "step": 2433 + }, + { + "epoch": 0.03, + "grad_norm": 21.527906530584143, + "learning_rate": 9.763527054108217e-06, + "loss": 1.9663, + "step": 2436 + }, + { + "epoch": 0.03, + "grad_norm": 21.188355939391727, + "learning_rate": 9.77555110220441e-06, + "loss": 1.9024, + "step": 2439 + }, + { + "epoch": 0.03, + "grad_norm": 35.21816502911715, + "learning_rate": 9.787575150300602e-06, + "loss": 1.9031, + "step": 2442 + }, + { + "epoch": 0.03, + "grad_norm": 31.448702724554835, + "learning_rate": 9.799599198396794e-06, + "loss": 2.0034, + "step": 2445 + }, + { + "epoch": 0.03, + "grad_norm": 29.885950574846696, + "learning_rate": 9.811623246492987e-06, + "loss": 2.4105, + "step": 2448 + }, + { + "epoch": 0.03, + "grad_norm": 12.048923968298503, + "learning_rate": 9.82364729458918e-06, + "loss": 2.1123, + "step": 2451 + }, + { + "epoch": 0.03, + "grad_norm": 48.185981067313406, + "learning_rate": 9.835671342685372e-06, + "loss": 2.2686, + "step": 2454 + }, + { + "epoch": 0.03, + "grad_norm": 36.489546023276134, + "learning_rate": 9.847695390781564e-06, + "loss": 1.9253, + "step": 2457 + }, + { + "epoch": 0.03, + "grad_norm": 9.651115816102692, + "learning_rate": 9.859719438877757e-06, + "loss": 2.4484, + "step": 2460 + }, + { + "epoch": 0.03, + "grad_norm": 13.820209289070677, + "learning_rate": 9.87174348697395e-06, + "loss": 2.3305, + "step": 2463 + }, + { + "epoch": 0.03, + "grad_norm": 34.525622800441994, + "learning_rate": 9.883767535070142e-06, + "loss": 3.0343, + "step": 2466 + }, + { + "epoch": 0.03, + "grad_norm": 17.747180615987592, + "learning_rate": 9.895791583166333e-06, + "loss": 2.25, + "step": 2469 + }, + { + "epoch": 0.03, + "grad_norm": 15.383590743845113, + "learning_rate": 9.907815631262527e-06, + "loss": 2.3759, + "step": 2472 + }, + { + "epoch": 0.03, + "grad_norm": 44.63877123964949, + "learning_rate": 9.919839679358718e-06, + "loss": 1.9495, + "step": 2475 + }, + { + "epoch": 0.03, + "grad_norm": 26.955379266028295, + "learning_rate": 9.93186372745491e-06, + "loss": 2.3564, + "step": 2478 + }, + { + "epoch": 0.03, + "grad_norm": 56.559963384893194, + "learning_rate": 9.943887775551103e-06, + "loss": 2.3678, + "step": 2481 + }, + { + "epoch": 0.03, + "grad_norm": 25.368661385698367, + "learning_rate": 9.955911823647295e-06, + "loss": 2.2926, + "step": 2484 + }, + { + "epoch": 0.03, + "grad_norm": 36.102245281819556, + "learning_rate": 9.967935871743488e-06, + "loss": 2.6558, + "step": 2487 + }, + { + "epoch": 0.03, + "grad_norm": 20.455201001301713, + "learning_rate": 9.97995991983968e-06, + "loss": 2.8764, + "step": 2490 + }, + { + "epoch": 0.03, + "grad_norm": 34.92941740854282, + "learning_rate": 9.991983967935872e-06, + "loss": 2.2474, + "step": 2493 + }, + { + "epoch": 0.03, + "grad_norm": 36.15042282641943, + "learning_rate": 1.0004008016032065e-05, + "loss": 2.8128, + "step": 2496 + }, + { + "epoch": 0.03, + "grad_norm": 28.358127615760623, + "learning_rate": 1.0016032064128257e-05, + "loss": 2.5723, + "step": 2499 + }, + { + "epoch": 0.03, + "grad_norm": 15.696496380374283, + "learning_rate": 1.0028056112224448e-05, + "loss": 2.0823, + "step": 2502 + }, + { + "epoch": 0.03, + "grad_norm": 57.593408731870745, + "learning_rate": 1.0040080160320642e-05, + "loss": 2.0524, + "step": 2505 + }, + { + "epoch": 0.03, + "grad_norm": 50.22778853739173, + "learning_rate": 1.0052104208416835e-05, + "loss": 2.6558, + "step": 2508 + }, + { + "epoch": 0.03, + "grad_norm": 22.089454816122956, + "learning_rate": 1.0064128256513026e-05, + "loss": 2.2311, + "step": 2511 + }, + { + "epoch": 0.03, + "grad_norm": 196.67671121176397, + "learning_rate": 1.007615230460922e-05, + "loss": 1.8579, + "step": 2514 + }, + { + "epoch": 0.03, + "grad_norm": 15.072247799432953, + "learning_rate": 1.0088176352705412e-05, + "loss": 2.3919, + "step": 2517 + }, + { + "epoch": 0.03, + "grad_norm": 10.551333836037081, + "learning_rate": 1.0100200400801603e-05, + "loss": 2.0837, + "step": 2520 + }, + { + "epoch": 0.03, + "grad_norm": 10.537721088760522, + "learning_rate": 1.0112224448897796e-05, + "loss": 2.3031, + "step": 2523 + }, + { + "epoch": 0.03, + "grad_norm": 23.434327214005453, + "learning_rate": 1.012424849699399e-05, + "loss": 2.116, + "step": 2526 + }, + { + "epoch": 0.03, + "grad_norm": 27.046841907512388, + "learning_rate": 1.0136272545090182e-05, + "loss": 2.3079, + "step": 2529 + }, + { + "epoch": 0.03, + "grad_norm": 21.6269055897954, + "learning_rate": 1.0148296593186373e-05, + "loss": 1.9437, + "step": 2532 + }, + { + "epoch": 0.03, + "grad_norm": 19.09225548925472, + "learning_rate": 1.0160320641282566e-05, + "loss": 2.3867, + "step": 2535 + }, + { + "epoch": 0.03, + "grad_norm": 27.88489818001778, + "learning_rate": 1.017234468937876e-05, + "loss": 2.4368, + "step": 2538 + }, + { + "epoch": 0.03, + "grad_norm": 31.978375642930864, + "learning_rate": 1.018436873747495e-05, + "loss": 2.3423, + "step": 2541 + }, + { + "epoch": 0.03, + "grad_norm": 37.50986901668722, + "learning_rate": 1.0196392785571143e-05, + "loss": 2.1126, + "step": 2544 + }, + { + "epoch": 0.03, + "grad_norm": 18.697932901189997, + "learning_rate": 1.0208416833667336e-05, + "loss": 2.1644, + "step": 2547 + }, + { + "epoch": 0.03, + "grad_norm": 21.70854943405636, + "learning_rate": 1.0220440881763526e-05, + "loss": 2.4912, + "step": 2550 + }, + { + "epoch": 0.03, + "grad_norm": 42.37708639156818, + "learning_rate": 1.023246492985972e-05, + "loss": 2.2, + "step": 2553 + }, + { + "epoch": 0.03, + "grad_norm": 18.621484243213388, + "learning_rate": 1.0244488977955913e-05, + "loss": 2.3757, + "step": 2556 + }, + { + "epoch": 0.03, + "grad_norm": 39.38041954340111, + "learning_rate": 1.0256513026052104e-05, + "loss": 2.1237, + "step": 2559 + }, + { + "epoch": 0.03, + "grad_norm": 39.49741023618501, + "learning_rate": 1.0268537074148296e-05, + "loss": 2.3675, + "step": 2562 + }, + { + "epoch": 0.03, + "grad_norm": 44.00769574062534, + "learning_rate": 1.028056112224449e-05, + "loss": 2.1677, + "step": 2565 + }, + { + "epoch": 0.03, + "grad_norm": 32.611145470698396, + "learning_rate": 1.0292585170340683e-05, + "loss": 2.1805, + "step": 2568 + }, + { + "epoch": 0.03, + "grad_norm": 18.69800996826952, + "learning_rate": 1.0304609218436874e-05, + "loss": 2.607, + "step": 2571 + }, + { + "epoch": 0.03, + "grad_norm": 122.96860839190299, + "learning_rate": 1.0316633266533066e-05, + "loss": 2.3032, + "step": 2574 + }, + { + "epoch": 0.03, + "grad_norm": 15.648439851099486, + "learning_rate": 1.032865731462926e-05, + "loss": 1.9486, + "step": 2577 + }, + { + "epoch": 0.03, + "grad_norm": 12.095048134885577, + "learning_rate": 1.0340681362725451e-05, + "loss": 2.6953, + "step": 2580 + }, + { + "epoch": 0.03, + "grad_norm": 43.98794760639479, + "learning_rate": 1.0352705410821644e-05, + "loss": 2.6904, + "step": 2583 + }, + { + "epoch": 0.03, + "grad_norm": 21.688095325090046, + "learning_rate": 1.0364729458917836e-05, + "loss": 1.9212, + "step": 2586 + }, + { + "epoch": 0.03, + "grad_norm": 50.052844222715564, + "learning_rate": 1.0376753507014027e-05, + "loss": 2.2735, + "step": 2589 + }, + { + "epoch": 0.03, + "grad_norm": 57.44592261288191, + "learning_rate": 1.0388777555110221e-05, + "loss": 1.9925, + "step": 2592 + }, + { + "epoch": 0.03, + "grad_norm": 105.73340884231908, + "learning_rate": 1.0400801603206414e-05, + "loss": 1.9849, + "step": 2595 + }, + { + "epoch": 0.03, + "grad_norm": 11.172635838341895, + "learning_rate": 1.0412825651302606e-05, + "loss": 2.2322, + "step": 2598 + }, + { + "epoch": 0.03, + "grad_norm": 13.353053484671783, + "learning_rate": 1.0424849699398797e-05, + "loss": 2.6211, + "step": 2601 + }, + { + "epoch": 0.03, + "grad_norm": 13.648657766144053, + "learning_rate": 1.0436873747494991e-05, + "loss": 2.1047, + "step": 2604 + }, + { + "epoch": 0.03, + "grad_norm": 93.43254318346136, + "learning_rate": 1.0448897795591184e-05, + "loss": 2.3487, + "step": 2607 + }, + { + "epoch": 0.03, + "grad_norm": 10.868489839231657, + "learning_rate": 1.0460921843687375e-05, + "loss": 2.3193, + "step": 2610 + }, + { + "epoch": 0.03, + "grad_norm": 17.28551779961449, + "learning_rate": 1.0472945891783569e-05, + "loss": 2.3346, + "step": 2613 + }, + { + "epoch": 0.03, + "grad_norm": 20.886795436559698, + "learning_rate": 1.0484969939879761e-05, + "loss": 1.7069, + "step": 2616 + }, + { + "epoch": 0.03, + "grad_norm": 28.065464994260655, + "learning_rate": 1.0496993987975952e-05, + "loss": 2.1854, + "step": 2619 + }, + { + "epoch": 0.03, + "grad_norm": 36.51087453467867, + "learning_rate": 1.0509018036072145e-05, + "loss": 2.1461, + "step": 2622 + }, + { + "epoch": 0.03, + "grad_norm": 25.86507248723693, + "learning_rate": 1.0521042084168339e-05, + "loss": 1.5605, + "step": 2625 + }, + { + "epoch": 0.03, + "grad_norm": 33.62344270894158, + "learning_rate": 1.053306613226453e-05, + "loss": 2.0224, + "step": 2628 + }, + { + "epoch": 0.03, + "grad_norm": 28.38584233051546, + "learning_rate": 1.0545090180360722e-05, + "loss": 2.1818, + "step": 2631 + }, + { + "epoch": 0.03, + "grad_norm": 29.755707075921414, + "learning_rate": 1.0557114228456915e-05, + "loss": 2.2388, + "step": 2634 + }, + { + "epoch": 0.03, + "grad_norm": 15.54554155337718, + "learning_rate": 1.0569138276553109e-05, + "loss": 2.2585, + "step": 2637 + }, + { + "epoch": 0.03, + "grad_norm": 37.917550931640385, + "learning_rate": 1.05811623246493e-05, + "loss": 2.301, + "step": 2640 + }, + { + "epoch": 0.03, + "grad_norm": 73.34114400510701, + "learning_rate": 1.0593186372745492e-05, + "loss": 2.1216, + "step": 2643 + }, + { + "epoch": 0.03, + "grad_norm": 28.377451591147825, + "learning_rate": 1.0605210420841685e-05, + "loss": 1.7712, + "step": 2646 + }, + { + "epoch": 0.03, + "grad_norm": 18.093297877398488, + "learning_rate": 1.0617234468937875e-05, + "loss": 2.2859, + "step": 2649 + }, + { + "epoch": 0.03, + "grad_norm": 24.17407931303793, + "learning_rate": 1.062925851703407e-05, + "loss": 2.9277, + "step": 2652 + }, + { + "epoch": 0.03, + "grad_norm": 22.764977685859044, + "learning_rate": 1.0641282565130262e-05, + "loss": 1.9911, + "step": 2655 + }, + { + "epoch": 0.03, + "grad_norm": 70.04130144699904, + "learning_rate": 1.0653306613226453e-05, + "loss": 2.4775, + "step": 2658 + }, + { + "epoch": 0.03, + "grad_norm": 18.347260368603777, + "learning_rate": 1.0665330661322645e-05, + "loss": 1.7025, + "step": 2661 + }, + { + "epoch": 0.03, + "grad_norm": 11.997794103698725, + "learning_rate": 1.067735470941884e-05, + "loss": 2.1165, + "step": 2664 + }, + { + "epoch": 0.03, + "grad_norm": 50.07148376941203, + "learning_rate": 1.068937875751503e-05, + "loss": 2.4919, + "step": 2667 + }, + { + "epoch": 0.03, + "grad_norm": 14.176794930176031, + "learning_rate": 1.0701402805611223e-05, + "loss": 2.4867, + "step": 2670 + }, + { + "epoch": 0.03, + "grad_norm": 57.06057540723596, + "learning_rate": 1.0713426853707415e-05, + "loss": 2.1075, + "step": 2673 + }, + { + "epoch": 0.03, + "grad_norm": 27.295701466063324, + "learning_rate": 1.072545090180361e-05, + "loss": 2.1396, + "step": 2676 + }, + { + "epoch": 0.03, + "grad_norm": 308.56805310014784, + "learning_rate": 1.07374749498998e-05, + "loss": 2.5247, + "step": 2679 + }, + { + "epoch": 0.03, + "grad_norm": 45.8659820088051, + "learning_rate": 1.0749498997995993e-05, + "loss": 2.5387, + "step": 2682 + }, + { + "epoch": 0.03, + "grad_norm": 31.49741860007312, + "learning_rate": 1.0761523046092185e-05, + "loss": 2.1808, + "step": 2685 + }, + { + "epoch": 0.03, + "grad_norm": 37.96958655611112, + "learning_rate": 1.0773547094188376e-05, + "loss": 2.3387, + "step": 2688 + }, + { + "epoch": 0.03, + "grad_norm": 36.210482178620246, + "learning_rate": 1.078557114228457e-05, + "loss": 2.3771, + "step": 2691 + }, + { + "epoch": 0.03, + "grad_norm": 50.26729945820508, + "learning_rate": 1.0797595190380763e-05, + "loss": 1.9822, + "step": 2694 + }, + { + "epoch": 0.03, + "grad_norm": 22.687882072021065, + "learning_rate": 1.0809619238476954e-05, + "loss": 2.4915, + "step": 2697 + }, + { + "epoch": 0.03, + "grad_norm": 50.94790654673429, + "learning_rate": 1.0821643286573146e-05, + "loss": 2.2084, + "step": 2700 + }, + { + "epoch": 0.03, + "grad_norm": 36.14252401698451, + "learning_rate": 1.083366733466934e-05, + "loss": 1.9603, + "step": 2703 + }, + { + "epoch": 0.03, + "grad_norm": 51.643316462098234, + "learning_rate": 1.0845691382765533e-05, + "loss": 2.1883, + "step": 2706 + }, + { + "epoch": 0.03, + "grad_norm": 35.507885035789634, + "learning_rate": 1.0857715430861724e-05, + "loss": 1.8844, + "step": 2709 + }, + { + "epoch": 0.03, + "grad_norm": 67.9087397279414, + "learning_rate": 1.0869739478957918e-05, + "loss": 2.6564, + "step": 2712 + }, + { + "epoch": 0.03, + "grad_norm": 24.29909146116631, + "learning_rate": 1.088176352705411e-05, + "loss": 2.3241, + "step": 2715 + }, + { + "epoch": 0.03, + "grad_norm": 35.54964833702154, + "learning_rate": 1.0893787575150301e-05, + "loss": 2.3841, + "step": 2718 + }, + { + "epoch": 0.03, + "grad_norm": 12.795982729870014, + "learning_rate": 1.0905811623246494e-05, + "loss": 1.6793, + "step": 2721 + }, + { + "epoch": 0.03, + "grad_norm": 44.542826089857535, + "learning_rate": 1.0917835671342688e-05, + "loss": 1.8958, + "step": 2724 + }, + { + "epoch": 0.03, + "grad_norm": 16.330282557756, + "learning_rate": 1.0929859719438879e-05, + "loss": 2.1178, + "step": 2727 + }, + { + "epoch": 0.03, + "grad_norm": 101.63350874337758, + "learning_rate": 1.0941883767535071e-05, + "loss": 2.0366, + "step": 2730 + }, + { + "epoch": 0.03, + "grad_norm": 30.6180721739203, + "learning_rate": 1.0953907815631264e-05, + "loss": 1.312, + "step": 2733 + }, + { + "epoch": 0.03, + "grad_norm": 17.543318095682373, + "learning_rate": 1.0965931863727454e-05, + "loss": 2.2508, + "step": 2736 + }, + { + "epoch": 0.03, + "grad_norm": 31.496352996079302, + "learning_rate": 1.0977955911823649e-05, + "loss": 2.0315, + "step": 2739 + }, + { + "epoch": 0.03, + "grad_norm": 19.91348020633533, + "learning_rate": 1.0989979959919841e-05, + "loss": 2.3941, + "step": 2742 + }, + { + "epoch": 0.03, + "grad_norm": 18.31422406803728, + "learning_rate": 1.1002004008016033e-05, + "loss": 2.4771, + "step": 2745 + }, + { + "epoch": 0.03, + "grad_norm": 19.398159418759747, + "learning_rate": 1.1014028056112224e-05, + "loss": 2.3121, + "step": 2748 + }, + { + "epoch": 0.03, + "grad_norm": 15.983518656882863, + "learning_rate": 1.1026052104208418e-05, + "loss": 1.9155, + "step": 2751 + }, + { + "epoch": 0.03, + "grad_norm": 44.89871210687625, + "learning_rate": 1.1038076152304611e-05, + "loss": 2.6899, + "step": 2754 + }, + { + "epoch": 0.03, + "grad_norm": 39.077780745800986, + "learning_rate": 1.1050100200400802e-05, + "loss": 1.9279, + "step": 2757 + }, + { + "epoch": 0.03, + "grad_norm": 19.085272187796537, + "learning_rate": 1.1062124248496994e-05, + "loss": 2.0799, + "step": 2760 + }, + { + "epoch": 0.03, + "grad_norm": 63.38739264069348, + "learning_rate": 1.1074148296593188e-05, + "loss": 1.7991, + "step": 2763 + }, + { + "epoch": 0.03, + "grad_norm": 23.202683505253425, + "learning_rate": 1.108617234468938e-05, + "loss": 2.0692, + "step": 2766 + }, + { + "epoch": 0.03, + "grad_norm": 49.598525911275324, + "learning_rate": 1.1098196392785572e-05, + "loss": 2.1847, + "step": 2769 + }, + { + "epoch": 0.03, + "grad_norm": 17.206900595470437, + "learning_rate": 1.1110220440881764e-05, + "loss": 1.9647, + "step": 2772 + }, + { + "epoch": 0.03, + "grad_norm": 31.712747357103275, + "learning_rate": 1.1122244488977955e-05, + "loss": 2.1314, + "step": 2775 + }, + { + "epoch": 0.03, + "grad_norm": 20.73318449925575, + "learning_rate": 1.113426853707415e-05, + "loss": 2.0382, + "step": 2778 + }, + { + "epoch": 0.03, + "grad_norm": 45.48005723367668, + "learning_rate": 1.1146292585170342e-05, + "loss": 2.0164, + "step": 2781 + }, + { + "epoch": 0.03, + "grad_norm": 22.26914709426103, + "learning_rate": 1.1158316633266534e-05, + "loss": 2.4533, + "step": 2784 + }, + { + "epoch": 0.03, + "grad_norm": 40.21789345923981, + "learning_rate": 1.1170340681362725e-05, + "loss": 1.8354, + "step": 2787 + }, + { + "epoch": 0.03, + "grad_norm": 24.615258948233304, + "learning_rate": 1.118236472945892e-05, + "loss": 2.1987, + "step": 2790 + }, + { + "epoch": 0.03, + "grad_norm": 29.027855693970903, + "learning_rate": 1.1194388777555112e-05, + "loss": 2.3923, + "step": 2793 + }, + { + "epoch": 0.03, + "grad_norm": 42.64128750652443, + "learning_rate": 1.1206412825651302e-05, + "loss": 2.4035, + "step": 2796 + }, + { + "epoch": 0.03, + "grad_norm": 135.07936237050055, + "learning_rate": 1.1218436873747495e-05, + "loss": 2.1503, + "step": 2799 + }, + { + "epoch": 0.03, + "grad_norm": 13.283749790612468, + "learning_rate": 1.123046092184369e-05, + "loss": 1.929, + "step": 2802 + }, + { + "epoch": 0.03, + "grad_norm": 23.728535746240794, + "learning_rate": 1.124248496993988e-05, + "loss": 2.0133, + "step": 2805 + }, + { + "epoch": 0.03, + "grad_norm": 43.797938567403826, + "learning_rate": 1.1254509018036072e-05, + "loss": 1.9479, + "step": 2808 + }, + { + "epoch": 0.03, + "grad_norm": 22.536839479572507, + "learning_rate": 1.1266533066132267e-05, + "loss": 1.8094, + "step": 2811 + }, + { + "epoch": 0.03, + "grad_norm": 39.90267583674744, + "learning_rate": 1.1278557114228459e-05, + "loss": 1.9336, + "step": 2814 + }, + { + "epoch": 0.03, + "grad_norm": 17.642869992192733, + "learning_rate": 1.129058116232465e-05, + "loss": 1.9266, + "step": 2817 + }, + { + "epoch": 0.03, + "grad_norm": 15.488306227712606, + "learning_rate": 1.1302605210420842e-05, + "loss": 1.979, + "step": 2820 + }, + { + "epoch": 0.03, + "grad_norm": 23.534645089017275, + "learning_rate": 1.1314629258517037e-05, + "loss": 1.8765, + "step": 2823 + }, + { + "epoch": 0.03, + "grad_norm": 41.39550327168987, + "learning_rate": 1.1326653306613227e-05, + "loss": 2.082, + "step": 2826 + }, + { + "epoch": 0.03, + "grad_norm": 45.12174727186615, + "learning_rate": 1.133867735470942e-05, + "loss": 2.4454, + "step": 2829 + }, + { + "epoch": 0.03, + "grad_norm": 40.80123796919526, + "learning_rate": 1.1350701402805612e-05, + "loss": 1.9218, + "step": 2832 + }, + { + "epoch": 0.03, + "grad_norm": 15.03486955721534, + "learning_rate": 1.1362725450901803e-05, + "loss": 2.0426, + "step": 2835 + }, + { + "epoch": 0.03, + "grad_norm": 25.97568157080934, + "learning_rate": 1.1374749498997997e-05, + "loss": 2.6149, + "step": 2838 + }, + { + "epoch": 0.03, + "grad_norm": 71.3347477035774, + "learning_rate": 1.138677354709419e-05, + "loss": 2.2243, + "step": 2841 + }, + { + "epoch": 0.03, + "grad_norm": 73.99523471135356, + "learning_rate": 1.139879759519038e-05, + "loss": 1.9539, + "step": 2844 + }, + { + "epoch": 0.03, + "grad_norm": 21.080593558631474, + "learning_rate": 1.1410821643286573e-05, + "loss": 2.3099, + "step": 2847 + }, + { + "epoch": 0.03, + "grad_norm": 18.17811930155207, + "learning_rate": 1.1422845691382767e-05, + "loss": 1.9163, + "step": 2850 + }, + { + "epoch": 0.03, + "grad_norm": 272.15244403890216, + "learning_rate": 1.143486973947896e-05, + "loss": 2.1694, + "step": 2853 + }, + { + "epoch": 0.03, + "grad_norm": 42.32860956459698, + "learning_rate": 1.144689378757515e-05, + "loss": 2.1703, + "step": 2856 + }, + { + "epoch": 0.03, + "grad_norm": 279.1443163582572, + "learning_rate": 1.1458917835671343e-05, + "loss": 2.1993, + "step": 2859 + }, + { + "epoch": 0.03, + "grad_norm": 36.937081805159664, + "learning_rate": 1.1470941883767537e-05, + "loss": 2.0128, + "step": 2862 + }, + { + "epoch": 0.03, + "grad_norm": 22.2218046749011, + "learning_rate": 1.1482965931863728e-05, + "loss": 1.9434, + "step": 2865 + }, + { + "epoch": 0.03, + "grad_norm": 12.103024841229791, + "learning_rate": 1.149498997995992e-05, + "loss": 1.8783, + "step": 2868 + }, + { + "epoch": 0.03, + "grad_norm": 34.82056219137405, + "learning_rate": 1.1507014028056113e-05, + "loss": 2.5216, + "step": 2871 + }, + { + "epoch": 0.03, + "grad_norm": 22.033121500019753, + "learning_rate": 1.1519038076152304e-05, + "loss": 2.0794, + "step": 2874 + }, + { + "epoch": 0.03, + "grad_norm": 29.380204957718146, + "learning_rate": 1.1531062124248498e-05, + "loss": 2.1685, + "step": 2877 + }, + { + "epoch": 0.03, + "grad_norm": 21.561503917774594, + "learning_rate": 1.154308617234469e-05, + "loss": 2.1852, + "step": 2880 + }, + { + "epoch": 0.03, + "grad_norm": 35.70797923291189, + "learning_rate": 1.1555110220440883e-05, + "loss": 2.5246, + "step": 2883 + }, + { + "epoch": 0.03, + "grad_norm": 49.02100043618382, + "learning_rate": 1.1567134268537074e-05, + "loss": 2.5586, + "step": 2886 + }, + { + "epoch": 0.03, + "grad_norm": 21.16831649237732, + "learning_rate": 1.1579158316633268e-05, + "loss": 1.9974, + "step": 2889 + }, + { + "epoch": 0.03, + "grad_norm": 85.16685431960111, + "learning_rate": 1.159118236472946e-05, + "loss": 2.2212, + "step": 2892 + }, + { + "epoch": 0.03, + "grad_norm": 70.24358460534013, + "learning_rate": 1.1603206412825651e-05, + "loss": 2.2389, + "step": 2895 + }, + { + "epoch": 0.03, + "grad_norm": 24.441964059170658, + "learning_rate": 1.1615230460921844e-05, + "loss": 1.8467, + "step": 2898 + }, + { + "epoch": 0.03, + "grad_norm": 202.80203703655755, + "learning_rate": 1.1627254509018038e-05, + "loss": 1.9127, + "step": 2901 + }, + { + "epoch": 0.03, + "grad_norm": 50.73811168088825, + "learning_rate": 1.1639278557114229e-05, + "loss": 2.0997, + "step": 2904 + }, + { + "epoch": 0.03, + "grad_norm": 29.26138994435052, + "learning_rate": 1.1651302605210421e-05, + "loss": 2.2194, + "step": 2907 + }, + { + "epoch": 0.03, + "grad_norm": 31.280247880303623, + "learning_rate": 1.1663326653306616e-05, + "loss": 2.3547, + "step": 2910 + }, + { + "epoch": 0.04, + "grad_norm": 105.56774923945284, + "learning_rate": 1.1675350701402806e-05, + "loss": 1.7314, + "step": 2913 + }, + { + "epoch": 0.04, + "grad_norm": 38.65272892619328, + "learning_rate": 1.1687374749498999e-05, + "loss": 1.9631, + "step": 2916 + }, + { + "epoch": 0.04, + "grad_norm": 19.241612653099672, + "learning_rate": 1.1699398797595191e-05, + "loss": 2.0624, + "step": 2919 + }, + { + "epoch": 0.04, + "grad_norm": 29.00426255528619, + "learning_rate": 1.1711422845691386e-05, + "loss": 2.6357, + "step": 2922 + }, + { + "epoch": 0.04, + "grad_norm": 58.81415274686089, + "learning_rate": 1.1723446893787576e-05, + "loss": 2.2241, + "step": 2925 + }, + { + "epoch": 0.04, + "grad_norm": 10.640456885922726, + "learning_rate": 1.1735470941883769e-05, + "loss": 2.1465, + "step": 2928 + }, + { + "epoch": 0.04, + "grad_norm": 118.882482168951, + "learning_rate": 1.1747494989979961e-05, + "loss": 2.6958, + "step": 2931 + }, + { + "epoch": 0.04, + "grad_norm": 38.22083941625432, + "learning_rate": 1.1759519038076152e-05, + "loss": 1.4938, + "step": 2934 + }, + { + "epoch": 0.04, + "grad_norm": 78.64316514853016, + "learning_rate": 1.1771543086172346e-05, + "loss": 2.4119, + "step": 2937 + }, + { + "epoch": 0.04, + "grad_norm": 57.31624419606516, + "learning_rate": 1.1783567134268539e-05, + "loss": 2.1019, + "step": 2940 + }, + { + "epoch": 0.04, + "grad_norm": 50.955503648523994, + "learning_rate": 1.179559118236473e-05, + "loss": 2.0556, + "step": 2943 + }, + { + "epoch": 0.04, + "grad_norm": 44.38888575111979, + "learning_rate": 1.1807615230460922e-05, + "loss": 1.9788, + "step": 2946 + }, + { + "epoch": 0.04, + "grad_norm": 80.73889165861293, + "learning_rate": 1.1819639278557116e-05, + "loss": 1.8868, + "step": 2949 + }, + { + "epoch": 0.04, + "grad_norm": 22.364468023607515, + "learning_rate": 1.1831663326653307e-05, + "loss": 1.8521, + "step": 2952 + }, + { + "epoch": 0.04, + "grad_norm": 45.90414104928122, + "learning_rate": 1.18436873747495e-05, + "loss": 2.0539, + "step": 2955 + }, + { + "epoch": 0.04, + "grad_norm": 78.35280455440659, + "learning_rate": 1.1855711422845692e-05, + "loss": 2.0627, + "step": 2958 + }, + { + "epoch": 0.04, + "grad_norm": 39.563467314460325, + "learning_rate": 1.1867735470941886e-05, + "loss": 2.1586, + "step": 2961 + }, + { + "epoch": 0.04, + "grad_norm": 15.582651468042668, + "learning_rate": 1.1879759519038077e-05, + "loss": 2.0098, + "step": 2964 + }, + { + "epoch": 0.04, + "grad_norm": 26.34946978280521, + "learning_rate": 1.189178356713427e-05, + "loss": 2.9478, + "step": 2967 + }, + { + "epoch": 0.04, + "grad_norm": 56.258125765844966, + "learning_rate": 1.1903807615230462e-05, + "loss": 2.1906, + "step": 2970 + }, + { + "epoch": 0.04, + "grad_norm": 56.819262134071494, + "learning_rate": 1.1915831663326653e-05, + "loss": 2.2292, + "step": 2973 + }, + { + "epoch": 0.04, + "grad_norm": 22.6144440428598, + "learning_rate": 1.1927855711422847e-05, + "loss": 2.2002, + "step": 2976 + }, + { + "epoch": 0.04, + "grad_norm": 23.368107066304322, + "learning_rate": 1.193987975951904e-05, + "loss": 1.7279, + "step": 2979 + }, + { + "epoch": 0.04, + "grad_norm": 26.274959540922357, + "learning_rate": 1.195190380761523e-05, + "loss": 2.0099, + "step": 2982 + }, + { + "epoch": 0.04, + "grad_norm": 38.82316862956786, + "learning_rate": 1.1963927855711423e-05, + "loss": 1.6322, + "step": 2985 + }, + { + "epoch": 0.04, + "grad_norm": 30.486215137692156, + "learning_rate": 1.1975951903807617e-05, + "loss": 1.9319, + "step": 2988 + }, + { + "epoch": 0.04, + "grad_norm": 118.47256883191024, + "learning_rate": 1.198797595190381e-05, + "loss": 1.9434, + "step": 2991 + }, + { + "epoch": 0.04, + "grad_norm": 41.50612192166678, + "learning_rate": 1.2e-05, + "loss": 2.2419, + "step": 2994 + }, + { + "epoch": 0.04, + "grad_norm": 64.47049981966119, + "learning_rate": 1.2012024048096193e-05, + "loss": 1.4187, + "step": 2997 + }, + { + "epoch": 0.04, + "grad_norm": 48.513156588673425, + "learning_rate": 1.2024048096192387e-05, + "loss": 1.763, + "step": 3000 + }, + { + "epoch": 0.04, + "grad_norm": 101.56692419420763, + "learning_rate": 1.2036072144288578e-05, + "loss": 1.6329, + "step": 3003 + }, + { + "epoch": 0.04, + "grad_norm": 18.82814783173403, + "learning_rate": 1.204809619238477e-05, + "loss": 2.0622, + "step": 3006 + }, + { + "epoch": 0.04, + "grad_norm": 28.811323115005283, + "learning_rate": 1.2060120240480964e-05, + "loss": 2.3105, + "step": 3009 + }, + { + "epoch": 0.04, + "grad_norm": 46.98872947844506, + "learning_rate": 1.2072144288577155e-05, + "loss": 1.8318, + "step": 3012 + }, + { + "epoch": 0.04, + "grad_norm": 152.92801078566345, + "learning_rate": 1.2084168336673348e-05, + "loss": 2.1214, + "step": 3015 + }, + { + "epoch": 0.04, + "grad_norm": 192.86272700522102, + "learning_rate": 1.209619238476954e-05, + "loss": 2.1073, + "step": 3018 + }, + { + "epoch": 0.04, + "grad_norm": 49.22637361000531, + "learning_rate": 1.2108216432865731e-05, + "loss": 1.9541, + "step": 3021 + }, + { + "epoch": 0.04, + "grad_norm": 60.14474145092943, + "learning_rate": 1.2120240480961925e-05, + "loss": 2.1107, + "step": 3024 + }, + { + "epoch": 0.04, + "grad_norm": 29.08792605983944, + "learning_rate": 1.2132264529058118e-05, + "loss": 2.0487, + "step": 3027 + }, + { + "epoch": 0.04, + "grad_norm": 33.89793526411535, + "learning_rate": 1.214428857715431e-05, + "loss": 2.0185, + "step": 3030 + }, + { + "epoch": 0.04, + "grad_norm": 73.51730840920159, + "learning_rate": 1.2156312625250501e-05, + "loss": 2.5967, + "step": 3033 + }, + { + "epoch": 0.04, + "grad_norm": 14.585902808799105, + "learning_rate": 1.2168336673346695e-05, + "loss": 1.9028, + "step": 3036 + }, + { + "epoch": 0.04, + "grad_norm": 37.70236828989346, + "learning_rate": 1.2180360721442888e-05, + "loss": 2.1771, + "step": 3039 + }, + { + "epoch": 0.04, + "grad_norm": 37.045412009977845, + "learning_rate": 1.2192384769539079e-05, + "loss": 1.6798, + "step": 3042 + }, + { + "epoch": 0.04, + "grad_norm": 387.51421053561904, + "learning_rate": 1.2204408817635271e-05, + "loss": 1.8036, + "step": 3045 + }, + { + "epoch": 0.04, + "grad_norm": 37.02378083202324, + "learning_rate": 1.2216432865731465e-05, + "loss": 1.8249, + "step": 3048 + }, + { + "epoch": 0.04, + "grad_norm": 33.17017122040483, + "learning_rate": 1.2228456913827656e-05, + "loss": 1.6227, + "step": 3051 + }, + { + "epoch": 0.04, + "grad_norm": 36.403353905285655, + "learning_rate": 1.2240480961923848e-05, + "loss": 2.1608, + "step": 3054 + }, + { + "epoch": 0.04, + "grad_norm": 39.57682483221775, + "learning_rate": 1.2252505010020041e-05, + "loss": 1.7628, + "step": 3057 + }, + { + "epoch": 0.04, + "grad_norm": 12.967044832286712, + "learning_rate": 1.2264529058116232e-05, + "loss": 2.0867, + "step": 3060 + }, + { + "epoch": 0.04, + "grad_norm": 14.644923485149567, + "learning_rate": 1.2276553106212426e-05, + "loss": 1.9788, + "step": 3063 + }, + { + "epoch": 0.04, + "grad_norm": 43.37497518128024, + "learning_rate": 1.2288577154308618e-05, + "loss": 2.277, + "step": 3066 + }, + { + "epoch": 0.04, + "grad_norm": 46.077238023783615, + "learning_rate": 1.2300601202404811e-05, + "loss": 2.4681, + "step": 3069 + }, + { + "epoch": 0.04, + "grad_norm": 48.40024806313909, + "learning_rate": 1.2312625250501002e-05, + "loss": 1.9642, + "step": 3072 + }, + { + "epoch": 0.04, + "grad_norm": 92.68343209054572, + "learning_rate": 1.2324649298597196e-05, + "loss": 2.2415, + "step": 3075 + }, + { + "epoch": 0.04, + "grad_norm": 47.79123398391671, + "learning_rate": 1.2336673346693388e-05, + "loss": 2.159, + "step": 3078 + }, + { + "epoch": 0.04, + "grad_norm": 13.791525044441316, + "learning_rate": 1.234869739478958e-05, + "loss": 1.7353, + "step": 3081 + }, + { + "epoch": 0.04, + "grad_norm": 18.979657783437293, + "learning_rate": 1.2360721442885772e-05, + "loss": 2.171, + "step": 3084 + }, + { + "epoch": 0.04, + "grad_norm": 18.995206259792344, + "learning_rate": 1.2372745490981966e-05, + "loss": 2.3773, + "step": 3087 + }, + { + "epoch": 0.04, + "grad_norm": 36.72806401083114, + "learning_rate": 1.2384769539078157e-05, + "loss": 1.8204, + "step": 3090 + }, + { + "epoch": 0.04, + "grad_norm": 24.267240785142715, + "learning_rate": 1.239679358717435e-05, + "loss": 1.682, + "step": 3093 + }, + { + "epoch": 0.04, + "grad_norm": 31.518072908591048, + "learning_rate": 1.2408817635270542e-05, + "loss": 2.3104, + "step": 3096 + }, + { + "epoch": 0.04, + "grad_norm": 56.880536113254955, + "learning_rate": 1.2420841683366736e-05, + "loss": 1.7375, + "step": 3099 + }, + { + "epoch": 0.04, + "grad_norm": 39.65950556403013, + "learning_rate": 1.2432865731462927e-05, + "loss": 2.3973, + "step": 3102 + }, + { + "epoch": 0.04, + "grad_norm": 8.869145893431705, + "learning_rate": 1.244488977955912e-05, + "loss": 2.1356, + "step": 3105 + }, + { + "epoch": 0.04, + "grad_norm": 14.493538039088037, + "learning_rate": 1.2456913827655313e-05, + "loss": 1.6934, + "step": 3108 + }, + { + "epoch": 0.04, + "grad_norm": 5.396047536228811, + "learning_rate": 1.2468937875751504e-05, + "loss": 2.1382, + "step": 3111 + }, + { + "epoch": 0.04, + "grad_norm": 92.72613656831508, + "learning_rate": 1.2480961923847697e-05, + "loss": 1.9698, + "step": 3114 + }, + { + "epoch": 0.04, + "grad_norm": 63.43508731615822, + "learning_rate": 1.249298597194389e-05, + "loss": 2.1203, + "step": 3117 + }, + { + "epoch": 0.04, + "grad_norm": 21.467790324730323, + "learning_rate": 1.250501002004008e-05, + "loss": 2.4387, + "step": 3120 + }, + { + "epoch": 0.04, + "grad_norm": 23.523041917150024, + "learning_rate": 1.2517034068136274e-05, + "loss": 1.7765, + "step": 3123 + }, + { + "epoch": 0.04, + "grad_norm": 63.720537810797154, + "learning_rate": 1.2529058116232467e-05, + "loss": 1.9851, + "step": 3126 + }, + { + "epoch": 0.04, + "grad_norm": 28.686245660223054, + "learning_rate": 1.2541082164328657e-05, + "loss": 2.2083, + "step": 3129 + }, + { + "epoch": 0.04, + "grad_norm": 40.918038409294915, + "learning_rate": 1.255310621242485e-05, + "loss": 1.5583, + "step": 3132 + }, + { + "epoch": 0.04, + "grad_norm": 23.265529996652262, + "learning_rate": 1.2565130260521044e-05, + "loss": 2.3364, + "step": 3135 + }, + { + "epoch": 0.04, + "grad_norm": 29.868876133083365, + "learning_rate": 1.2577154308617237e-05, + "loss": 2.0749, + "step": 3138 + }, + { + "epoch": 0.04, + "grad_norm": 31.659944009366715, + "learning_rate": 1.2589178356713427e-05, + "loss": 2.097, + "step": 3141 + }, + { + "epoch": 0.04, + "grad_norm": 81.29832137488786, + "learning_rate": 1.260120240480962e-05, + "loss": 1.8633, + "step": 3144 + }, + { + "epoch": 0.04, + "grad_norm": 17.478486116728583, + "learning_rate": 1.2613226452905814e-05, + "loss": 1.8049, + "step": 3147 + }, + { + "epoch": 0.04, + "grad_norm": 64.28150362883574, + "learning_rate": 1.2625250501002005e-05, + "loss": 2.0424, + "step": 3150 + }, + { + "epoch": 0.04, + "grad_norm": 8.049910529481643, + "learning_rate": 1.2637274549098197e-05, + "loss": 1.6195, + "step": 3153 + }, + { + "epoch": 0.04, + "grad_norm": 14.128578794730565, + "learning_rate": 1.264929859719439e-05, + "loss": 1.8522, + "step": 3156 + }, + { + "epoch": 0.04, + "grad_norm": 37.75047437208797, + "learning_rate": 1.266132264529058e-05, + "loss": 1.7306, + "step": 3159 + }, + { + "epoch": 0.04, + "grad_norm": 14.645509016401434, + "learning_rate": 1.2673346693386775e-05, + "loss": 1.3399, + "step": 3162 + }, + { + "epoch": 0.04, + "grad_norm": 125.10286177135954, + "learning_rate": 1.2685370741482967e-05, + "loss": 1.8149, + "step": 3165 + }, + { + "epoch": 0.04, + "grad_norm": 42.54511773397862, + "learning_rate": 1.2697394789579158e-05, + "loss": 1.9614, + "step": 3168 + }, + { + "epoch": 0.04, + "grad_norm": 58.03985150781448, + "learning_rate": 1.270941883767535e-05, + "loss": 2.0697, + "step": 3171 + }, + { + "epoch": 0.04, + "grad_norm": 67.30045007359826, + "learning_rate": 1.2721442885771545e-05, + "loss": 2.3953, + "step": 3174 + }, + { + "epoch": 0.04, + "grad_norm": 60.54630129900434, + "learning_rate": 1.2733466933867737e-05, + "loss": 2.0176, + "step": 3177 + }, + { + "epoch": 0.04, + "grad_norm": 74.76006985611781, + "learning_rate": 1.2745490981963928e-05, + "loss": 2.1649, + "step": 3180 + }, + { + "epoch": 0.04, + "grad_norm": 55.96295562493014, + "learning_rate": 1.275751503006012e-05, + "loss": 1.8827, + "step": 3183 + }, + { + "epoch": 0.04, + "grad_norm": 148.69081388364805, + "learning_rate": 1.2769539078156315e-05, + "loss": 2.1071, + "step": 3186 + }, + { + "epoch": 0.04, + "grad_norm": 45.35709281378158, + "learning_rate": 1.2781563126252506e-05, + "loss": 1.7415, + "step": 3189 + }, + { + "epoch": 0.04, + "grad_norm": 107.76930031786378, + "learning_rate": 1.2793587174348698e-05, + "loss": 1.9141, + "step": 3192 + }, + { + "epoch": 0.04, + "grad_norm": 84.33632286207869, + "learning_rate": 1.280561122244489e-05, + "loss": 2.5262, + "step": 3195 + }, + { + "epoch": 0.04, + "grad_norm": 104.67822233440914, + "learning_rate": 1.2817635270541083e-05, + "loss": 1.9832, + "step": 3198 + }, + { + "epoch": 0.04, + "grad_norm": 424.9591284730644, + "learning_rate": 1.2829659318637276e-05, + "loss": 2.1424, + "step": 3201 + }, + { + "epoch": 0.04, + "grad_norm": 76.08958341357935, + "learning_rate": 1.2841683366733468e-05, + "loss": 2.4131, + "step": 3204 + }, + { + "epoch": 0.04, + "grad_norm": 214.2695887773906, + "learning_rate": 1.2853707414829662e-05, + "loss": 2.306, + "step": 3207 + }, + { + "epoch": 0.04, + "grad_norm": 36.44066697586963, + "learning_rate": 1.2865731462925853e-05, + "loss": 2.3817, + "step": 3210 + }, + { + "epoch": 0.04, + "grad_norm": 60.15223367820422, + "learning_rate": 1.2877755511022046e-05, + "loss": 2.0283, + "step": 3213 + }, + { + "epoch": 0.04, + "grad_norm": 90.26676723340299, + "learning_rate": 1.2889779559118238e-05, + "loss": 2.0586, + "step": 3216 + }, + { + "epoch": 0.04, + "grad_norm": 55.551934508287964, + "learning_rate": 1.2901803607214429e-05, + "loss": 2.4295, + "step": 3219 + }, + { + "epoch": 0.04, + "grad_norm": 14.962013508130388, + "learning_rate": 1.2913827655310623e-05, + "loss": 2.9657, + "step": 3222 + }, + { + "epoch": 0.04, + "grad_norm": 84.13799594678463, + "learning_rate": 1.2925851703406816e-05, + "loss": 1.9501, + "step": 3225 + }, + { + "epoch": 0.04, + "grad_norm": 19.116746502978625, + "learning_rate": 1.2937875751503006e-05, + "loss": 2.2946, + "step": 3228 + }, + { + "epoch": 0.04, + "grad_norm": 39.31743914364749, + "learning_rate": 1.2949899799599199e-05, + "loss": 2.0554, + "step": 3231 + }, + { + "epoch": 0.04, + "grad_norm": 15.5172791601549, + "learning_rate": 1.2961923847695393e-05, + "loss": 2.1639, + "step": 3234 + }, + { + "epoch": 0.04, + "grad_norm": 105.0133333520971, + "learning_rate": 1.2973947895791584e-05, + "loss": 2.2075, + "step": 3237 + }, + { + "epoch": 0.04, + "grad_norm": 293.1097661468001, + "learning_rate": 1.2985971943887776e-05, + "loss": 1.9916, + "step": 3240 + }, + { + "epoch": 0.04, + "grad_norm": 55.55654862254406, + "learning_rate": 1.2997995991983969e-05, + "loss": 2.5166, + "step": 3243 + }, + { + "epoch": 0.04, + "grad_norm": 78.91818805698729, + "learning_rate": 1.3010020040080163e-05, + "loss": 2.4681, + "step": 3246 + }, + { + "epoch": 0.04, + "grad_norm": 73.0775730409853, + "learning_rate": 1.3022044088176354e-05, + "loss": 2.4166, + "step": 3249 + }, + { + "epoch": 0.04, + "grad_norm": 12.20979885185021, + "learning_rate": 1.3034068136272546e-05, + "loss": 2.1392, + "step": 3252 + }, + { + "epoch": 0.04, + "grad_norm": 22.706071342785098, + "learning_rate": 1.3046092184368739e-05, + "loss": 2.5832, + "step": 3255 + }, + { + "epoch": 0.04, + "grad_norm": 14.713260410720334, + "learning_rate": 1.305811623246493e-05, + "loss": 2.3378, + "step": 3258 + }, + { + "epoch": 0.04, + "grad_norm": 72.61473083136242, + "learning_rate": 1.3070140280561124e-05, + "loss": 2.52, + "step": 3261 + }, + { + "epoch": 0.04, + "grad_norm": 29.78935790195438, + "learning_rate": 1.3082164328657316e-05, + "loss": 1.9855, + "step": 3264 + }, + { + "epoch": 0.04, + "grad_norm": 25.148876947536756, + "learning_rate": 1.3094188376753507e-05, + "loss": 2.1012, + "step": 3267 + }, + { + "epoch": 0.04, + "grad_norm": 22.23632722380777, + "learning_rate": 1.31062124248497e-05, + "loss": 2.1517, + "step": 3270 + }, + { + "epoch": 0.04, + "grad_norm": 59.69996175432443, + "learning_rate": 1.3118236472945894e-05, + "loss": 2.544, + "step": 3273 + }, + { + "epoch": 0.04, + "grad_norm": 30.188445177639476, + "learning_rate": 1.3130260521042085e-05, + "loss": 2.1973, + "step": 3276 + }, + { + "epoch": 0.04, + "grad_norm": 93.59283194470176, + "learning_rate": 1.3142284569138277e-05, + "loss": 2.4021, + "step": 3279 + }, + { + "epoch": 0.04, + "grad_norm": 55.43089842563764, + "learning_rate": 1.315430861723447e-05, + "loss": 2.349, + "step": 3282 + }, + { + "epoch": 0.04, + "grad_norm": 73.20912016240624, + "learning_rate": 1.3166332665330664e-05, + "loss": 2.3507, + "step": 3285 + }, + { + "epoch": 0.04, + "grad_norm": 29.062451290268587, + "learning_rate": 1.3178356713426855e-05, + "loss": 2.2886, + "step": 3288 + }, + { + "epoch": 0.04, + "grad_norm": 20.171830445681092, + "learning_rate": 1.3190380761523047e-05, + "loss": 1.9408, + "step": 3291 + }, + { + "epoch": 0.04, + "grad_norm": 9.62029988317502, + "learning_rate": 1.320240480961924e-05, + "loss": 1.829, + "step": 3294 + }, + { + "epoch": 0.04, + "grad_norm": 42.53825489278587, + "learning_rate": 1.3214428857715432e-05, + "loss": 1.8904, + "step": 3297 + }, + { + "epoch": 0.04, + "grad_norm": 80.21987076034542, + "learning_rate": 1.3226452905811624e-05, + "loss": 2.4819, + "step": 3300 + }, + { + "epoch": 0.04, + "grad_norm": 30.10195936694513, + "learning_rate": 1.3238476953907817e-05, + "loss": 1.8817, + "step": 3303 + }, + { + "epoch": 0.04, + "grad_norm": 105.47301400325891, + "learning_rate": 1.3250501002004008e-05, + "loss": 2.4352, + "step": 3306 + }, + { + "epoch": 0.04, + "grad_norm": 28.550132035871762, + "learning_rate": 1.3262525050100202e-05, + "loss": 2.2825, + "step": 3309 + }, + { + "epoch": 0.04, + "grad_norm": 26.724472888156686, + "learning_rate": 1.3274549098196394e-05, + "loss": 2.0765, + "step": 3312 + }, + { + "epoch": 0.04, + "grad_norm": 13.608833811665557, + "learning_rate": 1.3286573146292587e-05, + "loss": 2.425, + "step": 3315 + }, + { + "epoch": 0.04, + "grad_norm": 37.90607444215086, + "learning_rate": 1.3298597194388778e-05, + "loss": 2.4665, + "step": 3318 + }, + { + "epoch": 0.04, + "grad_norm": 17.4106970001608, + "learning_rate": 1.3310621242484972e-05, + "loss": 2.0422, + "step": 3321 + }, + { + "epoch": 0.04, + "grad_norm": 43.76352912997015, + "learning_rate": 1.3322645290581164e-05, + "loss": 2.009, + "step": 3324 + }, + { + "epoch": 0.04, + "grad_norm": 40.56711547076464, + "learning_rate": 1.3334669338677355e-05, + "loss": 1.6766, + "step": 3327 + }, + { + "epoch": 0.04, + "grad_norm": 30.73114517223061, + "learning_rate": 1.3346693386773548e-05, + "loss": 2.2292, + "step": 3330 + }, + { + "epoch": 0.04, + "grad_norm": 31.060741547307046, + "learning_rate": 1.3358717434869742e-05, + "loss": 2.0021, + "step": 3333 + }, + { + "epoch": 0.04, + "grad_norm": 51.83852582544694, + "learning_rate": 1.3370741482965933e-05, + "loss": 2.0467, + "step": 3336 + }, + { + "epoch": 0.04, + "grad_norm": 50.127930819800525, + "learning_rate": 1.3382765531062125e-05, + "loss": 1.8237, + "step": 3339 + }, + { + "epoch": 0.04, + "grad_norm": 20.071752218001198, + "learning_rate": 1.3394789579158318e-05, + "loss": 1.918, + "step": 3342 + }, + { + "epoch": 0.04, + "grad_norm": 48.86823102775433, + "learning_rate": 1.3406813627254509e-05, + "loss": 2.1325, + "step": 3345 + }, + { + "epoch": 0.04, + "grad_norm": 88.42106168875782, + "learning_rate": 1.3418837675350703e-05, + "loss": 2.3179, + "step": 3348 + }, + { + "epoch": 0.04, + "grad_norm": 40.99559816952981, + "learning_rate": 1.3430861723446895e-05, + "loss": 1.7919, + "step": 3351 + }, + { + "epoch": 0.04, + "grad_norm": 88.24131335308952, + "learning_rate": 1.3442885771543088e-05, + "loss": 2.152, + "step": 3354 + }, + { + "epoch": 0.04, + "grad_norm": 93.19469361597773, + "learning_rate": 1.3454909819639278e-05, + "loss": 2.0624, + "step": 3357 + }, + { + "epoch": 0.04, + "grad_norm": 66.91547799755152, + "learning_rate": 1.3466933867735473e-05, + "loss": 2.4425, + "step": 3360 + }, + { + "epoch": 0.04, + "grad_norm": 28.264079763199724, + "learning_rate": 1.3478957915831665e-05, + "loss": 2.068, + "step": 3363 + }, + { + "epoch": 0.04, + "grad_norm": 10.888030724477733, + "learning_rate": 1.3490981963927856e-05, + "loss": 2.7799, + "step": 3366 + }, + { + "epoch": 0.04, + "grad_norm": 59.65853107812815, + "learning_rate": 1.3503006012024048e-05, + "loss": 2.3719, + "step": 3369 + }, + { + "epoch": 0.04, + "grad_norm": 114.77997697531003, + "learning_rate": 1.3515030060120243e-05, + "loss": 2.4635, + "step": 3372 + }, + { + "epoch": 0.04, + "grad_norm": 51.26481156089919, + "learning_rate": 1.3527054108216433e-05, + "loss": 2.3894, + "step": 3375 + }, + { + "epoch": 0.04, + "grad_norm": 52.7858183007887, + "learning_rate": 1.3539078156312626e-05, + "loss": 1.9629, + "step": 3378 + }, + { + "epoch": 0.04, + "grad_norm": 57.00844874204393, + "learning_rate": 1.3551102204408818e-05, + "loss": 1.8175, + "step": 3381 + }, + { + "epoch": 0.04, + "grad_norm": 146.80410866956166, + "learning_rate": 1.356312625250501e-05, + "loss": 1.9117, + "step": 3384 + }, + { + "epoch": 0.04, + "grad_norm": 55.048341567499364, + "learning_rate": 1.3575150300601203e-05, + "loss": 1.6921, + "step": 3387 + }, + { + "epoch": 0.04, + "grad_norm": 775.3183142731551, + "learning_rate": 1.3587174348697396e-05, + "loss": 1.855, + "step": 3390 + }, + { + "epoch": 0.04, + "grad_norm": 91.18556991146355, + "learning_rate": 1.3599198396793588e-05, + "loss": 1.4896, + "step": 3393 + }, + { + "epoch": 0.04, + "grad_norm": 42.82008966601201, + "learning_rate": 1.3611222444889781e-05, + "loss": 1.7384, + "step": 3396 + }, + { + "epoch": 0.04, + "grad_norm": 37.90549592726892, + "learning_rate": 1.3623246492985973e-05, + "loss": 2.1306, + "step": 3399 + }, + { + "epoch": 0.04, + "grad_norm": 84.8511666533189, + "learning_rate": 1.3635270541082166e-05, + "loss": 2.304, + "step": 3402 + }, + { + "epoch": 0.04, + "grad_norm": 28.27210070963916, + "learning_rate": 1.3647294589178357e-05, + "loss": 2.1226, + "step": 3405 + }, + { + "epoch": 0.04, + "grad_norm": 87.36933082535042, + "learning_rate": 1.3659318637274551e-05, + "loss": 2.0343, + "step": 3408 + }, + { + "epoch": 0.04, + "grad_norm": 61.804712498522115, + "learning_rate": 1.3671342685370743e-05, + "loss": 2.1676, + "step": 3411 + }, + { + "epoch": 0.04, + "grad_norm": 34.208527024951046, + "learning_rate": 1.3683366733466934e-05, + "loss": 2.1943, + "step": 3414 + }, + { + "epoch": 0.04, + "grad_norm": 40.29467226357898, + "learning_rate": 1.3695390781563127e-05, + "loss": 2.1565, + "step": 3417 + }, + { + "epoch": 0.04, + "grad_norm": 36.26534165003002, + "learning_rate": 1.3707414829659321e-05, + "loss": 1.984, + "step": 3420 + }, + { + "epoch": 0.04, + "grad_norm": 75.29109895796888, + "learning_rate": 1.3719438877755513e-05, + "loss": 1.872, + "step": 3423 + }, + { + "epoch": 0.04, + "grad_norm": 49.37675331239792, + "learning_rate": 1.3731462925851704e-05, + "loss": 1.7174, + "step": 3426 + }, + { + "epoch": 0.04, + "grad_norm": 12.464923331378825, + "learning_rate": 1.3743486973947897e-05, + "loss": 2.2206, + "step": 3429 + }, + { + "epoch": 0.04, + "grad_norm": 12.303081372213635, + "learning_rate": 1.375551102204409e-05, + "loss": 1.8014, + "step": 3432 + }, + { + "epoch": 0.04, + "grad_norm": 41.445985794040965, + "learning_rate": 1.3767535070140282e-05, + "loss": 2.0389, + "step": 3435 + }, + { + "epoch": 0.04, + "grad_norm": 13.998985825472412, + "learning_rate": 1.3779559118236474e-05, + "loss": 1.8617, + "step": 3438 + }, + { + "epoch": 0.04, + "grad_norm": 15.988735065432904, + "learning_rate": 1.3791583166332667e-05, + "loss": 1.8911, + "step": 3441 + }, + { + "epoch": 0.04, + "grad_norm": 10.87722720047889, + "learning_rate": 1.3803607214428857e-05, + "loss": 1.9018, + "step": 3444 + }, + { + "epoch": 0.04, + "grad_norm": 31.394757417843863, + "learning_rate": 1.3815631262525052e-05, + "loss": 2.0299, + "step": 3447 + }, + { + "epoch": 0.04, + "grad_norm": 13.551049885069101, + "learning_rate": 1.3827655310621244e-05, + "loss": 2.059, + "step": 3450 + }, + { + "epoch": 0.04, + "grad_norm": 86.30331204146053, + "learning_rate": 1.3839679358717435e-05, + "loss": 2.1745, + "step": 3453 + }, + { + "epoch": 0.04, + "grad_norm": 25.035951261612222, + "learning_rate": 1.3851703406813627e-05, + "loss": 1.5279, + "step": 3456 + }, + { + "epoch": 0.04, + "grad_norm": 14.081562355975088, + "learning_rate": 1.3863727454909822e-05, + "loss": 1.9793, + "step": 3459 + }, + { + "epoch": 0.04, + "grad_norm": 34.23062679668549, + "learning_rate": 1.3875751503006014e-05, + "loss": 1.7591, + "step": 3462 + }, + { + "epoch": 0.04, + "grad_norm": 49.17064458581782, + "learning_rate": 1.3887775551102205e-05, + "loss": 2.1921, + "step": 3465 + }, + { + "epoch": 0.04, + "grad_norm": 29.78501175664652, + "learning_rate": 1.3899799599198397e-05, + "loss": 1.7984, + "step": 3468 + }, + { + "epoch": 0.04, + "grad_norm": 35.434242353005054, + "learning_rate": 1.3911823647294592e-05, + "loss": 1.7136, + "step": 3471 + }, + { + "epoch": 0.04, + "grad_norm": 10.60046533703103, + "learning_rate": 1.3923847695390782e-05, + "loss": 2.1609, + "step": 3474 + }, + { + "epoch": 0.04, + "grad_norm": 14.582279177309676, + "learning_rate": 1.3935871743486975e-05, + "loss": 1.9137, + "step": 3477 + }, + { + "epoch": 0.04, + "grad_norm": 13.937479734470301, + "learning_rate": 1.3947895791583167e-05, + "loss": 2.0173, + "step": 3480 + }, + { + "epoch": 0.04, + "grad_norm": 20.8907578373733, + "learning_rate": 1.3959919839679358e-05, + "loss": 1.5627, + "step": 3483 + }, + { + "epoch": 0.04, + "grad_norm": 3.056848733328155, + "learning_rate": 1.3971943887775552e-05, + "loss": 1.7682, + "step": 3486 + }, + { + "epoch": 0.04, + "grad_norm": 5.434300380854475, + "learning_rate": 1.3983967935871745e-05, + "loss": 1.7131, + "step": 3489 + }, + { + "epoch": 0.04, + "grad_norm": 10.137259800449689, + "learning_rate": 1.3995991983967937e-05, + "loss": 1.7264, + "step": 3492 + }, + { + "epoch": 0.04, + "grad_norm": 113.33170860651046, + "learning_rate": 1.400801603206413e-05, + "loss": 2.5182, + "step": 3495 + }, + { + "epoch": 0.04, + "grad_norm": 22.097452403876954, + "learning_rate": 1.4020040080160322e-05, + "loss": 1.9678, + "step": 3498 + }, + { + "epoch": 0.04, + "grad_norm": 20.384856014915087, + "learning_rate": 1.4032064128256515e-05, + "loss": 2.22, + "step": 3501 + }, + { + "epoch": 0.04, + "grad_norm": 37.10375901154673, + "learning_rate": 1.4044088176352706e-05, + "loss": 2.2961, + "step": 3504 + }, + { + "epoch": 0.04, + "grad_norm": 12.51835820972121, + "learning_rate": 1.40561122244489e-05, + "loss": 1.8238, + "step": 3507 + }, + { + "epoch": 0.04, + "grad_norm": 18.199387133540174, + "learning_rate": 1.4068136272545092e-05, + "loss": 1.4281, + "step": 3510 + }, + { + "epoch": 0.04, + "grad_norm": 24.478297877773812, + "learning_rate": 1.4080160320641283e-05, + "loss": 1.7033, + "step": 3513 + }, + { + "epoch": 0.04, + "grad_norm": 57.572466258244596, + "learning_rate": 1.4092184368737476e-05, + "loss": 1.9486, + "step": 3516 + }, + { + "epoch": 0.04, + "grad_norm": 94.40863629779238, + "learning_rate": 1.410420841683367e-05, + "loss": 2.3005, + "step": 3519 + }, + { + "epoch": 0.04, + "grad_norm": 116.60222235926825, + "learning_rate": 1.411623246492986e-05, + "loss": 2.0977, + "step": 3522 + }, + { + "epoch": 0.04, + "grad_norm": 14.035438006162604, + "learning_rate": 1.4128256513026053e-05, + "loss": 1.8084, + "step": 3525 + }, + { + "epoch": 0.04, + "grad_norm": 11.602059414927718, + "learning_rate": 1.4140280561122246e-05, + "loss": 2.214, + "step": 3528 + }, + { + "epoch": 0.04, + "grad_norm": 9.974092055688144, + "learning_rate": 1.415230460921844e-05, + "loss": 1.8535, + "step": 3531 + }, + { + "epoch": 0.04, + "grad_norm": 9.83510277837429, + "learning_rate": 1.416432865731463e-05, + "loss": 1.8979, + "step": 3534 + }, + { + "epoch": 0.04, + "grad_norm": 33.7907331624132, + "learning_rate": 1.4176352705410823e-05, + "loss": 2.0294, + "step": 3537 + }, + { + "epoch": 0.04, + "grad_norm": 16.440318616846096, + "learning_rate": 1.4188376753507016e-05, + "loss": 2.1126, + "step": 3540 + }, + { + "epoch": 0.04, + "grad_norm": 17.121444079205638, + "learning_rate": 1.4200400801603206e-05, + "loss": 2.2145, + "step": 3543 + }, + { + "epoch": 0.04, + "grad_norm": 13.064063087109169, + "learning_rate": 1.42124248496994e-05, + "loss": 2.3326, + "step": 3546 + }, + { + "epoch": 0.04, + "grad_norm": 5.1697764653980816, + "learning_rate": 1.4224448897795593e-05, + "loss": 1.6304, + "step": 3549 + }, + { + "epoch": 0.04, + "grad_norm": 43.71258679689917, + "learning_rate": 1.4236472945891784e-05, + "loss": 2.4259, + "step": 3552 + }, + { + "epoch": 0.04, + "grad_norm": 17.458140894321918, + "learning_rate": 1.4248496993987976e-05, + "loss": 1.5395, + "step": 3555 + }, + { + "epoch": 0.04, + "grad_norm": 17.477230112202253, + "learning_rate": 1.426052104208417e-05, + "loss": 2.1462, + "step": 3558 + }, + { + "epoch": 0.04, + "grad_norm": 5.857297696459638, + "learning_rate": 1.4272545090180361e-05, + "loss": 1.996, + "step": 3561 + }, + { + "epoch": 0.04, + "grad_norm": 38.09454319122642, + "learning_rate": 1.4284569138276554e-05, + "loss": 2.2741, + "step": 3564 + }, + { + "epoch": 0.04, + "grad_norm": 18.48391170541746, + "learning_rate": 1.4296593186372746e-05, + "loss": 2.0271, + "step": 3567 + }, + { + "epoch": 0.04, + "grad_norm": 21.265998115669724, + "learning_rate": 1.430861723446894e-05, + "loss": 2.5777, + "step": 3570 + }, + { + "epoch": 0.04, + "grad_norm": 17.541184975609255, + "learning_rate": 1.4320641282565131e-05, + "loss": 2.0383, + "step": 3573 + }, + { + "epoch": 0.04, + "grad_norm": 5.429716305635274, + "learning_rate": 1.4332665330661324e-05, + "loss": 1.9937, + "step": 3576 + }, + { + "epoch": 0.04, + "grad_norm": 41.33380191687909, + "learning_rate": 1.4344689378757516e-05, + "loss": 1.8789, + "step": 3579 + }, + { + "epoch": 0.04, + "grad_norm": 27.77401650133088, + "learning_rate": 1.4356713426853707e-05, + "loss": 1.6156, + "step": 3582 + }, + { + "epoch": 0.04, + "grad_norm": 32.7646156821679, + "learning_rate": 1.4368737474949901e-05, + "loss": 1.9855, + "step": 3585 + }, + { + "epoch": 0.04, + "grad_norm": 15.724311629544228, + "learning_rate": 1.4380761523046094e-05, + "loss": 1.9239, + "step": 3588 + }, + { + "epoch": 0.04, + "grad_norm": 70.64370729734158, + "learning_rate": 1.4392785571142285e-05, + "loss": 1.8674, + "step": 3591 + }, + { + "epoch": 0.04, + "grad_norm": 25.026869295841205, + "learning_rate": 1.4404809619238479e-05, + "loss": 2.0523, + "step": 3594 + }, + { + "epoch": 0.04, + "grad_norm": 25.10707379039676, + "learning_rate": 1.4416833667334671e-05, + "loss": 1.9623, + "step": 3597 + }, + { + "epoch": 0.04, + "grad_norm": 36.208768788672366, + "learning_rate": 1.4428857715430864e-05, + "loss": 2.0048, + "step": 3600 + }, + { + "epoch": 0.04, + "grad_norm": 24.50709083546949, + "learning_rate": 1.4440881763527055e-05, + "loss": 2.2661, + "step": 3603 + }, + { + "epoch": 0.04, + "grad_norm": 43.15006426365885, + "learning_rate": 1.4452905811623249e-05, + "loss": 1.8853, + "step": 3606 + }, + { + "epoch": 0.04, + "grad_norm": 13.320946703399864, + "learning_rate": 1.4464929859719441e-05, + "loss": 2.1208, + "step": 3609 + }, + { + "epoch": 0.04, + "grad_norm": 24.129433844764353, + "learning_rate": 1.4476953907815632e-05, + "loss": 2.516, + "step": 3612 + }, + { + "epoch": 0.04, + "grad_norm": 68.34214760983413, + "learning_rate": 1.4488977955911824e-05, + "loss": 2.224, + "step": 3615 + }, + { + "epoch": 0.04, + "grad_norm": 28.005677677781875, + "learning_rate": 1.4501002004008019e-05, + "loss": 1.6991, + "step": 3618 + }, + { + "epoch": 0.04, + "grad_norm": 34.272906342709604, + "learning_rate": 1.451302605210421e-05, + "loss": 1.8099, + "step": 3621 + }, + { + "epoch": 0.04, + "grad_norm": 63.66786230909972, + "learning_rate": 1.4525050100200402e-05, + "loss": 1.7711, + "step": 3624 + }, + { + "epoch": 0.04, + "grad_norm": 15.520438675723613, + "learning_rate": 1.4537074148296594e-05, + "loss": 2.0048, + "step": 3627 + }, + { + "epoch": 0.04, + "grad_norm": 10.478328475902227, + "learning_rate": 1.4549098196392785e-05, + "loss": 2.1057, + "step": 3630 + }, + { + "epoch": 0.04, + "grad_norm": 48.61998248452434, + "learning_rate": 1.456112224448898e-05, + "loss": 1.7845, + "step": 3633 + }, + { + "epoch": 0.04, + "grad_norm": 13.979074352777562, + "learning_rate": 1.4573146292585172e-05, + "loss": 1.9749, + "step": 3636 + }, + { + "epoch": 0.04, + "grad_norm": 35.40456020591634, + "learning_rate": 1.4585170340681364e-05, + "loss": 2.1437, + "step": 3639 + }, + { + "epoch": 0.04, + "grad_norm": 15.30781938266023, + "learning_rate": 1.4597194388777555e-05, + "loss": 1.9972, + "step": 3642 + }, + { + "epoch": 0.04, + "grad_norm": 23.938094769630222, + "learning_rate": 1.460921843687375e-05, + "loss": 2.1407, + "step": 3645 + }, + { + "epoch": 0.04, + "grad_norm": 32.5168885968296, + "learning_rate": 1.4621242484969942e-05, + "loss": 1.616, + "step": 3648 + }, + { + "epoch": 0.04, + "grad_norm": 20.295116812237282, + "learning_rate": 1.4633266533066133e-05, + "loss": 2.7506, + "step": 3651 + }, + { + "epoch": 0.04, + "grad_norm": 9.925107485112072, + "learning_rate": 1.4645290581162325e-05, + "loss": 2.1813, + "step": 3654 + }, + { + "epoch": 0.04, + "grad_norm": 23.675117272237625, + "learning_rate": 1.465731462925852e-05, + "loss": 2.0312, + "step": 3657 + }, + { + "epoch": 0.04, + "grad_norm": 56.93872420231626, + "learning_rate": 1.466933867735471e-05, + "loss": 2.05, + "step": 3660 + }, + { + "epoch": 0.04, + "grad_norm": 21.79683056655725, + "learning_rate": 1.4681362725450903e-05, + "loss": 1.9462, + "step": 3663 + }, + { + "epoch": 0.04, + "grad_norm": 12.593915425983512, + "learning_rate": 1.4693386773547095e-05, + "loss": 1.6012, + "step": 3666 + }, + { + "epoch": 0.04, + "grad_norm": 35.26238920221428, + "learning_rate": 1.4705410821643286e-05, + "loss": 1.6273, + "step": 3669 + }, + { + "epoch": 0.04, + "grad_norm": 26.722025855108964, + "learning_rate": 1.471743486973948e-05, + "loss": 1.6305, + "step": 3672 + }, + { + "epoch": 0.04, + "grad_norm": 11.670955123255307, + "learning_rate": 1.4729458917835673e-05, + "loss": 1.5736, + "step": 3675 + }, + { + "epoch": 0.04, + "grad_norm": 23.215544940824483, + "learning_rate": 1.4741482965931865e-05, + "loss": 1.9548, + "step": 3678 + }, + { + "epoch": 0.04, + "grad_norm": 15.86019209602385, + "learning_rate": 1.4753507014028056e-05, + "loss": 1.8108, + "step": 3681 + }, + { + "epoch": 0.04, + "grad_norm": 167.61198434876698, + "learning_rate": 1.476553106212425e-05, + "loss": 2.0804, + "step": 3684 + }, + { + "epoch": 0.04, + "grad_norm": 67.33978803854833, + "learning_rate": 1.4777555110220443e-05, + "loss": 2.1711, + "step": 3687 + }, + { + "epoch": 0.04, + "grad_norm": 29.02772365914923, + "learning_rate": 1.4789579158316633e-05, + "loss": 1.7275, + "step": 3690 + }, + { + "epoch": 0.04, + "grad_norm": 25.111318760702368, + "learning_rate": 1.4801603206412828e-05, + "loss": 1.9204, + "step": 3693 + }, + { + "epoch": 0.04, + "grad_norm": 31.06523642487202, + "learning_rate": 1.481362725450902e-05, + "loss": 2.0521, + "step": 3696 + }, + { + "epoch": 0.04, + "grad_norm": 26.734509145761407, + "learning_rate": 1.4825651302605211e-05, + "loss": 2.3733, + "step": 3699 + }, + { + "epoch": 0.04, + "grad_norm": 13.890329000700467, + "learning_rate": 1.4837675350701403e-05, + "loss": 1.7437, + "step": 3702 + }, + { + "epoch": 0.04, + "grad_norm": 12.990711722886614, + "learning_rate": 1.4849699398797598e-05, + "loss": 1.9134, + "step": 3705 + }, + { + "epoch": 0.04, + "grad_norm": 33.52357517536722, + "learning_rate": 1.486172344689379e-05, + "loss": 1.8092, + "step": 3708 + }, + { + "epoch": 0.04, + "grad_norm": 44.75260690419426, + "learning_rate": 1.4873747494989981e-05, + "loss": 1.7632, + "step": 3711 + }, + { + "epoch": 0.04, + "grad_norm": 62.81819149163785, + "learning_rate": 1.4885771543086173e-05, + "loss": 2.0752, + "step": 3714 + }, + { + "epoch": 0.04, + "grad_norm": 30.688027467631674, + "learning_rate": 1.4897795591182368e-05, + "loss": 1.886, + "step": 3717 + }, + { + "epoch": 0.04, + "grad_norm": 27.128515611656386, + "learning_rate": 1.4909819639278558e-05, + "loss": 2.2952, + "step": 3720 + }, + { + "epoch": 0.04, + "grad_norm": 18.40544329634798, + "learning_rate": 1.4921843687374751e-05, + "loss": 1.9753, + "step": 3723 + }, + { + "epoch": 0.04, + "grad_norm": 45.921913893572636, + "learning_rate": 1.4933867735470943e-05, + "loss": 1.5863, + "step": 3726 + }, + { + "epoch": 0.04, + "grad_norm": 61.9178415267817, + "learning_rate": 1.4945891783567134e-05, + "loss": 2.0451, + "step": 3729 + }, + { + "epoch": 0.04, + "grad_norm": 15.50935811622142, + "learning_rate": 1.4957915831663328e-05, + "loss": 1.4759, + "step": 3732 + }, + { + "epoch": 0.04, + "grad_norm": 9.255553785919794, + "learning_rate": 1.496993987975952e-05, + "loss": 1.9567, + "step": 3735 + }, + { + "epoch": 0.04, + "grad_norm": 23.63317383480672, + "learning_rate": 1.4981963927855712e-05, + "loss": 2.0155, + "step": 3738 + }, + { + "epoch": 0.04, + "grad_norm": 24.49891284384977, + "learning_rate": 1.4993987975951904e-05, + "loss": 1.9779, + "step": 3741 + }, + { + "epoch": 0.05, + "grad_norm": 17.46129910508676, + "learning_rate": 1.5006012024048098e-05, + "loss": 1.5002, + "step": 3744 + }, + { + "epoch": 0.05, + "grad_norm": 13.752299691144248, + "learning_rate": 1.501803607214429e-05, + "loss": 2.0382, + "step": 3747 + }, + { + "epoch": 0.05, + "grad_norm": 20.710341388227054, + "learning_rate": 1.5030060120240482e-05, + "loss": 1.6613, + "step": 3750 + }, + { + "epoch": 0.05, + "grad_norm": 10.043955676859722, + "learning_rate": 1.5042084168336674e-05, + "loss": 1.99, + "step": 3753 + }, + { + "epoch": 0.05, + "grad_norm": 137.2304337881877, + "learning_rate": 1.5054108216432868e-05, + "loss": 1.8201, + "step": 3756 + }, + { + "epoch": 0.05, + "grad_norm": 46.01655960771264, + "learning_rate": 1.5066132264529059e-05, + "loss": 2.2668, + "step": 3759 + }, + { + "epoch": 0.05, + "grad_norm": 33.705024105613106, + "learning_rate": 1.5078156312625252e-05, + "loss": 1.5833, + "step": 3762 + }, + { + "epoch": 0.05, + "grad_norm": 45.20972797956708, + "learning_rate": 1.5090180360721444e-05, + "loss": 1.8054, + "step": 3765 + }, + { + "epoch": 0.05, + "grad_norm": 22.576535363924474, + "learning_rate": 1.5102204408817635e-05, + "loss": 1.6201, + "step": 3768 + }, + { + "epoch": 0.05, + "grad_norm": 8.073914750143398, + "learning_rate": 1.5114228456913829e-05, + "loss": 1.5063, + "step": 3771 + }, + { + "epoch": 0.05, + "grad_norm": 26.755388900388727, + "learning_rate": 1.5126252505010022e-05, + "loss": 1.8588, + "step": 3774 + }, + { + "epoch": 0.05, + "grad_norm": 23.834789769515787, + "learning_rate": 1.5138276553106212e-05, + "loss": 2.0936, + "step": 3777 + }, + { + "epoch": 0.05, + "grad_norm": 53.73750075654907, + "learning_rate": 1.5150300601202405e-05, + "loss": 2.1906, + "step": 3780 + }, + { + "epoch": 0.05, + "grad_norm": 20.293325370777932, + "learning_rate": 1.5162324649298599e-05, + "loss": 1.8874, + "step": 3783 + }, + { + "epoch": 0.05, + "grad_norm": 72.78418061417085, + "learning_rate": 1.5174348697394792e-05, + "loss": 2.0097, + "step": 3786 + }, + { + "epoch": 0.05, + "grad_norm": 20.23515091157938, + "learning_rate": 1.5186372745490982e-05, + "loss": 1.7356, + "step": 3789 + }, + { + "epoch": 0.05, + "grad_norm": 9.818034038669552, + "learning_rate": 1.5198396793587175e-05, + "loss": 1.638, + "step": 3792 + }, + { + "epoch": 0.05, + "grad_norm": 18.45510593774744, + "learning_rate": 1.5210420841683369e-05, + "loss": 1.9949, + "step": 3795 + }, + { + "epoch": 0.05, + "grad_norm": 29.871554412310836, + "learning_rate": 1.522244488977956e-05, + "loss": 1.8862, + "step": 3798 + }, + { + "epoch": 0.05, + "grad_norm": 11.712880075697171, + "learning_rate": 1.5234468937875752e-05, + "loss": 1.7572, + "step": 3801 + }, + { + "epoch": 0.05, + "grad_norm": 59.899570495331425, + "learning_rate": 1.5246492985971947e-05, + "loss": 2.4268, + "step": 3804 + }, + { + "epoch": 0.05, + "grad_norm": 13.103297884258556, + "learning_rate": 1.5258517034068137e-05, + "loss": 1.7269, + "step": 3807 + }, + { + "epoch": 0.05, + "grad_norm": 82.65466479394655, + "learning_rate": 1.5270541082164328e-05, + "loss": 2.0454, + "step": 3810 + }, + { + "epoch": 0.05, + "grad_norm": 35.26612513142114, + "learning_rate": 1.5282565130260524e-05, + "loss": 2.3512, + "step": 3813 + }, + { + "epoch": 0.05, + "grad_norm": 18.016021400484984, + "learning_rate": 1.5294589178356716e-05, + "loss": 1.6069, + "step": 3816 + }, + { + "epoch": 0.05, + "grad_norm": 89.54678047550924, + "learning_rate": 1.5306613226452906e-05, + "loss": 2.312, + "step": 3819 + }, + { + "epoch": 0.05, + "grad_norm": 58.64529628712617, + "learning_rate": 1.5318637274549098e-05, + "loss": 1.5601, + "step": 3822 + }, + { + "epoch": 0.05, + "grad_norm": 48.76790729981377, + "learning_rate": 1.5330661322645294e-05, + "loss": 2.0142, + "step": 3825 + }, + { + "epoch": 0.05, + "grad_norm": 37.37693275544608, + "learning_rate": 1.5342685370741483e-05, + "loss": 2.1238, + "step": 3828 + }, + { + "epoch": 0.05, + "grad_norm": 19.36807944556632, + "learning_rate": 1.5354709418837676e-05, + "loss": 1.6716, + "step": 3831 + }, + { + "epoch": 0.05, + "grad_norm": 105.63600441323342, + "learning_rate": 1.5366733466933868e-05, + "loss": 1.7368, + "step": 3834 + }, + { + "epoch": 0.05, + "grad_norm": 18.20263349326058, + "learning_rate": 1.537875751503006e-05, + "loss": 2.2212, + "step": 3837 + }, + { + "epoch": 0.05, + "grad_norm": 28.16502942823268, + "learning_rate": 1.5390781563126253e-05, + "loss": 2.1495, + "step": 3840 + }, + { + "epoch": 0.05, + "grad_norm": 17.295773536188978, + "learning_rate": 1.5402805611222446e-05, + "loss": 1.8656, + "step": 3843 + }, + { + "epoch": 0.05, + "grad_norm": 78.34490325073402, + "learning_rate": 1.5414829659318638e-05, + "loss": 1.9025, + "step": 3846 + }, + { + "epoch": 0.05, + "grad_norm": 35.933922406462464, + "learning_rate": 1.542685370741483e-05, + "loss": 2.0519, + "step": 3849 + }, + { + "epoch": 0.05, + "grad_norm": 33.333552836937, + "learning_rate": 1.5438877755511023e-05, + "loss": 1.7891, + "step": 3852 + }, + { + "epoch": 0.05, + "grad_norm": 12.200435960023029, + "learning_rate": 1.5450901803607216e-05, + "loss": 1.9989, + "step": 3855 + }, + { + "epoch": 0.05, + "grad_norm": 86.7447743894861, + "learning_rate": 1.5462925851703408e-05, + "loss": 1.9113, + "step": 3858 + }, + { + "epoch": 0.05, + "grad_norm": 38.96476583822671, + "learning_rate": 1.54749498997996e-05, + "loss": 2.1181, + "step": 3861 + }, + { + "epoch": 0.05, + "grad_norm": 22.024220527592508, + "learning_rate": 1.5486973947895793e-05, + "loss": 1.8352, + "step": 3864 + }, + { + "epoch": 0.05, + "grad_norm": 44.82813751798613, + "learning_rate": 1.5498997995991985e-05, + "loss": 2.2593, + "step": 3867 + }, + { + "epoch": 0.05, + "grad_norm": 51.35357903026922, + "learning_rate": 1.5511022044088178e-05, + "loss": 1.9412, + "step": 3870 + }, + { + "epoch": 0.05, + "grad_norm": 25.6497200427772, + "learning_rate": 1.552304609218437e-05, + "loss": 1.814, + "step": 3873 + }, + { + "epoch": 0.05, + "grad_norm": 63.16987429706914, + "learning_rate": 1.5535070140280563e-05, + "loss": 1.9696, + "step": 3876 + }, + { + "epoch": 0.05, + "grad_norm": 26.976621107545537, + "learning_rate": 1.5547094188376755e-05, + "loss": 2.0152, + "step": 3879 + }, + { + "epoch": 0.05, + "grad_norm": 58.7014905297746, + "learning_rate": 1.5559118236472948e-05, + "loss": 1.9122, + "step": 3882 + }, + { + "epoch": 0.05, + "grad_norm": 37.96348279721806, + "learning_rate": 1.5571142284569137e-05, + "loss": 1.8953, + "step": 3885 + }, + { + "epoch": 0.05, + "grad_norm": 11.576976913389327, + "learning_rate": 1.5583166332665333e-05, + "loss": 1.4515, + "step": 3888 + }, + { + "epoch": 0.05, + "grad_norm": 50.409964193845, + "learning_rate": 1.5595190380761525e-05, + "loss": 1.8573, + "step": 3891 + }, + { + "epoch": 0.05, + "grad_norm": 9.939856982735991, + "learning_rate": 1.5607214428857718e-05, + "loss": 2.2131, + "step": 3894 + }, + { + "epoch": 0.05, + "grad_norm": 53.65957525552612, + "learning_rate": 1.5619238476953907e-05, + "loss": 1.9536, + "step": 3897 + }, + { + "epoch": 0.05, + "grad_norm": 36.1993476873999, + "learning_rate": 1.5631262525050103e-05, + "loss": 2.1333, + "step": 3900 + }, + { + "epoch": 0.05, + "grad_norm": 40.55255986841424, + "learning_rate": 1.5643286573146295e-05, + "loss": 1.9481, + "step": 3903 + }, + { + "epoch": 0.05, + "grad_norm": 8.256614144652639, + "learning_rate": 1.5655310621242485e-05, + "loss": 2.1957, + "step": 3906 + }, + { + "epoch": 0.05, + "grad_norm": 77.1844584802892, + "learning_rate": 1.5667334669338677e-05, + "loss": 1.7886, + "step": 3909 + }, + { + "epoch": 0.05, + "grad_norm": 29.427282252167785, + "learning_rate": 1.5679358717434873e-05, + "loss": 2.0788, + "step": 3912 + }, + { + "epoch": 0.05, + "grad_norm": 62.16007280373901, + "learning_rate": 1.5691382765531062e-05, + "loss": 1.7662, + "step": 3915 + }, + { + "epoch": 0.05, + "grad_norm": 243.23015078072058, + "learning_rate": 1.5703406813627254e-05, + "loss": 2.363, + "step": 3918 + }, + { + "epoch": 0.05, + "grad_norm": 28.045746905932557, + "learning_rate": 1.5715430861723447e-05, + "loss": 1.9098, + "step": 3921 + }, + { + "epoch": 0.05, + "grad_norm": 14.810456569106837, + "learning_rate": 1.5727454909819643e-05, + "loss": 1.705, + "step": 3924 + }, + { + "epoch": 0.05, + "grad_norm": 16.35398908478554, + "learning_rate": 1.5739478957915832e-05, + "loss": 1.5505, + "step": 3927 + }, + { + "epoch": 0.05, + "grad_norm": 15.491767153566395, + "learning_rate": 1.5751503006012024e-05, + "loss": 2.1174, + "step": 3930 + }, + { + "epoch": 0.05, + "grad_norm": 39.466085397636064, + "learning_rate": 1.5763527054108217e-05, + "loss": 2.603, + "step": 3933 + }, + { + "epoch": 0.05, + "grad_norm": 28.379886948373287, + "learning_rate": 1.577555110220441e-05, + "loss": 2.0853, + "step": 3936 + }, + { + "epoch": 0.05, + "grad_norm": 18.910832727913714, + "learning_rate": 1.5787575150300602e-05, + "loss": 2.123, + "step": 3939 + }, + { + "epoch": 0.05, + "grad_norm": 3.6164976210777096, + "learning_rate": 1.5799599198396794e-05, + "loss": 2.1691, + "step": 3942 + }, + { + "epoch": 0.05, + "grad_norm": 81.82009581327183, + "learning_rate": 1.5811623246492987e-05, + "loss": 2.0549, + "step": 3945 + }, + { + "epoch": 0.05, + "grad_norm": 47.3464504573833, + "learning_rate": 1.582364729458918e-05, + "loss": 2.0806, + "step": 3948 + }, + { + "epoch": 0.05, + "grad_norm": 63.84633608160227, + "learning_rate": 1.5835671342685372e-05, + "loss": 2.2332, + "step": 3951 + }, + { + "epoch": 0.05, + "grad_norm": 42.223601322343875, + "learning_rate": 1.5847695390781564e-05, + "loss": 2.0046, + "step": 3954 + }, + { + "epoch": 0.05, + "grad_norm": 36.24013216165288, + "learning_rate": 1.5859719438877757e-05, + "loss": 1.9154, + "step": 3957 + }, + { + "epoch": 0.05, + "grad_norm": 33.069215746785815, + "learning_rate": 1.587174348697395e-05, + "loss": 1.8866, + "step": 3960 + }, + { + "epoch": 0.05, + "grad_norm": 16.933817094240567, + "learning_rate": 1.5883767535070142e-05, + "loss": 1.6165, + "step": 3963 + }, + { + "epoch": 0.05, + "grad_norm": 15.524537368048978, + "learning_rate": 1.5895791583166334e-05, + "loss": 2.2146, + "step": 3966 + }, + { + "epoch": 0.05, + "grad_norm": 32.29250168299097, + "learning_rate": 1.5907815631262527e-05, + "loss": 2.0138, + "step": 3969 + }, + { + "epoch": 0.05, + "grad_norm": 14.976830429160822, + "learning_rate": 1.591983967935872e-05, + "loss": 1.7762, + "step": 3972 + }, + { + "epoch": 0.05, + "grad_norm": 12.72652275814442, + "learning_rate": 1.5931863727454912e-05, + "loss": 1.858, + "step": 3975 + }, + { + "epoch": 0.05, + "grad_norm": 78.32657611984119, + "learning_rate": 1.5943887775551104e-05, + "loss": 2.1247, + "step": 3978 + }, + { + "epoch": 0.05, + "grad_norm": 24.402846905110174, + "learning_rate": 1.5955911823647297e-05, + "loss": 1.8515, + "step": 3981 + }, + { + "epoch": 0.05, + "grad_norm": 22.743793918395166, + "learning_rate": 1.5967935871743486e-05, + "loss": 1.9255, + "step": 3984 + }, + { + "epoch": 0.05, + "grad_norm": 17.174199972405823, + "learning_rate": 1.5979959919839682e-05, + "loss": 2.1777, + "step": 3987 + }, + { + "epoch": 0.05, + "grad_norm": 12.134576469617958, + "learning_rate": 1.5991983967935874e-05, + "loss": 1.8204, + "step": 3990 + }, + { + "epoch": 0.05, + "grad_norm": 24.670177074182323, + "learning_rate": 1.6004008016032063e-05, + "loss": 2.4031, + "step": 3993 + }, + { + "epoch": 0.05, + "grad_norm": 40.63048934724392, + "learning_rate": 1.6016032064128256e-05, + "loss": 2.1213, + "step": 3996 + }, + { + "epoch": 0.05, + "grad_norm": 55.85938751883672, + "learning_rate": 1.6028056112224452e-05, + "loss": 1.4972, + "step": 3999 + }, + { + "epoch": 0.05, + "grad_norm": 25.52286191131461, + "learning_rate": 1.6040080160320644e-05, + "loss": 1.908, + "step": 4002 + }, + { + "epoch": 0.05, + "grad_norm": 19.958112581739428, + "learning_rate": 1.6052104208416833e-05, + "loss": 1.9465, + "step": 4005 + }, + { + "epoch": 0.05, + "grad_norm": 15.934198816942816, + "learning_rate": 1.6064128256513026e-05, + "loss": 2.3083, + "step": 4008 + }, + { + "epoch": 0.05, + "grad_norm": 23.607663624818116, + "learning_rate": 1.6076152304609222e-05, + "loss": 2.1297, + "step": 4011 + }, + { + "epoch": 0.05, + "grad_norm": 13.566156348074058, + "learning_rate": 1.608817635270541e-05, + "loss": 2.2986, + "step": 4014 + }, + { + "epoch": 0.05, + "grad_norm": 6.689006711082873, + "learning_rate": 1.6100200400801603e-05, + "loss": 1.9019, + "step": 4017 + }, + { + "epoch": 0.05, + "grad_norm": 19.32306433884884, + "learning_rate": 1.6112224448897796e-05, + "loss": 1.8923, + "step": 4020 + }, + { + "epoch": 0.05, + "grad_norm": 70.20842545982956, + "learning_rate": 1.612424849699399e-05, + "loss": 2.1402, + "step": 4023 + }, + { + "epoch": 0.05, + "grad_norm": 51.061795601750006, + "learning_rate": 1.613627254509018e-05, + "loss": 2.2012, + "step": 4026 + }, + { + "epoch": 0.05, + "grad_norm": 24.843191550629804, + "learning_rate": 1.6148296593186373e-05, + "loss": 2.7946, + "step": 4029 + }, + { + "epoch": 0.05, + "grad_norm": 29.42750004153764, + "learning_rate": 1.6160320641282566e-05, + "loss": 1.902, + "step": 4032 + }, + { + "epoch": 0.05, + "grad_norm": 133.33039957790425, + "learning_rate": 1.617234468937876e-05, + "loss": 2.0357, + "step": 4035 + }, + { + "epoch": 0.05, + "grad_norm": 66.70320801203434, + "learning_rate": 1.618436873747495e-05, + "loss": 1.5914, + "step": 4038 + }, + { + "epoch": 0.05, + "grad_norm": 10.400170307716694, + "learning_rate": 1.6196392785571143e-05, + "loss": 1.8343, + "step": 4041 + }, + { + "epoch": 0.05, + "grad_norm": 85.24480443148272, + "learning_rate": 1.6208416833667336e-05, + "loss": 1.9544, + "step": 4044 + }, + { + "epoch": 0.05, + "grad_norm": 20.20711649796737, + "learning_rate": 1.622044088176353e-05, + "loss": 1.8255, + "step": 4047 + }, + { + "epoch": 0.05, + "grad_norm": 46.905160745131646, + "learning_rate": 1.623246492985972e-05, + "loss": 1.7479, + "step": 4050 + }, + { + "epoch": 0.05, + "grad_norm": 26.52848736318524, + "learning_rate": 1.6244488977955913e-05, + "loss": 1.6626, + "step": 4053 + }, + { + "epoch": 0.05, + "grad_norm": 14.837883751226455, + "learning_rate": 1.6256513026052106e-05, + "loss": 1.9271, + "step": 4056 + }, + { + "epoch": 0.05, + "grad_norm": 11.275895867372386, + "learning_rate": 1.6268537074148298e-05, + "loss": 1.9179, + "step": 4059 + }, + { + "epoch": 0.05, + "grad_norm": 7.758457352639368, + "learning_rate": 1.628056112224449e-05, + "loss": 1.762, + "step": 4062 + }, + { + "epoch": 0.05, + "grad_norm": 27.260303596786393, + "learning_rate": 1.6292585170340683e-05, + "loss": 1.8724, + "step": 4065 + }, + { + "epoch": 0.05, + "grad_norm": 10.744596092864597, + "learning_rate": 1.6304609218436876e-05, + "loss": 1.587, + "step": 4068 + }, + { + "epoch": 0.05, + "grad_norm": 33.607850005259536, + "learning_rate": 1.6316633266533068e-05, + "loss": 2.0815, + "step": 4071 + }, + { + "epoch": 0.05, + "grad_norm": 68.11549250195722, + "learning_rate": 1.632865731462926e-05, + "loss": 1.6273, + "step": 4074 + }, + { + "epoch": 0.05, + "grad_norm": 4.546256130317312, + "learning_rate": 1.6340681362725453e-05, + "loss": 1.6752, + "step": 4077 + }, + { + "epoch": 0.05, + "grad_norm": 6.136168341604495, + "learning_rate": 1.6352705410821646e-05, + "loss": 2.1136, + "step": 4080 + }, + { + "epoch": 0.05, + "grad_norm": 14.225094409623509, + "learning_rate": 1.6364729458917835e-05, + "loss": 1.9658, + "step": 4083 + }, + { + "epoch": 0.05, + "grad_norm": 5.553381946151004, + "learning_rate": 1.637675350701403e-05, + "loss": 1.6138, + "step": 4086 + }, + { + "epoch": 0.05, + "grad_norm": 16.834461983144706, + "learning_rate": 1.6388777555110223e-05, + "loss": 1.8483, + "step": 4089 + }, + { + "epoch": 0.05, + "grad_norm": 42.07411390181226, + "learning_rate": 1.6400801603206412e-05, + "loss": 1.9718, + "step": 4092 + }, + { + "epoch": 0.05, + "grad_norm": 59.409519333573, + "learning_rate": 1.6412825651302605e-05, + "loss": 2.1976, + "step": 4095 + }, + { + "epoch": 0.05, + "grad_norm": 24.941576142930487, + "learning_rate": 1.64248496993988e-05, + "loss": 2.2362, + "step": 4098 + }, + { + "epoch": 0.05, + "grad_norm": 9.144220249570287, + "learning_rate": 1.6436873747494993e-05, + "loss": 1.5924, + "step": 4101 + }, + { + "epoch": 0.05, + "grad_norm": 30.577679402329384, + "learning_rate": 1.6448897795591182e-05, + "loss": 2.4901, + "step": 4104 + }, + { + "epoch": 0.05, + "grad_norm": 4.760531783902572, + "learning_rate": 1.6460921843687375e-05, + "loss": 1.9762, + "step": 4107 + }, + { + "epoch": 0.05, + "grad_norm": 31.992861336981612, + "learning_rate": 1.647294589178357e-05, + "loss": 1.7123, + "step": 4110 + }, + { + "epoch": 0.05, + "grad_norm": 10.758723338269569, + "learning_rate": 1.648496993987976e-05, + "loss": 1.8626, + "step": 4113 + }, + { + "epoch": 0.05, + "grad_norm": 32.17920548900995, + "learning_rate": 1.6496993987975952e-05, + "loss": 1.8787, + "step": 4116 + }, + { + "epoch": 0.05, + "grad_norm": 6.9629323566847505, + "learning_rate": 1.6509018036072145e-05, + "loss": 1.9163, + "step": 4119 + }, + { + "epoch": 0.05, + "grad_norm": 16.177847498528685, + "learning_rate": 1.6521042084168337e-05, + "loss": 1.8935, + "step": 4122 + }, + { + "epoch": 0.05, + "grad_norm": 14.692410046757953, + "learning_rate": 1.653306613226453e-05, + "loss": 1.9665, + "step": 4125 + }, + { + "epoch": 0.05, + "grad_norm": 35.467176069041535, + "learning_rate": 1.6545090180360722e-05, + "loss": 2.0239, + "step": 4128 + }, + { + "epoch": 0.05, + "grad_norm": 21.267142755547408, + "learning_rate": 1.6557114228456915e-05, + "loss": 2.0583, + "step": 4131 + }, + { + "epoch": 0.05, + "grad_norm": 30.869524814852547, + "learning_rate": 1.6569138276553107e-05, + "loss": 1.8722, + "step": 4134 + }, + { + "epoch": 0.05, + "grad_norm": 26.39890278716576, + "learning_rate": 1.65811623246493e-05, + "loss": 1.8284, + "step": 4137 + }, + { + "epoch": 0.05, + "grad_norm": 91.25439425624879, + "learning_rate": 1.6593186372745492e-05, + "loss": 1.7791, + "step": 4140 + }, + { + "epoch": 0.05, + "grad_norm": 8.705831628724605, + "learning_rate": 1.6605210420841685e-05, + "loss": 1.6912, + "step": 4143 + }, + { + "epoch": 0.05, + "grad_norm": 11.399955768758065, + "learning_rate": 1.6617234468937877e-05, + "loss": 1.9587, + "step": 4146 + }, + { + "epoch": 0.05, + "grad_norm": 4.448610384399344, + "learning_rate": 1.662925851703407e-05, + "loss": 1.8245, + "step": 4149 + }, + { + "epoch": 0.05, + "grad_norm": 21.06074174201894, + "learning_rate": 1.6641282565130262e-05, + "loss": 2.2266, + "step": 4152 + }, + { + "epoch": 0.05, + "grad_norm": 8.560812588536479, + "learning_rate": 1.6653306613226455e-05, + "loss": 2.1029, + "step": 4155 + }, + { + "epoch": 0.05, + "grad_norm": 36.135026041937834, + "learning_rate": 1.6665330661322647e-05, + "loss": 2.0095, + "step": 4158 + }, + { + "epoch": 0.05, + "grad_norm": 34.490234747109604, + "learning_rate": 1.667735470941884e-05, + "loss": 1.626, + "step": 4161 + }, + { + "epoch": 0.05, + "grad_norm": 11.119724715245662, + "learning_rate": 1.6689378757515032e-05, + "loss": 1.7624, + "step": 4164 + }, + { + "epoch": 0.05, + "grad_norm": 42.44098860977609, + "learning_rate": 1.6701402805611225e-05, + "loss": 1.9484, + "step": 4167 + }, + { + "epoch": 0.05, + "grad_norm": 27.020779173213032, + "learning_rate": 1.6713426853707414e-05, + "loss": 1.9629, + "step": 4170 + }, + { + "epoch": 0.05, + "grad_norm": 12.652562404580705, + "learning_rate": 1.672545090180361e-05, + "loss": 2.1392, + "step": 4173 + }, + { + "epoch": 0.05, + "grad_norm": 28.611063118395286, + "learning_rate": 1.6737474949899802e-05, + "loss": 1.8987, + "step": 4176 + }, + { + "epoch": 0.05, + "grad_norm": 77.81859139314531, + "learning_rate": 1.6749498997995995e-05, + "loss": 1.9099, + "step": 4179 + }, + { + "epoch": 0.05, + "grad_norm": 31.030178568191968, + "learning_rate": 1.6761523046092184e-05, + "loss": 1.7433, + "step": 4182 + }, + { + "epoch": 0.05, + "grad_norm": 72.78276134910419, + "learning_rate": 1.677354709418838e-05, + "loss": 1.6317, + "step": 4185 + }, + { + "epoch": 0.05, + "grad_norm": 28.380271128471033, + "learning_rate": 1.6785571142284572e-05, + "loss": 1.6083, + "step": 4188 + }, + { + "epoch": 0.05, + "grad_norm": 50.26327230671738, + "learning_rate": 1.679759519038076e-05, + "loss": 2.0138, + "step": 4191 + }, + { + "epoch": 0.05, + "grad_norm": 56.46656015294066, + "learning_rate": 1.6809619238476954e-05, + "loss": 1.8265, + "step": 4194 + }, + { + "epoch": 0.05, + "grad_norm": 14.044688691348403, + "learning_rate": 1.682164328657315e-05, + "loss": 1.3056, + "step": 4197 + }, + { + "epoch": 0.05, + "grad_norm": 27.092263247415467, + "learning_rate": 1.683366733466934e-05, + "loss": 2.1772, + "step": 4200 + }, + { + "epoch": 0.05, + "grad_norm": 23.770054292705176, + "learning_rate": 1.684569138276553e-05, + "loss": 2.2792, + "step": 4203 + }, + { + "epoch": 0.05, + "grad_norm": 23.942094824031756, + "learning_rate": 1.6857715430861724e-05, + "loss": 1.9781, + "step": 4206 + }, + { + "epoch": 0.05, + "grad_norm": 19.81138765194315, + "learning_rate": 1.686973947895792e-05, + "loss": 1.8055, + "step": 4209 + }, + { + "epoch": 0.05, + "grad_norm": 14.580657395833668, + "learning_rate": 1.688176352705411e-05, + "loss": 1.7192, + "step": 4212 + }, + { + "epoch": 0.05, + "grad_norm": 7.712571081307015, + "learning_rate": 1.68937875751503e-05, + "loss": 1.9437, + "step": 4215 + }, + { + "epoch": 0.05, + "grad_norm": 14.616794059772365, + "learning_rate": 1.6905811623246494e-05, + "loss": 2.2491, + "step": 4218 + }, + { + "epoch": 0.05, + "grad_norm": 26.76543485276958, + "learning_rate": 1.6917835671342686e-05, + "loss": 1.9836, + "step": 4221 + }, + { + "epoch": 0.05, + "grad_norm": 10.621642095169141, + "learning_rate": 1.692985971943888e-05, + "loss": 2.045, + "step": 4224 + }, + { + "epoch": 0.05, + "grad_norm": 41.06967886753802, + "learning_rate": 1.694188376753507e-05, + "loss": 1.8448, + "step": 4227 + }, + { + "epoch": 0.05, + "grad_norm": 8.892686143524813, + "learning_rate": 1.6953907815631264e-05, + "loss": 1.6055, + "step": 4230 + }, + { + "epoch": 0.05, + "grad_norm": 10.241324511872167, + "learning_rate": 1.6965931863727456e-05, + "loss": 2.0298, + "step": 4233 + }, + { + "epoch": 0.05, + "grad_norm": 533.734405693489, + "learning_rate": 1.697795591182365e-05, + "loss": 1.8035, + "step": 4236 + }, + { + "epoch": 0.05, + "grad_norm": 290.58104327856245, + "learning_rate": 1.698997995991984e-05, + "loss": 1.8697, + "step": 4239 + }, + { + "epoch": 0.05, + "grad_norm": 28.38519387624665, + "learning_rate": 1.7002004008016034e-05, + "loss": 1.9491, + "step": 4242 + }, + { + "epoch": 0.05, + "grad_norm": 16.958790632701064, + "learning_rate": 1.7014028056112226e-05, + "loss": 1.9189, + "step": 4245 + }, + { + "epoch": 0.05, + "grad_norm": 13.116964402833371, + "learning_rate": 1.702605210420842e-05, + "loss": 1.5673, + "step": 4248 + }, + { + "epoch": 0.05, + "grad_norm": 6.675522029709578, + "learning_rate": 1.703807615230461e-05, + "loss": 1.7886, + "step": 4251 + }, + { + "epoch": 0.05, + "grad_norm": 31.00425130416206, + "learning_rate": 1.7050100200400804e-05, + "loss": 1.8666, + "step": 4254 + }, + { + "epoch": 0.05, + "grad_norm": 10.243274723952615, + "learning_rate": 1.7062124248496996e-05, + "loss": 1.9359, + "step": 4257 + }, + { + "epoch": 0.05, + "grad_norm": 35.3576744358576, + "learning_rate": 1.707414829659319e-05, + "loss": 1.9702, + "step": 4260 + }, + { + "epoch": 0.05, + "grad_norm": 45.318031484752886, + "learning_rate": 1.708617234468938e-05, + "loss": 2.0273, + "step": 4263 + }, + { + "epoch": 0.05, + "grad_norm": 29.913319761602086, + "learning_rate": 1.7098196392785574e-05, + "loss": 1.8784, + "step": 4266 + }, + { + "epoch": 0.05, + "grad_norm": 16.066412849122106, + "learning_rate": 1.7110220440881763e-05, + "loss": 1.7493, + "step": 4269 + }, + { + "epoch": 0.05, + "grad_norm": 79.07344776022137, + "learning_rate": 1.712224448897796e-05, + "loss": 1.6897, + "step": 4272 + }, + { + "epoch": 0.05, + "grad_norm": 60.738633645559865, + "learning_rate": 1.713426853707415e-05, + "loss": 1.6493, + "step": 4275 + }, + { + "epoch": 0.05, + "grad_norm": 48.135680535035746, + "learning_rate": 1.714629258517034e-05, + "loss": 1.6839, + "step": 4278 + }, + { + "epoch": 0.05, + "grad_norm": 55.37397899757658, + "learning_rate": 1.7158316633266533e-05, + "loss": 2.2115, + "step": 4281 + }, + { + "epoch": 0.05, + "grad_norm": 33.11228586094623, + "learning_rate": 1.717034068136273e-05, + "loss": 1.8591, + "step": 4284 + }, + { + "epoch": 0.05, + "grad_norm": 22.66587360976963, + "learning_rate": 1.718236472945892e-05, + "loss": 1.8594, + "step": 4287 + }, + { + "epoch": 0.05, + "grad_norm": 21.643942659303676, + "learning_rate": 1.719438877755511e-05, + "loss": 2.1479, + "step": 4290 + }, + { + "epoch": 0.05, + "grad_norm": 12.22304656057923, + "learning_rate": 1.7206412825651303e-05, + "loss": 2.0968, + "step": 4293 + }, + { + "epoch": 0.05, + "grad_norm": 6.88573197723148, + "learning_rate": 1.72184368737475e-05, + "loss": 2.2871, + "step": 4296 + }, + { + "epoch": 0.05, + "grad_norm": 15.131134029720986, + "learning_rate": 1.7230460921843688e-05, + "loss": 1.4888, + "step": 4299 + }, + { + "epoch": 0.05, + "grad_norm": 18.67112705983606, + "learning_rate": 1.724248496993988e-05, + "loss": 1.2912, + "step": 4302 + }, + { + "epoch": 0.05, + "grad_norm": 9.486093788189411, + "learning_rate": 1.7254509018036073e-05, + "loss": 1.5179, + "step": 4305 + }, + { + "epoch": 0.05, + "grad_norm": 128.73009371622584, + "learning_rate": 1.7266533066132265e-05, + "loss": 1.8014, + "step": 4308 + }, + { + "epoch": 0.05, + "grad_norm": 56.657435757730205, + "learning_rate": 1.7278557114228458e-05, + "loss": 1.6121, + "step": 4311 + }, + { + "epoch": 0.05, + "grad_norm": 63.43754571994504, + "learning_rate": 1.729058116232465e-05, + "loss": 2.1406, + "step": 4314 + }, + { + "epoch": 0.05, + "grad_norm": 72.09657331264957, + "learning_rate": 1.7302605210420843e-05, + "loss": 1.7137, + "step": 4317 + }, + { + "epoch": 0.05, + "grad_norm": 27.24365809300207, + "learning_rate": 1.7314629258517035e-05, + "loss": 1.7778, + "step": 4320 + }, + { + "epoch": 0.05, + "grad_norm": 37.872579081545034, + "learning_rate": 1.7326653306613228e-05, + "loss": 1.8721, + "step": 4323 + }, + { + "epoch": 0.05, + "grad_norm": 65.76698339288824, + "learning_rate": 1.733867735470942e-05, + "loss": 1.9348, + "step": 4326 + }, + { + "epoch": 0.05, + "grad_norm": 11.623333632288242, + "learning_rate": 1.7350701402805613e-05, + "loss": 1.9138, + "step": 4329 + }, + { + "epoch": 0.05, + "grad_norm": 15.70238043562261, + "learning_rate": 1.7362725450901805e-05, + "loss": 1.7545, + "step": 4332 + }, + { + "epoch": 0.05, + "grad_norm": 8.372339631848448, + "learning_rate": 1.7374749498997998e-05, + "loss": 2.4095, + "step": 4335 + }, + { + "epoch": 0.05, + "grad_norm": 23.010523447100088, + "learning_rate": 1.738677354709419e-05, + "loss": 1.888, + "step": 4338 + }, + { + "epoch": 0.05, + "grad_norm": 41.26080217143179, + "learning_rate": 1.7398797595190383e-05, + "loss": 2.1725, + "step": 4341 + }, + { + "epoch": 0.05, + "grad_norm": 27.010030470271833, + "learning_rate": 1.7410821643286575e-05, + "loss": 1.5485, + "step": 4344 + }, + { + "epoch": 0.05, + "grad_norm": 33.30136272789158, + "learning_rate": 1.7422845691382768e-05, + "loss": 2.2524, + "step": 4347 + }, + { + "epoch": 0.05, + "grad_norm": 22.588203722221564, + "learning_rate": 1.743486973947896e-05, + "loss": 1.9893, + "step": 4350 + }, + { + "epoch": 0.05, + "grad_norm": 91.68511262542599, + "learning_rate": 1.7446893787575153e-05, + "loss": 2.0558, + "step": 4353 + }, + { + "epoch": 0.05, + "grad_norm": 122.11297554183052, + "learning_rate": 1.7458917835671345e-05, + "loss": 1.9648, + "step": 4356 + }, + { + "epoch": 0.05, + "grad_norm": 177.40632493679627, + "learning_rate": 1.7470941883767538e-05, + "loss": 2.0228, + "step": 4359 + }, + { + "epoch": 0.05, + "grad_norm": 80.17243119953139, + "learning_rate": 1.748296593186373e-05, + "loss": 1.8318, + "step": 4362 + }, + { + "epoch": 0.05, + "grad_norm": 65.6910101887426, + "learning_rate": 1.7494989979959922e-05, + "loss": 1.9012, + "step": 4365 + }, + { + "epoch": 0.05, + "grad_norm": 25.22463052110271, + "learning_rate": 1.750701402805611e-05, + "loss": 1.4645, + "step": 4368 + }, + { + "epoch": 0.05, + "grad_norm": 43.84521834227764, + "learning_rate": 1.7519038076152307e-05, + "loss": 1.9843, + "step": 4371 + }, + { + "epoch": 0.05, + "grad_norm": 91.4677994720262, + "learning_rate": 1.75310621242485e-05, + "loss": 1.7711, + "step": 4374 + }, + { + "epoch": 0.05, + "grad_norm": 15.144439745497632, + "learning_rate": 1.754308617234469e-05, + "loss": 1.6367, + "step": 4377 + }, + { + "epoch": 0.05, + "grad_norm": 17.381239881294295, + "learning_rate": 1.755511022044088e-05, + "loss": 1.9921, + "step": 4380 + }, + { + "epoch": 0.05, + "grad_norm": 89.48395674523367, + "learning_rate": 1.7567134268537077e-05, + "loss": 2.0978, + "step": 4383 + }, + { + "epoch": 0.05, + "grad_norm": 41.4271842092746, + "learning_rate": 1.7579158316633267e-05, + "loss": 1.6608, + "step": 4386 + }, + { + "epoch": 0.05, + "grad_norm": 41.94305479786889, + "learning_rate": 1.759118236472946e-05, + "loss": 1.4976, + "step": 4389 + }, + { + "epoch": 0.05, + "grad_norm": 14.234996614468134, + "learning_rate": 1.760320641282565e-05, + "loss": 2.323, + "step": 4392 + }, + { + "epoch": 0.05, + "grad_norm": 76.20112477985327, + "learning_rate": 1.7615230460921847e-05, + "loss": 2.07, + "step": 4395 + }, + { + "epoch": 0.05, + "grad_norm": 32.83606709630899, + "learning_rate": 1.7627254509018037e-05, + "loss": 2.2925, + "step": 4398 + }, + { + "epoch": 0.05, + "grad_norm": 28.614805619698014, + "learning_rate": 1.763927855711423e-05, + "loss": 1.436, + "step": 4401 + }, + { + "epoch": 0.05, + "grad_norm": 24.560123044907378, + "learning_rate": 1.765130260521042e-05, + "loss": 1.4892, + "step": 4404 + }, + { + "epoch": 0.05, + "grad_norm": 21.518169126929855, + "learning_rate": 1.7663326653306614e-05, + "loss": 2.0034, + "step": 4407 + }, + { + "epoch": 0.05, + "grad_norm": 38.50379388182216, + "learning_rate": 1.7675350701402807e-05, + "loss": 1.7726, + "step": 4410 + }, + { + "epoch": 0.05, + "grad_norm": 11.47057001193349, + "learning_rate": 1.7687374749499e-05, + "loss": 1.8294, + "step": 4413 + }, + { + "epoch": 0.05, + "grad_norm": 28.719497177069005, + "learning_rate": 1.769939879759519e-05, + "loss": 2.1846, + "step": 4416 + }, + { + "epoch": 0.05, + "grad_norm": 48.94245420208555, + "learning_rate": 1.7711422845691384e-05, + "loss": 1.8118, + "step": 4419 + }, + { + "epoch": 0.05, + "grad_norm": 39.370475403605724, + "learning_rate": 1.7723446893787576e-05, + "loss": 1.9408, + "step": 4422 + }, + { + "epoch": 0.05, + "grad_norm": 15.711722144493221, + "learning_rate": 1.773547094188377e-05, + "loss": 2.0033, + "step": 4425 + }, + { + "epoch": 0.05, + "grad_norm": 88.03022940879912, + "learning_rate": 1.774749498997996e-05, + "loss": 2.0609, + "step": 4428 + }, + { + "epoch": 0.05, + "grad_norm": 20.94923145685123, + "learning_rate": 1.7759519038076154e-05, + "loss": 1.578, + "step": 4431 + }, + { + "epoch": 0.05, + "grad_norm": 25.15967369902125, + "learning_rate": 1.7771543086172346e-05, + "loss": 1.8838, + "step": 4434 + }, + { + "epoch": 0.05, + "grad_norm": 126.57167174692533, + "learning_rate": 1.778356713426854e-05, + "loss": 1.7967, + "step": 4437 + }, + { + "epoch": 0.05, + "grad_norm": 25.635776888303752, + "learning_rate": 1.779559118236473e-05, + "loss": 1.6541, + "step": 4440 + }, + { + "epoch": 0.05, + "grad_norm": 108.4925577333667, + "learning_rate": 1.7807615230460924e-05, + "loss": 1.7095, + "step": 4443 + }, + { + "epoch": 0.05, + "grad_norm": 150.99869197256015, + "learning_rate": 1.7819639278557116e-05, + "loss": 1.9782, + "step": 4446 + }, + { + "epoch": 0.05, + "grad_norm": 46.63536706828198, + "learning_rate": 1.783166332665331e-05, + "loss": 1.9635, + "step": 4449 + }, + { + "epoch": 0.05, + "grad_norm": 59.37179095483614, + "learning_rate": 1.78436873747495e-05, + "loss": 2.1493, + "step": 4452 + }, + { + "epoch": 0.05, + "grad_norm": 72.42778236450725, + "learning_rate": 1.785571142284569e-05, + "loss": 2.1257, + "step": 4455 + }, + { + "epoch": 0.05, + "grad_norm": 373.2272059500186, + "learning_rate": 1.7867735470941886e-05, + "loss": 1.8128, + "step": 4458 + }, + { + "epoch": 0.05, + "grad_norm": 1734.6230095468525, + "learning_rate": 1.787975951903808e-05, + "loss": 1.9488, + "step": 4461 + }, + { + "epoch": 0.05, + "grad_norm": 11.750189710255183, + "learning_rate": 1.789178356713427e-05, + "loss": 1.9093, + "step": 4464 + }, + { + "epoch": 0.05, + "grad_norm": 61.859839011071074, + "learning_rate": 1.790380761523046e-05, + "loss": 2.0456, + "step": 4467 + }, + { + "epoch": 0.05, + "grad_norm": 70.56924324517624, + "learning_rate": 1.7915831663326656e-05, + "loss": 1.6937, + "step": 4470 + }, + { + "epoch": 0.05, + "grad_norm": 39.02321633061277, + "learning_rate": 1.792785571142285e-05, + "loss": 1.7389, + "step": 4473 + }, + { + "epoch": 0.05, + "grad_norm": 43.25695667679053, + "learning_rate": 1.7939879759519038e-05, + "loss": 2.1611, + "step": 4476 + }, + { + "epoch": 0.05, + "grad_norm": 23.5360652213058, + "learning_rate": 1.795190380761523e-05, + "loss": 2.0272, + "step": 4479 + }, + { + "epoch": 0.05, + "grad_norm": 113.74030429217468, + "learning_rate": 1.7963927855711426e-05, + "loss": 1.2958, + "step": 4482 + }, + { + "epoch": 0.05, + "grad_norm": 24.787645607352534, + "learning_rate": 1.7975951903807615e-05, + "loss": 2.2467, + "step": 4485 + }, + { + "epoch": 0.05, + "grad_norm": 67.52075014978497, + "learning_rate": 1.7987975951903808e-05, + "loss": 1.7157, + "step": 4488 + }, + { + "epoch": 0.05, + "grad_norm": 45.62238507096283, + "learning_rate": 1.8e-05, + "loss": 1.6865, + "step": 4491 + }, + { + "epoch": 0.05, + "grad_norm": 172.09925496452627, + "learning_rate": 1.8012024048096193e-05, + "loss": 1.8457, + "step": 4494 + }, + { + "epoch": 0.05, + "grad_norm": 63.57580084909734, + "learning_rate": 1.8024048096192385e-05, + "loss": 1.9517, + "step": 4497 + }, + { + "epoch": 0.05, + "grad_norm": 23.193115984464832, + "learning_rate": 1.8036072144288578e-05, + "loss": 1.9443, + "step": 4500 + }, + { + "epoch": 0.05, + "grad_norm": 44.336316708894884, + "learning_rate": 1.804809619238477e-05, + "loss": 1.986, + "step": 4503 + }, + { + "epoch": 0.05, + "grad_norm": 29.303303644529908, + "learning_rate": 1.8060120240480963e-05, + "loss": 1.6808, + "step": 4506 + }, + { + "epoch": 0.05, + "grad_norm": 30.74617218186509, + "learning_rate": 1.8072144288577155e-05, + "loss": 1.5751, + "step": 4509 + }, + { + "epoch": 0.05, + "grad_norm": 64.42857525303421, + "learning_rate": 1.8084168336673348e-05, + "loss": 1.4827, + "step": 4512 + }, + { + "epoch": 0.05, + "grad_norm": 80.13950817223177, + "learning_rate": 1.809619238476954e-05, + "loss": 2.0055, + "step": 4515 + }, + { + "epoch": 0.05, + "grad_norm": 22.189827140949493, + "learning_rate": 1.8108216432865733e-05, + "loss": 2.0008, + "step": 4518 + }, + { + "epoch": 0.05, + "grad_norm": 10.949286418653429, + "learning_rate": 1.8120240480961925e-05, + "loss": 1.6165, + "step": 4521 + }, + { + "epoch": 0.05, + "grad_norm": 7.817915670743298, + "learning_rate": 1.8132264529058118e-05, + "loss": 2.1221, + "step": 4524 + }, + { + "epoch": 0.05, + "grad_norm": 23.267186182474763, + "learning_rate": 1.814428857715431e-05, + "loss": 2.0923, + "step": 4527 + }, + { + "epoch": 0.05, + "grad_norm": 12.241054833631818, + "learning_rate": 1.8156312625250503e-05, + "loss": 1.5614, + "step": 4530 + }, + { + "epoch": 0.05, + "grad_norm": 41.323342348197926, + "learning_rate": 1.8168336673346695e-05, + "loss": 1.7116, + "step": 4533 + }, + { + "epoch": 0.05, + "grad_norm": 38.54914786638831, + "learning_rate": 1.8180360721442888e-05, + "loss": 1.6639, + "step": 4536 + }, + { + "epoch": 0.05, + "grad_norm": 35.88002950642174, + "learning_rate": 1.819238476953908e-05, + "loss": 1.9984, + "step": 4539 + }, + { + "epoch": 0.05, + "grad_norm": 20.742586333467116, + "learning_rate": 1.8204408817635273e-05, + "loss": 1.7575, + "step": 4542 + }, + { + "epoch": 0.05, + "grad_norm": 7.978074615904104, + "learning_rate": 1.8216432865731465e-05, + "loss": 1.6705, + "step": 4545 + }, + { + "epoch": 0.05, + "grad_norm": 14.90687227344978, + "learning_rate": 1.8228456913827658e-05, + "loss": 1.7401, + "step": 4548 + }, + { + "epoch": 0.05, + "grad_norm": 74.90600652880782, + "learning_rate": 1.824048096192385e-05, + "loss": 1.8539, + "step": 4551 + }, + { + "epoch": 0.05, + "grad_norm": 74.74760105307355, + "learning_rate": 1.825250501002004e-05, + "loss": 1.4992, + "step": 4554 + }, + { + "epoch": 0.05, + "grad_norm": 93.95637103652767, + "learning_rate": 1.8264529058116235e-05, + "loss": 1.974, + "step": 4557 + }, + { + "epoch": 0.05, + "grad_norm": 115.04743663145598, + "learning_rate": 1.8276553106212428e-05, + "loss": 1.9539, + "step": 4560 + }, + { + "epoch": 0.05, + "grad_norm": 36.900841304416076, + "learning_rate": 1.8288577154308617e-05, + "loss": 1.9423, + "step": 4563 + }, + { + "epoch": 0.05, + "grad_norm": 28.341953799498256, + "learning_rate": 1.830060120240481e-05, + "loss": 1.897, + "step": 4566 + }, + { + "epoch": 0.05, + "grad_norm": 21.597451652166416, + "learning_rate": 1.8312625250501005e-05, + "loss": 1.7119, + "step": 4569 + }, + { + "epoch": 0.05, + "grad_norm": 30.54519529401161, + "learning_rate": 1.8324649298597198e-05, + "loss": 2.0116, + "step": 4572 + }, + { + "epoch": 0.06, + "grad_norm": 10.011546515930657, + "learning_rate": 1.8336673346693387e-05, + "loss": 1.6651, + "step": 4575 + }, + { + "epoch": 0.06, + "grad_norm": 14.348558428501054, + "learning_rate": 1.834869739478958e-05, + "loss": 2.0005, + "step": 4578 + }, + { + "epoch": 0.06, + "grad_norm": 59.588994468443644, + "learning_rate": 1.8360721442885775e-05, + "loss": 1.7912, + "step": 4581 + }, + { + "epoch": 0.06, + "grad_norm": 11.012532773104365, + "learning_rate": 1.8372745490981964e-05, + "loss": 1.9382, + "step": 4584 + }, + { + "epoch": 0.06, + "grad_norm": 5.93638830266064, + "learning_rate": 1.8384769539078157e-05, + "loss": 1.9948, + "step": 4587 + }, + { + "epoch": 0.06, + "grad_norm": 21.13560277481412, + "learning_rate": 1.839679358717435e-05, + "loss": 1.939, + "step": 4590 + }, + { + "epoch": 0.06, + "grad_norm": 34.21658209455076, + "learning_rate": 1.8408817635270542e-05, + "loss": 1.8433, + "step": 4593 + }, + { + "epoch": 0.06, + "grad_norm": 86.08813250269979, + "learning_rate": 1.8420841683366734e-05, + "loss": 2.0498, + "step": 4596 + }, + { + "epoch": 0.06, + "grad_norm": 13.09046503747893, + "learning_rate": 1.8432865731462927e-05, + "loss": 2.0946, + "step": 4599 + }, + { + "epoch": 0.06, + "grad_norm": 12.415885152162845, + "learning_rate": 1.844488977955912e-05, + "loss": 1.5301, + "step": 4602 + }, + { + "epoch": 0.06, + "grad_norm": 11.73860396343534, + "learning_rate": 1.8456913827655312e-05, + "loss": 1.999, + "step": 4605 + }, + { + "epoch": 0.06, + "grad_norm": 126.31839453651071, + "learning_rate": 1.8468937875751504e-05, + "loss": 1.5843, + "step": 4608 + }, + { + "epoch": 0.06, + "grad_norm": 11.359630362824317, + "learning_rate": 1.8480961923847697e-05, + "loss": 1.5833, + "step": 4611 + }, + { + "epoch": 0.06, + "grad_norm": 19.004568695475317, + "learning_rate": 1.849298597194389e-05, + "loss": 1.5479, + "step": 4614 + }, + { + "epoch": 0.06, + "grad_norm": 24.70576292467596, + "learning_rate": 1.8505010020040082e-05, + "loss": 1.5523, + "step": 4617 + }, + { + "epoch": 0.06, + "grad_norm": 105.31546424078392, + "learning_rate": 1.8517034068136274e-05, + "loss": 2.2708, + "step": 4620 + }, + { + "epoch": 0.06, + "grad_norm": 49.13878344253408, + "learning_rate": 1.8529058116232467e-05, + "loss": 2.3247, + "step": 4623 + }, + { + "epoch": 0.06, + "grad_norm": 369.95283317081515, + "learning_rate": 1.854108216432866e-05, + "loss": 1.6387, + "step": 4626 + }, + { + "epoch": 0.06, + "grad_norm": 30.07916524785655, + "learning_rate": 1.8553106212424852e-05, + "loss": 1.6615, + "step": 4629 + }, + { + "epoch": 0.06, + "grad_norm": 82.33512856940898, + "learning_rate": 1.8565130260521044e-05, + "loss": 2.1799, + "step": 4632 + }, + { + "epoch": 0.06, + "grad_norm": 30.814481989729106, + "learning_rate": 1.8577154308617237e-05, + "loss": 1.8675, + "step": 4635 + }, + { + "epoch": 0.06, + "grad_norm": 29.4918358494788, + "learning_rate": 1.858917835671343e-05, + "loss": 2.4015, + "step": 4638 + }, + { + "epoch": 0.06, + "grad_norm": 67.43675881721938, + "learning_rate": 1.8601202404809622e-05, + "loss": 2.4951, + "step": 4641 + }, + { + "epoch": 0.06, + "grad_norm": 97.39364177549318, + "learning_rate": 1.8613226452905814e-05, + "loss": 2.1648, + "step": 4644 + }, + { + "epoch": 0.06, + "grad_norm": 27.860794318015724, + "learning_rate": 1.8625250501002007e-05, + "loss": 2.1235, + "step": 4647 + }, + { + "epoch": 0.06, + "grad_norm": 201.71013461775763, + "learning_rate": 1.86372745490982e-05, + "loss": 1.6183, + "step": 4650 + }, + { + "epoch": 0.06, + "grad_norm": 9.879873328433368, + "learning_rate": 1.864929859719439e-05, + "loss": 2.0404, + "step": 4653 + }, + { + "epoch": 0.06, + "grad_norm": 31.005227712492093, + "learning_rate": 1.8661322645290584e-05, + "loss": 2.0328, + "step": 4656 + }, + { + "epoch": 0.06, + "grad_norm": 9.646690280786286, + "learning_rate": 1.8673346693386777e-05, + "loss": 1.9457, + "step": 4659 + }, + { + "epoch": 0.06, + "grad_norm": 13.534370258640823, + "learning_rate": 1.8685370741482966e-05, + "loss": 1.8973, + "step": 4662 + }, + { + "epoch": 0.06, + "grad_norm": 10.054557589724194, + "learning_rate": 1.869739478957916e-05, + "loss": 1.8918, + "step": 4665 + }, + { + "epoch": 0.06, + "grad_norm": 33.408272516342784, + "learning_rate": 1.8709418837675354e-05, + "loss": 1.8274, + "step": 4668 + }, + { + "epoch": 0.06, + "grad_norm": 13.6604689906981, + "learning_rate": 1.8721442885771543e-05, + "loss": 1.8965, + "step": 4671 + }, + { + "epoch": 0.06, + "grad_norm": 12.121061766849804, + "learning_rate": 1.8733466933867736e-05, + "loss": 2.0111, + "step": 4674 + }, + { + "epoch": 0.06, + "grad_norm": 39.7392839408521, + "learning_rate": 1.8745490981963928e-05, + "loss": 1.6672, + "step": 4677 + }, + { + "epoch": 0.06, + "grad_norm": 66.53803944692996, + "learning_rate": 1.8757515030060124e-05, + "loss": 1.3924, + "step": 4680 + }, + { + "epoch": 0.06, + "grad_norm": 15.695484907832588, + "learning_rate": 1.8769539078156313e-05, + "loss": 1.6013, + "step": 4683 + }, + { + "epoch": 0.06, + "grad_norm": 10.745082422881081, + "learning_rate": 1.8781563126252506e-05, + "loss": 1.6525, + "step": 4686 + }, + { + "epoch": 0.06, + "grad_norm": 46.19209081482667, + "learning_rate": 1.8793587174348698e-05, + "loss": 1.7504, + "step": 4689 + }, + { + "epoch": 0.06, + "grad_norm": 40.479025409627404, + "learning_rate": 1.880561122244489e-05, + "loss": 1.9751, + "step": 4692 + }, + { + "epoch": 0.06, + "grad_norm": 60.73929416126635, + "learning_rate": 1.8817635270541083e-05, + "loss": 1.5203, + "step": 4695 + }, + { + "epoch": 0.06, + "grad_norm": 18.241903852199698, + "learning_rate": 1.8829659318637276e-05, + "loss": 1.8969, + "step": 4698 + }, + { + "epoch": 0.06, + "grad_norm": 19.82633645188481, + "learning_rate": 1.8841683366733468e-05, + "loss": 1.6807, + "step": 4701 + }, + { + "epoch": 0.06, + "grad_norm": 25.308251954020974, + "learning_rate": 1.885370741482966e-05, + "loss": 1.4804, + "step": 4704 + }, + { + "epoch": 0.06, + "grad_norm": 17.776434246816763, + "learning_rate": 1.8865731462925853e-05, + "loss": 1.5811, + "step": 4707 + }, + { + "epoch": 0.06, + "grad_norm": 24.409326362464842, + "learning_rate": 1.8877755511022046e-05, + "loss": 1.6308, + "step": 4710 + }, + { + "epoch": 0.06, + "grad_norm": 54.93568726290341, + "learning_rate": 1.8889779559118238e-05, + "loss": 2.107, + "step": 4713 + }, + { + "epoch": 0.06, + "grad_norm": 21.21330906725605, + "learning_rate": 1.890180360721443e-05, + "loss": 1.9473, + "step": 4716 + }, + { + "epoch": 0.06, + "grad_norm": 14.040403100708948, + "learning_rate": 1.8913827655310623e-05, + "loss": 1.9525, + "step": 4719 + }, + { + "epoch": 0.06, + "grad_norm": 29.218849148373877, + "learning_rate": 1.8925851703406816e-05, + "loss": 2.2983, + "step": 4722 + }, + { + "epoch": 0.06, + "grad_norm": 11.086195586759551, + "learning_rate": 1.8937875751503008e-05, + "loss": 1.7187, + "step": 4725 + }, + { + "epoch": 0.06, + "grad_norm": 10.133390051732599, + "learning_rate": 1.89498997995992e-05, + "loss": 1.835, + "step": 4728 + }, + { + "epoch": 0.06, + "grad_norm": 36.272315977341435, + "learning_rate": 1.8961923847695393e-05, + "loss": 2.1799, + "step": 4731 + }, + { + "epoch": 0.06, + "grad_norm": 32.186898564220066, + "learning_rate": 1.8973947895791586e-05, + "loss": 1.4418, + "step": 4734 + }, + { + "epoch": 0.06, + "grad_norm": 13.821744115797157, + "learning_rate": 1.8985971943887778e-05, + "loss": 1.7659, + "step": 4737 + }, + { + "epoch": 0.06, + "grad_norm": 39.39638329372772, + "learning_rate": 1.8997995991983967e-05, + "loss": 1.7096, + "step": 4740 + }, + { + "epoch": 0.06, + "grad_norm": 19.524309163019545, + "learning_rate": 1.9010020040080163e-05, + "loss": 1.9975, + "step": 4743 + }, + { + "epoch": 0.06, + "grad_norm": 11.002598803476337, + "learning_rate": 1.9022044088176356e-05, + "loss": 2.325, + "step": 4746 + }, + { + "epoch": 0.06, + "grad_norm": 50.340085335946256, + "learning_rate": 1.9034068136272548e-05, + "loss": 1.7926, + "step": 4749 + }, + { + "epoch": 0.06, + "grad_norm": 20.189534740186023, + "learning_rate": 1.9046092184368737e-05, + "loss": 1.6292, + "step": 4752 + }, + { + "epoch": 0.06, + "grad_norm": 8.224274856495589, + "learning_rate": 1.9058116232464933e-05, + "loss": 1.6664, + "step": 4755 + }, + { + "epoch": 0.06, + "grad_norm": 16.537541401310676, + "learning_rate": 1.9070140280561126e-05, + "loss": 1.6667, + "step": 4758 + }, + { + "epoch": 0.06, + "grad_norm": 26.34714197989013, + "learning_rate": 1.9082164328657315e-05, + "loss": 1.7814, + "step": 4761 + }, + { + "epoch": 0.06, + "grad_norm": 18.254051293891024, + "learning_rate": 1.9094188376753507e-05, + "loss": 1.5228, + "step": 4764 + }, + { + "epoch": 0.06, + "grad_norm": 7.065720703986179, + "learning_rate": 1.9106212424849703e-05, + "loss": 1.6586, + "step": 4767 + }, + { + "epoch": 0.06, + "grad_norm": 68.52203126130591, + "learning_rate": 1.9118236472945892e-05, + "loss": 1.9584, + "step": 4770 + }, + { + "epoch": 0.06, + "grad_norm": 24.56671771493261, + "learning_rate": 1.9130260521042085e-05, + "loss": 2.3048, + "step": 4773 + }, + { + "epoch": 0.06, + "grad_norm": 10.642714887624129, + "learning_rate": 1.9142284569138277e-05, + "loss": 2.0252, + "step": 4776 + }, + { + "epoch": 0.06, + "grad_norm": 27.688906467499997, + "learning_rate": 1.915430861723447e-05, + "loss": 1.4902, + "step": 4779 + }, + { + "epoch": 0.06, + "grad_norm": 6.550136685492372, + "learning_rate": 1.9166332665330662e-05, + "loss": 2.011, + "step": 4782 + }, + { + "epoch": 0.06, + "grad_norm": 14.637064198029442, + "learning_rate": 1.9178356713426855e-05, + "loss": 1.9694, + "step": 4785 + }, + { + "epoch": 0.06, + "grad_norm": 70.21268941603829, + "learning_rate": 1.9190380761523047e-05, + "loss": 2.1533, + "step": 4788 + }, + { + "epoch": 0.06, + "grad_norm": 16.925068050396142, + "learning_rate": 1.920240480961924e-05, + "loss": 1.8075, + "step": 4791 + }, + { + "epoch": 0.06, + "grad_norm": 8.773440083523091, + "learning_rate": 1.9214428857715432e-05, + "loss": 1.5548, + "step": 4794 + }, + { + "epoch": 0.06, + "grad_norm": 27.538005398909995, + "learning_rate": 1.9226452905811625e-05, + "loss": 1.8031, + "step": 4797 + }, + { + "epoch": 0.06, + "grad_norm": 56.88719364097389, + "learning_rate": 1.9238476953907817e-05, + "loss": 1.8037, + "step": 4800 + }, + { + "epoch": 0.06, + "grad_norm": 4.458657601075404, + "learning_rate": 1.925050100200401e-05, + "loss": 1.9737, + "step": 4803 + }, + { + "epoch": 0.06, + "grad_norm": 49.27081535627667, + "learning_rate": 1.9262525050100202e-05, + "loss": 1.8477, + "step": 4806 + }, + { + "epoch": 0.06, + "grad_norm": 74.70194599007739, + "learning_rate": 1.9274549098196395e-05, + "loss": 1.8717, + "step": 4809 + }, + { + "epoch": 0.06, + "grad_norm": 115.55984313961493, + "learning_rate": 1.9286573146292587e-05, + "loss": 2.3643, + "step": 4812 + }, + { + "epoch": 0.06, + "grad_norm": 8.293753887841604, + "learning_rate": 1.929859719438878e-05, + "loss": 1.9519, + "step": 4815 + }, + { + "epoch": 0.06, + "grad_norm": 32.76637274947262, + "learning_rate": 1.9310621242484972e-05, + "loss": 1.7074, + "step": 4818 + }, + { + "epoch": 0.06, + "grad_norm": 13.180428725339928, + "learning_rate": 1.9322645290581165e-05, + "loss": 1.6611, + "step": 4821 + }, + { + "epoch": 0.06, + "grad_norm": 6.443525161275544, + "learning_rate": 1.9334669338677357e-05, + "loss": 1.7656, + "step": 4824 + }, + { + "epoch": 0.06, + "grad_norm": 50.029726952338855, + "learning_rate": 1.934669338677355e-05, + "loss": 1.7371, + "step": 4827 + }, + { + "epoch": 0.06, + "grad_norm": 39.72725765363889, + "learning_rate": 1.9358717434869742e-05, + "loss": 1.439, + "step": 4830 + }, + { + "epoch": 0.06, + "grad_norm": 19.47236585332183, + "learning_rate": 1.9370741482965935e-05, + "loss": 1.9019, + "step": 4833 + }, + { + "epoch": 0.06, + "grad_norm": 38.10210318588738, + "learning_rate": 1.9382765531062127e-05, + "loss": 2.3201, + "step": 4836 + }, + { + "epoch": 0.06, + "grad_norm": 41.1128438810348, + "learning_rate": 1.9394789579158316e-05, + "loss": 2.0907, + "step": 4839 + }, + { + "epoch": 0.06, + "grad_norm": 30.300428195133357, + "learning_rate": 1.9406813627254512e-05, + "loss": 1.6884, + "step": 4842 + }, + { + "epoch": 0.06, + "grad_norm": 40.07080160078178, + "learning_rate": 1.9418837675350705e-05, + "loss": 1.956, + "step": 4845 + }, + { + "epoch": 0.06, + "grad_norm": 15.95934114417437, + "learning_rate": 1.9430861723446894e-05, + "loss": 1.9745, + "step": 4848 + }, + { + "epoch": 0.06, + "grad_norm": 127.6579511325484, + "learning_rate": 1.9442885771543086e-05, + "loss": 2.1354, + "step": 4851 + }, + { + "epoch": 0.06, + "grad_norm": 9.368818338537697, + "learning_rate": 1.9454909819639282e-05, + "loss": 1.8883, + "step": 4854 + }, + { + "epoch": 0.06, + "grad_norm": 23.811402246481883, + "learning_rate": 1.9466933867735475e-05, + "loss": 1.833, + "step": 4857 + }, + { + "epoch": 0.06, + "grad_norm": 29.44222425980365, + "learning_rate": 1.9478957915831664e-05, + "loss": 2.0332, + "step": 4860 + }, + { + "epoch": 0.06, + "grad_norm": 36.542250309464244, + "learning_rate": 1.9490981963927856e-05, + "loss": 1.9658, + "step": 4863 + }, + { + "epoch": 0.06, + "grad_norm": 30.917655815807848, + "learning_rate": 1.9503006012024052e-05, + "loss": 1.9558, + "step": 4866 + }, + { + "epoch": 0.06, + "grad_norm": 19.532996612737794, + "learning_rate": 1.951503006012024e-05, + "loss": 2.0441, + "step": 4869 + }, + { + "epoch": 0.06, + "grad_norm": 20.57407928892611, + "learning_rate": 1.9527054108216434e-05, + "loss": 1.9977, + "step": 4872 + }, + { + "epoch": 0.06, + "grad_norm": 34.23178082241901, + "learning_rate": 1.9539078156312626e-05, + "loss": 1.9364, + "step": 4875 + }, + { + "epoch": 0.06, + "grad_norm": 62.588922953945435, + "learning_rate": 1.955110220440882e-05, + "loss": 2.0278, + "step": 4878 + }, + { + "epoch": 0.06, + "grad_norm": 31.589089399947092, + "learning_rate": 1.956312625250501e-05, + "loss": 1.6645, + "step": 4881 + }, + { + "epoch": 0.06, + "grad_norm": 18.144859575010855, + "learning_rate": 1.9575150300601204e-05, + "loss": 1.8604, + "step": 4884 + }, + { + "epoch": 0.06, + "grad_norm": 36.099536739290215, + "learning_rate": 1.9587174348697396e-05, + "loss": 1.804, + "step": 4887 + }, + { + "epoch": 0.06, + "grad_norm": 6.646872791161821, + "learning_rate": 1.959919839679359e-05, + "loss": 1.9265, + "step": 4890 + }, + { + "epoch": 0.06, + "grad_norm": 7.394578111011912, + "learning_rate": 1.961122244488978e-05, + "loss": 2.168, + "step": 4893 + }, + { + "epoch": 0.06, + "grad_norm": 92.47817250352108, + "learning_rate": 1.9623246492985974e-05, + "loss": 1.8578, + "step": 4896 + }, + { + "epoch": 0.06, + "grad_norm": 25.237350010861654, + "learning_rate": 1.9635270541082166e-05, + "loss": 1.8974, + "step": 4899 + }, + { + "epoch": 0.06, + "grad_norm": 55.35752582518043, + "learning_rate": 1.964729458917836e-05, + "loss": 1.9481, + "step": 4902 + }, + { + "epoch": 0.06, + "grad_norm": 13.705544949695835, + "learning_rate": 1.965931863727455e-05, + "loss": 1.6213, + "step": 4905 + }, + { + "epoch": 0.06, + "grad_norm": 46.35929570637657, + "learning_rate": 1.9671342685370744e-05, + "loss": 2.1253, + "step": 4908 + }, + { + "epoch": 0.06, + "grad_norm": 14.45557069776493, + "learning_rate": 1.9683366733466936e-05, + "loss": 1.91, + "step": 4911 + }, + { + "epoch": 0.06, + "grad_norm": 20.6002881760162, + "learning_rate": 1.969539078156313e-05, + "loss": 1.7938, + "step": 4914 + }, + { + "epoch": 0.06, + "grad_norm": 10.556562237344131, + "learning_rate": 1.9707414829659318e-05, + "loss": 2.5035, + "step": 4917 + }, + { + "epoch": 0.06, + "grad_norm": 7.896504762843044, + "learning_rate": 1.9719438877755514e-05, + "loss": 1.6624, + "step": 4920 + }, + { + "epoch": 0.06, + "grad_norm": 6.895457414128677, + "learning_rate": 1.9731462925851706e-05, + "loss": 1.9788, + "step": 4923 + }, + { + "epoch": 0.06, + "grad_norm": 44.741756188978464, + "learning_rate": 1.97434869739479e-05, + "loss": 1.6942, + "step": 4926 + }, + { + "epoch": 0.06, + "grad_norm": 4.539216774748754, + "learning_rate": 1.975551102204409e-05, + "loss": 1.9744, + "step": 4929 + }, + { + "epoch": 0.06, + "grad_norm": 11.147736255538746, + "learning_rate": 1.9767535070140283e-05, + "loss": 1.8059, + "step": 4932 + }, + { + "epoch": 0.06, + "grad_norm": 31.998438018928947, + "learning_rate": 1.9779559118236476e-05, + "loss": 1.9803, + "step": 4935 + }, + { + "epoch": 0.06, + "grad_norm": 14.199157391629864, + "learning_rate": 1.9791583166332665e-05, + "loss": 2.1799, + "step": 4938 + }, + { + "epoch": 0.06, + "grad_norm": 12.321599997798968, + "learning_rate": 1.980360721442886e-05, + "loss": 1.9004, + "step": 4941 + }, + { + "epoch": 0.06, + "grad_norm": 12.992221894763409, + "learning_rate": 1.9815631262525053e-05, + "loss": 1.4196, + "step": 4944 + }, + { + "epoch": 0.06, + "grad_norm": 7.39256322812106, + "learning_rate": 1.9827655310621243e-05, + "loss": 2.1885, + "step": 4947 + }, + { + "epoch": 0.06, + "grad_norm": 5.900693328277683, + "learning_rate": 1.9839679358717435e-05, + "loss": 1.285, + "step": 4950 + }, + { + "epoch": 0.06, + "grad_norm": 26.854291367423052, + "learning_rate": 1.985170340681363e-05, + "loss": 2.2057, + "step": 4953 + }, + { + "epoch": 0.06, + "grad_norm": 44.5696375003547, + "learning_rate": 1.986372745490982e-05, + "loss": 1.6851, + "step": 4956 + }, + { + "epoch": 0.06, + "grad_norm": 43.62795403032929, + "learning_rate": 1.9875751503006013e-05, + "loss": 2.0645, + "step": 4959 + }, + { + "epoch": 0.06, + "grad_norm": 6.198564303777535, + "learning_rate": 1.9887775551102205e-05, + "loss": 1.9678, + "step": 4962 + }, + { + "epoch": 0.06, + "grad_norm": 16.327047994105005, + "learning_rate": 1.98997995991984e-05, + "loss": 1.5425, + "step": 4965 + }, + { + "epoch": 0.06, + "grad_norm": 6.221267743577397, + "learning_rate": 1.991182364729459e-05, + "loss": 1.6498, + "step": 4968 + }, + { + "epoch": 0.06, + "grad_norm": 36.68005120721239, + "learning_rate": 1.9923847695390783e-05, + "loss": 1.7826, + "step": 4971 + }, + { + "epoch": 0.06, + "grad_norm": 5.879501039267741, + "learning_rate": 1.9935871743486975e-05, + "loss": 1.736, + "step": 4974 + }, + { + "epoch": 0.06, + "grad_norm": 33.40734469670505, + "learning_rate": 1.9947895791583167e-05, + "loss": 1.508, + "step": 4977 + }, + { + "epoch": 0.06, + "grad_norm": 21.29064091397594, + "learning_rate": 1.995991983967936e-05, + "loss": 1.5098, + "step": 4980 + }, + { + "epoch": 0.06, + "grad_norm": 20.251240032461588, + "learning_rate": 1.9971943887775552e-05, + "loss": 1.5806, + "step": 4983 + }, + { + "epoch": 0.06, + "grad_norm": 8.487888428317998, + "learning_rate": 1.9983967935871745e-05, + "loss": 2.0474, + "step": 4986 + }, + { + "epoch": 0.06, + "grad_norm": 19.89861274088389, + "learning_rate": 1.9995991983967937e-05, + "loss": 1.8317, + "step": 4989 + }, + { + "epoch": 0.06, + "grad_norm": 65.03664168437052, + "learning_rate": 1.9999999992416356e-05, + "loss": 1.9005, + "step": 4992 + }, + { + "epoch": 0.06, + "grad_norm": 12.41222996180416, + "learning_rate": 1.9999999952602227e-05, + "loss": 2.1542, + "step": 4995 + }, + { + "epoch": 0.06, + "grad_norm": 22.184377416084686, + "learning_rate": 1.99999998786617e-05, + "loss": 1.9341, + "step": 4998 + }, + { + "epoch": 0.06, + "grad_norm": 51.20959091526802, + "learning_rate": 1.9999999770594772e-05, + "loss": 1.9775, + "step": 5001 + }, + { + "epoch": 0.06, + "grad_norm": 21.582313251455723, + "learning_rate": 1.9999999628401447e-05, + "loss": 1.8551, + "step": 5004 + }, + { + "epoch": 0.06, + "grad_norm": 6.754420196953236, + "learning_rate": 1.9999999452081727e-05, + "loss": 1.734, + "step": 5007 + }, + { + "epoch": 0.06, + "grad_norm": 20.953869368673107, + "learning_rate": 1.9999999241635612e-05, + "loss": 1.4053, + "step": 5010 + }, + { + "epoch": 0.06, + "grad_norm": 37.93470493311404, + "learning_rate": 1.99999989970631e-05, + "loss": 1.8457, + "step": 5013 + }, + { + "epoch": 0.06, + "grad_norm": 19.817895431663153, + "learning_rate": 1.999999871836419e-05, + "loss": 2.0201, + "step": 5016 + }, + { + "epoch": 0.06, + "grad_norm": 8.86961302492149, + "learning_rate": 1.9999998405538893e-05, + "loss": 1.3802, + "step": 5019 + }, + { + "epoch": 0.06, + "grad_norm": 17.371197566371926, + "learning_rate": 1.9999998058587198e-05, + "loss": 1.9344, + "step": 5022 + }, + { + "epoch": 0.06, + "grad_norm": 26.410295954286198, + "learning_rate": 1.999999767750911e-05, + "loss": 1.7094, + "step": 5025 + }, + { + "epoch": 0.06, + "grad_norm": 51.60372638550727, + "learning_rate": 1.999999726230464e-05, + "loss": 2.0609, + "step": 5028 + }, + { + "epoch": 0.06, + "grad_norm": 17.558197759261446, + "learning_rate": 1.9999996812973777e-05, + "loss": 1.7045, + "step": 5031 + }, + { + "epoch": 0.06, + "grad_norm": 145.67515342538323, + "learning_rate": 1.999999632951653e-05, + "loss": 1.6529, + "step": 5034 + }, + { + "epoch": 0.06, + "grad_norm": 33.44628372225197, + "learning_rate": 1.999999581193289e-05, + "loss": 1.7693, + "step": 5037 + }, + { + "epoch": 0.06, + "grad_norm": 9.995956889185486, + "learning_rate": 1.999999526022287e-05, + "loss": 1.6619, + "step": 5040 + }, + { + "epoch": 0.06, + "grad_norm": 5.170070938188999, + "learning_rate": 1.9999994674386472e-05, + "loss": 1.6261, + "step": 5043 + }, + { + "epoch": 0.06, + "grad_norm": 33.54493656096648, + "learning_rate": 1.999999405442369e-05, + "loss": 1.7363, + "step": 5046 + }, + { + "epoch": 0.06, + "grad_norm": 25.586801485988325, + "learning_rate": 1.9999993400334533e-05, + "loss": 2.2166, + "step": 5049 + }, + { + "epoch": 0.06, + "grad_norm": 16.01165976435617, + "learning_rate": 1.9999992712118996e-05, + "loss": 1.9213, + "step": 5052 + }, + { + "epoch": 0.06, + "grad_norm": 101.34768635926382, + "learning_rate": 1.999999198977709e-05, + "loss": 2.0222, + "step": 5055 + }, + { + "epoch": 0.06, + "grad_norm": 79.30821186559677, + "learning_rate": 1.9999991233308812e-05, + "loss": 2.0072, + "step": 5058 + }, + { + "epoch": 0.06, + "grad_norm": 29.749624752868836, + "learning_rate": 1.9999990442714165e-05, + "loss": 1.9544, + "step": 5061 + }, + { + "epoch": 0.06, + "grad_norm": 26.550639632824083, + "learning_rate": 1.9999989617993153e-05, + "loss": 1.7895, + "step": 5064 + }, + { + "epoch": 0.06, + "grad_norm": 9.090408919256994, + "learning_rate": 1.999998875914578e-05, + "loss": 1.9678, + "step": 5067 + }, + { + "epoch": 0.06, + "grad_norm": 45.16603865810317, + "learning_rate": 1.9999987866172046e-05, + "loss": 1.8392, + "step": 5070 + }, + { + "epoch": 0.06, + "grad_norm": 27.516975618479588, + "learning_rate": 1.9999986939071957e-05, + "loss": 2.0964, + "step": 5073 + }, + { + "epoch": 0.06, + "grad_norm": 53.62068497894271, + "learning_rate": 1.999998597784551e-05, + "loss": 2.0607, + "step": 5076 + }, + { + "epoch": 0.06, + "grad_norm": 6.421696274819812, + "learning_rate": 1.999998498249272e-05, + "loss": 1.693, + "step": 5079 + }, + { + "epoch": 0.06, + "grad_norm": 14.964380263920111, + "learning_rate": 1.9999983953013576e-05, + "loss": 2.131, + "step": 5082 + }, + { + "epoch": 0.06, + "grad_norm": 54.03315110776925, + "learning_rate": 1.9999982889408094e-05, + "loss": 1.9616, + "step": 5085 + }, + { + "epoch": 0.06, + "grad_norm": 17.53547034822461, + "learning_rate": 1.9999981791676267e-05, + "loss": 1.6556, + "step": 5088 + }, + { + "epoch": 0.06, + "grad_norm": 17.780762398672312, + "learning_rate": 1.999998065981811e-05, + "loss": 1.582, + "step": 5091 + }, + { + "epoch": 0.06, + "grad_norm": 19.71426843482105, + "learning_rate": 1.999997949383362e-05, + "loss": 1.744, + "step": 5094 + }, + { + "epoch": 0.06, + "grad_norm": 31.932321707061625, + "learning_rate": 1.9999978293722798e-05, + "loss": 1.9425, + "step": 5097 + }, + { + "epoch": 0.06, + "grad_norm": 16.506163650534656, + "learning_rate": 1.9999977059485655e-05, + "loss": 1.5581, + "step": 5100 + }, + { + "epoch": 0.06, + "grad_norm": 18.34663999186203, + "learning_rate": 1.9999975791122193e-05, + "loss": 1.798, + "step": 5103 + }, + { + "epoch": 0.06, + "grad_norm": 29.18263548339231, + "learning_rate": 1.9999974488632413e-05, + "loss": 1.6067, + "step": 5106 + }, + { + "epoch": 0.06, + "grad_norm": 11.478625007794282, + "learning_rate": 1.9999973152016323e-05, + "loss": 1.5406, + "step": 5109 + }, + { + "epoch": 0.06, + "grad_norm": 29.008975867686527, + "learning_rate": 1.999997178127393e-05, + "loss": 1.4816, + "step": 5112 + }, + { + "epoch": 0.06, + "grad_norm": 9.946902067809269, + "learning_rate": 1.999997037640523e-05, + "loss": 1.5005, + "step": 5115 + }, + { + "epoch": 0.06, + "grad_norm": 8.719952944498042, + "learning_rate": 1.9999968937410235e-05, + "loss": 2.2386, + "step": 5118 + }, + { + "epoch": 0.06, + "grad_norm": 6.445792094334562, + "learning_rate": 1.9999967464288948e-05, + "loss": 1.6514, + "step": 5121 + }, + { + "epoch": 0.06, + "grad_norm": 59.019202584643295, + "learning_rate": 1.9999965957041378e-05, + "loss": 1.9468, + "step": 5124 + }, + { + "epoch": 0.06, + "grad_norm": 18.887760179612616, + "learning_rate": 1.999996441566752e-05, + "loss": 1.7604, + "step": 5127 + }, + { + "epoch": 0.06, + "grad_norm": 26.62162404797283, + "learning_rate": 1.9999962840167387e-05, + "loss": 1.4664, + "step": 5130 + }, + { + "epoch": 0.06, + "grad_norm": 18.226189476184956, + "learning_rate": 1.9999961230540984e-05, + "loss": 1.8081, + "step": 5133 + }, + { + "epoch": 0.06, + "grad_norm": 17.46869853355027, + "learning_rate": 1.9999959586788315e-05, + "loss": 2.0859, + "step": 5136 + }, + { + "epoch": 0.06, + "grad_norm": 10.03847426525208, + "learning_rate": 1.9999957908909386e-05, + "loss": 1.6304, + "step": 5139 + }, + { + "epoch": 0.06, + "grad_norm": 8.70198240954428, + "learning_rate": 1.99999561969042e-05, + "loss": 1.7966, + "step": 5142 + }, + { + "epoch": 0.06, + "grad_norm": 5.798738093349423, + "learning_rate": 1.9999954450772773e-05, + "loss": 1.7335, + "step": 5145 + }, + { + "epoch": 0.06, + "grad_norm": 17.996854986848753, + "learning_rate": 1.9999952670515096e-05, + "loss": 1.7293, + "step": 5148 + }, + { + "epoch": 0.06, + "grad_norm": 26.38685017423919, + "learning_rate": 1.9999950856131184e-05, + "loss": 1.6831, + "step": 5151 + }, + { + "epoch": 0.06, + "grad_norm": 26.29159936578096, + "learning_rate": 1.9999949007621042e-05, + "loss": 1.8334, + "step": 5154 + }, + { + "epoch": 0.06, + "grad_norm": 4.1618794659891964, + "learning_rate": 1.999994712498468e-05, + "loss": 1.6938, + "step": 5157 + }, + { + "epoch": 0.06, + "grad_norm": 37.64417683338812, + "learning_rate": 1.9999945208222096e-05, + "loss": 1.9736, + "step": 5160 + }, + { + "epoch": 0.06, + "grad_norm": 9.821839406678492, + "learning_rate": 1.99999432573333e-05, + "loss": 1.9626, + "step": 5163 + }, + { + "epoch": 0.06, + "grad_norm": 27.114890790708234, + "learning_rate": 1.99999412723183e-05, + "loss": 1.9524, + "step": 5166 + }, + { + "epoch": 0.06, + "grad_norm": 89.88069387952511, + "learning_rate": 1.9999939253177106e-05, + "loss": 2.1193, + "step": 5169 + }, + { + "epoch": 0.06, + "grad_norm": 9.008146282755071, + "learning_rate": 1.999993719990972e-05, + "loss": 1.5612, + "step": 5172 + }, + { + "epoch": 0.06, + "grad_norm": 44.939154668745786, + "learning_rate": 1.999993511251615e-05, + "loss": 1.984, + "step": 5175 + }, + { + "epoch": 0.06, + "grad_norm": 22.76464483858322, + "learning_rate": 1.9999932990996404e-05, + "loss": 2.0794, + "step": 5178 + }, + { + "epoch": 0.06, + "grad_norm": 51.75272056214173, + "learning_rate": 1.9999930835350486e-05, + "loss": 1.3621, + "step": 5181 + }, + { + "epoch": 0.06, + "grad_norm": 33.00412614025089, + "learning_rate": 1.9999928645578407e-05, + "loss": 1.8623, + "step": 5184 + }, + { + "epoch": 0.06, + "grad_norm": 91.40149474090005, + "learning_rate": 1.9999926421680177e-05, + "loss": 2.0806, + "step": 5187 + }, + { + "epoch": 0.06, + "grad_norm": 24.278492973448866, + "learning_rate": 1.99999241636558e-05, + "loss": 1.925, + "step": 5190 + }, + { + "epoch": 0.06, + "grad_norm": 42.15116511591891, + "learning_rate": 1.9999921871505276e-05, + "loss": 2.0037, + "step": 5193 + }, + { + "epoch": 0.06, + "grad_norm": 20.614904572878334, + "learning_rate": 1.9999919545228627e-05, + "loss": 1.9062, + "step": 5196 + }, + { + "epoch": 0.06, + "grad_norm": 21.514369448650427, + "learning_rate": 1.9999917184825853e-05, + "loss": 1.7043, + "step": 5199 + }, + { + "epoch": 0.06, + "grad_norm": 5.061021975236093, + "learning_rate": 1.9999914790296962e-05, + "loss": 2.0682, + "step": 5202 + }, + { + "epoch": 0.06, + "grad_norm": 8.764261686854915, + "learning_rate": 1.9999912361641968e-05, + "loss": 1.9842, + "step": 5205 + }, + { + "epoch": 0.06, + "grad_norm": 3.270561328650551, + "learning_rate": 1.9999909898860873e-05, + "loss": 1.9389, + "step": 5208 + }, + { + "epoch": 0.06, + "grad_norm": 40.55913344979938, + "learning_rate": 1.9999907401953685e-05, + "loss": 1.8942, + "step": 5211 + }, + { + "epoch": 0.06, + "grad_norm": 26.52094572077242, + "learning_rate": 1.999990487092042e-05, + "loss": 1.9403, + "step": 5214 + }, + { + "epoch": 0.06, + "grad_norm": 24.179820177501888, + "learning_rate": 1.9999902305761076e-05, + "loss": 1.6766, + "step": 5217 + }, + { + "epoch": 0.06, + "grad_norm": 23.562622031958067, + "learning_rate": 1.9999899706475672e-05, + "loss": 1.7131, + "step": 5220 + }, + { + "epoch": 0.06, + "grad_norm": 17.225768310639328, + "learning_rate": 1.9999897073064208e-05, + "loss": 1.8516, + "step": 5223 + }, + { + "epoch": 0.06, + "grad_norm": 17.245532030252168, + "learning_rate": 1.99998944055267e-05, + "loss": 1.8354, + "step": 5226 + }, + { + "epoch": 0.06, + "grad_norm": 29.640243724443508, + "learning_rate": 1.9999891703863156e-05, + "loss": 1.4609, + "step": 5229 + }, + { + "epoch": 0.06, + "grad_norm": 45.14136314045739, + "learning_rate": 1.999988896807358e-05, + "loss": 2.4208, + "step": 5232 + }, + { + "epoch": 0.06, + "grad_norm": 50.629129045201395, + "learning_rate": 1.9999886198157988e-05, + "loss": 2.0456, + "step": 5235 + }, + { + "epoch": 0.06, + "grad_norm": 26.215102118912874, + "learning_rate": 1.9999883394116385e-05, + "loss": 2.0527, + "step": 5238 + }, + { + "epoch": 0.06, + "grad_norm": 25.09984557566193, + "learning_rate": 1.999988055594878e-05, + "loss": 1.7363, + "step": 5241 + }, + { + "epoch": 0.06, + "grad_norm": 11.915719940223267, + "learning_rate": 1.9999877683655188e-05, + "loss": 1.8118, + "step": 5244 + }, + { + "epoch": 0.06, + "grad_norm": 20.761247629178893, + "learning_rate": 1.9999874777235613e-05, + "loss": 1.6576, + "step": 5247 + }, + { + "epoch": 0.06, + "grad_norm": 22.73077720970549, + "learning_rate": 1.9999871836690067e-05, + "loss": 1.9721, + "step": 5250 + }, + { + "epoch": 0.06, + "grad_norm": 193.6553740551916, + "learning_rate": 1.9999868862018563e-05, + "loss": 2.27, + "step": 5253 + }, + { + "epoch": 0.06, + "grad_norm": 10.028218376650983, + "learning_rate": 1.9999865853221108e-05, + "loss": 1.6973, + "step": 5256 + }, + { + "epoch": 0.06, + "grad_norm": 19.68740507047658, + "learning_rate": 1.999986281029771e-05, + "loss": 1.7163, + "step": 5259 + }, + { + "epoch": 0.06, + "grad_norm": 17.378727922779902, + "learning_rate": 1.9999859733248385e-05, + "loss": 1.9501, + "step": 5262 + }, + { + "epoch": 0.06, + "grad_norm": 147.20077633703883, + "learning_rate": 1.9999856622073137e-05, + "loss": 1.5362, + "step": 5265 + }, + { + "epoch": 0.06, + "grad_norm": 18.112552459264986, + "learning_rate": 1.9999853476771986e-05, + "loss": 1.993, + "step": 5268 + }, + { + "epoch": 0.06, + "grad_norm": 130.53010602124135, + "learning_rate": 1.999985029734493e-05, + "loss": 2.1104, + "step": 5271 + }, + { + "epoch": 0.06, + "grad_norm": 19.93742981649483, + "learning_rate": 1.9999847083791992e-05, + "loss": 1.9153, + "step": 5274 + }, + { + "epoch": 0.06, + "grad_norm": 33.54514604604343, + "learning_rate": 1.9999843836113177e-05, + "loss": 1.7816, + "step": 5277 + }, + { + "epoch": 0.06, + "grad_norm": 23.060579717318863, + "learning_rate": 1.9999840554308495e-05, + "loss": 1.9163, + "step": 5280 + }, + { + "epoch": 0.06, + "grad_norm": 25.246948375227994, + "learning_rate": 1.999983723837796e-05, + "loss": 1.3311, + "step": 5283 + }, + { + "epoch": 0.06, + "grad_norm": 25.427516468299093, + "learning_rate": 1.9999833888321583e-05, + "loss": 1.7596, + "step": 5286 + }, + { + "epoch": 0.06, + "grad_norm": 72.68016892799648, + "learning_rate": 1.9999830504139377e-05, + "loss": 1.5142, + "step": 5289 + }, + { + "epoch": 0.06, + "grad_norm": 7.413594472221113, + "learning_rate": 1.9999827085831348e-05, + "loss": 1.9259, + "step": 5292 + }, + { + "epoch": 0.06, + "grad_norm": 26.24828623140619, + "learning_rate": 1.9999823633397513e-05, + "loss": 2.0089, + "step": 5295 + }, + { + "epoch": 0.06, + "grad_norm": 16.813991048758194, + "learning_rate": 1.9999820146837884e-05, + "loss": 1.9303, + "step": 5298 + }, + { + "epoch": 0.06, + "grad_norm": 85.07213616699471, + "learning_rate": 1.9999816626152465e-05, + "loss": 1.9203, + "step": 5301 + }, + { + "epoch": 0.06, + "grad_norm": 55.44431277202085, + "learning_rate": 1.9999813071341276e-05, + "loss": 1.8586, + "step": 5304 + }, + { + "epoch": 0.06, + "grad_norm": 43.87105023340057, + "learning_rate": 1.9999809482404328e-05, + "loss": 1.7511, + "step": 5307 + }, + { + "epoch": 0.06, + "grad_norm": 23.088619438524358, + "learning_rate": 1.9999805859341632e-05, + "loss": 1.5069, + "step": 5310 + }, + { + "epoch": 0.06, + "grad_norm": 151.12439304152159, + "learning_rate": 1.99998022021532e-05, + "loss": 1.7007, + "step": 5313 + }, + { + "epoch": 0.06, + "grad_norm": 11.731003268820944, + "learning_rate": 1.9999798510839047e-05, + "loss": 1.7358, + "step": 5316 + }, + { + "epoch": 0.06, + "grad_norm": 49.59214231484729, + "learning_rate": 1.999979478539918e-05, + "loss": 1.908, + "step": 5319 + }, + { + "epoch": 0.06, + "grad_norm": 51.98181411258251, + "learning_rate": 1.9999791025833617e-05, + "loss": 2.0886, + "step": 5322 + }, + { + "epoch": 0.06, + "grad_norm": 139.42853550809272, + "learning_rate": 1.999978723214237e-05, + "loss": 1.9275, + "step": 5325 + }, + { + "epoch": 0.06, + "grad_norm": 13.296707301124288, + "learning_rate": 1.999978340432545e-05, + "loss": 2.0426, + "step": 5328 + }, + { + "epoch": 0.06, + "grad_norm": 25.993691436696484, + "learning_rate": 1.999977954238287e-05, + "loss": 1.641, + "step": 5331 + }, + { + "epoch": 0.06, + "grad_norm": 44.79018577991021, + "learning_rate": 1.9999775646314646e-05, + "loss": 1.6188, + "step": 5334 + }, + { + "epoch": 0.06, + "grad_norm": 49.113945619257386, + "learning_rate": 1.9999771716120792e-05, + "loss": 1.8799, + "step": 5337 + }, + { + "epoch": 0.06, + "grad_norm": 30.228820091845968, + "learning_rate": 1.9999767751801315e-05, + "loss": 1.7039, + "step": 5340 + }, + { + "epoch": 0.06, + "grad_norm": 4.652199934949379, + "learning_rate": 1.9999763753356233e-05, + "loss": 1.9893, + "step": 5343 + }, + { + "epoch": 0.06, + "grad_norm": 13.749701791823613, + "learning_rate": 1.9999759720785562e-05, + "loss": 2.0896, + "step": 5346 + }, + { + "epoch": 0.06, + "grad_norm": 106.05400656989741, + "learning_rate": 1.999975565408931e-05, + "loss": 1.9046, + "step": 5349 + }, + { + "epoch": 0.06, + "grad_norm": 7.254377551714447, + "learning_rate": 1.9999751553267495e-05, + "loss": 1.5196, + "step": 5352 + }, + { + "epoch": 0.06, + "grad_norm": 10.539406040913578, + "learning_rate": 1.9999747418320133e-05, + "loss": 1.8921, + "step": 5355 + }, + { + "epoch": 0.06, + "grad_norm": 24.164669753840776, + "learning_rate": 1.999974324924723e-05, + "loss": 1.612, + "step": 5358 + }, + { + "epoch": 0.06, + "grad_norm": 32.726930193731704, + "learning_rate": 1.9999739046048808e-05, + "loss": 1.9793, + "step": 5361 + }, + { + "epoch": 0.06, + "grad_norm": 22.523895561413664, + "learning_rate": 1.9999734808724877e-05, + "loss": 2.2198, + "step": 5364 + }, + { + "epoch": 0.06, + "grad_norm": 65.92466780025093, + "learning_rate": 1.9999730537275455e-05, + "loss": 1.4081, + "step": 5367 + }, + { + "epoch": 0.06, + "grad_norm": 20.236969611799484, + "learning_rate": 1.9999726231700552e-05, + "loss": 1.6927, + "step": 5370 + }, + { + "epoch": 0.06, + "grad_norm": 7.632542303837638, + "learning_rate": 1.9999721892000187e-05, + "loss": 1.79, + "step": 5373 + }, + { + "epoch": 0.06, + "grad_norm": 105.31912306883201, + "learning_rate": 1.999971751817437e-05, + "loss": 1.7265, + "step": 5376 + }, + { + "epoch": 0.06, + "grad_norm": 20.335761233491766, + "learning_rate": 1.9999713110223123e-05, + "loss": 1.9449, + "step": 5379 + }, + { + "epoch": 0.06, + "grad_norm": 28.008319663547507, + "learning_rate": 1.9999708668146453e-05, + "loss": 1.6476, + "step": 5382 + }, + { + "epoch": 0.06, + "grad_norm": 53.09349475374742, + "learning_rate": 1.9999704191944383e-05, + "loss": 1.8887, + "step": 5385 + }, + { + "epoch": 0.06, + "grad_norm": 76.64938549882305, + "learning_rate": 1.9999699681616925e-05, + "loss": 1.75, + "step": 5388 + }, + { + "epoch": 0.06, + "grad_norm": 25.80789314373699, + "learning_rate": 1.999969513716409e-05, + "loss": 1.5187, + "step": 5391 + }, + { + "epoch": 0.06, + "grad_norm": 34.73001208620885, + "learning_rate": 1.99996905585859e-05, + "loss": 2.3486, + "step": 5394 + }, + { + "epoch": 0.06, + "grad_norm": 42.59228100835129, + "learning_rate": 1.9999685945882366e-05, + "loss": 2.0928, + "step": 5397 + }, + { + "epoch": 0.06, + "grad_norm": 205.4200476128244, + "learning_rate": 1.999968129905351e-05, + "loss": 1.898, + "step": 5400 + }, + { + "epoch": 0.06, + "grad_norm": 9.06516570267151, + "learning_rate": 1.999967661809934e-05, + "loss": 1.693, + "step": 5403 + }, + { + "epoch": 0.07, + "grad_norm": 10.234510103844125, + "learning_rate": 1.9999671903019875e-05, + "loss": 1.7747, + "step": 5406 + }, + { + "epoch": 0.07, + "grad_norm": 6.474346241068119, + "learning_rate": 1.9999667153815133e-05, + "loss": 2.0672, + "step": 5409 + }, + { + "epoch": 0.07, + "grad_norm": 40.77679152131814, + "learning_rate": 1.999966237048513e-05, + "loss": 1.9698, + "step": 5412 + }, + { + "epoch": 0.07, + "grad_norm": 19.98129808059503, + "learning_rate": 1.9999657553029878e-05, + "loss": 2.1411, + "step": 5415 + }, + { + "epoch": 0.07, + "grad_norm": 79.15316470234912, + "learning_rate": 1.99996527014494e-05, + "loss": 2.1759, + "step": 5418 + }, + { + "epoch": 0.07, + "grad_norm": 36.72022640038875, + "learning_rate": 1.999964781574371e-05, + "loss": 1.9769, + "step": 5421 + }, + { + "epoch": 0.07, + "grad_norm": 9.243144512583452, + "learning_rate": 1.9999642895912822e-05, + "loss": 1.7219, + "step": 5424 + }, + { + "epoch": 0.07, + "grad_norm": 7.892368908669923, + "learning_rate": 1.9999637941956754e-05, + "loss": 1.5354, + "step": 5427 + }, + { + "epoch": 0.07, + "grad_norm": 3.33097058360092, + "learning_rate": 1.999963295387552e-05, + "loss": 1.8483, + "step": 5430 + }, + { + "epoch": 0.07, + "grad_norm": 5.199350405312623, + "learning_rate": 1.9999627931669145e-05, + "loss": 1.7738, + "step": 5433 + }, + { + "epoch": 0.07, + "grad_norm": 38.81183794469517, + "learning_rate": 1.9999622875337642e-05, + "loss": 2.3366, + "step": 5436 + }, + { + "epoch": 0.07, + "grad_norm": 48.258334334192966, + "learning_rate": 1.9999617784881028e-05, + "loss": 1.561, + "step": 5439 + }, + { + "epoch": 0.07, + "grad_norm": 10.810676623597592, + "learning_rate": 1.999961266029932e-05, + "loss": 1.9775, + "step": 5442 + }, + { + "epoch": 0.07, + "grad_norm": 17.743369730910928, + "learning_rate": 1.9999607501592535e-05, + "loss": 1.5196, + "step": 5445 + }, + { + "epoch": 0.07, + "grad_norm": 57.019581862363616, + "learning_rate": 1.9999602308760693e-05, + "loss": 1.6414, + "step": 5448 + }, + { + "epoch": 0.07, + "grad_norm": 21.75408417484354, + "learning_rate": 1.999959708180381e-05, + "loss": 1.6379, + "step": 5451 + }, + { + "epoch": 0.07, + "grad_norm": 31.71683862407945, + "learning_rate": 1.99995918207219e-05, + "loss": 1.8944, + "step": 5454 + }, + { + "epoch": 0.07, + "grad_norm": 48.09622256228159, + "learning_rate": 1.9999586525514988e-05, + "loss": 1.8352, + "step": 5457 + }, + { + "epoch": 0.07, + "grad_norm": 4.307281222471099, + "learning_rate": 1.999958119618309e-05, + "loss": 1.4757, + "step": 5460 + }, + { + "epoch": 0.07, + "grad_norm": 15.885474636813505, + "learning_rate": 1.999957583272622e-05, + "loss": 1.6053, + "step": 5463 + }, + { + "epoch": 0.07, + "grad_norm": 33.139414110994046, + "learning_rate": 1.9999570435144405e-05, + "loss": 2.1497, + "step": 5466 + }, + { + "epoch": 0.07, + "grad_norm": 33.69791537800212, + "learning_rate": 1.9999565003437654e-05, + "loss": 1.7003, + "step": 5469 + }, + { + "epoch": 0.07, + "grad_norm": 64.73197647731745, + "learning_rate": 1.999955953760599e-05, + "loss": 1.573, + "step": 5472 + }, + { + "epoch": 0.07, + "grad_norm": 6.196632555439159, + "learning_rate": 1.999955403764943e-05, + "loss": 1.5056, + "step": 5475 + }, + { + "epoch": 0.07, + "grad_norm": 16.694693323789565, + "learning_rate": 1.9999548503567997e-05, + "loss": 1.7091, + "step": 5478 + }, + { + "epoch": 0.07, + "grad_norm": 28.99988804173395, + "learning_rate": 1.9999542935361704e-05, + "loss": 1.9457, + "step": 5481 + }, + { + "epoch": 0.07, + "grad_norm": 9.92877131722677, + "learning_rate": 1.999953733303057e-05, + "loss": 1.978, + "step": 5484 + }, + { + "epoch": 0.07, + "grad_norm": 72.26909308305281, + "learning_rate": 1.9999531696574622e-05, + "loss": 1.9701, + "step": 5487 + }, + { + "epoch": 0.07, + "grad_norm": 11.3089622034308, + "learning_rate": 1.9999526025993874e-05, + "loss": 1.6252, + "step": 5490 + }, + { + "epoch": 0.07, + "grad_norm": 7.778780994146468, + "learning_rate": 1.9999520321288344e-05, + "loss": 1.8945, + "step": 5493 + }, + { + "epoch": 0.07, + "grad_norm": 132.7121110613794, + "learning_rate": 1.999951458245805e-05, + "loss": 1.9578, + "step": 5496 + }, + { + "epoch": 0.07, + "grad_norm": 6.693952224582759, + "learning_rate": 1.9999508809503017e-05, + "loss": 2.349, + "step": 5499 + }, + { + "epoch": 0.07, + "grad_norm": 38.64994477239105, + "learning_rate": 1.9999503002423265e-05, + "loss": 1.7615, + "step": 5502 + }, + { + "epoch": 0.07, + "grad_norm": 16.8153626900959, + "learning_rate": 1.999949716121881e-05, + "loss": 1.2896, + "step": 5505 + }, + { + "epoch": 0.07, + "grad_norm": 9.53886661907216, + "learning_rate": 1.9999491285889672e-05, + "loss": 1.7792, + "step": 5508 + }, + { + "epoch": 0.07, + "grad_norm": 24.4165947425246, + "learning_rate": 1.999948537643587e-05, + "loss": 1.9285, + "step": 5511 + }, + { + "epoch": 0.07, + "grad_norm": 31.043471383334467, + "learning_rate": 1.999947943285743e-05, + "loss": 1.9597, + "step": 5514 + }, + { + "epoch": 0.07, + "grad_norm": 27.302063314035884, + "learning_rate": 1.9999473455154367e-05, + "loss": 1.7795, + "step": 5517 + }, + { + "epoch": 0.07, + "grad_norm": 47.17573573852622, + "learning_rate": 1.9999467443326704e-05, + "loss": 1.8149, + "step": 5520 + }, + { + "epoch": 0.07, + "grad_norm": 258.13812638818, + "learning_rate": 1.999946139737446e-05, + "loss": 1.8393, + "step": 5523 + }, + { + "epoch": 0.07, + "grad_norm": 19.515813574196958, + "learning_rate": 1.9999455317297652e-05, + "loss": 1.5961, + "step": 5526 + }, + { + "epoch": 0.07, + "grad_norm": 20.176066362164796, + "learning_rate": 1.999944920309631e-05, + "loss": 1.63, + "step": 5529 + }, + { + "epoch": 0.07, + "grad_norm": 113.51268266051831, + "learning_rate": 1.9999443054770446e-05, + "loss": 2.0271, + "step": 5532 + }, + { + "epoch": 0.07, + "grad_norm": 11.788689111812694, + "learning_rate": 1.9999436872320085e-05, + "loss": 1.9582, + "step": 5535 + }, + { + "epoch": 0.07, + "grad_norm": 11.256113473058559, + "learning_rate": 1.999943065574525e-05, + "loss": 1.7806, + "step": 5538 + }, + { + "epoch": 0.07, + "grad_norm": 26.58665215522058, + "learning_rate": 1.999942440504596e-05, + "loss": 1.7794, + "step": 5541 + }, + { + "epoch": 0.07, + "grad_norm": 20.720054619958987, + "learning_rate": 1.9999418120222235e-05, + "loss": 1.8923, + "step": 5544 + }, + { + "epoch": 0.07, + "grad_norm": 49.46389555934321, + "learning_rate": 1.9999411801274097e-05, + "loss": 1.7733, + "step": 5547 + }, + { + "epoch": 0.07, + "grad_norm": 37.209582328433164, + "learning_rate": 1.999940544820157e-05, + "loss": 1.5412, + "step": 5550 + }, + { + "epoch": 0.07, + "grad_norm": 22.254533383234747, + "learning_rate": 1.9999399061004672e-05, + "loss": 1.9892, + "step": 5553 + }, + { + "epoch": 0.07, + "grad_norm": 48.48800087939793, + "learning_rate": 1.9999392639683427e-05, + "loss": 1.5465, + "step": 5556 + }, + { + "epoch": 0.07, + "grad_norm": 8.915781410107352, + "learning_rate": 1.999938618423786e-05, + "loss": 1.772, + "step": 5559 + }, + { + "epoch": 0.07, + "grad_norm": 8.282769566569298, + "learning_rate": 1.9999379694667986e-05, + "loss": 1.5597, + "step": 5562 + }, + { + "epoch": 0.07, + "grad_norm": 12.053351701123047, + "learning_rate": 1.9999373170973834e-05, + "loss": 1.627, + "step": 5565 + }, + { + "epoch": 0.07, + "grad_norm": 28.16467607592326, + "learning_rate": 1.9999366613155418e-05, + "loss": 1.7529, + "step": 5568 + }, + { + "epoch": 0.07, + "grad_norm": 10.618064100712543, + "learning_rate": 1.999936002121277e-05, + "loss": 1.6332, + "step": 5571 + }, + { + "epoch": 0.07, + "grad_norm": 31.619193790449234, + "learning_rate": 1.9999353395145906e-05, + "loss": 1.7576, + "step": 5574 + }, + { + "epoch": 0.07, + "grad_norm": 18.18795905077775, + "learning_rate": 1.999934673495485e-05, + "loss": 2.105, + "step": 5577 + }, + { + "epoch": 0.07, + "grad_norm": 32.47185073515965, + "learning_rate": 1.9999340040639627e-05, + "loss": 1.9799, + "step": 5580 + }, + { + "epoch": 0.07, + "grad_norm": 9.698859998521206, + "learning_rate": 1.9999333312200258e-05, + "loss": 2.0825, + "step": 5583 + }, + { + "epoch": 0.07, + "grad_norm": 11.81911421889395, + "learning_rate": 1.9999326549636767e-05, + "loss": 2.0438, + "step": 5586 + }, + { + "epoch": 0.07, + "grad_norm": 38.02831801569526, + "learning_rate": 1.9999319752949175e-05, + "loss": 1.9456, + "step": 5589 + }, + { + "epoch": 0.07, + "grad_norm": 16.35986846466091, + "learning_rate": 1.9999312922137505e-05, + "loss": 1.7612, + "step": 5592 + }, + { + "epoch": 0.07, + "grad_norm": 33.26828074007641, + "learning_rate": 1.9999306057201785e-05, + "loss": 1.9736, + "step": 5595 + }, + { + "epoch": 0.07, + "grad_norm": 18.099020587683246, + "learning_rate": 1.9999299158142035e-05, + "loss": 1.6047, + "step": 5598 + }, + { + "epoch": 0.07, + "grad_norm": 4.459251059196241, + "learning_rate": 1.9999292224958274e-05, + "loss": 2.0164, + "step": 5601 + }, + { + "epoch": 0.07, + "grad_norm": 4.978450400080656, + "learning_rate": 1.9999285257650535e-05, + "loss": 1.6625, + "step": 5604 + }, + { + "epoch": 0.07, + "grad_norm": 28.041032422067268, + "learning_rate": 1.9999278256218834e-05, + "loss": 2.13, + "step": 5607 + }, + { + "epoch": 0.07, + "grad_norm": 30.30750730941472, + "learning_rate": 1.99992712206632e-05, + "loss": 1.591, + "step": 5610 + }, + { + "epoch": 0.07, + "grad_norm": 16.556084043464946, + "learning_rate": 1.9999264150983655e-05, + "loss": 1.8175, + "step": 5613 + }, + { + "epoch": 0.07, + "grad_norm": 52.688583297436786, + "learning_rate": 1.9999257047180226e-05, + "loss": 1.8602, + "step": 5616 + }, + { + "epoch": 0.07, + "grad_norm": 32.74722310359443, + "learning_rate": 1.9999249909252932e-05, + "loss": 1.8203, + "step": 5619 + }, + { + "epoch": 0.07, + "grad_norm": 25.807539212788253, + "learning_rate": 1.9999242737201795e-05, + "loss": 1.7082, + "step": 5622 + }, + { + "epoch": 0.07, + "grad_norm": 12.84484763535408, + "learning_rate": 1.999923553102685e-05, + "loss": 1.6398, + "step": 5625 + }, + { + "epoch": 0.07, + "grad_norm": 94.88451495103743, + "learning_rate": 1.9999228290728114e-05, + "loss": 1.9028, + "step": 5628 + }, + { + "epoch": 0.07, + "grad_norm": 45.796637307900944, + "learning_rate": 1.9999221016305615e-05, + "loss": 1.7036, + "step": 5631 + }, + { + "epoch": 0.07, + "grad_norm": 39.971458454220944, + "learning_rate": 1.9999213707759374e-05, + "loss": 1.7596, + "step": 5634 + }, + { + "epoch": 0.07, + "grad_norm": 69.5274277559145, + "learning_rate": 1.999920636508942e-05, + "loss": 1.7368, + "step": 5637 + }, + { + "epoch": 0.07, + "grad_norm": 89.12197974901531, + "learning_rate": 1.9999198988295777e-05, + "loss": 1.7858, + "step": 5640 + }, + { + "epoch": 0.07, + "grad_norm": 38.88837266227001, + "learning_rate": 1.999919157737847e-05, + "loss": 1.9298, + "step": 5643 + }, + { + "epoch": 0.07, + "grad_norm": 26.677027155850723, + "learning_rate": 1.999918413233752e-05, + "loss": 1.6858, + "step": 5646 + }, + { + "epoch": 0.07, + "grad_norm": 89.02128978633317, + "learning_rate": 1.9999176653172958e-05, + "loss": 1.7326, + "step": 5649 + }, + { + "epoch": 0.07, + "grad_norm": 9.957770555061218, + "learning_rate": 1.999916913988481e-05, + "loss": 1.6774, + "step": 5652 + }, + { + "epoch": 0.07, + "grad_norm": 18.334549165012486, + "learning_rate": 1.9999161592473095e-05, + "loss": 2.1169, + "step": 5655 + }, + { + "epoch": 0.07, + "grad_norm": 11.733494832933609, + "learning_rate": 1.9999154010937846e-05, + "loss": 1.8497, + "step": 5658 + }, + { + "epoch": 0.07, + "grad_norm": 31.42187019498429, + "learning_rate": 1.9999146395279088e-05, + "loss": 1.7236, + "step": 5661 + }, + { + "epoch": 0.07, + "grad_norm": 19.805507898181958, + "learning_rate": 1.999913874549684e-05, + "loss": 1.907, + "step": 5664 + }, + { + "epoch": 0.07, + "grad_norm": 21.266378673320165, + "learning_rate": 1.9999131061591138e-05, + "loss": 1.8302, + "step": 5667 + }, + { + "epoch": 0.07, + "grad_norm": 20.443966958497533, + "learning_rate": 1.9999123343562e-05, + "loss": 1.3702, + "step": 5670 + }, + { + "epoch": 0.07, + "grad_norm": 27.881791437774456, + "learning_rate": 1.999911559140946e-05, + "loss": 2.1342, + "step": 5673 + }, + { + "epoch": 0.07, + "grad_norm": 6.486653090237375, + "learning_rate": 1.9999107805133536e-05, + "loss": 1.5496, + "step": 5676 + }, + { + "epoch": 0.07, + "grad_norm": 51.77732829249956, + "learning_rate": 1.999909998473426e-05, + "loss": 2.0545, + "step": 5679 + }, + { + "epoch": 0.07, + "grad_norm": 32.82590550923511, + "learning_rate": 1.9999092130211655e-05, + "loss": 1.5221, + "step": 5682 + }, + { + "epoch": 0.07, + "grad_norm": 11.948948107794704, + "learning_rate": 1.9999084241565752e-05, + "loss": 1.749, + "step": 5685 + }, + { + "epoch": 0.07, + "grad_norm": 10.26254061696977, + "learning_rate": 1.9999076318796578e-05, + "loss": 1.6286, + "step": 5688 + }, + { + "epoch": 0.07, + "grad_norm": 24.637694290260082, + "learning_rate": 1.999906836190416e-05, + "loss": 1.9371, + "step": 5691 + }, + { + "epoch": 0.07, + "grad_norm": 15.006829601773264, + "learning_rate": 1.9999060370888518e-05, + "loss": 1.8768, + "step": 5694 + }, + { + "epoch": 0.07, + "grad_norm": 94.22427855829142, + "learning_rate": 1.999905234574969e-05, + "loss": 1.7521, + "step": 5697 + }, + { + "epoch": 0.07, + "grad_norm": 19.5271574512319, + "learning_rate": 1.9999044286487695e-05, + "loss": 1.8153, + "step": 5700 + }, + { + "epoch": 0.07, + "grad_norm": 9.952360690663562, + "learning_rate": 1.999903619310256e-05, + "loss": 1.3042, + "step": 5703 + }, + { + "epoch": 0.07, + "grad_norm": 22.874391352621693, + "learning_rate": 1.999902806559432e-05, + "loss": 1.7757, + "step": 5706 + }, + { + "epoch": 0.07, + "grad_norm": 25.38000462841589, + "learning_rate": 1.9999019903963e-05, + "loss": 1.6899, + "step": 5709 + }, + { + "epoch": 0.07, + "grad_norm": 21.0248469539688, + "learning_rate": 1.9999011708208624e-05, + "loss": 2.042, + "step": 5712 + }, + { + "epoch": 0.07, + "grad_norm": 11.592041275930601, + "learning_rate": 1.9999003478331222e-05, + "loss": 1.7686, + "step": 5715 + }, + { + "epoch": 0.07, + "grad_norm": 45.06688665390828, + "learning_rate": 1.9998995214330827e-05, + "loss": 1.8373, + "step": 5718 + }, + { + "epoch": 0.07, + "grad_norm": 30.862083518827347, + "learning_rate": 1.999898691620746e-05, + "loss": 1.4854, + "step": 5721 + }, + { + "epoch": 0.07, + "grad_norm": 36.310034542753726, + "learning_rate": 1.9998978583961152e-05, + "loss": 1.5406, + "step": 5724 + }, + { + "epoch": 0.07, + "grad_norm": 35.260171313457654, + "learning_rate": 1.999897021759193e-05, + "loss": 1.9728, + "step": 5727 + }, + { + "epoch": 0.07, + "grad_norm": 6.379064455718128, + "learning_rate": 1.999896181709983e-05, + "loss": 1.7962, + "step": 5730 + }, + { + "epoch": 0.07, + "grad_norm": 7.083339096545583, + "learning_rate": 1.999895338248487e-05, + "loss": 1.684, + "step": 5733 + }, + { + "epoch": 0.07, + "grad_norm": 7.087842606721255, + "learning_rate": 1.9998944913747085e-05, + "loss": 1.8078, + "step": 5736 + }, + { + "epoch": 0.07, + "grad_norm": 15.30453257617602, + "learning_rate": 1.9998936410886505e-05, + "loss": 1.6773, + "step": 5739 + }, + { + "epoch": 0.07, + "grad_norm": 34.63686495279626, + "learning_rate": 1.9998927873903153e-05, + "loss": 1.8342, + "step": 5742 + }, + { + "epoch": 0.07, + "grad_norm": 7.011858522210174, + "learning_rate": 1.999891930279706e-05, + "loss": 1.6525, + "step": 5745 + }, + { + "epoch": 0.07, + "grad_norm": 86.90423219582773, + "learning_rate": 1.9998910697568264e-05, + "loss": 1.7514, + "step": 5748 + }, + { + "epoch": 0.07, + "grad_norm": 30.97790955451699, + "learning_rate": 1.9998902058216783e-05, + "loss": 1.6695, + "step": 5751 + }, + { + "epoch": 0.07, + "grad_norm": 10.140998829044166, + "learning_rate": 1.9998893384742653e-05, + "loss": 1.4841, + "step": 5754 + }, + { + "epoch": 0.07, + "grad_norm": 17.33886708293684, + "learning_rate": 1.99988846771459e-05, + "loss": 1.7051, + "step": 5757 + }, + { + "epoch": 0.07, + "grad_norm": 17.261381776648847, + "learning_rate": 1.9998875935426555e-05, + "loss": 1.9764, + "step": 5760 + }, + { + "epoch": 0.07, + "grad_norm": 18.66604714277153, + "learning_rate": 1.999886715958465e-05, + "loss": 1.7769, + "step": 5763 + }, + { + "epoch": 0.07, + "grad_norm": 8.610319144271196, + "learning_rate": 1.999885834962021e-05, + "loss": 1.7898, + "step": 5766 + }, + { + "epoch": 0.07, + "grad_norm": 21.8831587378033, + "learning_rate": 1.9998849505533268e-05, + "loss": 2.0258, + "step": 5769 + }, + { + "epoch": 0.07, + "grad_norm": 22.050307095376212, + "learning_rate": 1.9998840627323855e-05, + "loss": 2.0507, + "step": 5772 + }, + { + "epoch": 0.07, + "grad_norm": 90.60263055168163, + "learning_rate": 1.9998831714992e-05, + "loss": 1.8237, + "step": 5775 + }, + { + "epoch": 0.07, + "grad_norm": 13.58321212325247, + "learning_rate": 1.9998822768537736e-05, + "loss": 1.8224, + "step": 5778 + }, + { + "epoch": 0.07, + "grad_norm": 44.906031135195924, + "learning_rate": 1.999881378796109e-05, + "loss": 1.5942, + "step": 5781 + }, + { + "epoch": 0.07, + "grad_norm": 12.50359310451203, + "learning_rate": 1.9998804773262093e-05, + "loss": 1.8436, + "step": 5784 + }, + { + "epoch": 0.07, + "grad_norm": 100.70399745492166, + "learning_rate": 1.999879572444078e-05, + "loss": 1.9273, + "step": 5787 + }, + { + "epoch": 0.07, + "grad_norm": 71.93205371837784, + "learning_rate": 1.9998786641497178e-05, + "loss": 1.6151, + "step": 5790 + }, + { + "epoch": 0.07, + "grad_norm": 21.47729102793129, + "learning_rate": 1.9998777524431318e-05, + "loss": 2.0531, + "step": 5793 + }, + { + "epoch": 0.07, + "grad_norm": 15.75119426162224, + "learning_rate": 1.999876837324323e-05, + "loss": 1.5914, + "step": 5796 + }, + { + "epoch": 0.07, + "grad_norm": 74.76145157581774, + "learning_rate": 1.999875918793295e-05, + "loss": 1.6225, + "step": 5799 + }, + { + "epoch": 0.07, + "grad_norm": 20.9778032581451, + "learning_rate": 1.9998749968500504e-05, + "loss": 1.6908, + "step": 5802 + }, + { + "epoch": 0.07, + "grad_norm": 43.11492192989545, + "learning_rate": 1.9998740714945926e-05, + "loss": 1.6211, + "step": 5805 + }, + { + "epoch": 0.07, + "grad_norm": 55.86102324833515, + "learning_rate": 1.9998731427269247e-05, + "loss": 1.811, + "step": 5808 + }, + { + "epoch": 0.07, + "grad_norm": 6.752281344289188, + "learning_rate": 1.9998722105470502e-05, + "loss": 1.7932, + "step": 5811 + }, + { + "epoch": 0.07, + "grad_norm": 38.38601975481159, + "learning_rate": 1.999871274954972e-05, + "loss": 1.5623, + "step": 5814 + }, + { + "epoch": 0.07, + "grad_norm": 13.393441028772921, + "learning_rate": 1.999870335950693e-05, + "loss": 1.7028, + "step": 5817 + }, + { + "epoch": 0.07, + "grad_norm": 80.56815855149155, + "learning_rate": 1.999869393534217e-05, + "loss": 2.3472, + "step": 5820 + }, + { + "epoch": 0.07, + "grad_norm": 87.87638639576258, + "learning_rate": 1.9998684477055466e-05, + "loss": 1.7806, + "step": 5823 + }, + { + "epoch": 0.07, + "grad_norm": 25.090970398691884, + "learning_rate": 1.9998674984646854e-05, + "loss": 1.7486, + "step": 5826 + }, + { + "epoch": 0.07, + "grad_norm": 9.360579594879116, + "learning_rate": 1.999866545811637e-05, + "loss": 1.6593, + "step": 5829 + }, + { + "epoch": 0.07, + "grad_norm": 65.40742182942247, + "learning_rate": 1.9998655897464038e-05, + "loss": 1.6559, + "step": 5832 + }, + { + "epoch": 0.07, + "grad_norm": 22.74103905277988, + "learning_rate": 1.9998646302689895e-05, + "loss": 1.5707, + "step": 5835 + }, + { + "epoch": 0.07, + "grad_norm": 21.006282016007937, + "learning_rate": 1.9998636673793975e-05, + "loss": 1.8026, + "step": 5838 + }, + { + "epoch": 0.07, + "grad_norm": 26.70012815606087, + "learning_rate": 1.9998627010776308e-05, + "loss": 1.4514, + "step": 5841 + }, + { + "epoch": 0.07, + "grad_norm": 46.630404510118375, + "learning_rate": 1.999861731363693e-05, + "loss": 2.0192, + "step": 5844 + }, + { + "epoch": 0.07, + "grad_norm": 69.23495546264465, + "learning_rate": 1.999860758237587e-05, + "loss": 2.0864, + "step": 5847 + }, + { + "epoch": 0.07, + "grad_norm": 35.33855746773436, + "learning_rate": 1.9998597816993168e-05, + "loss": 1.6626, + "step": 5850 + }, + { + "epoch": 0.07, + "grad_norm": 41.30473965268106, + "learning_rate": 1.999858801748885e-05, + "loss": 1.8396, + "step": 5853 + }, + { + "epoch": 0.07, + "grad_norm": 122.66454490418296, + "learning_rate": 1.9998578183862953e-05, + "loss": 1.3543, + "step": 5856 + }, + { + "epoch": 0.07, + "grad_norm": 10.422897301799372, + "learning_rate": 1.9998568316115513e-05, + "loss": 1.7326, + "step": 5859 + }, + { + "epoch": 0.07, + "grad_norm": 18.295195434028653, + "learning_rate": 1.999855841424656e-05, + "loss": 2.2358, + "step": 5862 + }, + { + "epoch": 0.07, + "grad_norm": 9.266046898652807, + "learning_rate": 1.9998548478256127e-05, + "loss": 1.8638, + "step": 5865 + }, + { + "epoch": 0.07, + "grad_norm": 34.748723695741454, + "learning_rate": 1.9998538508144246e-05, + "loss": 1.7817, + "step": 5868 + }, + { + "epoch": 0.07, + "grad_norm": 90.75500040633598, + "learning_rate": 1.999852850391096e-05, + "loss": 1.7094, + "step": 5871 + }, + { + "epoch": 0.07, + "grad_norm": 18.24153120380254, + "learning_rate": 1.9998518465556295e-05, + "loss": 1.8618, + "step": 5874 + }, + { + "epoch": 0.07, + "grad_norm": 19.862222756156328, + "learning_rate": 1.999850839308029e-05, + "loss": 1.8304, + "step": 5877 + }, + { + "epoch": 0.07, + "grad_norm": 24.612609104599343, + "learning_rate": 1.9998498286482975e-05, + "loss": 2.1392, + "step": 5880 + }, + { + "epoch": 0.07, + "grad_norm": 11.81212826475943, + "learning_rate": 1.999848814576439e-05, + "loss": 1.8328, + "step": 5883 + }, + { + "epoch": 0.07, + "grad_norm": 34.752153706108984, + "learning_rate": 1.9998477970924562e-05, + "loss": 2.1084, + "step": 5886 + }, + { + "epoch": 0.07, + "grad_norm": 16.443934968777587, + "learning_rate": 1.9998467761963532e-05, + "loss": 1.8701, + "step": 5889 + }, + { + "epoch": 0.07, + "grad_norm": 7.710241722424582, + "learning_rate": 1.9998457518881336e-05, + "loss": 1.9401, + "step": 5892 + }, + { + "epoch": 0.07, + "grad_norm": 6.2284341455940035, + "learning_rate": 1.9998447241678002e-05, + "loss": 1.5636, + "step": 5895 + }, + { + "epoch": 0.07, + "grad_norm": 103.47967849114148, + "learning_rate": 1.9998436930353572e-05, + "loss": 1.5455, + "step": 5898 + }, + { + "epoch": 0.07, + "grad_norm": 33.46941532789273, + "learning_rate": 1.9998426584908074e-05, + "loss": 1.8043, + "step": 5901 + }, + { + "epoch": 0.07, + "grad_norm": 34.401210607198, + "learning_rate": 1.999841620534155e-05, + "loss": 1.5749, + "step": 5904 + }, + { + "epoch": 0.07, + "grad_norm": 6.251877947661675, + "learning_rate": 1.9998405791654033e-05, + "loss": 1.8586, + "step": 5907 + }, + { + "epoch": 0.07, + "grad_norm": 35.220758713325125, + "learning_rate": 1.999839534384556e-05, + "loss": 1.4905, + "step": 5910 + }, + { + "epoch": 0.07, + "grad_norm": 8.220469064248693, + "learning_rate": 1.9998384861916164e-05, + "loss": 1.9056, + "step": 5913 + }, + { + "epoch": 0.07, + "grad_norm": 21.45576201783569, + "learning_rate": 1.999837434586588e-05, + "loss": 1.8071, + "step": 5916 + }, + { + "epoch": 0.07, + "grad_norm": 50.56677364618803, + "learning_rate": 1.9998363795694748e-05, + "loss": 1.8577, + "step": 5919 + }, + { + "epoch": 0.07, + "grad_norm": 52.56306603463439, + "learning_rate": 1.9998353211402798e-05, + "loss": 1.8577, + "step": 5922 + }, + { + "epoch": 0.07, + "grad_norm": 8.841425482257694, + "learning_rate": 1.9998342592990073e-05, + "loss": 1.693, + "step": 5925 + }, + { + "epoch": 0.07, + "grad_norm": 10.143219673883808, + "learning_rate": 1.9998331940456608e-05, + "loss": 1.6117, + "step": 5928 + }, + { + "epoch": 0.07, + "grad_norm": 69.18014014863441, + "learning_rate": 1.9998321253802434e-05, + "loss": 1.9176, + "step": 5931 + }, + { + "epoch": 0.07, + "grad_norm": 56.31463750173714, + "learning_rate": 1.9998310533027593e-05, + "loss": 1.6141, + "step": 5934 + }, + { + "epoch": 0.07, + "grad_norm": 21.796688606317073, + "learning_rate": 1.999829977813212e-05, + "loss": 1.3801, + "step": 5937 + }, + { + "epoch": 0.07, + "grad_norm": 8.744988790082436, + "learning_rate": 1.9998288989116048e-05, + "loss": 1.4972, + "step": 5940 + }, + { + "epoch": 0.07, + "grad_norm": 39.96913396153911, + "learning_rate": 1.999827816597942e-05, + "loss": 1.481, + "step": 5943 + }, + { + "epoch": 0.07, + "grad_norm": 138.72806886863415, + "learning_rate": 1.999826730872227e-05, + "loss": 1.806, + "step": 5946 + }, + { + "epoch": 0.07, + "grad_norm": 20.622708804945074, + "learning_rate": 1.9998256417344633e-05, + "loss": 1.8104, + "step": 5949 + }, + { + "epoch": 0.07, + "grad_norm": 70.4694843291922, + "learning_rate": 1.999824549184655e-05, + "loss": 2.2573, + "step": 5952 + }, + { + "epoch": 0.07, + "grad_norm": 134.3053270091812, + "learning_rate": 1.9998234532228054e-05, + "loss": 1.3001, + "step": 5955 + }, + { + "epoch": 0.07, + "grad_norm": 47.41098387750146, + "learning_rate": 1.9998223538489186e-05, + "loss": 1.7421, + "step": 5958 + }, + { + "epoch": 0.07, + "grad_norm": 16.272254441037116, + "learning_rate": 1.9998212510629984e-05, + "loss": 1.8272, + "step": 5961 + }, + { + "epoch": 0.07, + "grad_norm": 22.21904768457949, + "learning_rate": 1.9998201448650483e-05, + "loss": 1.3857, + "step": 5964 + }, + { + "epoch": 0.07, + "grad_norm": 23.029789740694145, + "learning_rate": 1.9998190352550722e-05, + "loss": 1.7087, + "step": 5967 + }, + { + "epoch": 0.07, + "grad_norm": 6.598923049240069, + "learning_rate": 1.999817922233074e-05, + "loss": 1.2331, + "step": 5970 + }, + { + "epoch": 0.07, + "grad_norm": 8.261899081667579, + "learning_rate": 1.999816805799057e-05, + "loss": 1.4375, + "step": 5973 + }, + { + "epoch": 0.07, + "grad_norm": 17.524371383239902, + "learning_rate": 1.9998156859530255e-05, + "loss": 1.5696, + "step": 5976 + }, + { + "epoch": 0.07, + "grad_norm": 11.906294127939946, + "learning_rate": 1.9998145626949834e-05, + "loss": 2.0775, + "step": 5979 + }, + { + "epoch": 0.07, + "grad_norm": 197.27953630982768, + "learning_rate": 1.999813436024934e-05, + "loss": 2.07, + "step": 5982 + }, + { + "epoch": 0.07, + "grad_norm": 66.59074568372223, + "learning_rate": 1.999812305942882e-05, + "loss": 1.7264, + "step": 5985 + }, + { + "epoch": 0.07, + "grad_norm": 18.279373074738604, + "learning_rate": 1.9998111724488304e-05, + "loss": 2.0563, + "step": 5988 + }, + { + "epoch": 0.07, + "grad_norm": 64.00383154932746, + "learning_rate": 1.9998100355427833e-05, + "loss": 1.9876, + "step": 5991 + }, + { + "epoch": 0.07, + "grad_norm": 12.561673050946602, + "learning_rate": 1.999808895224745e-05, + "loss": 1.7382, + "step": 5994 + }, + { + "epoch": 0.07, + "grad_norm": 17.82599716942634, + "learning_rate": 1.9998077514947187e-05, + "loss": 1.4835, + "step": 5997 + }, + { + "epoch": 0.07, + "grad_norm": 69.8946368481217, + "learning_rate": 1.999806604352709e-05, + "loss": 1.6061, + "step": 6000 + }, + { + "epoch": 0.07, + "grad_norm": 26.233030379819034, + "learning_rate": 1.999805453798719e-05, + "loss": 1.9596, + "step": 6003 + }, + { + "epoch": 0.07, + "grad_norm": 8.474788228789096, + "learning_rate": 1.9998042998327535e-05, + "loss": 1.7089, + "step": 6006 + }, + { + "epoch": 0.07, + "grad_norm": 5.664942242045029, + "learning_rate": 1.9998031424548162e-05, + "loss": 1.8202, + "step": 6009 + }, + { + "epoch": 0.07, + "grad_norm": 36.02674112574348, + "learning_rate": 1.9998019816649106e-05, + "loss": 1.7575, + "step": 6012 + }, + { + "epoch": 0.07, + "grad_norm": 95.33829309349741, + "learning_rate": 1.999800817463041e-05, + "loss": 2.2537, + "step": 6015 + }, + { + "epoch": 0.07, + "grad_norm": 108.34575631182743, + "learning_rate": 1.9997996498492113e-05, + "loss": 2.0388, + "step": 6018 + }, + { + "epoch": 0.07, + "grad_norm": 121.37138340268261, + "learning_rate": 1.9997984788234258e-05, + "loss": 1.5802, + "step": 6021 + }, + { + "epoch": 0.07, + "grad_norm": 32.677406775733196, + "learning_rate": 1.999797304385688e-05, + "loss": 1.7603, + "step": 6024 + }, + { + "epoch": 0.07, + "grad_norm": 27.278731098236147, + "learning_rate": 1.999796126536002e-05, + "loss": 1.9069, + "step": 6027 + }, + { + "epoch": 0.07, + "grad_norm": 8.301678631595399, + "learning_rate": 1.9997949452743723e-05, + "loss": 1.825, + "step": 6030 + }, + { + "epoch": 0.07, + "grad_norm": 19.874823724127793, + "learning_rate": 1.9997937606008023e-05, + "loss": 1.6445, + "step": 6033 + }, + { + "epoch": 0.07, + "grad_norm": 67.15088517016808, + "learning_rate": 1.9997925725152965e-05, + "loss": 2.0492, + "step": 6036 + }, + { + "epoch": 0.07, + "grad_norm": 83.68623631134588, + "learning_rate": 1.9997913810178587e-05, + "loss": 1.8649, + "step": 6039 + }, + { + "epoch": 0.07, + "grad_norm": 9.105121018438215, + "learning_rate": 1.9997901861084932e-05, + "loss": 1.3154, + "step": 6042 + }, + { + "epoch": 0.07, + "grad_norm": 215.95859621965758, + "learning_rate": 1.9997889877872035e-05, + "loss": 2.0732, + "step": 6045 + }, + { + "epoch": 0.07, + "grad_norm": 29.62144655244898, + "learning_rate": 1.9997877860539946e-05, + "loss": 2.0882, + "step": 6048 + }, + { + "epoch": 0.07, + "grad_norm": 19.396247085527925, + "learning_rate": 1.9997865809088698e-05, + "loss": 1.2261, + "step": 6051 + }, + { + "epoch": 0.07, + "grad_norm": 58.08498646625978, + "learning_rate": 1.9997853723518334e-05, + "loss": 1.923, + "step": 6054 + }, + { + "epoch": 0.07, + "grad_norm": 45.12964631490791, + "learning_rate": 1.99978416038289e-05, + "loss": 2.1908, + "step": 6057 + }, + { + "epoch": 0.07, + "grad_norm": 38.21590043234208, + "learning_rate": 1.9997829450020432e-05, + "loss": 1.2998, + "step": 6060 + }, + { + "epoch": 0.07, + "grad_norm": 184.62389580431156, + "learning_rate": 1.999781726209297e-05, + "loss": 1.4082, + "step": 6063 + }, + { + "epoch": 0.07, + "grad_norm": 75.28977677588256, + "learning_rate": 1.9997805040046563e-05, + "loss": 1.7915, + "step": 6066 + }, + { + "epoch": 0.07, + "grad_norm": 50.25089720894737, + "learning_rate": 1.999779278388125e-05, + "loss": 2.0748, + "step": 6069 + }, + { + "epoch": 0.07, + "grad_norm": 44.72993547150795, + "learning_rate": 1.9997780493597068e-05, + "loss": 2.019, + "step": 6072 + }, + { + "epoch": 0.07, + "grad_norm": 67.3999444889602, + "learning_rate": 1.999776816919406e-05, + "loss": 1.7923, + "step": 6075 + }, + { + "epoch": 0.07, + "grad_norm": 33.397945948035826, + "learning_rate": 1.9997755810672276e-05, + "loss": 1.6899, + "step": 6078 + }, + { + "epoch": 0.07, + "grad_norm": 13.908598574887513, + "learning_rate": 1.9997743418031753e-05, + "loss": 2.0327, + "step": 6081 + }, + { + "epoch": 0.07, + "grad_norm": 20.326918730230776, + "learning_rate": 1.999773099127253e-05, + "loss": 1.5704, + "step": 6084 + }, + { + "epoch": 0.07, + "grad_norm": 43.417670767634874, + "learning_rate": 1.999771853039465e-05, + "loss": 1.413, + "step": 6087 + }, + { + "epoch": 0.07, + "grad_norm": 50.988720288663366, + "learning_rate": 1.9997706035398164e-05, + "loss": 1.8022, + "step": 6090 + }, + { + "epoch": 0.07, + "grad_norm": 14.529531798762601, + "learning_rate": 1.9997693506283103e-05, + "loss": 1.9502, + "step": 6093 + }, + { + "epoch": 0.07, + "grad_norm": 14.39611347083048, + "learning_rate": 1.9997680943049514e-05, + "loss": 1.5978, + "step": 6096 + }, + { + "epoch": 0.07, + "grad_norm": 28.91391052705881, + "learning_rate": 1.999766834569745e-05, + "loss": 1.9305, + "step": 6099 + }, + { + "epoch": 0.07, + "grad_norm": 20.514700298853022, + "learning_rate": 1.9997655714226936e-05, + "loss": 1.7794, + "step": 6102 + }, + { + "epoch": 0.07, + "grad_norm": 18.85853523580481, + "learning_rate": 1.999764304863803e-05, + "loss": 1.6819, + "step": 6105 + }, + { + "epoch": 0.07, + "grad_norm": 18.560740679855105, + "learning_rate": 1.9997630348930766e-05, + "loss": 2.1823, + "step": 6108 + }, + { + "epoch": 0.07, + "grad_norm": 64.17289186253721, + "learning_rate": 1.9997617615105192e-05, + "loss": 1.867, + "step": 6111 + }, + { + "epoch": 0.07, + "grad_norm": 17.662650012473225, + "learning_rate": 1.9997604847161348e-05, + "loss": 1.7244, + "step": 6114 + }, + { + "epoch": 0.07, + "grad_norm": 14.089587958439678, + "learning_rate": 1.9997592045099285e-05, + "loss": 1.5314, + "step": 6117 + }, + { + "epoch": 0.07, + "grad_norm": 73.7945615440101, + "learning_rate": 1.9997579208919036e-05, + "loss": 1.5061, + "step": 6120 + }, + { + "epoch": 0.07, + "grad_norm": 4.155487133227744, + "learning_rate": 1.9997566338620652e-05, + "loss": 1.7927, + "step": 6123 + }, + { + "epoch": 0.07, + "grad_norm": 9.694912805625153, + "learning_rate": 1.9997553434204174e-05, + "loss": 1.6509, + "step": 6126 + }, + { + "epoch": 0.07, + "grad_norm": 36.01470401088418, + "learning_rate": 1.9997540495669653e-05, + "loss": 1.6222, + "step": 6129 + }, + { + "epoch": 0.07, + "grad_norm": 15.418652928751966, + "learning_rate": 1.9997527523017122e-05, + "loss": 1.5235, + "step": 6132 + }, + { + "epoch": 0.07, + "grad_norm": 67.31367718818002, + "learning_rate": 1.999751451624663e-05, + "loss": 1.9367, + "step": 6135 + }, + { + "epoch": 0.07, + "grad_norm": 22.475172693595965, + "learning_rate": 1.9997501475358226e-05, + "loss": 1.8654, + "step": 6138 + }, + { + "epoch": 0.07, + "grad_norm": 34.84360947835846, + "learning_rate": 1.999748840035195e-05, + "loss": 1.6388, + "step": 6141 + }, + { + "epoch": 0.07, + "grad_norm": 15.830275715232505, + "learning_rate": 1.999747529122784e-05, + "loss": 1.6323, + "step": 6144 + }, + { + "epoch": 0.07, + "grad_norm": 58.92207043459872, + "learning_rate": 1.9997462147985955e-05, + "loss": 2.0701, + "step": 6147 + }, + { + "epoch": 0.07, + "grad_norm": 13.953116822093508, + "learning_rate": 1.999744897062633e-05, + "loss": 1.9704, + "step": 6150 + }, + { + "epoch": 0.07, + "grad_norm": 28.524974743398285, + "learning_rate": 1.999743575914902e-05, + "loss": 1.911, + "step": 6153 + }, + { + "epoch": 0.07, + "grad_norm": 11.984243994542245, + "learning_rate": 1.9997422513554056e-05, + "loss": 1.4395, + "step": 6156 + }, + { + "epoch": 0.07, + "grad_norm": 20.134064614531155, + "learning_rate": 1.999740923384149e-05, + "loss": 1.7061, + "step": 6159 + }, + { + "epoch": 0.07, + "grad_norm": 39.34596833860276, + "learning_rate": 1.9997395920011367e-05, + "loss": 1.7563, + "step": 6162 + }, + { + "epoch": 0.07, + "grad_norm": 46.5369581794084, + "learning_rate": 1.999738257206373e-05, + "loss": 1.4101, + "step": 6165 + }, + { + "epoch": 0.07, + "grad_norm": 35.08741511955619, + "learning_rate": 1.9997369189998636e-05, + "loss": 1.8097, + "step": 6168 + }, + { + "epoch": 0.07, + "grad_norm": 23.27468256001051, + "learning_rate": 1.9997355773816113e-05, + "loss": 1.7166, + "step": 6171 + }, + { + "epoch": 0.07, + "grad_norm": 7.136214334223149, + "learning_rate": 1.9997342323516222e-05, + "loss": 1.5604, + "step": 6174 + }, + { + "epoch": 0.07, + "grad_norm": 7.999281826547124, + "learning_rate": 1.9997328839099002e-05, + "loss": 1.7955, + "step": 6177 + }, + { + "epoch": 0.07, + "grad_norm": 12.992269375023222, + "learning_rate": 1.9997315320564498e-05, + "loss": 1.8422, + "step": 6180 + }, + { + "epoch": 0.07, + "grad_norm": 9.634729355919786, + "learning_rate": 1.9997301767912757e-05, + "loss": 1.8753, + "step": 6183 + }, + { + "epoch": 0.07, + "grad_norm": 42.90434418531661, + "learning_rate": 1.9997288181143826e-05, + "loss": 2.184, + "step": 6186 + }, + { + "epoch": 0.07, + "grad_norm": 14.88567038188506, + "learning_rate": 1.9997274560257757e-05, + "loss": 1.8934, + "step": 6189 + }, + { + "epoch": 0.07, + "grad_norm": 12.343106649415907, + "learning_rate": 1.9997260905254587e-05, + "loss": 1.8212, + "step": 6192 + }, + { + "epoch": 0.07, + "grad_norm": 40.32225760733637, + "learning_rate": 1.9997247216134362e-05, + "loss": 1.8457, + "step": 6195 + }, + { + "epoch": 0.07, + "grad_norm": 49.863415702949375, + "learning_rate": 1.9997233492897138e-05, + "loss": 1.6052, + "step": 6198 + }, + { + "epoch": 0.07, + "grad_norm": 22.358751494751182, + "learning_rate": 1.9997219735542958e-05, + "loss": 1.3742, + "step": 6201 + }, + { + "epoch": 0.07, + "grad_norm": 11.910471260536214, + "learning_rate": 1.9997205944071867e-05, + "loss": 1.9964, + "step": 6204 + }, + { + "epoch": 0.07, + "grad_norm": 6.236239828660839, + "learning_rate": 1.999719211848391e-05, + "loss": 1.901, + "step": 6207 + }, + { + "epoch": 0.07, + "grad_norm": 44.580456988272246, + "learning_rate": 1.9997178258779142e-05, + "loss": 1.9495, + "step": 6210 + }, + { + "epoch": 0.07, + "grad_norm": 101.87998060540423, + "learning_rate": 1.9997164364957603e-05, + "loss": 2.3184, + "step": 6213 + }, + { + "epoch": 0.07, + "grad_norm": 42.512677727305494, + "learning_rate": 1.9997150437019343e-05, + "loss": 1.5809, + "step": 6216 + }, + { + "epoch": 0.07, + "grad_norm": 16.17033650098145, + "learning_rate": 1.9997136474964412e-05, + "loss": 1.7549, + "step": 6219 + }, + { + "epoch": 0.07, + "grad_norm": 31.792964598678942, + "learning_rate": 1.9997122478792853e-05, + "loss": 1.4865, + "step": 6222 + }, + { + "epoch": 0.07, + "grad_norm": 41.16125569233075, + "learning_rate": 1.9997108448504715e-05, + "loss": 2.2469, + "step": 6225 + }, + { + "epoch": 0.07, + "grad_norm": 19.236014791580537, + "learning_rate": 1.9997094384100045e-05, + "loss": 1.6298, + "step": 6228 + }, + { + "epoch": 0.07, + "grad_norm": 89.46896009535469, + "learning_rate": 1.99970802855789e-05, + "loss": 1.9789, + "step": 6231 + }, + { + "epoch": 0.07, + "grad_norm": 7.686969455344747, + "learning_rate": 1.9997066152941315e-05, + "loss": 1.7409, + "step": 6234 + }, + { + "epoch": 0.07, + "grad_norm": 25.252248189818264, + "learning_rate": 1.9997051986187346e-05, + "loss": 1.9685, + "step": 6237 + }, + { + "epoch": 0.08, + "grad_norm": 12.441149536537687, + "learning_rate": 1.999703778531704e-05, + "loss": 1.3579, + "step": 6240 + }, + { + "epoch": 0.08, + "grad_norm": 32.37305673487017, + "learning_rate": 1.9997023550330443e-05, + "loss": 1.9806, + "step": 6243 + }, + { + "epoch": 0.08, + "grad_norm": 9.179790226547771, + "learning_rate": 1.999700928122761e-05, + "loss": 1.7873, + "step": 6246 + }, + { + "epoch": 0.08, + "grad_norm": 35.273616746606734, + "learning_rate": 1.999699497800858e-05, + "loss": 1.7773, + "step": 6249 + }, + { + "epoch": 0.08, + "grad_norm": 18.524320388371216, + "learning_rate": 1.9996980640673408e-05, + "loss": 1.5374, + "step": 6252 + }, + { + "epoch": 0.08, + "grad_norm": 9.768967622803478, + "learning_rate": 1.9996966269222143e-05, + "loss": 1.9808, + "step": 6255 + }, + { + "epoch": 0.08, + "grad_norm": 12.386593754131756, + "learning_rate": 1.999695186365483e-05, + "loss": 1.6937, + "step": 6258 + }, + { + "epoch": 0.08, + "grad_norm": 23.33526110123058, + "learning_rate": 1.9996937423971526e-05, + "loss": 1.519, + "step": 6261 + }, + { + "epoch": 0.08, + "grad_norm": 26.538036093234098, + "learning_rate": 1.9996922950172275e-05, + "loss": 1.4962, + "step": 6264 + }, + { + "epoch": 0.08, + "grad_norm": 12.873391015372924, + "learning_rate": 1.9996908442257125e-05, + "loss": 2.0601, + "step": 6267 + }, + { + "epoch": 0.08, + "grad_norm": 30.34792381496365, + "learning_rate": 1.9996893900226128e-05, + "loss": 1.5702, + "step": 6270 + }, + { + "epoch": 0.08, + "grad_norm": 32.182536316239364, + "learning_rate": 1.9996879324079333e-05, + "loss": 1.9598, + "step": 6273 + }, + { + "epoch": 0.08, + "grad_norm": 31.68803267375115, + "learning_rate": 1.999686471381679e-05, + "loss": 1.6102, + "step": 6276 + }, + { + "epoch": 0.08, + "grad_norm": 10.097917639434934, + "learning_rate": 1.999685006943855e-05, + "loss": 1.5181, + "step": 6279 + }, + { + "epoch": 0.08, + "grad_norm": 10.612300764916633, + "learning_rate": 1.999683539094466e-05, + "loss": 2.1879, + "step": 6282 + }, + { + "epoch": 0.08, + "grad_norm": 8.301187836683463, + "learning_rate": 1.9996820678335168e-05, + "loss": 1.9078, + "step": 6285 + }, + { + "epoch": 0.08, + "grad_norm": 12.214465313967969, + "learning_rate": 1.999680593161013e-05, + "loss": 1.7651, + "step": 6288 + }, + { + "epoch": 0.08, + "grad_norm": 40.556721739386916, + "learning_rate": 1.9996791150769598e-05, + "loss": 1.4736, + "step": 6291 + }, + { + "epoch": 0.08, + "grad_norm": 47.67655163193122, + "learning_rate": 1.9996776335813614e-05, + "loss": 1.9064, + "step": 6294 + }, + { + "epoch": 0.08, + "grad_norm": 8.263712422662573, + "learning_rate": 1.9996761486742236e-05, + "loss": 1.6045, + "step": 6297 + }, + { + "epoch": 0.08, + "grad_norm": 11.435743794939588, + "learning_rate": 1.999674660355551e-05, + "loss": 1.1745, + "step": 6300 + }, + { + "epoch": 0.08, + "grad_norm": 15.722534063843145, + "learning_rate": 1.999673168625349e-05, + "loss": 1.499, + "step": 6303 + }, + { + "epoch": 0.08, + "grad_norm": 8.246974352631483, + "learning_rate": 1.9996716734836225e-05, + "loss": 1.8469, + "step": 6306 + }, + { + "epoch": 0.08, + "grad_norm": 8.4579878958239, + "learning_rate": 1.9996701749303767e-05, + "loss": 1.8109, + "step": 6309 + }, + { + "epoch": 0.08, + "grad_norm": 9.194920556911674, + "learning_rate": 1.999668672965617e-05, + "loss": 1.6597, + "step": 6312 + }, + { + "epoch": 0.08, + "grad_norm": 74.9168737128641, + "learning_rate": 1.9996671675893476e-05, + "loss": 1.9425, + "step": 6315 + }, + { + "epoch": 0.08, + "grad_norm": 25.366731337155525, + "learning_rate": 1.9996656588015745e-05, + "loss": 1.8346, + "step": 6318 + }, + { + "epoch": 0.08, + "grad_norm": 15.5262848460894, + "learning_rate": 1.9996641466023026e-05, + "loss": 1.8005, + "step": 6321 + }, + { + "epoch": 0.08, + "grad_norm": 33.443899851606616, + "learning_rate": 1.999662630991537e-05, + "loss": 1.9645, + "step": 6324 + }, + { + "epoch": 0.08, + "grad_norm": 6.35602876034418, + "learning_rate": 1.999661111969283e-05, + "loss": 1.7807, + "step": 6327 + }, + { + "epoch": 0.08, + "grad_norm": 32.51291328256724, + "learning_rate": 1.9996595895355456e-05, + "loss": 1.8759, + "step": 6330 + }, + { + "epoch": 0.08, + "grad_norm": 44.16222873228755, + "learning_rate": 1.9996580636903304e-05, + "loss": 1.5922, + "step": 6333 + }, + { + "epoch": 0.08, + "grad_norm": 38.88830668842668, + "learning_rate": 1.999656534433642e-05, + "loss": 1.7898, + "step": 6336 + }, + { + "epoch": 0.08, + "grad_norm": 45.0890252261434, + "learning_rate": 1.9996550017654858e-05, + "loss": 1.7306, + "step": 6339 + }, + { + "epoch": 0.08, + "grad_norm": 42.29728781178425, + "learning_rate": 1.9996534656858672e-05, + "loss": 1.9711, + "step": 6342 + }, + { + "epoch": 0.08, + "grad_norm": 30.113369907178637, + "learning_rate": 1.9996519261947916e-05, + "loss": 1.7187, + "step": 6345 + }, + { + "epoch": 0.08, + "grad_norm": 16.877261767020872, + "learning_rate": 1.999650383292264e-05, + "loss": 1.7812, + "step": 6348 + }, + { + "epoch": 0.08, + "grad_norm": 103.454514059647, + "learning_rate": 1.9996488369782897e-05, + "loss": 2.0605, + "step": 6351 + }, + { + "epoch": 0.08, + "grad_norm": 5.305628723139304, + "learning_rate": 1.999647287252874e-05, + "loss": 1.6467, + "step": 6354 + }, + { + "epoch": 0.08, + "grad_norm": 37.27762155206476, + "learning_rate": 1.999645734116022e-05, + "loss": 1.8986, + "step": 6357 + }, + { + "epoch": 0.08, + "grad_norm": 51.12236304771572, + "learning_rate": 1.9996441775677394e-05, + "loss": 1.5685, + "step": 6360 + }, + { + "epoch": 0.08, + "grad_norm": 5.6621504704572025, + "learning_rate": 1.9996426176080313e-05, + "loss": 1.9523, + "step": 6363 + }, + { + "epoch": 0.08, + "grad_norm": 40.08224611382125, + "learning_rate": 1.9996410542369028e-05, + "loss": 2.0083, + "step": 6366 + }, + { + "epoch": 0.08, + "grad_norm": 12.371413870052567, + "learning_rate": 1.9996394874543596e-05, + "loss": 1.6181, + "step": 6369 + }, + { + "epoch": 0.08, + "grad_norm": 5.280290907805573, + "learning_rate": 1.9996379172604067e-05, + "loss": 1.7978, + "step": 6372 + }, + { + "epoch": 0.08, + "grad_norm": 31.902870409605686, + "learning_rate": 1.99963634365505e-05, + "loss": 1.7006, + "step": 6375 + }, + { + "epoch": 0.08, + "grad_norm": 17.89829743994418, + "learning_rate": 1.9996347666382943e-05, + "loss": 1.847, + "step": 6378 + }, + { + "epoch": 0.08, + "grad_norm": 23.625755046237046, + "learning_rate": 1.9996331862101455e-05, + "loss": 2.0645, + "step": 6381 + }, + { + "epoch": 0.08, + "grad_norm": 20.452658082351228, + "learning_rate": 1.9996316023706085e-05, + "loss": 1.6032, + "step": 6384 + }, + { + "epoch": 0.08, + "grad_norm": 9.789127276110388, + "learning_rate": 1.9996300151196886e-05, + "loss": 1.827, + "step": 6387 + }, + { + "epoch": 0.08, + "grad_norm": 5.7495239903056214, + "learning_rate": 1.999628424457392e-05, + "loss": 1.7367, + "step": 6390 + }, + { + "epoch": 0.08, + "grad_norm": 50.58752356391413, + "learning_rate": 1.9996268303837233e-05, + "loss": 1.7322, + "step": 6393 + }, + { + "epoch": 0.08, + "grad_norm": 22.27982030420816, + "learning_rate": 1.9996252328986884e-05, + "loss": 1.936, + "step": 6396 + }, + { + "epoch": 0.08, + "grad_norm": 20.25053360628043, + "learning_rate": 1.9996236320022927e-05, + "loss": 1.9814, + "step": 6399 + }, + { + "epoch": 0.08, + "grad_norm": 47.04879988905278, + "learning_rate": 1.9996220276945416e-05, + "loss": 1.538, + "step": 6402 + }, + { + "epoch": 0.08, + "grad_norm": 19.726487496552295, + "learning_rate": 1.999620419975441e-05, + "loss": 1.7087, + "step": 6405 + }, + { + "epoch": 0.08, + "grad_norm": 18.825519438303168, + "learning_rate": 1.9996188088449952e-05, + "loss": 1.8273, + "step": 6408 + }, + { + "epoch": 0.08, + "grad_norm": 9.024515262294308, + "learning_rate": 1.9996171943032108e-05, + "loss": 1.9351, + "step": 6411 + }, + { + "epoch": 0.08, + "grad_norm": 11.738386134962852, + "learning_rate": 1.999615576350093e-05, + "loss": 1.7206, + "step": 6414 + }, + { + "epoch": 0.08, + "grad_norm": 6.221064442410105, + "learning_rate": 1.9996139549856472e-05, + "loss": 1.7499, + "step": 6417 + }, + { + "epoch": 0.08, + "grad_norm": 14.247337996186014, + "learning_rate": 1.9996123302098793e-05, + "loss": 1.7794, + "step": 6420 + }, + { + "epoch": 0.08, + "grad_norm": 13.290049091866388, + "learning_rate": 1.9996107020227945e-05, + "loss": 1.8385, + "step": 6423 + }, + { + "epoch": 0.08, + "grad_norm": 41.734526834442434, + "learning_rate": 1.999609070424398e-05, + "loss": 2.4945, + "step": 6426 + }, + { + "epoch": 0.08, + "grad_norm": 58.86364699066437, + "learning_rate": 1.9996074354146964e-05, + "loss": 2.1654, + "step": 6429 + }, + { + "epoch": 0.08, + "grad_norm": 12.925556096220824, + "learning_rate": 1.9996057969936942e-05, + "loss": 2.0534, + "step": 6432 + }, + { + "epoch": 0.08, + "grad_norm": 46.28665318782381, + "learning_rate": 1.999604155161398e-05, + "loss": 1.7659, + "step": 6435 + }, + { + "epoch": 0.08, + "grad_norm": 34.71321070788826, + "learning_rate": 1.9996025099178124e-05, + "loss": 2.149, + "step": 6438 + }, + { + "epoch": 0.08, + "grad_norm": 43.5514493863995, + "learning_rate": 1.9996008612629435e-05, + "loss": 1.8398, + "step": 6441 + }, + { + "epoch": 0.08, + "grad_norm": 11.224240289516466, + "learning_rate": 1.9995992091967972e-05, + "loss": 1.783, + "step": 6444 + }, + { + "epoch": 0.08, + "grad_norm": 93.37790277184094, + "learning_rate": 1.9995975537193785e-05, + "loss": 1.71, + "step": 6447 + }, + { + "epoch": 0.08, + "grad_norm": 14.801321497955682, + "learning_rate": 1.999595894830694e-05, + "loss": 1.8409, + "step": 6450 + }, + { + "epoch": 0.08, + "grad_norm": 29.82851250406418, + "learning_rate": 1.9995942325307484e-05, + "loss": 2.0874, + "step": 6453 + }, + { + "epoch": 0.08, + "grad_norm": 46.43386933941382, + "learning_rate": 1.9995925668195474e-05, + "loss": 1.8877, + "step": 6456 + }, + { + "epoch": 0.08, + "grad_norm": 30.79425034305022, + "learning_rate": 1.9995908976970974e-05, + "loss": 1.8929, + "step": 6459 + }, + { + "epoch": 0.08, + "grad_norm": 50.30237673013949, + "learning_rate": 1.9995892251634038e-05, + "loss": 2.0788, + "step": 6462 + }, + { + "epoch": 0.08, + "grad_norm": 7.030562027420222, + "learning_rate": 1.999587549218472e-05, + "loss": 1.6995, + "step": 6465 + }, + { + "epoch": 0.08, + "grad_norm": 12.413375700974985, + "learning_rate": 1.999585869862308e-05, + "loss": 1.86, + "step": 6468 + }, + { + "epoch": 0.08, + "grad_norm": 22.962803897615565, + "learning_rate": 1.9995841870949175e-05, + "loss": 1.9554, + "step": 6471 + }, + { + "epoch": 0.08, + "grad_norm": 45.71374178704574, + "learning_rate": 1.999582500916306e-05, + "loss": 1.4518, + "step": 6474 + }, + { + "epoch": 0.08, + "grad_norm": 52.15562045474428, + "learning_rate": 1.99958081132648e-05, + "loss": 1.762, + "step": 6477 + }, + { + "epoch": 0.08, + "grad_norm": 15.765324662142607, + "learning_rate": 1.999579118325444e-05, + "loss": 1.9844, + "step": 6480 + }, + { + "epoch": 0.08, + "grad_norm": 19.147386976216758, + "learning_rate": 1.9995774219132053e-05, + "loss": 1.8289, + "step": 6483 + }, + { + "epoch": 0.08, + "grad_norm": 8.355860356387254, + "learning_rate": 1.9995757220897683e-05, + "loss": 1.5016, + "step": 6486 + }, + { + "epoch": 0.08, + "grad_norm": 9.795724903221732, + "learning_rate": 1.99957401885514e-05, + "loss": 1.2585, + "step": 6489 + }, + { + "epoch": 0.08, + "grad_norm": 60.497488692402484, + "learning_rate": 1.9995723122093252e-05, + "loss": 1.9033, + "step": 6492 + }, + { + "epoch": 0.08, + "grad_norm": 26.01356945666927, + "learning_rate": 1.9995706021523302e-05, + "loss": 1.7064, + "step": 6495 + }, + { + "epoch": 0.08, + "grad_norm": 18.520060587849148, + "learning_rate": 1.9995688886841608e-05, + "loss": 1.6237, + "step": 6498 + }, + { + "epoch": 0.08, + "grad_norm": 23.056557653486156, + "learning_rate": 1.999567171804823e-05, + "loss": 1.8844, + "step": 6501 + }, + { + "epoch": 0.08, + "grad_norm": 11.387925665097884, + "learning_rate": 1.9995654515143222e-05, + "loss": 1.7424, + "step": 6504 + }, + { + "epoch": 0.08, + "grad_norm": 36.54014964112843, + "learning_rate": 1.9995637278126646e-05, + "loss": 2.0261, + "step": 6507 + }, + { + "epoch": 0.08, + "grad_norm": 13.171703731904882, + "learning_rate": 1.999562000699856e-05, + "loss": 1.8897, + "step": 6510 + }, + { + "epoch": 0.08, + "grad_norm": 11.49481709088908, + "learning_rate": 1.9995602701759025e-05, + "loss": 1.3732, + "step": 6513 + }, + { + "epoch": 0.08, + "grad_norm": 12.232782693860077, + "learning_rate": 1.9995585362408096e-05, + "loss": 1.848, + "step": 6516 + }, + { + "epoch": 0.08, + "grad_norm": 5.281372829014973, + "learning_rate": 1.9995567988945835e-05, + "loss": 2.1459, + "step": 6519 + }, + { + "epoch": 0.08, + "grad_norm": 7.214402026900007, + "learning_rate": 1.9995550581372304e-05, + "loss": 2.0613, + "step": 6522 + }, + { + "epoch": 0.08, + "grad_norm": 14.823744057946099, + "learning_rate": 1.9995533139687556e-05, + "loss": 1.8437, + "step": 6525 + }, + { + "epoch": 0.08, + "grad_norm": 31.55240403301301, + "learning_rate": 1.9995515663891657e-05, + "loss": 1.6318, + "step": 6528 + }, + { + "epoch": 0.08, + "grad_norm": 12.453649958221574, + "learning_rate": 1.9995498153984664e-05, + "loss": 1.9312, + "step": 6531 + }, + { + "epoch": 0.08, + "grad_norm": 28.907747609515678, + "learning_rate": 1.999548060996663e-05, + "loss": 1.9902, + "step": 6534 + }, + { + "epoch": 0.08, + "grad_norm": 11.175227211102843, + "learning_rate": 1.9995463031837625e-05, + "loss": 1.5384, + "step": 6537 + }, + { + "epoch": 0.08, + "grad_norm": 15.322451712063016, + "learning_rate": 1.9995445419597704e-05, + "loss": 1.582, + "step": 6540 + }, + { + "epoch": 0.08, + "grad_norm": 67.33972540634397, + "learning_rate": 1.999542777324693e-05, + "loss": 1.9679, + "step": 6543 + }, + { + "epoch": 0.08, + "grad_norm": 35.142893291514945, + "learning_rate": 1.999541009278536e-05, + "loss": 1.6177, + "step": 6546 + }, + { + "epoch": 0.08, + "grad_norm": 8.004938720440304, + "learning_rate": 1.9995392378213055e-05, + "loss": 1.5929, + "step": 6549 + }, + { + "epoch": 0.08, + "grad_norm": 27.917140817064645, + "learning_rate": 1.999537462953008e-05, + "loss": 1.8066, + "step": 6552 + }, + { + "epoch": 0.08, + "grad_norm": 50.90031021411218, + "learning_rate": 1.9995356846736487e-05, + "loss": 2.0657, + "step": 6555 + }, + { + "epoch": 0.08, + "grad_norm": 66.81271059250062, + "learning_rate": 1.9995339029832343e-05, + "loss": 1.8015, + "step": 6558 + }, + { + "epoch": 0.08, + "grad_norm": 8.86887076171431, + "learning_rate": 1.9995321178817707e-05, + "loss": 1.481, + "step": 6561 + }, + { + "epoch": 0.08, + "grad_norm": 13.5795959319689, + "learning_rate": 1.999530329369264e-05, + "loss": 1.7482, + "step": 6564 + }, + { + "epoch": 0.08, + "grad_norm": 79.30363417895629, + "learning_rate": 1.9995285374457203e-05, + "loss": 1.6206, + "step": 6567 + }, + { + "epoch": 0.08, + "grad_norm": 5.349671019936564, + "learning_rate": 1.999526742111146e-05, + "loss": 1.5968, + "step": 6570 + }, + { + "epoch": 0.08, + "grad_norm": 29.38218509912368, + "learning_rate": 1.9995249433655467e-05, + "loss": 1.3542, + "step": 6573 + }, + { + "epoch": 0.08, + "grad_norm": 9.065709865998043, + "learning_rate": 1.9995231412089287e-05, + "loss": 2.2996, + "step": 6576 + }, + { + "epoch": 0.08, + "grad_norm": 9.404360187392616, + "learning_rate": 1.9995213356412985e-05, + "loss": 1.9048, + "step": 6579 + }, + { + "epoch": 0.08, + "grad_norm": 13.267447896610582, + "learning_rate": 1.999519526662662e-05, + "loss": 1.773, + "step": 6582 + }, + { + "epoch": 0.08, + "grad_norm": 13.073033764495516, + "learning_rate": 1.999517714273025e-05, + "loss": 1.7746, + "step": 6585 + }, + { + "epoch": 0.08, + "grad_norm": 13.243342473735433, + "learning_rate": 1.9995158984723946e-05, + "loss": 1.9423, + "step": 6588 + }, + { + "epoch": 0.08, + "grad_norm": 27.259531509856625, + "learning_rate": 1.999514079260776e-05, + "loss": 1.3064, + "step": 6591 + }, + { + "epoch": 0.08, + "grad_norm": 40.94154522754161, + "learning_rate": 1.999512256638176e-05, + "loss": 1.7979, + "step": 6594 + }, + { + "epoch": 0.08, + "grad_norm": 44.47318496794785, + "learning_rate": 1.999510430604601e-05, + "loss": 1.7743, + "step": 6597 + }, + { + "epoch": 0.08, + "grad_norm": 3.86496327340951, + "learning_rate": 1.9995086011600566e-05, + "loss": 1.8444, + "step": 6600 + }, + { + "epoch": 0.08, + "grad_norm": 16.467998179462004, + "learning_rate": 1.9995067683045496e-05, + "loss": 1.7859, + "step": 6603 + }, + { + "epoch": 0.08, + "grad_norm": 9.043378183998875, + "learning_rate": 1.9995049320380856e-05, + "loss": 1.9313, + "step": 6606 + }, + { + "epoch": 0.08, + "grad_norm": 26.370480562914057, + "learning_rate": 1.9995030923606714e-05, + "loss": 1.7651, + "step": 6609 + }, + { + "epoch": 0.08, + "grad_norm": 5.1366198008462085, + "learning_rate": 1.9995012492723135e-05, + "loss": 1.9491, + "step": 6612 + }, + { + "epoch": 0.08, + "grad_norm": 44.362428191708545, + "learning_rate": 1.999499402773018e-05, + "loss": 1.7243, + "step": 6615 + }, + { + "epoch": 0.08, + "grad_norm": 10.351924093051931, + "learning_rate": 1.9994975528627905e-05, + "loss": 1.8139, + "step": 6618 + }, + { + "epoch": 0.08, + "grad_norm": 11.843422998070013, + "learning_rate": 1.9994956995416383e-05, + "loss": 1.9093, + "step": 6621 + }, + { + "epoch": 0.08, + "grad_norm": 8.861459033813643, + "learning_rate": 1.999493842809567e-05, + "loss": 1.5326, + "step": 6624 + }, + { + "epoch": 0.08, + "grad_norm": 12.517017742394348, + "learning_rate": 1.9994919826665833e-05, + "loss": 1.8989, + "step": 6627 + }, + { + "epoch": 0.08, + "grad_norm": 11.76250399140562, + "learning_rate": 1.9994901191126933e-05, + "loss": 2.0072, + "step": 6630 + }, + { + "epoch": 0.08, + "grad_norm": 9.37393810200894, + "learning_rate": 1.999488252147904e-05, + "loss": 1.7482, + "step": 6633 + }, + { + "epoch": 0.08, + "grad_norm": 6.067462017572313, + "learning_rate": 1.999486381772221e-05, + "loss": 1.6948, + "step": 6636 + }, + { + "epoch": 0.08, + "grad_norm": 22.99874893378358, + "learning_rate": 1.9994845079856508e-05, + "loss": 1.6363, + "step": 6639 + }, + { + "epoch": 0.08, + "grad_norm": 57.09845704645784, + "learning_rate": 1.9994826307882004e-05, + "loss": 1.9382, + "step": 6642 + }, + { + "epoch": 0.08, + "grad_norm": 25.35021397302533, + "learning_rate": 1.9994807501798757e-05, + "loss": 1.7429, + "step": 6645 + }, + { + "epoch": 0.08, + "grad_norm": 22.78874973640252, + "learning_rate": 1.999478866160683e-05, + "loss": 1.755, + "step": 6648 + }, + { + "epoch": 0.08, + "grad_norm": 150.99034814117502, + "learning_rate": 1.999476978730629e-05, + "loss": 1.8644, + "step": 6651 + }, + { + "epoch": 0.08, + "grad_norm": 5.435934498817336, + "learning_rate": 1.9994750878897206e-05, + "loss": 1.8309, + "step": 6654 + }, + { + "epoch": 0.08, + "grad_norm": 54.55231493251369, + "learning_rate": 1.999473193637963e-05, + "loss": 1.6331, + "step": 6657 + }, + { + "epoch": 0.08, + "grad_norm": 34.67890650629704, + "learning_rate": 1.999471295975364e-05, + "loss": 1.9678, + "step": 6660 + }, + { + "epoch": 0.08, + "grad_norm": 52.5251035011548, + "learning_rate": 1.999469394901929e-05, + "loss": 1.5726, + "step": 6663 + }, + { + "epoch": 0.08, + "grad_norm": 9.499157789055383, + "learning_rate": 1.999467490417665e-05, + "loss": 1.8647, + "step": 6666 + }, + { + "epoch": 0.08, + "grad_norm": 39.72482968265708, + "learning_rate": 1.9994655825225786e-05, + "loss": 1.8849, + "step": 6669 + }, + { + "epoch": 0.08, + "grad_norm": 15.676763483833438, + "learning_rate": 1.999463671216676e-05, + "loss": 1.9745, + "step": 6672 + }, + { + "epoch": 0.08, + "grad_norm": 57.43082784864519, + "learning_rate": 1.9994617564999643e-05, + "loss": 1.9789, + "step": 6675 + }, + { + "epoch": 0.08, + "grad_norm": 16.959262408510092, + "learning_rate": 1.9994598383724493e-05, + "loss": 1.6204, + "step": 6678 + }, + { + "epoch": 0.08, + "grad_norm": 21.501415879133916, + "learning_rate": 1.999457916834138e-05, + "loss": 1.5986, + "step": 6681 + }, + { + "epoch": 0.08, + "grad_norm": 15.63516042606551, + "learning_rate": 1.9994559918850368e-05, + "loss": 1.943, + "step": 6684 + }, + { + "epoch": 0.08, + "grad_norm": 39.03610259757805, + "learning_rate": 1.999454063525152e-05, + "loss": 2.0037, + "step": 6687 + }, + { + "epoch": 0.08, + "grad_norm": 27.99677389749355, + "learning_rate": 1.999452131754491e-05, + "loss": 1.7606, + "step": 6690 + }, + { + "epoch": 0.08, + "grad_norm": 19.290712533061374, + "learning_rate": 1.9994501965730597e-05, + "loss": 1.7228, + "step": 6693 + }, + { + "epoch": 0.08, + "grad_norm": 6.44530500696752, + "learning_rate": 1.999448257980865e-05, + "loss": 1.4574, + "step": 6696 + }, + { + "epoch": 0.08, + "grad_norm": 25.6628366076579, + "learning_rate": 1.999446315977913e-05, + "loss": 1.7991, + "step": 6699 + }, + { + "epoch": 0.08, + "grad_norm": 24.118734537424473, + "learning_rate": 1.9994443705642112e-05, + "loss": 1.4564, + "step": 6702 + }, + { + "epoch": 0.08, + "grad_norm": 24.487358350043998, + "learning_rate": 1.9994424217397652e-05, + "loss": 1.4087, + "step": 6705 + }, + { + "epoch": 0.08, + "grad_norm": 80.7682638369631, + "learning_rate": 1.9994404695045828e-05, + "loss": 1.4897, + "step": 6708 + }, + { + "epoch": 0.08, + "grad_norm": 4.2811305903835635, + "learning_rate": 1.9994385138586694e-05, + "loss": 1.4725, + "step": 6711 + }, + { + "epoch": 0.08, + "grad_norm": 12.43917869438641, + "learning_rate": 1.9994365548020333e-05, + "loss": 2.2963, + "step": 6714 + }, + { + "epoch": 0.08, + "grad_norm": 22.035367640292822, + "learning_rate": 1.9994345923346793e-05, + "loss": 1.8499, + "step": 6717 + }, + { + "epoch": 0.08, + "grad_norm": 43.36637811612897, + "learning_rate": 1.9994326264566154e-05, + "loss": 1.5298, + "step": 6720 + }, + { + "epoch": 0.08, + "grad_norm": 18.250753224865598, + "learning_rate": 1.999430657167848e-05, + "loss": 1.6253, + "step": 6723 + }, + { + "epoch": 0.08, + "grad_norm": 6.760458984350768, + "learning_rate": 1.9994286844683838e-05, + "loss": 1.9232, + "step": 6726 + }, + { + "epoch": 0.08, + "grad_norm": 6.323300404543237, + "learning_rate": 1.9994267083582293e-05, + "loss": 1.556, + "step": 6729 + }, + { + "epoch": 0.08, + "grad_norm": 13.571174311031168, + "learning_rate": 1.999424728837392e-05, + "loss": 1.5936, + "step": 6732 + }, + { + "epoch": 0.08, + "grad_norm": 40.59227410436269, + "learning_rate": 1.9994227459058772e-05, + "loss": 1.9941, + "step": 6735 + }, + { + "epoch": 0.08, + "grad_norm": 19.501260215349625, + "learning_rate": 1.999420759563693e-05, + "loss": 1.5326, + "step": 6738 + }, + { + "epoch": 0.08, + "grad_norm": 55.26665441612083, + "learning_rate": 1.999418769810846e-05, + "loss": 1.8207, + "step": 6741 + }, + { + "epoch": 0.08, + "grad_norm": 8.186990560500297, + "learning_rate": 1.999416776647342e-05, + "loss": 2.4495, + "step": 6744 + }, + { + "epoch": 0.08, + "grad_norm": 16.948117674190232, + "learning_rate": 1.999414780073189e-05, + "loss": 1.3711, + "step": 6747 + }, + { + "epoch": 0.08, + "grad_norm": 37.371851641461774, + "learning_rate": 1.9994127800883935e-05, + "loss": 1.7344, + "step": 6750 + }, + { + "epoch": 0.08, + "grad_norm": 10.881580762291227, + "learning_rate": 1.9994107766929622e-05, + "loss": 1.5593, + "step": 6753 + }, + { + "epoch": 0.08, + "grad_norm": 7.5115630690919994, + "learning_rate": 1.9994087698869016e-05, + "loss": 1.8488, + "step": 6756 + }, + { + "epoch": 0.08, + "grad_norm": 4.725109420881986, + "learning_rate": 1.999406759670219e-05, + "loss": 1.7697, + "step": 6759 + }, + { + "epoch": 0.08, + "grad_norm": 9.715088110521354, + "learning_rate": 1.999404746042921e-05, + "loss": 1.5122, + "step": 6762 + }, + { + "epoch": 0.08, + "grad_norm": 15.228313149670315, + "learning_rate": 1.9994027290050145e-05, + "loss": 1.795, + "step": 6765 + }, + { + "epoch": 0.08, + "grad_norm": 21.997341094229043, + "learning_rate": 1.9994007085565067e-05, + "loss": 1.8431, + "step": 6768 + }, + { + "epoch": 0.08, + "grad_norm": 4.928465654305643, + "learning_rate": 1.999398684697404e-05, + "loss": 1.7857, + "step": 6771 + }, + { + "epoch": 0.08, + "grad_norm": 28.939883835957875, + "learning_rate": 1.9993966574277143e-05, + "loss": 1.5503, + "step": 6774 + }, + { + "epoch": 0.08, + "grad_norm": 56.52826655066743, + "learning_rate": 1.9993946267474432e-05, + "loss": 1.5669, + "step": 6777 + }, + { + "epoch": 0.08, + "grad_norm": 43.239326518360585, + "learning_rate": 1.999392592656598e-05, + "loss": 1.6463, + "step": 6780 + }, + { + "epoch": 0.08, + "grad_norm": 20.513613675141666, + "learning_rate": 1.9993905551551862e-05, + "loss": 1.5417, + "step": 6783 + }, + { + "epoch": 0.08, + "grad_norm": 17.321544821640096, + "learning_rate": 1.9993885142432145e-05, + "loss": 1.8805, + "step": 6786 + }, + { + "epoch": 0.08, + "grad_norm": 85.34938380110592, + "learning_rate": 1.9993864699206894e-05, + "loss": 1.9028, + "step": 6789 + }, + { + "epoch": 0.08, + "grad_norm": 38.35770293749208, + "learning_rate": 1.9993844221876186e-05, + "loss": 1.7752, + "step": 6792 + }, + { + "epoch": 0.08, + "grad_norm": 13.939244343658137, + "learning_rate": 1.9993823710440087e-05, + "loss": 1.5164, + "step": 6795 + }, + { + "epoch": 0.08, + "grad_norm": 20.974019151581118, + "learning_rate": 1.9993803164898666e-05, + "loss": 1.8967, + "step": 6798 + }, + { + "epoch": 0.08, + "grad_norm": 29.719902784993224, + "learning_rate": 1.9993782585251996e-05, + "loss": 1.6495, + "step": 6801 + }, + { + "epoch": 0.08, + "grad_norm": 12.513235936765788, + "learning_rate": 1.9993761971500144e-05, + "loss": 1.7515, + "step": 6804 + }, + { + "epoch": 0.08, + "grad_norm": 40.66367004038342, + "learning_rate": 1.999374132364318e-05, + "loss": 2.3408, + "step": 6807 + }, + { + "epoch": 0.08, + "grad_norm": 61.27445437789141, + "learning_rate": 1.9993720641681183e-05, + "loss": 2.0049, + "step": 6810 + }, + { + "epoch": 0.08, + "grad_norm": 50.51693362037939, + "learning_rate": 1.9993699925614215e-05, + "loss": 1.4329, + "step": 6813 + }, + { + "epoch": 0.08, + "grad_norm": 17.477439652705925, + "learning_rate": 1.9993679175442347e-05, + "loss": 1.4946, + "step": 6816 + }, + { + "epoch": 0.08, + "grad_norm": 285.4777180250203, + "learning_rate": 1.9993658391165653e-05, + "loss": 1.7652, + "step": 6819 + }, + { + "epoch": 0.08, + "grad_norm": 12.518434691534452, + "learning_rate": 1.99936375727842e-05, + "loss": 2.2176, + "step": 6822 + }, + { + "epoch": 0.08, + "grad_norm": 42.53274592397526, + "learning_rate": 1.9993616720298067e-05, + "loss": 1.439, + "step": 6825 + }, + { + "epoch": 0.08, + "grad_norm": 25.39068318602163, + "learning_rate": 1.9993595833707314e-05, + "loss": 1.8858, + "step": 6828 + }, + { + "epoch": 0.08, + "grad_norm": 31.642484721251762, + "learning_rate": 1.9993574913012023e-05, + "loss": 1.765, + "step": 6831 + }, + { + "epoch": 0.08, + "grad_norm": 29.43839340839827, + "learning_rate": 1.999355395821226e-05, + "loss": 1.7406, + "step": 6834 + }, + { + "epoch": 0.08, + "grad_norm": 19.96915037157472, + "learning_rate": 1.999353296930809e-05, + "loss": 1.8677, + "step": 6837 + }, + { + "epoch": 0.08, + "grad_norm": 6.052736462966983, + "learning_rate": 1.99935119462996e-05, + "loss": 1.5848, + "step": 6840 + }, + { + "epoch": 0.08, + "grad_norm": 18.552818078853036, + "learning_rate": 1.999349088918685e-05, + "loss": 1.943, + "step": 6843 + }, + { + "epoch": 0.08, + "grad_norm": 88.83163241539964, + "learning_rate": 1.9993469797969912e-05, + "loss": 1.8356, + "step": 6846 + }, + { + "epoch": 0.08, + "grad_norm": 20.711305396310472, + "learning_rate": 1.9993448672648866e-05, + "loss": 1.6944, + "step": 6849 + }, + { + "epoch": 0.08, + "grad_norm": 7.8020309353112856, + "learning_rate": 1.9993427513223776e-05, + "loss": 1.4644, + "step": 6852 + }, + { + "epoch": 0.08, + "grad_norm": 15.682511617159017, + "learning_rate": 1.999340631969472e-05, + "loss": 1.6858, + "step": 6855 + }, + { + "epoch": 0.08, + "grad_norm": 24.60719447335025, + "learning_rate": 1.9993385092061768e-05, + "loss": 1.9894, + "step": 6858 + }, + { + "epoch": 0.08, + "grad_norm": 12.514918671481766, + "learning_rate": 1.999336383032499e-05, + "loss": 2.0651, + "step": 6861 + }, + { + "epoch": 0.08, + "grad_norm": 4.721883677441648, + "learning_rate": 1.9993342534484466e-05, + "loss": 2.2657, + "step": 6864 + }, + { + "epoch": 0.08, + "grad_norm": 11.362374237206689, + "learning_rate": 1.999332120454026e-05, + "loss": 1.8945, + "step": 6867 + }, + { + "epoch": 0.08, + "grad_norm": 11.297407517698595, + "learning_rate": 1.9993299840492448e-05, + "loss": 1.3246, + "step": 6870 + }, + { + "epoch": 0.08, + "grad_norm": 39.06574061240937, + "learning_rate": 1.9993278442341104e-05, + "loss": 2.0364, + "step": 6873 + }, + { + "epoch": 0.08, + "grad_norm": 16.279826427306798, + "learning_rate": 1.99932570100863e-05, + "loss": 1.9276, + "step": 6876 + }, + { + "epoch": 0.08, + "grad_norm": 11.259260611138602, + "learning_rate": 1.9993235543728107e-05, + "loss": 1.7405, + "step": 6879 + }, + { + "epoch": 0.08, + "grad_norm": 14.396211738494408, + "learning_rate": 1.99932140432666e-05, + "loss": 1.5556, + "step": 6882 + }, + { + "epoch": 0.08, + "grad_norm": 19.15138397477753, + "learning_rate": 1.999319250870186e-05, + "loss": 1.3931, + "step": 6885 + }, + { + "epoch": 0.08, + "grad_norm": 17.631403324965422, + "learning_rate": 1.9993170940033944e-05, + "loss": 1.8167, + "step": 6888 + }, + { + "epoch": 0.08, + "grad_norm": 7.439072214498939, + "learning_rate": 1.9993149337262943e-05, + "loss": 2.0367, + "step": 6891 + }, + { + "epoch": 0.08, + "grad_norm": 53.69895792876965, + "learning_rate": 1.9993127700388922e-05, + "loss": 1.904, + "step": 6894 + }, + { + "epoch": 0.08, + "grad_norm": 14.137678158852369, + "learning_rate": 1.9993106029411952e-05, + "loss": 1.7016, + "step": 6897 + }, + { + "epoch": 0.08, + "grad_norm": 28.224527260419155, + "learning_rate": 1.9993084324332114e-05, + "loss": 1.9584, + "step": 6900 + }, + { + "epoch": 0.08, + "grad_norm": 20.948914330737598, + "learning_rate": 1.9993062585149475e-05, + "loss": 1.8405, + "step": 6903 + }, + { + "epoch": 0.08, + "grad_norm": 12.836837822883572, + "learning_rate": 1.9993040811864115e-05, + "loss": 2.1968, + "step": 6906 + }, + { + "epoch": 0.08, + "grad_norm": 16.418783316517676, + "learning_rate": 1.999301900447611e-05, + "loss": 1.5617, + "step": 6909 + }, + { + "epoch": 0.08, + "grad_norm": 36.14067071425663, + "learning_rate": 1.9992997162985522e-05, + "loss": 1.786, + "step": 6912 + }, + { + "epoch": 0.08, + "grad_norm": 17.30148690631737, + "learning_rate": 1.999297528739244e-05, + "loss": 1.9325, + "step": 6915 + }, + { + "epoch": 0.08, + "grad_norm": 20.449614071297862, + "learning_rate": 1.9992953377696932e-05, + "loss": 1.8791, + "step": 6918 + }, + { + "epoch": 0.08, + "grad_norm": 7.5225287317671, + "learning_rate": 1.9992931433899076e-05, + "loss": 1.7926, + "step": 6921 + }, + { + "epoch": 0.08, + "grad_norm": 117.89322491066267, + "learning_rate": 1.9992909455998938e-05, + "loss": 1.9471, + "step": 6924 + }, + { + "epoch": 0.08, + "grad_norm": 33.69680660576673, + "learning_rate": 1.9992887443996606e-05, + "loss": 1.5155, + "step": 6927 + }, + { + "epoch": 0.08, + "grad_norm": 28.109086903759103, + "learning_rate": 1.9992865397892145e-05, + "loss": 2.3076, + "step": 6930 + }, + { + "epoch": 0.08, + "grad_norm": 25.857420381478445, + "learning_rate": 1.9992843317685633e-05, + "loss": 1.8545, + "step": 6933 + }, + { + "epoch": 0.08, + "grad_norm": 9.097267453078297, + "learning_rate": 1.9992821203377148e-05, + "loss": 1.463, + "step": 6936 + }, + { + "epoch": 0.08, + "grad_norm": 40.67945122394086, + "learning_rate": 1.9992799054966767e-05, + "loss": 1.6312, + "step": 6939 + }, + { + "epoch": 0.08, + "grad_norm": 87.06814386852126, + "learning_rate": 1.9992776872454555e-05, + "loss": 1.7246, + "step": 6942 + }, + { + "epoch": 0.08, + "grad_norm": 88.0079743269036, + "learning_rate": 1.99927546558406e-05, + "loss": 1.5651, + "step": 6945 + }, + { + "epoch": 0.08, + "grad_norm": 21.068095763911668, + "learning_rate": 1.999273240512497e-05, + "loss": 1.425, + "step": 6948 + }, + { + "epoch": 0.08, + "grad_norm": 9.514804818626335, + "learning_rate": 1.9992710120307745e-05, + "loss": 1.8267, + "step": 6951 + }, + { + "epoch": 0.08, + "grad_norm": 14.617732898894495, + "learning_rate": 1.9992687801389e-05, + "loss": 1.6991, + "step": 6954 + }, + { + "epoch": 0.08, + "grad_norm": 59.61661599136908, + "learning_rate": 1.999266544836881e-05, + "loss": 1.7042, + "step": 6957 + }, + { + "epoch": 0.08, + "grad_norm": 21.35967305369142, + "learning_rate": 1.999264306124725e-05, + "loss": 1.8855, + "step": 6960 + }, + { + "epoch": 0.08, + "grad_norm": 17.350187477969154, + "learning_rate": 1.9992620640024404e-05, + "loss": 1.8721, + "step": 6963 + }, + { + "epoch": 0.08, + "grad_norm": 14.111061633489077, + "learning_rate": 1.999259818470034e-05, + "loss": 1.4934, + "step": 6966 + }, + { + "epoch": 0.08, + "grad_norm": 5.022548773172483, + "learning_rate": 1.9992575695275137e-05, + "loss": 2.0389, + "step": 6969 + }, + { + "epoch": 0.08, + "grad_norm": 69.01218606161802, + "learning_rate": 1.9992553171748873e-05, + "loss": 1.4932, + "step": 6972 + }, + { + "epoch": 0.08, + "grad_norm": 12.779289647872949, + "learning_rate": 1.9992530614121625e-05, + "loss": 2.0375, + "step": 6975 + }, + { + "epoch": 0.08, + "grad_norm": 7.16727104916556, + "learning_rate": 1.999250802239347e-05, + "loss": 2.0757, + "step": 6978 + }, + { + "epoch": 0.08, + "grad_norm": 3.9530755185227244, + "learning_rate": 1.9992485396564483e-05, + "loss": 2.1292, + "step": 6981 + }, + { + "epoch": 0.08, + "grad_norm": 22.05207486924262, + "learning_rate": 1.9992462736634747e-05, + "loss": 1.7649, + "step": 6984 + }, + { + "epoch": 0.08, + "grad_norm": 9.275197800904223, + "learning_rate": 1.999244004260433e-05, + "loss": 1.9662, + "step": 6987 + }, + { + "epoch": 0.08, + "grad_norm": 36.87463015202065, + "learning_rate": 1.9992417314473313e-05, + "loss": 2.0404, + "step": 6990 + }, + { + "epoch": 0.08, + "grad_norm": 38.436984827114124, + "learning_rate": 1.9992394552241777e-05, + "loss": 1.7655, + "step": 6993 + }, + { + "epoch": 0.08, + "grad_norm": 12.965526518852366, + "learning_rate": 1.99923717559098e-05, + "loss": 1.782, + "step": 6996 + }, + { + "epoch": 0.08, + "grad_norm": 47.92187997425782, + "learning_rate": 1.9992348925477458e-05, + "loss": 1.5885, + "step": 6999 + }, + { + "epoch": 0.08, + "grad_norm": 15.486328622304148, + "learning_rate": 1.9992326060944823e-05, + "loss": 1.9399, + "step": 7002 + }, + { + "epoch": 0.08, + "grad_norm": 32.200652786567794, + "learning_rate": 1.999230316231198e-05, + "loss": 1.7444, + "step": 7005 + }, + { + "epoch": 0.08, + "grad_norm": 15.460656097495617, + "learning_rate": 1.9992280229579007e-05, + "loss": 2.1248, + "step": 7008 + }, + { + "epoch": 0.08, + "grad_norm": 7.257878693179694, + "learning_rate": 1.999225726274598e-05, + "loss": 1.4485, + "step": 7011 + }, + { + "epoch": 0.08, + "grad_norm": 21.277761092004884, + "learning_rate": 1.999223426181298e-05, + "loss": 1.7557, + "step": 7014 + }, + { + "epoch": 0.08, + "grad_norm": 35.81479366121766, + "learning_rate": 1.9992211226780083e-05, + "loss": 1.1713, + "step": 7017 + }, + { + "epoch": 0.08, + "grad_norm": 39.16894224066994, + "learning_rate": 1.999218815764737e-05, + "loss": 2.025, + "step": 7020 + }, + { + "epoch": 0.08, + "grad_norm": 10.543079105861517, + "learning_rate": 1.999216505441491e-05, + "loss": 1.3721, + "step": 7023 + }, + { + "epoch": 0.08, + "grad_norm": 12.826503986018418, + "learning_rate": 1.9992141917082796e-05, + "loss": 1.6707, + "step": 7026 + }, + { + "epoch": 0.08, + "grad_norm": 25.22124813685679, + "learning_rate": 1.99921187456511e-05, + "loss": 1.763, + "step": 7029 + }, + { + "epoch": 0.08, + "grad_norm": 47.191058944425656, + "learning_rate": 1.99920955401199e-05, + "loss": 1.9299, + "step": 7032 + }, + { + "epoch": 0.08, + "grad_norm": 38.21568826963168, + "learning_rate": 1.9992072300489283e-05, + "loss": 2.0317, + "step": 7035 + }, + { + "epoch": 0.08, + "grad_norm": 55.849956017572396, + "learning_rate": 1.9992049026759318e-05, + "loss": 1.8764, + "step": 7038 + }, + { + "epoch": 0.08, + "grad_norm": 18.462440355044276, + "learning_rate": 1.999202571893009e-05, + "loss": 1.8499, + "step": 7041 + }, + { + "epoch": 0.08, + "grad_norm": 17.0331151827383, + "learning_rate": 1.9992002377001676e-05, + "loss": 1.8035, + "step": 7044 + }, + { + "epoch": 0.08, + "grad_norm": 36.555984765264625, + "learning_rate": 1.999197900097416e-05, + "loss": 1.8414, + "step": 7047 + }, + { + "epoch": 0.08, + "grad_norm": 4.957615195377921, + "learning_rate": 1.9991955590847613e-05, + "loss": 1.4817, + "step": 7050 + }, + { + "epoch": 0.08, + "grad_norm": 12.30762281799608, + "learning_rate": 1.9991932146622127e-05, + "loss": 1.7413, + "step": 7053 + }, + { + "epoch": 0.08, + "grad_norm": 3.9621734212053177, + "learning_rate": 1.999190866829777e-05, + "loss": 2.0222, + "step": 7056 + }, + { + "epoch": 0.08, + "grad_norm": 8.544643093538209, + "learning_rate": 1.999188515587463e-05, + "loss": 1.9167, + "step": 7059 + }, + { + "epoch": 0.08, + "grad_norm": 4.70114170259726, + "learning_rate": 1.9991861609352786e-05, + "loss": 1.9577, + "step": 7062 + }, + { + "epoch": 0.08, + "grad_norm": 14.210327643857697, + "learning_rate": 1.9991838028732318e-05, + "loss": 1.7791, + "step": 7065 + }, + { + "epoch": 0.08, + "grad_norm": 28.642231128300402, + "learning_rate": 1.9991814414013305e-05, + "loss": 1.7533, + "step": 7068 + }, + { + "epoch": 0.09, + "grad_norm": 13.119212470811602, + "learning_rate": 1.9991790765195826e-05, + "loss": 1.9632, + "step": 7071 + }, + { + "epoch": 0.09, + "grad_norm": 14.390850912730993, + "learning_rate": 1.9991767082279968e-05, + "loss": 1.9391, + "step": 7074 + }, + { + "epoch": 0.09, + "grad_norm": 9.297162135058857, + "learning_rate": 1.9991743365265806e-05, + "loss": 1.8793, + "step": 7077 + }, + { + "epoch": 0.09, + "grad_norm": 51.67549287894254, + "learning_rate": 1.999171961415342e-05, + "loss": 1.7946, + "step": 7080 + }, + { + "epoch": 0.09, + "grad_norm": 19.93231773464896, + "learning_rate": 1.9991695828942902e-05, + "loss": 1.8389, + "step": 7083 + }, + { + "epoch": 0.09, + "grad_norm": 21.56560918288679, + "learning_rate": 1.999167200963432e-05, + "loss": 1.9526, + "step": 7086 + }, + { + "epoch": 0.09, + "grad_norm": 28.760158077184883, + "learning_rate": 1.9991648156227758e-05, + "loss": 1.951, + "step": 7089 + }, + { + "epoch": 0.09, + "grad_norm": 6.41415341920707, + "learning_rate": 1.9991624268723302e-05, + "loss": 1.7343, + "step": 7092 + }, + { + "epoch": 0.09, + "grad_norm": 22.206777779752542, + "learning_rate": 1.9991600347121034e-05, + "loss": 1.5735, + "step": 7095 + }, + { + "epoch": 0.09, + "grad_norm": 15.129566123144263, + "learning_rate": 1.999157639142103e-05, + "loss": 1.9012, + "step": 7098 + }, + { + "epoch": 0.09, + "grad_norm": 8.960498950215163, + "learning_rate": 1.9991552401623375e-05, + "loss": 1.6497, + "step": 7101 + }, + { + "epoch": 0.09, + "grad_norm": 14.472607234303519, + "learning_rate": 1.9991528377728153e-05, + "loss": 1.8133, + "step": 7104 + }, + { + "epoch": 0.09, + "grad_norm": 16.86834080648835, + "learning_rate": 1.999150431973544e-05, + "loss": 1.9245, + "step": 7107 + }, + { + "epoch": 0.09, + "grad_norm": 7.281802976481248, + "learning_rate": 1.9991480227645324e-05, + "loss": 1.4148, + "step": 7110 + }, + { + "epoch": 0.09, + "grad_norm": 49.74193748954867, + "learning_rate": 1.9991456101457884e-05, + "loss": 1.5493, + "step": 7113 + }, + { + "epoch": 0.09, + "grad_norm": 5.779945421624904, + "learning_rate": 1.9991431941173203e-05, + "loss": 1.276, + "step": 7116 + }, + { + "epoch": 0.09, + "grad_norm": 38.01648202557093, + "learning_rate": 1.9991407746791364e-05, + "loss": 1.9045, + "step": 7119 + }, + { + "epoch": 0.09, + "grad_norm": 14.176540888789154, + "learning_rate": 1.999138351831245e-05, + "loss": 1.4928, + "step": 7122 + }, + { + "epoch": 0.09, + "grad_norm": 41.427110277021406, + "learning_rate": 1.999135925573654e-05, + "loss": 1.6768, + "step": 7125 + }, + { + "epoch": 0.09, + "grad_norm": 8.833572207077017, + "learning_rate": 1.9991334959063723e-05, + "loss": 1.9601, + "step": 7128 + }, + { + "epoch": 0.09, + "grad_norm": 9.716163692678462, + "learning_rate": 1.999131062829408e-05, + "loss": 1.4716, + "step": 7131 + }, + { + "epoch": 0.09, + "grad_norm": 53.58864159891892, + "learning_rate": 1.9991286263427688e-05, + "loss": 1.4353, + "step": 7134 + }, + { + "epoch": 0.09, + "grad_norm": 41.36886865095879, + "learning_rate": 1.9991261864464637e-05, + "loss": 1.7712, + "step": 7137 + }, + { + "epoch": 0.09, + "grad_norm": 19.7959297819161, + "learning_rate": 1.999123743140501e-05, + "loss": 1.7484, + "step": 7140 + }, + { + "epoch": 0.09, + "grad_norm": 47.05543187599076, + "learning_rate": 1.999121296424889e-05, + "loss": 2.2777, + "step": 7143 + }, + { + "epoch": 0.09, + "grad_norm": 68.26250011274004, + "learning_rate": 1.999118846299635e-05, + "loss": 1.8368, + "step": 7146 + }, + { + "epoch": 0.09, + "grad_norm": 28.150150471969035, + "learning_rate": 1.9991163927647493e-05, + "loss": 1.5597, + "step": 7149 + }, + { + "epoch": 0.09, + "grad_norm": 5.16916291580887, + "learning_rate": 1.9991139358202386e-05, + "loss": 1.9148, + "step": 7152 + }, + { + "epoch": 0.09, + "grad_norm": 18.113532020607835, + "learning_rate": 1.9991114754661122e-05, + "loss": 1.7591, + "step": 7155 + }, + { + "epoch": 0.09, + "grad_norm": 19.628392236324288, + "learning_rate": 1.999109011702378e-05, + "loss": 1.7552, + "step": 7158 + }, + { + "epoch": 0.09, + "grad_norm": 30.591347765586757, + "learning_rate": 1.9991065445290445e-05, + "loss": 1.6958, + "step": 7161 + }, + { + "epoch": 0.09, + "grad_norm": 11.77796178901017, + "learning_rate": 1.9991040739461205e-05, + "loss": 1.5127, + "step": 7164 + }, + { + "epoch": 0.09, + "grad_norm": 6.203644684957812, + "learning_rate": 1.9991015999536144e-05, + "loss": 1.4849, + "step": 7167 + }, + { + "epoch": 0.09, + "grad_norm": 100.53530820871579, + "learning_rate": 1.999099122551534e-05, + "loss": 1.7971, + "step": 7170 + }, + { + "epoch": 0.09, + "grad_norm": 57.710693423667784, + "learning_rate": 1.9990966417398882e-05, + "loss": 1.3991, + "step": 7173 + }, + { + "epoch": 0.09, + "grad_norm": 7.022254082851858, + "learning_rate": 1.9990941575186858e-05, + "loss": 1.5749, + "step": 7176 + }, + { + "epoch": 0.09, + "grad_norm": 12.042334290890029, + "learning_rate": 1.9990916698879345e-05, + "loss": 1.7996, + "step": 7179 + }, + { + "epoch": 0.09, + "grad_norm": 13.045714637834969, + "learning_rate": 1.9990891788476435e-05, + "loss": 1.9292, + "step": 7182 + }, + { + "epoch": 0.09, + "grad_norm": 41.18615295310569, + "learning_rate": 1.9990866843978207e-05, + "loss": 1.8764, + "step": 7185 + }, + { + "epoch": 0.09, + "grad_norm": 17.85240993019384, + "learning_rate": 1.999084186538475e-05, + "loss": 1.6344, + "step": 7188 + }, + { + "epoch": 0.09, + "grad_norm": 3.5976404338603287, + "learning_rate": 1.9990816852696147e-05, + "loss": 1.5902, + "step": 7191 + }, + { + "epoch": 0.09, + "grad_norm": 16.544131488515895, + "learning_rate": 1.999079180591249e-05, + "loss": 1.6229, + "step": 7194 + }, + { + "epoch": 0.09, + "grad_norm": 8.406923818857598, + "learning_rate": 1.9990766725033855e-05, + "loss": 1.9696, + "step": 7197 + }, + { + "epoch": 0.09, + "grad_norm": 17.172751528357672, + "learning_rate": 1.9990741610060332e-05, + "loss": 1.8158, + "step": 7200 + }, + { + "epoch": 0.09, + "grad_norm": 30.897996856691382, + "learning_rate": 1.9990716460992012e-05, + "loss": 1.8412, + "step": 7203 + }, + { + "epoch": 0.09, + "grad_norm": 12.327369084266069, + "learning_rate": 1.999069127782897e-05, + "loss": 1.6327, + "step": 7206 + }, + { + "epoch": 0.09, + "grad_norm": 50.02445308112298, + "learning_rate": 1.9990666060571294e-05, + "loss": 1.5236, + "step": 7209 + }, + { + "epoch": 0.09, + "grad_norm": 13.22015248627621, + "learning_rate": 1.9990640809219083e-05, + "loss": 1.7402, + "step": 7212 + }, + { + "epoch": 0.09, + "grad_norm": 6.11597098230312, + "learning_rate": 1.9990615523772407e-05, + "loss": 1.9609, + "step": 7215 + }, + { + "epoch": 0.09, + "grad_norm": 27.624436583583762, + "learning_rate": 1.9990590204231357e-05, + "loss": 1.4443, + "step": 7218 + }, + { + "epoch": 0.09, + "grad_norm": 8.361483354155926, + "learning_rate": 1.9990564850596025e-05, + "loss": 1.585, + "step": 7221 + }, + { + "epoch": 0.09, + "grad_norm": 7.930471254004371, + "learning_rate": 1.9990539462866495e-05, + "loss": 1.7649, + "step": 7224 + }, + { + "epoch": 0.09, + "grad_norm": 45.61217629992603, + "learning_rate": 1.999051404104285e-05, + "loss": 1.9854, + "step": 7227 + }, + { + "epoch": 0.09, + "grad_norm": 32.049850629287754, + "learning_rate": 1.9990488585125178e-05, + "loss": 1.6189, + "step": 7230 + }, + { + "epoch": 0.09, + "grad_norm": 34.389454802950475, + "learning_rate": 1.999046309511357e-05, + "loss": 1.9295, + "step": 7233 + }, + { + "epoch": 0.09, + "grad_norm": 13.058418105309524, + "learning_rate": 1.9990437571008107e-05, + "loss": 2.2059, + "step": 7236 + }, + { + "epoch": 0.09, + "grad_norm": 17.85734929925203, + "learning_rate": 1.9990412012808883e-05, + "loss": 1.9487, + "step": 7239 + }, + { + "epoch": 0.09, + "grad_norm": 34.046874423211996, + "learning_rate": 1.9990386420515977e-05, + "loss": 2.0381, + "step": 7242 + }, + { + "epoch": 0.09, + "grad_norm": 7.399241440074428, + "learning_rate": 1.9990360794129483e-05, + "loss": 1.7048, + "step": 7245 + }, + { + "epoch": 0.09, + "grad_norm": 98.9537607572386, + "learning_rate": 1.9990335133649483e-05, + "loss": 2.1248, + "step": 7248 + }, + { + "epoch": 0.09, + "grad_norm": 6.368532270872758, + "learning_rate": 1.999030943907607e-05, + "loss": 1.7659, + "step": 7251 + }, + { + "epoch": 0.09, + "grad_norm": 26.483935533709506, + "learning_rate": 1.9990283710409332e-05, + "loss": 1.9365, + "step": 7254 + }, + { + "epoch": 0.09, + "grad_norm": 89.0264894958497, + "learning_rate": 1.999025794764935e-05, + "loss": 1.799, + "step": 7257 + }, + { + "epoch": 0.09, + "grad_norm": 21.945319750725787, + "learning_rate": 1.999023215079622e-05, + "loss": 1.5288, + "step": 7260 + }, + { + "epoch": 0.09, + "grad_norm": 119.96034891070529, + "learning_rate": 1.9990206319850026e-05, + "loss": 1.5457, + "step": 7263 + }, + { + "epoch": 0.09, + "grad_norm": 17.58562880100715, + "learning_rate": 1.9990180454810854e-05, + "loss": 1.6475, + "step": 7266 + }, + { + "epoch": 0.09, + "grad_norm": 31.848270164606514, + "learning_rate": 1.9990154555678792e-05, + "loss": 1.6439, + "step": 7269 + }, + { + "epoch": 0.09, + "grad_norm": 11.926542191332322, + "learning_rate": 1.9990128622453935e-05, + "loss": 1.8243, + "step": 7272 + }, + { + "epoch": 0.09, + "grad_norm": 5.878586328821175, + "learning_rate": 1.999010265513637e-05, + "loss": 1.6065, + "step": 7275 + }, + { + "epoch": 0.09, + "grad_norm": 10.777785551573832, + "learning_rate": 1.9990076653726176e-05, + "loss": 1.7974, + "step": 7278 + }, + { + "epoch": 0.09, + "grad_norm": 8.161632212906808, + "learning_rate": 1.999005061822345e-05, + "loss": 1.8024, + "step": 7281 + }, + { + "epoch": 0.09, + "grad_norm": 45.67151471349731, + "learning_rate": 1.9990024548628284e-05, + "loss": 1.794, + "step": 7284 + }, + { + "epoch": 0.09, + "grad_norm": 44.04982905126092, + "learning_rate": 1.998999844494076e-05, + "loss": 1.5687, + "step": 7287 + }, + { + "epoch": 0.09, + "grad_norm": 11.447826812619084, + "learning_rate": 1.998997230716097e-05, + "loss": 1.9473, + "step": 7290 + }, + { + "epoch": 0.09, + "grad_norm": 16.237315008829952, + "learning_rate": 1.9989946135289006e-05, + "loss": 1.4373, + "step": 7293 + }, + { + "epoch": 0.09, + "grad_norm": 44.96939863819801, + "learning_rate": 1.9989919929324948e-05, + "loss": 2.0601, + "step": 7296 + }, + { + "epoch": 0.09, + "grad_norm": 25.353648243771126, + "learning_rate": 1.9989893689268894e-05, + "loss": 1.7337, + "step": 7299 + }, + { + "epoch": 0.09, + "grad_norm": 30.33506046694472, + "learning_rate": 1.9989867415120933e-05, + "loss": 1.7658, + "step": 7302 + }, + { + "epoch": 0.09, + "grad_norm": 5.229670471413795, + "learning_rate": 1.9989841106881153e-05, + "loss": 1.8985, + "step": 7305 + }, + { + "epoch": 0.09, + "grad_norm": 37.59430750340613, + "learning_rate": 1.9989814764549642e-05, + "loss": 1.7686, + "step": 7308 + }, + { + "epoch": 0.09, + "grad_norm": 19.058542841042673, + "learning_rate": 1.9989788388126492e-05, + "loss": 1.5062, + "step": 7311 + }, + { + "epoch": 0.09, + "grad_norm": 14.258339218134912, + "learning_rate": 1.9989761977611793e-05, + "loss": 1.6309, + "step": 7314 + }, + { + "epoch": 0.09, + "grad_norm": 15.944893419429095, + "learning_rate": 1.9989735533005635e-05, + "loss": 1.4785, + "step": 7317 + }, + { + "epoch": 0.09, + "grad_norm": 5.330390480128647, + "learning_rate": 1.9989709054308108e-05, + "loss": 1.6217, + "step": 7320 + }, + { + "epoch": 0.09, + "grad_norm": 49.20904615296256, + "learning_rate": 1.99896825415193e-05, + "loss": 1.7972, + "step": 7323 + }, + { + "epoch": 0.09, + "grad_norm": 34.06973768120745, + "learning_rate": 1.9989655994639304e-05, + "loss": 1.6579, + "step": 7326 + }, + { + "epoch": 0.09, + "grad_norm": 37.12751437569545, + "learning_rate": 1.9989629413668213e-05, + "loss": 1.57, + "step": 7329 + }, + { + "epoch": 0.09, + "grad_norm": 23.03464669799886, + "learning_rate": 1.998960279860611e-05, + "loss": 1.8123, + "step": 7332 + }, + { + "epoch": 0.09, + "grad_norm": 24.21553109665489, + "learning_rate": 1.99895761494531e-05, + "loss": 1.9938, + "step": 7335 + }, + { + "epoch": 0.09, + "grad_norm": 8.071946429953066, + "learning_rate": 1.9989549466209258e-05, + "loss": 1.9534, + "step": 7338 + }, + { + "epoch": 0.09, + "grad_norm": 443.873383420528, + "learning_rate": 1.9989522748874682e-05, + "loss": 1.5448, + "step": 7341 + }, + { + "epoch": 0.09, + "grad_norm": 16.712313339638, + "learning_rate": 1.9989495997449463e-05, + "loss": 1.6471, + "step": 7344 + }, + { + "epoch": 0.09, + "grad_norm": 31.731155400352897, + "learning_rate": 1.9989469211933695e-05, + "loss": 1.9105, + "step": 7347 + }, + { + "epoch": 0.09, + "grad_norm": 6.884548458318584, + "learning_rate": 1.9989442392327464e-05, + "loss": 1.407, + "step": 7350 + }, + { + "epoch": 0.09, + "grad_norm": 10.100542613437817, + "learning_rate": 1.9989415538630867e-05, + "loss": 1.893, + "step": 7353 + }, + { + "epoch": 0.09, + "grad_norm": 14.984582900817895, + "learning_rate": 1.9989388650843992e-05, + "loss": 1.5908, + "step": 7356 + }, + { + "epoch": 0.09, + "grad_norm": 32.11849844873194, + "learning_rate": 1.998936172896693e-05, + "loss": 1.974, + "step": 7359 + }, + { + "epoch": 0.09, + "grad_norm": 25.602829194907343, + "learning_rate": 1.9989334772999776e-05, + "loss": 1.788, + "step": 7362 + }, + { + "epoch": 0.09, + "grad_norm": 73.7472864276887, + "learning_rate": 1.998930778294262e-05, + "loss": 1.8204, + "step": 7365 + }, + { + "epoch": 0.09, + "grad_norm": 47.41309251734709, + "learning_rate": 1.9989280758795554e-05, + "loss": 1.8775, + "step": 7368 + }, + { + "epoch": 0.09, + "grad_norm": 23.50407210114214, + "learning_rate": 1.9989253700558674e-05, + "loss": 1.8678, + "step": 7371 + }, + { + "epoch": 0.09, + "grad_norm": 15.028455524591722, + "learning_rate": 1.998922660823207e-05, + "loss": 1.7472, + "step": 7374 + }, + { + "epoch": 0.09, + "grad_norm": 6.359726819214624, + "learning_rate": 1.998919948181583e-05, + "loss": 1.7323, + "step": 7377 + }, + { + "epoch": 0.09, + "grad_norm": 22.035449703315912, + "learning_rate": 1.9989172321310048e-05, + "loss": 1.981, + "step": 7380 + }, + { + "epoch": 0.09, + "grad_norm": 32.46045191135252, + "learning_rate": 1.9989145126714822e-05, + "loss": 1.9565, + "step": 7383 + }, + { + "epoch": 0.09, + "grad_norm": 3.9434255033239376, + "learning_rate": 1.9989117898030244e-05, + "loss": 1.6188, + "step": 7386 + }, + { + "epoch": 0.09, + "grad_norm": 32.18337368209329, + "learning_rate": 1.9989090635256402e-05, + "loss": 1.8946, + "step": 7389 + }, + { + "epoch": 0.09, + "grad_norm": 35.932436221012345, + "learning_rate": 1.9989063338393393e-05, + "loss": 1.2962, + "step": 7392 + }, + { + "epoch": 0.09, + "grad_norm": 32.54757078720825, + "learning_rate": 1.998903600744131e-05, + "loss": 1.8867, + "step": 7395 + }, + { + "epoch": 0.09, + "grad_norm": 88.40353505561784, + "learning_rate": 1.998900864240024e-05, + "loss": 1.8188, + "step": 7398 + }, + { + "epoch": 0.09, + "grad_norm": 20.708121362134115, + "learning_rate": 1.9988981243270284e-05, + "loss": 1.5295, + "step": 7401 + }, + { + "epoch": 0.09, + "grad_norm": 5.533553229783615, + "learning_rate": 1.9988953810051536e-05, + "loss": 1.4354, + "step": 7404 + }, + { + "epoch": 0.09, + "grad_norm": 22.210438607989634, + "learning_rate": 1.9988926342744085e-05, + "loss": 1.8958, + "step": 7407 + }, + { + "epoch": 0.09, + "grad_norm": 10.153608111105228, + "learning_rate": 1.9988898841348027e-05, + "loss": 1.3667, + "step": 7410 + }, + { + "epoch": 0.09, + "grad_norm": 24.6861340032382, + "learning_rate": 1.9988871305863453e-05, + "loss": 1.7965, + "step": 7413 + }, + { + "epoch": 0.09, + "grad_norm": 5.685421352739481, + "learning_rate": 1.998884373629046e-05, + "loss": 1.7918, + "step": 7416 + }, + { + "epoch": 0.09, + "grad_norm": 15.609362880982271, + "learning_rate": 1.9988816132629142e-05, + "loss": 1.7913, + "step": 7419 + }, + { + "epoch": 0.09, + "grad_norm": 10.184697695559414, + "learning_rate": 1.998878849487959e-05, + "loss": 1.8094, + "step": 7422 + }, + { + "epoch": 0.09, + "grad_norm": 21.02364950781939, + "learning_rate": 1.9988760823041903e-05, + "loss": 1.8364, + "step": 7425 + }, + { + "epoch": 0.09, + "grad_norm": 14.677319577105072, + "learning_rate": 1.9988733117116173e-05, + "loss": 1.9246, + "step": 7428 + }, + { + "epoch": 0.09, + "grad_norm": 27.625738839191392, + "learning_rate": 1.9988705377102494e-05, + "loss": 1.497, + "step": 7431 + }, + { + "epoch": 0.09, + "grad_norm": 6.122518672274905, + "learning_rate": 1.9988677603000964e-05, + "loss": 1.4757, + "step": 7434 + }, + { + "epoch": 0.09, + "grad_norm": 17.163905038529556, + "learning_rate": 1.9988649794811676e-05, + "loss": 1.7897, + "step": 7437 + }, + { + "epoch": 0.09, + "grad_norm": 20.949287116204253, + "learning_rate": 1.998862195253472e-05, + "loss": 1.9744, + "step": 7440 + }, + { + "epoch": 0.09, + "grad_norm": 20.53705585315733, + "learning_rate": 1.9988594076170197e-05, + "loss": 1.7109, + "step": 7443 + }, + { + "epoch": 0.09, + "grad_norm": 32.67121528976499, + "learning_rate": 1.99885661657182e-05, + "loss": 1.5627, + "step": 7446 + }, + { + "epoch": 0.09, + "grad_norm": 18.606109187650464, + "learning_rate": 1.9988538221178826e-05, + "loss": 1.6229, + "step": 7449 + }, + { + "epoch": 0.09, + "grad_norm": 30.78235421949749, + "learning_rate": 1.9988510242552168e-05, + "loss": 1.8146, + "step": 7452 + }, + { + "epoch": 0.09, + "grad_norm": 67.53632383941947, + "learning_rate": 1.9988482229838322e-05, + "loss": 1.792, + "step": 7455 + }, + { + "epoch": 0.09, + "grad_norm": 19.147703823038906, + "learning_rate": 1.9988454183037383e-05, + "loss": 1.96, + "step": 7458 + }, + { + "epoch": 0.09, + "grad_norm": 40.401194634067124, + "learning_rate": 1.998842610214945e-05, + "loss": 1.4816, + "step": 7461 + }, + { + "epoch": 0.09, + "grad_norm": 10.938106366896552, + "learning_rate": 1.9988397987174614e-05, + "loss": 1.6832, + "step": 7464 + }, + { + "epoch": 0.09, + "grad_norm": 23.073266023310325, + "learning_rate": 1.9988369838112974e-05, + "loss": 1.9041, + "step": 7467 + }, + { + "epoch": 0.09, + "grad_norm": 15.288463442775749, + "learning_rate": 1.998834165496463e-05, + "loss": 1.8348, + "step": 7470 + }, + { + "epoch": 0.09, + "grad_norm": 26.115943539950518, + "learning_rate": 1.9988313437729668e-05, + "loss": 1.7707, + "step": 7473 + }, + { + "epoch": 0.09, + "grad_norm": 5.426524809342874, + "learning_rate": 1.9988285186408193e-05, + "loss": 1.8586, + "step": 7476 + }, + { + "epoch": 0.09, + "grad_norm": 14.456322177553648, + "learning_rate": 1.99882569010003e-05, + "loss": 2.1473, + "step": 7479 + }, + { + "epoch": 0.09, + "grad_norm": 40.57096709268774, + "learning_rate": 1.9988228581506078e-05, + "loss": 1.8749, + "step": 7482 + }, + { + "epoch": 0.09, + "grad_norm": 14.980861602737706, + "learning_rate": 1.9988200227925632e-05, + "loss": 1.6366, + "step": 7485 + }, + { + "epoch": 0.09, + "grad_norm": 8.747334891962504, + "learning_rate": 1.998817184025906e-05, + "loss": 1.4004, + "step": 7488 + }, + { + "epoch": 0.09, + "grad_norm": 8.068107667583996, + "learning_rate": 1.998814341850645e-05, + "loss": 1.6023, + "step": 7491 + }, + { + "epoch": 0.09, + "grad_norm": 8.036264532810177, + "learning_rate": 1.9988114962667907e-05, + "loss": 1.6566, + "step": 7494 + }, + { + "epoch": 0.09, + "grad_norm": 9.841053120406212, + "learning_rate": 1.9988086472743526e-05, + "loss": 1.7121, + "step": 7497 + }, + { + "epoch": 0.09, + "grad_norm": 16.40144614331294, + "learning_rate": 1.9988057948733403e-05, + "loss": 1.6613, + "step": 7500 + }, + { + "epoch": 0.09, + "grad_norm": 11.801529333568892, + "learning_rate": 1.9988029390637635e-05, + "loss": 1.7166, + "step": 7503 + }, + { + "epoch": 0.09, + "grad_norm": 29.445668650427212, + "learning_rate": 1.998800079845632e-05, + "loss": 1.8151, + "step": 7506 + }, + { + "epoch": 0.09, + "grad_norm": 41.079257103906286, + "learning_rate": 1.998797217218956e-05, + "loss": 1.7785, + "step": 7509 + }, + { + "epoch": 0.09, + "grad_norm": 5.97057131726519, + "learning_rate": 1.9987943511837446e-05, + "loss": 1.6318, + "step": 7512 + }, + { + "epoch": 0.09, + "grad_norm": 6.191929551335712, + "learning_rate": 1.998791481740008e-05, + "loss": 1.6028, + "step": 7515 + }, + { + "epoch": 0.09, + "grad_norm": 20.773677584516115, + "learning_rate": 1.998788608887755e-05, + "loss": 1.5416, + "step": 7518 + }, + { + "epoch": 0.09, + "grad_norm": 52.02145048843069, + "learning_rate": 1.9987857326269974e-05, + "loss": 1.6539, + "step": 7521 + }, + { + "epoch": 0.09, + "grad_norm": 19.706800245146155, + "learning_rate": 1.998782852957743e-05, + "loss": 1.6787, + "step": 7524 + }, + { + "epoch": 0.09, + "grad_norm": 2.878712562121157, + "learning_rate": 1.998779969880003e-05, + "loss": 1.4647, + "step": 7527 + }, + { + "epoch": 0.09, + "grad_norm": 4.59307050011141, + "learning_rate": 1.9987770833937865e-05, + "loss": 1.7817, + "step": 7530 + }, + { + "epoch": 0.09, + "grad_norm": 36.03318257176185, + "learning_rate": 1.9987741934991038e-05, + "loss": 1.9181, + "step": 7533 + }, + { + "epoch": 0.09, + "grad_norm": 16.611119106395755, + "learning_rate": 1.9987713001959642e-05, + "loss": 1.8181, + "step": 7536 + }, + { + "epoch": 0.09, + "grad_norm": 14.010330211240095, + "learning_rate": 1.998768403484378e-05, + "loss": 1.6703, + "step": 7539 + }, + { + "epoch": 0.09, + "grad_norm": 3.515655479862844, + "learning_rate": 1.998765503364355e-05, + "loss": 1.5868, + "step": 7542 + }, + { + "epoch": 0.09, + "grad_norm": 20.619903245003037, + "learning_rate": 1.9987625998359053e-05, + "loss": 1.5052, + "step": 7545 + }, + { + "epoch": 0.09, + "grad_norm": 26.875962117772282, + "learning_rate": 1.9987596928990382e-05, + "loss": 2.0144, + "step": 7548 + }, + { + "epoch": 0.09, + "grad_norm": 14.134305948455486, + "learning_rate": 1.9987567825537644e-05, + "loss": 1.5429, + "step": 7551 + }, + { + "epoch": 0.09, + "grad_norm": 29.001617054666195, + "learning_rate": 1.998753868800093e-05, + "loss": 1.5575, + "step": 7554 + }, + { + "epoch": 0.09, + "grad_norm": 16.933892582124418, + "learning_rate": 1.998750951638035e-05, + "loss": 1.8156, + "step": 7557 + }, + { + "epoch": 0.09, + "grad_norm": 15.16938376038767, + "learning_rate": 1.9987480310675995e-05, + "loss": 1.7362, + "step": 7560 + }, + { + "epoch": 0.09, + "grad_norm": 40.53484948997721, + "learning_rate": 1.9987451070887968e-05, + "loss": 1.7777, + "step": 7563 + }, + { + "epoch": 0.09, + "grad_norm": 16.10257157243238, + "learning_rate": 1.9987421797016365e-05, + "loss": 1.6325, + "step": 7566 + }, + { + "epoch": 0.09, + "grad_norm": 7.989639824646751, + "learning_rate": 1.9987392489061292e-05, + "loss": 1.8389, + "step": 7569 + }, + { + "epoch": 0.09, + "grad_norm": 8.453012219897667, + "learning_rate": 1.9987363147022842e-05, + "loss": 1.4311, + "step": 7572 + }, + { + "epoch": 0.09, + "grad_norm": 61.77672693090673, + "learning_rate": 1.9987333770901122e-05, + "loss": 1.9165, + "step": 7575 + }, + { + "epoch": 0.09, + "grad_norm": 106.25235031833961, + "learning_rate": 1.998730436069623e-05, + "loss": 1.9748, + "step": 7578 + }, + { + "epoch": 0.09, + "grad_norm": 9.019599091221732, + "learning_rate": 1.9987274916408262e-05, + "loss": 1.7778, + "step": 7581 + }, + { + "epoch": 0.09, + "grad_norm": 11.808558793678463, + "learning_rate": 1.9987245438037326e-05, + "loss": 1.7939, + "step": 7584 + }, + { + "epoch": 0.09, + "grad_norm": 3.1561189868499158, + "learning_rate": 1.9987215925583515e-05, + "loss": 1.7664, + "step": 7587 + }, + { + "epoch": 0.09, + "grad_norm": 33.707957065717025, + "learning_rate": 1.9987186379046938e-05, + "loss": 1.8208, + "step": 7590 + }, + { + "epoch": 0.09, + "grad_norm": 21.265201019594848, + "learning_rate": 1.9987156798427686e-05, + "loss": 1.5556, + "step": 7593 + }, + { + "epoch": 0.09, + "grad_norm": 10.861396675712081, + "learning_rate": 1.998712718372587e-05, + "loss": 1.5257, + "step": 7596 + }, + { + "epoch": 0.09, + "grad_norm": 45.93068601735888, + "learning_rate": 1.9987097534941583e-05, + "loss": 1.8831, + "step": 7599 + }, + { + "epoch": 0.09, + "grad_norm": 6.2681744709409974, + "learning_rate": 1.998706785207493e-05, + "loss": 1.7832, + "step": 7602 + }, + { + "epoch": 0.09, + "grad_norm": 14.258239720703214, + "learning_rate": 1.9987038135126015e-05, + "loss": 1.654, + "step": 7605 + }, + { + "epoch": 0.09, + "grad_norm": 74.08206501636337, + "learning_rate": 1.9987008384094932e-05, + "loss": 1.2903, + "step": 7608 + }, + { + "epoch": 0.09, + "grad_norm": 13.424067912758824, + "learning_rate": 1.998697859898179e-05, + "loss": 1.5686, + "step": 7611 + }, + { + "epoch": 0.09, + "grad_norm": 27.866165717156548, + "learning_rate": 1.9986948779786684e-05, + "loss": 1.8405, + "step": 7614 + }, + { + "epoch": 0.09, + "grad_norm": 13.098690485743282, + "learning_rate": 1.9986918926509723e-05, + "loss": 1.5749, + "step": 7617 + }, + { + "epoch": 0.09, + "grad_norm": 33.263584155887656, + "learning_rate": 1.9986889039151005e-05, + "loss": 1.6541, + "step": 7620 + }, + { + "epoch": 0.09, + "grad_norm": 6.8070400181598645, + "learning_rate": 1.9986859117710625e-05, + "loss": 1.5502, + "step": 7623 + }, + { + "epoch": 0.09, + "grad_norm": 79.6249841395367, + "learning_rate": 1.99868291621887e-05, + "loss": 1.6646, + "step": 7626 + }, + { + "epoch": 0.09, + "grad_norm": 18.54682397649646, + "learning_rate": 1.998679917258532e-05, + "loss": 1.6973, + "step": 7629 + }, + { + "epoch": 0.09, + "grad_norm": 26.55782804642822, + "learning_rate": 1.998676914890059e-05, + "loss": 1.8614, + "step": 7632 + }, + { + "epoch": 0.09, + "grad_norm": 22.488473334894294, + "learning_rate": 1.9986739091134616e-05, + "loss": 2.2246, + "step": 7635 + }, + { + "epoch": 0.09, + "grad_norm": 3.6368175169659467, + "learning_rate": 1.99867089992875e-05, + "loss": 1.9982, + "step": 7638 + }, + { + "epoch": 0.09, + "grad_norm": 7.236480697346093, + "learning_rate": 1.9986678873359344e-05, + "loss": 1.8095, + "step": 7641 + }, + { + "epoch": 0.09, + "grad_norm": 96.43366291108758, + "learning_rate": 1.998664871335025e-05, + "loss": 1.5728, + "step": 7644 + }, + { + "epoch": 0.09, + "grad_norm": 36.9391033121384, + "learning_rate": 1.998661851926032e-05, + "loss": 1.96, + "step": 7647 + }, + { + "epoch": 0.09, + "grad_norm": 29.106279491094806, + "learning_rate": 1.998658829108966e-05, + "loss": 1.7171, + "step": 7650 + }, + { + "epoch": 0.09, + "grad_norm": 11.824003637431586, + "learning_rate": 1.9986558028838366e-05, + "loss": 1.5068, + "step": 7653 + }, + { + "epoch": 0.09, + "grad_norm": 26.2955281813223, + "learning_rate": 1.9986527732506554e-05, + "loss": 1.6502, + "step": 7656 + }, + { + "epoch": 0.09, + "grad_norm": 8.344934980844215, + "learning_rate": 1.9986497402094317e-05, + "loss": 1.6161, + "step": 7659 + }, + { + "epoch": 0.09, + "grad_norm": 107.95970316780922, + "learning_rate": 1.998646703760176e-05, + "loss": 1.6413, + "step": 7662 + }, + { + "epoch": 0.09, + "grad_norm": 59.06538732143707, + "learning_rate": 1.9986436639028988e-05, + "loss": 2.2824, + "step": 7665 + }, + { + "epoch": 0.09, + "grad_norm": 25.697592090310994, + "learning_rate": 1.9986406206376107e-05, + "loss": 1.7899, + "step": 7668 + }, + { + "epoch": 0.09, + "grad_norm": 35.083603052483916, + "learning_rate": 1.9986375739643217e-05, + "loss": 1.8757, + "step": 7671 + }, + { + "epoch": 0.09, + "grad_norm": 18.61102942911838, + "learning_rate": 1.9986345238830425e-05, + "loss": 1.8569, + "step": 7674 + }, + { + "epoch": 0.09, + "grad_norm": 6.438400849311869, + "learning_rate": 1.9986314703937834e-05, + "loss": 1.913, + "step": 7677 + }, + { + "epoch": 0.09, + "grad_norm": 68.69729586469658, + "learning_rate": 1.9986284134965544e-05, + "loss": 1.4889, + "step": 7680 + }, + { + "epoch": 0.09, + "grad_norm": 14.962166258983387, + "learning_rate": 1.9986253531913665e-05, + "loss": 1.8343, + "step": 7683 + }, + { + "epoch": 0.09, + "grad_norm": 15.115443853366665, + "learning_rate": 1.99862228947823e-05, + "loss": 2.1601, + "step": 7686 + }, + { + "epoch": 0.09, + "grad_norm": 33.628345035917505, + "learning_rate": 1.9986192223571558e-05, + "loss": 1.4564, + "step": 7689 + }, + { + "epoch": 0.09, + "grad_norm": 59.57191872249051, + "learning_rate": 1.9986161518281532e-05, + "loss": 2.2303, + "step": 7692 + }, + { + "epoch": 0.09, + "grad_norm": 16.733204352645913, + "learning_rate": 1.998613077891234e-05, + "loss": 1.5135, + "step": 7695 + }, + { + "epoch": 0.09, + "grad_norm": 21.9398274887392, + "learning_rate": 1.9986100005464076e-05, + "loss": 1.4508, + "step": 7698 + }, + { + "epoch": 0.09, + "grad_norm": 9.190206940422934, + "learning_rate": 1.998606919793685e-05, + "loss": 1.7929, + "step": 7701 + }, + { + "epoch": 0.09, + "grad_norm": 35.77368903104029, + "learning_rate": 1.9986038356330768e-05, + "loss": 1.3215, + "step": 7704 + }, + { + "epoch": 0.09, + "grad_norm": 6.994480038032485, + "learning_rate": 1.9986007480645936e-05, + "loss": 1.4703, + "step": 7707 + }, + { + "epoch": 0.09, + "grad_norm": 29.305645359212036, + "learning_rate": 1.9985976570882455e-05, + "loss": 2.1409, + "step": 7710 + }, + { + "epoch": 0.09, + "grad_norm": 13.2946831593797, + "learning_rate": 1.9985945627040433e-05, + "loss": 1.5519, + "step": 7713 + }, + { + "epoch": 0.09, + "grad_norm": 41.320236091268924, + "learning_rate": 1.9985914649119975e-05, + "loss": 1.7948, + "step": 7716 + }, + { + "epoch": 0.09, + "grad_norm": 15.809070790997394, + "learning_rate": 1.9985883637121186e-05, + "loss": 1.5576, + "step": 7719 + }, + { + "epoch": 0.09, + "grad_norm": 19.44532757443697, + "learning_rate": 1.9985852591044178e-05, + "loss": 1.8362, + "step": 7722 + }, + { + "epoch": 0.09, + "grad_norm": 12.361894693176716, + "learning_rate": 1.998582151088905e-05, + "loss": 1.5046, + "step": 7725 + }, + { + "epoch": 0.09, + "grad_norm": 30.007144592591242, + "learning_rate": 1.9985790396655907e-05, + "loss": 1.3032, + "step": 7728 + }, + { + "epoch": 0.09, + "grad_norm": 39.9719238457314, + "learning_rate": 1.998575924834486e-05, + "loss": 1.8124, + "step": 7731 + }, + { + "epoch": 0.09, + "grad_norm": 14.574647617275048, + "learning_rate": 1.9985728065956013e-05, + "loss": 1.6077, + "step": 7734 + }, + { + "epoch": 0.09, + "grad_norm": 42.07039033350935, + "learning_rate": 1.9985696849489475e-05, + "loss": 2.0311, + "step": 7737 + }, + { + "epoch": 0.09, + "grad_norm": 7.469321910278827, + "learning_rate": 1.9985665598945347e-05, + "loss": 1.8329, + "step": 7740 + }, + { + "epoch": 0.09, + "grad_norm": 37.05202660686421, + "learning_rate": 1.9985634314323742e-05, + "loss": 1.6858, + "step": 7743 + }, + { + "epoch": 0.09, + "grad_norm": 9.367720401615848, + "learning_rate": 1.9985602995624765e-05, + "loss": 1.7543, + "step": 7746 + }, + { + "epoch": 0.09, + "grad_norm": 7.210714728761073, + "learning_rate": 1.998557164284852e-05, + "loss": 1.443, + "step": 7749 + }, + { + "epoch": 0.09, + "grad_norm": 9.086353769241864, + "learning_rate": 1.9985540255995115e-05, + "loss": 1.7674, + "step": 7752 + }, + { + "epoch": 0.09, + "grad_norm": 8.985711643827539, + "learning_rate": 1.998550883506466e-05, + "loss": 1.785, + "step": 7755 + }, + { + "epoch": 0.09, + "grad_norm": 15.329031297990506, + "learning_rate": 1.998547738005726e-05, + "loss": 1.7909, + "step": 7758 + }, + { + "epoch": 0.09, + "grad_norm": 9.126140773822463, + "learning_rate": 1.9985445890973022e-05, + "loss": 1.7106, + "step": 7761 + }, + { + "epoch": 0.09, + "grad_norm": 39.49055015009367, + "learning_rate": 1.998541436781205e-05, + "loss": 1.646, + "step": 7764 + }, + { + "epoch": 0.09, + "grad_norm": 6.129887999302348, + "learning_rate": 1.9985382810574458e-05, + "loss": 1.9263, + "step": 7767 + }, + { + "epoch": 0.09, + "grad_norm": 27.65725109947854, + "learning_rate": 1.9985351219260353e-05, + "loss": 1.7899, + "step": 7770 + }, + { + "epoch": 0.09, + "grad_norm": 5.231851998735074, + "learning_rate": 1.998531959386984e-05, + "loss": 1.4223, + "step": 7773 + }, + { + "epoch": 0.09, + "grad_norm": 12.209066668037313, + "learning_rate": 1.9985287934403028e-05, + "loss": 1.6053, + "step": 7776 + }, + { + "epoch": 0.09, + "grad_norm": 31.285231043617713, + "learning_rate": 1.9985256240860025e-05, + "loss": 1.642, + "step": 7779 + }, + { + "epoch": 0.09, + "grad_norm": 16.559327186079525, + "learning_rate": 1.9985224513240938e-05, + "loss": 1.6895, + "step": 7782 + }, + { + "epoch": 0.09, + "grad_norm": 10.325712010400437, + "learning_rate": 1.9985192751545877e-05, + "loss": 1.797, + "step": 7785 + }, + { + "epoch": 0.09, + "grad_norm": 4.314996596938037, + "learning_rate": 1.9985160955774953e-05, + "loss": 1.4998, + "step": 7788 + }, + { + "epoch": 0.09, + "grad_norm": 14.861018117325173, + "learning_rate": 1.9985129125928265e-05, + "loss": 1.7536, + "step": 7791 + }, + { + "epoch": 0.09, + "grad_norm": 53.8925893221705, + "learning_rate": 1.9985097262005933e-05, + "loss": 1.8343, + "step": 7794 + }, + { + "epoch": 0.09, + "grad_norm": 12.269241489343605, + "learning_rate": 1.998506536400806e-05, + "loss": 1.4192, + "step": 7797 + }, + { + "epoch": 0.09, + "grad_norm": 18.807454020215378, + "learning_rate": 1.9985033431934754e-05, + "loss": 1.5707, + "step": 7800 + }, + { + "epoch": 0.09, + "grad_norm": 64.34520251246191, + "learning_rate": 1.9985001465786123e-05, + "loss": 1.833, + "step": 7803 + }, + { + "epoch": 0.09, + "grad_norm": 21.298175431588835, + "learning_rate": 1.9984969465562286e-05, + "loss": 1.7281, + "step": 7806 + }, + { + "epoch": 0.09, + "grad_norm": 52.17325440783187, + "learning_rate": 1.998493743126334e-05, + "loss": 1.7612, + "step": 7809 + }, + { + "epoch": 0.09, + "grad_norm": 8.29811146348934, + "learning_rate": 1.99849053628894e-05, + "loss": 1.8892, + "step": 7812 + }, + { + "epoch": 0.09, + "grad_norm": 39.74826163931425, + "learning_rate": 1.9984873260440573e-05, + "loss": 2.0607, + "step": 7815 + }, + { + "epoch": 0.09, + "grad_norm": 8.82812911997779, + "learning_rate": 1.998484112391697e-05, + "loss": 1.9424, + "step": 7818 + }, + { + "epoch": 0.09, + "grad_norm": 17.070996389564637, + "learning_rate": 1.9984808953318703e-05, + "loss": 1.9028, + "step": 7821 + }, + { + "epoch": 0.09, + "grad_norm": 8.69083832521791, + "learning_rate": 1.998477674864588e-05, + "loss": 1.4403, + "step": 7824 + }, + { + "epoch": 0.09, + "grad_norm": 22.239697133233005, + "learning_rate": 1.9984744509898607e-05, + "loss": 1.6668, + "step": 7827 + }, + { + "epoch": 0.09, + "grad_norm": 10.649042026319, + "learning_rate": 1.9984712237077e-05, + "loss": 1.8831, + "step": 7830 + }, + { + "epoch": 0.09, + "grad_norm": 24.658558818660634, + "learning_rate": 1.9984679930181165e-05, + "loss": 2.0179, + "step": 7833 + }, + { + "epoch": 0.09, + "grad_norm": 42.814809476352565, + "learning_rate": 1.9984647589211216e-05, + "loss": 1.6444, + "step": 7836 + }, + { + "epoch": 0.09, + "grad_norm": 21.617590088108006, + "learning_rate": 1.998461521416726e-05, + "loss": 1.8393, + "step": 7839 + }, + { + "epoch": 0.09, + "grad_norm": 23.760064133711374, + "learning_rate": 1.9984582805049406e-05, + "loss": 1.7354, + "step": 7842 + }, + { + "epoch": 0.09, + "grad_norm": 90.91975310900153, + "learning_rate": 1.9984550361857772e-05, + "loss": 1.787, + "step": 7845 + }, + { + "epoch": 0.09, + "grad_norm": 9.388998369906469, + "learning_rate": 1.9984517884592463e-05, + "loss": 1.6548, + "step": 7848 + }, + { + "epoch": 0.09, + "grad_norm": 16.75637724474761, + "learning_rate": 1.998448537325359e-05, + "loss": 1.8076, + "step": 7851 + }, + { + "epoch": 0.09, + "grad_norm": 14.07551121918237, + "learning_rate": 1.9984452827841265e-05, + "loss": 1.6519, + "step": 7854 + }, + { + "epoch": 0.09, + "grad_norm": 21.496490318729943, + "learning_rate": 1.9984420248355596e-05, + "loss": 1.7424, + "step": 7857 + }, + { + "epoch": 0.09, + "grad_norm": 13.623515793530542, + "learning_rate": 1.99843876347967e-05, + "loss": 1.7396, + "step": 7860 + }, + { + "epoch": 0.09, + "grad_norm": 5.441435177490315, + "learning_rate": 1.9984354987164684e-05, + "loss": 1.386, + "step": 7863 + }, + { + "epoch": 0.09, + "grad_norm": 85.19492880049756, + "learning_rate": 1.9984322305459662e-05, + "loss": 1.7354, + "step": 7866 + }, + { + "epoch": 0.09, + "grad_norm": 6.032211179960747, + "learning_rate": 1.9984289589681743e-05, + "loss": 1.5726, + "step": 7869 + }, + { + "epoch": 0.09, + "grad_norm": 75.13953300417943, + "learning_rate": 1.9984256839831044e-05, + "loss": 1.8328, + "step": 7872 + }, + { + "epoch": 0.09, + "grad_norm": 66.72506010894308, + "learning_rate": 1.998422405590767e-05, + "loss": 1.8102, + "step": 7875 + }, + { + "epoch": 0.09, + "grad_norm": 4.491509341199596, + "learning_rate": 1.9984191237911734e-05, + "loss": 1.5072, + "step": 7878 + }, + { + "epoch": 0.09, + "grad_norm": 13.187875414248797, + "learning_rate": 1.998415838584335e-05, + "loss": 1.9854, + "step": 7881 + }, + { + "epoch": 0.09, + "grad_norm": 15.915814958245264, + "learning_rate": 1.9984125499702634e-05, + "loss": 1.8201, + "step": 7884 + }, + { + "epoch": 0.09, + "grad_norm": 60.32616499259471, + "learning_rate": 1.998409257948969e-05, + "loss": 1.8747, + "step": 7887 + }, + { + "epoch": 0.09, + "grad_norm": 16.50979766773832, + "learning_rate": 1.9984059625204636e-05, + "loss": 1.6978, + "step": 7890 + }, + { + "epoch": 0.09, + "grad_norm": 54.14214810066949, + "learning_rate": 1.998402663684758e-05, + "loss": 1.6956, + "step": 7893 + }, + { + "epoch": 0.09, + "grad_norm": 40.38477864445839, + "learning_rate": 1.9983993614418636e-05, + "loss": 1.5465, + "step": 7896 + }, + { + "epoch": 0.09, + "grad_norm": 23.04673305282861, + "learning_rate": 1.9983960557917923e-05, + "loss": 1.8711, + "step": 7899 + }, + { + "epoch": 0.1, + "grad_norm": 23.112268598221824, + "learning_rate": 1.9983927467345547e-05, + "loss": 1.7895, + "step": 7902 + }, + { + "epoch": 0.1, + "grad_norm": 32.84581909808872, + "learning_rate": 1.9983894342701623e-05, + "loss": 1.8343, + "step": 7905 + }, + { + "epoch": 0.1, + "grad_norm": 10.070021522936875, + "learning_rate": 1.9983861183986263e-05, + "loss": 1.5979, + "step": 7908 + }, + { + "epoch": 0.1, + "grad_norm": 18.49844842715892, + "learning_rate": 1.9983827991199582e-05, + "loss": 1.4322, + "step": 7911 + }, + { + "epoch": 0.1, + "grad_norm": 25.992426351549817, + "learning_rate": 1.998379476434169e-05, + "loss": 1.5355, + "step": 7914 + }, + { + "epoch": 0.1, + "grad_norm": 22.459155916559148, + "learning_rate": 1.9983761503412707e-05, + "loss": 1.8197, + "step": 7917 + }, + { + "epoch": 0.1, + "grad_norm": 12.550082500446253, + "learning_rate": 1.9983728208412738e-05, + "loss": 1.9458, + "step": 7920 + }, + { + "epoch": 0.1, + "grad_norm": 42.166751997552105, + "learning_rate": 1.9983694879341904e-05, + "loss": 1.7835, + "step": 7923 + }, + { + "epoch": 0.1, + "grad_norm": 38.5869128010769, + "learning_rate": 1.9983661516200313e-05, + "loss": 1.7139, + "step": 7926 + }, + { + "epoch": 0.1, + "grad_norm": 13.234505956853951, + "learning_rate": 1.998362811898808e-05, + "loss": 1.4482, + "step": 7929 + }, + { + "epoch": 0.1, + "grad_norm": 15.682172826859132, + "learning_rate": 1.9983594687705325e-05, + "loss": 1.8321, + "step": 7932 + }, + { + "epoch": 0.1, + "grad_norm": 46.03196475839626, + "learning_rate": 1.9983561222352152e-05, + "loss": 1.5177, + "step": 7935 + }, + { + "epoch": 0.1, + "grad_norm": 34.67071558731728, + "learning_rate": 1.9983527722928685e-05, + "loss": 1.5583, + "step": 7938 + }, + { + "epoch": 0.1, + "grad_norm": 13.812255179228904, + "learning_rate": 1.9983494189435032e-05, + "loss": 1.5621, + "step": 7941 + }, + { + "epoch": 0.1, + "grad_norm": 23.10826742490086, + "learning_rate": 1.998346062187131e-05, + "loss": 1.9743, + "step": 7944 + }, + { + "epoch": 0.1, + "grad_norm": 36.29786652358726, + "learning_rate": 1.9983427020237633e-05, + "loss": 1.992, + "step": 7947 + }, + { + "epoch": 0.1, + "grad_norm": 10.635404722205973, + "learning_rate": 1.9983393384534113e-05, + "loss": 1.9891, + "step": 7950 + }, + { + "epoch": 0.1, + "grad_norm": 13.77947105998913, + "learning_rate": 1.998335971476087e-05, + "loss": 1.3599, + "step": 7953 + }, + { + "epoch": 0.1, + "grad_norm": 11.130627812707614, + "learning_rate": 1.9983326010918014e-05, + "loss": 1.7724, + "step": 7956 + }, + { + "epoch": 0.1, + "grad_norm": 54.47580200041252, + "learning_rate": 1.9983292273005665e-05, + "loss": 1.3893, + "step": 7959 + }, + { + "epoch": 0.1, + "grad_norm": 96.09582476840941, + "learning_rate": 1.9983258501023933e-05, + "loss": 1.4693, + "step": 7962 + }, + { + "epoch": 0.1, + "grad_norm": 31.173087536579065, + "learning_rate": 1.998322469497294e-05, + "loss": 1.5462, + "step": 7965 + }, + { + "epoch": 0.1, + "grad_norm": 22.7278314972649, + "learning_rate": 1.9983190854852794e-05, + "loss": 1.4198, + "step": 7968 + }, + { + "epoch": 0.1, + "grad_norm": 7.795085675667768, + "learning_rate": 1.998315698066361e-05, + "loss": 1.6934, + "step": 7971 + }, + { + "epoch": 0.1, + "grad_norm": 19.333705354068645, + "learning_rate": 1.9983123072405513e-05, + "loss": 1.5566, + "step": 7974 + }, + { + "epoch": 0.1, + "grad_norm": 36.51449128429592, + "learning_rate": 1.998308913007861e-05, + "loss": 1.6782, + "step": 7977 + }, + { + "epoch": 0.1, + "grad_norm": 17.180119342578287, + "learning_rate": 1.998305515368302e-05, + "loss": 1.8885, + "step": 7980 + }, + { + "epoch": 0.1, + "grad_norm": 22.935380978723384, + "learning_rate": 1.998302114321886e-05, + "loss": 1.6139, + "step": 7983 + }, + { + "epoch": 0.1, + "grad_norm": 15.9331617769778, + "learning_rate": 1.9982987098686243e-05, + "loss": 2.1984, + "step": 7986 + }, + { + "epoch": 0.1, + "grad_norm": 9.74749076407839, + "learning_rate": 1.998295302008529e-05, + "loss": 1.6697, + "step": 7989 + }, + { + "epoch": 0.1, + "grad_norm": 17.31055621433652, + "learning_rate": 1.998291890741611e-05, + "loss": 1.7547, + "step": 7992 + }, + { + "epoch": 0.1, + "grad_norm": 46.275367520035765, + "learning_rate": 1.9982884760678827e-05, + "loss": 1.8876, + "step": 7995 + }, + { + "epoch": 0.1, + "grad_norm": 20.897368466919726, + "learning_rate": 1.9982850579873553e-05, + "loss": 1.9349, + "step": 7998 + }, + { + "epoch": 0.1, + "grad_norm": 95.00487859639996, + "learning_rate": 1.9982816365000403e-05, + "loss": 1.6126, + "step": 8001 + }, + { + "epoch": 0.1, + "grad_norm": 9.54791257644419, + "learning_rate": 1.99827821160595e-05, + "loss": 1.8462, + "step": 8004 + }, + { + "epoch": 0.1, + "grad_norm": 59.48531923617136, + "learning_rate": 1.9982747833050954e-05, + "loss": 1.5151, + "step": 8007 + }, + { + "epoch": 0.1, + "grad_norm": 43.92458236100196, + "learning_rate": 1.998271351597489e-05, + "loss": 1.5164, + "step": 8010 + }, + { + "epoch": 0.1, + "grad_norm": 6.883688959882873, + "learning_rate": 1.998267916483142e-05, + "loss": 1.9451, + "step": 8013 + }, + { + "epoch": 0.1, + "grad_norm": 103.3023909583166, + "learning_rate": 1.998264477962066e-05, + "loss": 1.4427, + "step": 8016 + }, + { + "epoch": 0.1, + "grad_norm": 50.453578898756284, + "learning_rate": 1.998261036034273e-05, + "loss": 1.9179, + "step": 8019 + }, + { + "epoch": 0.1, + "grad_norm": 8.641506558519579, + "learning_rate": 1.9982575906997743e-05, + "loss": 1.6569, + "step": 8022 + }, + { + "epoch": 0.1, + "grad_norm": 20.444450447771022, + "learning_rate": 1.9982541419585823e-05, + "loss": 1.9533, + "step": 8025 + }, + { + "epoch": 0.1, + "grad_norm": 76.70693034634118, + "learning_rate": 1.9982506898107083e-05, + "loss": 2.0429, + "step": 8028 + }, + { + "epoch": 0.1, + "grad_norm": 27.57279011567939, + "learning_rate": 1.9982472342561646e-05, + "loss": 1.8139, + "step": 8031 + }, + { + "epoch": 0.1, + "grad_norm": 9.77034148536003, + "learning_rate": 1.9982437752949625e-05, + "loss": 1.3888, + "step": 8034 + }, + { + "epoch": 0.1, + "grad_norm": 18.56143995582462, + "learning_rate": 1.9982403129271138e-05, + "loss": 1.4074, + "step": 8037 + }, + { + "epoch": 0.1, + "grad_norm": 12.959376733316901, + "learning_rate": 1.9982368471526307e-05, + "loss": 1.8533, + "step": 8040 + }, + { + "epoch": 0.1, + "grad_norm": 50.255527725316355, + "learning_rate": 1.9982333779715248e-05, + "loss": 1.7205, + "step": 8043 + }, + { + "epoch": 0.1, + "grad_norm": 9.529289469798302, + "learning_rate": 1.998229905383808e-05, + "loss": 1.7787, + "step": 8046 + }, + { + "epoch": 0.1, + "grad_norm": 11.276407308524858, + "learning_rate": 1.998226429389492e-05, + "loss": 1.6637, + "step": 8049 + }, + { + "epoch": 0.1, + "grad_norm": 12.440832671676743, + "learning_rate": 1.9982229499885885e-05, + "loss": 1.7013, + "step": 8052 + }, + { + "epoch": 0.1, + "grad_norm": 15.544668801968433, + "learning_rate": 1.99821946718111e-05, + "loss": 1.7028, + "step": 8055 + }, + { + "epoch": 0.1, + "grad_norm": 67.1658547157351, + "learning_rate": 1.9982159809670677e-05, + "loss": 2.0874, + "step": 8058 + }, + { + "epoch": 0.1, + "grad_norm": 14.412974226487265, + "learning_rate": 1.998212491346474e-05, + "loss": 1.7283, + "step": 8061 + }, + { + "epoch": 0.1, + "grad_norm": 15.499011151042216, + "learning_rate": 1.998208998319341e-05, + "loss": 1.9967, + "step": 8064 + }, + { + "epoch": 0.1, + "grad_norm": 22.418765725847283, + "learning_rate": 1.9982055018856793e-05, + "loss": 2.0155, + "step": 8067 + }, + { + "epoch": 0.1, + "grad_norm": 6.20041053982446, + "learning_rate": 1.9982020020455027e-05, + "loss": 1.668, + "step": 8070 + }, + { + "epoch": 0.1, + "grad_norm": 8.3659999546232, + "learning_rate": 1.9981984987988214e-05, + "loss": 1.7722, + "step": 8073 + }, + { + "epoch": 0.1, + "grad_norm": 12.062507158163626, + "learning_rate": 1.9981949921456487e-05, + "loss": 1.4416, + "step": 8076 + }, + { + "epoch": 0.1, + "grad_norm": 53.94751316419187, + "learning_rate": 1.998191482085996e-05, + "loss": 1.7572, + "step": 8079 + }, + { + "epoch": 0.1, + "grad_norm": 36.367265831238164, + "learning_rate": 1.9981879686198753e-05, + "loss": 1.8992, + "step": 8082 + }, + { + "epoch": 0.1, + "grad_norm": 56.44642405155505, + "learning_rate": 1.9981844517472986e-05, + "loss": 1.5099, + "step": 8085 + }, + { + "epoch": 0.1, + "grad_norm": 6.587263803987068, + "learning_rate": 1.998180931468278e-05, + "loss": 1.7098, + "step": 8088 + }, + { + "epoch": 0.1, + "grad_norm": 17.070664237999384, + "learning_rate": 1.998177407782825e-05, + "loss": 1.7094, + "step": 8091 + }, + { + "epoch": 0.1, + "grad_norm": 20.562227529286623, + "learning_rate": 1.9981738806909524e-05, + "loss": 1.8708, + "step": 8094 + }, + { + "epoch": 0.1, + "grad_norm": 24.815347945067504, + "learning_rate": 1.998170350192672e-05, + "loss": 1.9027, + "step": 8097 + }, + { + "epoch": 0.1, + "grad_norm": 15.52377277097681, + "learning_rate": 1.9981668162879955e-05, + "loss": 1.6102, + "step": 8100 + }, + { + "epoch": 0.1, + "grad_norm": 12.513220841421337, + "learning_rate": 1.9981632789769355e-05, + "loss": 1.8521, + "step": 8103 + }, + { + "epoch": 0.1, + "grad_norm": 9.876307880905092, + "learning_rate": 1.998159738259503e-05, + "loss": 1.6042, + "step": 8106 + }, + { + "epoch": 0.1, + "grad_norm": 66.73431142460156, + "learning_rate": 1.998156194135712e-05, + "loss": 1.7629, + "step": 8109 + }, + { + "epoch": 0.1, + "grad_norm": 12.494544262892184, + "learning_rate": 1.9981526466055725e-05, + "loss": 1.9073, + "step": 8112 + }, + { + "epoch": 0.1, + "grad_norm": 76.2651930091981, + "learning_rate": 1.998149095669098e-05, + "loss": 1.7595, + "step": 8115 + }, + { + "epoch": 0.1, + "grad_norm": 30.5802256343861, + "learning_rate": 1.9981455413263e-05, + "loss": 1.8021, + "step": 8118 + }, + { + "epoch": 0.1, + "grad_norm": 33.81863441084335, + "learning_rate": 1.998141983577191e-05, + "loss": 1.8941, + "step": 8121 + }, + { + "epoch": 0.1, + "grad_norm": 27.428293296250043, + "learning_rate": 1.9981384224217825e-05, + "loss": 1.7294, + "step": 8124 + }, + { + "epoch": 0.1, + "grad_norm": 56.14974949508449, + "learning_rate": 1.9981348578600874e-05, + "loss": 1.6654, + "step": 8127 + }, + { + "epoch": 0.1, + "grad_norm": 13.072850325665181, + "learning_rate": 1.9981312898921173e-05, + "loss": 1.6358, + "step": 8130 + }, + { + "epoch": 0.1, + "grad_norm": 8.011073621274557, + "learning_rate": 1.9981277185178853e-05, + "loss": 2.1159, + "step": 8133 + }, + { + "epoch": 0.1, + "grad_norm": 9.8821322588466, + "learning_rate": 1.9981241437374024e-05, + "loss": 1.8752, + "step": 8136 + }, + { + "epoch": 0.1, + "grad_norm": 18.492056329260063, + "learning_rate": 1.9981205655506812e-05, + "loss": 1.6117, + "step": 8139 + }, + { + "epoch": 0.1, + "grad_norm": 3.0991355353641996, + "learning_rate": 1.9981169839577343e-05, + "loss": 1.8802, + "step": 8142 + }, + { + "epoch": 0.1, + "grad_norm": 33.06364028821741, + "learning_rate": 1.9981133989585737e-05, + "loss": 1.5078, + "step": 8145 + }, + { + "epoch": 0.1, + "grad_norm": 23.769298541398474, + "learning_rate": 1.998109810553211e-05, + "loss": 1.7879, + "step": 8148 + }, + { + "epoch": 0.1, + "grad_norm": 33.21887064772178, + "learning_rate": 1.99810621874166e-05, + "loss": 1.6611, + "step": 8151 + }, + { + "epoch": 0.1, + "grad_norm": 6.715166926493275, + "learning_rate": 1.9981026235239317e-05, + "loss": 1.6228, + "step": 8154 + }, + { + "epoch": 0.1, + "grad_norm": 73.48799525077094, + "learning_rate": 1.998099024900038e-05, + "loss": 2.0298, + "step": 8157 + }, + { + "epoch": 0.1, + "grad_norm": 21.999813484900145, + "learning_rate": 1.9980954228699927e-05, + "loss": 1.8882, + "step": 8160 + }, + { + "epoch": 0.1, + "grad_norm": 18.973198153545013, + "learning_rate": 1.9980918174338066e-05, + "loss": 1.7808, + "step": 8163 + }, + { + "epoch": 0.1, + "grad_norm": 16.608567430881358, + "learning_rate": 1.998088208591493e-05, + "loss": 1.7113, + "step": 8166 + }, + { + "epoch": 0.1, + "grad_norm": 8.046738009688829, + "learning_rate": 1.9980845963430636e-05, + "loss": 1.6294, + "step": 8169 + }, + { + "epoch": 0.1, + "grad_norm": 11.79862196155398, + "learning_rate": 1.998080980688531e-05, + "loss": 1.4036, + "step": 8172 + }, + { + "epoch": 0.1, + "grad_norm": 67.69503079903383, + "learning_rate": 1.9980773616279078e-05, + "loss": 1.7826, + "step": 8175 + }, + { + "epoch": 0.1, + "grad_norm": 19.658388357135642, + "learning_rate": 1.998073739161206e-05, + "loss": 1.7641, + "step": 8178 + }, + { + "epoch": 0.1, + "grad_norm": 14.745736518476285, + "learning_rate": 1.9980701132884378e-05, + "loss": 1.5443, + "step": 8181 + }, + { + "epoch": 0.1, + "grad_norm": 35.305071013835736, + "learning_rate": 1.998066484009616e-05, + "loss": 1.9097, + "step": 8184 + }, + { + "epoch": 0.1, + "grad_norm": 4.134742484898761, + "learning_rate": 1.9980628513247525e-05, + "loss": 1.3908, + "step": 8187 + }, + { + "epoch": 0.1, + "grad_norm": 32.445981960208364, + "learning_rate": 1.9980592152338606e-05, + "loss": 1.7697, + "step": 8190 + }, + { + "epoch": 0.1, + "grad_norm": 12.511789666140267, + "learning_rate": 1.9980555757369516e-05, + "loss": 1.8554, + "step": 8193 + }, + { + "epoch": 0.1, + "grad_norm": 43.97002658250168, + "learning_rate": 1.9980519328340384e-05, + "loss": 1.9083, + "step": 8196 + }, + { + "epoch": 0.1, + "grad_norm": 60.628699271343116, + "learning_rate": 1.998048286525134e-05, + "loss": 1.5084, + "step": 8199 + }, + { + "epoch": 0.1, + "grad_norm": 29.89859618476668, + "learning_rate": 1.9980446368102496e-05, + "loss": 1.3142, + "step": 8202 + }, + { + "epoch": 0.1, + "grad_norm": 24.612719672823875, + "learning_rate": 1.9980409836893987e-05, + "loss": 1.6236, + "step": 8205 + }, + { + "epoch": 0.1, + "grad_norm": 28.285349797765853, + "learning_rate": 1.998037327162593e-05, + "loss": 2.0871, + "step": 8208 + }, + { + "epoch": 0.1, + "grad_norm": 80.32775630045708, + "learning_rate": 1.998033667229846e-05, + "loss": 1.7964, + "step": 8211 + }, + { + "epoch": 0.1, + "grad_norm": 26.795408878210477, + "learning_rate": 1.9980300038911693e-05, + "loss": 1.9522, + "step": 8214 + }, + { + "epoch": 0.1, + "grad_norm": 13.583307200408035, + "learning_rate": 1.9980263371465758e-05, + "loss": 1.9749, + "step": 8217 + }, + { + "epoch": 0.1, + "grad_norm": 2.860173360598828, + "learning_rate": 1.9980226669960777e-05, + "loss": 1.4671, + "step": 8220 + }, + { + "epoch": 0.1, + "grad_norm": 33.217982740272156, + "learning_rate": 1.998018993439688e-05, + "loss": 2.0056, + "step": 8223 + }, + { + "epoch": 0.1, + "grad_norm": 57.95024546123049, + "learning_rate": 1.9980153164774187e-05, + "loss": 1.6242, + "step": 8226 + }, + { + "epoch": 0.1, + "grad_norm": 14.66922913569338, + "learning_rate": 1.9980116361092828e-05, + "loss": 1.6148, + "step": 8229 + }, + { + "epoch": 0.1, + "grad_norm": 29.350004307771357, + "learning_rate": 1.9980079523352925e-05, + "loss": 1.4333, + "step": 8232 + }, + { + "epoch": 0.1, + "grad_norm": 9.927606529991124, + "learning_rate": 1.9980042651554606e-05, + "loss": 1.5141, + "step": 8235 + }, + { + "epoch": 0.1, + "grad_norm": 27.6066447191306, + "learning_rate": 1.9980005745697997e-05, + "loss": 1.5966, + "step": 8238 + }, + { + "epoch": 0.1, + "grad_norm": 15.975429614881687, + "learning_rate": 1.997996880578322e-05, + "loss": 1.6796, + "step": 8241 + }, + { + "epoch": 0.1, + "grad_norm": 17.794165445477482, + "learning_rate": 1.997993183181041e-05, + "loss": 1.4912, + "step": 8244 + }, + { + "epoch": 0.1, + "grad_norm": 21.499059208863645, + "learning_rate": 1.9979894823779682e-05, + "loss": 1.6219, + "step": 8247 + }, + { + "epoch": 0.1, + "grad_norm": 6.74914756812342, + "learning_rate": 1.997985778169117e-05, + "loss": 1.7173, + "step": 8250 + }, + { + "epoch": 0.1, + "grad_norm": 15.634478213466867, + "learning_rate": 1.9979820705544997e-05, + "loss": 1.8285, + "step": 8253 + }, + { + "epoch": 0.1, + "grad_norm": 6.17157945605076, + "learning_rate": 1.9979783595341293e-05, + "loss": 1.7941, + "step": 8256 + }, + { + "epoch": 0.1, + "grad_norm": 33.98991344134037, + "learning_rate": 1.997974645108018e-05, + "loss": 1.9843, + "step": 8259 + }, + { + "epoch": 0.1, + "grad_norm": 6.790802820729943, + "learning_rate": 1.997970927276179e-05, + "loss": 1.9371, + "step": 8262 + }, + { + "epoch": 0.1, + "grad_norm": 18.2236993106373, + "learning_rate": 1.9979672060386244e-05, + "loss": 1.6749, + "step": 8265 + }, + { + "epoch": 0.1, + "grad_norm": 28.151736535557642, + "learning_rate": 1.9979634813953672e-05, + "loss": 1.7687, + "step": 8268 + }, + { + "epoch": 0.1, + "grad_norm": 9.70839203348622, + "learning_rate": 1.9979597533464202e-05, + "loss": 1.6925, + "step": 8271 + }, + { + "epoch": 0.1, + "grad_norm": 33.1656070869699, + "learning_rate": 1.997956021891796e-05, + "loss": 2.0151, + "step": 8274 + }, + { + "epoch": 0.1, + "grad_norm": 65.68276153410729, + "learning_rate": 1.9979522870315072e-05, + "loss": 1.7479, + "step": 8277 + }, + { + "epoch": 0.1, + "grad_norm": 47.18294618917654, + "learning_rate": 1.997948548765567e-05, + "loss": 1.7546, + "step": 8280 + }, + { + "epoch": 0.1, + "grad_norm": 7.945455721258224, + "learning_rate": 1.997944807093988e-05, + "loss": 1.6764, + "step": 8283 + }, + { + "epoch": 0.1, + "grad_norm": 18.819185081487642, + "learning_rate": 1.9979410620167825e-05, + "loss": 1.6234, + "step": 8286 + }, + { + "epoch": 0.1, + "grad_norm": 23.914791325733823, + "learning_rate": 1.9979373135339636e-05, + "loss": 1.6347, + "step": 8289 + }, + { + "epoch": 0.1, + "grad_norm": 21.435867008234638, + "learning_rate": 1.9979335616455444e-05, + "loss": 1.8272, + "step": 8292 + }, + { + "epoch": 0.1, + "grad_norm": 10.838872536385322, + "learning_rate": 1.997929806351537e-05, + "loss": 1.9662, + "step": 8295 + }, + { + "epoch": 0.1, + "grad_norm": 48.927964406410275, + "learning_rate": 1.9979260476519548e-05, + "loss": 1.8437, + "step": 8298 + }, + { + "epoch": 0.1, + "grad_norm": 10.432235009552855, + "learning_rate": 1.9979222855468105e-05, + "loss": 1.8821, + "step": 8301 + }, + { + "epoch": 0.1, + "grad_norm": 10.316683324912592, + "learning_rate": 1.9979185200361167e-05, + "loss": 1.5749, + "step": 8304 + }, + { + "epoch": 0.1, + "grad_norm": 67.38296353029035, + "learning_rate": 1.9979147511198866e-05, + "loss": 1.4172, + "step": 8307 + }, + { + "epoch": 0.1, + "grad_norm": 13.90918160191596, + "learning_rate": 1.997910978798133e-05, + "loss": 1.759, + "step": 8310 + }, + { + "epoch": 0.1, + "grad_norm": 13.903349325045735, + "learning_rate": 1.9979072030708685e-05, + "loss": 1.896, + "step": 8313 + }, + { + "epoch": 0.1, + "grad_norm": 24.56837333594489, + "learning_rate": 1.997903423938106e-05, + "loss": 1.7087, + "step": 8316 + }, + { + "epoch": 0.1, + "grad_norm": 58.56770350982379, + "learning_rate": 1.997899641399859e-05, + "loss": 1.6478, + "step": 8319 + }, + { + "epoch": 0.1, + "grad_norm": 43.53001618581657, + "learning_rate": 1.9978958554561396e-05, + "loss": 2.3639, + "step": 8322 + }, + { + "epoch": 0.1, + "grad_norm": 19.713208659075764, + "learning_rate": 1.9978920661069616e-05, + "loss": 1.6299, + "step": 8325 + }, + { + "epoch": 0.1, + "grad_norm": 12.473821445385532, + "learning_rate": 1.9978882733523367e-05, + "loss": 1.4574, + "step": 8328 + }, + { + "epoch": 0.1, + "grad_norm": 31.953195035036988, + "learning_rate": 1.997884477192279e-05, + "loss": 1.7132, + "step": 8331 + }, + { + "epoch": 0.1, + "grad_norm": 59.2692423979981, + "learning_rate": 1.997880677626801e-05, + "loss": 1.9147, + "step": 8334 + }, + { + "epoch": 0.1, + "grad_norm": 52.367936230836165, + "learning_rate": 1.9978768746559158e-05, + "loss": 1.9351, + "step": 8337 + }, + { + "epoch": 0.1, + "grad_norm": 20.827215669625172, + "learning_rate": 1.997873068279636e-05, + "loss": 1.7528, + "step": 8340 + }, + { + "epoch": 0.1, + "grad_norm": 29.456527967449862, + "learning_rate": 1.9978692584979748e-05, + "loss": 1.6468, + "step": 8343 + }, + { + "epoch": 0.1, + "grad_norm": 20.970260994026436, + "learning_rate": 1.9978654453109453e-05, + "loss": 1.7177, + "step": 8346 + }, + { + "epoch": 0.1, + "grad_norm": 44.12986668623737, + "learning_rate": 1.9978616287185605e-05, + "loss": 1.5849, + "step": 8349 + }, + { + "epoch": 0.1, + "grad_norm": 23.344550780025337, + "learning_rate": 1.9978578087208336e-05, + "loss": 1.5503, + "step": 8352 + }, + { + "epoch": 0.1, + "grad_norm": 6.052328765105048, + "learning_rate": 1.9978539853177773e-05, + "loss": 1.9117, + "step": 8355 + }, + { + "epoch": 0.1, + "grad_norm": 19.70717843730749, + "learning_rate": 1.9978501585094044e-05, + "loss": 1.5014, + "step": 8358 + }, + { + "epoch": 0.1, + "grad_norm": 24.797667366217027, + "learning_rate": 1.997846328295729e-05, + "loss": 1.5091, + "step": 8361 + }, + { + "epoch": 0.1, + "grad_norm": 13.437213211328768, + "learning_rate": 1.9978424946767632e-05, + "loss": 1.9733, + "step": 8364 + }, + { + "epoch": 0.1, + "grad_norm": 11.081294219326173, + "learning_rate": 1.9978386576525203e-05, + "loss": 1.8918, + "step": 8367 + }, + { + "epoch": 0.1, + "grad_norm": 9.135174431141062, + "learning_rate": 1.9978348172230136e-05, + "loss": 1.3907, + "step": 8370 + }, + { + "epoch": 0.1, + "grad_norm": 7.562309240706517, + "learning_rate": 1.997830973388256e-05, + "loss": 1.3895, + "step": 8373 + }, + { + "epoch": 0.1, + "grad_norm": 21.811906259764523, + "learning_rate": 1.9978271261482607e-05, + "loss": 2.1381, + "step": 8376 + }, + { + "epoch": 0.1, + "grad_norm": 8.8879265690632, + "learning_rate": 1.997823275503041e-05, + "loss": 1.7397, + "step": 8379 + }, + { + "epoch": 0.1, + "grad_norm": 59.421812516983806, + "learning_rate": 1.9978194214526098e-05, + "loss": 1.3565, + "step": 8382 + }, + { + "epoch": 0.1, + "grad_norm": 38.91724746355106, + "learning_rate": 1.9978155639969802e-05, + "loss": 1.3746, + "step": 8385 + }, + { + "epoch": 0.1, + "grad_norm": 11.690565580073917, + "learning_rate": 1.9978117031361653e-05, + "loss": 1.5892, + "step": 8388 + }, + { + "epoch": 0.1, + "grad_norm": 7.191923287928899, + "learning_rate": 1.9978078388701787e-05, + "loss": 1.5501, + "step": 8391 + }, + { + "epoch": 0.1, + "grad_norm": 29.061809267614507, + "learning_rate": 1.9978039711990336e-05, + "loss": 1.5515, + "step": 8394 + }, + { + "epoch": 0.1, + "grad_norm": 21.64717372141338, + "learning_rate": 1.9978001001227426e-05, + "loss": 1.8885, + "step": 8397 + }, + { + "epoch": 0.1, + "grad_norm": 15.291114183015244, + "learning_rate": 1.9977962256413194e-05, + "loss": 1.702, + "step": 8400 + }, + { + "epoch": 0.1, + "grad_norm": 5.31624487566916, + "learning_rate": 1.997792347754777e-05, + "loss": 1.6314, + "step": 8403 + }, + { + "epoch": 0.1, + "grad_norm": 32.94295296481751, + "learning_rate": 1.9977884664631287e-05, + "loss": 1.5894, + "step": 8406 + }, + { + "epoch": 0.1, + "grad_norm": 14.030416990096858, + "learning_rate": 1.997784581766388e-05, + "loss": 1.7166, + "step": 8409 + }, + { + "epoch": 0.1, + "grad_norm": 16.052896662439995, + "learning_rate": 1.997780693664568e-05, + "loss": 1.8292, + "step": 8412 + }, + { + "epoch": 0.1, + "grad_norm": 22.102367598028025, + "learning_rate": 1.9977768021576815e-05, + "loss": 1.6627, + "step": 8415 + }, + { + "epoch": 0.1, + "grad_norm": 17.736309356091823, + "learning_rate": 1.9977729072457424e-05, + "loss": 1.9637, + "step": 8418 + }, + { + "epoch": 0.1, + "grad_norm": 19.50729382009687, + "learning_rate": 1.9977690089287636e-05, + "loss": 1.9625, + "step": 8421 + }, + { + "epoch": 0.1, + "grad_norm": 22.97613800591498, + "learning_rate": 1.997765107206759e-05, + "loss": 1.5764, + "step": 8424 + }, + { + "epoch": 0.1, + "grad_norm": 15.557434712223941, + "learning_rate": 1.997761202079741e-05, + "loss": 1.5883, + "step": 8427 + }, + { + "epoch": 0.1, + "grad_norm": 11.53742047960991, + "learning_rate": 1.9977572935477238e-05, + "loss": 1.6989, + "step": 8430 + }, + { + "epoch": 0.1, + "grad_norm": 40.847280573010316, + "learning_rate": 1.99775338161072e-05, + "loss": 1.9478, + "step": 8433 + }, + { + "epoch": 0.1, + "grad_norm": 93.03723766726803, + "learning_rate": 1.997749466268743e-05, + "loss": 1.459, + "step": 8436 + }, + { + "epoch": 0.1, + "grad_norm": 32.49255032989746, + "learning_rate": 1.9977455475218074e-05, + "loss": 1.4317, + "step": 8439 + }, + { + "epoch": 0.1, + "grad_norm": 31.8818655160373, + "learning_rate": 1.997741625369925e-05, + "loss": 1.8768, + "step": 8442 + }, + { + "epoch": 0.1, + "grad_norm": 68.52109845608595, + "learning_rate": 1.9977376998131096e-05, + "loss": 1.5014, + "step": 8445 + }, + { + "epoch": 0.1, + "grad_norm": 14.73756235973606, + "learning_rate": 1.9977337708513754e-05, + "loss": 2.0389, + "step": 8448 + }, + { + "epoch": 0.1, + "grad_norm": 41.09140817579368, + "learning_rate": 1.9977298384847347e-05, + "loss": 2.0244, + "step": 8451 + }, + { + "epoch": 0.1, + "grad_norm": 5.280914102729403, + "learning_rate": 1.9977259027132018e-05, + "loss": 1.8215, + "step": 8454 + }, + { + "epoch": 0.1, + "grad_norm": 9.475114102758855, + "learning_rate": 1.99772196353679e-05, + "loss": 2.2769, + "step": 8457 + }, + { + "epoch": 0.1, + "grad_norm": 3.5713913798330625, + "learning_rate": 1.997718020955512e-05, + "loss": 1.4292, + "step": 8460 + }, + { + "epoch": 0.1, + "grad_norm": 13.192807970617359, + "learning_rate": 1.997714074969382e-05, + "loss": 1.9042, + "step": 8463 + }, + { + "epoch": 0.1, + "grad_norm": 28.044976378223645, + "learning_rate": 1.997710125578413e-05, + "loss": 2.0268, + "step": 8466 + }, + { + "epoch": 0.1, + "grad_norm": 7.795061427051455, + "learning_rate": 1.997706172782619e-05, + "loss": 1.8136, + "step": 8469 + }, + { + "epoch": 0.1, + "grad_norm": 19.688338293303246, + "learning_rate": 1.9977022165820132e-05, + "loss": 1.7453, + "step": 8472 + }, + { + "epoch": 0.1, + "grad_norm": 4.374310333289214, + "learning_rate": 1.997698256976609e-05, + "loss": 1.4593, + "step": 8475 + }, + { + "epoch": 0.1, + "grad_norm": 7.42168566139984, + "learning_rate": 1.9976942939664203e-05, + "loss": 1.69, + "step": 8478 + }, + { + "epoch": 0.1, + "grad_norm": 23.056903879204782, + "learning_rate": 1.99769032755146e-05, + "loss": 1.403, + "step": 8481 + }, + { + "epoch": 0.1, + "grad_norm": 20.85547463438881, + "learning_rate": 1.997686357731742e-05, + "loss": 1.8586, + "step": 8484 + }, + { + "epoch": 0.1, + "grad_norm": 44.726977457024994, + "learning_rate": 1.99768238450728e-05, + "loss": 1.753, + "step": 8487 + }, + { + "epoch": 0.1, + "grad_norm": 28.190101269673367, + "learning_rate": 1.9976784078780872e-05, + "loss": 1.7394, + "step": 8490 + }, + { + "epoch": 0.1, + "grad_norm": 12.518154890973808, + "learning_rate": 1.9976744278441775e-05, + "loss": 1.5376, + "step": 8493 + }, + { + "epoch": 0.1, + "grad_norm": 24.993214905121075, + "learning_rate": 1.9976704444055643e-05, + "loss": 1.6308, + "step": 8496 + }, + { + "epoch": 0.1, + "grad_norm": 7.615117708967699, + "learning_rate": 1.997666457562261e-05, + "loss": 1.5997, + "step": 8499 + }, + { + "epoch": 0.1, + "grad_norm": 26.404006200038598, + "learning_rate": 1.9976624673142816e-05, + "loss": 1.4235, + "step": 8502 + }, + { + "epoch": 0.1, + "grad_norm": 127.33474706668633, + "learning_rate": 1.99765847366164e-05, + "loss": 1.851, + "step": 8505 + }, + { + "epoch": 0.1, + "grad_norm": 39.73781939307733, + "learning_rate": 1.997654476604349e-05, + "loss": 1.6933, + "step": 8508 + }, + { + "epoch": 0.1, + "grad_norm": 4.511292055437861, + "learning_rate": 1.9976504761424224e-05, + "loss": 1.6158, + "step": 8511 + }, + { + "epoch": 0.1, + "grad_norm": 17.806550930793993, + "learning_rate": 1.9976464722758742e-05, + "loss": 2.0335, + "step": 8514 + }, + { + "epoch": 0.1, + "grad_norm": 41.089093667386805, + "learning_rate": 1.9976424650047178e-05, + "loss": 1.6565, + "step": 8517 + }, + { + "epoch": 0.1, + "grad_norm": 4.99051828188001, + "learning_rate": 1.9976384543289674e-05, + "loss": 1.9801, + "step": 8520 + }, + { + "epoch": 0.1, + "grad_norm": 12.748881971561044, + "learning_rate": 1.9976344402486364e-05, + "loss": 1.6879, + "step": 8523 + }, + { + "epoch": 0.1, + "grad_norm": 7.677161424997553, + "learning_rate": 1.997630422763738e-05, + "loss": 1.8031, + "step": 8526 + }, + { + "epoch": 0.1, + "grad_norm": 8.335684984876604, + "learning_rate": 1.9976264018742862e-05, + "loss": 1.6054, + "step": 8529 + }, + { + "epoch": 0.1, + "grad_norm": 162.3700142702149, + "learning_rate": 1.9976223775802955e-05, + "loss": 1.6488, + "step": 8532 + }, + { + "epoch": 0.1, + "grad_norm": 32.57844283443692, + "learning_rate": 1.9976183498817784e-05, + "loss": 1.6835, + "step": 8535 + }, + { + "epoch": 0.1, + "grad_norm": 4.205546710864162, + "learning_rate": 1.9976143187787493e-05, + "loss": 2.05, + "step": 8538 + }, + { + "epoch": 0.1, + "grad_norm": 14.181598607635804, + "learning_rate": 1.997610284271222e-05, + "loss": 1.9187, + "step": 8541 + }, + { + "epoch": 0.1, + "grad_norm": 60.22457198532052, + "learning_rate": 1.9976062463592102e-05, + "loss": 1.836, + "step": 8544 + }, + { + "epoch": 0.1, + "grad_norm": 57.65175855103277, + "learning_rate": 1.9976022050427276e-05, + "loss": 1.3313, + "step": 8547 + }, + { + "epoch": 0.1, + "grad_norm": 11.103747949386051, + "learning_rate": 1.997598160321788e-05, + "loss": 1.7339, + "step": 8550 + }, + { + "epoch": 0.1, + "grad_norm": 11.732701569707038, + "learning_rate": 1.997594112196405e-05, + "loss": 1.5965, + "step": 8553 + }, + { + "epoch": 0.1, + "grad_norm": 40.46647512018756, + "learning_rate": 1.9975900606665925e-05, + "loss": 2.2367, + "step": 8556 + }, + { + "epoch": 0.1, + "grad_norm": 3.775937191219211, + "learning_rate": 1.997586005732365e-05, + "loss": 1.494, + "step": 8559 + }, + { + "epoch": 0.1, + "grad_norm": 30.672604461240326, + "learning_rate": 1.9975819473937355e-05, + "loss": 1.9269, + "step": 8562 + }, + { + "epoch": 0.1, + "grad_norm": 12.669710181302113, + "learning_rate": 1.997577885650718e-05, + "loss": 1.3888, + "step": 8565 + }, + { + "epoch": 0.1, + "grad_norm": 7.309514674862982, + "learning_rate": 1.9975738205033266e-05, + "loss": 1.4189, + "step": 8568 + }, + { + "epoch": 0.1, + "grad_norm": 15.538086964921808, + "learning_rate": 1.997569751951575e-05, + "loss": 2.0594, + "step": 8571 + }, + { + "epoch": 0.1, + "grad_norm": 28.71629162690136, + "learning_rate": 1.9975656799954774e-05, + "loss": 1.6922, + "step": 8574 + }, + { + "epoch": 0.1, + "grad_norm": 4.155503805118661, + "learning_rate": 1.9975616046350472e-05, + "loss": 2.0427, + "step": 8577 + }, + { + "epoch": 0.1, + "grad_norm": 55.12501501934414, + "learning_rate": 1.9975575258702985e-05, + "loss": 1.6954, + "step": 8580 + }, + { + "epoch": 0.1, + "grad_norm": 13.76414617921684, + "learning_rate": 1.9975534437012455e-05, + "loss": 1.3518, + "step": 8583 + }, + { + "epoch": 0.1, + "grad_norm": 79.66415096960772, + "learning_rate": 1.997549358127902e-05, + "loss": 1.7607, + "step": 8586 + }, + { + "epoch": 0.1, + "grad_norm": 4.525933182582074, + "learning_rate": 1.9975452691502818e-05, + "loss": 1.731, + "step": 8589 + }, + { + "epoch": 0.1, + "grad_norm": 24.394641347249898, + "learning_rate": 1.9975411767683987e-05, + "loss": 1.731, + "step": 8592 + }, + { + "epoch": 0.1, + "grad_norm": 33.95794200739313, + "learning_rate": 1.997537080982267e-05, + "loss": 1.7271, + "step": 8595 + }, + { + "epoch": 0.1, + "grad_norm": 29.05096924909367, + "learning_rate": 1.997532981791901e-05, + "loss": 2.0889, + "step": 8598 + }, + { + "epoch": 0.1, + "grad_norm": 38.44757326072132, + "learning_rate": 1.9975288791973138e-05, + "loss": 1.7517, + "step": 8601 + }, + { + "epoch": 0.1, + "grad_norm": 27.856725420408214, + "learning_rate": 1.9975247731985195e-05, + "loss": 1.7226, + "step": 8604 + }, + { + "epoch": 0.1, + "grad_norm": 114.55760174973199, + "learning_rate": 1.997520663795533e-05, + "loss": 1.4803, + "step": 8607 + }, + { + "epoch": 0.1, + "grad_norm": 91.39965312450397, + "learning_rate": 1.9975165509883676e-05, + "loss": 1.9714, + "step": 8610 + }, + { + "epoch": 0.1, + "grad_norm": 4.823897997545723, + "learning_rate": 1.9975124347770376e-05, + "loss": 1.5372, + "step": 8613 + }, + { + "epoch": 0.1, + "grad_norm": 53.57066041517606, + "learning_rate": 1.997508315161557e-05, + "loss": 1.3986, + "step": 8616 + }, + { + "epoch": 0.1, + "grad_norm": 15.284482292595067, + "learning_rate": 1.9975041921419397e-05, + "loss": 1.7813, + "step": 8619 + }, + { + "epoch": 0.1, + "grad_norm": 6.046225382984142, + "learning_rate": 1.9975000657182e-05, + "loss": 1.4659, + "step": 8622 + }, + { + "epoch": 0.1, + "grad_norm": 31.55222118912959, + "learning_rate": 1.9974959358903515e-05, + "loss": 2.1136, + "step": 8625 + }, + { + "epoch": 0.1, + "grad_norm": 25.253476249257925, + "learning_rate": 1.997491802658409e-05, + "loss": 1.9823, + "step": 8628 + }, + { + "epoch": 0.1, + "grad_norm": 77.03629191791553, + "learning_rate": 1.9974876660223864e-05, + "loss": 1.703, + "step": 8631 + }, + { + "epoch": 0.1, + "grad_norm": 6.089736809591432, + "learning_rate": 1.9974835259822972e-05, + "loss": 1.4126, + "step": 8634 + }, + { + "epoch": 0.1, + "grad_norm": 6.9051461401332155, + "learning_rate": 1.9974793825381563e-05, + "loss": 1.5732, + "step": 8637 + }, + { + "epoch": 0.1, + "grad_norm": 10.239761048511989, + "learning_rate": 1.9974752356899774e-05, + "loss": 1.4486, + "step": 8640 + }, + { + "epoch": 0.1, + "grad_norm": 4.712037946171014, + "learning_rate": 1.9974710854377754e-05, + "loss": 1.3062, + "step": 8643 + }, + { + "epoch": 0.1, + "grad_norm": 7.427079099600356, + "learning_rate": 1.9974669317815634e-05, + "loss": 1.7986, + "step": 8646 + }, + { + "epoch": 0.1, + "grad_norm": 17.252373705770516, + "learning_rate": 1.997462774721356e-05, + "loss": 1.4669, + "step": 8649 + }, + { + "epoch": 0.1, + "grad_norm": 4.385303782928678, + "learning_rate": 1.9974586142571676e-05, + "loss": 1.7001, + "step": 8652 + }, + { + "epoch": 0.1, + "grad_norm": 14.456174342210272, + "learning_rate": 1.9974544503890122e-05, + "loss": 1.5989, + "step": 8655 + }, + { + "epoch": 0.1, + "grad_norm": 266.72366857091095, + "learning_rate": 1.997450283116904e-05, + "loss": 1.7181, + "step": 8658 + }, + { + "epoch": 0.1, + "grad_norm": 38.12792240317596, + "learning_rate": 1.9974461124408574e-05, + "loss": 2.035, + "step": 8661 + }, + { + "epoch": 0.1, + "grad_norm": 39.09947999321589, + "learning_rate": 1.9974419383608864e-05, + "loss": 1.5767, + "step": 8664 + }, + { + "epoch": 0.1, + "grad_norm": 26.287844391785697, + "learning_rate": 1.997437760877005e-05, + "loss": 1.7703, + "step": 8667 + }, + { + "epoch": 0.1, + "grad_norm": 4.113578850255575, + "learning_rate": 1.9974335799892283e-05, + "loss": 1.8669, + "step": 8670 + }, + { + "epoch": 0.1, + "grad_norm": 39.396603023638164, + "learning_rate": 1.99742939569757e-05, + "loss": 1.8794, + "step": 8673 + }, + { + "epoch": 0.1, + "grad_norm": 8.4213529952077, + "learning_rate": 1.997425208002044e-05, + "loss": 1.6968, + "step": 8676 + }, + { + "epoch": 0.1, + "grad_norm": 23.469383172174783, + "learning_rate": 1.9974210169026654e-05, + "loss": 1.9226, + "step": 8679 + }, + { + "epoch": 0.1, + "grad_norm": 66.58816713054247, + "learning_rate": 1.997416822399448e-05, + "loss": 1.7272, + "step": 8682 + }, + { + "epoch": 0.1, + "grad_norm": 135.71950230805496, + "learning_rate": 1.9974126244924066e-05, + "loss": 2.0423, + "step": 8685 + }, + { + "epoch": 0.1, + "grad_norm": 10.74098151617314, + "learning_rate": 1.997408423181555e-05, + "loss": 1.5831, + "step": 8688 + }, + { + "epoch": 0.1, + "grad_norm": 21.441168894598164, + "learning_rate": 1.9974042184669073e-05, + "loss": 1.6954, + "step": 8691 + }, + { + "epoch": 0.1, + "grad_norm": 15.168010701999867, + "learning_rate": 1.997400010348479e-05, + "loss": 1.5938, + "step": 8694 + }, + { + "epoch": 0.1, + "grad_norm": 41.44657012559918, + "learning_rate": 1.997395798826283e-05, + "loss": 1.585, + "step": 8697 + }, + { + "epoch": 0.1, + "grad_norm": 48.6816667683485, + "learning_rate": 1.997391583900335e-05, + "loss": 1.4529, + "step": 8700 + }, + { + "epoch": 0.1, + "grad_norm": 31.671808312121097, + "learning_rate": 1.9973873655706485e-05, + "loss": 1.9832, + "step": 8703 + }, + { + "epoch": 0.1, + "grad_norm": 12.763142200175288, + "learning_rate": 1.997383143837238e-05, + "loss": 1.5792, + "step": 8706 + }, + { + "epoch": 0.1, + "grad_norm": 27.26264687743925, + "learning_rate": 1.9973789187001183e-05, + "loss": 1.7283, + "step": 8709 + }, + { + "epoch": 0.1, + "grad_norm": 20.427655834575436, + "learning_rate": 1.9973746901593037e-05, + "loss": 1.8509, + "step": 8712 + }, + { + "epoch": 0.1, + "grad_norm": 142.4984883537465, + "learning_rate": 1.9973704582148085e-05, + "loss": 1.492, + "step": 8715 + }, + { + "epoch": 0.1, + "grad_norm": 4.850817318970491, + "learning_rate": 1.997366222866647e-05, + "loss": 1.2467, + "step": 8718 + }, + { + "epoch": 0.1, + "grad_norm": 6.338833806342362, + "learning_rate": 1.9973619841148338e-05, + "loss": 1.6723, + "step": 8721 + }, + { + "epoch": 0.1, + "grad_norm": 7.557631951315156, + "learning_rate": 1.9973577419593835e-05, + "loss": 1.9406, + "step": 8724 + }, + { + "epoch": 0.1, + "grad_norm": 18.11940384680572, + "learning_rate": 1.9973534964003106e-05, + "loss": 1.667, + "step": 8727 + }, + { + "epoch": 0.1, + "grad_norm": 36.53939306242453, + "learning_rate": 1.9973492474376295e-05, + "loss": 1.7065, + "step": 8730 + }, + { + "epoch": 0.11, + "grad_norm": 73.77671595431026, + "learning_rate": 1.9973449950713542e-05, + "loss": 1.7101, + "step": 8733 + }, + { + "epoch": 0.11, + "grad_norm": 66.80667029227786, + "learning_rate": 1.9973407393015e-05, + "loss": 1.6535, + "step": 8736 + }, + { + "epoch": 0.11, + "grad_norm": 14.81324477645947, + "learning_rate": 1.997336480128081e-05, + "loss": 1.5167, + "step": 8739 + }, + { + "epoch": 0.11, + "grad_norm": 9.284817283597581, + "learning_rate": 1.997332217551112e-05, + "loss": 1.6728, + "step": 8742 + }, + { + "epoch": 0.11, + "grad_norm": 14.717815716229554, + "learning_rate": 1.9973279515706076e-05, + "loss": 1.7568, + "step": 8745 + }, + { + "epoch": 0.11, + "grad_norm": 42.654355627598605, + "learning_rate": 1.997323682186582e-05, + "loss": 1.8146, + "step": 8748 + }, + { + "epoch": 0.11, + "grad_norm": 72.58096298554867, + "learning_rate": 1.9973194093990497e-05, + "loss": 1.5153, + "step": 8751 + }, + { + "epoch": 0.11, + "grad_norm": 23.811665388762574, + "learning_rate": 1.9973151332080253e-05, + "loss": 1.9105, + "step": 8754 + }, + { + "epoch": 0.11, + "grad_norm": 5.40278353329426, + "learning_rate": 1.997310853613524e-05, + "loss": 1.7053, + "step": 8757 + }, + { + "epoch": 0.11, + "grad_norm": 26.31948260490667, + "learning_rate": 1.9973065706155598e-05, + "loss": 1.2703, + "step": 8760 + }, + { + "epoch": 0.11, + "grad_norm": 8.910660096258454, + "learning_rate": 1.9973022842141475e-05, + "loss": 1.7894, + "step": 8763 + }, + { + "epoch": 0.11, + "grad_norm": 7.868968123920087, + "learning_rate": 1.9972979944093015e-05, + "loss": 1.8432, + "step": 8766 + }, + { + "epoch": 0.11, + "grad_norm": 31.52244412932997, + "learning_rate": 1.997293701201037e-05, + "loss": 1.4893, + "step": 8769 + }, + { + "epoch": 0.11, + "grad_norm": 20.996133579898117, + "learning_rate": 1.9972894045893682e-05, + "loss": 1.7012, + "step": 8772 + }, + { + "epoch": 0.11, + "grad_norm": 105.78417842534174, + "learning_rate": 1.99728510457431e-05, + "loss": 1.524, + "step": 8775 + }, + { + "epoch": 0.11, + "grad_norm": 35.80866935771705, + "learning_rate": 1.997280801155877e-05, + "loss": 1.5359, + "step": 8778 + }, + { + "epoch": 0.11, + "grad_norm": 36.7052740805588, + "learning_rate": 1.9972764943340835e-05, + "loss": 1.5392, + "step": 8781 + }, + { + "epoch": 0.11, + "grad_norm": 53.786379434574464, + "learning_rate": 1.997272184108945e-05, + "loss": 1.5758, + "step": 8784 + }, + { + "epoch": 0.11, + "grad_norm": 13.134897955244751, + "learning_rate": 1.9972678704804754e-05, + "loss": 1.6922, + "step": 8787 + }, + { + "epoch": 0.11, + "grad_norm": 60.03210558994465, + "learning_rate": 1.99726355344869e-05, + "loss": 1.6983, + "step": 8790 + }, + { + "epoch": 0.11, + "grad_norm": 9.877633703085674, + "learning_rate": 1.997259233013603e-05, + "loss": 1.9181, + "step": 8793 + }, + { + "epoch": 0.11, + "grad_norm": 49.35825463022199, + "learning_rate": 1.99725490917523e-05, + "loss": 1.752, + "step": 8796 + }, + { + "epoch": 0.11, + "grad_norm": 8.568649582718807, + "learning_rate": 1.9972505819335847e-05, + "loss": 1.934, + "step": 8799 + }, + { + "epoch": 0.11, + "grad_norm": 16.128842551797664, + "learning_rate": 1.9972462512886823e-05, + "loss": 1.5987, + "step": 8802 + }, + { + "epoch": 0.11, + "grad_norm": 42.06173378129156, + "learning_rate": 1.9972419172405378e-05, + "loss": 1.8863, + "step": 8805 + }, + { + "epoch": 0.11, + "grad_norm": 14.726237337179855, + "learning_rate": 1.997237579789166e-05, + "loss": 1.8094, + "step": 8808 + }, + { + "epoch": 0.11, + "grad_norm": 10.372968474657732, + "learning_rate": 1.9972332389345815e-05, + "loss": 1.6593, + "step": 8811 + }, + { + "epoch": 0.11, + "grad_norm": 52.784491821180005, + "learning_rate": 1.9972288946767993e-05, + "loss": 1.6051, + "step": 8814 + }, + { + "epoch": 0.11, + "grad_norm": 21.528520349038697, + "learning_rate": 1.9972245470158338e-05, + "loss": 1.4913, + "step": 8817 + }, + { + "epoch": 0.11, + "grad_norm": 6.009084309392216, + "learning_rate": 1.9972201959517003e-05, + "loss": 1.6837, + "step": 8820 + }, + { + "epoch": 0.11, + "grad_norm": 6.092234804106371, + "learning_rate": 1.997215841484413e-05, + "loss": 1.4286, + "step": 8823 + }, + { + "epoch": 0.11, + "grad_norm": 53.919237345720745, + "learning_rate": 1.9972114836139876e-05, + "loss": 1.491, + "step": 8826 + }, + { + "epoch": 0.11, + "grad_norm": 100.50201980193371, + "learning_rate": 1.9972071223404386e-05, + "loss": 1.6979, + "step": 8829 + }, + { + "epoch": 0.11, + "grad_norm": 7.076695997917998, + "learning_rate": 1.9972027576637813e-05, + "loss": 1.4829, + "step": 8832 + }, + { + "epoch": 0.11, + "grad_norm": 23.127277747038075, + "learning_rate": 1.9971983895840298e-05, + "loss": 1.7656, + "step": 8835 + }, + { + "epoch": 0.11, + "grad_norm": 38.47967954448758, + "learning_rate": 1.997194018101199e-05, + "loss": 1.8053, + "step": 8838 + }, + { + "epoch": 0.11, + "grad_norm": 25.860387796089693, + "learning_rate": 1.997189643215305e-05, + "loss": 1.6518, + "step": 8841 + }, + { + "epoch": 0.11, + "grad_norm": 4.855849438336934, + "learning_rate": 1.9971852649263612e-05, + "loss": 1.7329, + "step": 8844 + }, + { + "epoch": 0.11, + "grad_norm": 23.14032103077287, + "learning_rate": 1.9971808832343837e-05, + "loss": 1.6115, + "step": 8847 + }, + { + "epoch": 0.11, + "grad_norm": 5.45405865599427, + "learning_rate": 1.997176498139387e-05, + "loss": 1.7726, + "step": 8850 + }, + { + "epoch": 0.11, + "grad_norm": 101.63393589924281, + "learning_rate": 1.997172109641386e-05, + "loss": 1.9144, + "step": 8853 + }, + { + "epoch": 0.11, + "grad_norm": 48.770498999998885, + "learning_rate": 1.9971677177403954e-05, + "loss": 1.3123, + "step": 8856 + }, + { + "epoch": 0.11, + "grad_norm": 10.583153604555905, + "learning_rate": 1.997163322436431e-05, + "loss": 1.7231, + "step": 8859 + }, + { + "epoch": 0.11, + "grad_norm": 18.61680689073895, + "learning_rate": 1.9971589237295074e-05, + "loss": 1.8057, + "step": 8862 + }, + { + "epoch": 0.11, + "grad_norm": 6.491657666018582, + "learning_rate": 1.9971545216196395e-05, + "loss": 1.2849, + "step": 8865 + }, + { + "epoch": 0.11, + "grad_norm": 30.11910079853167, + "learning_rate": 1.997150116106842e-05, + "loss": 1.9068, + "step": 8868 + }, + { + "epoch": 0.11, + "grad_norm": 57.549433793557384, + "learning_rate": 1.9971457071911307e-05, + "loss": 1.8273, + "step": 8871 + }, + { + "epoch": 0.11, + "grad_norm": 13.818918183747186, + "learning_rate": 1.9971412948725205e-05, + "loss": 2.11, + "step": 8874 + }, + { + "epoch": 0.11, + "grad_norm": 12.970659041717209, + "learning_rate": 1.997136879151026e-05, + "loss": 1.81, + "step": 8877 + }, + { + "epoch": 0.11, + "grad_norm": 6.514585965037471, + "learning_rate": 1.9971324600266624e-05, + "loss": 1.8013, + "step": 8880 + }, + { + "epoch": 0.11, + "grad_norm": 8.128840209451514, + "learning_rate": 1.9971280374994447e-05, + "loss": 1.7982, + "step": 8883 + }, + { + "epoch": 0.11, + "grad_norm": 34.03146643086612, + "learning_rate": 1.9971236115693884e-05, + "loss": 1.7084, + "step": 8886 + }, + { + "epoch": 0.11, + "grad_norm": 12.111850672487586, + "learning_rate": 1.9971191822365083e-05, + "loss": 1.8582, + "step": 8889 + }, + { + "epoch": 0.11, + "grad_norm": 10.256808480721922, + "learning_rate": 1.9971147495008197e-05, + "loss": 1.3771, + "step": 8892 + }, + { + "epoch": 0.11, + "grad_norm": 15.291665902820132, + "learning_rate": 1.997110313362338e-05, + "loss": 1.4609, + "step": 8895 + }, + { + "epoch": 0.11, + "grad_norm": 21.047727901183688, + "learning_rate": 1.9971058738210773e-05, + "loss": 1.5083, + "step": 8898 + }, + { + "epoch": 0.11, + "grad_norm": 21.937862683589458, + "learning_rate": 1.9971014308770537e-05, + "loss": 1.8148, + "step": 8901 + }, + { + "epoch": 0.11, + "grad_norm": 12.200100365178395, + "learning_rate": 1.997096984530282e-05, + "loss": 1.8244, + "step": 8904 + }, + { + "epoch": 0.11, + "grad_norm": 18.160659852506857, + "learning_rate": 1.9970925347807775e-05, + "loss": 1.5754, + "step": 8907 + }, + { + "epoch": 0.11, + "grad_norm": 10.318097199195037, + "learning_rate": 1.9970880816285554e-05, + "loss": 1.2772, + "step": 8910 + }, + { + "epoch": 0.11, + "grad_norm": 21.184287987593432, + "learning_rate": 1.9970836250736308e-05, + "loss": 1.7819, + "step": 8913 + }, + { + "epoch": 0.11, + "grad_norm": 7.937756100474406, + "learning_rate": 1.9970791651160187e-05, + "loss": 1.766, + "step": 8916 + }, + { + "epoch": 0.11, + "grad_norm": 18.901218985744947, + "learning_rate": 1.9970747017557347e-05, + "loss": 1.6266, + "step": 8919 + }, + { + "epoch": 0.11, + "grad_norm": 8.386475741625645, + "learning_rate": 1.997070234992794e-05, + "loss": 1.4885, + "step": 8922 + }, + { + "epoch": 0.11, + "grad_norm": 3.2372465797271333, + "learning_rate": 1.9970657648272117e-05, + "loss": 1.9658, + "step": 8925 + }, + { + "epoch": 0.11, + "grad_norm": 14.479008182741394, + "learning_rate": 1.997061291259003e-05, + "loss": 1.6666, + "step": 8928 + }, + { + "epoch": 0.11, + "grad_norm": 15.516116166818717, + "learning_rate": 1.997056814288183e-05, + "loss": 1.5398, + "step": 8931 + }, + { + "epoch": 0.11, + "grad_norm": 11.97755095439727, + "learning_rate": 1.9970523339147677e-05, + "loss": 1.7292, + "step": 8934 + }, + { + "epoch": 0.11, + "grad_norm": 12.165150711612554, + "learning_rate": 1.9970478501387717e-05, + "loss": 1.932, + "step": 8937 + }, + { + "epoch": 0.11, + "grad_norm": 7.173981302710818, + "learning_rate": 1.9970433629602105e-05, + "loss": 1.4683, + "step": 8940 + }, + { + "epoch": 0.11, + "grad_norm": 9.1210931808757, + "learning_rate": 1.9970388723790997e-05, + "loss": 1.4654, + "step": 8943 + }, + { + "epoch": 0.11, + "grad_norm": 27.875573839764886, + "learning_rate": 1.997034378395454e-05, + "loss": 1.924, + "step": 8946 + }, + { + "epoch": 0.11, + "grad_norm": 28.428018495046498, + "learning_rate": 1.9970298810092894e-05, + "loss": 1.8735, + "step": 8949 + }, + { + "epoch": 0.11, + "grad_norm": 9.299371973776767, + "learning_rate": 1.9970253802206208e-05, + "loss": 1.6203, + "step": 8952 + }, + { + "epoch": 0.11, + "grad_norm": 23.535510092462328, + "learning_rate": 1.9970208760294638e-05, + "loss": 1.8308, + "step": 8955 + }, + { + "epoch": 0.11, + "grad_norm": 49.07488787277734, + "learning_rate": 1.9970163684358335e-05, + "loss": 1.8746, + "step": 8958 + }, + { + "epoch": 0.11, + "grad_norm": 22.87533029751512, + "learning_rate": 1.9970118574397456e-05, + "loss": 1.2596, + "step": 8961 + }, + { + "epoch": 0.11, + "grad_norm": 6.62029102342305, + "learning_rate": 1.9970073430412152e-05, + "loss": 1.722, + "step": 8964 + }, + { + "epoch": 0.11, + "grad_norm": 332.85065997172444, + "learning_rate": 1.9970028252402578e-05, + "loss": 1.4176, + "step": 8967 + }, + { + "epoch": 0.11, + "grad_norm": 14.922768900634448, + "learning_rate": 1.9969983040368893e-05, + "loss": 1.9784, + "step": 8970 + }, + { + "epoch": 0.11, + "grad_norm": 8.940277286407706, + "learning_rate": 1.9969937794311243e-05, + "loss": 1.4287, + "step": 8973 + }, + { + "epoch": 0.11, + "grad_norm": 5.933201706205764, + "learning_rate": 1.9969892514229787e-05, + "loss": 1.7341, + "step": 8976 + }, + { + "epoch": 0.11, + "grad_norm": 9.785231835923828, + "learning_rate": 1.9969847200124678e-05, + "loss": 1.3633, + "step": 8979 + }, + { + "epoch": 0.11, + "grad_norm": 14.317364708202442, + "learning_rate": 1.9969801851996075e-05, + "loss": 1.9675, + "step": 8982 + }, + { + "epoch": 0.11, + "grad_norm": 15.805583443887116, + "learning_rate": 1.9969756469844127e-05, + "loss": 1.4949, + "step": 8985 + }, + { + "epoch": 0.11, + "grad_norm": 25.775862199925804, + "learning_rate": 1.9969711053668993e-05, + "loss": 1.7617, + "step": 8988 + }, + { + "epoch": 0.11, + "grad_norm": 8.760533624002948, + "learning_rate": 1.9969665603470826e-05, + "loss": 1.3929, + "step": 8991 + }, + { + "epoch": 0.11, + "grad_norm": 6.650535349968166, + "learning_rate": 1.9969620119249778e-05, + "loss": 1.6878, + "step": 8994 + }, + { + "epoch": 0.11, + "grad_norm": 51.85564549928549, + "learning_rate": 1.9969574601006012e-05, + "loss": 1.541, + "step": 8997 + }, + { + "epoch": 0.11, + "grad_norm": 23.268977404295022, + "learning_rate": 1.9969529048739677e-05, + "loss": 1.53, + "step": 9000 + }, + { + "epoch": 0.11, + "grad_norm": 17.66212882639233, + "learning_rate": 1.996948346245093e-05, + "loss": 1.818, + "step": 9003 + }, + { + "epoch": 0.11, + "grad_norm": 5.464423016076063, + "learning_rate": 1.996943784213993e-05, + "loss": 1.5976, + "step": 9006 + }, + { + "epoch": 0.11, + "grad_norm": 5.057032244262913, + "learning_rate": 1.9969392187806825e-05, + "loss": 1.4823, + "step": 9009 + }, + { + "epoch": 0.11, + "grad_norm": 21.77836495088878, + "learning_rate": 1.996934649945178e-05, + "loss": 1.9961, + "step": 9012 + }, + { + "epoch": 0.11, + "grad_norm": 16.501586133054417, + "learning_rate": 1.9969300777074943e-05, + "loss": 1.8456, + "step": 9015 + }, + { + "epoch": 0.11, + "grad_norm": 6.158215157105812, + "learning_rate": 1.9969255020676473e-05, + "loss": 2.0758, + "step": 9018 + }, + { + "epoch": 0.11, + "grad_norm": 18.820987971557933, + "learning_rate": 1.9969209230256528e-05, + "loss": 1.6019, + "step": 9021 + }, + { + "epoch": 0.11, + "grad_norm": 6.618549894427083, + "learning_rate": 1.996916340581526e-05, + "loss": 1.9198, + "step": 9024 + }, + { + "epoch": 0.11, + "grad_norm": 12.775178000407768, + "learning_rate": 1.996911754735283e-05, + "loss": 1.6304, + "step": 9027 + }, + { + "epoch": 0.11, + "grad_norm": 12.06060348028606, + "learning_rate": 1.9969071654869397e-05, + "loss": 1.4245, + "step": 9030 + }, + { + "epoch": 0.11, + "grad_norm": 94.61550792283273, + "learning_rate": 1.996902572836511e-05, + "loss": 1.6653, + "step": 9033 + }, + { + "epoch": 0.11, + "grad_norm": 33.0296399685073, + "learning_rate": 1.9968979767840128e-05, + "loss": 1.6314, + "step": 9036 + }, + { + "epoch": 0.11, + "grad_norm": 35.21218737990576, + "learning_rate": 1.9968933773294607e-05, + "loss": 1.3658, + "step": 9039 + }, + { + "epoch": 0.11, + "grad_norm": 44.750094926830876, + "learning_rate": 1.996888774472871e-05, + "loss": 1.6396, + "step": 9042 + }, + { + "epoch": 0.11, + "grad_norm": 9.811849814419794, + "learning_rate": 1.9968841682142588e-05, + "loss": 1.8875, + "step": 9045 + }, + { + "epoch": 0.11, + "grad_norm": 29.16851103172178, + "learning_rate": 1.9968795585536398e-05, + "loss": 1.4652, + "step": 9048 + }, + { + "epoch": 0.11, + "grad_norm": 5.342605259512993, + "learning_rate": 1.9968749454910305e-05, + "loss": 1.8784, + "step": 9051 + }, + { + "epoch": 0.11, + "grad_norm": 26.827683788486844, + "learning_rate": 1.9968703290264458e-05, + "loss": 1.6343, + "step": 9054 + }, + { + "epoch": 0.11, + "grad_norm": 35.441679155797154, + "learning_rate": 1.9968657091599017e-05, + "loss": 1.6995, + "step": 9057 + }, + { + "epoch": 0.11, + "grad_norm": 14.578196346604974, + "learning_rate": 1.9968610858914137e-05, + "loss": 1.859, + "step": 9060 + }, + { + "epoch": 0.11, + "grad_norm": 30.368336481171532, + "learning_rate": 1.9968564592209982e-05, + "loss": 1.8159, + "step": 9063 + }, + { + "epoch": 0.11, + "grad_norm": 4.876435668875812, + "learning_rate": 1.9968518291486707e-05, + "loss": 1.3901, + "step": 9066 + }, + { + "epoch": 0.11, + "grad_norm": 4.758203454018477, + "learning_rate": 1.9968471956744467e-05, + "loss": 1.6718, + "step": 9069 + }, + { + "epoch": 0.11, + "grad_norm": 37.91752942325701, + "learning_rate": 1.996842558798343e-05, + "loss": 1.7278, + "step": 9072 + }, + { + "epoch": 0.11, + "grad_norm": 44.16210090470194, + "learning_rate": 1.996837918520374e-05, + "loss": 1.913, + "step": 9075 + }, + { + "epoch": 0.11, + "grad_norm": 9.931612741538407, + "learning_rate": 1.9968332748405563e-05, + "loss": 1.9066, + "step": 9078 + }, + { + "epoch": 0.11, + "grad_norm": 4.436730723027514, + "learning_rate": 1.996828627758906e-05, + "loss": 1.6884, + "step": 9081 + }, + { + "epoch": 0.11, + "grad_norm": 3.995819504103405, + "learning_rate": 1.9968239772754385e-05, + "loss": 2.04, + "step": 9084 + }, + { + "epoch": 0.11, + "grad_norm": 52.23208799044534, + "learning_rate": 1.9968193233901698e-05, + "loss": 1.7228, + "step": 9087 + }, + { + "epoch": 0.11, + "grad_norm": 26.355683897884617, + "learning_rate": 1.9968146661031155e-05, + "loss": 1.7738, + "step": 9090 + }, + { + "epoch": 0.11, + "grad_norm": 64.5317338917786, + "learning_rate": 1.996810005414292e-05, + "loss": 1.8944, + "step": 9093 + }, + { + "epoch": 0.11, + "grad_norm": 16.063636256139358, + "learning_rate": 1.9968053413237154e-05, + "loss": 1.7979, + "step": 9096 + }, + { + "epoch": 0.11, + "grad_norm": 23.288876378923316, + "learning_rate": 1.996800673831401e-05, + "loss": 1.6177, + "step": 9099 + }, + { + "epoch": 0.11, + "grad_norm": 8.303457907758645, + "learning_rate": 1.996796002937365e-05, + "loss": 1.5343, + "step": 9102 + }, + { + "epoch": 0.11, + "grad_norm": 77.64183035538788, + "learning_rate": 1.996791328641623e-05, + "loss": 1.6759, + "step": 9105 + }, + { + "epoch": 0.11, + "grad_norm": 11.187248138446417, + "learning_rate": 1.9967866509441914e-05, + "loss": 1.7094, + "step": 9108 + }, + { + "epoch": 0.11, + "grad_norm": 12.710724278011481, + "learning_rate": 1.996781969845086e-05, + "loss": 1.7071, + "step": 9111 + }, + { + "epoch": 0.11, + "grad_norm": 7.8748417229100145, + "learning_rate": 1.9967772853443227e-05, + "loss": 1.7806, + "step": 9114 + }, + { + "epoch": 0.11, + "grad_norm": 28.237930413496624, + "learning_rate": 1.9967725974419178e-05, + "loss": 1.7358, + "step": 9117 + }, + { + "epoch": 0.11, + "grad_norm": 6.897191521801366, + "learning_rate": 1.9967679061378868e-05, + "loss": 1.55, + "step": 9120 + }, + { + "epoch": 0.11, + "grad_norm": 5.841271906072552, + "learning_rate": 1.996763211432246e-05, + "loss": 1.4303, + "step": 9123 + }, + { + "epoch": 0.11, + "grad_norm": 64.37934886284198, + "learning_rate": 1.9967585133250115e-05, + "loss": 1.9692, + "step": 9126 + }, + { + "epoch": 0.11, + "grad_norm": 3.5021252514159933, + "learning_rate": 1.9967538118161993e-05, + "loss": 1.9185, + "step": 9129 + }, + { + "epoch": 0.11, + "grad_norm": 16.552373743763905, + "learning_rate": 1.996749106905825e-05, + "loss": 1.7208, + "step": 9132 + }, + { + "epoch": 0.11, + "grad_norm": 12.094591077917858, + "learning_rate": 1.9967443985939058e-05, + "loss": 1.1801, + "step": 9135 + }, + { + "epoch": 0.11, + "grad_norm": 9.85950610397805, + "learning_rate": 1.996739686880456e-05, + "loss": 1.8065, + "step": 9138 + }, + { + "epoch": 0.11, + "grad_norm": 18.717124186723726, + "learning_rate": 1.9967349717654934e-05, + "loss": 2.1836, + "step": 9141 + }, + { + "epoch": 0.11, + "grad_norm": 182.2524838032724, + "learning_rate": 1.996730253249033e-05, + "loss": 1.3842, + "step": 9144 + }, + { + "epoch": 0.11, + "grad_norm": 10.26612671412275, + "learning_rate": 1.9967255313310912e-05, + "loss": 1.6217, + "step": 9147 + }, + { + "epoch": 0.11, + "grad_norm": 48.69740937160046, + "learning_rate": 1.9967208060116845e-05, + "loss": 1.9032, + "step": 9150 + }, + { + "epoch": 0.11, + "grad_norm": 8.773100249514815, + "learning_rate": 1.9967160772908286e-05, + "loss": 1.8246, + "step": 9153 + }, + { + "epoch": 0.11, + "grad_norm": 80.01736069843771, + "learning_rate": 1.9967113451685397e-05, + "loss": 1.8147, + "step": 9156 + }, + { + "epoch": 0.11, + "grad_norm": 10.19059794941783, + "learning_rate": 1.996706609644834e-05, + "loss": 1.2143, + "step": 9159 + }, + { + "epoch": 0.11, + "grad_norm": 22.149889404166142, + "learning_rate": 1.9967018707197277e-05, + "loss": 1.4707, + "step": 9162 + }, + { + "epoch": 0.11, + "grad_norm": 52.03834826756162, + "learning_rate": 1.9966971283932367e-05, + "loss": 1.8643, + "step": 9165 + }, + { + "epoch": 0.11, + "grad_norm": 11.131490552654338, + "learning_rate": 1.9966923826653778e-05, + "loss": 1.7019, + "step": 9168 + }, + { + "epoch": 0.11, + "grad_norm": 15.737213985571023, + "learning_rate": 1.9966876335361663e-05, + "loss": 1.2604, + "step": 9171 + }, + { + "epoch": 0.11, + "grad_norm": 122.1989518783652, + "learning_rate": 1.9966828810056195e-05, + "loss": 1.7852, + "step": 9174 + }, + { + "epoch": 0.11, + "grad_norm": 6.7534150415564245, + "learning_rate": 1.9966781250737526e-05, + "loss": 1.9371, + "step": 9177 + }, + { + "epoch": 0.11, + "grad_norm": 6.510958386328367, + "learning_rate": 1.9966733657405825e-05, + "loss": 1.5776, + "step": 9180 + }, + { + "epoch": 0.11, + "grad_norm": 8.434303881151777, + "learning_rate": 1.996668603006125e-05, + "loss": 1.5755, + "step": 9183 + }, + { + "epoch": 0.11, + "grad_norm": 52.52583958739714, + "learning_rate": 1.996663836870397e-05, + "loss": 1.9239, + "step": 9186 + }, + { + "epoch": 0.11, + "grad_norm": 12.925405060094258, + "learning_rate": 1.996659067333414e-05, + "loss": 1.8351, + "step": 9189 + }, + { + "epoch": 0.11, + "grad_norm": 10.408480184698998, + "learning_rate": 1.996654294395192e-05, + "loss": 1.8571, + "step": 9192 + }, + { + "epoch": 0.11, + "grad_norm": 9.512271310706128, + "learning_rate": 1.996649518055749e-05, + "loss": 1.581, + "step": 9195 + }, + { + "epoch": 0.11, + "grad_norm": 17.978282580398982, + "learning_rate": 1.9966447383150994e-05, + "loss": 1.4354, + "step": 9198 + }, + { + "epoch": 0.11, + "grad_norm": 9.521458073655364, + "learning_rate": 1.9966399551732606e-05, + "loss": 1.4784, + "step": 9201 + }, + { + "epoch": 0.11, + "grad_norm": 24.389102502395556, + "learning_rate": 1.9966351686302484e-05, + "loss": 2.1596, + "step": 9204 + }, + { + "epoch": 0.11, + "grad_norm": 124.28622378912478, + "learning_rate": 1.9966303786860798e-05, + "loss": 1.5439, + "step": 9207 + }, + { + "epoch": 0.11, + "grad_norm": 8.567146485899835, + "learning_rate": 1.9966255853407703e-05, + "loss": 1.4726, + "step": 9210 + }, + { + "epoch": 0.11, + "grad_norm": 9.50946781592735, + "learning_rate": 1.996620788594337e-05, + "loss": 1.614, + "step": 9213 + }, + { + "epoch": 0.11, + "grad_norm": 97.82514597979736, + "learning_rate": 1.9966159884467954e-05, + "loss": 1.9914, + "step": 9216 + }, + { + "epoch": 0.11, + "grad_norm": 51.65793395621284, + "learning_rate": 1.996611184898163e-05, + "loss": 1.5173, + "step": 9219 + }, + { + "epoch": 0.11, + "grad_norm": 19.065387038426643, + "learning_rate": 1.9966063779484554e-05, + "loss": 1.8586, + "step": 9222 + }, + { + "epoch": 0.11, + "grad_norm": 100.39211642033095, + "learning_rate": 1.996601567597689e-05, + "loss": 1.6616, + "step": 9225 + }, + { + "epoch": 0.11, + "grad_norm": 8.290684970218443, + "learning_rate": 1.9965967538458805e-05, + "loss": 1.6244, + "step": 9228 + }, + { + "epoch": 0.11, + "grad_norm": 4.899137819739696, + "learning_rate": 1.9965919366930465e-05, + "loss": 2.0465, + "step": 9231 + }, + { + "epoch": 0.11, + "grad_norm": 19.844749505428254, + "learning_rate": 1.9965871161392027e-05, + "loss": 1.381, + "step": 9234 + }, + { + "epoch": 0.11, + "grad_norm": 42.40897700296296, + "learning_rate": 1.9965822921843664e-05, + "loss": 1.5219, + "step": 9237 + }, + { + "epoch": 0.11, + "grad_norm": 34.188859874836155, + "learning_rate": 1.996577464828554e-05, + "loss": 1.9848, + "step": 9240 + }, + { + "epoch": 0.11, + "grad_norm": 31.86310835640068, + "learning_rate": 1.996572634071781e-05, + "loss": 1.6108, + "step": 9243 + }, + { + "epoch": 0.11, + "grad_norm": 24.95077522185817, + "learning_rate": 1.996567799914065e-05, + "loss": 1.6617, + "step": 9246 + }, + { + "epoch": 0.11, + "grad_norm": 6.483992511688009, + "learning_rate": 1.996562962355422e-05, + "loss": 1.6343, + "step": 9249 + }, + { + "epoch": 0.11, + "grad_norm": 19.54523345904345, + "learning_rate": 1.9965581213958685e-05, + "loss": 1.4837, + "step": 9252 + }, + { + "epoch": 0.11, + "grad_norm": 26.206822251779965, + "learning_rate": 1.996553277035421e-05, + "loss": 1.4585, + "step": 9255 + }, + { + "epoch": 0.11, + "grad_norm": 30.299703407901696, + "learning_rate": 1.9965484292740963e-05, + "loss": 1.9357, + "step": 9258 + }, + { + "epoch": 0.11, + "grad_norm": 14.121193638337683, + "learning_rate": 1.9965435781119106e-05, + "loss": 1.4023, + "step": 9261 + }, + { + "epoch": 0.11, + "grad_norm": 69.57744672279298, + "learning_rate": 1.9965387235488808e-05, + "loss": 2.1603, + "step": 9264 + }, + { + "epoch": 0.11, + "grad_norm": 31.476049868610552, + "learning_rate": 1.9965338655850232e-05, + "loss": 1.7238, + "step": 9267 + }, + { + "epoch": 0.11, + "grad_norm": 24.992503337705138, + "learning_rate": 1.9965290042203545e-05, + "loss": 2.0817, + "step": 9270 + }, + { + "epoch": 0.11, + "grad_norm": 8.429437730173058, + "learning_rate": 1.9965241394548915e-05, + "loss": 1.6805, + "step": 9273 + }, + { + "epoch": 0.11, + "grad_norm": 5.7243514651855305, + "learning_rate": 1.99651927128865e-05, + "loss": 1.5055, + "step": 9276 + }, + { + "epoch": 0.11, + "grad_norm": 15.239645273340065, + "learning_rate": 1.9965143997216474e-05, + "loss": 1.6336, + "step": 9279 + }, + { + "epoch": 0.11, + "grad_norm": 12.703819595806946, + "learning_rate": 1.9965095247539e-05, + "loss": 1.8836, + "step": 9282 + }, + { + "epoch": 0.11, + "grad_norm": 13.807607985204383, + "learning_rate": 1.9965046463854248e-05, + "loss": 1.6709, + "step": 9285 + }, + { + "epoch": 0.11, + "grad_norm": 13.652195960521134, + "learning_rate": 1.9964997646162382e-05, + "loss": 1.7219, + "step": 9288 + }, + { + "epoch": 0.11, + "grad_norm": 24.36421025427718, + "learning_rate": 1.996494879446357e-05, + "loss": 1.917, + "step": 9291 + }, + { + "epoch": 0.11, + "grad_norm": 13.559462242053625, + "learning_rate": 1.996489990875797e-05, + "loss": 1.5626, + "step": 9294 + }, + { + "epoch": 0.11, + "grad_norm": 29.00255761264406, + "learning_rate": 1.9964850989045762e-05, + "loss": 1.5771, + "step": 9297 + }, + { + "epoch": 0.11, + "grad_norm": 10.066188068375576, + "learning_rate": 1.9964802035327106e-05, + "loss": 1.9625, + "step": 9300 + }, + { + "epoch": 0.11, + "grad_norm": 6.881296574303719, + "learning_rate": 1.9964753047602168e-05, + "loss": 1.0534, + "step": 9303 + }, + { + "epoch": 0.11, + "grad_norm": 5.561886560407057, + "learning_rate": 1.9964704025871117e-05, + "loss": 1.3433, + "step": 9306 + }, + { + "epoch": 0.11, + "grad_norm": 16.207389448658862, + "learning_rate": 1.9964654970134122e-05, + "loss": 1.6353, + "step": 9309 + }, + { + "epoch": 0.11, + "grad_norm": 23.29719884941263, + "learning_rate": 1.996460588039135e-05, + "loss": 1.5578, + "step": 9312 + }, + { + "epoch": 0.11, + "grad_norm": 38.38151079540142, + "learning_rate": 1.9964556756642966e-05, + "loss": 1.5589, + "step": 9315 + }, + { + "epoch": 0.11, + "grad_norm": 7.178113455005222, + "learning_rate": 1.996450759888914e-05, + "loss": 1.5672, + "step": 9318 + }, + { + "epoch": 0.11, + "grad_norm": 10.559054215465002, + "learning_rate": 1.9964458407130035e-05, + "loss": 1.7719, + "step": 9321 + }, + { + "epoch": 0.11, + "grad_norm": 9.204382345592892, + "learning_rate": 1.9964409181365827e-05, + "loss": 1.7869, + "step": 9324 + }, + { + "epoch": 0.11, + "grad_norm": 4.365564009375531, + "learning_rate": 1.9964359921596678e-05, + "loss": 1.9092, + "step": 9327 + }, + { + "epoch": 0.11, + "grad_norm": 16.242069427630124, + "learning_rate": 1.9964310627822757e-05, + "loss": 1.5675, + "step": 9330 + }, + { + "epoch": 0.11, + "grad_norm": 11.197829369241703, + "learning_rate": 1.9964261300044235e-05, + "loss": 1.576, + "step": 9333 + }, + { + "epoch": 0.11, + "grad_norm": 6.535215910393814, + "learning_rate": 1.9964211938261277e-05, + "loss": 1.7693, + "step": 9336 + }, + { + "epoch": 0.11, + "grad_norm": 76.1665129716773, + "learning_rate": 1.996416254247405e-05, + "loss": 1.4561, + "step": 9339 + }, + { + "epoch": 0.11, + "grad_norm": 6.414929239149799, + "learning_rate": 1.9964113112682727e-05, + "loss": 1.3379, + "step": 9342 + }, + { + "epoch": 0.11, + "grad_norm": 6.7887221603948715, + "learning_rate": 1.9964063648887478e-05, + "loss": 1.8138, + "step": 9345 + }, + { + "epoch": 0.11, + "grad_norm": 48.734111974475134, + "learning_rate": 1.9964014151088467e-05, + "loss": 1.3371, + "step": 9348 + }, + { + "epoch": 0.11, + "grad_norm": 35.783430009181274, + "learning_rate": 1.9963964619285865e-05, + "loss": 1.4893, + "step": 9351 + }, + { + "epoch": 0.11, + "grad_norm": 20.745442922109962, + "learning_rate": 1.9963915053479842e-05, + "loss": 1.8917, + "step": 9354 + }, + { + "epoch": 0.11, + "grad_norm": 9.130270783766287, + "learning_rate": 1.996386545367056e-05, + "loss": 1.799, + "step": 9357 + }, + { + "epoch": 0.11, + "grad_norm": 18.560767037315514, + "learning_rate": 1.99638158198582e-05, + "loss": 1.4227, + "step": 9360 + }, + { + "epoch": 0.11, + "grad_norm": 16.39443484173472, + "learning_rate": 1.9963766152042923e-05, + "loss": 1.5946, + "step": 9363 + }, + { + "epoch": 0.11, + "grad_norm": 43.82416781122684, + "learning_rate": 1.99637164502249e-05, + "loss": 1.8372, + "step": 9366 + }, + { + "epoch": 0.11, + "grad_norm": 68.35856279221173, + "learning_rate": 1.9963666714404305e-05, + "loss": 1.7096, + "step": 9369 + }, + { + "epoch": 0.11, + "grad_norm": 10.235555801722745, + "learning_rate": 1.9963616944581307e-05, + "loss": 1.8807, + "step": 9372 + }, + { + "epoch": 0.11, + "grad_norm": 19.143621123659635, + "learning_rate": 1.996356714075607e-05, + "loss": 1.6737, + "step": 9375 + }, + { + "epoch": 0.11, + "grad_norm": 25.020396726969537, + "learning_rate": 1.9963517302928763e-05, + "loss": 1.8194, + "step": 9378 + }, + { + "epoch": 0.11, + "grad_norm": 26.311798238213484, + "learning_rate": 1.9963467431099562e-05, + "loss": 1.7647, + "step": 9381 + }, + { + "epoch": 0.11, + "grad_norm": 8.981875664633279, + "learning_rate": 1.9963417525268637e-05, + "loss": 1.7688, + "step": 9384 + }, + { + "epoch": 0.11, + "grad_norm": 16.58983623091939, + "learning_rate": 1.9963367585436158e-05, + "loss": 1.8761, + "step": 9387 + }, + { + "epoch": 0.11, + "grad_norm": 30.868942732803838, + "learning_rate": 1.9963317611602293e-05, + "loss": 1.6143, + "step": 9390 + }, + { + "epoch": 0.11, + "grad_norm": 39.462505489505965, + "learning_rate": 1.9963267603767215e-05, + "loss": 2.0029, + "step": 9393 + }, + { + "epoch": 0.11, + "grad_norm": 5.155205114941168, + "learning_rate": 1.9963217561931092e-05, + "loss": 1.4701, + "step": 9396 + }, + { + "epoch": 0.11, + "grad_norm": 37.568001448577625, + "learning_rate": 1.9963167486094097e-05, + "loss": 1.8334, + "step": 9399 + }, + { + "epoch": 0.11, + "grad_norm": 43.97028664485499, + "learning_rate": 1.99631173762564e-05, + "loss": 2.1169, + "step": 9402 + }, + { + "epoch": 0.11, + "grad_norm": 18.86844703719484, + "learning_rate": 1.9963067232418172e-05, + "loss": 1.6537, + "step": 9405 + }, + { + "epoch": 0.11, + "grad_norm": 44.39732675643787, + "learning_rate": 1.9963017054579583e-05, + "loss": 1.7064, + "step": 9408 + }, + { + "epoch": 0.11, + "grad_norm": 4.1965875208735, + "learning_rate": 1.9962966842740806e-05, + "loss": 1.6722, + "step": 9411 + }, + { + "epoch": 0.11, + "grad_norm": 16.272133748761654, + "learning_rate": 1.9962916596902016e-05, + "loss": 1.6734, + "step": 9414 + }, + { + "epoch": 0.11, + "grad_norm": 37.81177064183471, + "learning_rate": 1.9962866317063374e-05, + "loss": 1.6965, + "step": 9417 + }, + { + "epoch": 0.11, + "grad_norm": 5.491102548114541, + "learning_rate": 1.996281600322506e-05, + "loss": 1.8718, + "step": 9420 + }, + { + "epoch": 0.11, + "grad_norm": 37.1151788444892, + "learning_rate": 1.9962765655387242e-05, + "loss": 1.6735, + "step": 9423 + }, + { + "epoch": 0.11, + "grad_norm": 3.9388571071447647, + "learning_rate": 1.99627152735501e-05, + "loss": 1.4247, + "step": 9426 + }, + { + "epoch": 0.11, + "grad_norm": 7.815355518262666, + "learning_rate": 1.996266485771379e-05, + "loss": 1.5061, + "step": 9429 + }, + { + "epoch": 0.11, + "grad_norm": 9.37628577131678, + "learning_rate": 1.99626144078785e-05, + "loss": 1.423, + "step": 9432 + }, + { + "epoch": 0.11, + "grad_norm": 68.89941160657261, + "learning_rate": 1.9962563924044396e-05, + "loss": 1.7744, + "step": 9435 + }, + { + "epoch": 0.11, + "grad_norm": 28.794158485361258, + "learning_rate": 1.9962513406211648e-05, + "loss": 1.7973, + "step": 9438 + }, + { + "epoch": 0.11, + "grad_norm": 8.492522667110208, + "learning_rate": 1.996246285438043e-05, + "loss": 1.6634, + "step": 9441 + }, + { + "epoch": 0.11, + "grad_norm": 18.983008256459502, + "learning_rate": 1.996241226855091e-05, + "loss": 1.542, + "step": 9444 + }, + { + "epoch": 0.11, + "grad_norm": 9.789783763703149, + "learning_rate": 1.9962361648723273e-05, + "loss": 2.1838, + "step": 9447 + }, + { + "epoch": 0.11, + "grad_norm": 4.5102170010091065, + "learning_rate": 1.996231099489768e-05, + "loss": 1.5595, + "step": 9450 + }, + { + "epoch": 0.11, + "grad_norm": 54.06339221316547, + "learning_rate": 1.996226030707431e-05, + "loss": 1.6699, + "step": 9453 + }, + { + "epoch": 0.11, + "grad_norm": 8.805502791073025, + "learning_rate": 1.996220958525333e-05, + "loss": 1.8482, + "step": 9456 + }, + { + "epoch": 0.11, + "grad_norm": 6.6322372921585275, + "learning_rate": 1.996215882943492e-05, + "loss": 1.9229, + "step": 9459 + }, + { + "epoch": 0.11, + "grad_norm": 25.501955271060897, + "learning_rate": 1.9962108039619253e-05, + "loss": 2.1099, + "step": 9462 + }, + { + "epoch": 0.11, + "grad_norm": 7.031981035724264, + "learning_rate": 1.9962057215806498e-05, + "loss": 1.8315, + "step": 9465 + }, + { + "epoch": 0.11, + "grad_norm": 8.568411182673287, + "learning_rate": 1.9962006357996828e-05, + "loss": 1.2969, + "step": 9468 + }, + { + "epoch": 0.11, + "grad_norm": 38.997283051043475, + "learning_rate": 1.996195546619042e-05, + "loss": 1.7497, + "step": 9471 + }, + { + "epoch": 0.11, + "grad_norm": 65.60017544545877, + "learning_rate": 1.9961904540387444e-05, + "loss": 1.4272, + "step": 9474 + }, + { + "epoch": 0.11, + "grad_norm": 35.783232005432566, + "learning_rate": 1.996185358058808e-05, + "loss": 1.8999, + "step": 9477 + }, + { + "epoch": 0.11, + "grad_norm": 43.14987309752301, + "learning_rate": 1.9961802586792494e-05, + "loss": 1.3682, + "step": 9480 + }, + { + "epoch": 0.11, + "grad_norm": 29.199261002841947, + "learning_rate": 1.9961751559000862e-05, + "loss": 1.7972, + "step": 9483 + }, + { + "epoch": 0.11, + "grad_norm": 13.797362501839295, + "learning_rate": 1.9961700497213367e-05, + "loss": 1.7107, + "step": 9486 + }, + { + "epoch": 0.11, + "grad_norm": 11.599384188529692, + "learning_rate": 1.9961649401430174e-05, + "loss": 1.6482, + "step": 9489 + }, + { + "epoch": 0.11, + "grad_norm": 2.925868907421966, + "learning_rate": 1.996159827165146e-05, + "loss": 1.7502, + "step": 9492 + }, + { + "epoch": 0.11, + "grad_norm": 6.091807463667064, + "learning_rate": 1.99615471078774e-05, + "loss": 1.5094, + "step": 9495 + }, + { + "epoch": 0.11, + "grad_norm": 7.71076235715645, + "learning_rate": 1.9961495910108165e-05, + "loss": 1.8521, + "step": 9498 + }, + { + "epoch": 0.11, + "grad_norm": 4.96441684868272, + "learning_rate": 1.9961444678343932e-05, + "loss": 1.9025, + "step": 9501 + }, + { + "epoch": 0.11, + "grad_norm": 8.210778441424544, + "learning_rate": 1.996139341258488e-05, + "loss": 1.4766, + "step": 9504 + }, + { + "epoch": 0.11, + "grad_norm": 11.536945479465606, + "learning_rate": 1.9961342112831175e-05, + "loss": 1.7118, + "step": 9507 + }, + { + "epoch": 0.11, + "grad_norm": 56.433655724562215, + "learning_rate": 1.9961290779083003e-05, + "loss": 1.3428, + "step": 9510 + }, + { + "epoch": 0.11, + "grad_norm": 30.66916347700877, + "learning_rate": 1.996123941134053e-05, + "loss": 1.9159, + "step": 9513 + }, + { + "epoch": 0.11, + "grad_norm": 39.64782092703656, + "learning_rate": 1.996118800960394e-05, + "loss": 1.6722, + "step": 9516 + }, + { + "epoch": 0.11, + "grad_norm": 31.949391926568595, + "learning_rate": 1.9961136573873397e-05, + "loss": 1.6841, + "step": 9519 + }, + { + "epoch": 0.11, + "grad_norm": 29.337190553665003, + "learning_rate": 1.9961085104149084e-05, + "loss": 1.6117, + "step": 9522 + }, + { + "epoch": 0.11, + "grad_norm": 26.66428736659084, + "learning_rate": 1.9961033600431178e-05, + "loss": 1.9357, + "step": 9525 + }, + { + "epoch": 0.11, + "grad_norm": 6.924082646959088, + "learning_rate": 1.996098206271985e-05, + "loss": 1.5276, + "step": 9528 + }, + { + "epoch": 0.11, + "grad_norm": 10.560871797675171, + "learning_rate": 1.996093049101528e-05, + "loss": 2.1664, + "step": 9531 + }, + { + "epoch": 0.11, + "grad_norm": 30.20897741579248, + "learning_rate": 1.9960878885317644e-05, + "loss": 1.6139, + "step": 9534 + }, + { + "epoch": 0.11, + "grad_norm": 87.26943534412125, + "learning_rate": 1.996082724562711e-05, + "loss": 1.5371, + "step": 9537 + }, + { + "epoch": 0.11, + "grad_norm": 37.319500127730805, + "learning_rate": 1.9960775571943868e-05, + "loss": 1.8796, + "step": 9540 + }, + { + "epoch": 0.11, + "grad_norm": 18.33851067929426, + "learning_rate": 1.9960723864268082e-05, + "loss": 1.5669, + "step": 9543 + }, + { + "epoch": 0.11, + "grad_norm": 22.38941899743804, + "learning_rate": 1.9960672122599932e-05, + "loss": 1.7649, + "step": 9546 + }, + { + "epoch": 0.11, + "grad_norm": 18.84037086653564, + "learning_rate": 1.99606203469396e-05, + "loss": 1.7195, + "step": 9549 + }, + { + "epoch": 0.11, + "grad_norm": 16.774761496387615, + "learning_rate": 1.9960568537287258e-05, + "loss": 1.8963, + "step": 9552 + }, + { + "epoch": 0.11, + "grad_norm": 23.25530392639135, + "learning_rate": 1.996051669364308e-05, + "loss": 1.9032, + "step": 9555 + }, + { + "epoch": 0.11, + "grad_norm": 16.073330779726543, + "learning_rate": 1.996046481600725e-05, + "loss": 1.9549, + "step": 9558 + }, + { + "epoch": 0.11, + "grad_norm": 24.94512456799551, + "learning_rate": 1.9960412904379943e-05, + "loss": 1.7005, + "step": 9561 + }, + { + "epoch": 0.12, + "grad_norm": 5.0451276483069964, + "learning_rate": 1.996036095876133e-05, + "loss": 1.6632, + "step": 9564 + }, + { + "epoch": 0.12, + "grad_norm": 5.919783968509226, + "learning_rate": 1.9960308979151596e-05, + "loss": 1.7202, + "step": 9567 + }, + { + "epoch": 0.12, + "grad_norm": 82.24453536982504, + "learning_rate": 1.996025696555091e-05, + "loss": 1.866, + "step": 9570 + }, + { + "epoch": 0.12, + "grad_norm": 29.50157852825674, + "learning_rate": 1.996020491795946e-05, + "loss": 1.6757, + "step": 9573 + }, + { + "epoch": 0.12, + "grad_norm": 44.405171816170586, + "learning_rate": 1.9960152836377416e-05, + "loss": 1.737, + "step": 9576 + }, + { + "epoch": 0.12, + "grad_norm": 93.77075853509201, + "learning_rate": 1.9960100720804956e-05, + "loss": 1.6058, + "step": 9579 + }, + { + "epoch": 0.12, + "grad_norm": 9.869047325526122, + "learning_rate": 1.9960048571242262e-05, + "loss": 1.5395, + "step": 9582 + }, + { + "epoch": 0.12, + "grad_norm": 27.1010055116624, + "learning_rate": 1.9959996387689512e-05, + "loss": 1.7638, + "step": 9585 + }, + { + "epoch": 0.12, + "grad_norm": 5.587832444757031, + "learning_rate": 1.9959944170146878e-05, + "loss": 1.5278, + "step": 9588 + }, + { + "epoch": 0.12, + "grad_norm": 29.1922630695962, + "learning_rate": 1.9959891918614543e-05, + "loss": 1.4364, + "step": 9591 + }, + { + "epoch": 0.12, + "grad_norm": 47.21087388732429, + "learning_rate": 1.9959839633092687e-05, + "loss": 1.5835, + "step": 9594 + }, + { + "epoch": 0.12, + "grad_norm": 34.84402636316564, + "learning_rate": 1.9959787313581483e-05, + "loss": 1.9578, + "step": 9597 + }, + { + "epoch": 0.12, + "grad_norm": 19.170314092080528, + "learning_rate": 1.9959734960081113e-05, + "loss": 1.3003, + "step": 9600 + }, + { + "epoch": 0.12, + "grad_norm": 22.24777413370734, + "learning_rate": 1.9959682572591757e-05, + "loss": 1.6183, + "step": 9603 + }, + { + "epoch": 0.12, + "grad_norm": 54.24891851810393, + "learning_rate": 1.995963015111359e-05, + "loss": 1.743, + "step": 9606 + }, + { + "epoch": 0.12, + "grad_norm": 16.256752460252788, + "learning_rate": 1.9959577695646796e-05, + "loss": 1.7541, + "step": 9609 + }, + { + "epoch": 0.12, + "grad_norm": 12.099404922435978, + "learning_rate": 1.9959525206191546e-05, + "loss": 1.5472, + "step": 9612 + }, + { + "epoch": 0.12, + "grad_norm": 6.780535621744569, + "learning_rate": 1.9959472682748028e-05, + "loss": 1.8551, + "step": 9615 + }, + { + "epoch": 0.12, + "grad_norm": 13.68847617311981, + "learning_rate": 1.9959420125316412e-05, + "loss": 2.249, + "step": 9618 + }, + { + "epoch": 0.12, + "grad_norm": 16.697659394799086, + "learning_rate": 1.9959367533896885e-05, + "loss": 1.3929, + "step": 9621 + }, + { + "epoch": 0.12, + "grad_norm": 15.759624789923063, + "learning_rate": 1.9959314908489628e-05, + "loss": 1.7907, + "step": 9624 + }, + { + "epoch": 0.12, + "grad_norm": 22.53275791891628, + "learning_rate": 1.9959262249094812e-05, + "loss": 1.7031, + "step": 9627 + }, + { + "epoch": 0.12, + "grad_norm": 54.436653645153086, + "learning_rate": 1.9959209555712623e-05, + "loss": 2.2032, + "step": 9630 + }, + { + "epoch": 0.12, + "grad_norm": 49.03392192667818, + "learning_rate": 1.9959156828343236e-05, + "loss": 1.378, + "step": 9633 + }, + { + "epoch": 0.12, + "grad_norm": 4.419226881762293, + "learning_rate": 1.9959104066986836e-05, + "loss": 1.5059, + "step": 9636 + }, + { + "epoch": 0.12, + "grad_norm": 20.04164599360249, + "learning_rate": 1.99590512716436e-05, + "loss": 1.4827, + "step": 9639 + }, + { + "epoch": 0.12, + "grad_norm": 6.033326647558238, + "learning_rate": 1.995899844231371e-05, + "loss": 1.7556, + "step": 9642 + }, + { + "epoch": 0.12, + "grad_norm": 14.54881482986829, + "learning_rate": 1.995894557899735e-05, + "loss": 1.6077, + "step": 9645 + }, + { + "epoch": 0.12, + "grad_norm": 12.083546364010012, + "learning_rate": 1.9958892681694692e-05, + "loss": 2.091, + "step": 9648 + }, + { + "epoch": 0.12, + "grad_norm": 24.278210047527214, + "learning_rate": 1.995883975040592e-05, + "loss": 1.8078, + "step": 9651 + }, + { + "epoch": 0.12, + "grad_norm": 12.399248255627576, + "learning_rate": 1.9958786785131215e-05, + "loss": 1.9121, + "step": 9654 + }, + { + "epoch": 0.12, + "grad_norm": 9.126429131259664, + "learning_rate": 1.9958733785870758e-05, + "loss": 1.6611, + "step": 9657 + }, + { + "epoch": 0.12, + "grad_norm": 14.965278658306943, + "learning_rate": 1.995868075262473e-05, + "loss": 1.7917, + "step": 9660 + }, + { + "epoch": 0.12, + "grad_norm": 6.142388107721655, + "learning_rate": 1.9958627685393314e-05, + "loss": 1.4606, + "step": 9663 + }, + { + "epoch": 0.12, + "grad_norm": 32.73416476933627, + "learning_rate": 1.9958574584176683e-05, + "loss": 1.9345, + "step": 9666 + }, + { + "epoch": 0.12, + "grad_norm": 10.261455379341974, + "learning_rate": 1.9958521448975028e-05, + "loss": 1.7437, + "step": 9669 + }, + { + "epoch": 0.12, + "grad_norm": 85.57833806599896, + "learning_rate": 1.9958468279788526e-05, + "loss": 1.8576, + "step": 9672 + }, + { + "epoch": 0.12, + "grad_norm": 20.79162064412491, + "learning_rate": 1.995841507661736e-05, + "loss": 1.5186, + "step": 9675 + }, + { + "epoch": 0.12, + "grad_norm": 17.613361285678472, + "learning_rate": 1.9958361839461708e-05, + "loss": 1.7607, + "step": 9678 + }, + { + "epoch": 0.12, + "grad_norm": 72.47969225723973, + "learning_rate": 1.9958308568321757e-05, + "loss": 1.7226, + "step": 9681 + }, + { + "epoch": 0.12, + "grad_norm": 16.20751615463803, + "learning_rate": 1.9958255263197682e-05, + "loss": 1.4222, + "step": 9684 + }, + { + "epoch": 0.12, + "grad_norm": 67.31588025433832, + "learning_rate": 1.9958201924089673e-05, + "loss": 1.7394, + "step": 9687 + }, + { + "epoch": 0.12, + "grad_norm": 3.335738995009568, + "learning_rate": 1.9958148550997906e-05, + "loss": 1.4139, + "step": 9690 + }, + { + "epoch": 0.12, + "grad_norm": 6.592012142850812, + "learning_rate": 1.9958095143922564e-05, + "loss": 1.8117, + "step": 9693 + }, + { + "epoch": 0.12, + "grad_norm": 34.65772648921101, + "learning_rate": 1.995804170286383e-05, + "loss": 2.0431, + "step": 9696 + }, + { + "epoch": 0.12, + "grad_norm": 15.160729261362132, + "learning_rate": 1.9957988227821886e-05, + "loss": 1.9155, + "step": 9699 + }, + { + "epoch": 0.12, + "grad_norm": 17.933990784414224, + "learning_rate": 1.9957934718796916e-05, + "loss": 1.9633, + "step": 9702 + }, + { + "epoch": 0.12, + "grad_norm": 24.156943419072945, + "learning_rate": 1.9957881175789103e-05, + "loss": 1.6237, + "step": 9705 + }, + { + "epoch": 0.12, + "grad_norm": 92.18658457675805, + "learning_rate": 1.9957827598798623e-05, + "loss": 1.6802, + "step": 9708 + }, + { + "epoch": 0.12, + "grad_norm": 23.08612712982507, + "learning_rate": 1.995777398782567e-05, + "loss": 1.7119, + "step": 9711 + }, + { + "epoch": 0.12, + "grad_norm": 25.263376068489404, + "learning_rate": 1.9957720342870418e-05, + "loss": 1.6012, + "step": 9714 + }, + { + "epoch": 0.12, + "grad_norm": 3.194615002005656, + "learning_rate": 1.9957666663933058e-05, + "loss": 1.7586, + "step": 9717 + }, + { + "epoch": 0.12, + "grad_norm": 13.853847545187651, + "learning_rate": 1.9957612951013763e-05, + "loss": 1.5293, + "step": 9720 + }, + { + "epoch": 0.12, + "grad_norm": 8.693200825314804, + "learning_rate": 1.9957559204112726e-05, + "loss": 1.2768, + "step": 9723 + }, + { + "epoch": 0.12, + "grad_norm": 11.756003480213854, + "learning_rate": 1.9957505423230123e-05, + "loss": 1.5231, + "step": 9726 + }, + { + "epoch": 0.12, + "grad_norm": 4.196883613567831, + "learning_rate": 1.9957451608366143e-05, + "loss": 1.3438, + "step": 9729 + }, + { + "epoch": 0.12, + "grad_norm": 55.43584034053472, + "learning_rate": 1.9957397759520964e-05, + "loss": 1.6782, + "step": 9732 + }, + { + "epoch": 0.12, + "grad_norm": 6.928556949823435, + "learning_rate": 1.9957343876694778e-05, + "loss": 1.6936, + "step": 9735 + }, + { + "epoch": 0.12, + "grad_norm": 47.64878670061874, + "learning_rate": 1.9957289959887757e-05, + "loss": 1.8551, + "step": 9738 + }, + { + "epoch": 0.12, + "grad_norm": 12.554136550638066, + "learning_rate": 1.99572360091001e-05, + "loss": 1.2909, + "step": 9741 + }, + { + "epoch": 0.12, + "grad_norm": 43.37924102950771, + "learning_rate": 1.9957182024331977e-05, + "loss": 1.5394, + "step": 9744 + }, + { + "epoch": 0.12, + "grad_norm": 5.436829947790907, + "learning_rate": 1.995712800558358e-05, + "loss": 1.4682, + "step": 9747 + }, + { + "epoch": 0.12, + "grad_norm": 23.827120893474525, + "learning_rate": 1.9957073952855093e-05, + "loss": 1.897, + "step": 9750 + }, + { + "epoch": 0.12, + "grad_norm": 17.68479685753399, + "learning_rate": 1.9957019866146697e-05, + "loss": 1.7166, + "step": 9753 + }, + { + "epoch": 0.12, + "grad_norm": 18.741209649570983, + "learning_rate": 1.995696574545858e-05, + "loss": 1.9482, + "step": 9756 + }, + { + "epoch": 0.12, + "grad_norm": 9.751926963028318, + "learning_rate": 1.9956911590790924e-05, + "loss": 1.5321, + "step": 9759 + }, + { + "epoch": 0.12, + "grad_norm": 8.518156323130153, + "learning_rate": 1.9956857402143916e-05, + "loss": 1.5443, + "step": 9762 + }, + { + "epoch": 0.12, + "grad_norm": 23.09907242479872, + "learning_rate": 1.995680317951774e-05, + "loss": 1.5058, + "step": 9765 + }, + { + "epoch": 0.12, + "grad_norm": 27.712748673162224, + "learning_rate": 1.9956748922912584e-05, + "loss": 1.9731, + "step": 9768 + }, + { + "epoch": 0.12, + "grad_norm": 41.97658129077565, + "learning_rate": 1.9956694632328627e-05, + "loss": 1.5344, + "step": 9771 + }, + { + "epoch": 0.12, + "grad_norm": 12.038951500233534, + "learning_rate": 1.995664030776606e-05, + "loss": 1.7758, + "step": 9774 + }, + { + "epoch": 0.12, + "grad_norm": 89.41056151441066, + "learning_rate": 1.9956585949225064e-05, + "loss": 1.9291, + "step": 9777 + }, + { + "epoch": 0.12, + "grad_norm": 29.770359812644536, + "learning_rate": 1.9956531556705827e-05, + "loss": 1.6583, + "step": 9780 + }, + { + "epoch": 0.12, + "grad_norm": 18.04777027583921, + "learning_rate": 1.9956477130208532e-05, + "loss": 1.8087, + "step": 9783 + }, + { + "epoch": 0.12, + "grad_norm": 16.316273794936134, + "learning_rate": 1.995642266973337e-05, + "loss": 1.7036, + "step": 9786 + }, + { + "epoch": 0.12, + "grad_norm": 49.3413222285666, + "learning_rate": 1.9956368175280522e-05, + "loss": 1.48, + "step": 9789 + }, + { + "epoch": 0.12, + "grad_norm": 8.33540634299007, + "learning_rate": 1.9956313646850178e-05, + "loss": 1.7277, + "step": 9792 + }, + { + "epoch": 0.12, + "grad_norm": 15.04593529986144, + "learning_rate": 1.995625908444252e-05, + "loss": 1.6075, + "step": 9795 + }, + { + "epoch": 0.12, + "grad_norm": 10.792080394085273, + "learning_rate": 1.9956204488057736e-05, + "loss": 1.3428, + "step": 9798 + }, + { + "epoch": 0.12, + "grad_norm": 6.142070111875577, + "learning_rate": 1.9956149857696016e-05, + "loss": 1.8829, + "step": 9801 + }, + { + "epoch": 0.12, + "grad_norm": 9.344224789690424, + "learning_rate": 1.9956095193357537e-05, + "loss": 1.4499, + "step": 9804 + }, + { + "epoch": 0.12, + "grad_norm": 9.491823548571196, + "learning_rate": 1.9956040495042493e-05, + "loss": 1.592, + "step": 9807 + }, + { + "epoch": 0.12, + "grad_norm": 16.776145659397244, + "learning_rate": 1.9955985762751067e-05, + "loss": 1.9027, + "step": 9810 + }, + { + "epoch": 0.12, + "grad_norm": 20.271216730048955, + "learning_rate": 1.9955930996483453e-05, + "loss": 1.5277, + "step": 9813 + }, + { + "epoch": 0.12, + "grad_norm": 5.293523361455007, + "learning_rate": 1.995587619623983e-05, + "loss": 1.5505, + "step": 9816 + }, + { + "epoch": 0.12, + "grad_norm": 19.79024175597945, + "learning_rate": 1.9955821362020388e-05, + "loss": 1.805, + "step": 9819 + }, + { + "epoch": 0.12, + "grad_norm": 9.506043892248643, + "learning_rate": 1.9955766493825314e-05, + "loss": 1.5366, + "step": 9822 + }, + { + "epoch": 0.12, + "grad_norm": 36.07772605809804, + "learning_rate": 1.9955711591654793e-05, + "loss": 1.453, + "step": 9825 + }, + { + "epoch": 0.12, + "grad_norm": 11.402469012970624, + "learning_rate": 1.9955656655509015e-05, + "loss": 1.7943, + "step": 9828 + }, + { + "epoch": 0.12, + "grad_norm": 30.872680613253266, + "learning_rate": 1.9955601685388166e-05, + "loss": 1.6423, + "step": 9831 + }, + { + "epoch": 0.12, + "grad_norm": 7.367637784657758, + "learning_rate": 1.9955546681292436e-05, + "loss": 1.8918, + "step": 9834 + }, + { + "epoch": 0.12, + "grad_norm": 17.263792110646634, + "learning_rate": 1.995549164322201e-05, + "loss": 1.8993, + "step": 9837 + }, + { + "epoch": 0.12, + "grad_norm": 4.412726235384188, + "learning_rate": 1.9955436571177082e-05, + "loss": 1.6956, + "step": 9840 + }, + { + "epoch": 0.12, + "grad_norm": 8.09771835072926, + "learning_rate": 1.995538146515783e-05, + "loss": 1.6577, + "step": 9843 + }, + { + "epoch": 0.12, + "grad_norm": 6.5598712548165, + "learning_rate": 1.9955326325164445e-05, + "loss": 1.6938, + "step": 9846 + }, + { + "epoch": 0.12, + "grad_norm": 19.24187558908074, + "learning_rate": 1.9955271151197122e-05, + "loss": 1.4994, + "step": 9849 + }, + { + "epoch": 0.12, + "grad_norm": 8.193596662311927, + "learning_rate": 1.9955215943256042e-05, + "loss": 1.6703, + "step": 9852 + }, + { + "epoch": 0.12, + "grad_norm": 15.907367703329816, + "learning_rate": 1.9955160701341395e-05, + "loss": 1.8926, + "step": 9855 + }, + { + "epoch": 0.12, + "grad_norm": 50.97135400017947, + "learning_rate": 1.9955105425453372e-05, + "loss": 1.6513, + "step": 9858 + }, + { + "epoch": 0.12, + "grad_norm": 19.822482621393867, + "learning_rate": 1.9955050115592162e-05, + "loss": 1.8032, + "step": 9861 + }, + { + "epoch": 0.12, + "grad_norm": 7.430033267186329, + "learning_rate": 1.9954994771757947e-05, + "loss": 1.9751, + "step": 9864 + }, + { + "epoch": 0.12, + "grad_norm": 42.84091206304128, + "learning_rate": 1.9954939393950922e-05, + "loss": 1.6772, + "step": 9867 + }, + { + "epoch": 0.12, + "grad_norm": 9.670964886544885, + "learning_rate": 1.995488398217128e-05, + "loss": 1.5915, + "step": 9870 + }, + { + "epoch": 0.12, + "grad_norm": 21.54122615034862, + "learning_rate": 1.99548285364192e-05, + "loss": 1.7159, + "step": 9873 + }, + { + "epoch": 0.12, + "grad_norm": 41.99315491686136, + "learning_rate": 1.9954773056694873e-05, + "loss": 1.8561, + "step": 9876 + }, + { + "epoch": 0.12, + "grad_norm": 15.877464913256528, + "learning_rate": 1.9954717542998498e-05, + "loss": 1.8484, + "step": 9879 + }, + { + "epoch": 0.12, + "grad_norm": 18.657024453917007, + "learning_rate": 1.9954661995330253e-05, + "loss": 1.5394, + "step": 9882 + }, + { + "epoch": 0.12, + "grad_norm": 32.642887651416466, + "learning_rate": 1.9954606413690332e-05, + "loss": 1.5126, + "step": 9885 + }, + { + "epoch": 0.12, + "grad_norm": 3.9680795553018466, + "learning_rate": 1.9954550798078926e-05, + "loss": 1.5076, + "step": 9888 + }, + { + "epoch": 0.12, + "grad_norm": 33.33180912677587, + "learning_rate": 1.9954495148496226e-05, + "loss": 1.4642, + "step": 9891 + }, + { + "epoch": 0.12, + "grad_norm": 26.289502459763373, + "learning_rate": 1.9954439464942416e-05, + "loss": 1.7388, + "step": 9894 + }, + { + "epoch": 0.12, + "grad_norm": 7.210229080684261, + "learning_rate": 1.9954383747417692e-05, + "loss": 2.0882, + "step": 9897 + }, + { + "epoch": 0.12, + "grad_norm": 96.26612271835292, + "learning_rate": 1.9954327995922244e-05, + "loss": 1.9589, + "step": 9900 + }, + { + "epoch": 0.12, + "grad_norm": 7.533626538692885, + "learning_rate": 1.9954272210456252e-05, + "loss": 1.2989, + "step": 9903 + }, + { + "epoch": 0.12, + "grad_norm": 28.42853219701673, + "learning_rate": 1.9954216391019922e-05, + "loss": 1.4358, + "step": 9906 + }, + { + "epoch": 0.12, + "grad_norm": 11.54651632457019, + "learning_rate": 1.9954160537613435e-05, + "loss": 1.9286, + "step": 9909 + }, + { + "epoch": 0.12, + "grad_norm": 4.831496664502071, + "learning_rate": 1.9954104650236985e-05, + "loss": 1.5453, + "step": 9912 + }, + { + "epoch": 0.12, + "grad_norm": 35.336333630784324, + "learning_rate": 1.9954048728890757e-05, + "loss": 1.6961, + "step": 9915 + }, + { + "epoch": 0.12, + "grad_norm": 18.745143941056778, + "learning_rate": 1.9953992773574947e-05, + "loss": 1.8279, + "step": 9918 + }, + { + "epoch": 0.12, + "grad_norm": 5.8869413284329655, + "learning_rate": 1.9953936784289748e-05, + "loss": 1.4547, + "step": 9921 + }, + { + "epoch": 0.12, + "grad_norm": 7.252232519950237, + "learning_rate": 1.995388076103535e-05, + "loss": 1.5955, + "step": 9924 + }, + { + "epoch": 0.12, + "grad_norm": 6.298918323772926, + "learning_rate": 1.995382470381194e-05, + "loss": 1.6416, + "step": 9927 + }, + { + "epoch": 0.12, + "grad_norm": 7.610752108740213, + "learning_rate": 1.9953768612619707e-05, + "loss": 1.6751, + "step": 9930 + }, + { + "epoch": 0.12, + "grad_norm": 20.976208073223557, + "learning_rate": 1.9953712487458853e-05, + "loss": 1.9453, + "step": 9933 + }, + { + "epoch": 0.12, + "grad_norm": 3.8656805986056866, + "learning_rate": 1.9953656328329562e-05, + "loss": 1.8327, + "step": 9936 + }, + { + "epoch": 0.12, + "grad_norm": 13.721942817376574, + "learning_rate": 1.9953600135232027e-05, + "loss": 1.8726, + "step": 9939 + }, + { + "epoch": 0.12, + "grad_norm": 24.443180790405577, + "learning_rate": 1.9953543908166438e-05, + "loss": 1.6623, + "step": 9942 + }, + { + "epoch": 0.12, + "grad_norm": 19.646873760332216, + "learning_rate": 1.995348764713299e-05, + "loss": 1.8595, + "step": 9945 + }, + { + "epoch": 0.12, + "grad_norm": 29.111023832481287, + "learning_rate": 1.9953431352131877e-05, + "loss": 1.6584, + "step": 9948 + }, + { + "epoch": 0.12, + "grad_norm": 12.512188185633118, + "learning_rate": 1.9953375023163284e-05, + "loss": 1.526, + "step": 9951 + }, + { + "epoch": 0.12, + "grad_norm": 10.582102612655325, + "learning_rate": 1.995331866022741e-05, + "loss": 1.6884, + "step": 9954 + }, + { + "epoch": 0.12, + "grad_norm": 5.806302472888449, + "learning_rate": 1.9953262263324444e-05, + "loss": 1.8285, + "step": 9957 + }, + { + "epoch": 0.12, + "grad_norm": 3.9599206325659657, + "learning_rate": 1.995320583245458e-05, + "loss": 1.725, + "step": 9960 + }, + { + "epoch": 0.12, + "grad_norm": 19.389312634610388, + "learning_rate": 1.9953149367618006e-05, + "loss": 1.8457, + "step": 9963 + }, + { + "epoch": 0.12, + "grad_norm": 6.142604235277597, + "learning_rate": 1.995309286881492e-05, + "loss": 1.6175, + "step": 9966 + }, + { + "epoch": 0.12, + "grad_norm": 29.862230316555788, + "learning_rate": 1.9953036336045514e-05, + "loss": 1.3766, + "step": 9969 + }, + { + "epoch": 0.12, + "grad_norm": 27.041100872449764, + "learning_rate": 1.995297976930998e-05, + "loss": 1.7065, + "step": 9972 + }, + { + "epoch": 0.12, + "grad_norm": 15.346900508614672, + "learning_rate": 1.995292316860851e-05, + "loss": 1.256, + "step": 9975 + }, + { + "epoch": 0.12, + "grad_norm": 25.0501720327324, + "learning_rate": 1.9952866533941296e-05, + "loss": 1.2157, + "step": 9978 + }, + { + "epoch": 0.12, + "grad_norm": 7.802156155099694, + "learning_rate": 1.9952809865308538e-05, + "loss": 1.3271, + "step": 9981 + }, + { + "epoch": 0.12, + "grad_norm": 32.05424084508932, + "learning_rate": 1.9952753162710423e-05, + "loss": 1.8818, + "step": 9984 + }, + { + "epoch": 0.12, + "grad_norm": 9.178364637002877, + "learning_rate": 1.995269642614715e-05, + "loss": 1.7985, + "step": 9987 + }, + { + "epoch": 0.12, + "grad_norm": 10.97069430036355, + "learning_rate": 1.9952639655618903e-05, + "loss": 1.6204, + "step": 9990 + }, + { + "epoch": 0.12, + "grad_norm": 8.424547387161951, + "learning_rate": 1.9952582851125885e-05, + "loss": 2.0464, + "step": 9993 + }, + { + "epoch": 0.12, + "grad_norm": 56.25574127440557, + "learning_rate": 1.9952526012668286e-05, + "loss": 1.5297, + "step": 9996 + }, + { + "epoch": 0.12, + "grad_norm": 20.393650087997383, + "learning_rate": 1.9952469140246302e-05, + "loss": 1.5033, + "step": 9999 + }, + { + "epoch": 0.12, + "grad_norm": 16.68882986059172, + "learning_rate": 1.9952412233860127e-05, + "loss": 1.6764, + "step": 10002 + }, + { + "epoch": 0.12, + "grad_norm": 16.12154015253586, + "learning_rate": 1.995235529350995e-05, + "loss": 1.6532, + "step": 10005 + }, + { + "epoch": 0.12, + "grad_norm": 31.807136866688356, + "learning_rate": 1.995229831919597e-05, + "loss": 1.6145, + "step": 10008 + }, + { + "epoch": 0.12, + "grad_norm": 10.782341428411298, + "learning_rate": 1.995224131091838e-05, + "loss": 1.9847, + "step": 10011 + }, + { + "epoch": 0.12, + "grad_norm": 20.209786388589723, + "learning_rate": 1.995218426867738e-05, + "loss": 1.4622, + "step": 10014 + }, + { + "epoch": 0.12, + "grad_norm": 11.093581339989846, + "learning_rate": 1.9952127192473152e-05, + "loss": 1.5481, + "step": 10017 + }, + { + "epoch": 0.12, + "grad_norm": 12.356340620495741, + "learning_rate": 1.9952070082305903e-05, + "loss": 1.6707, + "step": 10020 + }, + { + "epoch": 0.12, + "grad_norm": 42.666096505763235, + "learning_rate": 1.9952012938175824e-05, + "loss": 1.9491, + "step": 10023 + }, + { + "epoch": 0.12, + "grad_norm": 36.42606437040596, + "learning_rate": 1.995195576008311e-05, + "loss": 1.7601, + "step": 10026 + }, + { + "epoch": 0.12, + "grad_norm": 29.543560730870347, + "learning_rate": 1.995189854802795e-05, + "loss": 1.7583, + "step": 10029 + }, + { + "epoch": 0.12, + "grad_norm": 24.302020322680796, + "learning_rate": 1.9951841302010556e-05, + "loss": 1.8612, + "step": 10032 + }, + { + "epoch": 0.12, + "grad_norm": 53.479646330970155, + "learning_rate": 1.99517840220311e-05, + "loss": 1.8127, + "step": 10035 + }, + { + "epoch": 0.12, + "grad_norm": 20.438924097574983, + "learning_rate": 1.9951726708089798e-05, + "loss": 1.3526, + "step": 10038 + }, + { + "epoch": 0.12, + "grad_norm": 42.29328144855957, + "learning_rate": 1.9951669360186835e-05, + "loss": 1.814, + "step": 10041 + }, + { + "epoch": 0.12, + "grad_norm": 26.60753056031938, + "learning_rate": 1.995161197832241e-05, + "loss": 2.291, + "step": 10044 + }, + { + "epoch": 0.12, + "grad_norm": 70.45839344700688, + "learning_rate": 1.9951554562496716e-05, + "loss": 1.7728, + "step": 10047 + }, + { + "epoch": 0.12, + "grad_norm": 26.032822635973982, + "learning_rate": 1.9951497112709953e-05, + "loss": 1.8287, + "step": 10050 + }, + { + "epoch": 0.12, + "grad_norm": 8.548763469009321, + "learning_rate": 1.9951439628962316e-05, + "loss": 1.6854, + "step": 10053 + }, + { + "epoch": 0.12, + "grad_norm": 15.53037141500191, + "learning_rate": 1.9951382111253994e-05, + "loss": 1.6961, + "step": 10056 + }, + { + "epoch": 0.12, + "grad_norm": 14.859983983378626, + "learning_rate": 1.9951324559585194e-05, + "loss": 1.571, + "step": 10059 + }, + { + "epoch": 0.12, + "grad_norm": 21.157278083813534, + "learning_rate": 1.9951266973956108e-05, + "loss": 1.5463, + "step": 10062 + }, + { + "epoch": 0.12, + "grad_norm": 21.546212911577815, + "learning_rate": 1.995120935436693e-05, + "loss": 1.988, + "step": 10065 + }, + { + "epoch": 0.12, + "grad_norm": 48.40410448489795, + "learning_rate": 1.9951151700817858e-05, + "loss": 1.7869, + "step": 10068 + }, + { + "epoch": 0.12, + "grad_norm": 22.292926592319436, + "learning_rate": 1.9951094013309093e-05, + "loss": 1.8034, + "step": 10071 + }, + { + "epoch": 0.12, + "grad_norm": 34.24985806867056, + "learning_rate": 1.9951036291840826e-05, + "loss": 1.5255, + "step": 10074 + }, + { + "epoch": 0.12, + "grad_norm": 16.082550951328884, + "learning_rate": 1.9950978536413256e-05, + "loss": 1.8244, + "step": 10077 + }, + { + "epoch": 0.12, + "grad_norm": 34.070635801814404, + "learning_rate": 1.9950920747026583e-05, + "loss": 1.7171, + "step": 10080 + }, + { + "epoch": 0.12, + "grad_norm": 8.728004875078547, + "learning_rate": 1.9950862923681e-05, + "loss": 1.8149, + "step": 10083 + }, + { + "epoch": 0.12, + "grad_norm": 6.29941409937064, + "learning_rate": 1.995080506637671e-05, + "loss": 1.4124, + "step": 10086 + }, + { + "epoch": 0.12, + "grad_norm": 12.302889862767069, + "learning_rate": 1.99507471751139e-05, + "loss": 2.0237, + "step": 10089 + }, + { + "epoch": 0.12, + "grad_norm": 6.301736530136007, + "learning_rate": 1.995068924989278e-05, + "loss": 1.399, + "step": 10092 + }, + { + "epoch": 0.12, + "grad_norm": 12.530884373700065, + "learning_rate": 1.995063129071354e-05, + "loss": 1.3817, + "step": 10095 + }, + { + "epoch": 0.12, + "grad_norm": 11.890022186317895, + "learning_rate": 1.9950573297576377e-05, + "loss": 1.2992, + "step": 10098 + }, + { + "epoch": 0.12, + "grad_norm": 14.51439742681732, + "learning_rate": 1.9950515270481495e-05, + "loss": 1.6112, + "step": 10101 + }, + { + "epoch": 0.12, + "grad_norm": 16.333438499860367, + "learning_rate": 1.9950457209429086e-05, + "loss": 1.7671, + "step": 10104 + }, + { + "epoch": 0.12, + "grad_norm": 47.56103646898862, + "learning_rate": 1.9950399114419354e-05, + "loss": 1.8746, + "step": 10107 + }, + { + "epoch": 0.12, + "grad_norm": 10.357132102051692, + "learning_rate": 1.995034098545249e-05, + "loss": 1.5741, + "step": 10110 + }, + { + "epoch": 0.12, + "grad_norm": 17.41603740371455, + "learning_rate": 1.9950282822528696e-05, + "loss": 2.0163, + "step": 10113 + }, + { + "epoch": 0.12, + "grad_norm": 5.925023360904638, + "learning_rate": 1.9950224625648174e-05, + "loss": 1.7778, + "step": 10116 + }, + { + "epoch": 0.12, + "grad_norm": 10.432855805030101, + "learning_rate": 1.9950166394811118e-05, + "loss": 1.3945, + "step": 10119 + }, + { + "epoch": 0.12, + "grad_norm": 26.39808228633396, + "learning_rate": 1.995010813001773e-05, + "loss": 1.4892, + "step": 10122 + }, + { + "epoch": 0.12, + "grad_norm": 4.492585883295506, + "learning_rate": 1.9950049831268205e-05, + "loss": 1.5088, + "step": 10125 + }, + { + "epoch": 0.12, + "grad_norm": 7.6215686497067505, + "learning_rate": 1.994999149856274e-05, + "loss": 1.5471, + "step": 10128 + }, + { + "epoch": 0.12, + "grad_norm": 15.679582294286904, + "learning_rate": 1.9949933131901543e-05, + "loss": 1.8627, + "step": 10131 + }, + { + "epoch": 0.12, + "grad_norm": 8.040421139442007, + "learning_rate": 1.994987473128481e-05, + "loss": 2.3402, + "step": 10134 + }, + { + "epoch": 0.12, + "grad_norm": 10.346098238111795, + "learning_rate": 1.9949816296712735e-05, + "loss": 1.4769, + "step": 10137 + }, + { + "epoch": 0.12, + "grad_norm": 11.147911361387726, + "learning_rate": 1.9949757828185525e-05, + "loss": 1.3246, + "step": 10140 + }, + { + "epoch": 0.12, + "grad_norm": 8.49730994545292, + "learning_rate": 1.994969932570337e-05, + "loss": 1.7262, + "step": 10143 + }, + { + "epoch": 0.12, + "grad_norm": 22.748434333876993, + "learning_rate": 1.9949640789266475e-05, + "loss": 1.8919, + "step": 10146 + }, + { + "epoch": 0.12, + "grad_norm": 5.901636197604819, + "learning_rate": 1.9949582218875045e-05, + "loss": 1.5741, + "step": 10149 + }, + { + "epoch": 0.12, + "grad_norm": 19.18355102363374, + "learning_rate": 1.994952361452927e-05, + "loss": 1.756, + "step": 10152 + }, + { + "epoch": 0.12, + "grad_norm": 111.00189424309309, + "learning_rate": 1.9949464976229356e-05, + "loss": 1.6737, + "step": 10155 + }, + { + "epoch": 0.12, + "grad_norm": 8.443108381136025, + "learning_rate": 1.9949406303975505e-05, + "loss": 1.5461, + "step": 10158 + }, + { + "epoch": 0.12, + "grad_norm": 38.26353779294558, + "learning_rate": 1.994934759776791e-05, + "loss": 1.7397, + "step": 10161 + }, + { + "epoch": 0.12, + "grad_norm": 4.1624954420668345, + "learning_rate": 1.9949288857606777e-05, + "loss": 1.8615, + "step": 10164 + }, + { + "epoch": 0.12, + "grad_norm": 6.143518110665068, + "learning_rate": 1.9949230083492303e-05, + "loss": 1.4672, + "step": 10167 + }, + { + "epoch": 0.12, + "grad_norm": 39.91881896933213, + "learning_rate": 1.9949171275424692e-05, + "loss": 1.9362, + "step": 10170 + }, + { + "epoch": 0.12, + "grad_norm": 12.590905073212848, + "learning_rate": 1.9949112433404145e-05, + "loss": 1.4927, + "step": 10173 + }, + { + "epoch": 0.12, + "grad_norm": 23.98408116657948, + "learning_rate": 1.994905355743086e-05, + "loss": 1.6202, + "step": 10176 + }, + { + "epoch": 0.12, + "grad_norm": 81.4246088762314, + "learning_rate": 1.9948994647505035e-05, + "loss": 1.6048, + "step": 10179 + }, + { + "epoch": 0.12, + "grad_norm": 21.4940388477793, + "learning_rate": 1.9948935703626878e-05, + "loss": 1.7104, + "step": 10182 + }, + { + "epoch": 0.12, + "grad_norm": 26.060903389809088, + "learning_rate": 1.9948876725796585e-05, + "loss": 1.5446, + "step": 10185 + }, + { + "epoch": 0.12, + "grad_norm": 28.224714878991445, + "learning_rate": 1.9948817714014362e-05, + "loss": 1.6069, + "step": 10188 + }, + { + "epoch": 0.12, + "grad_norm": 48.157913163847994, + "learning_rate": 1.994875866828041e-05, + "loss": 1.5859, + "step": 10191 + }, + { + "epoch": 0.12, + "grad_norm": 9.389001633481053, + "learning_rate": 1.9948699588594923e-05, + "loss": 1.6818, + "step": 10194 + }, + { + "epoch": 0.12, + "grad_norm": 25.15093017988097, + "learning_rate": 1.994864047495811e-05, + "loss": 1.8872, + "step": 10197 + }, + { + "epoch": 0.12, + "grad_norm": 4.763081402871266, + "learning_rate": 1.994858132737017e-05, + "loss": 1.8547, + "step": 10200 + }, + { + "epoch": 0.12, + "grad_norm": 15.767392705465488, + "learning_rate": 1.9948522145831305e-05, + "loss": 1.6717, + "step": 10203 + }, + { + "epoch": 0.12, + "grad_norm": 16.109898708735837, + "learning_rate": 1.9948462930341717e-05, + "loss": 1.7511, + "step": 10206 + }, + { + "epoch": 0.12, + "grad_norm": 8.78892663893491, + "learning_rate": 1.9948403680901612e-05, + "loss": 1.5924, + "step": 10209 + }, + { + "epoch": 0.12, + "grad_norm": 33.578789346698954, + "learning_rate": 1.9948344397511187e-05, + "loss": 1.6953, + "step": 10212 + }, + { + "epoch": 0.12, + "grad_norm": 24.059869030432026, + "learning_rate": 1.9948285080170644e-05, + "loss": 1.5762, + "step": 10215 + }, + { + "epoch": 0.12, + "grad_norm": 10.510140169578305, + "learning_rate": 1.9948225728880188e-05, + "loss": 1.4587, + "step": 10218 + }, + { + "epoch": 0.12, + "grad_norm": 80.44630548575275, + "learning_rate": 1.994816634364002e-05, + "loss": 1.5323, + "step": 10221 + }, + { + "epoch": 0.12, + "grad_norm": 16.603620860876212, + "learning_rate": 1.9948106924450345e-05, + "loss": 1.6258, + "step": 10224 + }, + { + "epoch": 0.12, + "grad_norm": 17.14650484926002, + "learning_rate": 1.9948047471311366e-05, + "loss": 1.69, + "step": 10227 + }, + { + "epoch": 0.12, + "grad_norm": 13.745648837333537, + "learning_rate": 1.9947987984223283e-05, + "loss": 1.5733, + "step": 10230 + }, + { + "epoch": 0.12, + "grad_norm": 39.76538574534221, + "learning_rate": 1.99479284631863e-05, + "loss": 1.876, + "step": 10233 + }, + { + "epoch": 0.12, + "grad_norm": 16.138946999794623, + "learning_rate": 1.9947868908200617e-05, + "loss": 1.7093, + "step": 10236 + }, + { + "epoch": 0.12, + "grad_norm": 2.8666192456489035, + "learning_rate": 1.9947809319266444e-05, + "loss": 1.6951, + "step": 10239 + }, + { + "epoch": 0.12, + "grad_norm": 21.99298052537085, + "learning_rate": 1.9947749696383983e-05, + "loss": 1.6056, + "step": 10242 + }, + { + "epoch": 0.12, + "grad_norm": 17.93135544420628, + "learning_rate": 1.9947690039553435e-05, + "loss": 1.5648, + "step": 10245 + }, + { + "epoch": 0.12, + "grad_norm": 25.687501140919707, + "learning_rate": 1.9947630348775e-05, + "loss": 1.4103, + "step": 10248 + }, + { + "epoch": 0.12, + "grad_norm": 46.001676070296746, + "learning_rate": 1.9947570624048888e-05, + "loss": 1.8448, + "step": 10251 + }, + { + "epoch": 0.12, + "grad_norm": 29.89626714258603, + "learning_rate": 1.9947510865375302e-05, + "loss": 1.7646, + "step": 10254 + }, + { + "epoch": 0.12, + "grad_norm": 10.426703716013366, + "learning_rate": 1.9947451072754442e-05, + "loss": 1.7772, + "step": 10257 + }, + { + "epoch": 0.12, + "grad_norm": 7.44261116986535, + "learning_rate": 1.9947391246186515e-05, + "loss": 1.7131, + "step": 10260 + }, + { + "epoch": 0.12, + "grad_norm": 38.25072629833713, + "learning_rate": 1.9947331385671725e-05, + "loss": 1.8792, + "step": 10263 + }, + { + "epoch": 0.12, + "grad_norm": 12.367921881099827, + "learning_rate": 1.9947271491210278e-05, + "loss": 1.6982, + "step": 10266 + }, + { + "epoch": 0.12, + "grad_norm": 2.957899165035778, + "learning_rate": 1.9947211562802376e-05, + "loss": 1.582, + "step": 10269 + }, + { + "epoch": 0.12, + "grad_norm": 45.74895154758077, + "learning_rate": 1.9947151600448223e-05, + "loss": 1.6715, + "step": 10272 + }, + { + "epoch": 0.12, + "grad_norm": 9.227369702625422, + "learning_rate": 1.9947091604148025e-05, + "loss": 1.614, + "step": 10275 + }, + { + "epoch": 0.12, + "grad_norm": 7.468345664174697, + "learning_rate": 1.994703157390198e-05, + "loss": 1.6965, + "step": 10278 + }, + { + "epoch": 0.12, + "grad_norm": 18.754833736817766, + "learning_rate": 1.9946971509710307e-05, + "loss": 1.5575, + "step": 10281 + }, + { + "epoch": 0.12, + "grad_norm": 7.43228102647347, + "learning_rate": 1.99469114115732e-05, + "loss": 1.649, + "step": 10284 + }, + { + "epoch": 0.12, + "grad_norm": 4.729043647208577, + "learning_rate": 1.9946851279490867e-05, + "loss": 1.634, + "step": 10287 + }, + { + "epoch": 0.12, + "grad_norm": 6.633658887659257, + "learning_rate": 1.9946791113463515e-05, + "loss": 1.3725, + "step": 10290 + }, + { + "epoch": 0.12, + "grad_norm": 2.4429380046555296, + "learning_rate": 1.9946730913491347e-05, + "loss": 1.6843, + "step": 10293 + }, + { + "epoch": 0.12, + "grad_norm": 35.380408298558024, + "learning_rate": 1.994667067957457e-05, + "loss": 1.5559, + "step": 10296 + }, + { + "epoch": 0.12, + "grad_norm": 17.505368631619323, + "learning_rate": 1.994661041171339e-05, + "loss": 1.6042, + "step": 10299 + }, + { + "epoch": 0.12, + "grad_norm": 25.917882152414755, + "learning_rate": 1.9946550109908007e-05, + "loss": 1.4852, + "step": 10302 + }, + { + "epoch": 0.12, + "grad_norm": 6.166267945441121, + "learning_rate": 1.9946489774158636e-05, + "loss": 1.942, + "step": 10305 + }, + { + "epoch": 0.12, + "grad_norm": 14.068365785129608, + "learning_rate": 1.994642940446547e-05, + "loss": 1.5044, + "step": 10308 + }, + { + "epoch": 0.12, + "grad_norm": 38.20236713773118, + "learning_rate": 1.9946369000828735e-05, + "loss": 1.8668, + "step": 10311 + }, + { + "epoch": 0.12, + "grad_norm": 19.17230343325425, + "learning_rate": 1.9946308563248617e-05, + "loss": 1.6065, + "step": 10314 + }, + { + "epoch": 0.12, + "grad_norm": 35.40658035873724, + "learning_rate": 1.9946248091725334e-05, + "loss": 1.6536, + "step": 10317 + }, + { + "epoch": 0.12, + "grad_norm": 10.875111594481975, + "learning_rate": 1.9946187586259086e-05, + "loss": 1.5253, + "step": 10320 + }, + { + "epoch": 0.12, + "grad_norm": 19.878370079931123, + "learning_rate": 1.9946127046850083e-05, + "loss": 1.8577, + "step": 10323 + }, + { + "epoch": 0.12, + "grad_norm": 4.226400220170914, + "learning_rate": 1.994606647349853e-05, + "loss": 1.5873, + "step": 10326 + }, + { + "epoch": 0.12, + "grad_norm": 4.196911852273276, + "learning_rate": 1.9946005866204642e-05, + "loss": 1.7931, + "step": 10329 + }, + { + "epoch": 0.12, + "grad_norm": 5.655650667381275, + "learning_rate": 1.994594522496861e-05, + "loss": 1.6938, + "step": 10332 + }, + { + "epoch": 0.12, + "grad_norm": 10.486455265297659, + "learning_rate": 1.9945884549790656e-05, + "loss": 1.3288, + "step": 10335 + }, + { + "epoch": 0.12, + "grad_norm": 6.680478133790802, + "learning_rate": 1.9945823840670975e-05, + "loss": 1.7668, + "step": 10338 + }, + { + "epoch": 0.12, + "grad_norm": 22.942542151665194, + "learning_rate": 1.9945763097609783e-05, + "loss": 1.425, + "step": 10341 + }, + { + "epoch": 0.12, + "grad_norm": 30.384635029557444, + "learning_rate": 1.994570232060728e-05, + "loss": 1.6548, + "step": 10344 + }, + { + "epoch": 0.12, + "grad_norm": 30.25239713327514, + "learning_rate": 1.994564150966368e-05, + "loss": 1.4172, + "step": 10347 + }, + { + "epoch": 0.12, + "grad_norm": 21.37620126661585, + "learning_rate": 1.994558066477919e-05, + "loss": 1.535, + "step": 10350 + }, + { + "epoch": 0.12, + "grad_norm": 15.678759199165938, + "learning_rate": 1.9945519785954014e-05, + "loss": 2.0067, + "step": 10353 + }, + { + "epoch": 0.12, + "grad_norm": 21.391750659520802, + "learning_rate": 1.9945458873188358e-05, + "loss": 1.5006, + "step": 10356 + }, + { + "epoch": 0.12, + "grad_norm": 208.647907367626, + "learning_rate": 1.9945397926482438e-05, + "loss": 1.8651, + "step": 10359 + }, + { + "epoch": 0.12, + "grad_norm": 77.00350206541562, + "learning_rate": 1.9945336945836456e-05, + "loss": 1.675, + "step": 10362 + }, + { + "epoch": 0.12, + "grad_norm": 35.68792538121769, + "learning_rate": 1.994527593125062e-05, + "loss": 1.8695, + "step": 10365 + }, + { + "epoch": 0.12, + "grad_norm": 19.040552147522057, + "learning_rate": 1.994521488272514e-05, + "loss": 1.3624, + "step": 10368 + }, + { + "epoch": 0.12, + "grad_norm": 10.071148559819544, + "learning_rate": 1.9945153800260223e-05, + "loss": 1.6464, + "step": 10371 + }, + { + "epoch": 0.12, + "grad_norm": 55.69128285110937, + "learning_rate": 1.994509268385608e-05, + "loss": 1.949, + "step": 10374 + }, + { + "epoch": 0.12, + "grad_norm": 17.435848302757297, + "learning_rate": 1.994503153351292e-05, + "loss": 1.738, + "step": 10377 + }, + { + "epoch": 0.12, + "grad_norm": 28.032711680011428, + "learning_rate": 1.9944970349230946e-05, + "loss": 1.5235, + "step": 10380 + }, + { + "epoch": 0.12, + "grad_norm": 8.881641196898322, + "learning_rate": 1.994490913101037e-05, + "loss": 1.6581, + "step": 10383 + }, + { + "epoch": 0.12, + "grad_norm": 67.87812237601874, + "learning_rate": 1.9944847878851402e-05, + "loss": 1.6534, + "step": 10386 + }, + { + "epoch": 0.12, + "grad_norm": 23.464825309090294, + "learning_rate": 1.994478659275425e-05, + "loss": 1.6707, + "step": 10389 + }, + { + "epoch": 0.12, + "grad_norm": 8.860820025708236, + "learning_rate": 1.9944725272719124e-05, + "loss": 1.4475, + "step": 10392 + }, + { + "epoch": 0.12, + "grad_norm": 59.45641449199623, + "learning_rate": 1.9944663918746234e-05, + "loss": 1.9071, + "step": 10395 + }, + { + "epoch": 0.13, + "grad_norm": 7.253281402226162, + "learning_rate": 1.994460253083579e-05, + "loss": 1.716, + "step": 10398 + }, + { + "epoch": 0.13, + "grad_norm": 31.41753476636825, + "learning_rate": 1.9944541108987994e-05, + "loss": 1.9026, + "step": 10401 + }, + { + "epoch": 0.13, + "grad_norm": 7.376704567380936, + "learning_rate": 1.9944479653203065e-05, + "loss": 1.4702, + "step": 10404 + }, + { + "epoch": 0.13, + "grad_norm": 23.79496470614267, + "learning_rate": 1.9944418163481205e-05, + "loss": 1.278, + "step": 10407 + }, + { + "epoch": 0.13, + "grad_norm": 22.23352054855352, + "learning_rate": 1.994435663982263e-05, + "loss": 1.8027, + "step": 10410 + }, + { + "epoch": 0.13, + "grad_norm": 24.92719493217814, + "learning_rate": 1.994429508222755e-05, + "loss": 2.237, + "step": 10413 + }, + { + "epoch": 0.13, + "grad_norm": 6.2969443495617625, + "learning_rate": 1.9944233490696173e-05, + "loss": 1.9165, + "step": 10416 + }, + { + "epoch": 0.13, + "grad_norm": 21.9819947486371, + "learning_rate": 1.9944171865228704e-05, + "loss": 1.4399, + "step": 10419 + }, + { + "epoch": 0.13, + "grad_norm": 10.73124328182455, + "learning_rate": 1.9944110205825362e-05, + "loss": 1.499, + "step": 10422 + }, + { + "epoch": 0.13, + "grad_norm": 30.696594287841958, + "learning_rate": 1.9944048512486353e-05, + "loss": 1.4536, + "step": 10425 + }, + { + "epoch": 0.13, + "grad_norm": 21.8761950287331, + "learning_rate": 1.994398678521189e-05, + "loss": 1.6006, + "step": 10428 + }, + { + "epoch": 0.13, + "grad_norm": 74.86582530982929, + "learning_rate": 1.994392502400218e-05, + "loss": 1.5649, + "step": 10431 + }, + { + "epoch": 0.13, + "grad_norm": 8.130708373880392, + "learning_rate": 1.9943863228857436e-05, + "loss": 1.5549, + "step": 10434 + }, + { + "epoch": 0.13, + "grad_norm": 21.620188048654047, + "learning_rate": 1.994380139977787e-05, + "loss": 1.9643, + "step": 10437 + }, + { + "epoch": 0.13, + "grad_norm": 39.7056001900286, + "learning_rate": 1.9943739536763688e-05, + "loss": 2.1501, + "step": 10440 + }, + { + "epoch": 0.13, + "grad_norm": 4.510969767720976, + "learning_rate": 1.9943677639815106e-05, + "loss": 1.7043, + "step": 10443 + }, + { + "epoch": 0.13, + "grad_norm": 10.15110742152287, + "learning_rate": 1.9943615708932334e-05, + "loss": 1.5086, + "step": 10446 + }, + { + "epoch": 0.13, + "grad_norm": 6.241912170215238, + "learning_rate": 1.9943553744115586e-05, + "loss": 1.5119, + "step": 10449 + }, + { + "epoch": 0.13, + "grad_norm": 22.84279577360239, + "learning_rate": 1.994349174536507e-05, + "loss": 1.7545, + "step": 10452 + }, + { + "epoch": 0.13, + "grad_norm": 9.22861744513315, + "learning_rate": 1.9943429712680995e-05, + "loss": 1.5289, + "step": 10455 + }, + { + "epoch": 0.13, + "grad_norm": 64.0262884518723, + "learning_rate": 1.9943367646063575e-05, + "loss": 1.5694, + "step": 10458 + }, + { + "epoch": 0.13, + "grad_norm": 45.17420632363122, + "learning_rate": 1.994330554551303e-05, + "loss": 1.7215, + "step": 10461 + }, + { + "epoch": 0.13, + "grad_norm": 11.055875278662235, + "learning_rate": 1.994324341102956e-05, + "loss": 1.6729, + "step": 10464 + }, + { + "epoch": 0.13, + "grad_norm": 10.803285850734449, + "learning_rate": 1.994318124261338e-05, + "loss": 1.4961, + "step": 10467 + }, + { + "epoch": 0.13, + "grad_norm": 17.794518255767823, + "learning_rate": 1.9943119040264708e-05, + "loss": 1.8324, + "step": 10470 + }, + { + "epoch": 0.13, + "grad_norm": 110.26244442137815, + "learning_rate": 1.994305680398375e-05, + "loss": 1.9299, + "step": 10473 + }, + { + "epoch": 0.13, + "grad_norm": 22.647983018963366, + "learning_rate": 1.994299453377072e-05, + "loss": 1.6237, + "step": 10476 + }, + { + "epoch": 0.13, + "grad_norm": 4.42844421257757, + "learning_rate": 1.994293222962583e-05, + "loss": 1.1484, + "step": 10479 + }, + { + "epoch": 0.13, + "grad_norm": 20.907986362097784, + "learning_rate": 1.99428698915493e-05, + "loss": 1.787, + "step": 10482 + }, + { + "epoch": 0.13, + "grad_norm": 22.974548526590155, + "learning_rate": 1.9942807519541333e-05, + "loss": 1.9648, + "step": 10485 + }, + { + "epoch": 0.13, + "grad_norm": 9.406739022331484, + "learning_rate": 1.9942745113602142e-05, + "loss": 2.0487, + "step": 10488 + }, + { + "epoch": 0.13, + "grad_norm": 43.186119242088886, + "learning_rate": 1.9942682673731946e-05, + "loss": 1.6697, + "step": 10491 + }, + { + "epoch": 0.13, + "grad_norm": 9.778753757642322, + "learning_rate": 1.9942620199930956e-05, + "loss": 1.3574, + "step": 10494 + }, + { + "epoch": 0.13, + "grad_norm": 12.539424425760934, + "learning_rate": 1.9942557692199384e-05, + "loss": 1.4209, + "step": 10497 + }, + { + "epoch": 0.13, + "grad_norm": 12.46736961614155, + "learning_rate": 1.994249515053744e-05, + "loss": 1.6747, + "step": 10500 + }, + { + "epoch": 0.13, + "grad_norm": 28.808497300324284, + "learning_rate": 1.9942432574945346e-05, + "loss": 1.5243, + "step": 10503 + }, + { + "epoch": 0.13, + "grad_norm": 6.719971244680147, + "learning_rate": 1.994236996542331e-05, + "loss": 1.5623, + "step": 10506 + }, + { + "epoch": 0.13, + "grad_norm": 28.052624316787234, + "learning_rate": 1.9942307321971548e-05, + "loss": 1.5984, + "step": 10509 + }, + { + "epoch": 0.13, + "grad_norm": 20.442464793137557, + "learning_rate": 1.994224464459027e-05, + "loss": 1.6376, + "step": 10512 + }, + { + "epoch": 0.13, + "grad_norm": 6.9680843399224806, + "learning_rate": 1.9942181933279694e-05, + "loss": 1.6942, + "step": 10515 + }, + { + "epoch": 0.13, + "grad_norm": 105.06426776738552, + "learning_rate": 1.9942119188040032e-05, + "loss": 1.7667, + "step": 10518 + }, + { + "epoch": 0.13, + "grad_norm": 9.147151176739476, + "learning_rate": 1.9942056408871497e-05, + "loss": 1.7187, + "step": 10521 + }, + { + "epoch": 0.13, + "grad_norm": 23.007116336326224, + "learning_rate": 1.9941993595774305e-05, + "loss": 2.0098, + "step": 10524 + }, + { + "epoch": 0.13, + "grad_norm": 34.60148051451297, + "learning_rate": 1.994193074874867e-05, + "loss": 1.8861, + "step": 10527 + }, + { + "epoch": 0.13, + "grad_norm": 4.1134638965909005, + "learning_rate": 1.9941867867794805e-05, + "loss": 1.5049, + "step": 10530 + }, + { + "epoch": 0.13, + "grad_norm": 9.037965911268085, + "learning_rate": 1.994180495291293e-05, + "loss": 1.8312, + "step": 10533 + }, + { + "epoch": 0.13, + "grad_norm": 55.5661978863131, + "learning_rate": 1.9941742004103252e-05, + "loss": 2.1995, + "step": 10536 + }, + { + "epoch": 0.13, + "grad_norm": 23.14748070850753, + "learning_rate": 1.994167902136599e-05, + "loss": 1.5328, + "step": 10539 + }, + { + "epoch": 0.13, + "grad_norm": 14.46177261850816, + "learning_rate": 1.994161600470136e-05, + "loss": 1.7198, + "step": 10542 + }, + { + "epoch": 0.13, + "grad_norm": 6.517709904322044, + "learning_rate": 1.9941552954109575e-05, + "loss": 1.6165, + "step": 10545 + }, + { + "epoch": 0.13, + "grad_norm": 22.94518448929933, + "learning_rate": 1.994148986959085e-05, + "loss": 1.5263, + "step": 10548 + }, + { + "epoch": 0.13, + "grad_norm": 15.653147933880186, + "learning_rate": 1.9941426751145402e-05, + "loss": 1.3605, + "step": 10551 + }, + { + "epoch": 0.13, + "grad_norm": 13.078217441488805, + "learning_rate": 1.9941363598773446e-05, + "loss": 1.8701, + "step": 10554 + }, + { + "epoch": 0.13, + "grad_norm": 25.33923420501146, + "learning_rate": 1.9941300412475194e-05, + "loss": 1.8097, + "step": 10557 + }, + { + "epoch": 0.13, + "grad_norm": 35.77967935915906, + "learning_rate": 1.9941237192250867e-05, + "loss": 1.6114, + "step": 10560 + }, + { + "epoch": 0.13, + "grad_norm": 6.713043449075171, + "learning_rate": 1.994117393810068e-05, + "loss": 1.8252, + "step": 10563 + }, + { + "epoch": 0.13, + "grad_norm": 14.509797074497008, + "learning_rate": 1.9941110650024842e-05, + "loss": 1.6891, + "step": 10566 + }, + { + "epoch": 0.13, + "grad_norm": 5.616038888996209, + "learning_rate": 1.9941047328023578e-05, + "loss": 1.6779, + "step": 10569 + }, + { + "epoch": 0.13, + "grad_norm": 24.873401659680475, + "learning_rate": 1.99409839720971e-05, + "loss": 1.4235, + "step": 10572 + }, + { + "epoch": 0.13, + "grad_norm": 12.020824751073885, + "learning_rate": 1.994092058224562e-05, + "loss": 1.5736, + "step": 10575 + }, + { + "epoch": 0.13, + "grad_norm": 8.887722177915666, + "learning_rate": 1.994085715846936e-05, + "loss": 1.5908, + "step": 10578 + }, + { + "epoch": 0.13, + "grad_norm": 3.6447325296295165, + "learning_rate": 1.994079370076854e-05, + "loss": 1.9686, + "step": 10581 + }, + { + "epoch": 0.13, + "grad_norm": 71.84394676633404, + "learning_rate": 1.9940730209143367e-05, + "loss": 1.8413, + "step": 10584 + }, + { + "epoch": 0.13, + "grad_norm": 14.397413860605818, + "learning_rate": 1.9940666683594068e-05, + "loss": 1.724, + "step": 10587 + }, + { + "epoch": 0.13, + "grad_norm": 29.05784723702183, + "learning_rate": 1.994060312412085e-05, + "loss": 2.1997, + "step": 10590 + }, + { + "epoch": 0.13, + "grad_norm": 15.585068107010798, + "learning_rate": 1.9940539530723936e-05, + "loss": 1.7585, + "step": 10593 + }, + { + "epoch": 0.13, + "grad_norm": 8.13853824385922, + "learning_rate": 1.9940475903403537e-05, + "loss": 1.4246, + "step": 10596 + }, + { + "epoch": 0.13, + "grad_norm": 34.82273495284561, + "learning_rate": 1.9940412242159878e-05, + "loss": 1.4603, + "step": 10599 + }, + { + "epoch": 0.13, + "grad_norm": 67.51185476448045, + "learning_rate": 1.9940348546993173e-05, + "loss": 1.9353, + "step": 10602 + }, + { + "epoch": 0.13, + "grad_norm": 13.568589558541477, + "learning_rate": 1.994028481790364e-05, + "loss": 1.6297, + "step": 10605 + }, + { + "epoch": 0.13, + "grad_norm": 15.838291985222648, + "learning_rate": 1.9940221054891492e-05, + "loss": 1.4551, + "step": 10608 + }, + { + "epoch": 0.13, + "grad_norm": 40.48930639640938, + "learning_rate": 1.994015725795695e-05, + "loss": 1.3048, + "step": 10611 + }, + { + "epoch": 0.13, + "grad_norm": 83.29221953841662, + "learning_rate": 1.9940093427100233e-05, + "loss": 1.5739, + "step": 10614 + }, + { + "epoch": 0.13, + "grad_norm": 7.720807168397892, + "learning_rate": 1.994002956232156e-05, + "loss": 1.6479, + "step": 10617 + }, + { + "epoch": 0.13, + "grad_norm": 15.947935642778594, + "learning_rate": 1.9939965663621142e-05, + "loss": 1.6882, + "step": 10620 + }, + { + "epoch": 0.13, + "grad_norm": 6.80562256665774, + "learning_rate": 1.9939901730999206e-05, + "loss": 1.8101, + "step": 10623 + }, + { + "epoch": 0.13, + "grad_norm": 20.611890637115092, + "learning_rate": 1.993983776445596e-05, + "loss": 1.3221, + "step": 10626 + }, + { + "epoch": 0.13, + "grad_norm": 7.722653616965074, + "learning_rate": 1.993977376399163e-05, + "loss": 1.7349, + "step": 10629 + }, + { + "epoch": 0.13, + "grad_norm": 5.912217483796704, + "learning_rate": 1.9939709729606435e-05, + "loss": 1.2823, + "step": 10632 + }, + { + "epoch": 0.13, + "grad_norm": 5.566298015928469, + "learning_rate": 1.993964566130059e-05, + "loss": 1.8233, + "step": 10635 + }, + { + "epoch": 0.13, + "grad_norm": 8.50760284726393, + "learning_rate": 1.9939581559074315e-05, + "loss": 1.9966, + "step": 10638 + }, + { + "epoch": 0.13, + "grad_norm": 9.85123059175647, + "learning_rate": 1.9939517422927824e-05, + "loss": 1.6372, + "step": 10641 + }, + { + "epoch": 0.13, + "grad_norm": 122.14708628624001, + "learning_rate": 1.9939453252861347e-05, + "loss": 1.8605, + "step": 10644 + }, + { + "epoch": 0.13, + "grad_norm": 92.74125062181139, + "learning_rate": 1.9939389048875093e-05, + "loss": 1.7047, + "step": 10647 + }, + { + "epoch": 0.13, + "grad_norm": 26.26875375139137, + "learning_rate": 1.9939324810969283e-05, + "loss": 1.5981, + "step": 10650 + }, + { + "epoch": 0.13, + "grad_norm": 32.751179541150535, + "learning_rate": 1.993926053914414e-05, + "loss": 1.4097, + "step": 10653 + }, + { + "epoch": 0.13, + "grad_norm": 39.0073339726131, + "learning_rate": 1.993919623339988e-05, + "loss": 1.6818, + "step": 10656 + }, + { + "epoch": 0.13, + "grad_norm": 101.3120538353786, + "learning_rate": 1.9939131893736723e-05, + "loss": 1.3561, + "step": 10659 + }, + { + "epoch": 0.13, + "grad_norm": 215.7174003496055, + "learning_rate": 1.993906752015489e-05, + "loss": 1.7126, + "step": 10662 + }, + { + "epoch": 0.13, + "grad_norm": 3.2984564053104113, + "learning_rate": 1.9939003112654598e-05, + "loss": 1.6296, + "step": 10665 + }, + { + "epoch": 0.13, + "grad_norm": 29.881770840586224, + "learning_rate": 1.9938938671236067e-05, + "loss": 1.4469, + "step": 10668 + }, + { + "epoch": 0.13, + "grad_norm": 46.639029779090755, + "learning_rate": 1.9938874195899522e-05, + "loss": 1.54, + "step": 10671 + }, + { + "epoch": 0.13, + "grad_norm": 18.923530472229046, + "learning_rate": 1.9938809686645176e-05, + "loss": 1.681, + "step": 10674 + }, + { + "epoch": 0.13, + "grad_norm": 18.038817160476757, + "learning_rate": 1.993874514347326e-05, + "loss": 1.6452, + "step": 10677 + }, + { + "epoch": 0.13, + "grad_norm": 6.89986534335888, + "learning_rate": 1.9938680566383977e-05, + "loss": 1.5121, + "step": 10680 + }, + { + "epoch": 0.13, + "grad_norm": 5.400597010959304, + "learning_rate": 1.9938615955377562e-05, + "loss": 1.5545, + "step": 10683 + }, + { + "epoch": 0.13, + "grad_norm": 6.7422868329442265, + "learning_rate": 1.993855131045423e-05, + "loss": 1.3941, + "step": 10686 + }, + { + "epoch": 0.13, + "grad_norm": 79.98008819151556, + "learning_rate": 1.99384866316142e-05, + "loss": 1.8127, + "step": 10689 + }, + { + "epoch": 0.13, + "grad_norm": 3.7586867694887562, + "learning_rate": 1.9938421918857695e-05, + "loss": 1.7142, + "step": 10692 + }, + { + "epoch": 0.13, + "grad_norm": 15.4586445076161, + "learning_rate": 1.993835717218494e-05, + "loss": 1.8189, + "step": 10695 + }, + { + "epoch": 0.13, + "grad_norm": 26.077954289753052, + "learning_rate": 1.993829239159615e-05, + "loss": 1.7738, + "step": 10698 + }, + { + "epoch": 0.13, + "grad_norm": 6.118933633346171, + "learning_rate": 1.9938227577091545e-05, + "loss": 1.6957, + "step": 10701 + }, + { + "epoch": 0.13, + "grad_norm": 3.298842296562086, + "learning_rate": 1.993816272867135e-05, + "loss": 1.5174, + "step": 10704 + }, + { + "epoch": 0.13, + "grad_norm": 26.945927052446, + "learning_rate": 1.9938097846335788e-05, + "loss": 1.7565, + "step": 10707 + }, + { + "epoch": 0.13, + "grad_norm": 36.134017742495274, + "learning_rate": 1.9938032930085075e-05, + "loss": 1.7672, + "step": 10710 + }, + { + "epoch": 0.13, + "grad_norm": 17.90288199375145, + "learning_rate": 1.9937967979919438e-05, + "loss": 1.9702, + "step": 10713 + }, + { + "epoch": 0.13, + "grad_norm": 7.445906471456727, + "learning_rate": 1.9937902995839096e-05, + "loss": 1.2762, + "step": 10716 + }, + { + "epoch": 0.13, + "grad_norm": 38.57347707083892, + "learning_rate": 1.993783797784427e-05, + "loss": 1.5415, + "step": 10719 + }, + { + "epoch": 0.13, + "grad_norm": 43.80215972039214, + "learning_rate": 1.993777292593518e-05, + "loss": 1.762, + "step": 10722 + }, + { + "epoch": 0.13, + "grad_norm": 4.677492476335641, + "learning_rate": 1.993770784011205e-05, + "loss": 1.8385, + "step": 10725 + }, + { + "epoch": 0.13, + "grad_norm": 22.58885715854066, + "learning_rate": 1.9937642720375106e-05, + "loss": 1.8237, + "step": 10728 + }, + { + "epoch": 0.13, + "grad_norm": 21.4164896022626, + "learning_rate": 1.993757756672457e-05, + "loss": 1.9375, + "step": 10731 + }, + { + "epoch": 0.13, + "grad_norm": 73.45539333623319, + "learning_rate": 1.9937512379160654e-05, + "loss": 1.6921, + "step": 10734 + }, + { + "epoch": 0.13, + "grad_norm": 8.066686010296397, + "learning_rate": 1.993744715768359e-05, + "loss": 1.6246, + "step": 10737 + }, + { + "epoch": 0.13, + "grad_norm": 7.277518923875316, + "learning_rate": 1.9937381902293598e-05, + "loss": 1.6456, + "step": 10740 + }, + { + "epoch": 0.13, + "grad_norm": 15.49647756690973, + "learning_rate": 1.9937316612990903e-05, + "loss": 1.6938, + "step": 10743 + }, + { + "epoch": 0.13, + "grad_norm": 5.908103920488181, + "learning_rate": 1.9937251289775727e-05, + "loss": 1.7047, + "step": 10746 + }, + { + "epoch": 0.13, + "grad_norm": 30.23182035104151, + "learning_rate": 1.9937185932648288e-05, + "loss": 1.8009, + "step": 10749 + }, + { + "epoch": 0.13, + "grad_norm": 30.98537778485997, + "learning_rate": 1.9937120541608815e-05, + "loss": 2.0903, + "step": 10752 + }, + { + "epoch": 0.13, + "grad_norm": 4.850371201941154, + "learning_rate": 1.9937055116657527e-05, + "loss": 1.648, + "step": 10755 + }, + { + "epoch": 0.13, + "grad_norm": 14.665413791459098, + "learning_rate": 1.9936989657794652e-05, + "loss": 1.8695, + "step": 10758 + }, + { + "epoch": 0.13, + "grad_norm": 3.7915175944139485, + "learning_rate": 1.9936924165020407e-05, + "loss": 1.8279, + "step": 10761 + }, + { + "epoch": 0.13, + "grad_norm": 9.477914557771998, + "learning_rate": 1.9936858638335024e-05, + "loss": 1.6553, + "step": 10764 + }, + { + "epoch": 0.13, + "grad_norm": 25.56856490782567, + "learning_rate": 1.9936793077738718e-05, + "loss": 1.805, + "step": 10767 + }, + { + "epoch": 0.13, + "grad_norm": 10.813818674164862, + "learning_rate": 1.9936727483231716e-05, + "loss": 1.5849, + "step": 10770 + }, + { + "epoch": 0.13, + "grad_norm": 17.766986753728997, + "learning_rate": 1.9936661854814248e-05, + "loss": 1.5054, + "step": 10773 + }, + { + "epoch": 0.13, + "grad_norm": 5.548615030534131, + "learning_rate": 1.9936596192486527e-05, + "loss": 1.6633, + "step": 10776 + }, + { + "epoch": 0.13, + "grad_norm": 32.099643749259634, + "learning_rate": 1.9936530496248784e-05, + "loss": 1.8294, + "step": 10779 + }, + { + "epoch": 0.13, + "grad_norm": 6.430543109115857, + "learning_rate": 1.993646476610124e-05, + "loss": 1.1346, + "step": 10782 + }, + { + "epoch": 0.13, + "grad_norm": 24.64563830410548, + "learning_rate": 1.993639900204412e-05, + "loss": 1.7648, + "step": 10785 + }, + { + "epoch": 0.13, + "grad_norm": 9.656754044503515, + "learning_rate": 1.993633320407765e-05, + "loss": 1.3778, + "step": 10788 + }, + { + "epoch": 0.13, + "grad_norm": 12.53700855638227, + "learning_rate": 1.9936267372202056e-05, + "loss": 1.5865, + "step": 10791 + }, + { + "epoch": 0.13, + "grad_norm": 29.226395696826184, + "learning_rate": 1.993620150641756e-05, + "loss": 1.3536, + "step": 10794 + }, + { + "epoch": 0.13, + "grad_norm": 20.87916848012905, + "learning_rate": 1.9936135606724387e-05, + "loss": 1.4566, + "step": 10797 + }, + { + "epoch": 0.13, + "grad_norm": 13.863736901807995, + "learning_rate": 1.993606967312276e-05, + "loss": 1.7399, + "step": 10800 + }, + { + "epoch": 0.13, + "grad_norm": 7.582561157949162, + "learning_rate": 1.993600370561291e-05, + "loss": 1.4326, + "step": 10803 + }, + { + "epoch": 0.13, + "grad_norm": 16.164325935954142, + "learning_rate": 1.9935937704195055e-05, + "loss": 1.7702, + "step": 10806 + }, + { + "epoch": 0.13, + "grad_norm": 4.042705123914919, + "learning_rate": 1.9935871668869424e-05, + "loss": 2.0559, + "step": 10809 + }, + { + "epoch": 0.13, + "grad_norm": 5.063005927409132, + "learning_rate": 1.9935805599636243e-05, + "loss": 1.2018, + "step": 10812 + }, + { + "epoch": 0.13, + "grad_norm": 7.585635383255532, + "learning_rate": 1.9935739496495738e-05, + "loss": 1.4799, + "step": 10815 + }, + { + "epoch": 0.13, + "grad_norm": 10.453625312355449, + "learning_rate": 1.993567335944813e-05, + "loss": 1.6368, + "step": 10818 + }, + { + "epoch": 0.13, + "grad_norm": 17.21699387806501, + "learning_rate": 1.9935607188493645e-05, + "loss": 1.9852, + "step": 10821 + }, + { + "epoch": 0.13, + "grad_norm": 36.06954819285138, + "learning_rate": 1.9935540983632516e-05, + "loss": 1.5748, + "step": 10824 + }, + { + "epoch": 0.13, + "grad_norm": 12.572987331967065, + "learning_rate": 1.9935474744864963e-05, + "loss": 1.5826, + "step": 10827 + }, + { + "epoch": 0.13, + "grad_norm": 28.41477677009137, + "learning_rate": 1.9935408472191216e-05, + "loss": 1.5827, + "step": 10830 + }, + { + "epoch": 0.13, + "grad_norm": 5.227819890033433, + "learning_rate": 1.9935342165611496e-05, + "loss": 1.5978, + "step": 10833 + }, + { + "epoch": 0.13, + "grad_norm": 24.555976373770132, + "learning_rate": 1.9935275825126033e-05, + "loss": 1.5294, + "step": 10836 + }, + { + "epoch": 0.13, + "grad_norm": 6.307014966880715, + "learning_rate": 1.993520945073505e-05, + "loss": 1.5867, + "step": 10839 + }, + { + "epoch": 0.13, + "grad_norm": 25.02231948178226, + "learning_rate": 1.993514304243878e-05, + "loss": 1.9264, + "step": 10842 + }, + { + "epoch": 0.13, + "grad_norm": 5.049860151626668, + "learning_rate": 1.993507660023744e-05, + "loss": 2.0692, + "step": 10845 + }, + { + "epoch": 0.13, + "grad_norm": 8.778203410316623, + "learning_rate": 1.9935010124131264e-05, + "loss": 1.6052, + "step": 10848 + }, + { + "epoch": 0.13, + "grad_norm": 5.953712299393982, + "learning_rate": 1.993494361412048e-05, + "loss": 1.641, + "step": 10851 + }, + { + "epoch": 0.13, + "grad_norm": 53.632545796931375, + "learning_rate": 1.993487707020531e-05, + "loss": 1.6826, + "step": 10854 + }, + { + "epoch": 0.13, + "grad_norm": 18.482878155836723, + "learning_rate": 1.9934810492385983e-05, + "loss": 1.3603, + "step": 10857 + }, + { + "epoch": 0.13, + "grad_norm": 24.881307717934543, + "learning_rate": 1.9934743880662725e-05, + "loss": 1.6464, + "step": 10860 + }, + { + "epoch": 0.13, + "grad_norm": 17.53311931358537, + "learning_rate": 1.9934677235035767e-05, + "loss": 1.6092, + "step": 10863 + }, + { + "epoch": 0.13, + "grad_norm": 14.084610332839823, + "learning_rate": 1.9934610555505333e-05, + "loss": 1.3577, + "step": 10866 + }, + { + "epoch": 0.13, + "grad_norm": 60.433321568480665, + "learning_rate": 1.9934543842071653e-05, + "loss": 1.9303, + "step": 10869 + }, + { + "epoch": 0.13, + "grad_norm": 9.859376132172262, + "learning_rate": 1.993447709473495e-05, + "loss": 1.3901, + "step": 10872 + }, + { + "epoch": 0.13, + "grad_norm": 27.303497601453223, + "learning_rate": 1.9934410313495456e-05, + "loss": 1.6362, + "step": 10875 + }, + { + "epoch": 0.13, + "grad_norm": 10.983930306252073, + "learning_rate": 1.99343434983534e-05, + "loss": 1.6764, + "step": 10878 + }, + { + "epoch": 0.13, + "grad_norm": 21.391680638625044, + "learning_rate": 1.9934276649309007e-05, + "loss": 1.9651, + "step": 10881 + }, + { + "epoch": 0.13, + "grad_norm": 9.201850567028119, + "learning_rate": 1.9934209766362506e-05, + "loss": 1.5644, + "step": 10884 + }, + { + "epoch": 0.13, + "grad_norm": 3.4427659732340037, + "learning_rate": 1.9934142849514125e-05, + "loss": 1.7697, + "step": 10887 + }, + { + "epoch": 0.13, + "grad_norm": 10.03893836641394, + "learning_rate": 1.9934075898764093e-05, + "loss": 1.6597, + "step": 10890 + }, + { + "epoch": 0.13, + "grad_norm": 7.598808950424021, + "learning_rate": 1.993400891411264e-05, + "loss": 1.8141, + "step": 10893 + }, + { + "epoch": 0.13, + "grad_norm": 41.77023294391929, + "learning_rate": 1.993394189555999e-05, + "loss": 1.7727, + "step": 10896 + }, + { + "epoch": 0.13, + "grad_norm": 11.181013445386942, + "learning_rate": 1.9933874843106375e-05, + "loss": 1.562, + "step": 10899 + }, + { + "epoch": 0.13, + "grad_norm": 37.99009876553298, + "learning_rate": 1.9933807756752023e-05, + "loss": 1.2428, + "step": 10902 + }, + { + "epoch": 0.13, + "grad_norm": 10.449067287737716, + "learning_rate": 1.9933740636497167e-05, + "loss": 1.3438, + "step": 10905 + }, + { + "epoch": 0.13, + "grad_norm": 78.60413132942097, + "learning_rate": 1.9933673482342026e-05, + "loss": 2.0703, + "step": 10908 + }, + { + "epoch": 0.13, + "grad_norm": 44.600346845941985, + "learning_rate": 1.993360629428684e-05, + "loss": 1.6462, + "step": 10911 + }, + { + "epoch": 0.13, + "grad_norm": 21.127877529293574, + "learning_rate": 1.9933539072331835e-05, + "loss": 1.3573, + "step": 10914 + }, + { + "epoch": 0.13, + "grad_norm": 8.00259736743214, + "learning_rate": 1.9933471816477235e-05, + "loss": 1.6241, + "step": 10917 + }, + { + "epoch": 0.13, + "grad_norm": 22.920636336876083, + "learning_rate": 1.9933404526723273e-05, + "loss": 1.5216, + "step": 10920 + }, + { + "epoch": 0.13, + "grad_norm": 16.914735909566645, + "learning_rate": 1.9933337203070185e-05, + "loss": 1.4554, + "step": 10923 + }, + { + "epoch": 0.13, + "grad_norm": 51.65712714382032, + "learning_rate": 1.993326984551819e-05, + "loss": 1.7013, + "step": 10926 + }, + { + "epoch": 0.13, + "grad_norm": 6.078275715032937, + "learning_rate": 1.9933202454067525e-05, + "loss": 1.5516, + "step": 10929 + }, + { + "epoch": 0.13, + "grad_norm": 24.780376995492034, + "learning_rate": 1.9933135028718418e-05, + "loss": 1.5667, + "step": 10932 + }, + { + "epoch": 0.13, + "grad_norm": 6.886891125538952, + "learning_rate": 1.99330675694711e-05, + "loss": 1.7097, + "step": 10935 + }, + { + "epoch": 0.13, + "grad_norm": 10.864088520963476, + "learning_rate": 1.99330000763258e-05, + "loss": 1.2607, + "step": 10938 + }, + { + "epoch": 0.13, + "grad_norm": 21.006862815931086, + "learning_rate": 1.9932932549282748e-05, + "loss": 1.5124, + "step": 10941 + }, + { + "epoch": 0.13, + "grad_norm": 19.734064137392654, + "learning_rate": 1.9932864988342175e-05, + "loss": 1.7255, + "step": 10944 + }, + { + "epoch": 0.13, + "grad_norm": 16.1995695239691, + "learning_rate": 1.993279739350431e-05, + "loss": 1.4603, + "step": 10947 + }, + { + "epoch": 0.13, + "grad_norm": 22.192640699728457, + "learning_rate": 1.9932729764769388e-05, + "loss": 1.7977, + "step": 10950 + }, + { + "epoch": 0.13, + "grad_norm": 8.648832478813278, + "learning_rate": 1.9932662102137634e-05, + "loss": 1.4373, + "step": 10953 + }, + { + "epoch": 0.13, + "grad_norm": 33.93502106869202, + "learning_rate": 1.9932594405609286e-05, + "loss": 1.5882, + "step": 10956 + }, + { + "epoch": 0.13, + "grad_norm": 13.117081078428523, + "learning_rate": 1.9932526675184566e-05, + "loss": 1.7199, + "step": 10959 + }, + { + "epoch": 0.13, + "grad_norm": 10.719580509332383, + "learning_rate": 1.9932458910863715e-05, + "loss": 1.565, + "step": 10962 + }, + { + "epoch": 0.13, + "grad_norm": 13.838511761021985, + "learning_rate": 1.9932391112646957e-05, + "loss": 1.5756, + "step": 10965 + }, + { + "epoch": 0.13, + "grad_norm": 8.048478500530281, + "learning_rate": 1.9932323280534526e-05, + "loss": 1.2708, + "step": 10968 + }, + { + "epoch": 0.13, + "grad_norm": 7.625504292081495, + "learning_rate": 1.9932255414526655e-05, + "loss": 1.9567, + "step": 10971 + }, + { + "epoch": 0.13, + "grad_norm": 89.94019540493905, + "learning_rate": 1.993218751462357e-05, + "loss": 1.9665, + "step": 10974 + }, + { + "epoch": 0.13, + "grad_norm": 32.82535370108053, + "learning_rate": 1.993211958082551e-05, + "loss": 1.5391, + "step": 10977 + }, + { + "epoch": 0.13, + "grad_norm": 12.9608986538386, + "learning_rate": 1.99320516131327e-05, + "loss": 1.5963, + "step": 10980 + }, + { + "epoch": 0.13, + "grad_norm": 8.694836712753915, + "learning_rate": 1.993198361154538e-05, + "loss": 1.9847, + "step": 10983 + }, + { + "epoch": 0.13, + "grad_norm": 6.699098788431061, + "learning_rate": 1.9931915576063773e-05, + "loss": 1.6017, + "step": 10986 + }, + { + "epoch": 0.13, + "grad_norm": 7.647389675859592, + "learning_rate": 1.993184750668812e-05, + "loss": 1.9448, + "step": 10989 + }, + { + "epoch": 0.13, + "grad_norm": 8.089368855029091, + "learning_rate": 1.9931779403418642e-05, + "loss": 1.7764, + "step": 10992 + }, + { + "epoch": 0.13, + "grad_norm": 38.228277697285186, + "learning_rate": 1.9931711266255586e-05, + "loss": 1.8874, + "step": 10995 + }, + { + "epoch": 0.13, + "grad_norm": 6.946011161058558, + "learning_rate": 1.993164309519917e-05, + "loss": 1.5314, + "step": 10998 + }, + { + "epoch": 0.13, + "grad_norm": 18.255371230394335, + "learning_rate": 1.993157489024964e-05, + "loss": 1.5191, + "step": 11001 + }, + { + "epoch": 0.13, + "grad_norm": 18.282717420720754, + "learning_rate": 1.993150665140722e-05, + "loss": 1.5488, + "step": 11004 + }, + { + "epoch": 0.13, + "grad_norm": 29.661803144454918, + "learning_rate": 1.9931438378672145e-05, + "loss": 1.6493, + "step": 11007 + }, + { + "epoch": 0.13, + "grad_norm": 19.61665274352002, + "learning_rate": 1.9931370072044646e-05, + "loss": 1.4808, + "step": 11010 + }, + { + "epoch": 0.13, + "grad_norm": 31.461796044982794, + "learning_rate": 1.9931301731524958e-05, + "loss": 1.412, + "step": 11013 + }, + { + "epoch": 0.13, + "grad_norm": 5.417965952700045, + "learning_rate": 1.993123335711332e-05, + "loss": 1.5075, + "step": 11016 + }, + { + "epoch": 0.13, + "grad_norm": 23.57885883507347, + "learning_rate": 1.9931164948809954e-05, + "loss": 1.432, + "step": 11019 + }, + { + "epoch": 0.13, + "grad_norm": 10.565087098575829, + "learning_rate": 1.99310965066151e-05, + "loss": 1.7733, + "step": 11022 + }, + { + "epoch": 0.13, + "grad_norm": 3.6562137979178684, + "learning_rate": 1.9931028030528995e-05, + "loss": 1.8285, + "step": 11025 + }, + { + "epoch": 0.13, + "grad_norm": 100.87165977171017, + "learning_rate": 1.9930959520551864e-05, + "loss": 1.541, + "step": 11028 + }, + { + "epoch": 0.13, + "grad_norm": 16.058911583238398, + "learning_rate": 1.9930890976683946e-05, + "loss": 1.535, + "step": 11031 + }, + { + "epoch": 0.13, + "grad_norm": 3.843713058771867, + "learning_rate": 1.9930822398925477e-05, + "loss": 1.8022, + "step": 11034 + }, + { + "epoch": 0.13, + "grad_norm": 12.079303838551573, + "learning_rate": 1.993075378727668e-05, + "loss": 1.6148, + "step": 11037 + }, + { + "epoch": 0.13, + "grad_norm": 12.189858985896146, + "learning_rate": 1.9930685141737805e-05, + "loss": 2.1998, + "step": 11040 + }, + { + "epoch": 0.13, + "grad_norm": 4.7460602902773354, + "learning_rate": 1.993061646230908e-05, + "loss": 2.2372, + "step": 11043 + }, + { + "epoch": 0.13, + "grad_norm": 31.245435432128076, + "learning_rate": 1.9930547748990734e-05, + "loss": 1.9232, + "step": 11046 + }, + { + "epoch": 0.13, + "grad_norm": 16.775665989458794, + "learning_rate": 1.993047900178301e-05, + "loss": 1.7006, + "step": 11049 + }, + { + "epoch": 0.13, + "grad_norm": 28.774243620664716, + "learning_rate": 1.9930410220686132e-05, + "loss": 1.4462, + "step": 11052 + }, + { + "epoch": 0.13, + "grad_norm": 5.641436798246321, + "learning_rate": 1.9930341405700342e-05, + "loss": 1.585, + "step": 11055 + }, + { + "epoch": 0.13, + "grad_norm": 19.836587131256742, + "learning_rate": 1.9930272556825874e-05, + "loss": 1.9577, + "step": 11058 + }, + { + "epoch": 0.13, + "grad_norm": 267.4332281301823, + "learning_rate": 1.9930203674062967e-05, + "loss": 1.5216, + "step": 11061 + }, + { + "epoch": 0.13, + "grad_norm": 16.130041302927644, + "learning_rate": 1.9930134757411847e-05, + "loss": 1.6933, + "step": 11064 + }, + { + "epoch": 0.13, + "grad_norm": 3.7641244529869478, + "learning_rate": 1.9930065806872755e-05, + "loss": 1.7739, + "step": 11067 + }, + { + "epoch": 0.13, + "grad_norm": 6.088949625507874, + "learning_rate": 1.992999682244593e-05, + "loss": 1.8649, + "step": 11070 + }, + { + "epoch": 0.13, + "grad_norm": 12.136779846829532, + "learning_rate": 1.9929927804131593e-05, + "loss": 1.3879, + "step": 11073 + }, + { + "epoch": 0.13, + "grad_norm": 17.252923198009295, + "learning_rate": 1.9929858751929994e-05, + "loss": 1.4224, + "step": 11076 + }, + { + "epoch": 0.13, + "grad_norm": 3.7612699849537234, + "learning_rate": 1.9929789665841365e-05, + "loss": 1.5268, + "step": 11079 + }, + { + "epoch": 0.13, + "grad_norm": 78.80749768864405, + "learning_rate": 1.992972054586594e-05, + "loss": 1.6421, + "step": 11082 + }, + { + "epoch": 0.13, + "grad_norm": 17.02784589077947, + "learning_rate": 1.9929651392003954e-05, + "loss": 1.924, + "step": 11085 + }, + { + "epoch": 0.13, + "grad_norm": 9.85246465492723, + "learning_rate": 1.9929582204255646e-05, + "loss": 1.4312, + "step": 11088 + }, + { + "epoch": 0.13, + "grad_norm": 54.244461507914494, + "learning_rate": 1.992951298262125e-05, + "loss": 1.6671, + "step": 11091 + }, + { + "epoch": 0.13, + "grad_norm": 23.444516724818833, + "learning_rate": 1.9929443727101004e-05, + "loss": 1.3166, + "step": 11094 + }, + { + "epoch": 0.13, + "grad_norm": 28.28424195681523, + "learning_rate": 1.9929374437695143e-05, + "loss": 1.4799, + "step": 11097 + }, + { + "epoch": 0.13, + "grad_norm": 28.140479576180596, + "learning_rate": 1.99293051144039e-05, + "loss": 1.3766, + "step": 11100 + }, + { + "epoch": 0.13, + "grad_norm": 25.406391497961156, + "learning_rate": 1.992923575722752e-05, + "loss": 1.346, + "step": 11103 + }, + { + "epoch": 0.13, + "grad_norm": 7.5677314901674455, + "learning_rate": 1.9929166366166233e-05, + "loss": 1.58, + "step": 11106 + }, + { + "epoch": 0.13, + "grad_norm": 15.846950279669855, + "learning_rate": 1.9929096941220278e-05, + "loss": 1.6627, + "step": 11109 + }, + { + "epoch": 0.13, + "grad_norm": 5.104953977383623, + "learning_rate": 1.9929027482389887e-05, + "loss": 1.6615, + "step": 11112 + }, + { + "epoch": 0.13, + "grad_norm": 10.452150859537584, + "learning_rate": 1.992895798967531e-05, + "loss": 1.7297, + "step": 11115 + }, + { + "epoch": 0.13, + "grad_norm": 40.68602710026282, + "learning_rate": 1.9928888463076767e-05, + "loss": 1.3856, + "step": 11118 + }, + { + "epoch": 0.13, + "grad_norm": 17.567438213642486, + "learning_rate": 1.9928818902594508e-05, + "loss": 1.7532, + "step": 11121 + }, + { + "epoch": 0.13, + "grad_norm": 31.63329405305571, + "learning_rate": 1.992874930822877e-05, + "loss": 2.1228, + "step": 11124 + }, + { + "epoch": 0.13, + "grad_norm": 15.683741411342686, + "learning_rate": 1.992867967997978e-05, + "loss": 1.1415, + "step": 11127 + }, + { + "epoch": 0.13, + "grad_norm": 31.841236491972715, + "learning_rate": 1.992861001784779e-05, + "loss": 1.734, + "step": 11130 + }, + { + "epoch": 0.13, + "grad_norm": 13.355293098951709, + "learning_rate": 1.9928540321833025e-05, + "loss": 1.5863, + "step": 11133 + }, + { + "epoch": 0.13, + "grad_norm": 16.40836553935446, + "learning_rate": 1.9928470591935733e-05, + "loss": 1.5127, + "step": 11136 + }, + { + "epoch": 0.13, + "grad_norm": 37.428010550922004, + "learning_rate": 1.992840082815614e-05, + "loss": 1.3713, + "step": 11139 + }, + { + "epoch": 0.13, + "grad_norm": 13.771176409403973, + "learning_rate": 1.9928331030494496e-05, + "loss": 2.1305, + "step": 11142 + }, + { + "epoch": 0.13, + "grad_norm": 21.76592549103456, + "learning_rate": 1.9928261198951035e-05, + "loss": 1.5109, + "step": 11145 + }, + { + "epoch": 0.13, + "grad_norm": 34.21590273970115, + "learning_rate": 1.992819133352599e-05, + "loss": 1.5233, + "step": 11148 + }, + { + "epoch": 0.13, + "grad_norm": 39.11034054808341, + "learning_rate": 1.992812143421961e-05, + "loss": 1.2999, + "step": 11151 + }, + { + "epoch": 0.13, + "grad_norm": 7.520276555506135, + "learning_rate": 1.9928051501032124e-05, + "loss": 1.7694, + "step": 11154 + }, + { + "epoch": 0.13, + "grad_norm": 9.369579783226936, + "learning_rate": 1.9927981533963774e-05, + "loss": 1.602, + "step": 11157 + }, + { + "epoch": 0.13, + "grad_norm": 40.208468965414085, + "learning_rate": 1.99279115330148e-05, + "loss": 1.5613, + "step": 11160 + }, + { + "epoch": 0.13, + "grad_norm": 4.991486831441982, + "learning_rate": 1.9927841498185438e-05, + "loss": 1.5219, + "step": 11163 + }, + { + "epoch": 0.13, + "grad_norm": 11.086415180068656, + "learning_rate": 1.9927771429475933e-05, + "loss": 1.7486, + "step": 11166 + }, + { + "epoch": 0.13, + "grad_norm": 8.757265558141782, + "learning_rate": 1.992770132688652e-05, + "loss": 1.4578, + "step": 11169 + }, + { + "epoch": 0.13, + "grad_norm": 7.869692819546372, + "learning_rate": 1.9927631190417435e-05, + "loss": 1.5636, + "step": 11172 + }, + { + "epoch": 0.13, + "grad_norm": 81.14262302367423, + "learning_rate": 1.9927561020068923e-05, + "loss": 1.4851, + "step": 11175 + }, + { + "epoch": 0.13, + "grad_norm": 8.942579263566886, + "learning_rate": 1.9927490815841218e-05, + "loss": 1.9727, + "step": 11178 + }, + { + "epoch": 0.13, + "grad_norm": 23.56016474888347, + "learning_rate": 1.9927420577734566e-05, + "loss": 1.8931, + "step": 11181 + }, + { + "epoch": 0.13, + "grad_norm": 8.669397046271447, + "learning_rate": 1.9927350305749202e-05, + "loss": 1.1982, + "step": 11184 + }, + { + "epoch": 0.13, + "grad_norm": 10.227156386753814, + "learning_rate": 1.9927279999885366e-05, + "loss": 1.7056, + "step": 11187 + }, + { + "epoch": 0.13, + "grad_norm": 20.89815437180014, + "learning_rate": 1.9927209660143302e-05, + "loss": 1.6699, + "step": 11190 + }, + { + "epoch": 0.13, + "grad_norm": 74.77563717349746, + "learning_rate": 1.9927139286523244e-05, + "loss": 1.3704, + "step": 11193 + }, + { + "epoch": 0.13, + "grad_norm": 12.156066891987663, + "learning_rate": 1.9927068879025436e-05, + "loss": 1.321, + "step": 11196 + }, + { + "epoch": 0.13, + "grad_norm": 38.57373128362047, + "learning_rate": 1.992699843765012e-05, + "loss": 1.3342, + "step": 11199 + }, + { + "epoch": 0.13, + "grad_norm": 28.270208589194244, + "learning_rate": 1.9926927962397535e-05, + "loss": 1.5544, + "step": 11202 + }, + { + "epoch": 0.13, + "grad_norm": 46.14443789178838, + "learning_rate": 1.992685745326792e-05, + "loss": 1.9714, + "step": 11205 + }, + { + "epoch": 0.13, + "grad_norm": 8.033579974361308, + "learning_rate": 1.9926786910261513e-05, + "loss": 1.623, + "step": 11208 + }, + { + "epoch": 0.13, + "grad_norm": 48.44723733519901, + "learning_rate": 1.992671633337856e-05, + "loss": 1.5896, + "step": 11211 + }, + { + "epoch": 0.13, + "grad_norm": 5.43081550946423, + "learning_rate": 1.9926645722619298e-05, + "loss": 1.7395, + "step": 11214 + }, + { + "epoch": 0.13, + "grad_norm": 13.616723302099079, + "learning_rate": 1.992657507798397e-05, + "loss": 1.7591, + "step": 11217 + }, + { + "epoch": 0.13, + "grad_norm": 14.617809953175316, + "learning_rate": 1.9926504399472817e-05, + "loss": 1.7148, + "step": 11220 + }, + { + "epoch": 0.13, + "grad_norm": 8.07111780630143, + "learning_rate": 1.9926433687086083e-05, + "loss": 1.6823, + "step": 11223 + }, + { + "epoch": 0.13, + "grad_norm": 53.2460724890727, + "learning_rate": 1.9926362940824003e-05, + "loss": 1.4572, + "step": 11226 + }, + { + "epoch": 0.14, + "grad_norm": 4.34002629659735, + "learning_rate": 1.9926292160686823e-05, + "loss": 1.466, + "step": 11229 + }, + { + "epoch": 0.14, + "grad_norm": 17.996024736808366, + "learning_rate": 1.992622134667478e-05, + "loss": 1.7875, + "step": 11232 + }, + { + "epoch": 0.14, + "grad_norm": 16.678552659446446, + "learning_rate": 1.9926150498788123e-05, + "loss": 1.9587, + "step": 11235 + }, + { + "epoch": 0.14, + "grad_norm": 12.46843229383675, + "learning_rate": 1.992607961702709e-05, + "loss": 1.5, + "step": 11238 + }, + { + "epoch": 0.14, + "grad_norm": 7.463206509030681, + "learning_rate": 1.992600870139192e-05, + "loss": 1.6633, + "step": 11241 + }, + { + "epoch": 0.14, + "grad_norm": 26.891050500855787, + "learning_rate": 1.992593775188286e-05, + "loss": 1.4666, + "step": 11244 + }, + { + "epoch": 0.14, + "grad_norm": 35.53860033109445, + "learning_rate": 1.9925866768500147e-05, + "loss": 1.7779, + "step": 11247 + }, + { + "epoch": 0.14, + "grad_norm": 9.197117754665445, + "learning_rate": 1.992579575124403e-05, + "loss": 1.6193, + "step": 11250 + }, + { + "epoch": 0.14, + "grad_norm": 5.794108026292955, + "learning_rate": 1.9925724700114744e-05, + "loss": 1.7877, + "step": 11253 + }, + { + "epoch": 0.14, + "grad_norm": 7.757550340410648, + "learning_rate": 1.9925653615112537e-05, + "loss": 1.6045, + "step": 11256 + }, + { + "epoch": 0.14, + "grad_norm": 28.658190484208834, + "learning_rate": 1.992558249623765e-05, + "loss": 1.6827, + "step": 11259 + }, + { + "epoch": 0.14, + "grad_norm": 5.919270755039552, + "learning_rate": 1.9925511343490323e-05, + "loss": 1.6626, + "step": 11262 + }, + { + "epoch": 0.14, + "grad_norm": 14.87574586904615, + "learning_rate": 1.99254401568708e-05, + "loss": 2.1411, + "step": 11265 + }, + { + "epoch": 0.14, + "grad_norm": 9.052567921018708, + "learning_rate": 1.9925368936379324e-05, + "loss": 1.6324, + "step": 11268 + }, + { + "epoch": 0.14, + "grad_norm": 14.686517475683896, + "learning_rate": 1.9925297682016142e-05, + "loss": 1.7712, + "step": 11271 + }, + { + "epoch": 0.14, + "grad_norm": 4.6462123323983775, + "learning_rate": 1.9925226393781495e-05, + "loss": 1.9993, + "step": 11274 + }, + { + "epoch": 0.14, + "grad_norm": 121.0060830948931, + "learning_rate": 1.9925155071675624e-05, + "loss": 1.9117, + "step": 11277 + }, + { + "epoch": 0.14, + "grad_norm": 40.44635984558853, + "learning_rate": 1.992508371569877e-05, + "loss": 1.5737, + "step": 11280 + }, + { + "epoch": 0.14, + "grad_norm": 33.34434902943933, + "learning_rate": 1.9925012325851185e-05, + "loss": 1.6613, + "step": 11283 + }, + { + "epoch": 0.14, + "grad_norm": 8.49256034953847, + "learning_rate": 1.9924940902133107e-05, + "loss": 1.4902, + "step": 11286 + }, + { + "epoch": 0.14, + "grad_norm": 11.973136779074334, + "learning_rate": 1.992486944454478e-05, + "loss": 1.6972, + "step": 11289 + }, + { + "epoch": 0.14, + "grad_norm": 16.518456633537113, + "learning_rate": 1.9924797953086447e-05, + "loss": 1.9089, + "step": 11292 + }, + { + "epoch": 0.14, + "grad_norm": 48.32765684507504, + "learning_rate": 1.9924726427758357e-05, + "loss": 1.6982, + "step": 11295 + }, + { + "epoch": 0.14, + "grad_norm": 15.567640837518026, + "learning_rate": 1.992465486856075e-05, + "loss": 1.5214, + "step": 11298 + }, + { + "epoch": 0.14, + "grad_norm": 12.441184573895878, + "learning_rate": 1.992458327549387e-05, + "loss": 1.805, + "step": 11301 + }, + { + "epoch": 0.14, + "grad_norm": 6.442446778040512, + "learning_rate": 1.992451164855796e-05, + "loss": 1.7778, + "step": 11304 + }, + { + "epoch": 0.14, + "grad_norm": 13.018447297904462, + "learning_rate": 1.9924439987753267e-05, + "loss": 1.6366, + "step": 11307 + }, + { + "epoch": 0.14, + "grad_norm": 6.31398705840502, + "learning_rate": 1.9924368293080038e-05, + "loss": 1.8835, + "step": 11310 + }, + { + "epoch": 0.14, + "grad_norm": 8.423759770768648, + "learning_rate": 1.9924296564538512e-05, + "loss": 1.4202, + "step": 11313 + }, + { + "epoch": 0.14, + "grad_norm": 33.99871172193639, + "learning_rate": 1.9924224802128936e-05, + "loss": 1.7742, + "step": 11316 + }, + { + "epoch": 0.14, + "grad_norm": 27.393419058820314, + "learning_rate": 1.9924153005851557e-05, + "loss": 1.5908, + "step": 11319 + }, + { + "epoch": 0.14, + "grad_norm": 21.54924771138404, + "learning_rate": 1.992408117570662e-05, + "loss": 1.5823, + "step": 11322 + }, + { + "epoch": 0.14, + "grad_norm": 84.74450608552365, + "learning_rate": 1.9924009311694368e-05, + "loss": 1.6872, + "step": 11325 + }, + { + "epoch": 0.14, + "grad_norm": 6.31909403552908, + "learning_rate": 1.9923937413815048e-05, + "loss": 1.7883, + "step": 11328 + }, + { + "epoch": 0.14, + "grad_norm": 5.109874198832343, + "learning_rate": 1.9923865482068902e-05, + "loss": 1.5784, + "step": 11331 + }, + { + "epoch": 0.14, + "grad_norm": 11.803321770438881, + "learning_rate": 1.9923793516456172e-05, + "loss": 1.923, + "step": 11334 + }, + { + "epoch": 0.14, + "grad_norm": 3.328465356833755, + "learning_rate": 1.9923721516977118e-05, + "loss": 1.6155, + "step": 11337 + }, + { + "epoch": 0.14, + "grad_norm": 17.746109707393188, + "learning_rate": 1.992364948363197e-05, + "loss": 1.5839, + "step": 11340 + }, + { + "epoch": 0.14, + "grad_norm": 6.895527642763576, + "learning_rate": 1.9923577416420988e-05, + "loss": 1.3184, + "step": 11343 + }, + { + "epoch": 0.14, + "grad_norm": 49.91383281253479, + "learning_rate": 1.9923505315344407e-05, + "loss": 1.5933, + "step": 11346 + }, + { + "epoch": 0.14, + "grad_norm": 30.642054132148875, + "learning_rate": 1.992343318040248e-05, + "loss": 1.6473, + "step": 11349 + }, + { + "epoch": 0.14, + "grad_norm": 80.82911811304666, + "learning_rate": 1.9923361011595447e-05, + "loss": 1.7766, + "step": 11352 + }, + { + "epoch": 0.14, + "grad_norm": 8.96055388377452, + "learning_rate": 1.9923288808923556e-05, + "loss": 1.4469, + "step": 11355 + }, + { + "epoch": 0.14, + "grad_norm": 5.580031726838519, + "learning_rate": 1.9923216572387057e-05, + "loss": 1.2575, + "step": 11358 + }, + { + "epoch": 0.14, + "grad_norm": 6.889473286885692, + "learning_rate": 1.992314430198619e-05, + "loss": 1.3975, + "step": 11361 + }, + { + "epoch": 0.14, + "grad_norm": 4.65545153734409, + "learning_rate": 1.992307199772121e-05, + "loss": 1.6577, + "step": 11364 + }, + { + "epoch": 0.14, + "grad_norm": 4.800634951260611, + "learning_rate": 1.992299965959236e-05, + "loss": 1.6006, + "step": 11367 + }, + { + "epoch": 0.14, + "grad_norm": 149.9712782399663, + "learning_rate": 1.9922927287599886e-05, + "loss": 1.5826, + "step": 11370 + }, + { + "epoch": 0.14, + "grad_norm": 25.96713252371028, + "learning_rate": 1.9922854881744035e-05, + "loss": 1.4794, + "step": 11373 + }, + { + "epoch": 0.14, + "grad_norm": 9.09492451546268, + "learning_rate": 1.9922782442025054e-05, + "loss": 1.3656, + "step": 11376 + }, + { + "epoch": 0.14, + "grad_norm": 12.659457589786786, + "learning_rate": 1.9922709968443187e-05, + "loss": 1.366, + "step": 11379 + }, + { + "epoch": 0.14, + "grad_norm": 32.209414714101015, + "learning_rate": 1.992263746099869e-05, + "loss": 1.7168, + "step": 11382 + }, + { + "epoch": 0.14, + "grad_norm": 15.433199376419818, + "learning_rate": 1.9922564919691806e-05, + "loss": 1.5642, + "step": 11385 + }, + { + "epoch": 0.14, + "grad_norm": 8.74630042594882, + "learning_rate": 1.992249234452278e-05, + "loss": 1.6094, + "step": 11388 + }, + { + "epoch": 0.14, + "grad_norm": 5.555032540371759, + "learning_rate": 1.992241973549186e-05, + "loss": 1.682, + "step": 11391 + }, + { + "epoch": 0.14, + "grad_norm": 9.994330815029196, + "learning_rate": 1.9922347092599298e-05, + "loss": 1.4141, + "step": 11394 + }, + { + "epoch": 0.14, + "grad_norm": 21.267928377785793, + "learning_rate": 1.9922274415845336e-05, + "loss": 1.7752, + "step": 11397 + }, + { + "epoch": 0.14, + "grad_norm": 23.80265219613108, + "learning_rate": 1.992220170523023e-05, + "loss": 1.7824, + "step": 11400 + }, + { + "epoch": 0.14, + "grad_norm": 54.02690333882158, + "learning_rate": 1.9922128960754222e-05, + "loss": 1.5087, + "step": 11403 + }, + { + "epoch": 0.14, + "grad_norm": 2.908187597995065, + "learning_rate": 1.992205618241756e-05, + "loss": 1.7407, + "step": 11406 + }, + { + "epoch": 0.14, + "grad_norm": 13.553264497711233, + "learning_rate": 1.9921983370220497e-05, + "loss": 1.6878, + "step": 11409 + }, + { + "epoch": 0.14, + "grad_norm": 31.762917007599402, + "learning_rate": 1.9921910524163277e-05, + "loss": 1.6974, + "step": 11412 + }, + { + "epoch": 0.14, + "grad_norm": 15.63907076999086, + "learning_rate": 1.9921837644246152e-05, + "loss": 1.3721, + "step": 11415 + }, + { + "epoch": 0.14, + "grad_norm": 20.59984750397663, + "learning_rate": 1.9921764730469367e-05, + "loss": 1.5785, + "step": 11418 + }, + { + "epoch": 0.14, + "grad_norm": 5.728093136567129, + "learning_rate": 1.9921691782833174e-05, + "loss": 1.6878, + "step": 11421 + }, + { + "epoch": 0.14, + "grad_norm": 168.16398416196705, + "learning_rate": 1.992161880133782e-05, + "loss": 1.62, + "step": 11424 + }, + { + "epoch": 0.14, + "grad_norm": 9.033101286694006, + "learning_rate": 1.9921545785983554e-05, + "loss": 1.8346, + "step": 11427 + }, + { + "epoch": 0.14, + "grad_norm": 6.791656160477627, + "learning_rate": 1.9921472736770625e-05, + "loss": 1.632, + "step": 11430 + }, + { + "epoch": 0.14, + "grad_norm": 23.024410879567, + "learning_rate": 1.9921399653699286e-05, + "loss": 1.791, + "step": 11433 + }, + { + "epoch": 0.14, + "grad_norm": 60.506671262137154, + "learning_rate": 1.9921326536769783e-05, + "loss": 1.7874, + "step": 11436 + }, + { + "epoch": 0.14, + "grad_norm": 48.87797570394344, + "learning_rate": 1.9921253385982363e-05, + "loss": 1.8072, + "step": 11439 + }, + { + "epoch": 0.14, + "grad_norm": 22.10989969893021, + "learning_rate": 1.9921180201337282e-05, + "loss": 1.9092, + "step": 11442 + }, + { + "epoch": 0.14, + "grad_norm": 36.80429496507911, + "learning_rate": 1.9921106982834786e-05, + "loss": 1.7039, + "step": 11445 + }, + { + "epoch": 0.14, + "grad_norm": 24.86712120347083, + "learning_rate": 1.9921033730475127e-05, + "loss": 1.6034, + "step": 11448 + }, + { + "epoch": 0.14, + "grad_norm": 18.961136930918077, + "learning_rate": 1.9920960444258554e-05, + "loss": 1.562, + "step": 11451 + }, + { + "epoch": 0.14, + "grad_norm": 10.562078446412407, + "learning_rate": 1.9920887124185312e-05, + "loss": 1.9312, + "step": 11454 + }, + { + "epoch": 0.14, + "grad_norm": 29.956529072786026, + "learning_rate": 1.9920813770255658e-05, + "loss": 1.8166, + "step": 11457 + }, + { + "epoch": 0.14, + "grad_norm": 4.7256508202259715, + "learning_rate": 1.992074038246984e-05, + "loss": 1.4579, + "step": 11460 + }, + { + "epoch": 0.14, + "grad_norm": 8.165409614209265, + "learning_rate": 1.992066696082811e-05, + "loss": 1.3806, + "step": 11463 + }, + { + "epoch": 0.14, + "grad_norm": 94.61904986536821, + "learning_rate": 1.9920593505330713e-05, + "loss": 2.1149, + "step": 11466 + }, + { + "epoch": 0.14, + "grad_norm": 6.180795523036589, + "learning_rate": 1.992052001597791e-05, + "loss": 1.9746, + "step": 11469 + }, + { + "epoch": 0.14, + "grad_norm": 18.56669130473251, + "learning_rate": 1.992044649276994e-05, + "loss": 1.7015, + "step": 11472 + }, + { + "epoch": 0.14, + "grad_norm": 10.478773825444902, + "learning_rate": 1.9920372935707065e-05, + "loss": 1.518, + "step": 11475 + }, + { + "epoch": 0.14, + "grad_norm": 30.877474721807907, + "learning_rate": 1.9920299344789525e-05, + "loss": 1.7237, + "step": 11478 + }, + { + "epoch": 0.14, + "grad_norm": 19.1553852325658, + "learning_rate": 1.992022572001758e-05, + "loss": 1.812, + "step": 11481 + }, + { + "epoch": 0.14, + "grad_norm": 16.972375117840638, + "learning_rate": 1.9920152061391476e-05, + "loss": 2.0565, + "step": 11484 + }, + { + "epoch": 0.14, + "grad_norm": 20.694665017428385, + "learning_rate": 1.9920078368911464e-05, + "loss": 1.288, + "step": 11487 + }, + { + "epoch": 0.14, + "grad_norm": 11.61829068027292, + "learning_rate": 1.99200046425778e-05, + "loss": 1.62, + "step": 11490 + }, + { + "epoch": 0.14, + "grad_norm": 13.215579662125004, + "learning_rate": 1.9919930882390736e-05, + "loss": 1.3642, + "step": 11493 + }, + { + "epoch": 0.14, + "grad_norm": 8.067953536343225, + "learning_rate": 1.9919857088350518e-05, + "loss": 1.4351, + "step": 11496 + }, + { + "epoch": 0.14, + "grad_norm": 12.99953990508535, + "learning_rate": 1.9919783260457402e-05, + "loss": 1.8499, + "step": 11499 + }, + { + "epoch": 0.14, + "grad_norm": 55.3622718991976, + "learning_rate": 1.991970939871164e-05, + "loss": 1.8035, + "step": 11502 + }, + { + "epoch": 0.14, + "grad_norm": 17.92300863181386, + "learning_rate": 1.9919635503113478e-05, + "loss": 1.7297, + "step": 11505 + }, + { + "epoch": 0.14, + "grad_norm": 81.13831821842622, + "learning_rate": 1.991956157366318e-05, + "loss": 1.6643, + "step": 11508 + }, + { + "epoch": 0.14, + "grad_norm": 20.619211289284006, + "learning_rate": 1.9919487610360987e-05, + "loss": 1.5716, + "step": 11511 + }, + { + "epoch": 0.14, + "grad_norm": 8.234129086011539, + "learning_rate": 1.9919413613207154e-05, + "loss": 1.8324, + "step": 11514 + }, + { + "epoch": 0.14, + "grad_norm": 29.204847726680992, + "learning_rate": 1.991933958220194e-05, + "loss": 1.8178, + "step": 11517 + }, + { + "epoch": 0.14, + "grad_norm": 40.16995742695408, + "learning_rate": 1.9919265517345588e-05, + "loss": 1.23, + "step": 11520 + }, + { + "epoch": 0.14, + "grad_norm": 39.0993922491456, + "learning_rate": 1.9919191418638363e-05, + "loss": 1.6772, + "step": 11523 + }, + { + "epoch": 0.14, + "grad_norm": 55.476522318816116, + "learning_rate": 1.9919117286080506e-05, + "loss": 1.4979, + "step": 11526 + }, + { + "epoch": 0.14, + "grad_norm": 7.4370452165431, + "learning_rate": 1.9919043119672272e-05, + "loss": 1.7467, + "step": 11529 + }, + { + "epoch": 0.14, + "grad_norm": 110.22189744660159, + "learning_rate": 1.991896891941392e-05, + "loss": 2.3031, + "step": 11532 + }, + { + "epoch": 0.14, + "grad_norm": 102.47125226590926, + "learning_rate": 1.99188946853057e-05, + "loss": 1.7359, + "step": 11535 + }, + { + "epoch": 0.14, + "grad_norm": 31.611253773658955, + "learning_rate": 1.9918820417347862e-05, + "loss": 1.6664, + "step": 11538 + }, + { + "epoch": 0.14, + "grad_norm": 2.4625308414374962, + "learning_rate": 1.991874611554067e-05, + "loss": 1.4573, + "step": 11541 + }, + { + "epoch": 0.14, + "grad_norm": 39.346694989404604, + "learning_rate": 1.9918671779884363e-05, + "loss": 1.2817, + "step": 11544 + }, + { + "epoch": 0.14, + "grad_norm": 12.232082812336735, + "learning_rate": 1.9918597410379204e-05, + "loss": 1.3604, + "step": 11547 + }, + { + "epoch": 0.14, + "grad_norm": 20.508046122472685, + "learning_rate": 1.9918523007025446e-05, + "loss": 1.488, + "step": 11550 + }, + { + "epoch": 0.14, + "grad_norm": 11.84384414911601, + "learning_rate": 1.991844856982334e-05, + "loss": 1.6069, + "step": 11553 + }, + { + "epoch": 0.14, + "grad_norm": 102.1126659703818, + "learning_rate": 1.991837409877314e-05, + "loss": 1.5864, + "step": 11556 + }, + { + "epoch": 0.14, + "grad_norm": 13.98255000780768, + "learning_rate": 1.9918299593875107e-05, + "loss": 1.7769, + "step": 11559 + }, + { + "epoch": 0.14, + "grad_norm": 21.621378355370606, + "learning_rate": 1.9918225055129485e-05, + "loss": 1.9608, + "step": 11562 + }, + { + "epoch": 0.14, + "grad_norm": 43.91595062341899, + "learning_rate": 1.9918150482536535e-05, + "loss": 1.4889, + "step": 11565 + }, + { + "epoch": 0.14, + "grad_norm": 15.398210413912787, + "learning_rate": 1.991807587609651e-05, + "loss": 1.6353, + "step": 11568 + }, + { + "epoch": 0.14, + "grad_norm": 8.199050760904083, + "learning_rate": 1.9918001235809664e-05, + "loss": 1.6266, + "step": 11571 + }, + { + "epoch": 0.14, + "grad_norm": 4.310644671784885, + "learning_rate": 1.9917926561676254e-05, + "loss": 1.7936, + "step": 11574 + }, + { + "epoch": 0.14, + "grad_norm": 26.177385954644596, + "learning_rate": 1.9917851853696532e-05, + "loss": 1.9381, + "step": 11577 + }, + { + "epoch": 0.14, + "grad_norm": 4.474337066994827, + "learning_rate": 1.9917777111870756e-05, + "loss": 1.2406, + "step": 11580 + }, + { + "epoch": 0.14, + "grad_norm": 5.702844796169288, + "learning_rate": 1.9917702336199177e-05, + "loss": 1.447, + "step": 11583 + }, + { + "epoch": 0.14, + "grad_norm": 29.5046311984027, + "learning_rate": 1.9917627526682052e-05, + "loss": 1.5662, + "step": 11586 + }, + { + "epoch": 0.14, + "grad_norm": 15.060336894927042, + "learning_rate": 1.9917552683319635e-05, + "loss": 1.7259, + "step": 11589 + }, + { + "epoch": 0.14, + "grad_norm": 30.955983223310866, + "learning_rate": 1.9917477806112187e-05, + "loss": 1.558, + "step": 11592 + }, + { + "epoch": 0.14, + "grad_norm": 21.128444843621196, + "learning_rate": 1.9917402895059955e-05, + "loss": 1.6692, + "step": 11595 + }, + { + "epoch": 0.14, + "grad_norm": 4.517299289269893, + "learning_rate": 1.9917327950163204e-05, + "loss": 1.512, + "step": 11598 + }, + { + "epoch": 0.14, + "grad_norm": 24.194369581981846, + "learning_rate": 1.9917252971422184e-05, + "loss": 2.0081, + "step": 11601 + }, + { + "epoch": 0.14, + "grad_norm": 22.143024714453833, + "learning_rate": 1.991717795883715e-05, + "loss": 1.4751, + "step": 11604 + }, + { + "epoch": 0.14, + "grad_norm": 9.379078146055951, + "learning_rate": 1.9917102912408358e-05, + "loss": 1.5771, + "step": 11607 + }, + { + "epoch": 0.14, + "grad_norm": 5.622226569012688, + "learning_rate": 1.9917027832136067e-05, + "loss": 1.7437, + "step": 11610 + }, + { + "epoch": 0.14, + "grad_norm": 7.9289976296144, + "learning_rate": 1.9916952718020534e-05, + "loss": 1.3711, + "step": 11613 + }, + { + "epoch": 0.14, + "grad_norm": 24.63639398460773, + "learning_rate": 1.9916877570062013e-05, + "loss": 1.5273, + "step": 11616 + }, + { + "epoch": 0.14, + "grad_norm": 29.67515876756807, + "learning_rate": 1.991680238826076e-05, + "loss": 1.412, + "step": 11619 + }, + { + "epoch": 0.14, + "grad_norm": 8.851965503877596, + "learning_rate": 1.9916727172617033e-05, + "loss": 1.466, + "step": 11622 + }, + { + "epoch": 0.14, + "grad_norm": 12.282214491164003, + "learning_rate": 1.991665192313109e-05, + "loss": 1.8512, + "step": 11625 + }, + { + "epoch": 0.14, + "grad_norm": 21.05208606854255, + "learning_rate": 1.9916576639803182e-05, + "loss": 1.9875, + "step": 11628 + }, + { + "epoch": 0.14, + "grad_norm": 68.47791674597546, + "learning_rate": 1.9916501322633573e-05, + "loss": 1.7069, + "step": 11631 + }, + { + "epoch": 0.14, + "grad_norm": 28.91883391054053, + "learning_rate": 1.9916425971622515e-05, + "loss": 1.4631, + "step": 11634 + }, + { + "epoch": 0.14, + "grad_norm": 115.0459549541005, + "learning_rate": 1.9916350586770268e-05, + "loss": 1.4518, + "step": 11637 + }, + { + "epoch": 0.14, + "grad_norm": 36.37244577489098, + "learning_rate": 1.991627516807709e-05, + "loss": 2.0075, + "step": 11640 + }, + { + "epoch": 0.14, + "grad_norm": 30.487743913314393, + "learning_rate": 1.9916199715543233e-05, + "loss": 1.659, + "step": 11643 + }, + { + "epoch": 0.14, + "grad_norm": 10.164344068839938, + "learning_rate": 1.9916124229168958e-05, + "loss": 1.5777, + "step": 11646 + }, + { + "epoch": 0.14, + "grad_norm": 34.25158169521841, + "learning_rate": 1.9916048708954524e-05, + "loss": 1.5466, + "step": 11649 + }, + { + "epoch": 0.14, + "grad_norm": 7.030664270149949, + "learning_rate": 1.991597315490019e-05, + "loss": 1.5785, + "step": 11652 + }, + { + "epoch": 0.14, + "grad_norm": 16.138341052238278, + "learning_rate": 1.991589756700621e-05, + "loss": 1.6593, + "step": 11655 + }, + { + "epoch": 0.14, + "grad_norm": 26.873889295917053, + "learning_rate": 1.9915821945272846e-05, + "loss": 1.6726, + "step": 11658 + }, + { + "epoch": 0.14, + "grad_norm": 15.94047806482451, + "learning_rate": 1.9915746289700347e-05, + "loss": 1.9039, + "step": 11661 + }, + { + "epoch": 0.14, + "grad_norm": 20.287121077092337, + "learning_rate": 1.991567060028898e-05, + "loss": 1.8785, + "step": 11664 + }, + { + "epoch": 0.14, + "grad_norm": 5.87276007004489, + "learning_rate": 1.9915594877039e-05, + "loss": 1.7572, + "step": 11667 + }, + { + "epoch": 0.14, + "grad_norm": 9.365852015229075, + "learning_rate": 1.991551911995067e-05, + "loss": 1.3524, + "step": 11670 + }, + { + "epoch": 0.14, + "grad_norm": 28.39419329927932, + "learning_rate": 1.9915443329024243e-05, + "loss": 1.5681, + "step": 11673 + }, + { + "epoch": 0.14, + "grad_norm": 11.371653341260691, + "learning_rate": 1.991536750425998e-05, + "loss": 1.4498, + "step": 11676 + }, + { + "epoch": 0.14, + "grad_norm": 8.502388136550852, + "learning_rate": 1.991529164565814e-05, + "loss": 1.6121, + "step": 11679 + }, + { + "epoch": 0.14, + "grad_norm": 78.44236709516403, + "learning_rate": 1.9915215753218976e-05, + "loss": 1.6876, + "step": 11682 + }, + { + "epoch": 0.14, + "grad_norm": 4.616165719715353, + "learning_rate": 1.9915139826942757e-05, + "loss": 1.9045, + "step": 11685 + }, + { + "epoch": 0.14, + "grad_norm": 8.29962477954028, + "learning_rate": 1.9915063866829734e-05, + "loss": 1.5208, + "step": 11688 + }, + { + "epoch": 0.14, + "grad_norm": 19.631452702834242, + "learning_rate": 1.991498787288017e-05, + "loss": 1.5737, + "step": 11691 + }, + { + "epoch": 0.14, + "grad_norm": 55.74823735528284, + "learning_rate": 1.9914911845094327e-05, + "loss": 1.7567, + "step": 11694 + }, + { + "epoch": 0.14, + "grad_norm": 22.158308616408327, + "learning_rate": 1.991483578347246e-05, + "loss": 1.619, + "step": 11697 + }, + { + "epoch": 0.14, + "grad_norm": 10.886819907010885, + "learning_rate": 1.9914759688014826e-05, + "loss": 1.7689, + "step": 11700 + }, + { + "epoch": 0.14, + "grad_norm": 9.04846549931652, + "learning_rate": 1.9914683558721693e-05, + "loss": 1.6251, + "step": 11703 + }, + { + "epoch": 0.14, + "grad_norm": 20.255835676409507, + "learning_rate": 1.9914607395593314e-05, + "loss": 1.514, + "step": 11706 + }, + { + "epoch": 0.14, + "grad_norm": 6.1636803747087745, + "learning_rate": 1.9914531198629953e-05, + "loss": 1.6077, + "step": 11709 + }, + { + "epoch": 0.14, + "grad_norm": 2.7728311673651866, + "learning_rate": 1.9914454967831868e-05, + "loss": 1.8349, + "step": 11712 + }, + { + "epoch": 0.14, + "grad_norm": 13.564485260925371, + "learning_rate": 1.9914378703199316e-05, + "loss": 1.4974, + "step": 11715 + }, + { + "epoch": 0.14, + "grad_norm": 10.582327182546166, + "learning_rate": 1.9914302404732565e-05, + "loss": 1.2387, + "step": 11718 + }, + { + "epoch": 0.14, + "grad_norm": 67.16090470074953, + "learning_rate": 1.991422607243187e-05, + "loss": 1.6085, + "step": 11721 + }, + { + "epoch": 0.14, + "grad_norm": 9.383267634554084, + "learning_rate": 1.9914149706297488e-05, + "loss": 1.2328, + "step": 11724 + }, + { + "epoch": 0.14, + "grad_norm": 33.37625989247337, + "learning_rate": 1.9914073306329688e-05, + "loss": 1.8005, + "step": 11727 + }, + { + "epoch": 0.14, + "grad_norm": 3.456174456107047, + "learning_rate": 1.9913996872528726e-05, + "loss": 1.4857, + "step": 11730 + }, + { + "epoch": 0.14, + "grad_norm": 16.877024404577906, + "learning_rate": 1.9913920404894865e-05, + "loss": 1.3463, + "step": 11733 + }, + { + "epoch": 0.14, + "grad_norm": 20.29342570448513, + "learning_rate": 1.9913843903428364e-05, + "loss": 1.7782, + "step": 11736 + }, + { + "epoch": 0.14, + "grad_norm": 26.11033178437731, + "learning_rate": 1.9913767368129483e-05, + "loss": 1.2746, + "step": 11739 + }, + { + "epoch": 0.14, + "grad_norm": 11.04538468104537, + "learning_rate": 1.991369079899849e-05, + "loss": 1.4768, + "step": 11742 + }, + { + "epoch": 0.14, + "grad_norm": 4.6220752714124735, + "learning_rate": 1.9913614196035635e-05, + "loss": 1.3892, + "step": 11745 + }, + { + "epoch": 0.14, + "grad_norm": 33.47895795564721, + "learning_rate": 1.9913537559241187e-05, + "loss": 1.9091, + "step": 11748 + }, + { + "epoch": 0.14, + "grad_norm": 33.20267535311689, + "learning_rate": 1.991346088861541e-05, + "loss": 1.5701, + "step": 11751 + }, + { + "epoch": 0.14, + "grad_norm": 23.250691027854977, + "learning_rate": 1.9913384184158557e-05, + "loss": 1.5395, + "step": 11754 + }, + { + "epoch": 0.14, + "grad_norm": 9.253099557889067, + "learning_rate": 1.9913307445870897e-05, + "loss": 1.6077, + "step": 11757 + }, + { + "epoch": 0.14, + "grad_norm": 42.33094391196995, + "learning_rate": 1.991323067375269e-05, + "loss": 1.6567, + "step": 11760 + }, + { + "epoch": 0.14, + "grad_norm": 66.1593719690545, + "learning_rate": 1.9913153867804197e-05, + "loss": 1.3662, + "step": 11763 + }, + { + "epoch": 0.14, + "grad_norm": 23.972515059329556, + "learning_rate": 1.991307702802568e-05, + "loss": 1.8831, + "step": 11766 + }, + { + "epoch": 0.14, + "grad_norm": 29.150090342087072, + "learning_rate": 1.99130001544174e-05, + "loss": 1.8107, + "step": 11769 + }, + { + "epoch": 0.14, + "grad_norm": 6.085768340410664, + "learning_rate": 1.9912923246979625e-05, + "loss": 1.546, + "step": 11772 + }, + { + "epoch": 0.14, + "grad_norm": 6.275834004249192, + "learning_rate": 1.991284630571261e-05, + "loss": 1.5812, + "step": 11775 + }, + { + "epoch": 0.14, + "grad_norm": 12.340518683070616, + "learning_rate": 1.9912769330616623e-05, + "loss": 1.7276, + "step": 11778 + }, + { + "epoch": 0.14, + "grad_norm": 5.336184876199316, + "learning_rate": 1.9912692321691924e-05, + "loss": 1.5205, + "step": 11781 + }, + { + "epoch": 0.14, + "grad_norm": 43.2386083082936, + "learning_rate": 1.991261527893878e-05, + "loss": 1.5596, + "step": 11784 + }, + { + "epoch": 0.14, + "grad_norm": 34.21019724452396, + "learning_rate": 1.991253820235745e-05, + "loss": 1.6985, + "step": 11787 + }, + { + "epoch": 0.14, + "grad_norm": 17.369285841414495, + "learning_rate": 1.991246109194819e-05, + "loss": 1.553, + "step": 11790 + }, + { + "epoch": 0.14, + "grad_norm": 28.538581686393336, + "learning_rate": 1.991238394771128e-05, + "loss": 1.4099, + "step": 11793 + }, + { + "epoch": 0.14, + "grad_norm": 12.856030982338277, + "learning_rate": 1.991230676964697e-05, + "loss": 1.8488, + "step": 11796 + }, + { + "epoch": 0.14, + "grad_norm": 14.524817591230448, + "learning_rate": 1.9912229557755527e-05, + "loss": 1.4609, + "step": 11799 + }, + { + "epoch": 0.14, + "grad_norm": 10.825172572485565, + "learning_rate": 1.9912152312037215e-05, + "loss": 1.1977, + "step": 11802 + }, + { + "epoch": 0.14, + "grad_norm": 4.354557925410136, + "learning_rate": 1.9912075032492296e-05, + "loss": 1.5916, + "step": 11805 + }, + { + "epoch": 0.14, + "grad_norm": 67.0469349819637, + "learning_rate": 1.9911997719121035e-05, + "loss": 1.4486, + "step": 11808 + }, + { + "epoch": 0.14, + "grad_norm": 12.566483235856495, + "learning_rate": 1.99119203719237e-05, + "loss": 1.544, + "step": 11811 + }, + { + "epoch": 0.14, + "grad_norm": 28.30201671803657, + "learning_rate": 1.991184299090055e-05, + "loss": 2.008, + "step": 11814 + }, + { + "epoch": 0.14, + "grad_norm": 4.9080006527414985, + "learning_rate": 1.991176557605185e-05, + "loss": 1.7823, + "step": 11817 + }, + { + "epoch": 0.14, + "grad_norm": 4.320165192929707, + "learning_rate": 1.9911688127377863e-05, + "loss": 1.5825, + "step": 11820 + }, + { + "epoch": 0.14, + "grad_norm": 170.05496468407878, + "learning_rate": 1.9911610644878853e-05, + "loss": 1.5155, + "step": 11823 + }, + { + "epoch": 0.14, + "grad_norm": 72.13374034552369, + "learning_rate": 1.991153312855509e-05, + "loss": 1.8317, + "step": 11826 + }, + { + "epoch": 0.14, + "grad_norm": 15.421843097086498, + "learning_rate": 1.9911455578406834e-05, + "loss": 1.7856, + "step": 11829 + }, + { + "epoch": 0.14, + "grad_norm": 18.490117645074008, + "learning_rate": 1.991137799443435e-05, + "loss": 1.7546, + "step": 11832 + }, + { + "epoch": 0.14, + "grad_norm": 19.924951050513435, + "learning_rate": 1.99113003766379e-05, + "loss": 1.6976, + "step": 11835 + }, + { + "epoch": 0.14, + "grad_norm": 12.29481005824999, + "learning_rate": 1.9911222725017753e-05, + "loss": 1.8432, + "step": 11838 + }, + { + "epoch": 0.14, + "grad_norm": 14.3926262549212, + "learning_rate": 1.9911145039574175e-05, + "loss": 1.5955, + "step": 11841 + }, + { + "epoch": 0.14, + "grad_norm": 13.516127246253895, + "learning_rate": 1.9911067320307426e-05, + "loss": 1.6414, + "step": 11844 + }, + { + "epoch": 0.14, + "grad_norm": 72.28503665118225, + "learning_rate": 1.9910989567217774e-05, + "loss": 1.8395, + "step": 11847 + }, + { + "epoch": 0.14, + "grad_norm": 23.487687537208746, + "learning_rate": 1.991091178030549e-05, + "loss": 1.7036, + "step": 11850 + }, + { + "epoch": 0.14, + "grad_norm": 17.51469318348185, + "learning_rate": 1.991083395957083e-05, + "loss": 1.5824, + "step": 11853 + }, + { + "epoch": 0.14, + "grad_norm": 4.324882709453209, + "learning_rate": 1.9910756105014063e-05, + "loss": 1.5924, + "step": 11856 + }, + { + "epoch": 0.14, + "grad_norm": 4.005727424503577, + "learning_rate": 1.9910678216635458e-05, + "loss": 1.3465, + "step": 11859 + }, + { + "epoch": 0.14, + "grad_norm": 16.623597441765824, + "learning_rate": 1.9910600294435275e-05, + "loss": 1.9667, + "step": 11862 + }, + { + "epoch": 0.14, + "grad_norm": 16.41042428441442, + "learning_rate": 1.991052233841378e-05, + "loss": 1.3319, + "step": 11865 + }, + { + "epoch": 0.14, + "grad_norm": 59.95862900119669, + "learning_rate": 1.991044434857125e-05, + "loss": 1.6476, + "step": 11868 + }, + { + "epoch": 0.14, + "grad_norm": 32.2547056348746, + "learning_rate": 1.991036632490794e-05, + "loss": 1.8947, + "step": 11871 + }, + { + "epoch": 0.14, + "grad_norm": 55.039181706350064, + "learning_rate": 1.991028826742412e-05, + "loss": 1.7729, + "step": 11874 + }, + { + "epoch": 0.14, + "grad_norm": 4.3687082679358085, + "learning_rate": 1.9910210176120053e-05, + "loss": 1.6496, + "step": 11877 + }, + { + "epoch": 0.14, + "grad_norm": 5.126013216862582, + "learning_rate": 1.991013205099601e-05, + "loss": 1.2343, + "step": 11880 + }, + { + "epoch": 0.14, + "grad_norm": 3.886942149627033, + "learning_rate": 1.9910053892052256e-05, + "loss": 1.3872, + "step": 11883 + }, + { + "epoch": 0.14, + "grad_norm": 19.45251525189066, + "learning_rate": 1.990997569928906e-05, + "loss": 1.7397, + "step": 11886 + }, + { + "epoch": 0.14, + "grad_norm": 45.25309684673655, + "learning_rate": 1.990989747270668e-05, + "loss": 1.4465, + "step": 11889 + }, + { + "epoch": 0.14, + "grad_norm": 11.93933857645982, + "learning_rate": 1.9909819212305395e-05, + "loss": 1.6389, + "step": 11892 + }, + { + "epoch": 0.14, + "grad_norm": 43.97087406577953, + "learning_rate": 1.9909740918085462e-05, + "loss": 1.7104, + "step": 11895 + }, + { + "epoch": 0.14, + "grad_norm": 36.24276560189139, + "learning_rate": 1.990966259004716e-05, + "loss": 1.5496, + "step": 11898 + }, + { + "epoch": 0.14, + "grad_norm": 73.0074932093826, + "learning_rate": 1.990958422819074e-05, + "loss": 1.7922, + "step": 11901 + }, + { + "epoch": 0.14, + "grad_norm": 11.997776630881411, + "learning_rate": 1.9909505832516486e-05, + "loss": 1.9843, + "step": 11904 + }, + { + "epoch": 0.14, + "grad_norm": 36.84083513595621, + "learning_rate": 1.9909427403024656e-05, + "loss": 1.4989, + "step": 11907 + }, + { + "epoch": 0.14, + "grad_norm": 40.38351772233109, + "learning_rate": 1.9909348939715517e-05, + "loss": 1.4771, + "step": 11910 + }, + { + "epoch": 0.14, + "grad_norm": 15.667299677870977, + "learning_rate": 1.9909270442589342e-05, + "loss": 1.8306, + "step": 11913 + }, + { + "epoch": 0.14, + "grad_norm": 35.66080732974609, + "learning_rate": 1.9909191911646394e-05, + "loss": 1.9521, + "step": 11916 + }, + { + "epoch": 0.14, + "grad_norm": 26.533318195162305, + "learning_rate": 1.9909113346886946e-05, + "loss": 1.686, + "step": 11919 + }, + { + "epoch": 0.14, + "grad_norm": 24.055038781005297, + "learning_rate": 1.9909034748311262e-05, + "loss": 1.6175, + "step": 11922 + }, + { + "epoch": 0.14, + "grad_norm": 11.600183633377233, + "learning_rate": 1.9908956115919615e-05, + "loss": 1.5204, + "step": 11925 + }, + { + "epoch": 0.14, + "grad_norm": 40.82564108292185, + "learning_rate": 1.9908877449712264e-05, + "loss": 1.6048, + "step": 11928 + }, + { + "epoch": 0.14, + "grad_norm": 24.714090931578813, + "learning_rate": 1.9908798749689488e-05, + "loss": 1.7603, + "step": 11931 + }, + { + "epoch": 0.14, + "grad_norm": 16.941400633290186, + "learning_rate": 1.9908720015851547e-05, + "loss": 1.7269, + "step": 11934 + }, + { + "epoch": 0.14, + "grad_norm": 29.053341850818246, + "learning_rate": 1.9908641248198716e-05, + "loss": 1.5462, + "step": 11937 + }, + { + "epoch": 0.14, + "grad_norm": 7.568102322012681, + "learning_rate": 1.9908562446731262e-05, + "loss": 1.8067, + "step": 11940 + }, + { + "epoch": 0.14, + "grad_norm": 18.770512981110485, + "learning_rate": 1.990848361144945e-05, + "loss": 1.8566, + "step": 11943 + }, + { + "epoch": 0.14, + "grad_norm": 4.563621314450061, + "learning_rate": 1.9908404742353554e-05, + "loss": 2.0083, + "step": 11946 + }, + { + "epoch": 0.14, + "grad_norm": 29.900535670321393, + "learning_rate": 1.9908325839443846e-05, + "loss": 1.5035, + "step": 11949 + }, + { + "epoch": 0.14, + "grad_norm": 7.886411938095723, + "learning_rate": 1.9908246902720586e-05, + "loss": 1.2131, + "step": 11952 + }, + { + "epoch": 0.14, + "grad_norm": 28.977505336845475, + "learning_rate": 1.990816793218405e-05, + "loss": 2.1415, + "step": 11955 + }, + { + "epoch": 0.14, + "grad_norm": 11.036393897256229, + "learning_rate": 1.9908088927834507e-05, + "loss": 1.6636, + "step": 11958 + }, + { + "epoch": 0.14, + "grad_norm": 22.08340979269473, + "learning_rate": 1.990800988967222e-05, + "loss": 1.4127, + "step": 11961 + }, + { + "epoch": 0.14, + "grad_norm": 19.844386937183376, + "learning_rate": 1.990793081769747e-05, + "loss": 1.8679, + "step": 11964 + }, + { + "epoch": 0.14, + "grad_norm": 9.573358755887533, + "learning_rate": 1.990785171191052e-05, + "loss": 1.7712, + "step": 11967 + }, + { + "epoch": 0.14, + "grad_norm": 14.313664837841202, + "learning_rate": 1.990777257231164e-05, + "loss": 1.7737, + "step": 11970 + }, + { + "epoch": 0.14, + "grad_norm": 8.537228893358858, + "learning_rate": 1.9907693398901103e-05, + "loss": 1.6136, + "step": 11973 + }, + { + "epoch": 0.14, + "grad_norm": 4.840862749567919, + "learning_rate": 1.9907614191679176e-05, + "loss": 1.7118, + "step": 11976 + }, + { + "epoch": 0.14, + "grad_norm": 31.93940013188073, + "learning_rate": 1.990753495064613e-05, + "loss": 1.8763, + "step": 11979 + }, + { + "epoch": 0.14, + "grad_norm": 12.386579997248003, + "learning_rate": 1.990745567580223e-05, + "loss": 1.721, + "step": 11982 + }, + { + "epoch": 0.14, + "grad_norm": 9.00976881155943, + "learning_rate": 1.990737636714776e-05, + "loss": 1.8037, + "step": 11985 + }, + { + "epoch": 0.14, + "grad_norm": 12.9122134940948, + "learning_rate": 1.990729702468298e-05, + "loss": 2.0098, + "step": 11988 + }, + { + "epoch": 0.14, + "grad_norm": 13.570740137515921, + "learning_rate": 1.9907217648408164e-05, + "loss": 1.5327, + "step": 11991 + }, + { + "epoch": 0.14, + "grad_norm": 8.411205070737928, + "learning_rate": 1.9907138238323584e-05, + "loss": 1.8491, + "step": 11994 + }, + { + "epoch": 0.14, + "grad_norm": 46.380205259681134, + "learning_rate": 1.9907058794429514e-05, + "loss": 1.8339, + "step": 11997 + }, + { + "epoch": 0.14, + "grad_norm": 8.846944914168914, + "learning_rate": 1.9906979316726212e-05, + "loss": 1.9149, + "step": 12000 + }, + { + "epoch": 0.14, + "grad_norm": 4.62235659708527, + "learning_rate": 1.9906899805213963e-05, + "loss": 1.588, + "step": 12003 + }, + { + "epoch": 0.14, + "grad_norm": 9.919759781890049, + "learning_rate": 1.9906820259893034e-05, + "loss": 1.6955, + "step": 12006 + }, + { + "epoch": 0.14, + "grad_norm": 57.579786076877234, + "learning_rate": 1.9906740680763693e-05, + "loss": 1.5826, + "step": 12009 + }, + { + "epoch": 0.14, + "grad_norm": 11.388190415753822, + "learning_rate": 1.9906661067826218e-05, + "loss": 2.1066, + "step": 12012 + }, + { + "epoch": 0.14, + "grad_norm": 35.351851549794205, + "learning_rate": 1.9906581421080873e-05, + "loss": 1.6406, + "step": 12015 + }, + { + "epoch": 0.14, + "grad_norm": 88.01529508683394, + "learning_rate": 1.990650174052794e-05, + "loss": 1.9038, + "step": 12018 + }, + { + "epoch": 0.14, + "grad_norm": 21.89625316459227, + "learning_rate": 1.9906422026167677e-05, + "loss": 1.6431, + "step": 12021 + }, + { + "epoch": 0.14, + "grad_norm": 14.439691422092123, + "learning_rate": 1.9906342278000368e-05, + "loss": 1.3784, + "step": 12024 + }, + { + "epoch": 0.14, + "grad_norm": 4.147120079072539, + "learning_rate": 1.990626249602628e-05, + "loss": 1.5713, + "step": 12027 + }, + { + "epoch": 0.14, + "grad_norm": 7.654672461820456, + "learning_rate": 1.990618268024569e-05, + "loss": 1.7542, + "step": 12030 + }, + { + "epoch": 0.14, + "grad_norm": 182.13766495930182, + "learning_rate": 1.9906102830658863e-05, + "loss": 1.7378, + "step": 12033 + }, + { + "epoch": 0.14, + "grad_norm": 3.3294687318052683, + "learning_rate": 1.9906022947266075e-05, + "loss": 1.5035, + "step": 12036 + }, + { + "epoch": 0.14, + "grad_norm": 26.538175681527164, + "learning_rate": 1.99059430300676e-05, + "loss": 1.8934, + "step": 12039 + }, + { + "epoch": 0.14, + "grad_norm": 18.005311045221653, + "learning_rate": 1.9905863079063706e-05, + "loss": 1.4316, + "step": 12042 + }, + { + "epoch": 0.14, + "grad_norm": 7.80855393259804, + "learning_rate": 1.9905783094254674e-05, + "loss": 1.5006, + "step": 12045 + }, + { + "epoch": 0.14, + "grad_norm": 13.783903494597892, + "learning_rate": 1.9905703075640774e-05, + "loss": 1.5876, + "step": 12048 + }, + { + "epoch": 0.14, + "grad_norm": 13.600679651229312, + "learning_rate": 1.990562302322227e-05, + "loss": 1.726, + "step": 12051 + }, + { + "epoch": 0.14, + "grad_norm": 13.530334046220045, + "learning_rate": 1.990554293699945e-05, + "loss": 1.5873, + "step": 12054 + }, + { + "epoch": 0.14, + "grad_norm": 45.442604228238544, + "learning_rate": 1.990546281697258e-05, + "loss": 1.6627, + "step": 12057 + }, + { + "epoch": 0.15, + "grad_norm": 28.019381244497257, + "learning_rate": 1.9905382663141927e-05, + "loss": 1.6421, + "step": 12060 + }, + { + "epoch": 0.15, + "grad_norm": 11.486157000615865, + "learning_rate": 1.9905302475507776e-05, + "loss": 1.6919, + "step": 12063 + }, + { + "epoch": 0.15, + "grad_norm": 48.1370980874202, + "learning_rate": 1.9905222254070393e-05, + "loss": 1.4693, + "step": 12066 + }, + { + "epoch": 0.15, + "grad_norm": 38.203221846752, + "learning_rate": 1.9905141998830056e-05, + "loss": 1.5879, + "step": 12069 + }, + { + "epoch": 0.15, + "grad_norm": 21.38108144318849, + "learning_rate": 1.9905061709787033e-05, + "loss": 1.9719, + "step": 12072 + }, + { + "epoch": 0.15, + "grad_norm": 29.66704680343366, + "learning_rate": 1.990498138694161e-05, + "loss": 1.5832, + "step": 12075 + }, + { + "epoch": 0.15, + "grad_norm": 9.762810098098026, + "learning_rate": 1.9904901030294044e-05, + "loss": 1.5336, + "step": 12078 + }, + { + "epoch": 0.15, + "grad_norm": 12.935334157984133, + "learning_rate": 1.9904820639844624e-05, + "loss": 1.3706, + "step": 12081 + }, + { + "epoch": 0.15, + "grad_norm": 3.5495680792889934, + "learning_rate": 1.9904740215593618e-05, + "loss": 2.041, + "step": 12084 + }, + { + "epoch": 0.15, + "grad_norm": 46.31914774032961, + "learning_rate": 1.99046597575413e-05, + "loss": 1.7663, + "step": 12087 + }, + { + "epoch": 0.15, + "grad_norm": 57.07575284963786, + "learning_rate": 1.990457926568795e-05, + "loss": 1.5293, + "step": 12090 + }, + { + "epoch": 0.15, + "grad_norm": 47.66058947183795, + "learning_rate": 1.9904498740033835e-05, + "loss": 1.5942, + "step": 12093 + }, + { + "epoch": 0.15, + "grad_norm": 173.4378126329123, + "learning_rate": 1.9904418180579233e-05, + "loss": 1.3039, + "step": 12096 + }, + { + "epoch": 0.15, + "grad_norm": 11.99443381656743, + "learning_rate": 1.9904337587324422e-05, + "loss": 1.8334, + "step": 12099 + }, + { + "epoch": 0.15, + "grad_norm": 4.764409319829419, + "learning_rate": 1.9904256960269673e-05, + "loss": 1.375, + "step": 12102 + }, + { + "epoch": 0.15, + "grad_norm": 10.20259648396846, + "learning_rate": 1.9904176299415263e-05, + "loss": 1.6754, + "step": 12105 + }, + { + "epoch": 0.15, + "grad_norm": 21.642277029965246, + "learning_rate": 1.9904095604761468e-05, + "loss": 1.694, + "step": 12108 + }, + { + "epoch": 0.15, + "grad_norm": 22.358342715492785, + "learning_rate": 1.9904014876308557e-05, + "loss": 1.5794, + "step": 12111 + }, + { + "epoch": 0.15, + "grad_norm": 20.611632369385962, + "learning_rate": 1.9903934114056816e-05, + "loss": 1.8585, + "step": 12114 + }, + { + "epoch": 0.15, + "grad_norm": 4.749630840466276, + "learning_rate": 1.9903853318006515e-05, + "loss": 1.9626, + "step": 12117 + }, + { + "epoch": 0.15, + "grad_norm": 9.59266554158585, + "learning_rate": 1.9903772488157926e-05, + "loss": 1.5251, + "step": 12120 + }, + { + "epoch": 0.15, + "grad_norm": 14.054723441191818, + "learning_rate": 1.990369162451133e-05, + "loss": 1.5544, + "step": 12123 + }, + { + "epoch": 0.15, + "grad_norm": 6.920142259662766, + "learning_rate": 1.9903610727067004e-05, + "loss": 1.4644, + "step": 12126 + }, + { + "epoch": 0.15, + "grad_norm": 11.66944463182728, + "learning_rate": 1.990352979582522e-05, + "loss": 1.902, + "step": 12129 + }, + { + "epoch": 0.15, + "grad_norm": 15.095677536553715, + "learning_rate": 1.9903448830786257e-05, + "loss": 1.6791, + "step": 12132 + }, + { + "epoch": 0.15, + "grad_norm": 11.130526340035784, + "learning_rate": 1.990336783195039e-05, + "loss": 1.5626, + "step": 12135 + }, + { + "epoch": 0.15, + "grad_norm": 55.46251891424232, + "learning_rate": 1.9903286799317895e-05, + "loss": 1.8878, + "step": 12138 + }, + { + "epoch": 0.15, + "grad_norm": 19.403777756405304, + "learning_rate": 1.990320573288905e-05, + "loss": 1.9416, + "step": 12141 + }, + { + "epoch": 0.15, + "grad_norm": 11.891082565724723, + "learning_rate": 1.9903124632664133e-05, + "loss": 1.7129, + "step": 12144 + }, + { + "epoch": 0.15, + "grad_norm": 13.253422280564655, + "learning_rate": 1.9903043498643414e-05, + "loss": 1.6491, + "step": 12147 + }, + { + "epoch": 0.15, + "grad_norm": 8.924805982035496, + "learning_rate": 1.990296233082718e-05, + "loss": 2.0775, + "step": 12150 + }, + { + "epoch": 0.15, + "grad_norm": 10.352396167684194, + "learning_rate": 1.99028811292157e-05, + "loss": 1.4045, + "step": 12153 + }, + { + "epoch": 0.15, + "grad_norm": 58.21162694890079, + "learning_rate": 1.9902799893809253e-05, + "loss": 1.8305, + "step": 12156 + }, + { + "epoch": 0.15, + "grad_norm": 147.68398533233832, + "learning_rate": 1.9902718624608117e-05, + "loss": 1.9491, + "step": 12159 + }, + { + "epoch": 0.15, + "grad_norm": 13.775317668593294, + "learning_rate": 1.990263732161257e-05, + "loss": 1.589, + "step": 12162 + }, + { + "epoch": 0.15, + "grad_norm": 7.9388100768210865, + "learning_rate": 1.990255598482289e-05, + "loss": 1.6346, + "step": 12165 + }, + { + "epoch": 0.15, + "grad_norm": 11.834944535622128, + "learning_rate": 1.990247461423935e-05, + "loss": 1.9313, + "step": 12168 + }, + { + "epoch": 0.15, + "grad_norm": 27.64115360504241, + "learning_rate": 1.9902393209862233e-05, + "loss": 1.9916, + "step": 12171 + }, + { + "epoch": 0.15, + "grad_norm": 53.59910155841231, + "learning_rate": 1.9902311771691814e-05, + "loss": 1.7289, + "step": 12174 + }, + { + "epoch": 0.15, + "grad_norm": 27.741636521405994, + "learning_rate": 1.9902230299728374e-05, + "loss": 1.776, + "step": 12177 + }, + { + "epoch": 0.15, + "grad_norm": 15.336379359006536, + "learning_rate": 1.9902148793972186e-05, + "loss": 1.6056, + "step": 12180 + }, + { + "epoch": 0.15, + "grad_norm": 25.938061122227655, + "learning_rate": 1.990206725442353e-05, + "loss": 1.4797, + "step": 12183 + }, + { + "epoch": 0.15, + "grad_norm": 28.248079033755598, + "learning_rate": 1.990198568108269e-05, + "loss": 1.661, + "step": 12186 + }, + { + "epoch": 0.15, + "grad_norm": 7.609796726335526, + "learning_rate": 1.9901904073949935e-05, + "loss": 1.8776, + "step": 12189 + }, + { + "epoch": 0.15, + "grad_norm": 35.87254628055684, + "learning_rate": 1.990182243302555e-05, + "loss": 1.5186, + "step": 12192 + }, + { + "epoch": 0.15, + "grad_norm": 16.960923840022712, + "learning_rate": 1.9901740758309813e-05, + "loss": 1.9654, + "step": 12195 + }, + { + "epoch": 0.15, + "grad_norm": 7.13383522102447, + "learning_rate": 1.9901659049803e-05, + "loss": 1.4025, + "step": 12198 + }, + { + "epoch": 0.15, + "grad_norm": 14.930817592583555, + "learning_rate": 1.990157730750539e-05, + "loss": 1.7397, + "step": 12201 + }, + { + "epoch": 0.15, + "grad_norm": 84.99498510494152, + "learning_rate": 1.9901495531417265e-05, + "loss": 1.4941, + "step": 12204 + }, + { + "epoch": 0.15, + "grad_norm": 41.63430822225172, + "learning_rate": 1.99014137215389e-05, + "loss": 1.5994, + "step": 12207 + }, + { + "epoch": 0.15, + "grad_norm": 32.44344387503518, + "learning_rate": 1.990133187787058e-05, + "loss": 1.6661, + "step": 12210 + }, + { + "epoch": 0.15, + "grad_norm": 21.26022711919686, + "learning_rate": 1.9901250000412578e-05, + "loss": 1.777, + "step": 12213 + }, + { + "epoch": 0.15, + "grad_norm": 23.790205142158342, + "learning_rate": 1.9901168089165177e-05, + "loss": 1.478, + "step": 12216 + }, + { + "epoch": 0.15, + "grad_norm": 31.61907056435828, + "learning_rate": 1.9901086144128653e-05, + "loss": 1.3923, + "step": 12219 + }, + { + "epoch": 0.15, + "grad_norm": 4.1866606942903255, + "learning_rate": 1.9901004165303293e-05, + "loss": 1.5632, + "step": 12222 + }, + { + "epoch": 0.15, + "grad_norm": 12.10528859887597, + "learning_rate": 1.990092215268937e-05, + "loss": 1.4574, + "step": 12225 + }, + { + "epoch": 0.15, + "grad_norm": 22.61920336438047, + "learning_rate": 1.990084010628717e-05, + "loss": 1.5292, + "step": 12228 + }, + { + "epoch": 0.15, + "grad_norm": 24.672004137444766, + "learning_rate": 1.9900758026096963e-05, + "loss": 1.6442, + "step": 12231 + }, + { + "epoch": 0.15, + "grad_norm": 6.79026874805277, + "learning_rate": 1.9900675912119032e-05, + "loss": 2.1015, + "step": 12234 + }, + { + "epoch": 0.15, + "grad_norm": 47.48601019499983, + "learning_rate": 1.990059376435367e-05, + "loss": 1.6139, + "step": 12237 + }, + { + "epoch": 0.15, + "grad_norm": 14.86000005715405, + "learning_rate": 1.9900511582801142e-05, + "loss": 1.8148, + "step": 12240 + }, + { + "epoch": 0.15, + "grad_norm": 55.41008575775941, + "learning_rate": 1.9900429367461737e-05, + "loss": 1.5084, + "step": 12243 + }, + { + "epoch": 0.15, + "grad_norm": 33.013452589922245, + "learning_rate": 1.990034711833573e-05, + "loss": 1.7732, + "step": 12246 + }, + { + "epoch": 0.15, + "grad_norm": 23.259962516069564, + "learning_rate": 1.9900264835423403e-05, + "loss": 1.4781, + "step": 12249 + }, + { + "epoch": 0.15, + "grad_norm": 19.985979888143188, + "learning_rate": 1.990018251872504e-05, + "loss": 1.8044, + "step": 12252 + }, + { + "epoch": 0.15, + "grad_norm": 44.07551977947327, + "learning_rate": 1.9900100168240922e-05, + "loss": 1.7585, + "step": 12255 + }, + { + "epoch": 0.15, + "grad_norm": 6.015506355719682, + "learning_rate": 1.9900017783971326e-05, + "loss": 2.013, + "step": 12258 + }, + { + "epoch": 0.15, + "grad_norm": 8.684451282331985, + "learning_rate": 1.9899935365916537e-05, + "loss": 1.5671, + "step": 12261 + }, + { + "epoch": 0.15, + "grad_norm": 7.4864945611691445, + "learning_rate": 1.9899852914076833e-05, + "loss": 1.648, + "step": 12264 + }, + { + "epoch": 0.15, + "grad_norm": 18.37621988753761, + "learning_rate": 1.9899770428452497e-05, + "loss": 1.8455, + "step": 12267 + }, + { + "epoch": 0.15, + "grad_norm": 7.4231481377665185, + "learning_rate": 1.989968790904381e-05, + "loss": 1.3566, + "step": 12270 + }, + { + "epoch": 0.15, + "grad_norm": 16.70000713345763, + "learning_rate": 1.9899605355851056e-05, + "loss": 1.2683, + "step": 12273 + }, + { + "epoch": 0.15, + "grad_norm": 5.734460509738135, + "learning_rate": 1.9899522768874515e-05, + "loss": 1.9339, + "step": 12276 + }, + { + "epoch": 0.15, + "grad_norm": 11.892506572334488, + "learning_rate": 1.9899440148114465e-05, + "loss": 1.9109, + "step": 12279 + }, + { + "epoch": 0.15, + "grad_norm": 46.16693382149323, + "learning_rate": 1.9899357493571194e-05, + "loss": 1.741, + "step": 12282 + }, + { + "epoch": 0.15, + "grad_norm": 9.628948449929473, + "learning_rate": 1.989927480524498e-05, + "loss": 1.3747, + "step": 12285 + }, + { + "epoch": 0.15, + "grad_norm": 10.850088300337395, + "learning_rate": 1.989919208313611e-05, + "loss": 1.6544, + "step": 12288 + }, + { + "epoch": 0.15, + "grad_norm": 32.764953011601584, + "learning_rate": 1.9899109327244862e-05, + "loss": 1.5045, + "step": 12291 + }, + { + "epoch": 0.15, + "grad_norm": 4.0112422577642075, + "learning_rate": 1.9899026537571518e-05, + "loss": 1.7275, + "step": 12294 + }, + { + "epoch": 0.15, + "grad_norm": 17.36879506562286, + "learning_rate": 1.9898943714116362e-05, + "loss": 1.8208, + "step": 12297 + }, + { + "epoch": 0.15, + "grad_norm": 10.567305642405154, + "learning_rate": 1.989886085687968e-05, + "loss": 1.2036, + "step": 12300 + }, + { + "epoch": 0.15, + "grad_norm": 17.397039236844268, + "learning_rate": 1.9898777965861746e-05, + "loss": 1.941, + "step": 12303 + }, + { + "epoch": 0.15, + "grad_norm": 2.7541994311510587, + "learning_rate": 1.9898695041062852e-05, + "loss": 1.5216, + "step": 12306 + }, + { + "epoch": 0.15, + "grad_norm": 33.170035218160635, + "learning_rate": 1.9898612082483275e-05, + "loss": 1.7802, + "step": 12309 + }, + { + "epoch": 0.15, + "grad_norm": 29.84362052950021, + "learning_rate": 1.9898529090123305e-05, + "loss": 1.8265, + "step": 12312 + }, + { + "epoch": 0.15, + "grad_norm": 7.118977757385992, + "learning_rate": 1.9898446063983217e-05, + "loss": 1.9476, + "step": 12315 + }, + { + "epoch": 0.15, + "grad_norm": 25.360293836341864, + "learning_rate": 1.9898363004063296e-05, + "loss": 2.1471, + "step": 12318 + }, + { + "epoch": 0.15, + "grad_norm": 11.435489778036445, + "learning_rate": 1.989827991036383e-05, + "loss": 1.7217, + "step": 12321 + }, + { + "epoch": 0.15, + "grad_norm": 126.06977058182254, + "learning_rate": 1.9898196782885104e-05, + "loss": 1.2502, + "step": 12324 + }, + { + "epoch": 0.15, + "grad_norm": 19.156355608521253, + "learning_rate": 1.989811362162739e-05, + "loss": 1.9741, + "step": 12327 + }, + { + "epoch": 0.15, + "grad_norm": 4.890515778309036, + "learning_rate": 1.9898030426590983e-05, + "loss": 1.9891, + "step": 12330 + }, + { + "epoch": 0.15, + "grad_norm": 18.80582033574374, + "learning_rate": 1.9897947197776166e-05, + "loss": 1.7135, + "step": 12333 + }, + { + "epoch": 0.15, + "grad_norm": 7.7996430270725225, + "learning_rate": 1.9897863935183216e-05, + "loss": 1.6532, + "step": 12336 + }, + { + "epoch": 0.15, + "grad_norm": 3.872092612900985, + "learning_rate": 1.9897780638812425e-05, + "loss": 1.2945, + "step": 12339 + }, + { + "epoch": 0.15, + "grad_norm": 7.3192276152513, + "learning_rate": 1.989769730866407e-05, + "loss": 1.5463, + "step": 12342 + }, + { + "epoch": 0.15, + "grad_norm": 21.050190469383306, + "learning_rate": 1.9897613944738445e-05, + "loss": 1.796, + "step": 12345 + }, + { + "epoch": 0.15, + "grad_norm": 67.3357834301772, + "learning_rate": 1.9897530547035825e-05, + "loss": 1.3742, + "step": 12348 + }, + { + "epoch": 0.15, + "grad_norm": 3.218386613867841, + "learning_rate": 1.9897447115556496e-05, + "loss": 1.5262, + "step": 12351 + }, + { + "epoch": 0.15, + "grad_norm": 5.879374567554701, + "learning_rate": 1.989736365030075e-05, + "loss": 1.6965, + "step": 12354 + }, + { + "epoch": 0.15, + "grad_norm": 17.872359618158832, + "learning_rate": 1.9897280151268865e-05, + "loss": 1.7218, + "step": 12357 + }, + { + "epoch": 0.15, + "grad_norm": 19.394335132287363, + "learning_rate": 1.989719661846113e-05, + "loss": 1.4812, + "step": 12360 + }, + { + "epoch": 0.15, + "grad_norm": 8.03897563172768, + "learning_rate": 1.9897113051877822e-05, + "loss": 1.7081, + "step": 12363 + }, + { + "epoch": 0.15, + "grad_norm": 10.631389298789774, + "learning_rate": 1.9897029451519233e-05, + "loss": 1.55, + "step": 12366 + }, + { + "epoch": 0.15, + "grad_norm": 17.547532557297156, + "learning_rate": 1.989694581738565e-05, + "loss": 1.6523, + "step": 12369 + }, + { + "epoch": 0.15, + "grad_norm": 13.258235880522136, + "learning_rate": 1.9896862149477356e-05, + "loss": 1.8458, + "step": 12372 + }, + { + "epoch": 0.15, + "grad_norm": 27.530726169101623, + "learning_rate": 1.989677844779464e-05, + "loss": 1.8433, + "step": 12375 + }, + { + "epoch": 0.15, + "grad_norm": 7.4978134741594715, + "learning_rate": 1.9896694712337777e-05, + "loss": 1.512, + "step": 12378 + }, + { + "epoch": 0.15, + "grad_norm": 5.097755556894284, + "learning_rate": 1.9896610943107062e-05, + "loss": 1.56, + "step": 12381 + }, + { + "epoch": 0.15, + "grad_norm": 16.83622418353268, + "learning_rate": 1.989652714010278e-05, + "loss": 1.5396, + "step": 12384 + }, + { + "epoch": 0.15, + "grad_norm": 7.681987128633916, + "learning_rate": 1.9896443303325215e-05, + "loss": 1.4088, + "step": 12387 + }, + { + "epoch": 0.15, + "grad_norm": 9.016586337197909, + "learning_rate": 1.989635943277465e-05, + "loss": 1.4859, + "step": 12390 + }, + { + "epoch": 0.15, + "grad_norm": 7.017311218950667, + "learning_rate": 1.989627552845138e-05, + "loss": 1.9215, + "step": 12393 + }, + { + "epoch": 0.15, + "grad_norm": 69.61759229046044, + "learning_rate": 1.9896191590355686e-05, + "loss": 1.8001, + "step": 12396 + }, + { + "epoch": 0.15, + "grad_norm": 20.643471891467687, + "learning_rate": 1.9896107618487856e-05, + "loss": 1.4817, + "step": 12399 + }, + { + "epoch": 0.15, + "grad_norm": 34.303030425258356, + "learning_rate": 1.9896023612848168e-05, + "loss": 1.8268, + "step": 12402 + }, + { + "epoch": 0.15, + "grad_norm": 85.38584852955324, + "learning_rate": 1.989593957343692e-05, + "loss": 1.6359, + "step": 12405 + }, + { + "epoch": 0.15, + "grad_norm": 4.657428441365014, + "learning_rate": 1.9895855500254396e-05, + "loss": 1.5033, + "step": 12408 + }, + { + "epoch": 0.15, + "grad_norm": 43.36541422179996, + "learning_rate": 1.989577139330088e-05, + "loss": 1.4567, + "step": 12411 + }, + { + "epoch": 0.15, + "grad_norm": 7.133787954630456, + "learning_rate": 1.9895687252576664e-05, + "loss": 1.7463, + "step": 12414 + }, + { + "epoch": 0.15, + "grad_norm": 18.467578702267883, + "learning_rate": 1.989560307808203e-05, + "loss": 1.6718, + "step": 12417 + }, + { + "epoch": 0.15, + "grad_norm": 38.14745003048724, + "learning_rate": 1.9895518869817265e-05, + "loss": 1.3973, + "step": 12420 + }, + { + "epoch": 0.15, + "grad_norm": 10.822032619125947, + "learning_rate": 1.989543462778266e-05, + "loss": 1.78, + "step": 12423 + }, + { + "epoch": 0.15, + "grad_norm": 54.805988012230856, + "learning_rate": 1.9895350351978503e-05, + "loss": 1.4237, + "step": 12426 + }, + { + "epoch": 0.15, + "grad_norm": 50.05061490007162, + "learning_rate": 1.9895266042405078e-05, + "loss": 2.1289, + "step": 12429 + }, + { + "epoch": 0.15, + "grad_norm": 11.481154412482569, + "learning_rate": 1.989518169906267e-05, + "loss": 1.8103, + "step": 12432 + }, + { + "epoch": 0.15, + "grad_norm": 17.109413716173012, + "learning_rate": 1.9895097321951576e-05, + "loss": 1.4075, + "step": 12435 + }, + { + "epoch": 0.15, + "grad_norm": 41.93838693797585, + "learning_rate": 1.9895012911072075e-05, + "loss": 1.7705, + "step": 12438 + }, + { + "epoch": 0.15, + "grad_norm": 10.881458349001178, + "learning_rate": 1.9894928466424464e-05, + "loss": 1.5387, + "step": 12441 + }, + { + "epoch": 0.15, + "grad_norm": 15.895802731511719, + "learning_rate": 1.9894843988009024e-05, + "loss": 1.5343, + "step": 12444 + }, + { + "epoch": 0.15, + "grad_norm": 13.554243351741919, + "learning_rate": 1.9894759475826046e-05, + "loss": 1.6213, + "step": 12447 + }, + { + "epoch": 0.15, + "grad_norm": 45.356198640900935, + "learning_rate": 1.989467492987582e-05, + "loss": 1.5972, + "step": 12450 + }, + { + "epoch": 0.15, + "grad_norm": 52.073492375800456, + "learning_rate": 1.989459035015863e-05, + "loss": 1.6433, + "step": 12453 + }, + { + "epoch": 0.15, + "grad_norm": 21.08191931420299, + "learning_rate": 1.9894505736674764e-05, + "loss": 1.592, + "step": 12456 + }, + { + "epoch": 0.15, + "grad_norm": 20.56099297180446, + "learning_rate": 1.989442108942452e-05, + "loss": 1.4754, + "step": 12459 + }, + { + "epoch": 0.15, + "grad_norm": 10.336208738866276, + "learning_rate": 1.9894336408408176e-05, + "loss": 1.3192, + "step": 12462 + }, + { + "epoch": 0.15, + "grad_norm": 14.634118615438643, + "learning_rate": 1.989425169362603e-05, + "loss": 1.1178, + "step": 12465 + }, + { + "epoch": 0.15, + "grad_norm": 6.548788246604262, + "learning_rate": 1.989416694507836e-05, + "loss": 1.4963, + "step": 12468 + }, + { + "epoch": 0.15, + "grad_norm": 7.9284957513189545, + "learning_rate": 1.9894082162765472e-05, + "loss": 1.9069, + "step": 12471 + }, + { + "epoch": 0.15, + "grad_norm": 3.7745378360884443, + "learning_rate": 1.989399734668764e-05, + "loss": 1.6501, + "step": 12474 + }, + { + "epoch": 0.15, + "grad_norm": 7.392363359659902, + "learning_rate": 1.9893912496845158e-05, + "loss": 1.5507, + "step": 12477 + }, + { + "epoch": 0.15, + "grad_norm": 5.789012548595248, + "learning_rate": 1.9893827613238318e-05, + "loss": 1.7552, + "step": 12480 + }, + { + "epoch": 0.15, + "grad_norm": 24.268818326781222, + "learning_rate": 1.9893742695867403e-05, + "loss": 1.806, + "step": 12483 + }, + { + "epoch": 0.15, + "grad_norm": 31.337808458420607, + "learning_rate": 1.9893657744732714e-05, + "loss": 1.6348, + "step": 12486 + }, + { + "epoch": 0.15, + "grad_norm": 6.862891443772635, + "learning_rate": 1.9893572759834533e-05, + "loss": 1.9072, + "step": 12489 + }, + { + "epoch": 0.15, + "grad_norm": 15.115698295783497, + "learning_rate": 1.9893487741173154e-05, + "loss": 1.622, + "step": 12492 + }, + { + "epoch": 0.15, + "grad_norm": 30.44680772908564, + "learning_rate": 1.9893402688748863e-05, + "loss": 1.488, + "step": 12495 + }, + { + "epoch": 0.15, + "grad_norm": 12.222040024090669, + "learning_rate": 1.989331760256195e-05, + "loss": 1.6584, + "step": 12498 + }, + { + "epoch": 0.15, + "grad_norm": 10.826298242106633, + "learning_rate": 1.989323248261271e-05, + "loss": 1.6532, + "step": 12501 + }, + { + "epoch": 0.15, + "grad_norm": 88.68279187549393, + "learning_rate": 1.9893147328901433e-05, + "loss": 1.9528, + "step": 12504 + }, + { + "epoch": 0.15, + "grad_norm": 30.18729537377127, + "learning_rate": 1.9893062141428404e-05, + "loss": 2.152, + "step": 12507 + }, + { + "epoch": 0.15, + "grad_norm": 22.02142305973098, + "learning_rate": 1.989297692019392e-05, + "loss": 1.88, + "step": 12510 + }, + { + "epoch": 0.15, + "grad_norm": 25.274843954366492, + "learning_rate": 1.989289166519827e-05, + "loss": 1.5613, + "step": 12513 + }, + { + "epoch": 0.15, + "grad_norm": 44.61175619857355, + "learning_rate": 1.989280637644174e-05, + "loss": 1.4506, + "step": 12516 + }, + { + "epoch": 0.15, + "grad_norm": 24.380602848508317, + "learning_rate": 1.9892721053924626e-05, + "loss": 1.5307, + "step": 12519 + }, + { + "epoch": 0.15, + "grad_norm": 46.23473078906917, + "learning_rate": 1.989263569764722e-05, + "loss": 1.443, + "step": 12522 + }, + { + "epoch": 0.15, + "grad_norm": 23.509883857577442, + "learning_rate": 1.9892550307609813e-05, + "loss": 1.431, + "step": 12525 + }, + { + "epoch": 0.15, + "grad_norm": 105.45453077226139, + "learning_rate": 1.9892464883812695e-05, + "loss": 1.7438, + "step": 12528 + }, + { + "epoch": 0.15, + "grad_norm": 41.97073681629095, + "learning_rate": 1.9892379426256153e-05, + "loss": 1.7633, + "step": 12531 + }, + { + "epoch": 0.15, + "grad_norm": 13.706798543436683, + "learning_rate": 1.9892293934940486e-05, + "loss": 1.39, + "step": 12534 + }, + { + "epoch": 0.15, + "grad_norm": 12.661413831542877, + "learning_rate": 1.9892208409865986e-05, + "loss": 1.8139, + "step": 12537 + }, + { + "epoch": 0.15, + "grad_norm": 4.498619811113271, + "learning_rate": 1.9892122851032937e-05, + "loss": 2.0474, + "step": 12540 + }, + { + "epoch": 0.15, + "grad_norm": 6.663635912439366, + "learning_rate": 1.9892037258441638e-05, + "loss": 1.436, + "step": 12543 + }, + { + "epoch": 0.15, + "grad_norm": 15.505173318332096, + "learning_rate": 1.989195163209238e-05, + "loss": 1.8311, + "step": 12546 + }, + { + "epoch": 0.15, + "grad_norm": 9.732411384132185, + "learning_rate": 1.9891865971985455e-05, + "loss": 1.8083, + "step": 12549 + }, + { + "epoch": 0.15, + "grad_norm": 15.602259119674471, + "learning_rate": 1.989178027812115e-05, + "loss": 1.8971, + "step": 12552 + }, + { + "epoch": 0.15, + "grad_norm": 22.79190812365279, + "learning_rate": 1.9891694550499765e-05, + "loss": 1.765, + "step": 12555 + }, + { + "epoch": 0.15, + "grad_norm": 16.8534200395362, + "learning_rate": 1.989160878912159e-05, + "loss": 1.7419, + "step": 12558 + }, + { + "epoch": 0.15, + "grad_norm": 6.661773686504378, + "learning_rate": 1.9891522993986917e-05, + "loss": 1.4555, + "step": 12561 + }, + { + "epoch": 0.15, + "grad_norm": 12.894122351452497, + "learning_rate": 1.9891437165096037e-05, + "loss": 1.7687, + "step": 12564 + }, + { + "epoch": 0.15, + "grad_norm": 43.23750293043736, + "learning_rate": 1.9891351302449245e-05, + "loss": 1.5258, + "step": 12567 + }, + { + "epoch": 0.15, + "grad_norm": 97.7856065358071, + "learning_rate": 1.9891265406046837e-05, + "loss": 1.465, + "step": 12570 + }, + { + "epoch": 0.15, + "grad_norm": 11.32387578812793, + "learning_rate": 1.98911794758891e-05, + "loss": 1.6709, + "step": 12573 + }, + { + "epoch": 0.15, + "grad_norm": 4.191673996477237, + "learning_rate": 1.989109351197633e-05, + "loss": 1.5792, + "step": 12576 + }, + { + "epoch": 0.15, + "grad_norm": 11.918082508664, + "learning_rate": 1.9891007514308823e-05, + "loss": 1.8856, + "step": 12579 + }, + { + "epoch": 0.15, + "grad_norm": 25.519351338393946, + "learning_rate": 1.9890921482886867e-05, + "loss": 1.5886, + "step": 12582 + }, + { + "epoch": 0.15, + "grad_norm": 8.15919199588434, + "learning_rate": 1.9890835417710763e-05, + "loss": 1.528, + "step": 12585 + }, + { + "epoch": 0.15, + "grad_norm": 12.261011935773375, + "learning_rate": 1.9890749318780795e-05, + "loss": 1.7808, + "step": 12588 + }, + { + "epoch": 0.15, + "grad_norm": 10.045314279472818, + "learning_rate": 1.9890663186097267e-05, + "loss": 1.6131, + "step": 12591 + }, + { + "epoch": 0.15, + "grad_norm": 6.09502969667047, + "learning_rate": 1.9890577019660464e-05, + "loss": 1.3499, + "step": 12594 + }, + { + "epoch": 0.15, + "grad_norm": 56.65079876126171, + "learning_rate": 1.9890490819470686e-05, + "loss": 1.6816, + "step": 12597 + }, + { + "epoch": 0.15, + "grad_norm": 5.21289745490758, + "learning_rate": 1.9890404585528228e-05, + "loss": 1.4623, + "step": 12600 + }, + { + "epoch": 0.15, + "grad_norm": 36.04663709324028, + "learning_rate": 1.9890318317833376e-05, + "loss": 1.505, + "step": 12603 + }, + { + "epoch": 0.15, + "grad_norm": 6.684763445586818, + "learning_rate": 1.9890232016386434e-05, + "loss": 1.5347, + "step": 12606 + }, + { + "epoch": 0.15, + "grad_norm": 34.14069569368802, + "learning_rate": 1.9890145681187692e-05, + "loss": 1.8424, + "step": 12609 + }, + { + "epoch": 0.15, + "grad_norm": 11.091096887296617, + "learning_rate": 1.9890059312237445e-05, + "loss": 1.516, + "step": 12612 + }, + { + "epoch": 0.15, + "grad_norm": 82.58135928455951, + "learning_rate": 1.988997290953599e-05, + "loss": 1.6923, + "step": 12615 + }, + { + "epoch": 0.15, + "grad_norm": 19.421526927557757, + "learning_rate": 1.9889886473083618e-05, + "loss": 1.5317, + "step": 12618 + }, + { + "epoch": 0.15, + "grad_norm": 28.019831254590425, + "learning_rate": 1.9889800002880624e-05, + "loss": 1.8218, + "step": 12621 + }, + { + "epoch": 0.15, + "grad_norm": 12.961883908847167, + "learning_rate": 1.9889713498927308e-05, + "loss": 1.4412, + "step": 12624 + }, + { + "epoch": 0.15, + "grad_norm": 22.314124480171543, + "learning_rate": 1.988962696122396e-05, + "loss": 1.8259, + "step": 12627 + }, + { + "epoch": 0.15, + "grad_norm": 8.768880865445599, + "learning_rate": 1.9889540389770877e-05, + "loss": 1.6709, + "step": 12630 + }, + { + "epoch": 0.15, + "grad_norm": 17.055834134132194, + "learning_rate": 1.9889453784568355e-05, + "loss": 1.7069, + "step": 12633 + }, + { + "epoch": 0.15, + "grad_norm": 6.223432926880844, + "learning_rate": 1.9889367145616693e-05, + "loss": 1.8809, + "step": 12636 + }, + { + "epoch": 0.15, + "grad_norm": 16.858972993221176, + "learning_rate": 1.988928047291618e-05, + "loss": 1.649, + "step": 12639 + }, + { + "epoch": 0.15, + "grad_norm": 30.010197926346834, + "learning_rate": 1.9889193766467114e-05, + "loss": 1.8071, + "step": 12642 + }, + { + "epoch": 0.15, + "grad_norm": 27.81979681223678, + "learning_rate": 1.9889107026269793e-05, + "loss": 1.5911, + "step": 12645 + }, + { + "epoch": 0.15, + "grad_norm": 8.30635062544822, + "learning_rate": 1.9889020252324515e-05, + "loss": 1.4543, + "step": 12648 + }, + { + "epoch": 0.15, + "grad_norm": 9.570838019889353, + "learning_rate": 1.9888933444631567e-05, + "loss": 2.0778, + "step": 12651 + }, + { + "epoch": 0.15, + "grad_norm": 12.00487650134552, + "learning_rate": 1.9888846603191255e-05, + "loss": 1.7704, + "step": 12654 + }, + { + "epoch": 0.15, + "grad_norm": 4.777728170878409, + "learning_rate": 1.988875972800387e-05, + "loss": 1.5961, + "step": 12657 + }, + { + "epoch": 0.15, + "grad_norm": 45.36658037113883, + "learning_rate": 1.988867281906971e-05, + "loss": 1.8366, + "step": 12660 + }, + { + "epoch": 0.15, + "grad_norm": 11.858513547477793, + "learning_rate": 1.988858587638907e-05, + "loss": 1.7806, + "step": 12663 + }, + { + "epoch": 0.15, + "grad_norm": 20.46661121813683, + "learning_rate": 1.988849889996225e-05, + "loss": 1.6559, + "step": 12666 + }, + { + "epoch": 0.15, + "grad_norm": 4.273077804864822, + "learning_rate": 1.988841188978955e-05, + "loss": 2.0637, + "step": 12669 + }, + { + "epoch": 0.15, + "grad_norm": 132.65665492123452, + "learning_rate": 1.988832484587125e-05, + "loss": 1.4393, + "step": 12672 + }, + { + "epoch": 0.15, + "grad_norm": 26.120939739793958, + "learning_rate": 1.988823776820767e-05, + "loss": 1.6229, + "step": 12675 + }, + { + "epoch": 0.15, + "grad_norm": 11.139739387232865, + "learning_rate": 1.9888150656799092e-05, + "loss": 1.5671, + "step": 12678 + }, + { + "epoch": 0.15, + "grad_norm": 46.188812841340265, + "learning_rate": 1.988806351164582e-05, + "loss": 1.6292, + "step": 12681 + }, + { + "epoch": 0.15, + "grad_norm": 57.45587197867776, + "learning_rate": 1.9887976332748146e-05, + "loss": 1.376, + "step": 12684 + }, + { + "epoch": 0.15, + "grad_norm": 9.034633537115088, + "learning_rate": 1.9887889120106368e-05, + "loss": 1.5162, + "step": 12687 + }, + { + "epoch": 0.15, + "grad_norm": 7.526038309474274, + "learning_rate": 1.9887801873720793e-05, + "loss": 1.75, + "step": 12690 + }, + { + "epoch": 0.15, + "grad_norm": 171.16489768257983, + "learning_rate": 1.988771459359171e-05, + "loss": 1.755, + "step": 12693 + }, + { + "epoch": 0.15, + "grad_norm": 13.219307355387928, + "learning_rate": 1.9887627279719413e-05, + "loss": 1.5704, + "step": 12696 + }, + { + "epoch": 0.15, + "grad_norm": 20.344821365252315, + "learning_rate": 1.988753993210421e-05, + "loss": 1.9812, + "step": 12699 + }, + { + "epoch": 0.15, + "grad_norm": 8.20963251899144, + "learning_rate": 1.9887452550746393e-05, + "loss": 1.6348, + "step": 12702 + }, + { + "epoch": 0.15, + "grad_norm": 10.69325235487269, + "learning_rate": 1.9887365135646265e-05, + "loss": 1.847, + "step": 12705 + }, + { + "epoch": 0.15, + "grad_norm": 6.544043180695672, + "learning_rate": 1.9887277686804115e-05, + "loss": 1.5431, + "step": 12708 + }, + { + "epoch": 0.15, + "grad_norm": 23.47130013093883, + "learning_rate": 1.988719020422025e-05, + "loss": 1.4443, + "step": 12711 + }, + { + "epoch": 0.15, + "grad_norm": 52.42884808144645, + "learning_rate": 1.9887102687894972e-05, + "loss": 1.4356, + "step": 12714 + }, + { + "epoch": 0.15, + "grad_norm": 145.57014443586635, + "learning_rate": 1.988701513782857e-05, + "loss": 1.9799, + "step": 12717 + }, + { + "epoch": 0.15, + "grad_norm": 11.426547377512817, + "learning_rate": 1.9886927554021345e-05, + "loss": 1.9049, + "step": 12720 + }, + { + "epoch": 0.15, + "grad_norm": 9.040795413081492, + "learning_rate": 1.98868399364736e-05, + "loss": 1.8843, + "step": 12723 + }, + { + "epoch": 0.15, + "grad_norm": 6.123338563851534, + "learning_rate": 1.988675228518563e-05, + "loss": 1.3654, + "step": 12726 + }, + { + "epoch": 0.15, + "grad_norm": 56.78994974131681, + "learning_rate": 1.9886664600157736e-05, + "loss": 1.5681, + "step": 12729 + }, + { + "epoch": 0.15, + "grad_norm": 21.35264562429155, + "learning_rate": 1.9886576881390218e-05, + "loss": 1.4731, + "step": 12732 + }, + { + "epoch": 0.15, + "grad_norm": 13.359178990160991, + "learning_rate": 1.988648912888337e-05, + "loss": 1.4321, + "step": 12735 + }, + { + "epoch": 0.15, + "grad_norm": 29.81248881869004, + "learning_rate": 1.98864013426375e-05, + "loss": 1.7686, + "step": 12738 + }, + { + "epoch": 0.15, + "grad_norm": 66.70999198986365, + "learning_rate": 1.9886313522652903e-05, + "loss": 1.3127, + "step": 12741 + }, + { + "epoch": 0.15, + "grad_norm": 5.901529767593974, + "learning_rate": 1.988622566892988e-05, + "loss": 1.665, + "step": 12744 + }, + { + "epoch": 0.15, + "grad_norm": 7.263043739975262, + "learning_rate": 1.9886137781468725e-05, + "loss": 1.7335, + "step": 12747 + }, + { + "epoch": 0.15, + "grad_norm": 53.48355794326484, + "learning_rate": 1.9886049860269744e-05, + "loss": 1.867, + "step": 12750 + }, + { + "epoch": 0.15, + "grad_norm": 28.709877060356735, + "learning_rate": 1.9885961905333238e-05, + "loss": 1.5713, + "step": 12753 + }, + { + "epoch": 0.15, + "grad_norm": 46.65191406564894, + "learning_rate": 1.9885873916659504e-05, + "loss": 1.9844, + "step": 12756 + }, + { + "epoch": 0.15, + "grad_norm": 11.264700899348316, + "learning_rate": 1.9885785894248845e-05, + "loss": 1.545, + "step": 12759 + }, + { + "epoch": 0.15, + "grad_norm": 30.433943855956862, + "learning_rate": 1.9885697838101558e-05, + "loss": 1.9104, + "step": 12762 + }, + { + "epoch": 0.15, + "grad_norm": 20.199171661044844, + "learning_rate": 1.9885609748217944e-05, + "loss": 1.7906, + "step": 12765 + }, + { + "epoch": 0.15, + "grad_norm": 16.61106256820484, + "learning_rate": 1.9885521624598306e-05, + "loss": 1.6488, + "step": 12768 + }, + { + "epoch": 0.15, + "grad_norm": 23.212715004701582, + "learning_rate": 1.988543346724294e-05, + "loss": 1.592, + "step": 12771 + }, + { + "epoch": 0.15, + "grad_norm": 7.548544577025258, + "learning_rate": 1.9885345276152155e-05, + "loss": 1.7806, + "step": 12774 + }, + { + "epoch": 0.15, + "grad_norm": 21.131487413677025, + "learning_rate": 1.988525705132625e-05, + "loss": 1.6364, + "step": 12777 + }, + { + "epoch": 0.15, + "grad_norm": 47.757935311365365, + "learning_rate": 1.9885168792765518e-05, + "loss": 1.3965, + "step": 12780 + }, + { + "epoch": 0.15, + "grad_norm": 8.42409985792235, + "learning_rate": 1.9885080500470266e-05, + "loss": 1.2817, + "step": 12783 + }, + { + "epoch": 0.15, + "grad_norm": 20.936361864584956, + "learning_rate": 1.9884992174440796e-05, + "loss": 2.0576, + "step": 12786 + }, + { + "epoch": 0.15, + "grad_norm": 29.576986192590674, + "learning_rate": 1.9884903814677408e-05, + "loss": 1.6731, + "step": 12789 + }, + { + "epoch": 0.15, + "grad_norm": 24.394128186287986, + "learning_rate": 1.9884815421180405e-05, + "loss": 1.7393, + "step": 12792 + }, + { + "epoch": 0.15, + "grad_norm": 32.787472885438994, + "learning_rate": 1.9884726993950084e-05, + "loss": 1.5556, + "step": 12795 + }, + { + "epoch": 0.15, + "grad_norm": 10.786720023775336, + "learning_rate": 1.9884638532986754e-05, + "loss": 1.6067, + "step": 12798 + }, + { + "epoch": 0.15, + "grad_norm": 17.90075428118479, + "learning_rate": 1.988455003829071e-05, + "loss": 2.0278, + "step": 12801 + }, + { + "epoch": 0.15, + "grad_norm": 67.29091773186286, + "learning_rate": 1.988446150986226e-05, + "loss": 1.4481, + "step": 12804 + }, + { + "epoch": 0.15, + "grad_norm": 27.80153820146934, + "learning_rate": 1.98843729477017e-05, + "loss": 1.6329, + "step": 12807 + }, + { + "epoch": 0.15, + "grad_norm": 13.51438650984214, + "learning_rate": 1.988428435180934e-05, + "loss": 1.4326, + "step": 12810 + }, + { + "epoch": 0.15, + "grad_norm": 2.722050759518978, + "learning_rate": 1.9884195722185473e-05, + "loss": 1.9508, + "step": 12813 + }, + { + "epoch": 0.15, + "grad_norm": 5.387391026279367, + "learning_rate": 1.9884107058830406e-05, + "loss": 1.481, + "step": 12816 + }, + { + "epoch": 0.15, + "grad_norm": 10.951951824822034, + "learning_rate": 1.9884018361744445e-05, + "loss": 1.8936, + "step": 12819 + }, + { + "epoch": 0.15, + "grad_norm": 45.76543198957156, + "learning_rate": 1.988392963092789e-05, + "loss": 1.7472, + "step": 12822 + }, + { + "epoch": 0.15, + "grad_norm": 16.40173851429189, + "learning_rate": 1.9883840866381044e-05, + "loss": 1.7935, + "step": 12825 + }, + { + "epoch": 0.15, + "grad_norm": 37.03233727803066, + "learning_rate": 1.9883752068104207e-05, + "loss": 1.6355, + "step": 12828 + }, + { + "epoch": 0.15, + "grad_norm": 13.87148422409584, + "learning_rate": 1.988366323609768e-05, + "loss": 1.5806, + "step": 12831 + }, + { + "epoch": 0.15, + "grad_norm": 10.46307712035781, + "learning_rate": 1.9883574370361778e-05, + "loss": 1.971, + "step": 12834 + }, + { + "epoch": 0.15, + "grad_norm": 59.50169349815856, + "learning_rate": 1.9883485470896792e-05, + "loss": 1.4732, + "step": 12837 + }, + { + "epoch": 0.15, + "grad_norm": 41.421552528382776, + "learning_rate": 1.9883396537703033e-05, + "loss": 1.37, + "step": 12840 + }, + { + "epoch": 0.15, + "grad_norm": 9.008247070269826, + "learning_rate": 1.98833075707808e-05, + "loss": 2.1064, + "step": 12843 + }, + { + "epoch": 0.15, + "grad_norm": 69.26649913129192, + "learning_rate": 1.9883218570130398e-05, + "loss": 1.4155, + "step": 12846 + }, + { + "epoch": 0.15, + "grad_norm": 27.50780878967537, + "learning_rate": 1.9883129535752133e-05, + "loss": 1.5612, + "step": 12849 + }, + { + "epoch": 0.15, + "grad_norm": 12.366238814583827, + "learning_rate": 1.9883040467646306e-05, + "loss": 1.6104, + "step": 12852 + }, + { + "epoch": 0.15, + "grad_norm": 8.015782477998053, + "learning_rate": 1.988295136581322e-05, + "loss": 1.7053, + "step": 12855 + }, + { + "epoch": 0.15, + "grad_norm": 14.693113899821153, + "learning_rate": 1.9882862230253183e-05, + "loss": 1.6771, + "step": 12858 + }, + { + "epoch": 0.15, + "grad_norm": 14.633783345701387, + "learning_rate": 1.9882773060966496e-05, + "loss": 1.4966, + "step": 12861 + }, + { + "epoch": 0.15, + "grad_norm": 89.27160264527433, + "learning_rate": 1.9882683857953464e-05, + "loss": 1.6125, + "step": 12864 + }, + { + "epoch": 0.15, + "grad_norm": 13.62119979858197, + "learning_rate": 1.9882594621214392e-05, + "loss": 1.5503, + "step": 12867 + }, + { + "epoch": 0.15, + "grad_norm": 48.75882188018632, + "learning_rate": 1.9882505350749586e-05, + "loss": 1.6724, + "step": 12870 + }, + { + "epoch": 0.15, + "grad_norm": 14.014337251287348, + "learning_rate": 1.9882416046559346e-05, + "loss": 1.4353, + "step": 12873 + }, + { + "epoch": 0.15, + "grad_norm": 7.030391740330436, + "learning_rate": 1.9882326708643978e-05, + "loss": 1.4735, + "step": 12876 + }, + { + "epoch": 0.15, + "grad_norm": 13.291182425529744, + "learning_rate": 1.9882237337003793e-05, + "loss": 1.7873, + "step": 12879 + }, + { + "epoch": 0.15, + "grad_norm": 19.61328157734164, + "learning_rate": 1.988214793163909e-05, + "loss": 1.7173, + "step": 12882 + }, + { + "epoch": 0.15, + "grad_norm": 6.906503309689927, + "learning_rate": 1.9882058492550176e-05, + "loss": 1.4976, + "step": 12885 + }, + { + "epoch": 0.15, + "grad_norm": 2.8047404949371413, + "learning_rate": 1.9881969019737357e-05, + "loss": 2.1395, + "step": 12888 + }, + { + "epoch": 0.16, + "grad_norm": 7.111951380051534, + "learning_rate": 1.9881879513200935e-05, + "loss": 1.8641, + "step": 12891 + }, + { + "epoch": 0.16, + "grad_norm": 13.916569552780016, + "learning_rate": 1.9881789972941217e-05, + "loss": 1.5685, + "step": 12894 + }, + { + "epoch": 0.16, + "grad_norm": 3.4271648850803955, + "learning_rate": 1.988170039895851e-05, + "loss": 1.5768, + "step": 12897 + }, + { + "epoch": 0.16, + "grad_norm": 28.86060348006476, + "learning_rate": 1.988161079125312e-05, + "loss": 1.5817, + "step": 12900 + }, + { + "epoch": 0.16, + "grad_norm": 15.40318667001447, + "learning_rate": 1.9881521149825353e-05, + "loss": 1.7518, + "step": 12903 + }, + { + "epoch": 0.16, + "grad_norm": 7.977108724628597, + "learning_rate": 1.988143147467551e-05, + "loss": 1.5088, + "step": 12906 + }, + { + "epoch": 0.16, + "grad_norm": 53.142807642459786, + "learning_rate": 1.98813417658039e-05, + "loss": 1.4496, + "step": 12909 + }, + { + "epoch": 0.16, + "grad_norm": 20.14631267555008, + "learning_rate": 1.9881252023210835e-05, + "loss": 1.9131, + "step": 12912 + }, + { + "epoch": 0.16, + "grad_norm": 33.64452652565301, + "learning_rate": 1.9881162246896614e-05, + "loss": 1.7582, + "step": 12915 + }, + { + "epoch": 0.16, + "grad_norm": 16.44295106816797, + "learning_rate": 1.9881072436861544e-05, + "loss": 1.597, + "step": 12918 + }, + { + "epoch": 0.16, + "grad_norm": 22.18438581108344, + "learning_rate": 1.9880982593105933e-05, + "loss": 1.3975, + "step": 12921 + }, + { + "epoch": 0.16, + "grad_norm": 29.590765859224724, + "learning_rate": 1.9880892715630086e-05, + "loss": 1.8595, + "step": 12924 + }, + { + "epoch": 0.16, + "grad_norm": 9.135322622233279, + "learning_rate": 1.9880802804434315e-05, + "loss": 1.4473, + "step": 12927 + }, + { + "epoch": 0.16, + "grad_norm": 5.81488006901909, + "learning_rate": 1.988071285951892e-05, + "loss": 1.5614, + "step": 12930 + }, + { + "epoch": 0.16, + "grad_norm": 17.196253604842756, + "learning_rate": 1.9880622880884213e-05, + "loss": 1.587, + "step": 12933 + }, + { + "epoch": 0.16, + "grad_norm": 13.952465706356987, + "learning_rate": 1.98805328685305e-05, + "loss": 1.5554, + "step": 12936 + }, + { + "epoch": 0.16, + "grad_norm": 3.347140776179423, + "learning_rate": 1.9880442822458087e-05, + "loss": 1.6137, + "step": 12939 + }, + { + "epoch": 0.16, + "grad_norm": 4.317150626963384, + "learning_rate": 1.988035274266728e-05, + "loss": 1.564, + "step": 12942 + }, + { + "epoch": 0.16, + "grad_norm": 19.244206448433903, + "learning_rate": 1.9880262629158387e-05, + "loss": 1.4736, + "step": 12945 + }, + { + "epoch": 0.16, + "grad_norm": 13.498114956078439, + "learning_rate": 1.9880172481931718e-05, + "loss": 1.5082, + "step": 12948 + }, + { + "epoch": 0.16, + "grad_norm": 20.53723259150854, + "learning_rate": 1.988008230098758e-05, + "loss": 1.8446, + "step": 12951 + }, + { + "epoch": 0.16, + "grad_norm": 15.471095991014401, + "learning_rate": 1.987999208632628e-05, + "loss": 1.4448, + "step": 12954 + }, + { + "epoch": 0.16, + "grad_norm": 8.090612696440829, + "learning_rate": 1.9879901837948124e-05, + "loss": 1.4664, + "step": 12957 + }, + { + "epoch": 0.16, + "grad_norm": 10.716388847250144, + "learning_rate": 1.987981155585342e-05, + "loss": 1.6237, + "step": 12960 + }, + { + "epoch": 0.16, + "grad_norm": 14.64284067019733, + "learning_rate": 1.9879721240042482e-05, + "loss": 1.6362, + "step": 12963 + }, + { + "epoch": 0.16, + "grad_norm": 19.7801793583851, + "learning_rate": 1.987963089051561e-05, + "loss": 1.8457, + "step": 12966 + }, + { + "epoch": 0.16, + "grad_norm": 6.945773581348412, + "learning_rate": 1.9879540507273122e-05, + "loss": 1.7119, + "step": 12969 + }, + { + "epoch": 0.16, + "grad_norm": 4.773055785935513, + "learning_rate": 1.9879450090315316e-05, + "loss": 1.4558, + "step": 12972 + }, + { + "epoch": 0.16, + "grad_norm": 4.8781047523722885, + "learning_rate": 1.987935963964251e-05, + "loss": 1.3171, + "step": 12975 + }, + { + "epoch": 0.16, + "grad_norm": 12.881385131774687, + "learning_rate": 1.9879269155255003e-05, + "loss": 1.647, + "step": 12978 + }, + { + "epoch": 0.16, + "grad_norm": 41.786141721771614, + "learning_rate": 1.987917863715311e-05, + "loss": 1.7523, + "step": 12981 + }, + { + "epoch": 0.16, + "grad_norm": 8.377404709009813, + "learning_rate": 1.987908808533714e-05, + "loss": 1.9278, + "step": 12984 + }, + { + "epoch": 0.16, + "grad_norm": 19.65155066657373, + "learning_rate": 1.98789974998074e-05, + "loss": 1.905, + "step": 12987 + }, + { + "epoch": 0.16, + "grad_norm": 8.898468336872092, + "learning_rate": 1.98789068805642e-05, + "loss": 1.583, + "step": 12990 + }, + { + "epoch": 0.16, + "grad_norm": 6.972966627968349, + "learning_rate": 1.987881622760785e-05, + "loss": 1.8652, + "step": 12993 + }, + { + "epoch": 0.16, + "grad_norm": 4.58652973920947, + "learning_rate": 1.987872554093866e-05, + "loss": 1.5597, + "step": 12996 + }, + { + "epoch": 0.16, + "grad_norm": 16.700698200791933, + "learning_rate": 1.9878634820556936e-05, + "loss": 1.7353, + "step": 12999 + }, + { + "epoch": 0.16, + "grad_norm": 13.52983773266746, + "learning_rate": 1.987854406646299e-05, + "loss": 1.2992, + "step": 13002 + }, + { + "epoch": 0.16, + "grad_norm": 5.328179561217777, + "learning_rate": 1.987845327865713e-05, + "loss": 1.44, + "step": 13005 + }, + { + "epoch": 0.16, + "grad_norm": 16.325429896102797, + "learning_rate": 1.987836245713967e-05, + "loss": 1.7379, + "step": 13008 + }, + { + "epoch": 0.16, + "grad_norm": 31.579028974033527, + "learning_rate": 1.9878271601910916e-05, + "loss": 1.586, + "step": 13011 + }, + { + "epoch": 0.16, + "grad_norm": 40.672027311729515, + "learning_rate": 1.9878180712971177e-05, + "loss": 1.3545, + "step": 13014 + }, + { + "epoch": 0.16, + "grad_norm": 14.06078379742062, + "learning_rate": 1.9878089790320766e-05, + "loss": 1.8101, + "step": 13017 + }, + { + "epoch": 0.16, + "grad_norm": 25.406448079117343, + "learning_rate": 1.9877998833959996e-05, + "loss": 1.686, + "step": 13020 + }, + { + "epoch": 0.16, + "grad_norm": 10.507192467082106, + "learning_rate": 1.987790784388917e-05, + "loss": 1.4606, + "step": 13023 + }, + { + "epoch": 0.16, + "grad_norm": 21.31081423660902, + "learning_rate": 1.9877816820108604e-05, + "loss": 1.9168, + "step": 13026 + }, + { + "epoch": 0.16, + "grad_norm": 15.44599515076266, + "learning_rate": 1.9877725762618605e-05, + "loss": 1.526, + "step": 13029 + }, + { + "epoch": 0.16, + "grad_norm": 7.33496638266561, + "learning_rate": 1.987763467141949e-05, + "loss": 1.7438, + "step": 13032 + }, + { + "epoch": 0.16, + "grad_norm": 7.14810401432079, + "learning_rate": 1.9877543546511565e-05, + "loss": 1.4261, + "step": 13035 + }, + { + "epoch": 0.16, + "grad_norm": 5.66110149865139, + "learning_rate": 1.987745238789514e-05, + "loss": 1.5227, + "step": 13038 + }, + { + "epoch": 0.16, + "grad_norm": 20.987512876077936, + "learning_rate": 1.987736119557053e-05, + "loss": 1.5295, + "step": 13041 + }, + { + "epoch": 0.16, + "grad_norm": 9.743423937058049, + "learning_rate": 1.987726996953804e-05, + "loss": 1.674, + "step": 13044 + }, + { + "epoch": 0.16, + "grad_norm": 22.34139665339785, + "learning_rate": 1.9877178709797987e-05, + "loss": 1.4066, + "step": 13047 + }, + { + "epoch": 0.16, + "grad_norm": 12.736002081681956, + "learning_rate": 1.987708741635068e-05, + "loss": 1.4935, + "step": 13050 + }, + { + "epoch": 0.16, + "grad_norm": 8.874745449311268, + "learning_rate": 1.9876996089196437e-05, + "loss": 1.406, + "step": 13053 + }, + { + "epoch": 0.16, + "grad_norm": 8.271935920128756, + "learning_rate": 1.987690472833556e-05, + "loss": 1.378, + "step": 13056 + }, + { + "epoch": 0.16, + "grad_norm": 7.690321828768187, + "learning_rate": 1.9876813333768364e-05, + "loss": 1.6058, + "step": 13059 + }, + { + "epoch": 0.16, + "grad_norm": 14.65771841369739, + "learning_rate": 1.987672190549516e-05, + "loss": 1.8255, + "step": 13062 + }, + { + "epoch": 0.16, + "grad_norm": 36.11502676023372, + "learning_rate": 1.9876630443516267e-05, + "loss": 1.8656, + "step": 13065 + }, + { + "epoch": 0.16, + "grad_norm": 36.772342926991165, + "learning_rate": 1.987653894783199e-05, + "loss": 1.7397, + "step": 13068 + }, + { + "epoch": 0.16, + "grad_norm": 6.14469534673512, + "learning_rate": 1.9876447418442645e-05, + "loss": 1.9142, + "step": 13071 + }, + { + "epoch": 0.16, + "grad_norm": 7.465720032237951, + "learning_rate": 1.9876355855348536e-05, + "loss": 1.6865, + "step": 13074 + }, + { + "epoch": 0.16, + "grad_norm": 20.116048204298977, + "learning_rate": 1.9876264258549986e-05, + "loss": 1.8974, + "step": 13077 + }, + { + "epoch": 0.16, + "grad_norm": 4.099304919398982, + "learning_rate": 1.9876172628047305e-05, + "loss": 1.3388, + "step": 13080 + }, + { + "epoch": 0.16, + "grad_norm": 25.17487656868597, + "learning_rate": 1.9876080963840803e-05, + "loss": 1.7386, + "step": 13083 + }, + { + "epoch": 0.16, + "grad_norm": 8.197320504154622, + "learning_rate": 1.9875989265930792e-05, + "loss": 1.4199, + "step": 13086 + }, + { + "epoch": 0.16, + "grad_norm": 4.471493084036634, + "learning_rate": 1.9875897534317588e-05, + "loss": 1.4145, + "step": 13089 + }, + { + "epoch": 0.16, + "grad_norm": 15.6333629264942, + "learning_rate": 1.9875805769001505e-05, + "loss": 1.4197, + "step": 13092 + }, + { + "epoch": 0.16, + "grad_norm": 24.130599292515004, + "learning_rate": 1.987571396998285e-05, + "loss": 1.678, + "step": 13095 + }, + { + "epoch": 0.16, + "grad_norm": 6.396381814267831, + "learning_rate": 1.9875622137261943e-05, + "loss": 1.6401, + "step": 13098 + }, + { + "epoch": 0.16, + "grad_norm": 3.280937059398741, + "learning_rate": 1.9875530270839095e-05, + "loss": 1.7743, + "step": 13101 + }, + { + "epoch": 0.16, + "grad_norm": 16.702584756744226, + "learning_rate": 1.987543837071462e-05, + "loss": 1.6027, + "step": 13104 + }, + { + "epoch": 0.16, + "grad_norm": 2.5195426871235562, + "learning_rate": 1.987534643688883e-05, + "loss": 1.8518, + "step": 13107 + }, + { + "epoch": 0.16, + "grad_norm": 22.86640700024452, + "learning_rate": 1.987525446936204e-05, + "loss": 1.7964, + "step": 13110 + }, + { + "epoch": 0.16, + "grad_norm": 38.895272309290256, + "learning_rate": 1.9875162468134563e-05, + "loss": 1.7288, + "step": 13113 + }, + { + "epoch": 0.16, + "grad_norm": 16.373708504243698, + "learning_rate": 1.987507043320671e-05, + "loss": 1.5675, + "step": 13116 + }, + { + "epoch": 0.16, + "grad_norm": 19.641662742368393, + "learning_rate": 1.9874978364578805e-05, + "loss": 1.8146, + "step": 13119 + }, + { + "epoch": 0.16, + "grad_norm": 82.78422106806725, + "learning_rate": 1.9874886262251155e-05, + "loss": 1.7629, + "step": 13122 + }, + { + "epoch": 0.16, + "grad_norm": 23.95579595326141, + "learning_rate": 1.9874794126224072e-05, + "loss": 1.6593, + "step": 13125 + }, + { + "epoch": 0.16, + "grad_norm": 86.22515821405486, + "learning_rate": 1.9874701956497875e-05, + "loss": 1.7826, + "step": 13128 + }, + { + "epoch": 0.16, + "grad_norm": 6.374756578077038, + "learning_rate": 1.9874609753072876e-05, + "loss": 1.5108, + "step": 13131 + }, + { + "epoch": 0.16, + "grad_norm": 6.695398428041981, + "learning_rate": 1.9874517515949392e-05, + "loss": 1.682, + "step": 13134 + }, + { + "epoch": 0.16, + "grad_norm": 13.116957069849407, + "learning_rate": 1.9874425245127736e-05, + "loss": 1.777, + "step": 13137 + }, + { + "epoch": 0.16, + "grad_norm": 27.328927780500965, + "learning_rate": 1.9874332940608222e-05, + "loss": 1.5046, + "step": 13140 + }, + { + "epoch": 0.16, + "grad_norm": 20.719693958352856, + "learning_rate": 1.987424060239117e-05, + "loss": 1.8003, + "step": 13143 + }, + { + "epoch": 0.16, + "grad_norm": 4.833150082802997, + "learning_rate": 1.9874148230476887e-05, + "loss": 1.5707, + "step": 13146 + }, + { + "epoch": 0.16, + "grad_norm": 32.83898480195453, + "learning_rate": 1.9874055824865695e-05, + "loss": 1.7998, + "step": 13149 + }, + { + "epoch": 0.16, + "grad_norm": 12.804785479070814, + "learning_rate": 1.987396338555791e-05, + "loss": 1.5002, + "step": 13152 + }, + { + "epoch": 0.16, + "grad_norm": 17.737300923633157, + "learning_rate": 1.987387091255384e-05, + "loss": 1.4333, + "step": 13155 + }, + { + "epoch": 0.16, + "grad_norm": 42.19551598047771, + "learning_rate": 1.9873778405853806e-05, + "loss": 1.9922, + "step": 13158 + }, + { + "epoch": 0.16, + "grad_norm": 25.35366049166842, + "learning_rate": 1.9873685865458124e-05, + "loss": 1.5325, + "step": 13161 + }, + { + "epoch": 0.16, + "grad_norm": 15.666837361939812, + "learning_rate": 1.987359329136711e-05, + "loss": 1.9332, + "step": 13164 + }, + { + "epoch": 0.16, + "grad_norm": 67.18328143773903, + "learning_rate": 1.9873500683581076e-05, + "loss": 1.3669, + "step": 13167 + }, + { + "epoch": 0.16, + "grad_norm": 8.052697262612877, + "learning_rate": 1.9873408042100343e-05, + "loss": 1.7721, + "step": 13170 + }, + { + "epoch": 0.16, + "grad_norm": 14.326512901037592, + "learning_rate": 1.9873315366925223e-05, + "loss": 1.7296, + "step": 13173 + }, + { + "epoch": 0.16, + "grad_norm": 27.600352469196874, + "learning_rate": 1.9873222658056032e-05, + "loss": 1.6671, + "step": 13176 + }, + { + "epoch": 0.16, + "grad_norm": 4.864316019191785, + "learning_rate": 1.987312991549309e-05, + "loss": 1.4917, + "step": 13179 + }, + { + "epoch": 0.16, + "grad_norm": 14.521938630343563, + "learning_rate": 1.9873037139236715e-05, + "loss": 1.6406, + "step": 13182 + }, + { + "epoch": 0.16, + "grad_norm": 4.596983727887187, + "learning_rate": 1.9872944329287217e-05, + "loss": 1.6664, + "step": 13185 + }, + { + "epoch": 0.16, + "grad_norm": 23.25040006661016, + "learning_rate": 1.987285148564492e-05, + "loss": 1.7919, + "step": 13188 + }, + { + "epoch": 0.16, + "grad_norm": 6.6244381699346535, + "learning_rate": 1.987275860831013e-05, + "loss": 1.2896, + "step": 13191 + }, + { + "epoch": 0.16, + "grad_norm": 3.435658081782313, + "learning_rate": 1.9872665697283178e-05, + "loss": 1.44, + "step": 13194 + }, + { + "epoch": 0.16, + "grad_norm": 19.40421314703396, + "learning_rate": 1.987257275256437e-05, + "loss": 1.7288, + "step": 13197 + }, + { + "epoch": 0.16, + "grad_norm": 7.007370191985864, + "learning_rate": 1.9872479774154028e-05, + "loss": 1.9578, + "step": 13200 + }, + { + "epoch": 0.16, + "grad_norm": 22.457992480861392, + "learning_rate": 1.987238676205247e-05, + "loss": 1.7638, + "step": 13203 + }, + { + "epoch": 0.16, + "grad_norm": 67.37144386387153, + "learning_rate": 1.9872293716260015e-05, + "loss": 1.9858, + "step": 13206 + }, + { + "epoch": 0.16, + "grad_norm": 26.279507382389646, + "learning_rate": 1.9872200636776973e-05, + "loss": 1.1942, + "step": 13209 + }, + { + "epoch": 0.16, + "grad_norm": 11.479479068795508, + "learning_rate": 1.9872107523603667e-05, + "loss": 1.7407, + "step": 13212 + }, + { + "epoch": 0.16, + "grad_norm": 41.62431652137284, + "learning_rate": 1.987201437674041e-05, + "loss": 1.6724, + "step": 13215 + }, + { + "epoch": 0.16, + "grad_norm": 14.265031125895018, + "learning_rate": 1.987192119618753e-05, + "loss": 1.5038, + "step": 13218 + }, + { + "epoch": 0.16, + "grad_norm": 25.066927675143173, + "learning_rate": 1.9871827981945335e-05, + "loss": 1.6074, + "step": 13221 + }, + { + "epoch": 0.16, + "grad_norm": 28.8542521873939, + "learning_rate": 1.987173473401415e-05, + "loss": 1.731, + "step": 13224 + }, + { + "epoch": 0.16, + "grad_norm": 208.4020179973348, + "learning_rate": 1.9871641452394287e-05, + "loss": 1.6366, + "step": 13227 + }, + { + "epoch": 0.16, + "grad_norm": 29.578576510792885, + "learning_rate": 1.987154813708607e-05, + "loss": 1.3719, + "step": 13230 + }, + { + "epoch": 0.16, + "grad_norm": 98.28848432805242, + "learning_rate": 1.9871454788089814e-05, + "loss": 1.4496, + "step": 13233 + }, + { + "epoch": 0.16, + "grad_norm": 10.479367477801715, + "learning_rate": 1.987136140540584e-05, + "loss": 1.6773, + "step": 13236 + }, + { + "epoch": 0.16, + "grad_norm": 2.8419229257359504, + "learning_rate": 1.987126798903446e-05, + "loss": 2.1402, + "step": 13239 + }, + { + "epoch": 0.16, + "grad_norm": 4.9328024681535645, + "learning_rate": 1.9871174538976005e-05, + "loss": 1.5372, + "step": 13242 + }, + { + "epoch": 0.16, + "grad_norm": 13.990100305353327, + "learning_rate": 1.987108105523078e-05, + "loss": 1.9762, + "step": 13245 + }, + { + "epoch": 0.16, + "grad_norm": 7.196797904240093, + "learning_rate": 1.9870987537799115e-05, + "loss": 1.2024, + "step": 13248 + }, + { + "epoch": 0.16, + "grad_norm": 9.872255468943527, + "learning_rate": 1.9870893986681325e-05, + "loss": 1.3427, + "step": 13251 + }, + { + "epoch": 0.16, + "grad_norm": 7.367771245524782, + "learning_rate": 1.987080040187773e-05, + "loss": 1.5661, + "step": 13254 + }, + { + "epoch": 0.16, + "grad_norm": 48.159539635821204, + "learning_rate": 1.987070678338865e-05, + "loss": 1.7031, + "step": 13257 + }, + { + "epoch": 0.16, + "grad_norm": 11.359256467636998, + "learning_rate": 1.9870613131214397e-05, + "loss": 1.6657, + "step": 13260 + }, + { + "epoch": 0.16, + "grad_norm": 25.792253630223286, + "learning_rate": 1.98705194453553e-05, + "loss": 1.235, + "step": 13263 + }, + { + "epoch": 0.16, + "grad_norm": 61.36682977760947, + "learning_rate": 1.9870425725811677e-05, + "loss": 1.7227, + "step": 13266 + }, + { + "epoch": 0.16, + "grad_norm": 16.89830329877709, + "learning_rate": 1.9870331972583843e-05, + "loss": 1.5078, + "step": 13269 + }, + { + "epoch": 0.16, + "grad_norm": 48.18018135813907, + "learning_rate": 1.9870238185672125e-05, + "loss": 1.5032, + "step": 13272 + }, + { + "epoch": 0.16, + "grad_norm": 4.825726914502145, + "learning_rate": 1.9870144365076837e-05, + "loss": 1.4288, + "step": 13275 + }, + { + "epoch": 0.16, + "grad_norm": 4.73206486703343, + "learning_rate": 1.98700505107983e-05, + "loss": 1.7368, + "step": 13278 + }, + { + "epoch": 0.16, + "grad_norm": 9.330427513873067, + "learning_rate": 1.9869956622836842e-05, + "loss": 1.73, + "step": 13281 + }, + { + "epoch": 0.16, + "grad_norm": 46.441563048909686, + "learning_rate": 1.9869862701192773e-05, + "loss": 1.8319, + "step": 13284 + }, + { + "epoch": 0.16, + "grad_norm": 80.06035081837538, + "learning_rate": 1.986976874586642e-05, + "loss": 1.8168, + "step": 13287 + }, + { + "epoch": 0.16, + "grad_norm": 69.03622837643503, + "learning_rate": 1.9869674756858097e-05, + "loss": 1.7389, + "step": 13290 + }, + { + "epoch": 0.16, + "grad_norm": 5.894214256408462, + "learning_rate": 1.9869580734168134e-05, + "loss": 1.2211, + "step": 13293 + }, + { + "epoch": 0.16, + "grad_norm": 33.29620589282661, + "learning_rate": 1.986948667779684e-05, + "loss": 1.6328, + "step": 13296 + }, + { + "epoch": 0.16, + "grad_norm": 5.885281212142477, + "learning_rate": 1.986939258774455e-05, + "loss": 1.4653, + "step": 13299 + }, + { + "epoch": 0.16, + "grad_norm": 9.353157931512479, + "learning_rate": 1.986929846401157e-05, + "loss": 1.6475, + "step": 13302 + }, + { + "epoch": 0.16, + "grad_norm": 13.990936827352984, + "learning_rate": 1.986920430659824e-05, + "loss": 1.3593, + "step": 13305 + }, + { + "epoch": 0.16, + "grad_norm": 81.45988400782265, + "learning_rate": 1.9869110115504865e-05, + "loss": 1.2357, + "step": 13308 + }, + { + "epoch": 0.16, + "grad_norm": 40.72639928958566, + "learning_rate": 1.986901589073177e-05, + "loss": 1.4342, + "step": 13311 + }, + { + "epoch": 0.16, + "grad_norm": 9.564072833561125, + "learning_rate": 1.986892163227928e-05, + "loss": 1.5019, + "step": 13314 + }, + { + "epoch": 0.16, + "grad_norm": 9.3017238312106, + "learning_rate": 1.986882734014772e-05, + "loss": 1.5111, + "step": 13317 + }, + { + "epoch": 0.16, + "grad_norm": 49.386237984302674, + "learning_rate": 1.98687330143374e-05, + "loss": 1.6147, + "step": 13320 + }, + { + "epoch": 0.16, + "grad_norm": 27.089454027095133, + "learning_rate": 1.9868638654848655e-05, + "loss": 1.8741, + "step": 13323 + }, + { + "epoch": 0.16, + "grad_norm": 3.210523793283891, + "learning_rate": 1.9868544261681795e-05, + "loss": 1.7507, + "step": 13326 + }, + { + "epoch": 0.16, + "grad_norm": 35.64203318193635, + "learning_rate": 1.9868449834837152e-05, + "loss": 1.8916, + "step": 13329 + }, + { + "epoch": 0.16, + "grad_norm": 26.2251219087877, + "learning_rate": 1.986835537431504e-05, + "loss": 1.7804, + "step": 13332 + }, + { + "epoch": 0.16, + "grad_norm": 6.560403993037222, + "learning_rate": 1.986826088011579e-05, + "loss": 1.4525, + "step": 13335 + }, + { + "epoch": 0.16, + "grad_norm": 4.891524261865368, + "learning_rate": 1.986816635223972e-05, + "loss": 1.7288, + "step": 13338 + }, + { + "epoch": 0.16, + "grad_norm": 30.61190461148422, + "learning_rate": 1.986807179068715e-05, + "loss": 1.7625, + "step": 13341 + }, + { + "epoch": 0.16, + "grad_norm": 4.12106674554032, + "learning_rate": 1.9867977195458406e-05, + "loss": 1.4965, + "step": 13344 + }, + { + "epoch": 0.16, + "grad_norm": 17.298453824121253, + "learning_rate": 1.9867882566553813e-05, + "loss": 1.2208, + "step": 13347 + }, + { + "epoch": 0.16, + "grad_norm": 18.679986380035807, + "learning_rate": 1.986778790397369e-05, + "loss": 2.0143, + "step": 13350 + }, + { + "epoch": 0.16, + "grad_norm": 15.045399581558717, + "learning_rate": 1.986769320771836e-05, + "loss": 1.708, + "step": 13353 + }, + { + "epoch": 0.16, + "grad_norm": 7.507883504597357, + "learning_rate": 1.9867598477788152e-05, + "loss": 1.4015, + "step": 13356 + }, + { + "epoch": 0.16, + "grad_norm": 5.863021285940383, + "learning_rate": 1.986750371418338e-05, + "loss": 1.8439, + "step": 13359 + }, + { + "epoch": 0.16, + "grad_norm": 9.324710879097008, + "learning_rate": 1.986740891690437e-05, + "loss": 1.6038, + "step": 13362 + }, + { + "epoch": 0.16, + "grad_norm": 95.63827518845459, + "learning_rate": 1.9867314085951452e-05, + "loss": 1.5455, + "step": 13365 + }, + { + "epoch": 0.16, + "grad_norm": 18.269706419894433, + "learning_rate": 1.9867219221324942e-05, + "loss": 1.2386, + "step": 13368 + }, + { + "epoch": 0.16, + "grad_norm": 6.403031179134156, + "learning_rate": 1.986712432302517e-05, + "loss": 1.6047, + "step": 13371 + }, + { + "epoch": 0.16, + "grad_norm": 37.75555972883186, + "learning_rate": 1.9867029391052456e-05, + "loss": 2.0075, + "step": 13374 + }, + { + "epoch": 0.16, + "grad_norm": 25.712197265938304, + "learning_rate": 1.9866934425407127e-05, + "loss": 1.6725, + "step": 13377 + }, + { + "epoch": 0.16, + "grad_norm": 10.170949552426645, + "learning_rate": 1.98668394260895e-05, + "loss": 1.8425, + "step": 13380 + }, + { + "epoch": 0.16, + "grad_norm": 25.620285701073012, + "learning_rate": 1.9866744393099904e-05, + "loss": 1.3004, + "step": 13383 + }, + { + "epoch": 0.16, + "grad_norm": 33.088095540824185, + "learning_rate": 1.9866649326438666e-05, + "loss": 1.3676, + "step": 13386 + }, + { + "epoch": 0.16, + "grad_norm": 87.5628306764135, + "learning_rate": 1.986655422610611e-05, + "loss": 1.5601, + "step": 13389 + }, + { + "epoch": 0.16, + "grad_norm": 25.933006317551623, + "learning_rate": 1.9866459092102556e-05, + "loss": 1.7126, + "step": 13392 + }, + { + "epoch": 0.16, + "grad_norm": 10.668164004381444, + "learning_rate": 1.986636392442833e-05, + "loss": 1.6427, + "step": 13395 + }, + { + "epoch": 0.16, + "grad_norm": 12.33463859691968, + "learning_rate": 1.9866268723083755e-05, + "loss": 1.2533, + "step": 13398 + }, + { + "epoch": 0.16, + "grad_norm": 10.081230700621541, + "learning_rate": 1.9866173488069165e-05, + "loss": 1.399, + "step": 13401 + }, + { + "epoch": 0.16, + "grad_norm": 12.996110988797517, + "learning_rate": 1.9866078219384877e-05, + "loss": 1.6518, + "step": 13404 + }, + { + "epoch": 0.16, + "grad_norm": 3.953747696055237, + "learning_rate": 1.9865982917031218e-05, + "loss": 1.4513, + "step": 13407 + }, + { + "epoch": 0.16, + "grad_norm": 46.575343345903846, + "learning_rate": 1.9865887581008508e-05, + "loss": 1.8936, + "step": 13410 + }, + { + "epoch": 0.16, + "grad_norm": 74.19787242026919, + "learning_rate": 1.9865792211317082e-05, + "loss": 1.5775, + "step": 13413 + }, + { + "epoch": 0.16, + "grad_norm": 8.25695958013675, + "learning_rate": 1.986569680795726e-05, + "loss": 1.696, + "step": 13416 + }, + { + "epoch": 0.16, + "grad_norm": 10.819838978227397, + "learning_rate": 1.986560137092937e-05, + "loss": 1.6002, + "step": 13419 + }, + { + "epoch": 0.16, + "grad_norm": 7.115951565745922, + "learning_rate": 1.9865505900233733e-05, + "loss": 1.7065, + "step": 13422 + }, + { + "epoch": 0.16, + "grad_norm": 3.6345397088841014, + "learning_rate": 1.986541039587068e-05, + "loss": 1.5635, + "step": 13425 + }, + { + "epoch": 0.16, + "grad_norm": 12.757599700872378, + "learning_rate": 1.9865314857840535e-05, + "loss": 1.241, + "step": 13428 + }, + { + "epoch": 0.16, + "grad_norm": 4.591386499954559, + "learning_rate": 1.9865219286143623e-05, + "loss": 1.3398, + "step": 13431 + }, + { + "epoch": 0.16, + "grad_norm": 17.85328978725112, + "learning_rate": 1.9865123680780268e-05, + "loss": 2.0763, + "step": 13434 + }, + { + "epoch": 0.16, + "grad_norm": 10.801558135943226, + "learning_rate": 1.9865028041750803e-05, + "loss": 1.7437, + "step": 13437 + }, + { + "epoch": 0.16, + "grad_norm": 7.99189146146405, + "learning_rate": 1.9864932369055553e-05, + "loss": 1.8822, + "step": 13440 + }, + { + "epoch": 0.16, + "grad_norm": 7.677765827061746, + "learning_rate": 1.9864836662694836e-05, + "loss": 1.771, + "step": 13443 + }, + { + "epoch": 0.16, + "grad_norm": 28.4537508174674, + "learning_rate": 1.986474092266899e-05, + "loss": 1.6699, + "step": 13446 + }, + { + "epoch": 0.16, + "grad_norm": 5.935121528115157, + "learning_rate": 1.9864645148978333e-05, + "loss": 1.6693, + "step": 13449 + }, + { + "epoch": 0.16, + "grad_norm": 10.64350946827533, + "learning_rate": 1.9864549341623197e-05, + "loss": 1.7642, + "step": 13452 + }, + { + "epoch": 0.16, + "grad_norm": 15.216645089338053, + "learning_rate": 1.9864453500603906e-05, + "loss": 1.5598, + "step": 13455 + }, + { + "epoch": 0.16, + "grad_norm": 4.25914426713987, + "learning_rate": 1.986435762592079e-05, + "loss": 1.6789, + "step": 13458 + }, + { + "epoch": 0.16, + "grad_norm": 5.095805556362008, + "learning_rate": 1.9864261717574175e-05, + "loss": 1.6903, + "step": 13461 + }, + { + "epoch": 0.16, + "grad_norm": 22.706852377909673, + "learning_rate": 1.9864165775564386e-05, + "loss": 1.7872, + "step": 13464 + }, + { + "epoch": 0.16, + "grad_norm": 4.71444552323265, + "learning_rate": 1.9864069799891756e-05, + "loss": 1.6921, + "step": 13467 + }, + { + "epoch": 0.16, + "grad_norm": 5.811809836545691, + "learning_rate": 1.98639737905566e-05, + "loss": 1.5036, + "step": 13470 + }, + { + "epoch": 0.16, + "grad_norm": 113.09005792455824, + "learning_rate": 1.9863877747559263e-05, + "loss": 1.5522, + "step": 13473 + }, + { + "epoch": 0.16, + "grad_norm": 3.6904856723557433, + "learning_rate": 1.986378167090006e-05, + "loss": 1.7371, + "step": 13476 + }, + { + "epoch": 0.16, + "grad_norm": 26.717479406210213, + "learning_rate": 1.9863685560579323e-05, + "loss": 1.4009, + "step": 13479 + }, + { + "epoch": 0.16, + "grad_norm": 24.39921862235089, + "learning_rate": 1.9863589416597384e-05, + "loss": 1.8525, + "step": 13482 + }, + { + "epoch": 0.16, + "grad_norm": 8.831396333032444, + "learning_rate": 1.9863493238954565e-05, + "loss": 1.6991, + "step": 13485 + }, + { + "epoch": 0.16, + "grad_norm": 20.964916545484755, + "learning_rate": 1.9863397027651196e-05, + "loss": 1.6792, + "step": 13488 + }, + { + "epoch": 0.16, + "grad_norm": 7.399522425989173, + "learning_rate": 1.986330078268761e-05, + "loss": 1.7199, + "step": 13491 + }, + { + "epoch": 0.16, + "grad_norm": 10.693260970841955, + "learning_rate": 1.9863204504064124e-05, + "loss": 1.5869, + "step": 13494 + }, + { + "epoch": 0.16, + "grad_norm": 13.485768584774092, + "learning_rate": 1.9863108191781075e-05, + "loss": 1.5503, + "step": 13497 + }, + { + "epoch": 0.16, + "grad_norm": 7.692516606546226, + "learning_rate": 1.986301184583879e-05, + "loss": 1.5247, + "step": 13500 + }, + { + "epoch": 0.16, + "grad_norm": 6.3421431981995955, + "learning_rate": 1.9862915466237604e-05, + "loss": 1.2209, + "step": 13503 + }, + { + "epoch": 0.16, + "grad_norm": 10.047586178732498, + "learning_rate": 1.9862819052977836e-05, + "loss": 1.5522, + "step": 13506 + }, + { + "epoch": 0.16, + "grad_norm": 29.691690361993697, + "learning_rate": 1.9862722606059818e-05, + "loss": 1.3328, + "step": 13509 + }, + { + "epoch": 0.16, + "grad_norm": 35.05601483323496, + "learning_rate": 1.9862626125483882e-05, + "loss": 1.3807, + "step": 13512 + }, + { + "epoch": 0.16, + "grad_norm": 17.119994217862818, + "learning_rate": 1.9862529611250357e-05, + "loss": 1.6806, + "step": 13515 + }, + { + "epoch": 0.16, + "grad_norm": 43.843992155930366, + "learning_rate": 1.9862433063359567e-05, + "loss": 1.7576, + "step": 13518 + }, + { + "epoch": 0.16, + "grad_norm": 18.012381212140305, + "learning_rate": 1.986233648181185e-05, + "loss": 1.1341, + "step": 13521 + }, + { + "epoch": 0.16, + "grad_norm": 29.38914171921146, + "learning_rate": 1.9862239866607527e-05, + "loss": 1.6104, + "step": 13524 + }, + { + "epoch": 0.16, + "grad_norm": 6.976422032329676, + "learning_rate": 1.9862143217746938e-05, + "loss": 1.52, + "step": 13527 + }, + { + "epoch": 0.16, + "grad_norm": 25.48166050579146, + "learning_rate": 1.98620465352304e-05, + "loss": 1.1855, + "step": 13530 + }, + { + "epoch": 0.16, + "grad_norm": 6.247792612053712, + "learning_rate": 1.9861949819058252e-05, + "loss": 1.5843, + "step": 13533 + }, + { + "epoch": 0.16, + "grad_norm": 11.373123060422165, + "learning_rate": 1.986185306923082e-05, + "loss": 1.6646, + "step": 13536 + }, + { + "epoch": 0.16, + "grad_norm": 22.74859020523518, + "learning_rate": 1.9861756285748438e-05, + "loss": 1.4747, + "step": 13539 + }, + { + "epoch": 0.16, + "grad_norm": 20.38153264412284, + "learning_rate": 1.986165946861143e-05, + "loss": 1.5516, + "step": 13542 + }, + { + "epoch": 0.16, + "grad_norm": 37.59150448009022, + "learning_rate": 1.9861562617820135e-05, + "loss": 1.5178, + "step": 13545 + }, + { + "epoch": 0.16, + "grad_norm": 7.264532238551589, + "learning_rate": 1.9861465733374875e-05, + "loss": 1.7186, + "step": 13548 + }, + { + "epoch": 0.16, + "grad_norm": 55.59732069214569, + "learning_rate": 1.9861368815275986e-05, + "loss": 1.9373, + "step": 13551 + }, + { + "epoch": 0.16, + "grad_norm": 28.586416287711497, + "learning_rate": 1.9861271863523798e-05, + "loss": 2.1926, + "step": 13554 + }, + { + "epoch": 0.16, + "grad_norm": 11.241748607714245, + "learning_rate": 1.986117487811864e-05, + "loss": 1.6646, + "step": 13557 + }, + { + "epoch": 0.16, + "grad_norm": 6.304093416924346, + "learning_rate": 1.9861077859060848e-05, + "loss": 1.4541, + "step": 13560 + }, + { + "epoch": 0.16, + "grad_norm": 16.613818553118932, + "learning_rate": 1.9860980806350745e-05, + "loss": 1.532, + "step": 13563 + }, + { + "epoch": 0.16, + "grad_norm": 34.81925904266601, + "learning_rate": 1.9860883719988666e-05, + "loss": 1.4871, + "step": 13566 + }, + { + "epoch": 0.16, + "grad_norm": 21.16530035232899, + "learning_rate": 1.9860786599974947e-05, + "loss": 1.7672, + "step": 13569 + }, + { + "epoch": 0.16, + "grad_norm": 4.401631960293187, + "learning_rate": 1.986068944630991e-05, + "loss": 1.5708, + "step": 13572 + }, + { + "epoch": 0.16, + "grad_norm": 7.2855215359019345, + "learning_rate": 1.9860592258993895e-05, + "loss": 1.7544, + "step": 13575 + }, + { + "epoch": 0.16, + "grad_norm": 14.402574111039312, + "learning_rate": 1.986049503802723e-05, + "loss": 1.467, + "step": 13578 + }, + { + "epoch": 0.16, + "grad_norm": 13.620621502174016, + "learning_rate": 1.9860397783410245e-05, + "loss": 1.693, + "step": 13581 + }, + { + "epoch": 0.16, + "grad_norm": 25.040730757586328, + "learning_rate": 1.9860300495143277e-05, + "loss": 1.4955, + "step": 13584 + }, + { + "epoch": 0.16, + "grad_norm": 11.8055685606546, + "learning_rate": 1.9860203173226655e-05, + "loss": 1.8159, + "step": 13587 + }, + { + "epoch": 0.16, + "grad_norm": 14.023890683383225, + "learning_rate": 1.9860105817660708e-05, + "loss": 1.4284, + "step": 13590 + }, + { + "epoch": 0.16, + "grad_norm": 21.830328273590915, + "learning_rate": 1.9860008428445777e-05, + "loss": 1.7453, + "step": 13593 + }, + { + "epoch": 0.16, + "grad_norm": 10.734303857867468, + "learning_rate": 1.9859911005582184e-05, + "loss": 1.6846, + "step": 13596 + }, + { + "epoch": 0.16, + "grad_norm": 127.11641127070988, + "learning_rate": 1.9859813549070268e-05, + "loss": 1.7232, + "step": 13599 + }, + { + "epoch": 0.16, + "grad_norm": 14.939376674144139, + "learning_rate": 1.9859716058910358e-05, + "loss": 1.5945, + "step": 13602 + }, + { + "epoch": 0.16, + "grad_norm": 10.580006618849392, + "learning_rate": 1.9859618535102793e-05, + "loss": 1.1785, + "step": 13605 + }, + { + "epoch": 0.16, + "grad_norm": 91.15633264135496, + "learning_rate": 1.9859520977647898e-05, + "loss": 1.5963, + "step": 13608 + }, + { + "epoch": 0.16, + "grad_norm": 65.56669896295863, + "learning_rate": 1.9859423386546007e-05, + "loss": 1.2842, + "step": 13611 + }, + { + "epoch": 0.16, + "grad_norm": 9.045811060485446, + "learning_rate": 1.9859325761797458e-05, + "loss": 1.5605, + "step": 13614 + }, + { + "epoch": 0.16, + "grad_norm": 13.527419256093788, + "learning_rate": 1.985922810340258e-05, + "loss": 1.4487, + "step": 13617 + }, + { + "epoch": 0.16, + "grad_norm": 7.695858758963433, + "learning_rate": 1.985913041136171e-05, + "loss": 1.4692, + "step": 13620 + }, + { + "epoch": 0.16, + "grad_norm": 16.109866723763346, + "learning_rate": 1.985903268567518e-05, + "loss": 1.368, + "step": 13623 + }, + { + "epoch": 0.16, + "grad_norm": 33.49408977838917, + "learning_rate": 1.985893492634332e-05, + "loss": 1.6147, + "step": 13626 + }, + { + "epoch": 0.16, + "grad_norm": 11.02930980915, + "learning_rate": 1.9858837133366467e-05, + "loss": 1.9661, + "step": 13629 + }, + { + "epoch": 0.16, + "grad_norm": 5.480556858524806, + "learning_rate": 1.9858739306744956e-05, + "loss": 1.5695, + "step": 13632 + }, + { + "epoch": 0.16, + "grad_norm": 11.051566433531338, + "learning_rate": 1.985864144647912e-05, + "loss": 1.6268, + "step": 13635 + }, + { + "epoch": 0.16, + "grad_norm": 4.439451870850522, + "learning_rate": 1.9858543552569286e-05, + "loss": 1.5601, + "step": 13638 + }, + { + "epoch": 0.16, + "grad_norm": 26.56622398310006, + "learning_rate": 1.98584456250158e-05, + "loss": 1.5532, + "step": 13641 + }, + { + "epoch": 0.16, + "grad_norm": 22.532114990154728, + "learning_rate": 1.9858347663818983e-05, + "loss": 1.4618, + "step": 13644 + }, + { + "epoch": 0.16, + "grad_norm": 12.856694173344403, + "learning_rate": 1.985824966897918e-05, + "loss": 1.7524, + "step": 13647 + }, + { + "epoch": 0.16, + "grad_norm": 14.687799313492828, + "learning_rate": 1.9858151640496724e-05, + "loss": 1.6233, + "step": 13650 + }, + { + "epoch": 0.16, + "grad_norm": 13.195968189431504, + "learning_rate": 1.9858053578371945e-05, + "loss": 1.5652, + "step": 13653 + }, + { + "epoch": 0.16, + "grad_norm": 17.132280431143823, + "learning_rate": 1.985795548260518e-05, + "loss": 1.6486, + "step": 13656 + }, + { + "epoch": 0.16, + "grad_norm": 7.358268793778433, + "learning_rate": 1.9857857353196765e-05, + "loss": 1.7048, + "step": 13659 + }, + { + "epoch": 0.16, + "grad_norm": 16.202204285884076, + "learning_rate": 1.9857759190147034e-05, + "loss": 1.2607, + "step": 13662 + }, + { + "epoch": 0.16, + "grad_norm": 31.333538671046302, + "learning_rate": 1.985766099345632e-05, + "loss": 1.748, + "step": 13665 + }, + { + "epoch": 0.16, + "grad_norm": 16.270502327104328, + "learning_rate": 1.985756276312496e-05, + "loss": 1.5885, + "step": 13668 + }, + { + "epoch": 0.16, + "grad_norm": 3.4811463419788495, + "learning_rate": 1.985746449915329e-05, + "loss": 1.6027, + "step": 13671 + }, + { + "epoch": 0.16, + "grad_norm": 56.24060257430277, + "learning_rate": 1.9857366201541645e-05, + "loss": 1.7948, + "step": 13674 + }, + { + "epoch": 0.16, + "grad_norm": 5.4283903594918455, + "learning_rate": 1.9857267870290355e-05, + "loss": 1.5369, + "step": 13677 + }, + { + "epoch": 0.16, + "grad_norm": 9.73978392554688, + "learning_rate": 1.9857169505399764e-05, + "loss": 1.6799, + "step": 13680 + }, + { + "epoch": 0.16, + "grad_norm": 68.82599761726355, + "learning_rate": 1.9857071106870202e-05, + "loss": 1.4877, + "step": 13683 + }, + { + "epoch": 0.16, + "grad_norm": 20.60829371056454, + "learning_rate": 1.985697267470201e-05, + "loss": 1.8953, + "step": 13686 + }, + { + "epoch": 0.16, + "grad_norm": 32.19601329127407, + "learning_rate": 1.9856874208895518e-05, + "loss": 1.5981, + "step": 13689 + }, + { + "epoch": 0.16, + "grad_norm": 19.06223159757106, + "learning_rate": 1.9856775709451067e-05, + "loss": 1.7544, + "step": 13692 + }, + { + "epoch": 0.16, + "grad_norm": 6.990628599243443, + "learning_rate": 1.985667717636899e-05, + "loss": 1.5392, + "step": 13695 + }, + { + "epoch": 0.16, + "grad_norm": 15.19702911194279, + "learning_rate": 1.9856578609649625e-05, + "loss": 1.9738, + "step": 13698 + }, + { + "epoch": 0.16, + "grad_norm": 13.362336569518412, + "learning_rate": 1.9856480009293307e-05, + "loss": 1.4118, + "step": 13701 + }, + { + "epoch": 0.16, + "grad_norm": 14.68035682661627, + "learning_rate": 1.985638137530037e-05, + "loss": 1.6065, + "step": 13704 + }, + { + "epoch": 0.16, + "grad_norm": 4.606111401363497, + "learning_rate": 1.9856282707671158e-05, + "loss": 1.5903, + "step": 13707 + }, + { + "epoch": 0.16, + "grad_norm": 12.352095357287315, + "learning_rate": 1.9856184006406e-05, + "loss": 1.4991, + "step": 13710 + }, + { + "epoch": 0.16, + "grad_norm": 22.07388999573978, + "learning_rate": 1.9856085271505237e-05, + "loss": 1.9143, + "step": 13713 + }, + { + "epoch": 0.16, + "grad_norm": 10.680749136863671, + "learning_rate": 1.985598650296921e-05, + "loss": 1.8181, + "step": 13716 + }, + { + "epoch": 0.16, + "grad_norm": 14.68136619292554, + "learning_rate": 1.9855887700798244e-05, + "loss": 1.5064, + "step": 13719 + }, + { + "epoch": 0.17, + "grad_norm": 29.08336777893837, + "learning_rate": 1.9855788864992686e-05, + "loss": 1.5456, + "step": 13722 + }, + { + "epoch": 0.17, + "grad_norm": 15.20950894643573, + "learning_rate": 1.985568999555287e-05, + "loss": 1.3263, + "step": 13725 + }, + { + "epoch": 0.17, + "grad_norm": 25.266311086786033, + "learning_rate": 1.9855591092479137e-05, + "loss": 1.6926, + "step": 13728 + }, + { + "epoch": 0.17, + "grad_norm": 26.285376958885877, + "learning_rate": 1.9855492155771817e-05, + "loss": 1.45, + "step": 13731 + }, + { + "epoch": 0.17, + "grad_norm": 12.767973224759645, + "learning_rate": 1.9855393185431254e-05, + "loss": 1.4083, + "step": 13734 + }, + { + "epoch": 0.17, + "grad_norm": 10.240546303542681, + "learning_rate": 1.9855294181457788e-05, + "loss": 1.4806, + "step": 13737 + }, + { + "epoch": 0.17, + "grad_norm": 5.468367659437695, + "learning_rate": 1.985519514385175e-05, + "loss": 1.3872, + "step": 13740 + }, + { + "epoch": 0.17, + "grad_norm": 11.73263026900724, + "learning_rate": 1.985509607261348e-05, + "loss": 1.4873, + "step": 13743 + }, + { + "epoch": 0.17, + "grad_norm": 13.164997115100617, + "learning_rate": 1.9854996967743316e-05, + "loss": 2.1148, + "step": 13746 + }, + { + "epoch": 0.17, + "grad_norm": 19.829951043165497, + "learning_rate": 1.9854897829241597e-05, + "loss": 1.54, + "step": 13749 + }, + { + "epoch": 0.17, + "grad_norm": 9.348933675666927, + "learning_rate": 1.9854798657108665e-05, + "loss": 1.6771, + "step": 13752 + }, + { + "epoch": 0.17, + "grad_norm": 53.30798975567428, + "learning_rate": 1.9854699451344852e-05, + "loss": 1.695, + "step": 13755 + }, + { + "epoch": 0.17, + "grad_norm": 8.112828541259189, + "learning_rate": 1.98546002119505e-05, + "loss": 1.6223, + "step": 13758 + }, + { + "epoch": 0.17, + "grad_norm": 17.549438653723296, + "learning_rate": 1.9854500938925947e-05, + "loss": 1.9686, + "step": 13761 + }, + { + "epoch": 0.17, + "grad_norm": 5.2725353153585495, + "learning_rate": 1.9854401632271535e-05, + "loss": 1.5887, + "step": 13764 + }, + { + "epoch": 0.17, + "grad_norm": 30.888311513994726, + "learning_rate": 1.9854302291987594e-05, + "loss": 1.3236, + "step": 13767 + }, + { + "epoch": 0.17, + "grad_norm": 6.356221191470593, + "learning_rate": 1.9854202918074474e-05, + "loss": 1.4547, + "step": 13770 + }, + { + "epoch": 0.17, + "grad_norm": 78.6695224209189, + "learning_rate": 1.9854103510532505e-05, + "loss": 1.6523, + "step": 13773 + }, + { + "epoch": 0.17, + "grad_norm": 14.942350497702812, + "learning_rate": 1.9854004069362028e-05, + "loss": 1.3325, + "step": 13776 + }, + { + "epoch": 0.17, + "grad_norm": 25.76207228959125, + "learning_rate": 1.985390459456339e-05, + "loss": 1.5249, + "step": 13779 + }, + { + "epoch": 0.17, + "grad_norm": 24.705909339788576, + "learning_rate": 1.9853805086136925e-05, + "loss": 1.5883, + "step": 13782 + }, + { + "epoch": 0.17, + "grad_norm": 127.49028180645603, + "learning_rate": 1.9853705544082967e-05, + "loss": 1.7972, + "step": 13785 + }, + { + "epoch": 0.17, + "grad_norm": 11.329218874506795, + "learning_rate": 1.985360596840187e-05, + "loss": 1.6091, + "step": 13788 + }, + { + "epoch": 0.17, + "grad_norm": 83.41933772490377, + "learning_rate": 1.985350635909396e-05, + "loss": 1.4829, + "step": 13791 + }, + { + "epoch": 0.17, + "grad_norm": 41.61923484970125, + "learning_rate": 1.985340671615958e-05, + "loss": 1.6442, + "step": 13794 + }, + { + "epoch": 0.17, + "grad_norm": 17.18868445471916, + "learning_rate": 1.9853307039599072e-05, + "loss": 1.6786, + "step": 13797 + }, + { + "epoch": 0.17, + "grad_norm": 45.452608564882304, + "learning_rate": 1.985320732941278e-05, + "loss": 1.5866, + "step": 13800 + }, + { + "epoch": 0.17, + "grad_norm": 5.849267188767061, + "learning_rate": 1.985310758560104e-05, + "loss": 1.5664, + "step": 13803 + }, + { + "epoch": 0.17, + "grad_norm": 7.07069206246765, + "learning_rate": 1.985300780816419e-05, + "loss": 1.7087, + "step": 13806 + }, + { + "epoch": 0.17, + "grad_norm": 3.4534980737592558, + "learning_rate": 1.9852907997102574e-05, + "loss": 1.7733, + "step": 13809 + }, + { + "epoch": 0.17, + "grad_norm": 82.93182553515808, + "learning_rate": 1.9852808152416534e-05, + "loss": 1.6934, + "step": 13812 + }, + { + "epoch": 0.17, + "grad_norm": 30.354157502092928, + "learning_rate": 1.985270827410641e-05, + "loss": 1.7458, + "step": 13815 + }, + { + "epoch": 0.17, + "grad_norm": 11.695974490087002, + "learning_rate": 1.9852608362172537e-05, + "loss": 1.6185, + "step": 13818 + }, + { + "epoch": 0.17, + "grad_norm": 6.714373877637384, + "learning_rate": 1.9852508416615265e-05, + "loss": 1.9323, + "step": 13821 + }, + { + "epoch": 0.17, + "grad_norm": 7.22999294585567, + "learning_rate": 1.9852408437434928e-05, + "loss": 1.4778, + "step": 13824 + }, + { + "epoch": 0.17, + "grad_norm": 9.720546452528156, + "learning_rate": 1.985230842463187e-05, + "loss": 1.5829, + "step": 13827 + }, + { + "epoch": 0.17, + "grad_norm": 1.7771720717491326, + "learning_rate": 1.9852208378206433e-05, + "loss": 1.7695, + "step": 13830 + }, + { + "epoch": 0.17, + "grad_norm": 43.87701029959817, + "learning_rate": 1.9852108298158956e-05, + "loss": 1.7301, + "step": 13833 + }, + { + "epoch": 0.17, + "grad_norm": 37.11722719826723, + "learning_rate": 1.9852008184489784e-05, + "loss": 1.6442, + "step": 13836 + }, + { + "epoch": 0.17, + "grad_norm": 7.851913850532296, + "learning_rate": 1.985190803719926e-05, + "loss": 1.4795, + "step": 13839 + }, + { + "epoch": 0.17, + "grad_norm": 18.163640777460024, + "learning_rate": 1.985180785628771e-05, + "loss": 1.5403, + "step": 13842 + }, + { + "epoch": 0.17, + "grad_norm": 4.119900927865318, + "learning_rate": 1.98517076417555e-05, + "loss": 1.5779, + "step": 13845 + }, + { + "epoch": 0.17, + "grad_norm": 22.05814000804036, + "learning_rate": 1.9851607393602957e-05, + "loss": 1.3789, + "step": 13848 + }, + { + "epoch": 0.17, + "grad_norm": 19.00231449167153, + "learning_rate": 1.9851507111830427e-05, + "loss": 1.7981, + "step": 13851 + }, + { + "epoch": 0.17, + "grad_norm": 371.4515848465895, + "learning_rate": 1.9851406796438254e-05, + "loss": 1.8085, + "step": 13854 + }, + { + "epoch": 0.17, + "grad_norm": 20.240572631312062, + "learning_rate": 1.9851306447426774e-05, + "loss": 1.5756, + "step": 13857 + }, + { + "epoch": 0.17, + "grad_norm": 10.254793203308289, + "learning_rate": 1.9851206064796337e-05, + "loss": 1.5018, + "step": 13860 + }, + { + "epoch": 0.17, + "grad_norm": 14.117859177342764, + "learning_rate": 1.9851105648547277e-05, + "loss": 1.6831, + "step": 13863 + }, + { + "epoch": 0.17, + "grad_norm": 4.01392219851652, + "learning_rate": 1.9851005198679947e-05, + "loss": 1.6423, + "step": 13866 + }, + { + "epoch": 0.17, + "grad_norm": 17.803558517050394, + "learning_rate": 1.9850904715194682e-05, + "loss": 1.7891, + "step": 13869 + }, + { + "epoch": 0.17, + "grad_norm": 19.952816008686955, + "learning_rate": 1.9850804198091825e-05, + "loss": 1.7864, + "step": 13872 + }, + { + "epoch": 0.17, + "grad_norm": 7.870434227290113, + "learning_rate": 1.9850703647371724e-05, + "loss": 1.4576, + "step": 13875 + }, + { + "epoch": 0.17, + "grad_norm": 93.5322046103492, + "learning_rate": 1.9850603063034722e-05, + "loss": 1.6917, + "step": 13878 + }, + { + "epoch": 0.17, + "grad_norm": 16.558472447311225, + "learning_rate": 1.9850502445081158e-05, + "loss": 1.5024, + "step": 13881 + }, + { + "epoch": 0.17, + "grad_norm": 16.07931036277898, + "learning_rate": 1.9850401793511376e-05, + "loss": 1.4561, + "step": 13884 + }, + { + "epoch": 0.17, + "grad_norm": 18.21085852848122, + "learning_rate": 1.9850301108325724e-05, + "loss": 1.6307, + "step": 13887 + }, + { + "epoch": 0.17, + "grad_norm": 81.31379748321744, + "learning_rate": 1.985020038952454e-05, + "loss": 1.8937, + "step": 13890 + }, + { + "epoch": 0.17, + "grad_norm": 55.84347536636887, + "learning_rate": 1.985009963710817e-05, + "loss": 1.398, + "step": 13893 + }, + { + "epoch": 0.17, + "grad_norm": 14.90789104688561, + "learning_rate": 1.984999885107696e-05, + "loss": 1.6607, + "step": 13896 + }, + { + "epoch": 0.17, + "grad_norm": 54.09235775936379, + "learning_rate": 1.984989803143125e-05, + "loss": 1.9371, + "step": 13899 + }, + { + "epoch": 0.17, + "grad_norm": 9.796088026980568, + "learning_rate": 1.9849797178171388e-05, + "loss": 1.6415, + "step": 13902 + }, + { + "epoch": 0.17, + "grad_norm": 17.66400457157862, + "learning_rate": 1.984969629129771e-05, + "loss": 1.2304, + "step": 13905 + }, + { + "epoch": 0.17, + "grad_norm": 4.731830008751073, + "learning_rate": 1.9849595370810574e-05, + "loss": 1.5693, + "step": 13908 + }, + { + "epoch": 0.17, + "grad_norm": 12.055490459347785, + "learning_rate": 1.9849494416710314e-05, + "loss": 1.5575, + "step": 13911 + }, + { + "epoch": 0.17, + "grad_norm": 87.90693426481447, + "learning_rate": 1.9849393428997276e-05, + "loss": 1.66, + "step": 13914 + }, + { + "epoch": 0.17, + "grad_norm": 7.38187598535974, + "learning_rate": 1.984929240767181e-05, + "loss": 1.5968, + "step": 13917 + }, + { + "epoch": 0.17, + "grad_norm": 8.727189578325662, + "learning_rate": 1.9849191352734252e-05, + "loss": 1.6059, + "step": 13920 + }, + { + "epoch": 0.17, + "grad_norm": 13.41419304084221, + "learning_rate": 1.984909026418495e-05, + "loss": 1.9326, + "step": 13923 + }, + { + "epoch": 0.17, + "grad_norm": 108.46802954893565, + "learning_rate": 1.9848989142024256e-05, + "loss": 1.8213, + "step": 13926 + }, + { + "epoch": 0.17, + "grad_norm": 9.989745017087737, + "learning_rate": 1.984888798625251e-05, + "loss": 1.5091, + "step": 13929 + }, + { + "epoch": 0.17, + "grad_norm": 7.393033721756353, + "learning_rate": 1.984878679687005e-05, + "loss": 1.8953, + "step": 13932 + }, + { + "epoch": 0.17, + "grad_norm": 8.509996154661152, + "learning_rate": 1.9848685573877233e-05, + "loss": 1.9679, + "step": 13935 + }, + { + "epoch": 0.17, + "grad_norm": 18.490353491201645, + "learning_rate": 1.98485843172744e-05, + "loss": 1.7233, + "step": 13938 + }, + { + "epoch": 0.17, + "grad_norm": 8.305795303560911, + "learning_rate": 1.9848483027061895e-05, + "loss": 1.8371, + "step": 13941 + }, + { + "epoch": 0.17, + "grad_norm": 6.272883815150242, + "learning_rate": 1.9848381703240064e-05, + "loss": 1.5208, + "step": 13944 + }, + { + "epoch": 0.17, + "grad_norm": 8.942705060093688, + "learning_rate": 1.9848280345809257e-05, + "loss": 1.7731, + "step": 13947 + }, + { + "epoch": 0.17, + "grad_norm": 53.5917830269611, + "learning_rate": 1.9848178954769813e-05, + "loss": 1.9378, + "step": 13950 + }, + { + "epoch": 0.17, + "grad_norm": 13.68110959870621, + "learning_rate": 1.9848077530122083e-05, + "loss": 1.7056, + "step": 13953 + }, + { + "epoch": 0.17, + "grad_norm": 10.251206867703537, + "learning_rate": 1.984797607186641e-05, + "loss": 1.7232, + "step": 13956 + }, + { + "epoch": 0.17, + "grad_norm": 20.917171737397243, + "learning_rate": 1.9847874580003143e-05, + "loss": 1.825, + "step": 13959 + }, + { + "epoch": 0.17, + "grad_norm": 22.296570609529528, + "learning_rate": 1.9847773054532628e-05, + "loss": 1.6849, + "step": 13962 + }, + { + "epoch": 0.17, + "grad_norm": 10.540430890000042, + "learning_rate": 1.9847671495455213e-05, + "loss": 1.7555, + "step": 13965 + }, + { + "epoch": 0.17, + "grad_norm": 23.898586127941556, + "learning_rate": 1.9847569902771237e-05, + "loss": 1.3941, + "step": 13968 + }, + { + "epoch": 0.17, + "grad_norm": 18.884076792596336, + "learning_rate": 1.9847468276481056e-05, + "loss": 1.3099, + "step": 13971 + }, + { + "epoch": 0.17, + "grad_norm": 6.61598264517263, + "learning_rate": 1.984736661658501e-05, + "loss": 1.6358, + "step": 13974 + }, + { + "epoch": 0.17, + "grad_norm": 11.143184890574975, + "learning_rate": 1.9847264923083452e-05, + "loss": 1.8466, + "step": 13977 + }, + { + "epoch": 0.17, + "grad_norm": 177.7884032972702, + "learning_rate": 1.9847163195976723e-05, + "loss": 1.7088, + "step": 13980 + }, + { + "epoch": 0.17, + "grad_norm": 7.556638799071978, + "learning_rate": 1.984706143526517e-05, + "loss": 1.7145, + "step": 13983 + }, + { + "epoch": 0.17, + "grad_norm": 28.57256172460864, + "learning_rate": 1.984695964094915e-05, + "loss": 1.8771, + "step": 13986 + }, + { + "epoch": 0.17, + "grad_norm": 36.56694363328423, + "learning_rate": 1.9846857813029e-05, + "loss": 1.7564, + "step": 13989 + }, + { + "epoch": 0.17, + "grad_norm": 32.58830344630159, + "learning_rate": 1.984675595150507e-05, + "loss": 1.2826, + "step": 13992 + }, + { + "epoch": 0.17, + "grad_norm": 4.052740393980285, + "learning_rate": 1.9846654056377712e-05, + "loss": 1.5451, + "step": 13995 + }, + { + "epoch": 0.17, + "grad_norm": 15.447924503437294, + "learning_rate": 1.9846552127647268e-05, + "loss": 1.2125, + "step": 13998 + }, + { + "epoch": 0.17, + "grad_norm": 6.177010411015343, + "learning_rate": 1.984645016531409e-05, + "loss": 1.651, + "step": 14001 + }, + { + "epoch": 0.17, + "grad_norm": 64.99955806772763, + "learning_rate": 1.9846348169378524e-05, + "loss": 1.454, + "step": 14004 + }, + { + "epoch": 0.17, + "grad_norm": 54.86210342538492, + "learning_rate": 1.984624613984092e-05, + "loss": 1.5423, + "step": 14007 + }, + { + "epoch": 0.17, + "grad_norm": 55.889129502217294, + "learning_rate": 1.984614407670162e-05, + "loss": 1.5801, + "step": 14010 + }, + { + "epoch": 0.17, + "grad_norm": 7.477605848988117, + "learning_rate": 1.984604197996098e-05, + "loss": 1.7639, + "step": 14013 + }, + { + "epoch": 0.17, + "grad_norm": 54.084177747695, + "learning_rate": 1.9845939849619345e-05, + "loss": 1.6369, + "step": 14016 + }, + { + "epoch": 0.17, + "grad_norm": 13.244470703923996, + "learning_rate": 1.9845837685677064e-05, + "loss": 1.5616, + "step": 14019 + }, + { + "epoch": 0.17, + "grad_norm": 46.219980203211996, + "learning_rate": 1.9845735488134484e-05, + "loss": 1.7817, + "step": 14022 + }, + { + "epoch": 0.17, + "grad_norm": 8.743666162475876, + "learning_rate": 1.9845633256991953e-05, + "loss": 1.5346, + "step": 14025 + }, + { + "epoch": 0.17, + "grad_norm": 163.8140144140751, + "learning_rate": 1.9845530992249828e-05, + "loss": 1.5285, + "step": 14028 + }, + { + "epoch": 0.17, + "grad_norm": 19.57486825122473, + "learning_rate": 1.9845428693908447e-05, + "loss": 1.3263, + "step": 14031 + }, + { + "epoch": 0.17, + "grad_norm": 5.970567565599443, + "learning_rate": 1.9845326361968165e-05, + "loss": 1.4516, + "step": 14034 + }, + { + "epoch": 0.17, + "grad_norm": 11.857550397004008, + "learning_rate": 1.9845223996429333e-05, + "loss": 1.5854, + "step": 14037 + }, + { + "epoch": 0.17, + "grad_norm": 7.843554703624832, + "learning_rate": 1.9845121597292298e-05, + "loss": 1.6549, + "step": 14040 + }, + { + "epoch": 0.17, + "grad_norm": 4.941114663581014, + "learning_rate": 1.9845019164557406e-05, + "loss": 1.3279, + "step": 14043 + }, + { + "epoch": 0.17, + "grad_norm": 20.344582143467072, + "learning_rate": 1.984491669822501e-05, + "loss": 1.741, + "step": 14046 + }, + { + "epoch": 0.17, + "grad_norm": 5.856177223729651, + "learning_rate": 1.984481419829546e-05, + "loss": 1.5415, + "step": 14049 + }, + { + "epoch": 0.17, + "grad_norm": 10.858754647715465, + "learning_rate": 1.9844711664769106e-05, + "loss": 1.4399, + "step": 14052 + }, + { + "epoch": 0.17, + "grad_norm": 46.22844447136021, + "learning_rate": 1.9844609097646298e-05, + "loss": 1.6963, + "step": 14055 + }, + { + "epoch": 0.17, + "grad_norm": 10.416294199555136, + "learning_rate": 1.9844506496927385e-05, + "loss": 1.9725, + "step": 14058 + }, + { + "epoch": 0.17, + "grad_norm": 25.879450030172688, + "learning_rate": 1.9844403862612716e-05, + "loss": 1.5869, + "step": 14061 + }, + { + "epoch": 0.17, + "grad_norm": 29.636056369767463, + "learning_rate": 1.984430119470264e-05, + "loss": 1.5857, + "step": 14064 + }, + { + "epoch": 0.17, + "grad_norm": 15.52875558181232, + "learning_rate": 1.9844198493197514e-05, + "loss": 1.3646, + "step": 14067 + }, + { + "epoch": 0.17, + "grad_norm": 57.80915425472987, + "learning_rate": 1.9844095758097684e-05, + "loss": 1.7806, + "step": 14070 + }, + { + "epoch": 0.17, + "grad_norm": 13.669738742685192, + "learning_rate": 1.9843992989403502e-05, + "loss": 2.162, + "step": 14073 + }, + { + "epoch": 0.17, + "grad_norm": 11.699922882073375, + "learning_rate": 1.984389018711532e-05, + "loss": 1.7183, + "step": 14076 + }, + { + "epoch": 0.17, + "grad_norm": 9.367410920086973, + "learning_rate": 1.984378735123348e-05, + "loss": 1.6517, + "step": 14079 + }, + { + "epoch": 0.17, + "grad_norm": 25.29646805285638, + "learning_rate": 1.984368448175834e-05, + "loss": 1.4727, + "step": 14082 + }, + { + "epoch": 0.17, + "grad_norm": 54.74432661015494, + "learning_rate": 1.9843581578690256e-05, + "loss": 1.6214, + "step": 14085 + }, + { + "epoch": 0.17, + "grad_norm": 9.79055459514428, + "learning_rate": 1.9843478642029568e-05, + "loss": 1.5168, + "step": 14088 + }, + { + "epoch": 0.17, + "grad_norm": 25.612852134131064, + "learning_rate": 1.9843375671776635e-05, + "loss": 1.4724, + "step": 14091 + }, + { + "epoch": 0.17, + "grad_norm": 14.251453255181918, + "learning_rate": 1.9843272667931805e-05, + "loss": 1.7838, + "step": 14094 + }, + { + "epoch": 0.17, + "grad_norm": 29.11473716309682, + "learning_rate": 1.9843169630495437e-05, + "loss": 1.5781, + "step": 14097 + }, + { + "epoch": 0.17, + "grad_norm": 42.012688319960894, + "learning_rate": 1.984306655946787e-05, + "loss": 1.6234, + "step": 14100 + }, + { + "epoch": 0.17, + "grad_norm": 18.38406094740641, + "learning_rate": 1.9842963454849464e-05, + "loss": 1.6182, + "step": 14103 + }, + { + "epoch": 0.17, + "grad_norm": 12.112312606268423, + "learning_rate": 1.984286031664057e-05, + "loss": 1.4199, + "step": 14106 + }, + { + "epoch": 0.17, + "grad_norm": 27.168753660675467, + "learning_rate": 1.984275714484154e-05, + "loss": 1.6654, + "step": 14109 + }, + { + "epoch": 0.17, + "grad_norm": 8.668125637606373, + "learning_rate": 1.9842653939452723e-05, + "loss": 1.519, + "step": 14112 + }, + { + "epoch": 0.17, + "grad_norm": 17.56620286739892, + "learning_rate": 1.9842550700474474e-05, + "loss": 1.2697, + "step": 14115 + }, + { + "epoch": 0.17, + "grad_norm": 14.954878634055923, + "learning_rate": 1.9842447427907147e-05, + "loss": 1.3363, + "step": 14118 + }, + { + "epoch": 0.17, + "grad_norm": 11.188939571257237, + "learning_rate": 1.9842344121751088e-05, + "loss": 1.446, + "step": 14121 + }, + { + "epoch": 0.17, + "grad_norm": 6.848620897074744, + "learning_rate": 1.9842240782006655e-05, + "loss": 1.587, + "step": 14124 + }, + { + "epoch": 0.17, + "grad_norm": 10.235454072998238, + "learning_rate": 1.9842137408674202e-05, + "loss": 1.7262, + "step": 14127 + }, + { + "epoch": 0.17, + "grad_norm": 45.074818661376256, + "learning_rate": 1.9842034001754077e-05, + "loss": 1.6247, + "step": 14130 + }, + { + "epoch": 0.17, + "grad_norm": 24.872457683764246, + "learning_rate": 1.9841930561246636e-05, + "loss": 2.1144, + "step": 14133 + }, + { + "epoch": 0.17, + "grad_norm": 9.071250346775965, + "learning_rate": 1.9841827087152234e-05, + "loss": 1.6396, + "step": 14136 + }, + { + "epoch": 0.17, + "grad_norm": 13.10690338364367, + "learning_rate": 1.9841723579471214e-05, + "loss": 1.6344, + "step": 14139 + }, + { + "epoch": 0.17, + "grad_norm": 5.827172532401616, + "learning_rate": 1.984162003820394e-05, + "loss": 1.2775, + "step": 14142 + }, + { + "epoch": 0.17, + "grad_norm": 25.02934652651557, + "learning_rate": 1.9841516463350763e-05, + "loss": 1.71, + "step": 14145 + }, + { + "epoch": 0.17, + "grad_norm": 16.7723439658135, + "learning_rate": 1.9841412854912033e-05, + "loss": 1.6405, + "step": 14148 + }, + { + "epoch": 0.17, + "grad_norm": 4.736369239795739, + "learning_rate": 1.9841309212888108e-05, + "loss": 1.345, + "step": 14151 + }, + { + "epoch": 0.17, + "grad_norm": 9.379479681724955, + "learning_rate": 1.9841205537279337e-05, + "loss": 1.5564, + "step": 14154 + }, + { + "epoch": 0.17, + "grad_norm": 4.839820262442781, + "learning_rate": 1.984110182808608e-05, + "loss": 1.7685, + "step": 14157 + }, + { + "epoch": 0.17, + "grad_norm": 8.339283870376395, + "learning_rate": 1.9840998085308683e-05, + "loss": 2.0668, + "step": 14160 + }, + { + "epoch": 0.17, + "grad_norm": 29.442099682287708, + "learning_rate": 1.9840894308947507e-05, + "loss": 1.5188, + "step": 14163 + }, + { + "epoch": 0.17, + "grad_norm": 12.833923542421267, + "learning_rate": 1.9840790499002902e-05, + "loss": 1.4201, + "step": 14166 + }, + { + "epoch": 0.17, + "grad_norm": 24.058644283060847, + "learning_rate": 1.9840686655475223e-05, + "loss": 1.1757, + "step": 14169 + }, + { + "epoch": 0.17, + "grad_norm": 14.173935757294558, + "learning_rate": 1.984058277836483e-05, + "loss": 1.4899, + "step": 14172 + }, + { + "epoch": 0.17, + "grad_norm": 12.174034389616885, + "learning_rate": 1.9840478867672064e-05, + "loss": 1.5129, + "step": 14175 + }, + { + "epoch": 0.17, + "grad_norm": 13.246028981314046, + "learning_rate": 1.9840374923397294e-05, + "loss": 1.5666, + "step": 14178 + }, + { + "epoch": 0.17, + "grad_norm": 13.38700733740554, + "learning_rate": 1.9840270945540868e-05, + "loss": 1.94, + "step": 14181 + }, + { + "epoch": 0.17, + "grad_norm": 36.680595404757646, + "learning_rate": 1.984016693410314e-05, + "loss": 1.5162, + "step": 14184 + }, + { + "epoch": 0.17, + "grad_norm": 22.251176748890686, + "learning_rate": 1.984006288908447e-05, + "loss": 1.5771, + "step": 14187 + }, + { + "epoch": 0.17, + "grad_norm": 13.056806578221325, + "learning_rate": 1.983995881048521e-05, + "loss": 1.3439, + "step": 14190 + }, + { + "epoch": 0.17, + "grad_norm": 15.109177074535447, + "learning_rate": 1.9839854698305712e-05, + "loss": 1.7741, + "step": 14193 + }, + { + "epoch": 0.17, + "grad_norm": 39.56245305595699, + "learning_rate": 1.9839750552546337e-05, + "loss": 1.55, + "step": 14196 + }, + { + "epoch": 0.17, + "grad_norm": 46.482458852085806, + "learning_rate": 1.9839646373207434e-05, + "loss": 1.8792, + "step": 14199 + }, + { + "epoch": 0.17, + "grad_norm": 17.59304551506543, + "learning_rate": 1.9839542160289364e-05, + "loss": 1.5782, + "step": 14202 + }, + { + "epoch": 0.17, + "grad_norm": 4.601616484972018, + "learning_rate": 1.9839437913792482e-05, + "loss": 1.6404, + "step": 14205 + }, + { + "epoch": 0.17, + "grad_norm": 25.431341601563044, + "learning_rate": 1.9839333633717137e-05, + "loss": 1.7405, + "step": 14208 + }, + { + "epoch": 0.17, + "grad_norm": 4.356479539439462, + "learning_rate": 1.98392293200637e-05, + "loss": 2.0767, + "step": 14211 + }, + { + "epoch": 0.17, + "grad_norm": 18.344532458914017, + "learning_rate": 1.983912497283251e-05, + "loss": 1.2965, + "step": 14214 + }, + { + "epoch": 0.17, + "grad_norm": 12.771619849549053, + "learning_rate": 1.983902059202393e-05, + "loss": 1.4537, + "step": 14217 + }, + { + "epoch": 0.17, + "grad_norm": 4.985225051667689, + "learning_rate": 1.983891617763832e-05, + "loss": 1.5792, + "step": 14220 + }, + { + "epoch": 0.17, + "grad_norm": 8.312952426309229, + "learning_rate": 1.983881172967603e-05, + "loss": 1.538, + "step": 14223 + }, + { + "epoch": 0.17, + "grad_norm": 3.9177194531130275, + "learning_rate": 1.9838707248137424e-05, + "loss": 1.6014, + "step": 14226 + }, + { + "epoch": 0.17, + "grad_norm": 12.185657204628937, + "learning_rate": 1.983860273302285e-05, + "loss": 1.5411, + "step": 14229 + }, + { + "epoch": 0.17, + "grad_norm": 18.231729871706673, + "learning_rate": 1.9838498184332673e-05, + "loss": 1.6734, + "step": 14232 + }, + { + "epoch": 0.17, + "grad_norm": 58.48418213806784, + "learning_rate": 1.983839360206724e-05, + "loss": 1.5575, + "step": 14235 + }, + { + "epoch": 0.17, + "grad_norm": 8.596473909153827, + "learning_rate": 1.983828898622692e-05, + "loss": 1.912, + "step": 14238 + }, + { + "epoch": 0.17, + "grad_norm": 23.32951649123419, + "learning_rate": 1.983818433681206e-05, + "loss": 1.7746, + "step": 14241 + }, + { + "epoch": 0.17, + "grad_norm": 11.154372916136005, + "learning_rate": 1.983807965382302e-05, + "loss": 1.4754, + "step": 14244 + }, + { + "epoch": 0.17, + "grad_norm": 4.059247249378657, + "learning_rate": 1.9837974937260158e-05, + "loss": 1.5671, + "step": 14247 + }, + { + "epoch": 0.17, + "grad_norm": 41.924968579375374, + "learning_rate": 1.9837870187123832e-05, + "loss": 1.5199, + "step": 14250 + }, + { + "epoch": 0.17, + "grad_norm": 19.22819739868343, + "learning_rate": 1.9837765403414396e-05, + "loss": 1.7733, + "step": 14253 + }, + { + "epoch": 0.17, + "grad_norm": 14.268722963264468, + "learning_rate": 1.9837660586132212e-05, + "loss": 1.6589, + "step": 14256 + }, + { + "epoch": 0.17, + "grad_norm": 3.8980014525730544, + "learning_rate": 1.9837555735277638e-05, + "loss": 1.526, + "step": 14259 + }, + { + "epoch": 0.17, + "grad_norm": 33.36330910937436, + "learning_rate": 1.9837450850851027e-05, + "loss": 1.3422, + "step": 14262 + }, + { + "epoch": 0.17, + "grad_norm": 18.545647762150892, + "learning_rate": 1.983734593285274e-05, + "loss": 1.5798, + "step": 14265 + }, + { + "epoch": 0.17, + "grad_norm": 12.98150869532247, + "learning_rate": 1.9837240981283136e-05, + "loss": 1.4548, + "step": 14268 + }, + { + "epoch": 0.17, + "grad_norm": 12.098586684802939, + "learning_rate": 1.983713599614257e-05, + "loss": 1.3328, + "step": 14271 + }, + { + "epoch": 0.17, + "grad_norm": 52.8876849844893, + "learning_rate": 1.9837030977431405e-05, + "loss": 1.2166, + "step": 14274 + }, + { + "epoch": 0.17, + "grad_norm": 19.57739307901871, + "learning_rate": 1.9836925925149993e-05, + "loss": 1.6682, + "step": 14277 + }, + { + "epoch": 0.17, + "grad_norm": 4.247119304035344, + "learning_rate": 1.9836820839298697e-05, + "loss": 1.6077, + "step": 14280 + }, + { + "epoch": 0.17, + "grad_norm": 17.063042259046465, + "learning_rate": 1.9836715719877876e-05, + "loss": 1.8549, + "step": 14283 + }, + { + "epoch": 0.17, + "grad_norm": 28.43912691382066, + "learning_rate": 1.983661056688789e-05, + "loss": 1.4247, + "step": 14286 + }, + { + "epoch": 0.17, + "grad_norm": 18.991515323854617, + "learning_rate": 1.983650538032909e-05, + "loss": 1.7949, + "step": 14289 + }, + { + "epoch": 0.17, + "grad_norm": 7.4273794507537545, + "learning_rate": 1.9836400160201844e-05, + "loss": 1.4331, + "step": 14292 + }, + { + "epoch": 0.17, + "grad_norm": 65.51352956061483, + "learning_rate": 1.9836294906506507e-05, + "loss": 1.4803, + "step": 14295 + }, + { + "epoch": 0.17, + "grad_norm": 10.050198002965884, + "learning_rate": 1.9836189619243438e-05, + "loss": 1.491, + "step": 14298 + }, + { + "epoch": 0.17, + "grad_norm": 7.27380491522594, + "learning_rate": 1.9836084298412995e-05, + "loss": 1.5965, + "step": 14301 + }, + { + "epoch": 0.17, + "grad_norm": 33.42986411015644, + "learning_rate": 1.983597894401554e-05, + "loss": 1.7335, + "step": 14304 + }, + { + "epoch": 0.17, + "grad_norm": 50.35749106119252, + "learning_rate": 1.983587355605143e-05, + "loss": 1.6312, + "step": 14307 + }, + { + "epoch": 0.17, + "grad_norm": 23.0904822397987, + "learning_rate": 1.983576813452103e-05, + "loss": 1.6826, + "step": 14310 + }, + { + "epoch": 0.17, + "grad_norm": 19.962038273635223, + "learning_rate": 1.9835662679424693e-05, + "loss": 1.776, + "step": 14313 + }, + { + "epoch": 0.17, + "grad_norm": 23.82088261430041, + "learning_rate": 1.9835557190762786e-05, + "loss": 1.4233, + "step": 14316 + }, + { + "epoch": 0.17, + "grad_norm": 20.848577399236667, + "learning_rate": 1.9835451668535664e-05, + "loss": 1.7196, + "step": 14319 + }, + { + "epoch": 0.17, + "grad_norm": 7.537814135966898, + "learning_rate": 1.983534611274369e-05, + "loss": 1.5597, + "step": 14322 + }, + { + "epoch": 0.17, + "grad_norm": 3.9236543158911634, + "learning_rate": 1.9835240523387217e-05, + "loss": 1.7097, + "step": 14325 + }, + { + "epoch": 0.17, + "grad_norm": 14.13971129086814, + "learning_rate": 1.983513490046661e-05, + "loss": 1.8382, + "step": 14328 + }, + { + "epoch": 0.17, + "grad_norm": 24.882317839252035, + "learning_rate": 1.9835029243982235e-05, + "loss": 1.6523, + "step": 14331 + }, + { + "epoch": 0.17, + "grad_norm": 17.768163948327015, + "learning_rate": 1.9834923553934447e-05, + "loss": 1.3361, + "step": 14334 + }, + { + "epoch": 0.17, + "grad_norm": 28.103152575203087, + "learning_rate": 1.9834817830323608e-05, + "loss": 1.5914, + "step": 14337 + }, + { + "epoch": 0.17, + "grad_norm": 7.396507577622213, + "learning_rate": 1.9834712073150075e-05, + "loss": 1.4935, + "step": 14340 + }, + { + "epoch": 0.17, + "grad_norm": 35.82241919543657, + "learning_rate": 1.9834606282414213e-05, + "loss": 1.9412, + "step": 14343 + }, + { + "epoch": 0.17, + "grad_norm": 61.55033363965623, + "learning_rate": 1.9834500458116385e-05, + "loss": 1.7342, + "step": 14346 + }, + { + "epoch": 0.17, + "grad_norm": 16.30457713220854, + "learning_rate": 1.9834394600256947e-05, + "loss": 1.4473, + "step": 14349 + }, + { + "epoch": 0.17, + "grad_norm": 13.64545572545424, + "learning_rate": 1.9834288708836264e-05, + "loss": 1.46, + "step": 14352 + }, + { + "epoch": 0.17, + "grad_norm": 18.190378252042017, + "learning_rate": 1.9834182783854696e-05, + "loss": 1.4365, + "step": 14355 + }, + { + "epoch": 0.17, + "grad_norm": 6.053091577862202, + "learning_rate": 1.98340768253126e-05, + "loss": 1.3872, + "step": 14358 + }, + { + "epoch": 0.17, + "grad_norm": 27.875683259866154, + "learning_rate": 1.9833970833210347e-05, + "loss": 1.4615, + "step": 14361 + }, + { + "epoch": 0.17, + "grad_norm": 5.13807764378756, + "learning_rate": 1.983386480754829e-05, + "loss": 1.5269, + "step": 14364 + }, + { + "epoch": 0.17, + "grad_norm": 11.597915749300615, + "learning_rate": 1.98337587483268e-05, + "loss": 1.9367, + "step": 14367 + }, + { + "epoch": 0.17, + "grad_norm": 15.276066822447133, + "learning_rate": 1.983365265554623e-05, + "loss": 1.6622, + "step": 14370 + }, + { + "epoch": 0.17, + "grad_norm": 11.73421506505072, + "learning_rate": 1.9833546529206942e-05, + "loss": 1.678, + "step": 14373 + }, + { + "epoch": 0.17, + "grad_norm": 21.28720065925971, + "learning_rate": 1.9833440369309308e-05, + "loss": 1.8482, + "step": 14376 + }, + { + "epoch": 0.17, + "grad_norm": 12.304209475893892, + "learning_rate": 1.983333417585368e-05, + "loss": 1.5903, + "step": 14379 + }, + { + "epoch": 0.17, + "grad_norm": 17.843990001567242, + "learning_rate": 1.9833227948840425e-05, + "loss": 1.6028, + "step": 14382 + }, + { + "epoch": 0.17, + "grad_norm": 79.4885108002012, + "learning_rate": 1.983312168826991e-05, + "loss": 1.7435, + "step": 14385 + }, + { + "epoch": 0.17, + "grad_norm": 7.383653418467824, + "learning_rate": 1.9833015394142485e-05, + "loss": 1.7887, + "step": 14388 + }, + { + "epoch": 0.17, + "grad_norm": 7.541153133974166, + "learning_rate": 1.9832909066458523e-05, + "loss": 1.6475, + "step": 14391 + }, + { + "epoch": 0.17, + "grad_norm": 31.402525264004503, + "learning_rate": 1.9832802705218385e-05, + "loss": 1.6912, + "step": 14394 + }, + { + "epoch": 0.17, + "grad_norm": 34.66569541348634, + "learning_rate": 1.9832696310422433e-05, + "loss": 1.4963, + "step": 14397 + }, + { + "epoch": 0.17, + "grad_norm": 3.968231961503556, + "learning_rate": 1.983258988207103e-05, + "loss": 1.4231, + "step": 14400 + }, + { + "epoch": 0.17, + "grad_norm": 41.729585274161586, + "learning_rate": 1.983248342016454e-05, + "loss": 1.466, + "step": 14403 + }, + { + "epoch": 0.17, + "grad_norm": 10.715757508635228, + "learning_rate": 1.9832376924703325e-05, + "loss": 1.8268, + "step": 14406 + }, + { + "epoch": 0.17, + "grad_norm": 33.567612440663666, + "learning_rate": 1.983227039568775e-05, + "loss": 1.4202, + "step": 14409 + }, + { + "epoch": 0.17, + "grad_norm": 135.19146078781174, + "learning_rate": 1.9832163833118176e-05, + "loss": 1.6003, + "step": 14412 + }, + { + "epoch": 0.17, + "grad_norm": 7.987885713685254, + "learning_rate": 1.9832057236994972e-05, + "loss": 1.2511, + "step": 14415 + }, + { + "epoch": 0.17, + "grad_norm": 12.215086128808299, + "learning_rate": 1.9831950607318492e-05, + "loss": 1.6943, + "step": 14418 + }, + { + "epoch": 0.17, + "grad_norm": 12.110095055737363, + "learning_rate": 1.983184394408911e-05, + "loss": 1.5666, + "step": 14421 + }, + { + "epoch": 0.17, + "grad_norm": 12.431427840918957, + "learning_rate": 1.9831737247307186e-05, + "loss": 1.7044, + "step": 14424 + }, + { + "epoch": 0.17, + "grad_norm": 55.3156512187133, + "learning_rate": 1.9831630516973085e-05, + "loss": 1.4727, + "step": 14427 + }, + { + "epoch": 0.17, + "grad_norm": 21.3856272967094, + "learning_rate": 1.983152375308717e-05, + "loss": 1.5672, + "step": 14430 + }, + { + "epoch": 0.17, + "grad_norm": 10.292007558012811, + "learning_rate": 1.9831416955649803e-05, + "loss": 1.4729, + "step": 14433 + }, + { + "epoch": 0.17, + "grad_norm": 26.33895955366531, + "learning_rate": 1.9831310124661354e-05, + "loss": 1.7863, + "step": 14436 + }, + { + "epoch": 0.17, + "grad_norm": 33.97803623229372, + "learning_rate": 1.9831203260122184e-05, + "loss": 1.792, + "step": 14439 + }, + { + "epoch": 0.17, + "grad_norm": 66.0542587229996, + "learning_rate": 1.983109636203266e-05, + "loss": 1.7608, + "step": 14442 + }, + { + "epoch": 0.17, + "grad_norm": 7.4145330200275055, + "learning_rate": 1.983098943039314e-05, + "loss": 1.2843, + "step": 14445 + }, + { + "epoch": 0.17, + "grad_norm": 6.688285126395631, + "learning_rate": 1.9830882465204e-05, + "loss": 1.75, + "step": 14448 + }, + { + "epoch": 0.17, + "grad_norm": 13.973365933368093, + "learning_rate": 1.9830775466465596e-05, + "loss": 1.6658, + "step": 14451 + }, + { + "epoch": 0.17, + "grad_norm": 39.95310225975441, + "learning_rate": 1.98306684341783e-05, + "loss": 1.4597, + "step": 14454 + }, + { + "epoch": 0.17, + "grad_norm": 22.175589701087787, + "learning_rate": 1.9830561368342468e-05, + "loss": 1.4372, + "step": 14457 + }, + { + "epoch": 0.17, + "grad_norm": 6.207590755265686, + "learning_rate": 1.9830454268958474e-05, + "loss": 1.6293, + "step": 14460 + }, + { + "epoch": 0.17, + "grad_norm": 14.101387796530782, + "learning_rate": 1.9830347136026682e-05, + "loss": 1.8053, + "step": 14463 + }, + { + "epoch": 0.17, + "grad_norm": 18.225207954568866, + "learning_rate": 1.9830239969547453e-05, + "loss": 1.563, + "step": 14466 + }, + { + "epoch": 0.17, + "grad_norm": 42.966846260487564, + "learning_rate": 1.9830132769521158e-05, + "loss": 1.5163, + "step": 14469 + }, + { + "epoch": 0.17, + "grad_norm": 18.702891485666587, + "learning_rate": 1.983002553594816e-05, + "loss": 1.3871, + "step": 14472 + }, + { + "epoch": 0.17, + "grad_norm": 11.52681673232803, + "learning_rate": 1.9829918268828824e-05, + "loss": 1.7888, + "step": 14475 + }, + { + "epoch": 0.17, + "grad_norm": 15.211999346938832, + "learning_rate": 1.9829810968163523e-05, + "loss": 1.5351, + "step": 14478 + }, + { + "epoch": 0.17, + "grad_norm": 10.108669098286871, + "learning_rate": 1.9829703633952613e-05, + "loss": 1.8517, + "step": 14481 + }, + { + "epoch": 0.17, + "grad_norm": 4.715025456378357, + "learning_rate": 1.9829596266196468e-05, + "loss": 1.4434, + "step": 14484 + }, + { + "epoch": 0.17, + "grad_norm": 11.47247313686298, + "learning_rate": 1.9829488864895447e-05, + "loss": 2.1965, + "step": 14487 + }, + { + "epoch": 0.17, + "grad_norm": 20.771176625682493, + "learning_rate": 1.982938143004992e-05, + "loss": 1.5682, + "step": 14490 + }, + { + "epoch": 0.17, + "grad_norm": 6.614659206822095, + "learning_rate": 1.9829273961660265e-05, + "loss": 1.6799, + "step": 14493 + }, + { + "epoch": 0.17, + "grad_norm": 25.16547107793506, + "learning_rate": 1.982916645972683e-05, + "loss": 1.7333, + "step": 14496 + }, + { + "epoch": 0.17, + "grad_norm": 14.591411050751487, + "learning_rate": 1.9829058924249993e-05, + "loss": 1.6308, + "step": 14499 + }, + { + "epoch": 0.17, + "grad_norm": 14.235750994693541, + "learning_rate": 1.9828951355230116e-05, + "loss": 1.6976, + "step": 14502 + }, + { + "epoch": 0.17, + "grad_norm": 9.740266546120921, + "learning_rate": 1.982884375266757e-05, + "loss": 1.388, + "step": 14505 + }, + { + "epoch": 0.17, + "grad_norm": 5.533521271644458, + "learning_rate": 1.9828736116562722e-05, + "loss": 1.7915, + "step": 14508 + }, + { + "epoch": 0.17, + "grad_norm": 7.517139121452972, + "learning_rate": 1.9828628446915937e-05, + "loss": 1.8138, + "step": 14511 + }, + { + "epoch": 0.17, + "grad_norm": 12.50993733411474, + "learning_rate": 1.9828520743727582e-05, + "loss": 1.6716, + "step": 14514 + }, + { + "epoch": 0.17, + "grad_norm": 15.630147151237267, + "learning_rate": 1.9828413006998027e-05, + "loss": 1.4891, + "step": 14517 + }, + { + "epoch": 0.17, + "grad_norm": 9.506645056636932, + "learning_rate": 1.9828305236727638e-05, + "loss": 1.8643, + "step": 14520 + }, + { + "epoch": 0.17, + "grad_norm": 36.66214556924208, + "learning_rate": 1.9828197432916787e-05, + "loss": 1.7663, + "step": 14523 + }, + { + "epoch": 0.17, + "grad_norm": 30.27159747042857, + "learning_rate": 1.9828089595565834e-05, + "loss": 1.5062, + "step": 14526 + }, + { + "epoch": 0.17, + "grad_norm": 24.98183606596673, + "learning_rate": 1.982798172467515e-05, + "loss": 1.9484, + "step": 14529 + }, + { + "epoch": 0.17, + "grad_norm": 24.971524133003996, + "learning_rate": 1.982787382024511e-05, + "loss": 1.387, + "step": 14532 + }, + { + "epoch": 0.17, + "grad_norm": 5.729006506546447, + "learning_rate": 1.982776588227607e-05, + "loss": 1.4593, + "step": 14535 + }, + { + "epoch": 0.17, + "grad_norm": 7.260609213547394, + "learning_rate": 1.982765791076841e-05, + "loss": 1.4339, + "step": 14538 + }, + { + "epoch": 0.17, + "grad_norm": 107.41187523154868, + "learning_rate": 1.982754990572249e-05, + "loss": 1.6284, + "step": 14541 + }, + { + "epoch": 0.17, + "grad_norm": 35.658675932245025, + "learning_rate": 1.982744186713869e-05, + "loss": 1.5136, + "step": 14544 + }, + { + "epoch": 0.17, + "grad_norm": 9.340380286691566, + "learning_rate": 1.982733379501736e-05, + "loss": 1.4377, + "step": 14547 + }, + { + "epoch": 0.17, + "grad_norm": 82.29199879450616, + "learning_rate": 1.9827225689358886e-05, + "loss": 1.2214, + "step": 14550 + }, + { + "epoch": 0.17, + "grad_norm": 16.756991177968807, + "learning_rate": 1.982711755016363e-05, + "loss": 1.7055, + "step": 14553 + }, + { + "epoch": 0.18, + "grad_norm": 21.820276489765526, + "learning_rate": 1.982700937743196e-05, + "loss": 1.6266, + "step": 14556 + }, + { + "epoch": 0.18, + "grad_norm": 9.783091083095455, + "learning_rate": 1.9826901171164245e-05, + "loss": 1.563, + "step": 14559 + }, + { + "epoch": 0.18, + "grad_norm": 6.484482655161167, + "learning_rate": 1.982679293136086e-05, + "loss": 1.6683, + "step": 14562 + }, + { + "epoch": 0.18, + "grad_norm": 10.471485618100678, + "learning_rate": 1.9826684658022167e-05, + "loss": 1.6685, + "step": 14565 + }, + { + "epoch": 0.18, + "grad_norm": 53.70092847947853, + "learning_rate": 1.982657635114854e-05, + "loss": 1.303, + "step": 14568 + }, + { + "epoch": 0.18, + "grad_norm": 56.11729726178092, + "learning_rate": 1.9826468010740347e-05, + "loss": 1.6224, + "step": 14571 + }, + { + "epoch": 0.18, + "grad_norm": 13.653062952055308, + "learning_rate": 1.982635963679796e-05, + "loss": 2.1302, + "step": 14574 + }, + { + "epoch": 0.18, + "grad_norm": 7.9613741214276885, + "learning_rate": 1.9826251229321746e-05, + "loss": 1.7764, + "step": 14577 + }, + { + "epoch": 0.18, + "grad_norm": 9.44242592339104, + "learning_rate": 1.982614278831207e-05, + "loss": 1.818, + "step": 14580 + }, + { + "epoch": 0.18, + "grad_norm": 11.486530139336633, + "learning_rate": 1.9826034313769318e-05, + "loss": 1.5676, + "step": 14583 + }, + { + "epoch": 0.18, + "grad_norm": 4.590656786839114, + "learning_rate": 1.9825925805693844e-05, + "loss": 1.3345, + "step": 14586 + }, + { + "epoch": 0.18, + "grad_norm": 8.386586806792321, + "learning_rate": 1.9825817264086027e-05, + "loss": 1.6575, + "step": 14589 + }, + { + "epoch": 0.18, + "grad_norm": 552.0972728294786, + "learning_rate": 1.9825708688946236e-05, + "loss": 1.7167, + "step": 14592 + }, + { + "epoch": 0.18, + "grad_norm": 18.313281258963453, + "learning_rate": 1.9825600080274836e-05, + "loss": 1.904, + "step": 14595 + }, + { + "epoch": 0.18, + "grad_norm": 37.341129661164196, + "learning_rate": 1.9825491438072207e-05, + "loss": 1.4775, + "step": 14598 + }, + { + "epoch": 0.18, + "grad_norm": 6.57251692166079, + "learning_rate": 1.9825382762338708e-05, + "loss": 1.6641, + "step": 14601 + }, + { + "epoch": 0.18, + "grad_norm": 4.446954411392737, + "learning_rate": 1.9825274053074722e-05, + "loss": 1.507, + "step": 14604 + }, + { + "epoch": 0.18, + "grad_norm": 13.809159131352738, + "learning_rate": 1.9825165310280615e-05, + "loss": 1.4264, + "step": 14607 + }, + { + "epoch": 0.18, + "grad_norm": 104.4447914738969, + "learning_rate": 1.9825056533956757e-05, + "loss": 1.6405, + "step": 14610 + }, + { + "epoch": 0.18, + "grad_norm": 4.960892513078324, + "learning_rate": 1.982494772410352e-05, + "loss": 1.9655, + "step": 14613 + }, + { + "epoch": 0.18, + "grad_norm": 8.30863557375015, + "learning_rate": 1.9824838880721275e-05, + "loss": 1.633, + "step": 14616 + }, + { + "epoch": 0.18, + "grad_norm": 16.354841819467683, + "learning_rate": 1.9824730003810393e-05, + "loss": 1.5021, + "step": 14619 + }, + { + "epoch": 0.18, + "grad_norm": 38.30013300417631, + "learning_rate": 1.9824621093371248e-05, + "loss": 1.6091, + "step": 14622 + }, + { + "epoch": 0.18, + "grad_norm": 14.75546113625904, + "learning_rate": 1.982451214940421e-05, + "loss": 1.5576, + "step": 14625 + }, + { + "epoch": 0.18, + "grad_norm": 21.215510133088195, + "learning_rate": 1.982440317190965e-05, + "loss": 1.9792, + "step": 14628 + }, + { + "epoch": 0.18, + "grad_norm": 16.196796693041392, + "learning_rate": 1.982429416088794e-05, + "loss": 1.607, + "step": 14631 + }, + { + "epoch": 0.18, + "grad_norm": 13.813150242675286, + "learning_rate": 1.9824185116339455e-05, + "loss": 1.6945, + "step": 14634 + }, + { + "epoch": 0.18, + "grad_norm": 30.27598200065493, + "learning_rate": 1.9824076038264564e-05, + "loss": 1.6018, + "step": 14637 + }, + { + "epoch": 0.18, + "grad_norm": 15.357230599231954, + "learning_rate": 1.982396692666364e-05, + "loss": 1.7734, + "step": 14640 + }, + { + "epoch": 0.18, + "grad_norm": 34.81113319229401, + "learning_rate": 1.9823857781537054e-05, + "loss": 1.7873, + "step": 14643 + }, + { + "epoch": 0.18, + "grad_norm": 35.18097654939416, + "learning_rate": 1.982374860288518e-05, + "loss": 1.7441, + "step": 14646 + }, + { + "epoch": 0.18, + "grad_norm": 8.82613164882028, + "learning_rate": 1.982363939070839e-05, + "loss": 1.8962, + "step": 14649 + }, + { + "epoch": 0.18, + "grad_norm": 9.619898745341642, + "learning_rate": 1.982353014500706e-05, + "loss": 1.8507, + "step": 14652 + }, + { + "epoch": 0.18, + "grad_norm": 7.261831599937464, + "learning_rate": 1.982342086578156e-05, + "loss": 1.4308, + "step": 14655 + }, + { + "epoch": 0.18, + "grad_norm": 16.926730190925685, + "learning_rate": 1.9823311553032257e-05, + "loss": 1.6463, + "step": 14658 + }, + { + "epoch": 0.18, + "grad_norm": 9.230445413693307, + "learning_rate": 1.982320220675954e-05, + "loss": 1.5972, + "step": 14661 + }, + { + "epoch": 0.18, + "grad_norm": 4.444931053374607, + "learning_rate": 1.9823092826963767e-05, + "loss": 1.6383, + "step": 14664 + }, + { + "epoch": 0.18, + "grad_norm": 67.56076144975366, + "learning_rate": 1.9822983413645314e-05, + "loss": 1.9717, + "step": 14667 + }, + { + "epoch": 0.18, + "grad_norm": 5.6490088336151825, + "learning_rate": 1.982287396680456e-05, + "loss": 1.7892, + "step": 14670 + }, + { + "epoch": 0.18, + "grad_norm": 34.731498439105195, + "learning_rate": 1.9822764486441875e-05, + "loss": 1.7166, + "step": 14673 + }, + { + "epoch": 0.18, + "grad_norm": 10.686373963447968, + "learning_rate": 1.982265497255763e-05, + "loss": 1.6042, + "step": 14676 + }, + { + "epoch": 0.18, + "grad_norm": 8.225469385604066, + "learning_rate": 1.9822545425152204e-05, + "loss": 1.6178, + "step": 14679 + }, + { + "epoch": 0.18, + "grad_norm": 23.806868053955675, + "learning_rate": 1.982243584422597e-05, + "loss": 1.7229, + "step": 14682 + }, + { + "epoch": 0.18, + "grad_norm": 10.028744033552256, + "learning_rate": 1.98223262297793e-05, + "loss": 1.8596, + "step": 14685 + }, + { + "epoch": 0.18, + "grad_norm": 10.690096712375166, + "learning_rate": 1.9822216581812564e-05, + "loss": 1.8006, + "step": 14688 + }, + { + "epoch": 0.18, + "grad_norm": 29.004857138840528, + "learning_rate": 1.9822106900326147e-05, + "loss": 1.6663, + "step": 14691 + }, + { + "epoch": 0.18, + "grad_norm": 10.523546915822745, + "learning_rate": 1.9821997185320413e-05, + "loss": 1.6473, + "step": 14694 + }, + { + "epoch": 0.18, + "grad_norm": 12.29671109303164, + "learning_rate": 1.9821887436795743e-05, + "loss": 1.3894, + "step": 14697 + }, + { + "epoch": 0.18, + "grad_norm": 9.545201731267166, + "learning_rate": 1.982177765475251e-05, + "loss": 1.5929, + "step": 14700 + }, + { + "epoch": 0.18, + "grad_norm": 26.008975536350146, + "learning_rate": 1.9821667839191082e-05, + "loss": 1.8945, + "step": 14703 + }, + { + "epoch": 0.18, + "grad_norm": 5.01948553893994, + "learning_rate": 1.9821557990111844e-05, + "loss": 1.5775, + "step": 14706 + }, + { + "epoch": 0.18, + "grad_norm": 11.088435100020552, + "learning_rate": 1.9821448107515164e-05, + "loss": 1.6362, + "step": 14709 + }, + { + "epoch": 0.18, + "grad_norm": 6.203853039388943, + "learning_rate": 1.9821338191401423e-05, + "loss": 1.5355, + "step": 14712 + }, + { + "epoch": 0.18, + "grad_norm": 9.603340926704918, + "learning_rate": 1.982122824177099e-05, + "loss": 1.454, + "step": 14715 + }, + { + "epoch": 0.18, + "grad_norm": 9.91424937393515, + "learning_rate": 1.9821118258624243e-05, + "loss": 1.8401, + "step": 14718 + }, + { + "epoch": 0.18, + "grad_norm": 7.736784140148482, + "learning_rate": 1.9821008241961555e-05, + "loss": 1.5026, + "step": 14721 + }, + { + "epoch": 0.18, + "grad_norm": 29.951970238293935, + "learning_rate": 1.9820898191783306e-05, + "loss": 1.5396, + "step": 14724 + }, + { + "epoch": 0.18, + "grad_norm": 12.218377026676514, + "learning_rate": 1.9820788108089866e-05, + "loss": 2.1826, + "step": 14727 + }, + { + "epoch": 0.18, + "grad_norm": 25.77082240417978, + "learning_rate": 1.9820677990881614e-05, + "loss": 1.7385, + "step": 14730 + }, + { + "epoch": 0.18, + "grad_norm": 19.63551315363083, + "learning_rate": 1.9820567840158927e-05, + "loss": 1.4861, + "step": 14733 + }, + { + "epoch": 0.18, + "grad_norm": 3.3804989472514815, + "learning_rate": 1.9820457655922182e-05, + "loss": 1.8057, + "step": 14736 + }, + { + "epoch": 0.18, + "grad_norm": 25.155822934140307, + "learning_rate": 1.982034743817175e-05, + "loss": 2.0356, + "step": 14739 + }, + { + "epoch": 0.18, + "grad_norm": 10.976474058169654, + "learning_rate": 1.9820237186908006e-05, + "loss": 2.2229, + "step": 14742 + }, + { + "epoch": 0.18, + "grad_norm": 4.663299292009362, + "learning_rate": 1.982012690213133e-05, + "loss": 1.3641, + "step": 14745 + }, + { + "epoch": 0.18, + "grad_norm": 83.20227439713297, + "learning_rate": 1.9820016583842105e-05, + "loss": 1.5477, + "step": 14748 + }, + { + "epoch": 0.18, + "grad_norm": 19.098794988480293, + "learning_rate": 1.9819906232040694e-05, + "loss": 1.6475, + "step": 14751 + }, + { + "epoch": 0.18, + "grad_norm": 34.126146422712836, + "learning_rate": 1.9819795846727483e-05, + "loss": 1.6926, + "step": 14754 + }, + { + "epoch": 0.18, + "grad_norm": 10.70786737291241, + "learning_rate": 1.9819685427902846e-05, + "loss": 1.6976, + "step": 14757 + }, + { + "epoch": 0.18, + "grad_norm": 22.20271901501899, + "learning_rate": 1.981957497556716e-05, + "loss": 1.8875, + "step": 14760 + }, + { + "epoch": 0.18, + "grad_norm": 13.270599174826375, + "learning_rate": 1.98194644897208e-05, + "loss": 1.3818, + "step": 14763 + }, + { + "epoch": 0.18, + "grad_norm": 10.436143329880096, + "learning_rate": 1.9819353970364143e-05, + "loss": 1.4154, + "step": 14766 + }, + { + "epoch": 0.18, + "grad_norm": 4.113263861541032, + "learning_rate": 1.9819243417497572e-05, + "loss": 1.5676, + "step": 14769 + }, + { + "epoch": 0.18, + "grad_norm": 27.139630477192014, + "learning_rate": 1.9819132831121456e-05, + "loss": 1.4478, + "step": 14772 + }, + { + "epoch": 0.18, + "grad_norm": 2.2712515960584967, + "learning_rate": 1.981902221123618e-05, + "loss": 1.8694, + "step": 14775 + }, + { + "epoch": 0.18, + "grad_norm": 8.937129588750237, + "learning_rate": 1.9818911557842114e-05, + "loss": 1.7623, + "step": 14778 + }, + { + "epoch": 0.18, + "grad_norm": 20.594133601092697, + "learning_rate": 1.9818800870939647e-05, + "loss": 1.4441, + "step": 14781 + }, + { + "epoch": 0.18, + "grad_norm": 6.569704914732647, + "learning_rate": 1.9818690150529142e-05, + "loss": 1.2812, + "step": 14784 + }, + { + "epoch": 0.18, + "grad_norm": 4.265124356953614, + "learning_rate": 1.9818579396610987e-05, + "loss": 1.5758, + "step": 14787 + }, + { + "epoch": 0.18, + "grad_norm": 3.103255759755128, + "learning_rate": 1.9818468609185556e-05, + "loss": 1.3639, + "step": 14790 + }, + { + "epoch": 0.18, + "grad_norm": 8.863815350361984, + "learning_rate": 1.9818357788253227e-05, + "loss": 1.5165, + "step": 14793 + }, + { + "epoch": 0.18, + "grad_norm": 64.64139764590396, + "learning_rate": 1.981824693381438e-05, + "loss": 1.7982, + "step": 14796 + }, + { + "epoch": 0.18, + "grad_norm": 13.118503227830256, + "learning_rate": 1.98181360458694e-05, + "loss": 1.4035, + "step": 14799 + }, + { + "epoch": 0.18, + "grad_norm": 25.605395457249028, + "learning_rate": 1.981802512441865e-05, + "loss": 1.4809, + "step": 14802 + }, + { + "epoch": 0.18, + "grad_norm": 17.22299801442548, + "learning_rate": 1.981791416946252e-05, + "loss": 1.5648, + "step": 14805 + }, + { + "epoch": 0.18, + "grad_norm": 45.67606166890028, + "learning_rate": 1.9817803181001386e-05, + "loss": 1.7278, + "step": 14808 + }, + { + "epoch": 0.18, + "grad_norm": 8.033427493606512, + "learning_rate": 1.9817692159035623e-05, + "loss": 1.4661, + "step": 14811 + }, + { + "epoch": 0.18, + "grad_norm": 21.53009827110318, + "learning_rate": 1.9817581103565617e-05, + "loss": 1.6733, + "step": 14814 + }, + { + "epoch": 0.18, + "grad_norm": 39.555680355981224, + "learning_rate": 1.9817470014591737e-05, + "loss": 1.5193, + "step": 14817 + }, + { + "epoch": 0.18, + "grad_norm": 10.997645711024205, + "learning_rate": 1.981735889211437e-05, + "loss": 1.6462, + "step": 14820 + }, + { + "epoch": 0.18, + "grad_norm": 10.168472820617856, + "learning_rate": 1.9817247736133897e-05, + "loss": 1.3534, + "step": 14823 + }, + { + "epoch": 0.18, + "grad_norm": 49.19608993558917, + "learning_rate": 1.981713654665069e-05, + "loss": 1.3023, + "step": 14826 + }, + { + "epoch": 0.18, + "grad_norm": 18.529186363782145, + "learning_rate": 1.9817025323665135e-05, + "loss": 1.8634, + "step": 14829 + }, + { + "epoch": 0.18, + "grad_norm": 16.950520220406677, + "learning_rate": 1.9816914067177605e-05, + "loss": 1.6083, + "step": 14832 + }, + { + "epoch": 0.18, + "grad_norm": 35.31962277479631, + "learning_rate": 1.981680277718849e-05, + "loss": 1.77, + "step": 14835 + }, + { + "epoch": 0.18, + "grad_norm": 7.151603541352337, + "learning_rate": 1.9816691453698156e-05, + "loss": 1.4648, + "step": 14838 + }, + { + "epoch": 0.18, + "grad_norm": 6.670855765034408, + "learning_rate": 1.9816580096706993e-05, + "loss": 1.3224, + "step": 14841 + }, + { + "epoch": 0.18, + "grad_norm": 5.020930295953182, + "learning_rate": 1.9816468706215378e-05, + "loss": 1.5523, + "step": 14844 + }, + { + "epoch": 0.18, + "grad_norm": 7.861553169744137, + "learning_rate": 1.981635728222369e-05, + "loss": 1.6007, + "step": 14847 + }, + { + "epoch": 0.18, + "grad_norm": 26.585317254372438, + "learning_rate": 1.9816245824732313e-05, + "loss": 1.2411, + "step": 14850 + }, + { + "epoch": 0.18, + "grad_norm": 17.05504125146173, + "learning_rate": 1.981613433374162e-05, + "loss": 1.5785, + "step": 14853 + }, + { + "epoch": 0.18, + "grad_norm": 48.218389010870446, + "learning_rate": 1.9816022809252003e-05, + "loss": 1.5317, + "step": 14856 + }, + { + "epoch": 0.18, + "grad_norm": 7.188925693852787, + "learning_rate": 1.981591125126383e-05, + "loss": 1.5044, + "step": 14859 + }, + { + "epoch": 0.18, + "grad_norm": 6.590004003756615, + "learning_rate": 1.981579965977749e-05, + "loss": 1.1161, + "step": 14862 + }, + { + "epoch": 0.18, + "grad_norm": 7.827262100505625, + "learning_rate": 1.981568803479336e-05, + "loss": 1.6099, + "step": 14865 + }, + { + "epoch": 0.18, + "grad_norm": 16.704498674720938, + "learning_rate": 1.9815576376311824e-05, + "loss": 1.3264, + "step": 14868 + }, + { + "epoch": 0.18, + "grad_norm": 37.978422171308516, + "learning_rate": 1.9815464684333265e-05, + "loss": 2.0015, + "step": 14871 + }, + { + "epoch": 0.18, + "grad_norm": 2.7587787723693937, + "learning_rate": 1.9815352958858053e-05, + "loss": 1.7, + "step": 14874 + }, + { + "epoch": 0.18, + "grad_norm": 5.191474874692449, + "learning_rate": 1.981524119988658e-05, + "loss": 1.8582, + "step": 14877 + }, + { + "epoch": 0.18, + "grad_norm": 20.28742152332833, + "learning_rate": 1.9815129407419224e-05, + "loss": 2.0022, + "step": 14880 + }, + { + "epoch": 0.18, + "grad_norm": 10.412412862193468, + "learning_rate": 1.9815017581456367e-05, + "loss": 1.4987, + "step": 14883 + }, + { + "epoch": 0.18, + "grad_norm": 6.332633002560057, + "learning_rate": 1.981490572199839e-05, + "loss": 1.6346, + "step": 14886 + }, + { + "epoch": 0.18, + "grad_norm": 20.874931781028565, + "learning_rate": 1.9814793829045677e-05, + "loss": 1.3277, + "step": 14889 + }, + { + "epoch": 0.18, + "grad_norm": 56.24411008710748, + "learning_rate": 1.9814681902598607e-05, + "loss": 1.7762, + "step": 14892 + }, + { + "epoch": 0.18, + "grad_norm": 11.00740576270385, + "learning_rate": 1.9814569942657563e-05, + "loss": 1.5199, + "step": 14895 + }, + { + "epoch": 0.18, + "grad_norm": 4.393615412930693, + "learning_rate": 1.9814457949222926e-05, + "loss": 1.7816, + "step": 14898 + }, + { + "epoch": 0.18, + "grad_norm": 11.411316916146697, + "learning_rate": 1.9814345922295078e-05, + "loss": 1.6239, + "step": 14901 + }, + { + "epoch": 0.18, + "grad_norm": 10.852142507984249, + "learning_rate": 1.9814233861874406e-05, + "loss": 1.7216, + "step": 14904 + }, + { + "epoch": 0.18, + "grad_norm": 6.698883600245119, + "learning_rate": 1.9814121767961288e-05, + "loss": 1.3262, + "step": 14907 + }, + { + "epoch": 0.18, + "grad_norm": 13.510253040253195, + "learning_rate": 1.9814009640556105e-05, + "loss": 1.7585, + "step": 14910 + }, + { + "epoch": 0.18, + "grad_norm": 6.860259179004148, + "learning_rate": 1.9813897479659246e-05, + "loss": 1.4603, + "step": 14913 + }, + { + "epoch": 0.18, + "grad_norm": 8.176181602803455, + "learning_rate": 1.9813785285271087e-05, + "loss": 1.6297, + "step": 14916 + }, + { + "epoch": 0.18, + "grad_norm": 9.764716164531928, + "learning_rate": 1.9813673057392014e-05, + "loss": 1.7425, + "step": 14919 + }, + { + "epoch": 0.18, + "grad_norm": 9.184994226740974, + "learning_rate": 1.9813560796022407e-05, + "loss": 1.4014, + "step": 14922 + }, + { + "epoch": 0.18, + "grad_norm": 3.5647765063109365, + "learning_rate": 1.9813448501162656e-05, + "loss": 1.5561, + "step": 14925 + }, + { + "epoch": 0.18, + "grad_norm": 12.029286602682754, + "learning_rate": 1.981333617281314e-05, + "loss": 1.9525, + "step": 14928 + }, + { + "epoch": 0.18, + "grad_norm": 15.91472312429651, + "learning_rate": 1.981322381097424e-05, + "loss": 2.026, + "step": 14931 + }, + { + "epoch": 0.18, + "grad_norm": 12.056550181450854, + "learning_rate": 1.9813111415646343e-05, + "loss": 1.5263, + "step": 14934 + }, + { + "epoch": 0.18, + "grad_norm": 9.444953658928837, + "learning_rate": 1.9812998986829832e-05, + "loss": 1.76, + "step": 14937 + }, + { + "epoch": 0.18, + "grad_norm": 28.95414466222424, + "learning_rate": 1.9812886524525088e-05, + "loss": 1.469, + "step": 14940 + }, + { + "epoch": 0.18, + "grad_norm": 21.23857883331079, + "learning_rate": 1.98127740287325e-05, + "loss": 1.5706, + "step": 14943 + }, + { + "epoch": 0.18, + "grad_norm": 9.796733860568175, + "learning_rate": 1.9812661499452446e-05, + "loss": 1.4569, + "step": 14946 + }, + { + "epoch": 0.18, + "grad_norm": 31.675723935959866, + "learning_rate": 1.9812548936685315e-05, + "loss": 1.1366, + "step": 14949 + }, + { + "epoch": 0.18, + "grad_norm": 12.090280910338132, + "learning_rate": 1.9812436340431488e-05, + "loss": 1.3717, + "step": 14952 + }, + { + "epoch": 0.18, + "grad_norm": 20.78755723211041, + "learning_rate": 1.981232371069135e-05, + "loss": 1.6522, + "step": 14955 + }, + { + "epoch": 0.18, + "grad_norm": 32.28126276036888, + "learning_rate": 1.9812211047465287e-05, + "loss": 1.5366, + "step": 14958 + }, + { + "epoch": 0.18, + "grad_norm": 37.397334206358295, + "learning_rate": 1.9812098350753678e-05, + "loss": 1.9176, + "step": 14961 + }, + { + "epoch": 0.18, + "grad_norm": 41.94626688401515, + "learning_rate": 1.9811985620556915e-05, + "loss": 1.8571, + "step": 14964 + }, + { + "epoch": 0.18, + "grad_norm": 15.536566020257123, + "learning_rate": 1.9811872856875378e-05, + "loss": 1.5078, + "step": 14967 + }, + { + "epoch": 0.18, + "grad_norm": 8.88243288210766, + "learning_rate": 1.9811760059709453e-05, + "loss": 1.9382, + "step": 14970 + }, + { + "epoch": 0.18, + "grad_norm": 44.54323946968804, + "learning_rate": 1.981164722905953e-05, + "loss": 1.5395, + "step": 14973 + }, + { + "epoch": 0.18, + "grad_norm": 60.16531418215007, + "learning_rate": 1.9811534364925983e-05, + "loss": 1.525, + "step": 14976 + }, + { + "epoch": 0.18, + "grad_norm": 17.873777988084655, + "learning_rate": 1.9811421467309204e-05, + "loss": 1.5809, + "step": 14979 + }, + { + "epoch": 0.18, + "grad_norm": 19.09647340108228, + "learning_rate": 1.981130853620958e-05, + "loss": 1.954, + "step": 14982 + }, + { + "epoch": 0.18, + "grad_norm": 54.426911509887816, + "learning_rate": 1.9811195571627493e-05, + "loss": 1.7842, + "step": 14985 + }, + { + "epoch": 0.18, + "grad_norm": 29.19995428212382, + "learning_rate": 1.981108257356333e-05, + "loss": 1.8451, + "step": 14988 + }, + { + "epoch": 0.18, + "grad_norm": 6.730770152287334, + "learning_rate": 1.9810969542017476e-05, + "loss": 1.3164, + "step": 14991 + }, + { + "epoch": 0.18, + "grad_norm": 9.863716092165253, + "learning_rate": 1.9810856476990314e-05, + "loss": 1.5317, + "step": 14994 + }, + { + "epoch": 0.18, + "grad_norm": 9.61203934535473, + "learning_rate": 1.9810743378482237e-05, + "loss": 1.5208, + "step": 14997 + }, + { + "epoch": 0.18, + "grad_norm": 21.96013888081872, + "learning_rate": 1.9810630246493627e-05, + "loss": 1.3226, + "step": 15000 + }, + { + "epoch": 0.18, + "grad_norm": 4.835690116148067, + "learning_rate": 1.9810517081024864e-05, + "loss": 1.981, + "step": 15003 + }, + { + "epoch": 0.18, + "grad_norm": 5.159005229383151, + "learning_rate": 1.9810403882076344e-05, + "loss": 1.5745, + "step": 15006 + }, + { + "epoch": 0.18, + "grad_norm": 21.467239576448915, + "learning_rate": 1.9810290649648448e-05, + "loss": 1.6805, + "step": 15009 + }, + { + "epoch": 0.18, + "grad_norm": 9.51134620937614, + "learning_rate": 1.9810177383741564e-05, + "loss": 1.4954, + "step": 15012 + }, + { + "epoch": 0.18, + "grad_norm": 10.159367339083548, + "learning_rate": 1.981006408435608e-05, + "loss": 1.5075, + "step": 15015 + }, + { + "epoch": 0.18, + "grad_norm": 26.621721796974285, + "learning_rate": 1.9809950751492376e-05, + "loss": 1.7473, + "step": 15018 + }, + { + "epoch": 0.18, + "grad_norm": 5.5309638918560395, + "learning_rate": 1.980983738515085e-05, + "loss": 1.797, + "step": 15021 + }, + { + "epoch": 0.18, + "grad_norm": 8.100866613912297, + "learning_rate": 1.980972398533188e-05, + "loss": 1.5044, + "step": 15024 + }, + { + "epoch": 0.18, + "grad_norm": 32.70888864188221, + "learning_rate": 1.9809610552035853e-05, + "loss": 1.6715, + "step": 15027 + }, + { + "epoch": 0.18, + "grad_norm": 13.133372442794226, + "learning_rate": 1.9809497085263162e-05, + "loss": 1.5789, + "step": 15030 + }, + { + "epoch": 0.18, + "grad_norm": 12.086943512184334, + "learning_rate": 1.9809383585014192e-05, + "loss": 1.6062, + "step": 15033 + }, + { + "epoch": 0.18, + "grad_norm": 24.745551047406572, + "learning_rate": 1.9809270051289324e-05, + "loss": 1.7446, + "step": 15036 + }, + { + "epoch": 0.18, + "grad_norm": 13.459522081509775, + "learning_rate": 1.980915648408895e-05, + "loss": 1.7435, + "step": 15039 + }, + { + "epoch": 0.18, + "grad_norm": 53.28305522285139, + "learning_rate": 1.9809042883413464e-05, + "loss": 1.5805, + "step": 15042 + }, + { + "epoch": 0.18, + "grad_norm": 25.897604535381074, + "learning_rate": 1.9808929249263245e-05, + "loss": 2.0266, + "step": 15045 + }, + { + "epoch": 0.18, + "grad_norm": 117.76930776654781, + "learning_rate": 1.9808815581638684e-05, + "loss": 1.3536, + "step": 15048 + }, + { + "epoch": 0.18, + "grad_norm": 68.02190345916466, + "learning_rate": 1.980870188054017e-05, + "loss": 1.7054, + "step": 15051 + }, + { + "epoch": 0.18, + "grad_norm": 32.02916268975906, + "learning_rate": 1.9808588145968082e-05, + "loss": 1.4904, + "step": 15054 + }, + { + "epoch": 0.18, + "grad_norm": 102.00114605416157, + "learning_rate": 1.9808474377922825e-05, + "loss": 1.4159, + "step": 15057 + }, + { + "epoch": 0.18, + "grad_norm": 19.90444100957577, + "learning_rate": 1.9808360576404774e-05, + "loss": 1.4142, + "step": 15060 + }, + { + "epoch": 0.18, + "grad_norm": 10.447958659162806, + "learning_rate": 1.9808246741414322e-05, + "loss": 1.6823, + "step": 15063 + }, + { + "epoch": 0.18, + "grad_norm": 3.6565537228333733, + "learning_rate": 1.9808132872951856e-05, + "loss": 1.4159, + "step": 15066 + }, + { + "epoch": 0.18, + "grad_norm": 15.323528849079766, + "learning_rate": 1.9808018971017768e-05, + "loss": 1.5143, + "step": 15069 + }, + { + "epoch": 0.18, + "grad_norm": 74.61876650105967, + "learning_rate": 1.9807905035612444e-05, + "loss": 1.7468, + "step": 15072 + }, + { + "epoch": 0.18, + "grad_norm": 50.106075344819644, + "learning_rate": 1.9807791066736268e-05, + "loss": 1.8336, + "step": 15075 + }, + { + "epoch": 0.18, + "grad_norm": 24.01596104049072, + "learning_rate": 1.9807677064389636e-05, + "loss": 1.627, + "step": 15078 + }, + { + "epoch": 0.18, + "grad_norm": 10.747155985189227, + "learning_rate": 1.9807563028572937e-05, + "loss": 1.8501, + "step": 15081 + }, + { + "epoch": 0.18, + "grad_norm": 20.898929119620707, + "learning_rate": 1.9807448959286555e-05, + "loss": 1.3424, + "step": 15084 + }, + { + "epoch": 0.18, + "grad_norm": 55.049815830727766, + "learning_rate": 1.9807334856530886e-05, + "loss": 1.8447, + "step": 15087 + }, + { + "epoch": 0.18, + "grad_norm": 22.636043543670368, + "learning_rate": 1.9807220720306312e-05, + "loss": 1.6069, + "step": 15090 + }, + { + "epoch": 0.18, + "grad_norm": 11.383946841321876, + "learning_rate": 1.9807106550613227e-05, + "loss": 1.4839, + "step": 15093 + }, + { + "epoch": 0.18, + "grad_norm": 64.22496668278876, + "learning_rate": 1.9806992347452024e-05, + "loss": 1.4158, + "step": 15096 + }, + { + "epoch": 0.18, + "grad_norm": 15.407334615431989, + "learning_rate": 1.9806878110823085e-05, + "loss": 1.7695, + "step": 15099 + }, + { + "epoch": 0.18, + "grad_norm": 30.0369770396314, + "learning_rate": 1.9806763840726804e-05, + "loss": 1.6771, + "step": 15102 + }, + { + "epoch": 0.18, + "grad_norm": 13.246384137834742, + "learning_rate": 1.980664953716357e-05, + "loss": 1.8486, + "step": 15105 + }, + { + "epoch": 0.18, + "grad_norm": 39.360547033182264, + "learning_rate": 1.9806535200133773e-05, + "loss": 1.5785, + "step": 15108 + }, + { + "epoch": 0.18, + "grad_norm": 63.90888582969427, + "learning_rate": 1.9806420829637803e-05, + "loss": 1.6218, + "step": 15111 + }, + { + "epoch": 0.18, + "grad_norm": 8.580817794917078, + "learning_rate": 1.9806306425676055e-05, + "loss": 1.5535, + "step": 15114 + }, + { + "epoch": 0.18, + "grad_norm": 30.447312997562808, + "learning_rate": 1.9806191988248914e-05, + "loss": 1.3564, + "step": 15117 + }, + { + "epoch": 0.18, + "grad_norm": 9.765938643426324, + "learning_rate": 1.9806077517356767e-05, + "loss": 1.484, + "step": 15120 + }, + { + "epoch": 0.18, + "grad_norm": 13.38907781118857, + "learning_rate": 1.9805963013000013e-05, + "loss": 1.6188, + "step": 15123 + }, + { + "epoch": 0.18, + "grad_norm": 26.05855203756962, + "learning_rate": 1.980584847517904e-05, + "loss": 1.4577, + "step": 15126 + }, + { + "epoch": 0.18, + "grad_norm": 22.724974870575707, + "learning_rate": 1.9805733903894238e-05, + "loss": 1.7153, + "step": 15129 + }, + { + "epoch": 0.18, + "grad_norm": 11.861920170000786, + "learning_rate": 1.9805619299146e-05, + "loss": 1.34, + "step": 15132 + }, + { + "epoch": 0.18, + "grad_norm": 44.161729061705884, + "learning_rate": 1.9805504660934713e-05, + "loss": 1.5582, + "step": 15135 + }, + { + "epoch": 0.18, + "grad_norm": 19.527858404075907, + "learning_rate": 1.980538998926077e-05, + "loss": 1.6095, + "step": 15138 + }, + { + "epoch": 0.18, + "grad_norm": 23.932952126798217, + "learning_rate": 1.9805275284124564e-05, + "loss": 1.7497, + "step": 15141 + }, + { + "epoch": 0.18, + "grad_norm": 12.828583743179243, + "learning_rate": 1.9805160545526486e-05, + "loss": 1.7164, + "step": 15144 + }, + { + "epoch": 0.18, + "grad_norm": 149.00615305695175, + "learning_rate": 1.9805045773466924e-05, + "loss": 1.657, + "step": 15147 + }, + { + "epoch": 0.18, + "grad_norm": 15.915519063014742, + "learning_rate": 1.9804930967946273e-05, + "loss": 1.4534, + "step": 15150 + }, + { + "epoch": 0.18, + "grad_norm": 85.18319515690881, + "learning_rate": 1.9804816128964924e-05, + "loss": 1.6201, + "step": 15153 + }, + { + "epoch": 0.18, + "grad_norm": 161.57812362401665, + "learning_rate": 1.9804701256523274e-05, + "loss": 1.8743, + "step": 15156 + }, + { + "epoch": 0.18, + "grad_norm": 12.142526488991376, + "learning_rate": 1.9804586350621708e-05, + "loss": 1.9373, + "step": 15159 + }, + { + "epoch": 0.18, + "grad_norm": 10.663162090671571, + "learning_rate": 1.980447141126062e-05, + "loss": 1.4721, + "step": 15162 + }, + { + "epoch": 0.18, + "grad_norm": 20.12054004044854, + "learning_rate": 1.98043564384404e-05, + "loss": 1.5067, + "step": 15165 + }, + { + "epoch": 0.18, + "grad_norm": 24.690302096480924, + "learning_rate": 1.9804241432161446e-05, + "loss": 1.7899, + "step": 15168 + }, + { + "epoch": 0.18, + "grad_norm": 13.209743256674468, + "learning_rate": 1.9804126392424144e-05, + "loss": 1.7075, + "step": 15171 + }, + { + "epoch": 0.18, + "grad_norm": 63.42127479362618, + "learning_rate": 1.9804011319228892e-05, + "loss": 1.7091, + "step": 15174 + }, + { + "epoch": 0.18, + "grad_norm": 72.49565262161909, + "learning_rate": 1.9803896212576082e-05, + "loss": 1.4938, + "step": 15177 + }, + { + "epoch": 0.18, + "grad_norm": 13.545583571810523, + "learning_rate": 1.9803781072466108e-05, + "loss": 1.3881, + "step": 15180 + }, + { + "epoch": 0.18, + "grad_norm": 5.209289583759303, + "learning_rate": 1.9803665898899355e-05, + "loss": 1.9358, + "step": 15183 + }, + { + "epoch": 0.18, + "grad_norm": 15.962062038281566, + "learning_rate": 1.9803550691876225e-05, + "loss": 1.7478, + "step": 15186 + }, + { + "epoch": 0.18, + "grad_norm": 7.871546866730331, + "learning_rate": 1.980343545139711e-05, + "loss": 1.4007, + "step": 15189 + }, + { + "epoch": 0.18, + "grad_norm": 13.33473290719744, + "learning_rate": 1.9803320177462397e-05, + "loss": 1.7372, + "step": 15192 + }, + { + "epoch": 0.18, + "grad_norm": 8.492223122250069, + "learning_rate": 1.9803204870072482e-05, + "loss": 1.5875, + "step": 15195 + }, + { + "epoch": 0.18, + "grad_norm": 213.00145441911792, + "learning_rate": 1.980308952922776e-05, + "loss": 1.5237, + "step": 15198 + }, + { + "epoch": 0.18, + "grad_norm": 33.21433341780954, + "learning_rate": 1.9802974154928627e-05, + "loss": 1.6139, + "step": 15201 + }, + { + "epoch": 0.18, + "grad_norm": 178.2924816888699, + "learning_rate": 1.9802858747175476e-05, + "loss": 1.8576, + "step": 15204 + }, + { + "epoch": 0.18, + "grad_norm": 12.633595266658926, + "learning_rate": 1.9802743305968697e-05, + "loss": 1.5429, + "step": 15207 + }, + { + "epoch": 0.18, + "grad_norm": 49.61133841171215, + "learning_rate": 1.9802627831308685e-05, + "loss": 1.7832, + "step": 15210 + }, + { + "epoch": 0.18, + "grad_norm": 8.033145166186953, + "learning_rate": 1.9802512323195834e-05, + "loss": 1.6501, + "step": 15213 + }, + { + "epoch": 0.18, + "grad_norm": 3.559543301504083, + "learning_rate": 1.9802396781630543e-05, + "loss": 1.6948, + "step": 15216 + }, + { + "epoch": 0.18, + "grad_norm": 4.774507013448774, + "learning_rate": 1.98022812066132e-05, + "loss": 1.5689, + "step": 15219 + }, + { + "epoch": 0.18, + "grad_norm": 44.72280800896052, + "learning_rate": 1.9802165598144204e-05, + "loss": 1.7272, + "step": 15222 + }, + { + "epoch": 0.18, + "grad_norm": 44.42032447759244, + "learning_rate": 1.9802049956223946e-05, + "loss": 1.2575, + "step": 15225 + }, + { + "epoch": 0.18, + "grad_norm": 25.64210411221048, + "learning_rate": 1.9801934280852822e-05, + "loss": 1.7415, + "step": 15228 + }, + { + "epoch": 0.18, + "grad_norm": 23.758204632199995, + "learning_rate": 1.980181857203123e-05, + "loss": 1.5927, + "step": 15231 + }, + { + "epoch": 0.18, + "grad_norm": 8.39581928635236, + "learning_rate": 1.9801702829759556e-05, + "loss": 1.3693, + "step": 15234 + }, + { + "epoch": 0.18, + "grad_norm": 19.752141427710455, + "learning_rate": 1.9801587054038203e-05, + "loss": 1.9317, + "step": 15237 + }, + { + "epoch": 0.18, + "grad_norm": 4.3413102944764175, + "learning_rate": 1.9801471244867566e-05, + "loss": 1.4512, + "step": 15240 + }, + { + "epoch": 0.18, + "grad_norm": 9.035095163695125, + "learning_rate": 1.9801355402248035e-05, + "loss": 1.3964, + "step": 15243 + }, + { + "epoch": 0.18, + "grad_norm": 40.32602113949914, + "learning_rate": 1.9801239526180014e-05, + "loss": 1.8805, + "step": 15246 + }, + { + "epoch": 0.18, + "grad_norm": 6.883445090307038, + "learning_rate": 1.9801123616663888e-05, + "loss": 1.5742, + "step": 15249 + }, + { + "epoch": 0.18, + "grad_norm": 4.53751769967324, + "learning_rate": 1.980100767370006e-05, + "loss": 1.2051, + "step": 15252 + }, + { + "epoch": 0.18, + "grad_norm": 15.408267855196579, + "learning_rate": 1.980089169728892e-05, + "loss": 1.4378, + "step": 15255 + }, + { + "epoch": 0.18, + "grad_norm": 6.264981995007075, + "learning_rate": 1.980077568743087e-05, + "loss": 1.144, + "step": 15258 + }, + { + "epoch": 0.18, + "grad_norm": 22.745472806184843, + "learning_rate": 1.9800659644126303e-05, + "loss": 1.761, + "step": 15261 + }, + { + "epoch": 0.18, + "grad_norm": 56.40890732932874, + "learning_rate": 1.9800543567375613e-05, + "loss": 1.4448, + "step": 15264 + }, + { + "epoch": 0.18, + "grad_norm": 22.773230767798058, + "learning_rate": 1.9800427457179198e-05, + "loss": 1.7158, + "step": 15267 + }, + { + "epoch": 0.18, + "grad_norm": 7.470228530042622, + "learning_rate": 1.9800311313537452e-05, + "loss": 1.828, + "step": 15270 + }, + { + "epoch": 0.18, + "grad_norm": 15.386184278357042, + "learning_rate": 1.9800195136450777e-05, + "loss": 1.8082, + "step": 15273 + }, + { + "epoch": 0.18, + "grad_norm": 6.954884698277789, + "learning_rate": 1.9800078925919565e-05, + "loss": 1.7663, + "step": 15276 + }, + { + "epoch": 0.18, + "grad_norm": 37.21711160730334, + "learning_rate": 1.9799962681944213e-05, + "loss": 1.6796, + "step": 15279 + }, + { + "epoch": 0.18, + "grad_norm": 4.905640455734094, + "learning_rate": 1.979984640452512e-05, + "loss": 1.4036, + "step": 15282 + }, + { + "epoch": 0.18, + "grad_norm": 10.258984260156273, + "learning_rate": 1.9799730093662682e-05, + "loss": 1.5549, + "step": 15285 + }, + { + "epoch": 0.18, + "grad_norm": 19.887436015897542, + "learning_rate": 1.979961374935729e-05, + "loss": 1.4742, + "step": 15288 + }, + { + "epoch": 0.18, + "grad_norm": 12.805484751722187, + "learning_rate": 1.979949737160935e-05, + "loss": 1.6471, + "step": 15291 + }, + { + "epoch": 0.18, + "grad_norm": 40.06252112571881, + "learning_rate": 1.9799380960419255e-05, + "loss": 1.4311, + "step": 15294 + }, + { + "epoch": 0.18, + "grad_norm": 4.537550490559017, + "learning_rate": 1.97992645157874e-05, + "loss": 1.7157, + "step": 15297 + }, + { + "epoch": 0.18, + "grad_norm": 36.08310242911285, + "learning_rate": 1.979914803771419e-05, + "loss": 1.9683, + "step": 15300 + }, + { + "epoch": 0.18, + "grad_norm": 28.353886552889964, + "learning_rate": 1.9799031526200014e-05, + "loss": 1.8013, + "step": 15303 + }, + { + "epoch": 0.18, + "grad_norm": 9.930455054703339, + "learning_rate": 1.9798914981245273e-05, + "loss": 1.9544, + "step": 15306 + }, + { + "epoch": 0.18, + "grad_norm": 54.28200784183191, + "learning_rate": 1.9798798402850366e-05, + "loss": 1.8732, + "step": 15309 + }, + { + "epoch": 0.18, + "grad_norm": 19.190541052824845, + "learning_rate": 1.979868179101569e-05, + "loss": 1.2384, + "step": 15312 + }, + { + "epoch": 0.18, + "grad_norm": 21.90639864269165, + "learning_rate": 1.979856514574164e-05, + "loss": 2.0447, + "step": 15315 + }, + { + "epoch": 0.18, + "grad_norm": 24.94298243595115, + "learning_rate": 1.979844846702862e-05, + "loss": 1.267, + "step": 15318 + }, + { + "epoch": 0.18, + "grad_norm": 6.718596153332366, + "learning_rate": 1.979833175487702e-05, + "loss": 1.6838, + "step": 15321 + }, + { + "epoch": 0.18, + "grad_norm": 74.61745436545529, + "learning_rate": 1.9798215009287243e-05, + "loss": 1.5638, + "step": 15324 + }, + { + "epoch": 0.18, + "grad_norm": 15.706577504176996, + "learning_rate": 1.9798098230259693e-05, + "loss": 1.7228, + "step": 15327 + }, + { + "epoch": 0.18, + "grad_norm": 39.77222899357433, + "learning_rate": 1.979798141779476e-05, + "loss": 1.7396, + "step": 15330 + }, + { + "epoch": 0.18, + "grad_norm": 6.496886318323893, + "learning_rate": 1.9797864571892845e-05, + "loss": 1.5054, + "step": 15333 + }, + { + "epoch": 0.18, + "grad_norm": 40.421560304375504, + "learning_rate": 1.9797747692554345e-05, + "loss": 1.4951, + "step": 15336 + }, + { + "epoch": 0.18, + "grad_norm": 36.195923754930156, + "learning_rate": 1.9797630779779667e-05, + "loss": 1.8, + "step": 15339 + }, + { + "epoch": 0.18, + "grad_norm": 6.267666881603551, + "learning_rate": 1.97975138335692e-05, + "loss": 1.2695, + "step": 15342 + }, + { + "epoch": 0.18, + "grad_norm": 48.95260257133916, + "learning_rate": 1.979739685392335e-05, + "loss": 2.2213, + "step": 15345 + }, + { + "epoch": 0.18, + "grad_norm": 13.695606558868938, + "learning_rate": 1.9797279840842512e-05, + "loss": 1.504, + "step": 15348 + }, + { + "epoch": 0.18, + "grad_norm": 3.4246163998845205, + "learning_rate": 1.9797162794327088e-05, + "loss": 1.717, + "step": 15351 + }, + { + "epoch": 0.18, + "grad_norm": 8.822902034258789, + "learning_rate": 1.9797045714377474e-05, + "loss": 1.7666, + "step": 15354 + }, + { + "epoch": 0.18, + "grad_norm": 11.921704087128045, + "learning_rate": 1.9796928600994074e-05, + "loss": 1.5658, + "step": 15357 + }, + { + "epoch": 0.18, + "grad_norm": 8.334612399309922, + "learning_rate": 1.9796811454177284e-05, + "loss": 1.4816, + "step": 15360 + }, + { + "epoch": 0.18, + "grad_norm": 8.869599005305727, + "learning_rate": 1.9796694273927507e-05, + "loss": 1.7419, + "step": 15363 + }, + { + "epoch": 0.18, + "grad_norm": 27.489205511081785, + "learning_rate": 1.979657706024514e-05, + "loss": 1.365, + "step": 15366 + }, + { + "epoch": 0.18, + "grad_norm": 11.103139096508597, + "learning_rate": 1.9796459813130586e-05, + "loss": 1.7229, + "step": 15369 + }, + { + "epoch": 0.18, + "grad_norm": 10.616954406823847, + "learning_rate": 1.979634253258424e-05, + "loss": 1.5106, + "step": 15372 + }, + { + "epoch": 0.18, + "grad_norm": 7.059888210757469, + "learning_rate": 1.979622521860651e-05, + "loss": 1.2874, + "step": 15375 + }, + { + "epoch": 0.18, + "grad_norm": 4.5827691929317025, + "learning_rate": 1.979610787119779e-05, + "loss": 1.6227, + "step": 15378 + }, + { + "epoch": 0.18, + "grad_norm": 10.823619993498497, + "learning_rate": 1.979599049035848e-05, + "loss": 1.8502, + "step": 15381 + }, + { + "epoch": 0.18, + "grad_norm": 4.704072467739644, + "learning_rate": 1.9795873076088984e-05, + "loss": 1.756, + "step": 15384 + }, + { + "epoch": 0.19, + "grad_norm": 16.64655158404242, + "learning_rate": 1.9795755628389703e-05, + "loss": 1.5939, + "step": 15387 + }, + { + "epoch": 0.19, + "grad_norm": 8.133328747176497, + "learning_rate": 1.9795638147261036e-05, + "loss": 1.9151, + "step": 15390 + }, + { + "epoch": 0.19, + "grad_norm": 13.262657095007325, + "learning_rate": 1.9795520632703385e-05, + "loss": 1.9135, + "step": 15393 + }, + { + "epoch": 0.19, + "grad_norm": 13.484827098980574, + "learning_rate": 1.9795403084717147e-05, + "loss": 1.5431, + "step": 15396 + }, + { + "epoch": 0.19, + "grad_norm": 44.262426352958855, + "learning_rate": 1.979528550330273e-05, + "loss": 1.7441, + "step": 15399 + }, + { + "epoch": 0.19, + "grad_norm": 10.148370104073072, + "learning_rate": 1.9795167888460532e-05, + "loss": 1.6752, + "step": 15402 + }, + { + "epoch": 0.19, + "grad_norm": 31.469054538911582, + "learning_rate": 1.9795050240190954e-05, + "loss": 1.704, + "step": 15405 + }, + { + "epoch": 0.19, + "grad_norm": 8.270034564374033, + "learning_rate": 1.9794932558494396e-05, + "loss": 1.6406, + "step": 15408 + }, + { + "epoch": 0.19, + "grad_norm": 10.277235756407416, + "learning_rate": 1.979481484337126e-05, + "loss": 1.6308, + "step": 15411 + }, + { + "epoch": 0.19, + "grad_norm": 9.926926450357211, + "learning_rate": 1.9794697094821954e-05, + "loss": 1.4251, + "step": 15414 + }, + { + "epoch": 0.19, + "grad_norm": 11.195933294224089, + "learning_rate": 1.979457931284687e-05, + "loss": 1.4277, + "step": 15417 + }, + { + "epoch": 0.19, + "grad_norm": 3.1871687357809813, + "learning_rate": 1.979446149744642e-05, + "loss": 1.4803, + "step": 15420 + }, + { + "epoch": 0.19, + "grad_norm": 12.423267333877638, + "learning_rate": 1.9794343648620997e-05, + "loss": 1.6604, + "step": 15423 + }, + { + "epoch": 0.19, + "grad_norm": 16.044011699562493, + "learning_rate": 1.9794225766371005e-05, + "loss": 1.6005, + "step": 15426 + }, + { + "epoch": 0.19, + "grad_norm": 62.64597998672459, + "learning_rate": 1.9794107850696853e-05, + "loss": 1.8169, + "step": 15429 + }, + { + "epoch": 0.19, + "grad_norm": 9.14517771738518, + "learning_rate": 1.9793989901598936e-05, + "loss": 1.5099, + "step": 15432 + }, + { + "epoch": 0.19, + "grad_norm": 11.461484057394703, + "learning_rate": 1.979387191907766e-05, + "loss": 1.7238, + "step": 15435 + }, + { + "epoch": 0.19, + "grad_norm": 16.695886498938368, + "learning_rate": 1.979375390313343e-05, + "loss": 1.7668, + "step": 15438 + }, + { + "epoch": 0.19, + "grad_norm": 6.0919325182595205, + "learning_rate": 1.979363585376664e-05, + "loss": 1.9358, + "step": 15441 + }, + { + "epoch": 0.19, + "grad_norm": 4.795135900068061, + "learning_rate": 1.9793517770977702e-05, + "loss": 1.5061, + "step": 15444 + }, + { + "epoch": 0.19, + "grad_norm": 13.80459632352427, + "learning_rate": 1.9793399654767015e-05, + "loss": 1.7239, + "step": 15447 + }, + { + "epoch": 0.19, + "grad_norm": 10.359836113136861, + "learning_rate": 1.979328150513498e-05, + "loss": 1.4973, + "step": 15450 + }, + { + "epoch": 0.19, + "grad_norm": 11.225458671229129, + "learning_rate": 1.9793163322082006e-05, + "loss": 1.3603, + "step": 15453 + }, + { + "epoch": 0.19, + "grad_norm": 36.5497711729986, + "learning_rate": 1.9793045105608493e-05, + "loss": 1.4484, + "step": 15456 + }, + { + "epoch": 0.19, + "grad_norm": 26.183727173664703, + "learning_rate": 1.979292685571484e-05, + "loss": 1.9167, + "step": 15459 + }, + { + "epoch": 0.19, + "grad_norm": 32.09101648869528, + "learning_rate": 1.9792808572401457e-05, + "loss": 1.9128, + "step": 15462 + }, + { + "epoch": 0.19, + "grad_norm": 12.503728987882575, + "learning_rate": 1.979269025566875e-05, + "loss": 1.6152, + "step": 15465 + }, + { + "epoch": 0.19, + "grad_norm": 7.5488506778706, + "learning_rate": 1.9792571905517115e-05, + "loss": 1.682, + "step": 15468 + }, + { + "epoch": 0.19, + "grad_norm": 7.351343002419001, + "learning_rate": 1.979245352194696e-05, + "loss": 1.3126, + "step": 15471 + }, + { + "epoch": 0.19, + "grad_norm": 10.621333166033343, + "learning_rate": 1.9792335104958688e-05, + "loss": 1.5976, + "step": 15474 + }, + { + "epoch": 0.19, + "grad_norm": 26.735527723840757, + "learning_rate": 1.9792216654552706e-05, + "loss": 1.4967, + "step": 15477 + }, + { + "epoch": 0.19, + "grad_norm": 9.076178028150789, + "learning_rate": 1.979209817072941e-05, + "loss": 1.4373, + "step": 15480 + }, + { + "epoch": 0.19, + "grad_norm": 10.31417498610652, + "learning_rate": 1.9791979653489214e-05, + "loss": 2.0025, + "step": 15483 + }, + { + "epoch": 0.19, + "grad_norm": 45.622712640330505, + "learning_rate": 1.979186110283252e-05, + "loss": 1.389, + "step": 15486 + }, + { + "epoch": 0.19, + "grad_norm": 5.843033738579619, + "learning_rate": 1.9791742518759725e-05, + "loss": 1.4443, + "step": 15489 + }, + { + "epoch": 0.19, + "grad_norm": 4.29180820027436, + "learning_rate": 1.9791623901271247e-05, + "loss": 1.5175, + "step": 15492 + }, + { + "epoch": 0.19, + "grad_norm": 4.546573980599141, + "learning_rate": 1.979150525036748e-05, + "loss": 1.8524, + "step": 15495 + }, + { + "epoch": 0.19, + "grad_norm": 4.584252096353607, + "learning_rate": 1.9791386566048832e-05, + "loss": 1.388, + "step": 15498 + }, + { + "epoch": 0.19, + "grad_norm": 7.014146672119141, + "learning_rate": 1.979126784831571e-05, + "loss": 1.7229, + "step": 15501 + }, + { + "epoch": 0.19, + "grad_norm": 11.53146154063109, + "learning_rate": 1.9791149097168518e-05, + "loss": 1.5861, + "step": 15504 + }, + { + "epoch": 0.19, + "grad_norm": 11.22017119742229, + "learning_rate": 1.979103031260766e-05, + "loss": 1.3905, + "step": 15507 + }, + { + "epoch": 0.19, + "grad_norm": 42.37080639867866, + "learning_rate": 1.9790911494633543e-05, + "loss": 1.7312, + "step": 15510 + }, + { + "epoch": 0.19, + "grad_norm": 13.40165274133698, + "learning_rate": 1.979079264324657e-05, + "loss": 1.6152, + "step": 15513 + }, + { + "epoch": 0.19, + "grad_norm": 12.277994880407585, + "learning_rate": 1.9790673758447148e-05, + "loss": 1.5212, + "step": 15516 + }, + { + "epoch": 0.19, + "grad_norm": 30.48687716119281, + "learning_rate": 1.979055484023568e-05, + "loss": 1.5492, + "step": 15519 + }, + { + "epoch": 0.19, + "grad_norm": 25.084239605262038, + "learning_rate": 1.9790435888612584e-05, + "loss": 1.5321, + "step": 15522 + }, + { + "epoch": 0.19, + "grad_norm": 33.295583830654316, + "learning_rate": 1.9790316903578254e-05, + "loss": 1.6206, + "step": 15525 + }, + { + "epoch": 0.19, + "grad_norm": 5.2224325700958545, + "learning_rate": 1.9790197885133097e-05, + "loss": 1.5557, + "step": 15528 + }, + { + "epoch": 0.19, + "grad_norm": 4.98377130887313, + "learning_rate": 1.9790078833277517e-05, + "loss": 1.7671, + "step": 15531 + }, + { + "epoch": 0.19, + "grad_norm": 7.494909580120126, + "learning_rate": 1.978995974801193e-05, + "loss": 1.5274, + "step": 15534 + }, + { + "epoch": 0.19, + "grad_norm": 43.04408504269039, + "learning_rate": 1.9789840629336733e-05, + "loss": 1.4774, + "step": 15537 + }, + { + "epoch": 0.19, + "grad_norm": 9.971068905159331, + "learning_rate": 1.9789721477252337e-05, + "loss": 1.7068, + "step": 15540 + }, + { + "epoch": 0.19, + "grad_norm": 9.109089247602245, + "learning_rate": 1.978960229175915e-05, + "loss": 1.53, + "step": 15543 + }, + { + "epoch": 0.19, + "grad_norm": 53.2613748560979, + "learning_rate": 1.9789483072857577e-05, + "loss": 1.3793, + "step": 15546 + }, + { + "epoch": 0.19, + "grad_norm": 17.522149784015145, + "learning_rate": 1.9789363820548022e-05, + "loss": 1.2405, + "step": 15549 + }, + { + "epoch": 0.19, + "grad_norm": 19.815309701651135, + "learning_rate": 1.9789244534830895e-05, + "loss": 1.6927, + "step": 15552 + }, + { + "epoch": 0.19, + "grad_norm": 28.82974417023129, + "learning_rate": 1.9789125215706605e-05, + "loss": 1.8298, + "step": 15555 + }, + { + "epoch": 0.19, + "grad_norm": 5.28388789801483, + "learning_rate": 1.9789005863175552e-05, + "loss": 1.6176, + "step": 15558 + }, + { + "epoch": 0.19, + "grad_norm": 217.37375144089384, + "learning_rate": 1.978888647723815e-05, + "loss": 1.6405, + "step": 15561 + }, + { + "epoch": 0.19, + "grad_norm": 7.910069281083598, + "learning_rate": 1.9788767057894806e-05, + "loss": 1.4592, + "step": 15564 + }, + { + "epoch": 0.19, + "grad_norm": 26.967871839210087, + "learning_rate": 1.9788647605145926e-05, + "loss": 1.4209, + "step": 15567 + }, + { + "epoch": 0.19, + "grad_norm": 66.1860418281605, + "learning_rate": 1.9788528118991915e-05, + "loss": 1.4097, + "step": 15570 + }, + { + "epoch": 0.19, + "grad_norm": 16.243128358240888, + "learning_rate": 1.9788408599433186e-05, + "loss": 1.6133, + "step": 15573 + }, + { + "epoch": 0.19, + "grad_norm": 36.12292295414874, + "learning_rate": 1.978828904647014e-05, + "loss": 1.6211, + "step": 15576 + }, + { + "epoch": 0.19, + "grad_norm": 26.721226490982605, + "learning_rate": 1.978816946010319e-05, + "loss": 1.6938, + "step": 15579 + }, + { + "epoch": 0.19, + "grad_norm": 3.4504409195957635, + "learning_rate": 1.978804984033275e-05, + "loss": 1.4316, + "step": 15582 + }, + { + "epoch": 0.19, + "grad_norm": 33.670416538265705, + "learning_rate": 1.9787930187159214e-05, + "loss": 1.8222, + "step": 15585 + }, + { + "epoch": 0.19, + "grad_norm": 48.57568466444321, + "learning_rate": 1.9787810500583e-05, + "loss": 2.146, + "step": 15588 + }, + { + "epoch": 0.19, + "grad_norm": 5.512070825362028, + "learning_rate": 1.9787690780604517e-05, + "loss": 1.4319, + "step": 15591 + }, + { + "epoch": 0.19, + "grad_norm": 27.862139234453256, + "learning_rate": 1.9787571027224168e-05, + "loss": 1.4909, + "step": 15594 + }, + { + "epoch": 0.19, + "grad_norm": 8.53417487897, + "learning_rate": 1.9787451240442366e-05, + "loss": 1.5167, + "step": 15597 + }, + { + "epoch": 0.19, + "grad_norm": 13.145968861040027, + "learning_rate": 1.9787331420259518e-05, + "loss": 1.9462, + "step": 15600 + }, + { + "epoch": 0.19, + "grad_norm": 89.05990719248828, + "learning_rate": 1.9787211566676032e-05, + "loss": 1.1839, + "step": 15603 + }, + { + "epoch": 0.19, + "grad_norm": 2.7984393067231164, + "learning_rate": 1.978709167969232e-05, + "loss": 1.8153, + "step": 15606 + }, + { + "epoch": 0.19, + "grad_norm": 27.739278218154276, + "learning_rate": 1.9786971759308787e-05, + "loss": 1.5212, + "step": 15609 + }, + { + "epoch": 0.19, + "grad_norm": 5.951680557721724, + "learning_rate": 1.9786851805525847e-05, + "loss": 1.6602, + "step": 15612 + }, + { + "epoch": 0.19, + "grad_norm": 30.315095867486292, + "learning_rate": 1.9786731818343907e-05, + "loss": 1.5447, + "step": 15615 + }, + { + "epoch": 0.19, + "grad_norm": 11.46376633129567, + "learning_rate": 1.9786611797763377e-05, + "loss": 1.5935, + "step": 15618 + }, + { + "epoch": 0.19, + "grad_norm": 12.871206915129582, + "learning_rate": 1.9786491743784662e-05, + "loss": 1.7384, + "step": 15621 + }, + { + "epoch": 0.19, + "grad_norm": 95.99702992549268, + "learning_rate": 1.978637165640818e-05, + "loss": 1.5299, + "step": 15624 + }, + { + "epoch": 0.19, + "grad_norm": 12.367421655333093, + "learning_rate": 1.9786251535634335e-05, + "loss": 1.7902, + "step": 15627 + }, + { + "epoch": 0.19, + "grad_norm": 6.715528662691853, + "learning_rate": 1.978613138146354e-05, + "loss": 1.8642, + "step": 15630 + }, + { + "epoch": 0.19, + "grad_norm": 7.681127983190974, + "learning_rate": 1.9786011193896203e-05, + "loss": 1.3009, + "step": 15633 + }, + { + "epoch": 0.19, + "grad_norm": 27.263644606515854, + "learning_rate": 1.9785890972932732e-05, + "loss": 1.7681, + "step": 15636 + }, + { + "epoch": 0.19, + "grad_norm": 8.577342761419763, + "learning_rate": 1.9785770718573544e-05, + "loss": 1.5878, + "step": 15639 + }, + { + "epoch": 0.19, + "grad_norm": 3.77316460861342, + "learning_rate": 1.9785650430819042e-05, + "loss": 1.8203, + "step": 15642 + }, + { + "epoch": 0.19, + "grad_norm": 4.99850224583098, + "learning_rate": 1.9785530109669643e-05, + "loss": 1.7222, + "step": 15645 + }, + { + "epoch": 0.19, + "grad_norm": 78.85625043149696, + "learning_rate": 1.978540975512575e-05, + "loss": 1.9806, + "step": 15648 + }, + { + "epoch": 0.19, + "grad_norm": 30.107975091962718, + "learning_rate": 1.978528936718778e-05, + "loss": 1.7505, + "step": 15651 + }, + { + "epoch": 0.19, + "grad_norm": 26.8151941425107, + "learning_rate": 1.9785168945856146e-05, + "loss": 1.7995, + "step": 15654 + }, + { + "epoch": 0.19, + "grad_norm": 143.06461427730918, + "learning_rate": 1.978504849113125e-05, + "loss": 1.8647, + "step": 15657 + }, + { + "epoch": 0.19, + "grad_norm": 10.272799600778336, + "learning_rate": 1.9784928003013507e-05, + "loss": 1.326, + "step": 15660 + }, + { + "epoch": 0.19, + "grad_norm": 11.209632462506587, + "learning_rate": 1.9784807481503336e-05, + "loss": 1.7524, + "step": 15663 + }, + { + "epoch": 0.19, + "grad_norm": 18.820587682358603, + "learning_rate": 1.9784686926601137e-05, + "loss": 1.5225, + "step": 15666 + }, + { + "epoch": 0.19, + "grad_norm": 15.490239545094797, + "learning_rate": 1.9784566338307325e-05, + "loss": 1.6926, + "step": 15669 + }, + { + "epoch": 0.19, + "grad_norm": 38.68278584917576, + "learning_rate": 1.9784445716622314e-05, + "loss": 1.8507, + "step": 15672 + }, + { + "epoch": 0.19, + "grad_norm": 36.14533494811743, + "learning_rate": 1.9784325061546513e-05, + "loss": 1.6803, + "step": 15675 + }, + { + "epoch": 0.19, + "grad_norm": 17.667081514821586, + "learning_rate": 1.9784204373080338e-05, + "loss": 1.2963, + "step": 15678 + }, + { + "epoch": 0.19, + "grad_norm": 15.921299948013381, + "learning_rate": 1.9784083651224194e-05, + "loss": 1.5949, + "step": 15681 + }, + { + "epoch": 0.19, + "grad_norm": 74.57431780152817, + "learning_rate": 1.97839628959785e-05, + "loss": 1.7293, + "step": 15684 + }, + { + "epoch": 0.19, + "grad_norm": 12.99832149475731, + "learning_rate": 1.9783842107343662e-05, + "loss": 1.3725, + "step": 15687 + }, + { + "epoch": 0.19, + "grad_norm": 17.059432049768468, + "learning_rate": 1.97837212853201e-05, + "loss": 1.8006, + "step": 15690 + }, + { + "epoch": 0.19, + "grad_norm": 8.250985240839455, + "learning_rate": 1.9783600429908216e-05, + "loss": 1.7848, + "step": 15693 + }, + { + "epoch": 0.19, + "grad_norm": 3.215362535669746, + "learning_rate": 1.978347954110843e-05, + "loss": 1.4995, + "step": 15696 + }, + { + "epoch": 0.19, + "grad_norm": 19.617389452875923, + "learning_rate": 1.978335861892115e-05, + "loss": 2.0658, + "step": 15699 + }, + { + "epoch": 0.19, + "grad_norm": 30.54103459827101, + "learning_rate": 1.9783237663346794e-05, + "loss": 1.6238, + "step": 15702 + }, + { + "epoch": 0.19, + "grad_norm": 19.535102840505353, + "learning_rate": 1.9783116674385773e-05, + "loss": 1.8415, + "step": 15705 + }, + { + "epoch": 0.19, + "grad_norm": 9.899695183243379, + "learning_rate": 1.9782995652038495e-05, + "loss": 1.8242, + "step": 15708 + }, + { + "epoch": 0.19, + "grad_norm": 22.720588902766252, + "learning_rate": 1.9782874596305376e-05, + "loss": 1.708, + "step": 15711 + }, + { + "epoch": 0.19, + "grad_norm": 9.697512723531363, + "learning_rate": 1.9782753507186833e-05, + "loss": 2.1776, + "step": 15714 + }, + { + "epoch": 0.19, + "grad_norm": 13.842164326233922, + "learning_rate": 1.9782632384683273e-05, + "loss": 1.7487, + "step": 15717 + }, + { + "epoch": 0.19, + "grad_norm": 9.826453011145551, + "learning_rate": 1.9782511228795114e-05, + "loss": 1.8864, + "step": 15720 + }, + { + "epoch": 0.19, + "grad_norm": 17.087524220182953, + "learning_rate": 1.978239003952277e-05, + "loss": 1.44, + "step": 15723 + }, + { + "epoch": 0.19, + "grad_norm": 28.09721828054996, + "learning_rate": 1.978226881686665e-05, + "loss": 1.4368, + "step": 15726 + }, + { + "epoch": 0.19, + "grad_norm": 14.498054304140304, + "learning_rate": 1.978214756082717e-05, + "loss": 1.4911, + "step": 15729 + }, + { + "epoch": 0.19, + "grad_norm": 11.00558740036365, + "learning_rate": 1.978202627140474e-05, + "loss": 1.6904, + "step": 15732 + }, + { + "epoch": 0.19, + "grad_norm": 10.623889075460788, + "learning_rate": 1.9781904948599782e-05, + "loss": 1.3282, + "step": 15735 + }, + { + "epoch": 0.19, + "grad_norm": 7.588119469626341, + "learning_rate": 1.978178359241271e-05, + "loss": 1.8029, + "step": 15738 + }, + { + "epoch": 0.19, + "grad_norm": 28.820129255664718, + "learning_rate": 1.9781662202843927e-05, + "loss": 1.6842, + "step": 15741 + }, + { + "epoch": 0.19, + "grad_norm": 7.810530071123504, + "learning_rate": 1.9781540779893856e-05, + "loss": 1.6239, + "step": 15744 + }, + { + "epoch": 0.19, + "grad_norm": 6.978548941657189, + "learning_rate": 1.9781419323562912e-05, + "loss": 1.6212, + "step": 15747 + }, + { + "epoch": 0.19, + "grad_norm": 18.145834028735973, + "learning_rate": 1.9781297833851507e-05, + "loss": 1.8237, + "step": 15750 + }, + { + "epoch": 0.19, + "grad_norm": 5.651266004397055, + "learning_rate": 1.978117631076005e-05, + "loss": 1.621, + "step": 15753 + }, + { + "epoch": 0.19, + "grad_norm": 23.802283202906615, + "learning_rate": 1.978105475428897e-05, + "loss": 1.4497, + "step": 15756 + }, + { + "epoch": 0.19, + "grad_norm": 8.153485538665024, + "learning_rate": 1.9780933164438666e-05, + "loss": 1.2959, + "step": 15759 + }, + { + "epoch": 0.19, + "grad_norm": 40.53992988345362, + "learning_rate": 1.9780811541209566e-05, + "loss": 1.6888, + "step": 15762 + }, + { + "epoch": 0.19, + "grad_norm": 13.262200107238069, + "learning_rate": 1.9780689884602076e-05, + "loss": 2.0116, + "step": 15765 + }, + { + "epoch": 0.19, + "grad_norm": 28.174259531374673, + "learning_rate": 1.9780568194616615e-05, + "loss": 1.2803, + "step": 15768 + }, + { + "epoch": 0.19, + "grad_norm": 23.316304061470948, + "learning_rate": 1.9780446471253598e-05, + "loss": 1.6496, + "step": 15771 + }, + { + "epoch": 0.19, + "grad_norm": 6.199528830829878, + "learning_rate": 1.978032471451344e-05, + "loss": 1.1879, + "step": 15774 + }, + { + "epoch": 0.19, + "grad_norm": 4.790421333274895, + "learning_rate": 1.9780202924396555e-05, + "loss": 1.4819, + "step": 15777 + }, + { + "epoch": 0.19, + "grad_norm": 9.808096254067737, + "learning_rate": 1.978008110090336e-05, + "loss": 1.7855, + "step": 15780 + }, + { + "epoch": 0.19, + "grad_norm": 30.755829561612387, + "learning_rate": 1.9779959244034273e-05, + "loss": 2.1898, + "step": 15783 + }, + { + "epoch": 0.19, + "grad_norm": 6.878817460020863, + "learning_rate": 1.977983735378971e-05, + "loss": 1.5857, + "step": 15786 + }, + { + "epoch": 0.19, + "grad_norm": 43.832675800499885, + "learning_rate": 1.977971543017008e-05, + "loss": 1.8553, + "step": 15789 + }, + { + "epoch": 0.19, + "grad_norm": 7.268509331813711, + "learning_rate": 1.9779593473175807e-05, + "loss": 1.4962, + "step": 15792 + }, + { + "epoch": 0.19, + "grad_norm": 15.270214172282492, + "learning_rate": 1.9779471482807303e-05, + "loss": 1.5865, + "step": 15795 + }, + { + "epoch": 0.19, + "grad_norm": 9.005837397044202, + "learning_rate": 1.9779349459064986e-05, + "loss": 1.3741, + "step": 15798 + }, + { + "epoch": 0.19, + "grad_norm": 2.9674381131538823, + "learning_rate": 1.977922740194927e-05, + "loss": 1.4207, + "step": 15801 + }, + { + "epoch": 0.19, + "grad_norm": 15.17253929155643, + "learning_rate": 1.9779105311460576e-05, + "loss": 1.5261, + "step": 15804 + }, + { + "epoch": 0.19, + "grad_norm": 44.41597570200414, + "learning_rate": 1.9778983187599317e-05, + "loss": 1.4297, + "step": 15807 + }, + { + "epoch": 0.19, + "grad_norm": 10.684322369707953, + "learning_rate": 1.977886103036591e-05, + "loss": 1.3258, + "step": 15810 + }, + { + "epoch": 0.19, + "grad_norm": 7.972173042914133, + "learning_rate": 1.977873883976077e-05, + "loss": 1.4111, + "step": 15813 + }, + { + "epoch": 0.19, + "grad_norm": 14.964595953007805, + "learning_rate": 1.977861661578432e-05, + "loss": 1.8329, + "step": 15816 + }, + { + "epoch": 0.19, + "grad_norm": 20.238779496523748, + "learning_rate": 1.977849435843697e-05, + "loss": 1.42, + "step": 15819 + }, + { + "epoch": 0.19, + "grad_norm": 19.325866812946803, + "learning_rate": 1.977837206771915e-05, + "loss": 1.4499, + "step": 15822 + }, + { + "epoch": 0.19, + "grad_norm": 23.293628596356573, + "learning_rate": 1.9778249743631262e-05, + "loss": 1.6121, + "step": 15825 + }, + { + "epoch": 0.19, + "grad_norm": 6.51549800187725, + "learning_rate": 1.977812738617373e-05, + "loss": 1.526, + "step": 15828 + }, + { + "epoch": 0.19, + "grad_norm": 16.743216369746023, + "learning_rate": 1.977800499534697e-05, + "loss": 1.591, + "step": 15831 + }, + { + "epoch": 0.19, + "grad_norm": 49.95989803476255, + "learning_rate": 1.9777882571151402e-05, + "loss": 1.7455, + "step": 15834 + }, + { + "epoch": 0.19, + "grad_norm": 9.508073412983606, + "learning_rate": 1.9777760113587443e-05, + "loss": 1.5812, + "step": 15837 + }, + { + "epoch": 0.19, + "grad_norm": 5.818230975293364, + "learning_rate": 1.977763762265551e-05, + "loss": 1.3611, + "step": 15840 + }, + { + "epoch": 0.19, + "grad_norm": 7.452849825013333, + "learning_rate": 1.9777515098356025e-05, + "loss": 1.7653, + "step": 15843 + }, + { + "epoch": 0.19, + "grad_norm": 15.815386238184292, + "learning_rate": 1.97773925406894e-05, + "loss": 1.8841, + "step": 15846 + }, + { + "epoch": 0.19, + "grad_norm": 17.031101161684177, + "learning_rate": 1.9777269949656057e-05, + "loss": 1.5025, + "step": 15849 + }, + { + "epoch": 0.19, + "grad_norm": 23.821740581636455, + "learning_rate": 1.9777147325256413e-05, + "loss": 1.5715, + "step": 15852 + }, + { + "epoch": 0.19, + "grad_norm": 12.385480489341946, + "learning_rate": 1.9777024667490886e-05, + "loss": 1.4548, + "step": 15855 + }, + { + "epoch": 0.19, + "grad_norm": 19.439564354035795, + "learning_rate": 1.9776901976359896e-05, + "loss": 1.8277, + "step": 15858 + }, + { + "epoch": 0.19, + "grad_norm": 12.978841846260362, + "learning_rate": 1.9776779251863864e-05, + "loss": 1.5188, + "step": 15861 + }, + { + "epoch": 0.19, + "grad_norm": 39.254294165360534, + "learning_rate": 1.9776656494003204e-05, + "loss": 1.668, + "step": 15864 + }, + { + "epoch": 0.19, + "grad_norm": 10.880897166104099, + "learning_rate": 1.9776533702778337e-05, + "loss": 1.3399, + "step": 15867 + }, + { + "epoch": 0.19, + "grad_norm": 4.288268578606508, + "learning_rate": 1.977641087818968e-05, + "loss": 1.6289, + "step": 15870 + }, + { + "epoch": 0.19, + "grad_norm": 66.95784972214706, + "learning_rate": 1.9776288020237656e-05, + "loss": 1.8685, + "step": 15873 + }, + { + "epoch": 0.19, + "grad_norm": 11.779617328408921, + "learning_rate": 1.9776165128922683e-05, + "loss": 2.021, + "step": 15876 + }, + { + "epoch": 0.19, + "grad_norm": 8.276503672351065, + "learning_rate": 1.9776042204245176e-05, + "loss": 1.7301, + "step": 15879 + }, + { + "epoch": 0.19, + "grad_norm": 8.826258483736753, + "learning_rate": 1.9775919246205563e-05, + "loss": 1.3437, + "step": 15882 + }, + { + "epoch": 0.19, + "grad_norm": 14.697672603125275, + "learning_rate": 1.9775796254804255e-05, + "loss": 1.606, + "step": 15885 + }, + { + "epoch": 0.19, + "grad_norm": 24.664627972523803, + "learning_rate": 1.9775673230041677e-05, + "loss": 1.5736, + "step": 15888 + }, + { + "epoch": 0.19, + "grad_norm": 13.029621990914752, + "learning_rate": 1.9775550171918246e-05, + "loss": 1.6501, + "step": 15891 + }, + { + "epoch": 0.19, + "grad_norm": 13.59654863715727, + "learning_rate": 1.977542708043439e-05, + "loss": 1.5363, + "step": 15894 + }, + { + "epoch": 0.19, + "grad_norm": 12.544662009522003, + "learning_rate": 1.9775303955590514e-05, + "loss": 1.6115, + "step": 15897 + }, + { + "epoch": 0.19, + "grad_norm": 6.7084026956166785, + "learning_rate": 1.977518079738705e-05, + "loss": 1.4839, + "step": 15900 + }, + { + "epoch": 0.19, + "grad_norm": 7.127779455611187, + "learning_rate": 1.9775057605824417e-05, + "loss": 1.4427, + "step": 15903 + }, + { + "epoch": 0.19, + "grad_norm": 23.731129185408694, + "learning_rate": 1.977493438090303e-05, + "loss": 1.5986, + "step": 15906 + }, + { + "epoch": 0.19, + "grad_norm": 13.833092960624738, + "learning_rate": 1.9774811122623316e-05, + "loss": 1.4688, + "step": 15909 + }, + { + "epoch": 0.19, + "grad_norm": 18.318510207227543, + "learning_rate": 1.977468783098569e-05, + "loss": 1.714, + "step": 15912 + }, + { + "epoch": 0.19, + "grad_norm": 28.652029627376386, + "learning_rate": 1.9774564505990578e-05, + "loss": 1.6332, + "step": 15915 + }, + { + "epoch": 0.19, + "grad_norm": 96.71898977309769, + "learning_rate": 1.9774441147638395e-05, + "loss": 1.3417, + "step": 15918 + }, + { + "epoch": 0.19, + "grad_norm": 46.08892792681686, + "learning_rate": 1.9774317755929568e-05, + "loss": 1.6376, + "step": 15921 + }, + { + "epoch": 0.19, + "grad_norm": 7.806124508036876, + "learning_rate": 1.977419433086451e-05, + "loss": 1.6275, + "step": 15924 + }, + { + "epoch": 0.19, + "grad_norm": 36.02409366802322, + "learning_rate": 1.9774070872443653e-05, + "loss": 1.7581, + "step": 15927 + }, + { + "epoch": 0.19, + "grad_norm": 16.581870497053764, + "learning_rate": 1.977394738066741e-05, + "loss": 1.5723, + "step": 15930 + }, + { + "epoch": 0.19, + "grad_norm": 20.66455328247161, + "learning_rate": 1.9773823855536206e-05, + "loss": 1.2968, + "step": 15933 + }, + { + "epoch": 0.19, + "grad_norm": 14.027977035963566, + "learning_rate": 1.9773700297050463e-05, + "loss": 1.6007, + "step": 15936 + }, + { + "epoch": 0.19, + "grad_norm": 90.48314628577698, + "learning_rate": 1.9773576705210603e-05, + "loss": 1.1398, + "step": 15939 + }, + { + "epoch": 0.19, + "grad_norm": 39.959018281575, + "learning_rate": 1.977345308001704e-05, + "loss": 1.9281, + "step": 15942 + }, + { + "epoch": 0.19, + "grad_norm": 17.84622787709011, + "learning_rate": 1.9773329421470207e-05, + "loss": 1.4392, + "step": 15945 + }, + { + "epoch": 0.19, + "grad_norm": 10.775782898286952, + "learning_rate": 1.977320572957052e-05, + "loss": 1.6081, + "step": 15948 + }, + { + "epoch": 0.19, + "grad_norm": 22.742170944005608, + "learning_rate": 1.97730820043184e-05, + "loss": 1.4345, + "step": 15951 + }, + { + "epoch": 0.19, + "grad_norm": 27.241228730238696, + "learning_rate": 1.977295824571427e-05, + "loss": 1.7339, + "step": 15954 + }, + { + "epoch": 0.19, + "grad_norm": 3.562951384333822, + "learning_rate": 1.977283445375856e-05, + "loss": 1.7312, + "step": 15957 + }, + { + "epoch": 0.19, + "grad_norm": 13.869423235448458, + "learning_rate": 1.9772710628451682e-05, + "loss": 1.7099, + "step": 15960 + }, + { + "epoch": 0.19, + "grad_norm": 11.301293149844383, + "learning_rate": 1.9772586769794062e-05, + "loss": 1.7218, + "step": 15963 + }, + { + "epoch": 0.19, + "grad_norm": 7.224619110453203, + "learning_rate": 1.9772462877786125e-05, + "loss": 1.7148, + "step": 15966 + }, + { + "epoch": 0.19, + "grad_norm": 2.637726194370054, + "learning_rate": 1.9772338952428292e-05, + "loss": 1.5544, + "step": 15969 + }, + { + "epoch": 0.19, + "grad_norm": 15.81755149863656, + "learning_rate": 1.9772214993720987e-05, + "loss": 1.2293, + "step": 15972 + }, + { + "epoch": 0.19, + "grad_norm": 40.88615348424549, + "learning_rate": 1.9772091001664632e-05, + "loss": 1.5358, + "step": 15975 + }, + { + "epoch": 0.19, + "grad_norm": 10.000043772318962, + "learning_rate": 1.9771966976259648e-05, + "loss": 1.5345, + "step": 15978 + }, + { + "epoch": 0.19, + "grad_norm": 12.973047328476076, + "learning_rate": 1.9771842917506462e-05, + "loss": 1.4111, + "step": 15981 + }, + { + "epoch": 0.19, + "grad_norm": 14.364837211678816, + "learning_rate": 1.9771718825405494e-05, + "loss": 1.9948, + "step": 15984 + }, + { + "epoch": 0.19, + "grad_norm": 8.230763086421986, + "learning_rate": 1.9771594699957174e-05, + "loss": 1.7087, + "step": 15987 + }, + { + "epoch": 0.19, + "grad_norm": 41.50183475892139, + "learning_rate": 1.9771470541161916e-05, + "loss": 1.7826, + "step": 15990 + }, + { + "epoch": 0.19, + "grad_norm": 5.207917882281873, + "learning_rate": 1.9771346349020154e-05, + "loss": 1.7555, + "step": 15993 + }, + { + "epoch": 0.19, + "grad_norm": 12.533531805114974, + "learning_rate": 1.9771222123532303e-05, + "loss": 1.5335, + "step": 15996 + }, + { + "epoch": 0.19, + "grad_norm": 40.23252212100192, + "learning_rate": 1.977109786469879e-05, + "loss": 1.4375, + "step": 15999 + }, + { + "epoch": 0.19, + "grad_norm": 10.016775378851108, + "learning_rate": 1.9770973572520042e-05, + "loss": 1.5949, + "step": 16002 + }, + { + "epoch": 0.19, + "grad_norm": 15.639959030880759, + "learning_rate": 1.977084924699648e-05, + "loss": 1.5031, + "step": 16005 + }, + { + "epoch": 0.19, + "grad_norm": 2.665273338009291, + "learning_rate": 1.977072488812853e-05, + "loss": 1.5907, + "step": 16008 + }, + { + "epoch": 0.19, + "grad_norm": 16.062510606989562, + "learning_rate": 1.977060049591661e-05, + "loss": 1.4971, + "step": 16011 + }, + { + "epoch": 0.19, + "grad_norm": 37.65059647524001, + "learning_rate": 1.9770476070361154e-05, + "loss": 1.6848, + "step": 16014 + }, + { + "epoch": 0.19, + "grad_norm": 6.615875277686152, + "learning_rate": 1.977035161146258e-05, + "loss": 1.6032, + "step": 16017 + }, + { + "epoch": 0.19, + "grad_norm": 3.643054210954868, + "learning_rate": 1.977022711922132e-05, + "loss": 1.3218, + "step": 16020 + }, + { + "epoch": 0.19, + "grad_norm": 14.840264298235443, + "learning_rate": 1.977010259363779e-05, + "loss": 1.7725, + "step": 16023 + }, + { + "epoch": 0.19, + "grad_norm": 25.627493947349464, + "learning_rate": 1.9769978034712422e-05, + "loss": 1.6366, + "step": 16026 + }, + { + "epoch": 0.19, + "grad_norm": 71.45401899849972, + "learning_rate": 1.9769853442445637e-05, + "loss": 1.6313, + "step": 16029 + }, + { + "epoch": 0.19, + "grad_norm": 16.74586873783619, + "learning_rate": 1.976972881683786e-05, + "loss": 1.7039, + "step": 16032 + }, + { + "epoch": 0.19, + "grad_norm": 31.09397108131759, + "learning_rate": 1.976960415788952e-05, + "loss": 1.6259, + "step": 16035 + }, + { + "epoch": 0.19, + "grad_norm": 9.706533710493598, + "learning_rate": 1.9769479465601038e-05, + "loss": 2.0387, + "step": 16038 + }, + { + "epoch": 0.19, + "grad_norm": 14.034347898577703, + "learning_rate": 1.976935473997284e-05, + "loss": 1.9096, + "step": 16041 + }, + { + "epoch": 0.19, + "grad_norm": 31.00198380342452, + "learning_rate": 1.9769229981005356e-05, + "loss": 1.5023, + "step": 16044 + }, + { + "epoch": 0.19, + "grad_norm": 15.086633588093386, + "learning_rate": 1.9769105188699005e-05, + "loss": 1.5157, + "step": 16047 + }, + { + "epoch": 0.19, + "grad_norm": 11.443008796064449, + "learning_rate": 1.9768980363054224e-05, + "loss": 1.3841, + "step": 16050 + }, + { + "epoch": 0.19, + "grad_norm": 3.4130694181171872, + "learning_rate": 1.9768855504071424e-05, + "loss": 1.647, + "step": 16053 + }, + { + "epoch": 0.19, + "grad_norm": 40.665977780408596, + "learning_rate": 1.9768730611751045e-05, + "loss": 1.3881, + "step": 16056 + }, + { + "epoch": 0.19, + "grad_norm": 5.840774182957001, + "learning_rate": 1.9768605686093502e-05, + "loss": 1.6791, + "step": 16059 + }, + { + "epoch": 0.19, + "grad_norm": 18.411443966883407, + "learning_rate": 1.9768480727099232e-05, + "loss": 2.1178, + "step": 16062 + }, + { + "epoch": 0.19, + "grad_norm": 7.071058945079863, + "learning_rate": 1.9768355734768653e-05, + "loss": 1.3767, + "step": 16065 + }, + { + "epoch": 0.19, + "grad_norm": 5.560715284138505, + "learning_rate": 1.976823070910219e-05, + "loss": 1.3215, + "step": 16068 + }, + { + "epoch": 0.19, + "grad_norm": 27.9506131831131, + "learning_rate": 1.9768105650100282e-05, + "loss": 1.3238, + "step": 16071 + }, + { + "epoch": 0.19, + "grad_norm": 16.058129613596705, + "learning_rate": 1.9767980557763345e-05, + "loss": 1.9288, + "step": 16074 + }, + { + "epoch": 0.19, + "grad_norm": 36.24097459895102, + "learning_rate": 1.9767855432091806e-05, + "loss": 1.8381, + "step": 16077 + }, + { + "epoch": 0.19, + "grad_norm": 29.81011807714078, + "learning_rate": 1.9767730273086098e-05, + "loss": 1.5693, + "step": 16080 + }, + { + "epoch": 0.19, + "grad_norm": 15.051711555870275, + "learning_rate": 1.9767605080746645e-05, + "loss": 1.6139, + "step": 16083 + }, + { + "epoch": 0.19, + "grad_norm": 28.713839019786615, + "learning_rate": 1.976747985507387e-05, + "loss": 1.4962, + "step": 16086 + }, + { + "epoch": 0.19, + "grad_norm": 14.283057031021661, + "learning_rate": 1.976735459606821e-05, + "loss": 1.7286, + "step": 16089 + }, + { + "epoch": 0.19, + "grad_norm": 6.931591396394673, + "learning_rate": 1.9767229303730083e-05, + "loss": 1.4994, + "step": 16092 + }, + { + "epoch": 0.19, + "grad_norm": 8.518706923507958, + "learning_rate": 1.976710397805992e-05, + "loss": 1.6876, + "step": 16095 + }, + { + "epoch": 0.19, + "grad_norm": 45.39709674151146, + "learning_rate": 1.976697861905815e-05, + "loss": 1.5531, + "step": 16098 + }, + { + "epoch": 0.19, + "grad_norm": 38.58941504788245, + "learning_rate": 1.97668532267252e-05, + "loss": 1.3622, + "step": 16101 + }, + { + "epoch": 0.19, + "grad_norm": 8.101375542212264, + "learning_rate": 1.9766727801061503e-05, + "loss": 1.4906, + "step": 16104 + }, + { + "epoch": 0.19, + "grad_norm": 13.004484416816858, + "learning_rate": 1.9766602342067476e-05, + "loss": 1.3507, + "step": 16107 + }, + { + "epoch": 0.19, + "grad_norm": 22.365210435443814, + "learning_rate": 1.9766476849743557e-05, + "loss": 1.6133, + "step": 16110 + }, + { + "epoch": 0.19, + "grad_norm": 10.430868929309744, + "learning_rate": 1.9766351324090168e-05, + "loss": 1.2487, + "step": 16113 + }, + { + "epoch": 0.19, + "grad_norm": 17.524680777205855, + "learning_rate": 1.9766225765107737e-05, + "loss": 1.4738, + "step": 16116 + }, + { + "epoch": 0.19, + "grad_norm": 16.819481173233665, + "learning_rate": 1.9766100172796704e-05, + "loss": 1.6683, + "step": 16119 + }, + { + "epoch": 0.19, + "grad_norm": 18.76690197304226, + "learning_rate": 1.976597454715748e-05, + "loss": 1.8268, + "step": 16122 + }, + { + "epoch": 0.19, + "grad_norm": 24.301396602471826, + "learning_rate": 1.9765848888190506e-05, + "loss": 1.729, + "step": 16125 + }, + { + "epoch": 0.19, + "grad_norm": 9.351636303425419, + "learning_rate": 1.9765723195896206e-05, + "loss": 1.6411, + "step": 16128 + }, + { + "epoch": 0.19, + "grad_norm": 10.688172154418387, + "learning_rate": 1.9765597470275014e-05, + "loss": 1.5036, + "step": 16131 + }, + { + "epoch": 0.19, + "grad_norm": 6.6017678992208895, + "learning_rate": 1.9765471711327352e-05, + "loss": 1.3453, + "step": 16134 + }, + { + "epoch": 0.19, + "grad_norm": 4.523274788026656, + "learning_rate": 1.9765345919053653e-05, + "loss": 1.8579, + "step": 16137 + }, + { + "epoch": 0.19, + "grad_norm": 30.77787948973449, + "learning_rate": 1.9765220093454348e-05, + "loss": 1.5916, + "step": 16140 + }, + { + "epoch": 0.19, + "grad_norm": 15.451843624225802, + "learning_rate": 1.9765094234529862e-05, + "loss": 1.5192, + "step": 16143 + }, + { + "epoch": 0.19, + "grad_norm": 12.901632427880948, + "learning_rate": 1.9764968342280627e-05, + "loss": 1.763, + "step": 16146 + }, + { + "epoch": 0.19, + "grad_norm": 92.49793296457281, + "learning_rate": 1.9764842416707073e-05, + "loss": 1.4498, + "step": 16149 + }, + { + "epoch": 0.19, + "grad_norm": 22.519473441090415, + "learning_rate": 1.9764716457809627e-05, + "loss": 1.6366, + "step": 16152 + }, + { + "epoch": 0.19, + "grad_norm": 6.817186470377514, + "learning_rate": 1.9764590465588725e-05, + "loss": 1.3184, + "step": 16155 + }, + { + "epoch": 0.19, + "grad_norm": 23.900434683505942, + "learning_rate": 1.976446444004479e-05, + "loss": 1.5459, + "step": 16158 + }, + { + "epoch": 0.19, + "grad_norm": 34.47560817978561, + "learning_rate": 1.9764338381178257e-05, + "loss": 1.6331, + "step": 16161 + }, + { + "epoch": 0.19, + "grad_norm": 26.381315720309, + "learning_rate": 1.976421228898955e-05, + "loss": 2.0684, + "step": 16164 + }, + { + "epoch": 0.19, + "grad_norm": 8.46019720728984, + "learning_rate": 1.9764086163479108e-05, + "loss": 1.1726, + "step": 16167 + }, + { + "epoch": 0.19, + "grad_norm": 22.567943175129784, + "learning_rate": 1.9763960004647354e-05, + "loss": 1.7152, + "step": 16170 + }, + { + "epoch": 0.19, + "grad_norm": 4.667206481562884, + "learning_rate": 1.9763833812494724e-05, + "loss": 1.73, + "step": 16173 + }, + { + "epoch": 0.19, + "grad_norm": 17.021187157737472, + "learning_rate": 1.9763707587021647e-05, + "loss": 1.494, + "step": 16176 + }, + { + "epoch": 0.19, + "grad_norm": 5.883592835235761, + "learning_rate": 1.976358132822855e-05, + "loss": 1.3902, + "step": 16179 + }, + { + "epoch": 0.19, + "grad_norm": 8.791537736760862, + "learning_rate": 1.9763455036115867e-05, + "loss": 1.6801, + "step": 16182 + }, + { + "epoch": 0.19, + "grad_norm": 36.49568225361012, + "learning_rate": 1.976332871068403e-05, + "loss": 1.6576, + "step": 16185 + }, + { + "epoch": 0.19, + "grad_norm": 5.9436775435772216, + "learning_rate": 1.9763202351933465e-05, + "loss": 1.5622, + "step": 16188 + }, + { + "epoch": 0.19, + "grad_norm": 11.066990175002243, + "learning_rate": 1.976307595986461e-05, + "loss": 1.6337, + "step": 16191 + }, + { + "epoch": 0.19, + "grad_norm": 8.03998703047146, + "learning_rate": 1.9762949534477896e-05, + "loss": 1.8479, + "step": 16194 + }, + { + "epoch": 0.19, + "grad_norm": 8.032567544843083, + "learning_rate": 1.976282307577375e-05, + "loss": 1.5045, + "step": 16197 + }, + { + "epoch": 0.19, + "grad_norm": 35.51784863061648, + "learning_rate": 1.9762696583752605e-05, + "loss": 1.3153, + "step": 16200 + }, + { + "epoch": 0.19, + "grad_norm": 268.5228653591828, + "learning_rate": 1.9762570058414893e-05, + "loss": 1.7939, + "step": 16203 + }, + { + "epoch": 0.19, + "grad_norm": 7.416637624385835, + "learning_rate": 1.9762443499761047e-05, + "loss": 1.7017, + "step": 16206 + }, + { + "epoch": 0.19, + "grad_norm": 14.353650710416511, + "learning_rate": 1.9762316907791494e-05, + "loss": 1.5641, + "step": 16209 + }, + { + "epoch": 0.19, + "grad_norm": 25.01382990723666, + "learning_rate": 1.9762190282506672e-05, + "loss": 1.9264, + "step": 16212 + }, + { + "epoch": 0.19, + "grad_norm": 24.08529016428678, + "learning_rate": 1.976206362390701e-05, + "loss": 1.9137, + "step": 16215 + }, + { + "epoch": 0.2, + "grad_norm": 7.03315813258751, + "learning_rate": 1.9761936931992944e-05, + "loss": 1.6943, + "step": 16218 + }, + { + "epoch": 0.2, + "grad_norm": 33.209038525327244, + "learning_rate": 1.97618102067649e-05, + "loss": 1.6367, + "step": 16221 + }, + { + "epoch": 0.2, + "grad_norm": 8.261330057878018, + "learning_rate": 1.9761683448223318e-05, + "loss": 1.4828, + "step": 16224 + }, + { + "epoch": 0.2, + "grad_norm": 69.16545517473895, + "learning_rate": 1.9761556656368623e-05, + "loss": 1.6318, + "step": 16227 + }, + { + "epoch": 0.2, + "grad_norm": 74.04637833464432, + "learning_rate": 1.9761429831201254e-05, + "loss": 1.4476, + "step": 16230 + }, + { + "epoch": 0.2, + "grad_norm": 8.077686335237637, + "learning_rate": 1.9761302972721638e-05, + "loss": 1.3849, + "step": 16233 + }, + { + "epoch": 0.2, + "grad_norm": 22.052714864025763, + "learning_rate": 1.9761176080930214e-05, + "loss": 1.5527, + "step": 16236 + }, + { + "epoch": 0.2, + "grad_norm": 26.74289868239618, + "learning_rate": 1.976104915582741e-05, + "loss": 1.5238, + "step": 16239 + }, + { + "epoch": 0.2, + "grad_norm": 48.76129789312209, + "learning_rate": 1.9760922197413662e-05, + "loss": 1.561, + "step": 16242 + }, + { + "epoch": 0.2, + "grad_norm": 11.654333619210167, + "learning_rate": 1.97607952056894e-05, + "loss": 1.5095, + "step": 16245 + }, + { + "epoch": 0.2, + "grad_norm": 87.98803083841929, + "learning_rate": 1.9760668180655063e-05, + "loss": 1.6397, + "step": 16248 + }, + { + "epoch": 0.2, + "grad_norm": 19.755040358783557, + "learning_rate": 1.976054112231108e-05, + "loss": 1.2542, + "step": 16251 + }, + { + "epoch": 0.2, + "grad_norm": 5.008272346423262, + "learning_rate": 1.9760414030657886e-05, + "loss": 1.6738, + "step": 16254 + }, + { + "epoch": 0.2, + "grad_norm": 21.405164759387688, + "learning_rate": 1.9760286905695914e-05, + "loss": 1.3622, + "step": 16257 + }, + { + "epoch": 0.2, + "grad_norm": 65.32351398575666, + "learning_rate": 1.9760159747425602e-05, + "loss": 1.4594, + "step": 16260 + }, + { + "epoch": 0.2, + "grad_norm": 8.869371450477512, + "learning_rate": 1.9760032555847375e-05, + "loss": 1.7925, + "step": 16263 + }, + { + "epoch": 0.2, + "grad_norm": 32.730999883101106, + "learning_rate": 1.9759905330961675e-05, + "loss": 1.4696, + "step": 16266 + }, + { + "epoch": 0.2, + "grad_norm": 35.41447589226005, + "learning_rate": 1.9759778072768932e-05, + "loss": 1.7674, + "step": 16269 + }, + { + "epoch": 0.2, + "grad_norm": 17.503950052850378, + "learning_rate": 1.9759650781269583e-05, + "loss": 1.7572, + "step": 16272 + }, + { + "epoch": 0.2, + "grad_norm": 21.35490209723124, + "learning_rate": 1.9759523456464062e-05, + "loss": 1.8225, + "step": 16275 + }, + { + "epoch": 0.2, + "grad_norm": 16.66241530454661, + "learning_rate": 1.9759396098352802e-05, + "loss": 1.6925, + "step": 16278 + }, + { + "epoch": 0.2, + "grad_norm": 10.776611564655354, + "learning_rate": 1.975926870693624e-05, + "loss": 1.6745, + "step": 16281 + }, + { + "epoch": 0.2, + "grad_norm": 7.0651499260475825, + "learning_rate": 1.9759141282214807e-05, + "loss": 1.6198, + "step": 16284 + }, + { + "epoch": 0.2, + "grad_norm": 13.24579964332224, + "learning_rate": 1.9759013824188944e-05, + "loss": 1.4694, + "step": 16287 + }, + { + "epoch": 0.2, + "grad_norm": 42.22783742167096, + "learning_rate": 1.9758886332859076e-05, + "loss": 1.6308, + "step": 16290 + }, + { + "epoch": 0.2, + "grad_norm": 20.310734184646535, + "learning_rate": 1.9758758808225648e-05, + "loss": 1.5826, + "step": 16293 + }, + { + "epoch": 0.2, + "grad_norm": 13.78962621194199, + "learning_rate": 1.975863125028909e-05, + "loss": 1.4466, + "step": 16296 + }, + { + "epoch": 0.2, + "grad_norm": 16.91376717468865, + "learning_rate": 1.975850365904984e-05, + "loss": 1.5505, + "step": 16299 + }, + { + "epoch": 0.2, + "grad_norm": 5.097911826530565, + "learning_rate": 1.975837603450833e-05, + "loss": 1.4062, + "step": 16302 + }, + { + "epoch": 0.2, + "grad_norm": 38.94464559525744, + "learning_rate": 1.9758248376665002e-05, + "loss": 1.5389, + "step": 16305 + }, + { + "epoch": 0.2, + "grad_norm": 7.1389889145770695, + "learning_rate": 1.9758120685520283e-05, + "loss": 1.6139, + "step": 16308 + }, + { + "epoch": 0.2, + "grad_norm": 31.610544915052593, + "learning_rate": 1.975799296107461e-05, + "loss": 1.5611, + "step": 16311 + }, + { + "epoch": 0.2, + "grad_norm": 8.461801059202243, + "learning_rate": 1.9757865203328426e-05, + "loss": 1.3191, + "step": 16314 + }, + { + "epoch": 0.2, + "grad_norm": 14.84849788365683, + "learning_rate": 1.9757737412282164e-05, + "loss": 1.5511, + "step": 16317 + }, + { + "epoch": 0.2, + "grad_norm": 4.956870591146192, + "learning_rate": 1.9757609587936258e-05, + "loss": 1.3591, + "step": 16320 + }, + { + "epoch": 0.2, + "grad_norm": 28.40189710916498, + "learning_rate": 1.9757481730291143e-05, + "loss": 1.8531, + "step": 16323 + }, + { + "epoch": 0.2, + "grad_norm": 14.4155169256559, + "learning_rate": 1.975735383934726e-05, + "loss": 1.8161, + "step": 16326 + }, + { + "epoch": 0.2, + "grad_norm": 5.346491697686292, + "learning_rate": 1.975722591510504e-05, + "loss": 1.4455, + "step": 16329 + }, + { + "epoch": 0.2, + "grad_norm": 12.239092563975994, + "learning_rate": 1.9757097957564925e-05, + "loss": 1.3136, + "step": 16332 + }, + { + "epoch": 0.2, + "grad_norm": 5.63878290654497, + "learning_rate": 1.975696996672735e-05, + "loss": 1.5225, + "step": 16335 + }, + { + "epoch": 0.2, + "grad_norm": 5.472727657491487, + "learning_rate": 1.9756841942592747e-05, + "loss": 2.1587, + "step": 16338 + }, + { + "epoch": 0.2, + "grad_norm": 27.951410047460797, + "learning_rate": 1.9756713885161562e-05, + "loss": 1.5352, + "step": 16341 + }, + { + "epoch": 0.2, + "grad_norm": 10.19968136682984, + "learning_rate": 1.9756585794434223e-05, + "loss": 1.8053, + "step": 16344 + }, + { + "epoch": 0.2, + "grad_norm": 8.060563489588938, + "learning_rate": 1.975645767041117e-05, + "loss": 1.7304, + "step": 16347 + }, + { + "epoch": 0.2, + "grad_norm": 14.542464431333688, + "learning_rate": 1.9756329513092846e-05, + "loss": 1.8057, + "step": 16350 + }, + { + "epoch": 0.2, + "grad_norm": 12.780772681255016, + "learning_rate": 1.9756201322479682e-05, + "loss": 1.6937, + "step": 16353 + }, + { + "epoch": 0.2, + "grad_norm": 13.346507817748744, + "learning_rate": 1.9756073098572116e-05, + "loss": 1.4376, + "step": 16356 + }, + { + "epoch": 0.2, + "grad_norm": 13.29758089118479, + "learning_rate": 1.9755944841370588e-05, + "loss": 1.7306, + "step": 16359 + }, + { + "epoch": 0.2, + "grad_norm": 12.863932722885341, + "learning_rate": 1.975581655087553e-05, + "loss": 1.3426, + "step": 16362 + }, + { + "epoch": 0.2, + "grad_norm": 5.796411219311191, + "learning_rate": 1.9755688227087387e-05, + "loss": 1.4544, + "step": 16365 + }, + { + "epoch": 0.2, + "grad_norm": 17.955898712542247, + "learning_rate": 1.9755559870006594e-05, + "loss": 1.662, + "step": 16368 + }, + { + "epoch": 0.2, + "grad_norm": 178.18329347087496, + "learning_rate": 1.975543147963359e-05, + "loss": 1.5264, + "step": 16371 + }, + { + "epoch": 0.2, + "grad_norm": 11.92886917809684, + "learning_rate": 1.9755303055968812e-05, + "loss": 1.8377, + "step": 16374 + }, + { + "epoch": 0.2, + "grad_norm": 3.9027834294769406, + "learning_rate": 1.9755174599012697e-05, + "loss": 1.3905, + "step": 16377 + }, + { + "epoch": 0.2, + "grad_norm": 19.805630140057026, + "learning_rate": 1.9755046108765687e-05, + "loss": 1.8368, + "step": 16380 + }, + { + "epoch": 0.2, + "grad_norm": 7.003362125283973, + "learning_rate": 1.975491758522822e-05, + "loss": 1.5119, + "step": 16383 + }, + { + "epoch": 0.2, + "grad_norm": 31.239110510590013, + "learning_rate": 1.975478902840073e-05, + "loss": 1.4484, + "step": 16386 + }, + { + "epoch": 0.2, + "grad_norm": 6.166773445292174, + "learning_rate": 1.9754660438283656e-05, + "loss": 1.6947, + "step": 16389 + }, + { + "epoch": 0.2, + "grad_norm": 9.4789069370716, + "learning_rate": 1.9754531814877446e-05, + "loss": 1.5079, + "step": 16392 + }, + { + "epoch": 0.2, + "grad_norm": 10.241080059300248, + "learning_rate": 1.9754403158182525e-05, + "loss": 1.434, + "step": 16395 + }, + { + "epoch": 0.2, + "grad_norm": 32.08674930799642, + "learning_rate": 1.9754274468199342e-05, + "loss": 1.5862, + "step": 16398 + }, + { + "epoch": 0.2, + "grad_norm": 10.224109109621974, + "learning_rate": 1.9754145744928337e-05, + "loss": 1.5693, + "step": 16401 + }, + { + "epoch": 0.2, + "grad_norm": 6.760383606786744, + "learning_rate": 1.9754016988369947e-05, + "loss": 1.6341, + "step": 16404 + }, + { + "epoch": 0.2, + "grad_norm": 69.32992325948733, + "learning_rate": 1.9753888198524605e-05, + "loss": 1.4796, + "step": 16407 + }, + { + "epoch": 0.2, + "grad_norm": 37.12211648469394, + "learning_rate": 1.975375937539276e-05, + "loss": 1.8447, + "step": 16410 + }, + { + "epoch": 0.2, + "grad_norm": 16.982911498083954, + "learning_rate": 1.9753630518974846e-05, + "loss": 1.8365, + "step": 16413 + }, + { + "epoch": 0.2, + "grad_norm": 9.505877470460158, + "learning_rate": 1.9753501629271303e-05, + "loss": 1.6917, + "step": 16416 + }, + { + "epoch": 0.2, + "grad_norm": 7.823011924168537, + "learning_rate": 1.9753372706282573e-05, + "loss": 1.668, + "step": 16419 + }, + { + "epoch": 0.2, + "grad_norm": 5.277141569813333, + "learning_rate": 1.9753243750009098e-05, + "loss": 1.4123, + "step": 16422 + }, + { + "epoch": 0.2, + "grad_norm": 24.544740089222593, + "learning_rate": 1.9753114760451314e-05, + "loss": 1.8501, + "step": 16425 + }, + { + "epoch": 0.2, + "grad_norm": 44.03396194305726, + "learning_rate": 1.975298573760966e-05, + "loss": 1.6685, + "step": 16428 + }, + { + "epoch": 0.2, + "grad_norm": 6.6687103130607905, + "learning_rate": 1.975285668148458e-05, + "loss": 1.5084, + "step": 16431 + }, + { + "epoch": 0.2, + "grad_norm": 11.684818764140054, + "learning_rate": 1.9752727592076513e-05, + "loss": 1.7086, + "step": 16434 + }, + { + "epoch": 0.2, + "grad_norm": 98.73631048948594, + "learning_rate": 1.97525984693859e-05, + "loss": 1.6382, + "step": 16437 + }, + { + "epoch": 0.2, + "grad_norm": 8.330684824854936, + "learning_rate": 1.975246931341318e-05, + "loss": 1.733, + "step": 16440 + }, + { + "epoch": 0.2, + "grad_norm": 33.63509724292071, + "learning_rate": 1.9752340124158794e-05, + "loss": 1.4177, + "step": 16443 + }, + { + "epoch": 0.2, + "grad_norm": 40.44327306507866, + "learning_rate": 1.975221090162319e-05, + "loss": 1.7817, + "step": 16446 + }, + { + "epoch": 0.2, + "grad_norm": 24.41697480998684, + "learning_rate": 1.9752081645806798e-05, + "loss": 1.6261, + "step": 16449 + }, + { + "epoch": 0.2, + "grad_norm": 92.44423194299793, + "learning_rate": 1.9751952356710066e-05, + "loss": 1.7035, + "step": 16452 + }, + { + "epoch": 0.2, + "grad_norm": 6.008259733088488, + "learning_rate": 1.975182303433343e-05, + "loss": 1.3394, + "step": 16455 + }, + { + "epoch": 0.2, + "grad_norm": 42.8406823294201, + "learning_rate": 1.9751693678677336e-05, + "loss": 1.3764, + "step": 16458 + }, + { + "epoch": 0.2, + "grad_norm": 25.769116970856338, + "learning_rate": 1.9751564289742226e-05, + "loss": 1.6185, + "step": 16461 + }, + { + "epoch": 0.2, + "grad_norm": 41.16646304535837, + "learning_rate": 1.9751434867528537e-05, + "loss": 1.733, + "step": 16464 + }, + { + "epoch": 0.2, + "grad_norm": 16.040966707919175, + "learning_rate": 1.9751305412036713e-05, + "loss": 1.3807, + "step": 16467 + }, + { + "epoch": 0.2, + "grad_norm": 24.323854797176146, + "learning_rate": 1.97511759232672e-05, + "loss": 1.5059, + "step": 16470 + }, + { + "epoch": 0.2, + "grad_norm": 103.3401770708286, + "learning_rate": 1.9751046401220436e-05, + "loss": 1.6545, + "step": 16473 + }, + { + "epoch": 0.2, + "grad_norm": 56.07615727978338, + "learning_rate": 1.975091684589686e-05, + "loss": 1.5903, + "step": 16476 + }, + { + "epoch": 0.2, + "grad_norm": 6.09990697018575, + "learning_rate": 1.9750787257296915e-05, + "loss": 1.3148, + "step": 16479 + }, + { + "epoch": 0.2, + "grad_norm": 15.876413909430013, + "learning_rate": 1.975065763542105e-05, + "loss": 1.6274, + "step": 16482 + }, + { + "epoch": 0.2, + "grad_norm": 27.97247947404666, + "learning_rate": 1.97505279802697e-05, + "loss": 1.6548, + "step": 16485 + }, + { + "epoch": 0.2, + "grad_norm": 11.773124411644568, + "learning_rate": 1.975039829184331e-05, + "loss": 1.5035, + "step": 16488 + }, + { + "epoch": 0.2, + "grad_norm": 53.54344615400698, + "learning_rate": 1.9750268570142324e-05, + "loss": 1.7068, + "step": 16491 + }, + { + "epoch": 0.2, + "grad_norm": 27.854639327725796, + "learning_rate": 1.975013881516718e-05, + "loss": 1.7205, + "step": 16494 + }, + { + "epoch": 0.2, + "grad_norm": 51.01715848707187, + "learning_rate": 1.9750009026918328e-05, + "loss": 1.2005, + "step": 16497 + }, + { + "epoch": 0.2, + "grad_norm": 28.25018268393895, + "learning_rate": 1.9749879205396208e-05, + "loss": 1.4727, + "step": 16500 + }, + { + "epoch": 0.2, + "grad_norm": 26.17900786160114, + "learning_rate": 1.974974935060126e-05, + "loss": 1.2034, + "step": 16503 + }, + { + "epoch": 0.2, + "grad_norm": 16.32045783513744, + "learning_rate": 1.974961946253393e-05, + "loss": 1.4555, + "step": 16506 + }, + { + "epoch": 0.2, + "grad_norm": 26.050311621188875, + "learning_rate": 1.974948954119466e-05, + "loss": 1.804, + "step": 16509 + }, + { + "epoch": 0.2, + "grad_norm": 7.801656575843128, + "learning_rate": 1.9749359586583894e-05, + "loss": 1.4019, + "step": 16512 + }, + { + "epoch": 0.2, + "grad_norm": 39.800719765582606, + "learning_rate": 1.9749229598702074e-05, + "loss": 1.8616, + "step": 16515 + }, + { + "epoch": 0.2, + "grad_norm": 7.4276899405173635, + "learning_rate": 1.9749099577549646e-05, + "loss": 1.2786, + "step": 16518 + }, + { + "epoch": 0.2, + "grad_norm": 26.86774721101717, + "learning_rate": 1.9748969523127056e-05, + "loss": 1.3217, + "step": 16521 + }, + { + "epoch": 0.2, + "grad_norm": 15.965565749757493, + "learning_rate": 1.974883943543474e-05, + "loss": 1.8409, + "step": 16524 + }, + { + "epoch": 0.2, + "grad_norm": 17.555912924666643, + "learning_rate": 1.974870931447315e-05, + "loss": 1.4393, + "step": 16527 + }, + { + "epoch": 0.2, + "grad_norm": 19.91122989173215, + "learning_rate": 1.9748579160242725e-05, + "loss": 2.0195, + "step": 16530 + }, + { + "epoch": 0.2, + "grad_norm": 7.148178670854102, + "learning_rate": 1.974844897274391e-05, + "loss": 1.429, + "step": 16533 + }, + { + "epoch": 0.2, + "grad_norm": 3.8084614990799137, + "learning_rate": 1.974831875197715e-05, + "loss": 1.4729, + "step": 16536 + }, + { + "epoch": 0.2, + "grad_norm": 8.960375889354303, + "learning_rate": 1.974818849794289e-05, + "loss": 1.0474, + "step": 16539 + }, + { + "epoch": 0.2, + "grad_norm": 3.0520890800841647, + "learning_rate": 1.9748058210641576e-05, + "loss": 1.7702, + "step": 16542 + }, + { + "epoch": 0.2, + "grad_norm": 17.582423825423675, + "learning_rate": 1.9747927890073647e-05, + "loss": 1.3265, + "step": 16545 + }, + { + "epoch": 0.2, + "grad_norm": 7.549843239127055, + "learning_rate": 1.9747797536239553e-05, + "loss": 1.6855, + "step": 16548 + }, + { + "epoch": 0.2, + "grad_norm": 12.694545728505737, + "learning_rate": 1.9747667149139738e-05, + "loss": 1.744, + "step": 16551 + }, + { + "epoch": 0.2, + "grad_norm": 18.565838042712098, + "learning_rate": 1.9747536728774644e-05, + "loss": 1.7256, + "step": 16554 + }, + { + "epoch": 0.2, + "grad_norm": 64.77042532011129, + "learning_rate": 1.974740627514472e-05, + "loss": 1.84, + "step": 16557 + }, + { + "epoch": 0.2, + "grad_norm": 10.135880447432474, + "learning_rate": 1.974727578825041e-05, + "loss": 1.5087, + "step": 16560 + }, + { + "epoch": 0.2, + "grad_norm": 13.580932703674586, + "learning_rate": 1.9747145268092158e-05, + "loss": 1.5915, + "step": 16563 + }, + { + "epoch": 0.2, + "grad_norm": 7.159990239550251, + "learning_rate": 1.9747014714670406e-05, + "loss": 1.8136, + "step": 16566 + }, + { + "epoch": 0.2, + "grad_norm": 4.229838593380712, + "learning_rate": 1.974688412798561e-05, + "loss": 1.6439, + "step": 16569 + }, + { + "epoch": 0.2, + "grad_norm": 5.901868307890072, + "learning_rate": 1.974675350803821e-05, + "loss": 1.4185, + "step": 16572 + }, + { + "epoch": 0.2, + "grad_norm": 6.495467481119599, + "learning_rate": 1.9746622854828644e-05, + "loss": 1.8739, + "step": 16575 + }, + { + "epoch": 0.2, + "grad_norm": 18.43495368175213, + "learning_rate": 1.974649216835737e-05, + "loss": 1.7384, + "step": 16578 + }, + { + "epoch": 0.2, + "grad_norm": 10.144993502407287, + "learning_rate": 1.9746361448624825e-05, + "loss": 1.4516, + "step": 16581 + }, + { + "epoch": 0.2, + "grad_norm": 11.83498654275988, + "learning_rate": 1.9746230695631462e-05, + "loss": 1.866, + "step": 16584 + }, + { + "epoch": 0.2, + "grad_norm": 16.179219403359546, + "learning_rate": 1.9746099909377724e-05, + "loss": 1.4894, + "step": 16587 + }, + { + "epoch": 0.2, + "grad_norm": 36.672217694259714, + "learning_rate": 1.9745969089864055e-05, + "loss": 1.6593, + "step": 16590 + }, + { + "epoch": 0.2, + "grad_norm": 36.24669405827477, + "learning_rate": 1.974583823709091e-05, + "loss": 1.5063, + "step": 16593 + }, + { + "epoch": 0.2, + "grad_norm": 7.505077130278892, + "learning_rate": 1.9745707351058723e-05, + "loss": 1.2553, + "step": 16596 + }, + { + "epoch": 0.2, + "grad_norm": 13.49561425935358, + "learning_rate": 1.974557643176795e-05, + "loss": 1.2862, + "step": 16599 + }, + { + "epoch": 0.2, + "grad_norm": 16.87946189736619, + "learning_rate": 1.9745445479219033e-05, + "loss": 1.5179, + "step": 16602 + }, + { + "epoch": 0.2, + "grad_norm": 6.708455716552065, + "learning_rate": 1.9745314493412423e-05, + "loss": 1.3657, + "step": 16605 + }, + { + "epoch": 0.2, + "grad_norm": 12.33112409597947, + "learning_rate": 1.974518347434857e-05, + "loss": 1.694, + "step": 16608 + }, + { + "epoch": 0.2, + "grad_norm": 61.34270184890302, + "learning_rate": 1.9745052422027905e-05, + "loss": 1.7326, + "step": 16611 + }, + { + "epoch": 0.2, + "grad_norm": 22.52160836913161, + "learning_rate": 1.9744921336450894e-05, + "loss": 2.0895, + "step": 16614 + }, + { + "epoch": 0.2, + "grad_norm": 4.538690126951998, + "learning_rate": 1.9744790217617974e-05, + "loss": 1.6799, + "step": 16617 + }, + { + "epoch": 0.2, + "grad_norm": 9.02528929582144, + "learning_rate": 1.9744659065529595e-05, + "loss": 1.3507, + "step": 16620 + }, + { + "epoch": 0.2, + "grad_norm": 16.339272728860045, + "learning_rate": 1.9744527880186207e-05, + "loss": 1.4374, + "step": 16623 + }, + { + "epoch": 0.2, + "grad_norm": 8.257045040591215, + "learning_rate": 1.9744396661588252e-05, + "loss": 1.5895, + "step": 16626 + }, + { + "epoch": 0.2, + "grad_norm": 11.115055170171505, + "learning_rate": 1.9744265409736184e-05, + "loss": 1.7658, + "step": 16629 + }, + { + "epoch": 0.2, + "grad_norm": 7.846337078942968, + "learning_rate": 1.9744134124630445e-05, + "loss": 1.7168, + "step": 16632 + }, + { + "epoch": 0.2, + "grad_norm": 7.113844589999934, + "learning_rate": 1.974400280627149e-05, + "loss": 2.114, + "step": 16635 + }, + { + "epoch": 0.2, + "grad_norm": 33.64178555122876, + "learning_rate": 1.974387145465976e-05, + "loss": 1.8236, + "step": 16638 + }, + { + "epoch": 0.2, + "grad_norm": 54.60845807136557, + "learning_rate": 1.9743740069795706e-05, + "loss": 1.6949, + "step": 16641 + }, + { + "epoch": 0.2, + "grad_norm": 8.392016256286484, + "learning_rate": 1.9743608651679777e-05, + "loss": 1.3505, + "step": 16644 + }, + { + "epoch": 0.2, + "grad_norm": 52.884725370643004, + "learning_rate": 1.974347720031242e-05, + "loss": 1.7142, + "step": 16647 + }, + { + "epoch": 0.2, + "grad_norm": 6.169535697428302, + "learning_rate": 1.974334571569409e-05, + "loss": 1.2568, + "step": 16650 + }, + { + "epoch": 0.2, + "grad_norm": 19.129254378892707, + "learning_rate": 1.974321419782523e-05, + "loss": 1.7112, + "step": 16653 + }, + { + "epoch": 0.2, + "grad_norm": 5.276546283143011, + "learning_rate": 1.9743082646706286e-05, + "loss": 1.4333, + "step": 16656 + }, + { + "epoch": 0.2, + "grad_norm": 31.901101408400045, + "learning_rate": 1.9742951062337708e-05, + "loss": 1.5, + "step": 16659 + }, + { + "epoch": 0.2, + "grad_norm": 35.377564124906556, + "learning_rate": 1.9742819444719953e-05, + "loss": 1.189, + "step": 16662 + }, + { + "epoch": 0.2, + "grad_norm": 7.286889721485836, + "learning_rate": 1.9742687793853465e-05, + "loss": 1.4569, + "step": 16665 + }, + { + "epoch": 0.2, + "grad_norm": 19.561039981491955, + "learning_rate": 1.9742556109738688e-05, + "loss": 1.8337, + "step": 16668 + }, + { + "epoch": 0.2, + "grad_norm": 22.159045634895623, + "learning_rate": 1.974242439237608e-05, + "loss": 2.0653, + "step": 16671 + }, + { + "epoch": 0.2, + "grad_norm": 114.94712717267454, + "learning_rate": 1.9742292641766087e-05, + "loss": 1.5776, + "step": 16674 + }, + { + "epoch": 0.2, + "grad_norm": 11.277749403500444, + "learning_rate": 1.9742160857909155e-05, + "loss": 1.8726, + "step": 16677 + }, + { + "epoch": 0.2, + "grad_norm": 11.730767514939464, + "learning_rate": 1.974202904080574e-05, + "loss": 1.5108, + "step": 16680 + }, + { + "epoch": 0.2, + "grad_norm": 8.581943058953048, + "learning_rate": 1.9741897190456286e-05, + "loss": 1.6384, + "step": 16683 + }, + { + "epoch": 0.2, + "grad_norm": 34.49242382660456, + "learning_rate": 1.974176530686125e-05, + "loss": 1.6222, + "step": 16686 + }, + { + "epoch": 0.2, + "grad_norm": 50.06164133130699, + "learning_rate": 1.9741633390021075e-05, + "loss": 1.4364, + "step": 16689 + }, + { + "epoch": 0.2, + "grad_norm": 9.01513412809281, + "learning_rate": 1.9741501439936214e-05, + "loss": 1.3498, + "step": 16692 + }, + { + "epoch": 0.2, + "grad_norm": 43.70768856810182, + "learning_rate": 1.974136945660712e-05, + "loss": 1.5589, + "step": 16695 + }, + { + "epoch": 0.2, + "grad_norm": 11.596246602729243, + "learning_rate": 1.974123744003424e-05, + "loss": 1.61, + "step": 16698 + }, + { + "epoch": 0.2, + "grad_norm": 6.610840747296571, + "learning_rate": 1.9741105390218026e-05, + "loss": 1.6851, + "step": 16701 + }, + { + "epoch": 0.2, + "grad_norm": 3.7671607201165336, + "learning_rate": 1.9740973307158923e-05, + "loss": 1.9048, + "step": 16704 + }, + { + "epoch": 0.2, + "grad_norm": 2.898422766343906, + "learning_rate": 1.9740841190857392e-05, + "loss": 1.8931, + "step": 16707 + }, + { + "epoch": 0.2, + "grad_norm": 2.441820411257492, + "learning_rate": 1.9740709041313876e-05, + "loss": 1.3123, + "step": 16710 + }, + { + "epoch": 0.2, + "grad_norm": 14.720030737786681, + "learning_rate": 1.974057685852883e-05, + "loss": 1.7913, + "step": 16713 + }, + { + "epoch": 0.2, + "grad_norm": 8.213971776621278, + "learning_rate": 1.9740444642502705e-05, + "loss": 1.5343, + "step": 16716 + }, + { + "epoch": 0.2, + "grad_norm": 10.04018701908504, + "learning_rate": 1.974031239323595e-05, + "loss": 1.7668, + "step": 16719 + }, + { + "epoch": 0.2, + "grad_norm": 39.19627403990043, + "learning_rate": 1.9740180110729015e-05, + "loss": 1.52, + "step": 16722 + }, + { + "epoch": 0.2, + "grad_norm": 27.404157707862833, + "learning_rate": 1.9740047794982355e-05, + "loss": 1.8032, + "step": 16725 + }, + { + "epoch": 0.2, + "grad_norm": 128.6752496499965, + "learning_rate": 1.973991544599642e-05, + "loss": 1.5821, + "step": 16728 + }, + { + "epoch": 0.2, + "grad_norm": 9.515406153539711, + "learning_rate": 1.973978306377166e-05, + "loss": 1.3759, + "step": 16731 + }, + { + "epoch": 0.2, + "grad_norm": 17.85857556559573, + "learning_rate": 1.9739650648308535e-05, + "loss": 1.748, + "step": 16734 + }, + { + "epoch": 0.2, + "grad_norm": 45.74793588451717, + "learning_rate": 1.9739518199607487e-05, + "loss": 1.5655, + "step": 16737 + }, + { + "epoch": 0.2, + "grad_norm": 19.3283304280025, + "learning_rate": 1.973938571766897e-05, + "loss": 1.4636, + "step": 16740 + }, + { + "epoch": 0.2, + "grad_norm": 18.00925200068273, + "learning_rate": 1.9739253202493442e-05, + "loss": 2.0811, + "step": 16743 + }, + { + "epoch": 0.2, + "grad_norm": 4.184284555979565, + "learning_rate": 1.9739120654081346e-05, + "loss": 1.6724, + "step": 16746 + }, + { + "epoch": 0.2, + "grad_norm": 109.09706385454933, + "learning_rate": 1.973898807243314e-05, + "loss": 1.697, + "step": 16749 + }, + { + "epoch": 0.2, + "grad_norm": 13.244104018931816, + "learning_rate": 1.9738855457549277e-05, + "loss": 1.7128, + "step": 16752 + }, + { + "epoch": 0.2, + "grad_norm": 16.95566517241698, + "learning_rate": 1.973872280943021e-05, + "loss": 1.7261, + "step": 16755 + }, + { + "epoch": 0.2, + "grad_norm": 13.395265811165556, + "learning_rate": 1.9738590128076386e-05, + "loss": 1.6997, + "step": 16758 + }, + { + "epoch": 0.2, + "grad_norm": 137.8460508660776, + "learning_rate": 1.9738457413488264e-05, + "loss": 1.6249, + "step": 16761 + }, + { + "epoch": 0.2, + "grad_norm": 25.356403186863236, + "learning_rate": 1.9738324665666293e-05, + "loss": 1.8254, + "step": 16764 + }, + { + "epoch": 0.2, + "grad_norm": 32.23123132329952, + "learning_rate": 1.973819188461093e-05, + "loss": 1.8038, + "step": 16767 + }, + { + "epoch": 0.2, + "grad_norm": 10.562265505218909, + "learning_rate": 1.9738059070322625e-05, + "loss": 1.5015, + "step": 16770 + }, + { + "epoch": 0.2, + "grad_norm": 28.39668237900313, + "learning_rate": 1.973792622280183e-05, + "loss": 1.4367, + "step": 16773 + }, + { + "epoch": 0.2, + "grad_norm": 14.862605008009751, + "learning_rate": 1.9737793342049004e-05, + "loss": 1.8763, + "step": 16776 + }, + { + "epoch": 0.2, + "grad_norm": 7.383502327574051, + "learning_rate": 1.9737660428064593e-05, + "loss": 1.3887, + "step": 16779 + }, + { + "epoch": 0.2, + "grad_norm": 17.525416320455147, + "learning_rate": 1.9737527480849057e-05, + "loss": 1.5472, + "step": 16782 + }, + { + "epoch": 0.2, + "grad_norm": 30.430437236272617, + "learning_rate": 1.9737394500402846e-05, + "loss": 1.6261, + "step": 16785 + }, + { + "epoch": 0.2, + "grad_norm": 11.50841913356546, + "learning_rate": 1.9737261486726417e-05, + "loss": 1.2924, + "step": 16788 + }, + { + "epoch": 0.2, + "grad_norm": 4.1759467064795475, + "learning_rate": 1.973712843982022e-05, + "loss": 1.6196, + "step": 16791 + }, + { + "epoch": 0.2, + "grad_norm": 5.25629462662979, + "learning_rate": 1.9736995359684708e-05, + "loss": 1.6725, + "step": 16794 + }, + { + "epoch": 0.2, + "grad_norm": 3.8740818000454125, + "learning_rate": 1.9736862246320342e-05, + "loss": 1.4251, + "step": 16797 + }, + { + "epoch": 0.2, + "grad_norm": 4.828719220516172, + "learning_rate": 1.9736729099727575e-05, + "loss": 1.5836, + "step": 16800 + }, + { + "epoch": 0.2, + "grad_norm": 14.496600959359329, + "learning_rate": 1.9736595919906854e-05, + "loss": 2.0294, + "step": 16803 + }, + { + "epoch": 0.2, + "grad_norm": 14.386950632456795, + "learning_rate": 1.973646270685864e-05, + "loss": 1.9259, + "step": 16806 + }, + { + "epoch": 0.2, + "grad_norm": 29.78605184612713, + "learning_rate": 1.9736329460583383e-05, + "loss": 2.0863, + "step": 16809 + }, + { + "epoch": 0.2, + "grad_norm": 91.93924943794747, + "learning_rate": 1.9736196181081544e-05, + "loss": 1.805, + "step": 16812 + }, + { + "epoch": 0.2, + "grad_norm": 14.96339254792447, + "learning_rate": 1.9736062868353575e-05, + "loss": 1.7989, + "step": 16815 + }, + { + "epoch": 0.2, + "grad_norm": 16.46431328378783, + "learning_rate": 1.9735929522399928e-05, + "loss": 1.4364, + "step": 16818 + }, + { + "epoch": 0.2, + "grad_norm": 15.067415407568584, + "learning_rate": 1.973579614322106e-05, + "loss": 1.6942, + "step": 16821 + }, + { + "epoch": 0.2, + "grad_norm": 11.105010748969894, + "learning_rate": 1.973566273081743e-05, + "loss": 1.7963, + "step": 16824 + }, + { + "epoch": 0.2, + "grad_norm": 9.854848007354676, + "learning_rate": 1.973552928518949e-05, + "loss": 1.7251, + "step": 16827 + }, + { + "epoch": 0.2, + "grad_norm": 6.518078647870261, + "learning_rate": 1.973539580633769e-05, + "loss": 1.8952, + "step": 16830 + }, + { + "epoch": 0.2, + "grad_norm": 69.10874589868959, + "learning_rate": 1.9735262294262495e-05, + "loss": 1.3888, + "step": 16833 + }, + { + "epoch": 0.2, + "grad_norm": 38.237612565885904, + "learning_rate": 1.9735128748964354e-05, + "loss": 1.7511, + "step": 16836 + }, + { + "epoch": 0.2, + "grad_norm": 45.26258428879057, + "learning_rate": 1.9734995170443727e-05, + "loss": 1.8034, + "step": 16839 + }, + { + "epoch": 0.2, + "grad_norm": 29.58383931198796, + "learning_rate": 1.9734861558701065e-05, + "loss": 2.0836, + "step": 16842 + }, + { + "epoch": 0.2, + "grad_norm": 44.442329812415274, + "learning_rate": 1.9734727913736832e-05, + "loss": 1.5724, + "step": 16845 + }, + { + "epoch": 0.2, + "grad_norm": 9.862367164025084, + "learning_rate": 1.9734594235551473e-05, + "loss": 1.6872, + "step": 16848 + }, + { + "epoch": 0.2, + "grad_norm": 59.694925666503515, + "learning_rate": 1.9734460524145454e-05, + "loss": 1.7749, + "step": 16851 + }, + { + "epoch": 0.2, + "grad_norm": 14.973476555098152, + "learning_rate": 1.9734326779519227e-05, + "loss": 1.89, + "step": 16854 + }, + { + "epoch": 0.2, + "grad_norm": 19.286401097847236, + "learning_rate": 1.973419300167325e-05, + "loss": 1.6625, + "step": 16857 + }, + { + "epoch": 0.2, + "grad_norm": 12.960420928764657, + "learning_rate": 1.973405919060798e-05, + "loss": 1.5135, + "step": 16860 + }, + { + "epoch": 0.2, + "grad_norm": 17.44834960131008, + "learning_rate": 1.9733925346323867e-05, + "loss": 1.9701, + "step": 16863 + }, + { + "epoch": 0.2, + "grad_norm": 28.75000919337019, + "learning_rate": 1.973379146882138e-05, + "loss": 1.7266, + "step": 16866 + }, + { + "epoch": 0.2, + "grad_norm": 14.21389755965704, + "learning_rate": 1.9733657558100962e-05, + "loss": 1.8272, + "step": 16869 + }, + { + "epoch": 0.2, + "grad_norm": 50.28770190301533, + "learning_rate": 1.9733523614163078e-05, + "loss": 2.0907, + "step": 16872 + }, + { + "epoch": 0.2, + "grad_norm": 9.647403568347423, + "learning_rate": 1.9733389637008183e-05, + "loss": 1.6091, + "step": 16875 + }, + { + "epoch": 0.2, + "grad_norm": 97.30750490998744, + "learning_rate": 1.973325562663674e-05, + "loss": 1.5562, + "step": 16878 + }, + { + "epoch": 0.2, + "grad_norm": 4.440831355163956, + "learning_rate": 1.97331215830492e-05, + "loss": 1.4544, + "step": 16881 + }, + { + "epoch": 0.2, + "grad_norm": 11.738597359841213, + "learning_rate": 1.973298750624602e-05, + "loss": 1.4252, + "step": 16884 + }, + { + "epoch": 0.2, + "grad_norm": 21.20026105459719, + "learning_rate": 1.973285339622766e-05, + "loss": 1.4518, + "step": 16887 + }, + { + "epoch": 0.2, + "grad_norm": 6.020999846496598, + "learning_rate": 1.9732719252994576e-05, + "loss": 1.7716, + "step": 16890 + }, + { + "epoch": 0.2, + "grad_norm": 20.31624502827416, + "learning_rate": 1.9732585076547232e-05, + "loss": 1.3697, + "step": 16893 + }, + { + "epoch": 0.2, + "grad_norm": 191.35984017431784, + "learning_rate": 1.9732450866886076e-05, + "loss": 1.8655, + "step": 16896 + }, + { + "epoch": 0.2, + "grad_norm": 10.787964408566735, + "learning_rate": 1.973231662401157e-05, + "loss": 1.3913, + "step": 16899 + }, + { + "epoch": 0.2, + "grad_norm": 4.459332124651378, + "learning_rate": 1.9732182347924175e-05, + "loss": 1.5128, + "step": 16902 + }, + { + "epoch": 0.2, + "grad_norm": 67.54958391790242, + "learning_rate": 1.9732048038624348e-05, + "loss": 1.6434, + "step": 16905 + }, + { + "epoch": 0.2, + "grad_norm": 27.982776125149332, + "learning_rate": 1.9731913696112546e-05, + "loss": 1.8198, + "step": 16908 + }, + { + "epoch": 0.2, + "grad_norm": 12.839719329487563, + "learning_rate": 1.9731779320389225e-05, + "loss": 2.0019, + "step": 16911 + }, + { + "epoch": 0.2, + "grad_norm": 13.798644667528672, + "learning_rate": 1.973164491145485e-05, + "loss": 1.8695, + "step": 16914 + }, + { + "epoch": 0.2, + "grad_norm": 14.782569430613215, + "learning_rate": 1.9731510469309876e-05, + "loss": 1.4508, + "step": 16917 + }, + { + "epoch": 0.2, + "grad_norm": 16.956869401441015, + "learning_rate": 1.973137599395476e-05, + "loss": 1.51, + "step": 16920 + }, + { + "epoch": 0.2, + "grad_norm": 21.180463969024373, + "learning_rate": 1.9731241485389964e-05, + "loss": 1.7667, + "step": 16923 + }, + { + "epoch": 0.2, + "grad_norm": 24.517715564632248, + "learning_rate": 1.9731106943615948e-05, + "loss": 1.5203, + "step": 16926 + }, + { + "epoch": 0.2, + "grad_norm": 20.03098243730304, + "learning_rate": 1.973097236863317e-05, + "loss": 2.0874, + "step": 16929 + }, + { + "epoch": 0.2, + "grad_norm": 17.955380526317423, + "learning_rate": 1.9730837760442082e-05, + "loss": 1.6662, + "step": 16932 + }, + { + "epoch": 0.2, + "grad_norm": 4.314381259082387, + "learning_rate": 1.9730703119043154e-05, + "loss": 1.7455, + "step": 16935 + }, + { + "epoch": 0.2, + "grad_norm": 15.405896444386375, + "learning_rate": 1.9730568444436842e-05, + "loss": 1.3442, + "step": 16938 + }, + { + "epoch": 0.2, + "grad_norm": 6.983278901074689, + "learning_rate": 1.9730433736623602e-05, + "loss": 1.9318, + "step": 16941 + }, + { + "epoch": 0.2, + "grad_norm": 9.555124349747942, + "learning_rate": 1.97302989956039e-05, + "loss": 1.6186, + "step": 16944 + }, + { + "epoch": 0.2, + "grad_norm": 12.400772783971087, + "learning_rate": 1.973016422137819e-05, + "loss": 1.7015, + "step": 16947 + }, + { + "epoch": 0.2, + "grad_norm": 79.94858540603461, + "learning_rate": 1.973002941394693e-05, + "loss": 1.6304, + "step": 16950 + }, + { + "epoch": 0.2, + "grad_norm": 85.40362545860287, + "learning_rate": 1.972989457331059e-05, + "loss": 1.771, + "step": 16953 + }, + { + "epoch": 0.2, + "grad_norm": 15.045312969156164, + "learning_rate": 1.9729759699469626e-05, + "loss": 1.5172, + "step": 16956 + }, + { + "epoch": 0.2, + "grad_norm": 9.481873112075297, + "learning_rate": 1.9729624792424493e-05, + "loss": 1.3952, + "step": 16959 + }, + { + "epoch": 0.2, + "grad_norm": 20.501078149253765, + "learning_rate": 1.9729489852175655e-05, + "loss": 1.6664, + "step": 16962 + }, + { + "epoch": 0.2, + "grad_norm": 43.27519049307206, + "learning_rate": 1.9729354878723576e-05, + "loss": 1.6921, + "step": 16965 + }, + { + "epoch": 0.2, + "grad_norm": 6.116594887655631, + "learning_rate": 1.9729219872068713e-05, + "loss": 1.5779, + "step": 16968 + }, + { + "epoch": 0.2, + "grad_norm": 6.6165761320915255, + "learning_rate": 1.9729084832211524e-05, + "loss": 1.3389, + "step": 16971 + }, + { + "epoch": 0.2, + "grad_norm": 35.309937710411035, + "learning_rate": 1.9728949759152477e-05, + "loss": 1.6883, + "step": 16974 + }, + { + "epoch": 0.2, + "grad_norm": 8.322443700268993, + "learning_rate": 1.9728814652892025e-05, + "loss": 1.5896, + "step": 16977 + }, + { + "epoch": 0.2, + "grad_norm": 6.867512369570214, + "learning_rate": 1.9728679513430636e-05, + "loss": 1.4296, + "step": 16980 + }, + { + "epoch": 0.2, + "grad_norm": 14.239107322748218, + "learning_rate": 1.9728544340768764e-05, + "loss": 1.5473, + "step": 16983 + }, + { + "epoch": 0.2, + "grad_norm": 29.80763050187845, + "learning_rate": 1.9728409134906877e-05, + "loss": 1.5125, + "step": 16986 + }, + { + "epoch": 0.2, + "grad_norm": 10.216020128973193, + "learning_rate": 1.9728273895845435e-05, + "loss": 1.5622, + "step": 16989 + }, + { + "epoch": 0.2, + "grad_norm": 13.381693512392662, + "learning_rate": 1.9728138623584896e-05, + "loss": 1.7657, + "step": 16992 + }, + { + "epoch": 0.2, + "grad_norm": 45.42448398057867, + "learning_rate": 1.9728003318125724e-05, + "loss": 1.692, + "step": 16995 + }, + { + "epoch": 0.2, + "grad_norm": 46.983096823814414, + "learning_rate": 1.972786797946838e-05, + "loss": 1.5111, + "step": 16998 + }, + { + "epoch": 0.2, + "grad_norm": 11.593496905270229, + "learning_rate": 1.9727732607613327e-05, + "loss": 1.5013, + "step": 17001 + }, + { + "epoch": 0.2, + "grad_norm": 24.71251004014807, + "learning_rate": 1.972759720256103e-05, + "loss": 1.6926, + "step": 17004 + }, + { + "epoch": 0.2, + "grad_norm": 11.312792470519963, + "learning_rate": 1.9727461764311944e-05, + "loss": 1.441, + "step": 17007 + }, + { + "epoch": 0.2, + "grad_norm": 10.38196963729339, + "learning_rate": 1.9727326292866535e-05, + "loss": 1.4411, + "step": 17010 + }, + { + "epoch": 0.2, + "grad_norm": 14.812135770755937, + "learning_rate": 1.9727190788225265e-05, + "loss": 1.352, + "step": 17013 + }, + { + "epoch": 0.2, + "grad_norm": 31.502838552650896, + "learning_rate": 1.97270552503886e-05, + "loss": 1.6965, + "step": 17016 + }, + { + "epoch": 0.2, + "grad_norm": 9.87683622856409, + "learning_rate": 1.9726919679356996e-05, + "loss": 1.6458, + "step": 17019 + }, + { + "epoch": 0.2, + "grad_norm": 36.51984956455593, + "learning_rate": 1.972678407513092e-05, + "loss": 1.7271, + "step": 17022 + }, + { + "epoch": 0.2, + "grad_norm": 10.836573152354111, + "learning_rate": 1.9726648437710835e-05, + "loss": 1.736, + "step": 17025 + }, + { + "epoch": 0.2, + "grad_norm": 5.16804380048817, + "learning_rate": 1.9726512767097196e-05, + "loss": 1.6174, + "step": 17028 + }, + { + "epoch": 0.2, + "grad_norm": 4.9386564399084865, + "learning_rate": 1.972637706329048e-05, + "loss": 1.7309, + "step": 17031 + }, + { + "epoch": 0.2, + "grad_norm": 43.69324076244068, + "learning_rate": 1.9726241326291138e-05, + "loss": 1.197, + "step": 17034 + }, + { + "epoch": 0.2, + "grad_norm": 13.566341304749535, + "learning_rate": 1.972610555609964e-05, + "loss": 1.3822, + "step": 17037 + }, + { + "epoch": 0.2, + "grad_norm": 23.730600122699656, + "learning_rate": 1.9725969752716444e-05, + "loss": 1.4101, + "step": 17040 + }, + { + "epoch": 0.2, + "grad_norm": 22.3899092095968, + "learning_rate": 1.972583391614202e-05, + "loss": 1.3832, + "step": 17043 + }, + { + "epoch": 0.2, + "grad_norm": 9.327243277678187, + "learning_rate": 1.9725698046376824e-05, + "loss": 1.455, + "step": 17046 + }, + { + "epoch": 0.21, + "grad_norm": 5.194564471537247, + "learning_rate": 1.9725562143421325e-05, + "loss": 1.5221, + "step": 17049 + }, + { + "epoch": 0.21, + "grad_norm": 21.105526248199048, + "learning_rate": 1.972542620727599e-05, + "loss": 1.3745, + "step": 17052 + }, + { + "epoch": 0.21, + "grad_norm": 10.476950401660766, + "learning_rate": 1.9725290237941274e-05, + "loss": 2.1544, + "step": 17055 + }, + { + "epoch": 0.21, + "grad_norm": 13.21631875459875, + "learning_rate": 1.972515423541765e-05, + "loss": 2.0304, + "step": 17058 + }, + { + "epoch": 0.21, + "grad_norm": 5.411362345912667, + "learning_rate": 1.9725018199705572e-05, + "loss": 1.5095, + "step": 17061 + }, + { + "epoch": 0.21, + "grad_norm": 32.338010650572286, + "learning_rate": 1.972488213080551e-05, + "loss": 1.2714, + "step": 17064 + }, + { + "epoch": 0.21, + "grad_norm": 32.99680550003112, + "learning_rate": 1.972474602871793e-05, + "loss": 1.3922, + "step": 17067 + }, + { + "epoch": 0.21, + "grad_norm": 15.90478623097888, + "learning_rate": 1.9724609893443297e-05, + "loss": 1.7465, + "step": 17070 + }, + { + "epoch": 0.21, + "grad_norm": 12.386063164253489, + "learning_rate": 1.972447372498207e-05, + "loss": 1.4926, + "step": 17073 + }, + { + "epoch": 0.21, + "grad_norm": 20.4355070123538, + "learning_rate": 1.9724337523334717e-05, + "loss": 1.5734, + "step": 17076 + }, + { + "epoch": 0.21, + "grad_norm": 23.747778135021235, + "learning_rate": 1.9724201288501705e-05, + "loss": 1.6921, + "step": 17079 + }, + { + "epoch": 0.21, + "grad_norm": 12.168303534832386, + "learning_rate": 1.9724065020483497e-05, + "loss": 1.8863, + "step": 17082 + }, + { + "epoch": 0.21, + "grad_norm": 22.385611189717427, + "learning_rate": 1.9723928719280554e-05, + "loss": 1.3438, + "step": 17085 + }, + { + "epoch": 0.21, + "grad_norm": 50.53879148933558, + "learning_rate": 1.9723792384893347e-05, + "loss": 1.6659, + "step": 17088 + }, + { + "epoch": 0.21, + "grad_norm": 18.25910493542255, + "learning_rate": 1.972365601732234e-05, + "loss": 1.4408, + "step": 17091 + }, + { + "epoch": 0.21, + "grad_norm": 12.048203279564362, + "learning_rate": 1.9723519616567995e-05, + "loss": 1.4924, + "step": 17094 + }, + { + "epoch": 0.21, + "grad_norm": 13.001907322449997, + "learning_rate": 1.9723383182630785e-05, + "loss": 1.511, + "step": 17097 + }, + { + "epoch": 0.21, + "grad_norm": 29.355214174093486, + "learning_rate": 1.9723246715511167e-05, + "loss": 1.5946, + "step": 17100 + }, + { + "epoch": 0.21, + "grad_norm": 13.892457066286873, + "learning_rate": 1.972311021520961e-05, + "loss": 1.5426, + "step": 17103 + }, + { + "epoch": 0.21, + "grad_norm": 7.599916813353636, + "learning_rate": 1.972297368172658e-05, + "loss": 1.2249, + "step": 17106 + }, + { + "epoch": 0.21, + "grad_norm": 13.360195571621789, + "learning_rate": 1.972283711506254e-05, + "loss": 1.5737, + "step": 17109 + }, + { + "epoch": 0.21, + "grad_norm": 8.517207402216373, + "learning_rate": 1.9722700515217965e-05, + "loss": 1.429, + "step": 17112 + }, + { + "epoch": 0.21, + "grad_norm": 52.81589822341179, + "learning_rate": 1.9722563882193314e-05, + "loss": 1.7641, + "step": 17115 + }, + { + "epoch": 0.21, + "grad_norm": 4.822207660120429, + "learning_rate": 1.9722427215989055e-05, + "loss": 1.3148, + "step": 17118 + }, + { + "epoch": 0.21, + "grad_norm": 7.973723616618747, + "learning_rate": 1.972229051660565e-05, + "loss": 1.5506, + "step": 17121 + }, + { + "epoch": 0.21, + "grad_norm": 9.727502399738563, + "learning_rate": 1.9722153784043573e-05, + "loss": 1.7518, + "step": 17124 + }, + { + "epoch": 0.21, + "grad_norm": 19.374738105713018, + "learning_rate": 1.9722017018303283e-05, + "loss": 1.4382, + "step": 17127 + }, + { + "epoch": 0.21, + "grad_norm": 17.360426306163866, + "learning_rate": 1.9721880219385254e-05, + "loss": 1.5805, + "step": 17130 + }, + { + "epoch": 0.21, + "grad_norm": 15.368929907620467, + "learning_rate": 1.9721743387289947e-05, + "loss": 1.2862, + "step": 17133 + }, + { + "epoch": 0.21, + "grad_norm": 33.191123539507196, + "learning_rate": 1.9721606522017834e-05, + "loss": 1.6043, + "step": 17136 + }, + { + "epoch": 0.21, + "grad_norm": 9.849169030558034, + "learning_rate": 1.972146962356938e-05, + "loss": 1.3878, + "step": 17139 + }, + { + "epoch": 0.21, + "grad_norm": 12.192138649122644, + "learning_rate": 1.972133269194505e-05, + "loss": 1.5842, + "step": 17142 + }, + { + "epoch": 0.21, + "grad_norm": 4.2580333113377, + "learning_rate": 1.9721195727145313e-05, + "loss": 1.5469, + "step": 17145 + }, + { + "epoch": 0.21, + "grad_norm": 26.870258630476876, + "learning_rate": 1.972105872917064e-05, + "loss": 1.3705, + "step": 17148 + }, + { + "epoch": 0.21, + "grad_norm": 10.483384083706667, + "learning_rate": 1.9720921698021492e-05, + "loss": 1.306, + "step": 17151 + }, + { + "epoch": 0.21, + "grad_norm": 15.79710044084496, + "learning_rate": 1.972078463369834e-05, + "loss": 1.5692, + "step": 17154 + }, + { + "epoch": 0.21, + "grad_norm": 17.876902493792766, + "learning_rate": 1.972064753620165e-05, + "loss": 1.5838, + "step": 17157 + }, + { + "epoch": 0.21, + "grad_norm": 14.193772084900388, + "learning_rate": 1.9720510405531893e-05, + "loss": 1.2448, + "step": 17160 + }, + { + "epoch": 0.21, + "grad_norm": 34.4714115256848, + "learning_rate": 1.972037324168954e-05, + "loss": 1.9663, + "step": 17163 + }, + { + "epoch": 0.21, + "grad_norm": 4.652008783710533, + "learning_rate": 1.972023604467505e-05, + "loss": 1.7913, + "step": 17166 + }, + { + "epoch": 0.21, + "grad_norm": 4.641168316265977, + "learning_rate": 1.9720098814488893e-05, + "loss": 1.1395, + "step": 17169 + }, + { + "epoch": 0.21, + "grad_norm": 9.916589591039584, + "learning_rate": 1.9719961551131543e-05, + "loss": 1.5497, + "step": 17172 + }, + { + "epoch": 0.21, + "grad_norm": 12.54214110193245, + "learning_rate": 1.9719824254603463e-05, + "loss": 1.2059, + "step": 17175 + }, + { + "epoch": 0.21, + "grad_norm": 16.665794208152438, + "learning_rate": 1.9719686924905127e-05, + "loss": 1.4426, + "step": 17178 + }, + { + "epoch": 0.21, + "grad_norm": 21.521019715221804, + "learning_rate": 1.9719549562037e-05, + "loss": 1.5968, + "step": 17181 + }, + { + "epoch": 0.21, + "grad_norm": 10.108492518710234, + "learning_rate": 1.971941216599955e-05, + "loss": 1.3574, + "step": 17184 + }, + { + "epoch": 0.21, + "grad_norm": 17.616977540220425, + "learning_rate": 1.9719274736793247e-05, + "loss": 1.5767, + "step": 17187 + }, + { + "epoch": 0.21, + "grad_norm": 10.41989318621182, + "learning_rate": 1.971913727441856e-05, + "loss": 1.5219, + "step": 17190 + }, + { + "epoch": 0.21, + "grad_norm": 51.37421669932843, + "learning_rate": 1.971899977887596e-05, + "loss": 1.8891, + "step": 17193 + }, + { + "epoch": 0.21, + "grad_norm": 22.038837997450802, + "learning_rate": 1.9718862250165914e-05, + "loss": 1.4937, + "step": 17196 + }, + { + "epoch": 0.21, + "grad_norm": 10.840479074320447, + "learning_rate": 1.971872468828889e-05, + "loss": 1.7448, + "step": 17199 + }, + { + "epoch": 0.21, + "grad_norm": 18.35054962596193, + "learning_rate": 1.9718587093245363e-05, + "loss": 1.3083, + "step": 17202 + }, + { + "epoch": 0.21, + "grad_norm": 6.368967823632623, + "learning_rate": 1.9718449465035797e-05, + "loss": 1.6477, + "step": 17205 + }, + { + "epoch": 0.21, + "grad_norm": 5.746382635191572, + "learning_rate": 1.971831180366066e-05, + "loss": 1.6797, + "step": 17208 + }, + { + "epoch": 0.21, + "grad_norm": 12.004541092776662, + "learning_rate": 1.971817410912043e-05, + "loss": 1.3113, + "step": 17211 + }, + { + "epoch": 0.21, + "grad_norm": 15.464493935654243, + "learning_rate": 1.971803638141557e-05, + "loss": 1.6687, + "step": 17214 + }, + { + "epoch": 0.21, + "grad_norm": 7.194012885372796, + "learning_rate": 1.9717898620546557e-05, + "loss": 1.5345, + "step": 17217 + }, + { + "epoch": 0.21, + "grad_norm": 16.40830682799238, + "learning_rate": 1.9717760826513852e-05, + "loss": 1.4269, + "step": 17220 + }, + { + "epoch": 0.21, + "grad_norm": 7.461775266028925, + "learning_rate": 1.971762299931793e-05, + "loss": 1.509, + "step": 17223 + }, + { + "epoch": 0.21, + "grad_norm": 5.712691824689068, + "learning_rate": 1.971748513895926e-05, + "loss": 1.3822, + "step": 17226 + }, + { + "epoch": 0.21, + "grad_norm": 8.611061472390832, + "learning_rate": 1.9717347245438314e-05, + "loss": 1.3506, + "step": 17229 + }, + { + "epoch": 0.21, + "grad_norm": 14.532599134209033, + "learning_rate": 1.9717209318755564e-05, + "loss": 1.3837, + "step": 17232 + }, + { + "epoch": 0.21, + "grad_norm": 8.089650485512493, + "learning_rate": 1.9717071358911477e-05, + "loss": 1.2287, + "step": 17235 + }, + { + "epoch": 0.21, + "grad_norm": 17.368831014108274, + "learning_rate": 1.9716933365906525e-05, + "loss": 1.7141, + "step": 17238 + }, + { + "epoch": 0.21, + "grad_norm": 20.333034153641442, + "learning_rate": 1.971679533974118e-05, + "loss": 1.7843, + "step": 17241 + }, + { + "epoch": 0.21, + "grad_norm": 24.44562456787323, + "learning_rate": 1.9716657280415915e-05, + "loss": 1.5285, + "step": 17244 + }, + { + "epoch": 0.21, + "grad_norm": 9.404773607653995, + "learning_rate": 1.9716519187931195e-05, + "loss": 1.5557, + "step": 17247 + }, + { + "epoch": 0.21, + "grad_norm": 10.592262363529237, + "learning_rate": 1.9716381062287494e-05, + "loss": 1.6156, + "step": 17250 + }, + { + "epoch": 0.21, + "grad_norm": 5.241805038937897, + "learning_rate": 1.971624290348529e-05, + "loss": 1.8835, + "step": 17253 + }, + { + "epoch": 0.21, + "grad_norm": 15.106739786330214, + "learning_rate": 1.9716104711525043e-05, + "loss": 1.6763, + "step": 17256 + }, + { + "epoch": 0.21, + "grad_norm": 39.61332418220642, + "learning_rate": 1.9715966486407233e-05, + "loss": 1.6941, + "step": 17259 + }, + { + "epoch": 0.21, + "grad_norm": 30.926571416578813, + "learning_rate": 1.971582822813233e-05, + "loss": 1.2969, + "step": 17262 + }, + { + "epoch": 0.21, + "grad_norm": 64.65679491456191, + "learning_rate": 1.9715689936700802e-05, + "loss": 1.9062, + "step": 17265 + }, + { + "epoch": 0.21, + "grad_norm": 6.194511950660203, + "learning_rate": 1.9715551612113124e-05, + "loss": 1.9754, + "step": 17268 + }, + { + "epoch": 0.21, + "grad_norm": 9.595280114460657, + "learning_rate": 1.971541325436977e-05, + "loss": 1.2251, + "step": 17271 + }, + { + "epoch": 0.21, + "grad_norm": 3.7331796067580165, + "learning_rate": 1.971527486347121e-05, + "loss": 1.4201, + "step": 17274 + }, + { + "epoch": 0.21, + "grad_norm": 12.514812562502216, + "learning_rate": 1.9715136439417913e-05, + "loss": 1.4701, + "step": 17277 + }, + { + "epoch": 0.21, + "grad_norm": 40.123067411196324, + "learning_rate": 1.971499798221036e-05, + "loss": 1.7799, + "step": 17280 + }, + { + "epoch": 0.21, + "grad_norm": 24.181403018054183, + "learning_rate": 1.9714859491849015e-05, + "loss": 1.4725, + "step": 17283 + }, + { + "epoch": 0.21, + "grad_norm": 5.397082344186261, + "learning_rate": 1.9714720968334355e-05, + "loss": 1.5317, + "step": 17286 + }, + { + "epoch": 0.21, + "grad_norm": 18.371374390715914, + "learning_rate": 1.9714582411666852e-05, + "loss": 1.5758, + "step": 17289 + }, + { + "epoch": 0.21, + "grad_norm": 24.510852513159747, + "learning_rate": 1.9714443821846975e-05, + "loss": 1.4618, + "step": 17292 + }, + { + "epoch": 0.21, + "grad_norm": 7.754583018302409, + "learning_rate": 1.9714305198875203e-05, + "loss": 1.5183, + "step": 17295 + }, + { + "epoch": 0.21, + "grad_norm": 22.062487151866943, + "learning_rate": 1.9714166542752006e-05, + "loss": 1.4095, + "step": 17298 + }, + { + "epoch": 0.21, + "grad_norm": 16.703452062834415, + "learning_rate": 1.9714027853477857e-05, + "loss": 1.6851, + "step": 17301 + }, + { + "epoch": 0.21, + "grad_norm": 39.983379343777735, + "learning_rate": 1.9713889131053233e-05, + "loss": 1.4897, + "step": 17304 + }, + { + "epoch": 0.21, + "grad_norm": 21.483377847416243, + "learning_rate": 1.97137503754786e-05, + "loss": 1.5741, + "step": 17307 + }, + { + "epoch": 0.21, + "grad_norm": 12.340035191916758, + "learning_rate": 1.9713611586754438e-05, + "loss": 1.9811, + "step": 17310 + }, + { + "epoch": 0.21, + "grad_norm": 14.193173555025375, + "learning_rate": 1.9713472764881218e-05, + "loss": 1.5632, + "step": 17313 + }, + { + "epoch": 0.21, + "grad_norm": 15.41696781375918, + "learning_rate": 1.9713333909859417e-05, + "loss": 1.4924, + "step": 17316 + }, + { + "epoch": 0.21, + "grad_norm": 20.337128943761996, + "learning_rate": 1.97131950216895e-05, + "loss": 1.6831, + "step": 17319 + }, + { + "epoch": 0.21, + "grad_norm": 13.577707870702415, + "learning_rate": 1.9713056100371953e-05, + "loss": 1.6332, + "step": 17322 + }, + { + "epoch": 0.21, + "grad_norm": 11.281603820659459, + "learning_rate": 1.971291714590724e-05, + "loss": 1.5917, + "step": 17325 + }, + { + "epoch": 0.21, + "grad_norm": 7.54548111207957, + "learning_rate": 1.9712778158295843e-05, + "loss": 1.225, + "step": 17328 + }, + { + "epoch": 0.21, + "grad_norm": 11.762562625088021, + "learning_rate": 1.971263913753823e-05, + "loss": 1.457, + "step": 17331 + }, + { + "epoch": 0.21, + "grad_norm": 10.45976872452204, + "learning_rate": 1.971250008363488e-05, + "loss": 1.3904, + "step": 17334 + }, + { + "epoch": 0.21, + "grad_norm": 44.04969697124846, + "learning_rate": 1.9712360996586262e-05, + "loss": 1.8424, + "step": 17337 + }, + { + "epoch": 0.21, + "grad_norm": 21.617166410311285, + "learning_rate": 1.9712221876392856e-05, + "loss": 2.0262, + "step": 17340 + }, + { + "epoch": 0.21, + "grad_norm": 13.071122890556373, + "learning_rate": 1.9712082723055138e-05, + "loss": 1.5324, + "step": 17343 + }, + { + "epoch": 0.21, + "grad_norm": 17.588211276379777, + "learning_rate": 1.9711943536573577e-05, + "loss": 1.5776, + "step": 17346 + }, + { + "epoch": 0.21, + "grad_norm": 11.342049925959268, + "learning_rate": 1.9711804316948654e-05, + "loss": 1.8405, + "step": 17349 + }, + { + "epoch": 0.21, + "grad_norm": 23.838239476356858, + "learning_rate": 1.971166506418084e-05, + "loss": 1.4115, + "step": 17352 + }, + { + "epoch": 0.21, + "grad_norm": 11.895700712513733, + "learning_rate": 1.9711525778270607e-05, + "loss": 2.0406, + "step": 17355 + }, + { + "epoch": 0.21, + "grad_norm": 27.92186008628867, + "learning_rate": 1.971138645921844e-05, + "loss": 1.4934, + "step": 17358 + }, + { + "epoch": 0.21, + "grad_norm": 14.780302990830686, + "learning_rate": 1.9711247107024806e-05, + "loss": 1.3991, + "step": 17361 + }, + { + "epoch": 0.21, + "grad_norm": 24.214343103140855, + "learning_rate": 1.9711107721690185e-05, + "loss": 1.5277, + "step": 17364 + }, + { + "epoch": 0.21, + "grad_norm": 51.875497629789486, + "learning_rate": 1.9710968303215048e-05, + "loss": 1.6618, + "step": 17367 + }, + { + "epoch": 0.21, + "grad_norm": 64.71782708792105, + "learning_rate": 1.9710828851599875e-05, + "loss": 1.428, + "step": 17370 + }, + { + "epoch": 0.21, + "grad_norm": 18.67240561002065, + "learning_rate": 1.9710689366845142e-05, + "loss": 1.6984, + "step": 17373 + }, + { + "epoch": 0.21, + "grad_norm": 14.078011018462162, + "learning_rate": 1.971054984895133e-05, + "loss": 1.3881, + "step": 17376 + }, + { + "epoch": 0.21, + "grad_norm": 39.83546698818985, + "learning_rate": 1.97104102979189e-05, + "loss": 1.717, + "step": 17379 + }, + { + "epoch": 0.21, + "grad_norm": 22.16370077085048, + "learning_rate": 1.971027071374834e-05, + "loss": 1.572, + "step": 17382 + }, + { + "epoch": 0.21, + "grad_norm": 5.4201377600783, + "learning_rate": 1.971013109644013e-05, + "loss": 1.5697, + "step": 17385 + }, + { + "epoch": 0.21, + "grad_norm": 21.735076460961324, + "learning_rate": 1.970999144599473e-05, + "loss": 1.8704, + "step": 17388 + }, + { + "epoch": 0.21, + "grad_norm": 21.214446294292706, + "learning_rate": 1.9709851762412633e-05, + "loss": 1.6806, + "step": 17391 + }, + { + "epoch": 0.21, + "grad_norm": 7.954934866302662, + "learning_rate": 1.970971204569431e-05, + "loss": 1.606, + "step": 17394 + }, + { + "epoch": 0.21, + "grad_norm": 29.73287319450602, + "learning_rate": 1.9709572295840235e-05, + "loss": 1.591, + "step": 17397 + }, + { + "epoch": 0.21, + "grad_norm": 26.11395828246043, + "learning_rate": 1.9709432512850886e-05, + "loss": 1.6694, + "step": 17400 + }, + { + "epoch": 0.21, + "grad_norm": 22.862759725833822, + "learning_rate": 1.970929269672674e-05, + "loss": 1.8058, + "step": 17403 + }, + { + "epoch": 0.21, + "grad_norm": 7.316809925856549, + "learning_rate": 1.9709152847468278e-05, + "loss": 1.6979, + "step": 17406 + }, + { + "epoch": 0.21, + "grad_norm": 14.572185054300292, + "learning_rate": 1.9709012965075974e-05, + "loss": 1.5538, + "step": 17409 + }, + { + "epoch": 0.21, + "grad_norm": 26.530900017333927, + "learning_rate": 1.970887304955031e-05, + "loss": 1.481, + "step": 17412 + }, + { + "epoch": 0.21, + "grad_norm": 5.067973884311703, + "learning_rate": 1.9708733100891755e-05, + "loss": 1.1558, + "step": 17415 + }, + { + "epoch": 0.21, + "grad_norm": 8.17181350851647, + "learning_rate": 1.970859311910079e-05, + "loss": 1.2187, + "step": 17418 + }, + { + "epoch": 0.21, + "grad_norm": 12.506060515782572, + "learning_rate": 1.9708453104177894e-05, + "loss": 2.0874, + "step": 17421 + }, + { + "epoch": 0.21, + "grad_norm": 13.868031516035773, + "learning_rate": 1.9708313056123547e-05, + "loss": 1.5242, + "step": 17424 + }, + { + "epoch": 0.21, + "grad_norm": 17.227845671335395, + "learning_rate": 1.9708172974938225e-05, + "loss": 1.2513, + "step": 17427 + }, + { + "epoch": 0.21, + "grad_norm": 22.35854553527526, + "learning_rate": 1.9708032860622404e-05, + "loss": 1.518, + "step": 17430 + }, + { + "epoch": 0.21, + "grad_norm": 12.43291630316476, + "learning_rate": 1.970789271317656e-05, + "loss": 1.417, + "step": 17433 + }, + { + "epoch": 0.21, + "grad_norm": 13.783888133939689, + "learning_rate": 1.970775253260118e-05, + "loss": 1.5154, + "step": 17436 + }, + { + "epoch": 0.21, + "grad_norm": 19.516425961737788, + "learning_rate": 1.9707612318896738e-05, + "loss": 1.5144, + "step": 17439 + }, + { + "epoch": 0.21, + "grad_norm": 6.951486198979202, + "learning_rate": 1.9707472072063707e-05, + "loss": 1.9696, + "step": 17442 + }, + { + "epoch": 0.21, + "grad_norm": 6.715223689202266, + "learning_rate": 1.9707331792102574e-05, + "loss": 1.5335, + "step": 17445 + }, + { + "epoch": 0.21, + "grad_norm": 15.328013365473025, + "learning_rate": 1.9707191479013813e-05, + "loss": 1.3678, + "step": 17448 + }, + { + "epoch": 0.21, + "grad_norm": 32.213132113434064, + "learning_rate": 1.9707051132797902e-05, + "loss": 1.6038, + "step": 17451 + }, + { + "epoch": 0.21, + "grad_norm": 8.821919167949055, + "learning_rate": 1.9706910753455326e-05, + "loss": 1.5244, + "step": 17454 + }, + { + "epoch": 0.21, + "grad_norm": 5.486619478705653, + "learning_rate": 1.9706770340986557e-05, + "loss": 1.519, + "step": 17457 + }, + { + "epoch": 0.21, + "grad_norm": 9.14527319764249, + "learning_rate": 1.970662989539208e-05, + "loss": 1.861, + "step": 17460 + }, + { + "epoch": 0.21, + "grad_norm": 6.292780287113117, + "learning_rate": 1.970648941667237e-05, + "loss": 1.7832, + "step": 17463 + }, + { + "epoch": 0.21, + "grad_norm": 13.335569159328905, + "learning_rate": 1.970634890482791e-05, + "loss": 1.8247, + "step": 17466 + }, + { + "epoch": 0.21, + "grad_norm": 19.07524181784392, + "learning_rate": 1.9706208359859172e-05, + "loss": 1.3976, + "step": 17469 + }, + { + "epoch": 0.21, + "grad_norm": 13.660675392844633, + "learning_rate": 1.9706067781766644e-05, + "loss": 1.6931, + "step": 17472 + }, + { + "epoch": 0.21, + "grad_norm": 19.009384794447723, + "learning_rate": 1.9705927170550803e-05, + "loss": 1.7461, + "step": 17475 + }, + { + "epoch": 0.21, + "grad_norm": 16.50015834438547, + "learning_rate": 1.970578652621213e-05, + "loss": 1.4325, + "step": 17478 + }, + { + "epoch": 0.21, + "grad_norm": 22.353874200274912, + "learning_rate": 1.97056458487511e-05, + "loss": 2.2503, + "step": 17481 + }, + { + "epoch": 0.21, + "grad_norm": 5.33199257087213, + "learning_rate": 1.9705505138168197e-05, + "loss": 1.1362, + "step": 17484 + }, + { + "epoch": 0.21, + "grad_norm": 5.002750389093317, + "learning_rate": 1.9705364394463905e-05, + "loss": 1.6195, + "step": 17487 + }, + { + "epoch": 0.21, + "grad_norm": 35.28802056462202, + "learning_rate": 1.9705223617638697e-05, + "loss": 1.306, + "step": 17490 + }, + { + "epoch": 0.21, + "grad_norm": 4.28669933980142, + "learning_rate": 1.970508280769306e-05, + "loss": 1.4078, + "step": 17493 + }, + { + "epoch": 0.21, + "grad_norm": 19.034158080264458, + "learning_rate": 1.9704941964627467e-05, + "loss": 1.5808, + "step": 17496 + }, + { + "epoch": 0.21, + "grad_norm": 22.617066426406588, + "learning_rate": 1.9704801088442404e-05, + "loss": 1.8626, + "step": 17499 + }, + { + "epoch": 0.21, + "grad_norm": 11.25146715616436, + "learning_rate": 1.970466017913835e-05, + "loss": 1.7184, + "step": 17502 + }, + { + "epoch": 0.21, + "grad_norm": 4.82483338247116, + "learning_rate": 1.9704519236715788e-05, + "loss": 1.5859, + "step": 17505 + }, + { + "epoch": 0.21, + "grad_norm": 34.939005691538654, + "learning_rate": 1.9704378261175193e-05, + "loss": 1.7759, + "step": 17508 + }, + { + "epoch": 0.21, + "grad_norm": 27.843555448469324, + "learning_rate": 1.9704237252517057e-05, + "loss": 1.8283, + "step": 17511 + }, + { + "epoch": 0.21, + "grad_norm": 8.763017771592086, + "learning_rate": 1.970409621074185e-05, + "loss": 1.5916, + "step": 17514 + }, + { + "epoch": 0.21, + "grad_norm": 33.172253097676375, + "learning_rate": 1.970395513585006e-05, + "loss": 1.604, + "step": 17517 + }, + { + "epoch": 0.21, + "grad_norm": 14.648512924075916, + "learning_rate": 1.9703814027842167e-05, + "loss": 1.7712, + "step": 17520 + }, + { + "epoch": 0.21, + "grad_norm": 6.984738738960741, + "learning_rate": 1.9703672886718652e-05, + "loss": 1.3861, + "step": 17523 + }, + { + "epoch": 0.21, + "grad_norm": 6.6499816930312265, + "learning_rate": 1.970353171248e-05, + "loss": 1.8256, + "step": 17526 + }, + { + "epoch": 0.21, + "grad_norm": 11.315478637282341, + "learning_rate": 1.970339050512668e-05, + "loss": 1.4836, + "step": 17529 + }, + { + "epoch": 0.21, + "grad_norm": 11.206347968189796, + "learning_rate": 1.970324926465919e-05, + "loss": 1.44, + "step": 17532 + }, + { + "epoch": 0.21, + "grad_norm": 8.764766513919524, + "learning_rate": 1.9703107991078005e-05, + "loss": 1.768, + "step": 17535 + }, + { + "epoch": 0.21, + "grad_norm": 34.710687039621625, + "learning_rate": 1.9702966684383605e-05, + "loss": 1.4208, + "step": 17538 + }, + { + "epoch": 0.21, + "grad_norm": 6.590001567343801, + "learning_rate": 1.9702825344576478e-05, + "loss": 2.313, + "step": 17541 + }, + { + "epoch": 0.21, + "grad_norm": 17.12921206201177, + "learning_rate": 1.9702683971657102e-05, + "loss": 1.7389, + "step": 17544 + }, + { + "epoch": 0.21, + "grad_norm": 34.50664770303372, + "learning_rate": 1.970254256562596e-05, + "loss": 1.7306, + "step": 17547 + }, + { + "epoch": 0.21, + "grad_norm": 14.03161358115068, + "learning_rate": 1.9702401126483536e-05, + "loss": 1.8112, + "step": 17550 + }, + { + "epoch": 0.21, + "grad_norm": 50.37898285267546, + "learning_rate": 1.970225965423031e-05, + "loss": 1.7142, + "step": 17553 + }, + { + "epoch": 0.21, + "grad_norm": 33.59748407017566, + "learning_rate": 1.9702118148866764e-05, + "loss": 1.9069, + "step": 17556 + }, + { + "epoch": 0.21, + "grad_norm": 18.546436733921613, + "learning_rate": 1.970197661039339e-05, + "loss": 1.4057, + "step": 17559 + }, + { + "epoch": 0.21, + "grad_norm": 15.188771165971415, + "learning_rate": 1.970183503881066e-05, + "loss": 1.827, + "step": 17562 + }, + { + "epoch": 0.21, + "grad_norm": 51.96123103117468, + "learning_rate": 1.9701693434119062e-05, + "loss": 1.4825, + "step": 17565 + }, + { + "epoch": 0.21, + "grad_norm": 6.349816879142397, + "learning_rate": 1.970155179631908e-05, + "loss": 1.6082, + "step": 17568 + }, + { + "epoch": 0.21, + "grad_norm": 56.38883465190061, + "learning_rate": 1.9701410125411195e-05, + "loss": 1.5845, + "step": 17571 + }, + { + "epoch": 0.21, + "grad_norm": 12.366831888429871, + "learning_rate": 1.9701268421395892e-05, + "loss": 1.7133, + "step": 17574 + }, + { + "epoch": 0.21, + "grad_norm": 11.836111354612772, + "learning_rate": 1.9701126684273653e-05, + "loss": 1.6339, + "step": 17577 + }, + { + "epoch": 0.21, + "grad_norm": 20.528195792005537, + "learning_rate": 1.970098491404497e-05, + "loss": 1.7235, + "step": 17580 + }, + { + "epoch": 0.21, + "grad_norm": 24.293071818319678, + "learning_rate": 1.9700843110710314e-05, + "loss": 1.577, + "step": 17583 + }, + { + "epoch": 0.21, + "grad_norm": 16.254645599038405, + "learning_rate": 1.970070127427017e-05, + "loss": 1.8476, + "step": 17586 + }, + { + "epoch": 0.21, + "grad_norm": 31.907424198293015, + "learning_rate": 1.9700559404725034e-05, + "loss": 2.0243, + "step": 17589 + }, + { + "epoch": 0.21, + "grad_norm": 20.168967788069974, + "learning_rate": 1.9700417502075383e-05, + "loss": 1.9885, + "step": 17592 + }, + { + "epoch": 0.21, + "grad_norm": 9.475399953434302, + "learning_rate": 1.97002755663217e-05, + "loss": 1.4477, + "step": 17595 + }, + { + "epoch": 0.21, + "grad_norm": 92.26981459181144, + "learning_rate": 1.970013359746447e-05, + "loss": 1.5112, + "step": 17598 + }, + { + "epoch": 0.21, + "grad_norm": 10.015546615948043, + "learning_rate": 1.9699991595504176e-05, + "loss": 1.5769, + "step": 17601 + }, + { + "epoch": 0.21, + "grad_norm": 19.928501119510326, + "learning_rate": 1.9699849560441306e-05, + "loss": 1.0988, + "step": 17604 + }, + { + "epoch": 0.21, + "grad_norm": 70.14695490453268, + "learning_rate": 1.9699707492276345e-05, + "loss": 1.1761, + "step": 17607 + }, + { + "epoch": 0.21, + "grad_norm": 17.511554562070497, + "learning_rate": 1.9699565391009773e-05, + "loss": 1.5103, + "step": 17610 + }, + { + "epoch": 0.21, + "grad_norm": 14.619408242466342, + "learning_rate": 1.969942325664208e-05, + "loss": 1.3066, + "step": 17613 + }, + { + "epoch": 0.21, + "grad_norm": 20.26961077676283, + "learning_rate": 1.9699281089173746e-05, + "loss": 1.4478, + "step": 17616 + }, + { + "epoch": 0.21, + "grad_norm": 43.41936941574535, + "learning_rate": 1.9699138888605263e-05, + "loss": 1.6735, + "step": 17619 + }, + { + "epoch": 0.21, + "grad_norm": 21.821842388209202, + "learning_rate": 1.969899665493711e-05, + "loss": 1.5295, + "step": 17622 + }, + { + "epoch": 0.21, + "grad_norm": 10.954770588932812, + "learning_rate": 1.9698854388169778e-05, + "loss": 1.2918, + "step": 17625 + }, + { + "epoch": 0.21, + "grad_norm": 10.41329987039192, + "learning_rate": 1.969871208830375e-05, + "loss": 1.7657, + "step": 17628 + }, + { + "epoch": 0.21, + "grad_norm": 32.5461769701355, + "learning_rate": 1.9698569755339506e-05, + "loss": 1.3738, + "step": 17631 + }, + { + "epoch": 0.21, + "grad_norm": 11.595808662139133, + "learning_rate": 1.969842738927754e-05, + "loss": 1.5223, + "step": 17634 + }, + { + "epoch": 0.21, + "grad_norm": 78.82124835524704, + "learning_rate": 1.9698284990118334e-05, + "loss": 1.9646, + "step": 17637 + }, + { + "epoch": 0.21, + "grad_norm": 171.81579023334257, + "learning_rate": 1.9698142557862376e-05, + "loss": 1.5802, + "step": 17640 + }, + { + "epoch": 0.21, + "grad_norm": 46.23414325272899, + "learning_rate": 1.969800009251015e-05, + "loss": 1.5687, + "step": 17643 + }, + { + "epoch": 0.21, + "grad_norm": 4.954150483903673, + "learning_rate": 1.9697857594062145e-05, + "loss": 1.7054, + "step": 17646 + }, + { + "epoch": 0.21, + "grad_norm": 7.080261562753654, + "learning_rate": 1.969771506251884e-05, + "loss": 1.255, + "step": 17649 + }, + { + "epoch": 0.21, + "grad_norm": 8.550498571776322, + "learning_rate": 1.969757249788073e-05, + "loss": 1.5789, + "step": 17652 + }, + { + "epoch": 0.21, + "grad_norm": 9.837752434072938, + "learning_rate": 1.9697429900148298e-05, + "loss": 1.4985, + "step": 17655 + }, + { + "epoch": 0.21, + "grad_norm": 6.042090770617043, + "learning_rate": 1.9697287269322033e-05, + "loss": 1.2862, + "step": 17658 + }, + { + "epoch": 0.21, + "grad_norm": 9.01041765012481, + "learning_rate": 1.9697144605402415e-05, + "loss": 1.2934, + "step": 17661 + }, + { + "epoch": 0.21, + "grad_norm": 33.00212366934233, + "learning_rate": 1.9697001908389938e-05, + "loss": 1.8051, + "step": 17664 + }, + { + "epoch": 0.21, + "grad_norm": 6.564980126652379, + "learning_rate": 1.9696859178285086e-05, + "loss": 1.4167, + "step": 17667 + }, + { + "epoch": 0.21, + "grad_norm": 8.985197632617547, + "learning_rate": 1.9696716415088344e-05, + "loss": 1.5707, + "step": 17670 + }, + { + "epoch": 0.21, + "grad_norm": 10.374872217099282, + "learning_rate": 1.9696573618800204e-05, + "loss": 1.5443, + "step": 17673 + }, + { + "epoch": 0.21, + "grad_norm": 102.03579876451396, + "learning_rate": 1.9696430789421153e-05, + "loss": 1.4059, + "step": 17676 + }, + { + "epoch": 0.21, + "grad_norm": 13.349440342126258, + "learning_rate": 1.9696287926951676e-05, + "loss": 1.6475, + "step": 17679 + }, + { + "epoch": 0.21, + "grad_norm": 22.14538084667953, + "learning_rate": 1.9696145031392256e-05, + "loss": 1.3825, + "step": 17682 + }, + { + "epoch": 0.21, + "grad_norm": 11.51027487262647, + "learning_rate": 1.969600210274339e-05, + "loss": 1.747, + "step": 17685 + }, + { + "epoch": 0.21, + "grad_norm": 43.5126777181927, + "learning_rate": 1.969585914100556e-05, + "loss": 1.2173, + "step": 17688 + }, + { + "epoch": 0.21, + "grad_norm": 5.271340459094806, + "learning_rate": 1.9695716146179254e-05, + "loss": 1.5662, + "step": 17691 + }, + { + "epoch": 0.21, + "grad_norm": 34.87458714112308, + "learning_rate": 1.9695573118264964e-05, + "loss": 1.4837, + "step": 17694 + }, + { + "epoch": 0.21, + "grad_norm": 12.833112219773948, + "learning_rate": 1.969543005726317e-05, + "loss": 1.7225, + "step": 17697 + }, + { + "epoch": 0.21, + "grad_norm": 6.24087320793052, + "learning_rate": 1.969528696317437e-05, + "loss": 1.6484, + "step": 17700 + }, + { + "epoch": 0.21, + "grad_norm": 13.511115778149588, + "learning_rate": 1.9695143835999047e-05, + "loss": 1.6367, + "step": 17703 + }, + { + "epoch": 0.21, + "grad_norm": 7.793801830835127, + "learning_rate": 1.969500067573769e-05, + "loss": 1.1283, + "step": 17706 + }, + { + "epoch": 0.21, + "grad_norm": 15.225407644541379, + "learning_rate": 1.969485748239079e-05, + "loss": 1.4784, + "step": 17709 + }, + { + "epoch": 0.21, + "grad_norm": 64.85146741542567, + "learning_rate": 1.969471425595883e-05, + "loss": 1.557, + "step": 17712 + }, + { + "epoch": 0.21, + "grad_norm": 29.610194030416192, + "learning_rate": 1.9694570996442303e-05, + "loss": 1.4453, + "step": 17715 + }, + { + "epoch": 0.21, + "grad_norm": 28.764272249075375, + "learning_rate": 1.96944277038417e-05, + "loss": 1.7184, + "step": 17718 + }, + { + "epoch": 0.21, + "grad_norm": 13.169930065956796, + "learning_rate": 1.9694284378157502e-05, + "loss": 1.7218, + "step": 17721 + }, + { + "epoch": 0.21, + "grad_norm": 41.72640217517186, + "learning_rate": 1.9694141019390208e-05, + "loss": 2.0949, + "step": 17724 + }, + { + "epoch": 0.21, + "grad_norm": 18.198923016740558, + "learning_rate": 1.96939976275403e-05, + "loss": 1.6458, + "step": 17727 + }, + { + "epoch": 0.21, + "grad_norm": 14.58167749174284, + "learning_rate": 1.969385420260827e-05, + "loss": 1.6838, + "step": 17730 + }, + { + "epoch": 0.21, + "grad_norm": 7.790642026845825, + "learning_rate": 1.9693710744594607e-05, + "loss": 1.5426, + "step": 17733 + }, + { + "epoch": 0.21, + "grad_norm": 6.556846153750756, + "learning_rate": 1.9693567253499804e-05, + "loss": 1.3854, + "step": 17736 + }, + { + "epoch": 0.21, + "grad_norm": 17.381584703719625, + "learning_rate": 1.9693423729324346e-05, + "loss": 1.6684, + "step": 17739 + }, + { + "epoch": 0.21, + "grad_norm": 18.481918335089453, + "learning_rate": 1.969328017206872e-05, + "loss": 1.6484, + "step": 17742 + }, + { + "epoch": 0.21, + "grad_norm": 4.006308093312657, + "learning_rate": 1.9693136581733427e-05, + "loss": 1.178, + "step": 17745 + }, + { + "epoch": 0.21, + "grad_norm": 43.02369666958689, + "learning_rate": 1.9692992958318945e-05, + "loss": 1.5863, + "step": 17748 + }, + { + "epoch": 0.21, + "grad_norm": 6.914861134366925, + "learning_rate": 1.9692849301825774e-05, + "loss": 1.7655, + "step": 17751 + }, + { + "epoch": 0.21, + "grad_norm": 50.928098263333816, + "learning_rate": 1.9692705612254397e-05, + "loss": 1.8638, + "step": 17754 + }, + { + "epoch": 0.21, + "grad_norm": 8.935030059776233, + "learning_rate": 1.9692561889605306e-05, + "loss": 1.9569, + "step": 17757 + }, + { + "epoch": 0.21, + "grad_norm": 54.880569496360536, + "learning_rate": 1.9692418133878996e-05, + "loss": 1.6632, + "step": 17760 + }, + { + "epoch": 0.21, + "grad_norm": 16.703999414406667, + "learning_rate": 1.969227434507595e-05, + "loss": 1.5276, + "step": 17763 + }, + { + "epoch": 0.21, + "grad_norm": 16.395016167424785, + "learning_rate": 1.9692130523196666e-05, + "loss": 1.5111, + "step": 17766 + }, + { + "epoch": 0.21, + "grad_norm": 7.364782365737531, + "learning_rate": 1.969198666824163e-05, + "loss": 1.1546, + "step": 17769 + }, + { + "epoch": 0.21, + "grad_norm": 4.180662849982251, + "learning_rate": 1.9691842780211336e-05, + "loss": 1.514, + "step": 17772 + }, + { + "epoch": 0.21, + "grad_norm": 64.70780049992693, + "learning_rate": 1.969169885910627e-05, + "loss": 1.6573, + "step": 17775 + }, + { + "epoch": 0.21, + "grad_norm": 4.8596409299612, + "learning_rate": 1.969155490492693e-05, + "loss": 1.5907, + "step": 17778 + }, + { + "epoch": 0.21, + "grad_norm": 9.3865373411197, + "learning_rate": 1.9691410917673804e-05, + "loss": 1.5266, + "step": 17781 + }, + { + "epoch": 0.21, + "grad_norm": 38.07454885812553, + "learning_rate": 1.9691266897347385e-05, + "loss": 1.4937, + "step": 17784 + }, + { + "epoch": 0.21, + "grad_norm": 20.573763582304462, + "learning_rate": 1.9691122843948157e-05, + "loss": 1.5045, + "step": 17787 + }, + { + "epoch": 0.21, + "grad_norm": 4.249050922766451, + "learning_rate": 1.9690978757476623e-05, + "loss": 1.6523, + "step": 17790 + }, + { + "epoch": 0.21, + "grad_norm": 27.963422373204065, + "learning_rate": 1.9690834637933267e-05, + "loss": 1.5732, + "step": 17793 + }, + { + "epoch": 0.21, + "grad_norm": 9.183661459152843, + "learning_rate": 1.9690690485318583e-05, + "loss": 1.6635, + "step": 17796 + }, + { + "epoch": 0.21, + "grad_norm": 120.2137099244689, + "learning_rate": 1.9690546299633063e-05, + "loss": 1.787, + "step": 17799 + }, + { + "epoch": 0.21, + "grad_norm": 14.884553318972799, + "learning_rate": 1.96904020808772e-05, + "loss": 1.4373, + "step": 17802 + }, + { + "epoch": 0.21, + "grad_norm": 70.432064698699, + "learning_rate": 1.9690257829051484e-05, + "loss": 1.3262, + "step": 17805 + }, + { + "epoch": 0.21, + "grad_norm": 26.497421550601743, + "learning_rate": 1.9690113544156407e-05, + "loss": 1.5856, + "step": 17808 + }, + { + "epoch": 0.21, + "grad_norm": 7.418948673292894, + "learning_rate": 1.9689969226192466e-05, + "loss": 1.3875, + "step": 17811 + }, + { + "epoch": 0.21, + "grad_norm": 3.8155851028437704, + "learning_rate": 1.9689824875160148e-05, + "loss": 1.5951, + "step": 17814 + }, + { + "epoch": 0.21, + "grad_norm": 47.2923456858199, + "learning_rate": 1.968968049105995e-05, + "loss": 1.5037, + "step": 17817 + }, + { + "epoch": 0.21, + "grad_norm": 9.578521196773796, + "learning_rate": 1.9689536073892363e-05, + "loss": 1.2269, + "step": 17820 + }, + { + "epoch": 0.21, + "grad_norm": 23.96144971744848, + "learning_rate": 1.9689391623657877e-05, + "loss": 1.4518, + "step": 17823 + }, + { + "epoch": 0.21, + "grad_norm": 39.996459083783385, + "learning_rate": 1.9689247140356988e-05, + "loss": 1.7499, + "step": 17826 + }, + { + "epoch": 0.21, + "grad_norm": 29.824238134707578, + "learning_rate": 1.9689102623990188e-05, + "loss": 1.508, + "step": 17829 + }, + { + "epoch": 0.21, + "grad_norm": 20.62430845878097, + "learning_rate": 1.9688958074557974e-05, + "loss": 1.5514, + "step": 17832 + }, + { + "epoch": 0.21, + "grad_norm": 8.216346557166032, + "learning_rate": 1.9688813492060832e-05, + "loss": 1.6464, + "step": 17835 + }, + { + "epoch": 0.21, + "grad_norm": 15.340401606118272, + "learning_rate": 1.968866887649926e-05, + "loss": 1.3389, + "step": 17838 + }, + { + "epoch": 0.21, + "grad_norm": 24.096841701859702, + "learning_rate": 1.9688524227873756e-05, + "loss": 1.2119, + "step": 17841 + }, + { + "epoch": 0.21, + "grad_norm": 10.123126961657555, + "learning_rate": 1.96883795461848e-05, + "loss": 1.4465, + "step": 17844 + }, + { + "epoch": 0.21, + "grad_norm": 8.17494826493368, + "learning_rate": 1.9688234831432903e-05, + "loss": 1.4261, + "step": 17847 + }, + { + "epoch": 0.21, + "grad_norm": 36.17269364608647, + "learning_rate": 1.968809008361855e-05, + "loss": 1.9079, + "step": 17850 + }, + { + "epoch": 0.21, + "grad_norm": 47.21591687136061, + "learning_rate": 1.9687945302742228e-05, + "loss": 1.3682, + "step": 17853 + }, + { + "epoch": 0.21, + "grad_norm": 6.3384414389035255, + "learning_rate": 1.968780048880444e-05, + "loss": 1.8231, + "step": 17856 + }, + { + "epoch": 0.21, + "grad_norm": 22.83144917009869, + "learning_rate": 1.968765564180568e-05, + "loss": 1.635, + "step": 17859 + }, + { + "epoch": 0.21, + "grad_norm": 28.73857716121015, + "learning_rate": 1.968751076174644e-05, + "loss": 1.6281, + "step": 17862 + }, + { + "epoch": 0.21, + "grad_norm": 6.803129426663583, + "learning_rate": 1.9687365848627217e-05, + "loss": 1.4003, + "step": 17865 + }, + { + "epoch": 0.21, + "grad_norm": 10.834367067200056, + "learning_rate": 1.9687220902448504e-05, + "loss": 1.4788, + "step": 17868 + }, + { + "epoch": 0.21, + "grad_norm": 12.914323736845379, + "learning_rate": 1.9687075923210796e-05, + "loss": 1.6695, + "step": 17871 + }, + { + "epoch": 0.21, + "grad_norm": 36.490730345675075, + "learning_rate": 1.9686930910914585e-05, + "loss": 1.7092, + "step": 17874 + }, + { + "epoch": 0.21, + "grad_norm": 33.18112389877077, + "learning_rate": 1.9686785865560368e-05, + "loss": 1.5035, + "step": 17877 + }, + { + "epoch": 0.22, + "grad_norm": 9.058313469343906, + "learning_rate": 1.968664078714864e-05, + "loss": 1.4425, + "step": 17880 + }, + { + "epoch": 0.22, + "grad_norm": 7.521161822096803, + "learning_rate": 1.9686495675679898e-05, + "loss": 1.5669, + "step": 17883 + }, + { + "epoch": 0.22, + "grad_norm": 33.56129704417877, + "learning_rate": 1.9686350531154634e-05, + "loss": 1.4812, + "step": 17886 + }, + { + "epoch": 0.22, + "grad_norm": 13.333319764892785, + "learning_rate": 1.9686205353573346e-05, + "loss": 1.8055, + "step": 17889 + }, + { + "epoch": 0.22, + "grad_norm": 20.723466232924476, + "learning_rate": 1.9686060142936524e-05, + "loss": 1.4741, + "step": 17892 + }, + { + "epoch": 0.22, + "grad_norm": 13.65380944456095, + "learning_rate": 1.9685914899244674e-05, + "loss": 1.5391, + "step": 17895 + }, + { + "epoch": 0.22, + "grad_norm": 10.525565065907493, + "learning_rate": 1.968576962249828e-05, + "loss": 1.9633, + "step": 17898 + }, + { + "epoch": 0.22, + "grad_norm": 10.615945626093232, + "learning_rate": 1.9685624312697844e-05, + "loss": 1.6018, + "step": 17901 + }, + { + "epoch": 0.22, + "grad_norm": 77.34821713643953, + "learning_rate": 1.968547896984386e-05, + "loss": 1.3999, + "step": 17904 + }, + { + "epoch": 0.22, + "grad_norm": 5.792330196895288, + "learning_rate": 1.968533359393683e-05, + "loss": 1.3727, + "step": 17907 + }, + { + "epoch": 0.22, + "grad_norm": 9.006233139719834, + "learning_rate": 1.968518818497724e-05, + "loss": 1.6147, + "step": 17910 + }, + { + "epoch": 0.22, + "grad_norm": 11.337819028735492, + "learning_rate": 1.9685042742965595e-05, + "loss": 1.3464, + "step": 17913 + }, + { + "epoch": 0.22, + "grad_norm": 14.993529164495035, + "learning_rate": 1.9684897267902386e-05, + "loss": 1.8047, + "step": 17916 + }, + { + "epoch": 0.22, + "grad_norm": 5.3793963510021925, + "learning_rate": 1.968475175978811e-05, + "loss": 1.4795, + "step": 17919 + }, + { + "epoch": 0.22, + "grad_norm": 11.287876798216493, + "learning_rate": 1.9684606218623267e-05, + "loss": 1.5113, + "step": 17922 + }, + { + "epoch": 0.22, + "grad_norm": 5.249669513815246, + "learning_rate": 1.9684460644408346e-05, + "loss": 1.4792, + "step": 17925 + }, + { + "epoch": 0.22, + "grad_norm": 7.305429546686674, + "learning_rate": 1.9684315037143857e-05, + "loss": 1.6572, + "step": 17928 + }, + { + "epoch": 0.22, + "grad_norm": 15.672745601261399, + "learning_rate": 1.9684169396830283e-05, + "loss": 1.1505, + "step": 17931 + }, + { + "epoch": 0.22, + "grad_norm": 31.150190170220515, + "learning_rate": 1.968402372346813e-05, + "loss": 1.5858, + "step": 17934 + }, + { + "epoch": 0.22, + "grad_norm": 14.156841477301814, + "learning_rate": 1.968387801705789e-05, + "loss": 1.474, + "step": 17937 + }, + { + "epoch": 0.22, + "grad_norm": 8.996231716235455, + "learning_rate": 1.9683732277600066e-05, + "loss": 1.6212, + "step": 17940 + }, + { + "epoch": 0.22, + "grad_norm": 17.55610802330796, + "learning_rate": 1.9683586505095146e-05, + "loss": 1.3894, + "step": 17943 + }, + { + "epoch": 0.22, + "grad_norm": 5.987224659351771, + "learning_rate": 1.9683440699543638e-05, + "loss": 2.0818, + "step": 17946 + }, + { + "epoch": 0.22, + "grad_norm": 12.964977638209406, + "learning_rate": 1.9683294860946035e-05, + "loss": 1.5866, + "step": 17949 + }, + { + "epoch": 0.22, + "grad_norm": 7.416648356733235, + "learning_rate": 1.9683148989302833e-05, + "loss": 1.6008, + "step": 17952 + }, + { + "epoch": 0.22, + "grad_norm": 43.581212095460614, + "learning_rate": 1.9683003084614532e-05, + "loss": 1.6742, + "step": 17955 + }, + { + "epoch": 0.22, + "grad_norm": 8.78548111003524, + "learning_rate": 1.968285714688163e-05, + "loss": 1.7179, + "step": 17958 + }, + { + "epoch": 0.22, + "grad_norm": 24.45636964174905, + "learning_rate": 1.968271117610462e-05, + "loss": 1.7649, + "step": 17961 + }, + { + "epoch": 0.22, + "grad_norm": 8.446733034263689, + "learning_rate": 1.968256517228401e-05, + "loss": 1.4281, + "step": 17964 + }, + { + "epoch": 0.22, + "grad_norm": 42.091961155425366, + "learning_rate": 1.968241913542029e-05, + "loss": 1.7494, + "step": 17967 + }, + { + "epoch": 0.22, + "grad_norm": 12.66264153817866, + "learning_rate": 1.9682273065513962e-05, + "loss": 1.3568, + "step": 17970 + }, + { + "epoch": 0.22, + "grad_norm": 11.25920279759231, + "learning_rate": 1.9682126962565524e-05, + "loss": 1.5738, + "step": 17973 + }, + { + "epoch": 0.22, + "grad_norm": 30.30418382059816, + "learning_rate": 1.9681980826575473e-05, + "loss": 1.3994, + "step": 17976 + }, + { + "epoch": 0.22, + "grad_norm": 12.460490448413726, + "learning_rate": 1.9681834657544306e-05, + "loss": 1.9386, + "step": 17979 + }, + { + "epoch": 0.22, + "grad_norm": 55.74850428718952, + "learning_rate": 1.968168845547253e-05, + "loss": 1.3573, + "step": 17982 + }, + { + "epoch": 0.22, + "grad_norm": 14.75322973283703, + "learning_rate": 1.9681542220360635e-05, + "loss": 1.6681, + "step": 17985 + }, + { + "epoch": 0.22, + "grad_norm": 4.980207272417726, + "learning_rate": 1.9681395952209127e-05, + "loss": 1.8074, + "step": 17988 + }, + { + "epoch": 0.22, + "grad_norm": 6.5417617527789185, + "learning_rate": 1.96812496510185e-05, + "loss": 1.5415, + "step": 17991 + }, + { + "epoch": 0.22, + "grad_norm": 28.56175872271812, + "learning_rate": 1.9681103316789254e-05, + "loss": 1.7233, + "step": 17994 + }, + { + "epoch": 0.22, + "grad_norm": 16.450081369745604, + "learning_rate": 1.968095694952189e-05, + "loss": 1.7126, + "step": 17997 + }, + { + "epoch": 0.22, + "grad_norm": 32.16325850209539, + "learning_rate": 1.968081054921691e-05, + "loss": 1.6475, + "step": 18000 + }, + { + "epoch": 0.22, + "grad_norm": 15.503125498073612, + "learning_rate": 1.9680664115874806e-05, + "loss": 1.529, + "step": 18003 + }, + { + "epoch": 0.22, + "grad_norm": 12.57574191498673, + "learning_rate": 1.9680517649496084e-05, + "loss": 1.6512, + "step": 18006 + }, + { + "epoch": 0.22, + "grad_norm": 7.434353095095914, + "learning_rate": 1.9680371150081244e-05, + "loss": 1.5809, + "step": 18009 + }, + { + "epoch": 0.22, + "grad_norm": 14.173137217041768, + "learning_rate": 1.968022461763078e-05, + "loss": 1.5276, + "step": 18012 + }, + { + "epoch": 0.22, + "grad_norm": 25.77183740175106, + "learning_rate": 1.9680078052145203e-05, + "loss": 1.4434, + "step": 18015 + }, + { + "epoch": 0.22, + "grad_norm": 25.69827341873904, + "learning_rate": 1.9679931453625e-05, + "loss": 1.1689, + "step": 18018 + }, + { + "epoch": 0.22, + "grad_norm": 11.52508483983763, + "learning_rate": 1.967978482207068e-05, + "loss": 1.3108, + "step": 18021 + }, + { + "epoch": 0.22, + "grad_norm": 7.493397936976226, + "learning_rate": 1.967963815748274e-05, + "loss": 1.3577, + "step": 18024 + }, + { + "epoch": 0.22, + "grad_norm": 12.168476147931921, + "learning_rate": 1.9679491459861684e-05, + "loss": 1.7405, + "step": 18027 + }, + { + "epoch": 0.22, + "grad_norm": 11.092448872963791, + "learning_rate": 1.9679344729208008e-05, + "loss": 1.7291, + "step": 18030 + }, + { + "epoch": 0.22, + "grad_norm": 49.815202400534204, + "learning_rate": 1.9679197965522216e-05, + "loss": 1.5499, + "step": 18033 + }, + { + "epoch": 0.22, + "grad_norm": 10.453456061746294, + "learning_rate": 1.967905116880481e-05, + "loss": 1.6786, + "step": 18036 + }, + { + "epoch": 0.22, + "grad_norm": 29.045845384591406, + "learning_rate": 1.9678904339056284e-05, + "loss": 1.6108, + "step": 18039 + }, + { + "epoch": 0.22, + "grad_norm": 18.926647694180637, + "learning_rate": 1.9678757476277147e-05, + "loss": 1.3662, + "step": 18042 + }, + { + "epoch": 0.22, + "grad_norm": 14.649489912584874, + "learning_rate": 1.9678610580467893e-05, + "loss": 1.5564, + "step": 18045 + }, + { + "epoch": 0.22, + "grad_norm": 12.50441200034373, + "learning_rate": 1.9678463651629033e-05, + "loss": 1.945, + "step": 18048 + }, + { + "epoch": 0.22, + "grad_norm": 14.61055598760426, + "learning_rate": 1.9678316689761058e-05, + "loss": 1.5734, + "step": 18051 + }, + { + "epoch": 0.22, + "grad_norm": 20.40032531294482, + "learning_rate": 1.9678169694864477e-05, + "loss": 1.7069, + "step": 18054 + }, + { + "epoch": 0.22, + "grad_norm": 23.391138586559645, + "learning_rate": 1.9678022666939787e-05, + "loss": 1.5487, + "step": 18057 + }, + { + "epoch": 0.22, + "grad_norm": 8.613298681910685, + "learning_rate": 1.9677875605987494e-05, + "loss": 1.6481, + "step": 18060 + }, + { + "epoch": 0.22, + "grad_norm": 33.819870462910345, + "learning_rate": 1.9677728512008096e-05, + "loss": 1.1154, + "step": 18063 + }, + { + "epoch": 0.22, + "grad_norm": 13.873399317426115, + "learning_rate": 1.9677581385002096e-05, + "loss": 1.3829, + "step": 18066 + }, + { + "epoch": 0.22, + "grad_norm": 48.918270262923485, + "learning_rate": 1.9677434224969998e-05, + "loss": 1.7732, + "step": 18069 + }, + { + "epoch": 0.22, + "grad_norm": 15.177588424068993, + "learning_rate": 1.9677287031912305e-05, + "loss": 1.4635, + "step": 18072 + }, + { + "epoch": 0.22, + "grad_norm": 11.478505272860897, + "learning_rate": 1.9677139805829512e-05, + "loss": 1.6949, + "step": 18075 + }, + { + "epoch": 0.22, + "grad_norm": 10.433060140190266, + "learning_rate": 1.9676992546722128e-05, + "loss": 1.3027, + "step": 18078 + }, + { + "epoch": 0.22, + "grad_norm": 36.69206269988225, + "learning_rate": 1.9676845254590654e-05, + "loss": 1.5629, + "step": 18081 + }, + { + "epoch": 0.22, + "grad_norm": 8.288850201052185, + "learning_rate": 1.9676697929435595e-05, + "loss": 1.4298, + "step": 18084 + }, + { + "epoch": 0.22, + "grad_norm": 2.9588053460858834, + "learning_rate": 1.9676550571257448e-05, + "loss": 1.6177, + "step": 18087 + }, + { + "epoch": 0.22, + "grad_norm": 18.0876727270301, + "learning_rate": 1.9676403180056726e-05, + "loss": 1.3736, + "step": 18090 + }, + { + "epoch": 0.22, + "grad_norm": 13.199199428749738, + "learning_rate": 1.9676255755833916e-05, + "loss": 1.4588, + "step": 18093 + }, + { + "epoch": 0.22, + "grad_norm": 10.779799257002923, + "learning_rate": 1.9676108298589537e-05, + "loss": 1.6946, + "step": 18096 + }, + { + "epoch": 0.22, + "grad_norm": 17.664733190844878, + "learning_rate": 1.9675960808324084e-05, + "loss": 1.517, + "step": 18099 + }, + { + "epoch": 0.22, + "grad_norm": 12.510382388765702, + "learning_rate": 1.9675813285038063e-05, + "loss": 1.9336, + "step": 18102 + }, + { + "epoch": 0.22, + "grad_norm": 29.002076693809617, + "learning_rate": 1.9675665728731975e-05, + "loss": 1.595, + "step": 18105 + }, + { + "epoch": 0.22, + "grad_norm": 9.693160638154096, + "learning_rate": 1.9675518139406327e-05, + "loss": 2.0583, + "step": 18108 + }, + { + "epoch": 0.22, + "grad_norm": 8.116707267846015, + "learning_rate": 1.967537051706162e-05, + "loss": 1.4729, + "step": 18111 + }, + { + "epoch": 0.22, + "grad_norm": 7.854739748959332, + "learning_rate": 1.9675222861698358e-05, + "loss": 1.6178, + "step": 18114 + }, + { + "epoch": 0.22, + "grad_norm": 28.80625999960346, + "learning_rate": 1.967507517331704e-05, + "loss": 1.3819, + "step": 18117 + }, + { + "epoch": 0.22, + "grad_norm": 10.78787302715287, + "learning_rate": 1.9674927451918184e-05, + "loss": 1.6402, + "step": 18120 + }, + { + "epoch": 0.22, + "grad_norm": 4.385358737356516, + "learning_rate": 1.9674779697502283e-05, + "loss": 1.8468, + "step": 18123 + }, + { + "epoch": 0.22, + "grad_norm": 7.403496770925525, + "learning_rate": 1.967463191006984e-05, + "loss": 1.6176, + "step": 18126 + }, + { + "epoch": 0.22, + "grad_norm": 14.618447618790075, + "learning_rate": 1.9674484089621367e-05, + "loss": 1.4967, + "step": 18129 + }, + { + "epoch": 0.22, + "grad_norm": 24.57158383289091, + "learning_rate": 1.9674336236157364e-05, + "loss": 1.6427, + "step": 18132 + }, + { + "epoch": 0.22, + "grad_norm": 5.325522636920007, + "learning_rate": 1.9674188349678337e-05, + "loss": 1.8197, + "step": 18135 + }, + { + "epoch": 0.22, + "grad_norm": 21.133990086196867, + "learning_rate": 1.9674040430184786e-05, + "loss": 1.6098, + "step": 18138 + }, + { + "epoch": 0.22, + "grad_norm": 14.562016438646326, + "learning_rate": 1.9673892477677224e-05, + "loss": 1.3802, + "step": 18141 + }, + { + "epoch": 0.22, + "grad_norm": 31.064813930510375, + "learning_rate": 1.9673744492156146e-05, + "loss": 1.6089, + "step": 18144 + }, + { + "epoch": 0.22, + "grad_norm": 9.803881051314482, + "learning_rate": 1.9673596473622067e-05, + "loss": 1.3581, + "step": 18147 + }, + { + "epoch": 0.22, + "grad_norm": 23.344605500306628, + "learning_rate": 1.9673448422075486e-05, + "loss": 1.7594, + "step": 18150 + }, + { + "epoch": 0.22, + "grad_norm": 12.476767582319537, + "learning_rate": 1.967330033751691e-05, + "loss": 1.4661, + "step": 18153 + }, + { + "epoch": 0.22, + "grad_norm": 80.58914795218239, + "learning_rate": 1.9673152219946845e-05, + "loss": 1.503, + "step": 18156 + }, + { + "epoch": 0.22, + "grad_norm": 7.694345042264159, + "learning_rate": 1.9673004069365795e-05, + "loss": 1.3827, + "step": 18159 + }, + { + "epoch": 0.22, + "grad_norm": 5.449118163757139, + "learning_rate": 1.967285588577427e-05, + "loss": 1.6629, + "step": 18162 + }, + { + "epoch": 0.22, + "grad_norm": 11.14862186188457, + "learning_rate": 1.9672707669172767e-05, + "loss": 1.6692, + "step": 18165 + }, + { + "epoch": 0.22, + "grad_norm": 7.139829689490503, + "learning_rate": 1.9672559419561795e-05, + "loss": 1.5221, + "step": 18168 + }, + { + "epoch": 0.22, + "grad_norm": 13.856634292446842, + "learning_rate": 1.9672411136941868e-05, + "loss": 1.9331, + "step": 18171 + }, + { + "epoch": 0.22, + "grad_norm": 21.637886556411807, + "learning_rate": 1.9672262821313484e-05, + "loss": 1.6261, + "step": 18174 + }, + { + "epoch": 0.22, + "grad_norm": 53.73262120008523, + "learning_rate": 1.9672114472677147e-05, + "loss": 1.3224, + "step": 18177 + }, + { + "epoch": 0.22, + "grad_norm": 4.328682786622219, + "learning_rate": 1.9671966091033373e-05, + "loss": 1.3659, + "step": 18180 + }, + { + "epoch": 0.22, + "grad_norm": 20.685759291497853, + "learning_rate": 1.967181767638266e-05, + "loss": 1.8436, + "step": 18183 + }, + { + "epoch": 0.22, + "grad_norm": 15.030500998025865, + "learning_rate": 1.9671669228725515e-05, + "loss": 1.7568, + "step": 18186 + }, + { + "epoch": 0.22, + "grad_norm": 99.5108852484895, + "learning_rate": 1.967152074806245e-05, + "loss": 1.5915, + "step": 18189 + }, + { + "epoch": 0.22, + "grad_norm": 16.361406983289807, + "learning_rate": 1.9671372234393965e-05, + "loss": 1.7079, + "step": 18192 + }, + { + "epoch": 0.22, + "grad_norm": 15.92455943312671, + "learning_rate": 1.9671223687720573e-05, + "loss": 1.3261, + "step": 18195 + }, + { + "epoch": 0.22, + "grad_norm": 11.653998015185996, + "learning_rate": 1.9671075108042776e-05, + "loss": 1.4226, + "step": 18198 + }, + { + "epoch": 0.22, + "grad_norm": 33.88316466613483, + "learning_rate": 1.9670926495361086e-05, + "loss": 1.6657, + "step": 18201 + }, + { + "epoch": 0.22, + "grad_norm": 6.928084059998915, + "learning_rate": 1.9670777849676004e-05, + "loss": 1.9553, + "step": 18204 + }, + { + "epoch": 0.22, + "grad_norm": 45.6457104440725, + "learning_rate": 1.967062917098804e-05, + "loss": 1.3951, + "step": 18207 + }, + { + "epoch": 0.22, + "grad_norm": 25.59420417809104, + "learning_rate": 1.9670480459297706e-05, + "loss": 1.3655, + "step": 18210 + }, + { + "epoch": 0.22, + "grad_norm": 141.00489600855497, + "learning_rate": 1.96703317146055e-05, + "loss": 1.3709, + "step": 18213 + }, + { + "epoch": 0.22, + "grad_norm": 61.58868417585866, + "learning_rate": 1.9670182936911943e-05, + "loss": 1.7151, + "step": 18216 + }, + { + "epoch": 0.22, + "grad_norm": 22.309464466520453, + "learning_rate": 1.967003412621753e-05, + "loss": 1.2137, + "step": 18219 + }, + { + "epoch": 0.22, + "grad_norm": 12.493624321704933, + "learning_rate": 1.9669885282522773e-05, + "loss": 1.6506, + "step": 18222 + }, + { + "epoch": 0.22, + "grad_norm": 8.412107171428483, + "learning_rate": 1.9669736405828184e-05, + "loss": 1.7975, + "step": 18225 + }, + { + "epoch": 0.22, + "grad_norm": 58.44571899014505, + "learning_rate": 1.9669587496134265e-05, + "loss": 1.8034, + "step": 18228 + }, + { + "epoch": 0.22, + "grad_norm": 17.11096025439813, + "learning_rate": 1.9669438553441528e-05, + "loss": 1.6691, + "step": 18231 + }, + { + "epoch": 0.22, + "grad_norm": 14.101920497216437, + "learning_rate": 1.9669289577750475e-05, + "loss": 1.6357, + "step": 18234 + }, + { + "epoch": 0.22, + "grad_norm": 5.593133493201107, + "learning_rate": 1.9669140569061627e-05, + "loss": 1.648, + "step": 18237 + }, + { + "epoch": 0.22, + "grad_norm": 17.53350700597134, + "learning_rate": 1.966899152737548e-05, + "loss": 1.7306, + "step": 18240 + }, + { + "epoch": 0.22, + "grad_norm": 92.71906344592855, + "learning_rate": 1.9668842452692553e-05, + "loss": 1.3819, + "step": 18243 + }, + { + "epoch": 0.22, + "grad_norm": 13.214222960456794, + "learning_rate": 1.9668693345013343e-05, + "loss": 1.5274, + "step": 18246 + }, + { + "epoch": 0.22, + "grad_norm": 49.416547640631464, + "learning_rate": 1.9668544204338367e-05, + "loss": 1.4477, + "step": 18249 + }, + { + "epoch": 0.22, + "grad_norm": 17.095023752571187, + "learning_rate": 1.9668395030668135e-05, + "loss": 1.5707, + "step": 18252 + }, + { + "epoch": 0.22, + "grad_norm": 6.198188186386666, + "learning_rate": 1.966824582400315e-05, + "loss": 1.3675, + "step": 18255 + }, + { + "epoch": 0.22, + "grad_norm": 80.56041335395838, + "learning_rate": 1.966809658434393e-05, + "loss": 1.6386, + "step": 18258 + }, + { + "epoch": 0.22, + "grad_norm": 3.694844040662048, + "learning_rate": 1.9667947311690974e-05, + "loss": 1.4702, + "step": 18261 + }, + { + "epoch": 0.22, + "grad_norm": 12.597021991130495, + "learning_rate": 1.9667798006044796e-05, + "loss": 1.4492, + "step": 18264 + }, + { + "epoch": 0.22, + "grad_norm": 11.734713399750278, + "learning_rate": 1.966764866740591e-05, + "loss": 2.0244, + "step": 18267 + }, + { + "epoch": 0.22, + "grad_norm": 12.674616590734557, + "learning_rate": 1.9667499295774816e-05, + "loss": 1.438, + "step": 18270 + }, + { + "epoch": 0.22, + "grad_norm": 42.4677494467334, + "learning_rate": 1.9667349891152032e-05, + "loss": 1.6366, + "step": 18273 + }, + { + "epoch": 0.22, + "grad_norm": 7.243396160950696, + "learning_rate": 1.9667200453538066e-05, + "loss": 1.5396, + "step": 18276 + }, + { + "epoch": 0.22, + "grad_norm": 16.04834487201186, + "learning_rate": 1.966705098293342e-05, + "loss": 1.4666, + "step": 18279 + }, + { + "epoch": 0.22, + "grad_norm": 9.89770270712386, + "learning_rate": 1.966690147933862e-05, + "loss": 1.6899, + "step": 18282 + }, + { + "epoch": 0.22, + "grad_norm": 8.752512505249323, + "learning_rate": 1.9666751942754164e-05, + "loss": 1.8778, + "step": 18285 + }, + { + "epoch": 0.22, + "grad_norm": 16.904315816231048, + "learning_rate": 1.9666602373180566e-05, + "loss": 1.6925, + "step": 18288 + }, + { + "epoch": 0.22, + "grad_norm": 44.79701761666883, + "learning_rate": 1.9666452770618335e-05, + "loss": 1.5727, + "step": 18291 + }, + { + "epoch": 0.22, + "grad_norm": 25.477689411548305, + "learning_rate": 1.9666303135067982e-05, + "loss": 1.6649, + "step": 18294 + }, + { + "epoch": 0.22, + "grad_norm": 41.91839068135942, + "learning_rate": 1.966615346653002e-05, + "loss": 1.3751, + "step": 18297 + }, + { + "epoch": 0.22, + "grad_norm": 9.313593581131535, + "learning_rate": 1.9666003765004955e-05, + "loss": 1.803, + "step": 18300 + }, + { + "epoch": 0.22, + "grad_norm": 18.639832780323907, + "learning_rate": 1.9665854030493303e-05, + "loss": 1.4067, + "step": 18303 + }, + { + "epoch": 0.22, + "grad_norm": 6.440410221154963, + "learning_rate": 1.9665704262995576e-05, + "loss": 1.6366, + "step": 18306 + }, + { + "epoch": 0.22, + "grad_norm": 8.058629441482779, + "learning_rate": 1.9665554462512276e-05, + "loss": 1.8966, + "step": 18309 + }, + { + "epoch": 0.22, + "grad_norm": 25.95699860024368, + "learning_rate": 1.9665404629043923e-05, + "loss": 1.5954, + "step": 18312 + }, + { + "epoch": 0.22, + "grad_norm": 7.37259267318848, + "learning_rate": 1.9665254762591024e-05, + "loss": 1.5084, + "step": 18315 + }, + { + "epoch": 0.22, + "grad_norm": 7.5940042084789265, + "learning_rate": 1.9665104863154095e-05, + "loss": 1.5107, + "step": 18318 + }, + { + "epoch": 0.22, + "grad_norm": 22.012999603318427, + "learning_rate": 1.9664954930733643e-05, + "loss": 1.2704, + "step": 18321 + }, + { + "epoch": 0.22, + "grad_norm": 18.35166334955254, + "learning_rate": 1.966480496533018e-05, + "loss": 1.4497, + "step": 18324 + }, + { + "epoch": 0.22, + "grad_norm": 2.9844867024699564, + "learning_rate": 1.966465496694422e-05, + "loss": 2.0909, + "step": 18327 + }, + { + "epoch": 0.22, + "grad_norm": 7.970248116216943, + "learning_rate": 1.966450493557627e-05, + "loss": 1.5209, + "step": 18330 + }, + { + "epoch": 0.22, + "grad_norm": 119.76543726793287, + "learning_rate": 1.9664354871226853e-05, + "loss": 1.7, + "step": 18333 + }, + { + "epoch": 0.22, + "grad_norm": 6.78309156427817, + "learning_rate": 1.9664204773896468e-05, + "loss": 1.7374, + "step": 18336 + }, + { + "epoch": 0.22, + "grad_norm": 5.719984690721196, + "learning_rate": 1.9664054643585636e-05, + "loss": 1.541, + "step": 18339 + }, + { + "epoch": 0.22, + "grad_norm": 13.050626671455012, + "learning_rate": 1.9663904480294865e-05, + "loss": 1.7209, + "step": 18342 + }, + { + "epoch": 0.22, + "grad_norm": 13.572775664944892, + "learning_rate": 1.966375428402467e-05, + "loss": 1.8254, + "step": 18345 + }, + { + "epoch": 0.22, + "grad_norm": 19.713709783708488, + "learning_rate": 1.966360405477556e-05, + "loss": 1.7276, + "step": 18348 + }, + { + "epoch": 0.22, + "grad_norm": 16.257304259221787, + "learning_rate": 1.9663453792548055e-05, + "loss": 1.7674, + "step": 18351 + }, + { + "epoch": 0.22, + "grad_norm": 8.271452232693237, + "learning_rate": 1.966330349734266e-05, + "loss": 1.815, + "step": 18354 + }, + { + "epoch": 0.22, + "grad_norm": 5.780798544783124, + "learning_rate": 1.966315316915989e-05, + "loss": 1.482, + "step": 18357 + }, + { + "epoch": 0.22, + "grad_norm": 24.931592253009445, + "learning_rate": 1.966300280800026e-05, + "loss": 1.6987, + "step": 18360 + }, + { + "epoch": 0.22, + "grad_norm": 6.141954457461107, + "learning_rate": 1.9662852413864286e-05, + "loss": 1.341, + "step": 18363 + }, + { + "epoch": 0.22, + "grad_norm": 24.971670838408787, + "learning_rate": 1.9662701986752472e-05, + "loss": 1.5355, + "step": 18366 + }, + { + "epoch": 0.22, + "grad_norm": 13.718188830853128, + "learning_rate": 1.9662551526665337e-05, + "loss": 1.5096, + "step": 18369 + }, + { + "epoch": 0.22, + "grad_norm": 12.803256849706097, + "learning_rate": 1.9662401033603392e-05, + "loss": 1.5478, + "step": 18372 + }, + { + "epoch": 0.22, + "grad_norm": 10.337305396148908, + "learning_rate": 1.9662250507567156e-05, + "loss": 1.9813, + "step": 18375 + }, + { + "epoch": 0.22, + "grad_norm": 34.00129049573026, + "learning_rate": 1.966209994855714e-05, + "loss": 1.4083, + "step": 18378 + }, + { + "epoch": 0.22, + "grad_norm": 32.219812397776145, + "learning_rate": 1.9661949356573857e-05, + "loss": 1.5651, + "step": 18381 + }, + { + "epoch": 0.22, + "grad_norm": 2.7776249795358003, + "learning_rate": 1.966179873161782e-05, + "loss": 1.6165, + "step": 18384 + }, + { + "epoch": 0.22, + "grad_norm": 1.746251269851169, + "learning_rate": 1.9661648073689542e-05, + "loss": 1.648, + "step": 18387 + }, + { + "epoch": 0.22, + "grad_norm": 27.54187448503192, + "learning_rate": 1.9661497382789544e-05, + "loss": 1.6945, + "step": 18390 + }, + { + "epoch": 0.22, + "grad_norm": 14.129419274813706, + "learning_rate": 1.966134665891833e-05, + "loss": 1.6367, + "step": 18393 + }, + { + "epoch": 0.22, + "grad_norm": 5.334328821735775, + "learning_rate": 1.9661195902076425e-05, + "loss": 1.2773, + "step": 18396 + }, + { + "epoch": 0.22, + "grad_norm": 9.79686299684106, + "learning_rate": 1.9661045112264334e-05, + "loss": 1.2096, + "step": 18399 + }, + { + "epoch": 0.22, + "grad_norm": 13.204941856946952, + "learning_rate": 1.966089428948258e-05, + "loss": 1.5545, + "step": 18402 + }, + { + "epoch": 0.22, + "grad_norm": 12.057479648417326, + "learning_rate": 1.966074343373167e-05, + "loss": 1.5141, + "step": 18405 + }, + { + "epoch": 0.22, + "grad_norm": 32.60070192278296, + "learning_rate": 1.9660592545012126e-05, + "loss": 1.3497, + "step": 18408 + }, + { + "epoch": 0.22, + "grad_norm": 12.875550905284992, + "learning_rate": 1.966044162332446e-05, + "loss": 1.3968, + "step": 18411 + }, + { + "epoch": 0.22, + "grad_norm": 33.35241468662403, + "learning_rate": 1.9660290668669182e-05, + "loss": 1.8338, + "step": 18414 + }, + { + "epoch": 0.22, + "grad_norm": 17.301102459936086, + "learning_rate": 1.9660139681046814e-05, + "loss": 1.7531, + "step": 18417 + }, + { + "epoch": 0.22, + "grad_norm": 20.467171859542137, + "learning_rate": 1.9659988660457866e-05, + "loss": 1.4596, + "step": 18420 + }, + { + "epoch": 0.22, + "grad_norm": 8.009322527731555, + "learning_rate": 1.965983760690286e-05, + "loss": 1.6918, + "step": 18423 + }, + { + "epoch": 0.22, + "grad_norm": 9.858036681384817, + "learning_rate": 1.9659686520382307e-05, + "loss": 1.7201, + "step": 18426 + }, + { + "epoch": 0.22, + "grad_norm": 18.137422183218263, + "learning_rate": 1.9659535400896723e-05, + "loss": 1.3891, + "step": 18429 + }, + { + "epoch": 0.22, + "grad_norm": 13.463032590238825, + "learning_rate": 1.965938424844662e-05, + "loss": 1.4017, + "step": 18432 + }, + { + "epoch": 0.22, + "grad_norm": 73.34600224481248, + "learning_rate": 1.9659233063032523e-05, + "loss": 1.3878, + "step": 18435 + }, + { + "epoch": 0.22, + "grad_norm": 6.628262799905651, + "learning_rate": 1.965908184465494e-05, + "loss": 1.5568, + "step": 18438 + }, + { + "epoch": 0.22, + "grad_norm": 3.7835888933512276, + "learning_rate": 1.9658930593314393e-05, + "loss": 1.691, + "step": 18441 + }, + { + "epoch": 0.22, + "grad_norm": 15.46739679408347, + "learning_rate": 1.9658779309011393e-05, + "loss": 1.357, + "step": 18444 + }, + { + "epoch": 0.22, + "grad_norm": 22.964224241112323, + "learning_rate": 1.965862799174646e-05, + "loss": 1.3876, + "step": 18447 + }, + { + "epoch": 0.22, + "grad_norm": 37.22995610718081, + "learning_rate": 1.9658476641520107e-05, + "loss": 1.8575, + "step": 18450 + }, + { + "epoch": 0.22, + "grad_norm": 29.412588858573983, + "learning_rate": 1.9658325258332848e-05, + "loss": 1.6884, + "step": 18453 + }, + { + "epoch": 0.22, + "grad_norm": 5.305329606145756, + "learning_rate": 1.9658173842185212e-05, + "loss": 1.7954, + "step": 18456 + }, + { + "epoch": 0.22, + "grad_norm": 18.31685871988198, + "learning_rate": 1.96580223930777e-05, + "loss": 1.1918, + "step": 18459 + }, + { + "epoch": 0.22, + "grad_norm": 7.456863419827813, + "learning_rate": 1.9657870911010845e-05, + "loss": 1.779, + "step": 18462 + }, + { + "epoch": 0.22, + "grad_norm": 12.328455738069637, + "learning_rate": 1.965771939598515e-05, + "loss": 1.5753, + "step": 18465 + }, + { + "epoch": 0.22, + "grad_norm": 17.840593160151247, + "learning_rate": 1.9657567848001136e-05, + "loss": 1.6308, + "step": 18468 + }, + { + "epoch": 0.22, + "grad_norm": 49.91599060021095, + "learning_rate": 1.965741626705932e-05, + "loss": 1.6661, + "step": 18471 + }, + { + "epoch": 0.22, + "grad_norm": 15.977644922920886, + "learning_rate": 1.9657264653160225e-05, + "loss": 1.7225, + "step": 18474 + }, + { + "epoch": 0.22, + "grad_norm": 6.932156673925458, + "learning_rate": 1.9657113006304365e-05, + "loss": 1.3489, + "step": 18477 + }, + { + "epoch": 0.22, + "grad_norm": 11.378094888396681, + "learning_rate": 1.9656961326492253e-05, + "loss": 1.5453, + "step": 18480 + }, + { + "epoch": 0.22, + "grad_norm": 6.134815465035961, + "learning_rate": 1.9656809613724416e-05, + "loss": 1.4996, + "step": 18483 + }, + { + "epoch": 0.22, + "grad_norm": 20.752974923235136, + "learning_rate": 1.9656657868001363e-05, + "loss": 1.7975, + "step": 18486 + }, + { + "epoch": 0.22, + "grad_norm": 19.698837712358078, + "learning_rate": 1.9656506089323612e-05, + "loss": 1.7358, + "step": 18489 + }, + { + "epoch": 0.22, + "grad_norm": 11.512110920537037, + "learning_rate": 1.965635427769169e-05, + "loss": 1.3428, + "step": 18492 + }, + { + "epoch": 0.22, + "grad_norm": 54.27220232561598, + "learning_rate": 1.9656202433106105e-05, + "loss": 1.6964, + "step": 18495 + }, + { + "epoch": 0.22, + "grad_norm": 14.325950798425119, + "learning_rate": 1.9656050555567378e-05, + "loss": 1.5087, + "step": 18498 + }, + { + "epoch": 0.22, + "grad_norm": 30.952821643006267, + "learning_rate": 1.965589864507603e-05, + "loss": 1.5023, + "step": 18501 + }, + { + "epoch": 0.22, + "grad_norm": 8.422135238420948, + "learning_rate": 1.965574670163258e-05, + "loss": 1.6594, + "step": 18504 + }, + { + "epoch": 0.22, + "grad_norm": 44.18932985081952, + "learning_rate": 1.9655594725237543e-05, + "loss": 1.6472, + "step": 18507 + }, + { + "epoch": 0.22, + "grad_norm": 12.811210695028468, + "learning_rate": 1.965544271589144e-05, + "loss": 1.7668, + "step": 18510 + }, + { + "epoch": 0.22, + "grad_norm": 7.475650486816481, + "learning_rate": 1.9655290673594787e-05, + "loss": 1.4749, + "step": 18513 + }, + { + "epoch": 0.22, + "grad_norm": 16.42923671244583, + "learning_rate": 1.9655138598348107e-05, + "loss": 1.2827, + "step": 18516 + }, + { + "epoch": 0.22, + "grad_norm": 3.0147511701357783, + "learning_rate": 1.9654986490151914e-05, + "loss": 1.5985, + "step": 18519 + }, + { + "epoch": 0.22, + "grad_norm": 20.061337786394994, + "learning_rate": 1.9654834349006732e-05, + "loss": 1.3925, + "step": 18522 + }, + { + "epoch": 0.22, + "grad_norm": 9.982316688210904, + "learning_rate": 1.9654682174913077e-05, + "loss": 1.5737, + "step": 18525 + }, + { + "epoch": 0.22, + "grad_norm": 13.12199555427711, + "learning_rate": 1.9654529967871473e-05, + "loss": 1.3818, + "step": 18528 + }, + { + "epoch": 0.22, + "grad_norm": 35.75378759197757, + "learning_rate": 1.965437772788243e-05, + "loss": 1.6213, + "step": 18531 + }, + { + "epoch": 0.22, + "grad_norm": 6.32598187987931, + "learning_rate": 1.9654225454946475e-05, + "loss": 1.5693, + "step": 18534 + }, + { + "epoch": 0.22, + "grad_norm": 5.634421747920579, + "learning_rate": 1.965407314906413e-05, + "loss": 1.8005, + "step": 18537 + }, + { + "epoch": 0.22, + "grad_norm": 48.49167902773014, + "learning_rate": 1.9653920810235904e-05, + "loss": 1.4622, + "step": 18540 + }, + { + "epoch": 0.22, + "grad_norm": 18.847620176417948, + "learning_rate": 1.965376843846233e-05, + "loss": 1.5536, + "step": 18543 + }, + { + "epoch": 0.22, + "grad_norm": 22.615587514605092, + "learning_rate": 1.9653616033743917e-05, + "loss": 1.6778, + "step": 18546 + }, + { + "epoch": 0.22, + "grad_norm": 99.4792897132518, + "learning_rate": 1.965346359608119e-05, + "loss": 1.8244, + "step": 18549 + }, + { + "epoch": 0.22, + "grad_norm": 11.727856543754285, + "learning_rate": 1.965331112547467e-05, + "loss": 1.4709, + "step": 18552 + }, + { + "epoch": 0.22, + "grad_norm": 22.252157237108282, + "learning_rate": 1.9653158621924877e-05, + "loss": 1.5197, + "step": 18555 + }, + { + "epoch": 0.22, + "grad_norm": 5.452961300481063, + "learning_rate": 1.9653006085432332e-05, + "loss": 1.2293, + "step": 18558 + }, + { + "epoch": 0.22, + "grad_norm": 9.526863562043845, + "learning_rate": 1.965285351599755e-05, + "loss": 1.3397, + "step": 18561 + }, + { + "epoch": 0.22, + "grad_norm": 3.9965597083079416, + "learning_rate": 1.965270091362106e-05, + "loss": 1.6283, + "step": 18564 + }, + { + "epoch": 0.22, + "grad_norm": 31.65549078387116, + "learning_rate": 1.9652548278303374e-05, + "loss": 1.4959, + "step": 18567 + }, + { + "epoch": 0.22, + "grad_norm": 2.912449465700158, + "learning_rate": 1.9652395610045022e-05, + "loss": 1.6285, + "step": 18570 + }, + { + "epoch": 0.22, + "grad_norm": 12.196816933668202, + "learning_rate": 1.965224290884652e-05, + "loss": 1.5581, + "step": 18573 + }, + { + "epoch": 0.22, + "grad_norm": 82.94704970691501, + "learning_rate": 1.965209017470839e-05, + "loss": 1.6536, + "step": 18576 + }, + { + "epoch": 0.22, + "grad_norm": 23.26641607032379, + "learning_rate": 1.9651937407631152e-05, + "loss": 1.5756, + "step": 18579 + }, + { + "epoch": 0.22, + "grad_norm": 7.295711810631206, + "learning_rate": 1.9651784607615324e-05, + "loss": 1.6494, + "step": 18582 + }, + { + "epoch": 0.22, + "grad_norm": 14.177835856607464, + "learning_rate": 1.9651631774661437e-05, + "loss": 1.3162, + "step": 18585 + }, + { + "epoch": 0.22, + "grad_norm": 9.509917780141677, + "learning_rate": 1.9651478908770007e-05, + "loss": 1.5932, + "step": 18588 + }, + { + "epoch": 0.22, + "grad_norm": 41.52416147828238, + "learning_rate": 1.9651326009941555e-05, + "loss": 1.5474, + "step": 18591 + }, + { + "epoch": 0.22, + "grad_norm": 6.472355472264374, + "learning_rate": 1.9651173078176603e-05, + "loss": 1.403, + "step": 18594 + }, + { + "epoch": 0.22, + "grad_norm": 21.8854068448167, + "learning_rate": 1.9651020113475673e-05, + "loss": 1.6545, + "step": 18597 + }, + { + "epoch": 0.22, + "grad_norm": 33.31890422415074, + "learning_rate": 1.9650867115839292e-05, + "loss": 1.3813, + "step": 18600 + }, + { + "epoch": 0.22, + "grad_norm": 27.903680735072925, + "learning_rate": 1.965071408526797e-05, + "loss": 1.8538, + "step": 18603 + }, + { + "epoch": 0.22, + "grad_norm": 37.03028794320172, + "learning_rate": 1.9650561021762246e-05, + "loss": 1.4845, + "step": 18606 + }, + { + "epoch": 0.22, + "grad_norm": 52.918983862610744, + "learning_rate": 1.9650407925322627e-05, + "loss": 1.7068, + "step": 18609 + }, + { + "epoch": 0.22, + "grad_norm": 9.804363651257379, + "learning_rate": 1.9650254795949645e-05, + "loss": 1.486, + "step": 18612 + }, + { + "epoch": 0.22, + "grad_norm": 12.574207413712346, + "learning_rate": 1.9650101633643816e-05, + "loss": 1.2868, + "step": 18615 + }, + { + "epoch": 0.22, + "grad_norm": 13.062188523597563, + "learning_rate": 1.9649948438405667e-05, + "loss": 1.2708, + "step": 18618 + }, + { + "epoch": 0.22, + "grad_norm": 21.163576393763535, + "learning_rate": 1.964979521023572e-05, + "loss": 1.7346, + "step": 18621 + }, + { + "epoch": 0.22, + "grad_norm": 116.03857696191488, + "learning_rate": 1.96496419491345e-05, + "loss": 1.4331, + "step": 18624 + }, + { + "epoch": 0.22, + "grad_norm": 3.7947367398325142, + "learning_rate": 1.9649488655102528e-05, + "loss": 1.6089, + "step": 18627 + }, + { + "epoch": 0.22, + "grad_norm": 44.88240483608408, + "learning_rate": 1.9649335328140327e-05, + "loss": 1.7917, + "step": 18630 + }, + { + "epoch": 0.22, + "grad_norm": 9.119415486350174, + "learning_rate": 1.964918196824842e-05, + "loss": 1.9496, + "step": 18633 + }, + { + "epoch": 0.22, + "grad_norm": 10.687173507645893, + "learning_rate": 1.9649028575427328e-05, + "loss": 1.2963, + "step": 18636 + }, + { + "epoch": 0.22, + "grad_norm": 9.93643801073383, + "learning_rate": 1.964887514967758e-05, + "loss": 1.8147, + "step": 18639 + }, + { + "epoch": 0.22, + "grad_norm": 18.404607264763406, + "learning_rate": 1.964872169099969e-05, + "loss": 1.5895, + "step": 18642 + }, + { + "epoch": 0.22, + "grad_norm": 7.58426452809622, + "learning_rate": 1.9648568199394194e-05, + "loss": 1.4662, + "step": 18645 + }, + { + "epoch": 0.22, + "grad_norm": 18.803431362211036, + "learning_rate": 1.964841467486161e-05, + "loss": 1.893, + "step": 18648 + }, + { + "epoch": 0.22, + "grad_norm": 27.636880152863675, + "learning_rate": 1.9648261117402462e-05, + "loss": 1.3569, + "step": 18651 + }, + { + "epoch": 0.22, + "grad_norm": 18.582705861004648, + "learning_rate": 1.9648107527017274e-05, + "loss": 1.3184, + "step": 18654 + }, + { + "epoch": 0.22, + "grad_norm": 11.667618556358919, + "learning_rate": 1.964795390370657e-05, + "loss": 1.6003, + "step": 18657 + }, + { + "epoch": 0.22, + "grad_norm": 20.947790152659568, + "learning_rate": 1.9647800247470876e-05, + "loss": 1.4729, + "step": 18660 + }, + { + "epoch": 0.22, + "grad_norm": 14.294912306643663, + "learning_rate": 1.9647646558310712e-05, + "loss": 1.6024, + "step": 18663 + }, + { + "epoch": 0.22, + "grad_norm": 12.984247763878784, + "learning_rate": 1.9647492836226605e-05, + "loss": 1.3116, + "step": 18666 + }, + { + "epoch": 0.22, + "grad_norm": 5.583223998452871, + "learning_rate": 1.9647339081219083e-05, + "loss": 1.421, + "step": 18669 + }, + { + "epoch": 0.22, + "grad_norm": 25.558960927351986, + "learning_rate": 1.9647185293288668e-05, + "loss": 1.3752, + "step": 18672 + }, + { + "epoch": 0.22, + "grad_norm": 21.058424020686637, + "learning_rate": 1.9647031472435884e-05, + "loss": 1.5643, + "step": 18675 + }, + { + "epoch": 0.22, + "grad_norm": 57.81807079895048, + "learning_rate": 1.964687761866125e-05, + "loss": 1.4465, + "step": 18678 + }, + { + "epoch": 0.22, + "grad_norm": 29.854833253460118, + "learning_rate": 1.9646723731965306e-05, + "loss": 1.642, + "step": 18681 + }, + { + "epoch": 0.22, + "grad_norm": 11.422094702399392, + "learning_rate": 1.9646569812348565e-05, + "loss": 1.7054, + "step": 18684 + }, + { + "epoch": 0.22, + "grad_norm": 24.717730664974606, + "learning_rate": 1.9646415859811557e-05, + "loss": 1.48, + "step": 18687 + }, + { + "epoch": 0.22, + "grad_norm": 5.645471214157594, + "learning_rate": 1.9646261874354803e-05, + "loss": 1.6592, + "step": 18690 + }, + { + "epoch": 0.22, + "grad_norm": 4.994298881137658, + "learning_rate": 1.9646107855978832e-05, + "loss": 1.3963, + "step": 18693 + }, + { + "epoch": 0.22, + "grad_norm": 6.3049446568287735, + "learning_rate": 1.9645953804684176e-05, + "loss": 1.4695, + "step": 18696 + }, + { + "epoch": 0.22, + "grad_norm": 22.205175688901594, + "learning_rate": 1.964579972047135e-05, + "loss": 1.7168, + "step": 18699 + }, + { + "epoch": 0.22, + "grad_norm": 12.051772093556043, + "learning_rate": 1.964564560334088e-05, + "loss": 1.8223, + "step": 18702 + }, + { + "epoch": 0.22, + "grad_norm": 5.437778263395747, + "learning_rate": 1.9645491453293303e-05, + "loss": 1.5743, + "step": 18705 + }, + { + "epoch": 0.22, + "grad_norm": 6.963635455887741, + "learning_rate": 1.9645337270329133e-05, + "loss": 1.6137, + "step": 18708 + }, + { + "epoch": 0.22, + "grad_norm": 12.334300615735572, + "learning_rate": 1.9645183054448904e-05, + "loss": 1.7593, + "step": 18711 + }, + { + "epoch": 0.23, + "grad_norm": 26.94421036842992, + "learning_rate": 1.9645028805653137e-05, + "loss": 1.7846, + "step": 18714 + }, + { + "epoch": 0.23, + "grad_norm": 96.37232764990573, + "learning_rate": 1.964487452394236e-05, + "loss": 2.0098, + "step": 18717 + }, + { + "epoch": 0.23, + "grad_norm": 5.8732440707347875, + "learning_rate": 1.9644720209317105e-05, + "loss": 1.772, + "step": 18720 + }, + { + "epoch": 0.23, + "grad_norm": 4.946139532195256, + "learning_rate": 1.964456586177789e-05, + "loss": 1.7102, + "step": 18723 + }, + { + "epoch": 0.23, + "grad_norm": 26.20853295200264, + "learning_rate": 1.9644411481325247e-05, + "loss": 1.3549, + "step": 18726 + }, + { + "epoch": 0.23, + "grad_norm": 67.86924139164653, + "learning_rate": 1.9644257067959703e-05, + "loss": 1.5391, + "step": 18729 + }, + { + "epoch": 0.23, + "grad_norm": 10.26530476484859, + "learning_rate": 1.964410262168178e-05, + "loss": 1.4372, + "step": 18732 + }, + { + "epoch": 0.23, + "grad_norm": 12.030624615769298, + "learning_rate": 1.964394814249201e-05, + "loss": 1.3249, + "step": 18735 + }, + { + "epoch": 0.23, + "grad_norm": 2.5743180970735553, + "learning_rate": 1.964379363039092e-05, + "loss": 1.3988, + "step": 18738 + }, + { + "epoch": 0.23, + "grad_norm": 24.531722557691342, + "learning_rate": 1.9643639085379033e-05, + "loss": 1.6173, + "step": 18741 + }, + { + "epoch": 0.23, + "grad_norm": 28.287997202971198, + "learning_rate": 1.9643484507456882e-05, + "loss": 1.4058, + "step": 18744 + }, + { + "epoch": 0.23, + "grad_norm": 41.78585566149958, + "learning_rate": 1.9643329896624993e-05, + "loss": 1.2164, + "step": 18747 + }, + { + "epoch": 0.23, + "grad_norm": 15.116702096050744, + "learning_rate": 1.9643175252883892e-05, + "loss": 1.7873, + "step": 18750 + }, + { + "epoch": 0.23, + "grad_norm": 6.511975935513643, + "learning_rate": 1.9643020576234108e-05, + "loss": 1.6954, + "step": 18753 + }, + { + "epoch": 0.23, + "grad_norm": 9.409446582698099, + "learning_rate": 1.9642865866676165e-05, + "loss": 1.8041, + "step": 18756 + }, + { + "epoch": 0.23, + "grad_norm": 53.07964994824097, + "learning_rate": 1.9642711124210593e-05, + "loss": 1.5628, + "step": 18759 + }, + { + "epoch": 0.23, + "grad_norm": 9.598769949773772, + "learning_rate": 1.9642556348837926e-05, + "loss": 1.5173, + "step": 18762 + }, + { + "epoch": 0.23, + "grad_norm": 20.24021745172799, + "learning_rate": 1.9642401540558683e-05, + "loss": 1.7572, + "step": 18765 + }, + { + "epoch": 0.23, + "grad_norm": 36.45969164978953, + "learning_rate": 1.9642246699373402e-05, + "loss": 1.5857, + "step": 18768 + }, + { + "epoch": 0.23, + "grad_norm": 9.813122633629225, + "learning_rate": 1.96420918252826e-05, + "loss": 1.7629, + "step": 18771 + }, + { + "epoch": 0.23, + "grad_norm": 36.284029090529835, + "learning_rate": 1.9641936918286817e-05, + "loss": 1.5555, + "step": 18774 + }, + { + "epoch": 0.23, + "grad_norm": 31.79964252295265, + "learning_rate": 1.9641781978386575e-05, + "loss": 1.6051, + "step": 18777 + }, + { + "epoch": 0.23, + "grad_norm": 8.8567168469554, + "learning_rate": 1.9641627005582403e-05, + "loss": 1.2783, + "step": 18780 + }, + { + "epoch": 0.23, + "grad_norm": 98.80018800314255, + "learning_rate": 1.964147199987483e-05, + "loss": 1.7306, + "step": 18783 + }, + { + "epoch": 0.23, + "grad_norm": 11.528295986488034, + "learning_rate": 1.9641316961264388e-05, + "loss": 1.3758, + "step": 18786 + }, + { + "epoch": 0.23, + "grad_norm": 33.023491148109294, + "learning_rate": 1.9641161889751605e-05, + "loss": 1.8044, + "step": 18789 + }, + { + "epoch": 0.23, + "grad_norm": 10.296854120633277, + "learning_rate": 1.9641006785337006e-05, + "loss": 1.6329, + "step": 18792 + }, + { + "epoch": 0.23, + "grad_norm": 9.684630009140243, + "learning_rate": 1.9640851648021123e-05, + "loss": 1.1307, + "step": 18795 + }, + { + "epoch": 0.23, + "grad_norm": 30.82217242567074, + "learning_rate": 1.9640696477804488e-05, + "loss": 1.5043, + "step": 18798 + }, + { + "epoch": 0.23, + "grad_norm": 6.038074018846356, + "learning_rate": 1.9640541274687626e-05, + "loss": 1.5535, + "step": 18801 + }, + { + "epoch": 0.23, + "grad_norm": 29.11440329713314, + "learning_rate": 1.9640386038671077e-05, + "loss": 1.4369, + "step": 18804 + }, + { + "epoch": 0.23, + "grad_norm": 6.181677253948035, + "learning_rate": 1.9640230769755354e-05, + "loss": 1.44, + "step": 18807 + }, + { + "epoch": 0.23, + "grad_norm": 8.358029670711346, + "learning_rate": 1.9640075467941e-05, + "loss": 1.388, + "step": 18810 + }, + { + "epoch": 0.23, + "grad_norm": 27.400364854891322, + "learning_rate": 1.9639920133228538e-05, + "loss": 1.3542, + "step": 18813 + }, + { + "epoch": 0.23, + "grad_norm": 29.965255746027903, + "learning_rate": 1.9639764765618505e-05, + "loss": 1.5852, + "step": 18816 + }, + { + "epoch": 0.23, + "grad_norm": 11.288797024263138, + "learning_rate": 1.9639609365111423e-05, + "loss": 1.589, + "step": 18819 + }, + { + "epoch": 0.23, + "grad_norm": 25.887431153996133, + "learning_rate": 1.9639453931707828e-05, + "loss": 1.6227, + "step": 18822 + }, + { + "epoch": 0.23, + "grad_norm": 8.61824684465098, + "learning_rate": 1.963929846540825e-05, + "loss": 1.7167, + "step": 18825 + }, + { + "epoch": 0.23, + "grad_norm": 36.84162973226776, + "learning_rate": 1.963914296621322e-05, + "loss": 1.356, + "step": 18828 + }, + { + "epoch": 0.23, + "grad_norm": 12.442316766737122, + "learning_rate": 1.963898743412326e-05, + "loss": 1.7315, + "step": 18831 + }, + { + "epoch": 0.23, + "grad_norm": 9.747626075038886, + "learning_rate": 1.9638831869138912e-05, + "loss": 1.8655, + "step": 18834 + }, + { + "epoch": 0.23, + "grad_norm": 60.09087477994551, + "learning_rate": 1.9638676271260705e-05, + "loss": 1.8221, + "step": 18837 + }, + { + "epoch": 0.23, + "grad_norm": 56.64887400988658, + "learning_rate": 1.9638520640489167e-05, + "loss": 1.6785, + "step": 18840 + }, + { + "epoch": 0.23, + "grad_norm": 156.79058706830202, + "learning_rate": 1.9638364976824825e-05, + "loss": 1.3409, + "step": 18843 + }, + { + "epoch": 0.23, + "grad_norm": 9.304553686442866, + "learning_rate": 1.9638209280268222e-05, + "loss": 1.8552, + "step": 18846 + }, + { + "epoch": 0.23, + "grad_norm": 19.904266302994905, + "learning_rate": 1.9638053550819877e-05, + "loss": 1.4648, + "step": 18849 + }, + { + "epoch": 0.23, + "grad_norm": 7.588346161468528, + "learning_rate": 1.963789778848033e-05, + "loss": 1.4545, + "step": 18852 + }, + { + "epoch": 0.23, + "grad_norm": 8.881761646953093, + "learning_rate": 1.9637741993250108e-05, + "loss": 1.2854, + "step": 18855 + }, + { + "epoch": 0.23, + "grad_norm": 22.834497539386167, + "learning_rate": 1.9637586165129745e-05, + "loss": 1.766, + "step": 18858 + }, + { + "epoch": 0.23, + "grad_norm": 16.491323682492006, + "learning_rate": 1.963743030411977e-05, + "loss": 1.979, + "step": 18861 + }, + { + "epoch": 0.23, + "grad_norm": 6.309234179820839, + "learning_rate": 1.9637274410220717e-05, + "loss": 1.5073, + "step": 18864 + }, + { + "epoch": 0.23, + "grad_norm": 2.6899829654892913, + "learning_rate": 1.963711848343312e-05, + "loss": 1.8361, + "step": 18867 + }, + { + "epoch": 0.23, + "grad_norm": 20.599844059386932, + "learning_rate": 1.9636962523757508e-05, + "loss": 1.7843, + "step": 18870 + }, + { + "epoch": 0.23, + "grad_norm": 100.26024015722221, + "learning_rate": 1.9636806531194413e-05, + "loss": 1.3419, + "step": 18873 + }, + { + "epoch": 0.23, + "grad_norm": 6.071884418133277, + "learning_rate": 1.963665050574437e-05, + "loss": 1.5172, + "step": 18876 + }, + { + "epoch": 0.23, + "grad_norm": 27.041665789572594, + "learning_rate": 1.9636494447407906e-05, + "loss": 1.8028, + "step": 18879 + }, + { + "epoch": 0.23, + "grad_norm": 22.9732748022258, + "learning_rate": 1.9636338356185565e-05, + "loss": 1.6454, + "step": 18882 + }, + { + "epoch": 0.23, + "grad_norm": 9.641487746621024, + "learning_rate": 1.9636182232077864e-05, + "loss": 1.2965, + "step": 18885 + }, + { + "epoch": 0.23, + "grad_norm": 6.781245652729501, + "learning_rate": 1.963602607508535e-05, + "loss": 0.979, + "step": 18888 + }, + { + "epoch": 0.23, + "grad_norm": 4.330685286435656, + "learning_rate": 1.9635869885208547e-05, + "loss": 1.6971, + "step": 18891 + }, + { + "epoch": 0.23, + "grad_norm": 14.27232899563846, + "learning_rate": 1.9635713662447993e-05, + "loss": 1.5305, + "step": 18894 + }, + { + "epoch": 0.23, + "grad_norm": 132.16683562476808, + "learning_rate": 1.9635557406804217e-05, + "loss": 1.685, + "step": 18897 + }, + { + "epoch": 0.23, + "grad_norm": 65.43090344924838, + "learning_rate": 1.9635401118277754e-05, + "loss": 1.8404, + "step": 18900 + }, + { + "epoch": 0.23, + "grad_norm": 35.200678682615845, + "learning_rate": 1.963524479686914e-05, + "loss": 1.7674, + "step": 18903 + }, + { + "epoch": 0.23, + "grad_norm": 25.320751229714773, + "learning_rate": 1.9635088442578904e-05, + "loss": 1.5402, + "step": 18906 + }, + { + "epoch": 0.23, + "grad_norm": 15.6922883439095, + "learning_rate": 1.9634932055407584e-05, + "loss": 1.7209, + "step": 18909 + }, + { + "epoch": 0.23, + "grad_norm": 97.28417167707568, + "learning_rate": 1.963477563535571e-05, + "loss": 1.8754, + "step": 18912 + }, + { + "epoch": 0.23, + "grad_norm": 15.007151996102285, + "learning_rate": 1.9634619182423814e-05, + "loss": 1.5232, + "step": 18915 + }, + { + "epoch": 0.23, + "grad_norm": 9.965098447009558, + "learning_rate": 1.9634462696612438e-05, + "loss": 1.1645, + "step": 18918 + }, + { + "epoch": 0.23, + "grad_norm": 14.496252141735846, + "learning_rate": 1.9634306177922106e-05, + "loss": 1.3875, + "step": 18921 + }, + { + "epoch": 0.23, + "grad_norm": 16.464833093807048, + "learning_rate": 1.9634149626353363e-05, + "loss": 1.7309, + "step": 18924 + }, + { + "epoch": 0.23, + "grad_norm": 15.373741546835209, + "learning_rate": 1.9633993041906734e-05, + "loss": 1.7931, + "step": 18927 + }, + { + "epoch": 0.23, + "grad_norm": 17.68304049654038, + "learning_rate": 1.963383642458276e-05, + "loss": 1.5909, + "step": 18930 + }, + { + "epoch": 0.23, + "grad_norm": 8.310097452518326, + "learning_rate": 1.963367977438197e-05, + "loss": 1.8342, + "step": 18933 + }, + { + "epoch": 0.23, + "grad_norm": 25.98395019756633, + "learning_rate": 1.96335230913049e-05, + "loss": 1.4228, + "step": 18936 + }, + { + "epoch": 0.23, + "grad_norm": 14.552019921373459, + "learning_rate": 1.9633366375352087e-05, + "loss": 1.5914, + "step": 18939 + }, + { + "epoch": 0.23, + "grad_norm": 8.918447725980386, + "learning_rate": 1.9633209626524062e-05, + "loss": 1.3675, + "step": 18942 + }, + { + "epoch": 0.23, + "grad_norm": 5.238775002514662, + "learning_rate": 1.9633052844821367e-05, + "loss": 1.2265, + "step": 18945 + }, + { + "epoch": 0.23, + "grad_norm": 13.08445140837207, + "learning_rate": 1.963289603024453e-05, + "loss": 1.8306, + "step": 18948 + }, + { + "epoch": 0.23, + "grad_norm": 25.991322365929502, + "learning_rate": 1.963273918279409e-05, + "loss": 1.2092, + "step": 18951 + }, + { + "epoch": 0.23, + "grad_norm": 15.093027121809351, + "learning_rate": 1.9632582302470576e-05, + "loss": 1.6858, + "step": 18954 + }, + { + "epoch": 0.23, + "grad_norm": 17.646711128232393, + "learning_rate": 1.9632425389274533e-05, + "loss": 1.7201, + "step": 18957 + }, + { + "epoch": 0.23, + "grad_norm": 13.361210044138016, + "learning_rate": 1.9632268443206494e-05, + "loss": 1.4836, + "step": 18960 + }, + { + "epoch": 0.23, + "grad_norm": 13.134728543995898, + "learning_rate": 1.9632111464266986e-05, + "loss": 1.391, + "step": 18963 + }, + { + "epoch": 0.23, + "grad_norm": 8.845316218318226, + "learning_rate": 1.9631954452456556e-05, + "loss": 1.1803, + "step": 18966 + }, + { + "epoch": 0.23, + "grad_norm": 22.14075827814003, + "learning_rate": 1.963179740777573e-05, + "loss": 1.672, + "step": 18969 + }, + { + "epoch": 0.23, + "grad_norm": 6.065984339387932, + "learning_rate": 1.9631640330225054e-05, + "loss": 1.2035, + "step": 18972 + }, + { + "epoch": 0.23, + "grad_norm": 3.9593164953933084, + "learning_rate": 1.9631483219805057e-05, + "loss": 1.5775, + "step": 18975 + }, + { + "epoch": 0.23, + "grad_norm": 4.458836408805282, + "learning_rate": 1.9631326076516278e-05, + "loss": 1.2643, + "step": 18978 + }, + { + "epoch": 0.23, + "grad_norm": 25.891040777374663, + "learning_rate": 1.963116890035925e-05, + "loss": 1.5898, + "step": 18981 + }, + { + "epoch": 0.23, + "grad_norm": 28.729233818698674, + "learning_rate": 1.963101169133451e-05, + "loss": 1.3297, + "step": 18984 + }, + { + "epoch": 0.23, + "grad_norm": 9.518278218176516, + "learning_rate": 1.96308544494426e-05, + "loss": 1.3047, + "step": 18987 + }, + { + "epoch": 0.23, + "grad_norm": 76.32198487896434, + "learning_rate": 1.9630697174684052e-05, + "loss": 1.4587, + "step": 18990 + }, + { + "epoch": 0.23, + "grad_norm": 4.071496733220958, + "learning_rate": 1.9630539867059403e-05, + "loss": 1.6333, + "step": 18993 + }, + { + "epoch": 0.23, + "grad_norm": 18.314273324764496, + "learning_rate": 1.963038252656919e-05, + "loss": 1.7279, + "step": 18996 + }, + { + "epoch": 0.23, + "grad_norm": 11.515327819121705, + "learning_rate": 1.963022515321395e-05, + "loss": 1.7021, + "step": 18999 + }, + { + "epoch": 0.23, + "grad_norm": 30.49032095140873, + "learning_rate": 1.9630067746994223e-05, + "loss": 1.4717, + "step": 19002 + }, + { + "epoch": 0.23, + "grad_norm": 14.111623614446309, + "learning_rate": 1.962991030791054e-05, + "loss": 1.2843, + "step": 19005 + }, + { + "epoch": 0.23, + "grad_norm": 7.353241125764797, + "learning_rate": 1.962975283596344e-05, + "loss": 1.5227, + "step": 19008 + }, + { + "epoch": 0.23, + "grad_norm": 18.883733930536888, + "learning_rate": 1.962959533115347e-05, + "loss": 1.4053, + "step": 19011 + }, + { + "epoch": 0.23, + "grad_norm": 27.0128918659232, + "learning_rate": 1.9629437793481153e-05, + "loss": 1.4199, + "step": 19014 + }, + { + "epoch": 0.23, + "grad_norm": 12.70196311569521, + "learning_rate": 1.9629280222947034e-05, + "loss": 1.2165, + "step": 19017 + }, + { + "epoch": 0.23, + "grad_norm": 3.631787271275718, + "learning_rate": 1.9629122619551656e-05, + "loss": 1.5172, + "step": 19020 + }, + { + "epoch": 0.23, + "grad_norm": 48.78549284167726, + "learning_rate": 1.9628964983295544e-05, + "loss": 1.8697, + "step": 19023 + }, + { + "epoch": 0.23, + "grad_norm": 13.103376826882894, + "learning_rate": 1.9628807314179245e-05, + "loss": 1.6294, + "step": 19026 + }, + { + "epoch": 0.23, + "grad_norm": 97.01734761558441, + "learning_rate": 1.9628649612203296e-05, + "loss": 1.4808, + "step": 19029 + }, + { + "epoch": 0.23, + "grad_norm": 40.32571991328881, + "learning_rate": 1.9628491877368235e-05, + "loss": 1.359, + "step": 19032 + }, + { + "epoch": 0.23, + "grad_norm": 9.388930464040111, + "learning_rate": 1.9628334109674595e-05, + "loss": 1.683, + "step": 19035 + }, + { + "epoch": 0.23, + "grad_norm": 70.57730433696004, + "learning_rate": 1.9628176309122923e-05, + "loss": 1.9177, + "step": 19038 + }, + { + "epoch": 0.23, + "grad_norm": 8.409306384293991, + "learning_rate": 1.962801847571375e-05, + "loss": 1.7546, + "step": 19041 + }, + { + "epoch": 0.23, + "grad_norm": 26.31148367154576, + "learning_rate": 1.962786060944762e-05, + "loss": 1.413, + "step": 19044 + }, + { + "epoch": 0.23, + "grad_norm": 63.0610624650872, + "learning_rate": 1.962770271032507e-05, + "loss": 1.7026, + "step": 19047 + }, + { + "epoch": 0.23, + "grad_norm": 9.959730837768015, + "learning_rate": 1.9627544778346637e-05, + "loss": 1.4165, + "step": 19050 + }, + { + "epoch": 0.23, + "grad_norm": 27.603376621126365, + "learning_rate": 1.9627386813512865e-05, + "loss": 1.7365, + "step": 19053 + }, + { + "epoch": 0.23, + "grad_norm": 15.694611033216697, + "learning_rate": 1.9627228815824283e-05, + "loss": 1.6132, + "step": 19056 + }, + { + "epoch": 0.23, + "grad_norm": 20.93098128779251, + "learning_rate": 1.9627070785281442e-05, + "loss": 1.2751, + "step": 19059 + }, + { + "epoch": 0.23, + "grad_norm": 6.539615050250718, + "learning_rate": 1.9626912721884876e-05, + "loss": 1.4382, + "step": 19062 + }, + { + "epoch": 0.23, + "grad_norm": 32.75999420862031, + "learning_rate": 1.9626754625635124e-05, + "loss": 1.4766, + "step": 19065 + }, + { + "epoch": 0.23, + "grad_norm": 17.510667316209744, + "learning_rate": 1.9626596496532725e-05, + "loss": 1.9998, + "step": 19068 + }, + { + "epoch": 0.23, + "grad_norm": 66.478674412996, + "learning_rate": 1.962643833457822e-05, + "loss": 1.5479, + "step": 19071 + }, + { + "epoch": 0.23, + "grad_norm": 34.25344510964594, + "learning_rate": 1.9626280139772146e-05, + "loss": 1.3404, + "step": 19074 + }, + { + "epoch": 0.23, + "grad_norm": 26.647552935733177, + "learning_rate": 1.962612191211505e-05, + "loss": 1.4258, + "step": 19077 + }, + { + "epoch": 0.23, + "grad_norm": 68.03406130971801, + "learning_rate": 1.962596365160746e-05, + "loss": 1.5798, + "step": 19080 + }, + { + "epoch": 0.23, + "grad_norm": 23.968838367261892, + "learning_rate": 1.9625805358249934e-05, + "loss": 1.9709, + "step": 19083 + }, + { + "epoch": 0.23, + "grad_norm": 28.700967623651383, + "learning_rate": 1.9625647032042995e-05, + "loss": 1.359, + "step": 19086 + }, + { + "epoch": 0.23, + "grad_norm": 10.128792909951995, + "learning_rate": 1.962548867298719e-05, + "loss": 1.4209, + "step": 19089 + }, + { + "epoch": 0.23, + "grad_norm": 18.777208927687397, + "learning_rate": 1.962533028108306e-05, + "loss": 1.6179, + "step": 19092 + }, + { + "epoch": 0.23, + "grad_norm": 60.7143595988614, + "learning_rate": 1.9625171856331143e-05, + "loss": 1.3832, + "step": 19095 + }, + { + "epoch": 0.23, + "grad_norm": 10.791323757327202, + "learning_rate": 1.9625013398731983e-05, + "loss": 1.3845, + "step": 19098 + }, + { + "epoch": 0.23, + "grad_norm": 10.633230424896764, + "learning_rate": 1.9624854908286118e-05, + "loss": 1.7106, + "step": 19101 + }, + { + "epoch": 0.23, + "grad_norm": 22.08199383044171, + "learning_rate": 1.9624696384994094e-05, + "loss": 1.6299, + "step": 19104 + }, + { + "epoch": 0.23, + "grad_norm": 5.6883093216505545, + "learning_rate": 1.9624537828856446e-05, + "loss": 1.6596, + "step": 19107 + }, + { + "epoch": 0.23, + "grad_norm": 28.11707077050808, + "learning_rate": 1.9624379239873714e-05, + "loss": 1.4322, + "step": 19110 + }, + { + "epoch": 0.23, + "grad_norm": 14.601547709442979, + "learning_rate": 1.9624220618046442e-05, + "loss": 1.7456, + "step": 19113 + }, + { + "epoch": 0.23, + "grad_norm": 11.933109469695776, + "learning_rate": 1.9624061963375176e-05, + "loss": 1.3962, + "step": 19116 + }, + { + "epoch": 0.23, + "grad_norm": 39.56160430899199, + "learning_rate": 1.9623903275860452e-05, + "loss": 1.3553, + "step": 19119 + }, + { + "epoch": 0.23, + "grad_norm": 6.344613913133521, + "learning_rate": 1.962374455550281e-05, + "loss": 1.4176, + "step": 19122 + }, + { + "epoch": 0.23, + "grad_norm": 18.008121254230755, + "learning_rate": 1.96235858023028e-05, + "loss": 1.7051, + "step": 19125 + }, + { + "epoch": 0.23, + "grad_norm": 6.058110064571027, + "learning_rate": 1.9623427016260952e-05, + "loss": 1.4982, + "step": 19128 + }, + { + "epoch": 0.23, + "grad_norm": 13.34671700434083, + "learning_rate": 1.9623268197377818e-05, + "loss": 1.4828, + "step": 19131 + }, + { + "epoch": 0.23, + "grad_norm": 16.442428323823552, + "learning_rate": 1.9623109345653932e-05, + "loss": 1.5204, + "step": 19134 + }, + { + "epoch": 0.23, + "grad_norm": 17.125655486113452, + "learning_rate": 1.9622950461089844e-05, + "loss": 1.2818, + "step": 19137 + }, + { + "epoch": 0.23, + "grad_norm": 7.588338526632312, + "learning_rate": 1.962279154368609e-05, + "loss": 1.7192, + "step": 19140 + }, + { + "epoch": 0.23, + "grad_norm": 8.029746013597595, + "learning_rate": 1.9622632593443218e-05, + "loss": 1.2491, + "step": 19143 + }, + { + "epoch": 0.23, + "grad_norm": 23.431385488204047, + "learning_rate": 1.9622473610361764e-05, + "loss": 1.5334, + "step": 19146 + }, + { + "epoch": 0.23, + "grad_norm": 10.1361896522568, + "learning_rate": 1.9622314594442273e-05, + "loss": 1.6962, + "step": 19149 + }, + { + "epoch": 0.23, + "grad_norm": 15.981124792818242, + "learning_rate": 1.9622155545685286e-05, + "loss": 1.3997, + "step": 19152 + }, + { + "epoch": 0.23, + "grad_norm": 40.384050046930795, + "learning_rate": 1.9621996464091353e-05, + "loss": 1.6528, + "step": 19155 + }, + { + "epoch": 0.23, + "grad_norm": 20.877596574307855, + "learning_rate": 1.962183734966101e-05, + "loss": 1.6495, + "step": 19158 + }, + { + "epoch": 0.23, + "grad_norm": 5.883410162490203, + "learning_rate": 1.96216782023948e-05, + "loss": 1.4135, + "step": 19161 + }, + { + "epoch": 0.23, + "grad_norm": 25.663507479052964, + "learning_rate": 1.962151902229327e-05, + "loss": 1.5793, + "step": 19164 + }, + { + "epoch": 0.23, + "grad_norm": 5.896898890801273, + "learning_rate": 1.9621359809356962e-05, + "loss": 1.566, + "step": 19167 + }, + { + "epoch": 0.23, + "grad_norm": 12.814094020338494, + "learning_rate": 1.9621200563586417e-05, + "loss": 1.531, + "step": 19170 + }, + { + "epoch": 0.23, + "grad_norm": 11.924204033302852, + "learning_rate": 1.962104128498218e-05, + "loss": 1.6538, + "step": 19173 + }, + { + "epoch": 0.23, + "grad_norm": 6.847296313664883, + "learning_rate": 1.9620881973544797e-05, + "loss": 1.3943, + "step": 19176 + }, + { + "epoch": 0.23, + "grad_norm": 8.818535281737626, + "learning_rate": 1.9620722629274807e-05, + "loss": 1.3959, + "step": 19179 + }, + { + "epoch": 0.23, + "grad_norm": 2.0157564181599947, + "learning_rate": 1.9620563252172756e-05, + "loss": 1.3765, + "step": 19182 + }, + { + "epoch": 0.23, + "grad_norm": 8.052531463830263, + "learning_rate": 1.962040384223919e-05, + "loss": 1.4904, + "step": 19185 + }, + { + "epoch": 0.23, + "grad_norm": 17.095030517139396, + "learning_rate": 1.962024439947465e-05, + "loss": 2.0023, + "step": 19188 + }, + { + "epoch": 0.23, + "grad_norm": 10.66483363621583, + "learning_rate": 1.9620084923879678e-05, + "loss": 1.5639, + "step": 19191 + }, + { + "epoch": 0.23, + "grad_norm": 11.338510772243664, + "learning_rate": 1.9619925415454824e-05, + "loss": 1.9239, + "step": 19194 + }, + { + "epoch": 0.23, + "grad_norm": 16.771432675748304, + "learning_rate": 1.961976587420063e-05, + "loss": 1.3313, + "step": 19197 + }, + { + "epoch": 0.23, + "grad_norm": 8.57843130296548, + "learning_rate": 1.9619606300117638e-05, + "loss": 1.617, + "step": 19200 + }, + { + "epoch": 0.23, + "grad_norm": 6.841812821059966, + "learning_rate": 1.9619446693206397e-05, + "loss": 1.4661, + "step": 19203 + }, + { + "epoch": 0.23, + "grad_norm": 10.369642402530692, + "learning_rate": 1.9619287053467445e-05, + "loss": 1.8732, + "step": 19206 + }, + { + "epoch": 0.23, + "grad_norm": 16.16304885441905, + "learning_rate": 1.961912738090134e-05, + "loss": 1.3822, + "step": 19209 + }, + { + "epoch": 0.23, + "grad_norm": 8.08078062736002, + "learning_rate": 1.9618967675508612e-05, + "loss": 1.6265, + "step": 19212 + }, + { + "epoch": 0.23, + "grad_norm": 17.863164118866592, + "learning_rate": 1.961880793728981e-05, + "loss": 1.5936, + "step": 19215 + }, + { + "epoch": 0.23, + "grad_norm": 6.711605623537731, + "learning_rate": 1.9618648166245483e-05, + "loss": 1.7569, + "step": 19218 + }, + { + "epoch": 0.23, + "grad_norm": 14.989981527404366, + "learning_rate": 1.9618488362376174e-05, + "loss": 1.5619, + "step": 19221 + }, + { + "epoch": 0.23, + "grad_norm": 22.15307694377302, + "learning_rate": 1.961832852568243e-05, + "loss": 1.6598, + "step": 19224 + }, + { + "epoch": 0.23, + "grad_norm": 4.64587952551099, + "learning_rate": 1.9618168656164796e-05, + "loss": 1.7651, + "step": 19227 + }, + { + "epoch": 0.23, + "grad_norm": 13.280518334679325, + "learning_rate": 1.9618008753823815e-05, + "loss": 1.6244, + "step": 19230 + }, + { + "epoch": 0.23, + "grad_norm": 10.145631489449313, + "learning_rate": 1.9617848818660035e-05, + "loss": 1.5021, + "step": 19233 + }, + { + "epoch": 0.23, + "grad_norm": 8.66386075344332, + "learning_rate": 1.9617688850674007e-05, + "loss": 1.5422, + "step": 19236 + }, + { + "epoch": 0.23, + "grad_norm": 18.71869064417847, + "learning_rate": 1.9617528849866265e-05, + "loss": 1.4563, + "step": 19239 + }, + { + "epoch": 0.23, + "grad_norm": 9.977619340561565, + "learning_rate": 1.9617368816237362e-05, + "loss": 1.7482, + "step": 19242 + }, + { + "epoch": 0.23, + "grad_norm": 12.993377025010307, + "learning_rate": 1.9617208749787843e-05, + "loss": 1.5622, + "step": 19245 + }, + { + "epoch": 0.23, + "grad_norm": 7.06543984754052, + "learning_rate": 1.9617048650518254e-05, + "loss": 1.2975, + "step": 19248 + }, + { + "epoch": 0.23, + "grad_norm": 27.5331064101771, + "learning_rate": 1.9616888518429144e-05, + "loss": 1.5415, + "step": 19251 + }, + { + "epoch": 0.23, + "grad_norm": 16.45005954037948, + "learning_rate": 1.961672835352106e-05, + "loss": 1.4587, + "step": 19254 + }, + { + "epoch": 0.23, + "grad_norm": 16.52476209545828, + "learning_rate": 1.961656815579454e-05, + "loss": 1.3798, + "step": 19257 + }, + { + "epoch": 0.23, + "grad_norm": 57.967351059900324, + "learning_rate": 1.961640792525014e-05, + "loss": 1.4152, + "step": 19260 + }, + { + "epoch": 0.23, + "grad_norm": 15.29778020611045, + "learning_rate": 1.9616247661888407e-05, + "loss": 1.5711, + "step": 19263 + }, + { + "epoch": 0.23, + "grad_norm": 5.01585958864047, + "learning_rate": 1.9616087365709878e-05, + "loss": 1.557, + "step": 19266 + }, + { + "epoch": 0.23, + "grad_norm": 3.8277045501231375, + "learning_rate": 1.961592703671511e-05, + "loss": 1.5554, + "step": 19269 + }, + { + "epoch": 0.23, + "grad_norm": 15.785067311866145, + "learning_rate": 1.9615766674904644e-05, + "loss": 1.4735, + "step": 19272 + }, + { + "epoch": 0.23, + "grad_norm": 5.925543364278003, + "learning_rate": 1.961560628027903e-05, + "loss": 1.1498, + "step": 19275 + }, + { + "epoch": 0.23, + "grad_norm": 133.49808793877975, + "learning_rate": 1.9615445852838818e-05, + "loss": 1.467, + "step": 19278 + }, + { + "epoch": 0.23, + "grad_norm": 5.7190108526606656, + "learning_rate": 1.9615285392584552e-05, + "loss": 1.5936, + "step": 19281 + }, + { + "epoch": 0.23, + "grad_norm": 9.454709108035606, + "learning_rate": 1.9615124899516783e-05, + "loss": 1.2714, + "step": 19284 + }, + { + "epoch": 0.23, + "grad_norm": 8.87570207351253, + "learning_rate": 1.9614964373636054e-05, + "loss": 1.2255, + "step": 19287 + }, + { + "epoch": 0.23, + "grad_norm": 6.954897359494108, + "learning_rate": 1.9614803814942913e-05, + "loss": 1.4079, + "step": 19290 + }, + { + "epoch": 0.23, + "grad_norm": 27.227395591058045, + "learning_rate": 1.961464322343791e-05, + "loss": 1.5742, + "step": 19293 + }, + { + "epoch": 0.23, + "grad_norm": 20.776023333413356, + "learning_rate": 1.9614482599121593e-05, + "loss": 1.7387, + "step": 19296 + }, + { + "epoch": 0.23, + "grad_norm": 44.74881273208002, + "learning_rate": 1.9614321941994513e-05, + "loss": 1.8479, + "step": 19299 + }, + { + "epoch": 0.23, + "grad_norm": 31.851000876420716, + "learning_rate": 1.9614161252057214e-05, + "loss": 1.807, + "step": 19302 + }, + { + "epoch": 0.23, + "grad_norm": 16.31824633543395, + "learning_rate": 1.9614000529310244e-05, + "loss": 1.471, + "step": 19305 + }, + { + "epoch": 0.23, + "grad_norm": 21.766820466673206, + "learning_rate": 1.9613839773754157e-05, + "loss": 1.5167, + "step": 19308 + }, + { + "epoch": 0.23, + "grad_norm": 4.057165252464501, + "learning_rate": 1.9613678985389492e-05, + "loss": 1.5932, + "step": 19311 + }, + { + "epoch": 0.23, + "grad_norm": 19.511815153034664, + "learning_rate": 1.9613518164216805e-05, + "loss": 1.5538, + "step": 19314 + }, + { + "epoch": 0.23, + "grad_norm": 13.227023970526055, + "learning_rate": 1.9613357310236645e-05, + "loss": 1.6264, + "step": 19317 + }, + { + "epoch": 0.23, + "grad_norm": 9.800472645927362, + "learning_rate": 1.9613196423449558e-05, + "loss": 1.8686, + "step": 19320 + }, + { + "epoch": 0.23, + "grad_norm": 14.748304029471633, + "learning_rate": 1.9613035503856098e-05, + "loss": 1.4947, + "step": 19323 + }, + { + "epoch": 0.23, + "grad_norm": 20.745685978615853, + "learning_rate": 1.9612874551456804e-05, + "loss": 1.5262, + "step": 19326 + }, + { + "epoch": 0.23, + "grad_norm": 18.772685443123436, + "learning_rate": 1.9612713566252237e-05, + "loss": 1.8522, + "step": 19329 + }, + { + "epoch": 0.23, + "grad_norm": 9.586348618986749, + "learning_rate": 1.9612552548242937e-05, + "loss": 1.8054, + "step": 19332 + }, + { + "epoch": 0.23, + "grad_norm": 27.767027017278593, + "learning_rate": 1.961239149742946e-05, + "loss": 1.592, + "step": 19335 + }, + { + "epoch": 0.23, + "grad_norm": 23.53717324626613, + "learning_rate": 1.9612230413812353e-05, + "loss": 1.3277, + "step": 19338 + }, + { + "epoch": 0.23, + "grad_norm": 66.08330378902683, + "learning_rate": 1.9612069297392167e-05, + "loss": 1.6261, + "step": 19341 + }, + { + "epoch": 0.23, + "grad_norm": 66.99121128292815, + "learning_rate": 1.961190814816945e-05, + "loss": 1.3136, + "step": 19344 + }, + { + "epoch": 0.23, + "grad_norm": 59.31874941566441, + "learning_rate": 1.9611746966144753e-05, + "loss": 1.492, + "step": 19347 + }, + { + "epoch": 0.23, + "grad_norm": 7.992730736017944, + "learning_rate": 1.9611585751318623e-05, + "loss": 1.2218, + "step": 19350 + }, + { + "epoch": 0.23, + "grad_norm": 11.041981314669089, + "learning_rate": 1.9611424503691617e-05, + "loss": 1.6534, + "step": 19353 + }, + { + "epoch": 0.23, + "grad_norm": 22.377847074919593, + "learning_rate": 1.961126322326428e-05, + "loss": 1.4262, + "step": 19356 + }, + { + "epoch": 0.23, + "grad_norm": 13.459855654428138, + "learning_rate": 1.9611101910037164e-05, + "loss": 1.8923, + "step": 19359 + }, + { + "epoch": 0.23, + "grad_norm": 30.53728074682336, + "learning_rate": 1.9610940564010815e-05, + "loss": 1.7066, + "step": 19362 + }, + { + "epoch": 0.23, + "grad_norm": 8.115689951769806, + "learning_rate": 1.961077918518579e-05, + "loss": 1.4255, + "step": 19365 + }, + { + "epoch": 0.23, + "grad_norm": 18.647722123036054, + "learning_rate": 1.961061777356264e-05, + "loss": 1.3306, + "step": 19368 + }, + { + "epoch": 0.23, + "grad_norm": 21.45346251798003, + "learning_rate": 1.961045632914191e-05, + "loss": 1.4182, + "step": 19371 + }, + { + "epoch": 0.23, + "grad_norm": 33.41681464793903, + "learning_rate": 1.9610294851924154e-05, + "loss": 1.4321, + "step": 19374 + }, + { + "epoch": 0.23, + "grad_norm": 6.305144668737778, + "learning_rate": 1.9610133341909925e-05, + "loss": 1.5101, + "step": 19377 + }, + { + "epoch": 0.23, + "grad_norm": 12.235550004008413, + "learning_rate": 1.960997179909977e-05, + "loss": 1.0912, + "step": 19380 + }, + { + "epoch": 0.23, + "grad_norm": 6.470058186624472, + "learning_rate": 1.9609810223494245e-05, + "loss": 1.8196, + "step": 19383 + }, + { + "epoch": 0.23, + "grad_norm": 8.74221881165649, + "learning_rate": 1.96096486150939e-05, + "loss": 1.8328, + "step": 19386 + }, + { + "epoch": 0.23, + "grad_norm": 14.579473954406803, + "learning_rate": 1.9609486973899287e-05, + "loss": 1.5859, + "step": 19389 + }, + { + "epoch": 0.23, + "grad_norm": 27.35258433336316, + "learning_rate": 1.9609325299910952e-05, + "loss": 1.438, + "step": 19392 + }, + { + "epoch": 0.23, + "grad_norm": 2.9361536032653737, + "learning_rate": 1.9609163593129455e-05, + "loss": 1.5686, + "step": 19395 + }, + { + "epoch": 0.23, + "grad_norm": 19.662086981685857, + "learning_rate": 1.960900185355534e-05, + "loss": 1.6453, + "step": 19398 + }, + { + "epoch": 0.23, + "grad_norm": 7.4965522649771525, + "learning_rate": 1.9608840081189166e-05, + "loss": 1.2584, + "step": 19401 + }, + { + "epoch": 0.23, + "grad_norm": 11.024494774269163, + "learning_rate": 1.960867827603148e-05, + "loss": 1.3812, + "step": 19404 + }, + { + "epoch": 0.23, + "grad_norm": 14.07193730945983, + "learning_rate": 1.9608516438082836e-05, + "loss": 1.8673, + "step": 19407 + }, + { + "epoch": 0.23, + "grad_norm": 6.298752588039272, + "learning_rate": 1.9608354567343787e-05, + "loss": 1.2262, + "step": 19410 + }, + { + "epoch": 0.23, + "grad_norm": 87.73679157067, + "learning_rate": 1.9608192663814884e-05, + "loss": 1.5189, + "step": 19413 + }, + { + "epoch": 0.23, + "grad_norm": 6.749529944772442, + "learning_rate": 1.960803072749668e-05, + "loss": 1.8047, + "step": 19416 + }, + { + "epoch": 0.23, + "grad_norm": 6.601402026746713, + "learning_rate": 1.960786875838973e-05, + "loss": 1.4409, + "step": 19419 + }, + { + "epoch": 0.23, + "grad_norm": 20.866170990442622, + "learning_rate": 1.9607706756494583e-05, + "loss": 1.4571, + "step": 19422 + }, + { + "epoch": 0.23, + "grad_norm": 7.6196189059803086, + "learning_rate": 1.9607544721811796e-05, + "loss": 1.8162, + "step": 19425 + }, + { + "epoch": 0.23, + "grad_norm": 19.700781252500345, + "learning_rate": 1.960738265434192e-05, + "loss": 1.7417, + "step": 19428 + }, + { + "epoch": 0.23, + "grad_norm": 4.882810396064229, + "learning_rate": 1.9607220554085504e-05, + "loss": 1.7314, + "step": 19431 + }, + { + "epoch": 0.23, + "grad_norm": 48.37888233240884, + "learning_rate": 1.9607058421043103e-05, + "loss": 1.9061, + "step": 19434 + }, + { + "epoch": 0.23, + "grad_norm": 7.759013841600456, + "learning_rate": 1.9606896255215275e-05, + "loss": 1.6921, + "step": 19437 + }, + { + "epoch": 0.23, + "grad_norm": 13.483930407420674, + "learning_rate": 1.9606734056602573e-05, + "loss": 1.3988, + "step": 19440 + }, + { + "epoch": 0.23, + "grad_norm": 23.933717678874952, + "learning_rate": 1.9606571825205545e-05, + "loss": 1.6604, + "step": 19443 + }, + { + "epoch": 0.23, + "grad_norm": 31.51789323624314, + "learning_rate": 1.9606409561024748e-05, + "loss": 1.32, + "step": 19446 + }, + { + "epoch": 0.23, + "grad_norm": 30.41677465584716, + "learning_rate": 1.9606247264060733e-05, + "loss": 1.2109, + "step": 19449 + }, + { + "epoch": 0.23, + "grad_norm": 18.639407801038132, + "learning_rate": 1.9606084934314063e-05, + "loss": 1.689, + "step": 19452 + }, + { + "epoch": 0.23, + "grad_norm": 5.504701217788295, + "learning_rate": 1.960592257178528e-05, + "loss": 1.4598, + "step": 19455 + }, + { + "epoch": 0.23, + "grad_norm": 6.489887600543254, + "learning_rate": 1.9605760176474942e-05, + "loss": 1.643, + "step": 19458 + }, + { + "epoch": 0.23, + "grad_norm": 24.900634854441424, + "learning_rate": 1.9605597748383607e-05, + "loss": 1.727, + "step": 19461 + }, + { + "epoch": 0.23, + "grad_norm": 17.867600481006935, + "learning_rate": 1.9605435287511828e-05, + "loss": 1.702, + "step": 19464 + }, + { + "epoch": 0.23, + "grad_norm": 9.385887745663009, + "learning_rate": 1.9605272793860156e-05, + "loss": 1.7537, + "step": 19467 + }, + { + "epoch": 0.23, + "grad_norm": 22.256201795799345, + "learning_rate": 1.960511026742915e-05, + "loss": 1.526, + "step": 19470 + }, + { + "epoch": 0.23, + "grad_norm": 20.650389753743465, + "learning_rate": 1.960494770821936e-05, + "loss": 1.7056, + "step": 19473 + }, + { + "epoch": 0.23, + "grad_norm": 14.85505430668518, + "learning_rate": 1.9604785116231343e-05, + "loss": 1.6985, + "step": 19476 + }, + { + "epoch": 0.23, + "grad_norm": 13.441972636721784, + "learning_rate": 1.9604622491465655e-05, + "loss": 1.5182, + "step": 19479 + }, + { + "epoch": 0.23, + "grad_norm": 27.922155916354047, + "learning_rate": 1.9604459833922852e-05, + "loss": 1.4965, + "step": 19482 + }, + { + "epoch": 0.23, + "grad_norm": 13.972260119298872, + "learning_rate": 1.9604297143603482e-05, + "loss": 1.462, + "step": 19485 + }, + { + "epoch": 0.23, + "grad_norm": 7.353373294080466, + "learning_rate": 1.960413442050811e-05, + "loss": 1.3772, + "step": 19488 + }, + { + "epoch": 0.23, + "grad_norm": 5.7542909659629, + "learning_rate": 1.960397166463729e-05, + "loss": 1.4025, + "step": 19491 + }, + { + "epoch": 0.23, + "grad_norm": 13.0880704204984, + "learning_rate": 1.9603808875991566e-05, + "loss": 1.4079, + "step": 19494 + }, + { + "epoch": 0.23, + "grad_norm": 35.86302555014193, + "learning_rate": 1.9603646054571503e-05, + "loss": 1.7371, + "step": 19497 + }, + { + "epoch": 0.23, + "grad_norm": 9.890175962362957, + "learning_rate": 1.9603483200377658e-05, + "loss": 1.2285, + "step": 19500 + }, + { + "epoch": 0.23, + "grad_norm": 35.3834032094631, + "learning_rate": 1.9603320313410583e-05, + "loss": 1.3636, + "step": 19503 + }, + { + "epoch": 0.23, + "grad_norm": 6.659549368010391, + "learning_rate": 1.9603157393670835e-05, + "loss": 1.5822, + "step": 19506 + }, + { + "epoch": 0.23, + "grad_norm": 38.13967260045271, + "learning_rate": 1.9602994441158973e-05, + "loss": 1.8517, + "step": 19509 + }, + { + "epoch": 0.23, + "grad_norm": 10.852040226666883, + "learning_rate": 1.960283145587554e-05, + "loss": 1.616, + "step": 19512 + }, + { + "epoch": 0.23, + "grad_norm": 13.083626976544346, + "learning_rate": 1.9602668437821112e-05, + "loss": 1.6954, + "step": 19515 + }, + { + "epoch": 0.23, + "grad_norm": 11.476487478009783, + "learning_rate": 1.9602505386996232e-05, + "loss": 1.4149, + "step": 19518 + }, + { + "epoch": 0.23, + "grad_norm": 102.9689255120443, + "learning_rate": 1.960234230340146e-05, + "loss": 1.5992, + "step": 19521 + }, + { + "epoch": 0.23, + "grad_norm": 6.60493095519528, + "learning_rate": 1.9602179187037353e-05, + "loss": 1.6672, + "step": 19524 + }, + { + "epoch": 0.23, + "grad_norm": 36.47115769765417, + "learning_rate": 1.9602016037904467e-05, + "loss": 1.6267, + "step": 19527 + }, + { + "epoch": 0.23, + "grad_norm": 6.0795422515956234, + "learning_rate": 1.960185285600336e-05, + "loss": 1.5456, + "step": 19530 + }, + { + "epoch": 0.23, + "grad_norm": 9.758749718713855, + "learning_rate": 1.9601689641334586e-05, + "loss": 1.5197, + "step": 19533 + }, + { + "epoch": 0.23, + "grad_norm": 6.382760592233983, + "learning_rate": 1.9601526393898706e-05, + "loss": 1.762, + "step": 19536 + }, + { + "epoch": 0.23, + "grad_norm": 21.038651191118166, + "learning_rate": 1.960136311369627e-05, + "loss": 1.6759, + "step": 19539 + }, + { + "epoch": 0.23, + "grad_norm": 5.492454362028502, + "learning_rate": 1.9601199800727843e-05, + "loss": 1.5191, + "step": 19542 + }, + { + "epoch": 0.24, + "grad_norm": 27.5516073036338, + "learning_rate": 1.960103645499398e-05, + "loss": 1.3339, + "step": 19545 + }, + { + "epoch": 0.24, + "grad_norm": 9.461239208365539, + "learning_rate": 1.960087307649524e-05, + "loss": 1.3236, + "step": 19548 + }, + { + "epoch": 0.24, + "grad_norm": 11.631153898804255, + "learning_rate": 1.9600709665232173e-05, + "loss": 1.7608, + "step": 19551 + }, + { + "epoch": 0.24, + "grad_norm": 17.7098521584346, + "learning_rate": 1.9600546221205347e-05, + "loss": 1.64, + "step": 19554 + }, + { + "epoch": 0.24, + "grad_norm": 16.30460823876473, + "learning_rate": 1.9600382744415313e-05, + "loss": 1.5152, + "step": 19557 + }, + { + "epoch": 0.24, + "grad_norm": 11.775713594430973, + "learning_rate": 1.960021923486263e-05, + "loss": 1.7506, + "step": 19560 + }, + { + "epoch": 0.24, + "grad_norm": 54.357691870197726, + "learning_rate": 1.9600055692547855e-05, + "loss": 1.5271, + "step": 19563 + }, + { + "epoch": 0.24, + "grad_norm": 8.897071292662279, + "learning_rate": 1.9599892117471554e-05, + "loss": 1.8142, + "step": 19566 + }, + { + "epoch": 0.24, + "grad_norm": 14.718769990281013, + "learning_rate": 1.9599728509634274e-05, + "loss": 1.5005, + "step": 19569 + }, + { + "epoch": 0.24, + "grad_norm": 85.27471714767795, + "learning_rate": 1.959956486903658e-05, + "loss": 1.4609, + "step": 19572 + }, + { + "epoch": 0.24, + "grad_norm": 11.915540105752472, + "learning_rate": 1.9599401195679027e-05, + "loss": 1.7405, + "step": 19575 + }, + { + "epoch": 0.24, + "grad_norm": 21.746307748274443, + "learning_rate": 1.959923748956218e-05, + "loss": 1.3403, + "step": 19578 + }, + { + "epoch": 0.24, + "grad_norm": 14.865726396897593, + "learning_rate": 1.9599073750686588e-05, + "loss": 1.4268, + "step": 19581 + }, + { + "epoch": 0.24, + "grad_norm": 35.20529410886574, + "learning_rate": 1.9598909979052815e-05, + "loss": 1.1106, + "step": 19584 + }, + { + "epoch": 0.24, + "grad_norm": 15.704631444389966, + "learning_rate": 1.9598746174661424e-05, + "loss": 1.1897, + "step": 19587 + }, + { + "epoch": 0.24, + "grad_norm": 14.666822212243318, + "learning_rate": 1.9598582337512966e-05, + "loss": 1.6908, + "step": 19590 + }, + { + "epoch": 0.24, + "grad_norm": 14.138660516088295, + "learning_rate": 1.9598418467608006e-05, + "loss": 1.3591, + "step": 19593 + }, + { + "epoch": 0.24, + "grad_norm": 28.03761004060259, + "learning_rate": 1.95982545649471e-05, + "loss": 1.6964, + "step": 19596 + }, + { + "epoch": 0.24, + "grad_norm": 4.994119423509068, + "learning_rate": 1.9598090629530808e-05, + "loss": 1.9135, + "step": 19599 + }, + { + "epoch": 0.24, + "grad_norm": 15.913007603508156, + "learning_rate": 1.959792666135969e-05, + "loss": 1.3171, + "step": 19602 + }, + { + "epoch": 0.24, + "grad_norm": 33.4986817035513, + "learning_rate": 1.9597762660434306e-05, + "loss": 1.6749, + "step": 19605 + }, + { + "epoch": 0.24, + "grad_norm": 12.430100936159834, + "learning_rate": 1.9597598626755217e-05, + "loss": 1.3715, + "step": 19608 + }, + { + "epoch": 0.24, + "grad_norm": 8.249425625941964, + "learning_rate": 1.959743456032298e-05, + "loss": 1.7795, + "step": 19611 + }, + { + "epoch": 0.24, + "grad_norm": 10.858270719072753, + "learning_rate": 1.9597270461138154e-05, + "loss": 1.4453, + "step": 19614 + }, + { + "epoch": 0.24, + "grad_norm": 73.88852104619959, + "learning_rate": 1.9597106329201302e-05, + "loss": 1.982, + "step": 19617 + }, + { + "epoch": 0.24, + "grad_norm": 20.41897232677353, + "learning_rate": 1.9596942164512982e-05, + "loss": 1.2205, + "step": 19620 + }, + { + "epoch": 0.24, + "grad_norm": 9.338355767912933, + "learning_rate": 1.9596777967073754e-05, + "loss": 1.6611, + "step": 19623 + }, + { + "epoch": 0.24, + "grad_norm": 5.369262865885381, + "learning_rate": 1.9596613736884183e-05, + "loss": 1.2794, + "step": 19626 + }, + { + "epoch": 0.24, + "grad_norm": 37.843147158309755, + "learning_rate": 1.9596449473944826e-05, + "loss": 1.6891, + "step": 19629 + }, + { + "epoch": 0.24, + "grad_norm": 7.906302866763477, + "learning_rate": 1.9596285178256244e-05, + "loss": 1.2951, + "step": 19632 + }, + { + "epoch": 0.24, + "grad_norm": 14.790507184637505, + "learning_rate": 1.9596120849818993e-05, + "loss": 1.7938, + "step": 19635 + }, + { + "epoch": 0.24, + "grad_norm": 4.140602938837488, + "learning_rate": 1.959595648863364e-05, + "loss": 1.562, + "step": 19638 + }, + { + "epoch": 0.24, + "grad_norm": 213.76886887070276, + "learning_rate": 1.9595792094700744e-05, + "loss": 1.4155, + "step": 19641 + }, + { + "epoch": 0.24, + "grad_norm": 8.691595679211035, + "learning_rate": 1.959562766802087e-05, + "loss": 1.5082, + "step": 19644 + }, + { + "epoch": 0.24, + "grad_norm": 9.880799834879546, + "learning_rate": 1.959546320859457e-05, + "loss": 1.2633, + "step": 19647 + }, + { + "epoch": 0.24, + "grad_norm": 110.07230735381799, + "learning_rate": 1.959529871642241e-05, + "loss": 1.5306, + "step": 19650 + }, + { + "epoch": 0.24, + "grad_norm": 7.431173090011578, + "learning_rate": 1.9595134191504952e-05, + "loss": 1.5754, + "step": 19653 + }, + { + "epoch": 0.24, + "grad_norm": 24.75136247571669, + "learning_rate": 1.959496963384276e-05, + "loss": 1.5699, + "step": 19656 + }, + { + "epoch": 0.24, + "grad_norm": 12.526158459838598, + "learning_rate": 1.9594805043436393e-05, + "loss": 1.6073, + "step": 19659 + }, + { + "epoch": 0.24, + "grad_norm": 26.825097164946097, + "learning_rate": 1.9594640420286408e-05, + "loss": 1.7856, + "step": 19662 + }, + { + "epoch": 0.24, + "grad_norm": 18.793421186448548, + "learning_rate": 1.9594475764393376e-05, + "loss": 1.4396, + "step": 19665 + }, + { + "epoch": 0.24, + "grad_norm": 10.025281687829912, + "learning_rate": 1.9594311075757854e-05, + "loss": 1.3051, + "step": 19668 + }, + { + "epoch": 0.24, + "grad_norm": 23.066732088126567, + "learning_rate": 1.95941463543804e-05, + "loss": 2.2045, + "step": 19671 + }, + { + "epoch": 0.24, + "grad_norm": 111.11516867774424, + "learning_rate": 1.959398160026158e-05, + "loss": 1.6493, + "step": 19674 + }, + { + "epoch": 0.24, + "grad_norm": 18.256292379250358, + "learning_rate": 1.959381681340196e-05, + "loss": 1.7223, + "step": 19677 + }, + { + "epoch": 0.24, + "grad_norm": 63.750862391703556, + "learning_rate": 1.9593651993802098e-05, + "loss": 1.6774, + "step": 19680 + }, + { + "epoch": 0.24, + "grad_norm": 11.842612432202905, + "learning_rate": 1.9593487141462558e-05, + "loss": 1.4742, + "step": 19683 + }, + { + "epoch": 0.24, + "grad_norm": 15.262299622497999, + "learning_rate": 1.95933222563839e-05, + "loss": 1.4732, + "step": 19686 + }, + { + "epoch": 0.24, + "grad_norm": 17.547808048793552, + "learning_rate": 1.9593157338566692e-05, + "loss": 1.2114, + "step": 19689 + }, + { + "epoch": 0.24, + "grad_norm": 10.917982588479187, + "learning_rate": 1.959299238801149e-05, + "loss": 1.6265, + "step": 19692 + }, + { + "epoch": 0.24, + "grad_norm": 15.688816801185041, + "learning_rate": 1.959282740471886e-05, + "loss": 1.6619, + "step": 19695 + }, + { + "epoch": 0.24, + "grad_norm": 37.62121475172553, + "learning_rate": 1.959266238868937e-05, + "loss": 1.7824, + "step": 19698 + }, + { + "epoch": 0.24, + "grad_norm": 26.01034678730537, + "learning_rate": 1.9592497339923576e-05, + "loss": 1.5137, + "step": 19701 + }, + { + "epoch": 0.24, + "grad_norm": 18.261862660977684, + "learning_rate": 1.9592332258422044e-05, + "loss": 1.4919, + "step": 19704 + }, + { + "epoch": 0.24, + "grad_norm": 27.38022160366362, + "learning_rate": 1.9592167144185335e-05, + "loss": 1.8344, + "step": 19707 + }, + { + "epoch": 0.24, + "grad_norm": 16.671672710719687, + "learning_rate": 1.9592001997214017e-05, + "loss": 1.7113, + "step": 19710 + }, + { + "epoch": 0.24, + "grad_norm": 4.46525043223896, + "learning_rate": 1.959183681750865e-05, + "loss": 1.412, + "step": 19713 + }, + { + "epoch": 0.24, + "grad_norm": 12.412291683196994, + "learning_rate": 1.9591671605069798e-05, + "loss": 1.373, + "step": 19716 + }, + { + "epoch": 0.24, + "grad_norm": 7.431380704454966, + "learning_rate": 1.9591506359898028e-05, + "loss": 1.607, + "step": 19719 + }, + { + "epoch": 0.24, + "grad_norm": 5.068890684847311, + "learning_rate": 1.9591341081993903e-05, + "loss": 1.57, + "step": 19722 + }, + { + "epoch": 0.24, + "grad_norm": 18.752485669122766, + "learning_rate": 1.9591175771357978e-05, + "loss": 1.6975, + "step": 19725 + }, + { + "epoch": 0.24, + "grad_norm": 10.000258582961761, + "learning_rate": 1.9591010427990834e-05, + "loss": 1.4712, + "step": 19728 + }, + { + "epoch": 0.24, + "grad_norm": 5.667561198852283, + "learning_rate": 1.959084505189302e-05, + "loss": 1.6086, + "step": 19731 + }, + { + "epoch": 0.24, + "grad_norm": 11.899488270847952, + "learning_rate": 1.959067964306511e-05, + "loss": 1.4193, + "step": 19734 + }, + { + "epoch": 0.24, + "grad_norm": 3.5035032840642013, + "learning_rate": 1.9590514201507664e-05, + "loss": 1.299, + "step": 19737 + }, + { + "epoch": 0.24, + "grad_norm": 19.48080555453467, + "learning_rate": 1.9590348727221247e-05, + "loss": 2.0006, + "step": 19740 + }, + { + "epoch": 0.24, + "grad_norm": 13.017512323554431, + "learning_rate": 1.9590183220206426e-05, + "loss": 1.6482, + "step": 19743 + }, + { + "epoch": 0.24, + "grad_norm": 20.659684089872535, + "learning_rate": 1.9590017680463763e-05, + "loss": 1.893, + "step": 19746 + }, + { + "epoch": 0.24, + "grad_norm": 4.414402583614812, + "learning_rate": 1.958985210799382e-05, + "loss": 1.3541, + "step": 19749 + }, + { + "epoch": 0.24, + "grad_norm": 46.507398791397954, + "learning_rate": 1.958968650279717e-05, + "loss": 1.8369, + "step": 19752 + }, + { + "epoch": 0.24, + "grad_norm": 16.39046487895789, + "learning_rate": 1.9589520864874372e-05, + "loss": 1.1133, + "step": 19755 + }, + { + "epoch": 0.24, + "grad_norm": 5.008772089246585, + "learning_rate": 1.9589355194225996e-05, + "loss": 1.7668, + "step": 19758 + }, + { + "epoch": 0.24, + "grad_norm": 43.185524577815684, + "learning_rate": 1.9589189490852603e-05, + "loss": 1.8117, + "step": 19761 + }, + { + "epoch": 0.24, + "grad_norm": 4.892351403097809, + "learning_rate": 1.9589023754754763e-05, + "loss": 1.6037, + "step": 19764 + }, + { + "epoch": 0.24, + "grad_norm": 7.509197361567238, + "learning_rate": 1.9588857985933036e-05, + "loss": 1.6991, + "step": 19767 + }, + { + "epoch": 0.24, + "grad_norm": 16.229278794103628, + "learning_rate": 1.9588692184387993e-05, + "loss": 1.6474, + "step": 19770 + }, + { + "epoch": 0.24, + "grad_norm": 16.21146354730968, + "learning_rate": 1.9588526350120193e-05, + "loss": 1.4213, + "step": 19773 + }, + { + "epoch": 0.24, + "grad_norm": 37.99619301799039, + "learning_rate": 1.958836048313021e-05, + "loss": 1.6599, + "step": 19776 + }, + { + "epoch": 0.24, + "grad_norm": 51.05283334364945, + "learning_rate": 1.9588194583418608e-05, + "loss": 1.866, + "step": 19779 + }, + { + "epoch": 0.24, + "grad_norm": 3.5417933397663015, + "learning_rate": 1.9588028650985948e-05, + "loss": 1.5393, + "step": 19782 + }, + { + "epoch": 0.24, + "grad_norm": 11.00191446108633, + "learning_rate": 1.95878626858328e-05, + "loss": 1.6266, + "step": 19785 + }, + { + "epoch": 0.24, + "grad_norm": 30.79208146492054, + "learning_rate": 1.958769668795973e-05, + "loss": 1.7821, + "step": 19788 + }, + { + "epoch": 0.24, + "grad_norm": 5.163058628558112, + "learning_rate": 1.9587530657367305e-05, + "loss": 1.1456, + "step": 19791 + }, + { + "epoch": 0.24, + "grad_norm": 51.91515648977024, + "learning_rate": 1.9587364594056095e-05, + "loss": 1.5321, + "step": 19794 + }, + { + "epoch": 0.24, + "grad_norm": 23.292769126009837, + "learning_rate": 1.958719849802666e-05, + "loss": 1.6855, + "step": 19797 + }, + { + "epoch": 0.24, + "grad_norm": 6.470438969083509, + "learning_rate": 1.958703236927957e-05, + "loss": 1.6645, + "step": 19800 + }, + { + "epoch": 0.24, + "grad_norm": 16.360756640542576, + "learning_rate": 1.958686620781539e-05, + "loss": 1.5671, + "step": 19803 + }, + { + "epoch": 0.24, + "grad_norm": 58.96237983393636, + "learning_rate": 1.9586700013634693e-05, + "loss": 1.112, + "step": 19806 + }, + { + "epoch": 0.24, + "grad_norm": 9.187234886429458, + "learning_rate": 1.958653378673804e-05, + "loss": 1.7326, + "step": 19809 + }, + { + "epoch": 0.24, + "grad_norm": 13.282943474389883, + "learning_rate": 1.9586367527125997e-05, + "loss": 1.5208, + "step": 19812 + }, + { + "epoch": 0.24, + "grad_norm": 90.1731264321877, + "learning_rate": 1.9586201234799137e-05, + "loss": 1.6781, + "step": 19815 + }, + { + "epoch": 0.24, + "grad_norm": 16.36574836418295, + "learning_rate": 1.9586034909758026e-05, + "loss": 1.5003, + "step": 19818 + }, + { + "epoch": 0.24, + "grad_norm": 18.328135873890155, + "learning_rate": 1.958586855200323e-05, + "loss": 1.4997, + "step": 19821 + }, + { + "epoch": 0.24, + "grad_norm": 38.311380086462044, + "learning_rate": 1.9585702161535316e-05, + "loss": 1.9274, + "step": 19824 + }, + { + "epoch": 0.24, + "grad_norm": 18.2736178273177, + "learning_rate": 1.9585535738354856e-05, + "loss": 1.5517, + "step": 19827 + }, + { + "epoch": 0.24, + "grad_norm": 59.69334260705879, + "learning_rate": 1.9585369282462414e-05, + "loss": 1.3272, + "step": 19830 + }, + { + "epoch": 0.24, + "grad_norm": 7.930418941348152, + "learning_rate": 1.958520279385856e-05, + "loss": 1.6766, + "step": 19833 + }, + { + "epoch": 0.24, + "grad_norm": 47.85938743559085, + "learning_rate": 1.958503627254386e-05, + "loss": 1.3669, + "step": 19836 + }, + { + "epoch": 0.24, + "grad_norm": 22.62335831429373, + "learning_rate": 1.9584869718518887e-05, + "loss": 1.7751, + "step": 19839 + }, + { + "epoch": 0.24, + "grad_norm": 9.02309874633315, + "learning_rate": 1.9584703131784202e-05, + "loss": 1.2784, + "step": 19842 + }, + { + "epoch": 0.24, + "grad_norm": 10.564159105411521, + "learning_rate": 1.958453651234038e-05, + "loss": 1.5354, + "step": 19845 + }, + { + "epoch": 0.24, + "grad_norm": 20.62816590786508, + "learning_rate": 1.9584369860187986e-05, + "loss": 1.2659, + "step": 19848 + }, + { + "epoch": 0.24, + "grad_norm": 5.716336765434213, + "learning_rate": 1.958420317532759e-05, + "loss": 1.6375, + "step": 19851 + }, + { + "epoch": 0.24, + "grad_norm": 14.071781111435342, + "learning_rate": 1.9584036457759762e-05, + "loss": 1.7417, + "step": 19854 + }, + { + "epoch": 0.24, + "grad_norm": 22.839227495376775, + "learning_rate": 1.9583869707485065e-05, + "loss": 1.6644, + "step": 19857 + }, + { + "epoch": 0.24, + "grad_norm": 39.361127382968384, + "learning_rate": 1.958370292450408e-05, + "loss": 1.487, + "step": 19860 + }, + { + "epoch": 0.24, + "grad_norm": 8.111898124438932, + "learning_rate": 1.9583536108817366e-05, + "loss": 1.5405, + "step": 19863 + }, + { + "epoch": 0.24, + "grad_norm": 15.43655774194328, + "learning_rate": 1.9583369260425492e-05, + "loss": 1.3355, + "step": 19866 + }, + { + "epoch": 0.24, + "grad_norm": 10.744283558685884, + "learning_rate": 1.9583202379329033e-05, + "loss": 1.6444, + "step": 19869 + }, + { + "epoch": 0.24, + "grad_norm": 4.341960022466538, + "learning_rate": 1.958303546552856e-05, + "loss": 1.457, + "step": 19872 + }, + { + "epoch": 0.24, + "grad_norm": 12.210524373420451, + "learning_rate": 1.9582868519024633e-05, + "loss": 1.5743, + "step": 19875 + }, + { + "epoch": 0.24, + "grad_norm": 23.574170233977448, + "learning_rate": 1.9582701539817832e-05, + "loss": 1.8433, + "step": 19878 + }, + { + "epoch": 0.24, + "grad_norm": 8.611486498529953, + "learning_rate": 1.9582534527908718e-05, + "loss": 1.248, + "step": 19881 + }, + { + "epoch": 0.24, + "grad_norm": 5.582794273219756, + "learning_rate": 1.9582367483297867e-05, + "loss": 1.6985, + "step": 19884 + }, + { + "epoch": 0.24, + "grad_norm": 5.131435418704701, + "learning_rate": 1.958220040598585e-05, + "loss": 1.3324, + "step": 19887 + }, + { + "epoch": 0.24, + "grad_norm": 7.495535193162996, + "learning_rate": 1.958203329597323e-05, + "loss": 1.2306, + "step": 19890 + }, + { + "epoch": 0.24, + "grad_norm": 8.875211755339768, + "learning_rate": 1.9581866153260586e-05, + "loss": 1.606, + "step": 19893 + }, + { + "epoch": 0.24, + "grad_norm": 6.950391394875477, + "learning_rate": 1.9581698977848482e-05, + "loss": 1.7332, + "step": 19896 + }, + { + "epoch": 0.24, + "grad_norm": 8.237771281734151, + "learning_rate": 1.958153176973749e-05, + "loss": 1.3913, + "step": 19899 + }, + { + "epoch": 0.24, + "grad_norm": 4.472931003323312, + "learning_rate": 1.9581364528928182e-05, + "loss": 1.247, + "step": 19902 + }, + { + "epoch": 0.24, + "grad_norm": 4.966276861250355, + "learning_rate": 1.958119725542113e-05, + "loss": 1.4051, + "step": 19905 + }, + { + "epoch": 0.24, + "grad_norm": 12.184497171433664, + "learning_rate": 1.95810299492169e-05, + "loss": 1.5929, + "step": 19908 + }, + { + "epoch": 0.24, + "grad_norm": 7.142318842594752, + "learning_rate": 1.958086261031607e-05, + "loss": 1.4095, + "step": 19911 + }, + { + "epoch": 0.24, + "grad_norm": 7.798329046890808, + "learning_rate": 1.95806952387192e-05, + "loss": 1.3202, + "step": 19914 + }, + { + "epoch": 0.24, + "grad_norm": 8.384688106962498, + "learning_rate": 1.958052783442687e-05, + "loss": 1.8198, + "step": 19917 + }, + { + "epoch": 0.24, + "grad_norm": 19.16236048072033, + "learning_rate": 1.9580360397439657e-05, + "loss": 1.7914, + "step": 19920 + }, + { + "epoch": 0.24, + "grad_norm": 12.410270969293503, + "learning_rate": 1.958019292775812e-05, + "loss": 1.1698, + "step": 19923 + }, + { + "epoch": 0.24, + "grad_norm": 61.211084048400245, + "learning_rate": 1.958002542538283e-05, + "loss": 1.6799, + "step": 19926 + }, + { + "epoch": 0.24, + "grad_norm": 9.122807321769557, + "learning_rate": 1.957985789031437e-05, + "loss": 1.5444, + "step": 19929 + }, + { + "epoch": 0.24, + "grad_norm": 44.3123443412327, + "learning_rate": 1.9579690322553305e-05, + "loss": 1.1923, + "step": 19932 + }, + { + "epoch": 0.24, + "grad_norm": 8.123796266313857, + "learning_rate": 1.9579522722100207e-05, + "loss": 1.553, + "step": 19935 + }, + { + "epoch": 0.24, + "grad_norm": 13.337776172223052, + "learning_rate": 1.957935508895565e-05, + "loss": 1.5195, + "step": 19938 + }, + { + "epoch": 0.24, + "grad_norm": 9.456679257997296, + "learning_rate": 1.95791874231202e-05, + "loss": 1.3671, + "step": 19941 + }, + { + "epoch": 0.24, + "grad_norm": 7.158688017782128, + "learning_rate": 1.9579019724594435e-05, + "loss": 1.4787, + "step": 19944 + }, + { + "epoch": 0.24, + "grad_norm": 32.696652461048416, + "learning_rate": 1.957885199337893e-05, + "loss": 1.6575, + "step": 19947 + }, + { + "epoch": 0.24, + "grad_norm": 17.724172580927696, + "learning_rate": 1.957868422947425e-05, + "loss": 1.5347, + "step": 19950 + }, + { + "epoch": 0.24, + "grad_norm": 17.38359254318737, + "learning_rate": 1.957851643288097e-05, + "loss": 1.3854, + "step": 19953 + }, + { + "epoch": 0.24, + "grad_norm": 21.30156294425791, + "learning_rate": 1.9578348603599666e-05, + "loss": 1.452, + "step": 19956 + }, + { + "epoch": 0.24, + "grad_norm": 8.996052504149882, + "learning_rate": 1.957818074163091e-05, + "loss": 1.5986, + "step": 19959 + }, + { + "epoch": 0.24, + "grad_norm": 20.626291798737874, + "learning_rate": 1.957801284697527e-05, + "loss": 1.8549, + "step": 19962 + }, + { + "epoch": 0.24, + "grad_norm": 11.884175411830244, + "learning_rate": 1.957784491963332e-05, + "loss": 1.664, + "step": 19965 + }, + { + "epoch": 0.24, + "grad_norm": 14.914383079226445, + "learning_rate": 1.957767695960564e-05, + "loss": 1.8251, + "step": 19968 + }, + { + "epoch": 0.24, + "grad_norm": 64.07157978768183, + "learning_rate": 1.9577508966892797e-05, + "loss": 1.1686, + "step": 19971 + }, + { + "epoch": 0.24, + "grad_norm": 16.386588486079418, + "learning_rate": 1.9577340941495365e-05, + "loss": 1.4049, + "step": 19974 + }, + { + "epoch": 0.24, + "grad_norm": 25.527930549549687, + "learning_rate": 1.957717288341392e-05, + "loss": 1.7358, + "step": 19977 + }, + { + "epoch": 0.24, + "grad_norm": 21.821240829905975, + "learning_rate": 1.957700479264903e-05, + "loss": 2.0377, + "step": 19980 + }, + { + "epoch": 0.24, + "grad_norm": 24.46513709022292, + "learning_rate": 1.9576836669201277e-05, + "loss": 1.5722, + "step": 19983 + }, + { + "epoch": 0.24, + "grad_norm": 17.8997560700288, + "learning_rate": 1.9576668513071225e-05, + "loss": 1.8, + "step": 19986 + }, + { + "epoch": 0.24, + "grad_norm": 7.6054107412607435, + "learning_rate": 1.9576500324259457e-05, + "loss": 1.6766, + "step": 19989 + }, + { + "epoch": 0.24, + "grad_norm": 33.80993142053364, + "learning_rate": 1.957633210276654e-05, + "loss": 1.4949, + "step": 19992 + }, + { + "epoch": 0.24, + "grad_norm": 37.52038793255318, + "learning_rate": 1.9576163848593053e-05, + "loss": 1.5753, + "step": 19995 + }, + { + "epoch": 0.24, + "grad_norm": 30.48338667571478, + "learning_rate": 1.9575995561739565e-05, + "loss": 1.4655, + "step": 19998 + }, + { + "epoch": 0.24, + "grad_norm": 22.642835262004695, + "learning_rate": 1.9575827242206656e-05, + "loss": 1.7893, + "step": 20001 + }, + { + "epoch": 0.24, + "grad_norm": 5.96158496374787, + "learning_rate": 1.95756588899949e-05, + "loss": 1.5728, + "step": 20004 + }, + { + "epoch": 0.24, + "grad_norm": 16.274831669296418, + "learning_rate": 1.9575490505104864e-05, + "loss": 1.4355, + "step": 20007 + }, + { + "epoch": 0.24, + "grad_norm": 15.501754437523541, + "learning_rate": 1.957532208753713e-05, + "loss": 1.2759, + "step": 20010 + }, + { + "epoch": 0.24, + "grad_norm": 20.632249885688022, + "learning_rate": 1.9575153637292272e-05, + "loss": 1.6954, + "step": 20013 + }, + { + "epoch": 0.24, + "grad_norm": 11.803449777751492, + "learning_rate": 1.9574985154370864e-05, + "loss": 1.7858, + "step": 20016 + }, + { + "epoch": 0.24, + "grad_norm": 65.3629103454451, + "learning_rate": 1.957481663877348e-05, + "loss": 1.7253, + "step": 20019 + }, + { + "epoch": 0.24, + "grad_norm": 7.609202236915475, + "learning_rate": 1.9574648090500695e-05, + "loss": 1.3559, + "step": 20022 + }, + { + "epoch": 0.24, + "grad_norm": 6.449238305463443, + "learning_rate": 1.9574479509553086e-05, + "loss": 1.5197, + "step": 20025 + }, + { + "epoch": 0.24, + "grad_norm": 20.278556532318916, + "learning_rate": 1.9574310895931225e-05, + "loss": 1.2308, + "step": 20028 + }, + { + "epoch": 0.24, + "grad_norm": 8.768740223170381, + "learning_rate": 1.957414224963569e-05, + "loss": 1.7396, + "step": 20031 + }, + { + "epoch": 0.24, + "grad_norm": 97.17856407740693, + "learning_rate": 1.9573973570667058e-05, + "loss": 1.5154, + "step": 20034 + }, + { + "epoch": 0.24, + "grad_norm": 16.38835216660089, + "learning_rate": 1.95738048590259e-05, + "loss": 1.6547, + "step": 20037 + }, + { + "epoch": 0.24, + "grad_norm": 10.891086627757279, + "learning_rate": 1.9573636114712798e-05, + "loss": 1.2281, + "step": 20040 + }, + { + "epoch": 0.24, + "grad_norm": 8.911094772718206, + "learning_rate": 1.957346733772832e-05, + "loss": 1.4627, + "step": 20043 + }, + { + "epoch": 0.24, + "grad_norm": 37.282980101288175, + "learning_rate": 1.957329852807305e-05, + "loss": 1.5538, + "step": 20046 + }, + { + "epoch": 0.24, + "grad_norm": 46.29682417078691, + "learning_rate": 1.957312968574756e-05, + "loss": 1.3267, + "step": 20049 + }, + { + "epoch": 0.24, + "grad_norm": 15.124394423581693, + "learning_rate": 1.9572960810752427e-05, + "loss": 1.3708, + "step": 20052 + }, + { + "epoch": 0.24, + "grad_norm": 14.870982557121156, + "learning_rate": 1.9572791903088224e-05, + "loss": 1.5405, + "step": 20055 + }, + { + "epoch": 0.24, + "grad_norm": 33.93044194285634, + "learning_rate": 1.9572622962755534e-05, + "loss": 1.7008, + "step": 20058 + }, + { + "epoch": 0.24, + "grad_norm": 31.767297979149575, + "learning_rate": 1.9572453989754927e-05, + "loss": 1.5155, + "step": 20061 + }, + { + "epoch": 0.24, + "grad_norm": 54.58888569871277, + "learning_rate": 1.9572284984086983e-05, + "loss": 1.7367, + "step": 20064 + }, + { + "epoch": 0.24, + "grad_norm": 21.624193061556685, + "learning_rate": 1.957211594575228e-05, + "loss": 1.5221, + "step": 20067 + }, + { + "epoch": 0.24, + "grad_norm": 4.326863625491038, + "learning_rate": 1.957194687475139e-05, + "loss": 1.2565, + "step": 20070 + }, + { + "epoch": 0.24, + "grad_norm": 25.011361710032453, + "learning_rate": 1.9571777771084895e-05, + "loss": 1.498, + "step": 20073 + }, + { + "epoch": 0.24, + "grad_norm": 9.405575119495705, + "learning_rate": 1.957160863475337e-05, + "loss": 1.4068, + "step": 20076 + }, + { + "epoch": 0.24, + "grad_norm": 31.78528351567097, + "learning_rate": 1.957143946575739e-05, + "loss": 1.9557, + "step": 20079 + }, + { + "epoch": 0.24, + "grad_norm": 27.46870714862739, + "learning_rate": 1.957127026409754e-05, + "loss": 1.3829, + "step": 20082 + }, + { + "epoch": 0.24, + "grad_norm": 28.119117467135514, + "learning_rate": 1.957110102977439e-05, + "loss": 1.733, + "step": 20085 + }, + { + "epoch": 0.24, + "grad_norm": 5.127119715757805, + "learning_rate": 1.9570931762788518e-05, + "loss": 1.689, + "step": 20088 + }, + { + "epoch": 0.24, + "grad_norm": 25.04995418267266, + "learning_rate": 1.9570762463140504e-05, + "loss": 1.7956, + "step": 20091 + }, + { + "epoch": 0.24, + "grad_norm": 19.896561095629266, + "learning_rate": 1.9570593130830926e-05, + "loss": 1.7275, + "step": 20094 + }, + { + "epoch": 0.24, + "grad_norm": 32.47798057706698, + "learning_rate": 1.9570423765860358e-05, + "loss": 1.5697, + "step": 20097 + }, + { + "epoch": 0.24, + "grad_norm": 56.4771707665531, + "learning_rate": 1.9570254368229385e-05, + "loss": 1.5673, + "step": 20100 + }, + { + "epoch": 0.24, + "grad_norm": 9.972034082390266, + "learning_rate": 1.957008493793858e-05, + "loss": 1.5347, + "step": 20103 + }, + { + "epoch": 0.24, + "grad_norm": 8.496669642350172, + "learning_rate": 1.9569915474988523e-05, + "loss": 1.4401, + "step": 20106 + }, + { + "epoch": 0.24, + "grad_norm": 70.12171303081007, + "learning_rate": 1.956974597937979e-05, + "loss": 1.5457, + "step": 20109 + }, + { + "epoch": 0.24, + "grad_norm": 24.953027290669457, + "learning_rate": 1.956957645111296e-05, + "loss": 1.4797, + "step": 20112 + }, + { + "epoch": 0.24, + "grad_norm": 30.696478513584136, + "learning_rate": 1.956940689018861e-05, + "loss": 1.6204, + "step": 20115 + }, + { + "epoch": 0.24, + "grad_norm": 10.5852122746241, + "learning_rate": 1.9569237296607326e-05, + "loss": 1.6025, + "step": 20118 + }, + { + "epoch": 0.24, + "grad_norm": 29.075836792263015, + "learning_rate": 1.9569067670369683e-05, + "loss": 1.5338, + "step": 20121 + }, + { + "epoch": 0.24, + "grad_norm": 13.66171031634615, + "learning_rate": 1.9568898011476256e-05, + "loss": 1.2828, + "step": 20124 + }, + { + "epoch": 0.24, + "grad_norm": 24.577008194960182, + "learning_rate": 1.956872831992763e-05, + "loss": 1.4691, + "step": 20127 + }, + { + "epoch": 0.24, + "grad_norm": 4.457124958215251, + "learning_rate": 1.9568558595724377e-05, + "loss": 1.7821, + "step": 20130 + }, + { + "epoch": 0.24, + "grad_norm": 202.2909514315357, + "learning_rate": 1.956838883886708e-05, + "loss": 1.9252, + "step": 20133 + }, + { + "epoch": 0.24, + "grad_norm": 6.7173804977134255, + "learning_rate": 1.956821904935632e-05, + "loss": 1.2653, + "step": 20136 + }, + { + "epoch": 0.24, + "grad_norm": 16.86484950620789, + "learning_rate": 1.9568049227192675e-05, + "loss": 1.4167, + "step": 20139 + }, + { + "epoch": 0.24, + "grad_norm": 18.46287556024926, + "learning_rate": 1.9567879372376723e-05, + "loss": 1.7596, + "step": 20142 + }, + { + "epoch": 0.24, + "grad_norm": 9.93321286618718, + "learning_rate": 1.956770948490905e-05, + "loss": 1.7373, + "step": 20145 + }, + { + "epoch": 0.24, + "grad_norm": 13.758446678580768, + "learning_rate": 1.9567539564790224e-05, + "loss": 1.5413, + "step": 20148 + }, + { + "epoch": 0.24, + "grad_norm": 4.063487507134073, + "learning_rate": 1.9567369612020834e-05, + "loss": 1.9235, + "step": 20151 + }, + { + "epoch": 0.24, + "grad_norm": 41.67220120821746, + "learning_rate": 1.9567199626601458e-05, + "loss": 1.5033, + "step": 20154 + }, + { + "epoch": 0.24, + "grad_norm": 33.89027349979518, + "learning_rate": 1.9567029608532678e-05, + "loss": 1.6448, + "step": 20157 + }, + { + "epoch": 0.24, + "grad_norm": 8.1718076812828, + "learning_rate": 1.9566859557815067e-05, + "loss": 1.2772, + "step": 20160 + }, + { + "epoch": 0.24, + "grad_norm": 20.741621158474736, + "learning_rate": 1.9566689474449215e-05, + "loss": 1.6824, + "step": 20163 + }, + { + "epoch": 0.24, + "grad_norm": 15.690096651012293, + "learning_rate": 1.9566519358435695e-05, + "loss": 1.616, + "step": 20166 + }, + { + "epoch": 0.24, + "grad_norm": 35.09832908978001, + "learning_rate": 1.9566349209775092e-05, + "loss": 1.5809, + "step": 20169 + }, + { + "epoch": 0.24, + "grad_norm": 16.05961684836766, + "learning_rate": 1.9566179028467983e-05, + "loss": 1.8255, + "step": 20172 + }, + { + "epoch": 0.24, + "grad_norm": 6.5521632279990145, + "learning_rate": 1.9566008814514953e-05, + "loss": 1.6574, + "step": 20175 + }, + { + "epoch": 0.24, + "grad_norm": 22.065244780888104, + "learning_rate": 1.956583856791658e-05, + "loss": 1.7792, + "step": 20178 + }, + { + "epoch": 0.24, + "grad_norm": 7.743160234810923, + "learning_rate": 1.9565668288673444e-05, + "loss": 1.5264, + "step": 20181 + }, + { + "epoch": 0.24, + "grad_norm": 18.951799585301046, + "learning_rate": 1.956549797678613e-05, + "loss": 1.3193, + "step": 20184 + }, + { + "epoch": 0.24, + "grad_norm": 15.47091845581035, + "learning_rate": 1.9565327632255212e-05, + "loss": 1.5141, + "step": 20187 + }, + { + "epoch": 0.24, + "grad_norm": 9.69975011640197, + "learning_rate": 1.956515725508128e-05, + "loss": 1.3888, + "step": 20190 + }, + { + "epoch": 0.24, + "grad_norm": 27.127969021315156, + "learning_rate": 1.9564986845264913e-05, + "loss": 1.4462, + "step": 20193 + }, + { + "epoch": 0.24, + "grad_norm": 29.7952438571123, + "learning_rate": 1.9564816402806687e-05, + "loss": 1.4679, + "step": 20196 + }, + { + "epoch": 0.24, + "grad_norm": 8.507310734932801, + "learning_rate": 1.9564645927707193e-05, + "loss": 1.8372, + "step": 20199 + }, + { + "epoch": 0.24, + "grad_norm": 16.577184187903164, + "learning_rate": 1.9564475419967003e-05, + "loss": 1.2227, + "step": 20202 + }, + { + "epoch": 0.24, + "grad_norm": 14.88812343365786, + "learning_rate": 1.9564304879586706e-05, + "loss": 1.5568, + "step": 20205 + }, + { + "epoch": 0.24, + "grad_norm": 65.52540815283177, + "learning_rate": 1.9564134306566883e-05, + "loss": 1.2573, + "step": 20208 + }, + { + "epoch": 0.24, + "grad_norm": 18.070334423521558, + "learning_rate": 1.9563963700908112e-05, + "loss": 1.4622, + "step": 20211 + }, + { + "epoch": 0.24, + "grad_norm": 14.827706512120058, + "learning_rate": 1.956379306261098e-05, + "loss": 1.6103, + "step": 20214 + }, + { + "epoch": 0.24, + "grad_norm": 25.226886710776014, + "learning_rate": 1.956362239167606e-05, + "loss": 1.4169, + "step": 20217 + }, + { + "epoch": 0.24, + "grad_norm": 20.76803732597723, + "learning_rate": 1.9563451688103948e-05, + "loss": 1.2166, + "step": 20220 + }, + { + "epoch": 0.24, + "grad_norm": 10.332069691651375, + "learning_rate": 1.956328095189522e-05, + "loss": 1.7139, + "step": 20223 + }, + { + "epoch": 0.24, + "grad_norm": 10.766965069549464, + "learning_rate": 1.956311018305046e-05, + "loss": 1.5087, + "step": 20226 + }, + { + "epoch": 0.24, + "grad_norm": 46.000205968979074, + "learning_rate": 1.956293938157025e-05, + "loss": 1.4886, + "step": 20229 + }, + { + "epoch": 0.24, + "grad_norm": 11.161814080767861, + "learning_rate": 1.9562768547455166e-05, + "loss": 1.4799, + "step": 20232 + }, + { + "epoch": 0.24, + "grad_norm": 16.37349673852447, + "learning_rate": 1.9562597680705804e-05, + "loss": 1.5325, + "step": 20235 + }, + { + "epoch": 0.24, + "grad_norm": 10.272903200428797, + "learning_rate": 1.9562426781322735e-05, + "loss": 1.8689, + "step": 20238 + }, + { + "epoch": 0.24, + "grad_norm": 15.114215558804437, + "learning_rate": 1.9562255849306554e-05, + "loss": 1.2262, + "step": 20241 + }, + { + "epoch": 0.24, + "grad_norm": 15.052988970733054, + "learning_rate": 1.9562084884657832e-05, + "loss": 1.2936, + "step": 20244 + }, + { + "epoch": 0.24, + "grad_norm": 10.845151019958125, + "learning_rate": 1.9561913887377163e-05, + "loss": 1.6125, + "step": 20247 + }, + { + "epoch": 0.24, + "grad_norm": 22.237919844310806, + "learning_rate": 1.9561742857465125e-05, + "loss": 1.6201, + "step": 20250 + }, + { + "epoch": 0.24, + "grad_norm": 14.955336151390252, + "learning_rate": 1.95615717949223e-05, + "loss": 1.7654, + "step": 20253 + }, + { + "epoch": 0.24, + "grad_norm": 10.31446694237672, + "learning_rate": 1.9561400699749278e-05, + "loss": 1.396, + "step": 20256 + }, + { + "epoch": 0.24, + "grad_norm": 11.913208835263314, + "learning_rate": 1.956122957194664e-05, + "loss": 1.4307, + "step": 20259 + }, + { + "epoch": 0.24, + "grad_norm": 50.75715296620737, + "learning_rate": 1.9561058411514965e-05, + "loss": 1.6138, + "step": 20262 + }, + { + "epoch": 0.24, + "grad_norm": 8.01566082738169, + "learning_rate": 1.9560887218454848e-05, + "loss": 1.6, + "step": 20265 + }, + { + "epoch": 0.24, + "grad_norm": 35.201137235442225, + "learning_rate": 1.956071599276686e-05, + "loss": 1.9856, + "step": 20268 + }, + { + "epoch": 0.24, + "grad_norm": 61.55326664368293, + "learning_rate": 1.9560544734451596e-05, + "loss": 1.5771, + "step": 20271 + }, + { + "epoch": 0.24, + "grad_norm": 11.521104388095704, + "learning_rate": 1.9560373443509635e-05, + "loss": 1.3889, + "step": 20274 + }, + { + "epoch": 0.24, + "grad_norm": 22.064005932172506, + "learning_rate": 1.956020211994156e-05, + "loss": 1.5597, + "step": 20277 + }, + { + "epoch": 0.24, + "grad_norm": 22.72209187755523, + "learning_rate": 1.9560030763747966e-05, + "loss": 1.7863, + "step": 20280 + }, + { + "epoch": 0.24, + "grad_norm": 25.522781314881623, + "learning_rate": 1.9559859374929427e-05, + "loss": 1.9482, + "step": 20283 + }, + { + "epoch": 0.24, + "grad_norm": 29.238934298754756, + "learning_rate": 1.9559687953486533e-05, + "loss": 1.4169, + "step": 20286 + }, + { + "epoch": 0.24, + "grad_norm": 19.431851021579856, + "learning_rate": 1.9559516499419864e-05, + "loss": 1.4528, + "step": 20289 + }, + { + "epoch": 0.24, + "grad_norm": 15.40918425835697, + "learning_rate": 1.955934501273001e-05, + "loss": 1.6235, + "step": 20292 + }, + { + "epoch": 0.24, + "grad_norm": 6.690436135093954, + "learning_rate": 1.9559173493417556e-05, + "loss": 1.3064, + "step": 20295 + }, + { + "epoch": 0.24, + "grad_norm": 18.137638799059573, + "learning_rate": 1.9559001941483084e-05, + "loss": 1.7795, + "step": 20298 + }, + { + "epoch": 0.24, + "grad_norm": 22.974680758439646, + "learning_rate": 1.9558830356927184e-05, + "loss": 1.8004, + "step": 20301 + }, + { + "epoch": 0.24, + "grad_norm": 25.02287557688397, + "learning_rate": 1.9558658739750436e-05, + "loss": 1.5824, + "step": 20304 + }, + { + "epoch": 0.24, + "grad_norm": 10.505921316388529, + "learning_rate": 1.9558487089953432e-05, + "loss": 1.6964, + "step": 20307 + }, + { + "epoch": 0.24, + "grad_norm": 3.4349555322787944, + "learning_rate": 1.9558315407536753e-05, + "loss": 1.7319, + "step": 20310 + }, + { + "epoch": 0.24, + "grad_norm": 5.744461351530033, + "learning_rate": 1.9558143692500984e-05, + "loss": 1.2868, + "step": 20313 + }, + { + "epoch": 0.24, + "grad_norm": 32.81577201136747, + "learning_rate": 1.9557971944846715e-05, + "loss": 1.6504, + "step": 20316 + }, + { + "epoch": 0.24, + "grad_norm": 17.50655088556224, + "learning_rate": 1.9557800164574532e-05, + "loss": 1.669, + "step": 20319 + }, + { + "epoch": 0.24, + "grad_norm": 21.303087998038723, + "learning_rate": 1.9557628351685017e-05, + "loss": 1.7217, + "step": 20322 + }, + { + "epoch": 0.24, + "grad_norm": 18.887820115698087, + "learning_rate": 1.955745650617876e-05, + "loss": 1.5839, + "step": 20325 + }, + { + "epoch": 0.24, + "grad_norm": 12.995917650259985, + "learning_rate": 1.955728462805635e-05, + "loss": 1.7008, + "step": 20328 + }, + { + "epoch": 0.24, + "grad_norm": 18.287729187544997, + "learning_rate": 1.9557112717318368e-05, + "loss": 1.5253, + "step": 20331 + }, + { + "epoch": 0.24, + "grad_norm": 12.059770068232668, + "learning_rate": 1.95569407739654e-05, + "loss": 1.5487, + "step": 20334 + }, + { + "epoch": 0.24, + "grad_norm": 3.670994795496247, + "learning_rate": 1.955676879799804e-05, + "loss": 1.4661, + "step": 20337 + }, + { + "epoch": 0.24, + "grad_norm": 11.584140702365671, + "learning_rate": 1.955659678941687e-05, + "loss": 1.2787, + "step": 20340 + }, + { + "epoch": 0.24, + "grad_norm": 6.782464203040218, + "learning_rate": 1.955642474822247e-05, + "loss": 1.716, + "step": 20343 + }, + { + "epoch": 0.24, + "grad_norm": 74.3312297203503, + "learning_rate": 1.9556252674415442e-05, + "loss": 1.8041, + "step": 20346 + }, + { + "epoch": 0.24, + "grad_norm": 3.1280968745421545, + "learning_rate": 1.9556080567996363e-05, + "loss": 1.8223, + "step": 20349 + }, + { + "epoch": 0.24, + "grad_norm": 50.750732018919834, + "learning_rate": 1.9555908428965825e-05, + "loss": 1.4549, + "step": 20352 + }, + { + "epoch": 0.24, + "grad_norm": 23.111330219083765, + "learning_rate": 1.9555736257324413e-05, + "loss": 1.4003, + "step": 20355 + }, + { + "epoch": 0.24, + "grad_norm": 8.287641686980297, + "learning_rate": 1.9555564053072714e-05, + "loss": 1.8594, + "step": 20358 + }, + { + "epoch": 0.24, + "grad_norm": 26.821452128528858, + "learning_rate": 1.955539181621132e-05, + "loss": 1.4775, + "step": 20361 + }, + { + "epoch": 0.24, + "grad_norm": 21.048450118962087, + "learning_rate": 1.955521954674081e-05, + "loss": 1.496, + "step": 20364 + }, + { + "epoch": 0.24, + "grad_norm": 18.01125455581195, + "learning_rate": 1.955504724466178e-05, + "loss": 1.762, + "step": 20367 + }, + { + "epoch": 0.24, + "grad_norm": 15.960124459024739, + "learning_rate": 1.9554874909974817e-05, + "loss": 1.3376, + "step": 20370 + }, + { + "epoch": 0.24, + "grad_norm": 43.46656840647753, + "learning_rate": 1.9554702542680507e-05, + "loss": 1.6993, + "step": 20373 + }, + { + "epoch": 0.25, + "grad_norm": 27.659814471916135, + "learning_rate": 1.9554530142779437e-05, + "loss": 1.6567, + "step": 20376 + }, + { + "epoch": 0.25, + "grad_norm": 20.417400254740084, + "learning_rate": 1.95543577102722e-05, + "loss": 1.3716, + "step": 20379 + }, + { + "epoch": 0.25, + "grad_norm": 14.683005821514046, + "learning_rate": 1.955418524515938e-05, + "loss": 1.4689, + "step": 20382 + }, + { + "epoch": 0.25, + "grad_norm": 9.828435209398833, + "learning_rate": 1.9554012747441567e-05, + "loss": 1.8148, + "step": 20385 + }, + { + "epoch": 0.25, + "grad_norm": 14.788170808187685, + "learning_rate": 1.955384021711935e-05, + "loss": 1.7259, + "step": 20388 + }, + { + "epoch": 0.25, + "grad_norm": 30.967155466330993, + "learning_rate": 1.955366765419332e-05, + "loss": 1.5044, + "step": 20391 + }, + { + "epoch": 0.25, + "grad_norm": 46.53981549080747, + "learning_rate": 1.9553495058664057e-05, + "loss": 1.9455, + "step": 20394 + }, + { + "epoch": 0.25, + "grad_norm": 25.153207004106083, + "learning_rate": 1.955332243053216e-05, + "loss": 1.2703, + "step": 20397 + }, + { + "epoch": 0.25, + "grad_norm": 21.0033083036818, + "learning_rate": 1.9553149769798218e-05, + "loss": 1.4714, + "step": 20400 + }, + { + "epoch": 0.25, + "grad_norm": 19.939300127113313, + "learning_rate": 1.9552977076462817e-05, + "loss": 1.3806, + "step": 20403 + }, + { + "epoch": 0.25, + "grad_norm": 31.699043180780997, + "learning_rate": 1.955280435052654e-05, + "loss": 1.4593, + "step": 20406 + }, + { + "epoch": 0.25, + "grad_norm": 42.47320049037031, + "learning_rate": 1.9552631591989986e-05, + "loss": 1.6755, + "step": 20409 + }, + { + "epoch": 0.25, + "grad_norm": 4.630333757584853, + "learning_rate": 1.9552458800853742e-05, + "loss": 1.1988, + "step": 20412 + }, + { + "epoch": 0.25, + "grad_norm": 27.104442536507477, + "learning_rate": 1.95522859771184e-05, + "loss": 1.5428, + "step": 20415 + }, + { + "epoch": 0.25, + "grad_norm": 8.175206501400408, + "learning_rate": 1.955211312078454e-05, + "loss": 1.6012, + "step": 20418 + }, + { + "epoch": 0.25, + "grad_norm": 30.481967923649016, + "learning_rate": 1.9551940231852763e-05, + "loss": 1.5192, + "step": 20421 + }, + { + "epoch": 0.25, + "grad_norm": 16.14106660299924, + "learning_rate": 1.9551767310323654e-05, + "loss": 1.0213, + "step": 20424 + }, + { + "epoch": 0.25, + "grad_norm": 14.911740240267768, + "learning_rate": 1.95515943561978e-05, + "loss": 1.2941, + "step": 20427 + }, + { + "epoch": 0.25, + "grad_norm": 8.23541662027079, + "learning_rate": 1.9551421369475797e-05, + "loss": 1.3827, + "step": 20430 + }, + { + "epoch": 0.25, + "grad_norm": 29.81201768568006, + "learning_rate": 1.9551248350158235e-05, + "loss": 1.8604, + "step": 20433 + }, + { + "epoch": 0.25, + "grad_norm": 58.25950687809922, + "learning_rate": 1.95510752982457e-05, + "loss": 1.4766, + "step": 20436 + }, + { + "epoch": 0.25, + "grad_norm": 29.303400297480408, + "learning_rate": 1.9550902213738788e-05, + "loss": 1.5933, + "step": 20439 + }, + { + "epoch": 0.25, + "grad_norm": 21.833677299637543, + "learning_rate": 1.955072909663809e-05, + "loss": 1.5835, + "step": 20442 + }, + { + "epoch": 0.25, + "grad_norm": 7.158831376793773, + "learning_rate": 1.9550555946944186e-05, + "loss": 1.826, + "step": 20445 + }, + { + "epoch": 0.25, + "grad_norm": 11.87136479693767, + "learning_rate": 1.9550382764657678e-05, + "loss": 1.6487, + "step": 20448 + }, + { + "epoch": 0.25, + "grad_norm": 38.24222286166807, + "learning_rate": 1.9550209549779155e-05, + "loss": 1.6418, + "step": 20451 + }, + { + "epoch": 0.25, + "grad_norm": 23.25590753246809, + "learning_rate": 1.9550036302309206e-05, + "loss": 1.4395, + "step": 20454 + }, + { + "epoch": 0.25, + "grad_norm": 39.429097711904845, + "learning_rate": 1.954986302224842e-05, + "loss": 1.339, + "step": 20457 + }, + { + "epoch": 0.25, + "grad_norm": 34.78389145079986, + "learning_rate": 1.9549689709597396e-05, + "loss": 1.4262, + "step": 20460 + }, + { + "epoch": 0.25, + "grad_norm": 74.09170246352991, + "learning_rate": 1.954951636435672e-05, + "loss": 1.4662, + "step": 20463 + }, + { + "epoch": 0.25, + "grad_norm": 5.914806294605649, + "learning_rate": 1.9549342986526982e-05, + "loss": 1.57, + "step": 20466 + }, + { + "epoch": 0.25, + "grad_norm": 21.46718911787064, + "learning_rate": 1.954916957610878e-05, + "loss": 1.6923, + "step": 20469 + }, + { + "epoch": 0.25, + "grad_norm": 10.09443083215974, + "learning_rate": 1.9548996133102694e-05, + "loss": 1.5642, + "step": 20472 + }, + { + "epoch": 0.25, + "grad_norm": 29.26041409659579, + "learning_rate": 1.954882265750933e-05, + "loss": 1.533, + "step": 20475 + }, + { + "epoch": 0.25, + "grad_norm": 32.372869218371605, + "learning_rate": 1.9548649149329275e-05, + "loss": 1.3417, + "step": 20478 + }, + { + "epoch": 0.25, + "grad_norm": 13.614256042165545, + "learning_rate": 1.954847560856312e-05, + "loss": 1.9618, + "step": 20481 + }, + { + "epoch": 0.25, + "grad_norm": 8.576433325625333, + "learning_rate": 1.9548302035211454e-05, + "loss": 1.7313, + "step": 20484 + }, + { + "epoch": 0.25, + "grad_norm": 30.526113543731064, + "learning_rate": 1.954812842927487e-05, + "loss": 1.4761, + "step": 20487 + }, + { + "epoch": 0.25, + "grad_norm": 20.11275852116998, + "learning_rate": 1.9547954790753968e-05, + "loss": 1.4701, + "step": 20490 + }, + { + "epoch": 0.25, + "grad_norm": 30.808641102220054, + "learning_rate": 1.954778111964933e-05, + "loss": 1.0501, + "step": 20493 + }, + { + "epoch": 0.25, + "grad_norm": 3.601899836945254, + "learning_rate": 1.954760741596156e-05, + "loss": 1.4413, + "step": 20496 + }, + { + "epoch": 0.25, + "grad_norm": 21.630910791887825, + "learning_rate": 1.9547433679691243e-05, + "loss": 1.8471, + "step": 20499 + }, + { + "epoch": 0.25, + "grad_norm": 17.817348568928534, + "learning_rate": 1.9547259910838973e-05, + "loss": 1.3132, + "step": 20502 + }, + { + "epoch": 0.25, + "grad_norm": 27.130330986122082, + "learning_rate": 1.9547086109405343e-05, + "loss": 1.5263, + "step": 20505 + }, + { + "epoch": 0.25, + "grad_norm": 228.9784191684936, + "learning_rate": 1.9546912275390947e-05, + "loss": 1.5635, + "step": 20508 + }, + { + "epoch": 0.25, + "grad_norm": 44.348654721385195, + "learning_rate": 1.954673840879638e-05, + "loss": 1.4818, + "step": 20511 + }, + { + "epoch": 0.25, + "grad_norm": 16.17319208478345, + "learning_rate": 1.9546564509622233e-05, + "loss": 1.6107, + "step": 20514 + }, + { + "epoch": 0.25, + "grad_norm": 73.76392292333203, + "learning_rate": 1.9546390577869098e-05, + "loss": 1.5405, + "step": 20517 + }, + { + "epoch": 0.25, + "grad_norm": 50.985111232248464, + "learning_rate": 1.9546216613537575e-05, + "loss": 1.6326, + "step": 20520 + }, + { + "epoch": 0.25, + "grad_norm": 6.266202499970813, + "learning_rate": 1.9546042616628253e-05, + "loss": 1.354, + "step": 20523 + }, + { + "epoch": 0.25, + "grad_norm": 96.04622754669207, + "learning_rate": 1.9545868587141722e-05, + "loss": 1.135, + "step": 20526 + }, + { + "epoch": 0.25, + "grad_norm": 5.766854593346703, + "learning_rate": 1.954569452507858e-05, + "loss": 1.6752, + "step": 20529 + }, + { + "epoch": 0.25, + "grad_norm": 5.80472870839077, + "learning_rate": 1.9545520430439426e-05, + "loss": 1.6259, + "step": 20532 + }, + { + "epoch": 0.25, + "grad_norm": 14.774647497422468, + "learning_rate": 1.9545346303224842e-05, + "loss": 1.3389, + "step": 20535 + }, + { + "epoch": 0.25, + "grad_norm": 20.318319733006838, + "learning_rate": 1.954517214343543e-05, + "loss": 1.8465, + "step": 20538 + }, + { + "epoch": 0.25, + "grad_norm": 11.081433847718197, + "learning_rate": 1.954499795107179e-05, + "loss": 1.419, + "step": 20541 + }, + { + "epoch": 0.25, + "grad_norm": 9.285272555454352, + "learning_rate": 1.9544823726134504e-05, + "loss": 1.4273, + "step": 20544 + }, + { + "epoch": 0.25, + "grad_norm": 26.44756237950265, + "learning_rate": 1.9544649468624176e-05, + "loss": 1.644, + "step": 20547 + }, + { + "epoch": 0.25, + "grad_norm": 8.120500309375196, + "learning_rate": 1.9544475178541397e-05, + "loss": 1.8413, + "step": 20550 + }, + { + "epoch": 0.25, + "grad_norm": 28.677480189889383, + "learning_rate": 1.954430085588676e-05, + "loss": 1.0047, + "step": 20553 + }, + { + "epoch": 0.25, + "grad_norm": 28.73981861102047, + "learning_rate": 1.9544126500660866e-05, + "loss": 1.4939, + "step": 20556 + }, + { + "epoch": 0.25, + "grad_norm": 13.459170698280081, + "learning_rate": 1.95439521128643e-05, + "loss": 1.3075, + "step": 20559 + }, + { + "epoch": 0.25, + "grad_norm": 22.88526567033132, + "learning_rate": 1.954377769249767e-05, + "loss": 1.5102, + "step": 20562 + }, + { + "epoch": 0.25, + "grad_norm": 7.490496295317321, + "learning_rate": 1.954360323956156e-05, + "loss": 1.4729, + "step": 20565 + }, + { + "epoch": 0.25, + "grad_norm": 9.177348074160019, + "learning_rate": 1.954342875405657e-05, + "loss": 1.7983, + "step": 20568 + }, + { + "epoch": 0.25, + "grad_norm": 58.70142363326042, + "learning_rate": 1.95432542359833e-05, + "loss": 1.3171, + "step": 20571 + }, + { + "epoch": 0.25, + "grad_norm": 21.193097847169515, + "learning_rate": 1.9543079685342335e-05, + "loss": 1.7713, + "step": 20574 + }, + { + "epoch": 0.25, + "grad_norm": 9.750094392689407, + "learning_rate": 1.9542905102134277e-05, + "loss": 1.5911, + "step": 20577 + }, + { + "epoch": 0.25, + "grad_norm": 6.759204308584949, + "learning_rate": 1.9542730486359726e-05, + "loss": 1.4857, + "step": 20580 + }, + { + "epoch": 0.25, + "grad_norm": 30.964094169960696, + "learning_rate": 1.9542555838019268e-05, + "loss": 1.7321, + "step": 20583 + }, + { + "epoch": 0.25, + "grad_norm": 13.562627571564787, + "learning_rate": 1.9542381157113505e-05, + "loss": 1.6854, + "step": 20586 + }, + { + "epoch": 0.25, + "grad_norm": 10.084678967601644, + "learning_rate": 1.9542206443643034e-05, + "loss": 1.4928, + "step": 20589 + }, + { + "epoch": 0.25, + "grad_norm": 8.486264941589567, + "learning_rate": 1.954203169760845e-05, + "loss": 1.4417, + "step": 20592 + }, + { + "epoch": 0.25, + "grad_norm": 15.295245080264785, + "learning_rate": 1.9541856919010344e-05, + "loss": 1.4152, + "step": 20595 + }, + { + "epoch": 0.25, + "grad_norm": 28.695800382549777, + "learning_rate": 1.954168210784932e-05, + "loss": 1.7218, + "step": 20598 + }, + { + "epoch": 0.25, + "grad_norm": 32.35498724470499, + "learning_rate": 1.9541507264125973e-05, + "loss": 1.9323, + "step": 20601 + }, + { + "epoch": 0.25, + "grad_norm": 22.179525339847935, + "learning_rate": 1.95413323878409e-05, + "loss": 1.6295, + "step": 20604 + }, + { + "epoch": 0.25, + "grad_norm": 41.49250165220426, + "learning_rate": 1.9541157478994693e-05, + "loss": 1.3926, + "step": 20607 + }, + { + "epoch": 0.25, + "grad_norm": 3.9522621927449992, + "learning_rate": 1.9540982537587955e-05, + "loss": 1.3781, + "step": 20610 + }, + { + "epoch": 0.25, + "grad_norm": 6.144292044018755, + "learning_rate": 1.9540807563621276e-05, + "loss": 1.0706, + "step": 20613 + }, + { + "epoch": 0.25, + "grad_norm": 15.330244250814646, + "learning_rate": 1.954063255709526e-05, + "loss": 1.7465, + "step": 20616 + }, + { + "epoch": 0.25, + "grad_norm": 17.93993830018255, + "learning_rate": 1.9540457518010503e-05, + "loss": 1.8444, + "step": 20619 + }, + { + "epoch": 0.25, + "grad_norm": 36.23470169256588, + "learning_rate": 1.95402824463676e-05, + "loss": 1.2312, + "step": 20622 + }, + { + "epoch": 0.25, + "grad_norm": 59.40829985000449, + "learning_rate": 1.954010734216715e-05, + "loss": 1.7413, + "step": 20625 + }, + { + "epoch": 0.25, + "grad_norm": 8.946509513110662, + "learning_rate": 1.953993220540975e-05, + "loss": 1.9692, + "step": 20628 + }, + { + "epoch": 0.25, + "grad_norm": 28.953367195288404, + "learning_rate": 1.9539757036095995e-05, + "loss": 1.7179, + "step": 20631 + }, + { + "epoch": 0.25, + "grad_norm": 36.584962375153644, + "learning_rate": 1.953958183422649e-05, + "loss": 1.872, + "step": 20634 + }, + { + "epoch": 0.25, + "grad_norm": 7.760191313564183, + "learning_rate": 1.9539406599801825e-05, + "loss": 1.5387, + "step": 20637 + }, + { + "epoch": 0.25, + "grad_norm": 14.296379892658896, + "learning_rate": 1.9539231332822602e-05, + "loss": 1.7655, + "step": 20640 + }, + { + "epoch": 0.25, + "grad_norm": 14.718620218061735, + "learning_rate": 1.9539056033289415e-05, + "loss": 1.9289, + "step": 20643 + }, + { + "epoch": 0.25, + "grad_norm": 26.994544523306818, + "learning_rate": 1.953888070120287e-05, + "loss": 1.169, + "step": 20646 + }, + { + "epoch": 0.25, + "grad_norm": 16.154566164242812, + "learning_rate": 1.953870533656356e-05, + "loss": 1.8952, + "step": 20649 + }, + { + "epoch": 0.25, + "grad_norm": 16.199017227116087, + "learning_rate": 1.9538529939372087e-05, + "loss": 1.779, + "step": 20652 + }, + { + "epoch": 0.25, + "grad_norm": 26.831274087232845, + "learning_rate": 1.953835450962904e-05, + "loss": 1.5542, + "step": 20655 + }, + { + "epoch": 0.25, + "grad_norm": 23.18528895445083, + "learning_rate": 1.953817904733503e-05, + "loss": 1.4667, + "step": 20658 + }, + { + "epoch": 0.25, + "grad_norm": 4.367542210309606, + "learning_rate": 1.9538003552490653e-05, + "loss": 1.2255, + "step": 20661 + }, + { + "epoch": 0.25, + "grad_norm": 11.375280979673512, + "learning_rate": 1.9537828025096503e-05, + "loss": 1.6545, + "step": 20664 + }, + { + "epoch": 0.25, + "grad_norm": 18.425941610941475, + "learning_rate": 1.953765246515318e-05, + "loss": 1.4529, + "step": 20667 + }, + { + "epoch": 0.25, + "grad_norm": 25.292558759681974, + "learning_rate": 1.9537476872661286e-05, + "loss": 1.3905, + "step": 20670 + }, + { + "epoch": 0.25, + "grad_norm": 9.83752937222081, + "learning_rate": 1.9537301247621418e-05, + "loss": 1.8063, + "step": 20673 + }, + { + "epoch": 0.25, + "grad_norm": 25.786152116697213, + "learning_rate": 1.9537125590034177e-05, + "loss": 1.2729, + "step": 20676 + }, + { + "epoch": 0.25, + "grad_norm": 10.57413594502658, + "learning_rate": 1.953694989990016e-05, + "loss": 1.5959, + "step": 20679 + }, + { + "epoch": 0.25, + "grad_norm": 7.498148815523775, + "learning_rate": 1.953677417721997e-05, + "loss": 1.4552, + "step": 20682 + }, + { + "epoch": 0.25, + "grad_norm": 7.819475174965232, + "learning_rate": 1.9536598421994206e-05, + "loss": 1.5578, + "step": 20685 + }, + { + "epoch": 0.25, + "grad_norm": 8.972969971077159, + "learning_rate": 1.9536422634223463e-05, + "loss": 1.7876, + "step": 20688 + }, + { + "epoch": 0.25, + "grad_norm": 4.31972739076345, + "learning_rate": 1.9536246813908345e-05, + "loss": 1.2874, + "step": 20691 + }, + { + "epoch": 0.25, + "grad_norm": 41.47410553359656, + "learning_rate": 1.9536070961049452e-05, + "loss": 1.6335, + "step": 20694 + }, + { + "epoch": 0.25, + "grad_norm": 53.741897245895025, + "learning_rate": 1.9535895075647388e-05, + "loss": 1.3549, + "step": 20697 + }, + { + "epoch": 0.25, + "grad_norm": 37.86376327537277, + "learning_rate": 1.9535719157702748e-05, + "loss": 1.5089, + "step": 20700 + }, + { + "epoch": 0.25, + "grad_norm": 8.681272513565354, + "learning_rate": 1.9535543207216128e-05, + "loss": 1.2688, + "step": 20703 + }, + { + "epoch": 0.25, + "grad_norm": 21.46130158215606, + "learning_rate": 1.9535367224188136e-05, + "loss": 2.2806, + "step": 20706 + }, + { + "epoch": 0.25, + "grad_norm": 8.186972870467162, + "learning_rate": 1.953519120861937e-05, + "loss": 1.4395, + "step": 20709 + }, + { + "epoch": 0.25, + "grad_norm": 26.285624206145883, + "learning_rate": 1.9535015160510432e-05, + "loss": 1.6096, + "step": 20712 + }, + { + "epoch": 0.25, + "grad_norm": 6.188073685125169, + "learning_rate": 1.9534839079861923e-05, + "loss": 1.6594, + "step": 20715 + }, + { + "epoch": 0.25, + "grad_norm": 6.9248284146781005, + "learning_rate": 1.9534662966674444e-05, + "loss": 1.6318, + "step": 20718 + }, + { + "epoch": 0.25, + "grad_norm": 24.294238712384235, + "learning_rate": 1.9534486820948587e-05, + "loss": 1.7443, + "step": 20721 + }, + { + "epoch": 0.25, + "grad_norm": 25.226443125808622, + "learning_rate": 1.953431064268497e-05, + "loss": 1.4458, + "step": 20724 + }, + { + "epoch": 0.25, + "grad_norm": 12.874703913234535, + "learning_rate": 1.953413443188418e-05, + "loss": 1.6821, + "step": 20727 + }, + { + "epoch": 0.25, + "grad_norm": 77.30197109899123, + "learning_rate": 1.953395818854682e-05, + "loss": 1.7508, + "step": 20730 + }, + { + "epoch": 0.25, + "grad_norm": 16.388941648756663, + "learning_rate": 1.9533781912673502e-05, + "loss": 1.5304, + "step": 20733 + }, + { + "epoch": 0.25, + "grad_norm": 24.18983133216973, + "learning_rate": 1.9533605604264816e-05, + "loss": 1.8701, + "step": 20736 + }, + { + "epoch": 0.25, + "grad_norm": 8.958536114895123, + "learning_rate": 1.9533429263321372e-05, + "loss": 1.6929, + "step": 20739 + }, + { + "epoch": 0.25, + "grad_norm": 35.793502636226705, + "learning_rate": 1.9533252889843765e-05, + "loss": 1.4645, + "step": 20742 + }, + { + "epoch": 0.25, + "grad_norm": 46.172524397876145, + "learning_rate": 1.9533076483832598e-05, + "loss": 1.7873, + "step": 20745 + }, + { + "epoch": 0.25, + "grad_norm": 32.805057641776656, + "learning_rate": 1.9532900045288477e-05, + "loss": 1.6287, + "step": 20748 + }, + { + "epoch": 0.25, + "grad_norm": 14.186554981507236, + "learning_rate": 1.9532723574212e-05, + "loss": 1.4538, + "step": 20751 + }, + { + "epoch": 0.25, + "grad_norm": 14.650440908597007, + "learning_rate": 1.9532547070603772e-05, + "loss": 1.4712, + "step": 20754 + }, + { + "epoch": 0.25, + "grad_norm": 24.520916156154946, + "learning_rate": 1.9532370534464397e-05, + "loss": 1.5858, + "step": 20757 + }, + { + "epoch": 0.25, + "grad_norm": 41.497747361926585, + "learning_rate": 1.9532193965794475e-05, + "loss": 1.3127, + "step": 20760 + }, + { + "epoch": 0.25, + "grad_norm": 18.947932900239785, + "learning_rate": 1.9532017364594607e-05, + "loss": 1.9196, + "step": 20763 + }, + { + "epoch": 0.25, + "grad_norm": 22.04845507146954, + "learning_rate": 1.9531840730865395e-05, + "loss": 1.5419, + "step": 20766 + }, + { + "epoch": 0.25, + "grad_norm": 9.728257434117003, + "learning_rate": 1.9531664064607444e-05, + "loss": 1.6438, + "step": 20769 + }, + { + "epoch": 0.25, + "grad_norm": 3.613284516483109, + "learning_rate": 1.953148736582136e-05, + "loss": 1.7576, + "step": 20772 + }, + { + "epoch": 0.25, + "grad_norm": 13.27311185637416, + "learning_rate": 1.953131063450774e-05, + "loss": 1.2574, + "step": 20775 + }, + { + "epoch": 0.25, + "grad_norm": 14.914010179597955, + "learning_rate": 1.9531133870667193e-05, + "loss": 1.7668, + "step": 20778 + }, + { + "epoch": 0.25, + "grad_norm": 33.42875041388492, + "learning_rate": 1.9530957074300316e-05, + "loss": 2.1532, + "step": 20781 + }, + { + "epoch": 0.25, + "grad_norm": 21.694870430390228, + "learning_rate": 1.953078024540772e-05, + "loss": 1.9594, + "step": 20784 + }, + { + "epoch": 0.25, + "grad_norm": 6.356071876654072, + "learning_rate": 1.953060338399e-05, + "loss": 1.197, + "step": 20787 + }, + { + "epoch": 0.25, + "grad_norm": 23.9948969485163, + "learning_rate": 1.9530426490047764e-05, + "loss": 1.7178, + "step": 20790 + }, + { + "epoch": 0.25, + "grad_norm": 34.995030045289575, + "learning_rate": 1.9530249563581614e-05, + "loss": 1.5555, + "step": 20793 + }, + { + "epoch": 0.25, + "grad_norm": 7.766487138039959, + "learning_rate": 1.953007260459216e-05, + "loss": 1.7808, + "step": 20796 + }, + { + "epoch": 0.25, + "grad_norm": 10.051520796431717, + "learning_rate": 1.9529895613079995e-05, + "loss": 1.6004, + "step": 20799 + }, + { + "epoch": 0.25, + "grad_norm": 19.10987446045994, + "learning_rate": 1.9529718589045732e-05, + "loss": 1.5237, + "step": 20802 + }, + { + "epoch": 0.25, + "grad_norm": 3.8688229858301235, + "learning_rate": 1.952954153248997e-05, + "loss": 1.3249, + "step": 20805 + }, + { + "epoch": 0.25, + "grad_norm": 11.736975737527635, + "learning_rate": 1.952936444341332e-05, + "loss": 1.5171, + "step": 20808 + }, + { + "epoch": 0.25, + "grad_norm": 10.896432233545626, + "learning_rate": 1.9529187321816376e-05, + "loss": 1.1781, + "step": 20811 + }, + { + "epoch": 0.25, + "grad_norm": 9.213019420766546, + "learning_rate": 1.952901016769975e-05, + "loss": 1.5127, + "step": 20814 + }, + { + "epoch": 0.25, + "grad_norm": 7.065067060761002, + "learning_rate": 1.952883298106405e-05, + "loss": 1.611, + "step": 20817 + }, + { + "epoch": 0.25, + "grad_norm": 19.27042641791431, + "learning_rate": 1.9528655761909868e-05, + "loss": 1.3966, + "step": 20820 + }, + { + "epoch": 0.25, + "grad_norm": 20.847920914299042, + "learning_rate": 1.952847851023782e-05, + "loss": 1.5833, + "step": 20823 + }, + { + "epoch": 0.25, + "grad_norm": 14.385030481543614, + "learning_rate": 1.9528301226048506e-05, + "loss": 1.716, + "step": 20826 + }, + { + "epoch": 0.25, + "grad_norm": 17.162778133806196, + "learning_rate": 1.952812390934253e-05, + "loss": 1.4719, + "step": 20829 + }, + { + "epoch": 0.25, + "grad_norm": 11.34659616728471, + "learning_rate": 1.95279465601205e-05, + "loss": 1.6415, + "step": 20832 + }, + { + "epoch": 0.25, + "grad_norm": 6.546470356480042, + "learning_rate": 1.952776917838302e-05, + "loss": 1.3228, + "step": 20835 + }, + { + "epoch": 0.25, + "grad_norm": 13.570470903748266, + "learning_rate": 1.95275917641307e-05, + "loss": 1.4331, + "step": 20838 + }, + { + "epoch": 0.25, + "grad_norm": 7.249728820037217, + "learning_rate": 1.9527414317364137e-05, + "loss": 1.6489, + "step": 20841 + }, + { + "epoch": 0.25, + "grad_norm": 22.123161646862112, + "learning_rate": 1.9527236838083943e-05, + "loss": 1.3318, + "step": 20844 + }, + { + "epoch": 0.25, + "grad_norm": 7.049097163293909, + "learning_rate": 1.9527059326290718e-05, + "loss": 1.7144, + "step": 20847 + }, + { + "epoch": 0.25, + "grad_norm": 11.082510688730464, + "learning_rate": 1.9526881781985075e-05, + "loss": 1.3301, + "step": 20850 + }, + { + "epoch": 0.25, + "grad_norm": 61.088454269492175, + "learning_rate": 1.9526704205167614e-05, + "loss": 1.483, + "step": 20853 + }, + { + "epoch": 0.25, + "grad_norm": 19.977146371576406, + "learning_rate": 1.952652659583894e-05, + "loss": 1.6078, + "step": 20856 + }, + { + "epoch": 0.25, + "grad_norm": 22.50803627075859, + "learning_rate": 1.9526348953999667e-05, + "loss": 1.3605, + "step": 20859 + }, + { + "epoch": 0.25, + "grad_norm": 7.229312131800825, + "learning_rate": 1.9526171279650398e-05, + "loss": 1.5208, + "step": 20862 + }, + { + "epoch": 0.25, + "grad_norm": 3.8783350755183643, + "learning_rate": 1.9525993572791733e-05, + "loss": 2.0268, + "step": 20865 + }, + { + "epoch": 0.25, + "grad_norm": 23.181626532776445, + "learning_rate": 1.9525815833424282e-05, + "loss": 1.4591, + "step": 20868 + }, + { + "epoch": 0.25, + "grad_norm": 9.885503719934446, + "learning_rate": 1.952563806154866e-05, + "loss": 1.4787, + "step": 20871 + }, + { + "epoch": 0.25, + "grad_norm": 3.7706434925356, + "learning_rate": 1.952546025716546e-05, + "loss": 1.647, + "step": 20874 + }, + { + "epoch": 0.25, + "grad_norm": 10.594106194685862, + "learning_rate": 1.9525282420275297e-05, + "loss": 1.5031, + "step": 20877 + }, + { + "epoch": 0.25, + "grad_norm": 6.651804063098718, + "learning_rate": 1.9525104550878775e-05, + "loss": 1.5424, + "step": 20880 + }, + { + "epoch": 0.25, + "grad_norm": 23.5180928195654, + "learning_rate": 1.9524926648976503e-05, + "loss": 1.601, + "step": 20883 + }, + { + "epoch": 0.25, + "grad_norm": 5.7001564542711085, + "learning_rate": 1.9524748714569087e-05, + "loss": 2.0129, + "step": 20886 + }, + { + "epoch": 0.25, + "grad_norm": 16.894018989588034, + "learning_rate": 1.9524570747657135e-05, + "loss": 1.3237, + "step": 20889 + }, + { + "epoch": 0.25, + "grad_norm": 14.286788883855213, + "learning_rate": 1.9524392748241255e-05, + "loss": 1.6151, + "step": 20892 + }, + { + "epoch": 0.25, + "grad_norm": 3.6782412067737105, + "learning_rate": 1.9524214716322052e-05, + "loss": 1.4283, + "step": 20895 + }, + { + "epoch": 0.25, + "grad_norm": 29.855393426733986, + "learning_rate": 1.9524036651900134e-05, + "loss": 1.6432, + "step": 20898 + }, + { + "epoch": 0.25, + "grad_norm": 27.41473461713297, + "learning_rate": 1.952385855497611e-05, + "loss": 1.3993, + "step": 20901 + }, + { + "epoch": 0.25, + "grad_norm": 8.904734208629831, + "learning_rate": 1.9523680425550585e-05, + "loss": 1.6399, + "step": 20904 + }, + { + "epoch": 0.25, + "grad_norm": 10.65202429272528, + "learning_rate": 1.9523502263624174e-05, + "loss": 1.2966, + "step": 20907 + }, + { + "epoch": 0.25, + "grad_norm": 8.528709992275104, + "learning_rate": 1.9523324069197478e-05, + "loss": 1.4489, + "step": 20910 + }, + { + "epoch": 0.25, + "grad_norm": 25.73013019867289, + "learning_rate": 1.9523145842271107e-05, + "loss": 1.8062, + "step": 20913 + }, + { + "epoch": 0.25, + "grad_norm": 36.58888212960525, + "learning_rate": 1.952296758284567e-05, + "loss": 1.6525, + "step": 20916 + }, + { + "epoch": 0.25, + "grad_norm": 36.33193575225836, + "learning_rate": 1.9522789290921772e-05, + "loss": 1.3011, + "step": 20919 + }, + { + "epoch": 0.25, + "grad_norm": 9.85191122996303, + "learning_rate": 1.952261096650003e-05, + "loss": 1.4895, + "step": 20922 + }, + { + "epoch": 0.25, + "grad_norm": 7.401139713200372, + "learning_rate": 1.952243260958104e-05, + "loss": 1.6598, + "step": 20925 + }, + { + "epoch": 0.25, + "grad_norm": 7.225418395423611, + "learning_rate": 1.952225422016542e-05, + "loss": 1.5094, + "step": 20928 + }, + { + "epoch": 0.25, + "grad_norm": 27.455208415667737, + "learning_rate": 1.9522075798253776e-05, + "loss": 1.4296, + "step": 20931 + }, + { + "epoch": 0.25, + "grad_norm": 7.44947925328843, + "learning_rate": 1.952189734384672e-05, + "loss": 1.522, + "step": 20934 + }, + { + "epoch": 0.25, + "grad_norm": 23.459398611638566, + "learning_rate": 1.9521718856944855e-05, + "loss": 1.4114, + "step": 20937 + }, + { + "epoch": 0.25, + "grad_norm": 21.126185359149908, + "learning_rate": 1.9521540337548798e-05, + "loss": 1.8694, + "step": 20940 + }, + { + "epoch": 0.25, + "grad_norm": 123.33467815706669, + "learning_rate": 1.9521361785659148e-05, + "loss": 1.6134, + "step": 20943 + }, + { + "epoch": 0.25, + "grad_norm": 72.30132268422246, + "learning_rate": 1.952118320127652e-05, + "loss": 1.9658, + "step": 20946 + }, + { + "epoch": 0.25, + "grad_norm": 61.50907067609504, + "learning_rate": 1.9521004584401525e-05, + "loss": 1.842, + "step": 20949 + }, + { + "epoch": 0.25, + "grad_norm": 20.91800917185135, + "learning_rate": 1.952082593503477e-05, + "loss": 1.8661, + "step": 20952 + }, + { + "epoch": 0.25, + "grad_norm": 103.2071200960869, + "learning_rate": 1.9520647253176867e-05, + "loss": 1.2678, + "step": 20955 + }, + { + "epoch": 0.25, + "grad_norm": 13.649961295001802, + "learning_rate": 1.9520468538828425e-05, + "loss": 1.4014, + "step": 20958 + }, + { + "epoch": 0.25, + "grad_norm": 4.237486882519325, + "learning_rate": 1.952028979199005e-05, + "loss": 1.4703, + "step": 20961 + }, + { + "epoch": 0.25, + "grad_norm": 14.165068548417535, + "learning_rate": 1.9520111012662356e-05, + "loss": 1.8595, + "step": 20964 + }, + { + "epoch": 0.25, + "grad_norm": 13.229046046302608, + "learning_rate": 1.9519932200845956e-05, + "loss": 1.1604, + "step": 20967 + }, + { + "epoch": 0.25, + "grad_norm": 10.894934154577562, + "learning_rate": 1.951975335654145e-05, + "loss": 1.4922, + "step": 20970 + }, + { + "epoch": 0.25, + "grad_norm": 29.189844959795554, + "learning_rate": 1.951957447974946e-05, + "loss": 1.8469, + "step": 20973 + }, + { + "epoch": 0.25, + "grad_norm": 17.642843731732796, + "learning_rate": 1.9519395570470586e-05, + "loss": 1.7558, + "step": 20976 + }, + { + "epoch": 0.25, + "grad_norm": 5.570391852726865, + "learning_rate": 1.951921662870545e-05, + "loss": 1.8631, + "step": 20979 + }, + { + "epoch": 0.25, + "grad_norm": 37.670414628557516, + "learning_rate": 1.9519037654454652e-05, + "loss": 1.6106, + "step": 20982 + }, + { + "epoch": 0.25, + "grad_norm": 47.34689279279558, + "learning_rate": 1.9518858647718813e-05, + "loss": 1.5541, + "step": 20985 + }, + { + "epoch": 0.25, + "grad_norm": 16.324755514402632, + "learning_rate": 1.951867960849853e-05, + "loss": 2.1243, + "step": 20988 + }, + { + "epoch": 0.25, + "grad_norm": 10.956824933069049, + "learning_rate": 1.9518500536794428e-05, + "loss": 1.3011, + "step": 20991 + }, + { + "epoch": 0.25, + "grad_norm": 16.739498650623286, + "learning_rate": 1.951832143260711e-05, + "loss": 1.2319, + "step": 20994 + }, + { + "epoch": 0.25, + "grad_norm": 9.342142881829568, + "learning_rate": 1.951814229593719e-05, + "loss": 1.4513, + "step": 20997 + }, + { + "epoch": 0.25, + "grad_norm": 6.052089684672171, + "learning_rate": 1.951796312678528e-05, + "loss": 1.3661, + "step": 21000 + }, + { + "epoch": 0.25, + "grad_norm": 12.704464648990951, + "learning_rate": 1.951778392515199e-05, + "loss": 2.1126, + "step": 21003 + }, + { + "epoch": 0.25, + "grad_norm": 16.099845732074826, + "learning_rate": 1.9517604691037927e-05, + "loss": 1.6615, + "step": 21006 + }, + { + "epoch": 0.25, + "grad_norm": 5.974882225520488, + "learning_rate": 1.9517425424443713e-05, + "loss": 1.7341, + "step": 21009 + }, + { + "epoch": 0.25, + "grad_norm": 4.831695335748491, + "learning_rate": 1.9517246125369952e-05, + "loss": 1.465, + "step": 21012 + }, + { + "epoch": 0.25, + "grad_norm": 22.962697219121875, + "learning_rate": 1.9517066793817256e-05, + "loss": 1.5645, + "step": 21015 + }, + { + "epoch": 0.25, + "grad_norm": 12.330854711597347, + "learning_rate": 1.9516887429786243e-05, + "loss": 1.1448, + "step": 21018 + }, + { + "epoch": 0.25, + "grad_norm": 13.044010778725339, + "learning_rate": 1.951670803327752e-05, + "loss": 1.5205, + "step": 21021 + }, + { + "epoch": 0.25, + "grad_norm": 15.58406353529975, + "learning_rate": 1.9516528604291697e-05, + "loss": 1.4774, + "step": 21024 + }, + { + "epoch": 0.25, + "grad_norm": 9.052286430877539, + "learning_rate": 1.9516349142829395e-05, + "loss": 1.5716, + "step": 21027 + }, + { + "epoch": 0.25, + "grad_norm": 23.822815833403588, + "learning_rate": 1.9516169648891215e-05, + "loss": 1.4204, + "step": 21030 + }, + { + "epoch": 0.25, + "grad_norm": 17.458279638271648, + "learning_rate": 1.9515990122477783e-05, + "loss": 1.4526, + "step": 21033 + }, + { + "epoch": 0.25, + "grad_norm": 17.99796593010504, + "learning_rate": 1.95158105635897e-05, + "loss": 1.6239, + "step": 21036 + }, + { + "epoch": 0.25, + "grad_norm": 10.351776045955452, + "learning_rate": 1.951563097222758e-05, + "loss": 1.3824, + "step": 21039 + }, + { + "epoch": 0.25, + "grad_norm": 6.094018136626425, + "learning_rate": 1.9515451348392047e-05, + "loss": 1.5842, + "step": 21042 + }, + { + "epoch": 0.25, + "grad_norm": 20.15190501579731, + "learning_rate": 1.9515271692083702e-05, + "loss": 1.9391, + "step": 21045 + }, + { + "epoch": 0.25, + "grad_norm": 18.171463674888773, + "learning_rate": 1.951509200330316e-05, + "loss": 1.4894, + "step": 21048 + }, + { + "epoch": 0.25, + "grad_norm": 12.705100908953568, + "learning_rate": 1.9514912282051036e-05, + "loss": 1.5107, + "step": 21051 + }, + { + "epoch": 0.25, + "grad_norm": 7.8403566715402615, + "learning_rate": 1.951473252832795e-05, + "loss": 1.4007, + "step": 21054 + }, + { + "epoch": 0.25, + "grad_norm": 21.312431605247276, + "learning_rate": 1.9514552742134506e-05, + "loss": 1.3621, + "step": 21057 + }, + { + "epoch": 0.25, + "grad_norm": 7.876607196517502, + "learning_rate": 1.951437292347132e-05, + "loss": 1.4891, + "step": 21060 + }, + { + "epoch": 0.25, + "grad_norm": 5.437840601403083, + "learning_rate": 1.9514193072339003e-05, + "loss": 1.6559, + "step": 21063 + }, + { + "epoch": 0.25, + "grad_norm": 5.310859023400344, + "learning_rate": 1.9514013188738174e-05, + "loss": 1.5856, + "step": 21066 + }, + { + "epoch": 0.25, + "grad_norm": 14.3075865457402, + "learning_rate": 1.9513833272669446e-05, + "loss": 1.3024, + "step": 21069 + }, + { + "epoch": 0.25, + "grad_norm": 17.264690565454007, + "learning_rate": 1.951365332413343e-05, + "loss": 1.8181, + "step": 21072 + }, + { + "epoch": 0.25, + "grad_norm": 31.952736621697028, + "learning_rate": 1.9513473343130745e-05, + "loss": 1.8312, + "step": 21075 + }, + { + "epoch": 0.25, + "grad_norm": 10.683602900940091, + "learning_rate": 1.9513293329662004e-05, + "loss": 1.4563, + "step": 21078 + }, + { + "epoch": 0.25, + "grad_norm": 39.92162079129305, + "learning_rate": 1.9513113283727814e-05, + "loss": 1.3293, + "step": 21081 + }, + { + "epoch": 0.25, + "grad_norm": 24.250989618843374, + "learning_rate": 1.9512933205328797e-05, + "loss": 1.6362, + "step": 21084 + }, + { + "epoch": 0.25, + "grad_norm": 19.920895279047876, + "learning_rate": 1.9512753094465565e-05, + "loss": 1.3348, + "step": 21087 + }, + { + "epoch": 0.25, + "grad_norm": 9.492077849416225, + "learning_rate": 1.9512572951138735e-05, + "loss": 1.5786, + "step": 21090 + }, + { + "epoch": 0.25, + "grad_norm": 6.878819775656877, + "learning_rate": 1.951239277534892e-05, + "loss": 2.0607, + "step": 21093 + }, + { + "epoch": 0.25, + "grad_norm": 16.733847374673676, + "learning_rate": 1.9512212567096733e-05, + "loss": 1.4225, + "step": 21096 + }, + { + "epoch": 0.25, + "grad_norm": 6.294213673073671, + "learning_rate": 1.9512032326382794e-05, + "loss": 1.6622, + "step": 21099 + }, + { + "epoch": 0.25, + "grad_norm": 23.56793778393052, + "learning_rate": 1.951185205320771e-05, + "loss": 1.528, + "step": 21102 + }, + { + "epoch": 0.25, + "grad_norm": 4.111222795995441, + "learning_rate": 1.9511671747572105e-05, + "loss": 1.3218, + "step": 21105 + }, + { + "epoch": 0.25, + "grad_norm": 8.13404948567256, + "learning_rate": 1.9511491409476587e-05, + "loss": 1.5604, + "step": 21108 + }, + { + "epoch": 0.25, + "grad_norm": 14.193703557193267, + "learning_rate": 1.9511311038921778e-05, + "loss": 1.4566, + "step": 21111 + }, + { + "epoch": 0.25, + "grad_norm": 11.587914701031, + "learning_rate": 1.951113063590829e-05, + "loss": 1.585, + "step": 21114 + }, + { + "epoch": 0.25, + "grad_norm": 37.799313406618175, + "learning_rate": 1.9510950200436737e-05, + "loss": 1.6814, + "step": 21117 + }, + { + "epoch": 0.25, + "grad_norm": 10.784984004850601, + "learning_rate": 1.9510769732507738e-05, + "loss": 1.8933, + "step": 21120 + }, + { + "epoch": 0.25, + "grad_norm": 13.410529951436482, + "learning_rate": 1.951058923212191e-05, + "loss": 1.7754, + "step": 21123 + }, + { + "epoch": 0.25, + "grad_norm": 27.157053885039563, + "learning_rate": 1.9510408699279865e-05, + "loss": 1.6993, + "step": 21126 + }, + { + "epoch": 0.25, + "grad_norm": 7.254746218564496, + "learning_rate": 1.951022813398222e-05, + "loss": 1.3559, + "step": 21129 + }, + { + "epoch": 0.25, + "grad_norm": 18.025171167565443, + "learning_rate": 1.951004753622959e-05, + "loss": 1.2669, + "step": 21132 + }, + { + "epoch": 0.25, + "grad_norm": 25.29241409507247, + "learning_rate": 1.9509866906022596e-05, + "loss": 1.1623, + "step": 21135 + }, + { + "epoch": 0.25, + "grad_norm": 6.718702812874634, + "learning_rate": 1.950968624336185e-05, + "loss": 1.5715, + "step": 21138 + }, + { + "epoch": 0.25, + "grad_norm": 14.020432215064574, + "learning_rate": 1.950950554824797e-05, + "loss": 1.5262, + "step": 21141 + }, + { + "epoch": 0.25, + "grad_norm": 8.110541865806457, + "learning_rate": 1.9509324820681578e-05, + "loss": 1.516, + "step": 21144 + }, + { + "epoch": 0.25, + "grad_norm": 28.51279815255079, + "learning_rate": 1.950914406066328e-05, + "loss": 1.5007, + "step": 21147 + }, + { + "epoch": 0.25, + "grad_norm": 7.150737182332524, + "learning_rate": 1.9508963268193702e-05, + "loss": 1.8556, + "step": 21150 + }, + { + "epoch": 0.25, + "grad_norm": 22.926166300414568, + "learning_rate": 1.9508782443273456e-05, + "loss": 1.3928, + "step": 21153 + }, + { + "epoch": 0.25, + "grad_norm": 33.343328128994095, + "learning_rate": 1.9508601585903157e-05, + "loss": 1.4726, + "step": 21156 + }, + { + "epoch": 0.25, + "grad_norm": 15.513463368635861, + "learning_rate": 1.950842069608343e-05, + "loss": 1.479, + "step": 21159 + }, + { + "epoch": 0.25, + "grad_norm": 13.761119635984011, + "learning_rate": 1.9508239773814888e-05, + "loss": 1.7991, + "step": 21162 + }, + { + "epoch": 0.25, + "grad_norm": 4.278096966216339, + "learning_rate": 1.9508058819098145e-05, + "loss": 1.4394, + "step": 21165 + }, + { + "epoch": 0.25, + "grad_norm": 39.88133888796623, + "learning_rate": 1.9507877831933826e-05, + "loss": 1.4083, + "step": 21168 + }, + { + "epoch": 0.25, + "grad_norm": 11.045881293951103, + "learning_rate": 1.9507696812322545e-05, + "loss": 1.6244, + "step": 21171 + }, + { + "epoch": 0.25, + "grad_norm": 9.219510459013511, + "learning_rate": 1.950751576026492e-05, + "loss": 1.6866, + "step": 21174 + }, + { + "epoch": 0.25, + "grad_norm": 24.180594982095183, + "learning_rate": 1.9507334675761562e-05, + "loss": 1.2848, + "step": 21177 + }, + { + "epoch": 0.25, + "grad_norm": 6.124842957378435, + "learning_rate": 1.95071535588131e-05, + "loss": 1.3259, + "step": 21180 + }, + { + "epoch": 0.25, + "grad_norm": 17.618983437723166, + "learning_rate": 1.950697240942015e-05, + "loss": 1.1851, + "step": 21183 + }, + { + "epoch": 0.25, + "grad_norm": 8.690033545947836, + "learning_rate": 1.9506791227583323e-05, + "loss": 1.5464, + "step": 21186 + }, + { + "epoch": 0.25, + "grad_norm": 17.04261235732003, + "learning_rate": 1.9506610013303243e-05, + "loss": 1.3515, + "step": 21189 + }, + { + "epoch": 0.25, + "grad_norm": 27.643236750338026, + "learning_rate": 1.9506428766580528e-05, + "loss": 1.7767, + "step": 21192 + }, + { + "epoch": 0.25, + "grad_norm": 6.452466075009875, + "learning_rate": 1.9506247487415794e-05, + "loss": 1.5131, + "step": 21195 + }, + { + "epoch": 0.25, + "grad_norm": 24.90022308504558, + "learning_rate": 1.950606617580966e-05, + "loss": 1.2952, + "step": 21198 + }, + { + "epoch": 0.25, + "grad_norm": 11.830168538594789, + "learning_rate": 1.950588483176275e-05, + "loss": 1.6071, + "step": 21201 + }, + { + "epoch": 0.25, + "grad_norm": 14.536752171713596, + "learning_rate": 1.950570345527568e-05, + "loss": 1.3099, + "step": 21204 + }, + { + "epoch": 0.26, + "grad_norm": 7.94246046405842, + "learning_rate": 1.9505522046349065e-05, + "loss": 1.612, + "step": 21207 + }, + { + "epoch": 0.26, + "grad_norm": 7.187236214769691, + "learning_rate": 1.950534060498353e-05, + "loss": 1.4017, + "step": 21210 + }, + { + "epoch": 0.26, + "grad_norm": 17.326838206431038, + "learning_rate": 1.9505159131179686e-05, + "loss": 1.3617, + "step": 21213 + }, + { + "epoch": 0.26, + "grad_norm": 29.613858675460488, + "learning_rate": 1.9504977624938165e-05, + "loss": 1.5746, + "step": 21216 + }, + { + "epoch": 0.26, + "grad_norm": 30.673375141417388, + "learning_rate": 1.9504796086259575e-05, + "loss": 1.6818, + "step": 21219 + }, + { + "epoch": 0.26, + "grad_norm": 17.614495406399428, + "learning_rate": 1.950461451514454e-05, + "loss": 1.597, + "step": 21222 + }, + { + "epoch": 0.26, + "grad_norm": 9.182212064372253, + "learning_rate": 1.9504432911593677e-05, + "loss": 1.6346, + "step": 21225 + }, + { + "epoch": 0.26, + "grad_norm": 9.465887994539125, + "learning_rate": 1.9504251275607612e-05, + "loss": 1.6018, + "step": 21228 + }, + { + "epoch": 0.26, + "grad_norm": 252.6883055487159, + "learning_rate": 1.9504069607186956e-05, + "loss": 1.4596, + "step": 21231 + }, + { + "epoch": 0.26, + "grad_norm": 15.434324964343306, + "learning_rate": 1.950388790633234e-05, + "loss": 1.8885, + "step": 21234 + }, + { + "epoch": 0.26, + "grad_norm": 10.561057654925829, + "learning_rate": 1.9503706173044373e-05, + "loss": 1.4565, + "step": 21237 + }, + { + "epoch": 0.26, + "grad_norm": 61.298997444259804, + "learning_rate": 1.9503524407323683e-05, + "loss": 1.2873, + "step": 21240 + }, + { + "epoch": 0.26, + "grad_norm": 32.919516908788154, + "learning_rate": 1.9503342609170885e-05, + "loss": 1.362, + "step": 21243 + }, + { + "epoch": 0.26, + "grad_norm": 55.58864725357801, + "learning_rate": 1.95031607785866e-05, + "loss": 1.5645, + "step": 21246 + }, + { + "epoch": 0.26, + "grad_norm": 16.15739052454816, + "learning_rate": 1.9502978915571454e-05, + "loss": 1.5868, + "step": 21249 + }, + { + "epoch": 0.26, + "grad_norm": 38.55025582615863, + "learning_rate": 1.9502797020126063e-05, + "loss": 1.7039, + "step": 21252 + }, + { + "epoch": 0.26, + "grad_norm": 5.856170462606712, + "learning_rate": 1.9502615092251046e-05, + "loss": 1.4227, + "step": 21255 + }, + { + "epoch": 0.26, + "grad_norm": 18.036526817127285, + "learning_rate": 1.950243313194703e-05, + "loss": 1.6023, + "step": 21258 + }, + { + "epoch": 0.26, + "grad_norm": 19.858998168475665, + "learning_rate": 1.9502251139214633e-05, + "loss": 1.6522, + "step": 21261 + }, + { + "epoch": 0.26, + "grad_norm": 12.96399904389279, + "learning_rate": 1.9502069114054473e-05, + "loss": 1.6573, + "step": 21264 + }, + { + "epoch": 0.26, + "grad_norm": 33.84893717424466, + "learning_rate": 1.9501887056467175e-05, + "loss": 1.8528, + "step": 21267 + }, + { + "epoch": 0.26, + "grad_norm": 11.8043641255927, + "learning_rate": 1.9501704966453362e-05, + "loss": 1.2059, + "step": 21270 + }, + { + "epoch": 0.26, + "grad_norm": 35.13488524526925, + "learning_rate": 1.9501522844013644e-05, + "loss": 1.496, + "step": 21273 + }, + { + "epoch": 0.26, + "grad_norm": 9.807406067094716, + "learning_rate": 1.9501340689148657e-05, + "loss": 1.5088, + "step": 21276 + }, + { + "epoch": 0.26, + "grad_norm": 17.041154527279723, + "learning_rate": 1.9501158501859016e-05, + "loss": 1.4484, + "step": 21279 + }, + { + "epoch": 0.26, + "grad_norm": 14.209248660480764, + "learning_rate": 1.9500976282145346e-05, + "loss": 1.4611, + "step": 21282 + }, + { + "epoch": 0.26, + "grad_norm": 10.25595838098228, + "learning_rate": 1.9500794030008265e-05, + "loss": 1.7639, + "step": 21285 + }, + { + "epoch": 0.26, + "grad_norm": 16.194531491320177, + "learning_rate": 1.950061174544839e-05, + "loss": 1.4099, + "step": 21288 + }, + { + "epoch": 0.26, + "grad_norm": 11.044375583341871, + "learning_rate": 1.9500429428466354e-05, + "loss": 1.9151, + "step": 21291 + }, + { + "epoch": 0.26, + "grad_norm": 30.67974426658124, + "learning_rate": 1.9500247079062776e-05, + "loss": 1.6733, + "step": 21294 + }, + { + "epoch": 0.26, + "grad_norm": 182.7042703000133, + "learning_rate": 1.9500064697238272e-05, + "loss": 1.557, + "step": 21297 + }, + { + "epoch": 0.26, + "grad_norm": 8.510054868919886, + "learning_rate": 1.9499882282993474e-05, + "loss": 1.4222, + "step": 21300 + }, + { + "epoch": 0.26, + "grad_norm": 45.09370640712488, + "learning_rate": 1.9499699836328995e-05, + "loss": 1.7004, + "step": 21303 + }, + { + "epoch": 0.26, + "grad_norm": 17.69239304399221, + "learning_rate": 1.9499517357245468e-05, + "loss": 1.8254, + "step": 21306 + }, + { + "epoch": 0.26, + "grad_norm": 30.885137241299176, + "learning_rate": 1.9499334845743507e-05, + "loss": 1.7142, + "step": 21309 + }, + { + "epoch": 0.26, + "grad_norm": 14.250927335537678, + "learning_rate": 1.9499152301823735e-05, + "loss": 1.2306, + "step": 21312 + }, + { + "epoch": 0.26, + "grad_norm": 38.26718216330329, + "learning_rate": 1.949896972548678e-05, + "loss": 1.212, + "step": 21315 + }, + { + "epoch": 0.26, + "grad_norm": 8.591583762112252, + "learning_rate": 1.9498787116733264e-05, + "loss": 1.5795, + "step": 21318 + }, + { + "epoch": 0.26, + "grad_norm": 13.620421730419979, + "learning_rate": 1.9498604475563807e-05, + "loss": 1.4629, + "step": 21321 + }, + { + "epoch": 0.26, + "grad_norm": 43.94392101046994, + "learning_rate": 1.9498421801979038e-05, + "loss": 1.9292, + "step": 21324 + }, + { + "epoch": 0.26, + "grad_norm": 21.608913274509867, + "learning_rate": 1.9498239095979576e-05, + "loss": 1.2469, + "step": 21327 + }, + { + "epoch": 0.26, + "grad_norm": 17.12457149042434, + "learning_rate": 1.949805635756604e-05, + "loss": 1.3858, + "step": 21330 + }, + { + "epoch": 0.26, + "grad_norm": 61.28699703087036, + "learning_rate": 1.9497873586739063e-05, + "loss": 1.6096, + "step": 21333 + }, + { + "epoch": 0.26, + "grad_norm": 8.299838702017517, + "learning_rate": 1.9497690783499263e-05, + "loss": 1.8283, + "step": 21336 + }, + { + "epoch": 0.26, + "grad_norm": 63.55104517104952, + "learning_rate": 1.9497507947847266e-05, + "loss": 1.5418, + "step": 21339 + }, + { + "epoch": 0.26, + "grad_norm": 10.819087903402947, + "learning_rate": 1.9497325079783696e-05, + "loss": 1.4227, + "step": 21342 + }, + { + "epoch": 0.26, + "grad_norm": 11.703504217459708, + "learning_rate": 1.9497142179309175e-05, + "loss": 1.2829, + "step": 21345 + }, + { + "epoch": 0.26, + "grad_norm": 6.346765650454051, + "learning_rate": 1.9496959246424332e-05, + "loss": 1.3475, + "step": 21348 + }, + { + "epoch": 0.26, + "grad_norm": 16.430827416541234, + "learning_rate": 1.9496776281129785e-05, + "loss": 1.5793, + "step": 21351 + }, + { + "epoch": 0.26, + "grad_norm": 18.846554554714622, + "learning_rate": 1.949659328342616e-05, + "loss": 1.3899, + "step": 21354 + }, + { + "epoch": 0.26, + "grad_norm": 35.33772246096518, + "learning_rate": 1.9496410253314086e-05, + "loss": 1.6422, + "step": 21357 + }, + { + "epoch": 0.26, + "grad_norm": 31.086000988650085, + "learning_rate": 1.949622719079418e-05, + "loss": 1.6243, + "step": 21360 + }, + { + "epoch": 0.26, + "grad_norm": 13.258721286666006, + "learning_rate": 1.949604409586708e-05, + "loss": 1.5953, + "step": 21363 + }, + { + "epoch": 0.26, + "grad_norm": 26.25971053415635, + "learning_rate": 1.9495860968533394e-05, + "loss": 1.653, + "step": 21366 + }, + { + "epoch": 0.26, + "grad_norm": 12.934476994502456, + "learning_rate": 1.9495677808793757e-05, + "loss": 1.6542, + "step": 21369 + }, + { + "epoch": 0.26, + "grad_norm": 26.779636641542503, + "learning_rate": 1.949549461664879e-05, + "loss": 1.4062, + "step": 21372 + }, + { + "epoch": 0.26, + "grad_norm": 7.373397308853274, + "learning_rate": 1.949531139209912e-05, + "loss": 1.4236, + "step": 21375 + }, + { + "epoch": 0.26, + "grad_norm": 34.51071627568787, + "learning_rate": 1.9495128135145376e-05, + "loss": 1.647, + "step": 21378 + }, + { + "epoch": 0.26, + "grad_norm": 13.042217742575069, + "learning_rate": 1.9494944845788177e-05, + "loss": 1.7291, + "step": 21381 + }, + { + "epoch": 0.26, + "grad_norm": 9.359823894224064, + "learning_rate": 1.9494761524028153e-05, + "loss": 1.3671, + "step": 21384 + }, + { + "epoch": 0.26, + "grad_norm": 19.510473102462818, + "learning_rate": 1.9494578169865926e-05, + "loss": 1.4695, + "step": 21387 + }, + { + "epoch": 0.26, + "grad_norm": 15.183346490071862, + "learning_rate": 1.949439478330213e-05, + "loss": 1.8541, + "step": 21390 + }, + { + "epoch": 0.26, + "grad_norm": 18.585401807848633, + "learning_rate": 1.9494211364337376e-05, + "loss": 1.862, + "step": 21393 + }, + { + "epoch": 0.26, + "grad_norm": 6.429478671023372, + "learning_rate": 1.94940279129723e-05, + "loss": 1.4985, + "step": 21396 + }, + { + "epoch": 0.26, + "grad_norm": 12.184630312303868, + "learning_rate": 1.9493844429207532e-05, + "loss": 1.6754, + "step": 21399 + }, + { + "epoch": 0.26, + "grad_norm": 26.320654332693564, + "learning_rate": 1.949366091304369e-05, + "loss": 1.71, + "step": 21402 + }, + { + "epoch": 0.26, + "grad_norm": 12.332574835464197, + "learning_rate": 1.94934773644814e-05, + "loss": 1.7658, + "step": 21405 + }, + { + "epoch": 0.26, + "grad_norm": 11.805857375608767, + "learning_rate": 1.9493293783521295e-05, + "loss": 1.2911, + "step": 21408 + }, + { + "epoch": 0.26, + "grad_norm": 25.56250720821696, + "learning_rate": 1.9493110170163995e-05, + "loss": 1.7484, + "step": 21411 + }, + { + "epoch": 0.26, + "grad_norm": 136.6066242194788, + "learning_rate": 1.949292652441013e-05, + "loss": 1.4505, + "step": 21414 + }, + { + "epoch": 0.26, + "grad_norm": 17.022137266080538, + "learning_rate": 1.9492742846260327e-05, + "loss": 1.5431, + "step": 21417 + }, + { + "epoch": 0.26, + "grad_norm": 32.28250681523204, + "learning_rate": 1.949255913571521e-05, + "loss": 1.6081, + "step": 21420 + }, + { + "epoch": 0.26, + "grad_norm": 14.885129791429826, + "learning_rate": 1.949237539277541e-05, + "loss": 1.5815, + "step": 21423 + }, + { + "epoch": 0.26, + "grad_norm": 26.206574417745617, + "learning_rate": 1.949219161744155e-05, + "loss": 1.1346, + "step": 21426 + }, + { + "epoch": 0.26, + "grad_norm": 8.938333140019049, + "learning_rate": 1.9492007809714263e-05, + "loss": 1.8807, + "step": 21429 + }, + { + "epoch": 0.26, + "grad_norm": 5.883630033158651, + "learning_rate": 1.9491823969594168e-05, + "loss": 1.3814, + "step": 21432 + }, + { + "epoch": 0.26, + "grad_norm": 9.86548514990155, + "learning_rate": 1.94916400970819e-05, + "loss": 1.5662, + "step": 21435 + }, + { + "epoch": 0.26, + "grad_norm": 3.013997887388268, + "learning_rate": 1.949145619217808e-05, + "loss": 1.4646, + "step": 21438 + }, + { + "epoch": 0.26, + "grad_norm": 4.659422410370607, + "learning_rate": 1.949127225488334e-05, + "loss": 1.5692, + "step": 21441 + }, + { + "epoch": 0.26, + "grad_norm": 43.396670818415004, + "learning_rate": 1.949108828519831e-05, + "loss": 1.7667, + "step": 21444 + }, + { + "epoch": 0.26, + "grad_norm": 23.881305135282823, + "learning_rate": 1.949090428312361e-05, + "loss": 1.4106, + "step": 21447 + }, + { + "epoch": 0.26, + "grad_norm": 14.899783997681244, + "learning_rate": 1.9490720248659872e-05, + "loss": 1.5765, + "step": 21450 + }, + { + "epoch": 0.26, + "grad_norm": 35.86365119761424, + "learning_rate": 1.9490536181807725e-05, + "loss": 1.7212, + "step": 21453 + }, + { + "epoch": 0.26, + "grad_norm": 17.47629229753838, + "learning_rate": 1.9490352082567797e-05, + "loss": 1.5141, + "step": 21456 + }, + { + "epoch": 0.26, + "grad_norm": 11.55421021863847, + "learning_rate": 1.949016795094072e-05, + "loss": 1.5656, + "step": 21459 + }, + { + "epoch": 0.26, + "grad_norm": 6.044013061302873, + "learning_rate": 1.948998378692711e-05, + "loss": 1.7364, + "step": 21462 + }, + { + "epoch": 0.26, + "grad_norm": 12.98182655770529, + "learning_rate": 1.9489799590527608e-05, + "loss": 1.7212, + "step": 21465 + }, + { + "epoch": 0.26, + "grad_norm": 51.8569318510813, + "learning_rate": 1.948961536174284e-05, + "loss": 1.5831, + "step": 21468 + }, + { + "epoch": 0.26, + "grad_norm": 16.66384213088179, + "learning_rate": 1.948943110057343e-05, + "loss": 1.7867, + "step": 21471 + }, + { + "epoch": 0.26, + "grad_norm": 27.607351745829753, + "learning_rate": 1.948924680702001e-05, + "loss": 1.6819, + "step": 21474 + }, + { + "epoch": 0.26, + "grad_norm": 7.945418351379468, + "learning_rate": 1.9489062481083213e-05, + "loss": 1.4071, + "step": 21477 + }, + { + "epoch": 0.26, + "grad_norm": 47.603354316015945, + "learning_rate": 1.948887812276366e-05, + "loss": 1.5937, + "step": 21480 + }, + { + "epoch": 0.26, + "grad_norm": 5.867659315713945, + "learning_rate": 1.9488693732061983e-05, + "loss": 1.7501, + "step": 21483 + }, + { + "epoch": 0.26, + "grad_norm": 79.43032133685482, + "learning_rate": 1.9488509308978814e-05, + "loss": 1.1868, + "step": 21486 + }, + { + "epoch": 0.26, + "grad_norm": 28.437498641342543, + "learning_rate": 1.948832485351478e-05, + "loss": 1.4631, + "step": 21489 + }, + { + "epoch": 0.26, + "grad_norm": 13.257682271314401, + "learning_rate": 1.948814036567051e-05, + "loss": 1.3142, + "step": 21492 + }, + { + "epoch": 0.26, + "grad_norm": 8.14357543687331, + "learning_rate": 1.9487955845446635e-05, + "loss": 1.5911, + "step": 21495 + }, + { + "epoch": 0.26, + "grad_norm": 22.030384623823345, + "learning_rate": 1.9487771292843783e-05, + "loss": 1.2866, + "step": 21498 + }, + { + "epoch": 0.26, + "grad_norm": 31.263262802052843, + "learning_rate": 1.9487586707862586e-05, + "loss": 1.6169, + "step": 21501 + }, + { + "epoch": 0.26, + "grad_norm": 14.165892901597218, + "learning_rate": 1.9487402090503673e-05, + "loss": 1.2426, + "step": 21504 + }, + { + "epoch": 0.26, + "grad_norm": 14.136619289791591, + "learning_rate": 1.9487217440767674e-05, + "loss": 1.5464, + "step": 21507 + }, + { + "epoch": 0.26, + "grad_norm": 35.55380939811617, + "learning_rate": 1.9487032758655223e-05, + "loss": 1.6622, + "step": 21510 + }, + { + "epoch": 0.26, + "grad_norm": 60.80302172812999, + "learning_rate": 1.948684804416694e-05, + "loss": 1.9139, + "step": 21513 + }, + { + "epoch": 0.26, + "grad_norm": 27.40348890702532, + "learning_rate": 1.9486663297303465e-05, + "loss": 1.7887, + "step": 21516 + }, + { + "epoch": 0.26, + "grad_norm": 11.725609544566, + "learning_rate": 1.9486478518065426e-05, + "loss": 1.54, + "step": 21519 + }, + { + "epoch": 0.26, + "grad_norm": 7.6390085782819535, + "learning_rate": 1.948629370645345e-05, + "loss": 1.6798, + "step": 21522 + }, + { + "epoch": 0.26, + "grad_norm": 4.172197164148252, + "learning_rate": 1.9486108862468173e-05, + "loss": 1.7687, + "step": 21525 + }, + { + "epoch": 0.26, + "grad_norm": 10.60330677049447, + "learning_rate": 1.9485923986110223e-05, + "loss": 1.5633, + "step": 21528 + }, + { + "epoch": 0.26, + "grad_norm": 7.243840811406924, + "learning_rate": 1.9485739077380233e-05, + "loss": 1.6435, + "step": 21531 + }, + { + "epoch": 0.26, + "grad_norm": 46.3748468704449, + "learning_rate": 1.948555413627883e-05, + "loss": 1.7861, + "step": 21534 + }, + { + "epoch": 0.26, + "grad_norm": 17.823988051275578, + "learning_rate": 1.9485369162806648e-05, + "loss": 1.3956, + "step": 21537 + }, + { + "epoch": 0.26, + "grad_norm": 11.743272883348457, + "learning_rate": 1.9485184156964316e-05, + "loss": 1.4456, + "step": 21540 + }, + { + "epoch": 0.26, + "grad_norm": 39.239173232878464, + "learning_rate": 1.9484999118752465e-05, + "loss": 1.2719, + "step": 21543 + }, + { + "epoch": 0.26, + "grad_norm": 16.827759159563527, + "learning_rate": 1.9484814048171735e-05, + "loss": 1.506, + "step": 21546 + }, + { + "epoch": 0.26, + "grad_norm": 5.532022759131657, + "learning_rate": 1.9484628945222748e-05, + "loss": 1.3729, + "step": 21549 + }, + { + "epoch": 0.26, + "grad_norm": 20.053463095880616, + "learning_rate": 1.9484443809906138e-05, + "loss": 1.7244, + "step": 21552 + }, + { + "epoch": 0.26, + "grad_norm": 36.96180002115699, + "learning_rate": 1.9484258642222536e-05, + "loss": 1.7405, + "step": 21555 + }, + { + "epoch": 0.26, + "grad_norm": 20.28420657130761, + "learning_rate": 1.948407344217258e-05, + "loss": 1.6008, + "step": 21558 + }, + { + "epoch": 0.26, + "grad_norm": 60.13716040605346, + "learning_rate": 1.9483888209756893e-05, + "loss": 1.8654, + "step": 21561 + }, + { + "epoch": 0.26, + "grad_norm": 6.922489872623816, + "learning_rate": 1.9483702944976116e-05, + "loss": 1.4124, + "step": 21564 + }, + { + "epoch": 0.26, + "grad_norm": 19.910392559169406, + "learning_rate": 1.9483517647830873e-05, + "loss": 1.8406, + "step": 21567 + }, + { + "epoch": 0.26, + "grad_norm": 24.348901572186797, + "learning_rate": 1.9483332318321803e-05, + "loss": 1.3733, + "step": 21570 + }, + { + "epoch": 0.26, + "grad_norm": 17.170339092547685, + "learning_rate": 1.9483146956449532e-05, + "loss": 1.6381, + "step": 21573 + }, + { + "epoch": 0.26, + "grad_norm": 16.481507426184365, + "learning_rate": 1.9482961562214703e-05, + "loss": 1.789, + "step": 21576 + }, + { + "epoch": 0.26, + "grad_norm": 12.080031501552044, + "learning_rate": 1.9482776135617937e-05, + "loss": 1.1536, + "step": 21579 + }, + { + "epoch": 0.26, + "grad_norm": 41.01673061200336, + "learning_rate": 1.948259067665987e-05, + "loss": 1.2703, + "step": 21582 + }, + { + "epoch": 0.26, + "grad_norm": 3.893477123968039, + "learning_rate": 1.9482405185341138e-05, + "loss": 1.6437, + "step": 21585 + }, + { + "epoch": 0.26, + "grad_norm": 50.428864581930284, + "learning_rate": 1.9482219661662375e-05, + "loss": 1.4755, + "step": 21588 + }, + { + "epoch": 0.26, + "grad_norm": 11.57656465488225, + "learning_rate": 1.948203410562421e-05, + "loss": 1.3315, + "step": 21591 + }, + { + "epoch": 0.26, + "grad_norm": 20.917134662780793, + "learning_rate": 1.948184851722728e-05, + "loss": 1.7803, + "step": 21594 + }, + { + "epoch": 0.26, + "grad_norm": 25.19980705412201, + "learning_rate": 1.9481662896472214e-05, + "loss": 1.7984, + "step": 21597 + }, + { + "epoch": 0.26, + "grad_norm": 10.04202595467343, + "learning_rate": 1.9481477243359647e-05, + "loss": 1.4173, + "step": 21600 + }, + { + "epoch": 0.26, + "grad_norm": 26.92475054792714, + "learning_rate": 1.9481291557890215e-05, + "loss": 1.8547, + "step": 21603 + }, + { + "epoch": 0.26, + "grad_norm": 29.620507981167137, + "learning_rate": 1.9481105840064552e-05, + "loss": 1.2758, + "step": 21606 + }, + { + "epoch": 0.26, + "grad_norm": 34.44403618302203, + "learning_rate": 1.9480920089883287e-05, + "loss": 1.8018, + "step": 21609 + }, + { + "epoch": 0.26, + "grad_norm": 11.536966482741692, + "learning_rate": 1.9480734307347058e-05, + "loss": 1.472, + "step": 21612 + }, + { + "epoch": 0.26, + "grad_norm": 9.45175350371593, + "learning_rate": 1.9480548492456498e-05, + "loss": 1.8588, + "step": 21615 + }, + { + "epoch": 0.26, + "grad_norm": 19.094873017464575, + "learning_rate": 1.948036264521224e-05, + "loss": 1.5134, + "step": 21618 + }, + { + "epoch": 0.26, + "grad_norm": 17.261072906821997, + "learning_rate": 1.948017676561492e-05, + "loss": 1.5006, + "step": 21621 + }, + { + "epoch": 0.26, + "grad_norm": 6.559959945326962, + "learning_rate": 1.947999085366517e-05, + "loss": 1.6899, + "step": 21624 + }, + { + "epoch": 0.26, + "grad_norm": 46.64646409927708, + "learning_rate": 1.9479804909363625e-05, + "loss": 1.8129, + "step": 21627 + }, + { + "epoch": 0.26, + "grad_norm": 8.781229353199496, + "learning_rate": 1.947961893271092e-05, + "loss": 1.5343, + "step": 21630 + }, + { + "epoch": 0.26, + "grad_norm": 17.80705272739408, + "learning_rate": 1.9479432923707694e-05, + "loss": 1.5566, + "step": 21633 + }, + { + "epoch": 0.26, + "grad_norm": 43.05522780536004, + "learning_rate": 1.9479246882354578e-05, + "loss": 1.9037, + "step": 21636 + }, + { + "epoch": 0.26, + "grad_norm": 6.948658730906024, + "learning_rate": 1.9479060808652206e-05, + "loss": 1.6432, + "step": 21639 + }, + { + "epoch": 0.26, + "grad_norm": 35.40922319846962, + "learning_rate": 1.9478874702601212e-05, + "loss": 1.5168, + "step": 21642 + }, + { + "epoch": 0.26, + "grad_norm": 12.63584110203488, + "learning_rate": 1.9478688564202233e-05, + "loss": 1.3489, + "step": 21645 + }, + { + "epoch": 0.26, + "grad_norm": 14.797298115490946, + "learning_rate": 1.9478502393455903e-05, + "loss": 1.6142, + "step": 21648 + }, + { + "epoch": 0.26, + "grad_norm": 6.311796138726497, + "learning_rate": 1.9478316190362862e-05, + "loss": 1.6182, + "step": 21651 + }, + { + "epoch": 0.26, + "grad_norm": 13.387412685091148, + "learning_rate": 1.947812995492374e-05, + "loss": 1.5204, + "step": 21654 + }, + { + "epoch": 0.26, + "grad_norm": 15.500886853721973, + "learning_rate": 1.9477943687139174e-05, + "loss": 1.6971, + "step": 21657 + }, + { + "epoch": 0.26, + "grad_norm": 9.028698158647497, + "learning_rate": 1.94777573870098e-05, + "loss": 1.5258, + "step": 21660 + }, + { + "epoch": 0.26, + "grad_norm": 9.558138089164538, + "learning_rate": 1.9477571054536256e-05, + "loss": 1.6519, + "step": 21663 + }, + { + "epoch": 0.26, + "grad_norm": 11.516293911111271, + "learning_rate": 1.9477384689719177e-05, + "loss": 1.3919, + "step": 21666 + }, + { + "epoch": 0.26, + "grad_norm": 20.239873175012658, + "learning_rate": 1.9477198292559195e-05, + "loss": 1.3226, + "step": 21669 + }, + { + "epoch": 0.26, + "grad_norm": 6.463104399614475, + "learning_rate": 1.947701186305695e-05, + "loss": 1.5336, + "step": 21672 + }, + { + "epoch": 0.26, + "grad_norm": 22.16826449767166, + "learning_rate": 1.9476825401213074e-05, + "loss": 1.8067, + "step": 21675 + }, + { + "epoch": 0.26, + "grad_norm": 20.30356961155294, + "learning_rate": 1.947663890702821e-05, + "loss": 1.5596, + "step": 21678 + }, + { + "epoch": 0.26, + "grad_norm": 11.174708420428919, + "learning_rate": 1.947645238050299e-05, + "loss": 1.5356, + "step": 21681 + }, + { + "epoch": 0.26, + "grad_norm": 94.24205195995165, + "learning_rate": 1.947626582163805e-05, + "loss": 1.4676, + "step": 21684 + }, + { + "epoch": 0.26, + "grad_norm": 7.4131157378332695, + "learning_rate": 1.947607923043403e-05, + "loss": 1.4098, + "step": 21687 + }, + { + "epoch": 0.26, + "grad_norm": 9.110497366322505, + "learning_rate": 1.9475892606891564e-05, + "loss": 1.7188, + "step": 21690 + }, + { + "epoch": 0.26, + "grad_norm": 15.18113644193352, + "learning_rate": 1.9475705951011287e-05, + "loss": 1.6781, + "step": 21693 + }, + { + "epoch": 0.26, + "grad_norm": 27.75243907075036, + "learning_rate": 1.9475519262793844e-05, + "loss": 1.6567, + "step": 21696 + }, + { + "epoch": 0.26, + "grad_norm": 10.832968664649735, + "learning_rate": 1.9475332542239865e-05, + "loss": 1.4561, + "step": 21699 + }, + { + "epoch": 0.26, + "grad_norm": 3.076238984300027, + "learning_rate": 1.947514578934999e-05, + "loss": 1.6265, + "step": 21702 + }, + { + "epoch": 0.26, + "grad_norm": 11.747557872131999, + "learning_rate": 1.947495900412485e-05, + "loss": 1.7532, + "step": 21705 + }, + { + "epoch": 0.26, + "grad_norm": 3.271861788740632, + "learning_rate": 1.947477218656509e-05, + "loss": 1.7747, + "step": 21708 + }, + { + "epoch": 0.26, + "grad_norm": 18.362210979830557, + "learning_rate": 1.947458533667135e-05, + "loss": 1.9631, + "step": 21711 + }, + { + "epoch": 0.26, + "grad_norm": 8.816176179012997, + "learning_rate": 1.9474398454444258e-05, + "loss": 1.5915, + "step": 21714 + }, + { + "epoch": 0.26, + "grad_norm": 19.97048842662447, + "learning_rate": 1.947421153988446e-05, + "loss": 1.7171, + "step": 21717 + }, + { + "epoch": 0.26, + "grad_norm": 15.486882366632102, + "learning_rate": 1.947402459299259e-05, + "loss": 1.5213, + "step": 21720 + }, + { + "epoch": 0.26, + "grad_norm": 27.617707730477726, + "learning_rate": 1.9473837613769287e-05, + "loss": 1.4661, + "step": 21723 + }, + { + "epoch": 0.26, + "grad_norm": 15.217596532318838, + "learning_rate": 1.9473650602215187e-05, + "loss": 1.379, + "step": 21726 + }, + { + "epoch": 0.26, + "grad_norm": 7.95719431920659, + "learning_rate": 1.947346355833093e-05, + "loss": 1.4236, + "step": 21729 + }, + { + "epoch": 0.26, + "grad_norm": 16.164172353166467, + "learning_rate": 1.947327648211715e-05, + "loss": 1.5729, + "step": 21732 + }, + { + "epoch": 0.26, + "grad_norm": 15.502447563695288, + "learning_rate": 1.9473089373574498e-05, + "loss": 1.4367, + "step": 21735 + }, + { + "epoch": 0.26, + "grad_norm": 13.632427447103826, + "learning_rate": 1.94729022327036e-05, + "loss": 1.5323, + "step": 21738 + }, + { + "epoch": 0.26, + "grad_norm": 7.870375405717667, + "learning_rate": 1.94727150595051e-05, + "loss": 1.2425, + "step": 21741 + }, + { + "epoch": 0.26, + "grad_norm": 5.311288733340642, + "learning_rate": 1.9472527853979635e-05, + "loss": 1.5789, + "step": 21744 + }, + { + "epoch": 0.26, + "grad_norm": 15.010644660950115, + "learning_rate": 1.947234061612784e-05, + "loss": 1.2685, + "step": 21747 + }, + { + "epoch": 0.26, + "grad_norm": 14.875129085898756, + "learning_rate": 1.9472153345950368e-05, + "loss": 1.7055, + "step": 21750 + }, + { + "epoch": 0.26, + "grad_norm": 9.78357608370515, + "learning_rate": 1.947196604344784e-05, + "loss": 1.4792, + "step": 21753 + }, + { + "epoch": 0.26, + "grad_norm": 6.949945845345206, + "learning_rate": 1.9471778708620905e-05, + "loss": 1.7655, + "step": 21756 + }, + { + "epoch": 0.26, + "grad_norm": 30.481306051752192, + "learning_rate": 1.9471591341470203e-05, + "loss": 1.6169, + "step": 21759 + }, + { + "epoch": 0.26, + "grad_norm": 19.468877703261022, + "learning_rate": 1.9471403941996372e-05, + "loss": 1.7858, + "step": 21762 + }, + { + "epoch": 0.26, + "grad_norm": 27.820045934669828, + "learning_rate": 1.947121651020005e-05, + "loss": 1.6947, + "step": 21765 + }, + { + "epoch": 0.26, + "grad_norm": 6.9501945973187675, + "learning_rate": 1.947102904608188e-05, + "loss": 1.3068, + "step": 21768 + }, + { + "epoch": 0.26, + "grad_norm": 23.440191593421318, + "learning_rate": 1.9470841549642494e-05, + "loss": 1.3908, + "step": 21771 + }, + { + "epoch": 0.26, + "grad_norm": 53.589211252992726, + "learning_rate": 1.947065402088254e-05, + "loss": 1.5775, + "step": 21774 + }, + { + "epoch": 0.26, + "grad_norm": 16.848834941441577, + "learning_rate": 1.9470466459802657e-05, + "loss": 1.5335, + "step": 21777 + }, + { + "epoch": 0.26, + "grad_norm": 23.408687341364914, + "learning_rate": 1.947027886640348e-05, + "loss": 1.6982, + "step": 21780 + }, + { + "epoch": 0.26, + "grad_norm": 16.655358001716273, + "learning_rate": 1.9470091240685653e-05, + "loss": 1.8962, + "step": 21783 + }, + { + "epoch": 0.26, + "grad_norm": 16.716617372215193, + "learning_rate": 1.9469903582649817e-05, + "loss": 1.4635, + "step": 21786 + }, + { + "epoch": 0.26, + "grad_norm": 11.81944379846145, + "learning_rate": 1.946971589229661e-05, + "loss": 1.4643, + "step": 21789 + }, + { + "epoch": 0.26, + "grad_norm": 7.1968895057379845, + "learning_rate": 1.9469528169626676e-05, + "loss": 1.4628, + "step": 21792 + }, + { + "epoch": 0.26, + "grad_norm": 14.596709163376799, + "learning_rate": 1.946934041464065e-05, + "loss": 1.5251, + "step": 21795 + }, + { + "epoch": 0.26, + "grad_norm": 20.16506896106307, + "learning_rate": 1.946915262733918e-05, + "loss": 1.5281, + "step": 21798 + }, + { + "epoch": 0.26, + "grad_norm": 12.816023407631006, + "learning_rate": 1.9468964807722898e-05, + "loss": 1.4461, + "step": 21801 + }, + { + "epoch": 0.26, + "grad_norm": 7.025395539772238, + "learning_rate": 1.9468776955792452e-05, + "loss": 1.3831, + "step": 21804 + }, + { + "epoch": 0.26, + "grad_norm": 8.821669820709243, + "learning_rate": 1.9468589071548482e-05, + "loss": 1.4783, + "step": 21807 + }, + { + "epoch": 0.26, + "grad_norm": 12.81100512024481, + "learning_rate": 1.9468401154991628e-05, + "loss": 1.3628, + "step": 21810 + }, + { + "epoch": 0.26, + "grad_norm": 50.87660801520976, + "learning_rate": 1.9468213206122527e-05, + "loss": 1.3844, + "step": 21813 + }, + { + "epoch": 0.26, + "grad_norm": 43.34595353336432, + "learning_rate": 1.9468025224941833e-05, + "loss": 1.3692, + "step": 21816 + }, + { + "epoch": 0.26, + "grad_norm": 7.791736559908985, + "learning_rate": 1.9467837211450172e-05, + "loss": 1.8475, + "step": 21819 + }, + { + "epoch": 0.26, + "grad_norm": 32.10024229440641, + "learning_rate": 1.9467649165648196e-05, + "loss": 1.5136, + "step": 21822 + }, + { + "epoch": 0.26, + "grad_norm": 12.244037348802085, + "learning_rate": 1.9467461087536545e-05, + "loss": 1.6412, + "step": 21825 + }, + { + "epoch": 0.26, + "grad_norm": 15.669045297116512, + "learning_rate": 1.9467272977115855e-05, + "loss": 1.3333, + "step": 21828 + }, + { + "epoch": 0.26, + "grad_norm": 3.5270947948961977, + "learning_rate": 1.9467084834386777e-05, + "loss": 1.4405, + "step": 21831 + }, + { + "epoch": 0.26, + "grad_norm": 46.13851197483545, + "learning_rate": 1.9466896659349945e-05, + "loss": 1.2806, + "step": 21834 + }, + { + "epoch": 0.26, + "grad_norm": 11.557296236190417, + "learning_rate": 1.9466708452006003e-05, + "loss": 1.4294, + "step": 21837 + }, + { + "epoch": 0.26, + "grad_norm": 17.575976427352686, + "learning_rate": 1.94665202123556e-05, + "loss": 1.7308, + "step": 21840 + }, + { + "epoch": 0.26, + "grad_norm": 4.15177236653941, + "learning_rate": 1.946633194039937e-05, + "loss": 1.6577, + "step": 21843 + }, + { + "epoch": 0.26, + "grad_norm": 45.69578480838351, + "learning_rate": 1.9466143636137963e-05, + "loss": 1.5275, + "step": 21846 + }, + { + "epoch": 0.26, + "grad_norm": 23.37467824907941, + "learning_rate": 1.9465955299572016e-05, + "loss": 1.5092, + "step": 21849 + }, + { + "epoch": 0.26, + "grad_norm": 7.446143455956203, + "learning_rate": 1.946576693070217e-05, + "loss": 1.82, + "step": 21852 + }, + { + "epoch": 0.26, + "grad_norm": 36.74305508739479, + "learning_rate": 1.9465578529529073e-05, + "loss": 1.5479, + "step": 21855 + }, + { + "epoch": 0.26, + "grad_norm": 21.176694173874225, + "learning_rate": 1.9465390096053368e-05, + "loss": 1.7581, + "step": 21858 + }, + { + "epoch": 0.26, + "grad_norm": 15.283816231521932, + "learning_rate": 1.9465201630275692e-05, + "loss": 1.6247, + "step": 21861 + }, + { + "epoch": 0.26, + "grad_norm": 36.36494042464552, + "learning_rate": 1.9465013132196696e-05, + "loss": 1.7725, + "step": 21864 + }, + { + "epoch": 0.26, + "grad_norm": 20.04390727326438, + "learning_rate": 1.9464824601817013e-05, + "loss": 1.5586, + "step": 21867 + }, + { + "epoch": 0.26, + "grad_norm": 21.72289472218815, + "learning_rate": 1.9464636039137298e-05, + "loss": 1.7319, + "step": 21870 + }, + { + "epoch": 0.26, + "grad_norm": 13.158497008493098, + "learning_rate": 1.946444744415819e-05, + "loss": 1.8674, + "step": 21873 + }, + { + "epoch": 0.26, + "grad_norm": 3.4241236581861716, + "learning_rate": 1.946425881688033e-05, + "loss": 1.2564, + "step": 21876 + }, + { + "epoch": 0.26, + "grad_norm": 47.61809051038092, + "learning_rate": 1.9464070157304366e-05, + "loss": 1.5066, + "step": 21879 + }, + { + "epoch": 0.26, + "grad_norm": 11.993258883336347, + "learning_rate": 1.9463881465430937e-05, + "loss": 1.2474, + "step": 21882 + }, + { + "epoch": 0.26, + "grad_norm": 13.433008290069422, + "learning_rate": 1.946369274126069e-05, + "loss": 1.707, + "step": 21885 + }, + { + "epoch": 0.26, + "grad_norm": 9.737458986173818, + "learning_rate": 1.946350398479427e-05, + "loss": 1.5232, + "step": 21888 + }, + { + "epoch": 0.26, + "grad_norm": 9.996971778956194, + "learning_rate": 1.9463315196032316e-05, + "loss": 1.3434, + "step": 21891 + }, + { + "epoch": 0.26, + "grad_norm": 75.49049420094876, + "learning_rate": 1.946312637497548e-05, + "loss": 1.4176, + "step": 21894 + }, + { + "epoch": 0.26, + "grad_norm": 9.13353431845874, + "learning_rate": 1.94629375216244e-05, + "loss": 1.3003, + "step": 21897 + }, + { + "epoch": 0.26, + "grad_norm": 117.94804829342698, + "learning_rate": 1.9462748635979724e-05, + "loss": 1.3869, + "step": 21900 + }, + { + "epoch": 0.26, + "grad_norm": 31.8165345329171, + "learning_rate": 1.9462559718042095e-05, + "loss": 1.7424, + "step": 21903 + }, + { + "epoch": 0.26, + "grad_norm": 7.499499963519859, + "learning_rate": 1.9462370767812158e-05, + "loss": 1.2771, + "step": 21906 + }, + { + "epoch": 0.26, + "grad_norm": 56.6762429857868, + "learning_rate": 1.9462181785290557e-05, + "loss": 1.5125, + "step": 21909 + }, + { + "epoch": 0.26, + "grad_norm": 97.17218185140945, + "learning_rate": 1.946199277047794e-05, + "loss": 2.0003, + "step": 21912 + }, + { + "epoch": 0.26, + "grad_norm": 16.837385716133138, + "learning_rate": 1.9461803723374948e-05, + "loss": 1.1643, + "step": 21915 + }, + { + "epoch": 0.26, + "grad_norm": 16.07691561829583, + "learning_rate": 1.9461614643982228e-05, + "loss": 1.4857, + "step": 21918 + }, + { + "epoch": 0.26, + "grad_norm": 23.31036882383061, + "learning_rate": 1.9461425532300425e-05, + "loss": 1.2666, + "step": 21921 + }, + { + "epoch": 0.26, + "grad_norm": 18.035581942159446, + "learning_rate": 1.9461236388330187e-05, + "loss": 1.8525, + "step": 21924 + }, + { + "epoch": 0.26, + "grad_norm": 18.664041342053125, + "learning_rate": 1.9461047212072155e-05, + "loss": 1.4795, + "step": 21927 + }, + { + "epoch": 0.26, + "grad_norm": 10.162426463653777, + "learning_rate": 1.9460858003526978e-05, + "loss": 1.4534, + "step": 21930 + }, + { + "epoch": 0.26, + "grad_norm": 26.542648343575028, + "learning_rate": 1.9460668762695297e-05, + "loss": 1.2393, + "step": 21933 + }, + { + "epoch": 0.26, + "grad_norm": 8.550278416193207, + "learning_rate": 1.9460479489577766e-05, + "loss": 1.4736, + "step": 21936 + }, + { + "epoch": 0.26, + "grad_norm": 15.021635297389642, + "learning_rate": 1.9460290184175022e-05, + "loss": 1.4689, + "step": 21939 + }, + { + "epoch": 0.26, + "grad_norm": 13.973957835096328, + "learning_rate": 1.946010084648772e-05, + "loss": 1.5754, + "step": 21942 + }, + { + "epoch": 0.26, + "grad_norm": 8.922452293156747, + "learning_rate": 1.9459911476516494e-05, + "loss": 1.3567, + "step": 21945 + }, + { + "epoch": 0.26, + "grad_norm": 8.036911659630949, + "learning_rate": 1.9459722074262004e-05, + "loss": 1.6653, + "step": 21948 + }, + { + "epoch": 0.26, + "grad_norm": 10.169082799683096, + "learning_rate": 1.9459532639724886e-05, + "loss": 1.2717, + "step": 21951 + }, + { + "epoch": 0.26, + "grad_norm": 20.84579089184188, + "learning_rate": 1.9459343172905797e-05, + "loss": 1.7434, + "step": 21954 + }, + { + "epoch": 0.26, + "grad_norm": 9.834204795128398, + "learning_rate": 1.945915367380537e-05, + "loss": 1.8053, + "step": 21957 + }, + { + "epoch": 0.26, + "grad_norm": 12.244774439164035, + "learning_rate": 1.9458964142424263e-05, + "loss": 1.8433, + "step": 21960 + }, + { + "epoch": 0.26, + "grad_norm": 18.737089569921217, + "learning_rate": 1.9458774578763115e-05, + "loss": 1.4676, + "step": 21963 + }, + { + "epoch": 0.26, + "grad_norm": 16.045192360598932, + "learning_rate": 1.9458584982822578e-05, + "loss": 1.5952, + "step": 21966 + }, + { + "epoch": 0.26, + "grad_norm": 10.019284265490642, + "learning_rate": 1.94583953546033e-05, + "loss": 1.8093, + "step": 21969 + }, + { + "epoch": 0.26, + "grad_norm": 14.750888224813478, + "learning_rate": 1.945820569410592e-05, + "loss": 1.4046, + "step": 21972 + }, + { + "epoch": 0.26, + "grad_norm": 24.72507533765271, + "learning_rate": 1.9458016001331095e-05, + "loss": 1.5228, + "step": 21975 + }, + { + "epoch": 0.26, + "grad_norm": 11.658903619395172, + "learning_rate": 1.9457826276279467e-05, + "loss": 1.6241, + "step": 21978 + }, + { + "epoch": 0.26, + "grad_norm": 8.281070757533707, + "learning_rate": 1.9457636518951683e-05, + "loss": 1.365, + "step": 21981 + }, + { + "epoch": 0.26, + "grad_norm": 22.699021344234968, + "learning_rate": 1.9457446729348395e-05, + "loss": 1.5531, + "step": 21984 + }, + { + "epoch": 0.26, + "grad_norm": 21.907344096360564, + "learning_rate": 1.9457256907470246e-05, + "loss": 1.3906, + "step": 21987 + }, + { + "epoch": 0.26, + "grad_norm": 17.404584940925545, + "learning_rate": 1.9457067053317885e-05, + "loss": 1.6327, + "step": 21990 + }, + { + "epoch": 0.26, + "grad_norm": 8.844211895822198, + "learning_rate": 1.9456877166891963e-05, + "loss": 1.7733, + "step": 21993 + }, + { + "epoch": 0.26, + "grad_norm": 6.397525639496165, + "learning_rate": 1.9456687248193126e-05, + "loss": 1.5141, + "step": 21996 + }, + { + "epoch": 0.26, + "grad_norm": 33.728598607488486, + "learning_rate": 1.945649729722202e-05, + "loss": 1.3213, + "step": 21999 + }, + { + "epoch": 0.26, + "grad_norm": 20.758290151511556, + "learning_rate": 1.9456307313979296e-05, + "loss": 1.4766, + "step": 22002 + }, + { + "epoch": 0.26, + "grad_norm": 11.393024229268459, + "learning_rate": 1.9456117298465596e-05, + "loss": 1.2828, + "step": 22005 + }, + { + "epoch": 0.26, + "grad_norm": 3.910071292634566, + "learning_rate": 1.945592725068158e-05, + "loss": 1.6116, + "step": 22008 + }, + { + "epoch": 0.26, + "grad_norm": 10.280385786425676, + "learning_rate": 1.9455737170627888e-05, + "loss": 1.6141, + "step": 22011 + }, + { + "epoch": 0.26, + "grad_norm": 23.17095520271198, + "learning_rate": 1.9455547058305173e-05, + "loss": 1.1267, + "step": 22014 + }, + { + "epoch": 0.26, + "grad_norm": 5.6896813313159615, + "learning_rate": 1.9455356913714078e-05, + "loss": 1.4731, + "step": 22017 + }, + { + "epoch": 0.26, + "grad_norm": 16.12120151720719, + "learning_rate": 1.945516673685526e-05, + "loss": 1.4191, + "step": 22020 + }, + { + "epoch": 0.26, + "grad_norm": 16.40692255300918, + "learning_rate": 1.945497652772936e-05, + "loss": 1.3968, + "step": 22023 + }, + { + "epoch": 0.26, + "grad_norm": 34.85220655342995, + "learning_rate": 1.9454786286337032e-05, + "loss": 1.5752, + "step": 22026 + }, + { + "epoch": 0.26, + "grad_norm": 7.632095706462302, + "learning_rate": 1.9454596012678925e-05, + "loss": 1.2932, + "step": 22029 + }, + { + "epoch": 0.26, + "grad_norm": 43.68409358282281, + "learning_rate": 1.9454405706755687e-05, + "loss": 1.9561, + "step": 22032 + }, + { + "epoch": 0.26, + "grad_norm": 33.414343138658474, + "learning_rate": 1.9454215368567964e-05, + "loss": 1.4356, + "step": 22035 + }, + { + "epoch": 0.27, + "grad_norm": 24.734552858363283, + "learning_rate": 1.9454024998116418e-05, + "loss": 1.5224, + "step": 22038 + }, + { + "epoch": 0.27, + "grad_norm": 19.909591491995336, + "learning_rate": 1.945383459540168e-05, + "loss": 1.5932, + "step": 22041 + }, + { + "epoch": 0.27, + "grad_norm": 28.055862213672384, + "learning_rate": 1.9453644160424415e-05, + "loss": 1.7769, + "step": 22044 + }, + { + "epoch": 0.27, + "grad_norm": 14.20759228148299, + "learning_rate": 1.9453453693185266e-05, + "loss": 1.2311, + "step": 22047 + }, + { + "epoch": 0.27, + "grad_norm": 4.6766248291375545, + "learning_rate": 1.945326319368489e-05, + "loss": 1.4698, + "step": 22050 + }, + { + "epoch": 0.27, + "grad_norm": 8.618643924358507, + "learning_rate": 1.9453072661923924e-05, + "loss": 1.6266, + "step": 22053 + }, + { + "epoch": 0.27, + "grad_norm": 16.17599501142044, + "learning_rate": 1.945288209790303e-05, + "loss": 1.73, + "step": 22056 + }, + { + "epoch": 0.27, + "grad_norm": 10.931704872904874, + "learning_rate": 1.9452691501622855e-05, + "loss": 1.5561, + "step": 22059 + }, + { + "epoch": 0.27, + "grad_norm": 26.775433155440872, + "learning_rate": 1.945250087308405e-05, + "loss": 1.4373, + "step": 22062 + }, + { + "epoch": 0.27, + "grad_norm": 7.497941311000961, + "learning_rate": 1.9452310212287257e-05, + "loss": 1.5162, + "step": 22065 + }, + { + "epoch": 0.27, + "grad_norm": 45.949405524438816, + "learning_rate": 1.945211951923314e-05, + "loss": 1.5194, + "step": 22068 + }, + { + "epoch": 0.27, + "grad_norm": 25.94699025232265, + "learning_rate": 1.9451928793922344e-05, + "loss": 1.7056, + "step": 22071 + }, + { + "epoch": 0.27, + "grad_norm": 11.055373766348204, + "learning_rate": 1.9451738036355517e-05, + "loss": 1.066, + "step": 22074 + }, + { + "epoch": 0.27, + "grad_norm": 74.40123126211782, + "learning_rate": 1.9451547246533314e-05, + "loss": 1.206, + "step": 22077 + }, + { + "epoch": 0.27, + "grad_norm": 11.14133638480707, + "learning_rate": 1.9451356424456383e-05, + "loss": 1.8111, + "step": 22080 + }, + { + "epoch": 0.27, + "grad_norm": 5.004518208715826, + "learning_rate": 1.945116557012538e-05, + "loss": 1.5046, + "step": 22083 + }, + { + "epoch": 0.27, + "grad_norm": 4.818805794027554, + "learning_rate": 1.9450974683540955e-05, + "loss": 1.828, + "step": 22086 + }, + { + "epoch": 0.27, + "grad_norm": 24.632348629434066, + "learning_rate": 1.945078376470375e-05, + "loss": 1.2354, + "step": 22089 + }, + { + "epoch": 0.27, + "grad_norm": 41.457798973945735, + "learning_rate": 1.945059281361443e-05, + "loss": 1.5514, + "step": 22092 + }, + { + "epoch": 0.27, + "grad_norm": 10.646506637580643, + "learning_rate": 1.9450401830273637e-05, + "loss": 1.7, + "step": 22095 + }, + { + "epoch": 0.27, + "grad_norm": 6.51007039426069, + "learning_rate": 1.945021081468203e-05, + "loss": 1.4124, + "step": 22098 + }, + { + "epoch": 0.27, + "grad_norm": 23.842729346337208, + "learning_rate": 1.9450019766840256e-05, + "loss": 1.6221, + "step": 22101 + }, + { + "epoch": 0.27, + "grad_norm": 14.380335020396172, + "learning_rate": 1.944982868674897e-05, + "loss": 1.4152, + "step": 22104 + }, + { + "epoch": 0.27, + "grad_norm": 16.15189473921906, + "learning_rate": 1.944963757440882e-05, + "loss": 1.2141, + "step": 22107 + }, + { + "epoch": 0.27, + "grad_norm": 34.00269042715647, + "learning_rate": 1.944944642982046e-05, + "loss": 1.8155, + "step": 22110 + }, + { + "epoch": 0.27, + "grad_norm": 9.813422941963326, + "learning_rate": 1.9449255252984547e-05, + "loss": 1.8724, + "step": 22113 + }, + { + "epoch": 0.27, + "grad_norm": 10.48743233410087, + "learning_rate": 1.9449064043901726e-05, + "loss": 2.0915, + "step": 22116 + }, + { + "epoch": 0.27, + "grad_norm": 4.851134343526063, + "learning_rate": 1.9448872802572655e-05, + "loss": 1.6122, + "step": 22119 + }, + { + "epoch": 0.27, + "grad_norm": 13.690452639328504, + "learning_rate": 1.9448681528997985e-05, + "loss": 1.5824, + "step": 22122 + }, + { + "epoch": 0.27, + "grad_norm": 19.460627830767788, + "learning_rate": 1.9448490223178367e-05, + "loss": 1.4708, + "step": 22125 + }, + { + "epoch": 0.27, + "grad_norm": 15.384643666567777, + "learning_rate": 1.9448298885114454e-05, + "loss": 1.2957, + "step": 22128 + }, + { + "epoch": 0.27, + "grad_norm": 19.77279978635534, + "learning_rate": 1.9448107514806903e-05, + "loss": 1.1313, + "step": 22131 + }, + { + "epoch": 0.27, + "grad_norm": 4.113395489890999, + "learning_rate": 1.944791611225636e-05, + "loss": 1.6781, + "step": 22134 + }, + { + "epoch": 0.27, + "grad_norm": 6.698454272247329, + "learning_rate": 1.9447724677463483e-05, + "loss": 1.6187, + "step": 22137 + }, + { + "epoch": 0.27, + "grad_norm": 17.408851531303142, + "learning_rate": 1.9447533210428927e-05, + "loss": 1.4445, + "step": 22140 + }, + { + "epoch": 0.27, + "grad_norm": 7.945238172078589, + "learning_rate": 1.9447341711153346e-05, + "loss": 1.8494, + "step": 22143 + }, + { + "epoch": 0.27, + "grad_norm": 3.421224516998884, + "learning_rate": 1.9447150179637388e-05, + "loss": 1.5856, + "step": 22146 + }, + { + "epoch": 0.27, + "grad_norm": 40.224570182445476, + "learning_rate": 1.944695861588171e-05, + "loss": 1.5516, + "step": 22149 + }, + { + "epoch": 0.27, + "grad_norm": 22.41957865898696, + "learning_rate": 1.944676701988696e-05, + "loss": 1.7697, + "step": 22152 + }, + { + "epoch": 0.27, + "grad_norm": 41.822042835354665, + "learning_rate": 1.9446575391653804e-05, + "loss": 1.3901, + "step": 22155 + }, + { + "epoch": 0.27, + "grad_norm": 17.288675616840266, + "learning_rate": 1.9446383731182887e-05, + "loss": 1.7001, + "step": 22158 + }, + { + "epoch": 0.27, + "grad_norm": 30.991621452524765, + "learning_rate": 1.944619203847486e-05, + "loss": 1.4118, + "step": 22161 + }, + { + "epoch": 0.27, + "grad_norm": 37.785454789778306, + "learning_rate": 1.9446000313530387e-05, + "loss": 1.507, + "step": 22164 + }, + { + "epoch": 0.27, + "grad_norm": 2.3195600345474765, + "learning_rate": 1.9445808556350116e-05, + "loss": 1.3963, + "step": 22167 + }, + { + "epoch": 0.27, + "grad_norm": 22.330743038339154, + "learning_rate": 1.9445616766934702e-05, + "loss": 1.3411, + "step": 22170 + }, + { + "epoch": 0.27, + "grad_norm": 4.1075701859970675, + "learning_rate": 1.9445424945284802e-05, + "loss": 1.4763, + "step": 22173 + }, + { + "epoch": 0.27, + "grad_norm": 11.600074264265764, + "learning_rate": 1.9445233091401067e-05, + "loss": 1.5738, + "step": 22176 + }, + { + "epoch": 0.27, + "grad_norm": 6.196697127064217, + "learning_rate": 1.9445041205284157e-05, + "loss": 1.3985, + "step": 22179 + }, + { + "epoch": 0.27, + "grad_norm": 6.040087664138186, + "learning_rate": 1.944484928693472e-05, + "loss": 1.6755, + "step": 22182 + }, + { + "epoch": 0.27, + "grad_norm": 17.856620359355198, + "learning_rate": 1.9444657336353415e-05, + "loss": 1.6015, + "step": 22185 + }, + { + "epoch": 0.27, + "grad_norm": 5.755713186677204, + "learning_rate": 1.9444465353540897e-05, + "loss": 1.5971, + "step": 22188 + }, + { + "epoch": 0.27, + "grad_norm": 16.95190166401116, + "learning_rate": 1.9444273338497817e-05, + "loss": 1.4559, + "step": 22191 + }, + { + "epoch": 0.27, + "grad_norm": 3.106592125656203, + "learning_rate": 1.9444081291224842e-05, + "loss": 1.7102, + "step": 22194 + }, + { + "epoch": 0.27, + "grad_norm": 24.347693855747014, + "learning_rate": 1.9443889211722613e-05, + "loss": 1.4499, + "step": 22197 + }, + { + "epoch": 0.27, + "grad_norm": 23.830584992253787, + "learning_rate": 1.9443697099991792e-05, + "loss": 1.3094, + "step": 22200 + }, + { + "epoch": 0.27, + "grad_norm": 8.980383441012519, + "learning_rate": 1.944350495603304e-05, + "loss": 1.1986, + "step": 22203 + }, + { + "epoch": 0.27, + "grad_norm": 19.49765933359169, + "learning_rate": 1.9443312779847e-05, + "loss": 1.105, + "step": 22206 + }, + { + "epoch": 0.27, + "grad_norm": 13.334989796182047, + "learning_rate": 1.9443120571434337e-05, + "loss": 1.6967, + "step": 22209 + }, + { + "epoch": 0.27, + "grad_norm": 43.810089984214834, + "learning_rate": 1.9442928330795707e-05, + "loss": 1.5142, + "step": 22212 + }, + { + "epoch": 0.27, + "grad_norm": 5.000650555842482, + "learning_rate": 1.9442736057931763e-05, + "loss": 1.6962, + "step": 22215 + }, + { + "epoch": 0.27, + "grad_norm": 10.844800376889077, + "learning_rate": 1.944254375284316e-05, + "loss": 1.4814, + "step": 22218 + }, + { + "epoch": 0.27, + "grad_norm": 11.007936824258314, + "learning_rate": 1.944235141553056e-05, + "loss": 1.1983, + "step": 22221 + }, + { + "epoch": 0.27, + "grad_norm": 7.888571330250559, + "learning_rate": 1.9442159045994614e-05, + "loss": 1.4621, + "step": 22224 + }, + { + "epoch": 0.27, + "grad_norm": 2.528576200639764, + "learning_rate": 1.944196664423598e-05, + "loss": 1.6408, + "step": 22227 + }, + { + "epoch": 0.27, + "grad_norm": 9.777800227431879, + "learning_rate": 1.9441774210255315e-05, + "loss": 1.4152, + "step": 22230 + }, + { + "epoch": 0.27, + "grad_norm": 2.787682847622045, + "learning_rate": 1.9441581744053275e-05, + "loss": 1.8465, + "step": 22233 + }, + { + "epoch": 0.27, + "grad_norm": 78.13005358304407, + "learning_rate": 1.944138924563052e-05, + "loss": 1.7384, + "step": 22236 + }, + { + "epoch": 0.27, + "grad_norm": 25.982193592175296, + "learning_rate": 1.94411967149877e-05, + "loss": 1.3368, + "step": 22239 + }, + { + "epoch": 0.27, + "grad_norm": 8.46076322837266, + "learning_rate": 1.9441004152125477e-05, + "loss": 1.5055, + "step": 22242 + }, + { + "epoch": 0.27, + "grad_norm": 15.29750804971963, + "learning_rate": 1.944081155704451e-05, + "loss": 1.7597, + "step": 22245 + }, + { + "epoch": 0.27, + "grad_norm": 13.557236605143249, + "learning_rate": 1.9440618929745454e-05, + "loss": 1.5259, + "step": 22248 + }, + { + "epoch": 0.27, + "grad_norm": 10.41260878914603, + "learning_rate": 1.9440426270228964e-05, + "loss": 1.7222, + "step": 22251 + }, + { + "epoch": 0.27, + "grad_norm": 13.333180320035988, + "learning_rate": 1.94402335784957e-05, + "loss": 1.5598, + "step": 22254 + }, + { + "epoch": 0.27, + "grad_norm": 3.3275994333382877, + "learning_rate": 1.944004085454632e-05, + "loss": 1.4873, + "step": 22257 + }, + { + "epoch": 0.27, + "grad_norm": 12.867318970894082, + "learning_rate": 1.9439848098381482e-05, + "loss": 1.3956, + "step": 22260 + }, + { + "epoch": 0.27, + "grad_norm": 14.106425679736441, + "learning_rate": 1.943965531000184e-05, + "loss": 1.388, + "step": 22263 + }, + { + "epoch": 0.27, + "grad_norm": 12.618924048654215, + "learning_rate": 1.9439462489408053e-05, + "loss": 1.5578, + "step": 22266 + }, + { + "epoch": 0.27, + "grad_norm": 8.880016734560728, + "learning_rate": 1.9439269636600785e-05, + "loss": 1.2953, + "step": 22269 + }, + { + "epoch": 0.27, + "grad_norm": 11.548118703885942, + "learning_rate": 1.943907675158069e-05, + "loss": 1.6501, + "step": 22272 + }, + { + "epoch": 0.27, + "grad_norm": 4.3447921437443116, + "learning_rate": 1.943888383434842e-05, + "loss": 1.5552, + "step": 22275 + }, + { + "epoch": 0.27, + "grad_norm": 6.0087297754202655, + "learning_rate": 1.9438690884904644e-05, + "loss": 1.5332, + "step": 22278 + }, + { + "epoch": 0.27, + "grad_norm": 20.97120828938141, + "learning_rate": 1.9438497903250018e-05, + "loss": 1.6679, + "step": 22281 + }, + { + "epoch": 0.27, + "grad_norm": 19.968373866538073, + "learning_rate": 1.9438304889385192e-05, + "loss": 1.5867, + "step": 22284 + }, + { + "epoch": 0.27, + "grad_norm": 9.845777031638175, + "learning_rate": 1.9438111843310837e-05, + "loss": 1.5098, + "step": 22287 + }, + { + "epoch": 0.27, + "grad_norm": 5.986274034671452, + "learning_rate": 1.9437918765027603e-05, + "loss": 1.4264, + "step": 22290 + }, + { + "epoch": 0.27, + "grad_norm": 4.0115142659774135, + "learning_rate": 1.9437725654536152e-05, + "loss": 1.4729, + "step": 22293 + }, + { + "epoch": 0.27, + "grad_norm": 4.993161279444612, + "learning_rate": 1.9437532511837144e-05, + "loss": 1.6499, + "step": 22296 + }, + { + "epoch": 0.27, + "grad_norm": 5.806316147841532, + "learning_rate": 1.9437339336931232e-05, + "loss": 1.8603, + "step": 22299 + }, + { + "epoch": 0.27, + "grad_norm": 10.387180760314095, + "learning_rate": 1.9437146129819088e-05, + "loss": 1.2347, + "step": 22302 + }, + { + "epoch": 0.27, + "grad_norm": 4.317636754337474, + "learning_rate": 1.943695289050136e-05, + "loss": 1.295, + "step": 22305 + }, + { + "epoch": 0.27, + "grad_norm": 44.56965615522037, + "learning_rate": 1.943675961897871e-05, + "loss": 1.4822, + "step": 22308 + }, + { + "epoch": 0.27, + "grad_norm": 4.406938271640543, + "learning_rate": 1.94365663152518e-05, + "loss": 1.7106, + "step": 22311 + }, + { + "epoch": 0.27, + "grad_norm": 24.85425093077951, + "learning_rate": 1.9436372979321285e-05, + "loss": 1.3002, + "step": 22314 + }, + { + "epoch": 0.27, + "grad_norm": 27.53131443466447, + "learning_rate": 1.9436179611187832e-05, + "loss": 1.999, + "step": 22317 + }, + { + "epoch": 0.27, + "grad_norm": 23.184420705342045, + "learning_rate": 1.9435986210852094e-05, + "loss": 1.4515, + "step": 22320 + }, + { + "epoch": 0.27, + "grad_norm": 23.383144617639243, + "learning_rate": 1.9435792778314732e-05, + "loss": 1.6756, + "step": 22323 + }, + { + "epoch": 0.27, + "grad_norm": 6.572660579754874, + "learning_rate": 1.9435599313576413e-05, + "loss": 1.3732, + "step": 22326 + }, + { + "epoch": 0.27, + "grad_norm": 17.429816746847813, + "learning_rate": 1.943540581663779e-05, + "loss": 1.3018, + "step": 22329 + }, + { + "epoch": 0.27, + "grad_norm": 17.47066211328042, + "learning_rate": 1.9435212287499526e-05, + "loss": 1.5133, + "step": 22332 + }, + { + "epoch": 0.27, + "grad_norm": 3.8004868334789954, + "learning_rate": 1.943501872616228e-05, + "loss": 1.6993, + "step": 22335 + }, + { + "epoch": 0.27, + "grad_norm": 15.747007412068196, + "learning_rate": 1.9434825132626715e-05, + "loss": 1.2596, + "step": 22338 + }, + { + "epoch": 0.27, + "grad_norm": 16.95691235876775, + "learning_rate": 1.943463150689349e-05, + "loss": 1.6648, + "step": 22341 + }, + { + "epoch": 0.27, + "grad_norm": 51.09524629669757, + "learning_rate": 1.9434437848963267e-05, + "loss": 1.5823, + "step": 22344 + }, + { + "epoch": 0.27, + "grad_norm": 24.5039238897099, + "learning_rate": 1.94342441588367e-05, + "loss": 1.6275, + "step": 22347 + }, + { + "epoch": 0.27, + "grad_norm": 12.992077513819098, + "learning_rate": 1.9434050436514466e-05, + "loss": 1.182, + "step": 22350 + }, + { + "epoch": 0.27, + "grad_norm": 11.80667838919709, + "learning_rate": 1.9433856681997207e-05, + "loss": 1.4709, + "step": 22353 + }, + { + "epoch": 0.27, + "grad_norm": 11.629986621490865, + "learning_rate": 1.9433662895285598e-05, + "loss": 1.9347, + "step": 22356 + }, + { + "epoch": 0.27, + "grad_norm": 5.395148549828816, + "learning_rate": 1.9433469076380293e-05, + "loss": 1.4688, + "step": 22359 + }, + { + "epoch": 0.27, + "grad_norm": 20.760759970383177, + "learning_rate": 1.9433275225281955e-05, + "loss": 1.3441, + "step": 22362 + }, + { + "epoch": 0.27, + "grad_norm": 43.642365304003675, + "learning_rate": 1.943308134199125e-05, + "loss": 1.3831, + "step": 22365 + }, + { + "epoch": 0.27, + "grad_norm": 12.185028665816048, + "learning_rate": 1.9432887426508834e-05, + "loss": 1.5097, + "step": 22368 + }, + { + "epoch": 0.27, + "grad_norm": 10.1009790236376, + "learning_rate": 1.943269347883537e-05, + "loss": 1.719, + "step": 22371 + }, + { + "epoch": 0.27, + "grad_norm": 20.67848020946255, + "learning_rate": 1.9432499498971525e-05, + "loss": 1.6602, + "step": 22374 + }, + { + "epoch": 0.27, + "grad_norm": 33.07876171994791, + "learning_rate": 1.9432305486917954e-05, + "loss": 1.3479, + "step": 22377 + }, + { + "epoch": 0.27, + "grad_norm": 59.69950261393768, + "learning_rate": 1.9432111442675322e-05, + "loss": 1.5465, + "step": 22380 + }, + { + "epoch": 0.27, + "grad_norm": 4.396846409844844, + "learning_rate": 1.9431917366244293e-05, + "loss": 1.8778, + "step": 22383 + }, + { + "epoch": 0.27, + "grad_norm": 24.78268309352461, + "learning_rate": 1.9431723257625523e-05, + "loss": 1.3361, + "step": 22386 + }, + { + "epoch": 0.27, + "grad_norm": 16.953195482647335, + "learning_rate": 1.9431529116819682e-05, + "loss": 1.3341, + "step": 22389 + }, + { + "epoch": 0.27, + "grad_norm": 31.553102240251988, + "learning_rate": 1.943133494382743e-05, + "loss": 1.7975, + "step": 22392 + }, + { + "epoch": 0.27, + "grad_norm": 27.73959886088699, + "learning_rate": 1.9431140738649427e-05, + "loss": 1.6669, + "step": 22395 + }, + { + "epoch": 0.27, + "grad_norm": 23.706339711489655, + "learning_rate": 1.943094650128634e-05, + "loss": 1.6394, + "step": 22398 + }, + { + "epoch": 0.27, + "grad_norm": 14.457473859256867, + "learning_rate": 1.9430752231738825e-05, + "loss": 1.5897, + "step": 22401 + }, + { + "epoch": 0.27, + "grad_norm": 9.961131500008374, + "learning_rate": 1.9430557930007555e-05, + "loss": 1.5601, + "step": 22404 + }, + { + "epoch": 0.27, + "grad_norm": 24.466076461411344, + "learning_rate": 1.9430363596093184e-05, + "loss": 1.4735, + "step": 22407 + }, + { + "epoch": 0.27, + "grad_norm": 60.832396094564125, + "learning_rate": 1.9430169229996382e-05, + "loss": 1.6276, + "step": 22410 + }, + { + "epoch": 0.27, + "grad_norm": 5.5969174267672015, + "learning_rate": 1.9429974831717806e-05, + "loss": 1.3264, + "step": 22413 + }, + { + "epoch": 0.27, + "grad_norm": 18.052829575505825, + "learning_rate": 1.942978040125812e-05, + "loss": 1.9257, + "step": 22416 + }, + { + "epoch": 0.27, + "grad_norm": 20.85158239045654, + "learning_rate": 1.9429585938618e-05, + "loss": 1.4769, + "step": 22419 + }, + { + "epoch": 0.27, + "grad_norm": 5.985467775742078, + "learning_rate": 1.942939144379809e-05, + "loss": 1.5227, + "step": 22422 + }, + { + "epoch": 0.27, + "grad_norm": 9.90693618452134, + "learning_rate": 1.9429196916799066e-05, + "loss": 2.0049, + "step": 22425 + }, + { + "epoch": 0.27, + "grad_norm": 25.470864061555147, + "learning_rate": 1.942900235762159e-05, + "loss": 1.9393, + "step": 22428 + }, + { + "epoch": 0.27, + "grad_norm": 16.77711066844139, + "learning_rate": 1.9428807766266328e-05, + "loss": 1.3692, + "step": 22431 + }, + { + "epoch": 0.27, + "grad_norm": 23.228710991726302, + "learning_rate": 1.942861314273394e-05, + "loss": 1.12, + "step": 22434 + }, + { + "epoch": 0.27, + "grad_norm": 41.42093348799775, + "learning_rate": 1.942841848702509e-05, + "loss": 1.699, + "step": 22437 + }, + { + "epoch": 0.27, + "grad_norm": 7.030277583469023, + "learning_rate": 1.9428223799140445e-05, + "loss": 1.5279, + "step": 22440 + }, + { + "epoch": 0.27, + "grad_norm": 18.814356751059137, + "learning_rate": 1.9428029079080667e-05, + "loss": 1.6822, + "step": 22443 + }, + { + "epoch": 0.27, + "grad_norm": 41.5176028708825, + "learning_rate": 1.9427834326846423e-05, + "loss": 1.9329, + "step": 22446 + }, + { + "epoch": 0.27, + "grad_norm": 21.360657676269, + "learning_rate": 1.9427639542438375e-05, + "loss": 1.6733, + "step": 22449 + }, + { + "epoch": 0.27, + "grad_norm": 13.032729414879295, + "learning_rate": 1.942744472585719e-05, + "loss": 1.5865, + "step": 22452 + }, + { + "epoch": 0.27, + "grad_norm": 20.33618955855685, + "learning_rate": 1.9427249877103528e-05, + "loss": 1.642, + "step": 22455 + }, + { + "epoch": 0.27, + "grad_norm": 18.070379889709606, + "learning_rate": 1.9427054996178065e-05, + "loss": 1.7984, + "step": 22458 + }, + { + "epoch": 0.27, + "grad_norm": 25.884831102134708, + "learning_rate": 1.9426860083081453e-05, + "loss": 1.9139, + "step": 22461 + }, + { + "epoch": 0.27, + "grad_norm": 8.884168190138809, + "learning_rate": 1.9426665137814367e-05, + "loss": 1.7033, + "step": 22464 + }, + { + "epoch": 0.27, + "grad_norm": 19.78985772448372, + "learning_rate": 1.9426470160377468e-05, + "loss": 1.8162, + "step": 22467 + }, + { + "epoch": 0.27, + "grad_norm": 20.45252335057354, + "learning_rate": 1.942627515077142e-05, + "loss": 1.5618, + "step": 22470 + }, + { + "epoch": 0.27, + "grad_norm": 45.02353620978751, + "learning_rate": 1.9426080108996886e-05, + "loss": 1.6453, + "step": 22473 + }, + { + "epoch": 0.27, + "grad_norm": 10.537017585396608, + "learning_rate": 1.9425885035054542e-05, + "loss": 1.4732, + "step": 22476 + }, + { + "epoch": 0.27, + "grad_norm": 16.287795928289317, + "learning_rate": 1.9425689928945047e-05, + "loss": 1.5841, + "step": 22479 + }, + { + "epoch": 0.27, + "grad_norm": 9.4499631316946, + "learning_rate": 1.9425494790669067e-05, + "loss": 1.556, + "step": 22482 + }, + { + "epoch": 0.27, + "grad_norm": 13.348879386935687, + "learning_rate": 1.942529962022727e-05, + "loss": 1.6614, + "step": 22485 + }, + { + "epoch": 0.27, + "grad_norm": 68.93418513208275, + "learning_rate": 1.9425104417620314e-05, + "loss": 1.3457, + "step": 22488 + }, + { + "epoch": 0.27, + "grad_norm": 14.649389867209445, + "learning_rate": 1.9424909182848876e-05, + "loss": 1.6163, + "step": 22491 + }, + { + "epoch": 0.27, + "grad_norm": 10.979917385309314, + "learning_rate": 1.942471391591362e-05, + "loss": 1.5349, + "step": 22494 + }, + { + "epoch": 0.27, + "grad_norm": 18.349547380556402, + "learning_rate": 1.9424518616815206e-05, + "loss": 1.7054, + "step": 22497 + }, + { + "epoch": 0.27, + "grad_norm": 27.940173382211636, + "learning_rate": 1.9424323285554308e-05, + "loss": 1.6847, + "step": 22500 + }, + { + "epoch": 0.27, + "grad_norm": 24.661400855958824, + "learning_rate": 1.942412792213159e-05, + "loss": 1.4165, + "step": 22503 + }, + { + "epoch": 0.27, + "grad_norm": 11.441736690116187, + "learning_rate": 1.942393252654771e-05, + "loss": 1.429, + "step": 22506 + }, + { + "epoch": 0.27, + "grad_norm": 68.98650012713689, + "learning_rate": 1.942373709880335e-05, + "loss": 1.6602, + "step": 22509 + }, + { + "epoch": 0.27, + "grad_norm": 6.502601045455852, + "learning_rate": 1.9423541638899168e-05, + "loss": 1.3577, + "step": 22512 + }, + { + "epoch": 0.27, + "grad_norm": 18.570323850870174, + "learning_rate": 1.9423346146835835e-05, + "loss": 1.2215, + "step": 22515 + }, + { + "epoch": 0.27, + "grad_norm": 6.3458648572057585, + "learning_rate": 1.9423150622614015e-05, + "loss": 1.4603, + "step": 22518 + }, + { + "epoch": 0.27, + "grad_norm": 11.132604292839567, + "learning_rate": 1.9422955066234372e-05, + "loss": 1.5519, + "step": 22521 + }, + { + "epoch": 0.27, + "grad_norm": 16.163358964996107, + "learning_rate": 1.942275947769758e-05, + "loss": 1.4673, + "step": 22524 + }, + { + "epoch": 0.27, + "grad_norm": 7.5046965115866024, + "learning_rate": 1.9422563857004305e-05, + "loss": 1.6242, + "step": 22527 + }, + { + "epoch": 0.27, + "grad_norm": 91.1656173931378, + "learning_rate": 1.9422368204155213e-05, + "loss": 1.4369, + "step": 22530 + }, + { + "epoch": 0.27, + "grad_norm": 41.99712332145433, + "learning_rate": 1.942217251915097e-05, + "loss": 1.6206, + "step": 22533 + }, + { + "epoch": 0.27, + "grad_norm": 49.03536888322172, + "learning_rate": 1.9421976801992247e-05, + "loss": 1.3691, + "step": 22536 + }, + { + "epoch": 0.27, + "grad_norm": 5.512298230063533, + "learning_rate": 1.9421781052679712e-05, + "loss": 1.5855, + "step": 22539 + }, + { + "epoch": 0.27, + "grad_norm": 8.125438238373784, + "learning_rate": 1.9421585271214034e-05, + "loss": 1.4146, + "step": 22542 + }, + { + "epoch": 0.27, + "grad_norm": 12.70330845966336, + "learning_rate": 1.9421389457595875e-05, + "loss": 1.6513, + "step": 22545 + }, + { + "epoch": 0.27, + "grad_norm": 10.232086393842696, + "learning_rate": 1.942119361182591e-05, + "loss": 1.1239, + "step": 22548 + }, + { + "epoch": 0.27, + "grad_norm": 10.387788546353752, + "learning_rate": 1.9420997733904803e-05, + "loss": 1.5517, + "step": 22551 + }, + { + "epoch": 0.27, + "grad_norm": 5.681500976525429, + "learning_rate": 1.9420801823833228e-05, + "loss": 1.903, + "step": 22554 + }, + { + "epoch": 0.27, + "grad_norm": 30.053702512620156, + "learning_rate": 1.9420605881611846e-05, + "loss": 1.5487, + "step": 22557 + }, + { + "epoch": 0.27, + "grad_norm": 7.673169900560377, + "learning_rate": 1.9420409907241332e-05, + "loss": 1.6416, + "step": 22560 + }, + { + "epoch": 0.27, + "grad_norm": 9.295146241215301, + "learning_rate": 1.942021390072235e-05, + "loss": 1.4194, + "step": 22563 + }, + { + "epoch": 0.27, + "grad_norm": 21.32949131912636, + "learning_rate": 1.942001786205557e-05, + "loss": 1.4612, + "step": 22566 + }, + { + "epoch": 0.27, + "grad_norm": 15.956087323052781, + "learning_rate": 1.9419821791241666e-05, + "loss": 1.717, + "step": 22569 + }, + { + "epoch": 0.27, + "grad_norm": 16.63455156053647, + "learning_rate": 1.9419625688281302e-05, + "loss": 1.6539, + "step": 22572 + }, + { + "epoch": 0.27, + "grad_norm": 8.886046293078701, + "learning_rate": 1.9419429553175146e-05, + "loss": 1.3878, + "step": 22575 + }, + { + "epoch": 0.27, + "grad_norm": 8.227772613801031, + "learning_rate": 1.9419233385923873e-05, + "loss": 1.5072, + "step": 22578 + }, + { + "epoch": 0.27, + "grad_norm": 49.03398920794388, + "learning_rate": 1.9419037186528147e-05, + "loss": 1.7332, + "step": 22581 + }, + { + "epoch": 0.27, + "grad_norm": 4.876696663938881, + "learning_rate": 1.941884095498864e-05, + "loss": 1.5042, + "step": 22584 + }, + { + "epoch": 0.27, + "grad_norm": 23.26200290396705, + "learning_rate": 1.9418644691306022e-05, + "loss": 1.6415, + "step": 22587 + }, + { + "epoch": 0.27, + "grad_norm": 36.2179382480332, + "learning_rate": 1.9418448395480962e-05, + "loss": 1.497, + "step": 22590 + }, + { + "epoch": 0.27, + "grad_norm": 44.03286162451049, + "learning_rate": 1.941825206751413e-05, + "loss": 1.3682, + "step": 22593 + }, + { + "epoch": 0.27, + "grad_norm": 15.273467153489095, + "learning_rate": 1.9418055707406196e-05, + "loss": 1.6029, + "step": 22596 + }, + { + "epoch": 0.27, + "grad_norm": 21.656177870073535, + "learning_rate": 1.9417859315157833e-05, + "loss": 1.4188, + "step": 22599 + }, + { + "epoch": 0.27, + "grad_norm": 9.219021301704789, + "learning_rate": 1.9417662890769705e-05, + "loss": 1.3501, + "step": 22602 + }, + { + "epoch": 0.27, + "grad_norm": 6.968673378064231, + "learning_rate": 1.9417466434242484e-05, + "loss": 1.2145, + "step": 22605 + }, + { + "epoch": 0.27, + "grad_norm": 21.445156916730976, + "learning_rate": 1.9417269945576848e-05, + "loss": 1.5007, + "step": 22608 + }, + { + "epoch": 0.27, + "grad_norm": 39.110947544308225, + "learning_rate": 1.9417073424773457e-05, + "loss": 1.3756, + "step": 22611 + }, + { + "epoch": 0.27, + "grad_norm": 13.57221794378093, + "learning_rate": 1.941687687183299e-05, + "loss": 1.7512, + "step": 22614 + }, + { + "epoch": 0.27, + "grad_norm": 10.56691331042542, + "learning_rate": 1.941668028675611e-05, + "loss": 1.5457, + "step": 22617 + }, + { + "epoch": 0.27, + "grad_norm": 14.372215448398972, + "learning_rate": 1.9416483669543495e-05, + "loss": 1.3283, + "step": 22620 + }, + { + "epoch": 0.27, + "grad_norm": 27.631640471635052, + "learning_rate": 1.9416287020195814e-05, + "loss": 1.5426, + "step": 22623 + }, + { + "epoch": 0.27, + "grad_norm": 10.001408690607015, + "learning_rate": 1.9416090338713735e-05, + "loss": 1.3751, + "step": 22626 + }, + { + "epoch": 0.27, + "grad_norm": 10.789358260904912, + "learning_rate": 1.941589362509793e-05, + "loss": 1.5227, + "step": 22629 + }, + { + "epoch": 0.27, + "grad_norm": 5.941221020631729, + "learning_rate": 1.9415696879349074e-05, + "loss": 1.6823, + "step": 22632 + }, + { + "epoch": 0.27, + "grad_norm": 20.064116226952503, + "learning_rate": 1.9415500101467836e-05, + "loss": 1.5877, + "step": 22635 + }, + { + "epoch": 0.27, + "grad_norm": 14.066936353215384, + "learning_rate": 1.9415303291454888e-05, + "loss": 1.6979, + "step": 22638 + }, + { + "epoch": 0.27, + "grad_norm": 213.44131732342314, + "learning_rate": 1.94151064493109e-05, + "loss": 1.7576, + "step": 22641 + }, + { + "epoch": 0.27, + "grad_norm": 16.514101672008824, + "learning_rate": 1.9414909575036547e-05, + "loss": 1.559, + "step": 22644 + }, + { + "epoch": 0.27, + "grad_norm": 9.818268006431301, + "learning_rate": 1.94147126686325e-05, + "loss": 1.4254, + "step": 22647 + }, + { + "epoch": 0.27, + "grad_norm": 20.154759814279558, + "learning_rate": 1.9414515730099425e-05, + "loss": 2.195, + "step": 22650 + }, + { + "epoch": 0.27, + "grad_norm": 4.923811990484822, + "learning_rate": 1.9414318759438e-05, + "loss": 1.1774, + "step": 22653 + }, + { + "epoch": 0.27, + "grad_norm": 37.653462155770555, + "learning_rate": 1.9414121756648896e-05, + "loss": 1.4181, + "step": 22656 + }, + { + "epoch": 0.27, + "grad_norm": 56.291738931247394, + "learning_rate": 1.941392472173279e-05, + "loss": 1.3714, + "step": 22659 + }, + { + "epoch": 0.27, + "grad_norm": 33.45557950222769, + "learning_rate": 1.9413727654690343e-05, + "loss": 1.7244, + "step": 22662 + }, + { + "epoch": 0.27, + "grad_norm": 29.605537340699254, + "learning_rate": 1.9413530555522237e-05, + "loss": 1.512, + "step": 22665 + }, + { + "epoch": 0.27, + "grad_norm": 18.5409410995146, + "learning_rate": 1.9413333424229143e-05, + "loss": 1.3237, + "step": 22668 + }, + { + "epoch": 0.27, + "grad_norm": 37.661440507627255, + "learning_rate": 1.9413136260811735e-05, + "loss": 2.0285, + "step": 22671 + }, + { + "epoch": 0.27, + "grad_norm": 6.338729580658889, + "learning_rate": 1.941293906527068e-05, + "loss": 1.4049, + "step": 22674 + }, + { + "epoch": 0.27, + "grad_norm": 48.385814719619894, + "learning_rate": 1.9412741837606653e-05, + "loss": 1.5376, + "step": 22677 + }, + { + "epoch": 0.27, + "grad_norm": 9.692851868322139, + "learning_rate": 1.9412544577820335e-05, + "loss": 1.6713, + "step": 22680 + }, + { + "epoch": 0.27, + "grad_norm": 33.05013491466019, + "learning_rate": 1.9412347285912386e-05, + "loss": 1.556, + "step": 22683 + }, + { + "epoch": 0.27, + "grad_norm": 13.429428494541696, + "learning_rate": 1.9412149961883487e-05, + "loss": 1.8591, + "step": 22686 + }, + { + "epoch": 0.27, + "grad_norm": 15.187069782914815, + "learning_rate": 1.9411952605734313e-05, + "loss": 1.6189, + "step": 22689 + }, + { + "epoch": 0.27, + "grad_norm": 26.280477736125405, + "learning_rate": 1.9411755217465534e-05, + "loss": 1.5739, + "step": 22692 + }, + { + "epoch": 0.27, + "grad_norm": 57.881765665531596, + "learning_rate": 1.9411557797077825e-05, + "loss": 1.4674, + "step": 22695 + }, + { + "epoch": 0.27, + "grad_norm": 10.974976849497978, + "learning_rate": 1.941136034457186e-05, + "loss": 1.5985, + "step": 22698 + }, + { + "epoch": 0.27, + "grad_norm": 15.84831660206098, + "learning_rate": 1.941116285994831e-05, + "loss": 1.972, + "step": 22701 + }, + { + "epoch": 0.27, + "grad_norm": 2.7929924597127083, + "learning_rate": 1.9410965343207855e-05, + "loss": 1.7994, + "step": 22704 + }, + { + "epoch": 0.27, + "grad_norm": 8.72396788282804, + "learning_rate": 1.941076779435116e-05, + "loss": 1.3272, + "step": 22707 + }, + { + "epoch": 0.27, + "grad_norm": 28.590033820064914, + "learning_rate": 1.9410570213378907e-05, + "loss": 1.501, + "step": 22710 + }, + { + "epoch": 0.27, + "grad_norm": 27.58052833839458, + "learning_rate": 1.941037260029177e-05, + "loss": 1.3317, + "step": 22713 + }, + { + "epoch": 0.27, + "grad_norm": 6.148575212036649, + "learning_rate": 1.9410174955090416e-05, + "loss": 1.6123, + "step": 22716 + }, + { + "epoch": 0.27, + "grad_norm": 7.742495759840136, + "learning_rate": 1.9409977277775523e-05, + "loss": 1.218, + "step": 22719 + }, + { + "epoch": 0.27, + "grad_norm": 18.541673755725945, + "learning_rate": 1.9409779568347772e-05, + "loss": 1.6904, + "step": 22722 + }, + { + "epoch": 0.27, + "grad_norm": 6.170823349923314, + "learning_rate": 1.9409581826807833e-05, + "loss": 1.4968, + "step": 22725 + }, + { + "epoch": 0.27, + "grad_norm": 11.819265741366607, + "learning_rate": 1.9409384053156376e-05, + "loss": 1.4879, + "step": 22728 + }, + { + "epoch": 0.27, + "grad_norm": 16.24771827529102, + "learning_rate": 1.9409186247394084e-05, + "loss": 1.2594, + "step": 22731 + }, + { + "epoch": 0.27, + "grad_norm": 7.954389608917993, + "learning_rate": 1.9408988409521626e-05, + "loss": 1.5255, + "step": 22734 + }, + { + "epoch": 0.27, + "grad_norm": 8.225207385141859, + "learning_rate": 1.9408790539539685e-05, + "loss": 1.2841, + "step": 22737 + }, + { + "epoch": 0.27, + "grad_norm": 5.566838789155218, + "learning_rate": 1.9408592637448923e-05, + "loss": 1.3595, + "step": 22740 + }, + { + "epoch": 0.27, + "grad_norm": 4.116295993338954, + "learning_rate": 1.940839470325003e-05, + "loss": 1.5653, + "step": 22743 + }, + { + "epoch": 0.27, + "grad_norm": 15.819085631757472, + "learning_rate": 1.940819673694367e-05, + "loss": 1.4056, + "step": 22746 + }, + { + "epoch": 0.27, + "grad_norm": 14.67657834406033, + "learning_rate": 1.9407998738530526e-05, + "loss": 1.617, + "step": 22749 + }, + { + "epoch": 0.27, + "grad_norm": 32.50844677237321, + "learning_rate": 1.940780070801127e-05, + "loss": 1.8193, + "step": 22752 + }, + { + "epoch": 0.27, + "grad_norm": 20.440422285267378, + "learning_rate": 1.9407602645386582e-05, + "loss": 1.3115, + "step": 22755 + }, + { + "epoch": 0.27, + "grad_norm": 5.53570452649081, + "learning_rate": 1.9407404550657133e-05, + "loss": 1.5384, + "step": 22758 + }, + { + "epoch": 0.27, + "grad_norm": 4.257203471612239, + "learning_rate": 1.9407206423823597e-05, + "loss": 1.2953, + "step": 22761 + }, + { + "epoch": 0.27, + "grad_norm": 13.430873315009908, + "learning_rate": 1.9407008264886658e-05, + "loss": 1.4835, + "step": 22764 + }, + { + "epoch": 0.27, + "grad_norm": 11.970482844068883, + "learning_rate": 1.9406810073846988e-05, + "loss": 1.6447, + "step": 22767 + }, + { + "epoch": 0.27, + "grad_norm": 10.977724408030372, + "learning_rate": 1.940661185070526e-05, + "loss": 1.2227, + "step": 22770 + }, + { + "epoch": 0.27, + "grad_norm": 23.856632107869366, + "learning_rate": 1.940641359546216e-05, + "loss": 1.3024, + "step": 22773 + }, + { + "epoch": 0.27, + "grad_norm": 18.029685555871897, + "learning_rate": 1.9406215308118353e-05, + "loss": 1.6533, + "step": 22776 + }, + { + "epoch": 0.27, + "grad_norm": 9.681145477432992, + "learning_rate": 1.9406016988674526e-05, + "loss": 1.4897, + "step": 22779 + }, + { + "epoch": 0.27, + "grad_norm": 13.62054579365144, + "learning_rate": 1.9405818637131345e-05, + "loss": 1.526, + "step": 22782 + }, + { + "epoch": 0.27, + "grad_norm": 13.300791467933557, + "learning_rate": 1.9405620253489498e-05, + "loss": 1.4107, + "step": 22785 + }, + { + "epoch": 0.27, + "grad_norm": 6.227745693044587, + "learning_rate": 1.9405421837749656e-05, + "loss": 1.407, + "step": 22788 + }, + { + "epoch": 0.27, + "grad_norm": 12.404068479712404, + "learning_rate": 1.9405223389912496e-05, + "loss": 1.4337, + "step": 22791 + }, + { + "epoch": 0.27, + "grad_norm": 14.08872486224484, + "learning_rate": 1.9405024909978697e-05, + "loss": 1.8004, + "step": 22794 + }, + { + "epoch": 0.27, + "grad_norm": 10.66940661635737, + "learning_rate": 1.9404826397948935e-05, + "loss": 1.4816, + "step": 22797 + }, + { + "epoch": 0.27, + "grad_norm": 12.490054078582377, + "learning_rate": 1.9404627853823886e-05, + "loss": 1.4688, + "step": 22800 + }, + { + "epoch": 0.27, + "grad_norm": 11.122786490719056, + "learning_rate": 1.9404429277604236e-05, + "loss": 1.5267, + "step": 22803 + }, + { + "epoch": 0.27, + "grad_norm": 10.404426825519018, + "learning_rate": 1.940423066929065e-05, + "loss": 1.4731, + "step": 22806 + }, + { + "epoch": 0.27, + "grad_norm": 10.101005609464867, + "learning_rate": 1.9404032028883817e-05, + "loss": 1.4253, + "step": 22809 + }, + { + "epoch": 0.27, + "grad_norm": 64.48698398507445, + "learning_rate": 1.940383335638441e-05, + "loss": 1.4175, + "step": 22812 + }, + { + "epoch": 0.27, + "grad_norm": 6.132626224510051, + "learning_rate": 1.94036346517931e-05, + "loss": 1.4408, + "step": 22815 + }, + { + "epoch": 0.27, + "grad_norm": 26.852037983879015, + "learning_rate": 1.9403435915110578e-05, + "loss": 1.571, + "step": 22818 + }, + { + "epoch": 0.27, + "grad_norm": 20.658526323161436, + "learning_rate": 1.9403237146337517e-05, + "loss": 1.6759, + "step": 22821 + }, + { + "epoch": 0.27, + "grad_norm": 31.20475142648541, + "learning_rate": 1.940303834547459e-05, + "loss": 1.8412, + "step": 22824 + }, + { + "epoch": 0.27, + "grad_norm": 13.697011228735791, + "learning_rate": 1.9402839512522486e-05, + "loss": 1.7778, + "step": 22827 + }, + { + "epoch": 0.27, + "grad_norm": 24.790380816080987, + "learning_rate": 1.9402640647481874e-05, + "loss": 1.7745, + "step": 22830 + }, + { + "epoch": 0.27, + "grad_norm": 20.08341074242698, + "learning_rate": 1.9402441750353435e-05, + "loss": 1.6771, + "step": 22833 + }, + { + "epoch": 0.27, + "grad_norm": 40.29222164671382, + "learning_rate": 1.9402242821137853e-05, + "loss": 1.5046, + "step": 22836 + }, + { + "epoch": 0.27, + "grad_norm": 17.977538323608456, + "learning_rate": 1.94020438598358e-05, + "loss": 1.7535, + "step": 22839 + }, + { + "epoch": 0.27, + "grad_norm": 21.656733514104502, + "learning_rate": 1.9401844866447957e-05, + "loss": 1.3901, + "step": 22842 + }, + { + "epoch": 0.27, + "grad_norm": 16.077864104762998, + "learning_rate": 1.9401645840975005e-05, + "loss": 1.0304, + "step": 22845 + }, + { + "epoch": 0.27, + "grad_norm": 14.195082465960018, + "learning_rate": 1.9401446783417625e-05, + "loss": 1.283, + "step": 22848 + }, + { + "epoch": 0.27, + "grad_norm": 13.21686352018799, + "learning_rate": 1.940124769377649e-05, + "loss": 1.8549, + "step": 22851 + }, + { + "epoch": 0.27, + "grad_norm": 7.80126675161333, + "learning_rate": 1.9401048572052287e-05, + "loss": 1.5439, + "step": 22854 + }, + { + "epoch": 0.27, + "grad_norm": 7.118641200697287, + "learning_rate": 1.9400849418245685e-05, + "loss": 1.5483, + "step": 22857 + }, + { + "epoch": 0.27, + "grad_norm": 4.415571612391556, + "learning_rate": 1.9400650232357375e-05, + "loss": 1.477, + "step": 22860 + }, + { + "epoch": 0.27, + "grad_norm": 34.232555146601875, + "learning_rate": 1.940045101438803e-05, + "loss": 1.2037, + "step": 22863 + }, + { + "epoch": 0.27, + "grad_norm": 28.479494970563945, + "learning_rate": 1.9400251764338334e-05, + "loss": 1.3717, + "step": 22866 + }, + { + "epoch": 0.27, + "grad_norm": 64.63030339020807, + "learning_rate": 1.940005248220896e-05, + "loss": 1.5007, + "step": 22869 + }, + { + "epoch": 0.28, + "grad_norm": 6.08594774522297, + "learning_rate": 1.9399853168000598e-05, + "loss": 1.8996, + "step": 22872 + }, + { + "epoch": 0.28, + "grad_norm": 56.51527944166105, + "learning_rate": 1.939965382171392e-05, + "loss": 1.6956, + "step": 22875 + }, + { + "epoch": 0.28, + "grad_norm": 30.18746333458789, + "learning_rate": 1.939945444334961e-05, + "loss": 1.4978, + "step": 22878 + }, + { + "epoch": 0.28, + "grad_norm": 10.786287041087066, + "learning_rate": 1.939925503290835e-05, + "loss": 1.4863, + "step": 22881 + }, + { + "epoch": 0.28, + "grad_norm": 9.145095027671283, + "learning_rate": 1.9399055590390813e-05, + "loss": 1.3731, + "step": 22884 + }, + { + "epoch": 0.28, + "grad_norm": 11.447617006246288, + "learning_rate": 1.939885611579769e-05, + "loss": 1.5301, + "step": 22887 + }, + { + "epoch": 0.28, + "grad_norm": 15.015174921745908, + "learning_rate": 1.9398656609129657e-05, + "loss": 1.6264, + "step": 22890 + }, + { + "epoch": 0.28, + "grad_norm": 45.61473801027506, + "learning_rate": 1.9398457070387387e-05, + "loss": 1.4355, + "step": 22893 + }, + { + "epoch": 0.28, + "grad_norm": 14.20422536545131, + "learning_rate": 1.9398257499571575e-05, + "loss": 1.8543, + "step": 22896 + }, + { + "epoch": 0.28, + "grad_norm": 9.933140420980347, + "learning_rate": 1.9398057896682894e-05, + "loss": 1.3591, + "step": 22899 + }, + { + "epoch": 0.28, + "grad_norm": 17.891120053385418, + "learning_rate": 1.9397858261722025e-05, + "loss": 1.5814, + "step": 22902 + }, + { + "epoch": 0.28, + "grad_norm": 23.984075441708736, + "learning_rate": 1.9397658594689653e-05, + "loss": 1.5545, + "step": 22905 + }, + { + "epoch": 0.28, + "grad_norm": 7.983503342963637, + "learning_rate": 1.9397458895586455e-05, + "loss": 1.4317, + "step": 22908 + }, + { + "epoch": 0.28, + "grad_norm": 39.10351496287259, + "learning_rate": 1.9397259164413116e-05, + "loss": 1.7025, + "step": 22911 + }, + { + "epoch": 0.28, + "grad_norm": 87.43044011510887, + "learning_rate": 1.9397059401170314e-05, + "loss": 1.822, + "step": 22914 + }, + { + "epoch": 0.28, + "grad_norm": 16.994939313984826, + "learning_rate": 1.9396859605858735e-05, + "loss": 1.6885, + "step": 22917 + }, + { + "epoch": 0.28, + "grad_norm": 15.52388177636839, + "learning_rate": 1.9396659778479062e-05, + "loss": 1.6991, + "step": 22920 + }, + { + "epoch": 0.28, + "grad_norm": 15.63564194330073, + "learning_rate": 1.9396459919031972e-05, + "loss": 1.7632, + "step": 22923 + }, + { + "epoch": 0.28, + "grad_norm": 22.527655984604106, + "learning_rate": 1.9396260027518145e-05, + "loss": 1.6629, + "step": 22926 + }, + { + "epoch": 0.28, + "grad_norm": 6.306970923888079, + "learning_rate": 1.939606010393827e-05, + "loss": 1.6844, + "step": 22929 + }, + { + "epoch": 0.28, + "grad_norm": 24.313280412231514, + "learning_rate": 1.9395860148293025e-05, + "loss": 1.3921, + "step": 22932 + }, + { + "epoch": 0.28, + "grad_norm": 9.895568134915557, + "learning_rate": 1.9395660160583097e-05, + "loss": 1.7526, + "step": 22935 + }, + { + "epoch": 0.28, + "grad_norm": 83.35478715510013, + "learning_rate": 1.9395460140809163e-05, + "loss": 1.5771, + "step": 22938 + }, + { + "epoch": 0.28, + "grad_norm": 7.823872367152576, + "learning_rate": 1.9395260088971905e-05, + "loss": 1.4313, + "step": 22941 + }, + { + "epoch": 0.28, + "grad_norm": 18.561122285530047, + "learning_rate": 1.9395060005072014e-05, + "loss": 1.3411, + "step": 22944 + }, + { + "epoch": 0.28, + "grad_norm": 5.501009764695202, + "learning_rate": 1.9394859889110164e-05, + "loss": 1.9073, + "step": 22947 + }, + { + "epoch": 0.28, + "grad_norm": 21.398251459827314, + "learning_rate": 1.9394659741087042e-05, + "loss": 1.3813, + "step": 22950 + }, + { + "epoch": 0.28, + "grad_norm": 36.42309744457798, + "learning_rate": 1.9394459561003328e-05, + "loss": 1.5256, + "step": 22953 + }, + { + "epoch": 0.28, + "grad_norm": 26.303724364797024, + "learning_rate": 1.939425934885971e-05, + "loss": 1.5175, + "step": 22956 + }, + { + "epoch": 0.28, + "grad_norm": 8.8675970000061, + "learning_rate": 1.939405910465687e-05, + "loss": 1.5531, + "step": 22959 + }, + { + "epoch": 0.28, + "grad_norm": 11.578416805177643, + "learning_rate": 1.9393858828395487e-05, + "loss": 1.2984, + "step": 22962 + }, + { + "epoch": 0.28, + "grad_norm": 123.16833064935462, + "learning_rate": 1.939365852007625e-05, + "loss": 1.5391, + "step": 22965 + }, + { + "epoch": 0.28, + "grad_norm": 12.273432671642022, + "learning_rate": 1.9393458179699837e-05, + "loss": 1.268, + "step": 22968 + }, + { + "epoch": 0.28, + "grad_norm": 12.66784258747302, + "learning_rate": 1.9393257807266938e-05, + "loss": 1.7675, + "step": 22971 + }, + { + "epoch": 0.28, + "grad_norm": 17.806857918373634, + "learning_rate": 1.9393057402778232e-05, + "loss": 1.5284, + "step": 22974 + }, + { + "epoch": 0.28, + "grad_norm": 7.027297772676675, + "learning_rate": 1.9392856966234405e-05, + "loss": 1.6555, + "step": 22977 + }, + { + "epoch": 0.28, + "grad_norm": 50.671226692590885, + "learning_rate": 1.9392656497636137e-05, + "loss": 1.4384, + "step": 22980 + }, + { + "epoch": 0.28, + "grad_norm": 75.38996096967233, + "learning_rate": 1.939245599698412e-05, + "loss": 1.7297, + "step": 22983 + }, + { + "epoch": 0.28, + "grad_norm": 15.412142503131413, + "learning_rate": 1.939225546427903e-05, + "loss": 1.6021, + "step": 22986 + }, + { + "epoch": 0.28, + "grad_norm": 11.888670384392029, + "learning_rate": 1.9392054899521557e-05, + "loss": 2.0682, + "step": 22989 + }, + { + "epoch": 0.28, + "grad_norm": 24.975198003021358, + "learning_rate": 1.9391854302712385e-05, + "loss": 1.7055, + "step": 22992 + }, + { + "epoch": 0.28, + "grad_norm": 31.327298830797794, + "learning_rate": 1.9391653673852197e-05, + "loss": 1.7157, + "step": 22995 + }, + { + "epoch": 0.28, + "grad_norm": 41.8278096925679, + "learning_rate": 1.9391453012941674e-05, + "loss": 1.2989, + "step": 22998 + }, + { + "epoch": 0.28, + "grad_norm": 22.84005522202398, + "learning_rate": 1.939125231998151e-05, + "loss": 1.6082, + "step": 23001 + }, + { + "epoch": 0.28, + "grad_norm": 22.442628924887746, + "learning_rate": 1.9391051594972382e-05, + "loss": 1.6929, + "step": 23004 + }, + { + "epoch": 0.28, + "grad_norm": 5.561423810256845, + "learning_rate": 1.9390850837914977e-05, + "loss": 1.7266, + "step": 23007 + }, + { + "epoch": 0.28, + "grad_norm": 15.939017685593761, + "learning_rate": 1.9390650048809983e-05, + "loss": 1.6426, + "step": 23010 + }, + { + "epoch": 0.28, + "grad_norm": 21.151758667640575, + "learning_rate": 1.939044922765808e-05, + "loss": 1.5741, + "step": 23013 + }, + { + "epoch": 0.28, + "grad_norm": 8.166640372018978, + "learning_rate": 1.9390248374459958e-05, + "loss": 1.6798, + "step": 23016 + }, + { + "epoch": 0.28, + "grad_norm": 29.59560815971649, + "learning_rate": 1.93900474892163e-05, + "loss": 1.5525, + "step": 23019 + }, + { + "epoch": 0.28, + "grad_norm": 15.371657238562657, + "learning_rate": 1.938984657192779e-05, + "loss": 1.3508, + "step": 23022 + }, + { + "epoch": 0.28, + "grad_norm": 74.87209248943678, + "learning_rate": 1.938964562259512e-05, + "loss": 1.3489, + "step": 23025 + }, + { + "epoch": 0.28, + "grad_norm": 73.47748023064676, + "learning_rate": 1.938944464121897e-05, + "loss": 1.6464, + "step": 23028 + }, + { + "epoch": 0.28, + "grad_norm": 13.02562515977887, + "learning_rate": 1.9389243627800026e-05, + "loss": 1.4305, + "step": 23031 + }, + { + "epoch": 0.28, + "grad_norm": 4.522317351355517, + "learning_rate": 1.9389042582338978e-05, + "loss": 1.5604, + "step": 23034 + }, + { + "epoch": 0.28, + "grad_norm": 35.176262458190656, + "learning_rate": 1.9388841504836507e-05, + "loss": 1.4861, + "step": 23037 + }, + { + "epoch": 0.28, + "grad_norm": 26.345229077181482, + "learning_rate": 1.9388640395293304e-05, + "loss": 1.7292, + "step": 23040 + }, + { + "epoch": 0.28, + "grad_norm": 7.950904490087017, + "learning_rate": 1.9388439253710053e-05, + "loss": 1.6279, + "step": 23043 + }, + { + "epoch": 0.28, + "grad_norm": 48.885597952252205, + "learning_rate": 1.9388238080087442e-05, + "loss": 1.7647, + "step": 23046 + }, + { + "epoch": 0.28, + "grad_norm": 14.902583609030092, + "learning_rate": 1.9388036874426152e-05, + "loss": 1.4475, + "step": 23049 + }, + { + "epoch": 0.28, + "grad_norm": 10.11637914465669, + "learning_rate": 1.9387835636726874e-05, + "loss": 1.5195, + "step": 23052 + }, + { + "epoch": 0.28, + "grad_norm": 14.710848078482961, + "learning_rate": 1.93876343669903e-05, + "loss": 1.5306, + "step": 23055 + }, + { + "epoch": 0.28, + "grad_norm": 9.564880517658585, + "learning_rate": 1.9387433065217106e-05, + "loss": 1.3794, + "step": 23058 + }, + { + "epoch": 0.28, + "grad_norm": 39.49516155513759, + "learning_rate": 1.9387231731407984e-05, + "loss": 1.5696, + "step": 23061 + }, + { + "epoch": 0.28, + "grad_norm": 24.362376426797375, + "learning_rate": 1.9387030365563622e-05, + "loss": 1.5925, + "step": 23064 + }, + { + "epoch": 0.28, + "grad_norm": 7.2207490807207115, + "learning_rate": 1.938682896768471e-05, + "loss": 1.7115, + "step": 23067 + }, + { + "epoch": 0.28, + "grad_norm": 8.802734067511103, + "learning_rate": 1.9386627537771927e-05, + "loss": 1.3383, + "step": 23070 + }, + { + "epoch": 0.28, + "grad_norm": 28.309673879333417, + "learning_rate": 1.9386426075825972e-05, + "loss": 1.2979, + "step": 23073 + }, + { + "epoch": 0.28, + "grad_norm": 7.905267097972065, + "learning_rate": 1.9386224581847518e-05, + "loss": 1.5955, + "step": 23076 + }, + { + "epoch": 0.28, + "grad_norm": 29.819216113131393, + "learning_rate": 1.9386023055837266e-05, + "loss": 1.5785, + "step": 23079 + }, + { + "epoch": 0.28, + "grad_norm": 21.039220382010953, + "learning_rate": 1.9385821497795894e-05, + "loss": 1.496, + "step": 23082 + }, + { + "epoch": 0.28, + "grad_norm": 10.920628513678961, + "learning_rate": 1.9385619907724096e-05, + "loss": 1.3254, + "step": 23085 + }, + { + "epoch": 0.28, + "grad_norm": 13.404305581631471, + "learning_rate": 1.9385418285622557e-05, + "loss": 1.365, + "step": 23088 + }, + { + "epoch": 0.28, + "grad_norm": 49.086917339846494, + "learning_rate": 1.938521663149197e-05, + "loss": 1.9329, + "step": 23091 + }, + { + "epoch": 0.28, + "grad_norm": 6.43265706456871, + "learning_rate": 1.9385014945333015e-05, + "loss": 1.687, + "step": 23094 + }, + { + "epoch": 0.28, + "grad_norm": 4.308867283736946, + "learning_rate": 1.9384813227146386e-05, + "loss": 1.6341, + "step": 23097 + }, + { + "epoch": 0.28, + "grad_norm": 2.5408642148347043, + "learning_rate": 1.9384611476932768e-05, + "loss": 1.7309, + "step": 23100 + }, + { + "epoch": 0.28, + "grad_norm": 8.750799993483687, + "learning_rate": 1.9384409694692854e-05, + "loss": 1.5724, + "step": 23103 + }, + { + "epoch": 0.28, + "grad_norm": 6.523888205034562, + "learning_rate": 1.938420788042733e-05, + "loss": 1.4283, + "step": 23106 + }, + { + "epoch": 0.28, + "grad_norm": 58.768921416627556, + "learning_rate": 1.938400603413688e-05, + "loss": 1.7052, + "step": 23109 + }, + { + "epoch": 0.28, + "grad_norm": 4.66367446201354, + "learning_rate": 1.9383804155822204e-05, + "loss": 1.3785, + "step": 23112 + }, + { + "epoch": 0.28, + "grad_norm": 10.52850181553222, + "learning_rate": 1.938360224548398e-05, + "loss": 1.7161, + "step": 23115 + }, + { + "epoch": 0.28, + "grad_norm": 9.208853411421568, + "learning_rate": 1.9383400303122905e-05, + "loss": 1.6871, + "step": 23118 + }, + { + "epoch": 0.28, + "grad_norm": 19.70578375906359, + "learning_rate": 1.938319832873966e-05, + "loss": 1.5857, + "step": 23121 + }, + { + "epoch": 0.28, + "grad_norm": 33.374436137790354, + "learning_rate": 1.9382996322334942e-05, + "loss": 1.2887, + "step": 23124 + }, + { + "epoch": 0.28, + "grad_norm": 5.5899023112776165, + "learning_rate": 1.9382794283909435e-05, + "loss": 1.62, + "step": 23127 + }, + { + "epoch": 0.28, + "grad_norm": 20.62157535938215, + "learning_rate": 1.9382592213463834e-05, + "loss": 1.5347, + "step": 23130 + }, + { + "epoch": 0.28, + "grad_norm": 35.51543762528473, + "learning_rate": 1.9382390110998825e-05, + "loss": 2.0654, + "step": 23133 + }, + { + "epoch": 0.28, + "grad_norm": 26.4144720943635, + "learning_rate": 1.9382187976515096e-05, + "loss": 1.2234, + "step": 23136 + }, + { + "epoch": 0.28, + "grad_norm": 26.405898061423162, + "learning_rate": 1.9381985810013336e-05, + "loss": 1.7037, + "step": 23139 + }, + { + "epoch": 0.28, + "grad_norm": 33.447934643423295, + "learning_rate": 1.9381783611494245e-05, + "loss": 1.7112, + "step": 23142 + }, + { + "epoch": 0.28, + "grad_norm": 13.14743353292192, + "learning_rate": 1.93815813809585e-05, + "loss": 1.8002, + "step": 23145 + }, + { + "epoch": 0.28, + "grad_norm": 34.71055130799314, + "learning_rate": 1.9381379118406803e-05, + "loss": 1.7504, + "step": 23148 + }, + { + "epoch": 0.28, + "grad_norm": 6.1318999196060515, + "learning_rate": 1.9381176823839834e-05, + "loss": 1.5321, + "step": 23151 + }, + { + "epoch": 0.28, + "grad_norm": 34.0209350369099, + "learning_rate": 1.9380974497258286e-05, + "loss": 1.41, + "step": 23154 + }, + { + "epoch": 0.28, + "grad_norm": 49.35709930298297, + "learning_rate": 1.9380772138662857e-05, + "loss": 1.5376, + "step": 23157 + }, + { + "epoch": 0.28, + "grad_norm": 10.151020263089785, + "learning_rate": 1.9380569748054224e-05, + "loss": 1.5414, + "step": 23160 + }, + { + "epoch": 0.28, + "grad_norm": 3.647414153504052, + "learning_rate": 1.9380367325433092e-05, + "loss": 1.2263, + "step": 23163 + }, + { + "epoch": 0.28, + "grad_norm": 5.536058609118446, + "learning_rate": 1.938016487080014e-05, + "loss": 1.4276, + "step": 23166 + }, + { + "epoch": 0.28, + "grad_norm": 15.213142379257697, + "learning_rate": 1.937996238415607e-05, + "loss": 1.7668, + "step": 23169 + }, + { + "epoch": 0.28, + "grad_norm": 12.992075059687773, + "learning_rate": 1.937975986550156e-05, + "loss": 1.3375, + "step": 23172 + }, + { + "epoch": 0.28, + "grad_norm": 9.022748517404194, + "learning_rate": 1.937955731483731e-05, + "loss": 1.5764, + "step": 23175 + }, + { + "epoch": 0.28, + "grad_norm": 9.801677199239887, + "learning_rate": 1.9379354732164014e-05, + "loss": 1.6804, + "step": 23178 + }, + { + "epoch": 0.28, + "grad_norm": 54.53333443011745, + "learning_rate": 1.9379152117482353e-05, + "loss": 1.3887, + "step": 23181 + }, + { + "epoch": 0.28, + "grad_norm": 3.860498027803476, + "learning_rate": 1.937894947079303e-05, + "loss": 1.3256, + "step": 23184 + }, + { + "epoch": 0.28, + "grad_norm": 6.531651345380272, + "learning_rate": 1.9378746792096725e-05, + "loss": 1.5844, + "step": 23187 + }, + { + "epoch": 0.28, + "grad_norm": 73.24561372494412, + "learning_rate": 1.9378544081394138e-05, + "loss": 1.6264, + "step": 23190 + }, + { + "epoch": 0.28, + "grad_norm": 3.082848004574346, + "learning_rate": 1.937834133868596e-05, + "loss": 1.5787, + "step": 23193 + }, + { + "epoch": 0.28, + "grad_norm": 21.74344559869236, + "learning_rate": 1.937813856397288e-05, + "loss": 1.5062, + "step": 23196 + }, + { + "epoch": 0.28, + "grad_norm": 15.205482829621467, + "learning_rate": 1.9377935757255592e-05, + "loss": 1.3639, + "step": 23199 + }, + { + "epoch": 0.28, + "grad_norm": 15.686326859987295, + "learning_rate": 1.9377732918534783e-05, + "loss": 1.3072, + "step": 23202 + }, + { + "epoch": 0.28, + "grad_norm": 23.80745480968027, + "learning_rate": 1.937753004781115e-05, + "loss": 1.5511, + "step": 23205 + }, + { + "epoch": 0.28, + "grad_norm": 20.300615638393214, + "learning_rate": 1.9377327145085386e-05, + "loss": 1.6619, + "step": 23208 + }, + { + "epoch": 0.28, + "grad_norm": 4.9980464883751585, + "learning_rate": 1.9377124210358183e-05, + "loss": 1.4619, + "step": 23211 + }, + { + "epoch": 0.28, + "grad_norm": 16.163151329048166, + "learning_rate": 1.9376921243630235e-05, + "loss": 1.9131, + "step": 23214 + }, + { + "epoch": 0.28, + "grad_norm": 21.60256953056664, + "learning_rate": 1.937671824490223e-05, + "loss": 1.6816, + "step": 23217 + }, + { + "epoch": 0.28, + "grad_norm": 5.92379276202121, + "learning_rate": 1.9376515214174863e-05, + "loss": 1.4784, + "step": 23220 + }, + { + "epoch": 0.28, + "grad_norm": 8.314979335084937, + "learning_rate": 1.9376312151448826e-05, + "loss": 1.8919, + "step": 23223 + }, + { + "epoch": 0.28, + "grad_norm": 11.791156718321988, + "learning_rate": 1.9376109056724814e-05, + "loss": 1.5055, + "step": 23226 + }, + { + "epoch": 0.28, + "grad_norm": 12.92508088854308, + "learning_rate": 1.9375905930003516e-05, + "loss": 1.45, + "step": 23229 + }, + { + "epoch": 0.28, + "grad_norm": 16.25723793475384, + "learning_rate": 1.9375702771285634e-05, + "loss": 1.652, + "step": 23232 + }, + { + "epoch": 0.28, + "grad_norm": 3.361280044354678, + "learning_rate": 1.9375499580571854e-05, + "loss": 1.7155, + "step": 23235 + }, + { + "epoch": 0.28, + "grad_norm": 33.88944585663124, + "learning_rate": 1.9375296357862868e-05, + "loss": 2.1737, + "step": 23238 + }, + { + "epoch": 0.28, + "grad_norm": 24.826090614229933, + "learning_rate": 1.9375093103159375e-05, + "loss": 1.5175, + "step": 23241 + }, + { + "epoch": 0.28, + "grad_norm": 25.66285377404252, + "learning_rate": 1.9374889816462064e-05, + "loss": 1.4662, + "step": 23244 + }, + { + "epoch": 0.28, + "grad_norm": 4.863360925489377, + "learning_rate": 1.937468649777163e-05, + "loss": 1.2905, + "step": 23247 + }, + { + "epoch": 0.28, + "grad_norm": 3.7202087147921965, + "learning_rate": 1.937448314708877e-05, + "loss": 1.5118, + "step": 23250 + }, + { + "epoch": 0.28, + "grad_norm": 6.2166973627115905, + "learning_rate": 1.9374279764414178e-05, + "loss": 1.4172, + "step": 23253 + }, + { + "epoch": 0.28, + "grad_norm": 11.302578329525206, + "learning_rate": 1.9374076349748546e-05, + "loss": 1.361, + "step": 23256 + }, + { + "epoch": 0.28, + "grad_norm": 12.73090054470589, + "learning_rate": 1.9373872903092564e-05, + "loss": 1.36, + "step": 23259 + }, + { + "epoch": 0.28, + "grad_norm": 33.57750439965136, + "learning_rate": 1.937366942444693e-05, + "loss": 1.2046, + "step": 23262 + }, + { + "epoch": 0.28, + "grad_norm": 8.539154066562611, + "learning_rate": 1.937346591381234e-05, + "loss": 1.7632, + "step": 23265 + }, + { + "epoch": 0.28, + "grad_norm": 10.859210924175954, + "learning_rate": 1.9373262371189488e-05, + "loss": 1.2485, + "step": 23268 + }, + { + "epoch": 0.28, + "grad_norm": 40.43930672317948, + "learning_rate": 1.9373058796579068e-05, + "loss": 2.0739, + "step": 23271 + }, + { + "epoch": 0.28, + "grad_norm": 23.328743788758512, + "learning_rate": 1.937285518998177e-05, + "loss": 1.623, + "step": 23274 + }, + { + "epoch": 0.28, + "grad_norm": 7.644654398447201, + "learning_rate": 1.93726515513983e-05, + "loss": 1.6191, + "step": 23277 + }, + { + "epoch": 0.28, + "grad_norm": 49.06626568714199, + "learning_rate": 1.9372447880829345e-05, + "loss": 1.411, + "step": 23280 + }, + { + "epoch": 0.28, + "grad_norm": 20.680296633423065, + "learning_rate": 1.9372244178275598e-05, + "loss": 1.7355, + "step": 23283 + }, + { + "epoch": 0.28, + "grad_norm": 3.375639745651688, + "learning_rate": 1.937204044373776e-05, + "loss": 1.9329, + "step": 23286 + }, + { + "epoch": 0.28, + "grad_norm": 17.15823534319957, + "learning_rate": 1.937183667721652e-05, + "loss": 1.6627, + "step": 23289 + }, + { + "epoch": 0.28, + "grad_norm": 13.459700812136148, + "learning_rate": 1.937163287871258e-05, + "loss": 1.2921, + "step": 23292 + }, + { + "epoch": 0.28, + "grad_norm": 14.059912106096913, + "learning_rate": 1.9371429048226632e-05, + "loss": 1.3047, + "step": 23295 + }, + { + "epoch": 0.28, + "grad_norm": 11.818417656365177, + "learning_rate": 1.9371225185759374e-05, + "loss": 1.8175, + "step": 23298 + }, + { + "epoch": 0.28, + "grad_norm": 15.03328904864635, + "learning_rate": 1.93710212913115e-05, + "loss": 1.8755, + "step": 23301 + }, + { + "epoch": 0.28, + "grad_norm": 878.3789449020888, + "learning_rate": 1.9370817364883702e-05, + "loss": 1.5689, + "step": 23304 + }, + { + "epoch": 0.28, + "grad_norm": 80.65320866902344, + "learning_rate": 1.9370613406476684e-05, + "loss": 1.2885, + "step": 23307 + }, + { + "epoch": 0.28, + "grad_norm": 14.06261519574156, + "learning_rate": 1.9370409416091137e-05, + "loss": 1.4525, + "step": 23310 + }, + { + "epoch": 0.28, + "grad_norm": 8.005784017245059, + "learning_rate": 1.9370205393727755e-05, + "loss": 1.567, + "step": 23313 + }, + { + "epoch": 0.28, + "grad_norm": 18.61899895832696, + "learning_rate": 1.937000133938724e-05, + "loss": 1.5163, + "step": 23316 + }, + { + "epoch": 0.28, + "grad_norm": 7.361517605219407, + "learning_rate": 1.9369797253070284e-05, + "loss": 1.5907, + "step": 23319 + }, + { + "epoch": 0.28, + "grad_norm": 43.36178275497162, + "learning_rate": 1.9369593134777585e-05, + "loss": 1.871, + "step": 23322 + }, + { + "epoch": 0.28, + "grad_norm": 18.590561609698177, + "learning_rate": 1.936938898450984e-05, + "loss": 1.5081, + "step": 23325 + }, + { + "epoch": 0.28, + "grad_norm": 7.1957755232082015, + "learning_rate": 1.9369184802267744e-05, + "loss": 1.4324, + "step": 23328 + }, + { + "epoch": 0.28, + "grad_norm": 44.63516874522608, + "learning_rate": 1.9368980588052e-05, + "loss": 1.3718, + "step": 23331 + }, + { + "epoch": 0.28, + "grad_norm": 8.647150282705843, + "learning_rate": 1.9368776341863295e-05, + "loss": 1.7344, + "step": 23334 + }, + { + "epoch": 0.28, + "grad_norm": 15.909424505878839, + "learning_rate": 1.936857206370233e-05, + "loss": 1.6606, + "step": 23337 + }, + { + "epoch": 0.28, + "grad_norm": 39.78515092594927, + "learning_rate": 1.9368367753569806e-05, + "loss": 1.3208, + "step": 23340 + }, + { + "epoch": 0.28, + "grad_norm": 10.72182945762871, + "learning_rate": 1.9368163411466418e-05, + "loss": 1.6077, + "step": 23343 + }, + { + "epoch": 0.28, + "grad_norm": 38.23966937703769, + "learning_rate": 1.936795903739286e-05, + "loss": 1.4698, + "step": 23346 + }, + { + "epoch": 0.28, + "grad_norm": 10.162223183178803, + "learning_rate": 1.936775463134983e-05, + "loss": 1.6724, + "step": 23349 + }, + { + "epoch": 0.28, + "grad_norm": 20.762213911967056, + "learning_rate": 1.9367550193338032e-05, + "loss": 1.8047, + "step": 23352 + }, + { + "epoch": 0.28, + "grad_norm": 8.550221845027945, + "learning_rate": 1.9367345723358158e-05, + "loss": 1.4771, + "step": 23355 + }, + { + "epoch": 0.28, + "grad_norm": 5.34635055713067, + "learning_rate": 1.9367141221410908e-05, + "loss": 1.5548, + "step": 23358 + }, + { + "epoch": 0.28, + "grad_norm": 19.645227245323245, + "learning_rate": 1.936693668749698e-05, + "loss": 1.5496, + "step": 23361 + }, + { + "epoch": 0.28, + "grad_norm": 7.1566567731461, + "learning_rate": 1.9366732121617068e-05, + "loss": 1.901, + "step": 23364 + }, + { + "epoch": 0.28, + "grad_norm": 35.13393804459471, + "learning_rate": 1.9366527523771875e-05, + "loss": 1.5347, + "step": 23367 + }, + { + "epoch": 0.28, + "grad_norm": 9.923556564992468, + "learning_rate": 1.9366322893962098e-05, + "loss": 1.7092, + "step": 23370 + }, + { + "epoch": 0.28, + "grad_norm": 10.16635605397173, + "learning_rate": 1.936611823218843e-05, + "loss": 1.5975, + "step": 23373 + }, + { + "epoch": 0.28, + "grad_norm": 5.536527754289707, + "learning_rate": 1.936591353845158e-05, + "loss": 0.9285, + "step": 23376 + }, + { + "epoch": 0.28, + "grad_norm": 4.020732109816837, + "learning_rate": 1.9365708812752238e-05, + "loss": 1.5308, + "step": 23379 + }, + { + "epoch": 0.28, + "grad_norm": 29.527873377493037, + "learning_rate": 1.9365504055091108e-05, + "loss": 1.5297, + "step": 23382 + }, + { + "epoch": 0.28, + "grad_norm": 12.233087780070532, + "learning_rate": 1.9365299265468884e-05, + "loss": 1.4102, + "step": 23385 + }, + { + "epoch": 0.28, + "grad_norm": 5.2033603108478355, + "learning_rate": 1.9365094443886266e-05, + "loss": 1.492, + "step": 23388 + }, + { + "epoch": 0.28, + "grad_norm": 13.659506722842641, + "learning_rate": 1.9364889590343954e-05, + "loss": 1.3271, + "step": 23391 + }, + { + "epoch": 0.28, + "grad_norm": 11.755044048692506, + "learning_rate": 1.9364684704842645e-05, + "loss": 1.7774, + "step": 23394 + }, + { + "epoch": 0.28, + "grad_norm": 8.414012519712024, + "learning_rate": 1.9364479787383047e-05, + "loss": 1.9175, + "step": 23397 + }, + { + "epoch": 0.28, + "grad_norm": 5.287709328631245, + "learning_rate": 1.9364274837965848e-05, + "loss": 1.7109, + "step": 23400 + }, + { + "epoch": 0.28, + "grad_norm": 18.787398800160943, + "learning_rate": 1.9364069856591753e-05, + "loss": 1.5238, + "step": 23403 + }, + { + "epoch": 0.28, + "grad_norm": 77.23523542320396, + "learning_rate": 1.9363864843261463e-05, + "loss": 1.3536, + "step": 23406 + }, + { + "epoch": 0.28, + "grad_norm": 17.534781756641003, + "learning_rate": 1.936365979797567e-05, + "loss": 1.1606, + "step": 23409 + }, + { + "epoch": 0.28, + "grad_norm": 29.592346997757772, + "learning_rate": 1.9363454720735083e-05, + "loss": 1.3335, + "step": 23412 + }, + { + "epoch": 0.28, + "grad_norm": 6.652781904445901, + "learning_rate": 1.9363249611540392e-05, + "loss": 1.6357, + "step": 23415 + }, + { + "epoch": 0.28, + "grad_norm": 15.631202259910031, + "learning_rate": 1.9363044470392308e-05, + "loss": 1.5619, + "step": 23418 + }, + { + "epoch": 0.28, + "grad_norm": 24.222691481749532, + "learning_rate": 1.9362839297291526e-05, + "loss": 1.4877, + "step": 23421 + }, + { + "epoch": 0.28, + "grad_norm": 7.86398561416794, + "learning_rate": 1.9362634092238743e-05, + "loss": 1.9556, + "step": 23424 + }, + { + "epoch": 0.28, + "grad_norm": 8.64808785047322, + "learning_rate": 1.936242885523466e-05, + "loss": 1.6335, + "step": 23427 + }, + { + "epoch": 0.28, + "grad_norm": 91.23707504654278, + "learning_rate": 1.936222358627999e-05, + "loss": 1.545, + "step": 23430 + }, + { + "epoch": 0.28, + "grad_norm": 25.713348879192207, + "learning_rate": 1.9362018285375413e-05, + "loss": 1.8756, + "step": 23433 + }, + { + "epoch": 0.28, + "grad_norm": 6.632964422086815, + "learning_rate": 1.9361812952521642e-05, + "loss": 1.5906, + "step": 23436 + }, + { + "epoch": 0.28, + "grad_norm": 12.217700474615864, + "learning_rate": 1.936160758771938e-05, + "loss": 1.829, + "step": 23439 + }, + { + "epoch": 0.28, + "grad_norm": 4.17935380143707, + "learning_rate": 1.9361402190969317e-05, + "loss": 1.6196, + "step": 23442 + }, + { + "epoch": 0.28, + "grad_norm": 21.120627858248398, + "learning_rate": 1.9361196762272165e-05, + "loss": 1.7234, + "step": 23445 + }, + { + "epoch": 0.28, + "grad_norm": 17.198960898575308, + "learning_rate": 1.936099130162862e-05, + "loss": 1.567, + "step": 23448 + }, + { + "epoch": 0.28, + "grad_norm": 20.631130714749997, + "learning_rate": 1.936078580903938e-05, + "loss": 1.8875, + "step": 23451 + }, + { + "epoch": 0.28, + "grad_norm": 118.32017921790288, + "learning_rate": 1.9360580284505154e-05, + "loss": 1.5991, + "step": 23454 + }, + { + "epoch": 0.28, + "grad_norm": 10.569624100243738, + "learning_rate": 1.9360374728026635e-05, + "loss": 1.4303, + "step": 23457 + }, + { + "epoch": 0.28, + "grad_norm": 22.23482109653954, + "learning_rate": 1.936016913960453e-05, + "loss": 1.6883, + "step": 23460 + }, + { + "epoch": 0.28, + "grad_norm": 23.009584655961127, + "learning_rate": 1.935996351923954e-05, + "loss": 1.4683, + "step": 23463 + }, + { + "epoch": 0.28, + "grad_norm": 7.834218444967083, + "learning_rate": 1.935975786693237e-05, + "loss": 1.8761, + "step": 23466 + }, + { + "epoch": 0.28, + "grad_norm": 5.729141627404479, + "learning_rate": 1.935955218268371e-05, + "loss": 1.1077, + "step": 23469 + }, + { + "epoch": 0.28, + "grad_norm": 18.53702290620568, + "learning_rate": 1.9359346466494277e-05, + "loss": 1.4349, + "step": 23472 + }, + { + "epoch": 0.28, + "grad_norm": 22.537172903118993, + "learning_rate": 1.9359140718364758e-05, + "loss": 1.6614, + "step": 23475 + }, + { + "epoch": 0.28, + "grad_norm": 8.344253055764957, + "learning_rate": 1.9358934938295867e-05, + "loss": 1.4701, + "step": 23478 + }, + { + "epoch": 0.28, + "grad_norm": 4.707761814418855, + "learning_rate": 1.9358729126288304e-05, + "loss": 1.599, + "step": 23481 + }, + { + "epoch": 0.28, + "grad_norm": 6.6964538032851975, + "learning_rate": 1.935852328234277e-05, + "loss": 1.2806, + "step": 23484 + }, + { + "epoch": 0.28, + "grad_norm": 9.165639902499011, + "learning_rate": 1.9358317406459965e-05, + "loss": 1.2084, + "step": 23487 + }, + { + "epoch": 0.28, + "grad_norm": 19.589678443571653, + "learning_rate": 1.9358111498640592e-05, + "loss": 1.4194, + "step": 23490 + }, + { + "epoch": 0.28, + "grad_norm": 10.980790356039979, + "learning_rate": 1.9357905558885356e-05, + "loss": 1.4471, + "step": 23493 + }, + { + "epoch": 0.28, + "grad_norm": 18.940073538554937, + "learning_rate": 1.935769958719496e-05, + "loss": 1.2142, + "step": 23496 + }, + { + "epoch": 0.28, + "grad_norm": 16.994612721523957, + "learning_rate": 1.9357493583570105e-05, + "loss": 1.3438, + "step": 23499 + }, + { + "epoch": 0.28, + "grad_norm": 60.92242984639366, + "learning_rate": 1.9357287548011495e-05, + "loss": 1.4483, + "step": 23502 + }, + { + "epoch": 0.28, + "grad_norm": 3.3739676683286564, + "learning_rate": 1.9357081480519832e-05, + "loss": 1.5982, + "step": 23505 + }, + { + "epoch": 0.28, + "grad_norm": 41.65605084716062, + "learning_rate": 1.9356875381095823e-05, + "loss": 1.5983, + "step": 23508 + }, + { + "epoch": 0.28, + "grad_norm": 38.316340348213785, + "learning_rate": 1.935666924974017e-05, + "loss": 1.5794, + "step": 23511 + }, + { + "epoch": 0.28, + "grad_norm": 12.721798273208355, + "learning_rate": 1.935646308645357e-05, + "loss": 1.3851, + "step": 23514 + }, + { + "epoch": 0.28, + "grad_norm": 14.669250277395522, + "learning_rate": 1.9356256891236733e-05, + "loss": 1.5245, + "step": 23517 + }, + { + "epoch": 0.28, + "grad_norm": 31.075067729684726, + "learning_rate": 1.9356050664090364e-05, + "loss": 1.2264, + "step": 23520 + }, + { + "epoch": 0.28, + "grad_norm": 23.17709547531937, + "learning_rate": 1.9355844405015162e-05, + "loss": 1.43, + "step": 23523 + }, + { + "epoch": 0.28, + "grad_norm": 9.802729065533338, + "learning_rate": 1.9355638114011833e-05, + "loss": 1.9301, + "step": 23526 + }, + { + "epoch": 0.28, + "grad_norm": 26.252172422965135, + "learning_rate": 1.9355431791081084e-05, + "loss": 1.4456, + "step": 23529 + }, + { + "epoch": 0.28, + "grad_norm": 18.134839137925866, + "learning_rate": 1.9355225436223613e-05, + "loss": 1.0531, + "step": 23532 + }, + { + "epoch": 0.28, + "grad_norm": 6.413846238223652, + "learning_rate": 1.9355019049440126e-05, + "loss": 1.8286, + "step": 23535 + }, + { + "epoch": 0.28, + "grad_norm": 61.204068319841134, + "learning_rate": 1.9354812630731333e-05, + "loss": 1.5952, + "step": 23538 + }, + { + "epoch": 0.28, + "grad_norm": 8.565744155939464, + "learning_rate": 1.935460618009793e-05, + "loss": 1.4688, + "step": 23541 + }, + { + "epoch": 0.28, + "grad_norm": 14.881477271859106, + "learning_rate": 1.9354399697540627e-05, + "loss": 1.6689, + "step": 23544 + }, + { + "epoch": 0.28, + "grad_norm": 9.4355844161014, + "learning_rate": 1.935419318306013e-05, + "loss": 1.6559, + "step": 23547 + }, + { + "epoch": 0.28, + "grad_norm": 9.789496404663245, + "learning_rate": 1.935398663665714e-05, + "loss": 1.165, + "step": 23550 + }, + { + "epoch": 0.28, + "grad_norm": 12.791327196695551, + "learning_rate": 1.935378005833236e-05, + "loss": 2.0105, + "step": 23553 + }, + { + "epoch": 0.28, + "grad_norm": 4.375879563526616, + "learning_rate": 1.93535734480865e-05, + "loss": 1.4784, + "step": 23556 + }, + { + "epoch": 0.28, + "grad_norm": 19.449587245962594, + "learning_rate": 1.935336680592026e-05, + "loss": 1.8212, + "step": 23559 + }, + { + "epoch": 0.28, + "grad_norm": 55.154540102248035, + "learning_rate": 1.9353160131834353e-05, + "loss": 1.6394, + "step": 23562 + }, + { + "epoch": 0.28, + "grad_norm": 11.332734597233495, + "learning_rate": 1.9352953425829474e-05, + "loss": 1.0692, + "step": 23565 + }, + { + "epoch": 0.28, + "grad_norm": 15.815473352606624, + "learning_rate": 1.9352746687906337e-05, + "loss": 1.7493, + "step": 23568 + }, + { + "epoch": 0.28, + "grad_norm": 11.552282999245456, + "learning_rate": 1.9352539918065643e-05, + "loss": 1.4102, + "step": 23571 + }, + { + "epoch": 0.28, + "grad_norm": 9.95031782479813, + "learning_rate": 1.9352333116308104e-05, + "loss": 1.5633, + "step": 23574 + }, + { + "epoch": 0.28, + "grad_norm": 12.827772324805583, + "learning_rate": 1.935212628263442e-05, + "loss": 1.7956, + "step": 23577 + }, + { + "epoch": 0.28, + "grad_norm": 36.30308695961057, + "learning_rate": 1.935191941704529e-05, + "loss": 1.7091, + "step": 23580 + }, + { + "epoch": 0.28, + "grad_norm": 12.701025299786235, + "learning_rate": 1.9351712519541434e-05, + "loss": 1.3349, + "step": 23583 + }, + { + "epoch": 0.28, + "grad_norm": 21.880901789891496, + "learning_rate": 1.9351505590123548e-05, + "loss": 1.6571, + "step": 23586 + }, + { + "epoch": 0.28, + "grad_norm": 12.3837293719855, + "learning_rate": 1.9351298628792347e-05, + "loss": 1.7625, + "step": 23589 + }, + { + "epoch": 0.28, + "grad_norm": 65.47527156037874, + "learning_rate": 1.9351091635548526e-05, + "loss": 1.6695, + "step": 23592 + }, + { + "epoch": 0.28, + "grad_norm": 27.997777203556602, + "learning_rate": 1.93508846103928e-05, + "loss": 1.8391, + "step": 23595 + }, + { + "epoch": 0.28, + "grad_norm": 10.858685037093279, + "learning_rate": 1.9350677553325873e-05, + "loss": 1.234, + "step": 23598 + }, + { + "epoch": 0.28, + "grad_norm": 5.20665435083198, + "learning_rate": 1.9350470464348454e-05, + "loss": 1.3223, + "step": 23601 + }, + { + "epoch": 0.28, + "grad_norm": 12.76921577213229, + "learning_rate": 1.9350263343461244e-05, + "loss": 1.6589, + "step": 23604 + }, + { + "epoch": 0.28, + "grad_norm": 19.722120961676186, + "learning_rate": 1.9350056190664954e-05, + "loss": 1.7745, + "step": 23607 + }, + { + "epoch": 0.28, + "grad_norm": 25.606488851094856, + "learning_rate": 1.934984900596029e-05, + "loss": 2.0745, + "step": 23610 + }, + { + "epoch": 0.28, + "grad_norm": 7.597806791062559, + "learning_rate": 1.934964178934796e-05, + "loss": 1.5991, + "step": 23613 + }, + { + "epoch": 0.28, + "grad_norm": 14.929546537102077, + "learning_rate": 1.934943454082867e-05, + "loss": 1.5701, + "step": 23616 + }, + { + "epoch": 0.28, + "grad_norm": 28.089494587089426, + "learning_rate": 1.934922726040313e-05, + "loss": 1.3411, + "step": 23619 + }, + { + "epoch": 0.28, + "grad_norm": 5.451415827497292, + "learning_rate": 1.9349019948072044e-05, + "loss": 1.4262, + "step": 23622 + }, + { + "epoch": 0.28, + "grad_norm": 10.963886632097239, + "learning_rate": 1.9348812603836117e-05, + "loss": 1.545, + "step": 23625 + }, + { + "epoch": 0.28, + "grad_norm": 23.44093762595368, + "learning_rate": 1.9348605227696063e-05, + "loss": 1.3079, + "step": 23628 + }, + { + "epoch": 0.28, + "grad_norm": 17.111929948809152, + "learning_rate": 1.9348397819652585e-05, + "loss": 1.9539, + "step": 23631 + }, + { + "epoch": 0.28, + "grad_norm": 4.852456125959022, + "learning_rate": 1.9348190379706393e-05, + "loss": 1.6768, + "step": 23634 + }, + { + "epoch": 0.28, + "grad_norm": 20.139542315674714, + "learning_rate": 1.9347982907858193e-05, + "loss": 1.5413, + "step": 23637 + }, + { + "epoch": 0.28, + "grad_norm": 38.08126191753139, + "learning_rate": 1.93477754041087e-05, + "loss": 1.6408, + "step": 23640 + }, + { + "epoch": 0.28, + "grad_norm": 10.139919606461078, + "learning_rate": 1.934756786845861e-05, + "loss": 1.9258, + "step": 23643 + }, + { + "epoch": 0.28, + "grad_norm": 17.22935939206261, + "learning_rate": 1.9347360300908642e-05, + "loss": 1.3766, + "step": 23646 + }, + { + "epoch": 0.28, + "grad_norm": 17.69689627819282, + "learning_rate": 1.93471527014595e-05, + "loss": 1.7732, + "step": 23649 + }, + { + "epoch": 0.28, + "grad_norm": 6.607745915800249, + "learning_rate": 1.9346945070111888e-05, + "loss": 1.6452, + "step": 23652 + }, + { + "epoch": 0.28, + "grad_norm": 6.228648589814964, + "learning_rate": 1.9346737406866523e-05, + "loss": 1.3338, + "step": 23655 + }, + { + "epoch": 0.28, + "grad_norm": 7.3952179662043305, + "learning_rate": 1.934652971172411e-05, + "loss": 1.5245, + "step": 23658 + }, + { + "epoch": 0.28, + "grad_norm": 8.430207833097647, + "learning_rate": 1.9346321984685358e-05, + "loss": 1.5415, + "step": 23661 + }, + { + "epoch": 0.28, + "grad_norm": 18.476833610419337, + "learning_rate": 1.9346114225750973e-05, + "loss": 1.6049, + "step": 23664 + }, + { + "epoch": 0.28, + "grad_norm": 6.422674316092304, + "learning_rate": 1.9345906434921668e-05, + "loss": 1.4489, + "step": 23667 + }, + { + "epoch": 0.28, + "grad_norm": 23.758111674099872, + "learning_rate": 1.9345698612198153e-05, + "loss": 1.408, + "step": 23670 + }, + { + "epoch": 0.28, + "grad_norm": 13.016438178054676, + "learning_rate": 1.9345490757581134e-05, + "loss": 1.5689, + "step": 23673 + }, + { + "epoch": 0.28, + "grad_norm": 8.322092938888732, + "learning_rate": 1.934528287107132e-05, + "loss": 1.6231, + "step": 23676 + }, + { + "epoch": 0.28, + "grad_norm": 13.791563776915481, + "learning_rate": 1.934507495266942e-05, + "loss": 1.4928, + "step": 23679 + }, + { + "epoch": 0.28, + "grad_norm": 59.3891093647156, + "learning_rate": 1.934486700237615e-05, + "loss": 1.1927, + "step": 23682 + }, + { + "epoch": 0.28, + "grad_norm": 19.969904148602613, + "learning_rate": 1.934465902019221e-05, + "loss": 1.6948, + "step": 23685 + }, + { + "epoch": 0.28, + "grad_norm": 23.12026922128309, + "learning_rate": 1.9344451006118316e-05, + "loss": 1.6367, + "step": 23688 + }, + { + "epoch": 0.28, + "grad_norm": 8.397171803443397, + "learning_rate": 1.934424296015518e-05, + "loss": 1.7336, + "step": 23691 + }, + { + "epoch": 0.28, + "grad_norm": 17.049914800865377, + "learning_rate": 1.9344034882303507e-05, + "loss": 1.5802, + "step": 23694 + }, + { + "epoch": 0.28, + "grad_norm": 9.070638489674378, + "learning_rate": 1.9343826772564007e-05, + "loss": 1.6787, + "step": 23697 + }, + { + "epoch": 0.28, + "grad_norm": 8.398030723362576, + "learning_rate": 1.934361863093739e-05, + "loss": 1.4922, + "step": 23700 + }, + { + "epoch": 0.29, + "grad_norm": 5.290738084862343, + "learning_rate": 1.934341045742437e-05, + "loss": 1.4728, + "step": 23703 + }, + { + "epoch": 0.29, + "grad_norm": 14.103895701041065, + "learning_rate": 1.9343202252025658e-05, + "loss": 1.8433, + "step": 23706 + }, + { + "epoch": 0.29, + "grad_norm": 8.54457113180066, + "learning_rate": 1.934299401474196e-05, + "loss": 1.6574, + "step": 23709 + }, + { + "epoch": 0.29, + "grad_norm": 17.410756552565513, + "learning_rate": 1.9342785745573992e-05, + "loss": 1.8244, + "step": 23712 + }, + { + "epoch": 0.29, + "grad_norm": 10.767661981981867, + "learning_rate": 1.9342577444522458e-05, + "loss": 1.7699, + "step": 23715 + }, + { + "epoch": 0.29, + "grad_norm": 17.0703260075268, + "learning_rate": 1.934236911158807e-05, + "loss": 1.5391, + "step": 23718 + }, + { + "epoch": 0.29, + "grad_norm": 28.80039953453573, + "learning_rate": 1.9342160746771544e-05, + "loss": 1.5531, + "step": 23721 + }, + { + "epoch": 0.29, + "grad_norm": 18.44069346517536, + "learning_rate": 1.934195235007359e-05, + "loss": 1.4835, + "step": 23724 + }, + { + "epoch": 0.29, + "grad_norm": 12.874252039139076, + "learning_rate": 1.9341743921494918e-05, + "loss": 1.5583, + "step": 23727 + }, + { + "epoch": 0.29, + "grad_norm": 17.5312902878742, + "learning_rate": 1.9341535461036235e-05, + "loss": 1.3719, + "step": 23730 + }, + { + "epoch": 0.29, + "grad_norm": 38.67426120570937, + "learning_rate": 1.934132696869826e-05, + "loss": 1.2056, + "step": 23733 + }, + { + "epoch": 0.29, + "grad_norm": 25.5616198179649, + "learning_rate": 1.93411184444817e-05, + "loss": 1.472, + "step": 23736 + }, + { + "epoch": 0.29, + "grad_norm": 7.7838236809257, + "learning_rate": 1.9340909888387265e-05, + "loss": 1.6505, + "step": 23739 + }, + { + "epoch": 0.29, + "grad_norm": 47.50008205677545, + "learning_rate": 1.9340701300415673e-05, + "loss": 1.4951, + "step": 23742 + }, + { + "epoch": 0.29, + "grad_norm": 32.34163774912121, + "learning_rate": 1.934049268056763e-05, + "loss": 1.7317, + "step": 23745 + }, + { + "epoch": 0.29, + "grad_norm": 5.497269503246393, + "learning_rate": 1.934028402884385e-05, + "loss": 1.8407, + "step": 23748 + }, + { + "epoch": 0.29, + "grad_norm": 3.704064560503776, + "learning_rate": 1.9340075345245045e-05, + "loss": 1.57, + "step": 23751 + }, + { + "epoch": 0.29, + "grad_norm": 12.603244098590137, + "learning_rate": 1.9339866629771925e-05, + "loss": 1.4041, + "step": 23754 + }, + { + "epoch": 0.29, + "grad_norm": 5.5234702582831625, + "learning_rate": 1.9339657882425205e-05, + "loss": 1.477, + "step": 23757 + }, + { + "epoch": 0.29, + "grad_norm": 45.01547247541756, + "learning_rate": 1.93394491032056e-05, + "loss": 1.5613, + "step": 23760 + }, + { + "epoch": 0.29, + "grad_norm": 16.446329269324877, + "learning_rate": 1.9339240292113815e-05, + "loss": 1.6226, + "step": 23763 + }, + { + "epoch": 0.29, + "grad_norm": 13.148180546451092, + "learning_rate": 1.933903144915057e-05, + "loss": 1.4354, + "step": 23766 + }, + { + "epoch": 0.29, + "grad_norm": 6.508278372535347, + "learning_rate": 1.9338822574316577e-05, + "loss": 1.5178, + "step": 23769 + }, + { + "epoch": 0.29, + "grad_norm": 7.152297257526174, + "learning_rate": 1.933861366761254e-05, + "loss": 1.5397, + "step": 23772 + }, + { + "epoch": 0.29, + "grad_norm": 5.008499631871866, + "learning_rate": 1.9338404729039183e-05, + "loss": 1.5717, + "step": 23775 + }, + { + "epoch": 0.29, + "grad_norm": 31.60011470520518, + "learning_rate": 1.9338195758597212e-05, + "loss": 1.4905, + "step": 23778 + }, + { + "epoch": 0.29, + "grad_norm": 23.919478279229967, + "learning_rate": 1.9337986756287345e-05, + "loss": 1.422, + "step": 23781 + }, + { + "epoch": 0.29, + "grad_norm": 4.093018782548198, + "learning_rate": 1.933777772211029e-05, + "loss": 1.4927, + "step": 23784 + }, + { + "epoch": 0.29, + "grad_norm": 25.74251775936289, + "learning_rate": 1.9337568656066762e-05, + "loss": 1.484, + "step": 23787 + }, + { + "epoch": 0.29, + "grad_norm": 6.391431457391638, + "learning_rate": 1.933735955815748e-05, + "loss": 1.7721, + "step": 23790 + }, + { + "epoch": 0.29, + "grad_norm": 35.06344868186924, + "learning_rate": 1.933715042838315e-05, + "loss": 1.5222, + "step": 23793 + }, + { + "epoch": 0.29, + "grad_norm": 9.519299595567766, + "learning_rate": 1.933694126674449e-05, + "loss": 1.3317, + "step": 23796 + }, + { + "epoch": 0.29, + "grad_norm": 19.879334846298455, + "learning_rate": 1.9336732073242212e-05, + "loss": 1.4158, + "step": 23799 + }, + { + "epoch": 0.29, + "grad_norm": 12.302432371485102, + "learning_rate": 1.933652284787703e-05, + "loss": 1.5448, + "step": 23802 + }, + { + "epoch": 0.29, + "grad_norm": 2.8897179754515085, + "learning_rate": 1.933631359064966e-05, + "loss": 1.658, + "step": 23805 + }, + { + "epoch": 0.29, + "grad_norm": 14.308260138182842, + "learning_rate": 1.9336104301560813e-05, + "loss": 1.522, + "step": 23808 + }, + { + "epoch": 0.29, + "grad_norm": 14.720764035603215, + "learning_rate": 1.9335894980611204e-05, + "loss": 1.6423, + "step": 23811 + }, + { + "epoch": 0.29, + "grad_norm": 26.737497002801582, + "learning_rate": 1.9335685627801553e-05, + "loss": 1.3222, + "step": 23814 + }, + { + "epoch": 0.29, + "grad_norm": 19.102182667730542, + "learning_rate": 1.9335476243132563e-05, + "loss": 1.309, + "step": 23817 + }, + { + "epoch": 0.29, + "grad_norm": 18.08099967124432, + "learning_rate": 1.933526682660496e-05, + "loss": 1.7852, + "step": 23820 + }, + { + "epoch": 0.29, + "grad_norm": 4.159384540084893, + "learning_rate": 1.9335057378219452e-05, + "loss": 1.5554, + "step": 23823 + }, + { + "epoch": 0.29, + "grad_norm": 8.215636947851937, + "learning_rate": 1.933484789797676e-05, + "loss": 1.5527, + "step": 23826 + }, + { + "epoch": 0.29, + "grad_norm": 22.017437345438296, + "learning_rate": 1.9334638385877586e-05, + "loss": 1.3503, + "step": 23829 + }, + { + "epoch": 0.29, + "grad_norm": 7.161917168388298, + "learning_rate": 1.933442884192266e-05, + "loss": 1.6915, + "step": 23832 + }, + { + "epoch": 0.29, + "grad_norm": 29.0759945390781, + "learning_rate": 1.9334219266112687e-05, + "loss": 1.5937, + "step": 23835 + }, + { + "epoch": 0.29, + "grad_norm": 10.841399988928815, + "learning_rate": 1.933400965844839e-05, + "loss": 1.5151, + "step": 23838 + }, + { + "epoch": 0.29, + "grad_norm": 13.651090677756395, + "learning_rate": 1.9333800018930476e-05, + "loss": 1.6453, + "step": 23841 + }, + { + "epoch": 0.29, + "grad_norm": 15.422012324858567, + "learning_rate": 1.9333590347559668e-05, + "loss": 1.2083, + "step": 23844 + }, + { + "epoch": 0.29, + "grad_norm": 145.10958247795185, + "learning_rate": 1.9333380644336678e-05, + "loss": 1.523, + "step": 23847 + }, + { + "epoch": 0.29, + "grad_norm": 11.70470743120048, + "learning_rate": 1.933317090926222e-05, + "loss": 1.5533, + "step": 23850 + }, + { + "epoch": 0.29, + "grad_norm": 87.25277802799745, + "learning_rate": 1.933296114233701e-05, + "loss": 1.5102, + "step": 23853 + }, + { + "epoch": 0.29, + "grad_norm": 15.763799995643286, + "learning_rate": 1.9332751343561766e-05, + "loss": 1.6486, + "step": 23856 + }, + { + "epoch": 0.29, + "grad_norm": 12.768709809616697, + "learning_rate": 1.9332541512937207e-05, + "loss": 1.7127, + "step": 23859 + }, + { + "epoch": 0.29, + "grad_norm": 39.18027626699179, + "learning_rate": 1.9332331650464043e-05, + "loss": 1.2333, + "step": 23862 + }, + { + "epoch": 0.29, + "grad_norm": 25.001323476710464, + "learning_rate": 1.933212175614299e-05, + "loss": 1.3199, + "step": 23865 + }, + { + "epoch": 0.29, + "grad_norm": 14.092431147257281, + "learning_rate": 1.933191182997477e-05, + "loss": 1.4535, + "step": 23868 + }, + { + "epoch": 0.29, + "grad_norm": 11.674708718054323, + "learning_rate": 1.9331701871960096e-05, + "loss": 1.4904, + "step": 23871 + }, + { + "epoch": 0.29, + "grad_norm": 189.79767366674108, + "learning_rate": 1.9331491882099686e-05, + "loss": 1.5161, + "step": 23874 + }, + { + "epoch": 0.29, + "grad_norm": 6.436468299036908, + "learning_rate": 1.9331281860394252e-05, + "loss": 1.7789, + "step": 23877 + }, + { + "epoch": 0.29, + "grad_norm": 34.58224215180851, + "learning_rate": 1.9331071806844516e-05, + "loss": 1.7899, + "step": 23880 + }, + { + "epoch": 0.29, + "grad_norm": 16.178362936914976, + "learning_rate": 1.933086172145119e-05, + "loss": 1.5496, + "step": 23883 + }, + { + "epoch": 0.29, + "grad_norm": 4.626339481288598, + "learning_rate": 1.9330651604215e-05, + "loss": 1.29, + "step": 23886 + }, + { + "epoch": 0.29, + "grad_norm": 17.97194467954127, + "learning_rate": 1.933044145513665e-05, + "loss": 1.429, + "step": 23889 + }, + { + "epoch": 0.29, + "grad_norm": 3.032212117450701, + "learning_rate": 1.9330231274216872e-05, + "loss": 1.5418, + "step": 23892 + }, + { + "epoch": 0.29, + "grad_norm": 8.703470553374961, + "learning_rate": 1.933002106145637e-05, + "loss": 1.2322, + "step": 23895 + }, + { + "epoch": 0.29, + "grad_norm": 3.5889744540178197, + "learning_rate": 1.9329810816855867e-05, + "loss": 1.4023, + "step": 23898 + }, + { + "epoch": 0.29, + "grad_norm": 17.93486579225623, + "learning_rate": 1.932960054041608e-05, + "loss": 1.5302, + "step": 23901 + }, + { + "epoch": 0.29, + "grad_norm": 10.516108353180774, + "learning_rate": 1.932939023213773e-05, + "loss": 1.3517, + "step": 23904 + }, + { + "epoch": 0.29, + "grad_norm": 5.532135648977128, + "learning_rate": 1.9329179892021527e-05, + "loss": 1.9132, + "step": 23907 + }, + { + "epoch": 0.29, + "grad_norm": 150.49264111562155, + "learning_rate": 1.9328969520068197e-05, + "loss": 1.5544, + "step": 23910 + }, + { + "epoch": 0.29, + "grad_norm": 6.693631381942219, + "learning_rate": 1.9328759116278452e-05, + "loss": 1.5719, + "step": 23913 + }, + { + "epoch": 0.29, + "grad_norm": 2.413646754100355, + "learning_rate": 1.9328548680653014e-05, + "loss": 1.3022, + "step": 23916 + }, + { + "epoch": 0.29, + "grad_norm": 12.665535241671105, + "learning_rate": 1.93283382131926e-05, + "loss": 1.8177, + "step": 23919 + }, + { + "epoch": 0.29, + "grad_norm": 9.933395900366094, + "learning_rate": 1.9328127713897927e-05, + "loss": 1.6315, + "step": 23922 + }, + { + "epoch": 0.29, + "grad_norm": 7.905747612661086, + "learning_rate": 1.9327917182769715e-05, + "loss": 1.3789, + "step": 23925 + }, + { + "epoch": 0.29, + "grad_norm": 11.268232950860737, + "learning_rate": 1.9327706619808676e-05, + "loss": 1.7122, + "step": 23928 + }, + { + "epoch": 0.29, + "grad_norm": 315.6164212763244, + "learning_rate": 1.932749602501554e-05, + "loss": 1.4332, + "step": 23931 + }, + { + "epoch": 0.29, + "grad_norm": 18.4807498216478, + "learning_rate": 1.932728539839102e-05, + "loss": 1.7245, + "step": 23934 + }, + { + "epoch": 0.29, + "grad_norm": 23.02801253338615, + "learning_rate": 1.932707473993583e-05, + "loss": 1.6222, + "step": 23937 + }, + { + "epoch": 0.29, + "grad_norm": 20.419843574450123, + "learning_rate": 1.93268640496507e-05, + "loss": 1.4471, + "step": 23940 + }, + { + "epoch": 0.29, + "grad_norm": 217.15293464393548, + "learning_rate": 1.932665332753634e-05, + "loss": 1.3848, + "step": 23943 + }, + { + "epoch": 0.29, + "grad_norm": 8.949458357422294, + "learning_rate": 1.9326442573593466e-05, + "loss": 1.6375, + "step": 23946 + }, + { + "epoch": 0.29, + "grad_norm": 41.23639170044972, + "learning_rate": 1.9326231787822813e-05, + "loss": 1.8817, + "step": 23949 + }, + { + "epoch": 0.29, + "grad_norm": 20.46402033009775, + "learning_rate": 1.9326020970225083e-05, + "loss": 1.5806, + "step": 23952 + }, + { + "epoch": 0.29, + "grad_norm": 15.15452567859882, + "learning_rate": 1.9325810120801004e-05, + "loss": 1.0154, + "step": 23955 + }, + { + "epoch": 0.29, + "grad_norm": 18.481385921775054, + "learning_rate": 1.9325599239551293e-05, + "loss": 1.4287, + "step": 23958 + }, + { + "epoch": 0.29, + "grad_norm": 46.886435114057626, + "learning_rate": 1.9325388326476673e-05, + "loss": 1.3808, + "step": 23961 + }, + { + "epoch": 0.29, + "grad_norm": 32.29020771786257, + "learning_rate": 1.932517738157786e-05, + "loss": 1.7082, + "step": 23964 + }, + { + "epoch": 0.29, + "grad_norm": 10.249184007288639, + "learning_rate": 1.9324966404855578e-05, + "loss": 1.8732, + "step": 23967 + }, + { + "epoch": 0.29, + "grad_norm": 65.80950597036421, + "learning_rate": 1.9324755396310543e-05, + "loss": 1.3426, + "step": 23970 + }, + { + "epoch": 0.29, + "grad_norm": 28.79507906652276, + "learning_rate": 1.9324544355943476e-05, + "loss": 1.6622, + "step": 23973 + }, + { + "epoch": 0.29, + "grad_norm": 13.78305175882185, + "learning_rate": 1.9324333283755096e-05, + "loss": 1.258, + "step": 23976 + }, + { + "epoch": 0.29, + "grad_norm": 15.437675252573632, + "learning_rate": 1.932412217974613e-05, + "loss": 1.7338, + "step": 23979 + }, + { + "epoch": 0.29, + "grad_norm": 20.22719066852483, + "learning_rate": 1.9323911043917293e-05, + "loss": 1.4069, + "step": 23982 + }, + { + "epoch": 0.29, + "grad_norm": 32.12057840530986, + "learning_rate": 1.9323699876269302e-05, + "loss": 1.4445, + "step": 23985 + }, + { + "epoch": 0.29, + "grad_norm": 29.248988821777854, + "learning_rate": 1.9323488676802885e-05, + "loss": 1.1413, + "step": 23988 + }, + { + "epoch": 0.29, + "grad_norm": 73.7660282779705, + "learning_rate": 1.9323277445518756e-05, + "loss": 1.6668, + "step": 23991 + }, + { + "epoch": 0.29, + "grad_norm": 24.652143849006855, + "learning_rate": 1.9323066182417644e-05, + "loss": 1.3392, + "step": 23994 + }, + { + "epoch": 0.29, + "grad_norm": 6.795391442383569, + "learning_rate": 1.9322854887500263e-05, + "loss": 1.714, + "step": 23997 + }, + { + "epoch": 0.29, + "grad_norm": 2.5785661744913497, + "learning_rate": 1.9322643560767336e-05, + "loss": 1.3684, + "step": 24000 + }, + { + "epoch": 0.29, + "grad_norm": 9.69699332488155, + "learning_rate": 1.9322432202219583e-05, + "loss": 1.717, + "step": 24003 + }, + { + "epoch": 0.29, + "grad_norm": 43.82603952994915, + "learning_rate": 1.932222081185773e-05, + "loss": 1.5755, + "step": 24006 + }, + { + "epoch": 0.29, + "grad_norm": 3.0490233537157563, + "learning_rate": 1.932200938968249e-05, + "loss": 1.5958, + "step": 24009 + }, + { + "epoch": 0.29, + "grad_norm": 13.81920988572727, + "learning_rate": 1.9321797935694597e-05, + "loss": 1.5301, + "step": 24012 + }, + { + "epoch": 0.29, + "grad_norm": 22.776134564880845, + "learning_rate": 1.932158644989476e-05, + "loss": 1.6637, + "step": 24015 + }, + { + "epoch": 0.29, + "grad_norm": 32.93987778928593, + "learning_rate": 1.932137493228371e-05, + "loss": 1.6283, + "step": 24018 + }, + { + "epoch": 0.29, + "grad_norm": 9.636146792674303, + "learning_rate": 1.932116338286216e-05, + "loss": 1.5904, + "step": 24021 + }, + { + "epoch": 0.29, + "grad_norm": 16.65616039316665, + "learning_rate": 1.932095180163084e-05, + "loss": 1.5455, + "step": 24024 + }, + { + "epoch": 0.29, + "grad_norm": 66.0204474366865, + "learning_rate": 1.932074018859047e-05, + "loss": 1.4642, + "step": 24027 + }, + { + "epoch": 0.29, + "grad_norm": 14.374486630985198, + "learning_rate": 1.932052854374177e-05, + "loss": 1.3981, + "step": 24030 + }, + { + "epoch": 0.29, + "grad_norm": 61.470997078983075, + "learning_rate": 1.9320316867085462e-05, + "loss": 1.4171, + "step": 24033 + }, + { + "epoch": 0.29, + "grad_norm": 56.657718556079416, + "learning_rate": 1.932010515862227e-05, + "loss": 1.588, + "step": 24036 + }, + { + "epoch": 0.29, + "grad_norm": 59.78455896175595, + "learning_rate": 1.931989341835292e-05, + "loss": 1.4645, + "step": 24039 + }, + { + "epoch": 0.29, + "grad_norm": 14.357554642711426, + "learning_rate": 1.9319681646278125e-05, + "loss": 1.6911, + "step": 24042 + }, + { + "epoch": 0.29, + "grad_norm": 8.582418123380048, + "learning_rate": 1.931946984239862e-05, + "loss": 1.1292, + "step": 24045 + }, + { + "epoch": 0.29, + "grad_norm": 10.390361981638053, + "learning_rate": 1.9319258006715113e-05, + "loss": 1.5541, + "step": 24048 + }, + { + "epoch": 0.29, + "grad_norm": 51.89630587802856, + "learning_rate": 1.931904613922834e-05, + "loss": 1.9252, + "step": 24051 + }, + { + "epoch": 0.29, + "grad_norm": 17.41026515543179, + "learning_rate": 1.9318834239939018e-05, + "loss": 1.8891, + "step": 24054 + }, + { + "epoch": 0.29, + "grad_norm": 4.106805960092682, + "learning_rate": 1.9318622308847872e-05, + "loss": 1.5658, + "step": 24057 + }, + { + "epoch": 0.29, + "grad_norm": 11.356819329512565, + "learning_rate": 1.9318410345955624e-05, + "loss": 1.4196, + "step": 24060 + }, + { + "epoch": 0.29, + "grad_norm": 9.009196356589099, + "learning_rate": 1.9318198351263003e-05, + "loss": 1.497, + "step": 24063 + }, + { + "epoch": 0.29, + "grad_norm": 8.018385018578408, + "learning_rate": 1.931798632477072e-05, + "loss": 1.6663, + "step": 24066 + }, + { + "epoch": 0.29, + "grad_norm": 19.222268468018807, + "learning_rate": 1.9317774266479508e-05, + "loss": 1.0044, + "step": 24069 + }, + { + "epoch": 0.29, + "grad_norm": 10.528719919493652, + "learning_rate": 1.9317562176390094e-05, + "loss": 1.2734, + "step": 24072 + }, + { + "epoch": 0.29, + "grad_norm": 14.128623470920424, + "learning_rate": 1.931735005450319e-05, + "loss": 1.6906, + "step": 24075 + }, + { + "epoch": 0.29, + "grad_norm": 42.05107206152166, + "learning_rate": 1.9317137900819525e-05, + "loss": 1.425, + "step": 24078 + }, + { + "epoch": 0.29, + "grad_norm": 11.952899435088998, + "learning_rate": 1.9316925715339832e-05, + "loss": 1.3523, + "step": 24081 + }, + { + "epoch": 0.29, + "grad_norm": 30.15141945187312, + "learning_rate": 1.9316713498064822e-05, + "loss": 1.7702, + "step": 24084 + }, + { + "epoch": 0.29, + "grad_norm": 9.587123027284239, + "learning_rate": 1.9316501248995228e-05, + "loss": 1.4899, + "step": 24087 + }, + { + "epoch": 0.29, + "grad_norm": 24.100962943459784, + "learning_rate": 1.9316288968131768e-05, + "loss": 1.3638, + "step": 24090 + }, + { + "epoch": 0.29, + "grad_norm": 2.8672649084431985, + "learning_rate": 1.931607665547517e-05, + "loss": 1.4197, + "step": 24093 + }, + { + "epoch": 0.29, + "grad_norm": 47.6994361653084, + "learning_rate": 1.931586431102616e-05, + "loss": 1.4144, + "step": 24096 + }, + { + "epoch": 0.29, + "grad_norm": 14.640550433692008, + "learning_rate": 1.9315651934785457e-05, + "loss": 1.7789, + "step": 24099 + }, + { + "epoch": 0.29, + "grad_norm": 22.07101219223769, + "learning_rate": 1.9315439526753793e-05, + "loss": 1.9229, + "step": 24102 + }, + { + "epoch": 0.29, + "grad_norm": 10.8055096906381, + "learning_rate": 1.931522708693189e-05, + "loss": 1.5439, + "step": 24105 + }, + { + "epoch": 0.29, + "grad_norm": 33.596638052358045, + "learning_rate": 1.931501461532047e-05, + "loss": 1.4574, + "step": 24108 + }, + { + "epoch": 0.29, + "grad_norm": 4.890220524332868, + "learning_rate": 1.9314802111920258e-05, + "loss": 1.2812, + "step": 24111 + }, + { + "epoch": 0.29, + "grad_norm": 14.062417439250979, + "learning_rate": 1.9314589576731987e-05, + "loss": 1.7165, + "step": 24114 + }, + { + "epoch": 0.29, + "grad_norm": 48.82236554962594, + "learning_rate": 1.9314377009756372e-05, + "loss": 1.4722, + "step": 24117 + }, + { + "epoch": 0.29, + "grad_norm": 89.94863690154382, + "learning_rate": 1.9314164410994146e-05, + "loss": 1.0391, + "step": 24120 + }, + { + "epoch": 0.29, + "grad_norm": 8.871872372346996, + "learning_rate": 1.931395178044603e-05, + "loss": 1.5445, + "step": 24123 + }, + { + "epoch": 0.29, + "grad_norm": 20.89559625079175, + "learning_rate": 1.9313739118112752e-05, + "loss": 1.7898, + "step": 24126 + }, + { + "epoch": 0.29, + "grad_norm": 12.848147384883998, + "learning_rate": 1.931352642399504e-05, + "loss": 1.5792, + "step": 24129 + }, + { + "epoch": 0.29, + "grad_norm": 14.773161199321189, + "learning_rate": 1.9313313698093617e-05, + "loss": 1.5833, + "step": 24132 + }, + { + "epoch": 0.29, + "grad_norm": 11.071207570172575, + "learning_rate": 1.9313100940409205e-05, + "loss": 1.283, + "step": 24135 + }, + { + "epoch": 0.29, + "grad_norm": 4.544331679943437, + "learning_rate": 1.9312888150942535e-05, + "loss": 1.2527, + "step": 24138 + }, + { + "epoch": 0.29, + "grad_norm": 22.688751509779138, + "learning_rate": 1.9312675329694334e-05, + "loss": 1.3507, + "step": 24141 + }, + { + "epoch": 0.29, + "grad_norm": 17.779728535093646, + "learning_rate": 1.9312462476665326e-05, + "loss": 1.4541, + "step": 24144 + }, + { + "epoch": 0.29, + "grad_norm": 10.16410762200163, + "learning_rate": 1.9312249591856237e-05, + "loss": 1.224, + "step": 24147 + }, + { + "epoch": 0.29, + "grad_norm": 24.973701975577306, + "learning_rate": 1.9312036675267797e-05, + "loss": 1.8159, + "step": 24150 + }, + { + "epoch": 0.29, + "grad_norm": 7.186810893038284, + "learning_rate": 1.9311823726900727e-05, + "loss": 1.3378, + "step": 24153 + }, + { + "epoch": 0.29, + "grad_norm": 5.617724092583931, + "learning_rate": 1.9311610746755756e-05, + "loss": 1.3336, + "step": 24156 + }, + { + "epoch": 0.29, + "grad_norm": 24.722145240554404, + "learning_rate": 1.9311397734833616e-05, + "loss": 1.3211, + "step": 24159 + }, + { + "epoch": 0.29, + "grad_norm": 12.647096937775968, + "learning_rate": 1.9311184691135024e-05, + "loss": 1.5609, + "step": 24162 + }, + { + "epoch": 0.29, + "grad_norm": 125.47931409605651, + "learning_rate": 1.9310971615660717e-05, + "loss": 1.5674, + "step": 24165 + }, + { + "epoch": 0.29, + "grad_norm": 20.31265516194795, + "learning_rate": 1.9310758508411415e-05, + "loss": 1.1579, + "step": 24168 + }, + { + "epoch": 0.29, + "grad_norm": 58.845823081989266, + "learning_rate": 1.931054536938785e-05, + "loss": 1.6407, + "step": 24171 + }, + { + "epoch": 0.29, + "grad_norm": 4.826101716372597, + "learning_rate": 1.9310332198590745e-05, + "loss": 1.625, + "step": 24174 + }, + { + "epoch": 0.29, + "grad_norm": 28.392540029986513, + "learning_rate": 1.931011899602083e-05, + "loss": 1.5492, + "step": 24177 + }, + { + "epoch": 0.29, + "grad_norm": 18.83611183359949, + "learning_rate": 1.9309905761678833e-05, + "loss": 2.064, + "step": 24180 + }, + { + "epoch": 0.29, + "grad_norm": 8.650519889269676, + "learning_rate": 1.9309692495565483e-05, + "loss": 1.6595, + "step": 24183 + }, + { + "epoch": 0.29, + "grad_norm": 6.378441932052031, + "learning_rate": 1.9309479197681506e-05, + "loss": 1.7522, + "step": 24186 + }, + { + "epoch": 0.29, + "grad_norm": 10.035790350718548, + "learning_rate": 1.9309265868027627e-05, + "loss": 1.7121, + "step": 24189 + }, + { + "epoch": 0.29, + "grad_norm": 26.126589261146673, + "learning_rate": 1.9309052506604578e-05, + "loss": 1.7912, + "step": 24192 + }, + { + "epoch": 0.29, + "grad_norm": 46.73393486386926, + "learning_rate": 1.9308839113413084e-05, + "loss": 1.6718, + "step": 24195 + }, + { + "epoch": 0.29, + "grad_norm": 33.062877330559544, + "learning_rate": 1.9308625688453877e-05, + "loss": 1.4839, + "step": 24198 + }, + { + "epoch": 0.29, + "grad_norm": 25.39138909796551, + "learning_rate": 1.9308412231727685e-05, + "loss": 1.2383, + "step": 24201 + }, + { + "epoch": 0.29, + "grad_norm": 85.43215047297853, + "learning_rate": 1.930819874323523e-05, + "loss": 1.7177, + "step": 24204 + }, + { + "epoch": 0.29, + "grad_norm": 8.273982892603794, + "learning_rate": 1.930798522297725e-05, + "loss": 1.3201, + "step": 24207 + }, + { + "epoch": 0.29, + "grad_norm": 7.184920089146349, + "learning_rate": 1.9307771670954468e-05, + "loss": 1.7485, + "step": 24210 + }, + { + "epoch": 0.29, + "grad_norm": 11.671571002862171, + "learning_rate": 1.930755808716761e-05, + "loss": 1.6127, + "step": 24213 + }, + { + "epoch": 0.29, + "grad_norm": 5.596434773851704, + "learning_rate": 1.930734447161742e-05, + "loss": 1.2954, + "step": 24216 + }, + { + "epoch": 0.29, + "grad_norm": 40.271700923426785, + "learning_rate": 1.9307130824304606e-05, + "loss": 1.3165, + "step": 24219 + }, + { + "epoch": 0.29, + "grad_norm": 48.46863127714206, + "learning_rate": 1.9306917145229907e-05, + "loss": 1.9291, + "step": 24222 + }, + { + "epoch": 0.29, + "grad_norm": 12.283598385111622, + "learning_rate": 1.9306703434394055e-05, + "loss": 1.4969, + "step": 24225 + }, + { + "epoch": 0.29, + "grad_norm": 36.57088211359822, + "learning_rate": 1.9306489691797777e-05, + "loss": 1.8498, + "step": 24228 + }, + { + "epoch": 0.29, + "grad_norm": 13.007022253594, + "learning_rate": 1.93062759174418e-05, + "loss": 1.2549, + "step": 24231 + }, + { + "epoch": 0.29, + "grad_norm": 18.442190511602174, + "learning_rate": 1.930606211132686e-05, + "loss": 1.6287, + "step": 24234 + }, + { + "epoch": 0.29, + "grad_norm": 8.117048371275825, + "learning_rate": 1.9305848273453676e-05, + "loss": 1.474, + "step": 24237 + }, + { + "epoch": 0.29, + "grad_norm": 13.402086298562695, + "learning_rate": 1.9305634403822985e-05, + "loss": 1.4634, + "step": 24240 + }, + { + "epoch": 0.29, + "grad_norm": 8.134889505105551, + "learning_rate": 1.9305420502435517e-05, + "loss": 1.66, + "step": 24243 + }, + { + "epoch": 0.29, + "grad_norm": 74.86555014771947, + "learning_rate": 1.9305206569292e-05, + "loss": 1.23, + "step": 24246 + }, + { + "epoch": 0.29, + "grad_norm": 23.763706548890735, + "learning_rate": 1.9304992604393164e-05, + "loss": 1.9313, + "step": 24249 + }, + { + "epoch": 0.29, + "grad_norm": 15.588074856219558, + "learning_rate": 1.930477860773974e-05, + "loss": 1.0029, + "step": 24252 + }, + { + "epoch": 0.29, + "grad_norm": 34.955219841253786, + "learning_rate": 1.930456457933246e-05, + "loss": 1.8677, + "step": 24255 + }, + { + "epoch": 0.29, + "grad_norm": 6.188658610945639, + "learning_rate": 1.930435051917205e-05, + "loss": 1.3668, + "step": 24258 + }, + { + "epoch": 0.29, + "grad_norm": 12.760271867067285, + "learning_rate": 1.9304136427259247e-05, + "loss": 1.592, + "step": 24261 + }, + { + "epoch": 0.29, + "grad_norm": 21.324392895828574, + "learning_rate": 1.9303922303594772e-05, + "loss": 1.6278, + "step": 24264 + }, + { + "epoch": 0.29, + "grad_norm": 21.146351097646995, + "learning_rate": 1.930370814817937e-05, + "loss": 1.1904, + "step": 24267 + }, + { + "epoch": 0.29, + "grad_norm": 26.159301788017665, + "learning_rate": 1.9303493961013756e-05, + "loss": 1.4321, + "step": 24270 + }, + { + "epoch": 0.29, + "grad_norm": 10.785785312745713, + "learning_rate": 1.9303279742098668e-05, + "loss": 1.6195, + "step": 24273 + }, + { + "epoch": 0.29, + "grad_norm": 4.894409757157538, + "learning_rate": 1.930306549143484e-05, + "loss": 1.2886, + "step": 24276 + }, + { + "epoch": 0.29, + "grad_norm": 9.725552553953726, + "learning_rate": 1.9302851209023002e-05, + "loss": 1.6065, + "step": 24279 + }, + { + "epoch": 0.29, + "grad_norm": 6.434646669218584, + "learning_rate": 1.9302636894863884e-05, + "loss": 1.5866, + "step": 24282 + }, + { + "epoch": 0.29, + "grad_norm": 4.14180712900849, + "learning_rate": 1.930242254895821e-05, + "loss": 1.4521, + "step": 24285 + }, + { + "epoch": 0.29, + "grad_norm": 17.19654664788867, + "learning_rate": 1.9302208171306723e-05, + "loss": 1.7131, + "step": 24288 + }, + { + "epoch": 0.29, + "grad_norm": 8.949761889191471, + "learning_rate": 1.930199376191015e-05, + "loss": 1.4837, + "step": 24291 + }, + { + "epoch": 0.29, + "grad_norm": 3.571252441816036, + "learning_rate": 1.9301779320769224e-05, + "loss": 1.307, + "step": 24294 + }, + { + "epoch": 0.29, + "grad_norm": 15.629938741056428, + "learning_rate": 1.9301564847884676e-05, + "loss": 1.6729, + "step": 24297 + }, + { + "epoch": 0.29, + "grad_norm": 11.854644146020584, + "learning_rate": 1.9301350343257237e-05, + "loss": 1.6698, + "step": 24300 + }, + { + "epoch": 0.29, + "grad_norm": 14.070085617444873, + "learning_rate": 1.9301135806887637e-05, + "loss": 1.0327, + "step": 24303 + }, + { + "epoch": 0.29, + "grad_norm": 20.656102520342568, + "learning_rate": 1.9300921238776617e-05, + "loss": 1.5811, + "step": 24306 + }, + { + "epoch": 0.29, + "grad_norm": 11.49714958445794, + "learning_rate": 1.9300706638924897e-05, + "loss": 1.2929, + "step": 24309 + }, + { + "epoch": 0.29, + "grad_norm": 3.1937698399194883, + "learning_rate": 1.9300492007333216e-05, + "loss": 1.7138, + "step": 24312 + }, + { + "epoch": 0.29, + "grad_norm": 22.19775177964574, + "learning_rate": 1.930027734400231e-05, + "loss": 1.5135, + "step": 24315 + }, + { + "epoch": 0.29, + "grad_norm": 12.919115366417442, + "learning_rate": 1.93000626489329e-05, + "loss": 1.4143, + "step": 24318 + }, + { + "epoch": 0.29, + "grad_norm": 3.262097628589028, + "learning_rate": 1.929984792212573e-05, + "loss": 1.6417, + "step": 24321 + }, + { + "epoch": 0.29, + "grad_norm": 9.086124516930866, + "learning_rate": 1.929963316358153e-05, + "loss": 1.8171, + "step": 24324 + }, + { + "epoch": 0.29, + "grad_norm": 21.886058472265685, + "learning_rate": 1.9299418373301033e-05, + "loss": 1.4829, + "step": 24327 + }, + { + "epoch": 0.29, + "grad_norm": 34.042319869286935, + "learning_rate": 1.9299203551284968e-05, + "loss": 1.5548, + "step": 24330 + }, + { + "epoch": 0.29, + "grad_norm": 43.71993262725729, + "learning_rate": 1.9298988697534075e-05, + "loss": 1.567, + "step": 24333 + }, + { + "epoch": 0.29, + "grad_norm": 15.043862765669399, + "learning_rate": 1.9298773812049075e-05, + "loss": 1.6067, + "step": 24336 + }, + { + "epoch": 0.29, + "grad_norm": 23.6686215277296, + "learning_rate": 1.9298558894830715e-05, + "loss": 1.6123, + "step": 24339 + }, + { + "epoch": 0.29, + "grad_norm": 18.620096825698152, + "learning_rate": 1.9298343945879723e-05, + "loss": 1.4766, + "step": 24342 + }, + { + "epoch": 0.29, + "grad_norm": 18.864971373374747, + "learning_rate": 1.9298128965196833e-05, + "loss": 1.2587, + "step": 24345 + }, + { + "epoch": 0.29, + "grad_norm": 27.731936545657728, + "learning_rate": 1.9297913952782774e-05, + "loss": 1.5895, + "step": 24348 + }, + { + "epoch": 0.29, + "grad_norm": 13.481804212908976, + "learning_rate": 1.9297698908638285e-05, + "loss": 1.8546, + "step": 24351 + }, + { + "epoch": 0.29, + "grad_norm": 4.37142180192792, + "learning_rate": 1.9297483832764098e-05, + "loss": 1.4688, + "step": 24354 + }, + { + "epoch": 0.29, + "grad_norm": 11.38165240321024, + "learning_rate": 1.929726872516095e-05, + "loss": 1.6729, + "step": 24357 + }, + { + "epoch": 0.29, + "grad_norm": 31.502747787786113, + "learning_rate": 1.9297053585829576e-05, + "loss": 1.4072, + "step": 24360 + }, + { + "epoch": 0.29, + "grad_norm": 9.060580318179639, + "learning_rate": 1.92968384147707e-05, + "loss": 1.6832, + "step": 24363 + }, + { + "epoch": 0.29, + "grad_norm": 8.912333737512377, + "learning_rate": 1.9296623211985066e-05, + "loss": 1.3709, + "step": 24366 + }, + { + "epoch": 0.29, + "grad_norm": 17.62705551593437, + "learning_rate": 1.9296407977473405e-05, + "loss": 1.598, + "step": 24369 + }, + { + "epoch": 0.29, + "grad_norm": 4.600600530580506, + "learning_rate": 1.9296192711236454e-05, + "loss": 1.3203, + "step": 24372 + }, + { + "epoch": 0.29, + "grad_norm": 14.866958009848618, + "learning_rate": 1.9295977413274944e-05, + "loss": 1.5439, + "step": 24375 + }, + { + "epoch": 0.29, + "grad_norm": 10.98243919572054, + "learning_rate": 1.9295762083589614e-05, + "loss": 1.3172, + "step": 24378 + }, + { + "epoch": 0.29, + "grad_norm": 23.288991921088282, + "learning_rate": 1.9295546722181193e-05, + "loss": 1.691, + "step": 24381 + }, + { + "epoch": 0.29, + "grad_norm": 6.89556991028045, + "learning_rate": 1.929533132905042e-05, + "loss": 1.4007, + "step": 24384 + }, + { + "epoch": 0.29, + "grad_norm": 4.43927414543726, + "learning_rate": 1.929511590419803e-05, + "loss": 1.2102, + "step": 24387 + }, + { + "epoch": 0.29, + "grad_norm": 9.147100330920125, + "learning_rate": 1.9294900447624756e-05, + "loss": 1.4786, + "step": 24390 + }, + { + "epoch": 0.29, + "grad_norm": 28.318582600119946, + "learning_rate": 1.929468495933134e-05, + "loss": 1.3888, + "step": 24393 + }, + { + "epoch": 0.29, + "grad_norm": 23.793849119812606, + "learning_rate": 1.9294469439318505e-05, + "loss": 1.5258, + "step": 24396 + }, + { + "epoch": 0.29, + "grad_norm": 21.014823042974438, + "learning_rate": 1.9294253887586998e-05, + "loss": 1.5295, + "step": 24399 + }, + { + "epoch": 0.29, + "grad_norm": 14.184223871512316, + "learning_rate": 1.929403830413755e-05, + "loss": 1.5132, + "step": 24402 + }, + { + "epoch": 0.29, + "grad_norm": 9.62362289591908, + "learning_rate": 1.9293822688970895e-05, + "loss": 1.8925, + "step": 24405 + }, + { + "epoch": 0.29, + "grad_norm": 30.084003558462847, + "learning_rate": 1.9293607042087773e-05, + "loss": 1.5322, + "step": 24408 + }, + { + "epoch": 0.29, + "grad_norm": 14.281633242493346, + "learning_rate": 1.9293391363488915e-05, + "loss": 1.6734, + "step": 24411 + }, + { + "epoch": 0.29, + "grad_norm": 16.659834976172924, + "learning_rate": 1.9293175653175063e-05, + "loss": 1.5769, + "step": 24414 + }, + { + "epoch": 0.29, + "grad_norm": 48.59831479870827, + "learning_rate": 1.9292959911146946e-05, + "loss": 1.3879, + "step": 24417 + }, + { + "epoch": 0.29, + "grad_norm": 11.997635856456489, + "learning_rate": 1.9292744137405308e-05, + "loss": 1.9286, + "step": 24420 + }, + { + "epoch": 0.29, + "grad_norm": 31.016767315297738, + "learning_rate": 1.9292528331950882e-05, + "loss": 1.881, + "step": 24423 + }, + { + "epoch": 0.29, + "grad_norm": 4.479680255051512, + "learning_rate": 1.92923124947844e-05, + "loss": 1.7447, + "step": 24426 + }, + { + "epoch": 0.29, + "grad_norm": 11.169173885678752, + "learning_rate": 1.9292096625906604e-05, + "loss": 1.6939, + "step": 24429 + }, + { + "epoch": 0.29, + "grad_norm": 36.576173641864976, + "learning_rate": 1.9291880725318232e-05, + "loss": 1.1882, + "step": 24432 + }, + { + "epoch": 0.29, + "grad_norm": 9.83549296368535, + "learning_rate": 1.9291664793020013e-05, + "loss": 1.6916, + "step": 24435 + }, + { + "epoch": 0.29, + "grad_norm": 8.386216454163117, + "learning_rate": 1.9291448829012695e-05, + "loss": 1.7671, + "step": 24438 + }, + { + "epoch": 0.29, + "grad_norm": 18.487109984075264, + "learning_rate": 1.9291232833297004e-05, + "loss": 1.6184, + "step": 24441 + }, + { + "epoch": 0.29, + "grad_norm": 12.133729960860732, + "learning_rate": 1.929101680587368e-05, + "loss": 1.7486, + "step": 24444 + }, + { + "epoch": 0.29, + "grad_norm": 15.184140201805317, + "learning_rate": 1.929080074674347e-05, + "loss": 1.7277, + "step": 24447 + }, + { + "epoch": 0.29, + "grad_norm": 7.73905939474255, + "learning_rate": 1.92905846559071e-05, + "loss": 1.653, + "step": 24450 + }, + { + "epoch": 0.29, + "grad_norm": 9.221007489315841, + "learning_rate": 1.929036853336531e-05, + "loss": 1.3815, + "step": 24453 + }, + { + "epoch": 0.29, + "grad_norm": 25.363216927262872, + "learning_rate": 1.9290152379118838e-05, + "loss": 1.5825, + "step": 24456 + }, + { + "epoch": 0.29, + "grad_norm": 9.868760462083946, + "learning_rate": 1.928993619316842e-05, + "loss": 1.763, + "step": 24459 + }, + { + "epoch": 0.29, + "grad_norm": 4.936857460390384, + "learning_rate": 1.92897199755148e-05, + "loss": 1.8473, + "step": 24462 + }, + { + "epoch": 0.29, + "grad_norm": 9.887349736955242, + "learning_rate": 1.928950372615871e-05, + "loss": 1.7815, + "step": 24465 + }, + { + "epoch": 0.29, + "grad_norm": 24.350613658629847, + "learning_rate": 1.9289287445100892e-05, + "loss": 1.4329, + "step": 24468 + }, + { + "epoch": 0.29, + "grad_norm": 13.281596188921965, + "learning_rate": 1.928907113234208e-05, + "loss": 1.4622, + "step": 24471 + }, + { + "epoch": 0.29, + "grad_norm": 34.91723534118849, + "learning_rate": 1.928885478788302e-05, + "loss": 1.3993, + "step": 24474 + }, + { + "epoch": 0.29, + "grad_norm": 14.75442045343232, + "learning_rate": 1.9288638411724438e-05, + "loss": 2.0504, + "step": 24477 + }, + { + "epoch": 0.29, + "grad_norm": 19.886119911805583, + "learning_rate": 1.9288422003867078e-05, + "loss": 1.7302, + "step": 24480 + }, + { + "epoch": 0.29, + "grad_norm": 10.033541809198024, + "learning_rate": 1.9288205564311682e-05, + "loss": 1.4685, + "step": 24483 + }, + { + "epoch": 0.29, + "grad_norm": 35.924260579890195, + "learning_rate": 1.928798909305899e-05, + "loss": 1.5422, + "step": 24486 + }, + { + "epoch": 0.29, + "grad_norm": 46.77996226249204, + "learning_rate": 1.9287772590109726e-05, + "loss": 1.5891, + "step": 24489 + }, + { + "epoch": 0.29, + "grad_norm": 34.392092185623, + "learning_rate": 1.928755605546465e-05, + "loss": 1.5531, + "step": 24492 + }, + { + "epoch": 0.29, + "grad_norm": 22.452022639055713, + "learning_rate": 1.9287339489124485e-05, + "loss": 1.7156, + "step": 24495 + }, + { + "epoch": 0.29, + "grad_norm": 41.3606830548999, + "learning_rate": 1.9287122891089977e-05, + "loss": 1.7717, + "step": 24498 + }, + { + "epoch": 0.29, + "grad_norm": 31.016676555190063, + "learning_rate": 1.9286906261361865e-05, + "loss": 1.7912, + "step": 24501 + }, + { + "epoch": 0.29, + "grad_norm": 4.466294975649089, + "learning_rate": 1.9286689599940883e-05, + "loss": 1.7184, + "step": 24504 + }, + { + "epoch": 0.29, + "grad_norm": 21.478248250306816, + "learning_rate": 1.9286472906827777e-05, + "loss": 1.6597, + "step": 24507 + }, + { + "epoch": 0.29, + "grad_norm": 79.91058732746991, + "learning_rate": 1.9286256182023286e-05, + "loss": 1.2358, + "step": 24510 + }, + { + "epoch": 0.29, + "grad_norm": 5.0817442996390145, + "learning_rate": 1.9286039425528144e-05, + "loss": 1.641, + "step": 24513 + }, + { + "epoch": 0.29, + "grad_norm": 34.84004996874965, + "learning_rate": 1.9285822637343098e-05, + "loss": 1.4924, + "step": 24516 + }, + { + "epoch": 0.29, + "grad_norm": 25.99240982557015, + "learning_rate": 1.9285605817468883e-05, + "loss": 1.5318, + "step": 24519 + }, + { + "epoch": 0.29, + "grad_norm": 7.9819861667893885, + "learning_rate": 1.9285388965906236e-05, + "loss": 1.3098, + "step": 24522 + }, + { + "epoch": 0.29, + "grad_norm": 9.324468397591769, + "learning_rate": 1.9285172082655907e-05, + "loss": 1.4511, + "step": 24525 + }, + { + "epoch": 0.29, + "grad_norm": 5.897406195402003, + "learning_rate": 1.9284955167718625e-05, + "loss": 1.594, + "step": 24528 + }, + { + "epoch": 0.29, + "grad_norm": 18.645548393034893, + "learning_rate": 1.928473822109514e-05, + "loss": 1.6963, + "step": 24531 + }, + { + "epoch": 0.3, + "grad_norm": 20.79312831248928, + "learning_rate": 1.9284521242786188e-05, + "loss": 1.3767, + "step": 24534 + }, + { + "epoch": 0.3, + "grad_norm": 8.65711442270419, + "learning_rate": 1.9284304232792506e-05, + "loss": 1.4933, + "step": 24537 + }, + { + "epoch": 0.3, + "grad_norm": 3.7135247080257665, + "learning_rate": 1.9284087191114836e-05, + "loss": 1.6871, + "step": 24540 + }, + { + "epoch": 0.3, + "grad_norm": 5.1967813937607525, + "learning_rate": 1.9283870117753925e-05, + "loss": 1.2717, + "step": 24543 + }, + { + "epoch": 0.3, + "grad_norm": 14.343119260486294, + "learning_rate": 1.928365301271051e-05, + "loss": 1.5183, + "step": 24546 + }, + { + "epoch": 0.3, + "grad_norm": 26.22479157563948, + "learning_rate": 1.9283435875985324e-05, + "loss": 1.4931, + "step": 24549 + }, + { + "epoch": 0.3, + "grad_norm": 4.527035313771827, + "learning_rate": 1.9283218707579125e-05, + "loss": 1.5019, + "step": 24552 + }, + { + "epoch": 0.3, + "grad_norm": 24.8903688130777, + "learning_rate": 1.9283001507492636e-05, + "loss": 1.6237, + "step": 24555 + }, + { + "epoch": 0.3, + "grad_norm": 65.40577959516287, + "learning_rate": 1.9282784275726613e-05, + "loss": 1.3371, + "step": 24558 + }, + { + "epoch": 0.3, + "grad_norm": 61.860374544275246, + "learning_rate": 1.9282567012281785e-05, + "loss": 1.7725, + "step": 24561 + }, + { + "epoch": 0.3, + "grad_norm": 12.468212973179233, + "learning_rate": 1.9282349717158904e-05, + "loss": 1.3211, + "step": 24564 + }, + { + "epoch": 0.3, + "grad_norm": 52.49862538851333, + "learning_rate": 1.9282132390358702e-05, + "loss": 1.363, + "step": 24567 + }, + { + "epoch": 0.3, + "grad_norm": 9.619108787385143, + "learning_rate": 1.928191503188193e-05, + "loss": 1.5773, + "step": 24570 + }, + { + "epoch": 0.3, + "grad_norm": 5.444860862605784, + "learning_rate": 1.9281697641729326e-05, + "loss": 1.6146, + "step": 24573 + }, + { + "epoch": 0.3, + "grad_norm": 11.865493902691146, + "learning_rate": 1.928148021990163e-05, + "loss": 1.7365, + "step": 24576 + }, + { + "epoch": 0.3, + "grad_norm": 14.206839654022826, + "learning_rate": 1.9281262766399585e-05, + "loss": 1.5919, + "step": 24579 + }, + { + "epoch": 0.3, + "grad_norm": 16.51551911598442, + "learning_rate": 1.928104528122393e-05, + "loss": 1.5807, + "step": 24582 + }, + { + "epoch": 0.3, + "grad_norm": 58.62995117897259, + "learning_rate": 1.9280827764375414e-05, + "loss": 1.5032, + "step": 24585 + }, + { + "epoch": 0.3, + "grad_norm": 24.20804949285674, + "learning_rate": 1.9280610215854777e-05, + "loss": 1.1408, + "step": 24588 + }, + { + "epoch": 0.3, + "grad_norm": 57.22797902363472, + "learning_rate": 1.9280392635662757e-05, + "loss": 1.6853, + "step": 24591 + }, + { + "epoch": 0.3, + "grad_norm": 6.734990244317682, + "learning_rate": 1.92801750238001e-05, + "loss": 1.1645, + "step": 24594 + }, + { + "epoch": 0.3, + "grad_norm": 24.849928479696246, + "learning_rate": 1.927995738026755e-05, + "loss": 1.6613, + "step": 24597 + }, + { + "epoch": 0.3, + "grad_norm": 14.759795347040132, + "learning_rate": 1.927973970506585e-05, + "loss": 1.6618, + "step": 24600 + }, + { + "epoch": 0.3, + "grad_norm": 19.70391949590106, + "learning_rate": 1.9279521998195733e-05, + "loss": 1.775, + "step": 24603 + }, + { + "epoch": 0.3, + "grad_norm": 6.345378296542297, + "learning_rate": 1.9279304259657955e-05, + "loss": 1.7645, + "step": 24606 + }, + { + "epoch": 0.3, + "grad_norm": 33.909702978956744, + "learning_rate": 1.9279086489453257e-05, + "loss": 1.4021, + "step": 24609 + }, + { + "epoch": 0.3, + "grad_norm": 24.70033765672515, + "learning_rate": 1.9278868687582375e-05, + "loss": 1.2004, + "step": 24612 + }, + { + "epoch": 0.3, + "grad_norm": 9.591142978435835, + "learning_rate": 1.927865085404606e-05, + "loss": 1.3811, + "step": 24615 + }, + { + "epoch": 0.3, + "grad_norm": 11.49110479453452, + "learning_rate": 1.9278432988845046e-05, + "loss": 1.5329, + "step": 24618 + }, + { + "epoch": 0.3, + "grad_norm": 26.023408265518494, + "learning_rate": 1.9278215091980086e-05, + "loss": 1.8256, + "step": 24621 + }, + { + "epoch": 0.3, + "grad_norm": 12.601457243633252, + "learning_rate": 1.927799716345192e-05, + "loss": 2.0154, + "step": 24624 + }, + { + "epoch": 0.3, + "grad_norm": 15.875445264396346, + "learning_rate": 1.927777920326129e-05, + "loss": 1.2945, + "step": 24627 + }, + { + "epoch": 0.3, + "grad_norm": 8.636822473658984, + "learning_rate": 1.9277561211408944e-05, + "loss": 1.7299, + "step": 24630 + }, + { + "epoch": 0.3, + "grad_norm": 20.95552876920674, + "learning_rate": 1.927734318789562e-05, + "loss": 1.6566, + "step": 24633 + }, + { + "epoch": 0.3, + "grad_norm": 35.37017850623478, + "learning_rate": 1.927712513272207e-05, + "loss": 1.6462, + "step": 24636 + }, + { + "epoch": 0.3, + "grad_norm": 13.466844659309334, + "learning_rate": 1.927690704588903e-05, + "loss": 1.6153, + "step": 24639 + }, + { + "epoch": 0.3, + "grad_norm": 90.52830343113096, + "learning_rate": 1.927668892739725e-05, + "loss": 1.3722, + "step": 24642 + }, + { + "epoch": 0.3, + "grad_norm": 20.88537126162722, + "learning_rate": 1.927647077724747e-05, + "loss": 1.436, + "step": 24645 + }, + { + "epoch": 0.3, + "grad_norm": 12.006465510384476, + "learning_rate": 1.9276252595440437e-05, + "loss": 1.9237, + "step": 24648 + }, + { + "epoch": 0.3, + "grad_norm": 6.629960735478062, + "learning_rate": 1.9276034381976897e-05, + "loss": 1.7244, + "step": 24651 + }, + { + "epoch": 0.3, + "grad_norm": 73.39272531476696, + "learning_rate": 1.927581613685759e-05, + "loss": 1.4206, + "step": 24654 + }, + { + "epoch": 0.3, + "grad_norm": 31.68121381873125, + "learning_rate": 1.9275597860083266e-05, + "loss": 1.6929, + "step": 24657 + }, + { + "epoch": 0.3, + "grad_norm": 30.980852494602885, + "learning_rate": 1.9275379551654663e-05, + "loss": 1.3804, + "step": 24660 + }, + { + "epoch": 0.3, + "grad_norm": 19.48357815987859, + "learning_rate": 1.9275161211572538e-05, + "loss": 1.4934, + "step": 24663 + }, + { + "epoch": 0.3, + "grad_norm": 8.740122102065817, + "learning_rate": 1.9274942839837625e-05, + "loss": 1.3078, + "step": 24666 + }, + { + "epoch": 0.3, + "grad_norm": 7.6864741979417355, + "learning_rate": 1.9274724436450673e-05, + "loss": 1.5064, + "step": 24669 + }, + { + "epoch": 0.3, + "grad_norm": 30.25982694220525, + "learning_rate": 1.9274506001412424e-05, + "loss": 1.8604, + "step": 24672 + }, + { + "epoch": 0.3, + "grad_norm": 40.23107845218532, + "learning_rate": 1.9274287534723633e-05, + "loss": 1.495, + "step": 24675 + }, + { + "epoch": 0.3, + "grad_norm": 32.90978014923779, + "learning_rate": 1.9274069036385036e-05, + "loss": 1.6738, + "step": 24678 + }, + { + "epoch": 0.3, + "grad_norm": 45.72309239273149, + "learning_rate": 1.927385050639738e-05, + "loss": 1.4233, + "step": 24681 + }, + { + "epoch": 0.3, + "grad_norm": 30.5231171628938, + "learning_rate": 1.9273631944761416e-05, + "loss": 1.8697, + "step": 24684 + }, + { + "epoch": 0.3, + "grad_norm": 22.588082135352654, + "learning_rate": 1.9273413351477886e-05, + "loss": 1.6407, + "step": 24687 + }, + { + "epoch": 0.3, + "grad_norm": 8.152066124437615, + "learning_rate": 1.9273194726547536e-05, + "loss": 1.5585, + "step": 24690 + }, + { + "epoch": 0.3, + "grad_norm": 23.968955292891536, + "learning_rate": 1.927297606997111e-05, + "loss": 1.311, + "step": 24693 + }, + { + "epoch": 0.3, + "grad_norm": 25.6453207554134, + "learning_rate": 1.927275738174936e-05, + "loss": 1.6662, + "step": 24696 + }, + { + "epoch": 0.3, + "grad_norm": 6.321456081419612, + "learning_rate": 1.9272538661883027e-05, + "loss": 1.3739, + "step": 24699 + }, + { + "epoch": 0.3, + "grad_norm": 5.301125355109826, + "learning_rate": 1.9272319910372862e-05, + "loss": 1.1417, + "step": 24702 + }, + { + "epoch": 0.3, + "grad_norm": 12.147974537180259, + "learning_rate": 1.9272101127219606e-05, + "loss": 1.6144, + "step": 24705 + }, + { + "epoch": 0.3, + "grad_norm": 30.84514891917911, + "learning_rate": 1.927188231242401e-05, + "loss": 1.4492, + "step": 24708 + }, + { + "epoch": 0.3, + "grad_norm": 32.963783928728155, + "learning_rate": 1.927166346598682e-05, + "loss": 1.4196, + "step": 24711 + }, + { + "epoch": 0.3, + "grad_norm": 8.901656511075403, + "learning_rate": 1.927144458790878e-05, + "loss": 1.3171, + "step": 24714 + }, + { + "epoch": 0.3, + "grad_norm": 39.05983778429345, + "learning_rate": 1.9271225678190642e-05, + "loss": 1.3249, + "step": 24717 + }, + { + "epoch": 0.3, + "grad_norm": 43.62143398835828, + "learning_rate": 1.927100673683315e-05, + "loss": 1.533, + "step": 24720 + }, + { + "epoch": 0.3, + "grad_norm": 71.10704003331345, + "learning_rate": 1.927078776383705e-05, + "loss": 1.4397, + "step": 24723 + }, + { + "epoch": 0.3, + "grad_norm": 5.117010522352891, + "learning_rate": 1.9270568759203092e-05, + "loss": 1.8701, + "step": 24726 + }, + { + "epoch": 0.3, + "grad_norm": 6.43747425728569, + "learning_rate": 1.927034972293202e-05, + "loss": 1.1341, + "step": 24729 + }, + { + "epoch": 0.3, + "grad_norm": 48.465294013697545, + "learning_rate": 1.9270130655024587e-05, + "loss": 1.6104, + "step": 24732 + }, + { + "epoch": 0.3, + "grad_norm": 26.770418862965887, + "learning_rate": 1.9269911555481534e-05, + "loss": 1.592, + "step": 24735 + }, + { + "epoch": 0.3, + "grad_norm": 52.894697663602706, + "learning_rate": 1.9269692424303612e-05, + "loss": 1.9019, + "step": 24738 + }, + { + "epoch": 0.3, + "grad_norm": 44.874121141356376, + "learning_rate": 1.9269473261491568e-05, + "loss": 1.8469, + "step": 24741 + }, + { + "epoch": 0.3, + "grad_norm": 6.119062417257216, + "learning_rate": 1.9269254067046154e-05, + "loss": 1.6169, + "step": 24744 + }, + { + "epoch": 0.3, + "grad_norm": 6.564976282725548, + "learning_rate": 1.926903484096811e-05, + "loss": 2.0903, + "step": 24747 + }, + { + "epoch": 0.3, + "grad_norm": 3.9302230664163327, + "learning_rate": 1.9268815583258192e-05, + "loss": 1.3993, + "step": 24750 + }, + { + "epoch": 0.3, + "grad_norm": 5.923110347150805, + "learning_rate": 1.926859629391715e-05, + "loss": 1.4176, + "step": 24753 + }, + { + "epoch": 0.3, + "grad_norm": 7.121614848831939, + "learning_rate": 1.926837697294572e-05, + "loss": 1.3681, + "step": 24756 + }, + { + "epoch": 0.3, + "grad_norm": 46.91027072977155, + "learning_rate": 1.926815762034466e-05, + "loss": 1.2483, + "step": 24759 + }, + { + "epoch": 0.3, + "grad_norm": 76.66911394414466, + "learning_rate": 1.9267938236114716e-05, + "loss": 1.3039, + "step": 24762 + }, + { + "epoch": 0.3, + "grad_norm": 7.468575159150272, + "learning_rate": 1.9267718820256638e-05, + "loss": 0.9663, + "step": 24765 + }, + { + "epoch": 0.3, + "grad_norm": 17.59732739099106, + "learning_rate": 1.9267499372771172e-05, + "loss": 1.5233, + "step": 24768 + }, + { + "epoch": 0.3, + "grad_norm": 15.872582110116987, + "learning_rate": 1.926727989365907e-05, + "loss": 1.4659, + "step": 24771 + }, + { + "epoch": 0.3, + "grad_norm": 17.332601762651535, + "learning_rate": 1.9267060382921082e-05, + "loss": 0.9165, + "step": 24774 + }, + { + "epoch": 0.3, + "grad_norm": 7.4498782602504585, + "learning_rate": 1.9266840840557952e-05, + "loss": 1.3132, + "step": 24777 + }, + { + "epoch": 0.3, + "grad_norm": 6.218316213796387, + "learning_rate": 1.926662126657043e-05, + "loss": 1.6382, + "step": 24780 + }, + { + "epoch": 0.3, + "grad_norm": 14.502773501905235, + "learning_rate": 1.9266401660959272e-05, + "loss": 1.4467, + "step": 24783 + }, + { + "epoch": 0.3, + "grad_norm": 9.13201744082815, + "learning_rate": 1.9266182023725223e-05, + "loss": 1.6002, + "step": 24786 + }, + { + "epoch": 0.3, + "grad_norm": 8.121652540623638, + "learning_rate": 1.926596235486903e-05, + "loss": 1.3285, + "step": 24789 + }, + { + "epoch": 0.3, + "grad_norm": 72.2252210594327, + "learning_rate": 1.9265742654391447e-05, + "loss": 1.4395, + "step": 24792 + }, + { + "epoch": 0.3, + "grad_norm": 6.290509297673334, + "learning_rate": 1.9265522922293218e-05, + "loss": 1.7732, + "step": 24795 + }, + { + "epoch": 0.3, + "grad_norm": 6.815251830965932, + "learning_rate": 1.92653031585751e-05, + "loss": 1.0397, + "step": 24798 + }, + { + "epoch": 0.3, + "grad_norm": 23.227307707976802, + "learning_rate": 1.9265083363237838e-05, + "loss": 1.2005, + "step": 24801 + }, + { + "epoch": 0.3, + "grad_norm": 32.157578979080036, + "learning_rate": 1.9264863536282185e-05, + "loss": 1.5022, + "step": 24804 + }, + { + "epoch": 0.3, + "grad_norm": 12.382972487133694, + "learning_rate": 1.926464367770889e-05, + "loss": 1.1292, + "step": 24807 + }, + { + "epoch": 0.3, + "grad_norm": 6.781580348946835, + "learning_rate": 1.9264423787518702e-05, + "loss": 1.1805, + "step": 24810 + }, + { + "epoch": 0.3, + "grad_norm": 6.969236908004032, + "learning_rate": 1.926420386571237e-05, + "loss": 1.6969, + "step": 24813 + }, + { + "epoch": 0.3, + "grad_norm": 42.13396055256421, + "learning_rate": 1.9263983912290653e-05, + "loss": 1.766, + "step": 24816 + }, + { + "epoch": 0.3, + "grad_norm": 13.894157011605241, + "learning_rate": 1.9263763927254295e-05, + "loss": 1.3452, + "step": 24819 + }, + { + "epoch": 0.3, + "grad_norm": 8.444079176680228, + "learning_rate": 1.9263543910604044e-05, + "loss": 1.5957, + "step": 24822 + }, + { + "epoch": 0.3, + "grad_norm": 78.57888738041187, + "learning_rate": 1.9263323862340657e-05, + "loss": 1.0704, + "step": 24825 + }, + { + "epoch": 0.3, + "grad_norm": 23.670827728818935, + "learning_rate": 1.926310378246488e-05, + "loss": 1.5109, + "step": 24828 + }, + { + "epoch": 0.3, + "grad_norm": 6.999272853358786, + "learning_rate": 1.926288367097747e-05, + "loss": 1.7118, + "step": 24831 + }, + { + "epoch": 0.3, + "grad_norm": 7.460829715338824, + "learning_rate": 1.9262663527879173e-05, + "loss": 1.641, + "step": 24834 + }, + { + "epoch": 0.3, + "grad_norm": 20.83319088334902, + "learning_rate": 1.9262443353170738e-05, + "loss": 1.2609, + "step": 24837 + }, + { + "epoch": 0.3, + "grad_norm": 9.90559648055449, + "learning_rate": 1.9262223146852924e-05, + "loss": 1.5304, + "step": 24840 + }, + { + "epoch": 0.3, + "grad_norm": 26.768895411165804, + "learning_rate": 1.926200290892648e-05, + "loss": 1.6771, + "step": 24843 + }, + { + "epoch": 0.3, + "grad_norm": 28.94028727548059, + "learning_rate": 1.926178263939215e-05, + "loss": 1.6366, + "step": 24846 + }, + { + "epoch": 0.3, + "grad_norm": 8.214232798750526, + "learning_rate": 1.92615623382507e-05, + "loss": 1.5959, + "step": 24849 + }, + { + "epoch": 0.3, + "grad_norm": 14.750524613305737, + "learning_rate": 1.926134200550287e-05, + "loss": 1.5944, + "step": 24852 + }, + { + "epoch": 0.3, + "grad_norm": 72.0801831057563, + "learning_rate": 1.9261121641149414e-05, + "loss": 1.8181, + "step": 24855 + }, + { + "epoch": 0.3, + "grad_norm": 13.41246885970306, + "learning_rate": 1.9260901245191088e-05, + "loss": 1.4413, + "step": 24858 + }, + { + "epoch": 0.3, + "grad_norm": 23.300568227128206, + "learning_rate": 1.926068081762864e-05, + "loss": 1.3029, + "step": 24861 + }, + { + "epoch": 0.3, + "grad_norm": 2.5424791285388104, + "learning_rate": 1.9260460358462826e-05, + "loss": 2.0168, + "step": 24864 + }, + { + "epoch": 0.3, + "grad_norm": 4.733507072464053, + "learning_rate": 1.9260239867694394e-05, + "loss": 1.3287, + "step": 24867 + }, + { + "epoch": 0.3, + "grad_norm": 14.419480047393705, + "learning_rate": 1.92600193453241e-05, + "loss": 1.6056, + "step": 24870 + }, + { + "epoch": 0.3, + "grad_norm": 21.08190425836497, + "learning_rate": 1.9259798791352697e-05, + "loss": 1.3215, + "step": 24873 + }, + { + "epoch": 0.3, + "grad_norm": 70.17855315411161, + "learning_rate": 1.9259578205780936e-05, + "loss": 1.253, + "step": 24876 + }, + { + "epoch": 0.3, + "grad_norm": 17.877672864102514, + "learning_rate": 1.9259357588609568e-05, + "loss": 1.6154, + "step": 24879 + }, + { + "epoch": 0.3, + "grad_norm": 28.385941324467616, + "learning_rate": 1.925913693983935e-05, + "loss": 1.5444, + "step": 24882 + }, + { + "epoch": 0.3, + "grad_norm": 15.18944496435224, + "learning_rate": 1.9258916259471027e-05, + "loss": 1.4041, + "step": 24885 + }, + { + "epoch": 0.3, + "grad_norm": 12.354047019933402, + "learning_rate": 1.9258695547505365e-05, + "loss": 1.2547, + "step": 24888 + }, + { + "epoch": 0.3, + "grad_norm": 25.605504954183576, + "learning_rate": 1.9258474803943105e-05, + "loss": 1.3147, + "step": 24891 + }, + { + "epoch": 0.3, + "grad_norm": 19.99621322815487, + "learning_rate": 1.9258254028785008e-05, + "loss": 1.5372, + "step": 24894 + }, + { + "epoch": 0.3, + "grad_norm": 22.494787925775288, + "learning_rate": 1.9258033222031826e-05, + "loss": 1.5482, + "step": 24897 + }, + { + "epoch": 0.3, + "grad_norm": 28.16107989936529, + "learning_rate": 1.9257812383684305e-05, + "loss": 1.3735, + "step": 24900 + }, + { + "epoch": 0.3, + "grad_norm": 4.406379830995654, + "learning_rate": 1.925759151374321e-05, + "loss": 1.5723, + "step": 24903 + }, + { + "epoch": 0.3, + "grad_norm": 50.103584775135324, + "learning_rate": 1.9257370612209293e-05, + "loss": 1.7428, + "step": 24906 + }, + { + "epoch": 0.3, + "grad_norm": 26.960268871781576, + "learning_rate": 1.92571496790833e-05, + "loss": 1.7125, + "step": 24909 + }, + { + "epoch": 0.3, + "grad_norm": 15.310335542009264, + "learning_rate": 1.925692871436599e-05, + "loss": 1.7251, + "step": 24912 + }, + { + "epoch": 0.3, + "grad_norm": 19.521739773883063, + "learning_rate": 1.9256707718058118e-05, + "loss": 1.4684, + "step": 24915 + }, + { + "epoch": 0.3, + "grad_norm": 8.970212532878332, + "learning_rate": 1.9256486690160436e-05, + "loss": 1.5408, + "step": 24918 + }, + { + "epoch": 0.3, + "grad_norm": 17.730663096834494, + "learning_rate": 1.9256265630673697e-05, + "loss": 1.2924, + "step": 24921 + }, + { + "epoch": 0.3, + "grad_norm": 15.346616643774457, + "learning_rate": 1.9256044539598663e-05, + "loss": 1.6479, + "step": 24924 + }, + { + "epoch": 0.3, + "grad_norm": 16.636251383055914, + "learning_rate": 1.9255823416936077e-05, + "loss": 2.0471, + "step": 24927 + }, + { + "epoch": 0.3, + "grad_norm": 10.38341731056178, + "learning_rate": 1.92556022626867e-05, + "loss": 1.3459, + "step": 24930 + }, + { + "epoch": 0.3, + "grad_norm": 27.917212931716364, + "learning_rate": 1.925538107685129e-05, + "loss": 1.511, + "step": 24933 + }, + { + "epoch": 0.3, + "grad_norm": 12.490243181145129, + "learning_rate": 1.9255159859430595e-05, + "loss": 1.5015, + "step": 24936 + }, + { + "epoch": 0.3, + "grad_norm": 11.583229740982079, + "learning_rate": 1.9254938610425378e-05, + "loss": 1.4468, + "step": 24939 + }, + { + "epoch": 0.3, + "grad_norm": 11.914536471882553, + "learning_rate": 1.925471732983638e-05, + "loss": 1.4447, + "step": 24942 + }, + { + "epoch": 0.3, + "grad_norm": 2.744799501638576, + "learning_rate": 1.9254496017664377e-05, + "loss": 1.4294, + "step": 24945 + }, + { + "epoch": 0.3, + "grad_norm": 54.40801749002176, + "learning_rate": 1.9254274673910104e-05, + "loss": 1.3296, + "step": 24948 + }, + { + "epoch": 0.3, + "grad_norm": 23.127668116883207, + "learning_rate": 1.925405329857433e-05, + "loss": 1.3438, + "step": 24951 + }, + { + "epoch": 0.3, + "grad_norm": 15.032705397910407, + "learning_rate": 1.92538318916578e-05, + "loss": 1.5918, + "step": 24954 + }, + { + "epoch": 0.3, + "grad_norm": 114.36130975465534, + "learning_rate": 1.925361045316128e-05, + "loss": 1.3906, + "step": 24957 + }, + { + "epoch": 0.3, + "grad_norm": 7.184932494250821, + "learning_rate": 1.9253388983085515e-05, + "loss": 1.632, + "step": 24960 + }, + { + "epoch": 0.3, + "grad_norm": 53.31547654585802, + "learning_rate": 1.925316748143127e-05, + "loss": 1.5858, + "step": 24963 + }, + { + "epoch": 0.3, + "grad_norm": 14.560012794136739, + "learning_rate": 1.92529459481993e-05, + "loss": 1.4435, + "step": 24966 + }, + { + "epoch": 0.3, + "grad_norm": 6.4352012225080255, + "learning_rate": 1.9252724383390356e-05, + "loss": 1.5547, + "step": 24969 + }, + { + "epoch": 0.3, + "grad_norm": 11.794502008613192, + "learning_rate": 1.9252502787005197e-05, + "loss": 1.7137, + "step": 24972 + }, + { + "epoch": 0.3, + "grad_norm": 15.042865304861111, + "learning_rate": 1.9252281159044578e-05, + "loss": 1.3432, + "step": 24975 + }, + { + "epoch": 0.3, + "grad_norm": 5.678789839295248, + "learning_rate": 1.9252059499509257e-05, + "loss": 1.5277, + "step": 24978 + }, + { + "epoch": 0.3, + "grad_norm": 5.098908589330353, + "learning_rate": 1.9251837808399987e-05, + "loss": 1.7042, + "step": 24981 + }, + { + "epoch": 0.3, + "grad_norm": 23.687582523030137, + "learning_rate": 1.925161608571753e-05, + "loss": 1.6913, + "step": 24984 + }, + { + "epoch": 0.3, + "grad_norm": 25.253356309104557, + "learning_rate": 1.9251394331462637e-05, + "loss": 1.2972, + "step": 24987 + }, + { + "epoch": 0.3, + "grad_norm": 6.394103575708469, + "learning_rate": 1.925117254563607e-05, + "loss": 1.2605, + "step": 24990 + }, + { + "epoch": 0.3, + "grad_norm": 8.158468310621933, + "learning_rate": 1.925095072823858e-05, + "loss": 1.7101, + "step": 24993 + }, + { + "epoch": 0.3, + "grad_norm": 34.843474800200305, + "learning_rate": 1.925072887927093e-05, + "loss": 1.7023, + "step": 24996 + }, + { + "epoch": 0.3, + "grad_norm": 50.5645331501752, + "learning_rate": 1.9250506998733876e-05, + "loss": 1.8889, + "step": 24999 + }, + { + "epoch": 0.3, + "grad_norm": 9.177385628502424, + "learning_rate": 1.925028508662817e-05, + "loss": 1.5771, + "step": 25002 + }, + { + "epoch": 0.3, + "grad_norm": 30.28913768718312, + "learning_rate": 1.9250063142954573e-05, + "loss": 1.4788, + "step": 25005 + }, + { + "epoch": 0.3, + "grad_norm": 2.993222482780337, + "learning_rate": 1.9249841167713844e-05, + "loss": 1.5347, + "step": 25008 + }, + { + "epoch": 0.3, + "grad_norm": 20.066849962240365, + "learning_rate": 1.9249619160906737e-05, + "loss": 1.4385, + "step": 25011 + }, + { + "epoch": 0.3, + "grad_norm": 6.581117199363832, + "learning_rate": 1.9249397122534016e-05, + "loss": 1.2633, + "step": 25014 + }, + { + "epoch": 0.3, + "grad_norm": 25.859121657853912, + "learning_rate": 1.9249175052596433e-05, + "loss": 1.5709, + "step": 25017 + }, + { + "epoch": 0.3, + "grad_norm": 18.79852049287101, + "learning_rate": 1.9248952951094743e-05, + "loss": 1.1016, + "step": 25020 + }, + { + "epoch": 0.3, + "grad_norm": 4.991115817233499, + "learning_rate": 1.9248730818029712e-05, + "loss": 1.5975, + "step": 25023 + }, + { + "epoch": 0.3, + "grad_norm": 14.21324428803364, + "learning_rate": 1.9248508653402092e-05, + "loss": 1.3574, + "step": 25026 + }, + { + "epoch": 0.3, + "grad_norm": 7.396825348453318, + "learning_rate": 1.9248286457212645e-05, + "loss": 1.6268, + "step": 25029 + }, + { + "epoch": 0.3, + "grad_norm": 190.07005506058343, + "learning_rate": 1.9248064229462128e-05, + "loss": 1.2549, + "step": 25032 + }, + { + "epoch": 0.3, + "grad_norm": 8.302926247233328, + "learning_rate": 1.9247841970151295e-05, + "loss": 1.3825, + "step": 25035 + }, + { + "epoch": 0.3, + "grad_norm": 20.18298840488162, + "learning_rate": 1.924761967928091e-05, + "loss": 1.6066, + "step": 25038 + }, + { + "epoch": 0.3, + "grad_norm": 12.60808078573491, + "learning_rate": 1.924739735685173e-05, + "loss": 1.1107, + "step": 25041 + }, + { + "epoch": 0.3, + "grad_norm": 62.58418073410018, + "learning_rate": 1.9247175002864516e-05, + "loss": 1.9644, + "step": 25044 + }, + { + "epoch": 0.3, + "grad_norm": 3.758507616717517, + "learning_rate": 1.9246952617320024e-05, + "loss": 1.6582, + "step": 25047 + }, + { + "epoch": 0.3, + "grad_norm": 38.61399214680983, + "learning_rate": 1.924673020021901e-05, + "loss": 1.5874, + "step": 25050 + }, + { + "epoch": 0.3, + "grad_norm": 16.07448646742879, + "learning_rate": 1.924650775156224e-05, + "loss": 1.4771, + "step": 25053 + }, + { + "epoch": 0.3, + "grad_norm": 4.1095722658199705, + "learning_rate": 1.924628527135047e-05, + "loss": 1.6529, + "step": 25056 + }, + { + "epoch": 0.3, + "grad_norm": 7.8350543880117405, + "learning_rate": 1.9246062759584457e-05, + "loss": 1.2935, + "step": 25059 + }, + { + "epoch": 0.3, + "grad_norm": 14.596871472613106, + "learning_rate": 1.924584021626496e-05, + "loss": 1.5948, + "step": 25062 + }, + { + "epoch": 0.3, + "grad_norm": 13.10761050821247, + "learning_rate": 1.9245617641392745e-05, + "loss": 1.6798, + "step": 25065 + }, + { + "epoch": 0.3, + "grad_norm": 8.830037484894225, + "learning_rate": 1.9245395034968566e-05, + "loss": 1.4187, + "step": 25068 + }, + { + "epoch": 0.3, + "grad_norm": 12.87528242391329, + "learning_rate": 1.9245172396993182e-05, + "loss": 1.7241, + "step": 25071 + }, + { + "epoch": 0.3, + "grad_norm": 14.204051892270519, + "learning_rate": 1.924494972746736e-05, + "loss": 1.5221, + "step": 25074 + }, + { + "epoch": 0.3, + "grad_norm": 18.03987429796243, + "learning_rate": 1.924472702639185e-05, + "loss": 1.9913, + "step": 25077 + }, + { + "epoch": 0.3, + "grad_norm": 27.378391168405226, + "learning_rate": 1.9244504293767417e-05, + "loss": 1.2258, + "step": 25080 + }, + { + "epoch": 0.3, + "grad_norm": 15.557719146953945, + "learning_rate": 1.924428152959482e-05, + "loss": 1.4654, + "step": 25083 + }, + { + "epoch": 0.3, + "grad_norm": 22.639275827079132, + "learning_rate": 1.9244058733874823e-05, + "loss": 1.8699, + "step": 25086 + }, + { + "epoch": 0.3, + "grad_norm": 12.067555990813917, + "learning_rate": 1.924383590660818e-05, + "loss": 1.606, + "step": 25089 + }, + { + "epoch": 0.3, + "grad_norm": 16.428049252935047, + "learning_rate": 1.9243613047795656e-05, + "loss": 1.3878, + "step": 25092 + }, + { + "epoch": 0.3, + "grad_norm": 24.482699998616123, + "learning_rate": 1.924339015743801e-05, + "loss": 1.5057, + "step": 25095 + }, + { + "epoch": 0.3, + "grad_norm": 39.16194523768682, + "learning_rate": 1.9243167235536006e-05, + "loss": 1.772, + "step": 25098 + }, + { + "epoch": 0.3, + "grad_norm": 13.213161987538731, + "learning_rate": 1.9242944282090397e-05, + "loss": 1.4166, + "step": 25101 + }, + { + "epoch": 0.3, + "grad_norm": 12.893941288235958, + "learning_rate": 1.9242721297101948e-05, + "loss": 1.1597, + "step": 25104 + }, + { + "epoch": 0.3, + "grad_norm": 15.041563081368041, + "learning_rate": 1.9242498280571423e-05, + "loss": 1.5637, + "step": 25107 + }, + { + "epoch": 0.3, + "grad_norm": 13.30435941306286, + "learning_rate": 1.924227523249958e-05, + "loss": 1.4529, + "step": 25110 + }, + { + "epoch": 0.3, + "grad_norm": 21.958569397671827, + "learning_rate": 1.924205215288718e-05, + "loss": 1.587, + "step": 25113 + }, + { + "epoch": 0.3, + "grad_norm": 7.572767263435375, + "learning_rate": 1.9241829041734986e-05, + "loss": 1.6201, + "step": 25116 + }, + { + "epoch": 0.3, + "grad_norm": 14.1892560457674, + "learning_rate": 1.9241605899043758e-05, + "loss": 1.4223, + "step": 25119 + }, + { + "epoch": 0.3, + "grad_norm": 253.7489673303313, + "learning_rate": 1.9241382724814258e-05, + "loss": 1.7242, + "step": 25122 + }, + { + "epoch": 0.3, + "grad_norm": 11.662299371561426, + "learning_rate": 1.9241159519047247e-05, + "loss": 1.4392, + "step": 25125 + }, + { + "epoch": 0.3, + "grad_norm": 6.8944130305995435, + "learning_rate": 1.9240936281743486e-05, + "loss": 1.3314, + "step": 25128 + }, + { + "epoch": 0.3, + "grad_norm": 13.283501919674789, + "learning_rate": 1.9240713012903743e-05, + "loss": 1.4231, + "step": 25131 + }, + { + "epoch": 0.3, + "grad_norm": 10.993593750294426, + "learning_rate": 1.924048971252877e-05, + "loss": 1.7319, + "step": 25134 + }, + { + "epoch": 0.3, + "grad_norm": 15.339640665679989, + "learning_rate": 1.9240266380619337e-05, + "loss": 1.5616, + "step": 25137 + }, + { + "epoch": 0.3, + "grad_norm": 27.387353450349448, + "learning_rate": 1.9240043017176198e-05, + "loss": 1.5918, + "step": 25140 + }, + { + "epoch": 0.3, + "grad_norm": 5.5237163774055995, + "learning_rate": 1.9239819622200127e-05, + "loss": 1.5451, + "step": 25143 + }, + { + "epoch": 0.3, + "grad_norm": 20.75422867556795, + "learning_rate": 1.9239596195691875e-05, + "loss": 1.2445, + "step": 25146 + }, + { + "epoch": 0.3, + "grad_norm": 13.62122341914645, + "learning_rate": 1.9239372737652212e-05, + "loss": 1.6938, + "step": 25149 + }, + { + "epoch": 0.3, + "grad_norm": 7.167238801134173, + "learning_rate": 1.9239149248081897e-05, + "loss": 1.3011, + "step": 25152 + }, + { + "epoch": 0.3, + "grad_norm": 12.68542261206489, + "learning_rate": 1.9238925726981695e-05, + "loss": 1.4736, + "step": 25155 + }, + { + "epoch": 0.3, + "grad_norm": 11.216099735320018, + "learning_rate": 1.9238702174352365e-05, + "loss": 1.508, + "step": 25158 + }, + { + "epoch": 0.3, + "grad_norm": 37.16028785923521, + "learning_rate": 1.9238478590194677e-05, + "loss": 1.5849, + "step": 25161 + }, + { + "epoch": 0.3, + "grad_norm": 19.124111156908036, + "learning_rate": 1.9238254974509382e-05, + "loss": 1.4867, + "step": 25164 + }, + { + "epoch": 0.3, + "grad_norm": 16.78125816621657, + "learning_rate": 1.9238031327297254e-05, + "loss": 1.213, + "step": 25167 + }, + { + "epoch": 0.3, + "grad_norm": 14.282154573136365, + "learning_rate": 1.9237807648559053e-05, + "loss": 1.4819, + "step": 25170 + }, + { + "epoch": 0.3, + "grad_norm": 25.95275206417473, + "learning_rate": 1.9237583938295544e-05, + "loss": 1.6043, + "step": 25173 + }, + { + "epoch": 0.3, + "grad_norm": 19.4418119877307, + "learning_rate": 1.9237360196507484e-05, + "loss": 1.555, + "step": 25176 + }, + { + "epoch": 0.3, + "grad_norm": 27.761169626211878, + "learning_rate": 1.9237136423195642e-05, + "loss": 1.6631, + "step": 25179 + }, + { + "epoch": 0.3, + "grad_norm": 37.826470481139445, + "learning_rate": 1.9236912618360782e-05, + "loss": 1.6174, + "step": 25182 + }, + { + "epoch": 0.3, + "grad_norm": 22.801932883771254, + "learning_rate": 1.9236688782003665e-05, + "loss": 1.4476, + "step": 25185 + }, + { + "epoch": 0.3, + "grad_norm": 15.610829085488676, + "learning_rate": 1.9236464914125058e-05, + "loss": 1.8134, + "step": 25188 + }, + { + "epoch": 0.3, + "grad_norm": 10.619695953403538, + "learning_rate": 1.923624101472572e-05, + "loss": 1.5504, + "step": 25191 + }, + { + "epoch": 0.3, + "grad_norm": 8.945057152010547, + "learning_rate": 1.9236017083806422e-05, + "loss": 1.5329, + "step": 25194 + }, + { + "epoch": 0.3, + "grad_norm": 17.762211715040714, + "learning_rate": 1.9235793121367923e-05, + "loss": 1.6295, + "step": 25197 + }, + { + "epoch": 0.3, + "grad_norm": 8.067126767952535, + "learning_rate": 1.923556912741099e-05, + "loss": 1.422, + "step": 25200 + }, + { + "epoch": 0.3, + "grad_norm": 37.895281753186005, + "learning_rate": 1.9235345101936385e-05, + "loss": 1.3851, + "step": 25203 + }, + { + "epoch": 0.3, + "grad_norm": 23.1913385885148, + "learning_rate": 1.9235121044944875e-05, + "loss": 1.8289, + "step": 25206 + }, + { + "epoch": 0.3, + "grad_norm": 4.467714277810823, + "learning_rate": 1.923489695643722e-05, + "loss": 1.585, + "step": 25209 + }, + { + "epoch": 0.3, + "grad_norm": 11.997014160694299, + "learning_rate": 1.923467283641419e-05, + "loss": 1.479, + "step": 25212 + }, + { + "epoch": 0.3, + "grad_norm": 13.186882942718405, + "learning_rate": 1.9234448684876548e-05, + "loss": 1.2315, + "step": 25215 + }, + { + "epoch": 0.3, + "grad_norm": 16.440478321533078, + "learning_rate": 1.9234224501825058e-05, + "loss": 1.5647, + "step": 25218 + }, + { + "epoch": 0.3, + "grad_norm": 23.91149051952166, + "learning_rate": 1.923400028726049e-05, + "loss": 1.557, + "step": 25221 + }, + { + "epoch": 0.3, + "grad_norm": 11.450336550818474, + "learning_rate": 1.92337760411836e-05, + "loss": 1.7023, + "step": 25224 + }, + { + "epoch": 0.3, + "grad_norm": 206.1490790246728, + "learning_rate": 1.9233551763595158e-05, + "loss": 1.8328, + "step": 25227 + }, + { + "epoch": 0.3, + "grad_norm": 18.472672508253403, + "learning_rate": 1.9233327454495933e-05, + "loss": 1.33, + "step": 25230 + }, + { + "epoch": 0.3, + "grad_norm": 7.362502508397431, + "learning_rate": 1.9233103113886685e-05, + "loss": 1.3151, + "step": 25233 + }, + { + "epoch": 0.3, + "grad_norm": 29.038396742906173, + "learning_rate": 1.9232878741768185e-05, + "loss": 1.8304, + "step": 25236 + }, + { + "epoch": 0.3, + "grad_norm": 7.139863140726302, + "learning_rate": 1.923265433814119e-05, + "loss": 1.7703, + "step": 25239 + }, + { + "epoch": 0.3, + "grad_norm": 9.336031942583762, + "learning_rate": 1.923242990300648e-05, + "loss": 1.5729, + "step": 25242 + }, + { + "epoch": 0.3, + "grad_norm": 42.04241482337536, + "learning_rate": 1.9232205436364807e-05, + "loss": 1.342, + "step": 25245 + }, + { + "epoch": 0.3, + "grad_norm": 9.194443435171532, + "learning_rate": 1.9231980938216942e-05, + "loss": 1.66, + "step": 25248 + }, + { + "epoch": 0.3, + "grad_norm": 15.424179057171395, + "learning_rate": 1.9231756408563653e-05, + "loss": 1.6824, + "step": 25251 + }, + { + "epoch": 0.3, + "grad_norm": 31.83162494662164, + "learning_rate": 1.92315318474057e-05, + "loss": 1.5409, + "step": 25254 + }, + { + "epoch": 0.3, + "grad_norm": 22.873006377270105, + "learning_rate": 1.9231307254743857e-05, + "loss": 1.2531, + "step": 25257 + }, + { + "epoch": 0.3, + "grad_norm": 20.3108028994693, + "learning_rate": 1.923108263057889e-05, + "loss": 1.2231, + "step": 25260 + }, + { + "epoch": 0.3, + "grad_norm": 14.213080231796114, + "learning_rate": 1.923085797491156e-05, + "loss": 1.5898, + "step": 25263 + }, + { + "epoch": 0.3, + "grad_norm": 12.305872007147087, + "learning_rate": 1.923063328774264e-05, + "loss": 1.7478, + "step": 25266 + }, + { + "epoch": 0.3, + "grad_norm": 24.338593445789908, + "learning_rate": 1.923040856907289e-05, + "loss": 1.8267, + "step": 25269 + }, + { + "epoch": 0.3, + "grad_norm": 14.454062881995572, + "learning_rate": 1.9230183818903082e-05, + "loss": 1.8148, + "step": 25272 + }, + { + "epoch": 0.3, + "grad_norm": 26.485078924436742, + "learning_rate": 1.9229959037233978e-05, + "loss": 1.4631, + "step": 25275 + }, + { + "epoch": 0.3, + "grad_norm": 11.386887967632761, + "learning_rate": 1.9229734224066354e-05, + "loss": 1.4915, + "step": 25278 + }, + { + "epoch": 0.3, + "grad_norm": 37.395149825160445, + "learning_rate": 1.9229509379400966e-05, + "loss": 1.601, + "step": 25281 + }, + { + "epoch": 0.3, + "grad_norm": 15.425958871919153, + "learning_rate": 1.922928450323859e-05, + "loss": 1.4944, + "step": 25284 + }, + { + "epoch": 0.3, + "grad_norm": 22.023949691049946, + "learning_rate": 1.9229059595579992e-05, + "loss": 1.4934, + "step": 25287 + }, + { + "epoch": 0.3, + "grad_norm": 53.420979417939094, + "learning_rate": 1.9228834656425934e-05, + "loss": 1.7198, + "step": 25290 + }, + { + "epoch": 0.3, + "grad_norm": 9.490939823379309, + "learning_rate": 1.9228609685777192e-05, + "loss": 1.402, + "step": 25293 + }, + { + "epoch": 0.3, + "grad_norm": 10.087357275883361, + "learning_rate": 1.9228384683634525e-05, + "loss": 1.8389, + "step": 25296 + }, + { + "epoch": 0.3, + "grad_norm": 5.917006126250743, + "learning_rate": 1.922815964999871e-05, + "loss": 1.4183, + "step": 25299 + }, + { + "epoch": 0.3, + "grad_norm": 9.45811439628844, + "learning_rate": 1.9227934584870503e-05, + "loss": 1.529, + "step": 25302 + }, + { + "epoch": 0.3, + "grad_norm": 24.20076516065306, + "learning_rate": 1.9227709488250682e-05, + "loss": 1.5331, + "step": 25305 + }, + { + "epoch": 0.3, + "grad_norm": 12.125928340026272, + "learning_rate": 1.9227484360140013e-05, + "loss": 1.5914, + "step": 25308 + }, + { + "epoch": 0.3, + "grad_norm": 10.416884059720998, + "learning_rate": 1.9227259200539268e-05, + "loss": 1.6069, + "step": 25311 + }, + { + "epoch": 0.3, + "grad_norm": 10.113544598192519, + "learning_rate": 1.9227034009449205e-05, + "loss": 1.4773, + "step": 25314 + }, + { + "epoch": 0.3, + "grad_norm": 30.675562626848325, + "learning_rate": 1.92268087868706e-05, + "loss": 1.6713, + "step": 25317 + }, + { + "epoch": 0.3, + "grad_norm": 7.371142256778871, + "learning_rate": 1.9226583532804222e-05, + "loss": 1.5382, + "step": 25320 + }, + { + "epoch": 0.3, + "grad_norm": 17.222108292677273, + "learning_rate": 1.9226358247250836e-05, + "loss": 1.38, + "step": 25323 + }, + { + "epoch": 0.3, + "grad_norm": 7.24860916922667, + "learning_rate": 1.9226132930211213e-05, + "loss": 1.6836, + "step": 25326 + }, + { + "epoch": 0.3, + "grad_norm": 30.392013063000785, + "learning_rate": 1.922590758168612e-05, + "loss": 1.486, + "step": 25329 + }, + { + "epoch": 0.3, + "grad_norm": 12.874318081119593, + "learning_rate": 1.922568220167633e-05, + "loss": 1.9324, + "step": 25332 + }, + { + "epoch": 0.3, + "grad_norm": 30.13402190558866, + "learning_rate": 1.9225456790182607e-05, + "loss": 1.8656, + "step": 25335 + }, + { + "epoch": 0.3, + "grad_norm": 14.372416727317232, + "learning_rate": 1.9225231347205725e-05, + "loss": 1.7535, + "step": 25338 + }, + { + "epoch": 0.3, + "grad_norm": 51.20082005746678, + "learning_rate": 1.9225005872746455e-05, + "loss": 1.4454, + "step": 25341 + }, + { + "epoch": 0.3, + "grad_norm": 16.015698781875578, + "learning_rate": 1.9224780366805556e-05, + "loss": 1.2325, + "step": 25344 + }, + { + "epoch": 0.3, + "grad_norm": 56.41601245984713, + "learning_rate": 1.922455482938381e-05, + "loss": 1.6162, + "step": 25347 + }, + { + "epoch": 0.3, + "grad_norm": 17.267443912483706, + "learning_rate": 1.9224329260481975e-05, + "loss": 1.1398, + "step": 25350 + }, + { + "epoch": 0.3, + "grad_norm": 6.115759621842021, + "learning_rate": 1.922410366010083e-05, + "loss": 1.5762, + "step": 25353 + }, + { + "epoch": 0.3, + "grad_norm": 14.686824084829036, + "learning_rate": 1.9223878028241143e-05, + "loss": 1.4436, + "step": 25356 + }, + { + "epoch": 0.3, + "grad_norm": 36.26191825215074, + "learning_rate": 1.9223652364903683e-05, + "loss": 1.187, + "step": 25359 + }, + { + "epoch": 0.3, + "grad_norm": 8.572776980284234, + "learning_rate": 1.9223426670089215e-05, + "loss": 1.3725, + "step": 25362 + }, + { + "epoch": 0.31, + "grad_norm": 9.929254995334999, + "learning_rate": 1.9223200943798522e-05, + "loss": 1.4841, + "step": 25365 + }, + { + "epoch": 0.31, + "grad_norm": 37.23943275309203, + "learning_rate": 1.922297518603236e-05, + "loss": 1.7459, + "step": 25368 + }, + { + "epoch": 0.31, + "grad_norm": 49.78810391794201, + "learning_rate": 1.922274939679151e-05, + "loss": 1.7081, + "step": 25371 + }, + { + "epoch": 0.31, + "grad_norm": 4.79931947834479, + "learning_rate": 1.9222523576076734e-05, + "loss": 1.5214, + "step": 25374 + }, + { + "epoch": 0.31, + "grad_norm": 6.981244670836304, + "learning_rate": 1.9222297723888806e-05, + "loss": 1.2528, + "step": 25377 + }, + { + "epoch": 0.31, + "grad_norm": 12.880325098991937, + "learning_rate": 1.9222071840228502e-05, + "loss": 1.8677, + "step": 25380 + }, + { + "epoch": 0.31, + "grad_norm": 13.350589181282352, + "learning_rate": 1.922184592509659e-05, + "loss": 1.3618, + "step": 25383 + }, + { + "epoch": 0.31, + "grad_norm": 11.669425776094203, + "learning_rate": 1.9221619978493834e-05, + "loss": 1.8109, + "step": 25386 + }, + { + "epoch": 0.31, + "grad_norm": 75.89373779291851, + "learning_rate": 1.9221394000421015e-05, + "loss": 1.6637, + "step": 25389 + }, + { + "epoch": 0.31, + "grad_norm": 6.187649213590597, + "learning_rate": 1.9221167990878896e-05, + "loss": 1.6099, + "step": 25392 + }, + { + "epoch": 0.31, + "grad_norm": 8.115442947638314, + "learning_rate": 1.9220941949868256e-05, + "loss": 1.8722, + "step": 25395 + }, + { + "epoch": 0.31, + "grad_norm": 59.45916442283929, + "learning_rate": 1.922071587738986e-05, + "loss": 1.8446, + "step": 25398 + }, + { + "epoch": 0.31, + "grad_norm": 8.134361272048793, + "learning_rate": 1.9220489773444483e-05, + "loss": 1.6093, + "step": 25401 + }, + { + "epoch": 0.31, + "grad_norm": 12.131403588523664, + "learning_rate": 1.9220263638032895e-05, + "loss": 1.5218, + "step": 25404 + }, + { + "epoch": 0.31, + "grad_norm": 23.259456809080426, + "learning_rate": 1.922003747115587e-05, + "loss": 1.4798, + "step": 25407 + }, + { + "epoch": 0.31, + "grad_norm": 13.703730912620953, + "learning_rate": 1.9219811272814176e-05, + "loss": 1.6735, + "step": 25410 + }, + { + "epoch": 0.31, + "grad_norm": 10.954649312435025, + "learning_rate": 1.921958504300859e-05, + "loss": 1.3053, + "step": 25413 + }, + { + "epoch": 0.31, + "grad_norm": 11.956933027679039, + "learning_rate": 1.921935878173988e-05, + "loss": 1.3087, + "step": 25416 + }, + { + "epoch": 0.31, + "grad_norm": 19.35428692867936, + "learning_rate": 1.9219132489008815e-05, + "loss": 1.3645, + "step": 25419 + }, + { + "epoch": 0.31, + "grad_norm": 8.220766427948904, + "learning_rate": 1.9218906164816175e-05, + "loss": 1.0812, + "step": 25422 + }, + { + "epoch": 0.31, + "grad_norm": 34.72141099646814, + "learning_rate": 1.921867980916273e-05, + "loss": 1.4255, + "step": 25425 + }, + { + "epoch": 0.31, + "grad_norm": 42.70300558783507, + "learning_rate": 1.9218453422049247e-05, + "loss": 1.6607, + "step": 25428 + }, + { + "epoch": 0.31, + "grad_norm": 11.40250846446463, + "learning_rate": 1.9218227003476507e-05, + "loss": 1.8027, + "step": 25431 + }, + { + "epoch": 0.31, + "grad_norm": 12.614694141691759, + "learning_rate": 1.921800055344528e-05, + "loss": 1.3873, + "step": 25434 + }, + { + "epoch": 0.31, + "grad_norm": 17.874770110632866, + "learning_rate": 1.9217774071956333e-05, + "loss": 1.2312, + "step": 25437 + }, + { + "epoch": 0.31, + "grad_norm": 6.7877845317595815, + "learning_rate": 1.9217547559010444e-05, + "loss": 1.3853, + "step": 25440 + }, + { + "epoch": 0.31, + "grad_norm": 13.467683525753781, + "learning_rate": 1.921732101460839e-05, + "loss": 1.5499, + "step": 25443 + }, + { + "epoch": 0.31, + "grad_norm": 17.825422560731393, + "learning_rate": 1.9217094438750937e-05, + "loss": 1.5296, + "step": 25446 + }, + { + "epoch": 0.31, + "grad_norm": 41.85414826637504, + "learning_rate": 1.921686783143886e-05, + "loss": 1.3985, + "step": 25449 + }, + { + "epoch": 0.31, + "grad_norm": 14.999908674440965, + "learning_rate": 1.921664119267293e-05, + "loss": 1.5918, + "step": 25452 + }, + { + "epoch": 0.31, + "grad_norm": 16.10239186525359, + "learning_rate": 1.9216414522453926e-05, + "loss": 1.4154, + "step": 25455 + }, + { + "epoch": 0.31, + "grad_norm": 23.36287858074353, + "learning_rate": 1.9216187820782622e-05, + "loss": 1.6382, + "step": 25458 + }, + { + "epoch": 0.31, + "grad_norm": 18.006223565796375, + "learning_rate": 1.9215961087659786e-05, + "loss": 1.4733, + "step": 25461 + }, + { + "epoch": 0.31, + "grad_norm": 23.913056575102647, + "learning_rate": 1.9215734323086192e-05, + "loss": 1.0894, + "step": 25464 + }, + { + "epoch": 0.31, + "grad_norm": 8.33355525070655, + "learning_rate": 1.921550752706262e-05, + "loss": 1.3217, + "step": 25467 + }, + { + "epoch": 0.31, + "grad_norm": 12.796695923142805, + "learning_rate": 1.921528069958984e-05, + "loss": 1.644, + "step": 25470 + }, + { + "epoch": 0.31, + "grad_norm": 15.480269622187532, + "learning_rate": 1.9215053840668623e-05, + "loss": 1.3433, + "step": 25473 + }, + { + "epoch": 0.31, + "grad_norm": 16.69303746727545, + "learning_rate": 1.9214826950299747e-05, + "loss": 1.5059, + "step": 25476 + }, + { + "epoch": 0.31, + "grad_norm": 12.786704927603877, + "learning_rate": 1.921460002848399e-05, + "loss": 1.4119, + "step": 25479 + }, + { + "epoch": 0.31, + "grad_norm": 7.186833261269924, + "learning_rate": 1.921437307522212e-05, + "loss": 1.6252, + "step": 25482 + }, + { + "epoch": 0.31, + "grad_norm": 14.314099215077949, + "learning_rate": 1.9214146090514912e-05, + "loss": 1.324, + "step": 25485 + }, + { + "epoch": 0.31, + "grad_norm": 7.140905067448612, + "learning_rate": 1.9213919074363145e-05, + "loss": 1.0042, + "step": 25488 + }, + { + "epoch": 0.31, + "grad_norm": 17.1613062844411, + "learning_rate": 1.9213692026767586e-05, + "loss": 1.6733, + "step": 25491 + }, + { + "epoch": 0.31, + "grad_norm": 25.67745439397205, + "learning_rate": 1.921346494772902e-05, + "loss": 1.2819, + "step": 25494 + }, + { + "epoch": 0.31, + "grad_norm": 12.402579368389485, + "learning_rate": 1.9213237837248215e-05, + "loss": 1.22, + "step": 25497 + }, + { + "epoch": 0.31, + "grad_norm": 11.18117908908293, + "learning_rate": 1.921301069532595e-05, + "loss": 1.4362, + "step": 25500 + }, + { + "epoch": 0.31, + "grad_norm": 4.716478701167527, + "learning_rate": 1.9212783521962993e-05, + "loss": 1.6914, + "step": 25503 + }, + { + "epoch": 0.31, + "grad_norm": 8.126919204478176, + "learning_rate": 1.921255631716013e-05, + "loss": 1.3787, + "step": 25506 + }, + { + "epoch": 0.31, + "grad_norm": 14.083344254936303, + "learning_rate": 1.921232908091813e-05, + "loss": 1.6307, + "step": 25509 + }, + { + "epoch": 0.31, + "grad_norm": 141.95646040032975, + "learning_rate": 1.9212101813237764e-05, + "loss": 1.3153, + "step": 25512 + }, + { + "epoch": 0.31, + "grad_norm": 9.716550919836195, + "learning_rate": 1.9211874514119814e-05, + "loss": 1.2479, + "step": 25515 + }, + { + "epoch": 0.31, + "grad_norm": 7.379704105582572, + "learning_rate": 1.921164718356506e-05, + "loss": 1.9151, + "step": 25518 + }, + { + "epoch": 0.31, + "grad_norm": 24.243837269197936, + "learning_rate": 1.9211419821574267e-05, + "loss": 1.5479, + "step": 25521 + }, + { + "epoch": 0.31, + "grad_norm": 7.611175835037976, + "learning_rate": 1.9211192428148217e-05, + "loss": 1.5572, + "step": 25524 + }, + { + "epoch": 0.31, + "grad_norm": 18.282476937942874, + "learning_rate": 1.9210965003287688e-05, + "loss": 1.6903, + "step": 25527 + }, + { + "epoch": 0.31, + "grad_norm": 16.110330811531984, + "learning_rate": 1.9210737546993447e-05, + "loss": 1.5084, + "step": 25530 + }, + { + "epoch": 0.31, + "grad_norm": 2.448224323827305, + "learning_rate": 1.921051005926628e-05, + "loss": 1.2537, + "step": 25533 + }, + { + "epoch": 0.31, + "grad_norm": 28.509308258968833, + "learning_rate": 1.9210282540106963e-05, + "loss": 1.3671, + "step": 25536 + }, + { + "epoch": 0.31, + "grad_norm": 14.290329383185446, + "learning_rate": 1.9210054989516264e-05, + "loss": 1.2577, + "step": 25539 + }, + { + "epoch": 0.31, + "grad_norm": 12.422718311913915, + "learning_rate": 1.9209827407494968e-05, + "loss": 1.7467, + "step": 25542 + }, + { + "epoch": 0.31, + "grad_norm": 20.7607480458031, + "learning_rate": 1.9209599794043847e-05, + "loss": 1.9006, + "step": 25545 + }, + { + "epoch": 0.31, + "grad_norm": 26.719309469320898, + "learning_rate": 1.9209372149163677e-05, + "loss": 1.6527, + "step": 25548 + }, + { + "epoch": 0.31, + "grad_norm": 9.730592822573662, + "learning_rate": 1.920914447285524e-05, + "loss": 1.6697, + "step": 25551 + }, + { + "epoch": 0.31, + "grad_norm": 30.227440372543384, + "learning_rate": 1.9208916765119314e-05, + "loss": 1.5255, + "step": 25554 + }, + { + "epoch": 0.31, + "grad_norm": 89.28733543113854, + "learning_rate": 1.9208689025956665e-05, + "loss": 1.5794, + "step": 25557 + }, + { + "epoch": 0.31, + "grad_norm": 8.158860769880127, + "learning_rate": 1.920846125536808e-05, + "loss": 1.1605, + "step": 25560 + }, + { + "epoch": 0.31, + "grad_norm": 22.14744823374875, + "learning_rate": 1.9208233453354335e-05, + "loss": 1.6411, + "step": 25563 + }, + { + "epoch": 0.31, + "grad_norm": 21.444408686088366, + "learning_rate": 1.9208005619916204e-05, + "loss": 1.6639, + "step": 25566 + }, + { + "epoch": 0.31, + "grad_norm": 14.438089972578819, + "learning_rate": 1.920777775505447e-05, + "loss": 1.6785, + "step": 25569 + }, + { + "epoch": 0.31, + "grad_norm": 29.60478910110786, + "learning_rate": 1.92075498587699e-05, + "loss": 1.6903, + "step": 25572 + }, + { + "epoch": 0.31, + "grad_norm": 6.437396069091468, + "learning_rate": 1.9207321931063287e-05, + "loss": 1.1239, + "step": 25575 + }, + { + "epoch": 0.31, + "grad_norm": 18.02959422527472, + "learning_rate": 1.9207093971935394e-05, + "loss": 1.7147, + "step": 25578 + }, + { + "epoch": 0.31, + "grad_norm": 5.008705417560644, + "learning_rate": 1.920686598138701e-05, + "loss": 1.4022, + "step": 25581 + }, + { + "epoch": 0.31, + "grad_norm": 41.07358387771141, + "learning_rate": 1.9206637959418906e-05, + "loss": 1.1862, + "step": 25584 + }, + { + "epoch": 0.31, + "grad_norm": 8.90619794418049, + "learning_rate": 1.9206409906031863e-05, + "loss": 1.2367, + "step": 25587 + }, + { + "epoch": 0.31, + "grad_norm": 45.41952831916919, + "learning_rate": 1.9206181821226663e-05, + "loss": 1.3725, + "step": 25590 + }, + { + "epoch": 0.31, + "grad_norm": 17.46699124019456, + "learning_rate": 1.920595370500408e-05, + "loss": 1.6339, + "step": 25593 + }, + { + "epoch": 0.31, + "grad_norm": 19.93812557549522, + "learning_rate": 1.920572555736489e-05, + "loss": 1.7165, + "step": 25596 + }, + { + "epoch": 0.31, + "grad_norm": 10.594903113989838, + "learning_rate": 1.9205497378309874e-05, + "loss": 1.5547, + "step": 25599 + }, + { + "epoch": 0.31, + "grad_norm": 41.12150128866691, + "learning_rate": 1.920526916783981e-05, + "loss": 1.6113, + "step": 25602 + }, + { + "epoch": 0.31, + "grad_norm": 45.37434679946161, + "learning_rate": 1.9205040925955485e-05, + "loss": 1.6593, + "step": 25605 + }, + { + "epoch": 0.31, + "grad_norm": 6.529430644593026, + "learning_rate": 1.9204812652657665e-05, + "loss": 1.707, + "step": 25608 + }, + { + "epoch": 0.31, + "grad_norm": 36.23771755747106, + "learning_rate": 1.9204584347947137e-05, + "loss": 1.5346, + "step": 25611 + }, + { + "epoch": 0.31, + "grad_norm": 6.699904907042189, + "learning_rate": 1.920435601182468e-05, + "loss": 1.5287, + "step": 25614 + }, + { + "epoch": 0.31, + "grad_norm": 7.55000007343832, + "learning_rate": 1.9204127644291066e-05, + "loss": 1.3996, + "step": 25617 + }, + { + "epoch": 0.31, + "grad_norm": 48.747202095377155, + "learning_rate": 1.9203899245347083e-05, + "loss": 1.5168, + "step": 25620 + }, + { + "epoch": 0.31, + "grad_norm": 35.242647559893165, + "learning_rate": 1.9203670814993508e-05, + "loss": 1.5603, + "step": 25623 + }, + { + "epoch": 0.31, + "grad_norm": 14.628522703709411, + "learning_rate": 1.9203442353231118e-05, + "loss": 1.3122, + "step": 25626 + }, + { + "epoch": 0.31, + "grad_norm": 4.197176466001104, + "learning_rate": 1.9203213860060695e-05, + "loss": 1.4917, + "step": 25629 + }, + { + "epoch": 0.31, + "grad_norm": 8.426569532235733, + "learning_rate": 1.9202985335483016e-05, + "loss": 1.5931, + "step": 25632 + }, + { + "epoch": 0.31, + "grad_norm": 15.1447320657865, + "learning_rate": 1.9202756779498866e-05, + "loss": 1.4885, + "step": 25635 + }, + { + "epoch": 0.31, + "grad_norm": 34.674877599352236, + "learning_rate": 1.9202528192109022e-05, + "loss": 1.6235, + "step": 25638 + }, + { + "epoch": 0.31, + "grad_norm": 4.246532750577673, + "learning_rate": 1.9202299573314262e-05, + "loss": 1.4861, + "step": 25641 + }, + { + "epoch": 0.31, + "grad_norm": 29.13456629899962, + "learning_rate": 1.920207092311537e-05, + "loss": 1.43, + "step": 25644 + }, + { + "epoch": 0.31, + "grad_norm": 9.456297052204777, + "learning_rate": 1.9201842241513123e-05, + "loss": 1.4268, + "step": 25647 + }, + { + "epoch": 0.31, + "grad_norm": 50.92052403844368, + "learning_rate": 1.9201613528508303e-05, + "loss": 1.5252, + "step": 25650 + }, + { + "epoch": 0.31, + "grad_norm": 10.85970437062563, + "learning_rate": 1.920138478410169e-05, + "loss": 1.7853, + "step": 25653 + }, + { + "epoch": 0.31, + "grad_norm": 5.306708826070051, + "learning_rate": 1.9201156008294063e-05, + "loss": 1.5908, + "step": 25656 + }, + { + "epoch": 0.31, + "grad_norm": 32.2011956625809, + "learning_rate": 1.920092720108621e-05, + "loss": 1.3261, + "step": 25659 + }, + { + "epoch": 0.31, + "grad_norm": 27.435651122579262, + "learning_rate": 1.9200698362478905e-05, + "loss": 1.597, + "step": 25662 + }, + { + "epoch": 0.31, + "grad_norm": 16.836391530293977, + "learning_rate": 1.920046949247293e-05, + "loss": 1.6028, + "step": 25665 + }, + { + "epoch": 0.31, + "grad_norm": 8.925085367492018, + "learning_rate": 1.9200240591069066e-05, + "loss": 1.7715, + "step": 25668 + }, + { + "epoch": 0.31, + "grad_norm": 27.50647067390556, + "learning_rate": 1.9200011658268093e-05, + "loss": 1.719, + "step": 25671 + }, + { + "epoch": 0.31, + "grad_norm": 30.946073197027825, + "learning_rate": 1.9199782694070797e-05, + "loss": 1.7062, + "step": 25674 + }, + { + "epoch": 0.31, + "grad_norm": 10.995461063312183, + "learning_rate": 1.9199553698477954e-05, + "loss": 1.6224, + "step": 25677 + }, + { + "epoch": 0.31, + "grad_norm": 34.90704580789946, + "learning_rate": 1.9199324671490353e-05, + "loss": 1.7185, + "step": 25680 + }, + { + "epoch": 0.31, + "grad_norm": 10.638233106804266, + "learning_rate": 1.9199095613108766e-05, + "loss": 1.6721, + "step": 25683 + }, + { + "epoch": 0.31, + "grad_norm": 22.409910223259303, + "learning_rate": 1.919886652333398e-05, + "loss": 1.5316, + "step": 25686 + }, + { + "epoch": 0.31, + "grad_norm": 21.584406323736534, + "learning_rate": 1.919863740216678e-05, + "loss": 1.439, + "step": 25689 + }, + { + "epoch": 0.31, + "grad_norm": 6.14432397921718, + "learning_rate": 1.919840824960794e-05, + "loss": 1.4459, + "step": 25692 + }, + { + "epoch": 0.31, + "grad_norm": 6.022074908467729, + "learning_rate": 1.9198179065658245e-05, + "loss": 1.3271, + "step": 25695 + }, + { + "epoch": 0.31, + "grad_norm": 10.82977921697294, + "learning_rate": 1.9197949850318478e-05, + "loss": 1.3451, + "step": 25698 + }, + { + "epoch": 0.31, + "grad_norm": 22.55536166156954, + "learning_rate": 1.919772060358942e-05, + "loss": 1.3766, + "step": 25701 + }, + { + "epoch": 0.31, + "grad_norm": 28.362410174654315, + "learning_rate": 1.9197491325471857e-05, + "loss": 1.573, + "step": 25704 + }, + { + "epoch": 0.31, + "grad_norm": 5.186234519657052, + "learning_rate": 1.919726201596657e-05, + "loss": 1.567, + "step": 25707 + }, + { + "epoch": 0.31, + "grad_norm": 15.766249769009363, + "learning_rate": 1.919703267507434e-05, + "loss": 1.4773, + "step": 25710 + }, + { + "epoch": 0.31, + "grad_norm": 61.26140695275089, + "learning_rate": 1.919680330279595e-05, + "loss": 1.6248, + "step": 25713 + }, + { + "epoch": 0.31, + "grad_norm": 8.875836407575758, + "learning_rate": 1.9196573899132183e-05, + "loss": 1.7842, + "step": 25716 + }, + { + "epoch": 0.31, + "grad_norm": 6.527584805366689, + "learning_rate": 1.919634446408382e-05, + "loss": 1.3351, + "step": 25719 + }, + { + "epoch": 0.31, + "grad_norm": 10.305657378498768, + "learning_rate": 1.9196114997651647e-05, + "loss": 1.9115, + "step": 25722 + }, + { + "epoch": 0.31, + "grad_norm": 10.996878104460865, + "learning_rate": 1.9195885499836445e-05, + "loss": 1.4626, + "step": 25725 + }, + { + "epoch": 0.31, + "grad_norm": 15.914293317995307, + "learning_rate": 1.9195655970639e-05, + "loss": 1.1895, + "step": 25728 + }, + { + "epoch": 0.31, + "grad_norm": 17.340111874699705, + "learning_rate": 1.9195426410060095e-05, + "loss": 1.2997, + "step": 25731 + }, + { + "epoch": 0.31, + "grad_norm": 8.824366222374517, + "learning_rate": 1.919519681810051e-05, + "loss": 1.4718, + "step": 25734 + }, + { + "epoch": 0.31, + "grad_norm": 20.81361120625453, + "learning_rate": 1.9194967194761028e-05, + "loss": 1.6067, + "step": 25737 + }, + { + "epoch": 0.31, + "grad_norm": 16.723574980434215, + "learning_rate": 1.919473754004244e-05, + "loss": 1.4675, + "step": 25740 + }, + { + "epoch": 0.31, + "grad_norm": 5.075526285353388, + "learning_rate": 1.9194507853945522e-05, + "loss": 1.5446, + "step": 25743 + }, + { + "epoch": 0.31, + "grad_norm": 5.443853835635914, + "learning_rate": 1.919427813647106e-05, + "loss": 1.5448, + "step": 25746 + }, + { + "epoch": 0.31, + "grad_norm": 10.021978495278965, + "learning_rate": 1.9194048387619838e-05, + "loss": 1.5316, + "step": 25749 + }, + { + "epoch": 0.31, + "grad_norm": 10.158074293260977, + "learning_rate": 1.919381860739264e-05, + "loss": 1.2544, + "step": 25752 + }, + { + "epoch": 0.31, + "grad_norm": 6.662674061011893, + "learning_rate": 1.9193588795790252e-05, + "loss": 1.5808, + "step": 25755 + }, + { + "epoch": 0.31, + "grad_norm": 13.143947332334314, + "learning_rate": 1.9193358952813457e-05, + "loss": 1.4964, + "step": 25758 + }, + { + "epoch": 0.31, + "grad_norm": 9.239580973387442, + "learning_rate": 1.919312907846304e-05, + "loss": 1.4494, + "step": 25761 + }, + { + "epoch": 0.31, + "grad_norm": 17.133588433792998, + "learning_rate": 1.9192899172739784e-05, + "loss": 1.4924, + "step": 25764 + }, + { + "epoch": 0.31, + "grad_norm": 9.67506199811262, + "learning_rate": 1.9192669235644473e-05, + "loss": 1.4905, + "step": 25767 + }, + { + "epoch": 0.31, + "grad_norm": 21.815152450376907, + "learning_rate": 1.9192439267177894e-05, + "loss": 1.4348, + "step": 25770 + }, + { + "epoch": 0.31, + "grad_norm": 15.43858621974648, + "learning_rate": 1.9192209267340834e-05, + "loss": 2.0411, + "step": 25773 + }, + { + "epoch": 0.31, + "grad_norm": 7.514348649209588, + "learning_rate": 1.9191979236134074e-05, + "loss": 1.2095, + "step": 25776 + }, + { + "epoch": 0.31, + "grad_norm": 7.0013322822345225, + "learning_rate": 1.9191749173558395e-05, + "loss": 1.7678, + "step": 25779 + }, + { + "epoch": 0.31, + "grad_norm": 27.56122674608973, + "learning_rate": 1.9191519079614588e-05, + "loss": 1.8669, + "step": 25782 + }, + { + "epoch": 0.31, + "grad_norm": 3.641263008098378, + "learning_rate": 1.919128895430344e-05, + "loss": 1.7327, + "step": 25785 + }, + { + "epoch": 0.31, + "grad_norm": 10.022001383583643, + "learning_rate": 1.919105879762573e-05, + "loss": 1.3221, + "step": 25788 + }, + { + "epoch": 0.31, + "grad_norm": 12.375856204726798, + "learning_rate": 1.9190828609582252e-05, + "loss": 1.8392, + "step": 25791 + }, + { + "epoch": 0.31, + "grad_norm": 16.392381351833762, + "learning_rate": 1.9190598390173787e-05, + "loss": 1.5006, + "step": 25794 + }, + { + "epoch": 0.31, + "grad_norm": 16.318337448243458, + "learning_rate": 1.9190368139401116e-05, + "loss": 1.3501, + "step": 25797 + }, + { + "epoch": 0.31, + "grad_norm": 11.697678241207424, + "learning_rate": 1.919013785726503e-05, + "loss": 1.1746, + "step": 25800 + }, + { + "epoch": 0.31, + "grad_norm": 5.291500917008377, + "learning_rate": 1.9189907543766316e-05, + "loss": 1.5387, + "step": 25803 + }, + { + "epoch": 0.31, + "grad_norm": 4.6302287152299195, + "learning_rate": 1.9189677198905753e-05, + "loss": 1.3133, + "step": 25806 + }, + { + "epoch": 0.31, + "grad_norm": 12.052930555141128, + "learning_rate": 1.9189446822684134e-05, + "loss": 1.4335, + "step": 25809 + }, + { + "epoch": 0.31, + "grad_norm": 137.28851691587948, + "learning_rate": 1.9189216415102244e-05, + "loss": 1.7485, + "step": 25812 + }, + { + "epoch": 0.31, + "grad_norm": 7.961933671592162, + "learning_rate": 1.9188985976160868e-05, + "loss": 1.5727, + "step": 25815 + }, + { + "epoch": 0.31, + "grad_norm": 33.84487830454512, + "learning_rate": 1.9188755505860794e-05, + "loss": 1.3974, + "step": 25818 + }, + { + "epoch": 0.31, + "grad_norm": 14.544497890353078, + "learning_rate": 1.9188525004202804e-05, + "loss": 1.3581, + "step": 25821 + }, + { + "epoch": 0.31, + "grad_norm": 4.120736885308009, + "learning_rate": 1.918829447118769e-05, + "loss": 1.738, + "step": 25824 + }, + { + "epoch": 0.31, + "grad_norm": 24.770109381914697, + "learning_rate": 1.9188063906816237e-05, + "loss": 1.2547, + "step": 25827 + }, + { + "epoch": 0.31, + "grad_norm": 24.18041779317204, + "learning_rate": 1.918783331108923e-05, + "loss": 1.2669, + "step": 25830 + }, + { + "epoch": 0.31, + "grad_norm": 9.36483989420706, + "learning_rate": 1.9187602684007455e-05, + "loss": 1.523, + "step": 25833 + }, + { + "epoch": 0.31, + "grad_norm": 34.35051494071137, + "learning_rate": 1.9187372025571703e-05, + "loss": 1.5415, + "step": 25836 + }, + { + "epoch": 0.31, + "grad_norm": 26.11564790529708, + "learning_rate": 1.9187141335782763e-05, + "loss": 1.8448, + "step": 25839 + }, + { + "epoch": 0.31, + "grad_norm": 49.67165301068442, + "learning_rate": 1.918691061464142e-05, + "loss": 1.3183, + "step": 25842 + }, + { + "epoch": 0.31, + "grad_norm": 16.551398482570942, + "learning_rate": 1.9186679862148453e-05, + "loss": 1.804, + "step": 25845 + }, + { + "epoch": 0.31, + "grad_norm": 4.549826385017852, + "learning_rate": 1.918644907830466e-05, + "loss": 1.3086, + "step": 25848 + }, + { + "epoch": 0.31, + "grad_norm": 9.695787495568545, + "learning_rate": 1.9186218263110828e-05, + "loss": 1.3812, + "step": 25851 + }, + { + "epoch": 0.31, + "grad_norm": 7.84452976135831, + "learning_rate": 1.9185987416567737e-05, + "loss": 1.3419, + "step": 25854 + }, + { + "epoch": 0.31, + "grad_norm": 19.83498322734461, + "learning_rate": 1.9185756538676185e-05, + "loss": 1.83, + "step": 25857 + }, + { + "epoch": 0.31, + "grad_norm": 15.316449879397457, + "learning_rate": 1.9185525629436952e-05, + "loss": 1.2792, + "step": 25860 + }, + { + "epoch": 0.31, + "grad_norm": 10.104010518588682, + "learning_rate": 1.918529468885083e-05, + "loss": 1.6271, + "step": 25863 + }, + { + "epoch": 0.31, + "grad_norm": 12.429949117982725, + "learning_rate": 1.9185063716918603e-05, + "loss": 1.4165, + "step": 25866 + }, + { + "epoch": 0.31, + "grad_norm": 47.63807900827209, + "learning_rate": 1.918483271364106e-05, + "loss": 1.6081, + "step": 25869 + }, + { + "epoch": 0.31, + "grad_norm": 17.866920539845154, + "learning_rate": 1.9184601679018997e-05, + "loss": 1.4077, + "step": 25872 + }, + { + "epoch": 0.31, + "grad_norm": 14.813157496338503, + "learning_rate": 1.9184370613053198e-05, + "loss": 1.4469, + "step": 25875 + }, + { + "epoch": 0.31, + "grad_norm": 17.571043860663973, + "learning_rate": 1.9184139515744445e-05, + "loss": 1.4741, + "step": 25878 + }, + { + "epoch": 0.31, + "grad_norm": 6.642915450314432, + "learning_rate": 1.918390838709353e-05, + "loss": 1.5648, + "step": 25881 + }, + { + "epoch": 0.31, + "grad_norm": 28.734011456738468, + "learning_rate": 1.918367722710125e-05, + "loss": 1.5209, + "step": 25884 + }, + { + "epoch": 0.31, + "grad_norm": 14.674702871397518, + "learning_rate": 1.918344603576838e-05, + "loss": 1.491, + "step": 25887 + }, + { + "epoch": 0.31, + "grad_norm": 15.05560730985504, + "learning_rate": 1.9183214813095726e-05, + "loss": 1.9922, + "step": 25890 + }, + { + "epoch": 0.31, + "grad_norm": 25.924069534568797, + "learning_rate": 1.9182983559084062e-05, + "loss": 1.3611, + "step": 25893 + }, + { + "epoch": 0.31, + "grad_norm": 16.59555794057668, + "learning_rate": 1.9182752273734185e-05, + "loss": 1.5806, + "step": 25896 + }, + { + "epoch": 0.31, + "grad_norm": 15.738835819531124, + "learning_rate": 1.918252095704688e-05, + "loss": 1.5091, + "step": 25899 + }, + { + "epoch": 0.31, + "grad_norm": 13.149554638744032, + "learning_rate": 1.9182289609022938e-05, + "loss": 1.2207, + "step": 25902 + }, + { + "epoch": 0.31, + "grad_norm": 2.961321896957182, + "learning_rate": 1.918205822966315e-05, + "loss": 1.4032, + "step": 25905 + }, + { + "epoch": 0.31, + "grad_norm": 32.07601959896473, + "learning_rate": 1.9181826818968307e-05, + "loss": 1.3543, + "step": 25908 + }, + { + "epoch": 0.31, + "grad_norm": 15.271224767661804, + "learning_rate": 1.9181595376939195e-05, + "loss": 1.4058, + "step": 25911 + }, + { + "epoch": 0.31, + "grad_norm": 6.4752895404636055, + "learning_rate": 1.9181363903576606e-05, + "loss": 1.5926, + "step": 25914 + }, + { + "epoch": 0.31, + "grad_norm": 6.25634108022108, + "learning_rate": 1.9181132398881326e-05, + "loss": 1.5805, + "step": 25917 + }, + { + "epoch": 0.31, + "grad_norm": 15.747455601098084, + "learning_rate": 1.9180900862854152e-05, + "loss": 1.7051, + "step": 25920 + }, + { + "epoch": 0.31, + "grad_norm": 7.938118867684391, + "learning_rate": 1.9180669295495866e-05, + "loss": 1.7508, + "step": 25923 + }, + { + "epoch": 0.31, + "grad_norm": 5.707212375978411, + "learning_rate": 1.9180437696807264e-05, + "loss": 1.6792, + "step": 25926 + }, + { + "epoch": 0.31, + "grad_norm": 13.70416607296851, + "learning_rate": 1.9180206066789137e-05, + "loss": 1.6185, + "step": 25929 + }, + { + "epoch": 0.31, + "grad_norm": 12.824307322610505, + "learning_rate": 1.9179974405442273e-05, + "loss": 1.2823, + "step": 25932 + }, + { + "epoch": 0.31, + "grad_norm": 5.912723065314589, + "learning_rate": 1.917974271276746e-05, + "loss": 1.6071, + "step": 25935 + }, + { + "epoch": 0.31, + "grad_norm": 4.942617701765461, + "learning_rate": 1.9179510988765495e-05, + "loss": 1.6295, + "step": 25938 + }, + { + "epoch": 0.31, + "grad_norm": 9.20914899532676, + "learning_rate": 1.9179279233437165e-05, + "loss": 1.4794, + "step": 25941 + }, + { + "epoch": 0.31, + "grad_norm": 25.835098495353773, + "learning_rate": 1.9179047446783258e-05, + "loss": 1.3418, + "step": 25944 + }, + { + "epoch": 0.31, + "grad_norm": 29.23465282116614, + "learning_rate": 1.9178815628804572e-05, + "loss": 1.3971, + "step": 25947 + }, + { + "epoch": 0.31, + "grad_norm": 26.39845324845193, + "learning_rate": 1.9178583779501893e-05, + "loss": 1.2475, + "step": 25950 + }, + { + "epoch": 0.31, + "grad_norm": 11.458322343950067, + "learning_rate": 1.9178351898876014e-05, + "loss": 1.6271, + "step": 25953 + }, + { + "epoch": 0.31, + "grad_norm": 32.46403736114479, + "learning_rate": 1.9178119986927727e-05, + "loss": 1.6817, + "step": 25956 + }, + { + "epoch": 0.31, + "grad_norm": 8.66135422264557, + "learning_rate": 1.9177888043657818e-05, + "loss": 1.7834, + "step": 25959 + }, + { + "epoch": 0.31, + "grad_norm": 9.852685353599583, + "learning_rate": 1.9177656069067088e-05, + "loss": 1.5414, + "step": 25962 + }, + { + "epoch": 0.31, + "grad_norm": 35.487041821100384, + "learning_rate": 1.9177424063156324e-05, + "loss": 1.6017, + "step": 25965 + }, + { + "epoch": 0.31, + "grad_norm": 23.1069995712759, + "learning_rate": 1.9177192025926315e-05, + "loss": 1.1463, + "step": 25968 + }, + { + "epoch": 0.31, + "grad_norm": 4.31556474289222, + "learning_rate": 1.9176959957377854e-05, + "loss": 1.5735, + "step": 25971 + }, + { + "epoch": 0.31, + "grad_norm": 27.020168319724522, + "learning_rate": 1.9176727857511737e-05, + "loss": 1.469, + "step": 25974 + }, + { + "epoch": 0.31, + "grad_norm": 12.006743179265513, + "learning_rate": 1.917649572632875e-05, + "loss": 1.4529, + "step": 25977 + }, + { + "epoch": 0.31, + "grad_norm": 18.908997235916825, + "learning_rate": 1.9176263563829694e-05, + "loss": 1.4747, + "step": 25980 + }, + { + "epoch": 0.31, + "grad_norm": 16.7643209873589, + "learning_rate": 1.9176031370015347e-05, + "loss": 1.7559, + "step": 25983 + }, + { + "epoch": 0.31, + "grad_norm": 28.948643169029932, + "learning_rate": 1.917579914488652e-05, + "loss": 1.4104, + "step": 25986 + }, + { + "epoch": 0.31, + "grad_norm": 10.745866840770077, + "learning_rate": 1.917556688844399e-05, + "loss": 1.2922, + "step": 25989 + }, + { + "epoch": 0.31, + "grad_norm": 22.20809411258551, + "learning_rate": 1.9175334600688555e-05, + "loss": 1.5062, + "step": 25992 + }, + { + "epoch": 0.31, + "grad_norm": 9.155597301278723, + "learning_rate": 1.917510228162101e-05, + "loss": 1.7938, + "step": 25995 + }, + { + "epoch": 0.31, + "grad_norm": 6.154102046368241, + "learning_rate": 1.9174869931242144e-05, + "loss": 1.7117, + "step": 25998 + }, + { + "epoch": 0.31, + "grad_norm": 10.835755304557615, + "learning_rate": 1.9174637549552752e-05, + "loss": 1.3168, + "step": 26001 + }, + { + "epoch": 0.31, + "grad_norm": 31.158521694151275, + "learning_rate": 1.9174405136553628e-05, + "loss": 1.4424, + "step": 26004 + }, + { + "epoch": 0.31, + "grad_norm": 18.014444938687376, + "learning_rate": 1.9174172692245564e-05, + "loss": 1.6694, + "step": 26007 + }, + { + "epoch": 0.31, + "grad_norm": 8.605301049650773, + "learning_rate": 1.9173940216629353e-05, + "loss": 1.5876, + "step": 26010 + }, + { + "epoch": 0.31, + "grad_norm": 34.826306911696044, + "learning_rate": 1.9173707709705787e-05, + "loss": 1.2828, + "step": 26013 + }, + { + "epoch": 0.31, + "grad_norm": 26.041163894068255, + "learning_rate": 1.917347517147566e-05, + "loss": 1.4019, + "step": 26016 + }, + { + "epoch": 0.31, + "grad_norm": 7.619867667883384, + "learning_rate": 1.917324260193977e-05, + "loss": 1.7995, + "step": 26019 + }, + { + "epoch": 0.31, + "grad_norm": 22.099289860362862, + "learning_rate": 1.9173010001098905e-05, + "loss": 1.6208, + "step": 26022 + }, + { + "epoch": 0.31, + "grad_norm": 11.01832547444884, + "learning_rate": 1.917277736895386e-05, + "loss": 1.2701, + "step": 26025 + }, + { + "epoch": 0.31, + "grad_norm": 12.485298866295638, + "learning_rate": 1.9172544705505434e-05, + "loss": 1.5164, + "step": 26028 + }, + { + "epoch": 0.31, + "grad_norm": 12.706341699984455, + "learning_rate": 1.9172312010754414e-05, + "loss": 1.4291, + "step": 26031 + }, + { + "epoch": 0.31, + "grad_norm": 13.114203578547395, + "learning_rate": 1.9172079284701595e-05, + "loss": 1.5507, + "step": 26034 + }, + { + "epoch": 0.31, + "grad_norm": 5.769081429668313, + "learning_rate": 1.9171846527347776e-05, + "loss": 1.7097, + "step": 26037 + }, + { + "epoch": 0.31, + "grad_norm": 6.564362937845925, + "learning_rate": 1.9171613738693747e-05, + "loss": 1.1866, + "step": 26040 + }, + { + "epoch": 0.31, + "grad_norm": 25.751555615148032, + "learning_rate": 1.9171380918740306e-05, + "loss": 1.6439, + "step": 26043 + }, + { + "epoch": 0.31, + "grad_norm": 13.278535393440789, + "learning_rate": 1.9171148067488243e-05, + "loss": 1.4294, + "step": 26046 + }, + { + "epoch": 0.31, + "grad_norm": 7.8969887550458475, + "learning_rate": 1.9170915184938356e-05, + "loss": 1.4113, + "step": 26049 + }, + { + "epoch": 0.31, + "grad_norm": 12.363628621386455, + "learning_rate": 1.917068227109144e-05, + "loss": 1.5601, + "step": 26052 + }, + { + "epoch": 0.31, + "grad_norm": 28.157590644760276, + "learning_rate": 1.917044932594829e-05, + "loss": 1.5594, + "step": 26055 + }, + { + "epoch": 0.31, + "grad_norm": 3.836164987035659, + "learning_rate": 1.9170216349509693e-05, + "loss": 1.325, + "step": 26058 + }, + { + "epoch": 0.31, + "grad_norm": 12.767349165090655, + "learning_rate": 1.9169983341776457e-05, + "loss": 1.5615, + "step": 26061 + }, + { + "epoch": 0.31, + "grad_norm": 4.5775912252040065, + "learning_rate": 1.916975030274937e-05, + "loss": 1.7948, + "step": 26064 + }, + { + "epoch": 0.31, + "grad_norm": 15.726426646566793, + "learning_rate": 1.9169517232429225e-05, + "loss": 1.5206, + "step": 26067 + }, + { + "epoch": 0.31, + "grad_norm": 8.327986799355374, + "learning_rate": 1.9169284130816825e-05, + "loss": 1.4688, + "step": 26070 + }, + { + "epoch": 0.31, + "grad_norm": 26.439487293089165, + "learning_rate": 1.9169050997912957e-05, + "loss": 1.6373, + "step": 26073 + }, + { + "epoch": 0.31, + "grad_norm": 11.631790853320366, + "learning_rate": 1.916881783371842e-05, + "loss": 1.6679, + "step": 26076 + }, + { + "epoch": 0.31, + "grad_norm": 22.16985463515503, + "learning_rate": 1.9168584638234014e-05, + "loss": 1.6856, + "step": 26079 + }, + { + "epoch": 0.31, + "grad_norm": 9.897927254854961, + "learning_rate": 1.9168351411460532e-05, + "loss": 1.6632, + "step": 26082 + }, + { + "epoch": 0.31, + "grad_norm": 35.56117770681819, + "learning_rate": 1.9168118153398764e-05, + "loss": 1.4096, + "step": 26085 + }, + { + "epoch": 0.31, + "grad_norm": 24.847977491796488, + "learning_rate": 1.9167884864049513e-05, + "loss": 1.5131, + "step": 26088 + }, + { + "epoch": 0.31, + "grad_norm": 14.252586023279475, + "learning_rate": 1.9167651543413576e-05, + "loss": 1.2825, + "step": 26091 + }, + { + "epoch": 0.31, + "grad_norm": 27.096287385340386, + "learning_rate": 1.9167418191491745e-05, + "loss": 1.7469, + "step": 26094 + }, + { + "epoch": 0.31, + "grad_norm": 9.953250258750446, + "learning_rate": 1.9167184808284817e-05, + "loss": 1.5445, + "step": 26097 + }, + { + "epoch": 0.31, + "grad_norm": 15.859212393324391, + "learning_rate": 1.9166951393793587e-05, + "loss": 1.8418, + "step": 26100 + }, + { + "epoch": 0.31, + "grad_norm": 53.970661739477656, + "learning_rate": 1.916671794801886e-05, + "loss": 1.4391, + "step": 26103 + }, + { + "epoch": 0.31, + "grad_norm": 33.97937999303934, + "learning_rate": 1.9166484470961418e-05, + "loss": 1.534, + "step": 26106 + }, + { + "epoch": 0.31, + "grad_norm": 13.057985485407471, + "learning_rate": 1.916625096262207e-05, + "loss": 1.405, + "step": 26109 + }, + { + "epoch": 0.31, + "grad_norm": 4.827074893101123, + "learning_rate": 1.9166017423001612e-05, + "loss": 1.5185, + "step": 26112 + }, + { + "epoch": 0.31, + "grad_norm": 5.622723690730432, + "learning_rate": 1.9165783852100836e-05, + "loss": 1.5813, + "step": 26115 + }, + { + "epoch": 0.31, + "grad_norm": 25.281569080632895, + "learning_rate": 1.9165550249920542e-05, + "loss": 1.3281, + "step": 26118 + }, + { + "epoch": 0.31, + "grad_norm": 6.663155404317006, + "learning_rate": 1.9165316616461527e-05, + "loss": 1.4487, + "step": 26121 + }, + { + "epoch": 0.31, + "grad_norm": 19.580082903140795, + "learning_rate": 1.9165082951724584e-05, + "loss": 1.2752, + "step": 26124 + }, + { + "epoch": 0.31, + "grad_norm": 6.7695738109021155, + "learning_rate": 1.916484925571052e-05, + "loss": 1.4358, + "step": 26127 + }, + { + "epoch": 0.31, + "grad_norm": 24.681805177891313, + "learning_rate": 1.9164615528420122e-05, + "loss": 1.6549, + "step": 26130 + }, + { + "epoch": 0.31, + "grad_norm": 11.12300795597803, + "learning_rate": 1.9164381769854196e-05, + "loss": 1.5087, + "step": 26133 + }, + { + "epoch": 0.31, + "grad_norm": 52.81325888850273, + "learning_rate": 1.9164147980013532e-05, + "loss": 1.4051, + "step": 26136 + }, + { + "epoch": 0.31, + "grad_norm": 34.98892966571778, + "learning_rate": 1.9163914158898932e-05, + "loss": 1.5821, + "step": 26139 + }, + { + "epoch": 0.31, + "grad_norm": 15.65214846019814, + "learning_rate": 1.9163680306511197e-05, + "loss": 1.755, + "step": 26142 + }, + { + "epoch": 0.31, + "grad_norm": 10.103223236588349, + "learning_rate": 1.916344642285112e-05, + "loss": 1.6251, + "step": 26145 + }, + { + "epoch": 0.31, + "grad_norm": 31.349763377812344, + "learning_rate": 1.91632125079195e-05, + "loss": 1.6321, + "step": 26148 + }, + { + "epoch": 0.31, + "grad_norm": 10.448365950363696, + "learning_rate": 1.916297856171714e-05, + "loss": 1.281, + "step": 26151 + }, + { + "epoch": 0.31, + "grad_norm": 14.817642463989635, + "learning_rate": 1.916274458424483e-05, + "loss": 1.6165, + "step": 26154 + }, + { + "epoch": 0.31, + "grad_norm": 20.03871425374196, + "learning_rate": 1.9162510575503375e-05, + "loss": 1.6673, + "step": 26157 + }, + { + "epoch": 0.31, + "grad_norm": 50.94715119328974, + "learning_rate": 1.9162276535493574e-05, + "loss": 1.7666, + "step": 26160 + }, + { + "epoch": 0.31, + "grad_norm": 31.826668265356872, + "learning_rate": 1.916204246421622e-05, + "loss": 1.4447, + "step": 26163 + }, + { + "epoch": 0.31, + "grad_norm": 31.3744225008612, + "learning_rate": 1.916180836167212e-05, + "loss": 1.5885, + "step": 26166 + }, + { + "epoch": 0.31, + "grad_norm": 6.492572336898912, + "learning_rate": 1.9161574227862063e-05, + "loss": 1.1503, + "step": 26169 + }, + { + "epoch": 0.31, + "grad_norm": 7.133069436404926, + "learning_rate": 1.9161340062786854e-05, + "loss": 1.3532, + "step": 26172 + }, + { + "epoch": 0.31, + "grad_norm": 16.526100265799407, + "learning_rate": 1.916110586644729e-05, + "loss": 1.3801, + "step": 26175 + }, + { + "epoch": 0.31, + "grad_norm": 35.72341124334997, + "learning_rate": 1.9160871638844177e-05, + "loss": 1.2782, + "step": 26178 + }, + { + "epoch": 0.31, + "grad_norm": 17.450091198667643, + "learning_rate": 1.9160637379978305e-05, + "loss": 1.2637, + "step": 26181 + }, + { + "epoch": 0.31, + "grad_norm": 46.30608366237864, + "learning_rate": 1.9160403089850475e-05, + "loss": 1.352, + "step": 26184 + }, + { + "epoch": 0.31, + "grad_norm": 20.448620407063423, + "learning_rate": 1.9160168768461496e-05, + "loss": 1.5456, + "step": 26187 + }, + { + "epoch": 0.31, + "grad_norm": 12.915678525262225, + "learning_rate": 1.9159934415812152e-05, + "loss": 1.8559, + "step": 26190 + }, + { + "epoch": 0.31, + "grad_norm": 20.795065826846567, + "learning_rate": 1.9159700031903258e-05, + "loss": 1.4969, + "step": 26193 + }, + { + "epoch": 0.31, + "grad_norm": 37.72611870746183, + "learning_rate": 1.9159465616735605e-05, + "loss": 1.735, + "step": 26196 + }, + { + "epoch": 0.32, + "grad_norm": 19.567743647699693, + "learning_rate": 1.9159231170309993e-05, + "loss": 1.6964, + "step": 26199 + }, + { + "epoch": 0.32, + "grad_norm": 13.882878533141971, + "learning_rate": 1.9158996692627226e-05, + "loss": 1.1418, + "step": 26202 + }, + { + "epoch": 0.32, + "grad_norm": 20.270113601907465, + "learning_rate": 1.9158762183688103e-05, + "loss": 1.3652, + "step": 26205 + }, + { + "epoch": 0.32, + "grad_norm": 79.96350600582508, + "learning_rate": 1.9158527643493424e-05, + "loss": 1.4635, + "step": 26208 + }, + { + "epoch": 0.32, + "grad_norm": 21.437924041052582, + "learning_rate": 1.9158293072043984e-05, + "loss": 1.8114, + "step": 26211 + }, + { + "epoch": 0.32, + "grad_norm": 22.742132374289202, + "learning_rate": 1.9158058469340594e-05, + "loss": 1.4043, + "step": 26214 + }, + { + "epoch": 0.32, + "grad_norm": 25.76352652649419, + "learning_rate": 1.915782383538405e-05, + "loss": 1.4203, + "step": 26217 + }, + { + "epoch": 0.32, + "grad_norm": 26.673012365052088, + "learning_rate": 1.9157589170175147e-05, + "loss": 1.4229, + "step": 26220 + }, + { + "epoch": 0.32, + "grad_norm": 15.67019108966941, + "learning_rate": 1.915735447371469e-05, + "loss": 1.3659, + "step": 26223 + }, + { + "epoch": 0.32, + "grad_norm": 14.220786787595538, + "learning_rate": 1.9157119746003484e-05, + "loss": 1.8076, + "step": 26226 + }, + { + "epoch": 0.32, + "grad_norm": 22.635397951818202, + "learning_rate": 1.915688498704233e-05, + "loss": 1.2563, + "step": 26229 + }, + { + "epoch": 0.32, + "grad_norm": 20.858054833213618, + "learning_rate": 1.915665019683202e-05, + "loss": 1.6388, + "step": 26232 + }, + { + "epoch": 0.32, + "grad_norm": 40.542933256038324, + "learning_rate": 1.9156415375373367e-05, + "loss": 1.5406, + "step": 26235 + }, + { + "epoch": 0.32, + "grad_norm": 12.44939142045929, + "learning_rate": 1.915618052266716e-05, + "loss": 1.3289, + "step": 26238 + }, + { + "epoch": 0.32, + "grad_norm": 18.88056361269591, + "learning_rate": 1.9155945638714207e-05, + "loss": 1.4596, + "step": 26241 + }, + { + "epoch": 0.32, + "grad_norm": 76.9429773883461, + "learning_rate": 1.9155710723515314e-05, + "loss": 1.7285, + "step": 26244 + }, + { + "epoch": 0.32, + "grad_norm": 23.008453628921092, + "learning_rate": 1.9155475777071276e-05, + "loss": 1.6431, + "step": 26247 + }, + { + "epoch": 0.32, + "grad_norm": 17.386734589124853, + "learning_rate": 1.9155240799382897e-05, + "loss": 1.3853, + "step": 26250 + }, + { + "epoch": 0.32, + "grad_norm": 14.195397202347108, + "learning_rate": 1.915500579045098e-05, + "loss": 1.4739, + "step": 26253 + }, + { + "epoch": 0.32, + "grad_norm": 86.52884321302987, + "learning_rate": 1.9154770750276322e-05, + "loss": 1.6012, + "step": 26256 + }, + { + "epoch": 0.32, + "grad_norm": 38.2725568962844, + "learning_rate": 1.9154535678859733e-05, + "loss": 1.7291, + "step": 26259 + }, + { + "epoch": 0.32, + "grad_norm": 19.365035602056917, + "learning_rate": 1.9154300576202013e-05, + "loss": 1.7016, + "step": 26262 + }, + { + "epoch": 0.32, + "grad_norm": 10.193644525654468, + "learning_rate": 1.915406544230396e-05, + "loss": 1.7111, + "step": 26265 + }, + { + "epoch": 0.32, + "grad_norm": 37.86383568218083, + "learning_rate": 1.9153830277166377e-05, + "loss": 1.9181, + "step": 26268 + }, + { + "epoch": 0.32, + "grad_norm": 17.770580223720394, + "learning_rate": 1.915359508079007e-05, + "loss": 1.5462, + "step": 26271 + }, + { + "epoch": 0.32, + "grad_norm": 13.37006631964178, + "learning_rate": 1.915335985317584e-05, + "loss": 1.5557, + "step": 26274 + }, + { + "epoch": 0.32, + "grad_norm": 31.550919679087258, + "learning_rate": 1.915312459432449e-05, + "loss": 1.6264, + "step": 26277 + }, + { + "epoch": 0.32, + "grad_norm": 11.994920980355353, + "learning_rate": 1.915288930423682e-05, + "loss": 1.5837, + "step": 26280 + }, + { + "epoch": 0.32, + "grad_norm": 12.138981684158857, + "learning_rate": 1.915265398291364e-05, + "loss": 1.1291, + "step": 26283 + }, + { + "epoch": 0.32, + "grad_norm": 17.376459573555383, + "learning_rate": 1.915241863035575e-05, + "loss": 1.3982, + "step": 26286 + }, + { + "epoch": 0.32, + "grad_norm": 60.76856037537279, + "learning_rate": 1.915218324656395e-05, + "loss": 1.6171, + "step": 26289 + }, + { + "epoch": 0.32, + "grad_norm": 12.613928607044782, + "learning_rate": 1.915194783153904e-05, + "loss": 1.4442, + "step": 26292 + }, + { + "epoch": 0.32, + "grad_norm": 5.122015928285858, + "learning_rate": 1.9151712385281834e-05, + "loss": 1.3407, + "step": 26295 + }, + { + "epoch": 0.32, + "grad_norm": 8.634324828330467, + "learning_rate": 1.9151476907793127e-05, + "loss": 2.0914, + "step": 26298 + }, + { + "epoch": 0.32, + "grad_norm": 7.622587006751083, + "learning_rate": 1.9151241399073728e-05, + "loss": 1.4526, + "step": 26301 + }, + { + "epoch": 0.32, + "grad_norm": 7.564854880229266, + "learning_rate": 1.9151005859124438e-05, + "loss": 1.8741, + "step": 26304 + }, + { + "epoch": 0.32, + "grad_norm": 47.54504886993092, + "learning_rate": 1.915077028794606e-05, + "loss": 1.3236, + "step": 26307 + }, + { + "epoch": 0.32, + "grad_norm": 7.258749338559905, + "learning_rate": 1.9150534685539403e-05, + "loss": 1.5455, + "step": 26310 + }, + { + "epoch": 0.32, + "grad_norm": 29.0159302075441, + "learning_rate": 1.9150299051905263e-05, + "loss": 1.8253, + "step": 26313 + }, + { + "epoch": 0.32, + "grad_norm": 16.290765431512042, + "learning_rate": 1.9150063387044448e-05, + "loss": 1.154, + "step": 26316 + }, + { + "epoch": 0.32, + "grad_norm": 35.40842728840143, + "learning_rate": 1.9149827690957763e-05, + "loss": 1.6072, + "step": 26319 + }, + { + "epoch": 0.32, + "grad_norm": 19.281986644781014, + "learning_rate": 1.9149591963646015e-05, + "loss": 1.305, + "step": 26322 + }, + { + "epoch": 0.32, + "grad_norm": 19.798433505909397, + "learning_rate": 1.9149356205110004e-05, + "loss": 1.6542, + "step": 26325 + }, + { + "epoch": 0.32, + "grad_norm": 14.476043936024324, + "learning_rate": 1.914912041535053e-05, + "loss": 1.2946, + "step": 26328 + }, + { + "epoch": 0.32, + "grad_norm": 25.45619770429983, + "learning_rate": 1.914888459436841e-05, + "loss": 1.7242, + "step": 26331 + }, + { + "epoch": 0.32, + "grad_norm": 8.409790965494931, + "learning_rate": 1.914864874216444e-05, + "loss": 1.1367, + "step": 26334 + }, + { + "epoch": 0.32, + "grad_norm": 8.35019159526797, + "learning_rate": 1.9148412858739428e-05, + "loss": 1.1406, + "step": 26337 + }, + { + "epoch": 0.32, + "grad_norm": 33.783914066212134, + "learning_rate": 1.914817694409418e-05, + "loss": 1.6665, + "step": 26340 + }, + { + "epoch": 0.32, + "grad_norm": 13.876998318471907, + "learning_rate": 1.9147940998229497e-05, + "loss": 1.7741, + "step": 26343 + }, + { + "epoch": 0.32, + "grad_norm": 9.119230521587438, + "learning_rate": 1.9147705021146188e-05, + "loss": 1.4894, + "step": 26346 + }, + { + "epoch": 0.32, + "grad_norm": 11.287476011367158, + "learning_rate": 1.9147469012845056e-05, + "loss": 1.1224, + "step": 26349 + }, + { + "epoch": 0.32, + "grad_norm": 4.577967272885147, + "learning_rate": 1.9147232973326904e-05, + "loss": 1.4826, + "step": 26352 + }, + { + "epoch": 0.32, + "grad_norm": 9.411180832777434, + "learning_rate": 1.9146996902592546e-05, + "loss": 1.3582, + "step": 26355 + }, + { + "epoch": 0.32, + "grad_norm": 13.809159083450385, + "learning_rate": 1.9146760800642777e-05, + "loss": 1.815, + "step": 26358 + }, + { + "epoch": 0.32, + "grad_norm": 24.11717145717197, + "learning_rate": 1.914652466747841e-05, + "loss": 1.5916, + "step": 26361 + }, + { + "epoch": 0.32, + "grad_norm": 13.224938070990651, + "learning_rate": 1.914628850310025e-05, + "loss": 1.3221, + "step": 26364 + }, + { + "epoch": 0.32, + "grad_norm": 31.586187092852317, + "learning_rate": 1.9146052307509103e-05, + "loss": 1.4638, + "step": 26367 + }, + { + "epoch": 0.32, + "grad_norm": 19.364869358622393, + "learning_rate": 1.9145816080705773e-05, + "loss": 1.3643, + "step": 26370 + }, + { + "epoch": 0.32, + "grad_norm": 11.990292986122288, + "learning_rate": 1.9145579822691065e-05, + "loss": 1.2997, + "step": 26373 + }, + { + "epoch": 0.32, + "grad_norm": 25.461575751160304, + "learning_rate": 1.914534353346579e-05, + "loss": 1.6027, + "step": 26376 + }, + { + "epoch": 0.32, + "grad_norm": 8.140341856265447, + "learning_rate": 1.9145107213030748e-05, + "loss": 1.4332, + "step": 26379 + }, + { + "epoch": 0.32, + "grad_norm": 10.516789855050055, + "learning_rate": 1.9144870861386752e-05, + "loss": 1.8115, + "step": 26382 + }, + { + "epoch": 0.32, + "grad_norm": 10.88044107984478, + "learning_rate": 1.9144634478534604e-05, + "loss": 1.512, + "step": 26385 + }, + { + "epoch": 0.32, + "grad_norm": 8.710766975516297, + "learning_rate": 1.9144398064475115e-05, + "loss": 1.2583, + "step": 26388 + }, + { + "epoch": 0.32, + "grad_norm": 17.612321545667125, + "learning_rate": 1.9144161619209086e-05, + "loss": 1.2607, + "step": 26391 + }, + { + "epoch": 0.32, + "grad_norm": 87.31698259853381, + "learning_rate": 1.914392514273733e-05, + "loss": 1.9539, + "step": 26394 + }, + { + "epoch": 0.32, + "grad_norm": 19.933538127749156, + "learning_rate": 1.914368863506065e-05, + "loss": 1.5902, + "step": 26397 + }, + { + "epoch": 0.32, + "grad_norm": 23.819217330012346, + "learning_rate": 1.9143452096179852e-05, + "loss": 1.5086, + "step": 26400 + }, + { + "epoch": 0.32, + "grad_norm": 58.057571196518865, + "learning_rate": 1.914321552609575e-05, + "loss": 1.3111, + "step": 26403 + }, + { + "epoch": 0.32, + "grad_norm": 58.93625233369917, + "learning_rate": 1.9142978924809145e-05, + "loss": 1.6305, + "step": 26406 + }, + { + "epoch": 0.32, + "grad_norm": 6.317441892032688, + "learning_rate": 1.9142742292320845e-05, + "loss": 1.3013, + "step": 26409 + }, + { + "epoch": 0.32, + "grad_norm": 6.623645154426894, + "learning_rate": 1.914250562863166e-05, + "loss": 1.4658, + "step": 26412 + }, + { + "epoch": 0.32, + "grad_norm": 10.873054758180839, + "learning_rate": 1.9142268933742393e-05, + "loss": 1.3359, + "step": 26415 + }, + { + "epoch": 0.32, + "grad_norm": 12.087883274054132, + "learning_rate": 1.914203220765386e-05, + "loss": 1.8062, + "step": 26418 + }, + { + "epoch": 0.32, + "grad_norm": 28.123312444971194, + "learning_rate": 1.914179545036686e-05, + "loss": 1.636, + "step": 26421 + }, + { + "epoch": 0.32, + "grad_norm": 8.839651420912038, + "learning_rate": 1.914155866188221e-05, + "loss": 1.3154, + "step": 26424 + }, + { + "epoch": 0.32, + "grad_norm": 7.484834207293178, + "learning_rate": 1.914132184220071e-05, + "loss": 1.4075, + "step": 26427 + }, + { + "epoch": 0.32, + "grad_norm": 3.85784976757662, + "learning_rate": 1.914108499132317e-05, + "loss": 1.4566, + "step": 26430 + }, + { + "epoch": 0.32, + "grad_norm": 13.02456534145237, + "learning_rate": 1.91408481092504e-05, + "loss": 1.5662, + "step": 26433 + }, + { + "epoch": 0.32, + "grad_norm": 12.562452305479688, + "learning_rate": 1.9140611195983208e-05, + "loss": 1.5108, + "step": 26436 + }, + { + "epoch": 0.32, + "grad_norm": 8.987026168831106, + "learning_rate": 1.91403742515224e-05, + "loss": 1.61, + "step": 26439 + }, + { + "epoch": 0.32, + "grad_norm": 42.29527570877175, + "learning_rate": 1.9140137275868793e-05, + "loss": 1.4563, + "step": 26442 + }, + { + "epoch": 0.32, + "grad_norm": 6.510711753158099, + "learning_rate": 1.913990026902319e-05, + "loss": 1.3745, + "step": 26445 + }, + { + "epoch": 0.32, + "grad_norm": 32.51678939583118, + "learning_rate": 1.9139663230986394e-05, + "loss": 1.3143, + "step": 26448 + }, + { + "epoch": 0.32, + "grad_norm": 21.3177811366641, + "learning_rate": 1.913942616175922e-05, + "loss": 1.4034, + "step": 26451 + }, + { + "epoch": 0.32, + "grad_norm": 2.062240616225878, + "learning_rate": 1.913918906134248e-05, + "loss": 1.8784, + "step": 26454 + }, + { + "epoch": 0.32, + "grad_norm": 11.5604679593927, + "learning_rate": 1.9138951929736977e-05, + "loss": 1.4114, + "step": 26457 + }, + { + "epoch": 0.32, + "grad_norm": 5.8672522437210155, + "learning_rate": 1.9138714766943524e-05, + "loss": 1.3177, + "step": 26460 + }, + { + "epoch": 0.32, + "grad_norm": 28.04441886852422, + "learning_rate": 1.9138477572962932e-05, + "loss": 1.9016, + "step": 26463 + }, + { + "epoch": 0.32, + "grad_norm": 12.87629087004799, + "learning_rate": 1.9138240347796003e-05, + "loss": 1.5835, + "step": 26466 + }, + { + "epoch": 0.32, + "grad_norm": 6.555148558533495, + "learning_rate": 1.9138003091443553e-05, + "loss": 1.5163, + "step": 26469 + }, + { + "epoch": 0.32, + "grad_norm": 8.307532788133088, + "learning_rate": 1.913776580390639e-05, + "loss": 1.5155, + "step": 26472 + }, + { + "epoch": 0.32, + "grad_norm": 3.858579302743954, + "learning_rate": 1.9137528485185325e-05, + "loss": 1.6273, + "step": 26475 + }, + { + "epoch": 0.32, + "grad_norm": 7.01859889205116, + "learning_rate": 1.9137291135281163e-05, + "loss": 1.4557, + "step": 26478 + }, + { + "epoch": 0.32, + "grad_norm": 17.266672388404096, + "learning_rate": 1.9137053754194723e-05, + "loss": 1.4933, + "step": 26481 + }, + { + "epoch": 0.32, + "grad_norm": 28.643821466993547, + "learning_rate": 1.9136816341926804e-05, + "loss": 1.7065, + "step": 26484 + }, + { + "epoch": 0.32, + "grad_norm": 6.7934253568041925, + "learning_rate": 1.9136578898478224e-05, + "loss": 1.7329, + "step": 26487 + }, + { + "epoch": 0.32, + "grad_norm": 24.394721188283857, + "learning_rate": 1.9136341423849795e-05, + "loss": 1.4062, + "step": 26490 + }, + { + "epoch": 0.32, + "grad_norm": 13.466994930104041, + "learning_rate": 1.913610391804232e-05, + "loss": 1.6472, + "step": 26493 + }, + { + "epoch": 0.32, + "grad_norm": 9.68786873309132, + "learning_rate": 1.9135866381056614e-05, + "loss": 1.1674, + "step": 26496 + }, + { + "epoch": 0.32, + "grad_norm": 96.45759513222751, + "learning_rate": 1.9135628812893484e-05, + "loss": 1.4985, + "step": 26499 + }, + { + "epoch": 0.32, + "grad_norm": 18.198117034386815, + "learning_rate": 1.9135391213553745e-05, + "loss": 1.6916, + "step": 26502 + }, + { + "epoch": 0.32, + "grad_norm": 11.139447285819028, + "learning_rate": 1.913515358303821e-05, + "loss": 1.4417, + "step": 26505 + }, + { + "epoch": 0.32, + "grad_norm": 21.692474571400712, + "learning_rate": 1.913491592134768e-05, + "loss": 1.7197, + "step": 26508 + }, + { + "epoch": 0.32, + "grad_norm": 8.648368609748017, + "learning_rate": 1.913467822848298e-05, + "loss": 1.6162, + "step": 26511 + }, + { + "epoch": 0.32, + "grad_norm": 11.877883131102665, + "learning_rate": 1.9134440504444905e-05, + "loss": 1.0781, + "step": 26514 + }, + { + "epoch": 0.32, + "grad_norm": 71.7790680004788, + "learning_rate": 1.9134202749234282e-05, + "loss": 1.5905, + "step": 26517 + }, + { + "epoch": 0.32, + "grad_norm": 16.40952107536358, + "learning_rate": 1.913396496285191e-05, + "loss": 1.8642, + "step": 26520 + }, + { + "epoch": 0.32, + "grad_norm": 83.57639792433056, + "learning_rate": 1.9133727145298612e-05, + "loss": 1.7033, + "step": 26523 + }, + { + "epoch": 0.32, + "grad_norm": 17.613826679052178, + "learning_rate": 1.913348929657519e-05, + "loss": 1.4595, + "step": 26526 + }, + { + "epoch": 0.32, + "grad_norm": 24.556034578052845, + "learning_rate": 1.9133251416682454e-05, + "loss": 1.3693, + "step": 26529 + }, + { + "epoch": 0.32, + "grad_norm": 10.799781840551354, + "learning_rate": 1.9133013505621224e-05, + "loss": 1.6696, + "step": 26532 + }, + { + "epoch": 0.32, + "grad_norm": 12.395252859336926, + "learning_rate": 1.913277556339231e-05, + "loss": 1.489, + "step": 26535 + }, + { + "epoch": 0.32, + "grad_norm": 26.07926416044589, + "learning_rate": 1.913253758999652e-05, + "loss": 1.66, + "step": 26538 + }, + { + "epoch": 0.32, + "grad_norm": 5.197655869346207, + "learning_rate": 1.9132299585434672e-05, + "loss": 1.6768, + "step": 26541 + }, + { + "epoch": 0.32, + "grad_norm": 8.005705156188922, + "learning_rate": 1.9132061549707574e-05, + "loss": 1.563, + "step": 26544 + }, + { + "epoch": 0.32, + "grad_norm": 23.331466105264532, + "learning_rate": 1.913182348281604e-05, + "loss": 1.6172, + "step": 26547 + }, + { + "epoch": 0.32, + "grad_norm": 12.261094777760224, + "learning_rate": 1.913158538476088e-05, + "loss": 1.1468, + "step": 26550 + }, + { + "epoch": 0.32, + "grad_norm": 37.20251692440243, + "learning_rate": 1.9131347255542908e-05, + "loss": 1.1013, + "step": 26553 + }, + { + "epoch": 0.32, + "grad_norm": 16.514956878297195, + "learning_rate": 1.9131109095162937e-05, + "loss": 1.541, + "step": 26556 + }, + { + "epoch": 0.32, + "grad_norm": 15.25891597438241, + "learning_rate": 1.913087090362178e-05, + "loss": 1.6589, + "step": 26559 + }, + { + "epoch": 0.32, + "grad_norm": 10.070718564622409, + "learning_rate": 1.9130632680920252e-05, + "loss": 1.6357, + "step": 26562 + }, + { + "epoch": 0.32, + "grad_norm": 17.892188358681224, + "learning_rate": 1.913039442705916e-05, + "loss": 1.385, + "step": 26565 + }, + { + "epoch": 0.32, + "grad_norm": 105.10163947036926, + "learning_rate": 1.9130156142039322e-05, + "loss": 1.2876, + "step": 26568 + }, + { + "epoch": 0.32, + "grad_norm": 18.56831901964304, + "learning_rate": 1.912991782586155e-05, + "loss": 1.4512, + "step": 26571 + }, + { + "epoch": 0.32, + "grad_norm": 13.391281047566865, + "learning_rate": 1.9129679478526656e-05, + "loss": 1.2985, + "step": 26574 + }, + { + "epoch": 0.32, + "grad_norm": 65.90884766430806, + "learning_rate": 1.9129441100035454e-05, + "loss": 1.4248, + "step": 26577 + }, + { + "epoch": 0.32, + "grad_norm": 10.701718794265672, + "learning_rate": 1.912920269038876e-05, + "loss": 1.6442, + "step": 26580 + }, + { + "epoch": 0.32, + "grad_norm": 8.721724109109836, + "learning_rate": 1.9128964249587386e-05, + "loss": 1.3162, + "step": 26583 + }, + { + "epoch": 0.32, + "grad_norm": 9.130207533870538, + "learning_rate": 1.9128725777632143e-05, + "loss": 1.5347, + "step": 26586 + }, + { + "epoch": 0.32, + "grad_norm": 8.149080587259137, + "learning_rate": 1.9128487274523852e-05, + "loss": 1.5446, + "step": 26589 + }, + { + "epoch": 0.32, + "grad_norm": 7.307577819724135, + "learning_rate": 1.9128248740263315e-05, + "loss": 1.5055, + "step": 26592 + }, + { + "epoch": 0.32, + "grad_norm": 9.219048226738533, + "learning_rate": 1.9128010174851356e-05, + "loss": 1.4195, + "step": 26595 + }, + { + "epoch": 0.32, + "grad_norm": 22.882621537383482, + "learning_rate": 1.9127771578288787e-05, + "loss": 1.6432, + "step": 26598 + }, + { + "epoch": 0.32, + "grad_norm": 4.336708272279634, + "learning_rate": 1.912753295057642e-05, + "loss": 1.2548, + "step": 26601 + }, + { + "epoch": 0.32, + "grad_norm": 434.87265432972583, + "learning_rate": 1.912729429171507e-05, + "loss": 1.2796, + "step": 26604 + }, + { + "epoch": 0.32, + "grad_norm": 18.671996618498667, + "learning_rate": 1.9127055601705553e-05, + "loss": 1.6583, + "step": 26607 + }, + { + "epoch": 0.32, + "grad_norm": 14.0583292087653, + "learning_rate": 1.9126816880548684e-05, + "loss": 1.6991, + "step": 26610 + }, + { + "epoch": 0.32, + "grad_norm": 21.751097063585053, + "learning_rate": 1.9126578128245278e-05, + "loss": 1.2531, + "step": 26613 + }, + { + "epoch": 0.32, + "grad_norm": 41.82810865942276, + "learning_rate": 1.9126339344796145e-05, + "loss": 1.6955, + "step": 26616 + }, + { + "epoch": 0.32, + "grad_norm": 142.0576698927158, + "learning_rate": 1.9126100530202106e-05, + "loss": 1.4832, + "step": 26619 + }, + { + "epoch": 0.32, + "grad_norm": 14.970075692950953, + "learning_rate": 1.912586168446397e-05, + "loss": 1.6337, + "step": 26622 + }, + { + "epoch": 0.32, + "grad_norm": 14.956337294751336, + "learning_rate": 1.9125622807582556e-05, + "loss": 1.557, + "step": 26625 + }, + { + "epoch": 0.32, + "grad_norm": 47.878011221233315, + "learning_rate": 1.912538389955868e-05, + "loss": 1.3331, + "step": 26628 + }, + { + "epoch": 0.32, + "grad_norm": 258.82002576489253, + "learning_rate": 1.9125144960393153e-05, + "loss": 1.4467, + "step": 26631 + }, + { + "epoch": 0.32, + "grad_norm": 55.37540098154572, + "learning_rate": 1.9124905990086794e-05, + "loss": 1.8504, + "step": 26634 + }, + { + "epoch": 0.32, + "grad_norm": 16.37846177208419, + "learning_rate": 1.912466698864042e-05, + "loss": 1.3237, + "step": 26637 + }, + { + "epoch": 0.32, + "grad_norm": 36.56857607434551, + "learning_rate": 1.9124427956054842e-05, + "loss": 1.6393, + "step": 26640 + }, + { + "epoch": 0.32, + "grad_norm": 15.558100985307348, + "learning_rate": 1.912418889233088e-05, + "loss": 1.2721, + "step": 26643 + }, + { + "epoch": 0.32, + "grad_norm": 13.722504747886406, + "learning_rate": 1.9123949797469342e-05, + "loss": 1.3406, + "step": 26646 + }, + { + "epoch": 0.32, + "grad_norm": 200.04591948245877, + "learning_rate": 1.9123710671471056e-05, + "loss": 1.4423, + "step": 26649 + }, + { + "epoch": 0.32, + "grad_norm": 4.9163407931793905, + "learning_rate": 1.9123471514336832e-05, + "loss": 1.6122, + "step": 26652 + }, + { + "epoch": 0.32, + "grad_norm": 86.10034905719911, + "learning_rate": 1.9123232326067482e-05, + "loss": 1.3797, + "step": 26655 + }, + { + "epoch": 0.32, + "grad_norm": 18.644436163653467, + "learning_rate": 1.912299310666383e-05, + "loss": 1.3857, + "step": 26658 + }, + { + "epoch": 0.32, + "grad_norm": 6.683293801728492, + "learning_rate": 1.9122753856126687e-05, + "loss": 1.3488, + "step": 26661 + }, + { + "epoch": 0.32, + "grad_norm": 14.736002745606912, + "learning_rate": 1.912251457445687e-05, + "loss": 1.5129, + "step": 26664 + }, + { + "epoch": 0.32, + "grad_norm": 10.174938045106748, + "learning_rate": 1.9122275261655196e-05, + "loss": 1.3952, + "step": 26667 + }, + { + "epoch": 0.32, + "grad_norm": 6.154937355860831, + "learning_rate": 1.9122035917722485e-05, + "loss": 1.3452, + "step": 26670 + }, + { + "epoch": 0.32, + "grad_norm": 16.167252491712112, + "learning_rate": 1.9121796542659548e-05, + "loss": 1.3019, + "step": 26673 + }, + { + "epoch": 0.32, + "grad_norm": 89.84475750451163, + "learning_rate": 1.912155713646721e-05, + "loss": 1.8544, + "step": 26676 + }, + { + "epoch": 0.32, + "grad_norm": 18.247564571082528, + "learning_rate": 1.912131769914628e-05, + "loss": 1.5846, + "step": 26679 + }, + { + "epoch": 0.32, + "grad_norm": 13.934862249925716, + "learning_rate": 1.9121078230697577e-05, + "loss": 1.4043, + "step": 26682 + }, + { + "epoch": 0.32, + "grad_norm": 13.272186655013309, + "learning_rate": 1.912083873112192e-05, + "loss": 1.4766, + "step": 26685 + }, + { + "epoch": 0.32, + "grad_norm": 2.6479729098803, + "learning_rate": 1.9120599200420127e-05, + "loss": 1.2817, + "step": 26688 + }, + { + "epoch": 0.32, + "grad_norm": 11.370777438299987, + "learning_rate": 1.9120359638593015e-05, + "loss": 1.9096, + "step": 26691 + }, + { + "epoch": 0.32, + "grad_norm": 12.243837163658691, + "learning_rate": 1.9120120045641396e-05, + "loss": 1.4152, + "step": 26694 + }, + { + "epoch": 0.32, + "grad_norm": 10.066872984134204, + "learning_rate": 1.91198804215661e-05, + "loss": 1.3495, + "step": 26697 + }, + { + "epoch": 0.32, + "grad_norm": 6.056046217973278, + "learning_rate": 1.911964076636793e-05, + "loss": 1.1526, + "step": 26700 + }, + { + "epoch": 0.32, + "grad_norm": 4.4822284037868005, + "learning_rate": 1.9119401080047715e-05, + "loss": 1.2668, + "step": 26703 + }, + { + "epoch": 0.32, + "grad_norm": 11.334335073335007, + "learning_rate": 1.9119161362606268e-05, + "loss": 1.5448, + "step": 26706 + }, + { + "epoch": 0.32, + "grad_norm": 9.97985138046061, + "learning_rate": 1.9118921614044407e-05, + "loss": 1.2608, + "step": 26709 + }, + { + "epoch": 0.32, + "grad_norm": 20.879891871070665, + "learning_rate": 1.9118681834362952e-05, + "loss": 1.4511, + "step": 26712 + }, + { + "epoch": 0.32, + "grad_norm": 8.15461649498124, + "learning_rate": 1.911844202356272e-05, + "loss": 1.5928, + "step": 26715 + }, + { + "epoch": 0.32, + "grad_norm": 26.188530456847246, + "learning_rate": 1.911820218164453e-05, + "loss": 1.5232, + "step": 26718 + }, + { + "epoch": 0.32, + "grad_norm": 16.835784220617125, + "learning_rate": 1.9117962308609202e-05, + "loss": 1.6044, + "step": 26721 + }, + { + "epoch": 0.32, + "grad_norm": 8.921385554574805, + "learning_rate": 1.911772240445755e-05, + "loss": 1.4127, + "step": 26724 + }, + { + "epoch": 0.32, + "grad_norm": 19.170666112540353, + "learning_rate": 1.9117482469190396e-05, + "loss": 1.6676, + "step": 26727 + }, + { + "epoch": 0.32, + "grad_norm": 7.174624922770084, + "learning_rate": 1.9117242502808558e-05, + "loss": 1.4072, + "step": 26730 + }, + { + "epoch": 0.32, + "grad_norm": 11.529848355811612, + "learning_rate": 1.911700250531286e-05, + "loss": 1.3791, + "step": 26733 + }, + { + "epoch": 0.32, + "grad_norm": 13.287428810164526, + "learning_rate": 1.911676247670411e-05, + "loss": 1.9642, + "step": 26736 + }, + { + "epoch": 0.32, + "grad_norm": 43.50004466219688, + "learning_rate": 1.9116522416983135e-05, + "loss": 1.2157, + "step": 26739 + }, + { + "epoch": 0.32, + "grad_norm": 68.23993661084027, + "learning_rate": 1.9116282326150757e-05, + "loss": 1.367, + "step": 26742 + }, + { + "epoch": 0.32, + "grad_norm": 13.22826278847426, + "learning_rate": 1.9116042204207788e-05, + "loss": 1.8122, + "step": 26745 + }, + { + "epoch": 0.32, + "grad_norm": 25.08660620676794, + "learning_rate": 1.911580205115505e-05, + "loss": 1.6401, + "step": 26748 + }, + { + "epoch": 0.32, + "grad_norm": 5.588714443629734, + "learning_rate": 1.9115561866993367e-05, + "loss": 1.4178, + "step": 26751 + }, + { + "epoch": 0.32, + "grad_norm": 5.886491137056447, + "learning_rate": 1.911532165172355e-05, + "loss": 1.5277, + "step": 26754 + }, + { + "epoch": 0.32, + "grad_norm": 2.859382230299788, + "learning_rate": 1.9115081405346425e-05, + "loss": 1.426, + "step": 26757 + }, + { + "epoch": 0.32, + "grad_norm": 8.638801487988601, + "learning_rate": 1.9114841127862812e-05, + "loss": 1.2036, + "step": 26760 + }, + { + "epoch": 0.32, + "grad_norm": 18.554899583898933, + "learning_rate": 1.9114600819273524e-05, + "loss": 1.3212, + "step": 26763 + }, + { + "epoch": 0.32, + "grad_norm": 25.50573321211935, + "learning_rate": 1.9114360479579394e-05, + "loss": 1.3562, + "step": 26766 + }, + { + "epoch": 0.32, + "grad_norm": 48.87561445781781, + "learning_rate": 1.9114120108781228e-05, + "loss": 1.6147, + "step": 26769 + }, + { + "epoch": 0.32, + "grad_norm": 7.4875185254011924, + "learning_rate": 1.911387970687986e-05, + "loss": 1.3959, + "step": 26772 + }, + { + "epoch": 0.32, + "grad_norm": 38.616481516523535, + "learning_rate": 1.9113639273876095e-05, + "loss": 1.7539, + "step": 26775 + }, + { + "epoch": 0.32, + "grad_norm": 5.060848994964932, + "learning_rate": 1.9113398809770772e-05, + "loss": 1.8134, + "step": 26778 + }, + { + "epoch": 0.32, + "grad_norm": 12.650390513389581, + "learning_rate": 1.9113158314564693e-05, + "loss": 1.7523, + "step": 26781 + }, + { + "epoch": 0.32, + "grad_norm": 14.605966052820895, + "learning_rate": 1.911291778825869e-05, + "loss": 1.5831, + "step": 26784 + }, + { + "epoch": 0.32, + "grad_norm": 15.507789972672144, + "learning_rate": 1.911267723085358e-05, + "loss": 1.3901, + "step": 26787 + }, + { + "epoch": 0.32, + "grad_norm": 13.171590963502153, + "learning_rate": 1.911243664235019e-05, + "loss": 1.5083, + "step": 26790 + }, + { + "epoch": 0.32, + "grad_norm": 11.219284200759054, + "learning_rate": 1.911219602274933e-05, + "loss": 1.6169, + "step": 26793 + }, + { + "epoch": 0.32, + "grad_norm": 17.269273940953916, + "learning_rate": 1.911195537205183e-05, + "loss": 1.6577, + "step": 26796 + }, + { + "epoch": 0.32, + "grad_norm": 17.71731539632434, + "learning_rate": 1.9111714690258506e-05, + "loss": 1.4052, + "step": 26799 + }, + { + "epoch": 0.32, + "grad_norm": 18.689131660423048, + "learning_rate": 1.9111473977370184e-05, + "loss": 1.4153, + "step": 26802 + }, + { + "epoch": 0.32, + "grad_norm": 24.599863672180643, + "learning_rate": 1.9111233233387682e-05, + "loss": 1.4201, + "step": 26805 + }, + { + "epoch": 0.32, + "grad_norm": 28.624872307185495, + "learning_rate": 1.9110992458311827e-05, + "loss": 1.2647, + "step": 26808 + }, + { + "epoch": 0.32, + "grad_norm": 21.05796715574755, + "learning_rate": 1.9110751652143432e-05, + "loss": 1.3991, + "step": 26811 + }, + { + "epoch": 0.32, + "grad_norm": 11.87737941691273, + "learning_rate": 1.9110510814883324e-05, + "loss": 1.42, + "step": 26814 + }, + { + "epoch": 0.32, + "grad_norm": 21.91631943474789, + "learning_rate": 1.9110269946532326e-05, + "loss": 1.3448, + "step": 26817 + }, + { + "epoch": 0.32, + "grad_norm": 83.73096736812825, + "learning_rate": 1.9110029047091255e-05, + "loss": 1.7368, + "step": 26820 + }, + { + "epoch": 0.32, + "grad_norm": 35.24436161015916, + "learning_rate": 1.9109788116560937e-05, + "loss": 1.5607, + "step": 26823 + }, + { + "epoch": 0.32, + "grad_norm": 10.825766585890522, + "learning_rate": 1.9109547154942193e-05, + "loss": 1.3457, + "step": 26826 + }, + { + "epoch": 0.32, + "grad_norm": 5.820024808602504, + "learning_rate": 1.910930616223585e-05, + "loss": 1.2219, + "step": 26829 + }, + { + "epoch": 0.32, + "grad_norm": 27.81166081066688, + "learning_rate": 1.9109065138442725e-05, + "loss": 1.6705, + "step": 26832 + }, + { + "epoch": 0.32, + "grad_norm": 51.061551363969464, + "learning_rate": 1.910882408356364e-05, + "loss": 1.2769, + "step": 26835 + }, + { + "epoch": 0.32, + "grad_norm": 17.999148128160563, + "learning_rate": 1.910858299759942e-05, + "loss": 1.7341, + "step": 26838 + }, + { + "epoch": 0.32, + "grad_norm": 55.66824069866426, + "learning_rate": 1.9108341880550885e-05, + "loss": 1.5014, + "step": 26841 + }, + { + "epoch": 0.32, + "grad_norm": 9.14305244275181, + "learning_rate": 1.910810073241886e-05, + "loss": 1.3905, + "step": 26844 + }, + { + "epoch": 0.32, + "grad_norm": 14.751987434164995, + "learning_rate": 1.9107859553204172e-05, + "loss": 1.6298, + "step": 26847 + }, + { + "epoch": 0.32, + "grad_norm": 125.85304986300329, + "learning_rate": 1.9107618342907637e-05, + "loss": 1.4437, + "step": 26850 + }, + { + "epoch": 0.32, + "grad_norm": 25.3292447921981, + "learning_rate": 1.9107377101530082e-05, + "loss": 1.8187, + "step": 26853 + }, + { + "epoch": 0.32, + "grad_norm": 10.997842665923221, + "learning_rate": 1.910713582907233e-05, + "loss": 1.1604, + "step": 26856 + }, + { + "epoch": 0.32, + "grad_norm": 16.72698931783746, + "learning_rate": 1.91068945255352e-05, + "loss": 1.8614, + "step": 26859 + }, + { + "epoch": 0.32, + "grad_norm": 31.02861591245838, + "learning_rate": 1.9106653190919525e-05, + "loss": 1.6446, + "step": 26862 + }, + { + "epoch": 0.32, + "grad_norm": 15.52222365337683, + "learning_rate": 1.910641182522612e-05, + "loss": 1.5109, + "step": 26865 + }, + { + "epoch": 0.32, + "grad_norm": 7.3604450600849045, + "learning_rate": 1.9106170428455812e-05, + "loss": 1.4653, + "step": 26868 + }, + { + "epoch": 0.32, + "grad_norm": 9.4557932031978, + "learning_rate": 1.9105929000609425e-05, + "loss": 1.7739, + "step": 26871 + }, + { + "epoch": 0.32, + "grad_norm": 12.130597727452026, + "learning_rate": 1.9105687541687782e-05, + "loss": 1.8082, + "step": 26874 + }, + { + "epoch": 0.32, + "grad_norm": 14.488763161966178, + "learning_rate": 1.9105446051691704e-05, + "loss": 1.6219, + "step": 26877 + }, + { + "epoch": 0.32, + "grad_norm": 25.744803912086194, + "learning_rate": 1.910520453062202e-05, + "loss": 1.3173, + "step": 26880 + }, + { + "epoch": 0.32, + "grad_norm": 12.430697642699903, + "learning_rate": 1.9104962978479556e-05, + "loss": 1.5249, + "step": 26883 + }, + { + "epoch": 0.32, + "grad_norm": 15.606335603629278, + "learning_rate": 1.910472139526513e-05, + "loss": 1.7743, + "step": 26886 + }, + { + "epoch": 0.32, + "grad_norm": 55.340359666857104, + "learning_rate": 1.910447978097957e-05, + "loss": 1.3969, + "step": 26889 + }, + { + "epoch": 0.32, + "grad_norm": 22.400271872352953, + "learning_rate": 1.91042381356237e-05, + "loss": 1.3044, + "step": 26892 + }, + { + "epoch": 0.32, + "grad_norm": 15.839775844702245, + "learning_rate": 1.9103996459198344e-05, + "loss": 1.1915, + "step": 26895 + }, + { + "epoch": 0.32, + "grad_norm": 25.836905029907328, + "learning_rate": 1.9103754751704328e-05, + "loss": 1.053, + "step": 26898 + }, + { + "epoch": 0.32, + "grad_norm": 14.083378692029578, + "learning_rate": 1.9103513013142475e-05, + "loss": 1.46, + "step": 26901 + }, + { + "epoch": 0.32, + "grad_norm": 8.176321914426405, + "learning_rate": 1.9103271243513615e-05, + "loss": 1.4799, + "step": 26904 + }, + { + "epoch": 0.32, + "grad_norm": 45.84914711845889, + "learning_rate": 1.9103029442818565e-05, + "loss": 1.2601, + "step": 26907 + }, + { + "epoch": 0.32, + "grad_norm": 4.759185813310085, + "learning_rate": 1.9102787611058154e-05, + "loss": 1.6638, + "step": 26910 + }, + { + "epoch": 0.32, + "grad_norm": 3.714736769742574, + "learning_rate": 1.9102545748233214e-05, + "loss": 1.9045, + "step": 26913 + }, + { + "epoch": 0.32, + "grad_norm": 11.861305474302336, + "learning_rate": 1.9102303854344556e-05, + "loss": 1.3407, + "step": 26916 + }, + { + "epoch": 0.32, + "grad_norm": 17.331787858431426, + "learning_rate": 1.9102061929393018e-05, + "loss": 1.2708, + "step": 26919 + }, + { + "epoch": 0.32, + "grad_norm": 12.039697418206769, + "learning_rate": 1.910181997337942e-05, + "loss": 1.8278, + "step": 26922 + }, + { + "epoch": 0.32, + "grad_norm": 9.619463625104606, + "learning_rate": 1.910157798630459e-05, + "loss": 1.7126, + "step": 26925 + }, + { + "epoch": 0.32, + "grad_norm": 24.108435512006103, + "learning_rate": 1.9101335968169354e-05, + "loss": 1.257, + "step": 26928 + }, + { + "epoch": 0.32, + "grad_norm": 24.772243419636904, + "learning_rate": 1.9101093918974532e-05, + "loss": 1.6636, + "step": 26931 + }, + { + "epoch": 0.32, + "grad_norm": 13.219404594688317, + "learning_rate": 1.9100851838720957e-05, + "loss": 1.3323, + "step": 26934 + }, + { + "epoch": 0.32, + "grad_norm": 10.302265189355262, + "learning_rate": 1.9100609727409455e-05, + "loss": 1.7664, + "step": 26937 + }, + { + "epoch": 0.32, + "grad_norm": 80.6462086422546, + "learning_rate": 1.9100367585040846e-05, + "loss": 1.523, + "step": 26940 + }, + { + "epoch": 0.32, + "grad_norm": 21.360298281500487, + "learning_rate": 1.910012541161596e-05, + "loss": 1.4423, + "step": 26943 + }, + { + "epoch": 0.32, + "grad_norm": 23.402186947255455, + "learning_rate": 1.909988320713563e-05, + "loss": 1.2575, + "step": 26946 + }, + { + "epoch": 0.32, + "grad_norm": 6.3410418665278945, + "learning_rate": 1.9099640971600674e-05, + "loss": 1.3555, + "step": 26949 + }, + { + "epoch": 0.32, + "grad_norm": 5.997600915843085, + "learning_rate": 1.909939870501192e-05, + "loss": 1.5013, + "step": 26952 + }, + { + "epoch": 0.32, + "grad_norm": 45.11179507928314, + "learning_rate": 1.909915640737019e-05, + "loss": 1.5047, + "step": 26955 + }, + { + "epoch": 0.32, + "grad_norm": 11.668869099745182, + "learning_rate": 1.9098914078676323e-05, + "loss": 1.5313, + "step": 26958 + }, + { + "epoch": 0.32, + "grad_norm": 10.50697260743225, + "learning_rate": 1.9098671718931138e-05, + "loss": 1.5205, + "step": 26961 + }, + { + "epoch": 0.32, + "grad_norm": 12.307695442258396, + "learning_rate": 1.9098429328135467e-05, + "loss": 1.3352, + "step": 26964 + }, + { + "epoch": 0.32, + "grad_norm": 12.608738414959245, + "learning_rate": 1.9098186906290133e-05, + "loss": 1.5866, + "step": 26967 + }, + { + "epoch": 0.32, + "grad_norm": 24.11794949189368, + "learning_rate": 1.9097944453395963e-05, + "loss": 1.4032, + "step": 26970 + }, + { + "epoch": 0.32, + "grad_norm": 27.1573601010001, + "learning_rate": 1.9097701969453783e-05, + "loss": 1.6223, + "step": 26973 + }, + { + "epoch": 0.32, + "grad_norm": 4.210904543730511, + "learning_rate": 1.9097459454464428e-05, + "loss": 1.3992, + "step": 26976 + }, + { + "epoch": 0.32, + "grad_norm": 14.985282146964167, + "learning_rate": 1.909721690842872e-05, + "loss": 1.4034, + "step": 26979 + }, + { + "epoch": 0.32, + "grad_norm": 4.439759154343836, + "learning_rate": 1.909697433134748e-05, + "loss": 1.5621, + "step": 26982 + }, + { + "epoch": 0.32, + "grad_norm": 14.525931968068136, + "learning_rate": 1.909673172322155e-05, + "loss": 1.4181, + "step": 26985 + }, + { + "epoch": 0.32, + "grad_norm": 4.420611391279856, + "learning_rate": 1.909648908405175e-05, + "loss": 1.8889, + "step": 26988 + }, + { + "epoch": 0.32, + "grad_norm": 10.153631172896837, + "learning_rate": 1.909624641383891e-05, + "loss": 1.2312, + "step": 26991 + }, + { + "epoch": 0.32, + "grad_norm": 14.937295392677262, + "learning_rate": 1.9096003712583858e-05, + "loss": 1.3414, + "step": 26994 + }, + { + "epoch": 0.32, + "grad_norm": 10.65863598613946, + "learning_rate": 1.909576098028742e-05, + "loss": 1.2694, + "step": 26997 + }, + { + "epoch": 0.32, + "grad_norm": 7.524568249988389, + "learning_rate": 1.909551821695043e-05, + "loss": 1.503, + "step": 27000 + }, + { + "epoch": 0.32, + "grad_norm": 42.41646287890604, + "learning_rate": 1.9095275422573707e-05, + "loss": 1.4933, + "step": 27003 + }, + { + "epoch": 0.32, + "grad_norm": 3.9737926434722257, + "learning_rate": 1.909503259715809e-05, + "loss": 1.3059, + "step": 27006 + }, + { + "epoch": 0.32, + "grad_norm": 9.810693525246277, + "learning_rate": 1.9094789740704395e-05, + "loss": 1.5314, + "step": 27009 + }, + { + "epoch": 0.32, + "grad_norm": 10.661451644980362, + "learning_rate": 1.9094546853213466e-05, + "loss": 1.5634, + "step": 27012 + }, + { + "epoch": 0.32, + "grad_norm": 4.921852549810865, + "learning_rate": 1.909430393468612e-05, + "loss": 1.4513, + "step": 27015 + }, + { + "epoch": 0.32, + "grad_norm": 10.898602145331695, + "learning_rate": 1.9094060985123195e-05, + "loss": 1.5354, + "step": 27018 + }, + { + "epoch": 0.32, + "grad_norm": 8.270168694566893, + "learning_rate": 1.9093818004525513e-05, + "loss": 1.4655, + "step": 27021 + }, + { + "epoch": 0.32, + "grad_norm": 27.46702944473244, + "learning_rate": 1.9093574992893906e-05, + "loss": 1.7326, + "step": 27024 + }, + { + "epoch": 0.32, + "grad_norm": 13.559628952727467, + "learning_rate": 1.9093331950229204e-05, + "loss": 1.8587, + "step": 27027 + }, + { + "epoch": 0.33, + "grad_norm": 10.234522306234915, + "learning_rate": 1.9093088876532234e-05, + "loss": 1.3707, + "step": 27030 + }, + { + "epoch": 0.33, + "grad_norm": 46.10349045395451, + "learning_rate": 1.909284577180383e-05, + "loss": 1.5483, + "step": 27033 + }, + { + "epoch": 0.33, + "grad_norm": 7.732451556363335, + "learning_rate": 1.9092602636044812e-05, + "loss": 1.6847, + "step": 27036 + }, + { + "epoch": 0.33, + "grad_norm": 25.860283082418945, + "learning_rate": 1.909235946925602e-05, + "loss": 1.5712, + "step": 27039 + }, + { + "epoch": 0.33, + "grad_norm": 15.447763263730774, + "learning_rate": 1.909211627143828e-05, + "loss": 1.5046, + "step": 27042 + }, + { + "epoch": 0.33, + "grad_norm": 29.999292579360908, + "learning_rate": 1.9091873042592424e-05, + "loss": 1.4841, + "step": 27045 + }, + { + "epoch": 0.33, + "grad_norm": 9.17261791121094, + "learning_rate": 1.909162978271928e-05, + "loss": 1.8223, + "step": 27048 + }, + { + "epoch": 0.33, + "grad_norm": 32.445492930958245, + "learning_rate": 1.9091386491819676e-05, + "loss": 1.3713, + "step": 27051 + }, + { + "epoch": 0.33, + "grad_norm": 16.544984888517106, + "learning_rate": 1.9091143169894445e-05, + "loss": 1.6346, + "step": 27054 + }, + { + "epoch": 0.33, + "grad_norm": 9.221181233690542, + "learning_rate": 1.9090899816944417e-05, + "loss": 1.4413, + "step": 27057 + }, + { + "epoch": 0.33, + "grad_norm": 86.40652614061324, + "learning_rate": 1.9090656432970425e-05, + "loss": 1.7168, + "step": 27060 + }, + { + "epoch": 0.33, + "grad_norm": 13.26979261080167, + "learning_rate": 1.9090413017973292e-05, + "loss": 1.929, + "step": 27063 + }, + { + "epoch": 0.33, + "grad_norm": 124.4260775018903, + "learning_rate": 1.9090169571953862e-05, + "loss": 1.2517, + "step": 27066 + }, + { + "epoch": 0.33, + "grad_norm": 9.256350081780633, + "learning_rate": 1.9089926094912948e-05, + "loss": 1.5032, + "step": 27069 + }, + { + "epoch": 0.33, + "grad_norm": 29.47569079910444, + "learning_rate": 1.9089682586851397e-05, + "loss": 1.5351, + "step": 27072 + }, + { + "epoch": 0.33, + "grad_norm": 12.39668592153992, + "learning_rate": 1.908943904777003e-05, + "loss": 1.5113, + "step": 27075 + }, + { + "epoch": 0.33, + "grad_norm": 5.402822588714179, + "learning_rate": 1.9089195477669685e-05, + "loss": 1.0984, + "step": 27078 + }, + { + "epoch": 0.33, + "grad_norm": 21.27454196253026, + "learning_rate": 1.9088951876551184e-05, + "loss": 1.3546, + "step": 27081 + }, + { + "epoch": 0.33, + "grad_norm": 9.644239963076226, + "learning_rate": 1.908870824441537e-05, + "loss": 1.3389, + "step": 27084 + }, + { + "epoch": 0.33, + "grad_norm": 13.814766371970835, + "learning_rate": 1.9088464581263065e-05, + "loss": 1.4377, + "step": 27087 + }, + { + "epoch": 0.33, + "grad_norm": 21.371579984990724, + "learning_rate": 1.9088220887095105e-05, + "loss": 1.5378, + "step": 27090 + }, + { + "epoch": 0.33, + "grad_norm": 15.214955373009014, + "learning_rate": 1.908797716191232e-05, + "loss": 1.3012, + "step": 27093 + }, + { + "epoch": 0.33, + "grad_norm": 16.25958408108538, + "learning_rate": 1.9087733405715545e-05, + "loss": 1.5943, + "step": 27096 + }, + { + "epoch": 0.33, + "grad_norm": 7.357779270318221, + "learning_rate": 1.908748961850561e-05, + "loss": 1.4891, + "step": 27099 + }, + { + "epoch": 0.33, + "grad_norm": 15.183521227488534, + "learning_rate": 1.908724580028334e-05, + "loss": 1.575, + "step": 27102 + }, + { + "epoch": 0.33, + "grad_norm": 11.596348535260624, + "learning_rate": 1.9087001951049577e-05, + "loss": 1.2347, + "step": 27105 + }, + { + "epoch": 0.33, + "grad_norm": 4.608135522711739, + "learning_rate": 1.908675807080515e-05, + "loss": 1.3175, + "step": 27108 + }, + { + "epoch": 0.33, + "grad_norm": 20.263432386288255, + "learning_rate": 1.908651415955089e-05, + "loss": 1.8336, + "step": 27111 + }, + { + "epoch": 0.33, + "grad_norm": 119.86087169530417, + "learning_rate": 1.908627021728763e-05, + "loss": 1.4502, + "step": 27114 + }, + { + "epoch": 0.33, + "grad_norm": 6.546029487892818, + "learning_rate": 1.9086026244016204e-05, + "loss": 1.607, + "step": 27117 + }, + { + "epoch": 0.33, + "grad_norm": 40.582055527724016, + "learning_rate": 1.9085782239737443e-05, + "loss": 1.3596, + "step": 27120 + }, + { + "epoch": 0.33, + "grad_norm": 75.16103585919478, + "learning_rate": 1.9085538204452177e-05, + "loss": 1.5341, + "step": 27123 + }, + { + "epoch": 0.33, + "grad_norm": 43.19015185603338, + "learning_rate": 1.9085294138161247e-05, + "loss": 1.8, + "step": 27126 + }, + { + "epoch": 0.33, + "grad_norm": 9.268652023100948, + "learning_rate": 1.9085050040865475e-05, + "loss": 1.1794, + "step": 27129 + }, + { + "epoch": 0.33, + "grad_norm": 59.609234123175675, + "learning_rate": 1.9084805912565702e-05, + "loss": 1.4773, + "step": 27132 + }, + { + "epoch": 0.33, + "grad_norm": 116.94929570257536, + "learning_rate": 1.908456175326276e-05, + "loss": 1.5903, + "step": 27135 + }, + { + "epoch": 0.33, + "grad_norm": 5.182565305281589, + "learning_rate": 1.908431756295748e-05, + "loss": 1.2592, + "step": 27138 + }, + { + "epoch": 0.33, + "grad_norm": 15.753178880675799, + "learning_rate": 1.9084073341650694e-05, + "loss": 1.3976, + "step": 27141 + }, + { + "epoch": 0.33, + "grad_norm": 10.542581803553382, + "learning_rate": 1.908382908934324e-05, + "loss": 1.7191, + "step": 27144 + }, + { + "epoch": 0.33, + "grad_norm": 24.309060093487407, + "learning_rate": 1.908358480603595e-05, + "loss": 1.3349, + "step": 27147 + }, + { + "epoch": 0.33, + "grad_norm": 88.95895355524291, + "learning_rate": 1.9083340491729657e-05, + "loss": 1.7004, + "step": 27150 + }, + { + "epoch": 0.33, + "grad_norm": 6.181636025515109, + "learning_rate": 1.9083096146425192e-05, + "loss": 1.7205, + "step": 27153 + }, + { + "epoch": 0.33, + "grad_norm": 22.719937283052708, + "learning_rate": 1.9082851770123395e-05, + "loss": 1.8144, + "step": 27156 + }, + { + "epoch": 0.33, + "grad_norm": 10.041702322497908, + "learning_rate": 1.9082607362825094e-05, + "loss": 1.7215, + "step": 27159 + }, + { + "epoch": 0.33, + "grad_norm": 50.673365344333895, + "learning_rate": 1.9082362924531127e-05, + "loss": 1.372, + "step": 27162 + }, + { + "epoch": 0.33, + "grad_norm": 17.65812734575321, + "learning_rate": 1.9082118455242327e-05, + "loss": 1.2822, + "step": 27165 + }, + { + "epoch": 0.33, + "grad_norm": 66.64376267940557, + "learning_rate": 1.9081873954959528e-05, + "loss": 1.458, + "step": 27168 + }, + { + "epoch": 0.33, + "grad_norm": 36.60825239798355, + "learning_rate": 1.9081629423683562e-05, + "loss": 1.7814, + "step": 27171 + }, + { + "epoch": 0.33, + "grad_norm": 12.504392058017869, + "learning_rate": 1.9081384861415268e-05, + "loss": 1.526, + "step": 27174 + }, + { + "epoch": 0.33, + "grad_norm": 8.80029560857314, + "learning_rate": 1.9081140268155478e-05, + "loss": 1.517, + "step": 27177 + }, + { + "epoch": 0.33, + "grad_norm": 7.594888171780301, + "learning_rate": 1.908089564390503e-05, + "loss": 1.2379, + "step": 27180 + }, + { + "epoch": 0.33, + "grad_norm": 17.305288569792424, + "learning_rate": 1.9080650988664752e-05, + "loss": 1.5556, + "step": 27183 + }, + { + "epoch": 0.33, + "grad_norm": 34.75134521246394, + "learning_rate": 1.9080406302435483e-05, + "loss": 1.3101, + "step": 27186 + }, + { + "epoch": 0.33, + "grad_norm": 6.798134904214088, + "learning_rate": 1.908016158521806e-05, + "loss": 1.6032, + "step": 27189 + }, + { + "epoch": 0.33, + "grad_norm": 3.4369437346986356, + "learning_rate": 1.9079916837013317e-05, + "loss": 1.5117, + "step": 27192 + }, + { + "epoch": 0.33, + "grad_norm": 24.154151284528933, + "learning_rate": 1.9079672057822087e-05, + "loss": 1.402, + "step": 27195 + }, + { + "epoch": 0.33, + "grad_norm": 10.92005964114903, + "learning_rate": 1.907942724764521e-05, + "loss": 1.6639, + "step": 27198 + }, + { + "epoch": 0.33, + "grad_norm": 10.683577203573456, + "learning_rate": 1.907918240648351e-05, + "loss": 1.5982, + "step": 27201 + }, + { + "epoch": 0.33, + "grad_norm": 33.930086736766825, + "learning_rate": 1.907893753433784e-05, + "loss": 1.5973, + "step": 27204 + }, + { + "epoch": 0.33, + "grad_norm": 37.995930450762316, + "learning_rate": 1.9078692631209022e-05, + "loss": 2.2298, + "step": 27207 + }, + { + "epoch": 0.33, + "grad_norm": 14.14344391727214, + "learning_rate": 1.9078447697097896e-05, + "loss": 1.4423, + "step": 27210 + }, + { + "epoch": 0.33, + "grad_norm": 10.70028417424926, + "learning_rate": 1.90782027320053e-05, + "loss": 1.3929, + "step": 27213 + }, + { + "epoch": 0.33, + "grad_norm": 13.897964916588137, + "learning_rate": 1.9077957735932066e-05, + "loss": 1.255, + "step": 27216 + }, + { + "epoch": 0.33, + "grad_norm": 7.41573294205995, + "learning_rate": 1.9077712708879032e-05, + "loss": 1.2814, + "step": 27219 + }, + { + "epoch": 0.33, + "grad_norm": 10.156513548491096, + "learning_rate": 1.9077467650847033e-05, + "loss": 1.4284, + "step": 27222 + }, + { + "epoch": 0.33, + "grad_norm": 4.52753413496412, + "learning_rate": 1.907722256183691e-05, + "loss": 1.7901, + "step": 27225 + }, + { + "epoch": 0.33, + "grad_norm": 2.4348495648730175, + "learning_rate": 1.9076977441849496e-05, + "loss": 1.5095, + "step": 27228 + }, + { + "epoch": 0.33, + "grad_norm": 12.635642520689382, + "learning_rate": 1.9076732290885626e-05, + "loss": 1.5872, + "step": 27231 + }, + { + "epoch": 0.33, + "grad_norm": 10.225638374183852, + "learning_rate": 1.9076487108946137e-05, + "loss": 1.4828, + "step": 27234 + }, + { + "epoch": 0.33, + "grad_norm": 71.72548408193893, + "learning_rate": 1.907624189603187e-05, + "loss": 1.5389, + "step": 27237 + }, + { + "epoch": 0.33, + "grad_norm": 20.24780228743369, + "learning_rate": 1.9075996652143657e-05, + "loss": 1.4902, + "step": 27240 + }, + { + "epoch": 0.33, + "grad_norm": 14.231973847976793, + "learning_rate": 1.9075751377282336e-05, + "loss": 1.4175, + "step": 27243 + }, + { + "epoch": 0.33, + "grad_norm": 11.540090516923398, + "learning_rate": 1.9075506071448744e-05, + "loss": 1.473, + "step": 27246 + }, + { + "epoch": 0.33, + "grad_norm": 9.978287933436663, + "learning_rate": 1.907526073464372e-05, + "loss": 1.1664, + "step": 27249 + }, + { + "epoch": 0.33, + "grad_norm": 22.93959988750255, + "learning_rate": 1.90750153668681e-05, + "loss": 1.5572, + "step": 27252 + }, + { + "epoch": 0.33, + "grad_norm": 11.427339899812523, + "learning_rate": 1.9074769968122722e-05, + "loss": 1.4577, + "step": 27255 + }, + { + "epoch": 0.33, + "grad_norm": 53.56615020972811, + "learning_rate": 1.907452453840842e-05, + "loss": 1.6695, + "step": 27258 + }, + { + "epoch": 0.33, + "grad_norm": 9.013099280911124, + "learning_rate": 1.9074279077726038e-05, + "loss": 1.3927, + "step": 27261 + }, + { + "epoch": 0.33, + "grad_norm": 10.282315689644351, + "learning_rate": 1.907403358607641e-05, + "loss": 1.4443, + "step": 27264 + }, + { + "epoch": 0.33, + "grad_norm": 14.169829595286057, + "learning_rate": 1.907378806346037e-05, + "loss": 1.4621, + "step": 27267 + }, + { + "epoch": 0.33, + "grad_norm": 21.15782051086472, + "learning_rate": 1.907354250987876e-05, + "loss": 1.8358, + "step": 27270 + }, + { + "epoch": 0.33, + "grad_norm": 15.937907064614226, + "learning_rate": 1.9073296925332424e-05, + "loss": 1.5253, + "step": 27273 + }, + { + "epoch": 0.33, + "grad_norm": 12.909470026257326, + "learning_rate": 1.9073051309822187e-05, + "loss": 1.5648, + "step": 27276 + }, + { + "epoch": 0.33, + "grad_norm": 5.849422200381634, + "learning_rate": 1.9072805663348898e-05, + "loss": 1.5501, + "step": 27279 + }, + { + "epoch": 0.33, + "grad_norm": 22.603581881122143, + "learning_rate": 1.907255998591339e-05, + "loss": 1.5642, + "step": 27282 + }, + { + "epoch": 0.33, + "grad_norm": 15.085532609549334, + "learning_rate": 1.90723142775165e-05, + "loss": 1.738, + "step": 27285 + }, + { + "epoch": 0.33, + "grad_norm": 54.9653159262215, + "learning_rate": 1.9072068538159072e-05, + "loss": 1.6692, + "step": 27288 + }, + { + "epoch": 0.33, + "grad_norm": 10.984421035760516, + "learning_rate": 1.9071822767841942e-05, + "loss": 1.2719, + "step": 27291 + }, + { + "epoch": 0.33, + "grad_norm": 4.742771638793659, + "learning_rate": 1.907157696656595e-05, + "loss": 1.2074, + "step": 27294 + }, + { + "epoch": 0.33, + "grad_norm": 14.90048766486061, + "learning_rate": 1.9071331134331927e-05, + "loss": 1.7721, + "step": 27297 + }, + { + "epoch": 0.33, + "grad_norm": 32.2044658194185, + "learning_rate": 1.9071085271140725e-05, + "loss": 1.1715, + "step": 27300 + }, + { + "epoch": 0.33, + "grad_norm": 12.343641221906763, + "learning_rate": 1.9070839376993173e-05, + "loss": 1.6548, + "step": 27303 + }, + { + "epoch": 0.33, + "grad_norm": 7.130194058649753, + "learning_rate": 1.9070593451890116e-05, + "loss": 1.2225, + "step": 27306 + }, + { + "epoch": 0.33, + "grad_norm": 7.0961415029380195, + "learning_rate": 1.9070347495832387e-05, + "loss": 1.4224, + "step": 27309 + }, + { + "epoch": 0.33, + "grad_norm": 30.770546521925535, + "learning_rate": 1.907010150882083e-05, + "loss": 1.498, + "step": 27312 + }, + { + "epoch": 0.33, + "grad_norm": 10.521384125270528, + "learning_rate": 1.906985549085629e-05, + "loss": 1.4548, + "step": 27315 + }, + { + "epoch": 0.33, + "grad_norm": 21.42886547428663, + "learning_rate": 1.9069609441939595e-05, + "loss": 1.0719, + "step": 27318 + }, + { + "epoch": 0.33, + "grad_norm": 8.69131630453488, + "learning_rate": 1.906936336207159e-05, + "loss": 1.5666, + "step": 27321 + }, + { + "epoch": 0.33, + "grad_norm": 13.589784181796091, + "learning_rate": 1.906911725125311e-05, + "loss": 1.3599, + "step": 27324 + }, + { + "epoch": 0.33, + "grad_norm": 12.89374148169722, + "learning_rate": 1.9068871109485006e-05, + "loss": 1.4634, + "step": 27327 + }, + { + "epoch": 0.33, + "grad_norm": 15.237336392987132, + "learning_rate": 1.906862493676811e-05, + "loss": 1.6903, + "step": 27330 + }, + { + "epoch": 0.33, + "grad_norm": 26.422028936259768, + "learning_rate": 1.9068378733103264e-05, + "loss": 1.356, + "step": 27333 + }, + { + "epoch": 0.33, + "grad_norm": 8.503059191393987, + "learning_rate": 1.9068132498491306e-05, + "loss": 1.5251, + "step": 27336 + }, + { + "epoch": 0.33, + "grad_norm": 8.228840671902379, + "learning_rate": 1.906788623293308e-05, + "loss": 1.7607, + "step": 27339 + }, + { + "epoch": 0.33, + "grad_norm": 62.653158475926496, + "learning_rate": 1.906763993642942e-05, + "loss": 1.6926, + "step": 27342 + }, + { + "epoch": 0.33, + "grad_norm": 122.67140784337667, + "learning_rate": 1.906739360898118e-05, + "loss": 1.3417, + "step": 27345 + }, + { + "epoch": 0.33, + "grad_norm": 21.51944330369065, + "learning_rate": 1.9067147250589183e-05, + "loss": 1.4653, + "step": 27348 + }, + { + "epoch": 0.33, + "grad_norm": 15.733860810437275, + "learning_rate": 1.906690086125428e-05, + "loss": 1.4542, + "step": 27351 + }, + { + "epoch": 0.33, + "grad_norm": 18.20114856580734, + "learning_rate": 1.9066654440977315e-05, + "loss": 1.1142, + "step": 27354 + }, + { + "epoch": 0.33, + "grad_norm": 8.668866690677202, + "learning_rate": 1.906640798975912e-05, + "loss": 1.3713, + "step": 27357 + }, + { + "epoch": 0.33, + "grad_norm": 9.755218991788333, + "learning_rate": 1.906616150760054e-05, + "loss": 1.4064, + "step": 27360 + }, + { + "epoch": 0.33, + "grad_norm": 23.0901103782931, + "learning_rate": 1.9065914994502418e-05, + "loss": 1.3446, + "step": 27363 + }, + { + "epoch": 0.33, + "grad_norm": 7.193166860263231, + "learning_rate": 1.9065668450465594e-05, + "loss": 1.6121, + "step": 27366 + }, + { + "epoch": 0.33, + "grad_norm": 33.48705423946633, + "learning_rate": 1.906542187549091e-05, + "loss": 1.6794, + "step": 27369 + }, + { + "epoch": 0.33, + "grad_norm": 23.473646810932255, + "learning_rate": 1.9065175269579203e-05, + "loss": 1.7246, + "step": 27372 + }, + { + "epoch": 0.33, + "grad_norm": 9.00954956332858, + "learning_rate": 1.9064928632731322e-05, + "loss": 1.7253, + "step": 27375 + }, + { + "epoch": 0.33, + "grad_norm": 24.69618205141265, + "learning_rate": 1.90646819649481e-05, + "loss": 1.5098, + "step": 27378 + }, + { + "epoch": 0.33, + "grad_norm": 14.653587105402957, + "learning_rate": 1.906443526623039e-05, + "loss": 1.7563, + "step": 27381 + }, + { + "epoch": 0.33, + "grad_norm": 9.84917588534644, + "learning_rate": 1.906418853657902e-05, + "loss": 1.6888, + "step": 27384 + }, + { + "epoch": 0.33, + "grad_norm": 26.780624152703847, + "learning_rate": 1.9063941775994843e-05, + "loss": 1.7835, + "step": 27387 + }, + { + "epoch": 0.33, + "grad_norm": 40.99375944461114, + "learning_rate": 1.90636949844787e-05, + "loss": 1.3122, + "step": 27390 + }, + { + "epoch": 0.33, + "grad_norm": 55.84205064455776, + "learning_rate": 1.906344816203143e-05, + "loss": 1.3598, + "step": 27393 + }, + { + "epoch": 0.33, + "grad_norm": 8.99014816567146, + "learning_rate": 1.9063201308653873e-05, + "loss": 1.4777, + "step": 27396 + }, + { + "epoch": 0.33, + "grad_norm": 8.155327241385297, + "learning_rate": 1.9062954424346875e-05, + "loss": 1.5556, + "step": 27399 + }, + { + "epoch": 0.33, + "grad_norm": 12.833480649255128, + "learning_rate": 1.906270750911128e-05, + "loss": 1.4754, + "step": 27402 + }, + { + "epoch": 0.33, + "grad_norm": 8.664800887954334, + "learning_rate": 1.9062460562947925e-05, + "loss": 1.881, + "step": 27405 + }, + { + "epoch": 0.33, + "grad_norm": 14.953218910541125, + "learning_rate": 1.906221358585766e-05, + "loss": 1.3282, + "step": 27408 + }, + { + "epoch": 0.33, + "grad_norm": 42.16633329676658, + "learning_rate": 1.9061966577841325e-05, + "loss": 1.2747, + "step": 27411 + }, + { + "epoch": 0.33, + "grad_norm": 14.157733603683981, + "learning_rate": 1.906171953889976e-05, + "loss": 1.3244, + "step": 27414 + }, + { + "epoch": 0.33, + "grad_norm": 11.882721694655947, + "learning_rate": 1.906147246903381e-05, + "loss": 1.8555, + "step": 27417 + }, + { + "epoch": 0.33, + "grad_norm": 13.10960826509291, + "learning_rate": 1.906122536824432e-05, + "loss": 1.3477, + "step": 27420 + }, + { + "epoch": 0.33, + "grad_norm": 47.25278310938665, + "learning_rate": 1.9060978236532127e-05, + "loss": 1.6753, + "step": 27423 + }, + { + "epoch": 0.33, + "grad_norm": 13.544006235990832, + "learning_rate": 1.9060731073898085e-05, + "loss": 1.1961, + "step": 27426 + }, + { + "epoch": 0.33, + "grad_norm": 9.267133563838934, + "learning_rate": 1.9060483880343028e-05, + "loss": 1.4866, + "step": 27429 + }, + { + "epoch": 0.33, + "grad_norm": 10.036825354447117, + "learning_rate": 1.9060236655867805e-05, + "loss": 1.4323, + "step": 27432 + }, + { + "epoch": 0.33, + "grad_norm": 13.504534155963682, + "learning_rate": 1.9059989400473258e-05, + "loss": 1.6247, + "step": 27435 + }, + { + "epoch": 0.33, + "grad_norm": 9.823301068751949, + "learning_rate": 1.9059742114160232e-05, + "loss": 1.859, + "step": 27438 + }, + { + "epoch": 0.33, + "grad_norm": 17.53566732249506, + "learning_rate": 1.9059494796929566e-05, + "loss": 1.3658, + "step": 27441 + }, + { + "epoch": 0.33, + "grad_norm": 17.434677536329772, + "learning_rate": 1.905924744878211e-05, + "loss": 1.5127, + "step": 27444 + }, + { + "epoch": 0.33, + "grad_norm": 8.584748860835614, + "learning_rate": 1.9059000069718704e-05, + "loss": 1.3524, + "step": 27447 + }, + { + "epoch": 0.33, + "grad_norm": 16.546311280244968, + "learning_rate": 1.9058752659740193e-05, + "loss": 1.2476, + "step": 27450 + }, + { + "epoch": 0.33, + "grad_norm": 33.89462327979115, + "learning_rate": 1.9058505218847422e-05, + "loss": 1.3462, + "step": 27453 + }, + { + "epoch": 0.33, + "grad_norm": 17.726834779873432, + "learning_rate": 1.905825774704124e-05, + "loss": 1.5518, + "step": 27456 + }, + { + "epoch": 0.33, + "grad_norm": 56.31476002893023, + "learning_rate": 1.905801024432248e-05, + "loss": 1.6695, + "step": 27459 + }, + { + "epoch": 0.33, + "grad_norm": 6.388642674643404, + "learning_rate": 1.9057762710691995e-05, + "loss": 1.0932, + "step": 27462 + }, + { + "epoch": 0.33, + "grad_norm": 43.604895527101625, + "learning_rate": 1.9057515146150635e-05, + "loss": 1.329, + "step": 27465 + }, + { + "epoch": 0.33, + "grad_norm": 5.6003484072078225, + "learning_rate": 1.905726755069923e-05, + "loss": 1.6966, + "step": 27468 + }, + { + "epoch": 0.33, + "grad_norm": 20.274264621632064, + "learning_rate": 1.905701992433864e-05, + "loss": 1.6689, + "step": 27471 + }, + { + "epoch": 0.33, + "grad_norm": 4.7954638253404305, + "learning_rate": 1.9056772267069702e-05, + "loss": 1.9666, + "step": 27474 + }, + { + "epoch": 0.33, + "grad_norm": 5.663753752001313, + "learning_rate": 1.9056524578893262e-05, + "loss": 1.8448, + "step": 27477 + }, + { + "epoch": 0.33, + "grad_norm": 25.04263583502666, + "learning_rate": 1.905627685981016e-05, + "loss": 1.5734, + "step": 27480 + }, + { + "epoch": 0.33, + "grad_norm": 18.007074927158676, + "learning_rate": 1.9056029109821255e-05, + "loss": 1.3811, + "step": 27483 + }, + { + "epoch": 0.33, + "grad_norm": 23.02960674154869, + "learning_rate": 1.9055781328927382e-05, + "loss": 1.2005, + "step": 27486 + }, + { + "epoch": 0.33, + "grad_norm": 25.061603119686573, + "learning_rate": 1.9055533517129387e-05, + "loss": 1.3933, + "step": 27489 + }, + { + "epoch": 0.33, + "grad_norm": 5.876943499504853, + "learning_rate": 1.905528567442812e-05, + "loss": 1.3534, + "step": 27492 + }, + { + "epoch": 0.33, + "grad_norm": 8.123247504852126, + "learning_rate": 1.9055037800824428e-05, + "loss": 1.3035, + "step": 27495 + }, + { + "epoch": 0.33, + "grad_norm": 7.50021078187676, + "learning_rate": 1.9054789896319147e-05, + "loss": 1.5247, + "step": 27498 + }, + { + "epoch": 0.33, + "grad_norm": 20.562341271696422, + "learning_rate": 1.9054541960913133e-05, + "loss": 1.2737, + "step": 27501 + }, + { + "epoch": 0.33, + "grad_norm": 15.513657215565445, + "learning_rate": 1.9054293994607228e-05, + "loss": 1.782, + "step": 27504 + }, + { + "epoch": 0.33, + "grad_norm": 8.48038266638905, + "learning_rate": 1.9054045997402282e-05, + "loss": 1.5347, + "step": 27507 + }, + { + "epoch": 0.33, + "grad_norm": 8.932936006638373, + "learning_rate": 1.9053797969299133e-05, + "loss": 1.517, + "step": 27510 + }, + { + "epoch": 0.33, + "grad_norm": 8.705288825440753, + "learning_rate": 1.9053549910298638e-05, + "loss": 1.6362, + "step": 27513 + }, + { + "epoch": 0.33, + "grad_norm": 11.563816525422231, + "learning_rate": 1.9053301820401634e-05, + "loss": 1.5863, + "step": 27516 + }, + { + "epoch": 0.33, + "grad_norm": 9.909744232729423, + "learning_rate": 1.9053053699608974e-05, + "loss": 1.3424, + "step": 27519 + }, + { + "epoch": 0.33, + "grad_norm": 8.041177681275483, + "learning_rate": 1.9052805547921502e-05, + "loss": 1.5128, + "step": 27522 + }, + { + "epoch": 0.33, + "grad_norm": 7.030183504418145, + "learning_rate": 1.9052557365340066e-05, + "loss": 1.616, + "step": 27525 + }, + { + "epoch": 0.33, + "grad_norm": 19.140242413995622, + "learning_rate": 1.905230915186551e-05, + "loss": 1.4829, + "step": 27528 + }, + { + "epoch": 0.33, + "grad_norm": 5.231077350467798, + "learning_rate": 1.905206090749869e-05, + "loss": 1.441, + "step": 27531 + }, + { + "epoch": 0.33, + "grad_norm": 6.086561162815654, + "learning_rate": 1.9051812632240437e-05, + "loss": 1.2247, + "step": 27534 + }, + { + "epoch": 0.33, + "grad_norm": 7.101139531114063, + "learning_rate": 1.9051564326091615e-05, + "loss": 1.3504, + "step": 27537 + }, + { + "epoch": 0.33, + "grad_norm": 40.188679279956695, + "learning_rate": 1.9051315989053064e-05, + "loss": 1.4902, + "step": 27540 + }, + { + "epoch": 0.33, + "grad_norm": 8.070663161794526, + "learning_rate": 1.905106762112563e-05, + "loss": 1.5758, + "step": 27543 + }, + { + "epoch": 0.33, + "grad_norm": 15.405158708591784, + "learning_rate": 1.9050819222310163e-05, + "loss": 1.1993, + "step": 27546 + }, + { + "epoch": 0.33, + "grad_norm": 10.167561964911151, + "learning_rate": 1.9050570792607507e-05, + "loss": 1.677, + "step": 27549 + }, + { + "epoch": 0.33, + "grad_norm": 24.086497202418077, + "learning_rate": 1.9050322332018517e-05, + "loss": 1.1357, + "step": 27552 + }, + { + "epoch": 0.33, + "grad_norm": 35.150578321383165, + "learning_rate": 1.9050073840544033e-05, + "loss": 1.6383, + "step": 27555 + }, + { + "epoch": 0.33, + "grad_norm": 10.984896514759416, + "learning_rate": 1.904982531818491e-05, + "loss": 1.5358, + "step": 27558 + }, + { + "epoch": 0.33, + "grad_norm": 9.065105518165861, + "learning_rate": 1.9049576764941996e-05, + "loss": 1.5375, + "step": 27561 + }, + { + "epoch": 0.33, + "grad_norm": 74.60178249739148, + "learning_rate": 1.904932818081613e-05, + "loss": 1.4838, + "step": 27564 + }, + { + "epoch": 0.33, + "grad_norm": 34.5528788265581, + "learning_rate": 1.904907956580817e-05, + "loss": 1.4714, + "step": 27567 + }, + { + "epoch": 0.33, + "grad_norm": 17.053841491204402, + "learning_rate": 1.904883091991896e-05, + "loss": 1.7942, + "step": 27570 + }, + { + "epoch": 0.33, + "grad_norm": 78.81550549840883, + "learning_rate": 1.9048582243149346e-05, + "loss": 1.5249, + "step": 27573 + }, + { + "epoch": 0.33, + "grad_norm": 5.424070525654807, + "learning_rate": 1.9048333535500184e-05, + "loss": 1.5271, + "step": 27576 + }, + { + "epoch": 0.33, + "grad_norm": 26.81870708383532, + "learning_rate": 1.904808479697232e-05, + "loss": 1.3687, + "step": 27579 + }, + { + "epoch": 0.33, + "grad_norm": 21.124644942010327, + "learning_rate": 1.90478360275666e-05, + "loss": 1.189, + "step": 27582 + }, + { + "epoch": 0.33, + "grad_norm": 5.550082390327636, + "learning_rate": 1.904758722728387e-05, + "loss": 1.7824, + "step": 27585 + }, + { + "epoch": 0.33, + "grad_norm": 29.927271069407624, + "learning_rate": 1.904733839612499e-05, + "loss": 1.6868, + "step": 27588 + }, + { + "epoch": 0.33, + "grad_norm": 20.505781340565072, + "learning_rate": 1.90470895340908e-05, + "loss": 1.4326, + "step": 27591 + }, + { + "epoch": 0.33, + "grad_norm": 33.657533183625, + "learning_rate": 1.9046840641182153e-05, + "loss": 1.5683, + "step": 27594 + }, + { + "epoch": 0.33, + "grad_norm": 14.454963245189496, + "learning_rate": 1.9046591717399898e-05, + "loss": 1.8794, + "step": 27597 + }, + { + "epoch": 0.33, + "grad_norm": 11.428259721144862, + "learning_rate": 1.9046342762744884e-05, + "loss": 1.2522, + "step": 27600 + }, + { + "epoch": 0.33, + "grad_norm": 33.02825571639081, + "learning_rate": 1.904609377721796e-05, + "loss": 1.3403, + "step": 27603 + }, + { + "epoch": 0.33, + "grad_norm": 7.848644362361358, + "learning_rate": 1.904584476081998e-05, + "loss": 1.381, + "step": 27606 + }, + { + "epoch": 0.33, + "grad_norm": 13.448201231783322, + "learning_rate": 1.9045595713551782e-05, + "loss": 1.5749, + "step": 27609 + }, + { + "epoch": 0.33, + "grad_norm": 17.281800641542205, + "learning_rate": 1.904534663541423e-05, + "loss": 1.5116, + "step": 27612 + }, + { + "epoch": 0.33, + "grad_norm": 29.64525750911007, + "learning_rate": 1.9045097526408168e-05, + "loss": 1.545, + "step": 27615 + }, + { + "epoch": 0.33, + "grad_norm": 8.17100889939841, + "learning_rate": 1.9044848386534445e-05, + "loss": 1.1253, + "step": 27618 + }, + { + "epoch": 0.33, + "grad_norm": 8.743269111653035, + "learning_rate": 1.9044599215793913e-05, + "loss": 1.7716, + "step": 27621 + }, + { + "epoch": 0.33, + "grad_norm": 6.415475214079144, + "learning_rate": 1.904435001418742e-05, + "loss": 1.8582, + "step": 27624 + }, + { + "epoch": 0.33, + "grad_norm": 29.61127184861427, + "learning_rate": 1.904410078171582e-05, + "loss": 1.6006, + "step": 27627 + }, + { + "epoch": 0.33, + "grad_norm": 58.62108022928497, + "learning_rate": 1.904385151837996e-05, + "loss": 1.8875, + "step": 27630 + }, + { + "epoch": 0.33, + "grad_norm": 22.063206264526947, + "learning_rate": 1.9043602224180694e-05, + "loss": 1.4702, + "step": 27633 + }, + { + "epoch": 0.33, + "grad_norm": 9.986061016026024, + "learning_rate": 1.904335289911887e-05, + "loss": 1.4646, + "step": 27636 + }, + { + "epoch": 0.33, + "grad_norm": 15.613707483348245, + "learning_rate": 1.904310354319534e-05, + "loss": 1.3545, + "step": 27639 + }, + { + "epoch": 0.33, + "grad_norm": 15.677585402357309, + "learning_rate": 1.9042854156410957e-05, + "loss": 1.3725, + "step": 27642 + }, + { + "epoch": 0.33, + "grad_norm": 6.808175912763877, + "learning_rate": 1.9042604738766565e-05, + "loss": 1.5331, + "step": 27645 + }, + { + "epoch": 0.33, + "grad_norm": 36.90157947567368, + "learning_rate": 1.9042355290263025e-05, + "loss": 1.7898, + "step": 27648 + }, + { + "epoch": 0.33, + "grad_norm": 149.70871578489604, + "learning_rate": 1.9042105810901184e-05, + "loss": 1.3985, + "step": 27651 + }, + { + "epoch": 0.33, + "grad_norm": 3.460143383127005, + "learning_rate": 1.9041856300681888e-05, + "loss": 1.8318, + "step": 27654 + }, + { + "epoch": 0.33, + "grad_norm": 4.048935429538689, + "learning_rate": 1.9041606759605998e-05, + "loss": 1.6683, + "step": 27657 + }, + { + "epoch": 0.33, + "grad_norm": 77.07323531088934, + "learning_rate": 1.904135718767436e-05, + "loss": 1.266, + "step": 27660 + }, + { + "epoch": 0.33, + "grad_norm": 9.611500719973899, + "learning_rate": 1.9041107584887823e-05, + "loss": 1.2265, + "step": 27663 + }, + { + "epoch": 0.33, + "grad_norm": 7.413967691398366, + "learning_rate": 1.9040857951247248e-05, + "loss": 1.3365, + "step": 27666 + }, + { + "epoch": 0.33, + "grad_norm": 17.98053456188334, + "learning_rate": 1.9040608286753474e-05, + "loss": 1.3686, + "step": 27669 + }, + { + "epoch": 0.33, + "grad_norm": 13.976324825600674, + "learning_rate": 1.9040358591407365e-05, + "loss": 1.4774, + "step": 27672 + }, + { + "epoch": 0.33, + "grad_norm": 25.258355402087336, + "learning_rate": 1.904010886520977e-05, + "loss": 1.2782, + "step": 27675 + }, + { + "epoch": 0.33, + "grad_norm": 37.29830143404509, + "learning_rate": 1.903985910816154e-05, + "loss": 1.6982, + "step": 27678 + }, + { + "epoch": 0.33, + "grad_norm": 58.02165251276315, + "learning_rate": 1.903960932026352e-05, + "loss": 1.3979, + "step": 27681 + }, + { + "epoch": 0.33, + "grad_norm": 28.225250569986617, + "learning_rate": 1.9039359501516577e-05, + "loss": 1.6999, + "step": 27684 + }, + { + "epoch": 0.33, + "grad_norm": 23.17069176901404, + "learning_rate": 1.9039109651921554e-05, + "loss": 1.5609, + "step": 27687 + }, + { + "epoch": 0.33, + "grad_norm": 18.143122299444457, + "learning_rate": 1.9038859771479302e-05, + "loss": 1.524, + "step": 27690 + }, + { + "epoch": 0.33, + "grad_norm": 36.15117939382442, + "learning_rate": 1.903860986019068e-05, + "loss": 1.4636, + "step": 27693 + }, + { + "epoch": 0.33, + "grad_norm": 14.486876842510254, + "learning_rate": 1.903835991805654e-05, + "loss": 1.4322, + "step": 27696 + }, + { + "epoch": 0.33, + "grad_norm": 14.443787676268116, + "learning_rate": 1.903810994507773e-05, + "loss": 1.1019, + "step": 27699 + }, + { + "epoch": 0.33, + "grad_norm": 15.531846657579866, + "learning_rate": 1.903785994125511e-05, + "loss": 1.9981, + "step": 27702 + }, + { + "epoch": 0.33, + "grad_norm": 3.0991378144072983, + "learning_rate": 1.9037609906589525e-05, + "loss": 1.2262, + "step": 27705 + }, + { + "epoch": 0.33, + "grad_norm": 5.382515991880708, + "learning_rate": 1.9037359841081836e-05, + "loss": 1.2948, + "step": 27708 + }, + { + "epoch": 0.33, + "grad_norm": 43.95669494580455, + "learning_rate": 1.903710974473289e-05, + "loss": 1.3778, + "step": 27711 + }, + { + "epoch": 0.33, + "grad_norm": 17.066847128127403, + "learning_rate": 1.9036859617543548e-05, + "loss": 1.4338, + "step": 27714 + }, + { + "epoch": 0.33, + "grad_norm": 35.344662145748536, + "learning_rate": 1.9036609459514657e-05, + "loss": 1.5378, + "step": 27717 + }, + { + "epoch": 0.33, + "grad_norm": 10.482394131771812, + "learning_rate": 1.9036359270647075e-05, + "loss": 1.2569, + "step": 27720 + }, + { + "epoch": 0.33, + "grad_norm": 12.819563375484062, + "learning_rate": 1.903610905094165e-05, + "loss": 1.4663, + "step": 27723 + }, + { + "epoch": 0.33, + "grad_norm": 19.313304203487874, + "learning_rate": 1.903585880039924e-05, + "loss": 1.4949, + "step": 27726 + }, + { + "epoch": 0.33, + "grad_norm": 29.926903277878946, + "learning_rate": 1.9035608519020702e-05, + "loss": 1.5044, + "step": 27729 + }, + { + "epoch": 0.33, + "grad_norm": 14.660815019424925, + "learning_rate": 1.903535820680688e-05, + "loss": 1.7525, + "step": 27732 + }, + { + "epoch": 0.33, + "grad_norm": 9.5512628555773, + "learning_rate": 1.9035107863758642e-05, + "loss": 1.33, + "step": 27735 + }, + { + "epoch": 0.33, + "grad_norm": 8.829508396684112, + "learning_rate": 1.903485748987683e-05, + "loss": 1.3206, + "step": 27738 + }, + { + "epoch": 0.33, + "grad_norm": 9.961085303315942, + "learning_rate": 1.903460708516231e-05, + "loss": 1.4829, + "step": 27741 + }, + { + "epoch": 0.33, + "grad_norm": 18.06777063921463, + "learning_rate": 1.9034356649615925e-05, + "loss": 1.2865, + "step": 27744 + }, + { + "epoch": 0.33, + "grad_norm": 6.552043974992578, + "learning_rate": 1.9034106183238537e-05, + "loss": 1.7321, + "step": 27747 + }, + { + "epoch": 0.33, + "grad_norm": 16.826033621460983, + "learning_rate": 1.9033855686031e-05, + "loss": 1.3684, + "step": 27750 + }, + { + "epoch": 0.33, + "grad_norm": 27.582054080588275, + "learning_rate": 1.9033605157994163e-05, + "loss": 1.8092, + "step": 27753 + }, + { + "epoch": 0.33, + "grad_norm": 10.575955919965248, + "learning_rate": 1.903335459912889e-05, + "loss": 1.3157, + "step": 27756 + }, + { + "epoch": 0.33, + "grad_norm": 13.26562435890441, + "learning_rate": 1.903310400943603e-05, + "loss": 1.5269, + "step": 27759 + }, + { + "epoch": 0.33, + "grad_norm": 16.02835526785616, + "learning_rate": 1.9032853388916438e-05, + "loss": 1.3013, + "step": 27762 + }, + { + "epoch": 0.33, + "grad_norm": 20.38937842513182, + "learning_rate": 1.9032602737570975e-05, + "loss": 1.3056, + "step": 27765 + }, + { + "epoch": 0.33, + "grad_norm": 14.580342714149955, + "learning_rate": 1.9032352055400488e-05, + "loss": 1.4054, + "step": 27768 + }, + { + "epoch": 0.33, + "grad_norm": 21.714270583640513, + "learning_rate": 1.903210134240584e-05, + "loss": 1.4579, + "step": 27771 + }, + { + "epoch": 0.33, + "grad_norm": 53.37591496790569, + "learning_rate": 1.9031850598587884e-05, + "loss": 2.0496, + "step": 27774 + }, + { + "epoch": 0.33, + "grad_norm": 11.42663587241029, + "learning_rate": 1.903159982394747e-05, + "loss": 1.6364, + "step": 27777 + }, + { + "epoch": 0.33, + "grad_norm": 9.267840415509097, + "learning_rate": 1.9031349018485463e-05, + "loss": 1.2734, + "step": 27780 + }, + { + "epoch": 0.33, + "grad_norm": 64.6472586465016, + "learning_rate": 1.9031098182202715e-05, + "loss": 1.6204, + "step": 27783 + }, + { + "epoch": 0.33, + "grad_norm": 6.1073960481856915, + "learning_rate": 1.903084731510008e-05, + "loss": 1.4924, + "step": 27786 + }, + { + "epoch": 0.33, + "grad_norm": 38.84275231028848, + "learning_rate": 1.9030596417178417e-05, + "loss": 1.2767, + "step": 27789 + }, + { + "epoch": 0.33, + "grad_norm": 21.728455649138237, + "learning_rate": 1.903034548843858e-05, + "loss": 1.7997, + "step": 27792 + }, + { + "epoch": 0.33, + "grad_norm": 14.756443732236828, + "learning_rate": 1.9030094528881432e-05, + "loss": 1.6128, + "step": 27795 + }, + { + "epoch": 0.33, + "grad_norm": 27.512641178551327, + "learning_rate": 1.9029843538507816e-05, + "loss": 1.623, + "step": 27798 + }, + { + "epoch": 0.33, + "grad_norm": 36.049444524632065, + "learning_rate": 1.9029592517318603e-05, + "loss": 1.2633, + "step": 27801 + }, + { + "epoch": 0.33, + "grad_norm": 10.906056242308187, + "learning_rate": 1.9029341465314642e-05, + "loss": 1.3136, + "step": 27804 + }, + { + "epoch": 0.33, + "grad_norm": 11.509352236487267, + "learning_rate": 1.902909038249679e-05, + "loss": 1.8154, + "step": 27807 + }, + { + "epoch": 0.33, + "grad_norm": 3.0658692040084152, + "learning_rate": 1.9028839268865903e-05, + "loss": 1.2416, + "step": 27810 + }, + { + "epoch": 0.33, + "grad_norm": 12.075441022132543, + "learning_rate": 1.902858812442284e-05, + "loss": 1.7521, + "step": 27813 + }, + { + "epoch": 0.33, + "grad_norm": 51.27862429822862, + "learning_rate": 1.902833694916846e-05, + "loss": 1.7048, + "step": 27816 + }, + { + "epoch": 0.33, + "grad_norm": 155.45502943082096, + "learning_rate": 1.9028085743103618e-05, + "loss": 1.7692, + "step": 27819 + }, + { + "epoch": 0.33, + "grad_norm": 4.603122543586905, + "learning_rate": 1.9027834506229167e-05, + "loss": 1.3022, + "step": 27822 + }, + { + "epoch": 0.33, + "grad_norm": 11.030538118722477, + "learning_rate": 1.9027583238545973e-05, + "loss": 1.7177, + "step": 27825 + }, + { + "epoch": 0.33, + "grad_norm": 18.881579634867688, + "learning_rate": 1.902733194005489e-05, + "loss": 1.4267, + "step": 27828 + }, + { + "epoch": 0.33, + "grad_norm": 92.94040578105346, + "learning_rate": 1.902708061075677e-05, + "loss": 1.4818, + "step": 27831 + }, + { + "epoch": 0.33, + "grad_norm": 36.57203131993711, + "learning_rate": 1.902682925065248e-05, + "loss": 1.4534, + "step": 27834 + }, + { + "epoch": 0.33, + "grad_norm": 6.959774639548991, + "learning_rate": 1.9026577859742872e-05, + "loss": 1.4706, + "step": 27837 + }, + { + "epoch": 0.33, + "grad_norm": 10.071838084728759, + "learning_rate": 1.9026326438028803e-05, + "loss": 1.2933, + "step": 27840 + }, + { + "epoch": 0.33, + "grad_norm": 11.843008874993325, + "learning_rate": 1.9026074985511134e-05, + "loss": 1.5514, + "step": 27843 + }, + { + "epoch": 0.33, + "grad_norm": 17.10832653412103, + "learning_rate": 1.9025823502190723e-05, + "loss": 1.3138, + "step": 27846 + }, + { + "epoch": 0.33, + "grad_norm": 34.72110686790882, + "learning_rate": 1.902557198806843e-05, + "loss": 1.6054, + "step": 27849 + }, + { + "epoch": 0.33, + "grad_norm": 23.314969719089348, + "learning_rate": 1.9025320443145107e-05, + "loss": 1.9189, + "step": 27852 + }, + { + "epoch": 0.33, + "grad_norm": 3.048056958204427, + "learning_rate": 1.9025068867421617e-05, + "loss": 1.438, + "step": 27855 + }, + { + "epoch": 0.33, + "grad_norm": 6.216606693954811, + "learning_rate": 1.9024817260898817e-05, + "loss": 1.2678, + "step": 27858 + }, + { + "epoch": 0.34, + "grad_norm": 59.09666797596424, + "learning_rate": 1.902456562357757e-05, + "loss": 1.5029, + "step": 27861 + }, + { + "epoch": 0.34, + "grad_norm": 5.131513983416461, + "learning_rate": 1.9024313955458727e-05, + "loss": 1.5622, + "step": 27864 + }, + { + "epoch": 0.34, + "grad_norm": 74.07785219740579, + "learning_rate": 1.9024062256543155e-05, + "loss": 1.7821, + "step": 27867 + }, + { + "epoch": 0.34, + "grad_norm": 28.82148733064216, + "learning_rate": 1.9023810526831704e-05, + "loss": 1.6666, + "step": 27870 + }, + { + "epoch": 0.34, + "grad_norm": 9.24800427168827, + "learning_rate": 1.902355876632524e-05, + "loss": 1.5019, + "step": 27873 + }, + { + "epoch": 0.34, + "grad_norm": 8.901361376186141, + "learning_rate": 1.902330697502462e-05, + "loss": 1.5332, + "step": 27876 + }, + { + "epoch": 0.34, + "grad_norm": 34.823559690437726, + "learning_rate": 1.902305515293071e-05, + "loss": 1.3935, + "step": 27879 + }, + { + "epoch": 0.34, + "grad_norm": 15.473320662421393, + "learning_rate": 1.9022803300044354e-05, + "loss": 1.2006, + "step": 27882 + }, + { + "epoch": 0.34, + "grad_norm": 20.23892111916226, + "learning_rate": 1.9022551416366424e-05, + "loss": 1.8498, + "step": 27885 + }, + { + "epoch": 0.34, + "grad_norm": 49.87773915730408, + "learning_rate": 1.9022299501897775e-05, + "loss": 1.5861, + "step": 27888 + }, + { + "epoch": 0.34, + "grad_norm": 21.776529772989466, + "learning_rate": 1.902204755663927e-05, + "loss": 1.5505, + "step": 27891 + }, + { + "epoch": 0.34, + "grad_norm": 32.08987221729613, + "learning_rate": 1.9021795580591762e-05, + "loss": 1.5673, + "step": 27894 + }, + { + "epoch": 0.34, + "grad_norm": 34.26029230901223, + "learning_rate": 1.902154357375612e-05, + "loss": 1.5947, + "step": 27897 + }, + { + "epoch": 0.34, + "grad_norm": 111.38160251392743, + "learning_rate": 1.9021291536133196e-05, + "loss": 1.3758, + "step": 27900 + }, + { + "epoch": 0.34, + "grad_norm": 10.554941606410166, + "learning_rate": 1.9021039467723855e-05, + "loss": 1.7473, + "step": 27903 + }, + { + "epoch": 0.34, + "grad_norm": 5.8472574552561065, + "learning_rate": 1.9020787368528955e-05, + "loss": 1.669, + "step": 27906 + }, + { + "epoch": 0.34, + "grad_norm": 5.959870241324168, + "learning_rate": 1.9020535238549355e-05, + "loss": 1.0812, + "step": 27909 + }, + { + "epoch": 0.34, + "grad_norm": 15.257727666793478, + "learning_rate": 1.902028307778592e-05, + "loss": 1.7523, + "step": 27912 + }, + { + "epoch": 0.34, + "grad_norm": 8.963497381613722, + "learning_rate": 1.902003088623951e-05, + "loss": 1.4787, + "step": 27915 + }, + { + "epoch": 0.34, + "grad_norm": 18.12851933872799, + "learning_rate": 1.901977866391098e-05, + "loss": 1.4166, + "step": 27918 + }, + { + "epoch": 0.34, + "grad_norm": 3.1362777703193054, + "learning_rate": 1.9019526410801195e-05, + "loss": 1.3881, + "step": 27921 + }, + { + "epoch": 0.34, + "grad_norm": 8.79162618307272, + "learning_rate": 1.9019274126911016e-05, + "loss": 1.3337, + "step": 27924 + }, + { + "epoch": 0.34, + "grad_norm": 23.544061481512408, + "learning_rate": 1.9019021812241302e-05, + "loss": 2.0185, + "step": 27927 + }, + { + "epoch": 0.34, + "grad_norm": 21.405819505933543, + "learning_rate": 1.901876946679292e-05, + "loss": 1.4818, + "step": 27930 + }, + { + "epoch": 0.34, + "grad_norm": 10.010971434716142, + "learning_rate": 1.901851709056672e-05, + "loss": 1.2655, + "step": 27933 + }, + { + "epoch": 0.34, + "grad_norm": 15.19106974158223, + "learning_rate": 1.9018264683563573e-05, + "loss": 1.5014, + "step": 27936 + }, + { + "epoch": 0.34, + "grad_norm": 8.27091689534654, + "learning_rate": 1.9018012245784336e-05, + "loss": 1.5054, + "step": 27939 + }, + { + "epoch": 0.34, + "grad_norm": 46.971236723132485, + "learning_rate": 1.901775977722987e-05, + "loss": 1.3657, + "step": 27942 + }, + { + "epoch": 0.34, + "grad_norm": 44.326488173765085, + "learning_rate": 1.9017507277901042e-05, + "loss": 1.3473, + "step": 27945 + }, + { + "epoch": 0.34, + "grad_norm": 24.363911510753933, + "learning_rate": 1.9017254747798707e-05, + "loss": 1.4902, + "step": 27948 + }, + { + "epoch": 0.34, + "grad_norm": 27.591840322007457, + "learning_rate": 1.901700218692373e-05, + "loss": 1.8232, + "step": 27951 + }, + { + "epoch": 0.34, + "grad_norm": 13.196675884125014, + "learning_rate": 1.9016749595276972e-05, + "loss": 1.6951, + "step": 27954 + }, + { + "epoch": 0.34, + "grad_norm": 20.85868358742936, + "learning_rate": 1.9016496972859294e-05, + "loss": 1.7106, + "step": 27957 + }, + { + "epoch": 0.34, + "grad_norm": 18.358243851111627, + "learning_rate": 1.9016244319671562e-05, + "loss": 1.484, + "step": 27960 + }, + { + "epoch": 0.34, + "grad_norm": 15.355113843415392, + "learning_rate": 1.9015991635714633e-05, + "loss": 2.0828, + "step": 27963 + }, + { + "epoch": 0.34, + "grad_norm": 18.55294524568292, + "learning_rate": 1.9015738920989375e-05, + "loss": 1.7155, + "step": 27966 + }, + { + "epoch": 0.34, + "grad_norm": 37.499529230508074, + "learning_rate": 1.9015486175496647e-05, + "loss": 1.7984, + "step": 27969 + }, + { + "epoch": 0.34, + "grad_norm": 35.232359582319994, + "learning_rate": 1.9015233399237312e-05, + "loss": 1.4973, + "step": 27972 + }, + { + "epoch": 0.34, + "grad_norm": 5.147323384309238, + "learning_rate": 1.901498059221223e-05, + "loss": 1.4599, + "step": 27975 + }, + { + "epoch": 0.34, + "grad_norm": 10.870713036078012, + "learning_rate": 1.901472775442227e-05, + "loss": 1.6049, + "step": 27978 + }, + { + "epoch": 0.34, + "grad_norm": 12.350058914931603, + "learning_rate": 1.901447488586829e-05, + "loss": 1.2462, + "step": 27981 + }, + { + "epoch": 0.34, + "grad_norm": 9.927431973220829, + "learning_rate": 1.901422198655116e-05, + "loss": 1.5628, + "step": 27984 + }, + { + "epoch": 0.34, + "grad_norm": 13.345650886297548, + "learning_rate": 1.9013969056471727e-05, + "loss": 1.6586, + "step": 27987 + }, + { + "epoch": 0.34, + "grad_norm": 5.016461684159787, + "learning_rate": 1.901371609563087e-05, + "loss": 1.3149, + "step": 27990 + }, + { + "epoch": 0.34, + "grad_norm": 60.92086317155658, + "learning_rate": 1.9013463104029445e-05, + "loss": 1.4318, + "step": 27993 + }, + { + "epoch": 0.34, + "grad_norm": 11.13198272854075, + "learning_rate": 1.9013210081668318e-05, + "loss": 1.618, + "step": 27996 + }, + { + "epoch": 0.34, + "grad_norm": 5.886557062269175, + "learning_rate": 1.9012957028548352e-05, + "loss": 1.5817, + "step": 27999 + }, + { + "epoch": 0.34, + "grad_norm": 13.475344463853835, + "learning_rate": 1.9012703944670408e-05, + "loss": 1.4426, + "step": 28002 + }, + { + "epoch": 0.34, + "grad_norm": 3.2446904018191116, + "learning_rate": 1.9012450830035355e-05, + "loss": 1.7175, + "step": 28005 + }, + { + "epoch": 0.34, + "grad_norm": 18.076930001837887, + "learning_rate": 1.9012197684644052e-05, + "loss": 1.1613, + "step": 28008 + }, + { + "epoch": 0.34, + "grad_norm": 37.09155494733658, + "learning_rate": 1.9011944508497362e-05, + "loss": 1.5717, + "step": 28011 + }, + { + "epoch": 0.34, + "grad_norm": 12.965932877129578, + "learning_rate": 1.9011691301596155e-05, + "loss": 1.3275, + "step": 28014 + }, + { + "epoch": 0.34, + "grad_norm": 5.1176334339227285, + "learning_rate": 1.9011438063941294e-05, + "loss": 1.7321, + "step": 28017 + }, + { + "epoch": 0.34, + "grad_norm": 15.80213082535225, + "learning_rate": 1.9011184795533633e-05, + "loss": 1.6646, + "step": 28020 + }, + { + "epoch": 0.34, + "grad_norm": 30.745363079002427, + "learning_rate": 1.901093149637405e-05, + "loss": 1.5937, + "step": 28023 + }, + { + "epoch": 0.34, + "grad_norm": 12.038877644127492, + "learning_rate": 1.90106781664634e-05, + "loss": 1.4361, + "step": 28026 + }, + { + "epoch": 0.34, + "grad_norm": 22.1891161387202, + "learning_rate": 1.901042480580255e-05, + "loss": 1.9352, + "step": 28029 + }, + { + "epoch": 0.34, + "grad_norm": 12.250402033996592, + "learning_rate": 1.9010171414392374e-05, + "loss": 1.6764, + "step": 28032 + }, + { + "epoch": 0.34, + "grad_norm": 3.7498653064042795, + "learning_rate": 1.9009917992233723e-05, + "loss": 1.4148, + "step": 28035 + }, + { + "epoch": 0.34, + "grad_norm": 27.2828832574797, + "learning_rate": 1.9009664539327467e-05, + "loss": 1.4998, + "step": 28038 + }, + { + "epoch": 0.34, + "grad_norm": 23.101803793126457, + "learning_rate": 1.900941105567447e-05, + "loss": 1.6025, + "step": 28041 + }, + { + "epoch": 0.34, + "grad_norm": 21.514056054214322, + "learning_rate": 1.9009157541275602e-05, + "loss": 1.3598, + "step": 28044 + }, + { + "epoch": 0.34, + "grad_norm": 12.26653475242805, + "learning_rate": 1.900890399613172e-05, + "loss": 1.4203, + "step": 28047 + }, + { + "epoch": 0.34, + "grad_norm": 15.849134977880377, + "learning_rate": 1.9008650420243694e-05, + "loss": 1.4723, + "step": 28050 + }, + { + "epoch": 0.34, + "grad_norm": 59.382706717059435, + "learning_rate": 1.900839681361239e-05, + "loss": 1.4572, + "step": 28053 + }, + { + "epoch": 0.34, + "grad_norm": 12.786504536147579, + "learning_rate": 1.9008143176238675e-05, + "loss": 1.8142, + "step": 28056 + }, + { + "epoch": 0.34, + "grad_norm": 72.57541977839757, + "learning_rate": 1.9007889508123408e-05, + "loss": 1.4974, + "step": 28059 + }, + { + "epoch": 0.34, + "grad_norm": 10.807826561821372, + "learning_rate": 1.9007635809267465e-05, + "loss": 1.4196, + "step": 28062 + }, + { + "epoch": 0.34, + "grad_norm": 5.546574364823208, + "learning_rate": 1.90073820796717e-05, + "loss": 1.7986, + "step": 28065 + }, + { + "epoch": 0.34, + "grad_norm": 14.715232437026282, + "learning_rate": 1.9007128319336984e-05, + "loss": 1.7931, + "step": 28068 + }, + { + "epoch": 0.34, + "grad_norm": 22.913029634977136, + "learning_rate": 1.9006874528264188e-05, + "loss": 1.5592, + "step": 28071 + }, + { + "epoch": 0.34, + "grad_norm": 11.531838146031413, + "learning_rate": 1.900662070645417e-05, + "loss": 1.3268, + "step": 28074 + }, + { + "epoch": 0.34, + "grad_norm": 12.574408752265377, + "learning_rate": 1.90063668539078e-05, + "loss": 1.4884, + "step": 28077 + }, + { + "epoch": 0.34, + "grad_norm": 5.9475664841607365, + "learning_rate": 1.9006112970625944e-05, + "loss": 1.7392, + "step": 28080 + }, + { + "epoch": 0.34, + "grad_norm": 16.255314474134977, + "learning_rate": 1.9005859056609467e-05, + "loss": 1.2054, + "step": 28083 + }, + { + "epoch": 0.34, + "grad_norm": 15.085219278577327, + "learning_rate": 1.900560511185924e-05, + "loss": 1.5137, + "step": 28086 + }, + { + "epoch": 0.34, + "grad_norm": 7.738051468866856, + "learning_rate": 1.900535113637613e-05, + "loss": 1.6583, + "step": 28089 + }, + { + "epoch": 0.34, + "grad_norm": 45.75370769257559, + "learning_rate": 1.9005097130160992e-05, + "loss": 1.6413, + "step": 28092 + }, + { + "epoch": 0.34, + "grad_norm": 9.674102925165313, + "learning_rate": 1.9004843093214704e-05, + "loss": 1.3268, + "step": 28095 + }, + { + "epoch": 0.34, + "grad_norm": 9.36300357455063, + "learning_rate": 1.9004589025538132e-05, + "loss": 1.6811, + "step": 28098 + }, + { + "epoch": 0.34, + "grad_norm": 7.136346013640787, + "learning_rate": 1.900433492713214e-05, + "loss": 1.5291, + "step": 28101 + }, + { + "epoch": 0.34, + "grad_norm": 20.380404642426093, + "learning_rate": 1.9004080797997595e-05, + "loss": 1.5002, + "step": 28104 + }, + { + "epoch": 0.34, + "grad_norm": 11.044268464588825, + "learning_rate": 1.9003826638135368e-05, + "loss": 1.6603, + "step": 28107 + }, + { + "epoch": 0.34, + "grad_norm": 14.52209357886805, + "learning_rate": 1.9003572447546323e-05, + "loss": 1.6608, + "step": 28110 + }, + { + "epoch": 0.34, + "grad_norm": 8.353743263970369, + "learning_rate": 1.9003318226231327e-05, + "loss": 1.2145, + "step": 28113 + }, + { + "epoch": 0.34, + "grad_norm": 21.096199037154232, + "learning_rate": 1.900306397419125e-05, + "loss": 1.3284, + "step": 28116 + }, + { + "epoch": 0.34, + "grad_norm": 37.9713044674623, + "learning_rate": 1.900280969142696e-05, + "loss": 1.6181, + "step": 28119 + }, + { + "epoch": 0.34, + "grad_norm": 13.031455404717377, + "learning_rate": 1.9002555377939318e-05, + "loss": 1.3857, + "step": 28122 + }, + { + "epoch": 0.34, + "grad_norm": 72.22868393184143, + "learning_rate": 1.9002301033729203e-05, + "loss": 1.3929, + "step": 28125 + }, + { + "epoch": 0.34, + "grad_norm": 20.077971162449494, + "learning_rate": 1.9002046658797474e-05, + "loss": 1.33, + "step": 28128 + }, + { + "epoch": 0.34, + "grad_norm": 10.587593899779362, + "learning_rate": 1.9001792253145005e-05, + "loss": 1.4457, + "step": 28131 + }, + { + "epoch": 0.34, + "grad_norm": 6.260806710854654, + "learning_rate": 1.9001537816772657e-05, + "loss": 1.2954, + "step": 28134 + }, + { + "epoch": 0.34, + "grad_norm": 29.177632002014004, + "learning_rate": 1.9001283349681307e-05, + "loss": 1.6221, + "step": 28137 + }, + { + "epoch": 0.34, + "grad_norm": 48.90479295415367, + "learning_rate": 1.9001028851871816e-05, + "loss": 1.5428, + "step": 28140 + }, + { + "epoch": 0.34, + "grad_norm": 36.37862400173825, + "learning_rate": 1.9000774323345058e-05, + "loss": 1.3353, + "step": 28143 + }, + { + "epoch": 0.34, + "grad_norm": 24.12608476136419, + "learning_rate": 1.90005197641019e-05, + "loss": 1.6477, + "step": 28146 + }, + { + "epoch": 0.34, + "grad_norm": 11.41144820617446, + "learning_rate": 1.9000265174143207e-05, + "loss": 1.7532, + "step": 28149 + }, + { + "epoch": 0.34, + "grad_norm": 15.297271158227344, + "learning_rate": 1.9000010553469856e-05, + "loss": 1.6255, + "step": 28152 + }, + { + "epoch": 0.34, + "grad_norm": 3.9418389115815584, + "learning_rate": 1.8999755902082704e-05, + "loss": 1.3189, + "step": 28155 + }, + { + "epoch": 0.34, + "grad_norm": 19.305600056695685, + "learning_rate": 1.899950121998263e-05, + "loss": 1.3149, + "step": 28158 + }, + { + "epoch": 0.34, + "grad_norm": 23.600454011673357, + "learning_rate": 1.89992465071705e-05, + "loss": 1.644, + "step": 28161 + }, + { + "epoch": 0.34, + "grad_norm": 19.200075784626662, + "learning_rate": 1.8998991763647183e-05, + "loss": 1.5675, + "step": 28164 + }, + { + "epoch": 0.34, + "grad_norm": 14.838793694293198, + "learning_rate": 1.899873698941355e-05, + "loss": 1.5122, + "step": 28167 + }, + { + "epoch": 0.34, + "grad_norm": 7.394909516580814, + "learning_rate": 1.899848218447047e-05, + "loss": 1.5289, + "step": 28170 + }, + { + "epoch": 0.34, + "grad_norm": 28.76754204656281, + "learning_rate": 1.8998227348818807e-05, + "loss": 1.6849, + "step": 28173 + }, + { + "epoch": 0.34, + "grad_norm": 11.300484588196985, + "learning_rate": 1.8997972482459437e-05, + "loss": 1.1275, + "step": 28176 + }, + { + "epoch": 0.34, + "grad_norm": 37.13445575619809, + "learning_rate": 1.899771758539323e-05, + "loss": 1.3795, + "step": 28179 + }, + { + "epoch": 0.34, + "grad_norm": 28.16864291125539, + "learning_rate": 1.899746265762105e-05, + "loss": 1.4269, + "step": 28182 + }, + { + "epoch": 0.34, + "grad_norm": 16.152591634479958, + "learning_rate": 1.8997207699143774e-05, + "loss": 1.4594, + "step": 28185 + }, + { + "epoch": 0.34, + "grad_norm": 104.76061506111999, + "learning_rate": 1.8996952709962267e-05, + "loss": 1.2978, + "step": 28188 + }, + { + "epoch": 0.34, + "grad_norm": 36.18421380964608, + "learning_rate": 1.8996697690077402e-05, + "loss": 1.5709, + "step": 28191 + }, + { + "epoch": 0.34, + "grad_norm": 16.186639171611695, + "learning_rate": 1.899644263949005e-05, + "loss": 1.2884, + "step": 28194 + }, + { + "epoch": 0.34, + "grad_norm": 18.075416929974473, + "learning_rate": 1.899618755820108e-05, + "loss": 1.7652, + "step": 28197 + }, + { + "epoch": 0.34, + "grad_norm": 14.498314496697436, + "learning_rate": 1.899593244621136e-05, + "loss": 1.275, + "step": 28200 + }, + { + "epoch": 0.34, + "grad_norm": 66.01440349879562, + "learning_rate": 1.899567730352176e-05, + "loss": 1.4479, + "step": 28203 + }, + { + "epoch": 0.34, + "grad_norm": 32.660368926208434, + "learning_rate": 1.899542213013316e-05, + "loss": 1.2076, + "step": 28206 + }, + { + "epoch": 0.34, + "grad_norm": 32.57926477152256, + "learning_rate": 1.899516692604642e-05, + "loss": 1.6237, + "step": 28209 + }, + { + "epoch": 0.34, + "grad_norm": 4.733460878930267, + "learning_rate": 1.8994911691262416e-05, + "loss": 1.3421, + "step": 28212 + }, + { + "epoch": 0.34, + "grad_norm": 4.563968600533252, + "learning_rate": 1.899465642578202e-05, + "loss": 1.7944, + "step": 28215 + }, + { + "epoch": 0.34, + "grad_norm": 11.162728373386408, + "learning_rate": 1.89944011296061e-05, + "loss": 1.376, + "step": 28218 + }, + { + "epoch": 0.34, + "grad_norm": 49.88864404542445, + "learning_rate": 1.8994145802735528e-05, + "loss": 1.5716, + "step": 28221 + }, + { + "epoch": 0.34, + "grad_norm": 35.82468382420505, + "learning_rate": 1.899389044517118e-05, + "loss": 1.2385, + "step": 28224 + }, + { + "epoch": 0.34, + "grad_norm": 7.097090187622738, + "learning_rate": 1.899363505691392e-05, + "loss": 1.8921, + "step": 28227 + }, + { + "epoch": 0.34, + "grad_norm": 24.758309626450064, + "learning_rate": 1.8993379637964626e-05, + "loss": 1.6084, + "step": 28230 + }, + { + "epoch": 0.34, + "grad_norm": 11.736285828495182, + "learning_rate": 1.899312418832416e-05, + "loss": 1.4413, + "step": 28233 + }, + { + "epoch": 0.34, + "grad_norm": 76.53388830935198, + "learning_rate": 1.899286870799341e-05, + "loss": 1.2598, + "step": 28236 + }, + { + "epoch": 0.34, + "grad_norm": 12.106400506061469, + "learning_rate": 1.899261319697323e-05, + "loss": 1.471, + "step": 28239 + }, + { + "epoch": 0.34, + "grad_norm": 27.09668909516741, + "learning_rate": 1.8992357655264505e-05, + "loss": 1.4679, + "step": 28242 + }, + { + "epoch": 0.34, + "grad_norm": 5.664761097289221, + "learning_rate": 1.89921020828681e-05, + "loss": 1.4583, + "step": 28245 + }, + { + "epoch": 0.34, + "grad_norm": 16.55134995602691, + "learning_rate": 1.899184647978489e-05, + "loss": 1.5113, + "step": 28248 + }, + { + "epoch": 0.34, + "grad_norm": 14.534630644799927, + "learning_rate": 1.8991590846015748e-05, + "loss": 1.523, + "step": 28251 + }, + { + "epoch": 0.34, + "grad_norm": 5.18535100946635, + "learning_rate": 1.8991335181561544e-05, + "loss": 1.4811, + "step": 28254 + }, + { + "epoch": 0.34, + "grad_norm": 30.40385853352567, + "learning_rate": 1.8991079486423152e-05, + "loss": 1.7563, + "step": 28257 + }, + { + "epoch": 0.34, + "grad_norm": 22.79491929427512, + "learning_rate": 1.8990823760601445e-05, + "loss": 1.4208, + "step": 28260 + }, + { + "epoch": 0.34, + "grad_norm": 25.979675677523858, + "learning_rate": 1.8990568004097295e-05, + "loss": 1.4539, + "step": 28263 + }, + { + "epoch": 0.34, + "grad_norm": 11.889174777806685, + "learning_rate": 1.899031221691157e-05, + "loss": 1.3682, + "step": 28266 + }, + { + "epoch": 0.34, + "grad_norm": 79.21899911291831, + "learning_rate": 1.8990056399045156e-05, + "loss": 1.6385, + "step": 28269 + }, + { + "epoch": 0.34, + "grad_norm": 11.575987346924443, + "learning_rate": 1.8989800550498912e-05, + "loss": 1.9766, + "step": 28272 + }, + { + "epoch": 0.34, + "grad_norm": 5.321356560079567, + "learning_rate": 1.8989544671273718e-05, + "loss": 1.6986, + "step": 28275 + }, + { + "epoch": 0.34, + "grad_norm": 33.471761590655575, + "learning_rate": 1.8989288761370447e-05, + "loss": 1.3996, + "step": 28278 + }, + { + "epoch": 0.34, + "grad_norm": 23.767548785530696, + "learning_rate": 1.898903282078997e-05, + "loss": 1.2896, + "step": 28281 + }, + { + "epoch": 0.34, + "grad_norm": 14.70219194705891, + "learning_rate": 1.8988776849533166e-05, + "loss": 1.3798, + "step": 28284 + }, + { + "epoch": 0.34, + "grad_norm": 3.7593374662462935, + "learning_rate": 1.89885208476009e-05, + "loss": 1.6731, + "step": 28287 + }, + { + "epoch": 0.34, + "grad_norm": 25.88195851905145, + "learning_rate": 1.8988264814994052e-05, + "loss": 1.2126, + "step": 28290 + }, + { + "epoch": 0.34, + "grad_norm": 17.805452600130284, + "learning_rate": 1.898800875171349e-05, + "loss": 1.5365, + "step": 28293 + }, + { + "epoch": 0.34, + "grad_norm": 12.548605501677196, + "learning_rate": 1.8987752657760095e-05, + "loss": 1.5309, + "step": 28296 + }, + { + "epoch": 0.34, + "grad_norm": 26.46653868727611, + "learning_rate": 1.8987496533134737e-05, + "loss": 1.4963, + "step": 28299 + }, + { + "epoch": 0.34, + "grad_norm": 8.98111884666557, + "learning_rate": 1.898724037783829e-05, + "loss": 1.7147, + "step": 28302 + }, + { + "epoch": 0.34, + "grad_norm": 12.750411591488211, + "learning_rate": 1.898698419187163e-05, + "loss": 1.6502, + "step": 28305 + }, + { + "epoch": 0.34, + "grad_norm": 12.678059879685065, + "learning_rate": 1.8986727975235632e-05, + "loss": 1.4235, + "step": 28308 + }, + { + "epoch": 0.34, + "grad_norm": 12.401627170165284, + "learning_rate": 1.8986471727931163e-05, + "loss": 1.4236, + "step": 28311 + }, + { + "epoch": 0.34, + "grad_norm": 28.191960368240977, + "learning_rate": 1.8986215449959106e-05, + "loss": 1.2981, + "step": 28314 + }, + { + "epoch": 0.34, + "grad_norm": 21.72948960657199, + "learning_rate": 1.8985959141320334e-05, + "loss": 1.2685, + "step": 28317 + }, + { + "epoch": 0.34, + "grad_norm": 32.21517483891992, + "learning_rate": 1.8985702802015717e-05, + "loss": 1.6275, + "step": 28320 + }, + { + "epoch": 0.34, + "grad_norm": 14.171024453071693, + "learning_rate": 1.8985446432046135e-05, + "loss": 1.3263, + "step": 28323 + }, + { + "epoch": 0.34, + "grad_norm": 11.2435894059746, + "learning_rate": 1.898519003141246e-05, + "loss": 1.254, + "step": 28326 + }, + { + "epoch": 0.34, + "grad_norm": 10.731116382283874, + "learning_rate": 1.898493360011557e-05, + "loss": 1.3634, + "step": 28329 + }, + { + "epoch": 0.34, + "grad_norm": 13.591153097517175, + "learning_rate": 1.8984677138156334e-05, + "loss": 1.4077, + "step": 28332 + }, + { + "epoch": 0.34, + "grad_norm": 9.390345297804894, + "learning_rate": 1.8984420645535632e-05, + "loss": 1.6449, + "step": 28335 + }, + { + "epoch": 0.34, + "grad_norm": 25.20808900850364, + "learning_rate": 1.898416412225434e-05, + "loss": 1.2933, + "step": 28338 + }, + { + "epoch": 0.34, + "grad_norm": 13.123682467236929, + "learning_rate": 1.8983907568313333e-05, + "loss": 1.4491, + "step": 28341 + }, + { + "epoch": 0.34, + "grad_norm": 5.63383086211824, + "learning_rate": 1.8983650983713483e-05, + "loss": 1.2018, + "step": 28344 + }, + { + "epoch": 0.34, + "grad_norm": 25.387330477285126, + "learning_rate": 1.8983394368455668e-05, + "loss": 1.3082, + "step": 28347 + }, + { + "epoch": 0.34, + "grad_norm": 10.729023133893778, + "learning_rate": 1.8983137722540767e-05, + "loss": 1.3845, + "step": 28350 + }, + { + "epoch": 0.34, + "grad_norm": 16.110648107828943, + "learning_rate": 1.8982881045969653e-05, + "loss": 1.4754, + "step": 28353 + }, + { + "epoch": 0.34, + "grad_norm": 11.2479023455964, + "learning_rate": 1.89826243387432e-05, + "loss": 1.3073, + "step": 28356 + }, + { + "epoch": 0.34, + "grad_norm": 19.12518881195045, + "learning_rate": 1.8982367600862282e-05, + "loss": 1.738, + "step": 28359 + }, + { + "epoch": 0.34, + "grad_norm": 15.43108905590525, + "learning_rate": 1.8982110832327784e-05, + "loss": 1.1376, + "step": 28362 + }, + { + "epoch": 0.34, + "grad_norm": 48.36702686858515, + "learning_rate": 1.8981854033140574e-05, + "loss": 1.6549, + "step": 28365 + }, + { + "epoch": 0.34, + "grad_norm": 14.851499716420525, + "learning_rate": 1.898159720330153e-05, + "loss": 1.351, + "step": 28368 + }, + { + "epoch": 0.34, + "grad_norm": 10.771054680872677, + "learning_rate": 1.8981340342811534e-05, + "loss": 1.4444, + "step": 28371 + }, + { + "epoch": 0.34, + "grad_norm": 35.67806738160628, + "learning_rate": 1.898108345167146e-05, + "loss": 1.5784, + "step": 28374 + }, + { + "epoch": 0.34, + "grad_norm": 44.32128130618342, + "learning_rate": 1.8980826529882176e-05, + "loss": 1.3505, + "step": 28377 + }, + { + "epoch": 0.34, + "grad_norm": 8.753037798757601, + "learning_rate": 1.898056957744457e-05, + "loss": 1.2486, + "step": 28380 + }, + { + "epoch": 0.34, + "grad_norm": 137.58823225792082, + "learning_rate": 1.8980312594359517e-05, + "loss": 1.222, + "step": 28383 + }, + { + "epoch": 0.34, + "grad_norm": 5.811087520135762, + "learning_rate": 1.898005558062789e-05, + "loss": 1.5413, + "step": 28386 + }, + { + "epoch": 0.34, + "grad_norm": 12.399628249135493, + "learning_rate": 1.8979798536250564e-05, + "loss": 1.5616, + "step": 28389 + }, + { + "epoch": 0.34, + "grad_norm": 10.086445083049215, + "learning_rate": 1.8979541461228426e-05, + "loss": 1.3642, + "step": 28392 + }, + { + "epoch": 0.34, + "grad_norm": 19.119226701916304, + "learning_rate": 1.8979284355562343e-05, + "loss": 1.4342, + "step": 28395 + }, + { + "epoch": 0.34, + "grad_norm": 7.245517171852826, + "learning_rate": 1.8979027219253197e-05, + "loss": 1.8298, + "step": 28398 + }, + { + "epoch": 0.34, + "grad_norm": 11.221926354267316, + "learning_rate": 1.897877005230187e-05, + "loss": 1.4207, + "step": 28401 + }, + { + "epoch": 0.34, + "grad_norm": 41.50334325822438, + "learning_rate": 1.897851285470923e-05, + "loss": 1.5122, + "step": 28404 + }, + { + "epoch": 0.34, + "grad_norm": 6.259710699572661, + "learning_rate": 1.8978255626476164e-05, + "loss": 1.8982, + "step": 28407 + }, + { + "epoch": 0.34, + "grad_norm": 13.959796616410603, + "learning_rate": 1.8977998367603542e-05, + "loss": 1.7936, + "step": 28410 + }, + { + "epoch": 0.34, + "grad_norm": 24.27218348458021, + "learning_rate": 1.8977741078092245e-05, + "loss": 1.5007, + "step": 28413 + }, + { + "epoch": 0.34, + "grad_norm": 10.407359437676591, + "learning_rate": 1.8977483757943153e-05, + "loss": 1.8517, + "step": 28416 + }, + { + "epoch": 0.34, + "grad_norm": 6.162849571442026, + "learning_rate": 1.8977226407157143e-05, + "loss": 1.5984, + "step": 28419 + }, + { + "epoch": 0.34, + "grad_norm": 44.575864236327796, + "learning_rate": 1.8976969025735094e-05, + "loss": 1.5684, + "step": 28422 + }, + { + "epoch": 0.34, + "grad_norm": 13.24500836192271, + "learning_rate": 1.8976711613677882e-05, + "loss": 1.4978, + "step": 28425 + }, + { + "epoch": 0.34, + "grad_norm": 6.05199077730593, + "learning_rate": 1.897645417098639e-05, + "loss": 1.461, + "step": 28428 + }, + { + "epoch": 0.34, + "grad_norm": 17.753197430097998, + "learning_rate": 1.8976196697661488e-05, + "loss": 1.5081, + "step": 28431 + }, + { + "epoch": 0.34, + "grad_norm": 14.78142799529295, + "learning_rate": 1.8975939193704057e-05, + "loss": 1.884, + "step": 28434 + }, + { + "epoch": 0.34, + "grad_norm": 13.305257428758487, + "learning_rate": 1.8975681659114985e-05, + "loss": 1.5507, + "step": 28437 + }, + { + "epoch": 0.34, + "grad_norm": 24.36331767997248, + "learning_rate": 1.8975424093895144e-05, + "loss": 1.6124, + "step": 28440 + }, + { + "epoch": 0.34, + "grad_norm": 4.309434588212517, + "learning_rate": 1.897516649804541e-05, + "loss": 1.2319, + "step": 28443 + }, + { + "epoch": 0.34, + "grad_norm": 19.614123241946434, + "learning_rate": 1.897490887156667e-05, + "loss": 1.7712, + "step": 28446 + }, + { + "epoch": 0.34, + "grad_norm": 22.27870721839925, + "learning_rate": 1.8974651214459797e-05, + "loss": 1.5661, + "step": 28449 + }, + { + "epoch": 0.34, + "grad_norm": 18.540597039683604, + "learning_rate": 1.8974393526725673e-05, + "loss": 1.3014, + "step": 28452 + }, + { + "epoch": 0.34, + "grad_norm": 16.85648641888629, + "learning_rate": 1.8974135808365172e-05, + "loss": 1.4979, + "step": 28455 + }, + { + "epoch": 0.34, + "grad_norm": 18.340239581760105, + "learning_rate": 1.897387805937918e-05, + "loss": 1.3558, + "step": 28458 + }, + { + "epoch": 0.34, + "grad_norm": 17.663357193984318, + "learning_rate": 1.8973620279768578e-05, + "loss": 1.5569, + "step": 28461 + }, + { + "epoch": 0.34, + "grad_norm": 16.023021893210174, + "learning_rate": 1.897336246953424e-05, + "loss": 1.5268, + "step": 28464 + }, + { + "epoch": 0.34, + "grad_norm": 6.578697426002711, + "learning_rate": 1.8973104628677046e-05, + "loss": 1.5418, + "step": 28467 + }, + { + "epoch": 0.34, + "grad_norm": 14.443404143606926, + "learning_rate": 1.897284675719788e-05, + "loss": 1.3155, + "step": 28470 + }, + { + "epoch": 0.34, + "grad_norm": 12.57587014446228, + "learning_rate": 1.897258885509762e-05, + "loss": 1.7271, + "step": 28473 + }, + { + "epoch": 0.34, + "grad_norm": 6.301271020483238, + "learning_rate": 1.8972330922377148e-05, + "loss": 1.5452, + "step": 28476 + }, + { + "epoch": 0.34, + "grad_norm": 9.52178746806183, + "learning_rate": 1.897207295903734e-05, + "loss": 1.5745, + "step": 28479 + }, + { + "epoch": 0.34, + "grad_norm": 20.242747150893628, + "learning_rate": 1.897181496507908e-05, + "loss": 1.8009, + "step": 28482 + }, + { + "epoch": 0.34, + "grad_norm": 3.6651152804263765, + "learning_rate": 1.8971556940503242e-05, + "loss": 1.2188, + "step": 28485 + }, + { + "epoch": 0.34, + "grad_norm": 11.77914232003206, + "learning_rate": 1.897129888531072e-05, + "loss": 1.6359, + "step": 28488 + }, + { + "epoch": 0.34, + "grad_norm": 12.367578053923465, + "learning_rate": 1.897104079950238e-05, + "loss": 1.4103, + "step": 28491 + }, + { + "epoch": 0.34, + "grad_norm": 16.848787994123057, + "learning_rate": 1.897078268307911e-05, + "loss": 1.9023, + "step": 28494 + }, + { + "epoch": 0.34, + "grad_norm": 18.11425806085982, + "learning_rate": 1.8970524536041792e-05, + "loss": 1.3914, + "step": 28497 + }, + { + "epoch": 0.34, + "grad_norm": 45.661781099662036, + "learning_rate": 1.8970266358391306e-05, + "loss": 1.8126, + "step": 28500 + }, + { + "epoch": 0.34, + "grad_norm": 43.85192668089425, + "learning_rate": 1.897000815012853e-05, + "loss": 1.3603, + "step": 28503 + }, + { + "epoch": 0.34, + "grad_norm": 6.431354852055407, + "learning_rate": 1.8969749911254348e-05, + "loss": 1.7631, + "step": 28506 + }, + { + "epoch": 0.34, + "grad_norm": 22.292762787399493, + "learning_rate": 1.896949164176964e-05, + "loss": 1.8123, + "step": 28509 + }, + { + "epoch": 0.34, + "grad_norm": 96.46929260782036, + "learning_rate": 1.8969233341675284e-05, + "loss": 1.5822, + "step": 28512 + }, + { + "epoch": 0.34, + "grad_norm": 39.39500740324974, + "learning_rate": 1.8968975010972173e-05, + "loss": 1.6644, + "step": 28515 + }, + { + "epoch": 0.34, + "grad_norm": 10.262257448327516, + "learning_rate": 1.8968716649661176e-05, + "loss": 1.4665, + "step": 28518 + }, + { + "epoch": 0.34, + "grad_norm": 21.713496569978016, + "learning_rate": 1.896845825774318e-05, + "loss": 1.1257, + "step": 28521 + }, + { + "epoch": 0.34, + "grad_norm": 11.879782465440332, + "learning_rate": 1.8968199835219067e-05, + "loss": 1.4078, + "step": 28524 + }, + { + "epoch": 0.34, + "grad_norm": 9.052553835545062, + "learning_rate": 1.896794138208972e-05, + "loss": 1.5988, + "step": 28527 + }, + { + "epoch": 0.34, + "grad_norm": 9.08079764269712, + "learning_rate": 1.8967682898356017e-05, + "loss": 1.7694, + "step": 28530 + }, + { + "epoch": 0.34, + "grad_norm": 10.390011724104163, + "learning_rate": 1.8967424384018846e-05, + "loss": 1.5305, + "step": 28533 + }, + { + "epoch": 0.34, + "grad_norm": 14.854153582612575, + "learning_rate": 1.896716583907908e-05, + "loss": 1.4601, + "step": 28536 + }, + { + "epoch": 0.34, + "grad_norm": 42.118918506039236, + "learning_rate": 1.8966907263537607e-05, + "loss": 1.4051, + "step": 28539 + }, + { + "epoch": 0.34, + "grad_norm": 26.987296308986412, + "learning_rate": 1.896664865739531e-05, + "loss": 1.6248, + "step": 28542 + }, + { + "epoch": 0.34, + "grad_norm": 6.814352715062134, + "learning_rate": 1.8966390020653077e-05, + "loss": 1.6601, + "step": 28545 + }, + { + "epoch": 0.34, + "grad_norm": 8.607567636096237, + "learning_rate": 1.896613135331178e-05, + "loss": 1.4965, + "step": 28548 + }, + { + "epoch": 0.34, + "grad_norm": 10.673913936989301, + "learning_rate": 1.896587265537231e-05, + "loss": 1.2982, + "step": 28551 + }, + { + "epoch": 0.34, + "grad_norm": 74.12190050489167, + "learning_rate": 1.896561392683554e-05, + "loss": 1.5879, + "step": 28554 + }, + { + "epoch": 0.34, + "grad_norm": 19.022079658422506, + "learning_rate": 1.8965355167702363e-05, + "loss": 1.0673, + "step": 28557 + }, + { + "epoch": 0.34, + "grad_norm": 5.8532032548767825, + "learning_rate": 1.8965096377973655e-05, + "loss": 1.4823, + "step": 28560 + }, + { + "epoch": 0.34, + "grad_norm": 14.49127100015555, + "learning_rate": 1.8964837557650307e-05, + "loss": 1.806, + "step": 28563 + }, + { + "epoch": 0.34, + "grad_norm": 14.907580874119162, + "learning_rate": 1.8964578706733193e-05, + "loss": 1.3617, + "step": 28566 + }, + { + "epoch": 0.34, + "grad_norm": 11.312519011958424, + "learning_rate": 1.8964319825223203e-05, + "loss": 1.2455, + "step": 28569 + }, + { + "epoch": 0.34, + "grad_norm": 23.496636247867407, + "learning_rate": 1.896406091312122e-05, + "loss": 1.6787, + "step": 28572 + }, + { + "epoch": 0.34, + "grad_norm": 16.893041585725502, + "learning_rate": 1.8963801970428123e-05, + "loss": 1.5276, + "step": 28575 + }, + { + "epoch": 0.34, + "grad_norm": 31.572667845996424, + "learning_rate": 1.8963542997144798e-05, + "loss": 1.615, + "step": 28578 + }, + { + "epoch": 0.34, + "grad_norm": 17.979551884535113, + "learning_rate": 1.896328399327213e-05, + "loss": 1.4715, + "step": 28581 + }, + { + "epoch": 0.34, + "grad_norm": 19.05915763351545, + "learning_rate": 1.8963024958811003e-05, + "loss": 1.7202, + "step": 28584 + }, + { + "epoch": 0.34, + "grad_norm": 49.89284581079947, + "learning_rate": 1.89627658937623e-05, + "loss": 1.6117, + "step": 28587 + }, + { + "epoch": 0.34, + "grad_norm": 5.007065725616672, + "learning_rate": 1.8962506798126908e-05, + "loss": 1.7555, + "step": 28590 + }, + { + "epoch": 0.34, + "grad_norm": 29.379045272221802, + "learning_rate": 1.8962247671905705e-05, + "loss": 1.6323, + "step": 28593 + }, + { + "epoch": 0.34, + "grad_norm": 7.058320548283056, + "learning_rate": 1.896198851509958e-05, + "loss": 1.5782, + "step": 28596 + }, + { + "epoch": 0.34, + "grad_norm": 16.43396306017675, + "learning_rate": 1.896172932770942e-05, + "loss": 1.6636, + "step": 28599 + }, + { + "epoch": 0.34, + "grad_norm": 19.2080733434121, + "learning_rate": 1.8961470109736102e-05, + "loss": 1.6707, + "step": 28602 + }, + { + "epoch": 0.34, + "grad_norm": 54.41953994743692, + "learning_rate": 1.8961210861180513e-05, + "loss": 1.5043, + "step": 28605 + }, + { + "epoch": 0.34, + "grad_norm": 12.911951782986106, + "learning_rate": 1.896095158204354e-05, + "loss": 1.6911, + "step": 28608 + }, + { + "epoch": 0.34, + "grad_norm": 2.473678235821553, + "learning_rate": 1.8960692272326067e-05, + "loss": 1.6176, + "step": 28611 + }, + { + "epoch": 0.34, + "grad_norm": 8.837889791624024, + "learning_rate": 1.8960432932028983e-05, + "loss": 1.5839, + "step": 28614 + }, + { + "epoch": 0.34, + "grad_norm": 6.925291745485646, + "learning_rate": 1.8960173561153166e-05, + "loss": 1.3809, + "step": 28617 + }, + { + "epoch": 0.34, + "grad_norm": 19.769448706179947, + "learning_rate": 1.8959914159699503e-05, + "loss": 1.2137, + "step": 28620 + }, + { + "epoch": 0.34, + "grad_norm": 10.594500763597967, + "learning_rate": 1.8959654727668883e-05, + "loss": 1.6081, + "step": 28623 + }, + { + "epoch": 0.34, + "grad_norm": 23.708102836003626, + "learning_rate": 1.8959395265062186e-05, + "loss": 1.3454, + "step": 28626 + }, + { + "epoch": 0.34, + "grad_norm": 12.860292340441486, + "learning_rate": 1.89591357718803e-05, + "loss": 1.2852, + "step": 28629 + }, + { + "epoch": 0.34, + "grad_norm": 26.61695604625556, + "learning_rate": 1.8958876248124112e-05, + "loss": 1.5176, + "step": 28632 + }, + { + "epoch": 0.34, + "grad_norm": 11.684898685223255, + "learning_rate": 1.8958616693794508e-05, + "loss": 1.1842, + "step": 28635 + }, + { + "epoch": 0.34, + "grad_norm": 16.153832044621257, + "learning_rate": 1.895835710889237e-05, + "loss": 1.9188, + "step": 28638 + }, + { + "epoch": 0.34, + "grad_norm": 5.409493507755072, + "learning_rate": 1.8958097493418585e-05, + "loss": 1.6085, + "step": 28641 + }, + { + "epoch": 0.34, + "grad_norm": 18.1348682409919, + "learning_rate": 1.8957837847374042e-05, + "loss": 1.1618, + "step": 28644 + }, + { + "epoch": 0.34, + "grad_norm": 19.259945218891556, + "learning_rate": 1.8957578170759622e-05, + "loss": 1.1765, + "step": 28647 + }, + { + "epoch": 0.34, + "grad_norm": 8.240633314929559, + "learning_rate": 1.8957318463576217e-05, + "loss": 1.444, + "step": 28650 + }, + { + "epoch": 0.34, + "grad_norm": 2.949947808020847, + "learning_rate": 1.8957058725824708e-05, + "loss": 1.6027, + "step": 28653 + }, + { + "epoch": 0.34, + "grad_norm": 9.297498035508694, + "learning_rate": 1.8956798957505985e-05, + "loss": 1.5746, + "step": 28656 + }, + { + "epoch": 0.34, + "grad_norm": 21.576022696931247, + "learning_rate": 1.8956539158620937e-05, + "loss": 1.7866, + "step": 28659 + }, + { + "epoch": 0.34, + "grad_norm": 17.304217714673474, + "learning_rate": 1.8956279329170443e-05, + "loss": 1.4451, + "step": 28662 + }, + { + "epoch": 0.34, + "grad_norm": 18.04143294981591, + "learning_rate": 1.8956019469155396e-05, + "loss": 1.5332, + "step": 28665 + }, + { + "epoch": 0.34, + "grad_norm": 23.472690903076604, + "learning_rate": 1.895575957857668e-05, + "loss": 1.4404, + "step": 28668 + }, + { + "epoch": 0.34, + "grad_norm": 10.248701184645455, + "learning_rate": 1.8955499657435182e-05, + "loss": 1.728, + "step": 28671 + }, + { + "epoch": 0.34, + "grad_norm": 94.43262551983975, + "learning_rate": 1.8955239705731786e-05, + "loss": 1.4184, + "step": 28674 + }, + { + "epoch": 0.34, + "grad_norm": 4.621703985064509, + "learning_rate": 1.8954979723467384e-05, + "loss": 1.5728, + "step": 28677 + }, + { + "epoch": 0.34, + "grad_norm": 4.960311706040712, + "learning_rate": 1.8954719710642867e-05, + "loss": 1.2843, + "step": 28680 + }, + { + "epoch": 0.34, + "grad_norm": 28.16835684820359, + "learning_rate": 1.8954459667259112e-05, + "loss": 1.5079, + "step": 28683 + }, + { + "epoch": 0.34, + "grad_norm": 10.214797601888657, + "learning_rate": 1.8954199593317014e-05, + "loss": 1.2876, + "step": 28686 + }, + { + "epoch": 0.34, + "grad_norm": 11.293466292600776, + "learning_rate": 1.8953939488817457e-05, + "loss": 1.7546, + "step": 28689 + }, + { + "epoch": 0.35, + "grad_norm": 12.983316252927429, + "learning_rate": 1.895367935376133e-05, + "loss": 1.6066, + "step": 28692 + }, + { + "epoch": 0.35, + "grad_norm": 8.454861187558292, + "learning_rate": 1.895341918814952e-05, + "loss": 1.7572, + "step": 28695 + }, + { + "epoch": 0.35, + "grad_norm": 15.052553341068585, + "learning_rate": 1.8953158991982916e-05, + "loss": 1.5127, + "step": 28698 + }, + { + "epoch": 0.35, + "grad_norm": 6.997092285806725, + "learning_rate": 1.8952898765262406e-05, + "loss": 1.1123, + "step": 28701 + }, + { + "epoch": 0.35, + "grad_norm": 8.295881258110747, + "learning_rate": 1.8952638507988873e-05, + "loss": 1.3173, + "step": 28704 + }, + { + "epoch": 0.35, + "grad_norm": 7.246764323628395, + "learning_rate": 1.8952378220163218e-05, + "loss": 1.1062, + "step": 28707 + }, + { + "epoch": 0.35, + "grad_norm": 10.90459875327264, + "learning_rate": 1.8952117901786313e-05, + "loss": 1.4194, + "step": 28710 + }, + { + "epoch": 0.35, + "grad_norm": 6.399122871971123, + "learning_rate": 1.8951857552859055e-05, + "loss": 1.6482, + "step": 28713 + }, + { + "epoch": 0.35, + "grad_norm": 7.037064079638925, + "learning_rate": 1.8951597173382333e-05, + "loss": 1.3742, + "step": 28716 + }, + { + "epoch": 0.35, + "grad_norm": 16.84518191165983, + "learning_rate": 1.8951336763357037e-05, + "loss": 1.3083, + "step": 28719 + }, + { + "epoch": 0.35, + "grad_norm": 14.160303524632072, + "learning_rate": 1.895107632278405e-05, + "loss": 1.5342, + "step": 28722 + }, + { + "epoch": 0.35, + "grad_norm": 11.868452179701729, + "learning_rate": 1.8950815851664265e-05, + "loss": 1.4159, + "step": 28725 + }, + { + "epoch": 0.35, + "grad_norm": 15.219547255803345, + "learning_rate": 1.895055534999857e-05, + "loss": 1.3948, + "step": 28728 + }, + { + "epoch": 0.35, + "grad_norm": 11.138339515687896, + "learning_rate": 1.8950294817787848e-05, + "loss": 1.4689, + "step": 28731 + }, + { + "epoch": 0.35, + "grad_norm": 19.60848893081239, + "learning_rate": 1.8950034255032997e-05, + "loss": 1.2516, + "step": 28734 + }, + { + "epoch": 0.35, + "grad_norm": 38.760459642418745, + "learning_rate": 1.8949773661734907e-05, + "loss": 1.6664, + "step": 28737 + }, + { + "epoch": 0.35, + "grad_norm": 5.752869527854647, + "learning_rate": 1.894951303789446e-05, + "loss": 1.0126, + "step": 28740 + }, + { + "epoch": 0.35, + "grad_norm": 33.872769861093715, + "learning_rate": 1.8949252383512548e-05, + "loss": 1.7646, + "step": 28743 + }, + { + "epoch": 0.35, + "grad_norm": 3.831840145106952, + "learning_rate": 1.8948991698590064e-05, + "loss": 1.5772, + "step": 28746 + }, + { + "epoch": 0.35, + "grad_norm": 20.748846246438116, + "learning_rate": 1.8948730983127888e-05, + "loss": 1.2334, + "step": 28749 + }, + { + "epoch": 0.35, + "grad_norm": 13.953205638359421, + "learning_rate": 1.8948470237126922e-05, + "loss": 1.2677, + "step": 28752 + }, + { + "epoch": 0.35, + "grad_norm": 38.33951631098284, + "learning_rate": 1.894820946058805e-05, + "loss": 1.4612, + "step": 28755 + }, + { + "epoch": 0.35, + "grad_norm": 15.889262427533163, + "learning_rate": 1.894794865351216e-05, + "loss": 1.8311, + "step": 28758 + }, + { + "epoch": 0.35, + "grad_norm": 9.343200943264248, + "learning_rate": 1.8947687815900146e-05, + "loss": 1.1103, + "step": 28761 + }, + { + "epoch": 0.35, + "grad_norm": 7.890169173828288, + "learning_rate": 1.8947426947752895e-05, + "loss": 1.4085, + "step": 28764 + }, + { + "epoch": 0.35, + "grad_norm": 38.616966349060505, + "learning_rate": 1.8947166049071303e-05, + "loss": 1.2508, + "step": 28767 + }, + { + "epoch": 0.35, + "grad_norm": 31.341896184852576, + "learning_rate": 1.894690511985625e-05, + "loss": 1.5156, + "step": 28770 + }, + { + "epoch": 0.35, + "grad_norm": 24.492298910911995, + "learning_rate": 1.8946644160108637e-05, + "loss": 1.5128, + "step": 28773 + }, + { + "epoch": 0.35, + "grad_norm": 12.70097150434235, + "learning_rate": 1.894638316982935e-05, + "loss": 1.5635, + "step": 28776 + }, + { + "epoch": 0.35, + "grad_norm": 17.23270210826483, + "learning_rate": 1.8946122149019274e-05, + "loss": 1.5047, + "step": 28779 + }, + { + "epoch": 0.35, + "grad_norm": 19.216757121997734, + "learning_rate": 1.894586109767931e-05, + "loss": 1.339, + "step": 28782 + }, + { + "epoch": 0.35, + "grad_norm": 6.900893384653552, + "learning_rate": 1.8945600015810345e-05, + "loss": 1.5783, + "step": 28785 + }, + { + "epoch": 0.35, + "grad_norm": 18.36660817686898, + "learning_rate": 1.894533890341327e-05, + "loss": 1.4276, + "step": 28788 + }, + { + "epoch": 0.35, + "grad_norm": 3.506989936381867, + "learning_rate": 1.894507776048897e-05, + "loss": 1.6111, + "step": 28791 + }, + { + "epoch": 0.35, + "grad_norm": 9.13571228861071, + "learning_rate": 1.8944816587038345e-05, + "loss": 1.3504, + "step": 28794 + }, + { + "epoch": 0.35, + "grad_norm": 16.505369101701653, + "learning_rate": 1.8944555383062283e-05, + "loss": 1.4008, + "step": 28797 + }, + { + "epoch": 0.35, + "grad_norm": 11.932253623632704, + "learning_rate": 1.8944294148561676e-05, + "loss": 1.3742, + "step": 28800 + }, + { + "epoch": 0.35, + "grad_norm": 7.182729336048367, + "learning_rate": 1.8944032883537415e-05, + "loss": 1.6563, + "step": 28803 + }, + { + "epoch": 0.35, + "grad_norm": 8.757888236023616, + "learning_rate": 1.894377158799039e-05, + "loss": 1.6829, + "step": 28806 + }, + { + "epoch": 0.35, + "grad_norm": 6.899610269967144, + "learning_rate": 1.8943510261921496e-05, + "loss": 1.8299, + "step": 28809 + }, + { + "epoch": 0.35, + "grad_norm": 5.363280563360099, + "learning_rate": 1.8943248905331623e-05, + "loss": 1.2933, + "step": 28812 + }, + { + "epoch": 0.35, + "grad_norm": 9.077219042265217, + "learning_rate": 1.894298751822166e-05, + "loss": 1.5778, + "step": 28815 + }, + { + "epoch": 0.35, + "grad_norm": 4.039688332765271, + "learning_rate": 1.8942726100592503e-05, + "loss": 1.4145, + "step": 28818 + }, + { + "epoch": 0.35, + "grad_norm": 7.750272576290198, + "learning_rate": 1.8942464652445043e-05, + "loss": 1.7863, + "step": 28821 + }, + { + "epoch": 0.35, + "grad_norm": 12.257316299150146, + "learning_rate": 1.894220317378017e-05, + "loss": 1.4293, + "step": 28824 + }, + { + "epoch": 0.35, + "grad_norm": 64.84396707733364, + "learning_rate": 1.8941941664598786e-05, + "loss": 1.689, + "step": 28827 + }, + { + "epoch": 0.35, + "grad_norm": 12.203380348069487, + "learning_rate": 1.8941680124901768e-05, + "loss": 1.2035, + "step": 28830 + }, + { + "epoch": 0.35, + "grad_norm": 15.816466203283678, + "learning_rate": 1.8941418554690022e-05, + "loss": 1.3703, + "step": 28833 + }, + { + "epoch": 0.35, + "grad_norm": 12.525315262921202, + "learning_rate": 1.894115695396443e-05, + "loss": 1.2785, + "step": 28836 + }, + { + "epoch": 0.35, + "grad_norm": 6.27042591068258, + "learning_rate": 1.8940895322725895e-05, + "loss": 1.6513, + "step": 28839 + }, + { + "epoch": 0.35, + "grad_norm": 19.01872075961462, + "learning_rate": 1.89406336609753e-05, + "loss": 1.4501, + "step": 28842 + }, + { + "epoch": 0.35, + "grad_norm": 5.473564888860543, + "learning_rate": 1.8940371968713543e-05, + "loss": 1.7373, + "step": 28845 + }, + { + "epoch": 0.35, + "grad_norm": 5.226573717527259, + "learning_rate": 1.894011024594152e-05, + "loss": 1.3839, + "step": 28848 + }, + { + "epoch": 0.35, + "grad_norm": 55.33818588821659, + "learning_rate": 1.893984849266012e-05, + "loss": 1.3586, + "step": 28851 + }, + { + "epoch": 0.35, + "grad_norm": 4.547620259963636, + "learning_rate": 1.8939586708870238e-05, + "loss": 1.697, + "step": 28854 + }, + { + "epoch": 0.35, + "grad_norm": 54.84018702249504, + "learning_rate": 1.8939324894572765e-05, + "loss": 1.6588, + "step": 28857 + }, + { + "epoch": 0.35, + "grad_norm": 17.112240215219252, + "learning_rate": 1.8939063049768598e-05, + "loss": 1.7696, + "step": 28860 + }, + { + "epoch": 0.35, + "grad_norm": 34.25705960188513, + "learning_rate": 1.8938801174458628e-05, + "loss": 1.6614, + "step": 28863 + }, + { + "epoch": 0.35, + "grad_norm": 54.607123813441454, + "learning_rate": 1.8938539268643748e-05, + "loss": 1.7981, + "step": 28866 + }, + { + "epoch": 0.35, + "grad_norm": 12.554395151703844, + "learning_rate": 1.8938277332324854e-05, + "loss": 1.5452, + "step": 28869 + }, + { + "epoch": 0.35, + "grad_norm": 8.807124499384809, + "learning_rate": 1.8938015365502836e-05, + "loss": 1.8448, + "step": 28872 + }, + { + "epoch": 0.35, + "grad_norm": 44.97263834626432, + "learning_rate": 1.8937753368178596e-05, + "loss": 1.4556, + "step": 28875 + }, + { + "epoch": 0.35, + "grad_norm": 40.61196543473265, + "learning_rate": 1.893749134035302e-05, + "loss": 1.1459, + "step": 28878 + }, + { + "epoch": 0.35, + "grad_norm": 16.903373207522378, + "learning_rate": 1.8937229282027004e-05, + "loss": 1.3551, + "step": 28881 + }, + { + "epoch": 0.35, + "grad_norm": 8.911290263474449, + "learning_rate": 1.8936967193201447e-05, + "loss": 1.1356, + "step": 28884 + }, + { + "epoch": 0.35, + "grad_norm": 6.882320804436784, + "learning_rate": 1.8936705073877243e-05, + "loss": 1.5068, + "step": 28887 + }, + { + "epoch": 0.35, + "grad_norm": 36.268738060998906, + "learning_rate": 1.8936442924055275e-05, + "loss": 1.4037, + "step": 28890 + }, + { + "epoch": 0.35, + "grad_norm": 17.10274742664274, + "learning_rate": 1.893618074373645e-05, + "loss": 1.6751, + "step": 28893 + }, + { + "epoch": 0.35, + "grad_norm": 32.036205040677444, + "learning_rate": 1.8935918532921663e-05, + "loss": 1.4225, + "step": 28896 + }, + { + "epoch": 0.35, + "grad_norm": 35.53645344473545, + "learning_rate": 1.89356562916118e-05, + "loss": 1.6478, + "step": 28899 + }, + { + "epoch": 0.35, + "grad_norm": 8.426836586797927, + "learning_rate": 1.8935394019807762e-05, + "loss": 1.275, + "step": 28902 + }, + { + "epoch": 0.35, + "grad_norm": 12.890377006851773, + "learning_rate": 1.893513171751044e-05, + "loss": 1.6565, + "step": 28905 + }, + { + "epoch": 0.35, + "grad_norm": 14.77061115020006, + "learning_rate": 1.8934869384720737e-05, + "loss": 1.5791, + "step": 28908 + }, + { + "epoch": 0.35, + "grad_norm": 38.96010514969104, + "learning_rate": 1.893460702143954e-05, + "loss": 1.5364, + "step": 28911 + }, + { + "epoch": 0.35, + "grad_norm": 21.072709141605173, + "learning_rate": 1.8934344627667748e-05, + "loss": 1.3449, + "step": 28914 + }, + { + "epoch": 0.35, + "grad_norm": 16.473991334005458, + "learning_rate": 1.8934082203406258e-05, + "loss": 1.5461, + "step": 28917 + }, + { + "epoch": 0.35, + "grad_norm": 67.00849209001484, + "learning_rate": 1.8933819748655963e-05, + "loss": 1.8371, + "step": 28920 + }, + { + "epoch": 0.35, + "grad_norm": 63.96528460130641, + "learning_rate": 1.8933557263417753e-05, + "loss": 1.679, + "step": 28923 + }, + { + "epoch": 0.35, + "grad_norm": 19.764875347092474, + "learning_rate": 1.8933294747692538e-05, + "loss": 1.5246, + "step": 28926 + }, + { + "epoch": 0.35, + "grad_norm": 20.695248500264274, + "learning_rate": 1.8933032201481204e-05, + "loss": 1.6453, + "step": 28929 + }, + { + "epoch": 0.35, + "grad_norm": 14.242326493839817, + "learning_rate": 1.8932769624784642e-05, + "loss": 1.7773, + "step": 28932 + }, + { + "epoch": 0.35, + "grad_norm": 3.4619977987156405, + "learning_rate": 1.893250701760376e-05, + "loss": 1.4078, + "step": 28935 + }, + { + "epoch": 0.35, + "grad_norm": 129.63808754386326, + "learning_rate": 1.8932244379939453e-05, + "loss": 1.8304, + "step": 28938 + }, + { + "epoch": 0.35, + "grad_norm": 59.688390144002206, + "learning_rate": 1.893198171179261e-05, + "loss": 1.4299, + "step": 28941 + }, + { + "epoch": 0.35, + "grad_norm": 26.56760954744121, + "learning_rate": 1.8931719013164128e-05, + "loss": 1.5985, + "step": 28944 + }, + { + "epoch": 0.35, + "grad_norm": 15.208214961422634, + "learning_rate": 1.893145628405491e-05, + "loss": 1.3963, + "step": 28947 + }, + { + "epoch": 0.35, + "grad_norm": 29.883346281459833, + "learning_rate": 1.8931193524465843e-05, + "loss": 1.6652, + "step": 28950 + }, + { + "epoch": 0.35, + "grad_norm": 17.5302076359066, + "learning_rate": 1.8930930734397837e-05, + "loss": 1.5331, + "step": 28953 + }, + { + "epoch": 0.35, + "grad_norm": 37.83307655982736, + "learning_rate": 1.8930667913851776e-05, + "loss": 1.5001, + "step": 28956 + }, + { + "epoch": 0.35, + "grad_norm": 19.782015145073682, + "learning_rate": 1.8930405062828565e-05, + "loss": 1.6248, + "step": 28959 + }, + { + "epoch": 0.35, + "grad_norm": 28.142557536624107, + "learning_rate": 1.8930142181329095e-05, + "loss": 1.5282, + "step": 28962 + }, + { + "epoch": 0.35, + "grad_norm": 30.730534417390267, + "learning_rate": 1.892987926935427e-05, + "loss": 1.4502, + "step": 28965 + }, + { + "epoch": 0.35, + "grad_norm": 30.551657014070102, + "learning_rate": 1.8929616326904986e-05, + "loss": 1.607, + "step": 28968 + }, + { + "epoch": 0.35, + "grad_norm": 28.781358905998225, + "learning_rate": 1.8929353353982133e-05, + "loss": 1.3949, + "step": 28971 + }, + { + "epoch": 0.35, + "grad_norm": 16.824520041297173, + "learning_rate": 1.8929090350586616e-05, + "loss": 1.6322, + "step": 28974 + }, + { + "epoch": 0.35, + "grad_norm": 46.988220059970516, + "learning_rate": 1.892882731671933e-05, + "loss": 1.59, + "step": 28977 + }, + { + "epoch": 0.35, + "grad_norm": 9.733607671651358, + "learning_rate": 1.8928564252381166e-05, + "loss": 1.5093, + "step": 28980 + }, + { + "epoch": 0.35, + "grad_norm": 23.600536733159874, + "learning_rate": 1.8928301157573033e-05, + "loss": 1.5016, + "step": 28983 + }, + { + "epoch": 0.35, + "grad_norm": 32.62087227168778, + "learning_rate": 1.8928038032295824e-05, + "loss": 1.3773, + "step": 28986 + }, + { + "epoch": 0.35, + "grad_norm": 91.22589802266029, + "learning_rate": 1.8927774876550437e-05, + "loss": 1.2844, + "step": 28989 + }, + { + "epoch": 0.35, + "grad_norm": 4.998166166388712, + "learning_rate": 1.8927511690337772e-05, + "loss": 1.7271, + "step": 28992 + }, + { + "epoch": 0.35, + "grad_norm": 31.379875973666156, + "learning_rate": 1.8927248473658722e-05, + "loss": 1.3989, + "step": 28995 + }, + { + "epoch": 0.35, + "grad_norm": 14.74322467231954, + "learning_rate": 1.892698522651419e-05, + "loss": 1.5368, + "step": 28998 + }, + { + "epoch": 0.35, + "grad_norm": 8.400855618842316, + "learning_rate": 1.8926721948905075e-05, + "loss": 1.488, + "step": 29001 + }, + { + "epoch": 0.35, + "grad_norm": 6.153955755233603, + "learning_rate": 1.8926458640832274e-05, + "loss": 1.2767, + "step": 29004 + }, + { + "epoch": 0.35, + "grad_norm": 17.62442663752284, + "learning_rate": 1.8926195302296682e-05, + "loss": 1.5028, + "step": 29007 + }, + { + "epoch": 0.35, + "grad_norm": 11.351800456922106, + "learning_rate": 1.89259319332992e-05, + "loss": 1.8087, + "step": 29010 + }, + { + "epoch": 0.35, + "grad_norm": 29.580213392760353, + "learning_rate": 1.892566853384073e-05, + "loss": 1.2335, + "step": 29013 + }, + { + "epoch": 0.35, + "grad_norm": 6.232887672210153, + "learning_rate": 1.8925405103922164e-05, + "loss": 1.711, + "step": 29016 + }, + { + "epoch": 0.35, + "grad_norm": 5.272822054024305, + "learning_rate": 1.892514164354441e-05, + "loss": 1.419, + "step": 29019 + }, + { + "epoch": 0.35, + "grad_norm": 11.297831594733767, + "learning_rate": 1.892487815270836e-05, + "loss": 1.3614, + "step": 29022 + }, + { + "epoch": 0.35, + "grad_norm": 16.311460766039144, + "learning_rate": 1.8924614631414913e-05, + "loss": 1.5426, + "step": 29025 + }, + { + "epoch": 0.35, + "grad_norm": 20.369996516306216, + "learning_rate": 1.8924351079664974e-05, + "loss": 1.5037, + "step": 29028 + }, + { + "epoch": 0.35, + "grad_norm": 14.541685805863056, + "learning_rate": 1.8924087497459443e-05, + "loss": 1.325, + "step": 29031 + }, + { + "epoch": 0.35, + "grad_norm": 14.990673680446474, + "learning_rate": 1.8923823884799212e-05, + "loss": 1.5485, + "step": 29034 + }, + { + "epoch": 0.35, + "grad_norm": 12.557379390332892, + "learning_rate": 1.8923560241685185e-05, + "loss": 1.3723, + "step": 29037 + }, + { + "epoch": 0.35, + "grad_norm": 24.81102370317357, + "learning_rate": 1.892329656811826e-05, + "loss": 1.5058, + "step": 29040 + }, + { + "epoch": 0.35, + "grad_norm": 25.990316730254097, + "learning_rate": 1.892303286409934e-05, + "loss": 1.4469, + "step": 29043 + }, + { + "epoch": 0.35, + "grad_norm": 5.991642245532386, + "learning_rate": 1.892276912962932e-05, + "loss": 1.3989, + "step": 29046 + }, + { + "epoch": 0.35, + "grad_norm": 26.35190961795256, + "learning_rate": 1.89225053647091e-05, + "loss": 1.6989, + "step": 29049 + }, + { + "epoch": 0.35, + "grad_norm": 19.38136373932809, + "learning_rate": 1.892224156933959e-05, + "loss": 1.5203, + "step": 29052 + }, + { + "epoch": 0.35, + "grad_norm": 80.14896349885036, + "learning_rate": 1.8921977743521682e-05, + "loss": 1.4753, + "step": 29055 + }, + { + "epoch": 0.35, + "grad_norm": 7.2432964283949, + "learning_rate": 1.8921713887256276e-05, + "loss": 1.6012, + "step": 29058 + }, + { + "epoch": 0.35, + "grad_norm": 40.79098842787606, + "learning_rate": 1.8921450000544274e-05, + "loss": 1.4457, + "step": 29061 + }, + { + "epoch": 0.35, + "grad_norm": 15.280626668813804, + "learning_rate": 1.8921186083386578e-05, + "loss": 1.6182, + "step": 29064 + }, + { + "epoch": 0.35, + "grad_norm": 13.891691946048155, + "learning_rate": 1.8920922135784088e-05, + "loss": 1.4339, + "step": 29067 + }, + { + "epoch": 0.35, + "grad_norm": 15.253989632773939, + "learning_rate": 1.8920658157737703e-05, + "loss": 1.2341, + "step": 29070 + }, + { + "epoch": 0.35, + "grad_norm": 13.054794065452034, + "learning_rate": 1.8920394149248323e-05, + "loss": 1.5074, + "step": 29073 + }, + { + "epoch": 0.35, + "grad_norm": 14.843298116935243, + "learning_rate": 1.8920130110316854e-05, + "loss": 1.6963, + "step": 29076 + }, + { + "epoch": 0.35, + "grad_norm": 42.798706347260435, + "learning_rate": 1.8919866040944192e-05, + "loss": 1.5881, + "step": 29079 + }, + { + "epoch": 0.35, + "grad_norm": 8.694416899247372, + "learning_rate": 1.891960194113124e-05, + "loss": 1.4238, + "step": 29082 + }, + { + "epoch": 0.35, + "grad_norm": 9.95184553083945, + "learning_rate": 1.89193378108789e-05, + "loss": 1.5371, + "step": 29085 + }, + { + "epoch": 0.35, + "grad_norm": 26.749801348057353, + "learning_rate": 1.891907365018807e-05, + "loss": 1.2432, + "step": 29088 + }, + { + "epoch": 0.35, + "grad_norm": 13.997822263173175, + "learning_rate": 1.891880945905966e-05, + "loss": 1.2522, + "step": 29091 + }, + { + "epoch": 0.35, + "grad_norm": 26.748587580133837, + "learning_rate": 1.8918545237494562e-05, + "loss": 1.5963, + "step": 29094 + }, + { + "epoch": 0.35, + "grad_norm": 7.416664090234131, + "learning_rate": 1.8918280985493683e-05, + "loss": 1.8298, + "step": 29097 + }, + { + "epoch": 0.35, + "grad_norm": 49.54175480640645, + "learning_rate": 1.8918016703057924e-05, + "loss": 1.7346, + "step": 29100 + }, + { + "epoch": 0.35, + "grad_norm": 126.92074631062843, + "learning_rate": 1.8917752390188183e-05, + "loss": 1.2936, + "step": 29103 + }, + { + "epoch": 0.35, + "grad_norm": 4.472683997453873, + "learning_rate": 1.8917488046885368e-05, + "loss": 1.6634, + "step": 29106 + }, + { + "epoch": 0.35, + "grad_norm": 62.43192575749372, + "learning_rate": 1.8917223673150377e-05, + "loss": 1.6175, + "step": 29109 + }, + { + "epoch": 0.35, + "grad_norm": 6.566635796701541, + "learning_rate": 1.8916959268984115e-05, + "loss": 1.3449, + "step": 29112 + }, + { + "epoch": 0.35, + "grad_norm": 16.299846356785917, + "learning_rate": 1.8916694834387482e-05, + "loss": 1.119, + "step": 29115 + }, + { + "epoch": 0.35, + "grad_norm": 16.307483505765056, + "learning_rate": 1.891643036936138e-05, + "loss": 1.5366, + "step": 29118 + }, + { + "epoch": 0.35, + "grad_norm": 21.642643284957618, + "learning_rate": 1.8916165873906712e-05, + "loss": 1.67, + "step": 29121 + }, + { + "epoch": 0.35, + "grad_norm": 15.126901148881222, + "learning_rate": 1.8915901348024387e-05, + "loss": 1.5268, + "step": 29124 + }, + { + "epoch": 0.35, + "grad_norm": 68.58365046473922, + "learning_rate": 1.8915636791715296e-05, + "loss": 1.2125, + "step": 29127 + }, + { + "epoch": 0.35, + "grad_norm": 26.37402835571263, + "learning_rate": 1.8915372204980352e-05, + "loss": 1.4598, + "step": 29130 + }, + { + "epoch": 0.35, + "grad_norm": 41.97921462218988, + "learning_rate": 1.8915107587820452e-05, + "loss": 1.4017, + "step": 29133 + }, + { + "epoch": 0.35, + "grad_norm": 41.35339546969044, + "learning_rate": 1.89148429402365e-05, + "loss": 1.6902, + "step": 29136 + }, + { + "epoch": 0.35, + "grad_norm": 13.143968382619644, + "learning_rate": 1.8914578262229403e-05, + "loss": 2.0192, + "step": 29139 + }, + { + "epoch": 0.35, + "grad_norm": 28.52590395172216, + "learning_rate": 1.8914313553800063e-05, + "loss": 1.5714, + "step": 29142 + }, + { + "epoch": 0.35, + "grad_norm": 5.638659133735591, + "learning_rate": 1.8914048814949376e-05, + "loss": 1.448, + "step": 29145 + }, + { + "epoch": 0.35, + "grad_norm": 9.323597833876017, + "learning_rate": 1.8913784045678255e-05, + "loss": 1.3253, + "step": 29148 + }, + { + "epoch": 0.35, + "grad_norm": 14.15615128028388, + "learning_rate": 1.89135192459876e-05, + "loss": 1.4526, + "step": 29151 + }, + { + "epoch": 0.35, + "grad_norm": 7.25694424859832, + "learning_rate": 1.8913254415878315e-05, + "loss": 1.6414, + "step": 29154 + }, + { + "epoch": 0.35, + "grad_norm": 15.297156366982298, + "learning_rate": 1.89129895553513e-05, + "loss": 1.2915, + "step": 29157 + }, + { + "epoch": 0.35, + "grad_norm": 14.547357175176879, + "learning_rate": 1.8912724664407465e-05, + "loss": 1.8213, + "step": 29160 + }, + { + "epoch": 0.35, + "grad_norm": 7.747349940354963, + "learning_rate": 1.891245974304771e-05, + "loss": 1.2822, + "step": 29163 + }, + { + "epoch": 0.35, + "grad_norm": 17.860185909115295, + "learning_rate": 1.891219479127294e-05, + "loss": 1.632, + "step": 29166 + }, + { + "epoch": 0.35, + "grad_norm": 12.580352161847356, + "learning_rate": 1.891192980908406e-05, + "loss": 1.4636, + "step": 29169 + }, + { + "epoch": 0.35, + "grad_norm": 36.1966166090206, + "learning_rate": 1.8911664796481972e-05, + "loss": 1.6156, + "step": 29172 + }, + { + "epoch": 0.35, + "grad_norm": 14.663614212513178, + "learning_rate": 1.8911399753467584e-05, + "loss": 1.7356, + "step": 29175 + }, + { + "epoch": 0.35, + "grad_norm": 11.82659304987783, + "learning_rate": 1.89111346800418e-05, + "loss": 1.6986, + "step": 29178 + }, + { + "epoch": 0.35, + "grad_norm": 95.20043842317659, + "learning_rate": 1.891086957620552e-05, + "loss": 1.3298, + "step": 29181 + }, + { + "epoch": 0.35, + "grad_norm": 11.092435177269781, + "learning_rate": 1.891060444195965e-05, + "loss": 1.5625, + "step": 29184 + }, + { + "epoch": 0.35, + "grad_norm": 5.37631205632343, + "learning_rate": 1.89103392773051e-05, + "loss": 1.394, + "step": 29187 + }, + { + "epoch": 0.35, + "grad_norm": 18.120806909842482, + "learning_rate": 1.891007408224277e-05, + "loss": 1.7039, + "step": 29190 + }, + { + "epoch": 0.35, + "grad_norm": 36.48655560217263, + "learning_rate": 1.8909808856773567e-05, + "loss": 1.7433, + "step": 29193 + }, + { + "epoch": 0.35, + "grad_norm": 10.895109034338645, + "learning_rate": 1.8909543600898397e-05, + "loss": 1.5253, + "step": 29196 + }, + { + "epoch": 0.35, + "grad_norm": 62.7225508716952, + "learning_rate": 1.8909278314618162e-05, + "loss": 1.6691, + "step": 29199 + }, + { + "epoch": 0.35, + "grad_norm": 16.8621410321558, + "learning_rate": 1.890901299793377e-05, + "loss": 1.7419, + "step": 29202 + }, + { + "epoch": 0.35, + "grad_norm": 9.803193016685242, + "learning_rate": 1.8908747650846123e-05, + "loss": 1.7536, + "step": 29205 + }, + { + "epoch": 0.35, + "grad_norm": 14.551554461380785, + "learning_rate": 1.8908482273356134e-05, + "loss": 1.5932, + "step": 29208 + }, + { + "epoch": 0.35, + "grad_norm": 12.188167393697652, + "learning_rate": 1.8908216865464703e-05, + "loss": 1.6015, + "step": 29211 + }, + { + "epoch": 0.35, + "grad_norm": 56.630295806198426, + "learning_rate": 1.8907951427172734e-05, + "loss": 1.6833, + "step": 29214 + }, + { + "epoch": 0.35, + "grad_norm": 15.978960811021894, + "learning_rate": 1.8907685958481135e-05, + "loss": 1.9609, + "step": 29217 + }, + { + "epoch": 0.35, + "grad_norm": 21.443001196998104, + "learning_rate": 1.8907420459390815e-05, + "loss": 1.5864, + "step": 29220 + }, + { + "epoch": 0.35, + "grad_norm": 9.4580303051784, + "learning_rate": 1.8907154929902675e-05, + "loss": 1.3938, + "step": 29223 + }, + { + "epoch": 0.35, + "grad_norm": 45.161209553738175, + "learning_rate": 1.890688937001763e-05, + "loss": 1.3778, + "step": 29226 + }, + { + "epoch": 0.35, + "grad_norm": 16.78583955799711, + "learning_rate": 1.8906623779736572e-05, + "loss": 1.527, + "step": 29229 + }, + { + "epoch": 0.35, + "grad_norm": 4.198587667830904, + "learning_rate": 1.890635815906042e-05, + "loss": 1.485, + "step": 29232 + }, + { + "epoch": 0.35, + "grad_norm": 3.2046125374530545, + "learning_rate": 1.8906092507990073e-05, + "loss": 1.2911, + "step": 29235 + }, + { + "epoch": 0.35, + "grad_norm": 24.280615031889234, + "learning_rate": 1.890582682652644e-05, + "loss": 1.0828, + "step": 29238 + }, + { + "epoch": 0.35, + "grad_norm": 30.853828976819898, + "learning_rate": 1.890556111467043e-05, + "loss": 1.4753, + "step": 29241 + }, + { + "epoch": 0.35, + "grad_norm": 14.997404806216176, + "learning_rate": 1.8905295372422945e-05, + "loss": 1.6211, + "step": 29244 + }, + { + "epoch": 0.35, + "grad_norm": 9.93584296919303, + "learning_rate": 1.8905029599784896e-05, + "loss": 1.6549, + "step": 29247 + }, + { + "epoch": 0.35, + "grad_norm": 34.89019152682489, + "learning_rate": 1.890476379675719e-05, + "loss": 1.3504, + "step": 29250 + }, + { + "epoch": 0.35, + "grad_norm": 31.28208223882867, + "learning_rate": 1.890449796334073e-05, + "loss": 1.627, + "step": 29253 + }, + { + "epoch": 0.35, + "grad_norm": 8.187736034638817, + "learning_rate": 1.890423209953643e-05, + "loss": 1.4711, + "step": 29256 + }, + { + "epoch": 0.35, + "grad_norm": 121.2877147837111, + "learning_rate": 1.890396620534519e-05, + "loss": 1.2634, + "step": 29259 + }, + { + "epoch": 0.35, + "grad_norm": 7.346249938643801, + "learning_rate": 1.890370028076792e-05, + "loss": 1.483, + "step": 29262 + }, + { + "epoch": 0.35, + "grad_norm": 18.900033880772657, + "learning_rate": 1.8903434325805527e-05, + "loss": 1.7622, + "step": 29265 + }, + { + "epoch": 0.35, + "grad_norm": 10.769136318002039, + "learning_rate": 1.8903168340458918e-05, + "loss": 1.4167, + "step": 29268 + }, + { + "epoch": 0.35, + "grad_norm": 19.018322119126395, + "learning_rate": 1.890290232472901e-05, + "loss": 1.6047, + "step": 29271 + }, + { + "epoch": 0.35, + "grad_norm": 36.31696476698451, + "learning_rate": 1.8902636278616698e-05, + "loss": 1.8386, + "step": 29274 + }, + { + "epoch": 0.35, + "grad_norm": 4.845063488021547, + "learning_rate": 1.8902370202122897e-05, + "loss": 1.4019, + "step": 29277 + }, + { + "epoch": 0.35, + "grad_norm": 12.398184132672258, + "learning_rate": 1.8902104095248514e-05, + "loss": 1.2496, + "step": 29280 + }, + { + "epoch": 0.35, + "grad_norm": 5.846807348399434, + "learning_rate": 1.890183795799445e-05, + "loss": 1.7614, + "step": 29283 + }, + { + "epoch": 0.35, + "grad_norm": 15.883770798714947, + "learning_rate": 1.8901571790361625e-05, + "loss": 1.4264, + "step": 29286 + }, + { + "epoch": 0.35, + "grad_norm": 14.759443349374365, + "learning_rate": 1.890130559235094e-05, + "loss": 1.9368, + "step": 29289 + }, + { + "epoch": 0.35, + "grad_norm": 17.855324463467795, + "learning_rate": 1.8901039363963306e-05, + "loss": 1.5637, + "step": 29292 + }, + { + "epoch": 0.35, + "grad_norm": 10.863279881541317, + "learning_rate": 1.8900773105199632e-05, + "loss": 1.3613, + "step": 29295 + }, + { + "epoch": 0.35, + "grad_norm": 13.022940766710224, + "learning_rate": 1.8900506816060824e-05, + "loss": 1.4502, + "step": 29298 + }, + { + "epoch": 0.35, + "grad_norm": 13.777989092909808, + "learning_rate": 1.8900240496547792e-05, + "loss": 1.879, + "step": 29301 + }, + { + "epoch": 0.35, + "grad_norm": 23.874754842180547, + "learning_rate": 1.8899974146661445e-05, + "loss": 1.6151, + "step": 29304 + }, + { + "epoch": 0.35, + "grad_norm": 22.041793528933045, + "learning_rate": 1.8899707766402692e-05, + "loss": 1.4879, + "step": 29307 + }, + { + "epoch": 0.35, + "grad_norm": 7.050871364213521, + "learning_rate": 1.889944135577244e-05, + "loss": 1.3641, + "step": 29310 + }, + { + "epoch": 0.35, + "grad_norm": 17.69254491370545, + "learning_rate": 1.8899174914771604e-05, + "loss": 1.3228, + "step": 29313 + }, + { + "epoch": 0.35, + "grad_norm": 16.6288203858636, + "learning_rate": 1.889890844340109e-05, + "loss": 1.5008, + "step": 29316 + }, + { + "epoch": 0.35, + "grad_norm": 6.86447648953224, + "learning_rate": 1.8898641941661802e-05, + "loss": 1.2764, + "step": 29319 + }, + { + "epoch": 0.35, + "grad_norm": 7.48340581018756, + "learning_rate": 1.889837540955466e-05, + "loss": 1.5606, + "step": 29322 + }, + { + "epoch": 0.35, + "grad_norm": 10.506036397948368, + "learning_rate": 1.8898108847080564e-05, + "loss": 1.6613, + "step": 29325 + }, + { + "epoch": 0.35, + "grad_norm": 25.320975338761226, + "learning_rate": 1.889784225424043e-05, + "loss": 1.2559, + "step": 29328 + }, + { + "epoch": 0.35, + "grad_norm": 15.555925650521, + "learning_rate": 1.8897575631035163e-05, + "loss": 1.7834, + "step": 29331 + }, + { + "epoch": 0.35, + "grad_norm": 41.562525375141504, + "learning_rate": 1.889730897746568e-05, + "loss": 1.5465, + "step": 29334 + }, + { + "epoch": 0.35, + "grad_norm": 11.029034084533468, + "learning_rate": 1.889704229353288e-05, + "loss": 1.5314, + "step": 29337 + }, + { + "epoch": 0.35, + "grad_norm": 17.52442837961365, + "learning_rate": 1.8896775579237684e-05, + "loss": 1.9477, + "step": 29340 + }, + { + "epoch": 0.35, + "grad_norm": 20.194280786417934, + "learning_rate": 1.8896508834580993e-05, + "loss": 1.2898, + "step": 29343 + }, + { + "epoch": 0.35, + "grad_norm": 17.549867552030292, + "learning_rate": 1.8896242059563725e-05, + "loss": 1.3245, + "step": 29346 + }, + { + "epoch": 0.35, + "grad_norm": 16.711071298804566, + "learning_rate": 1.8895975254186786e-05, + "loss": 1.641, + "step": 29349 + }, + { + "epoch": 0.35, + "grad_norm": 8.875468189968798, + "learning_rate": 1.8895708418451093e-05, + "loss": 1.3646, + "step": 29352 + }, + { + "epoch": 0.35, + "grad_norm": 18.699145773446876, + "learning_rate": 1.8895441552357545e-05, + "loss": 1.7695, + "step": 29355 + }, + { + "epoch": 0.35, + "grad_norm": 12.541710853094157, + "learning_rate": 1.889517465590706e-05, + "loss": 1.1969, + "step": 29358 + }, + { + "epoch": 0.35, + "grad_norm": 13.031576829837045, + "learning_rate": 1.889490772910055e-05, + "loss": 1.5373, + "step": 29361 + }, + { + "epoch": 0.35, + "grad_norm": 25.551416091038035, + "learning_rate": 1.8894640771938923e-05, + "loss": 1.6431, + "step": 29364 + }, + { + "epoch": 0.35, + "grad_norm": 24.697498043492963, + "learning_rate": 1.889437378442309e-05, + "loss": 1.5159, + "step": 29367 + }, + { + "epoch": 0.35, + "grad_norm": 17.170621194759814, + "learning_rate": 1.8894106766553964e-05, + "loss": 1.284, + "step": 29370 + }, + { + "epoch": 0.35, + "grad_norm": 19.168526590154777, + "learning_rate": 1.8893839718332453e-05, + "loss": 1.5816, + "step": 29373 + }, + { + "epoch": 0.35, + "grad_norm": 16.918412377393473, + "learning_rate": 1.8893572639759474e-05, + "loss": 1.2751, + "step": 29376 + }, + { + "epoch": 0.35, + "grad_norm": 10.93161635247721, + "learning_rate": 1.8893305530835936e-05, + "loss": 1.2576, + "step": 29379 + }, + { + "epoch": 0.35, + "grad_norm": 5.053952999593626, + "learning_rate": 1.8893038391562743e-05, + "loss": 1.2842, + "step": 29382 + }, + { + "epoch": 0.35, + "grad_norm": 13.148917141979966, + "learning_rate": 1.889277122194082e-05, + "loss": 1.4649, + "step": 29385 + }, + { + "epoch": 0.35, + "grad_norm": 20.10503544845628, + "learning_rate": 1.889250402197107e-05, + "loss": 1.8449, + "step": 29388 + }, + { + "epoch": 0.35, + "grad_norm": 3.988432828356153, + "learning_rate": 1.8892236791654404e-05, + "loss": 1.4383, + "step": 29391 + }, + { + "epoch": 0.35, + "grad_norm": 11.188536604618731, + "learning_rate": 1.889196953099174e-05, + "loss": 1.5139, + "step": 29394 + }, + { + "epoch": 0.35, + "grad_norm": 9.729333810783007, + "learning_rate": 1.8891702239983986e-05, + "loss": 1.4641, + "step": 29397 + }, + { + "epoch": 0.35, + "grad_norm": 12.288081427300442, + "learning_rate": 1.8891434918632056e-05, + "loss": 1.5152, + "step": 29400 + }, + { + "epoch": 0.35, + "grad_norm": 9.270261933471124, + "learning_rate": 1.8891167566936857e-05, + "loss": 1.6437, + "step": 29403 + }, + { + "epoch": 0.35, + "grad_norm": 23.904747511772754, + "learning_rate": 1.889090018489931e-05, + "loss": 1.8562, + "step": 29406 + }, + { + "epoch": 0.35, + "grad_norm": 3.7322541280628343, + "learning_rate": 1.8890632772520322e-05, + "loss": 1.5983, + "step": 29409 + }, + { + "epoch": 0.35, + "grad_norm": 23.660424899024186, + "learning_rate": 1.8890365329800805e-05, + "loss": 1.335, + "step": 29412 + }, + { + "epoch": 0.35, + "grad_norm": 37.14813437853541, + "learning_rate": 1.889009785674168e-05, + "loss": 1.707, + "step": 29415 + }, + { + "epoch": 0.35, + "grad_norm": 44.59587679383065, + "learning_rate": 1.8889830353343845e-05, + "loss": 1.4396, + "step": 29418 + }, + { + "epoch": 0.35, + "grad_norm": 27.33006749794866, + "learning_rate": 1.8889562819608225e-05, + "loss": 1.2505, + "step": 29421 + }, + { + "epoch": 0.35, + "grad_norm": 23.22301008065013, + "learning_rate": 1.888929525553573e-05, + "loss": 1.7263, + "step": 29424 + }, + { + "epoch": 0.35, + "grad_norm": 19.394152386300973, + "learning_rate": 1.888902766112727e-05, + "loss": 1.9626, + "step": 29427 + }, + { + "epoch": 0.35, + "grad_norm": 5.749708116192885, + "learning_rate": 1.888876003638376e-05, + "loss": 1.6444, + "step": 29430 + }, + { + "epoch": 0.35, + "grad_norm": 19.650139540774642, + "learning_rate": 1.8888492381306113e-05, + "loss": 1.5609, + "step": 29433 + }, + { + "epoch": 0.35, + "grad_norm": 12.582781073782563, + "learning_rate": 1.8888224695895248e-05, + "loss": 1.3321, + "step": 29436 + }, + { + "epoch": 0.35, + "grad_norm": 10.955410713071494, + "learning_rate": 1.888795698015207e-05, + "loss": 1.5726, + "step": 29439 + }, + { + "epoch": 0.35, + "grad_norm": 59.36510796574416, + "learning_rate": 1.8887689234077496e-05, + "loss": 1.5776, + "step": 29442 + }, + { + "epoch": 0.35, + "grad_norm": 10.600379829890493, + "learning_rate": 1.8887421457672443e-05, + "loss": 1.5577, + "step": 29445 + }, + { + "epoch": 0.35, + "grad_norm": 41.43409272437546, + "learning_rate": 1.888715365093782e-05, + "loss": 1.2995, + "step": 29448 + }, + { + "epoch": 0.35, + "grad_norm": 52.65917476286998, + "learning_rate": 1.888688581387454e-05, + "loss": 1.6083, + "step": 29451 + }, + { + "epoch": 0.35, + "grad_norm": 15.436628826159115, + "learning_rate": 1.8886617946483522e-05, + "loss": 1.5684, + "step": 29454 + }, + { + "epoch": 0.35, + "grad_norm": 20.4451587231433, + "learning_rate": 1.888635004876568e-05, + "loss": 1.1672, + "step": 29457 + }, + { + "epoch": 0.35, + "grad_norm": 19.948385056582687, + "learning_rate": 1.8886082120721923e-05, + "loss": 1.3549, + "step": 29460 + }, + { + "epoch": 0.35, + "grad_norm": 12.136618477597528, + "learning_rate": 1.888581416235317e-05, + "loss": 1.5445, + "step": 29463 + }, + { + "epoch": 0.35, + "grad_norm": 9.849592940988689, + "learning_rate": 1.8885546173660335e-05, + "loss": 1.4566, + "step": 29466 + }, + { + "epoch": 0.35, + "grad_norm": 10.897574778453954, + "learning_rate": 1.888527815464433e-05, + "loss": 1.6103, + "step": 29469 + }, + { + "epoch": 0.35, + "grad_norm": 32.63116481807234, + "learning_rate": 1.888501010530607e-05, + "loss": 1.6251, + "step": 29472 + }, + { + "epoch": 0.35, + "grad_norm": 10.23511299863941, + "learning_rate": 1.888474202564647e-05, + "loss": 1.3715, + "step": 29475 + }, + { + "epoch": 0.35, + "grad_norm": 17.000275901085356, + "learning_rate": 1.888447391566645e-05, + "loss": 1.6659, + "step": 29478 + }, + { + "epoch": 0.35, + "grad_norm": 11.57181689024335, + "learning_rate": 1.888420577536692e-05, + "loss": 1.6559, + "step": 29481 + }, + { + "epoch": 0.35, + "grad_norm": 35.24941058583593, + "learning_rate": 1.8883937604748793e-05, + "loss": 1.5172, + "step": 29484 + }, + { + "epoch": 0.35, + "grad_norm": 19.11619394507311, + "learning_rate": 1.888366940381299e-05, + "loss": 1.7501, + "step": 29487 + }, + { + "epoch": 0.35, + "grad_norm": 36.049469738265856, + "learning_rate": 1.8883401172560417e-05, + "loss": 1.5788, + "step": 29490 + }, + { + "epoch": 0.35, + "grad_norm": 19.079503761634818, + "learning_rate": 1.8883132910992e-05, + "loss": 1.492, + "step": 29493 + }, + { + "epoch": 0.35, + "grad_norm": 4.260964966443622, + "learning_rate": 1.8882864619108653e-05, + "loss": 1.5959, + "step": 29496 + }, + { + "epoch": 0.35, + "grad_norm": 6.0498954488222445, + "learning_rate": 1.8882596296911282e-05, + "loss": 1.6356, + "step": 29499 + }, + { + "epoch": 0.35, + "grad_norm": 50.56102610144604, + "learning_rate": 1.888232794440082e-05, + "loss": 1.3939, + "step": 29502 + }, + { + "epoch": 0.35, + "grad_norm": 18.517529798706942, + "learning_rate": 1.888205956157816e-05, + "loss": 1.3683, + "step": 29505 + }, + { + "epoch": 0.35, + "grad_norm": 15.325208092422933, + "learning_rate": 1.8881791148444236e-05, + "loss": 1.3172, + "step": 29508 + }, + { + "epoch": 0.35, + "grad_norm": 22.335519162259704, + "learning_rate": 1.8881522704999958e-05, + "loss": 1.5397, + "step": 29511 + }, + { + "epoch": 0.35, + "grad_norm": 19.76080821715425, + "learning_rate": 1.888125423124624e-05, + "loss": 1.5287, + "step": 29514 + }, + { + "epoch": 0.35, + "grad_norm": 58.596340183049946, + "learning_rate": 1.8880985727184002e-05, + "loss": 1.5382, + "step": 29517 + }, + { + "epoch": 0.35, + "grad_norm": 8.358662541540266, + "learning_rate": 1.888071719281416e-05, + "loss": 1.4642, + "step": 29520 + }, + { + "epoch": 0.36, + "grad_norm": 15.29559738580067, + "learning_rate": 1.8880448628137625e-05, + "loss": 1.5895, + "step": 29523 + }, + { + "epoch": 0.36, + "grad_norm": 14.292724805978292, + "learning_rate": 1.888018003315532e-05, + "loss": 1.4642, + "step": 29526 + }, + { + "epoch": 0.36, + "grad_norm": 77.6365338832411, + "learning_rate": 1.8879911407868162e-05, + "loss": 1.2371, + "step": 29529 + }, + { + "epoch": 0.36, + "grad_norm": 29.11079229270714, + "learning_rate": 1.8879642752277058e-05, + "loss": 1.62, + "step": 29532 + }, + { + "epoch": 0.36, + "grad_norm": 10.014238815041521, + "learning_rate": 1.8879374066382937e-05, + "loss": 1.4757, + "step": 29535 + }, + { + "epoch": 0.36, + "grad_norm": 8.969323904187135, + "learning_rate": 1.887910535018671e-05, + "loss": 1.5087, + "step": 29538 + }, + { + "epoch": 0.36, + "grad_norm": 20.670727355162008, + "learning_rate": 1.8878836603689294e-05, + "loss": 1.5819, + "step": 29541 + }, + { + "epoch": 0.36, + "grad_norm": 50.369364084170385, + "learning_rate": 1.8878567826891607e-05, + "loss": 1.7712, + "step": 29544 + }, + { + "epoch": 0.36, + "grad_norm": 196.85081985791453, + "learning_rate": 1.8878299019794566e-05, + "loss": 1.0105, + "step": 29547 + }, + { + "epoch": 0.36, + "grad_norm": 10.881359867537727, + "learning_rate": 1.887803018239909e-05, + "loss": 1.4616, + "step": 29550 + }, + { + "epoch": 0.36, + "grad_norm": 176.06713039014494, + "learning_rate": 1.887776131470609e-05, + "loss": 1.1712, + "step": 29553 + }, + { + "epoch": 0.36, + "grad_norm": 60.98291090342156, + "learning_rate": 1.8877492416716495e-05, + "loss": 1.4485, + "step": 29556 + }, + { + "epoch": 0.36, + "grad_norm": 17.48417251632209, + "learning_rate": 1.8877223488431207e-05, + "loss": 1.5613, + "step": 29559 + }, + { + "epoch": 0.36, + "grad_norm": 12.896951753828345, + "learning_rate": 1.887695452985116e-05, + "loss": 1.6145, + "step": 29562 + }, + { + "epoch": 0.36, + "grad_norm": 11.784092279616027, + "learning_rate": 1.8876685540977264e-05, + "loss": 1.5136, + "step": 29565 + }, + { + "epoch": 0.36, + "grad_norm": 36.76179323979105, + "learning_rate": 1.8876416521810437e-05, + "loss": 1.7232, + "step": 29568 + }, + { + "epoch": 0.36, + "grad_norm": 8.799807647390603, + "learning_rate": 1.8876147472351598e-05, + "loss": 1.4292, + "step": 29571 + }, + { + "epoch": 0.36, + "grad_norm": 41.432215417122684, + "learning_rate": 1.887587839260166e-05, + "loss": 1.5912, + "step": 29574 + }, + { + "epoch": 0.36, + "grad_norm": 21.692755385205395, + "learning_rate": 1.8875609282561552e-05, + "loss": 1.4652, + "step": 29577 + }, + { + "epoch": 0.36, + "grad_norm": 34.07365846449932, + "learning_rate": 1.8875340142232185e-05, + "loss": 1.5547, + "step": 29580 + }, + { + "epoch": 0.36, + "grad_norm": 25.493330896822382, + "learning_rate": 1.8875070971614478e-05, + "loss": 1.4608, + "step": 29583 + }, + { + "epoch": 0.36, + "grad_norm": 20.06242801696066, + "learning_rate": 1.8874801770709353e-05, + "loss": 1.9492, + "step": 29586 + }, + { + "epoch": 0.36, + "grad_norm": 11.673485114432474, + "learning_rate": 1.8874532539517725e-05, + "loss": 1.5339, + "step": 29589 + }, + { + "epoch": 0.36, + "grad_norm": 64.06003912585943, + "learning_rate": 1.8874263278040512e-05, + "loss": 1.3179, + "step": 29592 + }, + { + "epoch": 0.36, + "grad_norm": 50.775784588543914, + "learning_rate": 1.8873993986278635e-05, + "loss": 1.4486, + "step": 29595 + }, + { + "epoch": 0.36, + "grad_norm": 21.403488187504006, + "learning_rate": 1.8873724664233016e-05, + "loss": 1.5486, + "step": 29598 + }, + { + "epoch": 0.36, + "grad_norm": 9.924695307702759, + "learning_rate": 1.887345531190457e-05, + "loss": 1.4372, + "step": 29601 + }, + { + "epoch": 0.36, + "grad_norm": 47.51528931597449, + "learning_rate": 1.887318592929422e-05, + "loss": 2.0293, + "step": 29604 + }, + { + "epoch": 0.36, + "grad_norm": 28.59860046455586, + "learning_rate": 1.8872916516402876e-05, + "loss": 1.7301, + "step": 29607 + }, + { + "epoch": 0.36, + "grad_norm": 15.971218847643224, + "learning_rate": 1.8872647073231468e-05, + "loss": 1.4085, + "step": 29610 + }, + { + "epoch": 0.36, + "grad_norm": 10.309526439836684, + "learning_rate": 1.8872377599780912e-05, + "loss": 1.5304, + "step": 29613 + }, + { + "epoch": 0.36, + "grad_norm": 18.05960257230687, + "learning_rate": 1.8872108096052124e-05, + "loss": 1.4255, + "step": 29616 + }, + { + "epoch": 0.36, + "grad_norm": 14.188198286402557, + "learning_rate": 1.8871838562046027e-05, + "loss": 1.8799, + "step": 29619 + }, + { + "epoch": 0.36, + "grad_norm": 145.1507373923855, + "learning_rate": 1.8871568997763544e-05, + "loss": 1.1918, + "step": 29622 + }, + { + "epoch": 0.36, + "grad_norm": 59.40282086373679, + "learning_rate": 1.8871299403205586e-05, + "loss": 1.715, + "step": 29625 + }, + { + "epoch": 0.36, + "grad_norm": 11.41511953742534, + "learning_rate": 1.8871029778373085e-05, + "loss": 1.6335, + "step": 29628 + }, + { + "epoch": 0.36, + "grad_norm": 7.8463770111057975, + "learning_rate": 1.8870760123266953e-05, + "loss": 1.6163, + "step": 29631 + }, + { + "epoch": 0.36, + "grad_norm": 13.53063509003746, + "learning_rate": 1.887049043788811e-05, + "loss": 1.7052, + "step": 29634 + }, + { + "epoch": 0.36, + "grad_norm": 9.801232778956305, + "learning_rate": 1.8870220722237478e-05, + "loss": 1.5411, + "step": 29637 + }, + { + "epoch": 0.36, + "grad_norm": 8.336182637903825, + "learning_rate": 1.886995097631598e-05, + "loss": 1.4371, + "step": 29640 + }, + { + "epoch": 0.36, + "grad_norm": 43.90906537601124, + "learning_rate": 1.8869681200124532e-05, + "loss": 2.0111, + "step": 29643 + }, + { + "epoch": 0.36, + "grad_norm": 23.15865377565063, + "learning_rate": 1.8869411393664057e-05, + "loss": 1.274, + "step": 29646 + }, + { + "epoch": 0.36, + "grad_norm": 20.787393114906678, + "learning_rate": 1.8869141556935477e-05, + "loss": 1.2138, + "step": 29649 + }, + { + "epoch": 0.36, + "grad_norm": 16.90783987253793, + "learning_rate": 1.8868871689939712e-05, + "loss": 1.6144, + "step": 29652 + }, + { + "epoch": 0.36, + "grad_norm": 31.602582664932548, + "learning_rate": 1.886860179267768e-05, + "loss": 1.4891, + "step": 29655 + }, + { + "epoch": 0.36, + "grad_norm": 14.030936934777559, + "learning_rate": 1.8868331865150306e-05, + "loss": 1.9095, + "step": 29658 + }, + { + "epoch": 0.36, + "grad_norm": 2.7952881192779166, + "learning_rate": 1.886806190735851e-05, + "loss": 1.3136, + "step": 29661 + }, + { + "epoch": 0.36, + "grad_norm": 9.01201510628787, + "learning_rate": 1.8867791919303212e-05, + "loss": 1.6892, + "step": 29664 + }, + { + "epoch": 0.36, + "grad_norm": 6.623882529375787, + "learning_rate": 1.8867521900985337e-05, + "loss": 1.4329, + "step": 29667 + }, + { + "epoch": 0.36, + "grad_norm": 30.4550539138466, + "learning_rate": 1.88672518524058e-05, + "loss": 1.2641, + "step": 29670 + }, + { + "epoch": 0.36, + "grad_norm": 11.401608492217912, + "learning_rate": 1.886698177356553e-05, + "loss": 1.4504, + "step": 29673 + }, + { + "epoch": 0.36, + "grad_norm": 23.66403658011299, + "learning_rate": 1.8866711664465445e-05, + "loss": 1.6208, + "step": 29676 + }, + { + "epoch": 0.36, + "grad_norm": 10.884994722716387, + "learning_rate": 1.8866441525106466e-05, + "loss": 1.3298, + "step": 29679 + }, + { + "epoch": 0.36, + "grad_norm": 18.08297730475067, + "learning_rate": 1.8866171355489512e-05, + "loss": 1.5362, + "step": 29682 + }, + { + "epoch": 0.36, + "grad_norm": 19.304949237611712, + "learning_rate": 1.8865901155615512e-05, + "loss": 1.4897, + "step": 29685 + }, + { + "epoch": 0.36, + "grad_norm": 9.562480836983687, + "learning_rate": 1.8865630925485386e-05, + "loss": 1.345, + "step": 29688 + }, + { + "epoch": 0.36, + "grad_norm": 48.6349418445797, + "learning_rate": 1.8865360665100053e-05, + "loss": 1.7782, + "step": 29691 + }, + { + "epoch": 0.36, + "grad_norm": 11.288585289229886, + "learning_rate": 1.8865090374460438e-05, + "loss": 1.226, + "step": 29694 + }, + { + "epoch": 0.36, + "grad_norm": 24.547392997536175, + "learning_rate": 1.8864820053567462e-05, + "loss": 1.7157, + "step": 29697 + }, + { + "epoch": 0.36, + "grad_norm": 15.829618243706399, + "learning_rate": 1.886454970242205e-05, + "loss": 1.6282, + "step": 29700 + }, + { + "epoch": 0.36, + "grad_norm": 11.447647772963654, + "learning_rate": 1.8864279321025122e-05, + "loss": 1.3278, + "step": 29703 + }, + { + "epoch": 0.36, + "grad_norm": 34.25588228759866, + "learning_rate": 1.88640089093776e-05, + "loss": 1.5176, + "step": 29706 + }, + { + "epoch": 0.36, + "grad_norm": 17.061026221033472, + "learning_rate": 1.8863738467480412e-05, + "loss": 1.2591, + "step": 29709 + }, + { + "epoch": 0.36, + "grad_norm": 10.666411069850259, + "learning_rate": 1.8863467995334474e-05, + "loss": 1.6889, + "step": 29712 + }, + { + "epoch": 0.36, + "grad_norm": 20.238585638893483, + "learning_rate": 1.886319749294071e-05, + "loss": 1.7904, + "step": 29715 + }, + { + "epoch": 0.36, + "grad_norm": 6.390903616303411, + "learning_rate": 1.8862926960300053e-05, + "loss": 1.4954, + "step": 29718 + }, + { + "epoch": 0.36, + "grad_norm": 15.717664557981184, + "learning_rate": 1.8862656397413413e-05, + "loss": 1.674, + "step": 29721 + }, + { + "epoch": 0.36, + "grad_norm": 18.911168553852608, + "learning_rate": 1.886238580428172e-05, + "loss": 1.4975, + "step": 29724 + }, + { + "epoch": 0.36, + "grad_norm": 16.2662311627193, + "learning_rate": 1.8862115180905896e-05, + "loss": 1.4788, + "step": 29727 + }, + { + "epoch": 0.36, + "grad_norm": 4.022850729729246, + "learning_rate": 1.8861844527286866e-05, + "loss": 1.4938, + "step": 29730 + }, + { + "epoch": 0.36, + "grad_norm": 16.52977107737474, + "learning_rate": 1.8861573843425554e-05, + "loss": 1.679, + "step": 29733 + }, + { + "epoch": 0.36, + "grad_norm": 14.831457421792527, + "learning_rate": 1.8861303129322883e-05, + "loss": 1.3976, + "step": 29736 + }, + { + "epoch": 0.36, + "grad_norm": 21.87525690999385, + "learning_rate": 1.8861032384979773e-05, + "loss": 1.2277, + "step": 29739 + }, + { + "epoch": 0.36, + "grad_norm": 36.30873531609063, + "learning_rate": 1.8860761610397154e-05, + "loss": 1.8011, + "step": 29742 + }, + { + "epoch": 0.36, + "grad_norm": 11.044128043128836, + "learning_rate": 1.8860490805575942e-05, + "loss": 1.4118, + "step": 29745 + }, + { + "epoch": 0.36, + "grad_norm": 24.055253371052448, + "learning_rate": 1.8860219970517074e-05, + "loss": 1.3801, + "step": 29748 + }, + { + "epoch": 0.36, + "grad_norm": 20.004309477990606, + "learning_rate": 1.885994910522146e-05, + "loss": 1.4177, + "step": 29751 + }, + { + "epoch": 0.36, + "grad_norm": 16.894584557514253, + "learning_rate": 1.8859678209690035e-05, + "loss": 1.4162, + "step": 29754 + }, + { + "epoch": 0.36, + "grad_norm": 15.075414508433294, + "learning_rate": 1.885940728392372e-05, + "loss": 1.3125, + "step": 29757 + }, + { + "epoch": 0.36, + "grad_norm": 9.817649596648666, + "learning_rate": 1.8859136327923438e-05, + "loss": 1.5238, + "step": 29760 + }, + { + "epoch": 0.36, + "grad_norm": 10.434009945657747, + "learning_rate": 1.8858865341690117e-05, + "loss": 1.5875, + "step": 29763 + }, + { + "epoch": 0.36, + "grad_norm": 12.660474427984402, + "learning_rate": 1.8858594325224673e-05, + "loss": 1.7889, + "step": 29766 + }, + { + "epoch": 0.36, + "grad_norm": 14.409454641229596, + "learning_rate": 1.8858323278528043e-05, + "loss": 1.557, + "step": 29769 + }, + { + "epoch": 0.36, + "grad_norm": 5.451344670226069, + "learning_rate": 1.8858052201601143e-05, + "loss": 1.0551, + "step": 29772 + }, + { + "epoch": 0.36, + "grad_norm": 6.152909723833914, + "learning_rate": 1.8857781094444904e-05, + "loss": 1.8487, + "step": 29775 + }, + { + "epoch": 0.36, + "grad_norm": 5.645538089428308, + "learning_rate": 1.885750995706025e-05, + "loss": 1.8191, + "step": 29778 + }, + { + "epoch": 0.36, + "grad_norm": 21.29552046626337, + "learning_rate": 1.88572387894481e-05, + "loss": 1.9106, + "step": 29781 + }, + { + "epoch": 0.36, + "grad_norm": 9.49698850175565, + "learning_rate": 1.885696759160939e-05, + "loss": 1.4814, + "step": 29784 + }, + { + "epoch": 0.36, + "grad_norm": 116.48760346507648, + "learning_rate": 1.8856696363545034e-05, + "loss": 1.4399, + "step": 29787 + }, + { + "epoch": 0.36, + "grad_norm": 18.18257158524322, + "learning_rate": 1.885642510525597e-05, + "loss": 1.3949, + "step": 29790 + }, + { + "epoch": 0.36, + "grad_norm": 37.39376720356238, + "learning_rate": 1.885615381674311e-05, + "loss": 1.7487, + "step": 29793 + }, + { + "epoch": 0.36, + "grad_norm": 18.20009985543251, + "learning_rate": 1.885588249800739e-05, + "loss": 1.3104, + "step": 29796 + }, + { + "epoch": 0.36, + "grad_norm": 45.94433105990383, + "learning_rate": 1.8855611149049738e-05, + "loss": 1.548, + "step": 29799 + }, + { + "epoch": 0.36, + "grad_norm": 75.14292190693864, + "learning_rate": 1.885533976987107e-05, + "loss": 1.7264, + "step": 29802 + }, + { + "epoch": 0.36, + "grad_norm": 134.145137448032, + "learning_rate": 1.8855068360472314e-05, + "loss": 1.236, + "step": 29805 + }, + { + "epoch": 0.36, + "grad_norm": 32.14837643365684, + "learning_rate": 1.8854796920854407e-05, + "loss": 1.5658, + "step": 29808 + }, + { + "epoch": 0.36, + "grad_norm": 28.735440764856207, + "learning_rate": 1.8854525451018263e-05, + "loss": 1.6094, + "step": 29811 + }, + { + "epoch": 0.36, + "grad_norm": 10.570958524743212, + "learning_rate": 1.8854253950964813e-05, + "loss": 1.4068, + "step": 29814 + }, + { + "epoch": 0.36, + "grad_norm": 7.724946253707728, + "learning_rate": 1.8853982420694983e-05, + "loss": 1.9027, + "step": 29817 + }, + { + "epoch": 0.36, + "grad_norm": 75.3437728676255, + "learning_rate": 1.8853710860209703e-05, + "loss": 1.5951, + "step": 29820 + }, + { + "epoch": 0.36, + "grad_norm": 56.70762756802267, + "learning_rate": 1.8853439269509893e-05, + "loss": 1.4498, + "step": 29823 + }, + { + "epoch": 0.36, + "grad_norm": 58.5429143784578, + "learning_rate": 1.8853167648596486e-05, + "loss": 1.6562, + "step": 29826 + }, + { + "epoch": 0.36, + "grad_norm": 126.11244162376626, + "learning_rate": 1.885289599747041e-05, + "loss": 1.8395, + "step": 29829 + }, + { + "epoch": 0.36, + "grad_norm": 60.82766134644541, + "learning_rate": 1.8852624316132583e-05, + "loss": 2.002, + "step": 29832 + }, + { + "epoch": 0.36, + "grad_norm": 4.7126751294628635, + "learning_rate": 1.885235260458394e-05, + "loss": 1.9529, + "step": 29835 + }, + { + "epoch": 0.36, + "grad_norm": 20.07401748229538, + "learning_rate": 1.885208086282541e-05, + "loss": 1.5141, + "step": 29838 + }, + { + "epoch": 0.36, + "grad_norm": 9.553107351058523, + "learning_rate": 1.885180909085791e-05, + "loss": 1.3912, + "step": 29841 + }, + { + "epoch": 0.36, + "grad_norm": 178.77315714339525, + "learning_rate": 1.8851537288682376e-05, + "loss": 1.4435, + "step": 29844 + }, + { + "epoch": 0.36, + "grad_norm": 54.67594440309661, + "learning_rate": 1.8851265456299733e-05, + "loss": 1.3765, + "step": 29847 + }, + { + "epoch": 0.36, + "grad_norm": 8.29665290276214, + "learning_rate": 1.8850993593710913e-05, + "loss": 2.0381, + "step": 29850 + }, + { + "epoch": 0.36, + "grad_norm": 37.59755444566779, + "learning_rate": 1.885072170091684e-05, + "loss": 1.6639, + "step": 29853 + }, + { + "epoch": 0.36, + "grad_norm": 59.63429413936627, + "learning_rate": 1.885044977791844e-05, + "loss": 1.4741, + "step": 29856 + }, + { + "epoch": 0.36, + "grad_norm": 21.251085725470027, + "learning_rate": 1.885017782471664e-05, + "loss": 1.5644, + "step": 29859 + }, + { + "epoch": 0.36, + "grad_norm": 19.770647447711145, + "learning_rate": 1.8849905841312376e-05, + "loss": 1.6061, + "step": 29862 + }, + { + "epoch": 0.36, + "grad_norm": 4.957069824430351, + "learning_rate": 1.8849633827706566e-05, + "loss": 1.708, + "step": 29865 + }, + { + "epoch": 0.36, + "grad_norm": 18.94576854346883, + "learning_rate": 1.8849361783900146e-05, + "loss": 1.4212, + "step": 29868 + }, + { + "epoch": 0.36, + "grad_norm": 9.703449670360527, + "learning_rate": 1.884908970989404e-05, + "loss": 1.7882, + "step": 29871 + }, + { + "epoch": 0.36, + "grad_norm": 14.490546429721928, + "learning_rate": 1.8848817605689183e-05, + "loss": 1.6514, + "step": 29874 + }, + { + "epoch": 0.36, + "grad_norm": 13.233886438671504, + "learning_rate": 1.8848545471286493e-05, + "loss": 1.4025, + "step": 29877 + }, + { + "epoch": 0.36, + "grad_norm": 38.41641570223089, + "learning_rate": 1.884827330668691e-05, + "loss": 1.3417, + "step": 29880 + }, + { + "epoch": 0.36, + "grad_norm": 10.720212094515427, + "learning_rate": 1.884800111189135e-05, + "loss": 1.549, + "step": 29883 + }, + { + "epoch": 0.36, + "grad_norm": 98.02647309033534, + "learning_rate": 1.8847728886900758e-05, + "loss": 1.6193, + "step": 29886 + }, + { + "epoch": 0.36, + "grad_norm": 12.004642118752933, + "learning_rate": 1.884745663171605e-05, + "loss": 1.8374, + "step": 29889 + }, + { + "epoch": 0.36, + "grad_norm": 15.29669484012775, + "learning_rate": 1.8847184346338156e-05, + "loss": 1.4824, + "step": 29892 + }, + { + "epoch": 0.36, + "grad_norm": 8.791142588529839, + "learning_rate": 1.8846912030768015e-05, + "loss": 1.5203, + "step": 29895 + }, + { + "epoch": 0.36, + "grad_norm": 9.951311549423286, + "learning_rate": 1.8846639685006546e-05, + "loss": 1.4421, + "step": 29898 + }, + { + "epoch": 0.36, + "grad_norm": 30.766547833488907, + "learning_rate": 1.884636730905468e-05, + "loss": 1.6515, + "step": 29901 + }, + { + "epoch": 0.36, + "grad_norm": 21.78707262573432, + "learning_rate": 1.8846094902913353e-05, + "loss": 1.4606, + "step": 29904 + }, + { + "epoch": 0.36, + "grad_norm": 12.316475796015103, + "learning_rate": 1.884582246658349e-05, + "loss": 1.5621, + "step": 29907 + }, + { + "epoch": 0.36, + "grad_norm": 7.286990283035418, + "learning_rate": 1.8845550000066022e-05, + "loss": 1.7011, + "step": 29910 + }, + { + "epoch": 0.36, + "grad_norm": 11.446286200168714, + "learning_rate": 1.8845277503361876e-05, + "loss": 1.6146, + "step": 29913 + }, + { + "epoch": 0.36, + "grad_norm": 9.519148338016024, + "learning_rate": 1.8845004976471983e-05, + "loss": 1.3087, + "step": 29916 + }, + { + "epoch": 0.36, + "grad_norm": 13.118296445379631, + "learning_rate": 1.8844732419397275e-05, + "loss": 1.3925, + "step": 29919 + }, + { + "epoch": 0.36, + "grad_norm": 13.292078918052532, + "learning_rate": 1.8844459832138683e-05, + "loss": 1.8092, + "step": 29922 + }, + { + "epoch": 0.36, + "grad_norm": 6.283762280267037, + "learning_rate": 1.8844187214697132e-05, + "loss": 1.3789, + "step": 29925 + }, + { + "epoch": 0.36, + "grad_norm": 7.311987575541003, + "learning_rate": 1.884391456707356e-05, + "loss": 1.6288, + "step": 29928 + }, + { + "epoch": 0.36, + "grad_norm": 22.37157095137199, + "learning_rate": 1.8843641889268892e-05, + "loss": 1.514, + "step": 29931 + }, + { + "epoch": 0.36, + "grad_norm": 8.185477826085082, + "learning_rate": 1.8843369181284057e-05, + "loss": 1.1149, + "step": 29934 + }, + { + "epoch": 0.36, + "grad_norm": 10.44093205769567, + "learning_rate": 1.884309644311999e-05, + "loss": 1.4485, + "step": 29937 + }, + { + "epoch": 0.36, + "grad_norm": 9.16182048877683, + "learning_rate": 1.8842823674777618e-05, + "loss": 1.4505, + "step": 29940 + }, + { + "epoch": 0.36, + "grad_norm": 7.237043099638605, + "learning_rate": 1.8842550876257876e-05, + "loss": 1.1658, + "step": 29943 + }, + { + "epoch": 0.36, + "grad_norm": 12.550515934978497, + "learning_rate": 1.8842278047561693e-05, + "loss": 1.307, + "step": 29946 + }, + { + "epoch": 0.36, + "grad_norm": 6.112766272548814, + "learning_rate": 1.884200518869e-05, + "loss": 1.7048, + "step": 29949 + }, + { + "epoch": 0.36, + "grad_norm": 6.178564171221674, + "learning_rate": 1.884173229964373e-05, + "loss": 1.2286, + "step": 29952 + }, + { + "epoch": 0.36, + "grad_norm": 19.522335226112972, + "learning_rate": 1.884145938042381e-05, + "loss": 1.4593, + "step": 29955 + }, + { + "epoch": 0.36, + "grad_norm": 5.835573550743424, + "learning_rate": 1.8841186431031174e-05, + "loss": 1.4261, + "step": 29958 + }, + { + "epoch": 0.36, + "grad_norm": 17.14676444036813, + "learning_rate": 1.8840913451466755e-05, + "loss": 1.3705, + "step": 29961 + }, + { + "epoch": 0.36, + "grad_norm": 74.17518138350574, + "learning_rate": 1.884064044173148e-05, + "loss": 1.1978, + "step": 29964 + }, + { + "epoch": 0.36, + "grad_norm": 18.928490314222316, + "learning_rate": 1.8840367401826284e-05, + "loss": 1.8345, + "step": 29967 + }, + { + "epoch": 0.36, + "grad_norm": 77.82833256992403, + "learning_rate": 1.88400943317521e-05, + "loss": 1.6267, + "step": 29970 + }, + { + "epoch": 0.36, + "grad_norm": 17.818338498391174, + "learning_rate": 1.8839821231509855e-05, + "loss": 1.4038, + "step": 29973 + }, + { + "epoch": 0.36, + "grad_norm": 8.467891302815913, + "learning_rate": 1.883954810110049e-05, + "loss": 1.4668, + "step": 29976 + }, + { + "epoch": 0.36, + "grad_norm": 18.266355032952806, + "learning_rate": 1.8839274940524927e-05, + "loss": 1.339, + "step": 29979 + }, + { + "epoch": 0.36, + "grad_norm": 3.7406042566618356, + "learning_rate": 1.88390017497841e-05, + "loss": 1.4049, + "step": 29982 + }, + { + "epoch": 0.36, + "grad_norm": 16.723864762345073, + "learning_rate": 1.8838728528878952e-05, + "loss": 1.4625, + "step": 29985 + }, + { + "epoch": 0.36, + "grad_norm": 10.911150241407027, + "learning_rate": 1.88384552778104e-05, + "loss": 1.3197, + "step": 29988 + }, + { + "epoch": 0.36, + "grad_norm": 12.83922656506397, + "learning_rate": 1.883818199657939e-05, + "loss": 1.5286, + "step": 29991 + }, + { + "epoch": 0.36, + "grad_norm": 11.098181267399738, + "learning_rate": 1.883790868518684e-05, + "loss": 1.7124, + "step": 29994 + }, + { + "epoch": 0.36, + "grad_norm": 11.966971535788296, + "learning_rate": 1.8837635343633697e-05, + "loss": 1.567, + "step": 29997 + }, + { + "epoch": 0.36, + "grad_norm": 32.21796519951015, + "learning_rate": 1.8837361971920888e-05, + "loss": 1.6872, + "step": 30000 + }, + { + "epoch": 0.36, + "grad_norm": 14.0358793786694, + "learning_rate": 1.8837088570049345e-05, + "loss": 1.5747, + "step": 30003 + }, + { + "epoch": 0.36, + "grad_norm": 7.58284041044524, + "learning_rate": 1.883681513802e-05, + "loss": 1.7042, + "step": 30006 + }, + { + "epoch": 0.36, + "grad_norm": 13.26870203201432, + "learning_rate": 1.883654167583379e-05, + "loss": 1.3835, + "step": 30009 + }, + { + "epoch": 0.36, + "grad_norm": 4.751331157130628, + "learning_rate": 1.8836268183491647e-05, + "loss": 1.5579, + "step": 30012 + }, + { + "epoch": 0.36, + "grad_norm": 65.01821713199853, + "learning_rate": 1.88359946609945e-05, + "loss": 1.2015, + "step": 30015 + }, + { + "epoch": 0.36, + "grad_norm": 27.891178298819053, + "learning_rate": 1.8835721108343287e-05, + "loss": 1.8759, + "step": 30018 + }, + { + "epoch": 0.36, + "grad_norm": 14.948280487175746, + "learning_rate": 1.883544752553894e-05, + "loss": 1.481, + "step": 30021 + }, + { + "epoch": 0.36, + "grad_norm": 14.285989947919292, + "learning_rate": 1.8835173912582396e-05, + "loss": 1.513, + "step": 30024 + }, + { + "epoch": 0.36, + "grad_norm": 47.30324836301676, + "learning_rate": 1.8834900269474582e-05, + "loss": 1.5115, + "step": 30027 + }, + { + "epoch": 0.36, + "grad_norm": 11.023793662514251, + "learning_rate": 1.883462659621644e-05, + "loss": 1.8187, + "step": 30030 + }, + { + "epoch": 0.36, + "grad_norm": 8.878521247742462, + "learning_rate": 1.8834352892808897e-05, + "loss": 1.1959, + "step": 30033 + }, + { + "epoch": 0.36, + "grad_norm": 13.180790580524047, + "learning_rate": 1.8834079159252893e-05, + "loss": 1.7752, + "step": 30036 + }, + { + "epoch": 0.36, + "grad_norm": 12.021094111484395, + "learning_rate": 1.8833805395549353e-05, + "loss": 1.1663, + "step": 30039 + }, + { + "epoch": 0.36, + "grad_norm": 13.893387453926477, + "learning_rate": 1.8833531601699222e-05, + "loss": 1.7708, + "step": 30042 + }, + { + "epoch": 0.36, + "grad_norm": 7.31305728811759, + "learning_rate": 1.8833257777703426e-05, + "loss": 1.7202, + "step": 30045 + }, + { + "epoch": 0.36, + "grad_norm": 21.783512444949903, + "learning_rate": 1.8832983923562907e-05, + "loss": 1.4076, + "step": 30048 + }, + { + "epoch": 0.36, + "grad_norm": 9.621010675952986, + "learning_rate": 1.8832710039278592e-05, + "loss": 1.5895, + "step": 30051 + }, + { + "epoch": 0.36, + "grad_norm": 7.021628973748112, + "learning_rate": 1.883243612485142e-05, + "loss": 1.6821, + "step": 30054 + }, + { + "epoch": 0.36, + "grad_norm": 85.40790233340876, + "learning_rate": 1.8832162180282325e-05, + "loss": 1.5936, + "step": 30057 + }, + { + "epoch": 0.36, + "grad_norm": 45.08070624479845, + "learning_rate": 1.8831888205572242e-05, + "loss": 1.5054, + "step": 30060 + }, + { + "epoch": 0.36, + "grad_norm": 14.132017131681906, + "learning_rate": 1.883161420072211e-05, + "loss": 1.5223, + "step": 30063 + }, + { + "epoch": 0.36, + "grad_norm": 38.835242538412324, + "learning_rate": 1.883134016573285e-05, + "loss": 1.4218, + "step": 30066 + }, + { + "epoch": 0.36, + "grad_norm": 33.450315753905805, + "learning_rate": 1.8831066100605414e-05, + "loss": 1.5332, + "step": 30069 + }, + { + "epoch": 0.36, + "grad_norm": 10.260165025300214, + "learning_rate": 1.883079200534073e-05, + "loss": 1.3576, + "step": 30072 + }, + { + "epoch": 0.36, + "grad_norm": 20.188378943192017, + "learning_rate": 1.8830517879939735e-05, + "loss": 1.471, + "step": 30075 + }, + { + "epoch": 0.36, + "grad_norm": 9.946870861483093, + "learning_rate": 1.8830243724403358e-05, + "loss": 1.4513, + "step": 30078 + }, + { + "epoch": 0.36, + "grad_norm": 6.30552457440582, + "learning_rate": 1.882996953873254e-05, + "loss": 1.5699, + "step": 30081 + }, + { + "epoch": 0.36, + "grad_norm": 13.328250237969693, + "learning_rate": 1.8829695322928225e-05, + "loss": 1.6598, + "step": 30084 + }, + { + "epoch": 0.36, + "grad_norm": 17.551485070552133, + "learning_rate": 1.882942107699133e-05, + "loss": 1.4328, + "step": 30087 + }, + { + "epoch": 0.36, + "grad_norm": 41.74665050273025, + "learning_rate": 1.882914680092281e-05, + "loss": 1.8002, + "step": 30090 + }, + { + "epoch": 0.36, + "grad_norm": 21.689834623134697, + "learning_rate": 1.8828872494723588e-05, + "loss": 1.3141, + "step": 30093 + }, + { + "epoch": 0.36, + "grad_norm": 2.4988492577536077, + "learning_rate": 1.88285981583946e-05, + "loss": 1.5984, + "step": 30096 + }, + { + "epoch": 0.36, + "grad_norm": 10.538317907737788, + "learning_rate": 1.8828323791936795e-05, + "loss": 1.5868, + "step": 30099 + }, + { + "epoch": 0.36, + "grad_norm": 17.873817661330964, + "learning_rate": 1.8828049395351094e-05, + "loss": 1.4876, + "step": 30102 + }, + { + "epoch": 0.36, + "grad_norm": 15.570922588158666, + "learning_rate": 1.8827774968638444e-05, + "loss": 1.5447, + "step": 30105 + }, + { + "epoch": 0.36, + "grad_norm": 8.710086618293827, + "learning_rate": 1.8827500511799776e-05, + "loss": 1.4596, + "step": 30108 + }, + { + "epoch": 0.36, + "grad_norm": 32.58073726672365, + "learning_rate": 1.882722602483603e-05, + "loss": 1.6819, + "step": 30111 + }, + { + "epoch": 0.36, + "grad_norm": 9.004820420511635, + "learning_rate": 1.882695150774814e-05, + "loss": 1.2228, + "step": 30114 + }, + { + "epoch": 0.36, + "grad_norm": 11.751854958201253, + "learning_rate": 1.8826676960537044e-05, + "loss": 1.4965, + "step": 30117 + }, + { + "epoch": 0.36, + "grad_norm": 9.533165134771235, + "learning_rate": 1.882640238320368e-05, + "loss": 1.8661, + "step": 30120 + }, + { + "epoch": 0.36, + "grad_norm": 53.701539848538594, + "learning_rate": 1.8826127775748985e-05, + "loss": 1.6534, + "step": 30123 + }, + { + "epoch": 0.36, + "grad_norm": 65.96061000252435, + "learning_rate": 1.882585313817389e-05, + "loss": 1.7817, + "step": 30126 + }, + { + "epoch": 0.36, + "grad_norm": 14.340234641597727, + "learning_rate": 1.8825578470479343e-05, + "loss": 1.4346, + "step": 30129 + }, + { + "epoch": 0.36, + "grad_norm": 6.112594405990599, + "learning_rate": 1.882530377266627e-05, + "loss": 1.5437, + "step": 30132 + }, + { + "epoch": 0.36, + "grad_norm": 33.79285859404131, + "learning_rate": 1.8825029044735618e-05, + "loss": 1.5808, + "step": 30135 + }, + { + "epoch": 0.36, + "grad_norm": 26.29688120943066, + "learning_rate": 1.882475428668832e-05, + "loss": 1.3167, + "step": 30138 + }, + { + "epoch": 0.36, + "grad_norm": 12.806841713287493, + "learning_rate": 1.8824479498525315e-05, + "loss": 1.2149, + "step": 30141 + }, + { + "epoch": 0.36, + "grad_norm": 4.670578470200441, + "learning_rate": 1.882420468024754e-05, + "loss": 1.5588, + "step": 30144 + }, + { + "epoch": 0.36, + "grad_norm": 9.785664153382317, + "learning_rate": 1.882392983185593e-05, + "loss": 1.3441, + "step": 30147 + }, + { + "epoch": 0.36, + "grad_norm": 14.652946015336829, + "learning_rate": 1.882365495335143e-05, + "loss": 1.5229, + "step": 30150 + }, + { + "epoch": 0.36, + "grad_norm": 37.35569411168246, + "learning_rate": 1.882338004473497e-05, + "loss": 1.5763, + "step": 30153 + }, + { + "epoch": 0.36, + "grad_norm": 16.141989442047052, + "learning_rate": 1.8823105106007494e-05, + "loss": 1.4785, + "step": 30156 + }, + { + "epoch": 0.36, + "grad_norm": 30.113850877631926, + "learning_rate": 1.8822830137169936e-05, + "loss": 1.6878, + "step": 30159 + }, + { + "epoch": 0.36, + "grad_norm": 109.45184266767258, + "learning_rate": 1.8822555138223243e-05, + "loss": 1.3194, + "step": 30162 + }, + { + "epoch": 0.36, + "grad_norm": 2.7510453864752016, + "learning_rate": 1.882228010916834e-05, + "loss": 1.4944, + "step": 30165 + }, + { + "epoch": 0.36, + "grad_norm": 6.769590365455929, + "learning_rate": 1.8822005050006176e-05, + "loss": 1.3454, + "step": 30168 + }, + { + "epoch": 0.36, + "grad_norm": 13.254327146116696, + "learning_rate": 1.8821729960737687e-05, + "loss": 1.3745, + "step": 30171 + }, + { + "epoch": 0.36, + "grad_norm": 7.047365535557032, + "learning_rate": 1.882145484136381e-05, + "loss": 1.488, + "step": 30174 + }, + { + "epoch": 0.36, + "grad_norm": 23.145134017675286, + "learning_rate": 1.8821179691885488e-05, + "loss": 1.8205, + "step": 30177 + }, + { + "epoch": 0.36, + "grad_norm": 8.380656298951099, + "learning_rate": 1.8820904512303652e-05, + "loss": 1.5802, + "step": 30180 + }, + { + "epoch": 0.36, + "grad_norm": 25.01520173692995, + "learning_rate": 1.8820629302619248e-05, + "loss": 1.2017, + "step": 30183 + }, + { + "epoch": 0.36, + "grad_norm": 4.669075952684404, + "learning_rate": 1.8820354062833213e-05, + "loss": 1.3341, + "step": 30186 + }, + { + "epoch": 0.36, + "grad_norm": 68.23666185472419, + "learning_rate": 1.882007879294649e-05, + "loss": 1.3602, + "step": 30189 + }, + { + "epoch": 0.36, + "grad_norm": 28.756194822431908, + "learning_rate": 1.8819803492960013e-05, + "loss": 1.734, + "step": 30192 + }, + { + "epoch": 0.36, + "grad_norm": 6.996467474263834, + "learning_rate": 1.8819528162874724e-05, + "loss": 1.3726, + "step": 30195 + }, + { + "epoch": 0.36, + "grad_norm": 9.178146469725792, + "learning_rate": 1.8819252802691562e-05, + "loss": 1.4089, + "step": 30198 + }, + { + "epoch": 0.36, + "grad_norm": 23.00738043041226, + "learning_rate": 1.8818977412411466e-05, + "loss": 1.5778, + "step": 30201 + }, + { + "epoch": 0.36, + "grad_norm": 7.361762398182015, + "learning_rate": 1.881870199203538e-05, + "loss": 1.4438, + "step": 30204 + }, + { + "epoch": 0.36, + "grad_norm": 88.14078825893625, + "learning_rate": 1.8818426541564237e-05, + "loss": 1.3676, + "step": 30207 + }, + { + "epoch": 0.36, + "grad_norm": 20.38991866359404, + "learning_rate": 1.881815106099898e-05, + "loss": 1.1501, + "step": 30210 + }, + { + "epoch": 0.36, + "grad_norm": 18.135957166491362, + "learning_rate": 1.8817875550340548e-05, + "loss": 1.3001, + "step": 30213 + }, + { + "epoch": 0.36, + "grad_norm": 10.498012452380351, + "learning_rate": 1.8817600009589888e-05, + "loss": 1.5534, + "step": 30216 + }, + { + "epoch": 0.36, + "grad_norm": 5.54254302854729, + "learning_rate": 1.8817324438747934e-05, + "loss": 1.6007, + "step": 30219 + }, + { + "epoch": 0.36, + "grad_norm": 24.66072893733422, + "learning_rate": 1.8817048837815627e-05, + "loss": 1.3953, + "step": 30222 + }, + { + "epoch": 0.36, + "grad_norm": 13.20887861073566, + "learning_rate": 1.8816773206793907e-05, + "loss": 1.6434, + "step": 30225 + }, + { + "epoch": 0.36, + "grad_norm": 75.91307259691769, + "learning_rate": 1.8816497545683717e-05, + "loss": 1.2362, + "step": 30228 + }, + { + "epoch": 0.36, + "grad_norm": 24.71074396814485, + "learning_rate": 1.8816221854485996e-05, + "loss": 1.5437, + "step": 30231 + }, + { + "epoch": 0.36, + "grad_norm": 11.78274203031841, + "learning_rate": 1.8815946133201687e-05, + "loss": 1.2802, + "step": 30234 + }, + { + "epoch": 0.36, + "grad_norm": 12.31305073124888, + "learning_rate": 1.8815670381831727e-05, + "loss": 1.4899, + "step": 30237 + }, + { + "epoch": 0.36, + "grad_norm": 6.858835828541955, + "learning_rate": 1.881539460037706e-05, + "loss": 1.4868, + "step": 30240 + }, + { + "epoch": 0.36, + "grad_norm": 9.72532300890361, + "learning_rate": 1.8815118788838627e-05, + "loss": 1.6786, + "step": 30243 + }, + { + "epoch": 0.36, + "grad_norm": 34.93133975873405, + "learning_rate": 1.881484294721737e-05, + "loss": 1.6746, + "step": 30246 + }, + { + "epoch": 0.36, + "grad_norm": 13.970872684920577, + "learning_rate": 1.8814567075514228e-05, + "loss": 1.564, + "step": 30249 + }, + { + "epoch": 0.36, + "grad_norm": 338.9100564767783, + "learning_rate": 1.8814291173730143e-05, + "loss": 1.2789, + "step": 30252 + }, + { + "epoch": 0.36, + "grad_norm": 19.76884031051552, + "learning_rate": 1.8814015241866058e-05, + "loss": 1.3576, + "step": 30255 + }, + { + "epoch": 0.36, + "grad_norm": 8.452268103865517, + "learning_rate": 1.8813739279922914e-05, + "loss": 1.7358, + "step": 30258 + }, + { + "epoch": 0.36, + "grad_norm": 84.59160182146837, + "learning_rate": 1.881346328790165e-05, + "loss": 1.4769, + "step": 30261 + }, + { + "epoch": 0.36, + "grad_norm": 14.59592052469125, + "learning_rate": 1.8813187265803214e-05, + "loss": 1.4635, + "step": 30264 + }, + { + "epoch": 0.36, + "grad_norm": 9.827324851182638, + "learning_rate": 1.881291121362854e-05, + "loss": 1.4645, + "step": 30267 + }, + { + "epoch": 0.36, + "grad_norm": 22.597185927485516, + "learning_rate": 1.881263513137858e-05, + "loss": 1.1351, + "step": 30270 + }, + { + "epoch": 0.36, + "grad_norm": 11.545818676368244, + "learning_rate": 1.8812359019054268e-05, + "loss": 1.4345, + "step": 30273 + }, + { + "epoch": 0.36, + "grad_norm": 39.10638132222731, + "learning_rate": 1.881208287665655e-05, + "loss": 1.3711, + "step": 30276 + }, + { + "epoch": 0.36, + "grad_norm": 13.983463247661648, + "learning_rate": 1.8811806704186365e-05, + "loss": 1.2789, + "step": 30279 + }, + { + "epoch": 0.36, + "grad_norm": 75.44168737221108, + "learning_rate": 1.8811530501644658e-05, + "loss": 1.3104, + "step": 30282 + }, + { + "epoch": 0.36, + "grad_norm": 6.43094694477484, + "learning_rate": 1.8811254269032373e-05, + "loss": 1.6612, + "step": 30285 + }, + { + "epoch": 0.36, + "grad_norm": 20.47595144681899, + "learning_rate": 1.881097800635045e-05, + "loss": 1.7002, + "step": 30288 + }, + { + "epoch": 0.36, + "grad_norm": 32.12247966405726, + "learning_rate": 1.881070171359983e-05, + "loss": 1.3805, + "step": 30291 + }, + { + "epoch": 0.36, + "grad_norm": 7.763558828726911, + "learning_rate": 1.8810425390781463e-05, + "loss": 1.1409, + "step": 30294 + }, + { + "epoch": 0.36, + "grad_norm": 42.16350245464632, + "learning_rate": 1.8810149037896287e-05, + "loss": 1.6781, + "step": 30297 + }, + { + "epoch": 0.36, + "grad_norm": 12.13948617821141, + "learning_rate": 1.8809872654945246e-05, + "loss": 1.4177, + "step": 30300 + }, + { + "epoch": 0.36, + "grad_norm": 11.373451239071578, + "learning_rate": 1.880959624192928e-05, + "loss": 1.2499, + "step": 30303 + }, + { + "epoch": 0.36, + "grad_norm": 17.427146973252274, + "learning_rate": 1.8809319798849337e-05, + "loss": 1.1839, + "step": 30306 + }, + { + "epoch": 0.36, + "grad_norm": 71.5433603638933, + "learning_rate": 1.8809043325706357e-05, + "loss": 1.3935, + "step": 30309 + }, + { + "epoch": 0.36, + "grad_norm": 12.097547082107212, + "learning_rate": 1.8808766822501286e-05, + "loss": 1.8371, + "step": 30312 + }, + { + "epoch": 0.36, + "grad_norm": 71.12658569893225, + "learning_rate": 1.8808490289235067e-05, + "loss": 1.6316, + "step": 30315 + }, + { + "epoch": 0.36, + "grad_norm": 10.649180706754347, + "learning_rate": 1.8808213725908645e-05, + "loss": 1.357, + "step": 30318 + }, + { + "epoch": 0.36, + "grad_norm": 100.96019499897835, + "learning_rate": 1.880793713252296e-05, + "loss": 1.469, + "step": 30321 + }, + { + "epoch": 0.36, + "grad_norm": 11.282892872102513, + "learning_rate": 1.880766050907896e-05, + "loss": 1.6, + "step": 30324 + }, + { + "epoch": 0.36, + "grad_norm": 11.567806872156204, + "learning_rate": 1.8807383855577588e-05, + "loss": 1.5559, + "step": 30327 + }, + { + "epoch": 0.36, + "grad_norm": 7.481351343997569, + "learning_rate": 1.8807107172019785e-05, + "loss": 1.609, + "step": 30330 + }, + { + "epoch": 0.36, + "grad_norm": 54.417353952976555, + "learning_rate": 1.88068304584065e-05, + "loss": 1.5848, + "step": 30333 + }, + { + "epoch": 0.36, + "grad_norm": 79.68449207283066, + "learning_rate": 1.880655371473867e-05, + "loss": 1.2905, + "step": 30336 + }, + { + "epoch": 0.36, + "grad_norm": 21.722165112749277, + "learning_rate": 1.8806276941017247e-05, + "loss": 1.5721, + "step": 30339 + }, + { + "epoch": 0.36, + "grad_norm": 5.244963915701207, + "learning_rate": 1.8806000137243176e-05, + "loss": 1.4727, + "step": 30342 + }, + { + "epoch": 0.36, + "grad_norm": 8.271658660451893, + "learning_rate": 1.8805723303417395e-05, + "loss": 1.3057, + "step": 30345 + }, + { + "epoch": 0.36, + "grad_norm": 25.068919047775807, + "learning_rate": 1.8805446439540853e-05, + "loss": 1.1705, + "step": 30348 + }, + { + "epoch": 0.36, + "grad_norm": 17.453192146673846, + "learning_rate": 1.8805169545614494e-05, + "loss": 1.8778, + "step": 30351 + }, + { + "epoch": 0.36, + "grad_norm": 13.12271143850309, + "learning_rate": 1.880489262163926e-05, + "loss": 1.3333, + "step": 30354 + }, + { + "epoch": 0.37, + "grad_norm": 6.423475019052588, + "learning_rate": 1.8804615667616103e-05, + "loss": 1.3469, + "step": 30357 + }, + { + "epoch": 0.37, + "grad_norm": 142.95216278706496, + "learning_rate": 1.8804338683545966e-05, + "loss": 1.6245, + "step": 30360 + }, + { + "epoch": 0.37, + "grad_norm": 14.759874358405897, + "learning_rate": 1.8804061669429784e-05, + "loss": 1.6089, + "step": 30363 + }, + { + "epoch": 0.37, + "grad_norm": 37.66978167665004, + "learning_rate": 1.8803784625268518e-05, + "loss": 1.657, + "step": 30366 + }, + { + "epoch": 0.37, + "grad_norm": 68.63197173880116, + "learning_rate": 1.8803507551063104e-05, + "loss": 1.6461, + "step": 30369 + }, + { + "epoch": 0.37, + "grad_norm": 12.588641663467738, + "learning_rate": 1.880323044681449e-05, + "loss": 1.276, + "step": 30372 + }, + { + "epoch": 0.37, + "grad_norm": 42.15077761496004, + "learning_rate": 1.8802953312523618e-05, + "loss": 1.4588, + "step": 30375 + }, + { + "epoch": 0.37, + "grad_norm": 11.459440474803165, + "learning_rate": 1.8802676148191443e-05, + "loss": 1.5059, + "step": 30378 + }, + { + "epoch": 0.37, + "grad_norm": 38.038085865780666, + "learning_rate": 1.8802398953818902e-05, + "loss": 1.3399, + "step": 30381 + }, + { + "epoch": 0.37, + "grad_norm": 7.779673476501409, + "learning_rate": 1.8802121729406947e-05, + "loss": 1.6165, + "step": 30384 + }, + { + "epoch": 0.37, + "grad_norm": 37.54659317196004, + "learning_rate": 1.8801844474956517e-05, + "loss": 1.4685, + "step": 30387 + }, + { + "epoch": 0.37, + "grad_norm": 18.91391616687599, + "learning_rate": 1.8801567190468565e-05, + "loss": 1.7361, + "step": 30390 + }, + { + "epoch": 0.37, + "grad_norm": 18.492385555718332, + "learning_rate": 1.8801289875944034e-05, + "loss": 1.3127, + "step": 30393 + }, + { + "epoch": 0.37, + "grad_norm": 18.26098905954207, + "learning_rate": 1.8801012531383868e-05, + "loss": 1.1646, + "step": 30396 + }, + { + "epoch": 0.37, + "grad_norm": 9.697388208768551, + "learning_rate": 1.8800735156789017e-05, + "loss": 1.3134, + "step": 30399 + }, + { + "epoch": 0.37, + "grad_norm": 9.374015634924811, + "learning_rate": 1.880045775216043e-05, + "loss": 1.4042, + "step": 30402 + }, + { + "epoch": 0.37, + "grad_norm": 38.462331233257444, + "learning_rate": 1.8800180317499047e-05, + "loss": 1.6444, + "step": 30405 + }, + { + "epoch": 0.37, + "grad_norm": 5.433080936628109, + "learning_rate": 1.8799902852805824e-05, + "loss": 1.8828, + "step": 30408 + }, + { + "epoch": 0.37, + "grad_norm": 10.700843409044523, + "learning_rate": 1.8799625358081695e-05, + "loss": 1.3022, + "step": 30411 + }, + { + "epoch": 0.37, + "grad_norm": 100.25544117827853, + "learning_rate": 1.879934783332762e-05, + "loss": 1.8299, + "step": 30414 + }, + { + "epoch": 0.37, + "grad_norm": 16.07977066581353, + "learning_rate": 1.8799070278544537e-05, + "loss": 1.4531, + "step": 30417 + }, + { + "epoch": 0.37, + "grad_norm": 19.739564616351856, + "learning_rate": 1.8798792693733396e-05, + "loss": 1.7568, + "step": 30420 + }, + { + "epoch": 0.37, + "grad_norm": 20.46966754164535, + "learning_rate": 1.879851507889515e-05, + "loss": 1.398, + "step": 30423 + }, + { + "epoch": 0.37, + "grad_norm": 29.7583213532166, + "learning_rate": 1.8798237434030735e-05, + "loss": 1.5386, + "step": 30426 + }, + { + "epoch": 0.37, + "grad_norm": 7.729823107704632, + "learning_rate": 1.879795975914111e-05, + "loss": 1.7487, + "step": 30429 + }, + { + "epoch": 0.37, + "grad_norm": 9.139958143007986, + "learning_rate": 1.8797682054227214e-05, + "loss": 1.5071, + "step": 30432 + }, + { + "epoch": 0.37, + "grad_norm": 13.546774838043458, + "learning_rate": 1.879740431929e-05, + "loss": 1.2494, + "step": 30435 + }, + { + "epoch": 0.37, + "grad_norm": 14.698316071710899, + "learning_rate": 1.879712655433041e-05, + "loss": 1.982, + "step": 30438 + }, + { + "epoch": 0.37, + "grad_norm": 6.530943275087183, + "learning_rate": 1.8796848759349398e-05, + "loss": 1.5238, + "step": 30441 + }, + { + "epoch": 0.37, + "grad_norm": 7.83456004299618, + "learning_rate": 1.8796570934347912e-05, + "loss": 1.0916, + "step": 30444 + }, + { + "epoch": 0.37, + "grad_norm": 15.021012369970693, + "learning_rate": 1.8796293079326898e-05, + "loss": 0.9961, + "step": 30447 + }, + { + "epoch": 0.37, + "grad_norm": 9.9294093666058, + "learning_rate": 1.87960151942873e-05, + "loss": 1.4328, + "step": 30450 + }, + { + "epoch": 0.37, + "grad_norm": 6.1178017433021425, + "learning_rate": 1.8795737279230074e-05, + "loss": 1.2412, + "step": 30453 + }, + { + "epoch": 0.37, + "grad_norm": 9.858377284963955, + "learning_rate": 1.8795459334156162e-05, + "loss": 1.5517, + "step": 30456 + }, + { + "epoch": 0.37, + "grad_norm": 11.229371266512763, + "learning_rate": 1.879518135906652e-05, + "loss": 1.8232, + "step": 30459 + }, + { + "epoch": 0.37, + "grad_norm": 20.950369050976093, + "learning_rate": 1.879490335396209e-05, + "loss": 1.5967, + "step": 30462 + }, + { + "epoch": 0.37, + "grad_norm": 13.648904440638885, + "learning_rate": 1.879462531884382e-05, + "loss": 1.9544, + "step": 30465 + }, + { + "epoch": 0.37, + "grad_norm": 17.26877809127973, + "learning_rate": 1.8794347253712662e-05, + "loss": 1.4057, + "step": 30468 + }, + { + "epoch": 0.37, + "grad_norm": 14.14360259326377, + "learning_rate": 1.8794069158569567e-05, + "loss": 1.1903, + "step": 30471 + }, + { + "epoch": 0.37, + "grad_norm": 7.638643265029286, + "learning_rate": 1.8793791033415482e-05, + "loss": 1.7074, + "step": 30474 + }, + { + "epoch": 0.37, + "grad_norm": 7.420229085539307, + "learning_rate": 1.8793512878251355e-05, + "loss": 2.0415, + "step": 30477 + }, + { + "epoch": 0.37, + "grad_norm": 42.71836983888155, + "learning_rate": 1.8793234693078134e-05, + "loss": 1.8785, + "step": 30480 + }, + { + "epoch": 0.37, + "grad_norm": 23.87770581502969, + "learning_rate": 1.8792956477896772e-05, + "loss": 1.7363, + "step": 30483 + }, + { + "epoch": 0.37, + "grad_norm": 17.103674692888763, + "learning_rate": 1.8792678232708218e-05, + "loss": 1.4028, + "step": 30486 + }, + { + "epoch": 0.37, + "grad_norm": 11.30693951796822, + "learning_rate": 1.8792399957513417e-05, + "loss": 1.2303, + "step": 30489 + }, + { + "epoch": 0.37, + "grad_norm": 15.263977275018489, + "learning_rate": 1.8792121652313324e-05, + "loss": 1.5638, + "step": 30492 + }, + { + "epoch": 0.37, + "grad_norm": 15.652952181247699, + "learning_rate": 1.8791843317108887e-05, + "loss": 1.3882, + "step": 30495 + }, + { + "epoch": 0.37, + "grad_norm": 11.36436237872736, + "learning_rate": 1.8791564951901057e-05, + "loss": 1.1361, + "step": 30498 + }, + { + "epoch": 0.37, + "grad_norm": 16.10416815194042, + "learning_rate": 1.8791286556690777e-05, + "loss": 1.3929, + "step": 30501 + }, + { + "epoch": 0.37, + "grad_norm": 4.023049549039625, + "learning_rate": 1.8791008131479007e-05, + "loss": 1.2738, + "step": 30504 + }, + { + "epoch": 0.37, + "grad_norm": 43.90591656238959, + "learning_rate": 1.879072967626669e-05, + "loss": 1.6682, + "step": 30507 + }, + { + "epoch": 0.37, + "grad_norm": 3.6133119888735785, + "learning_rate": 1.879045119105478e-05, + "loss": 1.4061, + "step": 30510 + }, + { + "epoch": 0.37, + "grad_norm": 5.631181034577221, + "learning_rate": 1.879017267584423e-05, + "loss": 1.6796, + "step": 30513 + }, + { + "epoch": 0.37, + "grad_norm": 8.62073301815352, + "learning_rate": 1.8789894130635985e-05, + "loss": 1.8175, + "step": 30516 + }, + { + "epoch": 0.37, + "grad_norm": 10.299411456210004, + "learning_rate": 1.8789615555430994e-05, + "loss": 1.6956, + "step": 30519 + }, + { + "epoch": 0.37, + "grad_norm": 14.485678980700916, + "learning_rate": 1.8789336950230215e-05, + "loss": 1.2529, + "step": 30522 + }, + { + "epoch": 0.37, + "grad_norm": 3.2345633819660606, + "learning_rate": 1.8789058315034594e-05, + "loss": 1.7616, + "step": 30525 + }, + { + "epoch": 0.37, + "grad_norm": 22.756764573402585, + "learning_rate": 1.878877964984508e-05, + "loss": 1.6485, + "step": 30528 + }, + { + "epoch": 0.37, + "grad_norm": 12.99876769075551, + "learning_rate": 1.878850095466263e-05, + "loss": 1.3456, + "step": 30531 + }, + { + "epoch": 0.37, + "grad_norm": 11.5518984359008, + "learning_rate": 1.8788222229488193e-05, + "loss": 1.1903, + "step": 30534 + }, + { + "epoch": 0.37, + "grad_norm": 12.534773404812215, + "learning_rate": 1.8787943474322717e-05, + "loss": 1.5999, + "step": 30537 + }, + { + "epoch": 0.37, + "grad_norm": 9.417229381469543, + "learning_rate": 1.8787664689167156e-05, + "loss": 1.3647, + "step": 30540 + }, + { + "epoch": 0.37, + "grad_norm": 28.470344338991307, + "learning_rate": 1.8787385874022457e-05, + "loss": 1.6853, + "step": 30543 + }, + { + "epoch": 0.37, + "grad_norm": 11.981171909011618, + "learning_rate": 1.8787107028889577e-05, + "loss": 1.37, + "step": 30546 + }, + { + "epoch": 0.37, + "grad_norm": 9.092026688155956, + "learning_rate": 1.878682815376947e-05, + "loss": 1.3409, + "step": 30549 + }, + { + "epoch": 0.37, + "grad_norm": 22.498828540640957, + "learning_rate": 1.878654924866308e-05, + "loss": 1.4577, + "step": 30552 + }, + { + "epoch": 0.37, + "grad_norm": 15.123194681390611, + "learning_rate": 1.878627031357136e-05, + "loss": 1.3105, + "step": 30555 + }, + { + "epoch": 0.37, + "grad_norm": 11.293064354267921, + "learning_rate": 1.878599134849527e-05, + "loss": 1.4368, + "step": 30558 + }, + { + "epoch": 0.37, + "grad_norm": 10.24829692264597, + "learning_rate": 1.8785712353435754e-05, + "loss": 1.3941, + "step": 30561 + }, + { + "epoch": 0.37, + "grad_norm": 6.586930212917328, + "learning_rate": 1.8785433328393764e-05, + "loss": 1.2882, + "step": 30564 + }, + { + "epoch": 0.37, + "grad_norm": 12.879911587788609, + "learning_rate": 1.8785154273370257e-05, + "loss": 1.5004, + "step": 30567 + }, + { + "epoch": 0.37, + "grad_norm": 13.12016304481904, + "learning_rate": 1.878487518836618e-05, + "loss": 1.5819, + "step": 30570 + }, + { + "epoch": 0.37, + "grad_norm": 7.570317807902873, + "learning_rate": 1.878459607338249e-05, + "loss": 1.6917, + "step": 30573 + }, + { + "epoch": 0.37, + "grad_norm": 7.8633848434944, + "learning_rate": 1.878431692842014e-05, + "loss": 1.2261, + "step": 30576 + }, + { + "epoch": 0.37, + "grad_norm": 15.884610989994577, + "learning_rate": 1.878403775348008e-05, + "loss": 1.5041, + "step": 30579 + }, + { + "epoch": 0.37, + "grad_norm": 25.758217304248955, + "learning_rate": 1.878375854856326e-05, + "loss": 1.6239, + "step": 30582 + }, + { + "epoch": 0.37, + "grad_norm": 34.703121057615874, + "learning_rate": 1.878347931367064e-05, + "loss": 1.6678, + "step": 30585 + }, + { + "epoch": 0.37, + "grad_norm": 15.04510269039575, + "learning_rate": 1.8783200048803165e-05, + "loss": 1.3854, + "step": 30588 + }, + { + "epoch": 0.37, + "grad_norm": 12.451642987621561, + "learning_rate": 1.8782920753961794e-05, + "loss": 1.2413, + "step": 30591 + }, + { + "epoch": 0.37, + "grad_norm": 12.472014243475213, + "learning_rate": 1.8782641429147475e-05, + "loss": 1.7424, + "step": 30594 + }, + { + "epoch": 0.37, + "grad_norm": 13.32145323018367, + "learning_rate": 1.8782362074361166e-05, + "loss": 1.82, + "step": 30597 + }, + { + "epoch": 0.37, + "grad_norm": 24.276838847831115, + "learning_rate": 1.878208268960382e-05, + "loss": 0.9911, + "step": 30600 + }, + { + "epoch": 0.37, + "grad_norm": 21.800651091674418, + "learning_rate": 1.878180327487639e-05, + "loss": 1.2437, + "step": 30603 + }, + { + "epoch": 0.37, + "grad_norm": 14.082965194271285, + "learning_rate": 1.878152383017983e-05, + "loss": 1.295, + "step": 30606 + }, + { + "epoch": 0.37, + "grad_norm": 4.5325426882111755, + "learning_rate": 1.8781244355515086e-05, + "loss": 1.2662, + "step": 30609 + }, + { + "epoch": 0.37, + "grad_norm": 6.657706500661121, + "learning_rate": 1.8780964850883122e-05, + "loss": 1.4529, + "step": 30612 + }, + { + "epoch": 0.37, + "grad_norm": 3.224670762552337, + "learning_rate": 1.8780685316284888e-05, + "loss": 1.4497, + "step": 30615 + }, + { + "epoch": 0.37, + "grad_norm": 12.116662149989656, + "learning_rate": 1.8780405751721335e-05, + "loss": 1.5201, + "step": 30618 + }, + { + "epoch": 0.37, + "grad_norm": 9.772245588497894, + "learning_rate": 1.8780126157193422e-05, + "loss": 1.2978, + "step": 30621 + }, + { + "epoch": 0.37, + "grad_norm": 5.765110156370539, + "learning_rate": 1.8779846532702102e-05, + "loss": 1.3774, + "step": 30624 + }, + { + "epoch": 0.37, + "grad_norm": 17.26881154955118, + "learning_rate": 1.8779566878248326e-05, + "loss": 1.6331, + "step": 30627 + }, + { + "epoch": 0.37, + "grad_norm": 22.891396650752245, + "learning_rate": 1.8779287193833054e-05, + "loss": 1.3503, + "step": 30630 + }, + { + "epoch": 0.37, + "grad_norm": 6.804688244429223, + "learning_rate": 1.8779007479457233e-05, + "loss": 1.639, + "step": 30633 + }, + { + "epoch": 0.37, + "grad_norm": 9.712613539565712, + "learning_rate": 1.8778727735121823e-05, + "loss": 1.5263, + "step": 30636 + }, + { + "epoch": 0.37, + "grad_norm": 16.401261711787644, + "learning_rate": 1.877844796082778e-05, + "loss": 1.7334, + "step": 30639 + }, + { + "epoch": 0.37, + "grad_norm": 36.42778542661748, + "learning_rate": 1.8778168156576055e-05, + "loss": 1.5722, + "step": 30642 + }, + { + "epoch": 0.37, + "grad_norm": 12.989651016674875, + "learning_rate": 1.87778883223676e-05, + "loss": 1.0948, + "step": 30645 + }, + { + "epoch": 0.37, + "grad_norm": 15.567812599427988, + "learning_rate": 1.8777608458203377e-05, + "loss": 1.3999, + "step": 30648 + }, + { + "epoch": 0.37, + "grad_norm": 70.47835559853812, + "learning_rate": 1.877732856408434e-05, + "loss": 1.3684, + "step": 30651 + }, + { + "epoch": 0.37, + "grad_norm": 32.45325519788154, + "learning_rate": 1.877704864001144e-05, + "loss": 1.6717, + "step": 30654 + }, + { + "epoch": 0.37, + "grad_norm": 9.11609167006546, + "learning_rate": 1.8776768685985633e-05, + "loss": 1.7943, + "step": 30657 + }, + { + "epoch": 0.37, + "grad_norm": 6.53108165036834, + "learning_rate": 1.877648870200788e-05, + "loss": 1.2392, + "step": 30660 + }, + { + "epoch": 0.37, + "grad_norm": 5.24753294159013, + "learning_rate": 1.8776208688079127e-05, + "loss": 1.4044, + "step": 30663 + }, + { + "epoch": 0.37, + "grad_norm": 23.795539198080846, + "learning_rate": 1.8775928644200338e-05, + "loss": 1.6086, + "step": 30666 + }, + { + "epoch": 0.37, + "grad_norm": 3.190955569818704, + "learning_rate": 1.8775648570372466e-05, + "loss": 1.4723, + "step": 30669 + }, + { + "epoch": 0.37, + "grad_norm": 16.490554791392874, + "learning_rate": 1.8775368466596467e-05, + "loss": 1.4328, + "step": 30672 + }, + { + "epoch": 0.37, + "grad_norm": 30.53460728762125, + "learning_rate": 1.8775088332873292e-05, + "loss": 1.7163, + "step": 30675 + }, + { + "epoch": 0.37, + "grad_norm": 52.10182585059939, + "learning_rate": 1.8774808169203905e-05, + "loss": 1.2778, + "step": 30678 + }, + { + "epoch": 0.37, + "grad_norm": 9.617862545727197, + "learning_rate": 1.8774527975589257e-05, + "loss": 1.6945, + "step": 30681 + }, + { + "epoch": 0.37, + "grad_norm": 16.50924049793554, + "learning_rate": 1.8774247752030307e-05, + "loss": 1.6562, + "step": 30684 + }, + { + "epoch": 0.37, + "grad_norm": 5.641427918732202, + "learning_rate": 1.877396749852801e-05, + "loss": 1.6645, + "step": 30687 + }, + { + "epoch": 0.37, + "grad_norm": 10.902206457417792, + "learning_rate": 1.877368721508332e-05, + "loss": 1.4906, + "step": 30690 + }, + { + "epoch": 0.37, + "grad_norm": 24.08986636780899, + "learning_rate": 1.87734069016972e-05, + "loss": 1.4671, + "step": 30693 + }, + { + "epoch": 0.37, + "grad_norm": 12.565206360885307, + "learning_rate": 1.8773126558370596e-05, + "loss": 1.3036, + "step": 30696 + }, + { + "epoch": 0.37, + "grad_norm": 9.04752448759024, + "learning_rate": 1.877284618510448e-05, + "loss": 1.4417, + "step": 30699 + }, + { + "epoch": 0.37, + "grad_norm": 32.63082004611142, + "learning_rate": 1.877256578189979e-05, + "loss": 1.6799, + "step": 30702 + }, + { + "epoch": 0.37, + "grad_norm": 11.966600681949668, + "learning_rate": 1.87722853487575e-05, + "loss": 1.3987, + "step": 30705 + }, + { + "epoch": 0.37, + "grad_norm": 29.95167086320173, + "learning_rate": 1.8772004885678558e-05, + "loss": 1.408, + "step": 30708 + }, + { + "epoch": 0.37, + "grad_norm": 5.2037850038693625, + "learning_rate": 1.8771724392663923e-05, + "loss": 1.2408, + "step": 30711 + }, + { + "epoch": 0.37, + "grad_norm": 7.000561412804123, + "learning_rate": 1.877144386971455e-05, + "loss": 1.3343, + "step": 30714 + }, + { + "epoch": 0.37, + "grad_norm": 94.61172953235209, + "learning_rate": 1.87711633168314e-05, + "loss": 1.2884, + "step": 30717 + }, + { + "epoch": 0.37, + "grad_norm": 17.13366765954187, + "learning_rate": 1.877088273401543e-05, + "loss": 1.3224, + "step": 30720 + }, + { + "epoch": 0.37, + "grad_norm": 7.9632520506822875, + "learning_rate": 1.8770602121267595e-05, + "loss": 1.534, + "step": 30723 + }, + { + "epoch": 0.37, + "grad_norm": 26.347147019908107, + "learning_rate": 1.8770321478588855e-05, + "loss": 1.8507, + "step": 30726 + }, + { + "epoch": 0.37, + "grad_norm": 11.347104674458894, + "learning_rate": 1.8770040805980165e-05, + "loss": 1.3292, + "step": 30729 + }, + { + "epoch": 0.37, + "grad_norm": 9.297182844602299, + "learning_rate": 1.8769760103442488e-05, + "loss": 1.531, + "step": 30732 + }, + { + "epoch": 0.37, + "grad_norm": 41.36356974864241, + "learning_rate": 1.8769479370976778e-05, + "loss": 1.4581, + "step": 30735 + }, + { + "epoch": 0.37, + "grad_norm": 14.77082701755396, + "learning_rate": 1.876919860858399e-05, + "loss": 1.521, + "step": 30738 + }, + { + "epoch": 0.37, + "grad_norm": 8.673415439460742, + "learning_rate": 1.8768917816265088e-05, + "loss": 1.4404, + "step": 30741 + }, + { + "epoch": 0.37, + "grad_norm": 21.412715547500127, + "learning_rate": 1.876863699402103e-05, + "loss": 1.2436, + "step": 30744 + }, + { + "epoch": 0.37, + "grad_norm": 9.179948458515701, + "learning_rate": 1.876835614185277e-05, + "loss": 1.1049, + "step": 30747 + }, + { + "epoch": 0.37, + "grad_norm": 14.337703250621946, + "learning_rate": 1.876807525976127e-05, + "loss": 1.3702, + "step": 30750 + }, + { + "epoch": 0.37, + "grad_norm": 11.897914208637484, + "learning_rate": 1.8767794347747486e-05, + "loss": 1.8201, + "step": 30753 + }, + { + "epoch": 0.37, + "grad_norm": 13.282296748301384, + "learning_rate": 1.8767513405812382e-05, + "loss": 1.5986, + "step": 30756 + }, + { + "epoch": 0.37, + "grad_norm": 26.160754252685468, + "learning_rate": 1.8767232433956907e-05, + "loss": 1.2931, + "step": 30759 + }, + { + "epoch": 0.37, + "grad_norm": 14.369005709266432, + "learning_rate": 1.876695143218203e-05, + "loss": 1.5287, + "step": 30762 + }, + { + "epoch": 0.37, + "grad_norm": 185.8447399756839, + "learning_rate": 1.8766670400488706e-05, + "loss": 1.7412, + "step": 30765 + }, + { + "epoch": 0.37, + "grad_norm": 6.792844758248562, + "learning_rate": 1.876638933887789e-05, + "loss": 1.5243, + "step": 30768 + }, + { + "epoch": 0.37, + "grad_norm": 12.341309842023323, + "learning_rate": 1.8766108247350546e-05, + "loss": 1.7621, + "step": 30771 + }, + { + "epoch": 0.37, + "grad_norm": 16.873432455768928, + "learning_rate": 1.8765827125907632e-05, + "loss": 1.4932, + "step": 30774 + }, + { + "epoch": 0.37, + "grad_norm": 47.656697867036414, + "learning_rate": 1.8765545974550107e-05, + "loss": 1.3565, + "step": 30777 + }, + { + "epoch": 0.37, + "grad_norm": 10.241380957958869, + "learning_rate": 1.876526479327893e-05, + "loss": 1.4999, + "step": 30780 + }, + { + "epoch": 0.37, + "grad_norm": 17.36771306169517, + "learning_rate": 1.8764983582095066e-05, + "loss": 1.8451, + "step": 30783 + }, + { + "epoch": 0.37, + "grad_norm": 8.021412980787101, + "learning_rate": 1.8764702340999468e-05, + "loss": 1.6157, + "step": 30786 + }, + { + "epoch": 0.37, + "grad_norm": 9.52802317005553, + "learning_rate": 1.87644210699931e-05, + "loss": 1.4953, + "step": 30789 + }, + { + "epoch": 0.37, + "grad_norm": 16.241205054687953, + "learning_rate": 1.8764139769076916e-05, + "loss": 1.4375, + "step": 30792 + }, + { + "epoch": 0.37, + "grad_norm": 6.386769721901275, + "learning_rate": 1.876385843825188e-05, + "loss": 1.4294, + "step": 30795 + }, + { + "epoch": 0.37, + "grad_norm": 27.102691614319674, + "learning_rate": 1.8763577077518956e-05, + "loss": 1.6995, + "step": 30798 + }, + { + "epoch": 0.37, + "grad_norm": 29.59350111628031, + "learning_rate": 1.8763295686879095e-05, + "loss": 1.5276, + "step": 30801 + }, + { + "epoch": 0.37, + "grad_norm": 22.614419149155705, + "learning_rate": 1.8763014266333262e-05, + "loss": 1.4163, + "step": 30804 + }, + { + "epoch": 0.37, + "grad_norm": 23.455725553216286, + "learning_rate": 1.8762732815882423e-05, + "loss": 1.7418, + "step": 30807 + }, + { + "epoch": 0.37, + "grad_norm": 23.76422740523486, + "learning_rate": 1.876245133552753e-05, + "loss": 1.5497, + "step": 30810 + }, + { + "epoch": 0.37, + "grad_norm": 28.37778077231934, + "learning_rate": 1.8762169825269547e-05, + "loss": 1.5727, + "step": 30813 + }, + { + "epoch": 0.37, + "grad_norm": 13.777857298062642, + "learning_rate": 1.8761888285109435e-05, + "loss": 1.4169, + "step": 30816 + }, + { + "epoch": 0.37, + "grad_norm": 16.12203478060162, + "learning_rate": 1.8761606715048155e-05, + "loss": 1.6022, + "step": 30819 + }, + { + "epoch": 0.37, + "grad_norm": 31.297876857486024, + "learning_rate": 1.8761325115086666e-05, + "loss": 1.5259, + "step": 30822 + }, + { + "epoch": 0.37, + "grad_norm": 28.557296071304208, + "learning_rate": 1.876104348522593e-05, + "loss": 1.3108, + "step": 30825 + }, + { + "epoch": 0.37, + "grad_norm": 13.713096878622146, + "learning_rate": 1.8760761825466907e-05, + "loss": 1.6724, + "step": 30828 + }, + { + "epoch": 0.37, + "grad_norm": 4.098241070057062, + "learning_rate": 1.8760480135810562e-05, + "loss": 1.2857, + "step": 30831 + }, + { + "epoch": 0.37, + "grad_norm": 10.555251629388037, + "learning_rate": 1.8760198416257857e-05, + "loss": 1.9139, + "step": 30834 + }, + { + "epoch": 0.37, + "grad_norm": 11.638936533542308, + "learning_rate": 1.8759916666809744e-05, + "loss": 1.8141, + "step": 30837 + }, + { + "epoch": 0.37, + "grad_norm": 14.334939529670406, + "learning_rate": 1.8759634887467192e-05, + "loss": 1.5567, + "step": 30840 + }, + { + "epoch": 0.37, + "grad_norm": 28.664935885551596, + "learning_rate": 1.875935307823116e-05, + "loss": 1.4119, + "step": 30843 + }, + { + "epoch": 0.37, + "grad_norm": 4.998792591892247, + "learning_rate": 1.8759071239102614e-05, + "loss": 1.5152, + "step": 30846 + }, + { + "epoch": 0.37, + "grad_norm": 28.23361758651266, + "learning_rate": 1.8758789370082515e-05, + "loss": 1.7266, + "step": 30849 + }, + { + "epoch": 0.37, + "grad_norm": 10.315843461189536, + "learning_rate": 1.8758507471171817e-05, + "loss": 1.5702, + "step": 30852 + }, + { + "epoch": 0.37, + "grad_norm": 7.2887995364127445, + "learning_rate": 1.8758225542371494e-05, + "loss": 1.4859, + "step": 30855 + }, + { + "epoch": 0.37, + "grad_norm": 18.497793471102586, + "learning_rate": 1.87579435836825e-05, + "loss": 1.6375, + "step": 30858 + }, + { + "epoch": 0.37, + "grad_norm": 21.275540422254487, + "learning_rate": 1.8757661595105798e-05, + "loss": 1.6788, + "step": 30861 + }, + { + "epoch": 0.37, + "grad_norm": 8.259966573019009, + "learning_rate": 1.8757379576642348e-05, + "loss": 1.2556, + "step": 30864 + }, + { + "epoch": 0.37, + "grad_norm": 7.027146371157955, + "learning_rate": 1.875709752829312e-05, + "loss": 1.5533, + "step": 30867 + }, + { + "epoch": 0.37, + "grad_norm": 16.383222919090127, + "learning_rate": 1.8756815450059075e-05, + "loss": 1.7951, + "step": 30870 + }, + { + "epoch": 0.37, + "grad_norm": 16.70462717985768, + "learning_rate": 1.875653334194117e-05, + "loss": 1.5766, + "step": 30873 + }, + { + "epoch": 0.37, + "grad_norm": 63.365475181124964, + "learning_rate": 1.8756251203940368e-05, + "loss": 1.7291, + "step": 30876 + }, + { + "epoch": 0.37, + "grad_norm": 11.716774887008297, + "learning_rate": 1.8755969036057637e-05, + "loss": 1.8064, + "step": 30879 + }, + { + "epoch": 0.37, + "grad_norm": 12.063957308675068, + "learning_rate": 1.8755686838293938e-05, + "loss": 1.5681, + "step": 30882 + }, + { + "epoch": 0.37, + "grad_norm": 2.7765063113764956, + "learning_rate": 1.875540461065023e-05, + "loss": 1.5424, + "step": 30885 + }, + { + "epoch": 0.37, + "grad_norm": 8.73178289836104, + "learning_rate": 1.8755122353127486e-05, + "loss": 1.2183, + "step": 30888 + }, + { + "epoch": 0.37, + "grad_norm": 30.984219257106922, + "learning_rate": 1.875484006572666e-05, + "loss": 1.4989, + "step": 30891 + }, + { + "epoch": 0.37, + "grad_norm": 88.71929343788999, + "learning_rate": 1.8754557748448718e-05, + "loss": 1.7788, + "step": 30894 + }, + { + "epoch": 0.37, + "grad_norm": 19.7305645655104, + "learning_rate": 1.8754275401294625e-05, + "loss": 1.4227, + "step": 30897 + }, + { + "epoch": 0.37, + "grad_norm": 16.264265429987883, + "learning_rate": 1.8753993024265344e-05, + "loss": 1.5604, + "step": 30900 + }, + { + "epoch": 0.37, + "grad_norm": 12.745285719981812, + "learning_rate": 1.8753710617361834e-05, + "loss": 1.5097, + "step": 30903 + }, + { + "epoch": 0.37, + "grad_norm": 19.88233788684762, + "learning_rate": 1.875342818058507e-05, + "loss": 1.5586, + "step": 30906 + }, + { + "epoch": 0.37, + "grad_norm": 44.777045925551825, + "learning_rate": 1.8753145713936002e-05, + "loss": 1.7697, + "step": 30909 + }, + { + "epoch": 0.37, + "grad_norm": 31.81749909113677, + "learning_rate": 1.8752863217415604e-05, + "loss": 1.5462, + "step": 30912 + }, + { + "epoch": 0.37, + "grad_norm": 12.450458337921718, + "learning_rate": 1.875258069102483e-05, + "loss": 1.6257, + "step": 30915 + }, + { + "epoch": 0.37, + "grad_norm": 66.2391407828736, + "learning_rate": 1.8752298134764658e-05, + "loss": 1.4681, + "step": 30918 + }, + { + "epoch": 0.37, + "grad_norm": 9.055584958517384, + "learning_rate": 1.875201554863604e-05, + "loss": 1.7965, + "step": 30921 + }, + { + "epoch": 0.37, + "grad_norm": 9.937554110077922, + "learning_rate": 1.875173293263995e-05, + "loss": 1.6715, + "step": 30924 + }, + { + "epoch": 0.37, + "grad_norm": 61.8306317932695, + "learning_rate": 1.8751450286777348e-05, + "loss": 1.5809, + "step": 30927 + }, + { + "epoch": 0.37, + "grad_norm": 9.893943546324794, + "learning_rate": 1.8751167611049196e-05, + "loss": 1.9336, + "step": 30930 + }, + { + "epoch": 0.37, + "grad_norm": 4.959999365290456, + "learning_rate": 1.875088490545646e-05, + "loss": 1.4925, + "step": 30933 + }, + { + "epoch": 0.37, + "grad_norm": 30.987045203472118, + "learning_rate": 1.875060217000011e-05, + "loss": 1.4578, + "step": 30936 + }, + { + "epoch": 0.37, + "grad_norm": 7.140732706154548, + "learning_rate": 1.87503194046811e-05, + "loss": 1.4308, + "step": 30939 + }, + { + "epoch": 0.37, + "grad_norm": 26.917457205435614, + "learning_rate": 1.875003660950041e-05, + "loss": 1.6379, + "step": 30942 + }, + { + "epoch": 0.37, + "grad_norm": 9.61515987864561, + "learning_rate": 1.874975378445899e-05, + "loss": 1.1888, + "step": 30945 + }, + { + "epoch": 0.37, + "grad_norm": 13.65286384141936, + "learning_rate": 1.8749470929557817e-05, + "loss": 1.2825, + "step": 30948 + }, + { + "epoch": 0.37, + "grad_norm": 28.09062729440265, + "learning_rate": 1.874918804479785e-05, + "loss": 1.3199, + "step": 30951 + }, + { + "epoch": 0.37, + "grad_norm": 17.719693767638386, + "learning_rate": 1.8748905130180053e-05, + "loss": 1.4728, + "step": 30954 + }, + { + "epoch": 0.37, + "grad_norm": 12.46632255898893, + "learning_rate": 1.8748622185705393e-05, + "loss": 1.2579, + "step": 30957 + }, + { + "epoch": 0.37, + "grad_norm": 9.117840552022603, + "learning_rate": 1.874833921137484e-05, + "loss": 1.4176, + "step": 30960 + }, + { + "epoch": 0.37, + "grad_norm": 9.685054321025431, + "learning_rate": 1.874805620718936e-05, + "loss": 1.4932, + "step": 30963 + }, + { + "epoch": 0.37, + "grad_norm": 11.22516575041734, + "learning_rate": 1.874777317314991e-05, + "loss": 1.6283, + "step": 30966 + }, + { + "epoch": 0.37, + "grad_norm": 22.02965398208065, + "learning_rate": 1.874749010925746e-05, + "loss": 1.2993, + "step": 30969 + }, + { + "epoch": 0.37, + "grad_norm": 7.541047123804975, + "learning_rate": 1.874720701551298e-05, + "loss": 1.4112, + "step": 30972 + }, + { + "epoch": 0.37, + "grad_norm": 8.432706657849112, + "learning_rate": 1.874692389191743e-05, + "loss": 1.5455, + "step": 30975 + }, + { + "epoch": 0.37, + "grad_norm": 26.673151693797, + "learning_rate": 1.8746640738471784e-05, + "loss": 1.5442, + "step": 30978 + }, + { + "epoch": 0.37, + "grad_norm": 23.665312179311133, + "learning_rate": 1.8746357555177e-05, + "loss": 1.7104, + "step": 30981 + }, + { + "epoch": 0.37, + "grad_norm": 8.320084783060203, + "learning_rate": 1.874607434203405e-05, + "loss": 1.2125, + "step": 30984 + }, + { + "epoch": 0.37, + "grad_norm": 9.46302502250472, + "learning_rate": 1.87457910990439e-05, + "loss": 1.3194, + "step": 30987 + }, + { + "epoch": 0.37, + "grad_norm": 14.138718460893786, + "learning_rate": 1.8745507826207512e-05, + "loss": 1.3595, + "step": 30990 + }, + { + "epoch": 0.37, + "grad_norm": 7.352220146063064, + "learning_rate": 1.8745224523525857e-05, + "loss": 1.7527, + "step": 30993 + }, + { + "epoch": 0.37, + "grad_norm": 10.634938406538005, + "learning_rate": 1.8744941190999905e-05, + "loss": 1.1956, + "step": 30996 + }, + { + "epoch": 0.37, + "grad_norm": 7.975648011422212, + "learning_rate": 1.874465782863061e-05, + "loss": 1.2907, + "step": 30999 + }, + { + "epoch": 0.37, + "grad_norm": 284.34910493370387, + "learning_rate": 1.8744374436418952e-05, + "loss": 1.6602, + "step": 31002 + }, + { + "epoch": 0.37, + "grad_norm": 50.301854615377714, + "learning_rate": 1.8744091014365893e-05, + "loss": 1.5594, + "step": 31005 + }, + { + "epoch": 0.37, + "grad_norm": 8.527998360760657, + "learning_rate": 1.8743807562472406e-05, + "loss": 1.2443, + "step": 31008 + }, + { + "epoch": 0.37, + "grad_norm": 18.380271149406045, + "learning_rate": 1.874352408073945e-05, + "loss": 1.3049, + "step": 31011 + }, + { + "epoch": 0.37, + "grad_norm": 13.77494899801054, + "learning_rate": 1.8743240569167997e-05, + "loss": 1.2639, + "step": 31014 + }, + { + "epoch": 0.37, + "grad_norm": 12.047117383126421, + "learning_rate": 1.8742957027759007e-05, + "loss": 1.4836, + "step": 31017 + }, + { + "epoch": 0.37, + "grad_norm": 14.137447838085954, + "learning_rate": 1.874267345651346e-05, + "loss": 1.3188, + "step": 31020 + }, + { + "epoch": 0.37, + "grad_norm": 9.75573352790989, + "learning_rate": 1.8742389855432314e-05, + "loss": 1.5837, + "step": 31023 + }, + { + "epoch": 0.37, + "grad_norm": 19.173541752388108, + "learning_rate": 1.8742106224516543e-05, + "loss": 1.3018, + "step": 31026 + }, + { + "epoch": 0.37, + "grad_norm": 79.66192718481004, + "learning_rate": 1.8741822563767112e-05, + "loss": 1.6213, + "step": 31029 + }, + { + "epoch": 0.37, + "grad_norm": 21.2823246262923, + "learning_rate": 1.874153887318499e-05, + "loss": 1.0854, + "step": 31032 + }, + { + "epoch": 0.37, + "grad_norm": 5.38390284201467, + "learning_rate": 1.8741255152771143e-05, + "loss": 1.2801, + "step": 31035 + }, + { + "epoch": 0.37, + "grad_norm": 9.6555593011878, + "learning_rate": 1.874097140252654e-05, + "loss": 1.5037, + "step": 31038 + }, + { + "epoch": 0.37, + "grad_norm": 6.380997447402878, + "learning_rate": 1.8740687622452152e-05, + "loss": 1.1837, + "step": 31041 + }, + { + "epoch": 0.37, + "grad_norm": 42.54273784373736, + "learning_rate": 1.8740403812548944e-05, + "loss": 1.4741, + "step": 31044 + }, + { + "epoch": 0.37, + "grad_norm": 49.57733430272437, + "learning_rate": 1.8740119972817888e-05, + "loss": 1.8514, + "step": 31047 + }, + { + "epoch": 0.37, + "grad_norm": 43.08174524784203, + "learning_rate": 1.873983610325995e-05, + "loss": 1.2299, + "step": 31050 + }, + { + "epoch": 0.37, + "grad_norm": 17.283880481999343, + "learning_rate": 1.87395522038761e-05, + "loss": 1.5505, + "step": 31053 + }, + { + "epoch": 0.37, + "grad_norm": 14.746507256105884, + "learning_rate": 1.8739268274667304e-05, + "loss": 1.4031, + "step": 31056 + }, + { + "epoch": 0.37, + "grad_norm": 26.27361548805071, + "learning_rate": 1.8738984315634533e-05, + "loss": 1.4335, + "step": 31059 + }, + { + "epoch": 0.37, + "grad_norm": 89.32494252857167, + "learning_rate": 1.873870032677876e-05, + "loss": 1.0505, + "step": 31062 + }, + { + "epoch": 0.37, + "grad_norm": 17.785377288035914, + "learning_rate": 1.8738416308100947e-05, + "loss": 1.5092, + "step": 31065 + }, + { + "epoch": 0.37, + "grad_norm": 17.813358366504882, + "learning_rate": 1.873813225960207e-05, + "loss": 1.5049, + "step": 31068 + }, + { + "epoch": 0.37, + "grad_norm": 23.688150138210343, + "learning_rate": 1.873784818128309e-05, + "loss": 1.5436, + "step": 31071 + }, + { + "epoch": 0.37, + "grad_norm": 7.917254070380161, + "learning_rate": 1.8737564073144984e-05, + "loss": 1.1919, + "step": 31074 + }, + { + "epoch": 0.37, + "grad_norm": 42.21767121560064, + "learning_rate": 1.8737279935188718e-05, + "loss": 1.5064, + "step": 31077 + }, + { + "epoch": 0.37, + "grad_norm": 29.68853790062144, + "learning_rate": 1.8736995767415265e-05, + "loss": 1.6118, + "step": 31080 + }, + { + "epoch": 0.37, + "grad_norm": 26.0913894110685, + "learning_rate": 1.8736711569825592e-05, + "loss": 1.3468, + "step": 31083 + }, + { + "epoch": 0.37, + "grad_norm": 27.29738853064918, + "learning_rate": 1.873642734242067e-05, + "loss": 1.5029, + "step": 31086 + }, + { + "epoch": 0.37, + "grad_norm": 38.88221797385108, + "learning_rate": 1.8736143085201468e-05, + "loss": 1.4906, + "step": 31089 + }, + { + "epoch": 0.37, + "grad_norm": 7.126703089766922, + "learning_rate": 1.8735858798168958e-05, + "loss": 1.5633, + "step": 31092 + }, + { + "epoch": 0.37, + "grad_norm": 236.89479277584957, + "learning_rate": 1.8735574481324103e-05, + "loss": 1.5114, + "step": 31095 + }, + { + "epoch": 0.37, + "grad_norm": 4.100332442291844, + "learning_rate": 1.8735290134667884e-05, + "loss": 1.5674, + "step": 31098 + }, + { + "epoch": 0.37, + "grad_norm": 131.2662524226758, + "learning_rate": 1.8735005758201264e-05, + "loss": 1.3389, + "step": 31101 + }, + { + "epoch": 0.37, + "grad_norm": 10.729610547839682, + "learning_rate": 1.8734721351925218e-05, + "loss": 1.645, + "step": 31104 + }, + { + "epoch": 0.37, + "grad_norm": 8.486032639744153, + "learning_rate": 1.873443691584071e-05, + "loss": 1.6216, + "step": 31107 + }, + { + "epoch": 0.37, + "grad_norm": 85.04090386591277, + "learning_rate": 1.873415244994872e-05, + "loss": 1.5495, + "step": 31110 + }, + { + "epoch": 0.37, + "grad_norm": 35.1255946275748, + "learning_rate": 1.8733867954250212e-05, + "loss": 1.2764, + "step": 31113 + }, + { + "epoch": 0.37, + "grad_norm": 23.39238194312441, + "learning_rate": 1.8733583428746157e-05, + "loss": 1.4482, + "step": 31116 + }, + { + "epoch": 0.37, + "grad_norm": 5.679346166686826, + "learning_rate": 1.8733298873437526e-05, + "loss": 1.6999, + "step": 31119 + }, + { + "epoch": 0.37, + "grad_norm": 12.588634308459689, + "learning_rate": 1.8733014288325296e-05, + "loss": 1.2897, + "step": 31122 + }, + { + "epoch": 0.37, + "grad_norm": 14.453518029755466, + "learning_rate": 1.873272967341043e-05, + "loss": 1.754, + "step": 31125 + }, + { + "epoch": 0.37, + "grad_norm": 29.772329874380162, + "learning_rate": 1.8732445028693905e-05, + "loss": 1.5574, + "step": 31128 + }, + { + "epoch": 0.37, + "grad_norm": 30.184393072244543, + "learning_rate": 1.873216035417669e-05, + "loss": 1.4426, + "step": 31131 + }, + { + "epoch": 0.37, + "grad_norm": 5.1005593484916885, + "learning_rate": 1.8731875649859758e-05, + "loss": 1.2582, + "step": 31134 + }, + { + "epoch": 0.37, + "grad_norm": 16.41298411750702, + "learning_rate": 1.8731590915744082e-05, + "loss": 0.9794, + "step": 31137 + }, + { + "epoch": 0.37, + "grad_norm": 22.14485134642535, + "learning_rate": 1.8731306151830626e-05, + "loss": 1.6578, + "step": 31140 + }, + { + "epoch": 0.37, + "grad_norm": 111.02825145015711, + "learning_rate": 1.873102135812037e-05, + "loss": 1.6536, + "step": 31143 + }, + { + "epoch": 0.37, + "grad_norm": 17.64257553388163, + "learning_rate": 1.8730736534614283e-05, + "loss": 1.3111, + "step": 31146 + }, + { + "epoch": 0.37, + "grad_norm": 17.71622396367702, + "learning_rate": 1.873045168131334e-05, + "loss": 1.9172, + "step": 31149 + }, + { + "epoch": 0.37, + "grad_norm": 42.403372297195716, + "learning_rate": 1.8730166798218502e-05, + "loss": 1.5736, + "step": 31152 + }, + { + "epoch": 0.37, + "grad_norm": 17.635857260062647, + "learning_rate": 1.8729881885330756e-05, + "loss": 1.5439, + "step": 31155 + }, + { + "epoch": 0.37, + "grad_norm": 24.946387003455207, + "learning_rate": 1.872959694265106e-05, + "loss": 1.2658, + "step": 31158 + }, + { + "epoch": 0.37, + "grad_norm": 9.297265647138055, + "learning_rate": 1.8729311970180405e-05, + "loss": 1.4621, + "step": 31161 + }, + { + "epoch": 0.37, + "grad_norm": 12.472450425425809, + "learning_rate": 1.8729026967919745e-05, + "loss": 1.1127, + "step": 31164 + }, + { + "epoch": 0.37, + "grad_norm": 52.706178808665136, + "learning_rate": 1.8728741935870062e-05, + "loss": 1.1741, + "step": 31167 + }, + { + "epoch": 0.37, + "grad_norm": 22.992665407745534, + "learning_rate": 1.8728456874032327e-05, + "loss": 1.625, + "step": 31170 + }, + { + "epoch": 0.37, + "grad_norm": 86.84839102423368, + "learning_rate": 1.8728171782407514e-05, + "loss": 1.2495, + "step": 31173 + }, + { + "epoch": 0.37, + "grad_norm": 8.709080627752408, + "learning_rate": 1.8727886660996596e-05, + "loss": 1.5387, + "step": 31176 + }, + { + "epoch": 0.37, + "grad_norm": 101.4996882309899, + "learning_rate": 1.8727601509800538e-05, + "loss": 1.6517, + "step": 31179 + }, + { + "epoch": 0.37, + "grad_norm": 28.26535378760156, + "learning_rate": 1.8727316328820328e-05, + "loss": 1.5246, + "step": 31182 + }, + { + "epoch": 0.37, + "grad_norm": 9.286330599548563, + "learning_rate": 1.8727031118056925e-05, + "loss": 1.583, + "step": 31185 + }, + { + "epoch": 0.38, + "grad_norm": 17.88742055603202, + "learning_rate": 1.8726745877511308e-05, + "loss": 1.1171, + "step": 31188 + }, + { + "epoch": 0.38, + "grad_norm": 9.544456600886837, + "learning_rate": 1.8726460607184453e-05, + "loss": 1.6198, + "step": 31191 + }, + { + "epoch": 0.38, + "grad_norm": 13.279190547179951, + "learning_rate": 1.872617530707733e-05, + "loss": 1.567, + "step": 31194 + }, + { + "epoch": 0.38, + "grad_norm": 20.525328833259213, + "learning_rate": 1.8725889977190913e-05, + "loss": 1.3093, + "step": 31197 + }, + { + "epoch": 0.38, + "grad_norm": 37.110996099498415, + "learning_rate": 1.8725604617526182e-05, + "loss": 1.7292, + "step": 31200 + }, + { + "epoch": 0.38, + "grad_norm": 8.219952476356221, + "learning_rate": 1.87253192280841e-05, + "loss": 1.3485, + "step": 31203 + }, + { + "epoch": 0.38, + "grad_norm": 31.7886487604519, + "learning_rate": 1.8725033808865647e-05, + "loss": 1.5123, + "step": 31206 + }, + { + "epoch": 0.38, + "grad_norm": 6.958404107312863, + "learning_rate": 1.87247483598718e-05, + "loss": 1.2147, + "step": 31209 + }, + { + "epoch": 0.38, + "grad_norm": 25.486766334519842, + "learning_rate": 1.8724462881103526e-05, + "loss": 1.5064, + "step": 31212 + }, + { + "epoch": 0.38, + "grad_norm": 8.926068432893231, + "learning_rate": 1.8724177372561804e-05, + "loss": 1.5406, + "step": 31215 + }, + { + "epoch": 0.38, + "grad_norm": 13.974784417588722, + "learning_rate": 1.8723891834247606e-05, + "loss": 1.7988, + "step": 31218 + }, + { + "epoch": 0.38, + "grad_norm": 7.973722707136904, + "learning_rate": 1.8723606266161907e-05, + "loss": 1.5697, + "step": 31221 + }, + { + "epoch": 0.38, + "grad_norm": 16.676562205113544, + "learning_rate": 1.8723320668305684e-05, + "loss": 1.4657, + "step": 31224 + }, + { + "epoch": 0.38, + "grad_norm": 23.227906672560312, + "learning_rate": 1.8723035040679905e-05, + "loss": 1.2576, + "step": 31227 + }, + { + "epoch": 0.38, + "grad_norm": 14.690867376020849, + "learning_rate": 1.8722749383285553e-05, + "loss": 1.4537, + "step": 31230 + }, + { + "epoch": 0.38, + "grad_norm": 9.644728844067512, + "learning_rate": 1.8722463696123604e-05, + "loss": 1.4563, + "step": 31233 + }, + { + "epoch": 0.38, + "grad_norm": 15.640750477878818, + "learning_rate": 1.8722177979195022e-05, + "loss": 1.2393, + "step": 31236 + }, + { + "epoch": 0.38, + "grad_norm": 14.927051851902053, + "learning_rate": 1.8721892232500792e-05, + "loss": 1.5403, + "step": 31239 + }, + { + "epoch": 0.38, + "grad_norm": 37.99201953306767, + "learning_rate": 1.8721606456041882e-05, + "loss": 1.3382, + "step": 31242 + }, + { + "epoch": 0.38, + "grad_norm": 3.725257563752633, + "learning_rate": 1.8721320649819274e-05, + "loss": 1.5997, + "step": 31245 + }, + { + "epoch": 0.38, + "grad_norm": 10.631995895047032, + "learning_rate": 1.8721034813833934e-05, + "loss": 1.4633, + "step": 31248 + }, + { + "epoch": 0.38, + "grad_norm": 7.608765199283432, + "learning_rate": 1.8720748948086847e-05, + "loss": 1.5198, + "step": 31251 + }, + { + "epoch": 0.38, + "grad_norm": 12.591863255228871, + "learning_rate": 1.8720463052578988e-05, + "loss": 1.2086, + "step": 31254 + }, + { + "epoch": 0.38, + "grad_norm": 7.199475450701329, + "learning_rate": 1.8720177127311327e-05, + "loss": 1.2631, + "step": 31257 + }, + { + "epoch": 0.38, + "grad_norm": 17.803389138454296, + "learning_rate": 1.871989117228484e-05, + "loss": 1.6506, + "step": 31260 + }, + { + "epoch": 0.38, + "grad_norm": 56.11351149352179, + "learning_rate": 1.871960518750051e-05, + "loss": 1.8165, + "step": 31263 + }, + { + "epoch": 0.38, + "grad_norm": 22.763014373972982, + "learning_rate": 1.8719319172959304e-05, + "loss": 1.3473, + "step": 31266 + }, + { + "epoch": 0.38, + "grad_norm": 19.349216675919365, + "learning_rate": 1.8719033128662206e-05, + "loss": 1.2491, + "step": 31269 + }, + { + "epoch": 0.38, + "grad_norm": 7.845943518587712, + "learning_rate": 1.8718747054610185e-05, + "loss": 1.54, + "step": 31272 + }, + { + "epoch": 0.38, + "grad_norm": 5.81845259233144, + "learning_rate": 1.871846095080423e-05, + "loss": 1.268, + "step": 31275 + }, + { + "epoch": 0.38, + "grad_norm": 3.965638474672437, + "learning_rate": 1.8718174817245298e-05, + "loss": 1.578, + "step": 31278 + }, + { + "epoch": 0.38, + "grad_norm": 18.814096460746825, + "learning_rate": 1.871788865393438e-05, + "loss": 1.8551, + "step": 31281 + }, + { + "epoch": 0.38, + "grad_norm": 6.4935033318393085, + "learning_rate": 1.8717602460872447e-05, + "loss": 1.8129, + "step": 31284 + }, + { + "epoch": 0.38, + "grad_norm": 22.42280459018198, + "learning_rate": 1.8717316238060475e-05, + "loss": 1.4001, + "step": 31287 + }, + { + "epoch": 0.38, + "grad_norm": 22.106188113821652, + "learning_rate": 1.8717029985499446e-05, + "loss": 1.8302, + "step": 31290 + }, + { + "epoch": 0.38, + "grad_norm": 29.513768610539092, + "learning_rate": 1.871674370319033e-05, + "loss": 1.3814, + "step": 31293 + }, + { + "epoch": 0.38, + "grad_norm": 17.81489740233538, + "learning_rate": 1.871645739113411e-05, + "loss": 1.1526, + "step": 31296 + }, + { + "epoch": 0.38, + "grad_norm": 20.82951953209055, + "learning_rate": 1.871617104933176e-05, + "loss": 1.5905, + "step": 31299 + }, + { + "epoch": 0.38, + "grad_norm": 12.664881493801715, + "learning_rate": 1.8715884677784256e-05, + "loss": 1.5386, + "step": 31302 + }, + { + "epoch": 0.38, + "grad_norm": 15.568125084283288, + "learning_rate": 1.8715598276492583e-05, + "loss": 1.4576, + "step": 31305 + }, + { + "epoch": 0.38, + "grad_norm": 3.344435082840865, + "learning_rate": 1.8715311845457703e-05, + "loss": 1.3288, + "step": 31308 + }, + { + "epoch": 0.38, + "grad_norm": 15.677026055061033, + "learning_rate": 1.871502538468061e-05, + "loss": 2.1946, + "step": 31311 + }, + { + "epoch": 0.38, + "grad_norm": 17.870305727257616, + "learning_rate": 1.8714738894162273e-05, + "loss": 1.5353, + "step": 31314 + }, + { + "epoch": 0.38, + "grad_norm": 10.207748219064277, + "learning_rate": 1.871445237390367e-05, + "loss": 2.0802, + "step": 31317 + }, + { + "epoch": 0.38, + "grad_norm": 6.785580749571568, + "learning_rate": 1.8714165823905782e-05, + "loss": 1.3683, + "step": 31320 + }, + { + "epoch": 0.38, + "grad_norm": 21.72184878410308, + "learning_rate": 1.8713879244169583e-05, + "loss": 1.3201, + "step": 31323 + }, + { + "epoch": 0.38, + "grad_norm": 20.82119858738998, + "learning_rate": 1.8713592634696053e-05, + "loss": 1.4036, + "step": 31326 + }, + { + "epoch": 0.38, + "grad_norm": 11.561334133254395, + "learning_rate": 1.871330599548617e-05, + "loss": 1.3611, + "step": 31329 + }, + { + "epoch": 0.38, + "grad_norm": 13.18352177481182, + "learning_rate": 1.8713019326540912e-05, + "loss": 1.4084, + "step": 31332 + }, + { + "epoch": 0.38, + "grad_norm": 5.540907737697063, + "learning_rate": 1.871273262786126e-05, + "loss": 0.95, + "step": 31335 + }, + { + "epoch": 0.38, + "grad_norm": 27.06414091029355, + "learning_rate": 1.8712445899448187e-05, + "loss": 1.6803, + "step": 31338 + }, + { + "epoch": 0.38, + "grad_norm": 9.872508781471485, + "learning_rate": 1.8712159141302678e-05, + "loss": 1.8353, + "step": 31341 + }, + { + "epoch": 0.38, + "grad_norm": 17.723883915911404, + "learning_rate": 1.8711872353425703e-05, + "loss": 1.6601, + "step": 31344 + }, + { + "epoch": 0.38, + "grad_norm": 8.893742266837316, + "learning_rate": 1.8711585535818248e-05, + "loss": 1.1093, + "step": 31347 + }, + { + "epoch": 0.38, + "grad_norm": 13.636434063490764, + "learning_rate": 1.871129868848129e-05, + "loss": 1.512, + "step": 31350 + }, + { + "epoch": 0.38, + "grad_norm": 14.47737485419776, + "learning_rate": 1.8711011811415806e-05, + "loss": 1.7849, + "step": 31353 + }, + { + "epoch": 0.38, + "grad_norm": 21.191908304858288, + "learning_rate": 1.8710724904622778e-05, + "loss": 1.9003, + "step": 31356 + }, + { + "epoch": 0.38, + "grad_norm": 25.496710791559497, + "learning_rate": 1.8710437968103182e-05, + "loss": 1.5838, + "step": 31359 + }, + { + "epoch": 0.38, + "grad_norm": 16.28615116200577, + "learning_rate": 1.8710151001858003e-05, + "loss": 1.6694, + "step": 31362 + }, + { + "epoch": 0.38, + "grad_norm": 8.993278963989303, + "learning_rate": 1.870986400588821e-05, + "loss": 1.1094, + "step": 31365 + }, + { + "epoch": 0.38, + "grad_norm": 7.264172806972044, + "learning_rate": 1.870957698019479e-05, + "loss": 1.3842, + "step": 31368 + }, + { + "epoch": 0.38, + "grad_norm": 26.864843279574373, + "learning_rate": 1.870928992477872e-05, + "loss": 1.2204, + "step": 31371 + }, + { + "epoch": 0.38, + "grad_norm": 11.441770559188642, + "learning_rate": 1.8709002839640984e-05, + "loss": 1.5592, + "step": 31374 + }, + { + "epoch": 0.38, + "grad_norm": 62.26285516525424, + "learning_rate": 1.870871572478256e-05, + "loss": 1.4124, + "step": 31377 + }, + { + "epoch": 0.38, + "grad_norm": 23.653110691883068, + "learning_rate": 1.870842858020442e-05, + "loss": 1.4574, + "step": 31380 + }, + { + "epoch": 0.38, + "grad_norm": 4.5445326507037205, + "learning_rate": 1.8708141405907553e-05, + "loss": 1.5517, + "step": 31383 + }, + { + "epoch": 0.38, + "grad_norm": 14.092672812850397, + "learning_rate": 1.8707854201892934e-05, + "loss": 1.6862, + "step": 31386 + }, + { + "epoch": 0.38, + "grad_norm": 18.25827022748158, + "learning_rate": 1.870756696816155e-05, + "loss": 1.252, + "step": 31389 + }, + { + "epoch": 0.38, + "grad_norm": 15.911899418384635, + "learning_rate": 1.8707279704714372e-05, + "loss": 1.9469, + "step": 31392 + }, + { + "epoch": 0.38, + "grad_norm": 20.26376379594595, + "learning_rate": 1.8706992411552388e-05, + "loss": 1.4842, + "step": 31395 + }, + { + "epoch": 0.38, + "grad_norm": 8.15105196020457, + "learning_rate": 1.8706705088676572e-05, + "loss": 1.4997, + "step": 31398 + }, + { + "epoch": 0.38, + "grad_norm": 10.378991323035162, + "learning_rate": 1.870641773608791e-05, + "loss": 1.288, + "step": 31401 + }, + { + "epoch": 0.38, + "grad_norm": 19.686534502096944, + "learning_rate": 1.8706130353787375e-05, + "loss": 1.4307, + "step": 31404 + }, + { + "epoch": 0.38, + "grad_norm": 25.73582638424949, + "learning_rate": 1.870584294177596e-05, + "loss": 1.6508, + "step": 31407 + }, + { + "epoch": 0.38, + "grad_norm": 24.657548284828717, + "learning_rate": 1.8705555500054635e-05, + "loss": 1.7113, + "step": 31410 + }, + { + "epoch": 0.38, + "grad_norm": 6.746593503871403, + "learning_rate": 1.8705268028624385e-05, + "loss": 1.3761, + "step": 31413 + }, + { + "epoch": 0.38, + "grad_norm": 31.471460141544817, + "learning_rate": 1.8704980527486193e-05, + "loss": 1.6292, + "step": 31416 + }, + { + "epoch": 0.38, + "grad_norm": 16.11728634616911, + "learning_rate": 1.8704692996641035e-05, + "loss": 1.3157, + "step": 31419 + }, + { + "epoch": 0.38, + "grad_norm": 11.192087495638235, + "learning_rate": 1.87044054360899e-05, + "loss": 1.163, + "step": 31422 + }, + { + "epoch": 0.38, + "grad_norm": 10.284756148614607, + "learning_rate": 1.870411784583376e-05, + "loss": 1.6434, + "step": 31425 + }, + { + "epoch": 0.38, + "grad_norm": 11.94563112305353, + "learning_rate": 1.87038302258736e-05, + "loss": 1.7004, + "step": 31428 + }, + { + "epoch": 0.38, + "grad_norm": 11.29059757447608, + "learning_rate": 1.8703542576210405e-05, + "loss": 1.3811, + "step": 31431 + }, + { + "epoch": 0.38, + "grad_norm": 18.361424813144637, + "learning_rate": 1.8703254896845154e-05, + "loss": 1.6568, + "step": 31434 + }, + { + "epoch": 0.38, + "grad_norm": 37.7107748190053, + "learning_rate": 1.8702967187778832e-05, + "loss": 1.6678, + "step": 31437 + }, + { + "epoch": 0.38, + "grad_norm": 21.060156983805605, + "learning_rate": 1.8702679449012415e-05, + "loss": 1.7082, + "step": 31440 + }, + { + "epoch": 0.38, + "grad_norm": 24.088990332261936, + "learning_rate": 1.8702391680546888e-05, + "loss": 1.2601, + "step": 31443 + }, + { + "epoch": 0.38, + "grad_norm": 6.950183125643772, + "learning_rate": 1.870210388238323e-05, + "loss": 1.5555, + "step": 31446 + }, + { + "epoch": 0.38, + "grad_norm": 13.670573075363933, + "learning_rate": 1.8701816054522428e-05, + "loss": 1.2985, + "step": 31449 + }, + { + "epoch": 0.38, + "grad_norm": 4.803895984245781, + "learning_rate": 1.8701528196965464e-05, + "loss": 1.2877, + "step": 31452 + }, + { + "epoch": 0.38, + "grad_norm": 22.6954388807515, + "learning_rate": 1.8701240309713317e-05, + "loss": 1.5226, + "step": 31455 + }, + { + "epoch": 0.38, + "grad_norm": 53.503426072553644, + "learning_rate": 1.870095239276697e-05, + "loss": 1.653, + "step": 31458 + }, + { + "epoch": 0.38, + "grad_norm": 7.225500290540417, + "learning_rate": 1.870066444612741e-05, + "loss": 1.361, + "step": 31461 + }, + { + "epoch": 0.38, + "grad_norm": 31.083423987299582, + "learning_rate": 1.8700376469795612e-05, + "loss": 1.5163, + "step": 31464 + }, + { + "epoch": 0.38, + "grad_norm": 10.507486962278556, + "learning_rate": 1.8700088463772563e-05, + "loss": 1.1099, + "step": 31467 + }, + { + "epoch": 0.38, + "grad_norm": 37.86589136789124, + "learning_rate": 1.869980042805925e-05, + "loss": 1.6317, + "step": 31470 + }, + { + "epoch": 0.38, + "grad_norm": 14.95098627543714, + "learning_rate": 1.869951236265665e-05, + "loss": 1.4734, + "step": 31473 + }, + { + "epoch": 0.38, + "grad_norm": 10.041545733234305, + "learning_rate": 1.8699224267565746e-05, + "loss": 1.2995, + "step": 31476 + }, + { + "epoch": 0.38, + "grad_norm": 17.250437325167336, + "learning_rate": 1.8698936142787525e-05, + "loss": 1.596, + "step": 31479 + }, + { + "epoch": 0.38, + "grad_norm": 32.54635295688203, + "learning_rate": 1.8698647988322965e-05, + "loss": 1.4304, + "step": 31482 + }, + { + "epoch": 0.38, + "grad_norm": 173.1341460890828, + "learning_rate": 1.8698359804173058e-05, + "loss": 1.3525, + "step": 31485 + }, + { + "epoch": 0.38, + "grad_norm": 13.703964517699125, + "learning_rate": 1.869807159033878e-05, + "loss": 1.692, + "step": 31488 + }, + { + "epoch": 0.38, + "grad_norm": 19.34838867205743, + "learning_rate": 1.8697783346821116e-05, + "loss": 1.1967, + "step": 31491 + }, + { + "epoch": 0.38, + "grad_norm": 21.294161918890616, + "learning_rate": 1.869749507362105e-05, + "loss": 1.2311, + "step": 31494 + }, + { + "epoch": 0.38, + "grad_norm": 12.842347667997725, + "learning_rate": 1.8697206770739566e-05, + "loss": 1.5885, + "step": 31497 + }, + { + "epoch": 0.38, + "grad_norm": 56.02852355835645, + "learning_rate": 1.8696918438177648e-05, + "loss": 1.7313, + "step": 31500 + }, + { + "epoch": 0.38, + "grad_norm": 67.99043477140577, + "learning_rate": 1.8696630075936282e-05, + "loss": 1.4974, + "step": 31503 + }, + { + "epoch": 0.38, + "grad_norm": 10.746345484467149, + "learning_rate": 1.8696341684016447e-05, + "loss": 1.4296, + "step": 31506 + }, + { + "epoch": 0.38, + "grad_norm": 5.551582602008702, + "learning_rate": 1.8696053262419132e-05, + "loss": 1.4343, + "step": 31509 + }, + { + "epoch": 0.38, + "grad_norm": 20.81009961452573, + "learning_rate": 1.869576481114532e-05, + "loss": 1.5057, + "step": 31512 + }, + { + "epoch": 0.38, + "grad_norm": 24.382176292294574, + "learning_rate": 1.8695476330195993e-05, + "loss": 1.1987, + "step": 31515 + }, + { + "epoch": 0.38, + "grad_norm": 18.316176244108785, + "learning_rate": 1.869518781957214e-05, + "loss": 1.3979, + "step": 31518 + }, + { + "epoch": 0.38, + "grad_norm": 10.73791927276013, + "learning_rate": 1.8694899279274742e-05, + "loss": 1.5565, + "step": 31521 + }, + { + "epoch": 0.38, + "grad_norm": 11.444712343449558, + "learning_rate": 1.8694610709304783e-05, + "loss": 1.502, + "step": 31524 + }, + { + "epoch": 0.38, + "grad_norm": 10.7350653152114, + "learning_rate": 1.8694322109663253e-05, + "loss": 1.7415, + "step": 31527 + }, + { + "epoch": 0.38, + "grad_norm": 7.4969411910019295, + "learning_rate": 1.8694033480351132e-05, + "loss": 1.2205, + "step": 31530 + }, + { + "epoch": 0.38, + "grad_norm": 2.3120418259411455, + "learning_rate": 1.86937448213694e-05, + "loss": 1.396, + "step": 31533 + }, + { + "epoch": 0.38, + "grad_norm": 10.650173057751307, + "learning_rate": 1.8693456132719054e-05, + "loss": 1.5423, + "step": 31536 + }, + { + "epoch": 0.38, + "grad_norm": 10.615581138585252, + "learning_rate": 1.8693167414401077e-05, + "loss": 1.6129, + "step": 31539 + }, + { + "epoch": 0.38, + "grad_norm": 10.858862852582845, + "learning_rate": 1.8692878666416444e-05, + "loss": 1.9659, + "step": 31542 + }, + { + "epoch": 0.38, + "grad_norm": 21.268900942260967, + "learning_rate": 1.8692589888766153e-05, + "loss": 1.3242, + "step": 31545 + }, + { + "epoch": 0.38, + "grad_norm": 8.279820212362972, + "learning_rate": 1.869230108145118e-05, + "loss": 1.4157, + "step": 31548 + }, + { + "epoch": 0.38, + "grad_norm": 54.08951092208618, + "learning_rate": 1.8692012244472516e-05, + "loss": 1.1564, + "step": 31551 + }, + { + "epoch": 0.38, + "grad_norm": 3.8772496913505976, + "learning_rate": 1.8691723377831144e-05, + "loss": 1.7928, + "step": 31554 + }, + { + "epoch": 0.38, + "grad_norm": 17.779323369765294, + "learning_rate": 1.869143448152805e-05, + "loss": 1.2221, + "step": 31557 + }, + { + "epoch": 0.38, + "grad_norm": 13.765065951832105, + "learning_rate": 1.8691145555564225e-05, + "loss": 1.5047, + "step": 31560 + }, + { + "epoch": 0.38, + "grad_norm": 34.48933629906141, + "learning_rate": 1.8690856599940647e-05, + "loss": 1.1211, + "step": 31563 + }, + { + "epoch": 0.38, + "grad_norm": 3.81887982860533, + "learning_rate": 1.8690567614658305e-05, + "loss": 1.1923, + "step": 31566 + }, + { + "epoch": 0.38, + "grad_norm": 11.697573187222751, + "learning_rate": 1.869027859971819e-05, + "loss": 1.3302, + "step": 31569 + }, + { + "epoch": 0.38, + "grad_norm": 8.363308995301182, + "learning_rate": 1.8689989555121283e-05, + "loss": 1.3658, + "step": 31572 + }, + { + "epoch": 0.38, + "grad_norm": 7.560758745129673, + "learning_rate": 1.8689700480868568e-05, + "loss": 1.5042, + "step": 31575 + }, + { + "epoch": 0.38, + "grad_norm": 17.53560410837898, + "learning_rate": 1.868941137696104e-05, + "loss": 1.6768, + "step": 31578 + }, + { + "epoch": 0.38, + "grad_norm": 20.180274267167338, + "learning_rate": 1.868912224339968e-05, + "loss": 1.2297, + "step": 31581 + }, + { + "epoch": 0.38, + "grad_norm": 5.399967998109735, + "learning_rate": 1.8688833080185475e-05, + "loss": 1.338, + "step": 31584 + }, + { + "epoch": 0.38, + "grad_norm": 8.16472815463094, + "learning_rate": 1.8688543887319413e-05, + "loss": 1.6397, + "step": 31587 + }, + { + "epoch": 0.38, + "grad_norm": 29.07375404709378, + "learning_rate": 1.868825466480248e-05, + "loss": 1.4112, + "step": 31590 + }, + { + "epoch": 0.38, + "grad_norm": 6.736324265222239, + "learning_rate": 1.8687965412635663e-05, + "loss": 1.6023, + "step": 31593 + }, + { + "epoch": 0.38, + "grad_norm": 4.925124500920562, + "learning_rate": 1.8687676130819948e-05, + "loss": 1.7765, + "step": 31596 + }, + { + "epoch": 0.38, + "grad_norm": 8.725362796183326, + "learning_rate": 1.8687386819356327e-05, + "loss": 1.8127, + "step": 31599 + }, + { + "epoch": 0.38, + "grad_norm": 18.45480663480137, + "learning_rate": 1.868709747824578e-05, + "loss": 1.7984, + "step": 31602 + }, + { + "epoch": 0.38, + "grad_norm": 18.359440981029138, + "learning_rate": 1.8686808107489298e-05, + "loss": 1.3718, + "step": 31605 + }, + { + "epoch": 0.38, + "grad_norm": 16.719107948094372, + "learning_rate": 1.8686518707087872e-05, + "loss": 1.692, + "step": 31608 + }, + { + "epoch": 0.38, + "grad_norm": 14.500664698279909, + "learning_rate": 1.8686229277042485e-05, + "loss": 2.0626, + "step": 31611 + }, + { + "epoch": 0.38, + "grad_norm": 25.906180846418223, + "learning_rate": 1.868593981735413e-05, + "loss": 1.259, + "step": 31614 + }, + { + "epoch": 0.38, + "grad_norm": 28.0070387515908, + "learning_rate": 1.8685650328023785e-05, + "loss": 1.3891, + "step": 31617 + }, + { + "epoch": 0.38, + "grad_norm": 20.789774937762097, + "learning_rate": 1.8685360809052447e-05, + "loss": 1.7385, + "step": 31620 + }, + { + "epoch": 0.38, + "grad_norm": 123.59595201860574, + "learning_rate": 1.8685071260441102e-05, + "loss": 1.4987, + "step": 31623 + }, + { + "epoch": 0.38, + "grad_norm": 6.59666593212097, + "learning_rate": 1.8684781682190736e-05, + "loss": 1.3188, + "step": 31626 + }, + { + "epoch": 0.38, + "grad_norm": 19.65885815568883, + "learning_rate": 1.8684492074302342e-05, + "loss": 1.475, + "step": 31629 + }, + { + "epoch": 0.38, + "grad_norm": 40.12443936958885, + "learning_rate": 1.86842024367769e-05, + "loss": 1.5118, + "step": 31632 + }, + { + "epoch": 0.38, + "grad_norm": 27.13150662736718, + "learning_rate": 1.86839127696154e-05, + "loss": 1.4378, + "step": 31635 + }, + { + "epoch": 0.38, + "grad_norm": 5.496239617762217, + "learning_rate": 1.868362307281884e-05, + "loss": 1.2978, + "step": 31638 + }, + { + "epoch": 0.38, + "grad_norm": 16.799375643262245, + "learning_rate": 1.8683333346388203e-05, + "loss": 1.2725, + "step": 31641 + }, + { + "epoch": 0.38, + "grad_norm": 12.526594337928476, + "learning_rate": 1.8683043590324473e-05, + "loss": 1.8174, + "step": 31644 + }, + { + "epoch": 0.38, + "grad_norm": 34.591314104051534, + "learning_rate": 1.8682753804628643e-05, + "loss": 1.3856, + "step": 31647 + }, + { + "epoch": 0.38, + "grad_norm": 27.36035917856215, + "learning_rate": 1.8682463989301704e-05, + "loss": 1.5523, + "step": 31650 + }, + { + "epoch": 0.38, + "grad_norm": 5.897575349270241, + "learning_rate": 1.868217414434464e-05, + "loss": 1.9583, + "step": 31653 + }, + { + "epoch": 0.38, + "grad_norm": 19.93417695521635, + "learning_rate": 1.8681884269758446e-05, + "loss": 1.5815, + "step": 31656 + }, + { + "epoch": 0.38, + "grad_norm": 56.352174338884076, + "learning_rate": 1.8681594365544103e-05, + "loss": 1.7329, + "step": 31659 + }, + { + "epoch": 0.38, + "grad_norm": 66.52964168662554, + "learning_rate": 1.8681304431702615e-05, + "loss": 1.22, + "step": 31662 + }, + { + "epoch": 0.38, + "grad_norm": 11.963108140583502, + "learning_rate": 1.8681014468234956e-05, + "loss": 1.7049, + "step": 31665 + }, + { + "epoch": 0.38, + "grad_norm": 14.191654077936924, + "learning_rate": 1.8680724475142117e-05, + "loss": 1.6229, + "step": 31668 + }, + { + "epoch": 0.38, + "grad_norm": 19.531169058396987, + "learning_rate": 1.86804344524251e-05, + "loss": 1.4823, + "step": 31671 + }, + { + "epoch": 0.38, + "grad_norm": 85.43568547335656, + "learning_rate": 1.8680144400084884e-05, + "loss": 1.6571, + "step": 31674 + }, + { + "epoch": 0.38, + "grad_norm": 24.818446640575512, + "learning_rate": 1.8679854318122463e-05, + "loss": 1.5704, + "step": 31677 + }, + { + "epoch": 0.38, + "grad_norm": 14.01598699846513, + "learning_rate": 1.867956420653882e-05, + "loss": 1.4818, + "step": 31680 + }, + { + "epoch": 0.38, + "grad_norm": 11.310234871417668, + "learning_rate": 1.867927406533496e-05, + "loss": 1.4215, + "step": 31683 + }, + { + "epoch": 0.38, + "grad_norm": 8.635243431755017, + "learning_rate": 1.8678983894511857e-05, + "loss": 1.0034, + "step": 31686 + }, + { + "epoch": 0.38, + "grad_norm": 16.230541917300837, + "learning_rate": 1.867869369407051e-05, + "loss": 1.5498, + "step": 31689 + }, + { + "epoch": 0.38, + "grad_norm": 11.655667650197554, + "learning_rate": 1.8678403464011908e-05, + "loss": 1.326, + "step": 31692 + }, + { + "epoch": 0.38, + "grad_norm": 43.590601463590694, + "learning_rate": 1.8678113204337038e-05, + "loss": 1.4932, + "step": 31695 + }, + { + "epoch": 0.38, + "grad_norm": 52.83353844380787, + "learning_rate": 1.8677822915046897e-05, + "loss": 1.4605, + "step": 31698 + }, + { + "epoch": 0.38, + "grad_norm": 45.09721069416692, + "learning_rate": 1.8677532596142472e-05, + "loss": 1.6868, + "step": 31701 + }, + { + "epoch": 0.38, + "grad_norm": 73.49733629335836, + "learning_rate": 1.8677242247624752e-05, + "loss": 1.5483, + "step": 31704 + }, + { + "epoch": 0.38, + "grad_norm": 20.3448861947731, + "learning_rate": 1.867695186949473e-05, + "loss": 1.6633, + "step": 31707 + }, + { + "epoch": 0.38, + "grad_norm": 7.593625113827162, + "learning_rate": 1.8676661461753398e-05, + "loss": 1.2186, + "step": 31710 + }, + { + "epoch": 0.38, + "grad_norm": 6.324836251325896, + "learning_rate": 1.8676371024401746e-05, + "loss": 1.4989, + "step": 31713 + }, + { + "epoch": 0.38, + "grad_norm": 35.205825107978654, + "learning_rate": 1.8676080557440762e-05, + "loss": 1.8045, + "step": 31716 + }, + { + "epoch": 0.38, + "grad_norm": 44.721629106312434, + "learning_rate": 1.8675790060871444e-05, + "loss": 1.6639, + "step": 31719 + }, + { + "epoch": 0.38, + "grad_norm": 7.3116560665754315, + "learning_rate": 1.8675499534694777e-05, + "loss": 1.5603, + "step": 31722 + }, + { + "epoch": 0.38, + "grad_norm": 12.389521263297624, + "learning_rate": 1.8675208978911758e-05, + "loss": 1.34, + "step": 31725 + }, + { + "epoch": 0.38, + "grad_norm": 28.01929838127603, + "learning_rate": 1.8674918393523368e-05, + "loss": 1.6062, + "step": 31728 + }, + { + "epoch": 0.38, + "grad_norm": 17.513011918300936, + "learning_rate": 1.8674627778530614e-05, + "loss": 1.4265, + "step": 31731 + }, + { + "epoch": 0.38, + "grad_norm": 19.402966558975166, + "learning_rate": 1.8674337133934476e-05, + "loss": 1.4534, + "step": 31734 + }, + { + "epoch": 0.38, + "grad_norm": 50.22640231154051, + "learning_rate": 1.8674046459735952e-05, + "loss": 1.7996, + "step": 31737 + }, + { + "epoch": 0.38, + "grad_norm": 15.658604732113266, + "learning_rate": 1.8673755755936032e-05, + "loss": 1.2756, + "step": 31740 + }, + { + "epoch": 0.38, + "grad_norm": 13.363829676544956, + "learning_rate": 1.8673465022535706e-05, + "loss": 1.2061, + "step": 31743 + }, + { + "epoch": 0.38, + "grad_norm": 7.320053406916184, + "learning_rate": 1.867317425953597e-05, + "loss": 1.2526, + "step": 31746 + }, + { + "epoch": 0.38, + "grad_norm": 10.280596415873694, + "learning_rate": 1.867288346693781e-05, + "loss": 1.3012, + "step": 31749 + }, + { + "epoch": 0.38, + "grad_norm": 19.889651435072246, + "learning_rate": 1.8672592644742228e-05, + "loss": 1.482, + "step": 31752 + }, + { + "epoch": 0.38, + "grad_norm": 3.964357701847139, + "learning_rate": 1.867230179295021e-05, + "loss": 1.4832, + "step": 31755 + }, + { + "epoch": 0.38, + "grad_norm": 25.92426943584607, + "learning_rate": 1.8672010911562747e-05, + "loss": 1.9481, + "step": 31758 + }, + { + "epoch": 0.38, + "grad_norm": 24.162598997807073, + "learning_rate": 1.867172000058084e-05, + "loss": 1.4928, + "step": 31761 + }, + { + "epoch": 0.38, + "grad_norm": 19.56884733953338, + "learning_rate": 1.867142906000547e-05, + "loss": 1.1977, + "step": 31764 + }, + { + "epoch": 0.38, + "grad_norm": 9.146999288969262, + "learning_rate": 1.8671138089837638e-05, + "loss": 1.3104, + "step": 31767 + }, + { + "epoch": 0.38, + "grad_norm": 6.890716308591395, + "learning_rate": 1.8670847090078338e-05, + "loss": 1.3647, + "step": 31770 + }, + { + "epoch": 0.38, + "grad_norm": 22.390828899572423, + "learning_rate": 1.8670556060728556e-05, + "loss": 1.4121, + "step": 31773 + }, + { + "epoch": 0.38, + "grad_norm": 21.467236759945422, + "learning_rate": 1.867026500178929e-05, + "loss": 1.5616, + "step": 31776 + }, + { + "epoch": 0.38, + "grad_norm": 9.385651202675072, + "learning_rate": 1.8669973913261534e-05, + "loss": 1.802, + "step": 31779 + }, + { + "epoch": 0.38, + "grad_norm": 24.218751775406645, + "learning_rate": 1.866968279514628e-05, + "loss": 1.7667, + "step": 31782 + }, + { + "epoch": 0.38, + "grad_norm": 16.166759479133642, + "learning_rate": 1.866939164744452e-05, + "loss": 1.1919, + "step": 31785 + }, + { + "epoch": 0.38, + "grad_norm": 14.691805262281626, + "learning_rate": 1.8669100470157252e-05, + "loss": 1.4818, + "step": 31788 + }, + { + "epoch": 0.38, + "grad_norm": 23.67252072033066, + "learning_rate": 1.8668809263285465e-05, + "loss": 1.4952, + "step": 31791 + }, + { + "epoch": 0.38, + "grad_norm": 6.110333558357852, + "learning_rate": 1.8668518026830153e-05, + "loss": 1.5666, + "step": 31794 + }, + { + "epoch": 0.38, + "grad_norm": 14.41538191899163, + "learning_rate": 1.866822676079231e-05, + "loss": 1.5557, + "step": 31797 + }, + { + "epoch": 0.38, + "grad_norm": 20.08015351102029, + "learning_rate": 1.8667935465172938e-05, + "loss": 1.5326, + "step": 31800 + }, + { + "epoch": 0.38, + "grad_norm": 19.212733611564985, + "learning_rate": 1.866764413997302e-05, + "loss": 1.5952, + "step": 31803 + }, + { + "epoch": 0.38, + "grad_norm": 36.30741576388034, + "learning_rate": 1.866735278519355e-05, + "loss": 1.5645, + "step": 31806 + }, + { + "epoch": 0.38, + "grad_norm": 224.28938357668065, + "learning_rate": 1.8667061400835536e-05, + "loss": 1.4972, + "step": 31809 + }, + { + "epoch": 0.38, + "grad_norm": 14.018463755069906, + "learning_rate": 1.8666769986899958e-05, + "loss": 1.575, + "step": 31812 + }, + { + "epoch": 0.38, + "grad_norm": 29.75784645915124, + "learning_rate": 1.8666478543387815e-05, + "loss": 1.5555, + "step": 31815 + }, + { + "epoch": 0.38, + "grad_norm": 16.479608011264588, + "learning_rate": 1.8666187070300105e-05, + "loss": 1.9322, + "step": 31818 + }, + { + "epoch": 0.38, + "grad_norm": 15.443599899854146, + "learning_rate": 1.866589556763782e-05, + "loss": 1.4135, + "step": 31821 + }, + { + "epoch": 0.38, + "grad_norm": 16.12036234256849, + "learning_rate": 1.8665604035401953e-05, + "loss": 1.6785, + "step": 31824 + }, + { + "epoch": 0.38, + "grad_norm": 10.482215006819896, + "learning_rate": 1.86653124735935e-05, + "loss": 1.2181, + "step": 31827 + }, + { + "epoch": 0.38, + "grad_norm": 10.560987897916418, + "learning_rate": 1.866502088221346e-05, + "loss": 1.2605, + "step": 31830 + }, + { + "epoch": 0.38, + "grad_norm": 11.188964117104128, + "learning_rate": 1.8664729261262824e-05, + "loss": 1.2171, + "step": 31833 + }, + { + "epoch": 0.38, + "grad_norm": 34.92951480977954, + "learning_rate": 1.8664437610742588e-05, + "loss": 1.4914, + "step": 31836 + }, + { + "epoch": 0.38, + "grad_norm": 8.544317410323472, + "learning_rate": 1.8664145930653747e-05, + "loss": 1.587, + "step": 31839 + }, + { + "epoch": 0.38, + "grad_norm": 7.4764604379115, + "learning_rate": 1.8663854220997296e-05, + "loss": 1.424, + "step": 31842 + }, + { + "epoch": 0.38, + "grad_norm": 10.27189589986057, + "learning_rate": 1.8663562481774232e-05, + "loss": 1.388, + "step": 31845 + }, + { + "epoch": 0.38, + "grad_norm": 22.044224466215354, + "learning_rate": 1.866327071298555e-05, + "loss": 1.3881, + "step": 31848 + }, + { + "epoch": 0.38, + "grad_norm": 37.1207409749641, + "learning_rate": 1.8662978914632245e-05, + "loss": 1.3854, + "step": 31851 + }, + { + "epoch": 0.38, + "grad_norm": 6.089695943186194, + "learning_rate": 1.8662687086715314e-05, + "loss": 1.7727, + "step": 31854 + }, + { + "epoch": 0.38, + "grad_norm": 13.1030448156281, + "learning_rate": 1.866239522923575e-05, + "loss": 1.4056, + "step": 31857 + }, + { + "epoch": 0.38, + "grad_norm": 23.537865213441567, + "learning_rate": 1.8662103342194555e-05, + "loss": 1.2893, + "step": 31860 + }, + { + "epoch": 0.38, + "grad_norm": 12.803011080327082, + "learning_rate": 1.866181142559272e-05, + "loss": 1.3619, + "step": 31863 + }, + { + "epoch": 0.38, + "grad_norm": 79.72431494182834, + "learning_rate": 1.866151947943124e-05, + "loss": 1.4909, + "step": 31866 + }, + { + "epoch": 0.38, + "grad_norm": 6.735800004784451, + "learning_rate": 1.8661227503711118e-05, + "loss": 1.2474, + "step": 31869 + }, + { + "epoch": 0.38, + "grad_norm": 23.725694593115954, + "learning_rate": 1.8660935498433345e-05, + "loss": 1.4709, + "step": 31872 + }, + { + "epoch": 0.38, + "grad_norm": 7.479247293718298, + "learning_rate": 1.8660643463598916e-05, + "loss": 1.5977, + "step": 31875 + }, + { + "epoch": 0.38, + "grad_norm": 13.610612390268054, + "learning_rate": 1.8660351399208835e-05, + "loss": 1.7316, + "step": 31878 + }, + { + "epoch": 0.38, + "grad_norm": 42.97696883268573, + "learning_rate": 1.866005930526409e-05, + "loss": 1.6091, + "step": 31881 + }, + { + "epoch": 0.38, + "grad_norm": 14.388028151817846, + "learning_rate": 1.8659767181765685e-05, + "loss": 1.6374, + "step": 31884 + }, + { + "epoch": 0.38, + "grad_norm": 12.996993939106304, + "learning_rate": 1.865947502871461e-05, + "loss": 1.4675, + "step": 31887 + }, + { + "epoch": 0.38, + "grad_norm": 17.270557107442716, + "learning_rate": 1.865918284611187e-05, + "loss": 1.7528, + "step": 31890 + }, + { + "epoch": 0.38, + "grad_norm": 41.17995229991057, + "learning_rate": 1.8658890633958456e-05, + "loss": 1.6146, + "step": 31893 + }, + { + "epoch": 0.38, + "grad_norm": 11.66768206953433, + "learning_rate": 1.8658598392255367e-05, + "loss": 1.2027, + "step": 31896 + }, + { + "epoch": 0.38, + "grad_norm": 6.416749267460507, + "learning_rate": 1.8658306121003604e-05, + "loss": 1.3432, + "step": 31899 + }, + { + "epoch": 0.38, + "grad_norm": 14.274035649926962, + "learning_rate": 1.8658013820204157e-05, + "loss": 1.016, + "step": 31902 + }, + { + "epoch": 0.38, + "grad_norm": 25.331955933071885, + "learning_rate": 1.865772148985803e-05, + "loss": 1.2437, + "step": 31905 + }, + { + "epoch": 0.38, + "grad_norm": 24.363891536472018, + "learning_rate": 1.8657429129966213e-05, + "loss": 1.36, + "step": 31908 + }, + { + "epoch": 0.38, + "grad_norm": 3.061379822427433, + "learning_rate": 1.8657136740529714e-05, + "loss": 1.3632, + "step": 31911 + }, + { + "epoch": 0.38, + "grad_norm": 8.51867767509099, + "learning_rate": 1.865684432154952e-05, + "loss": 1.3494, + "step": 31914 + }, + { + "epoch": 0.38, + "grad_norm": 4.058816821170769, + "learning_rate": 1.865655187302664e-05, + "loss": 1.4657, + "step": 31917 + }, + { + "epoch": 0.38, + "grad_norm": 6.19702673447012, + "learning_rate": 1.8656259394962065e-05, + "loss": 1.7059, + "step": 31920 + }, + { + "epoch": 0.38, + "grad_norm": 11.440366807823995, + "learning_rate": 1.8655966887356794e-05, + "loss": 1.5464, + "step": 31923 + }, + { + "epoch": 0.38, + "grad_norm": 12.739629729600324, + "learning_rate": 1.8655674350211824e-05, + "loss": 1.4419, + "step": 31926 + }, + { + "epoch": 0.38, + "grad_norm": 55.746422974701034, + "learning_rate": 1.865538178352816e-05, + "loss": 1.4713, + "step": 31929 + }, + { + "epoch": 0.38, + "grad_norm": 5.870801249897406, + "learning_rate": 1.8655089187306795e-05, + "loss": 1.8061, + "step": 31932 + }, + { + "epoch": 0.38, + "grad_norm": 12.219928520739654, + "learning_rate": 1.8654796561548724e-05, + "loss": 1.188, + "step": 31935 + }, + { + "epoch": 0.38, + "grad_norm": 16.802184220743463, + "learning_rate": 1.8654503906254954e-05, + "loss": 1.6251, + "step": 31938 + }, + { + "epoch": 0.38, + "grad_norm": 4.72082508991564, + "learning_rate": 1.8654211221426476e-05, + "loss": 1.2084, + "step": 31941 + }, + { + "epoch": 0.38, + "grad_norm": 40.03191906636013, + "learning_rate": 1.8653918507064294e-05, + "loss": 1.2078, + "step": 31944 + }, + { + "epoch": 0.38, + "grad_norm": 8.275016882492526, + "learning_rate": 1.8653625763169403e-05, + "loss": 1.8501, + "step": 31947 + }, + { + "epoch": 0.38, + "grad_norm": 9.139751187716046, + "learning_rate": 1.8653332989742807e-05, + "loss": 1.6416, + "step": 31950 + }, + { + "epoch": 0.38, + "grad_norm": 19.59986655304917, + "learning_rate": 1.8653040186785502e-05, + "loss": 1.558, + "step": 31953 + }, + { + "epoch": 0.38, + "grad_norm": 2.662266889138597, + "learning_rate": 1.865274735429849e-05, + "loss": 1.374, + "step": 31956 + }, + { + "epoch": 0.38, + "grad_norm": 35.10751629982087, + "learning_rate": 1.8652454492282764e-05, + "loss": 1.5876, + "step": 31959 + }, + { + "epoch": 0.38, + "grad_norm": 12.390451340433927, + "learning_rate": 1.8652161600739328e-05, + "loss": 1.6902, + "step": 31962 + }, + { + "epoch": 0.38, + "grad_norm": 8.242264944422436, + "learning_rate": 1.865186867966918e-05, + "loss": 1.7511, + "step": 31965 + }, + { + "epoch": 0.38, + "grad_norm": 16.351981016761236, + "learning_rate": 1.8651575729073324e-05, + "loss": 1.5262, + "step": 31968 + }, + { + "epoch": 0.38, + "grad_norm": 21.560306590301654, + "learning_rate": 1.8651282748952756e-05, + "loss": 1.645, + "step": 31971 + }, + { + "epoch": 0.38, + "grad_norm": 11.019121862055766, + "learning_rate": 1.8650989739308474e-05, + "loss": 1.4836, + "step": 31974 + }, + { + "epoch": 0.38, + "grad_norm": 44.93126259041688, + "learning_rate": 1.865069670014148e-05, + "loss": 1.5271, + "step": 31977 + }, + { + "epoch": 0.38, + "grad_norm": 3.8662279096173644, + "learning_rate": 1.8650403631452778e-05, + "loss": 1.555, + "step": 31980 + }, + { + "epoch": 0.38, + "grad_norm": 12.954520321408925, + "learning_rate": 1.8650110533243358e-05, + "loss": 1.6412, + "step": 31983 + }, + { + "epoch": 0.38, + "grad_norm": 31.679551676034833, + "learning_rate": 1.8649817405514234e-05, + "loss": 1.3241, + "step": 31986 + }, + { + "epoch": 0.38, + "grad_norm": 20.458079285893117, + "learning_rate": 1.864952424826639e-05, + "loss": 1.6078, + "step": 31989 + }, + { + "epoch": 0.38, + "grad_norm": 11.00948161021934, + "learning_rate": 1.8649231061500842e-05, + "loss": 1.3131, + "step": 31992 + }, + { + "epoch": 0.38, + "grad_norm": 18.277404193962827, + "learning_rate": 1.864893784521858e-05, + "loss": 1.2346, + "step": 31995 + }, + { + "epoch": 0.38, + "grad_norm": 6.968917445799988, + "learning_rate": 1.8648644599420612e-05, + "loss": 1.6439, + "step": 31998 + }, + { + "epoch": 0.38, + "grad_norm": 13.389214511107006, + "learning_rate": 1.8648351324107933e-05, + "loss": 1.2444, + "step": 32001 + }, + { + "epoch": 0.38, + "grad_norm": 8.461087880237248, + "learning_rate": 1.8648058019281547e-05, + "loss": 1.5962, + "step": 32004 + }, + { + "epoch": 0.38, + "grad_norm": 73.68102361684593, + "learning_rate": 1.8647764684942452e-05, + "loss": 1.5306, + "step": 32007 + }, + { + "epoch": 0.38, + "grad_norm": 6.960456704330142, + "learning_rate": 1.8647471321091655e-05, + "loss": 1.5933, + "step": 32010 + }, + { + "epoch": 0.38, + "grad_norm": 23.857959243282625, + "learning_rate": 1.8647177927730153e-05, + "loss": 1.4663, + "step": 32013 + }, + { + "epoch": 0.38, + "grad_norm": 41.137426078326996, + "learning_rate": 1.864688450485894e-05, + "loss": 1.2972, + "step": 32016 + }, + { + "epoch": 0.39, + "grad_norm": 31.750884154962613, + "learning_rate": 1.864659105247903e-05, + "loss": 1.699, + "step": 32019 + }, + { + "epoch": 0.39, + "grad_norm": 8.826273133298669, + "learning_rate": 1.8646297570591422e-05, + "loss": 1.3761, + "step": 32022 + }, + { + "epoch": 0.39, + "grad_norm": 15.656504799693133, + "learning_rate": 1.864600405919711e-05, + "loss": 1.7305, + "step": 32025 + }, + { + "epoch": 0.39, + "grad_norm": 49.04065912765236, + "learning_rate": 1.8645710518297103e-05, + "loss": 1.9203, + "step": 32028 + }, + { + "epoch": 0.39, + "grad_norm": 8.571306165236635, + "learning_rate": 1.86454169478924e-05, + "loss": 1.6193, + "step": 32031 + }, + { + "epoch": 0.39, + "grad_norm": 9.219600081816704, + "learning_rate": 1.8645123347984003e-05, + "loss": 1.6084, + "step": 32034 + }, + { + "epoch": 0.39, + "grad_norm": 13.018007222166462, + "learning_rate": 1.8644829718572914e-05, + "loss": 1.4861, + "step": 32037 + }, + { + "epoch": 0.39, + "grad_norm": 12.136252876063947, + "learning_rate": 1.8644536059660138e-05, + "loss": 1.0941, + "step": 32040 + }, + { + "epoch": 0.39, + "grad_norm": 2.295793000024975, + "learning_rate": 1.864424237124667e-05, + "loss": 1.1361, + "step": 32043 + }, + { + "epoch": 0.39, + "grad_norm": 18.93521371758447, + "learning_rate": 1.8643948653333515e-05, + "loss": 1.4616, + "step": 32046 + }, + { + "epoch": 0.39, + "grad_norm": 59.469177502043, + "learning_rate": 1.8643654905921676e-05, + "loss": 1.4403, + "step": 32049 + }, + { + "epoch": 0.39, + "grad_norm": 14.681491113216264, + "learning_rate": 1.8643361129012163e-05, + "loss": 1.356, + "step": 32052 + }, + { + "epoch": 0.39, + "grad_norm": 5.59263724966818, + "learning_rate": 1.8643067322605967e-05, + "loss": 1.5418, + "step": 32055 + }, + { + "epoch": 0.39, + "grad_norm": 17.423308220432993, + "learning_rate": 1.8642773486704095e-05, + "loss": 1.4412, + "step": 32058 + }, + { + "epoch": 0.39, + "grad_norm": 14.407638278656643, + "learning_rate": 1.8642479621307553e-05, + "loss": 1.6388, + "step": 32061 + }, + { + "epoch": 0.39, + "grad_norm": 26.292280210693498, + "learning_rate": 1.8642185726417337e-05, + "loss": 1.4767, + "step": 32064 + }, + { + "epoch": 0.39, + "grad_norm": 64.55303254073844, + "learning_rate": 1.8641891802034458e-05, + "loss": 1.2541, + "step": 32067 + }, + { + "epoch": 0.39, + "grad_norm": 9.679007521656349, + "learning_rate": 1.8641597848159917e-05, + "loss": 1.7107, + "step": 32070 + }, + { + "epoch": 0.39, + "grad_norm": 8.984107341909743, + "learning_rate": 1.8641303864794712e-05, + "loss": 1.18, + "step": 32073 + }, + { + "epoch": 0.39, + "grad_norm": 29.324274164576526, + "learning_rate": 1.8641009851939848e-05, + "loss": 1.4723, + "step": 32076 + }, + { + "epoch": 0.39, + "grad_norm": 7.446621967863798, + "learning_rate": 1.864071580959633e-05, + "loss": 1.3891, + "step": 32079 + }, + { + "epoch": 0.39, + "grad_norm": 16.72487983909209, + "learning_rate": 1.8640421737765165e-05, + "loss": 1.5704, + "step": 32082 + }, + { + "epoch": 0.39, + "grad_norm": 42.48543064331262, + "learning_rate": 1.8640127636447348e-05, + "loss": 1.8431, + "step": 32085 + }, + { + "epoch": 0.39, + "grad_norm": 3.981005680764808, + "learning_rate": 1.863983350564389e-05, + "loss": 1.6447, + "step": 32088 + }, + { + "epoch": 0.39, + "grad_norm": 13.349003888668564, + "learning_rate": 1.8639539345355794e-05, + "loss": 1.6108, + "step": 32091 + }, + { + "epoch": 0.39, + "grad_norm": 6.984234221963689, + "learning_rate": 1.863924515558406e-05, + "loss": 1.1659, + "step": 32094 + }, + { + "epoch": 0.39, + "grad_norm": 8.205987880458133, + "learning_rate": 1.8638950936329697e-05, + "loss": 1.6698, + "step": 32097 + }, + { + "epoch": 0.39, + "grad_norm": 17.886713837604173, + "learning_rate": 1.8638656687593704e-05, + "loss": 1.6166, + "step": 32100 + }, + { + "epoch": 0.39, + "grad_norm": 18.185568873503467, + "learning_rate": 1.8638362409377085e-05, + "loss": 1.1822, + "step": 32103 + }, + { + "epoch": 0.39, + "grad_norm": 14.125481592372907, + "learning_rate": 1.863806810168085e-05, + "loss": 1.6136, + "step": 32106 + }, + { + "epoch": 0.39, + "grad_norm": 9.885785491748111, + "learning_rate": 1.8637773764506e-05, + "loss": 1.3862, + "step": 32109 + }, + { + "epoch": 0.39, + "grad_norm": 41.76183246224945, + "learning_rate": 1.863747939785354e-05, + "loss": 1.4718, + "step": 32112 + }, + { + "epoch": 0.39, + "grad_norm": 12.617750070740236, + "learning_rate": 1.8637185001724468e-05, + "loss": 1.7839, + "step": 32115 + }, + { + "epoch": 0.39, + "grad_norm": 9.537984874133453, + "learning_rate": 1.86368905761198e-05, + "loss": 1.6431, + "step": 32118 + }, + { + "epoch": 0.39, + "grad_norm": 11.95701438278707, + "learning_rate": 1.8636596121040537e-05, + "loss": 1.6552, + "step": 32121 + }, + { + "epoch": 0.39, + "grad_norm": 48.806966918289945, + "learning_rate": 1.8636301636487678e-05, + "loss": 1.7083, + "step": 32124 + }, + { + "epoch": 0.39, + "grad_norm": 7.832834199261451, + "learning_rate": 1.8636007122462233e-05, + "loss": 1.8086, + "step": 32127 + }, + { + "epoch": 0.39, + "grad_norm": 28.511778281532326, + "learning_rate": 1.8635712578965208e-05, + "loss": 1.7621, + "step": 32130 + }, + { + "epoch": 0.39, + "grad_norm": 20.2568821113459, + "learning_rate": 1.8635418005997607e-05, + "loss": 1.8108, + "step": 32133 + }, + { + "epoch": 0.39, + "grad_norm": 6.00091757200721, + "learning_rate": 1.8635123403560432e-05, + "loss": 1.8516, + "step": 32136 + }, + { + "epoch": 0.39, + "grad_norm": 24.581538482277086, + "learning_rate": 1.8634828771654692e-05, + "loss": 1.5659, + "step": 32139 + }, + { + "epoch": 0.39, + "grad_norm": 4.067414142701048, + "learning_rate": 1.8634534110281395e-05, + "loss": 1.7604, + "step": 32142 + }, + { + "epoch": 0.39, + "grad_norm": 11.164769218916653, + "learning_rate": 1.863423941944154e-05, + "loss": 1.5207, + "step": 32145 + }, + { + "epoch": 0.39, + "grad_norm": 12.581028084855017, + "learning_rate": 1.8633944699136138e-05, + "loss": 1.6478, + "step": 32148 + }, + { + "epoch": 0.39, + "grad_norm": 5.109226237381294, + "learning_rate": 1.8633649949366194e-05, + "loss": 1.3427, + "step": 32151 + }, + { + "epoch": 0.39, + "grad_norm": 16.108750546326633, + "learning_rate": 1.8633355170132707e-05, + "loss": 1.6171, + "step": 32154 + }, + { + "epoch": 0.39, + "grad_norm": 8.203556262103087, + "learning_rate": 1.8633060361436694e-05, + "loss": 1.6849, + "step": 32157 + }, + { + "epoch": 0.39, + "grad_norm": 16.85328799490575, + "learning_rate": 1.863276552327915e-05, + "loss": 1.5853, + "step": 32160 + }, + { + "epoch": 0.39, + "grad_norm": 24.14011581880395, + "learning_rate": 1.863247065566109e-05, + "loss": 1.3701, + "step": 32163 + }, + { + "epoch": 0.39, + "grad_norm": 9.393361038463267, + "learning_rate": 1.8632175758583516e-05, + "loss": 1.8488, + "step": 32166 + }, + { + "epoch": 0.39, + "grad_norm": 7.294122347846808, + "learning_rate": 1.863188083204744e-05, + "loss": 1.4128, + "step": 32169 + }, + { + "epoch": 0.39, + "grad_norm": 5.651844590903438, + "learning_rate": 1.8631585876053857e-05, + "loss": 1.4973, + "step": 32172 + }, + { + "epoch": 0.39, + "grad_norm": 13.205154962510532, + "learning_rate": 1.8631290890603785e-05, + "loss": 1.1458, + "step": 32175 + }, + { + "epoch": 0.39, + "grad_norm": 41.17898434231924, + "learning_rate": 1.8630995875698224e-05, + "loss": 1.0757, + "step": 32178 + }, + { + "epoch": 0.39, + "grad_norm": 13.753382388401793, + "learning_rate": 1.8630700831338183e-05, + "loss": 0.896, + "step": 32181 + }, + { + "epoch": 0.39, + "grad_norm": 5.596858368962453, + "learning_rate": 1.863040575752467e-05, + "loss": 1.3043, + "step": 32184 + }, + { + "epoch": 0.39, + "grad_norm": 14.910383391099602, + "learning_rate": 1.863011065425869e-05, + "loss": 1.5825, + "step": 32187 + }, + { + "epoch": 0.39, + "grad_norm": 13.676842723504912, + "learning_rate": 1.8629815521541252e-05, + "loss": 1.5417, + "step": 32190 + }, + { + "epoch": 0.39, + "grad_norm": 7.755612194756504, + "learning_rate": 1.8629520359373362e-05, + "loss": 1.0846, + "step": 32193 + }, + { + "epoch": 0.39, + "grad_norm": 8.344845302501575, + "learning_rate": 1.8629225167756027e-05, + "loss": 1.279, + "step": 32196 + }, + { + "epoch": 0.39, + "grad_norm": 69.0649946225478, + "learning_rate": 1.8628929946690256e-05, + "loss": 1.992, + "step": 32199 + }, + { + "epoch": 0.39, + "grad_norm": 24.604206536467306, + "learning_rate": 1.8628634696177053e-05, + "loss": 1.6777, + "step": 32202 + }, + { + "epoch": 0.39, + "grad_norm": 8.170237888255201, + "learning_rate": 1.862833941621743e-05, + "loss": 1.3119, + "step": 32205 + }, + { + "epoch": 0.39, + "grad_norm": 4.951632170688705, + "learning_rate": 1.8628044106812392e-05, + "loss": 1.3816, + "step": 32208 + }, + { + "epoch": 0.39, + "grad_norm": 17.697616120234898, + "learning_rate": 1.8627748767962943e-05, + "loss": 1.3223, + "step": 32211 + }, + { + "epoch": 0.39, + "grad_norm": 7.306292854942103, + "learning_rate": 1.86274533996701e-05, + "loss": 1.3946, + "step": 32214 + }, + { + "epoch": 0.39, + "grad_norm": 16.984441173975245, + "learning_rate": 1.8627158001934866e-05, + "loss": 1.6023, + "step": 32217 + }, + { + "epoch": 0.39, + "grad_norm": 21.08721592844497, + "learning_rate": 1.8626862574758247e-05, + "loss": 1.8406, + "step": 32220 + }, + { + "epoch": 0.39, + "grad_norm": 8.150847048721916, + "learning_rate": 1.8626567118141254e-05, + "loss": 1.1228, + "step": 32223 + }, + { + "epoch": 0.39, + "grad_norm": 6.946043770980707, + "learning_rate": 1.8626271632084895e-05, + "loss": 1.3403, + "step": 32226 + }, + { + "epoch": 0.39, + "grad_norm": 39.365341284057074, + "learning_rate": 1.8625976116590177e-05, + "loss": 1.092, + "step": 32229 + }, + { + "epoch": 0.39, + "grad_norm": 21.616439564744766, + "learning_rate": 1.8625680571658113e-05, + "loss": 1.4831, + "step": 32232 + }, + { + "epoch": 0.39, + "grad_norm": 14.4297830611659, + "learning_rate": 1.8625384997289703e-05, + "loss": 1.4393, + "step": 32235 + }, + { + "epoch": 0.39, + "grad_norm": 16.89653688107482, + "learning_rate": 1.8625089393485963e-05, + "loss": 1.333, + "step": 32238 + }, + { + "epoch": 0.39, + "grad_norm": 20.885967380210985, + "learning_rate": 1.8624793760247902e-05, + "loss": 1.6438, + "step": 32241 + }, + { + "epoch": 0.39, + "grad_norm": 24.655003280564973, + "learning_rate": 1.8624498097576524e-05, + "loss": 1.47, + "step": 32244 + }, + { + "epoch": 0.39, + "grad_norm": 16.07164198299253, + "learning_rate": 1.862420240547284e-05, + "loss": 1.5956, + "step": 32247 + }, + { + "epoch": 0.39, + "grad_norm": 25.217072484058754, + "learning_rate": 1.8623906683937864e-05, + "loss": 1.4213, + "step": 32250 + }, + { + "epoch": 0.39, + "grad_norm": 10.177625083497494, + "learning_rate": 1.8623610932972598e-05, + "loss": 1.1901, + "step": 32253 + }, + { + "epoch": 0.39, + "grad_norm": 11.652395897297266, + "learning_rate": 1.8623315152578053e-05, + "loss": 1.6416, + "step": 32256 + }, + { + "epoch": 0.39, + "grad_norm": 9.003770307058556, + "learning_rate": 1.8623019342755243e-05, + "loss": 1.2692, + "step": 32259 + }, + { + "epoch": 0.39, + "grad_norm": 29.011226327984645, + "learning_rate": 1.862272350350517e-05, + "loss": 1.7548, + "step": 32262 + }, + { + "epoch": 0.39, + "grad_norm": 23.484970404955945, + "learning_rate": 1.862242763482885e-05, + "loss": 1.3697, + "step": 32265 + }, + { + "epoch": 0.39, + "grad_norm": 18.385165313175897, + "learning_rate": 1.862213173672729e-05, + "loss": 1.5356, + "step": 32268 + }, + { + "epoch": 0.39, + "grad_norm": 5.669036216409867, + "learning_rate": 1.8621835809201496e-05, + "loss": 1.5795, + "step": 32271 + }, + { + "epoch": 0.39, + "grad_norm": 12.805316193734548, + "learning_rate": 1.8621539852252487e-05, + "loss": 1.6458, + "step": 32274 + }, + { + "epoch": 0.39, + "grad_norm": 9.222063789375428, + "learning_rate": 1.8621243865881265e-05, + "loss": 1.678, + "step": 32277 + }, + { + "epoch": 0.39, + "grad_norm": 7.513688324466062, + "learning_rate": 1.8620947850088844e-05, + "loss": 1.8339, + "step": 32280 + }, + { + "epoch": 0.39, + "grad_norm": 4.900165940739835, + "learning_rate": 1.8620651804876235e-05, + "loss": 1.4484, + "step": 32283 + }, + { + "epoch": 0.39, + "grad_norm": 45.6741423710813, + "learning_rate": 1.8620355730244443e-05, + "loss": 1.63, + "step": 32286 + }, + { + "epoch": 0.39, + "grad_norm": 3.8434668483052454, + "learning_rate": 1.8620059626194483e-05, + "loss": 1.7099, + "step": 32289 + }, + { + "epoch": 0.39, + "grad_norm": 16.37492477386982, + "learning_rate": 1.8619763492727366e-05, + "loss": 1.603, + "step": 32292 + }, + { + "epoch": 0.39, + "grad_norm": 16.03859782945477, + "learning_rate": 1.8619467329844103e-05, + "loss": 1.7657, + "step": 32295 + }, + { + "epoch": 0.39, + "grad_norm": 11.764044887477846, + "learning_rate": 1.86191711375457e-05, + "loss": 1.5656, + "step": 32298 + }, + { + "epoch": 0.39, + "grad_norm": 11.7421772655763, + "learning_rate": 1.861887491583317e-05, + "loss": 1.1897, + "step": 32301 + }, + { + "epoch": 0.39, + "grad_norm": 5.448241511021022, + "learning_rate": 1.8618578664707525e-05, + "loss": 1.5299, + "step": 32304 + }, + { + "epoch": 0.39, + "grad_norm": 5.267750583486495, + "learning_rate": 1.8618282384169776e-05, + "loss": 1.4431, + "step": 32307 + }, + { + "epoch": 0.39, + "grad_norm": 6.8565781040650675, + "learning_rate": 1.8617986074220932e-05, + "loss": 1.3914, + "step": 32310 + }, + { + "epoch": 0.39, + "grad_norm": 14.969863235874898, + "learning_rate": 1.8617689734862007e-05, + "loss": 1.5111, + "step": 32313 + }, + { + "epoch": 0.39, + "grad_norm": 16.413440482264143, + "learning_rate": 1.861739336609401e-05, + "loss": 1.5621, + "step": 32316 + }, + { + "epoch": 0.39, + "grad_norm": 10.593030823091082, + "learning_rate": 1.8617096967917953e-05, + "loss": 1.2836, + "step": 32319 + }, + { + "epoch": 0.39, + "grad_norm": 34.33107247210048, + "learning_rate": 1.861680054033485e-05, + "loss": 1.2702, + "step": 32322 + }, + { + "epoch": 0.39, + "grad_norm": 11.183704431975269, + "learning_rate": 1.861650408334571e-05, + "loss": 1.4466, + "step": 32325 + }, + { + "epoch": 0.39, + "grad_norm": 16.34047392642691, + "learning_rate": 1.8616207596951544e-05, + "loss": 1.1443, + "step": 32328 + }, + { + "epoch": 0.39, + "grad_norm": 9.459774466246266, + "learning_rate": 1.8615911081153365e-05, + "loss": 1.634, + "step": 32331 + }, + { + "epoch": 0.39, + "grad_norm": 10.667765791005378, + "learning_rate": 1.8615614535952186e-05, + "loss": 1.44, + "step": 32334 + }, + { + "epoch": 0.39, + "grad_norm": 8.009464277998573, + "learning_rate": 1.861531796134902e-05, + "loss": 1.2704, + "step": 32337 + }, + { + "epoch": 0.39, + "grad_norm": 8.364809351342776, + "learning_rate": 1.8615021357344872e-05, + "loss": 1.4188, + "step": 32340 + }, + { + "epoch": 0.39, + "grad_norm": 80.20635773257172, + "learning_rate": 1.8614724723940763e-05, + "loss": 1.5409, + "step": 32343 + }, + { + "epoch": 0.39, + "grad_norm": 5.828909140316781, + "learning_rate": 1.86144280611377e-05, + "loss": 1.8226, + "step": 32346 + }, + { + "epoch": 0.39, + "grad_norm": 79.75387746130282, + "learning_rate": 1.8614131368936696e-05, + "loss": 1.2387, + "step": 32349 + }, + { + "epoch": 0.39, + "grad_norm": 8.210226649022365, + "learning_rate": 1.8613834647338768e-05, + "loss": 1.4381, + "step": 32352 + }, + { + "epoch": 0.39, + "grad_norm": 26.743781061915264, + "learning_rate": 1.8613537896344924e-05, + "loss": 1.4549, + "step": 32355 + }, + { + "epoch": 0.39, + "grad_norm": 24.289410253220435, + "learning_rate": 1.8613241115956177e-05, + "loss": 1.3872, + "step": 32358 + }, + { + "epoch": 0.39, + "grad_norm": 16.038972439918737, + "learning_rate": 1.8612944306173536e-05, + "loss": 1.4735, + "step": 32361 + }, + { + "epoch": 0.39, + "grad_norm": 66.90360529579574, + "learning_rate": 1.8612647466998023e-05, + "loss": 1.5803, + "step": 32364 + }, + { + "epoch": 0.39, + "grad_norm": 29.626589310320675, + "learning_rate": 1.8612350598430646e-05, + "loss": 1.6552, + "step": 32367 + }, + { + "epoch": 0.39, + "grad_norm": 18.08545152126568, + "learning_rate": 1.8612053700472418e-05, + "loss": 1.3959, + "step": 32370 + }, + { + "epoch": 0.39, + "grad_norm": 36.97161417851569, + "learning_rate": 1.8611756773124355e-05, + "loss": 1.3953, + "step": 32373 + }, + { + "epoch": 0.39, + "grad_norm": 12.302032130332737, + "learning_rate": 1.8611459816387465e-05, + "loss": 1.4594, + "step": 32376 + }, + { + "epoch": 0.39, + "grad_norm": 78.77494481857424, + "learning_rate": 1.8611162830262767e-05, + "loss": 1.6845, + "step": 32379 + }, + { + "epoch": 0.39, + "grad_norm": 13.979953084966933, + "learning_rate": 1.861086581475127e-05, + "loss": 1.6896, + "step": 32382 + }, + { + "epoch": 0.39, + "grad_norm": 11.061991569409264, + "learning_rate": 1.8610568769853992e-05, + "loss": 1.3693, + "step": 32385 + }, + { + "epoch": 0.39, + "grad_norm": 82.84663478665131, + "learning_rate": 1.861027169557194e-05, + "loss": 1.6838, + "step": 32388 + }, + { + "epoch": 0.39, + "grad_norm": 5.441425794974673, + "learning_rate": 1.8609974591906138e-05, + "loss": 1.4576, + "step": 32391 + }, + { + "epoch": 0.39, + "grad_norm": 33.80859184350616, + "learning_rate": 1.860967745885759e-05, + "loss": 1.5958, + "step": 32394 + }, + { + "epoch": 0.39, + "grad_norm": 5.509265110870994, + "learning_rate": 1.8609380296427315e-05, + "loss": 1.4568, + "step": 32397 + }, + { + "epoch": 0.39, + "grad_norm": 39.981188094854375, + "learning_rate": 1.8609083104616322e-05, + "loss": 1.7992, + "step": 32400 + }, + { + "epoch": 0.39, + "grad_norm": 7.70200384371355, + "learning_rate": 1.8608785883425636e-05, + "loss": 1.8613, + "step": 32403 + }, + { + "epoch": 0.39, + "grad_norm": 8.73307331929858, + "learning_rate": 1.8608488632856262e-05, + "loss": 1.3291, + "step": 32406 + }, + { + "epoch": 0.39, + "grad_norm": 73.24971379633939, + "learning_rate": 1.8608191352909218e-05, + "loss": 1.6479, + "step": 32409 + }, + { + "epoch": 0.39, + "grad_norm": 20.10637254283848, + "learning_rate": 1.8607894043585516e-05, + "loss": 1.5095, + "step": 32412 + }, + { + "epoch": 0.39, + "grad_norm": 9.96124973311108, + "learning_rate": 1.8607596704886172e-05, + "loss": 1.4122, + "step": 32415 + }, + { + "epoch": 0.39, + "grad_norm": 70.79786021175025, + "learning_rate": 1.8607299336812204e-05, + "loss": 1.3346, + "step": 32418 + }, + { + "epoch": 0.39, + "grad_norm": 12.186553157483992, + "learning_rate": 1.8607001939364623e-05, + "loss": 1.525, + "step": 32421 + }, + { + "epoch": 0.39, + "grad_norm": 6.194282336380533, + "learning_rate": 1.860670451254444e-05, + "loss": 1.4033, + "step": 32424 + }, + { + "epoch": 0.39, + "grad_norm": 14.340109861836668, + "learning_rate": 1.8606407056352676e-05, + "loss": 1.5826, + "step": 32427 + }, + { + "epoch": 0.39, + "grad_norm": 14.356986814554393, + "learning_rate": 1.8606109570790346e-05, + "loss": 1.9162, + "step": 32430 + }, + { + "epoch": 0.39, + "grad_norm": 21.68872144077856, + "learning_rate": 1.8605812055858465e-05, + "loss": 1.6531, + "step": 32433 + }, + { + "epoch": 0.39, + "grad_norm": 24.985889698439127, + "learning_rate": 1.860551451155805e-05, + "loss": 1.5911, + "step": 32436 + }, + { + "epoch": 0.39, + "grad_norm": 18.11805635718211, + "learning_rate": 1.8605216937890106e-05, + "loss": 1.369, + "step": 32439 + }, + { + "epoch": 0.39, + "grad_norm": 11.865705495208996, + "learning_rate": 1.8604919334855662e-05, + "loss": 1.5282, + "step": 32442 + }, + { + "epoch": 0.39, + "grad_norm": 3.2921143697932647, + "learning_rate": 1.8604621702455728e-05, + "loss": 1.231, + "step": 32445 + }, + { + "epoch": 0.39, + "grad_norm": 23.550359206229906, + "learning_rate": 1.8604324040691313e-05, + "loss": 1.5276, + "step": 32448 + }, + { + "epoch": 0.39, + "grad_norm": 10.944422430877546, + "learning_rate": 1.8604026349563448e-05, + "loss": 1.5692, + "step": 32451 + }, + { + "epoch": 0.39, + "grad_norm": 7.6444667030659375, + "learning_rate": 1.8603728629073138e-05, + "loss": 1.4529, + "step": 32454 + }, + { + "epoch": 0.39, + "grad_norm": 11.638404866838366, + "learning_rate": 1.86034308792214e-05, + "loss": 1.755, + "step": 32457 + }, + { + "epoch": 0.39, + "grad_norm": 5.7443303965349015, + "learning_rate": 1.8603133100009246e-05, + "loss": 1.3847, + "step": 32460 + }, + { + "epoch": 0.39, + "grad_norm": 10.130702035215545, + "learning_rate": 1.8602835291437705e-05, + "loss": 1.3247, + "step": 32463 + }, + { + "epoch": 0.39, + "grad_norm": 10.510637721231193, + "learning_rate": 1.8602537453507784e-05, + "loss": 1.7426, + "step": 32466 + }, + { + "epoch": 0.39, + "grad_norm": 17.185717137809455, + "learning_rate": 1.8602239586220498e-05, + "loss": 1.0942, + "step": 32469 + }, + { + "epoch": 0.39, + "grad_norm": 15.98704681567559, + "learning_rate": 1.860194168957687e-05, + "loss": 1.5657, + "step": 32472 + }, + { + "epoch": 0.39, + "grad_norm": 12.332786887552851, + "learning_rate": 1.8601643763577914e-05, + "loss": 1.6209, + "step": 32475 + }, + { + "epoch": 0.39, + "grad_norm": 2.7405715216500646, + "learning_rate": 1.8601345808224646e-05, + "loss": 1.6688, + "step": 32478 + }, + { + "epoch": 0.39, + "grad_norm": 110.09866089976629, + "learning_rate": 1.8601047823518078e-05, + "loss": 1.326, + "step": 32481 + }, + { + "epoch": 0.39, + "grad_norm": 5.067726889519763, + "learning_rate": 1.8600749809459237e-05, + "loss": 1.4087, + "step": 32484 + }, + { + "epoch": 0.39, + "grad_norm": 12.53073814568632, + "learning_rate": 1.8600451766049135e-05, + "loss": 1.3581, + "step": 32487 + }, + { + "epoch": 0.39, + "grad_norm": 10.162575652060543, + "learning_rate": 1.8600153693288786e-05, + "loss": 1.6218, + "step": 32490 + }, + { + "epoch": 0.39, + "grad_norm": 9.551981233512622, + "learning_rate": 1.8599855591179213e-05, + "loss": 1.5368, + "step": 32493 + }, + { + "epoch": 0.39, + "grad_norm": 10.174320608971614, + "learning_rate": 1.859955745972143e-05, + "loss": 1.3915, + "step": 32496 + }, + { + "epoch": 0.39, + "grad_norm": 7.1440525237667245, + "learning_rate": 1.859925929891645e-05, + "loss": 1.3499, + "step": 32499 + }, + { + "epoch": 0.39, + "grad_norm": 21.514618254189127, + "learning_rate": 1.8598961108765305e-05, + "loss": 1.7587, + "step": 32502 + }, + { + "epoch": 0.39, + "grad_norm": 7.235262584720345, + "learning_rate": 1.8598662889268996e-05, + "loss": 1.9084, + "step": 32505 + }, + { + "epoch": 0.39, + "grad_norm": 23.020789388517446, + "learning_rate": 1.859836464042855e-05, + "loss": 1.4821, + "step": 32508 + }, + { + "epoch": 0.39, + "grad_norm": 10.870991423013134, + "learning_rate": 1.859806636224498e-05, + "loss": 1.3676, + "step": 32511 + }, + { + "epoch": 0.39, + "grad_norm": 14.157922826576, + "learning_rate": 1.859776805471931e-05, + "loss": 1.7256, + "step": 32514 + }, + { + "epoch": 0.39, + "grad_norm": 10.892694299613941, + "learning_rate": 1.8597469717852553e-05, + "loss": 1.4186, + "step": 32517 + }, + { + "epoch": 0.39, + "grad_norm": 8.386318065150817, + "learning_rate": 1.8597171351645727e-05, + "loss": 1.4201, + "step": 32520 + }, + { + "epoch": 0.39, + "grad_norm": 15.069717349051519, + "learning_rate": 1.8596872956099855e-05, + "loss": 1.4964, + "step": 32523 + }, + { + "epoch": 0.39, + "grad_norm": 7.561030580884664, + "learning_rate": 1.859657453121595e-05, + "loss": 1.3734, + "step": 32526 + }, + { + "epoch": 0.39, + "grad_norm": 2.919136964375959, + "learning_rate": 1.8596276076995035e-05, + "loss": 1.4158, + "step": 32529 + }, + { + "epoch": 0.39, + "grad_norm": 39.06804423234816, + "learning_rate": 1.8595977593438123e-05, + "loss": 1.5563, + "step": 32532 + }, + { + "epoch": 0.39, + "grad_norm": 16.05781851941924, + "learning_rate": 1.8595679080546238e-05, + "loss": 1.2742, + "step": 32535 + }, + { + "epoch": 0.39, + "grad_norm": 21.494296752770893, + "learning_rate": 1.8595380538320396e-05, + "loss": 1.7705, + "step": 32538 + }, + { + "epoch": 0.39, + "grad_norm": 23.21232881418321, + "learning_rate": 1.8595081966761616e-05, + "loss": 1.4743, + "step": 32541 + }, + { + "epoch": 0.39, + "grad_norm": 4.85754700973406, + "learning_rate": 1.859478336587092e-05, + "loss": 1.1857, + "step": 32544 + }, + { + "epoch": 0.39, + "grad_norm": 9.055014274121948, + "learning_rate": 1.859448473564932e-05, + "loss": 1.5026, + "step": 32547 + }, + { + "epoch": 0.39, + "grad_norm": 24.741995147766502, + "learning_rate": 1.859418607609784e-05, + "loss": 1.4736, + "step": 32550 + }, + { + "epoch": 0.39, + "grad_norm": 17.506747016862825, + "learning_rate": 1.8593887387217496e-05, + "loss": 1.2194, + "step": 32553 + }, + { + "epoch": 0.39, + "grad_norm": 11.96152753810114, + "learning_rate": 1.8593588669009314e-05, + "loss": 1.4108, + "step": 32556 + }, + { + "epoch": 0.39, + "grad_norm": 20.410825660156387, + "learning_rate": 1.8593289921474313e-05, + "loss": 1.8596, + "step": 32559 + }, + { + "epoch": 0.39, + "grad_norm": 17.954833253324843, + "learning_rate": 1.85929911446135e-05, + "loss": 1.5155, + "step": 32562 + }, + { + "epoch": 0.39, + "grad_norm": 8.86852936333584, + "learning_rate": 1.8592692338427908e-05, + "loss": 1.6633, + "step": 32565 + }, + { + "epoch": 0.39, + "grad_norm": 16.631902278170195, + "learning_rate": 1.8592393502918554e-05, + "loss": 1.3409, + "step": 32568 + }, + { + "epoch": 0.39, + "grad_norm": 5.948663581483982, + "learning_rate": 1.8592094638086455e-05, + "loss": 1.4426, + "step": 32571 + }, + { + "epoch": 0.39, + "grad_norm": 21.91372584621993, + "learning_rate": 1.859179574393263e-05, + "loss": 1.394, + "step": 32574 + }, + { + "epoch": 0.39, + "grad_norm": 12.327730015459114, + "learning_rate": 1.85914968204581e-05, + "loss": 1.5902, + "step": 32577 + }, + { + "epoch": 0.39, + "grad_norm": 16.904704141900947, + "learning_rate": 1.859119786766389e-05, + "loss": 1.502, + "step": 32580 + }, + { + "epoch": 0.39, + "grad_norm": 8.236525021703866, + "learning_rate": 1.859089888555101e-05, + "loss": 1.7216, + "step": 32583 + }, + { + "epoch": 0.39, + "grad_norm": 25.240229642496075, + "learning_rate": 1.859059987412049e-05, + "loss": 1.422, + "step": 32586 + }, + { + "epoch": 0.39, + "grad_norm": 50.04591633416784, + "learning_rate": 1.8590300833373346e-05, + "loss": 1.6456, + "step": 32589 + }, + { + "epoch": 0.39, + "grad_norm": 6.274743899679054, + "learning_rate": 1.85900017633106e-05, + "loss": 1.7794, + "step": 32592 + }, + { + "epoch": 0.39, + "grad_norm": 28.731578039721928, + "learning_rate": 1.8589702663933272e-05, + "loss": 1.7065, + "step": 32595 + }, + { + "epoch": 0.39, + "grad_norm": 63.358105834156945, + "learning_rate": 1.8589403535242386e-05, + "loss": 1.832, + "step": 32598 + }, + { + "epoch": 0.39, + "grad_norm": 25.65769045470449, + "learning_rate": 1.858910437723896e-05, + "loss": 1.706, + "step": 32601 + }, + { + "epoch": 0.39, + "grad_norm": 21.766815279960692, + "learning_rate": 1.858880518992401e-05, + "loss": 1.8739, + "step": 32604 + }, + { + "epoch": 0.39, + "grad_norm": 11.479188447076792, + "learning_rate": 1.8588505973298563e-05, + "loss": 1.4843, + "step": 32607 + }, + { + "epoch": 0.39, + "grad_norm": 15.028552051788013, + "learning_rate": 1.8588206727363637e-05, + "loss": 1.4979, + "step": 32610 + }, + { + "epoch": 0.39, + "grad_norm": 6.510097747107575, + "learning_rate": 1.8587907452120256e-05, + "loss": 1.4448, + "step": 32613 + }, + { + "epoch": 0.39, + "grad_norm": 10.656260017074317, + "learning_rate": 1.858760814756944e-05, + "loss": 1.2379, + "step": 32616 + }, + { + "epoch": 0.39, + "grad_norm": 6.140813253001868, + "learning_rate": 1.8587308813712212e-05, + "loss": 1.4984, + "step": 32619 + }, + { + "epoch": 0.39, + "grad_norm": 4.506982376110055, + "learning_rate": 1.8587009450549593e-05, + "loss": 1.2698, + "step": 32622 + }, + { + "epoch": 0.39, + "grad_norm": 5.882198312245381, + "learning_rate": 1.85867100580826e-05, + "loss": 1.3846, + "step": 32625 + }, + { + "epoch": 0.39, + "grad_norm": 12.659086526712152, + "learning_rate": 1.858641063631226e-05, + "loss": 1.5261, + "step": 32628 + }, + { + "epoch": 0.39, + "grad_norm": 23.420358477679926, + "learning_rate": 1.85861111852396e-05, + "loss": 1.4883, + "step": 32631 + }, + { + "epoch": 0.39, + "grad_norm": 7.65966040248645, + "learning_rate": 1.8585811704865627e-05, + "loss": 1.4909, + "step": 32634 + }, + { + "epoch": 0.39, + "grad_norm": 21.05947479464939, + "learning_rate": 1.8585512195191377e-05, + "loss": 1.5434, + "step": 32637 + }, + { + "epoch": 0.39, + "grad_norm": 3.9369040512287383, + "learning_rate": 1.858521265621786e-05, + "loss": 1.0978, + "step": 32640 + }, + { + "epoch": 0.39, + "grad_norm": 64.87016913277778, + "learning_rate": 1.8584913087946107e-05, + "loss": 1.5742, + "step": 32643 + }, + { + "epoch": 0.39, + "grad_norm": 12.446431718033487, + "learning_rate": 1.858461349037714e-05, + "loss": 1.6915, + "step": 32646 + }, + { + "epoch": 0.39, + "grad_norm": 5.792055767422766, + "learning_rate": 1.8584313863511976e-05, + "loss": 1.4487, + "step": 32649 + }, + { + "epoch": 0.39, + "grad_norm": 14.198023660849891, + "learning_rate": 1.8584014207351645e-05, + "loss": 1.3999, + "step": 32652 + }, + { + "epoch": 0.39, + "grad_norm": 9.481544767009364, + "learning_rate": 1.8583714521897163e-05, + "loss": 1.3196, + "step": 32655 + }, + { + "epoch": 0.39, + "grad_norm": 46.272727314195286, + "learning_rate": 1.8583414807149555e-05, + "loss": 1.2845, + "step": 32658 + }, + { + "epoch": 0.39, + "grad_norm": 21.481745393541217, + "learning_rate": 1.8583115063109846e-05, + "loss": 1.7226, + "step": 32661 + }, + { + "epoch": 0.39, + "grad_norm": 11.647260077849184, + "learning_rate": 1.8582815289779054e-05, + "loss": 1.3212, + "step": 32664 + }, + { + "epoch": 0.39, + "grad_norm": 2.850826475971127, + "learning_rate": 1.8582515487158207e-05, + "loss": 1.8291, + "step": 32667 + }, + { + "epoch": 0.39, + "grad_norm": 33.469168920183805, + "learning_rate": 1.8582215655248324e-05, + "loss": 1.4289, + "step": 32670 + }, + { + "epoch": 0.39, + "grad_norm": 18.952960495621713, + "learning_rate": 1.858191579405043e-05, + "loss": 1.1538, + "step": 32673 + }, + { + "epoch": 0.39, + "grad_norm": 5.012673671529193, + "learning_rate": 1.8581615903565552e-05, + "loss": 1.5157, + "step": 32676 + }, + { + "epoch": 0.39, + "grad_norm": 35.787251106825394, + "learning_rate": 1.8581315983794706e-05, + "loss": 1.6471, + "step": 32679 + }, + { + "epoch": 0.39, + "grad_norm": 10.808200782790033, + "learning_rate": 1.8581016034738923e-05, + "loss": 1.4063, + "step": 32682 + }, + { + "epoch": 0.39, + "grad_norm": 26.21908542877318, + "learning_rate": 1.858071605639922e-05, + "loss": 1.2694, + "step": 32685 + }, + { + "epoch": 0.39, + "grad_norm": 7.821594848981584, + "learning_rate": 1.858041604877663e-05, + "loss": 1.2568, + "step": 32688 + }, + { + "epoch": 0.39, + "grad_norm": 9.527362967736316, + "learning_rate": 1.8580116011872165e-05, + "loss": 1.2833, + "step": 32691 + }, + { + "epoch": 0.39, + "grad_norm": 17.605097106017883, + "learning_rate": 1.8579815945686857e-05, + "loss": 1.2657, + "step": 32694 + }, + { + "epoch": 0.39, + "grad_norm": 5.359240325420511, + "learning_rate": 1.8579515850221726e-05, + "loss": 1.7071, + "step": 32697 + }, + { + "epoch": 0.39, + "grad_norm": 40.88148031136646, + "learning_rate": 1.8579215725477798e-05, + "loss": 1.4738, + "step": 32700 + }, + { + "epoch": 0.39, + "grad_norm": 13.717556025239803, + "learning_rate": 1.8578915571456098e-05, + "loss": 1.4728, + "step": 32703 + }, + { + "epoch": 0.39, + "grad_norm": 14.893163521838751, + "learning_rate": 1.857861538815765e-05, + "loss": 1.6697, + "step": 32706 + }, + { + "epoch": 0.39, + "grad_norm": 18.822848591216164, + "learning_rate": 1.8578315175583475e-05, + "loss": 1.5898, + "step": 32709 + }, + { + "epoch": 0.39, + "grad_norm": 29.525398128634418, + "learning_rate": 1.85780149337346e-05, + "loss": 1.2323, + "step": 32712 + }, + { + "epoch": 0.39, + "grad_norm": 11.652665443643297, + "learning_rate": 1.857771466261205e-05, + "loss": 1.2416, + "step": 32715 + }, + { + "epoch": 0.39, + "grad_norm": 230.1202201058462, + "learning_rate": 1.8577414362216853e-05, + "loss": 1.3894, + "step": 32718 + }, + { + "epoch": 0.39, + "grad_norm": 11.61669148819868, + "learning_rate": 1.8577114032550026e-05, + "loss": 1.0463, + "step": 32721 + }, + { + "epoch": 0.39, + "grad_norm": 22.964187831772843, + "learning_rate": 1.85768136736126e-05, + "loss": 1.6432, + "step": 32724 + }, + { + "epoch": 0.39, + "grad_norm": 8.244427688452976, + "learning_rate": 1.85765132854056e-05, + "loss": 1.4468, + "step": 32727 + }, + { + "epoch": 0.39, + "grad_norm": 7.879079147110418, + "learning_rate": 1.857621286793005e-05, + "loss": 1.4646, + "step": 32730 + }, + { + "epoch": 0.39, + "grad_norm": 11.081924684515542, + "learning_rate": 1.857591242118697e-05, + "loss": 1.3723, + "step": 32733 + }, + { + "epoch": 0.39, + "grad_norm": 33.969608046684826, + "learning_rate": 1.8575611945177393e-05, + "loss": 1.4401, + "step": 32736 + }, + { + "epoch": 0.39, + "grad_norm": 17.059125962340783, + "learning_rate": 1.857531143990234e-05, + "loss": 1.4146, + "step": 32739 + }, + { + "epoch": 0.39, + "grad_norm": 67.6017644254013, + "learning_rate": 1.857501090536284e-05, + "loss": 1.6163, + "step": 32742 + }, + { + "epoch": 0.39, + "grad_norm": 93.85243611932249, + "learning_rate": 1.8574710341559915e-05, + "loss": 1.5156, + "step": 32745 + }, + { + "epoch": 0.39, + "grad_norm": 18.326150687241306, + "learning_rate": 1.8574409748494593e-05, + "loss": 1.5736, + "step": 32748 + }, + { + "epoch": 0.39, + "grad_norm": 73.63421662489492, + "learning_rate": 1.8574109126167896e-05, + "loss": 1.5771, + "step": 32751 + }, + { + "epoch": 0.39, + "grad_norm": 40.68122838209156, + "learning_rate": 1.8573808474580858e-05, + "loss": 1.3701, + "step": 32754 + }, + { + "epoch": 0.39, + "grad_norm": 24.33894238622004, + "learning_rate": 1.8573507793734497e-05, + "loss": 1.3439, + "step": 32757 + }, + { + "epoch": 0.39, + "grad_norm": 24.029059338301344, + "learning_rate": 1.8573207083629843e-05, + "loss": 1.5275, + "step": 32760 + }, + { + "epoch": 0.39, + "grad_norm": 23.192557959073252, + "learning_rate": 1.857290634426792e-05, + "loss": 1.3034, + "step": 32763 + }, + { + "epoch": 0.39, + "grad_norm": 31.47457369575603, + "learning_rate": 1.8572605575649757e-05, + "loss": 1.256, + "step": 32766 + }, + { + "epoch": 0.39, + "grad_norm": 10.03630492141793, + "learning_rate": 1.857230477777638e-05, + "loss": 1.4021, + "step": 32769 + }, + { + "epoch": 0.39, + "grad_norm": 18.615677357348055, + "learning_rate": 1.857200395064881e-05, + "loss": 1.679, + "step": 32772 + }, + { + "epoch": 0.39, + "grad_norm": 24.169214458565808, + "learning_rate": 1.8571703094268084e-05, + "loss": 1.4206, + "step": 32775 + }, + { + "epoch": 0.39, + "grad_norm": 12.994851496819942, + "learning_rate": 1.8571402208635215e-05, + "loss": 1.4, + "step": 32778 + }, + { + "epoch": 0.39, + "grad_norm": 18.17782014034842, + "learning_rate": 1.8571101293751245e-05, + "loss": 1.3261, + "step": 32781 + }, + { + "epoch": 0.39, + "grad_norm": 14.347665816989878, + "learning_rate": 1.857080034961719e-05, + "loss": 1.1766, + "step": 32784 + }, + { + "epoch": 0.39, + "grad_norm": 41.21426502140519, + "learning_rate": 1.8570499376234083e-05, + "loss": 1.5879, + "step": 32787 + }, + { + "epoch": 0.39, + "grad_norm": 6.647203558584606, + "learning_rate": 1.8570198373602948e-05, + "loss": 1.3251, + "step": 32790 + }, + { + "epoch": 0.39, + "grad_norm": 15.782241817535503, + "learning_rate": 1.8569897341724812e-05, + "loss": 1.3285, + "step": 32793 + }, + { + "epoch": 0.39, + "grad_norm": 5.50713753870547, + "learning_rate": 1.8569596280600702e-05, + "loss": 1.4914, + "step": 32796 + }, + { + "epoch": 0.39, + "grad_norm": 10.342401871861872, + "learning_rate": 1.856929519023165e-05, + "loss": 1.6675, + "step": 32799 + }, + { + "epoch": 0.39, + "grad_norm": 20.118371602607912, + "learning_rate": 1.8568994070618677e-05, + "loss": 1.4317, + "step": 32802 + }, + { + "epoch": 0.39, + "grad_norm": 25.93587290300848, + "learning_rate": 1.8568692921762817e-05, + "loss": 1.5531, + "step": 32805 + }, + { + "epoch": 0.39, + "grad_norm": 11.90302183916441, + "learning_rate": 1.8568391743665094e-05, + "loss": 1.3819, + "step": 32808 + }, + { + "epoch": 0.39, + "grad_norm": 10.718530166034144, + "learning_rate": 1.8568090536326533e-05, + "loss": 1.2882, + "step": 32811 + }, + { + "epoch": 0.39, + "grad_norm": 26.000396018876764, + "learning_rate": 1.8567789299748167e-05, + "loss": 1.6469, + "step": 32814 + }, + { + "epoch": 0.39, + "grad_norm": 8.269555700571923, + "learning_rate": 1.8567488033931024e-05, + "loss": 1.4461, + "step": 32817 + }, + { + "epoch": 0.39, + "grad_norm": 16.897725005439497, + "learning_rate": 1.856718673887613e-05, + "loss": 1.183, + "step": 32820 + }, + { + "epoch": 0.39, + "grad_norm": 14.041406371659328, + "learning_rate": 1.8566885414584507e-05, + "loss": 1.4242, + "step": 32823 + }, + { + "epoch": 0.39, + "grad_norm": 11.756278399092643, + "learning_rate": 1.8566584061057197e-05, + "loss": 1.2949, + "step": 32826 + }, + { + "epoch": 0.39, + "grad_norm": 22.649391538557108, + "learning_rate": 1.856628267829522e-05, + "loss": 1.3222, + "step": 32829 + }, + { + "epoch": 0.39, + "grad_norm": 18.83989833046211, + "learning_rate": 1.8565981266299603e-05, + "loss": 1.8325, + "step": 32832 + }, + { + "epoch": 0.39, + "grad_norm": 29.887241998263267, + "learning_rate": 1.8565679825071382e-05, + "loss": 1.1854, + "step": 32835 + }, + { + "epoch": 0.39, + "grad_norm": 18.945925952523805, + "learning_rate": 1.856537835461158e-05, + "loss": 1.6335, + "step": 32838 + }, + { + "epoch": 0.39, + "grad_norm": 4.338726324169541, + "learning_rate": 1.8565076854921223e-05, + "loss": 1.3983, + "step": 32841 + }, + { + "epoch": 0.39, + "grad_norm": 87.87210408244418, + "learning_rate": 1.856477532600135e-05, + "loss": 1.9382, + "step": 32844 + }, + { + "epoch": 0.39, + "grad_norm": 22.592967422330823, + "learning_rate": 1.8564473767852976e-05, + "loss": 1.8746, + "step": 32847 + }, + { + "epoch": 0.4, + "grad_norm": 29.555502352943133, + "learning_rate": 1.856417218047714e-05, + "loss": 1.4337, + "step": 32850 + }, + { + "epoch": 0.4, + "grad_norm": 9.447673567899091, + "learning_rate": 1.8563870563874876e-05, + "loss": 1.1998, + "step": 32853 + }, + { + "epoch": 0.4, + "grad_norm": 14.52218680717143, + "learning_rate": 1.85635689180472e-05, + "loss": 1.4404, + "step": 32856 + }, + { + "epoch": 0.4, + "grad_norm": 37.98462848967411, + "learning_rate": 1.856326724299515e-05, + "loss": 1.1319, + "step": 32859 + }, + { + "epoch": 0.4, + "grad_norm": 10.589488237774265, + "learning_rate": 1.8562965538719754e-05, + "loss": 1.2322, + "step": 32862 + }, + { + "epoch": 0.4, + "grad_norm": 11.804647299507527, + "learning_rate": 1.856266380522204e-05, + "loss": 1.4529, + "step": 32865 + }, + { + "epoch": 0.4, + "grad_norm": 20.46509918892344, + "learning_rate": 1.856236204250304e-05, + "loss": 1.4995, + "step": 32868 + }, + { + "epoch": 0.4, + "grad_norm": 15.873240242245078, + "learning_rate": 1.8562060250563778e-05, + "loss": 1.3121, + "step": 32871 + }, + { + "epoch": 0.4, + "grad_norm": 19.120045836956166, + "learning_rate": 1.8561758429405295e-05, + "loss": 1.361, + "step": 32874 + }, + { + "epoch": 0.4, + "grad_norm": 28.82978518486041, + "learning_rate": 1.856145657902861e-05, + "loss": 1.519, + "step": 32877 + }, + { + "epoch": 0.4, + "grad_norm": 13.983285205124643, + "learning_rate": 1.8561154699434758e-05, + "loss": 1.3118, + "step": 32880 + }, + { + "epoch": 0.4, + "grad_norm": 10.16032533274632, + "learning_rate": 1.8560852790624767e-05, + "loss": 1.3078, + "step": 32883 + }, + { + "epoch": 0.4, + "grad_norm": 9.955610855019097, + "learning_rate": 1.856055085259967e-05, + "loss": 1.1369, + "step": 32886 + }, + { + "epoch": 0.4, + "grad_norm": 3.259261932967332, + "learning_rate": 1.85602488853605e-05, + "loss": 1.4207, + "step": 32889 + }, + { + "epoch": 0.4, + "grad_norm": 30.24826128666151, + "learning_rate": 1.855994688890828e-05, + "loss": 1.7274, + "step": 32892 + }, + { + "epoch": 0.4, + "grad_norm": 20.108043739097287, + "learning_rate": 1.8559644863244048e-05, + "loss": 1.4501, + "step": 32895 + }, + { + "epoch": 0.4, + "grad_norm": 17.20614986535687, + "learning_rate": 1.855934280836883e-05, + "loss": 1.2714, + "step": 32898 + }, + { + "epoch": 0.4, + "grad_norm": 31.36976174558355, + "learning_rate": 1.8559040724283656e-05, + "loss": 1.4784, + "step": 32901 + }, + { + "epoch": 0.4, + "grad_norm": 8.474219954078592, + "learning_rate": 1.855873861098956e-05, + "loss": 1.3951, + "step": 32904 + }, + { + "epoch": 0.4, + "grad_norm": 7.9092540569631336, + "learning_rate": 1.8558436468487574e-05, + "loss": 1.5634, + "step": 32907 + }, + { + "epoch": 0.4, + "grad_norm": 21.922326581473122, + "learning_rate": 1.8558134296778726e-05, + "loss": 1.5092, + "step": 32910 + }, + { + "epoch": 0.4, + "grad_norm": 60.94177347720631, + "learning_rate": 1.8557832095864046e-05, + "loss": 1.4439, + "step": 32913 + }, + { + "epoch": 0.4, + "grad_norm": 4.1662183907887815, + "learning_rate": 1.855752986574457e-05, + "loss": 1.2238, + "step": 32916 + }, + { + "epoch": 0.4, + "grad_norm": 7.782045171301391, + "learning_rate": 1.8557227606421326e-05, + "loss": 1.4319, + "step": 32919 + }, + { + "epoch": 0.4, + "grad_norm": 19.409286679614336, + "learning_rate": 1.8556925317895348e-05, + "loss": 1.4898, + "step": 32922 + }, + { + "epoch": 0.4, + "grad_norm": 26.362426914360075, + "learning_rate": 1.8556623000167662e-05, + "loss": 1.357, + "step": 32925 + }, + { + "epoch": 0.4, + "grad_norm": 8.55295983389116, + "learning_rate": 1.855632065323931e-05, + "loss": 1.425, + "step": 32928 + }, + { + "epoch": 0.4, + "grad_norm": 85.382313828998, + "learning_rate": 1.8556018277111314e-05, + "loss": 1.1351, + "step": 32931 + }, + { + "epoch": 0.4, + "grad_norm": 11.604308260914733, + "learning_rate": 1.855571587178471e-05, + "loss": 1.6219, + "step": 32934 + }, + { + "epoch": 0.4, + "grad_norm": 20.2345152821773, + "learning_rate": 1.855541343726053e-05, + "loss": 1.1657, + "step": 32937 + }, + { + "epoch": 0.4, + "grad_norm": 18.13954542298084, + "learning_rate": 1.8555110973539803e-05, + "loss": 1.4785, + "step": 32940 + }, + { + "epoch": 0.4, + "grad_norm": 9.747487848692142, + "learning_rate": 1.8554808480623566e-05, + "loss": 1.4548, + "step": 32943 + }, + { + "epoch": 0.4, + "grad_norm": 19.527394249496854, + "learning_rate": 1.8554505958512848e-05, + "loss": 1.6978, + "step": 32946 + }, + { + "epoch": 0.4, + "grad_norm": 31.684918035786914, + "learning_rate": 1.8554203407208685e-05, + "loss": 1.5912, + "step": 32949 + }, + { + "epoch": 0.4, + "grad_norm": 39.854051562888095, + "learning_rate": 1.8553900826712104e-05, + "loss": 1.7753, + "step": 32952 + }, + { + "epoch": 0.4, + "grad_norm": 20.23978894969635, + "learning_rate": 1.855359821702414e-05, + "loss": 1.7125, + "step": 32955 + }, + { + "epoch": 0.4, + "grad_norm": 21.6769299978617, + "learning_rate": 1.855329557814583e-05, + "loss": 1.6187, + "step": 32958 + }, + { + "epoch": 0.4, + "grad_norm": 5.611260087905077, + "learning_rate": 1.85529929100782e-05, + "loss": 1.5191, + "step": 32961 + }, + { + "epoch": 0.4, + "grad_norm": 14.069679482337978, + "learning_rate": 1.855269021282229e-05, + "loss": 1.3695, + "step": 32964 + }, + { + "epoch": 0.4, + "grad_norm": 59.376681737508854, + "learning_rate": 1.8552387486379125e-05, + "loss": 1.5162, + "step": 32967 + }, + { + "epoch": 0.4, + "grad_norm": 10.912837983319953, + "learning_rate": 1.855208473074974e-05, + "loss": 1.6126, + "step": 32970 + }, + { + "epoch": 0.4, + "grad_norm": 22.03297349483, + "learning_rate": 1.8551781945935176e-05, + "loss": 1.3818, + "step": 32973 + }, + { + "epoch": 0.4, + "grad_norm": 18.058325556464986, + "learning_rate": 1.8551479131936457e-05, + "loss": 1.6956, + "step": 32976 + }, + { + "epoch": 0.4, + "grad_norm": 6.925954005617104, + "learning_rate": 1.855117628875462e-05, + "loss": 1.5956, + "step": 32979 + }, + { + "epoch": 0.4, + "grad_norm": 27.423556830230698, + "learning_rate": 1.85508734163907e-05, + "loss": 1.5217, + "step": 32982 + }, + { + "epoch": 0.4, + "grad_norm": 54.542734096449095, + "learning_rate": 1.8550570514845727e-05, + "loss": 1.4085, + "step": 32985 + }, + { + "epoch": 0.4, + "grad_norm": 12.24294606035911, + "learning_rate": 1.8550267584120738e-05, + "loss": 1.6178, + "step": 32988 + }, + { + "epoch": 0.4, + "grad_norm": 11.52108243101877, + "learning_rate": 1.8549964624216765e-05, + "loss": 1.3687, + "step": 32991 + }, + { + "epoch": 0.4, + "grad_norm": 9.694619788198597, + "learning_rate": 1.8549661635134845e-05, + "loss": 1.2553, + "step": 32994 + }, + { + "epoch": 0.4, + "grad_norm": 10.004212806470065, + "learning_rate": 1.8549358616876007e-05, + "loss": 1.1714, + "step": 32997 + }, + { + "epoch": 0.4, + "grad_norm": 8.19990558957602, + "learning_rate": 1.854905556944129e-05, + "loss": 1.2078, + "step": 33000 + }, + { + "epoch": 0.4, + "grad_norm": 5.007405564880912, + "learning_rate": 1.8548752492831724e-05, + "loss": 1.7333, + "step": 33003 + }, + { + "epoch": 0.4, + "grad_norm": 9.631622198017896, + "learning_rate": 1.8548449387048345e-05, + "loss": 1.2019, + "step": 33006 + }, + { + "epoch": 0.4, + "grad_norm": 6.263160775212368, + "learning_rate": 1.8548146252092186e-05, + "loss": 1.5286, + "step": 33009 + }, + { + "epoch": 0.4, + "grad_norm": 16.226078629963784, + "learning_rate": 1.854784308796428e-05, + "loss": 1.3188, + "step": 33012 + }, + { + "epoch": 0.4, + "grad_norm": 4.403622906249785, + "learning_rate": 1.854753989466567e-05, + "loss": 1.7931, + "step": 33015 + }, + { + "epoch": 0.4, + "grad_norm": 10.821156401371626, + "learning_rate": 1.8547236672197384e-05, + "loss": 1.5394, + "step": 33018 + }, + { + "epoch": 0.4, + "grad_norm": 19.778597590724722, + "learning_rate": 1.8546933420560458e-05, + "loss": 1.5225, + "step": 33021 + }, + { + "epoch": 0.4, + "grad_norm": 62.70263888694684, + "learning_rate": 1.8546630139755925e-05, + "loss": 1.1475, + "step": 33024 + }, + { + "epoch": 0.4, + "grad_norm": 13.84110081721109, + "learning_rate": 1.8546326829784826e-05, + "loss": 1.2957, + "step": 33027 + }, + { + "epoch": 0.4, + "grad_norm": 67.26891172188556, + "learning_rate": 1.8546023490648186e-05, + "loss": 1.8562, + "step": 33030 + }, + { + "epoch": 0.4, + "grad_norm": 22.44637227475748, + "learning_rate": 1.854572012234705e-05, + "loss": 1.3447, + "step": 33033 + }, + { + "epoch": 0.4, + "grad_norm": 10.936742627999884, + "learning_rate": 1.854541672488245e-05, + "loss": 1.4402, + "step": 33036 + }, + { + "epoch": 0.4, + "grad_norm": 26.668928121401336, + "learning_rate": 1.8545113298255417e-05, + "loss": 1.448, + "step": 33039 + }, + { + "epoch": 0.4, + "grad_norm": 8.833730476637719, + "learning_rate": 1.8544809842466993e-05, + "loss": 1.6334, + "step": 33042 + }, + { + "epoch": 0.4, + "grad_norm": 70.62381627286597, + "learning_rate": 1.854450635751821e-05, + "loss": 1.6178, + "step": 33045 + }, + { + "epoch": 0.4, + "grad_norm": 14.002077416024836, + "learning_rate": 1.8544202843410106e-05, + "loss": 1.8892, + "step": 33048 + }, + { + "epoch": 0.4, + "grad_norm": 27.888266531312222, + "learning_rate": 1.8543899300143714e-05, + "loss": 1.4105, + "step": 33051 + }, + { + "epoch": 0.4, + "grad_norm": 11.299399359414483, + "learning_rate": 1.854359572772007e-05, + "loss": 1.7762, + "step": 33054 + }, + { + "epoch": 0.4, + "grad_norm": 18.787821073494392, + "learning_rate": 1.854329212614021e-05, + "loss": 1.3758, + "step": 33057 + }, + { + "epoch": 0.4, + "grad_norm": 34.15447335665675, + "learning_rate": 1.8542988495405175e-05, + "loss": 1.5225, + "step": 33060 + }, + { + "epoch": 0.4, + "grad_norm": 38.677897451344755, + "learning_rate": 1.8542684835515997e-05, + "loss": 1.5618, + "step": 33063 + }, + { + "epoch": 0.4, + "grad_norm": 16.47299992330315, + "learning_rate": 1.8542381146473713e-05, + "loss": 1.6112, + "step": 33066 + }, + { + "epoch": 0.4, + "grad_norm": 8.237707720262799, + "learning_rate": 1.8542077428279357e-05, + "loss": 1.5886, + "step": 33069 + }, + { + "epoch": 0.4, + "grad_norm": 21.946757719514135, + "learning_rate": 1.854177368093397e-05, + "loss": 1.8112, + "step": 33072 + }, + { + "epoch": 0.4, + "grad_norm": 13.940736831742413, + "learning_rate": 1.8541469904438583e-05, + "loss": 1.3276, + "step": 33075 + }, + { + "epoch": 0.4, + "grad_norm": 286.70164154383355, + "learning_rate": 1.854116609879424e-05, + "loss": 1.2232, + "step": 33078 + }, + { + "epoch": 0.4, + "grad_norm": 19.587981964295018, + "learning_rate": 1.854086226400197e-05, + "loss": 1.4725, + "step": 33081 + }, + { + "epoch": 0.4, + "grad_norm": 20.166013215920586, + "learning_rate": 1.8540558400062814e-05, + "loss": 1.6729, + "step": 33084 + }, + { + "epoch": 0.4, + "grad_norm": 47.4263001524291, + "learning_rate": 1.8540254506977808e-05, + "loss": 1.8085, + "step": 33087 + }, + { + "epoch": 0.4, + "grad_norm": 5.916867850356645, + "learning_rate": 1.853995058474799e-05, + "loss": 1.4314, + "step": 33090 + }, + { + "epoch": 0.4, + "grad_norm": 18.814492871008014, + "learning_rate": 1.8539646633374396e-05, + "loss": 1.423, + "step": 33093 + }, + { + "epoch": 0.4, + "grad_norm": 51.94151429346051, + "learning_rate": 1.853934265285807e-05, + "loss": 1.6615, + "step": 33096 + }, + { + "epoch": 0.4, + "grad_norm": 6.794110140401262, + "learning_rate": 1.8539038643200036e-05, + "loss": 1.5556, + "step": 33099 + }, + { + "epoch": 0.4, + "grad_norm": 48.92529106716581, + "learning_rate": 1.8538734604401344e-05, + "loss": 1.8823, + "step": 33102 + }, + { + "epoch": 0.4, + "grad_norm": 9.425482358988509, + "learning_rate": 1.8538430536463022e-05, + "loss": 1.3326, + "step": 33105 + }, + { + "epoch": 0.4, + "grad_norm": 10.324475133695008, + "learning_rate": 1.8538126439386116e-05, + "loss": 1.6282, + "step": 33108 + }, + { + "epoch": 0.4, + "grad_norm": 12.732332052809827, + "learning_rate": 1.8537822313171656e-05, + "loss": 1.5525, + "step": 33111 + }, + { + "epoch": 0.4, + "grad_norm": 10.372280251566938, + "learning_rate": 1.8537518157820686e-05, + "loss": 1.2974, + "step": 33114 + }, + { + "epoch": 0.4, + "grad_norm": 12.657811012393918, + "learning_rate": 1.853721397333424e-05, + "loss": 1.6622, + "step": 33117 + }, + { + "epoch": 0.4, + "grad_norm": 12.880154088444511, + "learning_rate": 1.853690975971336e-05, + "loss": 1.6002, + "step": 33120 + }, + { + "epoch": 0.4, + "grad_norm": 48.064576149439766, + "learning_rate": 1.8536605516959083e-05, + "loss": 1.321, + "step": 33123 + }, + { + "epoch": 0.4, + "grad_norm": 67.78508875162692, + "learning_rate": 1.8536301245072446e-05, + "loss": 1.2248, + "step": 33126 + }, + { + "epoch": 0.4, + "grad_norm": 10.835857369136948, + "learning_rate": 1.8535996944054485e-05, + "loss": 1.5084, + "step": 33129 + }, + { + "epoch": 0.4, + "grad_norm": 22.837461715337223, + "learning_rate": 1.8535692613906242e-05, + "loss": 1.3897, + "step": 33132 + }, + { + "epoch": 0.4, + "grad_norm": 4.456398664717472, + "learning_rate": 1.8535388254628755e-05, + "loss": 1.3972, + "step": 33135 + }, + { + "epoch": 0.4, + "grad_norm": 17.773614321218172, + "learning_rate": 1.853508386622306e-05, + "loss": 1.4808, + "step": 33138 + }, + { + "epoch": 0.4, + "grad_norm": 6.8567308815443155, + "learning_rate": 1.8534779448690205e-05, + "loss": 1.194, + "step": 33141 + }, + { + "epoch": 0.4, + "grad_norm": 6.591004467922256, + "learning_rate": 1.8534475002031214e-05, + "loss": 1.6597, + "step": 33144 + }, + { + "epoch": 0.4, + "grad_norm": 26.31308038650219, + "learning_rate": 1.853417052624714e-05, + "loss": 1.6981, + "step": 33147 + }, + { + "epoch": 0.4, + "grad_norm": 7.811559815097074, + "learning_rate": 1.8533866021339012e-05, + "loss": 1.4195, + "step": 33150 + }, + { + "epoch": 0.4, + "grad_norm": 17.078255289241085, + "learning_rate": 1.853356148730787e-05, + "loss": 1.3586, + "step": 33153 + }, + { + "epoch": 0.4, + "grad_norm": 10.510427920427736, + "learning_rate": 1.8533256924154766e-05, + "loss": 1.4364, + "step": 33156 + }, + { + "epoch": 0.4, + "grad_norm": 15.25984446318327, + "learning_rate": 1.853295233188072e-05, + "loss": 1.6075, + "step": 33159 + }, + { + "epoch": 0.4, + "grad_norm": 17.889515738848054, + "learning_rate": 1.853264771048679e-05, + "loss": 1.3724, + "step": 33162 + }, + { + "epoch": 0.4, + "grad_norm": 27.835022005109522, + "learning_rate": 1.8532343059974e-05, + "loss": 1.7313, + "step": 33165 + }, + { + "epoch": 0.4, + "grad_norm": 91.47132105812433, + "learning_rate": 1.85320383803434e-05, + "loss": 1.7078, + "step": 33168 + }, + { + "epoch": 0.4, + "grad_norm": 27.605139722230085, + "learning_rate": 1.8531733671596023e-05, + "loss": 1.474, + "step": 33171 + }, + { + "epoch": 0.4, + "grad_norm": 23.1462740287459, + "learning_rate": 1.8531428933732916e-05, + "loss": 1.8978, + "step": 33174 + }, + { + "epoch": 0.4, + "grad_norm": 7.886040601822189, + "learning_rate": 1.8531124166755114e-05, + "loss": 1.577, + "step": 33177 + }, + { + "epoch": 0.4, + "grad_norm": 11.758108609951297, + "learning_rate": 1.8530819370663658e-05, + "loss": 1.409, + "step": 33180 + }, + { + "epoch": 0.4, + "grad_norm": 9.628070149244607, + "learning_rate": 1.8530514545459585e-05, + "loss": 1.5173, + "step": 33183 + }, + { + "epoch": 0.4, + "grad_norm": 95.74740575065086, + "learning_rate": 1.8530209691143945e-05, + "loss": 1.5538, + "step": 33186 + }, + { + "epoch": 0.4, + "grad_norm": 13.487762651309694, + "learning_rate": 1.8529904807717765e-05, + "loss": 1.2063, + "step": 33189 + }, + { + "epoch": 0.4, + "grad_norm": 28.786332492233907, + "learning_rate": 1.8529599895182098e-05, + "loss": 1.535, + "step": 33192 + }, + { + "epoch": 0.4, + "grad_norm": 12.605247873483453, + "learning_rate": 1.8529294953537978e-05, + "loss": 1.2895, + "step": 33195 + }, + { + "epoch": 0.4, + "grad_norm": 77.82221965596734, + "learning_rate": 1.8528989982786445e-05, + "loss": 1.5812, + "step": 33198 + }, + { + "epoch": 0.4, + "grad_norm": 23.1294343675606, + "learning_rate": 1.852868498292854e-05, + "loss": 1.4996, + "step": 33201 + }, + { + "epoch": 0.4, + "grad_norm": 7.196384537218661, + "learning_rate": 1.8528379953965307e-05, + "loss": 1.3563, + "step": 33204 + }, + { + "epoch": 0.4, + "grad_norm": 14.950994305907152, + "learning_rate": 1.8528074895897782e-05, + "loss": 1.4058, + "step": 33207 + }, + { + "epoch": 0.4, + "grad_norm": 4.928714764056484, + "learning_rate": 1.8527769808727015e-05, + "loss": 1.4793, + "step": 33210 + }, + { + "epoch": 0.4, + "grad_norm": 15.690794647346872, + "learning_rate": 1.852746469245404e-05, + "loss": 1.3202, + "step": 33213 + }, + { + "epoch": 0.4, + "grad_norm": 11.492159939626234, + "learning_rate": 1.8527159547079897e-05, + "loss": 1.5455, + "step": 33216 + }, + { + "epoch": 0.4, + "grad_norm": 5.128170820162759, + "learning_rate": 1.8526854372605627e-05, + "loss": 1.2503, + "step": 33219 + }, + { + "epoch": 0.4, + "grad_norm": 7.260542644434242, + "learning_rate": 1.852654916903228e-05, + "loss": 2.0471, + "step": 33222 + }, + { + "epoch": 0.4, + "grad_norm": 3.7837449044633713, + "learning_rate": 1.852624393636089e-05, + "loss": 1.1786, + "step": 33225 + }, + { + "epoch": 0.4, + "grad_norm": 8.920128301545356, + "learning_rate": 1.85259386745925e-05, + "loss": 1.3386, + "step": 33228 + }, + { + "epoch": 0.4, + "grad_norm": 9.041333674948268, + "learning_rate": 1.8525633383728153e-05, + "loss": 1.5667, + "step": 33231 + }, + { + "epoch": 0.4, + "grad_norm": 14.803328101753332, + "learning_rate": 1.852532806376889e-05, + "loss": 1.391, + "step": 33234 + }, + { + "epoch": 0.4, + "grad_norm": 12.68274478237548, + "learning_rate": 1.8525022714715755e-05, + "loss": 1.302, + "step": 33237 + }, + { + "epoch": 0.4, + "grad_norm": 11.986133727365235, + "learning_rate": 1.8524717336569784e-05, + "loss": 1.3006, + "step": 33240 + }, + { + "epoch": 0.4, + "grad_norm": 32.67062212218412, + "learning_rate": 1.8524411929332025e-05, + "loss": 1.3187, + "step": 33243 + }, + { + "epoch": 0.4, + "grad_norm": 18.054502437633328, + "learning_rate": 1.8524106493003524e-05, + "loss": 1.4661, + "step": 33246 + }, + { + "epoch": 0.4, + "grad_norm": 15.234093261304269, + "learning_rate": 1.8523801027585308e-05, + "loss": 1.3477, + "step": 33249 + }, + { + "epoch": 0.4, + "grad_norm": 2.3388387889325677, + "learning_rate": 1.8523495533078436e-05, + "loss": 1.3414, + "step": 33252 + }, + { + "epoch": 0.4, + "grad_norm": 36.24015482758076, + "learning_rate": 1.8523190009483945e-05, + "loss": 1.7159, + "step": 33255 + }, + { + "epoch": 0.4, + "grad_norm": 5.106975465117536, + "learning_rate": 1.852288445680287e-05, + "loss": 1.7682, + "step": 33258 + }, + { + "epoch": 0.4, + "grad_norm": 11.886928279684794, + "learning_rate": 1.8522578875036264e-05, + "loss": 1.573, + "step": 33261 + }, + { + "epoch": 0.4, + "grad_norm": 34.94036248664896, + "learning_rate": 1.8522273264185168e-05, + "loss": 1.7371, + "step": 33264 + }, + { + "epoch": 0.4, + "grad_norm": 22.929043614806844, + "learning_rate": 1.8521967624250624e-05, + "loss": 1.7584, + "step": 33267 + }, + { + "epoch": 0.4, + "grad_norm": 11.081388922059569, + "learning_rate": 1.852166195523367e-05, + "loss": 1.3313, + "step": 33270 + }, + { + "epoch": 0.4, + "grad_norm": 10.928534950003641, + "learning_rate": 1.8521356257135355e-05, + "loss": 1.549, + "step": 33273 + }, + { + "epoch": 0.4, + "grad_norm": 45.804493072125645, + "learning_rate": 1.852105052995672e-05, + "loss": 1.5222, + "step": 33276 + }, + { + "epoch": 0.4, + "grad_norm": 49.878164047740775, + "learning_rate": 1.852074477369881e-05, + "loss": 1.2869, + "step": 33279 + }, + { + "epoch": 0.4, + "grad_norm": 11.349026764340476, + "learning_rate": 1.8520438988362665e-05, + "loss": 1.3614, + "step": 33282 + }, + { + "epoch": 0.4, + "grad_norm": 25.61562867263427, + "learning_rate": 1.852013317394933e-05, + "loss": 1.0926, + "step": 33285 + }, + { + "epoch": 0.4, + "grad_norm": 28.58225032263131, + "learning_rate": 1.8519827330459854e-05, + "loss": 1.8878, + "step": 33288 + }, + { + "epoch": 0.4, + "grad_norm": 18.546356308140634, + "learning_rate": 1.8519521457895273e-05, + "loss": 1.4451, + "step": 33291 + }, + { + "epoch": 0.4, + "grad_norm": 14.780177648227694, + "learning_rate": 1.8519215556256635e-05, + "loss": 1.2552, + "step": 33294 + }, + { + "epoch": 0.4, + "grad_norm": 7.855480371434779, + "learning_rate": 1.8518909625544983e-05, + "loss": 1.5576, + "step": 33297 + }, + { + "epoch": 0.4, + "grad_norm": 5.010045272625091, + "learning_rate": 1.851860366576136e-05, + "loss": 1.6917, + "step": 33300 + }, + { + "epoch": 0.4, + "grad_norm": 9.835165560676801, + "learning_rate": 1.8518297676906814e-05, + "loss": 1.4641, + "step": 33303 + }, + { + "epoch": 0.4, + "grad_norm": 47.219719245850925, + "learning_rate": 1.851799165898238e-05, + "loss": 1.3131, + "step": 33306 + }, + { + "epoch": 0.4, + "grad_norm": 96.19510798652861, + "learning_rate": 1.8517685611989114e-05, + "loss": 1.3616, + "step": 33309 + }, + { + "epoch": 0.4, + "grad_norm": 3.891898560408273, + "learning_rate": 1.851737953592805e-05, + "loss": 1.4337, + "step": 33312 + }, + { + "epoch": 0.4, + "grad_norm": 10.951276531221147, + "learning_rate": 1.8517073430800244e-05, + "loss": 1.4593, + "step": 33315 + }, + { + "epoch": 0.4, + "grad_norm": 30.594337646944727, + "learning_rate": 1.851676729660673e-05, + "loss": 1.2671, + "step": 33318 + }, + { + "epoch": 0.4, + "grad_norm": 14.044875283492686, + "learning_rate": 1.851646113334856e-05, + "loss": 1.6828, + "step": 33321 + }, + { + "epoch": 0.4, + "grad_norm": 11.056274134598798, + "learning_rate": 1.8516154941026775e-05, + "loss": 1.5998, + "step": 33324 + }, + { + "epoch": 0.4, + "grad_norm": 12.640882236249618, + "learning_rate": 1.851584871964242e-05, + "loss": 1.4803, + "step": 33327 + }, + { + "epoch": 0.4, + "grad_norm": 52.37176788597646, + "learning_rate": 1.8515542469196538e-05, + "loss": 1.5263, + "step": 33330 + }, + { + "epoch": 0.4, + "grad_norm": 49.0444436309857, + "learning_rate": 1.8515236189690183e-05, + "loss": 1.0504, + "step": 33333 + }, + { + "epoch": 0.4, + "grad_norm": 16.963870900356, + "learning_rate": 1.851492988112439e-05, + "loss": 1.5209, + "step": 33336 + }, + { + "epoch": 0.4, + "grad_norm": 6.268309179432523, + "learning_rate": 1.851462354350021e-05, + "loss": 1.6051, + "step": 33339 + }, + { + "epoch": 0.4, + "grad_norm": 17.98538932190464, + "learning_rate": 1.8514317176818683e-05, + "loss": 1.1849, + "step": 33342 + }, + { + "epoch": 0.4, + "grad_norm": 5.340647272620341, + "learning_rate": 1.851401078108086e-05, + "loss": 1.4136, + "step": 33345 + }, + { + "epoch": 0.4, + "grad_norm": 19.859466852502006, + "learning_rate": 1.8513704356287787e-05, + "loss": 1.5706, + "step": 33348 + }, + { + "epoch": 0.4, + "grad_norm": 9.957402036922584, + "learning_rate": 1.8513397902440506e-05, + "loss": 1.4388, + "step": 33351 + }, + { + "epoch": 0.4, + "grad_norm": 38.215778930897514, + "learning_rate": 1.8513091419540067e-05, + "loss": 1.2888, + "step": 33354 + }, + { + "epoch": 0.4, + "grad_norm": 10.568707661235166, + "learning_rate": 1.851278490758751e-05, + "loss": 1.2624, + "step": 33357 + }, + { + "epoch": 0.4, + "grad_norm": 27.038731328830643, + "learning_rate": 1.8512478366583885e-05, + "loss": 1.6216, + "step": 33360 + }, + { + "epoch": 0.4, + "grad_norm": 38.48477288153588, + "learning_rate": 1.8512171796530237e-05, + "loss": 1.7442, + "step": 33363 + }, + { + "epoch": 0.4, + "grad_norm": 40.41092695557056, + "learning_rate": 1.8511865197427617e-05, + "loss": 1.3943, + "step": 33366 + }, + { + "epoch": 0.4, + "grad_norm": 59.876055665754876, + "learning_rate": 1.851155856927706e-05, + "loss": 1.446, + "step": 33369 + }, + { + "epoch": 0.4, + "grad_norm": 39.934506400021235, + "learning_rate": 1.8511251912079625e-05, + "loss": 1.2391, + "step": 33372 + }, + { + "epoch": 0.4, + "grad_norm": 18.575466780029036, + "learning_rate": 1.8510945225836354e-05, + "loss": 1.6267, + "step": 33375 + }, + { + "epoch": 0.4, + "grad_norm": 44.579235650075525, + "learning_rate": 1.8510638510548288e-05, + "loss": 1.6979, + "step": 33378 + }, + { + "epoch": 0.4, + "grad_norm": 45.69091866811871, + "learning_rate": 1.8510331766216483e-05, + "loss": 1.3748, + "step": 33381 + }, + { + "epoch": 0.4, + "grad_norm": 7.288941190825406, + "learning_rate": 1.8510024992841978e-05, + "loss": 1.588, + "step": 33384 + }, + { + "epoch": 0.4, + "grad_norm": 57.21582350697673, + "learning_rate": 1.8509718190425823e-05, + "loss": 1.5423, + "step": 33387 + }, + { + "epoch": 0.4, + "grad_norm": 36.600175014273844, + "learning_rate": 1.8509411358969066e-05, + "loss": 1.4174, + "step": 33390 + }, + { + "epoch": 0.4, + "grad_norm": 8.113486709934635, + "learning_rate": 1.850910449847275e-05, + "loss": 1.2965, + "step": 33393 + }, + { + "epoch": 0.4, + "grad_norm": 9.37210976162465, + "learning_rate": 1.850879760893793e-05, + "loss": 1.5583, + "step": 33396 + }, + { + "epoch": 0.4, + "grad_norm": 10.70204196822476, + "learning_rate": 1.8508490690365645e-05, + "loss": 1.5783, + "step": 33399 + }, + { + "epoch": 0.4, + "grad_norm": 5.620563411376052, + "learning_rate": 1.8508183742756947e-05, + "loss": 1.4534, + "step": 33402 + }, + { + "epoch": 0.4, + "grad_norm": 8.601775671023942, + "learning_rate": 1.8507876766112885e-05, + "loss": 1.5127, + "step": 33405 + }, + { + "epoch": 0.4, + "grad_norm": 11.464823382896613, + "learning_rate": 1.85075697604345e-05, + "loss": 1.6912, + "step": 33408 + }, + { + "epoch": 0.4, + "grad_norm": 15.640123741783917, + "learning_rate": 1.8507262725722848e-05, + "loss": 1.5116, + "step": 33411 + }, + { + "epoch": 0.4, + "grad_norm": 16.612335550682413, + "learning_rate": 1.850695566197897e-05, + "loss": 1.2784, + "step": 33414 + }, + { + "epoch": 0.4, + "grad_norm": 15.696877867019044, + "learning_rate": 1.850664856920392e-05, + "loss": 1.3514, + "step": 33417 + }, + { + "epoch": 0.4, + "grad_norm": 9.660402158662972, + "learning_rate": 1.8506341447398735e-05, + "loss": 1.7802, + "step": 33420 + }, + { + "epoch": 0.4, + "grad_norm": 33.442061161544984, + "learning_rate": 1.8506034296564475e-05, + "loss": 1.724, + "step": 33423 + }, + { + "epoch": 0.4, + "grad_norm": 3.5937893109090764, + "learning_rate": 1.8505727116702185e-05, + "loss": 1.5452, + "step": 33426 + }, + { + "epoch": 0.4, + "grad_norm": 19.828918105175696, + "learning_rate": 1.850541990781291e-05, + "loss": 1.8665, + "step": 33429 + }, + { + "epoch": 0.4, + "grad_norm": 5.373574635294007, + "learning_rate": 1.8505112669897703e-05, + "loss": 1.1735, + "step": 33432 + }, + { + "epoch": 0.4, + "grad_norm": 20.775368978963606, + "learning_rate": 1.8504805402957608e-05, + "loss": 1.3646, + "step": 33435 + }, + { + "epoch": 0.4, + "grad_norm": 23.551067756014934, + "learning_rate": 1.8504498106993675e-05, + "loss": 1.699, + "step": 33438 + }, + { + "epoch": 0.4, + "grad_norm": 20.55707088861401, + "learning_rate": 1.8504190782006954e-05, + "loss": 1.517, + "step": 33441 + }, + { + "epoch": 0.4, + "grad_norm": 17.579883613933458, + "learning_rate": 1.8503883427998495e-05, + "loss": 1.661, + "step": 33444 + }, + { + "epoch": 0.4, + "grad_norm": 4.034247267653137, + "learning_rate": 1.8503576044969343e-05, + "loss": 1.5422, + "step": 33447 + }, + { + "epoch": 0.4, + "grad_norm": 19.329454862978384, + "learning_rate": 1.850326863292055e-05, + "loss": 1.2904, + "step": 33450 + }, + { + "epoch": 0.4, + "grad_norm": 36.52474418409704, + "learning_rate": 1.8502961191853165e-05, + "loss": 1.5994, + "step": 33453 + }, + { + "epoch": 0.4, + "grad_norm": 15.393190037853373, + "learning_rate": 1.8502653721768233e-05, + "loss": 1.4951, + "step": 33456 + }, + { + "epoch": 0.4, + "grad_norm": 27.52096008678086, + "learning_rate": 1.8502346222666808e-05, + "loss": 1.3626, + "step": 33459 + }, + { + "epoch": 0.4, + "grad_norm": 8.380637321063368, + "learning_rate": 1.8502038694549938e-05, + "loss": 1.6292, + "step": 33462 + }, + { + "epoch": 0.4, + "grad_norm": 10.026382585989024, + "learning_rate": 1.8501731137418672e-05, + "loss": 1.361, + "step": 33465 + }, + { + "epoch": 0.4, + "grad_norm": 49.84700795693289, + "learning_rate": 1.8501423551274062e-05, + "loss": 1.5515, + "step": 33468 + }, + { + "epoch": 0.4, + "grad_norm": 13.976383207321708, + "learning_rate": 1.850111593611715e-05, + "loss": 1.4393, + "step": 33471 + }, + { + "epoch": 0.4, + "grad_norm": 37.841914065025584, + "learning_rate": 1.8500808291948997e-05, + "loss": 1.4886, + "step": 33474 + }, + { + "epoch": 0.4, + "grad_norm": 18.029714145544162, + "learning_rate": 1.8500500618770646e-05, + "loss": 1.3997, + "step": 33477 + }, + { + "epoch": 0.4, + "grad_norm": 3.8938115875350756, + "learning_rate": 1.8500192916583145e-05, + "loss": 1.2389, + "step": 33480 + }, + { + "epoch": 0.4, + "grad_norm": 22.993606588370522, + "learning_rate": 1.8499885185387554e-05, + "loss": 1.2153, + "step": 33483 + }, + { + "epoch": 0.4, + "grad_norm": 22.235570819107505, + "learning_rate": 1.849957742518491e-05, + "loss": 1.5037, + "step": 33486 + }, + { + "epoch": 0.4, + "grad_norm": 52.494490988003506, + "learning_rate": 1.8499269635976273e-05, + "loss": 1.1373, + "step": 33489 + }, + { + "epoch": 0.4, + "grad_norm": 34.26110269507118, + "learning_rate": 1.849896181776269e-05, + "loss": 1.2911, + "step": 33492 + }, + { + "epoch": 0.4, + "grad_norm": 16.3162351300065, + "learning_rate": 1.8498653970545212e-05, + "loss": 1.4696, + "step": 33495 + }, + { + "epoch": 0.4, + "grad_norm": 64.37574860709054, + "learning_rate": 1.849834609432489e-05, + "loss": 1.485, + "step": 33498 + }, + { + "epoch": 0.4, + "grad_norm": 7.6001059948848795, + "learning_rate": 1.849803818910277e-05, + "loss": 1.5084, + "step": 33501 + }, + { + "epoch": 0.4, + "grad_norm": 14.869575283329052, + "learning_rate": 1.8497730254879908e-05, + "loss": 1.6332, + "step": 33504 + }, + { + "epoch": 0.4, + "grad_norm": 9.089972030501844, + "learning_rate": 1.8497422291657354e-05, + "loss": 1.599, + "step": 33507 + }, + { + "epoch": 0.4, + "grad_norm": 7.989617296441499, + "learning_rate": 1.849711429943616e-05, + "loss": 1.182, + "step": 33510 + }, + { + "epoch": 0.4, + "grad_norm": 35.43408463518124, + "learning_rate": 1.8496806278217376e-05, + "loss": 1.7216, + "step": 33513 + }, + { + "epoch": 0.4, + "grad_norm": 13.359977160012125, + "learning_rate": 1.8496498228002048e-05, + "loss": 1.625, + "step": 33516 + }, + { + "epoch": 0.4, + "grad_norm": 12.8071031503013, + "learning_rate": 1.8496190148791234e-05, + "loss": 1.594, + "step": 33519 + }, + { + "epoch": 0.4, + "grad_norm": 20.026623154707643, + "learning_rate": 1.8495882040585988e-05, + "loss": 1.4577, + "step": 33522 + }, + { + "epoch": 0.4, + "grad_norm": 39.988609978713406, + "learning_rate": 1.8495573903387353e-05, + "loss": 1.4858, + "step": 33525 + }, + { + "epoch": 0.4, + "grad_norm": 19.674152755983258, + "learning_rate": 1.8495265737196382e-05, + "loss": 1.4211, + "step": 33528 + }, + { + "epoch": 0.4, + "grad_norm": 8.219461953727727, + "learning_rate": 1.849495754201413e-05, + "loss": 1.4546, + "step": 33531 + }, + { + "epoch": 0.4, + "grad_norm": 63.9087490950447, + "learning_rate": 1.8494649317841654e-05, + "loss": 1.4725, + "step": 33534 + }, + { + "epoch": 0.4, + "grad_norm": 30.739841133979976, + "learning_rate": 1.8494341064679994e-05, + "loss": 1.3543, + "step": 33537 + }, + { + "epoch": 0.4, + "grad_norm": 31.165414498978283, + "learning_rate": 1.849403278253021e-05, + "loss": 1.6396, + "step": 33540 + }, + { + "epoch": 0.4, + "grad_norm": 15.747724738219512, + "learning_rate": 1.849372447139335e-05, + "loss": 1.6665, + "step": 33543 + }, + { + "epoch": 0.4, + "grad_norm": 19.69162572721224, + "learning_rate": 1.8493416131270464e-05, + "loss": 1.5778, + "step": 33546 + }, + { + "epoch": 0.4, + "grad_norm": 16.53146562340043, + "learning_rate": 1.8493107762162614e-05, + "loss": 1.318, + "step": 33549 + }, + { + "epoch": 0.4, + "grad_norm": 3.274217116055658, + "learning_rate": 1.8492799364070845e-05, + "loss": 1.2362, + "step": 33552 + }, + { + "epoch": 0.4, + "grad_norm": 6.355198970916889, + "learning_rate": 1.849249093699621e-05, + "loss": 1.4828, + "step": 33555 + }, + { + "epoch": 0.4, + "grad_norm": 3.9775740090785834, + "learning_rate": 1.849218248093976e-05, + "loss": 1.3588, + "step": 33558 + }, + { + "epoch": 0.4, + "grad_norm": 6.137267550839885, + "learning_rate": 1.8491873995902557e-05, + "loss": 1.5457, + "step": 33561 + }, + { + "epoch": 0.4, + "grad_norm": 14.974782884443899, + "learning_rate": 1.849156548188564e-05, + "loss": 1.576, + "step": 33564 + }, + { + "epoch": 0.4, + "grad_norm": 4.593399685873336, + "learning_rate": 1.8491256938890072e-05, + "loss": 1.4205, + "step": 33567 + }, + { + "epoch": 0.4, + "grad_norm": 6.603750821335467, + "learning_rate": 1.8490948366916903e-05, + "loss": 1.5978, + "step": 33570 + }, + { + "epoch": 0.4, + "grad_norm": 5.33426311042056, + "learning_rate": 1.8490639765967183e-05, + "loss": 1.146, + "step": 33573 + }, + { + "epoch": 0.4, + "grad_norm": 4.311108112072585, + "learning_rate": 1.849033113604197e-05, + "loss": 1.3586, + "step": 33576 + }, + { + "epoch": 0.4, + "grad_norm": 89.22477347332092, + "learning_rate": 1.8490022477142317e-05, + "loss": 1.5006, + "step": 33579 + }, + { + "epoch": 0.4, + "grad_norm": 50.0231324940587, + "learning_rate": 1.848971378926927e-05, + "loss": 1.5769, + "step": 33582 + }, + { + "epoch": 0.4, + "grad_norm": 2.804019178408945, + "learning_rate": 1.8489405072423892e-05, + "loss": 1.3748, + "step": 33585 + }, + { + "epoch": 0.4, + "grad_norm": 10.776077348655495, + "learning_rate": 1.848909632660723e-05, + "loss": 1.5859, + "step": 33588 + }, + { + "epoch": 0.4, + "grad_norm": 37.79219779333722, + "learning_rate": 1.848878755182034e-05, + "loss": 1.1937, + "step": 33591 + }, + { + "epoch": 0.4, + "grad_norm": 5.824391519055068, + "learning_rate": 1.848847874806428e-05, + "loss": 1.9246, + "step": 33594 + }, + { + "epoch": 0.4, + "grad_norm": 78.92857919831371, + "learning_rate": 1.8488169915340095e-05, + "loss": 1.7708, + "step": 33597 + }, + { + "epoch": 0.4, + "grad_norm": 17.8266953186831, + "learning_rate": 1.8487861053648847e-05, + "loss": 1.5328, + "step": 33600 + }, + { + "epoch": 0.4, + "grad_norm": 44.14333296884652, + "learning_rate": 1.8487552162991583e-05, + "loss": 1.5458, + "step": 33603 + }, + { + "epoch": 0.4, + "grad_norm": 6.4082234052226985, + "learning_rate": 1.848724324336936e-05, + "loss": 1.193, + "step": 33606 + }, + { + "epoch": 0.4, + "grad_norm": 14.287059298164884, + "learning_rate": 1.8486934294783238e-05, + "loss": 1.5103, + "step": 33609 + }, + { + "epoch": 0.4, + "grad_norm": 8.847855302445476, + "learning_rate": 1.8486625317234258e-05, + "loss": 1.5635, + "step": 33612 + }, + { + "epoch": 0.4, + "grad_norm": 24.723570003896555, + "learning_rate": 1.848631631072349e-05, + "loss": 2.0177, + "step": 33615 + }, + { + "epoch": 0.4, + "grad_norm": 3.514218969382111, + "learning_rate": 1.8486007275251977e-05, + "loss": 1.7454, + "step": 33618 + }, + { + "epoch": 0.4, + "grad_norm": 19.00540384832823, + "learning_rate": 1.8485698210820784e-05, + "loss": 1.5541, + "step": 33621 + }, + { + "epoch": 0.4, + "grad_norm": 22.381843788025662, + "learning_rate": 1.8485389117430955e-05, + "loss": 1.3423, + "step": 33624 + }, + { + "epoch": 0.4, + "grad_norm": 12.182813006044002, + "learning_rate": 1.848507999508355e-05, + "loss": 1.6065, + "step": 33627 + }, + { + "epoch": 0.4, + "grad_norm": 9.53323498362092, + "learning_rate": 1.848477084377962e-05, + "loss": 1.5211, + "step": 33630 + }, + { + "epoch": 0.4, + "grad_norm": 12.65622327318227, + "learning_rate": 1.848446166352023e-05, + "loss": 1.436, + "step": 33633 + }, + { + "epoch": 0.4, + "grad_norm": 9.456539589781132, + "learning_rate": 1.8484152454306423e-05, + "loss": 1.3198, + "step": 33636 + }, + { + "epoch": 0.4, + "grad_norm": 7.151770150365685, + "learning_rate": 1.8483843216139262e-05, + "loss": 1.3161, + "step": 33639 + }, + { + "epoch": 0.4, + "grad_norm": 12.77993166782438, + "learning_rate": 1.8483533949019797e-05, + "loss": 1.5935, + "step": 33642 + }, + { + "epoch": 0.4, + "grad_norm": 10.358248155968122, + "learning_rate": 1.848322465294909e-05, + "loss": 1.4051, + "step": 33645 + }, + { + "epoch": 0.4, + "grad_norm": 65.2120336301452, + "learning_rate": 1.848291532792819e-05, + "loss": 1.3658, + "step": 33648 + }, + { + "epoch": 0.4, + "grad_norm": 8.315638069621157, + "learning_rate": 1.848260597395816e-05, + "loss": 1.7295, + "step": 33651 + }, + { + "epoch": 0.4, + "grad_norm": 16.947504944578252, + "learning_rate": 1.8482296591040048e-05, + "loss": 1.6047, + "step": 33654 + }, + { + "epoch": 0.4, + "grad_norm": 8.620710795678503, + "learning_rate": 1.8481987179174914e-05, + "loss": 1.5243, + "step": 33657 + }, + { + "epoch": 0.4, + "grad_norm": 44.18177849357941, + "learning_rate": 1.848167773836381e-05, + "loss": 1.5493, + "step": 33660 + }, + { + "epoch": 0.4, + "grad_norm": 9.920827454882746, + "learning_rate": 1.8481368268607796e-05, + "loss": 1.5226, + "step": 33663 + }, + { + "epoch": 0.4, + "grad_norm": 10.618462850602958, + "learning_rate": 1.8481058769907928e-05, + "loss": 1.2293, + "step": 33666 + }, + { + "epoch": 0.4, + "grad_norm": 21.580708928220478, + "learning_rate": 1.8480749242265265e-05, + "loss": 1.7397, + "step": 33669 + }, + { + "epoch": 0.4, + "grad_norm": 11.766205365869592, + "learning_rate": 1.8480439685680855e-05, + "loss": 1.1218, + "step": 33672 + }, + { + "epoch": 0.4, + "grad_norm": 18.460596261525566, + "learning_rate": 1.848013010015576e-05, + "loss": 1.4069, + "step": 33675 + }, + { + "epoch": 0.4, + "grad_norm": 19.529979877237345, + "learning_rate": 1.8479820485691037e-05, + "loss": 1.3053, + "step": 33678 + }, + { + "epoch": 0.41, + "grad_norm": 11.606046507697307, + "learning_rate": 1.847951084228774e-05, + "loss": 1.474, + "step": 33681 + }, + { + "epoch": 0.41, + "grad_norm": 39.42351296600479, + "learning_rate": 1.8479201169946926e-05, + "loss": 1.3837, + "step": 33684 + }, + { + "epoch": 0.41, + "grad_norm": 4.541983648107506, + "learning_rate": 1.8478891468669654e-05, + "loss": 1.29, + "step": 33687 + }, + { + "epoch": 0.41, + "grad_norm": 12.786653241558176, + "learning_rate": 1.847858173845698e-05, + "loss": 1.2243, + "step": 33690 + }, + { + "epoch": 0.41, + "grad_norm": 32.57972242360845, + "learning_rate": 1.847827197930996e-05, + "loss": 1.2275, + "step": 33693 + }, + { + "epoch": 0.41, + "grad_norm": 14.56521950706853, + "learning_rate": 1.847796219122965e-05, + "loss": 1.3055, + "step": 33696 + }, + { + "epoch": 0.41, + "grad_norm": 6.158688141762495, + "learning_rate": 1.847765237421711e-05, + "loss": 1.3576, + "step": 33699 + }, + { + "epoch": 0.41, + "grad_norm": 31.098901721521138, + "learning_rate": 1.8477342528273394e-05, + "loss": 1.45, + "step": 33702 + }, + { + "epoch": 0.41, + "grad_norm": 13.053844753450333, + "learning_rate": 1.8477032653399562e-05, + "loss": 1.6172, + "step": 33705 + }, + { + "epoch": 0.41, + "grad_norm": 12.537504688139487, + "learning_rate": 1.8476722749596673e-05, + "loss": 1.1484, + "step": 33708 + }, + { + "epoch": 0.41, + "grad_norm": 5.731654929490616, + "learning_rate": 1.8476412816865782e-05, + "loss": 1.3693, + "step": 33711 + }, + { + "epoch": 0.41, + "grad_norm": 7.24711869472191, + "learning_rate": 1.8476102855207946e-05, + "loss": 1.5459, + "step": 33714 + }, + { + "epoch": 0.41, + "grad_norm": 27.080998857216596, + "learning_rate": 1.8475792864624224e-05, + "loss": 1.7257, + "step": 33717 + }, + { + "epoch": 0.41, + "grad_norm": 7.265026348346084, + "learning_rate": 1.8475482845115675e-05, + "loss": 1.5222, + "step": 33720 + }, + { + "epoch": 0.41, + "grad_norm": 5.007381342083684, + "learning_rate": 1.8475172796683356e-05, + "loss": 1.5063, + "step": 33723 + }, + { + "epoch": 0.41, + "grad_norm": 7.641194950414862, + "learning_rate": 1.8474862719328325e-05, + "loss": 1.3355, + "step": 33726 + }, + { + "epoch": 0.41, + "grad_norm": 168.33396322138637, + "learning_rate": 1.847455261305164e-05, + "loss": 1.6901, + "step": 33729 + }, + { + "epoch": 0.41, + "grad_norm": 23.170804412573702, + "learning_rate": 1.847424247785436e-05, + "loss": 1.6791, + "step": 33732 + }, + { + "epoch": 0.41, + "grad_norm": 20.304718330171017, + "learning_rate": 1.8473932313737545e-05, + "loss": 1.4722, + "step": 33735 + }, + { + "epoch": 0.41, + "grad_norm": 9.8214498664535, + "learning_rate": 1.8473622120702245e-05, + "loss": 1.8217, + "step": 33738 + }, + { + "epoch": 0.41, + "grad_norm": 33.54096556730518, + "learning_rate": 1.8473311898749528e-05, + "loss": 1.7163, + "step": 33741 + }, + { + "epoch": 0.41, + "grad_norm": 11.73418765021691, + "learning_rate": 1.847300164788045e-05, + "loss": 1.4777, + "step": 33744 + }, + { + "epoch": 0.41, + "grad_norm": 23.988352874582468, + "learning_rate": 1.8472691368096072e-05, + "loss": 1.5351, + "step": 33747 + }, + { + "epoch": 0.41, + "grad_norm": 18.009948684235447, + "learning_rate": 1.8472381059397448e-05, + "loss": 1.3606, + "step": 33750 + }, + { + "epoch": 0.41, + "grad_norm": 17.910107880970642, + "learning_rate": 1.8472070721785638e-05, + "loss": 1.2214, + "step": 33753 + }, + { + "epoch": 0.41, + "grad_norm": 3.6535354121659855, + "learning_rate": 1.8471760355261704e-05, + "loss": 1.634, + "step": 33756 + }, + { + "epoch": 0.41, + "grad_norm": 21.57233840815458, + "learning_rate": 1.8471449959826702e-05, + "loss": 1.7613, + "step": 33759 + }, + { + "epoch": 0.41, + "grad_norm": 11.638398926116691, + "learning_rate": 1.8471139535481693e-05, + "loss": 1.586, + "step": 33762 + }, + { + "epoch": 0.41, + "grad_norm": 45.485864990839815, + "learning_rate": 1.8470829082227735e-05, + "loss": 1.3931, + "step": 33765 + }, + { + "epoch": 0.41, + "grad_norm": 19.898080869687384, + "learning_rate": 1.847051860006589e-05, + "loss": 1.3065, + "step": 33768 + }, + { + "epoch": 0.41, + "grad_norm": 7.114693724620312, + "learning_rate": 1.8470208088997216e-05, + "loss": 1.5785, + "step": 33771 + }, + { + "epoch": 0.41, + "grad_norm": 25.203126947393212, + "learning_rate": 1.846989754902277e-05, + "loss": 1.7669, + "step": 33774 + }, + { + "epoch": 0.41, + "grad_norm": 32.99730086574665, + "learning_rate": 1.8469586980143618e-05, + "loss": 1.3807, + "step": 33777 + }, + { + "epoch": 0.41, + "grad_norm": 2.4908821268050816, + "learning_rate": 1.8469276382360817e-05, + "loss": 1.4314, + "step": 33780 + }, + { + "epoch": 0.41, + "grad_norm": 5.298351320517488, + "learning_rate": 1.8468965755675422e-05, + "loss": 1.2662, + "step": 33783 + }, + { + "epoch": 0.41, + "grad_norm": 27.017885021868132, + "learning_rate": 1.84686551000885e-05, + "loss": 1.2503, + "step": 33786 + }, + { + "epoch": 0.41, + "grad_norm": 66.55769748606708, + "learning_rate": 1.846834441560111e-05, + "loss": 1.8199, + "step": 33789 + }, + { + "epoch": 0.41, + "grad_norm": 43.046383905309405, + "learning_rate": 1.846803370221431e-05, + "loss": 1.6565, + "step": 33792 + }, + { + "epoch": 0.41, + "grad_norm": 13.548996430296611, + "learning_rate": 1.846772295992916e-05, + "loss": 1.2994, + "step": 33795 + }, + { + "epoch": 0.41, + "grad_norm": 50.83852068194199, + "learning_rate": 1.8467412188746725e-05, + "loss": 1.6831, + "step": 33798 + }, + { + "epoch": 0.41, + "grad_norm": 14.462381620631177, + "learning_rate": 1.846710138866806e-05, + "loss": 1.4544, + "step": 33801 + }, + { + "epoch": 0.41, + "grad_norm": 36.31554733022201, + "learning_rate": 1.8466790559694225e-05, + "loss": 1.6209, + "step": 33804 + }, + { + "epoch": 0.41, + "grad_norm": 5.584080014222095, + "learning_rate": 1.8466479701826285e-05, + "loss": 1.1602, + "step": 33807 + }, + { + "epoch": 0.41, + "grad_norm": 23.962860015275208, + "learning_rate": 1.8466168815065302e-05, + "loss": 1.3998, + "step": 33810 + }, + { + "epoch": 0.41, + "grad_norm": 7.3981988492350865, + "learning_rate": 1.846585789941233e-05, + "loss": 1.2892, + "step": 33813 + }, + { + "epoch": 0.41, + "grad_norm": 26.60850515843367, + "learning_rate": 1.8465546954868438e-05, + "loss": 1.7413, + "step": 33816 + }, + { + "epoch": 0.41, + "grad_norm": 23.56377030716309, + "learning_rate": 1.846523598143468e-05, + "loss": 1.1908, + "step": 33819 + }, + { + "epoch": 0.41, + "grad_norm": 10.227162675035816, + "learning_rate": 1.8464924979112125e-05, + "loss": 1.2059, + "step": 33822 + }, + { + "epoch": 0.41, + "grad_norm": 6.268911270568338, + "learning_rate": 1.8464613947901827e-05, + "loss": 1.3253, + "step": 33825 + }, + { + "epoch": 0.41, + "grad_norm": 7.317665449900178, + "learning_rate": 1.8464302887804852e-05, + "loss": 1.386, + "step": 33828 + }, + { + "epoch": 0.41, + "grad_norm": 5.343910091056138, + "learning_rate": 1.846399179882226e-05, + "loss": 1.3501, + "step": 33831 + }, + { + "epoch": 0.41, + "grad_norm": 4.9906036152733755, + "learning_rate": 1.8463680680955115e-05, + "loss": 1.4999, + "step": 33834 + }, + { + "epoch": 0.41, + "grad_norm": 22.796769198706784, + "learning_rate": 1.846336953420447e-05, + "loss": 1.4242, + "step": 33837 + }, + { + "epoch": 0.41, + "grad_norm": 27.320813803542553, + "learning_rate": 1.84630583585714e-05, + "loss": 1.3009, + "step": 33840 + }, + { + "epoch": 0.41, + "grad_norm": 27.716854378411988, + "learning_rate": 1.8462747154056957e-05, + "loss": 1.2806, + "step": 33843 + }, + { + "epoch": 0.41, + "grad_norm": 15.806549729938208, + "learning_rate": 1.8462435920662208e-05, + "loss": 1.3699, + "step": 33846 + }, + { + "epoch": 0.41, + "grad_norm": 17.628900992813005, + "learning_rate": 1.846212465838821e-05, + "loss": 1.605, + "step": 33849 + }, + { + "epoch": 0.41, + "grad_norm": 16.243321616761282, + "learning_rate": 1.846181336723603e-05, + "loss": 1.7989, + "step": 33852 + }, + { + "epoch": 0.41, + "grad_norm": 12.758381857106428, + "learning_rate": 1.8461502047206733e-05, + "loss": 1.5096, + "step": 33855 + }, + { + "epoch": 0.41, + "grad_norm": 7.281437319538659, + "learning_rate": 1.8461190698301373e-05, + "loss": 1.7821, + "step": 33858 + }, + { + "epoch": 0.41, + "grad_norm": 157.82365792730664, + "learning_rate": 1.846087932052102e-05, + "loss": 1.8523, + "step": 33861 + }, + { + "epoch": 0.41, + "grad_norm": 26.56461602407801, + "learning_rate": 1.8460567913866727e-05, + "loss": 1.4554, + "step": 33864 + }, + { + "epoch": 0.41, + "grad_norm": 13.081387903511265, + "learning_rate": 1.846025647833957e-05, + "loss": 1.4377, + "step": 33867 + }, + { + "epoch": 0.41, + "grad_norm": 49.034232054041986, + "learning_rate": 1.84599450139406e-05, + "loss": 1.1407, + "step": 33870 + }, + { + "epoch": 0.41, + "grad_norm": 22.357816161368444, + "learning_rate": 1.8459633520670887e-05, + "loss": 1.5477, + "step": 33873 + }, + { + "epoch": 0.41, + "grad_norm": 96.1079068216679, + "learning_rate": 1.845932199853149e-05, + "loss": 1.314, + "step": 33876 + }, + { + "epoch": 0.41, + "grad_norm": 55.67676148686296, + "learning_rate": 1.845901044752348e-05, + "loss": 1.5068, + "step": 33879 + }, + { + "epoch": 0.41, + "grad_norm": 14.385687698339895, + "learning_rate": 1.8458698867647908e-05, + "loss": 1.3194, + "step": 33882 + }, + { + "epoch": 0.41, + "grad_norm": 12.03337746775434, + "learning_rate": 1.8458387258905846e-05, + "loss": 1.358, + "step": 33885 + }, + { + "epoch": 0.41, + "grad_norm": 2.6929938503857063, + "learning_rate": 1.8458075621298355e-05, + "loss": 1.366, + "step": 33888 + }, + { + "epoch": 0.41, + "grad_norm": 3.786745181344471, + "learning_rate": 1.84577639548265e-05, + "loss": 1.7701, + "step": 33891 + }, + { + "epoch": 0.41, + "grad_norm": 7.981072601060674, + "learning_rate": 1.845745225949134e-05, + "loss": 1.5074, + "step": 33894 + }, + { + "epoch": 0.41, + "grad_norm": 6.345376087300174, + "learning_rate": 1.8457140535293946e-05, + "loss": 1.2773, + "step": 33897 + }, + { + "epoch": 0.41, + "grad_norm": 22.430597089320063, + "learning_rate": 1.8456828782235373e-05, + "loss": 1.7437, + "step": 33900 + }, + { + "epoch": 0.41, + "grad_norm": 5.2372585681110415, + "learning_rate": 1.8456517000316694e-05, + "loss": 1.9739, + "step": 33903 + }, + { + "epoch": 0.41, + "grad_norm": 7.837086201240965, + "learning_rate": 1.8456205189538965e-05, + "loss": 1.7477, + "step": 33906 + }, + { + "epoch": 0.41, + "grad_norm": 4.641902914542586, + "learning_rate": 1.8455893349903254e-05, + "loss": 1.0171, + "step": 33909 + }, + { + "epoch": 0.41, + "grad_norm": 27.451775915207964, + "learning_rate": 1.8455581481410625e-05, + "loss": 1.2833, + "step": 33912 + }, + { + "epoch": 0.41, + "grad_norm": 31.903262459012424, + "learning_rate": 1.8455269584062144e-05, + "loss": 1.4064, + "step": 33915 + }, + { + "epoch": 0.41, + "grad_norm": 14.092063556521806, + "learning_rate": 1.8454957657858873e-05, + "loss": 1.4442, + "step": 33918 + }, + { + "epoch": 0.41, + "grad_norm": 69.28452766926867, + "learning_rate": 1.8454645702801875e-05, + "loss": 1.3471, + "step": 33921 + }, + { + "epoch": 0.41, + "grad_norm": 36.19025477163672, + "learning_rate": 1.8454333718892218e-05, + "loss": 1.7596, + "step": 33924 + }, + { + "epoch": 0.41, + "grad_norm": 18.839006898405234, + "learning_rate": 1.8454021706130962e-05, + "loss": 1.3173, + "step": 33927 + }, + { + "epoch": 0.41, + "grad_norm": 31.26601261937508, + "learning_rate": 1.845370966451918e-05, + "loss": 1.3103, + "step": 33930 + }, + { + "epoch": 0.41, + "grad_norm": 21.672721111769793, + "learning_rate": 1.845339759405793e-05, + "loss": 1.4101, + "step": 33933 + }, + { + "epoch": 0.41, + "grad_norm": 22.577323720259027, + "learning_rate": 1.845308549474828e-05, + "loss": 1.6519, + "step": 33936 + }, + { + "epoch": 0.41, + "grad_norm": 19.5703592641859, + "learning_rate": 1.845277336659129e-05, + "loss": 1.7548, + "step": 33939 + }, + { + "epoch": 0.41, + "grad_norm": 13.342445558362554, + "learning_rate": 1.8452461209588033e-05, + "loss": 1.3858, + "step": 33942 + }, + { + "epoch": 0.41, + "grad_norm": 4.293952017622262, + "learning_rate": 1.845214902373957e-05, + "loss": 1.9591, + "step": 33945 + }, + { + "epoch": 0.41, + "grad_norm": 13.697537099824295, + "learning_rate": 1.8451836809046962e-05, + "loss": 1.2397, + "step": 33948 + }, + { + "epoch": 0.41, + "grad_norm": 20.88328145064503, + "learning_rate": 1.8451524565511288e-05, + "loss": 1.6128, + "step": 33951 + }, + { + "epoch": 0.41, + "grad_norm": 8.944829694548671, + "learning_rate": 1.8451212293133597e-05, + "loss": 1.422, + "step": 33954 + }, + { + "epoch": 0.41, + "grad_norm": 7.480561347313449, + "learning_rate": 1.845089999191497e-05, + "loss": 1.6478, + "step": 33957 + }, + { + "epoch": 0.41, + "grad_norm": 25.014884560567513, + "learning_rate": 1.845058766185646e-05, + "loss": 1.4664, + "step": 33960 + }, + { + "epoch": 0.41, + "grad_norm": 26.1647256261026, + "learning_rate": 1.845027530295914e-05, + "loss": 1.7718, + "step": 33963 + }, + { + "epoch": 0.41, + "grad_norm": 41.089998124079465, + "learning_rate": 1.8449962915224074e-05, + "loss": 1.5631, + "step": 33966 + }, + { + "epoch": 0.41, + "grad_norm": 9.199871382782286, + "learning_rate": 1.844965049865233e-05, + "loss": 1.432, + "step": 33969 + }, + { + "epoch": 0.41, + "grad_norm": 48.372528625666256, + "learning_rate": 1.8449338053244966e-05, + "loss": 1.5706, + "step": 33972 + }, + { + "epoch": 0.41, + "grad_norm": 14.911174996742075, + "learning_rate": 1.844902557900306e-05, + "loss": 1.2435, + "step": 33975 + }, + { + "epoch": 0.41, + "grad_norm": 15.778981319784291, + "learning_rate": 1.844871307592767e-05, + "loss": 1.4764, + "step": 33978 + }, + { + "epoch": 0.41, + "grad_norm": 25.225843076202924, + "learning_rate": 1.8448400544019868e-05, + "loss": 1.4078, + "step": 33981 + }, + { + "epoch": 0.41, + "grad_norm": 33.06768742708427, + "learning_rate": 1.844808798328072e-05, + "loss": 1.597, + "step": 33984 + }, + { + "epoch": 0.41, + "grad_norm": 9.462044002428536, + "learning_rate": 1.8447775393711283e-05, + "loss": 1.4113, + "step": 33987 + }, + { + "epoch": 0.41, + "grad_norm": 4.650261622637633, + "learning_rate": 1.844746277531264e-05, + "loss": 1.4512, + "step": 33990 + }, + { + "epoch": 0.41, + "grad_norm": 18.302258224821117, + "learning_rate": 1.8447150128085844e-05, + "loss": 1.2152, + "step": 33993 + }, + { + "epoch": 0.41, + "grad_norm": 9.685965979402317, + "learning_rate": 1.844683745203197e-05, + "loss": 1.4798, + "step": 33996 + }, + { + "epoch": 0.41, + "grad_norm": 12.797068916063346, + "learning_rate": 1.8446524747152082e-05, + "loss": 1.2135, + "step": 33999 + }, + { + "epoch": 0.41, + "grad_norm": 17.545584806005316, + "learning_rate": 1.8446212013447247e-05, + "loss": 1.5977, + "step": 34002 + }, + { + "epoch": 0.41, + "grad_norm": 20.54754638998237, + "learning_rate": 1.8445899250918535e-05, + "loss": 2.0057, + "step": 34005 + }, + { + "epoch": 0.41, + "grad_norm": 33.742193130767454, + "learning_rate": 1.8445586459567006e-05, + "loss": 1.7244, + "step": 34008 + }, + { + "epoch": 0.41, + "grad_norm": 66.63378790722035, + "learning_rate": 1.8445273639393737e-05, + "loss": 1.5845, + "step": 34011 + }, + { + "epoch": 0.41, + "grad_norm": 81.04824156626542, + "learning_rate": 1.8444960790399787e-05, + "loss": 1.2581, + "step": 34014 + }, + { + "epoch": 0.41, + "grad_norm": 20.25535493959146, + "learning_rate": 1.844464791258623e-05, + "loss": 1.4702, + "step": 34017 + }, + { + "epoch": 0.41, + "grad_norm": 9.368247873100879, + "learning_rate": 1.844433500595413e-05, + "loss": 1.6868, + "step": 34020 + }, + { + "epoch": 0.41, + "grad_norm": 6.662197365943879, + "learning_rate": 1.8444022070504558e-05, + "loss": 1.2129, + "step": 34023 + }, + { + "epoch": 0.41, + "grad_norm": 10.592822260695778, + "learning_rate": 1.8443709106238585e-05, + "loss": 1.8506, + "step": 34026 + }, + { + "epoch": 0.41, + "grad_norm": 35.38640272390192, + "learning_rate": 1.8443396113157265e-05, + "loss": 1.1991, + "step": 34029 + }, + { + "epoch": 0.41, + "grad_norm": 45.57878859784654, + "learning_rate": 1.844308309126168e-05, + "loss": 1.8084, + "step": 34032 + }, + { + "epoch": 0.41, + "grad_norm": 52.34437466269068, + "learning_rate": 1.8442770040552892e-05, + "loss": 1.3031, + "step": 34035 + }, + { + "epoch": 0.41, + "grad_norm": 14.097966433648395, + "learning_rate": 1.8442456961031972e-05, + "loss": 1.9064, + "step": 34038 + }, + { + "epoch": 0.41, + "grad_norm": 9.303702766427175, + "learning_rate": 1.8442143852699986e-05, + "loss": 1.3361, + "step": 34041 + }, + { + "epoch": 0.41, + "grad_norm": 13.280926627789917, + "learning_rate": 1.8441830715558007e-05, + "loss": 1.3098, + "step": 34044 + }, + { + "epoch": 0.41, + "grad_norm": 11.451438257669997, + "learning_rate": 1.84415175496071e-05, + "loss": 1.606, + "step": 34047 + }, + { + "epoch": 0.41, + "grad_norm": 15.002412662873889, + "learning_rate": 1.8441204354848332e-05, + "loss": 1.4357, + "step": 34050 + }, + { + "epoch": 0.41, + "grad_norm": 12.911014101033427, + "learning_rate": 1.8440891131282775e-05, + "loss": 1.7481, + "step": 34053 + }, + { + "epoch": 0.41, + "grad_norm": 22.49536198311995, + "learning_rate": 1.8440577878911498e-05, + "loss": 1.4576, + "step": 34056 + }, + { + "epoch": 0.41, + "grad_norm": 11.829896689743512, + "learning_rate": 1.8440264597735567e-05, + "loss": 1.4958, + "step": 34059 + }, + { + "epoch": 0.41, + "grad_norm": 5.070174952486236, + "learning_rate": 1.8439951287756054e-05, + "loss": 1.3719, + "step": 34062 + }, + { + "epoch": 0.41, + "grad_norm": 55.873258372227724, + "learning_rate": 1.8439637948974027e-05, + "loss": 1.671, + "step": 34065 + }, + { + "epoch": 0.41, + "grad_norm": 11.813503760294408, + "learning_rate": 1.8439324581390557e-05, + "loss": 1.1929, + "step": 34068 + }, + { + "epoch": 0.41, + "grad_norm": 21.07385780004605, + "learning_rate": 1.843901118500671e-05, + "loss": 1.5053, + "step": 34071 + }, + { + "epoch": 0.41, + "grad_norm": 35.09689667742679, + "learning_rate": 1.843869775982356e-05, + "loss": 1.7349, + "step": 34074 + }, + { + "epoch": 0.41, + "grad_norm": 14.122638437102074, + "learning_rate": 1.8438384305842173e-05, + "loss": 1.3969, + "step": 34077 + }, + { + "epoch": 0.41, + "grad_norm": 18.84112313987882, + "learning_rate": 1.843807082306362e-05, + "loss": 1.9667, + "step": 34080 + }, + { + "epoch": 0.41, + "grad_norm": 16.364184582010424, + "learning_rate": 1.8437757311488967e-05, + "loss": 1.4659, + "step": 34083 + }, + { + "epoch": 0.41, + "grad_norm": 27.43500388629133, + "learning_rate": 1.843744377111929e-05, + "loss": 1.5184, + "step": 34086 + }, + { + "epoch": 0.41, + "grad_norm": 12.110823130873671, + "learning_rate": 1.843713020195566e-05, + "loss": 1.7178, + "step": 34089 + }, + { + "epoch": 0.41, + "grad_norm": 14.057292425504192, + "learning_rate": 1.843681660399914e-05, + "loss": 1.5105, + "step": 34092 + }, + { + "epoch": 0.41, + "grad_norm": 10.176270672437033, + "learning_rate": 1.8436502977250806e-05, + "loss": 1.5482, + "step": 34095 + }, + { + "epoch": 0.41, + "grad_norm": 29.57876522859395, + "learning_rate": 1.843618932171172e-05, + "loss": 1.4343, + "step": 34098 + }, + { + "epoch": 0.41, + "grad_norm": 7.301546973409349, + "learning_rate": 1.8435875637382964e-05, + "loss": 1.3739, + "step": 34101 + }, + { + "epoch": 0.41, + "grad_norm": 36.11319876625164, + "learning_rate": 1.8435561924265605e-05, + "loss": 1.1281, + "step": 34104 + }, + { + "epoch": 0.41, + "grad_norm": 19.652032196199475, + "learning_rate": 1.843524818236071e-05, + "loss": 1.1569, + "step": 34107 + }, + { + "epoch": 0.41, + "grad_norm": 34.43866248835873, + "learning_rate": 1.843493441166935e-05, + "loss": 1.6483, + "step": 34110 + }, + { + "epoch": 0.41, + "grad_norm": 12.167359405935429, + "learning_rate": 1.8434620612192598e-05, + "loss": 1.5757, + "step": 34113 + }, + { + "epoch": 0.41, + "grad_norm": 30.325812826249262, + "learning_rate": 1.8434306783931524e-05, + "loss": 1.3768, + "step": 34116 + }, + { + "epoch": 0.41, + "grad_norm": 13.694860789986329, + "learning_rate": 1.84339929268872e-05, + "loss": 1.3998, + "step": 34119 + }, + { + "epoch": 0.41, + "grad_norm": 7.687429698491142, + "learning_rate": 1.8433679041060693e-05, + "loss": 1.485, + "step": 34122 + }, + { + "epoch": 0.41, + "grad_norm": 21.34367986629334, + "learning_rate": 1.8433365126453077e-05, + "loss": 1.4465, + "step": 34125 + }, + { + "epoch": 0.41, + "grad_norm": 29.972391672133156, + "learning_rate": 1.8433051183065423e-05, + "loss": 1.2882, + "step": 34128 + }, + { + "epoch": 0.41, + "grad_norm": 24.279550264339466, + "learning_rate": 1.843273721089881e-05, + "loss": 1.3741, + "step": 34131 + }, + { + "epoch": 0.41, + "grad_norm": 32.67067094152491, + "learning_rate": 1.8432423209954297e-05, + "loss": 1.4987, + "step": 34134 + }, + { + "epoch": 0.41, + "grad_norm": 23.14722661801625, + "learning_rate": 1.8432109180232962e-05, + "loss": 1.4217, + "step": 34137 + }, + { + "epoch": 0.41, + "grad_norm": 8.596057114082805, + "learning_rate": 1.8431795121735875e-05, + "loss": 1.6167, + "step": 34140 + }, + { + "epoch": 0.41, + "grad_norm": 2.873849728111305, + "learning_rate": 1.8431481034464106e-05, + "loss": 1.5561, + "step": 34143 + }, + { + "epoch": 0.41, + "grad_norm": 13.84396397083415, + "learning_rate": 1.8431166918418734e-05, + "loss": 1.3371, + "step": 34146 + }, + { + "epoch": 0.41, + "grad_norm": 60.743248960143404, + "learning_rate": 1.8430852773600824e-05, + "loss": 1.6938, + "step": 34149 + }, + { + "epoch": 0.41, + "grad_norm": 19.498108003224218, + "learning_rate": 1.8430538600011448e-05, + "loss": 1.5207, + "step": 34152 + }, + { + "epoch": 0.41, + "grad_norm": 8.50618102426003, + "learning_rate": 1.8430224397651683e-05, + "loss": 1.4445, + "step": 34155 + }, + { + "epoch": 0.41, + "grad_norm": 21.46630298938645, + "learning_rate": 1.8429910166522597e-05, + "loss": 1.9248, + "step": 34158 + }, + { + "epoch": 0.41, + "grad_norm": 4.610380955026248, + "learning_rate": 1.8429595906625266e-05, + "loss": 1.2879, + "step": 34161 + }, + { + "epoch": 0.41, + "grad_norm": 8.550580533837218, + "learning_rate": 1.8429281617960762e-05, + "loss": 1.3593, + "step": 34164 + }, + { + "epoch": 0.41, + "grad_norm": 59.187569549139226, + "learning_rate": 1.842896730053015e-05, + "loss": 1.413, + "step": 34167 + }, + { + "epoch": 0.41, + "grad_norm": 20.236674125582727, + "learning_rate": 1.8428652954334512e-05, + "loss": 1.3274, + "step": 34170 + }, + { + "epoch": 0.41, + "grad_norm": 48.62167293987383, + "learning_rate": 1.8428338579374922e-05, + "loss": 1.6529, + "step": 34173 + }, + { + "epoch": 0.41, + "grad_norm": 10.131260223763487, + "learning_rate": 1.8428024175652444e-05, + "loss": 1.3954, + "step": 34176 + }, + { + "epoch": 0.41, + "grad_norm": 4.749816159855194, + "learning_rate": 1.8427709743168156e-05, + "loss": 1.3846, + "step": 34179 + }, + { + "epoch": 0.41, + "grad_norm": 25.430309334739455, + "learning_rate": 1.8427395281923128e-05, + "loss": 1.6774, + "step": 34182 + }, + { + "epoch": 0.41, + "grad_norm": 10.763645922589806, + "learning_rate": 1.8427080791918438e-05, + "loss": 1.7372, + "step": 34185 + }, + { + "epoch": 0.41, + "grad_norm": 11.659290045624077, + "learning_rate": 1.8426766273155157e-05, + "loss": 1.2414, + "step": 34188 + }, + { + "epoch": 0.41, + "grad_norm": 8.501445375972647, + "learning_rate": 1.842645172563436e-05, + "loss": 1.8919, + "step": 34191 + }, + { + "epoch": 0.41, + "grad_norm": 19.006269024016294, + "learning_rate": 1.8426137149357114e-05, + "loss": 1.4338, + "step": 34194 + }, + { + "epoch": 0.41, + "grad_norm": 18.163610006156798, + "learning_rate": 1.8425822544324496e-05, + "loss": 1.5496, + "step": 34197 + }, + { + "epoch": 0.41, + "grad_norm": 57.17871954740736, + "learning_rate": 1.8425507910537583e-05, + "loss": 1.5676, + "step": 34200 + }, + { + "epoch": 0.41, + "grad_norm": 25.933776981447373, + "learning_rate": 1.842519324799745e-05, + "loss": 1.4681, + "step": 34203 + }, + { + "epoch": 0.41, + "grad_norm": 5.924236381773474, + "learning_rate": 1.842487855670516e-05, + "loss": 1.2858, + "step": 34206 + }, + { + "epoch": 0.41, + "grad_norm": 80.51663807182015, + "learning_rate": 1.8424563836661796e-05, + "loss": 1.0453, + "step": 34209 + }, + { + "epoch": 0.41, + "grad_norm": 67.19031241719492, + "learning_rate": 1.8424249087868433e-05, + "loss": 1.4189, + "step": 34212 + }, + { + "epoch": 0.41, + "grad_norm": 4.81141523150861, + "learning_rate": 1.842393431032614e-05, + "loss": 1.1592, + "step": 34215 + }, + { + "epoch": 0.41, + "grad_norm": 3.5368667422557136, + "learning_rate": 1.8423619504035998e-05, + "loss": 1.5245, + "step": 34218 + }, + { + "epoch": 0.41, + "grad_norm": 11.909511018165212, + "learning_rate": 1.842330466899907e-05, + "loss": 1.291, + "step": 34221 + }, + { + "epoch": 0.41, + "grad_norm": 180.66248836535283, + "learning_rate": 1.842298980521644e-05, + "loss": 1.6978, + "step": 34224 + }, + { + "epoch": 0.41, + "grad_norm": 13.660210905843805, + "learning_rate": 1.8422674912689183e-05, + "loss": 1.5641, + "step": 34227 + }, + { + "epoch": 0.41, + "grad_norm": 5.179083641436155, + "learning_rate": 1.842235999141837e-05, + "loss": 1.5142, + "step": 34230 + }, + { + "epoch": 0.41, + "grad_norm": 20.923651228775622, + "learning_rate": 1.842204504140507e-05, + "loss": 1.37, + "step": 34233 + }, + { + "epoch": 0.41, + "grad_norm": 10.549951680990171, + "learning_rate": 1.842173006265037e-05, + "loss": 1.633, + "step": 34236 + }, + { + "epoch": 0.41, + "grad_norm": 12.338031306656635, + "learning_rate": 1.8421415055155334e-05, + "loss": 1.5145, + "step": 34239 + }, + { + "epoch": 0.41, + "grad_norm": 13.066629586059024, + "learning_rate": 1.8421100018921045e-05, + "loss": 1.6087, + "step": 34242 + }, + { + "epoch": 0.41, + "grad_norm": 35.321649945854105, + "learning_rate": 1.8420784953948578e-05, + "loss": 1.5992, + "step": 34245 + }, + { + "epoch": 0.41, + "grad_norm": 18.97364736459053, + "learning_rate": 1.8420469860239e-05, + "loss": 1.542, + "step": 34248 + }, + { + "epoch": 0.41, + "grad_norm": 30.17157634841879, + "learning_rate": 1.842015473779339e-05, + "loss": 1.7443, + "step": 34251 + }, + { + "epoch": 0.41, + "grad_norm": 17.161063045529538, + "learning_rate": 1.841983958661283e-05, + "loss": 1.7145, + "step": 34254 + }, + { + "epoch": 0.41, + "grad_norm": 20.304294791290307, + "learning_rate": 1.8419524406698385e-05, + "loss": 1.8672, + "step": 34257 + }, + { + "epoch": 0.41, + "grad_norm": 21.3878810078976, + "learning_rate": 1.841920919805114e-05, + "loss": 1.4147, + "step": 34260 + }, + { + "epoch": 0.41, + "grad_norm": 26.398063330941678, + "learning_rate": 1.8418893960672166e-05, + "loss": 1.2452, + "step": 34263 + }, + { + "epoch": 0.41, + "grad_norm": 6.186089874161226, + "learning_rate": 1.8418578694562538e-05, + "loss": 1.0634, + "step": 34266 + }, + { + "epoch": 0.41, + "grad_norm": 8.974262006520968, + "learning_rate": 1.8418263399723333e-05, + "loss": 1.4731, + "step": 34269 + }, + { + "epoch": 0.41, + "grad_norm": 9.936152597744519, + "learning_rate": 1.8417948076155628e-05, + "loss": 1.6374, + "step": 34272 + }, + { + "epoch": 0.41, + "grad_norm": 21.446045235902446, + "learning_rate": 1.8417632723860498e-05, + "loss": 1.4147, + "step": 34275 + }, + { + "epoch": 0.41, + "grad_norm": 4.761114344322593, + "learning_rate": 1.8417317342839022e-05, + "loss": 1.4507, + "step": 34278 + }, + { + "epoch": 0.41, + "grad_norm": 16.1315750810644, + "learning_rate": 1.841700193309227e-05, + "loss": 1.7199, + "step": 34281 + }, + { + "epoch": 0.41, + "grad_norm": 36.573183504370405, + "learning_rate": 1.8416686494621326e-05, + "loss": 1.2707, + "step": 34284 + }, + { + "epoch": 0.41, + "grad_norm": 10.01139307321085, + "learning_rate": 1.841637102742726e-05, + "loss": 1.278, + "step": 34287 + }, + { + "epoch": 0.41, + "grad_norm": 25.549671180067097, + "learning_rate": 1.8416055531511153e-05, + "loss": 1.1969, + "step": 34290 + }, + { + "epoch": 0.41, + "grad_norm": 9.967431175293326, + "learning_rate": 1.8415740006874076e-05, + "loss": 1.2739, + "step": 34293 + }, + { + "epoch": 0.41, + "grad_norm": 44.64605917874371, + "learning_rate": 1.8415424453517116e-05, + "loss": 1.6554, + "step": 34296 + }, + { + "epoch": 0.41, + "grad_norm": 8.066294883819321, + "learning_rate": 1.8415108871441338e-05, + "loss": 1.6322, + "step": 34299 + }, + { + "epoch": 0.41, + "grad_norm": 32.149114754728934, + "learning_rate": 1.8414793260647826e-05, + "loss": 1.6912, + "step": 34302 + }, + { + "epoch": 0.41, + "grad_norm": 6.630063359475632, + "learning_rate": 1.8414477621137658e-05, + "loss": 1.5588, + "step": 34305 + }, + { + "epoch": 0.41, + "grad_norm": 45.27224299514664, + "learning_rate": 1.8414161952911907e-05, + "loss": 1.4578, + "step": 34308 + }, + { + "epoch": 0.41, + "grad_norm": 13.27279150414766, + "learning_rate": 1.841384625597165e-05, + "loss": 1.0946, + "step": 34311 + }, + { + "epoch": 0.41, + "grad_norm": 19.920034330682793, + "learning_rate": 1.841353053031797e-05, + "loss": 1.2074, + "step": 34314 + }, + { + "epoch": 0.41, + "grad_norm": 21.953222607037407, + "learning_rate": 1.8413214775951938e-05, + "loss": 1.1142, + "step": 34317 + }, + { + "epoch": 0.41, + "grad_norm": 10.785882455807558, + "learning_rate": 1.841289899287464e-05, + "loss": 1.6394, + "step": 34320 + }, + { + "epoch": 0.41, + "grad_norm": 7.349846318537029, + "learning_rate": 1.841258318108714e-05, + "loss": 1.6046, + "step": 34323 + }, + { + "epoch": 0.41, + "grad_norm": 9.97675859379186, + "learning_rate": 1.8412267340590528e-05, + "loss": 1.783, + "step": 34326 + }, + { + "epoch": 0.41, + "grad_norm": 10.274502404301224, + "learning_rate": 1.8411951471385875e-05, + "loss": 1.5813, + "step": 34329 + }, + { + "epoch": 0.41, + "grad_norm": 5.23239238403993, + "learning_rate": 1.8411635573474266e-05, + "loss": 1.2501, + "step": 34332 + }, + { + "epoch": 0.41, + "grad_norm": 46.60996387130728, + "learning_rate": 1.8411319646856774e-05, + "loss": 1.6276, + "step": 34335 + }, + { + "epoch": 0.41, + "grad_norm": 32.93231225631586, + "learning_rate": 1.8411003691534475e-05, + "loss": 1.6871, + "step": 34338 + }, + { + "epoch": 0.41, + "grad_norm": 10.972112662097116, + "learning_rate": 1.841068770750845e-05, + "loss": 1.1578, + "step": 34341 + }, + { + "epoch": 0.41, + "grad_norm": 3.067239611796263, + "learning_rate": 1.841037169477978e-05, + "loss": 1.7752, + "step": 34344 + }, + { + "epoch": 0.41, + "grad_norm": 17.35087989510772, + "learning_rate": 1.8410055653349542e-05, + "loss": 1.7046, + "step": 34347 + }, + { + "epoch": 0.41, + "grad_norm": 9.32856093383354, + "learning_rate": 1.8409739583218807e-05, + "loss": 1.2333, + "step": 34350 + }, + { + "epoch": 0.41, + "grad_norm": 13.203671350894943, + "learning_rate": 1.8409423484388666e-05, + "loss": 1.2859, + "step": 34353 + }, + { + "epoch": 0.41, + "grad_norm": 3.067688996445535, + "learning_rate": 1.840910735686019e-05, + "loss": 1.6791, + "step": 34356 + }, + { + "epoch": 0.41, + "grad_norm": 29.62193403108938, + "learning_rate": 1.8408791200634456e-05, + "loss": 1.6145, + "step": 34359 + }, + { + "epoch": 0.41, + "grad_norm": 209.21550241601116, + "learning_rate": 1.840847501571255e-05, + "loss": 1.2274, + "step": 34362 + }, + { + "epoch": 0.41, + "grad_norm": 58.687362477901864, + "learning_rate": 1.840815880209555e-05, + "loss": 1.1639, + "step": 34365 + }, + { + "epoch": 0.41, + "grad_norm": 8.331430844719245, + "learning_rate": 1.8407842559784527e-05, + "loss": 1.5031, + "step": 34368 + }, + { + "epoch": 0.41, + "grad_norm": 20.624313976206277, + "learning_rate": 1.840752628878057e-05, + "loss": 1.7597, + "step": 34371 + }, + { + "epoch": 0.41, + "grad_norm": 6.146061304907588, + "learning_rate": 1.840720998908475e-05, + "loss": 1.259, + "step": 34374 + }, + { + "epoch": 0.41, + "grad_norm": 28.857012220657538, + "learning_rate": 1.8406893660698156e-05, + "loss": 1.6103, + "step": 34377 + }, + { + "epoch": 0.41, + "grad_norm": 43.898772465083255, + "learning_rate": 1.8406577303621858e-05, + "loss": 1.6987, + "step": 34380 + }, + { + "epoch": 0.41, + "grad_norm": 16.20598085036537, + "learning_rate": 1.840626091785694e-05, + "loss": 1.6205, + "step": 34383 + }, + { + "epoch": 0.41, + "grad_norm": 3.6143408473831555, + "learning_rate": 1.8405944503404484e-05, + "loss": 1.359, + "step": 34386 + }, + { + "epoch": 0.41, + "grad_norm": 6.64355456795734, + "learning_rate": 1.8405628060265566e-05, + "loss": 1.4447, + "step": 34389 + }, + { + "epoch": 0.41, + "grad_norm": 11.074008652916435, + "learning_rate": 1.8405311588441264e-05, + "loss": 1.2528, + "step": 34392 + }, + { + "epoch": 0.41, + "grad_norm": 3.934152713668062, + "learning_rate": 1.8404995087932666e-05, + "loss": 1.4491, + "step": 34395 + }, + { + "epoch": 0.41, + "grad_norm": 19.789512003826673, + "learning_rate": 1.8404678558740845e-05, + "loss": 1.274, + "step": 34398 + }, + { + "epoch": 0.41, + "grad_norm": 11.735970299212976, + "learning_rate": 1.8404362000866883e-05, + "loss": 1.3397, + "step": 34401 + }, + { + "epoch": 0.41, + "grad_norm": 13.288443857837581, + "learning_rate": 1.8404045414311863e-05, + "loss": 1.58, + "step": 34404 + }, + { + "epoch": 0.41, + "grad_norm": 11.130736874005102, + "learning_rate": 1.840372879907686e-05, + "loss": 1.662, + "step": 34407 + }, + { + "epoch": 0.41, + "grad_norm": 24.608654323923552, + "learning_rate": 1.8403412155162958e-05, + "loss": 1.296, + "step": 34410 + }, + { + "epoch": 0.41, + "grad_norm": 6.316713521659325, + "learning_rate": 1.8403095482571238e-05, + "loss": 1.532, + "step": 34413 + }, + { + "epoch": 0.41, + "grad_norm": 12.98682259678052, + "learning_rate": 1.840277878130278e-05, + "loss": 1.6294, + "step": 34416 + }, + { + "epoch": 0.41, + "grad_norm": 20.179041142726014, + "learning_rate": 1.8402462051358665e-05, + "loss": 1.6382, + "step": 34419 + }, + { + "epoch": 0.41, + "grad_norm": 3.7062275966024485, + "learning_rate": 1.840214529273997e-05, + "loss": 1.4801, + "step": 34422 + }, + { + "epoch": 0.41, + "grad_norm": 10.058080107316153, + "learning_rate": 1.8401828505447786e-05, + "loss": 1.5033, + "step": 34425 + }, + { + "epoch": 0.41, + "grad_norm": 4.216530189869718, + "learning_rate": 1.8401511689483186e-05, + "loss": 1.5406, + "step": 34428 + }, + { + "epoch": 0.41, + "grad_norm": 4.891880753466431, + "learning_rate": 1.840119484484725e-05, + "loss": 1.3011, + "step": 34431 + }, + { + "epoch": 0.41, + "grad_norm": 24.603149853690397, + "learning_rate": 1.8400877971541064e-05, + "loss": 1.5412, + "step": 34434 + }, + { + "epoch": 0.41, + "grad_norm": 5.483454152122104, + "learning_rate": 1.8400561069565706e-05, + "loss": 1.0851, + "step": 34437 + }, + { + "epoch": 0.41, + "grad_norm": 36.215425670466594, + "learning_rate": 1.840024413892226e-05, + "loss": 1.8162, + "step": 34440 + }, + { + "epoch": 0.41, + "grad_norm": 13.069588001251224, + "learning_rate": 1.8399927179611805e-05, + "loss": 1.2795, + "step": 34443 + }, + { + "epoch": 0.41, + "grad_norm": 24.999984939052386, + "learning_rate": 1.8399610191635427e-05, + "loss": 1.3159, + "step": 34446 + }, + { + "epoch": 0.41, + "grad_norm": 6.796389065645988, + "learning_rate": 1.8399293174994204e-05, + "loss": 1.5821, + "step": 34449 + }, + { + "epoch": 0.41, + "grad_norm": 11.484687161703084, + "learning_rate": 1.8398976129689217e-05, + "loss": 1.3114, + "step": 34452 + }, + { + "epoch": 0.41, + "grad_norm": 6.80824661171119, + "learning_rate": 1.839865905572155e-05, + "loss": 1.3774, + "step": 34455 + }, + { + "epoch": 0.41, + "grad_norm": 15.273434893313901, + "learning_rate": 1.8398341953092286e-05, + "loss": 1.5662, + "step": 34458 + }, + { + "epoch": 0.41, + "grad_norm": 15.871635374710282, + "learning_rate": 1.8398024821802507e-05, + "loss": 1.513, + "step": 34461 + }, + { + "epoch": 0.41, + "grad_norm": 6.089046336381549, + "learning_rate": 1.8397707661853294e-05, + "loss": 1.6062, + "step": 34464 + }, + { + "epoch": 0.41, + "grad_norm": 17.042013440332322, + "learning_rate": 1.8397390473245728e-05, + "loss": 1.1904, + "step": 34467 + }, + { + "epoch": 0.41, + "grad_norm": 32.06303232841399, + "learning_rate": 1.8397073255980892e-05, + "loss": 1.4698, + "step": 34470 + }, + { + "epoch": 0.41, + "grad_norm": 52.07478048881856, + "learning_rate": 1.839675601005987e-05, + "loss": 1.6283, + "step": 34473 + }, + { + "epoch": 0.41, + "grad_norm": 78.18217014119305, + "learning_rate": 1.839643873548375e-05, + "loss": 1.7765, + "step": 34476 + }, + { + "epoch": 0.41, + "grad_norm": 9.872645002670586, + "learning_rate": 1.83961214322536e-05, + "loss": 1.3586, + "step": 34479 + }, + { + "epoch": 0.41, + "grad_norm": 22.718804685678997, + "learning_rate": 1.839580410037052e-05, + "loss": 1.2556, + "step": 34482 + }, + { + "epoch": 0.41, + "grad_norm": 6.796913597092582, + "learning_rate": 1.839548673983558e-05, + "loss": 1.3399, + "step": 34485 + }, + { + "epoch": 0.41, + "grad_norm": 10.708018606900811, + "learning_rate": 1.839516935064987e-05, + "loss": 1.2727, + "step": 34488 + }, + { + "epoch": 0.41, + "grad_norm": 22.777831055977302, + "learning_rate": 1.839485193281447e-05, + "loss": 1.5481, + "step": 34491 + }, + { + "epoch": 0.41, + "grad_norm": 11.28273605207524, + "learning_rate": 1.8394534486330463e-05, + "loss": 1.3636, + "step": 34494 + }, + { + "epoch": 0.41, + "grad_norm": 11.79819071592007, + "learning_rate": 1.8394217011198935e-05, + "loss": 1.8368, + "step": 34497 + }, + { + "epoch": 0.41, + "grad_norm": 68.14412454300044, + "learning_rate": 1.8393899507420966e-05, + "loss": 1.4525, + "step": 34500 + }, + { + "epoch": 0.41, + "grad_norm": 20.44884755791713, + "learning_rate": 1.8393581974997644e-05, + "loss": 1.0895, + "step": 34503 + }, + { + "epoch": 0.41, + "grad_norm": 34.20788996454566, + "learning_rate": 1.8393264413930047e-05, + "loss": 1.5652, + "step": 34506 + }, + { + "epoch": 0.41, + "grad_norm": 15.923531100272173, + "learning_rate": 1.8392946824219264e-05, + "loss": 1.0623, + "step": 34509 + }, + { + "epoch": 0.41, + "grad_norm": 13.265556064739872, + "learning_rate": 1.8392629205866377e-05, + "loss": 0.9614, + "step": 34512 + }, + { + "epoch": 0.42, + "grad_norm": 2.8069142005875234, + "learning_rate": 1.8392311558872468e-05, + "loss": 1.4598, + "step": 34515 + }, + { + "epoch": 0.42, + "grad_norm": 6.29590542364414, + "learning_rate": 1.8391993883238622e-05, + "loss": 1.5417, + "step": 34518 + }, + { + "epoch": 0.42, + "grad_norm": 18.67311618320096, + "learning_rate": 1.839167617896593e-05, + "loss": 1.4506, + "step": 34521 + }, + { + "epoch": 0.42, + "grad_norm": 28.55652666753377, + "learning_rate": 1.839135844605546e-05, + "loss": 1.6391, + "step": 34524 + }, + { + "epoch": 0.42, + "grad_norm": 26.135357165891758, + "learning_rate": 1.839104068450831e-05, + "loss": 1.4871, + "step": 34527 + }, + { + "epoch": 0.42, + "grad_norm": 8.115772519897936, + "learning_rate": 1.8390722894325565e-05, + "loss": 1.6138, + "step": 34530 + }, + { + "epoch": 0.42, + "grad_norm": 12.882998878808587, + "learning_rate": 1.83904050755083e-05, + "loss": 1.5305, + "step": 34533 + }, + { + "epoch": 0.42, + "grad_norm": 14.13601402692866, + "learning_rate": 1.8390087228057607e-05, + "loss": 1.6854, + "step": 34536 + }, + { + "epoch": 0.42, + "grad_norm": 9.579646349270751, + "learning_rate": 1.838976935197457e-05, + "loss": 1.5427, + "step": 34539 + }, + { + "epoch": 0.42, + "grad_norm": 11.10081935926546, + "learning_rate": 1.8389451447260267e-05, + "loss": 1.3885, + "step": 34542 + }, + { + "epoch": 0.42, + "grad_norm": 10.256170495009197, + "learning_rate": 1.8389133513915794e-05, + "loss": 1.5692, + "step": 34545 + }, + { + "epoch": 0.42, + "grad_norm": 22.557163035068577, + "learning_rate": 1.8388815551942224e-05, + "loss": 1.5037, + "step": 34548 + }, + { + "epoch": 0.42, + "grad_norm": 8.309462592661061, + "learning_rate": 1.8388497561340654e-05, + "loss": 1.5619, + "step": 34551 + }, + { + "epoch": 0.42, + "grad_norm": 9.235641284921467, + "learning_rate": 1.8388179542112162e-05, + "loss": 1.7986, + "step": 34554 + }, + { + "epoch": 0.42, + "grad_norm": 10.030745737821992, + "learning_rate": 1.838786149425783e-05, + "loss": 1.5787, + "step": 34557 + }, + { + "epoch": 0.42, + "grad_norm": 53.92369141767386, + "learning_rate": 1.838754341777875e-05, + "loss": 1.2072, + "step": 34560 + }, + { + "epoch": 0.42, + "grad_norm": 15.419551429709278, + "learning_rate": 1.838722531267601e-05, + "loss": 1.612, + "step": 34563 + }, + { + "epoch": 0.42, + "grad_norm": 24.15720902214596, + "learning_rate": 1.8386907178950687e-05, + "loss": 1.3912, + "step": 34566 + }, + { + "epoch": 0.42, + "grad_norm": 27.94570354889836, + "learning_rate": 1.838658901660387e-05, + "loss": 1.5393, + "step": 34569 + }, + { + "epoch": 0.42, + "grad_norm": 17.51449318441389, + "learning_rate": 1.8386270825636646e-05, + "loss": 1.5623, + "step": 34572 + }, + { + "epoch": 0.42, + "grad_norm": 18.979264131943964, + "learning_rate": 1.8385952606050104e-05, + "loss": 1.3074, + "step": 34575 + }, + { + "epoch": 0.42, + "grad_norm": 7.231601675000566, + "learning_rate": 1.8385634357845323e-05, + "loss": 1.4777, + "step": 34578 + }, + { + "epoch": 0.42, + "grad_norm": 13.490830432404637, + "learning_rate": 1.8385316081023396e-05, + "loss": 1.4819, + "step": 34581 + }, + { + "epoch": 0.42, + "grad_norm": 15.156522035756998, + "learning_rate": 1.83849977755854e-05, + "loss": 1.2957, + "step": 34584 + }, + { + "epoch": 0.42, + "grad_norm": 13.653932083869593, + "learning_rate": 1.838467944153243e-05, + "loss": 1.2236, + "step": 34587 + }, + { + "epoch": 0.42, + "grad_norm": 12.237889615045273, + "learning_rate": 1.838436107886557e-05, + "loss": 1.4251, + "step": 34590 + }, + { + "epoch": 0.42, + "grad_norm": 18.150189428975345, + "learning_rate": 1.8384042687585904e-05, + "loss": 1.2878, + "step": 34593 + }, + { + "epoch": 0.42, + "grad_norm": 18.87624579369509, + "learning_rate": 1.838372426769452e-05, + "loss": 1.628, + "step": 34596 + }, + { + "epoch": 0.42, + "grad_norm": 14.224373274595331, + "learning_rate": 1.8383405819192507e-05, + "loss": 1.5293, + "step": 34599 + }, + { + "epoch": 0.42, + "grad_norm": 10.538370126786527, + "learning_rate": 1.838308734208095e-05, + "loss": 1.3205, + "step": 34602 + }, + { + "epoch": 0.42, + "grad_norm": 15.913515178742704, + "learning_rate": 1.8382768836360934e-05, + "loss": 1.4794, + "step": 34605 + }, + { + "epoch": 0.42, + "grad_norm": 20.13917321619424, + "learning_rate": 1.8382450302033547e-05, + "loss": 1.2868, + "step": 34608 + }, + { + "epoch": 0.42, + "grad_norm": 12.723932551631853, + "learning_rate": 1.8382131739099875e-05, + "loss": 1.3505, + "step": 34611 + }, + { + "epoch": 0.42, + "grad_norm": 9.623622965130243, + "learning_rate": 1.838181314756101e-05, + "loss": 1.0704, + "step": 34614 + }, + { + "epoch": 0.42, + "grad_norm": 7.957725035466259, + "learning_rate": 1.8381494527418035e-05, + "loss": 1.4115, + "step": 34617 + }, + { + "epoch": 0.42, + "grad_norm": 35.00293213171641, + "learning_rate": 1.8381175878672034e-05, + "loss": 1.7673, + "step": 34620 + }, + { + "epoch": 0.42, + "grad_norm": 11.203469151169411, + "learning_rate": 1.8380857201324103e-05, + "loss": 1.6336, + "step": 34623 + }, + { + "epoch": 0.42, + "grad_norm": 5.059619694231639, + "learning_rate": 1.8380538495375325e-05, + "loss": 1.8486, + "step": 34626 + }, + { + "epoch": 0.42, + "grad_norm": 17.893926139065204, + "learning_rate": 1.838021976082679e-05, + "loss": 1.4402, + "step": 34629 + }, + { + "epoch": 0.42, + "grad_norm": 10.537492185154973, + "learning_rate": 1.8379900997679576e-05, + "loss": 1.4273, + "step": 34632 + }, + { + "epoch": 0.42, + "grad_norm": 40.372442667493104, + "learning_rate": 1.8379582205934784e-05, + "loss": 1.6076, + "step": 34635 + }, + { + "epoch": 0.42, + "grad_norm": 6.51014118837717, + "learning_rate": 1.8379263385593495e-05, + "loss": 1.305, + "step": 34638 + }, + { + "epoch": 0.42, + "grad_norm": 22.58633532394914, + "learning_rate": 1.8378944536656797e-05, + "loss": 1.4415, + "step": 34641 + }, + { + "epoch": 0.42, + "grad_norm": 27.275698096046412, + "learning_rate": 1.837862565912578e-05, + "loss": 1.4631, + "step": 34644 + }, + { + "epoch": 0.42, + "grad_norm": 2.754906899767069, + "learning_rate": 1.8378306753001532e-05, + "loss": 1.3324, + "step": 34647 + }, + { + "epoch": 0.42, + "grad_norm": 7.571980508147991, + "learning_rate": 1.8377987818285144e-05, + "loss": 1.3224, + "step": 34650 + }, + { + "epoch": 0.42, + "grad_norm": 18.131634266758656, + "learning_rate": 1.8377668854977697e-05, + "loss": 1.5572, + "step": 34653 + }, + { + "epoch": 0.42, + "grad_norm": 16.319818452693813, + "learning_rate": 1.8377349863080286e-05, + "loss": 1.3161, + "step": 34656 + }, + { + "epoch": 0.42, + "grad_norm": 11.355256904005492, + "learning_rate": 1.8377030842593996e-05, + "loss": 1.2898, + "step": 34659 + }, + { + "epoch": 0.42, + "grad_norm": 9.820240669039773, + "learning_rate": 1.837671179351992e-05, + "loss": 1.6704, + "step": 34662 + }, + { + "epoch": 0.42, + "grad_norm": 20.955759237188577, + "learning_rate": 1.837639271585914e-05, + "loss": 1.403, + "step": 34665 + }, + { + "epoch": 0.42, + "grad_norm": 8.168352054715829, + "learning_rate": 1.837607360961275e-05, + "loss": 1.3833, + "step": 34668 + }, + { + "epoch": 0.42, + "grad_norm": 12.487013476891871, + "learning_rate": 1.8375754474781837e-05, + "loss": 1.0875, + "step": 34671 + }, + { + "epoch": 0.42, + "grad_norm": 7.588655702342651, + "learning_rate": 1.8375435311367495e-05, + "loss": 1.7149, + "step": 34674 + }, + { + "epoch": 0.42, + "grad_norm": 17.3045750695563, + "learning_rate": 1.8375116119370805e-05, + "loss": 1.6575, + "step": 34677 + }, + { + "epoch": 0.42, + "grad_norm": 14.718254439044722, + "learning_rate": 1.8374796898792862e-05, + "loss": 1.479, + "step": 34680 + }, + { + "epoch": 0.42, + "grad_norm": 25.536393257600565, + "learning_rate": 1.8374477649634753e-05, + "loss": 1.2207, + "step": 34683 + }, + { + "epoch": 0.42, + "grad_norm": 21.238992075958922, + "learning_rate": 1.8374158371897567e-05, + "loss": 1.5442, + "step": 34686 + }, + { + "epoch": 0.42, + "grad_norm": 3.382352017354792, + "learning_rate": 1.8373839065582397e-05, + "loss": 1.2287, + "step": 34689 + }, + { + "epoch": 0.42, + "grad_norm": 4.3807316501831375, + "learning_rate": 1.837351973069033e-05, + "loss": 1.3273, + "step": 34692 + }, + { + "epoch": 0.42, + "grad_norm": 7.426726610209989, + "learning_rate": 1.837320036722246e-05, + "loss": 1.527, + "step": 34695 + }, + { + "epoch": 0.42, + "grad_norm": 199.82809475956296, + "learning_rate": 1.8372880975179867e-05, + "loss": 1.3905, + "step": 34698 + }, + { + "epoch": 0.42, + "grad_norm": 25.046528614362533, + "learning_rate": 1.837256155456365e-05, + "loss": 1.8094, + "step": 34701 + }, + { + "epoch": 0.42, + "grad_norm": 25.257755415172724, + "learning_rate": 1.8372242105374898e-05, + "loss": 1.5276, + "step": 34704 + }, + { + "epoch": 0.42, + "grad_norm": 6.529609300683363, + "learning_rate": 1.8371922627614696e-05, + "loss": 1.2073, + "step": 34707 + }, + { + "epoch": 0.42, + "grad_norm": 22.198010451287995, + "learning_rate": 1.8371603121284137e-05, + "loss": 1.6533, + "step": 34710 + }, + { + "epoch": 0.42, + "grad_norm": 13.193273806089138, + "learning_rate": 1.8371283586384313e-05, + "loss": 1.3496, + "step": 34713 + }, + { + "epoch": 0.42, + "grad_norm": 9.434463876586095, + "learning_rate": 1.8370964022916316e-05, + "loss": 1.6794, + "step": 34716 + }, + { + "epoch": 0.42, + "grad_norm": 70.58669324139159, + "learning_rate": 1.8370644430881232e-05, + "loss": 1.4525, + "step": 34719 + }, + { + "epoch": 0.42, + "grad_norm": 7.028757788318194, + "learning_rate": 1.8370324810280155e-05, + "loss": 1.2393, + "step": 34722 + }, + { + "epoch": 0.42, + "grad_norm": 9.944488357909547, + "learning_rate": 1.8370005161114173e-05, + "loss": 1.6613, + "step": 34725 + }, + { + "epoch": 0.42, + "grad_norm": 14.778118718004707, + "learning_rate": 1.836968548338438e-05, + "loss": 1.3451, + "step": 34728 + }, + { + "epoch": 0.42, + "grad_norm": 15.316284401803763, + "learning_rate": 1.836936577709186e-05, + "loss": 1.6525, + "step": 34731 + }, + { + "epoch": 0.42, + "grad_norm": 12.81150454148608, + "learning_rate": 1.8369046042237714e-05, + "loss": 1.3524, + "step": 34734 + }, + { + "epoch": 0.42, + "grad_norm": 106.7438343521955, + "learning_rate": 1.836872627882303e-05, + "loss": 1.6986, + "step": 34737 + }, + { + "epoch": 0.42, + "grad_norm": 5.082110247542774, + "learning_rate": 1.8368406486848895e-05, + "loss": 1.6375, + "step": 34740 + }, + { + "epoch": 0.42, + "grad_norm": 12.143516713927996, + "learning_rate": 1.8368086666316404e-05, + "loss": 1.4007, + "step": 34743 + }, + { + "epoch": 0.42, + "grad_norm": 23.476225753220834, + "learning_rate": 1.8367766817226645e-05, + "loss": 1.364, + "step": 34746 + }, + { + "epoch": 0.42, + "grad_norm": 9.943018786840334, + "learning_rate": 1.836744693958071e-05, + "loss": 1.745, + "step": 34749 + }, + { + "epoch": 0.42, + "grad_norm": 5.486615109143623, + "learning_rate": 1.8367127033379698e-05, + "loss": 1.5471, + "step": 34752 + }, + { + "epoch": 0.42, + "grad_norm": 21.337176018110778, + "learning_rate": 1.8366807098624692e-05, + "loss": 1.6875, + "step": 34755 + }, + { + "epoch": 0.42, + "grad_norm": 26.821466121476085, + "learning_rate": 1.8366487135316792e-05, + "loss": 1.1604, + "step": 34758 + }, + { + "epoch": 0.42, + "grad_norm": 20.953081049390818, + "learning_rate": 1.8366167143457078e-05, + "loss": 1.501, + "step": 34761 + }, + { + "epoch": 0.42, + "grad_norm": 21.599637189403005, + "learning_rate": 1.8365847123046655e-05, + "loss": 1.5181, + "step": 34764 + }, + { + "epoch": 0.42, + "grad_norm": 8.50217268517509, + "learning_rate": 1.8365527074086604e-05, + "loss": 1.3476, + "step": 34767 + }, + { + "epoch": 0.42, + "grad_norm": 9.299312955071322, + "learning_rate": 1.8365206996578025e-05, + "loss": 1.7383, + "step": 34770 + }, + { + "epoch": 0.42, + "grad_norm": 31.666954870896344, + "learning_rate": 1.8364886890522007e-05, + "loss": 1.4466, + "step": 34773 + }, + { + "epoch": 0.42, + "grad_norm": 10.071032018719547, + "learning_rate": 1.8364566755919643e-05, + "loss": 1.5407, + "step": 34776 + }, + { + "epoch": 0.42, + "grad_norm": 17.971257719293654, + "learning_rate": 1.8364246592772026e-05, + "loss": 1.4967, + "step": 34779 + }, + { + "epoch": 0.42, + "grad_norm": 14.830652895945859, + "learning_rate": 1.8363926401080248e-05, + "loss": 1.4491, + "step": 34782 + }, + { + "epoch": 0.42, + "grad_norm": 26.715155757092436, + "learning_rate": 1.83636061808454e-05, + "loss": 1.674, + "step": 34785 + }, + { + "epoch": 0.42, + "grad_norm": 51.43385460459369, + "learning_rate": 1.836328593206858e-05, + "loss": 1.4203, + "step": 34788 + }, + { + "epoch": 0.42, + "grad_norm": 11.484635820716411, + "learning_rate": 1.8362965654750876e-05, + "loss": 1.2669, + "step": 34791 + }, + { + "epoch": 0.42, + "grad_norm": 15.21391657729516, + "learning_rate": 1.8362645348893385e-05, + "loss": 1.606, + "step": 34794 + }, + { + "epoch": 0.42, + "grad_norm": 5.628504260041747, + "learning_rate": 1.8362325014497196e-05, + "loss": 1.7428, + "step": 34797 + }, + { + "epoch": 0.42, + "grad_norm": 3.9019406072275578, + "learning_rate": 1.83620046515634e-05, + "loss": 1.1116, + "step": 34800 + }, + { + "epoch": 0.42, + "grad_norm": 17.677454443027262, + "learning_rate": 1.8361684260093098e-05, + "loss": 1.569, + "step": 34803 + }, + { + "epoch": 0.42, + "grad_norm": 15.043916468397825, + "learning_rate": 1.836136384008738e-05, + "loss": 1.5956, + "step": 34806 + }, + { + "epoch": 0.42, + "grad_norm": 30.68986145257196, + "learning_rate": 1.8361043391547336e-05, + "loss": 1.3551, + "step": 34809 + }, + { + "epoch": 0.42, + "grad_norm": 4.085773444447363, + "learning_rate": 1.8360722914474065e-05, + "loss": 1.4767, + "step": 34812 + }, + { + "epoch": 0.42, + "grad_norm": 61.00140027823477, + "learning_rate": 1.836040240886866e-05, + "loss": 1.257, + "step": 34815 + }, + { + "epoch": 0.42, + "grad_norm": 39.08964473908493, + "learning_rate": 1.8360081874732207e-05, + "loss": 1.3504, + "step": 34818 + }, + { + "epoch": 0.42, + "grad_norm": 38.977991591908655, + "learning_rate": 1.835976131206581e-05, + "loss": 1.5852, + "step": 34821 + }, + { + "epoch": 0.42, + "grad_norm": 17.816146115958922, + "learning_rate": 1.835944072087056e-05, + "loss": 1.343, + "step": 34824 + }, + { + "epoch": 0.42, + "grad_norm": 21.644024947350104, + "learning_rate": 1.8359120101147545e-05, + "loss": 1.0431, + "step": 34827 + }, + { + "epoch": 0.42, + "grad_norm": 39.90981028913262, + "learning_rate": 1.835879945289787e-05, + "loss": 1.3446, + "step": 34830 + }, + { + "epoch": 0.42, + "grad_norm": 9.525601288396418, + "learning_rate": 1.835847877612262e-05, + "loss": 1.5413, + "step": 34833 + }, + { + "epoch": 0.42, + "grad_norm": 9.397084241614188, + "learning_rate": 1.8358158070822893e-05, + "loss": 1.5212, + "step": 34836 + }, + { + "epoch": 0.42, + "grad_norm": 25.026027501382476, + "learning_rate": 1.8357837336999783e-05, + "loss": 1.3694, + "step": 34839 + }, + { + "epoch": 0.42, + "grad_norm": 22.615662563822866, + "learning_rate": 1.8357516574654384e-05, + "loss": 1.4366, + "step": 34842 + }, + { + "epoch": 0.42, + "grad_norm": 4.153460415942876, + "learning_rate": 1.835719578378779e-05, + "loss": 1.3883, + "step": 34845 + }, + { + "epoch": 0.42, + "grad_norm": 9.304741760308264, + "learning_rate": 1.83568749644011e-05, + "loss": 1.4927, + "step": 34848 + }, + { + "epoch": 0.42, + "grad_norm": 11.119404515071498, + "learning_rate": 1.8356554116495403e-05, + "loss": 1.2133, + "step": 34851 + }, + { + "epoch": 0.42, + "grad_norm": 8.653950549608007, + "learning_rate": 1.8356233240071803e-05, + "loss": 1.7596, + "step": 34854 + }, + { + "epoch": 0.42, + "grad_norm": 35.13072744303945, + "learning_rate": 1.8355912335131382e-05, + "loss": 1.7668, + "step": 34857 + }, + { + "epoch": 0.42, + "grad_norm": 6.604205175402465, + "learning_rate": 1.8355591401675244e-05, + "loss": 1.5422, + "step": 34860 + }, + { + "epoch": 0.42, + "grad_norm": 16.900143197418377, + "learning_rate": 1.8355270439704484e-05, + "loss": 1.4216, + "step": 34863 + }, + { + "epoch": 0.42, + "grad_norm": 4.800465035152178, + "learning_rate": 1.8354949449220193e-05, + "loss": 1.3094, + "step": 34866 + }, + { + "epoch": 0.42, + "grad_norm": 17.797421418108122, + "learning_rate": 1.8354628430223472e-05, + "loss": 1.4531, + "step": 34869 + }, + { + "epoch": 0.42, + "grad_norm": 7.466116275259414, + "learning_rate": 1.8354307382715412e-05, + "loss": 1.4124, + "step": 34872 + }, + { + "epoch": 0.42, + "grad_norm": 6.369604837111363, + "learning_rate": 1.835398630669711e-05, + "loss": 1.2886, + "step": 34875 + }, + { + "epoch": 0.42, + "grad_norm": 10.557766243093727, + "learning_rate": 1.8353665202169662e-05, + "loss": 1.5125, + "step": 34878 + }, + { + "epoch": 0.42, + "grad_norm": 15.57598357236147, + "learning_rate": 1.835334406913416e-05, + "loss": 1.5966, + "step": 34881 + }, + { + "epoch": 0.42, + "grad_norm": 14.31253446764619, + "learning_rate": 1.8353022907591712e-05, + "loss": 1.4469, + "step": 34884 + }, + { + "epoch": 0.42, + "grad_norm": 119.53461387911173, + "learning_rate": 1.8352701717543396e-05, + "loss": 1.4163, + "step": 34887 + }, + { + "epoch": 0.42, + "grad_norm": 8.520992446164767, + "learning_rate": 1.8352380498990324e-05, + "loss": 1.4112, + "step": 34890 + }, + { + "epoch": 0.42, + "grad_norm": 25.157479554094362, + "learning_rate": 1.8352059251933583e-05, + "loss": 1.3232, + "step": 34893 + }, + { + "epoch": 0.42, + "grad_norm": 9.363573870669041, + "learning_rate": 1.8351737976374276e-05, + "loss": 1.365, + "step": 34896 + }, + { + "epoch": 0.42, + "grad_norm": 35.06214823651352, + "learning_rate": 1.835141667231349e-05, + "loss": 1.2791, + "step": 34899 + }, + { + "epoch": 0.42, + "grad_norm": 9.400931321164403, + "learning_rate": 1.8351095339752333e-05, + "loss": 1.3691, + "step": 34902 + }, + { + "epoch": 0.42, + "grad_norm": 16.062390809206747, + "learning_rate": 1.835077397869189e-05, + "loss": 1.471, + "step": 34905 + }, + { + "epoch": 0.42, + "grad_norm": 5.111545243364953, + "learning_rate": 1.8350452589133267e-05, + "loss": 1.4363, + "step": 34908 + }, + { + "epoch": 0.42, + "grad_norm": 4.233453415685567, + "learning_rate": 1.8350131171077556e-05, + "loss": 1.2823, + "step": 34911 + }, + { + "epoch": 0.42, + "grad_norm": 15.802375484411668, + "learning_rate": 1.8349809724525853e-05, + "loss": 1.0924, + "step": 34914 + }, + { + "epoch": 0.42, + "grad_norm": 11.904419109150805, + "learning_rate": 1.8349488249479258e-05, + "loss": 1.414, + "step": 34917 + }, + { + "epoch": 0.42, + "grad_norm": 5.117122703242032, + "learning_rate": 1.8349166745938868e-05, + "loss": 1.5312, + "step": 34920 + }, + { + "epoch": 0.42, + "grad_norm": 113.09843351259063, + "learning_rate": 1.8348845213905783e-05, + "loss": 1.5776, + "step": 34923 + }, + { + "epoch": 0.42, + "grad_norm": 40.98876362811334, + "learning_rate": 1.834852365338109e-05, + "loss": 1.3767, + "step": 34926 + }, + { + "epoch": 0.42, + "grad_norm": 27.034392055479803, + "learning_rate": 1.83482020643659e-05, + "loss": 1.7528, + "step": 34929 + }, + { + "epoch": 0.42, + "grad_norm": 8.131799201119712, + "learning_rate": 1.8347880446861298e-05, + "loss": 1.3569, + "step": 34932 + }, + { + "epoch": 0.42, + "grad_norm": 6.511123715240114, + "learning_rate": 1.8347558800868387e-05, + "loss": 1.5699, + "step": 34935 + }, + { + "epoch": 0.42, + "grad_norm": 32.12268279450116, + "learning_rate": 1.8347237126388263e-05, + "loss": 1.3244, + "step": 34938 + }, + { + "epoch": 0.42, + "grad_norm": 6.431411265086686, + "learning_rate": 1.834691542342203e-05, + "loss": 1.1089, + "step": 34941 + }, + { + "epoch": 0.42, + "grad_norm": 12.178203198271955, + "learning_rate": 1.834659369197078e-05, + "loss": 1.5717, + "step": 34944 + }, + { + "epoch": 0.42, + "grad_norm": 28.504621768009773, + "learning_rate": 1.8346271932035612e-05, + "loss": 1.7107, + "step": 34947 + }, + { + "epoch": 0.42, + "grad_norm": 10.29567721535032, + "learning_rate": 1.834595014361762e-05, + "loss": 1.3419, + "step": 34950 + }, + { + "epoch": 0.42, + "grad_norm": 3.2707862617867685, + "learning_rate": 1.8345628326717914e-05, + "loss": 1.4508, + "step": 34953 + }, + { + "epoch": 0.42, + "grad_norm": 30.112370723172578, + "learning_rate": 1.834530648133758e-05, + "loss": 1.4339, + "step": 34956 + }, + { + "epoch": 0.42, + "grad_norm": 15.881269935106427, + "learning_rate": 1.834498460747772e-05, + "loss": 1.7033, + "step": 34959 + }, + { + "epoch": 0.42, + "grad_norm": 17.49013738137945, + "learning_rate": 1.8344662705139435e-05, + "loss": 1.698, + "step": 34962 + }, + { + "epoch": 0.42, + "grad_norm": 17.064793549292265, + "learning_rate": 1.834434077432382e-05, + "loss": 1.487, + "step": 34965 + }, + { + "epoch": 0.42, + "grad_norm": 41.05457955056923, + "learning_rate": 1.834401881503198e-05, + "loss": 1.51, + "step": 34968 + }, + { + "epoch": 0.42, + "grad_norm": 6.615762529714309, + "learning_rate": 1.8343696827265005e-05, + "loss": 1.4042, + "step": 34971 + }, + { + "epoch": 0.42, + "grad_norm": 19.772615016247272, + "learning_rate": 1.8343374811023998e-05, + "loss": 1.67, + "step": 34974 + }, + { + "epoch": 0.42, + "grad_norm": 10.40766615303405, + "learning_rate": 1.834305276631006e-05, + "loss": 1.3824, + "step": 34977 + }, + { + "epoch": 0.42, + "grad_norm": 28.16996261306008, + "learning_rate": 1.8342730693124288e-05, + "loss": 1.3483, + "step": 34980 + }, + { + "epoch": 0.42, + "grad_norm": 41.18829905588672, + "learning_rate": 1.8342408591467784e-05, + "loss": 1.2261, + "step": 34983 + }, + { + "epoch": 0.42, + "grad_norm": 15.203548816213953, + "learning_rate": 1.8342086461341638e-05, + "loss": 1.3166, + "step": 34986 + }, + { + "epoch": 0.42, + "grad_norm": 15.353342881486578, + "learning_rate": 1.834176430274696e-05, + "loss": 1.6001, + "step": 34989 + }, + { + "epoch": 0.42, + "grad_norm": 20.651044000490817, + "learning_rate": 1.8341442115684845e-05, + "loss": 1.285, + "step": 34992 + }, + { + "epoch": 0.42, + "grad_norm": 10.982400868457745, + "learning_rate": 1.834111990015639e-05, + "loss": 0.9938, + "step": 34995 + }, + { + "epoch": 0.42, + "grad_norm": 14.915710689731588, + "learning_rate": 1.83407976561627e-05, + "loss": 1.7408, + "step": 34998 + }, + { + "epoch": 0.42, + "grad_norm": 19.92872437786133, + "learning_rate": 1.834047538370487e-05, + "loss": 1.6231, + "step": 35001 + }, + { + "epoch": 0.42, + "grad_norm": 30.38221052736665, + "learning_rate": 1.8340153082784005e-05, + "loss": 1.5195, + "step": 35004 + }, + { + "epoch": 0.42, + "grad_norm": 4.477918196927825, + "learning_rate": 1.83398307534012e-05, + "loss": 1.2977, + "step": 35007 + }, + { + "epoch": 0.42, + "grad_norm": 10.157564272424596, + "learning_rate": 1.8339508395557554e-05, + "loss": 1.0316, + "step": 35010 + }, + { + "epoch": 0.42, + "grad_norm": 6.4211452190942255, + "learning_rate": 1.8339186009254174e-05, + "loss": 1.7713, + "step": 35013 + }, + { + "epoch": 0.42, + "grad_norm": 11.451308615574746, + "learning_rate": 1.833886359449215e-05, + "loss": 1.4931, + "step": 35016 + }, + { + "epoch": 0.42, + "grad_norm": 19.499090958151204, + "learning_rate": 1.8338541151272596e-05, + "loss": 1.6972, + "step": 35019 + }, + { + "epoch": 0.42, + "grad_norm": 18.898828947914502, + "learning_rate": 1.8338218679596596e-05, + "loss": 1.6011, + "step": 35022 + }, + { + "epoch": 0.42, + "grad_norm": 21.71652253445129, + "learning_rate": 1.8337896179465263e-05, + "loss": 1.9609, + "step": 35025 + }, + { + "epoch": 0.42, + "grad_norm": 11.549041665854963, + "learning_rate": 1.8337573650879695e-05, + "loss": 1.5421, + "step": 35028 + }, + { + "epoch": 0.42, + "grad_norm": 10.104582880232938, + "learning_rate": 1.8337251093840992e-05, + "loss": 1.257, + "step": 35031 + }, + { + "epoch": 0.42, + "grad_norm": 6.833039214764986, + "learning_rate": 1.8336928508350253e-05, + "loss": 1.3801, + "step": 35034 + }, + { + "epoch": 0.42, + "grad_norm": 7.171242769992981, + "learning_rate": 1.833660589440858e-05, + "loss": 1.5587, + "step": 35037 + }, + { + "epoch": 0.42, + "grad_norm": 12.996393712996708, + "learning_rate": 1.8336283252017072e-05, + "loss": 1.2605, + "step": 35040 + }, + { + "epoch": 0.42, + "grad_norm": 9.57762072733187, + "learning_rate": 1.8335960581176833e-05, + "loss": 1.3954, + "step": 35043 + }, + { + "epoch": 0.42, + "grad_norm": 8.241085753228766, + "learning_rate": 1.8335637881888962e-05, + "loss": 1.2626, + "step": 35046 + }, + { + "epoch": 0.42, + "grad_norm": 14.544846559287974, + "learning_rate": 1.8335315154154563e-05, + "loss": 1.8776, + "step": 35049 + }, + { + "epoch": 0.42, + "grad_norm": 9.087986705822932, + "learning_rate": 1.833499239797474e-05, + "loss": 1.2493, + "step": 35052 + }, + { + "epoch": 0.42, + "grad_norm": 4.979089120862472, + "learning_rate": 1.8334669613350585e-05, + "loss": 1.6264, + "step": 35055 + }, + { + "epoch": 0.42, + "grad_norm": 6.947119551427579, + "learning_rate": 1.8334346800283204e-05, + "loss": 1.5684, + "step": 35058 + }, + { + "epoch": 0.42, + "grad_norm": 16.978317611824313, + "learning_rate": 1.83340239587737e-05, + "loss": 1.093, + "step": 35061 + }, + { + "epoch": 0.42, + "grad_norm": 19.815514803067206, + "learning_rate": 1.8333701088823178e-05, + "loss": 1.4618, + "step": 35064 + }, + { + "epoch": 0.42, + "grad_norm": 9.052157632871468, + "learning_rate": 1.8333378190432732e-05, + "loss": 1.7804, + "step": 35067 + }, + { + "epoch": 0.42, + "grad_norm": 8.004894857062148, + "learning_rate": 1.8333055263603466e-05, + "loss": 1.2064, + "step": 35070 + }, + { + "epoch": 0.42, + "grad_norm": 8.94720254479794, + "learning_rate": 1.833273230833649e-05, + "loss": 1.2939, + "step": 35073 + }, + { + "epoch": 0.42, + "grad_norm": 30.198584428011152, + "learning_rate": 1.8332409324632897e-05, + "loss": 1.3619, + "step": 35076 + }, + { + "epoch": 0.42, + "grad_norm": 9.714866271401133, + "learning_rate": 1.833208631249379e-05, + "loss": 1.7106, + "step": 35079 + }, + { + "epoch": 0.42, + "grad_norm": 61.00848730551644, + "learning_rate": 1.8331763271920278e-05, + "loss": 1.6148, + "step": 35082 + }, + { + "epoch": 0.42, + "grad_norm": 5.241227514595019, + "learning_rate": 1.8331440202913456e-05, + "loss": 1.5372, + "step": 35085 + }, + { + "epoch": 0.42, + "grad_norm": 18.754444934350285, + "learning_rate": 1.833111710547443e-05, + "loss": 1.8603, + "step": 35088 + }, + { + "epoch": 0.42, + "grad_norm": 4.588205219976335, + "learning_rate": 1.8330793979604305e-05, + "loss": 1.3416, + "step": 35091 + }, + { + "epoch": 0.42, + "grad_norm": 74.68262231691362, + "learning_rate": 1.833047082530418e-05, + "loss": 1.836, + "step": 35094 + }, + { + "epoch": 0.42, + "grad_norm": 25.179210966879996, + "learning_rate": 1.8330147642575156e-05, + "loss": 1.5192, + "step": 35097 + }, + { + "epoch": 0.42, + "grad_norm": 12.421538538720693, + "learning_rate": 1.832982443141834e-05, + "loss": 1.454, + "step": 35100 + }, + { + "epoch": 0.42, + "grad_norm": 50.22647938564497, + "learning_rate": 1.832950119183483e-05, + "loss": 1.6414, + "step": 35103 + }, + { + "epoch": 0.42, + "grad_norm": 21.877849236758195, + "learning_rate": 1.8329177923825738e-05, + "loss": 1.5247, + "step": 35106 + }, + { + "epoch": 0.42, + "grad_norm": 24.093975621825766, + "learning_rate": 1.832885462739216e-05, + "loss": 1.5781, + "step": 35109 + }, + { + "epoch": 0.42, + "grad_norm": 42.802835104855724, + "learning_rate": 1.8328531302535202e-05, + "loss": 1.6458, + "step": 35112 + }, + { + "epoch": 0.42, + "grad_norm": 32.95831160193534, + "learning_rate": 1.8328207949255964e-05, + "loss": 1.6359, + "step": 35115 + }, + { + "epoch": 0.42, + "grad_norm": 8.301981725227506, + "learning_rate": 1.8327884567555555e-05, + "loss": 1.0309, + "step": 35118 + }, + { + "epoch": 0.42, + "grad_norm": 8.623383444455824, + "learning_rate": 1.8327561157435077e-05, + "loss": 1.2351, + "step": 35121 + }, + { + "epoch": 0.42, + "grad_norm": 187.61922465326097, + "learning_rate": 1.8327237718895627e-05, + "loss": 1.3384, + "step": 35124 + }, + { + "epoch": 0.42, + "grad_norm": 37.2377781936709, + "learning_rate": 1.832691425193832e-05, + "loss": 1.6599, + "step": 35127 + }, + { + "epoch": 0.42, + "grad_norm": 83.29827998245959, + "learning_rate": 1.832659075656425e-05, + "loss": 1.6241, + "step": 35130 + }, + { + "epoch": 0.42, + "grad_norm": 47.158538414644184, + "learning_rate": 1.8326267232774528e-05, + "loss": 1.7203, + "step": 35133 + }, + { + "epoch": 0.42, + "grad_norm": 41.57972827253438, + "learning_rate": 1.832594368057025e-05, + "loss": 1.2461, + "step": 35136 + }, + { + "epoch": 0.42, + "grad_norm": 22.393895923368806, + "learning_rate": 1.8325620099952528e-05, + "loss": 1.7668, + "step": 35139 + }, + { + "epoch": 0.42, + "grad_norm": 26.035051176909846, + "learning_rate": 1.8325296490922462e-05, + "loss": 1.8902, + "step": 35142 + }, + { + "epoch": 0.42, + "grad_norm": 60.6770591296322, + "learning_rate": 1.8324972853481163e-05, + "loss": 1.5661, + "step": 35145 + }, + { + "epoch": 0.42, + "grad_norm": 20.815551067490137, + "learning_rate": 1.8324649187629725e-05, + "loss": 1.5628, + "step": 35148 + }, + { + "epoch": 0.42, + "grad_norm": 19.988941128656464, + "learning_rate": 1.8324325493369263e-05, + "loss": 1.2947, + "step": 35151 + }, + { + "epoch": 0.42, + "grad_norm": 15.308302973151026, + "learning_rate": 1.832400177070087e-05, + "loss": 1.422, + "step": 35154 + }, + { + "epoch": 0.42, + "grad_norm": 163.79367590400213, + "learning_rate": 1.832367801962566e-05, + "loss": 1.1399, + "step": 35157 + }, + { + "epoch": 0.42, + "grad_norm": 20.222637612367834, + "learning_rate": 1.8323354240144736e-05, + "loss": 1.6651, + "step": 35160 + }, + { + "epoch": 0.42, + "grad_norm": 15.947256614386678, + "learning_rate": 1.8323030432259205e-05, + "loss": 1.6399, + "step": 35163 + }, + { + "epoch": 0.42, + "grad_norm": 15.960873301353207, + "learning_rate": 1.8322706595970162e-05, + "loss": 1.4016, + "step": 35166 + }, + { + "epoch": 0.42, + "grad_norm": 5.59206080613601, + "learning_rate": 1.8322382731278722e-05, + "loss": 1.418, + "step": 35169 + }, + { + "epoch": 0.42, + "grad_norm": 80.39517965873726, + "learning_rate": 1.832205883818599e-05, + "loss": 1.9482, + "step": 35172 + }, + { + "epoch": 0.42, + "grad_norm": 27.05227953667644, + "learning_rate": 1.8321734916693066e-05, + "loss": 1.2152, + "step": 35175 + }, + { + "epoch": 0.42, + "grad_norm": 6.720616360088842, + "learning_rate": 1.8321410966801063e-05, + "loss": 1.3043, + "step": 35178 + }, + { + "epoch": 0.42, + "grad_norm": 8.985519118292922, + "learning_rate": 1.8321086988511076e-05, + "loss": 1.6464, + "step": 35181 + }, + { + "epoch": 0.42, + "grad_norm": 12.45404175590588, + "learning_rate": 1.832076298182422e-05, + "loss": 1.5066, + "step": 35184 + }, + { + "epoch": 0.42, + "grad_norm": 3.5536492951593783, + "learning_rate": 1.8320438946741596e-05, + "loss": 2.0732, + "step": 35187 + }, + { + "epoch": 0.42, + "grad_norm": 8.644062430337858, + "learning_rate": 1.832011488326431e-05, + "loss": 1.5635, + "step": 35190 + }, + { + "epoch": 0.42, + "grad_norm": 44.391682298955054, + "learning_rate": 1.8319790791393467e-05, + "loss": 1.3454, + "step": 35193 + }, + { + "epoch": 0.42, + "grad_norm": 7.999497591591988, + "learning_rate": 1.8319466671130178e-05, + "loss": 1.3582, + "step": 35196 + }, + { + "epoch": 0.42, + "grad_norm": 6.934903554146578, + "learning_rate": 1.8319142522475543e-05, + "loss": 1.5645, + "step": 35199 + }, + { + "epoch": 0.42, + "grad_norm": 4.582258356903731, + "learning_rate": 1.8318818345430672e-05, + "loss": 1.5402, + "step": 35202 + }, + { + "epoch": 0.42, + "grad_norm": 24.238070554721638, + "learning_rate": 1.831849413999667e-05, + "loss": 1.6738, + "step": 35205 + }, + { + "epoch": 0.42, + "grad_norm": 22.837171531986257, + "learning_rate": 1.8318169906174646e-05, + "loss": 1.3696, + "step": 35208 + }, + { + "epoch": 0.42, + "grad_norm": 36.92707609269682, + "learning_rate": 1.8317845643965705e-05, + "loss": 1.2428, + "step": 35211 + }, + { + "epoch": 0.42, + "grad_norm": 9.644306989706404, + "learning_rate": 1.8317521353370954e-05, + "loss": 1.5805, + "step": 35214 + }, + { + "epoch": 0.42, + "grad_norm": 8.193630772317507, + "learning_rate": 1.8317197034391492e-05, + "loss": 1.2955, + "step": 35217 + }, + { + "epoch": 0.42, + "grad_norm": 11.316046570331423, + "learning_rate": 1.8316872687028438e-05, + "loss": 1.557, + "step": 35220 + }, + { + "epoch": 0.42, + "grad_norm": 14.48898299460839, + "learning_rate": 1.8316548311282895e-05, + "loss": 1.6292, + "step": 35223 + }, + { + "epoch": 0.42, + "grad_norm": 10.06630689588097, + "learning_rate": 1.831622390715596e-05, + "loss": 1.3249, + "step": 35226 + }, + { + "epoch": 0.42, + "grad_norm": 43.03734994479576, + "learning_rate": 1.8315899474648755e-05, + "loss": 1.5167, + "step": 35229 + }, + { + "epoch": 0.42, + "grad_norm": 18.179552878636716, + "learning_rate": 1.8315575013762377e-05, + "loss": 1.2336, + "step": 35232 + }, + { + "epoch": 0.42, + "grad_norm": 12.814628411848066, + "learning_rate": 1.831525052449794e-05, + "loss": 1.427, + "step": 35235 + }, + { + "epoch": 0.42, + "grad_norm": 15.666564645143078, + "learning_rate": 1.8314926006856546e-05, + "loss": 1.5055, + "step": 35238 + }, + { + "epoch": 0.42, + "grad_norm": 14.759153325037419, + "learning_rate": 1.8314601460839302e-05, + "loss": 1.5775, + "step": 35241 + }, + { + "epoch": 0.42, + "grad_norm": 5.475680179934038, + "learning_rate": 1.8314276886447324e-05, + "loss": 1.3691, + "step": 35244 + }, + { + "epoch": 0.42, + "grad_norm": 15.675329948125253, + "learning_rate": 1.8313952283681712e-05, + "loss": 1.5594, + "step": 35247 + }, + { + "epoch": 0.42, + "grad_norm": 17.303931906805385, + "learning_rate": 1.8313627652543572e-05, + "loss": 1.4801, + "step": 35250 + }, + { + "epoch": 0.42, + "grad_norm": 14.574817482661915, + "learning_rate": 1.831330299303402e-05, + "loss": 1.6566, + "step": 35253 + }, + { + "epoch": 0.42, + "grad_norm": 49.03900543717511, + "learning_rate": 1.8312978305154155e-05, + "loss": 1.9642, + "step": 35256 + }, + { + "epoch": 0.42, + "grad_norm": 10.327072276829227, + "learning_rate": 1.831265358890509e-05, + "loss": 1.1433, + "step": 35259 + }, + { + "epoch": 0.42, + "grad_norm": 21.77394420697816, + "learning_rate": 1.8312328844287934e-05, + "loss": 1.3204, + "step": 35262 + }, + { + "epoch": 0.42, + "grad_norm": 12.954436790423696, + "learning_rate": 1.8312004071303797e-05, + "loss": 1.0844, + "step": 35265 + }, + { + "epoch": 0.42, + "grad_norm": 15.575779194873563, + "learning_rate": 1.831167926995378e-05, + "loss": 1.4457, + "step": 35268 + }, + { + "epoch": 0.42, + "grad_norm": 10.125586834145553, + "learning_rate": 1.8311354440238998e-05, + "loss": 1.737, + "step": 35271 + }, + { + "epoch": 0.42, + "grad_norm": 9.262165767957239, + "learning_rate": 1.8311029582160555e-05, + "loss": 1.518, + "step": 35274 + }, + { + "epoch": 0.42, + "grad_norm": 8.210028903016434, + "learning_rate": 1.8310704695719563e-05, + "loss": 1.7242, + "step": 35277 + }, + { + "epoch": 0.42, + "grad_norm": 13.760463990455454, + "learning_rate": 1.8310379780917126e-05, + "loss": 1.1996, + "step": 35280 + }, + { + "epoch": 0.42, + "grad_norm": 14.091398016734416, + "learning_rate": 1.831005483775436e-05, + "loss": 1.6862, + "step": 35283 + }, + { + "epoch": 0.42, + "grad_norm": 39.83140095505317, + "learning_rate": 1.8309729866232366e-05, + "loss": 1.3659, + "step": 35286 + }, + { + "epoch": 0.42, + "grad_norm": 7.56186029762649, + "learning_rate": 1.830940486635226e-05, + "loss": 1.3636, + "step": 35289 + }, + { + "epoch": 0.42, + "grad_norm": 36.80928935609549, + "learning_rate": 1.830907983811515e-05, + "loss": 1.5256, + "step": 35292 + }, + { + "epoch": 0.42, + "grad_norm": 6.790759183655806, + "learning_rate": 1.830875478152214e-05, + "loss": 1.5174, + "step": 35295 + }, + { + "epoch": 0.42, + "grad_norm": 6.0276526433605575, + "learning_rate": 1.8308429696574344e-05, + "loss": 1.6017, + "step": 35298 + }, + { + "epoch": 0.42, + "grad_norm": 6.041054636533119, + "learning_rate": 1.830810458327287e-05, + "loss": 1.6619, + "step": 35301 + }, + { + "epoch": 0.42, + "grad_norm": 8.817141836625046, + "learning_rate": 1.830777944161883e-05, + "loss": 1.2443, + "step": 35304 + }, + { + "epoch": 0.42, + "grad_norm": 9.803731303997836, + "learning_rate": 1.8307454271613327e-05, + "loss": 1.6791, + "step": 35307 + }, + { + "epoch": 0.42, + "grad_norm": 13.72178115115439, + "learning_rate": 1.8307129073257476e-05, + "loss": 1.7193, + "step": 35310 + }, + { + "epoch": 0.42, + "grad_norm": 15.27381804880271, + "learning_rate": 1.830680384655239e-05, + "loss": 1.2947, + "step": 35313 + }, + { + "epoch": 0.42, + "grad_norm": 10.307506923462277, + "learning_rate": 1.8306478591499168e-05, + "loss": 1.6103, + "step": 35316 + }, + { + "epoch": 0.42, + "grad_norm": 19.3666395571153, + "learning_rate": 1.830615330809893e-05, + "loss": 1.5871, + "step": 35319 + }, + { + "epoch": 0.42, + "grad_norm": 28.801895278473662, + "learning_rate": 1.8305827996352785e-05, + "loss": 1.5189, + "step": 35322 + }, + { + "epoch": 0.42, + "grad_norm": 7.323263719781179, + "learning_rate": 1.8305502656261834e-05, + "loss": 1.3251, + "step": 35325 + }, + { + "epoch": 0.42, + "grad_norm": 36.6661701334622, + "learning_rate": 1.8305177287827202e-05, + "loss": 1.507, + "step": 35328 + }, + { + "epoch": 0.42, + "grad_norm": 9.811925210745997, + "learning_rate": 1.8304851891049988e-05, + "loss": 1.1888, + "step": 35331 + }, + { + "epoch": 0.42, + "grad_norm": 38.755668287857745, + "learning_rate": 1.8304526465931305e-05, + "loss": 1.5282, + "step": 35334 + }, + { + "epoch": 0.42, + "grad_norm": 15.207890440467276, + "learning_rate": 1.8304201012472265e-05, + "loss": 1.7322, + "step": 35337 + }, + { + "epoch": 0.42, + "grad_norm": 18.423410632433605, + "learning_rate": 1.830387553067398e-05, + "loss": 1.5229, + "step": 35340 + }, + { + "epoch": 0.42, + "grad_norm": 9.697890599643902, + "learning_rate": 1.8303550020537556e-05, + "loss": 1.6117, + "step": 35343 + }, + { + "epoch": 0.43, + "grad_norm": 3.3464148489321577, + "learning_rate": 1.8303224482064106e-05, + "loss": 1.8195, + "step": 35346 + }, + { + "epoch": 0.43, + "grad_norm": 51.98367068477284, + "learning_rate": 1.8302898915254744e-05, + "loss": 1.4837, + "step": 35349 + }, + { + "epoch": 0.43, + "grad_norm": 6.387767066616425, + "learning_rate": 1.8302573320110576e-05, + "loss": 1.4053, + "step": 35352 + }, + { + "epoch": 0.43, + "grad_norm": 25.84919162746884, + "learning_rate": 1.8302247696632718e-05, + "loss": 1.3882, + "step": 35355 + }, + { + "epoch": 0.43, + "grad_norm": 6.452368304330244, + "learning_rate": 1.830192204482228e-05, + "loss": 1.3326, + "step": 35358 + }, + { + "epoch": 0.43, + "grad_norm": 15.165544530479739, + "learning_rate": 1.8301596364680373e-05, + "loss": 1.4454, + "step": 35361 + }, + { + "epoch": 0.43, + "grad_norm": 4.11965100266498, + "learning_rate": 1.8301270656208105e-05, + "loss": 1.206, + "step": 35364 + }, + { + "epoch": 0.43, + "grad_norm": 10.939795447240916, + "learning_rate": 1.830094491940659e-05, + "loss": 1.0662, + "step": 35367 + }, + { + "epoch": 0.43, + "grad_norm": 39.187013344247305, + "learning_rate": 1.8300619154276945e-05, + "loss": 0.9006, + "step": 35370 + }, + { + "epoch": 0.43, + "grad_norm": 5.421511652434122, + "learning_rate": 1.830029336082027e-05, + "loss": 1.8598, + "step": 35373 + }, + { + "epoch": 0.43, + "grad_norm": 8.210679671459449, + "learning_rate": 1.8299967539037687e-05, + "loss": 1.5553, + "step": 35376 + }, + { + "epoch": 0.43, + "grad_norm": 12.541190311765284, + "learning_rate": 1.8299641688930303e-05, + "loss": 1.3494, + "step": 35379 + }, + { + "epoch": 0.43, + "grad_norm": 21.563921586073068, + "learning_rate": 1.8299315810499234e-05, + "loss": 1.5924, + "step": 35382 + }, + { + "epoch": 0.43, + "grad_norm": 16.27351859958117, + "learning_rate": 1.8298989903745586e-05, + "loss": 1.4124, + "step": 35385 + }, + { + "epoch": 0.43, + "grad_norm": 6.772830436645639, + "learning_rate": 1.8298663968670477e-05, + "loss": 1.518, + "step": 35388 + }, + { + "epoch": 0.43, + "grad_norm": 41.74862439176789, + "learning_rate": 1.829833800527502e-05, + "loss": 1.5976, + "step": 35391 + }, + { + "epoch": 0.43, + "grad_norm": 8.87598269813201, + "learning_rate": 1.829801201356032e-05, + "loss": 1.2891, + "step": 35394 + }, + { + "epoch": 0.43, + "grad_norm": 7.32223171925528, + "learning_rate": 1.8297685993527494e-05, + "loss": 1.3738, + "step": 35397 + }, + { + "epoch": 0.43, + "grad_norm": 21.022861869068112, + "learning_rate": 1.829735994517766e-05, + "loss": 1.4157, + "step": 35400 + }, + { + "epoch": 0.43, + "grad_norm": 6.835499856045489, + "learning_rate": 1.829703386851192e-05, + "loss": 1.6043, + "step": 35403 + }, + { + "epoch": 0.43, + "grad_norm": 33.469318259965, + "learning_rate": 1.829670776353139e-05, + "loss": 1.6291, + "step": 35406 + }, + { + "epoch": 0.43, + "grad_norm": 138.80021416410608, + "learning_rate": 1.8296381630237187e-05, + "loss": 1.394, + "step": 35409 + }, + { + "epoch": 0.43, + "grad_norm": 19.462069492286354, + "learning_rate": 1.8296055468630424e-05, + "loss": 1.1501, + "step": 35412 + }, + { + "epoch": 0.43, + "grad_norm": 4.260645073892753, + "learning_rate": 1.829572927871221e-05, + "loss": 1.5395, + "step": 35415 + }, + { + "epoch": 0.43, + "grad_norm": 58.7520587257492, + "learning_rate": 1.829540306048366e-05, + "loss": 1.7778, + "step": 35418 + }, + { + "epoch": 0.43, + "grad_norm": 83.37751810253577, + "learning_rate": 1.829507681394589e-05, + "loss": 1.4746, + "step": 35421 + }, + { + "epoch": 0.43, + "grad_norm": 19.136746653317722, + "learning_rate": 1.8294750539100007e-05, + "loss": 1.4591, + "step": 35424 + }, + { + "epoch": 0.43, + "grad_norm": 10.225142374247557, + "learning_rate": 1.829442423594713e-05, + "loss": 1.5544, + "step": 35427 + }, + { + "epoch": 0.43, + "grad_norm": 3.567881811427206, + "learning_rate": 1.829409790448837e-05, + "loss": 1.2119, + "step": 35430 + }, + { + "epoch": 0.43, + "grad_norm": 24.901649493934844, + "learning_rate": 1.8293771544724844e-05, + "loss": 1.608, + "step": 35433 + }, + { + "epoch": 0.43, + "grad_norm": 49.28549819063547, + "learning_rate": 1.8293445156657663e-05, + "loss": 1.442, + "step": 35436 + }, + { + "epoch": 0.43, + "grad_norm": 3.769673233247095, + "learning_rate": 1.8293118740287936e-05, + "loss": 1.3732, + "step": 35439 + }, + { + "epoch": 0.43, + "grad_norm": 9.95714106693262, + "learning_rate": 1.8292792295616786e-05, + "loss": 1.3894, + "step": 35442 + }, + { + "epoch": 0.43, + "grad_norm": 20.402508278496153, + "learning_rate": 1.8292465822645323e-05, + "loss": 1.6278, + "step": 35445 + }, + { + "epoch": 0.43, + "grad_norm": 21.777702265471675, + "learning_rate": 1.829213932137466e-05, + "loss": 1.6508, + "step": 35448 + }, + { + "epoch": 0.43, + "grad_norm": 50.69898932260469, + "learning_rate": 1.829181279180591e-05, + "loss": 1.1743, + "step": 35451 + }, + { + "epoch": 0.43, + "grad_norm": 6.868957504876604, + "learning_rate": 1.829148623394019e-05, + "loss": 1.3844, + "step": 35454 + }, + { + "epoch": 0.43, + "grad_norm": 100.49103176850109, + "learning_rate": 1.829115964777862e-05, + "loss": 1.5561, + "step": 35457 + }, + { + "epoch": 0.43, + "grad_norm": 23.79233614939334, + "learning_rate": 1.8290833033322305e-05, + "loss": 1.5909, + "step": 35460 + }, + { + "epoch": 0.43, + "grad_norm": 9.84642460074843, + "learning_rate": 1.829050639057236e-05, + "loss": 1.6868, + "step": 35463 + }, + { + "epoch": 0.43, + "grad_norm": 19.019434719222684, + "learning_rate": 1.8290179719529907e-05, + "loss": 1.419, + "step": 35466 + }, + { + "epoch": 0.43, + "grad_norm": 10.5269983209388, + "learning_rate": 1.8289853020196055e-05, + "loss": 1.3009, + "step": 35469 + }, + { + "epoch": 0.43, + "grad_norm": 9.078027887896804, + "learning_rate": 1.828952629257192e-05, + "loss": 1.496, + "step": 35472 + }, + { + "epoch": 0.43, + "grad_norm": 31.87767543466375, + "learning_rate": 1.828919953665862e-05, + "loss": 1.9625, + "step": 35475 + }, + { + "epoch": 0.43, + "grad_norm": 20.953036387385104, + "learning_rate": 1.8288872752457267e-05, + "loss": 1.2691, + "step": 35478 + }, + { + "epoch": 0.43, + "grad_norm": 34.68492947077171, + "learning_rate": 1.8288545939968975e-05, + "loss": 1.5299, + "step": 35481 + }, + { + "epoch": 0.43, + "grad_norm": 18.8752932592552, + "learning_rate": 1.8288219099194864e-05, + "loss": 1.458, + "step": 35484 + }, + { + "epoch": 0.43, + "grad_norm": 17.45514049139257, + "learning_rate": 1.8287892230136043e-05, + "loss": 1.3873, + "step": 35487 + }, + { + "epoch": 0.43, + "grad_norm": 12.680565206177905, + "learning_rate": 1.8287565332793636e-05, + "loss": 1.3535, + "step": 35490 + }, + { + "epoch": 0.43, + "grad_norm": 10.803094809800294, + "learning_rate": 1.8287238407168752e-05, + "loss": 1.5632, + "step": 35493 + }, + { + "epoch": 0.43, + "grad_norm": 6.656067076151762, + "learning_rate": 1.8286911453262505e-05, + "loss": 1.4788, + "step": 35496 + }, + { + "epoch": 0.43, + "grad_norm": 6.569005365457206, + "learning_rate": 1.8286584471076017e-05, + "loss": 1.7729, + "step": 35499 + }, + { + "epoch": 0.43, + "grad_norm": 55.487263240307705, + "learning_rate": 1.8286257460610398e-05, + "loss": 1.4735, + "step": 35502 + }, + { + "epoch": 0.43, + "grad_norm": 19.874123275767143, + "learning_rate": 1.828593042186677e-05, + "loss": 1.9113, + "step": 35505 + }, + { + "epoch": 0.43, + "grad_norm": 8.727022543818624, + "learning_rate": 1.8285603354846247e-05, + "loss": 1.4474, + "step": 35508 + }, + { + "epoch": 0.43, + "grad_norm": 8.247185762588305, + "learning_rate": 1.8285276259549943e-05, + "loss": 1.5172, + "step": 35511 + }, + { + "epoch": 0.43, + "grad_norm": 13.065383241353025, + "learning_rate": 1.8284949135978975e-05, + "loss": 1.3009, + "step": 35514 + }, + { + "epoch": 0.43, + "grad_norm": 4.8169615743985785, + "learning_rate": 1.828462198413446e-05, + "loss": 1.5216, + "step": 35517 + }, + { + "epoch": 0.43, + "grad_norm": 7.9949820241613185, + "learning_rate": 1.8284294804017515e-05, + "loss": 1.4344, + "step": 35520 + }, + { + "epoch": 0.43, + "grad_norm": 19.332188038580156, + "learning_rate": 1.8283967595629255e-05, + "loss": 1.5572, + "step": 35523 + }, + { + "epoch": 0.43, + "grad_norm": 7.994480018947822, + "learning_rate": 1.8283640358970796e-05, + "loss": 1.6102, + "step": 35526 + }, + { + "epoch": 0.43, + "grad_norm": 59.225665104151176, + "learning_rate": 1.8283313094043256e-05, + "loss": 1.3434, + "step": 35529 + }, + { + "epoch": 0.43, + "grad_norm": 33.31736041029607, + "learning_rate": 1.8282985800847757e-05, + "loss": 1.2884, + "step": 35532 + }, + { + "epoch": 0.43, + "grad_norm": 20.76349387263858, + "learning_rate": 1.828265847938541e-05, + "loss": 1.6643, + "step": 35535 + }, + { + "epoch": 0.43, + "grad_norm": 59.89983775333362, + "learning_rate": 1.828233112965733e-05, + "loss": 1.7552, + "step": 35538 + }, + { + "epoch": 0.43, + "grad_norm": 4.847754938455163, + "learning_rate": 1.8282003751664637e-05, + "loss": 1.349, + "step": 35541 + }, + { + "epoch": 0.43, + "grad_norm": 93.57540283364773, + "learning_rate": 1.828167634540845e-05, + "loss": 1.5252, + "step": 35544 + }, + { + "epoch": 0.43, + "grad_norm": 9.776038218849667, + "learning_rate": 1.8281348910889886e-05, + "loss": 1.4095, + "step": 35547 + }, + { + "epoch": 0.43, + "grad_norm": 22.350656986395386, + "learning_rate": 1.8281021448110056e-05, + "loss": 1.342, + "step": 35550 + }, + { + "epoch": 0.43, + "grad_norm": 25.83016800962121, + "learning_rate": 1.828069395707009e-05, + "loss": 1.6318, + "step": 35553 + }, + { + "epoch": 0.43, + "grad_norm": 18.52787320519331, + "learning_rate": 1.8280366437771092e-05, + "loss": 1.3605, + "step": 35556 + }, + { + "epoch": 0.43, + "grad_norm": 8.443222667990982, + "learning_rate": 1.8280038890214187e-05, + "loss": 1.4811, + "step": 35559 + }, + { + "epoch": 0.43, + "grad_norm": 19.59347962302975, + "learning_rate": 1.827971131440049e-05, + "loss": 1.6131, + "step": 35562 + }, + { + "epoch": 0.43, + "grad_norm": 8.338035401088526, + "learning_rate": 1.8279383710331123e-05, + "loss": 1.2821, + "step": 35565 + }, + { + "epoch": 0.43, + "grad_norm": 19.646493113085313, + "learning_rate": 1.8279056078007203e-05, + "loss": 1.4788, + "step": 35568 + }, + { + "epoch": 0.43, + "grad_norm": 17.149564019662243, + "learning_rate": 1.8278728417429846e-05, + "loss": 1.3723, + "step": 35571 + }, + { + "epoch": 0.43, + "grad_norm": 41.78282527435562, + "learning_rate": 1.8278400728600168e-05, + "loss": 1.2498, + "step": 35574 + }, + { + "epoch": 0.43, + "grad_norm": 14.678289656707731, + "learning_rate": 1.8278073011519296e-05, + "loss": 1.5664, + "step": 35577 + }, + { + "epoch": 0.43, + "grad_norm": 4.333291869762822, + "learning_rate": 1.827774526618834e-05, + "loss": 1.5504, + "step": 35580 + }, + { + "epoch": 0.43, + "grad_norm": 17.063615647671423, + "learning_rate": 1.8277417492608418e-05, + "loss": 1.6463, + "step": 35583 + }, + { + "epoch": 0.43, + "grad_norm": 29.06938792254705, + "learning_rate": 1.8277089690780652e-05, + "loss": 1.6777, + "step": 35586 + }, + { + "epoch": 0.43, + "grad_norm": 12.601603991974729, + "learning_rate": 1.8276761860706164e-05, + "loss": 1.324, + "step": 35589 + }, + { + "epoch": 0.43, + "grad_norm": 4.080658357251486, + "learning_rate": 1.8276434002386068e-05, + "loss": 1.9034, + "step": 35592 + }, + { + "epoch": 0.43, + "grad_norm": 12.211194392246378, + "learning_rate": 1.8276106115821482e-05, + "loss": 1.6809, + "step": 35595 + }, + { + "epoch": 0.43, + "grad_norm": 17.860492642361244, + "learning_rate": 1.827577820101353e-05, + "loss": 1.5728, + "step": 35598 + }, + { + "epoch": 0.43, + "grad_norm": 40.74717702921129, + "learning_rate": 1.8275450257963326e-05, + "loss": 1.6425, + "step": 35601 + }, + { + "epoch": 0.43, + "grad_norm": 54.69648956004245, + "learning_rate": 1.827512228667199e-05, + "loss": 1.5977, + "step": 35604 + }, + { + "epoch": 0.43, + "grad_norm": 67.6680523357202, + "learning_rate": 1.8274794287140642e-05, + "loss": 1.2988, + "step": 35607 + }, + { + "epoch": 0.43, + "grad_norm": 16.75276111974283, + "learning_rate": 1.8274466259370402e-05, + "loss": 1.3845, + "step": 35610 + }, + { + "epoch": 0.43, + "grad_norm": 96.55284655658438, + "learning_rate": 1.827413820336239e-05, + "loss": 1.3427, + "step": 35613 + }, + { + "epoch": 0.43, + "grad_norm": 48.17282590955886, + "learning_rate": 1.8273810119117727e-05, + "loss": 1.6583, + "step": 35616 + }, + { + "epoch": 0.43, + "grad_norm": 7.241035816219758, + "learning_rate": 1.8273482006637528e-05, + "loss": 1.2046, + "step": 35619 + }, + { + "epoch": 0.43, + "grad_norm": 2.1315050054586893, + "learning_rate": 1.8273153865922915e-05, + "loss": 1.5496, + "step": 35622 + }, + { + "epoch": 0.43, + "grad_norm": 29.24628896133391, + "learning_rate": 1.8272825696975007e-05, + "loss": 1.4383, + "step": 35625 + }, + { + "epoch": 0.43, + "grad_norm": 25.715591880938284, + "learning_rate": 1.8272497499794925e-05, + "loss": 1.7503, + "step": 35628 + }, + { + "epoch": 0.43, + "grad_norm": 20.422424711493786, + "learning_rate": 1.827216927438379e-05, + "loss": 1.3823, + "step": 35631 + }, + { + "epoch": 0.43, + "grad_norm": 19.367822368300295, + "learning_rate": 1.827184102074272e-05, + "loss": 1.2765, + "step": 35634 + }, + { + "epoch": 0.43, + "grad_norm": 9.389462823583372, + "learning_rate": 1.8271512738872836e-05, + "loss": 1.2949, + "step": 35637 + }, + { + "epoch": 0.43, + "grad_norm": 9.497376614794627, + "learning_rate": 1.827118442877526e-05, + "loss": 1.5681, + "step": 35640 + }, + { + "epoch": 0.43, + "grad_norm": 55.81542685122337, + "learning_rate": 1.827085609045111e-05, + "loss": 1.3334, + "step": 35643 + }, + { + "epoch": 0.43, + "grad_norm": 9.383646453444754, + "learning_rate": 1.8270527723901504e-05, + "loss": 1.537, + "step": 35646 + }, + { + "epoch": 0.43, + "grad_norm": 8.614415712384707, + "learning_rate": 1.8270199329127568e-05, + "loss": 1.3983, + "step": 35649 + }, + { + "epoch": 0.43, + "grad_norm": 10.263136327466341, + "learning_rate": 1.8269870906130423e-05, + "loss": 1.3967, + "step": 35652 + }, + { + "epoch": 0.43, + "grad_norm": 37.69071075598947, + "learning_rate": 1.8269542454911184e-05, + "loss": 1.438, + "step": 35655 + }, + { + "epoch": 0.43, + "grad_norm": 4.791536229988676, + "learning_rate": 1.826921397547098e-05, + "loss": 1.6374, + "step": 35658 + }, + { + "epoch": 0.43, + "grad_norm": 18.666251312470727, + "learning_rate": 1.8268885467810923e-05, + "loss": 1.4656, + "step": 35661 + }, + { + "epoch": 0.43, + "grad_norm": 19.84478795909494, + "learning_rate": 1.826855693193214e-05, + "loss": 1.3356, + "step": 35664 + }, + { + "epoch": 0.43, + "grad_norm": 28.13975752532744, + "learning_rate": 1.826822836783575e-05, + "loss": 1.8029, + "step": 35667 + }, + { + "epoch": 0.43, + "grad_norm": 28.286982465372557, + "learning_rate": 1.8267899775522874e-05, + "loss": 1.3523, + "step": 35670 + }, + { + "epoch": 0.43, + "grad_norm": 10.717561395931085, + "learning_rate": 1.8267571154994637e-05, + "loss": 1.586, + "step": 35673 + }, + { + "epoch": 0.43, + "grad_norm": 15.882463891874847, + "learning_rate": 1.8267242506252153e-05, + "loss": 1.7643, + "step": 35676 + }, + { + "epoch": 0.43, + "grad_norm": 40.483637650303756, + "learning_rate": 1.826691382929655e-05, + "loss": 1.6946, + "step": 35679 + }, + { + "epoch": 0.43, + "grad_norm": 3.3599793370005235, + "learning_rate": 1.826658512412895e-05, + "loss": 1.4914, + "step": 35682 + }, + { + "epoch": 0.43, + "grad_norm": 32.3496303452329, + "learning_rate": 1.8266256390750472e-05, + "loss": 1.4129, + "step": 35685 + }, + { + "epoch": 0.43, + "grad_norm": 5.593882859232959, + "learning_rate": 1.8265927629162237e-05, + "loss": 1.2265, + "step": 35688 + }, + { + "epoch": 0.43, + "grad_norm": 57.80192943440984, + "learning_rate": 1.826559883936537e-05, + "loss": 1.2849, + "step": 35691 + }, + { + "epoch": 0.43, + "grad_norm": 35.996187898543305, + "learning_rate": 1.826527002136099e-05, + "loss": 1.3991, + "step": 35694 + }, + { + "epoch": 0.43, + "grad_norm": 18.773993764219746, + "learning_rate": 1.8264941175150222e-05, + "loss": 1.7524, + "step": 35697 + }, + { + "epoch": 0.43, + "grad_norm": 25.000871327018945, + "learning_rate": 1.8264612300734183e-05, + "loss": 1.3179, + "step": 35700 + }, + { + "epoch": 0.43, + "grad_norm": 29.813844029060686, + "learning_rate": 1.8264283398114004e-05, + "loss": 1.5059, + "step": 35703 + }, + { + "epoch": 0.43, + "grad_norm": 8.227694592227065, + "learning_rate": 1.82639544672908e-05, + "loss": 1.7262, + "step": 35706 + }, + { + "epoch": 0.43, + "grad_norm": 4.377164857144572, + "learning_rate": 1.8263625508265695e-05, + "loss": 1.2072, + "step": 35709 + }, + { + "epoch": 0.43, + "grad_norm": 6.051918973012966, + "learning_rate": 1.8263296521039816e-05, + "loss": 1.2506, + "step": 35712 + }, + { + "epoch": 0.43, + "grad_norm": 9.790805054230763, + "learning_rate": 1.826296750561428e-05, + "loss": 1.1956, + "step": 35715 + }, + { + "epoch": 0.43, + "grad_norm": 9.889436737162375, + "learning_rate": 1.8262638461990212e-05, + "loss": 1.585, + "step": 35718 + }, + { + "epoch": 0.43, + "grad_norm": 4.470457651656638, + "learning_rate": 1.8262309390168737e-05, + "loss": 1.581, + "step": 35721 + }, + { + "epoch": 0.43, + "grad_norm": 43.23768469688755, + "learning_rate": 1.8261980290150974e-05, + "loss": 1.5635, + "step": 35724 + }, + { + "epoch": 0.43, + "grad_norm": 3.139083671025422, + "learning_rate": 1.826165116193805e-05, + "loss": 1.0928, + "step": 35727 + }, + { + "epoch": 0.43, + "grad_norm": 6.7644389446356685, + "learning_rate": 1.826132200553109e-05, + "loss": 1.572, + "step": 35730 + }, + { + "epoch": 0.43, + "grad_norm": 5.191858212079525, + "learning_rate": 1.826099282093121e-05, + "loss": 1.5823, + "step": 35733 + }, + { + "epoch": 0.43, + "grad_norm": 11.581876649672099, + "learning_rate": 1.8260663608139534e-05, + "loss": 1.4487, + "step": 35736 + }, + { + "epoch": 0.43, + "grad_norm": 11.74928840212392, + "learning_rate": 1.826033436715719e-05, + "loss": 1.6633, + "step": 35739 + }, + { + "epoch": 0.43, + "grad_norm": 5.00901494691988, + "learning_rate": 1.82600050979853e-05, + "loss": 1.2701, + "step": 35742 + }, + { + "epoch": 0.43, + "grad_norm": 24.3518961916959, + "learning_rate": 1.825967580062499e-05, + "loss": 1.4206, + "step": 35745 + }, + { + "epoch": 0.43, + "grad_norm": 5.805604235090378, + "learning_rate": 1.825934647507738e-05, + "loss": 1.2438, + "step": 35748 + }, + { + "epoch": 0.43, + "grad_norm": 19.803718408189035, + "learning_rate": 1.8259017121343598e-05, + "loss": 1.3403, + "step": 35751 + }, + { + "epoch": 0.43, + "grad_norm": 6.948419207098933, + "learning_rate": 1.8258687739424763e-05, + "loss": 1.6111, + "step": 35754 + }, + { + "epoch": 0.43, + "grad_norm": 24.65138738872237, + "learning_rate": 1.8258358329322e-05, + "loss": 1.5575, + "step": 35757 + }, + { + "epoch": 0.43, + "grad_norm": 13.935361790009038, + "learning_rate": 1.8258028891036438e-05, + "loss": 1.2994, + "step": 35760 + }, + { + "epoch": 0.43, + "grad_norm": 8.339588491884658, + "learning_rate": 1.8257699424569197e-05, + "loss": 1.2866, + "step": 35763 + }, + { + "epoch": 0.43, + "grad_norm": 11.62746628354478, + "learning_rate": 1.8257369929921404e-05, + "loss": 1.2624, + "step": 35766 + }, + { + "epoch": 0.43, + "grad_norm": 9.140346678146834, + "learning_rate": 1.8257040407094176e-05, + "loss": 1.2915, + "step": 35769 + }, + { + "epoch": 0.43, + "grad_norm": 8.81439116690048, + "learning_rate": 1.825671085608865e-05, + "loss": 1.3519, + "step": 35772 + }, + { + "epoch": 0.43, + "grad_norm": 14.913745367471867, + "learning_rate": 1.8256381276905938e-05, + "loss": 1.0433, + "step": 35775 + }, + { + "epoch": 0.43, + "grad_norm": 19.297137342033356, + "learning_rate": 1.8256051669547173e-05, + "loss": 1.5133, + "step": 35778 + }, + { + "epoch": 0.43, + "grad_norm": 8.858125647944064, + "learning_rate": 1.825572203401348e-05, + "loss": 1.578, + "step": 35781 + }, + { + "epoch": 0.43, + "grad_norm": 40.08536465392988, + "learning_rate": 1.825539237030598e-05, + "loss": 1.4513, + "step": 35784 + }, + { + "epoch": 0.43, + "grad_norm": 22.60284167959446, + "learning_rate": 1.8255062678425798e-05, + "loss": 1.3993, + "step": 35787 + }, + { + "epoch": 0.43, + "grad_norm": 21.16324972780344, + "learning_rate": 1.825473295837406e-05, + "loss": 1.4613, + "step": 35790 + }, + { + "epoch": 0.43, + "grad_norm": 12.528584457830414, + "learning_rate": 1.8254403210151897e-05, + "loss": 1.4165, + "step": 35793 + }, + { + "epoch": 0.43, + "grad_norm": 20.599333944713454, + "learning_rate": 1.825407343376042e-05, + "loss": 1.6351, + "step": 35796 + }, + { + "epoch": 0.43, + "grad_norm": 8.323085788317762, + "learning_rate": 1.825374362920077e-05, + "loss": 1.4823, + "step": 35799 + }, + { + "epoch": 0.43, + "grad_norm": 35.9370612703508, + "learning_rate": 1.8253413796474065e-05, + "loss": 1.7628, + "step": 35802 + }, + { + "epoch": 0.43, + "grad_norm": 21.289681117104017, + "learning_rate": 1.8253083935581433e-05, + "loss": 1.5312, + "step": 35805 + }, + { + "epoch": 0.43, + "grad_norm": 7.657816749055744, + "learning_rate": 1.8252754046523996e-05, + "loss": 1.3875, + "step": 35808 + }, + { + "epoch": 0.43, + "grad_norm": 67.677996900416, + "learning_rate": 1.825242412930288e-05, + "loss": 1.3775, + "step": 35811 + }, + { + "epoch": 0.43, + "grad_norm": 9.955379627518901, + "learning_rate": 1.825209418391922e-05, + "loss": 1.2558, + "step": 35814 + }, + { + "epoch": 0.43, + "grad_norm": 5.5111050777598205, + "learning_rate": 1.825176421037413e-05, + "loss": 1.4572, + "step": 35817 + }, + { + "epoch": 0.43, + "grad_norm": 13.37538568739882, + "learning_rate": 1.825143420866874e-05, + "loss": 1.4902, + "step": 35820 + }, + { + "epoch": 0.43, + "grad_norm": 22.788491754541873, + "learning_rate": 1.825110417880418e-05, + "loss": 1.6163, + "step": 35823 + }, + { + "epoch": 0.43, + "grad_norm": 20.779440853003525, + "learning_rate": 1.825077412078157e-05, + "loss": 1.4491, + "step": 35826 + }, + { + "epoch": 0.43, + "grad_norm": 7.28762355772939, + "learning_rate": 1.8250444034602042e-05, + "loss": 1.2332, + "step": 35829 + }, + { + "epoch": 0.43, + "grad_norm": 14.103273610060025, + "learning_rate": 1.825011392026672e-05, + "loss": 1.2924, + "step": 35832 + }, + { + "epoch": 0.43, + "grad_norm": 12.565506005025924, + "learning_rate": 1.824978377777673e-05, + "loss": 1.1225, + "step": 35835 + }, + { + "epoch": 0.43, + "grad_norm": 18.050634951204042, + "learning_rate": 1.8249453607133203e-05, + "loss": 1.8037, + "step": 35838 + }, + { + "epoch": 0.43, + "grad_norm": 15.707837434833401, + "learning_rate": 1.824912340833726e-05, + "loss": 1.4576, + "step": 35841 + }, + { + "epoch": 0.43, + "grad_norm": 26.98118207836084, + "learning_rate": 1.824879318139003e-05, + "loss": 1.2126, + "step": 35844 + }, + { + "epoch": 0.43, + "grad_norm": 9.604559686409708, + "learning_rate": 1.824846292629264e-05, + "loss": 1.2165, + "step": 35847 + }, + { + "epoch": 0.43, + "grad_norm": 37.276618224303164, + "learning_rate": 1.8248132643046217e-05, + "loss": 1.478, + "step": 35850 + }, + { + "epoch": 0.43, + "grad_norm": 10.47092039275757, + "learning_rate": 1.824780233165189e-05, + "loss": 1.89, + "step": 35853 + }, + { + "epoch": 0.43, + "grad_norm": 8.826512581497324, + "learning_rate": 1.824747199211078e-05, + "loss": 1.4329, + "step": 35856 + }, + { + "epoch": 0.43, + "grad_norm": 7.855734179546007, + "learning_rate": 1.8247141624424024e-05, + "loss": 1.1991, + "step": 35859 + }, + { + "epoch": 0.43, + "grad_norm": 13.512094967467377, + "learning_rate": 1.8246811228592742e-05, + "loss": 1.2885, + "step": 35862 + }, + { + "epoch": 0.43, + "grad_norm": 18.32594927798467, + "learning_rate": 1.8246480804618065e-05, + "loss": 1.361, + "step": 35865 + }, + { + "epoch": 0.43, + "grad_norm": 5.383258051060718, + "learning_rate": 1.8246150352501114e-05, + "loss": 1.7269, + "step": 35868 + }, + { + "epoch": 0.43, + "grad_norm": 20.815118550637006, + "learning_rate": 1.824581987224303e-05, + "loss": 1.6278, + "step": 35871 + }, + { + "epoch": 0.43, + "grad_norm": 12.099293713505705, + "learning_rate": 1.8245489363844927e-05, + "loss": 1.5701, + "step": 35874 + }, + { + "epoch": 0.43, + "grad_norm": 2.1077074260890165, + "learning_rate": 1.8245158827307943e-05, + "loss": 1.3743, + "step": 35877 + }, + { + "epoch": 0.43, + "grad_norm": 28.511290356004885, + "learning_rate": 1.82448282626332e-05, + "loss": 1.4726, + "step": 35880 + }, + { + "epoch": 0.43, + "grad_norm": 6.953877834751693, + "learning_rate": 1.8244497669821826e-05, + "loss": 1.7554, + "step": 35883 + }, + { + "epoch": 0.43, + "grad_norm": 9.09088179838424, + "learning_rate": 1.8244167048874953e-05, + "loss": 1.3833, + "step": 35886 + }, + { + "epoch": 0.43, + "grad_norm": 6.295323539671166, + "learning_rate": 1.8243836399793708e-05, + "loss": 1.4516, + "step": 35889 + }, + { + "epoch": 0.43, + "grad_norm": 18.47326201533613, + "learning_rate": 1.824350572257922e-05, + "loss": 1.5288, + "step": 35892 + }, + { + "epoch": 0.43, + "grad_norm": 7.533540787758788, + "learning_rate": 1.8243175017232614e-05, + "loss": 1.7327, + "step": 35895 + }, + { + "epoch": 0.43, + "grad_norm": 6.772283221240449, + "learning_rate": 1.824284428375502e-05, + "loss": 1.5343, + "step": 35898 + }, + { + "epoch": 0.43, + "grad_norm": 10.434488908229607, + "learning_rate": 1.8242513522147568e-05, + "loss": 1.6078, + "step": 35901 + }, + { + "epoch": 0.43, + "grad_norm": 10.729550404299834, + "learning_rate": 1.8242182732411385e-05, + "loss": 1.3414, + "step": 35904 + }, + { + "epoch": 0.43, + "grad_norm": 11.42582524041291, + "learning_rate": 1.8241851914547607e-05, + "loss": 1.4684, + "step": 35907 + }, + { + "epoch": 0.43, + "grad_norm": 9.181991024046594, + "learning_rate": 1.824152106855735e-05, + "loss": 1.1652, + "step": 35910 + }, + { + "epoch": 0.43, + "grad_norm": 14.709518816525902, + "learning_rate": 1.8241190194441753e-05, + "loss": 1.3415, + "step": 35913 + }, + { + "epoch": 0.43, + "grad_norm": 48.09996479929073, + "learning_rate": 1.8240859292201943e-05, + "loss": 1.7136, + "step": 35916 + }, + { + "epoch": 0.43, + "grad_norm": 19.314136330777547, + "learning_rate": 1.824052836183905e-05, + "loss": 1.1987, + "step": 35919 + }, + { + "epoch": 0.43, + "grad_norm": 5.418415358655385, + "learning_rate": 1.82401974033542e-05, + "loss": 1.2949, + "step": 35922 + }, + { + "epoch": 0.43, + "grad_norm": 10.212082895579423, + "learning_rate": 1.8239866416748526e-05, + "loss": 1.8266, + "step": 35925 + }, + { + "epoch": 0.43, + "grad_norm": 8.980734684050782, + "learning_rate": 1.8239535402023154e-05, + "loss": 1.1226, + "step": 35928 + }, + { + "epoch": 0.43, + "grad_norm": 10.22644367507336, + "learning_rate": 1.8239204359179216e-05, + "loss": 1.3283, + "step": 35931 + }, + { + "epoch": 0.43, + "grad_norm": 11.460855247060115, + "learning_rate": 1.823887328821784e-05, + "loss": 1.3855, + "step": 35934 + }, + { + "epoch": 0.43, + "grad_norm": 7.0431260556494415, + "learning_rate": 1.823854218914016e-05, + "loss": 1.6758, + "step": 35937 + }, + { + "epoch": 0.43, + "grad_norm": 128.6685015548225, + "learning_rate": 1.82382110619473e-05, + "loss": 1.428, + "step": 35940 + }, + { + "epoch": 0.43, + "grad_norm": 4.041250589257097, + "learning_rate": 1.8237879906640397e-05, + "loss": 1.4932, + "step": 35943 + }, + { + "epoch": 0.43, + "grad_norm": 18.826185050740495, + "learning_rate": 1.8237548723220573e-05, + "loss": 1.505, + "step": 35946 + }, + { + "epoch": 0.43, + "grad_norm": 12.939285498658583, + "learning_rate": 1.8237217511688967e-05, + "loss": 1.3619, + "step": 35949 + }, + { + "epoch": 0.43, + "grad_norm": 5.506549678016493, + "learning_rate": 1.8236886272046698e-05, + "loss": 1.4563, + "step": 35952 + }, + { + "epoch": 0.43, + "grad_norm": 36.22616100120888, + "learning_rate": 1.8236555004294905e-05, + "loss": 1.4451, + "step": 35955 + }, + { + "epoch": 0.43, + "grad_norm": 4.505576987398914, + "learning_rate": 1.8236223708434718e-05, + "loss": 1.153, + "step": 35958 + }, + { + "epoch": 0.43, + "grad_norm": 9.53098792708372, + "learning_rate": 1.823589238446727e-05, + "loss": 1.3398, + "step": 35961 + }, + { + "epoch": 0.43, + "grad_norm": 15.059394192193631, + "learning_rate": 1.8235561032393683e-05, + "loss": 1.2134, + "step": 35964 + }, + { + "epoch": 0.43, + "grad_norm": 24.080170789310305, + "learning_rate": 1.8235229652215095e-05, + "loss": 1.5922, + "step": 35967 + }, + { + "epoch": 0.43, + "grad_norm": 5.130428449086802, + "learning_rate": 1.8234898243932633e-05, + "loss": 1.2685, + "step": 35970 + }, + { + "epoch": 0.43, + "grad_norm": 9.787030683329876, + "learning_rate": 1.823456680754743e-05, + "loss": 1.486, + "step": 35973 + }, + { + "epoch": 0.43, + "grad_norm": 10.428148392082596, + "learning_rate": 1.8234235343060617e-05, + "loss": 1.11, + "step": 35976 + }, + { + "epoch": 0.43, + "grad_norm": 10.858833029151798, + "learning_rate": 1.8233903850473324e-05, + "loss": 1.8536, + "step": 35979 + }, + { + "epoch": 0.43, + "grad_norm": 30.3469886296123, + "learning_rate": 1.8233572329786682e-05, + "loss": 1.2297, + "step": 35982 + }, + { + "epoch": 0.43, + "grad_norm": 17.6167653867505, + "learning_rate": 1.8233240781001823e-05, + "loss": 1.6749, + "step": 35985 + }, + { + "epoch": 0.43, + "grad_norm": 11.127104434472326, + "learning_rate": 1.823290920411988e-05, + "loss": 1.3724, + "step": 35988 + }, + { + "epoch": 0.43, + "grad_norm": 7.2856288821275585, + "learning_rate": 1.823257759914199e-05, + "loss": 1.4612, + "step": 35991 + }, + { + "epoch": 0.43, + "grad_norm": 12.432436125885314, + "learning_rate": 1.823224596606927e-05, + "loss": 1.5658, + "step": 35994 + }, + { + "epoch": 0.43, + "grad_norm": 11.466511808072028, + "learning_rate": 1.8231914304902858e-05, + "loss": 1.6118, + "step": 35997 + }, + { + "epoch": 0.43, + "grad_norm": 16.425867798524614, + "learning_rate": 1.8231582615643893e-05, + "loss": 1.4607, + "step": 36000 + }, + { + "epoch": 0.43, + "grad_norm": 10.858741920938373, + "learning_rate": 1.82312508982935e-05, + "loss": 1.4729, + "step": 36003 + }, + { + "epoch": 0.43, + "grad_norm": 56.652813053436496, + "learning_rate": 1.823091915285281e-05, + "loss": 1.5311, + "step": 36006 + }, + { + "epoch": 0.43, + "grad_norm": 10.35503235875631, + "learning_rate": 1.8230587379322962e-05, + "loss": 1.8402, + "step": 36009 + }, + { + "epoch": 0.43, + "grad_norm": 25.834713355613765, + "learning_rate": 1.823025557770508e-05, + "loss": 1.6981, + "step": 36012 + }, + { + "epoch": 0.43, + "grad_norm": 12.811756811491792, + "learning_rate": 1.82299237480003e-05, + "loss": 1.8413, + "step": 36015 + }, + { + "epoch": 0.43, + "grad_norm": 3.9546138266389455, + "learning_rate": 1.822959189020976e-05, + "loss": 1.2594, + "step": 36018 + }, + { + "epoch": 0.43, + "grad_norm": 6.896362422223678, + "learning_rate": 1.8229260004334585e-05, + "loss": 1.2051, + "step": 36021 + }, + { + "epoch": 0.43, + "grad_norm": 10.78640155258401, + "learning_rate": 1.822892809037591e-05, + "loss": 1.1517, + "step": 36024 + }, + { + "epoch": 0.43, + "grad_norm": 26.538869977787623, + "learning_rate": 1.8228596148334862e-05, + "loss": 1.8305, + "step": 36027 + }, + { + "epoch": 0.43, + "grad_norm": 9.277768292408547, + "learning_rate": 1.8228264178212586e-05, + "loss": 1.5939, + "step": 36030 + }, + { + "epoch": 0.43, + "grad_norm": 9.22391911425409, + "learning_rate": 1.8227932180010205e-05, + "loss": 1.6838, + "step": 36033 + }, + { + "epoch": 0.43, + "grad_norm": 22.862590690823605, + "learning_rate": 1.8227600153728855e-05, + "loss": 1.4763, + "step": 36036 + }, + { + "epoch": 0.43, + "grad_norm": 17.167086748319008, + "learning_rate": 1.822726809936967e-05, + "loss": 1.4266, + "step": 36039 + }, + { + "epoch": 0.43, + "grad_norm": 4.494873107086211, + "learning_rate": 1.822693601693378e-05, + "loss": 1.3143, + "step": 36042 + }, + { + "epoch": 0.43, + "grad_norm": 8.407859801838251, + "learning_rate": 1.8226603906422324e-05, + "loss": 1.6096, + "step": 36045 + }, + { + "epoch": 0.43, + "grad_norm": 3.2647726029970245, + "learning_rate": 1.8226271767836433e-05, + "loss": 1.0085, + "step": 36048 + }, + { + "epoch": 0.43, + "grad_norm": 29.655439607968674, + "learning_rate": 1.8225939601177238e-05, + "loss": 1.5467, + "step": 36051 + }, + { + "epoch": 0.43, + "grad_norm": 9.20181991594957, + "learning_rate": 1.8225607406445873e-05, + "loss": 1.3256, + "step": 36054 + }, + { + "epoch": 0.43, + "grad_norm": 4.727661067439793, + "learning_rate": 1.8225275183643473e-05, + "loss": 1.3571, + "step": 36057 + }, + { + "epoch": 0.43, + "grad_norm": 6.855153133563486, + "learning_rate": 1.822494293277117e-05, + "loss": 1.6034, + "step": 36060 + }, + { + "epoch": 0.43, + "grad_norm": 68.91009098802121, + "learning_rate": 1.8224610653830105e-05, + "loss": 1.4578, + "step": 36063 + }, + { + "epoch": 0.43, + "grad_norm": 10.395071398415233, + "learning_rate": 1.8224278346821402e-05, + "loss": 1.314, + "step": 36066 + }, + { + "epoch": 0.43, + "grad_norm": 42.39989276829163, + "learning_rate": 1.82239460117462e-05, + "loss": 1.4296, + "step": 36069 + }, + { + "epoch": 0.43, + "grad_norm": 43.26839613993998, + "learning_rate": 1.8223613648605635e-05, + "loss": 1.5068, + "step": 36072 + }, + { + "epoch": 0.43, + "grad_norm": 36.4776206476866, + "learning_rate": 1.8223281257400834e-05, + "loss": 1.3625, + "step": 36075 + }, + { + "epoch": 0.43, + "grad_norm": 33.14867452841759, + "learning_rate": 1.822294883813294e-05, + "loss": 1.9762, + "step": 36078 + }, + { + "epoch": 0.43, + "grad_norm": 12.880309157104794, + "learning_rate": 1.822261639080308e-05, + "loss": 1.3772, + "step": 36081 + }, + { + "epoch": 0.43, + "grad_norm": 21.669784550326582, + "learning_rate": 1.8222283915412396e-05, + "loss": 1.1536, + "step": 36084 + }, + { + "epoch": 0.43, + "grad_norm": 8.1089218397466, + "learning_rate": 1.8221951411962018e-05, + "loss": 1.4323, + "step": 36087 + }, + { + "epoch": 0.43, + "grad_norm": 23.31224666723889, + "learning_rate": 1.822161888045308e-05, + "loss": 1.4185, + "step": 36090 + }, + { + "epoch": 0.43, + "grad_norm": 19.982874787454886, + "learning_rate": 1.822128632088672e-05, + "loss": 1.7454, + "step": 36093 + }, + { + "epoch": 0.43, + "grad_norm": 11.886096961295546, + "learning_rate": 1.8220953733264068e-05, + "loss": 1.3277, + "step": 36096 + }, + { + "epoch": 0.43, + "grad_norm": 29.46596832100854, + "learning_rate": 1.8220621117586263e-05, + "loss": 1.0217, + "step": 36099 + }, + { + "epoch": 0.43, + "grad_norm": 25.9153788499924, + "learning_rate": 1.822028847385444e-05, + "loss": 1.2329, + "step": 36102 + }, + { + "epoch": 0.43, + "grad_norm": 18.547248236452134, + "learning_rate": 1.821995580206973e-05, + "loss": 1.1499, + "step": 36105 + }, + { + "epoch": 0.43, + "grad_norm": 18.466312598928248, + "learning_rate": 1.8219623102233276e-05, + "loss": 1.6035, + "step": 36108 + }, + { + "epoch": 0.43, + "grad_norm": 8.372446101948215, + "learning_rate": 1.8219290374346207e-05, + "loss": 1.5367, + "step": 36111 + }, + { + "epoch": 0.43, + "grad_norm": 5.386937677616009, + "learning_rate": 1.8218957618409663e-05, + "loss": 1.5703, + "step": 36114 + }, + { + "epoch": 0.43, + "grad_norm": 6.857416667615966, + "learning_rate": 1.8218624834424775e-05, + "loss": 1.3926, + "step": 36117 + }, + { + "epoch": 0.43, + "grad_norm": 42.55035076934975, + "learning_rate": 1.821829202239268e-05, + "loss": 1.3359, + "step": 36120 + }, + { + "epoch": 0.43, + "grad_norm": 14.44566086008291, + "learning_rate": 1.8217959182314516e-05, + "loss": 1.2556, + "step": 36123 + }, + { + "epoch": 0.43, + "grad_norm": 15.828239629679354, + "learning_rate": 1.8217626314191416e-05, + "loss": 1.3254, + "step": 36126 + }, + { + "epoch": 0.43, + "grad_norm": 12.912736348154905, + "learning_rate": 1.821729341802452e-05, + "loss": 1.6667, + "step": 36129 + }, + { + "epoch": 0.43, + "grad_norm": 22.838070742620374, + "learning_rate": 1.821696049381496e-05, + "loss": 1.4381, + "step": 36132 + }, + { + "epoch": 0.43, + "grad_norm": 9.097919663017944, + "learning_rate": 1.8216627541563872e-05, + "loss": 1.4229, + "step": 36135 + }, + { + "epoch": 0.43, + "grad_norm": 8.647773129067966, + "learning_rate": 1.8216294561272397e-05, + "loss": 1.353, + "step": 36138 + }, + { + "epoch": 0.43, + "grad_norm": 34.93253594065286, + "learning_rate": 1.8215961552941664e-05, + "loss": 1.2647, + "step": 36141 + }, + { + "epoch": 0.43, + "grad_norm": 56.10409424378291, + "learning_rate": 1.821562851657282e-05, + "loss": 1.2564, + "step": 36144 + }, + { + "epoch": 0.43, + "grad_norm": 5.999842258794059, + "learning_rate": 1.821529545216699e-05, + "loss": 1.4222, + "step": 36147 + }, + { + "epoch": 0.43, + "grad_norm": 9.42235150702421, + "learning_rate": 1.8214962359725318e-05, + "loss": 1.4021, + "step": 36150 + }, + { + "epoch": 0.43, + "grad_norm": 2.1689487956905955, + "learning_rate": 1.8214629239248934e-05, + "loss": 1.9611, + "step": 36153 + }, + { + "epoch": 0.43, + "grad_norm": 16.534201956671208, + "learning_rate": 1.8214296090738982e-05, + "loss": 1.3941, + "step": 36156 + }, + { + "epoch": 0.43, + "grad_norm": 11.89875335676906, + "learning_rate": 1.82139629141966e-05, + "loss": 1.3165, + "step": 36159 + }, + { + "epoch": 0.43, + "grad_norm": 55.89360892625045, + "learning_rate": 1.8213629709622916e-05, + "loss": 1.3187, + "step": 36162 + }, + { + "epoch": 0.43, + "grad_norm": 8.846626385978578, + "learning_rate": 1.8213296477019076e-05, + "loss": 1.5654, + "step": 36165 + }, + { + "epoch": 0.43, + "grad_norm": 45.72221623413151, + "learning_rate": 1.821296321638621e-05, + "loss": 1.3809, + "step": 36168 + }, + { + "epoch": 0.43, + "grad_norm": 17.271734990594126, + "learning_rate": 1.821262992772546e-05, + "loss": 1.3877, + "step": 36171 + }, + { + "epoch": 0.43, + "grad_norm": 113.7943738323254, + "learning_rate": 1.8212296611037965e-05, + "loss": 1.4797, + "step": 36174 + }, + { + "epoch": 0.44, + "grad_norm": 59.49630485059742, + "learning_rate": 1.8211963266324858e-05, + "loss": 1.6713, + "step": 36177 + }, + { + "epoch": 0.44, + "grad_norm": 27.929176785711757, + "learning_rate": 1.8211629893587277e-05, + "loss": 1.8097, + "step": 36180 + }, + { + "epoch": 0.44, + "grad_norm": 21.816988447843205, + "learning_rate": 1.8211296492826363e-05, + "loss": 1.881, + "step": 36183 + }, + { + "epoch": 0.44, + "grad_norm": 46.50856178278398, + "learning_rate": 1.821096306404325e-05, + "loss": 1.3689, + "step": 36186 + }, + { + "epoch": 0.44, + "grad_norm": 24.262290084292424, + "learning_rate": 1.821062960723908e-05, + "loss": 1.337, + "step": 36189 + }, + { + "epoch": 0.44, + "grad_norm": 29.261874179648757, + "learning_rate": 1.8210296122414987e-05, + "loss": 1.8024, + "step": 36192 + }, + { + "epoch": 0.44, + "grad_norm": 15.567359521701722, + "learning_rate": 1.8209962609572112e-05, + "loss": 1.6724, + "step": 36195 + }, + { + "epoch": 0.44, + "grad_norm": 16.0108273240325, + "learning_rate": 1.820962906871159e-05, + "loss": 1.2679, + "step": 36198 + }, + { + "epoch": 0.44, + "grad_norm": 13.672556811365956, + "learning_rate": 1.820929549983456e-05, + "loss": 1.7413, + "step": 36201 + }, + { + "epoch": 0.44, + "grad_norm": 10.336757543268845, + "learning_rate": 1.8208961902942165e-05, + "loss": 1.4141, + "step": 36204 + }, + { + "epoch": 0.44, + "grad_norm": 2.6937981905401682, + "learning_rate": 1.8208628278035536e-05, + "loss": 1.5117, + "step": 36207 + }, + { + "epoch": 0.44, + "grad_norm": 24.685471168988574, + "learning_rate": 1.8208294625115817e-05, + "loss": 1.5098, + "step": 36210 + }, + { + "epoch": 0.44, + "grad_norm": 17.640127727450945, + "learning_rate": 1.8207960944184146e-05, + "loss": 1.4893, + "step": 36213 + }, + { + "epoch": 0.44, + "grad_norm": 16.268356040981306, + "learning_rate": 1.820762723524166e-05, + "loss": 1.7036, + "step": 36216 + }, + { + "epoch": 0.44, + "grad_norm": 40.6494634879815, + "learning_rate": 1.82072934982895e-05, + "loss": 1.539, + "step": 36219 + }, + { + "epoch": 0.44, + "grad_norm": 13.218906480873919, + "learning_rate": 1.8206959733328804e-05, + "loss": 1.3647, + "step": 36222 + }, + { + "epoch": 0.44, + "grad_norm": 10.984550699849079, + "learning_rate": 1.8206625940360706e-05, + "loss": 1.5334, + "step": 36225 + }, + { + "epoch": 0.44, + "grad_norm": 16.330387578119517, + "learning_rate": 1.8206292119386357e-05, + "loss": 1.4184, + "step": 36228 + }, + { + "epoch": 0.44, + "grad_norm": 13.864929938991123, + "learning_rate": 1.820595827040688e-05, + "loss": 1.4859, + "step": 36231 + }, + { + "epoch": 0.44, + "grad_norm": 110.87865569117359, + "learning_rate": 1.820562439342343e-05, + "loss": 1.6246, + "step": 36234 + }, + { + "epoch": 0.44, + "grad_norm": 9.29994310870411, + "learning_rate": 1.8205290488437137e-05, + "loss": 1.3301, + "step": 36237 + }, + { + "epoch": 0.44, + "grad_norm": 53.34748237874527, + "learning_rate": 1.8204956555449142e-05, + "loss": 1.8042, + "step": 36240 + }, + { + "epoch": 0.44, + "grad_norm": 6.408092662481237, + "learning_rate": 1.820462259446059e-05, + "loss": 1.4964, + "step": 36243 + }, + { + "epoch": 0.44, + "grad_norm": 28.530682430167072, + "learning_rate": 1.8204288605472613e-05, + "loss": 1.5623, + "step": 36246 + }, + { + "epoch": 0.44, + "grad_norm": 21.144312781794117, + "learning_rate": 1.8203954588486353e-05, + "loss": 1.3557, + "step": 36249 + }, + { + "epoch": 0.44, + "grad_norm": 10.2543699815202, + "learning_rate": 1.8203620543502954e-05, + "loss": 1.801, + "step": 36252 + }, + { + "epoch": 0.44, + "grad_norm": 16.94787759076332, + "learning_rate": 1.820328647052355e-05, + "loss": 1.571, + "step": 36255 + }, + { + "epoch": 0.44, + "grad_norm": 6.442813828859475, + "learning_rate": 1.8202952369549284e-05, + "loss": 1.6393, + "step": 36258 + }, + { + "epoch": 0.44, + "grad_norm": 16.734358150753156, + "learning_rate": 1.82026182405813e-05, + "loss": 1.3652, + "step": 36261 + }, + { + "epoch": 0.44, + "grad_norm": 8.324595581594322, + "learning_rate": 1.8202284083620732e-05, + "loss": 1.5194, + "step": 36264 + }, + { + "epoch": 0.44, + "grad_norm": 11.104191879449234, + "learning_rate": 1.8201949898668726e-05, + "loss": 1.7159, + "step": 36267 + }, + { + "epoch": 0.44, + "grad_norm": 11.61883426670377, + "learning_rate": 1.820161568572642e-05, + "loss": 1.7724, + "step": 36270 + }, + { + "epoch": 0.44, + "grad_norm": 39.96273993530174, + "learning_rate": 1.8201281444794947e-05, + "loss": 1.3129, + "step": 36273 + }, + { + "epoch": 0.44, + "grad_norm": 12.93451455857349, + "learning_rate": 1.8200947175875457e-05, + "loss": 1.5046, + "step": 36276 + }, + { + "epoch": 0.44, + "grad_norm": 4.9916485851311805, + "learning_rate": 1.820061287896909e-05, + "loss": 1.6466, + "step": 36279 + }, + { + "epoch": 0.44, + "grad_norm": 126.79009498131856, + "learning_rate": 1.8200278554076986e-05, + "loss": 1.2802, + "step": 36282 + }, + { + "epoch": 0.44, + "grad_norm": 11.216244784407984, + "learning_rate": 1.819994420120028e-05, + "loss": 1.3596, + "step": 36285 + }, + { + "epoch": 0.44, + "grad_norm": 24.551580202220855, + "learning_rate": 1.8199609820340123e-05, + "loss": 1.2957, + "step": 36288 + }, + { + "epoch": 0.44, + "grad_norm": 46.04982054860787, + "learning_rate": 1.819927541149765e-05, + "loss": 1.586, + "step": 36291 + }, + { + "epoch": 0.44, + "grad_norm": 11.020532683753594, + "learning_rate": 1.8198940974674e-05, + "loss": 1.7329, + "step": 36294 + }, + { + "epoch": 0.44, + "grad_norm": 10.680238467048591, + "learning_rate": 1.8198606509870322e-05, + "loss": 1.558, + "step": 36297 + }, + { + "epoch": 0.44, + "grad_norm": 39.30397145830907, + "learning_rate": 1.8198272017087756e-05, + "loss": 1.7188, + "step": 36300 + }, + { + "epoch": 0.44, + "grad_norm": 30.867700436767166, + "learning_rate": 1.8197937496327434e-05, + "loss": 1.559, + "step": 36303 + }, + { + "epoch": 0.44, + "grad_norm": 22.59231009250233, + "learning_rate": 1.8197602947590505e-05, + "loss": 1.4789, + "step": 36306 + }, + { + "epoch": 0.44, + "grad_norm": 16.13447876864834, + "learning_rate": 1.8197268370878114e-05, + "loss": 1.2839, + "step": 36309 + }, + { + "epoch": 0.44, + "grad_norm": 32.35375415452862, + "learning_rate": 1.8196933766191397e-05, + "loss": 1.6222, + "step": 36312 + }, + { + "epoch": 0.44, + "grad_norm": 10.000649306340517, + "learning_rate": 1.8196599133531497e-05, + "loss": 1.1339, + "step": 36315 + }, + { + "epoch": 0.44, + "grad_norm": 11.877669396470054, + "learning_rate": 1.8196264472899554e-05, + "loss": 1.2608, + "step": 36318 + }, + { + "epoch": 0.44, + "grad_norm": 14.129707077667632, + "learning_rate": 1.819592978429672e-05, + "loss": 1.4545, + "step": 36321 + }, + { + "epoch": 0.44, + "grad_norm": 16.7834432071662, + "learning_rate": 1.819559506772412e-05, + "loss": 1.5166, + "step": 36324 + }, + { + "epoch": 0.44, + "grad_norm": 2.58392467900853, + "learning_rate": 1.8195260323182913e-05, + "loss": 1.3035, + "step": 36327 + }, + { + "epoch": 0.44, + "grad_norm": 23.552670096572573, + "learning_rate": 1.819492555067423e-05, + "loss": 1.6567, + "step": 36330 + }, + { + "epoch": 0.44, + "grad_norm": 5.106011440758325, + "learning_rate": 1.8194590750199222e-05, + "loss": 1.5588, + "step": 36333 + }, + { + "epoch": 0.44, + "grad_norm": 32.55247042191929, + "learning_rate": 1.8194255921759024e-05, + "loss": 1.4882, + "step": 36336 + }, + { + "epoch": 0.44, + "grad_norm": 7.4304778965006975, + "learning_rate": 1.8193921065354785e-05, + "loss": 1.5866, + "step": 36339 + }, + { + "epoch": 0.44, + "grad_norm": 25.75758495665263, + "learning_rate": 1.8193586180987647e-05, + "loss": 1.7438, + "step": 36342 + }, + { + "epoch": 0.44, + "grad_norm": 9.840464495162301, + "learning_rate": 1.8193251268658746e-05, + "loss": 1.3804, + "step": 36345 + }, + { + "epoch": 0.44, + "grad_norm": 34.15072929231481, + "learning_rate": 1.819291632836923e-05, + "loss": 1.5977, + "step": 36348 + }, + { + "epoch": 0.44, + "grad_norm": 14.058588713101646, + "learning_rate": 1.8192581360120243e-05, + "loss": 1.4761, + "step": 36351 + }, + { + "epoch": 0.44, + "grad_norm": 5.734441059755449, + "learning_rate": 1.8192246363912928e-05, + "loss": 1.3626, + "step": 36354 + }, + { + "epoch": 0.44, + "grad_norm": 22.02752557971243, + "learning_rate": 1.8191911339748426e-05, + "loss": 1.9068, + "step": 36357 + }, + { + "epoch": 0.44, + "grad_norm": 6.584168973155817, + "learning_rate": 1.819157628762788e-05, + "loss": 1.4669, + "step": 36360 + }, + { + "epoch": 0.44, + "grad_norm": 7.423070985490367, + "learning_rate": 1.8191241207552435e-05, + "loss": 1.4845, + "step": 36363 + }, + { + "epoch": 0.44, + "grad_norm": 14.304815980340619, + "learning_rate": 1.8190906099523233e-05, + "loss": 1.2111, + "step": 36366 + }, + { + "epoch": 0.44, + "grad_norm": 19.21088755255954, + "learning_rate": 1.819057096354142e-05, + "loss": 1.4757, + "step": 36369 + }, + { + "epoch": 0.44, + "grad_norm": 28.760514961768585, + "learning_rate": 1.819023579960814e-05, + "loss": 1.4141, + "step": 36372 + }, + { + "epoch": 0.44, + "grad_norm": 9.458547164553574, + "learning_rate": 1.8189900607724537e-05, + "loss": 1.5014, + "step": 36375 + }, + { + "epoch": 0.44, + "grad_norm": 3.7293919135796125, + "learning_rate": 1.818956538789175e-05, + "loss": 1.0653, + "step": 36378 + }, + { + "epoch": 0.44, + "grad_norm": 14.920571357826372, + "learning_rate": 1.8189230140110926e-05, + "loss": 1.3902, + "step": 36381 + }, + { + "epoch": 0.44, + "grad_norm": 36.27232715326804, + "learning_rate": 1.818889486438321e-05, + "loss": 1.3345, + "step": 36384 + }, + { + "epoch": 0.44, + "grad_norm": 15.625104107843319, + "learning_rate": 1.8188559560709748e-05, + "loss": 1.8524, + "step": 36387 + }, + { + "epoch": 0.44, + "grad_norm": 19.909532902435142, + "learning_rate": 1.8188224229091677e-05, + "loss": 1.3038, + "step": 36390 + }, + { + "epoch": 0.44, + "grad_norm": 35.245668486992294, + "learning_rate": 1.818788886953015e-05, + "loss": 1.5046, + "step": 36393 + }, + { + "epoch": 0.44, + "grad_norm": 659.1465540119066, + "learning_rate": 1.8187553482026304e-05, + "loss": 1.5474, + "step": 36396 + }, + { + "epoch": 0.44, + "grad_norm": 6.284197589498017, + "learning_rate": 1.818721806658129e-05, + "loss": 1.691, + "step": 36399 + }, + { + "epoch": 0.44, + "grad_norm": 17.12306018244889, + "learning_rate": 1.8186882623196245e-05, + "loss": 1.6554, + "step": 36402 + }, + { + "epoch": 0.44, + "grad_norm": 6.519065022840518, + "learning_rate": 1.8186547151872324e-05, + "loss": 1.4767, + "step": 36405 + }, + { + "epoch": 0.44, + "grad_norm": 43.86270950909992, + "learning_rate": 1.8186211652610665e-05, + "loss": 1.3835, + "step": 36408 + }, + { + "epoch": 0.44, + "grad_norm": 15.701156950403204, + "learning_rate": 1.8185876125412412e-05, + "loss": 1.4885, + "step": 36411 + }, + { + "epoch": 0.44, + "grad_norm": 68.40638650647516, + "learning_rate": 1.8185540570278718e-05, + "loss": 1.6163, + "step": 36414 + }, + { + "epoch": 0.44, + "grad_norm": 14.070296525507288, + "learning_rate": 1.8185204987210718e-05, + "loss": 1.2426, + "step": 36417 + }, + { + "epoch": 0.44, + "grad_norm": 25.400547951563293, + "learning_rate": 1.8184869376209563e-05, + "loss": 1.5831, + "step": 36420 + }, + { + "epoch": 0.44, + "grad_norm": 88.5793447618846, + "learning_rate": 1.8184533737276395e-05, + "loss": 1.2572, + "step": 36423 + }, + { + "epoch": 0.44, + "grad_norm": 22.128680242292177, + "learning_rate": 1.818419807041236e-05, + "loss": 1.5333, + "step": 36426 + }, + { + "epoch": 0.44, + "grad_norm": 34.64530761727677, + "learning_rate": 1.818386237561861e-05, + "loss": 1.541, + "step": 36429 + }, + { + "epoch": 0.44, + "grad_norm": 32.26983529282775, + "learning_rate": 1.8183526652896283e-05, + "loss": 1.504, + "step": 36432 + }, + { + "epoch": 0.44, + "grad_norm": 6.031516380855738, + "learning_rate": 1.8183190902246527e-05, + "loss": 1.5562, + "step": 36435 + }, + { + "epoch": 0.44, + "grad_norm": 8.743884857507943, + "learning_rate": 1.818285512367049e-05, + "loss": 1.2486, + "step": 36438 + }, + { + "epoch": 0.44, + "grad_norm": 4.377764407127725, + "learning_rate": 1.8182519317169315e-05, + "loss": 1.5281, + "step": 36441 + }, + { + "epoch": 0.44, + "grad_norm": 12.680232103320764, + "learning_rate": 1.8182183482744148e-05, + "loss": 1.2595, + "step": 36444 + }, + { + "epoch": 0.44, + "grad_norm": 51.636621854838864, + "learning_rate": 1.8181847620396134e-05, + "loss": 1.7918, + "step": 36447 + }, + { + "epoch": 0.44, + "grad_norm": 15.875272323168451, + "learning_rate": 1.8181511730126424e-05, + "loss": 1.3407, + "step": 36450 + }, + { + "epoch": 0.44, + "grad_norm": 28.53182036584865, + "learning_rate": 1.818117581193616e-05, + "loss": 1.4705, + "step": 36453 + }, + { + "epoch": 0.44, + "grad_norm": 5.866486297314127, + "learning_rate": 1.818083986582649e-05, + "loss": 1.66, + "step": 36456 + }, + { + "epoch": 0.44, + "grad_norm": 15.802905874114078, + "learning_rate": 1.8180503891798565e-05, + "loss": 1.0952, + "step": 36459 + }, + { + "epoch": 0.44, + "grad_norm": 10.922990338506935, + "learning_rate": 1.818016788985352e-05, + "loss": 1.59, + "step": 36462 + }, + { + "epoch": 0.44, + "grad_norm": 15.31319666047607, + "learning_rate": 1.817983185999251e-05, + "loss": 1.2932, + "step": 36465 + }, + { + "epoch": 0.44, + "grad_norm": 2.912884292094443, + "learning_rate": 1.8179495802216682e-05, + "loss": 1.5549, + "step": 36468 + }, + { + "epoch": 0.44, + "grad_norm": 8.254788349699165, + "learning_rate": 1.817915971652718e-05, + "loss": 1.2467, + "step": 36471 + }, + { + "epoch": 0.44, + "grad_norm": 22.569023492993892, + "learning_rate": 1.8178823602925156e-05, + "loss": 1.3005, + "step": 36474 + }, + { + "epoch": 0.44, + "grad_norm": 10.82494100553482, + "learning_rate": 1.817848746141175e-05, + "loss": 1.6727, + "step": 36477 + }, + { + "epoch": 0.44, + "grad_norm": 28.39454158982505, + "learning_rate": 1.8178151291988107e-05, + "loss": 2.025, + "step": 36480 + }, + { + "epoch": 0.44, + "grad_norm": 7.904449589592716, + "learning_rate": 1.8177815094655388e-05, + "loss": 1.3771, + "step": 36483 + }, + { + "epoch": 0.44, + "grad_norm": 10.979121410817562, + "learning_rate": 1.8177478869414725e-05, + "loss": 1.3771, + "step": 36486 + }, + { + "epoch": 0.44, + "grad_norm": 19.87127944730781, + "learning_rate": 1.8177142616267275e-05, + "loss": 1.4666, + "step": 36489 + }, + { + "epoch": 0.44, + "grad_norm": 16.913368243795095, + "learning_rate": 1.817680633521418e-05, + "loss": 1.5121, + "step": 36492 + }, + { + "epoch": 0.44, + "grad_norm": 14.875240833797475, + "learning_rate": 1.8176470026256597e-05, + "loss": 1.1777, + "step": 36495 + }, + { + "epoch": 0.44, + "grad_norm": 12.131697108184355, + "learning_rate": 1.8176133689395664e-05, + "loss": 1.7041, + "step": 36498 + }, + { + "epoch": 0.44, + "grad_norm": 11.218994058270198, + "learning_rate": 1.817579732463253e-05, + "loss": 1.6705, + "step": 36501 + }, + { + "epoch": 0.44, + "grad_norm": 5.726601162302586, + "learning_rate": 1.8175460931968346e-05, + "loss": 1.6164, + "step": 36504 + }, + { + "epoch": 0.44, + "grad_norm": 13.942006652704196, + "learning_rate": 1.8175124511404257e-05, + "loss": 1.4754, + "step": 36507 + }, + { + "epoch": 0.44, + "grad_norm": 21.010163940084386, + "learning_rate": 1.8174788062941414e-05, + "loss": 1.5479, + "step": 36510 + }, + { + "epoch": 0.44, + "grad_norm": 16.75764621976912, + "learning_rate": 1.8174451586580964e-05, + "loss": 1.425, + "step": 36513 + }, + { + "epoch": 0.44, + "grad_norm": 5.600269791999797, + "learning_rate": 1.8174115082324054e-05, + "loss": 1.2675, + "step": 36516 + }, + { + "epoch": 0.44, + "grad_norm": 11.763072967494232, + "learning_rate": 1.8173778550171835e-05, + "loss": 1.5749, + "step": 36519 + }, + { + "epoch": 0.44, + "grad_norm": 22.92567901103631, + "learning_rate": 1.8173441990125452e-05, + "loss": 1.4144, + "step": 36522 + }, + { + "epoch": 0.44, + "grad_norm": 5.5714962870026286, + "learning_rate": 1.8173105402186054e-05, + "loss": 1.3347, + "step": 36525 + }, + { + "epoch": 0.44, + "grad_norm": 11.310800329492013, + "learning_rate": 1.8172768786354796e-05, + "loss": 1.612, + "step": 36528 + }, + { + "epoch": 0.44, + "grad_norm": 7.772429743127839, + "learning_rate": 1.817243214263282e-05, + "loss": 1.4289, + "step": 36531 + }, + { + "epoch": 0.44, + "grad_norm": 12.20773792489039, + "learning_rate": 1.8172095471021278e-05, + "loss": 1.224, + "step": 36534 + }, + { + "epoch": 0.44, + "grad_norm": 5.774154672306136, + "learning_rate": 1.8171758771521315e-05, + "loss": 1.3433, + "step": 36537 + }, + { + "epoch": 0.44, + "grad_norm": 9.543273605923227, + "learning_rate": 1.8171422044134083e-05, + "loss": 1.4568, + "step": 36540 + }, + { + "epoch": 0.44, + "grad_norm": 12.462499325125087, + "learning_rate": 1.8171085288860734e-05, + "loss": 1.4205, + "step": 36543 + }, + { + "epoch": 0.44, + "grad_norm": 2.4574016759815955, + "learning_rate": 1.817074850570241e-05, + "loss": 1.1573, + "step": 36546 + }, + { + "epoch": 0.44, + "grad_norm": 6.840571576745925, + "learning_rate": 1.817041169466027e-05, + "loss": 1.4655, + "step": 36549 + }, + { + "epoch": 0.44, + "grad_norm": 17.673734562479904, + "learning_rate": 1.8170074855735455e-05, + "loss": 1.2771, + "step": 36552 + }, + { + "epoch": 0.44, + "grad_norm": 15.982885955680832, + "learning_rate": 1.8169737988929114e-05, + "loss": 1.8013, + "step": 36555 + }, + { + "epoch": 0.44, + "grad_norm": 11.537474383676123, + "learning_rate": 1.8169401094242402e-05, + "loss": 1.5948, + "step": 36558 + }, + { + "epoch": 0.44, + "grad_norm": 23.866886997598513, + "learning_rate": 1.816906417167647e-05, + "loss": 1.5111, + "step": 36561 + }, + { + "epoch": 0.44, + "grad_norm": 17.004828750553294, + "learning_rate": 1.8168727221232458e-05, + "loss": 1.8116, + "step": 36564 + }, + { + "epoch": 0.44, + "grad_norm": 61.65010705469969, + "learning_rate": 1.8168390242911526e-05, + "loss": 1.5358, + "step": 36567 + }, + { + "epoch": 0.44, + "grad_norm": 40.8983080444833, + "learning_rate": 1.8168053236714824e-05, + "loss": 1.4181, + "step": 36570 + }, + { + "epoch": 0.44, + "grad_norm": 5.472909451649276, + "learning_rate": 1.8167716202643494e-05, + "loss": 1.5636, + "step": 36573 + }, + { + "epoch": 0.44, + "grad_norm": 27.812674826200425, + "learning_rate": 1.8167379140698692e-05, + "loss": 1.5313, + "step": 36576 + }, + { + "epoch": 0.44, + "grad_norm": 5.391695400713279, + "learning_rate": 1.8167042050881566e-05, + "loss": 1.5377, + "step": 36579 + }, + { + "epoch": 0.44, + "grad_norm": 4.47587861134384, + "learning_rate": 1.8166704933193265e-05, + "loss": 1.8364, + "step": 36582 + }, + { + "epoch": 0.44, + "grad_norm": 15.530150315763501, + "learning_rate": 1.8166367787634947e-05, + "loss": 1.4008, + "step": 36585 + }, + { + "epoch": 0.44, + "grad_norm": 10.356685624210238, + "learning_rate": 1.8166030614207755e-05, + "loss": 1.4362, + "step": 36588 + }, + { + "epoch": 0.44, + "grad_norm": 30.189173958717035, + "learning_rate": 1.8165693412912843e-05, + "loss": 1.2882, + "step": 36591 + }, + { + "epoch": 0.44, + "grad_norm": 10.9641097536516, + "learning_rate": 1.8165356183751357e-05, + "loss": 1.3062, + "step": 36594 + }, + { + "epoch": 0.44, + "grad_norm": 6.349892093510144, + "learning_rate": 1.8165018926724452e-05, + "loss": 1.8678, + "step": 36597 + }, + { + "epoch": 0.44, + "grad_norm": 3.224604464600971, + "learning_rate": 1.8164681641833284e-05, + "loss": 1.4797, + "step": 36600 + }, + { + "epoch": 0.44, + "grad_norm": 70.92969712317885, + "learning_rate": 1.816434432907899e-05, + "loss": 1.5716, + "step": 36603 + }, + { + "epoch": 0.44, + "grad_norm": 22.848509354532066, + "learning_rate": 1.8164006988462737e-05, + "loss": 1.4757, + "step": 36606 + }, + { + "epoch": 0.44, + "grad_norm": 13.779356102706155, + "learning_rate": 1.8163669619985666e-05, + "loss": 1.4909, + "step": 36609 + }, + { + "epoch": 0.44, + "grad_norm": 15.501656181306394, + "learning_rate": 1.816333222364893e-05, + "loss": 1.4202, + "step": 36612 + }, + { + "epoch": 0.44, + "grad_norm": 214.23848854990464, + "learning_rate": 1.8162994799453683e-05, + "loss": 1.5722, + "step": 36615 + }, + { + "epoch": 0.44, + "grad_norm": 3.9980397390016034, + "learning_rate": 1.8162657347401077e-05, + "loss": 1.1494, + "step": 36618 + }, + { + "epoch": 0.44, + "grad_norm": 39.89966025494812, + "learning_rate": 1.8162319867492258e-05, + "loss": 1.5479, + "step": 36621 + }, + { + "epoch": 0.44, + "grad_norm": 7.0063420943306145, + "learning_rate": 1.816198235972838e-05, + "loss": 1.6174, + "step": 36624 + }, + { + "epoch": 0.44, + "grad_norm": 9.560426881566318, + "learning_rate": 1.8161644824110602e-05, + "loss": 1.4947, + "step": 36627 + }, + { + "epoch": 0.44, + "grad_norm": 26.01766591739905, + "learning_rate": 1.8161307260640065e-05, + "loss": 1.5941, + "step": 36630 + }, + { + "epoch": 0.44, + "grad_norm": 10.436517468785793, + "learning_rate": 1.816096966931793e-05, + "loss": 1.4654, + "step": 36633 + }, + { + "epoch": 0.44, + "grad_norm": 19.450355208369487, + "learning_rate": 1.816063205014534e-05, + "loss": 1.3158, + "step": 36636 + }, + { + "epoch": 0.44, + "grad_norm": 9.579569863860653, + "learning_rate": 1.8160294403123457e-05, + "loss": 1.5311, + "step": 36639 + }, + { + "epoch": 0.44, + "grad_norm": 6.4791844916046495, + "learning_rate": 1.8159956728253428e-05, + "loss": 1.6616, + "step": 36642 + }, + { + "epoch": 0.44, + "grad_norm": 25.877883248020318, + "learning_rate": 1.8159619025536404e-05, + "loss": 1.7019, + "step": 36645 + }, + { + "epoch": 0.44, + "grad_norm": 38.27003769993117, + "learning_rate": 1.8159281294973537e-05, + "loss": 1.4974, + "step": 36648 + }, + { + "epoch": 0.44, + "grad_norm": 8.058931413282364, + "learning_rate": 1.8158943536565988e-05, + "loss": 1.7121, + "step": 36651 + }, + { + "epoch": 0.44, + "grad_norm": 5.180309332459702, + "learning_rate": 1.81586057503149e-05, + "loss": 1.333, + "step": 36654 + }, + { + "epoch": 0.44, + "grad_norm": 19.4608698784399, + "learning_rate": 1.815826793622143e-05, + "loss": 1.2927, + "step": 36657 + }, + { + "epoch": 0.44, + "grad_norm": 34.69856329215747, + "learning_rate": 1.815793009428673e-05, + "loss": 1.6144, + "step": 36660 + }, + { + "epoch": 0.44, + "grad_norm": 13.881638515294199, + "learning_rate": 1.8157592224511953e-05, + "loss": 1.1295, + "step": 36663 + }, + { + "epoch": 0.44, + "grad_norm": 4.459729973876549, + "learning_rate": 1.815725432689825e-05, + "loss": 1.3919, + "step": 36666 + }, + { + "epoch": 0.44, + "grad_norm": 7.732600845559235, + "learning_rate": 1.815691640144678e-05, + "loss": 1.3728, + "step": 36669 + }, + { + "epoch": 0.44, + "grad_norm": 9.541204975345263, + "learning_rate": 1.815657844815869e-05, + "loss": 1.5513, + "step": 36672 + }, + { + "epoch": 0.44, + "grad_norm": 66.65032338987407, + "learning_rate": 1.815624046703514e-05, + "loss": 1.2408, + "step": 36675 + }, + { + "epoch": 0.44, + "grad_norm": 32.1303516262483, + "learning_rate": 1.815590245807727e-05, + "loss": 1.53, + "step": 36678 + }, + { + "epoch": 0.44, + "grad_norm": 8.977190542361775, + "learning_rate": 1.815556442128625e-05, + "loss": 1.7112, + "step": 36681 + }, + { + "epoch": 0.44, + "grad_norm": 4.228873124042907, + "learning_rate": 1.8155226356663226e-05, + "loss": 1.4234, + "step": 36684 + }, + { + "epoch": 0.44, + "grad_norm": 7.475728949037891, + "learning_rate": 1.8154888264209348e-05, + "loss": 1.7582, + "step": 36687 + }, + { + "epoch": 0.44, + "grad_norm": 3.6878001788318437, + "learning_rate": 1.8154550143925776e-05, + "loss": 1.1536, + "step": 36690 + }, + { + "epoch": 0.44, + "grad_norm": 6.340164201311896, + "learning_rate": 1.815421199581366e-05, + "loss": 1.5555, + "step": 36693 + }, + { + "epoch": 0.44, + "grad_norm": 40.87276064553237, + "learning_rate": 1.815387381987416e-05, + "loss": 1.6388, + "step": 36696 + }, + { + "epoch": 0.44, + "grad_norm": 4.197289530050098, + "learning_rate": 1.8153535616108426e-05, + "loss": 1.3643, + "step": 36699 + }, + { + "epoch": 0.44, + "grad_norm": 7.330288293656525, + "learning_rate": 1.8153197384517606e-05, + "loss": 1.1753, + "step": 36702 + }, + { + "epoch": 0.44, + "grad_norm": 54.17620773642976, + "learning_rate": 1.8152859125102865e-05, + "loss": 1.6536, + "step": 36705 + }, + { + "epoch": 0.44, + "grad_norm": 12.99500709720885, + "learning_rate": 1.8152520837865348e-05, + "loss": 1.246, + "step": 36708 + }, + { + "epoch": 0.44, + "grad_norm": 13.461806241151733, + "learning_rate": 1.8152182522806217e-05, + "loss": 1.1981, + "step": 36711 + }, + { + "epoch": 0.44, + "grad_norm": 17.03971518430603, + "learning_rate": 1.8151844179926622e-05, + "loss": 1.2382, + "step": 36714 + }, + { + "epoch": 0.44, + "grad_norm": 26.963867143258298, + "learning_rate": 1.815150580922772e-05, + "loss": 1.3728, + "step": 36717 + }, + { + "epoch": 0.44, + "grad_norm": 30.669879712944624, + "learning_rate": 1.8151167410710663e-05, + "loss": 1.1238, + "step": 36720 + }, + { + "epoch": 0.44, + "grad_norm": 3.8037514745963854, + "learning_rate": 1.815082898437661e-05, + "loss": 1.6613, + "step": 36723 + }, + { + "epoch": 0.44, + "grad_norm": 18.245740135206454, + "learning_rate": 1.8150490530226715e-05, + "loss": 1.558, + "step": 36726 + }, + { + "epoch": 0.44, + "grad_norm": 3.0142190920233323, + "learning_rate": 1.8150152048262128e-05, + "loss": 1.5696, + "step": 36729 + }, + { + "epoch": 0.44, + "grad_norm": 17.422068793278466, + "learning_rate": 1.8149813538484007e-05, + "loss": 1.4145, + "step": 36732 + }, + { + "epoch": 0.44, + "grad_norm": 39.28797895247006, + "learning_rate": 1.8149475000893513e-05, + "loss": 1.3297, + "step": 36735 + }, + { + "epoch": 0.44, + "grad_norm": 32.70360788221101, + "learning_rate": 1.8149136435491792e-05, + "loss": 1.1674, + "step": 36738 + }, + { + "epoch": 0.44, + "grad_norm": 19.56142870233753, + "learning_rate": 1.8148797842280005e-05, + "loss": 1.435, + "step": 36741 + }, + { + "epoch": 0.44, + "grad_norm": 18.87593132606729, + "learning_rate": 1.8148459221259306e-05, + "loss": 1.6669, + "step": 36744 + }, + { + "epoch": 0.44, + "grad_norm": 38.2728588241165, + "learning_rate": 1.814812057243085e-05, + "loss": 1.2963, + "step": 36747 + }, + { + "epoch": 0.44, + "grad_norm": 8.789138515104588, + "learning_rate": 1.8147781895795798e-05, + "loss": 1.5554, + "step": 36750 + }, + { + "epoch": 0.44, + "grad_norm": 6.731016645058292, + "learning_rate": 1.81474431913553e-05, + "loss": 1.3161, + "step": 36753 + }, + { + "epoch": 0.44, + "grad_norm": 14.263746297870162, + "learning_rate": 1.8147104459110508e-05, + "loss": 1.2486, + "step": 36756 + }, + { + "epoch": 0.44, + "grad_norm": 7.549639697155771, + "learning_rate": 1.8146765699062585e-05, + "loss": 1.5549, + "step": 36759 + }, + { + "epoch": 0.44, + "grad_norm": 13.07678994642672, + "learning_rate": 1.8146426911212685e-05, + "loss": 0.9641, + "step": 36762 + }, + { + "epoch": 0.44, + "grad_norm": 7.330491352240842, + "learning_rate": 1.8146088095561965e-05, + "loss": 1.5225, + "step": 36765 + }, + { + "epoch": 0.44, + "grad_norm": 4.4176097956718845, + "learning_rate": 1.8145749252111583e-05, + "loss": 0.9514, + "step": 36768 + }, + { + "epoch": 0.44, + "grad_norm": 8.661020628702708, + "learning_rate": 1.8145410380862688e-05, + "loss": 1.4834, + "step": 36771 + }, + { + "epoch": 0.44, + "grad_norm": 10.474362776924602, + "learning_rate": 1.8145071481816444e-05, + "loss": 1.6248, + "step": 36774 + }, + { + "epoch": 0.44, + "grad_norm": 25.141282396473642, + "learning_rate": 1.814473255497401e-05, + "loss": 1.3994, + "step": 36777 + }, + { + "epoch": 0.44, + "grad_norm": 7.288611033475114, + "learning_rate": 1.814439360033653e-05, + "loss": 1.3892, + "step": 36780 + }, + { + "epoch": 0.44, + "grad_norm": 4.395562364620954, + "learning_rate": 1.814405461790517e-05, + "loss": 1.3765, + "step": 36783 + }, + { + "epoch": 0.44, + "grad_norm": 18.52303412764826, + "learning_rate": 1.8143715607681086e-05, + "loss": 1.289, + "step": 36786 + }, + { + "epoch": 0.44, + "grad_norm": 9.585381783613661, + "learning_rate": 1.8143376569665433e-05, + "loss": 1.243, + "step": 36789 + }, + { + "epoch": 0.44, + "grad_norm": 4.4566412610373165, + "learning_rate": 1.8143037503859368e-05, + "loss": 1.3003, + "step": 36792 + }, + { + "epoch": 0.44, + "grad_norm": 12.427530485980686, + "learning_rate": 1.814269841026405e-05, + "loss": 1.5593, + "step": 36795 + }, + { + "epoch": 0.44, + "grad_norm": 8.8694957462838, + "learning_rate": 1.8142359288880636e-05, + "loss": 1.5497, + "step": 36798 + }, + { + "epoch": 0.44, + "grad_norm": 10.4973606915878, + "learning_rate": 1.8142020139710283e-05, + "loss": 1.2131, + "step": 36801 + }, + { + "epoch": 0.44, + "grad_norm": 13.759862393734101, + "learning_rate": 1.814168096275415e-05, + "loss": 1.4581, + "step": 36804 + }, + { + "epoch": 0.44, + "grad_norm": 6.406367838435471, + "learning_rate": 1.814134175801339e-05, + "loss": 1.3182, + "step": 36807 + }, + { + "epoch": 0.44, + "grad_norm": 28.070019686491, + "learning_rate": 1.814100252548916e-05, + "loss": 1.672, + "step": 36810 + }, + { + "epoch": 0.44, + "grad_norm": 66.79171790618075, + "learning_rate": 1.8140663265182627e-05, + "loss": 1.5586, + "step": 36813 + }, + { + "epoch": 0.44, + "grad_norm": 13.19841201350976, + "learning_rate": 1.8140323977094937e-05, + "loss": 1.5843, + "step": 36816 + }, + { + "epoch": 0.44, + "grad_norm": 5.164267721275436, + "learning_rate": 1.8139984661227257e-05, + "loss": 1.2465, + "step": 36819 + }, + { + "epoch": 0.44, + "grad_norm": 5.401862405628717, + "learning_rate": 1.813964531758074e-05, + "loss": 1.5517, + "step": 36822 + }, + { + "epoch": 0.44, + "grad_norm": 3.723121503026601, + "learning_rate": 1.8139305946156546e-05, + "loss": 1.7801, + "step": 36825 + }, + { + "epoch": 0.44, + "grad_norm": 110.2187225750329, + "learning_rate": 1.8138966546955833e-05, + "loss": 1.5313, + "step": 36828 + }, + { + "epoch": 0.44, + "grad_norm": 10.812221883301381, + "learning_rate": 1.8138627119979757e-05, + "loss": 0.9196, + "step": 36831 + }, + { + "epoch": 0.44, + "grad_norm": 22.869124689150723, + "learning_rate": 1.8138287665229478e-05, + "loss": 1.4293, + "step": 36834 + }, + { + "epoch": 0.44, + "grad_norm": 15.419165512151707, + "learning_rate": 1.8137948182706155e-05, + "loss": 1.6962, + "step": 36837 + }, + { + "epoch": 0.44, + "grad_norm": 4.556925971657864, + "learning_rate": 1.8137608672410948e-05, + "loss": 1.2747, + "step": 36840 + }, + { + "epoch": 0.44, + "grad_norm": 14.700439326544304, + "learning_rate": 1.8137269134345014e-05, + "loss": 1.2846, + "step": 36843 + }, + { + "epoch": 0.44, + "grad_norm": 13.983259160819467, + "learning_rate": 1.8136929568509512e-05, + "loss": 1.3865, + "step": 36846 + }, + { + "epoch": 0.44, + "grad_norm": 21.421907243501366, + "learning_rate": 1.8136589974905596e-05, + "loss": 1.5554, + "step": 36849 + }, + { + "epoch": 0.44, + "grad_norm": 17.074122685906683, + "learning_rate": 1.813625035353443e-05, + "loss": 1.333, + "step": 36852 + }, + { + "epoch": 0.44, + "grad_norm": 9.266413039918598, + "learning_rate": 1.8135910704397174e-05, + "loss": 1.5989, + "step": 36855 + }, + { + "epoch": 0.44, + "grad_norm": 4.51163419169526, + "learning_rate": 1.8135571027494988e-05, + "loss": 1.4182, + "step": 36858 + }, + { + "epoch": 0.44, + "grad_norm": 31.40879104535133, + "learning_rate": 1.8135231322829027e-05, + "loss": 1.3284, + "step": 36861 + }, + { + "epoch": 0.44, + "grad_norm": 37.8614056223197, + "learning_rate": 1.8134891590400447e-05, + "loss": 1.534, + "step": 36864 + }, + { + "epoch": 0.44, + "grad_norm": 11.868800304036297, + "learning_rate": 1.813455183021042e-05, + "loss": 1.3609, + "step": 36867 + }, + { + "epoch": 0.44, + "grad_norm": 8.235121442038555, + "learning_rate": 1.813421204226009e-05, + "loss": 1.4148, + "step": 36870 + }, + { + "epoch": 0.44, + "grad_norm": 26.778183371052926, + "learning_rate": 1.8133872226550626e-05, + "loss": 1.8341, + "step": 36873 + }, + { + "epoch": 0.44, + "grad_norm": 32.96825973339245, + "learning_rate": 1.813353238308319e-05, + "loss": 1.5837, + "step": 36876 + }, + { + "epoch": 0.44, + "grad_norm": 58.53787240927539, + "learning_rate": 1.8133192511858936e-05, + "loss": 1.5016, + "step": 36879 + }, + { + "epoch": 0.44, + "grad_norm": 6.080008728677763, + "learning_rate": 1.8132852612879023e-05, + "loss": 1.4771, + "step": 36882 + }, + { + "epoch": 0.44, + "grad_norm": 5.898847790251561, + "learning_rate": 1.8132512686144615e-05, + "loss": 1.409, + "step": 36885 + }, + { + "epoch": 0.44, + "grad_norm": 12.255154704879399, + "learning_rate": 1.813217273165687e-05, + "loss": 1.5459, + "step": 36888 + }, + { + "epoch": 0.44, + "grad_norm": 23.262175103562043, + "learning_rate": 1.813183274941695e-05, + "loss": 1.0192, + "step": 36891 + }, + { + "epoch": 0.44, + "grad_norm": 16.909470175236425, + "learning_rate": 1.8131492739426013e-05, + "loss": 1.817, + "step": 36894 + }, + { + "epoch": 0.44, + "grad_norm": 35.10129248281224, + "learning_rate": 1.813115270168522e-05, + "loss": 1.4694, + "step": 36897 + }, + { + "epoch": 0.44, + "grad_norm": 16.64659338293902, + "learning_rate": 1.8130812636195732e-05, + "loss": 1.9017, + "step": 36900 + }, + { + "epoch": 0.44, + "grad_norm": 16.892309503891404, + "learning_rate": 1.813047254295871e-05, + "loss": 1.6225, + "step": 36903 + }, + { + "epoch": 0.44, + "grad_norm": 13.705057561483908, + "learning_rate": 1.8130132421975313e-05, + "loss": 1.5174, + "step": 36906 + }, + { + "epoch": 0.44, + "grad_norm": 27.335664190145422, + "learning_rate": 1.81297922732467e-05, + "loss": 1.4058, + "step": 36909 + }, + { + "epoch": 0.44, + "grad_norm": 14.711639894320866, + "learning_rate": 1.8129452096774038e-05, + "loss": 1.4409, + "step": 36912 + }, + { + "epoch": 0.44, + "grad_norm": 3.0827007100798802, + "learning_rate": 1.8129111892558484e-05, + "loss": 1.1678, + "step": 36915 + }, + { + "epoch": 0.44, + "grad_norm": 3.7056169227743117, + "learning_rate": 1.8128771660601198e-05, + "loss": 1.6034, + "step": 36918 + }, + { + "epoch": 0.44, + "grad_norm": 4.198364578516057, + "learning_rate": 1.8128431400903342e-05, + "loss": 1.494, + "step": 36921 + }, + { + "epoch": 0.44, + "grad_norm": 3.072145462434179, + "learning_rate": 1.8128091113466076e-05, + "loss": 1.1815, + "step": 36924 + }, + { + "epoch": 0.44, + "grad_norm": 41.56166049181919, + "learning_rate": 1.8127750798290568e-05, + "loss": 1.4991, + "step": 36927 + }, + { + "epoch": 0.44, + "grad_norm": 24.80197628315737, + "learning_rate": 1.8127410455377973e-05, + "loss": 1.6296, + "step": 36930 + }, + { + "epoch": 0.44, + "grad_norm": 35.91808534801556, + "learning_rate": 1.8127070084729447e-05, + "loss": 1.4286, + "step": 36933 + }, + { + "epoch": 0.44, + "grad_norm": 13.354725027239226, + "learning_rate": 1.8126729686346162e-05, + "loss": 1.8318, + "step": 36936 + }, + { + "epoch": 0.44, + "grad_norm": 18.220174625076673, + "learning_rate": 1.8126389260229277e-05, + "loss": 1.6812, + "step": 36939 + }, + { + "epoch": 0.44, + "grad_norm": 33.10293499469351, + "learning_rate": 1.8126048806379954e-05, + "loss": 1.437, + "step": 36942 + }, + { + "epoch": 0.44, + "grad_norm": 16.121259698590826, + "learning_rate": 1.8125708324799348e-05, + "loss": 1.2303, + "step": 36945 + }, + { + "epoch": 0.44, + "grad_norm": 20.017707103623117, + "learning_rate": 1.812536781548863e-05, + "loss": 1.7422, + "step": 36948 + }, + { + "epoch": 0.44, + "grad_norm": 38.52912155749528, + "learning_rate": 1.812502727844896e-05, + "loss": 1.7234, + "step": 36951 + }, + { + "epoch": 0.44, + "grad_norm": 10.08885816909147, + "learning_rate": 1.8124686713681495e-05, + "loss": 1.6088, + "step": 36954 + }, + { + "epoch": 0.44, + "grad_norm": 17.48611119767598, + "learning_rate": 1.8124346121187404e-05, + "loss": 1.6818, + "step": 36957 + }, + { + "epoch": 0.44, + "grad_norm": 22.69305905172807, + "learning_rate": 1.8124005500967847e-05, + "loss": 1.6949, + "step": 36960 + }, + { + "epoch": 0.44, + "grad_norm": 24.606888398341653, + "learning_rate": 1.812366485302398e-05, + "loss": 1.6245, + "step": 36963 + }, + { + "epoch": 0.44, + "grad_norm": 89.55274657744201, + "learning_rate": 1.8123324177356975e-05, + "loss": 1.2313, + "step": 36966 + }, + { + "epoch": 0.44, + "grad_norm": 11.231670189827689, + "learning_rate": 1.812298347396799e-05, + "loss": 1.467, + "step": 36969 + }, + { + "epoch": 0.44, + "grad_norm": 13.586082760623457, + "learning_rate": 1.812264274285819e-05, + "loss": 1.5687, + "step": 36972 + }, + { + "epoch": 0.44, + "grad_norm": 47.36414958284505, + "learning_rate": 1.8122301984028733e-05, + "loss": 1.9977, + "step": 36975 + }, + { + "epoch": 0.44, + "grad_norm": 6.684807752226674, + "learning_rate": 1.8121961197480785e-05, + "loss": 1.2107, + "step": 36978 + }, + { + "epoch": 0.44, + "grad_norm": 16.601671397865115, + "learning_rate": 1.8121620383215507e-05, + "loss": 1.7303, + "step": 36981 + }, + { + "epoch": 0.44, + "grad_norm": 24.948777536872647, + "learning_rate": 1.8121279541234068e-05, + "loss": 1.8229, + "step": 36984 + }, + { + "epoch": 0.44, + "grad_norm": 22.403953248663022, + "learning_rate": 1.8120938671537626e-05, + "loss": 1.4563, + "step": 36987 + }, + { + "epoch": 0.44, + "grad_norm": 13.165605199474893, + "learning_rate": 1.8120597774127347e-05, + "loss": 1.6548, + "step": 36990 + }, + { + "epoch": 0.44, + "grad_norm": 8.44145809606155, + "learning_rate": 1.812025684900439e-05, + "loss": 1.2146, + "step": 36993 + }, + { + "epoch": 0.44, + "grad_norm": 18.54522120548405, + "learning_rate": 1.8119915896169925e-05, + "loss": 1.4125, + "step": 36996 + }, + { + "epoch": 0.44, + "grad_norm": 16.020178005162432, + "learning_rate": 1.811957491562511e-05, + "loss": 1.2537, + "step": 36999 + }, + { + "epoch": 0.44, + "grad_norm": 128.03826019397684, + "learning_rate": 1.811923390737111e-05, + "loss": 1.6853, + "step": 37002 + }, + { + "epoch": 0.44, + "grad_norm": 8.86226691062512, + "learning_rate": 1.8118892871409087e-05, + "loss": 1.5205, + "step": 37005 + }, + { + "epoch": 0.45, + "grad_norm": 44.29210465292885, + "learning_rate": 1.811855180774021e-05, + "loss": 1.5086, + "step": 37008 + }, + { + "epoch": 0.45, + "grad_norm": 21.665700900848687, + "learning_rate": 1.811821071636564e-05, + "loss": 1.2636, + "step": 37011 + }, + { + "epoch": 0.45, + "grad_norm": 45.65752430430163, + "learning_rate": 1.8117869597286536e-05, + "loss": 1.3757, + "step": 37014 + }, + { + "epoch": 0.45, + "grad_norm": 6.555079628215897, + "learning_rate": 1.8117528450504075e-05, + "loss": 1.1639, + "step": 37017 + }, + { + "epoch": 0.45, + "grad_norm": 12.528220206435654, + "learning_rate": 1.811718727601941e-05, + "loss": 1.2743, + "step": 37020 + }, + { + "epoch": 0.45, + "grad_norm": 7.365507284053837, + "learning_rate": 1.811684607383371e-05, + "loss": 1.9342, + "step": 37023 + }, + { + "epoch": 0.45, + "grad_norm": 9.58757885634919, + "learning_rate": 1.8116504843948134e-05, + "loss": 1.8387, + "step": 37026 + }, + { + "epoch": 0.45, + "grad_norm": 17.419388049805367, + "learning_rate": 1.811616358636385e-05, + "loss": 1.3371, + "step": 37029 + }, + { + "epoch": 0.45, + "grad_norm": 73.00553201474156, + "learning_rate": 1.811582230108203e-05, + "loss": 1.3548, + "step": 37032 + }, + { + "epoch": 0.45, + "grad_norm": 39.201560010963526, + "learning_rate": 1.8115480988103823e-05, + "loss": 1.6105, + "step": 37035 + }, + { + "epoch": 0.45, + "grad_norm": 6.123723020550616, + "learning_rate": 1.8115139647430408e-05, + "loss": 1.5068, + "step": 37038 + }, + { + "epoch": 0.45, + "grad_norm": 25.666198700021535, + "learning_rate": 1.8114798279062946e-05, + "loss": 1.5732, + "step": 37041 + }, + { + "epoch": 0.45, + "grad_norm": 26.44415213023265, + "learning_rate": 1.8114456883002595e-05, + "loss": 1.2664, + "step": 37044 + }, + { + "epoch": 0.45, + "grad_norm": 59.75056124139428, + "learning_rate": 1.811411545925053e-05, + "loss": 1.484, + "step": 37047 + }, + { + "epoch": 0.45, + "grad_norm": 137.78685079451665, + "learning_rate": 1.811377400780791e-05, + "loss": 1.4783, + "step": 37050 + }, + { + "epoch": 0.45, + "grad_norm": 9.18208620488406, + "learning_rate": 1.81134325286759e-05, + "loss": 1.4652, + "step": 37053 + }, + { + "epoch": 0.45, + "grad_norm": 3.6053242611355745, + "learning_rate": 1.8113091021855672e-05, + "loss": 1.4601, + "step": 37056 + }, + { + "epoch": 0.45, + "grad_norm": 11.458068588515133, + "learning_rate": 1.8112749487348383e-05, + "loss": 1.4155, + "step": 37059 + }, + { + "epoch": 0.45, + "grad_norm": 22.13399365919058, + "learning_rate": 1.8112407925155205e-05, + "loss": 1.7318, + "step": 37062 + }, + { + "epoch": 0.45, + "grad_norm": 24.383068061793786, + "learning_rate": 1.81120663352773e-05, + "loss": 1.5236, + "step": 37065 + }, + { + "epoch": 0.45, + "grad_norm": 9.565323781133964, + "learning_rate": 1.811172471771583e-05, + "loss": 1.4895, + "step": 37068 + }, + { + "epoch": 0.45, + "grad_norm": 21.427240050029386, + "learning_rate": 1.811138307247197e-05, + "loss": 1.1678, + "step": 37071 + }, + { + "epoch": 0.45, + "grad_norm": 5.380623695478986, + "learning_rate": 1.8111041399546883e-05, + "loss": 1.4186, + "step": 37074 + }, + { + "epoch": 0.45, + "grad_norm": 7.844929714911691, + "learning_rate": 1.811069969894173e-05, + "loss": 1.401, + "step": 37077 + }, + { + "epoch": 0.45, + "grad_norm": 5.844269154198954, + "learning_rate": 1.8110357970657678e-05, + "loss": 1.5923, + "step": 37080 + }, + { + "epoch": 0.45, + "grad_norm": 7.6749911628218435, + "learning_rate": 1.8110016214695898e-05, + "loss": 1.7485, + "step": 37083 + }, + { + "epoch": 0.45, + "grad_norm": 16.836173464424675, + "learning_rate": 1.8109674431057556e-05, + "loss": 1.5632, + "step": 37086 + }, + { + "epoch": 0.45, + "grad_norm": 12.978001495464026, + "learning_rate": 1.8109332619743813e-05, + "loss": 1.4073, + "step": 37089 + }, + { + "epoch": 0.45, + "grad_norm": 10.266061451632329, + "learning_rate": 1.8108990780755843e-05, + "loss": 1.4364, + "step": 37092 + }, + { + "epoch": 0.45, + "grad_norm": 78.65939335018962, + "learning_rate": 1.8108648914094805e-05, + "loss": 1.4883, + "step": 37095 + }, + { + "epoch": 0.45, + "grad_norm": 35.032160361695816, + "learning_rate": 1.8108307019761868e-05, + "loss": 1.7137, + "step": 37098 + }, + { + "epoch": 0.45, + "grad_norm": 6.937849618276469, + "learning_rate": 1.81079650977582e-05, + "loss": 1.519, + "step": 37101 + }, + { + "epoch": 0.45, + "grad_norm": 3.1920554677999466, + "learning_rate": 1.8107623148084973e-05, + "loss": 1.3876, + "step": 37104 + }, + { + "epoch": 0.45, + "grad_norm": 9.913042830829133, + "learning_rate": 1.8107281170743343e-05, + "loss": 1.6085, + "step": 37107 + }, + { + "epoch": 0.45, + "grad_norm": 6.081931130266829, + "learning_rate": 1.8106939165734482e-05, + "loss": 1.4282, + "step": 37110 + }, + { + "epoch": 0.45, + "grad_norm": 11.223784884757105, + "learning_rate": 1.810659713305956e-05, + "loss": 1.3829, + "step": 37113 + }, + { + "epoch": 0.45, + "grad_norm": 13.080596253784247, + "learning_rate": 1.8106255072719744e-05, + "loss": 1.1618, + "step": 37116 + }, + { + "epoch": 0.45, + "grad_norm": 60.14823566037287, + "learning_rate": 1.8105912984716196e-05, + "loss": 1.3808, + "step": 37119 + }, + { + "epoch": 0.45, + "grad_norm": 3.200836749358161, + "learning_rate": 1.8105570869050088e-05, + "loss": 1.378, + "step": 37122 + }, + { + "epoch": 0.45, + "grad_norm": 8.079254089687767, + "learning_rate": 1.8105228725722587e-05, + "loss": 1.4552, + "step": 37125 + }, + { + "epoch": 0.45, + "grad_norm": 7.221146783050232, + "learning_rate": 1.810488655473486e-05, + "loss": 1.5494, + "step": 37128 + }, + { + "epoch": 0.45, + "grad_norm": 12.56425949471802, + "learning_rate": 1.810454435608807e-05, + "loss": 1.3616, + "step": 37131 + }, + { + "epoch": 0.45, + "grad_norm": 11.207430830300515, + "learning_rate": 1.8104202129783392e-05, + "loss": 1.5999, + "step": 37134 + }, + { + "epoch": 0.45, + "grad_norm": 44.787985638972856, + "learning_rate": 1.8103859875821994e-05, + "loss": 1.2087, + "step": 37137 + }, + { + "epoch": 0.45, + "grad_norm": 9.119892107282435, + "learning_rate": 1.8103517594205038e-05, + "loss": 1.4958, + "step": 37140 + }, + { + "epoch": 0.45, + "grad_norm": 44.66496647684091, + "learning_rate": 1.8103175284933696e-05, + "loss": 1.3723, + "step": 37143 + }, + { + "epoch": 0.45, + "grad_norm": 14.715342268655327, + "learning_rate": 1.8102832948009137e-05, + "loss": 1.6542, + "step": 37146 + }, + { + "epoch": 0.45, + "grad_norm": 11.287288774934153, + "learning_rate": 1.8102490583432527e-05, + "loss": 1.4614, + "step": 37149 + }, + { + "epoch": 0.45, + "grad_norm": 11.078736828109587, + "learning_rate": 1.8102148191205035e-05, + "loss": 1.2143, + "step": 37152 + }, + { + "epoch": 0.45, + "grad_norm": 5.192528516416598, + "learning_rate": 1.810180577132783e-05, + "loss": 1.5315, + "step": 37155 + }, + { + "epoch": 0.45, + "grad_norm": 13.243437630212075, + "learning_rate": 1.810146332380208e-05, + "loss": 1.5396, + "step": 37158 + }, + { + "epoch": 0.45, + "grad_norm": 12.845708384025249, + "learning_rate": 1.8101120848628954e-05, + "loss": 1.2458, + "step": 37161 + }, + { + "epoch": 0.45, + "grad_norm": 14.293200935960348, + "learning_rate": 1.810077834580962e-05, + "loss": 1.3218, + "step": 37164 + }, + { + "epoch": 0.45, + "grad_norm": 16.65273224507234, + "learning_rate": 1.8100435815345248e-05, + "loss": 1.8362, + "step": 37167 + }, + { + "epoch": 0.45, + "grad_norm": 9.129903392143312, + "learning_rate": 1.8100093257237005e-05, + "loss": 1.282, + "step": 37170 + }, + { + "epoch": 0.45, + "grad_norm": 11.894110314120761, + "learning_rate": 1.8099750671486062e-05, + "loss": 1.5692, + "step": 37173 + }, + { + "epoch": 0.45, + "grad_norm": 5.912932363222628, + "learning_rate": 1.809940805809359e-05, + "loss": 1.5501, + "step": 37176 + }, + { + "epoch": 0.45, + "grad_norm": 9.017541501789134, + "learning_rate": 1.809906541706075e-05, + "loss": 1.6001, + "step": 37179 + }, + { + "epoch": 0.45, + "grad_norm": 18.646756446367128, + "learning_rate": 1.809872274838872e-05, + "loss": 1.8879, + "step": 37182 + }, + { + "epoch": 0.45, + "grad_norm": 6.642419416653805, + "learning_rate": 1.8098380052078667e-05, + "loss": 1.3647, + "step": 37185 + }, + { + "epoch": 0.45, + "grad_norm": 5.977126026628551, + "learning_rate": 1.809803732813176e-05, + "loss": 1.5302, + "step": 37188 + }, + { + "epoch": 0.45, + "grad_norm": 10.46715620073925, + "learning_rate": 1.809769457654917e-05, + "loss": 1.4872, + "step": 37191 + }, + { + "epoch": 0.45, + "grad_norm": 17.414183840473093, + "learning_rate": 1.8097351797332062e-05, + "loss": 1.6942, + "step": 37194 + }, + { + "epoch": 0.45, + "grad_norm": 10.94537842129707, + "learning_rate": 1.8097008990481614e-05, + "loss": 1.2839, + "step": 37197 + }, + { + "epoch": 0.45, + "grad_norm": 12.914902779453865, + "learning_rate": 1.8096666155998982e-05, + "loss": 1.2878, + "step": 37200 + }, + { + "epoch": 0.45, + "grad_norm": 12.089754913680384, + "learning_rate": 1.8096323293885353e-05, + "loss": 1.5614, + "step": 37203 + }, + { + "epoch": 0.45, + "grad_norm": 12.395156327810923, + "learning_rate": 1.8095980404141885e-05, + "loss": 1.4036, + "step": 37206 + }, + { + "epoch": 0.45, + "grad_norm": 7.357913571378323, + "learning_rate": 1.809563748676975e-05, + "loss": 1.6036, + "step": 37209 + }, + { + "epoch": 0.45, + "grad_norm": 9.993131191185418, + "learning_rate": 1.8095294541770126e-05, + "loss": 1.6605, + "step": 37212 + }, + { + "epoch": 0.45, + "grad_norm": 8.6407457510285, + "learning_rate": 1.8094951569144173e-05, + "loss": 1.5269, + "step": 37215 + }, + { + "epoch": 0.45, + "grad_norm": 8.826780209908051, + "learning_rate": 1.809460856889307e-05, + "loss": 1.3754, + "step": 37218 + }, + { + "epoch": 0.45, + "grad_norm": 11.885562673545568, + "learning_rate": 1.809426554101798e-05, + "loss": 1.3382, + "step": 37221 + }, + { + "epoch": 0.45, + "grad_norm": 13.230536806669425, + "learning_rate": 1.809392248552008e-05, + "loss": 1.5477, + "step": 37224 + }, + { + "epoch": 0.45, + "grad_norm": 8.276783492878785, + "learning_rate": 1.8093579402400537e-05, + "loss": 1.1834, + "step": 37227 + }, + { + "epoch": 0.45, + "grad_norm": 7.798012116385843, + "learning_rate": 1.809323629166052e-05, + "loss": 1.4038, + "step": 37230 + }, + { + "epoch": 0.45, + "grad_norm": 16.387636942828596, + "learning_rate": 1.809289315330121e-05, + "loss": 1.5222, + "step": 37233 + }, + { + "epoch": 0.45, + "grad_norm": 17.016630551503603, + "learning_rate": 1.8092549987323766e-05, + "loss": 1.5886, + "step": 37236 + }, + { + "epoch": 0.45, + "grad_norm": 66.54640792924492, + "learning_rate": 1.809220679372936e-05, + "loss": 1.5873, + "step": 37239 + }, + { + "epoch": 0.45, + "grad_norm": 48.61176835755011, + "learning_rate": 1.8091863572519172e-05, + "loss": 1.3565, + "step": 37242 + }, + { + "epoch": 0.45, + "grad_norm": 50.102389888872985, + "learning_rate": 1.809152032369437e-05, + "loss": 1.7527, + "step": 37245 + }, + { + "epoch": 0.45, + "grad_norm": 16.757174257399416, + "learning_rate": 1.8091177047256118e-05, + "loss": 1.3139, + "step": 37248 + }, + { + "epoch": 0.45, + "grad_norm": 10.138459984111782, + "learning_rate": 1.8090833743205597e-05, + "loss": 1.4862, + "step": 37251 + }, + { + "epoch": 0.45, + "grad_norm": 6.6777238911210794, + "learning_rate": 1.8090490411543973e-05, + "loss": 1.5463, + "step": 37254 + }, + { + "epoch": 0.45, + "grad_norm": 27.918602060076527, + "learning_rate": 1.809014705227242e-05, + "loss": 1.1986, + "step": 37257 + }, + { + "epoch": 0.45, + "grad_norm": 3.291959823220455, + "learning_rate": 1.808980366539211e-05, + "loss": 1.5765, + "step": 37260 + }, + { + "epoch": 0.45, + "grad_norm": 11.22542390401761, + "learning_rate": 1.808946025090421e-05, + "loss": 1.556, + "step": 37263 + }, + { + "epoch": 0.45, + "grad_norm": 15.807726039452827, + "learning_rate": 1.80891168088099e-05, + "loss": 1.1106, + "step": 37266 + }, + { + "epoch": 0.45, + "grad_norm": 15.491433997785714, + "learning_rate": 1.8088773339110347e-05, + "loss": 1.4776, + "step": 37269 + }, + { + "epoch": 0.45, + "grad_norm": 10.54823156854677, + "learning_rate": 1.8088429841806727e-05, + "loss": 1.2296, + "step": 37272 + }, + { + "epoch": 0.45, + "grad_norm": 5.698789425524158, + "learning_rate": 1.8088086316900205e-05, + "loss": 1.5066, + "step": 37275 + }, + { + "epoch": 0.45, + "grad_norm": 4.193832006124515, + "learning_rate": 1.8087742764391962e-05, + "loss": 1.3737, + "step": 37278 + }, + { + "epoch": 0.45, + "grad_norm": 16.089981343289782, + "learning_rate": 1.8087399184283164e-05, + "loss": 1.1932, + "step": 37281 + }, + { + "epoch": 0.45, + "grad_norm": 8.158317871497934, + "learning_rate": 1.8087055576574984e-05, + "loss": 1.5966, + "step": 37284 + }, + { + "epoch": 0.45, + "grad_norm": 27.262525875797397, + "learning_rate": 1.80867119412686e-05, + "loss": 1.5704, + "step": 37287 + }, + { + "epoch": 0.45, + "grad_norm": 18.620935348541433, + "learning_rate": 1.8086368278365177e-05, + "loss": 1.9874, + "step": 37290 + }, + { + "epoch": 0.45, + "grad_norm": 22.896633925236795, + "learning_rate": 1.8086024587865895e-05, + "loss": 1.5112, + "step": 37293 + }, + { + "epoch": 0.45, + "grad_norm": 41.974720129170066, + "learning_rate": 1.808568086977192e-05, + "loss": 1.4785, + "step": 37296 + }, + { + "epoch": 0.45, + "grad_norm": 3.8010546001480807, + "learning_rate": 1.8085337124084434e-05, + "loss": 1.7402, + "step": 37299 + }, + { + "epoch": 0.45, + "grad_norm": 10.344550060290452, + "learning_rate": 1.80849933508046e-05, + "loss": 1.174, + "step": 37302 + }, + { + "epoch": 0.45, + "grad_norm": 11.669345469015626, + "learning_rate": 1.8084649549933593e-05, + "loss": 1.4065, + "step": 37305 + }, + { + "epoch": 0.45, + "grad_norm": 16.518801947513744, + "learning_rate": 1.8084305721472594e-05, + "loss": 1.2301, + "step": 37308 + }, + { + "epoch": 0.45, + "grad_norm": 30.896089362542128, + "learning_rate": 1.8083961865422773e-05, + "loss": 1.2636, + "step": 37311 + }, + { + "epoch": 0.45, + "grad_norm": 11.090787875673273, + "learning_rate": 1.80836179817853e-05, + "loss": 1.2501, + "step": 37314 + }, + { + "epoch": 0.45, + "grad_norm": 35.443323227254915, + "learning_rate": 1.808327407056135e-05, + "loss": 1.6286, + "step": 37317 + }, + { + "epoch": 0.45, + "grad_norm": 20.14713682012308, + "learning_rate": 1.80829301317521e-05, + "loss": 1.3477, + "step": 37320 + }, + { + "epoch": 0.45, + "grad_norm": 31.073050702753438, + "learning_rate": 1.8082586165358714e-05, + "loss": 1.4128, + "step": 37323 + }, + { + "epoch": 0.45, + "grad_norm": 9.209630425288333, + "learning_rate": 1.8082242171382378e-05, + "loss": 1.295, + "step": 37326 + }, + { + "epoch": 0.45, + "grad_norm": 12.938384269068568, + "learning_rate": 1.8081898149824263e-05, + "loss": 1.5223, + "step": 37329 + }, + { + "epoch": 0.45, + "grad_norm": 18.8635908192691, + "learning_rate": 1.8081554100685537e-05, + "loss": 1.4956, + "step": 37332 + }, + { + "epoch": 0.45, + "grad_norm": 24.860323584077644, + "learning_rate": 1.8081210023967376e-05, + "loss": 1.4531, + "step": 37335 + }, + { + "epoch": 0.45, + "grad_norm": 18.014153106548832, + "learning_rate": 1.808086591967096e-05, + "loss": 1.9204, + "step": 37338 + }, + { + "epoch": 0.45, + "grad_norm": 11.365866723413783, + "learning_rate": 1.8080521787797458e-05, + "loss": 1.4808, + "step": 37341 + }, + { + "epoch": 0.45, + "grad_norm": 11.796466356942839, + "learning_rate": 1.8080177628348045e-05, + "loss": 1.2679, + "step": 37344 + }, + { + "epoch": 0.45, + "grad_norm": 11.264468734985481, + "learning_rate": 1.8079833441323898e-05, + "loss": 1.608, + "step": 37347 + }, + { + "epoch": 0.45, + "grad_norm": 14.357274130469108, + "learning_rate": 1.807948922672619e-05, + "loss": 1.4458, + "step": 37350 + }, + { + "epoch": 0.45, + "grad_norm": 17.389071446213357, + "learning_rate": 1.8079144984556093e-05, + "loss": 1.4935, + "step": 37353 + }, + { + "epoch": 0.45, + "grad_norm": 23.20458703284594, + "learning_rate": 1.8078800714814784e-05, + "loss": 1.4162, + "step": 37356 + }, + { + "epoch": 0.45, + "grad_norm": 21.177745268391565, + "learning_rate": 1.807845641750344e-05, + "loss": 1.6461, + "step": 37359 + }, + { + "epoch": 0.45, + "grad_norm": 14.34801506629518, + "learning_rate": 1.807811209262323e-05, + "loss": 1.7284, + "step": 37362 + }, + { + "epoch": 0.45, + "grad_norm": 22.783644699634266, + "learning_rate": 1.8077767740175338e-05, + "loss": 1.4466, + "step": 37365 + }, + { + "epoch": 0.45, + "grad_norm": 60.20661227042043, + "learning_rate": 1.8077423360160934e-05, + "loss": 1.4053, + "step": 37368 + }, + { + "epoch": 0.45, + "grad_norm": 40.134558315000206, + "learning_rate": 1.8077078952581194e-05, + "loss": 1.0682, + "step": 37371 + }, + { + "epoch": 0.45, + "grad_norm": 9.869990657240873, + "learning_rate": 1.807673451743729e-05, + "loss": 1.3399, + "step": 37374 + }, + { + "epoch": 0.45, + "grad_norm": 9.668504649757578, + "learning_rate": 1.8076390054730405e-05, + "loss": 1.2392, + "step": 37377 + }, + { + "epoch": 0.45, + "grad_norm": 42.81146929425019, + "learning_rate": 1.8076045564461707e-05, + "loss": 1.4893, + "step": 37380 + }, + { + "epoch": 0.45, + "grad_norm": 17.884926595226954, + "learning_rate": 1.807570104663237e-05, + "loss": 1.6561, + "step": 37383 + }, + { + "epoch": 0.45, + "grad_norm": 23.80953873023073, + "learning_rate": 1.807535650124358e-05, + "loss": 1.6411, + "step": 37386 + }, + { + "epoch": 0.45, + "grad_norm": 54.09498175338655, + "learning_rate": 1.8075011928296508e-05, + "loss": 1.5903, + "step": 37389 + }, + { + "epoch": 0.45, + "grad_norm": 6.962496682763528, + "learning_rate": 1.8074667327792323e-05, + "loss": 1.5244, + "step": 37392 + }, + { + "epoch": 0.45, + "grad_norm": 22.852823160838746, + "learning_rate": 1.807432269973221e-05, + "loss": 1.3662, + "step": 37395 + }, + { + "epoch": 0.45, + "grad_norm": 16.493610912058603, + "learning_rate": 1.807397804411734e-05, + "loss": 1.7786, + "step": 37398 + }, + { + "epoch": 0.45, + "grad_norm": 11.289387264908754, + "learning_rate": 1.8073633360948894e-05, + "loss": 1.3269, + "step": 37401 + }, + { + "epoch": 0.45, + "grad_norm": 6.0181795534854094, + "learning_rate": 1.8073288650228046e-05, + "loss": 1.8035, + "step": 37404 + }, + { + "epoch": 0.45, + "grad_norm": 6.958499629393236, + "learning_rate": 1.807294391195597e-05, + "loss": 1.3534, + "step": 37407 + }, + { + "epoch": 0.45, + "grad_norm": 4.726885032263315, + "learning_rate": 1.8072599146133845e-05, + "loss": 1.8604, + "step": 37410 + }, + { + "epoch": 0.45, + "grad_norm": 10.109399040469809, + "learning_rate": 1.8072254352762847e-05, + "loss": 1.5426, + "step": 37413 + }, + { + "epoch": 0.45, + "grad_norm": 22.275270240254304, + "learning_rate": 1.8071909531844154e-05, + "loss": 1.7448, + "step": 37416 + }, + { + "epoch": 0.45, + "grad_norm": 46.33417112718348, + "learning_rate": 1.8071564683378938e-05, + "loss": 1.5063, + "step": 37419 + }, + { + "epoch": 0.45, + "grad_norm": 21.57540157821862, + "learning_rate": 1.8071219807368383e-05, + "loss": 1.7286, + "step": 37422 + }, + { + "epoch": 0.45, + "grad_norm": 15.424190530709764, + "learning_rate": 1.8070874903813654e-05, + "loss": 1.1735, + "step": 37425 + }, + { + "epoch": 0.45, + "grad_norm": 4.166228184610368, + "learning_rate": 1.8070529972715945e-05, + "loss": 1.6364, + "step": 37428 + }, + { + "epoch": 0.45, + "grad_norm": 9.305638093567552, + "learning_rate": 1.807018501407642e-05, + "loss": 1.4178, + "step": 37431 + }, + { + "epoch": 0.45, + "grad_norm": 99.81641872219576, + "learning_rate": 1.806984002789626e-05, + "loss": 1.4181, + "step": 37434 + }, + { + "epoch": 0.45, + "grad_norm": 30.882962296300278, + "learning_rate": 1.8069495014176645e-05, + "loss": 1.1804, + "step": 37437 + }, + { + "epoch": 0.45, + "grad_norm": 37.16159953231375, + "learning_rate": 1.806914997291875e-05, + "loss": 1.5774, + "step": 37440 + }, + { + "epoch": 0.45, + "grad_norm": 20.648454676920295, + "learning_rate": 1.8068804904123754e-05, + "loss": 1.5111, + "step": 37443 + }, + { + "epoch": 0.45, + "grad_norm": 18.156386593451533, + "learning_rate": 1.8068459807792827e-05, + "loss": 1.4108, + "step": 37446 + }, + { + "epoch": 0.45, + "grad_norm": 15.098247923191359, + "learning_rate": 1.8068114683927157e-05, + "loss": 1.395, + "step": 37449 + }, + { + "epoch": 0.45, + "grad_norm": 4.295047873958264, + "learning_rate": 1.806776953252792e-05, + "loss": 1.6259, + "step": 37452 + }, + { + "epoch": 0.45, + "grad_norm": 24.74036658621204, + "learning_rate": 1.8067424353596287e-05, + "loss": 1.7876, + "step": 37455 + }, + { + "epoch": 0.45, + "grad_norm": 4.323251185872886, + "learning_rate": 1.8067079147133442e-05, + "loss": 1.2341, + "step": 37458 + }, + { + "epoch": 0.45, + "grad_norm": 5.523332184605988, + "learning_rate": 1.8066733913140563e-05, + "loss": 1.2938, + "step": 37461 + }, + { + "epoch": 0.45, + "grad_norm": 3.9102541081196764, + "learning_rate": 1.8066388651618825e-05, + "loss": 1.4575, + "step": 37464 + }, + { + "epoch": 0.45, + "grad_norm": 7.809918349786407, + "learning_rate": 1.806604336256941e-05, + "loss": 1.5577, + "step": 37467 + }, + { + "epoch": 0.45, + "grad_norm": 36.08131067246767, + "learning_rate": 1.8065698045993496e-05, + "loss": 1.5425, + "step": 37470 + }, + { + "epoch": 0.45, + "grad_norm": 13.197116128095, + "learning_rate": 1.8065352701892256e-05, + "loss": 1.9224, + "step": 37473 + }, + { + "epoch": 0.45, + "grad_norm": 23.13378553888406, + "learning_rate": 1.8065007330266874e-05, + "loss": 1.3204, + "step": 37476 + }, + { + "epoch": 0.45, + "grad_norm": 16.521088489206925, + "learning_rate": 1.8064661931118528e-05, + "loss": 1.4071, + "step": 37479 + }, + { + "epoch": 0.45, + "grad_norm": 16.928525207369077, + "learning_rate": 1.8064316504448394e-05, + "loss": 1.7114, + "step": 37482 + }, + { + "epoch": 0.45, + "grad_norm": 14.831133722854922, + "learning_rate": 1.8063971050257652e-05, + "loss": 1.5774, + "step": 37485 + }, + { + "epoch": 0.45, + "grad_norm": 6.0831050332604, + "learning_rate": 1.8063625568547484e-05, + "loss": 1.7951, + "step": 37488 + }, + { + "epoch": 0.45, + "grad_norm": 8.217273596589145, + "learning_rate": 1.8063280059319065e-05, + "loss": 1.5127, + "step": 37491 + }, + { + "epoch": 0.45, + "grad_norm": 6.252590873783194, + "learning_rate": 1.806293452257357e-05, + "loss": 1.6437, + "step": 37494 + }, + { + "epoch": 0.45, + "grad_norm": 5.174160724789818, + "learning_rate": 1.806258895831219e-05, + "loss": 1.3284, + "step": 37497 + }, + { + "epoch": 0.45, + "grad_norm": 16.646676095371305, + "learning_rate": 1.80622433665361e-05, + "loss": 1.5405, + "step": 37500 + }, + { + "epoch": 0.45, + "grad_norm": 8.195114409940313, + "learning_rate": 1.806189774724647e-05, + "loss": 1.4599, + "step": 37503 + }, + { + "epoch": 0.45, + "grad_norm": 10.782433740268852, + "learning_rate": 1.8061552100444496e-05, + "loss": 2.2722, + "step": 37506 + }, + { + "epoch": 0.45, + "grad_norm": 12.362638940962025, + "learning_rate": 1.8061206426131345e-05, + "loss": 1.3433, + "step": 37509 + }, + { + "epoch": 0.45, + "grad_norm": 2.9368298345132637, + "learning_rate": 1.80608607243082e-05, + "loss": 1.209, + "step": 37512 + }, + { + "epoch": 0.45, + "grad_norm": 38.449108220590205, + "learning_rate": 1.8060514994976236e-05, + "loss": 1.7766, + "step": 37515 + }, + { + "epoch": 0.45, + "grad_norm": 7.395120297024711, + "learning_rate": 1.806016923813664e-05, + "loss": 1.7425, + "step": 37518 + }, + { + "epoch": 0.45, + "grad_norm": 13.8345960417863, + "learning_rate": 1.8059823453790592e-05, + "loss": 1.5197, + "step": 37521 + }, + { + "epoch": 0.45, + "grad_norm": 10.282166814298419, + "learning_rate": 1.805947764193927e-05, + "loss": 1.632, + "step": 37524 + }, + { + "epoch": 0.45, + "grad_norm": 16.56912311522467, + "learning_rate": 1.8059131802583853e-05, + "loss": 1.5028, + "step": 37527 + }, + { + "epoch": 0.45, + "grad_norm": 3.393118119394237, + "learning_rate": 1.805878593572552e-05, + "loss": 1.583, + "step": 37530 + }, + { + "epoch": 0.45, + "grad_norm": 37.983210284666, + "learning_rate": 1.8058440041365456e-05, + "loss": 1.7544, + "step": 37533 + }, + { + "epoch": 0.45, + "grad_norm": 4.981538045720348, + "learning_rate": 1.8058094119504835e-05, + "loss": 1.4684, + "step": 37536 + }, + { + "epoch": 0.45, + "grad_norm": 12.774575973448542, + "learning_rate": 1.8057748170144846e-05, + "loss": 1.4302, + "step": 37539 + }, + { + "epoch": 0.45, + "grad_norm": 6.112600639091142, + "learning_rate": 1.8057402193286664e-05, + "loss": 1.4347, + "step": 37542 + }, + { + "epoch": 0.45, + "grad_norm": 2.115339806760428, + "learning_rate": 1.8057056188931468e-05, + "loss": 1.447, + "step": 37545 + }, + { + "epoch": 0.45, + "grad_norm": 22.180868830355735, + "learning_rate": 1.8056710157080444e-05, + "loss": 1.1992, + "step": 37548 + }, + { + "epoch": 0.45, + "grad_norm": 28.31867767843285, + "learning_rate": 1.8056364097734768e-05, + "loss": 1.3361, + "step": 37551 + }, + { + "epoch": 0.45, + "grad_norm": 15.87791105601284, + "learning_rate": 1.8056018010895626e-05, + "loss": 1.5409, + "step": 37554 + }, + { + "epoch": 0.45, + "grad_norm": 45.604285092010414, + "learning_rate": 1.8055671896564196e-05, + "loss": 1.5435, + "step": 37557 + }, + { + "epoch": 0.45, + "grad_norm": 20.774933866025723, + "learning_rate": 1.805532575474166e-05, + "loss": 1.3398, + "step": 37560 + }, + { + "epoch": 0.45, + "grad_norm": 26.884705337786126, + "learning_rate": 1.805497958542919e-05, + "loss": 1.5432, + "step": 37563 + }, + { + "epoch": 0.45, + "grad_norm": 27.546019087239113, + "learning_rate": 1.8054633388627985e-05, + "loss": 1.2982, + "step": 37566 + }, + { + "epoch": 0.45, + "grad_norm": 10.594264013484368, + "learning_rate": 1.8054287164339217e-05, + "loss": 1.31, + "step": 37569 + }, + { + "epoch": 0.45, + "grad_norm": 8.939706083719486, + "learning_rate": 1.8053940912564067e-05, + "loss": 1.3601, + "step": 37572 + }, + { + "epoch": 0.45, + "grad_norm": 5.771298287508694, + "learning_rate": 1.805359463330372e-05, + "loss": 1.341, + "step": 37575 + }, + { + "epoch": 0.45, + "grad_norm": 25.998892522261006, + "learning_rate": 1.8053248326559346e-05, + "loss": 1.3007, + "step": 37578 + }, + { + "epoch": 0.45, + "grad_norm": 15.767758911397683, + "learning_rate": 1.8052901992332143e-05, + "loss": 1.2609, + "step": 37581 + }, + { + "epoch": 0.45, + "grad_norm": 8.655411725252828, + "learning_rate": 1.8052555630623287e-05, + "loss": 1.303, + "step": 37584 + }, + { + "epoch": 0.45, + "grad_norm": 26.4690661206218, + "learning_rate": 1.805220924143396e-05, + "loss": 1.4399, + "step": 37587 + }, + { + "epoch": 0.45, + "grad_norm": 30.143180526488702, + "learning_rate": 1.805186282476534e-05, + "loss": 1.3894, + "step": 37590 + }, + { + "epoch": 0.45, + "grad_norm": 19.678623109859984, + "learning_rate": 1.8051516380618614e-05, + "loss": 1.3387, + "step": 37593 + }, + { + "epoch": 0.45, + "grad_norm": 5.7427696192324715, + "learning_rate": 1.8051169908994962e-05, + "loss": 1.5964, + "step": 37596 + }, + { + "epoch": 0.45, + "grad_norm": 29.043760339968333, + "learning_rate": 1.8050823409895567e-05, + "loss": 1.2872, + "step": 37599 + }, + { + "epoch": 0.45, + "grad_norm": 24.084860875931348, + "learning_rate": 1.805047688332161e-05, + "loss": 1.4163, + "step": 37602 + }, + { + "epoch": 0.45, + "grad_norm": 22.20601443557753, + "learning_rate": 1.805013032927428e-05, + "loss": 1.3933, + "step": 37605 + }, + { + "epoch": 0.45, + "grad_norm": 14.13487507053166, + "learning_rate": 1.8049783747754755e-05, + "loss": 1.5371, + "step": 37608 + }, + { + "epoch": 0.45, + "grad_norm": 3.521309218358099, + "learning_rate": 1.8049437138764212e-05, + "loss": 1.4251, + "step": 37611 + }, + { + "epoch": 0.45, + "grad_norm": 12.881578331669399, + "learning_rate": 1.8049090502303842e-05, + "loss": 1.2934, + "step": 37614 + }, + { + "epoch": 0.45, + "grad_norm": 15.126254372025334, + "learning_rate": 1.8048743838374827e-05, + "loss": 0.9314, + "step": 37617 + }, + { + "epoch": 0.45, + "grad_norm": 9.578031766985356, + "learning_rate": 1.8048397146978346e-05, + "loss": 1.4287, + "step": 37620 + }, + { + "epoch": 0.45, + "grad_norm": 9.764322698043493, + "learning_rate": 1.8048050428115585e-05, + "loss": 1.2613, + "step": 37623 + }, + { + "epoch": 0.45, + "grad_norm": 9.459520052594096, + "learning_rate": 1.804770368178773e-05, + "loss": 1.0937, + "step": 37626 + }, + { + "epoch": 0.45, + "grad_norm": 17.667480550461043, + "learning_rate": 1.8047356907995957e-05, + "loss": 1.6312, + "step": 37629 + }, + { + "epoch": 0.45, + "grad_norm": 14.392882666167715, + "learning_rate": 1.8047010106741455e-05, + "loss": 1.7141, + "step": 37632 + }, + { + "epoch": 0.45, + "grad_norm": 14.841079440572624, + "learning_rate": 1.8046663278025405e-05, + "loss": 1.4904, + "step": 37635 + }, + { + "epoch": 0.45, + "grad_norm": 19.543632789455504, + "learning_rate": 1.8046316421848992e-05, + "loss": 1.502, + "step": 37638 + }, + { + "epoch": 0.45, + "grad_norm": 36.04015134201493, + "learning_rate": 1.8045969538213403e-05, + "loss": 1.4215, + "step": 37641 + }, + { + "epoch": 0.45, + "grad_norm": 9.043345941226777, + "learning_rate": 1.8045622627119815e-05, + "loss": 1.5142, + "step": 37644 + }, + { + "epoch": 0.45, + "grad_norm": 11.550020223533673, + "learning_rate": 1.8045275688569413e-05, + "loss": 1.3849, + "step": 37647 + }, + { + "epoch": 0.45, + "grad_norm": 15.957802380516027, + "learning_rate": 1.8044928722563387e-05, + "loss": 1.6538, + "step": 37650 + }, + { + "epoch": 0.45, + "grad_norm": 15.016241750201234, + "learning_rate": 1.8044581729102916e-05, + "loss": 1.8792, + "step": 37653 + }, + { + "epoch": 0.45, + "grad_norm": 27.630216617848557, + "learning_rate": 1.8044234708189182e-05, + "loss": 1.6176, + "step": 37656 + }, + { + "epoch": 0.45, + "grad_norm": 3.4686063616040244, + "learning_rate": 1.8043887659823375e-05, + "loss": 1.5566, + "step": 37659 + }, + { + "epoch": 0.45, + "grad_norm": 10.346552532949048, + "learning_rate": 1.8043540584006676e-05, + "loss": 1.6951, + "step": 37662 + }, + { + "epoch": 0.45, + "grad_norm": 16.994642364382155, + "learning_rate": 1.8043193480740272e-05, + "loss": 1.3846, + "step": 37665 + }, + { + "epoch": 0.45, + "grad_norm": 23.59582305372572, + "learning_rate": 1.8042846350025342e-05, + "loss": 1.6925, + "step": 37668 + }, + { + "epoch": 0.45, + "grad_norm": 6.136935894494791, + "learning_rate": 1.804249919186308e-05, + "loss": 1.2599, + "step": 37671 + }, + { + "epoch": 0.45, + "grad_norm": 15.549201802214599, + "learning_rate": 1.804215200625466e-05, + "loss": 1.6641, + "step": 37674 + }, + { + "epoch": 0.45, + "grad_norm": 5.499200439485129, + "learning_rate": 1.8041804793201274e-05, + "loss": 1.3896, + "step": 37677 + }, + { + "epoch": 0.45, + "grad_norm": 6.82381451184892, + "learning_rate": 1.8041457552704105e-05, + "loss": 1.3932, + "step": 37680 + }, + { + "epoch": 0.45, + "grad_norm": 13.182117683206569, + "learning_rate": 1.8041110284764342e-05, + "loss": 1.4856, + "step": 37683 + }, + { + "epoch": 0.45, + "grad_norm": 272.85963441439304, + "learning_rate": 1.8040762989383157e-05, + "loss": 1.3497, + "step": 37686 + }, + { + "epoch": 0.45, + "grad_norm": 18.464487456281912, + "learning_rate": 1.804041566656175e-05, + "loss": 1.5652, + "step": 37689 + }, + { + "epoch": 0.45, + "grad_norm": 78.88749712385109, + "learning_rate": 1.80400683163013e-05, + "loss": 1.7028, + "step": 37692 + }, + { + "epoch": 0.45, + "grad_norm": 20.64262533646133, + "learning_rate": 1.803972093860299e-05, + "loss": 1.315, + "step": 37695 + }, + { + "epoch": 0.45, + "grad_norm": 8.291954852615985, + "learning_rate": 1.8039373533468013e-05, + "loss": 1.2374, + "step": 37698 + }, + { + "epoch": 0.45, + "grad_norm": 63.68196212448009, + "learning_rate": 1.8039026100897547e-05, + "loss": 1.7184, + "step": 37701 + }, + { + "epoch": 0.45, + "grad_norm": 5.556473203489636, + "learning_rate": 1.8038678640892782e-05, + "loss": 1.4948, + "step": 37704 + }, + { + "epoch": 0.45, + "grad_norm": 33.22400625487433, + "learning_rate": 1.8038331153454904e-05, + "loss": 1.3206, + "step": 37707 + }, + { + "epoch": 0.45, + "grad_norm": 10.533217635238136, + "learning_rate": 1.8037983638585094e-05, + "loss": 1.6197, + "step": 37710 + }, + { + "epoch": 0.45, + "grad_norm": 16.76020013669144, + "learning_rate": 1.8037636096284542e-05, + "loss": 1.565, + "step": 37713 + }, + { + "epoch": 0.45, + "grad_norm": 8.441507597749151, + "learning_rate": 1.803728852655443e-05, + "loss": 1.236, + "step": 37716 + }, + { + "epoch": 0.45, + "grad_norm": 16.15954502127961, + "learning_rate": 1.803694092939595e-05, + "loss": 1.3632, + "step": 37719 + }, + { + "epoch": 0.45, + "grad_norm": 8.757914433485295, + "learning_rate": 1.803659330481029e-05, + "loss": 1.2596, + "step": 37722 + }, + { + "epoch": 0.45, + "grad_norm": 27.04710554399656, + "learning_rate": 1.8036245652798624e-05, + "loss": 1.3994, + "step": 37725 + }, + { + "epoch": 0.45, + "grad_norm": 21.261914052639032, + "learning_rate": 1.803589797336215e-05, + "loss": 1.4406, + "step": 37728 + }, + { + "epoch": 0.45, + "grad_norm": 11.261808760756, + "learning_rate": 1.803555026650205e-05, + "loss": 1.3184, + "step": 37731 + }, + { + "epoch": 0.45, + "grad_norm": 10.424365186446876, + "learning_rate": 1.8035202532219514e-05, + "loss": 1.7875, + "step": 37734 + }, + { + "epoch": 0.45, + "grad_norm": 10.410239530767903, + "learning_rate": 1.8034854770515724e-05, + "loss": 1.6251, + "step": 37737 + }, + { + "epoch": 0.45, + "grad_norm": 24.92773752157753, + "learning_rate": 1.803450698139187e-05, + "loss": 0.9742, + "step": 37740 + }, + { + "epoch": 0.45, + "grad_norm": 29.56690959165915, + "learning_rate": 1.8034159164849134e-05, + "loss": 1.4418, + "step": 37743 + }, + { + "epoch": 0.45, + "grad_norm": 79.7953004975033, + "learning_rate": 1.803381132088871e-05, + "loss": 1.8664, + "step": 37746 + }, + { + "epoch": 0.45, + "grad_norm": 20.649807156738117, + "learning_rate": 1.8033463449511777e-05, + "loss": 1.2415, + "step": 37749 + }, + { + "epoch": 0.45, + "grad_norm": 5.6994257333773595, + "learning_rate": 1.8033115550719534e-05, + "loss": 1.3965, + "step": 37752 + }, + { + "epoch": 0.45, + "grad_norm": 37.99577463026523, + "learning_rate": 1.8032767624513158e-05, + "loss": 1.5138, + "step": 37755 + }, + { + "epoch": 0.45, + "grad_norm": 7.240361266303174, + "learning_rate": 1.803241967089384e-05, + "loss": 1.3981, + "step": 37758 + }, + { + "epoch": 0.45, + "grad_norm": 7.063551110445661, + "learning_rate": 1.8032071689862766e-05, + "loss": 1.5064, + "step": 37761 + }, + { + "epoch": 0.45, + "grad_norm": 12.24830498599078, + "learning_rate": 1.8031723681421125e-05, + "loss": 1.5741, + "step": 37764 + }, + { + "epoch": 0.45, + "grad_norm": 6.885344196656432, + "learning_rate": 1.80313756455701e-05, + "loss": 1.3171, + "step": 37767 + }, + { + "epoch": 0.45, + "grad_norm": 11.509518494751996, + "learning_rate": 1.803102758231089e-05, + "loss": 1.6475, + "step": 37770 + }, + { + "epoch": 0.45, + "grad_norm": 18.653800437128183, + "learning_rate": 1.803067949164467e-05, + "loss": 1.3211, + "step": 37773 + }, + { + "epoch": 0.45, + "grad_norm": 19.49294912753941, + "learning_rate": 1.8030331373572637e-05, + "loss": 1.5216, + "step": 37776 + }, + { + "epoch": 0.45, + "grad_norm": 12.196565937834404, + "learning_rate": 1.8029983228095976e-05, + "loss": 1.3662, + "step": 37779 + }, + { + "epoch": 0.45, + "grad_norm": 20.792765848948335, + "learning_rate": 1.8029635055215872e-05, + "loss": 1.5767, + "step": 37782 + }, + { + "epoch": 0.45, + "grad_norm": 13.809274959618245, + "learning_rate": 1.8029286854933517e-05, + "loss": 1.4947, + "step": 37785 + }, + { + "epoch": 0.45, + "grad_norm": 10.861257229192889, + "learning_rate": 1.8028938627250097e-05, + "loss": 1.6769, + "step": 37788 + }, + { + "epoch": 0.45, + "grad_norm": 21.759012300969527, + "learning_rate": 1.8028590372166804e-05, + "loss": 1.5771, + "step": 37791 + }, + { + "epoch": 0.45, + "grad_norm": 37.06140242661714, + "learning_rate": 1.802824208968482e-05, + "loss": 1.1661, + "step": 37794 + }, + { + "epoch": 0.45, + "grad_norm": 11.924172639606747, + "learning_rate": 1.802789377980534e-05, + "loss": 1.3985, + "step": 37797 + }, + { + "epoch": 0.45, + "grad_norm": 24.128585162089816, + "learning_rate": 1.802754544252955e-05, + "loss": 1.3932, + "step": 37800 + }, + { + "epoch": 0.45, + "grad_norm": 12.686204840011863, + "learning_rate": 1.802719707785864e-05, + "loss": 1.4481, + "step": 37803 + }, + { + "epoch": 0.45, + "grad_norm": 16.543385504089944, + "learning_rate": 1.8026848685793795e-05, + "loss": 1.2937, + "step": 37806 + }, + { + "epoch": 0.45, + "grad_norm": 9.966544540008194, + "learning_rate": 1.802650026633621e-05, + "loss": 1.5967, + "step": 37809 + }, + { + "epoch": 0.45, + "grad_norm": 22.83844183772684, + "learning_rate": 1.8026151819487066e-05, + "loss": 1.2862, + "step": 37812 + }, + { + "epoch": 0.45, + "grad_norm": 45.86871312634984, + "learning_rate": 1.802580334524756e-05, + "loss": 1.2872, + "step": 37815 + }, + { + "epoch": 0.45, + "grad_norm": 21.597446807606897, + "learning_rate": 1.802545484361888e-05, + "loss": 1.1908, + "step": 37818 + }, + { + "epoch": 0.45, + "grad_norm": 26.85429162697069, + "learning_rate": 1.802510631460221e-05, + "loss": 1.3397, + "step": 37821 + }, + { + "epoch": 0.45, + "grad_norm": 19.300716431065172, + "learning_rate": 1.8024757758198747e-05, + "loss": 1.2735, + "step": 37824 + }, + { + "epoch": 0.45, + "grad_norm": 17.451263987320242, + "learning_rate": 1.8024409174409672e-05, + "loss": 1.8346, + "step": 37827 + }, + { + "epoch": 0.45, + "grad_norm": 12.276675471839019, + "learning_rate": 1.8024060563236177e-05, + "loss": 1.3773, + "step": 37830 + }, + { + "epoch": 0.45, + "grad_norm": 55.30126426102747, + "learning_rate": 1.802371192467946e-05, + "loss": 1.4166, + "step": 37833 + }, + { + "epoch": 0.45, + "grad_norm": 23.08736333221448, + "learning_rate": 1.80233632587407e-05, + "loss": 1.186, + "step": 37836 + }, + { + "epoch": 0.46, + "grad_norm": 26.69819431719962, + "learning_rate": 1.8023014565421094e-05, + "loss": 1.4361, + "step": 37839 + }, + { + "epoch": 0.46, + "grad_norm": 8.080872754049048, + "learning_rate": 1.802266584472183e-05, + "loss": 2.0017, + "step": 37842 + }, + { + "epoch": 0.46, + "grad_norm": 18.047956286841686, + "learning_rate": 1.802231709664409e-05, + "loss": 1.3337, + "step": 37845 + }, + { + "epoch": 0.46, + "grad_norm": 7.72481505851353, + "learning_rate": 1.8021968321189077e-05, + "loss": 1.7821, + "step": 37848 + }, + { + "epoch": 0.46, + "grad_norm": 201.2348606916759, + "learning_rate": 1.8021619518357972e-05, + "loss": 1.168, + "step": 37851 + }, + { + "epoch": 0.46, + "grad_norm": 18.548199130463647, + "learning_rate": 1.8021270688151973e-05, + "loss": 1.2462, + "step": 37854 + }, + { + "epoch": 0.46, + "grad_norm": 8.092466289048682, + "learning_rate": 1.8020921830572264e-05, + "loss": 1.5656, + "step": 37857 + }, + { + "epoch": 0.46, + "grad_norm": 8.484929395361902, + "learning_rate": 1.8020572945620042e-05, + "loss": 1.5558, + "step": 37860 + }, + { + "epoch": 0.46, + "grad_norm": 9.741853965124205, + "learning_rate": 1.802022403329649e-05, + "loss": 1.1888, + "step": 37863 + }, + { + "epoch": 0.46, + "grad_norm": 2.866305238789227, + "learning_rate": 1.8019875093602804e-05, + "loss": 1.4424, + "step": 37866 + }, + { + "epoch": 0.46, + "grad_norm": 59.10931308187986, + "learning_rate": 1.801952612654017e-05, + "loss": 1.2044, + "step": 37869 + }, + { + "epoch": 0.46, + "grad_norm": 23.931249039331544, + "learning_rate": 1.8019177132109783e-05, + "loss": 2.0563, + "step": 37872 + }, + { + "epoch": 0.46, + "grad_norm": 52.047951110358795, + "learning_rate": 1.8018828110312835e-05, + "loss": 1.5316, + "step": 37875 + }, + { + "epoch": 0.46, + "grad_norm": 11.157981073152863, + "learning_rate": 1.801847906115051e-05, + "loss": 1.6394, + "step": 37878 + }, + { + "epoch": 0.46, + "grad_norm": 12.831690535156232, + "learning_rate": 1.8018129984624012e-05, + "loss": 1.3396, + "step": 37881 + }, + { + "epoch": 0.46, + "grad_norm": 5.988836447894234, + "learning_rate": 1.801778088073452e-05, + "loss": 1.3197, + "step": 37884 + }, + { + "epoch": 0.46, + "grad_norm": 3.7668334705360342, + "learning_rate": 1.801743174948323e-05, + "loss": 1.6895, + "step": 37887 + }, + { + "epoch": 0.46, + "grad_norm": 2.77168441788193, + "learning_rate": 1.8017082590871334e-05, + "loss": 0.9928, + "step": 37890 + }, + { + "epoch": 0.46, + "grad_norm": 25.1353774277279, + "learning_rate": 1.801673340490002e-05, + "loss": 1.3971, + "step": 37893 + }, + { + "epoch": 0.46, + "grad_norm": 30.15533844517016, + "learning_rate": 1.8016384191570486e-05, + "loss": 1.6794, + "step": 37896 + }, + { + "epoch": 0.46, + "grad_norm": 22.56122813070464, + "learning_rate": 1.8016034950883918e-05, + "loss": 1.4997, + "step": 37899 + }, + { + "epoch": 0.46, + "grad_norm": 27.410747222625925, + "learning_rate": 1.8015685682841513e-05, + "loss": 1.7606, + "step": 37902 + }, + { + "epoch": 0.46, + "grad_norm": 12.859579712453348, + "learning_rate": 1.8015336387444456e-05, + "loss": 1.4797, + "step": 37905 + }, + { + "epoch": 0.46, + "grad_norm": 5.021193503198746, + "learning_rate": 1.8014987064693948e-05, + "loss": 1.497, + "step": 37908 + }, + { + "epoch": 0.46, + "grad_norm": 17.622957720341937, + "learning_rate": 1.8014637714591174e-05, + "loss": 1.636, + "step": 37911 + }, + { + "epoch": 0.46, + "grad_norm": 17.469328027071164, + "learning_rate": 1.8014288337137326e-05, + "loss": 1.6164, + "step": 37914 + }, + { + "epoch": 0.46, + "grad_norm": 5.426175268752438, + "learning_rate": 1.80139389323336e-05, + "loss": 1.094, + "step": 37917 + }, + { + "epoch": 0.46, + "grad_norm": 32.55496951267415, + "learning_rate": 1.8013589500181188e-05, + "loss": 1.143, + "step": 37920 + }, + { + "epoch": 0.46, + "grad_norm": 6.068719087229419, + "learning_rate": 1.801324004068128e-05, + "loss": 1.3993, + "step": 37923 + }, + { + "epoch": 0.46, + "grad_norm": 37.850471421843814, + "learning_rate": 1.8012890553835072e-05, + "loss": 1.2249, + "step": 37926 + }, + { + "epoch": 0.46, + "grad_norm": 22.96956611878422, + "learning_rate": 1.801254103964375e-05, + "loss": 1.3247, + "step": 37929 + }, + { + "epoch": 0.46, + "grad_norm": 56.07362443536102, + "learning_rate": 1.8012191498108518e-05, + "loss": 1.399, + "step": 37932 + }, + { + "epoch": 0.46, + "grad_norm": 15.658492959407324, + "learning_rate": 1.801184192923056e-05, + "loss": 1.4064, + "step": 37935 + }, + { + "epoch": 0.46, + "grad_norm": 19.649357825916447, + "learning_rate": 1.801149233301107e-05, + "loss": 1.6286, + "step": 37938 + }, + { + "epoch": 0.46, + "grad_norm": 18.370849945368846, + "learning_rate": 1.8011142709451245e-05, + "loss": 1.1727, + "step": 37941 + }, + { + "epoch": 0.46, + "grad_norm": 18.655644604335077, + "learning_rate": 1.8010793058552275e-05, + "loss": 1.6045, + "step": 37944 + }, + { + "epoch": 0.46, + "grad_norm": 29.806219843847032, + "learning_rate": 1.801044338031535e-05, + "loss": 1.7039, + "step": 37947 + }, + { + "epoch": 0.46, + "grad_norm": 10.955026854319778, + "learning_rate": 1.801009367474167e-05, + "loss": 1.7598, + "step": 37950 + }, + { + "epoch": 0.46, + "grad_norm": 4.464076707669617, + "learning_rate": 1.8009743941832426e-05, + "loss": 1.3285, + "step": 37953 + }, + { + "epoch": 0.46, + "grad_norm": 10.118969987992456, + "learning_rate": 1.800939418158881e-05, + "loss": 1.4976, + "step": 37956 + }, + { + "epoch": 0.46, + "grad_norm": 77.53219606523284, + "learning_rate": 1.800904439401202e-05, + "loss": 1.2271, + "step": 37959 + }, + { + "epoch": 0.46, + "grad_norm": 17.588682509887647, + "learning_rate": 1.8008694579103242e-05, + "loss": 1.484, + "step": 37962 + }, + { + "epoch": 0.46, + "grad_norm": 10.54087431451636, + "learning_rate": 1.8008344736863673e-05, + "loss": 1.1514, + "step": 37965 + }, + { + "epoch": 0.46, + "grad_norm": 4.186041239502438, + "learning_rate": 1.8007994867294512e-05, + "loss": 1.6062, + "step": 37968 + }, + { + "epoch": 0.46, + "grad_norm": 13.315568761359806, + "learning_rate": 1.800764497039695e-05, + "loss": 1.6417, + "step": 37971 + }, + { + "epoch": 0.46, + "grad_norm": 31.716167385272776, + "learning_rate": 1.8007295046172175e-05, + "loss": 1.5235, + "step": 37974 + }, + { + "epoch": 0.46, + "grad_norm": 6.16710461321367, + "learning_rate": 1.8006945094621388e-05, + "loss": 1.3691, + "step": 37977 + }, + { + "epoch": 0.46, + "grad_norm": 107.0996111476019, + "learning_rate": 1.8006595115745783e-05, + "loss": 1.5903, + "step": 37980 + }, + { + "epoch": 0.46, + "grad_norm": 15.815993855844825, + "learning_rate": 1.800624510954655e-05, + "loss": 1.4511, + "step": 37983 + }, + { + "epoch": 0.46, + "grad_norm": 5.910113738369527, + "learning_rate": 1.800589507602489e-05, + "loss": 1.4825, + "step": 37986 + }, + { + "epoch": 0.46, + "grad_norm": 11.126687568268727, + "learning_rate": 1.800554501518199e-05, + "loss": 1.5884, + "step": 37989 + }, + { + "epoch": 0.46, + "grad_norm": 75.15748280060363, + "learning_rate": 1.8005194927019053e-05, + "loss": 1.5045, + "step": 37992 + }, + { + "epoch": 0.46, + "grad_norm": 13.183612870128144, + "learning_rate": 1.8004844811537267e-05, + "loss": 1.5937, + "step": 37995 + }, + { + "epoch": 0.46, + "grad_norm": 14.68873004827504, + "learning_rate": 1.8004494668737828e-05, + "loss": 1.4417, + "step": 37998 + }, + { + "epoch": 0.46, + "grad_norm": 17.20712163736486, + "learning_rate": 1.8004144498621933e-05, + "loss": 1.4438, + "step": 38001 + }, + { + "epoch": 0.46, + "grad_norm": 24.788432291686465, + "learning_rate": 1.8003794301190776e-05, + "loss": 1.5142, + "step": 38004 + }, + { + "epoch": 0.46, + "grad_norm": 9.336707370737269, + "learning_rate": 1.8003444076445552e-05, + "loss": 1.4722, + "step": 38007 + }, + { + "epoch": 0.46, + "grad_norm": 69.61679148838594, + "learning_rate": 1.8003093824387455e-05, + "loss": 1.7574, + "step": 38010 + }, + { + "epoch": 0.46, + "grad_norm": 4.786004626892669, + "learning_rate": 1.800274354501768e-05, + "loss": 1.3368, + "step": 38013 + }, + { + "epoch": 0.46, + "grad_norm": 7.691704471972467, + "learning_rate": 1.8002393238337426e-05, + "loss": 1.6206, + "step": 38016 + }, + { + "epoch": 0.46, + "grad_norm": 18.213645999217015, + "learning_rate": 1.800204290434789e-05, + "loss": 1.3393, + "step": 38019 + }, + { + "epoch": 0.46, + "grad_norm": 27.392981375452017, + "learning_rate": 1.8001692543050257e-05, + "loss": 1.5853, + "step": 38022 + }, + { + "epoch": 0.46, + "grad_norm": 13.215448960465524, + "learning_rate": 1.8001342154445733e-05, + "loss": 1.5307, + "step": 38025 + }, + { + "epoch": 0.46, + "grad_norm": 18.455602281333004, + "learning_rate": 1.8000991738535512e-05, + "loss": 1.4557, + "step": 38028 + }, + { + "epoch": 0.46, + "grad_norm": 21.174987970247624, + "learning_rate": 1.8000641295320785e-05, + "loss": 1.6675, + "step": 38031 + }, + { + "epoch": 0.46, + "grad_norm": 24.092009071651734, + "learning_rate": 1.800029082480275e-05, + "loss": 1.22, + "step": 38034 + }, + { + "epoch": 0.46, + "grad_norm": 8.62039252024399, + "learning_rate": 1.799994032698261e-05, + "loss": 1.5433, + "step": 38037 + }, + { + "epoch": 0.46, + "grad_norm": 15.108749825938538, + "learning_rate": 1.799958980186155e-05, + "loss": 1.6107, + "step": 38040 + }, + { + "epoch": 0.46, + "grad_norm": 26.27591303942284, + "learning_rate": 1.7999239249440773e-05, + "loss": 1.0771, + "step": 38043 + }, + { + "epoch": 0.46, + "grad_norm": 14.524078389945087, + "learning_rate": 1.7998888669721476e-05, + "loss": 1.6455, + "step": 38046 + }, + { + "epoch": 0.46, + "grad_norm": 11.47205433118292, + "learning_rate": 1.7998538062704847e-05, + "loss": 1.7086, + "step": 38049 + }, + { + "epoch": 0.46, + "grad_norm": 19.05812034094853, + "learning_rate": 1.7998187428392095e-05, + "loss": 1.669, + "step": 38052 + }, + { + "epoch": 0.46, + "grad_norm": 19.82559475320358, + "learning_rate": 1.7997836766784406e-05, + "loss": 1.4312, + "step": 38055 + }, + { + "epoch": 0.46, + "grad_norm": 25.970517940364317, + "learning_rate": 1.7997486077882986e-05, + "loss": 1.4685, + "step": 38058 + }, + { + "epoch": 0.46, + "grad_norm": 110.48130039773136, + "learning_rate": 1.799713536168902e-05, + "loss": 1.9849, + "step": 38061 + }, + { + "epoch": 0.46, + "grad_norm": 42.19593558871227, + "learning_rate": 1.7996784618203717e-05, + "loss": 1.4695, + "step": 38064 + }, + { + "epoch": 0.46, + "grad_norm": 103.68552687196186, + "learning_rate": 1.7996433847428267e-05, + "loss": 1.7944, + "step": 38067 + }, + { + "epoch": 0.46, + "grad_norm": 14.688101314058356, + "learning_rate": 1.799608304936387e-05, + "loss": 1.3534, + "step": 38070 + }, + { + "epoch": 0.46, + "grad_norm": 3.4103770748575424, + "learning_rate": 1.799573222401172e-05, + "loss": 1.6213, + "step": 38073 + }, + { + "epoch": 0.46, + "grad_norm": 46.65884766796017, + "learning_rate": 1.7995381371373012e-05, + "loss": 1.5115, + "step": 38076 + }, + { + "epoch": 0.46, + "grad_norm": 15.247657409351243, + "learning_rate": 1.7995030491448955e-05, + "loss": 1.8169, + "step": 38079 + }, + { + "epoch": 0.46, + "grad_norm": 5.9461490913074995, + "learning_rate": 1.7994679584240733e-05, + "loss": 1.2383, + "step": 38082 + }, + { + "epoch": 0.46, + "grad_norm": 21.670226416611996, + "learning_rate": 1.7994328649749553e-05, + "loss": 1.5367, + "step": 38085 + }, + { + "epoch": 0.46, + "grad_norm": 34.901278643125636, + "learning_rate": 1.7993977687976604e-05, + "loss": 1.4881, + "step": 38088 + }, + { + "epoch": 0.46, + "grad_norm": 12.220963277677985, + "learning_rate": 1.799362669892309e-05, + "loss": 1.3041, + "step": 38091 + }, + { + "epoch": 0.46, + "grad_norm": 27.611638092132733, + "learning_rate": 1.7993275682590212e-05, + "loss": 1.4967, + "step": 38094 + }, + { + "epoch": 0.46, + "grad_norm": 4.115789197508877, + "learning_rate": 1.7992924638979162e-05, + "loss": 1.4721, + "step": 38097 + }, + { + "epoch": 0.46, + "grad_norm": 7.298020216987511, + "learning_rate": 1.7992573568091134e-05, + "loss": 1.6549, + "step": 38100 + }, + { + "epoch": 0.46, + "grad_norm": 12.798953168943383, + "learning_rate": 1.7992222469927337e-05, + "loss": 1.1974, + "step": 38103 + }, + { + "epoch": 0.46, + "grad_norm": 20.737074057027932, + "learning_rate": 1.799187134448896e-05, + "loss": 1.4643, + "step": 38106 + }, + { + "epoch": 0.46, + "grad_norm": 43.23246764295069, + "learning_rate": 1.7991520191777207e-05, + "loss": 1.3171, + "step": 38109 + }, + { + "epoch": 0.46, + "grad_norm": 17.59967515554025, + "learning_rate": 1.7991169011793274e-05, + "loss": 1.5234, + "step": 38112 + }, + { + "epoch": 0.46, + "grad_norm": 16.90772027858528, + "learning_rate": 1.799081780453836e-05, + "loss": 1.2354, + "step": 38115 + }, + { + "epoch": 0.46, + "grad_norm": 32.74757008042175, + "learning_rate": 1.799046657001366e-05, + "loss": 1.5173, + "step": 38118 + }, + { + "epoch": 0.46, + "grad_norm": 45.70506534871755, + "learning_rate": 1.799011530822038e-05, + "loss": 1.5311, + "step": 38121 + }, + { + "epoch": 0.46, + "grad_norm": 19.084998165638712, + "learning_rate": 1.798976401915971e-05, + "loss": 1.6211, + "step": 38124 + }, + { + "epoch": 0.46, + "grad_norm": 36.64312912656836, + "learning_rate": 1.7989412702832853e-05, + "loss": 1.4638, + "step": 38127 + }, + { + "epoch": 0.46, + "grad_norm": 19.82069196704476, + "learning_rate": 1.7989061359241012e-05, + "loss": 1.6251, + "step": 38130 + }, + { + "epoch": 0.46, + "grad_norm": 12.148281654693816, + "learning_rate": 1.7988709988385383e-05, + "loss": 1.3607, + "step": 38133 + }, + { + "epoch": 0.46, + "grad_norm": 15.260064494931896, + "learning_rate": 1.798835859026716e-05, + "loss": 1.1172, + "step": 38136 + }, + { + "epoch": 0.46, + "grad_norm": 7.837181666654273, + "learning_rate": 1.7988007164887547e-05, + "loss": 1.7304, + "step": 38139 + }, + { + "epoch": 0.46, + "grad_norm": 20.47574585463657, + "learning_rate": 1.7987655712247746e-05, + "loss": 1.4741, + "step": 38142 + }, + { + "epoch": 0.46, + "grad_norm": 335.9898281134482, + "learning_rate": 1.798730423234895e-05, + "loss": 1.155, + "step": 38145 + }, + { + "epoch": 0.46, + "grad_norm": 21.80770512030473, + "learning_rate": 1.7986952725192365e-05, + "loss": 1.4377, + "step": 38148 + }, + { + "epoch": 0.46, + "grad_norm": 14.093799202139156, + "learning_rate": 1.7986601190779187e-05, + "loss": 1.1777, + "step": 38151 + }, + { + "epoch": 0.46, + "grad_norm": 42.783900114901385, + "learning_rate": 1.7986249629110616e-05, + "loss": 1.3871, + "step": 38154 + }, + { + "epoch": 0.46, + "grad_norm": 11.864627424845589, + "learning_rate": 1.798589804018785e-05, + "loss": 1.2161, + "step": 38157 + }, + { + "epoch": 0.46, + "grad_norm": 7.195114303251293, + "learning_rate": 1.7985546424012088e-05, + "loss": 1.3677, + "step": 38160 + }, + { + "epoch": 0.46, + "grad_norm": 48.62191587133062, + "learning_rate": 1.7985194780584536e-05, + "loss": 1.5316, + "step": 38163 + }, + { + "epoch": 0.46, + "grad_norm": 28.169269587267753, + "learning_rate": 1.798484310990639e-05, + "loss": 1.448, + "step": 38166 + }, + { + "epoch": 0.46, + "grad_norm": 18.55069924266094, + "learning_rate": 1.7984491411978847e-05, + "loss": 1.4166, + "step": 38169 + }, + { + "epoch": 0.46, + "grad_norm": 17.996408779387508, + "learning_rate": 1.7984139686803116e-05, + "loss": 1.21, + "step": 38172 + }, + { + "epoch": 0.46, + "grad_norm": 12.218769653551034, + "learning_rate": 1.798378793438039e-05, + "loss": 1.7061, + "step": 38175 + }, + { + "epoch": 0.46, + "grad_norm": 7.732217867007964, + "learning_rate": 1.7983436154711872e-05, + "loss": 1.5124, + "step": 38178 + }, + { + "epoch": 0.46, + "grad_norm": 10.1164803737185, + "learning_rate": 1.7983084347798762e-05, + "loss": 1.3612, + "step": 38181 + }, + { + "epoch": 0.46, + "grad_norm": 11.323685683693293, + "learning_rate": 1.798273251364226e-05, + "loss": 1.3425, + "step": 38184 + }, + { + "epoch": 0.46, + "grad_norm": 27.381449943240376, + "learning_rate": 1.7982380652243567e-05, + "loss": 1.2903, + "step": 38187 + }, + { + "epoch": 0.46, + "grad_norm": 9.884283470011582, + "learning_rate": 1.7982028763603886e-05, + "loss": 1.7469, + "step": 38190 + }, + { + "epoch": 0.46, + "grad_norm": 7.670177655666777, + "learning_rate": 1.7981676847724413e-05, + "loss": 1.1471, + "step": 38193 + }, + { + "epoch": 0.46, + "grad_norm": 26.353689283519344, + "learning_rate": 1.7981324904606355e-05, + "loss": 1.1386, + "step": 38196 + }, + { + "epoch": 0.46, + "grad_norm": 36.29493020694129, + "learning_rate": 1.7980972934250906e-05, + "loss": 1.3297, + "step": 38199 + }, + { + "epoch": 0.46, + "grad_norm": 22.221151109129778, + "learning_rate": 1.7980620936659275e-05, + "loss": 1.4612, + "step": 38202 + }, + { + "epoch": 0.46, + "grad_norm": 4.817576844434458, + "learning_rate": 1.7980268911832655e-05, + "loss": 1.6974, + "step": 38205 + }, + { + "epoch": 0.46, + "grad_norm": 13.45900004607084, + "learning_rate": 1.7979916859772256e-05, + "loss": 1.6292, + "step": 38208 + }, + { + "epoch": 0.46, + "grad_norm": 11.887057371741095, + "learning_rate": 1.7979564780479276e-05, + "loss": 1.59, + "step": 38211 + }, + { + "epoch": 0.46, + "grad_norm": 26.32248809844569, + "learning_rate": 1.7979212673954912e-05, + "loss": 1.4417, + "step": 38214 + }, + { + "epoch": 0.46, + "grad_norm": 44.15450948660922, + "learning_rate": 1.797886054020037e-05, + "loss": 1.7038, + "step": 38217 + }, + { + "epoch": 0.46, + "grad_norm": 41.550719821224725, + "learning_rate": 1.7978508379216853e-05, + "loss": 1.39, + "step": 38220 + }, + { + "epoch": 0.46, + "grad_norm": 20.199234862504664, + "learning_rate": 1.7978156191005557e-05, + "loss": 1.4255, + "step": 38223 + }, + { + "epoch": 0.46, + "grad_norm": 14.700713344589417, + "learning_rate": 1.797780397556769e-05, + "loss": 1.4434, + "step": 38226 + }, + { + "epoch": 0.46, + "grad_norm": 13.641147995286898, + "learning_rate": 1.797745173290445e-05, + "loss": 1.2988, + "step": 38229 + }, + { + "epoch": 0.46, + "grad_norm": 4.185455532078105, + "learning_rate": 1.7977099463017044e-05, + "loss": 1.1061, + "step": 38232 + }, + { + "epoch": 0.46, + "grad_norm": 15.489016478453449, + "learning_rate": 1.797674716590667e-05, + "loss": 1.2827, + "step": 38235 + }, + { + "epoch": 0.46, + "grad_norm": 13.131157326126232, + "learning_rate": 1.797639484157453e-05, + "loss": 1.4604, + "step": 38238 + }, + { + "epoch": 0.46, + "grad_norm": 9.691607312846253, + "learning_rate": 1.7976042490021828e-05, + "loss": 1.4363, + "step": 38241 + }, + { + "epoch": 0.46, + "grad_norm": 43.491207528799876, + "learning_rate": 1.7975690111249765e-05, + "loss": 1.3044, + "step": 38244 + }, + { + "epoch": 0.46, + "grad_norm": 10.081613928796372, + "learning_rate": 1.7975337705259546e-05, + "loss": 1.2864, + "step": 38247 + }, + { + "epoch": 0.46, + "grad_norm": 12.455978140958672, + "learning_rate": 1.797498527205237e-05, + "loss": 1.5465, + "step": 38250 + }, + { + "epoch": 0.46, + "grad_norm": 17.806324390933998, + "learning_rate": 1.7974632811629443e-05, + "loss": 1.4625, + "step": 38253 + }, + { + "epoch": 0.46, + "grad_norm": 15.95696145360325, + "learning_rate": 1.797428032399197e-05, + "loss": 1.5232, + "step": 38256 + }, + { + "epoch": 0.46, + "grad_norm": 23.7613123832455, + "learning_rate": 1.7973927809141147e-05, + "loss": 1.2857, + "step": 38259 + }, + { + "epoch": 0.46, + "grad_norm": 11.780098431440454, + "learning_rate": 1.7973575267078177e-05, + "loss": 1.1203, + "step": 38262 + }, + { + "epoch": 0.46, + "grad_norm": 11.295844378141691, + "learning_rate": 1.7973222697804275e-05, + "loss": 1.3892, + "step": 38265 + }, + { + "epoch": 0.46, + "grad_norm": 12.563229018101953, + "learning_rate": 1.7972870101320628e-05, + "loss": 1.2861, + "step": 38268 + }, + { + "epoch": 0.46, + "grad_norm": 13.844797652398473, + "learning_rate": 1.7972517477628453e-05, + "loss": 1.2505, + "step": 38271 + }, + { + "epoch": 0.46, + "grad_norm": 36.17740111802432, + "learning_rate": 1.7972164826728943e-05, + "loss": 1.8215, + "step": 38274 + }, + { + "epoch": 0.46, + "grad_norm": 21.72034910229237, + "learning_rate": 1.797181214862331e-05, + "loss": 1.7811, + "step": 38277 + }, + { + "epoch": 0.46, + "grad_norm": 7.130637285546265, + "learning_rate": 1.7971459443312752e-05, + "loss": 1.4257, + "step": 38280 + }, + { + "epoch": 0.46, + "grad_norm": 25.984419078729953, + "learning_rate": 1.7971106710798473e-05, + "loss": 1.1397, + "step": 38283 + }, + { + "epoch": 0.46, + "grad_norm": 34.79921190494718, + "learning_rate": 1.7970753951081677e-05, + "loss": 1.3723, + "step": 38286 + }, + { + "epoch": 0.46, + "grad_norm": 13.470277212271162, + "learning_rate": 1.797040116416357e-05, + "loss": 1.0412, + "step": 38289 + }, + { + "epoch": 0.46, + "grad_norm": 27.619925363885965, + "learning_rate": 1.7970048350045358e-05, + "loss": 1.4106, + "step": 38292 + }, + { + "epoch": 0.46, + "grad_norm": 63.39070277083603, + "learning_rate": 1.7969695508728237e-05, + "loss": 1.4437, + "step": 38295 + }, + { + "epoch": 0.46, + "grad_norm": 19.793184492788242, + "learning_rate": 1.796934264021342e-05, + "loss": 1.2111, + "step": 38298 + }, + { + "epoch": 0.46, + "grad_norm": 14.928836608285438, + "learning_rate": 1.7968989744502106e-05, + "loss": 1.5454, + "step": 38301 + }, + { + "epoch": 0.46, + "grad_norm": 12.104921167307577, + "learning_rate": 1.79686368215955e-05, + "loss": 1.3566, + "step": 38304 + }, + { + "epoch": 0.46, + "grad_norm": 12.94950286334127, + "learning_rate": 1.7968283871494807e-05, + "loss": 1.6689, + "step": 38307 + }, + { + "epoch": 0.46, + "grad_norm": 8.166747160178053, + "learning_rate": 1.7967930894201227e-05, + "loss": 1.3782, + "step": 38310 + }, + { + "epoch": 0.46, + "grad_norm": 11.488107255120838, + "learning_rate": 1.7967577889715975e-05, + "loss": 1.2271, + "step": 38313 + }, + { + "epoch": 0.46, + "grad_norm": 18.446902080855267, + "learning_rate": 1.7967224858040247e-05, + "loss": 1.4457, + "step": 38316 + }, + { + "epoch": 0.46, + "grad_norm": 11.408259595921669, + "learning_rate": 1.796687179917525e-05, + "loss": 1.2907, + "step": 38319 + }, + { + "epoch": 0.46, + "grad_norm": 10.946307823389985, + "learning_rate": 1.7966518713122192e-05, + "loss": 1.1774, + "step": 38322 + }, + { + "epoch": 0.46, + "grad_norm": 6.516579113008565, + "learning_rate": 1.7966165599882274e-05, + "loss": 1.474, + "step": 38325 + }, + { + "epoch": 0.46, + "grad_norm": 15.44422801841351, + "learning_rate": 1.79658124594567e-05, + "loss": 1.4981, + "step": 38328 + }, + { + "epoch": 0.46, + "grad_norm": 19.089357695328363, + "learning_rate": 1.7965459291846676e-05, + "loss": 1.3663, + "step": 38331 + }, + { + "epoch": 0.46, + "grad_norm": 6.563848027644389, + "learning_rate": 1.7965106097053413e-05, + "loss": 1.1905, + "step": 38334 + }, + { + "epoch": 0.46, + "grad_norm": 13.468545297532499, + "learning_rate": 1.796475287507811e-05, + "loss": 1.3112, + "step": 38337 + }, + { + "epoch": 0.46, + "grad_norm": 12.456952777452537, + "learning_rate": 1.7964399625921977e-05, + "loss": 1.2647, + "step": 38340 + }, + { + "epoch": 0.46, + "grad_norm": 66.93272635659974, + "learning_rate": 1.7964046349586213e-05, + "loss": 1.4635, + "step": 38343 + }, + { + "epoch": 0.46, + "grad_norm": 6.856117588445844, + "learning_rate": 1.7963693046072026e-05, + "loss": 1.3802, + "step": 38346 + }, + { + "epoch": 0.46, + "grad_norm": 12.937935107061003, + "learning_rate": 1.7963339715380627e-05, + "loss": 1.4911, + "step": 38349 + }, + { + "epoch": 0.46, + "grad_norm": 30.67613014913867, + "learning_rate": 1.7962986357513216e-05, + "loss": 1.7227, + "step": 38352 + }, + { + "epoch": 0.46, + "grad_norm": 6.0062540387695265, + "learning_rate": 1.7962632972471002e-05, + "loss": 1.3488, + "step": 38355 + }, + { + "epoch": 0.46, + "grad_norm": 3.0908694936968484, + "learning_rate": 1.796227956025519e-05, + "loss": 1.7649, + "step": 38358 + }, + { + "epoch": 0.46, + "grad_norm": 99.919310500216, + "learning_rate": 1.7961926120866983e-05, + "loss": 1.4541, + "step": 38361 + }, + { + "epoch": 0.46, + "grad_norm": 14.651785140779184, + "learning_rate": 1.7961572654307593e-05, + "loss": 1.4779, + "step": 38364 + }, + { + "epoch": 0.46, + "grad_norm": 7.936512863256735, + "learning_rate": 1.7961219160578225e-05, + "loss": 1.3507, + "step": 38367 + }, + { + "epoch": 0.46, + "grad_norm": 13.056967001939153, + "learning_rate": 1.796086563968008e-05, + "loss": 1.7542, + "step": 38370 + }, + { + "epoch": 0.46, + "grad_norm": 12.64884004823915, + "learning_rate": 1.796051209161437e-05, + "loss": 1.4345, + "step": 38373 + }, + { + "epoch": 0.46, + "grad_norm": 8.949601043076616, + "learning_rate": 1.79601585163823e-05, + "loss": 1.3517, + "step": 38376 + }, + { + "epoch": 0.46, + "grad_norm": 12.63024237476921, + "learning_rate": 1.7959804913985072e-05, + "loss": 1.0678, + "step": 38379 + }, + { + "epoch": 0.46, + "grad_norm": 11.019528818481199, + "learning_rate": 1.79594512844239e-05, + "loss": 1.4145, + "step": 38382 + }, + { + "epoch": 0.46, + "grad_norm": 29.81413521614889, + "learning_rate": 1.7959097627699987e-05, + "loss": 1.3071, + "step": 38385 + }, + { + "epoch": 0.46, + "grad_norm": 9.89155254891331, + "learning_rate": 1.795874394381454e-05, + "loss": 1.4378, + "step": 38388 + }, + { + "epoch": 0.46, + "grad_norm": 16.64046312925899, + "learning_rate": 1.795839023276877e-05, + "loss": 1.8539, + "step": 38391 + }, + { + "epoch": 0.46, + "grad_norm": 27.85863846402834, + "learning_rate": 1.795803649456388e-05, + "loss": 1.2856, + "step": 38394 + }, + { + "epoch": 0.46, + "grad_norm": 13.389098051228961, + "learning_rate": 1.7957682729201076e-05, + "loss": 1.6149, + "step": 38397 + }, + { + "epoch": 0.46, + "grad_norm": 18.114916126343456, + "learning_rate": 1.7957328936681568e-05, + "loss": 1.4403, + "step": 38400 + }, + { + "epoch": 0.46, + "grad_norm": 57.20862603038785, + "learning_rate": 1.7956975117006562e-05, + "loss": 1.5624, + "step": 38403 + }, + { + "epoch": 0.46, + "grad_norm": 10.695361621738732, + "learning_rate": 1.7956621270177267e-05, + "loss": 1.3537, + "step": 38406 + }, + { + "epoch": 0.46, + "grad_norm": 19.128504372038602, + "learning_rate": 1.795626739619489e-05, + "loss": 1.2804, + "step": 38409 + }, + { + "epoch": 0.46, + "grad_norm": 2.963792923610236, + "learning_rate": 1.7955913495060638e-05, + "loss": 1.5613, + "step": 38412 + }, + { + "epoch": 0.46, + "grad_norm": 12.7492470589576, + "learning_rate": 1.7955559566775714e-05, + "loss": 1.6329, + "step": 38415 + }, + { + "epoch": 0.46, + "grad_norm": 2.3877142845723376, + "learning_rate": 1.7955205611341338e-05, + "loss": 1.3832, + "step": 38418 + }, + { + "epoch": 0.46, + "grad_norm": 6.698836859894374, + "learning_rate": 1.7954851628758705e-05, + "loss": 1.7684, + "step": 38421 + }, + { + "epoch": 0.46, + "grad_norm": 20.056313373343798, + "learning_rate": 1.7954497619029033e-05, + "loss": 1.6525, + "step": 38424 + }, + { + "epoch": 0.46, + "grad_norm": 13.094408705847352, + "learning_rate": 1.7954143582153523e-05, + "loss": 1.5021, + "step": 38427 + }, + { + "epoch": 0.46, + "grad_norm": 8.208058003166359, + "learning_rate": 1.795378951813339e-05, + "loss": 1.5718, + "step": 38430 + }, + { + "epoch": 0.46, + "grad_norm": 26.105410216224772, + "learning_rate": 1.7953435426969833e-05, + "loss": 1.2309, + "step": 38433 + }, + { + "epoch": 0.46, + "grad_norm": 12.947431024588914, + "learning_rate": 1.795308130866407e-05, + "loss": 1.5621, + "step": 38436 + }, + { + "epoch": 0.46, + "grad_norm": 42.324410894461785, + "learning_rate": 1.79527271632173e-05, + "loss": 1.5027, + "step": 38439 + }, + { + "epoch": 0.46, + "grad_norm": 37.89538266227596, + "learning_rate": 1.7952372990630742e-05, + "loss": 1.4077, + "step": 38442 + }, + { + "epoch": 0.46, + "grad_norm": 44.13305782674882, + "learning_rate": 1.7952018790905596e-05, + "loss": 1.3308, + "step": 38445 + }, + { + "epoch": 0.46, + "grad_norm": 27.45441068799337, + "learning_rate": 1.795166456404308e-05, + "loss": 1.5258, + "step": 38448 + }, + { + "epoch": 0.46, + "grad_norm": 25.845440229528325, + "learning_rate": 1.795131031004439e-05, + "loss": 1.504, + "step": 38451 + }, + { + "epoch": 0.46, + "grad_norm": 31.386038575402022, + "learning_rate": 1.7950956028910743e-05, + "loss": 1.4226, + "step": 38454 + }, + { + "epoch": 0.46, + "grad_norm": 23.02262677420698, + "learning_rate": 1.795060172064335e-05, + "loss": 1.573, + "step": 38457 + }, + { + "epoch": 0.46, + "grad_norm": 7.325998963527967, + "learning_rate": 1.7950247385243417e-05, + "loss": 1.2718, + "step": 38460 + }, + { + "epoch": 0.46, + "grad_norm": 29.630684468260384, + "learning_rate": 1.794989302271215e-05, + "loss": 1.5962, + "step": 38463 + }, + { + "epoch": 0.46, + "grad_norm": 11.380889417396716, + "learning_rate": 1.7949538633050763e-05, + "loss": 1.2346, + "step": 38466 + }, + { + "epoch": 0.46, + "grad_norm": 29.913574422537863, + "learning_rate": 1.7949184216260468e-05, + "loss": 1.3766, + "step": 38469 + }, + { + "epoch": 0.46, + "grad_norm": 27.325471843540743, + "learning_rate": 1.7948829772342466e-05, + "loss": 1.5745, + "step": 38472 + }, + { + "epoch": 0.46, + "grad_norm": 9.880915822519833, + "learning_rate": 1.7948475301297975e-05, + "loss": 1.4629, + "step": 38475 + }, + { + "epoch": 0.46, + "grad_norm": 6.978888485153194, + "learning_rate": 1.7948120803128194e-05, + "loss": 1.6143, + "step": 38478 + }, + { + "epoch": 0.46, + "grad_norm": 10.979618773623288, + "learning_rate": 1.7947766277834347e-05, + "loss": 1.9123, + "step": 38481 + }, + { + "epoch": 0.46, + "grad_norm": 11.667758603675265, + "learning_rate": 1.7947411725417634e-05, + "loss": 1.6139, + "step": 38484 + }, + { + "epoch": 0.46, + "grad_norm": 3.051066076791886, + "learning_rate": 1.7947057145879265e-05, + "loss": 1.6145, + "step": 38487 + }, + { + "epoch": 0.46, + "grad_norm": 4.781137610745792, + "learning_rate": 1.7946702539220457e-05, + "loss": 1.5024, + "step": 38490 + }, + { + "epoch": 0.46, + "grad_norm": 13.972406467024806, + "learning_rate": 1.794634790544241e-05, + "loss": 1.1558, + "step": 38493 + }, + { + "epoch": 0.46, + "grad_norm": 15.115380790241806, + "learning_rate": 1.794599324454634e-05, + "loss": 1.6513, + "step": 38496 + }, + { + "epoch": 0.46, + "grad_norm": 6.344588864949703, + "learning_rate": 1.794563855653346e-05, + "loss": 1.3551, + "step": 38499 + }, + { + "epoch": 0.46, + "grad_norm": 7.4167170740650255, + "learning_rate": 1.7945283841404982e-05, + "loss": 1.6748, + "step": 38502 + }, + { + "epoch": 0.46, + "grad_norm": 7.107940595531117, + "learning_rate": 1.7944929099162107e-05, + "loss": 1.4683, + "step": 38505 + }, + { + "epoch": 0.46, + "grad_norm": 6.661813457251311, + "learning_rate": 1.7944574329806054e-05, + "loss": 1.2655, + "step": 38508 + }, + { + "epoch": 0.46, + "grad_norm": 16.803360591543303, + "learning_rate": 1.7944219533338024e-05, + "loss": 1.1775, + "step": 38511 + }, + { + "epoch": 0.46, + "grad_norm": 10.759970655521707, + "learning_rate": 1.794386470975924e-05, + "loss": 1.4183, + "step": 38514 + }, + { + "epoch": 0.46, + "grad_norm": 12.492835386003092, + "learning_rate": 1.7943509859070905e-05, + "loss": 1.5086, + "step": 38517 + }, + { + "epoch": 0.46, + "grad_norm": 23.574267154524236, + "learning_rate": 1.794315498127423e-05, + "loss": 1.5601, + "step": 38520 + }, + { + "epoch": 0.46, + "grad_norm": 11.207250195127743, + "learning_rate": 1.7942800076370432e-05, + "loss": 1.437, + "step": 38523 + }, + { + "epoch": 0.46, + "grad_norm": 9.9909752882436, + "learning_rate": 1.7942445144360716e-05, + "loss": 1.1984, + "step": 38526 + }, + { + "epoch": 0.46, + "grad_norm": 13.33762438014521, + "learning_rate": 1.7942090185246297e-05, + "loss": 1.3617, + "step": 38529 + }, + { + "epoch": 0.46, + "grad_norm": 25.134325444062426, + "learning_rate": 1.7941735199028386e-05, + "loss": 1.7421, + "step": 38532 + }, + { + "epoch": 0.46, + "grad_norm": 40.576098630447895, + "learning_rate": 1.7941380185708192e-05, + "loss": 1.2905, + "step": 38535 + }, + { + "epoch": 0.46, + "grad_norm": 19.636978135901824, + "learning_rate": 1.7941025145286928e-05, + "loss": 1.6817, + "step": 38538 + }, + { + "epoch": 0.46, + "grad_norm": 40.15270559251711, + "learning_rate": 1.7940670077765804e-05, + "loss": 1.5986, + "step": 38541 + }, + { + "epoch": 0.46, + "grad_norm": 47.18128794639315, + "learning_rate": 1.7940314983146036e-05, + "loss": 1.6816, + "step": 38544 + }, + { + "epoch": 0.46, + "grad_norm": 12.797348871188747, + "learning_rate": 1.793995986142883e-05, + "loss": 1.4439, + "step": 38547 + }, + { + "epoch": 0.46, + "grad_norm": 7.467257672034933, + "learning_rate": 1.7939604712615403e-05, + "loss": 1.2399, + "step": 38550 + }, + { + "epoch": 0.46, + "grad_norm": 41.51166315590974, + "learning_rate": 1.7939249536706964e-05, + "loss": 1.7314, + "step": 38553 + }, + { + "epoch": 0.46, + "grad_norm": 13.716317043339924, + "learning_rate": 1.7938894333704725e-05, + "loss": 1.4121, + "step": 38556 + }, + { + "epoch": 0.46, + "grad_norm": 12.114014235593364, + "learning_rate": 1.79385391036099e-05, + "loss": 1.4088, + "step": 38559 + }, + { + "epoch": 0.46, + "grad_norm": 10.404867875992318, + "learning_rate": 1.79381838464237e-05, + "loss": 1.5888, + "step": 38562 + }, + { + "epoch": 0.46, + "grad_norm": 6.329802883082081, + "learning_rate": 1.793782856214734e-05, + "loss": 1.4857, + "step": 38565 + }, + { + "epoch": 0.46, + "grad_norm": 10.980263067893496, + "learning_rate": 1.793747325078203e-05, + "loss": 1.3806, + "step": 38568 + }, + { + "epoch": 0.46, + "grad_norm": 24.191809328178177, + "learning_rate": 1.793711791232898e-05, + "loss": 1.2517, + "step": 38571 + }, + { + "epoch": 0.46, + "grad_norm": 7.701250383146917, + "learning_rate": 1.7936762546789407e-05, + "loss": 1.3092, + "step": 38574 + }, + { + "epoch": 0.46, + "grad_norm": 9.52811695450226, + "learning_rate": 1.7936407154164522e-05, + "loss": 1.4621, + "step": 38577 + }, + { + "epoch": 0.46, + "grad_norm": 10.284063719102647, + "learning_rate": 1.793605173445554e-05, + "loss": 1.3607, + "step": 38580 + }, + { + "epoch": 0.46, + "grad_norm": 49.085040103127604, + "learning_rate": 1.7935696287663672e-05, + "loss": 1.1585, + "step": 38583 + }, + { + "epoch": 0.46, + "grad_norm": 7.609400405307699, + "learning_rate": 1.7935340813790133e-05, + "loss": 1.2836, + "step": 38586 + }, + { + "epoch": 0.46, + "grad_norm": 6.642307779265148, + "learning_rate": 1.7934985312836128e-05, + "loss": 1.6279, + "step": 38589 + }, + { + "epoch": 0.46, + "grad_norm": 16.084439741939565, + "learning_rate": 1.793462978480288e-05, + "loss": 1.3808, + "step": 38592 + }, + { + "epoch": 0.46, + "grad_norm": 2.7378114672802103, + "learning_rate": 1.7934274229691596e-05, + "loss": 1.5178, + "step": 38595 + }, + { + "epoch": 0.46, + "grad_norm": 10.90630125237427, + "learning_rate": 1.7933918647503495e-05, + "loss": 1.0885, + "step": 38598 + }, + { + "epoch": 0.46, + "grad_norm": 5.3385943741860675, + "learning_rate": 1.7933563038239786e-05, + "loss": 1.4774, + "step": 38601 + }, + { + "epoch": 0.46, + "grad_norm": 3.0935381055767652, + "learning_rate": 1.7933207401901686e-05, + "loss": 1.4802, + "step": 38604 + }, + { + "epoch": 0.46, + "grad_norm": 32.21942005048035, + "learning_rate": 1.7932851738490403e-05, + "loss": 1.3513, + "step": 38607 + }, + { + "epoch": 0.46, + "grad_norm": 4.530751624344494, + "learning_rate": 1.7932496048007158e-05, + "loss": 1.1743, + "step": 38610 + }, + { + "epoch": 0.46, + "grad_norm": 7.067329831065287, + "learning_rate": 1.7932140330453157e-05, + "loss": 1.188, + "step": 38613 + }, + { + "epoch": 0.46, + "grad_norm": 67.42663842504858, + "learning_rate": 1.7931784585829622e-05, + "loss": 1.7181, + "step": 38616 + }, + { + "epoch": 0.46, + "grad_norm": 14.004371849574099, + "learning_rate": 1.7931428814137764e-05, + "loss": 1.4626, + "step": 38619 + }, + { + "epoch": 0.46, + "grad_norm": 24.25975643188754, + "learning_rate": 1.7931073015378792e-05, + "loss": 1.3963, + "step": 38622 + }, + { + "epoch": 0.46, + "grad_norm": 8.498299593485969, + "learning_rate": 1.7930717189553926e-05, + "loss": 1.7417, + "step": 38625 + }, + { + "epoch": 0.46, + "grad_norm": 23.80107642498753, + "learning_rate": 1.7930361336664382e-05, + "loss": 1.6497, + "step": 38628 + }, + { + "epoch": 0.46, + "grad_norm": 20.195242771548386, + "learning_rate": 1.793000545671137e-05, + "loss": 1.3076, + "step": 38631 + }, + { + "epoch": 0.46, + "grad_norm": 17.929556774953404, + "learning_rate": 1.79296495496961e-05, + "loss": 1.326, + "step": 38634 + }, + { + "epoch": 0.46, + "grad_norm": 7.841575102043615, + "learning_rate": 1.79292936156198e-05, + "loss": 1.2265, + "step": 38637 + }, + { + "epoch": 0.46, + "grad_norm": 9.830619141611452, + "learning_rate": 1.7928937654483674e-05, + "loss": 1.5809, + "step": 38640 + }, + { + "epoch": 0.46, + "grad_norm": 23.74597762488485, + "learning_rate": 1.792858166628894e-05, + "loss": 1.5911, + "step": 38643 + }, + { + "epoch": 0.46, + "grad_norm": 28.59654651509675, + "learning_rate": 1.7928225651036815e-05, + "loss": 1.8094, + "step": 38646 + }, + { + "epoch": 0.46, + "grad_norm": 11.45776361390578, + "learning_rate": 1.7927869608728508e-05, + "loss": 1.1889, + "step": 38649 + }, + { + "epoch": 0.46, + "grad_norm": 47.484153450468305, + "learning_rate": 1.7927513539365237e-05, + "loss": 1.1203, + "step": 38652 + }, + { + "epoch": 0.46, + "grad_norm": 5.1616693962601925, + "learning_rate": 1.792715744294822e-05, + "loss": 1.5602, + "step": 38655 + }, + { + "epoch": 0.46, + "grad_norm": 10.60442143599281, + "learning_rate": 1.7926801319478668e-05, + "loss": 1.1967, + "step": 38658 + }, + { + "epoch": 0.46, + "grad_norm": 19.75164937589943, + "learning_rate": 1.7926445168957802e-05, + "loss": 1.5177, + "step": 38661 + }, + { + "epoch": 0.46, + "grad_norm": 8.190157149619685, + "learning_rate": 1.792608899138683e-05, + "loss": 1.2997, + "step": 38664 + }, + { + "epoch": 0.46, + "grad_norm": 68.3156279968787, + "learning_rate": 1.7925732786766974e-05, + "loss": 1.3627, + "step": 38667 + }, + { + "epoch": 0.46, + "grad_norm": 4.021561072577918, + "learning_rate": 1.7925376555099444e-05, + "loss": 1.1663, + "step": 38670 + }, + { + "epoch": 0.47, + "grad_norm": 6.758721018236547, + "learning_rate": 1.792502029638546e-05, + "loss": 1.5939, + "step": 38673 + }, + { + "epoch": 0.47, + "grad_norm": 12.072233801856948, + "learning_rate": 1.7924664010626238e-05, + "loss": 1.435, + "step": 38676 + }, + { + "epoch": 0.47, + "grad_norm": 11.040567336542052, + "learning_rate": 1.792430769782299e-05, + "loss": 1.4424, + "step": 38679 + }, + { + "epoch": 0.47, + "grad_norm": 21.55910222885679, + "learning_rate": 1.7923951357976936e-05, + "loss": 1.5384, + "step": 38682 + }, + { + "epoch": 0.47, + "grad_norm": 23.59786375597654, + "learning_rate": 1.7923594991089287e-05, + "loss": 1.6699, + "step": 38685 + }, + { + "epoch": 0.47, + "grad_norm": 70.77853114521145, + "learning_rate": 1.7923238597161266e-05, + "loss": 1.7281, + "step": 38688 + }, + { + "epoch": 0.47, + "grad_norm": 3.0771866236313703, + "learning_rate": 1.7922882176194086e-05, + "loss": 1.186, + "step": 38691 + }, + { + "epoch": 0.47, + "grad_norm": 8.489961215179967, + "learning_rate": 1.792252572818896e-05, + "loss": 1.4462, + "step": 38694 + }, + { + "epoch": 0.47, + "grad_norm": 74.43857862640441, + "learning_rate": 1.7922169253147107e-05, + "loss": 0.9096, + "step": 38697 + }, + { + "epoch": 0.47, + "grad_norm": 35.06495587625785, + "learning_rate": 1.7921812751069743e-05, + "loss": 1.2009, + "step": 38700 + }, + { + "epoch": 0.47, + "grad_norm": 31.79423939905789, + "learning_rate": 1.792145622195809e-05, + "loss": 1.5607, + "step": 38703 + }, + { + "epoch": 0.47, + "grad_norm": 33.02409714104572, + "learning_rate": 1.792109966581336e-05, + "loss": 1.7137, + "step": 38706 + }, + { + "epoch": 0.47, + "grad_norm": 77.18458825954173, + "learning_rate": 1.7920743082636765e-05, + "loss": 1.3833, + "step": 38709 + }, + { + "epoch": 0.47, + "grad_norm": 16.206662296566265, + "learning_rate": 1.7920386472429533e-05, + "loss": 1.2293, + "step": 38712 + }, + { + "epoch": 0.47, + "grad_norm": 29.819140443556737, + "learning_rate": 1.792002983519287e-05, + "loss": 1.4632, + "step": 38715 + }, + { + "epoch": 0.47, + "grad_norm": 21.42542679195119, + "learning_rate": 1.7919673170928e-05, + "loss": 1.2317, + "step": 38718 + }, + { + "epoch": 0.47, + "grad_norm": 4.059119182249421, + "learning_rate": 1.7919316479636137e-05, + "loss": 1.4762, + "step": 38721 + }, + { + "epoch": 0.47, + "grad_norm": 7.163332925874939, + "learning_rate": 1.79189597613185e-05, + "loss": 1.3796, + "step": 38724 + }, + { + "epoch": 0.47, + "grad_norm": 12.522020651760574, + "learning_rate": 1.7918603015976308e-05, + "loss": 1.5178, + "step": 38727 + }, + { + "epoch": 0.47, + "grad_norm": 26.335611274867063, + "learning_rate": 1.7918246243610772e-05, + "loss": 1.3762, + "step": 38730 + }, + { + "epoch": 0.47, + "grad_norm": 55.809886447798554, + "learning_rate": 1.7917889444223116e-05, + "loss": 1.1966, + "step": 38733 + }, + { + "epoch": 0.47, + "grad_norm": 64.69401380074154, + "learning_rate": 1.7917532617814556e-05, + "loss": 1.4921, + "step": 38736 + }, + { + "epoch": 0.47, + "grad_norm": 7.084549092326922, + "learning_rate": 1.7917175764386306e-05, + "loss": 1.2675, + "step": 38739 + }, + { + "epoch": 0.47, + "grad_norm": 9.730401192570882, + "learning_rate": 1.791681888393959e-05, + "loss": 1.2609, + "step": 38742 + }, + { + "epoch": 0.47, + "grad_norm": 12.895360421703288, + "learning_rate": 1.791646197647562e-05, + "loss": 1.1434, + "step": 38745 + }, + { + "epoch": 0.47, + "grad_norm": 6.000238026230944, + "learning_rate": 1.7916105041995622e-05, + "loss": 1.5972, + "step": 38748 + }, + { + "epoch": 0.47, + "grad_norm": 27.352104204727272, + "learning_rate": 1.7915748080500805e-05, + "loss": 1.3973, + "step": 38751 + }, + { + "epoch": 0.47, + "grad_norm": 6.508102656847803, + "learning_rate": 1.791539109199239e-05, + "loss": 1.3842, + "step": 38754 + }, + { + "epoch": 0.47, + "grad_norm": 16.922017681306155, + "learning_rate": 1.7915034076471598e-05, + "loss": 1.4111, + "step": 38757 + }, + { + "epoch": 0.47, + "grad_norm": 7.557755963692895, + "learning_rate": 1.7914677033939645e-05, + "loss": 1.5931, + "step": 38760 + }, + { + "epoch": 0.47, + "grad_norm": 17.14553017885242, + "learning_rate": 1.7914319964397752e-05, + "loss": 1.6097, + "step": 38763 + }, + { + "epoch": 0.47, + "grad_norm": 9.247865506280997, + "learning_rate": 1.7913962867847133e-05, + "loss": 1.3708, + "step": 38766 + }, + { + "epoch": 0.47, + "grad_norm": 32.673306529591976, + "learning_rate": 1.791360574428901e-05, + "loss": 1.6138, + "step": 38769 + }, + { + "epoch": 0.47, + "grad_norm": 11.814634944336948, + "learning_rate": 1.7913248593724605e-05, + "loss": 1.6237, + "step": 38772 + }, + { + "epoch": 0.47, + "grad_norm": 14.568993026158706, + "learning_rate": 1.791289141615513e-05, + "loss": 1.6471, + "step": 38775 + }, + { + "epoch": 0.47, + "grad_norm": 22.482541224007427, + "learning_rate": 1.7912534211581805e-05, + "loss": 1.1084, + "step": 38778 + }, + { + "epoch": 0.47, + "grad_norm": 3.346799917121974, + "learning_rate": 1.791217698000585e-05, + "loss": 1.1015, + "step": 38781 + }, + { + "epoch": 0.47, + "grad_norm": 18.866386539889582, + "learning_rate": 1.791181972142849e-05, + "loss": 1.2192, + "step": 38784 + }, + { + "epoch": 0.47, + "grad_norm": 23.17501558564229, + "learning_rate": 1.7911462435850936e-05, + "loss": 1.406, + "step": 38787 + }, + { + "epoch": 0.47, + "grad_norm": 34.00574368479432, + "learning_rate": 1.791110512327441e-05, + "loss": 1.4624, + "step": 38790 + }, + { + "epoch": 0.47, + "grad_norm": 27.856378444131728, + "learning_rate": 1.791074778370013e-05, + "loss": 1.3392, + "step": 38793 + }, + { + "epoch": 0.47, + "grad_norm": 15.028169663522432, + "learning_rate": 1.791039041712932e-05, + "loss": 1.5529, + "step": 38796 + }, + { + "epoch": 0.47, + "grad_norm": 5.532447585947174, + "learning_rate": 1.79100330235632e-05, + "loss": 1.6834, + "step": 38799 + }, + { + "epoch": 0.47, + "grad_norm": 13.576882379898976, + "learning_rate": 1.790967560300298e-05, + "loss": 1.2301, + "step": 38802 + }, + { + "epoch": 0.47, + "grad_norm": 31.801042114972123, + "learning_rate": 1.790931815544989e-05, + "loss": 1.298, + "step": 38805 + }, + { + "epoch": 0.47, + "grad_norm": 131.56926882017132, + "learning_rate": 1.7908960680905146e-05, + "loss": 1.8873, + "step": 38808 + }, + { + "epoch": 0.47, + "grad_norm": 3.158170930576089, + "learning_rate": 1.790860317936997e-05, + "loss": 1.6341, + "step": 38811 + }, + { + "epoch": 0.47, + "grad_norm": 11.336461068970905, + "learning_rate": 1.7908245650845574e-05, + "loss": 1.1716, + "step": 38814 + }, + { + "epoch": 0.47, + "grad_norm": 11.34139317797393, + "learning_rate": 1.7907888095333188e-05, + "loss": 1.5019, + "step": 38817 + }, + { + "epoch": 0.47, + "grad_norm": 4.85616188673527, + "learning_rate": 1.7907530512834027e-05, + "loss": 1.4349, + "step": 38820 + }, + { + "epoch": 0.47, + "grad_norm": 21.681890090627288, + "learning_rate": 1.7907172903349312e-05, + "loss": 1.3979, + "step": 38823 + }, + { + "epoch": 0.47, + "grad_norm": 9.911908542815032, + "learning_rate": 1.7906815266880265e-05, + "loss": 1.5765, + "step": 38826 + }, + { + "epoch": 0.47, + "grad_norm": 23.049802792371768, + "learning_rate": 1.7906457603428106e-05, + "loss": 1.5819, + "step": 38829 + }, + { + "epoch": 0.47, + "grad_norm": 17.42727764090449, + "learning_rate": 1.7906099912994058e-05, + "loss": 1.3008, + "step": 38832 + }, + { + "epoch": 0.47, + "grad_norm": 62.72990381461154, + "learning_rate": 1.7905742195579333e-05, + "loss": 1.5935, + "step": 38835 + }, + { + "epoch": 0.47, + "grad_norm": 5.450701604965852, + "learning_rate": 1.790538445118516e-05, + "loss": 1.5768, + "step": 38838 + }, + { + "epoch": 0.47, + "grad_norm": 8.927034688444305, + "learning_rate": 1.7905026679812756e-05, + "loss": 1.3947, + "step": 38841 + }, + { + "epoch": 0.47, + "grad_norm": 7.652319243692687, + "learning_rate": 1.7904668881463346e-05, + "loss": 1.4657, + "step": 38844 + }, + { + "epoch": 0.47, + "grad_norm": 19.272314835303366, + "learning_rate": 1.790431105613815e-05, + "loss": 1.6242, + "step": 38847 + }, + { + "epoch": 0.47, + "grad_norm": 7.029668451472104, + "learning_rate": 1.7903953203838385e-05, + "loss": 1.085, + "step": 38850 + }, + { + "epoch": 0.47, + "grad_norm": 5.350692622416659, + "learning_rate": 1.7903595324565274e-05, + "loss": 1.5492, + "step": 38853 + }, + { + "epoch": 0.47, + "grad_norm": 3.6975928421520594, + "learning_rate": 1.7903237418320038e-05, + "loss": 1.2526, + "step": 38856 + }, + { + "epoch": 0.47, + "grad_norm": 13.154431445675796, + "learning_rate": 1.79028794851039e-05, + "loss": 1.4366, + "step": 38859 + }, + { + "epoch": 0.47, + "grad_norm": 5.500485632590882, + "learning_rate": 1.7902521524918085e-05, + "loss": 1.2833, + "step": 38862 + }, + { + "epoch": 0.47, + "grad_norm": 9.631108800986523, + "learning_rate": 1.7902163537763805e-05, + "loss": 1.0765, + "step": 38865 + }, + { + "epoch": 0.47, + "grad_norm": 11.198027205566882, + "learning_rate": 1.790180552364229e-05, + "loss": 1.709, + "step": 38868 + }, + { + "epoch": 0.47, + "grad_norm": 32.26907716119343, + "learning_rate": 1.7901447482554758e-05, + "loss": 0.9875, + "step": 38871 + }, + { + "epoch": 0.47, + "grad_norm": 4.969883473161891, + "learning_rate": 1.7901089414502434e-05, + "loss": 1.8081, + "step": 38874 + }, + { + "epoch": 0.47, + "grad_norm": 36.910864676022186, + "learning_rate": 1.7900731319486534e-05, + "loss": 2.0157, + "step": 38877 + }, + { + "epoch": 0.47, + "grad_norm": 9.729708459913358, + "learning_rate": 1.7900373197508288e-05, + "loss": 1.3269, + "step": 38880 + }, + { + "epoch": 0.47, + "grad_norm": 41.29925292015899, + "learning_rate": 1.790001504856891e-05, + "loss": 1.4563, + "step": 38883 + }, + { + "epoch": 0.47, + "grad_norm": 11.138451694913615, + "learning_rate": 1.789965687266963e-05, + "loss": 1.7077, + "step": 38886 + }, + { + "epoch": 0.47, + "grad_norm": 25.703160459522934, + "learning_rate": 1.7899298669811663e-05, + "loss": 1.2289, + "step": 38889 + }, + { + "epoch": 0.47, + "grad_norm": 11.787387579623557, + "learning_rate": 1.7898940439996237e-05, + "loss": 2.0425, + "step": 38892 + }, + { + "epoch": 0.47, + "grad_norm": 13.812443254796401, + "learning_rate": 1.7898582183224572e-05, + "loss": 1.4272, + "step": 38895 + }, + { + "epoch": 0.47, + "grad_norm": 8.757873752648955, + "learning_rate": 1.789822389949789e-05, + "loss": 1.6175, + "step": 38898 + }, + { + "epoch": 0.47, + "grad_norm": 2.300646122077772, + "learning_rate": 1.7897865588817418e-05, + "loss": 1.527, + "step": 38901 + }, + { + "epoch": 0.47, + "grad_norm": 13.244965748628319, + "learning_rate": 1.7897507251184373e-05, + "loss": 1.6358, + "step": 38904 + }, + { + "epoch": 0.47, + "grad_norm": 24.899410014155077, + "learning_rate": 1.7897148886599984e-05, + "loss": 1.536, + "step": 38907 + }, + { + "epoch": 0.47, + "grad_norm": 11.448625878508945, + "learning_rate": 1.7896790495065466e-05, + "loss": 1.4691, + "step": 38910 + }, + { + "epoch": 0.47, + "grad_norm": 10.249824684762777, + "learning_rate": 1.7896432076582046e-05, + "loss": 1.6227, + "step": 38913 + }, + { + "epoch": 0.47, + "grad_norm": 12.53667347292857, + "learning_rate": 1.7896073631150953e-05, + "loss": 1.5522, + "step": 38916 + }, + { + "epoch": 0.47, + "grad_norm": 8.309839484224854, + "learning_rate": 1.78957151587734e-05, + "loss": 1.3746, + "step": 38919 + }, + { + "epoch": 0.47, + "grad_norm": 7.929292398024451, + "learning_rate": 1.7895356659450614e-05, + "loss": 1.5321, + "step": 38922 + }, + { + "epoch": 0.47, + "grad_norm": 5.989849704007112, + "learning_rate": 1.7894998133183824e-05, + "loss": 1.4678, + "step": 38925 + }, + { + "epoch": 0.47, + "grad_norm": 9.79841161677783, + "learning_rate": 1.7894639579974247e-05, + "loss": 1.7184, + "step": 38928 + }, + { + "epoch": 0.47, + "grad_norm": 20.35078386486425, + "learning_rate": 1.789428099982311e-05, + "loss": 1.5528, + "step": 38931 + }, + { + "epoch": 0.47, + "grad_norm": 19.96890163260522, + "learning_rate": 1.7893922392731632e-05, + "loss": 1.4714, + "step": 38934 + }, + { + "epoch": 0.47, + "grad_norm": 14.316617044986888, + "learning_rate": 1.789356375870104e-05, + "loss": 1.4294, + "step": 38937 + }, + { + "epoch": 0.47, + "grad_norm": 5.4306496905946755, + "learning_rate": 1.7893205097732563e-05, + "loss": 1.2548, + "step": 38940 + }, + { + "epoch": 0.47, + "grad_norm": 7.272452843815338, + "learning_rate": 1.789284640982742e-05, + "loss": 1.3979, + "step": 38943 + }, + { + "epoch": 0.47, + "grad_norm": 17.382629817317614, + "learning_rate": 1.789248769498683e-05, + "loss": 1.3309, + "step": 38946 + }, + { + "epoch": 0.47, + "grad_norm": 15.826097432205426, + "learning_rate": 1.7892128953212028e-05, + "loss": 1.7329, + "step": 38949 + }, + { + "epoch": 0.47, + "grad_norm": 4.099262687414236, + "learning_rate": 1.789177018450423e-05, + "loss": 1.1709, + "step": 38952 + }, + { + "epoch": 0.47, + "grad_norm": 27.079188183258545, + "learning_rate": 1.789141138886466e-05, + "loss": 1.6798, + "step": 38955 + }, + { + "epoch": 0.47, + "grad_norm": 46.250407910123236, + "learning_rate": 1.789105256629455e-05, + "loss": 1.5965, + "step": 38958 + }, + { + "epoch": 0.47, + "grad_norm": 9.73243523254994, + "learning_rate": 1.789069371679512e-05, + "loss": 1.4366, + "step": 38961 + }, + { + "epoch": 0.47, + "grad_norm": 14.511867052831288, + "learning_rate": 1.789033484036759e-05, + "loss": 1.8838, + "step": 38964 + }, + { + "epoch": 0.47, + "grad_norm": 26.759698998393162, + "learning_rate": 1.7889975937013194e-05, + "loss": 1.5461, + "step": 38967 + }, + { + "epoch": 0.47, + "grad_norm": 6.115885491051618, + "learning_rate": 1.7889617006733152e-05, + "loss": 1.5104, + "step": 38970 + }, + { + "epoch": 0.47, + "grad_norm": 7.311480811541244, + "learning_rate": 1.7889258049528683e-05, + "loss": 1.2236, + "step": 38973 + }, + { + "epoch": 0.47, + "grad_norm": 21.512285029464167, + "learning_rate": 1.7888899065401025e-05, + "loss": 1.4433, + "step": 38976 + }, + { + "epoch": 0.47, + "grad_norm": 9.542276972134982, + "learning_rate": 1.7888540054351393e-05, + "loss": 1.5754, + "step": 38979 + }, + { + "epoch": 0.47, + "grad_norm": 13.316343662120763, + "learning_rate": 1.7888181016381015e-05, + "loss": 1.4012, + "step": 38982 + }, + { + "epoch": 0.47, + "grad_norm": 11.55328354387732, + "learning_rate": 1.7887821951491116e-05, + "loss": 1.4368, + "step": 38985 + }, + { + "epoch": 0.47, + "grad_norm": 11.166600587609464, + "learning_rate": 1.7887462859682924e-05, + "loss": 1.5624, + "step": 38988 + }, + { + "epoch": 0.47, + "grad_norm": 22.341452483858042, + "learning_rate": 1.788710374095766e-05, + "loss": 1.5018, + "step": 38991 + }, + { + "epoch": 0.47, + "grad_norm": 12.855527895938856, + "learning_rate": 1.7886744595316554e-05, + "loss": 1.5966, + "step": 38994 + }, + { + "epoch": 0.47, + "grad_norm": 18.225711107533627, + "learning_rate": 1.788638542276083e-05, + "loss": 1.4709, + "step": 38997 + }, + { + "epoch": 0.47, + "grad_norm": 7.71212141463549, + "learning_rate": 1.788602622329171e-05, + "loss": 1.0809, + "step": 39000 + }, + { + "epoch": 0.47, + "grad_norm": 62.04666866929162, + "learning_rate": 1.7885666996910424e-05, + "loss": 1.6721, + "step": 39003 + }, + { + "epoch": 0.47, + "grad_norm": 16.995322533763808, + "learning_rate": 1.78853077436182e-05, + "loss": 1.6159, + "step": 39006 + }, + { + "epoch": 0.47, + "grad_norm": 10.736082073178991, + "learning_rate": 1.788494846341626e-05, + "loss": 1.5409, + "step": 39009 + }, + { + "epoch": 0.47, + "grad_norm": 7.390301042969955, + "learning_rate": 1.7884589156305827e-05, + "loss": 1.3649, + "step": 39012 + }, + { + "epoch": 0.47, + "grad_norm": 5.756894026212911, + "learning_rate": 1.7884229822288133e-05, + "loss": 1.3373, + "step": 39015 + }, + { + "epoch": 0.47, + "grad_norm": 6.214873679282898, + "learning_rate": 1.7883870461364404e-05, + "loss": 1.357, + "step": 39018 + }, + { + "epoch": 0.47, + "grad_norm": 7.933879417314395, + "learning_rate": 1.7883511073535862e-05, + "loss": 1.4233, + "step": 39021 + }, + { + "epoch": 0.47, + "grad_norm": 16.13188881987544, + "learning_rate": 1.7883151658803742e-05, + "loss": 1.5448, + "step": 39024 + }, + { + "epoch": 0.47, + "grad_norm": 7.138112476127322, + "learning_rate": 1.7882792217169262e-05, + "loss": 1.5106, + "step": 39027 + }, + { + "epoch": 0.47, + "grad_norm": 51.08934899228387, + "learning_rate": 1.7882432748633653e-05, + "loss": 1.307, + "step": 39030 + }, + { + "epoch": 0.47, + "grad_norm": 50.454144413959696, + "learning_rate": 1.7882073253198137e-05, + "loss": 1.2503, + "step": 39033 + }, + { + "epoch": 0.47, + "grad_norm": 8.153279116235916, + "learning_rate": 1.7881713730863945e-05, + "loss": 1.433, + "step": 39036 + }, + { + "epoch": 0.47, + "grad_norm": 8.21496743938829, + "learning_rate": 1.7881354181632303e-05, + "loss": 1.2582, + "step": 39039 + }, + { + "epoch": 0.47, + "grad_norm": 9.277552335742454, + "learning_rate": 1.7880994605504437e-05, + "loss": 1.4492, + "step": 39042 + }, + { + "epoch": 0.47, + "grad_norm": 11.23063621851807, + "learning_rate": 1.7880635002481578e-05, + "loss": 2.0659, + "step": 39045 + }, + { + "epoch": 0.47, + "grad_norm": 17.39550405659822, + "learning_rate": 1.788027537256495e-05, + "loss": 1.5428, + "step": 39048 + }, + { + "epoch": 0.47, + "grad_norm": 13.154328277976916, + "learning_rate": 1.787991571575578e-05, + "loss": 1.4937, + "step": 39051 + }, + { + "epoch": 0.47, + "grad_norm": 24.558373502355664, + "learning_rate": 1.7879556032055295e-05, + "loss": 1.6344, + "step": 39054 + }, + { + "epoch": 0.47, + "grad_norm": 7.852686640918202, + "learning_rate": 1.7879196321464723e-05, + "loss": 1.6865, + "step": 39057 + }, + { + "epoch": 0.47, + "grad_norm": 10.65205208498504, + "learning_rate": 1.787883658398529e-05, + "loss": 1.2652, + "step": 39060 + }, + { + "epoch": 0.47, + "grad_norm": 35.151679749439445, + "learning_rate": 1.787847681961823e-05, + "loss": 1.6564, + "step": 39063 + }, + { + "epoch": 0.47, + "grad_norm": 25.22365570347243, + "learning_rate": 1.7878117028364766e-05, + "loss": 1.4234, + "step": 39066 + }, + { + "epoch": 0.47, + "grad_norm": 55.203654170629015, + "learning_rate": 1.7877757210226126e-05, + "loss": 1.2848, + "step": 39069 + }, + { + "epoch": 0.47, + "grad_norm": 7.894943896683387, + "learning_rate": 1.787739736520354e-05, + "loss": 1.3125, + "step": 39072 + }, + { + "epoch": 0.47, + "grad_norm": 179.28591467789224, + "learning_rate": 1.787703749329823e-05, + "loss": 1.5146, + "step": 39075 + }, + { + "epoch": 0.47, + "grad_norm": 8.17612627408579, + "learning_rate": 1.7876677594511428e-05, + "loss": 1.3818, + "step": 39078 + }, + { + "epoch": 0.47, + "grad_norm": 22.17798440229528, + "learning_rate": 1.7876317668844364e-05, + "loss": 1.3648, + "step": 39081 + }, + { + "epoch": 0.47, + "grad_norm": 12.088375239892368, + "learning_rate": 1.7875957716298266e-05, + "loss": 1.3516, + "step": 39084 + }, + { + "epoch": 0.47, + "grad_norm": 4.678422744348509, + "learning_rate": 1.7875597736874365e-05, + "loss": 1.328, + "step": 39087 + }, + { + "epoch": 0.47, + "grad_norm": 15.10204175186318, + "learning_rate": 1.7875237730573878e-05, + "loss": 1.3286, + "step": 39090 + }, + { + "epoch": 0.47, + "grad_norm": 4.814930738762672, + "learning_rate": 1.7874877697398046e-05, + "loss": 1.5571, + "step": 39093 + }, + { + "epoch": 0.47, + "grad_norm": 6.0644443187249895, + "learning_rate": 1.787451763734809e-05, + "loss": 1.2956, + "step": 39096 + }, + { + "epoch": 0.47, + "grad_norm": 40.8048531913621, + "learning_rate": 1.7874157550425243e-05, + "loss": 1.4449, + "step": 39099 + }, + { + "epoch": 0.47, + "grad_norm": 7.273913871763537, + "learning_rate": 1.7873797436630732e-05, + "loss": 1.5573, + "step": 39102 + }, + { + "epoch": 0.47, + "grad_norm": 13.820456366197027, + "learning_rate": 1.787343729596579e-05, + "loss": 1.7105, + "step": 39105 + }, + { + "epoch": 0.47, + "grad_norm": 5.560579278620493, + "learning_rate": 1.787307712843164e-05, + "loss": 1.5179, + "step": 39108 + }, + { + "epoch": 0.47, + "grad_norm": 13.903593463836874, + "learning_rate": 1.7872716934029512e-05, + "loss": 1.2712, + "step": 39111 + }, + { + "epoch": 0.47, + "grad_norm": 11.58138043825277, + "learning_rate": 1.787235671276064e-05, + "loss": 1.6215, + "step": 39114 + }, + { + "epoch": 0.47, + "grad_norm": 9.472972274987931, + "learning_rate": 1.7871996464626245e-05, + "loss": 1.2892, + "step": 39117 + }, + { + "epoch": 0.47, + "grad_norm": 95.29647929351694, + "learning_rate": 1.787163618962757e-05, + "loss": 1.3845, + "step": 39120 + }, + { + "epoch": 0.47, + "grad_norm": 51.586541366342416, + "learning_rate": 1.7871275887765828e-05, + "loss": 1.5864, + "step": 39123 + }, + { + "epoch": 0.47, + "grad_norm": 10.916528220251102, + "learning_rate": 1.787091555904226e-05, + "loss": 1.5487, + "step": 39126 + }, + { + "epoch": 0.47, + "grad_norm": 15.629320041348997, + "learning_rate": 1.7870555203458095e-05, + "loss": 1.5565, + "step": 39129 + }, + { + "epoch": 0.47, + "grad_norm": 60.509487532119685, + "learning_rate": 1.7870194821014552e-05, + "loss": 1.5072, + "step": 39132 + }, + { + "epoch": 0.47, + "grad_norm": 9.28817058585485, + "learning_rate": 1.7869834411712876e-05, + "loss": 1.7615, + "step": 39135 + }, + { + "epoch": 0.47, + "grad_norm": 5.31581428230194, + "learning_rate": 1.7869473975554288e-05, + "loss": 1.6467, + "step": 39138 + }, + { + "epoch": 0.47, + "grad_norm": 7.015512474677886, + "learning_rate": 1.786911351254002e-05, + "loss": 1.3156, + "step": 39141 + }, + { + "epoch": 0.47, + "grad_norm": 10.290672293182395, + "learning_rate": 1.78687530226713e-05, + "loss": 1.2194, + "step": 39144 + }, + { + "epoch": 0.47, + "grad_norm": 27.18979481960905, + "learning_rate": 1.7868392505949362e-05, + "loss": 1.4732, + "step": 39147 + }, + { + "epoch": 0.47, + "grad_norm": 18.052607052489822, + "learning_rate": 1.7868031962375437e-05, + "loss": 2.1019, + "step": 39150 + }, + { + "epoch": 0.47, + "grad_norm": 12.123744091812545, + "learning_rate": 1.786767139195075e-05, + "loss": 1.2755, + "step": 39153 + }, + { + "epoch": 0.47, + "grad_norm": 18.45111039697596, + "learning_rate": 1.7867310794676535e-05, + "loss": 1.3965, + "step": 39156 + }, + { + "epoch": 0.47, + "grad_norm": 35.105470442545645, + "learning_rate": 1.786695017055402e-05, + "loss": 1.5102, + "step": 39159 + }, + { + "epoch": 0.47, + "grad_norm": 28.81154369584561, + "learning_rate": 1.786658951958444e-05, + "loss": 1.3752, + "step": 39162 + }, + { + "epoch": 0.47, + "grad_norm": 9.841600131361195, + "learning_rate": 1.7866228841769025e-05, + "loss": 1.4003, + "step": 39165 + }, + { + "epoch": 0.47, + "grad_norm": 11.365136834891153, + "learning_rate": 1.7865868137109e-05, + "loss": 1.4239, + "step": 39168 + }, + { + "epoch": 0.47, + "grad_norm": 15.123121327676735, + "learning_rate": 1.7865507405605606e-05, + "loss": 1.8067, + "step": 39171 + }, + { + "epoch": 0.47, + "grad_norm": 24.05071998686295, + "learning_rate": 1.7865146647260063e-05, + "loss": 1.6606, + "step": 39174 + }, + { + "epoch": 0.47, + "grad_norm": 49.96724467482353, + "learning_rate": 1.786478586207361e-05, + "loss": 1.5597, + "step": 39177 + }, + { + "epoch": 0.47, + "grad_norm": 12.861394027443742, + "learning_rate": 1.7864425050047477e-05, + "loss": 1.4639, + "step": 39180 + }, + { + "epoch": 0.47, + "grad_norm": 2.9737658310115274, + "learning_rate": 1.7864064211182894e-05, + "loss": 1.6593, + "step": 39183 + }, + { + "epoch": 0.47, + "grad_norm": 6.586733246386603, + "learning_rate": 1.7863703345481088e-05, + "loss": 1.452, + "step": 39186 + }, + { + "epoch": 0.47, + "grad_norm": 7.533761868628045, + "learning_rate": 1.78633424529433e-05, + "loss": 1.5241, + "step": 39189 + }, + { + "epoch": 0.47, + "grad_norm": 15.977474624375224, + "learning_rate": 1.7862981533570753e-05, + "loss": 1.3252, + "step": 39192 + }, + { + "epoch": 0.47, + "grad_norm": 13.459644027227156, + "learning_rate": 1.7862620587364686e-05, + "loss": 1.4143, + "step": 39195 + }, + { + "epoch": 0.47, + "grad_norm": 65.24971362882597, + "learning_rate": 1.7862259614326323e-05, + "loss": 1.4264, + "step": 39198 + }, + { + "epoch": 0.47, + "grad_norm": 9.68674029205526, + "learning_rate": 1.7861898614456904e-05, + "loss": 1.5041, + "step": 39201 + }, + { + "epoch": 0.47, + "grad_norm": 23.887015430221822, + "learning_rate": 1.7861537587757652e-05, + "loss": 1.351, + "step": 39204 + }, + { + "epoch": 0.47, + "grad_norm": 21.565782930749585, + "learning_rate": 1.7861176534229808e-05, + "loss": 1.8673, + "step": 39207 + }, + { + "epoch": 0.47, + "grad_norm": 9.1551747016609, + "learning_rate": 1.7860815453874596e-05, + "loss": 1.5187, + "step": 39210 + }, + { + "epoch": 0.47, + "grad_norm": 25.34328526146907, + "learning_rate": 1.7860454346693256e-05, + "loss": 1.4146, + "step": 39213 + }, + { + "epoch": 0.47, + "grad_norm": 58.2738735658411, + "learning_rate": 1.7860093212687015e-05, + "loss": 1.8605, + "step": 39216 + }, + { + "epoch": 0.47, + "grad_norm": 16.008593223524638, + "learning_rate": 1.7859732051857107e-05, + "loss": 1.2282, + "step": 39219 + }, + { + "epoch": 0.47, + "grad_norm": 23.47474565388314, + "learning_rate": 1.7859370864204767e-05, + "loss": 1.5111, + "step": 39222 + }, + { + "epoch": 0.47, + "grad_norm": 5.39763125301048, + "learning_rate": 1.7859009649731223e-05, + "loss": 1.1537, + "step": 39225 + }, + { + "epoch": 0.47, + "grad_norm": 25.972013974512297, + "learning_rate": 1.7858648408437712e-05, + "loss": 1.2553, + "step": 39228 + }, + { + "epoch": 0.47, + "grad_norm": 9.005998014400959, + "learning_rate": 1.785828714032546e-05, + "loss": 1.7301, + "step": 39231 + }, + { + "epoch": 0.47, + "grad_norm": 74.91630683111605, + "learning_rate": 1.785792584539571e-05, + "loss": 1.5271, + "step": 39234 + }, + { + "epoch": 0.47, + "grad_norm": 14.242509370490945, + "learning_rate": 1.7857564523649688e-05, + "loss": 1.4803, + "step": 39237 + }, + { + "epoch": 0.47, + "grad_norm": 16.838635742080218, + "learning_rate": 1.7857203175088626e-05, + "loss": 1.2197, + "step": 39240 + }, + { + "epoch": 0.47, + "grad_norm": 9.560772458583696, + "learning_rate": 1.785684179971376e-05, + "loss": 1.8091, + "step": 39243 + }, + { + "epoch": 0.47, + "grad_norm": 8.124500969797367, + "learning_rate": 1.7856480397526324e-05, + "loss": 1.1649, + "step": 39246 + }, + { + "epoch": 0.47, + "grad_norm": 13.39378597880383, + "learning_rate": 1.785611896852755e-05, + "loss": 1.7087, + "step": 39249 + }, + { + "epoch": 0.47, + "grad_norm": 12.143664470948595, + "learning_rate": 1.7855757512718674e-05, + "loss": 1.28, + "step": 39252 + }, + { + "epoch": 0.47, + "grad_norm": 2.7922788731087826, + "learning_rate": 1.7855396030100926e-05, + "loss": 1.4917, + "step": 39255 + }, + { + "epoch": 0.47, + "grad_norm": 6.11986261606889, + "learning_rate": 1.785503452067554e-05, + "loss": 1.6248, + "step": 39258 + }, + { + "epoch": 0.47, + "grad_norm": 12.936383203983832, + "learning_rate": 1.7854672984443752e-05, + "loss": 1.7264, + "step": 39261 + }, + { + "epoch": 0.47, + "grad_norm": 3.100403317429708, + "learning_rate": 1.7854311421406792e-05, + "loss": 1.4444, + "step": 39264 + }, + { + "epoch": 0.47, + "grad_norm": 13.490579719382263, + "learning_rate": 1.7853949831565898e-05, + "loss": 1.2607, + "step": 39267 + }, + { + "epoch": 0.47, + "grad_norm": 50.18929496249218, + "learning_rate": 1.78535882149223e-05, + "loss": 1.399, + "step": 39270 + }, + { + "epoch": 0.47, + "grad_norm": 8.101405967252708, + "learning_rate": 1.7853226571477237e-05, + "loss": 1.6356, + "step": 39273 + }, + { + "epoch": 0.47, + "grad_norm": 35.65394199209638, + "learning_rate": 1.7852864901231937e-05, + "loss": 1.4415, + "step": 39276 + }, + { + "epoch": 0.47, + "grad_norm": 17.586474022692475, + "learning_rate": 1.785250320418764e-05, + "loss": 1.0881, + "step": 39279 + }, + { + "epoch": 0.47, + "grad_norm": 16.595995700717282, + "learning_rate": 1.7852141480345578e-05, + "loss": 1.9599, + "step": 39282 + }, + { + "epoch": 0.47, + "grad_norm": 16.938650571057117, + "learning_rate": 1.7851779729706988e-05, + "loss": 1.1395, + "step": 39285 + }, + { + "epoch": 0.47, + "grad_norm": 46.58790902437593, + "learning_rate": 1.7851417952273098e-05, + "loss": 1.698, + "step": 39288 + }, + { + "epoch": 0.47, + "grad_norm": 6.728707159614524, + "learning_rate": 1.785105614804515e-05, + "loss": 1.2199, + "step": 39291 + }, + { + "epoch": 0.47, + "grad_norm": 10.45645593836014, + "learning_rate": 1.785069431702437e-05, + "loss": 1.4863, + "step": 39294 + }, + { + "epoch": 0.47, + "grad_norm": 14.399102025154791, + "learning_rate": 1.7850332459212002e-05, + "loss": 1.2959, + "step": 39297 + }, + { + "epoch": 0.47, + "grad_norm": 10.79210636962493, + "learning_rate": 1.784997057460928e-05, + "loss": 1.3606, + "step": 39300 + }, + { + "epoch": 0.47, + "grad_norm": 16.368059969658088, + "learning_rate": 1.784960866321743e-05, + "loss": 1.468, + "step": 39303 + }, + { + "epoch": 0.47, + "grad_norm": 12.897634161876262, + "learning_rate": 1.7849246725037693e-05, + "loss": 1.3847, + "step": 39306 + }, + { + "epoch": 0.47, + "grad_norm": 6.701035115650232, + "learning_rate": 1.7848884760071308e-05, + "loss": 1.3577, + "step": 39309 + }, + { + "epoch": 0.47, + "grad_norm": 10.733035613335, + "learning_rate": 1.7848522768319507e-05, + "loss": 1.6809, + "step": 39312 + }, + { + "epoch": 0.47, + "grad_norm": 41.00008254100189, + "learning_rate": 1.7848160749783523e-05, + "loss": 1.4483, + "step": 39315 + }, + { + "epoch": 0.47, + "grad_norm": 3.8736662337293732, + "learning_rate": 1.7847798704464593e-05, + "loss": 1.1648, + "step": 39318 + }, + { + "epoch": 0.47, + "grad_norm": 4.000627187407984, + "learning_rate": 1.7847436632363953e-05, + "loss": 1.5502, + "step": 39321 + }, + { + "epoch": 0.47, + "grad_norm": 15.883336534450416, + "learning_rate": 1.784707453348284e-05, + "loss": 1.596, + "step": 39324 + }, + { + "epoch": 0.47, + "grad_norm": 14.58511488345329, + "learning_rate": 1.7846712407822487e-05, + "loss": 1.0457, + "step": 39327 + }, + { + "epoch": 0.47, + "grad_norm": 48.81116102906426, + "learning_rate": 1.784635025538413e-05, + "loss": 1.3788, + "step": 39330 + }, + { + "epoch": 0.47, + "grad_norm": 9.153653101836394, + "learning_rate": 1.7845988076169006e-05, + "loss": 1.7205, + "step": 39333 + }, + { + "epoch": 0.47, + "grad_norm": 29.699016470163247, + "learning_rate": 1.7845625870178352e-05, + "loss": 1.5743, + "step": 39336 + }, + { + "epoch": 0.47, + "grad_norm": 16.361958316746538, + "learning_rate": 1.78452636374134e-05, + "loss": 1.7894, + "step": 39339 + }, + { + "epoch": 0.47, + "grad_norm": 10.005490664987112, + "learning_rate": 1.7844901377875393e-05, + "loss": 1.5636, + "step": 39342 + }, + { + "epoch": 0.47, + "grad_norm": 4.158176388469655, + "learning_rate": 1.784453909156556e-05, + "loss": 1.3844, + "step": 39345 + }, + { + "epoch": 0.47, + "grad_norm": 13.185511911279425, + "learning_rate": 1.784417677848514e-05, + "loss": 1.6085, + "step": 39348 + }, + { + "epoch": 0.47, + "grad_norm": 10.010848660649373, + "learning_rate": 1.7843814438635373e-05, + "loss": 1.1892, + "step": 39351 + }, + { + "epoch": 0.47, + "grad_norm": 12.34847705614843, + "learning_rate": 1.7843452072017494e-05, + "loss": 1.4051, + "step": 39354 + }, + { + "epoch": 0.47, + "grad_norm": 7.653874069677054, + "learning_rate": 1.7843089678632733e-05, + "loss": 1.7296, + "step": 39357 + }, + { + "epoch": 0.47, + "grad_norm": 6.115499457970995, + "learning_rate": 1.7842727258482338e-05, + "loss": 1.1783, + "step": 39360 + }, + { + "epoch": 0.47, + "grad_norm": 28.60961715924946, + "learning_rate": 1.7842364811567537e-05, + "loss": 1.5903, + "step": 39363 + }, + { + "epoch": 0.47, + "grad_norm": 12.726307111103855, + "learning_rate": 1.784200233788957e-05, + "loss": 1.5706, + "step": 39366 + }, + { + "epoch": 0.47, + "grad_norm": 24.250934217002264, + "learning_rate": 1.7841639837449673e-05, + "loss": 1.5683, + "step": 39369 + }, + { + "epoch": 0.47, + "grad_norm": 16.32601645423395, + "learning_rate": 1.7841277310249082e-05, + "loss": 1.5371, + "step": 39372 + }, + { + "epoch": 0.47, + "grad_norm": 4.35799313867451, + "learning_rate": 1.784091475628904e-05, + "loss": 1.6757, + "step": 39375 + }, + { + "epoch": 0.47, + "grad_norm": 10.980035372876692, + "learning_rate": 1.784055217557078e-05, + "loss": 1.6357, + "step": 39378 + }, + { + "epoch": 0.47, + "grad_norm": 14.356334218664832, + "learning_rate": 1.7840189568095537e-05, + "loss": 1.2953, + "step": 39381 + }, + { + "epoch": 0.47, + "grad_norm": 14.019982260149265, + "learning_rate": 1.783982693386455e-05, + "loss": 1.4858, + "step": 39384 + }, + { + "epoch": 0.47, + "grad_norm": 37.85748371104557, + "learning_rate": 1.783946427287906e-05, + "loss": 1.3506, + "step": 39387 + }, + { + "epoch": 0.47, + "grad_norm": 8.531615354270162, + "learning_rate": 1.78391015851403e-05, + "loss": 1.1803, + "step": 39390 + }, + { + "epoch": 0.47, + "grad_norm": 48.78930293519047, + "learning_rate": 1.7838738870649513e-05, + "loss": 1.1374, + "step": 39393 + }, + { + "epoch": 0.47, + "grad_norm": 8.9131130645114, + "learning_rate": 1.783837612940793e-05, + "loss": 1.1629, + "step": 39396 + }, + { + "epoch": 0.47, + "grad_norm": 11.102694573290231, + "learning_rate": 1.7838013361416795e-05, + "loss": 1.3646, + "step": 39399 + }, + { + "epoch": 0.47, + "grad_norm": 23.9353216714116, + "learning_rate": 1.7837650566677344e-05, + "loss": 1.4852, + "step": 39402 + }, + { + "epoch": 0.47, + "grad_norm": 13.614179841494575, + "learning_rate": 1.7837287745190816e-05, + "loss": 1.3141, + "step": 39405 + }, + { + "epoch": 0.47, + "grad_norm": 8.688372441652552, + "learning_rate": 1.7836924896958446e-05, + "loss": 1.4112, + "step": 39408 + }, + { + "epoch": 0.47, + "grad_norm": 12.569033571628056, + "learning_rate": 1.7836562021981475e-05, + "loss": 1.4172, + "step": 39411 + }, + { + "epoch": 0.47, + "grad_norm": 27.671298551707523, + "learning_rate": 1.7836199120261136e-05, + "loss": 1.8949, + "step": 39414 + }, + { + "epoch": 0.47, + "grad_norm": 23.05010596112168, + "learning_rate": 1.783583619179868e-05, + "loss": 1.5608, + "step": 39417 + }, + { + "epoch": 0.47, + "grad_norm": 18.476375037616936, + "learning_rate": 1.783547323659533e-05, + "loss": 1.4585, + "step": 39420 + }, + { + "epoch": 0.47, + "grad_norm": 2.89389361836028, + "learning_rate": 1.783511025465234e-05, + "loss": 1.5369, + "step": 39423 + }, + { + "epoch": 0.47, + "grad_norm": 9.675797135246833, + "learning_rate": 1.7834747245970933e-05, + "loss": 1.3343, + "step": 39426 + }, + { + "epoch": 0.47, + "grad_norm": 36.98088827503907, + "learning_rate": 1.783438421055236e-05, + "loss": 1.1793, + "step": 39429 + }, + { + "epoch": 0.47, + "grad_norm": 7.152525407505067, + "learning_rate": 1.7834021148397855e-05, + "loss": 1.4581, + "step": 39432 + }, + { + "epoch": 0.47, + "grad_norm": 23.584003873324843, + "learning_rate": 1.7833658059508656e-05, + "loss": 1.6288, + "step": 39435 + }, + { + "epoch": 0.47, + "grad_norm": 13.476098092312728, + "learning_rate": 1.7833294943886006e-05, + "loss": 1.6872, + "step": 39438 + }, + { + "epoch": 0.47, + "grad_norm": 4.949533445424079, + "learning_rate": 1.7832931801531142e-05, + "loss": 1.0888, + "step": 39441 + }, + { + "epoch": 0.47, + "grad_norm": 11.676689523531554, + "learning_rate": 1.78325686324453e-05, + "loss": 1.8416, + "step": 39444 + }, + { + "epoch": 0.47, + "grad_norm": 17.48133958888804, + "learning_rate": 1.7832205436629725e-05, + "loss": 1.4335, + "step": 39447 + }, + { + "epoch": 0.47, + "grad_norm": 18.126767795302115, + "learning_rate": 1.7831842214085655e-05, + "loss": 1.3227, + "step": 39450 + }, + { + "epoch": 0.47, + "grad_norm": 28.959179594702427, + "learning_rate": 1.783147896481433e-05, + "loss": 1.5781, + "step": 39453 + }, + { + "epoch": 0.47, + "grad_norm": 3.8390454260743576, + "learning_rate": 1.783111568881698e-05, + "loss": 1.5236, + "step": 39456 + }, + { + "epoch": 0.47, + "grad_norm": 13.886401304688379, + "learning_rate": 1.7830752386094864e-05, + "loss": 1.1131, + "step": 39459 + }, + { + "epoch": 0.47, + "grad_norm": 21.938917468741565, + "learning_rate": 1.7830389056649202e-05, + "loss": 1.6024, + "step": 39462 + }, + { + "epoch": 0.47, + "grad_norm": 30.96250841478314, + "learning_rate": 1.7830025700481247e-05, + "loss": 1.5208, + "step": 39465 + }, + { + "epoch": 0.47, + "grad_norm": 8.293826823874966, + "learning_rate": 1.782966231759223e-05, + "loss": 1.4856, + "step": 39468 + }, + { + "epoch": 0.47, + "grad_norm": 7.0454913361375855, + "learning_rate": 1.7829298907983402e-05, + "loss": 1.3474, + "step": 39471 + }, + { + "epoch": 0.47, + "grad_norm": 13.717427801573567, + "learning_rate": 1.7828935471655992e-05, + "loss": 1.2293, + "step": 39474 + }, + { + "epoch": 0.47, + "grad_norm": 14.900227204073223, + "learning_rate": 1.782857200861125e-05, + "loss": 1.4569, + "step": 39477 + }, + { + "epoch": 0.47, + "grad_norm": 6.652247146589979, + "learning_rate": 1.7828208518850406e-05, + "loss": 1.3741, + "step": 39480 + }, + { + "epoch": 0.47, + "grad_norm": 9.272289865019406, + "learning_rate": 1.782784500237471e-05, + "loss": 1.3498, + "step": 39483 + }, + { + "epoch": 0.47, + "grad_norm": 6.132964140250291, + "learning_rate": 1.78274814591854e-05, + "loss": 1.5633, + "step": 39486 + }, + { + "epoch": 0.47, + "grad_norm": 10.468890076812366, + "learning_rate": 1.782711788928371e-05, + "loss": 1.7167, + "step": 39489 + }, + { + "epoch": 0.47, + "grad_norm": 24.28930520489171, + "learning_rate": 1.782675429267089e-05, + "loss": 1.1694, + "step": 39492 + }, + { + "epoch": 0.47, + "grad_norm": 5.679547283743347, + "learning_rate": 1.7826390669348175e-05, + "loss": 1.2716, + "step": 39495 + }, + { + "epoch": 0.47, + "grad_norm": 12.57037788932086, + "learning_rate": 1.782602701931681e-05, + "loss": 1.381, + "step": 39498 + }, + { + "epoch": 0.47, + "grad_norm": 6.453833436692159, + "learning_rate": 1.782566334257803e-05, + "loss": 1.7863, + "step": 39501 + }, + { + "epoch": 0.48, + "grad_norm": 49.64542383687752, + "learning_rate": 1.7825299639133082e-05, + "loss": 1.2472, + "step": 39504 + }, + { + "epoch": 0.48, + "grad_norm": 13.681526515338373, + "learning_rate": 1.7824935908983204e-05, + "loss": 1.4309, + "step": 39507 + }, + { + "epoch": 0.48, + "grad_norm": 10.596391495119343, + "learning_rate": 1.782457215212964e-05, + "loss": 1.366, + "step": 39510 + }, + { + "epoch": 0.48, + "grad_norm": 11.370225055593647, + "learning_rate": 1.7824208368573627e-05, + "loss": 1.6068, + "step": 39513 + }, + { + "epoch": 0.48, + "grad_norm": 20.564491111679384, + "learning_rate": 1.782384455831641e-05, + "loss": 1.5642, + "step": 39516 + }, + { + "epoch": 0.48, + "grad_norm": 10.599969983475704, + "learning_rate": 1.782348072135923e-05, + "loss": 1.5851, + "step": 39519 + }, + { + "epoch": 0.48, + "grad_norm": 10.743189154172658, + "learning_rate": 1.7823116857703332e-05, + "loss": 1.3213, + "step": 39522 + }, + { + "epoch": 0.48, + "grad_norm": 11.438544783320042, + "learning_rate": 1.782275296734995e-05, + "loss": 1.6009, + "step": 39525 + }, + { + "epoch": 0.48, + "grad_norm": 81.96341076963888, + "learning_rate": 1.7822389050300327e-05, + "loss": 1.5509, + "step": 39528 + }, + { + "epoch": 0.48, + "grad_norm": 20.316771496982533, + "learning_rate": 1.7822025106555713e-05, + "loss": 1.5109, + "step": 39531 + }, + { + "epoch": 0.48, + "grad_norm": 8.955863864356163, + "learning_rate": 1.7821661136117343e-05, + "loss": 1.7626, + "step": 39534 + }, + { + "epoch": 0.48, + "grad_norm": 3.1029026039588055, + "learning_rate": 1.7821297138986462e-05, + "loss": 1.2345, + "step": 39537 + }, + { + "epoch": 0.48, + "grad_norm": 25.060966175329213, + "learning_rate": 1.7820933115164308e-05, + "loss": 1.6297, + "step": 39540 + }, + { + "epoch": 0.48, + "grad_norm": 10.880965845491714, + "learning_rate": 1.7820569064652133e-05, + "loss": 1.6212, + "step": 39543 + }, + { + "epoch": 0.48, + "grad_norm": 3.792525088569204, + "learning_rate": 1.7820204987451166e-05, + "loss": 1.4018, + "step": 39546 + }, + { + "epoch": 0.48, + "grad_norm": 14.677196289646457, + "learning_rate": 1.7819840883562657e-05, + "loss": 1.3031, + "step": 39549 + }, + { + "epoch": 0.48, + "grad_norm": 41.56723185590965, + "learning_rate": 1.781947675298785e-05, + "loss": 1.3339, + "step": 39552 + }, + { + "epoch": 0.48, + "grad_norm": 27.942044564158135, + "learning_rate": 1.7819112595727982e-05, + "loss": 1.5407, + "step": 39555 + }, + { + "epoch": 0.48, + "grad_norm": 7.666376973971251, + "learning_rate": 1.7818748411784302e-05, + "loss": 1.1399, + "step": 39558 + }, + { + "epoch": 0.48, + "grad_norm": 13.391541246129764, + "learning_rate": 1.781838420115805e-05, + "loss": 1.3094, + "step": 39561 + }, + { + "epoch": 0.48, + "grad_norm": 4.008509091412563, + "learning_rate": 1.781801996385047e-05, + "loss": 1.6678, + "step": 39564 + }, + { + "epoch": 0.48, + "grad_norm": 7.810108726555453, + "learning_rate": 1.7817655699862803e-05, + "loss": 1.4426, + "step": 39567 + }, + { + "epoch": 0.48, + "grad_norm": 7.524885735105934, + "learning_rate": 1.7817291409196292e-05, + "loss": 1.7118, + "step": 39570 + }, + { + "epoch": 0.48, + "grad_norm": 8.372831331199798, + "learning_rate": 1.7816927091852184e-05, + "loss": 1.4043, + "step": 39573 + }, + { + "epoch": 0.48, + "grad_norm": 5.555751633122659, + "learning_rate": 1.7816562747831712e-05, + "loss": 1.3514, + "step": 39576 + }, + { + "epoch": 0.48, + "grad_norm": 34.84610979267708, + "learning_rate": 1.7816198377136133e-05, + "loss": 1.3951, + "step": 39579 + }, + { + "epoch": 0.48, + "grad_norm": 7.939008140142614, + "learning_rate": 1.7815833979766686e-05, + "loss": 1.6309, + "step": 39582 + }, + { + "epoch": 0.48, + "grad_norm": 25.144800695181733, + "learning_rate": 1.781546955572461e-05, + "loss": 1.605, + "step": 39585 + }, + { + "epoch": 0.48, + "grad_norm": 23.843943089993495, + "learning_rate": 1.781510510501115e-05, + "loss": 1.441, + "step": 39588 + }, + { + "epoch": 0.48, + "grad_norm": 99.33955299343494, + "learning_rate": 1.7814740627627555e-05, + "loss": 1.2848, + "step": 39591 + }, + { + "epoch": 0.48, + "grad_norm": 157.53328297086946, + "learning_rate": 1.781437612357506e-05, + "loss": 1.2303, + "step": 39594 + }, + { + "epoch": 0.48, + "grad_norm": 4.316059017188672, + "learning_rate": 1.7814011592854917e-05, + "loss": 1.4674, + "step": 39597 + }, + { + "epoch": 0.48, + "grad_norm": 13.699811230978117, + "learning_rate": 1.781364703546837e-05, + "loss": 1.436, + "step": 39600 + }, + { + "epoch": 0.48, + "grad_norm": 14.093373742917782, + "learning_rate": 1.7813282451416657e-05, + "loss": 1.3685, + "step": 39603 + }, + { + "epoch": 0.48, + "grad_norm": 43.07233248728661, + "learning_rate": 1.7812917840701024e-05, + "loss": 1.413, + "step": 39606 + }, + { + "epoch": 0.48, + "grad_norm": 21.84235430599589, + "learning_rate": 1.7812553203322718e-05, + "loss": 1.5802, + "step": 39609 + }, + { + "epoch": 0.48, + "grad_norm": 7.650583017151633, + "learning_rate": 1.7812188539282983e-05, + "loss": 1.5287, + "step": 39612 + }, + { + "epoch": 0.48, + "grad_norm": 9.32975611239118, + "learning_rate": 1.7811823848583062e-05, + "loss": 1.5154, + "step": 39615 + }, + { + "epoch": 0.48, + "grad_norm": 19.778805278661896, + "learning_rate": 1.78114591312242e-05, + "loss": 2.1807, + "step": 39618 + }, + { + "epoch": 0.48, + "grad_norm": 7.520423971473429, + "learning_rate": 1.781109438720764e-05, + "loss": 1.194, + "step": 39621 + }, + { + "epoch": 0.48, + "grad_norm": 9.294235206057762, + "learning_rate": 1.781072961653463e-05, + "loss": 1.3183, + "step": 39624 + }, + { + "epoch": 0.48, + "grad_norm": 5.67107856213295, + "learning_rate": 1.781036481920641e-05, + "loss": 1.5922, + "step": 39627 + }, + { + "epoch": 0.48, + "grad_norm": 5.629842140565271, + "learning_rate": 1.7809999995224232e-05, + "loss": 1.1834, + "step": 39630 + }, + { + "epoch": 0.48, + "grad_norm": 9.083800986729937, + "learning_rate": 1.7809635144589336e-05, + "loss": 1.5103, + "step": 39633 + }, + { + "epoch": 0.48, + "grad_norm": 14.123793708154396, + "learning_rate": 1.7809270267302968e-05, + "loss": 1.7935, + "step": 39636 + }, + { + "epoch": 0.48, + "grad_norm": 17.081164831276723, + "learning_rate": 1.7808905363366376e-05, + "loss": 1.7125, + "step": 39639 + }, + { + "epoch": 0.48, + "grad_norm": 15.010358004219006, + "learning_rate": 1.7808540432780802e-05, + "loss": 1.1195, + "step": 39642 + }, + { + "epoch": 0.48, + "grad_norm": 3.519863319983414, + "learning_rate": 1.780817547554749e-05, + "loss": 1.3567, + "step": 39645 + }, + { + "epoch": 0.48, + "grad_norm": 6.257817018195163, + "learning_rate": 1.7807810491667687e-05, + "loss": 1.2965, + "step": 39648 + }, + { + "epoch": 0.48, + "grad_norm": 5.213203006551095, + "learning_rate": 1.7807445481142644e-05, + "loss": 1.1578, + "step": 39651 + }, + { + "epoch": 0.48, + "grad_norm": 9.72548397499242, + "learning_rate": 1.7807080443973596e-05, + "loss": 1.1776, + "step": 39654 + }, + { + "epoch": 0.48, + "grad_norm": 134.68908983838978, + "learning_rate": 1.7806715380161798e-05, + "loss": 1.6557, + "step": 39657 + }, + { + "epoch": 0.48, + "grad_norm": 26.56301765596031, + "learning_rate": 1.780635028970849e-05, + "loss": 1.3246, + "step": 39660 + }, + { + "epoch": 0.48, + "grad_norm": 29.281560551235156, + "learning_rate": 1.780598517261492e-05, + "loss": 1.734, + "step": 39663 + }, + { + "epoch": 0.48, + "grad_norm": 9.962101467843704, + "learning_rate": 1.780562002888234e-05, + "loss": 1.3663, + "step": 39666 + }, + { + "epoch": 0.48, + "grad_norm": 37.953081947897424, + "learning_rate": 1.7805254858511983e-05, + "loss": 1.4896, + "step": 39669 + }, + { + "epoch": 0.48, + "grad_norm": 26.68532317543692, + "learning_rate": 1.7804889661505108e-05, + "loss": 1.4357, + "step": 39672 + }, + { + "epoch": 0.48, + "grad_norm": 7.3373930659047035, + "learning_rate": 1.7804524437862953e-05, + "loss": 1.299, + "step": 39675 + }, + { + "epoch": 0.48, + "grad_norm": 23.40199103378995, + "learning_rate": 1.780415918758677e-05, + "loss": 1.2001, + "step": 39678 + }, + { + "epoch": 0.48, + "grad_norm": 9.673298482881963, + "learning_rate": 1.78037939106778e-05, + "loss": 1.2101, + "step": 39681 + }, + { + "epoch": 0.48, + "grad_norm": 4.307698351753054, + "learning_rate": 1.7803428607137292e-05, + "loss": 1.2485, + "step": 39684 + }, + { + "epoch": 0.48, + "grad_norm": 11.075751692738157, + "learning_rate": 1.7803063276966493e-05, + "loss": 1.5203, + "step": 39687 + }, + { + "epoch": 0.48, + "grad_norm": 11.124130002577818, + "learning_rate": 1.7802697920166653e-05, + "loss": 1.431, + "step": 39690 + }, + { + "epoch": 0.48, + "grad_norm": 8.405450566672064, + "learning_rate": 1.780233253673901e-05, + "loss": 1.2823, + "step": 39693 + }, + { + "epoch": 0.48, + "grad_norm": 24.64327305518744, + "learning_rate": 1.7801967126684822e-05, + "loss": 1.5827, + "step": 39696 + }, + { + "epoch": 0.48, + "grad_norm": 19.40682379105292, + "learning_rate": 1.780160169000533e-05, + "loss": 1.4132, + "step": 39699 + }, + { + "epoch": 0.48, + "grad_norm": 18.370990487795645, + "learning_rate": 1.780123622670178e-05, + "loss": 1.474, + "step": 39702 + }, + { + "epoch": 0.48, + "grad_norm": 12.352554369350141, + "learning_rate": 1.7800870736775416e-05, + "loss": 1.3299, + "step": 39705 + }, + { + "epoch": 0.48, + "grad_norm": 30.127110010687623, + "learning_rate": 1.7800505220227496e-05, + "loss": 1.3485, + "step": 39708 + }, + { + "epoch": 0.48, + "grad_norm": 85.32166319745245, + "learning_rate": 1.780013967705926e-05, + "loss": 1.2821, + "step": 39711 + }, + { + "epoch": 0.48, + "grad_norm": 10.018892540929867, + "learning_rate": 1.7799774107271956e-05, + "loss": 0.9209, + "step": 39714 + }, + { + "epoch": 0.48, + "grad_norm": 2.7196064451897333, + "learning_rate": 1.7799408510866834e-05, + "loss": 1.4826, + "step": 39717 + }, + { + "epoch": 0.48, + "grad_norm": 76.30045154823304, + "learning_rate": 1.7799042887845138e-05, + "loss": 1.4198, + "step": 39720 + }, + { + "epoch": 0.48, + "grad_norm": 13.991274357990049, + "learning_rate": 1.779867723820812e-05, + "loss": 1.107, + "step": 39723 + }, + { + "epoch": 0.48, + "grad_norm": 21.174037271944446, + "learning_rate": 1.7798311561957025e-05, + "loss": 1.2067, + "step": 39726 + }, + { + "epoch": 0.48, + "grad_norm": 5.994978817560623, + "learning_rate": 1.77979458590931e-05, + "loss": 1.5885, + "step": 39729 + }, + { + "epoch": 0.48, + "grad_norm": 15.379602198704475, + "learning_rate": 1.77975801296176e-05, + "loss": 1.413, + "step": 39732 + }, + { + "epoch": 0.48, + "grad_norm": 17.444189225174725, + "learning_rate": 1.7797214373531763e-05, + "loss": 1.5296, + "step": 39735 + }, + { + "epoch": 0.48, + "grad_norm": 47.42207799838218, + "learning_rate": 1.7796848590836838e-05, + "loss": 1.7325, + "step": 39738 + }, + { + "epoch": 0.48, + "grad_norm": 10.616048093601938, + "learning_rate": 1.7796482781534084e-05, + "loss": 1.156, + "step": 39741 + }, + { + "epoch": 0.48, + "grad_norm": 7.9804393531606745, + "learning_rate": 1.779611694562474e-05, + "loss": 1.3361, + "step": 39744 + }, + { + "epoch": 0.48, + "grad_norm": 14.385471614169784, + "learning_rate": 1.779575108311006e-05, + "loss": 1.5603, + "step": 39747 + }, + { + "epoch": 0.48, + "grad_norm": 12.344225043455248, + "learning_rate": 1.7795385193991285e-05, + "loss": 1.2664, + "step": 39750 + }, + { + "epoch": 0.48, + "grad_norm": 8.804468166009489, + "learning_rate": 1.779501927826967e-05, + "loss": 1.6357, + "step": 39753 + }, + { + "epoch": 0.48, + "grad_norm": 11.306919327607261, + "learning_rate": 1.7794653335946463e-05, + "loss": 1.6377, + "step": 39756 + }, + { + "epoch": 0.48, + "grad_norm": 19.40898758174144, + "learning_rate": 1.7794287367022908e-05, + "loss": 1.12, + "step": 39759 + }, + { + "epoch": 0.48, + "grad_norm": 8.623654072411137, + "learning_rate": 1.779392137150026e-05, + "loss": 1.4929, + "step": 39762 + }, + { + "epoch": 0.48, + "grad_norm": 11.57642737420318, + "learning_rate": 1.779355534937977e-05, + "loss": 1.8375, + "step": 39765 + }, + { + "epoch": 0.48, + "grad_norm": 14.264047277024433, + "learning_rate": 1.7793189300662677e-05, + "loss": 1.3951, + "step": 39768 + }, + { + "epoch": 0.48, + "grad_norm": 39.77192293595883, + "learning_rate": 1.779282322535024e-05, + "loss": 1.4878, + "step": 39771 + }, + { + "epoch": 0.48, + "grad_norm": 16.52461226356, + "learning_rate": 1.77924571234437e-05, + "loss": 1.694, + "step": 39774 + }, + { + "epoch": 0.48, + "grad_norm": 8.647450447646916, + "learning_rate": 1.7792090994944312e-05, + "loss": 1.8732, + "step": 39777 + }, + { + "epoch": 0.48, + "grad_norm": 20.980092680474215, + "learning_rate": 1.7791724839853327e-05, + "loss": 1.756, + "step": 39780 + }, + { + "epoch": 0.48, + "grad_norm": 51.03973758668968, + "learning_rate": 1.7791358658171992e-05, + "loss": 1.5778, + "step": 39783 + }, + { + "epoch": 0.48, + "grad_norm": 50.46102819592826, + "learning_rate": 1.779099244990155e-05, + "loss": 1.7279, + "step": 39786 + }, + { + "epoch": 0.48, + "grad_norm": 12.687190022855262, + "learning_rate": 1.7790626215043264e-05, + "loss": 1.7999, + "step": 39789 + }, + { + "epoch": 0.48, + "grad_norm": 2.712354229448373, + "learning_rate": 1.779025995359837e-05, + "loss": 1.6148, + "step": 39792 + }, + { + "epoch": 0.48, + "grad_norm": 9.588560446939615, + "learning_rate": 1.7789893665568134e-05, + "loss": 1.0127, + "step": 39795 + }, + { + "epoch": 0.48, + "grad_norm": 7.462856141179784, + "learning_rate": 1.778952735095379e-05, + "loss": 1.4042, + "step": 39798 + }, + { + "epoch": 0.48, + "grad_norm": 13.68117112523801, + "learning_rate": 1.7789161009756595e-05, + "loss": 1.6703, + "step": 39801 + }, + { + "epoch": 0.48, + "grad_norm": 19.18357441608283, + "learning_rate": 1.7788794641977802e-05, + "loss": 1.4296, + "step": 39804 + }, + { + "epoch": 0.48, + "grad_norm": 32.582166012927345, + "learning_rate": 1.7788428247618658e-05, + "loss": 1.7399, + "step": 39807 + }, + { + "epoch": 0.48, + "grad_norm": 24.700494332498653, + "learning_rate": 1.7788061826680412e-05, + "loss": 1.4867, + "step": 39810 + }, + { + "epoch": 0.48, + "grad_norm": 18.175213417186608, + "learning_rate": 1.778769537916432e-05, + "loss": 1.2691, + "step": 39813 + }, + { + "epoch": 0.48, + "grad_norm": 6.80480599342568, + "learning_rate": 1.7787328905071623e-05, + "loss": 1.5484, + "step": 39816 + }, + { + "epoch": 0.48, + "grad_norm": 23.871815059888313, + "learning_rate": 1.7786962404403582e-05, + "loss": 1.3324, + "step": 39819 + }, + { + "epoch": 0.48, + "grad_norm": 17.954394893523, + "learning_rate": 1.778659587716144e-05, + "loss": 1.8043, + "step": 39822 + }, + { + "epoch": 0.48, + "grad_norm": 12.578863052453425, + "learning_rate": 1.7786229323346454e-05, + "loss": 1.2986, + "step": 39825 + }, + { + "epoch": 0.48, + "grad_norm": 4.5466758790382045, + "learning_rate": 1.778586274295987e-05, + "loss": 1.2994, + "step": 39828 + }, + { + "epoch": 0.48, + "grad_norm": 23.40934769327046, + "learning_rate": 1.778549613600294e-05, + "loss": 1.0831, + "step": 39831 + }, + { + "epoch": 0.48, + "grad_norm": 5.222525732080832, + "learning_rate": 1.778512950247692e-05, + "loss": 1.6642, + "step": 39834 + }, + { + "epoch": 0.48, + "grad_norm": 9.327749640850692, + "learning_rate": 1.7784762842383054e-05, + "loss": 1.2941, + "step": 39837 + }, + { + "epoch": 0.48, + "grad_norm": 57.56814734338376, + "learning_rate": 1.7784396155722597e-05, + "loss": 1.6971, + "step": 39840 + }, + { + "epoch": 0.48, + "grad_norm": 5.9458061538636775, + "learning_rate": 1.77840294424968e-05, + "loss": 1.2228, + "step": 39843 + }, + { + "epoch": 0.48, + "grad_norm": 14.597529597743828, + "learning_rate": 1.7783662702706915e-05, + "loss": 1.447, + "step": 39846 + }, + { + "epoch": 0.48, + "grad_norm": 18.977179894475288, + "learning_rate": 1.778329593635419e-05, + "loss": 1.4169, + "step": 39849 + }, + { + "epoch": 0.48, + "grad_norm": 16.486433968198185, + "learning_rate": 1.778292914343988e-05, + "loss": 1.3834, + "step": 39852 + }, + { + "epoch": 0.48, + "grad_norm": 17.66343071383194, + "learning_rate": 1.778256232396524e-05, + "loss": 1.2391, + "step": 39855 + }, + { + "epoch": 0.48, + "grad_norm": 4.5295607310394885, + "learning_rate": 1.7782195477931514e-05, + "loss": 1.5487, + "step": 39858 + }, + { + "epoch": 0.48, + "grad_norm": 5.619141532017227, + "learning_rate": 1.7781828605339963e-05, + "loss": 1.6599, + "step": 39861 + }, + { + "epoch": 0.48, + "grad_norm": 14.851481091202162, + "learning_rate": 1.7781461706191828e-05, + "loss": 1.2334, + "step": 39864 + }, + { + "epoch": 0.48, + "grad_norm": 7.767340223324553, + "learning_rate": 1.7781094780488374e-05, + "loss": 1.2483, + "step": 39867 + }, + { + "epoch": 0.48, + "grad_norm": 5.948425879144347, + "learning_rate": 1.778072782823084e-05, + "loss": 1.2719, + "step": 39870 + }, + { + "epoch": 0.48, + "grad_norm": 28.016528257614286, + "learning_rate": 1.7780360849420486e-05, + "loss": 1.5704, + "step": 39873 + }, + { + "epoch": 0.48, + "grad_norm": 13.066793305872665, + "learning_rate": 1.7779993844058564e-05, + "loss": 1.2644, + "step": 39876 + }, + { + "epoch": 0.48, + "grad_norm": 6.6271782042109795, + "learning_rate": 1.7779626812146326e-05, + "loss": 1.1242, + "step": 39879 + }, + { + "epoch": 0.48, + "grad_norm": 15.622915464022633, + "learning_rate": 1.7779259753685023e-05, + "loss": 1.3472, + "step": 39882 + }, + { + "epoch": 0.48, + "grad_norm": 11.09926348435683, + "learning_rate": 1.7778892668675908e-05, + "loss": 1.5885, + "step": 39885 + }, + { + "epoch": 0.48, + "grad_norm": 91.62141494763716, + "learning_rate": 1.7778525557120235e-05, + "loss": 1.4933, + "step": 39888 + }, + { + "epoch": 0.48, + "grad_norm": 4.071787149126844, + "learning_rate": 1.7778158419019254e-05, + "loss": 1.7237, + "step": 39891 + }, + { + "epoch": 0.48, + "grad_norm": 4.071430297671446, + "learning_rate": 1.7777791254374223e-05, + "loss": 1.2421, + "step": 39894 + }, + { + "epoch": 0.48, + "grad_norm": 42.87305864061468, + "learning_rate": 1.7777424063186387e-05, + "loss": 1.5665, + "step": 39897 + }, + { + "epoch": 0.48, + "grad_norm": 28.210299358238963, + "learning_rate": 1.777705684545701e-05, + "loss": 1.5307, + "step": 39900 + }, + { + "epoch": 0.48, + "grad_norm": 13.722909918028284, + "learning_rate": 1.777668960118734e-05, + "loss": 1.8301, + "step": 39903 + }, + { + "epoch": 0.48, + "grad_norm": 33.5885842524374, + "learning_rate": 1.7776322330378623e-05, + "loss": 1.5891, + "step": 39906 + }, + { + "epoch": 0.48, + "grad_norm": 9.059005423312051, + "learning_rate": 1.7775955033032124e-05, + "loss": 1.7448, + "step": 39909 + }, + { + "epoch": 0.48, + "grad_norm": 15.60545106383946, + "learning_rate": 1.777558770914909e-05, + "loss": 1.6787, + "step": 39912 + }, + { + "epoch": 0.48, + "grad_norm": 9.301274437733172, + "learning_rate": 1.7775220358730776e-05, + "loss": 1.2338, + "step": 39915 + }, + { + "epoch": 0.48, + "grad_norm": 7.451618492270577, + "learning_rate": 1.7774852981778434e-05, + "loss": 1.5343, + "step": 39918 + }, + { + "epoch": 0.48, + "grad_norm": 8.393946725810698, + "learning_rate": 1.777448557829332e-05, + "loss": 1.4688, + "step": 39921 + }, + { + "epoch": 0.48, + "grad_norm": 12.777022476701159, + "learning_rate": 1.7774118148276688e-05, + "loss": 1.3926, + "step": 39924 + }, + { + "epoch": 0.48, + "grad_norm": 13.801587100997175, + "learning_rate": 1.777375069172979e-05, + "loss": 1.7347, + "step": 39927 + }, + { + "epoch": 0.48, + "grad_norm": 28.67991698564411, + "learning_rate": 1.7773383208653883e-05, + "loss": 1.3584, + "step": 39930 + }, + { + "epoch": 0.48, + "grad_norm": 44.35723942720635, + "learning_rate": 1.7773015699050218e-05, + "loss": 1.2243, + "step": 39933 + }, + { + "epoch": 0.48, + "grad_norm": 24.638961421934013, + "learning_rate": 1.777264816292005e-05, + "loss": 1.3162, + "step": 39936 + }, + { + "epoch": 0.48, + "grad_norm": 20.83314080839597, + "learning_rate": 1.7772280600264634e-05, + "loss": 1.5805, + "step": 39939 + }, + { + "epoch": 0.48, + "grad_norm": 12.906345739351323, + "learning_rate": 1.7771913011085222e-05, + "loss": 1.413, + "step": 39942 + }, + { + "epoch": 0.48, + "grad_norm": 13.017700447574228, + "learning_rate": 1.777154539538307e-05, + "loss": 1.5739, + "step": 39945 + }, + { + "epoch": 0.48, + "grad_norm": 7.688611442272419, + "learning_rate": 1.7771177753159437e-05, + "loss": 1.5542, + "step": 39948 + }, + { + "epoch": 0.48, + "grad_norm": 25.068940484513508, + "learning_rate": 1.777081008441557e-05, + "loss": 1.6136, + "step": 39951 + }, + { + "epoch": 0.48, + "grad_norm": 37.773577501414856, + "learning_rate": 1.777044238915273e-05, + "loss": 1.2724, + "step": 39954 + }, + { + "epoch": 0.48, + "grad_norm": 26.108486463838076, + "learning_rate": 1.7770074667372167e-05, + "loss": 1.8639, + "step": 39957 + }, + { + "epoch": 0.48, + "grad_norm": 18.351911138673948, + "learning_rate": 1.7769706919075136e-05, + "loss": 1.2053, + "step": 39960 + }, + { + "epoch": 0.48, + "grad_norm": 13.949581052043197, + "learning_rate": 1.7769339144262897e-05, + "loss": 1.6583, + "step": 39963 + }, + { + "epoch": 0.48, + "grad_norm": 12.13376196730377, + "learning_rate": 1.77689713429367e-05, + "loss": 1.057, + "step": 39966 + }, + { + "epoch": 0.48, + "grad_norm": 10.177548828301763, + "learning_rate": 1.7768603515097804e-05, + "loss": 1.34, + "step": 39969 + }, + { + "epoch": 0.48, + "grad_norm": 15.042296612979639, + "learning_rate": 1.776823566074746e-05, + "loss": 1.6272, + "step": 39972 + }, + { + "epoch": 0.48, + "grad_norm": 18.98907467369973, + "learning_rate": 1.776786777988693e-05, + "loss": 1.7081, + "step": 39975 + }, + { + "epoch": 0.48, + "grad_norm": 8.088352802871839, + "learning_rate": 1.776749987251746e-05, + "loss": 1.2197, + "step": 39978 + }, + { + "epoch": 0.48, + "grad_norm": 14.120522434537655, + "learning_rate": 1.7767131938640316e-05, + "loss": 1.6291, + "step": 39981 + }, + { + "epoch": 0.48, + "grad_norm": 10.652695462805577, + "learning_rate": 1.7766763978256744e-05, + "loss": 2.041, + "step": 39984 + }, + { + "epoch": 0.48, + "grad_norm": 20.384427726421205, + "learning_rate": 1.7766395991368005e-05, + "loss": 1.4285, + "step": 39987 + }, + { + "epoch": 0.48, + "grad_norm": 9.57285496172108, + "learning_rate": 1.7766027977975357e-05, + "loss": 1.523, + "step": 39990 + }, + { + "epoch": 0.48, + "grad_norm": 25.28204952275598, + "learning_rate": 1.776565993808005e-05, + "loss": 1.4665, + "step": 39993 + }, + { + "epoch": 0.48, + "grad_norm": 54.234791577246796, + "learning_rate": 1.7765291871683347e-05, + "loss": 1.1332, + "step": 39996 + }, + { + "epoch": 0.48, + "grad_norm": 8.448389893485555, + "learning_rate": 1.7764923778786496e-05, + "loss": 1.2772, + "step": 39999 + }, + { + "epoch": 0.48, + "grad_norm": 30.15392557214384, + "learning_rate": 1.7764555659390758e-05, + "loss": 1.5015, + "step": 40002 + }, + { + "epoch": 0.48, + "grad_norm": 45.550079911776905, + "learning_rate": 1.7764187513497386e-05, + "loss": 1.7097, + "step": 40005 + }, + { + "epoch": 0.48, + "grad_norm": 20.237002408388474, + "learning_rate": 1.7763819341107638e-05, + "loss": 1.7107, + "step": 40008 + }, + { + "epoch": 0.48, + "grad_norm": 3.6855839812128335, + "learning_rate": 1.7763451142222778e-05, + "loss": 1.4736, + "step": 40011 + }, + { + "epoch": 0.48, + "grad_norm": 6.296897810392293, + "learning_rate": 1.776308291684405e-05, + "loss": 1.2418, + "step": 40014 + }, + { + "epoch": 0.48, + "grad_norm": 16.14031443042339, + "learning_rate": 1.7762714664972716e-05, + "loss": 1.4913, + "step": 40017 + }, + { + "epoch": 0.48, + "grad_norm": 11.849843669160636, + "learning_rate": 1.7762346386610035e-05, + "loss": 1.1916, + "step": 40020 + }, + { + "epoch": 0.48, + "grad_norm": 3.490904574581322, + "learning_rate": 1.7761978081757263e-05, + "loss": 1.5486, + "step": 40023 + }, + { + "epoch": 0.48, + "grad_norm": 11.85085905348614, + "learning_rate": 1.7761609750415654e-05, + "loss": 1.5152, + "step": 40026 + }, + { + "epoch": 0.48, + "grad_norm": 34.527588976670756, + "learning_rate": 1.7761241392586467e-05, + "loss": 1.2176, + "step": 40029 + }, + { + "epoch": 0.48, + "grad_norm": 34.07450522147663, + "learning_rate": 1.776087300827096e-05, + "loss": 1.3734, + "step": 40032 + }, + { + "epoch": 0.48, + "grad_norm": 55.239969401151875, + "learning_rate": 1.776050459747038e-05, + "loss": 1.4519, + "step": 40035 + }, + { + "epoch": 0.48, + "grad_norm": 2.870515772941604, + "learning_rate": 1.7760136160186004e-05, + "loss": 1.6912, + "step": 40038 + }, + { + "epoch": 0.48, + "grad_norm": 12.079095789450209, + "learning_rate": 1.7759767696419073e-05, + "loss": 1.2761, + "step": 40041 + }, + { + "epoch": 0.48, + "grad_norm": 32.682964867743095, + "learning_rate": 1.7759399206170854e-05, + "loss": 1.6755, + "step": 40044 + }, + { + "epoch": 0.48, + "grad_norm": 8.11969764049555, + "learning_rate": 1.77590306894426e-05, + "loss": 1.4122, + "step": 40047 + }, + { + "epoch": 0.48, + "grad_norm": 23.07225549316919, + "learning_rate": 1.7758662146235565e-05, + "loss": 1.3689, + "step": 40050 + }, + { + "epoch": 0.48, + "grad_norm": 13.446077975630397, + "learning_rate": 1.775829357655101e-05, + "loss": 1.6134, + "step": 40053 + }, + { + "epoch": 0.48, + "grad_norm": 14.702895150170495, + "learning_rate": 1.77579249803902e-05, + "loss": 1.6656, + "step": 40056 + }, + { + "epoch": 0.48, + "grad_norm": 8.778869941281334, + "learning_rate": 1.775755635775438e-05, + "loss": 1.4814, + "step": 40059 + }, + { + "epoch": 0.48, + "grad_norm": 21.596146681583107, + "learning_rate": 1.7757187708644817e-05, + "loss": 1.8385, + "step": 40062 + }, + { + "epoch": 0.48, + "grad_norm": 5.746073479178012, + "learning_rate": 1.775681903306277e-05, + "loss": 1.6003, + "step": 40065 + }, + { + "epoch": 0.48, + "grad_norm": 11.784998187025993, + "learning_rate": 1.7756450331009486e-05, + "loss": 1.567, + "step": 40068 + }, + { + "epoch": 0.48, + "grad_norm": 11.546858484737978, + "learning_rate": 1.775608160248624e-05, + "loss": 1.2738, + "step": 40071 + }, + { + "epoch": 0.48, + "grad_norm": 42.660431922398345, + "learning_rate": 1.7755712847494275e-05, + "loss": 1.7471, + "step": 40074 + }, + { + "epoch": 0.48, + "grad_norm": 8.34341471016828, + "learning_rate": 1.7755344066034858e-05, + "loss": 1.4704, + "step": 40077 + }, + { + "epoch": 0.48, + "grad_norm": 15.724081959617171, + "learning_rate": 1.7754975258109245e-05, + "loss": 1.4093, + "step": 40080 + }, + { + "epoch": 0.48, + "grad_norm": 15.028748815379824, + "learning_rate": 1.7754606423718692e-05, + "loss": 1.2596, + "step": 40083 + }, + { + "epoch": 0.48, + "grad_norm": 8.300077976148042, + "learning_rate": 1.7754237562864464e-05, + "loss": 1.3407, + "step": 40086 + }, + { + "epoch": 0.48, + "grad_norm": 12.97449909506493, + "learning_rate": 1.7753868675547812e-05, + "loss": 1.4856, + "step": 40089 + }, + { + "epoch": 0.48, + "grad_norm": 33.04122519538812, + "learning_rate": 1.7753499761770004e-05, + "loss": 1.6186, + "step": 40092 + }, + { + "epoch": 0.48, + "grad_norm": 12.741364977051841, + "learning_rate": 1.775313082153229e-05, + "loss": 1.5878, + "step": 40095 + }, + { + "epoch": 0.48, + "grad_norm": 11.387247971262832, + "learning_rate": 1.7752761854835937e-05, + "loss": 1.3698, + "step": 40098 + }, + { + "epoch": 0.48, + "grad_norm": 14.46202194078507, + "learning_rate": 1.77523928616822e-05, + "loss": 1.5619, + "step": 40101 + }, + { + "epoch": 0.48, + "grad_norm": 6.480377378549624, + "learning_rate": 1.7752023842072334e-05, + "loss": 1.3025, + "step": 40104 + }, + { + "epoch": 0.48, + "grad_norm": 27.77198527377389, + "learning_rate": 1.7751654796007607e-05, + "loss": 1.6027, + "step": 40107 + }, + { + "epoch": 0.48, + "grad_norm": 6.495341313916661, + "learning_rate": 1.7751285723489275e-05, + "loss": 1.4534, + "step": 40110 + }, + { + "epoch": 0.48, + "grad_norm": 12.001716269764056, + "learning_rate": 1.7750916624518595e-05, + "loss": 1.2599, + "step": 40113 + }, + { + "epoch": 0.48, + "grad_norm": 12.973118375357148, + "learning_rate": 1.7750547499096828e-05, + "loss": 1.2446, + "step": 40116 + }, + { + "epoch": 0.48, + "grad_norm": 14.748614323584052, + "learning_rate": 1.7750178347225234e-05, + "loss": 1.3333, + "step": 40119 + }, + { + "epoch": 0.48, + "grad_norm": 7.900972191108013, + "learning_rate": 1.7749809168905073e-05, + "loss": 1.4635, + "step": 40122 + }, + { + "epoch": 0.48, + "grad_norm": 21.088695719886836, + "learning_rate": 1.7749439964137603e-05, + "loss": 1.197, + "step": 40125 + }, + { + "epoch": 0.48, + "grad_norm": 12.692575099564724, + "learning_rate": 1.774907073292409e-05, + "loss": 1.3078, + "step": 40128 + }, + { + "epoch": 0.48, + "grad_norm": 25.277763651801212, + "learning_rate": 1.7748701475265787e-05, + "loss": 1.4928, + "step": 40131 + }, + { + "epoch": 0.48, + "grad_norm": 4.21745464237597, + "learning_rate": 1.774833219116396e-05, + "loss": 1.4181, + "step": 40134 + }, + { + "epoch": 0.48, + "grad_norm": 9.886624389577044, + "learning_rate": 1.7747962880619863e-05, + "loss": 1.5412, + "step": 40137 + }, + { + "epoch": 0.48, + "grad_norm": 24.1310724823764, + "learning_rate": 1.774759354363476e-05, + "loss": 1.5537, + "step": 40140 + }, + { + "epoch": 0.48, + "grad_norm": 30.48635067733026, + "learning_rate": 1.7747224180209913e-05, + "loss": 1.4077, + "step": 40143 + }, + { + "epoch": 0.48, + "grad_norm": 2.83698523236142, + "learning_rate": 1.7746854790346577e-05, + "loss": 1.7611, + "step": 40146 + }, + { + "epoch": 0.48, + "grad_norm": 4.711948234902152, + "learning_rate": 1.774648537404602e-05, + "loss": 1.3537, + "step": 40149 + }, + { + "epoch": 0.48, + "grad_norm": 7.996222430596407, + "learning_rate": 1.7746115931309497e-05, + "loss": 1.3506, + "step": 40152 + }, + { + "epoch": 0.48, + "grad_norm": 13.903339072174038, + "learning_rate": 1.774574646213827e-05, + "loss": 1.6947, + "step": 40155 + }, + { + "epoch": 0.48, + "grad_norm": 11.831224959732667, + "learning_rate": 1.7745376966533598e-05, + "loss": 1.5854, + "step": 40158 + }, + { + "epoch": 0.48, + "grad_norm": 6.67641938725736, + "learning_rate": 1.7745007444496747e-05, + "loss": 1.6787, + "step": 40161 + }, + { + "epoch": 0.48, + "grad_norm": 37.702940536363066, + "learning_rate": 1.7744637896028972e-05, + "loss": 1.4574, + "step": 40164 + }, + { + "epoch": 0.48, + "grad_norm": 20.224940676226012, + "learning_rate": 1.7744268321131544e-05, + "loss": 1.7135, + "step": 40167 + }, + { + "epoch": 0.48, + "grad_norm": 173.01340855777448, + "learning_rate": 1.774389871980571e-05, + "loss": 1.2593, + "step": 40170 + }, + { + "epoch": 0.48, + "grad_norm": 29.040139967932127, + "learning_rate": 1.774352909205275e-05, + "loss": 1.4771, + "step": 40173 + }, + { + "epoch": 0.48, + "grad_norm": 22.6254894835404, + "learning_rate": 1.7743159437873903e-05, + "loss": 1.5352, + "step": 40176 + }, + { + "epoch": 0.48, + "grad_norm": 9.78954503968385, + "learning_rate": 1.7742789757270446e-05, + "loss": 1.3699, + "step": 40179 + }, + { + "epoch": 0.48, + "grad_norm": 65.84846469762097, + "learning_rate": 1.774242005024364e-05, + "loss": 1.6673, + "step": 40182 + }, + { + "epoch": 0.48, + "grad_norm": 9.438465080679192, + "learning_rate": 1.7742050316794738e-05, + "loss": 1.0509, + "step": 40185 + }, + { + "epoch": 0.48, + "grad_norm": 7.888439845929695, + "learning_rate": 1.774168055692501e-05, + "loss": 1.903, + "step": 40188 + }, + { + "epoch": 0.48, + "grad_norm": 25.834412127891024, + "learning_rate": 1.7741310770635715e-05, + "loss": 1.6134, + "step": 40191 + }, + { + "epoch": 0.48, + "grad_norm": 8.043553667425487, + "learning_rate": 1.774094095792811e-05, + "loss": 1.2488, + "step": 40194 + }, + { + "epoch": 0.48, + "grad_norm": 8.098509593152583, + "learning_rate": 1.7740571118803467e-05, + "loss": 1.45, + "step": 40197 + }, + { + "epoch": 0.48, + "grad_norm": 71.0639717622131, + "learning_rate": 1.774020125326304e-05, + "loss": 1.3259, + "step": 40200 + }, + { + "epoch": 0.48, + "grad_norm": 16.764207315092715, + "learning_rate": 1.7739831361308098e-05, + "loss": 1.4951, + "step": 40203 + }, + { + "epoch": 0.48, + "grad_norm": 44.426949853309885, + "learning_rate": 1.7739461442939893e-05, + "loss": 1.4514, + "step": 40206 + }, + { + "epoch": 0.48, + "grad_norm": 5.008389311931479, + "learning_rate": 1.7739091498159703e-05, + "loss": 1.3851, + "step": 40209 + }, + { + "epoch": 0.48, + "grad_norm": 31.369624347842016, + "learning_rate": 1.7738721526968772e-05, + "loss": 1.313, + "step": 40212 + }, + { + "epoch": 0.48, + "grad_norm": 25.969476138983104, + "learning_rate": 1.7738351529368377e-05, + "loss": 1.5479, + "step": 40215 + }, + { + "epoch": 0.48, + "grad_norm": 34.075881738828485, + "learning_rate": 1.7737981505359775e-05, + "loss": 1.5579, + "step": 40218 + }, + { + "epoch": 0.48, + "grad_norm": 63.05654244377419, + "learning_rate": 1.773761145494423e-05, + "loss": 1.4919, + "step": 40221 + }, + { + "epoch": 0.48, + "grad_norm": 33.28930024899503, + "learning_rate": 1.7737241378123e-05, + "loss": 1.7281, + "step": 40224 + }, + { + "epoch": 0.48, + "grad_norm": 20.98118823627755, + "learning_rate": 1.7736871274897357e-05, + "loss": 1.3051, + "step": 40227 + }, + { + "epoch": 0.48, + "grad_norm": 10.941195961674369, + "learning_rate": 1.7736501145268557e-05, + "loss": 1.5213, + "step": 40230 + }, + { + "epoch": 0.48, + "grad_norm": 19.893190574859, + "learning_rate": 1.7736130989237866e-05, + "loss": 1.4947, + "step": 40233 + }, + { + "epoch": 0.48, + "grad_norm": 22.585616422700667, + "learning_rate": 1.7735760806806547e-05, + "loss": 1.7392, + "step": 40236 + }, + { + "epoch": 0.48, + "grad_norm": 71.62210927520526, + "learning_rate": 1.773539059797586e-05, + "loss": 1.3759, + "step": 40239 + }, + { + "epoch": 0.48, + "grad_norm": 11.716114233839459, + "learning_rate": 1.7735020362747074e-05, + "loss": 1.2363, + "step": 40242 + }, + { + "epoch": 0.48, + "grad_norm": 7.398162931180008, + "learning_rate": 1.7734650101121448e-05, + "loss": 1.3596, + "step": 40245 + }, + { + "epoch": 0.48, + "grad_norm": 13.845055019058556, + "learning_rate": 1.7734279813100248e-05, + "loss": 1.4702, + "step": 40248 + }, + { + "epoch": 0.48, + "grad_norm": 11.85297947496301, + "learning_rate": 1.7733909498684736e-05, + "loss": 1.5542, + "step": 40251 + }, + { + "epoch": 0.48, + "grad_norm": 7.111245048370081, + "learning_rate": 1.7733539157876176e-05, + "loss": 1.5374, + "step": 40254 + }, + { + "epoch": 0.48, + "grad_norm": 17.33992090781073, + "learning_rate": 1.7733168790675834e-05, + "loss": 1.3778, + "step": 40257 + }, + { + "epoch": 0.48, + "grad_norm": 6.271569504973072, + "learning_rate": 1.773279839708497e-05, + "loss": 1.6579, + "step": 40260 + }, + { + "epoch": 0.48, + "grad_norm": 29.32687675430915, + "learning_rate": 1.7732427977104855e-05, + "loss": 1.495, + "step": 40263 + }, + { + "epoch": 0.48, + "grad_norm": 10.858860007878064, + "learning_rate": 1.7732057530736745e-05, + "loss": 1.5396, + "step": 40266 + }, + { + "epoch": 0.48, + "grad_norm": 20.156440713061812, + "learning_rate": 1.773168705798191e-05, + "loss": 1.276, + "step": 40269 + }, + { + "epoch": 0.48, + "grad_norm": 7.276309007985526, + "learning_rate": 1.7731316558841613e-05, + "loss": 1.5395, + "step": 40272 + }, + { + "epoch": 0.48, + "grad_norm": 17.950106845906458, + "learning_rate": 1.7730946033317115e-05, + "loss": 1.4445, + "step": 40275 + }, + { + "epoch": 0.48, + "grad_norm": 42.901443943703974, + "learning_rate": 1.773057548140968e-05, + "loss": 0.9913, + "step": 40278 + }, + { + "epoch": 0.48, + "grad_norm": 15.357293545720212, + "learning_rate": 1.773020490312058e-05, + "loss": 1.3282, + "step": 40281 + }, + { + "epoch": 0.48, + "grad_norm": 3.2917956866150404, + "learning_rate": 1.772983429845107e-05, + "loss": 1.5575, + "step": 40284 + }, + { + "epoch": 0.48, + "grad_norm": 14.134127329685949, + "learning_rate": 1.7729463667402425e-05, + "loss": 1.2523, + "step": 40287 + }, + { + "epoch": 0.48, + "grad_norm": 19.59408593878232, + "learning_rate": 1.7729093009975904e-05, + "loss": 1.3803, + "step": 40290 + }, + { + "epoch": 0.48, + "grad_norm": 15.346204168743052, + "learning_rate": 1.7728722326172773e-05, + "loss": 1.2742, + "step": 40293 + }, + { + "epoch": 0.48, + "grad_norm": 14.45881881377166, + "learning_rate": 1.7728351615994293e-05, + "loss": 1.1332, + "step": 40296 + }, + { + "epoch": 0.48, + "grad_norm": 48.44771386409064, + "learning_rate": 1.7727980879441738e-05, + "loss": 1.44, + "step": 40299 + }, + { + "epoch": 0.48, + "grad_norm": 18.599681020080446, + "learning_rate": 1.772761011651636e-05, + "loss": 1.4439, + "step": 40302 + }, + { + "epoch": 0.48, + "grad_norm": 19.48841012816049, + "learning_rate": 1.7727239327219438e-05, + "loss": 1.5326, + "step": 40305 + }, + { + "epoch": 0.48, + "grad_norm": 26.838592095631217, + "learning_rate": 1.772686851155223e-05, + "loss": 1.5345, + "step": 40308 + }, + { + "epoch": 0.48, + "grad_norm": 16.35939685515122, + "learning_rate": 1.7726497669516006e-05, + "loss": 1.6002, + "step": 40311 + }, + { + "epoch": 0.48, + "grad_norm": 15.179622098338854, + "learning_rate": 1.7726126801112026e-05, + "loss": 1.9194, + "step": 40314 + }, + { + "epoch": 0.48, + "grad_norm": 6.883264103491813, + "learning_rate": 1.7725755906341556e-05, + "loss": 1.2531, + "step": 40317 + }, + { + "epoch": 0.48, + "grad_norm": 16.751507657039436, + "learning_rate": 1.7725384985205865e-05, + "loss": 1.3712, + "step": 40320 + }, + { + "epoch": 0.48, + "grad_norm": 23.233744684358566, + "learning_rate": 1.772501403770622e-05, + "loss": 1.1207, + "step": 40323 + }, + { + "epoch": 0.48, + "grad_norm": 20.418138863493784, + "learning_rate": 1.7724643063843886e-05, + "loss": 0.9864, + "step": 40326 + }, + { + "epoch": 0.48, + "grad_norm": 23.95927025883839, + "learning_rate": 1.7724272063620122e-05, + "loss": 1.41, + "step": 40329 + }, + { + "epoch": 0.48, + "grad_norm": 19.470304321638967, + "learning_rate": 1.77239010370362e-05, + "loss": 1.3148, + "step": 40332 + }, + { + "epoch": 0.49, + "grad_norm": 15.17218266203002, + "learning_rate": 1.772352998409339e-05, + "loss": 1.3108, + "step": 40335 + }, + { + "epoch": 0.49, + "grad_norm": 7.874420654853086, + "learning_rate": 1.7723158904792956e-05, + "loss": 1.3308, + "step": 40338 + }, + { + "epoch": 0.49, + "grad_norm": 5.545332820288751, + "learning_rate": 1.7722787799136155e-05, + "loss": 1.7046, + "step": 40341 + }, + { + "epoch": 0.49, + "grad_norm": 2.373673865719632, + "learning_rate": 1.7722416667124266e-05, + "loss": 1.8459, + "step": 40344 + }, + { + "epoch": 0.49, + "grad_norm": 4.412780451313142, + "learning_rate": 1.7722045508758553e-05, + "loss": 1.217, + "step": 40347 + }, + { + "epoch": 0.49, + "grad_norm": 31.70725294442986, + "learning_rate": 1.7721674324040276e-05, + "loss": 1.7839, + "step": 40350 + }, + { + "epoch": 0.49, + "grad_norm": 12.577008678727186, + "learning_rate": 1.7721303112970706e-05, + "loss": 1.6131, + "step": 40353 + }, + { + "epoch": 0.49, + "grad_norm": 16.298821237389944, + "learning_rate": 1.772093187555111e-05, + "loss": 1.6201, + "step": 40356 + }, + { + "epoch": 0.49, + "grad_norm": 49.16756552120217, + "learning_rate": 1.7720560611782755e-05, + "loss": 1.7216, + "step": 40359 + }, + { + "epoch": 0.49, + "grad_norm": 3.602845347363411, + "learning_rate": 1.7720189321666908e-05, + "loss": 1.3758, + "step": 40362 + }, + { + "epoch": 0.49, + "grad_norm": 6.3808040225715725, + "learning_rate": 1.7719818005204833e-05, + "loss": 1.2221, + "step": 40365 + }, + { + "epoch": 0.49, + "grad_norm": 63.1148299975722, + "learning_rate": 1.7719446662397804e-05, + "loss": 1.6641, + "step": 40368 + }, + { + "epoch": 0.49, + "grad_norm": 43.360975625768795, + "learning_rate": 1.771907529324708e-05, + "loss": 1.5848, + "step": 40371 + }, + { + "epoch": 0.49, + "grad_norm": 3.516672904066714, + "learning_rate": 1.7718703897753936e-05, + "loss": 1.724, + "step": 40374 + }, + { + "epoch": 0.49, + "grad_norm": 25.26013729043622, + "learning_rate": 1.7718332475919634e-05, + "loss": 1.436, + "step": 40377 + }, + { + "epoch": 0.49, + "grad_norm": 5.634556914118295, + "learning_rate": 1.7717961027745444e-05, + "loss": 1.1953, + "step": 40380 + }, + { + "epoch": 0.49, + "grad_norm": 5.960047094397994, + "learning_rate": 1.7717589553232632e-05, + "loss": 1.2422, + "step": 40383 + }, + { + "epoch": 0.49, + "grad_norm": 5.94936336643253, + "learning_rate": 1.771721805238247e-05, + "loss": 1.6396, + "step": 40386 + }, + { + "epoch": 0.49, + "grad_norm": 3.7437636230262408, + "learning_rate": 1.7716846525196215e-05, + "loss": 1.8273, + "step": 40389 + }, + { + "epoch": 0.49, + "grad_norm": 3.826029662713228, + "learning_rate": 1.771647497167515e-05, + "loss": 1.4215, + "step": 40392 + }, + { + "epoch": 0.49, + "grad_norm": 8.089141233448652, + "learning_rate": 1.771610339182053e-05, + "loss": 1.4582, + "step": 40395 + }, + { + "epoch": 0.49, + "grad_norm": 12.017270094986918, + "learning_rate": 1.7715731785633635e-05, + "loss": 1.9563, + "step": 40398 + }, + { + "epoch": 0.49, + "grad_norm": 9.158557332580186, + "learning_rate": 1.771536015311572e-05, + "loss": 1.4765, + "step": 40401 + }, + { + "epoch": 0.49, + "grad_norm": 4.156155697643576, + "learning_rate": 1.7714988494268065e-05, + "loss": 1.3256, + "step": 40404 + }, + { + "epoch": 0.49, + "grad_norm": 42.80936356997506, + "learning_rate": 1.771461680909193e-05, + "loss": 1.142, + "step": 40407 + }, + { + "epoch": 0.49, + "grad_norm": 32.55783823269939, + "learning_rate": 1.771424509758859e-05, + "loss": 1.2212, + "step": 40410 + }, + { + "epoch": 0.49, + "grad_norm": 32.068622004285544, + "learning_rate": 1.7713873359759307e-05, + "loss": 1.7306, + "step": 40413 + }, + { + "epoch": 0.49, + "grad_norm": 5.887121546156562, + "learning_rate": 1.771350159560535e-05, + "loss": 1.4622, + "step": 40416 + }, + { + "epoch": 0.49, + "grad_norm": 21.66419540877664, + "learning_rate": 1.7713129805127995e-05, + "loss": 1.3697, + "step": 40419 + }, + { + "epoch": 0.49, + "grad_norm": 4.987975692150324, + "learning_rate": 1.7712757988328505e-05, + "loss": 1.2651, + "step": 40422 + }, + { + "epoch": 0.49, + "grad_norm": 34.8824464496213, + "learning_rate": 1.771238614520815e-05, + "loss": 1.1498, + "step": 40425 + }, + { + "epoch": 0.49, + "grad_norm": 6.655851387639749, + "learning_rate": 1.77120142757682e-05, + "loss": 1.6673, + "step": 40428 + }, + { + "epoch": 0.49, + "grad_norm": 22.623437677544988, + "learning_rate": 1.771164238000992e-05, + "loss": 1.404, + "step": 40431 + }, + { + "epoch": 0.49, + "grad_norm": 3.6279420055965312, + "learning_rate": 1.771127045793459e-05, + "loss": 1.3725, + "step": 40434 + }, + { + "epoch": 0.49, + "grad_norm": 6.369622382789393, + "learning_rate": 1.7710898509543462e-05, + "loss": 1.5217, + "step": 40437 + }, + { + "epoch": 0.49, + "grad_norm": 6.246285003613404, + "learning_rate": 1.771052653483782e-05, + "loss": 0.9877, + "step": 40440 + }, + { + "epoch": 0.49, + "grad_norm": 9.42030171704291, + "learning_rate": 1.771015453381893e-05, + "loss": 1.1308, + "step": 40443 + }, + { + "epoch": 0.49, + "grad_norm": 6.069063152511551, + "learning_rate": 1.7709782506488052e-05, + "loss": 1.2759, + "step": 40446 + }, + { + "epoch": 0.49, + "grad_norm": 24.600162221057502, + "learning_rate": 1.770941045284647e-05, + "loss": 1.8064, + "step": 40449 + }, + { + "epoch": 0.49, + "grad_norm": 32.35076602551886, + "learning_rate": 1.7709038372895447e-05, + "loss": 1.293, + "step": 40452 + }, + { + "epoch": 0.49, + "grad_norm": 69.65345179575989, + "learning_rate": 1.770866626663625e-05, + "loss": 1.6921, + "step": 40455 + }, + { + "epoch": 0.49, + "grad_norm": 49.67750962328183, + "learning_rate": 1.7708294134070155e-05, + "loss": 1.6391, + "step": 40458 + }, + { + "epoch": 0.49, + "grad_norm": 5.691333518610539, + "learning_rate": 1.7707921975198425e-05, + "loss": 1.4213, + "step": 40461 + }, + { + "epoch": 0.49, + "grad_norm": 16.337498277734397, + "learning_rate": 1.7707549790022334e-05, + "loss": 1.6864, + "step": 40464 + }, + { + "epoch": 0.49, + "grad_norm": 21.11263955968945, + "learning_rate": 1.7707177578543153e-05, + "loss": 1.346, + "step": 40467 + }, + { + "epoch": 0.49, + "grad_norm": 25.248293899450893, + "learning_rate": 1.7706805340762153e-05, + "loss": 1.3742, + "step": 40470 + }, + { + "epoch": 0.49, + "grad_norm": 14.145929253479313, + "learning_rate": 1.77064330766806e-05, + "loss": 1.2264, + "step": 40473 + }, + { + "epoch": 0.49, + "grad_norm": 9.363640246412158, + "learning_rate": 1.7706060786299767e-05, + "loss": 1.3346, + "step": 40476 + }, + { + "epoch": 0.49, + "grad_norm": 19.40340731190555, + "learning_rate": 1.7705688469620924e-05, + "loss": 1.645, + "step": 40479 + }, + { + "epoch": 0.49, + "grad_norm": 8.59873402565044, + "learning_rate": 1.7705316126645343e-05, + "loss": 1.4392, + "step": 40482 + }, + { + "epoch": 0.49, + "grad_norm": 3.6171580683205855, + "learning_rate": 1.770494375737429e-05, + "loss": 1.2583, + "step": 40485 + }, + { + "epoch": 0.49, + "grad_norm": 9.836463733813028, + "learning_rate": 1.7704571361809043e-05, + "loss": 1.1939, + "step": 40488 + }, + { + "epoch": 0.49, + "grad_norm": 39.04387198590743, + "learning_rate": 1.770419893995087e-05, + "loss": 1.669, + "step": 40491 + }, + { + "epoch": 0.49, + "grad_norm": 4.403778396099662, + "learning_rate": 1.7703826491801037e-05, + "loss": 1.3852, + "step": 40494 + }, + { + "epoch": 0.49, + "grad_norm": 24.86408206697062, + "learning_rate": 1.7703454017360823e-05, + "loss": 1.4489, + "step": 40497 + }, + { + "epoch": 0.49, + "grad_norm": 13.186196014034142, + "learning_rate": 1.770308151663149e-05, + "loss": 1.2582, + "step": 40500 + }, + { + "epoch": 0.49, + "grad_norm": 17.862155017643854, + "learning_rate": 1.7702708989614323e-05, + "loss": 1.5462, + "step": 40503 + }, + { + "epoch": 0.49, + "grad_norm": 30.649188271169738, + "learning_rate": 1.7702336436310578e-05, + "loss": 1.3127, + "step": 40506 + }, + { + "epoch": 0.49, + "grad_norm": 8.162145422548196, + "learning_rate": 1.7701963856721533e-05, + "loss": 1.7454, + "step": 40509 + }, + { + "epoch": 0.49, + "grad_norm": 28.032151372962005, + "learning_rate": 1.7701591250848464e-05, + "loss": 1.878, + "step": 40512 + }, + { + "epoch": 0.49, + "grad_norm": 11.739543552007415, + "learning_rate": 1.7701218618692633e-05, + "loss": 1.5355, + "step": 40515 + }, + { + "epoch": 0.49, + "grad_norm": 32.12900994474334, + "learning_rate": 1.770084596025532e-05, + "loss": 1.2122, + "step": 40518 + }, + { + "epoch": 0.49, + "grad_norm": 51.65873063991514, + "learning_rate": 1.7700473275537792e-05, + "loss": 1.4801, + "step": 40521 + }, + { + "epoch": 0.49, + "grad_norm": 4.172687669456874, + "learning_rate": 1.7700100564541324e-05, + "loss": 1.3547, + "step": 40524 + }, + { + "epoch": 0.49, + "grad_norm": 28.39493979510479, + "learning_rate": 1.7699727827267186e-05, + "loss": 1.4391, + "step": 40527 + }, + { + "epoch": 0.49, + "grad_norm": 16.614647617014064, + "learning_rate": 1.769935506371665e-05, + "loss": 1.4885, + "step": 40530 + }, + { + "epoch": 0.49, + "grad_norm": 17.21916894996384, + "learning_rate": 1.769898227389099e-05, + "loss": 1.5581, + "step": 40533 + }, + { + "epoch": 0.49, + "grad_norm": 25.99044397704415, + "learning_rate": 1.7698609457791474e-05, + "loss": 1.5205, + "step": 40536 + }, + { + "epoch": 0.49, + "grad_norm": 8.02010822339783, + "learning_rate": 1.769823661541938e-05, + "loss": 1.1708, + "step": 40539 + }, + { + "epoch": 0.49, + "grad_norm": 10.943573108215174, + "learning_rate": 1.7697863746775974e-05, + "loss": 1.3333, + "step": 40542 + }, + { + "epoch": 0.49, + "grad_norm": 8.234329255799725, + "learning_rate": 1.7697490851862533e-05, + "loss": 1.2238, + "step": 40545 + }, + { + "epoch": 0.49, + "grad_norm": 29.97657096343689, + "learning_rate": 1.7697117930680328e-05, + "loss": 1.9248, + "step": 40548 + }, + { + "epoch": 0.49, + "grad_norm": 20.80007184240772, + "learning_rate": 1.769674498323063e-05, + "loss": 1.9289, + "step": 40551 + }, + { + "epoch": 0.49, + "grad_norm": 8.27797324883138, + "learning_rate": 1.7696372009514717e-05, + "loss": 1.1402, + "step": 40554 + }, + { + "epoch": 0.49, + "grad_norm": 12.571240639775251, + "learning_rate": 1.7695999009533856e-05, + "loss": 1.3394, + "step": 40557 + }, + { + "epoch": 0.49, + "grad_norm": 35.459268089995156, + "learning_rate": 1.769562598328933e-05, + "loss": 1.521, + "step": 40560 + }, + { + "epoch": 0.49, + "grad_norm": 9.622339856143457, + "learning_rate": 1.7695252930782393e-05, + "loss": 1.4069, + "step": 40563 + }, + { + "epoch": 0.49, + "grad_norm": 30.52893698529718, + "learning_rate": 1.7694879852014336e-05, + "loss": 1.1826, + "step": 40566 + }, + { + "epoch": 0.49, + "grad_norm": 45.17758840901202, + "learning_rate": 1.7694506746986428e-05, + "loss": 1.3549, + "step": 40569 + }, + { + "epoch": 0.49, + "grad_norm": 19.608099837857914, + "learning_rate": 1.769413361569994e-05, + "loss": 1.5244, + "step": 40572 + }, + { + "epoch": 0.49, + "grad_norm": 12.917959689053681, + "learning_rate": 1.7693760458156137e-05, + "loss": 2.0402, + "step": 40575 + }, + { + "epoch": 0.49, + "grad_norm": 21.3179912504375, + "learning_rate": 1.769338727435631e-05, + "loss": 1.6786, + "step": 40578 + }, + { + "epoch": 0.49, + "grad_norm": 15.417745749357472, + "learning_rate": 1.7693014064301717e-05, + "loss": 1.642, + "step": 40581 + }, + { + "epoch": 0.49, + "grad_norm": 49.18740761368983, + "learning_rate": 1.7692640827993642e-05, + "loss": 1.5071, + "step": 40584 + }, + { + "epoch": 0.49, + "grad_norm": 7.9245505361960555, + "learning_rate": 1.769226756543335e-05, + "loss": 1.7613, + "step": 40587 + }, + { + "epoch": 0.49, + "grad_norm": 9.142609411859466, + "learning_rate": 1.7691894276622122e-05, + "loss": 1.2782, + "step": 40590 + }, + { + "epoch": 0.49, + "grad_norm": 16.873853420899614, + "learning_rate": 1.7691520961561234e-05, + "loss": 1.3738, + "step": 40593 + }, + { + "epoch": 0.49, + "grad_norm": 9.16231933238607, + "learning_rate": 1.769114762025195e-05, + "loss": 1.5313, + "step": 40596 + }, + { + "epoch": 0.49, + "grad_norm": 9.479895716867754, + "learning_rate": 1.769077425269555e-05, + "loss": 1.6253, + "step": 40599 + }, + { + "epoch": 0.49, + "grad_norm": 33.6151286283539, + "learning_rate": 1.7690400858893312e-05, + "loss": 1.4032, + "step": 40602 + }, + { + "epoch": 0.49, + "grad_norm": 12.48530190285462, + "learning_rate": 1.76900274388465e-05, + "loss": 1.7127, + "step": 40605 + }, + { + "epoch": 0.49, + "grad_norm": 14.758999868352648, + "learning_rate": 1.76896539925564e-05, + "loss": 1.1703, + "step": 40608 + }, + { + "epoch": 0.49, + "grad_norm": 6.097461754062418, + "learning_rate": 1.7689280520024276e-05, + "loss": 1.7061, + "step": 40611 + }, + { + "epoch": 0.49, + "grad_norm": 3.8485126042958866, + "learning_rate": 1.768890702125141e-05, + "loss": 1.4574, + "step": 40614 + }, + { + "epoch": 0.49, + "grad_norm": 28.446672292842244, + "learning_rate": 1.7688533496239072e-05, + "loss": 1.9443, + "step": 40617 + }, + { + "epoch": 0.49, + "grad_norm": 49.61990238065597, + "learning_rate": 1.7688159944988542e-05, + "loss": 1.4046, + "step": 40620 + }, + { + "epoch": 0.49, + "grad_norm": 5.645201188274331, + "learning_rate": 1.768778636750109e-05, + "loss": 1.3452, + "step": 40623 + }, + { + "epoch": 0.49, + "grad_norm": 35.066372544811806, + "learning_rate": 1.768741276377799e-05, + "loss": 1.3135, + "step": 40626 + }, + { + "epoch": 0.49, + "grad_norm": 16.54340875825133, + "learning_rate": 1.768703913382052e-05, + "loss": 1.7039, + "step": 40629 + }, + { + "epoch": 0.49, + "grad_norm": 46.66775109607253, + "learning_rate": 1.7686665477629955e-05, + "loss": 1.1213, + "step": 40632 + }, + { + "epoch": 0.49, + "grad_norm": 10.30328003318926, + "learning_rate": 1.7686291795207567e-05, + "loss": 1.1312, + "step": 40635 + }, + { + "epoch": 0.49, + "grad_norm": 36.09356700811735, + "learning_rate": 1.768591808655464e-05, + "loss": 1.4914, + "step": 40638 + }, + { + "epoch": 0.49, + "grad_norm": 16.35084131516434, + "learning_rate": 1.7685544351672436e-05, + "loss": 1.4701, + "step": 40641 + }, + { + "epoch": 0.49, + "grad_norm": 13.026392223096641, + "learning_rate": 1.7685170590562243e-05, + "loss": 1.2022, + "step": 40644 + }, + { + "epoch": 0.49, + "grad_norm": 11.509694695878434, + "learning_rate": 1.7684796803225328e-05, + "loss": 1.5696, + "step": 40647 + }, + { + "epoch": 0.49, + "grad_norm": 8.035241711943117, + "learning_rate": 1.768442298966297e-05, + "loss": 1.41, + "step": 40650 + }, + { + "epoch": 0.49, + "grad_norm": 44.9275816617052, + "learning_rate": 1.7684049149876442e-05, + "loss": 1.7278, + "step": 40653 + }, + { + "epoch": 0.49, + "grad_norm": 15.205409864957359, + "learning_rate": 1.7683675283867025e-05, + "loss": 1.2368, + "step": 40656 + }, + { + "epoch": 0.49, + "grad_norm": 5.9453612089588495, + "learning_rate": 1.7683301391635993e-05, + "loss": 1.2828, + "step": 40659 + }, + { + "epoch": 0.49, + "grad_norm": 25.87097352017096, + "learning_rate": 1.7682927473184616e-05, + "loss": 1.6293, + "step": 40662 + }, + { + "epoch": 0.49, + "grad_norm": 36.4495292300605, + "learning_rate": 1.768255352851418e-05, + "loss": 1.726, + "step": 40665 + }, + { + "epoch": 0.49, + "grad_norm": 7.080223782448649, + "learning_rate": 1.7682179557625954e-05, + "loss": 1.3999, + "step": 40668 + }, + { + "epoch": 0.49, + "grad_norm": 9.575242548517517, + "learning_rate": 1.7681805560521215e-05, + "loss": 1.3564, + "step": 40671 + }, + { + "epoch": 0.49, + "grad_norm": 15.46311480223178, + "learning_rate": 1.7681431537201243e-05, + "loss": 1.554, + "step": 40674 + }, + { + "epoch": 0.49, + "grad_norm": 73.7703495503671, + "learning_rate": 1.7681057487667308e-05, + "loss": 1.473, + "step": 40677 + }, + { + "epoch": 0.49, + "grad_norm": 29.359763369888338, + "learning_rate": 1.7680683411920693e-05, + "loss": 1.2665, + "step": 40680 + }, + { + "epoch": 0.49, + "grad_norm": 9.571090552300603, + "learning_rate": 1.7680309309962673e-05, + "loss": 1.5847, + "step": 40683 + }, + { + "epoch": 0.49, + "grad_norm": 13.074057217012355, + "learning_rate": 1.7679935181794527e-05, + "loss": 1.7605, + "step": 40686 + }, + { + "epoch": 0.49, + "grad_norm": 13.145057814722714, + "learning_rate": 1.767956102741752e-05, + "loss": 1.5139, + "step": 40689 + }, + { + "epoch": 0.49, + "grad_norm": 21.47420413934781, + "learning_rate": 1.7679186846832946e-05, + "loss": 1.4978, + "step": 40692 + }, + { + "epoch": 0.49, + "grad_norm": 4.651966732786045, + "learning_rate": 1.7678812640042066e-05, + "loss": 1.4458, + "step": 40695 + }, + { + "epoch": 0.49, + "grad_norm": 9.64378651931205, + "learning_rate": 1.767843840704617e-05, + "loss": 1.5326, + "step": 40698 + }, + { + "epoch": 0.49, + "grad_norm": 24.482986071448412, + "learning_rate": 1.767806414784653e-05, + "loss": 1.9379, + "step": 40701 + }, + { + "epoch": 0.49, + "grad_norm": 22.915784478919456, + "learning_rate": 1.7677689862444415e-05, + "loss": 1.5397, + "step": 40704 + }, + { + "epoch": 0.49, + "grad_norm": 10.191185672765437, + "learning_rate": 1.7677315550841116e-05, + "loss": 1.2557, + "step": 40707 + }, + { + "epoch": 0.49, + "grad_norm": 14.81124399612623, + "learning_rate": 1.7676941213037903e-05, + "loss": 1.4269, + "step": 40710 + }, + { + "epoch": 0.49, + "grad_norm": 14.525403276052012, + "learning_rate": 1.7676566849036058e-05, + "loss": 1.482, + "step": 40713 + }, + { + "epoch": 0.49, + "grad_norm": 23.093792850048104, + "learning_rate": 1.767619245883685e-05, + "loss": 1.4597, + "step": 40716 + }, + { + "epoch": 0.49, + "grad_norm": 17.8780191226852, + "learning_rate": 1.7675818042441565e-05, + "loss": 1.7343, + "step": 40719 + }, + { + "epoch": 0.49, + "grad_norm": 5.660533291632104, + "learning_rate": 1.7675443599851475e-05, + "loss": 1.6756, + "step": 40722 + }, + { + "epoch": 0.49, + "grad_norm": 6.890866758563319, + "learning_rate": 1.7675069131067867e-05, + "loss": 1.5768, + "step": 40725 + }, + { + "epoch": 0.49, + "grad_norm": 9.290056177306859, + "learning_rate": 1.7674694636092008e-05, + "loss": 1.5024, + "step": 40728 + }, + { + "epoch": 0.49, + "grad_norm": 27.70504900860529, + "learning_rate": 1.7674320114925182e-05, + "loss": 1.4295, + "step": 40731 + }, + { + "epoch": 0.49, + "grad_norm": 12.861912514000604, + "learning_rate": 1.7673945567568663e-05, + "loss": 0.8747, + "step": 40734 + }, + { + "epoch": 0.49, + "grad_norm": 3.758514896406468, + "learning_rate": 1.7673570994023736e-05, + "loss": 1.5758, + "step": 40737 + }, + { + "epoch": 0.49, + "grad_norm": 14.536666632337402, + "learning_rate": 1.7673196394291673e-05, + "loss": 1.4855, + "step": 40740 + }, + { + "epoch": 0.49, + "grad_norm": 110.41234318441302, + "learning_rate": 1.7672821768373753e-05, + "loss": 1.4243, + "step": 40743 + }, + { + "epoch": 0.49, + "grad_norm": 13.426943857906428, + "learning_rate": 1.767244711627126e-05, + "loss": 1.4415, + "step": 40746 + }, + { + "epoch": 0.49, + "grad_norm": 10.575234944661279, + "learning_rate": 1.7672072437985463e-05, + "loss": 1.3303, + "step": 40749 + }, + { + "epoch": 0.49, + "grad_norm": 38.53809826450691, + "learning_rate": 1.767169773351765e-05, + "loss": 1.2753, + "step": 40752 + }, + { + "epoch": 0.49, + "grad_norm": 14.351804010384509, + "learning_rate": 1.7671323002869098e-05, + "loss": 1.6149, + "step": 40755 + }, + { + "epoch": 0.49, + "grad_norm": 9.456811505671316, + "learning_rate": 1.767094824604108e-05, + "loss": 1.2899, + "step": 40758 + }, + { + "epoch": 0.49, + "grad_norm": 16.925869350683044, + "learning_rate": 1.767057346303488e-05, + "loss": 1.6093, + "step": 40761 + }, + { + "epoch": 0.49, + "grad_norm": 25.493191022552253, + "learning_rate": 1.7670198653851776e-05, + "loss": 1.5899, + "step": 40764 + }, + { + "epoch": 0.49, + "grad_norm": 16.12683679046036, + "learning_rate": 1.7669823818493047e-05, + "loss": 1.6296, + "step": 40767 + }, + { + "epoch": 0.49, + "grad_norm": 18.08998424634964, + "learning_rate": 1.766944895695997e-05, + "loss": 1.2574, + "step": 40770 + }, + { + "epoch": 0.49, + "grad_norm": 16.34635856563398, + "learning_rate": 1.766907406925383e-05, + "loss": 1.2797, + "step": 40773 + }, + { + "epoch": 0.49, + "grad_norm": 10.366075057626837, + "learning_rate": 1.7668699155375902e-05, + "loss": 1.2861, + "step": 40776 + }, + { + "epoch": 0.49, + "grad_norm": 12.458778333237495, + "learning_rate": 1.766832421532746e-05, + "loss": 1.5206, + "step": 40779 + }, + { + "epoch": 0.49, + "grad_norm": 11.600386849991049, + "learning_rate": 1.7667949249109796e-05, + "loss": 1.337, + "step": 40782 + }, + { + "epoch": 0.49, + "grad_norm": 22.660673387201967, + "learning_rate": 1.7667574256724183e-05, + "loss": 1.6161, + "step": 40785 + }, + { + "epoch": 0.49, + "grad_norm": 49.30329673551412, + "learning_rate": 1.76671992381719e-05, + "loss": 1.5302, + "step": 40788 + }, + { + "epoch": 0.49, + "grad_norm": 32.21080112686717, + "learning_rate": 1.7666824193454225e-05, + "loss": 1.0815, + "step": 40791 + }, + { + "epoch": 0.49, + "grad_norm": 11.139649754535581, + "learning_rate": 1.7666449122572442e-05, + "loss": 1.3774, + "step": 40794 + }, + { + "epoch": 0.49, + "grad_norm": 25.447616027952154, + "learning_rate": 1.766607402552783e-05, + "loss": 1.6587, + "step": 40797 + }, + { + "epoch": 0.49, + "grad_norm": 9.613769679601107, + "learning_rate": 1.7665698902321668e-05, + "loss": 1.6766, + "step": 40800 + }, + { + "epoch": 0.49, + "grad_norm": 27.511756370068333, + "learning_rate": 1.766532375295524e-05, + "loss": 1.1638, + "step": 40803 + }, + { + "epoch": 0.49, + "grad_norm": 29.04763405421406, + "learning_rate": 1.766494857742982e-05, + "loss": 1.3606, + "step": 40806 + }, + { + "epoch": 0.49, + "grad_norm": 16.127184449387002, + "learning_rate": 1.766457337574669e-05, + "loss": 1.4504, + "step": 40809 + }, + { + "epoch": 0.49, + "grad_norm": 47.18349212885076, + "learning_rate": 1.7664198147907135e-05, + "loss": 1.6059, + "step": 40812 + }, + { + "epoch": 0.49, + "grad_norm": 6.034698151347691, + "learning_rate": 1.766382289391243e-05, + "loss": 1.0033, + "step": 40815 + }, + { + "epoch": 0.49, + "grad_norm": 17.59515309071463, + "learning_rate": 1.766344761376386e-05, + "loss": 1.4, + "step": 40818 + }, + { + "epoch": 0.49, + "grad_norm": 13.870508087124087, + "learning_rate": 1.76630723074627e-05, + "loss": 1.2426, + "step": 40821 + }, + { + "epoch": 0.49, + "grad_norm": 18.53107491010072, + "learning_rate": 1.766269697501024e-05, + "loss": 1.7419, + "step": 40824 + }, + { + "epoch": 0.49, + "grad_norm": 16.771033566242618, + "learning_rate": 1.7662321616407752e-05, + "loss": 1.4077, + "step": 40827 + }, + { + "epoch": 0.49, + "grad_norm": 19.600039176705053, + "learning_rate": 1.766194623165652e-05, + "loss": 1.3156, + "step": 40830 + }, + { + "epoch": 0.49, + "grad_norm": 12.652945504629772, + "learning_rate": 1.766157082075783e-05, + "loss": 1.4325, + "step": 40833 + }, + { + "epoch": 0.49, + "grad_norm": 16.39923173526585, + "learning_rate": 1.766119538371295e-05, + "loss": 1.2597, + "step": 40836 + }, + { + "epoch": 0.49, + "grad_norm": 9.754078824780835, + "learning_rate": 1.7660819920523176e-05, + "loss": 1.514, + "step": 40839 + }, + { + "epoch": 0.49, + "grad_norm": 24.769298716522407, + "learning_rate": 1.766044443118978e-05, + "loss": 1.4651, + "step": 40842 + }, + { + "epoch": 0.49, + "grad_norm": 11.590255355435815, + "learning_rate": 1.7660068915714048e-05, + "loss": 0.9717, + "step": 40845 + }, + { + "epoch": 0.49, + "grad_norm": 14.898725854176819, + "learning_rate": 1.7659693374097262e-05, + "loss": 1.4013, + "step": 40848 + }, + { + "epoch": 0.49, + "grad_norm": 45.683453985237655, + "learning_rate": 1.76593178063407e-05, + "loss": 1.6178, + "step": 40851 + }, + { + "epoch": 0.49, + "grad_norm": 23.050318491758336, + "learning_rate": 1.7658942212445644e-05, + "loss": 1.4827, + "step": 40854 + }, + { + "epoch": 0.49, + "grad_norm": 19.19506576714682, + "learning_rate": 1.7658566592413374e-05, + "loss": 1.3657, + "step": 40857 + }, + { + "epoch": 0.49, + "grad_norm": 8.540935945610695, + "learning_rate": 1.765819094624518e-05, + "loss": 1.3155, + "step": 40860 + }, + { + "epoch": 0.49, + "grad_norm": 13.53205393750137, + "learning_rate": 1.7657815273942337e-05, + "loss": 1.0227, + "step": 40863 + }, + { + "epoch": 0.49, + "grad_norm": 7.777320723863741, + "learning_rate": 1.765743957550613e-05, + "loss": 1.5086, + "step": 40866 + }, + { + "epoch": 0.49, + "grad_norm": 3.0514146647238296, + "learning_rate": 1.765706385093784e-05, + "loss": 1.2125, + "step": 40869 + }, + { + "epoch": 0.49, + "grad_norm": 20.14487323180502, + "learning_rate": 1.7656688100238747e-05, + "loss": 1.4045, + "step": 40872 + }, + { + "epoch": 0.49, + "grad_norm": 5.7925011529759844, + "learning_rate": 1.7656312323410137e-05, + "loss": 1.8088, + "step": 40875 + }, + { + "epoch": 0.49, + "grad_norm": 10.343155305600762, + "learning_rate": 1.765593652045329e-05, + "loss": 1.4468, + "step": 40878 + }, + { + "epoch": 0.49, + "grad_norm": 48.7577687160273, + "learning_rate": 1.765556069136949e-05, + "loss": 1.0758, + "step": 40881 + }, + { + "epoch": 0.49, + "grad_norm": 18.413038722149835, + "learning_rate": 1.765518483616002e-05, + "loss": 1.3182, + "step": 40884 + }, + { + "epoch": 0.49, + "grad_norm": 9.5895375155725, + "learning_rate": 1.7654808954826158e-05, + "loss": 1.1178, + "step": 40887 + }, + { + "epoch": 0.49, + "grad_norm": 15.10387575007717, + "learning_rate": 1.765443304736919e-05, + "loss": 1.5045, + "step": 40890 + }, + { + "epoch": 0.49, + "grad_norm": 20.665521892210247, + "learning_rate": 1.7654057113790403e-05, + "loss": 0.9338, + "step": 40893 + }, + { + "epoch": 0.49, + "grad_norm": 20.36286697601899, + "learning_rate": 1.7653681154091073e-05, + "loss": 1.4839, + "step": 40896 + }, + { + "epoch": 0.49, + "grad_norm": 6.774464796826859, + "learning_rate": 1.7653305168272486e-05, + "loss": 1.4873, + "step": 40899 + }, + { + "epoch": 0.49, + "grad_norm": 13.06762852270698, + "learning_rate": 1.7652929156335927e-05, + "loss": 1.5165, + "step": 40902 + }, + { + "epoch": 0.49, + "grad_norm": 13.27724319671067, + "learning_rate": 1.765255311828268e-05, + "loss": 1.2657, + "step": 40905 + }, + { + "epoch": 0.49, + "grad_norm": 4.09344809303946, + "learning_rate": 1.7652177054114022e-05, + "loss": 1.5843, + "step": 40908 + }, + { + "epoch": 0.49, + "grad_norm": 7.918761982410834, + "learning_rate": 1.7651800963831238e-05, + "loss": 1.4834, + "step": 40911 + }, + { + "epoch": 0.49, + "grad_norm": 18.3115023256602, + "learning_rate": 1.7651424847435617e-05, + "loss": 1.6522, + "step": 40914 + }, + { + "epoch": 0.49, + "grad_norm": 92.37920555008321, + "learning_rate": 1.7651048704928438e-05, + "loss": 1.6031, + "step": 40917 + }, + { + "epoch": 0.49, + "grad_norm": 24.260447832171018, + "learning_rate": 1.7650672536310986e-05, + "loss": 1.166, + "step": 40920 + }, + { + "epoch": 0.49, + "grad_norm": 9.050960169135406, + "learning_rate": 1.7650296341584544e-05, + "loss": 1.4269, + "step": 40923 + }, + { + "epoch": 0.49, + "grad_norm": 15.298019855946729, + "learning_rate": 1.7649920120750398e-05, + "loss": 1.4676, + "step": 40926 + }, + { + "epoch": 0.49, + "grad_norm": 21.28505276746921, + "learning_rate": 1.7649543873809826e-05, + "loss": 1.6187, + "step": 40929 + }, + { + "epoch": 0.49, + "grad_norm": 17.378488376945214, + "learning_rate": 1.764916760076412e-05, + "loss": 1.4079, + "step": 40932 + }, + { + "epoch": 0.49, + "grad_norm": 56.31409677639031, + "learning_rate": 1.764879130161456e-05, + "loss": 1.6253, + "step": 40935 + }, + { + "epoch": 0.49, + "grad_norm": 4.739077116072491, + "learning_rate": 1.7648414976362427e-05, + "loss": 1.2963, + "step": 40938 + }, + { + "epoch": 0.49, + "grad_norm": 15.770921860364835, + "learning_rate": 1.764803862500901e-05, + "loss": 1.5376, + "step": 40941 + }, + { + "epoch": 0.49, + "grad_norm": 8.632426327495311, + "learning_rate": 1.7647662247555594e-05, + "loss": 1.21, + "step": 40944 + }, + { + "epoch": 0.49, + "grad_norm": 22.47512353864844, + "learning_rate": 1.764728584400346e-05, + "loss": 1.0878, + "step": 40947 + }, + { + "epoch": 0.49, + "grad_norm": 19.039441696422156, + "learning_rate": 1.7646909414353895e-05, + "loss": 1.3992, + "step": 40950 + }, + { + "epoch": 0.49, + "grad_norm": 4.489006184127964, + "learning_rate": 1.764653295860818e-05, + "loss": 1.8567, + "step": 40953 + }, + { + "epoch": 0.49, + "grad_norm": 14.952418370300455, + "learning_rate": 1.7646156476767606e-05, + "loss": 1.7743, + "step": 40956 + }, + { + "epoch": 0.49, + "grad_norm": 4.098512105312759, + "learning_rate": 1.7645779968833448e-05, + "loss": 1.7226, + "step": 40959 + }, + { + "epoch": 0.49, + "grad_norm": 25.7055408435712, + "learning_rate": 1.7645403434807003e-05, + "loss": 1.6081, + "step": 40962 + }, + { + "epoch": 0.49, + "grad_norm": 13.82008249554508, + "learning_rate": 1.764502687468955e-05, + "loss": 1.35, + "step": 40965 + }, + { + "epoch": 0.49, + "grad_norm": 19.151527794833854, + "learning_rate": 1.7644650288482373e-05, + "loss": 1.2467, + "step": 40968 + }, + { + "epoch": 0.49, + "grad_norm": 14.943208952038294, + "learning_rate": 1.7644273676186757e-05, + "loss": 1.1308, + "step": 40971 + }, + { + "epoch": 0.49, + "grad_norm": 10.309487556239937, + "learning_rate": 1.764389703780399e-05, + "loss": 1.4465, + "step": 40974 + }, + { + "epoch": 0.49, + "grad_norm": 11.376036859374679, + "learning_rate": 1.764352037333535e-05, + "loss": 1.5628, + "step": 40977 + }, + { + "epoch": 0.49, + "grad_norm": 11.288775596410165, + "learning_rate": 1.7643143682782134e-05, + "loss": 1.5722, + "step": 40980 + }, + { + "epoch": 0.49, + "grad_norm": 14.807616547713478, + "learning_rate": 1.764276696614562e-05, + "loss": 1.635, + "step": 40983 + }, + { + "epoch": 0.49, + "grad_norm": 15.860293722368032, + "learning_rate": 1.7642390223427097e-05, + "loss": 1.2739, + "step": 40986 + }, + { + "epoch": 0.49, + "grad_norm": 17.37259184485983, + "learning_rate": 1.7642013454627845e-05, + "loss": 1.4209, + "step": 40989 + }, + { + "epoch": 0.49, + "grad_norm": 26.285312500026873, + "learning_rate": 1.7641636659749157e-05, + "loss": 1.3111, + "step": 40992 + }, + { + "epoch": 0.49, + "grad_norm": 6.112650524558204, + "learning_rate": 1.7641259838792314e-05, + "loss": 1.5359, + "step": 40995 + }, + { + "epoch": 0.49, + "grad_norm": 11.569666476714952, + "learning_rate": 1.7640882991758602e-05, + "loss": 1.543, + "step": 40998 + }, + { + "epoch": 0.49, + "grad_norm": 5.809057671192874, + "learning_rate": 1.7640506118649314e-05, + "loss": 1.4062, + "step": 41001 + }, + { + "epoch": 0.49, + "grad_norm": 13.540242910200933, + "learning_rate": 1.764012921946573e-05, + "loss": 1.6581, + "step": 41004 + }, + { + "epoch": 0.49, + "grad_norm": 17.84292328958143, + "learning_rate": 1.763975229420913e-05, + "loss": 1.2309, + "step": 41007 + }, + { + "epoch": 0.49, + "grad_norm": 9.980997507100504, + "learning_rate": 1.7639375342880815e-05, + "loss": 1.4009, + "step": 41010 + }, + { + "epoch": 0.49, + "grad_norm": 30.923636537741533, + "learning_rate": 1.7638998365482058e-05, + "loss": 1.4586, + "step": 41013 + }, + { + "epoch": 0.49, + "grad_norm": 150.70743420370704, + "learning_rate": 1.763862136201415e-05, + "loss": 1.6888, + "step": 41016 + }, + { + "epoch": 0.49, + "grad_norm": 12.125414317430314, + "learning_rate": 1.7638244332478386e-05, + "loss": 1.384, + "step": 41019 + }, + { + "epoch": 0.49, + "grad_norm": 7.529495493115135, + "learning_rate": 1.7637867276876042e-05, + "loss": 1.2529, + "step": 41022 + }, + { + "epoch": 0.49, + "grad_norm": 7.191782425533533, + "learning_rate": 1.7637490195208407e-05, + "loss": 1.1547, + "step": 41025 + }, + { + "epoch": 0.49, + "grad_norm": 35.01838961172133, + "learning_rate": 1.763711308747677e-05, + "loss": 1.2295, + "step": 41028 + }, + { + "epoch": 0.49, + "grad_norm": 30.39538009220585, + "learning_rate": 1.7636735953682414e-05, + "loss": 1.7658, + "step": 41031 + }, + { + "epoch": 0.49, + "grad_norm": 20.99742399219232, + "learning_rate": 1.763635879382663e-05, + "loss": 1.2675, + "step": 41034 + }, + { + "epoch": 0.49, + "grad_norm": 23.410337974969405, + "learning_rate": 1.7635981607910708e-05, + "loss": 1.1965, + "step": 41037 + }, + { + "epoch": 0.49, + "grad_norm": 9.72385221932749, + "learning_rate": 1.7635604395935928e-05, + "loss": 1.5606, + "step": 41040 + }, + { + "epoch": 0.49, + "grad_norm": 11.961570976375079, + "learning_rate": 1.763522715790358e-05, + "loss": 1.1781, + "step": 41043 + }, + { + "epoch": 0.49, + "grad_norm": 5.919931374152851, + "learning_rate": 1.7634849893814956e-05, + "loss": 1.604, + "step": 41046 + }, + { + "epoch": 0.49, + "grad_norm": 59.73747678323343, + "learning_rate": 1.7634472603671334e-05, + "loss": 1.5515, + "step": 41049 + }, + { + "epoch": 0.49, + "grad_norm": 36.44844148241168, + "learning_rate": 1.763409528747401e-05, + "loss": 1.5558, + "step": 41052 + }, + { + "epoch": 0.49, + "grad_norm": 8.518818170079703, + "learning_rate": 1.763371794522427e-05, + "loss": 1.4137, + "step": 41055 + }, + { + "epoch": 0.49, + "grad_norm": 5.805338439792765, + "learning_rate": 1.76333405769234e-05, + "loss": 1.3232, + "step": 41058 + }, + { + "epoch": 0.49, + "grad_norm": 10.303225887171312, + "learning_rate": 1.763296318257268e-05, + "loss": 1.4784, + "step": 41061 + }, + { + "epoch": 0.49, + "grad_norm": 8.58459050585561, + "learning_rate": 1.7632585762173416e-05, + "loss": 1.4084, + "step": 41064 + }, + { + "epoch": 0.49, + "grad_norm": 14.637373245483673, + "learning_rate": 1.763220831572688e-05, + "loss": 1.2841, + "step": 41067 + }, + { + "epoch": 0.49, + "grad_norm": 7.606904572604609, + "learning_rate": 1.763183084323437e-05, + "loss": 1.2057, + "step": 41070 + }, + { + "epoch": 0.49, + "grad_norm": 13.34011071086928, + "learning_rate": 1.763145334469717e-05, + "loss": 1.4261, + "step": 41073 + }, + { + "epoch": 0.49, + "grad_norm": 16.93703178856505, + "learning_rate": 1.7631075820116567e-05, + "loss": 1.1597, + "step": 41076 + }, + { + "epoch": 0.49, + "grad_norm": 13.220611190500192, + "learning_rate": 1.7630698269493854e-05, + "loss": 1.2866, + "step": 41079 + }, + { + "epoch": 0.49, + "grad_norm": 18.63920665429817, + "learning_rate": 1.7630320692830312e-05, + "loss": 1.3881, + "step": 41082 + }, + { + "epoch": 0.49, + "grad_norm": 6.062163240001447, + "learning_rate": 1.7629943090127236e-05, + "loss": 1.3927, + "step": 41085 + }, + { + "epoch": 0.49, + "grad_norm": 4.883410436061088, + "learning_rate": 1.7629565461385913e-05, + "loss": 1.3491, + "step": 41088 + }, + { + "epoch": 0.49, + "grad_norm": 12.357992059733071, + "learning_rate": 1.7629187806607627e-05, + "loss": 1.7844, + "step": 41091 + }, + { + "epoch": 0.49, + "grad_norm": 14.635166192386574, + "learning_rate": 1.7628810125793677e-05, + "loss": 1.5986, + "step": 41094 + }, + { + "epoch": 0.49, + "grad_norm": 22.415921031166217, + "learning_rate": 1.762843241894534e-05, + "loss": 1.6278, + "step": 41097 + }, + { + "epoch": 0.49, + "grad_norm": 6.327993361707156, + "learning_rate": 1.7628054686063916e-05, + "loss": 1.3363, + "step": 41100 + }, + { + "epoch": 0.49, + "grad_norm": 9.209800815422282, + "learning_rate": 1.762767692715069e-05, + "loss": 1.3237, + "step": 41103 + }, + { + "epoch": 0.49, + "grad_norm": 17.36675528945258, + "learning_rate": 1.7627299142206946e-05, + "loss": 1.5537, + "step": 41106 + }, + { + "epoch": 0.49, + "grad_norm": 20.09593816688537, + "learning_rate": 1.762692133123398e-05, + "loss": 1.1792, + "step": 41109 + }, + { + "epoch": 0.49, + "grad_norm": 20.00996234470728, + "learning_rate": 1.7626543494233078e-05, + "loss": 1.4254, + "step": 41112 + }, + { + "epoch": 0.49, + "grad_norm": 10.058791441491016, + "learning_rate": 1.762616563120553e-05, + "loss": 1.4834, + "step": 41115 + }, + { + "epoch": 0.49, + "grad_norm": 8.151648095070993, + "learning_rate": 1.7625787742152624e-05, + "loss": 1.5677, + "step": 41118 + }, + { + "epoch": 0.49, + "grad_norm": 19.133037823273966, + "learning_rate": 1.7625409827075652e-05, + "loss": 1.3859, + "step": 41121 + }, + { + "epoch": 0.49, + "grad_norm": 5.418347209551914, + "learning_rate": 1.7625031885975903e-05, + "loss": 1.158, + "step": 41124 + }, + { + "epoch": 0.49, + "grad_norm": 4.354060218511223, + "learning_rate": 1.7624653918854665e-05, + "loss": 1.4896, + "step": 41127 + }, + { + "epoch": 0.49, + "grad_norm": 8.765942948973944, + "learning_rate": 1.7624275925713233e-05, + "loss": 1.6556, + "step": 41130 + }, + { + "epoch": 0.49, + "grad_norm": 11.570629603011499, + "learning_rate": 1.7623897906552893e-05, + "loss": 1.1143, + "step": 41133 + }, + { + "epoch": 0.49, + "grad_norm": 6.419124071598281, + "learning_rate": 1.7623519861374933e-05, + "loss": 1.7411, + "step": 41136 + }, + { + "epoch": 0.49, + "grad_norm": 17.871793475447916, + "learning_rate": 1.7623141790180648e-05, + "loss": 1.2006, + "step": 41139 + }, + { + "epoch": 0.49, + "grad_norm": 4.232812866727802, + "learning_rate": 1.7622763692971327e-05, + "loss": 1.1241, + "step": 41142 + }, + { + "epoch": 0.49, + "grad_norm": 22.779793386371328, + "learning_rate": 1.7622385569748257e-05, + "loss": 1.6514, + "step": 41145 + }, + { + "epoch": 0.49, + "grad_norm": 45.91830063383837, + "learning_rate": 1.7622007420512733e-05, + "loss": 1.4833, + "step": 41148 + }, + { + "epoch": 0.49, + "grad_norm": 4.819794426560142, + "learning_rate": 1.7621629245266037e-05, + "loss": 1.304, + "step": 41151 + }, + { + "epoch": 0.49, + "grad_norm": 5.528502918692343, + "learning_rate": 1.7621251044009473e-05, + "loss": 1.1993, + "step": 41154 + }, + { + "epoch": 0.49, + "grad_norm": 8.118677822935451, + "learning_rate": 1.7620872816744322e-05, + "loss": 1.5547, + "step": 41157 + }, + { + "epoch": 0.49, + "grad_norm": 5.385115736539991, + "learning_rate": 1.7620494563471875e-05, + "loss": 1.2248, + "step": 41160 + }, + { + "epoch": 0.49, + "grad_norm": 6.384049517330697, + "learning_rate": 1.7620116284193426e-05, + "loss": 1.2969, + "step": 41163 + }, + { + "epoch": 0.5, + "grad_norm": 14.283829417045657, + "learning_rate": 1.7619737978910266e-05, + "loss": 1.0833, + "step": 41166 + }, + { + "epoch": 0.5, + "grad_norm": 18.102927054151195, + "learning_rate": 1.7619359647623686e-05, + "loss": 1.6708, + "step": 41169 + }, + { + "epoch": 0.5, + "grad_norm": 22.513265774477297, + "learning_rate": 1.7618981290334973e-05, + "loss": 1.3362, + "step": 41172 + }, + { + "epoch": 0.5, + "grad_norm": 7.099358415625954, + "learning_rate": 1.7618602907045424e-05, + "loss": 1.32, + "step": 41175 + }, + { + "epoch": 0.5, + "grad_norm": 10.309188236435634, + "learning_rate": 1.7618224497756325e-05, + "loss": 1.576, + "step": 41178 + }, + { + "epoch": 0.5, + "grad_norm": 15.187932669129149, + "learning_rate": 1.761784606246897e-05, + "loss": 1.4508, + "step": 41181 + }, + { + "epoch": 0.5, + "grad_norm": 23.063491997407425, + "learning_rate": 1.761746760118465e-05, + "loss": 1.3102, + "step": 41184 + }, + { + "epoch": 0.5, + "grad_norm": 8.405380342577633, + "learning_rate": 1.761708911390466e-05, + "loss": 1.5504, + "step": 41187 + }, + { + "epoch": 0.5, + "grad_norm": 7.163715106301699, + "learning_rate": 1.7616710600630285e-05, + "loss": 1.274, + "step": 41190 + }, + { + "epoch": 0.5, + "grad_norm": 6.5923250103753945, + "learning_rate": 1.761633206136282e-05, + "loss": 1.6082, + "step": 41193 + }, + { + "epoch": 0.5, + "grad_norm": 21.159692932572515, + "learning_rate": 1.7615953496103562e-05, + "loss": 1.2517, + "step": 41196 + }, + { + "epoch": 0.5, + "grad_norm": 7.840444717784696, + "learning_rate": 1.7615574904853795e-05, + "loss": 1.4046, + "step": 41199 + }, + { + "epoch": 0.5, + "grad_norm": 27.278055476024967, + "learning_rate": 1.761519628761481e-05, + "loss": 1.4839, + "step": 41202 + }, + { + "epoch": 0.5, + "grad_norm": 9.817257515911184, + "learning_rate": 1.761481764438791e-05, + "loss": 1.1179, + "step": 41205 + }, + { + "epoch": 0.5, + "grad_norm": 11.612268516468061, + "learning_rate": 1.7614438975174378e-05, + "loss": 1.3929, + "step": 41208 + }, + { + "epoch": 0.5, + "grad_norm": 11.63342080350236, + "learning_rate": 1.7614060279975504e-05, + "loss": 1.5286, + "step": 41211 + }, + { + "epoch": 0.5, + "grad_norm": 13.033325002207238, + "learning_rate": 1.761368155879259e-05, + "loss": 1.6073, + "step": 41214 + }, + { + "epoch": 0.5, + "grad_norm": 10.992892119582981, + "learning_rate": 1.761330281162692e-05, + "loss": 1.3884, + "step": 41217 + }, + { + "epoch": 0.5, + "grad_norm": 5.860052560219223, + "learning_rate": 1.7612924038479793e-05, + "loss": 1.5716, + "step": 41220 + }, + { + "epoch": 0.5, + "grad_norm": 9.390578390177012, + "learning_rate": 1.7612545239352498e-05, + "loss": 1.0683, + "step": 41223 + }, + { + "epoch": 0.5, + "grad_norm": 21.8034558297185, + "learning_rate": 1.7612166414246322e-05, + "loss": 1.4704, + "step": 41226 + }, + { + "epoch": 0.5, + "grad_norm": 13.258169198776228, + "learning_rate": 1.761178756316257e-05, + "loss": 1.4064, + "step": 41229 + }, + { + "epoch": 0.5, + "grad_norm": 20.451098425801387, + "learning_rate": 1.761140868610253e-05, + "loss": 1.3927, + "step": 41232 + }, + { + "epoch": 0.5, + "grad_norm": 20.74080342459409, + "learning_rate": 1.761102978306749e-05, + "loss": 1.2284, + "step": 41235 + }, + { + "epoch": 0.5, + "grad_norm": 21.2394916604144, + "learning_rate": 1.7610650854058747e-05, + "loss": 1.3156, + "step": 41238 + }, + { + "epoch": 0.5, + "grad_norm": 20.24826611539098, + "learning_rate": 1.7610271899077596e-05, + "loss": 1.7577, + "step": 41241 + }, + { + "epoch": 0.5, + "grad_norm": 11.125875055784773, + "learning_rate": 1.7609892918125328e-05, + "loss": 1.4343, + "step": 41244 + }, + { + "epoch": 0.5, + "grad_norm": 110.36374834994, + "learning_rate": 1.760951391120324e-05, + "loss": 1.3695, + "step": 41247 + }, + { + "epoch": 0.5, + "grad_norm": 10.355770779604347, + "learning_rate": 1.7609134878312614e-05, + "loss": 1.2781, + "step": 41250 + }, + { + "epoch": 0.5, + "grad_norm": 40.04511821260506, + "learning_rate": 1.7608755819454754e-05, + "loss": 1.544, + "step": 41253 + }, + { + "epoch": 0.5, + "grad_norm": 12.230033452613052, + "learning_rate": 1.7608376734630954e-05, + "loss": 1.2359, + "step": 41256 + }, + { + "epoch": 0.5, + "grad_norm": 5.102444999379667, + "learning_rate": 1.7607997623842503e-05, + "loss": 1.2779, + "step": 41259 + }, + { + "epoch": 0.5, + "grad_norm": 15.371456416589854, + "learning_rate": 1.7607618487090697e-05, + "loss": 1.8615, + "step": 41262 + }, + { + "epoch": 0.5, + "grad_norm": 11.35470984351824, + "learning_rate": 1.7607239324376827e-05, + "loss": 1.179, + "step": 41265 + }, + { + "epoch": 0.5, + "grad_norm": 17.917122853588104, + "learning_rate": 1.7606860135702193e-05, + "loss": 1.1166, + "step": 41268 + }, + { + "epoch": 0.5, + "grad_norm": 57.93021763761577, + "learning_rate": 1.7606480921068082e-05, + "loss": 1.5098, + "step": 41271 + }, + { + "epoch": 0.5, + "grad_norm": 13.620507147569656, + "learning_rate": 1.7606101680475794e-05, + "loss": 1.341, + "step": 41274 + }, + { + "epoch": 0.5, + "grad_norm": 7.390104097255778, + "learning_rate": 1.760572241392662e-05, + "loss": 1.6102, + "step": 41277 + }, + { + "epoch": 0.5, + "grad_norm": 32.55528441861117, + "learning_rate": 1.7605343121421854e-05, + "loss": 1.4732, + "step": 41280 + }, + { + "epoch": 0.5, + "grad_norm": 25.681115173088255, + "learning_rate": 1.7604963802962792e-05, + "loss": 1.2267, + "step": 41283 + }, + { + "epoch": 0.5, + "grad_norm": 7.031060003514903, + "learning_rate": 1.760458445855073e-05, + "loss": 1.3452, + "step": 41286 + }, + { + "epoch": 0.5, + "grad_norm": 14.323225849162709, + "learning_rate": 1.7604205088186955e-05, + "loss": 1.3101, + "step": 41289 + }, + { + "epoch": 0.5, + "grad_norm": 8.140721170563797, + "learning_rate": 1.7603825691872772e-05, + "loss": 1.4156, + "step": 41292 + }, + { + "epoch": 0.5, + "grad_norm": 23.81612431760022, + "learning_rate": 1.760344626960947e-05, + "loss": 1.0722, + "step": 41295 + }, + { + "epoch": 0.5, + "grad_norm": 14.196993098213202, + "learning_rate": 1.7603066821398343e-05, + "loss": 1.5054, + "step": 41298 + }, + { + "epoch": 0.5, + "grad_norm": 4.212924548813788, + "learning_rate": 1.760268734724069e-05, + "loss": 1.6407, + "step": 41301 + }, + { + "epoch": 0.5, + "grad_norm": 33.291346772081084, + "learning_rate": 1.76023078471378e-05, + "loss": 1.4609, + "step": 41304 + }, + { + "epoch": 0.5, + "grad_norm": 20.664682504490028, + "learning_rate": 1.7601928321090974e-05, + "loss": 1.6036, + "step": 41307 + }, + { + "epoch": 0.5, + "grad_norm": 29.26656379470449, + "learning_rate": 1.7601548769101503e-05, + "loss": 1.439, + "step": 41310 + }, + { + "epoch": 0.5, + "grad_norm": 6.98025674310605, + "learning_rate": 1.760116919117069e-05, + "loss": 1.9438, + "step": 41313 + }, + { + "epoch": 0.5, + "grad_norm": 13.507109205379253, + "learning_rate": 1.7600789587299817e-05, + "loss": 1.589, + "step": 41316 + }, + { + "epoch": 0.5, + "grad_norm": 13.259782216658783, + "learning_rate": 1.760040995749019e-05, + "loss": 1.6385, + "step": 41319 + }, + { + "epoch": 0.5, + "grad_norm": 17.456226021343532, + "learning_rate": 1.76000303017431e-05, + "loss": 1.2864, + "step": 41322 + }, + { + "epoch": 0.5, + "grad_norm": 12.027386309715952, + "learning_rate": 1.7599650620059847e-05, + "loss": 1.3591, + "step": 41325 + }, + { + "epoch": 0.5, + "grad_norm": 8.25246394460131, + "learning_rate": 1.7599270912441723e-05, + "loss": 1.2094, + "step": 41328 + }, + { + "epoch": 0.5, + "grad_norm": 77.676681354535, + "learning_rate": 1.7598891178890025e-05, + "loss": 1.3696, + "step": 41331 + }, + { + "epoch": 0.5, + "grad_norm": 7.280286985924846, + "learning_rate": 1.7598511419406047e-05, + "loss": 1.3582, + "step": 41334 + }, + { + "epoch": 0.5, + "grad_norm": 7.475115924177207, + "learning_rate": 1.7598131633991088e-05, + "loss": 1.3881, + "step": 41337 + }, + { + "epoch": 0.5, + "grad_norm": 15.960781964716979, + "learning_rate": 1.759775182264644e-05, + "loss": 1.4561, + "step": 41340 + }, + { + "epoch": 0.5, + "grad_norm": 10.946524557236208, + "learning_rate": 1.7597371985373403e-05, + "loss": 1.5675, + "step": 41343 + }, + { + "epoch": 0.5, + "grad_norm": 15.32355182598446, + "learning_rate": 1.7596992122173274e-05, + "loss": 1.5626, + "step": 41346 + }, + { + "epoch": 0.5, + "grad_norm": 18.74902714778326, + "learning_rate": 1.7596612233047342e-05, + "loss": 1.4768, + "step": 41349 + }, + { + "epoch": 0.5, + "grad_norm": 8.406148190975504, + "learning_rate": 1.7596232317996914e-05, + "loss": 1.2607, + "step": 41352 + }, + { + "epoch": 0.5, + "grad_norm": 11.776238559499328, + "learning_rate": 1.759585237702328e-05, + "loss": 1.4335, + "step": 41355 + }, + { + "epoch": 0.5, + "grad_norm": 14.153139215254921, + "learning_rate": 1.759547241012774e-05, + "loss": 1.6772, + "step": 41358 + }, + { + "epoch": 0.5, + "grad_norm": 9.04444708253373, + "learning_rate": 1.7595092417311583e-05, + "loss": 1.4985, + "step": 41361 + }, + { + "epoch": 0.5, + "grad_norm": 22.20060057837428, + "learning_rate": 1.7594712398576113e-05, + "loss": 1.5036, + "step": 41364 + }, + { + "epoch": 0.5, + "grad_norm": 17.624495792205035, + "learning_rate": 1.7594332353922627e-05, + "loss": 1.5605, + "step": 41367 + }, + { + "epoch": 0.5, + "grad_norm": 5.405203657443054, + "learning_rate": 1.759395228335242e-05, + "loss": 1.3711, + "step": 41370 + }, + { + "epoch": 0.5, + "grad_norm": 27.988683326581626, + "learning_rate": 1.759357218686679e-05, + "loss": 1.3936, + "step": 41373 + }, + { + "epoch": 0.5, + "grad_norm": 44.019341307518616, + "learning_rate": 1.759319206446703e-05, + "loss": 1.1644, + "step": 41376 + }, + { + "epoch": 0.5, + "grad_norm": 15.401649113442206, + "learning_rate": 1.7592811916154443e-05, + "loss": 1.2467, + "step": 41379 + }, + { + "epoch": 0.5, + "grad_norm": 11.499164138877786, + "learning_rate": 1.7592431741930324e-05, + "loss": 1.5396, + "step": 41382 + }, + { + "epoch": 0.5, + "grad_norm": 6.128687490985934, + "learning_rate": 1.7592051541795968e-05, + "loss": 1.5356, + "step": 41385 + }, + { + "epoch": 0.5, + "grad_norm": 8.157044542982376, + "learning_rate": 1.759167131575268e-05, + "loss": 1.4093, + "step": 41388 + }, + { + "epoch": 0.5, + "grad_norm": 9.003141519555589, + "learning_rate": 1.7591291063801748e-05, + "loss": 1.2389, + "step": 41391 + }, + { + "epoch": 0.5, + "grad_norm": 17.10418213336099, + "learning_rate": 1.7590910785944472e-05, + "loss": 2.0049, + "step": 41394 + }, + { + "epoch": 0.5, + "grad_norm": 17.90319961138615, + "learning_rate": 1.7590530482182154e-05, + "loss": 1.3515, + "step": 41397 + }, + { + "epoch": 0.5, + "grad_norm": 9.392678348994416, + "learning_rate": 1.759015015251609e-05, + "loss": 1.3927, + "step": 41400 + }, + { + "epoch": 0.5, + "grad_norm": 11.630833932013932, + "learning_rate": 1.758976979694758e-05, + "loss": 1.1839, + "step": 41403 + }, + { + "epoch": 0.5, + "grad_norm": 10.405809480499567, + "learning_rate": 1.7589389415477915e-05, + "loss": 1.4536, + "step": 41406 + }, + { + "epoch": 0.5, + "grad_norm": 40.33980196327056, + "learning_rate": 1.75890090081084e-05, + "loss": 1.3338, + "step": 41409 + }, + { + "epoch": 0.5, + "grad_norm": 9.089288358859793, + "learning_rate": 1.7588628574840332e-05, + "loss": 1.3627, + "step": 41412 + }, + { + "epoch": 0.5, + "grad_norm": 22.117243300711205, + "learning_rate": 1.7588248115675007e-05, + "loss": 1.5494, + "step": 41415 + }, + { + "epoch": 0.5, + "grad_norm": 56.491599223885515, + "learning_rate": 1.7587867630613723e-05, + "loss": 2.0072, + "step": 41418 + }, + { + "epoch": 0.5, + "grad_norm": 57.95827804506983, + "learning_rate": 1.758748711965778e-05, + "loss": 1.1146, + "step": 41421 + }, + { + "epoch": 0.5, + "grad_norm": 33.00794946549317, + "learning_rate": 1.758710658280848e-05, + "loss": 1.5158, + "step": 41424 + }, + { + "epoch": 0.5, + "grad_norm": 17.298510051189638, + "learning_rate": 1.7586726020067115e-05, + "loss": 1.4082, + "step": 41427 + }, + { + "epoch": 0.5, + "grad_norm": 25.775650454830938, + "learning_rate": 1.7586345431434986e-05, + "loss": 1.6496, + "step": 41430 + }, + { + "epoch": 0.5, + "grad_norm": 9.435327169020669, + "learning_rate": 1.7585964816913393e-05, + "loss": 1.3684, + "step": 41433 + }, + { + "epoch": 0.5, + "grad_norm": 15.08234830058808, + "learning_rate": 1.7585584176503637e-05, + "loss": 1.1553, + "step": 41436 + }, + { + "epoch": 0.5, + "grad_norm": 14.01459221022665, + "learning_rate": 1.7585203510207013e-05, + "loss": 1.3567, + "step": 41439 + }, + { + "epoch": 0.5, + "grad_norm": 14.043078350721183, + "learning_rate": 1.758482281802482e-05, + "loss": 1.5712, + "step": 41442 + }, + { + "epoch": 0.5, + "grad_norm": 2.29586449112594, + "learning_rate": 1.758444209995836e-05, + "loss": 1.5408, + "step": 41445 + }, + { + "epoch": 0.5, + "grad_norm": 7.275865644577054, + "learning_rate": 1.7584061356008934e-05, + "loss": 1.6112, + "step": 41448 + }, + { + "epoch": 0.5, + "grad_norm": 6.802455744122888, + "learning_rate": 1.7583680586177832e-05, + "loss": 1.5403, + "step": 41451 + }, + { + "epoch": 0.5, + "grad_norm": 8.910417420924974, + "learning_rate": 1.7583299790466365e-05, + "loss": 1.3541, + "step": 41454 + }, + { + "epoch": 0.5, + "grad_norm": 3.1614028410122037, + "learning_rate": 1.7582918968875828e-05, + "loss": 1.2627, + "step": 41457 + }, + { + "epoch": 0.5, + "grad_norm": 2.5692641173446953, + "learning_rate": 1.7582538121407516e-05, + "loss": 1.8316, + "step": 41460 + }, + { + "epoch": 0.5, + "grad_norm": 13.922933803474677, + "learning_rate": 1.7582157248062735e-05, + "loss": 1.5775, + "step": 41463 + }, + { + "epoch": 0.5, + "grad_norm": 5.324056468789496, + "learning_rate": 1.758177634884278e-05, + "loss": 1.7567, + "step": 41466 + }, + { + "epoch": 0.5, + "grad_norm": 65.16199194786738, + "learning_rate": 1.7581395423748957e-05, + "loss": 1.4328, + "step": 41469 + }, + { + "epoch": 0.5, + "grad_norm": 12.287930620032993, + "learning_rate": 1.7581014472782558e-05, + "loss": 1.6623, + "step": 41472 + }, + { + "epoch": 0.5, + "grad_norm": 13.261418066789245, + "learning_rate": 1.758063349594489e-05, + "loss": 1.3165, + "step": 41475 + }, + { + "epoch": 0.5, + "grad_norm": 17.172941927740332, + "learning_rate": 1.758025249323725e-05, + "loss": 1.6615, + "step": 41478 + }, + { + "epoch": 0.5, + "grad_norm": 4.3199326146464285, + "learning_rate": 1.757987146466094e-05, + "loss": 1.3931, + "step": 41481 + }, + { + "epoch": 0.5, + "grad_norm": 19.72766235575647, + "learning_rate": 1.7579490410217256e-05, + "loss": 1.521, + "step": 41484 + }, + { + "epoch": 0.5, + "grad_norm": 22.90328060958513, + "learning_rate": 1.7579109329907505e-05, + "loss": 1.528, + "step": 41487 + }, + { + "epoch": 0.5, + "grad_norm": 23.717609771821664, + "learning_rate": 1.757872822373298e-05, + "loss": 1.5092, + "step": 41490 + }, + { + "epoch": 0.5, + "grad_norm": 17.733017946351914, + "learning_rate": 1.757834709169499e-05, + "loss": 1.458, + "step": 41493 + }, + { + "epoch": 0.5, + "grad_norm": 12.453444984199633, + "learning_rate": 1.7577965933794824e-05, + "loss": 1.4505, + "step": 41496 + }, + { + "epoch": 0.5, + "grad_norm": 13.00919578326097, + "learning_rate": 1.7577584750033792e-05, + "loss": 1.3555, + "step": 41499 + }, + { + "epoch": 0.5, + "grad_norm": 13.097614183370652, + "learning_rate": 1.7577203540413197e-05, + "loss": 1.5466, + "step": 41502 + }, + { + "epoch": 0.5, + "grad_norm": 22.773540613768997, + "learning_rate": 1.7576822304934332e-05, + "loss": 1.4999, + "step": 41505 + }, + { + "epoch": 0.5, + "grad_norm": 11.280344493707902, + "learning_rate": 1.7576441043598506e-05, + "loss": 1.4177, + "step": 41508 + }, + { + "epoch": 0.5, + "grad_norm": 7.6736236666961135, + "learning_rate": 1.7576059756407012e-05, + "loss": 1.5839, + "step": 41511 + }, + { + "epoch": 0.5, + "grad_norm": 22.043093257066495, + "learning_rate": 1.7575678443361156e-05, + "loss": 1.3149, + "step": 41514 + }, + { + "epoch": 0.5, + "grad_norm": 10.982831099133607, + "learning_rate": 1.757529710446224e-05, + "loss": 1.5125, + "step": 41517 + }, + { + "epoch": 0.5, + "grad_norm": 9.8684132717799, + "learning_rate": 1.7574915739711556e-05, + "loss": 1.2115, + "step": 41520 + }, + { + "epoch": 0.5, + "grad_norm": 16.135061938537344, + "learning_rate": 1.757453434911042e-05, + "loss": 1.1879, + "step": 41523 + }, + { + "epoch": 0.5, + "grad_norm": 19.295948049148137, + "learning_rate": 1.7574152932660125e-05, + "loss": 1.5798, + "step": 41526 + }, + { + "epoch": 0.5, + "grad_norm": 8.462355168268925, + "learning_rate": 1.7573771490361974e-05, + "loss": 1.4034, + "step": 41529 + }, + { + "epoch": 0.5, + "grad_norm": 65.1945860119003, + "learning_rate": 1.757339002221727e-05, + "loss": 1.4275, + "step": 41532 + }, + { + "epoch": 0.5, + "grad_norm": 12.198143342072338, + "learning_rate": 1.757300852822731e-05, + "loss": 1.6519, + "step": 41535 + }, + { + "epoch": 0.5, + "grad_norm": 24.74948405894881, + "learning_rate": 1.7572627008393404e-05, + "loss": 1.9896, + "step": 41538 + }, + { + "epoch": 0.5, + "grad_norm": 22.292829816562065, + "learning_rate": 1.7572245462716847e-05, + "loss": 1.753, + "step": 41541 + }, + { + "epoch": 0.5, + "grad_norm": 26.244716710942427, + "learning_rate": 1.757186389119895e-05, + "loss": 1.5162, + "step": 41544 + }, + { + "epoch": 0.5, + "grad_norm": 16.272334159765805, + "learning_rate": 1.7571482293841e-05, + "loss": 1.4228, + "step": 41547 + }, + { + "epoch": 0.5, + "grad_norm": 28.862543252002467, + "learning_rate": 1.7571100670644315e-05, + "loss": 1.4666, + "step": 41550 + }, + { + "epoch": 0.5, + "grad_norm": 18.314116562037007, + "learning_rate": 1.757071902161019e-05, + "loss": 1.282, + "step": 41553 + }, + { + "epoch": 0.5, + "grad_norm": 9.492107197862104, + "learning_rate": 1.757033734673992e-05, + "loss": 1.5367, + "step": 41556 + }, + { + "epoch": 0.5, + "grad_norm": 29.539294709181796, + "learning_rate": 1.756995564603482e-05, + "loss": 1.1607, + "step": 41559 + }, + { + "epoch": 0.5, + "grad_norm": 4.961168889933896, + "learning_rate": 1.7569573919496194e-05, + "loss": 1.2852, + "step": 41562 + }, + { + "epoch": 0.5, + "grad_norm": 21.38615646952985, + "learning_rate": 1.7569192167125332e-05, + "loss": 1.4529, + "step": 41565 + }, + { + "epoch": 0.5, + "grad_norm": 9.209260468322617, + "learning_rate": 1.7568810388923547e-05, + "loss": 1.3603, + "step": 41568 + }, + { + "epoch": 0.5, + "grad_norm": 15.783200179658095, + "learning_rate": 1.7568428584892136e-05, + "loss": 1.4476, + "step": 41571 + }, + { + "epoch": 0.5, + "grad_norm": 11.007715435163952, + "learning_rate": 1.7568046755032404e-05, + "loss": 1.1125, + "step": 41574 + }, + { + "epoch": 0.5, + "grad_norm": 13.376359445791296, + "learning_rate": 1.7567664899345657e-05, + "loss": 1.5032, + "step": 41577 + }, + { + "epoch": 0.5, + "grad_norm": 23.907414001250277, + "learning_rate": 1.7567283017833195e-05, + "loss": 1.3596, + "step": 41580 + }, + { + "epoch": 0.5, + "grad_norm": 16.158732407053318, + "learning_rate": 1.756690111049632e-05, + "loss": 1.0471, + "step": 41583 + }, + { + "epoch": 0.5, + "grad_norm": 18.540411348722156, + "learning_rate": 1.756651917733634e-05, + "loss": 1.5583, + "step": 41586 + }, + { + "epoch": 0.5, + "grad_norm": 10.362584853162716, + "learning_rate": 1.7566137218354552e-05, + "loss": 1.2912, + "step": 41589 + }, + { + "epoch": 0.5, + "grad_norm": 28.073330779434496, + "learning_rate": 1.7565755233552263e-05, + "loss": 1.4986, + "step": 41592 + }, + { + "epoch": 0.5, + "grad_norm": 13.59788232262556, + "learning_rate": 1.756537322293078e-05, + "loss": 1.5583, + "step": 41595 + }, + { + "epoch": 0.5, + "grad_norm": 13.03275831419668, + "learning_rate": 1.7564991186491404e-05, + "loss": 1.1645, + "step": 41598 + }, + { + "epoch": 0.5, + "grad_norm": 48.369988490436654, + "learning_rate": 1.7564609124235433e-05, + "loss": 1.1938, + "step": 41601 + }, + { + "epoch": 0.5, + "grad_norm": 14.159494172338182, + "learning_rate": 1.7564227036164178e-05, + "loss": 1.5066, + "step": 41604 + }, + { + "epoch": 0.5, + "grad_norm": 25.384159909940767, + "learning_rate": 1.756384492227894e-05, + "loss": 1.4437, + "step": 41607 + }, + { + "epoch": 0.5, + "grad_norm": 39.99375456913722, + "learning_rate": 1.7563462782581026e-05, + "loss": 1.4486, + "step": 41610 + }, + { + "epoch": 0.5, + "grad_norm": 38.65781202419313, + "learning_rate": 1.7563080617071735e-05, + "loss": 1.9916, + "step": 41613 + }, + { + "epoch": 0.5, + "grad_norm": 11.856464148589902, + "learning_rate": 1.7562698425752375e-05, + "loss": 1.4216, + "step": 41616 + }, + { + "epoch": 0.5, + "grad_norm": 26.72502585990808, + "learning_rate": 1.756231620862425e-05, + "loss": 1.7909, + "step": 41619 + }, + { + "epoch": 0.5, + "grad_norm": 6.62825971343529, + "learning_rate": 1.7561933965688666e-05, + "loss": 1.6449, + "step": 41622 + }, + { + "epoch": 0.5, + "grad_norm": 10.373226178957792, + "learning_rate": 1.7561551696946923e-05, + "loss": 1.3559, + "step": 41625 + }, + { + "epoch": 0.5, + "grad_norm": 5.857156323461929, + "learning_rate": 1.7561169402400328e-05, + "loss": 1.4731, + "step": 41628 + }, + { + "epoch": 0.5, + "grad_norm": 8.549630001495476, + "learning_rate": 1.7560787082050185e-05, + "loss": 1.6268, + "step": 41631 + }, + { + "epoch": 0.5, + "grad_norm": 19.675383412955526, + "learning_rate": 1.7560404735897796e-05, + "loss": 1.6405, + "step": 41634 + }, + { + "epoch": 0.5, + "grad_norm": 6.104496660299679, + "learning_rate": 1.7560022363944472e-05, + "loss": 1.5971, + "step": 41637 + }, + { + "epoch": 0.5, + "grad_norm": 10.014841170049595, + "learning_rate": 1.7559639966191517e-05, + "loss": 1.4364, + "step": 41640 + }, + { + "epoch": 0.5, + "grad_norm": 5.829369253150342, + "learning_rate": 1.7559257542640228e-05, + "loss": 1.6973, + "step": 41643 + }, + { + "epoch": 0.5, + "grad_norm": 8.868151745732687, + "learning_rate": 1.755887509329192e-05, + "loss": 1.3905, + "step": 41646 + }, + { + "epoch": 0.5, + "grad_norm": 41.640287304977825, + "learning_rate": 1.7558492618147897e-05, + "loss": 1.7161, + "step": 41649 + }, + { + "epoch": 0.5, + "grad_norm": 6.225436218862525, + "learning_rate": 1.7558110117209456e-05, + "loss": 1.3577, + "step": 41652 + }, + { + "epoch": 0.5, + "grad_norm": 24.22897981898829, + "learning_rate": 1.755772759047791e-05, + "loss": 1.738, + "step": 41655 + }, + { + "epoch": 0.5, + "grad_norm": 5.392280202248758, + "learning_rate": 1.755734503795456e-05, + "loss": 1.3662, + "step": 41658 + }, + { + "epoch": 0.5, + "grad_norm": 31.178857746142207, + "learning_rate": 1.7556962459640715e-05, + "loss": 1.5429, + "step": 41661 + }, + { + "epoch": 0.5, + "grad_norm": 7.504950306931028, + "learning_rate": 1.7556579855537676e-05, + "loss": 1.3691, + "step": 41664 + }, + { + "epoch": 0.5, + "grad_norm": 23.602630490614487, + "learning_rate": 1.7556197225646756e-05, + "loss": 1.2288, + "step": 41667 + }, + { + "epoch": 0.5, + "grad_norm": 20.192814981554385, + "learning_rate": 1.7555814569969257e-05, + "loss": 1.3002, + "step": 41670 + }, + { + "epoch": 0.5, + "grad_norm": 7.845757343005381, + "learning_rate": 1.755543188850648e-05, + "loss": 1.379, + "step": 41673 + }, + { + "epoch": 0.5, + "grad_norm": 16.446302554398596, + "learning_rate": 1.755504918125974e-05, + "loss": 1.1889, + "step": 41676 + }, + { + "epoch": 0.5, + "grad_norm": 3.1086140656434984, + "learning_rate": 1.7554666448230334e-05, + "loss": 1.7724, + "step": 41679 + }, + { + "epoch": 0.5, + "grad_norm": 7.99987626480386, + "learning_rate": 1.7554283689419576e-05, + "loss": 1.3384, + "step": 41682 + }, + { + "epoch": 0.5, + "grad_norm": 5.114290771141166, + "learning_rate": 1.7553900904828768e-05, + "loss": 1.4826, + "step": 41685 + }, + { + "epoch": 0.5, + "grad_norm": 32.57298074842399, + "learning_rate": 1.7553518094459217e-05, + "loss": 1.2588, + "step": 41688 + }, + { + "epoch": 0.5, + "grad_norm": 14.118325794991843, + "learning_rate": 1.7553135258312227e-05, + "loss": 1.1745, + "step": 41691 + }, + { + "epoch": 0.5, + "grad_norm": 25.756278533231956, + "learning_rate": 1.7552752396389113e-05, + "loss": 1.363, + "step": 41694 + }, + { + "epoch": 0.5, + "grad_norm": 19.91879821761585, + "learning_rate": 1.755236950869117e-05, + "loss": 1.7725, + "step": 41697 + }, + { + "epoch": 0.5, + "grad_norm": 2.8271919857231684, + "learning_rate": 1.7551986595219714e-05, + "loss": 1.9394, + "step": 41700 + }, + { + "epoch": 0.5, + "grad_norm": 4.748287640650681, + "learning_rate": 1.7551603655976045e-05, + "loss": 1.6914, + "step": 41703 + }, + { + "epoch": 0.5, + "grad_norm": 8.089139587281025, + "learning_rate": 1.7551220690961476e-05, + "loss": 1.2698, + "step": 41706 + }, + { + "epoch": 0.5, + "grad_norm": 10.811663599046675, + "learning_rate": 1.7550837700177308e-05, + "loss": 1.465, + "step": 41709 + }, + { + "epoch": 0.5, + "grad_norm": 12.303905439147117, + "learning_rate": 1.755045468362485e-05, + "loss": 1.0084, + "step": 41712 + }, + { + "epoch": 0.5, + "grad_norm": 17.04684806591847, + "learning_rate": 1.7550071641305413e-05, + "loss": 1.1959, + "step": 41715 + }, + { + "epoch": 0.5, + "grad_norm": 33.41400147121254, + "learning_rate": 1.7549688573220297e-05, + "loss": 1.4876, + "step": 41718 + }, + { + "epoch": 0.5, + "grad_norm": 21.348672946499025, + "learning_rate": 1.7549305479370817e-05, + "loss": 1.5437, + "step": 41721 + }, + { + "epoch": 0.5, + "grad_norm": 6.213638778648449, + "learning_rate": 1.7548922359758275e-05, + "loss": 1.6699, + "step": 41724 + }, + { + "epoch": 0.5, + "grad_norm": 18.886166093194674, + "learning_rate": 1.754853921438398e-05, + "loss": 1.4678, + "step": 41727 + }, + { + "epoch": 0.5, + "grad_norm": 6.164485561496895, + "learning_rate": 1.7548156043249244e-05, + "loss": 2.1045, + "step": 41730 + }, + { + "epoch": 0.5, + "grad_norm": 26.009611868151147, + "learning_rate": 1.7547772846355363e-05, + "loss": 1.5508, + "step": 41733 + }, + { + "epoch": 0.5, + "grad_norm": 22.75861581773794, + "learning_rate": 1.7547389623703655e-05, + "loss": 2.1365, + "step": 41736 + }, + { + "epoch": 0.5, + "grad_norm": 30.371647286092692, + "learning_rate": 1.7547006375295426e-05, + "loss": 1.4132, + "step": 41739 + }, + { + "epoch": 0.5, + "grad_norm": 13.424679282609988, + "learning_rate": 1.7546623101131982e-05, + "loss": 1.2514, + "step": 41742 + }, + { + "epoch": 0.5, + "grad_norm": 17.09029285788415, + "learning_rate": 1.754623980121463e-05, + "loss": 1.6168, + "step": 41745 + }, + { + "epoch": 0.5, + "grad_norm": 64.00604219406338, + "learning_rate": 1.754585647554468e-05, + "loss": 1.6764, + "step": 41748 + }, + { + "epoch": 0.5, + "grad_norm": 10.779915758571931, + "learning_rate": 1.7545473124123442e-05, + "loss": 1.5349, + "step": 41751 + }, + { + "epoch": 0.5, + "grad_norm": 8.798265396860893, + "learning_rate": 1.7545089746952222e-05, + "loss": 1.3426, + "step": 41754 + }, + { + "epoch": 0.5, + "grad_norm": 21.012907504405025, + "learning_rate": 1.7544706344032326e-05, + "loss": 1.5738, + "step": 41757 + }, + { + "epoch": 0.5, + "grad_norm": 12.242444667573757, + "learning_rate": 1.7544322915365068e-05, + "loss": 1.4711, + "step": 41760 + }, + { + "epoch": 0.5, + "grad_norm": 7.359318299394074, + "learning_rate": 1.7543939460951752e-05, + "loss": 1.6686, + "step": 41763 + }, + { + "epoch": 0.5, + "grad_norm": 7.91818910411211, + "learning_rate": 1.7543555980793686e-05, + "loss": 1.8501, + "step": 41766 + }, + { + "epoch": 0.5, + "grad_norm": 12.56232551101395, + "learning_rate": 1.754317247489218e-05, + "loss": 1.7002, + "step": 41769 + }, + { + "epoch": 0.5, + "grad_norm": 18.45746333742449, + "learning_rate": 1.754278894324855e-05, + "loss": 1.491, + "step": 41772 + }, + { + "epoch": 0.5, + "grad_norm": 30.117041262044488, + "learning_rate": 1.754240538586409e-05, + "loss": 1.5399, + "step": 41775 + }, + { + "epoch": 0.5, + "grad_norm": 9.622562629834963, + "learning_rate": 1.754202180274012e-05, + "loss": 1.3595, + "step": 41778 + }, + { + "epoch": 0.5, + "grad_norm": 5.181022161952222, + "learning_rate": 1.754163819387795e-05, + "loss": 1.1562, + "step": 41781 + }, + { + "epoch": 0.5, + "grad_norm": 3.1004222683685727, + "learning_rate": 1.7541254559278882e-05, + "loss": 1.3027, + "step": 41784 + }, + { + "epoch": 0.5, + "grad_norm": 64.36207710507374, + "learning_rate": 1.754087089894423e-05, + "loss": 1.2875, + "step": 41787 + }, + { + "epoch": 0.5, + "grad_norm": 148.82414710342556, + "learning_rate": 1.7540487212875297e-05, + "loss": 1.2661, + "step": 41790 + }, + { + "epoch": 0.5, + "grad_norm": 67.4633110621448, + "learning_rate": 1.7540103501073405e-05, + "loss": 1.6903, + "step": 41793 + }, + { + "epoch": 0.5, + "grad_norm": 35.25442996529379, + "learning_rate": 1.7539719763539852e-05, + "loss": 1.4653, + "step": 41796 + }, + { + "epoch": 0.5, + "grad_norm": 9.573219401749698, + "learning_rate": 1.7539336000275948e-05, + "loss": 0.9813, + "step": 41799 + }, + { + "epoch": 0.5, + "grad_norm": 9.018375481784295, + "learning_rate": 1.7538952211283012e-05, + "loss": 1.3644, + "step": 41802 + }, + { + "epoch": 0.5, + "grad_norm": 11.179787950300542, + "learning_rate": 1.7538568396562345e-05, + "loss": 1.3483, + "step": 41805 + }, + { + "epoch": 0.5, + "grad_norm": 14.65799733044767, + "learning_rate": 1.7538184556115257e-05, + "loss": 1.4872, + "step": 41808 + }, + { + "epoch": 0.5, + "grad_norm": 6.421724060378752, + "learning_rate": 1.7537800689943064e-05, + "loss": 1.3186, + "step": 41811 + }, + { + "epoch": 0.5, + "grad_norm": 32.69373682960947, + "learning_rate": 1.753741679804707e-05, + "loss": 1.489, + "step": 41814 + }, + { + "epoch": 0.5, + "grad_norm": 9.193559535718974, + "learning_rate": 1.7537032880428586e-05, + "loss": 1.3317, + "step": 41817 + }, + { + "epoch": 0.5, + "grad_norm": 13.795387724144561, + "learning_rate": 1.7536648937088923e-05, + "loss": 1.4717, + "step": 41820 + }, + { + "epoch": 0.5, + "grad_norm": 41.41783797626638, + "learning_rate": 1.7536264968029396e-05, + "loss": 1.5193, + "step": 41823 + }, + { + "epoch": 0.5, + "grad_norm": 21.930672897876878, + "learning_rate": 1.753588097325131e-05, + "loss": 1.6471, + "step": 41826 + }, + { + "epoch": 0.5, + "grad_norm": 11.432600867731804, + "learning_rate": 1.753549695275598e-05, + "loss": 1.4376, + "step": 41829 + }, + { + "epoch": 0.5, + "grad_norm": 13.762758726395164, + "learning_rate": 1.753511290654471e-05, + "loss": 1.134, + "step": 41832 + }, + { + "epoch": 0.5, + "grad_norm": 19.196092420648895, + "learning_rate": 1.7534728834618812e-05, + "loss": 1.4755, + "step": 41835 + }, + { + "epoch": 0.5, + "grad_norm": 17.21876135357034, + "learning_rate": 1.75343447369796e-05, + "loss": 1.6208, + "step": 41838 + }, + { + "epoch": 0.5, + "grad_norm": 11.985055274618873, + "learning_rate": 1.753396061362838e-05, + "loss": 1.3086, + "step": 41841 + }, + { + "epoch": 0.5, + "grad_norm": 6.768961819435706, + "learning_rate": 1.753357646456647e-05, + "loss": 1.1665, + "step": 41844 + }, + { + "epoch": 0.5, + "grad_norm": 18.356538331480394, + "learning_rate": 1.7533192289795177e-05, + "loss": 1.3784, + "step": 41847 + }, + { + "epoch": 0.5, + "grad_norm": 13.368477945523777, + "learning_rate": 1.753280808931581e-05, + "loss": 1.1391, + "step": 41850 + }, + { + "epoch": 0.5, + "grad_norm": 27.398000586429596, + "learning_rate": 1.7532423863129685e-05, + "loss": 1.5668, + "step": 41853 + }, + { + "epoch": 0.5, + "grad_norm": 6.752032638322254, + "learning_rate": 1.753203961123811e-05, + "loss": 1.3246, + "step": 41856 + }, + { + "epoch": 0.5, + "grad_norm": 6.508665648613781, + "learning_rate": 1.7531655333642396e-05, + "loss": 1.2255, + "step": 41859 + }, + { + "epoch": 0.5, + "grad_norm": 23.15577297646549, + "learning_rate": 1.7531271030343854e-05, + "loss": 1.2818, + "step": 41862 + }, + { + "epoch": 0.5, + "grad_norm": 24.050539270861105, + "learning_rate": 1.7530886701343802e-05, + "loss": 1.6473, + "step": 41865 + }, + { + "epoch": 0.5, + "grad_norm": 47.28700987185761, + "learning_rate": 1.7530502346643537e-05, + "loss": 1.8333, + "step": 41868 + }, + { + "epoch": 0.5, + "grad_norm": 13.408662673139675, + "learning_rate": 1.7530117966244388e-05, + "loss": 1.3169, + "step": 41871 + }, + { + "epoch": 0.5, + "grad_norm": 12.105877136748566, + "learning_rate": 1.7529733560147658e-05, + "loss": 1.2294, + "step": 41874 + }, + { + "epoch": 0.5, + "grad_norm": 6.8285945761675615, + "learning_rate": 1.7529349128354656e-05, + "loss": 1.4039, + "step": 41877 + }, + { + "epoch": 0.5, + "grad_norm": 9.284224207904352, + "learning_rate": 1.75289646708667e-05, + "loss": 1.3446, + "step": 41880 + }, + { + "epoch": 0.5, + "grad_norm": 14.793500271360243, + "learning_rate": 1.75285801876851e-05, + "loss": 1.2859, + "step": 41883 + }, + { + "epoch": 0.5, + "grad_norm": 13.251458228729032, + "learning_rate": 1.7528195678811163e-05, + "loss": 1.27, + "step": 41886 + }, + { + "epoch": 0.5, + "grad_norm": 11.68904133546648, + "learning_rate": 1.752781114424621e-05, + "loss": 1.4097, + "step": 41889 + }, + { + "epoch": 0.5, + "grad_norm": 19.537781272361233, + "learning_rate": 1.752742658399155e-05, + "loss": 1.6773, + "step": 41892 + }, + { + "epoch": 0.5, + "grad_norm": 6.313135109399867, + "learning_rate": 1.7527041998048492e-05, + "loss": 1.1797, + "step": 41895 + }, + { + "epoch": 0.5, + "grad_norm": 12.173099131430869, + "learning_rate": 1.752665738641835e-05, + "loss": 1.4374, + "step": 41898 + }, + { + "epoch": 0.5, + "grad_norm": 7.358859916258889, + "learning_rate": 1.752627274910244e-05, + "loss": 1.6655, + "step": 41901 + }, + { + "epoch": 0.5, + "grad_norm": 10.463918128413766, + "learning_rate": 1.752588808610207e-05, + "loss": 1.8584, + "step": 41904 + }, + { + "epoch": 0.5, + "grad_norm": 66.32395922934597, + "learning_rate": 1.7525503397418557e-05, + "loss": 1.4162, + "step": 41907 + }, + { + "epoch": 0.5, + "grad_norm": 8.528874938992205, + "learning_rate": 1.752511868305321e-05, + "loss": 1.4798, + "step": 41910 + }, + { + "epoch": 0.5, + "grad_norm": 15.326987569332376, + "learning_rate": 1.7524733943007342e-05, + "loss": 1.4991, + "step": 41913 + }, + { + "epoch": 0.5, + "grad_norm": 31.50453824285082, + "learning_rate": 1.752434917728227e-05, + "loss": 1.5013, + "step": 41916 + }, + { + "epoch": 0.5, + "grad_norm": 4.5990872378517125, + "learning_rate": 1.7523964385879305e-05, + "loss": 1.6461, + "step": 41919 + }, + { + "epoch": 0.5, + "grad_norm": 20.164264861217827, + "learning_rate": 1.7523579568799756e-05, + "loss": 1.6418, + "step": 41922 + }, + { + "epoch": 0.5, + "grad_norm": 3.7984708472025903, + "learning_rate": 1.7523194726044942e-05, + "loss": 1.2653, + "step": 41925 + }, + { + "epoch": 0.5, + "grad_norm": 7.524938222153938, + "learning_rate": 1.7522809857616176e-05, + "loss": 1.5152, + "step": 41928 + }, + { + "epoch": 0.5, + "grad_norm": 24.555854278245786, + "learning_rate": 1.752242496351477e-05, + "loss": 1.43, + "step": 41931 + }, + { + "epoch": 0.5, + "grad_norm": 11.271584355385214, + "learning_rate": 1.7522040043742034e-05, + "loss": 1.3527, + "step": 41934 + }, + { + "epoch": 0.5, + "grad_norm": 16.16639842170905, + "learning_rate": 1.7521655098299284e-05, + "loss": 1.0652, + "step": 41937 + }, + { + "epoch": 0.5, + "grad_norm": 15.708669690596784, + "learning_rate": 1.7521270127187837e-05, + "loss": 1.6563, + "step": 41940 + }, + { + "epoch": 0.5, + "grad_norm": 15.616385203535742, + "learning_rate": 1.7520885130409005e-05, + "loss": 1.5226, + "step": 41943 + }, + { + "epoch": 0.5, + "grad_norm": 16.324271665969626, + "learning_rate": 1.75205001079641e-05, + "loss": 1.5459, + "step": 41946 + }, + { + "epoch": 0.5, + "grad_norm": 8.132831475787345, + "learning_rate": 1.7520115059854438e-05, + "loss": 1.3018, + "step": 41949 + }, + { + "epoch": 0.5, + "grad_norm": 6.619091440728829, + "learning_rate": 1.7519729986081327e-05, + "loss": 1.5155, + "step": 41952 + }, + { + "epoch": 0.5, + "grad_norm": 12.207097325526917, + "learning_rate": 1.7519344886646094e-05, + "loss": 1.4775, + "step": 41955 + }, + { + "epoch": 0.5, + "grad_norm": 17.61569879141816, + "learning_rate": 1.7518959761550037e-05, + "loss": 1.3314, + "step": 41958 + }, + { + "epoch": 0.5, + "grad_norm": 230.64211034198425, + "learning_rate": 1.7518574610794487e-05, + "loss": 1.6222, + "step": 41961 + }, + { + "epoch": 0.5, + "grad_norm": 11.867201903902364, + "learning_rate": 1.7518189434380746e-05, + "loss": 1.2576, + "step": 41964 + }, + { + "epoch": 0.5, + "grad_norm": 13.603568825331044, + "learning_rate": 1.751780423231013e-05, + "loss": 1.5357, + "step": 41967 + }, + { + "epoch": 0.5, + "grad_norm": 12.611104931221602, + "learning_rate": 1.751741900458396e-05, + "loss": 1.0808, + "step": 41970 + }, + { + "epoch": 0.5, + "grad_norm": 7.801066880628433, + "learning_rate": 1.7517033751203545e-05, + "loss": 1.5098, + "step": 41973 + }, + { + "epoch": 0.5, + "grad_norm": 37.147380676284584, + "learning_rate": 1.75166484721702e-05, + "loss": 1.326, + "step": 41976 + }, + { + "epoch": 0.5, + "grad_norm": 30.5775003898781, + "learning_rate": 1.7516263167485243e-05, + "loss": 1.5583, + "step": 41979 + }, + { + "epoch": 0.5, + "grad_norm": 26.527589184769194, + "learning_rate": 1.7515877837149987e-05, + "loss": 1.5949, + "step": 41982 + }, + { + "epoch": 0.5, + "grad_norm": 13.169962003345253, + "learning_rate": 1.751549248116575e-05, + "loss": 1.1318, + "step": 41985 + }, + { + "epoch": 0.5, + "grad_norm": 8.320226169612868, + "learning_rate": 1.751510709953384e-05, + "loss": 1.4083, + "step": 41988 + }, + { + "epoch": 0.5, + "grad_norm": 17.635509981082336, + "learning_rate": 1.751472169225558e-05, + "loss": 1.4539, + "step": 41991 + }, + { + "epoch": 0.5, + "grad_norm": 12.559483432746397, + "learning_rate": 1.7514336259332277e-05, + "loss": 1.2698, + "step": 41994 + }, + { + "epoch": 0.51, + "grad_norm": 13.011370561463302, + "learning_rate": 1.7513950800765255e-05, + "loss": 1.3796, + "step": 41997 + }, + { + "epoch": 0.51, + "grad_norm": 40.1431299616541, + "learning_rate": 1.751356531655582e-05, + "loss": 1.5142, + "step": 42000 + }, + { + "epoch": 0.51, + "grad_norm": 30.413020442190092, + "learning_rate": 1.75131798067053e-05, + "loss": 1.5744, + "step": 42003 + }, + { + "epoch": 0.51, + "grad_norm": 41.70295265639522, + "learning_rate": 1.7512794271215e-05, + "loss": 1.6047, + "step": 42006 + }, + { + "epoch": 0.51, + "grad_norm": 84.12447192352835, + "learning_rate": 1.7512408710086243e-05, + "loss": 1.7411, + "step": 42009 + }, + { + "epoch": 0.51, + "grad_norm": 22.93882946696867, + "learning_rate": 1.7512023123320337e-05, + "loss": 1.466, + "step": 42012 + }, + { + "epoch": 0.51, + "grad_norm": 14.662868125645069, + "learning_rate": 1.75116375109186e-05, + "loss": 1.1375, + "step": 42015 + }, + { + "epoch": 0.51, + "grad_norm": 44.00148565756611, + "learning_rate": 1.7511251872882352e-05, + "loss": 1.1889, + "step": 42018 + }, + { + "epoch": 0.51, + "grad_norm": 6.566080404207128, + "learning_rate": 1.751086620921291e-05, + "loss": 1.5103, + "step": 42021 + }, + { + "epoch": 0.51, + "grad_norm": 7.269557331305965, + "learning_rate": 1.7510480519911584e-05, + "loss": 1.0922, + "step": 42024 + }, + { + "epoch": 0.51, + "grad_norm": 42.52882572557147, + "learning_rate": 1.7510094804979693e-05, + "loss": 1.3752, + "step": 42027 + }, + { + "epoch": 0.51, + "grad_norm": 11.620912138898765, + "learning_rate": 1.7509709064418557e-05, + "loss": 1.786, + "step": 42030 + }, + { + "epoch": 0.51, + "grad_norm": 11.126502055260177, + "learning_rate": 1.7509323298229484e-05, + "loss": 1.397, + "step": 42033 + }, + { + "epoch": 0.51, + "grad_norm": 62.60855792950381, + "learning_rate": 1.7508937506413798e-05, + "loss": 1.7994, + "step": 42036 + }, + { + "epoch": 0.51, + "grad_norm": 9.945807787244524, + "learning_rate": 1.750855168897281e-05, + "loss": 1.5495, + "step": 42039 + }, + { + "epoch": 0.51, + "grad_norm": 47.65497606629082, + "learning_rate": 1.7508165845907844e-05, + "loss": 1.5055, + "step": 42042 + }, + { + "epoch": 0.51, + "grad_norm": 37.864509883148855, + "learning_rate": 1.750777997722021e-05, + "loss": 1.4637, + "step": 42045 + }, + { + "epoch": 0.51, + "grad_norm": 30.93331099433838, + "learning_rate": 1.750739408291123e-05, + "loss": 1.5041, + "step": 42048 + }, + { + "epoch": 0.51, + "grad_norm": 12.198952133558581, + "learning_rate": 1.7507008162982216e-05, + "loss": 1.8427, + "step": 42051 + }, + { + "epoch": 0.51, + "grad_norm": 22.9154900252719, + "learning_rate": 1.7506622217434485e-05, + "loss": 1.6696, + "step": 42054 + }, + { + "epoch": 0.51, + "grad_norm": 8.47982732019814, + "learning_rate": 1.750623624626936e-05, + "loss": 1.3932, + "step": 42057 + }, + { + "epoch": 0.51, + "grad_norm": 12.87119104410539, + "learning_rate": 1.750585024948815e-05, + "loss": 1.4936, + "step": 42060 + }, + { + "epoch": 0.51, + "grad_norm": 58.43672206282725, + "learning_rate": 1.750546422709218e-05, + "loss": 1.1995, + "step": 42063 + }, + { + "epoch": 0.51, + "grad_norm": 20.94397422716179, + "learning_rate": 1.7505078179082768e-05, + "loss": 1.2132, + "step": 42066 + }, + { + "epoch": 0.51, + "grad_norm": 3.0663104677079014, + "learning_rate": 1.750469210546122e-05, + "loss": 1.57, + "step": 42069 + }, + { + "epoch": 0.51, + "grad_norm": 7.936296852315099, + "learning_rate": 1.7504306006228864e-05, + "loss": 1.4184, + "step": 42072 + }, + { + "epoch": 0.51, + "grad_norm": 3.2732868702210616, + "learning_rate": 1.7503919881387017e-05, + "loss": 1.4093, + "step": 42075 + }, + { + "epoch": 0.51, + "grad_norm": 8.202762464060678, + "learning_rate": 1.750353373093699e-05, + "loss": 1.6745, + "step": 42078 + }, + { + "epoch": 0.51, + "grad_norm": 8.4980281749071, + "learning_rate": 1.750314755488011e-05, + "loss": 1.3865, + "step": 42081 + }, + { + "epoch": 0.51, + "grad_norm": 12.73381559475366, + "learning_rate": 1.7502761353217687e-05, + "loss": 1.1436, + "step": 42084 + }, + { + "epoch": 0.51, + "grad_norm": 21.67482120388126, + "learning_rate": 1.750237512595104e-05, + "loss": 1.4006, + "step": 42087 + }, + { + "epoch": 0.51, + "grad_norm": 11.329510055249674, + "learning_rate": 1.750198887308149e-05, + "loss": 1.2191, + "step": 42090 + }, + { + "epoch": 0.51, + "grad_norm": 6.031094387687521, + "learning_rate": 1.7501602594610357e-05, + "loss": 1.4257, + "step": 42093 + }, + { + "epoch": 0.51, + "grad_norm": 22.828796826808322, + "learning_rate": 1.7501216290538953e-05, + "loss": 1.6294, + "step": 42096 + }, + { + "epoch": 0.51, + "grad_norm": 9.602189932928413, + "learning_rate": 1.7500829960868605e-05, + "loss": 1.3516, + "step": 42099 + }, + { + "epoch": 0.51, + "grad_norm": 6.790964788767529, + "learning_rate": 1.7500443605600622e-05, + "loss": 1.5146, + "step": 42102 + }, + { + "epoch": 0.51, + "grad_norm": 7.324751310679154, + "learning_rate": 1.7500057224736328e-05, + "loss": 1.2555, + "step": 42105 + }, + { + "epoch": 0.51, + "grad_norm": 5.538980855000744, + "learning_rate": 1.749967081827704e-05, + "loss": 1.4568, + "step": 42108 + }, + { + "epoch": 0.51, + "grad_norm": 16.44138495356966, + "learning_rate": 1.7499284386224076e-05, + "loss": 1.4065, + "step": 42111 + }, + { + "epoch": 0.51, + "grad_norm": 8.1032430461902, + "learning_rate": 1.7498897928578755e-05, + "loss": 1.1489, + "step": 42114 + }, + { + "epoch": 0.51, + "grad_norm": 15.22026846246989, + "learning_rate": 1.7498511445342397e-05, + "loss": 1.6205, + "step": 42117 + }, + { + "epoch": 0.51, + "grad_norm": 10.127340917688166, + "learning_rate": 1.749812493651632e-05, + "loss": 1.7034, + "step": 42120 + }, + { + "epoch": 0.51, + "grad_norm": 4.6707636027171775, + "learning_rate": 1.7497738402101845e-05, + "loss": 1.4842, + "step": 42123 + }, + { + "epoch": 0.51, + "grad_norm": 5.5797461902331005, + "learning_rate": 1.749735184210029e-05, + "loss": 1.0197, + "step": 42126 + }, + { + "epoch": 0.51, + "grad_norm": 27.068365510890107, + "learning_rate": 1.7496965256512974e-05, + "loss": 1.2115, + "step": 42129 + }, + { + "epoch": 0.51, + "grad_norm": 21.459666188054666, + "learning_rate": 1.7496578645341214e-05, + "loss": 1.5535, + "step": 42132 + }, + { + "epoch": 0.51, + "grad_norm": 18.05961731381869, + "learning_rate": 1.749619200858633e-05, + "loss": 1.3538, + "step": 42135 + }, + { + "epoch": 0.51, + "grad_norm": 33.5425315393447, + "learning_rate": 1.7495805346249643e-05, + "loss": 1.4704, + "step": 42138 + }, + { + "epoch": 0.51, + "grad_norm": 35.11323135026105, + "learning_rate": 1.7495418658332476e-05, + "loss": 1.2816, + "step": 42141 + }, + { + "epoch": 0.51, + "grad_norm": 11.266937458572349, + "learning_rate": 1.7495031944836144e-05, + "loss": 1.4262, + "step": 42144 + }, + { + "epoch": 0.51, + "grad_norm": 10.006236027581988, + "learning_rate": 1.7494645205761965e-05, + "loss": 1.4903, + "step": 42147 + }, + { + "epoch": 0.51, + "grad_norm": 12.835262476840759, + "learning_rate": 1.7494258441111264e-05, + "loss": 1.3147, + "step": 42150 + }, + { + "epoch": 0.51, + "grad_norm": 90.08280991226007, + "learning_rate": 1.7493871650885358e-05, + "loss": 1.5068, + "step": 42153 + }, + { + "epoch": 0.51, + "grad_norm": 14.709826304361618, + "learning_rate": 1.7493484835085565e-05, + "loss": 1.5652, + "step": 42156 + }, + { + "epoch": 0.51, + "grad_norm": 10.90913349320519, + "learning_rate": 1.749309799371321e-05, + "loss": 1.7622, + "step": 42159 + }, + { + "epoch": 0.51, + "grad_norm": 16.145652234299497, + "learning_rate": 1.749271112676961e-05, + "loss": 1.6399, + "step": 42162 + }, + { + "epoch": 0.51, + "grad_norm": 8.690961807529838, + "learning_rate": 1.7492324234256087e-05, + "loss": 1.4312, + "step": 42165 + }, + { + "epoch": 0.51, + "grad_norm": 23.59269388392932, + "learning_rate": 1.7491937316173956e-05, + "loss": 1.4318, + "step": 42168 + }, + { + "epoch": 0.51, + "grad_norm": 19.405884656582533, + "learning_rate": 1.7491550372524545e-05, + "loss": 1.3662, + "step": 42171 + }, + { + "epoch": 0.51, + "grad_norm": 23.2134700822937, + "learning_rate": 1.749116340330917e-05, + "loss": 1.3394, + "step": 42174 + }, + { + "epoch": 0.51, + "grad_norm": 15.543045344687638, + "learning_rate": 1.749077640852915e-05, + "loss": 1.2622, + "step": 42177 + }, + { + "epoch": 0.51, + "grad_norm": 54.52903280577211, + "learning_rate": 1.7490389388185812e-05, + "loss": 0.9661, + "step": 42180 + }, + { + "epoch": 0.51, + "grad_norm": 8.577787507921803, + "learning_rate": 1.7490002342280473e-05, + "loss": 1.7271, + "step": 42183 + }, + { + "epoch": 0.51, + "grad_norm": 2.924026197708824, + "learning_rate": 1.7489615270814452e-05, + "loss": 1.4808, + "step": 42186 + }, + { + "epoch": 0.51, + "grad_norm": 5.890300389928242, + "learning_rate": 1.7489228173789075e-05, + "loss": 1.1376, + "step": 42189 + }, + { + "epoch": 0.51, + "grad_norm": 75.07891796623814, + "learning_rate": 1.7488841051205655e-05, + "loss": 1.6056, + "step": 42192 + }, + { + "epoch": 0.51, + "grad_norm": 17.948497281728564, + "learning_rate": 1.7488453903065524e-05, + "loss": 1.2241, + "step": 42195 + }, + { + "epoch": 0.51, + "grad_norm": 3.8855932519096408, + "learning_rate": 1.748806672936999e-05, + "loss": 1.3742, + "step": 42198 + }, + { + "epoch": 0.51, + "grad_norm": 14.442653359995528, + "learning_rate": 1.7487679530120386e-05, + "loss": 1.0501, + "step": 42201 + }, + { + "epoch": 0.51, + "grad_norm": 43.46840832350233, + "learning_rate": 1.748729230531803e-05, + "loss": 1.333, + "step": 42204 + }, + { + "epoch": 0.51, + "grad_norm": 4.325048880583525, + "learning_rate": 1.748690505496424e-05, + "loss": 1.2175, + "step": 42207 + }, + { + "epoch": 0.51, + "grad_norm": 13.559638473500112, + "learning_rate": 1.748651777906034e-05, + "loss": 1.4048, + "step": 42210 + }, + { + "epoch": 0.51, + "grad_norm": 15.122027047817117, + "learning_rate": 1.748613047760765e-05, + "loss": 1.9234, + "step": 42213 + }, + { + "epoch": 0.51, + "grad_norm": 21.956708931640502, + "learning_rate": 1.7485743150607498e-05, + "loss": 1.8057, + "step": 42216 + }, + { + "epoch": 0.51, + "grad_norm": 12.858013044585954, + "learning_rate": 1.7485355798061196e-05, + "loss": 1.2609, + "step": 42219 + }, + { + "epoch": 0.51, + "grad_norm": 11.834865205897968, + "learning_rate": 1.748496841997007e-05, + "loss": 1.5275, + "step": 42222 + }, + { + "epoch": 0.51, + "grad_norm": 23.61826161163136, + "learning_rate": 1.7484581016335447e-05, + "loss": 1.3902, + "step": 42225 + }, + { + "epoch": 0.51, + "grad_norm": 14.259996612443253, + "learning_rate": 1.7484193587158643e-05, + "loss": 1.2677, + "step": 42228 + }, + { + "epoch": 0.51, + "grad_norm": 7.080559445396977, + "learning_rate": 1.7483806132440984e-05, + "loss": 1.3847, + "step": 42231 + }, + { + "epoch": 0.51, + "grad_norm": 13.320539821140551, + "learning_rate": 1.7483418652183787e-05, + "loss": 1.2743, + "step": 42234 + }, + { + "epoch": 0.51, + "grad_norm": 11.635738225156338, + "learning_rate": 1.748303114638838e-05, + "loss": 1.3709, + "step": 42237 + }, + { + "epoch": 0.51, + "grad_norm": 6.419846238130353, + "learning_rate": 1.7482643615056083e-05, + "loss": 1.4416, + "step": 42240 + }, + { + "epoch": 0.51, + "grad_norm": 3.909474902762867, + "learning_rate": 1.748225605818822e-05, + "loss": 1.3706, + "step": 42243 + }, + { + "epoch": 0.51, + "grad_norm": 8.966805257266264, + "learning_rate": 1.7481868475786106e-05, + "loss": 1.2913, + "step": 42246 + }, + { + "epoch": 0.51, + "grad_norm": 4.802492551584519, + "learning_rate": 1.7481480867851076e-05, + "loss": 1.2711, + "step": 42249 + }, + { + "epoch": 0.51, + "grad_norm": 14.72301640630795, + "learning_rate": 1.7481093234384444e-05, + "loss": 1.3577, + "step": 42252 + }, + { + "epoch": 0.51, + "grad_norm": 9.33894535116296, + "learning_rate": 1.7480705575387537e-05, + "loss": 1.3613, + "step": 42255 + }, + { + "epoch": 0.51, + "grad_norm": 8.816027199734158, + "learning_rate": 1.7480317890861677e-05, + "loss": 1.7467, + "step": 42258 + }, + { + "epoch": 0.51, + "grad_norm": 22.11256245316447, + "learning_rate": 1.7479930180808185e-05, + "loss": 1.6096, + "step": 42261 + }, + { + "epoch": 0.51, + "grad_norm": 14.583945125879907, + "learning_rate": 1.7479542445228387e-05, + "loss": 1.888, + "step": 42264 + }, + { + "epoch": 0.51, + "grad_norm": 22.18448418900882, + "learning_rate": 1.74791546841236e-05, + "loss": 1.769, + "step": 42267 + }, + { + "epoch": 0.51, + "grad_norm": 16.13392517810435, + "learning_rate": 1.7478766897495155e-05, + "loss": 1.5334, + "step": 42270 + }, + { + "epoch": 0.51, + "grad_norm": 13.806772088374778, + "learning_rate": 1.747837908534437e-05, + "loss": 1.8278, + "step": 42273 + }, + { + "epoch": 0.51, + "grad_norm": 22.44816991596063, + "learning_rate": 1.7477991247672576e-05, + "loss": 1.3827, + "step": 42276 + }, + { + "epoch": 0.51, + "grad_norm": 18.575394013280555, + "learning_rate": 1.747760338448109e-05, + "loss": 1.6838, + "step": 42279 + }, + { + "epoch": 0.51, + "grad_norm": 11.471628589208999, + "learning_rate": 1.7477215495771237e-05, + "loss": 1.2196, + "step": 42282 + }, + { + "epoch": 0.51, + "grad_norm": 6.141698929969253, + "learning_rate": 1.747682758154434e-05, + "loss": 1.5955, + "step": 42285 + }, + { + "epoch": 0.51, + "grad_norm": 14.736279884690935, + "learning_rate": 1.7476439641801724e-05, + "loss": 1.4931, + "step": 42288 + }, + { + "epoch": 0.51, + "grad_norm": 24.248710454001323, + "learning_rate": 1.747605167654471e-05, + "loss": 1.5438, + "step": 42291 + }, + { + "epoch": 0.51, + "grad_norm": 12.64388841845307, + "learning_rate": 1.747566368577463e-05, + "loss": 1.8011, + "step": 42294 + }, + { + "epoch": 0.51, + "grad_norm": 17.296894367286367, + "learning_rate": 1.74752756694928e-05, + "loss": 1.4875, + "step": 42297 + }, + { + "epoch": 0.51, + "grad_norm": 4.955167928000686, + "learning_rate": 1.747488762770054e-05, + "loss": 1.2058, + "step": 42300 + }, + { + "epoch": 0.51, + "grad_norm": 2.9401219699436405, + "learning_rate": 1.747449956039919e-05, + "loss": 1.3248, + "step": 42303 + }, + { + "epoch": 0.51, + "grad_norm": 5.063950213101323, + "learning_rate": 1.747411146759006e-05, + "loss": 1.4107, + "step": 42306 + }, + { + "epoch": 0.51, + "grad_norm": 17.537327563230185, + "learning_rate": 1.7473723349274484e-05, + "loss": 1.5167, + "step": 42309 + }, + { + "epoch": 0.51, + "grad_norm": 5.141880157280005, + "learning_rate": 1.7473335205453777e-05, + "loss": 1.2977, + "step": 42312 + }, + { + "epoch": 0.51, + "grad_norm": 42.47479186619449, + "learning_rate": 1.7472947036129274e-05, + "loss": 1.4856, + "step": 42315 + }, + { + "epoch": 0.51, + "grad_norm": 53.95466630708732, + "learning_rate": 1.7472558841302293e-05, + "loss": 1.333, + "step": 42318 + }, + { + "epoch": 0.51, + "grad_norm": 18.00618039618523, + "learning_rate": 1.7472170620974158e-05, + "loss": 1.6634, + "step": 42321 + }, + { + "epoch": 0.51, + "grad_norm": 14.960483811325238, + "learning_rate": 1.74717823751462e-05, + "loss": 1.1622, + "step": 42324 + }, + { + "epoch": 0.51, + "grad_norm": 6.281790341003564, + "learning_rate": 1.7471394103819738e-05, + "loss": 1.4092, + "step": 42327 + }, + { + "epoch": 0.51, + "grad_norm": 23.15610860245129, + "learning_rate": 1.7471005806996098e-05, + "loss": 1.3432, + "step": 42330 + }, + { + "epoch": 0.51, + "grad_norm": 16.095439448063022, + "learning_rate": 1.7470617484676608e-05, + "loss": 1.5791, + "step": 42333 + }, + { + "epoch": 0.51, + "grad_norm": 11.675296094382068, + "learning_rate": 1.747022913686259e-05, + "loss": 1.5593, + "step": 42336 + }, + { + "epoch": 0.51, + "grad_norm": 41.63613129125728, + "learning_rate": 1.746984076355537e-05, + "loss": 1.6913, + "step": 42339 + }, + { + "epoch": 0.51, + "grad_norm": 4.1827354647573465, + "learning_rate": 1.7469452364756275e-05, + "loss": 1.7477, + "step": 42342 + }, + { + "epoch": 0.51, + "grad_norm": 10.883791523062913, + "learning_rate": 1.746906394046663e-05, + "loss": 1.2708, + "step": 42345 + }, + { + "epoch": 0.51, + "grad_norm": 6.447419576160049, + "learning_rate": 1.746867549068776e-05, + "loss": 1.3335, + "step": 42348 + }, + { + "epoch": 0.51, + "grad_norm": 22.341186177357542, + "learning_rate": 1.746828701542099e-05, + "loss": 1.7343, + "step": 42351 + }, + { + "epoch": 0.51, + "grad_norm": 8.023842542047463, + "learning_rate": 1.7467898514667648e-05, + "loss": 1.3743, + "step": 42354 + }, + { + "epoch": 0.51, + "grad_norm": 7.62239183548496, + "learning_rate": 1.7467509988429058e-05, + "loss": 1.1904, + "step": 42357 + }, + { + "epoch": 0.51, + "grad_norm": 15.542517935356862, + "learning_rate": 1.7467121436706547e-05, + "loss": 1.3367, + "step": 42360 + }, + { + "epoch": 0.51, + "grad_norm": 2.597379450342112, + "learning_rate": 1.746673285950144e-05, + "loss": 1.6093, + "step": 42363 + }, + { + "epoch": 0.51, + "grad_norm": 6.2336715510405885, + "learning_rate": 1.7466344256815063e-05, + "loss": 1.198, + "step": 42366 + }, + { + "epoch": 0.51, + "grad_norm": 9.801371975420615, + "learning_rate": 1.746595562864874e-05, + "loss": 1.3391, + "step": 42369 + }, + { + "epoch": 0.51, + "grad_norm": 13.587610348455001, + "learning_rate": 1.74655669750038e-05, + "loss": 1.642, + "step": 42372 + }, + { + "epoch": 0.51, + "grad_norm": 10.400553487443014, + "learning_rate": 1.746517829588157e-05, + "loss": 1.093, + "step": 42375 + }, + { + "epoch": 0.51, + "grad_norm": 6.074349820087344, + "learning_rate": 1.746478959128338e-05, + "loss": 1.3923, + "step": 42378 + }, + { + "epoch": 0.51, + "grad_norm": 9.429731025365731, + "learning_rate": 1.7464400861210546e-05, + "loss": 1.2796, + "step": 42381 + }, + { + "epoch": 0.51, + "grad_norm": 19.10157690794852, + "learning_rate": 1.74640121056644e-05, + "loss": 1.2573, + "step": 42384 + }, + { + "epoch": 0.51, + "grad_norm": 14.831190879175674, + "learning_rate": 1.7463623324646275e-05, + "loss": 1.2367, + "step": 42387 + }, + { + "epoch": 0.51, + "grad_norm": 12.120666649903487, + "learning_rate": 1.746323451815749e-05, + "loss": 1.4817, + "step": 42390 + }, + { + "epoch": 0.51, + "grad_norm": 12.08436558609742, + "learning_rate": 1.746284568619937e-05, + "loss": 1.4212, + "step": 42393 + }, + { + "epoch": 0.51, + "grad_norm": 7.723356364999609, + "learning_rate": 1.7462456828773245e-05, + "loss": 1.2034, + "step": 42396 + }, + { + "epoch": 0.51, + "grad_norm": 16.740121430314435, + "learning_rate": 1.746206794588045e-05, + "loss": 1.5319, + "step": 42399 + }, + { + "epoch": 0.51, + "grad_norm": 43.43769757532245, + "learning_rate": 1.7461679037522298e-05, + "loss": 1.8664, + "step": 42402 + }, + { + "epoch": 0.51, + "grad_norm": 10.19598324834522, + "learning_rate": 1.7461290103700127e-05, + "loss": 1.4018, + "step": 42405 + }, + { + "epoch": 0.51, + "grad_norm": 11.580206671822504, + "learning_rate": 1.746090114441526e-05, + "loss": 1.2178, + "step": 42408 + }, + { + "epoch": 0.51, + "grad_norm": 10.578139727554616, + "learning_rate": 1.7460512159669024e-05, + "loss": 0.8734, + "step": 42411 + }, + { + "epoch": 0.51, + "grad_norm": 14.405523228310562, + "learning_rate": 1.7460123149462747e-05, + "loss": 1.4525, + "step": 42414 + }, + { + "epoch": 0.51, + "grad_norm": 10.35281103789195, + "learning_rate": 1.7459734113797756e-05, + "loss": 1.2527, + "step": 42417 + }, + { + "epoch": 0.51, + "grad_norm": 8.013031517342947, + "learning_rate": 1.7459345052675384e-05, + "loss": 1.6385, + "step": 42420 + }, + { + "epoch": 0.51, + "grad_norm": 17.67040809551145, + "learning_rate": 1.7458955966096947e-05, + "loss": 1.9394, + "step": 42423 + }, + { + "epoch": 0.51, + "grad_norm": 44.0584219644378, + "learning_rate": 1.7458566854063784e-05, + "loss": 1.5918, + "step": 42426 + }, + { + "epoch": 0.51, + "grad_norm": 47.057178034757335, + "learning_rate": 1.745817771657722e-05, + "loss": 1.0607, + "step": 42429 + }, + { + "epoch": 0.51, + "grad_norm": 9.723400992064908, + "learning_rate": 1.745778855363858e-05, + "loss": 1.3841, + "step": 42432 + }, + { + "epoch": 0.51, + "grad_norm": 9.397009973983147, + "learning_rate": 1.7457399365249196e-05, + "loss": 1.335, + "step": 42435 + }, + { + "epoch": 0.51, + "grad_norm": 14.421507288180857, + "learning_rate": 1.745701015141039e-05, + "loss": 1.5253, + "step": 42438 + }, + { + "epoch": 0.51, + "grad_norm": 24.42395068997561, + "learning_rate": 1.74566209121235e-05, + "loss": 1.6581, + "step": 42441 + }, + { + "epoch": 0.51, + "grad_norm": 12.060273181773692, + "learning_rate": 1.7456231647389845e-05, + "loss": 1.4103, + "step": 42444 + }, + { + "epoch": 0.51, + "grad_norm": 88.05344035797133, + "learning_rate": 1.7455842357210756e-05, + "loss": 1.4331, + "step": 42447 + }, + { + "epoch": 0.51, + "grad_norm": 59.52964941917819, + "learning_rate": 1.7455453041587563e-05, + "loss": 1.4269, + "step": 42450 + }, + { + "epoch": 0.51, + "grad_norm": 15.465546378849337, + "learning_rate": 1.7455063700521598e-05, + "loss": 1.6931, + "step": 42453 + }, + { + "epoch": 0.51, + "grad_norm": 22.142304194794463, + "learning_rate": 1.7454674334014185e-05, + "loss": 1.5625, + "step": 42456 + }, + { + "epoch": 0.51, + "grad_norm": 10.771812213042082, + "learning_rate": 1.7454284942066652e-05, + "loss": 1.5394, + "step": 42459 + }, + { + "epoch": 0.51, + "grad_norm": 18.98723422709541, + "learning_rate": 1.7453895524680328e-05, + "loss": 1.0986, + "step": 42462 + }, + { + "epoch": 0.51, + "grad_norm": 237.68442225867085, + "learning_rate": 1.7453506081856547e-05, + "loss": 1.1374, + "step": 42465 + }, + { + "epoch": 0.51, + "grad_norm": 10.487425604637256, + "learning_rate": 1.745311661359663e-05, + "loss": 1.6721, + "step": 42468 + }, + { + "epoch": 0.51, + "grad_norm": 13.673174003105004, + "learning_rate": 1.7452727119901912e-05, + "loss": 1.8591, + "step": 42471 + }, + { + "epoch": 0.51, + "grad_norm": 9.105427287633685, + "learning_rate": 1.7452337600773727e-05, + "loss": 1.594, + "step": 42474 + }, + { + "epoch": 0.51, + "grad_norm": 4.28335782702902, + "learning_rate": 1.7451948056213392e-05, + "loss": 1.4923, + "step": 42477 + }, + { + "epoch": 0.51, + "grad_norm": 5.19612343892665, + "learning_rate": 1.745155848622224e-05, + "loss": 1.0776, + "step": 42480 + }, + { + "epoch": 0.51, + "grad_norm": 10.542009331905021, + "learning_rate": 1.7451168890801612e-05, + "loss": 1.3243, + "step": 42483 + }, + { + "epoch": 0.51, + "grad_norm": 22.04328853685205, + "learning_rate": 1.745077926995282e-05, + "loss": 1.3764, + "step": 42486 + }, + { + "epoch": 0.51, + "grad_norm": 11.317914491120879, + "learning_rate": 1.7450389623677205e-05, + "loss": 1.5251, + "step": 42489 + }, + { + "epoch": 0.51, + "grad_norm": 32.28808863156729, + "learning_rate": 1.7449999951976092e-05, + "loss": 1.4825, + "step": 42492 + }, + { + "epoch": 0.51, + "grad_norm": 21.40376803149706, + "learning_rate": 1.7449610254850818e-05, + "loss": 1.3166, + "step": 42495 + }, + { + "epoch": 0.51, + "grad_norm": 8.67015007046426, + "learning_rate": 1.7449220532302704e-05, + "loss": 1.3071, + "step": 42498 + }, + { + "epoch": 0.51, + "grad_norm": 5.393479186153651, + "learning_rate": 1.7448830784333082e-05, + "loss": 1.7392, + "step": 42501 + }, + { + "epoch": 0.51, + "grad_norm": 17.27791178333855, + "learning_rate": 1.7448441010943286e-05, + "loss": 1.4319, + "step": 42504 + }, + { + "epoch": 0.51, + "grad_norm": 15.463354517537505, + "learning_rate": 1.7448051212134644e-05, + "loss": 1.1791, + "step": 42507 + }, + { + "epoch": 0.51, + "grad_norm": 20.7471469260327, + "learning_rate": 1.7447661387908485e-05, + "loss": 1.381, + "step": 42510 + }, + { + "epoch": 0.51, + "grad_norm": 28.172504616250443, + "learning_rate": 1.744727153826614e-05, + "loss": 1.087, + "step": 42513 + }, + { + "epoch": 0.51, + "grad_norm": 6.117341576578472, + "learning_rate": 1.7446881663208937e-05, + "loss": 1.3784, + "step": 42516 + }, + { + "epoch": 0.51, + "grad_norm": 25.171826285743013, + "learning_rate": 1.7446491762738212e-05, + "loss": 1.5643, + "step": 42519 + }, + { + "epoch": 0.51, + "grad_norm": 8.51457292016124, + "learning_rate": 1.7446101836855292e-05, + "loss": 1.3905, + "step": 42522 + }, + { + "epoch": 0.51, + "grad_norm": 7.23653024139094, + "learning_rate": 1.7445711885561513e-05, + "loss": 1.558, + "step": 42525 + }, + { + "epoch": 0.51, + "grad_norm": 22.354158272329943, + "learning_rate": 1.74453219088582e-05, + "loss": 1.7147, + "step": 42528 + }, + { + "epoch": 0.51, + "grad_norm": 21.50269211033004, + "learning_rate": 1.744493190674668e-05, + "loss": 1.442, + "step": 42531 + }, + { + "epoch": 0.51, + "grad_norm": 50.22367871842462, + "learning_rate": 1.744454187922829e-05, + "loss": 1.2558, + "step": 42534 + }, + { + "epoch": 0.51, + "grad_norm": 17.070308613552193, + "learning_rate": 1.7444151826304365e-05, + "loss": 1.3651, + "step": 42537 + }, + { + "epoch": 0.51, + "grad_norm": 7.869111800236977, + "learning_rate": 1.7443761747976226e-05, + "loss": 1.5127, + "step": 42540 + }, + { + "epoch": 0.51, + "grad_norm": 3.382319453410321, + "learning_rate": 1.7443371644245217e-05, + "loss": 1.7111, + "step": 42543 + }, + { + "epoch": 0.51, + "grad_norm": 34.194977100167485, + "learning_rate": 1.7442981515112655e-05, + "loss": 1.6159, + "step": 42546 + }, + { + "epoch": 0.51, + "grad_norm": 23.686467597138613, + "learning_rate": 1.7442591360579876e-05, + "loss": 1.6034, + "step": 42549 + }, + { + "epoch": 0.51, + "grad_norm": 4.259262679299554, + "learning_rate": 1.744220118064822e-05, + "loss": 1.7248, + "step": 42552 + }, + { + "epoch": 0.51, + "grad_norm": 12.067472499250743, + "learning_rate": 1.7441810975319008e-05, + "loss": 1.1576, + "step": 42555 + }, + { + "epoch": 0.51, + "grad_norm": 8.259734107881595, + "learning_rate": 1.7441420744593576e-05, + "loss": 1.4971, + "step": 42558 + }, + { + "epoch": 0.51, + "grad_norm": 30.37902820388017, + "learning_rate": 1.7441030488473255e-05, + "loss": 1.6245, + "step": 42561 + }, + { + "epoch": 0.51, + "grad_norm": 6.14498810469786, + "learning_rate": 1.7440640206959378e-05, + "loss": 1.2366, + "step": 42564 + }, + { + "epoch": 0.51, + "grad_norm": 13.810599139999752, + "learning_rate": 1.7440249900053276e-05, + "loss": 1.3328, + "step": 42567 + }, + { + "epoch": 0.51, + "grad_norm": 14.050409657892764, + "learning_rate": 1.7439859567756284e-05, + "loss": 1.5468, + "step": 42570 + }, + { + "epoch": 0.51, + "grad_norm": 71.04436962421461, + "learning_rate": 1.7439469210069727e-05, + "loss": 1.6703, + "step": 42573 + }, + { + "epoch": 0.51, + "grad_norm": 6.229858040811515, + "learning_rate": 1.7439078826994943e-05, + "loss": 1.3262, + "step": 42576 + }, + { + "epoch": 0.51, + "grad_norm": 16.804777811636633, + "learning_rate": 1.7438688418533264e-05, + "loss": 1.5066, + "step": 42579 + }, + { + "epoch": 0.51, + "grad_norm": 9.752183292009274, + "learning_rate": 1.7438297984686017e-05, + "loss": 1.6951, + "step": 42582 + }, + { + "epoch": 0.51, + "grad_norm": 18.153112148657335, + "learning_rate": 1.743790752545454e-05, + "loss": 1.3486, + "step": 42585 + }, + { + "epoch": 0.51, + "grad_norm": 65.68862893933681, + "learning_rate": 1.7437517040840165e-05, + "loss": 1.431, + "step": 42588 + }, + { + "epoch": 0.51, + "grad_norm": 26.65921610428217, + "learning_rate": 1.7437126530844222e-05, + "loss": 1.5765, + "step": 42591 + }, + { + "epoch": 0.51, + "grad_norm": 19.91050758802038, + "learning_rate": 1.7436735995468046e-05, + "loss": 1.4365, + "step": 42594 + }, + { + "epoch": 0.51, + "grad_norm": 10.035422081888598, + "learning_rate": 1.7436345434712966e-05, + "loss": 1.4954, + "step": 42597 + }, + { + "epoch": 0.51, + "grad_norm": 11.090242912812666, + "learning_rate": 1.7435954848580322e-05, + "loss": 0.8871, + "step": 42600 + }, + { + "epoch": 0.51, + "grad_norm": 8.119956642779705, + "learning_rate": 1.7435564237071438e-05, + "loss": 1.6232, + "step": 42603 + }, + { + "epoch": 0.51, + "grad_norm": 22.05443782613101, + "learning_rate": 1.7435173600187653e-05, + "loss": 1.6182, + "step": 42606 + }, + { + "epoch": 0.51, + "grad_norm": 16.662123294481283, + "learning_rate": 1.74347829379303e-05, + "loss": 1.5608, + "step": 42609 + }, + { + "epoch": 0.51, + "grad_norm": 34.72518263063352, + "learning_rate": 1.743439225030071e-05, + "loss": 1.2705, + "step": 42612 + }, + { + "epoch": 0.51, + "grad_norm": 11.491494511832077, + "learning_rate": 1.7434001537300218e-05, + "loss": 1.2886, + "step": 42615 + }, + { + "epoch": 0.51, + "grad_norm": 23.415685502649772, + "learning_rate": 1.7433610798930156e-05, + "loss": 1.4844, + "step": 42618 + }, + { + "epoch": 0.51, + "grad_norm": 16.23197215809771, + "learning_rate": 1.7433220035191857e-05, + "loss": 1.3254, + "step": 42621 + }, + { + "epoch": 0.51, + "grad_norm": 5.504506913996362, + "learning_rate": 1.7432829246086655e-05, + "loss": 1.4687, + "step": 42624 + }, + { + "epoch": 0.51, + "grad_norm": 10.841789617143446, + "learning_rate": 1.7432438431615887e-05, + "loss": 1.5873, + "step": 42627 + }, + { + "epoch": 0.51, + "grad_norm": 15.616910301862875, + "learning_rate": 1.743204759178088e-05, + "loss": 1.6719, + "step": 42630 + }, + { + "epoch": 0.51, + "grad_norm": 21.792233536855893, + "learning_rate": 1.7431656726582976e-05, + "loss": 1.549, + "step": 42633 + }, + { + "epoch": 0.51, + "grad_norm": 30.297119691613368, + "learning_rate": 1.7431265836023502e-05, + "loss": 1.0323, + "step": 42636 + }, + { + "epoch": 0.51, + "grad_norm": 28.139085482502544, + "learning_rate": 1.7430874920103794e-05, + "loss": 1.5411, + "step": 42639 + }, + { + "epoch": 0.51, + "grad_norm": 7.296592108136483, + "learning_rate": 1.7430483978825184e-05, + "loss": 0.9612, + "step": 42642 + }, + { + "epoch": 0.51, + "grad_norm": 38.21157218184294, + "learning_rate": 1.7430093012189013e-05, + "loss": 1.6679, + "step": 42645 + }, + { + "epoch": 0.51, + "grad_norm": 18.429846817888656, + "learning_rate": 1.7429702020196612e-05, + "loss": 1.5206, + "step": 42648 + }, + { + "epoch": 0.51, + "grad_norm": 43.69848260890555, + "learning_rate": 1.7429311002849313e-05, + "loss": 1.5073, + "step": 42651 + }, + { + "epoch": 0.51, + "grad_norm": 15.82863287272995, + "learning_rate": 1.7428919960148448e-05, + "loss": 1.4832, + "step": 42654 + }, + { + "epoch": 0.51, + "grad_norm": 16.91286608448746, + "learning_rate": 1.7428528892095355e-05, + "loss": 1.5548, + "step": 42657 + }, + { + "epoch": 0.51, + "grad_norm": 8.101700905967443, + "learning_rate": 1.7428137798691372e-05, + "loss": 1.479, + "step": 42660 + }, + { + "epoch": 0.51, + "grad_norm": 4.965535558101293, + "learning_rate": 1.7427746679937828e-05, + "loss": 1.4722, + "step": 42663 + }, + { + "epoch": 0.51, + "grad_norm": 18.801900930173648, + "learning_rate": 1.7427355535836063e-05, + "loss": 1.5578, + "step": 42666 + }, + { + "epoch": 0.51, + "grad_norm": 25.432977697210447, + "learning_rate": 1.7426964366387407e-05, + "loss": 1.4383, + "step": 42669 + }, + { + "epoch": 0.51, + "grad_norm": 6.0450966910516435, + "learning_rate": 1.7426573171593198e-05, + "loss": 1.0002, + "step": 42672 + }, + { + "epoch": 0.51, + "grad_norm": 11.562741717704418, + "learning_rate": 1.7426181951454766e-05, + "loss": 1.2851, + "step": 42675 + }, + { + "epoch": 0.51, + "grad_norm": 6.768337209892824, + "learning_rate": 1.7425790705973455e-05, + "loss": 1.397, + "step": 42678 + }, + { + "epoch": 0.51, + "grad_norm": 10.961055861057229, + "learning_rate": 1.742539943515059e-05, + "loss": 1.2843, + "step": 42681 + }, + { + "epoch": 0.51, + "grad_norm": 16.38423712868652, + "learning_rate": 1.7425008138987517e-05, + "loss": 1.6119, + "step": 42684 + }, + { + "epoch": 0.51, + "grad_norm": 13.097383621341423, + "learning_rate": 1.742461681748556e-05, + "loss": 1.0844, + "step": 42687 + }, + { + "epoch": 0.51, + "grad_norm": 12.847651399139878, + "learning_rate": 1.7424225470646064e-05, + "loss": 1.6847, + "step": 42690 + }, + { + "epoch": 0.51, + "grad_norm": 61.14812744510784, + "learning_rate": 1.742383409847036e-05, + "loss": 1.5079, + "step": 42693 + }, + { + "epoch": 0.51, + "grad_norm": 15.081583439029252, + "learning_rate": 1.7423442700959783e-05, + "loss": 1.3728, + "step": 42696 + }, + { + "epoch": 0.51, + "grad_norm": 56.2003599774624, + "learning_rate": 1.7423051278115674e-05, + "loss": 1.3868, + "step": 42699 + }, + { + "epoch": 0.51, + "grad_norm": 97.1906396629089, + "learning_rate": 1.7422659829939358e-05, + "loss": 1.3173, + "step": 42702 + }, + { + "epoch": 0.51, + "grad_norm": 9.980561180566593, + "learning_rate": 1.7422268356432182e-05, + "loss": 1.495, + "step": 42705 + }, + { + "epoch": 0.51, + "grad_norm": 3.7172989012416418, + "learning_rate": 1.7421876857595476e-05, + "loss": 1.2507, + "step": 42708 + }, + { + "epoch": 0.51, + "grad_norm": 86.76892923063052, + "learning_rate": 1.7421485333430577e-05, + "loss": 1.3389, + "step": 42711 + }, + { + "epoch": 0.51, + "grad_norm": 3.1402860522921783, + "learning_rate": 1.7421093783938824e-05, + "loss": 1.6372, + "step": 42714 + }, + { + "epoch": 0.51, + "grad_norm": 18.130538478037423, + "learning_rate": 1.742070220912155e-05, + "loss": 1.4653, + "step": 42717 + }, + { + "epoch": 0.51, + "grad_norm": 10.909335180595912, + "learning_rate": 1.7420310608980094e-05, + "loss": 1.6201, + "step": 42720 + }, + { + "epoch": 0.51, + "grad_norm": 9.45669436232044, + "learning_rate": 1.741991898351579e-05, + "loss": 1.1948, + "step": 42723 + }, + { + "epoch": 0.51, + "grad_norm": 3.3250100840110184, + "learning_rate": 1.7419527332729973e-05, + "loss": 1.7166, + "step": 42726 + }, + { + "epoch": 0.51, + "grad_norm": 4.180902068941508, + "learning_rate": 1.7419135656623982e-05, + "loss": 1.1676, + "step": 42729 + }, + { + "epoch": 0.51, + "grad_norm": 6.667035901434534, + "learning_rate": 1.7418743955199157e-05, + "loss": 1.6208, + "step": 42732 + }, + { + "epoch": 0.51, + "grad_norm": 3.554333513032219, + "learning_rate": 1.741835222845683e-05, + "loss": 1.4614, + "step": 42735 + }, + { + "epoch": 0.51, + "grad_norm": 40.955841905145796, + "learning_rate": 1.7417960476398335e-05, + "loss": 1.209, + "step": 42738 + }, + { + "epoch": 0.51, + "grad_norm": 6.117077038103998, + "learning_rate": 1.7417568699025012e-05, + "loss": 1.6789, + "step": 42741 + }, + { + "epoch": 0.51, + "grad_norm": 17.783658363401862, + "learning_rate": 1.7417176896338204e-05, + "loss": 1.6287, + "step": 42744 + }, + { + "epoch": 0.51, + "grad_norm": 65.72989757458548, + "learning_rate": 1.7416785068339243e-05, + "loss": 1.5184, + "step": 42747 + }, + { + "epoch": 0.51, + "grad_norm": 18.4890805349596, + "learning_rate": 1.7416393215029463e-05, + "loss": 1.5807, + "step": 42750 + }, + { + "epoch": 0.51, + "grad_norm": 8.427364511103649, + "learning_rate": 1.7416001336410204e-05, + "loss": 1.5015, + "step": 42753 + }, + { + "epoch": 0.51, + "grad_norm": 6.049513821877612, + "learning_rate": 1.7415609432482806e-05, + "loss": 1.5225, + "step": 42756 + }, + { + "epoch": 0.51, + "grad_norm": 5.064419450996822, + "learning_rate": 1.7415217503248602e-05, + "loss": 1.5665, + "step": 42759 + }, + { + "epoch": 0.51, + "grad_norm": 18.167171939848355, + "learning_rate": 1.7414825548708932e-05, + "loss": 0.9366, + "step": 42762 + }, + { + "epoch": 0.51, + "grad_norm": 11.342406447884038, + "learning_rate": 1.7414433568865136e-05, + "loss": 1.2838, + "step": 42765 + }, + { + "epoch": 0.51, + "grad_norm": 10.280475403097242, + "learning_rate": 1.7414041563718545e-05, + "loss": 1.4544, + "step": 42768 + }, + { + "epoch": 0.51, + "grad_norm": 3.0265329862563064, + "learning_rate": 1.7413649533270503e-05, + "loss": 1.5565, + "step": 42771 + }, + { + "epoch": 0.51, + "grad_norm": 10.299153504132976, + "learning_rate": 1.7413257477522346e-05, + "loss": 1.5151, + "step": 42774 + }, + { + "epoch": 0.51, + "grad_norm": 8.957332876097297, + "learning_rate": 1.7412865396475413e-05, + "loss": 1.6086, + "step": 42777 + }, + { + "epoch": 0.51, + "grad_norm": 2.882276817824667, + "learning_rate": 1.7412473290131037e-05, + "loss": 1.4635, + "step": 42780 + }, + { + "epoch": 0.51, + "grad_norm": 14.612711892230289, + "learning_rate": 1.7412081158490558e-05, + "loss": 1.2927, + "step": 42783 + }, + { + "epoch": 0.51, + "grad_norm": 7.896758456841281, + "learning_rate": 1.741168900155532e-05, + "loss": 1.1895, + "step": 42786 + }, + { + "epoch": 0.51, + "grad_norm": 28.067885976591604, + "learning_rate": 1.7411296819326655e-05, + "loss": 1.2486, + "step": 42789 + }, + { + "epoch": 0.51, + "grad_norm": 9.650862409700458, + "learning_rate": 1.7410904611805905e-05, + "loss": 1.4757, + "step": 42792 + }, + { + "epoch": 0.51, + "grad_norm": 17.542559699917746, + "learning_rate": 1.7410512378994404e-05, + "loss": 1.684, + "step": 42795 + }, + { + "epoch": 0.51, + "grad_norm": 9.428807038311914, + "learning_rate": 1.74101201208935e-05, + "loss": 1.5982, + "step": 42798 + }, + { + "epoch": 0.51, + "grad_norm": 43.42111831323232, + "learning_rate": 1.7409727837504515e-05, + "loss": 1.4022, + "step": 42801 + }, + { + "epoch": 0.51, + "grad_norm": 8.964558842472394, + "learning_rate": 1.7409335528828807e-05, + "loss": 1.7911, + "step": 42804 + }, + { + "epoch": 0.51, + "grad_norm": 51.040342298709625, + "learning_rate": 1.74089431948677e-05, + "loss": 1.37, + "step": 42807 + }, + { + "epoch": 0.51, + "grad_norm": 14.355972231319356, + "learning_rate": 1.740855083562254e-05, + "loss": 1.1872, + "step": 42810 + }, + { + "epoch": 0.51, + "grad_norm": 9.874608569466583, + "learning_rate": 1.7408158451094666e-05, + "loss": 1.307, + "step": 42813 + }, + { + "epoch": 0.51, + "grad_norm": 15.502982091336724, + "learning_rate": 1.7407766041285416e-05, + "loss": 0.9831, + "step": 42816 + }, + { + "epoch": 0.51, + "grad_norm": 21.22776938965645, + "learning_rate": 1.740737360619613e-05, + "loss": 1.234, + "step": 42819 + }, + { + "epoch": 0.51, + "grad_norm": 41.80754851931265, + "learning_rate": 1.7406981145828142e-05, + "loss": 1.4312, + "step": 42822 + }, + { + "epoch": 0.51, + "grad_norm": 18.17947825120573, + "learning_rate": 1.7406588660182796e-05, + "loss": 1.485, + "step": 42825 + }, + { + "epoch": 0.51, + "grad_norm": 13.168500622372363, + "learning_rate": 1.740619614926143e-05, + "loss": 1.2394, + "step": 42828 + }, + { + "epoch": 0.52, + "grad_norm": 9.511035563258064, + "learning_rate": 1.7405803613065385e-05, + "loss": 1.4679, + "step": 42831 + }, + { + "epoch": 0.52, + "grad_norm": 17.716796183232738, + "learning_rate": 1.7405411051596e-05, + "loss": 1.43, + "step": 42834 + }, + { + "epoch": 0.52, + "grad_norm": 7.958170982752354, + "learning_rate": 1.740501846485462e-05, + "loss": 1.6142, + "step": 42837 + }, + { + "epoch": 0.52, + "grad_norm": 32.99401137890155, + "learning_rate": 1.740462585284257e-05, + "loss": 1.2826, + "step": 42840 + }, + { + "epoch": 0.52, + "grad_norm": 7.955326342501415, + "learning_rate": 1.7404233215561207e-05, + "loss": 1.6957, + "step": 42843 + }, + { + "epoch": 0.52, + "grad_norm": 5.479760144757501, + "learning_rate": 1.7403840553011857e-05, + "loss": 1.742, + "step": 42846 + }, + { + "epoch": 0.52, + "grad_norm": 10.902095623997791, + "learning_rate": 1.740344786519587e-05, + "loss": 1.7711, + "step": 42849 + }, + { + "epoch": 0.52, + "grad_norm": 17.26624106640756, + "learning_rate": 1.740305515211458e-05, + "loss": 1.631, + "step": 42852 + }, + { + "epoch": 0.52, + "grad_norm": 101.6299465833696, + "learning_rate": 1.740266241376933e-05, + "loss": 1.0808, + "step": 42855 + }, + { + "epoch": 0.52, + "grad_norm": 12.917130503289453, + "learning_rate": 1.740226965016146e-05, + "loss": 1.3245, + "step": 42858 + }, + { + "epoch": 0.52, + "grad_norm": 9.788534873066334, + "learning_rate": 1.740187686129231e-05, + "loss": 1.6445, + "step": 42861 + }, + { + "epoch": 0.52, + "grad_norm": 38.575300908442806, + "learning_rate": 1.740148404716322e-05, + "loss": 1.2423, + "step": 42864 + }, + { + "epoch": 0.52, + "grad_norm": 12.0823438338717, + "learning_rate": 1.7401091207775532e-05, + "loss": 1.3444, + "step": 42867 + }, + { + "epoch": 0.52, + "grad_norm": 9.54601972235295, + "learning_rate": 1.7400698343130586e-05, + "loss": 1.7476, + "step": 42870 + }, + { + "epoch": 0.52, + "grad_norm": 24.931190402056938, + "learning_rate": 1.740030545322972e-05, + "loss": 1.5876, + "step": 42873 + }, + { + "epoch": 0.52, + "grad_norm": 11.619186896536227, + "learning_rate": 1.7399912538074277e-05, + "loss": 1.7109, + "step": 42876 + }, + { + "epoch": 0.52, + "grad_norm": 16.71773729191073, + "learning_rate": 1.73995195976656e-05, + "loss": 1.1912, + "step": 42879 + }, + { + "epoch": 0.52, + "grad_norm": 30.331432413234364, + "learning_rate": 1.7399126632005026e-05, + "loss": 1.7244, + "step": 42882 + }, + { + "epoch": 0.52, + "grad_norm": 42.57851597372287, + "learning_rate": 1.73987336410939e-05, + "loss": 1.1638, + "step": 42885 + }, + { + "epoch": 0.52, + "grad_norm": 11.428478352449105, + "learning_rate": 1.739834062493356e-05, + "loss": 1.9506, + "step": 42888 + }, + { + "epoch": 0.52, + "grad_norm": 24.837571744956495, + "learning_rate": 1.7397947583525348e-05, + "loss": 1.8591, + "step": 42891 + }, + { + "epoch": 0.52, + "grad_norm": 7.13347355140137, + "learning_rate": 1.7397554516870606e-05, + "loss": 1.5251, + "step": 42894 + }, + { + "epoch": 0.52, + "grad_norm": 30.691413706580395, + "learning_rate": 1.7397161424970673e-05, + "loss": 1.5588, + "step": 42897 + }, + { + "epoch": 0.52, + "grad_norm": 16.443574160710867, + "learning_rate": 1.7396768307826896e-05, + "loss": 1.4787, + "step": 42900 + }, + { + "epoch": 0.52, + "grad_norm": 24.500548698988574, + "learning_rate": 1.7396375165440612e-05, + "loss": 1.3846, + "step": 42903 + }, + { + "epoch": 0.52, + "grad_norm": 35.81666828691776, + "learning_rate": 1.7395981997813162e-05, + "loss": 1.5517, + "step": 42906 + }, + { + "epoch": 0.52, + "grad_norm": 4.605606122791725, + "learning_rate": 1.739558880494589e-05, + "loss": 1.5732, + "step": 42909 + }, + { + "epoch": 0.52, + "grad_norm": 39.94859477483967, + "learning_rate": 1.739519558684014e-05, + "loss": 1.3485, + "step": 42912 + }, + { + "epoch": 0.52, + "grad_norm": 11.828975857254271, + "learning_rate": 1.739480234349725e-05, + "loss": 1.9423, + "step": 42915 + }, + { + "epoch": 0.52, + "grad_norm": 20.314721362000494, + "learning_rate": 1.7394409074918562e-05, + "loss": 1.9263, + "step": 42918 + }, + { + "epoch": 0.52, + "grad_norm": 30.160547413704748, + "learning_rate": 1.739401578110542e-05, + "loss": 1.1424, + "step": 42921 + }, + { + "epoch": 0.52, + "grad_norm": 4.909551356300639, + "learning_rate": 1.7393622462059162e-05, + "loss": 1.4313, + "step": 42924 + }, + { + "epoch": 0.52, + "grad_norm": 7.532816899493961, + "learning_rate": 1.7393229117781138e-05, + "loss": 1.1548, + "step": 42927 + }, + { + "epoch": 0.52, + "grad_norm": 25.77018854176208, + "learning_rate": 1.7392835748272686e-05, + "loss": 1.1888, + "step": 42930 + }, + { + "epoch": 0.52, + "grad_norm": 2.6243192255238372, + "learning_rate": 1.7392442353535145e-05, + "loss": 1.3698, + "step": 42933 + }, + { + "epoch": 0.52, + "grad_norm": 21.114724987473647, + "learning_rate": 1.7392048933569865e-05, + "loss": 1.4751, + "step": 42936 + }, + { + "epoch": 0.52, + "grad_norm": 20.72875581940713, + "learning_rate": 1.7391655488378183e-05, + "loss": 1.5706, + "step": 42939 + }, + { + "epoch": 0.52, + "grad_norm": 11.792788119326447, + "learning_rate": 1.7391262017961442e-05, + "loss": 1.1077, + "step": 42942 + }, + { + "epoch": 0.52, + "grad_norm": 14.0608947803046, + "learning_rate": 1.739086852232099e-05, + "loss": 1.5553, + "step": 42945 + }, + { + "epoch": 0.52, + "grad_norm": 5.386520593480524, + "learning_rate": 1.739047500145816e-05, + "loss": 1.9335, + "step": 42948 + }, + { + "epoch": 0.52, + "grad_norm": 60.55743694517957, + "learning_rate": 1.7390081455374304e-05, + "loss": 1.3881, + "step": 42951 + }, + { + "epoch": 0.52, + "grad_norm": 19.3812355116179, + "learning_rate": 1.7389687884070762e-05, + "loss": 1.1655, + "step": 42954 + }, + { + "epoch": 0.52, + "grad_norm": 3.1386368773264848, + "learning_rate": 1.7389294287548878e-05, + "loss": 1.4504, + "step": 42957 + }, + { + "epoch": 0.52, + "grad_norm": 6.744862375600886, + "learning_rate": 1.7388900665809993e-05, + "loss": 1.016, + "step": 42960 + }, + { + "epoch": 0.52, + "grad_norm": 19.4536310926549, + "learning_rate": 1.7388507018855452e-05, + "loss": 1.4661, + "step": 42963 + }, + { + "epoch": 0.52, + "grad_norm": 8.556090567284496, + "learning_rate": 1.7388113346686597e-05, + "loss": 1.4667, + "step": 42966 + }, + { + "epoch": 0.52, + "grad_norm": 4.149352751531919, + "learning_rate": 1.7387719649304773e-05, + "loss": 1.3849, + "step": 42969 + }, + { + "epoch": 0.52, + "grad_norm": 13.01776553797818, + "learning_rate": 1.738732592671132e-05, + "loss": 1.1558, + "step": 42972 + }, + { + "epoch": 0.52, + "grad_norm": 18.02971825349433, + "learning_rate": 1.738693217890759e-05, + "loss": 1.3596, + "step": 42975 + }, + { + "epoch": 0.52, + "grad_norm": 8.561816173201859, + "learning_rate": 1.7386538405894916e-05, + "loss": 1.4265, + "step": 42978 + }, + { + "epoch": 0.52, + "grad_norm": 13.98974486566813, + "learning_rate": 1.738614460767465e-05, + "loss": 1.3714, + "step": 42981 + }, + { + "epoch": 0.52, + "grad_norm": 21.99976369870103, + "learning_rate": 1.7385750784248133e-05, + "loss": 1.7014, + "step": 42984 + }, + { + "epoch": 0.52, + "grad_norm": 10.7505917036121, + "learning_rate": 1.738535693561671e-05, + "loss": 1.4777, + "step": 42987 + }, + { + "epoch": 0.52, + "grad_norm": 2.97331653011921, + "learning_rate": 1.7384963061781722e-05, + "loss": 1.2627, + "step": 42990 + }, + { + "epoch": 0.52, + "grad_norm": 44.94051110833207, + "learning_rate": 1.7384569162744514e-05, + "loss": 1.2346, + "step": 42993 + }, + { + "epoch": 0.52, + "grad_norm": 16.998750931210534, + "learning_rate": 1.7384175238506436e-05, + "loss": 1.2942, + "step": 42996 + }, + { + "epoch": 0.52, + "grad_norm": 5.113424367373453, + "learning_rate": 1.738378128906882e-05, + "loss": 1.2412, + "step": 42999 + }, + { + "epoch": 0.52, + "grad_norm": 23.111451719959334, + "learning_rate": 1.7383387314433028e-05, + "loss": 1.4942, + "step": 43002 + }, + { + "epoch": 0.52, + "grad_norm": 18.75762869829131, + "learning_rate": 1.7382993314600387e-05, + "loss": 1.5035, + "step": 43005 + }, + { + "epoch": 0.52, + "grad_norm": 54.08780904051834, + "learning_rate": 1.738259928957225e-05, + "loss": 1.3723, + "step": 43008 + }, + { + "epoch": 0.52, + "grad_norm": 9.564338728964708, + "learning_rate": 1.7382205239349964e-05, + "loss": 1.3136, + "step": 43011 + }, + { + "epoch": 0.52, + "grad_norm": 4.4794855611378415, + "learning_rate": 1.7381811163934865e-05, + "loss": 1.4563, + "step": 43014 + }, + { + "epoch": 0.52, + "grad_norm": 12.612107571501612, + "learning_rate": 1.738141706332831e-05, + "loss": 1.2777, + "step": 43017 + }, + { + "epoch": 0.52, + "grad_norm": 13.851009222140227, + "learning_rate": 1.7381022937531633e-05, + "loss": 1.3872, + "step": 43020 + }, + { + "epoch": 0.52, + "grad_norm": 8.29742517106057, + "learning_rate": 1.7380628786546186e-05, + "loss": 1.3046, + "step": 43023 + }, + { + "epoch": 0.52, + "grad_norm": 17.496887813757503, + "learning_rate": 1.738023461037331e-05, + "loss": 1.3984, + "step": 43026 + }, + { + "epoch": 0.52, + "grad_norm": 21.085199836053434, + "learning_rate": 1.7379840409014353e-05, + "loss": 1.1765, + "step": 43029 + }, + { + "epoch": 0.52, + "grad_norm": 15.770099537173596, + "learning_rate": 1.7379446182470657e-05, + "loss": 1.3005, + "step": 43032 + }, + { + "epoch": 0.52, + "grad_norm": 9.618240866553682, + "learning_rate": 1.737905193074357e-05, + "loss": 1.7711, + "step": 43035 + }, + { + "epoch": 0.52, + "grad_norm": 9.472273469864094, + "learning_rate": 1.7378657653834438e-05, + "loss": 1.554, + "step": 43038 + }, + { + "epoch": 0.52, + "grad_norm": 14.353224523632965, + "learning_rate": 1.73782633517446e-05, + "loss": 1.3337, + "step": 43041 + }, + { + "epoch": 0.52, + "grad_norm": 2.8470916034937948, + "learning_rate": 1.737786902447541e-05, + "loss": 1.5227, + "step": 43044 + }, + { + "epoch": 0.52, + "grad_norm": 9.309271449492318, + "learning_rate": 1.7377474672028212e-05, + "loss": 1.193, + "step": 43047 + }, + { + "epoch": 0.52, + "grad_norm": 10.899699404913877, + "learning_rate": 1.737708029440435e-05, + "loss": 0.9727, + "step": 43050 + }, + { + "epoch": 0.52, + "grad_norm": 21.725449181486873, + "learning_rate": 1.737668589160517e-05, + "loss": 1.1811, + "step": 43053 + }, + { + "epoch": 0.52, + "grad_norm": 17.066295515462468, + "learning_rate": 1.737629146363202e-05, + "loss": 1.4485, + "step": 43056 + }, + { + "epoch": 0.52, + "grad_norm": 13.669446216488751, + "learning_rate": 1.737589701048624e-05, + "loss": 1.659, + "step": 43059 + }, + { + "epoch": 0.52, + "grad_norm": 30.468143043795905, + "learning_rate": 1.7375502532169183e-05, + "loss": 1.7011, + "step": 43062 + }, + { + "epoch": 0.52, + "grad_norm": 6.547797111557273, + "learning_rate": 1.737510802868219e-05, + "loss": 1.7514, + "step": 43065 + }, + { + "epoch": 0.52, + "grad_norm": 11.97101540748118, + "learning_rate": 1.7374713500026612e-05, + "loss": 1.778, + "step": 43068 + }, + { + "epoch": 0.52, + "grad_norm": 15.079035251786284, + "learning_rate": 1.737431894620379e-05, + "loss": 1.5886, + "step": 43071 + }, + { + "epoch": 0.52, + "grad_norm": 12.750282142032937, + "learning_rate": 1.7373924367215077e-05, + "loss": 1.5562, + "step": 43074 + }, + { + "epoch": 0.52, + "grad_norm": 4.119541776649533, + "learning_rate": 1.737352976306182e-05, + "loss": 1.8019, + "step": 43077 + }, + { + "epoch": 0.52, + "grad_norm": 35.53715322807233, + "learning_rate": 1.7373135133745354e-05, + "loss": 1.5173, + "step": 43080 + }, + { + "epoch": 0.52, + "grad_norm": 18.07382776498145, + "learning_rate": 1.7372740479267037e-05, + "loss": 1.5003, + "step": 43083 + }, + { + "epoch": 0.52, + "grad_norm": 7.440643023027325, + "learning_rate": 1.7372345799628212e-05, + "loss": 1.2912, + "step": 43086 + }, + { + "epoch": 0.52, + "grad_norm": 60.59603094052349, + "learning_rate": 1.737195109483023e-05, + "loss": 1.5631, + "step": 43089 + }, + { + "epoch": 0.52, + "grad_norm": 5.997421345083076, + "learning_rate": 1.737155636487443e-05, + "loss": 1.4099, + "step": 43092 + }, + { + "epoch": 0.52, + "grad_norm": 42.831306370337934, + "learning_rate": 1.7371161609762163e-05, + "loss": 1.356, + "step": 43095 + }, + { + "epoch": 0.52, + "grad_norm": 17.20961572433317, + "learning_rate": 1.737076682949478e-05, + "loss": 1.5506, + "step": 43098 + }, + { + "epoch": 0.52, + "grad_norm": 15.944712947749858, + "learning_rate": 1.737037202407362e-05, + "loss": 1.2456, + "step": 43101 + }, + { + "epoch": 0.52, + "grad_norm": 10.846039386327915, + "learning_rate": 1.7369977193500037e-05, + "loss": 1.6029, + "step": 43104 + }, + { + "epoch": 0.52, + "grad_norm": 4.262961057943815, + "learning_rate": 1.736958233777538e-05, + "loss": 1.3533, + "step": 43107 + }, + { + "epoch": 0.52, + "grad_norm": 17.826393273292183, + "learning_rate": 1.736918745690099e-05, + "loss": 1.4099, + "step": 43110 + }, + { + "epoch": 0.52, + "grad_norm": 11.40241096876707, + "learning_rate": 1.7368792550878218e-05, + "loss": 1.1306, + "step": 43113 + }, + { + "epoch": 0.52, + "grad_norm": 42.348099414104404, + "learning_rate": 1.736839761970841e-05, + "loss": 1.6032, + "step": 43116 + }, + { + "epoch": 0.52, + "grad_norm": 25.90374789550767, + "learning_rate": 1.7368002663392917e-05, + "loss": 1.2795, + "step": 43119 + }, + { + "epoch": 0.52, + "grad_norm": 9.236004255151302, + "learning_rate": 1.7367607681933082e-05, + "loss": 1.5263, + "step": 43122 + }, + { + "epoch": 0.52, + "grad_norm": 17.4838531885245, + "learning_rate": 1.736721267533026e-05, + "loss": 1.6375, + "step": 43125 + }, + { + "epoch": 0.52, + "grad_norm": 19.78011901545971, + "learning_rate": 1.7366817643585792e-05, + "loss": 1.308, + "step": 43128 + }, + { + "epoch": 0.52, + "grad_norm": 33.635200623934026, + "learning_rate": 1.7366422586701027e-05, + "loss": 1.653, + "step": 43131 + }, + { + "epoch": 0.52, + "grad_norm": 19.349008029656986, + "learning_rate": 1.736602750467732e-05, + "loss": 1.2971, + "step": 43134 + }, + { + "epoch": 0.52, + "grad_norm": 10.386604849398116, + "learning_rate": 1.736563239751601e-05, + "loss": 1.5696, + "step": 43137 + }, + { + "epoch": 0.52, + "grad_norm": 8.296657205993688, + "learning_rate": 1.7365237265218453e-05, + "loss": 1.1048, + "step": 43140 + }, + { + "epoch": 0.52, + "grad_norm": 9.20133229786202, + "learning_rate": 1.736484210778599e-05, + "loss": 1.1751, + "step": 43143 + }, + { + "epoch": 0.52, + "grad_norm": 20.35245881985744, + "learning_rate": 1.736444692521998e-05, + "loss": 1.3651, + "step": 43146 + }, + { + "epoch": 0.52, + "grad_norm": 2.969800545188767, + "learning_rate": 1.736405171752176e-05, + "loss": 1.9683, + "step": 43149 + }, + { + "epoch": 0.52, + "grad_norm": 28.69207850877161, + "learning_rate": 1.7363656484692684e-05, + "loss": 1.4844, + "step": 43152 + }, + { + "epoch": 0.52, + "grad_norm": 15.377416089534746, + "learning_rate": 1.73632612267341e-05, + "loss": 1.3693, + "step": 43155 + }, + { + "epoch": 0.52, + "grad_norm": 7.49885861555719, + "learning_rate": 1.7362865943647365e-05, + "loss": 1.811, + "step": 43158 + }, + { + "epoch": 0.52, + "grad_norm": 24.68251801861401, + "learning_rate": 1.7362470635433814e-05, + "loss": 1.4472, + "step": 43161 + }, + { + "epoch": 0.52, + "grad_norm": 18.288086305711165, + "learning_rate": 1.7362075302094805e-05, + "loss": 1.2829, + "step": 43164 + }, + { + "epoch": 0.52, + "grad_norm": 5.660380010980221, + "learning_rate": 1.7361679943631684e-05, + "loss": 1.237, + "step": 43167 + }, + { + "epoch": 0.52, + "grad_norm": 9.400317524324722, + "learning_rate": 1.73612845600458e-05, + "loss": 1.4821, + "step": 43170 + }, + { + "epoch": 0.52, + "grad_norm": 10.390415631133827, + "learning_rate": 1.7360889151338508e-05, + "loss": 1.3935, + "step": 43173 + }, + { + "epoch": 0.52, + "grad_norm": 9.969129047083513, + "learning_rate": 1.7360493717511145e-05, + "loss": 1.3127, + "step": 43176 + }, + { + "epoch": 0.52, + "grad_norm": 7.66877301502384, + "learning_rate": 1.7360098258565075e-05, + "loss": 1.6496, + "step": 43179 + }, + { + "epoch": 0.52, + "grad_norm": 37.687327595481115, + "learning_rate": 1.735970277450164e-05, + "loss": 1.439, + "step": 43182 + }, + { + "epoch": 0.52, + "grad_norm": 7.872503688920751, + "learning_rate": 1.735930726532219e-05, + "loss": 1.1851, + "step": 43185 + }, + { + "epoch": 0.52, + "grad_norm": 18.247626594742187, + "learning_rate": 1.735891173102807e-05, + "loss": 1.4011, + "step": 43188 + }, + { + "epoch": 0.52, + "grad_norm": 20.43534746808547, + "learning_rate": 1.735851617162064e-05, + "loss": 1.4196, + "step": 43191 + }, + { + "epoch": 0.52, + "grad_norm": 19.144403860291362, + "learning_rate": 1.735812058710124e-05, + "loss": 1.5618, + "step": 43194 + }, + { + "epoch": 0.52, + "grad_norm": 30.17809687968012, + "learning_rate": 1.735772497747123e-05, + "loss": 1.0994, + "step": 43197 + }, + { + "epoch": 0.52, + "grad_norm": 39.29287375929313, + "learning_rate": 1.7357329342731953e-05, + "loss": 1.5895, + "step": 43200 + }, + { + "epoch": 0.52, + "grad_norm": 17.763417841826115, + "learning_rate": 1.7356933682884762e-05, + "loss": 1.5478, + "step": 43203 + }, + { + "epoch": 0.52, + "grad_norm": 7.895165996899754, + "learning_rate": 1.7356537997931003e-05, + "loss": 1.4662, + "step": 43206 + }, + { + "epoch": 0.52, + "grad_norm": 10.482716261575431, + "learning_rate": 1.7356142287872034e-05, + "loss": 1.6533, + "step": 43209 + }, + { + "epoch": 0.52, + "grad_norm": 11.061029367588878, + "learning_rate": 1.7355746552709198e-05, + "loss": 1.543, + "step": 43212 + }, + { + "epoch": 0.52, + "grad_norm": 21.498010233611254, + "learning_rate": 1.735535079244385e-05, + "loss": 1.6731, + "step": 43215 + }, + { + "epoch": 0.52, + "grad_norm": 9.217414977167566, + "learning_rate": 1.735495500707734e-05, + "loss": 1.3106, + "step": 43218 + }, + { + "epoch": 0.52, + "grad_norm": 9.944856606333248, + "learning_rate": 1.7354559196611017e-05, + "loss": 1.4215, + "step": 43221 + }, + { + "epoch": 0.52, + "grad_norm": 81.36572815757556, + "learning_rate": 1.735416336104623e-05, + "loss": 1.4397, + "step": 43224 + }, + { + "epoch": 0.52, + "grad_norm": 13.691105353698537, + "learning_rate": 1.7353767500384337e-05, + "loss": 1.0972, + "step": 43227 + }, + { + "epoch": 0.52, + "grad_norm": 7.985386097912337, + "learning_rate": 1.7353371614626683e-05, + "loss": 1.2635, + "step": 43230 + }, + { + "epoch": 0.52, + "grad_norm": 10.188626621449536, + "learning_rate": 1.7352975703774617e-05, + "loss": 1.2423, + "step": 43233 + }, + { + "epoch": 0.52, + "grad_norm": 32.05942861684462, + "learning_rate": 1.7352579767829495e-05, + "loss": 1.5877, + "step": 43236 + }, + { + "epoch": 0.52, + "grad_norm": 10.575105248366489, + "learning_rate": 1.7352183806792668e-05, + "loss": 1.4629, + "step": 43239 + }, + { + "epoch": 0.52, + "grad_norm": 3.7540535579233807, + "learning_rate": 1.7351787820665486e-05, + "loss": 1.2359, + "step": 43242 + }, + { + "epoch": 0.52, + "grad_norm": 15.987868862232675, + "learning_rate": 1.73513918094493e-05, + "loss": 1.3622, + "step": 43245 + }, + { + "epoch": 0.52, + "grad_norm": 9.529604628363035, + "learning_rate": 1.735099577314546e-05, + "loss": 1.2097, + "step": 43248 + }, + { + "epoch": 0.52, + "grad_norm": 12.36502481664389, + "learning_rate": 1.735059971175532e-05, + "loss": 1.5287, + "step": 43251 + }, + { + "epoch": 0.52, + "grad_norm": 16.48175101583092, + "learning_rate": 1.7350203625280233e-05, + "loss": 1.4971, + "step": 43254 + }, + { + "epoch": 0.52, + "grad_norm": 6.525452793378306, + "learning_rate": 1.7349807513721546e-05, + "loss": 1.2228, + "step": 43257 + }, + { + "epoch": 0.52, + "grad_norm": 22.033369264368492, + "learning_rate": 1.7349411377080613e-05, + "loss": 1.1695, + "step": 43260 + }, + { + "epoch": 0.52, + "grad_norm": 17.893310351376567, + "learning_rate": 1.7349015215358788e-05, + "loss": 1.8127, + "step": 43263 + }, + { + "epoch": 0.52, + "grad_norm": 11.67912363903375, + "learning_rate": 1.734861902855742e-05, + "loss": 1.5285, + "step": 43266 + }, + { + "epoch": 0.52, + "grad_norm": 18.19134493105683, + "learning_rate": 1.7348222816677866e-05, + "loss": 1.8057, + "step": 43269 + }, + { + "epoch": 0.52, + "grad_norm": 17.165524499982766, + "learning_rate": 1.734782657972147e-05, + "loss": 1.4217, + "step": 43272 + }, + { + "epoch": 0.52, + "grad_norm": 3.4440647869755345, + "learning_rate": 1.7347430317689588e-05, + "loss": 1.2797, + "step": 43275 + }, + { + "epoch": 0.52, + "grad_norm": 39.94852072554385, + "learning_rate": 1.7347034030583576e-05, + "loss": 1.3109, + "step": 43278 + }, + { + "epoch": 0.52, + "grad_norm": 9.971244999503487, + "learning_rate": 1.7346637718404783e-05, + "loss": 1.6772, + "step": 43281 + }, + { + "epoch": 0.52, + "grad_norm": 22.033386220734904, + "learning_rate": 1.734624138115456e-05, + "loss": 1.4043, + "step": 43284 + }, + { + "epoch": 0.52, + "grad_norm": 10.828741112544193, + "learning_rate": 1.734584501883426e-05, + "loss": 1.2061, + "step": 43287 + }, + { + "epoch": 0.52, + "grad_norm": 79.50404811957371, + "learning_rate": 1.734544863144524e-05, + "loss": 1.9688, + "step": 43290 + }, + { + "epoch": 0.52, + "grad_norm": 6.026253744821807, + "learning_rate": 1.734505221898885e-05, + "loss": 1.5737, + "step": 43293 + }, + { + "epoch": 0.52, + "grad_norm": 25.5703471693355, + "learning_rate": 1.7344655781466438e-05, + "loss": 1.7741, + "step": 43296 + }, + { + "epoch": 0.52, + "grad_norm": 12.355846375331978, + "learning_rate": 1.7344259318879367e-05, + "loss": 1.1852, + "step": 43299 + }, + { + "epoch": 0.52, + "grad_norm": 15.399832866048273, + "learning_rate": 1.734386283122898e-05, + "loss": 1.6037, + "step": 43302 + }, + { + "epoch": 0.52, + "grad_norm": 34.924846569034386, + "learning_rate": 1.7343466318516634e-05, + "loss": 1.4278, + "step": 43305 + }, + { + "epoch": 0.52, + "grad_norm": 19.18134330262153, + "learning_rate": 1.7343069780743683e-05, + "loss": 1.4913, + "step": 43308 + }, + { + "epoch": 0.52, + "grad_norm": 19.511735991494028, + "learning_rate": 1.7342673217911486e-05, + "loss": 1.6464, + "step": 43311 + }, + { + "epoch": 0.52, + "grad_norm": 18.66898810280927, + "learning_rate": 1.7342276630021384e-05, + "loss": 1.4927, + "step": 43314 + }, + { + "epoch": 0.52, + "grad_norm": 80.76024031478485, + "learning_rate": 1.734188001707474e-05, + "loss": 1.2266, + "step": 43317 + }, + { + "epoch": 0.52, + "grad_norm": 22.409445985290024, + "learning_rate": 1.7341483379072902e-05, + "loss": 1.7816, + "step": 43320 + }, + { + "epoch": 0.52, + "grad_norm": 9.760773091131778, + "learning_rate": 1.7341086716017222e-05, + "loss": 1.462, + "step": 43323 + }, + { + "epoch": 0.52, + "grad_norm": 26.33474504369664, + "learning_rate": 1.7340690027909062e-05, + "loss": 1.3943, + "step": 43326 + }, + { + "epoch": 0.52, + "grad_norm": 47.87899725353082, + "learning_rate": 1.734029331474977e-05, + "loss": 1.6061, + "step": 43329 + }, + { + "epoch": 0.52, + "grad_norm": 18.338740389866796, + "learning_rate": 1.7339896576540703e-05, + "loss": 1.5203, + "step": 43332 + }, + { + "epoch": 0.52, + "grad_norm": 10.030985723117675, + "learning_rate": 1.733949981328321e-05, + "loss": 1.2219, + "step": 43335 + }, + { + "epoch": 0.52, + "grad_norm": 21.880059729283424, + "learning_rate": 1.733910302497865e-05, + "loss": 1.4208, + "step": 43338 + }, + { + "epoch": 0.52, + "grad_norm": 13.179010899981186, + "learning_rate": 1.733870621162837e-05, + "loss": 1.5968, + "step": 43341 + }, + { + "epoch": 0.52, + "grad_norm": 6.870436732527547, + "learning_rate": 1.7338309373233736e-05, + "loss": 1.632, + "step": 43344 + }, + { + "epoch": 0.52, + "grad_norm": 18.7880968331432, + "learning_rate": 1.7337912509796088e-05, + "loss": 1.5117, + "step": 43347 + }, + { + "epoch": 0.52, + "grad_norm": 5.516863894317102, + "learning_rate": 1.7337515621316792e-05, + "loss": 1.2209, + "step": 43350 + }, + { + "epoch": 0.52, + "grad_norm": 17.681134755885388, + "learning_rate": 1.7337118707797197e-05, + "loss": 1.4766, + "step": 43353 + }, + { + "epoch": 0.52, + "grad_norm": 4.422608555241051, + "learning_rate": 1.733672176923866e-05, + "loss": 1.5675, + "step": 43356 + }, + { + "epoch": 0.52, + "grad_norm": 5.29288047375769, + "learning_rate": 1.7336324805642534e-05, + "loss": 1.7949, + "step": 43359 + }, + { + "epoch": 0.52, + "grad_norm": 5.775604522461554, + "learning_rate": 1.733592781701017e-05, + "loss": 1.454, + "step": 43362 + }, + { + "epoch": 0.52, + "grad_norm": 14.764450289764165, + "learning_rate": 1.7335530803342933e-05, + "loss": 1.9208, + "step": 43365 + }, + { + "epoch": 0.52, + "grad_norm": 3.2334625712051652, + "learning_rate": 1.733513376464217e-05, + "loss": 1.1638, + "step": 43368 + }, + { + "epoch": 0.52, + "grad_norm": 93.35626430366165, + "learning_rate": 1.7334736700909237e-05, + "loss": 1.4897, + "step": 43371 + }, + { + "epoch": 0.52, + "grad_norm": 12.065193193334187, + "learning_rate": 1.7334339612145488e-05, + "loss": 1.2077, + "step": 43374 + }, + { + "epoch": 0.52, + "grad_norm": 17.489479286716836, + "learning_rate": 1.733394249835228e-05, + "loss": 1.6617, + "step": 43377 + }, + { + "epoch": 0.52, + "grad_norm": 7.828317970374133, + "learning_rate": 1.733354535953097e-05, + "loss": 1.7746, + "step": 43380 + }, + { + "epoch": 0.52, + "grad_norm": 12.16850621468438, + "learning_rate": 1.733314819568291e-05, + "loss": 1.5521, + "step": 43383 + }, + { + "epoch": 0.52, + "grad_norm": 21.81795844246213, + "learning_rate": 1.7332751006809458e-05, + "loss": 1.2324, + "step": 43386 + }, + { + "epoch": 0.52, + "grad_norm": 23.906281574883693, + "learning_rate": 1.7332353792911965e-05, + "loss": 1.0203, + "step": 43389 + }, + { + "epoch": 0.52, + "grad_norm": 12.833810122917228, + "learning_rate": 1.733195655399179e-05, + "loss": 1.3266, + "step": 43392 + }, + { + "epoch": 0.52, + "grad_norm": 9.6369789703061, + "learning_rate": 1.733155929005029e-05, + "loss": 1.6918, + "step": 43395 + }, + { + "epoch": 0.52, + "grad_norm": 7.298161366101971, + "learning_rate": 1.733116200108882e-05, + "loss": 1.4976, + "step": 43398 + }, + { + "epoch": 0.52, + "grad_norm": 12.219772601567422, + "learning_rate": 1.7330764687108734e-05, + "loss": 1.1917, + "step": 43401 + }, + { + "epoch": 0.52, + "grad_norm": 12.072511767524025, + "learning_rate": 1.7330367348111384e-05, + "loss": 1.3698, + "step": 43404 + }, + { + "epoch": 0.52, + "grad_norm": 7.111468685701827, + "learning_rate": 1.7329969984098135e-05, + "loss": 1.3626, + "step": 43407 + }, + { + "epoch": 0.52, + "grad_norm": 9.740940023886237, + "learning_rate": 1.732957259507034e-05, + "loss": 1.6881, + "step": 43410 + }, + { + "epoch": 0.52, + "grad_norm": 8.538964897297653, + "learning_rate": 1.732917518102935e-05, + "loss": 1.4085, + "step": 43413 + }, + { + "epoch": 0.52, + "grad_norm": 19.92580435686441, + "learning_rate": 1.7328777741976528e-05, + "loss": 1.4091, + "step": 43416 + }, + { + "epoch": 0.52, + "grad_norm": 40.20050698602851, + "learning_rate": 1.7328380277913228e-05, + "loss": 1.6988, + "step": 43419 + }, + { + "epoch": 0.52, + "grad_norm": 27.48767244075801, + "learning_rate": 1.7327982788840803e-05, + "loss": 1.6029, + "step": 43422 + }, + { + "epoch": 0.52, + "grad_norm": 13.092480670388365, + "learning_rate": 1.732758527476061e-05, + "loss": 1.9657, + "step": 43425 + }, + { + "epoch": 0.52, + "grad_norm": 15.702490492380521, + "learning_rate": 1.7327187735674017e-05, + "loss": 1.09, + "step": 43428 + }, + { + "epoch": 0.52, + "grad_norm": 15.03731574627313, + "learning_rate": 1.7326790171582364e-05, + "loss": 1.4176, + "step": 43431 + }, + { + "epoch": 0.52, + "grad_norm": 38.332617586902984, + "learning_rate": 1.7326392582487016e-05, + "loss": 1.3434, + "step": 43434 + }, + { + "epoch": 0.52, + "grad_norm": 34.09988733509042, + "learning_rate": 1.732599496838933e-05, + "loss": 1.4836, + "step": 43437 + }, + { + "epoch": 0.52, + "grad_norm": 6.819716351378554, + "learning_rate": 1.7325597329290662e-05, + "loss": 1.4862, + "step": 43440 + }, + { + "epoch": 0.52, + "grad_norm": 124.0648078932187, + "learning_rate": 1.732519966519237e-05, + "loss": 1.4528, + "step": 43443 + }, + { + "epoch": 0.52, + "grad_norm": 53.03584490469922, + "learning_rate": 1.732480197609581e-05, + "loss": 1.3748, + "step": 43446 + }, + { + "epoch": 0.52, + "grad_norm": 9.385440484463059, + "learning_rate": 1.7324404262002338e-05, + "loss": 1.3194, + "step": 43449 + }, + { + "epoch": 0.52, + "grad_norm": 16.054235866376306, + "learning_rate": 1.7324006522913312e-05, + "loss": 1.4977, + "step": 43452 + }, + { + "epoch": 0.52, + "grad_norm": 38.729873990540916, + "learning_rate": 1.7323608758830092e-05, + "loss": 1.8236, + "step": 43455 + }, + { + "epoch": 0.52, + "grad_norm": 86.69871530736687, + "learning_rate": 1.7323210969754033e-05, + "loss": 1.672, + "step": 43458 + }, + { + "epoch": 0.52, + "grad_norm": 20.43300006221696, + "learning_rate": 1.732281315568649e-05, + "loss": 1.6269, + "step": 43461 + }, + { + "epoch": 0.52, + "grad_norm": 68.27169494437508, + "learning_rate": 1.7322415316628823e-05, + "loss": 1.2213, + "step": 43464 + }, + { + "epoch": 0.52, + "grad_norm": 5.153011420826823, + "learning_rate": 1.7322017452582397e-05, + "loss": 1.3653, + "step": 43467 + }, + { + "epoch": 0.52, + "grad_norm": 36.07969222550072, + "learning_rate": 1.7321619563548556e-05, + "loss": 1.8179, + "step": 43470 + }, + { + "epoch": 0.52, + "grad_norm": 16.466713610630496, + "learning_rate": 1.7321221649528666e-05, + "loss": 1.3039, + "step": 43473 + }, + { + "epoch": 0.52, + "grad_norm": 6.626840520841397, + "learning_rate": 1.7320823710524086e-05, + "loss": 1.323, + "step": 43476 + }, + { + "epoch": 0.52, + "grad_norm": 97.85444800128879, + "learning_rate": 1.7320425746536167e-05, + "loss": 1.2924, + "step": 43479 + }, + { + "epoch": 0.52, + "grad_norm": 7.124290680244137, + "learning_rate": 1.7320027757566275e-05, + "loss": 1.4144, + "step": 43482 + }, + { + "epoch": 0.52, + "grad_norm": 70.28968661005882, + "learning_rate": 1.7319629743615766e-05, + "loss": 1.6227, + "step": 43485 + }, + { + "epoch": 0.52, + "grad_norm": 14.702822063875821, + "learning_rate": 1.7319231704685994e-05, + "loss": 1.6771, + "step": 43488 + }, + { + "epoch": 0.52, + "grad_norm": 4.618727335855211, + "learning_rate": 1.7318833640778322e-05, + "loss": 1.3752, + "step": 43491 + }, + { + "epoch": 0.52, + "grad_norm": 18.753464794530483, + "learning_rate": 1.7318435551894108e-05, + "loss": 1.3562, + "step": 43494 + }, + { + "epoch": 0.52, + "grad_norm": 26.197294301385675, + "learning_rate": 1.731803743803471e-05, + "loss": 1.5215, + "step": 43497 + }, + { + "epoch": 0.52, + "grad_norm": 12.067787289516238, + "learning_rate": 1.7317639299201485e-05, + "loss": 1.3344, + "step": 43500 + }, + { + "epoch": 0.52, + "grad_norm": 11.101644884527401, + "learning_rate": 1.7317241135395794e-05, + "loss": 1.2855, + "step": 43503 + }, + { + "epoch": 0.52, + "grad_norm": 9.13974493827986, + "learning_rate": 1.7316842946618992e-05, + "loss": 1.2771, + "step": 43506 + }, + { + "epoch": 0.52, + "grad_norm": 2.3986638598761445, + "learning_rate": 1.7316444732872445e-05, + "loss": 1.2741, + "step": 43509 + }, + { + "epoch": 0.52, + "grad_norm": 11.862946070420243, + "learning_rate": 1.73160464941575e-05, + "loss": 1.4479, + "step": 43512 + }, + { + "epoch": 0.52, + "grad_norm": 16.869874263115072, + "learning_rate": 1.731564823047553e-05, + "loss": 1.0878, + "step": 43515 + }, + { + "epoch": 0.52, + "grad_norm": 5.617792605155777, + "learning_rate": 1.7315249941827885e-05, + "loss": 1.47, + "step": 43518 + }, + { + "epoch": 0.52, + "grad_norm": 5.645925683627124, + "learning_rate": 1.731485162821593e-05, + "loss": 1.2702, + "step": 43521 + }, + { + "epoch": 0.52, + "grad_norm": 16.314883974956327, + "learning_rate": 1.731445328964102e-05, + "loss": 1.4558, + "step": 43524 + }, + { + "epoch": 0.52, + "grad_norm": 14.267111205048508, + "learning_rate": 1.731405492610451e-05, + "loss": 1.2164, + "step": 43527 + }, + { + "epoch": 0.52, + "grad_norm": 11.37738362644344, + "learning_rate": 1.731365653760777e-05, + "loss": 1.6679, + "step": 43530 + }, + { + "epoch": 0.52, + "grad_norm": 41.356808528446784, + "learning_rate": 1.7313258124152154e-05, + "loss": 1.533, + "step": 43533 + }, + { + "epoch": 0.52, + "grad_norm": 37.301307325414605, + "learning_rate": 1.7312859685739022e-05, + "loss": 1.3567, + "step": 43536 + }, + { + "epoch": 0.52, + "grad_norm": 11.598589718635072, + "learning_rate": 1.7312461222369737e-05, + "loss": 1.2545, + "step": 43539 + }, + { + "epoch": 0.52, + "grad_norm": 20.546090494929572, + "learning_rate": 1.7312062734045653e-05, + "loss": 1.6041, + "step": 43542 + }, + { + "epoch": 0.52, + "grad_norm": 20.023098619562607, + "learning_rate": 1.731166422076813e-05, + "loss": 1.2157, + "step": 43545 + }, + { + "epoch": 0.52, + "grad_norm": 4.322699647894746, + "learning_rate": 1.7311265682538535e-05, + "loss": 1.6184, + "step": 43548 + }, + { + "epoch": 0.52, + "grad_norm": 14.622771558353516, + "learning_rate": 1.7310867119358225e-05, + "loss": 1.3882, + "step": 43551 + }, + { + "epoch": 0.52, + "grad_norm": 23.8360718684409, + "learning_rate": 1.7310468531228553e-05, + "loss": 1.5167, + "step": 43554 + }, + { + "epoch": 0.52, + "grad_norm": 7.354346534216296, + "learning_rate": 1.7310069918150888e-05, + "loss": 1.5405, + "step": 43557 + }, + { + "epoch": 0.52, + "grad_norm": 5.552967326741971, + "learning_rate": 1.730967128012659e-05, + "loss": 1.4561, + "step": 43560 + }, + { + "epoch": 0.52, + "grad_norm": 24.877301623417104, + "learning_rate": 1.7309272617157015e-05, + "loss": 1.436, + "step": 43563 + }, + { + "epoch": 0.52, + "grad_norm": 16.456154844408186, + "learning_rate": 1.7308873929243525e-05, + "loss": 1.6573, + "step": 43566 + }, + { + "epoch": 0.52, + "grad_norm": 6.425402363972008, + "learning_rate": 1.730847521638748e-05, + "loss": 1.1757, + "step": 43569 + }, + { + "epoch": 0.52, + "grad_norm": 24.427021938209677, + "learning_rate": 1.730807647859024e-05, + "loss": 1.6908, + "step": 43572 + }, + { + "epoch": 0.52, + "grad_norm": 5.463157644402315, + "learning_rate": 1.730767771585317e-05, + "loss": 1.7515, + "step": 43575 + }, + { + "epoch": 0.52, + "grad_norm": 19.659035826737846, + "learning_rate": 1.730727892817763e-05, + "loss": 1.2106, + "step": 43578 + }, + { + "epoch": 0.52, + "grad_norm": 16.205762082063373, + "learning_rate": 1.7306880115564975e-05, + "loss": 1.9211, + "step": 43581 + }, + { + "epoch": 0.52, + "grad_norm": 16.028790061353167, + "learning_rate": 1.7306481278016575e-05, + "loss": 1.2571, + "step": 43584 + }, + { + "epoch": 0.52, + "grad_norm": 7.3378900471405855, + "learning_rate": 1.7306082415533782e-05, + "loss": 1.4578, + "step": 43587 + }, + { + "epoch": 0.52, + "grad_norm": 3.7013510494130606, + "learning_rate": 1.730568352811796e-05, + "loss": 1.605, + "step": 43590 + }, + { + "epoch": 0.52, + "grad_norm": 5.987339483355406, + "learning_rate": 1.7305284615770474e-05, + "loss": 1.431, + "step": 43593 + }, + { + "epoch": 0.52, + "grad_norm": 19.782753936027042, + "learning_rate": 1.7304885678492686e-05, + "loss": 1.4942, + "step": 43596 + }, + { + "epoch": 0.52, + "grad_norm": 8.812812908837525, + "learning_rate": 1.730448671628595e-05, + "loss": 1.6795, + "step": 43599 + }, + { + "epoch": 0.52, + "grad_norm": 43.314612750150495, + "learning_rate": 1.7304087729151633e-05, + "loss": 1.3846, + "step": 43602 + }, + { + "epoch": 0.52, + "grad_norm": 7.167969106062149, + "learning_rate": 1.73036887170911e-05, + "loss": 1.4677, + "step": 43605 + }, + { + "epoch": 0.52, + "grad_norm": 21.636839187657117, + "learning_rate": 1.7303289680105702e-05, + "loss": 1.3717, + "step": 43608 + }, + { + "epoch": 0.52, + "grad_norm": 20.502136672926394, + "learning_rate": 1.7302890618196808e-05, + "loss": 1.366, + "step": 43611 + }, + { + "epoch": 0.52, + "grad_norm": 9.953279178859045, + "learning_rate": 1.7302491531365778e-05, + "loss": 1.1905, + "step": 43614 + }, + { + "epoch": 0.52, + "grad_norm": 16.541782216941467, + "learning_rate": 1.730209241961398e-05, + "loss": 1.4647, + "step": 43617 + }, + { + "epoch": 0.52, + "grad_norm": 23.930689379451007, + "learning_rate": 1.7301693282942765e-05, + "loss": 1.4781, + "step": 43620 + }, + { + "epoch": 0.52, + "grad_norm": 14.42664854109964, + "learning_rate": 1.7301294121353506e-05, + "loss": 1.5782, + "step": 43623 + }, + { + "epoch": 0.52, + "grad_norm": 13.739969210284166, + "learning_rate": 1.7300894934847552e-05, + "loss": 1.4342, + "step": 43626 + }, + { + "epoch": 0.52, + "grad_norm": 8.088405081207279, + "learning_rate": 1.7300495723426284e-05, + "loss": 1.5558, + "step": 43629 + }, + { + "epoch": 0.52, + "grad_norm": 11.673506883426455, + "learning_rate": 1.7300096487091046e-05, + "loss": 1.732, + "step": 43632 + }, + { + "epoch": 0.52, + "grad_norm": 15.05580350776564, + "learning_rate": 1.729969722584321e-05, + "loss": 1.1165, + "step": 43635 + }, + { + "epoch": 0.52, + "grad_norm": 6.896737398921462, + "learning_rate": 1.729929793968414e-05, + "loss": 1.1809, + "step": 43638 + }, + { + "epoch": 0.52, + "grad_norm": 12.892898762423426, + "learning_rate": 1.7298898628615194e-05, + "loss": 1.2091, + "step": 43641 + }, + { + "epoch": 0.52, + "grad_norm": 90.89644406607076, + "learning_rate": 1.7298499292637735e-05, + "loss": 1.2303, + "step": 43644 + }, + { + "epoch": 0.52, + "grad_norm": 10.936595144016323, + "learning_rate": 1.7298099931753122e-05, + "loss": 1.4647, + "step": 43647 + }, + { + "epoch": 0.52, + "grad_norm": 60.893790656497224, + "learning_rate": 1.729770054596273e-05, + "loss": 1.142, + "step": 43650 + }, + { + "epoch": 0.52, + "grad_norm": 18.87914878113115, + "learning_rate": 1.7297301135267913e-05, + "loss": 1.4146, + "step": 43653 + }, + { + "epoch": 0.52, + "grad_norm": 32.31262712712846, + "learning_rate": 1.729690169967003e-05, + "loss": 1.5057, + "step": 43656 + }, + { + "epoch": 0.52, + "grad_norm": 10.245863403845345, + "learning_rate": 1.7296502239170457e-05, + "loss": 1.4826, + "step": 43659 + }, + { + "epoch": 0.53, + "grad_norm": 29.06772699932471, + "learning_rate": 1.7296102753770548e-05, + "loss": 1.46, + "step": 43662 + }, + { + "epoch": 0.53, + "grad_norm": 36.497860920791574, + "learning_rate": 1.7295703243471668e-05, + "loss": 1.9904, + "step": 43665 + }, + { + "epoch": 0.53, + "grad_norm": 5.885999454214276, + "learning_rate": 1.729530370827518e-05, + "loss": 1.2685, + "step": 43668 + }, + { + "epoch": 0.53, + "grad_norm": 6.929173693540508, + "learning_rate": 1.729490414818245e-05, + "loss": 1.5137, + "step": 43671 + }, + { + "epoch": 0.53, + "grad_norm": 9.6632258258062, + "learning_rate": 1.7294504563194835e-05, + "loss": 1.1309, + "step": 43674 + }, + { + "epoch": 0.53, + "grad_norm": 17.66538545834127, + "learning_rate": 1.7294104953313707e-05, + "loss": 1.4, + "step": 43677 + }, + { + "epoch": 0.53, + "grad_norm": 12.533600907021526, + "learning_rate": 1.7293705318540425e-05, + "loss": 1.3104, + "step": 43680 + }, + { + "epoch": 0.53, + "grad_norm": 12.535323830596004, + "learning_rate": 1.7293305658876355e-05, + "loss": 1.3501, + "step": 43683 + }, + { + "epoch": 0.53, + "grad_norm": 33.45729299614545, + "learning_rate": 1.7292905974322862e-05, + "loss": 1.3745, + "step": 43686 + }, + { + "epoch": 0.53, + "grad_norm": 5.147396455866746, + "learning_rate": 1.7292506264881305e-05, + "loss": 1.4747, + "step": 43689 + }, + { + "epoch": 0.53, + "grad_norm": 3.6513134367367908, + "learning_rate": 1.7292106530553052e-05, + "loss": 1.1938, + "step": 43692 + }, + { + "epoch": 0.53, + "grad_norm": 15.467907609515507, + "learning_rate": 1.7291706771339466e-05, + "loss": 1.8706, + "step": 43695 + }, + { + "epoch": 0.53, + "grad_norm": 11.779925686498688, + "learning_rate": 1.7291306987241913e-05, + "loss": 1.4156, + "step": 43698 + }, + { + "epoch": 0.53, + "grad_norm": 3.479531311105223, + "learning_rate": 1.729090717826175e-05, + "loss": 1.8732, + "step": 43701 + }, + { + "epoch": 0.53, + "grad_norm": 16.555177974064776, + "learning_rate": 1.7290507344400353e-05, + "loss": 1.8057, + "step": 43704 + }, + { + "epoch": 0.53, + "grad_norm": 16.214906546150686, + "learning_rate": 1.7290107485659074e-05, + "loss": 1.4741, + "step": 43707 + }, + { + "epoch": 0.53, + "grad_norm": 15.783620633799467, + "learning_rate": 1.728970760203929e-05, + "loss": 1.6948, + "step": 43710 + }, + { + "epoch": 0.53, + "grad_norm": 13.45562656963439, + "learning_rate": 1.7289307693542357e-05, + "loss": 1.5028, + "step": 43713 + }, + { + "epoch": 0.53, + "grad_norm": 24.170858126510627, + "learning_rate": 1.7288907760169643e-05, + "loss": 1.3937, + "step": 43716 + }, + { + "epoch": 0.53, + "grad_norm": 16.82177036781926, + "learning_rate": 1.728850780192251e-05, + "loss": 1.5055, + "step": 43719 + }, + { + "epoch": 0.53, + "grad_norm": 17.700512495378245, + "learning_rate": 1.728810781880233e-05, + "loss": 1.503, + "step": 43722 + }, + { + "epoch": 0.53, + "grad_norm": 32.90414945869578, + "learning_rate": 1.728770781081046e-05, + "loss": 1.1514, + "step": 43725 + }, + { + "epoch": 0.53, + "grad_norm": 72.42036958009697, + "learning_rate": 1.7287307777948268e-05, + "loss": 1.1079, + "step": 43728 + }, + { + "epoch": 0.53, + "grad_norm": 29.776632866899405, + "learning_rate": 1.7286907720217117e-05, + "loss": 1.7304, + "step": 43731 + }, + { + "epoch": 0.53, + "grad_norm": 23.52313930347353, + "learning_rate": 1.728650763761838e-05, + "loss": 1.4729, + "step": 43734 + }, + { + "epoch": 0.53, + "grad_norm": 2.8481580485200366, + "learning_rate": 1.7286107530153415e-05, + "loss": 1.5212, + "step": 43737 + }, + { + "epoch": 0.53, + "grad_norm": 13.06299019126047, + "learning_rate": 1.728570739782359e-05, + "loss": 1.275, + "step": 43740 + }, + { + "epoch": 0.53, + "grad_norm": 4.143401772632302, + "learning_rate": 1.728530724063027e-05, + "loss": 1.2943, + "step": 43743 + }, + { + "epoch": 0.53, + "grad_norm": 18.591086085084072, + "learning_rate": 1.728490705857482e-05, + "loss": 1.9727, + "step": 43746 + }, + { + "epoch": 0.53, + "grad_norm": 16.47483440964682, + "learning_rate": 1.7284506851658608e-05, + "loss": 1.328, + "step": 43749 + }, + { + "epoch": 0.53, + "grad_norm": 9.851441416907809, + "learning_rate": 1.7284106619882993e-05, + "loss": 1.4086, + "step": 43752 + }, + { + "epoch": 0.53, + "grad_norm": 23.099849531545168, + "learning_rate": 1.728370636324935e-05, + "loss": 1.4212, + "step": 43755 + }, + { + "epoch": 0.53, + "grad_norm": 33.77418720023131, + "learning_rate": 1.7283306081759036e-05, + "loss": 1.6679, + "step": 43758 + }, + { + "epoch": 0.53, + "grad_norm": 16.57381172078113, + "learning_rate": 1.728290577541343e-05, + "loss": 1.2843, + "step": 43761 + }, + { + "epoch": 0.53, + "grad_norm": 23.11373962064106, + "learning_rate": 1.7282505444213883e-05, + "loss": 1.3239, + "step": 43764 + }, + { + "epoch": 0.53, + "grad_norm": 8.44507565936871, + "learning_rate": 1.728210508816177e-05, + "loss": 1.6622, + "step": 43767 + }, + { + "epoch": 0.53, + "grad_norm": 7.721691071187266, + "learning_rate": 1.7281704707258455e-05, + "loss": 1.7091, + "step": 43770 + }, + { + "epoch": 0.53, + "grad_norm": 12.687170646642237, + "learning_rate": 1.7281304301505305e-05, + "loss": 1.607, + "step": 43773 + }, + { + "epoch": 0.53, + "grad_norm": 7.407203891379872, + "learning_rate": 1.7280903870903688e-05, + "loss": 1.3787, + "step": 43776 + }, + { + "epoch": 0.53, + "grad_norm": 4.362227993812349, + "learning_rate": 1.7280503415454966e-05, + "loss": 1.6245, + "step": 43779 + }, + { + "epoch": 0.53, + "grad_norm": 11.768411083810296, + "learning_rate": 1.728010293516051e-05, + "loss": 1.2358, + "step": 43782 + }, + { + "epoch": 0.53, + "grad_norm": 32.65520474602907, + "learning_rate": 1.7279702430021685e-05, + "loss": 1.648, + "step": 43785 + }, + { + "epoch": 0.53, + "grad_norm": 5.726522688758967, + "learning_rate": 1.7279301900039858e-05, + "loss": 1.2793, + "step": 43788 + }, + { + "epoch": 0.53, + "grad_norm": 7.252187389703376, + "learning_rate": 1.7278901345216393e-05, + "loss": 1.7707, + "step": 43791 + }, + { + "epoch": 0.53, + "grad_norm": 23.41322487168822, + "learning_rate": 1.727850076555266e-05, + "loss": 1.4523, + "step": 43794 + }, + { + "epoch": 0.53, + "grad_norm": 6.82111751187153, + "learning_rate": 1.727810016105003e-05, + "loss": 1.6477, + "step": 43797 + }, + { + "epoch": 0.53, + "grad_norm": 16.67784912239469, + "learning_rate": 1.7277699531709862e-05, + "loss": 1.5949, + "step": 43800 + }, + { + "epoch": 0.53, + "grad_norm": 22.06944961475938, + "learning_rate": 1.727729887753353e-05, + "loss": 1.5453, + "step": 43803 + }, + { + "epoch": 0.53, + "grad_norm": 14.2651455294244, + "learning_rate": 1.7276898198522393e-05, + "loss": 1.4919, + "step": 43806 + }, + { + "epoch": 0.53, + "grad_norm": 41.43073329243466, + "learning_rate": 1.7276497494677827e-05, + "loss": 1.2858, + "step": 43809 + }, + { + "epoch": 0.53, + "grad_norm": 13.428828434507134, + "learning_rate": 1.7276096766001193e-05, + "loss": 1.2891, + "step": 43812 + }, + { + "epoch": 0.53, + "grad_norm": 27.68833830650877, + "learning_rate": 1.727569601249386e-05, + "loss": 1.4564, + "step": 43815 + }, + { + "epoch": 0.53, + "grad_norm": 4.699106241125901, + "learning_rate": 1.7275295234157207e-05, + "loss": 1.761, + "step": 43818 + }, + { + "epoch": 0.53, + "grad_norm": 78.15592421382934, + "learning_rate": 1.7274894430992586e-05, + "loss": 1.6135, + "step": 43821 + }, + { + "epoch": 0.53, + "grad_norm": 8.441758362332461, + "learning_rate": 1.727449360300137e-05, + "loss": 1.5761, + "step": 43824 + }, + { + "epoch": 0.53, + "grad_norm": 8.668439587707178, + "learning_rate": 1.7274092750184927e-05, + "loss": 1.2401, + "step": 43827 + }, + { + "epoch": 0.53, + "grad_norm": 36.110536639353825, + "learning_rate": 1.7273691872544627e-05, + "loss": 1.6664, + "step": 43830 + }, + { + "epoch": 0.53, + "grad_norm": 10.551190369808777, + "learning_rate": 1.727329097008184e-05, + "loss": 1.0362, + "step": 43833 + }, + { + "epoch": 0.53, + "grad_norm": 23.486238213522743, + "learning_rate": 1.7272890042797927e-05, + "loss": 1.2529, + "step": 43836 + }, + { + "epoch": 0.53, + "grad_norm": 20.82057608398366, + "learning_rate": 1.727248909069426e-05, + "loss": 1.2832, + "step": 43839 + }, + { + "epoch": 0.53, + "grad_norm": 8.270121881272178, + "learning_rate": 1.7272088113772206e-05, + "loss": 1.3551, + "step": 43842 + }, + { + "epoch": 0.53, + "grad_norm": 14.652847741246966, + "learning_rate": 1.7271687112033135e-05, + "loss": 1.3798, + "step": 43845 + }, + { + "epoch": 0.53, + "grad_norm": 7.655530937755004, + "learning_rate": 1.7271286085478415e-05, + "loss": 1.6306, + "step": 43848 + }, + { + "epoch": 0.53, + "grad_norm": 26.93713526735934, + "learning_rate": 1.7270885034109417e-05, + "loss": 1.3882, + "step": 43851 + }, + { + "epoch": 0.53, + "grad_norm": 14.495510485053435, + "learning_rate": 1.7270483957927507e-05, + "loss": 1.1831, + "step": 43854 + }, + { + "epoch": 0.53, + "grad_norm": 67.15754710416476, + "learning_rate": 1.7270082856934054e-05, + "loss": 1.3142, + "step": 43857 + }, + { + "epoch": 0.53, + "grad_norm": 10.615593554680862, + "learning_rate": 1.7269681731130427e-05, + "loss": 1.328, + "step": 43860 + }, + { + "epoch": 0.53, + "grad_norm": 6.537656692349447, + "learning_rate": 1.7269280580517994e-05, + "loss": 1.0604, + "step": 43863 + }, + { + "epoch": 0.53, + "grad_norm": 5.782602118219038, + "learning_rate": 1.7268879405098124e-05, + "loss": 1.62, + "step": 43866 + }, + { + "epoch": 0.53, + "grad_norm": 16.417261655883227, + "learning_rate": 1.7268478204872186e-05, + "loss": 1.6817, + "step": 43869 + }, + { + "epoch": 0.53, + "grad_norm": 13.226748769312003, + "learning_rate": 1.7268076979841553e-05, + "loss": 1.3534, + "step": 43872 + }, + { + "epoch": 0.53, + "grad_norm": 42.53556278278974, + "learning_rate": 1.726767573000759e-05, + "loss": 1.0688, + "step": 43875 + }, + { + "epoch": 0.53, + "grad_norm": 15.604419262248564, + "learning_rate": 1.7267274455371667e-05, + "loss": 1.3376, + "step": 43878 + }, + { + "epoch": 0.53, + "grad_norm": 14.58087688344822, + "learning_rate": 1.7266873155935156e-05, + "loss": 1.1798, + "step": 43881 + }, + { + "epoch": 0.53, + "grad_norm": 6.374607578857529, + "learning_rate": 1.726647183169942e-05, + "loss": 1.5674, + "step": 43884 + }, + { + "epoch": 0.53, + "grad_norm": 13.577830911873843, + "learning_rate": 1.7266070482665837e-05, + "loss": 1.4718, + "step": 43887 + }, + { + "epoch": 0.53, + "grad_norm": 15.36088800266099, + "learning_rate": 1.7265669108835773e-05, + "loss": 1.6226, + "step": 43890 + }, + { + "epoch": 0.53, + "grad_norm": 56.31852834989933, + "learning_rate": 1.7265267710210592e-05, + "loss": 1.1824, + "step": 43893 + }, + { + "epoch": 0.53, + "grad_norm": 11.402558778727714, + "learning_rate": 1.7264866286791676e-05, + "loss": 1.8021, + "step": 43896 + }, + { + "epoch": 0.53, + "grad_norm": 9.620937064576866, + "learning_rate": 1.7264464838580384e-05, + "loss": 1.2836, + "step": 43899 + }, + { + "epoch": 0.53, + "grad_norm": 153.36778560924725, + "learning_rate": 1.7264063365578094e-05, + "loss": 1.5002, + "step": 43902 + }, + { + "epoch": 0.53, + "grad_norm": 13.946264425449826, + "learning_rate": 1.7263661867786168e-05, + "loss": 1.5223, + "step": 43905 + }, + { + "epoch": 0.53, + "grad_norm": 15.108577240337636, + "learning_rate": 1.7263260345205982e-05, + "loss": 1.4536, + "step": 43908 + }, + { + "epoch": 0.53, + "grad_norm": 13.207003293741893, + "learning_rate": 1.7262858797838906e-05, + "loss": 1.1719, + "step": 43911 + }, + { + "epoch": 0.53, + "grad_norm": 8.642286050730645, + "learning_rate": 1.7262457225686306e-05, + "loss": 1.5351, + "step": 43914 + }, + { + "epoch": 0.53, + "grad_norm": 60.8941011820558, + "learning_rate": 1.7262055628749557e-05, + "loss": 1.4896, + "step": 43917 + }, + { + "epoch": 0.53, + "grad_norm": 4.960831338652499, + "learning_rate": 1.726165400703003e-05, + "loss": 1.4458, + "step": 43920 + }, + { + "epoch": 0.53, + "grad_norm": 29.94209439476285, + "learning_rate": 1.7261252360529087e-05, + "loss": 1.6805, + "step": 43923 + }, + { + "epoch": 0.53, + "grad_norm": 13.648618801715992, + "learning_rate": 1.7260850689248113e-05, + "loss": 1.181, + "step": 43926 + }, + { + "epoch": 0.53, + "grad_norm": 18.994765430747027, + "learning_rate": 1.7260448993188465e-05, + "loss": 1.2508, + "step": 43929 + }, + { + "epoch": 0.53, + "grad_norm": 42.4099891882689, + "learning_rate": 1.7260047272351524e-05, + "loss": 1.2494, + "step": 43932 + }, + { + "epoch": 0.53, + "grad_norm": 2.2769511613999653, + "learning_rate": 1.7259645526738656e-05, + "loss": 1.33, + "step": 43935 + }, + { + "epoch": 0.53, + "grad_norm": 11.015884189036385, + "learning_rate": 1.7259243756351233e-05, + "loss": 1.3654, + "step": 43938 + }, + { + "epoch": 0.53, + "grad_norm": 49.861170790733354, + "learning_rate": 1.7258841961190624e-05, + "loss": 1.1222, + "step": 43941 + }, + { + "epoch": 0.53, + "grad_norm": 10.676135752190342, + "learning_rate": 1.72584401412582e-05, + "loss": 1.6043, + "step": 43944 + }, + { + "epoch": 0.53, + "grad_norm": 2.5558545293840473, + "learning_rate": 1.7258038296555336e-05, + "loss": 1.1445, + "step": 43947 + }, + { + "epoch": 0.53, + "grad_norm": 26.65058591598259, + "learning_rate": 1.7257636427083405e-05, + "loss": 1.5465, + "step": 43950 + }, + { + "epoch": 0.53, + "grad_norm": 4.7755583171869525, + "learning_rate": 1.7257234532843773e-05, + "loss": 1.3638, + "step": 43953 + }, + { + "epoch": 0.53, + "grad_norm": 5.319597933987, + "learning_rate": 1.7256832613837812e-05, + "loss": 1.3374, + "step": 43956 + }, + { + "epoch": 0.53, + "grad_norm": 31.38938490674762, + "learning_rate": 1.7256430670066895e-05, + "loss": 1.2332, + "step": 43959 + }, + { + "epoch": 0.53, + "grad_norm": 4.961875262258074, + "learning_rate": 1.7256028701532398e-05, + "loss": 1.1525, + "step": 43962 + }, + { + "epoch": 0.53, + "grad_norm": 10.081147996440503, + "learning_rate": 1.7255626708235685e-05, + "loss": 1.4534, + "step": 43965 + }, + { + "epoch": 0.53, + "grad_norm": 36.79608462066121, + "learning_rate": 1.7255224690178135e-05, + "loss": 1.2204, + "step": 43968 + }, + { + "epoch": 0.53, + "grad_norm": 12.513452004570373, + "learning_rate": 1.7254822647361115e-05, + "loss": 1.4563, + "step": 43971 + }, + { + "epoch": 0.53, + "grad_norm": 8.940354776789892, + "learning_rate": 1.7254420579785995e-05, + "loss": 1.2171, + "step": 43974 + }, + { + "epoch": 0.53, + "grad_norm": 16.792967622369787, + "learning_rate": 1.7254018487454153e-05, + "loss": 1.4645, + "step": 43977 + }, + { + "epoch": 0.53, + "grad_norm": 6.784608679149105, + "learning_rate": 1.725361637036696e-05, + "loss": 1.4703, + "step": 43980 + }, + { + "epoch": 0.53, + "grad_norm": 18.343170218179573, + "learning_rate": 1.7253214228525787e-05, + "loss": 1.4315, + "step": 43983 + }, + { + "epoch": 0.53, + "grad_norm": 13.220895891671374, + "learning_rate": 1.7252812061932e-05, + "loss": 1.5742, + "step": 43986 + }, + { + "epoch": 0.53, + "grad_norm": 5.068884895530245, + "learning_rate": 1.7252409870586984e-05, + "loss": 1.336, + "step": 43989 + }, + { + "epoch": 0.53, + "grad_norm": 23.966166929416378, + "learning_rate": 1.7252007654492105e-05, + "loss": 1.8311, + "step": 43992 + }, + { + "epoch": 0.53, + "grad_norm": 9.969584704295004, + "learning_rate": 1.725160541364874e-05, + "loss": 1.5774, + "step": 43995 + }, + { + "epoch": 0.53, + "grad_norm": 4.526010968791788, + "learning_rate": 1.7251203148058252e-05, + "loss": 1.6157, + "step": 43998 + }, + { + "epoch": 0.53, + "grad_norm": 8.188831023486692, + "learning_rate": 1.725080085772202e-05, + "loss": 1.206, + "step": 44001 + }, + { + "epoch": 0.53, + "grad_norm": 5.182969716002713, + "learning_rate": 1.7250398542641417e-05, + "loss": 2.0264, + "step": 44004 + }, + { + "epoch": 0.53, + "grad_norm": 9.818981288185732, + "learning_rate": 1.7249996202817816e-05, + "loss": 1.3083, + "step": 44007 + }, + { + "epoch": 0.53, + "grad_norm": 13.287363669097232, + "learning_rate": 1.724959383825259e-05, + "loss": 1.3712, + "step": 44010 + }, + { + "epoch": 0.53, + "grad_norm": 21.497000490402492, + "learning_rate": 1.724919144894711e-05, + "loss": 1.2427, + "step": 44013 + }, + { + "epoch": 0.53, + "grad_norm": 21.982347032869388, + "learning_rate": 1.724878903490275e-05, + "loss": 1.4769, + "step": 44016 + }, + { + "epoch": 0.53, + "grad_norm": 5.223375090470757, + "learning_rate": 1.7248386596120884e-05, + "loss": 1.5729, + "step": 44019 + }, + { + "epoch": 0.53, + "grad_norm": 6.5772593536751485, + "learning_rate": 1.7247984132602887e-05, + "loss": 1.1132, + "step": 44022 + }, + { + "epoch": 0.53, + "grad_norm": 10.070293472709508, + "learning_rate": 1.7247581644350132e-05, + "loss": 1.1116, + "step": 44025 + }, + { + "epoch": 0.53, + "grad_norm": 13.781649742611606, + "learning_rate": 1.724717913136399e-05, + "loss": 1.2535, + "step": 44028 + }, + { + "epoch": 0.53, + "grad_norm": 10.636525902515993, + "learning_rate": 1.7246776593645836e-05, + "loss": 1.2445, + "step": 44031 + }, + { + "epoch": 0.53, + "grad_norm": 20.828709048654943, + "learning_rate": 1.7246374031197044e-05, + "loss": 1.8812, + "step": 44034 + }, + { + "epoch": 0.53, + "grad_norm": 37.884274697249815, + "learning_rate": 1.724597144401899e-05, + "loss": 1.2136, + "step": 44037 + }, + { + "epoch": 0.53, + "grad_norm": 19.860566078734507, + "learning_rate": 1.724556883211304e-05, + "loss": 1.2958, + "step": 44040 + }, + { + "epoch": 0.53, + "grad_norm": 20.516464784158796, + "learning_rate": 1.7245166195480577e-05, + "loss": 1.8312, + "step": 44043 + }, + { + "epoch": 0.53, + "grad_norm": 11.533898853918913, + "learning_rate": 1.724476353412297e-05, + "loss": 1.6987, + "step": 44046 + }, + { + "epoch": 0.53, + "grad_norm": 6.672818041650444, + "learning_rate": 1.72443608480416e-05, + "loss": 1.4237, + "step": 44049 + }, + { + "epoch": 0.53, + "grad_norm": 38.027526239922665, + "learning_rate": 1.7243958137237828e-05, + "loss": 2.1042, + "step": 44052 + }, + { + "epoch": 0.53, + "grad_norm": 11.891882788494232, + "learning_rate": 1.7243555401713038e-05, + "loss": 1.3568, + "step": 44055 + }, + { + "epoch": 0.53, + "grad_norm": 35.78479195486307, + "learning_rate": 1.7243152641468605e-05, + "loss": 1.4794, + "step": 44058 + }, + { + "epoch": 0.53, + "grad_norm": 9.748045175480497, + "learning_rate": 1.7242749856505897e-05, + "loss": 1.6841, + "step": 44061 + }, + { + "epoch": 0.53, + "grad_norm": 16.863645774908935, + "learning_rate": 1.7242347046826297e-05, + "loss": 1.7623, + "step": 44064 + }, + { + "epoch": 0.53, + "grad_norm": 43.10099836399674, + "learning_rate": 1.7241944212431173e-05, + "loss": 1.5758, + "step": 44067 + }, + { + "epoch": 0.53, + "grad_norm": 20.25518948446016, + "learning_rate": 1.7241541353321904e-05, + "loss": 1.8764, + "step": 44070 + }, + { + "epoch": 0.53, + "grad_norm": 95.18440565224374, + "learning_rate": 1.724113846949986e-05, + "loss": 1.578, + "step": 44073 + }, + { + "epoch": 0.53, + "grad_norm": 3.6152228177245402, + "learning_rate": 1.724073556096642e-05, + "loss": 1.3972, + "step": 44076 + }, + { + "epoch": 0.53, + "grad_norm": 12.938651061532687, + "learning_rate": 1.724033262772296e-05, + "loss": 1.5696, + "step": 44079 + }, + { + "epoch": 0.53, + "grad_norm": 7.855231832761088, + "learning_rate": 1.723992966977085e-05, + "loss": 1.0831, + "step": 44082 + }, + { + "epoch": 0.53, + "grad_norm": 43.02937233799309, + "learning_rate": 1.7239526687111468e-05, + "loss": 1.2028, + "step": 44085 + }, + { + "epoch": 0.53, + "grad_norm": 11.129085950855387, + "learning_rate": 1.723912367974619e-05, + "loss": 0.9232, + "step": 44088 + }, + { + "epoch": 0.53, + "grad_norm": 8.630955544788462, + "learning_rate": 1.7238720647676387e-05, + "loss": 1.0645, + "step": 44091 + }, + { + "epoch": 0.53, + "grad_norm": 7.490463738218608, + "learning_rate": 1.723831759090344e-05, + "loss": 1.6857, + "step": 44094 + }, + { + "epoch": 0.53, + "grad_norm": 44.35238809709137, + "learning_rate": 1.7237914509428723e-05, + "loss": 1.6253, + "step": 44097 + }, + { + "epoch": 0.53, + "grad_norm": 9.51778319608539, + "learning_rate": 1.723751140325361e-05, + "loss": 1.6439, + "step": 44100 + }, + { + "epoch": 0.53, + "grad_norm": 36.11328050105792, + "learning_rate": 1.723710827237948e-05, + "loss": 1.538, + "step": 44103 + }, + { + "epoch": 0.53, + "grad_norm": 8.46907102200052, + "learning_rate": 1.7236705116807704e-05, + "loss": 1.486, + "step": 44106 + }, + { + "epoch": 0.53, + "grad_norm": 8.155303679067787, + "learning_rate": 1.723630193653966e-05, + "loss": 1.2164, + "step": 44109 + }, + { + "epoch": 0.53, + "grad_norm": 14.723686107895148, + "learning_rate": 1.7235898731576722e-05, + "loss": 1.4717, + "step": 44112 + }, + { + "epoch": 0.53, + "grad_norm": 21.663131816150134, + "learning_rate": 1.7235495501920273e-05, + "loss": 1.175, + "step": 44115 + }, + { + "epoch": 0.53, + "grad_norm": 18.552624929922903, + "learning_rate": 1.723509224757168e-05, + "loss": 1.6534, + "step": 44118 + }, + { + "epoch": 0.53, + "grad_norm": 32.9012496822305, + "learning_rate": 1.7234688968532326e-05, + "loss": 1.3322, + "step": 44121 + }, + { + "epoch": 0.53, + "grad_norm": 7.63113900472361, + "learning_rate": 1.7234285664803584e-05, + "loss": 1.4407, + "step": 44124 + }, + { + "epoch": 0.53, + "grad_norm": 7.264550156745744, + "learning_rate": 1.7233882336386828e-05, + "loss": 1.3407, + "step": 44127 + }, + { + "epoch": 0.53, + "grad_norm": 36.188099348508786, + "learning_rate": 1.7233478983283442e-05, + "loss": 1.4136, + "step": 44130 + }, + { + "epoch": 0.53, + "grad_norm": 18.4420240389106, + "learning_rate": 1.7233075605494795e-05, + "loss": 1.1345, + "step": 44133 + }, + { + "epoch": 0.53, + "grad_norm": 49.79369026742834, + "learning_rate": 1.7232672203022266e-05, + "loss": 1.3677, + "step": 44136 + }, + { + "epoch": 0.53, + "grad_norm": 7.620725114359849, + "learning_rate": 1.7232268775867228e-05, + "loss": 1.5063, + "step": 44139 + }, + { + "epoch": 0.53, + "grad_norm": 3.580221514943911, + "learning_rate": 1.7231865324031066e-05, + "loss": 1.2148, + "step": 44142 + }, + { + "epoch": 0.53, + "grad_norm": 15.91246383266823, + "learning_rate": 1.7231461847515156e-05, + "loss": 1.402, + "step": 44145 + }, + { + "epoch": 0.53, + "grad_norm": 7.086464289461179, + "learning_rate": 1.7231058346320866e-05, + "loss": 1.4424, + "step": 44148 + }, + { + "epoch": 0.53, + "grad_norm": 3.9592584619671167, + "learning_rate": 1.7230654820449582e-05, + "loss": 1.6877, + "step": 44151 + }, + { + "epoch": 0.53, + "grad_norm": 25.22069606972585, + "learning_rate": 1.7230251269902677e-05, + "loss": 1.5255, + "step": 44154 + }, + { + "epoch": 0.53, + "grad_norm": 19.064521536616212, + "learning_rate": 1.7229847694681526e-05, + "loss": 1.5174, + "step": 44157 + }, + { + "epoch": 0.53, + "grad_norm": 3.5840877230057253, + "learning_rate": 1.7229444094787512e-05, + "loss": 1.4312, + "step": 44160 + }, + { + "epoch": 0.53, + "grad_norm": 21.20180678221498, + "learning_rate": 1.7229040470222006e-05, + "loss": 1.0477, + "step": 44163 + }, + { + "epoch": 0.53, + "grad_norm": 10.937751096429233, + "learning_rate": 1.7228636820986394e-05, + "loss": 1.3633, + "step": 44166 + }, + { + "epoch": 0.53, + "grad_norm": 8.501401939841012, + "learning_rate": 1.7228233147082042e-05, + "loss": 1.0719, + "step": 44169 + }, + { + "epoch": 0.53, + "grad_norm": 15.820543275585786, + "learning_rate": 1.722782944851034e-05, + "loss": 1.3427, + "step": 44172 + }, + { + "epoch": 0.53, + "grad_norm": 14.986846672418254, + "learning_rate": 1.7227425725272654e-05, + "loss": 1.3857, + "step": 44175 + }, + { + "epoch": 0.53, + "grad_norm": 11.271539065787458, + "learning_rate": 1.722702197737037e-05, + "loss": 1.6759, + "step": 44178 + }, + { + "epoch": 0.53, + "grad_norm": 8.966511629953857, + "learning_rate": 1.7226618204804863e-05, + "loss": 1.0745, + "step": 44181 + }, + { + "epoch": 0.53, + "grad_norm": 9.027248116656365, + "learning_rate": 1.7226214407577512e-05, + "loss": 1.5142, + "step": 44184 + }, + { + "epoch": 0.53, + "grad_norm": 4.0247235636421586, + "learning_rate": 1.7225810585689694e-05, + "loss": 1.4086, + "step": 44187 + }, + { + "epoch": 0.53, + "grad_norm": 14.875302309560924, + "learning_rate": 1.7225406739142785e-05, + "loss": 1.725, + "step": 44190 + }, + { + "epoch": 0.53, + "grad_norm": 9.299819939818397, + "learning_rate": 1.7225002867938165e-05, + "loss": 1.3957, + "step": 44193 + }, + { + "epoch": 0.53, + "grad_norm": 14.323696164676383, + "learning_rate": 1.7224598972077215e-05, + "loss": 1.3709, + "step": 44196 + }, + { + "epoch": 0.53, + "grad_norm": 11.628543430452577, + "learning_rate": 1.722419505156131e-05, + "loss": 1.3292, + "step": 44199 + }, + { + "epoch": 0.53, + "grad_norm": 16.09196582862166, + "learning_rate": 1.7223791106391832e-05, + "loss": 1.4157, + "step": 44202 + }, + { + "epoch": 0.53, + "grad_norm": 13.42608660811086, + "learning_rate": 1.7223387136570154e-05, + "loss": 1.0082, + "step": 44205 + }, + { + "epoch": 0.53, + "grad_norm": 7.69840816857869, + "learning_rate": 1.722298314209766e-05, + "loss": 1.5834, + "step": 44208 + }, + { + "epoch": 0.53, + "grad_norm": 182.86883506364995, + "learning_rate": 1.7222579122975722e-05, + "loss": 1.5038, + "step": 44211 + }, + { + "epoch": 0.53, + "grad_norm": 3.156824839156209, + "learning_rate": 1.7222175079205726e-05, + "loss": 1.4377, + "step": 44214 + }, + { + "epoch": 0.53, + "grad_norm": 32.49013972896188, + "learning_rate": 1.7221771010789047e-05, + "loss": 1.3542, + "step": 44217 + }, + { + "epoch": 0.53, + "grad_norm": 120.32344790177385, + "learning_rate": 1.7221366917727067e-05, + "loss": 1.5955, + "step": 44220 + }, + { + "epoch": 0.53, + "grad_norm": 24.624384794805096, + "learning_rate": 1.7220962800021158e-05, + "loss": 1.2794, + "step": 44223 + }, + { + "epoch": 0.53, + "grad_norm": 85.09133329140091, + "learning_rate": 1.722055865767271e-05, + "loss": 1.3578, + "step": 44226 + }, + { + "epoch": 0.53, + "grad_norm": 229.98602218987924, + "learning_rate": 1.7220154490683092e-05, + "loss": 1.6706, + "step": 44229 + }, + { + "epoch": 0.53, + "grad_norm": 9.959245339073226, + "learning_rate": 1.721975029905369e-05, + "loss": 1.1897, + "step": 44232 + }, + { + "epoch": 0.53, + "grad_norm": 3.0512632088962315, + "learning_rate": 1.721934608278588e-05, + "loss": 1.3248, + "step": 44235 + }, + { + "epoch": 0.53, + "grad_norm": 18.362612436769247, + "learning_rate": 1.721894184188104e-05, + "loss": 1.0421, + "step": 44238 + }, + { + "epoch": 0.53, + "grad_norm": 19.133981548922495, + "learning_rate": 1.7218537576340556e-05, + "loss": 1.315, + "step": 44241 + }, + { + "epoch": 0.53, + "grad_norm": 27.883916293838393, + "learning_rate": 1.72181332861658e-05, + "loss": 1.6968, + "step": 44244 + }, + { + "epoch": 0.53, + "grad_norm": 5.741645209082647, + "learning_rate": 1.7217728971358152e-05, + "loss": 1.2535, + "step": 44247 + }, + { + "epoch": 0.53, + "grad_norm": 5.554087941152431, + "learning_rate": 1.7217324631919e-05, + "loss": 1.3936, + "step": 44250 + }, + { + "epoch": 0.53, + "grad_norm": 12.873755949566743, + "learning_rate": 1.7216920267849717e-05, + "loss": 1.6343, + "step": 44253 + }, + { + "epoch": 0.53, + "grad_norm": 32.568872744778886, + "learning_rate": 1.7216515879151686e-05, + "loss": 1.7428, + "step": 44256 + }, + { + "epoch": 0.53, + "grad_norm": 8.351964886674383, + "learning_rate": 1.7216111465826284e-05, + "loss": 1.4428, + "step": 44259 + }, + { + "epoch": 0.53, + "grad_norm": 40.21612297667939, + "learning_rate": 1.7215707027874896e-05, + "loss": 1.2693, + "step": 44262 + }, + { + "epoch": 0.53, + "grad_norm": 32.4600582054326, + "learning_rate": 1.7215302565298896e-05, + "loss": 1.5478, + "step": 44265 + }, + { + "epoch": 0.53, + "grad_norm": 8.53072136643487, + "learning_rate": 1.7214898078099668e-05, + "loss": 1.5939, + "step": 44268 + }, + { + "epoch": 0.53, + "grad_norm": 35.26276021261479, + "learning_rate": 1.7214493566278588e-05, + "loss": 1.4634, + "step": 44271 + }, + { + "epoch": 0.53, + "grad_norm": 8.70493932536186, + "learning_rate": 1.7214089029837042e-05, + "loss": 1.3145, + "step": 44274 + }, + { + "epoch": 0.53, + "grad_norm": 8.833433048180538, + "learning_rate": 1.7213684468776412e-05, + "loss": 1.0656, + "step": 44277 + }, + { + "epoch": 0.53, + "grad_norm": 8.053713291606348, + "learning_rate": 1.7213279883098075e-05, + "loss": 1.5335, + "step": 44280 + }, + { + "epoch": 0.53, + "grad_norm": 13.418546616022873, + "learning_rate": 1.7212875272803407e-05, + "loss": 1.6214, + "step": 44283 + }, + { + "epoch": 0.53, + "grad_norm": 9.386665858839539, + "learning_rate": 1.72124706378938e-05, + "loss": 1.6509, + "step": 44286 + }, + { + "epoch": 0.53, + "grad_norm": 15.615930361751392, + "learning_rate": 1.7212065978370625e-05, + "loss": 1.2565, + "step": 44289 + }, + { + "epoch": 0.53, + "grad_norm": 8.751762019797757, + "learning_rate": 1.7211661294235268e-05, + "loss": 1.3933, + "step": 44292 + }, + { + "epoch": 0.53, + "grad_norm": 4.923567477171363, + "learning_rate": 1.7211256585489108e-05, + "loss": 1.2542, + "step": 44295 + }, + { + "epoch": 0.53, + "grad_norm": 26.39667965289958, + "learning_rate": 1.7210851852133526e-05, + "loss": 1.2231, + "step": 44298 + }, + { + "epoch": 0.53, + "grad_norm": 15.734640162738826, + "learning_rate": 1.7210447094169903e-05, + "loss": 1.23, + "step": 44301 + }, + { + "epoch": 0.53, + "grad_norm": 45.74019229699832, + "learning_rate": 1.7210042311599624e-05, + "loss": 1.4558, + "step": 44304 + }, + { + "epoch": 0.53, + "grad_norm": 9.46477987454519, + "learning_rate": 1.7209637504424065e-05, + "loss": 1.8793, + "step": 44307 + }, + { + "epoch": 0.53, + "grad_norm": 24.630103827248618, + "learning_rate": 1.7209232672644614e-05, + "loss": 1.3992, + "step": 44310 + }, + { + "epoch": 0.53, + "grad_norm": 6.61895835399006, + "learning_rate": 1.7208827816262647e-05, + "loss": 1.4819, + "step": 44313 + }, + { + "epoch": 0.53, + "grad_norm": 7.811565272850796, + "learning_rate": 1.7208422935279546e-05, + "loss": 1.4588, + "step": 44316 + }, + { + "epoch": 0.53, + "grad_norm": 8.69230132257376, + "learning_rate": 1.7208018029696695e-05, + "loss": 1.6055, + "step": 44319 + }, + { + "epoch": 0.53, + "grad_norm": 31.699032305880696, + "learning_rate": 1.720761309951547e-05, + "loss": 1.4672, + "step": 44322 + }, + { + "epoch": 0.53, + "grad_norm": 23.473977939176976, + "learning_rate": 1.7207208144737265e-05, + "loss": 1.3016, + "step": 44325 + }, + { + "epoch": 0.53, + "grad_norm": 14.495829775191224, + "learning_rate": 1.720680316536345e-05, + "loss": 1.1549, + "step": 44328 + }, + { + "epoch": 0.53, + "grad_norm": 31.197978792866724, + "learning_rate": 1.7206398161395416e-05, + "loss": 1.428, + "step": 44331 + }, + { + "epoch": 0.53, + "grad_norm": 22.848633796347723, + "learning_rate": 1.7205993132834534e-05, + "loss": 1.6597, + "step": 44334 + }, + { + "epoch": 0.53, + "grad_norm": 22.79680753576003, + "learning_rate": 1.72055880796822e-05, + "loss": 1.3077, + "step": 44337 + }, + { + "epoch": 0.53, + "grad_norm": 7.691939863963344, + "learning_rate": 1.7205183001939786e-05, + "loss": 1.7795, + "step": 44340 + }, + { + "epoch": 0.53, + "grad_norm": 10.72055765646206, + "learning_rate": 1.7204777899608677e-05, + "loss": 1.5147, + "step": 44343 + }, + { + "epoch": 0.53, + "grad_norm": 27.718002476471543, + "learning_rate": 1.7204372772690262e-05, + "loss": 1.2805, + "step": 44346 + }, + { + "epoch": 0.53, + "grad_norm": 15.819680198970435, + "learning_rate": 1.720396762118591e-05, + "loss": 1.485, + "step": 44349 + }, + { + "epoch": 0.53, + "grad_norm": 35.51058851644397, + "learning_rate": 1.720356244509701e-05, + "loss": 1.176, + "step": 44352 + }, + { + "epoch": 0.53, + "grad_norm": 17.337299953437462, + "learning_rate": 1.7203157244424954e-05, + "loss": 1.7092, + "step": 44355 + }, + { + "epoch": 0.53, + "grad_norm": 9.155032356971645, + "learning_rate": 1.7202752019171113e-05, + "loss": 1.1916, + "step": 44358 + }, + { + "epoch": 0.53, + "grad_norm": 5.720339817703425, + "learning_rate": 1.7202346769336874e-05, + "loss": 1.6763, + "step": 44361 + }, + { + "epoch": 0.53, + "grad_norm": 4.873379402738949, + "learning_rate": 1.7201941494923622e-05, + "loss": 1.0979, + "step": 44364 + }, + { + "epoch": 0.53, + "grad_norm": 7.426989828954914, + "learning_rate": 1.7201536195932734e-05, + "loss": 1.5468, + "step": 44367 + }, + { + "epoch": 0.53, + "grad_norm": 20.338039473338064, + "learning_rate": 1.7201130872365598e-05, + "loss": 1.3925, + "step": 44370 + }, + { + "epoch": 0.53, + "grad_norm": 22.010662070302924, + "learning_rate": 1.72007255242236e-05, + "loss": 1.5055, + "step": 44373 + }, + { + "epoch": 0.53, + "grad_norm": 8.532104508853845, + "learning_rate": 1.7200320151508116e-05, + "loss": 1.503, + "step": 44376 + }, + { + "epoch": 0.53, + "grad_norm": 7.238731678839698, + "learning_rate": 1.719991475422053e-05, + "loss": 1.4383, + "step": 44379 + }, + { + "epoch": 0.53, + "grad_norm": 19.568245810939317, + "learning_rate": 1.7199509332362235e-05, + "loss": 1.452, + "step": 44382 + }, + { + "epoch": 0.53, + "grad_norm": 37.490543347769055, + "learning_rate": 1.7199103885934603e-05, + "loss": 1.248, + "step": 44385 + }, + { + "epoch": 0.53, + "grad_norm": 3.209381173725601, + "learning_rate": 1.7198698414939025e-05, + "loss": 1.626, + "step": 44388 + }, + { + "epoch": 0.53, + "grad_norm": 3.569235740749496, + "learning_rate": 1.719829291937688e-05, + "loss": 1.3737, + "step": 44391 + }, + { + "epoch": 0.53, + "grad_norm": 23.512642441817547, + "learning_rate": 1.7197887399249557e-05, + "loss": 1.6482, + "step": 44394 + }, + { + "epoch": 0.53, + "grad_norm": 8.214720681926678, + "learning_rate": 1.7197481854558432e-05, + "loss": 1.325, + "step": 44397 + }, + { + "epoch": 0.53, + "grad_norm": 15.76880491981591, + "learning_rate": 1.71970762853049e-05, + "loss": 1.5665, + "step": 44400 + }, + { + "epoch": 0.53, + "grad_norm": 12.427486876040442, + "learning_rate": 1.7196670691490334e-05, + "loss": 1.1877, + "step": 44403 + }, + { + "epoch": 0.53, + "grad_norm": 8.514136306107345, + "learning_rate": 1.719626507311613e-05, + "loss": 1.197, + "step": 44406 + }, + { + "epoch": 0.53, + "grad_norm": 10.215008611667484, + "learning_rate": 1.7195859430183657e-05, + "loss": 1.6609, + "step": 44409 + }, + { + "epoch": 0.53, + "grad_norm": 27.66065835793131, + "learning_rate": 1.7195453762694313e-05, + "loss": 1.4478, + "step": 44412 + }, + { + "epoch": 0.53, + "grad_norm": 16.004273808278224, + "learning_rate": 1.7195048070649475e-05, + "loss": 0.9873, + "step": 44415 + }, + { + "epoch": 0.53, + "grad_norm": 57.77453145167556, + "learning_rate": 1.719464235405053e-05, + "loss": 1.2918, + "step": 44418 + }, + { + "epoch": 0.53, + "grad_norm": 12.339984846731198, + "learning_rate": 1.7194236612898862e-05, + "loss": 1.6894, + "step": 44421 + }, + { + "epoch": 0.53, + "grad_norm": 45.56025158693036, + "learning_rate": 1.7193830847195855e-05, + "loss": 1.4407, + "step": 44424 + }, + { + "epoch": 0.53, + "grad_norm": 17.437766937967616, + "learning_rate": 1.71934250569429e-05, + "loss": 1.8567, + "step": 44427 + }, + { + "epoch": 0.53, + "grad_norm": 7.184659314227807, + "learning_rate": 1.7193019242141368e-05, + "loss": 1.3608, + "step": 44430 + }, + { + "epoch": 0.53, + "grad_norm": 6.390463401150522, + "learning_rate": 1.7192613402792658e-05, + "loss": 1.4149, + "step": 44433 + }, + { + "epoch": 0.53, + "grad_norm": 13.011587363934751, + "learning_rate": 1.7192207538898147e-05, + "loss": 1.5569, + "step": 44436 + }, + { + "epoch": 0.53, + "grad_norm": 2.786157668945115, + "learning_rate": 1.7191801650459222e-05, + "loss": 1.5466, + "step": 44439 + }, + { + "epoch": 0.53, + "grad_norm": 14.59552673566397, + "learning_rate": 1.719139573747727e-05, + "loss": 1.5858, + "step": 44442 + }, + { + "epoch": 0.53, + "grad_norm": 3.964659535594478, + "learning_rate": 1.7190989799953675e-05, + "loss": 1.2329, + "step": 44445 + }, + { + "epoch": 0.53, + "grad_norm": 21.288733577753217, + "learning_rate": 1.719058383788982e-05, + "loss": 1.2693, + "step": 44448 + }, + { + "epoch": 0.53, + "grad_norm": 10.269863971284725, + "learning_rate": 1.7190177851287093e-05, + "loss": 1.2736, + "step": 44451 + }, + { + "epoch": 0.53, + "grad_norm": 4.880609747280575, + "learning_rate": 1.7189771840146878e-05, + "loss": 1.4437, + "step": 44454 + }, + { + "epoch": 0.53, + "grad_norm": 11.03516746326924, + "learning_rate": 1.7189365804470564e-05, + "loss": 1.0912, + "step": 44457 + }, + { + "epoch": 0.53, + "grad_norm": 7.380201406255709, + "learning_rate": 1.718895974425953e-05, + "loss": 1.3263, + "step": 44460 + }, + { + "epoch": 0.53, + "grad_norm": 4.817202452127758, + "learning_rate": 1.7188553659515172e-05, + "loss": 1.394, + "step": 44463 + }, + { + "epoch": 0.53, + "grad_norm": 12.222497057852422, + "learning_rate": 1.718814755023886e-05, + "loss": 1.2428, + "step": 44466 + }, + { + "epoch": 0.53, + "grad_norm": 13.321658068362789, + "learning_rate": 1.7187741416432e-05, + "loss": 1.2769, + "step": 44469 + }, + { + "epoch": 0.53, + "grad_norm": 18.803787441860596, + "learning_rate": 1.7187335258095963e-05, + "loss": 1.0306, + "step": 44472 + }, + { + "epoch": 0.53, + "grad_norm": 19.031490826490064, + "learning_rate": 1.718692907523214e-05, + "loss": 1.5391, + "step": 44475 + }, + { + "epoch": 0.53, + "grad_norm": 10.025125760050459, + "learning_rate": 1.7186522867841917e-05, + "loss": 1.1578, + "step": 44478 + }, + { + "epoch": 0.53, + "grad_norm": 19.660375520883097, + "learning_rate": 1.718611663592668e-05, + "loss": 1.4725, + "step": 44481 + }, + { + "epoch": 0.53, + "grad_norm": 12.984354291148653, + "learning_rate": 1.7185710379487814e-05, + "loss": 1.3914, + "step": 44484 + }, + { + "epoch": 0.53, + "grad_norm": 24.253883880476593, + "learning_rate": 1.718530409852671e-05, + "loss": 1.4369, + "step": 44487 + }, + { + "epoch": 0.53, + "grad_norm": 43.983184012584964, + "learning_rate": 1.7184897793044748e-05, + "loss": 1.5025, + "step": 44490 + }, + { + "epoch": 0.54, + "grad_norm": 18.104764653901693, + "learning_rate": 1.718449146304332e-05, + "loss": 1.3333, + "step": 44493 + }, + { + "epoch": 0.54, + "grad_norm": 8.069718693780366, + "learning_rate": 1.718408510852381e-05, + "loss": 1.345, + "step": 44496 + }, + { + "epoch": 0.54, + "grad_norm": 113.30070383820933, + "learning_rate": 1.7183678729487603e-05, + "loss": 1.5056, + "step": 44499 + }, + { + "epoch": 0.54, + "grad_norm": 6.766636743673525, + "learning_rate": 1.7183272325936088e-05, + "loss": 1.1212, + "step": 44502 + }, + { + "epoch": 0.54, + "grad_norm": 11.228501177444304, + "learning_rate": 1.7182865897870654e-05, + "loss": 1.2675, + "step": 44505 + }, + { + "epoch": 0.54, + "grad_norm": 9.553750614086756, + "learning_rate": 1.7182459445292686e-05, + "loss": 1.3017, + "step": 44508 + }, + { + "epoch": 0.54, + "grad_norm": 16.243630047870152, + "learning_rate": 1.718205296820357e-05, + "loss": 1.2964, + "step": 44511 + }, + { + "epoch": 0.54, + "grad_norm": 16.29729059879254, + "learning_rate": 1.7181646466604695e-05, + "loss": 1.5319, + "step": 44514 + }, + { + "epoch": 0.54, + "grad_norm": 7.3446449903260795, + "learning_rate": 1.7181239940497447e-05, + "loss": 1.5704, + "step": 44517 + }, + { + "epoch": 0.54, + "grad_norm": 8.579673605188225, + "learning_rate": 1.7180833389883215e-05, + "loss": 1.8194, + "step": 44520 + }, + { + "epoch": 0.54, + "grad_norm": 20.943499497633304, + "learning_rate": 1.7180426814763384e-05, + "loss": 1.026, + "step": 44523 + }, + { + "epoch": 0.54, + "grad_norm": 25.325389284807493, + "learning_rate": 1.7180020215139345e-05, + "loss": 1.5558, + "step": 44526 + }, + { + "epoch": 0.54, + "grad_norm": 15.688929353641603, + "learning_rate": 1.7179613591012478e-05, + "loss": 1.3698, + "step": 44529 + }, + { + "epoch": 0.54, + "grad_norm": 30.26755386499724, + "learning_rate": 1.7179206942384177e-05, + "loss": 1.2614, + "step": 44532 + }, + { + "epoch": 0.54, + "grad_norm": 15.404211344534042, + "learning_rate": 1.717880026925583e-05, + "loss": 1.5649, + "step": 44535 + }, + { + "epoch": 0.54, + "grad_norm": 15.823668396054918, + "learning_rate": 1.717839357162883e-05, + "loss": 1.1153, + "step": 44538 + }, + { + "epoch": 0.54, + "grad_norm": 50.75159347759262, + "learning_rate": 1.7177986849504548e-05, + "loss": 1.5378, + "step": 44541 + }, + { + "epoch": 0.54, + "grad_norm": 85.4880532050876, + "learning_rate": 1.717758010288439e-05, + "loss": 1.3184, + "step": 44544 + }, + { + "epoch": 0.54, + "grad_norm": 12.713151864531218, + "learning_rate": 1.7177173331769732e-05, + "loss": 1.1767, + "step": 44547 + }, + { + "epoch": 0.54, + "grad_norm": 10.574193480605436, + "learning_rate": 1.7176766536161967e-05, + "loss": 1.3267, + "step": 44550 + }, + { + "epoch": 0.54, + "grad_norm": 10.291198558259039, + "learning_rate": 1.7176359716062485e-05, + "loss": 1.3434, + "step": 44553 + }, + { + "epoch": 0.54, + "grad_norm": 13.433218968399405, + "learning_rate": 1.7175952871472672e-05, + "loss": 1.7963, + "step": 44556 + }, + { + "epoch": 0.54, + "grad_norm": 39.96312662365883, + "learning_rate": 1.7175546002393916e-05, + "loss": 1.5477, + "step": 44559 + }, + { + "epoch": 0.54, + "grad_norm": 8.857999484515997, + "learning_rate": 1.7175139108827604e-05, + "loss": 1.5613, + "step": 44562 + }, + { + "epoch": 0.54, + "grad_norm": 16.029611687698864, + "learning_rate": 1.717473219077513e-05, + "loss": 1.3618, + "step": 44565 + }, + { + "epoch": 0.54, + "grad_norm": 34.19715691024521, + "learning_rate": 1.7174325248237877e-05, + "loss": 1.3953, + "step": 44568 + }, + { + "epoch": 0.54, + "grad_norm": 13.303515102070786, + "learning_rate": 1.7173918281217237e-05, + "loss": 1.8573, + "step": 44571 + }, + { + "epoch": 0.54, + "grad_norm": 4.935184757648314, + "learning_rate": 1.7173511289714597e-05, + "loss": 1.6313, + "step": 44574 + }, + { + "epoch": 0.54, + "grad_norm": 9.168313717660785, + "learning_rate": 1.7173104273731346e-05, + "loss": 1.8479, + "step": 44577 + }, + { + "epoch": 0.54, + "grad_norm": 11.508471488006109, + "learning_rate": 1.7172697233268877e-05, + "loss": 1.5251, + "step": 44580 + }, + { + "epoch": 0.54, + "grad_norm": 14.625560662325677, + "learning_rate": 1.7172290168328575e-05, + "loss": 1.5856, + "step": 44583 + }, + { + "epoch": 0.54, + "grad_norm": 24.710691700297335, + "learning_rate": 1.717188307891183e-05, + "loss": 2.0264, + "step": 44586 + }, + { + "epoch": 0.54, + "grad_norm": 34.74323481992444, + "learning_rate": 1.7171475965020034e-05, + "loss": 1.4703, + "step": 44589 + }, + { + "epoch": 0.54, + "grad_norm": 14.610528792669125, + "learning_rate": 1.7171068826654566e-05, + "loss": 1.5863, + "step": 44592 + }, + { + "epoch": 0.54, + "grad_norm": 11.957402259577556, + "learning_rate": 1.717066166381683e-05, + "loss": 1.2282, + "step": 44595 + }, + { + "epoch": 0.54, + "grad_norm": 15.314139554553943, + "learning_rate": 1.7170254476508205e-05, + "loss": 1.5376, + "step": 44598 + }, + { + "epoch": 0.54, + "grad_norm": 2.7983176954892977, + "learning_rate": 1.7169847264730086e-05, + "loss": 1.4774, + "step": 44601 + }, + { + "epoch": 0.54, + "grad_norm": 14.506606937065033, + "learning_rate": 1.716944002848386e-05, + "loss": 1.4477, + "step": 44604 + }, + { + "epoch": 0.54, + "grad_norm": 42.09355473734624, + "learning_rate": 1.716903276777092e-05, + "loss": 1.2453, + "step": 44607 + }, + { + "epoch": 0.54, + "grad_norm": 18.164514844067025, + "learning_rate": 1.716862548259265e-05, + "loss": 1.3637, + "step": 44610 + }, + { + "epoch": 0.54, + "grad_norm": 8.502582010002465, + "learning_rate": 1.7168218172950445e-05, + "loss": 1.4492, + "step": 44613 + }, + { + "epoch": 0.54, + "grad_norm": 17.33947999654273, + "learning_rate": 1.7167810838845692e-05, + "loss": 1.4175, + "step": 44616 + }, + { + "epoch": 0.54, + "grad_norm": 13.255128983573451, + "learning_rate": 1.7167403480279783e-05, + "loss": 1.3112, + "step": 44619 + }, + { + "epoch": 0.54, + "grad_norm": 25.822833061812187, + "learning_rate": 1.716699609725411e-05, + "loss": 1.5297, + "step": 44622 + }, + { + "epoch": 0.54, + "grad_norm": 16.599593136492455, + "learning_rate": 1.716658868977006e-05, + "loss": 1.3846, + "step": 44625 + }, + { + "epoch": 0.54, + "grad_norm": 13.130559065782847, + "learning_rate": 1.716618125782902e-05, + "loss": 1.4927, + "step": 44628 + }, + { + "epoch": 0.54, + "grad_norm": 3.1995357816256678, + "learning_rate": 1.7165773801432387e-05, + "loss": 1.2726, + "step": 44631 + }, + { + "epoch": 0.54, + "grad_norm": 12.367223950484911, + "learning_rate": 1.7165366320581554e-05, + "loss": 1.4391, + "step": 44634 + }, + { + "epoch": 0.54, + "grad_norm": 17.063316453362145, + "learning_rate": 1.71649588152779e-05, + "loss": 1.3916, + "step": 44637 + }, + { + "epoch": 0.54, + "grad_norm": 7.774226110585401, + "learning_rate": 1.7164551285522827e-05, + "loss": 1.4545, + "step": 44640 + }, + { + "epoch": 0.54, + "grad_norm": 8.83691930359126, + "learning_rate": 1.7164143731317718e-05, + "loss": 0.9342, + "step": 44643 + }, + { + "epoch": 0.54, + "grad_norm": 15.093225968248698, + "learning_rate": 1.716373615266397e-05, + "loss": 1.8398, + "step": 44646 + }, + { + "epoch": 0.54, + "grad_norm": 8.937111498493287, + "learning_rate": 1.7163328549562964e-05, + "loss": 1.5101, + "step": 44649 + }, + { + "epoch": 0.54, + "grad_norm": 25.051068554728474, + "learning_rate": 1.7162920922016108e-05, + "loss": 1.7708, + "step": 44652 + }, + { + "epoch": 0.54, + "grad_norm": 6.819057955187361, + "learning_rate": 1.7162513270024775e-05, + "loss": 1.5412, + "step": 44655 + }, + { + "epoch": 0.54, + "grad_norm": 15.58709616276887, + "learning_rate": 1.716210559359037e-05, + "loss": 1.2577, + "step": 44658 + }, + { + "epoch": 0.54, + "grad_norm": 6.2422602284570665, + "learning_rate": 1.716169789271427e-05, + "loss": 1.382, + "step": 44661 + }, + { + "epoch": 0.54, + "grad_norm": 6.888186742552012, + "learning_rate": 1.7161290167397885e-05, + "loss": 1.5877, + "step": 44664 + }, + { + "epoch": 0.54, + "grad_norm": 8.45538355451379, + "learning_rate": 1.716088241764259e-05, + "loss": 1.5561, + "step": 44667 + }, + { + "epoch": 0.54, + "grad_norm": 6.357924361851754, + "learning_rate": 1.7160474643449783e-05, + "loss": 1.6301, + "step": 44670 + }, + { + "epoch": 0.54, + "grad_norm": 10.355406154309229, + "learning_rate": 1.7160066844820855e-05, + "loss": 1.428, + "step": 44673 + }, + { + "epoch": 0.54, + "grad_norm": 9.454550557532327, + "learning_rate": 1.71596590217572e-05, + "loss": 1.3402, + "step": 44676 + }, + { + "epoch": 0.54, + "grad_norm": 15.311926038438614, + "learning_rate": 1.7159251174260206e-05, + "loss": 1.6586, + "step": 44679 + }, + { + "epoch": 0.54, + "grad_norm": 31.76270273947943, + "learning_rate": 1.7158843302331267e-05, + "loss": 1.3458, + "step": 44682 + }, + { + "epoch": 0.54, + "grad_norm": 15.420431451335636, + "learning_rate": 1.7158435405971772e-05, + "loss": 1.4333, + "step": 44685 + }, + { + "epoch": 0.54, + "grad_norm": 7.311235209800961, + "learning_rate": 1.715802748518312e-05, + "loss": 1.3409, + "step": 44688 + }, + { + "epoch": 0.54, + "grad_norm": 6.608001791066679, + "learning_rate": 1.71576195399667e-05, + "loss": 1.3376, + "step": 44691 + }, + { + "epoch": 0.54, + "grad_norm": 12.038776126550573, + "learning_rate": 1.7157211570323898e-05, + "loss": 1.3057, + "step": 44694 + }, + { + "epoch": 0.54, + "grad_norm": 13.72630299245276, + "learning_rate": 1.715680357625611e-05, + "loss": 1.3835, + "step": 44697 + }, + { + "epoch": 0.54, + "grad_norm": 50.19077902053995, + "learning_rate": 1.7156395557764735e-05, + "loss": 1.7391, + "step": 44700 + }, + { + "epoch": 0.54, + "grad_norm": 16.588214818343204, + "learning_rate": 1.7155987514851153e-05, + "loss": 1.2839, + "step": 44703 + }, + { + "epoch": 0.54, + "grad_norm": 15.379538252244037, + "learning_rate": 1.7155579447516767e-05, + "loss": 1.4609, + "step": 44706 + }, + { + "epoch": 0.54, + "grad_norm": 5.2616911761247405, + "learning_rate": 1.7155171355762964e-05, + "loss": 1.1475, + "step": 44709 + }, + { + "epoch": 0.54, + "grad_norm": 11.662281955480923, + "learning_rate": 1.715476323959114e-05, + "loss": 1.68, + "step": 44712 + }, + { + "epoch": 0.54, + "grad_norm": 11.274975723942685, + "learning_rate": 1.7154355099002688e-05, + "loss": 1.7513, + "step": 44715 + }, + { + "epoch": 0.54, + "grad_norm": 9.704557719199775, + "learning_rate": 1.7153946933998993e-05, + "loss": 1.5352, + "step": 44718 + }, + { + "epoch": 0.54, + "grad_norm": 27.152498450907164, + "learning_rate": 1.7153538744581458e-05, + "loss": 1.4572, + "step": 44721 + }, + { + "epoch": 0.54, + "grad_norm": 7.866112647065957, + "learning_rate": 1.715313053075147e-05, + "loss": 1.7182, + "step": 44724 + }, + { + "epoch": 0.54, + "grad_norm": 4.707982844971253, + "learning_rate": 1.7152722292510427e-05, + "loss": 1.0727, + "step": 44727 + }, + { + "epoch": 0.54, + "grad_norm": 19.92604764693081, + "learning_rate": 1.7152314029859717e-05, + "loss": 1.3546, + "step": 44730 + }, + { + "epoch": 0.54, + "grad_norm": 4.758440003613462, + "learning_rate": 1.7151905742800737e-05, + "loss": 1.4786, + "step": 44733 + }, + { + "epoch": 0.54, + "grad_norm": 97.80010880330003, + "learning_rate": 1.7151497431334878e-05, + "loss": 1.1317, + "step": 44736 + }, + { + "epoch": 0.54, + "grad_norm": 16.27802768738121, + "learning_rate": 1.7151089095463533e-05, + "loss": 1.5014, + "step": 44739 + }, + { + "epoch": 0.54, + "grad_norm": 21.38158825837129, + "learning_rate": 1.7150680735188098e-05, + "loss": 1.6027, + "step": 44742 + }, + { + "epoch": 0.54, + "grad_norm": 13.902910032150576, + "learning_rate": 1.7150272350509966e-05, + "loss": 1.4408, + "step": 44745 + }, + { + "epoch": 0.54, + "grad_norm": 14.677927547206563, + "learning_rate": 1.7149863941430533e-05, + "loss": 1.4739, + "step": 44748 + }, + { + "epoch": 0.54, + "grad_norm": 9.052308619017378, + "learning_rate": 1.7149455507951184e-05, + "loss": 1.4174, + "step": 44751 + }, + { + "epoch": 0.54, + "grad_norm": 40.88538019995334, + "learning_rate": 1.7149047050073318e-05, + "loss": 1.702, + "step": 44754 + }, + { + "epoch": 0.54, + "grad_norm": 20.974134459912236, + "learning_rate": 1.7148638567798336e-05, + "loss": 1.5334, + "step": 44757 + }, + { + "epoch": 0.54, + "grad_norm": 5.860503415192823, + "learning_rate": 1.7148230061127618e-05, + "loss": 1.6794, + "step": 44760 + }, + { + "epoch": 0.54, + "grad_norm": 10.914761536417956, + "learning_rate": 1.7147821530062572e-05, + "loss": 1.2037, + "step": 44763 + }, + { + "epoch": 0.54, + "grad_norm": 4.8315826270122555, + "learning_rate": 1.7147412974604584e-05, + "loss": 1.3505, + "step": 44766 + }, + { + "epoch": 0.54, + "grad_norm": 8.392871058830185, + "learning_rate": 1.7147004394755048e-05, + "loss": 1.3249, + "step": 44769 + }, + { + "epoch": 0.54, + "grad_norm": 32.195662033256234, + "learning_rate": 1.714659579051536e-05, + "loss": 1.3116, + "step": 44772 + }, + { + "epoch": 0.54, + "grad_norm": 65.94157519768957, + "learning_rate": 1.714618716188692e-05, + "loss": 0.9945, + "step": 44775 + }, + { + "epoch": 0.54, + "grad_norm": 27.482996253918728, + "learning_rate": 1.714577850887111e-05, + "loss": 1.5362, + "step": 44778 + }, + { + "epoch": 0.54, + "grad_norm": 5.364603129079447, + "learning_rate": 1.7145369831469336e-05, + "loss": 1.1976, + "step": 44781 + }, + { + "epoch": 0.54, + "grad_norm": 3.995559767371264, + "learning_rate": 1.7144961129682986e-05, + "loss": 1.5016, + "step": 44784 + }, + { + "epoch": 0.54, + "grad_norm": 16.411528772135597, + "learning_rate": 1.714455240351346e-05, + "loss": 1.5033, + "step": 44787 + }, + { + "epoch": 0.54, + "grad_norm": 16.857463130879484, + "learning_rate": 1.714414365296215e-05, + "loss": 1.2551, + "step": 44790 + }, + { + "epoch": 0.54, + "grad_norm": 48.06525194738046, + "learning_rate": 1.714373487803045e-05, + "loss": 1.3942, + "step": 44793 + }, + { + "epoch": 0.54, + "grad_norm": 26.122876151592084, + "learning_rate": 1.7143326078719756e-05, + "loss": 1.5889, + "step": 44796 + }, + { + "epoch": 0.54, + "grad_norm": 20.737038352636194, + "learning_rate": 1.714291725503146e-05, + "loss": 1.8175, + "step": 44799 + }, + { + "epoch": 0.54, + "grad_norm": 43.936050709108436, + "learning_rate": 1.7142508406966967e-05, + "loss": 1.2345, + "step": 44802 + }, + { + "epoch": 0.54, + "grad_norm": 20.68305549580575, + "learning_rate": 1.714209953452766e-05, + "loss": 1.3574, + "step": 44805 + }, + { + "epoch": 0.54, + "grad_norm": 7.999481853738109, + "learning_rate": 1.7141690637714943e-05, + "loss": 1.3231, + "step": 44808 + }, + { + "epoch": 0.54, + "grad_norm": 17.779761092854717, + "learning_rate": 1.7141281716530204e-05, + "loss": 1.4302, + "step": 44811 + }, + { + "epoch": 0.54, + "grad_norm": 14.208149909897529, + "learning_rate": 1.7140872770974847e-05, + "loss": 1.0081, + "step": 44814 + }, + { + "epoch": 0.54, + "grad_norm": 15.169063821093172, + "learning_rate": 1.7140463801050265e-05, + "loss": 1.6588, + "step": 44817 + }, + { + "epoch": 0.54, + "grad_norm": 12.571666702235566, + "learning_rate": 1.7140054806757848e-05, + "loss": 1.4848, + "step": 44820 + }, + { + "epoch": 0.54, + "grad_norm": 8.333165135028468, + "learning_rate": 1.7139645788098998e-05, + "loss": 1.1844, + "step": 44823 + }, + { + "epoch": 0.54, + "grad_norm": 9.156571257065583, + "learning_rate": 1.713923674507511e-05, + "loss": 1.5908, + "step": 44826 + }, + { + "epoch": 0.54, + "grad_norm": 9.33443224957092, + "learning_rate": 1.7138827677687574e-05, + "loss": 1.2276, + "step": 44829 + }, + { + "epoch": 0.54, + "grad_norm": 10.225873607034172, + "learning_rate": 1.7138418585937793e-05, + "loss": 1.5549, + "step": 44832 + }, + { + "epoch": 0.54, + "grad_norm": 10.146967545652751, + "learning_rate": 1.713800946982716e-05, + "loss": 1.4968, + "step": 44835 + }, + { + "epoch": 0.54, + "grad_norm": 19.404798813378008, + "learning_rate": 1.7137600329357073e-05, + "loss": 1.4334, + "step": 44838 + }, + { + "epoch": 0.54, + "grad_norm": 91.24371089716041, + "learning_rate": 1.7137191164528924e-05, + "loss": 1.2045, + "step": 44841 + }, + { + "epoch": 0.54, + "grad_norm": 19.582166835436173, + "learning_rate": 1.7136781975344113e-05, + "loss": 1.5518, + "step": 44844 + }, + { + "epoch": 0.54, + "grad_norm": 16.628492097755796, + "learning_rate": 1.713637276180404e-05, + "loss": 1.4701, + "step": 44847 + }, + { + "epoch": 0.54, + "grad_norm": 14.288790766625297, + "learning_rate": 1.7135963523910094e-05, + "loss": 1.3363, + "step": 44850 + }, + { + "epoch": 0.54, + "grad_norm": 4.13656364809264, + "learning_rate": 1.7135554261663676e-05, + "loss": 1.9003, + "step": 44853 + }, + { + "epoch": 0.54, + "grad_norm": 53.30780298199612, + "learning_rate": 1.713514497506618e-05, + "loss": 1.4613, + "step": 44856 + }, + { + "epoch": 0.54, + "grad_norm": 4.960270925277133, + "learning_rate": 1.7134735664119008e-05, + "loss": 1.308, + "step": 44859 + }, + { + "epoch": 0.54, + "grad_norm": 197.0507873942506, + "learning_rate": 1.7134326328823547e-05, + "loss": 1.3385, + "step": 44862 + }, + { + "epoch": 0.54, + "grad_norm": 17.3312968747787, + "learning_rate": 1.7133916969181203e-05, + "loss": 1.4112, + "step": 44865 + }, + { + "epoch": 0.54, + "grad_norm": 25.328618726889257, + "learning_rate": 1.713350758519337e-05, + "loss": 1.5473, + "step": 44868 + }, + { + "epoch": 0.54, + "grad_norm": 25.905291729798414, + "learning_rate": 1.7133098176861447e-05, + "loss": 1.4967, + "step": 44871 + }, + { + "epoch": 0.54, + "grad_norm": 10.051470732753328, + "learning_rate": 1.7132688744186827e-05, + "loss": 1.4669, + "step": 44874 + }, + { + "epoch": 0.54, + "grad_norm": 14.856425886770825, + "learning_rate": 1.7132279287170913e-05, + "loss": 1.4514, + "step": 44877 + }, + { + "epoch": 0.54, + "grad_norm": 5.689013552922804, + "learning_rate": 1.7131869805815094e-05, + "loss": 1.5347, + "step": 44880 + }, + { + "epoch": 0.54, + "grad_norm": 9.585303951530024, + "learning_rate": 1.7131460300120776e-05, + "loss": 1.5571, + "step": 44883 + }, + { + "epoch": 0.54, + "grad_norm": 12.429531535038768, + "learning_rate": 1.713105077008935e-05, + "loss": 1.2313, + "step": 44886 + }, + { + "epoch": 0.54, + "grad_norm": 15.277157806329217, + "learning_rate": 1.7130641215722216e-05, + "loss": 1.0491, + "step": 44889 + }, + { + "epoch": 0.54, + "grad_norm": 11.197408706020171, + "learning_rate": 1.7130231637020773e-05, + "loss": 1.5035, + "step": 44892 + }, + { + "epoch": 0.54, + "grad_norm": 44.91298819359387, + "learning_rate": 1.7129822033986418e-05, + "loss": 1.6738, + "step": 44895 + }, + { + "epoch": 0.54, + "grad_norm": 22.40662284467894, + "learning_rate": 1.712941240662055e-05, + "loss": 1.5033, + "step": 44898 + }, + { + "epoch": 0.54, + "grad_norm": 5.302036226378212, + "learning_rate": 1.7129002754924566e-05, + "loss": 1.476, + "step": 44901 + }, + { + "epoch": 0.54, + "grad_norm": 27.618808885288527, + "learning_rate": 1.712859307889986e-05, + "loss": 1.1655, + "step": 44904 + }, + { + "epoch": 0.54, + "grad_norm": 17.54236203595006, + "learning_rate": 1.7128183378547833e-05, + "loss": 1.3717, + "step": 44907 + }, + { + "epoch": 0.54, + "grad_norm": 44.65236571878002, + "learning_rate": 1.7127773653869887e-05, + "loss": 1.3274, + "step": 44910 + }, + { + "epoch": 0.54, + "grad_norm": 12.388363136673158, + "learning_rate": 1.7127363904867415e-05, + "loss": 1.3985, + "step": 44913 + }, + { + "epoch": 0.54, + "grad_norm": 20.696856918793948, + "learning_rate": 1.7126954131541816e-05, + "loss": 1.4228, + "step": 44916 + }, + { + "epoch": 0.54, + "grad_norm": 21.655435826634726, + "learning_rate": 1.7126544333894496e-05, + "loss": 1.7389, + "step": 44919 + }, + { + "epoch": 0.54, + "grad_norm": 27.457141402171015, + "learning_rate": 1.712613451192684e-05, + "loss": 2.0495, + "step": 44922 + }, + { + "epoch": 0.54, + "grad_norm": 12.069052214791068, + "learning_rate": 1.7125724665640255e-05, + "loss": 1.398, + "step": 44925 + }, + { + "epoch": 0.54, + "grad_norm": 3.585785477790738, + "learning_rate": 1.712531479503614e-05, + "loss": 1.6285, + "step": 44928 + }, + { + "epoch": 0.54, + "grad_norm": 3.484045858429294, + "learning_rate": 1.7124904900115896e-05, + "loss": 1.3833, + "step": 44931 + }, + { + "epoch": 0.54, + "grad_norm": 9.044155169434342, + "learning_rate": 1.7124494980880913e-05, + "loss": 1.0966, + "step": 44934 + }, + { + "epoch": 0.54, + "grad_norm": 69.53644073074348, + "learning_rate": 1.7124085037332594e-05, + "loss": 1.2134, + "step": 44937 + }, + { + "epoch": 0.54, + "grad_norm": 20.378339131974915, + "learning_rate": 1.712367506947234e-05, + "loss": 1.3502, + "step": 44940 + }, + { + "epoch": 0.54, + "grad_norm": 7.9936661960925806, + "learning_rate": 1.7123265077301548e-05, + "loss": 1.4995, + "step": 44943 + }, + { + "epoch": 0.54, + "grad_norm": 27.218898481134424, + "learning_rate": 1.7122855060821623e-05, + "loss": 1.6277, + "step": 44946 + }, + { + "epoch": 0.54, + "grad_norm": 5.872371175281278, + "learning_rate": 1.7122445020033956e-05, + "loss": 1.4745, + "step": 44949 + }, + { + "epoch": 0.54, + "grad_norm": 23.884106535098198, + "learning_rate": 1.712203495493995e-05, + "loss": 1.7387, + "step": 44952 + }, + { + "epoch": 0.54, + "grad_norm": 10.314839139724477, + "learning_rate": 1.7121624865541e-05, + "loss": 1.5147, + "step": 44955 + }, + { + "epoch": 0.54, + "grad_norm": 16.004922185276413, + "learning_rate": 1.7121214751838516e-05, + "loss": 1.2048, + "step": 44958 + }, + { + "epoch": 0.54, + "grad_norm": 157.79441644228658, + "learning_rate": 1.712080461383389e-05, + "loss": 1.5527, + "step": 44961 + }, + { + "epoch": 0.54, + "grad_norm": 14.217798258364958, + "learning_rate": 1.712039445152852e-05, + "loss": 1.0321, + "step": 44964 + }, + { + "epoch": 0.54, + "grad_norm": 2.656044274471205, + "learning_rate": 1.711998426492381e-05, + "loss": 1.1286, + "step": 44967 + }, + { + "epoch": 0.54, + "grad_norm": 12.448527465130583, + "learning_rate": 1.7119574054021156e-05, + "loss": 1.5229, + "step": 44970 + }, + { + "epoch": 0.54, + "grad_norm": 37.09944849735944, + "learning_rate": 1.7119163818821962e-05, + "loss": 1.6195, + "step": 44973 + }, + { + "epoch": 0.54, + "grad_norm": 5.879881189692774, + "learning_rate": 1.7118753559327627e-05, + "loss": 1.0802, + "step": 44976 + }, + { + "epoch": 0.54, + "grad_norm": 31.61724955653882, + "learning_rate": 1.711834327553955e-05, + "loss": 1.4743, + "step": 44979 + }, + { + "epoch": 0.54, + "grad_norm": 109.1302339941841, + "learning_rate": 1.711793296745913e-05, + "loss": 1.397, + "step": 44982 + }, + { + "epoch": 0.54, + "grad_norm": 15.963134862584162, + "learning_rate": 1.711752263508777e-05, + "loss": 1.609, + "step": 44985 + }, + { + "epoch": 0.54, + "grad_norm": 48.18840025324353, + "learning_rate": 1.7117112278426867e-05, + "loss": 1.5288, + "step": 44988 + }, + { + "epoch": 0.54, + "grad_norm": 5.253236325100119, + "learning_rate": 1.7116701897477826e-05, + "loss": 1.1841, + "step": 44991 + }, + { + "epoch": 0.54, + "grad_norm": 24.464928401417488, + "learning_rate": 1.7116291492242046e-05, + "loss": 1.3499, + "step": 44994 + }, + { + "epoch": 0.54, + "grad_norm": 61.82752855298996, + "learning_rate": 1.711588106272092e-05, + "loss": 1.4316, + "step": 44997 + }, + { + "epoch": 0.54, + "grad_norm": 14.245722952819328, + "learning_rate": 1.7115470608915862e-05, + "loss": 1.0715, + "step": 45000 + }, + { + "epoch": 0.54, + "grad_norm": 2.945407131965509, + "learning_rate": 1.7115060130828263e-05, + "loss": 1.6651, + "step": 45003 + }, + { + "epoch": 0.54, + "grad_norm": 8.414262843190714, + "learning_rate": 1.7114649628459525e-05, + "loss": 1.3356, + "step": 45006 + }, + { + "epoch": 0.54, + "grad_norm": 9.428085997226372, + "learning_rate": 1.7114239101811053e-05, + "loss": 1.3573, + "step": 45009 + }, + { + "epoch": 0.54, + "grad_norm": 5.487612592519895, + "learning_rate": 1.7113828550884244e-05, + "loss": 1.2694, + "step": 45012 + }, + { + "epoch": 0.54, + "grad_norm": 17.607048925011078, + "learning_rate": 1.7113417975680502e-05, + "loss": 1.4757, + "step": 45015 + }, + { + "epoch": 0.54, + "grad_norm": 6.850934491892411, + "learning_rate": 1.7113007376201226e-05, + "loss": 1.305, + "step": 45018 + }, + { + "epoch": 0.54, + "grad_norm": 13.80849688140842, + "learning_rate": 1.7112596752447816e-05, + "loss": 1.8748, + "step": 45021 + }, + { + "epoch": 0.54, + "grad_norm": 8.55004753320903, + "learning_rate": 1.7112186104421677e-05, + "loss": 1.4503, + "step": 45024 + }, + { + "epoch": 0.54, + "grad_norm": 20.508256042233885, + "learning_rate": 1.7111775432124208e-05, + "loss": 1.4399, + "step": 45027 + }, + { + "epoch": 0.54, + "grad_norm": 10.6773646370211, + "learning_rate": 1.7111364735556813e-05, + "loss": 1.5377, + "step": 45030 + }, + { + "epoch": 0.54, + "grad_norm": 16.19917979010055, + "learning_rate": 1.711095401472089e-05, + "loss": 1.4684, + "step": 45033 + }, + { + "epoch": 0.54, + "grad_norm": 13.789248800282444, + "learning_rate": 1.7110543269617844e-05, + "loss": 1.609, + "step": 45036 + }, + { + "epoch": 0.54, + "grad_norm": 25.897761132834784, + "learning_rate": 1.711013250024907e-05, + "loss": 1.8452, + "step": 45039 + }, + { + "epoch": 0.54, + "grad_norm": 14.802015866124895, + "learning_rate": 1.7109721706615983e-05, + "loss": 1.1562, + "step": 45042 + }, + { + "epoch": 0.54, + "grad_norm": 14.57708413254306, + "learning_rate": 1.710931088871997e-05, + "loss": 1.4141, + "step": 45045 + }, + { + "epoch": 0.54, + "grad_norm": 11.150949769641842, + "learning_rate": 1.7108900046562443e-05, + "loss": 1.4884, + "step": 45048 + }, + { + "epoch": 0.54, + "grad_norm": 10.188280908745414, + "learning_rate": 1.71084891801448e-05, + "loss": 1.2322, + "step": 45051 + }, + { + "epoch": 0.54, + "grad_norm": 25.405839524739974, + "learning_rate": 1.710807828946844e-05, + "loss": 1.3947, + "step": 45054 + }, + { + "epoch": 0.54, + "grad_norm": 3.816431252670428, + "learning_rate": 1.7107667374534776e-05, + "loss": 1.2877, + "step": 45057 + }, + { + "epoch": 0.54, + "grad_norm": 15.751469722093763, + "learning_rate": 1.71072564353452e-05, + "loss": 1.3278, + "step": 45060 + }, + { + "epoch": 0.54, + "grad_norm": 7.295888473680045, + "learning_rate": 1.7106845471901117e-05, + "loss": 1.3398, + "step": 45063 + }, + { + "epoch": 0.54, + "grad_norm": 10.846457009960389, + "learning_rate": 1.710643448420393e-05, + "loss": 1.0999, + "step": 45066 + }, + { + "epoch": 0.54, + "grad_norm": 17.000368324498897, + "learning_rate": 1.7106023472255046e-05, + "loss": 1.3344, + "step": 45069 + }, + { + "epoch": 0.54, + "grad_norm": 9.301074974483251, + "learning_rate": 1.7105612436055862e-05, + "loss": 1.1945, + "step": 45072 + }, + { + "epoch": 0.54, + "grad_norm": 10.845518952144388, + "learning_rate": 1.7105201375607778e-05, + "loss": 1.3269, + "step": 45075 + }, + { + "epoch": 0.54, + "grad_norm": 6.9857232180137965, + "learning_rate": 1.7104790290912204e-05, + "loss": 1.4655, + "step": 45078 + }, + { + "epoch": 0.54, + "grad_norm": 30.158141795154616, + "learning_rate": 1.710437918197054e-05, + "loss": 0.9987, + "step": 45081 + }, + { + "epoch": 0.54, + "grad_norm": 9.89104796839352, + "learning_rate": 1.710396804878419e-05, + "loss": 1.167, + "step": 45084 + }, + { + "epoch": 0.54, + "grad_norm": 16.990029707622362, + "learning_rate": 1.7103556891354552e-05, + "loss": 1.4922, + "step": 45087 + }, + { + "epoch": 0.54, + "grad_norm": 15.743871581091424, + "learning_rate": 1.7103145709683037e-05, + "loss": 1.5924, + "step": 45090 + }, + { + "epoch": 0.54, + "grad_norm": 17.969356429646957, + "learning_rate": 1.710273450377104e-05, + "loss": 1.3071, + "step": 45093 + }, + { + "epoch": 0.54, + "grad_norm": 6.394067784024365, + "learning_rate": 1.710232327361997e-05, + "loss": 1.4406, + "step": 45096 + }, + { + "epoch": 0.54, + "grad_norm": 3.956262092960221, + "learning_rate": 1.7101912019231232e-05, + "loss": 1.3614, + "step": 45099 + }, + { + "epoch": 0.54, + "grad_norm": 9.930704458342328, + "learning_rate": 1.7101500740606224e-05, + "loss": 1.6337, + "step": 45102 + }, + { + "epoch": 0.54, + "grad_norm": 29.374133484333555, + "learning_rate": 1.7101089437746354e-05, + "loss": 1.5371, + "step": 45105 + }, + { + "epoch": 0.54, + "grad_norm": 24.79478623861468, + "learning_rate": 1.710067811065302e-05, + "loss": 1.4844, + "step": 45108 + }, + { + "epoch": 0.54, + "grad_norm": 16.602250344982576, + "learning_rate": 1.7100266759327632e-05, + "loss": 1.5907, + "step": 45111 + }, + { + "epoch": 0.54, + "grad_norm": 14.926437368602299, + "learning_rate": 1.709985538377159e-05, + "loss": 1.2946, + "step": 45114 + }, + { + "epoch": 0.54, + "grad_norm": 18.7372627575075, + "learning_rate": 1.7099443983986298e-05, + "loss": 1.3002, + "step": 45117 + }, + { + "epoch": 0.54, + "grad_norm": 14.706817706764937, + "learning_rate": 1.7099032559973164e-05, + "loss": 1.2634, + "step": 45120 + }, + { + "epoch": 0.54, + "grad_norm": 9.634737882318289, + "learning_rate": 1.7098621111733587e-05, + "loss": 1.1831, + "step": 45123 + }, + { + "epoch": 0.54, + "grad_norm": 37.03693478274485, + "learning_rate": 1.7098209639268972e-05, + "loss": 1.4915, + "step": 45126 + }, + { + "epoch": 0.54, + "grad_norm": 8.463143644036077, + "learning_rate": 1.7097798142580726e-05, + "loss": 1.6403, + "step": 45129 + }, + { + "epoch": 0.54, + "grad_norm": 19.078436233623496, + "learning_rate": 1.7097386621670252e-05, + "loss": 1.4016, + "step": 45132 + }, + { + "epoch": 0.54, + "grad_norm": 15.671744706568004, + "learning_rate": 1.709697507653895e-05, + "loss": 1.8595, + "step": 45135 + }, + { + "epoch": 0.54, + "grad_norm": 11.954780751671647, + "learning_rate": 1.709656350718823e-05, + "loss": 1.2515, + "step": 45138 + }, + { + "epoch": 0.54, + "grad_norm": 13.657470647795106, + "learning_rate": 1.7096151913619498e-05, + "loss": 1.455, + "step": 45141 + }, + { + "epoch": 0.54, + "grad_norm": 8.71297010423685, + "learning_rate": 1.7095740295834154e-05, + "loss": 1.5654, + "step": 45144 + }, + { + "epoch": 0.54, + "grad_norm": 6.151918382704574, + "learning_rate": 1.7095328653833604e-05, + "loss": 1.5835, + "step": 45147 + }, + { + "epoch": 0.54, + "grad_norm": 10.690889503546154, + "learning_rate": 1.7094916987619253e-05, + "loss": 1.5236, + "step": 45150 + }, + { + "epoch": 0.54, + "grad_norm": 22.362335850768112, + "learning_rate": 1.7094505297192505e-05, + "loss": 1.0072, + "step": 45153 + }, + { + "epoch": 0.54, + "grad_norm": 5.250757822016539, + "learning_rate": 1.7094093582554767e-05, + "loss": 1.5133, + "step": 45156 + }, + { + "epoch": 0.54, + "grad_norm": 14.041301142714689, + "learning_rate": 1.7093681843707442e-05, + "loss": 1.5016, + "step": 45159 + }, + { + "epoch": 0.54, + "grad_norm": 11.986846440705802, + "learning_rate": 1.7093270080651935e-05, + "loss": 1.7593, + "step": 45162 + }, + { + "epoch": 0.54, + "grad_norm": 18.49329041144716, + "learning_rate": 1.7092858293389657e-05, + "loss": 1.4248, + "step": 45165 + }, + { + "epoch": 0.54, + "grad_norm": 14.54711183069711, + "learning_rate": 1.7092446481922006e-05, + "loss": 1.1605, + "step": 45168 + }, + { + "epoch": 0.54, + "grad_norm": 3.676571319783894, + "learning_rate": 1.7092034646250385e-05, + "loss": 1.0523, + "step": 45171 + }, + { + "epoch": 0.54, + "grad_norm": 14.595807174151744, + "learning_rate": 1.709162278637621e-05, + "loss": 1.2859, + "step": 45174 + }, + { + "epoch": 0.54, + "grad_norm": 24.424234362142982, + "learning_rate": 1.709121090230088e-05, + "loss": 1.3773, + "step": 45177 + }, + { + "epoch": 0.54, + "grad_norm": 6.935172914258889, + "learning_rate": 1.7090798994025802e-05, + "loss": 1.8669, + "step": 45180 + }, + { + "epoch": 0.54, + "grad_norm": 17.300520697281957, + "learning_rate": 1.7090387061552376e-05, + "loss": 1.7644, + "step": 45183 + }, + { + "epoch": 0.54, + "grad_norm": 10.085584122361366, + "learning_rate": 1.7089975104882017e-05, + "loss": 1.3302, + "step": 45186 + }, + { + "epoch": 0.54, + "grad_norm": 6.07510288462488, + "learning_rate": 1.708956312401613e-05, + "loss": 1.3374, + "step": 45189 + }, + { + "epoch": 0.54, + "grad_norm": 3.248365331142621, + "learning_rate": 1.708915111895611e-05, + "loss": 1.5447, + "step": 45192 + }, + { + "epoch": 0.54, + "grad_norm": 44.3982329591454, + "learning_rate": 1.7088739089703376e-05, + "loss": 1.5039, + "step": 45195 + }, + { + "epoch": 0.54, + "grad_norm": 27.8646599867314, + "learning_rate": 1.7088327036259327e-05, + "loss": 1.2924, + "step": 45198 + }, + { + "epoch": 0.54, + "grad_norm": 80.97355881819776, + "learning_rate": 1.7087914958625372e-05, + "loss": 1.605, + "step": 45201 + }, + { + "epoch": 0.54, + "grad_norm": 17.154466191043962, + "learning_rate": 1.7087502856802914e-05, + "loss": 1.2639, + "step": 45204 + }, + { + "epoch": 0.54, + "grad_norm": 18.02144786315551, + "learning_rate": 1.7087090730793366e-05, + "loss": 1.696, + "step": 45207 + }, + { + "epoch": 0.54, + "grad_norm": 3.2408833517967888, + "learning_rate": 1.7086678580598127e-05, + "loss": 1.5944, + "step": 45210 + }, + { + "epoch": 0.54, + "grad_norm": 3.260169847803043, + "learning_rate": 1.7086266406218607e-05, + "loss": 1.1303, + "step": 45213 + }, + { + "epoch": 0.54, + "grad_norm": 11.896575752496668, + "learning_rate": 1.7085854207656215e-05, + "loss": 1.5437, + "step": 45216 + }, + { + "epoch": 0.54, + "grad_norm": 5.8664401323099336, + "learning_rate": 1.708544198491235e-05, + "loss": 1.25, + "step": 45219 + }, + { + "epoch": 0.54, + "grad_norm": 10.552075615198259, + "learning_rate": 1.708502973798843e-05, + "loss": 1.2311, + "step": 45222 + }, + { + "epoch": 0.54, + "grad_norm": 11.698212564705473, + "learning_rate": 1.7084617466885847e-05, + "loss": 1.2659, + "step": 45225 + }, + { + "epoch": 0.54, + "grad_norm": 15.984680963223028, + "learning_rate": 1.7084205171606024e-05, + "loss": 1.2602, + "step": 45228 + }, + { + "epoch": 0.54, + "grad_norm": 7.8582762929914205, + "learning_rate": 1.7083792852150357e-05, + "loss": 1.2026, + "step": 45231 + }, + { + "epoch": 0.54, + "grad_norm": 11.628223332431638, + "learning_rate": 1.7083380508520257e-05, + "loss": 1.3735, + "step": 45234 + }, + { + "epoch": 0.54, + "grad_norm": 14.970522559004165, + "learning_rate": 1.708296814071713e-05, + "loss": 1.5644, + "step": 45237 + }, + { + "epoch": 0.54, + "grad_norm": 6.269232441807086, + "learning_rate": 1.7082555748742385e-05, + "loss": 1.3693, + "step": 45240 + }, + { + "epoch": 0.54, + "grad_norm": 14.003311004295515, + "learning_rate": 1.708214333259743e-05, + "loss": 1.4244, + "step": 45243 + }, + { + "epoch": 0.54, + "grad_norm": 17.949359047186903, + "learning_rate": 1.7081730892283665e-05, + "loss": 1.3424, + "step": 45246 + }, + { + "epoch": 0.54, + "grad_norm": 7.850552486369601, + "learning_rate": 1.708131842780251e-05, + "loss": 1.5423, + "step": 45249 + }, + { + "epoch": 0.54, + "grad_norm": 12.033518609790853, + "learning_rate": 1.7080905939155364e-05, + "loss": 1.6864, + "step": 45252 + }, + { + "epoch": 0.54, + "grad_norm": 13.620251438988172, + "learning_rate": 1.7080493426343633e-05, + "loss": 1.1984, + "step": 45255 + }, + { + "epoch": 0.54, + "grad_norm": 4.240152339104757, + "learning_rate": 1.708008088936873e-05, + "loss": 1.3416, + "step": 45258 + }, + { + "epoch": 0.54, + "grad_norm": 14.787892162830211, + "learning_rate": 1.7079668328232065e-05, + "loss": 1.0591, + "step": 45261 + }, + { + "epoch": 0.54, + "grad_norm": 9.393006628203292, + "learning_rate": 1.707925574293504e-05, + "loss": 1.363, + "step": 45264 + }, + { + "epoch": 0.54, + "grad_norm": 25.07237510205805, + "learning_rate": 1.7078843133479064e-05, + "loss": 1.2599, + "step": 45267 + }, + { + "epoch": 0.54, + "grad_norm": 8.785769817754788, + "learning_rate": 1.7078430499865544e-05, + "loss": 1.5011, + "step": 45270 + }, + { + "epoch": 0.54, + "grad_norm": 6.014253660759643, + "learning_rate": 1.707801784209589e-05, + "loss": 1.3588, + "step": 45273 + }, + { + "epoch": 0.54, + "grad_norm": 16.919238206854473, + "learning_rate": 1.707760516017152e-05, + "loss": 1.4267, + "step": 45276 + }, + { + "epoch": 0.54, + "grad_norm": 7.777820528373615, + "learning_rate": 1.7077192454093822e-05, + "loss": 1.5349, + "step": 45279 + }, + { + "epoch": 0.54, + "grad_norm": 11.259747635870696, + "learning_rate": 1.707677972386422e-05, + "loss": 1.5654, + "step": 45282 + }, + { + "epoch": 0.54, + "grad_norm": 15.138064990246813, + "learning_rate": 1.7076366969484118e-05, + "loss": 1.5227, + "step": 45285 + }, + { + "epoch": 0.54, + "grad_norm": 43.47748963387636, + "learning_rate": 1.7075954190954926e-05, + "loss": 1.1414, + "step": 45288 + }, + { + "epoch": 0.54, + "grad_norm": 7.759162252400846, + "learning_rate": 1.707554138827805e-05, + "loss": 1.3652, + "step": 45291 + }, + { + "epoch": 0.54, + "grad_norm": 7.949724930952424, + "learning_rate": 1.7075128561454896e-05, + "loss": 1.1697, + "step": 45294 + }, + { + "epoch": 0.54, + "grad_norm": 12.548732070317081, + "learning_rate": 1.707471571048688e-05, + "loss": 1.4947, + "step": 45297 + }, + { + "epoch": 0.54, + "grad_norm": 3.802811387915496, + "learning_rate": 1.7074302835375408e-05, + "loss": 1.1984, + "step": 45300 + }, + { + "epoch": 0.54, + "grad_norm": 13.106819460197306, + "learning_rate": 1.7073889936121886e-05, + "loss": 1.531, + "step": 45303 + }, + { + "epoch": 0.54, + "grad_norm": 13.834677392657687, + "learning_rate": 1.707347701272773e-05, + "loss": 1.2973, + "step": 45306 + }, + { + "epoch": 0.54, + "grad_norm": 30.259464786347145, + "learning_rate": 1.7073064065194343e-05, + "loss": 1.6899, + "step": 45309 + }, + { + "epoch": 0.54, + "grad_norm": 7.407400789043226, + "learning_rate": 1.7072651093523133e-05, + "loss": 1.5078, + "step": 45312 + }, + { + "epoch": 0.54, + "grad_norm": 3.4028826118752837, + "learning_rate": 1.7072238097715516e-05, + "loss": 1.4381, + "step": 45315 + }, + { + "epoch": 0.54, + "grad_norm": 6.8764244139080795, + "learning_rate": 1.7071825077772898e-05, + "loss": 1.4762, + "step": 45318 + }, + { + "epoch": 0.54, + "grad_norm": 11.089457337158851, + "learning_rate": 1.7071412033696685e-05, + "loss": 1.41, + "step": 45321 + }, + { + "epoch": 0.55, + "grad_norm": 11.706221592305466, + "learning_rate": 1.7070998965488293e-05, + "loss": 1.2744, + "step": 45324 + }, + { + "epoch": 0.55, + "grad_norm": 28.987565291991608, + "learning_rate": 1.7070585873149126e-05, + "loss": 1.1949, + "step": 45327 + }, + { + "epoch": 0.55, + "grad_norm": 9.866369839505863, + "learning_rate": 1.7070172756680595e-05, + "loss": 1.4591, + "step": 45330 + }, + { + "epoch": 0.55, + "grad_norm": 5.135204574748002, + "learning_rate": 1.7069759616084117e-05, + "loss": 1.4905, + "step": 45333 + }, + { + "epoch": 0.55, + "grad_norm": 28.851233691611597, + "learning_rate": 1.706934645136109e-05, + "loss": 1.4823, + "step": 45336 + }, + { + "epoch": 0.55, + "grad_norm": 11.137175943533531, + "learning_rate": 1.706893326251293e-05, + "loss": 1.5967, + "step": 45339 + }, + { + "epoch": 0.55, + "grad_norm": 9.7316536324259, + "learning_rate": 1.7068520049541052e-05, + "loss": 1.1719, + "step": 45342 + }, + { + "epoch": 0.55, + "grad_norm": 26.578535320692843, + "learning_rate": 1.7068106812446857e-05, + "loss": 1.4745, + "step": 45345 + }, + { + "epoch": 0.55, + "grad_norm": 6.189523832205621, + "learning_rate": 1.7067693551231763e-05, + "loss": 1.4277, + "step": 45348 + }, + { + "epoch": 0.55, + "grad_norm": 11.596229148623763, + "learning_rate": 1.706728026589717e-05, + "loss": 1.4516, + "step": 45351 + }, + { + "epoch": 0.55, + "grad_norm": 24.407572106659497, + "learning_rate": 1.7066866956444503e-05, + "loss": 1.1882, + "step": 45354 + }, + { + "epoch": 0.55, + "grad_norm": 13.151159912389199, + "learning_rate": 1.7066453622875163e-05, + "loss": 1.718, + "step": 45357 + }, + { + "epoch": 0.55, + "grad_norm": 9.84998824943197, + "learning_rate": 1.706604026519056e-05, + "loss": 1.3972, + "step": 45360 + }, + { + "epoch": 0.55, + "grad_norm": 11.526494609235835, + "learning_rate": 1.7065626883392107e-05, + "loss": 1.5014, + "step": 45363 + }, + { + "epoch": 0.55, + "grad_norm": 43.1877899122194, + "learning_rate": 1.7065213477481213e-05, + "loss": 1.5068, + "step": 45366 + }, + { + "epoch": 0.55, + "grad_norm": 31.835309162058437, + "learning_rate": 1.706480004745929e-05, + "loss": 2.1635, + "step": 45369 + }, + { + "epoch": 0.55, + "grad_norm": 10.813201260971283, + "learning_rate": 1.7064386593327752e-05, + "loss": 1.3103, + "step": 45372 + }, + { + "epoch": 0.55, + "grad_norm": 8.103881133876895, + "learning_rate": 1.7063973115088007e-05, + "loss": 1.5228, + "step": 45375 + }, + { + "epoch": 0.55, + "grad_norm": 9.163268926073666, + "learning_rate": 1.7063559612741464e-05, + "loss": 1.2296, + "step": 45378 + }, + { + "epoch": 0.55, + "grad_norm": 19.34106025472951, + "learning_rate": 1.706314608628954e-05, + "loss": 1.3102, + "step": 45381 + }, + { + "epoch": 0.55, + "grad_norm": 12.377701376302385, + "learning_rate": 1.7062732535733637e-05, + "loss": 1.2313, + "step": 45384 + }, + { + "epoch": 0.55, + "grad_norm": 29.687724601725446, + "learning_rate": 1.7062318961075176e-05, + "loss": 1.5339, + "step": 45387 + }, + { + "epoch": 0.55, + "grad_norm": 13.561827551028824, + "learning_rate": 1.7061905362315562e-05, + "loss": 1.5224, + "step": 45390 + }, + { + "epoch": 0.55, + "grad_norm": 33.179500770622035, + "learning_rate": 1.706149173945621e-05, + "loss": 1.4979, + "step": 45393 + }, + { + "epoch": 0.55, + "grad_norm": 11.69053905568344, + "learning_rate": 1.706107809249853e-05, + "loss": 1.3137, + "step": 45396 + }, + { + "epoch": 0.55, + "grad_norm": 35.812586216271, + "learning_rate": 1.7060664421443935e-05, + "loss": 1.9395, + "step": 45399 + }, + { + "epoch": 0.55, + "grad_norm": 44.11563084899066, + "learning_rate": 1.706025072629383e-05, + "loss": 1.8071, + "step": 45402 + }, + { + "epoch": 0.55, + "grad_norm": 28.63553992492145, + "learning_rate": 1.7059837007049642e-05, + "loss": 1.5704, + "step": 45405 + }, + { + "epoch": 0.55, + "grad_norm": 14.40286529679041, + "learning_rate": 1.7059423263712765e-05, + "loss": 1.2941, + "step": 45408 + }, + { + "epoch": 0.55, + "grad_norm": 27.492557702690473, + "learning_rate": 1.7059009496284625e-05, + "loss": 1.5614, + "step": 45411 + }, + { + "epoch": 0.55, + "grad_norm": 8.580140936002685, + "learning_rate": 1.7058595704766624e-05, + "loss": 1.9883, + "step": 45414 + }, + { + "epoch": 0.55, + "grad_norm": 14.688440331356036, + "learning_rate": 1.705818188916018e-05, + "loss": 1.3228, + "step": 45417 + }, + { + "epoch": 0.55, + "grad_norm": 11.615766416552825, + "learning_rate": 1.7057768049466704e-05, + "loss": 1.3767, + "step": 45420 + }, + { + "epoch": 0.55, + "grad_norm": 21.014278787127054, + "learning_rate": 1.7057354185687604e-05, + "loss": 1.2594, + "step": 45423 + }, + { + "epoch": 0.55, + "grad_norm": 6.463327368741533, + "learning_rate": 1.70569402978243e-05, + "loss": 1.3814, + "step": 45426 + }, + { + "epoch": 0.55, + "grad_norm": 24.15350967673353, + "learning_rate": 1.70565263858782e-05, + "loss": 1.1118, + "step": 45429 + }, + { + "epoch": 0.55, + "grad_norm": 38.279055720470375, + "learning_rate": 1.7056112449850718e-05, + "loss": 1.5929, + "step": 45432 + }, + { + "epoch": 0.55, + "grad_norm": 20.41355509878771, + "learning_rate": 1.7055698489743267e-05, + "loss": 1.2815, + "step": 45435 + }, + { + "epoch": 0.55, + "grad_norm": 12.438386708647638, + "learning_rate": 1.7055284505557255e-05, + "loss": 1.2839, + "step": 45438 + }, + { + "epoch": 0.55, + "grad_norm": 11.219673794391172, + "learning_rate": 1.70548704972941e-05, + "loss": 1.376, + "step": 45441 + }, + { + "epoch": 0.55, + "grad_norm": 8.075162916944159, + "learning_rate": 1.7054456464955213e-05, + "loss": 1.628, + "step": 45444 + }, + { + "epoch": 0.55, + "grad_norm": 16.512305032247006, + "learning_rate": 1.705404240854201e-05, + "loss": 1.2309, + "step": 45447 + }, + { + "epoch": 0.55, + "grad_norm": 138.82910282951477, + "learning_rate": 1.7053628328055896e-05, + "loss": 2.0345, + "step": 45450 + }, + { + "epoch": 0.55, + "grad_norm": 4.8128100679670816, + "learning_rate": 1.7053214223498293e-05, + "loss": 1.5835, + "step": 45453 + }, + { + "epoch": 0.55, + "grad_norm": 12.768234678454267, + "learning_rate": 1.7052800094870608e-05, + "loss": 1.2751, + "step": 45456 + }, + { + "epoch": 0.55, + "grad_norm": 13.4184407109266, + "learning_rate": 1.7052385942174258e-05, + "loss": 1.7096, + "step": 45459 + }, + { + "epoch": 0.55, + "grad_norm": 14.962437924175685, + "learning_rate": 1.7051971765410653e-05, + "loss": 1.2175, + "step": 45462 + }, + { + "epoch": 0.55, + "grad_norm": 15.24755895739242, + "learning_rate": 1.7051557564581212e-05, + "loss": 1.3886, + "step": 45465 + }, + { + "epoch": 0.55, + "grad_norm": 4.982445647669152, + "learning_rate": 1.7051143339687342e-05, + "loss": 1.3852, + "step": 45468 + }, + { + "epoch": 0.55, + "grad_norm": 16.83194404687301, + "learning_rate": 1.705072909073046e-05, + "loss": 1.1031, + "step": 45471 + }, + { + "epoch": 0.55, + "grad_norm": 4.92779961316721, + "learning_rate": 1.705031481771198e-05, + "loss": 1.5221, + "step": 45474 + }, + { + "epoch": 0.55, + "grad_norm": 12.78579597723398, + "learning_rate": 1.7049900520633315e-05, + "loss": 1.6389, + "step": 45477 + }, + { + "epoch": 0.55, + "grad_norm": 9.526438027396791, + "learning_rate": 1.7049486199495878e-05, + "loss": 1.4365, + "step": 45480 + }, + { + "epoch": 0.55, + "grad_norm": 3.5532316741889, + "learning_rate": 1.7049071854301086e-05, + "loss": 1.458, + "step": 45483 + }, + { + "epoch": 0.55, + "grad_norm": 68.05838318802743, + "learning_rate": 1.7048657485050348e-05, + "loss": 1.8355, + "step": 45486 + }, + { + "epoch": 0.55, + "grad_norm": 22.238370032314144, + "learning_rate": 1.704824309174508e-05, + "loss": 1.3056, + "step": 45489 + }, + { + "epoch": 0.55, + "grad_norm": 28.321192111978206, + "learning_rate": 1.70478286743867e-05, + "loss": 1.6618, + "step": 45492 + }, + { + "epoch": 0.55, + "grad_norm": 17.45283927538374, + "learning_rate": 1.7047414232976623e-05, + "loss": 1.1904, + "step": 45495 + }, + { + "epoch": 0.55, + "grad_norm": 27.94887540948949, + "learning_rate": 1.7046999767516254e-05, + "loss": 1.1366, + "step": 45498 + }, + { + "epoch": 0.55, + "grad_norm": 14.387475934365234, + "learning_rate": 1.7046585278007014e-05, + "loss": 1.659, + "step": 45501 + }, + { + "epoch": 0.55, + "grad_norm": 6.085339646725639, + "learning_rate": 1.7046170764450316e-05, + "loss": 1.2681, + "step": 45504 + }, + { + "epoch": 0.55, + "grad_norm": 5.613886974015399, + "learning_rate": 1.7045756226847578e-05, + "loss": 1.6598, + "step": 45507 + }, + { + "epoch": 0.55, + "grad_norm": 46.66236533120485, + "learning_rate": 1.704534166520021e-05, + "loss": 1.4673, + "step": 45510 + }, + { + "epoch": 0.55, + "grad_norm": 5.83333707118029, + "learning_rate": 1.704492707950963e-05, + "loss": 1.1993, + "step": 45513 + }, + { + "epoch": 0.55, + "grad_norm": 13.590255770867628, + "learning_rate": 1.7044512469777248e-05, + "loss": 1.2541, + "step": 45516 + }, + { + "epoch": 0.55, + "grad_norm": 20.741189181988155, + "learning_rate": 1.7044097836004487e-05, + "loss": 1.2931, + "step": 45519 + }, + { + "epoch": 0.55, + "grad_norm": 13.651070268401531, + "learning_rate": 1.7043683178192755e-05, + "loss": 1.2604, + "step": 45522 + }, + { + "epoch": 0.55, + "grad_norm": 8.850316593746792, + "learning_rate": 1.704326849634347e-05, + "loss": 1.1812, + "step": 45525 + }, + { + "epoch": 0.55, + "grad_norm": 20.289583877827248, + "learning_rate": 1.7042853790458045e-05, + "loss": 1.2178, + "step": 45528 + }, + { + "epoch": 0.55, + "grad_norm": 30.12595125067607, + "learning_rate": 1.70424390605379e-05, + "loss": 1.3389, + "step": 45531 + }, + { + "epoch": 0.55, + "grad_norm": 7.692750295365691, + "learning_rate": 1.7042024306584442e-05, + "loss": 1.7142, + "step": 45534 + }, + { + "epoch": 0.55, + "grad_norm": 6.245176393036063, + "learning_rate": 1.7041609528599094e-05, + "loss": 1.4749, + "step": 45537 + }, + { + "epoch": 0.55, + "grad_norm": 26.001357833358682, + "learning_rate": 1.7041194726583272e-05, + "loss": 1.8028, + "step": 45540 + }, + { + "epoch": 0.55, + "grad_norm": 29.34558409222012, + "learning_rate": 1.7040779900538386e-05, + "loss": 1.5334, + "step": 45543 + }, + { + "epoch": 0.55, + "grad_norm": 10.415393693769571, + "learning_rate": 1.7040365050465855e-05, + "loss": 1.3378, + "step": 45546 + }, + { + "epoch": 0.55, + "grad_norm": 3.5406429439896354, + "learning_rate": 1.7039950176367092e-05, + "loss": 1.5091, + "step": 45549 + }, + { + "epoch": 0.55, + "grad_norm": 15.53806219463914, + "learning_rate": 1.7039535278243517e-05, + "loss": 1.1928, + "step": 45552 + }, + { + "epoch": 0.55, + "grad_norm": 9.265306340297151, + "learning_rate": 1.7039120356096543e-05, + "loss": 1.4522, + "step": 45555 + }, + { + "epoch": 0.55, + "grad_norm": 2.8099180503186463, + "learning_rate": 1.703870540992759e-05, + "loss": 1.5146, + "step": 45558 + }, + { + "epoch": 0.55, + "grad_norm": 10.18458863024488, + "learning_rate": 1.7038290439738064e-05, + "loss": 1.476, + "step": 45561 + }, + { + "epoch": 0.55, + "grad_norm": 146.3026188376123, + "learning_rate": 1.7037875445529393e-05, + "loss": 1.3059, + "step": 45564 + }, + { + "epoch": 0.55, + "grad_norm": 22.96928841828063, + "learning_rate": 1.7037460427302987e-05, + "loss": 1.2648, + "step": 45567 + }, + { + "epoch": 0.55, + "grad_norm": 22.076170054242546, + "learning_rate": 1.7037045385060262e-05, + "loss": 1.5891, + "step": 45570 + }, + { + "epoch": 0.55, + "grad_norm": 81.40512725246319, + "learning_rate": 1.7036630318802636e-05, + "loss": 1.467, + "step": 45573 + }, + { + "epoch": 0.55, + "grad_norm": 4.080710071957849, + "learning_rate": 1.7036215228531528e-05, + "loss": 1.6344, + "step": 45576 + }, + { + "epoch": 0.55, + "grad_norm": 16.559573888847908, + "learning_rate": 1.703580011424835e-05, + "loss": 1.4482, + "step": 45579 + }, + { + "epoch": 0.55, + "grad_norm": 8.431355052768497, + "learning_rate": 1.7035384975954522e-05, + "loss": 1.1955, + "step": 45582 + }, + { + "epoch": 0.55, + "grad_norm": 17.69827042014608, + "learning_rate": 1.7034969813651457e-05, + "loss": 1.4041, + "step": 45585 + }, + { + "epoch": 0.55, + "grad_norm": 9.611428292268231, + "learning_rate": 1.7034554627340575e-05, + "loss": 1.278, + "step": 45588 + }, + { + "epoch": 0.55, + "grad_norm": 9.31245145947024, + "learning_rate": 1.703413941702329e-05, + "loss": 1.4352, + "step": 45591 + }, + { + "epoch": 0.55, + "grad_norm": 19.74458265648964, + "learning_rate": 1.7033724182701024e-05, + "loss": 1.4289, + "step": 45594 + }, + { + "epoch": 0.55, + "grad_norm": 21.545590962430584, + "learning_rate": 1.703330892437519e-05, + "loss": 1.1718, + "step": 45597 + }, + { + "epoch": 0.55, + "grad_norm": 9.71829359140819, + "learning_rate": 1.7032893642047205e-05, + "loss": 1.4532, + "step": 45600 + }, + { + "epoch": 0.55, + "grad_norm": 11.77752492999793, + "learning_rate": 1.7032478335718486e-05, + "loss": 1.3213, + "step": 45603 + }, + { + "epoch": 0.55, + "grad_norm": 8.267891687376473, + "learning_rate": 1.7032063005390455e-05, + "loss": 1.4486, + "step": 45606 + }, + { + "epoch": 0.55, + "grad_norm": 8.845736895137328, + "learning_rate": 1.7031647651064523e-05, + "loss": 1.4851, + "step": 45609 + }, + { + "epoch": 0.55, + "grad_norm": 22.26958296192836, + "learning_rate": 1.7031232272742113e-05, + "loss": 1.295, + "step": 45612 + }, + { + "epoch": 0.55, + "grad_norm": 9.865204582943203, + "learning_rate": 1.7030816870424636e-05, + "loss": 1.352, + "step": 45615 + }, + { + "epoch": 0.55, + "grad_norm": 23.054298365788046, + "learning_rate": 1.7030401444113514e-05, + "loss": 1.1068, + "step": 45618 + }, + { + "epoch": 0.55, + "grad_norm": 24.844117024018466, + "learning_rate": 1.702998599381017e-05, + "loss": 1.3837, + "step": 45621 + }, + { + "epoch": 0.55, + "grad_norm": 39.474493089792006, + "learning_rate": 1.702957051951601e-05, + "loss": 1.6628, + "step": 45624 + }, + { + "epoch": 0.55, + "grad_norm": 42.65151492118702, + "learning_rate": 1.7029155021232463e-05, + "loss": 1.1296, + "step": 45627 + }, + { + "epoch": 0.55, + "grad_norm": 14.438371032534615, + "learning_rate": 1.7028739498960938e-05, + "loss": 1.5056, + "step": 45630 + }, + { + "epoch": 0.55, + "grad_norm": 111.36457577354177, + "learning_rate": 1.702832395270286e-05, + "loss": 1.2192, + "step": 45633 + }, + { + "epoch": 0.55, + "grad_norm": 7.809515671595712, + "learning_rate": 1.702790838245964e-05, + "loss": 1.4378, + "step": 45636 + }, + { + "epoch": 0.55, + "grad_norm": 183.6273435361921, + "learning_rate": 1.70274927882327e-05, + "loss": 1.6118, + "step": 45639 + }, + { + "epoch": 0.55, + "grad_norm": 19.96783659661253, + "learning_rate": 1.7027077170023462e-05, + "loss": 1.2217, + "step": 45642 + }, + { + "epoch": 0.55, + "grad_norm": 60.65865189045898, + "learning_rate": 1.7026661527833335e-05, + "loss": 1.6415, + "step": 45645 + }, + { + "epoch": 0.55, + "grad_norm": 8.916456377257743, + "learning_rate": 1.7026245861663747e-05, + "loss": 1.3913, + "step": 45648 + }, + { + "epoch": 0.55, + "grad_norm": 24.981204769440687, + "learning_rate": 1.7025830171516116e-05, + "loss": 1.4645, + "step": 45651 + }, + { + "epoch": 0.55, + "grad_norm": 6.611587682906516, + "learning_rate": 1.7025414457391852e-05, + "loss": 1.5763, + "step": 45654 + }, + { + "epoch": 0.55, + "grad_norm": 9.241762122828954, + "learning_rate": 1.702499871929238e-05, + "loss": 1.1331, + "step": 45657 + }, + { + "epoch": 0.55, + "grad_norm": 40.835335277820086, + "learning_rate": 1.7024582957219124e-05, + "loss": 1.175, + "step": 45660 + }, + { + "epoch": 0.55, + "grad_norm": 41.541285606830726, + "learning_rate": 1.702416717117349e-05, + "loss": 1.7328, + "step": 45663 + }, + { + "epoch": 0.55, + "grad_norm": 39.43973898598111, + "learning_rate": 1.7023751361156902e-05, + "loss": 1.2068, + "step": 45666 + }, + { + "epoch": 0.55, + "grad_norm": 55.79121259283834, + "learning_rate": 1.7023335527170785e-05, + "loss": 1.34, + "step": 45669 + }, + { + "epoch": 0.55, + "grad_norm": 27.485664206397335, + "learning_rate": 1.7022919669216552e-05, + "loss": 1.3852, + "step": 45672 + }, + { + "epoch": 0.55, + "grad_norm": 11.341433234362437, + "learning_rate": 1.7022503787295627e-05, + "loss": 1.4688, + "step": 45675 + }, + { + "epoch": 0.55, + "grad_norm": 3.60704105193418, + "learning_rate": 1.7022087881409422e-05, + "loss": 1.3752, + "step": 45678 + }, + { + "epoch": 0.55, + "grad_norm": 22.176955642084696, + "learning_rate": 1.702167195155936e-05, + "loss": 1.5258, + "step": 45681 + }, + { + "epoch": 0.55, + "grad_norm": 18.481952060536386, + "learning_rate": 1.702125599774686e-05, + "loss": 1.3548, + "step": 45684 + }, + { + "epoch": 0.55, + "grad_norm": 37.80877761791612, + "learning_rate": 1.702084001997335e-05, + "loss": 1.7889, + "step": 45687 + }, + { + "epoch": 0.55, + "grad_norm": 14.640768150225247, + "learning_rate": 1.7020424018240234e-05, + "loss": 1.6982, + "step": 45690 + }, + { + "epoch": 0.55, + "grad_norm": 31.63580395547275, + "learning_rate": 1.702000799254894e-05, + "loss": 1.5061, + "step": 45693 + }, + { + "epoch": 0.55, + "grad_norm": 6.05260873760031, + "learning_rate": 1.701959194290089e-05, + "loss": 1.2959, + "step": 45696 + }, + { + "epoch": 0.55, + "grad_norm": 10.117409649343964, + "learning_rate": 1.7019175869297503e-05, + "loss": 1.6796, + "step": 45699 + }, + { + "epoch": 0.55, + "grad_norm": 7.257531064543819, + "learning_rate": 1.7018759771740194e-05, + "loss": 1.1981, + "step": 45702 + }, + { + "epoch": 0.55, + "grad_norm": 6.699319207026742, + "learning_rate": 1.7018343650230385e-05, + "loss": 1.2343, + "step": 45705 + }, + { + "epoch": 0.55, + "grad_norm": 9.461763661170902, + "learning_rate": 1.7017927504769504e-05, + "loss": 1.3765, + "step": 45708 + }, + { + "epoch": 0.55, + "grad_norm": 3.9009231644589466, + "learning_rate": 1.7017511335358956e-05, + "loss": 1.3573, + "step": 45711 + }, + { + "epoch": 0.55, + "grad_norm": 11.890196756452625, + "learning_rate": 1.7017095142000174e-05, + "loss": 1.1367, + "step": 45714 + }, + { + "epoch": 0.55, + "grad_norm": 18.16522328463264, + "learning_rate": 1.701667892469457e-05, + "loss": 1.1643, + "step": 45717 + }, + { + "epoch": 0.55, + "grad_norm": 7.40039243098227, + "learning_rate": 1.701626268344357e-05, + "loss": 1.407, + "step": 45720 + }, + { + "epoch": 0.55, + "grad_norm": 11.489048287085172, + "learning_rate": 1.7015846418248596e-05, + "loss": 1.5993, + "step": 45723 + }, + { + "epoch": 0.55, + "grad_norm": 7.656272447000368, + "learning_rate": 1.7015430129111062e-05, + "loss": 1.5334, + "step": 45726 + }, + { + "epoch": 0.55, + "grad_norm": 8.560890154800015, + "learning_rate": 1.7015013816032393e-05, + "loss": 1.2718, + "step": 45729 + }, + { + "epoch": 0.55, + "grad_norm": 9.236675239097114, + "learning_rate": 1.7014597479014008e-05, + "loss": 1.5885, + "step": 45732 + }, + { + "epoch": 0.55, + "grad_norm": 7.345251877272868, + "learning_rate": 1.7014181118057326e-05, + "loss": 1.5499, + "step": 45735 + }, + { + "epoch": 0.55, + "grad_norm": 12.055067052780915, + "learning_rate": 1.7013764733163775e-05, + "loss": 1.3174, + "step": 45738 + }, + { + "epoch": 0.55, + "grad_norm": 30.230414878071112, + "learning_rate": 1.701334832433477e-05, + "loss": 1.6629, + "step": 45741 + }, + { + "epoch": 0.55, + "grad_norm": 18.7844549013863, + "learning_rate": 1.701293189157173e-05, + "loss": 1.6799, + "step": 45744 + }, + { + "epoch": 0.55, + "grad_norm": 12.005578938686883, + "learning_rate": 1.701251543487608e-05, + "loss": 1.564, + "step": 45747 + }, + { + "epoch": 0.55, + "grad_norm": 31.739195599109166, + "learning_rate": 1.7012098954249247e-05, + "loss": 1.278, + "step": 45750 + }, + { + "epoch": 0.55, + "grad_norm": 32.27108734879201, + "learning_rate": 1.701168244969264e-05, + "loss": 1.4241, + "step": 45753 + }, + { + "epoch": 0.55, + "grad_norm": 78.04370426598769, + "learning_rate": 1.701126592120769e-05, + "loss": 1.1257, + "step": 45756 + }, + { + "epoch": 0.55, + "grad_norm": 20.808935913279374, + "learning_rate": 1.701084936879581e-05, + "loss": 1.0831, + "step": 45759 + }, + { + "epoch": 0.55, + "grad_norm": 16.308055295623053, + "learning_rate": 1.7010432792458435e-05, + "loss": 1.38, + "step": 45762 + }, + { + "epoch": 0.55, + "grad_norm": 4.377510841008985, + "learning_rate": 1.7010016192196968e-05, + "loss": 1.355, + "step": 45765 + }, + { + "epoch": 0.55, + "grad_norm": 29.537306427156345, + "learning_rate": 1.7009599568012845e-05, + "loss": 1.506, + "step": 45768 + }, + { + "epoch": 0.55, + "grad_norm": 10.06707005435332, + "learning_rate": 1.7009182919907485e-05, + "loss": 1.3431, + "step": 45771 + }, + { + "epoch": 0.55, + "grad_norm": 23.394314272925826, + "learning_rate": 1.7008766247882307e-05, + "loss": 1.2891, + "step": 45774 + }, + { + "epoch": 0.55, + "grad_norm": 99.32311637020561, + "learning_rate": 1.7008349551938735e-05, + "loss": 1.701, + "step": 45777 + }, + { + "epoch": 0.55, + "grad_norm": 23.226504540703175, + "learning_rate": 1.7007932832078192e-05, + "loss": 1.3969, + "step": 45780 + }, + { + "epoch": 0.55, + "grad_norm": 15.317847882197366, + "learning_rate": 1.7007516088302094e-05, + "loss": 1.5112, + "step": 45783 + }, + { + "epoch": 0.55, + "grad_norm": 16.31344305255366, + "learning_rate": 1.700709932061187e-05, + "loss": 1.5659, + "step": 45786 + }, + { + "epoch": 0.55, + "grad_norm": 10.980484210127477, + "learning_rate": 1.7006682529008944e-05, + "loss": 1.1864, + "step": 45789 + }, + { + "epoch": 0.55, + "grad_norm": 86.96647665271483, + "learning_rate": 1.7006265713494728e-05, + "loss": 1.3086, + "step": 45792 + }, + { + "epoch": 0.55, + "grad_norm": 20.778351120193662, + "learning_rate": 1.7005848874070653e-05, + "loss": 1.816, + "step": 45795 + }, + { + "epoch": 0.55, + "grad_norm": 3.951851778841109, + "learning_rate": 1.7005432010738137e-05, + "loss": 1.4192, + "step": 45798 + }, + { + "epoch": 0.55, + "grad_norm": 43.59392654288863, + "learning_rate": 1.7005015123498608e-05, + "loss": 1.739, + "step": 45801 + }, + { + "epoch": 0.55, + "grad_norm": 29.14021712507828, + "learning_rate": 1.700459821235349e-05, + "loss": 1.7508, + "step": 45804 + }, + { + "epoch": 0.55, + "grad_norm": 19.543313626480966, + "learning_rate": 1.700418127730419e-05, + "loss": 1.6843, + "step": 45807 + }, + { + "epoch": 0.55, + "grad_norm": 12.548515977281648, + "learning_rate": 1.7003764318352152e-05, + "loss": 1.5707, + "step": 45810 + }, + { + "epoch": 0.55, + "grad_norm": 51.081010199398676, + "learning_rate": 1.7003347335498785e-05, + "loss": 1.4991, + "step": 45813 + }, + { + "epoch": 0.55, + "grad_norm": 13.964126848167195, + "learning_rate": 1.7002930328745516e-05, + "loss": 1.4196, + "step": 45816 + }, + { + "epoch": 0.55, + "grad_norm": 4.131658558483847, + "learning_rate": 1.700251329809377e-05, + "loss": 1.4941, + "step": 45819 + }, + { + "epoch": 0.55, + "grad_norm": 5.917092964579949, + "learning_rate": 1.7002096243544968e-05, + "loss": 1.6519, + "step": 45822 + }, + { + "epoch": 0.55, + "grad_norm": 3.776268477852633, + "learning_rate": 1.7001679165100534e-05, + "loss": 1.6243, + "step": 45825 + }, + { + "epoch": 0.55, + "grad_norm": 16.283941494072568, + "learning_rate": 1.7001262062761887e-05, + "loss": 1.2821, + "step": 45828 + }, + { + "epoch": 0.55, + "grad_norm": 6.383166492442899, + "learning_rate": 1.7000844936530457e-05, + "loss": 1.0859, + "step": 45831 + }, + { + "epoch": 0.55, + "grad_norm": 10.865831888151675, + "learning_rate": 1.7000427786407666e-05, + "loss": 1.2587, + "step": 45834 + }, + { + "epoch": 0.55, + "grad_norm": 6.899684555261335, + "learning_rate": 1.7000010612394938e-05, + "loss": 1.3115, + "step": 45837 + }, + { + "epoch": 0.55, + "grad_norm": 3.1309000663430715, + "learning_rate": 1.6999593414493692e-05, + "loss": 1.4202, + "step": 45840 + }, + { + "epoch": 0.55, + "grad_norm": 11.679302284068294, + "learning_rate": 1.6999176192705355e-05, + "loss": 1.6073, + "step": 45843 + }, + { + "epoch": 0.55, + "grad_norm": 45.41020372765086, + "learning_rate": 1.6998758947031352e-05, + "loss": 1.6102, + "step": 45846 + }, + { + "epoch": 0.55, + "grad_norm": 7.928452720791574, + "learning_rate": 1.6998341677473104e-05, + "loss": 1.5024, + "step": 45849 + }, + { + "epoch": 0.55, + "grad_norm": 22.02279122175592, + "learning_rate": 1.699792438403204e-05, + "loss": 1.514, + "step": 45852 + }, + { + "epoch": 0.55, + "grad_norm": 13.832804033599523, + "learning_rate": 1.6997507066709578e-05, + "loss": 1.7738, + "step": 45855 + }, + { + "epoch": 0.55, + "grad_norm": 19.284153399466064, + "learning_rate": 1.6997089725507145e-05, + "loss": 1.5279, + "step": 45858 + }, + { + "epoch": 0.55, + "grad_norm": 37.949980307262216, + "learning_rate": 1.6996672360426166e-05, + "loss": 1.5581, + "step": 45861 + }, + { + "epoch": 0.55, + "grad_norm": 22.377183724879348, + "learning_rate": 1.6996254971468063e-05, + "loss": 1.5147, + "step": 45864 + }, + { + "epoch": 0.55, + "grad_norm": 41.08390893942516, + "learning_rate": 1.6995837558634263e-05, + "loss": 1.4883, + "step": 45867 + }, + { + "epoch": 0.55, + "grad_norm": 37.34890966099715, + "learning_rate": 1.6995420121926193e-05, + "loss": 1.498, + "step": 45870 + }, + { + "epoch": 0.55, + "grad_norm": 9.330062744773072, + "learning_rate": 1.6995002661345268e-05, + "loss": 1.3224, + "step": 45873 + }, + { + "epoch": 0.55, + "grad_norm": 9.36982660931897, + "learning_rate": 1.699458517689292e-05, + "loss": 1.4554, + "step": 45876 + }, + { + "epoch": 0.55, + "grad_norm": 9.951570461335443, + "learning_rate": 1.6994167668570573e-05, + "loss": 1.3209, + "step": 45879 + }, + { + "epoch": 0.55, + "grad_norm": 4.788976776976505, + "learning_rate": 1.699375013637965e-05, + "loss": 1.3445, + "step": 45882 + }, + { + "epoch": 0.55, + "grad_norm": 3.9232042989508797, + "learning_rate": 1.6993332580321577e-05, + "loss": 1.2969, + "step": 45885 + }, + { + "epoch": 0.55, + "grad_norm": 21.575182245957585, + "learning_rate": 1.6992915000397783e-05, + "loss": 1.5037, + "step": 45888 + }, + { + "epoch": 0.55, + "grad_norm": 10.597271767216071, + "learning_rate": 1.6992497396609685e-05, + "loss": 1.289, + "step": 45891 + }, + { + "epoch": 0.55, + "grad_norm": 24.966104885997307, + "learning_rate": 1.6992079768958714e-05, + "loss": 1.4724, + "step": 45894 + }, + { + "epoch": 0.55, + "grad_norm": 16.89125889785043, + "learning_rate": 1.699166211744629e-05, + "loss": 1.297, + "step": 45897 + }, + { + "epoch": 0.55, + "grad_norm": 21.909039412811353, + "learning_rate": 1.6991244442073842e-05, + "loss": 1.3875, + "step": 45900 + }, + { + "epoch": 0.55, + "grad_norm": 14.32533572789302, + "learning_rate": 1.69908267428428e-05, + "loss": 1.7284, + "step": 45903 + }, + { + "epoch": 0.55, + "grad_norm": 17.653307768558392, + "learning_rate": 1.6990409019754577e-05, + "loss": 1.3504, + "step": 45906 + }, + { + "epoch": 0.55, + "grad_norm": 15.109302273144882, + "learning_rate": 1.698999127281061e-05, + "loss": 1.3672, + "step": 45909 + }, + { + "epoch": 0.55, + "grad_norm": 8.159132580793308, + "learning_rate": 1.698957350201232e-05, + "loss": 1.6537, + "step": 45912 + }, + { + "epoch": 0.55, + "grad_norm": 37.46695128813536, + "learning_rate": 1.6989155707361134e-05, + "loss": 1.3122, + "step": 45915 + }, + { + "epoch": 0.55, + "grad_norm": 9.485101987689243, + "learning_rate": 1.698873788885848e-05, + "loss": 1.942, + "step": 45918 + }, + { + "epoch": 0.55, + "grad_norm": 40.036341269809284, + "learning_rate": 1.6988320046505777e-05, + "loss": 1.5278, + "step": 45921 + }, + { + "epoch": 0.55, + "grad_norm": 5.805604787461952, + "learning_rate": 1.6987902180304457e-05, + "loss": 1.6775, + "step": 45924 + }, + { + "epoch": 0.55, + "grad_norm": 6.830121421814571, + "learning_rate": 1.698748429025594e-05, + "loss": 1.6156, + "step": 45927 + }, + { + "epoch": 0.55, + "grad_norm": 11.119128314606787, + "learning_rate": 1.6987066376361657e-05, + "loss": 1.6926, + "step": 45930 + }, + { + "epoch": 0.55, + "grad_norm": 50.97822944939485, + "learning_rate": 1.6986648438623037e-05, + "loss": 1.3404, + "step": 45933 + }, + { + "epoch": 0.55, + "grad_norm": 23.53059766146133, + "learning_rate": 1.69862304770415e-05, + "loss": 1.494, + "step": 45936 + }, + { + "epoch": 0.55, + "grad_norm": 9.607728443928469, + "learning_rate": 1.6985812491618477e-05, + "loss": 1.4377, + "step": 45939 + }, + { + "epoch": 0.55, + "grad_norm": 16.866691292556563, + "learning_rate": 1.698539448235539e-05, + "loss": 1.1118, + "step": 45942 + }, + { + "epoch": 0.55, + "grad_norm": 2.915270150486302, + "learning_rate": 1.6984976449253666e-05, + "loss": 1.17, + "step": 45945 + }, + { + "epoch": 0.55, + "grad_norm": 28.721582204424685, + "learning_rate": 1.6984558392314733e-05, + "loss": 1.4226, + "step": 45948 + }, + { + "epoch": 0.55, + "grad_norm": 6.2520936322421345, + "learning_rate": 1.6984140311540024e-05, + "loss": 1.6251, + "step": 45951 + }, + { + "epoch": 0.55, + "grad_norm": 42.94743021872427, + "learning_rate": 1.6983722206930954e-05, + "loss": 1.1855, + "step": 45954 + }, + { + "epoch": 0.55, + "grad_norm": 16.46642977902024, + "learning_rate": 1.6983304078488958e-05, + "loss": 1.2457, + "step": 45957 + }, + { + "epoch": 0.55, + "grad_norm": 3.405730149366913, + "learning_rate": 1.698288592621546e-05, + "loss": 1.2557, + "step": 45960 + }, + { + "epoch": 0.55, + "grad_norm": 4.827070795843731, + "learning_rate": 1.698246775011189e-05, + "loss": 1.5271, + "step": 45963 + }, + { + "epoch": 0.55, + "grad_norm": 13.798240497157952, + "learning_rate": 1.6982049550179668e-05, + "loss": 1.3223, + "step": 45966 + }, + { + "epoch": 0.55, + "grad_norm": 30.208007314344343, + "learning_rate": 1.6981631326420228e-05, + "loss": 1.2022, + "step": 45969 + }, + { + "epoch": 0.55, + "grad_norm": 7.722093611173094, + "learning_rate": 1.6981213078834998e-05, + "loss": 1.6837, + "step": 45972 + }, + { + "epoch": 0.55, + "grad_norm": 8.64295479921005, + "learning_rate": 1.6980794807425397e-05, + "loss": 1.4552, + "step": 45975 + }, + { + "epoch": 0.55, + "grad_norm": 28.857635333373732, + "learning_rate": 1.6980376512192863e-05, + "loss": 1.6938, + "step": 45978 + }, + { + "epoch": 0.55, + "grad_norm": 12.275049564180302, + "learning_rate": 1.6979958193138815e-05, + "loss": 1.4697, + "step": 45981 + }, + { + "epoch": 0.55, + "grad_norm": 11.111401713291201, + "learning_rate": 1.6979539850264685e-05, + "loss": 1.45, + "step": 45984 + }, + { + "epoch": 0.55, + "grad_norm": 17.312278701694428, + "learning_rate": 1.6979121483571898e-05, + "loss": 1.199, + "step": 45987 + }, + { + "epoch": 0.55, + "grad_norm": 4.122123973579987, + "learning_rate": 1.6978703093061882e-05, + "loss": 1.1254, + "step": 45990 + }, + { + "epoch": 0.55, + "grad_norm": 9.841778258309844, + "learning_rate": 1.697828467873607e-05, + "loss": 1.4198, + "step": 45993 + }, + { + "epoch": 0.55, + "grad_norm": 15.666258913085551, + "learning_rate": 1.6977866240595883e-05, + "loss": 1.8593, + "step": 45996 + }, + { + "epoch": 0.55, + "grad_norm": 3.8206945803208665, + "learning_rate": 1.6977447778642755e-05, + "loss": 1.4638, + "step": 45999 + }, + { + "epoch": 0.55, + "grad_norm": 7.966629769690364, + "learning_rate": 1.6977029292878107e-05, + "loss": 1.0768, + "step": 46002 + }, + { + "epoch": 0.55, + "grad_norm": 19.959107602479666, + "learning_rate": 1.6976610783303374e-05, + "loss": 1.7157, + "step": 46005 + }, + { + "epoch": 0.55, + "grad_norm": 9.094074907924007, + "learning_rate": 1.697619224991998e-05, + "loss": 1.4702, + "step": 46008 + }, + { + "epoch": 0.55, + "grad_norm": 14.47344091037252, + "learning_rate": 1.697577369272936e-05, + "loss": 1.6566, + "step": 46011 + }, + { + "epoch": 0.55, + "grad_norm": 12.016241241646451, + "learning_rate": 1.6975355111732933e-05, + "loss": 1.4229, + "step": 46014 + }, + { + "epoch": 0.55, + "grad_norm": 11.295542853245848, + "learning_rate": 1.697493650693213e-05, + "loss": 1.3375, + "step": 46017 + }, + { + "epoch": 0.55, + "grad_norm": 12.196571656636149, + "learning_rate": 1.6974517878328383e-05, + "loss": 1.416, + "step": 46020 + }, + { + "epoch": 0.55, + "grad_norm": 11.811984364228742, + "learning_rate": 1.6974099225923116e-05, + "loss": 1.1296, + "step": 46023 + }, + { + "epoch": 0.55, + "grad_norm": 10.09033800934227, + "learning_rate": 1.697368054971776e-05, + "loss": 1.4129, + "step": 46026 + }, + { + "epoch": 0.55, + "grad_norm": 178.6119739672121, + "learning_rate": 1.6973261849713748e-05, + "loss": 1.25, + "step": 46029 + }, + { + "epoch": 0.55, + "grad_norm": 3.187695859087827, + "learning_rate": 1.6972843125912507e-05, + "loss": 1.3731, + "step": 46032 + }, + { + "epoch": 0.55, + "grad_norm": 14.526066333056308, + "learning_rate": 1.697242437831546e-05, + "loss": 1.2659, + "step": 46035 + }, + { + "epoch": 0.55, + "grad_norm": 6.442484511625568, + "learning_rate": 1.6972005606924036e-05, + "loss": 1.1021, + "step": 46038 + }, + { + "epoch": 0.55, + "grad_norm": 15.322752531953789, + "learning_rate": 1.6971586811739677e-05, + "loss": 1.7144, + "step": 46041 + }, + { + "epoch": 0.55, + "grad_norm": 5.291385437788449, + "learning_rate": 1.6971167992763798e-05, + "loss": 1.2164, + "step": 46044 + }, + { + "epoch": 0.55, + "grad_norm": 8.569114112623685, + "learning_rate": 1.6970749149997833e-05, + "loss": 1.5211, + "step": 46047 + }, + { + "epoch": 0.55, + "grad_norm": 28.80070267018038, + "learning_rate": 1.6970330283443216e-05, + "loss": 1.2592, + "step": 46050 + }, + { + "epoch": 0.55, + "grad_norm": 70.53084696282546, + "learning_rate": 1.696991139310137e-05, + "loss": 1.7461, + "step": 46053 + }, + { + "epoch": 0.55, + "grad_norm": 4.448086675727181, + "learning_rate": 1.6969492478973725e-05, + "loss": 1.2384, + "step": 46056 + }, + { + "epoch": 0.55, + "grad_norm": 9.503152170229464, + "learning_rate": 1.6969073541061715e-05, + "loss": 1.5804, + "step": 46059 + }, + { + "epoch": 0.55, + "grad_norm": 83.7507179746636, + "learning_rate": 1.6968654579366768e-05, + "loss": 1.611, + "step": 46062 + }, + { + "epoch": 0.55, + "grad_norm": 43.585261570795524, + "learning_rate": 1.696823559389031e-05, + "loss": 1.8736, + "step": 46065 + }, + { + "epoch": 0.55, + "grad_norm": 4.594412812738114, + "learning_rate": 1.696781658463378e-05, + "loss": 1.4988, + "step": 46068 + }, + { + "epoch": 0.55, + "grad_norm": 2.753857575908417, + "learning_rate": 1.6967397551598597e-05, + "loss": 1.4131, + "step": 46071 + }, + { + "epoch": 0.55, + "grad_norm": 8.743358022302775, + "learning_rate": 1.6966978494786194e-05, + "loss": 1.5188, + "step": 46074 + }, + { + "epoch": 0.55, + "grad_norm": 27.444338016596667, + "learning_rate": 1.6966559414198006e-05, + "loss": 1.3974, + "step": 46077 + }, + { + "epoch": 0.55, + "grad_norm": 30.946077920395926, + "learning_rate": 1.696614030983546e-05, + "loss": 1.3877, + "step": 46080 + }, + { + "epoch": 0.55, + "grad_norm": 23.282142342376822, + "learning_rate": 1.6965721181699985e-05, + "loss": 1.7414, + "step": 46083 + }, + { + "epoch": 0.55, + "grad_norm": 4.129926561983557, + "learning_rate": 1.6965302029793012e-05, + "loss": 1.3643, + "step": 46086 + }, + { + "epoch": 0.55, + "grad_norm": 18.819203376088026, + "learning_rate": 1.6964882854115977e-05, + "loss": 1.4139, + "step": 46089 + }, + { + "epoch": 0.55, + "grad_norm": 30.95249331882706, + "learning_rate": 1.69644636546703e-05, + "loss": 1.332, + "step": 46092 + }, + { + "epoch": 0.55, + "grad_norm": 7.9010635186742135, + "learning_rate": 1.6964044431457417e-05, + "loss": 1.3354, + "step": 46095 + }, + { + "epoch": 0.55, + "grad_norm": 30.294925864873868, + "learning_rate": 1.696362518447876e-05, + "loss": 1.8135, + "step": 46098 + }, + { + "epoch": 0.55, + "grad_norm": 7.4598419555187725, + "learning_rate": 1.6963205913735763e-05, + "loss": 1.5975, + "step": 46101 + }, + { + "epoch": 0.55, + "grad_norm": 15.09018683634316, + "learning_rate": 1.696278661922985e-05, + "loss": 1.534, + "step": 46104 + }, + { + "epoch": 0.55, + "grad_norm": 4.395223779233005, + "learning_rate": 1.6962367300962453e-05, + "loss": 1.3829, + "step": 46107 + }, + { + "epoch": 0.55, + "grad_norm": 17.123259498842256, + "learning_rate": 1.6961947958935005e-05, + "loss": 1.3785, + "step": 46110 + }, + { + "epoch": 0.55, + "grad_norm": 15.724136626134735, + "learning_rate": 1.6961528593148933e-05, + "loss": 1.5311, + "step": 46113 + }, + { + "epoch": 0.55, + "grad_norm": 12.525340614341884, + "learning_rate": 1.6961109203605674e-05, + "loss": 1.7585, + "step": 46116 + }, + { + "epoch": 0.55, + "grad_norm": 18.6318505268588, + "learning_rate": 1.6960689790306658e-05, + "loss": 1.6803, + "step": 46119 + }, + { + "epoch": 0.55, + "grad_norm": 18.701580782035716, + "learning_rate": 1.696027035325331e-05, + "loss": 1.6377, + "step": 46122 + }, + { + "epoch": 0.55, + "grad_norm": 26.038398097834083, + "learning_rate": 1.6959850892447073e-05, + "loss": 1.8198, + "step": 46125 + }, + { + "epoch": 0.55, + "grad_norm": 27.37186772008942, + "learning_rate": 1.695943140788937e-05, + "loss": 1.5522, + "step": 46128 + }, + { + "epoch": 0.55, + "grad_norm": 16.327941495413178, + "learning_rate": 1.6959011899581633e-05, + "loss": 1.7725, + "step": 46131 + }, + { + "epoch": 0.55, + "grad_norm": 40.40012684556644, + "learning_rate": 1.6958592367525294e-05, + "loss": 1.6359, + "step": 46134 + }, + { + "epoch": 0.55, + "grad_norm": 5.969727656309851, + "learning_rate": 1.6958172811721787e-05, + "loss": 1.7243, + "step": 46137 + }, + { + "epoch": 0.55, + "grad_norm": 3.3111810931368066, + "learning_rate": 1.695775323217254e-05, + "loss": 1.5415, + "step": 46140 + }, + { + "epoch": 0.55, + "grad_norm": 10.045980497093613, + "learning_rate": 1.6957333628878993e-05, + "loss": 1.3049, + "step": 46143 + }, + { + "epoch": 0.55, + "grad_norm": 7.565176651182903, + "learning_rate": 1.695691400184257e-05, + "loss": 1.3908, + "step": 46146 + }, + { + "epoch": 0.55, + "grad_norm": 16.654218186211253, + "learning_rate": 1.6956494351064704e-05, + "loss": 1.3575, + "step": 46149 + }, + { + "epoch": 0.55, + "grad_norm": 39.134999627221205, + "learning_rate": 1.695607467654683e-05, + "loss": 1.3083, + "step": 46152 + }, + { + "epoch": 0.55, + "grad_norm": 6.2610726640136365, + "learning_rate": 1.6955654978290377e-05, + "loss": 1.3449, + "step": 46155 + }, + { + "epoch": 0.56, + "grad_norm": 11.719183444175197, + "learning_rate": 1.6955235256296777e-05, + "loss": 1.4694, + "step": 46158 + }, + { + "epoch": 0.56, + "grad_norm": 27.6359152630768, + "learning_rate": 1.695481551056747e-05, + "loss": 1.3851, + "step": 46161 + }, + { + "epoch": 0.56, + "grad_norm": 3.301101729354593, + "learning_rate": 1.695439574110388e-05, + "loss": 1.4898, + "step": 46164 + }, + { + "epoch": 0.56, + "grad_norm": 28.18662920660379, + "learning_rate": 1.695397594790744e-05, + "loss": 1.3417, + "step": 46167 + }, + { + "epoch": 0.56, + "grad_norm": 27.69322710310544, + "learning_rate": 1.695355613097959e-05, + "loss": 1.2226, + "step": 46170 + }, + { + "epoch": 0.56, + "grad_norm": 41.42853213484854, + "learning_rate": 1.6953136290321756e-05, + "loss": 1.6664, + "step": 46173 + }, + { + "epoch": 0.56, + "grad_norm": 15.175429392661894, + "learning_rate": 1.6952716425935368e-05, + "loss": 1.5005, + "step": 46176 + }, + { + "epoch": 0.56, + "grad_norm": 24.339007617937895, + "learning_rate": 1.6952296537821865e-05, + "loss": 1.3653, + "step": 46179 + }, + { + "epoch": 0.56, + "grad_norm": 16.68178436329496, + "learning_rate": 1.695187662598268e-05, + "loss": 1.4675, + "step": 46182 + }, + { + "epoch": 0.56, + "grad_norm": 21.362455976570143, + "learning_rate": 1.6951456690419244e-05, + "loss": 1.6806, + "step": 46185 + }, + { + "epoch": 0.56, + "grad_norm": 12.81963829094722, + "learning_rate": 1.695103673113299e-05, + "loss": 1.6319, + "step": 46188 + }, + { + "epoch": 0.56, + "grad_norm": 12.565445788890994, + "learning_rate": 1.695061674812535e-05, + "loss": 1.4157, + "step": 46191 + }, + { + "epoch": 0.56, + "grad_norm": 23.05403159283653, + "learning_rate": 1.6950196741397762e-05, + "loss": 1.3223, + "step": 46194 + }, + { + "epoch": 0.56, + "grad_norm": 18.855327084877842, + "learning_rate": 1.6949776710951652e-05, + "loss": 1.806, + "step": 46197 + }, + { + "epoch": 0.56, + "grad_norm": 6.229181623150513, + "learning_rate": 1.694935665678846e-05, + "loss": 1.3888, + "step": 46200 + }, + { + "epoch": 0.56, + "grad_norm": 8.148415427174763, + "learning_rate": 1.6948936578909613e-05, + "loss": 1.2827, + "step": 46203 + }, + { + "epoch": 0.56, + "grad_norm": 14.12083691166871, + "learning_rate": 1.6948516477316553e-05, + "loss": 1.5153, + "step": 46206 + }, + { + "epoch": 0.56, + "grad_norm": 18.024622014354296, + "learning_rate": 1.6948096352010708e-05, + "loss": 1.4436, + "step": 46209 + }, + { + "epoch": 0.56, + "grad_norm": 10.777742141146138, + "learning_rate": 1.6947676202993514e-05, + "loss": 1.8943, + "step": 46212 + }, + { + "epoch": 0.56, + "grad_norm": 19.21168700418001, + "learning_rate": 1.69472560302664e-05, + "loss": 1.225, + "step": 46215 + }, + { + "epoch": 0.56, + "grad_norm": 13.472979179511842, + "learning_rate": 1.6946835833830805e-05, + "loss": 1.2438, + "step": 46218 + }, + { + "epoch": 0.56, + "grad_norm": 16.38123097113486, + "learning_rate": 1.6946415613688162e-05, + "loss": 1.3201, + "step": 46221 + }, + { + "epoch": 0.56, + "grad_norm": 11.33030232695725, + "learning_rate": 1.6945995369839904e-05, + "loss": 1.3123, + "step": 46224 + }, + { + "epoch": 0.56, + "grad_norm": 14.79268969955665, + "learning_rate": 1.694557510228747e-05, + "loss": 1.2692, + "step": 46227 + }, + { + "epoch": 0.56, + "grad_norm": 11.5076595522756, + "learning_rate": 1.6945154811032284e-05, + "loss": 1.538, + "step": 46230 + }, + { + "epoch": 0.56, + "grad_norm": 19.307216073804288, + "learning_rate": 1.6944734496075786e-05, + "loss": 1.5497, + "step": 46233 + }, + { + "epoch": 0.56, + "grad_norm": 7.570148277352006, + "learning_rate": 1.6944314157419418e-05, + "loss": 1.3916, + "step": 46236 + }, + { + "epoch": 0.56, + "grad_norm": 12.311103844402346, + "learning_rate": 1.6943893795064602e-05, + "loss": 1.6015, + "step": 46239 + }, + { + "epoch": 0.56, + "grad_norm": 16.005653140414456, + "learning_rate": 1.6943473409012777e-05, + "loss": 1.2526, + "step": 46242 + }, + { + "epoch": 0.56, + "grad_norm": 8.232137894881339, + "learning_rate": 1.694305299926538e-05, + "loss": 1.2977, + "step": 46245 + }, + { + "epoch": 0.56, + "grad_norm": 12.747002323877524, + "learning_rate": 1.6942632565823843e-05, + "loss": 1.2467, + "step": 46248 + }, + { + "epoch": 0.56, + "grad_norm": 17.339745932807972, + "learning_rate": 1.6942212108689602e-05, + "loss": 1.9012, + "step": 46251 + }, + { + "epoch": 0.56, + "grad_norm": 7.828794853434878, + "learning_rate": 1.6941791627864093e-05, + "loss": 1.355, + "step": 46254 + }, + { + "epoch": 0.56, + "grad_norm": 13.851919824343748, + "learning_rate": 1.6941371123348747e-05, + "loss": 1.2953, + "step": 46257 + }, + { + "epoch": 0.56, + "grad_norm": 4.762611022993234, + "learning_rate": 1.6940950595145002e-05, + "loss": 1.4512, + "step": 46260 + }, + { + "epoch": 0.56, + "grad_norm": 20.315968050228868, + "learning_rate": 1.6940530043254297e-05, + "loss": 1.0568, + "step": 46263 + }, + { + "epoch": 0.56, + "grad_norm": 22.455611860406787, + "learning_rate": 1.6940109467678056e-05, + "loss": 1.8686, + "step": 46266 + }, + { + "epoch": 0.56, + "grad_norm": 14.89013123691748, + "learning_rate": 1.6939688868417727e-05, + "loss": 1.1326, + "step": 46269 + }, + { + "epoch": 0.56, + "grad_norm": 14.360174025542882, + "learning_rate": 1.6939268245474737e-05, + "loss": 1.4019, + "step": 46272 + }, + { + "epoch": 0.56, + "grad_norm": 16.05019375876188, + "learning_rate": 1.6938847598850525e-05, + "loss": 1.5741, + "step": 46275 + }, + { + "epoch": 0.56, + "grad_norm": 21.016284317805425, + "learning_rate": 1.6938426928546524e-05, + "loss": 1.5256, + "step": 46278 + }, + { + "epoch": 0.56, + "grad_norm": 9.3837447789013, + "learning_rate": 1.693800623456417e-05, + "loss": 1.4151, + "step": 46281 + }, + { + "epoch": 0.56, + "grad_norm": 13.729346133710312, + "learning_rate": 1.6937585516904903e-05, + "loss": 1.9238, + "step": 46284 + }, + { + "epoch": 0.56, + "grad_norm": 12.814552766373883, + "learning_rate": 1.6937164775570156e-05, + "loss": 1.6673, + "step": 46287 + }, + { + "epoch": 0.56, + "grad_norm": 21.28670705602147, + "learning_rate": 1.6936744010561363e-05, + "loss": 1.5362, + "step": 46290 + }, + { + "epoch": 0.56, + "grad_norm": 12.63892751824195, + "learning_rate": 1.693632322187996e-05, + "loss": 1.5452, + "step": 46293 + }, + { + "epoch": 0.56, + "grad_norm": 15.184718685257426, + "learning_rate": 1.6935902409527386e-05, + "loss": 1.1601, + "step": 46296 + }, + { + "epoch": 0.56, + "grad_norm": 13.889815752293602, + "learning_rate": 1.6935481573505072e-05, + "loss": 1.3275, + "step": 46299 + }, + { + "epoch": 0.56, + "grad_norm": 14.784677058550974, + "learning_rate": 1.6935060713814465e-05, + "loss": 1.4718, + "step": 46302 + }, + { + "epoch": 0.56, + "grad_norm": 5.983885959568979, + "learning_rate": 1.6934639830456986e-05, + "loss": 1.3446, + "step": 46305 + }, + { + "epoch": 0.56, + "grad_norm": 13.333444868690528, + "learning_rate": 1.6934218923434086e-05, + "loss": 1.8435, + "step": 46308 + }, + { + "epoch": 0.56, + "grad_norm": 12.134712649659564, + "learning_rate": 1.6933797992747187e-05, + "loss": 1.3159, + "step": 46311 + }, + { + "epoch": 0.56, + "grad_norm": 10.482606477496521, + "learning_rate": 1.6933377038397735e-05, + "loss": 1.4924, + "step": 46314 + }, + { + "epoch": 0.56, + "grad_norm": 11.551483126972036, + "learning_rate": 1.693295606038717e-05, + "loss": 1.3697, + "step": 46317 + }, + { + "epoch": 0.56, + "grad_norm": 18.409786764135973, + "learning_rate": 1.6932535058716917e-05, + "loss": 1.5232, + "step": 46320 + }, + { + "epoch": 0.56, + "grad_norm": 9.069400311169105, + "learning_rate": 1.693211403338842e-05, + "loss": 1.3941, + "step": 46323 + }, + { + "epoch": 0.56, + "grad_norm": 13.974443970785861, + "learning_rate": 1.6931692984403118e-05, + "loss": 1.5936, + "step": 46326 + }, + { + "epoch": 0.56, + "grad_norm": 21.042774110250566, + "learning_rate": 1.6931271911762445e-05, + "loss": 1.0764, + "step": 46329 + }, + { + "epoch": 0.56, + "grad_norm": 8.312237339534153, + "learning_rate": 1.693085081546783e-05, + "loss": 1.3734, + "step": 46332 + }, + { + "epoch": 0.56, + "grad_norm": 13.968077563834713, + "learning_rate": 1.6930429695520727e-05, + "loss": 1.2925, + "step": 46335 + }, + { + "epoch": 0.56, + "grad_norm": 24.189080501956187, + "learning_rate": 1.693000855192256e-05, + "loss": 1.4366, + "step": 46338 + }, + { + "epoch": 0.56, + "grad_norm": 22.752142959439173, + "learning_rate": 1.6929587384674767e-05, + "loss": 1.1618, + "step": 46341 + }, + { + "epoch": 0.56, + "grad_norm": 22.14930522034627, + "learning_rate": 1.692916619377879e-05, + "loss": 1.1888, + "step": 46344 + }, + { + "epoch": 0.56, + "grad_norm": 13.922201446826813, + "learning_rate": 1.6928744979236067e-05, + "loss": 1.6378, + "step": 46347 + }, + { + "epoch": 0.56, + "grad_norm": 16.895339069979844, + "learning_rate": 1.6928323741048033e-05, + "loss": 1.5699, + "step": 46350 + }, + { + "epoch": 0.56, + "grad_norm": 32.22913244512119, + "learning_rate": 1.6927902479216123e-05, + "loss": 1.5521, + "step": 46353 + }, + { + "epoch": 0.56, + "grad_norm": 10.582030912439269, + "learning_rate": 1.6927481193741783e-05, + "loss": 1.3978, + "step": 46356 + }, + { + "epoch": 0.56, + "grad_norm": 39.43544452993, + "learning_rate": 1.692705988462644e-05, + "loss": 1.2337, + "step": 46359 + }, + { + "epoch": 0.56, + "grad_norm": 8.352924506748453, + "learning_rate": 1.6926638551871536e-05, + "loss": 1.7603, + "step": 46362 + }, + { + "epoch": 0.56, + "grad_norm": 8.436953086657095, + "learning_rate": 1.6926217195478508e-05, + "loss": 1.1591, + "step": 46365 + }, + { + "epoch": 0.56, + "grad_norm": 6.532309335218193, + "learning_rate": 1.69257958154488e-05, + "loss": 1.6073, + "step": 46368 + }, + { + "epoch": 0.56, + "grad_norm": 24.53481754486587, + "learning_rate": 1.6925374411783842e-05, + "loss": 1.2468, + "step": 46371 + }, + { + "epoch": 0.56, + "grad_norm": 12.57379264924357, + "learning_rate": 1.692495298448508e-05, + "loss": 1.7411, + "step": 46374 + }, + { + "epoch": 0.56, + "grad_norm": 10.435456222354667, + "learning_rate": 1.6924531533553946e-05, + "loss": 1.6514, + "step": 46377 + }, + { + "epoch": 0.56, + "grad_norm": 13.260703708105273, + "learning_rate": 1.6924110058991877e-05, + "loss": 1.1871, + "step": 46380 + }, + { + "epoch": 0.56, + "grad_norm": 9.178310137092897, + "learning_rate": 1.692368856080032e-05, + "loss": 1.6714, + "step": 46383 + }, + { + "epoch": 0.56, + "grad_norm": 25.084718703193705, + "learning_rate": 1.69232670389807e-05, + "loss": 1.4548, + "step": 46386 + }, + { + "epoch": 0.56, + "grad_norm": 14.699714734868065, + "learning_rate": 1.6922845493534473e-05, + "loss": 1.6022, + "step": 46389 + }, + { + "epoch": 0.56, + "grad_norm": 10.045958599363797, + "learning_rate": 1.692242392446306e-05, + "loss": 1.4857, + "step": 46392 + }, + { + "epoch": 0.56, + "grad_norm": 11.303592967025594, + "learning_rate": 1.692200233176791e-05, + "loss": 1.5415, + "step": 46395 + }, + { + "epoch": 0.56, + "grad_norm": 29.666834346405636, + "learning_rate": 1.692158071545046e-05, + "loss": 1.3816, + "step": 46398 + }, + { + "epoch": 0.56, + "grad_norm": 10.784012898062956, + "learning_rate": 1.692115907551215e-05, + "loss": 1.5765, + "step": 46401 + }, + { + "epoch": 0.56, + "grad_norm": 19.53570610147495, + "learning_rate": 1.6920737411954416e-05, + "loss": 1.5013, + "step": 46404 + }, + { + "epoch": 0.56, + "grad_norm": 37.263293086555485, + "learning_rate": 1.69203157247787e-05, + "loss": 1.3147, + "step": 46407 + }, + { + "epoch": 0.56, + "grad_norm": 16.375768062101454, + "learning_rate": 1.6919894013986432e-05, + "loss": 1.2416, + "step": 46410 + }, + { + "epoch": 0.56, + "grad_norm": 12.784393224811584, + "learning_rate": 1.6919472279579063e-05, + "loss": 1.1814, + "step": 46413 + }, + { + "epoch": 0.56, + "grad_norm": 6.847632823576092, + "learning_rate": 1.691905052155803e-05, + "loss": 1.3037, + "step": 46416 + }, + { + "epoch": 0.56, + "grad_norm": 14.458410517543443, + "learning_rate": 1.6918628739924765e-05, + "loss": 1.143, + "step": 46419 + }, + { + "epoch": 0.56, + "grad_norm": 14.573116486816835, + "learning_rate": 1.6918206934680715e-05, + "loss": 1.5546, + "step": 46422 + }, + { + "epoch": 0.56, + "grad_norm": 20.189216286991293, + "learning_rate": 1.6917785105827315e-05, + "loss": 1.3152, + "step": 46425 + }, + { + "epoch": 0.56, + "grad_norm": 24.531906337062033, + "learning_rate": 1.6917363253366007e-05, + "loss": 1.587, + "step": 46428 + }, + { + "epoch": 0.56, + "grad_norm": 110.41190480905523, + "learning_rate": 1.691694137729823e-05, + "loss": 1.2477, + "step": 46431 + }, + { + "epoch": 0.56, + "grad_norm": 53.751131613099915, + "learning_rate": 1.691651947762542e-05, + "loss": 1.4954, + "step": 46434 + }, + { + "epoch": 0.56, + "grad_norm": 7.034438545132963, + "learning_rate": 1.6916097554349027e-05, + "loss": 1.5518, + "step": 46437 + }, + { + "epoch": 0.56, + "grad_norm": 14.39052455761739, + "learning_rate": 1.691567560747048e-05, + "loss": 1.4355, + "step": 46440 + }, + { + "epoch": 0.56, + "grad_norm": 16.038269150541907, + "learning_rate": 1.6915253636991225e-05, + "loss": 1.5769, + "step": 46443 + }, + { + "epoch": 0.56, + "grad_norm": 22.996331208934773, + "learning_rate": 1.69148316429127e-05, + "loss": 1.1998, + "step": 46446 + }, + { + "epoch": 0.56, + "grad_norm": 7.63649238659278, + "learning_rate": 1.6914409625236343e-05, + "loss": 1.4503, + "step": 46449 + }, + { + "epoch": 0.56, + "grad_norm": 22.792860835257418, + "learning_rate": 1.6913987583963598e-05, + "loss": 1.3156, + "step": 46452 + }, + { + "epoch": 0.56, + "grad_norm": 7.183109277481204, + "learning_rate": 1.6913565519095906e-05, + "loss": 1.51, + "step": 46455 + }, + { + "epoch": 0.56, + "grad_norm": 13.360668449856707, + "learning_rate": 1.69131434306347e-05, + "loss": 1.3617, + "step": 46458 + }, + { + "epoch": 0.56, + "grad_norm": 30.174060887721918, + "learning_rate": 1.691272131858143e-05, + "loss": 1.3289, + "step": 46461 + }, + { + "epoch": 0.56, + "grad_norm": 22.388456411619483, + "learning_rate": 1.6912299182937532e-05, + "loss": 1.4399, + "step": 46464 + }, + { + "epoch": 0.56, + "grad_norm": 14.060225838827096, + "learning_rate": 1.6911877023704444e-05, + "loss": 1.394, + "step": 46467 + }, + { + "epoch": 0.56, + "grad_norm": 10.996436627216537, + "learning_rate": 1.6911454840883613e-05, + "loss": 1.481, + "step": 46470 + }, + { + "epoch": 0.56, + "grad_norm": 5.240908028127324, + "learning_rate": 1.6911032634476472e-05, + "loss": 1.3308, + "step": 46473 + }, + { + "epoch": 0.56, + "grad_norm": 28.946769386282053, + "learning_rate": 1.6910610404484468e-05, + "loss": 1.1584, + "step": 46476 + }, + { + "epoch": 0.56, + "grad_norm": 6.975840110780772, + "learning_rate": 1.691018815090904e-05, + "loss": 1.1685, + "step": 46479 + }, + { + "epoch": 0.56, + "grad_norm": 7.8634832065568725, + "learning_rate": 1.690976587375163e-05, + "loss": 1.6352, + "step": 46482 + }, + { + "epoch": 0.56, + "grad_norm": 8.258903935884991, + "learning_rate": 1.6909343573013674e-05, + "loss": 1.6788, + "step": 46485 + }, + { + "epoch": 0.56, + "grad_norm": 2.8955237281102666, + "learning_rate": 1.6908921248696623e-05, + "loss": 1.5347, + "step": 46488 + }, + { + "epoch": 0.56, + "grad_norm": 15.628568697814398, + "learning_rate": 1.6908498900801908e-05, + "loss": 1.3633, + "step": 46491 + }, + { + "epoch": 0.56, + "grad_norm": 22.59372506372261, + "learning_rate": 1.6908076529330973e-05, + "loss": 1.2955, + "step": 46494 + }, + { + "epoch": 0.56, + "grad_norm": 13.27371377601755, + "learning_rate": 1.6907654134285264e-05, + "loss": 1.3352, + "step": 46497 + }, + { + "epoch": 0.56, + "grad_norm": 3.4271094000570415, + "learning_rate": 1.6907231715666222e-05, + "loss": 1.451, + "step": 46500 + }, + { + "epoch": 0.56, + "grad_norm": 32.2757324565647, + "learning_rate": 1.6906809273475283e-05, + "loss": 1.1234, + "step": 46503 + }, + { + "epoch": 0.56, + "grad_norm": 47.43760209055954, + "learning_rate": 1.690638680771389e-05, + "loss": 0.945, + "step": 46506 + }, + { + "epoch": 0.56, + "grad_norm": 64.53970262045861, + "learning_rate": 1.6905964318383488e-05, + "loss": 1.7917, + "step": 46509 + }, + { + "epoch": 0.56, + "grad_norm": 9.754342021745892, + "learning_rate": 1.690554180548552e-05, + "loss": 1.5907, + "step": 46512 + }, + { + "epoch": 0.56, + "grad_norm": 7.172445498250779, + "learning_rate": 1.6905119269021422e-05, + "loss": 1.4157, + "step": 46515 + }, + { + "epoch": 0.56, + "grad_norm": 5.258892159709591, + "learning_rate": 1.6904696708992642e-05, + "loss": 1.4642, + "step": 46518 + }, + { + "epoch": 0.56, + "grad_norm": 4.229324342053327, + "learning_rate": 1.6904274125400618e-05, + "loss": 1.4832, + "step": 46521 + }, + { + "epoch": 0.56, + "grad_norm": 14.693216991535992, + "learning_rate": 1.6903851518246794e-05, + "loss": 1.4386, + "step": 46524 + }, + { + "epoch": 0.56, + "grad_norm": 98.81586049148663, + "learning_rate": 1.6903428887532606e-05, + "loss": 1.3699, + "step": 46527 + }, + { + "epoch": 0.56, + "grad_norm": 32.667249933481145, + "learning_rate": 1.6903006233259508e-05, + "loss": 1.3946, + "step": 46530 + }, + { + "epoch": 0.56, + "grad_norm": 50.282445436971116, + "learning_rate": 1.6902583555428936e-05, + "loss": 1.3406, + "step": 46533 + }, + { + "epoch": 0.56, + "grad_norm": 28.813385905070582, + "learning_rate": 1.690216085404233e-05, + "loss": 1.3733, + "step": 46536 + }, + { + "epoch": 0.56, + "grad_norm": 11.375595085561294, + "learning_rate": 1.690173812910114e-05, + "loss": 1.1201, + "step": 46539 + }, + { + "epoch": 0.56, + "grad_norm": 14.789054623647553, + "learning_rate": 1.6901315380606796e-05, + "loss": 1.711, + "step": 46542 + }, + { + "epoch": 0.56, + "grad_norm": 18.69009876251594, + "learning_rate": 1.6900892608560752e-05, + "loss": 1.6328, + "step": 46545 + }, + { + "epoch": 0.56, + "grad_norm": 17.667215953822435, + "learning_rate": 1.690046981296445e-05, + "loss": 1.5046, + "step": 46548 + }, + { + "epoch": 0.56, + "grad_norm": 17.22359372489921, + "learning_rate": 1.6900046993819325e-05, + "loss": 1.4735, + "step": 46551 + }, + { + "epoch": 0.56, + "grad_norm": 17.750992906405266, + "learning_rate": 1.689962415112683e-05, + "loss": 1.4188, + "step": 46554 + }, + { + "epoch": 0.56, + "grad_norm": 3.98792383494096, + "learning_rate": 1.68992012848884e-05, + "loss": 1.5218, + "step": 46557 + }, + { + "epoch": 0.56, + "grad_norm": 13.792139955394632, + "learning_rate": 1.689877839510548e-05, + "loss": 1.8639, + "step": 46560 + }, + { + "epoch": 0.56, + "grad_norm": 17.897580798809486, + "learning_rate": 1.6898355481779517e-05, + "loss": 1.6218, + "step": 46563 + }, + { + "epoch": 0.56, + "grad_norm": 11.121948583232768, + "learning_rate": 1.689793254491195e-05, + "loss": 1.3333, + "step": 46566 + }, + { + "epoch": 0.56, + "grad_norm": 11.000931543067964, + "learning_rate": 1.6897509584504223e-05, + "loss": 1.464, + "step": 46569 + }, + { + "epoch": 0.56, + "grad_norm": 15.894502387765842, + "learning_rate": 1.6897086600557784e-05, + "loss": 1.4455, + "step": 46572 + }, + { + "epoch": 0.56, + "grad_norm": 12.332981872629919, + "learning_rate": 1.689666359307407e-05, + "loss": 1.281, + "step": 46575 + }, + { + "epoch": 0.56, + "grad_norm": 7.462231994035748, + "learning_rate": 1.6896240562054528e-05, + "loss": 1.5732, + "step": 46578 + }, + { + "epoch": 0.56, + "grad_norm": 2.247690275031176, + "learning_rate": 1.6895817507500602e-05, + "loss": 1.62, + "step": 46581 + }, + { + "epoch": 0.56, + "grad_norm": 7.103168696988735, + "learning_rate": 1.689539442941373e-05, + "loss": 1.353, + "step": 46584 + }, + { + "epoch": 0.56, + "grad_norm": 9.801811889099682, + "learning_rate": 1.6894971327795368e-05, + "loss": 1.4096, + "step": 46587 + }, + { + "epoch": 0.56, + "grad_norm": 7.718724631944276, + "learning_rate": 1.6894548202646944e-05, + "loss": 1.2767, + "step": 46590 + }, + { + "epoch": 0.56, + "grad_norm": 5.64407954650447, + "learning_rate": 1.6894125053969917e-05, + "loss": 1.5747, + "step": 46593 + }, + { + "epoch": 0.56, + "grad_norm": 6.058602150210768, + "learning_rate": 1.689370188176572e-05, + "loss": 1.3166, + "step": 46596 + }, + { + "epoch": 0.56, + "grad_norm": 29.155693019909197, + "learning_rate": 1.6893278686035805e-05, + "loss": 1.5304, + "step": 46599 + }, + { + "epoch": 0.56, + "grad_norm": 37.24997106661644, + "learning_rate": 1.689285546678161e-05, + "loss": 1.4594, + "step": 46602 + }, + { + "epoch": 0.56, + "grad_norm": 16.332774034891994, + "learning_rate": 1.6892432224004587e-05, + "loss": 1.3441, + "step": 46605 + }, + { + "epoch": 0.56, + "grad_norm": 72.85302625465478, + "learning_rate": 1.689200895770617e-05, + "loss": 1.2426, + "step": 46608 + }, + { + "epoch": 0.56, + "grad_norm": 3.3573221888658678, + "learning_rate": 1.689158566788781e-05, + "loss": 1.2001, + "step": 46611 + }, + { + "epoch": 0.56, + "grad_norm": 6.570012229773189, + "learning_rate": 1.6891162354550954e-05, + "loss": 1.3095, + "step": 46614 + }, + { + "epoch": 0.56, + "grad_norm": 8.540018935864383, + "learning_rate": 1.689073901769704e-05, + "loss": 1.0449, + "step": 46617 + }, + { + "epoch": 0.56, + "grad_norm": 10.91298509473549, + "learning_rate": 1.6890315657327516e-05, + "loss": 1.1951, + "step": 46620 + }, + { + "epoch": 0.56, + "grad_norm": 3.6243936557866077, + "learning_rate": 1.688989227344383e-05, + "loss": 1.1923, + "step": 46623 + }, + { + "epoch": 0.56, + "grad_norm": 3.388953569966026, + "learning_rate": 1.6889468866047418e-05, + "loss": 1.3054, + "step": 46626 + }, + { + "epoch": 0.56, + "grad_norm": 15.093969748946845, + "learning_rate": 1.688904543513973e-05, + "loss": 1.9308, + "step": 46629 + }, + { + "epoch": 0.56, + "grad_norm": 2.251009397929241, + "learning_rate": 1.6888621980722218e-05, + "loss": 1.7144, + "step": 46632 + }, + { + "epoch": 0.56, + "grad_norm": 4.875216438186428, + "learning_rate": 1.6888198502796315e-05, + "loss": 1.4018, + "step": 46635 + }, + { + "epoch": 0.56, + "grad_norm": 10.770694661867074, + "learning_rate": 1.6887775001363472e-05, + "loss": 1.5737, + "step": 46638 + }, + { + "epoch": 0.56, + "grad_norm": 12.238566150028824, + "learning_rate": 1.6887351476425136e-05, + "loss": 1.4238, + "step": 46641 + }, + { + "epoch": 0.56, + "grad_norm": 20.932417444236332, + "learning_rate": 1.6886927927982747e-05, + "loss": 1.3948, + "step": 46644 + }, + { + "epoch": 0.56, + "grad_norm": 3.1067805392501486, + "learning_rate": 1.688650435603776e-05, + "loss": 1.8136, + "step": 46647 + }, + { + "epoch": 0.56, + "grad_norm": 20.001470547588763, + "learning_rate": 1.6886080760591608e-05, + "loss": 1.7389, + "step": 46650 + }, + { + "epoch": 0.56, + "grad_norm": 20.240674309188584, + "learning_rate": 1.6885657141645744e-05, + "loss": 1.3354, + "step": 46653 + }, + { + "epoch": 0.56, + "grad_norm": 30.73071554581705, + "learning_rate": 1.6885233499201614e-05, + "loss": 1.5123, + "step": 46656 + }, + { + "epoch": 0.56, + "grad_norm": 6.745372839261502, + "learning_rate": 1.6884809833260662e-05, + "loss": 0.8789, + "step": 46659 + }, + { + "epoch": 0.56, + "grad_norm": 38.565352801670095, + "learning_rate": 1.6884386143824333e-05, + "loss": 1.3771, + "step": 46662 + }, + { + "epoch": 0.56, + "grad_norm": 15.401881432266466, + "learning_rate": 1.6883962430894072e-05, + "loss": 1.3647, + "step": 46665 + }, + { + "epoch": 0.56, + "grad_norm": 30.495938679176646, + "learning_rate": 1.6883538694471328e-05, + "loss": 1.2818, + "step": 46668 + }, + { + "epoch": 0.56, + "grad_norm": 18.997597509407736, + "learning_rate": 1.688311493455755e-05, + "loss": 1.6309, + "step": 46671 + }, + { + "epoch": 0.56, + "grad_norm": 3.769204242686534, + "learning_rate": 1.6882691151154173e-05, + "loss": 1.5789, + "step": 46674 + }, + { + "epoch": 0.56, + "grad_norm": 8.06491677866351, + "learning_rate": 1.6882267344262656e-05, + "loss": 1.4273, + "step": 46677 + }, + { + "epoch": 0.56, + "grad_norm": 33.333522269946826, + "learning_rate": 1.6881843513884436e-05, + "loss": 1.7564, + "step": 46680 + }, + { + "epoch": 0.56, + "grad_norm": 47.64549110745533, + "learning_rate": 1.6881419660020967e-05, + "loss": 1.1535, + "step": 46683 + }, + { + "epoch": 0.56, + "grad_norm": 4.094328185392403, + "learning_rate": 1.6880995782673686e-05, + "loss": 1.3699, + "step": 46686 + }, + { + "epoch": 0.56, + "grad_norm": 17.06422540606263, + "learning_rate": 1.688057188184405e-05, + "loss": 1.6796, + "step": 46689 + }, + { + "epoch": 0.56, + "grad_norm": 22.66948045308801, + "learning_rate": 1.6880147957533494e-05, + "loss": 1.3394, + "step": 46692 + }, + { + "epoch": 0.56, + "grad_norm": 12.412199623427632, + "learning_rate": 1.687972400974348e-05, + "loss": 1.5109, + "step": 46695 + }, + { + "epoch": 0.56, + "grad_norm": 6.42181565340188, + "learning_rate": 1.6879300038475438e-05, + "loss": 1.1698, + "step": 46698 + }, + { + "epoch": 0.56, + "grad_norm": 13.875738562325926, + "learning_rate": 1.6878876043730827e-05, + "loss": 1.2883, + "step": 46701 + }, + { + "epoch": 0.56, + "grad_norm": 21.049958999719447, + "learning_rate": 1.6878452025511087e-05, + "loss": 1.075, + "step": 46704 + }, + { + "epoch": 0.56, + "grad_norm": 4.369563610467051, + "learning_rate": 1.6878027983817673e-05, + "loss": 1.2168, + "step": 46707 + }, + { + "epoch": 0.56, + "grad_norm": 4.644565096248298, + "learning_rate": 1.687760391865202e-05, + "loss": 1.5864, + "step": 46710 + }, + { + "epoch": 0.56, + "grad_norm": 18.58254940718728, + "learning_rate": 1.6877179830015592e-05, + "loss": 0.9704, + "step": 46713 + }, + { + "epoch": 0.56, + "grad_norm": 9.6698389151427, + "learning_rate": 1.687675571790982e-05, + "loss": 1.4048, + "step": 46716 + }, + { + "epoch": 0.56, + "grad_norm": 10.030860243374443, + "learning_rate": 1.6876331582336155e-05, + "loss": 1.3625, + "step": 46719 + }, + { + "epoch": 0.56, + "grad_norm": 30.183248959890822, + "learning_rate": 1.6875907423296053e-05, + "loss": 1.1714, + "step": 46722 + }, + { + "epoch": 0.56, + "grad_norm": 50.85164771229277, + "learning_rate": 1.687548324079095e-05, + "loss": 1.5133, + "step": 46725 + }, + { + "epoch": 0.56, + "grad_norm": 23.250172236493572, + "learning_rate": 1.6875059034822302e-05, + "loss": 1.5498, + "step": 46728 + }, + { + "epoch": 0.56, + "grad_norm": 18.90612214241843, + "learning_rate": 1.6874634805391553e-05, + "loss": 1.6857, + "step": 46731 + }, + { + "epoch": 0.56, + "grad_norm": 18.96920625417805, + "learning_rate": 1.6874210552500153e-05, + "loss": 1.3701, + "step": 46734 + }, + { + "epoch": 0.56, + "grad_norm": 4.561401747194748, + "learning_rate": 1.6873786276149552e-05, + "loss": 1.5649, + "step": 46737 + }, + { + "epoch": 0.56, + "grad_norm": 19.353986167139162, + "learning_rate": 1.687336197634119e-05, + "loss": 1.5216, + "step": 46740 + }, + { + "epoch": 0.56, + "grad_norm": 11.908822323137247, + "learning_rate": 1.687293765307652e-05, + "loss": 1.5519, + "step": 46743 + }, + { + "epoch": 0.56, + "grad_norm": 27.57582185225759, + "learning_rate": 1.6872513306356988e-05, + "loss": 1.422, + "step": 46746 + }, + { + "epoch": 0.56, + "grad_norm": 23.84890277304789, + "learning_rate": 1.6872088936184047e-05, + "loss": 1.7252, + "step": 46749 + }, + { + "epoch": 0.56, + "grad_norm": 17.957215671561574, + "learning_rate": 1.6871664542559136e-05, + "loss": 1.3099, + "step": 46752 + }, + { + "epoch": 0.56, + "grad_norm": 2.503030461482098, + "learning_rate": 1.6871240125483714e-05, + "loss": 1.5185, + "step": 46755 + }, + { + "epoch": 0.56, + "grad_norm": 27.001566635581206, + "learning_rate": 1.6870815684959223e-05, + "loss": 1.631, + "step": 46758 + }, + { + "epoch": 0.56, + "grad_norm": 7.881937759017119, + "learning_rate": 1.6870391220987114e-05, + "loss": 1.4518, + "step": 46761 + }, + { + "epoch": 0.56, + "grad_norm": 54.77589473411741, + "learning_rate": 1.686996673356883e-05, + "loss": 1.8083, + "step": 46764 + }, + { + "epoch": 0.56, + "grad_norm": 20.67836711397131, + "learning_rate": 1.686954222270583e-05, + "loss": 1.7094, + "step": 46767 + }, + { + "epoch": 0.56, + "grad_norm": 49.1431734932093, + "learning_rate": 1.6869117688399557e-05, + "loss": 1.3618, + "step": 46770 + }, + { + "epoch": 0.56, + "grad_norm": 7.755208636559215, + "learning_rate": 1.6868693130651455e-05, + "loss": 1.2673, + "step": 46773 + }, + { + "epoch": 0.56, + "grad_norm": 9.189316431490031, + "learning_rate": 1.686826854946298e-05, + "loss": 1.2783, + "step": 46776 + }, + { + "epoch": 0.56, + "grad_norm": 50.87400163700339, + "learning_rate": 1.686784394483558e-05, + "loss": 1.5445, + "step": 46779 + }, + { + "epoch": 0.56, + "grad_norm": 14.097805662122482, + "learning_rate": 1.6867419316770697e-05, + "loss": 1.562, + "step": 46782 + }, + { + "epoch": 0.56, + "grad_norm": 11.774566947198464, + "learning_rate": 1.686699466526979e-05, + "loss": 1.1091, + "step": 46785 + }, + { + "epoch": 0.56, + "grad_norm": 7.712777898077763, + "learning_rate": 1.6866569990334304e-05, + "loss": 1.6139, + "step": 46788 + }, + { + "epoch": 0.56, + "grad_norm": 7.9184948889122655, + "learning_rate": 1.6866145291965685e-05, + "loss": 1.6014, + "step": 46791 + }, + { + "epoch": 0.56, + "grad_norm": 16.26998447189464, + "learning_rate": 1.6865720570165387e-05, + "loss": 1.5072, + "step": 46794 + }, + { + "epoch": 0.56, + "grad_norm": 11.982996176539421, + "learning_rate": 1.6865295824934856e-05, + "loss": 1.448, + "step": 46797 + }, + { + "epoch": 0.56, + "grad_norm": 4.997298295926996, + "learning_rate": 1.6864871056275543e-05, + "loss": 1.6605, + "step": 46800 + }, + { + "epoch": 0.56, + "grad_norm": 13.051591172200649, + "learning_rate": 1.6864446264188902e-05, + "loss": 1.2463, + "step": 46803 + }, + { + "epoch": 0.56, + "grad_norm": 18.897623180754362, + "learning_rate": 1.686402144867637e-05, + "loss": 1.4691, + "step": 46806 + }, + { + "epoch": 0.56, + "grad_norm": 58.01857692104144, + "learning_rate": 1.6863596609739416e-05, + "loss": 1.3158, + "step": 46809 + }, + { + "epoch": 0.56, + "grad_norm": 19.405952954599524, + "learning_rate": 1.6863171747379476e-05, + "loss": 1.3697, + "step": 46812 + }, + { + "epoch": 0.56, + "grad_norm": 46.59752561289108, + "learning_rate": 1.6862746861597997e-05, + "loss": 1.2776, + "step": 46815 + }, + { + "epoch": 0.56, + "grad_norm": 13.47163287481361, + "learning_rate": 1.686232195239644e-05, + "loss": 1.3967, + "step": 46818 + }, + { + "epoch": 0.56, + "grad_norm": 15.120913612596086, + "learning_rate": 1.686189701977625e-05, + "loss": 1.3826, + "step": 46821 + }, + { + "epoch": 0.56, + "grad_norm": 46.554629400466496, + "learning_rate": 1.6861472063738875e-05, + "loss": 1.5329, + "step": 46824 + }, + { + "epoch": 0.56, + "grad_norm": 83.68055163719869, + "learning_rate": 1.686104708428577e-05, + "loss": 1.7026, + "step": 46827 + }, + { + "epoch": 0.56, + "grad_norm": 26.369890734044333, + "learning_rate": 1.686062208141838e-05, + "loss": 1.5322, + "step": 46830 + }, + { + "epoch": 0.56, + "grad_norm": 31.679794663890327, + "learning_rate": 1.686019705513816e-05, + "loss": 1.3959, + "step": 46833 + }, + { + "epoch": 0.56, + "grad_norm": 16.390604885951593, + "learning_rate": 1.6859772005446558e-05, + "loss": 1.4861, + "step": 46836 + }, + { + "epoch": 0.56, + "grad_norm": 25.02036108620258, + "learning_rate": 1.6859346932345024e-05, + "loss": 1.1786, + "step": 46839 + }, + { + "epoch": 0.56, + "grad_norm": 7.48870426330518, + "learning_rate": 1.685892183583501e-05, + "loss": 1.5683, + "step": 46842 + }, + { + "epoch": 0.56, + "grad_norm": 19.11485250331432, + "learning_rate": 1.6858496715917967e-05, + "loss": 1.5604, + "step": 46845 + }, + { + "epoch": 0.56, + "grad_norm": 10.434680684323695, + "learning_rate": 1.6858071572595344e-05, + "loss": 1.5454, + "step": 46848 + }, + { + "epoch": 0.56, + "grad_norm": 21.067075815484916, + "learning_rate": 1.6857646405868595e-05, + "loss": 1.3857, + "step": 46851 + }, + { + "epoch": 0.56, + "grad_norm": 13.975712630192819, + "learning_rate": 1.6857221215739166e-05, + "loss": 1.73, + "step": 46854 + }, + { + "epoch": 0.56, + "grad_norm": 7.238961742630541, + "learning_rate": 1.6856796002208513e-05, + "loss": 0.9839, + "step": 46857 + }, + { + "epoch": 0.56, + "grad_norm": 10.179488933731678, + "learning_rate": 1.6856370765278084e-05, + "loss": 1.3771, + "step": 46860 + }, + { + "epoch": 0.56, + "grad_norm": 12.781028096902162, + "learning_rate": 1.6855945504949332e-05, + "loss": 1.0314, + "step": 46863 + }, + { + "epoch": 0.56, + "grad_norm": 8.143553970229753, + "learning_rate": 1.6855520221223708e-05, + "loss": 1.1946, + "step": 46866 + }, + { + "epoch": 0.56, + "grad_norm": 37.77375872112231, + "learning_rate": 1.685509491410266e-05, + "loss": 1.3229, + "step": 46869 + }, + { + "epoch": 0.56, + "grad_norm": 14.81204154359188, + "learning_rate": 1.6854669583587647e-05, + "loss": 1.3182, + "step": 46872 + }, + { + "epoch": 0.56, + "grad_norm": 10.77313158681535, + "learning_rate": 1.685424422968011e-05, + "loss": 1.7208, + "step": 46875 + }, + { + "epoch": 0.56, + "grad_norm": 22.51150076081823, + "learning_rate": 1.685381885238151e-05, + "loss": 1.7487, + "step": 46878 + }, + { + "epoch": 0.56, + "grad_norm": 66.34487095721612, + "learning_rate": 1.6853393451693297e-05, + "loss": 1.3593, + "step": 46881 + }, + { + "epoch": 0.56, + "grad_norm": 13.34133867591482, + "learning_rate": 1.6852968027616915e-05, + "loss": 1.1951, + "step": 46884 + }, + { + "epoch": 0.56, + "grad_norm": 15.48364386542493, + "learning_rate": 1.6852542580153824e-05, + "loss": 1.5699, + "step": 46887 + }, + { + "epoch": 0.56, + "grad_norm": 8.574057881446533, + "learning_rate": 1.6852117109305476e-05, + "loss": 1.5034, + "step": 46890 + }, + { + "epoch": 0.56, + "grad_norm": 28.437893957069363, + "learning_rate": 1.685169161507332e-05, + "loss": 1.5516, + "step": 46893 + }, + { + "epoch": 0.56, + "grad_norm": 3.2785094990165446, + "learning_rate": 1.6851266097458807e-05, + "loss": 1.2401, + "step": 46896 + }, + { + "epoch": 0.56, + "grad_norm": 24.08741760245613, + "learning_rate": 1.685084055646339e-05, + "loss": 1.3445, + "step": 46899 + }, + { + "epoch": 0.56, + "grad_norm": 39.12224523594635, + "learning_rate": 1.685041499208852e-05, + "loss": 1.6311, + "step": 46902 + }, + { + "epoch": 0.56, + "grad_norm": 10.09568618123503, + "learning_rate": 1.6849989404335656e-05, + "loss": 1.6343, + "step": 46905 + }, + { + "epoch": 0.56, + "grad_norm": 4.828945688884766, + "learning_rate": 1.6849563793206244e-05, + "loss": 1.2408, + "step": 46908 + }, + { + "epoch": 0.56, + "grad_norm": 10.070069501953087, + "learning_rate": 1.6849138158701738e-05, + "loss": 1.6135, + "step": 46911 + }, + { + "epoch": 0.56, + "grad_norm": 12.847501999888587, + "learning_rate": 1.6848712500823588e-05, + "loss": 1.7647, + "step": 46914 + }, + { + "epoch": 0.56, + "grad_norm": 9.391216131561341, + "learning_rate": 1.684828681957325e-05, + "loss": 1.4456, + "step": 46917 + }, + { + "epoch": 0.56, + "grad_norm": 2.8392073717044077, + "learning_rate": 1.6847861114952178e-05, + "loss": 1.6328, + "step": 46920 + }, + { + "epoch": 0.56, + "grad_norm": 7.237920824269771, + "learning_rate": 1.684743538696182e-05, + "loss": 1.2226, + "step": 46923 + }, + { + "epoch": 0.56, + "grad_norm": 18.91012463672032, + "learning_rate": 1.6847009635603635e-05, + "loss": 1.074, + "step": 46926 + }, + { + "epoch": 0.56, + "grad_norm": 7.242843804945781, + "learning_rate": 1.684658386087907e-05, + "loss": 1.5706, + "step": 46929 + }, + { + "epoch": 0.56, + "grad_norm": 10.087040757964283, + "learning_rate": 1.684615806278958e-05, + "loss": 1.5079, + "step": 46932 + }, + { + "epoch": 0.56, + "grad_norm": 14.156595663023213, + "learning_rate": 1.684573224133662e-05, + "loss": 1.1921, + "step": 46935 + }, + { + "epoch": 0.56, + "grad_norm": 6.1869850215505195, + "learning_rate": 1.6845306396521638e-05, + "loss": 1.5434, + "step": 46938 + }, + { + "epoch": 0.56, + "grad_norm": 7.520864361296783, + "learning_rate": 1.6844880528346095e-05, + "loss": 1.8047, + "step": 46941 + }, + { + "epoch": 0.56, + "grad_norm": 16.55183491822628, + "learning_rate": 1.684445463681144e-05, + "loss": 1.2211, + "step": 46944 + }, + { + "epoch": 0.56, + "grad_norm": 3.5101523623420112, + "learning_rate": 1.6844028721919127e-05, + "loss": 1.3659, + "step": 46947 + }, + { + "epoch": 0.56, + "grad_norm": 21.670030127247617, + "learning_rate": 1.6843602783670605e-05, + "loss": 1.1139, + "step": 46950 + }, + { + "epoch": 0.56, + "grad_norm": 108.46707308132532, + "learning_rate": 1.6843176822067335e-05, + "loss": 1.3673, + "step": 46953 + }, + { + "epoch": 0.56, + "grad_norm": 3.6753792871174578, + "learning_rate": 1.684275083711077e-05, + "loss": 1.451, + "step": 46956 + }, + { + "epoch": 0.56, + "grad_norm": 13.832535978926773, + "learning_rate": 1.6842324828802357e-05, + "loss": 1.1979, + "step": 46959 + }, + { + "epoch": 0.56, + "grad_norm": 10.774638077235245, + "learning_rate": 1.6841898797143554e-05, + "loss": 1.4984, + "step": 46962 + }, + { + "epoch": 0.56, + "grad_norm": 12.86630902989109, + "learning_rate": 1.6841472742135816e-05, + "loss": 1.6854, + "step": 46965 + }, + { + "epoch": 0.56, + "grad_norm": 12.006635960515638, + "learning_rate": 1.6841046663780597e-05, + "loss": 1.4599, + "step": 46968 + }, + { + "epoch": 0.56, + "grad_norm": 51.62238810045979, + "learning_rate": 1.6840620562079345e-05, + "loss": 1.5433, + "step": 46971 + }, + { + "epoch": 0.56, + "grad_norm": 6.929647441957333, + "learning_rate": 1.6840194437033522e-05, + "loss": 1.3858, + "step": 46974 + }, + { + "epoch": 0.56, + "grad_norm": 21.78625039005432, + "learning_rate": 1.6839768288644583e-05, + "loss": 1.5427, + "step": 46977 + }, + { + "epoch": 0.56, + "grad_norm": 5.329355529074093, + "learning_rate": 1.6839342116913974e-05, + "loss": 1.4328, + "step": 46980 + }, + { + "epoch": 0.56, + "grad_norm": 20.72609442759135, + "learning_rate": 1.6838915921843155e-05, + "loss": 1.2224, + "step": 46983 + }, + { + "epoch": 0.56, + "grad_norm": 19.929031306963466, + "learning_rate": 1.683848970343358e-05, + "loss": 1.2795, + "step": 46986 + }, + { + "epoch": 0.57, + "grad_norm": 86.86685664790257, + "learning_rate": 1.68380634616867e-05, + "loss": 1.1803, + "step": 46989 + }, + { + "epoch": 0.57, + "grad_norm": 14.777044577388113, + "learning_rate": 1.6837637196603974e-05, + "loss": 1.3618, + "step": 46992 + }, + { + "epoch": 0.57, + "grad_norm": 23.71407868385025, + "learning_rate": 1.683721090818686e-05, + "loss": 1.4588, + "step": 46995 + }, + { + "epoch": 0.57, + "grad_norm": 11.889161418548385, + "learning_rate": 1.68367845964368e-05, + "loss": 1.0866, + "step": 46998 + }, + { + "epoch": 0.57, + "grad_norm": 19.809468782942027, + "learning_rate": 1.6836358261355264e-05, + "loss": 1.3962, + "step": 47001 + }, + { + "epoch": 0.57, + "grad_norm": 8.515475501326101, + "learning_rate": 1.6835931902943695e-05, + "loss": 1.6585, + "step": 47004 + }, + { + "epoch": 0.57, + "grad_norm": 12.26238021721976, + "learning_rate": 1.6835505521203556e-05, + "loss": 1.3163, + "step": 47007 + }, + { + "epoch": 0.57, + "grad_norm": 38.451799303677866, + "learning_rate": 1.6835079116136298e-05, + "loss": 1.1242, + "step": 47010 + }, + { + "epoch": 0.57, + "grad_norm": 16.553298444270126, + "learning_rate": 1.6834652687743372e-05, + "loss": 1.7489, + "step": 47013 + }, + { + "epoch": 0.57, + "grad_norm": 22.138100693916808, + "learning_rate": 1.6834226236026242e-05, + "loss": 1.2423, + "step": 47016 + }, + { + "epoch": 0.57, + "grad_norm": 18.667268560154863, + "learning_rate": 1.6833799760986362e-05, + "loss": 1.4573, + "step": 47019 + }, + { + "epoch": 0.57, + "grad_norm": 10.488455382542833, + "learning_rate": 1.683337326262518e-05, + "loss": 1.3608, + "step": 47022 + }, + { + "epoch": 0.57, + "grad_norm": 31.435755712255173, + "learning_rate": 1.683294674094416e-05, + "loss": 1.3815, + "step": 47025 + }, + { + "epoch": 0.57, + "grad_norm": 19.508081620963992, + "learning_rate": 1.6832520195944755e-05, + "loss": 1.3423, + "step": 47028 + }, + { + "epoch": 0.57, + "grad_norm": 19.729266660571405, + "learning_rate": 1.6832093627628417e-05, + "loss": 1.9088, + "step": 47031 + }, + { + "epoch": 0.57, + "grad_norm": 2.44511520878837, + "learning_rate": 1.6831667035996603e-05, + "loss": 1.5335, + "step": 47034 + }, + { + "epoch": 0.57, + "grad_norm": 32.72196404203992, + "learning_rate": 1.6831240421050773e-05, + "loss": 1.4316, + "step": 47037 + }, + { + "epoch": 0.57, + "grad_norm": 10.049824460731134, + "learning_rate": 1.683081378279238e-05, + "loss": 1.2494, + "step": 47040 + }, + { + "epoch": 0.57, + "grad_norm": 19.82509226143104, + "learning_rate": 1.6830387121222878e-05, + "loss": 1.2579, + "step": 47043 + }, + { + "epoch": 0.57, + "grad_norm": 6.9415699503219805, + "learning_rate": 1.6829960436343728e-05, + "loss": 1.3297, + "step": 47046 + }, + { + "epoch": 0.57, + "grad_norm": 14.435896659151904, + "learning_rate": 1.6829533728156378e-05, + "loss": 1.5585, + "step": 47049 + }, + { + "epoch": 0.57, + "grad_norm": 24.63392177807811, + "learning_rate": 1.6829106996662294e-05, + "loss": 1.2985, + "step": 47052 + }, + { + "epoch": 0.57, + "grad_norm": 17.25047554958686, + "learning_rate": 1.682868024186293e-05, + "loss": 1.3381, + "step": 47055 + }, + { + "epoch": 0.57, + "grad_norm": 16.64150765480234, + "learning_rate": 1.6828253463759735e-05, + "loss": 1.5392, + "step": 47058 + }, + { + "epoch": 0.57, + "grad_norm": 19.720902482497838, + "learning_rate": 1.682782666235417e-05, + "loss": 1.2078, + "step": 47061 + }, + { + "epoch": 0.57, + "grad_norm": 7.585796815270196, + "learning_rate": 1.6827399837647695e-05, + "loss": 1.7049, + "step": 47064 + }, + { + "epoch": 0.57, + "grad_norm": 15.606414916341832, + "learning_rate": 1.6826972989641762e-05, + "loss": 1.2054, + "step": 47067 + }, + { + "epoch": 0.57, + "grad_norm": 21.812718037735436, + "learning_rate": 1.6826546118337826e-05, + "loss": 1.564, + "step": 47070 + }, + { + "epoch": 0.57, + "grad_norm": 15.09748109645707, + "learning_rate": 1.6826119223737353e-05, + "loss": 1.65, + "step": 47073 + }, + { + "epoch": 0.57, + "grad_norm": 5.05198854868764, + "learning_rate": 1.6825692305841793e-05, + "loss": 1.3348, + "step": 47076 + }, + { + "epoch": 0.57, + "grad_norm": 11.034507351540535, + "learning_rate": 1.68252653646526e-05, + "loss": 1.1681, + "step": 47079 + }, + { + "epoch": 0.57, + "grad_norm": 29.82816126603646, + "learning_rate": 1.682483840017124e-05, + "loss": 1.4481, + "step": 47082 + }, + { + "epoch": 0.57, + "grad_norm": 56.31600318031534, + "learning_rate": 1.682441141239916e-05, + "loss": 1.4809, + "step": 47085 + }, + { + "epoch": 0.57, + "grad_norm": 10.9803987445653, + "learning_rate": 1.6823984401337824e-05, + "loss": 1.3455, + "step": 47088 + }, + { + "epoch": 0.57, + "grad_norm": 13.02997942157314, + "learning_rate": 1.6823557366988688e-05, + "loss": 1.6016, + "step": 47091 + }, + { + "epoch": 0.57, + "grad_norm": 22.56443617699304, + "learning_rate": 1.6823130309353207e-05, + "loss": 1.6599, + "step": 47094 + }, + { + "epoch": 0.57, + "grad_norm": 19.28061861233695, + "learning_rate": 1.6822703228432838e-05, + "loss": 1.8457, + "step": 47097 + }, + { + "epoch": 0.57, + "grad_norm": 13.193362598482738, + "learning_rate": 1.6822276124229048e-05, + "loss": 1.3409, + "step": 47100 + }, + { + "epoch": 0.57, + "grad_norm": 11.735841076221872, + "learning_rate": 1.682184899674328e-05, + "loss": 1.2985, + "step": 47103 + }, + { + "epoch": 0.57, + "grad_norm": 25.8625633581064, + "learning_rate": 1.6821421845977e-05, + "loss": 1.1752, + "step": 47106 + }, + { + "epoch": 0.57, + "grad_norm": 12.456114639573416, + "learning_rate": 1.6820994671931664e-05, + "loss": 1.5423, + "step": 47109 + }, + { + "epoch": 0.57, + "grad_norm": 45.57328571284204, + "learning_rate": 1.682056747460873e-05, + "loss": 1.5904, + "step": 47112 + }, + { + "epoch": 0.57, + "grad_norm": 14.218077070691942, + "learning_rate": 1.6820140254009658e-05, + "loss": 1.0838, + "step": 47115 + }, + { + "epoch": 0.57, + "grad_norm": 21.120661731601352, + "learning_rate": 1.6819713010135904e-05, + "loss": 1.5712, + "step": 47118 + }, + { + "epoch": 0.57, + "grad_norm": 16.97431142965821, + "learning_rate": 1.681928574298892e-05, + "loss": 1.1018, + "step": 47121 + }, + { + "epoch": 0.57, + "grad_norm": 11.736560589006844, + "learning_rate": 1.6818858452570176e-05, + "loss": 1.1544, + "step": 47124 + }, + { + "epoch": 0.57, + "grad_norm": 41.0998986349733, + "learning_rate": 1.6818431138881128e-05, + "loss": 1.7843, + "step": 47127 + }, + { + "epoch": 0.57, + "grad_norm": 11.756929815917536, + "learning_rate": 1.681800380192322e-05, + "loss": 1.2987, + "step": 47130 + }, + { + "epoch": 0.57, + "grad_norm": 17.24348472510553, + "learning_rate": 1.6817576441697928e-05, + "loss": 1.7909, + "step": 47133 + }, + { + "epoch": 0.57, + "grad_norm": 10.88203191800406, + "learning_rate": 1.6817149058206704e-05, + "loss": 1.6538, + "step": 47136 + }, + { + "epoch": 0.57, + "grad_norm": 31.982043217506316, + "learning_rate": 1.6816721651451002e-05, + "loss": 1.4127, + "step": 47139 + }, + { + "epoch": 0.57, + "grad_norm": 17.689796952596787, + "learning_rate": 1.6816294221432285e-05, + "loss": 1.3485, + "step": 47142 + }, + { + "epoch": 0.57, + "grad_norm": 12.934372725027357, + "learning_rate": 1.6815866768152014e-05, + "loss": 1.6437, + "step": 47145 + }, + { + "epoch": 0.57, + "grad_norm": 21.596582205641205, + "learning_rate": 1.6815439291611643e-05, + "loss": 1.4387, + "step": 47148 + }, + { + "epoch": 0.57, + "grad_norm": 4.530666991597604, + "learning_rate": 1.681501179181263e-05, + "loss": 1.6991, + "step": 47151 + }, + { + "epoch": 0.57, + "grad_norm": 16.678119486791573, + "learning_rate": 1.681458426875644e-05, + "loss": 1.5385, + "step": 47154 + }, + { + "epoch": 0.57, + "grad_norm": 8.802184841785833, + "learning_rate": 1.6814156722444527e-05, + "loss": 1.1615, + "step": 47157 + }, + { + "epoch": 0.57, + "grad_norm": 15.145943381770895, + "learning_rate": 1.681372915287835e-05, + "loss": 1.4866, + "step": 47160 + }, + { + "epoch": 0.57, + "grad_norm": 9.460987738110342, + "learning_rate": 1.681330156005937e-05, + "loss": 1.1392, + "step": 47163 + }, + { + "epoch": 0.57, + "grad_norm": 12.314233069791122, + "learning_rate": 1.6812873943989054e-05, + "loss": 1.049, + "step": 47166 + }, + { + "epoch": 0.57, + "grad_norm": 4.1606596661726565, + "learning_rate": 1.6812446304668845e-05, + "loss": 1.6578, + "step": 47169 + }, + { + "epoch": 0.57, + "grad_norm": 17.402759549160283, + "learning_rate": 1.681201864210021e-05, + "loss": 1.6073, + "step": 47172 + }, + { + "epoch": 0.57, + "grad_norm": 44.92580299721734, + "learning_rate": 1.6811590956284615e-05, + "loss": 1.4732, + "step": 47175 + }, + { + "epoch": 0.57, + "grad_norm": 43.577591347475575, + "learning_rate": 1.6811163247223508e-05, + "loss": 1.371, + "step": 47178 + }, + { + "epoch": 0.57, + "grad_norm": 26.964180979039302, + "learning_rate": 1.6810735514918356e-05, + "loss": 1.6057, + "step": 47181 + }, + { + "epoch": 0.57, + "grad_norm": 7.520317897575319, + "learning_rate": 1.6810307759370617e-05, + "loss": 1.5251, + "step": 47184 + }, + { + "epoch": 0.57, + "grad_norm": 15.031843127652916, + "learning_rate": 1.6809879980581748e-05, + "loss": 1.5814, + "step": 47187 + }, + { + "epoch": 0.57, + "grad_norm": 7.698007289076927, + "learning_rate": 1.6809452178553215e-05, + "loss": 1.5036, + "step": 47190 + }, + { + "epoch": 0.57, + "grad_norm": 36.34010655293244, + "learning_rate": 1.6809024353286475e-05, + "loss": 1.2891, + "step": 47193 + }, + { + "epoch": 0.57, + "grad_norm": 8.8937341204364, + "learning_rate": 1.6808596504782986e-05, + "loss": 1.2744, + "step": 47196 + }, + { + "epoch": 0.57, + "grad_norm": 11.746059961614408, + "learning_rate": 1.680816863304421e-05, + "loss": 1.2553, + "step": 47199 + }, + { + "epoch": 0.57, + "grad_norm": 8.940514671225795, + "learning_rate": 1.6807740738071606e-05, + "loss": 1.168, + "step": 47202 + }, + { + "epoch": 0.57, + "grad_norm": 21.028308329038257, + "learning_rate": 1.6807312819866636e-05, + "loss": 1.414, + "step": 47205 + }, + { + "epoch": 0.57, + "grad_norm": 24.865298018877468, + "learning_rate": 1.6806884878430757e-05, + "loss": 1.2234, + "step": 47208 + }, + { + "epoch": 0.57, + "grad_norm": 37.038775140112264, + "learning_rate": 1.6806456913765434e-05, + "loss": 1.5597, + "step": 47211 + }, + { + "epoch": 0.57, + "grad_norm": 28.610174494453815, + "learning_rate": 1.6806028925872126e-05, + "loss": 1.3803, + "step": 47214 + }, + { + "epoch": 0.57, + "grad_norm": 17.18625764542193, + "learning_rate": 1.680560091475229e-05, + "loss": 1.4158, + "step": 47217 + }, + { + "epoch": 0.57, + "grad_norm": 2.5018533227301023, + "learning_rate": 1.6805172880407394e-05, + "loss": 1.4101, + "step": 47220 + }, + { + "epoch": 0.57, + "grad_norm": 5.464025598018106, + "learning_rate": 1.680474482283889e-05, + "loss": 1.6534, + "step": 47223 + }, + { + "epoch": 0.57, + "grad_norm": 10.229438458282536, + "learning_rate": 1.6804316742048246e-05, + "loss": 1.2742, + "step": 47226 + }, + { + "epoch": 0.57, + "grad_norm": 25.77412478771266, + "learning_rate": 1.6803888638036916e-05, + "loss": 1.4038, + "step": 47229 + }, + { + "epoch": 0.57, + "grad_norm": 7.788364141281445, + "learning_rate": 1.6803460510806366e-05, + "loss": 1.2212, + "step": 47232 + }, + { + "epoch": 0.57, + "grad_norm": 34.54213641017769, + "learning_rate": 1.6803032360358057e-05, + "loss": 1.5328, + "step": 47235 + }, + { + "epoch": 0.57, + "grad_norm": 7.180048804513766, + "learning_rate": 1.6802604186693446e-05, + "loss": 1.4746, + "step": 47238 + }, + { + "epoch": 0.57, + "grad_norm": 4.739238000888739, + "learning_rate": 1.6802175989814006e-05, + "loss": 1.344, + "step": 47241 + }, + { + "epoch": 0.57, + "grad_norm": 11.456070306850016, + "learning_rate": 1.680174776972118e-05, + "loss": 1.463, + "step": 47244 + }, + { + "epoch": 0.57, + "grad_norm": 19.00380462364547, + "learning_rate": 1.680131952641644e-05, + "loss": 1.6039, + "step": 47247 + }, + { + "epoch": 0.57, + "grad_norm": 11.396550816090143, + "learning_rate": 1.6800891259901254e-05, + "loss": 1.3255, + "step": 47250 + }, + { + "epoch": 0.57, + "grad_norm": 11.948773104351966, + "learning_rate": 1.680046297017707e-05, + "loss": 1.4552, + "step": 47253 + }, + { + "epoch": 0.57, + "grad_norm": 14.842964305368058, + "learning_rate": 1.6800034657245354e-05, + "loss": 1.6887, + "step": 47256 + }, + { + "epoch": 0.57, + "grad_norm": 14.693518846516136, + "learning_rate": 1.6799606321107572e-05, + "loss": 1.2304, + "step": 47259 + }, + { + "epoch": 0.57, + "grad_norm": 46.84959052943458, + "learning_rate": 1.679917796176518e-05, + "loss": 1.5331, + "step": 47262 + }, + { + "epoch": 0.57, + "grad_norm": 3.2635902809583506, + "learning_rate": 1.6798749579219645e-05, + "loss": 1.6496, + "step": 47265 + }, + { + "epoch": 0.57, + "grad_norm": 3.5387812483132763, + "learning_rate": 1.6798321173472424e-05, + "loss": 1.226, + "step": 47268 + }, + { + "epoch": 0.57, + "grad_norm": 15.409867832742012, + "learning_rate": 1.6797892744524985e-05, + "loss": 1.7661, + "step": 47271 + }, + { + "epoch": 0.57, + "grad_norm": 5.236684077068132, + "learning_rate": 1.6797464292378783e-05, + "loss": 1.4115, + "step": 47274 + }, + { + "epoch": 0.57, + "grad_norm": 23.191787931136936, + "learning_rate": 1.6797035817035287e-05, + "loss": 1.6644, + "step": 47277 + }, + { + "epoch": 0.57, + "grad_norm": 23.175487410185426, + "learning_rate": 1.6796607318495955e-05, + "loss": 1.4713, + "step": 47280 + }, + { + "epoch": 0.57, + "grad_norm": 11.401467758121916, + "learning_rate": 1.6796178796762253e-05, + "loss": 1.7854, + "step": 47283 + }, + { + "epoch": 0.57, + "grad_norm": 5.706981950877848, + "learning_rate": 1.6795750251835637e-05, + "loss": 1.391, + "step": 47286 + }, + { + "epoch": 0.57, + "grad_norm": 9.595602654501386, + "learning_rate": 1.6795321683717574e-05, + "loss": 1.1988, + "step": 47289 + }, + { + "epoch": 0.57, + "grad_norm": 5.756044437579183, + "learning_rate": 1.6794893092409524e-05, + "loss": 1.5626, + "step": 47292 + }, + { + "epoch": 0.57, + "grad_norm": 10.597643188828199, + "learning_rate": 1.6794464477912955e-05, + "loss": 1.5672, + "step": 47295 + }, + { + "epoch": 0.57, + "grad_norm": 21.84550200206561, + "learning_rate": 1.6794035840229326e-05, + "loss": 1.5022, + "step": 47298 + }, + { + "epoch": 0.57, + "grad_norm": 47.600135838825814, + "learning_rate": 1.6793607179360098e-05, + "loss": 1.6343, + "step": 47301 + }, + { + "epoch": 0.57, + "grad_norm": 16.227453202722455, + "learning_rate": 1.6793178495306734e-05, + "loss": 1.4146, + "step": 47304 + }, + { + "epoch": 0.57, + "grad_norm": 10.024230776181028, + "learning_rate": 1.6792749788070703e-05, + "loss": 1.3686, + "step": 47307 + }, + { + "epoch": 0.57, + "grad_norm": 4.749374152638604, + "learning_rate": 1.679232105765346e-05, + "loss": 1.4361, + "step": 47310 + }, + { + "epoch": 0.57, + "grad_norm": 36.20479693270817, + "learning_rate": 1.6791892304056472e-05, + "loss": 1.1015, + "step": 47313 + }, + { + "epoch": 0.57, + "grad_norm": 9.6779701065082, + "learning_rate": 1.6791463527281204e-05, + "loss": 1.2814, + "step": 47316 + }, + { + "epoch": 0.57, + "grad_norm": 33.68182863884008, + "learning_rate": 1.6791034727329115e-05, + "loss": 1.4582, + "step": 47319 + }, + { + "epoch": 0.57, + "grad_norm": 4.40138651991452, + "learning_rate": 1.6790605904201673e-05, + "loss": 1.5201, + "step": 47322 + }, + { + "epoch": 0.57, + "grad_norm": 6.095907132716811, + "learning_rate": 1.679017705790034e-05, + "loss": 1.65, + "step": 47325 + }, + { + "epoch": 0.57, + "grad_norm": 5.306196420672874, + "learning_rate": 1.6789748188426576e-05, + "loss": 1.7086, + "step": 47328 + }, + { + "epoch": 0.57, + "grad_norm": 11.884801908046361, + "learning_rate": 1.6789319295781846e-05, + "loss": 1.5278, + "step": 47331 + }, + { + "epoch": 0.57, + "grad_norm": 37.53123747324007, + "learning_rate": 1.678889037996762e-05, + "loss": 1.422, + "step": 47334 + }, + { + "epoch": 0.57, + "grad_norm": 23.325037030242697, + "learning_rate": 1.678846144098535e-05, + "loss": 1.4449, + "step": 47337 + }, + { + "epoch": 0.57, + "grad_norm": 16.246906474087634, + "learning_rate": 1.678803247883651e-05, + "loss": 1.5089, + "step": 47340 + }, + { + "epoch": 0.57, + "grad_norm": 27.030413849773574, + "learning_rate": 1.678760349352256e-05, + "loss": 1.1689, + "step": 47343 + }, + { + "epoch": 0.57, + "grad_norm": 21.30572171114029, + "learning_rate": 1.6787174485044964e-05, + "loss": 1.276, + "step": 47346 + }, + { + "epoch": 0.57, + "grad_norm": 37.81716233510638, + "learning_rate": 1.6786745453405186e-05, + "loss": 1.2365, + "step": 47349 + }, + { + "epoch": 0.57, + "grad_norm": 25.35399061077877, + "learning_rate": 1.678631639860469e-05, + "loss": 1.4174, + "step": 47352 + }, + { + "epoch": 0.57, + "grad_norm": 15.710987237405876, + "learning_rate": 1.678588732064494e-05, + "loss": 1.3982, + "step": 47355 + }, + { + "epoch": 0.57, + "grad_norm": 81.45855333662554, + "learning_rate": 1.6785458219527405e-05, + "loss": 1.3856, + "step": 47358 + }, + { + "epoch": 0.57, + "grad_norm": 10.45673274409468, + "learning_rate": 1.678502909525354e-05, + "loss": 1.2353, + "step": 47361 + }, + { + "epoch": 0.57, + "grad_norm": 19.728540459687927, + "learning_rate": 1.6784599947824815e-05, + "loss": 1.3426, + "step": 47364 + }, + { + "epoch": 0.57, + "grad_norm": 4.820650743776514, + "learning_rate": 1.6784170777242696e-05, + "loss": 1.2799, + "step": 47367 + }, + { + "epoch": 0.57, + "grad_norm": 6.786097702418092, + "learning_rate": 1.6783741583508646e-05, + "loss": 1.3044, + "step": 47370 + }, + { + "epoch": 0.57, + "grad_norm": 38.3508382282609, + "learning_rate": 1.678331236662413e-05, + "loss": 1.5673, + "step": 47373 + }, + { + "epoch": 0.57, + "grad_norm": 21.2115072176882, + "learning_rate": 1.678288312659061e-05, + "loss": 1.3455, + "step": 47376 + }, + { + "epoch": 0.57, + "grad_norm": 6.555797452937961, + "learning_rate": 1.6782453863409552e-05, + "loss": 1.2934, + "step": 47379 + }, + { + "epoch": 0.57, + "grad_norm": 19.147741887001647, + "learning_rate": 1.6782024577082427e-05, + "loss": 1.5016, + "step": 47382 + }, + { + "epoch": 0.57, + "grad_norm": 84.67867830827883, + "learning_rate": 1.678159526761069e-05, + "loss": 1.8457, + "step": 47385 + }, + { + "epoch": 0.57, + "grad_norm": 29.265207838163438, + "learning_rate": 1.6781165934995817e-05, + "loss": 1.3875, + "step": 47388 + }, + { + "epoch": 0.57, + "grad_norm": 9.935097200302792, + "learning_rate": 1.678073657923926e-05, + "loss": 1.4414, + "step": 47391 + }, + { + "epoch": 0.57, + "grad_norm": 22.042234941510348, + "learning_rate": 1.6780307200342498e-05, + "loss": 1.6932, + "step": 47394 + }, + { + "epoch": 0.57, + "grad_norm": 23.87122200004342, + "learning_rate": 1.6779877798306986e-05, + "loss": 1.6183, + "step": 47397 + }, + { + "epoch": 0.57, + "grad_norm": 10.953834500153294, + "learning_rate": 1.6779448373134194e-05, + "loss": 1.3979, + "step": 47400 + }, + { + "epoch": 0.57, + "grad_norm": 17.299029998867713, + "learning_rate": 1.6779018924825584e-05, + "loss": 1.667, + "step": 47403 + }, + { + "epoch": 0.57, + "grad_norm": 3.3793506276441145, + "learning_rate": 1.6778589453382627e-05, + "loss": 1.4447, + "step": 47406 + }, + { + "epoch": 0.57, + "grad_norm": 17.677780582653632, + "learning_rate": 1.6778159958806787e-05, + "loss": 1.4054, + "step": 47409 + }, + { + "epoch": 0.57, + "grad_norm": 2.8528191255693534, + "learning_rate": 1.6777730441099528e-05, + "loss": 1.4523, + "step": 47412 + }, + { + "epoch": 0.57, + "grad_norm": 7.119096549732951, + "learning_rate": 1.6777300900262313e-05, + "loss": 1.308, + "step": 47415 + }, + { + "epoch": 0.57, + "grad_norm": 219.6805245912363, + "learning_rate": 1.6776871336296613e-05, + "loss": 1.251, + "step": 47418 + }, + { + "epoch": 0.57, + "grad_norm": 13.27424385225512, + "learning_rate": 1.6776441749203894e-05, + "loss": 1.2951, + "step": 47421 + }, + { + "epoch": 0.57, + "grad_norm": 16.76611106301346, + "learning_rate": 1.677601213898562e-05, + "loss": 1.6282, + "step": 47424 + }, + { + "epoch": 0.57, + "grad_norm": 8.109040089090305, + "learning_rate": 1.6775582505643257e-05, + "loss": 1.5563, + "step": 47427 + }, + { + "epoch": 0.57, + "grad_norm": 14.055543763049492, + "learning_rate": 1.677515284917827e-05, + "loss": 1.6523, + "step": 47430 + }, + { + "epoch": 0.57, + "grad_norm": 5.472658694785478, + "learning_rate": 1.6774723169592127e-05, + "loss": 1.2049, + "step": 47433 + }, + { + "epoch": 0.57, + "grad_norm": 15.117904855887462, + "learning_rate": 1.6774293466886296e-05, + "loss": 1.4457, + "step": 47436 + }, + { + "epoch": 0.57, + "grad_norm": 6.081153947715489, + "learning_rate": 1.677386374106224e-05, + "loss": 1.7473, + "step": 47439 + }, + { + "epoch": 0.57, + "grad_norm": 12.77840883659836, + "learning_rate": 1.6773433992121425e-05, + "loss": 1.2681, + "step": 47442 + }, + { + "epoch": 0.57, + "grad_norm": 45.05761645122854, + "learning_rate": 1.6773004220065324e-05, + "loss": 1.3065, + "step": 47445 + }, + { + "epoch": 0.57, + "grad_norm": 12.304833984235348, + "learning_rate": 1.6772574424895394e-05, + "loss": 0.948, + "step": 47448 + }, + { + "epoch": 0.57, + "grad_norm": 9.357478874364558, + "learning_rate": 1.6772144606613113e-05, + "loss": 0.8602, + "step": 47451 + }, + { + "epoch": 0.57, + "grad_norm": 8.968224475800971, + "learning_rate": 1.6771714765219935e-05, + "loss": 1.274, + "step": 47454 + }, + { + "epoch": 0.57, + "grad_norm": 12.833273746211969, + "learning_rate": 1.6771284900717336e-05, + "loss": 1.4518, + "step": 47457 + }, + { + "epoch": 0.57, + "grad_norm": 6.643587231449465, + "learning_rate": 1.6770855013106784e-05, + "loss": 1.2078, + "step": 47460 + }, + { + "epoch": 0.57, + "grad_norm": 9.560336347423066, + "learning_rate": 1.677042510238974e-05, + "loss": 1.2957, + "step": 47463 + }, + { + "epoch": 0.57, + "grad_norm": 8.708815188557413, + "learning_rate": 1.6769995168567674e-05, + "loss": 1.2689, + "step": 47466 + }, + { + "epoch": 0.57, + "grad_norm": 17.342733238354576, + "learning_rate": 1.6769565211642055e-05, + "loss": 1.5931, + "step": 47469 + }, + { + "epoch": 0.57, + "grad_norm": 16.99434152479676, + "learning_rate": 1.6769135231614347e-05, + "loss": 1.5781, + "step": 47472 + }, + { + "epoch": 0.57, + "grad_norm": 31.3793847943549, + "learning_rate": 1.6768705228486014e-05, + "loss": 1.4599, + "step": 47475 + }, + { + "epoch": 0.57, + "grad_norm": 2.685697524519562, + "learning_rate": 1.6768275202258534e-05, + "loss": 1.6109, + "step": 47478 + }, + { + "epoch": 0.57, + "grad_norm": 29.351470628571988, + "learning_rate": 1.676784515293337e-05, + "loss": 1.5452, + "step": 47481 + }, + { + "epoch": 0.57, + "grad_norm": 9.244042125274497, + "learning_rate": 1.6767415080511985e-05, + "loss": 1.7149, + "step": 47484 + }, + { + "epoch": 0.57, + "grad_norm": 26.481629122589272, + "learning_rate": 1.6766984984995848e-05, + "loss": 1.2879, + "step": 47487 + }, + { + "epoch": 0.57, + "grad_norm": 3.8660077518464093, + "learning_rate": 1.6766554866386434e-05, + "loss": 1.2122, + "step": 47490 + }, + { + "epoch": 0.57, + "grad_norm": 4.88549812410901, + "learning_rate": 1.67661247246852e-05, + "loss": 1.3732, + "step": 47493 + }, + { + "epoch": 0.57, + "grad_norm": 12.332812726355256, + "learning_rate": 1.6765694559893625e-05, + "loss": 1.494, + "step": 47496 + }, + { + "epoch": 0.57, + "grad_norm": 23.57602043213445, + "learning_rate": 1.6765264372013166e-05, + "loss": 1.3726, + "step": 47499 + }, + { + "epoch": 0.57, + "grad_norm": 14.97627110207895, + "learning_rate": 1.67648341610453e-05, + "loss": 1.6121, + "step": 47502 + }, + { + "epoch": 0.57, + "grad_norm": 45.73954553171419, + "learning_rate": 1.6764403926991492e-05, + "loss": 1.5042, + "step": 47505 + }, + { + "epoch": 0.57, + "grad_norm": 14.393610269512953, + "learning_rate": 1.6763973669853208e-05, + "loss": 1.363, + "step": 47508 + }, + { + "epoch": 0.57, + "grad_norm": 67.70099536277615, + "learning_rate": 1.6763543389631917e-05, + "loss": 1.1364, + "step": 47511 + }, + { + "epoch": 0.57, + "grad_norm": 18.178047232764964, + "learning_rate": 1.676311308632909e-05, + "loss": 1.593, + "step": 47514 + }, + { + "epoch": 0.57, + "grad_norm": 11.667267408889211, + "learning_rate": 1.6762682759946195e-05, + "loss": 1.4509, + "step": 47517 + }, + { + "epoch": 0.57, + "grad_norm": 22.406356796817423, + "learning_rate": 1.67622524104847e-05, + "loss": 1.7428, + "step": 47520 + }, + { + "epoch": 0.57, + "grad_norm": 2.7071244190162282, + "learning_rate": 1.6761822037946073e-05, + "loss": 1.6877, + "step": 47523 + }, + { + "epoch": 0.57, + "grad_norm": 5.205823963338764, + "learning_rate": 1.6761391642331783e-05, + "loss": 1.5272, + "step": 47526 + }, + { + "epoch": 0.57, + "grad_norm": 12.0121881280593, + "learning_rate": 1.6760961223643296e-05, + "loss": 1.2687, + "step": 47529 + }, + { + "epoch": 0.57, + "grad_norm": 15.426567976677658, + "learning_rate": 1.676053078188209e-05, + "loss": 1.4694, + "step": 47532 + }, + { + "epoch": 0.57, + "grad_norm": 25.641468353204395, + "learning_rate": 1.6760100317049622e-05, + "loss": 1.8392, + "step": 47535 + }, + { + "epoch": 0.57, + "grad_norm": 2.4310259700016386, + "learning_rate": 1.6759669829147365e-05, + "loss": 2.1512, + "step": 47538 + }, + { + "epoch": 0.57, + "grad_norm": 20.608259781566144, + "learning_rate": 1.6759239318176794e-05, + "loss": 1.3177, + "step": 47541 + }, + { + "epoch": 0.57, + "grad_norm": 14.464549004491975, + "learning_rate": 1.6758808784139377e-05, + "loss": 1.5846, + "step": 47544 + }, + { + "epoch": 0.57, + "grad_norm": 11.858197482067455, + "learning_rate": 1.6758378227036575e-05, + "loss": 1.3906, + "step": 47547 + }, + { + "epoch": 0.57, + "grad_norm": 12.233733685418652, + "learning_rate": 1.6757947646869863e-05, + "loss": 1.2495, + "step": 47550 + }, + { + "epoch": 0.57, + "grad_norm": 21.605033969247206, + "learning_rate": 1.6757517043640706e-05, + "loss": 1.4794, + "step": 47553 + }, + { + "epoch": 0.57, + "grad_norm": 14.590239272900089, + "learning_rate": 1.6757086417350583e-05, + "loss": 1.4046, + "step": 47556 + }, + { + "epoch": 0.57, + "grad_norm": 17.460537714121426, + "learning_rate": 1.6756655768000957e-05, + "loss": 1.6154, + "step": 47559 + }, + { + "epoch": 0.57, + "grad_norm": 17.45115043139947, + "learning_rate": 1.6756225095593298e-05, + "loss": 1.3695, + "step": 47562 + }, + { + "epoch": 0.57, + "grad_norm": 14.353926176996964, + "learning_rate": 1.6755794400129074e-05, + "loss": 1.307, + "step": 47565 + }, + { + "epoch": 0.57, + "grad_norm": 20.5915109199375, + "learning_rate": 1.675536368160976e-05, + "loss": 1.302, + "step": 47568 + }, + { + "epoch": 0.57, + "grad_norm": 8.326405996450431, + "learning_rate": 1.6754932940036825e-05, + "loss": 1.4214, + "step": 47571 + }, + { + "epoch": 0.57, + "grad_norm": 12.435095488893147, + "learning_rate": 1.6754502175411734e-05, + "loss": 1.1807, + "step": 47574 + }, + { + "epoch": 0.57, + "grad_norm": 3.6591292431262494, + "learning_rate": 1.675407138773596e-05, + "loss": 1.3041, + "step": 47577 + }, + { + "epoch": 0.57, + "grad_norm": 27.29059954242159, + "learning_rate": 1.6753640577010974e-05, + "loss": 1.6283, + "step": 47580 + }, + { + "epoch": 0.57, + "grad_norm": 3.370857198919052, + "learning_rate": 1.6753209743238247e-05, + "loss": 1.2194, + "step": 47583 + }, + { + "epoch": 0.57, + "grad_norm": 10.164404825349418, + "learning_rate": 1.6752778886419244e-05, + "loss": 1.4396, + "step": 47586 + }, + { + "epoch": 0.57, + "grad_norm": 11.820035412130004, + "learning_rate": 1.675234800655544e-05, + "loss": 1.4881, + "step": 47589 + }, + { + "epoch": 0.57, + "grad_norm": 21.91467403973993, + "learning_rate": 1.6751917103648306e-05, + "loss": 1.3641, + "step": 47592 + }, + { + "epoch": 0.57, + "grad_norm": 22.553734337842755, + "learning_rate": 1.675148617769931e-05, + "loss": 1.5379, + "step": 47595 + }, + { + "epoch": 0.57, + "grad_norm": 6.889790189209339, + "learning_rate": 1.6751055228709924e-05, + "loss": 1.5251, + "step": 47598 + }, + { + "epoch": 0.57, + "grad_norm": 18.834544994844627, + "learning_rate": 1.675062425668162e-05, + "loss": 1.6592, + "step": 47601 + }, + { + "epoch": 0.57, + "grad_norm": 25.319591993562405, + "learning_rate": 1.6750193261615862e-05, + "loss": 1.5189, + "step": 47604 + }, + { + "epoch": 0.57, + "grad_norm": 16.144268949329415, + "learning_rate": 1.674976224351413e-05, + "loss": 1.4231, + "step": 47607 + }, + { + "epoch": 0.57, + "grad_norm": 104.19566702967896, + "learning_rate": 1.674933120237789e-05, + "loss": 1.2637, + "step": 47610 + }, + { + "epoch": 0.57, + "grad_norm": 27.874797968144087, + "learning_rate": 1.674890013820861e-05, + "loss": 1.2931, + "step": 47613 + }, + { + "epoch": 0.57, + "grad_norm": 6.8008952103383065, + "learning_rate": 1.674846905100777e-05, + "loss": 1.3306, + "step": 47616 + }, + { + "epoch": 0.57, + "grad_norm": 17.375972989656123, + "learning_rate": 1.6748037940776832e-05, + "loss": 1.4667, + "step": 47619 + }, + { + "epoch": 0.57, + "grad_norm": 12.807094034717712, + "learning_rate": 1.6747606807517273e-05, + "loss": 1.0304, + "step": 47622 + }, + { + "epoch": 0.57, + "grad_norm": 17.682186037110924, + "learning_rate": 1.6747175651230564e-05, + "loss": 1.202, + "step": 47625 + }, + { + "epoch": 0.57, + "grad_norm": 8.689001316176075, + "learning_rate": 1.6746744471918173e-05, + "loss": 1.5324, + "step": 47628 + }, + { + "epoch": 0.57, + "grad_norm": 16.91252544372462, + "learning_rate": 1.6746313269581573e-05, + "loss": 1.2703, + "step": 47631 + }, + { + "epoch": 0.57, + "grad_norm": 6.741454871332308, + "learning_rate": 1.6745882044222237e-05, + "loss": 1.2251, + "step": 47634 + }, + { + "epoch": 0.57, + "grad_norm": 10.470313991196592, + "learning_rate": 1.6745450795841634e-05, + "loss": 1.2632, + "step": 47637 + }, + { + "epoch": 0.57, + "grad_norm": 16.65405118482002, + "learning_rate": 1.674501952444124e-05, + "loss": 1.5131, + "step": 47640 + }, + { + "epoch": 0.57, + "grad_norm": 11.465087515536316, + "learning_rate": 1.674458823002252e-05, + "loss": 1.4716, + "step": 47643 + }, + { + "epoch": 0.57, + "grad_norm": 9.34601352336934, + "learning_rate": 1.6744156912586952e-05, + "loss": 1.6148, + "step": 47646 + }, + { + "epoch": 0.57, + "grad_norm": 93.0456686737602, + "learning_rate": 1.6743725572136006e-05, + "loss": 1.509, + "step": 47649 + }, + { + "epoch": 0.57, + "grad_norm": 54.81230394458042, + "learning_rate": 1.6743294208671154e-05, + "loss": 1.2228, + "step": 47652 + }, + { + "epoch": 0.57, + "grad_norm": 27.262147924397905, + "learning_rate": 1.6742862822193867e-05, + "loss": 1.5179, + "step": 47655 + }, + { + "epoch": 0.57, + "grad_norm": 11.229671371553714, + "learning_rate": 1.6742431412705615e-05, + "loss": 1.3949, + "step": 47658 + }, + { + "epoch": 0.57, + "grad_norm": 13.518862353370697, + "learning_rate": 1.6741999980207876e-05, + "loss": 0.9448, + "step": 47661 + }, + { + "epoch": 0.57, + "grad_norm": 28.166909547640397, + "learning_rate": 1.674156852470212e-05, + "loss": 1.3268, + "step": 47664 + }, + { + "epoch": 0.57, + "grad_norm": 22.50353232558113, + "learning_rate": 1.674113704618982e-05, + "loss": 1.2404, + "step": 47667 + }, + { + "epoch": 0.57, + "grad_norm": 5.5699627351829815, + "learning_rate": 1.6740705544672442e-05, + "loss": 1.7084, + "step": 47670 + }, + { + "epoch": 0.57, + "grad_norm": 10.319691675062312, + "learning_rate": 1.674027402015147e-05, + "loss": 1.1382, + "step": 47673 + }, + { + "epoch": 0.57, + "grad_norm": 9.63384287336307, + "learning_rate": 1.673984247262837e-05, + "loss": 1.342, + "step": 47676 + }, + { + "epoch": 0.57, + "grad_norm": 3.180580994118509, + "learning_rate": 1.6739410902104612e-05, + "loss": 1.3966, + "step": 47679 + }, + { + "epoch": 0.57, + "grad_norm": 10.845528940356054, + "learning_rate": 1.6738979308581675e-05, + "loss": 1.3806, + "step": 47682 + }, + { + "epoch": 0.57, + "grad_norm": 11.191396856786664, + "learning_rate": 1.6738547692061023e-05, + "loss": 1.5313, + "step": 47685 + }, + { + "epoch": 0.57, + "grad_norm": 5.515302915220249, + "learning_rate": 1.6738116052544143e-05, + "loss": 1.5062, + "step": 47688 + }, + { + "epoch": 0.57, + "grad_norm": 3.8677117509991183, + "learning_rate": 1.6737684390032496e-05, + "loss": 1.6687, + "step": 47691 + }, + { + "epoch": 0.57, + "grad_norm": 32.35702368139687, + "learning_rate": 1.673725270452756e-05, + "loss": 1.3411, + "step": 47694 + }, + { + "epoch": 0.57, + "grad_norm": 14.619593318999504, + "learning_rate": 1.6736820996030807e-05, + "loss": 1.5852, + "step": 47697 + }, + { + "epoch": 0.57, + "grad_norm": 65.31957719374364, + "learning_rate": 1.6736389264543712e-05, + "loss": 1.3398, + "step": 47700 + }, + { + "epoch": 0.57, + "grad_norm": 4.406636863231738, + "learning_rate": 1.6735957510067743e-05, + "loss": 1.5069, + "step": 47703 + }, + { + "epoch": 0.57, + "grad_norm": 15.286511921189373, + "learning_rate": 1.6735525732604384e-05, + "loss": 1.098, + "step": 47706 + }, + { + "epoch": 0.57, + "grad_norm": 3.139264243422158, + "learning_rate": 1.6735093932155092e-05, + "loss": 1.1077, + "step": 47709 + }, + { + "epoch": 0.57, + "grad_norm": 8.142789564795384, + "learning_rate": 1.6734662108721358e-05, + "loss": 1.5054, + "step": 47712 + }, + { + "epoch": 0.57, + "grad_norm": 8.94114458073916, + "learning_rate": 1.673423026230465e-05, + "loss": 1.6466, + "step": 47715 + }, + { + "epoch": 0.57, + "grad_norm": 25.016875547944064, + "learning_rate": 1.673379839290643e-05, + "loss": 1.2188, + "step": 47718 + }, + { + "epoch": 0.57, + "grad_norm": 12.195332011466347, + "learning_rate": 1.673336650052819e-05, + "loss": 1.3786, + "step": 47721 + }, + { + "epoch": 0.57, + "grad_norm": 9.798816127975774, + "learning_rate": 1.6732934585171394e-05, + "loss": 1.7513, + "step": 47724 + }, + { + "epoch": 0.57, + "grad_norm": 8.821393057296024, + "learning_rate": 1.6732502646837515e-05, + "loss": 1.2623, + "step": 47727 + }, + { + "epoch": 0.57, + "grad_norm": 5.986366563813178, + "learning_rate": 1.6732070685528026e-05, + "loss": 1.2603, + "step": 47730 + }, + { + "epoch": 0.57, + "grad_norm": 8.15989398274031, + "learning_rate": 1.6731638701244413e-05, + "loss": 1.6018, + "step": 47733 + }, + { + "epoch": 0.57, + "grad_norm": 28.677911967135344, + "learning_rate": 1.6731206693988137e-05, + "loss": 1.5398, + "step": 47736 + }, + { + "epoch": 0.57, + "grad_norm": 9.795263347948561, + "learning_rate": 1.6730774663760677e-05, + "loss": 1.5296, + "step": 47739 + }, + { + "epoch": 0.57, + "grad_norm": 21.000665548813693, + "learning_rate": 1.6730342610563513e-05, + "loss": 1.4691, + "step": 47742 + }, + { + "epoch": 0.57, + "grad_norm": 22.960733140908655, + "learning_rate": 1.672991053439811e-05, + "loss": 1.857, + "step": 47745 + }, + { + "epoch": 0.57, + "grad_norm": 20.4137835189526, + "learning_rate": 1.672947843526595e-05, + "loss": 1.7773, + "step": 47748 + }, + { + "epoch": 0.57, + "grad_norm": 2.932325532105158, + "learning_rate": 1.6729046313168495e-05, + "loss": 1.2226, + "step": 47751 + }, + { + "epoch": 0.57, + "grad_norm": 7.616429877686501, + "learning_rate": 1.6728614168107237e-05, + "loss": 1.7776, + "step": 47754 + }, + { + "epoch": 0.57, + "grad_norm": 10.095444915340707, + "learning_rate": 1.6728182000083637e-05, + "loss": 1.2879, + "step": 47757 + }, + { + "epoch": 0.57, + "grad_norm": 16.6416526877373, + "learning_rate": 1.672774980909918e-05, + "loss": 1.4859, + "step": 47760 + }, + { + "epoch": 0.57, + "grad_norm": 6.733131836950072, + "learning_rate": 1.6727317595155332e-05, + "loss": 1.31, + "step": 47763 + }, + { + "epoch": 0.57, + "grad_norm": 61.281315812901305, + "learning_rate": 1.6726885358253574e-05, + "loss": 1.0074, + "step": 47766 + }, + { + "epoch": 0.57, + "grad_norm": 21.01222054983035, + "learning_rate": 1.672645309839538e-05, + "loss": 1.4897, + "step": 47769 + }, + { + "epoch": 0.57, + "grad_norm": 12.921451238576909, + "learning_rate": 1.672602081558222e-05, + "loss": 1.5646, + "step": 47772 + }, + { + "epoch": 0.57, + "grad_norm": 3.7770577887195462, + "learning_rate": 1.6725588509815584e-05, + "loss": 1.5073, + "step": 47775 + }, + { + "epoch": 0.57, + "grad_norm": 8.071963572114663, + "learning_rate": 1.6725156181096925e-05, + "loss": 1.5615, + "step": 47778 + }, + { + "epoch": 0.57, + "grad_norm": 29.45261039931972, + "learning_rate": 1.672472382942774e-05, + "loss": 1.5031, + "step": 47781 + }, + { + "epoch": 0.57, + "grad_norm": 14.552628878204386, + "learning_rate": 1.672429145480949e-05, + "loss": 1.3286, + "step": 47784 + }, + { + "epoch": 0.57, + "grad_norm": 4.365377051740664, + "learning_rate": 1.6723859057243658e-05, + "loss": 1.7925, + "step": 47787 + }, + { + "epoch": 0.57, + "grad_norm": 6.663477417305325, + "learning_rate": 1.6723426636731713e-05, + "loss": 1.2216, + "step": 47790 + }, + { + "epoch": 0.57, + "grad_norm": 10.097808015357545, + "learning_rate": 1.6722994193275133e-05, + "loss": 1.5571, + "step": 47793 + }, + { + "epoch": 0.57, + "grad_norm": 4.495110391664302, + "learning_rate": 1.6722561726875403e-05, + "loss": 1.1274, + "step": 47796 + }, + { + "epoch": 0.57, + "grad_norm": 9.526296787652685, + "learning_rate": 1.6722129237533984e-05, + "loss": 1.3959, + "step": 47799 + }, + { + "epoch": 0.57, + "grad_norm": 7.13969948453561, + "learning_rate": 1.6721696725252363e-05, + "loss": 1.2926, + "step": 47802 + }, + { + "epoch": 0.57, + "grad_norm": 17.7025809089023, + "learning_rate": 1.672126419003201e-05, + "loss": 1.3383, + "step": 47805 + }, + { + "epoch": 0.57, + "grad_norm": 44.78558452968185, + "learning_rate": 1.6720831631874407e-05, + "loss": 1.3123, + "step": 47808 + }, + { + "epoch": 0.57, + "grad_norm": 11.210040314064134, + "learning_rate": 1.6720399050781025e-05, + "loss": 1.4341, + "step": 47811 + }, + { + "epoch": 0.57, + "grad_norm": 23.30945947620944, + "learning_rate": 1.671996644675334e-05, + "loss": 1.4113, + "step": 47814 + }, + { + "epoch": 0.57, + "grad_norm": 11.433638036915516, + "learning_rate": 1.6719533819792834e-05, + "loss": 1.3386, + "step": 47817 + }, + { + "epoch": 0.58, + "grad_norm": 7.224647455466419, + "learning_rate": 1.6719101169900975e-05, + "loss": 1.3859, + "step": 47820 + }, + { + "epoch": 0.58, + "grad_norm": 9.147983019408633, + "learning_rate": 1.6718668497079248e-05, + "loss": 1.6575, + "step": 47823 + }, + { + "epoch": 0.58, + "grad_norm": 6.238574725630061, + "learning_rate": 1.671823580132912e-05, + "loss": 1.6529, + "step": 47826 + }, + { + "epoch": 0.58, + "grad_norm": 9.22887142344342, + "learning_rate": 1.671780308265208e-05, + "loss": 1.4456, + "step": 47829 + }, + { + "epoch": 0.58, + "grad_norm": 8.753720968281955, + "learning_rate": 1.671737034104959e-05, + "loss": 1.4814, + "step": 47832 + }, + { + "epoch": 0.58, + "grad_norm": 7.762657439235831, + "learning_rate": 1.6716937576523144e-05, + "loss": 1.5582, + "step": 47835 + }, + { + "epoch": 0.58, + "grad_norm": 9.786879358513353, + "learning_rate": 1.6716504789074202e-05, + "loss": 1.4961, + "step": 47838 + }, + { + "epoch": 0.58, + "grad_norm": 10.337354510503674, + "learning_rate": 1.6716071978704247e-05, + "loss": 1.9126, + "step": 47841 + }, + { + "epoch": 0.58, + "grad_norm": 25.944608174519786, + "learning_rate": 1.6715639145414764e-05, + "loss": 1.7203, + "step": 47844 + }, + { + "epoch": 0.58, + "grad_norm": 4.815037030785531, + "learning_rate": 1.671520628920722e-05, + "loss": 1.7682, + "step": 47847 + }, + { + "epoch": 0.58, + "grad_norm": 21.122525043898584, + "learning_rate": 1.6714773410083098e-05, + "loss": 1.3382, + "step": 47850 + }, + { + "epoch": 0.58, + "grad_norm": 10.161290452498669, + "learning_rate": 1.671434050804387e-05, + "loss": 1.5416, + "step": 47853 + }, + { + "epoch": 0.58, + "grad_norm": 46.23942763140548, + "learning_rate": 1.6713907583091018e-05, + "loss": 1.213, + "step": 47856 + }, + { + "epoch": 0.58, + "grad_norm": 10.625671796521388, + "learning_rate": 1.6713474635226017e-05, + "loss": 1.457, + "step": 47859 + }, + { + "epoch": 0.58, + "grad_norm": 10.080523441119299, + "learning_rate": 1.6713041664450343e-05, + "loss": 1.3114, + "step": 47862 + }, + { + "epoch": 0.58, + "grad_norm": 3.8520583290975736, + "learning_rate": 1.671260867076548e-05, + "loss": 1.3449, + "step": 47865 + }, + { + "epoch": 0.58, + "grad_norm": 40.28665541271391, + "learning_rate": 1.67121756541729e-05, + "loss": 1.5896, + "step": 47868 + }, + { + "epoch": 0.58, + "grad_norm": 18.755037493791473, + "learning_rate": 1.6711742614674086e-05, + "loss": 1.2732, + "step": 47871 + }, + { + "epoch": 0.58, + "grad_norm": 6.363939873222967, + "learning_rate": 1.6711309552270508e-05, + "loss": 1.249, + "step": 47874 + }, + { + "epoch": 0.58, + "grad_norm": 5.472577971858908, + "learning_rate": 1.6710876466963647e-05, + "loss": 1.4568, + "step": 47877 + }, + { + "epoch": 0.58, + "grad_norm": 23.216086136481135, + "learning_rate": 1.6710443358754983e-05, + "loss": 1.3878, + "step": 47880 + }, + { + "epoch": 0.58, + "grad_norm": 29.111847492015485, + "learning_rate": 1.671001022764599e-05, + "loss": 1.6487, + "step": 47883 + }, + { + "epoch": 0.58, + "grad_norm": 17.843096217113356, + "learning_rate": 1.670957707363815e-05, + "loss": 1.5015, + "step": 47886 + }, + { + "epoch": 0.58, + "grad_norm": 15.355447115444443, + "learning_rate": 1.6709143896732948e-05, + "loss": 1.1449, + "step": 47889 + }, + { + "epoch": 0.58, + "grad_norm": 20.354621973616894, + "learning_rate": 1.6708710696931847e-05, + "loss": 1.1808, + "step": 47892 + }, + { + "epoch": 0.58, + "grad_norm": 7.5734120197314585, + "learning_rate": 1.6708277474236333e-05, + "loss": 1.4621, + "step": 47895 + }, + { + "epoch": 0.58, + "grad_norm": 9.653009109010066, + "learning_rate": 1.6707844228647887e-05, + "loss": 1.3054, + "step": 47898 + }, + { + "epoch": 0.58, + "grad_norm": 11.127856886341803, + "learning_rate": 1.6707410960167983e-05, + "loss": 1.1812, + "step": 47901 + }, + { + "epoch": 0.58, + "grad_norm": 26.579603613285986, + "learning_rate": 1.67069776687981e-05, + "loss": 1.5558, + "step": 47904 + }, + { + "epoch": 0.58, + "grad_norm": 7.936050117957353, + "learning_rate": 1.670654435453972e-05, + "loss": 1.2583, + "step": 47907 + }, + { + "epoch": 0.58, + "grad_norm": 9.063546078919586, + "learning_rate": 1.6706111017394322e-05, + "loss": 1.5751, + "step": 47910 + }, + { + "epoch": 0.58, + "grad_norm": 8.188140278394718, + "learning_rate": 1.670567765736338e-05, + "loss": 1.275, + "step": 47913 + }, + { + "epoch": 0.58, + "grad_norm": 6.3048068377966295, + "learning_rate": 1.670524427444838e-05, + "loss": 1.3897, + "step": 47916 + }, + { + "epoch": 0.58, + "grad_norm": 9.40583383277658, + "learning_rate": 1.670481086865079e-05, + "loss": 1.2913, + "step": 47919 + }, + { + "epoch": 0.58, + "grad_norm": 9.069139415508483, + "learning_rate": 1.67043774399721e-05, + "loss": 1.2333, + "step": 47922 + }, + { + "epoch": 0.58, + "grad_norm": 27.553304153643406, + "learning_rate": 1.670394398841378e-05, + "loss": 0.9919, + "step": 47925 + }, + { + "epoch": 0.58, + "grad_norm": 17.980334244561725, + "learning_rate": 1.6703510513977317e-05, + "loss": 1.8219, + "step": 47928 + }, + { + "epoch": 0.58, + "grad_norm": 18.421582852923365, + "learning_rate": 1.6703077016664187e-05, + "loss": 1.4569, + "step": 47931 + }, + { + "epoch": 0.58, + "grad_norm": 23.887933443609466, + "learning_rate": 1.670264349647587e-05, + "loss": 1.5718, + "step": 47934 + }, + { + "epoch": 0.58, + "grad_norm": 18.908099964605015, + "learning_rate": 1.6702209953413843e-05, + "loss": 1.4167, + "step": 47937 + }, + { + "epoch": 0.58, + "grad_norm": 16.549279679651708, + "learning_rate": 1.670177638747959e-05, + "loss": 1.4833, + "step": 47940 + }, + { + "epoch": 0.58, + "grad_norm": 7.679002482303952, + "learning_rate": 1.6701342798674586e-05, + "loss": 1.3949, + "step": 47943 + }, + { + "epoch": 0.58, + "grad_norm": 8.303419374698107, + "learning_rate": 1.6700909187000313e-05, + "loss": 1.8363, + "step": 47946 + }, + { + "epoch": 0.58, + "grad_norm": 45.96675226243398, + "learning_rate": 1.670047555245825e-05, + "loss": 1.1846, + "step": 47949 + }, + { + "epoch": 0.58, + "grad_norm": 16.801988929485482, + "learning_rate": 1.6700041895049885e-05, + "loss": 1.1718, + "step": 47952 + }, + { + "epoch": 0.58, + "grad_norm": 11.057935630318877, + "learning_rate": 1.6699608214776683e-05, + "loss": 1.2846, + "step": 47955 + }, + { + "epoch": 0.58, + "grad_norm": 11.377883789753064, + "learning_rate": 1.669917451164013e-05, + "loss": 1.3478, + "step": 47958 + }, + { + "epoch": 0.58, + "grad_norm": 11.43203329977219, + "learning_rate": 1.6698740785641713e-05, + "loss": 1.4202, + "step": 47961 + }, + { + "epoch": 0.58, + "grad_norm": 23.809258451452433, + "learning_rate": 1.6698307036782905e-05, + "loss": 1.4104, + "step": 47964 + }, + { + "epoch": 0.58, + "grad_norm": 16.357858208003837, + "learning_rate": 1.6697873265065186e-05, + "loss": 1.3734, + "step": 47967 + }, + { + "epoch": 0.58, + "grad_norm": 17.02880909525132, + "learning_rate": 1.6697439470490038e-05, + "loss": 1.3702, + "step": 47970 + }, + { + "epoch": 0.58, + "grad_norm": 8.390543356519762, + "learning_rate": 1.6697005653058943e-05, + "loss": 1.7012, + "step": 47973 + }, + { + "epoch": 0.58, + "grad_norm": 49.682575772539536, + "learning_rate": 1.6696571812773378e-05, + "loss": 1.7026, + "step": 47976 + }, + { + "epoch": 0.58, + "grad_norm": 5.80559017151721, + "learning_rate": 1.6696137949634826e-05, + "loss": 1.5999, + "step": 47979 + }, + { + "epoch": 0.58, + "grad_norm": 11.318070888583481, + "learning_rate": 1.669570406364477e-05, + "loss": 1.5306, + "step": 47982 + }, + { + "epoch": 0.58, + "grad_norm": 17.35130061492467, + "learning_rate": 1.6695270154804687e-05, + "loss": 1.4641, + "step": 47985 + }, + { + "epoch": 0.58, + "grad_norm": 10.20514198676678, + "learning_rate": 1.6694836223116054e-05, + "loss": 1.6065, + "step": 47988 + }, + { + "epoch": 0.58, + "grad_norm": 10.929044017395661, + "learning_rate": 1.669440226858036e-05, + "loss": 1.3883, + "step": 47991 + }, + { + "epoch": 0.58, + "grad_norm": 4.612234087150414, + "learning_rate": 1.6693968291199084e-05, + "loss": 1.5669, + "step": 47994 + }, + { + "epoch": 0.58, + "grad_norm": 29.46493845251188, + "learning_rate": 1.66935342909737e-05, + "loss": 1.6115, + "step": 47997 + }, + { + "epoch": 0.58, + "grad_norm": 15.25051100468225, + "learning_rate": 1.66931002679057e-05, + "loss": 1.2499, + "step": 48000 + }, + { + "epoch": 0.58, + "grad_norm": 8.092456435773027, + "learning_rate": 1.669266622199656e-05, + "loss": 1.1833, + "step": 48003 + }, + { + "epoch": 0.58, + "grad_norm": 13.120508545808088, + "learning_rate": 1.6692232153247756e-05, + "loss": 1.3568, + "step": 48006 + }, + { + "epoch": 0.58, + "grad_norm": 9.870945903434636, + "learning_rate": 1.6691798061660776e-05, + "loss": 1.1423, + "step": 48009 + }, + { + "epoch": 0.58, + "grad_norm": 27.714425890228632, + "learning_rate": 1.66913639472371e-05, + "loss": 1.6836, + "step": 48012 + }, + { + "epoch": 0.58, + "grad_norm": 55.802306834120486, + "learning_rate": 1.669092980997821e-05, + "loss": 1.4707, + "step": 48015 + }, + { + "epoch": 0.58, + "grad_norm": 12.907145443228528, + "learning_rate": 1.6690495649885587e-05, + "loss": 1.4622, + "step": 48018 + }, + { + "epoch": 0.58, + "grad_norm": 17.51927451638252, + "learning_rate": 1.669006146696071e-05, + "loss": 1.3943, + "step": 48021 + }, + { + "epoch": 0.58, + "grad_norm": 6.396601599684203, + "learning_rate": 1.6689627261205063e-05, + "loss": 1.2817, + "step": 48024 + }, + { + "epoch": 0.58, + "grad_norm": 36.851856525615915, + "learning_rate": 1.6689193032620127e-05, + "loss": 1.5037, + "step": 48027 + }, + { + "epoch": 0.58, + "grad_norm": 29.76269313705021, + "learning_rate": 1.6688758781207388e-05, + "loss": 1.5325, + "step": 48030 + }, + { + "epoch": 0.58, + "grad_norm": 34.978335560036484, + "learning_rate": 1.6688324506968324e-05, + "loss": 1.2573, + "step": 48033 + }, + { + "epoch": 0.58, + "grad_norm": 8.687524690420103, + "learning_rate": 1.6687890209904416e-05, + "loss": 1.1435, + "step": 48036 + }, + { + "epoch": 0.58, + "grad_norm": 47.75483993488286, + "learning_rate": 1.6687455890017148e-05, + "loss": 1.2556, + "step": 48039 + }, + { + "epoch": 0.58, + "grad_norm": 16.461892082783862, + "learning_rate": 1.6687021547308003e-05, + "loss": 1.4504, + "step": 48042 + }, + { + "epoch": 0.58, + "grad_norm": 8.21076980601673, + "learning_rate": 1.668658718177846e-05, + "loss": 1.4804, + "step": 48045 + }, + { + "epoch": 0.58, + "grad_norm": 12.386367930129287, + "learning_rate": 1.6686152793430004e-05, + "loss": 1.4002, + "step": 48048 + }, + { + "epoch": 0.58, + "grad_norm": 30.54754924365748, + "learning_rate": 1.668571838226412e-05, + "loss": 1.6953, + "step": 48051 + }, + { + "epoch": 0.58, + "grad_norm": 10.875732774978392, + "learning_rate": 1.6685283948282283e-05, + "loss": 1.2413, + "step": 48054 + }, + { + "epoch": 0.58, + "grad_norm": 56.48707513053794, + "learning_rate": 1.6684849491485983e-05, + "loss": 1.3919, + "step": 48057 + }, + { + "epoch": 0.58, + "grad_norm": 11.189025978433333, + "learning_rate": 1.6684415011876696e-05, + "loss": 1.3538, + "step": 48060 + }, + { + "epoch": 0.58, + "grad_norm": 12.057352374365271, + "learning_rate": 1.668398050945591e-05, + "loss": 1.5999, + "step": 48063 + }, + { + "epoch": 0.58, + "grad_norm": 39.18646781291857, + "learning_rate": 1.668354598422511e-05, + "loss": 1.3773, + "step": 48066 + }, + { + "epoch": 0.58, + "grad_norm": 14.539151847875742, + "learning_rate": 1.6683111436185772e-05, + "loss": 1.6147, + "step": 48069 + }, + { + "epoch": 0.58, + "grad_norm": 12.757312230624823, + "learning_rate": 1.668267686533938e-05, + "loss": 1.7546, + "step": 48072 + }, + { + "epoch": 0.58, + "grad_norm": 16.622324823796795, + "learning_rate": 1.668224227168742e-05, + "loss": 1.3851, + "step": 48075 + }, + { + "epoch": 0.58, + "grad_norm": 11.239933508696133, + "learning_rate": 1.6681807655231375e-05, + "loss": 1.3351, + "step": 48078 + }, + { + "epoch": 0.58, + "grad_norm": 15.566101446949792, + "learning_rate": 1.668137301597273e-05, + "loss": 1.6751, + "step": 48081 + }, + { + "epoch": 0.58, + "grad_norm": 9.513187633148547, + "learning_rate": 1.668093835391296e-05, + "loss": 1.2056, + "step": 48084 + }, + { + "epoch": 0.58, + "grad_norm": 8.635802205069934, + "learning_rate": 1.6680503669053556e-05, + "loss": 1.3431, + "step": 48087 + }, + { + "epoch": 0.58, + "grad_norm": 40.21053555731057, + "learning_rate": 1.6680068961395997e-05, + "loss": 1.6357, + "step": 48090 + }, + { + "epoch": 0.58, + "grad_norm": 7.110637144316875, + "learning_rate": 1.6679634230941776e-05, + "loss": 1.4461, + "step": 48093 + }, + { + "epoch": 0.58, + "grad_norm": 7.296338815575985, + "learning_rate": 1.667919947769236e-05, + "loss": 1.1836, + "step": 48096 + }, + { + "epoch": 0.58, + "grad_norm": 17.773194701256383, + "learning_rate": 1.6678764701649247e-05, + "loss": 1.3079, + "step": 48099 + }, + { + "epoch": 0.58, + "grad_norm": 7.135753234031555, + "learning_rate": 1.6678329902813918e-05, + "loss": 1.109, + "step": 48102 + }, + { + "epoch": 0.58, + "grad_norm": 14.024643253348753, + "learning_rate": 1.667789508118785e-05, + "loss": 1.421, + "step": 48105 + }, + { + "epoch": 0.58, + "grad_norm": 27.39948971571624, + "learning_rate": 1.6677460236772534e-05, + "loss": 1.3852, + "step": 48108 + }, + { + "epoch": 0.58, + "grad_norm": 8.468901862238003, + "learning_rate": 1.667702536956945e-05, + "loss": 1.2815, + "step": 48111 + }, + { + "epoch": 0.58, + "grad_norm": 4.1530876621939745, + "learning_rate": 1.6676590479580085e-05, + "loss": 1.3335, + "step": 48114 + }, + { + "epoch": 0.58, + "grad_norm": 22.394921051183996, + "learning_rate": 1.6676155566805922e-05, + "loss": 1.4444, + "step": 48117 + }, + { + "epoch": 0.58, + "grad_norm": 52.61931564857636, + "learning_rate": 1.6675720631248443e-05, + "loss": 1.8557, + "step": 48120 + }, + { + "epoch": 0.58, + "grad_norm": 10.677718232936801, + "learning_rate": 1.6675285672909135e-05, + "loss": 1.5024, + "step": 48123 + }, + { + "epoch": 0.58, + "grad_norm": 25.623119669598367, + "learning_rate": 1.6674850691789482e-05, + "loss": 1.1609, + "step": 48126 + }, + { + "epoch": 0.58, + "grad_norm": 10.09276157906648, + "learning_rate": 1.6674415687890966e-05, + "loss": 1.4352, + "step": 48129 + }, + { + "epoch": 0.58, + "grad_norm": 10.802597322680716, + "learning_rate": 1.6673980661215076e-05, + "loss": 1.3429, + "step": 48132 + }, + { + "epoch": 0.58, + "grad_norm": 33.05149764532209, + "learning_rate": 1.667354561176329e-05, + "loss": 1.2822, + "step": 48135 + }, + { + "epoch": 0.58, + "grad_norm": 41.77399903798536, + "learning_rate": 1.66731105395371e-05, + "loss": 1.3443, + "step": 48138 + }, + { + "epoch": 0.58, + "grad_norm": 19.31546651392105, + "learning_rate": 1.6672675444537984e-05, + "loss": 1.1733, + "step": 48141 + }, + { + "epoch": 0.58, + "grad_norm": 4.75628166175485, + "learning_rate": 1.6672240326767434e-05, + "loss": 1.485, + "step": 48144 + }, + { + "epoch": 0.58, + "grad_norm": 10.408840875965707, + "learning_rate": 1.667180518622693e-05, + "loss": 1.487, + "step": 48147 + }, + { + "epoch": 0.58, + "grad_norm": 29.912844951710515, + "learning_rate": 1.6671370022917956e-05, + "loss": 1.4036, + "step": 48150 + }, + { + "epoch": 0.58, + "grad_norm": 13.236101592960846, + "learning_rate": 1.6670934836842002e-05, + "loss": 1.4432, + "step": 48153 + }, + { + "epoch": 0.58, + "grad_norm": 6.283835145382078, + "learning_rate": 1.667049962800055e-05, + "loss": 1.3008, + "step": 48156 + }, + { + "epoch": 0.58, + "grad_norm": 9.064840546701959, + "learning_rate": 1.6670064396395085e-05, + "loss": 1.5577, + "step": 48159 + }, + { + "epoch": 0.58, + "grad_norm": 5.915124690948326, + "learning_rate": 1.666962914202709e-05, + "loss": 1.603, + "step": 48162 + }, + { + "epoch": 0.58, + "grad_norm": 11.79850782395382, + "learning_rate": 1.6669193864898055e-05, + "loss": 1.5417, + "step": 48165 + }, + { + "epoch": 0.58, + "grad_norm": 19.274532217956555, + "learning_rate": 1.6668758565009466e-05, + "loss": 1.4543, + "step": 48168 + }, + { + "epoch": 0.58, + "grad_norm": 15.176325548979163, + "learning_rate": 1.6668323242362805e-05, + "loss": 1.5352, + "step": 48171 + }, + { + "epoch": 0.58, + "grad_norm": 28.526161875619298, + "learning_rate": 1.6667887896959557e-05, + "loss": 1.4059, + "step": 48174 + }, + { + "epoch": 0.58, + "grad_norm": 34.45624561125052, + "learning_rate": 1.666745252880121e-05, + "loss": 1.4382, + "step": 48177 + }, + { + "epoch": 0.58, + "grad_norm": 6.712955857763702, + "learning_rate": 1.666701713788925e-05, + "loss": 1.0351, + "step": 48180 + }, + { + "epoch": 0.58, + "grad_norm": 12.318906076912329, + "learning_rate": 1.666658172422516e-05, + "loss": 1.5187, + "step": 48183 + }, + { + "epoch": 0.58, + "grad_norm": 37.156706864371095, + "learning_rate": 1.666614628781043e-05, + "loss": 1.2901, + "step": 48186 + }, + { + "epoch": 0.58, + "grad_norm": 28.635561539980944, + "learning_rate": 1.666571082864654e-05, + "loss": 1.2191, + "step": 48189 + }, + { + "epoch": 0.58, + "grad_norm": 13.206439080428925, + "learning_rate": 1.666527534673499e-05, + "loss": 1.2207, + "step": 48192 + }, + { + "epoch": 0.58, + "grad_norm": 7.5809705989664735, + "learning_rate": 1.6664839842077246e-05, + "loss": 1.695, + "step": 48195 + }, + { + "epoch": 0.58, + "grad_norm": 3.7932515367444846, + "learning_rate": 1.6664404314674807e-05, + "loss": 1.7131, + "step": 48198 + }, + { + "epoch": 0.58, + "grad_norm": 9.504725273075811, + "learning_rate": 1.6663968764529157e-05, + "loss": 1.1509, + "step": 48201 + }, + { + "epoch": 0.58, + "grad_norm": 11.846838790866286, + "learning_rate": 1.666353319164178e-05, + "loss": 1.346, + "step": 48204 + }, + { + "epoch": 0.58, + "grad_norm": 2.416538517644127, + "learning_rate": 1.666309759601417e-05, + "loss": 1.3269, + "step": 48207 + }, + { + "epoch": 0.58, + "grad_norm": 15.536961859369134, + "learning_rate": 1.66626619776478e-05, + "loss": 1.7972, + "step": 48210 + }, + { + "epoch": 0.58, + "grad_norm": 14.056792402758262, + "learning_rate": 1.666222633654417e-05, + "loss": 1.5327, + "step": 48213 + }, + { + "epoch": 0.58, + "grad_norm": 8.092656491081307, + "learning_rate": 1.666179067270476e-05, + "loss": 1.4714, + "step": 48216 + }, + { + "epoch": 0.58, + "grad_norm": 22.685754876926644, + "learning_rate": 1.6661354986131058e-05, + "loss": 1.203, + "step": 48219 + }, + { + "epoch": 0.58, + "grad_norm": 17.898502208825242, + "learning_rate": 1.6660919276824554e-05, + "loss": 1.6185, + "step": 48222 + }, + { + "epoch": 0.58, + "grad_norm": 12.53158416247026, + "learning_rate": 1.6660483544786724e-05, + "loss": 1.377, + "step": 48225 + }, + { + "epoch": 0.58, + "grad_norm": 7.76151228020181, + "learning_rate": 1.666004779001907e-05, + "loss": 1.2919, + "step": 48228 + }, + { + "epoch": 0.58, + "grad_norm": 8.454949150000655, + "learning_rate": 1.665961201252307e-05, + "loss": 1.5575, + "step": 48231 + }, + { + "epoch": 0.58, + "grad_norm": 10.241212365815063, + "learning_rate": 1.665917621230021e-05, + "loss": 1.6105, + "step": 48234 + }, + { + "epoch": 0.58, + "grad_norm": 10.3400996848793, + "learning_rate": 1.6658740389351982e-05, + "loss": 1.3788, + "step": 48237 + }, + { + "epoch": 0.58, + "grad_norm": 4.5487149515302026, + "learning_rate": 1.665830454367987e-05, + "loss": 1.2414, + "step": 48240 + }, + { + "epoch": 0.58, + "grad_norm": 14.153993387447589, + "learning_rate": 1.6657868675285366e-05, + "loss": 1.6578, + "step": 48243 + }, + { + "epoch": 0.58, + "grad_norm": 5.946492945284379, + "learning_rate": 1.6657432784169955e-05, + "loss": 1.6035, + "step": 48246 + }, + { + "epoch": 0.58, + "grad_norm": 25.780651905038603, + "learning_rate": 1.665699687033512e-05, + "loss": 1.5309, + "step": 48249 + }, + { + "epoch": 0.58, + "grad_norm": 10.965705859333442, + "learning_rate": 1.6656560933782355e-05, + "loss": 2.1518, + "step": 48252 + }, + { + "epoch": 0.58, + "grad_norm": 13.445882682788268, + "learning_rate": 1.6656124974513144e-05, + "loss": 1.687, + "step": 48255 + }, + { + "epoch": 0.58, + "grad_norm": 3.1951259107373398, + "learning_rate": 1.6655688992528977e-05, + "loss": 1.1421, + "step": 48258 + }, + { + "epoch": 0.58, + "grad_norm": 26.406154626834677, + "learning_rate": 1.665525298783134e-05, + "loss": 1.3722, + "step": 48261 + }, + { + "epoch": 0.58, + "grad_norm": 9.606012129916152, + "learning_rate": 1.6654816960421723e-05, + "loss": 1.0664, + "step": 48264 + }, + { + "epoch": 0.58, + "grad_norm": 42.8778959533739, + "learning_rate": 1.6654380910301612e-05, + "loss": 1.2243, + "step": 48267 + }, + { + "epoch": 0.58, + "grad_norm": 24.18967525274275, + "learning_rate": 1.6653944837472497e-05, + "loss": 1.6144, + "step": 48270 + }, + { + "epoch": 0.58, + "grad_norm": 73.57022962369703, + "learning_rate": 1.6653508741935865e-05, + "loss": 1.5124, + "step": 48273 + }, + { + "epoch": 0.58, + "grad_norm": 29.541498486711504, + "learning_rate": 1.6653072623693203e-05, + "loss": 1.4822, + "step": 48276 + }, + { + "epoch": 0.58, + "grad_norm": 5.83893951691945, + "learning_rate": 1.6652636482746e-05, + "loss": 1.3182, + "step": 48279 + }, + { + "epoch": 0.58, + "grad_norm": 4.11892051828694, + "learning_rate": 1.6652200319095746e-05, + "loss": 1.1904, + "step": 48282 + }, + { + "epoch": 0.58, + "grad_norm": 38.74572995934266, + "learning_rate": 1.6651764132743927e-05, + "loss": 1.6957, + "step": 48285 + }, + { + "epoch": 0.58, + "grad_norm": 8.14040270963495, + "learning_rate": 1.6651327923692036e-05, + "loss": 1.4655, + "step": 48288 + }, + { + "epoch": 0.58, + "grad_norm": 7.1048510731655155, + "learning_rate": 1.665089169194156e-05, + "loss": 1.7205, + "step": 48291 + }, + { + "epoch": 0.58, + "grad_norm": 13.99733437034365, + "learning_rate": 1.665045543749398e-05, + "loss": 1.2095, + "step": 48294 + }, + { + "epoch": 0.58, + "grad_norm": 17.360714649485807, + "learning_rate": 1.665001916035079e-05, + "loss": 1.7182, + "step": 48297 + }, + { + "epoch": 0.58, + "grad_norm": 31.0984318514764, + "learning_rate": 1.6649582860513488e-05, + "loss": 1.4838, + "step": 48300 + }, + { + "epoch": 0.58, + "grad_norm": 61.7064234212395, + "learning_rate": 1.6649146537983553e-05, + "loss": 1.85, + "step": 48303 + }, + { + "epoch": 0.58, + "grad_norm": 43.39943479360746, + "learning_rate": 1.664871019276247e-05, + "loss": 1.3149, + "step": 48306 + }, + { + "epoch": 0.58, + "grad_norm": 11.321323519125121, + "learning_rate": 1.6648273824851737e-05, + "loss": 1.1856, + "step": 48309 + }, + { + "epoch": 0.58, + "grad_norm": 166.2326265644335, + "learning_rate": 1.664783743425284e-05, + "loss": 1.4291, + "step": 48312 + }, + { + "epoch": 0.58, + "grad_norm": 9.877702912290038, + "learning_rate": 1.6647401020967273e-05, + "loss": 1.3561, + "step": 48315 + }, + { + "epoch": 0.58, + "grad_norm": 13.689038540116531, + "learning_rate": 1.6646964584996515e-05, + "loss": 1.452, + "step": 48318 + }, + { + "epoch": 0.58, + "grad_norm": 7.4856742684942645, + "learning_rate": 1.6646528126342066e-05, + "loss": 1.1353, + "step": 48321 + }, + { + "epoch": 0.58, + "grad_norm": 89.37464858665032, + "learning_rate": 1.6646091645005403e-05, + "loss": 1.4725, + "step": 48324 + }, + { + "epoch": 0.58, + "grad_norm": 8.967798625994604, + "learning_rate": 1.664565514098803e-05, + "loss": 1.0267, + "step": 48327 + }, + { + "epoch": 0.58, + "grad_norm": 7.182138354962687, + "learning_rate": 1.6645218614291428e-05, + "loss": 1.2193, + "step": 48330 + }, + { + "epoch": 0.58, + "grad_norm": 20.64882085569166, + "learning_rate": 1.6644782064917087e-05, + "loss": 1.3288, + "step": 48333 + }, + { + "epoch": 0.58, + "grad_norm": 13.870772274161807, + "learning_rate": 1.66443454928665e-05, + "loss": 1.4192, + "step": 48336 + }, + { + "epoch": 0.58, + "grad_norm": 14.122194979053305, + "learning_rate": 1.6643908898141154e-05, + "loss": 1.1657, + "step": 48339 + }, + { + "epoch": 0.58, + "grad_norm": 7.788113936015355, + "learning_rate": 1.6643472280742542e-05, + "loss": 1.2332, + "step": 48342 + }, + { + "epoch": 0.58, + "grad_norm": 5.901678673738087, + "learning_rate": 1.664303564067215e-05, + "loss": 1.2896, + "step": 48345 + }, + { + "epoch": 0.58, + "grad_norm": 26.79965346287018, + "learning_rate": 1.664259897793147e-05, + "loss": 1.8221, + "step": 48348 + }, + { + "epoch": 0.58, + "grad_norm": 21.43253994610965, + "learning_rate": 1.6642162292521992e-05, + "loss": 1.5878, + "step": 48351 + }, + { + "epoch": 0.58, + "grad_norm": 4.627992730282869, + "learning_rate": 1.6641725584445207e-05, + "loss": 1.3997, + "step": 48354 + }, + { + "epoch": 0.58, + "grad_norm": 19.390953398485063, + "learning_rate": 1.6641288853702607e-05, + "loss": 1.4015, + "step": 48357 + }, + { + "epoch": 0.58, + "grad_norm": 12.27420693225259, + "learning_rate": 1.6640852100295678e-05, + "loss": 1.6024, + "step": 48360 + }, + { + "epoch": 0.58, + "grad_norm": 7.486097676354942, + "learning_rate": 1.6640415324225915e-05, + "loss": 1.6209, + "step": 48363 + }, + { + "epoch": 0.58, + "grad_norm": 24.33792712877607, + "learning_rate": 1.6639978525494802e-05, + "loss": 1.6142, + "step": 48366 + }, + { + "epoch": 0.58, + "grad_norm": 6.522471416544212, + "learning_rate": 1.663954170410384e-05, + "loss": 1.6374, + "step": 48369 + }, + { + "epoch": 0.58, + "grad_norm": 6.189947365815474, + "learning_rate": 1.663910486005451e-05, + "loss": 1.4225, + "step": 48372 + }, + { + "epoch": 0.58, + "grad_norm": 15.345368084739416, + "learning_rate": 1.6638667993348304e-05, + "loss": 1.4644, + "step": 48375 + }, + { + "epoch": 0.58, + "grad_norm": 8.78922371611564, + "learning_rate": 1.663823110398672e-05, + "loss": 1.6379, + "step": 48378 + }, + { + "epoch": 0.58, + "grad_norm": 11.526271328688226, + "learning_rate": 1.6637794191971244e-05, + "loss": 1.5103, + "step": 48381 + }, + { + "epoch": 0.58, + "grad_norm": 11.280775698202978, + "learning_rate": 1.6637357257303364e-05, + "loss": 1.6419, + "step": 48384 + }, + { + "epoch": 0.58, + "grad_norm": 17.035796761691007, + "learning_rate": 1.6636920299984577e-05, + "loss": 1.7847, + "step": 48387 + }, + { + "epoch": 0.58, + "grad_norm": 39.104058444250036, + "learning_rate": 1.6636483320016372e-05, + "loss": 1.6187, + "step": 48390 + }, + { + "epoch": 0.58, + "grad_norm": 132.11248078942336, + "learning_rate": 1.663604631740024e-05, + "loss": 1.4399, + "step": 48393 + }, + { + "epoch": 0.58, + "grad_norm": 36.29294047438184, + "learning_rate": 1.6635609292137674e-05, + "loss": 1.2776, + "step": 48396 + }, + { + "epoch": 0.58, + "grad_norm": 49.331268600392875, + "learning_rate": 1.663517224423016e-05, + "loss": 1.4837, + "step": 48399 + }, + { + "epoch": 0.58, + "grad_norm": 18.98575201033264, + "learning_rate": 1.6634735173679194e-05, + "loss": 1.235, + "step": 48402 + }, + { + "epoch": 0.58, + "grad_norm": 11.299047010561516, + "learning_rate": 1.6634298080486265e-05, + "loss": 1.5691, + "step": 48405 + }, + { + "epoch": 0.58, + "grad_norm": 8.806758113015322, + "learning_rate": 1.663386096465287e-05, + "loss": 1.4045, + "step": 48408 + }, + { + "epoch": 0.58, + "grad_norm": 14.839424216145579, + "learning_rate": 1.6633423826180496e-05, + "loss": 1.7282, + "step": 48411 + }, + { + "epoch": 0.58, + "grad_norm": 13.944596422672761, + "learning_rate": 1.6632986665070632e-05, + "loss": 1.2979, + "step": 48414 + }, + { + "epoch": 0.58, + "grad_norm": 16.315827184948663, + "learning_rate": 1.663254948132478e-05, + "loss": 2.0024, + "step": 48417 + }, + { + "epoch": 0.58, + "grad_norm": 17.296779333409, + "learning_rate": 1.663211227494442e-05, + "loss": 1.4532, + "step": 48420 + }, + { + "epoch": 0.58, + "grad_norm": 31.140757057844564, + "learning_rate": 1.6631675045931053e-05, + "loss": 1.5605, + "step": 48423 + }, + { + "epoch": 0.58, + "grad_norm": 10.570597806671367, + "learning_rate": 1.6631237794286165e-05, + "loss": 1.5387, + "step": 48426 + }, + { + "epoch": 0.58, + "grad_norm": 10.840717228227197, + "learning_rate": 1.6630800520011253e-05, + "loss": 1.4057, + "step": 48429 + }, + { + "epoch": 0.58, + "grad_norm": 11.139219833177464, + "learning_rate": 1.663036322310781e-05, + "loss": 1.6335, + "step": 48432 + }, + { + "epoch": 0.58, + "grad_norm": 5.994136710603525, + "learning_rate": 1.6629925903577323e-05, + "loss": 1.4811, + "step": 48435 + }, + { + "epoch": 0.58, + "grad_norm": 7.5299877023050374, + "learning_rate": 1.6629488561421285e-05, + "loss": 1.4516, + "step": 48438 + }, + { + "epoch": 0.58, + "grad_norm": 10.451320331294143, + "learning_rate": 1.662905119664119e-05, + "loss": 1.4296, + "step": 48441 + }, + { + "epoch": 0.58, + "grad_norm": 8.304804177125499, + "learning_rate": 1.6628613809238535e-05, + "loss": 1.3431, + "step": 48444 + }, + { + "epoch": 0.58, + "grad_norm": 53.10017084430571, + "learning_rate": 1.6628176399214802e-05, + "loss": 1.9242, + "step": 48447 + }, + { + "epoch": 0.58, + "grad_norm": 17.82530228133288, + "learning_rate": 1.66277389665715e-05, + "loss": 1.2595, + "step": 48450 + }, + { + "epoch": 0.58, + "grad_norm": 7.406777891484329, + "learning_rate": 1.6627301511310106e-05, + "loss": 1.481, + "step": 48453 + }, + { + "epoch": 0.58, + "grad_norm": 13.06293271345368, + "learning_rate": 1.6626864033432123e-05, + "loss": 1.5143, + "step": 48456 + }, + { + "epoch": 0.58, + "grad_norm": 11.613715665670306, + "learning_rate": 1.6626426532939034e-05, + "loss": 1.5669, + "step": 48459 + }, + { + "epoch": 0.58, + "grad_norm": 16.603433987549096, + "learning_rate": 1.6625989009832345e-05, + "loss": 1.2969, + "step": 48462 + }, + { + "epoch": 0.58, + "grad_norm": 25.607352588806783, + "learning_rate": 1.6625551464113538e-05, + "loss": 1.9322, + "step": 48465 + }, + { + "epoch": 0.58, + "grad_norm": 40.55423567428446, + "learning_rate": 1.6625113895784108e-05, + "loss": 1.2844, + "step": 48468 + }, + { + "epoch": 0.58, + "grad_norm": 22.69662431955186, + "learning_rate": 1.6624676304845556e-05, + "loss": 1.6032, + "step": 48471 + }, + { + "epoch": 0.58, + "grad_norm": 15.776906601698034, + "learning_rate": 1.6624238691299367e-05, + "loss": 1.5461, + "step": 48474 + }, + { + "epoch": 0.58, + "grad_norm": 8.293461614930948, + "learning_rate": 1.6623801055147037e-05, + "loss": 1.6346, + "step": 48477 + }, + { + "epoch": 0.58, + "grad_norm": 10.70196853096912, + "learning_rate": 1.6623363396390063e-05, + "loss": 1.0964, + "step": 48480 + }, + { + "epoch": 0.58, + "grad_norm": 9.700594635098845, + "learning_rate": 1.6622925715029935e-05, + "loss": 1.223, + "step": 48483 + }, + { + "epoch": 0.58, + "grad_norm": 4.647706687042592, + "learning_rate": 1.662248801106814e-05, + "loss": 1.2286, + "step": 48486 + }, + { + "epoch": 0.58, + "grad_norm": 9.08354788224024, + "learning_rate": 1.6622050284506184e-05, + "loss": 1.4435, + "step": 48489 + }, + { + "epoch": 0.58, + "grad_norm": 7.802604644021483, + "learning_rate": 1.6621612535345558e-05, + "loss": 1.3656, + "step": 48492 + }, + { + "epoch": 0.58, + "grad_norm": 12.897490592399652, + "learning_rate": 1.6621174763587752e-05, + "loss": 1.2503, + "step": 48495 + }, + { + "epoch": 0.58, + "grad_norm": 7.545454452633894, + "learning_rate": 1.6620736969234262e-05, + "loss": 1.3198, + "step": 48498 + }, + { + "epoch": 0.58, + "grad_norm": 35.35626294924735, + "learning_rate": 1.6620299152286578e-05, + "loss": 1.3605, + "step": 48501 + }, + { + "epoch": 0.58, + "grad_norm": 7.825553705582934, + "learning_rate": 1.66198613127462e-05, + "loss": 1.4292, + "step": 48504 + }, + { + "epoch": 0.58, + "grad_norm": 7.69738800483411, + "learning_rate": 1.661942345061462e-05, + "loss": 1.5945, + "step": 48507 + }, + { + "epoch": 0.58, + "grad_norm": 3.5766034449145288, + "learning_rate": 1.6618985565893334e-05, + "loss": 1.3319, + "step": 48510 + }, + { + "epoch": 0.58, + "grad_norm": 7.76783521449905, + "learning_rate": 1.6618547658583833e-05, + "loss": 1.2098, + "step": 48513 + }, + { + "epoch": 0.58, + "grad_norm": 20.764562056196436, + "learning_rate": 1.6618109728687612e-05, + "loss": 1.23, + "step": 48516 + }, + { + "epoch": 0.58, + "grad_norm": 32.77649658251147, + "learning_rate": 1.6617671776206166e-05, + "loss": 1.4607, + "step": 48519 + }, + { + "epoch": 0.58, + "grad_norm": 33.7460931245227, + "learning_rate": 1.6617233801140988e-05, + "loss": 1.2474, + "step": 48522 + }, + { + "epoch": 0.58, + "grad_norm": 7.324124064035805, + "learning_rate": 1.661679580349358e-05, + "loss": 1.4813, + "step": 48525 + }, + { + "epoch": 0.58, + "grad_norm": 8.182465532304384, + "learning_rate": 1.6616357783265425e-05, + "loss": 1.3823, + "step": 48528 + }, + { + "epoch": 0.58, + "grad_norm": 7.452319207843255, + "learning_rate": 1.661591974045803e-05, + "loss": 1.4643, + "step": 48531 + }, + { + "epoch": 0.58, + "grad_norm": 130.835970587568, + "learning_rate": 1.661548167507288e-05, + "loss": 1.2705, + "step": 48534 + }, + { + "epoch": 0.58, + "grad_norm": 12.596639337061509, + "learning_rate": 1.6615043587111475e-05, + "loss": 1.9071, + "step": 48537 + }, + { + "epoch": 0.58, + "grad_norm": 13.283798519248787, + "learning_rate": 1.661460547657531e-05, + "loss": 1.3645, + "step": 48540 + }, + { + "epoch": 0.58, + "grad_norm": 11.456739695629278, + "learning_rate": 1.6614167343465877e-05, + "loss": 1.6775, + "step": 48543 + }, + { + "epoch": 0.58, + "grad_norm": 15.080794562538848, + "learning_rate": 1.6613729187784676e-05, + "loss": 1.6486, + "step": 48546 + }, + { + "epoch": 0.58, + "grad_norm": 17.304441692464383, + "learning_rate": 1.6613291009533198e-05, + "loss": 1.1256, + "step": 48549 + }, + { + "epoch": 0.58, + "grad_norm": 12.207525049137677, + "learning_rate": 1.661285280871294e-05, + "loss": 1.4755, + "step": 48552 + }, + { + "epoch": 0.58, + "grad_norm": 19.055588794702967, + "learning_rate": 1.6612414585325396e-05, + "loss": 1.5055, + "step": 48555 + }, + { + "epoch": 0.58, + "grad_norm": 442.4222642003403, + "learning_rate": 1.6611976339372065e-05, + "loss": 1.2587, + "step": 48558 + }, + { + "epoch": 0.58, + "grad_norm": 9.110533945682946, + "learning_rate": 1.661153807085444e-05, + "loss": 1.5355, + "step": 48561 + }, + { + "epoch": 0.58, + "grad_norm": 9.503870645655573, + "learning_rate": 1.6611099779774016e-05, + "loss": 1.3141, + "step": 48564 + }, + { + "epoch": 0.58, + "grad_norm": 50.85765077643387, + "learning_rate": 1.661066146613229e-05, + "loss": 1.6946, + "step": 48567 + }, + { + "epoch": 0.58, + "grad_norm": 21.550960744955574, + "learning_rate": 1.661022312993076e-05, + "loss": 1.645, + "step": 48570 + }, + { + "epoch": 0.58, + "grad_norm": 12.38457135382087, + "learning_rate": 1.6609784771170916e-05, + "loss": 1.5742, + "step": 48573 + }, + { + "epoch": 0.58, + "grad_norm": 42.751481501909744, + "learning_rate": 1.660934638985426e-05, + "loss": 1.3777, + "step": 48576 + }, + { + "epoch": 0.58, + "grad_norm": 116.33962498159981, + "learning_rate": 1.6608907985982284e-05, + "loss": 1.4795, + "step": 48579 + }, + { + "epoch": 0.58, + "grad_norm": 15.458470791179753, + "learning_rate": 1.6608469559556484e-05, + "loss": 1.5173, + "step": 48582 + }, + { + "epoch": 0.58, + "grad_norm": 16.94960386514988, + "learning_rate": 1.660803111057836e-05, + "loss": 1.5343, + "step": 48585 + }, + { + "epoch": 0.58, + "grad_norm": 12.924559024838445, + "learning_rate": 1.6607592639049402e-05, + "loss": 1.7924, + "step": 48588 + }, + { + "epoch": 0.58, + "grad_norm": 7.683472232153671, + "learning_rate": 1.660715414497112e-05, + "loss": 1.6875, + "step": 48591 + }, + { + "epoch": 0.58, + "grad_norm": 47.849839262491955, + "learning_rate": 1.660671562834499e-05, + "loss": 1.6121, + "step": 48594 + }, + { + "epoch": 0.58, + "grad_norm": 10.538685633382395, + "learning_rate": 1.6606277089172526e-05, + "loss": 1.1312, + "step": 48597 + }, + { + "epoch": 0.58, + "grad_norm": 17.552679560860447, + "learning_rate": 1.6605838527455212e-05, + "loss": 1.2753, + "step": 48600 + }, + { + "epoch": 0.58, + "grad_norm": 10.954791062226558, + "learning_rate": 1.6605399943194557e-05, + "loss": 1.6294, + "step": 48603 + }, + { + "epoch": 0.58, + "grad_norm": 10.941105619727239, + "learning_rate": 1.6604961336392048e-05, + "loss": 1.1986, + "step": 48606 + }, + { + "epoch": 0.58, + "grad_norm": 28.981892861798485, + "learning_rate": 1.6604522707049186e-05, + "loss": 1.3283, + "step": 48609 + }, + { + "epoch": 0.58, + "grad_norm": 25.063793680476312, + "learning_rate": 1.6604084055167468e-05, + "loss": 1.6222, + "step": 48612 + }, + { + "epoch": 0.58, + "grad_norm": 28.61772015427987, + "learning_rate": 1.6603645380748385e-05, + "loss": 1.3143, + "step": 48615 + }, + { + "epoch": 0.58, + "grad_norm": 8.627662245251157, + "learning_rate": 1.6603206683793446e-05, + "loss": 1.5696, + "step": 48618 + }, + { + "epoch": 0.58, + "grad_norm": 20.74092631415718, + "learning_rate": 1.6602767964304136e-05, + "loss": 1.5016, + "step": 48621 + }, + { + "epoch": 0.58, + "grad_norm": 10.87579773480121, + "learning_rate": 1.660232922228196e-05, + "loss": 1.3639, + "step": 48624 + }, + { + "epoch": 0.58, + "grad_norm": 10.32950965521383, + "learning_rate": 1.6601890457728413e-05, + "loss": 1.6035, + "step": 48627 + }, + { + "epoch": 0.58, + "grad_norm": 9.05189572380024, + "learning_rate": 1.6601451670644986e-05, + "loss": 1.1957, + "step": 48630 + }, + { + "epoch": 0.58, + "grad_norm": 25.80944063749424, + "learning_rate": 1.6601012861033184e-05, + "loss": 1.2366, + "step": 48633 + }, + { + "epoch": 0.58, + "grad_norm": 29.13300061204209, + "learning_rate": 1.660057402889451e-05, + "loss": 1.526, + "step": 48636 + }, + { + "epoch": 0.58, + "grad_norm": 18.918038532694993, + "learning_rate": 1.660013517423045e-05, + "loss": 0.9771, + "step": 48639 + }, + { + "epoch": 0.58, + "grad_norm": 12.207885988568997, + "learning_rate": 1.6599696297042503e-05, + "loss": 1.1111, + "step": 48642 + }, + { + "epoch": 0.58, + "grad_norm": 11.103832462578037, + "learning_rate": 1.6599257397332172e-05, + "loss": 1.4238, + "step": 48645 + }, + { + "epoch": 0.58, + "grad_norm": 17.83934941705402, + "learning_rate": 1.6598818475100953e-05, + "loss": 1.5575, + "step": 48648 + }, + { + "epoch": 0.59, + "grad_norm": 9.228457667344962, + "learning_rate": 1.6598379530350343e-05, + "loss": 1.2556, + "step": 48651 + }, + { + "epoch": 0.59, + "grad_norm": 14.594938630401161, + "learning_rate": 1.659794056308184e-05, + "loss": 1.6158, + "step": 48654 + }, + { + "epoch": 0.59, + "grad_norm": 7.979718191697725, + "learning_rate": 1.6597501573296945e-05, + "loss": 1.5166, + "step": 48657 + }, + { + "epoch": 0.59, + "grad_norm": 6.379866175369343, + "learning_rate": 1.659706256099715e-05, + "loss": 1.2671, + "step": 48660 + }, + { + "epoch": 0.59, + "grad_norm": 5.054349435451875, + "learning_rate": 1.659662352618396e-05, + "loss": 1.6546, + "step": 48663 + }, + { + "epoch": 0.59, + "grad_norm": 7.925828393081374, + "learning_rate": 1.6596184468858867e-05, + "loss": 1.6681, + "step": 48666 + }, + { + "epoch": 0.59, + "grad_norm": 14.371736284111439, + "learning_rate": 1.6595745389023372e-05, + "loss": 1.459, + "step": 48669 + }, + { + "epoch": 0.59, + "grad_norm": 4.30011243260197, + "learning_rate": 1.6595306286678975e-05, + "loss": 1.2244, + "step": 48672 + }, + { + "epoch": 0.59, + "grad_norm": 5.727966292928187, + "learning_rate": 1.6594867161827175e-05, + "loss": 1.4913, + "step": 48675 + }, + { + "epoch": 0.59, + "grad_norm": 9.376225350124733, + "learning_rate": 1.659442801446947e-05, + "loss": 1.4908, + "step": 48678 + }, + { + "epoch": 0.59, + "grad_norm": 27.74433897968848, + "learning_rate": 1.6593988844607354e-05, + "loss": 1.5787, + "step": 48681 + }, + { + "epoch": 0.59, + "grad_norm": 7.9773533735475075, + "learning_rate": 1.6593549652242326e-05, + "loss": 1.5131, + "step": 48684 + }, + { + "epoch": 0.59, + "grad_norm": 27.81474874015468, + "learning_rate": 1.6593110437375894e-05, + "loss": 1.6766, + "step": 48687 + }, + { + "epoch": 0.59, + "grad_norm": 9.921653441841572, + "learning_rate": 1.659267120000955e-05, + "loss": 1.8141, + "step": 48690 + }, + { + "epoch": 0.59, + "grad_norm": 27.314028385256233, + "learning_rate": 1.659223194014479e-05, + "loss": 1.8082, + "step": 48693 + }, + { + "epoch": 0.59, + "grad_norm": 17.630450677670698, + "learning_rate": 1.6591792657783124e-05, + "loss": 1.2435, + "step": 48696 + }, + { + "epoch": 0.59, + "grad_norm": 11.551119120407105, + "learning_rate": 1.6591353352926042e-05, + "loss": 1.5785, + "step": 48699 + }, + { + "epoch": 0.59, + "grad_norm": 22.378366217153918, + "learning_rate": 1.659091402557504e-05, + "loss": 1.5617, + "step": 48702 + }, + { + "epoch": 0.59, + "grad_norm": 13.05882131411998, + "learning_rate": 1.659047467573163e-05, + "loss": 1.3913, + "step": 48705 + }, + { + "epoch": 0.59, + "grad_norm": 9.428520161875502, + "learning_rate": 1.6590035303397293e-05, + "loss": 1.2201, + "step": 48708 + }, + { + "epoch": 0.59, + "grad_norm": 10.56205596671307, + "learning_rate": 1.6589595908573548e-05, + "loss": 1.1197, + "step": 48711 + }, + { + "epoch": 0.59, + "grad_norm": 25.183293439811628, + "learning_rate": 1.6589156491261884e-05, + "loss": 1.6841, + "step": 48714 + }, + { + "epoch": 0.59, + "grad_norm": 23.085053144138474, + "learning_rate": 1.65887170514638e-05, + "loss": 1.4632, + "step": 48717 + }, + { + "epoch": 0.59, + "grad_norm": 3.7522674216994747, + "learning_rate": 1.6588277589180803e-05, + "loss": 1.4509, + "step": 48720 + }, + { + "epoch": 0.59, + "grad_norm": 11.146982277357852, + "learning_rate": 1.658783810441438e-05, + "loss": 1.6906, + "step": 48723 + }, + { + "epoch": 0.59, + "grad_norm": 53.07435816557561, + "learning_rate": 1.6587398597166045e-05, + "loss": 1.4508, + "step": 48726 + }, + { + "epoch": 0.59, + "grad_norm": 8.00750400554068, + "learning_rate": 1.6586959067437287e-05, + "loss": 1.3459, + "step": 48729 + }, + { + "epoch": 0.59, + "grad_norm": 4.829373808989827, + "learning_rate": 1.6586519515229613e-05, + "loss": 1.65, + "step": 48732 + }, + { + "epoch": 0.59, + "grad_norm": 9.459793578698136, + "learning_rate": 1.658607994054452e-05, + "loss": 1.5286, + "step": 48735 + }, + { + "epoch": 0.59, + "grad_norm": 15.747989452950641, + "learning_rate": 1.6585640343383506e-05, + "loss": 1.4539, + "step": 48738 + }, + { + "epoch": 0.59, + "grad_norm": 12.886623615140682, + "learning_rate": 1.658520072374808e-05, + "loss": 1.3562, + "step": 48741 + }, + { + "epoch": 0.59, + "grad_norm": 12.272723755152484, + "learning_rate": 1.658476108163973e-05, + "loss": 1.6112, + "step": 48744 + }, + { + "epoch": 0.59, + "grad_norm": 6.973065768058948, + "learning_rate": 1.6584321417059962e-05, + "loss": 1.3423, + "step": 48747 + }, + { + "epoch": 0.59, + "grad_norm": 27.575066328466775, + "learning_rate": 1.6583881730010277e-05, + "loss": 1.5089, + "step": 48750 + }, + { + "epoch": 0.59, + "grad_norm": 25.861520791406985, + "learning_rate": 1.658344202049218e-05, + "loss": 1.6628, + "step": 48753 + }, + { + "epoch": 0.59, + "grad_norm": 7.703713413718127, + "learning_rate": 1.6583002288507163e-05, + "loss": 1.25, + "step": 48756 + }, + { + "epoch": 0.59, + "grad_norm": 13.870782222225383, + "learning_rate": 1.6582562534056732e-05, + "loss": 1.3005, + "step": 48759 + }, + { + "epoch": 0.59, + "grad_norm": 12.937832634568517, + "learning_rate": 1.6582122757142384e-05, + "loss": 1.5386, + "step": 48762 + }, + { + "epoch": 0.59, + "grad_norm": 73.87444672829103, + "learning_rate": 1.658168295776562e-05, + "loss": 1.2021, + "step": 48765 + }, + { + "epoch": 0.59, + "grad_norm": 18.67804823089237, + "learning_rate": 1.658124313592795e-05, + "loss": 1.8853, + "step": 48768 + }, + { + "epoch": 0.59, + "grad_norm": 12.536123090777124, + "learning_rate": 1.6580803291630862e-05, + "loss": 1.4777, + "step": 48771 + }, + { + "epoch": 0.59, + "grad_norm": 10.714584427894149, + "learning_rate": 1.6580363424875863e-05, + "loss": 1.3242, + "step": 48774 + }, + { + "epoch": 0.59, + "grad_norm": 8.58409314927338, + "learning_rate": 1.6579923535664456e-05, + "loss": 1.3681, + "step": 48777 + }, + { + "epoch": 0.59, + "grad_norm": 11.19962617892179, + "learning_rate": 1.657948362399814e-05, + "loss": 1.3706, + "step": 48780 + }, + { + "epoch": 0.59, + "grad_norm": 3.5270740389274695, + "learning_rate": 1.6579043689878414e-05, + "loss": 1.3257, + "step": 48783 + }, + { + "epoch": 0.59, + "grad_norm": 11.703640033517196, + "learning_rate": 1.6578603733306782e-05, + "loss": 1.4153, + "step": 48786 + }, + { + "epoch": 0.59, + "grad_norm": 41.58764426938287, + "learning_rate": 1.6578163754284746e-05, + "loss": 1.173, + "step": 48789 + }, + { + "epoch": 0.59, + "grad_norm": 24.390875161270408, + "learning_rate": 1.657772375281381e-05, + "loss": 1.2592, + "step": 48792 + }, + { + "epoch": 0.59, + "grad_norm": 21.28909927134897, + "learning_rate": 1.6577283728895465e-05, + "loss": 1.4429, + "step": 48795 + }, + { + "epoch": 0.59, + "grad_norm": 14.64654673025463, + "learning_rate": 1.6576843682531225e-05, + "loss": 1.3326, + "step": 48798 + }, + { + "epoch": 0.59, + "grad_norm": 9.176843683493367, + "learning_rate": 1.6576403613722588e-05, + "loss": 1.8191, + "step": 48801 + }, + { + "epoch": 0.59, + "grad_norm": 12.257643573879376, + "learning_rate": 1.6575963522471048e-05, + "loss": 1.3857, + "step": 48804 + }, + { + "epoch": 0.59, + "grad_norm": 11.151887911456937, + "learning_rate": 1.6575523408778118e-05, + "loss": 1.1278, + "step": 48807 + }, + { + "epoch": 0.59, + "grad_norm": 4.0431602275531215, + "learning_rate": 1.6575083272645293e-05, + "loss": 1.2282, + "step": 48810 + }, + { + "epoch": 0.59, + "grad_norm": 47.66946786606994, + "learning_rate": 1.6574643114074077e-05, + "loss": 1.3947, + "step": 48813 + }, + { + "epoch": 0.59, + "grad_norm": 4.595739849873324, + "learning_rate": 1.6574202933065972e-05, + "loss": 1.4514, + "step": 48816 + }, + { + "epoch": 0.59, + "grad_norm": 89.17989944838675, + "learning_rate": 1.657376272962248e-05, + "loss": 1.4989, + "step": 48819 + }, + { + "epoch": 0.59, + "grad_norm": 14.366616551089596, + "learning_rate": 1.657332250374511e-05, + "loss": 1.222, + "step": 48822 + }, + { + "epoch": 0.59, + "grad_norm": 4.990693536789605, + "learning_rate": 1.657288225543535e-05, + "loss": 1.2803, + "step": 48825 + }, + { + "epoch": 0.59, + "grad_norm": 28.154442683488508, + "learning_rate": 1.6572441984694713e-05, + "loss": 1.3759, + "step": 48828 + }, + { + "epoch": 0.59, + "grad_norm": 18.868479690313716, + "learning_rate": 1.65720016915247e-05, + "loss": 1.3898, + "step": 48831 + }, + { + "epoch": 0.59, + "grad_norm": 12.217441416195792, + "learning_rate": 1.657156137592681e-05, + "loss": 1.6168, + "step": 48834 + }, + { + "epoch": 0.59, + "grad_norm": 28.360682917184032, + "learning_rate": 1.657112103790255e-05, + "loss": 1.6257, + "step": 48837 + }, + { + "epoch": 0.59, + "grad_norm": 9.684417095734087, + "learning_rate": 1.657068067745342e-05, + "loss": 1.459, + "step": 48840 + }, + { + "epoch": 0.59, + "grad_norm": 87.93081743141813, + "learning_rate": 1.6570240294580927e-05, + "loss": 1.4167, + "step": 48843 + }, + { + "epoch": 0.59, + "grad_norm": 21.979573429869646, + "learning_rate": 1.6569799889286565e-05, + "loss": 1.5938, + "step": 48846 + }, + { + "epoch": 0.59, + "grad_norm": 391.76134116733857, + "learning_rate": 1.6569359461571847e-05, + "loss": 1.1029, + "step": 48849 + }, + { + "epoch": 0.59, + "grad_norm": 18.077397763735846, + "learning_rate": 1.6568919011438267e-05, + "loss": 1.3888, + "step": 48852 + }, + { + "epoch": 0.59, + "grad_norm": 23.761509359950757, + "learning_rate": 1.6568478538887337e-05, + "loss": 1.2429, + "step": 48855 + }, + { + "epoch": 0.59, + "grad_norm": 6.494279406484359, + "learning_rate": 1.6568038043920554e-05, + "loss": 1.6868, + "step": 48858 + }, + { + "epoch": 0.59, + "grad_norm": 13.040989340391782, + "learning_rate": 1.6567597526539423e-05, + "loss": 1.3904, + "step": 48861 + }, + { + "epoch": 0.59, + "grad_norm": 44.208503484935285, + "learning_rate": 1.6567156986745447e-05, + "loss": 1.4831, + "step": 48864 + }, + { + "epoch": 0.59, + "grad_norm": 8.027899429685446, + "learning_rate": 1.656671642454013e-05, + "loss": 1.4606, + "step": 48867 + }, + { + "epoch": 0.59, + "grad_norm": 4.624472208997189, + "learning_rate": 1.6566275839924975e-05, + "loss": 1.3486, + "step": 48870 + }, + { + "epoch": 0.59, + "grad_norm": 20.968875623952492, + "learning_rate": 1.6565835232901483e-05, + "loss": 1.4801, + "step": 48873 + }, + { + "epoch": 0.59, + "grad_norm": 7.137907908043441, + "learning_rate": 1.6565394603471167e-05, + "loss": 0.9784, + "step": 48876 + }, + { + "epoch": 0.59, + "grad_norm": 6.161498446795187, + "learning_rate": 1.6564953951635515e-05, + "loss": 1.4372, + "step": 48879 + }, + { + "epoch": 0.59, + "grad_norm": 20.577987670191256, + "learning_rate": 1.6564513277396046e-05, + "loss": 1.3025, + "step": 48882 + }, + { + "epoch": 0.59, + "grad_norm": 6.627441781207024, + "learning_rate": 1.656407258075426e-05, + "loss": 1.5361, + "step": 48885 + }, + { + "epoch": 0.59, + "grad_norm": 9.590292398023712, + "learning_rate": 1.6563631861711652e-05, + "loss": 1.1518, + "step": 48888 + }, + { + "epoch": 0.59, + "grad_norm": 26.273876559371704, + "learning_rate": 1.6563191120269733e-05, + "loss": 1.4541, + "step": 48891 + }, + { + "epoch": 0.59, + "grad_norm": 16.171661325432094, + "learning_rate": 1.6562750356430014e-05, + "loss": 1.1983, + "step": 48894 + }, + { + "epoch": 0.59, + "grad_norm": 20.655085477353673, + "learning_rate": 1.6562309570193988e-05, + "loss": 1.3917, + "step": 48897 + }, + { + "epoch": 0.59, + "grad_norm": 7.552785172894407, + "learning_rate": 1.656186876156316e-05, + "loss": 1.3927, + "step": 48900 + }, + { + "epoch": 0.59, + "grad_norm": 4.890646656626561, + "learning_rate": 1.656142793053904e-05, + "loss": 1.2453, + "step": 48903 + }, + { + "epoch": 0.59, + "grad_norm": 27.211724423090967, + "learning_rate": 1.656098707712313e-05, + "loss": 1.4248, + "step": 48906 + }, + { + "epoch": 0.59, + "grad_norm": 11.171722739401721, + "learning_rate": 1.6560546201316934e-05, + "loss": 1.3319, + "step": 48909 + }, + { + "epoch": 0.59, + "grad_norm": 22.59351470952449, + "learning_rate": 1.656010530312196e-05, + "loss": 1.4172, + "step": 48912 + }, + { + "epoch": 0.59, + "grad_norm": 27.552242603339266, + "learning_rate": 1.6559664382539705e-05, + "loss": 1.4874, + "step": 48915 + }, + { + "epoch": 0.59, + "grad_norm": 4.762270058807133, + "learning_rate": 1.655922343957168e-05, + "loss": 1.3928, + "step": 48918 + }, + { + "epoch": 0.59, + "grad_norm": 9.103199889224394, + "learning_rate": 1.6558782474219387e-05, + "loss": 1.2205, + "step": 48921 + }, + { + "epoch": 0.59, + "grad_norm": 16.1685994754858, + "learning_rate": 1.6558341486484333e-05, + "loss": 1.3958, + "step": 48924 + }, + { + "epoch": 0.59, + "grad_norm": 23.601381318309535, + "learning_rate": 1.6557900476368022e-05, + "loss": 1.1163, + "step": 48927 + }, + { + "epoch": 0.59, + "grad_norm": 6.063064569597319, + "learning_rate": 1.655745944387196e-05, + "loss": 1.1724, + "step": 48930 + }, + { + "epoch": 0.59, + "grad_norm": 13.245094416388206, + "learning_rate": 1.655701838899765e-05, + "loss": 1.5436, + "step": 48933 + }, + { + "epoch": 0.59, + "grad_norm": 24.154924817426565, + "learning_rate": 1.6556577311746597e-05, + "loss": 1.3281, + "step": 48936 + }, + { + "epoch": 0.59, + "grad_norm": 11.519659148790069, + "learning_rate": 1.6556136212120307e-05, + "loss": 1.3792, + "step": 48939 + }, + { + "epoch": 0.59, + "grad_norm": 23.077317089420607, + "learning_rate": 1.655569509012029e-05, + "loss": 1.3465, + "step": 48942 + }, + { + "epoch": 0.59, + "grad_norm": 13.394022196290098, + "learning_rate": 1.655525394574804e-05, + "loss": 1.0982, + "step": 48945 + }, + { + "epoch": 0.59, + "grad_norm": 7.838262467045342, + "learning_rate": 1.6554812779005074e-05, + "loss": 1.2853, + "step": 48948 + }, + { + "epoch": 0.59, + "grad_norm": 5.649292910310614, + "learning_rate": 1.655437158989289e-05, + "loss": 1.68, + "step": 48951 + }, + { + "epoch": 0.59, + "grad_norm": 7.863489110786024, + "learning_rate": 1.6553930378413002e-05, + "loss": 1.5545, + "step": 48954 + }, + { + "epoch": 0.59, + "grad_norm": 28.19858441116339, + "learning_rate": 1.6553489144566904e-05, + "loss": 1.3143, + "step": 48957 + }, + { + "epoch": 0.59, + "grad_norm": 10.303825876319069, + "learning_rate": 1.6553047888356114e-05, + "loss": 1.463, + "step": 48960 + }, + { + "epoch": 0.59, + "grad_norm": 12.436514703559684, + "learning_rate": 1.6552606609782127e-05, + "loss": 1.0966, + "step": 48963 + }, + { + "epoch": 0.59, + "grad_norm": 22.794235193904242, + "learning_rate": 1.6552165308846456e-05, + "loss": 1.4798, + "step": 48966 + }, + { + "epoch": 0.59, + "grad_norm": 11.480173099229551, + "learning_rate": 1.6551723985550606e-05, + "loss": 1.6074, + "step": 48969 + }, + { + "epoch": 0.59, + "grad_norm": 25.178022187529038, + "learning_rate": 1.655128263989608e-05, + "loss": 1.33, + "step": 48972 + }, + { + "epoch": 0.59, + "grad_norm": 32.02322482451742, + "learning_rate": 1.6550841271884386e-05, + "loss": 1.4299, + "step": 48975 + }, + { + "epoch": 0.59, + "grad_norm": 8.69411235346018, + "learning_rate": 1.655039988151703e-05, + "loss": 1.5216, + "step": 48978 + }, + { + "epoch": 0.59, + "grad_norm": 6.4982918380095995, + "learning_rate": 1.654995846879552e-05, + "loss": 1.6259, + "step": 48981 + }, + { + "epoch": 0.59, + "grad_norm": 14.747665210241482, + "learning_rate": 1.6549517033721362e-05, + "loss": 1.3589, + "step": 48984 + }, + { + "epoch": 0.59, + "grad_norm": 9.570883385169687, + "learning_rate": 1.654907557629606e-05, + "loss": 1.3524, + "step": 48987 + }, + { + "epoch": 0.59, + "grad_norm": 47.02538218991213, + "learning_rate": 1.6548634096521125e-05, + "loss": 1.2211, + "step": 48990 + }, + { + "epoch": 0.59, + "grad_norm": 34.294942901592634, + "learning_rate": 1.6548192594398056e-05, + "loss": 1.492, + "step": 48993 + }, + { + "epoch": 0.59, + "grad_norm": 53.711115307590084, + "learning_rate": 1.6547751069928367e-05, + "loss": 1.4365, + "step": 48996 + }, + { + "epoch": 0.59, + "grad_norm": 81.12080110686993, + "learning_rate": 1.654730952311356e-05, + "loss": 1.4519, + "step": 48999 + }, + { + "epoch": 0.59, + "grad_norm": 21.794781967949064, + "learning_rate": 1.6546867953955148e-05, + "loss": 1.2275, + "step": 49002 + }, + { + "epoch": 0.59, + "grad_norm": 81.03142787862161, + "learning_rate": 1.654642636245463e-05, + "loss": 1.435, + "step": 49005 + }, + { + "epoch": 0.59, + "grad_norm": 12.184783355436771, + "learning_rate": 1.6545984748613516e-05, + "loss": 1.3215, + "step": 49008 + }, + { + "epoch": 0.59, + "grad_norm": 8.99604278741864, + "learning_rate": 1.6545543112433317e-05, + "loss": 1.0151, + "step": 49011 + }, + { + "epoch": 0.59, + "grad_norm": 6.980568241614791, + "learning_rate": 1.654510145391554e-05, + "loss": 1.6473, + "step": 49014 + }, + { + "epoch": 0.59, + "grad_norm": 27.460931121509898, + "learning_rate": 1.654465977306168e-05, + "loss": 1.5882, + "step": 49017 + }, + { + "epoch": 0.59, + "grad_norm": 29.089099972581025, + "learning_rate": 1.654421806987326e-05, + "loss": 1.3734, + "step": 49020 + }, + { + "epoch": 0.59, + "grad_norm": 10.436373406801179, + "learning_rate": 1.6543776344351782e-05, + "loss": 0.9337, + "step": 49023 + }, + { + "epoch": 0.59, + "grad_norm": 25.069963274200678, + "learning_rate": 1.654333459649875e-05, + "loss": 1.5862, + "step": 49026 + }, + { + "epoch": 0.59, + "grad_norm": 13.81999384126508, + "learning_rate": 1.6542892826315672e-05, + "loss": 1.4098, + "step": 49029 + }, + { + "epoch": 0.59, + "grad_norm": 44.36629660470508, + "learning_rate": 1.654245103380406e-05, + "loss": 1.4843, + "step": 49032 + }, + { + "epoch": 0.59, + "grad_norm": 2.8629515917403885, + "learning_rate": 1.6542009218965416e-05, + "loss": 1.4379, + "step": 49035 + }, + { + "epoch": 0.59, + "grad_norm": 12.762093575378112, + "learning_rate": 1.6541567381801257e-05, + "loss": 1.4491, + "step": 49038 + }, + { + "epoch": 0.59, + "grad_norm": 12.297251070220788, + "learning_rate": 1.654112552231308e-05, + "loss": 1.445, + "step": 49041 + }, + { + "epoch": 0.59, + "grad_norm": 6.204719232475593, + "learning_rate": 1.65406836405024e-05, + "loss": 1.2659, + "step": 49044 + }, + { + "epoch": 0.59, + "grad_norm": 9.88901444755646, + "learning_rate": 1.6540241736370722e-05, + "loss": 1.5262, + "step": 49047 + }, + { + "epoch": 0.59, + "grad_norm": 53.69739174700939, + "learning_rate": 1.6539799809919556e-05, + "loss": 1.3908, + "step": 49050 + }, + { + "epoch": 0.59, + "grad_norm": 45.152273411489574, + "learning_rate": 1.6539357861150407e-05, + "loss": 1.4, + "step": 49053 + }, + { + "epoch": 0.59, + "grad_norm": 31.905813452171273, + "learning_rate": 1.6538915890064785e-05, + "loss": 1.826, + "step": 49056 + }, + { + "epoch": 0.59, + "grad_norm": 7.4449923325996705, + "learning_rate": 1.65384738966642e-05, + "loss": 1.7112, + "step": 49059 + }, + { + "epoch": 0.59, + "grad_norm": 9.265160040586004, + "learning_rate": 1.6538031880950157e-05, + "loss": 1.2209, + "step": 49062 + }, + { + "epoch": 0.59, + "grad_norm": 15.597097832711025, + "learning_rate": 1.6537589842924166e-05, + "loss": 1.2008, + "step": 49065 + }, + { + "epoch": 0.59, + "grad_norm": 15.887075540949514, + "learning_rate": 1.6537147782587736e-05, + "loss": 1.1197, + "step": 49068 + }, + { + "epoch": 0.59, + "grad_norm": 37.125449960820006, + "learning_rate": 1.6536705699942377e-05, + "loss": 1.5684, + "step": 49071 + }, + { + "epoch": 0.59, + "grad_norm": 9.904763697285766, + "learning_rate": 1.6536263594989595e-05, + "loss": 1.1465, + "step": 49074 + }, + { + "epoch": 0.59, + "grad_norm": 5.742097661078106, + "learning_rate": 1.6535821467730898e-05, + "loss": 1.4409, + "step": 49077 + }, + { + "epoch": 0.59, + "grad_norm": 6.08280973279089, + "learning_rate": 1.6535379318167798e-05, + "loss": 1.5144, + "step": 49080 + }, + { + "epoch": 0.59, + "grad_norm": 58.362733760711706, + "learning_rate": 1.6534937146301805e-05, + "loss": 1.4188, + "step": 49083 + }, + { + "epoch": 0.59, + "grad_norm": 10.08222306708274, + "learning_rate": 1.653449495213442e-05, + "loss": 1.5986, + "step": 49086 + }, + { + "epoch": 0.59, + "grad_norm": 77.09647603411727, + "learning_rate": 1.6534052735667163e-05, + "loss": 1.2656, + "step": 49089 + }, + { + "epoch": 0.59, + "grad_norm": 20.55395298933627, + "learning_rate": 1.6533610496901532e-05, + "loss": 1.5191, + "step": 49092 + }, + { + "epoch": 0.59, + "grad_norm": 14.49629196841103, + "learning_rate": 1.6533168235839047e-05, + "loss": 1.6981, + "step": 49095 + }, + { + "epoch": 0.59, + "grad_norm": 13.419482366292529, + "learning_rate": 1.653272595248121e-05, + "loss": 1.6234, + "step": 49098 + }, + { + "epoch": 0.59, + "grad_norm": 16.584974783998796, + "learning_rate": 1.6532283646829532e-05, + "loss": 1.9187, + "step": 49101 + }, + { + "epoch": 0.59, + "grad_norm": 5.812168843892121, + "learning_rate": 1.6531841318885525e-05, + "loss": 1.3453, + "step": 49104 + }, + { + "epoch": 0.59, + "grad_norm": 14.904846909295033, + "learning_rate": 1.6531398968650694e-05, + "loss": 1.5321, + "step": 49107 + }, + { + "epoch": 0.59, + "grad_norm": 23.501498453809607, + "learning_rate": 1.653095659612655e-05, + "loss": 1.6426, + "step": 49110 + }, + { + "epoch": 0.59, + "grad_norm": 31.23302631798059, + "learning_rate": 1.6530514201314604e-05, + "loss": 1.7397, + "step": 49113 + }, + { + "epoch": 0.59, + "grad_norm": 58.19653494078656, + "learning_rate": 1.6530071784216367e-05, + "loss": 1.2012, + "step": 49116 + }, + { + "epoch": 0.59, + "grad_norm": 6.57354931875348, + "learning_rate": 1.6529629344833345e-05, + "loss": 1.2218, + "step": 49119 + }, + { + "epoch": 0.59, + "grad_norm": 23.01210290087854, + "learning_rate": 1.6529186883167054e-05, + "loss": 1.1404, + "step": 49122 + }, + { + "epoch": 0.59, + "grad_norm": 2.720115692627212, + "learning_rate": 1.6528744399218998e-05, + "loss": 1.3785, + "step": 49125 + }, + { + "epoch": 0.59, + "grad_norm": 7.943495820133954, + "learning_rate": 1.6528301892990683e-05, + "loss": 1.3104, + "step": 49128 + }, + { + "epoch": 0.59, + "grad_norm": 13.567456393489215, + "learning_rate": 1.6527859364483633e-05, + "loss": 1.6097, + "step": 49131 + }, + { + "epoch": 0.59, + "grad_norm": 21.43126421702015, + "learning_rate": 1.6527416813699345e-05, + "loss": 1.7045, + "step": 49134 + }, + { + "epoch": 0.59, + "grad_norm": 38.75226075253929, + "learning_rate": 1.652697424063934e-05, + "loss": 1.5191, + "step": 49137 + }, + { + "epoch": 0.59, + "grad_norm": 71.30672882904045, + "learning_rate": 1.652653164530512e-05, + "loss": 1.2325, + "step": 49140 + }, + { + "epoch": 0.59, + "grad_norm": 7.387675270154399, + "learning_rate": 1.65260890276982e-05, + "loss": 2.0291, + "step": 49143 + }, + { + "epoch": 0.59, + "grad_norm": 7.05378759701312, + "learning_rate": 1.652564638782009e-05, + "loss": 1.3783, + "step": 49146 + }, + { + "epoch": 0.59, + "grad_norm": 8.45541136512628, + "learning_rate": 1.6525203725672293e-05, + "loss": 1.4065, + "step": 49149 + }, + { + "epoch": 0.59, + "grad_norm": 61.31423215252804, + "learning_rate": 1.652476104125633e-05, + "loss": 1.3809, + "step": 49152 + }, + { + "epoch": 0.59, + "grad_norm": 5.956586067604147, + "learning_rate": 1.6524318334573704e-05, + "loss": 1.2617, + "step": 49155 + }, + { + "epoch": 0.59, + "grad_norm": 8.692667921368455, + "learning_rate": 1.6523875605625936e-05, + "loss": 1.3966, + "step": 49158 + }, + { + "epoch": 0.59, + "grad_norm": 9.230359140810402, + "learning_rate": 1.6523432854414527e-05, + "loss": 1.3482, + "step": 49161 + }, + { + "epoch": 0.59, + "grad_norm": 16.940637009173752, + "learning_rate": 1.6522990080940995e-05, + "loss": 1.494, + "step": 49164 + }, + { + "epoch": 0.59, + "grad_norm": 11.367193039599753, + "learning_rate": 1.6522547285206844e-05, + "loss": 1.5365, + "step": 49167 + }, + { + "epoch": 0.59, + "grad_norm": 6.966086086920985, + "learning_rate": 1.6522104467213585e-05, + "loss": 1.4384, + "step": 49170 + }, + { + "epoch": 0.59, + "grad_norm": 7.744136684988597, + "learning_rate": 1.652166162696274e-05, + "loss": 1.6633, + "step": 49173 + }, + { + "epoch": 0.59, + "grad_norm": 14.829429142584921, + "learning_rate": 1.652121876445581e-05, + "loss": 1.3883, + "step": 49176 + }, + { + "epoch": 0.59, + "grad_norm": 15.615255790575045, + "learning_rate": 1.652077587969431e-05, + "loss": 1.4509, + "step": 49179 + }, + { + "epoch": 0.59, + "grad_norm": 38.077897153715, + "learning_rate": 1.6520332972679747e-05, + "loss": 1.3052, + "step": 49182 + }, + { + "epoch": 0.59, + "grad_norm": 10.031774486430768, + "learning_rate": 1.6519890043413637e-05, + "loss": 1.2517, + "step": 49185 + }, + { + "epoch": 0.59, + "grad_norm": 9.25675177258907, + "learning_rate": 1.6519447091897493e-05, + "loss": 1.3902, + "step": 49188 + }, + { + "epoch": 0.59, + "grad_norm": 8.736208220462748, + "learning_rate": 1.6519004118132825e-05, + "loss": 1.2384, + "step": 49191 + }, + { + "epoch": 0.59, + "grad_norm": 4.972020802132298, + "learning_rate": 1.6518561122121143e-05, + "loss": 1.5032, + "step": 49194 + }, + { + "epoch": 0.59, + "grad_norm": 28.121024215378323, + "learning_rate": 1.651811810386396e-05, + "loss": 1.3169, + "step": 49197 + }, + { + "epoch": 0.59, + "grad_norm": 3.1561673072186904, + "learning_rate": 1.651767506336279e-05, + "loss": 1.6511, + "step": 49200 + }, + { + "epoch": 0.59, + "grad_norm": 17.053545372578135, + "learning_rate": 1.6517232000619136e-05, + "loss": 1.4119, + "step": 49203 + }, + { + "epoch": 0.59, + "grad_norm": 20.913714729908474, + "learning_rate": 1.6516788915634523e-05, + "loss": 1.1876, + "step": 49206 + }, + { + "epoch": 0.59, + "grad_norm": 43.71640054654275, + "learning_rate": 1.6516345808410453e-05, + "loss": 1.3384, + "step": 49209 + }, + { + "epoch": 0.59, + "grad_norm": 9.44735749266866, + "learning_rate": 1.6515902678948442e-05, + "loss": 1.3785, + "step": 49212 + }, + { + "epoch": 0.59, + "grad_norm": 14.68301503963431, + "learning_rate": 1.6515459527250006e-05, + "loss": 1.584, + "step": 49215 + }, + { + "epoch": 0.59, + "grad_norm": 40.1119436996787, + "learning_rate": 1.6515016353316654e-05, + "loss": 1.5216, + "step": 49218 + }, + { + "epoch": 0.59, + "grad_norm": 8.14228011605078, + "learning_rate": 1.6514573157149893e-05, + "loss": 1.2148, + "step": 49221 + }, + { + "epoch": 0.59, + "grad_norm": 17.197933947981106, + "learning_rate": 1.651412993875124e-05, + "loss": 1.3269, + "step": 49224 + }, + { + "epoch": 0.59, + "grad_norm": 4.912011037934096, + "learning_rate": 1.651368669812221e-05, + "loss": 1.2365, + "step": 49227 + }, + { + "epoch": 0.59, + "grad_norm": 9.62445075458875, + "learning_rate": 1.6513243435264316e-05, + "loss": 1.3236, + "step": 49230 + }, + { + "epoch": 0.59, + "grad_norm": 29.777553975402434, + "learning_rate": 1.6512800150179066e-05, + "loss": 1.296, + "step": 49233 + }, + { + "epoch": 0.59, + "grad_norm": 6.672007690615161, + "learning_rate": 1.6512356842867975e-05, + "loss": 1.0743, + "step": 49236 + }, + { + "epoch": 0.59, + "grad_norm": 11.49195393729079, + "learning_rate": 1.6511913513332557e-05, + "loss": 1.3356, + "step": 49239 + }, + { + "epoch": 0.59, + "grad_norm": 27.187616327850453, + "learning_rate": 1.6511470161574324e-05, + "loss": 1.3767, + "step": 49242 + }, + { + "epoch": 0.59, + "grad_norm": 34.775959339663, + "learning_rate": 1.6511026787594785e-05, + "loss": 1.2006, + "step": 49245 + }, + { + "epoch": 0.59, + "grad_norm": 4.997105793117209, + "learning_rate": 1.6510583391395457e-05, + "loss": 1.3134, + "step": 49248 + }, + { + "epoch": 0.59, + "grad_norm": 13.905726118333126, + "learning_rate": 1.6510139972977856e-05, + "loss": 1.6279, + "step": 49251 + }, + { + "epoch": 0.59, + "grad_norm": 11.28652248514834, + "learning_rate": 1.650969653234349e-05, + "loss": 1.3464, + "step": 49254 + }, + { + "epoch": 0.59, + "grad_norm": 7.1441895599319265, + "learning_rate": 1.650925306949388e-05, + "loss": 1.2271, + "step": 49257 + }, + { + "epoch": 0.59, + "grad_norm": 6.175820257989553, + "learning_rate": 1.650880958443053e-05, + "loss": 1.3669, + "step": 49260 + }, + { + "epoch": 0.59, + "grad_norm": 20.27563928076319, + "learning_rate": 1.6508366077154957e-05, + "loss": 1.4684, + "step": 49263 + }, + { + "epoch": 0.59, + "grad_norm": 6.352076641732571, + "learning_rate": 1.6507922547668676e-05, + "loss": 1.4952, + "step": 49266 + }, + { + "epoch": 0.59, + "grad_norm": 25.39054084418976, + "learning_rate": 1.65074789959732e-05, + "loss": 1.363, + "step": 49269 + }, + { + "epoch": 0.59, + "grad_norm": 9.175776063005655, + "learning_rate": 1.6507035422070038e-05, + "loss": 1.5141, + "step": 49272 + }, + { + "epoch": 0.59, + "grad_norm": 5.712517094657255, + "learning_rate": 1.650659182596071e-05, + "loss": 1.1207, + "step": 49275 + }, + { + "epoch": 0.59, + "grad_norm": 6.034034270073471, + "learning_rate": 1.6506148207646728e-05, + "loss": 1.9516, + "step": 49278 + }, + { + "epoch": 0.59, + "grad_norm": 66.87124256111588, + "learning_rate": 1.6505704567129607e-05, + "loss": 1.6181, + "step": 49281 + }, + { + "epoch": 0.59, + "grad_norm": 11.092724644727703, + "learning_rate": 1.650526090441086e-05, + "loss": 1.5198, + "step": 49284 + }, + { + "epoch": 0.59, + "grad_norm": 3.6996150489994695, + "learning_rate": 1.6504817219491998e-05, + "loss": 1.3687, + "step": 49287 + }, + { + "epoch": 0.59, + "grad_norm": 9.823968518058281, + "learning_rate": 1.650437351237454e-05, + "loss": 1.5507, + "step": 49290 + }, + { + "epoch": 0.59, + "grad_norm": 23.63808946561504, + "learning_rate": 1.6503929783059997e-05, + "loss": 1.7945, + "step": 49293 + }, + { + "epoch": 0.59, + "grad_norm": 27.079437035833717, + "learning_rate": 1.6503486031549884e-05, + "loss": 1.3669, + "step": 49296 + }, + { + "epoch": 0.59, + "grad_norm": 6.98934823121675, + "learning_rate": 1.650304225784572e-05, + "loss": 1.4997, + "step": 49299 + }, + { + "epoch": 0.59, + "grad_norm": 9.38554209259394, + "learning_rate": 1.650259846194901e-05, + "loss": 1.5286, + "step": 49302 + }, + { + "epoch": 0.59, + "grad_norm": 6.674597738952675, + "learning_rate": 1.6502154643861273e-05, + "loss": 1.7903, + "step": 49305 + }, + { + "epoch": 0.59, + "grad_norm": 23.16517240274434, + "learning_rate": 1.650171080358403e-05, + "loss": 1.5394, + "step": 49308 + }, + { + "epoch": 0.59, + "grad_norm": 33.86825308434076, + "learning_rate": 1.6501266941118786e-05, + "loss": 1.3534, + "step": 49311 + }, + { + "epoch": 0.59, + "grad_norm": 9.815077342493268, + "learning_rate": 1.6500823056467058e-05, + "loss": 1.414, + "step": 49314 + }, + { + "epoch": 0.59, + "grad_norm": 20.75553489691952, + "learning_rate": 1.6500379149630367e-05, + "loss": 1.256, + "step": 49317 + }, + { + "epoch": 0.59, + "grad_norm": 12.519089344748059, + "learning_rate": 1.649993522061022e-05, + "loss": 1.2241, + "step": 49320 + }, + { + "epoch": 0.59, + "grad_norm": 70.4738588856643, + "learning_rate": 1.6499491269408135e-05, + "loss": 1.2332, + "step": 49323 + }, + { + "epoch": 0.59, + "grad_norm": 9.967692120437285, + "learning_rate": 1.6499047296025632e-05, + "loss": 1.3311, + "step": 49326 + }, + { + "epoch": 0.59, + "grad_norm": 67.37276048641957, + "learning_rate": 1.6498603300464216e-05, + "loss": 1.7627, + "step": 49329 + }, + { + "epoch": 0.59, + "grad_norm": 35.00406306157572, + "learning_rate": 1.649815928272541e-05, + "loss": 1.8059, + "step": 49332 + }, + { + "epoch": 0.59, + "grad_norm": 5.295529605864472, + "learning_rate": 1.6497715242810726e-05, + "loss": 1.1577, + "step": 49335 + }, + { + "epoch": 0.59, + "grad_norm": 23.36483656909225, + "learning_rate": 1.6497271180721682e-05, + "loss": 1.417, + "step": 49338 + }, + { + "epoch": 0.59, + "grad_norm": 8.212529405233271, + "learning_rate": 1.6496827096459792e-05, + "loss": 1.2885, + "step": 49341 + }, + { + "epoch": 0.59, + "grad_norm": 8.519563969892978, + "learning_rate": 1.649638299002657e-05, + "loss": 1.7075, + "step": 49344 + }, + { + "epoch": 0.59, + "grad_norm": 38.94445414367927, + "learning_rate": 1.6495938861423535e-05, + "loss": 1.3893, + "step": 49347 + }, + { + "epoch": 0.59, + "grad_norm": 28.52394840110377, + "learning_rate": 1.6495494710652198e-05, + "loss": 1.2858, + "step": 49350 + }, + { + "epoch": 0.59, + "grad_norm": 20.10523593414208, + "learning_rate": 1.649505053771408e-05, + "loss": 1.2943, + "step": 49353 + }, + { + "epoch": 0.59, + "grad_norm": 4.356957314366495, + "learning_rate": 1.649460634261069e-05, + "loss": 1.4248, + "step": 49356 + }, + { + "epoch": 0.59, + "grad_norm": 27.30622673003368, + "learning_rate": 1.649416212534355e-05, + "loss": 1.6206, + "step": 49359 + }, + { + "epoch": 0.59, + "grad_norm": 9.644581092739866, + "learning_rate": 1.6493717885914176e-05, + "loss": 1.3765, + "step": 49362 + }, + { + "epoch": 0.59, + "grad_norm": 8.384166419568464, + "learning_rate": 1.649327362432408e-05, + "loss": 1.4711, + "step": 49365 + }, + { + "epoch": 0.59, + "grad_norm": 5.987836101467919, + "learning_rate": 1.6492829340574775e-05, + "loss": 1.5971, + "step": 49368 + }, + { + "epoch": 0.59, + "grad_norm": 8.580331359325452, + "learning_rate": 1.6492385034667792e-05, + "loss": 1.646, + "step": 49371 + }, + { + "epoch": 0.59, + "grad_norm": 51.34717124672964, + "learning_rate": 1.649194070660463e-05, + "loss": 1.3034, + "step": 49374 + }, + { + "epoch": 0.59, + "grad_norm": 17.471241264182748, + "learning_rate": 1.6491496356386814e-05, + "loss": 1.4183, + "step": 49377 + }, + { + "epoch": 0.59, + "grad_norm": 9.195807577376058, + "learning_rate": 1.649105198401586e-05, + "loss": 1.7837, + "step": 49380 + }, + { + "epoch": 0.59, + "grad_norm": 12.79690507860944, + "learning_rate": 1.6490607589493284e-05, + "loss": 1.3879, + "step": 49383 + }, + { + "epoch": 0.59, + "grad_norm": 30.274711038259454, + "learning_rate": 1.6490163172820602e-05, + "loss": 1.3479, + "step": 49386 + }, + { + "epoch": 0.59, + "grad_norm": 4.549623691552068, + "learning_rate": 1.6489718733999333e-05, + "loss": 1.5595, + "step": 49389 + }, + { + "epoch": 0.59, + "grad_norm": 20.079185401266066, + "learning_rate": 1.6489274273030988e-05, + "loss": 1.7881, + "step": 49392 + }, + { + "epoch": 0.59, + "grad_norm": 19.01536739559351, + "learning_rate": 1.648882978991709e-05, + "loss": 1.4561, + "step": 49395 + }, + { + "epoch": 0.59, + "grad_norm": 14.681439717486244, + "learning_rate": 1.6488385284659153e-05, + "loss": 1.1887, + "step": 49398 + }, + { + "epoch": 0.59, + "grad_norm": 6.269722069250157, + "learning_rate": 1.648794075725869e-05, + "loss": 1.3304, + "step": 49401 + }, + { + "epoch": 0.59, + "grad_norm": 11.097993329745439, + "learning_rate": 1.6487496207717226e-05, + "loss": 1.4037, + "step": 49404 + }, + { + "epoch": 0.59, + "grad_norm": 15.32892173087697, + "learning_rate": 1.6487051636036274e-05, + "loss": 1.4785, + "step": 49407 + }, + { + "epoch": 0.59, + "grad_norm": 31.2789640234985, + "learning_rate": 1.648660704221735e-05, + "loss": 1.2045, + "step": 49410 + }, + { + "epoch": 0.59, + "grad_norm": 7.987546196790258, + "learning_rate": 1.6486162426261973e-05, + "loss": 1.4316, + "step": 49413 + }, + { + "epoch": 0.59, + "grad_norm": 16.40435703103304, + "learning_rate": 1.648571778817166e-05, + "loss": 1.303, + "step": 49416 + }, + { + "epoch": 0.59, + "grad_norm": 4.41508125416618, + "learning_rate": 1.648527312794793e-05, + "loss": 1.2612, + "step": 49419 + }, + { + "epoch": 0.59, + "grad_norm": 10.151427557393736, + "learning_rate": 1.64848284455923e-05, + "loss": 1.6416, + "step": 49422 + }, + { + "epoch": 0.59, + "grad_norm": 48.06508830108594, + "learning_rate": 1.648438374110628e-05, + "loss": 1.1482, + "step": 49425 + }, + { + "epoch": 0.59, + "grad_norm": 23.114763732262176, + "learning_rate": 1.64839390144914e-05, + "loss": 1.4431, + "step": 49428 + }, + { + "epoch": 0.59, + "grad_norm": 37.274778315715864, + "learning_rate": 1.648349426574917e-05, + "loss": 1.3638, + "step": 49431 + }, + { + "epoch": 0.59, + "grad_norm": 13.65607444159035, + "learning_rate": 1.648304949488111e-05, + "loss": 1.3931, + "step": 49434 + }, + { + "epoch": 0.59, + "grad_norm": 81.04444202652124, + "learning_rate": 1.6482604701888734e-05, + "loss": 1.5568, + "step": 49437 + }, + { + "epoch": 0.59, + "grad_norm": 27.124997392521916, + "learning_rate": 1.6482159886773565e-05, + "loss": 1.6549, + "step": 49440 + }, + { + "epoch": 0.59, + "grad_norm": 5.104805879483553, + "learning_rate": 1.648171504953712e-05, + "loss": 1.452, + "step": 49443 + }, + { + "epoch": 0.59, + "grad_norm": 7.895401615183695, + "learning_rate": 1.6481270190180915e-05, + "loss": 1.5693, + "step": 49446 + }, + { + "epoch": 0.59, + "grad_norm": 4.664705831239788, + "learning_rate": 1.6480825308706472e-05, + "loss": 1.448, + "step": 49449 + }, + { + "epoch": 0.59, + "grad_norm": 15.266872838067991, + "learning_rate": 1.6480380405115304e-05, + "loss": 1.5516, + "step": 49452 + }, + { + "epoch": 0.59, + "grad_norm": 25.580361086557442, + "learning_rate": 1.6479935479408932e-05, + "loss": 1.0774, + "step": 49455 + }, + { + "epoch": 0.59, + "grad_norm": 11.487732587289456, + "learning_rate": 1.6479490531588872e-05, + "loss": 1.3802, + "step": 49458 + }, + { + "epoch": 0.59, + "grad_norm": 9.827977992483309, + "learning_rate": 1.647904556165665e-05, + "loss": 1.5736, + "step": 49461 + }, + { + "epoch": 0.59, + "grad_norm": 26.717080171017514, + "learning_rate": 1.6478600569613775e-05, + "loss": 1.115, + "step": 49464 + }, + { + "epoch": 0.59, + "grad_norm": 16.587069419263667, + "learning_rate": 1.6478155555461772e-05, + "loss": 1.2826, + "step": 49467 + }, + { + "epoch": 0.59, + "grad_norm": 7.0152256398503186, + "learning_rate": 1.6477710519202157e-05, + "loss": 1.4023, + "step": 49470 + }, + { + "epoch": 0.59, + "grad_norm": 25.05085919773006, + "learning_rate": 1.647726546083645e-05, + "loss": 1.3911, + "step": 49473 + }, + { + "epoch": 0.59, + "grad_norm": 16.9933207003609, + "learning_rate": 1.6476820380366168e-05, + "loss": 1.7849, + "step": 49476 + }, + { + "epoch": 0.59, + "grad_norm": 12.838969068015283, + "learning_rate": 1.6476375277792832e-05, + "loss": 1.3369, + "step": 49479 + }, + { + "epoch": 0.6, + "grad_norm": 18.997754299698308, + "learning_rate": 1.647593015311796e-05, + "loss": 1.7, + "step": 49482 + }, + { + "epoch": 0.6, + "grad_norm": 11.039515849622132, + "learning_rate": 1.647548500634307e-05, + "loss": 1.5861, + "step": 49485 + }, + { + "epoch": 0.6, + "grad_norm": 13.26559138614799, + "learning_rate": 1.6475039837469685e-05, + "loss": 1.3247, + "step": 49488 + }, + { + "epoch": 0.6, + "grad_norm": 8.613165424550447, + "learning_rate": 1.6474594646499317e-05, + "loss": 1.4166, + "step": 49491 + }, + { + "epoch": 0.6, + "grad_norm": 47.930466788226624, + "learning_rate": 1.6474149433433494e-05, + "loss": 1.7797, + "step": 49494 + }, + { + "epoch": 0.6, + "grad_norm": 5.053897337231069, + "learning_rate": 1.6473704198273725e-05, + "loss": 1.6367, + "step": 49497 + }, + { + "epoch": 0.6, + "grad_norm": 6.96928174978197, + "learning_rate": 1.647325894102154e-05, + "loss": 1.3971, + "step": 49500 + }, + { + "epoch": 0.6, + "grad_norm": 29.451505287596966, + "learning_rate": 1.6472813661678453e-05, + "loss": 1.6681, + "step": 49503 + }, + { + "epoch": 0.6, + "grad_norm": 11.732183238432796, + "learning_rate": 1.6472368360245986e-05, + "loss": 1.1609, + "step": 49506 + }, + { + "epoch": 0.6, + "grad_norm": 5.236548528533754, + "learning_rate": 1.6471923036725655e-05, + "loss": 1.5269, + "step": 49509 + }, + { + "epoch": 0.6, + "grad_norm": 23.202617024270914, + "learning_rate": 1.6471477691118983e-05, + "loss": 1.4751, + "step": 49512 + }, + { + "epoch": 0.6, + "grad_norm": 15.973490467894424, + "learning_rate": 1.6471032323427486e-05, + "loss": 1.0826, + "step": 49515 + }, + { + "epoch": 0.6, + "grad_norm": 16.154572980758797, + "learning_rate": 1.6470586933652687e-05, + "loss": 1.2667, + "step": 49518 + }, + { + "epoch": 0.6, + "grad_norm": 12.084110357288644, + "learning_rate": 1.647014152179611e-05, + "loss": 1.5242, + "step": 49521 + }, + { + "epoch": 0.6, + "grad_norm": 6.165050668150791, + "learning_rate": 1.6469696087859267e-05, + "loss": 1.5144, + "step": 49524 + }, + { + "epoch": 0.6, + "grad_norm": 23.691407585022827, + "learning_rate": 1.646925063184368e-05, + "loss": 1.4317, + "step": 49527 + }, + { + "epoch": 0.6, + "grad_norm": 3.8777215313614137, + "learning_rate": 1.6468805153750874e-05, + "loss": 1.4183, + "step": 49530 + }, + { + "epoch": 0.6, + "grad_norm": 77.3028218035196, + "learning_rate": 1.6468359653582363e-05, + "loss": 1.2437, + "step": 49533 + }, + { + "epoch": 0.6, + "grad_norm": 85.21474099941771, + "learning_rate": 1.6467914131339675e-05, + "loss": 1.3406, + "step": 49536 + }, + { + "epoch": 0.6, + "grad_norm": 14.854113403957625, + "learning_rate": 1.6467468587024323e-05, + "loss": 1.4282, + "step": 49539 + }, + { + "epoch": 0.6, + "grad_norm": 22.817054779403634, + "learning_rate": 1.6467023020637832e-05, + "loss": 1.5576, + "step": 49542 + }, + { + "epoch": 0.6, + "grad_norm": 8.36905776487888, + "learning_rate": 1.646657743218172e-05, + "loss": 1.2459, + "step": 49545 + }, + { + "epoch": 0.6, + "grad_norm": 21.613732834415718, + "learning_rate": 1.6466131821657507e-05, + "loss": 1.5614, + "step": 49548 + }, + { + "epoch": 0.6, + "grad_norm": 140.12096020594475, + "learning_rate": 1.6465686189066714e-05, + "loss": 1.5562, + "step": 49551 + }, + { + "epoch": 0.6, + "grad_norm": 5.855705751098027, + "learning_rate": 1.6465240534410866e-05, + "loss": 1.2751, + "step": 49554 + }, + { + "epoch": 0.6, + "grad_norm": 13.186993884311192, + "learning_rate": 1.646479485769148e-05, + "loss": 1.4018, + "step": 49557 + }, + { + "epoch": 0.6, + "grad_norm": 17.063386937516576, + "learning_rate": 1.6464349158910074e-05, + "loss": 1.4993, + "step": 49560 + }, + { + "epoch": 0.6, + "grad_norm": 12.173720885976133, + "learning_rate": 1.646390343806818e-05, + "loss": 1.558, + "step": 49563 + }, + { + "epoch": 0.6, + "grad_norm": 7.000140861715971, + "learning_rate": 1.6463457695167307e-05, + "loss": 1.5478, + "step": 49566 + }, + { + "epoch": 0.6, + "grad_norm": 46.7082975015168, + "learning_rate": 1.6463011930208982e-05, + "loss": 1.4343, + "step": 49569 + }, + { + "epoch": 0.6, + "grad_norm": 15.441883482839076, + "learning_rate": 1.6462566143194726e-05, + "loss": 1.1641, + "step": 49572 + }, + { + "epoch": 0.6, + "grad_norm": 16.925216702091074, + "learning_rate": 1.646212033412606e-05, + "loss": 1.5997, + "step": 49575 + }, + { + "epoch": 0.6, + "grad_norm": 2.4394152286224027, + "learning_rate": 1.6461674503004503e-05, + "loss": 1.3741, + "step": 49578 + }, + { + "epoch": 0.6, + "grad_norm": 34.941069173548165, + "learning_rate": 1.646122864983158e-05, + "loss": 1.267, + "step": 49581 + }, + { + "epoch": 0.6, + "grad_norm": 11.49708441113008, + "learning_rate": 1.646078277460881e-05, + "loss": 1.4845, + "step": 49584 + }, + { + "epoch": 0.6, + "grad_norm": 13.96863509760377, + "learning_rate": 1.6460336877337713e-05, + "loss": 1.4317, + "step": 49587 + }, + { + "epoch": 0.6, + "grad_norm": 6.550915841952577, + "learning_rate": 1.6459890958019817e-05, + "loss": 1.4493, + "step": 49590 + }, + { + "epoch": 0.6, + "grad_norm": 8.349121501139638, + "learning_rate": 1.645944501665664e-05, + "loss": 1.1456, + "step": 49593 + }, + { + "epoch": 0.6, + "grad_norm": 10.256732733834712, + "learning_rate": 1.6458999053249702e-05, + "loss": 1.4336, + "step": 49596 + }, + { + "epoch": 0.6, + "grad_norm": 9.997760596282248, + "learning_rate": 1.6458553067800525e-05, + "loss": 1.4708, + "step": 49599 + }, + { + "epoch": 0.6, + "grad_norm": 21.551739062434166, + "learning_rate": 1.6458107060310636e-05, + "loss": 1.1013, + "step": 49602 + }, + { + "epoch": 0.6, + "grad_norm": 7.269940881099002, + "learning_rate": 1.6457661030781557e-05, + "loss": 1.1333, + "step": 49605 + }, + { + "epoch": 0.6, + "grad_norm": 167.4600981745482, + "learning_rate": 1.64572149792148e-05, + "loss": 1.2538, + "step": 49608 + }, + { + "epoch": 0.6, + "grad_norm": 23.861220782999055, + "learning_rate": 1.64567689056119e-05, + "loss": 1.221, + "step": 49611 + }, + { + "epoch": 0.6, + "grad_norm": 31.37693059614833, + "learning_rate": 1.645632280997437e-05, + "loss": 1.4309, + "step": 49614 + }, + { + "epoch": 0.6, + "grad_norm": 21.412493664476482, + "learning_rate": 1.6455876692303735e-05, + "loss": 1.2084, + "step": 49617 + }, + { + "epoch": 0.6, + "grad_norm": 18.828995290205828, + "learning_rate": 1.645543055260152e-05, + "loss": 1.6871, + "step": 49620 + }, + { + "epoch": 0.6, + "grad_norm": 29.557056607092818, + "learning_rate": 1.6454984390869246e-05, + "loss": 1.2315, + "step": 49623 + }, + { + "epoch": 0.6, + "grad_norm": 34.34103306396167, + "learning_rate": 1.6454538207108436e-05, + "loss": 1.302, + "step": 49626 + }, + { + "epoch": 0.6, + "grad_norm": 11.621895956583492, + "learning_rate": 1.6454092001320606e-05, + "loss": 1.3331, + "step": 49629 + }, + { + "epoch": 0.6, + "grad_norm": 12.774492290400232, + "learning_rate": 1.6453645773507293e-05, + "loss": 1.1237, + "step": 49632 + }, + { + "epoch": 0.6, + "grad_norm": 9.107098361359983, + "learning_rate": 1.6453199523670006e-05, + "loss": 1.7293, + "step": 49635 + }, + { + "epoch": 0.6, + "grad_norm": 24.989527496939765, + "learning_rate": 1.645275325181028e-05, + "loss": 1.3696, + "step": 49638 + }, + { + "epoch": 0.6, + "grad_norm": 6.230154892622996, + "learning_rate": 1.6452306957929626e-05, + "loss": 1.2251, + "step": 49641 + }, + { + "epoch": 0.6, + "grad_norm": 9.254273744233856, + "learning_rate": 1.6451860642029573e-05, + "loss": 1.3381, + "step": 49644 + }, + { + "epoch": 0.6, + "grad_norm": 30.101465114173294, + "learning_rate": 1.6451414304111643e-05, + "loss": 1.2698, + "step": 49647 + }, + { + "epoch": 0.6, + "grad_norm": 37.806805122058684, + "learning_rate": 1.6450967944177362e-05, + "loss": 1.5854, + "step": 49650 + }, + { + "epoch": 0.6, + "grad_norm": 7.09849706812837, + "learning_rate": 1.645052156222825e-05, + "loss": 1.2838, + "step": 49653 + }, + { + "epoch": 0.6, + "grad_norm": 18.90714022950217, + "learning_rate": 1.645007515826583e-05, + "loss": 1.1597, + "step": 49656 + }, + { + "epoch": 0.6, + "grad_norm": 18.489149433909112, + "learning_rate": 1.644962873229163e-05, + "loss": 1.38, + "step": 49659 + }, + { + "epoch": 0.6, + "grad_norm": 9.893462654313216, + "learning_rate": 1.6449182284307167e-05, + "loss": 1.3447, + "step": 49662 + }, + { + "epoch": 0.6, + "grad_norm": 32.12186321009008, + "learning_rate": 1.6448735814313968e-05, + "loss": 1.4898, + "step": 49665 + }, + { + "epoch": 0.6, + "grad_norm": 6.954280078012246, + "learning_rate": 1.6448289322313556e-05, + "loss": 1.3343, + "step": 49668 + }, + { + "epoch": 0.6, + "grad_norm": 50.359477367056456, + "learning_rate": 1.6447842808307458e-05, + "loss": 1.4971, + "step": 49671 + }, + { + "epoch": 0.6, + "grad_norm": 20.616364910807203, + "learning_rate": 1.6447396272297196e-05, + "loss": 1.472, + "step": 49674 + }, + { + "epoch": 0.6, + "grad_norm": 14.978860220674754, + "learning_rate": 1.644694971428429e-05, + "loss": 1.4932, + "step": 49677 + }, + { + "epoch": 0.6, + "grad_norm": 33.11356076971444, + "learning_rate": 1.6446503134270268e-05, + "loss": 1.1819, + "step": 49680 + }, + { + "epoch": 0.6, + "grad_norm": 6.565819691488512, + "learning_rate": 1.644605653225665e-05, + "loss": 1.3842, + "step": 49683 + }, + { + "epoch": 0.6, + "grad_norm": 31.10647747065367, + "learning_rate": 1.6445609908244968e-05, + "loss": 1.1847, + "step": 49686 + }, + { + "epoch": 0.6, + "grad_norm": 29.779970312756383, + "learning_rate": 1.6445163262236737e-05, + "loss": 1.661, + "step": 49689 + }, + { + "epoch": 0.6, + "grad_norm": 25.300776167156148, + "learning_rate": 1.6444716594233486e-05, + "loss": 1.7479, + "step": 49692 + }, + { + "epoch": 0.6, + "grad_norm": 11.100854884507088, + "learning_rate": 1.644426990423674e-05, + "loss": 1.1278, + "step": 49695 + }, + { + "epoch": 0.6, + "grad_norm": 17.155031664144513, + "learning_rate": 1.644382319224802e-05, + "loss": 1.6368, + "step": 49698 + }, + { + "epoch": 0.6, + "grad_norm": 11.20822365053341, + "learning_rate": 1.6443376458268857e-05, + "loss": 1.5943, + "step": 49701 + }, + { + "epoch": 0.6, + "grad_norm": 28.697821329438707, + "learning_rate": 1.644292970230077e-05, + "loss": 1.9997, + "step": 49704 + }, + { + "epoch": 0.6, + "grad_norm": 11.05647769992175, + "learning_rate": 1.644248292434528e-05, + "loss": 1.2309, + "step": 49707 + }, + { + "epoch": 0.6, + "grad_norm": 22.04796670109066, + "learning_rate": 1.6442036124403917e-05, + "loss": 1.754, + "step": 49710 + }, + { + "epoch": 0.6, + "grad_norm": 8.975198329622126, + "learning_rate": 1.644158930247821e-05, + "loss": 1.4671, + "step": 49713 + }, + { + "epoch": 0.6, + "grad_norm": 20.808263520614652, + "learning_rate": 1.6441142458569674e-05, + "loss": 1.2147, + "step": 49716 + }, + { + "epoch": 0.6, + "grad_norm": 9.600124240078937, + "learning_rate": 1.6440695592679842e-05, + "loss": 1.115, + "step": 49719 + }, + { + "epoch": 0.6, + "grad_norm": 7.426166174952041, + "learning_rate": 1.6440248704810234e-05, + "loss": 1.5465, + "step": 49722 + }, + { + "epoch": 0.6, + "grad_norm": 51.87006036953824, + "learning_rate": 1.643980179496238e-05, + "loss": 1.1478, + "step": 49725 + }, + { + "epoch": 0.6, + "grad_norm": 21.21775363942195, + "learning_rate": 1.6439354863137796e-05, + "loss": 1.4133, + "step": 49728 + }, + { + "epoch": 0.6, + "grad_norm": 10.272882319225916, + "learning_rate": 1.643890790933802e-05, + "loss": 1.3462, + "step": 49731 + }, + { + "epoch": 0.6, + "grad_norm": 19.97857540986968, + "learning_rate": 1.6438460933564565e-05, + "loss": 1.224, + "step": 49734 + }, + { + "epoch": 0.6, + "grad_norm": 9.088946644523118, + "learning_rate": 1.6438013935818964e-05, + "loss": 1.2209, + "step": 49737 + }, + { + "epoch": 0.6, + "grad_norm": 28.74159888794377, + "learning_rate": 1.643756691610274e-05, + "loss": 1.5388, + "step": 49740 + }, + { + "epoch": 0.6, + "grad_norm": 23.81384366410018, + "learning_rate": 1.6437119874417425e-05, + "loss": 1.3335, + "step": 49743 + }, + { + "epoch": 0.6, + "grad_norm": 5.900828938003933, + "learning_rate": 1.6436672810764533e-05, + "loss": 1.3923, + "step": 49746 + }, + { + "epoch": 0.6, + "grad_norm": 14.941212589751316, + "learning_rate": 1.6436225725145593e-05, + "loss": 1.4719, + "step": 49749 + }, + { + "epoch": 0.6, + "grad_norm": 28.953678799845687, + "learning_rate": 1.6435778617562136e-05, + "loss": 1.6649, + "step": 49752 + }, + { + "epoch": 0.6, + "grad_norm": 11.92193722593535, + "learning_rate": 1.6435331488015684e-05, + "loss": 1.2495, + "step": 49755 + }, + { + "epoch": 0.6, + "grad_norm": 46.250798118190474, + "learning_rate": 1.6434884336507762e-05, + "loss": 1.4785, + "step": 49758 + }, + { + "epoch": 0.6, + "grad_norm": 52.08381986101798, + "learning_rate": 1.64344371630399e-05, + "loss": 1.7366, + "step": 49761 + }, + { + "epoch": 0.6, + "grad_norm": 5.631708098247227, + "learning_rate": 1.6433989967613622e-05, + "loss": 1.354, + "step": 49764 + }, + { + "epoch": 0.6, + "grad_norm": 12.088041143489669, + "learning_rate": 1.6433542750230454e-05, + "loss": 1.6479, + "step": 49767 + }, + { + "epoch": 0.6, + "grad_norm": 6.8732101665499785, + "learning_rate": 1.643309551089192e-05, + "loss": 1.1381, + "step": 49770 + }, + { + "epoch": 0.6, + "grad_norm": 8.21994663197925, + "learning_rate": 1.643264824959955e-05, + "loss": 1.1418, + "step": 49773 + }, + { + "epoch": 0.6, + "grad_norm": 6.241751691560509, + "learning_rate": 1.6432200966354866e-05, + "loss": 1.471, + "step": 49776 + }, + { + "epoch": 0.6, + "grad_norm": 11.88510083975984, + "learning_rate": 1.6431753661159403e-05, + "loss": 1.5888, + "step": 49779 + }, + { + "epoch": 0.6, + "grad_norm": 16.816043976782588, + "learning_rate": 1.6431306334014672e-05, + "loss": 1.2199, + "step": 49782 + }, + { + "epoch": 0.6, + "grad_norm": 10.587869240939222, + "learning_rate": 1.6430858984922217e-05, + "loss": 1.6622, + "step": 49785 + }, + { + "epoch": 0.6, + "grad_norm": 27.333692617614194, + "learning_rate": 1.6430411613883555e-05, + "loss": 1.4416, + "step": 49788 + }, + { + "epoch": 0.6, + "grad_norm": 66.08701115653272, + "learning_rate": 1.642996422090021e-05, + "loss": 1.3089, + "step": 49791 + }, + { + "epoch": 0.6, + "grad_norm": 8.7398202824874, + "learning_rate": 1.6429516805973716e-05, + "loss": 1.3335, + "step": 49794 + }, + { + "epoch": 0.6, + "grad_norm": 19.148743224600864, + "learning_rate": 1.6429069369105598e-05, + "loss": 1.5888, + "step": 49797 + }, + { + "epoch": 0.6, + "grad_norm": 19.097991137706362, + "learning_rate": 1.6428621910297382e-05, + "loss": 1.5562, + "step": 49800 + }, + { + "epoch": 0.6, + "grad_norm": 22.10328728799654, + "learning_rate": 1.6428174429550593e-05, + "loss": 1.5729, + "step": 49803 + }, + { + "epoch": 0.6, + "grad_norm": 18.074148044521824, + "learning_rate": 1.642772692686676e-05, + "loss": 1.7011, + "step": 49806 + }, + { + "epoch": 0.6, + "grad_norm": 4.9058498485823145, + "learning_rate": 1.6427279402247416e-05, + "loss": 1.4863, + "step": 49809 + }, + { + "epoch": 0.6, + "grad_norm": 18.43215527463647, + "learning_rate": 1.6426831855694077e-05, + "loss": 1.5224, + "step": 49812 + }, + { + "epoch": 0.6, + "grad_norm": 21.922110396568566, + "learning_rate": 1.6426384287208278e-05, + "loss": 1.2362, + "step": 49815 + }, + { + "epoch": 0.6, + "grad_norm": 16.785845970811458, + "learning_rate": 1.6425936696791542e-05, + "loss": 1.4996, + "step": 49818 + }, + { + "epoch": 0.6, + "grad_norm": 30.50916946121705, + "learning_rate": 1.6425489084445397e-05, + "loss": 1.7459, + "step": 49821 + }, + { + "epoch": 0.6, + "grad_norm": 11.338202774580802, + "learning_rate": 1.6425041450171376e-05, + "loss": 1.0782, + "step": 49824 + }, + { + "epoch": 0.6, + "grad_norm": 20.64719751233764, + "learning_rate": 1.6424593793971e-05, + "loss": 1.5641, + "step": 49827 + }, + { + "epoch": 0.6, + "grad_norm": 2.2588572089326715, + "learning_rate": 1.6424146115845806e-05, + "loss": 1.7465, + "step": 49830 + }, + { + "epoch": 0.6, + "grad_norm": 37.598853338134646, + "learning_rate": 1.6423698415797308e-05, + "loss": 1.3393, + "step": 49833 + }, + { + "epoch": 0.6, + "grad_norm": 13.31051296309131, + "learning_rate": 1.6423250693827048e-05, + "loss": 1.5429, + "step": 49836 + }, + { + "epoch": 0.6, + "grad_norm": 10.804145699675383, + "learning_rate": 1.642280294993654e-05, + "loss": 1.3355, + "step": 49839 + }, + { + "epoch": 0.6, + "grad_norm": 7.690294591877128, + "learning_rate": 1.642235518412732e-05, + "loss": 1.1081, + "step": 49842 + }, + { + "epoch": 0.6, + "grad_norm": 23.33422850549979, + "learning_rate": 1.6421907396400918e-05, + "loss": 1.4087, + "step": 49845 + }, + { + "epoch": 0.6, + "grad_norm": 38.96141778319247, + "learning_rate": 1.642145958675886e-05, + "loss": 1.5829, + "step": 49848 + }, + { + "epoch": 0.6, + "grad_norm": 16.29113643673504, + "learning_rate": 1.642101175520267e-05, + "loss": 1.3096, + "step": 49851 + }, + { + "epoch": 0.6, + "grad_norm": 6.048855998461419, + "learning_rate": 1.642056390173388e-05, + "loss": 1.4848, + "step": 49854 + }, + { + "epoch": 0.6, + "grad_norm": 27.898111804229554, + "learning_rate": 1.642011602635402e-05, + "loss": 1.3064, + "step": 49857 + }, + { + "epoch": 0.6, + "grad_norm": 4.9573496836313815, + "learning_rate": 1.6419668129064616e-05, + "loss": 1.5032, + "step": 49860 + }, + { + "epoch": 0.6, + "grad_norm": 18.19782127434297, + "learning_rate": 1.6419220209867197e-05, + "loss": 1.6346, + "step": 49863 + }, + { + "epoch": 0.6, + "grad_norm": 5.4370386853310055, + "learning_rate": 1.6418772268763293e-05, + "loss": 1.2565, + "step": 49866 + }, + { + "epoch": 0.6, + "grad_norm": 46.94941548526806, + "learning_rate": 1.641832430575443e-05, + "loss": 1.6616, + "step": 49869 + }, + { + "epoch": 0.6, + "grad_norm": 14.461091187862767, + "learning_rate": 1.6417876320842133e-05, + "loss": 1.2768, + "step": 49872 + }, + { + "epoch": 0.6, + "grad_norm": 35.14535719048593, + "learning_rate": 1.6417428314027944e-05, + "loss": 1.2971, + "step": 49875 + }, + { + "epoch": 0.6, + "grad_norm": 17.012183628187515, + "learning_rate": 1.641698028531338e-05, + "loss": 1.2177, + "step": 49878 + }, + { + "epoch": 0.6, + "grad_norm": 21.157180979705, + "learning_rate": 1.6416532234699974e-05, + "loss": 1.3472, + "step": 49881 + }, + { + "epoch": 0.6, + "grad_norm": 4.566647124702278, + "learning_rate": 1.6416084162189255e-05, + "loss": 1.4651, + "step": 49884 + }, + { + "epoch": 0.6, + "grad_norm": 21.294835544521828, + "learning_rate": 1.641563606778275e-05, + "loss": 1.6129, + "step": 49887 + }, + { + "epoch": 0.6, + "grad_norm": 36.52246176374313, + "learning_rate": 1.6415187951481992e-05, + "loss": 1.345, + "step": 49890 + }, + { + "epoch": 0.6, + "grad_norm": 10.055418148293745, + "learning_rate": 1.6414739813288507e-05, + "loss": 1.2937, + "step": 49893 + }, + { + "epoch": 0.6, + "grad_norm": 34.31045494559671, + "learning_rate": 1.6414291653203828e-05, + "loss": 0.989, + "step": 49896 + }, + { + "epoch": 0.6, + "grad_norm": 9.089239117146226, + "learning_rate": 1.6413843471229477e-05, + "loss": 1.5191, + "step": 49899 + }, + { + "epoch": 0.6, + "grad_norm": 16.775979206170266, + "learning_rate": 1.6413395267366997e-05, + "loss": 1.4563, + "step": 49902 + }, + { + "epoch": 0.6, + "grad_norm": 22.758698217178335, + "learning_rate": 1.6412947041617902e-05, + "loss": 1.2691, + "step": 49905 + }, + { + "epoch": 0.6, + "grad_norm": 9.046133929020257, + "learning_rate": 1.641249879398373e-05, + "loss": 1.3244, + "step": 49908 + }, + { + "epoch": 0.6, + "grad_norm": 7.409085913988411, + "learning_rate": 1.641205052446601e-05, + "loss": 1.417, + "step": 49911 + }, + { + "epoch": 0.6, + "grad_norm": 15.755443968104927, + "learning_rate": 1.641160223306627e-05, + "loss": 1.4536, + "step": 49914 + }, + { + "epoch": 0.6, + "grad_norm": 10.360376869832322, + "learning_rate": 1.641115391978604e-05, + "loss": 1.0588, + "step": 49917 + }, + { + "epoch": 0.6, + "grad_norm": 8.423299919247926, + "learning_rate": 1.6410705584626855e-05, + "loss": 1.8271, + "step": 49920 + }, + { + "epoch": 0.6, + "grad_norm": 42.7799468277965, + "learning_rate": 1.641025722759024e-05, + "loss": 1.3865, + "step": 49923 + }, + { + "epoch": 0.6, + "grad_norm": 13.207103334747687, + "learning_rate": 1.6409808848677725e-05, + "loss": 1.3761, + "step": 49926 + }, + { + "epoch": 0.6, + "grad_norm": 5.442510371213534, + "learning_rate": 1.6409360447890843e-05, + "loss": 1.6395, + "step": 49929 + }, + { + "epoch": 0.6, + "grad_norm": 6.741531382452793, + "learning_rate": 1.640891202523112e-05, + "loss": 1.7545, + "step": 49932 + }, + { + "epoch": 0.6, + "grad_norm": 26.80739591084506, + "learning_rate": 1.640846358070009e-05, + "loss": 1.7784, + "step": 49935 + }, + { + "epoch": 0.6, + "grad_norm": 9.609737433909274, + "learning_rate": 1.640801511429928e-05, + "loss": 1.3486, + "step": 49938 + }, + { + "epoch": 0.6, + "grad_norm": 25.71190542952759, + "learning_rate": 1.6407566626030226e-05, + "loss": 1.2055, + "step": 49941 + }, + { + "epoch": 0.6, + "grad_norm": 41.98711092313913, + "learning_rate": 1.6407118115894453e-05, + "loss": 1.4538, + "step": 49944 + }, + { + "epoch": 0.6, + "grad_norm": 9.55711691695182, + "learning_rate": 1.6406669583893494e-05, + "loss": 1.1901, + "step": 49947 + }, + { + "epoch": 0.6, + "grad_norm": 13.306622748306495, + "learning_rate": 1.6406221030028876e-05, + "loss": 1.5264, + "step": 49950 + }, + { + "epoch": 0.6, + "grad_norm": 7.042938234054386, + "learning_rate": 1.6405772454302138e-05, + "loss": 1.4621, + "step": 49953 + }, + { + "epoch": 0.6, + "grad_norm": 11.040118353006228, + "learning_rate": 1.6405323856714805e-05, + "loss": 1.5881, + "step": 49956 + }, + { + "epoch": 0.6, + "grad_norm": 13.941126309250736, + "learning_rate": 1.640487523726841e-05, + "loss": 1.2627, + "step": 49959 + }, + { + "epoch": 0.6, + "grad_norm": 4.59046614157291, + "learning_rate": 1.640442659596448e-05, + "loss": 1.5691, + "step": 49962 + }, + { + "epoch": 0.6, + "grad_norm": 25.539853367865167, + "learning_rate": 1.6403977932804548e-05, + "loss": 1.0859, + "step": 49965 + }, + { + "epoch": 0.6, + "grad_norm": 16.269108272158782, + "learning_rate": 1.640352924779015e-05, + "loss": 1.8051, + "step": 49968 + }, + { + "epoch": 0.6, + "grad_norm": 8.729743178732294, + "learning_rate": 1.640308054092281e-05, + "loss": 1.5901, + "step": 49971 + }, + { + "epoch": 0.6, + "grad_norm": 24.445810261611005, + "learning_rate": 1.6402631812204063e-05, + "loss": 1.2176, + "step": 49974 + }, + { + "epoch": 0.6, + "grad_norm": 3.3743230642076276, + "learning_rate": 1.6402183061635444e-05, + "loss": 1.1442, + "step": 49977 + }, + { + "epoch": 0.6, + "grad_norm": 18.94428814307328, + "learning_rate": 1.6401734289218474e-05, + "loss": 1.4026, + "step": 49980 + }, + { + "epoch": 0.6, + "grad_norm": 10.438775916844056, + "learning_rate": 1.6401285494954694e-05, + "loss": 1.6785, + "step": 49983 + }, + { + "epoch": 0.6, + "grad_norm": 18.8947033442507, + "learning_rate": 1.6400836678845634e-05, + "loss": 1.226, + "step": 49986 + }, + { + "epoch": 0.6, + "grad_norm": 23.872868403107333, + "learning_rate": 1.6400387840892823e-05, + "loss": 1.5957, + "step": 49989 + }, + { + "epoch": 0.6, + "grad_norm": 12.497389406592669, + "learning_rate": 1.639993898109779e-05, + "loss": 1.4951, + "step": 49992 + }, + { + "epoch": 0.6, + "grad_norm": 11.924139735543491, + "learning_rate": 1.6399490099462075e-05, + "loss": 1.3649, + "step": 49995 + }, + { + "epoch": 0.6, + "grad_norm": 8.240579101568784, + "learning_rate": 1.63990411959872e-05, + "loss": 1.2878, + "step": 49998 + }, + { + "epoch": 0.6, + "grad_norm": 14.945765704310611, + "learning_rate": 1.639859227067471e-05, + "loss": 1.0264, + "step": 50001 + }, + { + "epoch": 0.6, + "grad_norm": 22.352743440830803, + "learning_rate": 1.6398143323526124e-05, + "loss": 1.0922, + "step": 50004 + }, + { + "epoch": 0.6, + "grad_norm": 3.108518905719734, + "learning_rate": 1.6397694354542978e-05, + "loss": 1.4873, + "step": 50007 + }, + { + "epoch": 0.6, + "grad_norm": 7.37586502250966, + "learning_rate": 1.639724536372681e-05, + "loss": 1.2267, + "step": 50010 + }, + { + "epoch": 0.6, + "grad_norm": 13.409068556309075, + "learning_rate": 1.6396796351079145e-05, + "loss": 1.4384, + "step": 50013 + }, + { + "epoch": 0.6, + "grad_norm": 8.621327279509847, + "learning_rate": 1.639634731660152e-05, + "loss": 1.1131, + "step": 50016 + }, + { + "epoch": 0.6, + "grad_norm": 21.22833808446825, + "learning_rate": 1.6395898260295465e-05, + "loss": 1.3935, + "step": 50019 + }, + { + "epoch": 0.6, + "grad_norm": 11.38889898782046, + "learning_rate": 1.6395449182162516e-05, + "loss": 1.484, + "step": 50022 + }, + { + "epoch": 0.6, + "grad_norm": 11.42744343184609, + "learning_rate": 1.6395000082204197e-05, + "loss": 1.4166, + "step": 50025 + }, + { + "epoch": 0.6, + "grad_norm": 73.78743180309137, + "learning_rate": 1.639455096042205e-05, + "loss": 1.702, + "step": 50028 + }, + { + "epoch": 0.6, + "grad_norm": 16.473838325828172, + "learning_rate": 1.6394101816817604e-05, + "loss": 1.2416, + "step": 50031 + }, + { + "epoch": 0.6, + "grad_norm": 11.038261803288133, + "learning_rate": 1.639365265139239e-05, + "loss": 1.2802, + "step": 50034 + }, + { + "epoch": 0.6, + "grad_norm": 18.816377413436996, + "learning_rate": 1.639320346414794e-05, + "loss": 1.6032, + "step": 50037 + }, + { + "epoch": 0.6, + "grad_norm": 16.181292766257375, + "learning_rate": 1.6392754255085792e-05, + "loss": 1.6746, + "step": 50040 + }, + { + "epoch": 0.6, + "grad_norm": 10.756425304554684, + "learning_rate": 1.639230502420748e-05, + "loss": 1.2146, + "step": 50043 + }, + { + "epoch": 0.6, + "grad_norm": 80.2028279573854, + "learning_rate": 1.6391855771514527e-05, + "loss": 1.6006, + "step": 50046 + }, + { + "epoch": 0.6, + "grad_norm": 12.83085912380937, + "learning_rate": 1.6391406497008478e-05, + "loss": 1.6808, + "step": 50049 + }, + { + "epoch": 0.6, + "grad_norm": 21.899535012813264, + "learning_rate": 1.6390957200690855e-05, + "loss": 1.4181, + "step": 50052 + }, + { + "epoch": 0.6, + "grad_norm": 18.7867947552675, + "learning_rate": 1.6390507882563202e-05, + "loss": 1.1781, + "step": 50055 + }, + { + "epoch": 0.6, + "grad_norm": 9.732835178929507, + "learning_rate": 1.6390058542627042e-05, + "loss": 1.2226, + "step": 50058 + }, + { + "epoch": 0.6, + "grad_norm": 4.642218339244471, + "learning_rate": 1.638960918088392e-05, + "loss": 1.5733, + "step": 50061 + }, + { + "epoch": 0.6, + "grad_norm": 3.4342626245430905, + "learning_rate": 1.6389159797335363e-05, + "loss": 1.3674, + "step": 50064 + }, + { + "epoch": 0.6, + "grad_norm": 55.46806401625406, + "learning_rate": 1.63887103919829e-05, + "loss": 1.3023, + "step": 50067 + }, + { + "epoch": 0.6, + "grad_norm": 10.370779317047827, + "learning_rate": 1.6388260964828075e-05, + "loss": 1.5727, + "step": 50070 + }, + { + "epoch": 0.6, + "grad_norm": 27.475433822546837, + "learning_rate": 1.6387811515872412e-05, + "loss": 1.5478, + "step": 50073 + }, + { + "epoch": 0.6, + "grad_norm": 16.856930187967052, + "learning_rate": 1.638736204511745e-05, + "loss": 1.4014, + "step": 50076 + }, + { + "epoch": 0.6, + "grad_norm": 23.82127689408445, + "learning_rate": 1.6386912552564725e-05, + "loss": 1.8552, + "step": 50079 + }, + { + "epoch": 0.6, + "grad_norm": 9.93702862989274, + "learning_rate": 1.638646303821577e-05, + "loss": 1.2266, + "step": 50082 + }, + { + "epoch": 0.6, + "grad_norm": 82.31407069628149, + "learning_rate": 1.638601350207211e-05, + "loss": 1.2457, + "step": 50085 + }, + { + "epoch": 0.6, + "grad_norm": 43.27821021374154, + "learning_rate": 1.638556394413529e-05, + "loss": 1.3071, + "step": 50088 + }, + { + "epoch": 0.6, + "grad_norm": 8.122579715359667, + "learning_rate": 1.638511436440684e-05, + "loss": 1.6613, + "step": 50091 + }, + { + "epoch": 0.6, + "grad_norm": 3.3747429522628445, + "learning_rate": 1.6384664762888295e-05, + "loss": 1.7878, + "step": 50094 + }, + { + "epoch": 0.6, + "grad_norm": 11.288828585286918, + "learning_rate": 1.638421513958119e-05, + "loss": 1.5406, + "step": 50097 + }, + { + "epoch": 0.6, + "grad_norm": 6.877364402882842, + "learning_rate": 1.6383765494487055e-05, + "loss": 1.2017, + "step": 50100 + }, + { + "epoch": 0.6, + "grad_norm": 24.004274207802098, + "learning_rate": 1.638331582760743e-05, + "loss": 1.4348, + "step": 50103 + }, + { + "epoch": 0.6, + "grad_norm": 54.89560649439168, + "learning_rate": 1.6382866138943845e-05, + "loss": 1.7861, + "step": 50106 + }, + { + "epoch": 0.6, + "grad_norm": 18.424830770286817, + "learning_rate": 1.6382416428497842e-05, + "loss": 1.1463, + "step": 50109 + }, + { + "epoch": 0.6, + "grad_norm": 7.530592344691801, + "learning_rate": 1.6381966696270946e-05, + "loss": 1.55, + "step": 50112 + }, + { + "epoch": 0.6, + "grad_norm": 17.033921934416686, + "learning_rate": 1.6381516942264695e-05, + "loss": 1.5491, + "step": 50115 + }, + { + "epoch": 0.6, + "grad_norm": 18.00949765841162, + "learning_rate": 1.6381067166480627e-05, + "loss": 1.3951, + "step": 50118 + }, + { + "epoch": 0.6, + "grad_norm": 34.612440239505936, + "learning_rate": 1.638061736892028e-05, + "loss": 1.2652, + "step": 50121 + }, + { + "epoch": 0.6, + "grad_norm": 17.471017603066844, + "learning_rate": 1.638016754958518e-05, + "loss": 1.3673, + "step": 50124 + }, + { + "epoch": 0.6, + "grad_norm": 13.492251637679903, + "learning_rate": 1.6379717708476865e-05, + "loss": 1.3802, + "step": 50127 + }, + { + "epoch": 0.6, + "grad_norm": 34.58686653227797, + "learning_rate": 1.6379267845596875e-05, + "loss": 1.345, + "step": 50130 + }, + { + "epoch": 0.6, + "grad_norm": 4.631438836877367, + "learning_rate": 1.637881796094674e-05, + "loss": 1.2451, + "step": 50133 + }, + { + "epoch": 0.6, + "grad_norm": 13.6680992213976, + "learning_rate": 1.6378368054527998e-05, + "loss": 1.2779, + "step": 50136 + }, + { + "epoch": 0.6, + "grad_norm": 4.987465729446974, + "learning_rate": 1.637791812634218e-05, + "loss": 1.0357, + "step": 50139 + }, + { + "epoch": 0.6, + "grad_norm": 11.26545410696381, + "learning_rate": 1.6377468176390826e-05, + "loss": 1.5538, + "step": 50142 + }, + { + "epoch": 0.6, + "grad_norm": 11.135805772832386, + "learning_rate": 1.637701820467547e-05, + "loss": 1.6083, + "step": 50145 + }, + { + "epoch": 0.6, + "grad_norm": 5.437611964624432, + "learning_rate": 1.637656821119765e-05, + "loss": 1.6023, + "step": 50148 + }, + { + "epoch": 0.6, + "grad_norm": 12.86019059812783, + "learning_rate": 1.63761181959589e-05, + "loss": 1.4321, + "step": 50151 + }, + { + "epoch": 0.6, + "grad_norm": 18.568374028454127, + "learning_rate": 1.6375668158960754e-05, + "loss": 1.4066, + "step": 50154 + }, + { + "epoch": 0.6, + "grad_norm": 28.32907283769162, + "learning_rate": 1.6375218100204748e-05, + "loss": 1.5727, + "step": 50157 + }, + { + "epoch": 0.6, + "grad_norm": 30.34382019232957, + "learning_rate": 1.6374768019692422e-05, + "loss": 1.5859, + "step": 50160 + }, + { + "epoch": 0.6, + "grad_norm": 57.136289189033995, + "learning_rate": 1.6374317917425305e-05, + "loss": 1.2793, + "step": 50163 + }, + { + "epoch": 0.6, + "grad_norm": 2.241135915701987, + "learning_rate": 1.637386779340494e-05, + "loss": 1.1127, + "step": 50166 + }, + { + "epoch": 0.6, + "grad_norm": 12.367754655295514, + "learning_rate": 1.6373417647632858e-05, + "loss": 1.3144, + "step": 50169 + }, + { + "epoch": 0.6, + "grad_norm": 3.7230792901313206, + "learning_rate": 1.63729674801106e-05, + "loss": 1.5551, + "step": 50172 + }, + { + "epoch": 0.6, + "grad_norm": 13.655370632472385, + "learning_rate": 1.6372517290839692e-05, + "loss": 1.4746, + "step": 50175 + }, + { + "epoch": 0.6, + "grad_norm": 15.577973854065538, + "learning_rate": 1.6372067079821686e-05, + "loss": 1.295, + "step": 50178 + }, + { + "epoch": 0.6, + "grad_norm": 4.134452462155258, + "learning_rate": 1.637161684705811e-05, + "loss": 1.4722, + "step": 50181 + }, + { + "epoch": 0.6, + "grad_norm": 13.140407243160972, + "learning_rate": 1.6371166592550498e-05, + "loss": 1.3969, + "step": 50184 + }, + { + "epoch": 0.6, + "grad_norm": 9.117689090046312, + "learning_rate": 1.6370716316300388e-05, + "loss": 1.2949, + "step": 50187 + }, + { + "epoch": 0.6, + "grad_norm": 7.598825076332118, + "learning_rate": 1.6370266018309324e-05, + "loss": 1.4485, + "step": 50190 + }, + { + "epoch": 0.6, + "grad_norm": 3.7441501575349463, + "learning_rate": 1.6369815698578827e-05, + "loss": 1.3295, + "step": 50193 + }, + { + "epoch": 0.6, + "grad_norm": 11.59246504505615, + "learning_rate": 1.636936535711045e-05, + "loss": 1.3146, + "step": 50196 + }, + { + "epoch": 0.6, + "grad_norm": 43.040066042570764, + "learning_rate": 1.6368914993905726e-05, + "loss": 1.1951, + "step": 50199 + }, + { + "epoch": 0.6, + "grad_norm": 31.49438253246952, + "learning_rate": 1.636846460896618e-05, + "loss": 1.6102, + "step": 50202 + }, + { + "epoch": 0.6, + "grad_norm": 3.0111735920721445, + "learning_rate": 1.6368014202293367e-05, + "loss": 1.9263, + "step": 50205 + }, + { + "epoch": 0.6, + "grad_norm": 35.70121335801486, + "learning_rate": 1.636756377388881e-05, + "loss": 1.4354, + "step": 50208 + }, + { + "epoch": 0.6, + "grad_norm": 21.858533525836066, + "learning_rate": 1.6367113323754055e-05, + "loss": 1.2437, + "step": 50211 + }, + { + "epoch": 0.6, + "grad_norm": 5.746377713405899, + "learning_rate": 1.636666285189063e-05, + "loss": 1.3035, + "step": 50214 + }, + { + "epoch": 0.6, + "grad_norm": 17.35886541882437, + "learning_rate": 1.6366212358300085e-05, + "loss": 1.5173, + "step": 50217 + }, + { + "epoch": 0.6, + "grad_norm": 25.551655242016658, + "learning_rate": 1.636576184298395e-05, + "loss": 1.2654, + "step": 50220 + }, + { + "epoch": 0.6, + "grad_norm": 10.544192054575396, + "learning_rate": 1.6365311305943758e-05, + "loss": 1.3986, + "step": 50223 + }, + { + "epoch": 0.6, + "grad_norm": 14.540887331979578, + "learning_rate": 1.636486074718105e-05, + "loss": 1.7722, + "step": 50226 + }, + { + "epoch": 0.6, + "grad_norm": 23.573924108780357, + "learning_rate": 1.6364410166697368e-05, + "loss": 1.8043, + "step": 50229 + }, + { + "epoch": 0.6, + "grad_norm": 50.50696728495233, + "learning_rate": 1.636395956449425e-05, + "loss": 1.2904, + "step": 50232 + }, + { + "epoch": 0.6, + "grad_norm": 44.7241759442964, + "learning_rate": 1.6363508940573224e-05, + "loss": 1.4405, + "step": 50235 + }, + { + "epoch": 0.6, + "grad_norm": 19.379496082328327, + "learning_rate": 1.636305829493584e-05, + "loss": 1.0345, + "step": 50238 + }, + { + "epoch": 0.6, + "grad_norm": 24.11563224812324, + "learning_rate": 1.6362607627583625e-05, + "loss": 1.2776, + "step": 50241 + }, + { + "epoch": 0.6, + "grad_norm": 13.462711441366743, + "learning_rate": 1.6362156938518127e-05, + "loss": 1.3123, + "step": 50244 + }, + { + "epoch": 0.6, + "grad_norm": 15.992054533159193, + "learning_rate": 1.6361706227740874e-05, + "loss": 1.3934, + "step": 50247 + }, + { + "epoch": 0.6, + "grad_norm": 16.691319967244365, + "learning_rate": 1.636125549525341e-05, + "loss": 1.6388, + "step": 50250 + }, + { + "epoch": 0.6, + "grad_norm": 8.980731279825832, + "learning_rate": 1.6360804741057276e-05, + "loss": 1.0986, + "step": 50253 + }, + { + "epoch": 0.6, + "grad_norm": 5.913874835016291, + "learning_rate": 1.6360353965154e-05, + "loss": 1.5092, + "step": 50256 + }, + { + "epoch": 0.6, + "grad_norm": 19.651503576646892, + "learning_rate": 1.6359903167545133e-05, + "loss": 1.583, + "step": 50259 + }, + { + "epoch": 0.6, + "grad_norm": 13.728999944343396, + "learning_rate": 1.6359452348232206e-05, + "loss": 1.5518, + "step": 50262 + }, + { + "epoch": 0.6, + "grad_norm": 11.105707350952702, + "learning_rate": 1.6359001507216756e-05, + "loss": 1.1971, + "step": 50265 + }, + { + "epoch": 0.6, + "grad_norm": 7.412112323983463, + "learning_rate": 1.6358550644500325e-05, + "loss": 1.4808, + "step": 50268 + }, + { + "epoch": 0.6, + "grad_norm": 7.657705645488256, + "learning_rate": 1.6358099760084455e-05, + "loss": 1.4339, + "step": 50271 + }, + { + "epoch": 0.6, + "grad_norm": 18.12220082719774, + "learning_rate": 1.6357648853970678e-05, + "loss": 1.2953, + "step": 50274 + }, + { + "epoch": 0.6, + "grad_norm": 11.961571453508906, + "learning_rate": 1.6357197926160534e-05, + "loss": 1.2613, + "step": 50277 + }, + { + "epoch": 0.6, + "grad_norm": 20.801327222510448, + "learning_rate": 1.6356746976655563e-05, + "loss": 1.4685, + "step": 50280 + }, + { + "epoch": 0.6, + "grad_norm": 17.33334186382001, + "learning_rate": 1.6356296005457308e-05, + "loss": 0.8663, + "step": 50283 + }, + { + "epoch": 0.6, + "grad_norm": 10.483219401830512, + "learning_rate": 1.6355845012567302e-05, + "loss": 1.7696, + "step": 50286 + }, + { + "epoch": 0.6, + "grad_norm": 36.665830124125314, + "learning_rate": 1.6355393997987087e-05, + "loss": 1.284, + "step": 50289 + }, + { + "epoch": 0.6, + "grad_norm": 7.304655072255291, + "learning_rate": 1.63549429617182e-05, + "loss": 1.492, + "step": 50292 + }, + { + "epoch": 0.6, + "grad_norm": 18.325827428142347, + "learning_rate": 1.6354491903762185e-05, + "loss": 1.165, + "step": 50295 + }, + { + "epoch": 0.6, + "grad_norm": 13.492026266927688, + "learning_rate": 1.6354040824120576e-05, + "loss": 1.4227, + "step": 50298 + }, + { + "epoch": 0.6, + "grad_norm": 13.643461955961147, + "learning_rate": 1.6353589722794914e-05, + "loss": 1.1678, + "step": 50301 + }, + { + "epoch": 0.6, + "grad_norm": 6.52185870667167, + "learning_rate": 1.635313859978674e-05, + "loss": 1.4382, + "step": 50304 + }, + { + "epoch": 0.6, + "grad_norm": 18.885549287003826, + "learning_rate": 1.635268745509759e-05, + "loss": 1.3673, + "step": 50307 + }, + { + "epoch": 0.6, + "grad_norm": 8.975648920762932, + "learning_rate": 1.6352236288729006e-05, + "loss": 1.5035, + "step": 50310 + }, + { + "epoch": 0.6, + "grad_norm": 13.788399292759678, + "learning_rate": 1.6351785100682528e-05, + "loss": 1.1211, + "step": 50313 + }, + { + "epoch": 0.61, + "grad_norm": 4.125196447782674, + "learning_rate": 1.6351333890959697e-05, + "loss": 1.559, + "step": 50316 + }, + { + "epoch": 0.61, + "grad_norm": 29.985585161300587, + "learning_rate": 1.6350882659562052e-05, + "loss": 1.509, + "step": 50319 + }, + { + "epoch": 0.61, + "grad_norm": 4.6397035822661845, + "learning_rate": 1.635043140649113e-05, + "loss": 1.4836, + "step": 50322 + }, + { + "epoch": 0.61, + "grad_norm": 5.023594366577615, + "learning_rate": 1.6349980131748477e-05, + "loss": 1.1536, + "step": 50325 + }, + { + "epoch": 0.61, + "grad_norm": 16.05838937124761, + "learning_rate": 1.6349528835335622e-05, + "loss": 1.4148, + "step": 50328 + }, + { + "epoch": 0.61, + "grad_norm": 27.7944264115837, + "learning_rate": 1.6349077517254116e-05, + "loss": 1.5091, + "step": 50331 + }, + { + "epoch": 0.61, + "grad_norm": 10.615858566234072, + "learning_rate": 1.6348626177505494e-05, + "loss": 1.1142, + "step": 50334 + }, + { + "epoch": 0.61, + "grad_norm": 22.615715979576652, + "learning_rate": 1.63481748160913e-05, + "loss": 1.2603, + "step": 50337 + }, + { + "epoch": 0.61, + "grad_norm": 6.802061201573253, + "learning_rate": 1.634772343301307e-05, + "loss": 1.4675, + "step": 50340 + }, + { + "epoch": 0.61, + "grad_norm": 24.283971420626475, + "learning_rate": 1.634727202827235e-05, + "loss": 1.2214, + "step": 50343 + }, + { + "epoch": 0.61, + "grad_norm": 10.208899569357829, + "learning_rate": 1.634682060187067e-05, + "loss": 1.415, + "step": 50346 + }, + { + "epoch": 0.61, + "grad_norm": 18.77228319353399, + "learning_rate": 1.6346369153809584e-05, + "loss": 1.3472, + "step": 50349 + }, + { + "epoch": 0.61, + "grad_norm": 3.720041786313799, + "learning_rate": 1.634591768409062e-05, + "loss": 1.4155, + "step": 50352 + }, + { + "epoch": 0.61, + "grad_norm": 20.49933488808469, + "learning_rate": 1.6345466192715332e-05, + "loss": 1.1751, + "step": 50355 + }, + { + "epoch": 0.61, + "grad_norm": 5.973819232519108, + "learning_rate": 1.6345014679685246e-05, + "loss": 1.2329, + "step": 50358 + }, + { + "epoch": 0.61, + "grad_norm": 5.2273480532195045, + "learning_rate": 1.634456314500192e-05, + "loss": 1.1277, + "step": 50361 + }, + { + "epoch": 0.61, + "grad_norm": 3.5719715229433837, + "learning_rate": 1.6344111588666877e-05, + "loss": 1.6154, + "step": 50364 + }, + { + "epoch": 0.61, + "grad_norm": 133.81456470469467, + "learning_rate": 1.6343660010681666e-05, + "loss": 1.2183, + "step": 50367 + }, + { + "epoch": 0.61, + "grad_norm": 21.4198208531298, + "learning_rate": 1.6343208411047832e-05, + "loss": 1.4569, + "step": 50370 + }, + { + "epoch": 0.61, + "grad_norm": 10.544110659045328, + "learning_rate": 1.634275678976691e-05, + "loss": 1.5173, + "step": 50373 + }, + { + "epoch": 0.61, + "grad_norm": 33.13662956483053, + "learning_rate": 1.6342305146840447e-05, + "loss": 1.6918, + "step": 50376 + }, + { + "epoch": 0.61, + "grad_norm": 25.47191884573238, + "learning_rate": 1.634185348226998e-05, + "loss": 1.4997, + "step": 50379 + }, + { + "epoch": 0.61, + "grad_norm": 27.62064532667364, + "learning_rate": 1.634140179605705e-05, + "loss": 1.05, + "step": 50382 + }, + { + "epoch": 0.61, + "grad_norm": 12.236033347428249, + "learning_rate": 1.63409500882032e-05, + "loss": 1.2397, + "step": 50385 + }, + { + "epoch": 0.61, + "grad_norm": 35.00397557391744, + "learning_rate": 1.634049835870997e-05, + "loss": 1.3015, + "step": 50388 + }, + { + "epoch": 0.61, + "grad_norm": 8.396290610288908, + "learning_rate": 1.6340046607578907e-05, + "loss": 1.3113, + "step": 50391 + }, + { + "epoch": 0.61, + "grad_norm": 33.982394839486986, + "learning_rate": 1.6339594834811548e-05, + "loss": 1.4738, + "step": 50394 + }, + { + "epoch": 0.61, + "grad_norm": 7.825496113235674, + "learning_rate": 1.633914304040943e-05, + "loss": 1.3202, + "step": 50397 + }, + { + "epoch": 0.61, + "grad_norm": 20.393864857928406, + "learning_rate": 1.6338691224374107e-05, + "loss": 1.4225, + "step": 50400 + }, + { + "epoch": 0.61, + "grad_norm": 16.932315788365365, + "learning_rate": 1.6338239386707107e-05, + "loss": 1.4013, + "step": 50403 + }, + { + "epoch": 0.61, + "grad_norm": 25.644572239394915, + "learning_rate": 1.6337787527409987e-05, + "loss": 1.3951, + "step": 50406 + }, + { + "epoch": 0.61, + "grad_norm": 23.575517094127488, + "learning_rate": 1.6337335646484276e-05, + "loss": 1.4576, + "step": 50409 + }, + { + "epoch": 0.61, + "grad_norm": 7.048824650821964, + "learning_rate": 1.633688374393152e-05, + "loss": 1.5251, + "step": 50412 + }, + { + "epoch": 0.61, + "grad_norm": 32.34680271412166, + "learning_rate": 1.633643181975327e-05, + "loss": 1.3254, + "step": 50415 + }, + { + "epoch": 0.61, + "grad_norm": 16.80369409752576, + "learning_rate": 1.6335979873951048e-05, + "loss": 1.498, + "step": 50418 + }, + { + "epoch": 0.61, + "grad_norm": 10.734712998904607, + "learning_rate": 1.6335527906526417e-05, + "loss": 1.4102, + "step": 50421 + }, + { + "epoch": 0.61, + "grad_norm": 9.778833499850558, + "learning_rate": 1.633507591748091e-05, + "loss": 1.4106, + "step": 50424 + }, + { + "epoch": 0.61, + "grad_norm": 9.480357272816674, + "learning_rate": 1.6334623906816073e-05, + "loss": 1.3366, + "step": 50427 + }, + { + "epoch": 0.61, + "grad_norm": 8.541481807399567, + "learning_rate": 1.633417187453344e-05, + "loss": 1.2835, + "step": 50430 + }, + { + "epoch": 0.61, + "grad_norm": 215.57314042962722, + "learning_rate": 1.6333719820634567e-05, + "loss": 1.2856, + "step": 50433 + }, + { + "epoch": 0.61, + "grad_norm": 44.48814584728247, + "learning_rate": 1.633326774512099e-05, + "loss": 1.5495, + "step": 50436 + }, + { + "epoch": 0.61, + "grad_norm": 5.638703026048092, + "learning_rate": 1.6332815647994243e-05, + "loss": 1.4446, + "step": 50439 + }, + { + "epoch": 0.61, + "grad_norm": 14.065516732488428, + "learning_rate": 1.6332363529255884e-05, + "loss": 1.3288, + "step": 50442 + }, + { + "epoch": 0.61, + "grad_norm": 23.780168923745965, + "learning_rate": 1.6331911388907448e-05, + "loss": 1.43, + "step": 50445 + }, + { + "epoch": 0.61, + "grad_norm": 67.78563572480887, + "learning_rate": 1.633145922695048e-05, + "loss": 1.1628, + "step": 50448 + }, + { + "epoch": 0.61, + "grad_norm": 5.895849692833204, + "learning_rate": 1.633100704338652e-05, + "loss": 1.4998, + "step": 50451 + }, + { + "epoch": 0.61, + "grad_norm": 26.31539041968632, + "learning_rate": 1.6330554838217113e-05, + "loss": 1.0185, + "step": 50454 + }, + { + "epoch": 0.61, + "grad_norm": 93.64972171372047, + "learning_rate": 1.6330102611443805e-05, + "loss": 1.0161, + "step": 50457 + }, + { + "epoch": 0.61, + "grad_norm": 27.70433477696465, + "learning_rate": 1.6329650363068137e-05, + "loss": 1.45, + "step": 50460 + }, + { + "epoch": 0.61, + "grad_norm": 19.691812264861937, + "learning_rate": 1.632919809309165e-05, + "loss": 1.4247, + "step": 50463 + }, + { + "epoch": 0.61, + "grad_norm": 112.4166553912302, + "learning_rate": 1.6328745801515892e-05, + "loss": 1.4204, + "step": 50466 + }, + { + "epoch": 0.61, + "grad_norm": 5.821552017511638, + "learning_rate": 1.6328293488342404e-05, + "loss": 1.1625, + "step": 50469 + }, + { + "epoch": 0.61, + "grad_norm": 11.21036323976944, + "learning_rate": 1.6327841153572727e-05, + "loss": 1.3419, + "step": 50472 + }, + { + "epoch": 0.61, + "grad_norm": 4.976683169396742, + "learning_rate": 1.632738879720841e-05, + "loss": 1.482, + "step": 50475 + }, + { + "epoch": 0.61, + "grad_norm": 7.584051061633335, + "learning_rate": 1.632693641925099e-05, + "loss": 1.392, + "step": 50478 + }, + { + "epoch": 0.61, + "grad_norm": 11.34671774594287, + "learning_rate": 1.6326484019702023e-05, + "loss": 1.37, + "step": 50481 + }, + { + "epoch": 0.61, + "grad_norm": 9.487060788913048, + "learning_rate": 1.632603159856304e-05, + "loss": 1.5389, + "step": 50484 + }, + { + "epoch": 0.61, + "grad_norm": 8.568767489890678, + "learning_rate": 1.6325579155835588e-05, + "loss": 1.4143, + "step": 50487 + }, + { + "epoch": 0.61, + "grad_norm": 18.98503873277241, + "learning_rate": 1.6325126691521217e-05, + "loss": 1.2505, + "step": 50490 + }, + { + "epoch": 0.61, + "grad_norm": 19.00826491522148, + "learning_rate": 1.632467420562146e-05, + "loss": 1.5146, + "step": 50493 + }, + { + "epoch": 0.61, + "grad_norm": 4.517856204046158, + "learning_rate": 1.6324221698137877e-05, + "loss": 1.687, + "step": 50496 + }, + { + "epoch": 0.61, + "grad_norm": 24.266189807608956, + "learning_rate": 1.6323769169071998e-05, + "loss": 1.3965, + "step": 50499 + }, + { + "epoch": 0.61, + "grad_norm": 30.17882694386987, + "learning_rate": 1.6323316618425372e-05, + "loss": 1.4633, + "step": 50502 + }, + { + "epoch": 0.61, + "grad_norm": 22.554424457410967, + "learning_rate": 1.6322864046199546e-05, + "loss": 1.1301, + "step": 50505 + }, + { + "epoch": 0.61, + "grad_norm": 20.054271269000516, + "learning_rate": 1.6322411452396064e-05, + "loss": 1.3888, + "step": 50508 + }, + { + "epoch": 0.61, + "grad_norm": 4.319979274573868, + "learning_rate": 1.6321958837016467e-05, + "loss": 1.2728, + "step": 50511 + }, + { + "epoch": 0.61, + "grad_norm": 15.060392350630702, + "learning_rate": 1.6321506200062303e-05, + "loss": 1.3867, + "step": 50514 + }, + { + "epoch": 0.61, + "grad_norm": 9.854543254802616, + "learning_rate": 1.6321053541535112e-05, + "loss": 1.787, + "step": 50517 + }, + { + "epoch": 0.61, + "grad_norm": 17.38506510259565, + "learning_rate": 1.6320600861436445e-05, + "loss": 1.3975, + "step": 50520 + }, + { + "epoch": 0.61, + "grad_norm": 13.752386014533553, + "learning_rate": 1.6320148159767844e-05, + "loss": 1.2923, + "step": 50523 + }, + { + "epoch": 0.61, + "grad_norm": 21.887478391318325, + "learning_rate": 1.6319695436530852e-05, + "loss": 1.0961, + "step": 50526 + }, + { + "epoch": 0.61, + "grad_norm": 13.484346801280706, + "learning_rate": 1.6319242691727016e-05, + "loss": 1.3624, + "step": 50529 + }, + { + "epoch": 0.61, + "grad_norm": 5.248441303487158, + "learning_rate": 1.6318789925357884e-05, + "loss": 1.438, + "step": 50532 + }, + { + "epoch": 0.61, + "grad_norm": 9.25560361447697, + "learning_rate": 1.6318337137424994e-05, + "loss": 1.9485, + "step": 50535 + }, + { + "epoch": 0.61, + "grad_norm": 8.255517552419274, + "learning_rate": 1.6317884327929898e-05, + "loss": 1.5865, + "step": 50538 + }, + { + "epoch": 0.61, + "grad_norm": 9.809524593387902, + "learning_rate": 1.631743149687414e-05, + "loss": 1.116, + "step": 50541 + }, + { + "epoch": 0.61, + "grad_norm": 10.716991758506698, + "learning_rate": 1.631697864425926e-05, + "loss": 1.4496, + "step": 50544 + }, + { + "epoch": 0.61, + "grad_norm": 21.752306097658806, + "learning_rate": 1.6316525770086808e-05, + "loss": 1.2629, + "step": 50547 + }, + { + "epoch": 0.61, + "grad_norm": 18.10445086177608, + "learning_rate": 1.631607287435833e-05, + "loss": 1.6684, + "step": 50550 + }, + { + "epoch": 0.61, + "grad_norm": 38.01061661885165, + "learning_rate": 1.6315619957075367e-05, + "loss": 1.5917, + "step": 50553 + }, + { + "epoch": 0.61, + "grad_norm": 21.52104785594867, + "learning_rate": 1.631516701823947e-05, + "loss": 1.3648, + "step": 50556 + }, + { + "epoch": 0.61, + "grad_norm": 14.905178092451978, + "learning_rate": 1.6314714057852184e-05, + "loss": 1.1838, + "step": 50559 + }, + { + "epoch": 0.61, + "grad_norm": 49.314407046403275, + "learning_rate": 1.6314261075915054e-05, + "loss": 1.5911, + "step": 50562 + }, + { + "epoch": 0.61, + "grad_norm": 27.221670727433875, + "learning_rate": 1.6313808072429623e-05, + "loss": 1.3922, + "step": 50565 + }, + { + "epoch": 0.61, + "grad_norm": 46.93876365181675, + "learning_rate": 1.6313355047397442e-05, + "loss": 1.1413, + "step": 50568 + }, + { + "epoch": 0.61, + "grad_norm": 88.37290465748322, + "learning_rate": 1.631290200082005e-05, + "loss": 1.4873, + "step": 50571 + }, + { + "epoch": 0.61, + "grad_norm": 19.840770947608277, + "learning_rate": 1.6312448932699e-05, + "loss": 1.2333, + "step": 50574 + }, + { + "epoch": 0.61, + "grad_norm": 13.017510495815662, + "learning_rate": 1.6311995843035838e-05, + "loss": 1.3627, + "step": 50577 + }, + { + "epoch": 0.61, + "grad_norm": 2.6260065973011124, + "learning_rate": 1.6311542731832105e-05, + "loss": 1.5076, + "step": 50580 + }, + { + "epoch": 0.61, + "grad_norm": 28.18368056655924, + "learning_rate": 1.6311089599089348e-05, + "loss": 1.5009, + "step": 50583 + }, + { + "epoch": 0.61, + "grad_norm": 32.944325098161094, + "learning_rate": 1.6310636444809118e-05, + "loss": 1.4691, + "step": 50586 + }, + { + "epoch": 0.61, + "grad_norm": 55.187384701820484, + "learning_rate": 1.631018326899296e-05, + "loss": 1.3506, + "step": 50589 + }, + { + "epoch": 0.61, + "grad_norm": 20.823203795364154, + "learning_rate": 1.630973007164242e-05, + "loss": 1.606, + "step": 50592 + }, + { + "epoch": 0.61, + "grad_norm": 8.592892669156821, + "learning_rate": 1.630927685275904e-05, + "loss": 1.2548, + "step": 50595 + }, + { + "epoch": 0.61, + "grad_norm": 8.063546836124017, + "learning_rate": 1.6308823612344375e-05, + "loss": 1.0257, + "step": 50598 + }, + { + "epoch": 0.61, + "grad_norm": 24.13439883843339, + "learning_rate": 1.630837035039996e-05, + "loss": 1.0532, + "step": 50601 + }, + { + "epoch": 0.61, + "grad_norm": 4.247496695159921, + "learning_rate": 1.630791706692736e-05, + "loss": 1.3074, + "step": 50604 + }, + { + "epoch": 0.61, + "grad_norm": 12.389536852312244, + "learning_rate": 1.6307463761928103e-05, + "loss": 1.412, + "step": 50607 + }, + { + "epoch": 0.61, + "grad_norm": 58.31266971073346, + "learning_rate": 1.6307010435403746e-05, + "loss": 1.4243, + "step": 50610 + }, + { + "epoch": 0.61, + "grad_norm": 9.357016894803149, + "learning_rate": 1.6306557087355836e-05, + "loss": 1.488, + "step": 50613 + }, + { + "epoch": 0.61, + "grad_norm": 8.844989611067685, + "learning_rate": 1.6306103717785916e-05, + "loss": 1.2533, + "step": 50616 + }, + { + "epoch": 0.61, + "grad_norm": 5.858453064332371, + "learning_rate": 1.6305650326695538e-05, + "loss": 1.5548, + "step": 50619 + }, + { + "epoch": 0.61, + "grad_norm": 16.800179749426594, + "learning_rate": 1.6305196914086244e-05, + "loss": 1.6412, + "step": 50622 + }, + { + "epoch": 0.61, + "grad_norm": 7.551624349116254, + "learning_rate": 1.6304743479959586e-05, + "loss": 1.278, + "step": 50625 + }, + { + "epoch": 0.61, + "grad_norm": 12.481000802471614, + "learning_rate": 1.6304290024317108e-05, + "loss": 1.2924, + "step": 50628 + }, + { + "epoch": 0.61, + "grad_norm": 10.114791752010689, + "learning_rate": 1.630383654716036e-05, + "loss": 1.3243, + "step": 50631 + }, + { + "epoch": 0.61, + "grad_norm": 37.679290561477536, + "learning_rate": 1.630338304849089e-05, + "loss": 1.7904, + "step": 50634 + }, + { + "epoch": 0.61, + "grad_norm": 12.98137024560166, + "learning_rate": 1.6302929528310243e-05, + "loss": 1.3168, + "step": 50637 + }, + { + "epoch": 0.61, + "grad_norm": 14.938491024211281, + "learning_rate": 1.6302475986619962e-05, + "loss": 1.3441, + "step": 50640 + }, + { + "epoch": 0.61, + "grad_norm": 4.50668973895256, + "learning_rate": 1.6302022423421607e-05, + "loss": 1.2841, + "step": 50643 + }, + { + "epoch": 0.61, + "grad_norm": 51.85704946087064, + "learning_rate": 1.630156883871672e-05, + "loss": 1.1774, + "step": 50646 + }, + { + "epoch": 0.61, + "grad_norm": 6.731289612714408, + "learning_rate": 1.6301115232506847e-05, + "loss": 1.4462, + "step": 50649 + }, + { + "epoch": 0.61, + "grad_norm": 5.731621758982675, + "learning_rate": 1.6300661604793537e-05, + "loss": 1.4703, + "step": 50652 + }, + { + "epoch": 0.61, + "grad_norm": 13.08183324613001, + "learning_rate": 1.630020795557834e-05, + "loss": 1.4267, + "step": 50655 + }, + { + "epoch": 0.61, + "grad_norm": 3.3786073216511663, + "learning_rate": 1.6299754284862798e-05, + "loss": 1.2638, + "step": 50658 + }, + { + "epoch": 0.61, + "grad_norm": 32.249089567042304, + "learning_rate": 1.6299300592648467e-05, + "loss": 1.5846, + "step": 50661 + }, + { + "epoch": 0.61, + "grad_norm": 6.075638585459598, + "learning_rate": 1.6298846878936894e-05, + "loss": 1.5409, + "step": 50664 + }, + { + "epoch": 0.61, + "grad_norm": 11.652983809216003, + "learning_rate": 1.6298393143729623e-05, + "loss": 1.3324, + "step": 50667 + }, + { + "epoch": 0.61, + "grad_norm": 14.218634144289366, + "learning_rate": 1.6297939387028204e-05, + "loss": 1.208, + "step": 50670 + }, + { + "epoch": 0.61, + "grad_norm": 17.328012569813268, + "learning_rate": 1.629748560883419e-05, + "loss": 1.3063, + "step": 50673 + }, + { + "epoch": 0.61, + "grad_norm": 21.070345243587433, + "learning_rate": 1.6297031809149122e-05, + "loss": 1.151, + "step": 50676 + }, + { + "epoch": 0.61, + "grad_norm": 27.2249439746966, + "learning_rate": 1.6296577987974554e-05, + "loss": 1.0557, + "step": 50679 + }, + { + "epoch": 0.61, + "grad_norm": 22.944671462407054, + "learning_rate": 1.6296124145312033e-05, + "loss": 1.1574, + "step": 50682 + }, + { + "epoch": 0.61, + "grad_norm": 2.361928910831372, + "learning_rate": 1.629567028116311e-05, + "loss": 1.8958, + "step": 50685 + }, + { + "epoch": 0.61, + "grad_norm": 5.4541783328764675, + "learning_rate": 1.629521639552933e-05, + "loss": 1.7072, + "step": 50688 + }, + { + "epoch": 0.61, + "grad_norm": 11.250857666200465, + "learning_rate": 1.6294762488412246e-05, + "loss": 1.4856, + "step": 50691 + }, + { + "epoch": 0.61, + "grad_norm": 26.691961942162735, + "learning_rate": 1.6294308559813402e-05, + "loss": 1.5488, + "step": 50694 + }, + { + "epoch": 0.61, + "grad_norm": 29.195935019246892, + "learning_rate": 1.629385460973435e-05, + "loss": 1.4279, + "step": 50697 + }, + { + "epoch": 0.61, + "grad_norm": 33.072919946572576, + "learning_rate": 1.629340063817664e-05, + "loss": 1.1992, + "step": 50700 + }, + { + "epoch": 0.61, + "grad_norm": 9.258665176252347, + "learning_rate": 1.6292946645141825e-05, + "loss": 1.4584, + "step": 50703 + }, + { + "epoch": 0.61, + "grad_norm": 14.398327941942675, + "learning_rate": 1.6292492630631444e-05, + "loss": 1.1799, + "step": 50706 + }, + { + "epoch": 0.61, + "grad_norm": 16.18683383680849, + "learning_rate": 1.6292038594647056e-05, + "loss": 1.517, + "step": 50709 + }, + { + "epoch": 0.61, + "grad_norm": 10.270577589715433, + "learning_rate": 1.6291584537190207e-05, + "loss": 1.4392, + "step": 50712 + }, + { + "epoch": 0.61, + "grad_norm": 22.170112996474543, + "learning_rate": 1.6291130458262442e-05, + "loss": 1.4351, + "step": 50715 + }, + { + "epoch": 0.61, + "grad_norm": 18.111124458124014, + "learning_rate": 1.629067635786532e-05, + "loss": 1.3283, + "step": 50718 + }, + { + "epoch": 0.61, + "grad_norm": 12.62434220278693, + "learning_rate": 1.6290222236000383e-05, + "loss": 1.4614, + "step": 50721 + }, + { + "epoch": 0.61, + "grad_norm": 7.814846235251057, + "learning_rate": 1.628976809266918e-05, + "loss": 1.4388, + "step": 50724 + }, + { + "epoch": 0.61, + "grad_norm": 19.739742145331082, + "learning_rate": 1.6289313927873266e-05, + "loss": 1.2428, + "step": 50727 + }, + { + "epoch": 0.61, + "grad_norm": 6.210702520596093, + "learning_rate": 1.628885974161419e-05, + "loss": 1.4821, + "step": 50730 + }, + { + "epoch": 0.61, + "grad_norm": 2.6292129733514127, + "learning_rate": 1.6288405533893505e-05, + "loss": 1.4771, + "step": 50733 + }, + { + "epoch": 0.61, + "grad_norm": 19.84392328424716, + "learning_rate": 1.628795130471275e-05, + "loss": 1.4455, + "step": 50736 + }, + { + "epoch": 0.61, + "grad_norm": 16.490973769686843, + "learning_rate": 1.6287497054073484e-05, + "loss": 1.7535, + "step": 50739 + }, + { + "epoch": 0.61, + "grad_norm": 13.151602452434737, + "learning_rate": 1.628704278197726e-05, + "loss": 1.2883, + "step": 50742 + }, + { + "epoch": 0.61, + "grad_norm": 5.377572802258229, + "learning_rate": 1.6286588488425616e-05, + "loss": 1.5448, + "step": 50745 + }, + { + "epoch": 0.61, + "grad_norm": 12.505669523505185, + "learning_rate": 1.6286134173420112e-05, + "loss": 1.4059, + "step": 50748 + }, + { + "epoch": 0.61, + "grad_norm": 37.271424835410926, + "learning_rate": 1.62856798369623e-05, + "loss": 1.8957, + "step": 50751 + }, + { + "epoch": 0.61, + "grad_norm": 7.308028598677842, + "learning_rate": 1.6285225479053727e-05, + "loss": 1.1885, + "step": 50754 + }, + { + "epoch": 0.61, + "grad_norm": 33.18166595970493, + "learning_rate": 1.6284771099695938e-05, + "loss": 0.9824, + "step": 50757 + }, + { + "epoch": 0.61, + "grad_norm": 14.08655291925772, + "learning_rate": 1.6284316698890493e-05, + "loss": 1.6249, + "step": 50760 + }, + { + "epoch": 0.61, + "grad_norm": 21.06129920147242, + "learning_rate": 1.628386227663894e-05, + "loss": 1.5269, + "step": 50763 + }, + { + "epoch": 0.61, + "grad_norm": 9.670316734556815, + "learning_rate": 1.6283407832942825e-05, + "loss": 2.0087, + "step": 50766 + }, + { + "epoch": 0.61, + "grad_norm": 8.18320435466256, + "learning_rate": 1.6282953367803706e-05, + "loss": 1.5244, + "step": 50769 + }, + { + "epoch": 0.61, + "grad_norm": 13.000646469243627, + "learning_rate": 1.6282498881223127e-05, + "loss": 1.3204, + "step": 50772 + }, + { + "epoch": 0.61, + "grad_norm": 10.221550735189393, + "learning_rate": 1.628204437320264e-05, + "loss": 1.9116, + "step": 50775 + }, + { + "epoch": 0.61, + "grad_norm": 6.432203422962207, + "learning_rate": 1.6281589843743803e-05, + "loss": 1.2716, + "step": 50778 + }, + { + "epoch": 0.61, + "grad_norm": 26.590031096297373, + "learning_rate": 1.628113529284816e-05, + "loss": 1.3006, + "step": 50781 + }, + { + "epoch": 0.61, + "grad_norm": 13.176203090812145, + "learning_rate": 1.6280680720517267e-05, + "loss": 1.2859, + "step": 50784 + }, + { + "epoch": 0.61, + "grad_norm": 13.577065409293946, + "learning_rate": 1.628022612675267e-05, + "loss": 1.3487, + "step": 50787 + }, + { + "epoch": 0.61, + "grad_norm": 18.277351358110902, + "learning_rate": 1.6279771511555926e-05, + "loss": 1.3649, + "step": 50790 + }, + { + "epoch": 0.61, + "grad_norm": 7.930972476582836, + "learning_rate": 1.6279316874928584e-05, + "loss": 1.3031, + "step": 50793 + }, + { + "epoch": 0.61, + "grad_norm": 23.098675705479433, + "learning_rate": 1.6278862216872192e-05, + "loss": 1.4615, + "step": 50796 + }, + { + "epoch": 0.61, + "grad_norm": 30.681752192135182, + "learning_rate": 1.627840753738831e-05, + "loss": 1.7288, + "step": 50799 + }, + { + "epoch": 0.61, + "grad_norm": 18.19977617660875, + "learning_rate": 1.6277952836478477e-05, + "loss": 1.3699, + "step": 50802 + }, + { + "epoch": 0.61, + "grad_norm": 3.9158872043936666, + "learning_rate": 1.6277498114144262e-05, + "loss": 1.3743, + "step": 50805 + }, + { + "epoch": 0.61, + "grad_norm": 14.042152413007639, + "learning_rate": 1.62770433703872e-05, + "loss": 1.2367, + "step": 50808 + }, + { + "epoch": 0.61, + "grad_norm": 15.10679380690374, + "learning_rate": 1.6276588605208852e-05, + "loss": 1.3569, + "step": 50811 + }, + { + "epoch": 0.61, + "grad_norm": 55.13768384515223, + "learning_rate": 1.627613381861077e-05, + "loss": 1.5903, + "step": 50814 + }, + { + "epoch": 0.61, + "grad_norm": 10.774730529185891, + "learning_rate": 1.6275679010594498e-05, + "loss": 1.3551, + "step": 50817 + }, + { + "epoch": 0.61, + "grad_norm": 17.35575634072597, + "learning_rate": 1.62752241811616e-05, + "loss": 1.9311, + "step": 50820 + }, + { + "epoch": 0.61, + "grad_norm": 19.90699043682575, + "learning_rate": 1.627476933031362e-05, + "loss": 1.8162, + "step": 50823 + }, + { + "epoch": 0.61, + "grad_norm": 22.514774674777804, + "learning_rate": 1.627431445805211e-05, + "loss": 1.3749, + "step": 50826 + }, + { + "epoch": 0.61, + "grad_norm": 15.84109506853987, + "learning_rate": 1.627385956437863e-05, + "loss": 1.424, + "step": 50829 + }, + { + "epoch": 0.61, + "grad_norm": 2.9452091331429533, + "learning_rate": 1.6273404649294725e-05, + "loss": 1.4659, + "step": 50832 + }, + { + "epoch": 0.61, + "grad_norm": 6.4853286028876935, + "learning_rate": 1.6272949712801947e-05, + "loss": 1.8649, + "step": 50835 + }, + { + "epoch": 0.61, + "grad_norm": 6.488938120638776, + "learning_rate": 1.6272494754901855e-05, + "loss": 1.1022, + "step": 50838 + }, + { + "epoch": 0.61, + "grad_norm": 19.209896345899335, + "learning_rate": 1.6272039775595994e-05, + "loss": 1.2874, + "step": 50841 + }, + { + "epoch": 0.61, + "grad_norm": 6.5089730418794085, + "learning_rate": 1.627158477488592e-05, + "loss": 1.4424, + "step": 50844 + }, + { + "epoch": 0.61, + "grad_norm": 5.4900665471757515, + "learning_rate": 1.6271129752773193e-05, + "loss": 1.6035, + "step": 50847 + }, + { + "epoch": 0.61, + "grad_norm": 8.83719297488335, + "learning_rate": 1.6270674709259354e-05, + "loss": 1.323, + "step": 50850 + }, + { + "epoch": 0.61, + "grad_norm": 34.70458161369105, + "learning_rate": 1.627021964434596e-05, + "loss": 1.2016, + "step": 50853 + }, + { + "epoch": 0.61, + "grad_norm": 11.744236026280129, + "learning_rate": 1.6269764558034565e-05, + "loss": 1.4256, + "step": 50856 + }, + { + "epoch": 0.61, + "grad_norm": 5.179050506338627, + "learning_rate": 1.6269309450326725e-05, + "loss": 1.628, + "step": 50859 + }, + { + "epoch": 0.61, + "grad_norm": 16.66369485233078, + "learning_rate": 1.6268854321223988e-05, + "loss": 1.1086, + "step": 50862 + }, + { + "epoch": 0.61, + "grad_norm": 14.05833142811824, + "learning_rate": 1.6268399170727912e-05, + "loss": 1.2636, + "step": 50865 + }, + { + "epoch": 0.61, + "grad_norm": 17.248914082229415, + "learning_rate": 1.626794399884004e-05, + "loss": 1.0873, + "step": 50868 + }, + { + "epoch": 0.61, + "grad_norm": 3.53911539161882, + "learning_rate": 1.626748880556194e-05, + "loss": 1.7414, + "step": 50871 + }, + { + "epoch": 0.61, + "grad_norm": 16.317473128245318, + "learning_rate": 1.6267033590895157e-05, + "loss": 1.2708, + "step": 50874 + }, + { + "epoch": 0.61, + "grad_norm": 18.242422158322277, + "learning_rate": 1.6266578354841245e-05, + "loss": 1.5524, + "step": 50877 + }, + { + "epoch": 0.61, + "grad_norm": 29.75959482532663, + "learning_rate": 1.6266123097401755e-05, + "loss": 1.5075, + "step": 50880 + }, + { + "epoch": 0.61, + "grad_norm": 5.603748988664907, + "learning_rate": 1.6265667818578246e-05, + "loss": 1.7441, + "step": 50883 + }, + { + "epoch": 0.61, + "grad_norm": 5.308029595462225, + "learning_rate": 1.6265212518372272e-05, + "loss": 1.4693, + "step": 50886 + }, + { + "epoch": 0.61, + "grad_norm": 28.68081024297366, + "learning_rate": 1.6264757196785384e-05, + "loss": 1.4563, + "step": 50889 + }, + { + "epoch": 0.61, + "grad_norm": 15.830010057244394, + "learning_rate": 1.6264301853819137e-05, + "loss": 1.4441, + "step": 50892 + }, + { + "epoch": 0.61, + "grad_norm": 40.15866515939401, + "learning_rate": 1.6263846489475083e-05, + "loss": 1.2867, + "step": 50895 + }, + { + "epoch": 0.61, + "grad_norm": 4.860425519661578, + "learning_rate": 1.6263391103754773e-05, + "loss": 1.3298, + "step": 50898 + }, + { + "epoch": 0.61, + "grad_norm": 123.72986043695892, + "learning_rate": 1.626293569665977e-05, + "loss": 1.3005, + "step": 50901 + }, + { + "epoch": 0.61, + "grad_norm": 21.1407679907157, + "learning_rate": 1.626248026819162e-05, + "loss": 1.4165, + "step": 50904 + }, + { + "epoch": 0.61, + "grad_norm": 29.0277902154862, + "learning_rate": 1.6262024818351883e-05, + "loss": 1.3075, + "step": 50907 + }, + { + "epoch": 0.61, + "grad_norm": 11.406242812234062, + "learning_rate": 1.6261569347142114e-05, + "loss": 1.3164, + "step": 50910 + }, + { + "epoch": 0.61, + "grad_norm": 7.546552006322803, + "learning_rate": 1.6261113854563858e-05, + "loss": 1.2923, + "step": 50913 + }, + { + "epoch": 0.61, + "grad_norm": 76.01921015135342, + "learning_rate": 1.6260658340618677e-05, + "loss": 1.0408, + "step": 50916 + }, + { + "epoch": 0.61, + "grad_norm": 9.552492390070768, + "learning_rate": 1.626020280530813e-05, + "loss": 1.1383, + "step": 50919 + }, + { + "epoch": 0.61, + "grad_norm": 38.841599055967684, + "learning_rate": 1.625974724863376e-05, + "loss": 1.2329, + "step": 50922 + }, + { + "epoch": 0.61, + "grad_norm": 9.801905381576345, + "learning_rate": 1.6259291670597127e-05, + "loss": 1.0611, + "step": 50925 + }, + { + "epoch": 0.61, + "grad_norm": 6.415951897850971, + "learning_rate": 1.625883607119979e-05, + "loss": 1.1445, + "step": 50928 + }, + { + "epoch": 0.61, + "grad_norm": 56.79867813177799, + "learning_rate": 1.6258380450443297e-05, + "loss": 1.7911, + "step": 50931 + }, + { + "epoch": 0.61, + "grad_norm": 7.423557123507699, + "learning_rate": 1.6257924808329208e-05, + "loss": 1.5115, + "step": 50934 + }, + { + "epoch": 0.61, + "grad_norm": 14.524213450782495, + "learning_rate": 1.625746914485907e-05, + "loss": 1.3539, + "step": 50937 + }, + { + "epoch": 0.61, + "grad_norm": 26.407380489097434, + "learning_rate": 1.625701346003445e-05, + "loss": 1.315, + "step": 50940 + }, + { + "epoch": 0.61, + "grad_norm": 52.06151335770715, + "learning_rate": 1.6256557753856896e-05, + "loss": 1.4838, + "step": 50943 + }, + { + "epoch": 0.61, + "grad_norm": 26.953795296214057, + "learning_rate": 1.6256102026327963e-05, + "loss": 1.2852, + "step": 50946 + }, + { + "epoch": 0.61, + "grad_norm": 45.790158193596426, + "learning_rate": 1.6255646277449203e-05, + "loss": 1.8844, + "step": 50949 + }, + { + "epoch": 0.61, + "grad_norm": 11.22016023422398, + "learning_rate": 1.6255190507222183e-05, + "loss": 1.6441, + "step": 50952 + }, + { + "epoch": 0.61, + "grad_norm": 8.014351945576713, + "learning_rate": 1.6254734715648445e-05, + "loss": 1.3843, + "step": 50955 + }, + { + "epoch": 0.61, + "grad_norm": 8.478893514912048, + "learning_rate": 1.6254278902729556e-05, + "loss": 1.8222, + "step": 50958 + }, + { + "epoch": 0.61, + "grad_norm": 20.82777937152293, + "learning_rate": 1.6253823068467062e-05, + "loss": 1.4749, + "step": 50961 + }, + { + "epoch": 0.61, + "grad_norm": 26.97295127481564, + "learning_rate": 1.625336721286252e-05, + "loss": 1.524, + "step": 50964 + }, + { + "epoch": 0.61, + "grad_norm": 23.294167339518104, + "learning_rate": 1.6252911335917493e-05, + "loss": 1.4134, + "step": 50967 + }, + { + "epoch": 0.61, + "grad_norm": 14.648305224058078, + "learning_rate": 1.6252455437633532e-05, + "loss": 1.4183, + "step": 50970 + }, + { + "epoch": 0.61, + "grad_norm": 2.5883895965189576, + "learning_rate": 1.625199951801219e-05, + "loss": 1.4396, + "step": 50973 + }, + { + "epoch": 0.61, + "grad_norm": 16.24992003909471, + "learning_rate": 1.6251543577055025e-05, + "loss": 1.1842, + "step": 50976 + }, + { + "epoch": 0.61, + "grad_norm": 56.69767662388028, + "learning_rate": 1.6251087614763593e-05, + "loss": 1.531, + "step": 50979 + }, + { + "epoch": 0.61, + "grad_norm": 51.91641939292093, + "learning_rate": 1.6250631631139456e-05, + "loss": 1.4511, + "step": 50982 + }, + { + "epoch": 0.61, + "grad_norm": 26.184275245590783, + "learning_rate": 1.625017562618416e-05, + "loss": 1.3775, + "step": 50985 + }, + { + "epoch": 0.61, + "grad_norm": 7.1312144948484715, + "learning_rate": 1.6249719599899267e-05, + "loss": 1.6009, + "step": 50988 + }, + { + "epoch": 0.61, + "grad_norm": 14.979567635963559, + "learning_rate": 1.6249263552286335e-05, + "loss": 1.3136, + "step": 50991 + }, + { + "epoch": 0.61, + "grad_norm": 94.88897618402306, + "learning_rate": 1.6248807483346915e-05, + "loss": 1.4729, + "step": 50994 + }, + { + "epoch": 0.61, + "grad_norm": 21.022407544681307, + "learning_rate": 1.6248351393082564e-05, + "loss": 1.2916, + "step": 50997 + }, + { + "epoch": 0.61, + "grad_norm": 7.613298623356686, + "learning_rate": 1.6247895281494846e-05, + "loss": 1.3928, + "step": 51000 + }, + { + "epoch": 0.61, + "grad_norm": 4.313712023564359, + "learning_rate": 1.6247439148585303e-05, + "loss": 1.2509, + "step": 51003 + }, + { + "epoch": 0.61, + "grad_norm": 11.783252889243624, + "learning_rate": 1.6246982994355507e-05, + "loss": 1.1586, + "step": 51006 + }, + { + "epoch": 0.61, + "grad_norm": 14.319284196872523, + "learning_rate": 1.6246526818807006e-05, + "loss": 1.6406, + "step": 51009 + }, + { + "epoch": 0.61, + "grad_norm": 16.52834934038019, + "learning_rate": 1.624607062194136e-05, + "loss": 1.4893, + "step": 51012 + }, + { + "epoch": 0.61, + "grad_norm": 13.828901240862288, + "learning_rate": 1.6245614403760125e-05, + "loss": 1.6051, + "step": 51015 + }, + { + "epoch": 0.61, + "grad_norm": 23.406000164578877, + "learning_rate": 1.624515816426486e-05, + "loss": 1.2187, + "step": 51018 + }, + { + "epoch": 0.61, + "grad_norm": 11.955955582697083, + "learning_rate": 1.6244701903457115e-05, + "loss": 0.9423, + "step": 51021 + }, + { + "epoch": 0.61, + "grad_norm": 8.377339083493704, + "learning_rate": 1.6244245621338453e-05, + "loss": 1.2262, + "step": 51024 + }, + { + "epoch": 0.61, + "grad_norm": 11.970482128083272, + "learning_rate": 1.6243789317910434e-05, + "loss": 1.4677, + "step": 51027 + }, + { + "epoch": 0.61, + "grad_norm": 14.617369788751153, + "learning_rate": 1.6243332993174603e-05, + "loss": 1.4537, + "step": 51030 + }, + { + "epoch": 0.61, + "grad_norm": 6.96624651223764, + "learning_rate": 1.624287664713253e-05, + "loss": 1.1136, + "step": 51033 + }, + { + "epoch": 0.61, + "grad_norm": 48.480475008885236, + "learning_rate": 1.624242027978577e-05, + "loss": 1.5823, + "step": 51036 + }, + { + "epoch": 0.61, + "grad_norm": 10.95502075232533, + "learning_rate": 1.6241963891135873e-05, + "loss": 1.3727, + "step": 51039 + }, + { + "epoch": 0.61, + "grad_norm": 50.49355783211072, + "learning_rate": 1.6241507481184407e-05, + "loss": 1.4833, + "step": 51042 + }, + { + "epoch": 0.61, + "grad_norm": 12.406462230402237, + "learning_rate": 1.624105104993292e-05, + "loss": 1.6854, + "step": 51045 + }, + { + "epoch": 0.61, + "grad_norm": 17.388377577359734, + "learning_rate": 1.6240594597382978e-05, + "loss": 1.4232, + "step": 51048 + }, + { + "epoch": 0.61, + "grad_norm": 12.26356491803516, + "learning_rate": 1.6240138123536128e-05, + "loss": 1.4846, + "step": 51051 + }, + { + "epoch": 0.61, + "grad_norm": 7.671937684585517, + "learning_rate": 1.623968162839394e-05, + "loss": 1.7242, + "step": 51054 + }, + { + "epoch": 0.61, + "grad_norm": 12.030586921268426, + "learning_rate": 1.6239225111957963e-05, + "loss": 1.7109, + "step": 51057 + }, + { + "epoch": 0.61, + "grad_norm": 6.960318506866319, + "learning_rate": 1.623876857422976e-05, + "loss": 1.4004, + "step": 51060 + }, + { + "epoch": 0.61, + "grad_norm": 5.520662364270283, + "learning_rate": 1.6238312015210885e-05, + "loss": 1.1322, + "step": 51063 + }, + { + "epoch": 0.61, + "grad_norm": 12.279869426390635, + "learning_rate": 1.62378554349029e-05, + "loss": 1.1846, + "step": 51066 + }, + { + "epoch": 0.61, + "grad_norm": 18.715263861122335, + "learning_rate": 1.623739883330736e-05, + "loss": 1.2539, + "step": 51069 + }, + { + "epoch": 0.61, + "grad_norm": 22.0756358690799, + "learning_rate": 1.6236942210425825e-05, + "loss": 1.6018, + "step": 51072 + }, + { + "epoch": 0.61, + "grad_norm": 3.048554704245988, + "learning_rate": 1.6236485566259853e-05, + "loss": 1.1829, + "step": 51075 + }, + { + "epoch": 0.61, + "grad_norm": 5.479380132004395, + "learning_rate": 1.6236028900811e-05, + "loss": 1.2566, + "step": 51078 + }, + { + "epoch": 0.61, + "grad_norm": 26.155755683657247, + "learning_rate": 1.623557221408083e-05, + "loss": 1.3874, + "step": 51081 + }, + { + "epoch": 0.61, + "grad_norm": 47.62176828957738, + "learning_rate": 1.6235115506070894e-05, + "loss": 1.3302, + "step": 51084 + }, + { + "epoch": 0.61, + "grad_norm": 6.09267074139035, + "learning_rate": 1.623465877678276e-05, + "loss": 0.9859, + "step": 51087 + }, + { + "epoch": 0.61, + "grad_norm": 27.481951065582177, + "learning_rate": 1.623420202621798e-05, + "loss": 1.3051, + "step": 51090 + }, + { + "epoch": 0.61, + "grad_norm": 48.4235601499328, + "learning_rate": 1.623374525437811e-05, + "loss": 1.581, + "step": 51093 + }, + { + "epoch": 0.61, + "grad_norm": 20.186067602584416, + "learning_rate": 1.6233288461264717e-05, + "loss": 1.3191, + "step": 51096 + }, + { + "epoch": 0.61, + "grad_norm": 34.87077180455939, + "learning_rate": 1.6232831646879353e-05, + "loss": 1.4211, + "step": 51099 + }, + { + "epoch": 0.61, + "grad_norm": 31.763130733885557, + "learning_rate": 1.6232374811223582e-05, + "loss": 0.9074, + "step": 51102 + }, + { + "epoch": 0.61, + "grad_norm": 2.1392705765507736, + "learning_rate": 1.6231917954298956e-05, + "loss": 1.5523, + "step": 51105 + }, + { + "epoch": 0.61, + "grad_norm": 8.198979712188539, + "learning_rate": 1.6231461076107044e-05, + "loss": 1.292, + "step": 51108 + }, + { + "epoch": 0.61, + "grad_norm": 8.139862916664455, + "learning_rate": 1.6231004176649397e-05, + "loss": 1.1054, + "step": 51111 + }, + { + "epoch": 0.61, + "grad_norm": 70.58318271639835, + "learning_rate": 1.6230547255927578e-05, + "loss": 1.5084, + "step": 51114 + }, + { + "epoch": 0.61, + "grad_norm": 14.159690618955025, + "learning_rate": 1.6230090313943148e-05, + "loss": 1.4332, + "step": 51117 + }, + { + "epoch": 0.61, + "grad_norm": 15.07686042780713, + "learning_rate": 1.622963335069766e-05, + "loss": 1.2065, + "step": 51120 + }, + { + "epoch": 0.61, + "grad_norm": 5.626597551096252, + "learning_rate": 1.6229176366192677e-05, + "loss": 1.3523, + "step": 51123 + }, + { + "epoch": 0.61, + "grad_norm": 9.304681551058296, + "learning_rate": 1.6228719360429763e-05, + "loss": 1.9227, + "step": 51126 + }, + { + "epoch": 0.61, + "grad_norm": 29.530911783536087, + "learning_rate": 1.6228262333410472e-05, + "loss": 1.6009, + "step": 51129 + }, + { + "epoch": 0.61, + "grad_norm": 10.790611579392108, + "learning_rate": 1.6227805285136363e-05, + "loss": 1.7038, + "step": 51132 + }, + { + "epoch": 0.61, + "grad_norm": 7.082956482688607, + "learning_rate": 1.6227348215609e-05, + "loss": 1.7101, + "step": 51135 + }, + { + "epoch": 0.61, + "grad_norm": 15.567013588032207, + "learning_rate": 1.622689112482994e-05, + "loss": 1.2561, + "step": 51138 + }, + { + "epoch": 0.61, + "grad_norm": 88.39771953088652, + "learning_rate": 1.622643401280074e-05, + "loss": 1.5159, + "step": 51141 + }, + { + "epoch": 0.61, + "grad_norm": 29.51459136716285, + "learning_rate": 1.6225976879522967e-05, + "loss": 1.1846, + "step": 51144 + }, + { + "epoch": 0.62, + "grad_norm": 11.391082084783935, + "learning_rate": 1.6225519724998177e-05, + "loss": 1.6194, + "step": 51147 + }, + { + "epoch": 0.62, + "grad_norm": 28.518089104321025, + "learning_rate": 1.622506254922793e-05, + "loss": 1.5599, + "step": 51150 + }, + { + "epoch": 0.62, + "grad_norm": 39.40248176793188, + "learning_rate": 1.6224605352213786e-05, + "loss": 1.3154, + "step": 51153 + }, + { + "epoch": 0.62, + "grad_norm": 11.315133679718976, + "learning_rate": 1.622414813395731e-05, + "loss": 1.4997, + "step": 51156 + }, + { + "epoch": 0.62, + "grad_norm": 6.222801877629206, + "learning_rate": 1.6223690894460055e-05, + "loss": 1.1127, + "step": 51159 + }, + { + "epoch": 0.62, + "grad_norm": 11.63762381214965, + "learning_rate": 1.6223233633723588e-05, + "loss": 1.6427, + "step": 51162 + }, + { + "epoch": 0.62, + "grad_norm": 13.272324433748118, + "learning_rate": 1.622277635174946e-05, + "loss": 1.6392, + "step": 51165 + }, + { + "epoch": 0.62, + "grad_norm": 16.87897576260947, + "learning_rate": 1.622231904853924e-05, + "loss": 1.1146, + "step": 51168 + }, + { + "epoch": 0.62, + "grad_norm": 64.69541139289842, + "learning_rate": 1.6221861724094492e-05, + "loss": 1.5922, + "step": 51171 + }, + { + "epoch": 0.62, + "grad_norm": 14.137967024888004, + "learning_rate": 1.6221404378416765e-05, + "loss": 1.2724, + "step": 51174 + }, + { + "epoch": 0.62, + "grad_norm": 16.199971848884726, + "learning_rate": 1.6220947011507625e-05, + "loss": 1.453, + "step": 51177 + }, + { + "epoch": 0.62, + "grad_norm": 10.658222975151697, + "learning_rate": 1.6220489623368637e-05, + "loss": 1.3752, + "step": 51180 + }, + { + "epoch": 0.62, + "grad_norm": 22.843209088822594, + "learning_rate": 1.622003221400136e-05, + "loss": 1.6967, + "step": 51183 + }, + { + "epoch": 0.62, + "grad_norm": 9.19509986007558, + "learning_rate": 1.621957478340735e-05, + "loss": 1.4026, + "step": 51186 + }, + { + "epoch": 0.62, + "grad_norm": 11.181897070170017, + "learning_rate": 1.6219117331588173e-05, + "loss": 1.3027, + "step": 51189 + }, + { + "epoch": 0.62, + "grad_norm": 4.022746422983988, + "learning_rate": 1.6218659858545388e-05, + "loss": 1.46, + "step": 51192 + }, + { + "epoch": 0.62, + "grad_norm": 4.908020683046851, + "learning_rate": 1.6218202364280558e-05, + "loss": 1.4372, + "step": 51195 + }, + { + "epoch": 0.62, + "grad_norm": 10.78853498706446, + "learning_rate": 1.6217744848795243e-05, + "loss": 1.3736, + "step": 51198 + }, + { + "epoch": 0.62, + "grad_norm": 24.853457419665492, + "learning_rate": 1.6217287312091003e-05, + "loss": 1.5271, + "step": 51201 + }, + { + "epoch": 0.62, + "grad_norm": 23.517352123991955, + "learning_rate": 1.62168297541694e-05, + "loss": 1.3241, + "step": 51204 + }, + { + "epoch": 0.62, + "grad_norm": 12.52191977297917, + "learning_rate": 1.6216372175032e-05, + "loss": 1.475, + "step": 51207 + }, + { + "epoch": 0.62, + "grad_norm": 21.450859345893363, + "learning_rate": 1.6215914574680362e-05, + "loss": 1.2827, + "step": 51210 + }, + { + "epoch": 0.62, + "grad_norm": 9.555622859051972, + "learning_rate": 1.621545695311604e-05, + "loss": 1.3109, + "step": 51213 + }, + { + "epoch": 0.62, + "grad_norm": 18.57507709322342, + "learning_rate": 1.6214999310340607e-05, + "loss": 1.4371, + "step": 51216 + }, + { + "epoch": 0.62, + "grad_norm": 4.329350669080439, + "learning_rate": 1.6214541646355617e-05, + "loss": 1.3927, + "step": 51219 + }, + { + "epoch": 0.62, + "grad_norm": 10.1107717535409, + "learning_rate": 1.6214083961162635e-05, + "loss": 1.3674, + "step": 51222 + }, + { + "epoch": 0.62, + "grad_norm": 12.43944797057577, + "learning_rate": 1.6213626254763226e-05, + "loss": 1.68, + "step": 51225 + }, + { + "epoch": 0.62, + "grad_norm": 6.916543203731025, + "learning_rate": 1.6213168527158946e-05, + "loss": 1.2192, + "step": 51228 + }, + { + "epoch": 0.62, + "grad_norm": 14.166147442106265, + "learning_rate": 1.621271077835136e-05, + "loss": 1.3723, + "step": 51231 + }, + { + "epoch": 0.62, + "grad_norm": 74.91033825408313, + "learning_rate": 1.6212253008342033e-05, + "loss": 0.9764, + "step": 51234 + }, + { + "epoch": 0.62, + "grad_norm": 13.048730922780926, + "learning_rate": 1.621179521713252e-05, + "loss": 1.3741, + "step": 51237 + }, + { + "epoch": 0.62, + "grad_norm": 20.430614506035692, + "learning_rate": 1.621133740472439e-05, + "loss": 1.413, + "step": 51240 + }, + { + "epoch": 0.62, + "grad_norm": 4.633610335130575, + "learning_rate": 1.6210879571119202e-05, + "loss": 1.207, + "step": 51243 + }, + { + "epoch": 0.62, + "grad_norm": 28.90470597134922, + "learning_rate": 1.6210421716318522e-05, + "loss": 1.2325, + "step": 51246 + }, + { + "epoch": 0.62, + "grad_norm": 29.16922126269497, + "learning_rate": 1.6209963840323902e-05, + "loss": 1.6709, + "step": 51249 + }, + { + "epoch": 0.62, + "grad_norm": 12.812219811050358, + "learning_rate": 1.6209505943136917e-05, + "loss": 1.2676, + "step": 51252 + }, + { + "epoch": 0.62, + "grad_norm": 23.71024181893112, + "learning_rate": 1.6209048024759127e-05, + "loss": 1.7045, + "step": 51255 + }, + { + "epoch": 0.62, + "grad_norm": 9.685090071921406, + "learning_rate": 1.6208590085192092e-05, + "loss": 1.409, + "step": 51258 + }, + { + "epoch": 0.62, + "grad_norm": 4.882347398057046, + "learning_rate": 1.620813212443737e-05, + "loss": 1.109, + "step": 51261 + }, + { + "epoch": 0.62, + "grad_norm": 8.215297460227458, + "learning_rate": 1.6207674142496535e-05, + "loss": 1.3467, + "step": 51264 + }, + { + "epoch": 0.62, + "grad_norm": 37.01926714358674, + "learning_rate": 1.6207216139371143e-05, + "loss": 1.2065, + "step": 51267 + }, + { + "epoch": 0.62, + "grad_norm": 47.05221792340708, + "learning_rate": 1.620675811506276e-05, + "loss": 1.8572, + "step": 51270 + }, + { + "epoch": 0.62, + "grad_norm": 15.488059327796352, + "learning_rate": 1.6206300069572943e-05, + "loss": 1.1276, + "step": 51273 + }, + { + "epoch": 0.62, + "grad_norm": 18.250668050619584, + "learning_rate": 1.6205842002903258e-05, + "loss": 1.2224, + "step": 51276 + }, + { + "epoch": 0.62, + "grad_norm": 15.254688842242919, + "learning_rate": 1.6205383915055276e-05, + "loss": 1.0983, + "step": 51279 + }, + { + "epoch": 0.62, + "grad_norm": 3.007789235759844, + "learning_rate": 1.620492580603055e-05, + "loss": 1.6058, + "step": 51282 + }, + { + "epoch": 0.62, + "grad_norm": 11.968549737498211, + "learning_rate": 1.6204467675830645e-05, + "loss": 1.4873, + "step": 51285 + }, + { + "epoch": 0.62, + "grad_norm": 12.977780526808788, + "learning_rate": 1.620400952445713e-05, + "loss": 1.4725, + "step": 51288 + }, + { + "epoch": 0.62, + "grad_norm": 10.21165333596083, + "learning_rate": 1.6203551351911565e-05, + "loss": 1.3979, + "step": 51291 + }, + { + "epoch": 0.62, + "grad_norm": 8.288754687455565, + "learning_rate": 1.6203093158195514e-05, + "loss": 1.3964, + "step": 51294 + }, + { + "epoch": 0.62, + "grad_norm": 6.711871010122546, + "learning_rate": 1.6202634943310537e-05, + "loss": 1.4033, + "step": 51297 + }, + { + "epoch": 0.62, + "grad_norm": 2.656084682977241, + "learning_rate": 1.6202176707258203e-05, + "loss": 1.5103, + "step": 51300 + }, + { + "epoch": 0.62, + "grad_norm": 7.877281467749434, + "learning_rate": 1.6201718450040074e-05, + "loss": 1.8834, + "step": 51303 + }, + { + "epoch": 0.62, + "grad_norm": 11.088697195220332, + "learning_rate": 1.6201260171657716e-05, + "loss": 1.3685, + "step": 51306 + }, + { + "epoch": 0.62, + "grad_norm": 15.697988547284098, + "learning_rate": 1.620080187211269e-05, + "loss": 1.3358, + "step": 51309 + }, + { + "epoch": 0.62, + "grad_norm": 5.479395802739568, + "learning_rate": 1.620034355140656e-05, + "loss": 1.3329, + "step": 51312 + }, + { + "epoch": 0.62, + "grad_norm": 7.611010331687193, + "learning_rate": 1.619988520954089e-05, + "loss": 1.2888, + "step": 51315 + }, + { + "epoch": 0.62, + "grad_norm": 20.583843133545763, + "learning_rate": 1.6199426846517247e-05, + "loss": 1.8246, + "step": 51318 + }, + { + "epoch": 0.62, + "grad_norm": 30.289125955374185, + "learning_rate": 1.619896846233719e-05, + "loss": 1.2975, + "step": 51321 + }, + { + "epoch": 0.62, + "grad_norm": 9.949578438518868, + "learning_rate": 1.6198510057002287e-05, + "loss": 1.194, + "step": 51324 + }, + { + "epoch": 0.62, + "grad_norm": 11.074711864326835, + "learning_rate": 1.6198051630514103e-05, + "loss": 1.2715, + "step": 51327 + }, + { + "epoch": 0.62, + "grad_norm": 30.41795296638768, + "learning_rate": 1.6197593182874203e-05, + "loss": 1.4679, + "step": 51330 + }, + { + "epoch": 0.62, + "grad_norm": 4.995675181086535, + "learning_rate": 1.6197134714084147e-05, + "loss": 1.5426, + "step": 51333 + }, + { + "epoch": 0.62, + "grad_norm": 10.804665842027028, + "learning_rate": 1.6196676224145505e-05, + "loss": 1.2478, + "step": 51336 + }, + { + "epoch": 0.62, + "grad_norm": 30.178004255824934, + "learning_rate": 1.6196217713059835e-05, + "loss": 1.2986, + "step": 51339 + }, + { + "epoch": 0.62, + "grad_norm": 13.23066809815154, + "learning_rate": 1.619575918082871e-05, + "loss": 1.3617, + "step": 51342 + }, + { + "epoch": 0.62, + "grad_norm": 11.139976735562206, + "learning_rate": 1.6195300627453693e-05, + "loss": 1.5998, + "step": 51345 + }, + { + "epoch": 0.62, + "grad_norm": 3.071251321548621, + "learning_rate": 1.6194842052936338e-05, + "loss": 1.5123, + "step": 51348 + }, + { + "epoch": 0.62, + "grad_norm": 17.70364016519204, + "learning_rate": 1.6194383457278227e-05, + "loss": 1.4263, + "step": 51351 + }, + { + "epoch": 0.62, + "grad_norm": 34.4215204864793, + "learning_rate": 1.619392484048091e-05, + "loss": 1.1026, + "step": 51354 + }, + { + "epoch": 0.62, + "grad_norm": 6.830231744926127, + "learning_rate": 1.619346620254596e-05, + "loss": 1.3512, + "step": 51357 + }, + { + "epoch": 0.62, + "grad_norm": 18.22338731900697, + "learning_rate": 1.6193007543474945e-05, + "loss": 1.1906, + "step": 51360 + }, + { + "epoch": 0.62, + "grad_norm": 20.274080881429203, + "learning_rate": 1.6192548863269423e-05, + "loss": 1.1541, + "step": 51363 + }, + { + "epoch": 0.62, + "grad_norm": 4.4168370461123985, + "learning_rate": 1.619209016193096e-05, + "loss": 1.5972, + "step": 51366 + }, + { + "epoch": 0.62, + "grad_norm": 15.098653903085694, + "learning_rate": 1.6191631439461128e-05, + "loss": 1.5138, + "step": 51369 + }, + { + "epoch": 0.62, + "grad_norm": 14.41886427736263, + "learning_rate": 1.6191172695861485e-05, + "loss": 1.3439, + "step": 51372 + }, + { + "epoch": 0.62, + "grad_norm": 50.81435001331154, + "learning_rate": 1.6190713931133598e-05, + "loss": 1.41, + "step": 51375 + }, + { + "epoch": 0.62, + "grad_norm": 15.762832615773995, + "learning_rate": 1.619025514527904e-05, + "loss": 1.7983, + "step": 51378 + }, + { + "epoch": 0.62, + "grad_norm": 23.36854310094654, + "learning_rate": 1.6189796338299366e-05, + "loss": 1.2851, + "step": 51381 + }, + { + "epoch": 0.62, + "grad_norm": 25.373955016238103, + "learning_rate": 1.618933751019615e-05, + "loss": 1.4459, + "step": 51384 + }, + { + "epoch": 0.62, + "grad_norm": 15.401015458052273, + "learning_rate": 1.618887866097095e-05, + "loss": 1.3766, + "step": 51387 + }, + { + "epoch": 0.62, + "grad_norm": 12.151881557350876, + "learning_rate": 1.618841979062534e-05, + "loss": 1.4071, + "step": 51390 + }, + { + "epoch": 0.62, + "grad_norm": 5.700480289341004, + "learning_rate": 1.618796089916088e-05, + "loss": 1.633, + "step": 51393 + }, + { + "epoch": 0.62, + "grad_norm": 12.679988577687984, + "learning_rate": 1.618750198657914e-05, + "loss": 1.2964, + "step": 51396 + }, + { + "epoch": 0.62, + "grad_norm": 18.17335389299421, + "learning_rate": 1.6187043052881687e-05, + "loss": 1.3898, + "step": 51399 + }, + { + "epoch": 0.62, + "grad_norm": 2.632571870090598, + "learning_rate": 1.618658409807008e-05, + "loss": 1.601, + "step": 51402 + }, + { + "epoch": 0.62, + "grad_norm": 7.257060436048349, + "learning_rate": 1.6186125122145894e-05, + "loss": 1.426, + "step": 51405 + }, + { + "epoch": 0.62, + "grad_norm": 53.8548939948946, + "learning_rate": 1.6185666125110687e-05, + "loss": 0.9771, + "step": 51408 + }, + { + "epoch": 0.62, + "grad_norm": 26.704049592376954, + "learning_rate": 1.6185207106966032e-05, + "loss": 1.3041, + "step": 51411 + }, + { + "epoch": 0.62, + "grad_norm": 7.5006213200974825, + "learning_rate": 1.6184748067713493e-05, + "loss": 1.2376, + "step": 51414 + }, + { + "epoch": 0.62, + "grad_norm": 68.3530506646943, + "learning_rate": 1.6184289007354635e-05, + "loss": 1.5078, + "step": 51417 + }, + { + "epoch": 0.62, + "grad_norm": 17.60585356973894, + "learning_rate": 1.618382992589103e-05, + "loss": 1.5446, + "step": 51420 + }, + { + "epoch": 0.62, + "grad_norm": 35.39172384461626, + "learning_rate": 1.6183370823324237e-05, + "loss": 1.5755, + "step": 51423 + }, + { + "epoch": 0.62, + "grad_norm": 10.182331500738961, + "learning_rate": 1.618291169965583e-05, + "loss": 1.1249, + "step": 51426 + }, + { + "epoch": 0.62, + "grad_norm": 14.10264928503059, + "learning_rate": 1.618245255488737e-05, + "loss": 1.5925, + "step": 51429 + }, + { + "epoch": 0.62, + "grad_norm": 12.981111381407938, + "learning_rate": 1.6181993389020426e-05, + "loss": 1.8745, + "step": 51432 + }, + { + "epoch": 0.62, + "grad_norm": 41.335718899704986, + "learning_rate": 1.6181534202056566e-05, + "loss": 1.699, + "step": 51435 + }, + { + "epoch": 0.62, + "grad_norm": 6.29051749844175, + "learning_rate": 1.6181074993997354e-05, + "loss": 1.5028, + "step": 51438 + }, + { + "epoch": 0.62, + "grad_norm": 23.791610731990147, + "learning_rate": 1.6180615764844365e-05, + "loss": 1.4845, + "step": 51441 + }, + { + "epoch": 0.62, + "grad_norm": 23.764263626654824, + "learning_rate": 1.6180156514599154e-05, + "loss": 1.6614, + "step": 51444 + }, + { + "epoch": 0.62, + "grad_norm": 14.092271215089777, + "learning_rate": 1.61796972432633e-05, + "loss": 1.4512, + "step": 51447 + }, + { + "epoch": 0.62, + "grad_norm": 9.669026631995173, + "learning_rate": 1.6179237950838363e-05, + "loss": 1.3395, + "step": 51450 + }, + { + "epoch": 0.62, + "grad_norm": 7.024449575225342, + "learning_rate": 1.6178778637325914e-05, + "loss": 1.5755, + "step": 51453 + }, + { + "epoch": 0.62, + "grad_norm": 42.256274299225865, + "learning_rate": 1.6178319302727518e-05, + "loss": 1.4294, + "step": 51456 + }, + { + "epoch": 0.62, + "grad_norm": 21.544128050492677, + "learning_rate": 1.6177859947044744e-05, + "loss": 1.7476, + "step": 51459 + }, + { + "epoch": 0.62, + "grad_norm": 11.484840800347607, + "learning_rate": 1.6177400570279157e-05, + "loss": 1.3645, + "step": 51462 + }, + { + "epoch": 0.62, + "grad_norm": 109.71601366722723, + "learning_rate": 1.6176941172432326e-05, + "loss": 1.6106, + "step": 51465 + }, + { + "epoch": 0.62, + "grad_norm": 50.07273889120983, + "learning_rate": 1.6176481753505827e-05, + "loss": 1.7115, + "step": 51468 + }, + { + "epoch": 0.62, + "grad_norm": 13.747219331431586, + "learning_rate": 1.6176022313501215e-05, + "loss": 1.4528, + "step": 51471 + }, + { + "epoch": 0.62, + "grad_norm": 19.940178380570423, + "learning_rate": 1.6175562852420067e-05, + "loss": 1.0228, + "step": 51474 + }, + { + "epoch": 0.62, + "grad_norm": 11.578007342392986, + "learning_rate": 1.617510337026394e-05, + "loss": 1.2007, + "step": 51477 + }, + { + "epoch": 0.62, + "grad_norm": 21.120787246254025, + "learning_rate": 1.6174643867034418e-05, + "loss": 1.9593, + "step": 51480 + }, + { + "epoch": 0.62, + "grad_norm": 10.120609035503966, + "learning_rate": 1.6174184342733056e-05, + "loss": 1.4574, + "step": 51483 + }, + { + "epoch": 0.62, + "grad_norm": 10.472650936910483, + "learning_rate": 1.617372479736143e-05, + "loss": 1.1418, + "step": 51486 + }, + { + "epoch": 0.62, + "grad_norm": 24.03132621746885, + "learning_rate": 1.61732652309211e-05, + "loss": 1.5652, + "step": 51489 + }, + { + "epoch": 0.62, + "grad_norm": 16.83758488135926, + "learning_rate": 1.6172805643413646e-05, + "loss": 1.3594, + "step": 51492 + }, + { + "epoch": 0.62, + "grad_norm": 6.21888663003018, + "learning_rate": 1.6172346034840624e-05, + "loss": 1.2754, + "step": 51495 + }, + { + "epoch": 0.62, + "grad_norm": 10.753810696372032, + "learning_rate": 1.617188640520361e-05, + "loss": 1.3014, + "step": 51498 + }, + { + "epoch": 0.62, + "grad_norm": 4.121586692582014, + "learning_rate": 1.6171426754504173e-05, + "loss": 1.3175, + "step": 51501 + }, + { + "epoch": 0.62, + "grad_norm": 25.560894496042994, + "learning_rate": 1.617096708274388e-05, + "loss": 1.3261, + "step": 51504 + }, + { + "epoch": 0.62, + "grad_norm": 18.5533849503365, + "learning_rate": 1.6170507389924295e-05, + "loss": 1.6074, + "step": 51507 + }, + { + "epoch": 0.62, + "grad_norm": 12.634534114158976, + "learning_rate": 1.617004767604699e-05, + "loss": 1.439, + "step": 51510 + }, + { + "epoch": 0.62, + "grad_norm": 25.61912809543682, + "learning_rate": 1.616958794111354e-05, + "loss": 1.3997, + "step": 51513 + }, + { + "epoch": 0.62, + "grad_norm": 8.039620225070193, + "learning_rate": 1.6169128185125506e-05, + "loss": 1.3724, + "step": 51516 + }, + { + "epoch": 0.62, + "grad_norm": 16.461757437169346, + "learning_rate": 1.6168668408084457e-05, + "loss": 1.7686, + "step": 51519 + }, + { + "epoch": 0.62, + "grad_norm": 8.523148017383937, + "learning_rate": 1.616820860999197e-05, + "loss": 1.2835, + "step": 51522 + }, + { + "epoch": 0.62, + "grad_norm": 9.156404188609352, + "learning_rate": 1.6167748790849603e-05, + "loss": 1.5112, + "step": 51525 + }, + { + "epoch": 0.62, + "grad_norm": 10.071103034333776, + "learning_rate": 1.616728895065894e-05, + "loss": 1.5704, + "step": 51528 + }, + { + "epoch": 0.62, + "grad_norm": 10.679501781240097, + "learning_rate": 1.616682908942153e-05, + "loss": 1.2727, + "step": 51531 + }, + { + "epoch": 0.62, + "grad_norm": 5.482829140712939, + "learning_rate": 1.616636920713896e-05, + "loss": 1.4593, + "step": 51534 + }, + { + "epoch": 0.62, + "grad_norm": 33.79028945446007, + "learning_rate": 1.6165909303812793e-05, + "loss": 1.2784, + "step": 51537 + }, + { + "epoch": 0.62, + "grad_norm": 19.581809546818917, + "learning_rate": 1.6165449379444597e-05, + "loss": 1.6511, + "step": 51540 + }, + { + "epoch": 0.62, + "grad_norm": 17.550756891741905, + "learning_rate": 1.6164989434035944e-05, + "loss": 1.4005, + "step": 51543 + }, + { + "epoch": 0.62, + "grad_norm": 18.411702873295305, + "learning_rate": 1.61645294675884e-05, + "loss": 1.5208, + "step": 51546 + }, + { + "epoch": 0.62, + "grad_norm": 8.363176418657524, + "learning_rate": 1.616406948010354e-05, + "loss": 1.4876, + "step": 51549 + }, + { + "epoch": 0.62, + "grad_norm": 19.03746694719282, + "learning_rate": 1.616360947158293e-05, + "loss": 1.2028, + "step": 51552 + }, + { + "epoch": 0.62, + "grad_norm": 9.770930411740483, + "learning_rate": 1.6163149442028143e-05, + "loss": 1.5443, + "step": 51555 + }, + { + "epoch": 0.62, + "grad_norm": 26.87032629297665, + "learning_rate": 1.6162689391440746e-05, + "loss": 1.6838, + "step": 51558 + }, + { + "epoch": 0.62, + "grad_norm": 30.38221136039231, + "learning_rate": 1.616222931982231e-05, + "loss": 1.4354, + "step": 51561 + }, + { + "epoch": 0.62, + "grad_norm": 31.90977588019381, + "learning_rate": 1.6161769227174403e-05, + "loss": 1.4733, + "step": 51564 + }, + { + "epoch": 0.62, + "grad_norm": 4.979716392688645, + "learning_rate": 1.61613091134986e-05, + "loss": 1.4151, + "step": 51567 + }, + { + "epoch": 0.62, + "grad_norm": 4.104233848921317, + "learning_rate": 1.6160848978796465e-05, + "loss": 1.6029, + "step": 51570 + }, + { + "epoch": 0.62, + "grad_norm": 164.49025256659078, + "learning_rate": 1.616038882306957e-05, + "loss": 1.5422, + "step": 51573 + }, + { + "epoch": 0.62, + "grad_norm": 6.489500747894601, + "learning_rate": 1.6159928646319496e-05, + "loss": 1.4856, + "step": 51576 + }, + { + "epoch": 0.62, + "grad_norm": 31.86324465292756, + "learning_rate": 1.6159468448547797e-05, + "loss": 1.5178, + "step": 51579 + }, + { + "epoch": 0.62, + "grad_norm": 16.0956243948452, + "learning_rate": 1.615900822975605e-05, + "loss": 1.4942, + "step": 51582 + }, + { + "epoch": 0.62, + "grad_norm": 13.53265760715, + "learning_rate": 1.615854798994583e-05, + "loss": 1.8518, + "step": 51585 + }, + { + "epoch": 0.62, + "grad_norm": 16.08348773629088, + "learning_rate": 1.6158087729118705e-05, + "loss": 1.4178, + "step": 51588 + }, + { + "epoch": 0.62, + "grad_norm": 8.213236511235824, + "learning_rate": 1.6157627447276242e-05, + "loss": 1.4861, + "step": 51591 + }, + { + "epoch": 0.62, + "grad_norm": 18.487464197777282, + "learning_rate": 1.6157167144420017e-05, + "loss": 1.4647, + "step": 51594 + }, + { + "epoch": 0.62, + "grad_norm": 48.658295140161535, + "learning_rate": 1.6156706820551594e-05, + "loss": 1.1847, + "step": 51597 + }, + { + "epoch": 0.62, + "grad_norm": 46.422428182575295, + "learning_rate": 1.615624647567255e-05, + "loss": 1.3277, + "step": 51600 + }, + { + "epoch": 0.62, + "grad_norm": 23.338809619446568, + "learning_rate": 1.6155786109784456e-05, + "loss": 1.2652, + "step": 51603 + }, + { + "epoch": 0.62, + "grad_norm": 13.661132664677911, + "learning_rate": 1.615532572288888e-05, + "loss": 1.2777, + "step": 51606 + }, + { + "epoch": 0.62, + "grad_norm": 56.16918925999956, + "learning_rate": 1.6154865314987397e-05, + "loss": 1.613, + "step": 51609 + }, + { + "epoch": 0.62, + "grad_norm": 2.238091651106648, + "learning_rate": 1.6154404886081573e-05, + "loss": 1.3843, + "step": 51612 + }, + { + "epoch": 0.62, + "grad_norm": 6.654072353113836, + "learning_rate": 1.615394443617298e-05, + "loss": 1.1455, + "step": 51615 + }, + { + "epoch": 0.62, + "grad_norm": 31.97867478079837, + "learning_rate": 1.6153483965263192e-05, + "loss": 1.2375, + "step": 51618 + }, + { + "epoch": 0.62, + "grad_norm": 38.45551288172939, + "learning_rate": 1.615302347335378e-05, + "loss": 1.2336, + "step": 51621 + }, + { + "epoch": 0.62, + "grad_norm": 8.600216230394128, + "learning_rate": 1.615256296044632e-05, + "loss": 1.5691, + "step": 51624 + }, + { + "epoch": 0.62, + "grad_norm": 9.664658031396497, + "learning_rate": 1.615210242654237e-05, + "loss": 1.5805, + "step": 51627 + }, + { + "epoch": 0.62, + "grad_norm": 49.61967963066555, + "learning_rate": 1.615164187164352e-05, + "loss": 1.5334, + "step": 51630 + }, + { + "epoch": 0.62, + "grad_norm": 10.909246799053577, + "learning_rate": 1.6151181295751325e-05, + "loss": 1.4132, + "step": 51633 + }, + { + "epoch": 0.62, + "grad_norm": 8.789991009915843, + "learning_rate": 1.6150720698867368e-05, + "loss": 1.6177, + "step": 51636 + }, + { + "epoch": 0.62, + "grad_norm": 10.60842014790585, + "learning_rate": 1.6150260080993217e-05, + "loss": 1.4232, + "step": 51639 + }, + { + "epoch": 0.62, + "grad_norm": 11.288972073857764, + "learning_rate": 1.614979944213044e-05, + "loss": 1.2986, + "step": 51642 + }, + { + "epoch": 0.62, + "grad_norm": 50.247198846472855, + "learning_rate": 1.6149338782280614e-05, + "loss": 1.1245, + "step": 51645 + }, + { + "epoch": 0.62, + "grad_norm": 11.691651918769097, + "learning_rate": 1.614887810144531e-05, + "loss": 1.6339, + "step": 51648 + }, + { + "epoch": 0.62, + "grad_norm": 11.416228750203867, + "learning_rate": 1.6148417399626102e-05, + "loss": 1.2283, + "step": 51651 + }, + { + "epoch": 0.62, + "grad_norm": 30.934202647801964, + "learning_rate": 1.6147956676824558e-05, + "loss": 1.4823, + "step": 51654 + }, + { + "epoch": 0.62, + "grad_norm": 43.5524638464679, + "learning_rate": 1.6147495933042252e-05, + "loss": 1.2789, + "step": 51657 + }, + { + "epoch": 0.62, + "grad_norm": 4.345816242777819, + "learning_rate": 1.6147035168280756e-05, + "loss": 1.3997, + "step": 51660 + }, + { + "epoch": 0.62, + "grad_norm": 8.446817670127746, + "learning_rate": 1.6146574382541646e-05, + "loss": 1.6106, + "step": 51663 + }, + { + "epoch": 0.62, + "grad_norm": 25.795741608789402, + "learning_rate": 1.614611357582649e-05, + "loss": 1.4315, + "step": 51666 + }, + { + "epoch": 0.62, + "grad_norm": 4.201363149491778, + "learning_rate": 1.6145652748136862e-05, + "loss": 1.2158, + "step": 51669 + }, + { + "epoch": 0.62, + "grad_norm": 7.065220951038512, + "learning_rate": 1.6145191899474334e-05, + "loss": 1.5182, + "step": 51672 + }, + { + "epoch": 0.62, + "grad_norm": 31.293071121322782, + "learning_rate": 1.6144731029840482e-05, + "loss": 1.2524, + "step": 51675 + }, + { + "epoch": 0.62, + "grad_norm": 44.09473136708467, + "learning_rate": 1.6144270139236874e-05, + "loss": 1.5349, + "step": 51678 + }, + { + "epoch": 0.62, + "grad_norm": 19.936120240110924, + "learning_rate": 1.6143809227665083e-05, + "loss": 1.2049, + "step": 51681 + }, + { + "epoch": 0.62, + "grad_norm": 18.867108395653034, + "learning_rate": 1.614334829512669e-05, + "loss": 1.6049, + "step": 51684 + }, + { + "epoch": 0.62, + "grad_norm": 10.955661302894828, + "learning_rate": 1.614288734162326e-05, + "loss": 1.254, + "step": 51687 + }, + { + "epoch": 0.62, + "grad_norm": 17.975896038013044, + "learning_rate": 1.6142426367156366e-05, + "loss": 1.1213, + "step": 51690 + }, + { + "epoch": 0.62, + "grad_norm": 15.189634609140116, + "learning_rate": 1.6141965371727585e-05, + "loss": 1.1964, + "step": 51693 + }, + { + "epoch": 0.62, + "grad_norm": 5.823359146151823, + "learning_rate": 1.614150435533849e-05, + "loss": 1.0817, + "step": 51696 + }, + { + "epoch": 0.62, + "grad_norm": 9.403571453985839, + "learning_rate": 1.614104331799065e-05, + "loss": 1.3822, + "step": 51699 + }, + { + "epoch": 0.62, + "grad_norm": 16.64641721727942, + "learning_rate": 1.614058225968564e-05, + "loss": 1.4184, + "step": 51702 + }, + { + "epoch": 0.62, + "grad_norm": 12.114286367451905, + "learning_rate": 1.6140121180425037e-05, + "loss": 1.486, + "step": 51705 + }, + { + "epoch": 0.62, + "grad_norm": 21.93938064867674, + "learning_rate": 1.6139660080210413e-05, + "loss": 1.4496, + "step": 51708 + }, + { + "epoch": 0.62, + "grad_norm": 30.365900171553065, + "learning_rate": 1.613919895904334e-05, + "loss": 1.4431, + "step": 51711 + }, + { + "epoch": 0.62, + "grad_norm": 9.48981057129569, + "learning_rate": 1.613873781692539e-05, + "loss": 1.5892, + "step": 51714 + }, + { + "epoch": 0.62, + "grad_norm": 23.13532222979879, + "learning_rate": 1.613827665385814e-05, + "loss": 1.6731, + "step": 51717 + }, + { + "epoch": 0.62, + "grad_norm": 8.500162044076342, + "learning_rate": 1.6137815469843166e-05, + "loss": 0.9378, + "step": 51720 + }, + { + "epoch": 0.62, + "grad_norm": 4.959522625980365, + "learning_rate": 1.6137354264882036e-05, + "loss": 1.6472, + "step": 51723 + }, + { + "epoch": 0.62, + "grad_norm": 58.83760386666666, + "learning_rate": 1.6136893038976325e-05, + "loss": 1.2949, + "step": 51726 + }, + { + "epoch": 0.62, + "grad_norm": 22.046931558486204, + "learning_rate": 1.6136431792127613e-05, + "loss": 1.4715, + "step": 51729 + }, + { + "epoch": 0.62, + "grad_norm": 14.017005971420152, + "learning_rate": 1.6135970524337464e-05, + "loss": 1.1285, + "step": 51732 + }, + { + "epoch": 0.62, + "grad_norm": 5.008579244989655, + "learning_rate": 1.6135509235607462e-05, + "loss": 1.6278, + "step": 51735 + }, + { + "epoch": 0.62, + "grad_norm": 23.32793335022583, + "learning_rate": 1.6135047925939175e-05, + "loss": 1.609, + "step": 51738 + }, + { + "epoch": 0.62, + "grad_norm": 15.175020616045389, + "learning_rate": 1.6134586595334178e-05, + "loss": 1.3632, + "step": 51741 + }, + { + "epoch": 0.62, + "grad_norm": 24.163037082940143, + "learning_rate": 1.613412524379405e-05, + "loss": 1.3343, + "step": 51744 + }, + { + "epoch": 0.62, + "grad_norm": 24.1132659017929, + "learning_rate": 1.613366387132036e-05, + "loss": 1.5954, + "step": 51747 + }, + { + "epoch": 0.62, + "grad_norm": 9.420264401977626, + "learning_rate": 1.6133202477914687e-05, + "loss": 1.3033, + "step": 51750 + }, + { + "epoch": 0.62, + "grad_norm": 10.812802846439707, + "learning_rate": 1.6132741063578602e-05, + "loss": 1.421, + "step": 51753 + }, + { + "epoch": 0.62, + "grad_norm": 14.960019516306325, + "learning_rate": 1.613227962831368e-05, + "loss": 1.2454, + "step": 51756 + }, + { + "epoch": 0.62, + "grad_norm": 9.559834348709655, + "learning_rate": 1.6131818172121496e-05, + "loss": 1.6422, + "step": 51759 + }, + { + "epoch": 0.62, + "grad_norm": 13.049163103161096, + "learning_rate": 1.613135669500362e-05, + "loss": 1.7271, + "step": 51762 + }, + { + "epoch": 0.62, + "grad_norm": 11.389816602548006, + "learning_rate": 1.613089519696164e-05, + "loss": 1.599, + "step": 51765 + }, + { + "epoch": 0.62, + "grad_norm": 2.724823162241424, + "learning_rate": 1.6130433677997122e-05, + "loss": 1.2314, + "step": 51768 + }, + { + "epoch": 0.62, + "grad_norm": 5.733380274155451, + "learning_rate": 1.612997213811164e-05, + "loss": 1.4482, + "step": 51771 + }, + { + "epoch": 0.62, + "grad_norm": 18.065046927468913, + "learning_rate": 1.612951057730677e-05, + "loss": 1.5396, + "step": 51774 + }, + { + "epoch": 0.62, + "grad_norm": 18.12046941499757, + "learning_rate": 1.612904899558409e-05, + "loss": 1.8091, + "step": 51777 + }, + { + "epoch": 0.62, + "grad_norm": 9.339703771113532, + "learning_rate": 1.6128587392945175e-05, + "loss": 1.3255, + "step": 51780 + }, + { + "epoch": 0.62, + "grad_norm": 13.509940163935765, + "learning_rate": 1.6128125769391597e-05, + "loss": 1.8403, + "step": 51783 + }, + { + "epoch": 0.62, + "grad_norm": 3.339919443995483, + "learning_rate": 1.612766412492493e-05, + "loss": 1.6139, + "step": 51786 + }, + { + "epoch": 0.62, + "grad_norm": 9.96824376558445, + "learning_rate": 1.612720245954676e-05, + "loss": 1.2445, + "step": 51789 + }, + { + "epoch": 0.62, + "grad_norm": 5.633766284731381, + "learning_rate": 1.612674077325865e-05, + "loss": 1.3551, + "step": 51792 + }, + { + "epoch": 0.62, + "grad_norm": 18.435946696455144, + "learning_rate": 1.6126279066062184e-05, + "loss": 1.3067, + "step": 51795 + }, + { + "epoch": 0.62, + "grad_norm": 23.512381038451093, + "learning_rate": 1.612581733795893e-05, + "loss": 1.5369, + "step": 51798 + }, + { + "epoch": 0.62, + "grad_norm": 7.026799458174104, + "learning_rate": 1.6125355588950472e-05, + "loss": 0.9254, + "step": 51801 + }, + { + "epoch": 0.62, + "grad_norm": 30.47742834343037, + "learning_rate": 1.612489381903838e-05, + "loss": 1.4314, + "step": 51804 + }, + { + "epoch": 0.62, + "grad_norm": 7.032812876470928, + "learning_rate": 1.6124432028224227e-05, + "loss": 1.6646, + "step": 51807 + }, + { + "epoch": 0.62, + "grad_norm": 3.0091838383266243, + "learning_rate": 1.61239702165096e-05, + "loss": 1.0642, + "step": 51810 + }, + { + "epoch": 0.62, + "grad_norm": 4.662277185659494, + "learning_rate": 1.6123508383896065e-05, + "loss": 1.4064, + "step": 51813 + }, + { + "epoch": 0.62, + "grad_norm": 4.900422086185497, + "learning_rate": 1.61230465303852e-05, + "loss": 1.0983, + "step": 51816 + }, + { + "epoch": 0.62, + "grad_norm": 18.259683662005585, + "learning_rate": 1.6122584655978588e-05, + "loss": 1.4358, + "step": 51819 + }, + { + "epoch": 0.62, + "grad_norm": 17.20913505355539, + "learning_rate": 1.6122122760677796e-05, + "loss": 1.4178, + "step": 51822 + }, + { + "epoch": 0.62, + "grad_norm": 27.809194889149065, + "learning_rate": 1.6121660844484407e-05, + "loss": 1.2803, + "step": 51825 + }, + { + "epoch": 0.62, + "grad_norm": 24.401140846915137, + "learning_rate": 1.612119890739999e-05, + "loss": 1.7572, + "step": 51828 + }, + { + "epoch": 0.62, + "grad_norm": 36.1137932918658, + "learning_rate": 1.612073694942613e-05, + "loss": 1.8951, + "step": 51831 + }, + { + "epoch": 0.62, + "grad_norm": 8.01976519588051, + "learning_rate": 1.61202749705644e-05, + "loss": 1.3883, + "step": 51834 + }, + { + "epoch": 0.62, + "grad_norm": 37.98340124451005, + "learning_rate": 1.6119812970816374e-05, + "loss": 1.8742, + "step": 51837 + }, + { + "epoch": 0.62, + "grad_norm": 10.79754939089474, + "learning_rate": 1.611935095018363e-05, + "loss": 1.9425, + "step": 51840 + }, + { + "epoch": 0.62, + "grad_norm": 16.256797491493987, + "learning_rate": 1.6118888908667747e-05, + "loss": 1.3425, + "step": 51843 + }, + { + "epoch": 0.62, + "grad_norm": 48.06278583337527, + "learning_rate": 1.61184268462703e-05, + "loss": 1.5714, + "step": 51846 + }, + { + "epoch": 0.62, + "grad_norm": 8.467925908658954, + "learning_rate": 1.6117964762992864e-05, + "loss": 1.252, + "step": 51849 + }, + { + "epoch": 0.62, + "grad_norm": 21.62998427409328, + "learning_rate": 1.611750265883702e-05, + "loss": 1.1978, + "step": 51852 + }, + { + "epoch": 0.62, + "grad_norm": 12.909197707342088, + "learning_rate": 1.6117040533804345e-05, + "loss": 1.5602, + "step": 51855 + }, + { + "epoch": 0.62, + "grad_norm": 21.85068890305031, + "learning_rate": 1.611657838789641e-05, + "loss": 1.4744, + "step": 51858 + }, + { + "epoch": 0.62, + "grad_norm": 18.820927565996993, + "learning_rate": 1.6116116221114797e-05, + "loss": 1.4132, + "step": 51861 + }, + { + "epoch": 0.62, + "grad_norm": 23.41723197100841, + "learning_rate": 1.6115654033461084e-05, + "loss": 1.6896, + "step": 51864 + }, + { + "epoch": 0.62, + "grad_norm": 16.97146134402418, + "learning_rate": 1.6115191824936846e-05, + "loss": 1.638, + "step": 51867 + }, + { + "epoch": 0.62, + "grad_norm": 12.84549367562791, + "learning_rate": 1.611472959554366e-05, + "loss": 1.5207, + "step": 51870 + }, + { + "epoch": 0.62, + "grad_norm": 6.512466873034564, + "learning_rate": 1.6114267345283107e-05, + "loss": 1.3516, + "step": 51873 + }, + { + "epoch": 0.62, + "grad_norm": 12.794617504801623, + "learning_rate": 1.6113805074156757e-05, + "loss": 1.3373, + "step": 51876 + }, + { + "epoch": 0.62, + "grad_norm": 6.408785227980485, + "learning_rate": 1.6113342782166194e-05, + "loss": 1.1646, + "step": 51879 + }, + { + "epoch": 0.62, + "grad_norm": 9.332525770501842, + "learning_rate": 1.6112880469312995e-05, + "loss": 1.2334, + "step": 51882 + }, + { + "epoch": 0.62, + "grad_norm": 13.456136923880551, + "learning_rate": 1.611241813559874e-05, + "loss": 1.2961, + "step": 51885 + }, + { + "epoch": 0.62, + "grad_norm": 11.54611322935741, + "learning_rate": 1.6111955781025002e-05, + "loss": 1.4002, + "step": 51888 + }, + { + "epoch": 0.62, + "grad_norm": 27.721297984685236, + "learning_rate": 1.611149340559336e-05, + "loss": 1.2221, + "step": 51891 + }, + { + "epoch": 0.62, + "grad_norm": 15.526757606071326, + "learning_rate": 1.611103100930539e-05, + "loss": 1.5396, + "step": 51894 + }, + { + "epoch": 0.62, + "grad_norm": 5.270112039332733, + "learning_rate": 1.6110568592162672e-05, + "loss": 1.291, + "step": 51897 + }, + { + "epoch": 0.62, + "grad_norm": 21.56721410773248, + "learning_rate": 1.611010615416679e-05, + "loss": 1.3415, + "step": 51900 + }, + { + "epoch": 0.62, + "grad_norm": 19.929892321673872, + "learning_rate": 1.610964369531931e-05, + "loss": 1.5883, + "step": 51903 + }, + { + "epoch": 0.62, + "grad_norm": 8.130148172115522, + "learning_rate": 1.610918121562182e-05, + "loss": 1.2251, + "step": 51906 + }, + { + "epoch": 0.62, + "grad_norm": 5.453587186246705, + "learning_rate": 1.6108718715075897e-05, + "loss": 1.1346, + "step": 51909 + }, + { + "epoch": 0.62, + "grad_norm": 6.928001029293841, + "learning_rate": 1.6108256193683115e-05, + "loss": 1.5054, + "step": 51912 + }, + { + "epoch": 0.62, + "grad_norm": 20.590964653627392, + "learning_rate": 1.6107793651445053e-05, + "loss": 1.537, + "step": 51915 + }, + { + "epoch": 0.62, + "grad_norm": 16.412218439034806, + "learning_rate": 1.6107331088363296e-05, + "loss": 1.276, + "step": 51918 + }, + { + "epoch": 0.62, + "grad_norm": 6.098327632029198, + "learning_rate": 1.6106868504439412e-05, + "loss": 1.3716, + "step": 51921 + }, + { + "epoch": 0.62, + "grad_norm": 11.09048595850704, + "learning_rate": 1.6106405899674987e-05, + "loss": 1.6946, + "step": 51924 + }, + { + "epoch": 0.62, + "grad_norm": 9.979735059968489, + "learning_rate": 1.61059432740716e-05, + "loss": 1.2763, + "step": 51927 + }, + { + "epoch": 0.62, + "grad_norm": 7.451642503427287, + "learning_rate": 1.610548062763083e-05, + "loss": 1.6126, + "step": 51930 + }, + { + "epoch": 0.62, + "grad_norm": 4.951075747592433, + "learning_rate": 1.610501796035425e-05, + "loss": 1.1109, + "step": 51933 + }, + { + "epoch": 0.62, + "grad_norm": 10.359634103875496, + "learning_rate": 1.6104555272243445e-05, + "loss": 1.6303, + "step": 51936 + }, + { + "epoch": 0.62, + "grad_norm": 24.026297987821888, + "learning_rate": 1.6104092563299992e-05, + "loss": 1.3922, + "step": 51939 + }, + { + "epoch": 0.62, + "grad_norm": 5.557001124839257, + "learning_rate": 1.610362983352547e-05, + "loss": 1.6566, + "step": 51942 + }, + { + "epoch": 0.62, + "grad_norm": 29.756298672590546, + "learning_rate": 1.6103167082921458e-05, + "loss": 1.2925, + "step": 51945 + }, + { + "epoch": 0.62, + "grad_norm": 9.253695407966653, + "learning_rate": 1.610270431148953e-05, + "loss": 1.4369, + "step": 51948 + }, + { + "epoch": 0.62, + "grad_norm": 18.09085240362936, + "learning_rate": 1.6102241519231273e-05, + "loss": 1.2015, + "step": 51951 + }, + { + "epoch": 0.62, + "grad_norm": 18.170033770101146, + "learning_rate": 1.6101778706148267e-05, + "loss": 1.0667, + "step": 51954 + }, + { + "epoch": 0.62, + "grad_norm": 16.570530516280666, + "learning_rate": 1.6101315872242083e-05, + "loss": 1.3684, + "step": 51957 + }, + { + "epoch": 0.62, + "grad_norm": 18.513104095758916, + "learning_rate": 1.6100853017514312e-05, + "loss": 1.5044, + "step": 51960 + }, + { + "epoch": 0.62, + "grad_norm": 22.02528368663085, + "learning_rate": 1.610039014196652e-05, + "loss": 1.5698, + "step": 51963 + }, + { + "epoch": 0.62, + "grad_norm": 11.713695163884843, + "learning_rate": 1.60999272456003e-05, + "loss": 1.729, + "step": 51966 + }, + { + "epoch": 0.62, + "grad_norm": 36.251430562481936, + "learning_rate": 1.6099464328417224e-05, + "loss": 1.356, + "step": 51969 + }, + { + "epoch": 0.62, + "grad_norm": 15.542213519159674, + "learning_rate": 1.609900139041887e-05, + "loss": 1.0546, + "step": 51972 + }, + { + "epoch": 0.62, + "grad_norm": 15.703660426226028, + "learning_rate": 1.6098538431606827e-05, + "loss": 1.3837, + "step": 51975 + }, + { + "epoch": 0.63, + "grad_norm": 16.607429282184075, + "learning_rate": 1.609807545198266e-05, + "loss": 1.5368, + "step": 51978 + }, + { + "epoch": 0.63, + "grad_norm": 6.214608189436368, + "learning_rate": 1.6097612451547964e-05, + "loss": 1.4406, + "step": 51981 + }, + { + "epoch": 0.63, + "grad_norm": 11.15402108076954, + "learning_rate": 1.6097149430304314e-05, + "loss": 1.2417, + "step": 51984 + }, + { + "epoch": 0.63, + "grad_norm": 3.4686945800905216, + "learning_rate": 1.609668638825329e-05, + "loss": 1.428, + "step": 51987 + }, + { + "epoch": 0.63, + "grad_norm": 18.05320613045173, + "learning_rate": 1.6096223325396465e-05, + "loss": 1.1234, + "step": 51990 + }, + { + "epoch": 0.63, + "grad_norm": 18.256310245771743, + "learning_rate": 1.609576024173543e-05, + "loss": 1.2628, + "step": 51993 + }, + { + "epoch": 0.63, + "grad_norm": 64.90634889235002, + "learning_rate": 1.6095297137271762e-05, + "loss": 1.3725, + "step": 51996 + }, + { + "epoch": 0.63, + "grad_norm": 10.47914983235128, + "learning_rate": 1.6094834012007038e-05, + "loss": 1.3894, + "step": 51999 + }, + { + "epoch": 0.63, + "grad_norm": 9.425236341490104, + "learning_rate": 1.609437086594284e-05, + "loss": 1.0496, + "step": 52002 + }, + { + "epoch": 0.63, + "grad_norm": 3.7996529653908544, + "learning_rate": 1.6093907699080755e-05, + "loss": 1.2848, + "step": 52005 + }, + { + "epoch": 0.63, + "grad_norm": 6.951991647943651, + "learning_rate": 1.6093444511422354e-05, + "loss": 1.3663, + "step": 52008 + }, + { + "epoch": 0.63, + "grad_norm": 15.555647022069618, + "learning_rate": 1.609298130296922e-05, + "loss": 1.2709, + "step": 52011 + }, + { + "epoch": 0.63, + "grad_norm": 16.69627096075032, + "learning_rate": 1.609251807372294e-05, + "loss": 1.5316, + "step": 52014 + }, + { + "epoch": 0.63, + "grad_norm": 2.1801523978564683, + "learning_rate": 1.6092054823685086e-05, + "loss": 1.4484, + "step": 52017 + }, + { + "epoch": 0.63, + "grad_norm": 3.7833533127796333, + "learning_rate": 1.6091591552857245e-05, + "loss": 1.2316, + "step": 52020 + }, + { + "epoch": 0.63, + "grad_norm": 32.348959454234844, + "learning_rate": 1.6091128261240996e-05, + "loss": 1.2899, + "step": 52023 + }, + { + "epoch": 0.63, + "grad_norm": 57.39587901529349, + "learning_rate": 1.609066494883792e-05, + "loss": 1.1419, + "step": 52026 + }, + { + "epoch": 0.63, + "grad_norm": 27.933641179886923, + "learning_rate": 1.6090201615649597e-05, + "loss": 1.7461, + "step": 52029 + }, + { + "epoch": 0.63, + "grad_norm": 27.058516463825057, + "learning_rate": 1.6089738261677612e-05, + "loss": 1.5499, + "step": 52032 + }, + { + "epoch": 0.63, + "grad_norm": 16.71747412387805, + "learning_rate": 1.6089274886923548e-05, + "loss": 1.4819, + "step": 52035 + }, + { + "epoch": 0.63, + "grad_norm": 11.513978601741613, + "learning_rate": 1.6088811491388974e-05, + "loss": 1.6063, + "step": 52038 + }, + { + "epoch": 0.63, + "grad_norm": 4.163346957120896, + "learning_rate": 1.6088348075075482e-05, + "loss": 1.462, + "step": 52041 + }, + { + "epoch": 0.63, + "grad_norm": 5.408704040403553, + "learning_rate": 1.6087884637984652e-05, + "loss": 1.6982, + "step": 52044 + }, + { + "epoch": 0.63, + "grad_norm": 15.333150215442258, + "learning_rate": 1.6087421180118065e-05, + "loss": 1.4244, + "step": 52047 + }, + { + "epoch": 0.63, + "grad_norm": 21.75558693583684, + "learning_rate": 1.6086957701477304e-05, + "loss": 1.9082, + "step": 52050 + }, + { + "epoch": 0.63, + "grad_norm": 7.075612619561021, + "learning_rate": 1.6086494202063948e-05, + "loss": 1.6506, + "step": 52053 + }, + { + "epoch": 0.63, + "grad_norm": 19.94850471589519, + "learning_rate": 1.6086030681879575e-05, + "loss": 1.2737, + "step": 52056 + }, + { + "epoch": 0.63, + "grad_norm": 59.14676380938765, + "learning_rate": 1.6085567140925774e-05, + "loss": 1.8817, + "step": 52059 + }, + { + "epoch": 0.63, + "grad_norm": 6.2441159537030755, + "learning_rate": 1.608510357920413e-05, + "loss": 1.6428, + "step": 52062 + }, + { + "epoch": 0.63, + "grad_norm": 7.553549901594131, + "learning_rate": 1.608463999671621e-05, + "loss": 1.3377, + "step": 52065 + }, + { + "epoch": 0.63, + "grad_norm": 38.424585204328444, + "learning_rate": 1.608417639346361e-05, + "loss": 1.2705, + "step": 52068 + }, + { + "epoch": 0.63, + "grad_norm": 78.60850217594442, + "learning_rate": 1.6083712769447907e-05, + "loss": 1.3923, + "step": 52071 + }, + { + "epoch": 0.63, + "grad_norm": 26.013125589756864, + "learning_rate": 1.6083249124670686e-05, + "loss": 1.3534, + "step": 52074 + }, + { + "epoch": 0.63, + "grad_norm": 8.281348302005123, + "learning_rate": 1.608278545913352e-05, + "loss": 1.5098, + "step": 52077 + }, + { + "epoch": 0.63, + "grad_norm": 11.852629134186072, + "learning_rate": 1.608232177283801e-05, + "loss": 1.111, + "step": 52080 + }, + { + "epoch": 0.63, + "grad_norm": 8.708472196618052, + "learning_rate": 1.608185806578572e-05, + "loss": 1.0798, + "step": 52083 + }, + { + "epoch": 0.63, + "grad_norm": 7.681231943390547, + "learning_rate": 1.6081394337978237e-05, + "loss": 1.2629, + "step": 52086 + }, + { + "epoch": 0.63, + "grad_norm": 8.181757342195576, + "learning_rate": 1.608093058941715e-05, + "loss": 1.4157, + "step": 52089 + }, + { + "epoch": 0.63, + "grad_norm": 6.928858972997665, + "learning_rate": 1.608046682010403e-05, + "loss": 1.3266, + "step": 52092 + }, + { + "epoch": 0.63, + "grad_norm": 10.82490115914142, + "learning_rate": 1.6080003030040474e-05, + "loss": 1.2908, + "step": 52095 + }, + { + "epoch": 0.63, + "grad_norm": 50.60600855607104, + "learning_rate": 1.6079539219228055e-05, + "loss": 1.3624, + "step": 52098 + }, + { + "epoch": 0.63, + "grad_norm": 15.8859668104997, + "learning_rate": 1.6079075387668357e-05, + "loss": 1.2428, + "step": 52101 + }, + { + "epoch": 0.63, + "grad_norm": 7.523258558116322, + "learning_rate": 1.6078611535362965e-05, + "loss": 1.705, + "step": 52104 + }, + { + "epoch": 0.63, + "grad_norm": 18.508367518906088, + "learning_rate": 1.6078147662313465e-05, + "loss": 1.2239, + "step": 52107 + }, + { + "epoch": 0.63, + "grad_norm": 12.478346908601933, + "learning_rate": 1.6077683768521433e-05, + "loss": 1.5999, + "step": 52110 + }, + { + "epoch": 0.63, + "grad_norm": 63.58858291497891, + "learning_rate": 1.6077219853988454e-05, + "loss": 1.1151, + "step": 52113 + }, + { + "epoch": 0.63, + "grad_norm": 9.385550726697968, + "learning_rate": 1.6076755918716118e-05, + "loss": 1.3484, + "step": 52116 + }, + { + "epoch": 0.63, + "grad_norm": 4.885971628954376, + "learning_rate": 1.6076291962706002e-05, + "loss": 1.0085, + "step": 52119 + }, + { + "epoch": 0.63, + "grad_norm": 9.023795521317728, + "learning_rate": 1.6075827985959686e-05, + "loss": 1.6149, + "step": 52122 + }, + { + "epoch": 0.63, + "grad_norm": 21.38693293112317, + "learning_rate": 1.607536398847876e-05, + "loss": 1.7108, + "step": 52125 + }, + { + "epoch": 0.63, + "grad_norm": 17.210349842524305, + "learning_rate": 1.6074899970264804e-05, + "loss": 1.7595, + "step": 52128 + }, + { + "epoch": 0.63, + "grad_norm": 13.409056418868824, + "learning_rate": 1.6074435931319402e-05, + "loss": 1.4552, + "step": 52131 + }, + { + "epoch": 0.63, + "grad_norm": 4.0894017977520525, + "learning_rate": 1.607397187164414e-05, + "loss": 1.3897, + "step": 52134 + }, + { + "epoch": 0.63, + "grad_norm": 7.4318503660190425, + "learning_rate": 1.60735077912406e-05, + "loss": 1.5465, + "step": 52137 + }, + { + "epoch": 0.63, + "grad_norm": 6.894454087840856, + "learning_rate": 1.6073043690110368e-05, + "loss": 1.5366, + "step": 52140 + }, + { + "epoch": 0.63, + "grad_norm": 11.028228351933747, + "learning_rate": 1.607257956825502e-05, + "loss": 1.5571, + "step": 52143 + }, + { + "epoch": 0.63, + "grad_norm": 23.78680942352859, + "learning_rate": 1.6072115425676146e-05, + "loss": 1.2824, + "step": 52146 + }, + { + "epoch": 0.63, + "grad_norm": 3.864951577951937, + "learning_rate": 1.6071651262375333e-05, + "loss": 1.4726, + "step": 52149 + }, + { + "epoch": 0.63, + "grad_norm": 12.610482421696837, + "learning_rate": 1.6071187078354157e-05, + "loss": 1.5139, + "step": 52152 + }, + { + "epoch": 0.63, + "grad_norm": 6.986500438695519, + "learning_rate": 1.607072287361421e-05, + "loss": 1.3644, + "step": 52155 + }, + { + "epoch": 0.63, + "grad_norm": 18.428472499778497, + "learning_rate": 1.6070258648157068e-05, + "loss": 1.4565, + "step": 52158 + }, + { + "epoch": 0.63, + "grad_norm": 18.736859140974055, + "learning_rate": 1.6069794401984324e-05, + "loss": 1.3164, + "step": 52161 + }, + { + "epoch": 0.63, + "grad_norm": 26.89015872866503, + "learning_rate": 1.6069330135097555e-05, + "loss": 1.686, + "step": 52164 + }, + { + "epoch": 0.63, + "grad_norm": 8.893509553358575, + "learning_rate": 1.606886584749835e-05, + "loss": 1.3753, + "step": 52167 + }, + { + "epoch": 0.63, + "grad_norm": 4.35912828782219, + "learning_rate": 1.6068401539188295e-05, + "loss": 1.3799, + "step": 52170 + }, + { + "epoch": 0.63, + "grad_norm": 4.650986069003106, + "learning_rate": 1.6067937210168966e-05, + "loss": 1.2938, + "step": 52173 + }, + { + "epoch": 0.63, + "grad_norm": 4.689158770435034, + "learning_rate": 1.6067472860441955e-05, + "loss": 1.039, + "step": 52176 + }, + { + "epoch": 0.63, + "grad_norm": 9.17825676378411, + "learning_rate": 1.6067008490008844e-05, + "loss": 1.3879, + "step": 52179 + }, + { + "epoch": 0.63, + "grad_norm": 14.869277217008857, + "learning_rate": 1.6066544098871218e-05, + "loss": 1.2218, + "step": 52182 + }, + { + "epoch": 0.63, + "grad_norm": 5.032295154473743, + "learning_rate": 1.6066079687030668e-05, + "loss": 1.5658, + "step": 52185 + }, + { + "epoch": 0.63, + "grad_norm": 20.16993347524845, + "learning_rate": 1.6065615254488767e-05, + "loss": 1.2682, + "step": 52188 + }, + { + "epoch": 0.63, + "grad_norm": 88.85614603022543, + "learning_rate": 1.6065150801247106e-05, + "loss": 1.3005, + "step": 52191 + }, + { + "epoch": 0.63, + "grad_norm": 14.214621305266528, + "learning_rate": 1.606468632730727e-05, + "loss": 1.0438, + "step": 52194 + }, + { + "epoch": 0.63, + "grad_norm": 10.690235913728813, + "learning_rate": 1.6064221832670845e-05, + "loss": 0.8713, + "step": 52197 + }, + { + "epoch": 0.63, + "grad_norm": 17.94277955719661, + "learning_rate": 1.6063757317339416e-05, + "loss": 1.4846, + "step": 52200 + }, + { + "epoch": 0.63, + "grad_norm": 10.569342370383357, + "learning_rate": 1.606329278131457e-05, + "loss": 1.0404, + "step": 52203 + }, + { + "epoch": 0.63, + "grad_norm": 5.0099904218654805, + "learning_rate": 1.6062828224597885e-05, + "loss": 1.1549, + "step": 52206 + }, + { + "epoch": 0.63, + "grad_norm": 8.33114869399158, + "learning_rate": 1.6062363647190952e-05, + "loss": 1.287, + "step": 52209 + }, + { + "epoch": 0.63, + "grad_norm": 22.490333235984146, + "learning_rate": 1.6061899049095354e-05, + "loss": 1.4651, + "step": 52212 + }, + { + "epoch": 0.63, + "grad_norm": 12.14240511795902, + "learning_rate": 1.6061434430312685e-05, + "loss": 1.1453, + "step": 52215 + }, + { + "epoch": 0.63, + "grad_norm": 14.215715801913003, + "learning_rate": 1.6060969790844518e-05, + "loss": 1.2966, + "step": 52218 + }, + { + "epoch": 0.63, + "grad_norm": 26.334013001176263, + "learning_rate": 1.6060505130692444e-05, + "loss": 1.6563, + "step": 52221 + }, + { + "epoch": 0.63, + "grad_norm": 17.131392035006183, + "learning_rate": 1.606004044985805e-05, + "loss": 1.3159, + "step": 52224 + }, + { + "epoch": 0.63, + "grad_norm": 61.31114792012375, + "learning_rate": 1.605957574834292e-05, + "loss": 1.4461, + "step": 52227 + }, + { + "epoch": 0.63, + "grad_norm": 5.291966785719453, + "learning_rate": 1.6059111026148643e-05, + "loss": 1.3913, + "step": 52230 + }, + { + "epoch": 0.63, + "grad_norm": 8.409395242866577, + "learning_rate": 1.60586462832768e-05, + "loss": 1.1784, + "step": 52233 + }, + { + "epoch": 0.63, + "grad_norm": 15.245746472589483, + "learning_rate": 1.605818151972898e-05, + "loss": 1.0314, + "step": 52236 + }, + { + "epoch": 0.63, + "grad_norm": 33.03448261085298, + "learning_rate": 1.605771673550677e-05, + "loss": 1.4534, + "step": 52239 + }, + { + "epoch": 0.63, + "grad_norm": 11.82604181943219, + "learning_rate": 1.6057251930611754e-05, + "loss": 1.3616, + "step": 52242 + }, + { + "epoch": 0.63, + "grad_norm": 21.55766190963498, + "learning_rate": 1.605678710504552e-05, + "loss": 1.2398, + "step": 52245 + }, + { + "epoch": 0.63, + "grad_norm": 8.465860560778047, + "learning_rate": 1.605632225880965e-05, + "loss": 1.4132, + "step": 52248 + }, + { + "epoch": 0.63, + "grad_norm": 5.628702910495081, + "learning_rate": 1.6055857391905738e-05, + "loss": 1.4868, + "step": 52251 + }, + { + "epoch": 0.63, + "grad_norm": 5.275073978239492, + "learning_rate": 1.6055392504335364e-05, + "loss": 1.2463, + "step": 52254 + }, + { + "epoch": 0.63, + "grad_norm": 122.16990380781989, + "learning_rate": 1.6054927596100116e-05, + "loss": 1.5193, + "step": 52257 + }, + { + "epoch": 0.63, + "grad_norm": 6.6664123011159395, + "learning_rate": 1.6054462667201584e-05, + "loss": 1.3691, + "step": 52260 + }, + { + "epoch": 0.63, + "grad_norm": 13.135502544347332, + "learning_rate": 1.6053997717641347e-05, + "loss": 1.2299, + "step": 52263 + }, + { + "epoch": 0.63, + "grad_norm": 95.47269092795634, + "learning_rate": 1.6053532747420997e-05, + "loss": 1.1436, + "step": 52266 + }, + { + "epoch": 0.63, + "grad_norm": 32.69715563629856, + "learning_rate": 1.605306775654212e-05, + "loss": 1.2908, + "step": 52269 + }, + { + "epoch": 0.63, + "grad_norm": 51.828653851487026, + "learning_rate": 1.6052602745006306e-05, + "loss": 1.5708, + "step": 52272 + }, + { + "epoch": 0.63, + "grad_norm": 16.799234207274782, + "learning_rate": 1.605213771281514e-05, + "loss": 1.4496, + "step": 52275 + }, + { + "epoch": 0.63, + "grad_norm": 15.551623804933243, + "learning_rate": 1.6051672659970204e-05, + "loss": 1.6492, + "step": 52278 + }, + { + "epoch": 0.63, + "grad_norm": 37.489908077800955, + "learning_rate": 1.6051207586473092e-05, + "loss": 1.2913, + "step": 52281 + }, + { + "epoch": 0.63, + "grad_norm": 7.010592017108616, + "learning_rate": 1.605074249232539e-05, + "loss": 1.584, + "step": 52284 + }, + { + "epoch": 0.63, + "grad_norm": 8.818530519838632, + "learning_rate": 1.6050277377528678e-05, + "loss": 1.3537, + "step": 52287 + }, + { + "epoch": 0.63, + "grad_norm": 30.425172889688263, + "learning_rate": 1.604981224208455e-05, + "loss": 1.8596, + "step": 52290 + }, + { + "epoch": 0.63, + "grad_norm": 6.97234241811115, + "learning_rate": 1.6049347085994594e-05, + "loss": 1.5767, + "step": 52293 + }, + { + "epoch": 0.63, + "grad_norm": 12.120281186963126, + "learning_rate": 1.6048881909260395e-05, + "loss": 1.1969, + "step": 52296 + }, + { + "epoch": 0.63, + "grad_norm": 25.228656890263544, + "learning_rate": 1.604841671188354e-05, + "loss": 1.4351, + "step": 52299 + }, + { + "epoch": 0.63, + "grad_norm": 11.002144234181271, + "learning_rate": 1.6047951493865618e-05, + "loss": 1.5076, + "step": 52302 + }, + { + "epoch": 0.63, + "grad_norm": 66.57627468179965, + "learning_rate": 1.6047486255208215e-05, + "loss": 1.3186, + "step": 52305 + }, + { + "epoch": 0.63, + "grad_norm": 12.489252674634788, + "learning_rate": 1.604702099591292e-05, + "loss": 1.4073, + "step": 52308 + }, + { + "epoch": 0.63, + "grad_norm": 48.67806474472786, + "learning_rate": 1.604655571598132e-05, + "loss": 1.7017, + "step": 52311 + }, + { + "epoch": 0.63, + "grad_norm": 9.083053820827908, + "learning_rate": 1.6046090415415005e-05, + "loss": 0.8462, + "step": 52314 + }, + { + "epoch": 0.63, + "grad_norm": 14.789711789932898, + "learning_rate": 1.604562509421556e-05, + "loss": 1.3616, + "step": 52317 + }, + { + "epoch": 0.63, + "grad_norm": 12.11740273920781, + "learning_rate": 1.6045159752384572e-05, + "loss": 1.6526, + "step": 52320 + }, + { + "epoch": 0.63, + "grad_norm": 8.864846587888408, + "learning_rate": 1.6044694389923634e-05, + "loss": 1.5356, + "step": 52323 + }, + { + "epoch": 0.63, + "grad_norm": 12.866922788808788, + "learning_rate": 1.604422900683433e-05, + "loss": 1.2975, + "step": 52326 + }, + { + "epoch": 0.63, + "grad_norm": 39.70492036775735, + "learning_rate": 1.6043763603118252e-05, + "loss": 1.8431, + "step": 52329 + }, + { + "epoch": 0.63, + "grad_norm": 21.713827213563942, + "learning_rate": 1.6043298178776983e-05, + "loss": 1.5663, + "step": 52332 + }, + { + "epoch": 0.63, + "grad_norm": 27.080627958683557, + "learning_rate": 1.6042832733812116e-05, + "loss": 1.4014, + "step": 52335 + }, + { + "epoch": 0.63, + "grad_norm": 7.955392953823172, + "learning_rate": 1.6042367268225238e-05, + "loss": 1.0846, + "step": 52338 + }, + { + "epoch": 0.63, + "grad_norm": 86.78273886849284, + "learning_rate": 1.6041901782017933e-05, + "loss": 1.2842, + "step": 52341 + }, + { + "epoch": 0.63, + "grad_norm": 11.444897429885446, + "learning_rate": 1.60414362751918e-05, + "loss": 1.0382, + "step": 52344 + }, + { + "epoch": 0.63, + "grad_norm": 23.746834639540452, + "learning_rate": 1.6040970747748413e-05, + "loss": 1.8627, + "step": 52347 + }, + { + "epoch": 0.63, + "grad_norm": 6.6751273837029, + "learning_rate": 1.6040505199689376e-05, + "loss": 1.5123, + "step": 52350 + }, + { + "epoch": 0.63, + "grad_norm": 12.017251329588971, + "learning_rate": 1.6040039631016268e-05, + "loss": 1.4133, + "step": 52353 + }, + { + "epoch": 0.63, + "grad_norm": 12.00166721730575, + "learning_rate": 1.6039574041730678e-05, + "loss": 1.4954, + "step": 52356 + }, + { + "epoch": 0.63, + "grad_norm": 18.947578437275947, + "learning_rate": 1.60391084318342e-05, + "loss": 1.3091, + "step": 52359 + }, + { + "epoch": 0.63, + "grad_norm": 37.57996552036875, + "learning_rate": 1.6038642801328422e-05, + "loss": 1.6542, + "step": 52362 + }, + { + "epoch": 0.63, + "grad_norm": 6.2156132031529445, + "learning_rate": 1.6038177150214926e-05, + "loss": 1.1676, + "step": 52365 + }, + { + "epoch": 0.63, + "grad_norm": 10.212463753087327, + "learning_rate": 1.603771147849531e-05, + "loss": 1.5162, + "step": 52368 + }, + { + "epoch": 0.63, + "grad_norm": 16.563276867591618, + "learning_rate": 1.603724578617116e-05, + "loss": 1.4458, + "step": 52371 + }, + { + "epoch": 0.63, + "grad_norm": 8.854161320764927, + "learning_rate": 1.603678007324406e-05, + "loss": 1.1672, + "step": 52374 + }, + { + "epoch": 0.63, + "grad_norm": 27.763771623314653, + "learning_rate": 1.603631433971561e-05, + "loss": 1.7637, + "step": 52377 + }, + { + "epoch": 0.63, + "grad_norm": 8.294174122698514, + "learning_rate": 1.603584858558739e-05, + "loss": 1.2449, + "step": 52380 + }, + { + "epoch": 0.63, + "grad_norm": 19.008034567958514, + "learning_rate": 1.6035382810860995e-05, + "loss": 1.415, + "step": 52383 + }, + { + "epoch": 0.63, + "grad_norm": 13.767567278986602, + "learning_rate": 1.603491701553801e-05, + "loss": 1.2634, + "step": 52386 + }, + { + "epoch": 0.63, + "grad_norm": 35.54681860923846, + "learning_rate": 1.6034451199620027e-05, + "loss": 1.7228, + "step": 52389 + }, + { + "epoch": 0.63, + "grad_norm": 22.127183022152945, + "learning_rate": 1.6033985363108637e-05, + "loss": 1.5136, + "step": 52392 + }, + { + "epoch": 0.63, + "grad_norm": 26.77579749568224, + "learning_rate": 1.6033519506005428e-05, + "loss": 1.3381, + "step": 52395 + }, + { + "epoch": 0.63, + "grad_norm": 39.2191073560468, + "learning_rate": 1.6033053628311993e-05, + "loss": 1.5297, + "step": 52398 + }, + { + "epoch": 0.63, + "grad_norm": 7.045523912408788, + "learning_rate": 1.6032587730029915e-05, + "loss": 1.3502, + "step": 52401 + }, + { + "epoch": 0.63, + "grad_norm": 8.373798912388269, + "learning_rate": 1.603212181116079e-05, + "loss": 1.4209, + "step": 52404 + }, + { + "epoch": 0.63, + "grad_norm": 48.05144348768966, + "learning_rate": 1.6031655871706205e-05, + "loss": 1.3776, + "step": 52407 + }, + { + "epoch": 0.63, + "grad_norm": 39.65676455829196, + "learning_rate": 1.6031189911667748e-05, + "loss": 1.3841, + "step": 52410 + }, + { + "epoch": 0.63, + "grad_norm": 19.767573872451468, + "learning_rate": 1.603072393104702e-05, + "loss": 1.2496, + "step": 52413 + }, + { + "epoch": 0.63, + "grad_norm": 25.503862443851613, + "learning_rate": 1.6030257929845597e-05, + "loss": 1.1344, + "step": 52416 + }, + { + "epoch": 0.63, + "grad_norm": 22.941958237104036, + "learning_rate": 1.602979190806508e-05, + "loss": 1.0694, + "step": 52419 + }, + { + "epoch": 0.63, + "grad_norm": 7.035714169523173, + "learning_rate": 1.602932586570705e-05, + "loss": 1.5638, + "step": 52422 + }, + { + "epoch": 0.63, + "grad_norm": 30.594711543440848, + "learning_rate": 1.6028859802773107e-05, + "loss": 1.2794, + "step": 52425 + }, + { + "epoch": 0.63, + "grad_norm": 34.69206469284978, + "learning_rate": 1.6028393719264835e-05, + "loss": 1.7541, + "step": 52428 + }, + { + "epoch": 0.63, + "grad_norm": 18.06678702748484, + "learning_rate": 1.6027927615183828e-05, + "loss": 1.4498, + "step": 52431 + }, + { + "epoch": 0.63, + "grad_norm": 50.51173439596164, + "learning_rate": 1.6027461490531676e-05, + "loss": 1.478, + "step": 52434 + }, + { + "epoch": 0.63, + "grad_norm": 26.232354964284905, + "learning_rate": 1.6026995345309963e-05, + "loss": 1.2006, + "step": 52437 + }, + { + "epoch": 0.63, + "grad_norm": 8.760344509392954, + "learning_rate": 1.602652917952029e-05, + "loss": 1.4698, + "step": 52440 + }, + { + "epoch": 0.63, + "grad_norm": 32.00216273443325, + "learning_rate": 1.6026062993164245e-05, + "loss": 1.1537, + "step": 52443 + }, + { + "epoch": 0.63, + "grad_norm": 13.987212960609964, + "learning_rate": 1.602559678624342e-05, + "loss": 1.2688, + "step": 52446 + }, + { + "epoch": 0.63, + "grad_norm": 2.7686670323732714, + "learning_rate": 1.6025130558759397e-05, + "loss": 1.542, + "step": 52449 + }, + { + "epoch": 0.63, + "grad_norm": 13.956509425202968, + "learning_rate": 1.602466431071378e-05, + "loss": 1.177, + "step": 52452 + }, + { + "epoch": 0.63, + "grad_norm": 12.577242144933107, + "learning_rate": 1.602419804210815e-05, + "loss": 1.1944, + "step": 52455 + }, + { + "epoch": 0.63, + "grad_norm": 6.23174554914973, + "learning_rate": 1.6023731752944102e-05, + "loss": 1.3429, + "step": 52458 + }, + { + "epoch": 0.63, + "grad_norm": 39.83314983655395, + "learning_rate": 1.6023265443223228e-05, + "loss": 1.4874, + "step": 52461 + }, + { + "epoch": 0.63, + "grad_norm": 7.941133726787072, + "learning_rate": 1.602279911294712e-05, + "loss": 1.5592, + "step": 52464 + }, + { + "epoch": 0.63, + "grad_norm": 7.430293202477631, + "learning_rate": 1.6022332762117367e-05, + "loss": 1.7128, + "step": 52467 + }, + { + "epoch": 0.63, + "grad_norm": 4.3159413548924315, + "learning_rate": 1.602186639073556e-05, + "loss": 1.214, + "step": 52470 + }, + { + "epoch": 0.63, + "grad_norm": 9.197075640127006, + "learning_rate": 1.6021399998803294e-05, + "loss": 1.2958, + "step": 52473 + }, + { + "epoch": 0.63, + "grad_norm": 31.085703168390197, + "learning_rate": 1.6020933586322157e-05, + "loss": 1.2367, + "step": 52476 + }, + { + "epoch": 0.63, + "grad_norm": 18.641983111276062, + "learning_rate": 1.6020467153293746e-05, + "loss": 1.956, + "step": 52479 + }, + { + "epoch": 0.63, + "grad_norm": 6.729249008777076, + "learning_rate": 1.6020000699719643e-05, + "loss": 1.2433, + "step": 52482 + }, + { + "epoch": 0.63, + "grad_norm": 23.70841770050581, + "learning_rate": 1.6019534225601453e-05, + "loss": 1.3795, + "step": 52485 + }, + { + "epoch": 0.63, + "grad_norm": 46.57551440302911, + "learning_rate": 1.6019067730940756e-05, + "loss": 1.3199, + "step": 52488 + }, + { + "epoch": 0.63, + "grad_norm": 8.756123381296865, + "learning_rate": 1.6018601215739148e-05, + "loss": 1.3561, + "step": 52491 + }, + { + "epoch": 0.63, + "grad_norm": 10.602646287201056, + "learning_rate": 1.6018134679998224e-05, + "loss": 1.4628, + "step": 52494 + }, + { + "epoch": 0.63, + "grad_norm": 3.8179090131238067, + "learning_rate": 1.6017668123719574e-05, + "loss": 1.5492, + "step": 52497 + }, + { + "epoch": 0.63, + "grad_norm": 10.683533237135995, + "learning_rate": 1.6017201546904796e-05, + "loss": 1.4954, + "step": 52500 + }, + { + "epoch": 0.63, + "grad_norm": 12.616965725448088, + "learning_rate": 1.601673494955547e-05, + "loss": 1.484, + "step": 52503 + }, + { + "epoch": 0.63, + "grad_norm": 12.628247869245785, + "learning_rate": 1.6016268331673196e-05, + "loss": 1.4937, + "step": 52506 + }, + { + "epoch": 0.63, + "grad_norm": 29.64895714717571, + "learning_rate": 1.6015801693259563e-05, + "loss": 1.2254, + "step": 52509 + }, + { + "epoch": 0.63, + "grad_norm": 16.38921398667803, + "learning_rate": 1.601533503431617e-05, + "loss": 1.5773, + "step": 52512 + }, + { + "epoch": 0.63, + "grad_norm": 23.388143477303, + "learning_rate": 1.6014868354844603e-05, + "loss": 1.174, + "step": 52515 + }, + { + "epoch": 0.63, + "grad_norm": 14.326787038173855, + "learning_rate": 1.6014401654846453e-05, + "loss": 1.2103, + "step": 52518 + }, + { + "epoch": 0.63, + "grad_norm": 37.95871059726474, + "learning_rate": 1.6013934934323323e-05, + "loss": 1.777, + "step": 52521 + }, + { + "epoch": 0.63, + "grad_norm": 8.605337277396877, + "learning_rate": 1.6013468193276795e-05, + "loss": 1.3402, + "step": 52524 + }, + { + "epoch": 0.63, + "grad_norm": 9.344143824279787, + "learning_rate": 1.601300143170847e-05, + "loss": 1.556, + "step": 52527 + }, + { + "epoch": 0.63, + "grad_norm": 6.39002214360568, + "learning_rate": 1.6012534649619934e-05, + "loss": 1.737, + "step": 52530 + }, + { + "epoch": 0.63, + "grad_norm": 17.928975352458455, + "learning_rate": 1.6012067847012783e-05, + "loss": 1.8986, + "step": 52533 + }, + { + "epoch": 0.63, + "grad_norm": 27.03555502785691, + "learning_rate": 1.6011601023888613e-05, + "loss": 1.4137, + "step": 52536 + }, + { + "epoch": 0.63, + "grad_norm": 16.571546431750676, + "learning_rate": 1.601113418024901e-05, + "loss": 1.6275, + "step": 52539 + }, + { + "epoch": 0.63, + "grad_norm": 24.411298446535067, + "learning_rate": 1.6010667316095574e-05, + "loss": 1.047, + "step": 52542 + }, + { + "epoch": 0.63, + "grad_norm": 45.88309697638151, + "learning_rate": 1.6010200431429894e-05, + "loss": 1.6733, + "step": 52545 + }, + { + "epoch": 0.63, + "grad_norm": 9.560416279075675, + "learning_rate": 1.6009733526253564e-05, + "loss": 1.2472, + "step": 52548 + }, + { + "epoch": 0.63, + "grad_norm": 44.951090342826234, + "learning_rate": 1.6009266600568182e-05, + "loss": 1.2476, + "step": 52551 + }, + { + "epoch": 0.63, + "grad_norm": 4.693690147766627, + "learning_rate": 1.6008799654375336e-05, + "loss": 1.2774, + "step": 52554 + }, + { + "epoch": 0.63, + "grad_norm": 7.667222356782194, + "learning_rate": 1.600833268767662e-05, + "loss": 1.0522, + "step": 52557 + }, + { + "epoch": 0.63, + "grad_norm": 5.378744853845773, + "learning_rate": 1.6007865700473625e-05, + "loss": 1.3043, + "step": 52560 + }, + { + "epoch": 0.63, + "grad_norm": 14.259675202209205, + "learning_rate": 1.6007398692767956e-05, + "loss": 1.7642, + "step": 52563 + }, + { + "epoch": 0.63, + "grad_norm": 4.7653661454068175, + "learning_rate": 1.6006931664561195e-05, + "loss": 1.3498, + "step": 52566 + }, + { + "epoch": 0.63, + "grad_norm": 29.241951418752837, + "learning_rate": 1.600646461585494e-05, + "loss": 1.1177, + "step": 52569 + }, + { + "epoch": 0.63, + "grad_norm": 3.0405055239129597, + "learning_rate": 1.6005997546650786e-05, + "loss": 1.2142, + "step": 52572 + }, + { + "epoch": 0.63, + "grad_norm": 8.27586684193367, + "learning_rate": 1.6005530456950324e-05, + "loss": 1.5608, + "step": 52575 + }, + { + "epoch": 0.63, + "grad_norm": 36.92569931821559, + "learning_rate": 1.600506334675515e-05, + "loss": 1.1786, + "step": 52578 + }, + { + "epoch": 0.63, + "grad_norm": 12.95934638266052, + "learning_rate": 1.6004596216066858e-05, + "loss": 1.4892, + "step": 52581 + }, + { + "epoch": 0.63, + "grad_norm": 18.63669234413482, + "learning_rate": 1.600412906488704e-05, + "loss": 1.612, + "step": 52584 + }, + { + "epoch": 0.63, + "grad_norm": 26.843084229122184, + "learning_rate": 1.6003661893217297e-05, + "loss": 1.2107, + "step": 52587 + }, + { + "epoch": 0.63, + "grad_norm": 13.01876685150816, + "learning_rate": 1.6003194701059216e-05, + "loss": 1.561, + "step": 52590 + }, + { + "epoch": 0.63, + "grad_norm": 11.355530692796778, + "learning_rate": 1.6002727488414395e-05, + "loss": 1.2697, + "step": 52593 + }, + { + "epoch": 0.63, + "grad_norm": 20.67701203090038, + "learning_rate": 1.6002260255284426e-05, + "loss": 1.2982, + "step": 52596 + }, + { + "epoch": 0.63, + "grad_norm": 2.1254093509585448, + "learning_rate": 1.6001793001670902e-05, + "loss": 1.5513, + "step": 52599 + }, + { + "epoch": 0.63, + "grad_norm": 2.8688785716388243, + "learning_rate": 1.6001325727575427e-05, + "loss": 1.9564, + "step": 52602 + }, + { + "epoch": 0.63, + "grad_norm": 29.775230239979475, + "learning_rate": 1.6000858432999583e-05, + "loss": 1.2332, + "step": 52605 + }, + { + "epoch": 0.63, + "grad_norm": 18.434910348652267, + "learning_rate": 1.6000391117944976e-05, + "loss": 1.4514, + "step": 52608 + }, + { + "epoch": 0.63, + "grad_norm": 7.013664326415222, + "learning_rate": 1.5999923782413192e-05, + "loss": 1.3618, + "step": 52611 + }, + { + "epoch": 0.63, + "grad_norm": 6.014888815238729, + "learning_rate": 1.599945642640583e-05, + "loss": 1.5178, + "step": 52614 + }, + { + "epoch": 0.63, + "grad_norm": 21.303525321084717, + "learning_rate": 1.599898904992448e-05, + "loss": 1.5232, + "step": 52617 + }, + { + "epoch": 0.63, + "grad_norm": 16.04356508857481, + "learning_rate": 1.5998521652970747e-05, + "loss": 1.3812, + "step": 52620 + }, + { + "epoch": 0.63, + "grad_norm": 8.313821364237329, + "learning_rate": 1.5998054235546218e-05, + "loss": 1.3613, + "step": 52623 + }, + { + "epoch": 0.63, + "grad_norm": 12.620358544616618, + "learning_rate": 1.599758679765249e-05, + "loss": 1.6781, + "step": 52626 + }, + { + "epoch": 0.63, + "grad_norm": 22.712023382185464, + "learning_rate": 1.599711933929116e-05, + "loss": 1.662, + "step": 52629 + }, + { + "epoch": 0.63, + "grad_norm": 11.379291373504879, + "learning_rate": 1.599665186046382e-05, + "loss": 1.5965, + "step": 52632 + }, + { + "epoch": 0.63, + "grad_norm": 30.58016830630589, + "learning_rate": 1.5996184361172068e-05, + "loss": 1.2175, + "step": 52635 + }, + { + "epoch": 0.63, + "grad_norm": 9.426731743013708, + "learning_rate": 1.5995716841417502e-05, + "loss": 1.271, + "step": 52638 + }, + { + "epoch": 0.63, + "grad_norm": 21.42588080551856, + "learning_rate": 1.599524930120171e-05, + "loss": 1.4542, + "step": 52641 + }, + { + "epoch": 0.63, + "grad_norm": 8.9165300004141, + "learning_rate": 1.5994781740526292e-05, + "loss": 1.3639, + "step": 52644 + }, + { + "epoch": 0.63, + "grad_norm": 4.2873125979997, + "learning_rate": 1.5994314159392846e-05, + "loss": 0.9808, + "step": 52647 + }, + { + "epoch": 0.63, + "grad_norm": 23.85284517628531, + "learning_rate": 1.599384655780296e-05, + "loss": 1.1704, + "step": 52650 + }, + { + "epoch": 0.63, + "grad_norm": 6.342166242386108, + "learning_rate": 1.5993378935758237e-05, + "loss": 1.367, + "step": 52653 + }, + { + "epoch": 0.63, + "grad_norm": 18.35290150627715, + "learning_rate": 1.5992911293260273e-05, + "loss": 1.6181, + "step": 52656 + }, + { + "epoch": 0.63, + "grad_norm": 14.877565484411528, + "learning_rate": 1.5992443630310657e-05, + "loss": 1.2375, + "step": 52659 + }, + { + "epoch": 0.63, + "grad_norm": 5.687371635899157, + "learning_rate": 1.5991975946910993e-05, + "loss": 1.4999, + "step": 52662 + }, + { + "epoch": 0.63, + "grad_norm": 6.135452380634021, + "learning_rate": 1.5991508243062873e-05, + "loss": 1.6373, + "step": 52665 + }, + { + "epoch": 0.63, + "grad_norm": 2.8117782566875564, + "learning_rate": 1.599104051876789e-05, + "loss": 1.6064, + "step": 52668 + }, + { + "epoch": 0.63, + "grad_norm": 13.444695457535268, + "learning_rate": 1.5990572774027648e-05, + "loss": 1.3523, + "step": 52671 + }, + { + "epoch": 0.63, + "grad_norm": 9.324844461257342, + "learning_rate": 1.5990105008843734e-05, + "loss": 1.2709, + "step": 52674 + }, + { + "epoch": 0.63, + "grad_norm": 8.341844331027186, + "learning_rate": 1.5989637223217754e-05, + "loss": 1.0146, + "step": 52677 + }, + { + "epoch": 0.63, + "grad_norm": 16.76988955050669, + "learning_rate": 1.5989169417151297e-05, + "loss": 1.5035, + "step": 52680 + }, + { + "epoch": 0.63, + "grad_norm": 18.53866826653584, + "learning_rate": 1.5988701590645965e-05, + "loss": 1.3506, + "step": 52683 + }, + { + "epoch": 0.63, + "grad_norm": 11.482395596677621, + "learning_rate": 1.5988233743703348e-05, + "loss": 1.7521, + "step": 52686 + }, + { + "epoch": 0.63, + "grad_norm": 5.351032556021903, + "learning_rate": 1.598776587632505e-05, + "loss": 1.463, + "step": 52689 + }, + { + "epoch": 0.63, + "grad_norm": 15.441465619222328, + "learning_rate": 1.598729798851266e-05, + "loss": 1.3849, + "step": 52692 + }, + { + "epoch": 0.63, + "grad_norm": 8.376812845041616, + "learning_rate": 1.598683008026778e-05, + "loss": 1.2986, + "step": 52695 + }, + { + "epoch": 0.63, + "grad_norm": 9.809689288134726, + "learning_rate": 1.5986362151592005e-05, + "loss": 1.4609, + "step": 52698 + }, + { + "epoch": 0.63, + "grad_norm": 12.214720567456318, + "learning_rate": 1.5985894202486935e-05, + "loss": 1.2019, + "step": 52701 + }, + { + "epoch": 0.63, + "grad_norm": 2.562566928773474, + "learning_rate": 1.598542623295416e-05, + "loss": 1.4825, + "step": 52704 + }, + { + "epoch": 0.63, + "grad_norm": 21.372607513670363, + "learning_rate": 1.5984958242995283e-05, + "loss": 1.2015, + "step": 52707 + }, + { + "epoch": 0.63, + "grad_norm": 22.053369592935486, + "learning_rate": 1.59844902326119e-05, + "loss": 1.2429, + "step": 52710 + }, + { + "epoch": 0.63, + "grad_norm": 15.935954591424016, + "learning_rate": 1.5984022201805608e-05, + "loss": 1.1186, + "step": 52713 + }, + { + "epoch": 0.63, + "grad_norm": 17.87381352708295, + "learning_rate": 1.5983554150578004e-05, + "loss": 1.3031, + "step": 52716 + }, + { + "epoch": 0.63, + "grad_norm": 5.123652479572789, + "learning_rate": 1.5983086078930682e-05, + "loss": 1.2948, + "step": 52719 + }, + { + "epoch": 0.63, + "grad_norm": 3.571117741398791, + "learning_rate": 1.5982617986865244e-05, + "loss": 1.6455, + "step": 52722 + }, + { + "epoch": 0.63, + "grad_norm": 13.17260795066731, + "learning_rate": 1.5982149874383287e-05, + "loss": 1.4573, + "step": 52725 + }, + { + "epoch": 0.63, + "grad_norm": 11.349836186921836, + "learning_rate": 1.5981681741486402e-05, + "loss": 1.5084, + "step": 52728 + }, + { + "epoch": 0.63, + "grad_norm": 13.91687774332555, + "learning_rate": 1.5981213588176197e-05, + "loss": 1.1034, + "step": 52731 + }, + { + "epoch": 0.63, + "grad_norm": 3.307459145016468, + "learning_rate": 1.5980745414454265e-05, + "loss": 1.5225, + "step": 52734 + }, + { + "epoch": 0.63, + "grad_norm": 16.113520121302532, + "learning_rate": 1.59802772203222e-05, + "loss": 1.3414, + "step": 52737 + }, + { + "epoch": 0.63, + "grad_norm": 8.568452449368879, + "learning_rate": 1.5979809005781603e-05, + "loss": 1.0244, + "step": 52740 + }, + { + "epoch": 0.63, + "grad_norm": 10.373850457565899, + "learning_rate": 1.5979340770834074e-05, + "loss": 1.258, + "step": 52743 + }, + { + "epoch": 0.63, + "grad_norm": 19.131179247247083, + "learning_rate": 1.5978872515481206e-05, + "loss": 1.1394, + "step": 52746 + }, + { + "epoch": 0.63, + "grad_norm": 3.5620897015880675, + "learning_rate": 1.5978404239724602e-05, + "loss": 1.5203, + "step": 52749 + }, + { + "epoch": 0.63, + "grad_norm": 12.212486685615904, + "learning_rate": 1.5977935943565857e-05, + "loss": 1.3386, + "step": 52752 + }, + { + "epoch": 0.63, + "grad_norm": 4.563967193828925, + "learning_rate": 1.597746762700657e-05, + "loss": 1.2711, + "step": 52755 + }, + { + "epoch": 0.63, + "grad_norm": 3.209521470143555, + "learning_rate": 1.597699929004834e-05, + "loss": 1.3714, + "step": 52758 + }, + { + "epoch": 0.63, + "grad_norm": 40.45479765368318, + "learning_rate": 1.5976530932692763e-05, + "loss": 1.5197, + "step": 52761 + }, + { + "epoch": 0.63, + "grad_norm": 14.701088539990694, + "learning_rate": 1.5976062554941442e-05, + "loss": 1.1454, + "step": 52764 + }, + { + "epoch": 0.63, + "grad_norm": 24.84615909782875, + "learning_rate": 1.5975594156795967e-05, + "loss": 1.1744, + "step": 52767 + }, + { + "epoch": 0.63, + "grad_norm": 7.780026530380517, + "learning_rate": 1.5975125738257942e-05, + "loss": 2.0472, + "step": 52770 + }, + { + "epoch": 0.63, + "grad_norm": 8.738093829188584, + "learning_rate": 1.5974657299328965e-05, + "loss": 1.1237, + "step": 52773 + }, + { + "epoch": 0.63, + "grad_norm": 10.680160728326355, + "learning_rate": 1.597418884001064e-05, + "loss": 1.5444, + "step": 52776 + }, + { + "epoch": 0.63, + "grad_norm": 13.919115351912115, + "learning_rate": 1.5973720360304557e-05, + "loss": 1.4548, + "step": 52779 + }, + { + "epoch": 0.63, + "grad_norm": 7.221306023973642, + "learning_rate": 1.5973251860212317e-05, + "loss": 1.4248, + "step": 52782 + }, + { + "epoch": 0.63, + "grad_norm": 13.38741285250737, + "learning_rate": 1.5972783339735524e-05, + "loss": 1.1154, + "step": 52785 + }, + { + "epoch": 0.63, + "grad_norm": 21.77965322079664, + "learning_rate": 1.5972314798875768e-05, + "loss": 1.269, + "step": 52788 + }, + { + "epoch": 0.63, + "grad_norm": 14.778052753947085, + "learning_rate": 1.5971846237634654e-05, + "loss": 1.3557, + "step": 52791 + }, + { + "epoch": 0.63, + "grad_norm": 67.47304735048202, + "learning_rate": 1.597137765601378e-05, + "loss": 1.21, + "step": 52794 + }, + { + "epoch": 0.63, + "grad_norm": 24.611234193788995, + "learning_rate": 1.5970909054014743e-05, + "loss": 1.2498, + "step": 52797 + }, + { + "epoch": 0.63, + "grad_norm": 14.195150126047453, + "learning_rate": 1.597044043163915e-05, + "loss": 1.5137, + "step": 52800 + }, + { + "epoch": 0.63, + "grad_norm": 7.792215229577566, + "learning_rate": 1.596997178888859e-05, + "loss": 1.3969, + "step": 52803 + }, + { + "epoch": 0.63, + "grad_norm": 18.88468459306748, + "learning_rate": 1.596950312576467e-05, + "loss": 1.4827, + "step": 52806 + }, + { + "epoch": 0.64, + "grad_norm": 33.532038009341015, + "learning_rate": 1.596903444226898e-05, + "loss": 1.6234, + "step": 52809 + }, + { + "epoch": 0.64, + "grad_norm": 8.457957175691824, + "learning_rate": 1.596856573840313e-05, + "loss": 1.7056, + "step": 52812 + }, + { + "epoch": 0.64, + "grad_norm": 11.22742167537593, + "learning_rate": 1.5968097014168712e-05, + "loss": 0.9892, + "step": 52815 + }, + { + "epoch": 0.64, + "grad_norm": 6.921346388895614, + "learning_rate": 1.596762826956733e-05, + "loss": 1.9098, + "step": 52818 + }, + { + "epoch": 0.64, + "grad_norm": 103.22786771178053, + "learning_rate": 1.5967159504600586e-05, + "loss": 1.4877, + "step": 52821 + }, + { + "epoch": 0.64, + "grad_norm": 9.91778051082409, + "learning_rate": 1.5966690719270072e-05, + "loss": 1.4508, + "step": 52824 + }, + { + "epoch": 0.64, + "grad_norm": 39.279164698700875, + "learning_rate": 1.596622191357739e-05, + "loss": 1.0368, + "step": 52827 + }, + { + "epoch": 0.64, + "grad_norm": 9.944723179577894, + "learning_rate": 1.5965753087524145e-05, + "loss": 1.6366, + "step": 52830 + }, + { + "epoch": 0.64, + "grad_norm": 19.470927242889925, + "learning_rate": 1.5965284241111934e-05, + "loss": 1.4853, + "step": 52833 + }, + { + "epoch": 0.64, + "grad_norm": 7.056404731383403, + "learning_rate": 1.5964815374342356e-05, + "loss": 1.6177, + "step": 52836 + }, + { + "epoch": 0.64, + "grad_norm": 26.503946596578256, + "learning_rate": 1.596434648721701e-05, + "loss": 1.7125, + "step": 52839 + }, + { + "epoch": 0.64, + "grad_norm": 19.588951713099288, + "learning_rate": 1.5963877579737496e-05, + "loss": 1.3379, + "step": 52842 + }, + { + "epoch": 0.64, + "grad_norm": 7.342441710132271, + "learning_rate": 1.5963408651905422e-05, + "loss": 1.2451, + "step": 52845 + }, + { + "epoch": 0.64, + "grad_norm": 7.334161741887947, + "learning_rate": 1.5962939703722378e-05, + "loss": 1.3183, + "step": 52848 + }, + { + "epoch": 0.64, + "grad_norm": 16.662088394432722, + "learning_rate": 1.596247073518997e-05, + "loss": 1.1166, + "step": 52851 + }, + { + "epoch": 0.64, + "grad_norm": 23.297932197351678, + "learning_rate": 1.5962001746309796e-05, + "loss": 1.4732, + "step": 52854 + }, + { + "epoch": 0.64, + "grad_norm": 15.676145050066156, + "learning_rate": 1.596153273708346e-05, + "loss": 1.263, + "step": 52857 + }, + { + "epoch": 0.64, + "grad_norm": 35.92506950257697, + "learning_rate": 1.5961063707512558e-05, + "loss": 1.2335, + "step": 52860 + }, + { + "epoch": 0.64, + "grad_norm": 21.842069267449755, + "learning_rate": 1.5960594657598695e-05, + "loss": 1.5264, + "step": 52863 + }, + { + "epoch": 0.64, + "grad_norm": 11.215940175753806, + "learning_rate": 1.5960125587343466e-05, + "loss": 1.576, + "step": 52866 + }, + { + "epoch": 0.64, + "grad_norm": 26.694990472345165, + "learning_rate": 1.5959656496748474e-05, + "loss": 1.5803, + "step": 52869 + }, + { + "epoch": 0.64, + "grad_norm": 15.86553179217513, + "learning_rate": 1.5959187385815328e-05, + "loss": 1.1482, + "step": 52872 + }, + { + "epoch": 0.64, + "grad_norm": 29.854398028209722, + "learning_rate": 1.5958718254545614e-05, + "loss": 1.8206, + "step": 52875 + }, + { + "epoch": 0.64, + "grad_norm": 10.950725192425885, + "learning_rate": 1.5958249102940947e-05, + "loss": 1.5853, + "step": 52878 + }, + { + "epoch": 0.64, + "grad_norm": 6.088688630811339, + "learning_rate": 1.595777993100292e-05, + "loss": 1.1723, + "step": 52881 + }, + { + "epoch": 0.64, + "grad_norm": 9.91199878677975, + "learning_rate": 1.5957310738733135e-05, + "loss": 1.5569, + "step": 52884 + }, + { + "epoch": 0.64, + "grad_norm": 25.007074197817328, + "learning_rate": 1.5956841526133192e-05, + "loss": 1.4163, + "step": 52887 + }, + { + "epoch": 0.64, + "grad_norm": 11.398140338757715, + "learning_rate": 1.5956372293204702e-05, + "loss": 1.2902, + "step": 52890 + }, + { + "epoch": 0.64, + "grad_norm": 10.785320582456679, + "learning_rate": 1.595590303994925e-05, + "loss": 0.9828, + "step": 52893 + }, + { + "epoch": 0.64, + "grad_norm": 6.770905955633644, + "learning_rate": 1.595543376636845e-05, + "loss": 1.6893, + "step": 52896 + }, + { + "epoch": 0.64, + "grad_norm": 6.444398006758593, + "learning_rate": 1.5954964472463903e-05, + "loss": 1.6164, + "step": 52899 + }, + { + "epoch": 0.64, + "grad_norm": 38.540486543211095, + "learning_rate": 1.59544951582372e-05, + "loss": 1.2458, + "step": 52902 + }, + { + "epoch": 0.64, + "grad_norm": 7.406013784270707, + "learning_rate": 1.5954025823689958e-05, + "loss": 1.2174, + "step": 52905 + }, + { + "epoch": 0.64, + "grad_norm": 9.61726086695149, + "learning_rate": 1.5953556468823767e-05, + "loss": 1.2365, + "step": 52908 + }, + { + "epoch": 0.64, + "grad_norm": 8.258157032170693, + "learning_rate": 1.5953087093640234e-05, + "loss": 1.5442, + "step": 52911 + }, + { + "epoch": 0.64, + "grad_norm": 38.536035184691706, + "learning_rate": 1.5952617698140954e-05, + "loss": 1.5578, + "step": 52914 + }, + { + "epoch": 0.64, + "grad_norm": 18.947840010564306, + "learning_rate": 1.595214828232754e-05, + "loss": 1.2113, + "step": 52917 + }, + { + "epoch": 0.64, + "grad_norm": 13.94421104135873, + "learning_rate": 1.5951678846201583e-05, + "loss": 1.6658, + "step": 52920 + }, + { + "epoch": 0.64, + "grad_norm": 35.33838112839754, + "learning_rate": 1.5951209389764693e-05, + "loss": 1.5575, + "step": 52923 + }, + { + "epoch": 0.64, + "grad_norm": 2.291593303159883, + "learning_rate": 1.5950739913018467e-05, + "loss": 1.5117, + "step": 52926 + }, + { + "epoch": 0.64, + "grad_norm": 20.30162849742909, + "learning_rate": 1.5950270415964507e-05, + "loss": 1.5977, + "step": 52929 + }, + { + "epoch": 0.64, + "grad_norm": 14.454375186642446, + "learning_rate": 1.5949800898604424e-05, + "loss": 1.5995, + "step": 52932 + }, + { + "epoch": 0.64, + "grad_norm": 5.786532081099041, + "learning_rate": 1.594933136093981e-05, + "loss": 1.613, + "step": 52935 + }, + { + "epoch": 0.64, + "grad_norm": 4.911262586479856, + "learning_rate": 1.5948861802972268e-05, + "loss": 1.2703, + "step": 52938 + }, + { + "epoch": 0.64, + "grad_norm": 15.60814245101246, + "learning_rate": 1.5948392224703407e-05, + "loss": 1.215, + "step": 52941 + }, + { + "epoch": 0.64, + "grad_norm": 18.37068490775435, + "learning_rate": 1.5947922626134826e-05, + "loss": 0.9506, + "step": 52944 + }, + { + "epoch": 0.64, + "grad_norm": 11.148262958833007, + "learning_rate": 1.5947453007268126e-05, + "loss": 1.6552, + "step": 52947 + }, + { + "epoch": 0.64, + "grad_norm": 2.2632899012885264, + "learning_rate": 1.594698336810491e-05, + "loss": 1.4362, + "step": 52950 + }, + { + "epoch": 0.64, + "grad_norm": 11.367816075111378, + "learning_rate": 1.5946513708646784e-05, + "loss": 1.3921, + "step": 52953 + }, + { + "epoch": 0.64, + "grad_norm": 14.780136430822001, + "learning_rate": 1.594604402889535e-05, + "loss": 1.5946, + "step": 52956 + }, + { + "epoch": 0.64, + "grad_norm": 10.71967615110302, + "learning_rate": 1.5945574328852208e-05, + "loss": 1.6309, + "step": 52959 + }, + { + "epoch": 0.64, + "grad_norm": 10.820375544881362, + "learning_rate": 1.594510460851896e-05, + "loss": 1.3351, + "step": 52962 + }, + { + "epoch": 0.64, + "grad_norm": 6.19872746186068, + "learning_rate": 1.5944634867897218e-05, + "loss": 1.4885, + "step": 52965 + }, + { + "epoch": 0.64, + "grad_norm": 43.86964243109091, + "learning_rate": 1.594416510698857e-05, + "loss": 1.5117, + "step": 52968 + }, + { + "epoch": 0.64, + "grad_norm": 19.257985734310704, + "learning_rate": 1.5943695325794632e-05, + "loss": 1.5086, + "step": 52971 + }, + { + "epoch": 0.64, + "grad_norm": 20.54082397406242, + "learning_rate": 1.5943225524317005e-05, + "loss": 1.4517, + "step": 52974 + }, + { + "epoch": 0.64, + "grad_norm": 12.501522545408495, + "learning_rate": 1.5942755702557285e-05, + "loss": 1.1501, + "step": 52977 + }, + { + "epoch": 0.64, + "grad_norm": 11.063633734664194, + "learning_rate": 1.5942285860517085e-05, + "loss": 1.7681, + "step": 52980 + }, + { + "epoch": 0.64, + "grad_norm": 7.733576542995986, + "learning_rate": 1.5941815998198e-05, + "loss": 1.2763, + "step": 52983 + }, + { + "epoch": 0.64, + "grad_norm": 39.85771681524932, + "learning_rate": 1.594134611560164e-05, + "loss": 1.2024, + "step": 52986 + }, + { + "epoch": 0.64, + "grad_norm": 19.610206703653752, + "learning_rate": 1.5940876212729604e-05, + "loss": 1.3625, + "step": 52989 + }, + { + "epoch": 0.64, + "grad_norm": 12.88889370116861, + "learning_rate": 1.5940406289583496e-05, + "loss": 1.3049, + "step": 52992 + }, + { + "epoch": 0.64, + "grad_norm": 18.06734478647789, + "learning_rate": 1.5939936346164925e-05, + "loss": 1.2864, + "step": 52995 + }, + { + "epoch": 0.64, + "grad_norm": 3.0604264754381045, + "learning_rate": 1.5939466382475487e-05, + "loss": 1.6131, + "step": 52998 + }, + { + "epoch": 0.64, + "grad_norm": 9.83236887410584, + "learning_rate": 1.5938996398516794e-05, + "loss": 1.5848, + "step": 53001 + }, + { + "epoch": 0.64, + "grad_norm": 6.011745788768899, + "learning_rate": 1.593852639429044e-05, + "loss": 1.5374, + "step": 53004 + }, + { + "epoch": 0.64, + "grad_norm": 6.465796256104982, + "learning_rate": 1.593805636979804e-05, + "loss": 1.1531, + "step": 53007 + }, + { + "epoch": 0.64, + "grad_norm": 10.711513216820002, + "learning_rate": 1.593758632504119e-05, + "loss": 1.4509, + "step": 53010 + }, + { + "epoch": 0.64, + "grad_norm": 9.655854837679218, + "learning_rate": 1.5937116260021494e-05, + "loss": 1.4113, + "step": 53013 + }, + { + "epoch": 0.64, + "grad_norm": 4.907262609707784, + "learning_rate": 1.593664617474056e-05, + "loss": 1.2704, + "step": 53016 + }, + { + "epoch": 0.64, + "grad_norm": 16.840449022515504, + "learning_rate": 1.5936176069199992e-05, + "loss": 1.4687, + "step": 53019 + }, + { + "epoch": 0.64, + "grad_norm": 9.810124849572981, + "learning_rate": 1.5935705943401398e-05, + "loss": 1.6683, + "step": 53022 + }, + { + "epoch": 0.64, + "grad_norm": 17.070207582292806, + "learning_rate": 1.5935235797346373e-05, + "loss": 1.262, + "step": 53025 + }, + { + "epoch": 0.64, + "grad_norm": 37.68159538288946, + "learning_rate": 1.5934765631036523e-05, + "loss": 1.1153, + "step": 53028 + }, + { + "epoch": 0.64, + "grad_norm": 19.460386997912668, + "learning_rate": 1.593429544447346e-05, + "loss": 1.3212, + "step": 53031 + }, + { + "epoch": 0.64, + "grad_norm": 6.548134771920892, + "learning_rate": 1.593382523765878e-05, + "loss": 1.461, + "step": 53034 + }, + { + "epoch": 0.64, + "grad_norm": 19.73832379482302, + "learning_rate": 1.5933355010594094e-05, + "loss": 1.0329, + "step": 53037 + }, + { + "epoch": 0.64, + "grad_norm": 3.043508455145211, + "learning_rate": 1.593288476328101e-05, + "loss": 1.3343, + "step": 53040 + }, + { + "epoch": 0.64, + "grad_norm": 20.0429655945692, + "learning_rate": 1.593241449572112e-05, + "loss": 1.3196, + "step": 53043 + }, + { + "epoch": 0.64, + "grad_norm": 88.7924172399789, + "learning_rate": 1.593194420791604e-05, + "loss": 1.5977, + "step": 53046 + }, + { + "epoch": 0.64, + "grad_norm": 19.386568561995418, + "learning_rate": 1.593147389986737e-05, + "loss": 1.3708, + "step": 53049 + }, + { + "epoch": 0.64, + "grad_norm": 16.171528207506388, + "learning_rate": 1.5931003571576717e-05, + "loss": 1.3942, + "step": 53052 + }, + { + "epoch": 0.64, + "grad_norm": 10.506292287951114, + "learning_rate": 1.5930533223045682e-05, + "loss": 1.4482, + "step": 53055 + }, + { + "epoch": 0.64, + "grad_norm": 9.486471002969749, + "learning_rate": 1.5930062854275873e-05, + "loss": 1.5286, + "step": 53058 + }, + { + "epoch": 0.64, + "grad_norm": 6.291558619310559, + "learning_rate": 1.59295924652689e-05, + "loss": 1.5112, + "step": 53061 + }, + { + "epoch": 0.64, + "grad_norm": 80.6000859634535, + "learning_rate": 1.592912205602636e-05, + "loss": 1.6514, + "step": 53064 + }, + { + "epoch": 0.64, + "grad_norm": 12.62013617589897, + "learning_rate": 1.5928651626549866e-05, + "loss": 1.3181, + "step": 53067 + }, + { + "epoch": 0.64, + "grad_norm": 2.3201889541360967, + "learning_rate": 1.5928181176841017e-05, + "loss": 1.4591, + "step": 53070 + }, + { + "epoch": 0.64, + "grad_norm": 26.625901215710872, + "learning_rate": 1.5927710706901423e-05, + "loss": 1.6873, + "step": 53073 + }, + { + "epoch": 0.64, + "grad_norm": 10.311056948012382, + "learning_rate": 1.5927240216732683e-05, + "loss": 1.2853, + "step": 53076 + }, + { + "epoch": 0.64, + "grad_norm": 26.777826353340167, + "learning_rate": 1.592676970633641e-05, + "loss": 1.2312, + "step": 53079 + }, + { + "epoch": 0.64, + "grad_norm": 37.10249438089831, + "learning_rate": 1.5926299175714208e-05, + "loss": 1.345, + "step": 53082 + }, + { + "epoch": 0.64, + "grad_norm": 17.912619514184307, + "learning_rate": 1.592582862486768e-05, + "loss": 1.3044, + "step": 53085 + }, + { + "epoch": 0.64, + "grad_norm": 23.617734966343527, + "learning_rate": 1.592535805379843e-05, + "loss": 1.5417, + "step": 53088 + }, + { + "epoch": 0.64, + "grad_norm": 7.391662757901054, + "learning_rate": 1.5924887462508075e-05, + "loss": 1.295, + "step": 53091 + }, + { + "epoch": 0.64, + "grad_norm": 32.391210912880645, + "learning_rate": 1.5924416850998207e-05, + "loss": 1.564, + "step": 53094 + }, + { + "epoch": 0.64, + "grad_norm": 10.925815955054311, + "learning_rate": 1.5923946219270437e-05, + "loss": 1.8825, + "step": 53097 + }, + { + "epoch": 0.64, + "grad_norm": 20.310473232948393, + "learning_rate": 1.5923475567326377e-05, + "loss": 1.5811, + "step": 53100 + }, + { + "epoch": 0.64, + "grad_norm": 9.512748942276058, + "learning_rate": 1.5923004895167628e-05, + "loss": 1.4264, + "step": 53103 + }, + { + "epoch": 0.64, + "grad_norm": 29.31942829384658, + "learning_rate": 1.5922534202795797e-05, + "loss": 1.2338, + "step": 53106 + }, + { + "epoch": 0.64, + "grad_norm": 7.779244561321247, + "learning_rate": 1.5922063490212488e-05, + "loss": 1.4113, + "step": 53109 + }, + { + "epoch": 0.64, + "grad_norm": 27.85974172647386, + "learning_rate": 1.592159275741931e-05, + "loss": 1.2409, + "step": 53112 + }, + { + "epoch": 0.64, + "grad_norm": 20.854458628471747, + "learning_rate": 1.592112200441787e-05, + "loss": 1.1774, + "step": 53115 + }, + { + "epoch": 0.64, + "grad_norm": 5.678818818102583, + "learning_rate": 1.592065123120977e-05, + "loss": 1.3604, + "step": 53118 + }, + { + "epoch": 0.64, + "grad_norm": 6.915716564041544, + "learning_rate": 1.5920180437796625e-05, + "loss": 1.7356, + "step": 53121 + }, + { + "epoch": 0.64, + "grad_norm": 24.527986637882233, + "learning_rate": 1.5919709624180036e-05, + "loss": 1.3502, + "step": 53124 + }, + { + "epoch": 0.64, + "grad_norm": 11.057028283214672, + "learning_rate": 1.5919238790361607e-05, + "loss": 1.1945, + "step": 53127 + }, + { + "epoch": 0.64, + "grad_norm": 16.5530651085957, + "learning_rate": 1.591876793634295e-05, + "loss": 1.5728, + "step": 53130 + }, + { + "epoch": 0.64, + "grad_norm": 13.481303598504113, + "learning_rate": 1.591829706212567e-05, + "loss": 1.2883, + "step": 53133 + }, + { + "epoch": 0.64, + "grad_norm": 12.641951399962457, + "learning_rate": 1.591782616771137e-05, + "loss": 1.2505, + "step": 53136 + }, + { + "epoch": 0.64, + "grad_norm": 19.241645619303437, + "learning_rate": 1.5917355253101666e-05, + "loss": 1.6533, + "step": 53139 + }, + { + "epoch": 0.64, + "grad_norm": 19.820889271891744, + "learning_rate": 1.591688431829816e-05, + "loss": 1.0753, + "step": 53142 + }, + { + "epoch": 0.64, + "grad_norm": 3.970446401360723, + "learning_rate": 1.5916413363302458e-05, + "loss": 1.4081, + "step": 53145 + }, + { + "epoch": 0.64, + "grad_norm": 11.524287984922506, + "learning_rate": 1.5915942388116164e-05, + "loss": 1.186, + "step": 53148 + }, + { + "epoch": 0.64, + "grad_norm": 19.467915898337523, + "learning_rate": 1.5915471392740895e-05, + "loss": 1.7421, + "step": 53151 + }, + { + "epoch": 0.64, + "grad_norm": 24.15580311566717, + "learning_rate": 1.5915000377178248e-05, + "loss": 1.3921, + "step": 53154 + }, + { + "epoch": 0.64, + "grad_norm": 12.22154306927842, + "learning_rate": 1.591452934142984e-05, + "loss": 1.3956, + "step": 53157 + }, + { + "epoch": 0.64, + "grad_norm": 9.026659858741825, + "learning_rate": 1.5914058285497272e-05, + "loss": 1.4771, + "step": 53160 + }, + { + "epoch": 0.64, + "grad_norm": 25.232972193634573, + "learning_rate": 1.5913587209382154e-05, + "loss": 1.3799, + "step": 53163 + }, + { + "epoch": 0.64, + "grad_norm": 10.89705881845013, + "learning_rate": 1.591311611308609e-05, + "loss": 1.7696, + "step": 53166 + }, + { + "epoch": 0.64, + "grad_norm": 6.363029313356957, + "learning_rate": 1.5912644996610696e-05, + "loss": 1.8952, + "step": 53169 + }, + { + "epoch": 0.64, + "grad_norm": 34.893268712101154, + "learning_rate": 1.591217385995757e-05, + "loss": 1.6958, + "step": 53172 + }, + { + "epoch": 0.64, + "grad_norm": 14.688559494443986, + "learning_rate": 1.5911702703128325e-05, + "loss": 1.3008, + "step": 53175 + }, + { + "epoch": 0.64, + "grad_norm": 5.2073094794759704, + "learning_rate": 1.591123152612457e-05, + "loss": 1.1172, + "step": 53178 + }, + { + "epoch": 0.64, + "grad_norm": 13.7750541265431, + "learning_rate": 1.591076032894791e-05, + "loss": 1.2843, + "step": 53181 + }, + { + "epoch": 0.64, + "grad_norm": 18.37395996336994, + "learning_rate": 1.591028911159995e-05, + "loss": 1.2732, + "step": 53184 + }, + { + "epoch": 0.64, + "grad_norm": 13.419304577408468, + "learning_rate": 1.590981787408231e-05, + "loss": 1.4329, + "step": 53187 + }, + { + "epoch": 0.64, + "grad_norm": 11.539266804939427, + "learning_rate": 1.5909346616396585e-05, + "loss": 1.6332, + "step": 53190 + }, + { + "epoch": 0.64, + "grad_norm": 13.701707949024682, + "learning_rate": 1.5908875338544386e-05, + "loss": 1.2478, + "step": 53193 + }, + { + "epoch": 0.64, + "grad_norm": 3.789464618463164, + "learning_rate": 1.590840404052733e-05, + "loss": 1.1995, + "step": 53196 + }, + { + "epoch": 0.64, + "grad_norm": 2.415242245093773, + "learning_rate": 1.5907932722347016e-05, + "loss": 1.5049, + "step": 53199 + }, + { + "epoch": 0.64, + "grad_norm": 8.55866635948616, + "learning_rate": 1.5907461384005056e-05, + "loss": 1.1321, + "step": 53202 + }, + { + "epoch": 0.64, + "grad_norm": 18.38292200150243, + "learning_rate": 1.590699002550306e-05, + "loss": 1.291, + "step": 53205 + }, + { + "epoch": 0.64, + "grad_norm": 8.510560702517566, + "learning_rate": 1.5906518646842633e-05, + "loss": 1.2938, + "step": 53208 + }, + { + "epoch": 0.64, + "grad_norm": 9.443244913237915, + "learning_rate": 1.5906047248025384e-05, + "loss": 1.3724, + "step": 53211 + }, + { + "epoch": 0.64, + "grad_norm": 18.711188974140686, + "learning_rate": 1.5905575829052925e-05, + "loss": 1.1865, + "step": 53214 + }, + { + "epoch": 0.64, + "grad_norm": 56.186089357755364, + "learning_rate": 1.5905104389926863e-05, + "loss": 1.3295, + "step": 53217 + }, + { + "epoch": 0.64, + "grad_norm": 5.692350954037317, + "learning_rate": 1.5904632930648805e-05, + "loss": 1.2604, + "step": 53220 + }, + { + "epoch": 0.64, + "grad_norm": 34.47205610642368, + "learning_rate": 1.5904161451220363e-05, + "loss": 1.5362, + "step": 53223 + }, + { + "epoch": 0.64, + "grad_norm": 13.37682955440838, + "learning_rate": 1.5903689951643144e-05, + "loss": 1.4858, + "step": 53226 + }, + { + "epoch": 0.64, + "grad_norm": 13.67139956715757, + "learning_rate": 1.590321843191876e-05, + "loss": 1.3751, + "step": 53229 + }, + { + "epoch": 0.64, + "grad_norm": 6.4009716071547285, + "learning_rate": 1.5902746892048813e-05, + "loss": 1.3543, + "step": 53232 + }, + { + "epoch": 0.64, + "grad_norm": 5.079752393426991, + "learning_rate": 1.5902275332034923e-05, + "loss": 1.3098, + "step": 53235 + }, + { + "epoch": 0.64, + "grad_norm": 11.803000966055142, + "learning_rate": 1.5901803751878688e-05, + "loss": 1.3811, + "step": 53238 + }, + { + "epoch": 0.64, + "grad_norm": 4.781722866633535, + "learning_rate": 1.5901332151581727e-05, + "loss": 1.8795, + "step": 53241 + }, + { + "epoch": 0.64, + "grad_norm": 28.68745878127238, + "learning_rate": 1.590086053114564e-05, + "loss": 1.6051, + "step": 53244 + }, + { + "epoch": 0.64, + "grad_norm": 13.698922647710601, + "learning_rate": 1.5900388890572045e-05, + "loss": 1.4301, + "step": 53247 + }, + { + "epoch": 0.64, + "grad_norm": 66.68211710135785, + "learning_rate": 1.5899917229862546e-05, + "loss": 1.2649, + "step": 53250 + }, + { + "epoch": 0.64, + "grad_norm": 12.389882083279584, + "learning_rate": 1.5899445549018756e-05, + "loss": 1.5273, + "step": 53253 + }, + { + "epoch": 0.64, + "grad_norm": 14.954006132418261, + "learning_rate": 1.5898973848042286e-05, + "loss": 1.0448, + "step": 53256 + }, + { + "epoch": 0.64, + "grad_norm": 8.297727365211893, + "learning_rate": 1.5898502126934737e-05, + "loss": 1.0818, + "step": 53259 + }, + { + "epoch": 0.64, + "grad_norm": 8.500670910390577, + "learning_rate": 1.589803038569773e-05, + "loss": 1.5071, + "step": 53262 + }, + { + "epoch": 0.64, + "grad_norm": 3.69149915985368, + "learning_rate": 1.589755862433287e-05, + "loss": 1.2805, + "step": 53265 + }, + { + "epoch": 0.64, + "grad_norm": 28.75403885127352, + "learning_rate": 1.5897086842841766e-05, + "loss": 1.0703, + "step": 53268 + }, + { + "epoch": 0.64, + "grad_norm": 8.684012343765533, + "learning_rate": 1.5896615041226026e-05, + "loss": 1.2535, + "step": 53271 + }, + { + "epoch": 0.64, + "grad_norm": 5.486362319404258, + "learning_rate": 1.5896143219487263e-05, + "loss": 1.6146, + "step": 53274 + }, + { + "epoch": 0.64, + "grad_norm": 6.33213418861877, + "learning_rate": 1.5895671377627086e-05, + "loss": 1.4821, + "step": 53277 + }, + { + "epoch": 0.64, + "grad_norm": 15.55570987260509, + "learning_rate": 1.589519951564711e-05, + "loss": 1.4666, + "step": 53280 + }, + { + "epoch": 0.64, + "grad_norm": 20.717472416853525, + "learning_rate": 1.589472763354894e-05, + "loss": 1.6397, + "step": 53283 + }, + { + "epoch": 0.64, + "grad_norm": 19.485831215514796, + "learning_rate": 1.589425573133419e-05, + "loss": 1.3873, + "step": 53286 + }, + { + "epoch": 0.64, + "grad_norm": 19.87438289627111, + "learning_rate": 1.5893783809004465e-05, + "loss": 1.5154, + "step": 53289 + }, + { + "epoch": 0.64, + "grad_norm": 7.639769589266336, + "learning_rate": 1.589331186656138e-05, + "loss": 1.3327, + "step": 53292 + }, + { + "epoch": 0.64, + "grad_norm": 9.010169235537148, + "learning_rate": 1.5892839904006546e-05, + "loss": 1.2406, + "step": 53295 + }, + { + "epoch": 0.64, + "grad_norm": 20.629484592771973, + "learning_rate": 1.5892367921341566e-05, + "loss": 1.4849, + "step": 53298 + }, + { + "epoch": 0.64, + "grad_norm": 9.505478340121183, + "learning_rate": 1.589189591856806e-05, + "loss": 1.1871, + "step": 53301 + }, + { + "epoch": 0.64, + "grad_norm": 21.921600167407085, + "learning_rate": 1.589142389568764e-05, + "loss": 1.2413, + "step": 53304 + }, + { + "epoch": 0.64, + "grad_norm": 44.518219886032725, + "learning_rate": 1.589095185270191e-05, + "loss": 1.3388, + "step": 53307 + }, + { + "epoch": 0.64, + "grad_norm": 16.472074598041495, + "learning_rate": 1.589047978961248e-05, + "loss": 1.0396, + "step": 53310 + }, + { + "epoch": 0.64, + "grad_norm": 12.736952493490286, + "learning_rate": 1.5890007706420966e-05, + "loss": 1.3708, + "step": 53313 + }, + { + "epoch": 0.64, + "grad_norm": 25.319355107285446, + "learning_rate": 1.5889535603128977e-05, + "loss": 1.5836, + "step": 53316 + }, + { + "epoch": 0.64, + "grad_norm": 17.332371775107198, + "learning_rate": 1.5889063479738125e-05, + "loss": 1.548, + "step": 53319 + }, + { + "epoch": 0.64, + "grad_norm": 10.943100200112978, + "learning_rate": 1.588859133625002e-05, + "loss": 1.509, + "step": 53322 + }, + { + "epoch": 0.64, + "grad_norm": 12.061810363212123, + "learning_rate": 1.5888119172666274e-05, + "loss": 1.6632, + "step": 53325 + }, + { + "epoch": 0.64, + "grad_norm": 9.066782003087923, + "learning_rate": 1.58876469889885e-05, + "loss": 1.1084, + "step": 53328 + }, + { + "epoch": 0.64, + "grad_norm": 5.009202969904137, + "learning_rate": 1.5887174785218304e-05, + "loss": 1.1969, + "step": 53331 + }, + { + "epoch": 0.64, + "grad_norm": 31.259974235892965, + "learning_rate": 1.5886702561357303e-05, + "loss": 1.2692, + "step": 53334 + }, + { + "epoch": 0.64, + "grad_norm": 27.710369251878422, + "learning_rate": 1.5886230317407107e-05, + "loss": 1.3335, + "step": 53337 + }, + { + "epoch": 0.64, + "grad_norm": 27.79420575907574, + "learning_rate": 1.5885758053369328e-05, + "loss": 1.295, + "step": 53340 + }, + { + "epoch": 0.64, + "grad_norm": 18.999078978344237, + "learning_rate": 1.5885285769245572e-05, + "loss": 1.3301, + "step": 53343 + }, + { + "epoch": 0.64, + "grad_norm": 44.09455568338121, + "learning_rate": 1.588481346503746e-05, + "loss": 1.3889, + "step": 53346 + }, + { + "epoch": 0.64, + "grad_norm": 21.13608268172098, + "learning_rate": 1.58843411407466e-05, + "loss": 1.4615, + "step": 53349 + }, + { + "epoch": 0.64, + "grad_norm": 24.27679884696247, + "learning_rate": 1.58838687963746e-05, + "loss": 1.4172, + "step": 53352 + }, + { + "epoch": 0.64, + "grad_norm": 7.855225809639872, + "learning_rate": 1.5883396431923076e-05, + "loss": 1.431, + "step": 53355 + }, + { + "epoch": 0.64, + "grad_norm": 24.61917658189857, + "learning_rate": 1.588292404739364e-05, + "loss": 1.2369, + "step": 53358 + }, + { + "epoch": 0.64, + "grad_norm": 8.089167383043211, + "learning_rate": 1.5882451642787905e-05, + "loss": 1.2922, + "step": 53361 + }, + { + "epoch": 0.64, + "grad_norm": 47.91402811311533, + "learning_rate": 1.5881979218107478e-05, + "loss": 1.5344, + "step": 53364 + }, + { + "epoch": 0.64, + "grad_norm": 31.47223650979333, + "learning_rate": 1.588150677335398e-05, + "loss": 1.3841, + "step": 53367 + }, + { + "epoch": 0.64, + "grad_norm": 25.139887970577703, + "learning_rate": 1.5881034308529012e-05, + "loss": 1.1777, + "step": 53370 + }, + { + "epoch": 0.64, + "grad_norm": 31.822313252416315, + "learning_rate": 1.5880561823634192e-05, + "loss": 1.2957, + "step": 53373 + }, + { + "epoch": 0.64, + "grad_norm": 23.128668371683, + "learning_rate": 1.5880089318671136e-05, + "loss": 1.1884, + "step": 53376 + }, + { + "epoch": 0.64, + "grad_norm": 13.189157512194338, + "learning_rate": 1.5879616793641452e-05, + "loss": 1.5031, + "step": 53379 + }, + { + "epoch": 0.64, + "grad_norm": 20.262530298003394, + "learning_rate": 1.5879144248546754e-05, + "loss": 1.2082, + "step": 53382 + }, + { + "epoch": 0.64, + "grad_norm": 20.636349149512846, + "learning_rate": 1.5878671683388653e-05, + "loss": 1.2788, + "step": 53385 + }, + { + "epoch": 0.64, + "grad_norm": 47.7248317221908, + "learning_rate": 1.5878199098168764e-05, + "loss": 1.561, + "step": 53388 + }, + { + "epoch": 0.64, + "grad_norm": 10.38406853291782, + "learning_rate": 1.5877726492888702e-05, + "loss": 1.6943, + "step": 53391 + }, + { + "epoch": 0.64, + "grad_norm": 15.058327212878552, + "learning_rate": 1.587725386755007e-05, + "loss": 1.3463, + "step": 53394 + }, + { + "epoch": 0.64, + "grad_norm": 23.63174269260097, + "learning_rate": 1.5876781222154494e-05, + "loss": 1.7522, + "step": 53397 + }, + { + "epoch": 0.64, + "grad_norm": 8.416964187227853, + "learning_rate": 1.5876308556703576e-05, + "loss": 1.1717, + "step": 53400 + }, + { + "epoch": 0.64, + "grad_norm": 14.584676408843196, + "learning_rate": 1.5875835871198937e-05, + "loss": 1.4031, + "step": 53403 + }, + { + "epoch": 0.64, + "grad_norm": 7.437831161607188, + "learning_rate": 1.5875363165642185e-05, + "loss": 1.3901, + "step": 53406 + }, + { + "epoch": 0.64, + "grad_norm": 8.943235126364431, + "learning_rate": 1.5874890440034933e-05, + "loss": 1.5072, + "step": 53409 + }, + { + "epoch": 0.64, + "grad_norm": 6.378450774476456, + "learning_rate": 1.5874417694378797e-05, + "loss": 1.8337, + "step": 53412 + }, + { + "epoch": 0.64, + "grad_norm": 14.292165879739013, + "learning_rate": 1.587394492867539e-05, + "loss": 1.3634, + "step": 53415 + }, + { + "epoch": 0.64, + "grad_norm": 32.799089834037865, + "learning_rate": 1.5873472142926323e-05, + "loss": 1.4706, + "step": 53418 + }, + { + "epoch": 0.64, + "grad_norm": 5.169347118623049, + "learning_rate": 1.5872999337133214e-05, + "loss": 1.2984, + "step": 53421 + }, + { + "epoch": 0.64, + "grad_norm": 13.818734071362588, + "learning_rate": 1.5872526511297675e-05, + "loss": 1.2811, + "step": 53424 + }, + { + "epoch": 0.64, + "grad_norm": 10.138650615331162, + "learning_rate": 1.5872053665421316e-05, + "loss": 1.5764, + "step": 53427 + }, + { + "epoch": 0.64, + "grad_norm": 3.5827406476643744, + "learning_rate": 1.5871580799505753e-05, + "loss": 1.3424, + "step": 53430 + }, + { + "epoch": 0.64, + "grad_norm": 7.205875206243228, + "learning_rate": 1.5871107913552596e-05, + "loss": 1.3951, + "step": 53433 + }, + { + "epoch": 0.64, + "grad_norm": 21.555848523591873, + "learning_rate": 1.5870635007563468e-05, + "loss": 1.363, + "step": 53436 + }, + { + "epoch": 0.64, + "grad_norm": 6.7285821954454965, + "learning_rate": 1.5870162081539974e-05, + "loss": 1.435, + "step": 53439 + }, + { + "epoch": 0.64, + "grad_norm": 36.39910512908406, + "learning_rate": 1.586968913548373e-05, + "loss": 1.1451, + "step": 53442 + }, + { + "epoch": 0.64, + "grad_norm": 10.05535227017311, + "learning_rate": 1.5869216169396358e-05, + "loss": 1.4303, + "step": 53445 + }, + { + "epoch": 0.64, + "grad_norm": 13.154194623282834, + "learning_rate": 1.586874318327946e-05, + "loss": 1.4879, + "step": 53448 + }, + { + "epoch": 0.64, + "grad_norm": 13.639388955502387, + "learning_rate": 1.586827017713466e-05, + "loss": 1.3268, + "step": 53451 + }, + { + "epoch": 0.64, + "grad_norm": 45.93804371895072, + "learning_rate": 1.586779715096356e-05, + "loss": 1.6836, + "step": 53454 + }, + { + "epoch": 0.64, + "grad_norm": 23.31612647946152, + "learning_rate": 1.586732410476779e-05, + "loss": 1.4482, + "step": 53457 + }, + { + "epoch": 0.64, + "grad_norm": 238.5822205111693, + "learning_rate": 1.5866851038548952e-05, + "loss": 1.5028, + "step": 53460 + }, + { + "epoch": 0.64, + "grad_norm": 6.239939285401723, + "learning_rate": 1.586637795230866e-05, + "loss": 1.3163, + "step": 53463 + }, + { + "epoch": 0.64, + "grad_norm": 5.962649171185524, + "learning_rate": 1.586590484604854e-05, + "loss": 1.4136, + "step": 53466 + }, + { + "epoch": 0.64, + "grad_norm": 9.053061548953368, + "learning_rate": 1.5865431719770198e-05, + "loss": 1.7292, + "step": 53469 + }, + { + "epoch": 0.64, + "grad_norm": 34.35489051976219, + "learning_rate": 1.586495857347525e-05, + "loss": 1.3611, + "step": 53472 + }, + { + "epoch": 0.64, + "grad_norm": 27.751966690048537, + "learning_rate": 1.586448540716531e-05, + "loss": 1.4305, + "step": 53475 + }, + { + "epoch": 0.64, + "grad_norm": 12.11772414383799, + "learning_rate": 1.5864012220841996e-05, + "loss": 1.8447, + "step": 53478 + }, + { + "epoch": 0.64, + "grad_norm": 10.673366175944354, + "learning_rate": 1.5863539014506918e-05, + "loss": 1.1508, + "step": 53481 + }, + { + "epoch": 0.64, + "grad_norm": 10.094097161269492, + "learning_rate": 1.5863065788161696e-05, + "loss": 1.5271, + "step": 53484 + }, + { + "epoch": 0.64, + "grad_norm": 13.368243908341716, + "learning_rate": 1.586259254180794e-05, + "loss": 1.2271, + "step": 53487 + }, + { + "epoch": 0.64, + "grad_norm": 14.93419149017928, + "learning_rate": 1.5862119275447265e-05, + "loss": 1.7769, + "step": 53490 + }, + { + "epoch": 0.64, + "grad_norm": 11.693371469241816, + "learning_rate": 1.5861645989081295e-05, + "loss": 1.1784, + "step": 53493 + }, + { + "epoch": 0.64, + "grad_norm": 16.161710511328767, + "learning_rate": 1.5861172682711635e-05, + "loss": 1.4837, + "step": 53496 + }, + { + "epoch": 0.64, + "grad_norm": 13.210839738727472, + "learning_rate": 1.5860699356339903e-05, + "loss": 1.2678, + "step": 53499 + }, + { + "epoch": 0.64, + "grad_norm": 21.986786315634063, + "learning_rate": 1.5860226009967717e-05, + "loss": 1.578, + "step": 53502 + }, + { + "epoch": 0.64, + "grad_norm": 2.1481240764031804, + "learning_rate": 1.585975264359669e-05, + "loss": 1.528, + "step": 53505 + }, + { + "epoch": 0.64, + "grad_norm": 5.574259240357738, + "learning_rate": 1.5859279257228438e-05, + "loss": 1.2092, + "step": 53508 + }, + { + "epoch": 0.64, + "grad_norm": 37.9932988293219, + "learning_rate": 1.5858805850864577e-05, + "loss": 1.769, + "step": 53511 + }, + { + "epoch": 0.64, + "grad_norm": 6.888689828962004, + "learning_rate": 1.5858332424506717e-05, + "loss": 1.1992, + "step": 53514 + }, + { + "epoch": 0.64, + "grad_norm": 14.98515977476508, + "learning_rate": 1.585785897815648e-05, + "loss": 1.5053, + "step": 53517 + }, + { + "epoch": 0.64, + "grad_norm": 12.780874580795079, + "learning_rate": 1.5857385511815485e-05, + "loss": 1.2398, + "step": 53520 + }, + { + "epoch": 0.64, + "grad_norm": 32.464058007332035, + "learning_rate": 1.585691202548534e-05, + "loss": 1.399, + "step": 53523 + }, + { + "epoch": 0.64, + "grad_norm": 20.721866639613918, + "learning_rate": 1.5856438519167666e-05, + "loss": 1.2882, + "step": 53526 + }, + { + "epoch": 0.64, + "grad_norm": 9.687254336954581, + "learning_rate": 1.5855964992864076e-05, + "loss": 1.2726, + "step": 53529 + }, + { + "epoch": 0.64, + "grad_norm": 37.92651356671686, + "learning_rate": 1.5855491446576186e-05, + "loss": 2.0104, + "step": 53532 + }, + { + "epoch": 0.64, + "grad_norm": 7.346751830902454, + "learning_rate": 1.585501788030561e-05, + "loss": 1.2935, + "step": 53535 + }, + { + "epoch": 0.64, + "grad_norm": 16.609853396201956, + "learning_rate": 1.5854544294053975e-05, + "loss": 1.4877, + "step": 53538 + }, + { + "epoch": 0.64, + "grad_norm": 30.34872181185999, + "learning_rate": 1.5854070687822882e-05, + "loss": 1.2319, + "step": 53541 + }, + { + "epoch": 0.64, + "grad_norm": 18.54568052449986, + "learning_rate": 1.5853597061613958e-05, + "loss": 1.3417, + "step": 53544 + }, + { + "epoch": 0.64, + "grad_norm": 9.79436665582354, + "learning_rate": 1.5853123415428812e-05, + "loss": 1.6633, + "step": 53547 + }, + { + "epoch": 0.64, + "grad_norm": 21.748738571640754, + "learning_rate": 1.5852649749269067e-05, + "loss": 1.2934, + "step": 53550 + }, + { + "epoch": 0.64, + "grad_norm": 18.725403175241393, + "learning_rate": 1.5852176063136336e-05, + "loss": 1.2188, + "step": 53553 + }, + { + "epoch": 0.64, + "grad_norm": 20.974071223113523, + "learning_rate": 1.5851702357032236e-05, + "loss": 1.5927, + "step": 53556 + }, + { + "epoch": 0.64, + "grad_norm": 17.359411111149036, + "learning_rate": 1.5851228630958385e-05, + "loss": 1.122, + "step": 53559 + }, + { + "epoch": 0.64, + "grad_norm": 28.990324972682966, + "learning_rate": 1.5850754884916396e-05, + "loss": 1.2786, + "step": 53562 + }, + { + "epoch": 0.64, + "grad_norm": 5.027442921742753, + "learning_rate": 1.5850281118907885e-05, + "loss": 1.6185, + "step": 53565 + }, + { + "epoch": 0.64, + "grad_norm": 17.708320405359675, + "learning_rate": 1.5849807332934477e-05, + "loss": 1.3675, + "step": 53568 + }, + { + "epoch": 0.64, + "grad_norm": 19.60985860773505, + "learning_rate": 1.5849333526997782e-05, + "loss": 1.3703, + "step": 53571 + }, + { + "epoch": 0.64, + "grad_norm": 13.408035939820897, + "learning_rate": 1.584885970109942e-05, + "loss": 1.5951, + "step": 53574 + }, + { + "epoch": 0.64, + "grad_norm": 12.36539054328143, + "learning_rate": 1.5848385855241003e-05, + "loss": 1.1744, + "step": 53577 + }, + { + "epoch": 0.64, + "grad_norm": 8.193099265986083, + "learning_rate": 1.5847911989424157e-05, + "loss": 1.2392, + "step": 53580 + }, + { + "epoch": 0.64, + "grad_norm": 13.595194716139227, + "learning_rate": 1.584743810365049e-05, + "loss": 1.5089, + "step": 53583 + }, + { + "epoch": 0.64, + "grad_norm": 17.305737679599613, + "learning_rate": 1.5846964197921626e-05, + "loss": 1.3701, + "step": 53586 + }, + { + "epoch": 0.64, + "grad_norm": 7.350768554611524, + "learning_rate": 1.5846490272239175e-05, + "loss": 1.4305, + "step": 53589 + }, + { + "epoch": 0.64, + "grad_norm": 9.831482508192593, + "learning_rate": 1.5846016326604762e-05, + "loss": 1.7007, + "step": 53592 + }, + { + "epoch": 0.64, + "grad_norm": 6.864596994914229, + "learning_rate": 1.584554236102e-05, + "loss": 1.4643, + "step": 53595 + }, + { + "epoch": 0.64, + "grad_norm": 7.202732606906291, + "learning_rate": 1.5845068375486505e-05, + "loss": 1.6278, + "step": 53598 + }, + { + "epoch": 0.64, + "grad_norm": 30.355937100182572, + "learning_rate": 1.58445943700059e-05, + "loss": 1.3098, + "step": 53601 + }, + { + "epoch": 0.64, + "grad_norm": 44.30449317139002, + "learning_rate": 1.5844120344579797e-05, + "loss": 1.6039, + "step": 53604 + }, + { + "epoch": 0.64, + "grad_norm": 12.961092883388117, + "learning_rate": 1.584364629920982e-05, + "loss": 1.3739, + "step": 53607 + }, + { + "epoch": 0.64, + "grad_norm": 24.96042763883693, + "learning_rate": 1.5843172233897582e-05, + "loss": 1.6489, + "step": 53610 + }, + { + "epoch": 0.64, + "grad_norm": 12.577078288550391, + "learning_rate": 1.5842698148644702e-05, + "loss": 1.2557, + "step": 53613 + }, + { + "epoch": 0.64, + "grad_norm": 20.361050476271537, + "learning_rate": 1.5842224043452795e-05, + "loss": 1.5584, + "step": 53616 + }, + { + "epoch": 0.64, + "grad_norm": 15.217705176608218, + "learning_rate": 1.5841749918323484e-05, + "loss": 1.1309, + "step": 53619 + }, + { + "epoch": 0.64, + "grad_norm": 23.32233151168019, + "learning_rate": 1.5841275773258385e-05, + "loss": 1.1893, + "step": 53622 + }, + { + "epoch": 0.64, + "grad_norm": 22.644980221629982, + "learning_rate": 1.584080160825911e-05, + "loss": 1.4275, + "step": 53625 + }, + { + "epoch": 0.64, + "grad_norm": 33.969822351850766, + "learning_rate": 1.5840327423327293e-05, + "loss": 1.7385, + "step": 53628 + }, + { + "epoch": 0.64, + "grad_norm": 5.7114485717634285, + "learning_rate": 1.5839853218464534e-05, + "loss": 1.6392, + "step": 53631 + }, + { + "epoch": 0.64, + "grad_norm": 41.31950983770198, + "learning_rate": 1.5839378993672463e-05, + "loss": 1.2105, + "step": 53634 + }, + { + "epoch": 0.64, + "grad_norm": 7.4446626762616726, + "learning_rate": 1.5838904748952698e-05, + "loss": 1.3774, + "step": 53637 + }, + { + "epoch": 0.65, + "grad_norm": 26.483778139152495, + "learning_rate": 1.583843048430685e-05, + "loss": 1.6086, + "step": 53640 + }, + { + "epoch": 0.65, + "grad_norm": 31.001187033320527, + "learning_rate": 1.583795619973654e-05, + "loss": 1.3257, + "step": 53643 + }, + { + "epoch": 0.65, + "grad_norm": 24.461482138468096, + "learning_rate": 1.5837481895243393e-05, + "loss": 1.651, + "step": 53646 + }, + { + "epoch": 0.65, + "grad_norm": 26.190915428226564, + "learning_rate": 1.583700757082902e-05, + "loss": 1.4313, + "step": 53649 + }, + { + "epoch": 0.65, + "grad_norm": 23.385330233256262, + "learning_rate": 1.583653322649504e-05, + "loss": 1.3824, + "step": 53652 + }, + { + "epoch": 0.65, + "grad_norm": 12.739771634540947, + "learning_rate": 1.583605886224308e-05, + "loss": 1.4165, + "step": 53655 + }, + { + "epoch": 0.65, + "grad_norm": 24.65589347646931, + "learning_rate": 1.583558447807475e-05, + "loss": 1.0639, + "step": 53658 + }, + { + "epoch": 0.65, + "grad_norm": 25.042212401928765, + "learning_rate": 1.5835110073991676e-05, + "loss": 1.5359, + "step": 53661 + }, + { + "epoch": 0.65, + "grad_norm": 11.756535536780616, + "learning_rate": 1.583463564999547e-05, + "loss": 0.9784, + "step": 53664 + }, + { + "epoch": 0.65, + "grad_norm": 2.676203686304889, + "learning_rate": 1.5834161206087753e-05, + "loss": 1.5264, + "step": 53667 + }, + { + "epoch": 0.65, + "grad_norm": 4.347590318769513, + "learning_rate": 1.5833686742270146e-05, + "loss": 1.6528, + "step": 53670 + }, + { + "epoch": 0.65, + "grad_norm": 53.7036240381736, + "learning_rate": 1.583321225854427e-05, + "loss": 1.3358, + "step": 53673 + }, + { + "epoch": 0.65, + "grad_norm": 24.423812014048213, + "learning_rate": 1.5832737754911737e-05, + "loss": 1.3358, + "step": 53676 + }, + { + "epoch": 0.65, + "grad_norm": 13.11811658790057, + "learning_rate": 1.5832263231374178e-05, + "loss": 1.194, + "step": 53679 + }, + { + "epoch": 0.65, + "grad_norm": 71.23180970223548, + "learning_rate": 1.5831788687933197e-05, + "loss": 1.4453, + "step": 53682 + }, + { + "epoch": 0.65, + "grad_norm": 11.278231324672603, + "learning_rate": 1.5831314124590426e-05, + "loss": 1.2942, + "step": 53685 + }, + { + "epoch": 0.65, + "grad_norm": 13.594134256704018, + "learning_rate": 1.583083954134748e-05, + "loss": 1.6088, + "step": 53688 + }, + { + "epoch": 0.65, + "grad_norm": 21.648726812719016, + "learning_rate": 1.5830364938205983e-05, + "loss": 1.5872, + "step": 53691 + }, + { + "epoch": 0.65, + "grad_norm": 3.737082370063076, + "learning_rate": 1.5829890315167544e-05, + "loss": 1.347, + "step": 53694 + }, + { + "epoch": 0.65, + "grad_norm": 14.367284139503177, + "learning_rate": 1.5829415672233793e-05, + "loss": 1.4995, + "step": 53697 + }, + { + "epoch": 0.65, + "grad_norm": 22.258794421660085, + "learning_rate": 1.5828941009406344e-05, + "loss": 1.0546, + "step": 53700 + }, + { + "epoch": 0.65, + "grad_norm": 6.2329793339952495, + "learning_rate": 1.582846632668682e-05, + "loss": 1.2469, + "step": 53703 + }, + { + "epoch": 0.65, + "grad_norm": 13.871589063354731, + "learning_rate": 1.582799162407684e-05, + "loss": 1.3088, + "step": 53706 + }, + { + "epoch": 0.65, + "grad_norm": 19.711303505910223, + "learning_rate": 1.5827516901578024e-05, + "loss": 1.1854, + "step": 53709 + }, + { + "epoch": 0.65, + "grad_norm": 21.953326476764715, + "learning_rate": 1.5827042159191993e-05, + "loss": 1.2297, + "step": 53712 + }, + { + "epoch": 0.65, + "grad_norm": 29.330119062239604, + "learning_rate": 1.5826567396920362e-05, + "loss": 1.504, + "step": 53715 + }, + { + "epoch": 0.65, + "grad_norm": 43.70681155081116, + "learning_rate": 1.582609261476476e-05, + "loss": 1.4339, + "step": 53718 + }, + { + "epoch": 0.65, + "grad_norm": 95.05918314506226, + "learning_rate": 1.5825617812726798e-05, + "loss": 1.452, + "step": 53721 + }, + { + "epoch": 0.65, + "grad_norm": 27.585142308466697, + "learning_rate": 1.5825142990808103e-05, + "loss": 1.7707, + "step": 53724 + }, + { + "epoch": 0.65, + "grad_norm": 12.620758590075098, + "learning_rate": 1.5824668149010294e-05, + "loss": 1.5475, + "step": 53727 + }, + { + "epoch": 0.65, + "grad_norm": 11.565335823975703, + "learning_rate": 1.582419328733499e-05, + "loss": 1.4344, + "step": 53730 + }, + { + "epoch": 0.65, + "grad_norm": 4.108755638512467, + "learning_rate": 1.582371840578381e-05, + "loss": 1.6694, + "step": 53733 + }, + { + "epoch": 0.65, + "grad_norm": 2.599042816471291, + "learning_rate": 1.582324350435838e-05, + "loss": 1.4141, + "step": 53736 + }, + { + "epoch": 0.65, + "grad_norm": 10.052970875391129, + "learning_rate": 1.5822768583060316e-05, + "loss": 1.3932, + "step": 53739 + }, + { + "epoch": 0.65, + "grad_norm": 6.742780509463357, + "learning_rate": 1.582229364189124e-05, + "loss": 1.2767, + "step": 53742 + }, + { + "epoch": 0.65, + "grad_norm": 6.8194012797764625, + "learning_rate": 1.5821818680852775e-05, + "loss": 1.372, + "step": 53745 + }, + { + "epoch": 0.65, + "grad_norm": 12.805255265874074, + "learning_rate": 1.5821343699946538e-05, + "loss": 1.3592, + "step": 53748 + }, + { + "epoch": 0.65, + "grad_norm": 24.659440952832732, + "learning_rate": 1.582086869917415e-05, + "loss": 1.1139, + "step": 53751 + }, + { + "epoch": 0.65, + "grad_norm": 13.58612289711589, + "learning_rate": 1.5820393678537236e-05, + "loss": 1.3128, + "step": 53754 + }, + { + "epoch": 0.65, + "grad_norm": 10.304651013551894, + "learning_rate": 1.5819918638037413e-05, + "loss": 1.4099, + "step": 53757 + }, + { + "epoch": 0.65, + "grad_norm": 8.339015699829293, + "learning_rate": 1.5819443577676308e-05, + "loss": 1.6561, + "step": 53760 + }, + { + "epoch": 0.65, + "grad_norm": 11.323353641398496, + "learning_rate": 1.5818968497455536e-05, + "loss": 1.3672, + "step": 53763 + }, + { + "epoch": 0.65, + "grad_norm": 8.368303871838474, + "learning_rate": 1.581849339737672e-05, + "loss": 1.1803, + "step": 53766 + }, + { + "epoch": 0.65, + "grad_norm": 17.974579498986277, + "learning_rate": 1.581801827744148e-05, + "loss": 1.4236, + "step": 53769 + }, + { + "epoch": 0.65, + "grad_norm": 72.73004194402645, + "learning_rate": 1.5817543137651443e-05, + "loss": 1.2778, + "step": 53772 + }, + { + "epoch": 0.65, + "grad_norm": 16.446293105954474, + "learning_rate": 1.5817067978008223e-05, + "loss": 1.2375, + "step": 53775 + }, + { + "epoch": 0.65, + "grad_norm": 2.850563396823384, + "learning_rate": 1.5816592798513452e-05, + "loss": 1.6296, + "step": 53778 + }, + { + "epoch": 0.65, + "grad_norm": 13.328830423087483, + "learning_rate": 1.5816117599168737e-05, + "loss": 1.1614, + "step": 53781 + }, + { + "epoch": 0.65, + "grad_norm": 7.7995426167857005, + "learning_rate": 1.5815642379975714e-05, + "loss": 1.4892, + "step": 53784 + }, + { + "epoch": 0.65, + "grad_norm": 16.45434186090536, + "learning_rate": 1.5815167140935993e-05, + "loss": 1.5186, + "step": 53787 + }, + { + "epoch": 0.65, + "grad_norm": 7.5095459378551634, + "learning_rate": 1.5814691882051202e-05, + "loss": 1.5705, + "step": 53790 + }, + { + "epoch": 0.65, + "grad_norm": 15.762540990684045, + "learning_rate": 1.5814216603322966e-05, + "loss": 1.5785, + "step": 53793 + }, + { + "epoch": 0.65, + "grad_norm": 15.720643253470193, + "learning_rate": 1.58137413047529e-05, + "loss": 1.625, + "step": 53796 + }, + { + "epoch": 0.65, + "grad_norm": 11.387655023130524, + "learning_rate": 1.581326598634263e-05, + "loss": 1.2894, + "step": 53799 + }, + { + "epoch": 0.65, + "grad_norm": 13.099845704117184, + "learning_rate": 1.5812790648093776e-05, + "loss": 1.0471, + "step": 53802 + }, + { + "epoch": 0.65, + "grad_norm": 3.9008502498867146, + "learning_rate": 1.581231529000796e-05, + "loss": 1.4259, + "step": 53805 + }, + { + "epoch": 0.65, + "grad_norm": 39.0830546419672, + "learning_rate": 1.581183991208681e-05, + "loss": 1.2462, + "step": 53808 + }, + { + "epoch": 0.65, + "grad_norm": 147.98706830802539, + "learning_rate": 1.581136451433194e-05, + "loss": 0.951, + "step": 53811 + }, + { + "epoch": 0.65, + "grad_norm": 5.338885282920388, + "learning_rate": 1.581088909674498e-05, + "loss": 1.2713, + "step": 53814 + }, + { + "epoch": 0.65, + "grad_norm": 10.469341486796907, + "learning_rate": 1.5810413659327545e-05, + "loss": 1.5627, + "step": 53817 + }, + { + "epoch": 0.65, + "grad_norm": 6.290311866915071, + "learning_rate": 1.580993820208126e-05, + "loss": 1.3275, + "step": 53820 + }, + { + "epoch": 0.65, + "grad_norm": 4.336510549162766, + "learning_rate": 1.5809462725007752e-05, + "loss": 1.3365, + "step": 53823 + }, + { + "epoch": 0.65, + "grad_norm": 14.23195435245739, + "learning_rate": 1.5808987228108637e-05, + "loss": 1.7421, + "step": 53826 + }, + { + "epoch": 0.65, + "grad_norm": 16.664575616261626, + "learning_rate": 1.5808511711385547e-05, + "loss": 1.2691, + "step": 53829 + }, + { + "epoch": 0.65, + "grad_norm": 13.927246182756216, + "learning_rate": 1.5808036174840093e-05, + "loss": 1.583, + "step": 53832 + }, + { + "epoch": 0.65, + "grad_norm": 66.2206264115106, + "learning_rate": 1.5807560618473905e-05, + "loss": 1.1888, + "step": 53835 + }, + { + "epoch": 0.65, + "grad_norm": 10.567699244054518, + "learning_rate": 1.5807085042288607e-05, + "loss": 1.2525, + "step": 53838 + }, + { + "epoch": 0.65, + "grad_norm": 22.394387734736654, + "learning_rate": 1.5806609446285814e-05, + "loss": 1.5422, + "step": 53841 + }, + { + "epoch": 0.65, + "grad_norm": 6.639701759654352, + "learning_rate": 1.580613383046716e-05, + "loss": 1.1808, + "step": 53844 + }, + { + "epoch": 0.65, + "grad_norm": 18.369726276824785, + "learning_rate": 1.5805658194834256e-05, + "loss": 1.2538, + "step": 53847 + }, + { + "epoch": 0.65, + "grad_norm": 6.512662550855133, + "learning_rate": 1.580518253938874e-05, + "loss": 1.7666, + "step": 53850 + }, + { + "epoch": 0.65, + "grad_norm": 20.869730879112414, + "learning_rate": 1.580470686413222e-05, + "loss": 1.4658, + "step": 53853 + }, + { + "epoch": 0.65, + "grad_norm": 12.680933806932407, + "learning_rate": 1.580423116906633e-05, + "loss": 1.3271, + "step": 53856 + }, + { + "epoch": 0.65, + "grad_norm": 3.035818806577292, + "learning_rate": 1.580375545419269e-05, + "loss": 1.2271, + "step": 53859 + }, + { + "epoch": 0.65, + "grad_norm": 7.20668761039928, + "learning_rate": 1.5803279719512924e-05, + "loss": 1.2454, + "step": 53862 + }, + { + "epoch": 0.65, + "grad_norm": 40.2218551590046, + "learning_rate": 1.580280396502865e-05, + "loss": 1.6679, + "step": 53865 + }, + { + "epoch": 0.65, + "grad_norm": 10.496207226737473, + "learning_rate": 1.58023281907415e-05, + "loss": 1.3421, + "step": 53868 + }, + { + "epoch": 0.65, + "grad_norm": 7.044592563529514, + "learning_rate": 1.5801852396653092e-05, + "loss": 1.5886, + "step": 53871 + }, + { + "epoch": 0.65, + "grad_norm": 14.317869123981577, + "learning_rate": 1.5801376582765053e-05, + "loss": 1.1433, + "step": 53874 + }, + { + "epoch": 0.65, + "grad_norm": 18.21425434413297, + "learning_rate": 1.5800900749079008e-05, + "loss": 1.0303, + "step": 53877 + }, + { + "epoch": 0.65, + "grad_norm": 6.842284763295452, + "learning_rate": 1.5800424895596573e-05, + "loss": 1.6947, + "step": 53880 + }, + { + "epoch": 0.65, + "grad_norm": 55.40627628523583, + "learning_rate": 1.579994902231938e-05, + "loss": 1.1415, + "step": 53883 + }, + { + "epoch": 0.65, + "grad_norm": 17.335169607617132, + "learning_rate": 1.579947312924905e-05, + "loss": 1.2923, + "step": 53886 + }, + { + "epoch": 0.65, + "grad_norm": 4.955850780262792, + "learning_rate": 1.5798997216387205e-05, + "loss": 1.091, + "step": 53889 + }, + { + "epoch": 0.65, + "grad_norm": 25.69394024439816, + "learning_rate": 1.5798521283735473e-05, + "loss": 1.2391, + "step": 53892 + }, + { + "epoch": 0.65, + "grad_norm": 10.1393483405134, + "learning_rate": 1.5798045331295473e-05, + "loss": 1.3639, + "step": 53895 + }, + { + "epoch": 0.65, + "grad_norm": 24.31477867690684, + "learning_rate": 1.579756935906884e-05, + "loss": 1.5006, + "step": 53898 + }, + { + "epoch": 0.65, + "grad_norm": 10.170168408127678, + "learning_rate": 1.5797093367057187e-05, + "loss": 1.5041, + "step": 53901 + }, + { + "epoch": 0.65, + "grad_norm": 9.866879261056905, + "learning_rate": 1.5796617355262146e-05, + "loss": 1.4885, + "step": 53904 + }, + { + "epoch": 0.65, + "grad_norm": 12.205391626125953, + "learning_rate": 1.579614132368533e-05, + "loss": 1.7566, + "step": 53907 + }, + { + "epoch": 0.65, + "grad_norm": 7.753973017569875, + "learning_rate": 1.5795665272328377e-05, + "loss": 1.4332, + "step": 53910 + }, + { + "epoch": 0.65, + "grad_norm": 18.67271045401699, + "learning_rate": 1.5795189201192904e-05, + "loss": 1.321, + "step": 53913 + }, + { + "epoch": 0.65, + "grad_norm": 30.217326788014248, + "learning_rate": 1.579471311028054e-05, + "loss": 1.4049, + "step": 53916 + }, + { + "epoch": 0.65, + "grad_norm": 21.75229482333176, + "learning_rate": 1.5794236999592906e-05, + "loss": 1.3005, + "step": 53919 + }, + { + "epoch": 0.65, + "grad_norm": 18.1428381978564, + "learning_rate": 1.5793760869131625e-05, + "loss": 1.4963, + "step": 53922 + }, + { + "epoch": 0.65, + "grad_norm": 17.867842715990147, + "learning_rate": 1.5793284718898333e-05, + "loss": 1.3572, + "step": 53925 + }, + { + "epoch": 0.65, + "grad_norm": 36.99181250916648, + "learning_rate": 1.579280854889464e-05, + "loss": 1.2439, + "step": 53928 + }, + { + "epoch": 0.65, + "grad_norm": 9.375032640712414, + "learning_rate": 1.579233235912218e-05, + "loss": 1.5996, + "step": 53931 + }, + { + "epoch": 0.65, + "grad_norm": 9.998231853474682, + "learning_rate": 1.5791856149582576e-05, + "loss": 1.2207, + "step": 53934 + }, + { + "epoch": 0.65, + "grad_norm": 18.24922532219106, + "learning_rate": 1.5791379920277457e-05, + "loss": 1.4038, + "step": 53937 + }, + { + "epoch": 0.65, + "grad_norm": 3.625715561322229, + "learning_rate": 1.5790903671208436e-05, + "loss": 1.2569, + "step": 53940 + }, + { + "epoch": 0.65, + "grad_norm": 5.664700684590585, + "learning_rate": 1.5790427402377155e-05, + "loss": 1.3601, + "step": 53943 + }, + { + "epoch": 0.65, + "grad_norm": 13.111851965447705, + "learning_rate": 1.5789951113785225e-05, + "loss": 1.7027, + "step": 53946 + }, + { + "epoch": 0.65, + "grad_norm": 7.206825521050025, + "learning_rate": 1.578947480543428e-05, + "loss": 1.8809, + "step": 53949 + }, + { + "epoch": 0.65, + "grad_norm": 8.760331248161334, + "learning_rate": 1.5788998477325944e-05, + "loss": 1.8818, + "step": 53952 + }, + { + "epoch": 0.65, + "grad_norm": 13.075588916146803, + "learning_rate": 1.5788522129461837e-05, + "loss": 1.6797, + "step": 53955 + }, + { + "epoch": 0.65, + "grad_norm": 10.148011973996379, + "learning_rate": 1.5788045761843593e-05, + "loss": 1.3495, + "step": 53958 + }, + { + "epoch": 0.65, + "grad_norm": 18.466036435847823, + "learning_rate": 1.5787569374472832e-05, + "loss": 1.2445, + "step": 53961 + }, + { + "epoch": 0.65, + "grad_norm": 3.519805914351069, + "learning_rate": 1.5787092967351184e-05, + "loss": 1.7196, + "step": 53964 + }, + { + "epoch": 0.65, + "grad_norm": 6.147210080587106, + "learning_rate": 1.578661654048027e-05, + "loss": 1.305, + "step": 53967 + }, + { + "epoch": 0.65, + "grad_norm": 4.3157918583928065, + "learning_rate": 1.578614009386172e-05, + "loss": 1.1961, + "step": 53970 + }, + { + "epoch": 0.65, + "grad_norm": 15.975527323478454, + "learning_rate": 1.5785663627497155e-05, + "loss": 1.6862, + "step": 53973 + }, + { + "epoch": 0.65, + "grad_norm": 5.901229302816992, + "learning_rate": 1.5785187141388204e-05, + "loss": 1.5209, + "step": 53976 + }, + { + "epoch": 0.65, + "grad_norm": 20.02802864460205, + "learning_rate": 1.5784710635536497e-05, + "loss": 1.5689, + "step": 53979 + }, + { + "epoch": 0.65, + "grad_norm": 21.162186639293385, + "learning_rate": 1.5784234109943654e-05, + "loss": 1.2709, + "step": 53982 + }, + { + "epoch": 0.65, + "grad_norm": 6.923010212323097, + "learning_rate": 1.57837575646113e-05, + "loss": 1.118, + "step": 53985 + }, + { + "epoch": 0.65, + "grad_norm": 78.38636924884304, + "learning_rate": 1.578328099954107e-05, + "loss": 1.2675, + "step": 53988 + }, + { + "epoch": 0.65, + "grad_norm": 18.11827510092273, + "learning_rate": 1.5782804414734583e-05, + "loss": 1.3649, + "step": 53991 + }, + { + "epoch": 0.65, + "grad_norm": 15.988329281467838, + "learning_rate": 1.578232781019347e-05, + "loss": 1.3875, + "step": 53994 + }, + { + "epoch": 0.65, + "grad_norm": 14.175565363444699, + "learning_rate": 1.578185118591935e-05, + "loss": 1.3595, + "step": 53997 + }, + { + "epoch": 0.65, + "grad_norm": 54.68644495902205, + "learning_rate": 1.5781374541913858e-05, + "loss": 1.6426, + "step": 54000 + }, + { + "epoch": 0.65, + "grad_norm": 13.577527782978509, + "learning_rate": 1.5780897878178615e-05, + "loss": 1.2004, + "step": 54003 + }, + { + "epoch": 0.65, + "grad_norm": 24.37489208625527, + "learning_rate": 1.5780421194715253e-05, + "loss": 1.3476, + "step": 54006 + }, + { + "epoch": 0.65, + "grad_norm": 7.04142709825994, + "learning_rate": 1.577994449152539e-05, + "loss": 1.4919, + "step": 54009 + }, + { + "epoch": 0.65, + "grad_norm": 12.357497584044534, + "learning_rate": 1.5779467768610666e-05, + "loss": 1.7136, + "step": 54012 + }, + { + "epoch": 0.65, + "grad_norm": 4.705378954908933, + "learning_rate": 1.5778991025972695e-05, + "loss": 1.2123, + "step": 54015 + }, + { + "epoch": 0.65, + "grad_norm": 59.567546019818, + "learning_rate": 1.577851426361311e-05, + "loss": 1.4867, + "step": 54018 + }, + { + "epoch": 0.65, + "grad_norm": 6.739036136067394, + "learning_rate": 1.5778037481533536e-05, + "loss": 1.3228, + "step": 54021 + }, + { + "epoch": 0.65, + "grad_norm": 31.2144925787601, + "learning_rate": 1.5777560679735605e-05, + "loss": 1.3444, + "step": 54024 + }, + { + "epoch": 0.65, + "grad_norm": 23.681319192863047, + "learning_rate": 1.577708385822094e-05, + "loss": 1.3624, + "step": 54027 + }, + { + "epoch": 0.65, + "grad_norm": 7.8040567272803685, + "learning_rate": 1.5776607016991162e-05, + "loss": 1.4567, + "step": 54030 + }, + { + "epoch": 0.65, + "grad_norm": 7.859571077389756, + "learning_rate": 1.5776130156047913e-05, + "loss": 1.0623, + "step": 54033 + }, + { + "epoch": 0.65, + "grad_norm": 21.989692951546267, + "learning_rate": 1.5775653275392807e-05, + "loss": 1.3882, + "step": 54036 + }, + { + "epoch": 0.65, + "grad_norm": 11.235324409917428, + "learning_rate": 1.577517637502748e-05, + "loss": 1.3359, + "step": 54039 + }, + { + "epoch": 0.65, + "grad_norm": 27.984498709752653, + "learning_rate": 1.5774699454953557e-05, + "loss": 1.6668, + "step": 54042 + }, + { + "epoch": 0.65, + "grad_norm": 15.216084473537302, + "learning_rate": 1.577422251517266e-05, + "loss": 1.4449, + "step": 54045 + }, + { + "epoch": 0.65, + "grad_norm": 37.381427222752016, + "learning_rate": 1.5773745555686425e-05, + "loss": 1.7319, + "step": 54048 + }, + { + "epoch": 0.65, + "grad_norm": 14.117770863165024, + "learning_rate": 1.5773268576496478e-05, + "loss": 1.1902, + "step": 54051 + }, + { + "epoch": 0.65, + "grad_norm": 27.72622702669315, + "learning_rate": 1.5772791577604443e-05, + "loss": 1.3449, + "step": 54054 + }, + { + "epoch": 0.65, + "grad_norm": 15.19638121809699, + "learning_rate": 1.5772314559011946e-05, + "loss": 1.3722, + "step": 54057 + }, + { + "epoch": 0.65, + "grad_norm": 14.728547291344627, + "learning_rate": 1.5771837520720624e-05, + "loss": 1.4036, + "step": 54060 + }, + { + "epoch": 0.65, + "grad_norm": 10.377349453309275, + "learning_rate": 1.5771360462732097e-05, + "loss": 1.4615, + "step": 54063 + }, + { + "epoch": 0.65, + "grad_norm": 23.870257329533327, + "learning_rate": 1.5770883385047997e-05, + "loss": 1.5493, + "step": 54066 + }, + { + "epoch": 0.65, + "grad_norm": 7.803975589192349, + "learning_rate": 1.5770406287669947e-05, + "loss": 1.3288, + "step": 54069 + }, + { + "epoch": 0.65, + "grad_norm": 14.6817283366565, + "learning_rate": 1.5769929170599583e-05, + "loss": 1.3419, + "step": 54072 + }, + { + "epoch": 0.65, + "grad_norm": 13.90442137339039, + "learning_rate": 1.5769452033838527e-05, + "loss": 1.1962, + "step": 54075 + }, + { + "epoch": 0.65, + "grad_norm": 139.24173807933872, + "learning_rate": 1.5768974877388412e-05, + "loss": 1.8659, + "step": 54078 + }, + { + "epoch": 0.65, + "grad_norm": 24.028599956046516, + "learning_rate": 1.576849770125086e-05, + "loss": 1.2638, + "step": 54081 + }, + { + "epoch": 0.65, + "grad_norm": 8.335428883061446, + "learning_rate": 1.5768020505427504e-05, + "loss": 1.3481, + "step": 54084 + }, + { + "epoch": 0.65, + "grad_norm": 12.092339433002774, + "learning_rate": 1.5767543289919973e-05, + "loss": 1.6029, + "step": 54087 + }, + { + "epoch": 0.65, + "grad_norm": 18.223041976150782, + "learning_rate": 1.5767066054729893e-05, + "loss": 1.542, + "step": 54090 + }, + { + "epoch": 0.65, + "grad_norm": 5.194606859035359, + "learning_rate": 1.5766588799858894e-05, + "loss": 1.6604, + "step": 54093 + }, + { + "epoch": 0.65, + "grad_norm": 6.716191523094016, + "learning_rate": 1.576611152530861e-05, + "loss": 1.3575, + "step": 54096 + }, + { + "epoch": 0.65, + "grad_norm": 28.969314053249875, + "learning_rate": 1.5765634231080658e-05, + "loss": 1.3043, + "step": 54099 + }, + { + "epoch": 0.65, + "grad_norm": 12.645213657506048, + "learning_rate": 1.5765156917176673e-05, + "loss": 1.4846, + "step": 54102 + }, + { + "epoch": 0.65, + "grad_norm": 5.850176424907599, + "learning_rate": 1.5764679583598285e-05, + "loss": 1.1269, + "step": 54105 + }, + { + "epoch": 0.65, + "grad_norm": 3.2903258899781926, + "learning_rate": 1.5764202230347123e-05, + "loss": 1.2997, + "step": 54108 + }, + { + "epoch": 0.65, + "grad_norm": 7.170662066916056, + "learning_rate": 1.5763724857424814e-05, + "loss": 1.4737, + "step": 54111 + }, + { + "epoch": 0.65, + "grad_norm": 11.706035312895416, + "learning_rate": 1.576324746483299e-05, + "loss": 1.5091, + "step": 54114 + }, + { + "epoch": 0.65, + "grad_norm": 11.147555177883845, + "learning_rate": 1.5762770052573274e-05, + "loss": 1.3123, + "step": 54117 + }, + { + "epoch": 0.65, + "grad_norm": 31.11753760569408, + "learning_rate": 1.5762292620647303e-05, + "loss": 1.6371, + "step": 54120 + }, + { + "epoch": 0.65, + "grad_norm": 16.056582036274232, + "learning_rate": 1.5761815169056704e-05, + "loss": 1.4056, + "step": 54123 + }, + { + "epoch": 0.65, + "grad_norm": 9.086416510642152, + "learning_rate": 1.57613376978031e-05, + "loss": 1.3353, + "step": 54126 + }, + { + "epoch": 0.65, + "grad_norm": 11.053897800736944, + "learning_rate": 1.576086020688813e-05, + "loss": 1.4345, + "step": 54129 + }, + { + "epoch": 0.65, + "grad_norm": 18.839866842690032, + "learning_rate": 1.5760382696313422e-05, + "loss": 1.2746, + "step": 54132 + }, + { + "epoch": 0.65, + "grad_norm": 8.078573682212236, + "learning_rate": 1.5759905166080598e-05, + "loss": 1.2366, + "step": 54135 + }, + { + "epoch": 0.65, + "grad_norm": 15.391973088824292, + "learning_rate": 1.5759427616191293e-05, + "loss": 1.2903, + "step": 54138 + }, + { + "epoch": 0.65, + "grad_norm": 41.09502764658907, + "learning_rate": 1.5758950046647137e-05, + "loss": 1.4263, + "step": 54141 + }, + { + "epoch": 0.65, + "grad_norm": 10.794728968290496, + "learning_rate": 1.5758472457449757e-05, + "loss": 1.0562, + "step": 54144 + }, + { + "epoch": 0.65, + "grad_norm": 12.1139638415274, + "learning_rate": 1.575799484860079e-05, + "loss": 1.2503, + "step": 54147 + }, + { + "epoch": 0.65, + "grad_norm": 8.334268666845453, + "learning_rate": 1.5757517220101853e-05, + "loss": 1.4317, + "step": 54150 + }, + { + "epoch": 0.65, + "grad_norm": 3.3335857260518247, + "learning_rate": 1.5757039571954588e-05, + "loss": 1.383, + "step": 54153 + }, + { + "epoch": 0.65, + "grad_norm": 22.50713993521438, + "learning_rate": 1.575656190416062e-05, + "loss": 1.6486, + "step": 54156 + }, + { + "epoch": 0.65, + "grad_norm": 32.670009090169735, + "learning_rate": 1.575608421672158e-05, + "loss": 1.4546, + "step": 54159 + }, + { + "epoch": 0.65, + "grad_norm": 26.036204361818257, + "learning_rate": 1.57556065096391e-05, + "loss": 1.5075, + "step": 54162 + }, + { + "epoch": 0.65, + "grad_norm": 7.32797017643273, + "learning_rate": 1.5755128782914804e-05, + "loss": 1.148, + "step": 54165 + }, + { + "epoch": 0.65, + "grad_norm": 18.46403608591972, + "learning_rate": 1.5754651036550328e-05, + "loss": 1.4431, + "step": 54168 + }, + { + "epoch": 0.65, + "grad_norm": 9.013691570346571, + "learning_rate": 1.57541732705473e-05, + "loss": 1.7707, + "step": 54171 + }, + { + "epoch": 0.65, + "grad_norm": 69.01995971787544, + "learning_rate": 1.5753695484907356e-05, + "loss": 1.339, + "step": 54174 + }, + { + "epoch": 0.65, + "grad_norm": 24.413497279888766, + "learning_rate": 1.5753217679632115e-05, + "loss": 1.2256, + "step": 54177 + }, + { + "epoch": 0.65, + "grad_norm": 26.123336065902436, + "learning_rate": 1.575273985472322e-05, + "loss": 1.7181, + "step": 54180 + }, + { + "epoch": 0.65, + "grad_norm": 11.195265900649035, + "learning_rate": 1.5752262010182292e-05, + "loss": 1.4603, + "step": 54183 + }, + { + "epoch": 0.65, + "grad_norm": 6.622243371607974, + "learning_rate": 1.5751784146010967e-05, + "loss": 1.5659, + "step": 54186 + }, + { + "epoch": 0.65, + "grad_norm": 4.220488075730262, + "learning_rate": 1.5751306262210873e-05, + "loss": 1.6981, + "step": 54189 + }, + { + "epoch": 0.65, + "grad_norm": 8.406791802336121, + "learning_rate": 1.5750828358783646e-05, + "loss": 1.3732, + "step": 54192 + }, + { + "epoch": 0.65, + "grad_norm": 10.603194850293153, + "learning_rate": 1.575035043573091e-05, + "loss": 1.3577, + "step": 54195 + }, + { + "epoch": 0.65, + "grad_norm": 15.997172590949502, + "learning_rate": 1.57498724930543e-05, + "loss": 1.3637, + "step": 54198 + }, + { + "epoch": 0.65, + "grad_norm": 16.02742954127266, + "learning_rate": 1.5749394530755448e-05, + "loss": 1.2776, + "step": 54201 + }, + { + "epoch": 0.65, + "grad_norm": 20.345692816894612, + "learning_rate": 1.5748916548835982e-05, + "loss": 1.4165, + "step": 54204 + }, + { + "epoch": 0.65, + "grad_norm": 9.905623118145552, + "learning_rate": 1.5748438547297532e-05, + "loss": 1.5448, + "step": 54207 + }, + { + "epoch": 0.65, + "grad_norm": 11.925469978356151, + "learning_rate": 1.5747960526141738e-05, + "loss": 1.4726, + "step": 54210 + }, + { + "epoch": 0.65, + "grad_norm": 7.3775847111428225, + "learning_rate": 1.5747482485370223e-05, + "loss": 1.5635, + "step": 54213 + }, + { + "epoch": 0.65, + "grad_norm": 10.4061356677385, + "learning_rate": 1.5747004424984617e-05, + "loss": 1.5278, + "step": 54216 + }, + { + "epoch": 0.65, + "grad_norm": 51.57782694324022, + "learning_rate": 1.5746526344986555e-05, + "loss": 1.5408, + "step": 54219 + }, + { + "epoch": 0.65, + "grad_norm": 22.820915953142254, + "learning_rate": 1.5746048245377672e-05, + "loss": 1.4215, + "step": 54222 + }, + { + "epoch": 0.65, + "grad_norm": 10.889832655426876, + "learning_rate": 1.5745570126159596e-05, + "loss": 1.5158, + "step": 54225 + }, + { + "epoch": 0.65, + "grad_norm": 9.906065848989813, + "learning_rate": 1.5745091987333955e-05, + "loss": 1.3442, + "step": 54228 + }, + { + "epoch": 0.65, + "grad_norm": 21.43144708413242, + "learning_rate": 1.5744613828902387e-05, + "loss": 1.2337, + "step": 54231 + }, + { + "epoch": 0.65, + "grad_norm": 8.195047833653506, + "learning_rate": 1.574413565086652e-05, + "loss": 1.2201, + "step": 54234 + }, + { + "epoch": 0.65, + "grad_norm": 17.53675008434123, + "learning_rate": 1.574365745322799e-05, + "loss": 1.1812, + "step": 54237 + }, + { + "epoch": 0.65, + "grad_norm": 32.08740260994777, + "learning_rate": 1.5743179235988425e-05, + "loss": 1.6086, + "step": 54240 + }, + { + "epoch": 0.65, + "grad_norm": 22.26029857659822, + "learning_rate": 1.5742700999149457e-05, + "loss": 1.3332, + "step": 54243 + }, + { + "epoch": 0.65, + "grad_norm": 13.78567633743247, + "learning_rate": 1.574222274271272e-05, + "loss": 1.4933, + "step": 54246 + }, + { + "epoch": 0.65, + "grad_norm": 24.842409807622523, + "learning_rate": 1.5741744466679842e-05, + "loss": 1.2709, + "step": 54249 + }, + { + "epoch": 0.65, + "grad_norm": 11.01421739539748, + "learning_rate": 1.5741266171052457e-05, + "loss": 1.3006, + "step": 54252 + }, + { + "epoch": 0.65, + "grad_norm": 9.112263900678618, + "learning_rate": 1.5740787855832205e-05, + "loss": 1.4139, + "step": 54255 + }, + { + "epoch": 0.65, + "grad_norm": 10.622585003991643, + "learning_rate": 1.574030952102071e-05, + "loss": 1.6005, + "step": 54258 + }, + { + "epoch": 0.65, + "grad_norm": 10.854679753156756, + "learning_rate": 1.5739831166619603e-05, + "loss": 1.9797, + "step": 54261 + }, + { + "epoch": 0.65, + "grad_norm": 9.797386224957789, + "learning_rate": 1.573935279263052e-05, + "loss": 1.3711, + "step": 54264 + }, + { + "epoch": 0.65, + "grad_norm": 47.45377247360012, + "learning_rate": 1.57388743990551e-05, + "loss": 1.1874, + "step": 54267 + }, + { + "epoch": 0.65, + "grad_norm": 10.872328704419877, + "learning_rate": 1.5738395985894964e-05, + "loss": 1.3096, + "step": 54270 + }, + { + "epoch": 0.65, + "grad_norm": 21.645207164054057, + "learning_rate": 1.5737917553151746e-05, + "loss": 1.7436, + "step": 54273 + }, + { + "epoch": 0.65, + "grad_norm": 7.761142740938802, + "learning_rate": 1.5737439100827088e-05, + "loss": 1.261, + "step": 54276 + }, + { + "epoch": 0.65, + "grad_norm": 15.27938011328396, + "learning_rate": 1.5736960628922613e-05, + "loss": 1.42, + "step": 54279 + }, + { + "epoch": 0.65, + "grad_norm": 15.524230416570553, + "learning_rate": 1.573648213743996e-05, + "loss": 1.3324, + "step": 54282 + }, + { + "epoch": 0.65, + "grad_norm": 15.92465782473805, + "learning_rate": 1.5736003626380757e-05, + "loss": 1.5747, + "step": 54285 + }, + { + "epoch": 0.65, + "grad_norm": 9.627208755545023, + "learning_rate": 1.5735525095746643e-05, + "loss": 1.044, + "step": 54288 + }, + { + "epoch": 0.65, + "grad_norm": 18.82474039603768, + "learning_rate": 1.5735046545539244e-05, + "loss": 1.2593, + "step": 54291 + }, + { + "epoch": 0.65, + "grad_norm": 10.271436328996728, + "learning_rate": 1.5734567975760203e-05, + "loss": 1.3229, + "step": 54294 + }, + { + "epoch": 0.65, + "grad_norm": 5.57008428774475, + "learning_rate": 1.573408938641114e-05, + "loss": 1.3319, + "step": 54297 + }, + { + "epoch": 0.65, + "grad_norm": 29.353028236975764, + "learning_rate": 1.57336107774937e-05, + "loss": 1.5074, + "step": 54300 + }, + { + "epoch": 0.65, + "grad_norm": 16.221647484005434, + "learning_rate": 1.5733132149009512e-05, + "loss": 1.3213, + "step": 54303 + }, + { + "epoch": 0.65, + "grad_norm": 8.123699158671748, + "learning_rate": 1.5732653500960205e-05, + "loss": 0.9202, + "step": 54306 + }, + { + "epoch": 0.65, + "grad_norm": 6.939421679810323, + "learning_rate": 1.573217483334742e-05, + "loss": 1.5047, + "step": 54309 + }, + { + "epoch": 0.65, + "grad_norm": 44.956938028876, + "learning_rate": 1.5731696146172787e-05, + "loss": 1.5388, + "step": 54312 + }, + { + "epoch": 0.65, + "grad_norm": 9.839256752316924, + "learning_rate": 1.5731217439437938e-05, + "loss": 1.6133, + "step": 54315 + }, + { + "epoch": 0.65, + "grad_norm": 15.549761490613482, + "learning_rate": 1.5730738713144508e-05, + "loss": 1.8022, + "step": 54318 + }, + { + "epoch": 0.65, + "grad_norm": 20.134702131568684, + "learning_rate": 1.573025996729413e-05, + "loss": 1.4036, + "step": 54321 + }, + { + "epoch": 0.65, + "grad_norm": 12.452818920322734, + "learning_rate": 1.5729781201888444e-05, + "loss": 1.3245, + "step": 54324 + }, + { + "epoch": 0.65, + "grad_norm": 12.640575380268443, + "learning_rate": 1.5729302416929072e-05, + "loss": 1.4638, + "step": 54327 + }, + { + "epoch": 0.65, + "grad_norm": 18.81972716234174, + "learning_rate": 1.572882361241766e-05, + "loss": 1.3941, + "step": 54330 + }, + { + "epoch": 0.65, + "grad_norm": 23.09870706586593, + "learning_rate": 1.5728344788355834e-05, + "loss": 1.2392, + "step": 54333 + }, + { + "epoch": 0.65, + "grad_norm": 9.561300056337156, + "learning_rate": 1.5727865944745235e-05, + "loss": 1.0945, + "step": 54336 + }, + { + "epoch": 0.65, + "grad_norm": 14.71713236409694, + "learning_rate": 1.5727387081587483e-05, + "loss": 1.3712, + "step": 54339 + }, + { + "epoch": 0.65, + "grad_norm": 3.381324968317014, + "learning_rate": 1.572690819888423e-05, + "loss": 1.4463, + "step": 54342 + }, + { + "epoch": 0.65, + "grad_norm": 16.931516571717857, + "learning_rate": 1.57264292966371e-05, + "loss": 1.4873, + "step": 54345 + }, + { + "epoch": 0.65, + "grad_norm": 12.12432750048045, + "learning_rate": 1.572595037484773e-05, + "loss": 1.1851, + "step": 54348 + }, + { + "epoch": 0.65, + "grad_norm": 33.37263501607961, + "learning_rate": 1.572547143351775e-05, + "loss": 1.8515, + "step": 54351 + }, + { + "epoch": 0.65, + "grad_norm": 50.963909917126834, + "learning_rate": 1.57249924726488e-05, + "loss": 1.7627, + "step": 54354 + }, + { + "epoch": 0.65, + "grad_norm": 5.053952224157684, + "learning_rate": 1.5724513492242515e-05, + "loss": 1.2765, + "step": 54357 + }, + { + "epoch": 0.65, + "grad_norm": 20.876935646747985, + "learning_rate": 1.5724034492300526e-05, + "loss": 1.157, + "step": 54360 + }, + { + "epoch": 0.65, + "grad_norm": 4.5859461564098565, + "learning_rate": 1.572355547282447e-05, + "loss": 1.5524, + "step": 54363 + }, + { + "epoch": 0.65, + "grad_norm": 11.636394958680532, + "learning_rate": 1.5723076433815977e-05, + "loss": 1.4452, + "step": 54366 + }, + { + "epoch": 0.65, + "grad_norm": 13.898428148137645, + "learning_rate": 1.5722597375276693e-05, + "loss": 1.4948, + "step": 54369 + }, + { + "epoch": 0.65, + "grad_norm": 58.57225417793588, + "learning_rate": 1.572211829720824e-05, + "loss": 1.5385, + "step": 54372 + }, + { + "epoch": 0.65, + "grad_norm": 9.037036693125554, + "learning_rate": 1.572163919961226e-05, + "loss": 1.4955, + "step": 54375 + }, + { + "epoch": 0.65, + "grad_norm": 41.27371593912591, + "learning_rate": 1.5721160082490386e-05, + "loss": 1.3968, + "step": 54378 + }, + { + "epoch": 0.65, + "grad_norm": 17.28631065881186, + "learning_rate": 1.5720680945844252e-05, + "loss": 1.2422, + "step": 54381 + }, + { + "epoch": 0.65, + "grad_norm": 4.979602105929323, + "learning_rate": 1.5720201789675496e-05, + "loss": 1.2655, + "step": 54384 + }, + { + "epoch": 0.65, + "grad_norm": 16.496761774882003, + "learning_rate": 1.5719722613985752e-05, + "loss": 1.4878, + "step": 54387 + }, + { + "epoch": 0.65, + "grad_norm": 5.498804570485812, + "learning_rate": 1.5719243418776655e-05, + "loss": 1.4071, + "step": 54390 + }, + { + "epoch": 0.65, + "grad_norm": 8.311406414463853, + "learning_rate": 1.5718764204049838e-05, + "loss": 1.2264, + "step": 54393 + }, + { + "epoch": 0.65, + "grad_norm": 20.104697509022618, + "learning_rate": 1.5718284969806942e-05, + "loss": 1.5882, + "step": 54396 + }, + { + "epoch": 0.65, + "grad_norm": 5.67952292196976, + "learning_rate": 1.5717805716049596e-05, + "loss": 1.4949, + "step": 54399 + }, + { + "epoch": 0.65, + "grad_norm": 5.465714709033509, + "learning_rate": 1.5717326442779443e-05, + "loss": 1.3577, + "step": 54402 + }, + { + "epoch": 0.65, + "grad_norm": 14.026956346501967, + "learning_rate": 1.571684714999811e-05, + "loss": 1.6539, + "step": 54405 + }, + { + "epoch": 0.65, + "grad_norm": 8.566643885281092, + "learning_rate": 1.5716367837707237e-05, + "loss": 1.3446, + "step": 54408 + }, + { + "epoch": 0.65, + "grad_norm": 8.100331704390754, + "learning_rate": 1.5715888505908463e-05, + "loss": 1.2889, + "step": 54411 + }, + { + "epoch": 0.65, + "grad_norm": 6.160148572066982, + "learning_rate": 1.5715409154603415e-05, + "loss": 1.0759, + "step": 54414 + }, + { + "epoch": 0.65, + "grad_norm": 6.196875187127914, + "learning_rate": 1.5714929783793742e-05, + "loss": 1.3613, + "step": 54417 + }, + { + "epoch": 0.65, + "grad_norm": 4.517107619419943, + "learning_rate": 1.5714450393481065e-05, + "loss": 1.4855, + "step": 54420 + }, + { + "epoch": 0.65, + "grad_norm": 25.103249311543067, + "learning_rate": 1.571397098366703e-05, + "loss": 1.2776, + "step": 54423 + }, + { + "epoch": 0.65, + "grad_norm": 21.06172322868258, + "learning_rate": 1.5713491554353272e-05, + "loss": 1.7748, + "step": 54426 + }, + { + "epoch": 0.65, + "grad_norm": 12.561084689596628, + "learning_rate": 1.5713012105541423e-05, + "loss": 1.5116, + "step": 54429 + }, + { + "epoch": 0.65, + "grad_norm": 11.442147804631071, + "learning_rate": 1.5712532637233122e-05, + "loss": 1.3923, + "step": 54432 + }, + { + "epoch": 0.65, + "grad_norm": 16.147965004533656, + "learning_rate": 1.571205314943e-05, + "loss": 1.5194, + "step": 54435 + }, + { + "epoch": 0.65, + "grad_norm": 3.843870355833791, + "learning_rate": 1.5711573642133705e-05, + "loss": 1.6373, + "step": 54438 + }, + { + "epoch": 0.65, + "grad_norm": 9.310659801387969, + "learning_rate": 1.5711094115345863e-05, + "loss": 1.4831, + "step": 54441 + }, + { + "epoch": 0.65, + "grad_norm": 49.11545566807667, + "learning_rate": 1.5710614569068118e-05, + "loss": 1.3542, + "step": 54444 + }, + { + "epoch": 0.65, + "grad_norm": 28.570662806217644, + "learning_rate": 1.5710135003302097e-05, + "loss": 1.3772, + "step": 54447 + }, + { + "epoch": 0.65, + "grad_norm": 43.858010065855325, + "learning_rate": 1.5709655418049445e-05, + "loss": 1.5501, + "step": 54450 + }, + { + "epoch": 0.65, + "grad_norm": 9.927119571088985, + "learning_rate": 1.5709175813311793e-05, + "loss": 1.09, + "step": 54453 + }, + { + "epoch": 0.65, + "grad_norm": 60.821206117314404, + "learning_rate": 1.5708696189090785e-05, + "loss": 1.1438, + "step": 54456 + }, + { + "epoch": 0.65, + "grad_norm": 12.695833040511925, + "learning_rate": 1.5708216545388052e-05, + "loss": 1.47, + "step": 54459 + }, + { + "epoch": 0.65, + "grad_norm": 36.395438724703126, + "learning_rate": 1.570773688220523e-05, + "loss": 1.2316, + "step": 54462 + }, + { + "epoch": 0.65, + "grad_norm": 2.78733393540824, + "learning_rate": 1.5707257199543958e-05, + "loss": 1.512, + "step": 54465 + }, + { + "epoch": 0.65, + "grad_norm": 9.168815447078284, + "learning_rate": 1.5706777497405872e-05, + "loss": 1.1443, + "step": 54468 + }, + { + "epoch": 0.65, + "grad_norm": 5.995560025245714, + "learning_rate": 1.5706297775792615e-05, + "loss": 1.2889, + "step": 54471 + }, + { + "epoch": 0.66, + "grad_norm": 4.211815895516051, + "learning_rate": 1.5705818034705814e-05, + "loss": 1.5078, + "step": 54474 + }, + { + "epoch": 0.66, + "grad_norm": 29.767920161250327, + "learning_rate": 1.5705338274147113e-05, + "loss": 1.5087, + "step": 54477 + }, + { + "epoch": 0.66, + "grad_norm": 21.954751285472447, + "learning_rate": 1.5704858494118148e-05, + "loss": 1.5488, + "step": 54480 + }, + { + "epoch": 0.66, + "grad_norm": 4.240819929601871, + "learning_rate": 1.5704378694620554e-05, + "loss": 1.0288, + "step": 54483 + }, + { + "epoch": 0.66, + "grad_norm": 5.656381585314865, + "learning_rate": 1.570389887565597e-05, + "loss": 1.0716, + "step": 54486 + }, + { + "epoch": 0.66, + "grad_norm": 16.07959902434954, + "learning_rate": 1.5703419037226036e-05, + "loss": 1.2896, + "step": 54489 + }, + { + "epoch": 0.66, + "grad_norm": 35.83443071694868, + "learning_rate": 1.5702939179332388e-05, + "loss": 1.5058, + "step": 54492 + }, + { + "epoch": 0.66, + "grad_norm": 8.581274644950055, + "learning_rate": 1.570245930197666e-05, + "loss": 1.5339, + "step": 54495 + }, + { + "epoch": 0.66, + "grad_norm": 3.2601144324294413, + "learning_rate": 1.5701979405160496e-05, + "loss": 1.6822, + "step": 54498 + }, + { + "epoch": 0.66, + "grad_norm": 11.65240695201445, + "learning_rate": 1.5701499488885526e-05, + "loss": 1.4933, + "step": 54501 + }, + { + "epoch": 0.66, + "grad_norm": 10.3032669494074, + "learning_rate": 1.5701019553153392e-05, + "loss": 1.1548, + "step": 54504 + }, + { + "epoch": 0.66, + "grad_norm": 11.08333963049815, + "learning_rate": 1.5700539597965735e-05, + "loss": 1.2555, + "step": 54507 + }, + { + "epoch": 0.66, + "grad_norm": 19.515362112276772, + "learning_rate": 1.5700059623324187e-05, + "loss": 1.427, + "step": 54510 + }, + { + "epoch": 0.66, + "grad_norm": 2.8736897700706123, + "learning_rate": 1.569957962923039e-05, + "loss": 1.3161, + "step": 54513 + }, + { + "epoch": 0.66, + "grad_norm": 20.0450388853903, + "learning_rate": 1.5699099615685977e-05, + "loss": 1.9512, + "step": 54516 + }, + { + "epoch": 0.66, + "grad_norm": 48.89666830698485, + "learning_rate": 1.5698619582692597e-05, + "loss": 1.2959, + "step": 54519 + }, + { + "epoch": 0.66, + "grad_norm": 12.18670557679727, + "learning_rate": 1.5698139530251876e-05, + "loss": 1.5788, + "step": 54522 + }, + { + "epoch": 0.66, + "grad_norm": 26.636093588172677, + "learning_rate": 1.569765945836546e-05, + "loss": 1.3464, + "step": 54525 + }, + { + "epoch": 0.66, + "grad_norm": 23.5822817429868, + "learning_rate": 1.5697179367034984e-05, + "loss": 1.3788, + "step": 54528 + }, + { + "epoch": 0.66, + "grad_norm": 11.054907469944798, + "learning_rate": 1.5696699256262085e-05, + "loss": 1.3501, + "step": 54531 + }, + { + "epoch": 0.66, + "grad_norm": 17.629183474591986, + "learning_rate": 1.5696219126048402e-05, + "loss": 1.5794, + "step": 54534 + }, + { + "epoch": 0.66, + "grad_norm": 19.356103691517852, + "learning_rate": 1.5695738976395577e-05, + "loss": 1.3721, + "step": 54537 + }, + { + "epoch": 0.66, + "grad_norm": 48.38712536335081, + "learning_rate": 1.5695258807305246e-05, + "loss": 1.4593, + "step": 54540 + }, + { + "epoch": 0.66, + "grad_norm": 34.13779633016837, + "learning_rate": 1.569477861877905e-05, + "loss": 1.5688, + "step": 54543 + }, + { + "epoch": 0.66, + "grad_norm": 8.986906639152112, + "learning_rate": 1.5694298410818624e-05, + "loss": 1.2082, + "step": 54546 + }, + { + "epoch": 0.66, + "grad_norm": 20.000712949119038, + "learning_rate": 1.569381818342561e-05, + "loss": 1.521, + "step": 54549 + }, + { + "epoch": 0.66, + "grad_norm": 2.7802098242802487, + "learning_rate": 1.5693337936601646e-05, + "loss": 1.1355, + "step": 54552 + }, + { + "epoch": 0.66, + "grad_norm": 5.728967212756064, + "learning_rate": 1.569285767034837e-05, + "loss": 1.6564, + "step": 54555 + }, + { + "epoch": 0.66, + "grad_norm": 46.43501633430307, + "learning_rate": 1.569237738466742e-05, + "loss": 1.5986, + "step": 54558 + }, + { + "epoch": 0.66, + "grad_norm": 43.10685785580293, + "learning_rate": 1.5691897079560436e-05, + "loss": 1.3134, + "step": 54561 + }, + { + "epoch": 0.66, + "grad_norm": 12.861733548418105, + "learning_rate": 1.5691416755029056e-05, + "loss": 1.497, + "step": 54564 + }, + { + "epoch": 0.66, + "grad_norm": 5.577764525624205, + "learning_rate": 1.5690936411074923e-05, + "loss": 1.6226, + "step": 54567 + }, + { + "epoch": 0.66, + "grad_norm": 15.609116356483787, + "learning_rate": 1.5690456047699672e-05, + "loss": 1.597, + "step": 54570 + }, + { + "epoch": 0.66, + "grad_norm": 5.904163632041804, + "learning_rate": 1.5689975664904945e-05, + "loss": 1.4985, + "step": 54573 + }, + { + "epoch": 0.66, + "grad_norm": 14.199551944799252, + "learning_rate": 1.5689495262692384e-05, + "loss": 1.5936, + "step": 54576 + }, + { + "epoch": 0.66, + "grad_norm": 44.769376421719784, + "learning_rate": 1.5689014841063625e-05, + "loss": 1.0899, + "step": 54579 + }, + { + "epoch": 0.66, + "grad_norm": 47.66825161546815, + "learning_rate": 1.5688534400020302e-05, + "loss": 1.3647, + "step": 54582 + }, + { + "epoch": 0.66, + "grad_norm": 11.762426951022853, + "learning_rate": 1.5688053939564064e-05, + "loss": 1.6751, + "step": 54585 + }, + { + "epoch": 0.66, + "grad_norm": 8.257786300496058, + "learning_rate": 1.5687573459696546e-05, + "loss": 1.1946, + "step": 54588 + }, + { + "epoch": 0.66, + "grad_norm": 30.632055730751485, + "learning_rate": 1.568709296041939e-05, + "loss": 1.0661, + "step": 54591 + }, + { + "epoch": 0.66, + "grad_norm": 7.157868429620519, + "learning_rate": 1.568661244173423e-05, + "loss": 1.1383, + "step": 54594 + }, + { + "epoch": 0.66, + "grad_norm": 11.72740441225127, + "learning_rate": 1.568613190364271e-05, + "loss": 1.2582, + "step": 54597 + }, + { + "epoch": 0.66, + "grad_norm": 18.302063597643183, + "learning_rate": 1.5685651346146476e-05, + "loss": 1.3813, + "step": 54600 + }, + { + "epoch": 0.66, + "grad_norm": 9.514885665723511, + "learning_rate": 1.568517076924716e-05, + "loss": 1.3676, + "step": 54603 + }, + { + "epoch": 0.66, + "grad_norm": 15.716369274970853, + "learning_rate": 1.56846901729464e-05, + "loss": 1.1048, + "step": 54606 + }, + { + "epoch": 0.66, + "grad_norm": 7.924466454178651, + "learning_rate": 1.568420955724584e-05, + "loss": 1.4216, + "step": 54609 + }, + { + "epoch": 0.66, + "grad_norm": 16.716475662744823, + "learning_rate": 1.5683728922147124e-05, + "loss": 1.2603, + "step": 54612 + }, + { + "epoch": 0.66, + "grad_norm": 16.046481009007408, + "learning_rate": 1.568324826765189e-05, + "loss": 1.405, + "step": 54615 + }, + { + "epoch": 0.66, + "grad_norm": 52.27899215510482, + "learning_rate": 1.5682767593761773e-05, + "loss": 1.1765, + "step": 54618 + }, + { + "epoch": 0.66, + "grad_norm": 56.10373503621395, + "learning_rate": 1.5682286900478418e-05, + "loss": 1.475, + "step": 54621 + }, + { + "epoch": 0.66, + "grad_norm": 15.830528282658634, + "learning_rate": 1.5681806187803463e-05, + "loss": 1.363, + "step": 54624 + }, + { + "epoch": 0.66, + "grad_norm": 10.117646484009347, + "learning_rate": 1.5681325455738552e-05, + "loss": 1.3008, + "step": 54627 + }, + { + "epoch": 0.66, + "grad_norm": 37.182119511179366, + "learning_rate": 1.568084470428532e-05, + "loss": 1.143, + "step": 54630 + }, + { + "epoch": 0.66, + "grad_norm": 49.79260022376914, + "learning_rate": 1.5680363933445418e-05, + "loss": 1.4764, + "step": 54633 + }, + { + "epoch": 0.66, + "grad_norm": 8.843133469845872, + "learning_rate": 1.5679883143220477e-05, + "loss": 1.2427, + "step": 54636 + }, + { + "epoch": 0.66, + "grad_norm": 15.996282007137863, + "learning_rate": 1.567940233361214e-05, + "loss": 1.2308, + "step": 54639 + }, + { + "epoch": 0.66, + "grad_norm": 8.668854895455269, + "learning_rate": 1.5678921504622047e-05, + "loss": 1.2052, + "step": 54642 + }, + { + "epoch": 0.66, + "grad_norm": 23.73409118220184, + "learning_rate": 1.5678440656251842e-05, + "loss": 1.3228, + "step": 54645 + }, + { + "epoch": 0.66, + "grad_norm": 17.316526628547415, + "learning_rate": 1.5677959788503166e-05, + "loss": 1.5768, + "step": 54648 + }, + { + "epoch": 0.66, + "grad_norm": 11.884881990194454, + "learning_rate": 1.5677478901377656e-05, + "loss": 1.337, + "step": 54651 + }, + { + "epoch": 0.66, + "grad_norm": 17.03058693643402, + "learning_rate": 1.5676997994876955e-05, + "loss": 1.3728, + "step": 54654 + }, + { + "epoch": 0.66, + "grad_norm": 18.509468602044468, + "learning_rate": 1.5676517069002707e-05, + "loss": 1.5331, + "step": 54657 + }, + { + "epoch": 0.66, + "grad_norm": 19.89836926449813, + "learning_rate": 1.5676036123756554e-05, + "loss": 1.7063, + "step": 54660 + }, + { + "epoch": 0.66, + "grad_norm": 21.339311133977017, + "learning_rate": 1.5675555159140126e-05, + "loss": 1.2552, + "step": 54663 + }, + { + "epoch": 0.66, + "grad_norm": 10.418863501707484, + "learning_rate": 1.5675074175155077e-05, + "loss": 1.767, + "step": 54666 + }, + { + "epoch": 0.66, + "grad_norm": 25.28179998815319, + "learning_rate": 1.5674593171803044e-05, + "loss": 0.8682, + "step": 54669 + }, + { + "epoch": 0.66, + "grad_norm": 36.19238346829859, + "learning_rate": 1.567411214908567e-05, + "loss": 1.0897, + "step": 54672 + }, + { + "epoch": 0.66, + "grad_norm": 13.054077286561784, + "learning_rate": 1.5673631107004595e-05, + "loss": 1.7348, + "step": 54675 + }, + { + "epoch": 0.66, + "grad_norm": 16.045340537529952, + "learning_rate": 1.5673150045561456e-05, + "loss": 1.9867, + "step": 54678 + }, + { + "epoch": 0.66, + "grad_norm": 11.809775778658418, + "learning_rate": 1.5672668964757902e-05, + "loss": 1.5129, + "step": 54681 + }, + { + "epoch": 0.66, + "grad_norm": 5.7789332851829425, + "learning_rate": 1.5672187864595572e-05, + "loss": 1.8097, + "step": 54684 + }, + { + "epoch": 0.66, + "grad_norm": 7.2150718658535515, + "learning_rate": 1.567170674507611e-05, + "loss": 1.5764, + "step": 54687 + }, + { + "epoch": 0.66, + "grad_norm": 8.98779739562023, + "learning_rate": 1.5671225606201153e-05, + "loss": 1.7195, + "step": 54690 + }, + { + "epoch": 0.66, + "grad_norm": 4.012285449344013, + "learning_rate": 1.567074444797235e-05, + "loss": 1.5353, + "step": 54693 + }, + { + "epoch": 0.66, + "grad_norm": 3.7622812461325634, + "learning_rate": 1.5670263270391335e-05, + "loss": 1.3005, + "step": 54696 + }, + { + "epoch": 0.66, + "grad_norm": 12.146064944167959, + "learning_rate": 1.5669782073459753e-05, + "loss": 1.6229, + "step": 54699 + }, + { + "epoch": 0.66, + "grad_norm": 4.886763682303691, + "learning_rate": 1.566930085717925e-05, + "loss": 1.3912, + "step": 54702 + }, + { + "epoch": 0.66, + "grad_norm": 8.396005064367937, + "learning_rate": 1.5668819621551462e-05, + "loss": 1.5268, + "step": 54705 + }, + { + "epoch": 0.66, + "grad_norm": 15.897558040437769, + "learning_rate": 1.566833836657804e-05, + "loss": 1.3026, + "step": 54708 + }, + { + "epoch": 0.66, + "grad_norm": 23.07335223315018, + "learning_rate": 1.5667857092260613e-05, + "loss": 1.3333, + "step": 54711 + }, + { + "epoch": 0.66, + "grad_norm": 49.08605020941378, + "learning_rate": 1.566737579860084e-05, + "loss": 1.1096, + "step": 54714 + }, + { + "epoch": 0.66, + "grad_norm": 10.707828884678644, + "learning_rate": 1.5666894485600347e-05, + "loss": 1.5957, + "step": 54717 + }, + { + "epoch": 0.66, + "grad_norm": 10.129630922671193, + "learning_rate": 1.5666413153260788e-05, + "loss": 1.1897, + "step": 54720 + }, + { + "epoch": 0.66, + "grad_norm": 10.563294268891344, + "learning_rate": 1.5665931801583805e-05, + "loss": 0.9421, + "step": 54723 + }, + { + "epoch": 0.66, + "grad_norm": 10.353035872520211, + "learning_rate": 1.5665450430571036e-05, + "loss": 1.4728, + "step": 54726 + }, + { + "epoch": 0.66, + "grad_norm": 52.03626285977643, + "learning_rate": 1.566496904022412e-05, + "loss": 1.5856, + "step": 54729 + }, + { + "epoch": 0.66, + "grad_norm": 18.444043274196716, + "learning_rate": 1.566448763054471e-05, + "loss": 1.3329, + "step": 54732 + }, + { + "epoch": 0.66, + "grad_norm": 23.174285964511533, + "learning_rate": 1.566400620153444e-05, + "loss": 1.4066, + "step": 54735 + }, + { + "epoch": 0.66, + "grad_norm": 13.55955764876601, + "learning_rate": 1.566352475319496e-05, + "loss": 1.2186, + "step": 54738 + }, + { + "epoch": 0.66, + "grad_norm": 35.629651522570896, + "learning_rate": 1.566304328552791e-05, + "loss": 1.4986, + "step": 54741 + }, + { + "epoch": 0.66, + "grad_norm": 10.191623472251619, + "learning_rate": 1.5662561798534935e-05, + "loss": 1.5761, + "step": 54744 + }, + { + "epoch": 0.66, + "grad_norm": 11.522865365835122, + "learning_rate": 1.5662080292217675e-05, + "loss": 2.0183, + "step": 54747 + }, + { + "epoch": 0.66, + "grad_norm": 46.34607871171943, + "learning_rate": 1.5661598766577774e-05, + "loss": 1.5303, + "step": 54750 + }, + { + "epoch": 0.66, + "grad_norm": 19.250755504416652, + "learning_rate": 1.566111722161688e-05, + "loss": 1.3531, + "step": 54753 + }, + { + "epoch": 0.66, + "grad_norm": 30.169168538842438, + "learning_rate": 1.566063565733662e-05, + "loss": 1.3669, + "step": 54756 + }, + { + "epoch": 0.66, + "grad_norm": 34.04668687404982, + "learning_rate": 1.5660154073738657e-05, + "loss": 0.8033, + "step": 54759 + }, + { + "epoch": 0.66, + "grad_norm": 114.5096537703394, + "learning_rate": 1.565967247082463e-05, + "loss": 1.9236, + "step": 54762 + }, + { + "epoch": 0.66, + "grad_norm": 7.55915898474625, + "learning_rate": 1.5659190848596174e-05, + "loss": 1.3558, + "step": 54765 + }, + { + "epoch": 0.66, + "grad_norm": 17.68462119089331, + "learning_rate": 1.565870920705494e-05, + "loss": 1.3783, + "step": 54768 + }, + { + "epoch": 0.66, + "grad_norm": 9.990215738196211, + "learning_rate": 1.565822754620257e-05, + "loss": 1.3446, + "step": 54771 + }, + { + "epoch": 0.66, + "grad_norm": 17.935296381008502, + "learning_rate": 1.565774586604071e-05, + "loss": 1.2232, + "step": 54774 + }, + { + "epoch": 0.66, + "grad_norm": 18.777884901633538, + "learning_rate": 1.5657264166571e-05, + "loss": 1.6707, + "step": 54777 + }, + { + "epoch": 0.66, + "grad_norm": 21.684446520246073, + "learning_rate": 1.5656782447795084e-05, + "loss": 1.37, + "step": 54780 + }, + { + "epoch": 0.66, + "grad_norm": 13.457858506207687, + "learning_rate": 1.5656300709714603e-05, + "loss": 1.2231, + "step": 54783 + }, + { + "epoch": 0.66, + "grad_norm": 10.024453435076763, + "learning_rate": 1.565581895233121e-05, + "loss": 1.3956, + "step": 54786 + }, + { + "epoch": 0.66, + "grad_norm": 17.555731154248413, + "learning_rate": 1.5655337175646542e-05, + "loss": 1.2916, + "step": 54789 + }, + { + "epoch": 0.66, + "grad_norm": 10.500932680939396, + "learning_rate": 1.565485537966225e-05, + "loss": 1.5639, + "step": 54792 + }, + { + "epoch": 0.66, + "grad_norm": 20.97007657402581, + "learning_rate": 1.5654373564379967e-05, + "loss": 1.1309, + "step": 54795 + }, + { + "epoch": 0.66, + "grad_norm": 5.726414199854901, + "learning_rate": 1.565389172980135e-05, + "loss": 1.3759, + "step": 54798 + }, + { + "epoch": 0.66, + "grad_norm": 7.767328430062786, + "learning_rate": 1.5653409875928032e-05, + "loss": 1.325, + "step": 54801 + }, + { + "epoch": 0.66, + "grad_norm": 37.612235351326476, + "learning_rate": 1.5652928002761663e-05, + "loss": 1.6907, + "step": 54804 + }, + { + "epoch": 0.66, + "grad_norm": 55.83089861272207, + "learning_rate": 1.565244611030389e-05, + "loss": 1.5105, + "step": 54807 + }, + { + "epoch": 0.66, + "grad_norm": 16.648018785283956, + "learning_rate": 1.565196419855635e-05, + "loss": 1.4484, + "step": 54810 + }, + { + "epoch": 0.66, + "grad_norm": 22.105250393094977, + "learning_rate": 1.5651482267520698e-05, + "loss": 1.4987, + "step": 54813 + }, + { + "epoch": 0.66, + "grad_norm": 44.29434568654883, + "learning_rate": 1.5651000317198568e-05, + "loss": 1.8638, + "step": 54816 + }, + { + "epoch": 0.66, + "grad_norm": 9.590788900606064, + "learning_rate": 1.565051834759161e-05, + "loss": 1.3161, + "step": 54819 + }, + { + "epoch": 0.66, + "grad_norm": 35.74818856887658, + "learning_rate": 1.565003635870147e-05, + "loss": 1.7015, + "step": 54822 + }, + { + "epoch": 0.66, + "grad_norm": 16.114390337576193, + "learning_rate": 1.564955435052979e-05, + "loss": 1.354, + "step": 54825 + }, + { + "epoch": 0.66, + "grad_norm": 12.042235394098942, + "learning_rate": 1.5649072323078216e-05, + "loss": 1.5481, + "step": 54828 + }, + { + "epoch": 0.66, + "grad_norm": 26.347101833296986, + "learning_rate": 1.5648590276348393e-05, + "loss": 1.7158, + "step": 54831 + }, + { + "epoch": 0.66, + "grad_norm": 31.440875316876262, + "learning_rate": 1.5648108210341966e-05, + "loss": 1.1905, + "step": 54834 + }, + { + "epoch": 0.66, + "grad_norm": 10.166848749687285, + "learning_rate": 1.5647626125060576e-05, + "loss": 1.4386, + "step": 54837 + }, + { + "epoch": 0.66, + "grad_norm": 146.1106231454141, + "learning_rate": 1.5647144020505877e-05, + "loss": 1.3871, + "step": 54840 + }, + { + "epoch": 0.66, + "grad_norm": 12.274719047395063, + "learning_rate": 1.564666189667951e-05, + "loss": 1.4915, + "step": 54843 + }, + { + "epoch": 0.66, + "grad_norm": 11.323196464581489, + "learning_rate": 1.5646179753583118e-05, + "loss": 1.7155, + "step": 54846 + }, + { + "epoch": 0.66, + "grad_norm": 5.528714748468073, + "learning_rate": 1.564569759121835e-05, + "loss": 1.1737, + "step": 54849 + }, + { + "epoch": 0.66, + "grad_norm": 10.265376808847424, + "learning_rate": 1.5645215409586846e-05, + "loss": 1.6185, + "step": 54852 + }, + { + "epoch": 0.66, + "grad_norm": 2.830790477890378, + "learning_rate": 1.5644733208690263e-05, + "loss": 1.3722, + "step": 54855 + }, + { + "epoch": 0.66, + "grad_norm": 17.984499118154243, + "learning_rate": 1.564425098853023e-05, + "loss": 1.0965, + "step": 54858 + }, + { + "epoch": 0.66, + "grad_norm": 7.307444051154578, + "learning_rate": 1.5643768749108407e-05, + "loss": 1.2602, + "step": 54861 + }, + { + "epoch": 0.66, + "grad_norm": 3.673657287870873, + "learning_rate": 1.564328649042643e-05, + "loss": 1.1866, + "step": 54864 + }, + { + "epoch": 0.66, + "grad_norm": 24.46770634870951, + "learning_rate": 1.564280421248595e-05, + "loss": 1.1186, + "step": 54867 + }, + { + "epoch": 0.66, + "grad_norm": 24.426694894030778, + "learning_rate": 1.5642321915288616e-05, + "loss": 1.4106, + "step": 54870 + }, + { + "epoch": 0.66, + "grad_norm": 12.755955893313642, + "learning_rate": 1.5641839598836064e-05, + "loss": 1.6416, + "step": 54873 + }, + { + "epoch": 0.66, + "grad_norm": 4.506275497794973, + "learning_rate": 1.564135726312995e-05, + "loss": 1.1431, + "step": 54876 + }, + { + "epoch": 0.66, + "grad_norm": 4.2862759797426495, + "learning_rate": 1.564087490817191e-05, + "loss": 1.3673, + "step": 54879 + }, + { + "epoch": 0.66, + "grad_norm": 10.281570935874969, + "learning_rate": 1.5640392533963602e-05, + "loss": 1.2414, + "step": 54882 + }, + { + "epoch": 0.66, + "grad_norm": 33.215812333861884, + "learning_rate": 1.5639910140506662e-05, + "loss": 1.8376, + "step": 54885 + }, + { + "epoch": 0.66, + "grad_norm": 6.026291560912276, + "learning_rate": 1.5639427727802745e-05, + "loss": 1.6656, + "step": 54888 + }, + { + "epoch": 0.66, + "grad_norm": 21.3969404555895, + "learning_rate": 1.5638945295853487e-05, + "loss": 1.4023, + "step": 54891 + }, + { + "epoch": 0.66, + "grad_norm": 8.772309564247953, + "learning_rate": 1.5638462844660542e-05, + "loss": 1.4005, + "step": 54894 + }, + { + "epoch": 0.66, + "grad_norm": 24.907915527046463, + "learning_rate": 1.5637980374225553e-05, + "loss": 1.3551, + "step": 54897 + }, + { + "epoch": 0.66, + "grad_norm": 17.515923300633506, + "learning_rate": 1.563749788455017e-05, + "loss": 1.127, + "step": 54900 + }, + { + "epoch": 0.66, + "grad_norm": 11.75324969497783, + "learning_rate": 1.5637015375636036e-05, + "loss": 1.3483, + "step": 54903 + }, + { + "epoch": 0.66, + "grad_norm": 38.883845067017674, + "learning_rate": 1.56365328474848e-05, + "loss": 1.4156, + "step": 54906 + }, + { + "epoch": 0.66, + "grad_norm": 3.8028124645238655, + "learning_rate": 1.5636050300098106e-05, + "loss": 1.3122, + "step": 54909 + }, + { + "epoch": 0.66, + "grad_norm": 10.593210204348324, + "learning_rate": 1.5635567733477603e-05, + "loss": 1.2842, + "step": 54912 + }, + { + "epoch": 0.66, + "grad_norm": 38.197727358474765, + "learning_rate": 1.563508514762494e-05, + "loss": 1.3219, + "step": 54915 + }, + { + "epoch": 0.66, + "grad_norm": 11.617848412340523, + "learning_rate": 1.5634602542541757e-05, + "loss": 1.4423, + "step": 54918 + }, + { + "epoch": 0.66, + "grad_norm": 8.429111514566767, + "learning_rate": 1.5634119918229704e-05, + "loss": 1.3491, + "step": 54921 + }, + { + "epoch": 0.66, + "grad_norm": 14.527271699971243, + "learning_rate": 1.5633637274690437e-05, + "loss": 1.3425, + "step": 54924 + }, + { + "epoch": 0.66, + "grad_norm": 13.962079116375817, + "learning_rate": 1.563315461192559e-05, + "loss": 1.1531, + "step": 54927 + }, + { + "epoch": 0.66, + "grad_norm": 11.593829072528894, + "learning_rate": 1.5632671929936813e-05, + "loss": 1.5436, + "step": 54930 + }, + { + "epoch": 0.66, + "grad_norm": 10.611150581635881, + "learning_rate": 1.563218922872576e-05, + "loss": 1.1233, + "step": 54933 + }, + { + "epoch": 0.66, + "grad_norm": 6.7110137950221755, + "learning_rate": 1.563170650829407e-05, + "loss": 1.5598, + "step": 54936 + }, + { + "epoch": 0.66, + "grad_norm": 3.1561312714024727, + "learning_rate": 1.56312237686434e-05, + "loss": 1.4238, + "step": 54939 + }, + { + "epoch": 0.66, + "grad_norm": 20.565054686875325, + "learning_rate": 1.5630741009775385e-05, + "loss": 1.6352, + "step": 54942 + }, + { + "epoch": 0.66, + "grad_norm": 17.811028829272196, + "learning_rate": 1.563025823169168e-05, + "loss": 1.3905, + "step": 54945 + }, + { + "epoch": 0.66, + "grad_norm": 16.715269290354946, + "learning_rate": 1.562977543439393e-05, + "loss": 1.4912, + "step": 54948 + }, + { + "epoch": 0.66, + "grad_norm": 14.067074557450892, + "learning_rate": 1.5629292617883794e-05, + "loss": 1.5554, + "step": 54951 + }, + { + "epoch": 0.66, + "grad_norm": 4.423097807674593, + "learning_rate": 1.56288097821629e-05, + "loss": 1.0856, + "step": 54954 + }, + { + "epoch": 0.66, + "grad_norm": 10.070374408320067, + "learning_rate": 1.5628326927232912e-05, + "loss": 1.0304, + "step": 54957 + }, + { + "epoch": 0.66, + "grad_norm": 10.041490370941755, + "learning_rate": 1.5627844053095468e-05, + "loss": 1.5976, + "step": 54960 + }, + { + "epoch": 0.66, + "grad_norm": 19.009808445631023, + "learning_rate": 1.5627361159752222e-05, + "loss": 1.6015, + "step": 54963 + }, + { + "epoch": 0.66, + "grad_norm": 10.046930822788992, + "learning_rate": 1.5626878247204818e-05, + "loss": 1.4263, + "step": 54966 + }, + { + "epoch": 0.66, + "grad_norm": 30.584513400862104, + "learning_rate": 1.5626395315454907e-05, + "loss": 1.6084, + "step": 54969 + }, + { + "epoch": 0.66, + "grad_norm": 14.147082465029918, + "learning_rate": 1.562591236450413e-05, + "loss": 1.1286, + "step": 54972 + }, + { + "epoch": 0.66, + "grad_norm": 53.866114771163545, + "learning_rate": 1.5625429394354143e-05, + "loss": 1.4175, + "step": 54975 + }, + { + "epoch": 0.66, + "grad_norm": 4.375573283434957, + "learning_rate": 1.5624946405006594e-05, + "loss": 1.1499, + "step": 54978 + }, + { + "epoch": 0.66, + "grad_norm": 16.015937485255414, + "learning_rate": 1.562446339646313e-05, + "loss": 1.2708, + "step": 54981 + }, + { + "epoch": 0.66, + "grad_norm": 9.085580948650232, + "learning_rate": 1.5623980368725395e-05, + "loss": 1.2386, + "step": 54984 + }, + { + "epoch": 0.66, + "grad_norm": 13.2341620640802, + "learning_rate": 1.562349732179504e-05, + "loss": 1.9786, + "step": 54987 + }, + { + "epoch": 0.66, + "grad_norm": 8.286006002264902, + "learning_rate": 1.5623014255673718e-05, + "loss": 1.3543, + "step": 54990 + }, + { + "epoch": 0.66, + "grad_norm": 21.613380126039377, + "learning_rate": 1.5622531170363072e-05, + "loss": 1.0963, + "step": 54993 + }, + { + "epoch": 0.66, + "grad_norm": 9.770224461536744, + "learning_rate": 1.5622048065864752e-05, + "loss": 1.3913, + "step": 54996 + }, + { + "epoch": 0.66, + "grad_norm": 9.09063356949427, + "learning_rate": 1.5621564942180407e-05, + "loss": 1.7789, + "step": 54999 + }, + { + "epoch": 0.66, + "grad_norm": 8.733126371928646, + "learning_rate": 1.5621081799311687e-05, + "loss": 1.0636, + "step": 55002 + }, + { + "epoch": 0.66, + "grad_norm": 9.413841759077544, + "learning_rate": 1.562059863726024e-05, + "loss": 1.2691, + "step": 55005 + }, + { + "epoch": 0.66, + "grad_norm": 10.645445065324864, + "learning_rate": 1.562011545602771e-05, + "loss": 1.4246, + "step": 55008 + }, + { + "epoch": 0.66, + "grad_norm": 15.39828404508053, + "learning_rate": 1.5619632255615752e-05, + "loss": 1.3493, + "step": 55011 + }, + { + "epoch": 0.66, + "grad_norm": 30.524662643843218, + "learning_rate": 1.5619149036026017e-05, + "loss": 1.1529, + "step": 55014 + }, + { + "epoch": 0.66, + "grad_norm": 7.685470872778648, + "learning_rate": 1.5618665797260147e-05, + "loss": 1.3078, + "step": 55017 + }, + { + "epoch": 0.66, + "grad_norm": 6.463574780484953, + "learning_rate": 1.5618182539319796e-05, + "loss": 1.217, + "step": 55020 + }, + { + "epoch": 0.66, + "grad_norm": 13.64788684301983, + "learning_rate": 1.5617699262206613e-05, + "loss": 1.2374, + "step": 55023 + }, + { + "epoch": 0.66, + "grad_norm": 6.846192277210089, + "learning_rate": 1.5617215965922242e-05, + "loss": 1.344, + "step": 55026 + }, + { + "epoch": 0.66, + "grad_norm": 12.710020874330421, + "learning_rate": 1.5616732650468337e-05, + "loss": 1.4003, + "step": 55029 + }, + { + "epoch": 0.66, + "grad_norm": 33.44403438154915, + "learning_rate": 1.5616249315846547e-05, + "loss": 1.6548, + "step": 55032 + }, + { + "epoch": 0.66, + "grad_norm": 14.370532038691183, + "learning_rate": 1.5615765962058522e-05, + "loss": 1.2762, + "step": 55035 + }, + { + "epoch": 0.66, + "grad_norm": 57.10428663647783, + "learning_rate": 1.561528258910591e-05, + "loss": 1.6896, + "step": 55038 + }, + { + "epoch": 0.66, + "grad_norm": 12.21087403402332, + "learning_rate": 1.561479919699036e-05, + "loss": 1.3241, + "step": 55041 + }, + { + "epoch": 0.66, + "grad_norm": 14.566990770609575, + "learning_rate": 1.5614315785713527e-05, + "loss": 1.8654, + "step": 55044 + }, + { + "epoch": 0.66, + "grad_norm": 9.925787987309832, + "learning_rate": 1.5613832355277054e-05, + "loss": 1.2273, + "step": 55047 + }, + { + "epoch": 0.66, + "grad_norm": 21.250493179430396, + "learning_rate": 1.5613348905682592e-05, + "loss": 1.2707, + "step": 55050 + }, + { + "epoch": 0.66, + "grad_norm": 22.195748148497245, + "learning_rate": 1.5612865436931793e-05, + "loss": 1.3342, + "step": 55053 + }, + { + "epoch": 0.66, + "grad_norm": 22.13515655798739, + "learning_rate": 1.5612381949026307e-05, + "loss": 1.3577, + "step": 55056 + }, + { + "epoch": 0.66, + "grad_norm": 20.65839651232083, + "learning_rate": 1.5611898441967782e-05, + "loss": 1.5468, + "step": 55059 + }, + { + "epoch": 0.66, + "grad_norm": 6.901654614104225, + "learning_rate": 1.5611414915757866e-05, + "loss": 1.4395, + "step": 55062 + }, + { + "epoch": 0.66, + "grad_norm": 4.322141681153583, + "learning_rate": 1.5610931370398216e-05, + "loss": 1.5491, + "step": 55065 + }, + { + "epoch": 0.66, + "grad_norm": 11.00999742519778, + "learning_rate": 1.5610447805890477e-05, + "loss": 1.8538, + "step": 55068 + }, + { + "epoch": 0.66, + "grad_norm": 13.27972733158726, + "learning_rate": 1.56099642222363e-05, + "loss": 0.8654, + "step": 55071 + }, + { + "epoch": 0.66, + "grad_norm": 9.894792704315794, + "learning_rate": 1.5609480619437338e-05, + "loss": 1.4998, + "step": 55074 + }, + { + "epoch": 0.66, + "grad_norm": 8.59396897847518, + "learning_rate": 1.560899699749524e-05, + "loss": 1.4451, + "step": 55077 + }, + { + "epoch": 0.66, + "grad_norm": 8.763451549649421, + "learning_rate": 1.560851335641165e-05, + "loss": 1.4853, + "step": 55080 + }, + { + "epoch": 0.66, + "grad_norm": 11.758081411097153, + "learning_rate": 1.560802969618823e-05, + "loss": 1.9232, + "step": 55083 + }, + { + "epoch": 0.66, + "grad_norm": 41.471420783476546, + "learning_rate": 1.5607546016826623e-05, + "loss": 1.4121, + "step": 55086 + }, + { + "epoch": 0.66, + "grad_norm": 9.362969485671673, + "learning_rate": 1.5607062318328478e-05, + "loss": 1.1782, + "step": 55089 + }, + { + "epoch": 0.66, + "grad_norm": 11.796547430727268, + "learning_rate": 1.5606578600695455e-05, + "loss": 1.4321, + "step": 55092 + }, + { + "epoch": 0.66, + "grad_norm": 12.177245248201753, + "learning_rate": 1.5606094863929194e-05, + "loss": 1.5267, + "step": 55095 + }, + { + "epoch": 0.66, + "grad_norm": 3.9590310943337093, + "learning_rate": 1.560561110803135e-05, + "loss": 1.1442, + "step": 55098 + }, + { + "epoch": 0.66, + "grad_norm": 19.23920083240551, + "learning_rate": 1.5605127333003574e-05, + "loss": 1.6989, + "step": 55101 + }, + { + "epoch": 0.66, + "grad_norm": 14.974184196509782, + "learning_rate": 1.5604643538847525e-05, + "loss": 1.4431, + "step": 55104 + }, + { + "epoch": 0.66, + "grad_norm": 13.944033535405628, + "learning_rate": 1.5604159725564838e-05, + "loss": 1.3979, + "step": 55107 + }, + { + "epoch": 0.66, + "grad_norm": 8.877688526994733, + "learning_rate": 1.5603675893157174e-05, + "loss": 1.3143, + "step": 55110 + }, + { + "epoch": 0.66, + "grad_norm": 9.116104428304416, + "learning_rate": 1.560319204162619e-05, + "loss": 1.3071, + "step": 55113 + }, + { + "epoch": 0.66, + "grad_norm": 4.881489095755788, + "learning_rate": 1.560270817097352e-05, + "loss": 1.6548, + "step": 55116 + }, + { + "epoch": 0.66, + "grad_norm": 4.654517440169295, + "learning_rate": 1.560222428120083e-05, + "loss": 1.6033, + "step": 55119 + }, + { + "epoch": 0.66, + "grad_norm": 15.473746328531645, + "learning_rate": 1.5601740372309764e-05, + "loss": 1.453, + "step": 55122 + }, + { + "epoch": 0.66, + "grad_norm": 14.809008328186332, + "learning_rate": 1.5601256444301977e-05, + "loss": 1.3606, + "step": 55125 + }, + { + "epoch": 0.66, + "grad_norm": 5.892981060828044, + "learning_rate": 1.560077249717912e-05, + "loss": 1.5489, + "step": 55128 + }, + { + "epoch": 0.66, + "grad_norm": 5.6237331874782654, + "learning_rate": 1.5600288530942844e-05, + "loss": 1.2588, + "step": 55131 + }, + { + "epoch": 0.66, + "grad_norm": 45.426062229328615, + "learning_rate": 1.55998045455948e-05, + "loss": 1.2957, + "step": 55134 + }, + { + "epoch": 0.66, + "grad_norm": 15.71661787042852, + "learning_rate": 1.5599320541136637e-05, + "loss": 1.542, + "step": 55137 + }, + { + "epoch": 0.66, + "grad_norm": 14.125287824510535, + "learning_rate": 1.5598836517570015e-05, + "loss": 1.5651, + "step": 55140 + }, + { + "epoch": 0.66, + "grad_norm": 23.647995603298057, + "learning_rate": 1.559835247489658e-05, + "loss": 1.2371, + "step": 55143 + }, + { + "epoch": 0.66, + "grad_norm": 9.942702379174142, + "learning_rate": 1.559786841311798e-05, + "loss": 1.3938, + "step": 55146 + }, + { + "epoch": 0.66, + "grad_norm": 98.71017487445694, + "learning_rate": 1.5597384332235877e-05, + "loss": 1.3639, + "step": 55149 + }, + { + "epoch": 0.66, + "grad_norm": 35.28491585358232, + "learning_rate": 1.5596900232251914e-05, + "loss": 1.4561, + "step": 55152 + }, + { + "epoch": 0.66, + "grad_norm": 27.385306008054936, + "learning_rate": 1.5596416113167748e-05, + "loss": 1.5091, + "step": 55155 + }, + { + "epoch": 0.66, + "grad_norm": 6.930217317841946, + "learning_rate": 1.5595931974985032e-05, + "loss": 1.6394, + "step": 55158 + }, + { + "epoch": 0.66, + "grad_norm": 85.96481157851856, + "learning_rate": 1.559544781770541e-05, + "loss": 1.3651, + "step": 55161 + }, + { + "epoch": 0.66, + "grad_norm": 8.237722481810627, + "learning_rate": 1.5594963641330543e-05, + "loss": 1.6309, + "step": 55164 + }, + { + "epoch": 0.66, + "grad_norm": 28.21010343792607, + "learning_rate": 1.559447944586208e-05, + "loss": 1.2446, + "step": 55167 + }, + { + "epoch": 0.66, + "grad_norm": 11.808834651150864, + "learning_rate": 1.5593995231301672e-05, + "loss": 1.3182, + "step": 55170 + }, + { + "epoch": 0.66, + "grad_norm": 7.832171906639997, + "learning_rate": 1.5593510997650977e-05, + "loss": 1.4898, + "step": 55173 + }, + { + "epoch": 0.66, + "grad_norm": 2.4696511787135647, + "learning_rate": 1.5593026744911642e-05, + "loss": 1.3787, + "step": 55176 + }, + { + "epoch": 0.66, + "grad_norm": 8.190497403613936, + "learning_rate": 1.559254247308532e-05, + "loss": 1.3035, + "step": 55179 + }, + { + "epoch": 0.66, + "grad_norm": 12.827421564349407, + "learning_rate": 1.5592058182173667e-05, + "loss": 1.3282, + "step": 55182 + }, + { + "epoch": 0.66, + "grad_norm": 14.291955383417605, + "learning_rate": 1.5591573872178334e-05, + "loss": 1.3677, + "step": 55185 + }, + { + "epoch": 0.66, + "grad_norm": 33.00923762969667, + "learning_rate": 1.5591089543100972e-05, + "loss": 1.1604, + "step": 55188 + }, + { + "epoch": 0.66, + "grad_norm": 6.335971648395358, + "learning_rate": 1.5590605194943233e-05, + "loss": 1.9874, + "step": 55191 + }, + { + "epoch": 0.66, + "grad_norm": 3.575711518298814, + "learning_rate": 1.5590120827706776e-05, + "loss": 1.4379, + "step": 55194 + }, + { + "epoch": 0.66, + "grad_norm": 19.612051289508862, + "learning_rate": 1.558963644139325e-05, + "loss": 1.7067, + "step": 55197 + }, + { + "epoch": 0.66, + "grad_norm": 9.287851567950048, + "learning_rate": 1.5589152036004306e-05, + "loss": 1.3922, + "step": 55200 + }, + { + "epoch": 0.66, + "grad_norm": 67.69219573843093, + "learning_rate": 1.5588667611541605e-05, + "loss": 1.2009, + "step": 55203 + }, + { + "epoch": 0.66, + "grad_norm": 3.8278143067288672, + "learning_rate": 1.5588183168006787e-05, + "loss": 1.1372, + "step": 55206 + }, + { + "epoch": 0.66, + "grad_norm": 12.574750565376455, + "learning_rate": 1.5587698705401515e-05, + "loss": 1.6603, + "step": 55209 + }, + { + "epoch": 0.66, + "grad_norm": 5.965353915985793, + "learning_rate": 1.5587214223727443e-05, + "loss": 1.5748, + "step": 55212 + }, + { + "epoch": 0.66, + "grad_norm": 6.301237134190815, + "learning_rate": 1.558672972298622e-05, + "loss": 1.3295, + "step": 55215 + }, + { + "epoch": 0.66, + "grad_norm": 11.333170909494788, + "learning_rate": 1.5586245203179497e-05, + "loss": 1.3232, + "step": 55218 + }, + { + "epoch": 0.66, + "grad_norm": 10.328213865914394, + "learning_rate": 1.5585760664308936e-05, + "loss": 1.3621, + "step": 55221 + }, + { + "epoch": 0.66, + "grad_norm": 8.62884696314154, + "learning_rate": 1.558527610637618e-05, + "loss": 1.723, + "step": 55224 + }, + { + "epoch": 0.66, + "grad_norm": 49.49589973434854, + "learning_rate": 1.5584791529382896e-05, + "loss": 1.1718, + "step": 55227 + }, + { + "epoch": 0.66, + "grad_norm": 16.400460302318226, + "learning_rate": 1.5584306933330726e-05, + "loss": 1.4783, + "step": 55230 + }, + { + "epoch": 0.66, + "grad_norm": 10.09712241099847, + "learning_rate": 1.558382231822133e-05, + "loss": 1.3267, + "step": 55233 + }, + { + "epoch": 0.66, + "grad_norm": 105.95121092047718, + "learning_rate": 1.5583337684056356e-05, + "loss": 1.4524, + "step": 55236 + }, + { + "epoch": 0.66, + "grad_norm": 11.146781247855822, + "learning_rate": 1.5582853030837465e-05, + "loss": 1.3452, + "step": 55239 + }, + { + "epoch": 0.66, + "grad_norm": 9.927395903823024, + "learning_rate": 1.5582368358566307e-05, + "loss": 1.6486, + "step": 55242 + }, + { + "epoch": 0.66, + "grad_norm": 26.11072735976031, + "learning_rate": 1.5581883667244537e-05, + "loss": 1.3179, + "step": 55245 + }, + { + "epoch": 0.66, + "grad_norm": 10.40732624268949, + "learning_rate": 1.5581398956873806e-05, + "loss": 1.3464, + "step": 55248 + }, + { + "epoch": 0.66, + "grad_norm": 44.40108638849512, + "learning_rate": 1.558091422745577e-05, + "loss": 1.7184, + "step": 55251 + }, + { + "epoch": 0.66, + "grad_norm": 14.410728195760667, + "learning_rate": 1.558042947899209e-05, + "loss": 1.2018, + "step": 55254 + }, + { + "epoch": 0.66, + "grad_norm": 9.142517256473797, + "learning_rate": 1.5579944711484406e-05, + "loss": 1.4094, + "step": 55257 + }, + { + "epoch": 0.66, + "grad_norm": 24.51444271139081, + "learning_rate": 1.5579459924934388e-05, + "loss": 1.3109, + "step": 55260 + }, + { + "epoch": 0.66, + "grad_norm": 31.613795679004667, + "learning_rate": 1.557897511934368e-05, + "loss": 1.495, + "step": 55263 + }, + { + "epoch": 0.66, + "grad_norm": 24.181238907316214, + "learning_rate": 1.557849029471394e-05, + "loss": 1.6222, + "step": 55266 + }, + { + "epoch": 0.66, + "grad_norm": 3.1483416129181716, + "learning_rate": 1.5578005451046822e-05, + "loss": 1.6826, + "step": 55269 + }, + { + "epoch": 0.66, + "grad_norm": 82.00543626690101, + "learning_rate": 1.557752058834398e-05, + "loss": 1.3408, + "step": 55272 + }, + { + "epoch": 0.66, + "grad_norm": 18.454368572209543, + "learning_rate": 1.5577035706607067e-05, + "loss": 1.7082, + "step": 55275 + }, + { + "epoch": 0.66, + "grad_norm": 9.83465380666673, + "learning_rate": 1.5576550805837743e-05, + "loss": 1.4546, + "step": 55278 + }, + { + "epoch": 0.66, + "grad_norm": 12.659194393736149, + "learning_rate": 1.557606588603766e-05, + "loss": 1.2898, + "step": 55281 + }, + { + "epoch": 0.66, + "grad_norm": 7.853664746458353, + "learning_rate": 1.557558094720847e-05, + "loss": 1.251, + "step": 55284 + }, + { + "epoch": 0.66, + "grad_norm": 14.767330699795172, + "learning_rate": 1.5575095989351835e-05, + "loss": 1.3161, + "step": 55287 + }, + { + "epoch": 0.66, + "grad_norm": 12.321200516056784, + "learning_rate": 1.5574611012469404e-05, + "loss": 1.4197, + "step": 55290 + }, + { + "epoch": 0.66, + "grad_norm": 30.933278724017494, + "learning_rate": 1.5574126016562833e-05, + "loss": 1.5598, + "step": 55293 + }, + { + "epoch": 0.66, + "grad_norm": 7.0848851658647805, + "learning_rate": 1.5573641001633776e-05, + "loss": 1.3031, + "step": 55296 + }, + { + "epoch": 0.66, + "grad_norm": 6.478388681340239, + "learning_rate": 1.5573155967683892e-05, + "loss": 1.5627, + "step": 55299 + }, + { + "epoch": 0.66, + "grad_norm": 13.111869890388391, + "learning_rate": 1.557267091471483e-05, + "loss": 1.3336, + "step": 55302 + }, + { + "epoch": 0.67, + "grad_norm": 55.85753180633757, + "learning_rate": 1.557218584272825e-05, + "loss": 1.1805, + "step": 55305 + }, + { + "epoch": 0.67, + "grad_norm": 17.338836308728272, + "learning_rate": 1.5571700751725812e-05, + "loss": 1.4413, + "step": 55308 + }, + { + "epoch": 0.67, + "grad_norm": 13.792039606949078, + "learning_rate": 1.5571215641709164e-05, + "loss": 1.2817, + "step": 55311 + }, + { + "epoch": 0.67, + "grad_norm": 10.584340928365602, + "learning_rate": 1.5570730512679965e-05, + "loss": 1.4165, + "step": 55314 + }, + { + "epoch": 0.67, + "grad_norm": 4.178220724486989, + "learning_rate": 1.5570245364639866e-05, + "loss": 1.5141, + "step": 55317 + }, + { + "epoch": 0.67, + "grad_norm": 8.95397802686648, + "learning_rate": 1.556976019759053e-05, + "loss": 1.6054, + "step": 55320 + }, + { + "epoch": 0.67, + "grad_norm": 41.37904219938211, + "learning_rate": 1.5569275011533606e-05, + "loss": 1.1909, + "step": 55323 + }, + { + "epoch": 0.67, + "grad_norm": 5.677949059838303, + "learning_rate": 1.556878980647075e-05, + "loss": 1.2637, + "step": 55326 + }, + { + "epoch": 0.67, + "grad_norm": 50.489551163325046, + "learning_rate": 1.5568304582403624e-05, + "loss": 1.519, + "step": 55329 + }, + { + "epoch": 0.67, + "grad_norm": 19.798854683192644, + "learning_rate": 1.556781933933388e-05, + "loss": 1.306, + "step": 55332 + }, + { + "epoch": 0.67, + "grad_norm": 8.941065151601281, + "learning_rate": 1.5567334077263173e-05, + "loss": 1.4258, + "step": 55335 + }, + { + "epoch": 0.67, + "grad_norm": 8.0364564046525, + "learning_rate": 1.556684879619316e-05, + "loss": 1.494, + "step": 55338 + }, + { + "epoch": 0.67, + "grad_norm": 8.49844255549592, + "learning_rate": 1.55663634961255e-05, + "loss": 1.3624, + "step": 55341 + }, + { + "epoch": 0.67, + "grad_norm": 28.92725036087135, + "learning_rate": 1.5565878177061842e-05, + "loss": 1.3258, + "step": 55344 + }, + { + "epoch": 0.67, + "grad_norm": 28.56208806673376, + "learning_rate": 1.556539283900385e-05, + "loss": 1.4642, + "step": 55347 + }, + { + "epoch": 0.67, + "grad_norm": 3.197854997906154, + "learning_rate": 1.5564907481953175e-05, + "loss": 1.1773, + "step": 55350 + }, + { + "epoch": 0.67, + "grad_norm": 19.16897473867643, + "learning_rate": 1.5564422105911475e-05, + "loss": 1.3473, + "step": 55353 + }, + { + "epoch": 0.67, + "grad_norm": 17.386413797367556, + "learning_rate": 1.5563936710880407e-05, + "loss": 1.4677, + "step": 55356 + }, + { + "epoch": 0.67, + "grad_norm": 28.640013712047725, + "learning_rate": 1.5563451296861626e-05, + "loss": 1.5148, + "step": 55359 + }, + { + "epoch": 0.67, + "grad_norm": 14.065228125062516, + "learning_rate": 1.5562965863856794e-05, + "loss": 1.5754, + "step": 55362 + }, + { + "epoch": 0.67, + "grad_norm": 13.522150885808113, + "learning_rate": 1.556248041186756e-05, + "loss": 1.3526, + "step": 55365 + }, + { + "epoch": 0.67, + "grad_norm": 17.051204376334223, + "learning_rate": 1.5561994940895583e-05, + "loss": 1.3219, + "step": 55368 + }, + { + "epoch": 0.67, + "grad_norm": 18.089806023203167, + "learning_rate": 1.556150945094252e-05, + "loss": 1.1949, + "step": 55371 + }, + { + "epoch": 0.67, + "grad_norm": 9.237205695532047, + "learning_rate": 1.5561023942010035e-05, + "loss": 1.5202, + "step": 55374 + }, + { + "epoch": 0.67, + "grad_norm": 8.175974424957902, + "learning_rate": 1.5560538414099773e-05, + "loss": 1.4193, + "step": 55377 + }, + { + "epoch": 0.67, + "grad_norm": 9.066190664376007, + "learning_rate": 1.5560052867213394e-05, + "loss": 1.1951, + "step": 55380 + }, + { + "epoch": 0.67, + "grad_norm": 26.265093361027404, + "learning_rate": 1.5559567301352566e-05, + "loss": 1.1593, + "step": 55383 + }, + { + "epoch": 0.67, + "grad_norm": 14.11956499213569, + "learning_rate": 1.555908171651893e-05, + "loss": 1.3172, + "step": 55386 + }, + { + "epoch": 0.67, + "grad_norm": 10.664302444661095, + "learning_rate": 1.5558596112714152e-05, + "loss": 1.2548, + "step": 55389 + }, + { + "epoch": 0.67, + "grad_norm": 39.69156824477946, + "learning_rate": 1.5558110489939886e-05, + "loss": 1.1776, + "step": 55392 + }, + { + "epoch": 0.67, + "grad_norm": 9.169425298600968, + "learning_rate": 1.5557624848197794e-05, + "loss": 1.2387, + "step": 55395 + }, + { + "epoch": 0.67, + "grad_norm": 7.374706549645863, + "learning_rate": 1.555713918748953e-05, + "loss": 1.4209, + "step": 55398 + }, + { + "epoch": 0.67, + "grad_norm": 14.292338081643273, + "learning_rate": 1.555665350781675e-05, + "loss": 1.5192, + "step": 55401 + }, + { + "epoch": 0.67, + "grad_norm": 10.11546210696811, + "learning_rate": 1.5556167809181113e-05, + "loss": 1.0771, + "step": 55404 + }, + { + "epoch": 0.67, + "grad_norm": 31.456190756210972, + "learning_rate": 1.5555682091584277e-05, + "loss": 1.3415, + "step": 55407 + }, + { + "epoch": 0.67, + "grad_norm": 20.273859044205697, + "learning_rate": 1.5555196355027902e-05, + "loss": 1.3322, + "step": 55410 + }, + { + "epoch": 0.67, + "grad_norm": 62.97387317609216, + "learning_rate": 1.555471059951364e-05, + "loss": 1.2513, + "step": 55413 + }, + { + "epoch": 0.67, + "grad_norm": 23.172348655580617, + "learning_rate": 1.5554224825043152e-05, + "loss": 1.175, + "step": 55416 + }, + { + "epoch": 0.67, + "grad_norm": 2.1977184121301154, + "learning_rate": 1.5553739031618093e-05, + "loss": 1.226, + "step": 55419 + }, + { + "epoch": 0.67, + "grad_norm": 3.8702234784083123, + "learning_rate": 1.5553253219240127e-05, + "loss": 1.5852, + "step": 55422 + }, + { + "epoch": 0.67, + "grad_norm": 33.77258912373755, + "learning_rate": 1.5552767387910906e-05, + "loss": 1.4405, + "step": 55425 + }, + { + "epoch": 0.67, + "grad_norm": 29.050143071748693, + "learning_rate": 1.555228153763209e-05, + "loss": 1.3296, + "step": 55428 + }, + { + "epoch": 0.67, + "grad_norm": 35.245798402184576, + "learning_rate": 1.555179566840534e-05, + "loss": 1.4704, + "step": 55431 + }, + { + "epoch": 0.67, + "grad_norm": 15.516366036318672, + "learning_rate": 1.5551309780232306e-05, + "loss": 1.4625, + "step": 55434 + }, + { + "epoch": 0.67, + "grad_norm": 6.927351751851986, + "learning_rate": 1.5550823873114657e-05, + "loss": 1.3619, + "step": 55437 + }, + { + "epoch": 0.67, + "grad_norm": 14.18474949462099, + "learning_rate": 1.555033794705404e-05, + "loss": 1.1435, + "step": 55440 + }, + { + "epoch": 0.67, + "grad_norm": 40.5534118621014, + "learning_rate": 1.554985200205212e-05, + "loss": 1.2487, + "step": 55443 + }, + { + "epoch": 0.67, + "grad_norm": 40.97279935214381, + "learning_rate": 1.5549366038110558e-05, + "loss": 1.4556, + "step": 55446 + }, + { + "epoch": 0.67, + "grad_norm": 5.1440461049117605, + "learning_rate": 1.5548880055231005e-05, + "loss": 1.1868, + "step": 55449 + }, + { + "epoch": 0.67, + "grad_norm": 3.4589982148765235, + "learning_rate": 1.5548394053415123e-05, + "loss": 1.5366, + "step": 55452 + }, + { + "epoch": 0.67, + "grad_norm": 14.243483455033855, + "learning_rate": 1.5547908032664575e-05, + "loss": 1.3943, + "step": 55455 + }, + { + "epoch": 0.67, + "grad_norm": 26.216102279149748, + "learning_rate": 1.554742199298101e-05, + "loss": 1.4929, + "step": 55458 + }, + { + "epoch": 0.67, + "grad_norm": 19.986054822497024, + "learning_rate": 1.554693593436609e-05, + "loss": 1.3578, + "step": 55461 + }, + { + "epoch": 0.67, + "grad_norm": 7.9702917575613315, + "learning_rate": 1.5546449856821482e-05, + "loss": 2.0109, + "step": 55464 + }, + { + "epoch": 0.67, + "grad_norm": 18.13130313497751, + "learning_rate": 1.5545963760348838e-05, + "loss": 1.6381, + "step": 55467 + }, + { + "epoch": 0.67, + "grad_norm": 5.514238636117579, + "learning_rate": 1.5545477644949815e-05, + "loss": 1.333, + "step": 55470 + }, + { + "epoch": 0.67, + "grad_norm": 14.35816313101517, + "learning_rate": 1.5544991510626074e-05, + "loss": 1.5296, + "step": 55473 + }, + { + "epoch": 0.67, + "grad_norm": 14.446857069181068, + "learning_rate": 1.5544505357379277e-05, + "loss": 1.5368, + "step": 55476 + }, + { + "epoch": 0.67, + "grad_norm": 26.156162678185343, + "learning_rate": 1.5544019185211077e-05, + "loss": 1.2912, + "step": 55479 + }, + { + "epoch": 0.67, + "grad_norm": 3.446356652205472, + "learning_rate": 1.5543532994123134e-05, + "loss": 1.3745, + "step": 55482 + }, + { + "epoch": 0.67, + "grad_norm": 19.098516904729035, + "learning_rate": 1.5543046784117115e-05, + "loss": 1.278, + "step": 55485 + }, + { + "epoch": 0.67, + "grad_norm": 162.97620980168358, + "learning_rate": 1.5542560555194674e-05, + "loss": 1.3279, + "step": 55488 + }, + { + "epoch": 0.67, + "grad_norm": 27.01167784356465, + "learning_rate": 1.5542074307357466e-05, + "loss": 1.6333, + "step": 55491 + }, + { + "epoch": 0.67, + "grad_norm": 11.457205608446275, + "learning_rate": 1.5541588040607157e-05, + "loss": 1.3536, + "step": 55494 + }, + { + "epoch": 0.67, + "grad_norm": 30.194866096762336, + "learning_rate": 1.5541101754945404e-05, + "loss": 1.2793, + "step": 55497 + }, + { + "epoch": 0.67, + "grad_norm": 30.10419919610881, + "learning_rate": 1.554061545037387e-05, + "loss": 1.8907, + "step": 55500 + }, + { + "epoch": 0.67, + "grad_norm": 13.857485333455985, + "learning_rate": 1.5540129126894206e-05, + "loss": 1.1037, + "step": 55503 + }, + { + "epoch": 0.67, + "grad_norm": 24.512207840196, + "learning_rate": 1.553964278450808e-05, + "loss": 1.4124, + "step": 55506 + }, + { + "epoch": 0.67, + "grad_norm": 16.825264156650892, + "learning_rate": 1.5539156423217144e-05, + "loss": 1.237, + "step": 55509 + }, + { + "epoch": 0.67, + "grad_norm": 8.4937614580776, + "learning_rate": 1.553867004302307e-05, + "loss": 1.63, + "step": 55512 + }, + { + "epoch": 0.67, + "grad_norm": 6.208631377802635, + "learning_rate": 1.55381836439275e-05, + "loss": 1.1064, + "step": 55515 + }, + { + "epoch": 0.67, + "grad_norm": 7.683801131989973, + "learning_rate": 1.5537697225932114e-05, + "loss": 1.1434, + "step": 55518 + }, + { + "epoch": 0.67, + "grad_norm": 8.359638371378692, + "learning_rate": 1.5537210789038558e-05, + "loss": 1.3462, + "step": 55521 + }, + { + "epoch": 0.67, + "grad_norm": 6.079692836017908, + "learning_rate": 1.5536724333248496e-05, + "loss": 1.5375, + "step": 55524 + }, + { + "epoch": 0.67, + "grad_norm": 21.6113829432612, + "learning_rate": 1.5536237858563586e-05, + "loss": 1.6364, + "step": 55527 + }, + { + "epoch": 0.67, + "grad_norm": 26.050116546697023, + "learning_rate": 1.5535751364985495e-05, + "loss": 1.4679, + "step": 55530 + }, + { + "epoch": 0.67, + "grad_norm": 10.209667300215358, + "learning_rate": 1.5535264852515875e-05, + "loss": 1.4763, + "step": 55533 + }, + { + "epoch": 0.67, + "grad_norm": 8.941646357138213, + "learning_rate": 1.5534778321156393e-05, + "loss": 1.3162, + "step": 55536 + }, + { + "epoch": 0.67, + "grad_norm": 27.31479462344656, + "learning_rate": 1.55342917709087e-05, + "loss": 1.4779, + "step": 55539 + }, + { + "epoch": 0.67, + "grad_norm": 10.827423524752376, + "learning_rate": 1.5533805201774467e-05, + "loss": 1.2324, + "step": 55542 + }, + { + "epoch": 0.67, + "grad_norm": 11.375619171758162, + "learning_rate": 1.553331861375535e-05, + "loss": 1.1949, + "step": 55545 + }, + { + "epoch": 0.67, + "grad_norm": 20.86054626346899, + "learning_rate": 1.553283200685301e-05, + "loss": 1.3563, + "step": 55548 + }, + { + "epoch": 0.67, + "grad_norm": 16.724831411957283, + "learning_rate": 1.5532345381069108e-05, + "loss": 1.3224, + "step": 55551 + }, + { + "epoch": 0.67, + "grad_norm": 7.353767672987892, + "learning_rate": 1.5531858736405302e-05, + "loss": 1.6367, + "step": 55554 + }, + { + "epoch": 0.67, + "grad_norm": 23.051857127426103, + "learning_rate": 1.5531372072863257e-05, + "loss": 1.2609, + "step": 55557 + }, + { + "epoch": 0.67, + "grad_norm": 13.685031777370867, + "learning_rate": 1.5530885390444627e-05, + "loss": 1.3268, + "step": 55560 + }, + { + "epoch": 0.67, + "grad_norm": 7.638874916602004, + "learning_rate": 1.5530398689151082e-05, + "loss": 1.3973, + "step": 55563 + }, + { + "epoch": 0.67, + "grad_norm": 59.25317098262042, + "learning_rate": 1.5529911968984274e-05, + "loss": 1.2655, + "step": 55566 + }, + { + "epoch": 0.67, + "grad_norm": 8.255096853034184, + "learning_rate": 1.5529425229945873e-05, + "loss": 1.7554, + "step": 55569 + }, + { + "epoch": 0.67, + "grad_norm": 2.7695575566850565, + "learning_rate": 1.5528938472037533e-05, + "loss": 1.5, + "step": 55572 + }, + { + "epoch": 0.67, + "grad_norm": 8.376713932472953, + "learning_rate": 1.552845169526092e-05, + "loss": 1.5125, + "step": 55575 + }, + { + "epoch": 0.67, + "grad_norm": 4.3234699422403615, + "learning_rate": 1.552796489961769e-05, + "loss": 1.5541, + "step": 55578 + }, + { + "epoch": 0.67, + "grad_norm": 57.56183566313263, + "learning_rate": 1.5527478085109508e-05, + "loss": 1.7406, + "step": 55581 + }, + { + "epoch": 0.67, + "grad_norm": 14.551488011944237, + "learning_rate": 1.5526991251738034e-05, + "loss": 1.6428, + "step": 55584 + }, + { + "epoch": 0.67, + "grad_norm": 10.858466054351775, + "learning_rate": 1.5526504399504928e-05, + "loss": 1.3003, + "step": 55587 + }, + { + "epoch": 0.67, + "grad_norm": 6.8089474155383245, + "learning_rate": 1.5526017528411858e-05, + "loss": 1.2992, + "step": 55590 + }, + { + "epoch": 0.67, + "grad_norm": 13.872077447864614, + "learning_rate": 1.5525530638460474e-05, + "loss": 1.3842, + "step": 55593 + }, + { + "epoch": 0.67, + "grad_norm": 9.48699781229329, + "learning_rate": 1.552504372965245e-05, + "loss": 1.2424, + "step": 55596 + }, + { + "epoch": 0.67, + "grad_norm": 11.016303946070307, + "learning_rate": 1.5524556801989436e-05, + "loss": 1.0366, + "step": 55599 + }, + { + "epoch": 0.67, + "grad_norm": 10.775763104809357, + "learning_rate": 1.5524069855473103e-05, + "loss": 1.6704, + "step": 55602 + }, + { + "epoch": 0.67, + "grad_norm": 19.5860604784167, + "learning_rate": 1.552358289010511e-05, + "loss": 1.1545, + "step": 55605 + }, + { + "epoch": 0.67, + "grad_norm": 65.28776203977421, + "learning_rate": 1.5523095905887114e-05, + "loss": 1.4875, + "step": 55608 + }, + { + "epoch": 0.67, + "grad_norm": 42.9388570415909, + "learning_rate": 1.5522608902820788e-05, + "loss": 1.4336, + "step": 55611 + }, + { + "epoch": 0.67, + "grad_norm": 11.128555785122863, + "learning_rate": 1.5522121880907783e-05, + "loss": 0.9668, + "step": 55614 + }, + { + "epoch": 0.67, + "grad_norm": 29.902186548657205, + "learning_rate": 1.5521634840149765e-05, + "loss": 1.4796, + "step": 55617 + }, + { + "epoch": 0.67, + "grad_norm": 5.185176832575304, + "learning_rate": 1.5521147780548398e-05, + "loss": 1.4579, + "step": 55620 + }, + { + "epoch": 0.67, + "grad_norm": 20.022086251832377, + "learning_rate": 1.5520660702105338e-05, + "loss": 0.9977, + "step": 55623 + }, + { + "epoch": 0.67, + "grad_norm": 8.660659966515912, + "learning_rate": 1.5520173604822257e-05, + "loss": 1.6798, + "step": 55626 + }, + { + "epoch": 0.67, + "grad_norm": 21.015492471098067, + "learning_rate": 1.551968648870081e-05, + "loss": 1.5135, + "step": 55629 + }, + { + "epoch": 0.67, + "grad_norm": 14.172595762678492, + "learning_rate": 1.551919935374266e-05, + "loss": 1.2417, + "step": 55632 + }, + { + "epoch": 0.67, + "grad_norm": 17.898149948578638, + "learning_rate": 1.551871219994947e-05, + "loss": 1.5553, + "step": 55635 + }, + { + "epoch": 0.67, + "grad_norm": 8.157300726946781, + "learning_rate": 1.5518225027322906e-05, + "loss": 1.2921, + "step": 55638 + }, + { + "epoch": 0.67, + "grad_norm": 32.226283431949525, + "learning_rate": 1.5517737835864625e-05, + "loss": 1.3047, + "step": 55641 + }, + { + "epoch": 0.67, + "grad_norm": 16.24622448180934, + "learning_rate": 1.5517250625576293e-05, + "loss": 1.5202, + "step": 55644 + }, + { + "epoch": 0.67, + "grad_norm": 12.836003729191653, + "learning_rate": 1.5516763396459574e-05, + "loss": 1.4147, + "step": 55647 + }, + { + "epoch": 0.67, + "grad_norm": 10.737825684869367, + "learning_rate": 1.5516276148516128e-05, + "loss": 1.3466, + "step": 55650 + }, + { + "epoch": 0.67, + "grad_norm": 51.27204780980419, + "learning_rate": 1.5515788881747615e-05, + "loss": 1.3964, + "step": 55653 + }, + { + "epoch": 0.67, + "grad_norm": 46.20096950043595, + "learning_rate": 1.5515301596155703e-05, + "loss": 1.5485, + "step": 55656 + }, + { + "epoch": 0.67, + "grad_norm": 15.638143849197782, + "learning_rate": 1.5514814291742055e-05, + "loss": 1.7743, + "step": 55659 + }, + { + "epoch": 0.67, + "grad_norm": 11.101384558812892, + "learning_rate": 1.551432696850833e-05, + "loss": 1.6915, + "step": 55662 + }, + { + "epoch": 0.67, + "grad_norm": 16.45965356615874, + "learning_rate": 1.5513839626456197e-05, + "loss": 0.961, + "step": 55665 + }, + { + "epoch": 0.67, + "grad_norm": 18.60854004815617, + "learning_rate": 1.551335226558731e-05, + "loss": 1.158, + "step": 55668 + }, + { + "epoch": 0.67, + "grad_norm": 5.413974345759687, + "learning_rate": 1.5512864885903343e-05, + "loss": 1.4382, + "step": 55671 + }, + { + "epoch": 0.67, + "grad_norm": 8.03737510342184, + "learning_rate": 1.551237748740595e-05, + "loss": 1.5355, + "step": 55674 + }, + { + "epoch": 0.67, + "grad_norm": 22.298459474306892, + "learning_rate": 1.55118900700968e-05, + "loss": 1.5616, + "step": 55677 + }, + { + "epoch": 0.67, + "grad_norm": 15.496895262394728, + "learning_rate": 1.5511402633977555e-05, + "loss": 1.5088, + "step": 55680 + }, + { + "epoch": 0.67, + "grad_norm": 15.027065247485243, + "learning_rate": 1.5510915179049875e-05, + "loss": 1.5077, + "step": 55683 + }, + { + "epoch": 0.67, + "grad_norm": 12.733636836810279, + "learning_rate": 1.5510427705315428e-05, + "loss": 1.4248, + "step": 55686 + }, + { + "epoch": 0.67, + "grad_norm": 47.739525057976905, + "learning_rate": 1.550994021277588e-05, + "loss": 1.706, + "step": 55689 + }, + { + "epoch": 0.67, + "grad_norm": 19.897580115868852, + "learning_rate": 1.5509452701432886e-05, + "loss": 1.2853, + "step": 55692 + }, + { + "epoch": 0.67, + "grad_norm": 12.77000879296262, + "learning_rate": 1.550896517128812e-05, + "loss": 1.2906, + "step": 55695 + }, + { + "epoch": 0.67, + "grad_norm": 8.418438781169641, + "learning_rate": 1.5508477622343234e-05, + "loss": 1.4795, + "step": 55698 + }, + { + "epoch": 0.67, + "grad_norm": 17.92942967046277, + "learning_rate": 1.55079900545999e-05, + "loss": 1.1412, + "step": 55701 + }, + { + "epoch": 0.67, + "grad_norm": 31.186813803320664, + "learning_rate": 1.5507502468059777e-05, + "loss": 1.3391, + "step": 55704 + }, + { + "epoch": 0.67, + "grad_norm": 5.954093211486461, + "learning_rate": 1.5507014862724535e-05, + "loss": 1.0017, + "step": 55707 + }, + { + "epoch": 0.67, + "grad_norm": 22.119165137142634, + "learning_rate": 1.5506527238595834e-05, + "loss": 1.1694, + "step": 55710 + }, + { + "epoch": 0.67, + "grad_norm": 13.479051311981678, + "learning_rate": 1.550603959567534e-05, + "loss": 1.1638, + "step": 55713 + }, + { + "epoch": 0.67, + "grad_norm": 11.913955283843071, + "learning_rate": 1.5505551933964717e-05, + "loss": 1.5186, + "step": 55716 + }, + { + "epoch": 0.67, + "grad_norm": 30.394317293105782, + "learning_rate": 1.550506425346563e-05, + "loss": 1.3092, + "step": 55719 + }, + { + "epoch": 0.67, + "grad_norm": 32.99944091705021, + "learning_rate": 1.5504576554179737e-05, + "loss": 1.4713, + "step": 55722 + }, + { + "epoch": 0.67, + "grad_norm": 22.441687696891172, + "learning_rate": 1.550408883610871e-05, + "loss": 1.3107, + "step": 55725 + }, + { + "epoch": 0.67, + "grad_norm": 21.34220181177431, + "learning_rate": 1.5503601099254207e-05, + "loss": 1.281, + "step": 55728 + }, + { + "epoch": 0.67, + "grad_norm": 27.99576455198888, + "learning_rate": 1.55031133436179e-05, + "loss": 1.5224, + "step": 55731 + }, + { + "epoch": 0.67, + "grad_norm": 28.444559432592843, + "learning_rate": 1.5502625569201446e-05, + "loss": 1.3497, + "step": 55734 + }, + { + "epoch": 0.67, + "grad_norm": 13.441733721801198, + "learning_rate": 1.5502137776006514e-05, + "loss": 1.4694, + "step": 55737 + }, + { + "epoch": 0.67, + "grad_norm": 13.65892547880514, + "learning_rate": 1.550164996403477e-05, + "loss": 1.4419, + "step": 55740 + }, + { + "epoch": 0.67, + "grad_norm": 8.512982466207227, + "learning_rate": 1.5501162133287874e-05, + "loss": 1.0302, + "step": 55743 + }, + { + "epoch": 0.67, + "grad_norm": 14.222393200354801, + "learning_rate": 1.5500674283767494e-05, + "loss": 1.6439, + "step": 55746 + }, + { + "epoch": 0.67, + "grad_norm": 10.358292035800083, + "learning_rate": 1.550018641547529e-05, + "loss": 1.7548, + "step": 55749 + }, + { + "epoch": 0.67, + "grad_norm": 11.44334301224182, + "learning_rate": 1.5499698528412937e-05, + "loss": 1.6436, + "step": 55752 + }, + { + "epoch": 0.67, + "grad_norm": 24.38535901491195, + "learning_rate": 1.549921062258209e-05, + "loss": 1.4147, + "step": 55755 + }, + { + "epoch": 0.67, + "grad_norm": 17.32944925810238, + "learning_rate": 1.549872269798442e-05, + "loss": 1.3323, + "step": 55758 + }, + { + "epoch": 0.67, + "grad_norm": 4.089976070516345, + "learning_rate": 1.549823475462159e-05, + "loss": 1.1833, + "step": 55761 + }, + { + "epoch": 0.67, + "grad_norm": 10.937048282953636, + "learning_rate": 1.5497746792495262e-05, + "loss": 1.7571, + "step": 55764 + }, + { + "epoch": 0.67, + "grad_norm": 14.225193403321734, + "learning_rate": 1.549725881160711e-05, + "loss": 1.5724, + "step": 55767 + }, + { + "epoch": 0.67, + "grad_norm": 6.524411042217403, + "learning_rate": 1.549677081195879e-05, + "loss": 1.4295, + "step": 55770 + }, + { + "epoch": 0.67, + "grad_norm": 21.46899923280688, + "learning_rate": 1.5496282793551975e-05, + "loss": 1.664, + "step": 55773 + }, + { + "epoch": 0.67, + "grad_norm": 4.023879332131031, + "learning_rate": 1.549579475638832e-05, + "loss": 1.6017, + "step": 55776 + }, + { + "epoch": 0.67, + "grad_norm": 7.575567863292787, + "learning_rate": 1.5495306700469505e-05, + "loss": 1.277, + "step": 55779 + }, + { + "epoch": 0.67, + "grad_norm": 21.882631152653833, + "learning_rate": 1.549481862579718e-05, + "loss": 1.1878, + "step": 55782 + }, + { + "epoch": 0.67, + "grad_norm": 67.78044417067845, + "learning_rate": 1.549433053237302e-05, + "loss": 1.3264, + "step": 55785 + }, + { + "epoch": 0.67, + "grad_norm": 27.65859272490245, + "learning_rate": 1.5493842420198692e-05, + "loss": 1.3723, + "step": 55788 + }, + { + "epoch": 0.67, + "grad_norm": 10.78097367321134, + "learning_rate": 1.5493354289275857e-05, + "loss": 1.326, + "step": 55791 + }, + { + "epoch": 0.67, + "grad_norm": 22.98933219266712, + "learning_rate": 1.5492866139606184e-05, + "loss": 1.5233, + "step": 55794 + }, + { + "epoch": 0.67, + "grad_norm": 12.964227864208626, + "learning_rate": 1.5492377971191333e-05, + "loss": 1.6016, + "step": 55797 + }, + { + "epoch": 0.67, + "grad_norm": 6.526944304415941, + "learning_rate": 1.549188978403298e-05, + "loss": 1.6015, + "step": 55800 + }, + { + "epoch": 0.67, + "grad_norm": 5.871552357800129, + "learning_rate": 1.5491401578132782e-05, + "loss": 1.7291, + "step": 55803 + }, + { + "epoch": 0.67, + "grad_norm": 43.16091398345591, + "learning_rate": 1.549091335349241e-05, + "loss": 1.1788, + "step": 55806 + }, + { + "epoch": 0.67, + "grad_norm": 9.492426454431811, + "learning_rate": 1.5490425110113526e-05, + "loss": 1.3809, + "step": 55809 + }, + { + "epoch": 0.67, + "grad_norm": 39.69556345962196, + "learning_rate": 1.54899368479978e-05, + "loss": 1.2721, + "step": 55812 + }, + { + "epoch": 0.67, + "grad_norm": 18.696374216787092, + "learning_rate": 1.5489448567146898e-05, + "loss": 1.5407, + "step": 55815 + }, + { + "epoch": 0.67, + "grad_norm": 16.76599543330243, + "learning_rate": 1.5488960267562486e-05, + "loss": 1.3663, + "step": 55818 + }, + { + "epoch": 0.67, + "grad_norm": 15.596749466555396, + "learning_rate": 1.5488471949246226e-05, + "loss": 1.3352, + "step": 55821 + }, + { + "epoch": 0.67, + "grad_norm": 9.604821547102345, + "learning_rate": 1.548798361219979e-05, + "loss": 1.3321, + "step": 55824 + }, + { + "epoch": 0.67, + "grad_norm": 9.942904566895324, + "learning_rate": 1.5487495256424846e-05, + "loss": 1.4255, + "step": 55827 + }, + { + "epoch": 0.67, + "grad_norm": 9.183668691720998, + "learning_rate": 1.5487006881923053e-05, + "loss": 1.361, + "step": 55830 + }, + { + "epoch": 0.67, + "grad_norm": 5.2912070740338, + "learning_rate": 1.5486518488696087e-05, + "loss": 1.3973, + "step": 55833 + }, + { + "epoch": 0.67, + "grad_norm": 8.500606623260573, + "learning_rate": 1.5486030076745602e-05, + "loss": 1.3579, + "step": 55836 + }, + { + "epoch": 0.67, + "grad_norm": 9.822754017863964, + "learning_rate": 1.5485541646073278e-05, + "loss": 1.5431, + "step": 55839 + }, + { + "epoch": 0.67, + "grad_norm": 7.400273152551597, + "learning_rate": 1.5485053196680776e-05, + "loss": 1.5801, + "step": 55842 + }, + { + "epoch": 0.67, + "grad_norm": 41.92750117534436, + "learning_rate": 1.548456472856976e-05, + "loss": 1.6554, + "step": 55845 + }, + { + "epoch": 0.67, + "grad_norm": 21.878873727552804, + "learning_rate": 1.5484076241741903e-05, + "loss": 1.4047, + "step": 55848 + }, + { + "epoch": 0.67, + "grad_norm": 10.448205391103127, + "learning_rate": 1.548358773619887e-05, + "loss": 1.5406, + "step": 55851 + }, + { + "epoch": 0.67, + "grad_norm": 25.149741469149188, + "learning_rate": 1.5483099211942322e-05, + "loss": 1.1528, + "step": 55854 + }, + { + "epoch": 0.67, + "grad_norm": 17.972260500756907, + "learning_rate": 1.5482610668973937e-05, + "loss": 1.2916, + "step": 55857 + }, + { + "epoch": 0.67, + "grad_norm": 5.186770432793296, + "learning_rate": 1.5482122107295373e-05, + "loss": 1.6704, + "step": 55860 + }, + { + "epoch": 0.67, + "grad_norm": 30.940636490220214, + "learning_rate": 1.54816335269083e-05, + "loss": 1.1236, + "step": 55863 + }, + { + "epoch": 0.67, + "grad_norm": 2.489893306457213, + "learning_rate": 1.548114492781439e-05, + "loss": 1.5582, + "step": 55866 + }, + { + "epoch": 0.67, + "grad_norm": 25.938135434722174, + "learning_rate": 1.5480656310015308e-05, + "loss": 1.646, + "step": 55869 + }, + { + "epoch": 0.67, + "grad_norm": 15.220393560063064, + "learning_rate": 1.5480167673512716e-05, + "loss": 1.9506, + "step": 55872 + }, + { + "epoch": 0.67, + "grad_norm": 11.034914164025626, + "learning_rate": 1.5479679018308288e-05, + "loss": 1.4936, + "step": 55875 + }, + { + "epoch": 0.67, + "grad_norm": 9.748578712924923, + "learning_rate": 1.5479190344403686e-05, + "loss": 1.6224, + "step": 55878 + }, + { + "epoch": 0.67, + "grad_norm": 6.456369925950945, + "learning_rate": 1.547870165180059e-05, + "loss": 1.4255, + "step": 55881 + }, + { + "epoch": 0.67, + "grad_norm": 37.216170089456796, + "learning_rate": 1.5478212940500648e-05, + "loss": 1.2989, + "step": 55884 + }, + { + "epoch": 0.67, + "grad_norm": 8.834323542666217, + "learning_rate": 1.547772421050554e-05, + "loss": 1.4213, + "step": 55887 + }, + { + "epoch": 0.67, + "grad_norm": 18.804420865470952, + "learning_rate": 1.5477235461816936e-05, + "loss": 1.3873, + "step": 55890 + }, + { + "epoch": 0.67, + "grad_norm": 19.257125141121968, + "learning_rate": 1.54767466944365e-05, + "loss": 1.3524, + "step": 55893 + }, + { + "epoch": 0.67, + "grad_norm": 13.21538479466986, + "learning_rate": 1.54762579083659e-05, + "loss": 1.6405, + "step": 55896 + }, + { + "epoch": 0.67, + "grad_norm": 13.004196685059545, + "learning_rate": 1.54757691036068e-05, + "loss": 1.4602, + "step": 55899 + }, + { + "epoch": 0.67, + "grad_norm": 19.829854732002342, + "learning_rate": 1.547528028016088e-05, + "loss": 1.3974, + "step": 55902 + }, + { + "epoch": 0.67, + "grad_norm": 20.31779228474073, + "learning_rate": 1.5474791438029795e-05, + "loss": 0.8965, + "step": 55905 + }, + { + "epoch": 0.67, + "grad_norm": 9.17015463490803, + "learning_rate": 1.547430257721522e-05, + "loss": 1.3954, + "step": 55908 + }, + { + "epoch": 0.67, + "grad_norm": 10.331866684855436, + "learning_rate": 1.5473813697718823e-05, + "loss": 1.43, + "step": 55911 + }, + { + "epoch": 0.67, + "grad_norm": 2.1913289214765728, + "learning_rate": 1.5473324799542272e-05, + "loss": 1.3184, + "step": 55914 + }, + { + "epoch": 0.67, + "grad_norm": 23.214970956264438, + "learning_rate": 1.5472835882687235e-05, + "loss": 1.4798, + "step": 55917 + }, + { + "epoch": 0.67, + "grad_norm": 20.64722115961253, + "learning_rate": 1.5472346947155376e-05, + "loss": 1.2126, + "step": 55920 + }, + { + "epoch": 0.67, + "grad_norm": 7.204231020535956, + "learning_rate": 1.5471857992948375e-05, + "loss": 1.6474, + "step": 55923 + }, + { + "epoch": 0.67, + "grad_norm": 21.140625310914523, + "learning_rate": 1.547136902006789e-05, + "loss": 1.3855, + "step": 55926 + }, + { + "epoch": 0.67, + "grad_norm": 15.919890657441371, + "learning_rate": 1.547088002851559e-05, + "loss": 1.4313, + "step": 55929 + }, + { + "epoch": 0.67, + "grad_norm": 14.722954789118852, + "learning_rate": 1.5470391018293153e-05, + "loss": 1.3886, + "step": 55932 + }, + { + "epoch": 0.67, + "grad_norm": 18.657713652158314, + "learning_rate": 1.546990198940224e-05, + "loss": 1.44, + "step": 55935 + }, + { + "epoch": 0.67, + "grad_norm": 28.020087976448753, + "learning_rate": 1.546941294184452e-05, + "loss": 1.5225, + "step": 55938 + }, + { + "epoch": 0.67, + "grad_norm": 26.420435345953525, + "learning_rate": 1.5468923875621668e-05, + "loss": 1.3908, + "step": 55941 + }, + { + "epoch": 0.67, + "grad_norm": 32.05076517890863, + "learning_rate": 1.546843479073534e-05, + "loss": 1.5221, + "step": 55944 + }, + { + "epoch": 0.67, + "grad_norm": 8.812264623756803, + "learning_rate": 1.5467945687187223e-05, + "loss": 1.4256, + "step": 55947 + }, + { + "epoch": 0.67, + "grad_norm": 20.849533975273037, + "learning_rate": 1.5467456564978972e-05, + "loss": 1.2345, + "step": 55950 + }, + { + "epoch": 0.67, + "grad_norm": 19.14846522216462, + "learning_rate": 1.5466967424112262e-05, + "loss": 1.5577, + "step": 55953 + }, + { + "epoch": 0.67, + "grad_norm": 5.718419108357059, + "learning_rate": 1.546647826458876e-05, + "loss": 1.3182, + "step": 55956 + }, + { + "epoch": 0.67, + "grad_norm": 4.804660317630728, + "learning_rate": 1.546598908641014e-05, + "loss": 1.3654, + "step": 55959 + }, + { + "epoch": 0.67, + "grad_norm": 18.087309585318735, + "learning_rate": 1.5465499889578067e-05, + "loss": 1.1336, + "step": 55962 + }, + { + "epoch": 0.67, + "grad_norm": 7.923028892370484, + "learning_rate": 1.546501067409421e-05, + "loss": 1.0488, + "step": 55965 + }, + { + "epoch": 0.67, + "grad_norm": 23.736158335916844, + "learning_rate": 1.546452143996024e-05, + "loss": 1.3443, + "step": 55968 + }, + { + "epoch": 0.67, + "grad_norm": 16.490560894097364, + "learning_rate": 1.546403218717783e-05, + "loss": 1.4466, + "step": 55971 + }, + { + "epoch": 0.67, + "grad_norm": 5.27512704232435, + "learning_rate": 1.5463542915748644e-05, + "loss": 1.1115, + "step": 55974 + }, + { + "epoch": 0.67, + "grad_norm": 14.658407509043478, + "learning_rate": 1.5463053625674353e-05, + "loss": 1.2828, + "step": 55977 + }, + { + "epoch": 0.67, + "grad_norm": 7.784924597602407, + "learning_rate": 1.546256431695663e-05, + "loss": 1.3903, + "step": 55980 + }, + { + "epoch": 0.67, + "grad_norm": 21.29719877663414, + "learning_rate": 1.546207498959714e-05, + "loss": 1.408, + "step": 55983 + }, + { + "epoch": 0.67, + "grad_norm": 5.939222941856865, + "learning_rate": 1.5461585643597558e-05, + "loss": 1.2538, + "step": 55986 + }, + { + "epoch": 0.67, + "grad_norm": 5.885916076908849, + "learning_rate": 1.546109627895955e-05, + "loss": 1.3146, + "step": 55989 + }, + { + "epoch": 0.67, + "grad_norm": 10.683070494900464, + "learning_rate": 1.5460606895684787e-05, + "loss": 1.219, + "step": 55992 + }, + { + "epoch": 0.67, + "grad_norm": 24.542303949419775, + "learning_rate": 1.546011749377494e-05, + "loss": 1.3753, + "step": 55995 + }, + { + "epoch": 0.67, + "grad_norm": 20.022632311525694, + "learning_rate": 1.5459628073231677e-05, + "loss": 1.3206, + "step": 55998 + }, + { + "epoch": 0.67, + "grad_norm": 18.000883672260017, + "learning_rate": 1.5459138634056673e-05, + "loss": 1.2864, + "step": 56001 + }, + { + "epoch": 0.67, + "grad_norm": 6.789269118116153, + "learning_rate": 1.5458649176251594e-05, + "loss": 1.1503, + "step": 56004 + }, + { + "epoch": 0.67, + "grad_norm": 9.252843542082136, + "learning_rate": 1.545815969981811e-05, + "loss": 1.1763, + "step": 56007 + }, + { + "epoch": 0.67, + "grad_norm": 23.497365760467574, + "learning_rate": 1.5457670204757896e-05, + "loss": 1.0656, + "step": 56010 + }, + { + "epoch": 0.67, + "grad_norm": 19.2465760389667, + "learning_rate": 1.545718069107262e-05, + "loss": 1.4792, + "step": 56013 + }, + { + "epoch": 0.67, + "grad_norm": 4.610395424616648, + "learning_rate": 1.545669115876395e-05, + "loss": 1.4888, + "step": 56016 + }, + { + "epoch": 0.67, + "grad_norm": 19.021808111509362, + "learning_rate": 1.545620160783356e-05, + "loss": 1.5197, + "step": 56019 + }, + { + "epoch": 0.67, + "grad_norm": 14.320128597592063, + "learning_rate": 1.5455712038283116e-05, + "loss": 1.6778, + "step": 56022 + }, + { + "epoch": 0.67, + "grad_norm": 8.385026076729051, + "learning_rate": 1.5455222450114295e-05, + "loss": 0.9726, + "step": 56025 + }, + { + "epoch": 0.67, + "grad_norm": 5.190080994558839, + "learning_rate": 1.5454732843328764e-05, + "loss": 1.7085, + "step": 56028 + }, + { + "epoch": 0.67, + "grad_norm": 7.636387613594286, + "learning_rate": 1.5454243217928194e-05, + "loss": 1.3246, + "step": 56031 + }, + { + "epoch": 0.67, + "grad_norm": 3.8254355047858812, + "learning_rate": 1.5453753573914257e-05, + "loss": 2.0575, + "step": 56034 + }, + { + "epoch": 0.67, + "grad_norm": 15.464679899766317, + "learning_rate": 1.545326391128862e-05, + "loss": 1.4404, + "step": 56037 + }, + { + "epoch": 0.67, + "grad_norm": 7.710307779176069, + "learning_rate": 1.5452774230052964e-05, + "loss": 1.4859, + "step": 56040 + }, + { + "epoch": 0.67, + "grad_norm": 6.568803466397471, + "learning_rate": 1.545228453020895e-05, + "loss": 1.541, + "step": 56043 + }, + { + "epoch": 0.67, + "grad_norm": 18.323881663605725, + "learning_rate": 1.545179481175825e-05, + "loss": 1.3456, + "step": 56046 + }, + { + "epoch": 0.67, + "grad_norm": 6.058943036592507, + "learning_rate": 1.5451305074702545e-05, + "loss": 1.8101, + "step": 56049 + }, + { + "epoch": 0.67, + "grad_norm": 7.232939089419904, + "learning_rate": 1.5450815319043497e-05, + "loss": 1.0236, + "step": 56052 + }, + { + "epoch": 0.67, + "grad_norm": 9.125080074994196, + "learning_rate": 1.5450325544782777e-05, + "loss": 1.4633, + "step": 56055 + }, + { + "epoch": 0.67, + "grad_norm": 13.099050520129012, + "learning_rate": 1.544983575192206e-05, + "loss": 1.3785, + "step": 56058 + }, + { + "epoch": 0.67, + "grad_norm": 14.084049726314086, + "learning_rate": 1.5449345940463017e-05, + "loss": 1.2358, + "step": 56061 + }, + { + "epoch": 0.67, + "grad_norm": 18.258819796933878, + "learning_rate": 1.5448856110407316e-05, + "loss": 1.4635, + "step": 56064 + }, + { + "epoch": 0.67, + "grad_norm": 5.795153052552452, + "learning_rate": 1.5448366261756636e-05, + "loss": 1.4613, + "step": 56067 + }, + { + "epoch": 0.67, + "grad_norm": 50.26239578698112, + "learning_rate": 1.5447876394512645e-05, + "loss": 1.8451, + "step": 56070 + }, + { + "epoch": 0.67, + "grad_norm": 7.2424822162044515, + "learning_rate": 1.5447386508677013e-05, + "loss": 1.6856, + "step": 56073 + }, + { + "epoch": 0.67, + "grad_norm": 11.480639060663245, + "learning_rate": 1.5446896604251412e-05, + "loss": 1.3897, + "step": 56076 + }, + { + "epoch": 0.67, + "grad_norm": 10.889341126481419, + "learning_rate": 1.5446406681237515e-05, + "loss": 1.9164, + "step": 56079 + }, + { + "epoch": 0.67, + "grad_norm": 7.905134535293093, + "learning_rate": 1.5445916739636994e-05, + "loss": 1.4092, + "step": 56082 + }, + { + "epoch": 0.67, + "grad_norm": 8.555629569906586, + "learning_rate": 1.544542677945152e-05, + "loss": 1.032, + "step": 56085 + }, + { + "epoch": 0.67, + "grad_norm": 12.648044193298235, + "learning_rate": 1.5444936800682766e-05, + "loss": 1.4593, + "step": 56088 + }, + { + "epoch": 0.67, + "grad_norm": 13.803846576905057, + "learning_rate": 1.5444446803332403e-05, + "loss": 1.4075, + "step": 56091 + }, + { + "epoch": 0.67, + "grad_norm": 10.059658426347625, + "learning_rate": 1.544395678740211e-05, + "loss": 1.6007, + "step": 56094 + }, + { + "epoch": 0.67, + "grad_norm": 17.83283756196059, + "learning_rate": 1.544346675289355e-05, + "loss": 1.1036, + "step": 56097 + }, + { + "epoch": 0.67, + "grad_norm": 6.699533940790826, + "learning_rate": 1.5442976699808398e-05, + "loss": 1.2054, + "step": 56100 + }, + { + "epoch": 0.67, + "grad_norm": 8.79276125920436, + "learning_rate": 1.5442486628148326e-05, + "loss": 1.7171, + "step": 56103 + }, + { + "epoch": 0.67, + "grad_norm": 18.808988285632125, + "learning_rate": 1.5441996537915007e-05, + "loss": 0.9653, + "step": 56106 + }, + { + "epoch": 0.67, + "grad_norm": 8.0573321266642, + "learning_rate": 1.5441506429110114e-05, + "loss": 1.2147, + "step": 56109 + }, + { + "epoch": 0.67, + "grad_norm": 9.050095001387165, + "learning_rate": 1.5441016301735322e-05, + "loss": 1.1347, + "step": 56112 + }, + { + "epoch": 0.67, + "grad_norm": 10.471450929692521, + "learning_rate": 1.54405261557923e-05, + "loss": 1.3098, + "step": 56115 + }, + { + "epoch": 0.67, + "grad_norm": 10.185040179197143, + "learning_rate": 1.5440035991282726e-05, + "loss": 1.6357, + "step": 56118 + }, + { + "epoch": 0.67, + "grad_norm": 25.32231057733267, + "learning_rate": 1.543954580820826e-05, + "loss": 1.3927, + "step": 56121 + }, + { + "epoch": 0.67, + "grad_norm": 14.24473301668214, + "learning_rate": 1.5439055606570593e-05, + "loss": 1.4527, + "step": 56124 + }, + { + "epoch": 0.67, + "grad_norm": 34.13283553565406, + "learning_rate": 1.543856538637138e-05, + "loss": 1.6017, + "step": 56127 + }, + { + "epoch": 0.67, + "grad_norm": 8.51643944187438, + "learning_rate": 1.543807514761231e-05, + "loss": 1.2315, + "step": 56130 + }, + { + "epoch": 0.67, + "grad_norm": 18.75508970819584, + "learning_rate": 1.5437584890295042e-05, + "loss": 1.0893, + "step": 56133 + }, + { + "epoch": 0.68, + "grad_norm": 12.291710747387011, + "learning_rate": 1.5437094614421256e-05, + "loss": 1.1291, + "step": 56136 + }, + { + "epoch": 0.68, + "grad_norm": 7.701389151653622, + "learning_rate": 1.5436604319992627e-05, + "loss": 2.1091, + "step": 56139 + }, + { + "epoch": 0.68, + "grad_norm": 19.514400742002568, + "learning_rate": 1.5436114007010825e-05, + "loss": 1.2281, + "step": 56142 + }, + { + "epoch": 0.68, + "grad_norm": 10.114601609367726, + "learning_rate": 1.5435623675477525e-05, + "loss": 1.4128, + "step": 56145 + }, + { + "epoch": 0.68, + "grad_norm": 16.998513374280797, + "learning_rate": 1.5435133325394396e-05, + "loss": 1.3669, + "step": 56148 + }, + { + "epoch": 0.68, + "grad_norm": 14.561038719981573, + "learning_rate": 1.5434642956763116e-05, + "loss": 1.7031, + "step": 56151 + }, + { + "epoch": 0.68, + "grad_norm": 24.38069031339385, + "learning_rate": 1.5434152569585358e-05, + "loss": 1.553, + "step": 56154 + }, + { + "epoch": 0.68, + "grad_norm": 11.793878773776592, + "learning_rate": 1.5433662163862796e-05, + "loss": 1.6136, + "step": 56157 + }, + { + "epoch": 0.68, + "grad_norm": 3.048839221672037, + "learning_rate": 1.54331717395971e-05, + "loss": 1.6175, + "step": 56160 + }, + { + "epoch": 0.68, + "grad_norm": 41.226179644208834, + "learning_rate": 1.5432681296789947e-05, + "loss": 1.1572, + "step": 56163 + }, + { + "epoch": 0.68, + "grad_norm": 31.060516287617613, + "learning_rate": 1.543219083544301e-05, + "loss": 1.6202, + "step": 56166 + }, + { + "epoch": 0.68, + "grad_norm": 13.684952075434351, + "learning_rate": 1.543170035555796e-05, + "loss": 1.6085, + "step": 56169 + }, + { + "epoch": 0.68, + "grad_norm": 12.750040472566099, + "learning_rate": 1.543120985713648e-05, + "loss": 1.6742, + "step": 56172 + }, + { + "epoch": 0.68, + "grad_norm": 25.07219833036899, + "learning_rate": 1.543071934018023e-05, + "loss": 1.6484, + "step": 56175 + }, + { + "epoch": 0.68, + "grad_norm": 11.756032001824478, + "learning_rate": 1.543022880469089e-05, + "loss": 1.1015, + "step": 56178 + }, + { + "epoch": 0.68, + "grad_norm": 5.2074177663231875, + "learning_rate": 1.5429738250670137e-05, + "loss": 1.5359, + "step": 56181 + }, + { + "epoch": 0.68, + "grad_norm": 15.245773005538862, + "learning_rate": 1.542924767811965e-05, + "loss": 1.3164, + "step": 56184 + }, + { + "epoch": 0.68, + "grad_norm": 9.059149715198068, + "learning_rate": 1.5428757087041085e-05, + "loss": 1.5954, + "step": 56187 + }, + { + "epoch": 0.68, + "grad_norm": 7.202830377035756, + "learning_rate": 1.5428266477436134e-05, + "loss": 1.3933, + "step": 56190 + }, + { + "epoch": 0.68, + "grad_norm": 37.33022813168873, + "learning_rate": 1.5427775849306465e-05, + "loss": 1.3059, + "step": 56193 + }, + { + "epoch": 0.68, + "grad_norm": 4.308670807909118, + "learning_rate": 1.542728520265375e-05, + "loss": 1.1653, + "step": 56196 + }, + { + "epoch": 0.68, + "grad_norm": 9.876710486561494, + "learning_rate": 1.5426794537479667e-05, + "loss": 1.7032, + "step": 56199 + }, + { + "epoch": 0.68, + "grad_norm": 5.072294948905628, + "learning_rate": 1.5426303853785887e-05, + "loss": 1.3937, + "step": 56202 + }, + { + "epoch": 0.68, + "grad_norm": 12.300387909084312, + "learning_rate": 1.542581315157409e-05, + "loss": 1.5219, + "step": 56205 + }, + { + "epoch": 0.68, + "grad_norm": 27.12963194675818, + "learning_rate": 1.5425322430845944e-05, + "loss": 1.6027, + "step": 56208 + }, + { + "epoch": 0.68, + "grad_norm": 32.604808038852134, + "learning_rate": 1.542483169160313e-05, + "loss": 1.5069, + "step": 56211 + }, + { + "epoch": 0.68, + "grad_norm": 8.110873052325125, + "learning_rate": 1.5424340933847312e-05, + "loss": 1.2214, + "step": 56214 + }, + { + "epoch": 0.68, + "grad_norm": 8.51536727421967, + "learning_rate": 1.542385015758018e-05, + "loss": 1.0665, + "step": 56217 + }, + { + "epoch": 0.68, + "grad_norm": 15.159718190120763, + "learning_rate": 1.5423359362803395e-05, + "loss": 1.6685, + "step": 56220 + }, + { + "epoch": 0.68, + "grad_norm": 8.758133624440141, + "learning_rate": 1.5422868549518642e-05, + "loss": 1.3515, + "step": 56223 + }, + { + "epoch": 0.68, + "grad_norm": 12.909341041877402, + "learning_rate": 1.542237771772759e-05, + "loss": 1.5741, + "step": 56226 + }, + { + "epoch": 0.68, + "grad_norm": 9.222208262887177, + "learning_rate": 1.542188686743192e-05, + "loss": 1.359, + "step": 56229 + }, + { + "epoch": 0.68, + "grad_norm": 19.49575690841117, + "learning_rate": 1.54213959986333e-05, + "loss": 1.4269, + "step": 56232 + }, + { + "epoch": 0.68, + "grad_norm": 7.9257604119700025, + "learning_rate": 1.5420905111333408e-05, + "loss": 1.2201, + "step": 56235 + }, + { + "epoch": 0.68, + "grad_norm": 24.266124927545455, + "learning_rate": 1.542041420553392e-05, + "loss": 1.0398, + "step": 56238 + }, + { + "epoch": 0.68, + "grad_norm": 7.966950854915801, + "learning_rate": 1.541992328123651e-05, + "loss": 1.4152, + "step": 56241 + }, + { + "epoch": 0.68, + "grad_norm": 11.467829370504493, + "learning_rate": 1.5419432338442854e-05, + "loss": 1.4915, + "step": 56244 + }, + { + "epoch": 0.68, + "grad_norm": 6.661609810769627, + "learning_rate": 1.541894137715463e-05, + "loss": 1.1992, + "step": 56247 + }, + { + "epoch": 0.68, + "grad_norm": 30.735552896663407, + "learning_rate": 1.5418450397373508e-05, + "loss": 1.3188, + "step": 56250 + }, + { + "epoch": 0.68, + "grad_norm": 18.885397900998807, + "learning_rate": 1.541795939910117e-05, + "loss": 1.1557, + "step": 56253 + }, + { + "epoch": 0.68, + "grad_norm": 27.220468116339575, + "learning_rate": 1.5417468382339283e-05, + "loss": 1.1896, + "step": 56256 + }, + { + "epoch": 0.68, + "grad_norm": 13.824485839609366, + "learning_rate": 1.5416977347089533e-05, + "loss": 1.3035, + "step": 56259 + }, + { + "epoch": 0.68, + "grad_norm": 7.571707322554452, + "learning_rate": 1.5416486293353587e-05, + "loss": 1.4578, + "step": 56262 + }, + { + "epoch": 0.68, + "grad_norm": 4.2407119696419295, + "learning_rate": 1.541599522113313e-05, + "loss": 1.1613, + "step": 56265 + }, + { + "epoch": 0.68, + "grad_norm": 17.994738246867186, + "learning_rate": 1.5415504130429822e-05, + "loss": 1.2485, + "step": 56268 + }, + { + "epoch": 0.68, + "grad_norm": 19.12724522223132, + "learning_rate": 1.5415013021245354e-05, + "loss": 1.2981, + "step": 56271 + }, + { + "epoch": 0.68, + "grad_norm": 10.83863420028437, + "learning_rate": 1.54145218935814e-05, + "loss": 1.2319, + "step": 56274 + }, + { + "epoch": 0.68, + "grad_norm": 6.4941303259462, + "learning_rate": 1.541403074743963e-05, + "loss": 1.2286, + "step": 56277 + }, + { + "epoch": 0.68, + "grad_norm": 9.293046367500937, + "learning_rate": 1.5413539582821725e-05, + "loss": 1.3064, + "step": 56280 + }, + { + "epoch": 0.68, + "grad_norm": 12.635754922252787, + "learning_rate": 1.5413048399729357e-05, + "loss": 1.3445, + "step": 56283 + }, + { + "epoch": 0.68, + "grad_norm": 9.776453315469125, + "learning_rate": 1.541255719816421e-05, + "loss": 1.2995, + "step": 56286 + }, + { + "epoch": 0.68, + "grad_norm": 31.414209472490214, + "learning_rate": 1.5412065978127947e-05, + "loss": 1.1391, + "step": 56289 + }, + { + "epoch": 0.68, + "grad_norm": 6.0185074832821055, + "learning_rate": 1.5411574739622257e-05, + "loss": 1.1941, + "step": 56292 + }, + { + "epoch": 0.68, + "grad_norm": 7.584664651617903, + "learning_rate": 1.541108348264881e-05, + "loss": 1.2242, + "step": 56295 + }, + { + "epoch": 0.68, + "grad_norm": 18.88404464149162, + "learning_rate": 1.5410592207209286e-05, + "loss": 1.2481, + "step": 56298 + }, + { + "epoch": 0.68, + "grad_norm": 13.774980846707612, + "learning_rate": 1.5410100913305357e-05, + "loss": 1.564, + "step": 56301 + }, + { + "epoch": 0.68, + "grad_norm": 3.8469405631011195, + "learning_rate": 1.5409609600938706e-05, + "loss": 1.4054, + "step": 56304 + }, + { + "epoch": 0.68, + "grad_norm": 17.335592774675106, + "learning_rate": 1.5409118270111004e-05, + "loss": 1.2007, + "step": 56307 + }, + { + "epoch": 0.68, + "grad_norm": 6.932508638070711, + "learning_rate": 1.5408626920823925e-05, + "loss": 1.7526, + "step": 56310 + }, + { + "epoch": 0.68, + "grad_norm": 11.215623212855528, + "learning_rate": 1.5408135553079157e-05, + "loss": 1.5209, + "step": 56313 + }, + { + "epoch": 0.68, + "grad_norm": 16.649327818257404, + "learning_rate": 1.5407644166878367e-05, + "loss": 1.3034, + "step": 56316 + }, + { + "epoch": 0.68, + "grad_norm": 16.715186368625993, + "learning_rate": 1.5407152762223237e-05, + "loss": 1.5944, + "step": 56319 + }, + { + "epoch": 0.68, + "grad_norm": 72.17168455638927, + "learning_rate": 1.5406661339115444e-05, + "loss": 1.4599, + "step": 56322 + }, + { + "epoch": 0.68, + "grad_norm": 4.434081905225561, + "learning_rate": 1.540616989755666e-05, + "loss": 1.2444, + "step": 56325 + }, + { + "epoch": 0.68, + "grad_norm": 8.659227106440204, + "learning_rate": 1.5405678437548564e-05, + "loss": 1.583, + "step": 56328 + }, + { + "epoch": 0.68, + "grad_norm": 21.61277741795885, + "learning_rate": 1.540518695909284e-05, + "loss": 1.5017, + "step": 56331 + }, + { + "epoch": 0.68, + "grad_norm": 9.07546141071407, + "learning_rate": 1.5404695462191156e-05, + "loss": 1.5006, + "step": 56334 + }, + { + "epoch": 0.68, + "grad_norm": 5.405151224457169, + "learning_rate": 1.5404203946845194e-05, + "loss": 1.2902, + "step": 56337 + }, + { + "epoch": 0.68, + "grad_norm": 11.916281619569009, + "learning_rate": 1.5403712413056632e-05, + "loss": 1.7642, + "step": 56340 + }, + { + "epoch": 0.68, + "grad_norm": 3.660589748649695, + "learning_rate": 1.5403220860827146e-05, + "loss": 1.1629, + "step": 56343 + }, + { + "epoch": 0.68, + "grad_norm": 7.056687051789327, + "learning_rate": 1.540272929015841e-05, + "loss": 1.6163, + "step": 56346 + }, + { + "epoch": 0.68, + "grad_norm": 32.71263029130927, + "learning_rate": 1.540223770105211e-05, + "loss": 1.6753, + "step": 56349 + }, + { + "epoch": 0.68, + "grad_norm": 16.148061353487016, + "learning_rate": 1.5401746093509914e-05, + "loss": 1.1921, + "step": 56352 + }, + { + "epoch": 0.68, + "grad_norm": 35.43841546578501, + "learning_rate": 1.5401254467533505e-05, + "loss": 1.4704, + "step": 56355 + }, + { + "epoch": 0.68, + "grad_norm": 15.735776108633559, + "learning_rate": 1.540076282312456e-05, + "loss": 1.2815, + "step": 56358 + }, + { + "epoch": 0.68, + "grad_norm": 3.2685596675579753, + "learning_rate": 1.5400271160284762e-05, + "loss": 0.9741, + "step": 56361 + }, + { + "epoch": 0.68, + "grad_norm": 17.10157782713814, + "learning_rate": 1.5399779479015777e-05, + "loss": 1.3869, + "step": 56364 + }, + { + "epoch": 0.68, + "grad_norm": 6.592470510146726, + "learning_rate": 1.539928777931929e-05, + "loss": 1.4553, + "step": 56367 + }, + { + "epoch": 0.68, + "grad_norm": 19.118148730876715, + "learning_rate": 1.539879606119698e-05, + "loss": 1.2627, + "step": 56370 + }, + { + "epoch": 0.68, + "grad_norm": 2.8675672096561784, + "learning_rate": 1.539830432465053e-05, + "loss": 1.4977, + "step": 56373 + }, + { + "epoch": 0.68, + "grad_norm": 3.535649856898228, + "learning_rate": 1.5397812569681604e-05, + "loss": 1.3697, + "step": 56376 + }, + { + "epoch": 0.68, + "grad_norm": 5.491643006821333, + "learning_rate": 1.539732079629189e-05, + "loss": 1.3267, + "step": 56379 + }, + { + "epoch": 0.68, + "grad_norm": 17.939150488519463, + "learning_rate": 1.539682900448306e-05, + "loss": 1.5537, + "step": 56382 + }, + { + "epoch": 0.68, + "grad_norm": 50.06792803299579, + "learning_rate": 1.5396337194256803e-05, + "loss": 1.475, + "step": 56385 + }, + { + "epoch": 0.68, + "grad_norm": 11.481010398709776, + "learning_rate": 1.539584536561479e-05, + "loss": 1.3872, + "step": 56388 + }, + { + "epoch": 0.68, + "grad_norm": 21.165761340902694, + "learning_rate": 1.53953535185587e-05, + "loss": 1.5143, + "step": 56391 + }, + { + "epoch": 0.68, + "grad_norm": 4.136531196985769, + "learning_rate": 1.5394861653090213e-05, + "loss": 1.2042, + "step": 56394 + }, + { + "epoch": 0.68, + "grad_norm": 6.631894226463712, + "learning_rate": 1.5394369769211e-05, + "loss": 1.3729, + "step": 56397 + }, + { + "epoch": 0.68, + "grad_norm": 5.732300461585715, + "learning_rate": 1.5393877866922754e-05, + "loss": 1.5695, + "step": 56400 + }, + { + "epoch": 0.68, + "grad_norm": 10.56550797080488, + "learning_rate": 1.5393385946227142e-05, + "loss": 1.2351, + "step": 56403 + }, + { + "epoch": 0.68, + "grad_norm": 29.91920386547383, + "learning_rate": 1.5392894007125844e-05, + "loss": 1.4022, + "step": 56406 + }, + { + "epoch": 0.68, + "grad_norm": 3.691694667166033, + "learning_rate": 1.5392402049620546e-05, + "loss": 1.3794, + "step": 56409 + }, + { + "epoch": 0.68, + "grad_norm": 23.680276709429943, + "learning_rate": 1.539191007371292e-05, + "loss": 1.5233, + "step": 56412 + }, + { + "epoch": 0.68, + "grad_norm": 10.360393421615653, + "learning_rate": 1.539141807940465e-05, + "loss": 1.4957, + "step": 56415 + }, + { + "epoch": 0.68, + "grad_norm": 15.617616651566706, + "learning_rate": 1.5390926066697404e-05, + "loss": 1.2461, + "step": 56418 + }, + { + "epoch": 0.68, + "grad_norm": 6.196549562036247, + "learning_rate": 1.5390434035592877e-05, + "loss": 1.0567, + "step": 56421 + }, + { + "epoch": 0.68, + "grad_norm": 22.80717545210163, + "learning_rate": 1.5389941986092737e-05, + "loss": 1.0951, + "step": 56424 + }, + { + "epoch": 0.68, + "grad_norm": 6.267562175540231, + "learning_rate": 1.5389449918198666e-05, + "loss": 1.3737, + "step": 56427 + }, + { + "epoch": 0.68, + "grad_norm": 8.818544117420334, + "learning_rate": 1.5388957831912346e-05, + "loss": 1.6044, + "step": 56430 + }, + { + "epoch": 0.68, + "grad_norm": 9.553014642570862, + "learning_rate": 1.5388465727235452e-05, + "loss": 1.3784, + "step": 56433 + }, + { + "epoch": 0.68, + "grad_norm": 9.346202138077105, + "learning_rate": 1.5387973604169667e-05, + "loss": 1.4543, + "step": 56436 + }, + { + "epoch": 0.68, + "grad_norm": 9.607379315185415, + "learning_rate": 1.5387481462716666e-05, + "loss": 1.5859, + "step": 56439 + }, + { + "epoch": 0.68, + "grad_norm": 25.53619543617948, + "learning_rate": 1.5386989302878135e-05, + "loss": 1.1457, + "step": 56442 + }, + { + "epoch": 0.68, + "grad_norm": 14.44781301785904, + "learning_rate": 1.538649712465575e-05, + "loss": 1.3102, + "step": 56445 + }, + { + "epoch": 0.68, + "grad_norm": 6.23196823896158, + "learning_rate": 1.5386004928051187e-05, + "loss": 1.2354, + "step": 56448 + }, + { + "epoch": 0.68, + "grad_norm": 48.173872317300244, + "learning_rate": 1.5385512713066128e-05, + "loss": 1.5045, + "step": 56451 + }, + { + "epoch": 0.68, + "grad_norm": 5.713894685875843, + "learning_rate": 1.538502047970226e-05, + "loss": 1.3823, + "step": 56454 + }, + { + "epoch": 0.68, + "grad_norm": 41.92411931278387, + "learning_rate": 1.5384528227961253e-05, + "loss": 1.4448, + "step": 56457 + }, + { + "epoch": 0.68, + "grad_norm": 9.278440219009891, + "learning_rate": 1.538403595784479e-05, + "loss": 1.6595, + "step": 56460 + }, + { + "epoch": 0.68, + "grad_norm": 61.159708824954976, + "learning_rate": 1.5383543669354556e-05, + "loss": 1.2502, + "step": 56463 + }, + { + "epoch": 0.68, + "grad_norm": 14.914498480161516, + "learning_rate": 1.538305136249222e-05, + "loss": 1.4551, + "step": 56466 + }, + { + "epoch": 0.68, + "grad_norm": 30.87628139432673, + "learning_rate": 1.5382559037259474e-05, + "loss": 1.143, + "step": 56469 + }, + { + "epoch": 0.68, + "grad_norm": 2.755186019096293, + "learning_rate": 1.538206669365799e-05, + "loss": 1.3418, + "step": 56472 + }, + { + "epoch": 0.68, + "grad_norm": 27.43978607953015, + "learning_rate": 1.5381574331689456e-05, + "loss": 1.383, + "step": 56475 + }, + { + "epoch": 0.68, + "grad_norm": 10.11560853881043, + "learning_rate": 1.538108195135554e-05, + "loss": 1.5235, + "step": 56478 + }, + { + "epoch": 0.68, + "grad_norm": 16.90751489426326, + "learning_rate": 1.5380589552657935e-05, + "loss": 1.4896, + "step": 56481 + }, + { + "epoch": 0.68, + "grad_norm": 4.604294945907081, + "learning_rate": 1.538009713559831e-05, + "loss": 1.2434, + "step": 56484 + }, + { + "epoch": 0.68, + "grad_norm": 27.25092141193828, + "learning_rate": 1.5379604700178358e-05, + "loss": 1.2028, + "step": 56487 + }, + { + "epoch": 0.68, + "grad_norm": 6.474365880310642, + "learning_rate": 1.537911224639975e-05, + "loss": 1.066, + "step": 56490 + }, + { + "epoch": 0.68, + "grad_norm": 13.153834693116428, + "learning_rate": 1.5378619774264168e-05, + "loss": 1.5242, + "step": 56493 + }, + { + "epoch": 0.68, + "grad_norm": 34.384553169855096, + "learning_rate": 1.5378127283773298e-05, + "loss": 1.2052, + "step": 56496 + }, + { + "epoch": 0.68, + "grad_norm": 15.61857746183514, + "learning_rate": 1.5377634774928813e-05, + "loss": 1.4985, + "step": 56499 + }, + { + "epoch": 0.68, + "grad_norm": 15.040900609896337, + "learning_rate": 1.53771422477324e-05, + "loss": 1.3214, + "step": 56502 + }, + { + "epoch": 0.68, + "grad_norm": 22.759878634925965, + "learning_rate": 1.5376649702185734e-05, + "loss": 1.1088, + "step": 56505 + }, + { + "epoch": 0.68, + "grad_norm": 31.022569193565975, + "learning_rate": 1.5376157138290505e-05, + "loss": 1.1096, + "step": 56508 + }, + { + "epoch": 0.68, + "grad_norm": 20.40148812472767, + "learning_rate": 1.5375664556048382e-05, + "loss": 2.0342, + "step": 56511 + }, + { + "epoch": 0.68, + "grad_norm": 9.203311023777978, + "learning_rate": 1.537517195546105e-05, + "loss": 1.2998, + "step": 56514 + }, + { + "epoch": 0.68, + "grad_norm": 2.7930096299597627, + "learning_rate": 1.53746793365302e-05, + "loss": 1.4997, + "step": 56517 + }, + { + "epoch": 0.68, + "grad_norm": 5.156210229978001, + "learning_rate": 1.5374186699257503e-05, + "loss": 1.4685, + "step": 56520 + }, + { + "epoch": 0.68, + "grad_norm": 23.07692247491305, + "learning_rate": 1.537369404364464e-05, + "loss": 1.5297, + "step": 56523 + }, + { + "epoch": 0.68, + "grad_norm": 15.880572410545932, + "learning_rate": 1.53732013696933e-05, + "loss": 1.3805, + "step": 56526 + }, + { + "epoch": 0.68, + "grad_norm": 18.13774902870975, + "learning_rate": 1.5372708677405155e-05, + "loss": 1.3075, + "step": 56529 + }, + { + "epoch": 0.68, + "grad_norm": 5.920825867182417, + "learning_rate": 1.537221596678189e-05, + "loss": 1.4751, + "step": 56532 + }, + { + "epoch": 0.68, + "grad_norm": 33.32339666759178, + "learning_rate": 1.537172323782519e-05, + "loss": 1.4084, + "step": 56535 + }, + { + "epoch": 0.68, + "grad_norm": 17.838194132848727, + "learning_rate": 1.5371230490536734e-05, + "loss": 1.4476, + "step": 56538 + }, + { + "epoch": 0.68, + "grad_norm": 14.274640607384669, + "learning_rate": 1.53707377249182e-05, + "loss": 1.2574, + "step": 56541 + }, + { + "epoch": 0.68, + "grad_norm": 6.768776966967655, + "learning_rate": 1.537024494097127e-05, + "loss": 1.2409, + "step": 56544 + }, + { + "epoch": 0.68, + "grad_norm": 4.179124266051397, + "learning_rate": 1.5369752138697632e-05, + "loss": 1.4561, + "step": 56547 + }, + { + "epoch": 0.68, + "grad_norm": 36.13067464909331, + "learning_rate": 1.5369259318098964e-05, + "loss": 1.1198, + "step": 56550 + }, + { + "epoch": 0.68, + "grad_norm": 6.641461284652114, + "learning_rate": 1.536876647917695e-05, + "loss": 1.5141, + "step": 56553 + }, + { + "epoch": 0.68, + "grad_norm": 6.324687981985766, + "learning_rate": 1.5368273621933267e-05, + "loss": 1.0716, + "step": 56556 + }, + { + "epoch": 0.68, + "grad_norm": 6.485993187904548, + "learning_rate": 1.53677807463696e-05, + "loss": 1.3328, + "step": 56559 + }, + { + "epoch": 0.68, + "grad_norm": 9.90856215472629, + "learning_rate": 1.5367287852487635e-05, + "loss": 1.3282, + "step": 56562 + }, + { + "epoch": 0.68, + "grad_norm": 19.143399059753992, + "learning_rate": 1.5366794940289046e-05, + "loss": 1.5386, + "step": 56565 + }, + { + "epoch": 0.68, + "grad_norm": 20.240339147923162, + "learning_rate": 1.5366302009775517e-05, + "loss": 1.133, + "step": 56568 + }, + { + "epoch": 0.68, + "grad_norm": 27.04924431891723, + "learning_rate": 1.5365809060948736e-05, + "loss": 1.4149, + "step": 56571 + }, + { + "epoch": 0.68, + "grad_norm": 6.193004868328271, + "learning_rate": 1.536531609381038e-05, + "loss": 1.2006, + "step": 56574 + }, + { + "epoch": 0.68, + "grad_norm": 4.094808005530165, + "learning_rate": 1.5364823108362133e-05, + "loss": 1.614, + "step": 56577 + }, + { + "epoch": 0.68, + "grad_norm": 10.368549644788155, + "learning_rate": 1.5364330104605672e-05, + "loss": 1.5479, + "step": 56580 + }, + { + "epoch": 0.68, + "grad_norm": 3.407210249375163, + "learning_rate": 1.5363837082542695e-05, + "loss": 1.5888, + "step": 56583 + }, + { + "epoch": 0.68, + "grad_norm": 5.590642620253402, + "learning_rate": 1.5363344042174864e-05, + "loss": 1.2722, + "step": 56586 + }, + { + "epoch": 0.68, + "grad_norm": 3.08504155546401, + "learning_rate": 1.536285098350388e-05, + "loss": 1.6888, + "step": 56589 + }, + { + "epoch": 0.68, + "grad_norm": 6.624074240270434, + "learning_rate": 1.5362357906531414e-05, + "loss": 1.4159, + "step": 56592 + }, + { + "epoch": 0.68, + "grad_norm": 5.65520412871798, + "learning_rate": 1.5361864811259145e-05, + "loss": 1.2925, + "step": 56595 + }, + { + "epoch": 0.68, + "grad_norm": 14.345828644807233, + "learning_rate": 1.5361371697688773e-05, + "loss": 1.4881, + "step": 56598 + }, + { + "epoch": 0.68, + "grad_norm": 10.677274636301226, + "learning_rate": 1.5360878565821963e-05, + "loss": 1.4533, + "step": 56601 + }, + { + "epoch": 0.68, + "grad_norm": 26.162890797472713, + "learning_rate": 1.5360385415660414e-05, + "loss": 1.5315, + "step": 56604 + }, + { + "epoch": 0.68, + "grad_norm": 22.22095446433664, + "learning_rate": 1.5359892247205793e-05, + "loss": 1.3612, + "step": 56607 + }, + { + "epoch": 0.68, + "grad_norm": 11.717692620085824, + "learning_rate": 1.5359399060459793e-05, + "loss": 1.2252, + "step": 56610 + }, + { + "epoch": 0.68, + "grad_norm": 51.61189837369192, + "learning_rate": 1.5358905855424096e-05, + "loss": 1.1145, + "step": 56613 + }, + { + "epoch": 0.68, + "grad_norm": 6.515028726986491, + "learning_rate": 1.535841263210038e-05, + "loss": 1.1576, + "step": 56616 + }, + { + "epoch": 0.68, + "grad_norm": 5.672513490650633, + "learning_rate": 1.5357919390490333e-05, + "loss": 1.446, + "step": 56619 + }, + { + "epoch": 0.68, + "grad_norm": 7.256580268321894, + "learning_rate": 1.5357426130595636e-05, + "loss": 1.3833, + "step": 56622 + }, + { + "epoch": 0.68, + "grad_norm": 4.550614331424477, + "learning_rate": 1.535693285241798e-05, + "loss": 1.4598, + "step": 56625 + }, + { + "epoch": 0.68, + "grad_norm": 6.476704691850849, + "learning_rate": 1.5356439555959034e-05, + "loss": 1.7105, + "step": 56628 + }, + { + "epoch": 0.68, + "grad_norm": 2.8871399048159714, + "learning_rate": 1.535594624122049e-05, + "loss": 1.1252, + "step": 56631 + }, + { + "epoch": 0.68, + "grad_norm": 11.303745588816343, + "learning_rate": 1.5355452908204035e-05, + "loss": 1.1053, + "step": 56634 + }, + { + "epoch": 0.68, + "grad_norm": 20.04853295047298, + "learning_rate": 1.5354959556911348e-05, + "loss": 1.2761, + "step": 56637 + }, + { + "epoch": 0.68, + "grad_norm": 17.313128825771784, + "learning_rate": 1.535446618734411e-05, + "loss": 1.2892, + "step": 56640 + }, + { + "epoch": 0.68, + "grad_norm": 3.1871162279513494, + "learning_rate": 1.5353972799504003e-05, + "loss": 1.2454, + "step": 56643 + }, + { + "epoch": 0.68, + "grad_norm": 22.86355644742125, + "learning_rate": 1.5353479393392727e-05, + "loss": 1.4791, + "step": 56646 + }, + { + "epoch": 0.68, + "grad_norm": 13.664026533032942, + "learning_rate": 1.5352985969011945e-05, + "loss": 1.3132, + "step": 56649 + }, + { + "epoch": 0.68, + "grad_norm": 9.935635434667027, + "learning_rate": 1.535249252636335e-05, + "loss": 1.5698, + "step": 56652 + }, + { + "epoch": 0.68, + "grad_norm": 14.075727209749518, + "learning_rate": 1.5351999065448632e-05, + "loss": 1.2778, + "step": 56655 + }, + { + "epoch": 0.68, + "grad_norm": 12.534557263267596, + "learning_rate": 1.5351505586269464e-05, + "loss": 1.6227, + "step": 56658 + }, + { + "epoch": 0.68, + "grad_norm": 17.95864179645749, + "learning_rate": 1.5351012088827538e-05, + "loss": 1.4904, + "step": 56661 + }, + { + "epoch": 0.68, + "grad_norm": 3.41251306067041, + "learning_rate": 1.5350518573124533e-05, + "loss": 1.3181, + "step": 56664 + }, + { + "epoch": 0.68, + "grad_norm": 16.468560020444876, + "learning_rate": 1.5350025039162137e-05, + "loss": 1.6779, + "step": 56667 + }, + { + "epoch": 0.68, + "grad_norm": 9.337029406085609, + "learning_rate": 1.534953148694203e-05, + "loss": 1.3525, + "step": 56670 + }, + { + "epoch": 0.68, + "grad_norm": 17.461275163035737, + "learning_rate": 1.53490379164659e-05, + "loss": 1.1727, + "step": 56673 + }, + { + "epoch": 0.68, + "grad_norm": 65.15393774321707, + "learning_rate": 1.5348544327735432e-05, + "loss": 1.5068, + "step": 56676 + }, + { + "epoch": 0.68, + "grad_norm": 7.520236299881596, + "learning_rate": 1.534805072075231e-05, + "loss": 1.2726, + "step": 56679 + }, + { + "epoch": 0.68, + "grad_norm": 8.981422646313195, + "learning_rate": 1.5347557095518212e-05, + "loss": 1.2751, + "step": 56682 + }, + { + "epoch": 0.68, + "grad_norm": 24.13500383847543, + "learning_rate": 1.5347063452034833e-05, + "loss": 1.3788, + "step": 56685 + }, + { + "epoch": 0.68, + "grad_norm": 7.288143023496267, + "learning_rate": 1.534656979030385e-05, + "loss": 1.672, + "step": 56688 + }, + { + "epoch": 0.68, + "grad_norm": 7.107809881210041, + "learning_rate": 1.534607611032695e-05, + "loss": 1.2841, + "step": 56691 + }, + { + "epoch": 0.68, + "grad_norm": 11.767793530869895, + "learning_rate": 1.534558241210582e-05, + "loss": 0.9052, + "step": 56694 + }, + { + "epoch": 0.68, + "grad_norm": 18.521843000389985, + "learning_rate": 1.534508869564214e-05, + "loss": 1.7098, + "step": 56697 + }, + { + "epoch": 0.68, + "grad_norm": 13.044806841552376, + "learning_rate": 1.53445949609376e-05, + "loss": 1.0664, + "step": 56700 + }, + { + "epoch": 0.68, + "grad_norm": 14.163620214123899, + "learning_rate": 1.5344101207993877e-05, + "loss": 1.5069, + "step": 56703 + }, + { + "epoch": 0.68, + "grad_norm": 37.75771502676271, + "learning_rate": 1.5343607436812667e-05, + "loss": 1.3222, + "step": 56706 + }, + { + "epoch": 0.68, + "grad_norm": 16.75707626995438, + "learning_rate": 1.534311364739565e-05, + "loss": 1.4451, + "step": 56709 + }, + { + "epoch": 0.68, + "grad_norm": 14.068736701292362, + "learning_rate": 1.534261983974451e-05, + "loss": 1.4347, + "step": 56712 + }, + { + "epoch": 0.68, + "grad_norm": 7.323632517443698, + "learning_rate": 1.534212601386093e-05, + "loss": 1.2229, + "step": 56715 + }, + { + "epoch": 0.68, + "grad_norm": 8.514536721161665, + "learning_rate": 1.5341632169746602e-05, + "loss": 1.3599, + "step": 56718 + }, + { + "epoch": 0.68, + "grad_norm": 14.484480585943995, + "learning_rate": 1.5341138307403203e-05, + "loss": 1.578, + "step": 56721 + }, + { + "epoch": 0.68, + "grad_norm": 6.117646731787612, + "learning_rate": 1.534064442683243e-05, + "loss": 1.2359, + "step": 56724 + }, + { + "epoch": 0.68, + "grad_norm": 10.1012624939886, + "learning_rate": 1.534015052803595e-05, + "loss": 1.735, + "step": 56727 + }, + { + "epoch": 0.68, + "grad_norm": 15.352691946437268, + "learning_rate": 1.5339656611015468e-05, + "loss": 1.7423, + "step": 56730 + }, + { + "epoch": 0.68, + "grad_norm": 5.222935948469925, + "learning_rate": 1.533916267577266e-05, + "loss": 1.3684, + "step": 56733 + }, + { + "epoch": 0.68, + "grad_norm": 10.488488704124066, + "learning_rate": 1.533866872230921e-05, + "loss": 1.2963, + "step": 56736 + }, + { + "epoch": 0.68, + "grad_norm": 10.575370485728248, + "learning_rate": 1.5338174750626812e-05, + "loss": 1.4204, + "step": 56739 + }, + { + "epoch": 0.68, + "grad_norm": 9.997020797662508, + "learning_rate": 1.5337680760727143e-05, + "loss": 1.3793, + "step": 56742 + }, + { + "epoch": 0.68, + "grad_norm": 61.17480248592195, + "learning_rate": 1.5337186752611895e-05, + "loss": 1.666, + "step": 56745 + }, + { + "epoch": 0.68, + "grad_norm": 8.093698297543542, + "learning_rate": 1.5336692726282746e-05, + "loss": 1.5742, + "step": 56748 + }, + { + "epoch": 0.68, + "grad_norm": 18.753350222310335, + "learning_rate": 1.533619868174139e-05, + "loss": 1.7369, + "step": 56751 + }, + { + "epoch": 0.68, + "grad_norm": 7.587940157231468, + "learning_rate": 1.533570461898951e-05, + "loss": 1.3653, + "step": 56754 + }, + { + "epoch": 0.68, + "grad_norm": 2.8403094783272196, + "learning_rate": 1.533521053802879e-05, + "loss": 1.6614, + "step": 56757 + }, + { + "epoch": 0.68, + "grad_norm": 13.4630586872322, + "learning_rate": 1.533471643886092e-05, + "loss": 1.1682, + "step": 56760 + }, + { + "epoch": 0.68, + "grad_norm": 31.0419366072421, + "learning_rate": 1.5334222321487587e-05, + "loss": 1.216, + "step": 56763 + }, + { + "epoch": 0.68, + "grad_norm": 4.512520131337294, + "learning_rate": 1.5333728185910473e-05, + "loss": 1.0378, + "step": 56766 + }, + { + "epoch": 0.68, + "grad_norm": 28.261988781945195, + "learning_rate": 1.5333234032131264e-05, + "loss": 1.3735, + "step": 56769 + }, + { + "epoch": 0.68, + "grad_norm": 24.929513912032007, + "learning_rate": 1.5332739860151648e-05, + "loss": 1.1832, + "step": 56772 + }, + { + "epoch": 0.68, + "grad_norm": 28.98078159805855, + "learning_rate": 1.5332245669973312e-05, + "loss": 1.2431, + "step": 56775 + }, + { + "epoch": 0.68, + "grad_norm": 8.485770923332181, + "learning_rate": 1.5331751461597947e-05, + "loss": 1.3488, + "step": 56778 + }, + { + "epoch": 0.68, + "grad_norm": 3.0951884408649706, + "learning_rate": 1.5331257235027227e-05, + "loss": 1.5672, + "step": 56781 + }, + { + "epoch": 0.68, + "grad_norm": 100.21713070971177, + "learning_rate": 1.533076299026285e-05, + "loss": 1.4382, + "step": 56784 + }, + { + "epoch": 0.68, + "grad_norm": 1.8373182464721107, + "learning_rate": 1.53302687273065e-05, + "loss": 1.6157, + "step": 56787 + }, + { + "epoch": 0.68, + "grad_norm": 9.570731701092454, + "learning_rate": 1.5329774446159862e-05, + "loss": 1.3743, + "step": 56790 + }, + { + "epoch": 0.68, + "grad_norm": 29.62942005364364, + "learning_rate": 1.5329280146824624e-05, + "loss": 1.3991, + "step": 56793 + }, + { + "epoch": 0.68, + "grad_norm": 9.394344480789409, + "learning_rate": 1.532878582930247e-05, + "loss": 1.3047, + "step": 56796 + }, + { + "epoch": 0.68, + "grad_norm": 3.97306637843372, + "learning_rate": 1.5328291493595093e-05, + "loss": 1.4078, + "step": 56799 + }, + { + "epoch": 0.68, + "grad_norm": 5.351621039275842, + "learning_rate": 1.5327797139704174e-05, + "loss": 0.9976, + "step": 56802 + }, + { + "epoch": 0.68, + "grad_norm": 18.927451165735352, + "learning_rate": 1.5327302767631405e-05, + "loss": 1.2881, + "step": 56805 + }, + { + "epoch": 0.68, + "grad_norm": 19.25698510509829, + "learning_rate": 1.532680837737847e-05, + "loss": 1.5819, + "step": 56808 + }, + { + "epoch": 0.68, + "grad_norm": 15.32892592328498, + "learning_rate": 1.532631396894705e-05, + "loss": 1.5778, + "step": 56811 + }, + { + "epoch": 0.68, + "grad_norm": 3.4664835177812745, + "learning_rate": 1.532581954233885e-05, + "loss": 1.5957, + "step": 56814 + }, + { + "epoch": 0.68, + "grad_norm": 4.3709030592851565, + "learning_rate": 1.5325325097555538e-05, + "loss": 1.3267, + "step": 56817 + }, + { + "epoch": 0.68, + "grad_norm": 9.085739045630543, + "learning_rate": 1.5324830634598813e-05, + "loss": 1.2501, + "step": 56820 + }, + { + "epoch": 0.68, + "grad_norm": 17.469581888796032, + "learning_rate": 1.532433615347036e-05, + "loss": 1.4378, + "step": 56823 + }, + { + "epoch": 0.68, + "grad_norm": 8.159776130641012, + "learning_rate": 1.5323841654171862e-05, + "loss": 1.2019, + "step": 56826 + }, + { + "epoch": 0.68, + "grad_norm": 57.35537237334978, + "learning_rate": 1.532334713670501e-05, + "loss": 1.3783, + "step": 56829 + }, + { + "epoch": 0.68, + "grad_norm": 6.298122074765311, + "learning_rate": 1.5322852601071497e-05, + "loss": 1.2894, + "step": 56832 + }, + { + "epoch": 0.68, + "grad_norm": 15.487313039369837, + "learning_rate": 1.5322358047273e-05, + "loss": 1.0481, + "step": 56835 + }, + { + "epoch": 0.68, + "grad_norm": 53.48531815059984, + "learning_rate": 1.5321863475311216e-05, + "loss": 1.4008, + "step": 56838 + }, + { + "epoch": 0.68, + "grad_norm": 4.8513382120165005, + "learning_rate": 1.5321368885187825e-05, + "loss": 1.4829, + "step": 56841 + }, + { + "epoch": 0.68, + "grad_norm": 25.201880360938702, + "learning_rate": 1.532087427690452e-05, + "loss": 1.3513, + "step": 56844 + }, + { + "epoch": 0.68, + "grad_norm": 25.710769181476934, + "learning_rate": 1.532037965046299e-05, + "loss": 1.2081, + "step": 56847 + }, + { + "epoch": 0.68, + "grad_norm": 20.540649190514323, + "learning_rate": 1.5319885005864918e-05, + "loss": 0.9795, + "step": 56850 + }, + { + "epoch": 0.68, + "grad_norm": 17.731767240283123, + "learning_rate": 1.5319390343111997e-05, + "loss": 1.4115, + "step": 56853 + }, + { + "epoch": 0.68, + "grad_norm": 15.602345171927666, + "learning_rate": 1.531889566220591e-05, + "loss": 1.5216, + "step": 56856 + }, + { + "epoch": 0.68, + "grad_norm": 8.333955289354966, + "learning_rate": 1.531840096314835e-05, + "loss": 1.3802, + "step": 56859 + }, + { + "epoch": 0.68, + "grad_norm": 13.216012861523168, + "learning_rate": 1.5317906245941e-05, + "loss": 1.2076, + "step": 56862 + }, + { + "epoch": 0.68, + "grad_norm": 10.95637750595003, + "learning_rate": 1.5317411510585555e-05, + "loss": 1.4657, + "step": 56865 + }, + { + "epoch": 0.68, + "grad_norm": 6.28389344053827, + "learning_rate": 1.53169167570837e-05, + "loss": 1.3066, + "step": 56868 + }, + { + "epoch": 0.68, + "grad_norm": 12.708472852679044, + "learning_rate": 1.5316421985437123e-05, + "loss": 1.0683, + "step": 56871 + }, + { + "epoch": 0.68, + "grad_norm": 22.423552641227584, + "learning_rate": 1.531592719564751e-05, + "loss": 1.6596, + "step": 56874 + }, + { + "epoch": 0.68, + "grad_norm": 7.965168802801913, + "learning_rate": 1.5315432387716555e-05, + "loss": 1.3862, + "step": 56877 + }, + { + "epoch": 0.68, + "grad_norm": 5.078359136506143, + "learning_rate": 1.5314937561645944e-05, + "loss": 1.2269, + "step": 56880 + }, + { + "epoch": 0.68, + "grad_norm": 28.129279439761316, + "learning_rate": 1.5314442717437366e-05, + "loss": 1.3884, + "step": 56883 + }, + { + "epoch": 0.68, + "grad_norm": 69.69986445685826, + "learning_rate": 1.5313947855092508e-05, + "loss": 1.4485, + "step": 56886 + }, + { + "epoch": 0.68, + "grad_norm": 8.7443307711188, + "learning_rate": 1.5313452974613057e-05, + "loss": 1.2687, + "step": 56889 + }, + { + "epoch": 0.68, + "grad_norm": 8.026895518204608, + "learning_rate": 1.531295807600071e-05, + "loss": 1.6355, + "step": 56892 + }, + { + "epoch": 0.68, + "grad_norm": 6.8647046903349365, + "learning_rate": 1.5312463159257148e-05, + "loss": 1.2774, + "step": 56895 + }, + { + "epoch": 0.68, + "grad_norm": 28.181242144533165, + "learning_rate": 1.5311968224384063e-05, + "loss": 1.4297, + "step": 56898 + }, + { + "epoch": 0.68, + "grad_norm": 13.504375548018622, + "learning_rate": 1.5311473271383143e-05, + "loss": 1.3745, + "step": 56901 + }, + { + "epoch": 0.68, + "grad_norm": 12.556630405134053, + "learning_rate": 1.531097830025608e-05, + "loss": 1.3941, + "step": 56904 + }, + { + "epoch": 0.68, + "grad_norm": 10.205622991954176, + "learning_rate": 1.531048331100456e-05, + "loss": 1.4199, + "step": 56907 + }, + { + "epoch": 0.68, + "grad_norm": 6.78678097420947, + "learning_rate": 1.530998830363027e-05, + "loss": 1.4186, + "step": 56910 + }, + { + "epoch": 0.68, + "grad_norm": 14.30027865236499, + "learning_rate": 1.5309493278134914e-05, + "loss": 1.5174, + "step": 56913 + }, + { + "epoch": 0.68, + "grad_norm": 13.148516226154713, + "learning_rate": 1.5308998234520157e-05, + "loss": 1.5176, + "step": 56916 + }, + { + "epoch": 0.68, + "grad_norm": 3.249147115683162, + "learning_rate": 1.5308503172787707e-05, + "loss": 1.4673, + "step": 56919 + }, + { + "epoch": 0.68, + "grad_norm": 10.49888367436354, + "learning_rate": 1.5308008092939245e-05, + "loss": 1.2281, + "step": 56922 + }, + { + "epoch": 0.68, + "grad_norm": 7.008790438632953, + "learning_rate": 1.530751299497647e-05, + "loss": 1.1903, + "step": 56925 + }, + { + "epoch": 0.68, + "grad_norm": 11.11261843441964, + "learning_rate": 1.530701787890106e-05, + "loss": 1.7882, + "step": 56928 + }, + { + "epoch": 0.68, + "grad_norm": 35.27481304374306, + "learning_rate": 1.5306522744714705e-05, + "loss": 1.44, + "step": 56931 + }, + { + "epoch": 0.68, + "grad_norm": 15.302739579361551, + "learning_rate": 1.530602759241911e-05, + "loss": 1.232, + "step": 56934 + }, + { + "epoch": 0.68, + "grad_norm": 15.322796236912914, + "learning_rate": 1.5305532422015943e-05, + "loss": 1.1944, + "step": 56937 + }, + { + "epoch": 0.68, + "grad_norm": 22.829853252211663, + "learning_rate": 1.5305037233506913e-05, + "loss": 1.6972, + "step": 56940 + }, + { + "epoch": 0.68, + "grad_norm": 19.908057163170316, + "learning_rate": 1.5304542026893696e-05, + "loss": 1.4482, + "step": 56943 + }, + { + "epoch": 0.68, + "grad_norm": 10.470276754978656, + "learning_rate": 1.530404680217799e-05, + "loss": 1.1997, + "step": 56946 + }, + { + "epoch": 0.68, + "grad_norm": 6.704061364194885, + "learning_rate": 1.5303551559361486e-05, + "loss": 1.0856, + "step": 56949 + }, + { + "epoch": 0.68, + "grad_norm": 19.48478018246726, + "learning_rate": 1.5303056298445868e-05, + "loss": 1.5666, + "step": 56952 + }, + { + "epoch": 0.68, + "grad_norm": 42.708155285450985, + "learning_rate": 1.5302561019432828e-05, + "loss": 1.3992, + "step": 56955 + }, + { + "epoch": 0.68, + "grad_norm": 4.415802671981838, + "learning_rate": 1.5302065722324057e-05, + "loss": 1.6432, + "step": 56958 + }, + { + "epoch": 0.68, + "grad_norm": 9.604024950053478, + "learning_rate": 1.530157040712125e-05, + "loss": 1.1096, + "step": 56961 + }, + { + "epoch": 0.68, + "grad_norm": 11.748712059749058, + "learning_rate": 1.5301075073826087e-05, + "loss": 1.1619, + "step": 56964 + }, + { + "epoch": 0.69, + "grad_norm": 71.55707418534327, + "learning_rate": 1.530057972244027e-05, + "loss": 1.7859, + "step": 56967 + }, + { + "epoch": 0.69, + "grad_norm": 21.06365825124803, + "learning_rate": 1.530008435296548e-05, + "loss": 1.4014, + "step": 56970 + }, + { + "epoch": 0.69, + "grad_norm": 4.950664836748517, + "learning_rate": 1.529958896540341e-05, + "loss": 1.5499, + "step": 56973 + }, + { + "epoch": 0.69, + "grad_norm": 8.062088644099331, + "learning_rate": 1.5299093559755755e-05, + "loss": 1.2082, + "step": 56976 + }, + { + "epoch": 0.69, + "grad_norm": 4.627905101981439, + "learning_rate": 1.52985981360242e-05, + "loss": 1.1791, + "step": 56979 + }, + { + "epoch": 0.69, + "grad_norm": 5.045764334362696, + "learning_rate": 1.529810269421044e-05, + "loss": 1.2556, + "step": 56982 + }, + { + "epoch": 0.69, + "grad_norm": 7.962555893125287, + "learning_rate": 1.529760723431616e-05, + "loss": 1.2491, + "step": 56985 + }, + { + "epoch": 0.69, + "grad_norm": 20.155654777730323, + "learning_rate": 1.529711175634306e-05, + "loss": 1.317, + "step": 56988 + }, + { + "epoch": 0.69, + "grad_norm": 16.30185360609623, + "learning_rate": 1.5296616260292824e-05, + "loss": 1.1155, + "step": 56991 + }, + { + "epoch": 0.69, + "grad_norm": 19.796732626101747, + "learning_rate": 1.529612074616714e-05, + "loss": 1.5578, + "step": 56994 + }, + { + "epoch": 0.69, + "grad_norm": 30.467314918166355, + "learning_rate": 1.529562521396771e-05, + "loss": 1.761, + "step": 56997 + }, + { + "epoch": 0.69, + "grad_norm": 20.069965137545132, + "learning_rate": 1.5295129663696215e-05, + "loss": 1.4189, + "step": 57000 + }, + { + "epoch": 0.69, + "grad_norm": 26.560535748375887, + "learning_rate": 1.5294634095354352e-05, + "loss": 1.4069, + "step": 57003 + }, + { + "epoch": 0.69, + "grad_norm": 5.993788949430158, + "learning_rate": 1.529413850894381e-05, + "loss": 1.3165, + "step": 57006 + }, + { + "epoch": 0.69, + "grad_norm": 12.1825713725951, + "learning_rate": 1.529364290446628e-05, + "loss": 1.7543, + "step": 57009 + }, + { + "epoch": 0.69, + "grad_norm": 14.364682615381383, + "learning_rate": 1.5293147281923447e-05, + "loss": 1.4576, + "step": 57012 + }, + { + "epoch": 0.69, + "grad_norm": 5.198136953514311, + "learning_rate": 1.529265164131702e-05, + "loss": 1.3874, + "step": 57015 + }, + { + "epoch": 0.69, + "grad_norm": 8.509027595031881, + "learning_rate": 1.5292155982648668e-05, + "loss": 1.3372, + "step": 57018 + }, + { + "epoch": 0.69, + "grad_norm": 6.31493992410359, + "learning_rate": 1.5291660305920103e-05, + "loss": 1.3577, + "step": 57021 + }, + { + "epoch": 0.69, + "grad_norm": 5.280630260388325, + "learning_rate": 1.5291164611133003e-05, + "loss": 1.8485, + "step": 57024 + }, + { + "epoch": 0.69, + "grad_norm": 19.817388920783085, + "learning_rate": 1.5290668898289064e-05, + "loss": 1.344, + "step": 57027 + }, + { + "epoch": 0.69, + "grad_norm": 4.00620279756684, + "learning_rate": 1.5290173167389978e-05, + "loss": 1.0268, + "step": 57030 + }, + { + "epoch": 0.69, + "grad_norm": 14.163253232802482, + "learning_rate": 1.5289677418437438e-05, + "loss": 1.4474, + "step": 57033 + }, + { + "epoch": 0.69, + "grad_norm": 30.201740304365327, + "learning_rate": 1.5289181651433135e-05, + "loss": 1.4532, + "step": 57036 + }, + { + "epoch": 0.69, + "grad_norm": 10.519378716468976, + "learning_rate": 1.5288685866378757e-05, + "loss": 1.5282, + "step": 57039 + }, + { + "epoch": 0.69, + "grad_norm": 30.960402806076953, + "learning_rate": 1.5288190063276004e-05, + "loss": 1.5942, + "step": 57042 + }, + { + "epoch": 0.69, + "grad_norm": 28.57946895206769, + "learning_rate": 1.5287694242126556e-05, + "loss": 1.1982, + "step": 57045 + }, + { + "epoch": 0.69, + "grad_norm": 123.97297847818561, + "learning_rate": 1.528719840293212e-05, + "loss": 1.4788, + "step": 57048 + }, + { + "epoch": 0.69, + "grad_norm": 27.982779711399576, + "learning_rate": 1.5286702545694376e-05, + "loss": 1.0789, + "step": 57051 + }, + { + "epoch": 0.69, + "grad_norm": 8.38686046796652, + "learning_rate": 1.5286206670415017e-05, + "loss": 1.7177, + "step": 57054 + }, + { + "epoch": 0.69, + "grad_norm": 11.067273244344547, + "learning_rate": 1.5285710777095743e-05, + "loss": 1.2998, + "step": 57057 + }, + { + "epoch": 0.69, + "grad_norm": 3.973667572843392, + "learning_rate": 1.528521486573824e-05, + "loss": 1.4085, + "step": 57060 + }, + { + "epoch": 0.69, + "grad_norm": 20.99622123809182, + "learning_rate": 1.528471893634421e-05, + "loss": 1.3647, + "step": 57063 + }, + { + "epoch": 0.69, + "grad_norm": 42.50351274871466, + "learning_rate": 1.528422298891533e-05, + "loss": 1.7063, + "step": 57066 + }, + { + "epoch": 0.69, + "grad_norm": 8.413161095006231, + "learning_rate": 1.52837270234533e-05, + "loss": 1.2782, + "step": 57069 + }, + { + "epoch": 0.69, + "grad_norm": 11.608581791770298, + "learning_rate": 1.5283231039959817e-05, + "loss": 1.1868, + "step": 57072 + }, + { + "epoch": 0.69, + "grad_norm": 59.80363333991232, + "learning_rate": 1.5282735038436567e-05, + "loss": 1.2625, + "step": 57075 + }, + { + "epoch": 0.69, + "grad_norm": 24.099364992847434, + "learning_rate": 1.5282239018885244e-05, + "loss": 1.2975, + "step": 57078 + }, + { + "epoch": 0.69, + "grad_norm": 5.753228432866332, + "learning_rate": 1.5281742981307542e-05, + "loss": 1.3485, + "step": 57081 + }, + { + "epoch": 0.69, + "grad_norm": 14.00865933902333, + "learning_rate": 1.528124692570516e-05, + "loss": 1.4787, + "step": 57084 + }, + { + "epoch": 0.69, + "grad_norm": 32.97398059130231, + "learning_rate": 1.5280750852079774e-05, + "loss": 1.1886, + "step": 57087 + }, + { + "epoch": 0.69, + "grad_norm": 11.947882874731027, + "learning_rate": 1.5280254760433096e-05, + "loss": 1.439, + "step": 57090 + }, + { + "epoch": 0.69, + "grad_norm": 17.95183651990741, + "learning_rate": 1.5279758650766808e-05, + "loss": 1.6245, + "step": 57093 + }, + { + "epoch": 0.69, + "grad_norm": 7.159173382159318, + "learning_rate": 1.5279262523082605e-05, + "loss": 1.2423, + "step": 57096 + }, + { + "epoch": 0.69, + "grad_norm": 8.247722214802502, + "learning_rate": 1.527876637738218e-05, + "loss": 1.4221, + "step": 57099 + }, + { + "epoch": 0.69, + "grad_norm": 19.241798379748964, + "learning_rate": 1.5278270213667226e-05, + "loss": 1.9001, + "step": 57102 + }, + { + "epoch": 0.69, + "grad_norm": 9.760806142146038, + "learning_rate": 1.527777403193944e-05, + "loss": 1.239, + "step": 57105 + }, + { + "epoch": 0.69, + "grad_norm": 14.26171436818081, + "learning_rate": 1.5277277832200507e-05, + "loss": 1.4068, + "step": 57108 + }, + { + "epoch": 0.69, + "grad_norm": 5.627138965895164, + "learning_rate": 1.527678161445213e-05, + "loss": 1.4526, + "step": 57111 + }, + { + "epoch": 0.69, + "grad_norm": 23.728912826559686, + "learning_rate": 1.5276285378696e-05, + "loss": 1.3085, + "step": 57114 + }, + { + "epoch": 0.69, + "grad_norm": 41.54170788890316, + "learning_rate": 1.5275789124933806e-05, + "loss": 1.3667, + "step": 57117 + }, + { + "epoch": 0.69, + "grad_norm": 11.834246129592989, + "learning_rate": 1.5275292853167242e-05, + "loss": 1.6913, + "step": 57120 + }, + { + "epoch": 0.69, + "grad_norm": 11.092378522767515, + "learning_rate": 1.5274796563398006e-05, + "loss": 1.5075, + "step": 57123 + }, + { + "epoch": 0.69, + "grad_norm": 6.050206759686615, + "learning_rate": 1.527430025562779e-05, + "loss": 1.1784, + "step": 57126 + }, + { + "epoch": 0.69, + "grad_norm": 12.346653502157231, + "learning_rate": 1.5273803929858286e-05, + "loss": 1.1658, + "step": 57129 + }, + { + "epoch": 0.69, + "grad_norm": 4.102679835515469, + "learning_rate": 1.5273307586091187e-05, + "loss": 1.2614, + "step": 57132 + }, + { + "epoch": 0.69, + "grad_norm": 6.997538647442625, + "learning_rate": 1.527281122432819e-05, + "loss": 1.7402, + "step": 57135 + }, + { + "epoch": 0.69, + "grad_norm": 20.53777293706829, + "learning_rate": 1.527231484457099e-05, + "loss": 1.674, + "step": 57138 + }, + { + "epoch": 0.69, + "grad_norm": 12.500111245868688, + "learning_rate": 1.5271818446821275e-05, + "loss": 1.352, + "step": 57141 + }, + { + "epoch": 0.69, + "grad_norm": 17.104305520227534, + "learning_rate": 1.5271322031080743e-05, + "loss": 1.2789, + "step": 57144 + }, + { + "epoch": 0.69, + "grad_norm": 15.485770645703653, + "learning_rate": 1.5270825597351092e-05, + "loss": 1.7112, + "step": 57147 + }, + { + "epoch": 0.69, + "grad_norm": 6.7552662675717885, + "learning_rate": 1.5270329145634008e-05, + "loss": 1.497, + "step": 57150 + }, + { + "epoch": 0.69, + "grad_norm": 11.89321646000113, + "learning_rate": 1.5269832675931186e-05, + "loss": 1.5662, + "step": 57153 + }, + { + "epoch": 0.69, + "grad_norm": 24.778036658317376, + "learning_rate": 1.526933618824433e-05, + "loss": 1.3272, + "step": 57156 + }, + { + "epoch": 0.69, + "grad_norm": 19.493997346705996, + "learning_rate": 1.5268839682575123e-05, + "loss": 0.7998, + "step": 57159 + }, + { + "epoch": 0.69, + "grad_norm": 38.881687647237186, + "learning_rate": 1.5268343158925263e-05, + "loss": 1.3759, + "step": 57162 + }, + { + "epoch": 0.69, + "grad_norm": 27.926043655661296, + "learning_rate": 1.5267846617296445e-05, + "loss": 1.3719, + "step": 57165 + }, + { + "epoch": 0.69, + "grad_norm": 4.309176726462793, + "learning_rate": 1.526735005769037e-05, + "loss": 1.3323, + "step": 57168 + }, + { + "epoch": 0.69, + "grad_norm": 45.61303738959956, + "learning_rate": 1.5266853480108723e-05, + "loss": 1.3385, + "step": 57171 + }, + { + "epoch": 0.69, + "grad_norm": 6.90592896350325, + "learning_rate": 1.5266356884553203e-05, + "loss": 1.4425, + "step": 57174 + }, + { + "epoch": 0.69, + "grad_norm": 7.522189192216296, + "learning_rate": 1.52658602710255e-05, + "loss": 1.2231, + "step": 57177 + }, + { + "epoch": 0.69, + "grad_norm": 7.904791241261385, + "learning_rate": 1.5265363639527317e-05, + "loss": 1.5038, + "step": 57180 + }, + { + "epoch": 0.69, + "grad_norm": 7.875378235923366, + "learning_rate": 1.5264866990060346e-05, + "loss": 1.2342, + "step": 57183 + }, + { + "epoch": 0.69, + "grad_norm": 32.62648957405423, + "learning_rate": 1.5264370322626275e-05, + "loss": 1.4411, + "step": 57186 + }, + { + "epoch": 0.69, + "grad_norm": 11.009454568200645, + "learning_rate": 1.5263873637226807e-05, + "loss": 1.2148, + "step": 57189 + }, + { + "epoch": 0.69, + "grad_norm": 10.957361967362772, + "learning_rate": 1.5263376933863638e-05, + "loss": 1.6202, + "step": 57192 + }, + { + "epoch": 0.69, + "grad_norm": 7.405580345833349, + "learning_rate": 1.526288021253845e-05, + "loss": 1.4044, + "step": 57195 + }, + { + "epoch": 0.69, + "grad_norm": 17.441100617863228, + "learning_rate": 1.5262383473252956e-05, + "loss": 1.4954, + "step": 57198 + }, + { + "epoch": 0.69, + "grad_norm": 11.634137075111543, + "learning_rate": 1.526188671600884e-05, + "loss": 1.3734, + "step": 57201 + }, + { + "epoch": 0.69, + "grad_norm": 30.83983087722384, + "learning_rate": 1.52613899408078e-05, + "loss": 1.1434, + "step": 57204 + }, + { + "epoch": 0.69, + "grad_norm": 16.772318850868626, + "learning_rate": 1.526089314765153e-05, + "loss": 1.4187, + "step": 57207 + }, + { + "epoch": 0.69, + "grad_norm": 8.283648821363911, + "learning_rate": 1.5260396336541727e-05, + "loss": 1.4107, + "step": 57210 + }, + { + "epoch": 0.69, + "grad_norm": 14.581052234366808, + "learning_rate": 1.525989950748009e-05, + "loss": 1.5388, + "step": 57213 + }, + { + "epoch": 0.69, + "grad_norm": 13.71158214617894, + "learning_rate": 1.5259402660468303e-05, + "loss": 1.0665, + "step": 57216 + }, + { + "epoch": 0.69, + "grad_norm": 15.944286922390456, + "learning_rate": 1.5258905795508073e-05, + "loss": 1.1213, + "step": 57219 + }, + { + "epoch": 0.69, + "grad_norm": 11.973730214263728, + "learning_rate": 1.5258408912601095e-05, + "loss": 1.6012, + "step": 57222 + }, + { + "epoch": 0.69, + "grad_norm": 35.66923263805218, + "learning_rate": 1.5257912011749061e-05, + "loss": 1.8331, + "step": 57225 + }, + { + "epoch": 0.69, + "grad_norm": 12.165881094597582, + "learning_rate": 1.5257415092953662e-05, + "loss": 1.3432, + "step": 57228 + }, + { + "epoch": 0.69, + "grad_norm": 7.917871262084829, + "learning_rate": 1.5256918156216603e-05, + "loss": 1.2765, + "step": 57231 + }, + { + "epoch": 0.69, + "grad_norm": 9.00390927523299, + "learning_rate": 1.5256421201539575e-05, + "loss": 1.4968, + "step": 57234 + }, + { + "epoch": 0.69, + "grad_norm": 7.0129832962689465, + "learning_rate": 1.5255924228924274e-05, + "loss": 1.3116, + "step": 57237 + }, + { + "epoch": 0.69, + "grad_norm": 10.287373758211485, + "learning_rate": 1.5255427238372398e-05, + "loss": 1.2188, + "step": 57240 + }, + { + "epoch": 0.69, + "grad_norm": 5.617595200173827, + "learning_rate": 1.525493022988564e-05, + "loss": 1.4907, + "step": 57243 + }, + { + "epoch": 0.69, + "grad_norm": 5.76680902444332, + "learning_rate": 1.5254433203465702e-05, + "loss": 1.1745, + "step": 57246 + }, + { + "epoch": 0.69, + "grad_norm": 99.44925100786637, + "learning_rate": 1.5253936159114272e-05, + "loss": 1.368, + "step": 57249 + }, + { + "epoch": 0.69, + "grad_norm": 3.5197755207249544, + "learning_rate": 1.5253439096833051e-05, + "loss": 1.0653, + "step": 57252 + }, + { + "epoch": 0.69, + "grad_norm": 28.889021520876806, + "learning_rate": 1.5252942016623737e-05, + "loss": 1.3562, + "step": 57255 + }, + { + "epoch": 0.69, + "grad_norm": 17.417483039854545, + "learning_rate": 1.525244491848802e-05, + "loss": 1.101, + "step": 57258 + }, + { + "epoch": 0.69, + "grad_norm": 14.300172328514972, + "learning_rate": 1.5251947802427602e-05, + "loss": 1.8337, + "step": 57261 + }, + { + "epoch": 0.69, + "grad_norm": 11.318591959134181, + "learning_rate": 1.5251450668444181e-05, + "loss": 1.1124, + "step": 57264 + }, + { + "epoch": 0.69, + "grad_norm": 17.448593935454205, + "learning_rate": 1.5250953516539448e-05, + "loss": 1.3593, + "step": 57267 + }, + { + "epoch": 0.69, + "grad_norm": 6.635377917575491, + "learning_rate": 1.52504563467151e-05, + "loss": 1.4836, + "step": 57270 + }, + { + "epoch": 0.69, + "grad_norm": 9.434597037365085, + "learning_rate": 1.524995915897284e-05, + "loss": 1.1188, + "step": 57273 + }, + { + "epoch": 0.69, + "grad_norm": 39.86850050850855, + "learning_rate": 1.5249461953314357e-05, + "loss": 1.6019, + "step": 57276 + }, + { + "epoch": 0.69, + "grad_norm": 9.895865510044139, + "learning_rate": 1.5248964729741353e-05, + "loss": 1.4546, + "step": 57279 + }, + { + "epoch": 0.69, + "grad_norm": 25.9770339900041, + "learning_rate": 1.524846748825552e-05, + "loss": 1.3783, + "step": 57282 + }, + { + "epoch": 0.69, + "grad_norm": 21.69773932444256, + "learning_rate": 1.5247970228858565e-05, + "loss": 1.5314, + "step": 57285 + }, + { + "epoch": 0.69, + "grad_norm": 26.62089083403036, + "learning_rate": 1.524747295155217e-05, + "loss": 1.3432, + "step": 57288 + }, + { + "epoch": 0.69, + "grad_norm": 12.666391440074733, + "learning_rate": 1.5246975656338046e-05, + "loss": 1.7729, + "step": 57291 + }, + { + "epoch": 0.69, + "grad_norm": 12.502740907219163, + "learning_rate": 1.524647834321788e-05, + "loss": 1.3089, + "step": 57294 + }, + { + "epoch": 0.69, + "grad_norm": 10.107789576091442, + "learning_rate": 1.5245981012193374e-05, + "loss": 1.2866, + "step": 57297 + }, + { + "epoch": 0.69, + "grad_norm": 11.069251140447594, + "learning_rate": 1.5245483663266226e-05, + "loss": 1.356, + "step": 57300 + }, + { + "epoch": 0.69, + "grad_norm": 15.479885038513972, + "learning_rate": 1.5244986296438129e-05, + "loss": 1.396, + "step": 57303 + }, + { + "epoch": 0.69, + "grad_norm": 18.582963131868702, + "learning_rate": 1.5244488911710784e-05, + "loss": 1.7471, + "step": 57306 + }, + { + "epoch": 0.69, + "grad_norm": 14.151090185756061, + "learning_rate": 1.5243991509085889e-05, + "loss": 1.4018, + "step": 57309 + }, + { + "epoch": 0.69, + "grad_norm": 6.757315590140247, + "learning_rate": 1.524349408856514e-05, + "loss": 1.0076, + "step": 57312 + }, + { + "epoch": 0.69, + "grad_norm": 23.027595942325824, + "learning_rate": 1.5242996650150232e-05, + "loss": 1.2668, + "step": 57315 + }, + { + "epoch": 0.69, + "grad_norm": 10.771109642270778, + "learning_rate": 1.5242499193842866e-05, + "loss": 1.3407, + "step": 57318 + }, + { + "epoch": 0.69, + "grad_norm": 9.8827460302438, + "learning_rate": 1.5242001719644739e-05, + "loss": 1.18, + "step": 57321 + }, + { + "epoch": 0.69, + "grad_norm": 4.811596352451683, + "learning_rate": 1.5241504227557547e-05, + "loss": 1.298, + "step": 57324 + }, + { + "epoch": 0.69, + "grad_norm": 13.674860991852404, + "learning_rate": 1.524100671758299e-05, + "loss": 1.1063, + "step": 57327 + }, + { + "epoch": 0.69, + "grad_norm": 27.33280571244453, + "learning_rate": 1.5240509189722765e-05, + "loss": 1.3149, + "step": 57330 + }, + { + "epoch": 0.69, + "grad_norm": 76.80893985018503, + "learning_rate": 1.5240011643978572e-05, + "loss": 1.2226, + "step": 57333 + }, + { + "epoch": 0.69, + "grad_norm": 4.048997146374792, + "learning_rate": 1.5239514080352102e-05, + "loss": 1.4065, + "step": 57336 + }, + { + "epoch": 0.69, + "grad_norm": 14.702525104737596, + "learning_rate": 1.523901649884506e-05, + "loss": 1.1052, + "step": 57339 + }, + { + "epoch": 0.69, + "grad_norm": 6.632228550406404, + "learning_rate": 1.5238518899459144e-05, + "loss": 1.3757, + "step": 57342 + }, + { + "epoch": 0.69, + "grad_norm": 18.438208462485107, + "learning_rate": 1.5238021282196048e-05, + "loss": 1.2594, + "step": 57345 + }, + { + "epoch": 0.69, + "grad_norm": 8.142332319222197, + "learning_rate": 1.5237523647057471e-05, + "loss": 1.6295, + "step": 57348 + }, + { + "epoch": 0.69, + "grad_norm": 12.071637298722539, + "learning_rate": 1.5237025994045112e-05, + "loss": 1.2295, + "step": 57351 + }, + { + "epoch": 0.69, + "grad_norm": 14.806838767788426, + "learning_rate": 1.5236528323160671e-05, + "loss": 1.4512, + "step": 57354 + }, + { + "epoch": 0.69, + "grad_norm": 23.380839313522003, + "learning_rate": 1.5236030634405844e-05, + "loss": 1.2116, + "step": 57357 + }, + { + "epoch": 0.69, + "grad_norm": 3.665291727839446, + "learning_rate": 1.5235532927782332e-05, + "loss": 1.5566, + "step": 57360 + }, + { + "epoch": 0.69, + "grad_norm": 6.831028962820828, + "learning_rate": 1.5235035203291832e-05, + "loss": 1.1155, + "step": 57363 + }, + { + "epoch": 0.69, + "grad_norm": 35.71006136333036, + "learning_rate": 1.5234537460936044e-05, + "loss": 1.2072, + "step": 57366 + }, + { + "epoch": 0.69, + "grad_norm": 36.99317061525177, + "learning_rate": 1.523403970071666e-05, + "loss": 1.3658, + "step": 57369 + }, + { + "epoch": 0.69, + "grad_norm": 6.25386841115963, + "learning_rate": 1.523354192263539e-05, + "loss": 1.4663, + "step": 57372 + }, + { + "epoch": 0.69, + "grad_norm": 5.432142586846842, + "learning_rate": 1.5233044126693921e-05, + "loss": 1.3327, + "step": 57375 + }, + { + "epoch": 0.69, + "grad_norm": 7.798496554717982, + "learning_rate": 1.5232546312893958e-05, + "loss": 1.6936, + "step": 57378 + }, + { + "epoch": 0.69, + "grad_norm": 5.718259782891614, + "learning_rate": 1.52320484812372e-05, + "loss": 1.7026, + "step": 57381 + }, + { + "epoch": 0.69, + "grad_norm": 7.748200857830616, + "learning_rate": 1.523155063172535e-05, + "loss": 1.4146, + "step": 57384 + }, + { + "epoch": 0.69, + "grad_norm": 10.050618541904694, + "learning_rate": 1.5231052764360096e-05, + "loss": 1.2933, + "step": 57387 + }, + { + "epoch": 0.69, + "grad_norm": 7.658372769333747, + "learning_rate": 1.5230554879143145e-05, + "loss": 1.0985, + "step": 57390 + }, + { + "epoch": 0.69, + "grad_norm": 8.695813445998086, + "learning_rate": 1.5230056976076192e-05, + "loss": 1.5966, + "step": 57393 + }, + { + "epoch": 0.69, + "grad_norm": 5.177850289327569, + "learning_rate": 1.5229559055160942e-05, + "loss": 1.297, + "step": 57396 + }, + { + "epoch": 0.69, + "grad_norm": 7.42243093473555, + "learning_rate": 1.5229061116399093e-05, + "loss": 1.4653, + "step": 57399 + }, + { + "epoch": 0.69, + "grad_norm": 12.85406319450407, + "learning_rate": 1.5228563159792336e-05, + "loss": 1.0543, + "step": 57402 + }, + { + "epoch": 0.69, + "grad_norm": 44.24120277340364, + "learning_rate": 1.522806518534238e-05, + "loss": 1.368, + "step": 57405 + }, + { + "epoch": 0.69, + "grad_norm": 16.813483030333344, + "learning_rate": 1.5227567193050919e-05, + "loss": 1.2265, + "step": 57408 + }, + { + "epoch": 0.69, + "grad_norm": 19.94191080555479, + "learning_rate": 1.5227069182919652e-05, + "loss": 1.334, + "step": 57411 + }, + { + "epoch": 0.69, + "grad_norm": 37.64437448050721, + "learning_rate": 1.5226571154950285e-05, + "loss": 1.3622, + "step": 57414 + }, + { + "epoch": 0.69, + "grad_norm": 20.657738719017424, + "learning_rate": 1.5226073109144511e-05, + "loss": 1.3604, + "step": 57417 + }, + { + "epoch": 0.69, + "grad_norm": 7.915736936154903, + "learning_rate": 1.5225575045504033e-05, + "loss": 1.065, + "step": 57420 + }, + { + "epoch": 0.69, + "grad_norm": 7.685488676535087, + "learning_rate": 1.5225076964030546e-05, + "loss": 1.2545, + "step": 57423 + }, + { + "epoch": 0.69, + "grad_norm": 21.345157801288433, + "learning_rate": 1.522457886472576e-05, + "loss": 1.1104, + "step": 57426 + }, + { + "epoch": 0.69, + "grad_norm": 8.902362122372262, + "learning_rate": 1.5224080747591363e-05, + "loss": 1.18, + "step": 57429 + }, + { + "epoch": 0.69, + "grad_norm": 16.59555712797056, + "learning_rate": 1.5223582612629058e-05, + "loss": 1.6655, + "step": 57432 + }, + { + "epoch": 0.69, + "grad_norm": 12.607797453698492, + "learning_rate": 1.5223084459840553e-05, + "loss": 1.255, + "step": 57435 + }, + { + "epoch": 0.69, + "grad_norm": 6.123424228909477, + "learning_rate": 1.5222586289227539e-05, + "loss": 1.4201, + "step": 57438 + }, + { + "epoch": 0.69, + "grad_norm": 4.933616376256182, + "learning_rate": 1.5222088100791718e-05, + "loss": 1.5111, + "step": 57441 + }, + { + "epoch": 0.69, + "grad_norm": 88.59072244490058, + "learning_rate": 1.5221589894534794e-05, + "loss": 1.4816, + "step": 57444 + }, + { + "epoch": 0.69, + "grad_norm": 4.724639133939518, + "learning_rate": 1.5221091670458466e-05, + "loss": 1.555, + "step": 57447 + }, + { + "epoch": 0.69, + "grad_norm": 25.82418743260766, + "learning_rate": 1.522059342856443e-05, + "loss": 1.0957, + "step": 57450 + }, + { + "epoch": 0.69, + "grad_norm": 22.528243217758032, + "learning_rate": 1.5220095168854388e-05, + "loss": 1.3871, + "step": 57453 + }, + { + "epoch": 0.69, + "grad_norm": 16.94144681836006, + "learning_rate": 1.5219596891330044e-05, + "loss": 1.3948, + "step": 57456 + }, + { + "epoch": 0.69, + "grad_norm": 5.459445058676915, + "learning_rate": 1.5219098595993094e-05, + "loss": 1.3589, + "step": 57459 + }, + { + "epoch": 0.69, + "grad_norm": 6.478099664343308, + "learning_rate": 1.5218600282845242e-05, + "loss": 1.7679, + "step": 57462 + }, + { + "epoch": 0.69, + "grad_norm": 7.471394809383852, + "learning_rate": 1.5218101951888184e-05, + "loss": 1.7581, + "step": 57465 + }, + { + "epoch": 0.69, + "grad_norm": 8.112073943248607, + "learning_rate": 1.5217603603123628e-05, + "loss": 1.3074, + "step": 57468 + }, + { + "epoch": 0.69, + "grad_norm": 73.33739040316952, + "learning_rate": 1.5217105236553267e-05, + "loss": 1.2402, + "step": 57471 + }, + { + "epoch": 0.69, + "grad_norm": 40.28128002139121, + "learning_rate": 1.5216606852178805e-05, + "loss": 1.4387, + "step": 57474 + }, + { + "epoch": 0.69, + "grad_norm": 18.00174980490018, + "learning_rate": 1.5216108450001942e-05, + "loss": 1.3093, + "step": 57477 + }, + { + "epoch": 0.69, + "grad_norm": 22.098912650433697, + "learning_rate": 1.5215610030024385e-05, + "loss": 1.082, + "step": 57480 + }, + { + "epoch": 0.69, + "grad_norm": 21.287254448885047, + "learning_rate": 1.5215111592247827e-05, + "loss": 1.3171, + "step": 57483 + }, + { + "epoch": 0.69, + "grad_norm": 21.139039447266892, + "learning_rate": 1.5214613136673967e-05, + "loss": 1.3168, + "step": 57486 + }, + { + "epoch": 0.69, + "grad_norm": 8.339190264951602, + "learning_rate": 1.5214114663304519e-05, + "loss": 1.3839, + "step": 57489 + }, + { + "epoch": 0.69, + "grad_norm": 2.55111465566527, + "learning_rate": 1.5213616172141171e-05, + "loss": 1.5796, + "step": 57492 + }, + { + "epoch": 0.69, + "grad_norm": 23.176941124128618, + "learning_rate": 1.5213117663185628e-05, + "loss": 1.5541, + "step": 57495 + }, + { + "epoch": 0.69, + "grad_norm": 3.8299278488654336, + "learning_rate": 1.5212619136439592e-05, + "loss": 1.2254, + "step": 57498 + }, + { + "epoch": 0.69, + "grad_norm": 17.366585193220992, + "learning_rate": 1.521212059190477e-05, + "loss": 1.4155, + "step": 57501 + }, + { + "epoch": 0.69, + "grad_norm": 35.657982069860225, + "learning_rate": 1.5211622029582848e-05, + "loss": 1.2734, + "step": 57504 + }, + { + "epoch": 0.69, + "grad_norm": 26.994009326898304, + "learning_rate": 1.5211123449475545e-05, + "loss": 1.3014, + "step": 57507 + }, + { + "epoch": 0.69, + "grad_norm": 19.905947492071476, + "learning_rate": 1.5210624851584554e-05, + "loss": 1.7264, + "step": 57510 + }, + { + "epoch": 0.69, + "grad_norm": 17.0720992093384, + "learning_rate": 1.5210126235911572e-05, + "loss": 1.3832, + "step": 57513 + }, + { + "epoch": 0.69, + "grad_norm": 6.587552409383891, + "learning_rate": 1.5209627602458313e-05, + "loss": 1.4776, + "step": 57516 + }, + { + "epoch": 0.69, + "grad_norm": 10.271112398415365, + "learning_rate": 1.5209128951226465e-05, + "loss": 0.9716, + "step": 57519 + }, + { + "epoch": 0.69, + "grad_norm": 10.863369432689574, + "learning_rate": 1.5208630282217742e-05, + "loss": 1.3975, + "step": 57522 + }, + { + "epoch": 0.69, + "grad_norm": 20.914527478693586, + "learning_rate": 1.5208131595433836e-05, + "loss": 1.4788, + "step": 57525 + }, + { + "epoch": 0.69, + "grad_norm": 17.12692741909859, + "learning_rate": 1.5207632890876456e-05, + "loss": 1.3974, + "step": 57528 + }, + { + "epoch": 0.69, + "grad_norm": 27.36686735734064, + "learning_rate": 1.5207134168547297e-05, + "loss": 1.2831, + "step": 57531 + }, + { + "epoch": 0.69, + "grad_norm": 6.403574832690422, + "learning_rate": 1.5206635428448069e-05, + "loss": 1.4744, + "step": 57534 + }, + { + "epoch": 0.69, + "grad_norm": 25.826937405025014, + "learning_rate": 1.5206136670580465e-05, + "loss": 1.9588, + "step": 57537 + }, + { + "epoch": 0.69, + "grad_norm": 5.240148371492539, + "learning_rate": 1.5205637894946193e-05, + "loss": 1.4095, + "step": 57540 + }, + { + "epoch": 0.69, + "grad_norm": 16.569518253649022, + "learning_rate": 1.5205139101546954e-05, + "loss": 1.3624, + "step": 57543 + }, + { + "epoch": 0.69, + "grad_norm": 3.7231551581905857, + "learning_rate": 1.520464029038445e-05, + "loss": 1.3632, + "step": 57546 + }, + { + "epoch": 0.69, + "grad_norm": 29.653622866541667, + "learning_rate": 1.5204141461460386e-05, + "loss": 1.3589, + "step": 57549 + }, + { + "epoch": 0.69, + "grad_norm": 3.899804931144466, + "learning_rate": 1.5203642614776462e-05, + "loss": 1.0122, + "step": 57552 + }, + { + "epoch": 0.69, + "grad_norm": 50.63499557783551, + "learning_rate": 1.520314375033438e-05, + "loss": 1.5081, + "step": 57555 + }, + { + "epoch": 0.69, + "grad_norm": 12.752968589434005, + "learning_rate": 1.520264486813584e-05, + "loss": 1.5439, + "step": 57558 + }, + { + "epoch": 0.69, + "grad_norm": 5.773141703876828, + "learning_rate": 1.5202145968182546e-05, + "loss": 1.4044, + "step": 57561 + }, + { + "epoch": 0.69, + "grad_norm": 24.47330787589, + "learning_rate": 1.5201647050476205e-05, + "loss": 1.1446, + "step": 57564 + }, + { + "epoch": 0.69, + "grad_norm": 15.576515798935725, + "learning_rate": 1.5201148115018511e-05, + "loss": 1.3709, + "step": 57567 + }, + { + "epoch": 0.69, + "grad_norm": 9.562577177175111, + "learning_rate": 1.5200649161811178e-05, + "loss": 1.3413, + "step": 57570 + }, + { + "epoch": 0.69, + "grad_norm": 10.466186134249735, + "learning_rate": 1.52001501908559e-05, + "loss": 1.3573, + "step": 57573 + }, + { + "epoch": 0.69, + "grad_norm": 6.587473902950932, + "learning_rate": 1.5199651202154385e-05, + "loss": 1.0727, + "step": 57576 + }, + { + "epoch": 0.69, + "grad_norm": 7.085083474389475, + "learning_rate": 1.5199152195708328e-05, + "loss": 1.3371, + "step": 57579 + }, + { + "epoch": 0.69, + "grad_norm": 19.676584203542795, + "learning_rate": 1.5198653171519442e-05, + "loss": 1.2485, + "step": 57582 + }, + { + "epoch": 0.69, + "grad_norm": 6.71962386801045, + "learning_rate": 1.5198154129589424e-05, + "loss": 1.561, + "step": 57585 + }, + { + "epoch": 0.69, + "grad_norm": 7.137711604800703, + "learning_rate": 1.5197655069919978e-05, + "loss": 1.3442, + "step": 57588 + }, + { + "epoch": 0.69, + "grad_norm": 10.698253134342524, + "learning_rate": 1.5197155992512807e-05, + "loss": 1.2325, + "step": 57591 + }, + { + "epoch": 0.69, + "grad_norm": 8.42823214502995, + "learning_rate": 1.5196656897369613e-05, + "loss": 1.2204, + "step": 57594 + }, + { + "epoch": 0.69, + "grad_norm": 10.171201623758494, + "learning_rate": 1.5196157784492105e-05, + "loss": 1.5798, + "step": 57597 + }, + { + "epoch": 0.69, + "grad_norm": 7.0465328591933245, + "learning_rate": 1.5195658653881979e-05, + "loss": 1.629, + "step": 57600 + }, + { + "epoch": 0.69, + "grad_norm": 25.43808835187158, + "learning_rate": 1.5195159505540945e-05, + "loss": 1.1617, + "step": 57603 + }, + { + "epoch": 0.69, + "grad_norm": 26.86219807069314, + "learning_rate": 1.51946603394707e-05, + "loss": 1.2041, + "step": 57606 + }, + { + "epoch": 0.69, + "grad_norm": 10.073159557385061, + "learning_rate": 1.519416115567295e-05, + "loss": 1.0992, + "step": 57609 + }, + { + "epoch": 0.69, + "grad_norm": 25.415628177913202, + "learning_rate": 1.51936619541494e-05, + "loss": 1.4335, + "step": 57612 + }, + { + "epoch": 0.69, + "grad_norm": 39.23256251700413, + "learning_rate": 1.5193162734901751e-05, + "loss": 1.3501, + "step": 57615 + }, + { + "epoch": 0.69, + "grad_norm": 15.487372352551558, + "learning_rate": 1.519266349793171e-05, + "loss": 1.3037, + "step": 57618 + }, + { + "epoch": 0.69, + "grad_norm": 13.145957910289443, + "learning_rate": 1.519216424324098e-05, + "loss": 1.4444, + "step": 57621 + }, + { + "epoch": 0.69, + "grad_norm": 19.652120822980816, + "learning_rate": 1.5191664970831262e-05, + "loss": 1.3734, + "step": 57624 + }, + { + "epoch": 0.69, + "grad_norm": 41.83784283339762, + "learning_rate": 1.5191165680704259e-05, + "loss": 1.5063, + "step": 57627 + }, + { + "epoch": 0.69, + "grad_norm": 10.795599004151411, + "learning_rate": 1.519066637286168e-05, + "loss": 1.8409, + "step": 57630 + }, + { + "epoch": 0.69, + "grad_norm": 47.52856579528045, + "learning_rate": 1.5190167047305227e-05, + "loss": 1.5342, + "step": 57633 + }, + { + "epoch": 0.69, + "grad_norm": 35.65140375870914, + "learning_rate": 1.5189667704036602e-05, + "loss": 1.2855, + "step": 57636 + }, + { + "epoch": 0.69, + "grad_norm": 25.363799997193034, + "learning_rate": 1.5189168343057513e-05, + "loss": 1.2879, + "step": 57639 + }, + { + "epoch": 0.69, + "grad_norm": 3.1841351650206042, + "learning_rate": 1.5188668964369657e-05, + "loss": 1.5352, + "step": 57642 + }, + { + "epoch": 0.69, + "grad_norm": 18.245669510607204, + "learning_rate": 1.5188169567974744e-05, + "loss": 1.5191, + "step": 57645 + }, + { + "epoch": 0.69, + "grad_norm": 14.253273068145674, + "learning_rate": 1.5187670153874478e-05, + "loss": 1.6892, + "step": 57648 + }, + { + "epoch": 0.69, + "grad_norm": 9.202130122777866, + "learning_rate": 1.5187170722070565e-05, + "loss": 1.1907, + "step": 57651 + }, + { + "epoch": 0.69, + "grad_norm": 20.85983290889995, + "learning_rate": 1.5186671272564701e-05, + "loss": 1.1515, + "step": 57654 + }, + { + "epoch": 0.69, + "grad_norm": 14.251423341828948, + "learning_rate": 1.51861718053586e-05, + "loss": 1.2496, + "step": 57657 + }, + { + "epoch": 0.69, + "grad_norm": 19.900583415664382, + "learning_rate": 1.5185672320453963e-05, + "loss": 1.3403, + "step": 57660 + }, + { + "epoch": 0.69, + "grad_norm": 5.347313743689829, + "learning_rate": 1.5185172817852496e-05, + "loss": 1.5893, + "step": 57663 + }, + { + "epoch": 0.69, + "grad_norm": 11.677155246642092, + "learning_rate": 1.5184673297555898e-05, + "loss": 1.1274, + "step": 57666 + }, + { + "epoch": 0.69, + "grad_norm": 18.839191482689298, + "learning_rate": 1.518417375956588e-05, + "loss": 1.7854, + "step": 57669 + }, + { + "epoch": 0.69, + "grad_norm": 9.248229455445335, + "learning_rate": 1.5183674203884143e-05, + "loss": 1.3249, + "step": 57672 + }, + { + "epoch": 0.69, + "grad_norm": 10.599260728694457, + "learning_rate": 1.5183174630512393e-05, + "loss": 1.6955, + "step": 57675 + }, + { + "epoch": 0.69, + "grad_norm": 21.8604010116449, + "learning_rate": 1.5182675039452334e-05, + "loss": 1.3018, + "step": 57678 + }, + { + "epoch": 0.69, + "grad_norm": 20.064889892294968, + "learning_rate": 1.5182175430705673e-05, + "loss": 1.1974, + "step": 57681 + }, + { + "epoch": 0.69, + "grad_norm": 9.19350147031032, + "learning_rate": 1.5181675804274117e-05, + "loss": 1.2809, + "step": 57684 + }, + { + "epoch": 0.69, + "grad_norm": 13.386295918194234, + "learning_rate": 1.518117616015936e-05, + "loss": 1.4139, + "step": 57687 + }, + { + "epoch": 0.69, + "grad_norm": 13.539257478556403, + "learning_rate": 1.5180676498363123e-05, + "loss": 1.4497, + "step": 57690 + }, + { + "epoch": 0.69, + "grad_norm": 14.131660832400668, + "learning_rate": 1.51801768188871e-05, + "loss": 1.1504, + "step": 57693 + }, + { + "epoch": 0.69, + "grad_norm": 8.219971742055723, + "learning_rate": 1.5179677121733e-05, + "loss": 1.372, + "step": 57696 + }, + { + "epoch": 0.69, + "grad_norm": 12.021480886069867, + "learning_rate": 1.5179177406902526e-05, + "loss": 1.3227, + "step": 57699 + }, + { + "epoch": 0.69, + "grad_norm": 6.881518413783922, + "learning_rate": 1.5178677674397387e-05, + "loss": 1.3911, + "step": 57702 + }, + { + "epoch": 0.69, + "grad_norm": 4.305986216167128, + "learning_rate": 1.5178177924219286e-05, + "loss": 1.3157, + "step": 57705 + }, + { + "epoch": 0.69, + "grad_norm": 30.311695408570245, + "learning_rate": 1.5177678156369926e-05, + "loss": 1.0744, + "step": 57708 + }, + { + "epoch": 0.69, + "grad_norm": 5.169973447223976, + "learning_rate": 1.517717837085102e-05, + "loss": 1.3545, + "step": 57711 + }, + { + "epoch": 0.69, + "grad_norm": 6.181918150179497, + "learning_rate": 1.5176678567664268e-05, + "loss": 1.7111, + "step": 57714 + }, + { + "epoch": 0.69, + "grad_norm": 30.19149466758898, + "learning_rate": 1.5176178746811374e-05, + "loss": 1.5138, + "step": 57717 + }, + { + "epoch": 0.69, + "grad_norm": 11.44636651358618, + "learning_rate": 1.5175678908294047e-05, + "loss": 1.1237, + "step": 57720 + }, + { + "epoch": 0.69, + "grad_norm": 14.739079644312243, + "learning_rate": 1.5175179052113996e-05, + "loss": 1.4528, + "step": 57723 + }, + { + "epoch": 0.69, + "grad_norm": 13.264335589347533, + "learning_rate": 1.517467917827292e-05, + "loss": 0.9393, + "step": 57726 + }, + { + "epoch": 0.69, + "grad_norm": 50.19366481371696, + "learning_rate": 1.5174179286772525e-05, + "loss": 1.6039, + "step": 57729 + }, + { + "epoch": 0.69, + "grad_norm": 14.490211184618454, + "learning_rate": 1.5173679377614523e-05, + "loss": 1.3545, + "step": 57732 + }, + { + "epoch": 0.69, + "grad_norm": 21.22520730175006, + "learning_rate": 1.5173179450800618e-05, + "loss": 1.4771, + "step": 57735 + }, + { + "epoch": 0.69, + "grad_norm": 13.698270753690888, + "learning_rate": 1.5172679506332513e-05, + "loss": 0.9707, + "step": 57738 + }, + { + "epoch": 0.69, + "grad_norm": 20.833295002236053, + "learning_rate": 1.5172179544211915e-05, + "loss": 1.5928, + "step": 57741 + }, + { + "epoch": 0.69, + "grad_norm": 9.816782164525415, + "learning_rate": 1.5171679564440535e-05, + "loss": 1.2887, + "step": 57744 + }, + { + "epoch": 0.69, + "grad_norm": 17.913141960753567, + "learning_rate": 1.5171179567020073e-05, + "loss": 1.7367, + "step": 57747 + }, + { + "epoch": 0.69, + "grad_norm": 11.94911350607636, + "learning_rate": 1.5170679551952238e-05, + "loss": 1.3914, + "step": 57750 + }, + { + "epoch": 0.69, + "grad_norm": 13.63354750603952, + "learning_rate": 1.5170179519238737e-05, + "loss": 1.1441, + "step": 57753 + }, + { + "epoch": 0.69, + "grad_norm": 28.47686239614465, + "learning_rate": 1.5169679468881275e-05, + "loss": 1.4534, + "step": 57756 + }, + { + "epoch": 0.69, + "grad_norm": 4.298046762892068, + "learning_rate": 1.5169179400881559e-05, + "loss": 1.1847, + "step": 57759 + }, + { + "epoch": 0.69, + "grad_norm": 8.116679759646404, + "learning_rate": 1.5168679315241294e-05, + "loss": 1.7089, + "step": 57762 + }, + { + "epoch": 0.69, + "grad_norm": 10.608170087868789, + "learning_rate": 1.5168179211962191e-05, + "loss": 1.4139, + "step": 57765 + }, + { + "epoch": 0.69, + "grad_norm": 5.938959758995824, + "learning_rate": 1.5167679091045955e-05, + "loss": 1.7926, + "step": 57768 + }, + { + "epoch": 0.69, + "grad_norm": 6.3963234636794954, + "learning_rate": 1.5167178952494288e-05, + "loss": 1.2715, + "step": 57771 + }, + { + "epoch": 0.69, + "grad_norm": 21.988431711298247, + "learning_rate": 1.51666787963089e-05, + "loss": 1.3792, + "step": 57774 + }, + { + "epoch": 0.69, + "grad_norm": 26.29273872322942, + "learning_rate": 1.5166178622491501e-05, + "loss": 1.6887, + "step": 57777 + }, + { + "epoch": 0.69, + "grad_norm": 36.73642083370646, + "learning_rate": 1.5165678431043796e-05, + "loss": 1.2861, + "step": 57780 + }, + { + "epoch": 0.69, + "grad_norm": 10.169574562864707, + "learning_rate": 1.516517822196749e-05, + "loss": 1.1535, + "step": 57783 + }, + { + "epoch": 0.69, + "grad_norm": 47.476714230495546, + "learning_rate": 1.516467799526429e-05, + "loss": 1.1948, + "step": 57786 + }, + { + "epoch": 0.69, + "grad_norm": 5.0067016807665246, + "learning_rate": 1.5164177750935904e-05, + "loss": 1.5296, + "step": 57789 + }, + { + "epoch": 0.69, + "grad_norm": 16.235126003228583, + "learning_rate": 1.516367748898404e-05, + "loss": 1.2372, + "step": 57792 + }, + { + "epoch": 0.69, + "grad_norm": 10.931690407890315, + "learning_rate": 1.5163177209410407e-05, + "loss": 1.5659, + "step": 57795 + }, + { + "epoch": 0.7, + "grad_norm": 33.337217393266734, + "learning_rate": 1.5162676912216707e-05, + "loss": 1.8285, + "step": 57798 + }, + { + "epoch": 0.7, + "grad_norm": 3.744690536838736, + "learning_rate": 1.5162176597404651e-05, + "loss": 1.2114, + "step": 57801 + }, + { + "epoch": 0.7, + "grad_norm": 19.290671338537532, + "learning_rate": 1.5161676264975948e-05, + "loss": 1.5053, + "step": 57804 + }, + { + "epoch": 0.7, + "grad_norm": 3.2914016628346707, + "learning_rate": 1.51611759149323e-05, + "loss": 1.331, + "step": 57807 + }, + { + "epoch": 0.7, + "grad_norm": 11.527850194566843, + "learning_rate": 1.5160675547275421e-05, + "loss": 1.1185, + "step": 57810 + }, + { + "epoch": 0.7, + "grad_norm": 11.297760501366081, + "learning_rate": 1.5160175162007014e-05, + "loss": 1.2028, + "step": 57813 + }, + { + "epoch": 0.7, + "grad_norm": 8.999059727236178, + "learning_rate": 1.5159674759128783e-05, + "loss": 1.3471, + "step": 57816 + }, + { + "epoch": 0.7, + "grad_norm": 11.818581587384383, + "learning_rate": 1.5159174338642447e-05, + "loss": 1.4525, + "step": 57819 + }, + { + "epoch": 0.7, + "grad_norm": 6.368079215724411, + "learning_rate": 1.5158673900549706e-05, + "loss": 1.4026, + "step": 57822 + }, + { + "epoch": 0.7, + "grad_norm": 7.002937115806137, + "learning_rate": 1.5158173444852269e-05, + "loss": 1.5868, + "step": 57825 + }, + { + "epoch": 0.7, + "grad_norm": 18.90003268802696, + "learning_rate": 1.515767297155184e-05, + "loss": 1.7087, + "step": 57828 + }, + { + "epoch": 0.7, + "grad_norm": 142.59577452856587, + "learning_rate": 1.5157172480650135e-05, + "loss": 1.3253, + "step": 57831 + }, + { + "epoch": 0.7, + "grad_norm": 72.52680092029222, + "learning_rate": 1.5156671972148857e-05, + "loss": 1.5526, + "step": 57834 + }, + { + "epoch": 0.7, + "grad_norm": 29.690734261167208, + "learning_rate": 1.5156171446049715e-05, + "loss": 1.3905, + "step": 57837 + }, + { + "epoch": 0.7, + "grad_norm": 10.696800844399597, + "learning_rate": 1.5155670902354418e-05, + "loss": 1.2678, + "step": 57840 + }, + { + "epoch": 0.7, + "grad_norm": 33.96560957398711, + "learning_rate": 1.5155170341064672e-05, + "loss": 1.0993, + "step": 57843 + }, + { + "epoch": 0.7, + "grad_norm": 13.133373375740431, + "learning_rate": 1.5154669762182188e-05, + "loss": 1.3414, + "step": 57846 + }, + { + "epoch": 0.7, + "grad_norm": 10.08655996345318, + "learning_rate": 1.515416916570867e-05, + "loss": 1.5482, + "step": 57849 + }, + { + "epoch": 0.7, + "grad_norm": 20.79639970619863, + "learning_rate": 1.515366855164583e-05, + "loss": 1.4911, + "step": 57852 + }, + { + "epoch": 0.7, + "grad_norm": 12.257894143719003, + "learning_rate": 1.5153167919995377e-05, + "loss": 1.4225, + "step": 57855 + }, + { + "epoch": 0.7, + "grad_norm": 8.614020271706446, + "learning_rate": 1.5152667270759018e-05, + "loss": 1.3237, + "step": 57858 + }, + { + "epoch": 0.7, + "grad_norm": 19.2526316995266, + "learning_rate": 1.5152166603938461e-05, + "loss": 1.3286, + "step": 57861 + }, + { + "epoch": 0.7, + "grad_norm": 3.4653319923259143, + "learning_rate": 1.5151665919535414e-05, + "loss": 1.3783, + "step": 57864 + }, + { + "epoch": 0.7, + "grad_norm": 17.168139908298617, + "learning_rate": 1.5151165217551589e-05, + "loss": 1.6092, + "step": 57867 + }, + { + "epoch": 0.7, + "grad_norm": 17.009874463060473, + "learning_rate": 1.515066449798869e-05, + "loss": 1.5491, + "step": 57870 + }, + { + "epoch": 0.7, + "grad_norm": 11.382779267305622, + "learning_rate": 1.515016376084843e-05, + "loss": 1.1015, + "step": 57873 + }, + { + "epoch": 0.7, + "grad_norm": 2.573122699493205, + "learning_rate": 1.5149663006132516e-05, + "loss": 1.5488, + "step": 57876 + }, + { + "epoch": 0.7, + "grad_norm": 8.31799292072717, + "learning_rate": 1.5149162233842657e-05, + "loss": 1.5742, + "step": 57879 + }, + { + "epoch": 0.7, + "grad_norm": 23.54086059548308, + "learning_rate": 1.5148661443980564e-05, + "loss": 1.7721, + "step": 57882 + }, + { + "epoch": 0.7, + "grad_norm": 10.31157124207863, + "learning_rate": 1.5148160636547942e-05, + "loss": 1.2833, + "step": 57885 + }, + { + "epoch": 0.7, + "grad_norm": 21.52531901104724, + "learning_rate": 1.5147659811546502e-05, + "loss": 2.0994, + "step": 57888 + }, + { + "epoch": 0.7, + "grad_norm": 18.58802324126777, + "learning_rate": 1.5147158968977951e-05, + "loss": 1.5022, + "step": 57891 + }, + { + "epoch": 0.7, + "grad_norm": 11.91170078337054, + "learning_rate": 1.5146658108844005e-05, + "loss": 1.0593, + "step": 57894 + }, + { + "epoch": 0.7, + "grad_norm": 46.616083301710155, + "learning_rate": 1.5146157231146368e-05, + "loss": 1.4105, + "step": 57897 + }, + { + "epoch": 0.7, + "grad_norm": 2.8987293416373996, + "learning_rate": 1.5145656335886747e-05, + "loss": 1.5638, + "step": 57900 + }, + { + "epoch": 0.7, + "grad_norm": 9.12677472605049, + "learning_rate": 1.5145155423066856e-05, + "loss": 1.6956, + "step": 57903 + }, + { + "epoch": 0.7, + "grad_norm": 81.75594128068671, + "learning_rate": 1.5144654492688405e-05, + "loss": 1.1357, + "step": 57906 + }, + { + "epoch": 0.7, + "grad_norm": 7.364453629656348, + "learning_rate": 1.5144153544753097e-05, + "loss": 1.7473, + "step": 57909 + }, + { + "epoch": 0.7, + "grad_norm": 9.910149218267723, + "learning_rate": 1.5143652579262648e-05, + "loss": 1.4241, + "step": 57912 + }, + { + "epoch": 0.7, + "grad_norm": 15.154520483701466, + "learning_rate": 1.5143151596218765e-05, + "loss": 1.1512, + "step": 57915 + }, + { + "epoch": 0.7, + "grad_norm": 13.12685170049671, + "learning_rate": 1.514265059562316e-05, + "loss": 1.4972, + "step": 57918 + }, + { + "epoch": 0.7, + "grad_norm": 6.096048143968076, + "learning_rate": 1.514214957747754e-05, + "loss": 1.0274, + "step": 57921 + }, + { + "epoch": 0.7, + "grad_norm": 25.97649693545719, + "learning_rate": 1.5141648541783614e-05, + "loss": 1.0939, + "step": 57924 + }, + { + "epoch": 0.7, + "grad_norm": 24.837331303215382, + "learning_rate": 1.5141147488543095e-05, + "loss": 1.1519, + "step": 57927 + }, + { + "epoch": 0.7, + "grad_norm": 5.448672349507669, + "learning_rate": 1.514064641775769e-05, + "loss": 1.3207, + "step": 57930 + }, + { + "epoch": 0.7, + "grad_norm": 18.96303900573141, + "learning_rate": 1.5140145329429112e-05, + "loss": 1.2573, + "step": 57933 + }, + { + "epoch": 0.7, + "grad_norm": 7.475800525606251, + "learning_rate": 1.513964422355907e-05, + "loss": 1.297, + "step": 57936 + }, + { + "epoch": 0.7, + "grad_norm": 17.33718215186402, + "learning_rate": 1.5139143100149274e-05, + "loss": 0.8888, + "step": 57939 + }, + { + "epoch": 0.7, + "grad_norm": 12.410636921262109, + "learning_rate": 1.5138641959201433e-05, + "loss": 1.4303, + "step": 57942 + }, + { + "epoch": 0.7, + "grad_norm": 8.473453385569359, + "learning_rate": 1.5138140800717253e-05, + "loss": 1.5988, + "step": 57945 + }, + { + "epoch": 0.7, + "grad_norm": 7.967207747431273, + "learning_rate": 1.5137639624698458e-05, + "loss": 1.8824, + "step": 57948 + }, + { + "epoch": 0.7, + "grad_norm": 32.84128597592939, + "learning_rate": 1.5137138431146743e-05, + "loss": 1.5336, + "step": 57951 + }, + { + "epoch": 0.7, + "grad_norm": 7.787862806371088, + "learning_rate": 1.5136637220063826e-05, + "loss": 1.2846, + "step": 57954 + }, + { + "epoch": 0.7, + "grad_norm": 31.237699408267265, + "learning_rate": 1.5136135991451417e-05, + "loss": 1.5027, + "step": 57957 + }, + { + "epoch": 0.7, + "grad_norm": 26.49779391174224, + "learning_rate": 1.513563474531123e-05, + "loss": 1.8596, + "step": 57960 + }, + { + "epoch": 0.7, + "grad_norm": 24.746107000991667, + "learning_rate": 1.5135133481644966e-05, + "loss": 1.0957, + "step": 57963 + }, + { + "epoch": 0.7, + "grad_norm": 9.898156171057556, + "learning_rate": 1.5134632200454342e-05, + "loss": 1.3344, + "step": 57966 + }, + { + "epoch": 0.7, + "grad_norm": 18.594176406889567, + "learning_rate": 1.5134130901741069e-05, + "loss": 1.0742, + "step": 57969 + }, + { + "epoch": 0.7, + "grad_norm": 13.463115234126187, + "learning_rate": 1.5133629585506856e-05, + "loss": 1.6068, + "step": 57972 + }, + { + "epoch": 0.7, + "grad_norm": 7.078739920532976, + "learning_rate": 1.5133128251753416e-05, + "loss": 1.4055, + "step": 57975 + }, + { + "epoch": 0.7, + "grad_norm": 22.91946598726227, + "learning_rate": 1.5132626900482455e-05, + "loss": 1.1938, + "step": 57978 + }, + { + "epoch": 0.7, + "grad_norm": 4.981563019787267, + "learning_rate": 1.513212553169569e-05, + "loss": 1.2906, + "step": 57981 + }, + { + "epoch": 0.7, + "grad_norm": 26.80226984267489, + "learning_rate": 1.5131624145394828e-05, + "loss": 1.2111, + "step": 57984 + }, + { + "epoch": 0.7, + "grad_norm": 22.855616920143724, + "learning_rate": 1.513112274158158e-05, + "loss": 1.3618, + "step": 57987 + }, + { + "epoch": 0.7, + "grad_norm": 31.504271485825125, + "learning_rate": 1.5130621320257656e-05, + "loss": 1.273, + "step": 57990 + }, + { + "epoch": 0.7, + "grad_norm": 37.624937823352575, + "learning_rate": 1.5130119881424778e-05, + "loss": 1.3784, + "step": 57993 + }, + { + "epoch": 0.7, + "grad_norm": 12.583970586454477, + "learning_rate": 1.5129618425084642e-05, + "loss": 1.6674, + "step": 57996 + }, + { + "epoch": 0.7, + "grad_norm": 15.239841945755451, + "learning_rate": 1.5129116951238967e-05, + "loss": 1.7564, + "step": 57999 + }, + { + "epoch": 0.7, + "grad_norm": 16.060874281202906, + "learning_rate": 1.5128615459889466e-05, + "loss": 1.2024, + "step": 58002 + }, + { + "epoch": 0.7, + "grad_norm": 14.762078342253373, + "learning_rate": 1.5128113951037843e-05, + "loss": 1.2086, + "step": 58005 + }, + { + "epoch": 0.7, + "grad_norm": 31.38962198892644, + "learning_rate": 1.5127612424685818e-05, + "loss": 1.1829, + "step": 58008 + }, + { + "epoch": 0.7, + "grad_norm": 22.028771943398706, + "learning_rate": 1.5127110880835097e-05, + "loss": 1.3172, + "step": 58011 + }, + { + "epoch": 0.7, + "grad_norm": 11.174596648048357, + "learning_rate": 1.5126609319487395e-05, + "loss": 1.1636, + "step": 58014 + }, + { + "epoch": 0.7, + "grad_norm": 10.158205016535495, + "learning_rate": 1.512610774064442e-05, + "loss": 1.0393, + "step": 58017 + }, + { + "epoch": 0.7, + "grad_norm": 20.108288945037472, + "learning_rate": 1.5125606144307888e-05, + "loss": 1.1029, + "step": 58020 + }, + { + "epoch": 0.7, + "grad_norm": 7.316872288966654, + "learning_rate": 1.5125104530479506e-05, + "loss": 1.004, + "step": 58023 + }, + { + "epoch": 0.7, + "grad_norm": 13.317797465359616, + "learning_rate": 1.512460289916099e-05, + "loss": 1.398, + "step": 58026 + }, + { + "epoch": 0.7, + "grad_norm": 46.68638739073584, + "learning_rate": 1.5124101250354048e-05, + "loss": 1.5282, + "step": 58029 + }, + { + "epoch": 0.7, + "grad_norm": 18.471982574763533, + "learning_rate": 1.5123599584060396e-05, + "loss": 1.1909, + "step": 58032 + }, + { + "epoch": 0.7, + "grad_norm": 6.0638247857445045, + "learning_rate": 1.5123097900281747e-05, + "loss": 1.6926, + "step": 58035 + }, + { + "epoch": 0.7, + "grad_norm": 26.461524334972573, + "learning_rate": 1.5122596199019805e-05, + "loss": 1.0661, + "step": 58038 + }, + { + "epoch": 0.7, + "grad_norm": 21.788673889269496, + "learning_rate": 1.5122094480276291e-05, + "loss": 1.5355, + "step": 58041 + }, + { + "epoch": 0.7, + "grad_norm": 37.6008922723845, + "learning_rate": 1.512159274405291e-05, + "loss": 1.4832, + "step": 58044 + }, + { + "epoch": 0.7, + "grad_norm": 58.60950464969554, + "learning_rate": 1.5121090990351382e-05, + "loss": 1.5224, + "step": 58047 + }, + { + "epoch": 0.7, + "grad_norm": 28.228168603626877, + "learning_rate": 1.5120589219173411e-05, + "loss": 1.1219, + "step": 58050 + }, + { + "epoch": 0.7, + "grad_norm": 6.379626878123164, + "learning_rate": 1.5120087430520713e-05, + "loss": 1.2085, + "step": 58053 + }, + { + "epoch": 0.7, + "grad_norm": 4.874840534249952, + "learning_rate": 1.5119585624395008e-05, + "loss": 1.3419, + "step": 58056 + }, + { + "epoch": 0.7, + "grad_norm": 34.347481172892834, + "learning_rate": 1.5119083800797993e-05, + "loss": 1.2378, + "step": 58059 + }, + { + "epoch": 0.7, + "grad_norm": 19.647308898648465, + "learning_rate": 1.5118581959731393e-05, + "loss": 1.3397, + "step": 58062 + }, + { + "epoch": 0.7, + "grad_norm": 23.37194819087754, + "learning_rate": 1.5118080101196917e-05, + "loss": 1.872, + "step": 58065 + }, + { + "epoch": 0.7, + "grad_norm": 6.899113680453129, + "learning_rate": 1.5117578225196274e-05, + "loss": 1.2498, + "step": 58068 + }, + { + "epoch": 0.7, + "grad_norm": 91.05002265303104, + "learning_rate": 1.5117076331731181e-05, + "loss": 1.1235, + "step": 58071 + }, + { + "epoch": 0.7, + "grad_norm": 9.486756183989351, + "learning_rate": 1.5116574420803352e-05, + "loss": 1.686, + "step": 58074 + }, + { + "epoch": 0.7, + "grad_norm": 12.448331395077343, + "learning_rate": 1.5116072492414495e-05, + "loss": 1.5667, + "step": 58077 + }, + { + "epoch": 0.7, + "grad_norm": 7.470732188138201, + "learning_rate": 1.5115570546566325e-05, + "loss": 1.4135, + "step": 58080 + }, + { + "epoch": 0.7, + "grad_norm": 21.14834208772842, + "learning_rate": 1.511506858326056e-05, + "loss": 1.8773, + "step": 58083 + }, + { + "epoch": 0.7, + "grad_norm": 7.085340482047785, + "learning_rate": 1.5114566602498904e-05, + "loss": 1.1057, + "step": 58086 + }, + { + "epoch": 0.7, + "grad_norm": 7.9424737023090906, + "learning_rate": 1.5114064604283076e-05, + "loss": 1.3401, + "step": 58089 + }, + { + "epoch": 0.7, + "grad_norm": 90.40076418939718, + "learning_rate": 1.5113562588614786e-05, + "loss": 1.5599, + "step": 58092 + }, + { + "epoch": 0.7, + "grad_norm": 16.167048179200467, + "learning_rate": 1.5113060555495752e-05, + "loss": 1.1355, + "step": 58095 + }, + { + "epoch": 0.7, + "grad_norm": 121.72729081888056, + "learning_rate": 1.5112558504927682e-05, + "loss": 1.4712, + "step": 58098 + }, + { + "epoch": 0.7, + "grad_norm": 5.278732864532598, + "learning_rate": 1.5112056436912288e-05, + "loss": 1.2416, + "step": 58101 + }, + { + "epoch": 0.7, + "grad_norm": 4.65333828068857, + "learning_rate": 1.511155435145129e-05, + "loss": 1.5738, + "step": 58104 + }, + { + "epoch": 0.7, + "grad_norm": 17.8242480689658, + "learning_rate": 1.5111052248546398e-05, + "loss": 1.2177, + "step": 58107 + }, + { + "epoch": 0.7, + "grad_norm": 14.871758146486485, + "learning_rate": 1.5110550128199326e-05, + "loss": 1.1807, + "step": 58110 + }, + { + "epoch": 0.7, + "grad_norm": 19.69908091245115, + "learning_rate": 1.5110047990411788e-05, + "loss": 1.9223, + "step": 58113 + }, + { + "epoch": 0.7, + "grad_norm": 34.20658966719913, + "learning_rate": 1.5109545835185495e-05, + "loss": 1.4038, + "step": 58116 + }, + { + "epoch": 0.7, + "grad_norm": 22.163023811035714, + "learning_rate": 1.5109043662522163e-05, + "loss": 1.6455, + "step": 58119 + }, + { + "epoch": 0.7, + "grad_norm": 16.202487212183545, + "learning_rate": 1.5108541472423505e-05, + "loss": 1.711, + "step": 58122 + }, + { + "epoch": 0.7, + "grad_norm": 23.793950816503482, + "learning_rate": 1.5108039264891235e-05, + "loss": 1.2037, + "step": 58125 + }, + { + "epoch": 0.7, + "grad_norm": 17.410071099317488, + "learning_rate": 1.510753703992707e-05, + "loss": 1.7188, + "step": 58128 + }, + { + "epoch": 0.7, + "grad_norm": 12.154296699906045, + "learning_rate": 1.5107034797532719e-05, + "loss": 1.4503, + "step": 58131 + }, + { + "epoch": 0.7, + "grad_norm": 7.934192483076258, + "learning_rate": 1.5106532537709894e-05, + "loss": 1.1573, + "step": 58134 + }, + { + "epoch": 0.7, + "grad_norm": 5.283931794919958, + "learning_rate": 1.5106030260460318e-05, + "loss": 1.7763, + "step": 58137 + }, + { + "epoch": 0.7, + "grad_norm": 12.084064800522317, + "learning_rate": 1.5105527965785696e-05, + "loss": 1.5435, + "step": 58140 + }, + { + "epoch": 0.7, + "grad_norm": 7.097537829222933, + "learning_rate": 1.5105025653687748e-05, + "loss": 1.1827, + "step": 58143 + }, + { + "epoch": 0.7, + "grad_norm": 11.140450518235006, + "learning_rate": 1.5104523324168185e-05, + "loss": 1.4547, + "step": 58146 + }, + { + "epoch": 0.7, + "grad_norm": 8.947086302320901, + "learning_rate": 1.5104020977228726e-05, + "loss": 1.4099, + "step": 58149 + }, + { + "epoch": 0.7, + "grad_norm": 24.971803878081587, + "learning_rate": 1.510351861287108e-05, + "loss": 1.3213, + "step": 58152 + }, + { + "epoch": 0.7, + "grad_norm": 33.2776793318235, + "learning_rate": 1.5103016231096963e-05, + "loss": 1.4473, + "step": 58155 + }, + { + "epoch": 0.7, + "grad_norm": 8.118965925116875, + "learning_rate": 1.5102513831908088e-05, + "loss": 1.4692, + "step": 58158 + }, + { + "epoch": 0.7, + "grad_norm": 16.398838382410045, + "learning_rate": 1.5102011415306172e-05, + "loss": 1.3489, + "step": 58161 + }, + { + "epoch": 0.7, + "grad_norm": 13.272053127841437, + "learning_rate": 1.5101508981292932e-05, + "loss": 1.4924, + "step": 58164 + }, + { + "epoch": 0.7, + "grad_norm": 13.537537097302312, + "learning_rate": 1.5101006529870074e-05, + "loss": 1.8635, + "step": 58167 + }, + { + "epoch": 0.7, + "grad_norm": 29.455150208865675, + "learning_rate": 1.510050406103932e-05, + "loss": 1.3521, + "step": 58170 + }, + { + "epoch": 0.7, + "grad_norm": 41.70958618600157, + "learning_rate": 1.5100001574802386e-05, + "loss": 1.572, + "step": 58173 + }, + { + "epoch": 0.7, + "grad_norm": 12.91868012410547, + "learning_rate": 1.5099499071160982e-05, + "loss": 1.2032, + "step": 58176 + }, + { + "epoch": 0.7, + "grad_norm": 12.46287755818609, + "learning_rate": 1.5098996550116823e-05, + "loss": 1.6901, + "step": 58179 + }, + { + "epoch": 0.7, + "grad_norm": 3.587521215958995, + "learning_rate": 1.5098494011671628e-05, + "loss": 1.3927, + "step": 58182 + }, + { + "epoch": 0.7, + "grad_norm": 56.353034264289356, + "learning_rate": 1.5097991455827105e-05, + "loss": 1.3879, + "step": 58185 + }, + { + "epoch": 0.7, + "grad_norm": 41.990889181796646, + "learning_rate": 1.5097488882584976e-05, + "loss": 1.5025, + "step": 58188 + }, + { + "epoch": 0.7, + "grad_norm": 14.151005567144406, + "learning_rate": 1.5096986291946954e-05, + "loss": 1.2587, + "step": 58191 + }, + { + "epoch": 0.7, + "grad_norm": 8.360190882687476, + "learning_rate": 1.5096483683914756e-05, + "loss": 1.3614, + "step": 58194 + }, + { + "epoch": 0.7, + "grad_norm": 9.448956245741686, + "learning_rate": 1.5095981058490092e-05, + "loss": 1.3662, + "step": 58197 + }, + { + "epoch": 0.7, + "grad_norm": 21.355322830784882, + "learning_rate": 1.5095478415674678e-05, + "loss": 1.8936, + "step": 58200 + }, + { + "epoch": 0.7, + "grad_norm": 18.35094722553124, + "learning_rate": 1.5094975755470236e-05, + "loss": 1.4313, + "step": 58203 + }, + { + "epoch": 0.7, + "grad_norm": 13.583379134523945, + "learning_rate": 1.5094473077878476e-05, + "loss": 1.2888, + "step": 58206 + }, + { + "epoch": 0.7, + "grad_norm": 8.563692410711248, + "learning_rate": 1.5093970382901113e-05, + "loss": 1.0798, + "step": 58209 + }, + { + "epoch": 0.7, + "grad_norm": 9.96829756786273, + "learning_rate": 1.5093467670539863e-05, + "loss": 1.7865, + "step": 58212 + }, + { + "epoch": 0.7, + "grad_norm": 52.32427699181972, + "learning_rate": 1.5092964940796444e-05, + "loss": 1.5745, + "step": 58215 + }, + { + "epoch": 0.7, + "grad_norm": 8.238695242007033, + "learning_rate": 1.509246219367257e-05, + "loss": 1.144, + "step": 58218 + }, + { + "epoch": 0.7, + "grad_norm": 15.194895100689573, + "learning_rate": 1.5091959429169958e-05, + "loss": 1.5437, + "step": 58221 + }, + { + "epoch": 0.7, + "grad_norm": 6.128581834952042, + "learning_rate": 1.509145664729032e-05, + "loss": 1.2866, + "step": 58224 + }, + { + "epoch": 0.7, + "grad_norm": 35.16261695682033, + "learning_rate": 1.5090953848035378e-05, + "loss": 1.2712, + "step": 58227 + }, + { + "epoch": 0.7, + "grad_norm": 9.490386185560752, + "learning_rate": 1.5090451031406841e-05, + "loss": 1.5682, + "step": 58230 + }, + { + "epoch": 0.7, + "grad_norm": 44.204238212294776, + "learning_rate": 1.508994819740643e-05, + "loss": 1.2999, + "step": 58233 + }, + { + "epoch": 0.7, + "grad_norm": 16.532776067086534, + "learning_rate": 1.5089445346035857e-05, + "loss": 1.5117, + "step": 58236 + }, + { + "epoch": 0.7, + "grad_norm": 11.435836568291517, + "learning_rate": 1.5088942477296842e-05, + "loss": 1.1768, + "step": 58239 + }, + { + "epoch": 0.7, + "grad_norm": 6.396443883453311, + "learning_rate": 1.5088439591191097e-05, + "loss": 1.5441, + "step": 58242 + }, + { + "epoch": 0.7, + "grad_norm": 6.647199622900113, + "learning_rate": 1.5087936687720346e-05, + "loss": 1.2257, + "step": 58245 + }, + { + "epoch": 0.7, + "grad_norm": 16.30560896072229, + "learning_rate": 1.5087433766886298e-05, + "loss": 1.3793, + "step": 58248 + }, + { + "epoch": 0.7, + "grad_norm": 22.483994286428953, + "learning_rate": 1.5086930828690669e-05, + "loss": 1.3656, + "step": 58251 + }, + { + "epoch": 0.7, + "grad_norm": 13.11852712110771, + "learning_rate": 1.5086427873135176e-05, + "loss": 1.4882, + "step": 58254 + }, + { + "epoch": 0.7, + "grad_norm": 12.20914395780112, + "learning_rate": 1.5085924900221537e-05, + "loss": 1.7346, + "step": 58257 + }, + { + "epoch": 0.7, + "grad_norm": 20.098335582651156, + "learning_rate": 1.5085421909951471e-05, + "loss": 1.6121, + "step": 58260 + }, + { + "epoch": 0.7, + "grad_norm": 9.535980742187046, + "learning_rate": 1.5084918902326693e-05, + "loss": 1.5035, + "step": 58263 + }, + { + "epoch": 0.7, + "grad_norm": 34.16401267179167, + "learning_rate": 1.5084415877348914e-05, + "loss": 1.3825, + "step": 58266 + }, + { + "epoch": 0.7, + "grad_norm": 14.348921111672599, + "learning_rate": 1.5083912835019856e-05, + "loss": 1.3217, + "step": 58269 + }, + { + "epoch": 0.7, + "grad_norm": 15.841663994076137, + "learning_rate": 1.5083409775341237e-05, + "loss": 1.2409, + "step": 58272 + }, + { + "epoch": 0.7, + "grad_norm": 95.69745254209703, + "learning_rate": 1.508290669831477e-05, + "loss": 1.8856, + "step": 58275 + }, + { + "epoch": 0.7, + "grad_norm": 11.78912933495902, + "learning_rate": 1.5082403603942175e-05, + "loss": 1.7179, + "step": 58278 + }, + { + "epoch": 0.7, + "grad_norm": 21.54494266627091, + "learning_rate": 1.5081900492225166e-05, + "loss": 1.1289, + "step": 58281 + }, + { + "epoch": 0.7, + "grad_norm": 8.249164638590267, + "learning_rate": 1.508139736316546e-05, + "loss": 1.402, + "step": 58284 + }, + { + "epoch": 0.7, + "grad_norm": 8.891802513231982, + "learning_rate": 1.5080894216764774e-05, + "loss": 1.4109, + "step": 58287 + }, + { + "epoch": 0.7, + "grad_norm": 13.946962642540292, + "learning_rate": 1.508039105302483e-05, + "loss": 1.349, + "step": 58290 + }, + { + "epoch": 0.7, + "grad_norm": 6.286056026194713, + "learning_rate": 1.5079887871947341e-05, + "loss": 1.3482, + "step": 58293 + }, + { + "epoch": 0.7, + "grad_norm": 8.534821187885044, + "learning_rate": 1.5079384673534021e-05, + "loss": 1.6387, + "step": 58296 + }, + { + "epoch": 0.7, + "grad_norm": 9.910157050988346, + "learning_rate": 1.5078881457786592e-05, + "loss": 1.8165, + "step": 58299 + }, + { + "epoch": 0.7, + "grad_norm": 14.396359100275783, + "learning_rate": 1.5078378224706771e-05, + "loss": 1.4195, + "step": 58302 + }, + { + "epoch": 0.7, + "grad_norm": 19.267124056763635, + "learning_rate": 1.5077874974296274e-05, + "loss": 1.0009, + "step": 58305 + }, + { + "epoch": 0.7, + "grad_norm": 8.624500166480312, + "learning_rate": 1.5077371706556819e-05, + "loss": 1.6064, + "step": 58308 + }, + { + "epoch": 0.7, + "grad_norm": 10.180064694536654, + "learning_rate": 1.507686842149012e-05, + "loss": 1.3758, + "step": 58311 + }, + { + "epoch": 0.7, + "grad_norm": 20.127457903437687, + "learning_rate": 1.5076365119097904e-05, + "loss": 1.5084, + "step": 58314 + }, + { + "epoch": 0.7, + "grad_norm": 5.141317418398838, + "learning_rate": 1.507586179938188e-05, + "loss": 1.0412, + "step": 58317 + }, + { + "epoch": 0.7, + "grad_norm": 9.308162124758681, + "learning_rate": 1.5075358462343762e-05, + "loss": 1.2159, + "step": 58320 + }, + { + "epoch": 0.7, + "grad_norm": 37.763947314982794, + "learning_rate": 1.507485510798528e-05, + "loss": 1.5732, + "step": 58323 + }, + { + "epoch": 0.7, + "grad_norm": 20.740857012357203, + "learning_rate": 1.5074351736308147e-05, + "loss": 1.3267, + "step": 58326 + }, + { + "epoch": 0.7, + "grad_norm": 17.333445643808233, + "learning_rate": 1.5073848347314074e-05, + "loss": 1.189, + "step": 58329 + }, + { + "epoch": 0.7, + "grad_norm": 26.814723732895427, + "learning_rate": 1.5073344941004785e-05, + "loss": 1.6754, + "step": 58332 + }, + { + "epoch": 0.7, + "grad_norm": 9.769336052658419, + "learning_rate": 1.5072841517382001e-05, + "loss": 1.4197, + "step": 58335 + }, + { + "epoch": 0.7, + "grad_norm": 12.604168891090053, + "learning_rate": 1.5072338076447434e-05, + "loss": 1.2082, + "step": 58338 + }, + { + "epoch": 0.7, + "grad_norm": 31.799842050976615, + "learning_rate": 1.5071834618202804e-05, + "loss": 1.3254, + "step": 58341 + }, + { + "epoch": 0.7, + "grad_norm": 40.7451976890028, + "learning_rate": 1.507133114264983e-05, + "loss": 1.2737, + "step": 58344 + }, + { + "epoch": 0.7, + "grad_norm": 8.288647444428769, + "learning_rate": 1.507082764979023e-05, + "loss": 1.3462, + "step": 58347 + }, + { + "epoch": 0.7, + "grad_norm": 26.673584199028657, + "learning_rate": 1.5070324139625717e-05, + "loss": 1.5189, + "step": 58350 + }, + { + "epoch": 0.7, + "grad_norm": 8.11598715323929, + "learning_rate": 1.5069820612158016e-05, + "loss": 1.0957, + "step": 58353 + }, + { + "epoch": 0.7, + "grad_norm": 32.556551834507296, + "learning_rate": 1.5069317067388847e-05, + "loss": 1.6335, + "step": 58356 + }, + { + "epoch": 0.7, + "grad_norm": 14.956103982977421, + "learning_rate": 1.5068813505319923e-05, + "loss": 1.5594, + "step": 58359 + }, + { + "epoch": 0.7, + "grad_norm": 20.81065855268539, + "learning_rate": 1.5068309925952964e-05, + "loss": 1.4023, + "step": 58362 + }, + { + "epoch": 0.7, + "grad_norm": 19.422844860520378, + "learning_rate": 1.506780632928969e-05, + "loss": 1.5189, + "step": 58365 + }, + { + "epoch": 0.7, + "grad_norm": 34.32236292031007, + "learning_rate": 1.5067302715331817e-05, + "loss": 1.266, + "step": 58368 + }, + { + "epoch": 0.7, + "grad_norm": 2.408917727382193, + "learning_rate": 1.5066799084081068e-05, + "loss": 1.046, + "step": 58371 + }, + { + "epoch": 0.7, + "grad_norm": 33.11218854735112, + "learning_rate": 1.5066295435539154e-05, + "loss": 1.3523, + "step": 58374 + }, + { + "epoch": 0.7, + "grad_norm": 16.82502146534403, + "learning_rate": 1.50657917697078e-05, + "loss": 1.6088, + "step": 58377 + }, + { + "epoch": 0.7, + "grad_norm": 9.575193359716122, + "learning_rate": 1.5065288086588726e-05, + "loss": 1.532, + "step": 58380 + }, + { + "epoch": 0.7, + "grad_norm": 18.38540995725583, + "learning_rate": 1.5064784386183644e-05, + "loss": 1.2197, + "step": 58383 + }, + { + "epoch": 0.7, + "grad_norm": 13.564363863118519, + "learning_rate": 1.5064280668494282e-05, + "loss": 1.3182, + "step": 58386 + }, + { + "epoch": 0.7, + "grad_norm": 24.359659716217028, + "learning_rate": 1.5063776933522349e-05, + "loss": 1.5873, + "step": 58389 + }, + { + "epoch": 0.7, + "grad_norm": 40.54713024496194, + "learning_rate": 1.5063273181269575e-05, + "loss": 1.4593, + "step": 58392 + }, + { + "epoch": 0.7, + "grad_norm": 44.091824739320316, + "learning_rate": 1.5062769411737668e-05, + "loss": 1.3182, + "step": 58395 + }, + { + "epoch": 0.7, + "grad_norm": 3.3565623469244468, + "learning_rate": 1.5062265624928358e-05, + "loss": 1.1105, + "step": 58398 + }, + { + "epoch": 0.7, + "grad_norm": 20.03132039749478, + "learning_rate": 1.5061761820843355e-05, + "loss": 1.3813, + "step": 58401 + }, + { + "epoch": 0.7, + "grad_norm": 11.385905598806207, + "learning_rate": 1.5061257999484382e-05, + "loss": 1.5128, + "step": 58404 + }, + { + "epoch": 0.7, + "grad_norm": 19.373796818841974, + "learning_rate": 1.506075416085316e-05, + "loss": 1.7444, + "step": 58407 + }, + { + "epoch": 0.7, + "grad_norm": 54.04404316273854, + "learning_rate": 1.5060250304951404e-05, + "loss": 1.2573, + "step": 58410 + }, + { + "epoch": 0.7, + "grad_norm": 15.305322039141139, + "learning_rate": 1.5059746431780842e-05, + "loss": 1.3812, + "step": 58413 + }, + { + "epoch": 0.7, + "grad_norm": 22.67568193724615, + "learning_rate": 1.5059242541343185e-05, + "loss": 1.3734, + "step": 58416 + }, + { + "epoch": 0.7, + "grad_norm": 26.417511714324103, + "learning_rate": 1.5058738633640158e-05, + "loss": 1.5395, + "step": 58419 + }, + { + "epoch": 0.7, + "grad_norm": 21.404674545979628, + "learning_rate": 1.5058234708673474e-05, + "loss": 1.5896, + "step": 58422 + }, + { + "epoch": 0.7, + "grad_norm": 19.66389506112113, + "learning_rate": 1.5057730766444857e-05, + "loss": 1.6026, + "step": 58425 + }, + { + "epoch": 0.7, + "grad_norm": 24.932174257456865, + "learning_rate": 1.5057226806956027e-05, + "loss": 1.3486, + "step": 58428 + }, + { + "epoch": 0.7, + "grad_norm": 8.032015744080747, + "learning_rate": 1.5056722830208706e-05, + "loss": 1.2553, + "step": 58431 + }, + { + "epoch": 0.7, + "grad_norm": 5.096374001198169, + "learning_rate": 1.5056218836204611e-05, + "loss": 1.2671, + "step": 58434 + }, + { + "epoch": 0.7, + "grad_norm": 39.67844092674189, + "learning_rate": 1.5055714824945459e-05, + "loss": 1.463, + "step": 58437 + }, + { + "epoch": 0.7, + "grad_norm": 6.442985063611653, + "learning_rate": 1.5055210796432978e-05, + "loss": 1.5487, + "step": 58440 + }, + { + "epoch": 0.7, + "grad_norm": 23.65076375225714, + "learning_rate": 1.5054706750668882e-05, + "loss": 1.5118, + "step": 58443 + }, + { + "epoch": 0.7, + "grad_norm": 49.887335119882636, + "learning_rate": 1.505420268765489e-05, + "loss": 1.3031, + "step": 58446 + }, + { + "epoch": 0.7, + "grad_norm": 15.767378655846217, + "learning_rate": 1.5053698607392726e-05, + "loss": 1.3184, + "step": 58449 + }, + { + "epoch": 0.7, + "grad_norm": 11.50502102375202, + "learning_rate": 1.5053194509884112e-05, + "loss": 1.473, + "step": 58452 + }, + { + "epoch": 0.7, + "grad_norm": 9.073679636953921, + "learning_rate": 1.5052690395130763e-05, + "loss": 1.2983, + "step": 58455 + }, + { + "epoch": 0.7, + "grad_norm": 15.392269080193405, + "learning_rate": 1.5052186263134398e-05, + "loss": 1.6488, + "step": 58458 + }, + { + "epoch": 0.7, + "grad_norm": 11.187950160524421, + "learning_rate": 1.5051682113896745e-05, + "loss": 1.5575, + "step": 58461 + }, + { + "epoch": 0.7, + "grad_norm": 19.861920073883216, + "learning_rate": 1.5051177947419522e-05, + "loss": 1.5313, + "step": 58464 + }, + { + "epoch": 0.7, + "grad_norm": 34.468386488276956, + "learning_rate": 1.5050673763704447e-05, + "loss": 1.4699, + "step": 58467 + }, + { + "epoch": 0.7, + "grad_norm": 9.717996504792266, + "learning_rate": 1.5050169562753239e-05, + "loss": 1.22, + "step": 58470 + }, + { + "epoch": 0.7, + "grad_norm": 7.276004841705744, + "learning_rate": 1.5049665344567626e-05, + "loss": 1.6043, + "step": 58473 + }, + { + "epoch": 0.7, + "grad_norm": 13.648737084278215, + "learning_rate": 1.504916110914932e-05, + "loss": 1.3447, + "step": 58476 + }, + { + "epoch": 0.7, + "grad_norm": 18.294212548884126, + "learning_rate": 1.5048656856500047e-05, + "loss": 1.1129, + "step": 58479 + }, + { + "epoch": 0.7, + "grad_norm": 7.492889840682949, + "learning_rate": 1.5048152586621527e-05, + "loss": 0.9962, + "step": 58482 + }, + { + "epoch": 0.7, + "grad_norm": 34.154624319191356, + "learning_rate": 1.5047648299515478e-05, + "loss": 1.359, + "step": 58485 + }, + { + "epoch": 0.7, + "grad_norm": 67.36382745487262, + "learning_rate": 1.5047143995183629e-05, + "loss": 1.2811, + "step": 58488 + }, + { + "epoch": 0.7, + "grad_norm": 21.8197909992261, + "learning_rate": 1.504663967362769e-05, + "loss": 1.2071, + "step": 58491 + }, + { + "epoch": 0.7, + "grad_norm": 24.29915224454261, + "learning_rate": 1.5046135334849392e-05, + "loss": 1.5136, + "step": 58494 + }, + { + "epoch": 0.7, + "grad_norm": 7.30511964710981, + "learning_rate": 1.5045630978850448e-05, + "loss": 1.0991, + "step": 58497 + }, + { + "epoch": 0.7, + "grad_norm": 11.365444857652069, + "learning_rate": 1.5045126605632585e-05, + "loss": 1.5907, + "step": 58500 + }, + { + "epoch": 0.7, + "grad_norm": 11.930328094387828, + "learning_rate": 1.5044622215197521e-05, + "loss": 1.1671, + "step": 58503 + }, + { + "epoch": 0.7, + "grad_norm": 17.404090362439526, + "learning_rate": 1.504411780754698e-05, + "loss": 1.0596, + "step": 58506 + }, + { + "epoch": 0.7, + "grad_norm": 9.810393126447437, + "learning_rate": 1.504361338268268e-05, + "loss": 1.5443, + "step": 58509 + }, + { + "epoch": 0.7, + "grad_norm": 16.83996736356981, + "learning_rate": 1.5043108940606344e-05, + "loss": 1.7025, + "step": 58512 + }, + { + "epoch": 0.7, + "grad_norm": 5.548366864386502, + "learning_rate": 1.5042604481319698e-05, + "loss": 1.3216, + "step": 58515 + }, + { + "epoch": 0.7, + "grad_norm": 13.478182519068957, + "learning_rate": 1.5042100004824454e-05, + "loss": 1.6991, + "step": 58518 + }, + { + "epoch": 0.7, + "grad_norm": 7.996192325971486, + "learning_rate": 1.5041595511122342e-05, + "loss": 1.2166, + "step": 58521 + }, + { + "epoch": 0.7, + "grad_norm": 11.106011753107236, + "learning_rate": 1.504109100021508e-05, + "loss": 1.6179, + "step": 58524 + }, + { + "epoch": 0.7, + "grad_norm": 5.3644151539659894, + "learning_rate": 1.504058647210439e-05, + "loss": 1.3431, + "step": 58527 + }, + { + "epoch": 0.7, + "grad_norm": 3.732904808214655, + "learning_rate": 1.5040081926791992e-05, + "loss": 1.6533, + "step": 58530 + }, + { + "epoch": 0.7, + "grad_norm": 4.09728725692471, + "learning_rate": 1.5039577364279612e-05, + "loss": 1.4235, + "step": 58533 + }, + { + "epoch": 0.7, + "grad_norm": 17.65134524888194, + "learning_rate": 1.5039072784568972e-05, + "loss": 1.6128, + "step": 58536 + }, + { + "epoch": 0.7, + "grad_norm": 46.98288026812266, + "learning_rate": 1.5038568187661785e-05, + "loss": 1.4046, + "step": 58539 + }, + { + "epoch": 0.7, + "grad_norm": 10.493904226612353, + "learning_rate": 1.5038063573559785e-05, + "loss": 1.2705, + "step": 58542 + }, + { + "epoch": 0.7, + "grad_norm": 3.060918868992545, + "learning_rate": 1.5037558942264687e-05, + "loss": 1.8166, + "step": 58545 + }, + { + "epoch": 0.7, + "grad_norm": 21.408797808974263, + "learning_rate": 1.5037054293778215e-05, + "loss": 1.5432, + "step": 58548 + }, + { + "epoch": 0.7, + "grad_norm": 64.84691259400259, + "learning_rate": 1.5036549628102091e-05, + "loss": 1.3564, + "step": 58551 + }, + { + "epoch": 0.7, + "grad_norm": 9.663075265089251, + "learning_rate": 1.5036044945238037e-05, + "loss": 1.3186, + "step": 58554 + }, + { + "epoch": 0.7, + "grad_norm": 5.0450248687233845, + "learning_rate": 1.5035540245187776e-05, + "loss": 1.5469, + "step": 58557 + }, + { + "epoch": 0.7, + "grad_norm": 10.365143473616548, + "learning_rate": 1.5035035527953032e-05, + "loss": 1.3586, + "step": 58560 + }, + { + "epoch": 0.7, + "grad_norm": 5.73200552941046, + "learning_rate": 1.5034530793535522e-05, + "loss": 1.5052, + "step": 58563 + }, + { + "epoch": 0.7, + "grad_norm": 7.311548958165388, + "learning_rate": 1.5034026041936974e-05, + "loss": 1.4881, + "step": 58566 + }, + { + "epoch": 0.7, + "grad_norm": 9.703457881257338, + "learning_rate": 1.5033521273159108e-05, + "loss": 1.2413, + "step": 58569 + }, + { + "epoch": 0.7, + "grad_norm": 11.869681276448567, + "learning_rate": 1.5033016487203645e-05, + "loss": 1.5334, + "step": 58572 + }, + { + "epoch": 0.7, + "grad_norm": 5.024909771030352, + "learning_rate": 1.5032511684072311e-05, + "loss": 1.9349, + "step": 58575 + }, + { + "epoch": 0.7, + "grad_norm": 9.04702932006042, + "learning_rate": 1.5032006863766828e-05, + "loss": 1.4694, + "step": 58578 + }, + { + "epoch": 0.7, + "grad_norm": 11.621354986176403, + "learning_rate": 1.5031502026288918e-05, + "loss": 1.4254, + "step": 58581 + }, + { + "epoch": 0.7, + "grad_norm": 8.324420973473742, + "learning_rate": 1.5030997171640302e-05, + "loss": 1.2903, + "step": 58584 + }, + { + "epoch": 0.7, + "grad_norm": 17.395139890402465, + "learning_rate": 1.5030492299822705e-05, + "loss": 1.1021, + "step": 58587 + }, + { + "epoch": 0.7, + "grad_norm": 16.9322281267418, + "learning_rate": 1.5029987410837854e-05, + "loss": 1.1988, + "step": 58590 + }, + { + "epoch": 0.7, + "grad_norm": 11.198625629210818, + "learning_rate": 1.5029482504687463e-05, + "loss": 1.4232, + "step": 58593 + }, + { + "epoch": 0.7, + "grad_norm": 5.503994888797044, + "learning_rate": 1.502897758137326e-05, + "loss": 1.4688, + "step": 58596 + }, + { + "epoch": 0.7, + "grad_norm": 24.50595950468605, + "learning_rate": 1.502847264089697e-05, + "loss": 1.5324, + "step": 58599 + }, + { + "epoch": 0.7, + "grad_norm": 108.56178651339512, + "learning_rate": 1.5027967683260313e-05, + "loss": 1.3039, + "step": 58602 + }, + { + "epoch": 0.7, + "grad_norm": 21.591114354732277, + "learning_rate": 1.5027462708465016e-05, + "loss": 1.3486, + "step": 58605 + }, + { + "epoch": 0.7, + "grad_norm": 11.286887686610926, + "learning_rate": 1.5026957716512796e-05, + "loss": 1.5436, + "step": 58608 + }, + { + "epoch": 0.7, + "grad_norm": 6.848579597588062, + "learning_rate": 1.5026452707405383e-05, + "loss": 1.2085, + "step": 58611 + }, + { + "epoch": 0.7, + "grad_norm": 3.32980390724917, + "learning_rate": 1.5025947681144497e-05, + "loss": 1.4259, + "step": 58614 + }, + { + "epoch": 0.7, + "grad_norm": 21.983771156022698, + "learning_rate": 1.502544263773186e-05, + "loss": 1.1559, + "step": 58617 + }, + { + "epoch": 0.7, + "grad_norm": 4.906401131193515, + "learning_rate": 1.50249375771692e-05, + "loss": 1.4712, + "step": 58620 + }, + { + "epoch": 0.7, + "grad_norm": 20.294803148667555, + "learning_rate": 1.5024432499458235e-05, + "loss": 1.4292, + "step": 58623 + }, + { + "epoch": 0.7, + "grad_norm": 14.463199375054929, + "learning_rate": 1.502392740460069e-05, + "loss": 1.4653, + "step": 58626 + }, + { + "epoch": 0.7, + "grad_norm": 10.842797110864456, + "learning_rate": 1.5023422292598295e-05, + "loss": 1.1896, + "step": 58629 + }, + { + "epoch": 0.71, + "grad_norm": 13.102904184078703, + "learning_rate": 1.5022917163452767e-05, + "loss": 1.8557, + "step": 58632 + }, + { + "epoch": 0.71, + "grad_norm": 5.0151044402050875, + "learning_rate": 1.5022412017165836e-05, + "loss": 1.4807, + "step": 58635 + }, + { + "epoch": 0.71, + "grad_norm": 21.16795874218313, + "learning_rate": 1.5021906853739214e-05, + "loss": 1.3268, + "step": 58638 + }, + { + "epoch": 0.71, + "grad_norm": 26.64886808781023, + "learning_rate": 1.5021401673174638e-05, + "loss": 1.4519, + "step": 58641 + }, + { + "epoch": 0.71, + "grad_norm": 26.573029313831686, + "learning_rate": 1.5020896475473828e-05, + "loss": 1.5813, + "step": 58644 + }, + { + "epoch": 0.71, + "grad_norm": 4.626588210946215, + "learning_rate": 1.5020391260638506e-05, + "loss": 1.2998, + "step": 58647 + }, + { + "epoch": 0.71, + "grad_norm": 22.010527497081306, + "learning_rate": 1.5019886028670393e-05, + "loss": 1.7459, + "step": 58650 + }, + { + "epoch": 0.71, + "grad_norm": 4.013435179708881, + "learning_rate": 1.5019380779571222e-05, + "loss": 1.0581, + "step": 58653 + }, + { + "epoch": 0.71, + "grad_norm": 5.175367711004878, + "learning_rate": 1.501887551334271e-05, + "loss": 0.9794, + "step": 58656 + }, + { + "epoch": 0.71, + "grad_norm": 26.03153206793122, + "learning_rate": 1.5018370229986584e-05, + "loss": 1.3326, + "step": 58659 + }, + { + "epoch": 0.71, + "grad_norm": 32.5779987976821, + "learning_rate": 1.5017864929504566e-05, + "loss": 1.5448, + "step": 58662 + }, + { + "epoch": 0.71, + "grad_norm": 14.697338190269758, + "learning_rate": 1.5017359611898386e-05, + "loss": 1.2462, + "step": 58665 + }, + { + "epoch": 0.71, + "grad_norm": 24.682424008559053, + "learning_rate": 1.5016854277169763e-05, + "loss": 1.4047, + "step": 58668 + }, + { + "epoch": 0.71, + "grad_norm": 14.19258032263866, + "learning_rate": 1.5016348925320423e-05, + "loss": 1.4973, + "step": 58671 + }, + { + "epoch": 0.71, + "grad_norm": 37.616261543161954, + "learning_rate": 1.5015843556352092e-05, + "loss": 1.3446, + "step": 58674 + }, + { + "epoch": 0.71, + "grad_norm": 5.095728562867086, + "learning_rate": 1.5015338170266493e-05, + "loss": 1.4523, + "step": 58677 + }, + { + "epoch": 0.71, + "grad_norm": 13.239547664169118, + "learning_rate": 1.5014832767065351e-05, + "loss": 1.5891, + "step": 58680 + }, + { + "epoch": 0.71, + "grad_norm": 25.72445450599136, + "learning_rate": 1.5014327346750394e-05, + "loss": 1.3673, + "step": 58683 + }, + { + "epoch": 0.71, + "grad_norm": 20.594100510209195, + "learning_rate": 1.5013821909323341e-05, + "loss": 1.539, + "step": 58686 + }, + { + "epoch": 0.71, + "grad_norm": 9.092637652202377, + "learning_rate": 1.5013316454785921e-05, + "loss": 1.2392, + "step": 58689 + }, + { + "epoch": 0.71, + "grad_norm": 2.7891426616752963, + "learning_rate": 1.5012810983139856e-05, + "loss": 1.679, + "step": 58692 + }, + { + "epoch": 0.71, + "grad_norm": 27.839581232454073, + "learning_rate": 1.5012305494386876e-05, + "loss": 1.8585, + "step": 58695 + }, + { + "epoch": 0.71, + "grad_norm": 111.08879504662704, + "learning_rate": 1.50117999885287e-05, + "loss": 1.402, + "step": 58698 + }, + { + "epoch": 0.71, + "grad_norm": 14.342994947188675, + "learning_rate": 1.5011294465567057e-05, + "loss": 1.5459, + "step": 58701 + }, + { + "epoch": 0.71, + "grad_norm": 30.747531459327533, + "learning_rate": 1.501078892550367e-05, + "loss": 1.426, + "step": 58704 + }, + { + "epoch": 0.71, + "grad_norm": 3.687573904858976, + "learning_rate": 1.5010283368340266e-05, + "loss": 1.659, + "step": 58707 + }, + { + "epoch": 0.71, + "grad_norm": 19.81309254087439, + "learning_rate": 1.5009777794078569e-05, + "loss": 1.3619, + "step": 58710 + }, + { + "epoch": 0.71, + "grad_norm": 8.608276514899405, + "learning_rate": 1.5009272202720304e-05, + "loss": 1.1769, + "step": 58713 + }, + { + "epoch": 0.71, + "grad_norm": 11.05328860744101, + "learning_rate": 1.5008766594267199e-05, + "loss": 1.4747, + "step": 58716 + }, + { + "epoch": 0.71, + "grad_norm": 18.80016601790616, + "learning_rate": 1.500826096872098e-05, + "loss": 1.3916, + "step": 58719 + }, + { + "epoch": 0.71, + "grad_norm": 23.88521581521757, + "learning_rate": 1.5007755326083367e-05, + "loss": 1.7832, + "step": 58722 + }, + { + "epoch": 0.71, + "grad_norm": 10.57898481759978, + "learning_rate": 1.500724966635609e-05, + "loss": 1.2343, + "step": 58725 + }, + { + "epoch": 0.71, + "grad_norm": 12.701532623288607, + "learning_rate": 1.5006743989540873e-05, + "loss": 1.3623, + "step": 58728 + }, + { + "epoch": 0.71, + "grad_norm": 5.58609944732066, + "learning_rate": 1.5006238295639443e-05, + "loss": 1.3621, + "step": 58731 + }, + { + "epoch": 0.71, + "grad_norm": 37.04698733813457, + "learning_rate": 1.5005732584653521e-05, + "loss": 1.5655, + "step": 58734 + }, + { + "epoch": 0.71, + "grad_norm": 9.844331376053587, + "learning_rate": 1.500522685658484e-05, + "loss": 1.0714, + "step": 58737 + }, + { + "epoch": 0.71, + "grad_norm": 36.01506035285197, + "learning_rate": 1.5004721111435126e-05, + "loss": 1.4769, + "step": 58740 + }, + { + "epoch": 0.71, + "grad_norm": 15.149499206026023, + "learning_rate": 1.5004215349206097e-05, + "loss": 1.3854, + "step": 58743 + }, + { + "epoch": 0.71, + "grad_norm": 6.314210403066424, + "learning_rate": 1.5003709569899486e-05, + "loss": 1.1328, + "step": 58746 + }, + { + "epoch": 0.71, + "grad_norm": 11.465708227119997, + "learning_rate": 1.5003203773517015e-05, + "loss": 1.1465, + "step": 58749 + }, + { + "epoch": 0.71, + "grad_norm": 21.516954489242273, + "learning_rate": 1.500269796006041e-05, + "loss": 1.4271, + "step": 58752 + }, + { + "epoch": 0.71, + "grad_norm": 7.311432217297758, + "learning_rate": 1.50021921295314e-05, + "loss": 1.5795, + "step": 58755 + }, + { + "epoch": 0.71, + "grad_norm": 7.380789555030279, + "learning_rate": 1.500168628193171e-05, + "loss": 1.4898, + "step": 58758 + }, + { + "epoch": 0.71, + "grad_norm": 17.457981333179813, + "learning_rate": 1.5001180417263068e-05, + "loss": 1.5593, + "step": 58761 + }, + { + "epoch": 0.71, + "grad_norm": 13.507350377999794, + "learning_rate": 1.50006745355272e-05, + "loss": 1.5818, + "step": 58764 + }, + { + "epoch": 0.71, + "grad_norm": 26.254680039926832, + "learning_rate": 1.5000168636725825e-05, + "loss": 1.4001, + "step": 58767 + }, + { + "epoch": 0.71, + "grad_norm": 43.563805424337176, + "learning_rate": 1.4999662720860682e-05, + "loss": 1.5207, + "step": 58770 + }, + { + "epoch": 0.71, + "grad_norm": 30.822334214788757, + "learning_rate": 1.4999156787933488e-05, + "loss": 1.5939, + "step": 58773 + }, + { + "epoch": 0.71, + "grad_norm": 11.4059649555184, + "learning_rate": 1.4998650837945974e-05, + "loss": 1.3197, + "step": 58776 + }, + { + "epoch": 0.71, + "grad_norm": 17.815589385755086, + "learning_rate": 1.4998144870899862e-05, + "loss": 1.2738, + "step": 58779 + }, + { + "epoch": 0.71, + "grad_norm": 8.370483246744929, + "learning_rate": 1.4997638886796884e-05, + "loss": 1.2905, + "step": 58782 + }, + { + "epoch": 0.71, + "grad_norm": 5.555739429971035, + "learning_rate": 1.4997132885638766e-05, + "loss": 1.2551, + "step": 58785 + }, + { + "epoch": 0.71, + "grad_norm": 7.078865629607786, + "learning_rate": 1.4996626867427231e-05, + "loss": 1.5006, + "step": 58788 + }, + { + "epoch": 0.71, + "grad_norm": 25.51415242553546, + "learning_rate": 1.4996120832164009e-05, + "loss": 1.0717, + "step": 58791 + }, + { + "epoch": 0.71, + "grad_norm": 13.583960482730504, + "learning_rate": 1.4995614779850826e-05, + "loss": 1.5828, + "step": 58794 + }, + { + "epoch": 0.71, + "grad_norm": 24.010271907866596, + "learning_rate": 1.499510871048941e-05, + "loss": 1.3914, + "step": 58797 + }, + { + "epoch": 0.71, + "grad_norm": 18.43793604892627, + "learning_rate": 1.4994602624081486e-05, + "loss": 1.1618, + "step": 58800 + }, + { + "epoch": 0.71, + "grad_norm": 20.816471288032826, + "learning_rate": 1.4994096520628784e-05, + "loss": 1.1882, + "step": 58803 + }, + { + "epoch": 0.71, + "grad_norm": 6.995731861225648, + "learning_rate": 1.4993590400133029e-05, + "loss": 1.4764, + "step": 58806 + }, + { + "epoch": 0.71, + "grad_norm": 5.891607180729166, + "learning_rate": 1.4993084262595946e-05, + "loss": 1.4473, + "step": 58809 + }, + { + "epoch": 0.71, + "grad_norm": 6.937461147652745, + "learning_rate": 1.4992578108019268e-05, + "loss": 1.3315, + "step": 58812 + }, + { + "epoch": 0.71, + "grad_norm": 29.400875163792165, + "learning_rate": 1.4992071936404719e-05, + "loss": 1.5015, + "step": 58815 + }, + { + "epoch": 0.71, + "grad_norm": 8.133424675787568, + "learning_rate": 1.4991565747754026e-05, + "loss": 1.1767, + "step": 58818 + }, + { + "epoch": 0.71, + "grad_norm": 8.4228275121166, + "learning_rate": 1.4991059542068915e-05, + "loss": 1.3392, + "step": 58821 + }, + { + "epoch": 0.71, + "grad_norm": 11.33599634915346, + "learning_rate": 1.499055331935112e-05, + "loss": 1.5947, + "step": 58824 + }, + { + "epoch": 0.71, + "grad_norm": 8.575594992319756, + "learning_rate": 1.4990047079602361e-05, + "loss": 1.2774, + "step": 58827 + }, + { + "epoch": 0.71, + "grad_norm": 14.492752969124384, + "learning_rate": 1.4989540822824372e-05, + "loss": 1.3016, + "step": 58830 + }, + { + "epoch": 0.71, + "grad_norm": 34.663104344356775, + "learning_rate": 1.4989034549018873e-05, + "loss": 1.642, + "step": 58833 + }, + { + "epoch": 0.71, + "grad_norm": 13.813659710433155, + "learning_rate": 1.4988528258187596e-05, + "loss": 1.182, + "step": 58836 + }, + { + "epoch": 0.71, + "grad_norm": 4.8180211677592135, + "learning_rate": 1.4988021950332273e-05, + "loss": 1.6263, + "step": 58839 + }, + { + "epoch": 0.71, + "grad_norm": 12.310567795647465, + "learning_rate": 1.4987515625454623e-05, + "loss": 1.5435, + "step": 58842 + }, + { + "epoch": 0.71, + "grad_norm": 9.822960950402734, + "learning_rate": 1.4987009283556382e-05, + "loss": 1.3747, + "step": 58845 + }, + { + "epoch": 0.71, + "grad_norm": 11.963722694647153, + "learning_rate": 1.4986502924639275e-05, + "loss": 1.1183, + "step": 58848 + }, + { + "epoch": 0.71, + "grad_norm": 9.53426942113521, + "learning_rate": 1.4985996548705029e-05, + "loss": 1.4765, + "step": 58851 + }, + { + "epoch": 0.71, + "grad_norm": 11.328763044288374, + "learning_rate": 1.4985490155755371e-05, + "loss": 1.3695, + "step": 58854 + }, + { + "epoch": 0.71, + "grad_norm": 2.924021603406874, + "learning_rate": 1.4984983745792034e-05, + "loss": 1.408, + "step": 58857 + }, + { + "epoch": 0.71, + "grad_norm": 6.61069689360933, + "learning_rate": 1.498447731881674e-05, + "loss": 0.9855, + "step": 58860 + }, + { + "epoch": 0.71, + "grad_norm": 49.92226672826759, + "learning_rate": 1.4983970874831219e-05, + "loss": 1.3302, + "step": 58863 + }, + { + "epoch": 0.71, + "grad_norm": 9.625761717527304, + "learning_rate": 1.4983464413837206e-05, + "loss": 1.0973, + "step": 58866 + }, + { + "epoch": 0.71, + "grad_norm": 10.790583027302327, + "learning_rate": 1.498295793583642e-05, + "loss": 1.4454, + "step": 58869 + }, + { + "epoch": 0.71, + "grad_norm": 5.07135725106169, + "learning_rate": 1.4982451440830596e-05, + "loss": 1.0846, + "step": 58872 + }, + { + "epoch": 0.71, + "grad_norm": 11.47151847317647, + "learning_rate": 1.4981944928821458e-05, + "loss": 1.4778, + "step": 58875 + }, + { + "epoch": 0.71, + "grad_norm": 6.329308011526465, + "learning_rate": 1.4981438399810738e-05, + "loss": 1.288, + "step": 58878 + }, + { + "epoch": 0.71, + "grad_norm": 20.218520614283676, + "learning_rate": 1.498093185380016e-05, + "loss": 1.5424, + "step": 58881 + }, + { + "epoch": 0.71, + "grad_norm": 11.156369224133435, + "learning_rate": 1.4980425290791459e-05, + "loss": 1.8123, + "step": 58884 + }, + { + "epoch": 0.71, + "grad_norm": 20.543508011628866, + "learning_rate": 1.4979918710786356e-05, + "loss": 1.1381, + "step": 58887 + }, + { + "epoch": 0.71, + "grad_norm": 6.464162663250162, + "learning_rate": 1.4979412113786588e-05, + "loss": 1.4515, + "step": 58890 + }, + { + "epoch": 0.71, + "grad_norm": 8.385915593418737, + "learning_rate": 1.4978905499793881e-05, + "loss": 1.1789, + "step": 58893 + }, + { + "epoch": 0.71, + "grad_norm": 23.39562660980101, + "learning_rate": 1.4978398868809958e-05, + "loss": 1.4739, + "step": 58896 + }, + { + "epoch": 0.71, + "grad_norm": 11.094570828062475, + "learning_rate": 1.497789222083656e-05, + "loss": 1.6195, + "step": 58899 + }, + { + "epoch": 0.71, + "grad_norm": 43.08406552463709, + "learning_rate": 1.4977385555875403e-05, + "loss": 1.0306, + "step": 58902 + }, + { + "epoch": 0.71, + "grad_norm": 20.88948778535644, + "learning_rate": 1.4976878873928224e-05, + "loss": 1.2118, + "step": 58905 + }, + { + "epoch": 0.71, + "grad_norm": 25.864206962462198, + "learning_rate": 1.4976372174996749e-05, + "loss": 1.3251, + "step": 58908 + }, + { + "epoch": 0.71, + "grad_norm": 8.293692227895477, + "learning_rate": 1.4975865459082712e-05, + "loss": 1.1856, + "step": 58911 + }, + { + "epoch": 0.71, + "grad_norm": 18.645552463539453, + "learning_rate": 1.4975358726187835e-05, + "loss": 1.4657, + "step": 58914 + }, + { + "epoch": 0.71, + "grad_norm": 9.201289865482858, + "learning_rate": 1.4974851976313848e-05, + "loss": 1.6632, + "step": 58917 + }, + { + "epoch": 0.71, + "grad_norm": 29.683170088126335, + "learning_rate": 1.4974345209462491e-05, + "loss": 1.3103, + "step": 58920 + }, + { + "epoch": 0.71, + "grad_norm": 20.147456366430983, + "learning_rate": 1.4973838425635478e-05, + "loss": 1.5933, + "step": 58923 + }, + { + "epoch": 0.71, + "grad_norm": 2.945200119457683, + "learning_rate": 1.4973331624834551e-05, + "loss": 1.6351, + "step": 58926 + }, + { + "epoch": 0.71, + "grad_norm": 15.386674174354527, + "learning_rate": 1.4972824807061433e-05, + "loss": 1.1334, + "step": 58929 + }, + { + "epoch": 0.71, + "grad_norm": 10.338486691802228, + "learning_rate": 1.4972317972317855e-05, + "loss": 1.2318, + "step": 58932 + }, + { + "epoch": 0.71, + "grad_norm": 71.95633783987822, + "learning_rate": 1.4971811120605546e-05, + "loss": 1.6951, + "step": 58935 + }, + { + "epoch": 0.71, + "grad_norm": 19.10724014963412, + "learning_rate": 1.4971304251926238e-05, + "loss": 1.5505, + "step": 58938 + }, + { + "epoch": 0.71, + "grad_norm": 7.560725467477641, + "learning_rate": 1.497079736628166e-05, + "loss": 1.5376, + "step": 58941 + }, + { + "epoch": 0.71, + "grad_norm": 9.849861629774525, + "learning_rate": 1.4970290463673535e-05, + "loss": 1.3468, + "step": 58944 + }, + { + "epoch": 0.71, + "grad_norm": 9.448824570895471, + "learning_rate": 1.4969783544103606e-05, + "loss": 1.4322, + "step": 58947 + }, + { + "epoch": 0.71, + "grad_norm": 4.165329412143839, + "learning_rate": 1.496927660757359e-05, + "loss": 1.1369, + "step": 58950 + }, + { + "epoch": 0.71, + "grad_norm": 16.488244771479312, + "learning_rate": 1.4968769654085228e-05, + "loss": 1.4399, + "step": 58953 + }, + { + "epoch": 0.71, + "grad_norm": 3.9119911721686744, + "learning_rate": 1.4968262683640244e-05, + "loss": 1.1338, + "step": 58956 + }, + { + "epoch": 0.71, + "grad_norm": 4.868958304441977, + "learning_rate": 1.4967755696240369e-05, + "loss": 1.7593, + "step": 58959 + }, + { + "epoch": 0.71, + "grad_norm": 5.789500443693777, + "learning_rate": 1.496724869188733e-05, + "loss": 1.2624, + "step": 58962 + }, + { + "epoch": 0.71, + "grad_norm": 6.516515716126429, + "learning_rate": 1.4966741670582865e-05, + "loss": 1.7021, + "step": 58965 + }, + { + "epoch": 0.71, + "grad_norm": 22.904469739893603, + "learning_rate": 1.4966234632328695e-05, + "loss": 1.4234, + "step": 58968 + }, + { + "epoch": 0.71, + "grad_norm": 13.270851301236528, + "learning_rate": 1.4965727577126556e-05, + "loss": 1.6731, + "step": 58971 + }, + { + "epoch": 0.71, + "grad_norm": 3.212993674830988, + "learning_rate": 1.4965220504978182e-05, + "loss": 1.1923, + "step": 58974 + }, + { + "epoch": 0.71, + "grad_norm": 8.19577161697521, + "learning_rate": 1.4964713415885294e-05, + "loss": 1.3333, + "step": 58977 + }, + { + "epoch": 0.71, + "grad_norm": 20.312856375351796, + "learning_rate": 1.4964206309849629e-05, + "loss": 1.2804, + "step": 58980 + }, + { + "epoch": 0.71, + "grad_norm": 18.403100066544848, + "learning_rate": 1.4963699186872918e-05, + "loss": 1.356, + "step": 58983 + }, + { + "epoch": 0.71, + "grad_norm": 27.770415091926264, + "learning_rate": 1.4963192046956887e-05, + "loss": 1.3513, + "step": 58986 + }, + { + "epoch": 0.71, + "grad_norm": 4.142378622365612, + "learning_rate": 1.496268489010327e-05, + "loss": 1.1901, + "step": 58989 + }, + { + "epoch": 0.71, + "grad_norm": 6.6432839859803865, + "learning_rate": 1.4962177716313797e-05, + "loss": 1.4117, + "step": 58992 + }, + { + "epoch": 0.71, + "grad_norm": 6.397577089790323, + "learning_rate": 1.4961670525590199e-05, + "loss": 1.0856, + "step": 58995 + }, + { + "epoch": 0.71, + "grad_norm": 5.020109921931061, + "learning_rate": 1.4961163317934203e-05, + "loss": 1.2225, + "step": 58998 + }, + { + "epoch": 0.71, + "grad_norm": 12.816421888249799, + "learning_rate": 1.4960656093347549e-05, + "loss": 1.2588, + "step": 59001 + }, + { + "epoch": 0.71, + "grad_norm": 6.533358383486922, + "learning_rate": 1.4960148851831959e-05, + "loss": 1.6473, + "step": 59004 + }, + { + "epoch": 0.71, + "grad_norm": 18.08176393319067, + "learning_rate": 1.4959641593389172e-05, + "loss": 1.3802, + "step": 59007 + }, + { + "epoch": 0.71, + "grad_norm": 5.858270934602908, + "learning_rate": 1.4959134318020912e-05, + "loss": 0.8592, + "step": 59010 + }, + { + "epoch": 0.71, + "grad_norm": 7.807343276076659, + "learning_rate": 1.4958627025728912e-05, + "loss": 1.5048, + "step": 59013 + }, + { + "epoch": 0.71, + "grad_norm": 38.44208969258065, + "learning_rate": 1.4958119716514904e-05, + "loss": 1.4039, + "step": 59016 + }, + { + "epoch": 0.71, + "grad_norm": 17.272620125712496, + "learning_rate": 1.4957612390380623e-05, + "loss": 1.4388, + "step": 59019 + }, + { + "epoch": 0.71, + "grad_norm": 24.964487147456516, + "learning_rate": 1.4957105047327791e-05, + "loss": 1.4523, + "step": 59022 + }, + { + "epoch": 0.71, + "grad_norm": 11.710593177260025, + "learning_rate": 1.4956597687358145e-05, + "loss": 1.1701, + "step": 59025 + }, + { + "epoch": 0.71, + "grad_norm": 7.5092958661696, + "learning_rate": 1.4956090310473423e-05, + "loss": 1.1836, + "step": 59028 + }, + { + "epoch": 0.71, + "grad_norm": 13.497830255989536, + "learning_rate": 1.4955582916675344e-05, + "loss": 1.2356, + "step": 59031 + }, + { + "epoch": 0.71, + "grad_norm": 12.440178814102305, + "learning_rate": 1.4955075505965648e-05, + "loss": 1.1353, + "step": 59034 + }, + { + "epoch": 0.71, + "grad_norm": 8.831019196728572, + "learning_rate": 1.4954568078346065e-05, + "loss": 1.2025, + "step": 59037 + }, + { + "epoch": 0.71, + "grad_norm": 8.670035988836924, + "learning_rate": 1.4954060633818326e-05, + "loss": 1.131, + "step": 59040 + }, + { + "epoch": 0.71, + "grad_norm": 9.816000893723135, + "learning_rate": 1.495355317238416e-05, + "loss": 1.6737, + "step": 59043 + }, + { + "epoch": 0.71, + "grad_norm": 40.85908765621117, + "learning_rate": 1.4953045694045302e-05, + "loss": 1.3032, + "step": 59046 + }, + { + "epoch": 0.71, + "grad_norm": 6.436466013079364, + "learning_rate": 1.4952538198803483e-05, + "loss": 1.8137, + "step": 59049 + }, + { + "epoch": 0.71, + "grad_norm": 8.805508096972208, + "learning_rate": 1.4952030686660434e-05, + "loss": 1.4657, + "step": 59052 + }, + { + "epoch": 0.71, + "grad_norm": 7.425619042574796, + "learning_rate": 1.495152315761789e-05, + "loss": 1.0598, + "step": 59055 + }, + { + "epoch": 0.71, + "grad_norm": 5.829063645049305, + "learning_rate": 1.4951015611677581e-05, + "loss": 1.3727, + "step": 59058 + }, + { + "epoch": 0.71, + "grad_norm": 48.989559711169484, + "learning_rate": 1.495050804884124e-05, + "loss": 1.46, + "step": 59061 + }, + { + "epoch": 0.71, + "grad_norm": 18.773222932149675, + "learning_rate": 1.4950000469110594e-05, + "loss": 1.4487, + "step": 59064 + }, + { + "epoch": 0.71, + "grad_norm": 13.536697911671295, + "learning_rate": 1.4949492872487383e-05, + "loss": 1.1965, + "step": 59067 + }, + { + "epoch": 0.71, + "grad_norm": 8.969195190521678, + "learning_rate": 1.4948985258973337e-05, + "loss": 1.3183, + "step": 59070 + }, + { + "epoch": 0.71, + "grad_norm": 5.806342440306415, + "learning_rate": 1.4948477628570183e-05, + "loss": 1.3488, + "step": 59073 + }, + { + "epoch": 0.71, + "grad_norm": 21.464484437391697, + "learning_rate": 1.4947969981279658e-05, + "loss": 1.4893, + "step": 59076 + }, + { + "epoch": 0.71, + "grad_norm": 23.3945622262149, + "learning_rate": 1.4947462317103496e-05, + "loss": 1.3364, + "step": 59079 + }, + { + "epoch": 0.71, + "grad_norm": 6.7870466799738844, + "learning_rate": 1.4946954636043427e-05, + "loss": 1.4561, + "step": 59082 + }, + { + "epoch": 0.71, + "grad_norm": 7.755697419087215, + "learning_rate": 1.494644693810118e-05, + "loss": 1.1878, + "step": 59085 + }, + { + "epoch": 0.71, + "grad_norm": 9.740928928052416, + "learning_rate": 1.4945939223278493e-05, + "loss": 1.6534, + "step": 59088 + }, + { + "epoch": 0.71, + "grad_norm": 8.354286936323913, + "learning_rate": 1.49454314915771e-05, + "loss": 1.3544, + "step": 59091 + }, + { + "epoch": 0.71, + "grad_norm": 9.532811302669735, + "learning_rate": 1.4944923742998729e-05, + "loss": 1.5294, + "step": 59094 + }, + { + "epoch": 0.71, + "grad_norm": 15.714012906427776, + "learning_rate": 1.4944415977545112e-05, + "loss": 1.5257, + "step": 59097 + }, + { + "epoch": 0.71, + "grad_norm": 5.605606973223274, + "learning_rate": 1.4943908195217987e-05, + "loss": 1.4467, + "step": 59100 + }, + { + "epoch": 0.71, + "grad_norm": 6.247259017077418, + "learning_rate": 1.4943400396019084e-05, + "loss": 1.6191, + "step": 59103 + }, + { + "epoch": 0.71, + "grad_norm": 20.593330383402588, + "learning_rate": 1.4942892579950132e-05, + "loss": 1.4043, + "step": 59106 + }, + { + "epoch": 0.71, + "grad_norm": 16.939106317227978, + "learning_rate": 1.4942384747012875e-05, + "loss": 1.6778, + "step": 59109 + }, + { + "epoch": 0.71, + "grad_norm": 11.467159059695247, + "learning_rate": 1.4941876897209034e-05, + "loss": 1.5555, + "step": 59112 + }, + { + "epoch": 0.71, + "grad_norm": 6.3789849332017265, + "learning_rate": 1.494136903054035e-05, + "loss": 1.3099, + "step": 59115 + }, + { + "epoch": 0.71, + "grad_norm": 6.639248428838243, + "learning_rate": 1.494086114700855e-05, + "loss": 1.5291, + "step": 59118 + }, + { + "epoch": 0.71, + "grad_norm": 28.791558433235657, + "learning_rate": 1.4940353246615374e-05, + "loss": 1.3129, + "step": 59121 + }, + { + "epoch": 0.71, + "grad_norm": 11.821789321853506, + "learning_rate": 1.493984532936255e-05, + "loss": 0.8521, + "step": 59124 + }, + { + "epoch": 0.71, + "grad_norm": 3.457174619043026, + "learning_rate": 1.4939337395251813e-05, + "loss": 1.2032, + "step": 59127 + }, + { + "epoch": 0.71, + "grad_norm": 13.264694607096327, + "learning_rate": 1.4938829444284898e-05, + "loss": 1.3958, + "step": 59130 + }, + { + "epoch": 0.71, + "grad_norm": 6.6760541288489215, + "learning_rate": 1.4938321476463536e-05, + "loss": 1.3838, + "step": 59133 + }, + { + "epoch": 0.71, + "grad_norm": 16.35488718193964, + "learning_rate": 1.4937813491789462e-05, + "loss": 1.2209, + "step": 59136 + }, + { + "epoch": 0.71, + "grad_norm": 6.562345475679593, + "learning_rate": 1.4937305490264408e-05, + "loss": 1.6498, + "step": 59139 + }, + { + "epoch": 0.71, + "grad_norm": 27.523344912912112, + "learning_rate": 1.493679747189011e-05, + "loss": 1.0101, + "step": 59142 + }, + { + "epoch": 0.71, + "grad_norm": 9.114404759638791, + "learning_rate": 1.4936289436668301e-05, + "loss": 1.5586, + "step": 59145 + }, + { + "epoch": 0.71, + "grad_norm": 13.057528864105118, + "learning_rate": 1.4935781384600714e-05, + "loss": 1.4316, + "step": 59148 + }, + { + "epoch": 0.71, + "grad_norm": 9.07867976312945, + "learning_rate": 1.493527331568908e-05, + "loss": 1.6066, + "step": 59151 + }, + { + "epoch": 0.71, + "grad_norm": 9.182838253387516, + "learning_rate": 1.493476522993514e-05, + "loss": 1.5788, + "step": 59154 + }, + { + "epoch": 0.71, + "grad_norm": 9.93140853217268, + "learning_rate": 1.4934257127340624e-05, + "loss": 1.304, + "step": 59157 + }, + { + "epoch": 0.71, + "grad_norm": 3.3027307116751907, + "learning_rate": 1.493374900790726e-05, + "loss": 1.3239, + "step": 59160 + }, + { + "epoch": 0.71, + "grad_norm": 19.214186188034184, + "learning_rate": 1.4933240871636794e-05, + "loss": 1.5565, + "step": 59163 + }, + { + "epoch": 0.71, + "grad_norm": 3.679055959353797, + "learning_rate": 1.493273271853095e-05, + "loss": 1.4285, + "step": 59166 + }, + { + "epoch": 0.71, + "grad_norm": 38.925986908930724, + "learning_rate": 1.493222454859147e-05, + "loss": 1.3952, + "step": 59169 + }, + { + "epoch": 0.71, + "grad_norm": 5.810735827066486, + "learning_rate": 1.493171636182008e-05, + "loss": 1.2331, + "step": 59172 + }, + { + "epoch": 0.71, + "grad_norm": 4.956488027075402, + "learning_rate": 1.493120815821852e-05, + "loss": 1.0506, + "step": 59175 + }, + { + "epoch": 0.71, + "grad_norm": 19.86144252420648, + "learning_rate": 1.4930699937788524e-05, + "loss": 1.3256, + "step": 59178 + }, + { + "epoch": 0.71, + "grad_norm": 24.20600676463976, + "learning_rate": 1.4930191700531825e-05, + "loss": 1.304, + "step": 59181 + }, + { + "epoch": 0.71, + "grad_norm": 7.219099311715965, + "learning_rate": 1.4929683446450154e-05, + "loss": 1.1834, + "step": 59184 + }, + { + "epoch": 0.71, + "grad_norm": 10.262968658492984, + "learning_rate": 1.4929175175545252e-05, + "loss": 1.4102, + "step": 59187 + }, + { + "epoch": 0.71, + "grad_norm": 27.75867623872599, + "learning_rate": 1.4928666887818853e-05, + "loss": 1.3088, + "step": 59190 + }, + { + "epoch": 0.71, + "grad_norm": 5.146215423769901, + "learning_rate": 1.4928158583272683e-05, + "loss": 1.696, + "step": 59193 + }, + { + "epoch": 0.71, + "grad_norm": 8.929075097487992, + "learning_rate": 1.4927650261908487e-05, + "loss": 1.3988, + "step": 59196 + }, + { + "epoch": 0.71, + "grad_norm": 5.904573803912801, + "learning_rate": 1.4927141923727995e-05, + "loss": 1.1573, + "step": 59199 + }, + { + "epoch": 0.71, + "grad_norm": 5.867560758672684, + "learning_rate": 1.4926633568732945e-05, + "loss": 1.3914, + "step": 59202 + }, + { + "epoch": 0.71, + "grad_norm": 6.815072287227388, + "learning_rate": 1.4926125196925063e-05, + "loss": 1.3683, + "step": 59205 + }, + { + "epoch": 0.71, + "grad_norm": 14.111507931222647, + "learning_rate": 1.4925616808306093e-05, + "loss": 1.3964, + "step": 59208 + }, + { + "epoch": 0.71, + "grad_norm": 7.726729266259303, + "learning_rate": 1.4925108402877771e-05, + "loss": 1.5158, + "step": 59211 + }, + { + "epoch": 0.71, + "grad_norm": 17.444828807457906, + "learning_rate": 1.492459998064182e-05, + "loss": 1.1395, + "step": 59214 + }, + { + "epoch": 0.71, + "grad_norm": 17.976010873187963, + "learning_rate": 1.492409154159999e-05, + "loss": 1.3062, + "step": 59217 + }, + { + "epoch": 0.71, + "grad_norm": 34.03738953316824, + "learning_rate": 1.4923583085754008e-05, + "loss": 1.0986, + "step": 59220 + }, + { + "epoch": 0.71, + "grad_norm": 8.530155473988923, + "learning_rate": 1.4923074613105609e-05, + "loss": 1.5492, + "step": 59223 + }, + { + "epoch": 0.71, + "grad_norm": 7.786144396722112, + "learning_rate": 1.4922566123656528e-05, + "loss": 1.4035, + "step": 59226 + }, + { + "epoch": 0.71, + "grad_norm": 5.880361334275277, + "learning_rate": 1.4922057617408505e-05, + "loss": 1.2081, + "step": 59229 + }, + { + "epoch": 0.71, + "grad_norm": 87.80653500036397, + "learning_rate": 1.4921549094363272e-05, + "loss": 1.5579, + "step": 59232 + }, + { + "epoch": 0.71, + "grad_norm": 38.3668354879069, + "learning_rate": 1.4921040554522563e-05, + "loss": 1.2375, + "step": 59235 + }, + { + "epoch": 0.71, + "grad_norm": 32.40030777539891, + "learning_rate": 1.4920531997888116e-05, + "loss": 1.3317, + "step": 59238 + }, + { + "epoch": 0.71, + "grad_norm": 5.209500869655828, + "learning_rate": 1.4920023424461665e-05, + "loss": 1.5633, + "step": 59241 + }, + { + "epoch": 0.71, + "grad_norm": 215.36479606263052, + "learning_rate": 1.4919514834244949e-05, + "loss": 1.658, + "step": 59244 + }, + { + "epoch": 0.71, + "grad_norm": 16.602373986298606, + "learning_rate": 1.4919006227239696e-05, + "loss": 1.0017, + "step": 59247 + }, + { + "epoch": 0.71, + "grad_norm": 3.280753692970694, + "learning_rate": 1.491849760344765e-05, + "loss": 1.4881, + "step": 59250 + }, + { + "epoch": 0.71, + "grad_norm": 5.727670414649517, + "learning_rate": 1.4917988962870543e-05, + "loss": 1.4963, + "step": 59253 + }, + { + "epoch": 0.71, + "grad_norm": 6.414421634595093, + "learning_rate": 1.4917480305510111e-05, + "loss": 1.2346, + "step": 59256 + }, + { + "epoch": 0.71, + "grad_norm": 4.91738141093451, + "learning_rate": 1.491697163136809e-05, + "loss": 1.6117, + "step": 59259 + }, + { + "epoch": 0.71, + "grad_norm": 17.43480914743566, + "learning_rate": 1.4916462940446215e-05, + "loss": 1.7481, + "step": 59262 + }, + { + "epoch": 0.71, + "grad_norm": 9.502282007179875, + "learning_rate": 1.4915954232746224e-05, + "loss": 1.3766, + "step": 59265 + }, + { + "epoch": 0.71, + "grad_norm": 14.743329401643697, + "learning_rate": 1.4915445508269851e-05, + "loss": 1.8346, + "step": 59268 + }, + { + "epoch": 0.71, + "grad_norm": 12.70570235432432, + "learning_rate": 1.4914936767018832e-05, + "loss": 1.3052, + "step": 59271 + }, + { + "epoch": 0.71, + "grad_norm": 16.112925133947005, + "learning_rate": 1.4914428008994908e-05, + "loss": 1.8752, + "step": 59274 + }, + { + "epoch": 0.71, + "grad_norm": 13.513442480483171, + "learning_rate": 1.4913919234199807e-05, + "loss": 1.768, + "step": 59277 + }, + { + "epoch": 0.71, + "grad_norm": 10.995864689075239, + "learning_rate": 1.4913410442635272e-05, + "loss": 1.453, + "step": 59280 + }, + { + "epoch": 0.71, + "grad_norm": 19.779578939118885, + "learning_rate": 1.4912901634303037e-05, + "loss": 1.7164, + "step": 59283 + }, + { + "epoch": 0.71, + "grad_norm": 21.710868114011998, + "learning_rate": 1.4912392809204838e-05, + "loss": 1.5213, + "step": 59286 + }, + { + "epoch": 0.71, + "grad_norm": 18.488132569861527, + "learning_rate": 1.4911883967342413e-05, + "loss": 1.4886, + "step": 59289 + }, + { + "epoch": 0.71, + "grad_norm": 30.617043305406725, + "learning_rate": 1.4911375108717494e-05, + "loss": 1.5782, + "step": 59292 + }, + { + "epoch": 0.71, + "grad_norm": 15.116329541400882, + "learning_rate": 1.4910866233331822e-05, + "loss": 1.5547, + "step": 59295 + }, + { + "epoch": 0.71, + "grad_norm": 19.019583498198514, + "learning_rate": 1.4910357341187136e-05, + "loss": 1.5072, + "step": 59298 + }, + { + "epoch": 0.71, + "grad_norm": 7.061746678713132, + "learning_rate": 1.4909848432285166e-05, + "loss": 1.6274, + "step": 59301 + }, + { + "epoch": 0.71, + "grad_norm": 4.646653548842739, + "learning_rate": 1.4909339506627651e-05, + "loss": 1.4411, + "step": 59304 + }, + { + "epoch": 0.71, + "grad_norm": 11.982940601358802, + "learning_rate": 1.490883056421633e-05, + "loss": 1.5711, + "step": 59307 + }, + { + "epoch": 0.71, + "grad_norm": 255.86883055210004, + "learning_rate": 1.4908321605052937e-05, + "loss": 1.4545, + "step": 59310 + }, + { + "epoch": 0.71, + "grad_norm": 9.262485546499745, + "learning_rate": 1.4907812629139211e-05, + "loss": 1.7513, + "step": 59313 + }, + { + "epoch": 0.71, + "grad_norm": 3.9706391996684074, + "learning_rate": 1.4907303636476891e-05, + "loss": 1.496, + "step": 59316 + }, + { + "epoch": 0.71, + "grad_norm": 36.59551442702434, + "learning_rate": 1.490679462706771e-05, + "loss": 1.4987, + "step": 59319 + }, + { + "epoch": 0.71, + "grad_norm": 16.526156110274847, + "learning_rate": 1.4906285600913402e-05, + "loss": 1.4925, + "step": 59322 + }, + { + "epoch": 0.71, + "grad_norm": 12.595970780198696, + "learning_rate": 1.4905776558015714e-05, + "loss": 1.2406, + "step": 59325 + }, + { + "epoch": 0.71, + "grad_norm": 15.271364382832184, + "learning_rate": 1.4905267498376374e-05, + "loss": 1.0732, + "step": 59328 + }, + { + "epoch": 0.71, + "grad_norm": 7.215324269072861, + "learning_rate": 1.4904758421997127e-05, + "loss": 1.4749, + "step": 59331 + }, + { + "epoch": 0.71, + "grad_norm": 10.366749185242668, + "learning_rate": 1.4904249328879701e-05, + "loss": 1.493, + "step": 59334 + }, + { + "epoch": 0.71, + "grad_norm": 16.12588201317063, + "learning_rate": 1.4903740219025844e-05, + "loss": 1.1938, + "step": 59337 + }, + { + "epoch": 0.71, + "grad_norm": 10.193917903470703, + "learning_rate": 1.4903231092437285e-05, + "loss": 1.3813, + "step": 59340 + }, + { + "epoch": 0.71, + "grad_norm": 8.972538748577593, + "learning_rate": 1.4902721949115764e-05, + "loss": 1.3317, + "step": 59343 + }, + { + "epoch": 0.71, + "grad_norm": 15.359865777990088, + "learning_rate": 1.4902212789063017e-05, + "loss": 1.2136, + "step": 59346 + }, + { + "epoch": 0.71, + "grad_norm": 22.379937132235554, + "learning_rate": 1.4901703612280786e-05, + "loss": 1.0863, + "step": 59349 + }, + { + "epoch": 0.71, + "grad_norm": 14.169982099191769, + "learning_rate": 1.4901194418770808e-05, + "loss": 1.4302, + "step": 59352 + }, + { + "epoch": 0.71, + "grad_norm": 20.774775173283885, + "learning_rate": 1.4900685208534817e-05, + "loss": 1.1823, + "step": 59355 + }, + { + "epoch": 0.71, + "grad_norm": 54.97619668187086, + "learning_rate": 1.4900175981574554e-05, + "loss": 1.6615, + "step": 59358 + }, + { + "epoch": 0.71, + "grad_norm": 15.999646570720353, + "learning_rate": 1.4899666737891754e-05, + "loss": 1.3851, + "step": 59361 + }, + { + "epoch": 0.71, + "grad_norm": 2.545885978210246, + "learning_rate": 1.4899157477488155e-05, + "loss": 1.4451, + "step": 59364 + }, + { + "epoch": 0.71, + "grad_norm": 9.203482529860873, + "learning_rate": 1.4898648200365498e-05, + "loss": 1.1763, + "step": 59367 + }, + { + "epoch": 0.71, + "grad_norm": 17.370603944451354, + "learning_rate": 1.489813890652552e-05, + "loss": 1.45, + "step": 59370 + }, + { + "epoch": 0.71, + "grad_norm": 11.663940775733241, + "learning_rate": 1.4897629595969956e-05, + "loss": 1.4093, + "step": 59373 + }, + { + "epoch": 0.71, + "grad_norm": 6.0832167482726724, + "learning_rate": 1.4897120268700547e-05, + "loss": 1.3669, + "step": 59376 + }, + { + "epoch": 0.71, + "grad_norm": 28.29203173916062, + "learning_rate": 1.4896610924719031e-05, + "loss": 1.761, + "step": 59379 + }, + { + "epoch": 0.71, + "grad_norm": 11.826021968499976, + "learning_rate": 1.4896101564027143e-05, + "loss": 1.3672, + "step": 59382 + }, + { + "epoch": 0.71, + "grad_norm": 6.996193470388824, + "learning_rate": 1.4895592186626629e-05, + "loss": 1.4975, + "step": 59385 + }, + { + "epoch": 0.71, + "grad_norm": 8.323539702247365, + "learning_rate": 1.4895082792519218e-05, + "loss": 1.1554, + "step": 59388 + }, + { + "epoch": 0.71, + "grad_norm": 9.586471454544082, + "learning_rate": 1.4894573381706655e-05, + "loss": 0.9845, + "step": 59391 + }, + { + "epoch": 0.71, + "grad_norm": 18.704898215066905, + "learning_rate": 1.4894063954190676e-05, + "loss": 1.1116, + "step": 59394 + }, + { + "epoch": 0.71, + "grad_norm": 3.3043731163052095, + "learning_rate": 1.4893554509973018e-05, + "loss": 1.2907, + "step": 59397 + }, + { + "epoch": 0.71, + "grad_norm": 25.945270336380748, + "learning_rate": 1.4893045049055423e-05, + "loss": 1.1178, + "step": 59400 + }, + { + "epoch": 0.71, + "grad_norm": 17.202886908224034, + "learning_rate": 1.4892535571439625e-05, + "loss": 1.2615, + "step": 59403 + }, + { + "epoch": 0.71, + "grad_norm": 23.581690666164086, + "learning_rate": 1.4892026077127368e-05, + "loss": 2.1058, + "step": 59406 + }, + { + "epoch": 0.71, + "grad_norm": 7.777179600966964, + "learning_rate": 1.4891516566120386e-05, + "loss": 1.3521, + "step": 59409 + }, + { + "epoch": 0.71, + "grad_norm": 15.505860537897547, + "learning_rate": 1.4891007038420421e-05, + "loss": 1.3127, + "step": 59412 + }, + { + "epoch": 0.71, + "grad_norm": 6.883450587031443, + "learning_rate": 1.489049749402921e-05, + "loss": 1.1191, + "step": 59415 + }, + { + "epoch": 0.71, + "grad_norm": 21.85098261231253, + "learning_rate": 1.4889987932948496e-05, + "loss": 1.4151, + "step": 59418 + }, + { + "epoch": 0.71, + "grad_norm": 11.241869475233855, + "learning_rate": 1.4889478355180009e-05, + "loss": 1.1446, + "step": 59421 + }, + { + "epoch": 0.71, + "grad_norm": 8.199323312781507, + "learning_rate": 1.48889687607255e-05, + "loss": 1.2232, + "step": 59424 + }, + { + "epoch": 0.71, + "grad_norm": 23.950701054175006, + "learning_rate": 1.4888459149586695e-05, + "loss": 1.3487, + "step": 59427 + }, + { + "epoch": 0.71, + "grad_norm": 7.677440784128069, + "learning_rate": 1.488794952176534e-05, + "loss": 1.4598, + "step": 59430 + }, + { + "epoch": 0.71, + "grad_norm": 6.467140979196112, + "learning_rate": 1.4887439877263181e-05, + "loss": 1.2668, + "step": 59433 + }, + { + "epoch": 0.71, + "grad_norm": 14.099308142929793, + "learning_rate": 1.4886930216081945e-05, + "loss": 1.1083, + "step": 59436 + }, + { + "epoch": 0.71, + "grad_norm": 9.432913285495397, + "learning_rate": 1.4886420538223378e-05, + "loss": 1.434, + "step": 59439 + }, + { + "epoch": 0.71, + "grad_norm": 8.883490069070628, + "learning_rate": 1.4885910843689215e-05, + "loss": 1.4308, + "step": 59442 + }, + { + "epoch": 0.71, + "grad_norm": 12.61463730102042, + "learning_rate": 1.4885401132481204e-05, + "loss": 1.4804, + "step": 59445 + }, + { + "epoch": 0.71, + "grad_norm": 19.89335838518956, + "learning_rate": 1.4884891404601072e-05, + "loss": 1.512, + "step": 59448 + }, + { + "epoch": 0.71, + "grad_norm": 42.39407627619391, + "learning_rate": 1.4884381660050569e-05, + "loss": 1.6647, + "step": 59451 + }, + { + "epoch": 0.71, + "grad_norm": 8.189497835922808, + "learning_rate": 1.488387189883143e-05, + "loss": 1.7148, + "step": 59454 + }, + { + "epoch": 0.71, + "grad_norm": 15.838363967411448, + "learning_rate": 1.4883362120945393e-05, + "loss": 1.6751, + "step": 59457 + }, + { + "epoch": 0.71, + "grad_norm": 8.495464064984578, + "learning_rate": 1.4882852326394204e-05, + "loss": 1.2013, + "step": 59460 + }, + { + "epoch": 0.72, + "grad_norm": 7.087838218143932, + "learning_rate": 1.4882342515179596e-05, + "loss": 1.6211, + "step": 59463 + }, + { + "epoch": 0.72, + "grad_norm": 13.340581226021932, + "learning_rate": 1.4881832687303312e-05, + "loss": 1.5544, + "step": 59466 + }, + { + "epoch": 0.72, + "grad_norm": 41.67753058380868, + "learning_rate": 1.4881322842767092e-05, + "loss": 1.1676, + "step": 59469 + }, + { + "epoch": 0.72, + "grad_norm": 7.398176357131502, + "learning_rate": 1.4880812981572677e-05, + "loss": 1.5944, + "step": 59472 + }, + { + "epoch": 0.72, + "grad_norm": 12.096110443942331, + "learning_rate": 1.4880303103721802e-05, + "loss": 1.5593, + "step": 59475 + }, + { + "epoch": 0.72, + "grad_norm": 16.357882912764655, + "learning_rate": 1.4879793209216215e-05, + "loss": 1.297, + "step": 59478 + }, + { + "epoch": 0.72, + "grad_norm": 35.15805202399291, + "learning_rate": 1.4879283298057647e-05, + "loss": 1.784, + "step": 59481 + }, + { + "epoch": 0.72, + "grad_norm": 18.02891490447443, + "learning_rate": 1.4878773370247844e-05, + "loss": 1.4258, + "step": 59484 + }, + { + "epoch": 0.72, + "grad_norm": 4.140595272435527, + "learning_rate": 1.4878263425788546e-05, + "loss": 1.3117, + "step": 59487 + }, + { + "epoch": 0.72, + "grad_norm": 7.224173552448953, + "learning_rate": 1.487775346468149e-05, + "loss": 1.3228, + "step": 59490 + }, + { + "epoch": 0.72, + "grad_norm": 5.511110585706627, + "learning_rate": 1.487724348692842e-05, + "loss": 1.3176, + "step": 59493 + }, + { + "epoch": 0.72, + "grad_norm": 18.05822750069333, + "learning_rate": 1.4876733492531075e-05, + "loss": 1.2582, + "step": 59496 + }, + { + "epoch": 0.72, + "grad_norm": 4.513756224271292, + "learning_rate": 1.4876223481491194e-05, + "loss": 1.5014, + "step": 59499 + }, + { + "epoch": 0.72, + "grad_norm": 33.244459790681866, + "learning_rate": 1.4875713453810518e-05, + "loss": 1.4132, + "step": 59502 + }, + { + "epoch": 0.72, + "grad_norm": 6.555241737809315, + "learning_rate": 1.487520340949079e-05, + "loss": 1.265, + "step": 59505 + }, + { + "epoch": 0.72, + "grad_norm": 10.836732500549664, + "learning_rate": 1.4874693348533749e-05, + "loss": 1.5174, + "step": 59508 + }, + { + "epoch": 0.72, + "grad_norm": 19.623375349570647, + "learning_rate": 1.4874183270941134e-05, + "loss": 1.2898, + "step": 59511 + }, + { + "epoch": 0.72, + "grad_norm": 22.131494809299838, + "learning_rate": 1.4873673176714687e-05, + "loss": 1.3591, + "step": 59514 + }, + { + "epoch": 0.72, + "grad_norm": 9.959823001938016, + "learning_rate": 1.4873163065856152e-05, + "loss": 1.4176, + "step": 59517 + }, + { + "epoch": 0.72, + "grad_norm": 17.94613814055991, + "learning_rate": 1.4872652938367264e-05, + "loss": 1.6675, + "step": 59520 + }, + { + "epoch": 0.72, + "grad_norm": 5.400836478777159, + "learning_rate": 1.4872142794249765e-05, + "loss": 1.5172, + "step": 59523 + }, + { + "epoch": 0.72, + "grad_norm": 16.937788159612037, + "learning_rate": 1.48716326335054e-05, + "loss": 1.2907, + "step": 59526 + }, + { + "epoch": 0.72, + "grad_norm": 7.461491333613598, + "learning_rate": 1.487112245613591e-05, + "loss": 1.9058, + "step": 59529 + }, + { + "epoch": 0.72, + "grad_norm": 4.942482978379788, + "learning_rate": 1.4870612262143028e-05, + "loss": 1.2978, + "step": 59532 + }, + { + "epoch": 0.72, + "grad_norm": 11.47237146090662, + "learning_rate": 1.4870102051528506e-05, + "loss": 1.3674, + "step": 59535 + }, + { + "epoch": 0.72, + "grad_norm": 2.8285429997998555, + "learning_rate": 1.4869591824294076e-05, + "loss": 1.5627, + "step": 59538 + }, + { + "epoch": 0.72, + "grad_norm": 2.6521171919830198, + "learning_rate": 1.4869081580441486e-05, + "loss": 1.3205, + "step": 59541 + }, + { + "epoch": 0.72, + "grad_norm": 10.03796086895401, + "learning_rate": 1.4868571319972472e-05, + "loss": 1.2319, + "step": 59544 + }, + { + "epoch": 0.72, + "grad_norm": 11.044102964501963, + "learning_rate": 1.4868061042888777e-05, + "loss": 1.385, + "step": 59547 + }, + { + "epoch": 0.72, + "grad_norm": 12.718756794848412, + "learning_rate": 1.4867550749192146e-05, + "loss": 1.5959, + "step": 59550 + }, + { + "epoch": 0.72, + "grad_norm": 23.830334148926305, + "learning_rate": 1.4867040438884317e-05, + "loss": 1.8562, + "step": 59553 + }, + { + "epoch": 0.72, + "grad_norm": 5.851535738017629, + "learning_rate": 1.4866530111967032e-05, + "loss": 1.196, + "step": 59556 + }, + { + "epoch": 0.72, + "grad_norm": 7.855830549964985, + "learning_rate": 1.4866019768442032e-05, + "loss": 1.6371, + "step": 59559 + }, + { + "epoch": 0.72, + "grad_norm": 20.3534174738692, + "learning_rate": 1.4865509408311061e-05, + "loss": 1.3711, + "step": 59562 + }, + { + "epoch": 0.72, + "grad_norm": 29.097444391869534, + "learning_rate": 1.4864999031575855e-05, + "loss": 1.3234, + "step": 59565 + }, + { + "epoch": 0.72, + "grad_norm": 6.376068426098594, + "learning_rate": 1.4864488638238164e-05, + "loss": 1.3411, + "step": 59568 + }, + { + "epoch": 0.72, + "grad_norm": 15.119894301953149, + "learning_rate": 1.4863978228299723e-05, + "loss": 1.3736, + "step": 59571 + }, + { + "epoch": 0.72, + "grad_norm": 35.076655607291904, + "learning_rate": 1.4863467801762278e-05, + "loss": 1.0233, + "step": 59574 + }, + { + "epoch": 0.72, + "grad_norm": 8.040985102182287, + "learning_rate": 1.4862957358627565e-05, + "loss": 1.3077, + "step": 59577 + }, + { + "epoch": 0.72, + "grad_norm": 18.099443685130407, + "learning_rate": 1.4862446898897336e-05, + "loss": 1.877, + "step": 59580 + }, + { + "epoch": 0.72, + "grad_norm": 19.262800376584664, + "learning_rate": 1.4861936422573324e-05, + "loss": 1.5744, + "step": 59583 + }, + { + "epoch": 0.72, + "grad_norm": 28.976589835031817, + "learning_rate": 1.4861425929657274e-05, + "loss": 1.2588, + "step": 59586 + }, + { + "epoch": 0.72, + "grad_norm": 4.833053149960425, + "learning_rate": 1.4860915420150927e-05, + "loss": 1.526, + "step": 59589 + }, + { + "epoch": 0.72, + "grad_norm": 10.681304983090415, + "learning_rate": 1.486040489405603e-05, + "loss": 1.2102, + "step": 59592 + }, + { + "epoch": 0.72, + "grad_norm": 27.786984105764375, + "learning_rate": 1.4859894351374322e-05, + "loss": 1.3424, + "step": 59595 + }, + { + "epoch": 0.72, + "grad_norm": 16.19411913706336, + "learning_rate": 1.485938379210754e-05, + "loss": 1.2861, + "step": 59598 + }, + { + "epoch": 0.72, + "grad_norm": 6.085381798434675, + "learning_rate": 1.4858873216257437e-05, + "loss": 1.2009, + "step": 59601 + }, + { + "epoch": 0.72, + "grad_norm": 5.899377028240293, + "learning_rate": 1.4858362623825748e-05, + "loss": 1.6756, + "step": 59604 + }, + { + "epoch": 0.72, + "grad_norm": 45.98418935597271, + "learning_rate": 1.4857852014814217e-05, + "loss": 1.7306, + "step": 59607 + }, + { + "epoch": 0.72, + "grad_norm": 21.902185423997963, + "learning_rate": 1.4857341389224587e-05, + "loss": 1.2984, + "step": 59610 + }, + { + "epoch": 0.72, + "grad_norm": 23.663554097053552, + "learning_rate": 1.48568307470586e-05, + "loss": 1.401, + "step": 59613 + }, + { + "epoch": 0.72, + "grad_norm": 12.22090165800523, + "learning_rate": 1.4856320088318001e-05, + "loss": 1.2412, + "step": 59616 + }, + { + "epoch": 0.72, + "grad_norm": 6.850368618761355, + "learning_rate": 1.4855809413004528e-05, + "loss": 1.2372, + "step": 59619 + }, + { + "epoch": 0.72, + "grad_norm": 14.95743650623534, + "learning_rate": 1.4855298721119929e-05, + "loss": 0.942, + "step": 59622 + }, + { + "epoch": 0.72, + "grad_norm": 15.297326210082657, + "learning_rate": 1.4854788012665942e-05, + "loss": 1.0735, + "step": 59625 + }, + { + "epoch": 0.72, + "grad_norm": 13.138669931146447, + "learning_rate": 1.4854277287644315e-05, + "loss": 1.5013, + "step": 59628 + }, + { + "epoch": 0.72, + "grad_norm": 10.70012842208559, + "learning_rate": 1.4853766546056785e-05, + "loss": 1.6514, + "step": 59631 + }, + { + "epoch": 0.72, + "grad_norm": 3.397226467129356, + "learning_rate": 1.4853255787905102e-05, + "loss": 1.1572, + "step": 59634 + }, + { + "epoch": 0.72, + "grad_norm": 5.816769344838583, + "learning_rate": 1.4852745013191e-05, + "loss": 1.4205, + "step": 59637 + }, + { + "epoch": 0.72, + "grad_norm": 11.574899628353489, + "learning_rate": 1.4852234221916232e-05, + "loss": 1.319, + "step": 59640 + }, + { + "epoch": 0.72, + "grad_norm": 13.880787170914216, + "learning_rate": 1.4851723414082532e-05, + "loss": 1.3413, + "step": 59643 + }, + { + "epoch": 0.72, + "grad_norm": 36.04035357419579, + "learning_rate": 1.4851212589691653e-05, + "loss": 1.518, + "step": 59646 + }, + { + "epoch": 0.72, + "grad_norm": 8.51276809242479, + "learning_rate": 1.4850701748745329e-05, + "loss": 1.6267, + "step": 59649 + }, + { + "epoch": 0.72, + "grad_norm": 9.607943216804104, + "learning_rate": 1.4850190891245306e-05, + "loss": 1.4701, + "step": 59652 + }, + { + "epoch": 0.72, + "grad_norm": 16.531165518012447, + "learning_rate": 1.484968001719333e-05, + "loss": 1.5633, + "step": 59655 + }, + { + "epoch": 0.72, + "grad_norm": 13.592227392323316, + "learning_rate": 1.4849169126591146e-05, + "loss": 1.6389, + "step": 59658 + }, + { + "epoch": 0.72, + "grad_norm": 25.74058726674828, + "learning_rate": 1.4848658219440491e-05, + "loss": 1.7563, + "step": 59661 + }, + { + "epoch": 0.72, + "grad_norm": 3.219466852634506, + "learning_rate": 1.4848147295743112e-05, + "loss": 1.3646, + "step": 59664 + }, + { + "epoch": 0.72, + "grad_norm": 5.498373062143017, + "learning_rate": 1.4847636355500755e-05, + "loss": 1.519, + "step": 59667 + }, + { + "epoch": 0.72, + "grad_norm": 9.647838495571637, + "learning_rate": 1.4847125398715158e-05, + "loss": 1.4348, + "step": 59670 + }, + { + "epoch": 0.72, + "grad_norm": 19.613957046741948, + "learning_rate": 1.4846614425388069e-05, + "loss": 1.3561, + "step": 59673 + }, + { + "epoch": 0.72, + "grad_norm": 32.33730268182465, + "learning_rate": 1.4846103435521233e-05, + "loss": 1.1355, + "step": 59676 + }, + { + "epoch": 0.72, + "grad_norm": 17.031774018388138, + "learning_rate": 1.4845592429116388e-05, + "loss": 1.4967, + "step": 59679 + }, + { + "epoch": 0.72, + "grad_norm": 8.349863003287078, + "learning_rate": 1.4845081406175284e-05, + "loss": 1.4738, + "step": 59682 + }, + { + "epoch": 0.72, + "grad_norm": 12.757050020353146, + "learning_rate": 1.4844570366699662e-05, + "loss": 0.9259, + "step": 59685 + }, + { + "epoch": 0.72, + "grad_norm": 30.91794320521937, + "learning_rate": 1.4844059310691265e-05, + "loss": 1.3751, + "step": 59688 + }, + { + "epoch": 0.72, + "grad_norm": 11.8229225383199, + "learning_rate": 1.4843548238151838e-05, + "loss": 1.4061, + "step": 59691 + }, + { + "epoch": 0.72, + "grad_norm": 4.782272675708805, + "learning_rate": 1.4843037149083128e-05, + "loss": 1.7154, + "step": 59694 + }, + { + "epoch": 0.72, + "grad_norm": 11.906808028168976, + "learning_rate": 1.4842526043486875e-05, + "loss": 1.3819, + "step": 59697 + }, + { + "epoch": 0.72, + "grad_norm": 23.879680054564208, + "learning_rate": 1.4842014921364826e-05, + "loss": 1.3999, + "step": 59700 + }, + { + "epoch": 0.72, + "grad_norm": 28.756617122141154, + "learning_rate": 1.4841503782718725e-05, + "loss": 1.0751, + "step": 59703 + }, + { + "epoch": 0.72, + "grad_norm": 8.645073644312284, + "learning_rate": 1.4840992627550313e-05, + "loss": 1.6847, + "step": 59706 + }, + { + "epoch": 0.72, + "grad_norm": 4.5573516591775665, + "learning_rate": 1.4840481455861337e-05, + "loss": 1.2239, + "step": 59709 + }, + { + "epoch": 0.72, + "grad_norm": 15.548267484205356, + "learning_rate": 1.4839970267653543e-05, + "loss": 1.0592, + "step": 59712 + }, + { + "epoch": 0.72, + "grad_norm": 30.88450152415426, + "learning_rate": 1.4839459062928673e-05, + "loss": 1.343, + "step": 59715 + }, + { + "epoch": 0.72, + "grad_norm": 6.9803507821073705, + "learning_rate": 1.483894784168847e-05, + "loss": 1.4738, + "step": 59718 + }, + { + "epoch": 0.72, + "grad_norm": 21.999880787421617, + "learning_rate": 1.4838436603934685e-05, + "loss": 2.0685, + "step": 59721 + }, + { + "epoch": 0.72, + "grad_norm": 10.420598286302804, + "learning_rate": 1.4837925349669057e-05, + "loss": 1.7095, + "step": 59724 + }, + { + "epoch": 0.72, + "grad_norm": 13.126388800629769, + "learning_rate": 1.4837414078893333e-05, + "loss": 1.7294, + "step": 59727 + }, + { + "epoch": 0.72, + "grad_norm": 20.720404982981574, + "learning_rate": 1.4836902791609256e-05, + "loss": 1.3723, + "step": 59730 + }, + { + "epoch": 0.72, + "grad_norm": 6.0460053306680965, + "learning_rate": 1.4836391487818573e-05, + "loss": 1.3492, + "step": 59733 + }, + { + "epoch": 0.72, + "grad_norm": 14.05902648913761, + "learning_rate": 1.4835880167523029e-05, + "loss": 1.2737, + "step": 59736 + }, + { + "epoch": 0.72, + "grad_norm": 20.530888517336606, + "learning_rate": 1.4835368830724364e-05, + "loss": 1.4313, + "step": 59739 + }, + { + "epoch": 0.72, + "grad_norm": 55.75664366233402, + "learning_rate": 1.4834857477424329e-05, + "loss": 1.3556, + "step": 59742 + }, + { + "epoch": 0.72, + "grad_norm": 10.515771355039634, + "learning_rate": 1.4834346107624669e-05, + "loss": 1.3836, + "step": 59745 + }, + { + "epoch": 0.72, + "grad_norm": 9.84125534433512, + "learning_rate": 1.4833834721327125e-05, + "loss": 1.7675, + "step": 59748 + }, + { + "epoch": 0.72, + "grad_norm": 25.112032728633753, + "learning_rate": 1.4833323318533444e-05, + "loss": 1.3174, + "step": 59751 + }, + { + "epoch": 0.72, + "grad_norm": 11.528977656350305, + "learning_rate": 1.4832811899245372e-05, + "loss": 1.4802, + "step": 59754 + }, + { + "epoch": 0.72, + "grad_norm": 8.117748668661507, + "learning_rate": 1.4832300463464655e-05, + "loss": 1.1987, + "step": 59757 + }, + { + "epoch": 0.72, + "grad_norm": 11.631940251708773, + "learning_rate": 1.4831789011193035e-05, + "loss": 1.6858, + "step": 59760 + }, + { + "epoch": 0.72, + "grad_norm": 22.414879761504327, + "learning_rate": 1.483127754243226e-05, + "loss": 1.1764, + "step": 59763 + }, + { + "epoch": 0.72, + "grad_norm": 6.747978309331465, + "learning_rate": 1.4830766057184076e-05, + "loss": 1.4079, + "step": 59766 + }, + { + "epoch": 0.72, + "grad_norm": 29.316140963587266, + "learning_rate": 1.4830254555450227e-05, + "loss": 1.2586, + "step": 59769 + }, + { + "epoch": 0.72, + "grad_norm": 11.354173744216768, + "learning_rate": 1.4829743037232459e-05, + "loss": 1.475, + "step": 59772 + }, + { + "epoch": 0.72, + "grad_norm": 3.360273182709526, + "learning_rate": 1.4829231502532518e-05, + "loss": 1.4511, + "step": 59775 + }, + { + "epoch": 0.72, + "grad_norm": 6.579376925651102, + "learning_rate": 1.4828719951352149e-05, + "loss": 1.3485, + "step": 59778 + }, + { + "epoch": 0.72, + "grad_norm": 6.781472396487896, + "learning_rate": 1.4828208383693094e-05, + "loss": 1.235, + "step": 59781 + }, + { + "epoch": 0.72, + "grad_norm": 17.116605160990794, + "learning_rate": 1.4827696799557109e-05, + "loss": 1.2271, + "step": 59784 + }, + { + "epoch": 0.72, + "grad_norm": 7.986926472847404, + "learning_rate": 1.4827185198945933e-05, + "loss": 1.3324, + "step": 59787 + }, + { + "epoch": 0.72, + "grad_norm": 32.09817010038977, + "learning_rate": 1.482667358186131e-05, + "loss": 1.1424, + "step": 59790 + }, + { + "epoch": 0.72, + "grad_norm": 13.587917853319645, + "learning_rate": 1.482616194830499e-05, + "loss": 1.1796, + "step": 59793 + }, + { + "epoch": 0.72, + "grad_norm": 12.197104712135992, + "learning_rate": 1.482565029827872e-05, + "loss": 1.3926, + "step": 59796 + }, + { + "epoch": 0.72, + "grad_norm": 55.26515296192514, + "learning_rate": 1.4825138631784239e-05, + "loss": 1.3191, + "step": 59799 + }, + { + "epoch": 0.72, + "grad_norm": 9.297870774467997, + "learning_rate": 1.4824626948823302e-05, + "loss": 1.2471, + "step": 59802 + }, + { + "epoch": 0.72, + "grad_norm": 16.538118725436004, + "learning_rate": 1.4824115249397645e-05, + "loss": 1.516, + "step": 59805 + }, + { + "epoch": 0.72, + "grad_norm": 7.008632697607967, + "learning_rate": 1.4823603533509028e-05, + "loss": 1.4568, + "step": 59808 + }, + { + "epoch": 0.72, + "grad_norm": 17.998801708231063, + "learning_rate": 1.4823091801159187e-05, + "loss": 1.5726, + "step": 59811 + }, + { + "epoch": 0.72, + "grad_norm": 106.17307046522279, + "learning_rate": 1.4822580052349866e-05, + "loss": 1.3873, + "step": 59814 + }, + { + "epoch": 0.72, + "grad_norm": 30.553562944899507, + "learning_rate": 1.4822068287082824e-05, + "loss": 1.3823, + "step": 59817 + }, + { + "epoch": 0.72, + "grad_norm": 22.534939199578883, + "learning_rate": 1.4821556505359796e-05, + "loss": 1.4596, + "step": 59820 + }, + { + "epoch": 0.72, + "grad_norm": 48.30144321481156, + "learning_rate": 1.4821044707182532e-05, + "loss": 1.0729, + "step": 59823 + }, + { + "epoch": 0.72, + "grad_norm": 43.94212789646125, + "learning_rate": 1.4820532892552776e-05, + "loss": 1.3769, + "step": 59826 + }, + { + "epoch": 0.72, + "grad_norm": 9.861361624133686, + "learning_rate": 1.4820021061472286e-05, + "loss": 1.5925, + "step": 59829 + }, + { + "epoch": 0.72, + "grad_norm": 21.95893980119048, + "learning_rate": 1.4819509213942793e-05, + "loss": 1.502, + "step": 59832 + }, + { + "epoch": 0.72, + "grad_norm": 6.0665055514435755, + "learning_rate": 1.4818997349966053e-05, + "loss": 1.6985, + "step": 59835 + }, + { + "epoch": 0.72, + "grad_norm": 14.69569532566831, + "learning_rate": 1.4818485469543813e-05, + "loss": 1.4167, + "step": 59838 + }, + { + "epoch": 0.72, + "grad_norm": 7.517766685462204, + "learning_rate": 1.4817973572677816e-05, + "loss": 1.0505, + "step": 59841 + }, + { + "epoch": 0.72, + "grad_norm": 13.793142890068516, + "learning_rate": 1.481746165936981e-05, + "loss": 1.237, + "step": 59844 + }, + { + "epoch": 0.72, + "grad_norm": 17.05350622376583, + "learning_rate": 1.481694972962154e-05, + "loss": 1.3857, + "step": 59847 + }, + { + "epoch": 0.72, + "grad_norm": 5.153017796368079, + "learning_rate": 1.4816437783434763e-05, + "loss": 1.4962, + "step": 59850 + }, + { + "epoch": 0.72, + "grad_norm": 26.542894865378212, + "learning_rate": 1.4815925820811214e-05, + "loss": 1.2931, + "step": 59853 + }, + { + "epoch": 0.72, + "grad_norm": 39.79742447418944, + "learning_rate": 1.4815413841752647e-05, + "loss": 1.8134, + "step": 59856 + }, + { + "epoch": 0.72, + "grad_norm": 12.393487769416689, + "learning_rate": 1.4814901846260803e-05, + "loss": 1.0954, + "step": 59859 + }, + { + "epoch": 0.72, + "grad_norm": 17.946746103319416, + "learning_rate": 1.4814389834337438e-05, + "loss": 1.935, + "step": 59862 + }, + { + "epoch": 0.72, + "grad_norm": 36.318859281833575, + "learning_rate": 1.4813877805984292e-05, + "loss": 1.4439, + "step": 59865 + }, + { + "epoch": 0.72, + "grad_norm": 10.768866832158121, + "learning_rate": 1.4813365761203113e-05, + "loss": 1.3972, + "step": 59868 + }, + { + "epoch": 0.72, + "grad_norm": 54.31084653851951, + "learning_rate": 1.4812853699995656e-05, + "loss": 1.6956, + "step": 59871 + }, + { + "epoch": 0.72, + "grad_norm": 42.08979884070107, + "learning_rate": 1.4812341622363658e-05, + "loss": 1.3726, + "step": 59874 + }, + { + "epoch": 0.72, + "grad_norm": 7.538206861690046, + "learning_rate": 1.4811829528308875e-05, + "loss": 1.1294, + "step": 59877 + }, + { + "epoch": 0.72, + "grad_norm": 19.525401346969144, + "learning_rate": 1.4811317417833047e-05, + "loss": 1.3887, + "step": 59880 + }, + { + "epoch": 0.72, + "grad_norm": 21.839774899938835, + "learning_rate": 1.4810805290937929e-05, + "loss": 1.4943, + "step": 59883 + }, + { + "epoch": 0.72, + "grad_norm": 8.753994488840057, + "learning_rate": 1.4810293147625263e-05, + "loss": 2.0314, + "step": 59886 + }, + { + "epoch": 0.72, + "grad_norm": 14.013474384395073, + "learning_rate": 1.48097809878968e-05, + "loss": 1.7818, + "step": 59889 + }, + { + "epoch": 0.72, + "grad_norm": 7.587749796922366, + "learning_rate": 1.4809268811754289e-05, + "loss": 1.1019, + "step": 59892 + }, + { + "epoch": 0.72, + "grad_norm": 4.986143182805031, + "learning_rate": 1.4808756619199474e-05, + "loss": 1.4623, + "step": 59895 + }, + { + "epoch": 0.72, + "grad_norm": 23.736704047356508, + "learning_rate": 1.4808244410234105e-05, + "loss": 1.3609, + "step": 59898 + }, + { + "epoch": 0.72, + "grad_norm": 8.278052937107109, + "learning_rate": 1.480773218485993e-05, + "loss": 1.3723, + "step": 59901 + }, + { + "epoch": 0.72, + "grad_norm": 33.92736297393913, + "learning_rate": 1.4807219943078697e-05, + "loss": 1.2931, + "step": 59904 + }, + { + "epoch": 0.72, + "grad_norm": 10.908425567445995, + "learning_rate": 1.4806707684892151e-05, + "loss": 1.23, + "step": 59907 + }, + { + "epoch": 0.72, + "grad_norm": 21.302274459757207, + "learning_rate": 1.4806195410302047e-05, + "loss": 1.5119, + "step": 59910 + }, + { + "epoch": 0.72, + "grad_norm": 8.087737927963362, + "learning_rate": 1.4805683119310128e-05, + "loss": 1.4214, + "step": 59913 + }, + { + "epoch": 0.72, + "grad_norm": 12.416563356687394, + "learning_rate": 1.480517081191814e-05, + "loss": 1.5395, + "step": 59916 + }, + { + "epoch": 0.72, + "grad_norm": 17.075192313629856, + "learning_rate": 1.4804658488127841e-05, + "loss": 1.3552, + "step": 59919 + }, + { + "epoch": 0.72, + "grad_norm": 14.946515184388717, + "learning_rate": 1.480414614794097e-05, + "loss": 1.2048, + "step": 59922 + }, + { + "epoch": 0.72, + "grad_norm": 9.15207237749323, + "learning_rate": 1.480363379135928e-05, + "loss": 1.4368, + "step": 59925 + }, + { + "epoch": 0.72, + "grad_norm": 11.066286218320009, + "learning_rate": 1.4803121418384516e-05, + "loss": 1.9228, + "step": 59928 + }, + { + "epoch": 0.72, + "grad_norm": 6.466725513085783, + "learning_rate": 1.4802609029018431e-05, + "loss": 1.2843, + "step": 59931 + }, + { + "epoch": 0.72, + "grad_norm": 11.060833898256572, + "learning_rate": 1.480209662326277e-05, + "loss": 1.4096, + "step": 59934 + }, + { + "epoch": 0.72, + "grad_norm": 9.285571402721379, + "learning_rate": 1.4801584201119283e-05, + "loss": 1.3292, + "step": 59937 + }, + { + "epoch": 0.72, + "grad_norm": 11.16124090882073, + "learning_rate": 1.4801071762589719e-05, + "loss": 1.6161, + "step": 59940 + }, + { + "epoch": 0.72, + "grad_norm": 33.34230006123077, + "learning_rate": 1.4800559307675824e-05, + "loss": 1.1435, + "step": 59943 + }, + { + "epoch": 0.72, + "grad_norm": 19.8233951628596, + "learning_rate": 1.4800046836379353e-05, + "loss": 1.1532, + "step": 59946 + }, + { + "epoch": 0.72, + "grad_norm": 23.817185326670206, + "learning_rate": 1.4799534348702047e-05, + "loss": 1.4917, + "step": 59949 + }, + { + "epoch": 0.72, + "grad_norm": 19.278628920992432, + "learning_rate": 1.4799021844645662e-05, + "loss": 1.5162, + "step": 59952 + }, + { + "epoch": 0.72, + "grad_norm": 8.198990700171853, + "learning_rate": 1.4798509324211946e-05, + "loss": 1.1494, + "step": 59955 + }, + { + "epoch": 0.72, + "grad_norm": 10.227838757450805, + "learning_rate": 1.4797996787402641e-05, + "loss": 1.3841, + "step": 59958 + }, + { + "epoch": 0.72, + "grad_norm": 10.05312786998102, + "learning_rate": 1.4797484234219504e-05, + "loss": 1.4256, + "step": 59961 + }, + { + "epoch": 0.72, + "grad_norm": 24.93218579341666, + "learning_rate": 1.4796971664664282e-05, + "loss": 1.4672, + "step": 59964 + }, + { + "epoch": 0.72, + "grad_norm": 41.51755461693767, + "learning_rate": 1.4796459078738723e-05, + "loss": 1.1314, + "step": 59967 + }, + { + "epoch": 0.72, + "grad_norm": 19.480238196464313, + "learning_rate": 1.4795946476444573e-05, + "loss": 1.3302, + "step": 59970 + }, + { + "epoch": 0.72, + "grad_norm": 8.761924014582123, + "learning_rate": 1.4795433857783592e-05, + "loss": 0.9545, + "step": 59973 + }, + { + "epoch": 0.72, + "grad_norm": 7.77429222496714, + "learning_rate": 1.4794921222757519e-05, + "loss": 1.1714, + "step": 59976 + }, + { + "epoch": 0.72, + "grad_norm": 10.513158606435034, + "learning_rate": 1.4794408571368108e-05, + "loss": 1.1628, + "step": 59979 + }, + { + "epoch": 0.72, + "grad_norm": 19.97739551505365, + "learning_rate": 1.4793895903617107e-05, + "loss": 1.5161, + "step": 59982 + }, + { + "epoch": 0.72, + "grad_norm": 17.820490458239732, + "learning_rate": 1.4793383219506266e-05, + "loss": 1.6798, + "step": 59985 + }, + { + "epoch": 0.72, + "grad_norm": 5.777527461227493, + "learning_rate": 1.4792870519037333e-05, + "loss": 1.1011, + "step": 59988 + }, + { + "epoch": 0.72, + "grad_norm": 18.3057964765407, + "learning_rate": 1.479235780221206e-05, + "loss": 0.9976, + "step": 59991 + }, + { + "epoch": 0.72, + "grad_norm": 14.031060257803773, + "learning_rate": 1.4791845069032195e-05, + "loss": 1.5776, + "step": 59994 + }, + { + "epoch": 0.72, + "grad_norm": 6.211858307220923, + "learning_rate": 1.4791332319499492e-05, + "loss": 1.3989, + "step": 59997 + }, + { + "epoch": 0.72, + "grad_norm": 6.620660931166045, + "learning_rate": 1.4790819553615699e-05, + "loss": 1.2956, + "step": 60000 + }, + { + "epoch": 0.72, + "grad_norm": 5.103014482099775, + "learning_rate": 1.4790306771382559e-05, + "loss": 1.7375, + "step": 60003 + }, + { + "epoch": 0.72, + "grad_norm": 48.15346520105264, + "learning_rate": 1.4789793972801828e-05, + "loss": 1.3138, + "step": 60006 + }, + { + "epoch": 0.72, + "grad_norm": 7.409424030837689, + "learning_rate": 1.4789281157875261e-05, + "loss": 1.352, + "step": 60009 + }, + { + "epoch": 0.72, + "grad_norm": 33.6808007748893, + "learning_rate": 1.4788768326604599e-05, + "loss": 1.6551, + "step": 60012 + }, + { + "epoch": 0.72, + "grad_norm": 13.86817444231091, + "learning_rate": 1.4788255478991593e-05, + "loss": 1.4578, + "step": 60015 + }, + { + "epoch": 0.72, + "grad_norm": 44.44720314328142, + "learning_rate": 1.4787742615038001e-05, + "loss": 1.3121, + "step": 60018 + }, + { + "epoch": 0.72, + "grad_norm": 15.68612504419463, + "learning_rate": 1.4787229734745564e-05, + "loss": 1.4722, + "step": 60021 + }, + { + "epoch": 0.72, + "grad_norm": 19.33517496950156, + "learning_rate": 1.4786716838116037e-05, + "loss": 1.3091, + "step": 60024 + }, + { + "epoch": 0.72, + "grad_norm": 30.935126678090985, + "learning_rate": 1.478620392515117e-05, + "loss": 1.6652, + "step": 60027 + }, + { + "epoch": 0.72, + "grad_norm": 14.0133818684738, + "learning_rate": 1.4785690995852715e-05, + "loss": 1.0397, + "step": 60030 + }, + { + "epoch": 0.72, + "grad_norm": 12.325824213472456, + "learning_rate": 1.478517805022242e-05, + "loss": 1.228, + "step": 60033 + }, + { + "epoch": 0.72, + "grad_norm": 21.776289452759745, + "learning_rate": 1.4784665088262031e-05, + "loss": 1.1878, + "step": 60036 + }, + { + "epoch": 0.72, + "grad_norm": 10.365971495491594, + "learning_rate": 1.4784152109973308e-05, + "loss": 1.6949, + "step": 60039 + }, + { + "epoch": 0.72, + "grad_norm": 87.50285699069578, + "learning_rate": 1.4783639115357997e-05, + "loss": 1.4116, + "step": 60042 + }, + { + "epoch": 0.72, + "grad_norm": 20.324334795267582, + "learning_rate": 1.4783126104417848e-05, + "loss": 1.3389, + "step": 60045 + }, + { + "epoch": 0.72, + "grad_norm": 10.424739120782656, + "learning_rate": 1.478261307715461e-05, + "loss": 1.3845, + "step": 60048 + }, + { + "epoch": 0.72, + "grad_norm": 18.199839511277535, + "learning_rate": 1.4782100033570042e-05, + "loss": 1.8943, + "step": 60051 + }, + { + "epoch": 0.72, + "grad_norm": 10.644853852632293, + "learning_rate": 1.4781586973665885e-05, + "loss": 1.0517, + "step": 60054 + }, + { + "epoch": 0.72, + "grad_norm": 20.468527140520965, + "learning_rate": 1.4781073897443894e-05, + "loss": 1.539, + "step": 60057 + }, + { + "epoch": 0.72, + "grad_norm": 5.0474639612035235, + "learning_rate": 1.478056080490582e-05, + "loss": 1.4614, + "step": 60060 + }, + { + "epoch": 0.72, + "grad_norm": 12.010181078244356, + "learning_rate": 1.4780047696053417e-05, + "loss": 1.3586, + "step": 60063 + }, + { + "epoch": 0.72, + "grad_norm": 9.290703198525689, + "learning_rate": 1.477953457088843e-05, + "loss": 1.4309, + "step": 60066 + }, + { + "epoch": 0.72, + "grad_norm": 27.292842332687187, + "learning_rate": 1.4779021429412612e-05, + "loss": 1.5534, + "step": 60069 + }, + { + "epoch": 0.72, + "grad_norm": 46.95942554300091, + "learning_rate": 1.4778508271627718e-05, + "loss": 1.814, + "step": 60072 + }, + { + "epoch": 0.72, + "grad_norm": 4.9675405507503365, + "learning_rate": 1.4777995097535496e-05, + "loss": 1.3261, + "step": 60075 + }, + { + "epoch": 0.72, + "grad_norm": 35.86475833753297, + "learning_rate": 1.4777481907137694e-05, + "loss": 1.4837, + "step": 60078 + }, + { + "epoch": 0.72, + "grad_norm": 10.910088703771688, + "learning_rate": 1.4776968700436073e-05, + "loss": 1.4631, + "step": 60081 + }, + { + "epoch": 0.72, + "grad_norm": 10.253061832297709, + "learning_rate": 1.4776455477432374e-05, + "loss": 1.8682, + "step": 60084 + }, + { + "epoch": 0.72, + "grad_norm": 13.340147335562836, + "learning_rate": 1.4775942238128358e-05, + "loss": 1.4831, + "step": 60087 + }, + { + "epoch": 0.72, + "grad_norm": 25.535984457754736, + "learning_rate": 1.4775428982525766e-05, + "loss": 1.56, + "step": 60090 + }, + { + "epoch": 0.72, + "grad_norm": 10.772211744169146, + "learning_rate": 1.4774915710626356e-05, + "loss": 1.6448, + "step": 60093 + }, + { + "epoch": 0.72, + "grad_norm": 113.05888389549725, + "learning_rate": 1.4774402422431881e-05, + "loss": 1.2099, + "step": 60096 + }, + { + "epoch": 0.72, + "grad_norm": 39.13398832579321, + "learning_rate": 1.4773889117944089e-05, + "loss": 1.6957, + "step": 60099 + }, + { + "epoch": 0.72, + "grad_norm": 11.220335926236096, + "learning_rate": 1.4773375797164731e-05, + "loss": 1.3073, + "step": 60102 + }, + { + "epoch": 0.72, + "grad_norm": 16.898839353191686, + "learning_rate": 1.4772862460095563e-05, + "loss": 1.454, + "step": 60105 + }, + { + "epoch": 0.72, + "grad_norm": 20.097347038605097, + "learning_rate": 1.4772349106738335e-05, + "loss": 1.3466, + "step": 60108 + }, + { + "epoch": 0.72, + "grad_norm": 3.538404400290574, + "learning_rate": 1.4771835737094797e-05, + "loss": 1.4684, + "step": 60111 + }, + { + "epoch": 0.72, + "grad_norm": 23.98823874932031, + "learning_rate": 1.4771322351166704e-05, + "loss": 1.256, + "step": 60114 + }, + { + "epoch": 0.72, + "grad_norm": 16.0882811223238, + "learning_rate": 1.4770808948955806e-05, + "loss": 1.4907, + "step": 60117 + }, + { + "epoch": 0.72, + "grad_norm": 18.964690773150487, + "learning_rate": 1.4770295530463856e-05, + "loss": 1.3677, + "step": 60120 + }, + { + "epoch": 0.72, + "grad_norm": 65.3385601514129, + "learning_rate": 1.4769782095692601e-05, + "loss": 1.2947, + "step": 60123 + }, + { + "epoch": 0.72, + "grad_norm": 61.7221653360956, + "learning_rate": 1.4769268644643803e-05, + "loss": 1.5192, + "step": 60126 + }, + { + "epoch": 0.72, + "grad_norm": 20.831248496477457, + "learning_rate": 1.4768755177319209e-05, + "loss": 1.0073, + "step": 60129 + }, + { + "epoch": 0.72, + "grad_norm": 27.691428058042945, + "learning_rate": 1.4768241693720569e-05, + "loss": 1.115, + "step": 60132 + }, + { + "epoch": 0.72, + "grad_norm": 7.442004226690931, + "learning_rate": 1.476772819384964e-05, + "loss": 1.4321, + "step": 60135 + }, + { + "epoch": 0.72, + "grad_norm": 7.4233446805314545, + "learning_rate": 1.4767214677708167e-05, + "loss": 1.6668, + "step": 60138 + }, + { + "epoch": 0.72, + "grad_norm": 13.63721817404792, + "learning_rate": 1.4766701145297914e-05, + "loss": 1.4245, + "step": 60141 + }, + { + "epoch": 0.72, + "grad_norm": 4.067890677162065, + "learning_rate": 1.4766187596620623e-05, + "loss": 1.257, + "step": 60144 + }, + { + "epoch": 0.72, + "grad_norm": 23.376501202528804, + "learning_rate": 1.4765674031678052e-05, + "loss": 1.3901, + "step": 60147 + }, + { + "epoch": 0.72, + "grad_norm": 25.798348407102967, + "learning_rate": 1.4765160450471952e-05, + "loss": 1.4979, + "step": 60150 + }, + { + "epoch": 0.72, + "grad_norm": 16.52887353195497, + "learning_rate": 1.4764646853004074e-05, + "loss": 1.4875, + "step": 60153 + }, + { + "epoch": 0.72, + "grad_norm": 17.90362879102514, + "learning_rate": 1.4764133239276172e-05, + "loss": 1.2555, + "step": 60156 + }, + { + "epoch": 0.72, + "grad_norm": 12.272992938264487, + "learning_rate": 1.4763619609290004e-05, + "loss": 1.4027, + "step": 60159 + }, + { + "epoch": 0.72, + "grad_norm": 10.38916630585912, + "learning_rate": 1.4763105963047314e-05, + "loss": 1.8154, + "step": 60162 + }, + { + "epoch": 0.72, + "grad_norm": 4.9154754827778655, + "learning_rate": 1.4762592300549858e-05, + "loss": 1.0723, + "step": 60165 + }, + { + "epoch": 0.72, + "grad_norm": 6.631548043699664, + "learning_rate": 1.4762078621799394e-05, + "loss": 1.4148, + "step": 60168 + }, + { + "epoch": 0.72, + "grad_norm": 36.515217780276316, + "learning_rate": 1.476156492679767e-05, + "loss": 1.4726, + "step": 60171 + }, + { + "epoch": 0.72, + "grad_norm": 6.418202677895902, + "learning_rate": 1.4761051215546438e-05, + "loss": 1.677, + "step": 60174 + }, + { + "epoch": 0.72, + "grad_norm": 18.788251256001967, + "learning_rate": 1.4760537488047453e-05, + "loss": 1.4346, + "step": 60177 + }, + { + "epoch": 0.72, + "grad_norm": 33.45895992255809, + "learning_rate": 1.4760023744302471e-05, + "loss": 1.4593, + "step": 60180 + }, + { + "epoch": 0.72, + "grad_norm": 17.76165123685788, + "learning_rate": 1.475950998431324e-05, + "loss": 1.4957, + "step": 60183 + }, + { + "epoch": 0.72, + "grad_norm": 45.50977998031872, + "learning_rate": 1.4758996208081517e-05, + "loss": 1.3889, + "step": 60186 + }, + { + "epoch": 0.72, + "grad_norm": 13.273655389170642, + "learning_rate": 1.4758482415609055e-05, + "loss": 1.392, + "step": 60189 + }, + { + "epoch": 0.72, + "grad_norm": 16.21282888701571, + "learning_rate": 1.4757968606897607e-05, + "loss": 1.4276, + "step": 60192 + }, + { + "epoch": 0.72, + "grad_norm": 11.624917127042455, + "learning_rate": 1.4757454781948924e-05, + "loss": 1.5453, + "step": 60195 + }, + { + "epoch": 0.72, + "grad_norm": 30.721257555125927, + "learning_rate": 1.475694094076476e-05, + "loss": 1.3366, + "step": 60198 + }, + { + "epoch": 0.72, + "grad_norm": 14.315770760984234, + "learning_rate": 1.4756427083346872e-05, + "loss": 1.2841, + "step": 60201 + }, + { + "epoch": 0.72, + "grad_norm": 13.84772303629808, + "learning_rate": 1.4755913209697012e-05, + "loss": 1.2547, + "step": 60204 + }, + { + "epoch": 0.72, + "grad_norm": 74.72786508293149, + "learning_rate": 1.4755399319816932e-05, + "loss": 1.052, + "step": 60207 + }, + { + "epoch": 0.72, + "grad_norm": 40.345075430319824, + "learning_rate": 1.4754885413708388e-05, + "loss": 1.2773, + "step": 60210 + }, + { + "epoch": 0.72, + "grad_norm": 13.965481856891316, + "learning_rate": 1.4754371491373131e-05, + "loss": 1.455, + "step": 60213 + }, + { + "epoch": 0.72, + "grad_norm": 18.4420719833952, + "learning_rate": 1.4753857552812921e-05, + "loss": 1.1438, + "step": 60216 + }, + { + "epoch": 0.72, + "grad_norm": 35.57107011692713, + "learning_rate": 1.4753343598029504e-05, + "loss": 1.708, + "step": 60219 + }, + { + "epoch": 0.72, + "grad_norm": 20.847866118101376, + "learning_rate": 1.4752829627024636e-05, + "loss": 1.3811, + "step": 60222 + }, + { + "epoch": 0.72, + "grad_norm": 96.9328202472327, + "learning_rate": 1.4752315639800076e-05, + "loss": 1.4805, + "step": 60225 + }, + { + "epoch": 0.72, + "grad_norm": 41.586038037350214, + "learning_rate": 1.4751801636357574e-05, + "loss": 1.0042, + "step": 60228 + }, + { + "epoch": 0.72, + "grad_norm": 14.169553731421518, + "learning_rate": 1.4751287616698881e-05, + "loss": 1.4293, + "step": 60231 + }, + { + "epoch": 0.72, + "grad_norm": 6.752470540821409, + "learning_rate": 1.4750773580825761e-05, + "loss": 1.3966, + "step": 60234 + }, + { + "epoch": 0.72, + "grad_norm": 43.95986903089284, + "learning_rate": 1.4750259528739956e-05, + "loss": 1.3153, + "step": 60237 + }, + { + "epoch": 0.72, + "grad_norm": 17.09287005254222, + "learning_rate": 1.4749745460443228e-05, + "loss": 1.32, + "step": 60240 + }, + { + "epoch": 0.72, + "grad_norm": 65.71393245056342, + "learning_rate": 1.4749231375937331e-05, + "loss": 1.0574, + "step": 60243 + }, + { + "epoch": 0.72, + "grad_norm": 18.824789831814243, + "learning_rate": 1.4748717275224018e-05, + "loss": 1.293, + "step": 60246 + }, + { + "epoch": 0.72, + "grad_norm": 8.07334364742223, + "learning_rate": 1.4748203158305044e-05, + "loss": 1.6246, + "step": 60249 + }, + { + "epoch": 0.72, + "grad_norm": 47.89367602676933, + "learning_rate": 1.4747689025182158e-05, + "loss": 1.383, + "step": 60252 + }, + { + "epoch": 0.72, + "grad_norm": 13.404491056475578, + "learning_rate": 1.4747174875857127e-05, + "loss": 1.4564, + "step": 60255 + }, + { + "epoch": 0.72, + "grad_norm": 40.20119705476918, + "learning_rate": 1.4746660710331692e-05, + "loss": 1.619, + "step": 60258 + }, + { + "epoch": 0.72, + "grad_norm": 19.17254910773299, + "learning_rate": 1.4746146528607616e-05, + "loss": 1.7115, + "step": 60261 + }, + { + "epoch": 0.72, + "grad_norm": 53.645231153281635, + "learning_rate": 1.474563233068665e-05, + "loss": 1.0632, + "step": 60264 + }, + { + "epoch": 0.72, + "grad_norm": 27.53943066672324, + "learning_rate": 1.4745118116570551e-05, + "loss": 1.5001, + "step": 60267 + }, + { + "epoch": 0.72, + "grad_norm": 39.62140100609661, + "learning_rate": 1.4744603886261075e-05, + "loss": 1.2589, + "step": 60270 + }, + { + "epoch": 0.72, + "grad_norm": 6.977127438680356, + "learning_rate": 1.4744089639759973e-05, + "loss": 1.106, + "step": 60273 + }, + { + "epoch": 0.72, + "grad_norm": 21.00847567884521, + "learning_rate": 1.4743575377069004e-05, + "loss": 1.1273, + "step": 60276 + }, + { + "epoch": 0.72, + "grad_norm": 10.31802809308254, + "learning_rate": 1.4743061098189919e-05, + "loss": 1.522, + "step": 60279 + }, + { + "epoch": 0.72, + "grad_norm": 63.491876033359055, + "learning_rate": 1.4742546803124476e-05, + "loss": 1.4988, + "step": 60282 + }, + { + "epoch": 0.72, + "grad_norm": 20.737507089331167, + "learning_rate": 1.4742032491874426e-05, + "loss": 1.3862, + "step": 60285 + }, + { + "epoch": 0.72, + "grad_norm": 25.988619843163228, + "learning_rate": 1.4741518164441533e-05, + "loss": 1.4669, + "step": 60288 + }, + { + "epoch": 0.72, + "grad_norm": 31.070658805174485, + "learning_rate": 1.474100382082754e-05, + "loss": 1.3758, + "step": 60291 + }, + { + "epoch": 0.73, + "grad_norm": 22.34785332790825, + "learning_rate": 1.4740489461034211e-05, + "loss": 1.3729, + "step": 60294 + }, + { + "epoch": 0.73, + "grad_norm": 28.6038444948255, + "learning_rate": 1.4739975085063298e-05, + "loss": 1.0966, + "step": 60297 + }, + { + "epoch": 0.73, + "grad_norm": 3.882862573200612, + "learning_rate": 1.473946069291656e-05, + "loss": 1.5052, + "step": 60300 + }, + { + "epoch": 0.73, + "grad_norm": 4.036752814643183, + "learning_rate": 1.473894628459575e-05, + "loss": 1.2515, + "step": 60303 + }, + { + "epoch": 0.73, + "grad_norm": 7.461330542065604, + "learning_rate": 1.473843186010262e-05, + "loss": 1.3595, + "step": 60306 + }, + { + "epoch": 0.73, + "grad_norm": 28.425097179071177, + "learning_rate": 1.4737917419438935e-05, + "loss": 1.2295, + "step": 60309 + }, + { + "epoch": 0.73, + "grad_norm": 7.407815768291063, + "learning_rate": 1.4737402962606437e-05, + "loss": 1.3073, + "step": 60312 + }, + { + "epoch": 0.73, + "grad_norm": 19.216570620313412, + "learning_rate": 1.4736888489606894e-05, + "loss": 1.4112, + "step": 60315 + }, + { + "epoch": 0.73, + "grad_norm": 5.801829166462727, + "learning_rate": 1.4736374000442054e-05, + "loss": 1.5898, + "step": 60318 + }, + { + "epoch": 0.73, + "grad_norm": 6.376339471585233, + "learning_rate": 1.4735859495113675e-05, + "loss": 1.2797, + "step": 60321 + }, + { + "epoch": 0.73, + "grad_norm": 5.717786177344966, + "learning_rate": 1.4735344973623518e-05, + "loss": 1.2406, + "step": 60324 + }, + { + "epoch": 0.73, + "grad_norm": 7.201091885624406, + "learning_rate": 1.4734830435973328e-05, + "loss": 1.7676, + "step": 60327 + }, + { + "epoch": 0.73, + "grad_norm": 34.78063468227709, + "learning_rate": 1.4734315882164874e-05, + "loss": 1.2302, + "step": 60330 + }, + { + "epoch": 0.73, + "grad_norm": 14.322017707001729, + "learning_rate": 1.47338013121999e-05, + "loss": 1.1736, + "step": 60333 + }, + { + "epoch": 0.73, + "grad_norm": 13.072932500334169, + "learning_rate": 1.473328672608017e-05, + "loss": 0.8285, + "step": 60336 + }, + { + "epoch": 0.73, + "grad_norm": 27.527446557167583, + "learning_rate": 1.4732772123807432e-05, + "loss": 1.6573, + "step": 60339 + }, + { + "epoch": 0.73, + "grad_norm": 8.351814809929795, + "learning_rate": 1.4732257505383457e-05, + "loss": 1.3769, + "step": 60342 + }, + { + "epoch": 0.73, + "grad_norm": 3.988025398083527, + "learning_rate": 1.4731742870809984e-05, + "loss": 1.2989, + "step": 60345 + }, + { + "epoch": 0.73, + "grad_norm": 19.024148814800537, + "learning_rate": 1.4731228220088777e-05, + "loss": 1.2478, + "step": 60348 + }, + { + "epoch": 0.73, + "grad_norm": 10.222544093370793, + "learning_rate": 1.4730713553221599e-05, + "loss": 1.7665, + "step": 60351 + }, + { + "epoch": 0.73, + "grad_norm": 7.787372767637259, + "learning_rate": 1.4730198870210192e-05, + "loss": 1.5306, + "step": 60354 + }, + { + "epoch": 0.73, + "grad_norm": 9.787892178800556, + "learning_rate": 1.4729684171056324e-05, + "loss": 1.2972, + "step": 60357 + }, + { + "epoch": 0.73, + "grad_norm": 19.86008384151903, + "learning_rate": 1.4729169455761745e-05, + "loss": 1.5839, + "step": 60360 + }, + { + "epoch": 0.73, + "grad_norm": 16.65035042944311, + "learning_rate": 1.472865472432822e-05, + "loss": 1.5293, + "step": 60363 + }, + { + "epoch": 0.73, + "grad_norm": 7.668964520715997, + "learning_rate": 1.4728139976757494e-05, + "loss": 1.3287, + "step": 60366 + }, + { + "epoch": 0.73, + "grad_norm": 6.764349000139534, + "learning_rate": 1.472762521305133e-05, + "loss": 1.6981, + "step": 60369 + }, + { + "epoch": 0.73, + "grad_norm": 8.4765975125827, + "learning_rate": 1.4727110433211486e-05, + "loss": 1.3982, + "step": 60372 + }, + { + "epoch": 0.73, + "grad_norm": 15.910178963798767, + "learning_rate": 1.4726595637239715e-05, + "loss": 1.7259, + "step": 60375 + }, + { + "epoch": 0.73, + "grad_norm": 14.834441380970757, + "learning_rate": 1.4726080825137776e-05, + "loss": 1.6162, + "step": 60378 + }, + { + "epoch": 0.73, + "grad_norm": 8.12123256580223, + "learning_rate": 1.4725565996907426e-05, + "loss": 1.831, + "step": 60381 + }, + { + "epoch": 0.73, + "grad_norm": 14.127093745656163, + "learning_rate": 1.4725051152550422e-05, + "loss": 1.3842, + "step": 60384 + }, + { + "epoch": 0.73, + "grad_norm": 4.503836968862676, + "learning_rate": 1.4724536292068518e-05, + "loss": 1.2184, + "step": 60387 + }, + { + "epoch": 0.73, + "grad_norm": 16.26989193246181, + "learning_rate": 1.4724021415463476e-05, + "loss": 1.3393, + "step": 60390 + }, + { + "epoch": 0.73, + "grad_norm": 17.759851942517347, + "learning_rate": 1.472350652273705e-05, + "loss": 1.3343, + "step": 60393 + }, + { + "epoch": 0.73, + "grad_norm": 17.61268563038917, + "learning_rate": 1.4722991613890998e-05, + "loss": 1.4837, + "step": 60396 + }, + { + "epoch": 0.73, + "grad_norm": 18.344979150598682, + "learning_rate": 1.4722476688927074e-05, + "loss": 1.4064, + "step": 60399 + }, + { + "epoch": 0.73, + "grad_norm": 30.758525621787673, + "learning_rate": 1.472196174784704e-05, + "loss": 1.2238, + "step": 60402 + }, + { + "epoch": 0.73, + "grad_norm": 15.158709286251552, + "learning_rate": 1.4721446790652654e-05, + "loss": 1.1955, + "step": 60405 + }, + { + "epoch": 0.73, + "grad_norm": 8.741778393314863, + "learning_rate": 1.4720931817345667e-05, + "loss": 1.0656, + "step": 60408 + }, + { + "epoch": 0.73, + "grad_norm": 20.810686176269854, + "learning_rate": 1.472041682792784e-05, + "loss": 1.2523, + "step": 60411 + }, + { + "epoch": 0.73, + "grad_norm": 5.498720541186902, + "learning_rate": 1.4719901822400935e-05, + "loss": 1.8774, + "step": 60414 + }, + { + "epoch": 0.73, + "grad_norm": 3.7312349031679664, + "learning_rate": 1.4719386800766702e-05, + "loss": 1.3974, + "step": 60417 + }, + { + "epoch": 0.73, + "grad_norm": 19.611721369128567, + "learning_rate": 1.4718871763026901e-05, + "loss": 1.099, + "step": 60420 + }, + { + "epoch": 0.73, + "grad_norm": 36.31042316932026, + "learning_rate": 1.471835670918329e-05, + "loss": 1.1999, + "step": 60423 + }, + { + "epoch": 0.73, + "grad_norm": 16.86910528114522, + "learning_rate": 1.471784163923763e-05, + "loss": 1.4858, + "step": 60426 + }, + { + "epoch": 0.73, + "grad_norm": 22.991247426432597, + "learning_rate": 1.4717326553191673e-05, + "loss": 1.367, + "step": 60429 + }, + { + "epoch": 0.73, + "grad_norm": 5.862197196873464, + "learning_rate": 1.4716811451047182e-05, + "loss": 1.5372, + "step": 60432 + }, + { + "epoch": 0.73, + "grad_norm": 15.381091845865654, + "learning_rate": 1.471629633280591e-05, + "loss": 1.5254, + "step": 60435 + }, + { + "epoch": 0.73, + "grad_norm": 4.149482877997955, + "learning_rate": 1.471578119846962e-05, + "loss": 1.2674, + "step": 60438 + }, + { + "epoch": 0.73, + "grad_norm": 6.4963679304570086, + "learning_rate": 1.4715266048040066e-05, + "loss": 1.259, + "step": 60441 + }, + { + "epoch": 0.73, + "grad_norm": 11.980355377862221, + "learning_rate": 1.471475088151901e-05, + "loss": 1.6447, + "step": 60444 + }, + { + "epoch": 0.73, + "grad_norm": 16.186646608238245, + "learning_rate": 1.4714235698908204e-05, + "loss": 1.4551, + "step": 60447 + }, + { + "epoch": 0.73, + "grad_norm": 5.990151662761157, + "learning_rate": 1.4713720500209412e-05, + "loss": 1.375, + "step": 60450 + }, + { + "epoch": 0.73, + "grad_norm": 16.175012076532546, + "learning_rate": 1.4713205285424388e-05, + "loss": 1.4472, + "step": 60453 + }, + { + "epoch": 0.73, + "grad_norm": 7.631312031547415, + "learning_rate": 1.471269005455489e-05, + "loss": 1.4792, + "step": 60456 + }, + { + "epoch": 0.73, + "grad_norm": 48.50415454383086, + "learning_rate": 1.4712174807602684e-05, + "loss": 1.1725, + "step": 60459 + }, + { + "epoch": 0.73, + "grad_norm": 45.017461207377174, + "learning_rate": 1.471165954456952e-05, + "loss": 1.2846, + "step": 60462 + }, + { + "epoch": 0.73, + "grad_norm": 44.254853209936456, + "learning_rate": 1.4711144265457159e-05, + "loss": 1.1674, + "step": 60465 + }, + { + "epoch": 0.73, + "grad_norm": 20.413254754902766, + "learning_rate": 1.4710628970267358e-05, + "loss": 1.6166, + "step": 60468 + }, + { + "epoch": 0.73, + "grad_norm": 6.844621128855545, + "learning_rate": 1.471011365900188e-05, + "loss": 1.4579, + "step": 60471 + }, + { + "epoch": 0.73, + "grad_norm": 9.668422545745011, + "learning_rate": 1.4709598331662478e-05, + "loss": 1.4731, + "step": 60474 + }, + { + "epoch": 0.73, + "grad_norm": 10.067916060177156, + "learning_rate": 1.4709082988250914e-05, + "loss": 1.5826, + "step": 60477 + }, + { + "epoch": 0.73, + "grad_norm": 9.045812622016763, + "learning_rate": 1.4708567628768947e-05, + "loss": 1.2072, + "step": 60480 + }, + { + "epoch": 0.73, + "grad_norm": 10.082024304207502, + "learning_rate": 1.4708052253218332e-05, + "loss": 1.343, + "step": 60483 + }, + { + "epoch": 0.73, + "grad_norm": 36.43595629422887, + "learning_rate": 1.4707536861600833e-05, + "loss": 1.4922, + "step": 60486 + }, + { + "epoch": 0.73, + "grad_norm": 20.067586148412968, + "learning_rate": 1.4707021453918206e-05, + "loss": 1.2131, + "step": 60489 + }, + { + "epoch": 0.73, + "grad_norm": 15.814484653217715, + "learning_rate": 1.4706506030172212e-05, + "loss": 1.1145, + "step": 60492 + }, + { + "epoch": 0.73, + "grad_norm": 31.007641499327452, + "learning_rate": 1.4705990590364606e-05, + "loss": 1.2371, + "step": 60495 + }, + { + "epoch": 0.73, + "grad_norm": 23.467465115087776, + "learning_rate": 1.470547513449715e-05, + "loss": 1.1615, + "step": 60498 + }, + { + "epoch": 0.73, + "grad_norm": 15.982735245097706, + "learning_rate": 1.4704959662571601e-05, + "loss": 1.6275, + "step": 60501 + }, + { + "epoch": 0.73, + "grad_norm": 11.322033675822654, + "learning_rate": 1.4704444174589721e-05, + "loss": 1.3628, + "step": 60504 + }, + { + "epoch": 0.73, + "grad_norm": 5.805021191905698, + "learning_rate": 1.4703928670553265e-05, + "loss": 1.1786, + "step": 60507 + }, + { + "epoch": 0.73, + "grad_norm": 21.411265383733586, + "learning_rate": 1.4703413150463996e-05, + "loss": 1.5213, + "step": 60510 + }, + { + "epoch": 0.73, + "grad_norm": 3.953177635070328, + "learning_rate": 1.4702897614323675e-05, + "loss": 1.0135, + "step": 60513 + }, + { + "epoch": 0.73, + "grad_norm": 55.67996688524202, + "learning_rate": 1.4702382062134054e-05, + "loss": 1.656, + "step": 60516 + }, + { + "epoch": 0.73, + "grad_norm": 53.71400772860735, + "learning_rate": 1.47018664938969e-05, + "loss": 1.6667, + "step": 60519 + }, + { + "epoch": 0.73, + "grad_norm": 21.326363839384538, + "learning_rate": 1.4701350909613967e-05, + "loss": 1.7336, + "step": 60522 + }, + { + "epoch": 0.73, + "grad_norm": 43.92583688520996, + "learning_rate": 1.470083530928702e-05, + "loss": 1.1648, + "step": 60525 + }, + { + "epoch": 0.73, + "grad_norm": 59.8236362017661, + "learning_rate": 1.4700319692917812e-05, + "loss": 1.1225, + "step": 60528 + }, + { + "epoch": 0.73, + "grad_norm": 6.031596432698628, + "learning_rate": 1.4699804060508107e-05, + "loss": 1.5846, + "step": 60531 + }, + { + "epoch": 0.73, + "grad_norm": 19.122797861278254, + "learning_rate": 1.4699288412059664e-05, + "loss": 1.4605, + "step": 60534 + }, + { + "epoch": 0.73, + "grad_norm": 33.671184219557944, + "learning_rate": 1.4698772747574241e-05, + "loss": 1.562, + "step": 60537 + }, + { + "epoch": 0.73, + "grad_norm": 16.07806768109722, + "learning_rate": 1.46982570670536e-05, + "loss": 1.4086, + "step": 60540 + }, + { + "epoch": 0.73, + "grad_norm": 21.812661807384405, + "learning_rate": 1.4697741370499501e-05, + "loss": 1.2166, + "step": 60543 + }, + { + "epoch": 0.73, + "grad_norm": 14.498488996927286, + "learning_rate": 1.4697225657913703e-05, + "loss": 1.5297, + "step": 60546 + }, + { + "epoch": 0.73, + "grad_norm": 13.776111819841711, + "learning_rate": 1.4696709929297963e-05, + "loss": 1.2892, + "step": 60549 + }, + { + "epoch": 0.73, + "grad_norm": 12.629839504145176, + "learning_rate": 1.4696194184654047e-05, + "loss": 1.0188, + "step": 60552 + }, + { + "epoch": 0.73, + "grad_norm": 29.36230758245151, + "learning_rate": 1.4695678423983711e-05, + "loss": 1.6345, + "step": 60555 + }, + { + "epoch": 0.73, + "grad_norm": 13.734974360332522, + "learning_rate": 1.4695162647288715e-05, + "loss": 1.256, + "step": 60558 + }, + { + "epoch": 0.73, + "grad_norm": 9.139879540910602, + "learning_rate": 1.4694646854570818e-05, + "loss": 1.02, + "step": 60561 + }, + { + "epoch": 0.73, + "grad_norm": 53.42989284718248, + "learning_rate": 1.4694131045831786e-05, + "loss": 1.7964, + "step": 60564 + }, + { + "epoch": 0.73, + "grad_norm": 7.921102842908836, + "learning_rate": 1.4693615221073374e-05, + "loss": 1.3231, + "step": 60567 + }, + { + "epoch": 0.73, + "grad_norm": 9.968073259296066, + "learning_rate": 1.4693099380297345e-05, + "loss": 1.4473, + "step": 60570 + }, + { + "epoch": 0.73, + "grad_norm": 14.661897599047307, + "learning_rate": 1.4692583523505458e-05, + "loss": 1.1613, + "step": 60573 + }, + { + "epoch": 0.73, + "grad_norm": 29.52630062141712, + "learning_rate": 1.4692067650699471e-05, + "loss": 1.6713, + "step": 60576 + }, + { + "epoch": 0.73, + "grad_norm": 25.12200254493817, + "learning_rate": 1.4691551761881151e-05, + "loss": 1.5736, + "step": 60579 + }, + { + "epoch": 0.73, + "grad_norm": 5.287026138350068, + "learning_rate": 1.4691035857052249e-05, + "loss": 1.2356, + "step": 60582 + }, + { + "epoch": 0.73, + "grad_norm": 22.154307029863766, + "learning_rate": 1.4690519936214537e-05, + "loss": 1.1123, + "step": 60585 + }, + { + "epoch": 0.73, + "grad_norm": 6.730492414230601, + "learning_rate": 1.4690003999369768e-05, + "loss": 2.1909, + "step": 60588 + }, + { + "epoch": 0.73, + "grad_norm": 16.814139296163642, + "learning_rate": 1.4689488046519704e-05, + "loss": 1.3962, + "step": 60591 + }, + { + "epoch": 0.73, + "grad_norm": 19.844859531535796, + "learning_rate": 1.4688972077666109e-05, + "loss": 1.6659, + "step": 60594 + }, + { + "epoch": 0.73, + "grad_norm": 8.919800470924285, + "learning_rate": 1.468845609281074e-05, + "loss": 1.5132, + "step": 60597 + }, + { + "epoch": 0.73, + "grad_norm": 9.146964299519807, + "learning_rate": 1.4687940091955359e-05, + "loss": 1.1529, + "step": 60600 + }, + { + "epoch": 0.73, + "grad_norm": 22.12868906302276, + "learning_rate": 1.4687424075101726e-05, + "loss": 1.6146, + "step": 60603 + }, + { + "epoch": 0.73, + "grad_norm": 4.534365586340853, + "learning_rate": 1.4686908042251604e-05, + "loss": 1.3776, + "step": 60606 + }, + { + "epoch": 0.73, + "grad_norm": 10.401638640737396, + "learning_rate": 1.4686391993406752e-05, + "loss": 1.1594, + "step": 60609 + }, + { + "epoch": 0.73, + "grad_norm": 21.753706963044543, + "learning_rate": 1.4685875928568933e-05, + "loss": 1.2019, + "step": 60612 + }, + { + "epoch": 0.73, + "grad_norm": 66.34069590594278, + "learning_rate": 1.4685359847739906e-05, + "loss": 1.6842, + "step": 60615 + }, + { + "epoch": 0.73, + "grad_norm": 37.39355620822403, + "learning_rate": 1.4684843750921436e-05, + "loss": 1.3304, + "step": 60618 + }, + { + "epoch": 0.73, + "grad_norm": 2.7681160679208747, + "learning_rate": 1.4684327638115281e-05, + "loss": 1.5319, + "step": 60621 + }, + { + "epoch": 0.73, + "grad_norm": 9.280350959945274, + "learning_rate": 1.46838115093232e-05, + "loss": 1.2493, + "step": 60624 + }, + { + "epoch": 0.73, + "grad_norm": 8.11395897408178, + "learning_rate": 1.4683295364546962e-05, + "loss": 1.5313, + "step": 60627 + }, + { + "epoch": 0.73, + "grad_norm": 23.347349205264692, + "learning_rate": 1.4682779203788323e-05, + "loss": 1.2426, + "step": 60630 + }, + { + "epoch": 0.73, + "grad_norm": 15.026818598696575, + "learning_rate": 1.4682263027049044e-05, + "loss": 1.4524, + "step": 60633 + }, + { + "epoch": 0.73, + "grad_norm": 8.959848163793671, + "learning_rate": 1.4681746834330888e-05, + "loss": 1.5984, + "step": 60636 + }, + { + "epoch": 0.73, + "grad_norm": 45.43592118146181, + "learning_rate": 1.4681230625635616e-05, + "loss": 1.2019, + "step": 60639 + }, + { + "epoch": 0.73, + "grad_norm": 17.4498915327891, + "learning_rate": 1.468071440096499e-05, + "loss": 1.4037, + "step": 60642 + }, + { + "epoch": 0.73, + "grad_norm": 27.499266608807996, + "learning_rate": 1.468019816032077e-05, + "loss": 0.9491, + "step": 60645 + }, + { + "epoch": 0.73, + "grad_norm": 32.46060699253502, + "learning_rate": 1.4679681903704724e-05, + "loss": 1.348, + "step": 60648 + }, + { + "epoch": 0.73, + "grad_norm": 32.22112290387697, + "learning_rate": 1.4679165631118605e-05, + "loss": 1.6324, + "step": 60651 + }, + { + "epoch": 0.73, + "grad_norm": 15.254677281120225, + "learning_rate": 1.4678649342564184e-05, + "loss": 1.5653, + "step": 60654 + }, + { + "epoch": 0.73, + "grad_norm": 4.840691898839975, + "learning_rate": 1.4678133038043214e-05, + "loss": 1.0259, + "step": 60657 + }, + { + "epoch": 0.73, + "grad_norm": 19.612654185357485, + "learning_rate": 1.4677616717557461e-05, + "loss": 1.4464, + "step": 60660 + }, + { + "epoch": 0.73, + "grad_norm": 4.823075699010471, + "learning_rate": 1.467710038110869e-05, + "loss": 1.1958, + "step": 60663 + }, + { + "epoch": 0.73, + "grad_norm": 6.186775739361566, + "learning_rate": 1.467658402869866e-05, + "loss": 1.2128, + "step": 60666 + }, + { + "epoch": 0.73, + "grad_norm": 33.35029908169653, + "learning_rate": 1.4676067660329128e-05, + "loss": 1.59, + "step": 60669 + }, + { + "epoch": 0.73, + "grad_norm": 11.787280255991337, + "learning_rate": 1.4675551276001866e-05, + "loss": 1.6339, + "step": 60672 + }, + { + "epoch": 0.73, + "grad_norm": 9.056026275690169, + "learning_rate": 1.467503487571863e-05, + "loss": 1.1873, + "step": 60675 + }, + { + "epoch": 0.73, + "grad_norm": 8.295905742539889, + "learning_rate": 1.4674518459481183e-05, + "loss": 1.0009, + "step": 60678 + }, + { + "epoch": 0.73, + "grad_norm": 42.765702295232636, + "learning_rate": 1.4674002027291292e-05, + "loss": 1.4972, + "step": 60681 + }, + { + "epoch": 0.73, + "grad_norm": 8.99886745802856, + "learning_rate": 1.4673485579150712e-05, + "loss": 1.1174, + "step": 60684 + }, + { + "epoch": 0.73, + "grad_norm": 7.631962275176291, + "learning_rate": 1.4672969115061212e-05, + "loss": 1.2619, + "step": 60687 + }, + { + "epoch": 0.73, + "grad_norm": 198.09961504300662, + "learning_rate": 1.4672452635024547e-05, + "loss": 1.3106, + "step": 60690 + }, + { + "epoch": 0.73, + "grad_norm": 62.26187968420647, + "learning_rate": 1.4671936139042489e-05, + "loss": 1.3542, + "step": 60693 + }, + { + "epoch": 0.73, + "grad_norm": 3.3646511742408216, + "learning_rate": 1.4671419627116794e-05, + "loss": 1.1794, + "step": 60696 + }, + { + "epoch": 0.73, + "grad_norm": 17.764812181721243, + "learning_rate": 1.4670903099249226e-05, + "loss": 1.2815, + "step": 60699 + }, + { + "epoch": 0.73, + "grad_norm": 17.860692938420993, + "learning_rate": 1.4670386555441546e-05, + "loss": 1.1101, + "step": 60702 + }, + { + "epoch": 0.73, + "grad_norm": 11.437947983838894, + "learning_rate": 1.4669869995695525e-05, + "loss": 1.5487, + "step": 60705 + }, + { + "epoch": 0.73, + "grad_norm": 6.446581203666429, + "learning_rate": 1.4669353420012915e-05, + "loss": 1.467, + "step": 60708 + }, + { + "epoch": 0.73, + "grad_norm": 17.715689381791208, + "learning_rate": 1.4668836828395482e-05, + "loss": 1.1904, + "step": 60711 + }, + { + "epoch": 0.73, + "grad_norm": 10.023539211889481, + "learning_rate": 1.4668320220844996e-05, + "loss": 1.5788, + "step": 60714 + }, + { + "epoch": 0.73, + "grad_norm": 13.510182707278702, + "learning_rate": 1.4667803597363213e-05, + "loss": 1.3277, + "step": 60717 + }, + { + "epoch": 0.73, + "grad_norm": 9.204656091416847, + "learning_rate": 1.46672869579519e-05, + "loss": 1.5561, + "step": 60720 + }, + { + "epoch": 0.73, + "grad_norm": 36.77857756922088, + "learning_rate": 1.466677030261281e-05, + "loss": 1.3819, + "step": 60723 + }, + { + "epoch": 0.73, + "grad_norm": 14.29785670133939, + "learning_rate": 1.466625363134772e-05, + "loss": 1.2877, + "step": 60726 + }, + { + "epoch": 0.73, + "grad_norm": 26.861870600252455, + "learning_rate": 1.4665736944158386e-05, + "loss": 1.5152, + "step": 60729 + }, + { + "epoch": 0.73, + "grad_norm": 9.255362450017346, + "learning_rate": 1.4665220241046569e-05, + "loss": 1.3353, + "step": 60732 + }, + { + "epoch": 0.73, + "grad_norm": 15.066499725542107, + "learning_rate": 1.466470352201404e-05, + "loss": 1.2729, + "step": 60735 + }, + { + "epoch": 0.73, + "grad_norm": 6.66002820051976, + "learning_rate": 1.4664186787062558e-05, + "loss": 1.3687, + "step": 60738 + }, + { + "epoch": 0.73, + "grad_norm": 14.15916525328195, + "learning_rate": 1.4663670036193885e-05, + "loss": 1.3297, + "step": 60741 + }, + { + "epoch": 0.73, + "grad_norm": 33.920483228888514, + "learning_rate": 1.4663153269409786e-05, + "loss": 1.5721, + "step": 60744 + }, + { + "epoch": 0.73, + "grad_norm": 24.34413448106921, + "learning_rate": 1.4662636486712027e-05, + "loss": 1.4479, + "step": 60747 + }, + { + "epoch": 0.73, + "grad_norm": 13.937275302121249, + "learning_rate": 1.4662119688102365e-05, + "loss": 1.3815, + "step": 60750 + }, + { + "epoch": 0.73, + "grad_norm": 21.362512378159323, + "learning_rate": 1.466160287358257e-05, + "loss": 1.1227, + "step": 60753 + }, + { + "epoch": 0.73, + "grad_norm": 5.070623537329924, + "learning_rate": 1.4661086043154402e-05, + "loss": 1.2959, + "step": 60756 + }, + { + "epoch": 0.73, + "grad_norm": 14.252996133126615, + "learning_rate": 1.466056919681963e-05, + "loss": 1.2387, + "step": 60759 + }, + { + "epoch": 0.73, + "grad_norm": 32.96306374348085, + "learning_rate": 1.466005233458001e-05, + "loss": 1.3354, + "step": 60762 + }, + { + "epoch": 0.73, + "grad_norm": 17.227288638594402, + "learning_rate": 1.465953545643731e-05, + "loss": 1.5139, + "step": 60765 + }, + { + "epoch": 0.73, + "grad_norm": 6.89863217321606, + "learning_rate": 1.46590185623933e-05, + "loss": 1.5199, + "step": 60768 + }, + { + "epoch": 0.73, + "grad_norm": 9.272422678305903, + "learning_rate": 1.465850165244973e-05, + "loss": 1.4745, + "step": 60771 + }, + { + "epoch": 0.73, + "grad_norm": 11.992435996550062, + "learning_rate": 1.4657984726608376e-05, + "loss": 1.5708, + "step": 60774 + }, + { + "epoch": 0.73, + "grad_norm": 39.1834464396513, + "learning_rate": 1.4657467784870996e-05, + "loss": 1.6346, + "step": 60777 + }, + { + "epoch": 0.73, + "grad_norm": 21.970057178424337, + "learning_rate": 1.4656950827239358e-05, + "loss": 1.7851, + "step": 60780 + }, + { + "epoch": 0.73, + "grad_norm": 37.610390176519076, + "learning_rate": 1.4656433853715223e-05, + "loss": 1.386, + "step": 60783 + }, + { + "epoch": 0.73, + "grad_norm": 14.155029616744905, + "learning_rate": 1.4655916864300356e-05, + "loss": 1.1765, + "step": 60786 + }, + { + "epoch": 0.73, + "grad_norm": 2.5185795281343464, + "learning_rate": 1.4655399858996526e-05, + "loss": 1.1491, + "step": 60789 + }, + { + "epoch": 0.73, + "grad_norm": 11.15418305139372, + "learning_rate": 1.4654882837805489e-05, + "loss": 1.0683, + "step": 60792 + }, + { + "epoch": 0.73, + "grad_norm": 4.663275602100514, + "learning_rate": 1.4654365800729013e-05, + "loss": 1.4563, + "step": 60795 + }, + { + "epoch": 0.73, + "grad_norm": 18.599195586868333, + "learning_rate": 1.4653848747768865e-05, + "loss": 1.103, + "step": 60798 + }, + { + "epoch": 0.73, + "grad_norm": 12.297999207035843, + "learning_rate": 1.4653331678926808e-05, + "loss": 1.2517, + "step": 60801 + }, + { + "epoch": 0.73, + "grad_norm": 18.71414716963558, + "learning_rate": 1.4652814594204603e-05, + "loss": 1.2451, + "step": 60804 + }, + { + "epoch": 0.73, + "grad_norm": 14.970518280817686, + "learning_rate": 1.465229749360402e-05, + "loss": 1.3631, + "step": 60807 + }, + { + "epoch": 0.73, + "grad_norm": 21.93478182414932, + "learning_rate": 1.4651780377126823e-05, + "loss": 1.5887, + "step": 60810 + }, + { + "epoch": 0.73, + "grad_norm": 18.88159961956351, + "learning_rate": 1.4651263244774772e-05, + "loss": 1.2299, + "step": 60813 + }, + { + "epoch": 0.73, + "grad_norm": 3.8748725779642923, + "learning_rate": 1.4650746096549634e-05, + "loss": 1.4282, + "step": 60816 + }, + { + "epoch": 0.73, + "grad_norm": 15.174152234415896, + "learning_rate": 1.4650228932453176e-05, + "loss": 1.4927, + "step": 60819 + }, + { + "epoch": 0.73, + "grad_norm": 3.0046515183473934, + "learning_rate": 1.4649711752487168e-05, + "loss": 1.7168, + "step": 60822 + }, + { + "epoch": 0.73, + "grad_norm": 13.504709591922744, + "learning_rate": 1.4649194556653362e-05, + "loss": 1.4614, + "step": 60825 + }, + { + "epoch": 0.73, + "grad_norm": 14.292554411497001, + "learning_rate": 1.464867734495353e-05, + "loss": 1.6162, + "step": 60828 + }, + { + "epoch": 0.73, + "grad_norm": 13.547111224300114, + "learning_rate": 1.4648160117389438e-05, + "loss": 1.2024, + "step": 60831 + }, + { + "epoch": 0.73, + "grad_norm": 11.650735707712451, + "learning_rate": 1.4647642873962849e-05, + "loss": 1.2766, + "step": 60834 + }, + { + "epoch": 0.73, + "grad_norm": 5.8264860799961475, + "learning_rate": 1.4647125614675527e-05, + "loss": 1.4518, + "step": 60837 + }, + { + "epoch": 0.73, + "grad_norm": 204.16339247030191, + "learning_rate": 1.464660833952924e-05, + "loss": 1.2787, + "step": 60840 + }, + { + "epoch": 0.73, + "grad_norm": 5.932059036239561, + "learning_rate": 1.464609104852576e-05, + "loss": 1.1365, + "step": 60843 + }, + { + "epoch": 0.73, + "grad_norm": 11.006824674562035, + "learning_rate": 1.4645573741666835e-05, + "loss": 1.6845, + "step": 60846 + }, + { + "epoch": 0.73, + "grad_norm": 7.523725873952103, + "learning_rate": 1.4645056418954245e-05, + "loss": 1.0191, + "step": 60849 + }, + { + "epoch": 0.73, + "grad_norm": 11.500533349680406, + "learning_rate": 1.464453908038975e-05, + "loss": 1.3386, + "step": 60852 + }, + { + "epoch": 0.73, + "grad_norm": 28.797123544996364, + "learning_rate": 1.4644021725975115e-05, + "loss": 1.2472, + "step": 60855 + }, + { + "epoch": 0.73, + "grad_norm": 15.313923900624063, + "learning_rate": 1.4643504355712105e-05, + "loss": 1.5009, + "step": 60858 + }, + { + "epoch": 0.73, + "grad_norm": 10.057347822674938, + "learning_rate": 1.4642986969602488e-05, + "loss": 1.9355, + "step": 60861 + }, + { + "epoch": 0.73, + "grad_norm": 26.30524740124375, + "learning_rate": 1.4642469567648033e-05, + "loss": 1.3716, + "step": 60864 + }, + { + "epoch": 0.73, + "grad_norm": 9.230731663704887, + "learning_rate": 1.4641952149850496e-05, + "loss": 1.5853, + "step": 60867 + }, + { + "epoch": 0.73, + "grad_norm": 2.869714841769577, + "learning_rate": 1.4641434716211653e-05, + "loss": 1.4967, + "step": 60870 + }, + { + "epoch": 0.73, + "grad_norm": 21.875736181956263, + "learning_rate": 1.464091726673326e-05, + "loss": 1.5469, + "step": 60873 + }, + { + "epoch": 0.73, + "grad_norm": 11.529581278562993, + "learning_rate": 1.4640399801417096e-05, + "loss": 1.2744, + "step": 60876 + }, + { + "epoch": 0.73, + "grad_norm": 11.71323091020378, + "learning_rate": 1.4639882320264912e-05, + "loss": 1.5309, + "step": 60879 + }, + { + "epoch": 0.73, + "grad_norm": 6.451004606625156, + "learning_rate": 1.4639364823278483e-05, + "loss": 1.34, + "step": 60882 + }, + { + "epoch": 0.73, + "grad_norm": 7.13964265544583, + "learning_rate": 1.4638847310459574e-05, + "loss": 1.6117, + "step": 60885 + }, + { + "epoch": 0.73, + "grad_norm": 16.956044588133626, + "learning_rate": 1.4638329781809948e-05, + "loss": 1.3717, + "step": 60888 + }, + { + "epoch": 0.73, + "grad_norm": 10.05666142296606, + "learning_rate": 1.4637812237331375e-05, + "loss": 1.202, + "step": 60891 + }, + { + "epoch": 0.73, + "grad_norm": 2.6882646774521897, + "learning_rate": 1.4637294677025615e-05, + "loss": 1.6466, + "step": 60894 + }, + { + "epoch": 0.73, + "grad_norm": 9.896815073244337, + "learning_rate": 1.4636777100894445e-05, + "loss": 1.15, + "step": 60897 + }, + { + "epoch": 0.73, + "grad_norm": 24.41521340095698, + "learning_rate": 1.4636259508939621e-05, + "loss": 1.4508, + "step": 60900 + }, + { + "epoch": 0.73, + "grad_norm": 10.315767115775069, + "learning_rate": 1.4635741901162916e-05, + "loss": 1.0392, + "step": 60903 + }, + { + "epoch": 0.73, + "grad_norm": 21.444678087790052, + "learning_rate": 1.463522427756609e-05, + "loss": 1.3203, + "step": 60906 + }, + { + "epoch": 0.73, + "grad_norm": 4.404172677665787, + "learning_rate": 1.4634706638150918e-05, + "loss": 1.3641, + "step": 60909 + }, + { + "epoch": 0.73, + "grad_norm": 10.13193351738529, + "learning_rate": 1.4634188982919158e-05, + "loss": 1.7296, + "step": 60912 + }, + { + "epoch": 0.73, + "grad_norm": 5.556135446488374, + "learning_rate": 1.4633671311872579e-05, + "loss": 1.4358, + "step": 60915 + }, + { + "epoch": 0.73, + "grad_norm": 6.0741150457424204, + "learning_rate": 1.4633153625012953e-05, + "loss": 1.3003, + "step": 60918 + }, + { + "epoch": 0.73, + "grad_norm": 10.995150202525117, + "learning_rate": 1.463263592234204e-05, + "loss": 1.3748, + "step": 60921 + }, + { + "epoch": 0.73, + "grad_norm": 8.057301372586089, + "learning_rate": 1.4632118203861609e-05, + "loss": 1.2435, + "step": 60924 + }, + { + "epoch": 0.73, + "grad_norm": 46.22707329820375, + "learning_rate": 1.4631600469573428e-05, + "loss": 1.167, + "step": 60927 + }, + { + "epoch": 0.73, + "grad_norm": 15.55850591116558, + "learning_rate": 1.4631082719479264e-05, + "loss": 1.3597, + "step": 60930 + }, + { + "epoch": 0.73, + "grad_norm": 9.967267056817997, + "learning_rate": 1.463056495358088e-05, + "loss": 1.3404, + "step": 60933 + }, + { + "epoch": 0.73, + "grad_norm": 44.767886097685434, + "learning_rate": 1.4630047171880048e-05, + "loss": 1.8853, + "step": 60936 + }, + { + "epoch": 0.73, + "grad_norm": 17.43949068843066, + "learning_rate": 1.4629529374378533e-05, + "loss": 1.6882, + "step": 60939 + }, + { + "epoch": 0.73, + "grad_norm": 42.47758554663169, + "learning_rate": 1.4629011561078099e-05, + "loss": 1.275, + "step": 60942 + }, + { + "epoch": 0.73, + "grad_norm": 8.946449695126008, + "learning_rate": 1.4628493731980518e-05, + "loss": 1.6263, + "step": 60945 + }, + { + "epoch": 0.73, + "grad_norm": 4.342015192274582, + "learning_rate": 1.4627975887087553e-05, + "loss": 1.3361, + "step": 60948 + }, + { + "epoch": 0.73, + "grad_norm": 15.284273739556072, + "learning_rate": 1.4627458026400975e-05, + "loss": 1.2194, + "step": 60951 + }, + { + "epoch": 0.73, + "grad_norm": 6.308853345789456, + "learning_rate": 1.462694014992255e-05, + "loss": 1.3407, + "step": 60954 + }, + { + "epoch": 0.73, + "grad_norm": 9.534404025613165, + "learning_rate": 1.4626422257654044e-05, + "loss": 1.0097, + "step": 60957 + }, + { + "epoch": 0.73, + "grad_norm": 7.075534969403971, + "learning_rate": 1.4625904349597226e-05, + "loss": 1.3204, + "step": 60960 + }, + { + "epoch": 0.73, + "grad_norm": 21.29337843949467, + "learning_rate": 1.4625386425753864e-05, + "loss": 0.9862, + "step": 60963 + }, + { + "epoch": 0.73, + "grad_norm": 32.05870031055737, + "learning_rate": 1.462486848612572e-05, + "loss": 1.1937, + "step": 60966 + }, + { + "epoch": 0.73, + "grad_norm": 7.2046643568474495, + "learning_rate": 1.462435053071457e-05, + "loss": 1.628, + "step": 60969 + }, + { + "epoch": 0.73, + "grad_norm": 18.500105979609025, + "learning_rate": 1.4623832559522175e-05, + "loss": 1.4221, + "step": 60972 + }, + { + "epoch": 0.73, + "grad_norm": 18.983307706796342, + "learning_rate": 1.4623314572550304e-05, + "loss": 1.754, + "step": 60975 + }, + { + "epoch": 0.73, + "grad_norm": 10.797725633724337, + "learning_rate": 1.4622796569800728e-05, + "loss": 1.4688, + "step": 60978 + }, + { + "epoch": 0.73, + "grad_norm": 16.889825546690854, + "learning_rate": 1.4622278551275212e-05, + "loss": 1.8069, + "step": 60981 + }, + { + "epoch": 0.73, + "grad_norm": 26.88647296055886, + "learning_rate": 1.4621760516975524e-05, + "loss": 1.3973, + "step": 60984 + }, + { + "epoch": 0.73, + "grad_norm": 17.765605790996027, + "learning_rate": 1.4621242466903432e-05, + "loss": 1.6236, + "step": 60987 + }, + { + "epoch": 0.73, + "grad_norm": 27.36925494715013, + "learning_rate": 1.4620724401060705e-05, + "loss": 1.3704, + "step": 60990 + }, + { + "epoch": 0.73, + "grad_norm": 21.959462344481185, + "learning_rate": 1.462020631944911e-05, + "loss": 1.4072, + "step": 60993 + }, + { + "epoch": 0.73, + "grad_norm": 5.372515198759975, + "learning_rate": 1.4619688222070411e-05, + "loss": 0.9914, + "step": 60996 + }, + { + "epoch": 0.73, + "grad_norm": 8.207037714619226, + "learning_rate": 1.4619170108926386e-05, + "loss": 1.6132, + "step": 60999 + }, + { + "epoch": 0.73, + "grad_norm": 4.612084726173163, + "learning_rate": 1.4618651980018794e-05, + "loss": 1.1884, + "step": 61002 + }, + { + "epoch": 0.73, + "grad_norm": 6.04539926118858, + "learning_rate": 1.4618133835349407e-05, + "loss": 1.6486, + "step": 61005 + }, + { + "epoch": 0.73, + "grad_norm": 5.042695681855023, + "learning_rate": 1.461761567491999e-05, + "loss": 1.1313, + "step": 61008 + }, + { + "epoch": 0.73, + "grad_norm": 24.230593569758614, + "learning_rate": 1.4617097498732318e-05, + "loss": 1.2365, + "step": 61011 + }, + { + "epoch": 0.73, + "grad_norm": 22.190041728786653, + "learning_rate": 1.4616579306788155e-05, + "loss": 1.6316, + "step": 61014 + }, + { + "epoch": 0.73, + "grad_norm": 6.044517749269459, + "learning_rate": 1.4616061099089267e-05, + "loss": 1.2726, + "step": 61017 + }, + { + "epoch": 0.73, + "grad_norm": 9.941486966564389, + "learning_rate": 1.4615542875637427e-05, + "loss": 1.5392, + "step": 61020 + }, + { + "epoch": 0.73, + "grad_norm": 2.2348940779565414, + "learning_rate": 1.46150246364344e-05, + "loss": 1.3583, + "step": 61023 + }, + { + "epoch": 0.73, + "grad_norm": 22.963256044191972, + "learning_rate": 1.4614506381481959e-05, + "loss": 1.2119, + "step": 61026 + }, + { + "epoch": 0.73, + "grad_norm": 6.869410397710932, + "learning_rate": 1.4613988110781867e-05, + "loss": 1.3336, + "step": 61029 + }, + { + "epoch": 0.73, + "grad_norm": 9.026289415165442, + "learning_rate": 1.4613469824335898e-05, + "loss": 1.0552, + "step": 61032 + }, + { + "epoch": 0.73, + "grad_norm": 5.680074272104276, + "learning_rate": 1.4612951522145817e-05, + "loss": 1.2904, + "step": 61035 + }, + { + "epoch": 0.73, + "grad_norm": 13.389415908732351, + "learning_rate": 1.4612433204213394e-05, + "loss": 1.1406, + "step": 61038 + }, + { + "epoch": 0.73, + "grad_norm": 23.21784043313647, + "learning_rate": 1.4611914870540398e-05, + "loss": 1.474, + "step": 61041 + }, + { + "epoch": 0.73, + "grad_norm": 16.52792575999567, + "learning_rate": 1.4611396521128597e-05, + "loss": 1.6069, + "step": 61044 + }, + { + "epoch": 0.73, + "grad_norm": 15.966659347388848, + "learning_rate": 1.4610878155979764e-05, + "loss": 1.2049, + "step": 61047 + }, + { + "epoch": 0.73, + "grad_norm": 9.54156175753744, + "learning_rate": 1.4610359775095662e-05, + "loss": 1.3935, + "step": 61050 + }, + { + "epoch": 0.73, + "grad_norm": 49.54670723195853, + "learning_rate": 1.4609841378478062e-05, + "loss": 1.3196, + "step": 61053 + }, + { + "epoch": 0.73, + "grad_norm": 17.271509678761745, + "learning_rate": 1.4609322966128737e-05, + "loss": 1.4204, + "step": 61056 + }, + { + "epoch": 0.73, + "grad_norm": 16.76322567747836, + "learning_rate": 1.4608804538049449e-05, + "loss": 1.2446, + "step": 61059 + }, + { + "epoch": 0.73, + "grad_norm": 15.05895057242544, + "learning_rate": 1.4608286094241972e-05, + "loss": 1.4545, + "step": 61062 + }, + { + "epoch": 0.73, + "grad_norm": 8.780726233222786, + "learning_rate": 1.4607767634708077e-05, + "loss": 1.291, + "step": 61065 + }, + { + "epoch": 0.73, + "grad_norm": 9.33055689257032, + "learning_rate": 1.4607249159449528e-05, + "loss": 1.603, + "step": 61068 + }, + { + "epoch": 0.73, + "grad_norm": 164.76316127913222, + "learning_rate": 1.46067306684681e-05, + "loss": 1.5114, + "step": 61071 + }, + { + "epoch": 0.73, + "grad_norm": 6.625768138280767, + "learning_rate": 1.4606212161765556e-05, + "loss": 1.1693, + "step": 61074 + }, + { + "epoch": 0.73, + "grad_norm": 30.674045838925245, + "learning_rate": 1.4605693639343673e-05, + "loss": 1.2552, + "step": 61077 + }, + { + "epoch": 0.73, + "grad_norm": 12.284513624954682, + "learning_rate": 1.4605175101204213e-05, + "loss": 1.4664, + "step": 61080 + }, + { + "epoch": 0.73, + "grad_norm": 23.391513332086216, + "learning_rate": 1.460465654734895e-05, + "loss": 1.4622, + "step": 61083 + }, + { + "epoch": 0.73, + "grad_norm": 99.7048437336834, + "learning_rate": 1.4604137977779653e-05, + "loss": 1.4237, + "step": 61086 + }, + { + "epoch": 0.73, + "grad_norm": 3.317675625735405, + "learning_rate": 1.4603619392498094e-05, + "loss": 1.5145, + "step": 61089 + }, + { + "epoch": 0.73, + "grad_norm": 58.0767839302068, + "learning_rate": 1.4603100791506037e-05, + "loss": 1.2697, + "step": 61092 + }, + { + "epoch": 0.73, + "grad_norm": 22.5413078251027, + "learning_rate": 1.4602582174805256e-05, + "loss": 1.7126, + "step": 61095 + }, + { + "epoch": 0.73, + "grad_norm": 6.903550162147489, + "learning_rate": 1.460206354239752e-05, + "loss": 1.3979, + "step": 61098 + }, + { + "epoch": 0.73, + "grad_norm": 29.116922748633883, + "learning_rate": 1.4601544894284598e-05, + "loss": 1.4384, + "step": 61101 + }, + { + "epoch": 0.73, + "grad_norm": 9.65765566470661, + "learning_rate": 1.4601026230468262e-05, + "loss": 1.3144, + "step": 61104 + }, + { + "epoch": 0.73, + "grad_norm": 4.073151018613468, + "learning_rate": 1.4600507550950281e-05, + "loss": 1.4819, + "step": 61107 + }, + { + "epoch": 0.73, + "grad_norm": 14.141502415779314, + "learning_rate": 1.4599988855732426e-05, + "loss": 1.0312, + "step": 61110 + }, + { + "epoch": 0.73, + "grad_norm": 22.675439660890284, + "learning_rate": 1.4599470144816463e-05, + "loss": 1.4323, + "step": 61113 + }, + { + "epoch": 0.73, + "grad_norm": 15.541931975731824, + "learning_rate": 1.4598951418204164e-05, + "loss": 1.6277, + "step": 61116 + }, + { + "epoch": 0.73, + "grad_norm": 9.374445510217635, + "learning_rate": 1.4598432675897303e-05, + "loss": 0.8837, + "step": 61119 + }, + { + "epoch": 0.73, + "grad_norm": 16.277068536844425, + "learning_rate": 1.4597913917897648e-05, + "loss": 1.21, + "step": 61122 + }, + { + "epoch": 0.74, + "grad_norm": 7.706701359374387, + "learning_rate": 1.4597395144206968e-05, + "loss": 1.7714, + "step": 61125 + }, + { + "epoch": 0.74, + "grad_norm": 34.16039221559217, + "learning_rate": 1.4596876354827035e-05, + "loss": 1.1445, + "step": 61128 + }, + { + "epoch": 0.74, + "grad_norm": 28.068059531488434, + "learning_rate": 1.4596357549759618e-05, + "loss": 1.462, + "step": 61131 + }, + { + "epoch": 0.74, + "grad_norm": 42.807731020602745, + "learning_rate": 1.4595838729006491e-05, + "loss": 1.2423, + "step": 61134 + }, + { + "epoch": 0.74, + "grad_norm": 16.241909993889834, + "learning_rate": 1.4595319892569417e-05, + "loss": 1.4615, + "step": 61137 + }, + { + "epoch": 0.74, + "grad_norm": 19.460511771741327, + "learning_rate": 1.4594801040450173e-05, + "loss": 1.255, + "step": 61140 + }, + { + "epoch": 0.74, + "grad_norm": 14.507587006643098, + "learning_rate": 1.4594282172650532e-05, + "loss": 1.1521, + "step": 61143 + }, + { + "epoch": 0.74, + "grad_norm": 29.5777135346815, + "learning_rate": 1.4593763289172259e-05, + "loss": 1.4074, + "step": 61146 + }, + { + "epoch": 0.74, + "grad_norm": 3.2355286644636, + "learning_rate": 1.459324439001712e-05, + "loss": 1.6228, + "step": 61149 + }, + { + "epoch": 0.74, + "grad_norm": 9.693885038432933, + "learning_rate": 1.45927254751869e-05, + "loss": 1.5682, + "step": 61152 + }, + { + "epoch": 0.74, + "grad_norm": 17.23076038384963, + "learning_rate": 1.459220654468336e-05, + "loss": 1.4614, + "step": 61155 + }, + { + "epoch": 0.74, + "grad_norm": 91.7822279894541, + "learning_rate": 1.4591687598508274e-05, + "loss": 1.1939, + "step": 61158 + }, + { + "epoch": 0.74, + "grad_norm": 18.743457419946843, + "learning_rate": 1.459116863666341e-05, + "loss": 1.3828, + "step": 61161 + }, + { + "epoch": 0.74, + "grad_norm": 9.242520176584096, + "learning_rate": 1.4590649659150543e-05, + "loss": 1.255, + "step": 61164 + }, + { + "epoch": 0.74, + "grad_norm": 28.22721071779406, + "learning_rate": 1.4590130665971442e-05, + "loss": 1.4694, + "step": 61167 + }, + { + "epoch": 0.74, + "grad_norm": 9.507396690609186, + "learning_rate": 1.4589611657127876e-05, + "loss": 1.0034, + "step": 61170 + }, + { + "epoch": 0.74, + "grad_norm": 5.9410062036106375, + "learning_rate": 1.4589092632621622e-05, + "loss": 1.3965, + "step": 61173 + }, + { + "epoch": 0.74, + "grad_norm": 9.722855639476704, + "learning_rate": 1.4588573592454446e-05, + "loss": 1.3445, + "step": 61176 + }, + { + "epoch": 0.74, + "grad_norm": 41.62269677432359, + "learning_rate": 1.4588054536628123e-05, + "loss": 0.952, + "step": 61179 + }, + { + "epoch": 0.74, + "grad_norm": 23.27288496579051, + "learning_rate": 1.458753546514442e-05, + "loss": 1.5262, + "step": 61182 + }, + { + "epoch": 0.74, + "grad_norm": 17.190245036118746, + "learning_rate": 1.4587016378005112e-05, + "loss": 1.2723, + "step": 61185 + }, + { + "epoch": 0.74, + "grad_norm": 41.56850033621979, + "learning_rate": 1.458649727521197e-05, + "loss": 1.366, + "step": 61188 + }, + { + "epoch": 0.74, + "grad_norm": 10.902355500582951, + "learning_rate": 1.4585978156766763e-05, + "loss": 1.0691, + "step": 61191 + }, + { + "epoch": 0.74, + "grad_norm": 8.306553929262435, + "learning_rate": 1.4585459022671264e-05, + "loss": 1.2017, + "step": 61194 + }, + { + "epoch": 0.74, + "grad_norm": 11.585979499305113, + "learning_rate": 1.458493987292725e-05, + "loss": 1.6259, + "step": 61197 + }, + { + "epoch": 0.74, + "grad_norm": 5.708882399013304, + "learning_rate": 1.4584420707536484e-05, + "loss": 1.3836, + "step": 61200 + }, + { + "epoch": 0.74, + "grad_norm": 7.411448973295081, + "learning_rate": 1.458390152650074e-05, + "loss": 1.4714, + "step": 61203 + }, + { + "epoch": 0.74, + "grad_norm": 10.213523767950461, + "learning_rate": 1.4583382329821793e-05, + "loss": 1.4921, + "step": 61206 + }, + { + "epoch": 0.74, + "grad_norm": 8.507428478698488, + "learning_rate": 1.4582863117501413e-05, + "loss": 1.9099, + "step": 61209 + }, + { + "epoch": 0.74, + "grad_norm": 7.267354533717216, + "learning_rate": 1.458234388954137e-05, + "loss": 1.3494, + "step": 61212 + }, + { + "epoch": 0.74, + "grad_norm": 26.286338490264676, + "learning_rate": 1.4581824645943442e-05, + "loss": 1.1077, + "step": 61215 + }, + { + "epoch": 0.74, + "grad_norm": 28.344393818940055, + "learning_rate": 1.4581305386709395e-05, + "loss": 1.2872, + "step": 61218 + }, + { + "epoch": 0.74, + "grad_norm": 11.73672239674694, + "learning_rate": 1.4580786111841003e-05, + "loss": 1.3134, + "step": 61221 + }, + { + "epoch": 0.74, + "grad_norm": 8.203196061243462, + "learning_rate": 1.4580266821340036e-05, + "loss": 1.0744, + "step": 61224 + }, + { + "epoch": 0.74, + "grad_norm": 13.464450667969558, + "learning_rate": 1.4579747515208272e-05, + "loss": 1.0588, + "step": 61227 + }, + { + "epoch": 0.74, + "grad_norm": 20.147573625766075, + "learning_rate": 1.4579228193447476e-05, + "loss": 1.3381, + "step": 61230 + }, + { + "epoch": 0.74, + "grad_norm": 6.494323489039543, + "learning_rate": 1.4578708856059422e-05, + "loss": 1.5309, + "step": 61233 + }, + { + "epoch": 0.74, + "grad_norm": 6.8054376837193855, + "learning_rate": 1.4578189503045888e-05, + "loss": 1.2255, + "step": 61236 + }, + { + "epoch": 0.74, + "grad_norm": 7.50994978797809, + "learning_rate": 1.457767013440864e-05, + "loss": 1.7953, + "step": 61239 + }, + { + "epoch": 0.74, + "grad_norm": 5.136610657279085, + "learning_rate": 1.4577150750149456e-05, + "loss": 1.3452, + "step": 61242 + }, + { + "epoch": 0.74, + "grad_norm": 4.882457470495338, + "learning_rate": 1.4576631350270101e-05, + "loss": 1.4471, + "step": 61245 + }, + { + "epoch": 0.74, + "grad_norm": 6.740865949811273, + "learning_rate": 1.4576111934772355e-05, + "loss": 1.1342, + "step": 61248 + }, + { + "epoch": 0.74, + "grad_norm": 22.449821873534166, + "learning_rate": 1.4575592503657986e-05, + "loss": 1.2714, + "step": 61251 + }, + { + "epoch": 0.74, + "grad_norm": 21.89721546280914, + "learning_rate": 1.4575073056928766e-05, + "loss": 1.5341, + "step": 61254 + }, + { + "epoch": 0.74, + "grad_norm": 8.179764908190117, + "learning_rate": 1.4574553594586469e-05, + "loss": 1.6367, + "step": 61257 + }, + { + "epoch": 0.74, + "grad_norm": 33.525166740220556, + "learning_rate": 1.4574034116632872e-05, + "loss": 1.4072, + "step": 61260 + }, + { + "epoch": 0.74, + "grad_norm": 17.893709569428314, + "learning_rate": 1.4573514623069742e-05, + "loss": 1.1153, + "step": 61263 + }, + { + "epoch": 0.74, + "grad_norm": 26.720565390525138, + "learning_rate": 1.4572995113898854e-05, + "loss": 1.1322, + "step": 61266 + }, + { + "epoch": 0.74, + "grad_norm": 15.103042712918048, + "learning_rate": 1.4572475589121984e-05, + "loss": 1.5686, + "step": 61269 + }, + { + "epoch": 0.74, + "grad_norm": 10.496829004239569, + "learning_rate": 1.4571956048740898e-05, + "loss": 1.7183, + "step": 61272 + }, + { + "epoch": 0.74, + "grad_norm": 8.432355167119757, + "learning_rate": 1.4571436492757374e-05, + "loss": 1.2481, + "step": 61275 + }, + { + "epoch": 0.74, + "grad_norm": 10.82024655682526, + "learning_rate": 1.4570916921173184e-05, + "loss": 1.2747, + "step": 61278 + }, + { + "epoch": 0.74, + "grad_norm": 8.342162525614919, + "learning_rate": 1.4570397333990103e-05, + "loss": 1.3835, + "step": 61281 + }, + { + "epoch": 0.74, + "grad_norm": 6.459125405217863, + "learning_rate": 1.4569877731209899e-05, + "loss": 1.2323, + "step": 61284 + }, + { + "epoch": 0.74, + "grad_norm": 15.262968240801671, + "learning_rate": 1.4569358112834346e-05, + "loss": 1.4408, + "step": 61287 + }, + { + "epoch": 0.74, + "grad_norm": 21.630385871625908, + "learning_rate": 1.4568838478865224e-05, + "loss": 1.4929, + "step": 61290 + }, + { + "epoch": 0.74, + "grad_norm": 28.19915810333012, + "learning_rate": 1.4568318829304302e-05, + "loss": 1.3204, + "step": 61293 + }, + { + "epoch": 0.74, + "grad_norm": 18.17843548253171, + "learning_rate": 1.4567799164153354e-05, + "loss": 1.6592, + "step": 61296 + }, + { + "epoch": 0.74, + "grad_norm": 6.918638455179696, + "learning_rate": 1.4567279483414147e-05, + "loss": 1.6648, + "step": 61299 + }, + { + "epoch": 0.74, + "grad_norm": 4.276141061019166, + "learning_rate": 1.4566759787088467e-05, + "loss": 1.2973, + "step": 61302 + }, + { + "epoch": 0.74, + "grad_norm": 12.543125217084224, + "learning_rate": 1.4566240075178078e-05, + "loss": 1.3312, + "step": 61305 + }, + { + "epoch": 0.74, + "grad_norm": 8.481889476628666, + "learning_rate": 1.4565720347684758e-05, + "loss": 1.2979, + "step": 61308 + }, + { + "epoch": 0.74, + "grad_norm": 7.417351116978046, + "learning_rate": 1.4565200604610275e-05, + "loss": 1.2289, + "step": 61311 + }, + { + "epoch": 0.74, + "grad_norm": 15.671490429670502, + "learning_rate": 1.4564680845956409e-05, + "loss": 1.0609, + "step": 61314 + }, + { + "epoch": 0.74, + "grad_norm": 26.170380801225793, + "learning_rate": 1.4564161071724934e-05, + "loss": 1.0622, + "step": 61317 + }, + { + "epoch": 0.74, + "grad_norm": 20.004804150552435, + "learning_rate": 1.4563641281917616e-05, + "loss": 1.3877, + "step": 61320 + }, + { + "epoch": 0.74, + "grad_norm": 2.6586041907910443, + "learning_rate": 1.4563121476536239e-05, + "loss": 1.5707, + "step": 61323 + }, + { + "epoch": 0.74, + "grad_norm": 23.464834837552175, + "learning_rate": 1.4562601655582569e-05, + "loss": 1.7715, + "step": 61326 + }, + { + "epoch": 0.74, + "grad_norm": 7.154549379570964, + "learning_rate": 1.4562081819058387e-05, + "loss": 1.6432, + "step": 61329 + }, + { + "epoch": 0.74, + "grad_norm": 9.22488601918341, + "learning_rate": 1.4561561966965458e-05, + "loss": 1.2114, + "step": 61332 + }, + { + "epoch": 0.74, + "grad_norm": 4.962367077419726, + "learning_rate": 1.4561042099305565e-05, + "loss": 1.5179, + "step": 61335 + }, + { + "epoch": 0.74, + "grad_norm": 8.662718928684846, + "learning_rate": 1.4560522216080476e-05, + "loss": 1.3095, + "step": 61338 + }, + { + "epoch": 0.74, + "grad_norm": 11.84118852496997, + "learning_rate": 1.4560002317291966e-05, + "loss": 1.2129, + "step": 61341 + }, + { + "epoch": 0.74, + "grad_norm": 7.691693531180237, + "learning_rate": 1.4559482402941814e-05, + "loss": 1.2909, + "step": 61344 + }, + { + "epoch": 0.74, + "grad_norm": 6.341310993599932, + "learning_rate": 1.4558962473031789e-05, + "loss": 1.1013, + "step": 61347 + }, + { + "epoch": 0.74, + "grad_norm": 10.515351903487979, + "learning_rate": 1.4558442527563666e-05, + "loss": 1.5075, + "step": 61350 + }, + { + "epoch": 0.74, + "grad_norm": 37.74118432318822, + "learning_rate": 1.455792256653922e-05, + "loss": 1.5691, + "step": 61353 + }, + { + "epoch": 0.74, + "grad_norm": 9.457018509719429, + "learning_rate": 1.4557402589960233e-05, + "loss": 1.4611, + "step": 61356 + }, + { + "epoch": 0.74, + "grad_norm": 14.688679820917732, + "learning_rate": 1.4556882597828465e-05, + "loss": 1.1649, + "step": 61359 + }, + { + "epoch": 0.74, + "grad_norm": 18.72960100980903, + "learning_rate": 1.45563625901457e-05, + "loss": 1.5387, + "step": 61362 + }, + { + "epoch": 0.74, + "grad_norm": 25.640464767423428, + "learning_rate": 1.4555842566913714e-05, + "loss": 1.0924, + "step": 61365 + }, + { + "epoch": 0.74, + "grad_norm": 5.659963984987544, + "learning_rate": 1.4555322528134273e-05, + "loss": 1.5794, + "step": 61368 + }, + { + "epoch": 0.74, + "grad_norm": 21.67444535917225, + "learning_rate": 1.455480247380916e-05, + "loss": 1.5052, + "step": 61371 + }, + { + "epoch": 0.74, + "grad_norm": 9.068043527656583, + "learning_rate": 1.4554282403940146e-05, + "loss": 1.5529, + "step": 61374 + }, + { + "epoch": 0.74, + "grad_norm": 6.748806955174262, + "learning_rate": 1.4553762318529007e-05, + "loss": 1.5216, + "step": 61377 + }, + { + "epoch": 0.74, + "grad_norm": 10.42889855218432, + "learning_rate": 1.4553242217577519e-05, + "loss": 1.6057, + "step": 61380 + }, + { + "epoch": 0.74, + "grad_norm": 13.045287942463727, + "learning_rate": 1.4552722101087454e-05, + "loss": 1.2958, + "step": 61383 + }, + { + "epoch": 0.74, + "grad_norm": 11.242020168896143, + "learning_rate": 1.4552201969060588e-05, + "loss": 1.0798, + "step": 61386 + }, + { + "epoch": 0.74, + "grad_norm": 4.9137789814129835, + "learning_rate": 1.4551681821498698e-05, + "loss": 1.4187, + "step": 61389 + }, + { + "epoch": 0.74, + "grad_norm": 3.0763914692391117, + "learning_rate": 1.4551161658403554e-05, + "loss": 1.5443, + "step": 61392 + }, + { + "epoch": 0.74, + "grad_norm": 32.70985395182816, + "learning_rate": 1.4550641479776935e-05, + "loss": 1.8287, + "step": 61395 + }, + { + "epoch": 0.74, + "grad_norm": 13.282259305299894, + "learning_rate": 1.455012128562062e-05, + "loss": 1.5027, + "step": 61398 + }, + { + "epoch": 0.74, + "grad_norm": 14.047021948682174, + "learning_rate": 1.4549601075936376e-05, + "loss": 0.9597, + "step": 61401 + }, + { + "epoch": 0.74, + "grad_norm": 76.52283699925464, + "learning_rate": 1.4549080850725985e-05, + "loss": 1.4918, + "step": 61404 + }, + { + "epoch": 0.74, + "grad_norm": 13.986366127169894, + "learning_rate": 1.4548560609991218e-05, + "loss": 1.3276, + "step": 61407 + }, + { + "epoch": 0.74, + "grad_norm": 15.410602404158386, + "learning_rate": 1.4548040353733854e-05, + "loss": 1.6503, + "step": 61410 + }, + { + "epoch": 0.74, + "grad_norm": 40.308174118877105, + "learning_rate": 1.4547520081955664e-05, + "loss": 0.9893, + "step": 61413 + }, + { + "epoch": 0.74, + "grad_norm": 9.509623238179556, + "learning_rate": 1.4546999794658426e-05, + "loss": 1.111, + "step": 61416 + }, + { + "epoch": 0.74, + "grad_norm": 14.076649334956553, + "learning_rate": 1.4546479491843917e-05, + "loss": 1.3623, + "step": 61419 + }, + { + "epoch": 0.74, + "grad_norm": 25.379407356303105, + "learning_rate": 1.4545959173513909e-05, + "loss": 1.849, + "step": 61422 + }, + { + "epoch": 0.74, + "grad_norm": 16.771687816718742, + "learning_rate": 1.4545438839670183e-05, + "loss": 1.505, + "step": 61425 + }, + { + "epoch": 0.74, + "grad_norm": 7.426200389298609, + "learning_rate": 1.454491849031451e-05, + "loss": 1.3592, + "step": 61428 + }, + { + "epoch": 0.74, + "grad_norm": 14.461398255370899, + "learning_rate": 1.4544398125448666e-05, + "loss": 1.3923, + "step": 61431 + }, + { + "epoch": 0.74, + "grad_norm": 6.305422284926244, + "learning_rate": 1.4543877745074428e-05, + "loss": 1.452, + "step": 61434 + }, + { + "epoch": 0.74, + "grad_norm": 5.812720691281785, + "learning_rate": 1.4543357349193574e-05, + "loss": 1.4198, + "step": 61437 + }, + { + "epoch": 0.74, + "grad_norm": 16.531165855035393, + "learning_rate": 1.4542836937807878e-05, + "loss": 1.3737, + "step": 61440 + }, + { + "epoch": 0.74, + "grad_norm": 30.66672960481962, + "learning_rate": 1.4542316510919115e-05, + "loss": 1.225, + "step": 61443 + }, + { + "epoch": 0.74, + "grad_norm": 13.774705375200172, + "learning_rate": 1.4541796068529061e-05, + "loss": 1.1068, + "step": 61446 + }, + { + "epoch": 0.74, + "grad_norm": 52.076080308495364, + "learning_rate": 1.4541275610639493e-05, + "loss": 1.4961, + "step": 61449 + }, + { + "epoch": 0.74, + "grad_norm": 26.312562759451765, + "learning_rate": 1.4540755137252189e-05, + "loss": 1.5207, + "step": 61452 + }, + { + "epoch": 0.74, + "grad_norm": 13.61711523777739, + "learning_rate": 1.454023464836892e-05, + "loss": 1.4608, + "step": 61455 + }, + { + "epoch": 0.74, + "grad_norm": 10.613623550309972, + "learning_rate": 1.4539714143991468e-05, + "loss": 1.5658, + "step": 61458 + }, + { + "epoch": 0.74, + "grad_norm": 14.374042456932871, + "learning_rate": 1.4539193624121608e-05, + "loss": 1.1851, + "step": 61461 + }, + { + "epoch": 0.74, + "grad_norm": 9.461513943010484, + "learning_rate": 1.4538673088761112e-05, + "loss": 1.1119, + "step": 61464 + }, + { + "epoch": 0.74, + "grad_norm": 11.163193431764636, + "learning_rate": 1.4538152537911762e-05, + "loss": 1.2412, + "step": 61467 + }, + { + "epoch": 0.74, + "grad_norm": 6.809673194819501, + "learning_rate": 1.453763197157533e-05, + "loss": 1.5749, + "step": 61470 + }, + { + "epoch": 0.74, + "grad_norm": 5.02910130351912, + "learning_rate": 1.4537111389753598e-05, + "loss": 1.2167, + "step": 61473 + }, + { + "epoch": 0.74, + "grad_norm": 33.88366024840518, + "learning_rate": 1.4536590792448335e-05, + "loss": 1.3589, + "step": 61476 + }, + { + "epoch": 0.74, + "grad_norm": 13.254331671139274, + "learning_rate": 1.4536070179661323e-05, + "loss": 1.4334, + "step": 61479 + }, + { + "epoch": 0.74, + "grad_norm": 4.128790552931256, + "learning_rate": 1.453554955139434e-05, + "loss": 1.1685, + "step": 61482 + }, + { + "epoch": 0.74, + "grad_norm": 11.582997149936855, + "learning_rate": 1.4535028907649158e-05, + "loss": 1.285, + "step": 61485 + }, + { + "epoch": 0.74, + "grad_norm": 13.646269310176312, + "learning_rate": 1.4534508248427555e-05, + "loss": 1.4306, + "step": 61488 + }, + { + "epoch": 0.74, + "grad_norm": 14.216930114532156, + "learning_rate": 1.453398757373131e-05, + "loss": 1.8395, + "step": 61491 + }, + { + "epoch": 0.74, + "grad_norm": 14.530678694628618, + "learning_rate": 1.4533466883562198e-05, + "loss": 1.344, + "step": 61494 + }, + { + "epoch": 0.74, + "grad_norm": 3.938313083750426, + "learning_rate": 1.4532946177921997e-05, + "loss": 1.3013, + "step": 61497 + }, + { + "epoch": 0.74, + "grad_norm": 7.5636702187311124, + "learning_rate": 1.453242545681248e-05, + "loss": 1.7113, + "step": 61500 + }, + { + "epoch": 0.74, + "grad_norm": 7.236609352858919, + "learning_rate": 1.4531904720235432e-05, + "loss": 1.2125, + "step": 61503 + }, + { + "epoch": 0.74, + "grad_norm": 6.140044640599011, + "learning_rate": 1.4531383968192624e-05, + "loss": 1.429, + "step": 61506 + }, + { + "epoch": 0.74, + "grad_norm": 4.760213476635136, + "learning_rate": 1.4530863200685835e-05, + "loss": 1.3217, + "step": 61509 + }, + { + "epoch": 0.74, + "grad_norm": 6.118950700585509, + "learning_rate": 1.4530342417716841e-05, + "loss": 1.5054, + "step": 61512 + }, + { + "epoch": 0.74, + "grad_norm": 45.26512550799636, + "learning_rate": 1.4529821619287421e-05, + "loss": 1.3644, + "step": 61515 + }, + { + "epoch": 0.74, + "grad_norm": 15.433900438071936, + "learning_rate": 1.4529300805399352e-05, + "loss": 1.6341, + "step": 61518 + }, + { + "epoch": 0.74, + "grad_norm": 9.16114885430467, + "learning_rate": 1.4528779976054408e-05, + "loss": 1.0984, + "step": 61521 + }, + { + "epoch": 0.74, + "grad_norm": 3.5159620350778775, + "learning_rate": 1.4528259131254371e-05, + "loss": 1.5227, + "step": 61524 + }, + { + "epoch": 0.74, + "grad_norm": 8.747891650929146, + "learning_rate": 1.4527738271001018e-05, + "loss": 1.3986, + "step": 61527 + }, + { + "epoch": 0.74, + "grad_norm": 9.418198003979965, + "learning_rate": 1.4527217395296123e-05, + "loss": 1.436, + "step": 61530 + }, + { + "epoch": 0.74, + "grad_norm": 12.98938142518291, + "learning_rate": 1.4526696504141466e-05, + "loss": 1.3497, + "step": 61533 + }, + { + "epoch": 0.74, + "grad_norm": 3.7251459921582946, + "learning_rate": 1.4526175597538825e-05, + "loss": 1.4958, + "step": 61536 + }, + { + "epoch": 0.74, + "grad_norm": 32.42518064356491, + "learning_rate": 1.452565467548998e-05, + "loss": 1.2727, + "step": 61539 + }, + { + "epoch": 0.74, + "grad_norm": 11.165483223773535, + "learning_rate": 1.4525133737996701e-05, + "loss": 1.7595, + "step": 61542 + }, + { + "epoch": 0.74, + "grad_norm": 8.618306597619174, + "learning_rate": 1.4524612785060772e-05, + "loss": 1.3431, + "step": 61545 + }, + { + "epoch": 0.74, + "grad_norm": 14.60475357153056, + "learning_rate": 1.4524091816683971e-05, + "loss": 1.1608, + "step": 61548 + }, + { + "epoch": 0.74, + "grad_norm": 11.026728250716488, + "learning_rate": 1.4523570832868072e-05, + "loss": 1.5959, + "step": 61551 + }, + { + "epoch": 0.74, + "grad_norm": 41.1186692658719, + "learning_rate": 1.4523049833614855e-05, + "loss": 1.0726, + "step": 61554 + }, + { + "epoch": 0.74, + "grad_norm": 5.873665618773768, + "learning_rate": 1.45225288189261e-05, + "loss": 1.5027, + "step": 61557 + }, + { + "epoch": 0.74, + "grad_norm": 13.653517853680983, + "learning_rate": 1.4522007788803583e-05, + "loss": 1.3023, + "step": 61560 + }, + { + "epoch": 0.74, + "grad_norm": 16.65434803547177, + "learning_rate": 1.452148674324908e-05, + "loss": 1.2834, + "step": 61563 + }, + { + "epoch": 0.74, + "grad_norm": 7.634956344937502, + "learning_rate": 1.4520965682264371e-05, + "loss": 1.5948, + "step": 61566 + }, + { + "epoch": 0.74, + "grad_norm": 53.76863194958499, + "learning_rate": 1.4520444605851238e-05, + "loss": 1.4654, + "step": 61569 + }, + { + "epoch": 0.74, + "grad_norm": 14.746712470442946, + "learning_rate": 1.4519923514011455e-05, + "loss": 1.749, + "step": 61572 + }, + { + "epoch": 0.74, + "grad_norm": 25.115875649045094, + "learning_rate": 1.45194024067468e-05, + "loss": 0.9354, + "step": 61575 + }, + { + "epoch": 0.74, + "grad_norm": 14.139106030466463, + "learning_rate": 1.4518881284059054e-05, + "loss": 1.2356, + "step": 61578 + }, + { + "epoch": 0.74, + "grad_norm": 4.932827241373597, + "learning_rate": 1.4518360145949993e-05, + "loss": 1.4416, + "step": 61581 + }, + { + "epoch": 0.74, + "grad_norm": 14.23423387004122, + "learning_rate": 1.4517838992421393e-05, + "loss": 1.5514, + "step": 61584 + }, + { + "epoch": 0.74, + "grad_norm": 13.222403468022879, + "learning_rate": 1.4517317823475039e-05, + "loss": 1.5431, + "step": 61587 + }, + { + "epoch": 0.74, + "grad_norm": 16.62080877776175, + "learning_rate": 1.4516796639112709e-05, + "loss": 1.3532, + "step": 61590 + }, + { + "epoch": 0.74, + "grad_norm": 27.51584352854562, + "learning_rate": 1.4516275439336176e-05, + "loss": 1.5697, + "step": 61593 + }, + { + "epoch": 0.74, + "grad_norm": 6.292233048513829, + "learning_rate": 1.451575422414722e-05, + "loss": 1.4213, + "step": 61596 + }, + { + "epoch": 0.74, + "grad_norm": 14.09764127116902, + "learning_rate": 1.4515232993547623e-05, + "loss": 1.1688, + "step": 61599 + }, + { + "epoch": 0.74, + "grad_norm": 3.9157061277601377, + "learning_rate": 1.4514711747539165e-05, + "loss": 1.3703, + "step": 61602 + }, + { + "epoch": 0.74, + "grad_norm": 17.6818013824399, + "learning_rate": 1.4514190486123618e-05, + "loss": 1.0562, + "step": 61605 + }, + { + "epoch": 0.74, + "grad_norm": 19.639006034903453, + "learning_rate": 1.4513669209302766e-05, + "loss": 1.5451, + "step": 61608 + }, + { + "epoch": 0.74, + "grad_norm": 17.03592668606688, + "learning_rate": 1.4513147917078388e-05, + "loss": 1.023, + "step": 61611 + }, + { + "epoch": 0.74, + "grad_norm": 11.956374363570882, + "learning_rate": 1.451262660945226e-05, + "loss": 1.5712, + "step": 61614 + }, + { + "epoch": 0.74, + "grad_norm": 26.134356341368765, + "learning_rate": 1.4512105286426163e-05, + "loss": 1.6847, + "step": 61617 + }, + { + "epoch": 0.74, + "grad_norm": 28.33101657993339, + "learning_rate": 1.4511583948001877e-05, + "loss": 1.5139, + "step": 61620 + }, + { + "epoch": 0.74, + "grad_norm": 72.74471080272015, + "learning_rate": 1.451106259418118e-05, + "loss": 1.4292, + "step": 61623 + }, + { + "epoch": 0.74, + "grad_norm": 3.856014471671674, + "learning_rate": 1.4510541224965852e-05, + "loss": 1.8184, + "step": 61626 + }, + { + "epoch": 0.74, + "grad_norm": 2.8209207717500964, + "learning_rate": 1.4510019840357669e-05, + "loss": 1.5151, + "step": 61629 + }, + { + "epoch": 0.74, + "grad_norm": 6.884471517714612, + "learning_rate": 1.4509498440358416e-05, + "loss": 1.4051, + "step": 61632 + }, + { + "epoch": 0.74, + "grad_norm": 12.877647744258605, + "learning_rate": 1.4508977024969869e-05, + "loss": 1.1756, + "step": 61635 + }, + { + "epoch": 0.74, + "grad_norm": 5.249125151589249, + "learning_rate": 1.4508455594193803e-05, + "loss": 1.1992, + "step": 61638 + }, + { + "epoch": 0.74, + "grad_norm": 14.395932566254615, + "learning_rate": 1.4507934148032006e-05, + "loss": 1.4688, + "step": 61641 + }, + { + "epoch": 0.74, + "grad_norm": 15.167070687602399, + "learning_rate": 1.4507412686486254e-05, + "loss": 1.6566, + "step": 61644 + }, + { + "epoch": 0.74, + "grad_norm": 11.746085251079775, + "learning_rate": 1.4506891209558323e-05, + "loss": 1.4844, + "step": 61647 + }, + { + "epoch": 0.74, + "grad_norm": 29.94604752430608, + "learning_rate": 1.4506369717249996e-05, + "loss": 1.4553, + "step": 61650 + }, + { + "epoch": 0.74, + "grad_norm": 47.74573948583986, + "learning_rate": 1.4505848209563053e-05, + "loss": 1.3746, + "step": 61653 + }, + { + "epoch": 0.74, + "grad_norm": 22.75206851864223, + "learning_rate": 1.4505326686499275e-05, + "loss": 1.3553, + "step": 61656 + }, + { + "epoch": 0.74, + "grad_norm": 4.468405204911702, + "learning_rate": 1.4504805148060438e-05, + "loss": 1.2717, + "step": 61659 + }, + { + "epoch": 0.74, + "grad_norm": 20.85348242410374, + "learning_rate": 1.4504283594248324e-05, + "loss": 1.6186, + "step": 61662 + }, + { + "epoch": 0.74, + "grad_norm": 3.9656326945316427, + "learning_rate": 1.4503762025064714e-05, + "loss": 1.1888, + "step": 61665 + }, + { + "epoch": 0.74, + "grad_norm": 51.67891108322155, + "learning_rate": 1.4503240440511386e-05, + "loss": 1.367, + "step": 61668 + }, + { + "epoch": 0.74, + "grad_norm": 7.523859627627094, + "learning_rate": 1.4502718840590118e-05, + "loss": 1.445, + "step": 61671 + }, + { + "epoch": 0.74, + "grad_norm": 7.046783653618836, + "learning_rate": 1.4502197225302692e-05, + "loss": 1.3656, + "step": 61674 + }, + { + "epoch": 0.74, + "grad_norm": 8.196045090696225, + "learning_rate": 1.4501675594650892e-05, + "loss": 1.03, + "step": 61677 + }, + { + "epoch": 0.74, + "grad_norm": 5.315572969240003, + "learning_rate": 1.4501153948636493e-05, + "loss": 1.5659, + "step": 61680 + }, + { + "epoch": 0.74, + "grad_norm": 4.342577564820317, + "learning_rate": 1.4500632287261275e-05, + "loss": 1.6785, + "step": 61683 + }, + { + "epoch": 0.74, + "grad_norm": 3.704346220980694, + "learning_rate": 1.4500110610527024e-05, + "loss": 1.5644, + "step": 61686 + }, + { + "epoch": 0.74, + "grad_norm": 10.828418200681678, + "learning_rate": 1.4499588918435517e-05, + "loss": 1.3669, + "step": 61689 + }, + { + "epoch": 0.74, + "grad_norm": 12.601017439628746, + "learning_rate": 1.4499067210988529e-05, + "loss": 1.6169, + "step": 61692 + }, + { + "epoch": 0.74, + "grad_norm": 5.416777806255498, + "learning_rate": 1.4498545488187847e-05, + "loss": 1.3429, + "step": 61695 + }, + { + "epoch": 0.74, + "grad_norm": 7.1469100491470305, + "learning_rate": 1.4498023750035251e-05, + "loss": 1.3911, + "step": 61698 + }, + { + "epoch": 0.74, + "grad_norm": 9.47622910815834, + "learning_rate": 1.449750199653252e-05, + "loss": 1.2663, + "step": 61701 + }, + { + "epoch": 0.74, + "grad_norm": 8.470283350052455, + "learning_rate": 1.4496980227681432e-05, + "loss": 1.3152, + "step": 61704 + }, + { + "epoch": 0.74, + "grad_norm": 10.71289635112305, + "learning_rate": 1.4496458443483774e-05, + "loss": 1.3492, + "step": 61707 + }, + { + "epoch": 0.74, + "grad_norm": 10.147471389657248, + "learning_rate": 1.4495936643941323e-05, + "loss": 1.4917, + "step": 61710 + }, + { + "epoch": 0.74, + "grad_norm": 10.22885437724695, + "learning_rate": 1.4495414829055856e-05, + "loss": 1.3478, + "step": 61713 + }, + { + "epoch": 0.74, + "grad_norm": 18.64742372496274, + "learning_rate": 1.4494892998829158e-05, + "loss": 1.6172, + "step": 61716 + }, + { + "epoch": 0.74, + "grad_norm": 10.80901596488007, + "learning_rate": 1.4494371153263011e-05, + "loss": 1.339, + "step": 61719 + }, + { + "epoch": 0.74, + "grad_norm": 12.156892752312277, + "learning_rate": 1.4493849292359194e-05, + "loss": 1.5631, + "step": 61722 + }, + { + "epoch": 0.74, + "grad_norm": 47.49445183979466, + "learning_rate": 1.4493327416119487e-05, + "loss": 1.377, + "step": 61725 + }, + { + "epoch": 0.74, + "grad_norm": 3.7307901371799903, + "learning_rate": 1.4492805524545674e-05, + "loss": 1.5813, + "step": 61728 + }, + { + "epoch": 0.74, + "grad_norm": 10.235242132037996, + "learning_rate": 1.4492283617639534e-05, + "loss": 1.4894, + "step": 61731 + }, + { + "epoch": 0.74, + "grad_norm": 7.497268219989141, + "learning_rate": 1.4491761695402846e-05, + "loss": 0.9937, + "step": 61734 + }, + { + "epoch": 0.74, + "grad_norm": 43.5116927407133, + "learning_rate": 1.4491239757837392e-05, + "loss": 1.7396, + "step": 61737 + }, + { + "epoch": 0.74, + "grad_norm": 7.439227552063059, + "learning_rate": 1.449071780494496e-05, + "loss": 1.6098, + "step": 61740 + }, + { + "epoch": 0.74, + "grad_norm": 7.624720900703849, + "learning_rate": 1.4490195836727321e-05, + "loss": 1.5346, + "step": 61743 + }, + { + "epoch": 0.74, + "grad_norm": 10.540492733751726, + "learning_rate": 1.4489673853186261e-05, + "loss": 1.3334, + "step": 61746 + }, + { + "epoch": 0.74, + "grad_norm": 1.9550608230824067, + "learning_rate": 1.4489151854323563e-05, + "loss": 1.6883, + "step": 61749 + }, + { + "epoch": 0.74, + "grad_norm": 4.940245578509012, + "learning_rate": 1.4488629840141006e-05, + "loss": 1.253, + "step": 61752 + }, + { + "epoch": 0.74, + "grad_norm": 7.441959693941268, + "learning_rate": 1.4488107810640373e-05, + "loss": 1.2853, + "step": 61755 + }, + { + "epoch": 0.74, + "grad_norm": 11.355568659856495, + "learning_rate": 1.4487585765823444e-05, + "loss": 1.3275, + "step": 61758 + }, + { + "epoch": 0.74, + "grad_norm": 5.792719610032986, + "learning_rate": 1.4487063705692004e-05, + "loss": 1.3788, + "step": 61761 + }, + { + "epoch": 0.74, + "grad_norm": 13.222144107878593, + "learning_rate": 1.4486541630247825e-05, + "loss": 1.1343, + "step": 61764 + }, + { + "epoch": 0.74, + "grad_norm": 18.252011173000717, + "learning_rate": 1.4486019539492701e-05, + "loss": 1.6326, + "step": 61767 + }, + { + "epoch": 0.74, + "grad_norm": 11.545080938665652, + "learning_rate": 1.4485497433428406e-05, + "loss": 1.471, + "step": 61770 + }, + { + "epoch": 0.74, + "grad_norm": 5.119722119557686, + "learning_rate": 1.4484975312056725e-05, + "loss": 1.4042, + "step": 61773 + }, + { + "epoch": 0.74, + "grad_norm": 27.715210049256516, + "learning_rate": 1.4484453175379439e-05, + "loss": 1.249, + "step": 61776 + }, + { + "epoch": 0.74, + "grad_norm": 6.577057807322401, + "learning_rate": 1.4483931023398328e-05, + "loss": 1.4912, + "step": 61779 + }, + { + "epoch": 0.74, + "grad_norm": 9.05662519024229, + "learning_rate": 1.448340885611518e-05, + "loss": 1.7725, + "step": 61782 + }, + { + "epoch": 0.74, + "grad_norm": 28.02896585114856, + "learning_rate": 1.4482886673531767e-05, + "loss": 1.424, + "step": 61785 + }, + { + "epoch": 0.74, + "grad_norm": 22.27293240728646, + "learning_rate": 1.448236447564988e-05, + "loss": 1.4226, + "step": 61788 + }, + { + "epoch": 0.74, + "grad_norm": 22.99767410386356, + "learning_rate": 1.4481842262471293e-05, + "loss": 1.1027, + "step": 61791 + }, + { + "epoch": 0.74, + "grad_norm": 14.942865940870298, + "learning_rate": 1.44813200339978e-05, + "loss": 1.1621, + "step": 61794 + }, + { + "epoch": 0.74, + "grad_norm": 7.2939728788289715, + "learning_rate": 1.4480797790231167e-05, + "loss": 1.5273, + "step": 61797 + }, + { + "epoch": 0.74, + "grad_norm": 6.461692339023313, + "learning_rate": 1.448027553117319e-05, + "loss": 1.6275, + "step": 61800 + }, + { + "epoch": 0.74, + "grad_norm": 11.477410665711623, + "learning_rate": 1.4479753256825648e-05, + "loss": 1.7887, + "step": 61803 + }, + { + "epoch": 0.74, + "grad_norm": 48.08404120890595, + "learning_rate": 1.447923096719032e-05, + "loss": 1.1658, + "step": 61806 + }, + { + "epoch": 0.74, + "grad_norm": 26.58161836985163, + "learning_rate": 1.4478708662268989e-05, + "loss": 1.2131, + "step": 61809 + }, + { + "epoch": 0.74, + "grad_norm": 11.988922397802062, + "learning_rate": 1.4478186342063439e-05, + "loss": 1.6187, + "step": 61812 + }, + { + "epoch": 0.74, + "grad_norm": 27.303378486243737, + "learning_rate": 1.4477664006575456e-05, + "loss": 1.6007, + "step": 61815 + }, + { + "epoch": 0.74, + "grad_norm": 7.641103406725288, + "learning_rate": 1.4477141655806812e-05, + "loss": 1.6586, + "step": 61818 + }, + { + "epoch": 0.74, + "grad_norm": 18.621031008010416, + "learning_rate": 1.4476619289759302e-05, + "loss": 1.5023, + "step": 61821 + }, + { + "epoch": 0.74, + "grad_norm": 19.442136886124825, + "learning_rate": 1.44760969084347e-05, + "loss": 0.9181, + "step": 61824 + }, + { + "epoch": 0.74, + "grad_norm": 11.366326456522192, + "learning_rate": 1.447557451183479e-05, + "loss": 1.144, + "step": 61827 + }, + { + "epoch": 0.74, + "grad_norm": 17.682735122845386, + "learning_rate": 1.4475052099961358e-05, + "loss": 1.2738, + "step": 61830 + }, + { + "epoch": 0.74, + "grad_norm": 9.360314694081332, + "learning_rate": 1.4474529672816184e-05, + "loss": 1.2629, + "step": 61833 + }, + { + "epoch": 0.74, + "grad_norm": 11.376406949753134, + "learning_rate": 1.4474007230401057e-05, + "loss": 1.8564, + "step": 61836 + }, + { + "epoch": 0.74, + "grad_norm": 60.29841834479047, + "learning_rate": 1.447348477271775e-05, + "loss": 1.2827, + "step": 61839 + }, + { + "epoch": 0.74, + "grad_norm": 2.670747135839561, + "learning_rate": 1.447296229976805e-05, + "loss": 1.4066, + "step": 61842 + }, + { + "epoch": 0.74, + "grad_norm": 4.737524886918601, + "learning_rate": 1.4472439811553745e-05, + "loss": 1.3837, + "step": 61845 + }, + { + "epoch": 0.74, + "grad_norm": 9.652475646997688, + "learning_rate": 1.447191730807661e-05, + "loss": 1.5488, + "step": 61848 + }, + { + "epoch": 0.74, + "grad_norm": 12.555466925815503, + "learning_rate": 1.4471394789338433e-05, + "loss": 1.3747, + "step": 61851 + }, + { + "epoch": 0.74, + "grad_norm": 3.8355883167526437, + "learning_rate": 1.4470872255340995e-05, + "loss": 1.5092, + "step": 61854 + }, + { + "epoch": 0.74, + "grad_norm": 25.528608391525825, + "learning_rate": 1.4470349706086087e-05, + "loss": 1.1422, + "step": 61857 + }, + { + "epoch": 0.74, + "grad_norm": 30.772352268792318, + "learning_rate": 1.4469827141575476e-05, + "loss": 1.4712, + "step": 61860 + }, + { + "epoch": 0.74, + "grad_norm": 28.427535406995226, + "learning_rate": 1.4469304561810962e-05, + "loss": 1.2661, + "step": 61863 + }, + { + "epoch": 0.74, + "grad_norm": 34.12563754859163, + "learning_rate": 1.4468781966794318e-05, + "loss": 1.1658, + "step": 61866 + }, + { + "epoch": 0.74, + "grad_norm": 5.420440893713072, + "learning_rate": 1.4468259356527334e-05, + "loss": 1.371, + "step": 61869 + }, + { + "epoch": 0.74, + "grad_norm": 47.12412152152141, + "learning_rate": 1.4467736731011787e-05, + "loss": 1.2597, + "step": 61872 + }, + { + "epoch": 0.74, + "grad_norm": 17.611854886639378, + "learning_rate": 1.4467214090249466e-05, + "loss": 1.4369, + "step": 61875 + }, + { + "epoch": 0.74, + "grad_norm": 9.047372389243652, + "learning_rate": 1.446669143424215e-05, + "loss": 1.4863, + "step": 61878 + }, + { + "epoch": 0.74, + "grad_norm": 76.71273666286089, + "learning_rate": 1.4466168762991625e-05, + "loss": 1.2575, + "step": 61881 + }, + { + "epoch": 0.74, + "grad_norm": 8.166153069597565, + "learning_rate": 1.4465646076499677e-05, + "loss": 1.4255, + "step": 61884 + }, + { + "epoch": 0.74, + "grad_norm": 34.17426886706727, + "learning_rate": 1.4465123374768087e-05, + "loss": 1.1345, + "step": 61887 + }, + { + "epoch": 0.74, + "grad_norm": 13.095602222056987, + "learning_rate": 1.446460065779864e-05, + "loss": 1.2177, + "step": 61890 + }, + { + "epoch": 0.74, + "grad_norm": 23.12543181649506, + "learning_rate": 1.4464077925593117e-05, + "loss": 1.5477, + "step": 61893 + }, + { + "epoch": 0.74, + "grad_norm": 22.889972895716497, + "learning_rate": 1.4463555178153306e-05, + "loss": 1.3549, + "step": 61896 + }, + { + "epoch": 0.74, + "grad_norm": 16.150482201153096, + "learning_rate": 1.4463032415480988e-05, + "loss": 1.4806, + "step": 61899 + }, + { + "epoch": 0.74, + "grad_norm": 18.102875710687137, + "learning_rate": 1.4462509637577948e-05, + "loss": 1.3006, + "step": 61902 + }, + { + "epoch": 0.74, + "grad_norm": 68.96057707635383, + "learning_rate": 1.4461986844445968e-05, + "loss": 1.3226, + "step": 61905 + }, + { + "epoch": 0.74, + "grad_norm": 13.17200028810646, + "learning_rate": 1.4461464036086834e-05, + "loss": 1.3109, + "step": 61908 + }, + { + "epoch": 0.74, + "grad_norm": 10.887038863288561, + "learning_rate": 1.4460941212502334e-05, + "loss": 1.2391, + "step": 61911 + }, + { + "epoch": 0.74, + "grad_norm": 5.956193044553968, + "learning_rate": 1.4460418373694247e-05, + "loss": 1.6928, + "step": 61914 + }, + { + "epoch": 0.74, + "grad_norm": 8.97496453130726, + "learning_rate": 1.4459895519664357e-05, + "loss": 1.2736, + "step": 61917 + }, + { + "epoch": 0.74, + "grad_norm": 12.145600132378487, + "learning_rate": 1.4459372650414455e-05, + "loss": 1.616, + "step": 61920 + }, + { + "epoch": 0.74, + "grad_norm": 4.818095473783446, + "learning_rate": 1.4458849765946317e-05, + "loss": 1.4261, + "step": 61923 + }, + { + "epoch": 0.74, + "grad_norm": 11.058035125743958, + "learning_rate": 1.4458326866261726e-05, + "loss": 1.6219, + "step": 61926 + }, + { + "epoch": 0.74, + "grad_norm": 4.432141395309549, + "learning_rate": 1.4457803951362479e-05, + "loss": 1.2083, + "step": 61929 + }, + { + "epoch": 0.74, + "grad_norm": 15.757140648193477, + "learning_rate": 1.4457281021250352e-05, + "loss": 1.1312, + "step": 61932 + }, + { + "epoch": 0.74, + "grad_norm": 73.65796322804137, + "learning_rate": 1.4456758075927124e-05, + "loss": 1.3977, + "step": 61935 + }, + { + "epoch": 0.74, + "grad_norm": 6.1257117873321425, + "learning_rate": 1.4456235115394593e-05, + "loss": 1.5219, + "step": 61938 + }, + { + "epoch": 0.74, + "grad_norm": 13.111013396784264, + "learning_rate": 1.4455712139654532e-05, + "loss": 1.4693, + "step": 61941 + }, + { + "epoch": 0.74, + "grad_norm": 257.77096213624066, + "learning_rate": 1.4455189148708733e-05, + "loss": 1.7081, + "step": 61944 + }, + { + "epoch": 0.74, + "grad_norm": 13.556872758829385, + "learning_rate": 1.4454666142558975e-05, + "loss": 1.3582, + "step": 61947 + }, + { + "epoch": 0.74, + "grad_norm": 10.711855591137011, + "learning_rate": 1.4454143121207049e-05, + "loss": 1.6372, + "step": 61950 + }, + { + "epoch": 0.74, + "grad_norm": 10.396534891711173, + "learning_rate": 1.4453620084654737e-05, + "loss": 1.4626, + "step": 61953 + }, + { + "epoch": 0.75, + "grad_norm": 34.609493820554086, + "learning_rate": 1.4453097032903824e-05, + "loss": 1.3161, + "step": 61956 + }, + { + "epoch": 0.75, + "grad_norm": 11.949699172125108, + "learning_rate": 1.4452573965956094e-05, + "loss": 1.2933, + "step": 61959 + }, + { + "epoch": 0.75, + "grad_norm": 13.610787772759922, + "learning_rate": 1.4452050883813334e-05, + "loss": 1.6552, + "step": 61962 + }, + { + "epoch": 0.75, + "grad_norm": 3.061566732148487, + "learning_rate": 1.4451527786477326e-05, + "loss": 0.9918, + "step": 61965 + }, + { + "epoch": 0.75, + "grad_norm": 26.15262155947436, + "learning_rate": 1.4451004673949856e-05, + "loss": 1.2477, + "step": 61968 + }, + { + "epoch": 0.75, + "grad_norm": 4.3540727753617805, + "learning_rate": 1.4450481546232713e-05, + "loss": 1.5202, + "step": 61971 + }, + { + "epoch": 0.75, + "grad_norm": 16.397271313234658, + "learning_rate": 1.444995840332768e-05, + "loss": 1.2182, + "step": 61974 + }, + { + "epoch": 0.75, + "grad_norm": 14.481273265428202, + "learning_rate": 1.444943524523654e-05, + "loss": 1.3179, + "step": 61977 + }, + { + "epoch": 0.75, + "grad_norm": 6.111426922500756, + "learning_rate": 1.4448912071961081e-05, + "loss": 1.5184, + "step": 61980 + }, + { + "epoch": 0.75, + "grad_norm": 4.656903240597188, + "learning_rate": 1.4448388883503087e-05, + "loss": 1.5798, + "step": 61983 + }, + { + "epoch": 0.75, + "grad_norm": 4.067034150996934, + "learning_rate": 1.4447865679864346e-05, + "loss": 1.4554, + "step": 61986 + }, + { + "epoch": 0.75, + "grad_norm": 11.331645664498259, + "learning_rate": 1.4447342461046636e-05, + "loss": 1.4494, + "step": 61989 + }, + { + "epoch": 0.75, + "grad_norm": 11.91072444840533, + "learning_rate": 1.4446819227051754e-05, + "loss": 1.4052, + "step": 61992 + }, + { + "epoch": 0.75, + "grad_norm": 30.120041212207134, + "learning_rate": 1.4446295977881478e-05, + "loss": 1.4765, + "step": 61995 + }, + { + "epoch": 0.75, + "grad_norm": 50.97829815092853, + "learning_rate": 1.4445772713537595e-05, + "loss": 1.4639, + "step": 61998 + }, + { + "epoch": 0.75, + "grad_norm": 13.204819346875357, + "learning_rate": 1.444524943402189e-05, + "loss": 1.3183, + "step": 62001 + }, + { + "epoch": 0.75, + "grad_norm": 37.74814934127398, + "learning_rate": 1.4444726139336151e-05, + "loss": 1.7929, + "step": 62004 + }, + { + "epoch": 0.75, + "grad_norm": 20.733006711138465, + "learning_rate": 1.4444202829482164e-05, + "loss": 1.5712, + "step": 62007 + }, + { + "epoch": 0.75, + "grad_norm": 6.0977662939577995, + "learning_rate": 1.4443679504461715e-05, + "loss": 1.7083, + "step": 62010 + }, + { + "epoch": 0.75, + "grad_norm": 47.71464056667683, + "learning_rate": 1.4443156164276582e-05, + "loss": 1.5234, + "step": 62013 + }, + { + "epoch": 0.75, + "grad_norm": 10.670561943514604, + "learning_rate": 1.444263280892856e-05, + "loss": 1.3094, + "step": 62016 + }, + { + "epoch": 0.75, + "grad_norm": 8.693136611221554, + "learning_rate": 1.4442109438419435e-05, + "loss": 1.3927, + "step": 62019 + }, + { + "epoch": 0.75, + "grad_norm": 14.109688684970948, + "learning_rate": 1.4441586052750988e-05, + "loss": 1.407, + "step": 62022 + }, + { + "epoch": 0.75, + "grad_norm": 18.50332500232424, + "learning_rate": 1.444106265192501e-05, + "loss": 1.4424, + "step": 62025 + }, + { + "epoch": 0.75, + "grad_norm": 20.77445420502808, + "learning_rate": 1.4440539235943286e-05, + "loss": 1.4054, + "step": 62028 + }, + { + "epoch": 0.75, + "grad_norm": 6.585044655884123, + "learning_rate": 1.4440015804807596e-05, + "loss": 1.6241, + "step": 62031 + }, + { + "epoch": 0.75, + "grad_norm": 56.852014069753494, + "learning_rate": 1.4439492358519733e-05, + "loss": 1.4552, + "step": 62034 + }, + { + "epoch": 0.75, + "grad_norm": 7.689587038932261, + "learning_rate": 1.4438968897081484e-05, + "loss": 1.803, + "step": 62037 + }, + { + "epoch": 0.75, + "grad_norm": 6.272635004837551, + "learning_rate": 1.4438445420494631e-05, + "loss": 1.364, + "step": 62040 + }, + { + "epoch": 0.75, + "grad_norm": 6.5051395148606215, + "learning_rate": 1.4437921928760963e-05, + "loss": 1.3513, + "step": 62043 + }, + { + "epoch": 0.75, + "grad_norm": 6.230742074012534, + "learning_rate": 1.4437398421882267e-05, + "loss": 1.1767, + "step": 62046 + }, + { + "epoch": 0.75, + "grad_norm": 8.887127059145175, + "learning_rate": 1.443687489986033e-05, + "loss": 0.8263, + "step": 62049 + }, + { + "epoch": 0.75, + "grad_norm": 30.947790959519697, + "learning_rate": 1.4436351362696934e-05, + "loss": 1.64, + "step": 62052 + }, + { + "epoch": 0.75, + "grad_norm": 10.29332406091762, + "learning_rate": 1.4435827810393868e-05, + "loss": 1.3617, + "step": 62055 + }, + { + "epoch": 0.75, + "grad_norm": 13.709127438190938, + "learning_rate": 1.4435304242952922e-05, + "loss": 1.2401, + "step": 62058 + }, + { + "epoch": 0.75, + "grad_norm": 6.28436085727832, + "learning_rate": 1.443478066037588e-05, + "loss": 1.4386, + "step": 62061 + }, + { + "epoch": 0.75, + "grad_norm": 27.413145291066137, + "learning_rate": 1.443425706266453e-05, + "loss": 1.2426, + "step": 62064 + }, + { + "epoch": 0.75, + "grad_norm": 19.315541834329192, + "learning_rate": 1.4433733449820652e-05, + "loss": 1.6278, + "step": 62067 + }, + { + "epoch": 0.75, + "grad_norm": 22.34048182232715, + "learning_rate": 1.4433209821846044e-05, + "loss": 1.0508, + "step": 62070 + }, + { + "epoch": 0.75, + "grad_norm": 20.011904433943908, + "learning_rate": 1.4432686178742488e-05, + "loss": 1.4835, + "step": 62073 + }, + { + "epoch": 0.75, + "grad_norm": 17.461829815446066, + "learning_rate": 1.4432162520511768e-05, + "loss": 1.2761, + "step": 62076 + }, + { + "epoch": 0.75, + "grad_norm": 11.738729798578692, + "learning_rate": 1.4431638847155675e-05, + "loss": 1.3551, + "step": 62079 + }, + { + "epoch": 0.75, + "grad_norm": 15.989851557805927, + "learning_rate": 1.4431115158675997e-05, + "loss": 1.3274, + "step": 62082 + }, + { + "epoch": 0.75, + "grad_norm": 6.840797349809295, + "learning_rate": 1.4430591455074516e-05, + "loss": 1.6616, + "step": 62085 + }, + { + "epoch": 0.75, + "grad_norm": 11.140580024800352, + "learning_rate": 1.4430067736353021e-05, + "loss": 1.3025, + "step": 62088 + }, + { + "epoch": 0.75, + "grad_norm": 13.137747352233513, + "learning_rate": 1.4429544002513304e-05, + "loss": 1.6798, + "step": 62091 + }, + { + "epoch": 0.75, + "grad_norm": 7.008374730942898, + "learning_rate": 1.442902025355715e-05, + "loss": 1.4249, + "step": 62094 + }, + { + "epoch": 0.75, + "grad_norm": 8.86174430010729, + "learning_rate": 1.442849648948634e-05, + "loss": 1.2536, + "step": 62097 + }, + { + "epoch": 0.75, + "grad_norm": 31.931803361846242, + "learning_rate": 1.4427972710302671e-05, + "loss": 1.6296, + "step": 62100 + }, + { + "epoch": 0.75, + "grad_norm": 14.166749140447074, + "learning_rate": 1.4427448916007924e-05, + "loss": 1.2912, + "step": 62103 + }, + { + "epoch": 0.75, + "grad_norm": 17.603402958523645, + "learning_rate": 1.4426925106603891e-05, + "loss": 1.2809, + "step": 62106 + }, + { + "epoch": 0.75, + "grad_norm": 34.23587143396523, + "learning_rate": 1.4426401282092352e-05, + "loss": 1.2259, + "step": 62109 + }, + { + "epoch": 0.75, + "grad_norm": 4.6995127293995065, + "learning_rate": 1.4425877442475105e-05, + "loss": 1.2628, + "step": 62112 + }, + { + "epoch": 0.75, + "grad_norm": 22.724768954321586, + "learning_rate": 1.4425353587753932e-05, + "loss": 1.3334, + "step": 62115 + }, + { + "epoch": 0.75, + "grad_norm": 13.046560029756979, + "learning_rate": 1.442482971793062e-05, + "loss": 1.3879, + "step": 62118 + }, + { + "epoch": 0.75, + "grad_norm": 13.175475702779828, + "learning_rate": 1.4424305833006958e-05, + "loss": 1.0722, + "step": 62121 + }, + { + "epoch": 0.75, + "grad_norm": 9.623103260489385, + "learning_rate": 1.4423781932984733e-05, + "loss": 1.3524, + "step": 62124 + }, + { + "epoch": 0.75, + "grad_norm": 11.375237092685444, + "learning_rate": 1.4423258017865736e-05, + "loss": 1.0624, + "step": 62127 + }, + { + "epoch": 0.75, + "grad_norm": 21.21982304659774, + "learning_rate": 1.4422734087651748e-05, + "loss": 1.6221, + "step": 62130 + }, + { + "epoch": 0.75, + "grad_norm": 5.7109362286933925, + "learning_rate": 1.4422210142344566e-05, + "loss": 1.5072, + "step": 62133 + }, + { + "epoch": 0.75, + "grad_norm": 10.50355215384408, + "learning_rate": 1.4421686181945973e-05, + "loss": 1.476, + "step": 62136 + }, + { + "epoch": 0.75, + "grad_norm": 15.906846991626818, + "learning_rate": 1.4421162206457757e-05, + "loss": 1.4507, + "step": 62139 + }, + { + "epoch": 0.75, + "grad_norm": 30.265605049626693, + "learning_rate": 1.4420638215881706e-05, + "loss": 1.3251, + "step": 62142 + }, + { + "epoch": 0.75, + "grad_norm": 4.36410549042528, + "learning_rate": 1.442011421021961e-05, + "loss": 1.592, + "step": 62145 + }, + { + "epoch": 0.75, + "grad_norm": 6.71188878628593, + "learning_rate": 1.4419590189473258e-05, + "loss": 1.2278, + "step": 62148 + }, + { + "epoch": 0.75, + "grad_norm": 12.275159480362884, + "learning_rate": 1.4419066153644432e-05, + "loss": 1.5359, + "step": 62151 + }, + { + "epoch": 0.75, + "grad_norm": 15.263780116813798, + "learning_rate": 1.441854210273493e-05, + "loss": 1.8359, + "step": 62154 + }, + { + "epoch": 0.75, + "grad_norm": 9.522521330540439, + "learning_rate": 1.4418018036746533e-05, + "loss": 1.6865, + "step": 62157 + }, + { + "epoch": 0.75, + "grad_norm": 6.047939539091605, + "learning_rate": 1.4417493955681033e-05, + "loss": 1.3132, + "step": 62160 + }, + { + "epoch": 0.75, + "grad_norm": 21.63852555778028, + "learning_rate": 1.4416969859540215e-05, + "loss": 1.3692, + "step": 62163 + }, + { + "epoch": 0.75, + "grad_norm": 19.12355632739807, + "learning_rate": 1.4416445748325871e-05, + "loss": 1.355, + "step": 62166 + }, + { + "epoch": 0.75, + "grad_norm": 16.52456470748271, + "learning_rate": 1.441592162203979e-05, + "loss": 1.3423, + "step": 62169 + }, + { + "epoch": 0.75, + "grad_norm": 54.92460071674511, + "learning_rate": 1.4415397480683757e-05, + "loss": 1.1447, + "step": 62172 + }, + { + "epoch": 0.75, + "grad_norm": 32.768862713588014, + "learning_rate": 1.4414873324259561e-05, + "loss": 1.3818, + "step": 62175 + }, + { + "epoch": 0.75, + "grad_norm": 46.81781324692615, + "learning_rate": 1.4414349152768998e-05, + "loss": 1.3343, + "step": 62178 + }, + { + "epoch": 0.75, + "grad_norm": 21.603231474525202, + "learning_rate": 1.4413824966213849e-05, + "loss": 1.1978, + "step": 62181 + }, + { + "epoch": 0.75, + "grad_norm": 12.07099779813474, + "learning_rate": 1.4413300764595904e-05, + "loss": 1.5896, + "step": 62184 + }, + { + "epoch": 0.75, + "grad_norm": 2.583504624138895, + "learning_rate": 1.4412776547916953e-05, + "loss": 1.5263, + "step": 62187 + }, + { + "epoch": 0.75, + "grad_norm": 12.254525095331811, + "learning_rate": 1.4412252316178788e-05, + "loss": 1.0384, + "step": 62190 + }, + { + "epoch": 0.75, + "grad_norm": 13.609690472909351, + "learning_rate": 1.4411728069383192e-05, + "loss": 1.263, + "step": 62193 + }, + { + "epoch": 0.75, + "grad_norm": 9.814677888359569, + "learning_rate": 1.4411203807531959e-05, + "loss": 1.2294, + "step": 62196 + }, + { + "epoch": 0.75, + "grad_norm": 9.292436680861739, + "learning_rate": 1.4410679530626876e-05, + "loss": 1.1626, + "step": 62199 + }, + { + "epoch": 0.75, + "grad_norm": 4.738179580853033, + "learning_rate": 1.4410155238669732e-05, + "loss": 1.4017, + "step": 62202 + }, + { + "epoch": 0.75, + "grad_norm": 21.032182209314037, + "learning_rate": 1.4409630931662315e-05, + "loss": 1.1561, + "step": 62205 + }, + { + "epoch": 0.75, + "grad_norm": 10.672848344427857, + "learning_rate": 1.4409106609606421e-05, + "loss": 1.21, + "step": 62208 + }, + { + "epoch": 0.75, + "grad_norm": 5.94604066938786, + "learning_rate": 1.440858227250383e-05, + "loss": 1.6446, + "step": 62211 + }, + { + "epoch": 0.75, + "grad_norm": 16.368207904585585, + "learning_rate": 1.4408057920356338e-05, + "loss": 1.3239, + "step": 62214 + }, + { + "epoch": 0.75, + "grad_norm": 21.99723279070492, + "learning_rate": 1.440753355316573e-05, + "loss": 1.2488, + "step": 62217 + }, + { + "epoch": 0.75, + "grad_norm": 3.6443932590805828, + "learning_rate": 1.4407009170933799e-05, + "loss": 1.1526, + "step": 62220 + }, + { + "epoch": 0.75, + "grad_norm": 8.77681288173323, + "learning_rate": 1.4406484773662333e-05, + "loss": 1.5354, + "step": 62223 + }, + { + "epoch": 0.75, + "grad_norm": 9.193025322028452, + "learning_rate": 1.440596036135312e-05, + "loss": 0.8099, + "step": 62226 + }, + { + "epoch": 0.75, + "grad_norm": 11.640086023743251, + "learning_rate": 1.4405435934007952e-05, + "loss": 1.1803, + "step": 62229 + }, + { + "epoch": 0.75, + "grad_norm": 13.565426887543996, + "learning_rate": 1.4404911491628619e-05, + "loss": 1.3289, + "step": 62232 + }, + { + "epoch": 0.75, + "grad_norm": 9.712413451536342, + "learning_rate": 1.4404387034216909e-05, + "loss": 1.407, + "step": 62235 + }, + { + "epoch": 0.75, + "grad_norm": 15.522645377560673, + "learning_rate": 1.4403862561774613e-05, + "loss": 1.5373, + "step": 62238 + }, + { + "epoch": 0.75, + "grad_norm": 13.59226793336822, + "learning_rate": 1.4403338074303521e-05, + "loss": 1.1636, + "step": 62241 + }, + { + "epoch": 0.75, + "grad_norm": 10.117838039961239, + "learning_rate": 1.4402813571805421e-05, + "loss": 1.2202, + "step": 62244 + }, + { + "epoch": 0.75, + "grad_norm": 11.897229983626097, + "learning_rate": 1.4402289054282106e-05, + "loss": 1.5496, + "step": 62247 + }, + { + "epoch": 0.75, + "grad_norm": 21.74327237338493, + "learning_rate": 1.440176452173536e-05, + "loss": 1.5507, + "step": 62250 + }, + { + "epoch": 0.75, + "grad_norm": 50.31435083156902, + "learning_rate": 1.4401239974166982e-05, + "loss": 1.363, + "step": 62253 + }, + { + "epoch": 0.75, + "grad_norm": 10.125321027268608, + "learning_rate": 1.4400715411578754e-05, + "loss": 1.1411, + "step": 62256 + }, + { + "epoch": 0.75, + "grad_norm": 3.7558806499079394, + "learning_rate": 1.4400190833972469e-05, + "loss": 1.1281, + "step": 62259 + }, + { + "epoch": 0.75, + "grad_norm": 51.6069291696635, + "learning_rate": 1.4399666241349923e-05, + "loss": 1.3728, + "step": 62262 + }, + { + "epoch": 0.75, + "grad_norm": 15.311108228530651, + "learning_rate": 1.4399141633712895e-05, + "loss": 1.2434, + "step": 62265 + }, + { + "epoch": 0.75, + "grad_norm": 5.744578638809897, + "learning_rate": 1.4398617011063183e-05, + "loss": 1.6351, + "step": 62268 + }, + { + "epoch": 0.75, + "grad_norm": 27.162967539825583, + "learning_rate": 1.4398092373402574e-05, + "loss": 1.4207, + "step": 62271 + }, + { + "epoch": 0.75, + "grad_norm": 14.73857022603757, + "learning_rate": 1.4397567720732866e-05, + "loss": 1.4493, + "step": 62274 + }, + { + "epoch": 0.75, + "grad_norm": 13.781968856563939, + "learning_rate": 1.4397043053055837e-05, + "loss": 1.454, + "step": 62277 + }, + { + "epoch": 0.75, + "grad_norm": 16.522891697144733, + "learning_rate": 1.4396518370373287e-05, + "loss": 1.3372, + "step": 62280 + }, + { + "epoch": 0.75, + "grad_norm": 53.387823065943834, + "learning_rate": 1.4395993672687004e-05, + "loss": 1.6449, + "step": 62283 + }, + { + "epoch": 0.75, + "grad_norm": 6.905759236733112, + "learning_rate": 1.4395468959998772e-05, + "loss": 1.3893, + "step": 62286 + }, + { + "epoch": 0.75, + "grad_norm": 12.115173511870477, + "learning_rate": 1.4394944232310396e-05, + "loss": 1.4458, + "step": 62289 + }, + { + "epoch": 0.75, + "grad_norm": 9.09136874573219, + "learning_rate": 1.439441948962365e-05, + "loss": 1.5426, + "step": 62292 + }, + { + "epoch": 0.75, + "grad_norm": 15.473688244794237, + "learning_rate": 1.4393894731940341e-05, + "loss": 1.2832, + "step": 62295 + }, + { + "epoch": 0.75, + "grad_norm": 47.59120486865374, + "learning_rate": 1.4393369959262247e-05, + "loss": 1.5131, + "step": 62298 + }, + { + "epoch": 0.75, + "grad_norm": 3.5269786212942806, + "learning_rate": 1.4392845171591166e-05, + "loss": 0.9601, + "step": 62301 + }, + { + "epoch": 0.75, + "grad_norm": 7.323984810615278, + "learning_rate": 1.4392320368928886e-05, + "loss": 1.2597, + "step": 62304 + }, + { + "epoch": 0.75, + "grad_norm": 4.997824737020323, + "learning_rate": 1.4391795551277199e-05, + "loss": 1.3546, + "step": 62307 + }, + { + "epoch": 0.75, + "grad_norm": 6.606695870089704, + "learning_rate": 1.4391270718637896e-05, + "loss": 1.4776, + "step": 62310 + }, + { + "epoch": 0.75, + "grad_norm": 16.704949810444, + "learning_rate": 1.4390745871012764e-05, + "loss": 1.4968, + "step": 62313 + }, + { + "epoch": 0.75, + "grad_norm": 4.472406161036123, + "learning_rate": 1.4390221008403604e-05, + "loss": 1.6191, + "step": 62316 + }, + { + "epoch": 0.75, + "grad_norm": 19.600191288180714, + "learning_rate": 1.4389696130812197e-05, + "loss": 1.8746, + "step": 62319 + }, + { + "epoch": 0.75, + "grad_norm": 15.257574392355053, + "learning_rate": 1.438917123824034e-05, + "loss": 1.4633, + "step": 62322 + }, + { + "epoch": 0.75, + "grad_norm": 31.866246148753092, + "learning_rate": 1.438864633068982e-05, + "loss": 1.4712, + "step": 62325 + }, + { + "epoch": 0.75, + "grad_norm": 33.90188486150516, + "learning_rate": 1.4388121408162437e-05, + "loss": 1.5697, + "step": 62328 + }, + { + "epoch": 0.75, + "grad_norm": 7.8198126717282594, + "learning_rate": 1.4387596470659971e-05, + "loss": 1.7017, + "step": 62331 + }, + { + "epoch": 0.75, + "grad_norm": 2.8436085244111986, + "learning_rate": 1.4387071518184219e-05, + "loss": 1.254, + "step": 62334 + }, + { + "epoch": 0.75, + "grad_norm": 2.4063362747962276, + "learning_rate": 1.4386546550736973e-05, + "loss": 1.1284, + "step": 62337 + }, + { + "epoch": 0.75, + "grad_norm": 21.063899398899917, + "learning_rate": 1.438602156832002e-05, + "loss": 1.1613, + "step": 62340 + }, + { + "epoch": 0.75, + "grad_norm": 13.683331703577025, + "learning_rate": 1.4385496570935162e-05, + "loss": 1.2978, + "step": 62343 + }, + { + "epoch": 0.75, + "grad_norm": 15.519963757692649, + "learning_rate": 1.4384971558584177e-05, + "loss": 1.5733, + "step": 62346 + }, + { + "epoch": 0.75, + "grad_norm": 9.81342858004068, + "learning_rate": 1.438444653126887e-05, + "loss": 1.4304, + "step": 62349 + }, + { + "epoch": 0.75, + "grad_norm": 6.721071911119042, + "learning_rate": 1.4383921488991023e-05, + "loss": 1.4215, + "step": 62352 + }, + { + "epoch": 0.75, + "grad_norm": 13.485241184989354, + "learning_rate": 1.4383396431752432e-05, + "loss": 1.1671, + "step": 62355 + }, + { + "epoch": 0.75, + "grad_norm": 10.116181420999103, + "learning_rate": 1.4382871359554886e-05, + "loss": 1.0875, + "step": 62358 + }, + { + "epoch": 0.75, + "grad_norm": 16.58758960166949, + "learning_rate": 1.4382346272400182e-05, + "loss": 1.7078, + "step": 62361 + }, + { + "epoch": 0.75, + "grad_norm": 4.697198091197393, + "learning_rate": 1.4381821170290105e-05, + "loss": 1.5681, + "step": 62364 + }, + { + "epoch": 0.75, + "grad_norm": 18.024816902337847, + "learning_rate": 1.438129605322645e-05, + "loss": 1.4377, + "step": 62367 + }, + { + "epoch": 0.75, + "grad_norm": 14.04981317749421, + "learning_rate": 1.4380770921211017e-05, + "loss": 1.4449, + "step": 62370 + }, + { + "epoch": 0.75, + "grad_norm": 13.0838627089833, + "learning_rate": 1.4380245774245583e-05, + "loss": 1.6201, + "step": 62373 + }, + { + "epoch": 0.75, + "grad_norm": 20.983460475573587, + "learning_rate": 1.4379720612331951e-05, + "loss": 1.6816, + "step": 62376 + }, + { + "epoch": 0.75, + "grad_norm": 8.771650980553673, + "learning_rate": 1.437919543547191e-05, + "loss": 1.4682, + "step": 62379 + }, + { + "epoch": 0.75, + "grad_norm": 26.786220833321302, + "learning_rate": 1.4378670243667255e-05, + "loss": 1.2138, + "step": 62382 + }, + { + "epoch": 0.75, + "grad_norm": 12.668689418909716, + "learning_rate": 1.4378145036919771e-05, + "loss": 1.1726, + "step": 62385 + }, + { + "epoch": 0.75, + "grad_norm": 15.574084517739706, + "learning_rate": 1.4377619815231256e-05, + "loss": 1.7017, + "step": 62388 + }, + { + "epoch": 0.75, + "grad_norm": 32.1942105619755, + "learning_rate": 1.4377094578603504e-05, + "loss": 1.3788, + "step": 62391 + }, + { + "epoch": 0.75, + "grad_norm": 6.181847812243015, + "learning_rate": 1.4376569327038302e-05, + "loss": 1.7317, + "step": 62394 + }, + { + "epoch": 0.75, + "grad_norm": 30.980461520585685, + "learning_rate": 1.4376044060537446e-05, + "loss": 1.2999, + "step": 62397 + }, + { + "epoch": 0.75, + "grad_norm": 17.599798303355826, + "learning_rate": 1.4375518779102728e-05, + "loss": 1.6214, + "step": 62400 + }, + { + "epoch": 0.75, + "grad_norm": 6.495016911797395, + "learning_rate": 1.4374993482735943e-05, + "loss": 1.402, + "step": 62403 + }, + { + "epoch": 0.75, + "grad_norm": 54.29934987224469, + "learning_rate": 1.4374468171438877e-05, + "loss": 1.5107, + "step": 62406 + }, + { + "epoch": 0.75, + "grad_norm": 18.78514289734229, + "learning_rate": 1.4373942845213329e-05, + "loss": 1.5724, + "step": 62409 + }, + { + "epoch": 0.75, + "grad_norm": 7.341907928803975, + "learning_rate": 1.4373417504061089e-05, + "loss": 1.1749, + "step": 62412 + }, + { + "epoch": 0.75, + "grad_norm": 14.80445216800976, + "learning_rate": 1.4372892147983953e-05, + "loss": 1.2395, + "step": 62415 + }, + { + "epoch": 0.75, + "grad_norm": 45.25261172009605, + "learning_rate": 1.4372366776983706e-05, + "loss": 1.7679, + "step": 62418 + }, + { + "epoch": 0.75, + "grad_norm": 31.0534579847651, + "learning_rate": 1.437184139106215e-05, + "loss": 1.267, + "step": 62421 + }, + { + "epoch": 0.75, + "grad_norm": 23.520724645739065, + "learning_rate": 1.4371315990221074e-05, + "loss": 1.4507, + "step": 62424 + }, + { + "epoch": 0.75, + "grad_norm": 50.690231238848604, + "learning_rate": 1.4370790574462267e-05, + "loss": 1.8848, + "step": 62427 + }, + { + "epoch": 0.75, + "grad_norm": 10.82190527551291, + "learning_rate": 1.437026514378753e-05, + "loss": 1.1765, + "step": 62430 + }, + { + "epoch": 0.75, + "grad_norm": 5.637250101125194, + "learning_rate": 1.4369739698198653e-05, + "loss": 1.5187, + "step": 62433 + }, + { + "epoch": 0.75, + "grad_norm": 17.109579935506726, + "learning_rate": 1.4369214237697426e-05, + "loss": 1.2522, + "step": 62436 + }, + { + "epoch": 0.75, + "grad_norm": 10.59971662691803, + "learning_rate": 1.4368688762285645e-05, + "loss": 1.2578, + "step": 62439 + }, + { + "epoch": 0.75, + "grad_norm": 3.7908812991491097, + "learning_rate": 1.4368163271965103e-05, + "loss": 1.1175, + "step": 62442 + }, + { + "epoch": 0.75, + "grad_norm": 13.02585323567627, + "learning_rate": 1.4367637766737594e-05, + "loss": 1.4241, + "step": 62445 + }, + { + "epoch": 0.75, + "grad_norm": 83.59848431310695, + "learning_rate": 1.436711224660491e-05, + "loss": 1.3554, + "step": 62448 + }, + { + "epoch": 0.75, + "grad_norm": 24.980646493633515, + "learning_rate": 1.4366586711568844e-05, + "loss": 1.7715, + "step": 62451 + }, + { + "epoch": 0.75, + "grad_norm": 25.3718165995172, + "learning_rate": 1.436606116163119e-05, + "loss": 1.4587, + "step": 62454 + }, + { + "epoch": 0.75, + "grad_norm": 28.318959836099953, + "learning_rate": 1.4365535596793745e-05, + "loss": 1.7974, + "step": 62457 + }, + { + "epoch": 0.75, + "grad_norm": 53.43627368188594, + "learning_rate": 1.4365010017058296e-05, + "loss": 1.0944, + "step": 62460 + }, + { + "epoch": 0.75, + "grad_norm": 15.208075886855976, + "learning_rate": 1.4364484422426643e-05, + "loss": 1.0829, + "step": 62463 + }, + { + "epoch": 0.75, + "grad_norm": 48.446922318603825, + "learning_rate": 1.4363958812900576e-05, + "loss": 1.4594, + "step": 62466 + }, + { + "epoch": 0.75, + "grad_norm": 7.957824802457542, + "learning_rate": 1.436343318848189e-05, + "loss": 1.7217, + "step": 62469 + }, + { + "epoch": 0.75, + "grad_norm": 8.451188673003776, + "learning_rate": 1.4362907549172375e-05, + "loss": 1.4529, + "step": 62472 + }, + { + "epoch": 0.75, + "grad_norm": 22.608489864072997, + "learning_rate": 1.4362381894973831e-05, + "loss": 1.5682, + "step": 62475 + }, + { + "epoch": 0.75, + "grad_norm": 18.177165989445463, + "learning_rate": 1.436185622588805e-05, + "loss": 1.3104, + "step": 62478 + }, + { + "epoch": 0.75, + "grad_norm": 11.635808351926483, + "learning_rate": 1.436133054191682e-05, + "loss": 1.4661, + "step": 62481 + }, + { + "epoch": 0.75, + "grad_norm": 20.223836497752735, + "learning_rate": 1.4360804843061944e-05, + "loss": 1.3725, + "step": 62484 + }, + { + "epoch": 0.75, + "grad_norm": 8.75251924681742, + "learning_rate": 1.436027912932521e-05, + "loss": 1.4169, + "step": 62487 + }, + { + "epoch": 0.75, + "grad_norm": 17.012634176319047, + "learning_rate": 1.4359753400708413e-05, + "loss": 1.3452, + "step": 62490 + }, + { + "epoch": 0.75, + "grad_norm": 21.730267996906303, + "learning_rate": 1.435922765721335e-05, + "loss": 1.3169, + "step": 62493 + }, + { + "epoch": 0.75, + "grad_norm": 15.156723350681194, + "learning_rate": 1.4358701898841814e-05, + "loss": 1.5441, + "step": 62496 + }, + { + "epoch": 0.75, + "grad_norm": 8.140563548487833, + "learning_rate": 1.4358176125595598e-05, + "loss": 1.8154, + "step": 62499 + }, + { + "epoch": 0.75, + "grad_norm": 9.437044561885555, + "learning_rate": 1.4357650337476491e-05, + "loss": 1.7505, + "step": 62502 + }, + { + "epoch": 0.75, + "grad_norm": 12.484436347850638, + "learning_rate": 1.43571245344863e-05, + "loss": 1.2031, + "step": 62505 + }, + { + "epoch": 0.75, + "grad_norm": 12.129884527570635, + "learning_rate": 1.4356598716626811e-05, + "loss": 1.338, + "step": 62508 + }, + { + "epoch": 0.75, + "grad_norm": 20.33249820164343, + "learning_rate": 1.4356072883899819e-05, + "loss": 1.2452, + "step": 62511 + }, + { + "epoch": 0.75, + "grad_norm": 25.16990707009944, + "learning_rate": 1.4355547036307119e-05, + "loss": 1.1727, + "step": 62514 + }, + { + "epoch": 0.75, + "grad_norm": 24.28646428238137, + "learning_rate": 1.4355021173850505e-05, + "loss": 1.4895, + "step": 62517 + }, + { + "epoch": 0.75, + "grad_norm": 17.98699487841245, + "learning_rate": 1.4354495296531773e-05, + "loss": 1.3722, + "step": 62520 + }, + { + "epoch": 0.75, + "grad_norm": 28.572108183500937, + "learning_rate": 1.435396940435272e-05, + "loss": 1.3816, + "step": 62523 + }, + { + "epoch": 0.75, + "grad_norm": 18.181948353828002, + "learning_rate": 1.4353443497315134e-05, + "loss": 1.32, + "step": 62526 + }, + { + "epoch": 0.75, + "grad_norm": 63.00189533736054, + "learning_rate": 1.4352917575420815e-05, + "loss": 1.4191, + "step": 62529 + }, + { + "epoch": 0.75, + "grad_norm": 18.071648092288633, + "learning_rate": 1.4352391638671557e-05, + "loss": 1.5161, + "step": 62532 + }, + { + "epoch": 0.75, + "grad_norm": 25.11192137025138, + "learning_rate": 1.435186568706915e-05, + "loss": 1.335, + "step": 62535 + }, + { + "epoch": 0.75, + "grad_norm": 12.02452370291651, + "learning_rate": 1.4351339720615398e-05, + "loss": 1.6988, + "step": 62538 + }, + { + "epoch": 0.75, + "grad_norm": 70.43782671840589, + "learning_rate": 1.4350813739312088e-05, + "loss": 1.3145, + "step": 62541 + }, + { + "epoch": 0.75, + "grad_norm": 40.135901565354196, + "learning_rate": 1.435028774316102e-05, + "loss": 1.4836, + "step": 62544 + }, + { + "epoch": 0.75, + "grad_norm": 8.521935379783342, + "learning_rate": 1.4349761732163985e-05, + "loss": 0.9879, + "step": 62547 + }, + { + "epoch": 0.75, + "grad_norm": 49.4447855940315, + "learning_rate": 1.434923570632278e-05, + "loss": 1.2383, + "step": 62550 + }, + { + "epoch": 0.75, + "grad_norm": 15.603800973912609, + "learning_rate": 1.4348709665639202e-05, + "loss": 1.2125, + "step": 62553 + }, + { + "epoch": 0.75, + "grad_norm": 21.773543795793834, + "learning_rate": 1.4348183610115041e-05, + "loss": 1.4968, + "step": 62556 + }, + { + "epoch": 0.75, + "grad_norm": 35.911283021048725, + "learning_rate": 1.4347657539752098e-05, + "loss": 1.5664, + "step": 62559 + }, + { + "epoch": 0.75, + "grad_norm": 52.7051305781587, + "learning_rate": 1.4347131454552166e-05, + "loss": 1.4497, + "step": 62562 + }, + { + "epoch": 0.75, + "grad_norm": 23.693102090371617, + "learning_rate": 1.434660535451704e-05, + "loss": 1.2423, + "step": 62565 + }, + { + "epoch": 0.75, + "grad_norm": 7.885126828226511, + "learning_rate": 1.4346079239648513e-05, + "loss": 1.2598, + "step": 62568 + }, + { + "epoch": 0.75, + "grad_norm": 14.29818592683256, + "learning_rate": 1.4345553109948387e-05, + "loss": 1.3718, + "step": 62571 + }, + { + "epoch": 0.75, + "grad_norm": 73.85774219560483, + "learning_rate": 1.4345026965418451e-05, + "loss": 1.3234, + "step": 62574 + }, + { + "epoch": 0.75, + "grad_norm": 23.266521577648945, + "learning_rate": 1.4344500806060505e-05, + "loss": 1.3832, + "step": 62577 + }, + { + "epoch": 0.75, + "grad_norm": 28.581706062061606, + "learning_rate": 1.4343974631876339e-05, + "loss": 1.5993, + "step": 62580 + }, + { + "epoch": 0.75, + "grad_norm": 24.639590228651315, + "learning_rate": 1.4343448442867756e-05, + "loss": 1.4992, + "step": 62583 + }, + { + "epoch": 0.75, + "grad_norm": 8.059242849135888, + "learning_rate": 1.4342922239036545e-05, + "loss": 1.4192, + "step": 62586 + }, + { + "epoch": 0.75, + "grad_norm": 53.71531882388953, + "learning_rate": 1.4342396020384506e-05, + "loss": 1.5346, + "step": 62589 + }, + { + "epoch": 0.75, + "grad_norm": 13.916081566992137, + "learning_rate": 1.4341869786913435e-05, + "loss": 1.3481, + "step": 62592 + }, + { + "epoch": 0.75, + "grad_norm": 18.94039162380381, + "learning_rate": 1.4341343538625126e-05, + "loss": 1.7236, + "step": 62595 + }, + { + "epoch": 0.75, + "grad_norm": 35.1100606265857, + "learning_rate": 1.4340817275521374e-05, + "loss": 0.986, + "step": 62598 + }, + { + "epoch": 0.75, + "grad_norm": 11.99733678018328, + "learning_rate": 1.4340290997603977e-05, + "loss": 1.4325, + "step": 62601 + }, + { + "epoch": 0.75, + "grad_norm": 11.853494994587162, + "learning_rate": 1.4339764704874728e-05, + "loss": 1.4398, + "step": 62604 + }, + { + "epoch": 0.75, + "grad_norm": 7.146960009266825, + "learning_rate": 1.433923839733543e-05, + "loss": 1.2804, + "step": 62607 + }, + { + "epoch": 0.75, + "grad_norm": 18.08339111342818, + "learning_rate": 1.4338712074987869e-05, + "loss": 1.3972, + "step": 62610 + }, + { + "epoch": 0.75, + "grad_norm": 15.527816137122548, + "learning_rate": 1.4338185737833848e-05, + "loss": 1.3716, + "step": 62613 + }, + { + "epoch": 0.75, + "grad_norm": 39.52156643853323, + "learning_rate": 1.4337659385875164e-05, + "loss": 1.015, + "step": 62616 + }, + { + "epoch": 0.75, + "grad_norm": 10.670487998850446, + "learning_rate": 1.4337133019113609e-05, + "loss": 1.5735, + "step": 62619 + }, + { + "epoch": 0.75, + "grad_norm": 15.46403231279512, + "learning_rate": 1.433660663755098e-05, + "loss": 1.1573, + "step": 62622 + }, + { + "epoch": 0.75, + "grad_norm": 11.184971636858933, + "learning_rate": 1.4336080241189076e-05, + "loss": 1.1147, + "step": 62625 + }, + { + "epoch": 0.75, + "grad_norm": 6.2003337897548345, + "learning_rate": 1.4335553830029694e-05, + "loss": 1.6875, + "step": 62628 + }, + { + "epoch": 0.75, + "grad_norm": 5.816065979318575, + "learning_rate": 1.4335027404074626e-05, + "loss": 1.0472, + "step": 62631 + }, + { + "epoch": 0.75, + "grad_norm": 9.475548391208944, + "learning_rate": 1.433450096332567e-05, + "loss": 1.0124, + "step": 62634 + }, + { + "epoch": 0.75, + "grad_norm": 2.6849907962236013, + "learning_rate": 1.4333974507784626e-05, + "loss": 1.088, + "step": 62637 + }, + { + "epoch": 0.75, + "grad_norm": 19.83777869809453, + "learning_rate": 1.4333448037453288e-05, + "loss": 1.1554, + "step": 62640 + }, + { + "epoch": 0.75, + "grad_norm": 2.990083640436342, + "learning_rate": 1.4332921552333451e-05, + "loss": 1.2691, + "step": 62643 + }, + { + "epoch": 0.75, + "grad_norm": 76.54666656657984, + "learning_rate": 1.4332395052426914e-05, + "loss": 1.637, + "step": 62646 + }, + { + "epoch": 0.75, + "grad_norm": 15.274277988336776, + "learning_rate": 1.4331868537735477e-05, + "loss": 1.5464, + "step": 62649 + }, + { + "epoch": 0.75, + "grad_norm": 23.395052360963053, + "learning_rate": 1.4331342008260927e-05, + "loss": 1.75, + "step": 62652 + }, + { + "epoch": 0.75, + "grad_norm": 4.8345001532728125, + "learning_rate": 1.4330815464005068e-05, + "loss": 1.3842, + "step": 62655 + }, + { + "epoch": 0.75, + "grad_norm": 6.528329955307958, + "learning_rate": 1.43302889049697e-05, + "loss": 1.3182, + "step": 62658 + }, + { + "epoch": 0.75, + "grad_norm": 18.41788383287987, + "learning_rate": 1.4329762331156612e-05, + "loss": 1.5114, + "step": 62661 + }, + { + "epoch": 0.75, + "grad_norm": 17.653938155101564, + "learning_rate": 1.4329235742567604e-05, + "loss": 1.6062, + "step": 62664 + }, + { + "epoch": 0.75, + "grad_norm": 3.760961661910125, + "learning_rate": 1.4328709139204478e-05, + "loss": 1.5676, + "step": 62667 + }, + { + "epoch": 0.75, + "grad_norm": 16.563351948172727, + "learning_rate": 1.4328182521069023e-05, + "loss": 1.3437, + "step": 62670 + }, + { + "epoch": 0.75, + "grad_norm": 8.855149316892058, + "learning_rate": 1.4327655888163041e-05, + "loss": 1.2562, + "step": 62673 + }, + { + "epoch": 0.75, + "grad_norm": 17.66926972373065, + "learning_rate": 1.4327129240488329e-05, + "loss": 1.3236, + "step": 62676 + }, + { + "epoch": 0.75, + "grad_norm": 31.475242098595988, + "learning_rate": 1.4326602578046686e-05, + "loss": 1.4884, + "step": 62679 + }, + { + "epoch": 0.75, + "grad_norm": 37.17240918672417, + "learning_rate": 1.4326075900839902e-05, + "loss": 1.2411, + "step": 62682 + }, + { + "epoch": 0.75, + "grad_norm": 13.660924836954361, + "learning_rate": 1.4325549208869783e-05, + "loss": 1.4812, + "step": 62685 + }, + { + "epoch": 0.75, + "grad_norm": 17.370402025628383, + "learning_rate": 1.432502250213812e-05, + "loss": 1.3303, + "step": 62688 + }, + { + "epoch": 0.75, + "grad_norm": 28.6987314995621, + "learning_rate": 1.4324495780646712e-05, + "loss": 1.2251, + "step": 62691 + }, + { + "epoch": 0.75, + "grad_norm": 7.711600391189806, + "learning_rate": 1.4323969044397361e-05, + "loss": 1.2646, + "step": 62694 + }, + { + "epoch": 0.75, + "grad_norm": 17.72649284985431, + "learning_rate": 1.4323442293391858e-05, + "loss": 1.6918, + "step": 62697 + }, + { + "epoch": 0.75, + "grad_norm": 14.5259120878138, + "learning_rate": 1.432291552763201e-05, + "loss": 1.2654, + "step": 62700 + }, + { + "epoch": 0.75, + "grad_norm": 20.09082280503143, + "learning_rate": 1.43223887471196e-05, + "loss": 1.2333, + "step": 62703 + }, + { + "epoch": 0.75, + "grad_norm": 31.084611732057994, + "learning_rate": 1.432186195185644e-05, + "loss": 1.3214, + "step": 62706 + }, + { + "epoch": 0.75, + "grad_norm": 20.291390376206536, + "learning_rate": 1.4321335141844317e-05, + "loss": 1.1675, + "step": 62709 + }, + { + "epoch": 0.75, + "grad_norm": 10.772075335588607, + "learning_rate": 1.432080831708504e-05, + "loss": 1.4832, + "step": 62712 + }, + { + "epoch": 0.75, + "grad_norm": 9.478189639906585, + "learning_rate": 1.4320281477580395e-05, + "loss": 1.3545, + "step": 62715 + }, + { + "epoch": 0.75, + "grad_norm": 4.763484807331091, + "learning_rate": 1.4319754623332185e-05, + "loss": 1.3971, + "step": 62718 + }, + { + "epoch": 0.75, + "grad_norm": 8.739999561982964, + "learning_rate": 1.4319227754342214e-05, + "loss": 1.306, + "step": 62721 + }, + { + "epoch": 0.75, + "grad_norm": 23.37803539465865, + "learning_rate": 1.4318700870612272e-05, + "loss": 1.479, + "step": 62724 + }, + { + "epoch": 0.75, + "grad_norm": 8.42855672050849, + "learning_rate": 1.4318173972144158e-05, + "loss": 1.3612, + "step": 62727 + }, + { + "epoch": 0.75, + "grad_norm": 8.290459916031669, + "learning_rate": 1.4317647058939671e-05, + "loss": 1.7591, + "step": 62730 + }, + { + "epoch": 0.75, + "grad_norm": 10.31766498444904, + "learning_rate": 1.4317120131000615e-05, + "loss": 1.7045, + "step": 62733 + }, + { + "epoch": 0.75, + "grad_norm": 119.23780701663198, + "learning_rate": 1.4316593188328777e-05, + "loss": 1.6197, + "step": 62736 + }, + { + "epoch": 0.75, + "grad_norm": 5.580810560256067, + "learning_rate": 1.4316066230925964e-05, + "loss": 1.0058, + "step": 62739 + }, + { + "epoch": 0.75, + "grad_norm": 2.795882259835972, + "learning_rate": 1.4315539258793972e-05, + "loss": 1.1345, + "step": 62742 + }, + { + "epoch": 0.75, + "grad_norm": 32.04146444261473, + "learning_rate": 1.4315012271934597e-05, + "loss": 1.3198, + "step": 62745 + }, + { + "epoch": 0.75, + "grad_norm": 36.41789108139263, + "learning_rate": 1.4314485270349641e-05, + "loss": 1.58, + "step": 62748 + }, + { + "epoch": 0.75, + "grad_norm": 8.47485437354245, + "learning_rate": 1.4313958254040899e-05, + "loss": 1.555, + "step": 62751 + }, + { + "epoch": 0.75, + "grad_norm": 13.298404700250176, + "learning_rate": 1.4313431223010174e-05, + "loss": 1.1939, + "step": 62754 + }, + { + "epoch": 0.75, + "grad_norm": 28.796207915883617, + "learning_rate": 1.4312904177259262e-05, + "loss": 1.0208, + "step": 62757 + }, + { + "epoch": 0.75, + "grad_norm": 22.714802362162214, + "learning_rate": 1.4312377116789959e-05, + "loss": 1.7203, + "step": 62760 + }, + { + "epoch": 0.75, + "grad_norm": 8.344158658817424, + "learning_rate": 1.4311850041604067e-05, + "loss": 1.1877, + "step": 62763 + }, + { + "epoch": 0.75, + "grad_norm": 17.83338631317238, + "learning_rate": 1.4311322951703388e-05, + "loss": 1.2269, + "step": 62766 + }, + { + "epoch": 0.75, + "grad_norm": 13.020246054712882, + "learning_rate": 1.4310795847089712e-05, + "loss": 1.3931, + "step": 62769 + }, + { + "epoch": 0.75, + "grad_norm": 3.9269180549968645, + "learning_rate": 1.4310268727764842e-05, + "loss": 1.2872, + "step": 62772 + }, + { + "epoch": 0.75, + "grad_norm": 6.203403528048234, + "learning_rate": 1.430974159373058e-05, + "loss": 1.2976, + "step": 62775 + }, + { + "epoch": 0.75, + "grad_norm": 10.896342410337637, + "learning_rate": 1.4309214444988722e-05, + "loss": 1.0917, + "step": 62778 + }, + { + "epoch": 0.75, + "grad_norm": 22.787102674612697, + "learning_rate": 1.4308687281541065e-05, + "loss": 1.309, + "step": 62781 + }, + { + "epoch": 0.75, + "grad_norm": 30.915685525127984, + "learning_rate": 1.4308160103389411e-05, + "loss": 1.3429, + "step": 62784 + }, + { + "epoch": 0.75, + "grad_norm": 8.31918774148593, + "learning_rate": 1.4307632910535562e-05, + "loss": 1.1033, + "step": 62787 + }, + { + "epoch": 0.76, + "grad_norm": 7.189800482357725, + "learning_rate": 1.430710570298131e-05, + "loss": 1.4833, + "step": 62790 + }, + { + "epoch": 0.76, + "grad_norm": 7.184742651261784, + "learning_rate": 1.430657848072846e-05, + "loss": 1.2984, + "step": 62793 + }, + { + "epoch": 0.76, + "grad_norm": 38.47934106266844, + "learning_rate": 1.4306051243778804e-05, + "loss": 1.6634, + "step": 62796 + }, + { + "epoch": 0.76, + "grad_norm": 8.3267841404083, + "learning_rate": 1.430552399213415e-05, + "loss": 1.3832, + "step": 62799 + }, + { + "epoch": 0.76, + "grad_norm": 17.269097833515296, + "learning_rate": 1.4304996725796292e-05, + "loss": 1.3776, + "step": 62802 + }, + { + "epoch": 0.76, + "grad_norm": 10.895647869351395, + "learning_rate": 1.4304469444767031e-05, + "loss": 1.3306, + "step": 62805 + }, + { + "epoch": 0.76, + "grad_norm": 11.957666944150091, + "learning_rate": 1.4303942149048168e-05, + "loss": 1.4755, + "step": 62808 + }, + { + "epoch": 0.76, + "grad_norm": 7.23918930516587, + "learning_rate": 1.4303414838641501e-05, + "loss": 1.1707, + "step": 62811 + }, + { + "epoch": 0.76, + "grad_norm": 14.649835466654853, + "learning_rate": 1.4302887513548828e-05, + "loss": 1.3875, + "step": 62814 + }, + { + "epoch": 0.76, + "grad_norm": 15.751841106947683, + "learning_rate": 1.430236017377195e-05, + "loss": 1.323, + "step": 62817 + }, + { + "epoch": 0.76, + "grad_norm": 6.222145501440086, + "learning_rate": 1.4301832819312666e-05, + "loss": 1.5584, + "step": 62820 + }, + { + "epoch": 0.76, + "grad_norm": 11.235294131936179, + "learning_rate": 1.4301305450172775e-05, + "loss": 1.3897, + "step": 62823 + }, + { + "epoch": 0.76, + "grad_norm": 16.34575922289642, + "learning_rate": 1.4300778066354078e-05, + "loss": 1.4819, + "step": 62826 + }, + { + "epoch": 0.76, + "grad_norm": 21.286585780938907, + "learning_rate": 1.4300250667858378e-05, + "loss": 1.5037, + "step": 62829 + }, + { + "epoch": 0.76, + "grad_norm": 22.08919506274347, + "learning_rate": 1.4299723254687468e-05, + "loss": 1.3666, + "step": 62832 + }, + { + "epoch": 0.76, + "grad_norm": 14.164002389254236, + "learning_rate": 1.4299195826843153e-05, + "loss": 1.3791, + "step": 62835 + }, + { + "epoch": 0.76, + "grad_norm": 17.78596713361961, + "learning_rate": 1.429866838432723e-05, + "loss": 1.5918, + "step": 62838 + }, + { + "epoch": 0.76, + "grad_norm": 9.546630736446687, + "learning_rate": 1.4298140927141502e-05, + "loss": 1.4025, + "step": 62841 + }, + { + "epoch": 0.76, + "grad_norm": 7.262483418386577, + "learning_rate": 1.4297613455287764e-05, + "loss": 1.1392, + "step": 62844 + }, + { + "epoch": 0.76, + "grad_norm": 21.521805905459505, + "learning_rate": 1.429708596876782e-05, + "loss": 1.6452, + "step": 62847 + }, + { + "epoch": 0.76, + "grad_norm": 23.88243230304812, + "learning_rate": 1.4296558467583473e-05, + "loss": 1.4339, + "step": 62850 + }, + { + "epoch": 0.76, + "grad_norm": 16.40947076973556, + "learning_rate": 1.4296030951736515e-05, + "loss": 1.1824, + "step": 62853 + }, + { + "epoch": 0.76, + "grad_norm": 7.762385003665202, + "learning_rate": 1.4295503421228753e-05, + "loss": 1.7047, + "step": 62856 + }, + { + "epoch": 0.76, + "grad_norm": 18.04143924658164, + "learning_rate": 1.4294975876061987e-05, + "loss": 1.4849, + "step": 62859 + }, + { + "epoch": 0.76, + "grad_norm": 12.799141209055241, + "learning_rate": 1.4294448316238014e-05, + "loss": 1.4486, + "step": 62862 + }, + { + "epoch": 0.76, + "grad_norm": 32.72891128410958, + "learning_rate": 1.4293920741758633e-05, + "loss": 1.2254, + "step": 62865 + }, + { + "epoch": 0.76, + "grad_norm": 9.338862026124787, + "learning_rate": 1.4293393152625649e-05, + "loss": 1.5355, + "step": 62868 + }, + { + "epoch": 0.76, + "grad_norm": 21.603435654024235, + "learning_rate": 1.4292865548840862e-05, + "loss": 1.0235, + "step": 62871 + }, + { + "epoch": 0.76, + "grad_norm": 31.500028354302525, + "learning_rate": 1.429233793040607e-05, + "loss": 1.0179, + "step": 62874 + }, + { + "epoch": 0.76, + "grad_norm": 85.52317458472307, + "learning_rate": 1.4291810297323074e-05, + "loss": 1.5918, + "step": 62877 + }, + { + "epoch": 0.76, + "grad_norm": 39.06053840699926, + "learning_rate": 1.4291282649593678e-05, + "loss": 1.535, + "step": 62880 + }, + { + "epoch": 0.76, + "grad_norm": 12.800513004515057, + "learning_rate": 1.429075498721968e-05, + "loss": 1.4134, + "step": 62883 + }, + { + "epoch": 0.76, + "grad_norm": 17.92554533049466, + "learning_rate": 1.4290227310202876e-05, + "loss": 1.2277, + "step": 62886 + }, + { + "epoch": 0.76, + "grad_norm": 14.206509732930396, + "learning_rate": 1.4289699618545075e-05, + "loss": 1.1512, + "step": 62889 + }, + { + "epoch": 0.76, + "grad_norm": 17.15580335548746, + "learning_rate": 1.4289171912248075e-05, + "loss": 1.073, + "step": 62892 + }, + { + "epoch": 0.76, + "grad_norm": 14.696084645995327, + "learning_rate": 1.4288644191313675e-05, + "loss": 1.2192, + "step": 62895 + }, + { + "epoch": 0.76, + "grad_norm": 10.654612125037849, + "learning_rate": 1.4288116455743675e-05, + "loss": 1.09, + "step": 62898 + }, + { + "epoch": 0.76, + "grad_norm": 18.453967378871447, + "learning_rate": 1.428758870553988e-05, + "loss": 1.6007, + "step": 62901 + }, + { + "epoch": 0.76, + "grad_norm": 3.9055722306524427, + "learning_rate": 1.4287060940704091e-05, + "loss": 1.4296, + "step": 62904 + }, + { + "epoch": 0.76, + "grad_norm": 9.669012656676426, + "learning_rate": 1.4286533161238105e-05, + "loss": 1.3431, + "step": 62907 + }, + { + "epoch": 0.76, + "grad_norm": 10.607014934117732, + "learning_rate": 1.4286005367143724e-05, + "loss": 1.7704, + "step": 62910 + }, + { + "epoch": 0.76, + "grad_norm": 11.288410969953274, + "learning_rate": 1.4285477558422755e-05, + "loss": 1.5146, + "step": 62913 + }, + { + "epoch": 0.76, + "grad_norm": 3.312085079361593, + "learning_rate": 1.4284949735076991e-05, + "loss": 1.288, + "step": 62916 + }, + { + "epoch": 0.76, + "grad_norm": 25.60509858165818, + "learning_rate": 1.4284421897108238e-05, + "loss": 1.3939, + "step": 62919 + }, + { + "epoch": 0.76, + "grad_norm": 13.039189299519194, + "learning_rate": 1.4283894044518296e-05, + "loss": 1.4381, + "step": 62922 + }, + { + "epoch": 0.76, + "grad_norm": 13.341956387203965, + "learning_rate": 1.4283366177308971e-05, + "loss": 1.9083, + "step": 62925 + }, + { + "epoch": 0.76, + "grad_norm": 27.122256647681734, + "learning_rate": 1.4282838295482055e-05, + "loss": 1.2047, + "step": 62928 + }, + { + "epoch": 0.76, + "grad_norm": 11.07006916277431, + "learning_rate": 1.4282310399039354e-05, + "loss": 1.5192, + "step": 62931 + }, + { + "epoch": 0.76, + "grad_norm": 6.329798575874617, + "learning_rate": 1.4281782487982674e-05, + "loss": 1.2838, + "step": 62934 + }, + { + "epoch": 0.76, + "grad_norm": 3.159003000099037, + "learning_rate": 1.4281254562313809e-05, + "loss": 1.5578, + "step": 62937 + }, + { + "epoch": 0.76, + "grad_norm": 17.150373104228827, + "learning_rate": 1.4280726622034564e-05, + "loss": 1.6142, + "step": 62940 + }, + { + "epoch": 0.76, + "grad_norm": 5.298085075360312, + "learning_rate": 1.4280198667146745e-05, + "loss": 1.5612, + "step": 62943 + }, + { + "epoch": 0.76, + "grad_norm": 32.46263206985871, + "learning_rate": 1.4279670697652149e-05, + "loss": 1.1606, + "step": 62946 + }, + { + "epoch": 0.76, + "grad_norm": 5.1187991955575685, + "learning_rate": 1.4279142713552576e-05, + "loss": 1.1287, + "step": 62949 + }, + { + "epoch": 0.76, + "grad_norm": 17.42118938157112, + "learning_rate": 1.427861471484983e-05, + "loss": 1.3286, + "step": 62952 + }, + { + "epoch": 0.76, + "grad_norm": 17.947504983441704, + "learning_rate": 1.4278086701545714e-05, + "loss": 1.1388, + "step": 62955 + }, + { + "epoch": 0.76, + "grad_norm": 11.838530360730168, + "learning_rate": 1.4277558673642032e-05, + "loss": 1.4083, + "step": 62958 + }, + { + "epoch": 0.76, + "grad_norm": 15.274425548073195, + "learning_rate": 1.4277030631140578e-05, + "loss": 1.6781, + "step": 62961 + }, + { + "epoch": 0.76, + "grad_norm": 9.978247646506599, + "learning_rate": 1.4276502574043161e-05, + "loss": 1.5418, + "step": 62964 + }, + { + "epoch": 0.76, + "grad_norm": 16.591004916724025, + "learning_rate": 1.4275974502351582e-05, + "loss": 1.3367, + "step": 62967 + }, + { + "epoch": 0.76, + "grad_norm": 12.475732106822806, + "learning_rate": 1.4275446416067642e-05, + "loss": 1.1521, + "step": 62970 + }, + { + "epoch": 0.76, + "grad_norm": 10.126459616320467, + "learning_rate": 1.4274918315193142e-05, + "loss": 1.2917, + "step": 62973 + }, + { + "epoch": 0.76, + "grad_norm": 15.409392302679215, + "learning_rate": 1.4274390199729889e-05, + "loss": 1.4949, + "step": 62976 + }, + { + "epoch": 0.76, + "grad_norm": 29.29075660547965, + "learning_rate": 1.4273862069679678e-05, + "loss": 1.1601, + "step": 62979 + }, + { + "epoch": 0.76, + "grad_norm": 17.070484848967514, + "learning_rate": 1.4273333925044317e-05, + "loss": 1.2686, + "step": 62982 + }, + { + "epoch": 0.76, + "grad_norm": 3.4652593132140814, + "learning_rate": 1.4272805765825605e-05, + "loss": 1.4341, + "step": 62985 + }, + { + "epoch": 0.76, + "grad_norm": 512.2724223667274, + "learning_rate": 1.4272277592025348e-05, + "loss": 1.3718, + "step": 62988 + }, + { + "epoch": 0.76, + "grad_norm": 7.3453241712062, + "learning_rate": 1.4271749403645347e-05, + "loss": 1.5527, + "step": 62991 + }, + { + "epoch": 0.76, + "grad_norm": 5.480949653286727, + "learning_rate": 1.4271221200687402e-05, + "loss": 1.5416, + "step": 62994 + }, + { + "epoch": 0.76, + "grad_norm": 8.489109317356197, + "learning_rate": 1.4270692983153317e-05, + "loss": 1.3312, + "step": 62997 + }, + { + "epoch": 0.76, + "grad_norm": 5.795748080933273, + "learning_rate": 1.4270164751044898e-05, + "loss": 1.3476, + "step": 63000 + }, + { + "epoch": 0.76, + "grad_norm": 5.381338639408543, + "learning_rate": 1.4269636504363944e-05, + "loss": 1.5461, + "step": 63003 + }, + { + "epoch": 0.76, + "grad_norm": 13.509990842613135, + "learning_rate": 1.4269108243112255e-05, + "loss": 1.5967, + "step": 63006 + }, + { + "epoch": 0.76, + "grad_norm": 136.87751422008017, + "learning_rate": 1.4268579967291643e-05, + "loss": 1.2461, + "step": 63009 + }, + { + "epoch": 0.76, + "grad_norm": 27.00332113562281, + "learning_rate": 1.42680516769039e-05, + "loss": 1.0724, + "step": 63012 + }, + { + "epoch": 0.76, + "grad_norm": 9.042079681522907, + "learning_rate": 1.4267523371950836e-05, + "loss": 1.2157, + "step": 63015 + }, + { + "epoch": 0.76, + "grad_norm": 6.175963796365888, + "learning_rate": 1.4266995052434252e-05, + "loss": 1.5959, + "step": 63018 + }, + { + "epoch": 0.76, + "grad_norm": 18.590278659942907, + "learning_rate": 1.426646671835595e-05, + "loss": 1.4539, + "step": 63021 + }, + { + "epoch": 0.76, + "grad_norm": 15.47762683410059, + "learning_rate": 1.4265938369717737e-05, + "loss": 1.3538, + "step": 63024 + }, + { + "epoch": 0.76, + "grad_norm": 25.316737101929984, + "learning_rate": 1.426541000652141e-05, + "loss": 1.4823, + "step": 63027 + }, + { + "epoch": 0.76, + "grad_norm": 12.55184194104439, + "learning_rate": 1.4264881628768773e-05, + "loss": 1.7069, + "step": 63030 + }, + { + "epoch": 0.76, + "grad_norm": 18.441136312379985, + "learning_rate": 1.4264353236461636e-05, + "loss": 1.2553, + "step": 63033 + }, + { + "epoch": 0.76, + "grad_norm": 16.02466855430227, + "learning_rate": 1.4263824829601798e-05, + "loss": 1.4611, + "step": 63036 + }, + { + "epoch": 0.76, + "grad_norm": 12.29842416827799, + "learning_rate": 1.4263296408191056e-05, + "loss": 1.162, + "step": 63039 + }, + { + "epoch": 0.76, + "grad_norm": 10.192064293655708, + "learning_rate": 1.4262767972231222e-05, + "loss": 0.9857, + "step": 63042 + }, + { + "epoch": 0.76, + "grad_norm": 8.617181523574114, + "learning_rate": 1.4262239521724096e-05, + "loss": 1.2893, + "step": 63045 + }, + { + "epoch": 0.76, + "grad_norm": 12.391566321724737, + "learning_rate": 1.426171105667148e-05, + "loss": 1.2944, + "step": 63048 + }, + { + "epoch": 0.76, + "grad_norm": 9.185492900321837, + "learning_rate": 1.4261182577075184e-05, + "loss": 1.2329, + "step": 63051 + }, + { + "epoch": 0.76, + "grad_norm": 33.0842462019577, + "learning_rate": 1.4260654082937004e-05, + "loss": 1.4718, + "step": 63054 + }, + { + "epoch": 0.76, + "grad_norm": 8.07190045534495, + "learning_rate": 1.4260125574258747e-05, + "loss": 1.7135, + "step": 63057 + }, + { + "epoch": 0.76, + "grad_norm": 13.511286220223258, + "learning_rate": 1.4259597051042214e-05, + "loss": 1.8111, + "step": 63060 + }, + { + "epoch": 0.76, + "grad_norm": 21.33135280697918, + "learning_rate": 1.4259068513289211e-05, + "loss": 1.4509, + "step": 63063 + }, + { + "epoch": 0.76, + "grad_norm": 32.84691951019806, + "learning_rate": 1.4258539961001545e-05, + "loss": 1.3539, + "step": 63066 + }, + { + "epoch": 0.76, + "grad_norm": 43.20524971907065, + "learning_rate": 1.425801139418101e-05, + "loss": 1.5783, + "step": 63069 + }, + { + "epoch": 0.76, + "grad_norm": 15.939244720217678, + "learning_rate": 1.4257482812829419e-05, + "loss": 1.5837, + "step": 63072 + }, + { + "epoch": 0.76, + "grad_norm": 15.195271487824225, + "learning_rate": 1.4256954216948575e-05, + "loss": 1.4141, + "step": 63075 + }, + { + "epoch": 0.76, + "grad_norm": 18.35793134020385, + "learning_rate": 1.4256425606540276e-05, + "loss": 1.2101, + "step": 63078 + }, + { + "epoch": 0.76, + "grad_norm": 18.98055957906918, + "learning_rate": 1.425589698160633e-05, + "loss": 1.2442, + "step": 63081 + }, + { + "epoch": 0.76, + "grad_norm": 14.955186930305167, + "learning_rate": 1.4255368342148544e-05, + "loss": 1.3857, + "step": 63084 + }, + { + "epoch": 0.76, + "grad_norm": 7.420019911438103, + "learning_rate": 1.4254839688168716e-05, + "loss": 1.3703, + "step": 63087 + }, + { + "epoch": 0.76, + "grad_norm": 15.655116740145093, + "learning_rate": 1.4254311019668652e-05, + "loss": 1.6754, + "step": 63090 + }, + { + "epoch": 0.76, + "grad_norm": 21.801659101308726, + "learning_rate": 1.4253782336650156e-05, + "loss": 1.2912, + "step": 63093 + }, + { + "epoch": 0.76, + "grad_norm": 15.611193184309037, + "learning_rate": 1.4253253639115037e-05, + "loss": 1.1861, + "step": 63096 + }, + { + "epoch": 0.76, + "grad_norm": 27.281740490537015, + "learning_rate": 1.4252724927065091e-05, + "loss": 1.9265, + "step": 63099 + }, + { + "epoch": 0.76, + "grad_norm": 3.783997018475644, + "learning_rate": 1.4252196200502129e-05, + "loss": 1.4623, + "step": 63102 + }, + { + "epoch": 0.76, + "grad_norm": 50.61133073898369, + "learning_rate": 1.4251667459427954e-05, + "loss": 1.3531, + "step": 63105 + }, + { + "epoch": 0.76, + "grad_norm": 30.495604315007512, + "learning_rate": 1.4251138703844367e-05, + "loss": 1.4603, + "step": 63108 + }, + { + "epoch": 0.76, + "grad_norm": 5.1191034519367244, + "learning_rate": 1.4250609933753177e-05, + "loss": 1.3152, + "step": 63111 + }, + { + "epoch": 0.76, + "grad_norm": 11.835106446958251, + "learning_rate": 1.4250081149156186e-05, + "loss": 1.3593, + "step": 63114 + }, + { + "epoch": 0.76, + "grad_norm": 45.06057325463936, + "learning_rate": 1.4249552350055199e-05, + "loss": 1.3201, + "step": 63117 + }, + { + "epoch": 0.76, + "grad_norm": 13.384949236890476, + "learning_rate": 1.4249023536452018e-05, + "loss": 1.5347, + "step": 63120 + }, + { + "epoch": 0.76, + "grad_norm": 14.610707369598533, + "learning_rate": 1.424849470834845e-05, + "loss": 1.3427, + "step": 63123 + }, + { + "epoch": 0.76, + "grad_norm": 6.800349391021728, + "learning_rate": 1.4247965865746303e-05, + "loss": 1.0066, + "step": 63126 + }, + { + "epoch": 0.76, + "grad_norm": 10.178042804443761, + "learning_rate": 1.4247437008647378e-05, + "loss": 1.08, + "step": 63129 + }, + { + "epoch": 0.76, + "grad_norm": 18.874967484767286, + "learning_rate": 1.4246908137053481e-05, + "loss": 1.5666, + "step": 63132 + }, + { + "epoch": 0.76, + "grad_norm": 10.677633547964858, + "learning_rate": 1.4246379250966413e-05, + "loss": 1.5745, + "step": 63135 + }, + { + "epoch": 0.76, + "grad_norm": 4.426978631617743, + "learning_rate": 1.4245850350387984e-05, + "loss": 1.1884, + "step": 63138 + }, + { + "epoch": 0.76, + "grad_norm": 17.118803886344306, + "learning_rate": 1.424532143532e-05, + "loss": 1.2707, + "step": 63141 + }, + { + "epoch": 0.76, + "grad_norm": 9.469704145202304, + "learning_rate": 1.424479250576426e-05, + "loss": 1.2193, + "step": 63144 + }, + { + "epoch": 0.76, + "grad_norm": 7.273788418189291, + "learning_rate": 1.4244263561722571e-05, + "loss": 1.1091, + "step": 63147 + }, + { + "epoch": 0.76, + "grad_norm": 31.17200441372486, + "learning_rate": 1.4243734603196741e-05, + "loss": 1.4094, + "step": 63150 + }, + { + "epoch": 0.76, + "grad_norm": 5.387488425711597, + "learning_rate": 1.4243205630188572e-05, + "loss": 1.9093, + "step": 63153 + }, + { + "epoch": 0.76, + "grad_norm": 18.793426379249063, + "learning_rate": 1.424267664269987e-05, + "loss": 1.4479, + "step": 63156 + }, + { + "epoch": 0.76, + "grad_norm": 14.470134044448988, + "learning_rate": 1.4242147640732444e-05, + "loss": 1.8596, + "step": 63159 + }, + { + "epoch": 0.76, + "grad_norm": 20.551615425085945, + "learning_rate": 1.4241618624288095e-05, + "loss": 2.0352, + "step": 63162 + }, + { + "epoch": 0.76, + "grad_norm": 32.4591648832947, + "learning_rate": 1.4241089593368631e-05, + "loss": 1.3387, + "step": 63165 + }, + { + "epoch": 0.76, + "grad_norm": 2.916470112438474, + "learning_rate": 1.4240560547975852e-05, + "loss": 1.4394, + "step": 63168 + }, + { + "epoch": 0.76, + "grad_norm": 9.44699584605871, + "learning_rate": 1.4240031488111573e-05, + "loss": 1.328, + "step": 63171 + }, + { + "epoch": 0.76, + "grad_norm": 9.803967938205414, + "learning_rate": 1.4239502413777589e-05, + "loss": 1.4739, + "step": 63174 + }, + { + "epoch": 0.76, + "grad_norm": 9.096067646863563, + "learning_rate": 1.423897332497571e-05, + "loss": 1.1136, + "step": 63177 + }, + { + "epoch": 0.76, + "grad_norm": 9.310093100587443, + "learning_rate": 1.4238444221707743e-05, + "loss": 1.4662, + "step": 63180 + }, + { + "epoch": 0.76, + "grad_norm": 26.665095742020974, + "learning_rate": 1.4237915103975492e-05, + "loss": 1.4173, + "step": 63183 + }, + { + "epoch": 0.76, + "grad_norm": 21.107196221014096, + "learning_rate": 1.4237385971780766e-05, + "loss": 1.1635, + "step": 63186 + }, + { + "epoch": 0.76, + "grad_norm": 32.66107053979559, + "learning_rate": 1.4236856825125366e-05, + "loss": 1.2489, + "step": 63189 + }, + { + "epoch": 0.76, + "grad_norm": 12.301827659170234, + "learning_rate": 1.4236327664011101e-05, + "loss": 1.2825, + "step": 63192 + }, + { + "epoch": 0.76, + "grad_norm": 8.111039618320245, + "learning_rate": 1.4235798488439772e-05, + "loss": 1.5339, + "step": 63195 + }, + { + "epoch": 0.76, + "grad_norm": 11.529318338965561, + "learning_rate": 1.4235269298413193e-05, + "loss": 1.2151, + "step": 63198 + }, + { + "epoch": 0.76, + "grad_norm": 9.360140573446625, + "learning_rate": 1.4234740093933161e-05, + "loss": 0.8597, + "step": 63201 + }, + { + "epoch": 0.76, + "grad_norm": 28.29728811428532, + "learning_rate": 1.4234210875001488e-05, + "loss": 1.4163, + "step": 63204 + }, + { + "epoch": 0.76, + "grad_norm": 40.96938773669461, + "learning_rate": 1.423368164161998e-05, + "loss": 1.6187, + "step": 63207 + }, + { + "epoch": 0.76, + "grad_norm": 3.5008974549190364, + "learning_rate": 1.4233152393790442e-05, + "loss": 1.5923, + "step": 63210 + }, + { + "epoch": 0.76, + "grad_norm": 9.43233269003757, + "learning_rate": 1.423262313151468e-05, + "loss": 1.2816, + "step": 63213 + }, + { + "epoch": 0.76, + "grad_norm": 14.684526268500266, + "learning_rate": 1.4232093854794498e-05, + "loss": 1.1236, + "step": 63216 + }, + { + "epoch": 0.76, + "grad_norm": 4.450956204836092, + "learning_rate": 1.4231564563631704e-05, + "loss": 1.392, + "step": 63219 + }, + { + "epoch": 0.76, + "grad_norm": 9.685775446890457, + "learning_rate": 1.4231035258028103e-05, + "loss": 1.1073, + "step": 63222 + }, + { + "epoch": 0.76, + "grad_norm": 8.99512145134286, + "learning_rate": 1.4230505937985506e-05, + "loss": 1.3632, + "step": 63225 + }, + { + "epoch": 0.76, + "grad_norm": 18.72616258147704, + "learning_rate": 1.4229976603505713e-05, + "loss": 1.5391, + "step": 63228 + }, + { + "epoch": 0.76, + "grad_norm": 2.4537367680573294, + "learning_rate": 1.4229447254590535e-05, + "loss": 1.3866, + "step": 63231 + }, + { + "epoch": 0.76, + "grad_norm": 27.189932234425196, + "learning_rate": 1.4228917891241779e-05, + "loss": 1.3774, + "step": 63234 + }, + { + "epoch": 0.76, + "grad_norm": 9.392981416805315, + "learning_rate": 1.4228388513461243e-05, + "loss": 1.01, + "step": 63237 + }, + { + "epoch": 0.76, + "grad_norm": 13.727960587236474, + "learning_rate": 1.4227859121250746e-05, + "loss": 1.1459, + "step": 63240 + }, + { + "epoch": 0.76, + "grad_norm": 12.715872988888918, + "learning_rate": 1.4227329714612086e-05, + "loss": 1.1715, + "step": 63243 + }, + { + "epoch": 0.76, + "grad_norm": 33.89430410558075, + "learning_rate": 1.4226800293547077e-05, + "loss": 1.0852, + "step": 63246 + }, + { + "epoch": 0.76, + "grad_norm": 37.52891836942528, + "learning_rate": 1.4226270858057516e-05, + "loss": 1.3769, + "step": 63249 + }, + { + "epoch": 0.76, + "grad_norm": 9.393798835512792, + "learning_rate": 1.4225741408145215e-05, + "loss": 1.3529, + "step": 63252 + }, + { + "epoch": 0.76, + "grad_norm": 27.09469969267799, + "learning_rate": 1.4225211943811985e-05, + "loss": 1.3115, + "step": 63255 + }, + { + "epoch": 0.76, + "grad_norm": 8.280333175599157, + "learning_rate": 1.4224682465059623e-05, + "loss": 1.4122, + "step": 63258 + }, + { + "epoch": 0.76, + "grad_norm": 12.850815032710472, + "learning_rate": 1.4224152971889944e-05, + "loss": 1.3172, + "step": 63261 + }, + { + "epoch": 0.76, + "grad_norm": 6.3505274349725545, + "learning_rate": 1.4223623464304752e-05, + "loss": 2.1422, + "step": 63264 + }, + { + "epoch": 0.76, + "grad_norm": 12.130730292036654, + "learning_rate": 1.4223093942305856e-05, + "loss": 1.5734, + "step": 63267 + }, + { + "epoch": 0.76, + "grad_norm": 22.638096683956952, + "learning_rate": 1.4222564405895061e-05, + "loss": 1.2741, + "step": 63270 + }, + { + "epoch": 0.76, + "grad_norm": 17.121326301053745, + "learning_rate": 1.4222034855074173e-05, + "loss": 1.3512, + "step": 63273 + }, + { + "epoch": 0.76, + "grad_norm": 3.8779664617358898, + "learning_rate": 1.4221505289845003e-05, + "loss": 1.9026, + "step": 63276 + }, + { + "epoch": 0.76, + "grad_norm": 5.417253198000838, + "learning_rate": 1.4220975710209355e-05, + "loss": 1.0092, + "step": 63279 + }, + { + "epoch": 0.76, + "grad_norm": 11.486373339766283, + "learning_rate": 1.4220446116169035e-05, + "loss": 1.5316, + "step": 63282 + }, + { + "epoch": 0.76, + "grad_norm": 16.047946222533376, + "learning_rate": 1.4219916507725855e-05, + "loss": 1.5147, + "step": 63285 + }, + { + "epoch": 0.76, + "grad_norm": 21.33733574891719, + "learning_rate": 1.4219386884881623e-05, + "loss": 1.5951, + "step": 63288 + }, + { + "epoch": 0.76, + "grad_norm": 11.586141064088928, + "learning_rate": 1.4218857247638138e-05, + "loss": 1.5202, + "step": 63291 + }, + { + "epoch": 0.76, + "grad_norm": 177.78906187508662, + "learning_rate": 1.4218327595997216e-05, + "loss": 1.8376, + "step": 63294 + }, + { + "epoch": 0.76, + "grad_norm": 15.73692315825567, + "learning_rate": 1.4217797929960662e-05, + "loss": 1.318, + "step": 63297 + }, + { + "epoch": 0.76, + "grad_norm": 9.827061661229967, + "learning_rate": 1.4217268249530281e-05, + "loss": 1.2178, + "step": 63300 + }, + { + "epoch": 0.76, + "grad_norm": 12.51954247116975, + "learning_rate": 1.421673855470788e-05, + "loss": 1.1423, + "step": 63303 + }, + { + "epoch": 0.76, + "grad_norm": 9.199801687607298, + "learning_rate": 1.4216208845495274e-05, + "loss": 1.3636, + "step": 63306 + }, + { + "epoch": 0.76, + "grad_norm": 17.684402274291728, + "learning_rate": 1.4215679121894263e-05, + "loss": 1.2826, + "step": 63309 + }, + { + "epoch": 0.76, + "grad_norm": 12.720937748691181, + "learning_rate": 1.421514938390666e-05, + "loss": 1.5635, + "step": 63312 + }, + { + "epoch": 0.76, + "grad_norm": 9.048376565705983, + "learning_rate": 1.4214619631534269e-05, + "loss": 1.1142, + "step": 63315 + }, + { + "epoch": 0.76, + "grad_norm": 7.953811081059257, + "learning_rate": 1.4214089864778897e-05, + "loss": 1.4961, + "step": 63318 + }, + { + "epoch": 0.76, + "grad_norm": 35.49922569623146, + "learning_rate": 1.421356008364236e-05, + "loss": 1.8447, + "step": 63321 + }, + { + "epoch": 0.76, + "grad_norm": 12.710294260958733, + "learning_rate": 1.4213030288126453e-05, + "loss": 1.4487, + "step": 63324 + }, + { + "epoch": 0.76, + "grad_norm": 3.9344118936529666, + "learning_rate": 1.4212500478232996e-05, + "loss": 1.4012, + "step": 63327 + }, + { + "epoch": 0.76, + "grad_norm": 8.083880820588504, + "learning_rate": 1.4211970653963791e-05, + "loss": 1.2721, + "step": 63330 + }, + { + "epoch": 0.76, + "grad_norm": 13.202609938384962, + "learning_rate": 1.4211440815320647e-05, + "loss": 1.3699, + "step": 63333 + }, + { + "epoch": 0.76, + "grad_norm": 32.39830698210595, + "learning_rate": 1.421091096230537e-05, + "loss": 1.6581, + "step": 63336 + }, + { + "epoch": 0.76, + "grad_norm": 20.369952834575322, + "learning_rate": 1.4210381094919774e-05, + "loss": 1.3763, + "step": 63339 + }, + { + "epoch": 0.76, + "grad_norm": 9.147719046345161, + "learning_rate": 1.4209851213165667e-05, + "loss": 1.5697, + "step": 63342 + }, + { + "epoch": 0.76, + "grad_norm": 90.84626787773642, + "learning_rate": 1.420932131704485e-05, + "loss": 1.2936, + "step": 63345 + }, + { + "epoch": 0.76, + "grad_norm": 11.68318381591743, + "learning_rate": 1.4208791406559136e-05, + "loss": 1.2857, + "step": 63348 + }, + { + "epoch": 0.76, + "grad_norm": 16.475375020883902, + "learning_rate": 1.4208261481710333e-05, + "loss": 1.5306, + "step": 63351 + }, + { + "epoch": 0.76, + "grad_norm": 2.126721774305351, + "learning_rate": 1.420773154250025e-05, + "loss": 1.3782, + "step": 63354 + }, + { + "epoch": 0.76, + "grad_norm": 3.518267136760873, + "learning_rate": 1.420720158893069e-05, + "loss": 1.5316, + "step": 63357 + }, + { + "epoch": 0.76, + "grad_norm": 14.4101671955086, + "learning_rate": 1.4206671621003472e-05, + "loss": 1.111, + "step": 63360 + }, + { + "epoch": 0.76, + "grad_norm": 4.103322690731552, + "learning_rate": 1.4206141638720398e-05, + "loss": 1.4187, + "step": 63363 + }, + { + "epoch": 0.76, + "grad_norm": 12.549118726451809, + "learning_rate": 1.4205611642083277e-05, + "loss": 1.6069, + "step": 63366 + }, + { + "epoch": 0.76, + "grad_norm": 3.480152268159873, + "learning_rate": 1.4205081631093917e-05, + "loss": 1.231, + "step": 63369 + }, + { + "epoch": 0.76, + "grad_norm": 8.153755198518148, + "learning_rate": 1.420455160575413e-05, + "loss": 1.3273, + "step": 63372 + }, + { + "epoch": 0.76, + "grad_norm": 23.057960834034258, + "learning_rate": 1.4204021566065724e-05, + "loss": 1.4989, + "step": 63375 + }, + { + "epoch": 0.76, + "grad_norm": 28.350137285457254, + "learning_rate": 1.4203491512030503e-05, + "loss": 1.4627, + "step": 63378 + }, + { + "epoch": 0.76, + "grad_norm": 4.9628445309357225, + "learning_rate": 1.4202961443650284e-05, + "loss": 1.2539, + "step": 63381 + }, + { + "epoch": 0.76, + "grad_norm": 43.86232478067105, + "learning_rate": 1.4202431360926868e-05, + "loss": 1.3452, + "step": 63384 + }, + { + "epoch": 0.76, + "grad_norm": 6.335311845149849, + "learning_rate": 1.420190126386207e-05, + "loss": 1.4067, + "step": 63387 + }, + { + "epoch": 0.76, + "grad_norm": 5.880857250812678, + "learning_rate": 1.4201371152457692e-05, + "loss": 0.9445, + "step": 63390 + }, + { + "epoch": 0.76, + "grad_norm": 7.89210132371437, + "learning_rate": 1.4200841026715553e-05, + "loss": 1.2049, + "step": 63393 + }, + { + "epoch": 0.76, + "grad_norm": 4.524395805782503, + "learning_rate": 1.4200310886637454e-05, + "loss": 1.4151, + "step": 63396 + }, + { + "epoch": 0.76, + "grad_norm": 8.819996560924714, + "learning_rate": 1.4199780732225207e-05, + "loss": 1.169, + "step": 63399 + }, + { + "epoch": 0.76, + "grad_norm": 46.541153823792264, + "learning_rate": 1.419925056348062e-05, + "loss": 1.2586, + "step": 63402 + }, + { + "epoch": 0.76, + "grad_norm": 3.7597026186406968, + "learning_rate": 1.4198720380405507e-05, + "loss": 1.4161, + "step": 63405 + }, + { + "epoch": 0.76, + "grad_norm": 9.98160369716013, + "learning_rate": 1.4198190183001672e-05, + "loss": 1.3164, + "step": 63408 + }, + { + "epoch": 0.76, + "grad_norm": 18.774274142823945, + "learning_rate": 1.4197659971270923e-05, + "loss": 1.3893, + "step": 63411 + }, + { + "epoch": 0.76, + "grad_norm": 158.99415060204015, + "learning_rate": 1.4197129745215077e-05, + "loss": 1.8704, + "step": 63414 + }, + { + "epoch": 0.76, + "grad_norm": 18.951989366244362, + "learning_rate": 1.4196599504835937e-05, + "loss": 1.1773, + "step": 63417 + }, + { + "epoch": 0.76, + "grad_norm": 9.218094855911557, + "learning_rate": 1.4196069250135314e-05, + "loss": 1.6804, + "step": 63420 + }, + { + "epoch": 0.76, + "grad_norm": 28.672941526283505, + "learning_rate": 1.4195538981115017e-05, + "loss": 1.6456, + "step": 63423 + }, + { + "epoch": 0.76, + "grad_norm": 11.723570208311779, + "learning_rate": 1.4195008697776859e-05, + "loss": 1.2797, + "step": 63426 + }, + { + "epoch": 0.76, + "grad_norm": 13.317423346749791, + "learning_rate": 1.419447840012265e-05, + "loss": 1.1989, + "step": 63429 + }, + { + "epoch": 0.76, + "grad_norm": 12.275700100801789, + "learning_rate": 1.4193948088154191e-05, + "loss": 1.8116, + "step": 63432 + }, + { + "epoch": 0.76, + "grad_norm": 5.7126784701318645, + "learning_rate": 1.41934177618733e-05, + "loss": 1.384, + "step": 63435 + }, + { + "epoch": 0.76, + "grad_norm": 8.340383719535122, + "learning_rate": 1.4192887421281788e-05, + "loss": 1.2052, + "step": 63438 + }, + { + "epoch": 0.76, + "grad_norm": 5.802480894930679, + "learning_rate": 1.4192357066381458e-05, + "loss": 1.36, + "step": 63441 + }, + { + "epoch": 0.76, + "grad_norm": 9.47823392643814, + "learning_rate": 1.4191826697174123e-05, + "loss": 1.5563, + "step": 63444 + }, + { + "epoch": 0.76, + "grad_norm": 9.918153942341787, + "learning_rate": 1.4191296313661592e-05, + "loss": 1.5424, + "step": 63447 + }, + { + "epoch": 0.76, + "grad_norm": 27.14107835185298, + "learning_rate": 1.4190765915845681e-05, + "loss": 1.4302, + "step": 63450 + }, + { + "epoch": 0.76, + "grad_norm": 13.817156277950845, + "learning_rate": 1.419023550372819e-05, + "loss": 1.7054, + "step": 63453 + }, + { + "epoch": 0.76, + "grad_norm": 3.0922417941425224, + "learning_rate": 1.418970507731094e-05, + "loss": 1.4921, + "step": 63456 + }, + { + "epoch": 0.76, + "grad_norm": 32.36076206961579, + "learning_rate": 1.4189174636595735e-05, + "loss": 1.2583, + "step": 63459 + }, + { + "epoch": 0.76, + "grad_norm": 11.423694698763187, + "learning_rate": 1.4188644181584386e-05, + "loss": 1.4254, + "step": 63462 + }, + { + "epoch": 0.76, + "grad_norm": 45.73941263663458, + "learning_rate": 1.4188113712278699e-05, + "loss": 1.1535, + "step": 63465 + }, + { + "epoch": 0.76, + "grad_norm": 2.313221233915766, + "learning_rate": 1.4187583228680493e-05, + "loss": 1.3249, + "step": 63468 + }, + { + "epoch": 0.76, + "grad_norm": 7.41980767843486, + "learning_rate": 1.418705273079157e-05, + "loss": 1.6792, + "step": 63471 + }, + { + "epoch": 0.76, + "grad_norm": 63.933002752972605, + "learning_rate": 1.4186522218613746e-05, + "loss": 1.5618, + "step": 63474 + }, + { + "epoch": 0.76, + "grad_norm": 30.23424673940073, + "learning_rate": 1.418599169214883e-05, + "loss": 1.1255, + "step": 63477 + }, + { + "epoch": 0.76, + "grad_norm": 8.637668523402176, + "learning_rate": 1.4185461151398632e-05, + "loss": 1.4415, + "step": 63480 + }, + { + "epoch": 0.76, + "grad_norm": 11.509618092291594, + "learning_rate": 1.4184930596364964e-05, + "loss": 1.3673, + "step": 63483 + }, + { + "epoch": 0.76, + "grad_norm": 18.25418028498952, + "learning_rate": 1.4184400027049632e-05, + "loss": 1.3821, + "step": 63486 + }, + { + "epoch": 0.76, + "grad_norm": 13.014019762834627, + "learning_rate": 1.4183869443454452e-05, + "loss": 1.0588, + "step": 63489 + }, + { + "epoch": 0.76, + "grad_norm": 9.668363494078323, + "learning_rate": 1.4183338845581234e-05, + "loss": 1.3399, + "step": 63492 + }, + { + "epoch": 0.76, + "grad_norm": 11.11141976623883, + "learning_rate": 1.4182808233431785e-05, + "loss": 1.0312, + "step": 63495 + }, + { + "epoch": 0.76, + "grad_norm": 61.486256384294826, + "learning_rate": 1.4182277607007917e-05, + "loss": 1.4274, + "step": 63498 + }, + { + "epoch": 0.76, + "grad_norm": 128.78230281476607, + "learning_rate": 1.4181746966311445e-05, + "loss": 1.3088, + "step": 63501 + }, + { + "epoch": 0.76, + "grad_norm": 21.0742786931568, + "learning_rate": 1.4181216311344174e-05, + "loss": 1.4896, + "step": 63504 + }, + { + "epoch": 0.76, + "grad_norm": 33.104743186806814, + "learning_rate": 1.418068564210792e-05, + "loss": 1.346, + "step": 63507 + }, + { + "epoch": 0.76, + "grad_norm": 3.8825217779918977, + "learning_rate": 1.4180154958604492e-05, + "loss": 1.2884, + "step": 63510 + }, + { + "epoch": 0.76, + "grad_norm": 7.915737547538962, + "learning_rate": 1.41796242608357e-05, + "loss": 1.4461, + "step": 63513 + }, + { + "epoch": 0.76, + "grad_norm": 14.051896910982075, + "learning_rate": 1.4179093548803358e-05, + "loss": 1.0743, + "step": 63516 + }, + { + "epoch": 0.76, + "grad_norm": 6.265407754284325, + "learning_rate": 1.417856282250927e-05, + "loss": 1.3039, + "step": 63519 + }, + { + "epoch": 0.76, + "grad_norm": 24.310651193785343, + "learning_rate": 1.4178032081955255e-05, + "loss": 1.4504, + "step": 63522 + }, + { + "epoch": 0.76, + "grad_norm": 8.663392103550647, + "learning_rate": 1.4177501327143124e-05, + "loss": 1.4229, + "step": 63525 + }, + { + "epoch": 0.76, + "grad_norm": 3.135362792531662, + "learning_rate": 1.417697055807468e-05, + "loss": 1.6581, + "step": 63528 + }, + { + "epoch": 0.76, + "grad_norm": 6.721477627981775, + "learning_rate": 1.4176439774751745e-05, + "loss": 1.3856, + "step": 63531 + }, + { + "epoch": 0.76, + "grad_norm": 5.448108809167892, + "learning_rate": 1.4175908977176125e-05, + "loss": 0.9737, + "step": 63534 + }, + { + "epoch": 0.76, + "grad_norm": 2.0110275370525095, + "learning_rate": 1.4175378165349632e-05, + "loss": 1.5267, + "step": 63537 + }, + { + "epoch": 0.76, + "grad_norm": 8.597803677068072, + "learning_rate": 1.4174847339274074e-05, + "loss": 1.4089, + "step": 63540 + }, + { + "epoch": 0.76, + "grad_norm": 6.680609594056335, + "learning_rate": 1.4174316498951266e-05, + "loss": 1.2192, + "step": 63543 + }, + { + "epoch": 0.76, + "grad_norm": 7.172460158483963, + "learning_rate": 1.4173785644383022e-05, + "loss": 1.1001, + "step": 63546 + }, + { + "epoch": 0.76, + "grad_norm": 16.31857358959921, + "learning_rate": 1.417325477557115e-05, + "loss": 1.1207, + "step": 63549 + }, + { + "epoch": 0.76, + "grad_norm": 6.028244717255294, + "learning_rate": 1.4172723892517461e-05, + "loss": 1.1772, + "step": 63552 + }, + { + "epoch": 0.76, + "grad_norm": 3.998342704112695, + "learning_rate": 1.417219299522377e-05, + "loss": 1.2885, + "step": 63555 + }, + { + "epoch": 0.76, + "grad_norm": 23.839754341245833, + "learning_rate": 1.4171662083691891e-05, + "loss": 1.2575, + "step": 63558 + }, + { + "epoch": 0.76, + "grad_norm": 17.376424674159235, + "learning_rate": 1.4171131157923625e-05, + "loss": 1.5313, + "step": 63561 + }, + { + "epoch": 0.76, + "grad_norm": 62.4778937239629, + "learning_rate": 1.4170600217920794e-05, + "loss": 1.5849, + "step": 63564 + }, + { + "epoch": 0.76, + "grad_norm": 17.85045560121177, + "learning_rate": 1.4170069263685207e-05, + "loss": 1.3137, + "step": 63567 + }, + { + "epoch": 0.76, + "grad_norm": 16.72613317142093, + "learning_rate": 1.4169538295218676e-05, + "loss": 1.3026, + "step": 63570 + }, + { + "epoch": 0.76, + "grad_norm": 10.507928263683882, + "learning_rate": 1.4169007312523011e-05, + "loss": 1.3304, + "step": 63573 + }, + { + "epoch": 0.76, + "grad_norm": 8.712961864243551, + "learning_rate": 1.4168476315600027e-05, + "loss": 1.3445, + "step": 63576 + }, + { + "epoch": 0.76, + "grad_norm": 10.609488514352904, + "learning_rate": 1.4167945304451534e-05, + "loss": 1.4531, + "step": 63579 + }, + { + "epoch": 0.76, + "grad_norm": 10.669679027301184, + "learning_rate": 1.4167414279079346e-05, + "loss": 1.0442, + "step": 63582 + }, + { + "epoch": 0.76, + "grad_norm": 3.1158568062868475, + "learning_rate": 1.4166883239485273e-05, + "loss": 1.4268, + "step": 63585 + }, + { + "epoch": 0.76, + "grad_norm": 36.37324367447946, + "learning_rate": 1.4166352185671129e-05, + "loss": 1.8406, + "step": 63588 + }, + { + "epoch": 0.76, + "grad_norm": 26.061837572146423, + "learning_rate": 1.4165821117638726e-05, + "loss": 1.4947, + "step": 63591 + }, + { + "epoch": 0.76, + "grad_norm": 8.860574491491292, + "learning_rate": 1.4165290035389874e-05, + "loss": 1.7922, + "step": 63594 + }, + { + "epoch": 0.76, + "grad_norm": 11.41459887882277, + "learning_rate": 1.4164758938926388e-05, + "loss": 1.1695, + "step": 63597 + }, + { + "epoch": 0.76, + "grad_norm": 8.066791077959264, + "learning_rate": 1.4164227828250083e-05, + "loss": 1.1339, + "step": 63600 + }, + { + "epoch": 0.76, + "grad_norm": 5.986803810436717, + "learning_rate": 1.4163696703362768e-05, + "loss": 1.4654, + "step": 63603 + }, + { + "epoch": 0.76, + "grad_norm": 15.428395419807762, + "learning_rate": 1.4163165564266252e-05, + "loss": 0.8926, + "step": 63606 + }, + { + "epoch": 0.76, + "grad_norm": 6.765719167530677, + "learning_rate": 1.4162634410962355e-05, + "loss": 1.0082, + "step": 63609 + }, + { + "epoch": 0.76, + "grad_norm": 12.325591913974726, + "learning_rate": 1.4162103243452884e-05, + "loss": 1.3904, + "step": 63612 + }, + { + "epoch": 0.76, + "grad_norm": 9.687301046494264, + "learning_rate": 1.4161572061739654e-05, + "loss": 1.3994, + "step": 63615 + }, + { + "epoch": 0.76, + "grad_norm": 22.031842850382606, + "learning_rate": 1.4161040865824478e-05, + "loss": 1.4096, + "step": 63618 + }, + { + "epoch": 0.77, + "grad_norm": 33.04576463750243, + "learning_rate": 1.416050965570917e-05, + "loss": 1.6685, + "step": 63621 + }, + { + "epoch": 0.77, + "grad_norm": 2.953085997946688, + "learning_rate": 1.415997843139554e-05, + "loss": 1.2216, + "step": 63624 + }, + { + "epoch": 0.77, + "grad_norm": 10.192242437992975, + "learning_rate": 1.41594471928854e-05, + "loss": 1.1406, + "step": 63627 + }, + { + "epoch": 0.77, + "grad_norm": 19.461171451072474, + "learning_rate": 1.415891594018057e-05, + "loss": 1.501, + "step": 63630 + }, + { + "epoch": 0.77, + "grad_norm": 7.165821541427396, + "learning_rate": 1.4158384673282852e-05, + "loss": 1.4005, + "step": 63633 + }, + { + "epoch": 0.77, + "grad_norm": 16.091165215845525, + "learning_rate": 1.4157853392194068e-05, + "loss": 1.2971, + "step": 63636 + }, + { + "epoch": 0.77, + "grad_norm": 4.823697344742797, + "learning_rate": 1.4157322096916027e-05, + "loss": 1.258, + "step": 63639 + }, + { + "epoch": 0.77, + "grad_norm": 2.7824243431604074, + "learning_rate": 1.4156790787450545e-05, + "loss": 1.3149, + "step": 63642 + }, + { + "epoch": 0.77, + "grad_norm": 7.938665253874877, + "learning_rate": 1.4156259463799433e-05, + "loss": 1.2555, + "step": 63645 + }, + { + "epoch": 0.77, + "grad_norm": 6.505512577571146, + "learning_rate": 1.4155728125964501e-05, + "loss": 1.1742, + "step": 63648 + }, + { + "epoch": 0.77, + "grad_norm": 7.839555200136509, + "learning_rate": 1.415519677394757e-05, + "loss": 1.7938, + "step": 63651 + }, + { + "epoch": 0.77, + "grad_norm": 10.931800562816642, + "learning_rate": 1.4154665407750447e-05, + "loss": 1.4772, + "step": 63654 + }, + { + "epoch": 0.77, + "grad_norm": 16.576546157533457, + "learning_rate": 1.4154134027374948e-05, + "loss": 1.485, + "step": 63657 + }, + { + "epoch": 0.77, + "grad_norm": 20.910849923717123, + "learning_rate": 1.4153602632822885e-05, + "loss": 1.4024, + "step": 63660 + }, + { + "epoch": 0.77, + "grad_norm": 14.852720745538042, + "learning_rate": 1.4153071224096074e-05, + "loss": 1.2849, + "step": 63663 + }, + { + "epoch": 0.77, + "grad_norm": 9.000085670692716, + "learning_rate": 1.4152539801196326e-05, + "loss": 1.4508, + "step": 63666 + }, + { + "epoch": 0.77, + "grad_norm": 9.212467843681832, + "learning_rate": 1.4152008364125454e-05, + "loss": 1.4366, + "step": 63669 + }, + { + "epoch": 0.77, + "grad_norm": 13.05870760748436, + "learning_rate": 1.4151476912885276e-05, + "loss": 1.4255, + "step": 63672 + }, + { + "epoch": 0.77, + "grad_norm": 13.194867557628932, + "learning_rate": 1.4150945447477603e-05, + "loss": 1.6704, + "step": 63675 + }, + { + "epoch": 0.77, + "grad_norm": 16.102839224425544, + "learning_rate": 1.4150413967904244e-05, + "loss": 1.195, + "step": 63678 + }, + { + "epoch": 0.77, + "grad_norm": 20.00563904923297, + "learning_rate": 1.4149882474167018e-05, + "loss": 1.2219, + "step": 63681 + }, + { + "epoch": 0.77, + "grad_norm": 20.23797521823308, + "learning_rate": 1.4149350966267743e-05, + "loss": 1.6114, + "step": 63684 + }, + { + "epoch": 0.77, + "grad_norm": 9.892590931810583, + "learning_rate": 1.414881944420822e-05, + "loss": 1.179, + "step": 63687 + }, + { + "epoch": 0.77, + "grad_norm": 15.269548189506969, + "learning_rate": 1.4148287907990277e-05, + "loss": 1.4738, + "step": 63690 + }, + { + "epoch": 0.77, + "grad_norm": 18.910179393544734, + "learning_rate": 1.4147756357615721e-05, + "loss": 1.1581, + "step": 63693 + }, + { + "epoch": 0.77, + "grad_norm": 7.6686834023156525, + "learning_rate": 1.4147224793086362e-05, + "loss": 1.0046, + "step": 63696 + }, + { + "epoch": 0.77, + "grad_norm": 19.692252565785964, + "learning_rate": 1.4146693214404024e-05, + "loss": 1.3346, + "step": 63699 + }, + { + "epoch": 0.77, + "grad_norm": 11.757691326528649, + "learning_rate": 1.414616162157051e-05, + "loss": 1.4183, + "step": 63702 + }, + { + "epoch": 0.77, + "grad_norm": 7.06457540659456, + "learning_rate": 1.4145630014587646e-05, + "loss": 1.3122, + "step": 63705 + }, + { + "epoch": 0.77, + "grad_norm": 9.458821314859001, + "learning_rate": 1.4145098393457235e-05, + "loss": 1.2281, + "step": 63708 + }, + { + "epoch": 0.77, + "grad_norm": 17.727037683583116, + "learning_rate": 1.4144566758181099e-05, + "loss": 1.2016, + "step": 63711 + }, + { + "epoch": 0.77, + "grad_norm": 6.211331448528442, + "learning_rate": 1.4144035108761051e-05, + "loss": 1.7804, + "step": 63714 + }, + { + "epoch": 0.77, + "grad_norm": 7.581171856848203, + "learning_rate": 1.4143503445198898e-05, + "loss": 1.3787, + "step": 63717 + }, + { + "epoch": 0.77, + "grad_norm": 22.084625785116284, + "learning_rate": 1.4142971767496464e-05, + "loss": 1.5197, + "step": 63720 + }, + { + "epoch": 0.77, + "grad_norm": 6.344578728922846, + "learning_rate": 1.414244007565556e-05, + "loss": 1.439, + "step": 63723 + }, + { + "epoch": 0.77, + "grad_norm": 38.757899848532446, + "learning_rate": 1.4141908369678002e-05, + "loss": 1.7375, + "step": 63726 + }, + { + "epoch": 0.77, + "grad_norm": 5.783218940995892, + "learning_rate": 1.4141376649565599e-05, + "loss": 1.5384, + "step": 63729 + }, + { + "epoch": 0.77, + "grad_norm": 18.971702451132963, + "learning_rate": 1.4140844915320171e-05, + "loss": 1.1187, + "step": 63732 + }, + { + "epoch": 0.77, + "grad_norm": 6.308128384419732, + "learning_rate": 1.4140313166943529e-05, + "loss": 1.3612, + "step": 63735 + }, + { + "epoch": 0.77, + "grad_norm": 6.387296068079834, + "learning_rate": 1.4139781404437493e-05, + "loss": 1.4119, + "step": 63738 + }, + { + "epoch": 0.77, + "grad_norm": 7.437315112248786, + "learning_rate": 1.413924962780387e-05, + "loss": 1.3374, + "step": 63741 + }, + { + "epoch": 0.77, + "grad_norm": 19.432695253015726, + "learning_rate": 1.413871783704448e-05, + "loss": 1.42, + "step": 63744 + }, + { + "epoch": 0.77, + "grad_norm": 10.163067136477375, + "learning_rate": 1.4138186032161141e-05, + "loss": 1.5576, + "step": 63747 + }, + { + "epoch": 0.77, + "grad_norm": 10.298459636619762, + "learning_rate": 1.4137654213155658e-05, + "loss": 1.5091, + "step": 63750 + }, + { + "epoch": 0.77, + "grad_norm": 40.10335319817708, + "learning_rate": 1.4137122380029855e-05, + "loss": 1.4082, + "step": 63753 + }, + { + "epoch": 0.77, + "grad_norm": 15.333383153608784, + "learning_rate": 1.4136590532785542e-05, + "loss": 1.18, + "step": 63756 + }, + { + "epoch": 0.77, + "grad_norm": 29.514140926376857, + "learning_rate": 1.4136058671424537e-05, + "loss": 1.622, + "step": 63759 + }, + { + "epoch": 0.77, + "grad_norm": 7.15205299731674, + "learning_rate": 1.4135526795948651e-05, + "loss": 1.2821, + "step": 63762 + }, + { + "epoch": 0.77, + "grad_norm": 5.689984396607862, + "learning_rate": 1.4134994906359705e-05, + "loss": 1.1605, + "step": 63765 + }, + { + "epoch": 0.77, + "grad_norm": 6.715868477700994, + "learning_rate": 1.413446300265951e-05, + "loss": 1.4945, + "step": 63768 + }, + { + "epoch": 0.77, + "grad_norm": 4.97721511672445, + "learning_rate": 1.4133931084849879e-05, + "loss": 1.3531, + "step": 63771 + }, + { + "epoch": 0.77, + "grad_norm": 12.564048489279093, + "learning_rate": 1.4133399152932632e-05, + "loss": 1.395, + "step": 63774 + }, + { + "epoch": 0.77, + "grad_norm": 8.49610485392453, + "learning_rate": 1.4132867206909583e-05, + "loss": 1.3261, + "step": 63777 + }, + { + "epoch": 0.77, + "grad_norm": 6.109359462744202, + "learning_rate": 1.4132335246782547e-05, + "loss": 1.2911, + "step": 63780 + }, + { + "epoch": 0.77, + "grad_norm": 42.56939721223883, + "learning_rate": 1.413180327255334e-05, + "loss": 1.3995, + "step": 63783 + }, + { + "epoch": 0.77, + "grad_norm": 23.55591437820872, + "learning_rate": 1.4131271284223776e-05, + "loss": 1.3208, + "step": 63786 + }, + { + "epoch": 0.77, + "grad_norm": 5.11436352045077, + "learning_rate": 1.413073928179567e-05, + "loss": 1.4774, + "step": 63789 + }, + { + "epoch": 0.77, + "grad_norm": 20.281469883385935, + "learning_rate": 1.413020726527084e-05, + "loss": 1.2081, + "step": 63792 + }, + { + "epoch": 0.77, + "grad_norm": 177.06155729450293, + "learning_rate": 1.4129675234651099e-05, + "loss": 1.4679, + "step": 63795 + }, + { + "epoch": 0.77, + "grad_norm": 21.299216301087604, + "learning_rate": 1.4129143189938264e-05, + "loss": 1.2482, + "step": 63798 + }, + { + "epoch": 0.77, + "grad_norm": 22.59276390661163, + "learning_rate": 1.4128611131134153e-05, + "loss": 1.7505, + "step": 63801 + }, + { + "epoch": 0.77, + "grad_norm": 20.548297175167047, + "learning_rate": 1.4128079058240576e-05, + "loss": 1.4914, + "step": 63804 + }, + { + "epoch": 0.77, + "grad_norm": 3.2446527766414435, + "learning_rate": 1.4127546971259354e-05, + "loss": 1.3424, + "step": 63807 + }, + { + "epoch": 0.77, + "grad_norm": 36.94103498202826, + "learning_rate": 1.4127014870192302e-05, + "loss": 1.3784, + "step": 63810 + }, + { + "epoch": 0.77, + "grad_norm": 58.246154055909464, + "learning_rate": 1.4126482755041232e-05, + "loss": 1.2975, + "step": 63813 + }, + { + "epoch": 0.77, + "grad_norm": 3.721807993639973, + "learning_rate": 1.4125950625807965e-05, + "loss": 1.3709, + "step": 63816 + }, + { + "epoch": 0.77, + "grad_norm": 22.61455291082519, + "learning_rate": 1.4125418482494313e-05, + "loss": 1.5007, + "step": 63819 + }, + { + "epoch": 0.77, + "grad_norm": 21.455246364824692, + "learning_rate": 1.4124886325102097e-05, + "loss": 1.1174, + "step": 63822 + }, + { + "epoch": 0.77, + "grad_norm": 10.418496731260932, + "learning_rate": 1.4124354153633124e-05, + "loss": 1.3406, + "step": 63825 + }, + { + "epoch": 0.77, + "grad_norm": 4.605117388902245, + "learning_rate": 1.4123821968089222e-05, + "loss": 1.3007, + "step": 63828 + }, + { + "epoch": 0.77, + "grad_norm": 8.42546993774703, + "learning_rate": 1.4123289768472197e-05, + "loss": 1.2059, + "step": 63831 + }, + { + "epoch": 0.77, + "grad_norm": 20.028591469538885, + "learning_rate": 1.4122757554783872e-05, + "loss": 1.442, + "step": 63834 + }, + { + "epoch": 0.77, + "grad_norm": 21.55673411001378, + "learning_rate": 1.4122225327026055e-05, + "loss": 1.6488, + "step": 63837 + }, + { + "epoch": 0.77, + "grad_norm": 5.369250839073018, + "learning_rate": 1.4121693085200574e-05, + "loss": 1.6282, + "step": 63840 + }, + { + "epoch": 0.77, + "grad_norm": 13.767901799851305, + "learning_rate": 1.4121160829309237e-05, + "loss": 1.2016, + "step": 63843 + }, + { + "epoch": 0.77, + "grad_norm": 19.063617971513178, + "learning_rate": 1.4120628559353863e-05, + "loss": 1.3353, + "step": 63846 + }, + { + "epoch": 0.77, + "grad_norm": 12.320163441234468, + "learning_rate": 1.4120096275336265e-05, + "loss": 1.1498, + "step": 63849 + }, + { + "epoch": 0.77, + "grad_norm": 2.5795262396209777, + "learning_rate": 1.4119563977258266e-05, + "loss": 1.4386, + "step": 63852 + }, + { + "epoch": 0.77, + "grad_norm": 14.074031606782807, + "learning_rate": 1.4119031665121678e-05, + "loss": 0.882, + "step": 63855 + }, + { + "epoch": 0.77, + "grad_norm": 40.89988398705769, + "learning_rate": 1.4118499338928317e-05, + "loss": 1.2475, + "step": 63858 + }, + { + "epoch": 0.77, + "grad_norm": 30.450543344095514, + "learning_rate": 1.4117966998680003e-05, + "loss": 1.2239, + "step": 63861 + }, + { + "epoch": 0.77, + "grad_norm": 14.157493327236791, + "learning_rate": 1.4117434644378552e-05, + "loss": 1.1063, + "step": 63864 + }, + { + "epoch": 0.77, + "grad_norm": 26.151488580601306, + "learning_rate": 1.4116902276025778e-05, + "loss": 1.1099, + "step": 63867 + }, + { + "epoch": 0.77, + "grad_norm": 4.780575518073219, + "learning_rate": 1.4116369893623498e-05, + "loss": 1.5065, + "step": 63870 + }, + { + "epoch": 0.77, + "grad_norm": 5.812772096250048, + "learning_rate": 1.4115837497173533e-05, + "loss": 1.0028, + "step": 63873 + }, + { + "epoch": 0.77, + "grad_norm": 17.402131682869552, + "learning_rate": 1.4115305086677697e-05, + "loss": 1.2788, + "step": 63876 + }, + { + "epoch": 0.77, + "grad_norm": 19.216777456394183, + "learning_rate": 1.4114772662137804e-05, + "loss": 1.1966, + "step": 63879 + }, + { + "epoch": 0.77, + "grad_norm": 47.76322757919785, + "learning_rate": 1.4114240223555677e-05, + "loss": 1.4899, + "step": 63882 + }, + { + "epoch": 0.77, + "grad_norm": 19.226731713808565, + "learning_rate": 1.411370777093313e-05, + "loss": 1.2537, + "step": 63885 + }, + { + "epoch": 0.77, + "grad_norm": 5.451168338726624, + "learning_rate": 1.411317530427198e-05, + "loss": 1.5006, + "step": 63888 + }, + { + "epoch": 0.77, + "grad_norm": 28.91818365803508, + "learning_rate": 1.411264282357404e-05, + "loss": 1.2675, + "step": 63891 + }, + { + "epoch": 0.77, + "grad_norm": 60.680761004388096, + "learning_rate": 1.4112110328841137e-05, + "loss": 1.3453, + "step": 63894 + }, + { + "epoch": 0.77, + "grad_norm": 8.108202506550043, + "learning_rate": 1.411157782007508e-05, + "loss": 1.1534, + "step": 63897 + }, + { + "epoch": 0.77, + "grad_norm": 56.067123339309894, + "learning_rate": 1.4111045297277693e-05, + "loss": 1.5569, + "step": 63900 + }, + { + "epoch": 0.77, + "grad_norm": 4.253094455521813, + "learning_rate": 1.4110512760450784e-05, + "loss": 1.5665, + "step": 63903 + }, + { + "epoch": 0.77, + "grad_norm": 5.12724160247508, + "learning_rate": 1.4109980209596174e-05, + "loss": 1.1929, + "step": 63906 + }, + { + "epoch": 0.77, + "grad_norm": 11.117732401612054, + "learning_rate": 1.4109447644715688e-05, + "loss": 1.4401, + "step": 63909 + }, + { + "epoch": 0.77, + "grad_norm": 10.935349940547978, + "learning_rate": 1.4108915065811134e-05, + "loss": 1.1165, + "step": 63912 + }, + { + "epoch": 0.77, + "grad_norm": 14.846974384463802, + "learning_rate": 1.4108382472884331e-05, + "loss": 1.2637, + "step": 63915 + }, + { + "epoch": 0.77, + "grad_norm": 20.202824511422158, + "learning_rate": 1.4107849865937105e-05, + "loss": 1.3635, + "step": 63918 + }, + { + "epoch": 0.77, + "grad_norm": 18.575570441558654, + "learning_rate": 1.4107317244971262e-05, + "loss": 1.3731, + "step": 63921 + }, + { + "epoch": 0.77, + "grad_norm": 20.580099047709865, + "learning_rate": 1.4106784609988624e-05, + "loss": 1.0011, + "step": 63924 + }, + { + "epoch": 0.77, + "grad_norm": 13.882720036649923, + "learning_rate": 1.410625196099101e-05, + "loss": 1.4991, + "step": 63927 + }, + { + "epoch": 0.77, + "grad_norm": 20.69913954252924, + "learning_rate": 1.4105719297980241e-05, + "loss": 1.165, + "step": 63930 + }, + { + "epoch": 0.77, + "grad_norm": 7.769983701445545, + "learning_rate": 1.4105186620958127e-05, + "loss": 1.0352, + "step": 63933 + }, + { + "epoch": 0.77, + "grad_norm": 7.832684913647847, + "learning_rate": 1.410465392992649e-05, + "loss": 1.3211, + "step": 63936 + }, + { + "epoch": 0.77, + "grad_norm": 2.4234538411761055, + "learning_rate": 1.4104121224887148e-05, + "loss": 1.5485, + "step": 63939 + }, + { + "epoch": 0.77, + "grad_norm": 25.07889143951376, + "learning_rate": 1.410358850584192e-05, + "loss": 1.374, + "step": 63942 + }, + { + "epoch": 0.77, + "grad_norm": 10.34239798475891, + "learning_rate": 1.410305577279262e-05, + "loss": 1.3466, + "step": 63945 + }, + { + "epoch": 0.77, + "grad_norm": 10.543207340786378, + "learning_rate": 1.4102523025741071e-05, + "loss": 1.3, + "step": 63948 + }, + { + "epoch": 0.77, + "grad_norm": 9.075697323805226, + "learning_rate": 1.410199026468909e-05, + "loss": 1.1893, + "step": 63951 + }, + { + "epoch": 0.77, + "grad_norm": 2.903400186347763, + "learning_rate": 1.410145748963849e-05, + "loss": 1.365, + "step": 63954 + }, + { + "epoch": 0.77, + "grad_norm": 7.5452524097222815, + "learning_rate": 1.4100924700591095e-05, + "loss": 0.9345, + "step": 63957 + }, + { + "epoch": 0.77, + "grad_norm": 2.613087519337907, + "learning_rate": 1.410039189754872e-05, + "loss": 1.1383, + "step": 63960 + }, + { + "epoch": 0.77, + "grad_norm": 20.564712065041704, + "learning_rate": 1.4099859080513185e-05, + "loss": 1.5166, + "step": 63963 + }, + { + "epoch": 0.77, + "grad_norm": 17.150742942050243, + "learning_rate": 1.4099326249486308e-05, + "loss": 1.4009, + "step": 63966 + }, + { + "epoch": 0.77, + "grad_norm": 12.796616839537831, + "learning_rate": 1.4098793404469907e-05, + "loss": 1.3089, + "step": 63969 + }, + { + "epoch": 0.77, + "grad_norm": 22.989076736407124, + "learning_rate": 1.40982605454658e-05, + "loss": 1.3191, + "step": 63972 + }, + { + "epoch": 0.77, + "grad_norm": 14.429769970176018, + "learning_rate": 1.4097727672475806e-05, + "loss": 1.3844, + "step": 63975 + }, + { + "epoch": 0.77, + "grad_norm": 4.854448076966387, + "learning_rate": 1.4097194785501743e-05, + "loss": 1.4659, + "step": 63978 + }, + { + "epoch": 0.77, + "grad_norm": 11.602115133620334, + "learning_rate": 1.4096661884545431e-05, + "loss": 1.2472, + "step": 63981 + }, + { + "epoch": 0.77, + "grad_norm": 11.206086849369175, + "learning_rate": 1.4096128969608686e-05, + "loss": 1.3632, + "step": 63984 + }, + { + "epoch": 0.77, + "grad_norm": 18.752504022561293, + "learning_rate": 1.409559604069333e-05, + "loss": 1.2476, + "step": 63987 + }, + { + "epoch": 0.77, + "grad_norm": 2.399340367676737, + "learning_rate": 1.409506309780118e-05, + "loss": 1.3711, + "step": 63990 + }, + { + "epoch": 0.77, + "grad_norm": 8.921819927123591, + "learning_rate": 1.4094530140934054e-05, + "loss": 1.4875, + "step": 63993 + }, + { + "epoch": 0.77, + "grad_norm": 21.811095260204745, + "learning_rate": 1.4093997170093771e-05, + "loss": 1.5134, + "step": 63996 + }, + { + "epoch": 0.77, + "grad_norm": 26.27074584829481, + "learning_rate": 1.4093464185282152e-05, + "loss": 1.4126, + "step": 63999 + }, + { + "epoch": 0.77, + "grad_norm": 3.991733452099282, + "learning_rate": 1.4092931186501014e-05, + "loss": 1.424, + "step": 64002 + }, + { + "epoch": 0.77, + "grad_norm": 7.725060863219139, + "learning_rate": 1.4092398173752174e-05, + "loss": 0.9122, + "step": 64005 + }, + { + "epoch": 0.77, + "grad_norm": 11.349113356862167, + "learning_rate": 1.4091865147037456e-05, + "loss": 1.336, + "step": 64008 + }, + { + "epoch": 0.77, + "grad_norm": 5.254283379664124, + "learning_rate": 1.4091332106358672e-05, + "loss": 1.5598, + "step": 64011 + }, + { + "epoch": 0.77, + "grad_norm": 14.384555619292133, + "learning_rate": 1.4090799051717648e-05, + "loss": 1.2388, + "step": 64014 + }, + { + "epoch": 0.77, + "grad_norm": 14.137364202389795, + "learning_rate": 1.40902659831162e-05, + "loss": 1.2338, + "step": 64017 + }, + { + "epoch": 0.77, + "grad_norm": 9.729805161127208, + "learning_rate": 1.4089732900556149e-05, + "loss": 1.317, + "step": 64020 + }, + { + "epoch": 0.77, + "grad_norm": 15.030146471607186, + "learning_rate": 1.408919980403931e-05, + "loss": 1.3204, + "step": 64023 + }, + { + "epoch": 0.77, + "grad_norm": 2.9701879069370007, + "learning_rate": 1.4088666693567506e-05, + "loss": 1.6933, + "step": 64026 + }, + { + "epoch": 0.77, + "grad_norm": 27.233721347557363, + "learning_rate": 1.4088133569142557e-05, + "loss": 1.2428, + "step": 64029 + }, + { + "epoch": 0.77, + "grad_norm": 4.237933000772064, + "learning_rate": 1.4087600430766276e-05, + "loss": 1.631, + "step": 64032 + }, + { + "epoch": 0.77, + "grad_norm": 65.48390272474722, + "learning_rate": 1.4087067278440491e-05, + "loss": 1.3547, + "step": 64035 + }, + { + "epoch": 0.77, + "grad_norm": 32.31394986889128, + "learning_rate": 1.4086534112167018e-05, + "loss": 1.5281, + "step": 64038 + }, + { + "epoch": 0.77, + "grad_norm": 15.286141706823148, + "learning_rate": 1.4086000931947674e-05, + "loss": 1.2552, + "step": 64041 + }, + { + "epoch": 0.77, + "grad_norm": 11.213131552672504, + "learning_rate": 1.4085467737784282e-05, + "loss": 1.3, + "step": 64044 + }, + { + "epoch": 0.77, + "grad_norm": 10.799606091254232, + "learning_rate": 1.408493452967866e-05, + "loss": 1.5543, + "step": 64047 + }, + { + "epoch": 0.77, + "grad_norm": 46.83272321809534, + "learning_rate": 1.4084401307632627e-05, + "loss": 1.8075, + "step": 64050 + }, + { + "epoch": 0.77, + "grad_norm": 11.194233651149304, + "learning_rate": 1.4083868071648003e-05, + "loss": 1.2978, + "step": 64053 + }, + { + "epoch": 0.77, + "grad_norm": 17.81144017287399, + "learning_rate": 1.408333482172661e-05, + "loss": 1.0882, + "step": 64056 + }, + { + "epoch": 0.77, + "grad_norm": 9.230686832678952, + "learning_rate": 1.4082801557870264e-05, + "loss": 1.352, + "step": 64059 + }, + { + "epoch": 0.77, + "grad_norm": 31.893065746776188, + "learning_rate": 1.4082268280080792e-05, + "loss": 1.3812, + "step": 64062 + }, + { + "epoch": 0.77, + "grad_norm": 4.27503688672673, + "learning_rate": 1.4081734988360002e-05, + "loss": 1.3429, + "step": 64065 + }, + { + "epoch": 0.77, + "grad_norm": 12.179277431468103, + "learning_rate": 1.4081201682709724e-05, + "loss": 1.247, + "step": 64068 + }, + { + "epoch": 0.77, + "grad_norm": 46.749576377063946, + "learning_rate": 1.4080668363131776e-05, + "loss": 1.4879, + "step": 64071 + }, + { + "epoch": 0.77, + "grad_norm": 7.813742705129909, + "learning_rate": 1.4080135029627972e-05, + "loss": 1.4629, + "step": 64074 + }, + { + "epoch": 0.77, + "grad_norm": 73.25208328130078, + "learning_rate": 1.4079601682200142e-05, + "loss": 1.5771, + "step": 64077 + }, + { + "epoch": 0.77, + "grad_norm": 4.697518448386561, + "learning_rate": 1.40790683208501e-05, + "loss": 1.2275, + "step": 64080 + }, + { + "epoch": 0.77, + "grad_norm": 12.881590446615437, + "learning_rate": 1.4078534945579664e-05, + "loss": 1.2608, + "step": 64083 + }, + { + "epoch": 0.77, + "grad_norm": 10.568577834910187, + "learning_rate": 1.407800155639066e-05, + "loss": 1.837, + "step": 64086 + }, + { + "epoch": 0.77, + "grad_norm": 31.526791653212005, + "learning_rate": 1.4077468153284907e-05, + "loss": 1.2725, + "step": 64089 + }, + { + "epoch": 0.77, + "grad_norm": 9.671741640702283, + "learning_rate": 1.4076934736264221e-05, + "loss": 1.6771, + "step": 64092 + }, + { + "epoch": 0.77, + "grad_norm": 24.40995989848664, + "learning_rate": 1.4076401305330425e-05, + "loss": 1.3553, + "step": 64095 + }, + { + "epoch": 0.77, + "grad_norm": 12.311340556307917, + "learning_rate": 1.407586786048534e-05, + "loss": 1.5436, + "step": 64098 + }, + { + "epoch": 0.77, + "grad_norm": 12.460016592668143, + "learning_rate": 1.4075334401730786e-05, + "loss": 1.7198, + "step": 64101 + }, + { + "epoch": 0.77, + "grad_norm": 13.048084213425586, + "learning_rate": 1.4074800929068586e-05, + "loss": 1.1438, + "step": 64104 + }, + { + "epoch": 0.77, + "grad_norm": 7.3572040094724045, + "learning_rate": 1.4074267442500555e-05, + "loss": 1.6471, + "step": 64107 + }, + { + "epoch": 0.77, + "grad_norm": 9.899415376847653, + "learning_rate": 1.407373394202852e-05, + "loss": 1.1527, + "step": 64110 + }, + { + "epoch": 0.77, + "grad_norm": 9.001132612786364, + "learning_rate": 1.4073200427654297e-05, + "loss": 1.1758, + "step": 64113 + }, + { + "epoch": 0.77, + "grad_norm": 9.16247192368612, + "learning_rate": 1.4072666899379707e-05, + "loss": 1.4924, + "step": 64116 + }, + { + "epoch": 0.77, + "grad_norm": 9.875349236220838, + "learning_rate": 1.4072133357206573e-05, + "loss": 1.656, + "step": 64119 + }, + { + "epoch": 0.77, + "grad_norm": 16.559516072517987, + "learning_rate": 1.4071599801136712e-05, + "loss": 1.2792, + "step": 64122 + }, + { + "epoch": 0.77, + "grad_norm": 9.259561875723408, + "learning_rate": 1.4071066231171953e-05, + "loss": 1.4155, + "step": 64125 + }, + { + "epoch": 0.77, + "grad_norm": 9.701319729889518, + "learning_rate": 1.4070532647314106e-05, + "loss": 1.3859, + "step": 64128 + }, + { + "epoch": 0.77, + "grad_norm": 27.023985631638148, + "learning_rate": 1.4069999049565002e-05, + "loss": 1.4244, + "step": 64131 + }, + { + "epoch": 0.77, + "grad_norm": 13.425990683356668, + "learning_rate": 1.4069465437926453e-05, + "loss": 1.3771, + "step": 64134 + }, + { + "epoch": 0.77, + "grad_norm": 21.444773627114017, + "learning_rate": 1.4068931812400286e-05, + "loss": 1.7378, + "step": 64137 + }, + { + "epoch": 0.77, + "grad_norm": 18.113817108495642, + "learning_rate": 1.406839817298832e-05, + "loss": 1.2813, + "step": 64140 + }, + { + "epoch": 0.77, + "grad_norm": 35.727904462862256, + "learning_rate": 1.4067864519692378e-05, + "loss": 1.8103, + "step": 64143 + }, + { + "epoch": 0.77, + "grad_norm": 32.731870073436575, + "learning_rate": 1.4067330852514277e-05, + "loss": 1.249, + "step": 64146 + }, + { + "epoch": 0.77, + "grad_norm": 13.066628541392912, + "learning_rate": 1.4066797171455842e-05, + "loss": 1.2625, + "step": 64149 + }, + { + "epoch": 0.77, + "grad_norm": 12.855451434030586, + "learning_rate": 1.4066263476518893e-05, + "loss": 1.1753, + "step": 64152 + }, + { + "epoch": 0.77, + "grad_norm": 4.6220185972624135, + "learning_rate": 1.4065729767705251e-05, + "loss": 1.3629, + "step": 64155 + }, + { + "epoch": 0.77, + "grad_norm": 6.657240304574745, + "learning_rate": 1.406519604501674e-05, + "loss": 1.2365, + "step": 64158 + }, + { + "epoch": 0.77, + "grad_norm": 15.210795650502806, + "learning_rate": 1.4064662308455176e-05, + "loss": 1.2305, + "step": 64161 + }, + { + "epoch": 0.77, + "grad_norm": 11.000850522394051, + "learning_rate": 1.406412855802239e-05, + "loss": 1.3246, + "step": 64164 + }, + { + "epoch": 0.77, + "grad_norm": 46.632029913868394, + "learning_rate": 1.406359479372019e-05, + "loss": 1.3799, + "step": 64167 + }, + { + "epoch": 0.77, + "grad_norm": 22.812672771123076, + "learning_rate": 1.4063061015550408e-05, + "loss": 1.1417, + "step": 64170 + }, + { + "epoch": 0.77, + "grad_norm": 3.022828834680775, + "learning_rate": 1.406252722351486e-05, + "loss": 1.0893, + "step": 64173 + }, + { + "epoch": 0.77, + "grad_norm": 28.561140136859365, + "learning_rate": 1.4061993417615372e-05, + "loss": 1.433, + "step": 64176 + }, + { + "epoch": 0.77, + "grad_norm": 9.421899284998341, + "learning_rate": 1.406145959785376e-05, + "loss": 1.192, + "step": 64179 + }, + { + "epoch": 0.77, + "grad_norm": 13.580758722629325, + "learning_rate": 1.4060925764231854e-05, + "loss": 1.328, + "step": 64182 + }, + { + "epoch": 0.77, + "grad_norm": 17.014998138933148, + "learning_rate": 1.406039191675147e-05, + "loss": 1.5495, + "step": 64185 + }, + { + "epoch": 0.77, + "grad_norm": 13.436466674639677, + "learning_rate": 1.4059858055414428e-05, + "loss": 1.3596, + "step": 64188 + }, + { + "epoch": 0.77, + "grad_norm": 62.459697115400516, + "learning_rate": 1.4059324180222558e-05, + "loss": 1.4208, + "step": 64191 + }, + { + "epoch": 0.77, + "grad_norm": 22.61735627460315, + "learning_rate": 1.4058790291177672e-05, + "loss": 1.5326, + "step": 64194 + }, + { + "epoch": 0.77, + "grad_norm": 15.026838030213662, + "learning_rate": 1.4058256388281599e-05, + "loss": 1.0419, + "step": 64197 + }, + { + "epoch": 0.77, + "grad_norm": 70.9275412983303, + "learning_rate": 1.4057722471536157e-05, + "loss": 1.2663, + "step": 64200 + }, + { + "epoch": 0.77, + "grad_norm": 33.53605190526456, + "learning_rate": 1.4057188540943169e-05, + "loss": 1.6208, + "step": 64203 + }, + { + "epoch": 0.77, + "grad_norm": 6.280888462232281, + "learning_rate": 1.4056654596504464e-05, + "loss": 1.3985, + "step": 64206 + }, + { + "epoch": 0.77, + "grad_norm": 7.268702687418431, + "learning_rate": 1.4056120638221854e-05, + "loss": 1.4553, + "step": 64209 + }, + { + "epoch": 0.77, + "grad_norm": 15.45600938378862, + "learning_rate": 1.4055586666097165e-05, + "loss": 1.4838, + "step": 64212 + }, + { + "epoch": 0.77, + "grad_norm": 3.391822048358299, + "learning_rate": 1.4055052680132217e-05, + "loss": 1.475, + "step": 64215 + }, + { + "epoch": 0.77, + "grad_norm": 15.22214098971746, + "learning_rate": 1.4054518680328842e-05, + "loss": 1.556, + "step": 64218 + }, + { + "epoch": 0.77, + "grad_norm": 22.041833055244652, + "learning_rate": 1.405398466668885e-05, + "loss": 1.9052, + "step": 64221 + }, + { + "epoch": 0.77, + "grad_norm": 16.053053878176076, + "learning_rate": 1.4053450639214068e-05, + "loss": 1.5743, + "step": 64224 + }, + { + "epoch": 0.77, + "grad_norm": 16.157081009726, + "learning_rate": 1.4052916597906322e-05, + "loss": 1.4814, + "step": 64227 + }, + { + "epoch": 0.77, + "grad_norm": 14.030417466425439, + "learning_rate": 1.4052382542767427e-05, + "loss": 1.5509, + "step": 64230 + }, + { + "epoch": 0.77, + "grad_norm": 3.110111534543203, + "learning_rate": 1.4051848473799212e-05, + "loss": 1.3255, + "step": 64233 + }, + { + "epoch": 0.77, + "grad_norm": 56.66026070125604, + "learning_rate": 1.4051314391003499e-05, + "loss": 1.7462, + "step": 64236 + }, + { + "epoch": 0.77, + "grad_norm": 8.970349237184184, + "learning_rate": 1.4050780294382111e-05, + "loss": 1.2093, + "step": 64239 + }, + { + "epoch": 0.77, + "grad_norm": 14.080463430418966, + "learning_rate": 1.4050246183936866e-05, + "loss": 1.5731, + "step": 64242 + }, + { + "epoch": 0.77, + "grad_norm": 31.802413758524562, + "learning_rate": 1.404971205966959e-05, + "loss": 1.1898, + "step": 64245 + }, + { + "epoch": 0.77, + "grad_norm": 13.177337074108191, + "learning_rate": 1.4049177921582107e-05, + "loss": 1.2794, + "step": 64248 + }, + { + "epoch": 0.77, + "grad_norm": 3.142374927076672, + "learning_rate": 1.4048643769676238e-05, + "loss": 1.9604, + "step": 64251 + }, + { + "epoch": 0.77, + "grad_norm": 7.737242075946506, + "learning_rate": 1.4048109603953804e-05, + "loss": 1.1308, + "step": 64254 + }, + { + "epoch": 0.77, + "grad_norm": 10.21871061779509, + "learning_rate": 1.4047575424416628e-05, + "loss": 1.5725, + "step": 64257 + }, + { + "epoch": 0.77, + "grad_norm": 5.600054560687346, + "learning_rate": 1.4047041231066541e-05, + "loss": 1.5645, + "step": 64260 + }, + { + "epoch": 0.77, + "grad_norm": 24.816926400051923, + "learning_rate": 1.4046507023905356e-05, + "loss": 1.4953, + "step": 64263 + }, + { + "epoch": 0.77, + "grad_norm": 17.11451852906565, + "learning_rate": 1.4045972802934902e-05, + "loss": 1.5192, + "step": 64266 + }, + { + "epoch": 0.77, + "grad_norm": 9.268456951851782, + "learning_rate": 1.4045438568156999e-05, + "loss": 0.9728, + "step": 64269 + }, + { + "epoch": 0.77, + "grad_norm": 25.840693870427103, + "learning_rate": 1.4044904319573474e-05, + "loss": 1.6608, + "step": 64272 + }, + { + "epoch": 0.77, + "grad_norm": 13.707463161522636, + "learning_rate": 1.4044370057186144e-05, + "loss": 1.2222, + "step": 64275 + }, + { + "epoch": 0.77, + "grad_norm": 6.601904905682858, + "learning_rate": 1.4043835780996837e-05, + "loss": 1.1409, + "step": 64278 + }, + { + "epoch": 0.77, + "grad_norm": 8.268831994707758, + "learning_rate": 1.4043301491007375e-05, + "loss": 1.221, + "step": 64281 + }, + { + "epoch": 0.77, + "grad_norm": 6.793095939806207, + "learning_rate": 1.404276718721958e-05, + "loss": 1.5376, + "step": 64284 + }, + { + "epoch": 0.77, + "grad_norm": 7.682761488240192, + "learning_rate": 1.404223286963528e-05, + "loss": 1.153, + "step": 64287 + }, + { + "epoch": 0.77, + "grad_norm": 2.790736373378536, + "learning_rate": 1.4041698538256295e-05, + "loss": 1.343, + "step": 64290 + }, + { + "epoch": 0.77, + "grad_norm": 47.38818213445392, + "learning_rate": 1.4041164193084447e-05, + "loss": 0.9602, + "step": 64293 + }, + { + "epoch": 0.77, + "grad_norm": 12.529994086185944, + "learning_rate": 1.404062983412156e-05, + "loss": 1.3758, + "step": 64296 + }, + { + "epoch": 0.77, + "grad_norm": 9.357409320060246, + "learning_rate": 1.404009546136946e-05, + "loss": 1.1547, + "step": 64299 + }, + { + "epoch": 0.77, + "grad_norm": 8.35526320612504, + "learning_rate": 1.403956107482997e-05, + "loss": 1.2379, + "step": 64302 + }, + { + "epoch": 0.77, + "grad_norm": 11.801700782440415, + "learning_rate": 1.4039026674504914e-05, + "loss": 1.6179, + "step": 64305 + }, + { + "epoch": 0.77, + "grad_norm": 21.1618432568991, + "learning_rate": 1.4038492260396113e-05, + "loss": 1.0959, + "step": 64308 + }, + { + "epoch": 0.77, + "grad_norm": 13.444100752981676, + "learning_rate": 1.4037957832505393e-05, + "loss": 1.357, + "step": 64311 + }, + { + "epoch": 0.77, + "grad_norm": 21.989305951178576, + "learning_rate": 1.403742339083458e-05, + "loss": 1.6616, + "step": 64314 + }, + { + "epoch": 0.77, + "grad_norm": 15.299222330959712, + "learning_rate": 1.4036888935385489e-05, + "loss": 1.5688, + "step": 64317 + }, + { + "epoch": 0.77, + "grad_norm": 9.372823552480762, + "learning_rate": 1.4036354466159955e-05, + "loss": 1.3661, + "step": 64320 + }, + { + "epoch": 0.77, + "grad_norm": 10.16044575414907, + "learning_rate": 1.4035819983159798e-05, + "loss": 1.427, + "step": 64323 + }, + { + "epoch": 0.77, + "grad_norm": 40.66973316774251, + "learning_rate": 1.4035285486386838e-05, + "loss": 1.1732, + "step": 64326 + }, + { + "epoch": 0.77, + "grad_norm": 11.728127530380016, + "learning_rate": 1.4034750975842904e-05, + "loss": 1.1451, + "step": 64329 + }, + { + "epoch": 0.77, + "grad_norm": 21.699473298360815, + "learning_rate": 1.4034216451529816e-05, + "loss": 1.3896, + "step": 64332 + }, + { + "epoch": 0.77, + "grad_norm": 16.551549532113324, + "learning_rate": 1.4033681913449404e-05, + "loss": 1.1825, + "step": 64335 + }, + { + "epoch": 0.77, + "grad_norm": 16.133096638747833, + "learning_rate": 1.4033147361603484e-05, + "loss": 1.5512, + "step": 64338 + }, + { + "epoch": 0.77, + "grad_norm": 24.378268692390762, + "learning_rate": 1.4032612795993887e-05, + "loss": 1.2052, + "step": 64341 + }, + { + "epoch": 0.77, + "grad_norm": 8.985085165727096, + "learning_rate": 1.4032078216622438e-05, + "loss": 1.5338, + "step": 64344 + }, + { + "epoch": 0.77, + "grad_norm": 16.36247502508372, + "learning_rate": 1.4031543623490954e-05, + "loss": 1.257, + "step": 64347 + }, + { + "epoch": 0.77, + "grad_norm": 14.064374962899585, + "learning_rate": 1.4031009016601264e-05, + "loss": 1.3906, + "step": 64350 + }, + { + "epoch": 0.77, + "grad_norm": 13.167879139322597, + "learning_rate": 1.4030474395955197e-05, + "loss": 1.4447, + "step": 64353 + }, + { + "epoch": 0.77, + "grad_norm": 24.192366599757392, + "learning_rate": 1.402993976155457e-05, + "loss": 1.5197, + "step": 64356 + }, + { + "epoch": 0.77, + "grad_norm": 8.048970310233928, + "learning_rate": 1.4029405113401208e-05, + "loss": 1.0902, + "step": 64359 + }, + { + "epoch": 0.77, + "grad_norm": 48.210454834879805, + "learning_rate": 1.4028870451496938e-05, + "loss": 1.5118, + "step": 64362 + }, + { + "epoch": 0.77, + "grad_norm": 3.8613193947181976, + "learning_rate": 1.4028335775843587e-05, + "loss": 1.2805, + "step": 64365 + }, + { + "epoch": 0.77, + "grad_norm": 16.846431065770915, + "learning_rate": 1.4027801086442976e-05, + "loss": 1.3237, + "step": 64368 + }, + { + "epoch": 0.77, + "grad_norm": 17.3688737561928, + "learning_rate": 1.402726638329693e-05, + "loss": 1.3791, + "step": 64371 + }, + { + "epoch": 0.77, + "grad_norm": 6.022043941152916, + "learning_rate": 1.4026731666407273e-05, + "loss": 0.9087, + "step": 64374 + }, + { + "epoch": 0.77, + "grad_norm": 15.779160765517345, + "learning_rate": 1.4026196935775836e-05, + "loss": 1.4951, + "step": 64377 + }, + { + "epoch": 0.77, + "grad_norm": 16.159387312464272, + "learning_rate": 1.4025662191404435e-05, + "loss": 1.2439, + "step": 64380 + }, + { + "epoch": 0.77, + "grad_norm": 31.224475761017036, + "learning_rate": 1.4025127433294898e-05, + "loss": 1.4733, + "step": 64383 + }, + { + "epoch": 0.77, + "grad_norm": 16.33284140066095, + "learning_rate": 1.4024592661449054e-05, + "loss": 1.7401, + "step": 64386 + }, + { + "epoch": 0.77, + "grad_norm": 10.673644613919913, + "learning_rate": 1.4024057875868724e-05, + "loss": 1.4123, + "step": 64389 + }, + { + "epoch": 0.77, + "grad_norm": 11.165056438824411, + "learning_rate": 1.4023523076555732e-05, + "loss": 1.7404, + "step": 64392 + }, + { + "epoch": 0.77, + "grad_norm": 7.688889373565507, + "learning_rate": 1.4022988263511907e-05, + "loss": 1.2115, + "step": 64395 + }, + { + "epoch": 0.77, + "grad_norm": 29.0264764605622, + "learning_rate": 1.4022453436739074e-05, + "loss": 1.4643, + "step": 64398 + }, + { + "epoch": 0.77, + "grad_norm": 15.324030841389664, + "learning_rate": 1.4021918596239054e-05, + "loss": 1.4982, + "step": 64401 + }, + { + "epoch": 0.77, + "grad_norm": 16.91642664879276, + "learning_rate": 1.4021383742013675e-05, + "loss": 1.4373, + "step": 64404 + }, + { + "epoch": 0.77, + "grad_norm": 12.225467163270899, + "learning_rate": 1.4020848874064761e-05, + "loss": 1.626, + "step": 64407 + }, + { + "epoch": 0.77, + "grad_norm": 13.766685100779737, + "learning_rate": 1.4020313992394139e-05, + "loss": 1.3921, + "step": 64410 + }, + { + "epoch": 0.77, + "grad_norm": 6.9206824083788225, + "learning_rate": 1.4019779097003635e-05, + "loss": 0.9758, + "step": 64413 + }, + { + "epoch": 0.77, + "grad_norm": 56.827143387277914, + "learning_rate": 1.4019244187895069e-05, + "loss": 1.2089, + "step": 64416 + }, + { + "epoch": 0.77, + "grad_norm": 15.576748934232267, + "learning_rate": 1.4018709265070272e-05, + "loss": 1.3565, + "step": 64419 + }, + { + "epoch": 0.77, + "grad_norm": 21.713572556121758, + "learning_rate": 1.401817432853107e-05, + "loss": 1.4345, + "step": 64422 + }, + { + "epoch": 0.77, + "grad_norm": 10.132620317436485, + "learning_rate": 1.4017639378279283e-05, + "loss": 1.137, + "step": 64425 + }, + { + "epoch": 0.77, + "grad_norm": 36.47934792768956, + "learning_rate": 1.401710441431674e-05, + "loss": 1.3188, + "step": 64428 + }, + { + "epoch": 0.77, + "grad_norm": 12.79108375424196, + "learning_rate": 1.4016569436645268e-05, + "loss": 1.0114, + "step": 64431 + }, + { + "epoch": 0.77, + "grad_norm": 13.241759528556996, + "learning_rate": 1.4016034445266693e-05, + "loss": 1.407, + "step": 64434 + }, + { + "epoch": 0.77, + "grad_norm": 20.69807381982672, + "learning_rate": 1.4015499440182837e-05, + "loss": 1.1456, + "step": 64437 + }, + { + "epoch": 0.77, + "grad_norm": 6.4251959757991814, + "learning_rate": 1.4014964421395527e-05, + "loss": 1.7288, + "step": 64440 + }, + { + "epoch": 0.77, + "grad_norm": 10.737264039106943, + "learning_rate": 1.4014429388906592e-05, + "loss": 1.3305, + "step": 64443 + }, + { + "epoch": 0.77, + "grad_norm": 19.03245731170353, + "learning_rate": 1.4013894342717853e-05, + "loss": 1.3169, + "step": 64446 + }, + { + "epoch": 0.77, + "grad_norm": 7.259598694744076, + "learning_rate": 1.401335928283114e-05, + "loss": 0.9958, + "step": 64449 + }, + { + "epoch": 0.78, + "grad_norm": 2.386897813879582, + "learning_rate": 1.4012824209248276e-05, + "loss": 1.4111, + "step": 64452 + }, + { + "epoch": 0.78, + "grad_norm": 22.971458767492024, + "learning_rate": 1.401228912197109e-05, + "loss": 1.3618, + "step": 64455 + }, + { + "epoch": 0.78, + "grad_norm": 9.048724636894383, + "learning_rate": 1.4011754021001405e-05, + "loss": 1.5396, + "step": 64458 + }, + { + "epoch": 0.78, + "grad_norm": 22.32706423142336, + "learning_rate": 1.401121890634105e-05, + "loss": 1.3109, + "step": 64461 + }, + { + "epoch": 0.78, + "grad_norm": 19.19072078925557, + "learning_rate": 1.4010683777991849e-05, + "loss": 1.1383, + "step": 64464 + }, + { + "epoch": 0.78, + "grad_norm": 7.515299569281537, + "learning_rate": 1.401014863595563e-05, + "loss": 1.4055, + "step": 64467 + }, + { + "epoch": 0.78, + "grad_norm": 20.009793725547905, + "learning_rate": 1.4009613480234214e-05, + "loss": 1.5016, + "step": 64470 + }, + { + "epoch": 0.78, + "grad_norm": 10.754373373061084, + "learning_rate": 1.4009078310829436e-05, + "loss": 1.3538, + "step": 64473 + }, + { + "epoch": 0.78, + "grad_norm": 7.827782247926287, + "learning_rate": 1.4008543127743116e-05, + "loss": 1.3455, + "step": 64476 + }, + { + "epoch": 0.78, + "grad_norm": 19.720240653792235, + "learning_rate": 1.4008007930977083e-05, + "loss": 1.3818, + "step": 64479 + }, + { + "epoch": 0.78, + "grad_norm": 7.468065919502427, + "learning_rate": 1.4007472720533162e-05, + "loss": 1.0261, + "step": 64482 + }, + { + "epoch": 0.78, + "grad_norm": 22.36497106813996, + "learning_rate": 1.400693749641318e-05, + "loss": 0.9139, + "step": 64485 + }, + { + "epoch": 0.78, + "grad_norm": 16.92309811664242, + "learning_rate": 1.4006402258618965e-05, + "loss": 1.3811, + "step": 64488 + }, + { + "epoch": 0.78, + "grad_norm": 21.83263549146742, + "learning_rate": 1.400586700715234e-05, + "loss": 1.4956, + "step": 64491 + }, + { + "epoch": 0.78, + "grad_norm": 31.764929379002435, + "learning_rate": 1.4005331742015135e-05, + "loss": 1.9875, + "step": 64494 + }, + { + "epoch": 0.78, + "grad_norm": 8.135462980027908, + "learning_rate": 1.4004796463209175e-05, + "loss": 1.1688, + "step": 64497 + }, + { + "epoch": 0.78, + "grad_norm": 5.844392741747679, + "learning_rate": 1.4004261170736286e-05, + "loss": 1.1061, + "step": 64500 + }, + { + "epoch": 0.78, + "grad_norm": 25.007786875130446, + "learning_rate": 1.40037258645983e-05, + "loss": 1.7528, + "step": 64503 + }, + { + "epoch": 0.78, + "grad_norm": 12.137355640236485, + "learning_rate": 1.4003190544797036e-05, + "loss": 1.8276, + "step": 64506 + }, + { + "epoch": 0.78, + "grad_norm": 3.9640499132980938, + "learning_rate": 1.4002655211334326e-05, + "loss": 1.2967, + "step": 64509 + }, + { + "epoch": 0.78, + "grad_norm": 19.460256023844835, + "learning_rate": 1.4002119864211996e-05, + "loss": 1.7455, + "step": 64512 + }, + { + "epoch": 0.78, + "grad_norm": 30.47618999171147, + "learning_rate": 1.4001584503431872e-05, + "loss": 1.77, + "step": 64515 + }, + { + "epoch": 0.78, + "grad_norm": 7.358184234230174, + "learning_rate": 1.400104912899578e-05, + "loss": 1.2663, + "step": 64518 + }, + { + "epoch": 0.78, + "grad_norm": 14.029177938149507, + "learning_rate": 1.400051374090555e-05, + "loss": 1.3663, + "step": 64521 + }, + { + "epoch": 0.78, + "grad_norm": 8.19657820416484, + "learning_rate": 1.3999978339163007e-05, + "loss": 1.4448, + "step": 64524 + }, + { + "epoch": 0.78, + "grad_norm": 3.3556716170042677, + "learning_rate": 1.3999442923769978e-05, + "loss": 1.5661, + "step": 64527 + }, + { + "epoch": 0.78, + "grad_norm": 20.754309544648148, + "learning_rate": 1.3998907494728294e-05, + "loss": 1.6243, + "step": 64530 + }, + { + "epoch": 0.78, + "grad_norm": 16.265048531348164, + "learning_rate": 1.3998372052039774e-05, + "loss": 1.3457, + "step": 64533 + }, + { + "epoch": 0.78, + "grad_norm": 7.4660287665879865, + "learning_rate": 1.3997836595706256e-05, + "loss": 1.5156, + "step": 64536 + }, + { + "epoch": 0.78, + "grad_norm": 14.616449950963865, + "learning_rate": 1.399730112572956e-05, + "loss": 1.5467, + "step": 64539 + }, + { + "epoch": 0.78, + "grad_norm": 11.19068497597158, + "learning_rate": 1.3996765642111514e-05, + "loss": 1.2576, + "step": 64542 + }, + { + "epoch": 0.78, + "grad_norm": 15.383212511664075, + "learning_rate": 1.3996230144853944e-05, + "loss": 1.1212, + "step": 64545 + }, + { + "epoch": 0.78, + "grad_norm": 20.338373534706403, + "learning_rate": 1.3995694633958685e-05, + "loss": 1.5836, + "step": 64548 + }, + { + "epoch": 0.78, + "grad_norm": 12.740515562504058, + "learning_rate": 1.3995159109427557e-05, + "loss": 1.2101, + "step": 64551 + }, + { + "epoch": 0.78, + "grad_norm": 9.346937727318629, + "learning_rate": 1.3994623571262389e-05, + "loss": 1.1893, + "step": 64554 + }, + { + "epoch": 0.78, + "grad_norm": 21.40051117127529, + "learning_rate": 1.3994088019465011e-05, + "loss": 1.2162, + "step": 64557 + }, + { + "epoch": 0.78, + "grad_norm": 6.647795447074588, + "learning_rate": 1.3993552454037249e-05, + "loss": 1.1946, + "step": 64560 + }, + { + "epoch": 0.78, + "grad_norm": 5.549361396488855, + "learning_rate": 1.3993016874980932e-05, + "loss": 1.2204, + "step": 64563 + }, + { + "epoch": 0.78, + "grad_norm": 29.46450082271026, + "learning_rate": 1.3992481282297884e-05, + "loss": 1.2318, + "step": 64566 + }, + { + "epoch": 0.78, + "grad_norm": 8.134488486657233, + "learning_rate": 1.3991945675989938e-05, + "loss": 1.3538, + "step": 64569 + }, + { + "epoch": 0.78, + "grad_norm": 4.242598138926836, + "learning_rate": 1.3991410056058918e-05, + "loss": 1.3862, + "step": 64572 + }, + { + "epoch": 0.78, + "grad_norm": 6.6271688832353695, + "learning_rate": 1.3990874422506655e-05, + "loss": 1.3871, + "step": 64575 + }, + { + "epoch": 0.78, + "grad_norm": 5.925970429620479, + "learning_rate": 1.3990338775334972e-05, + "loss": 1.1509, + "step": 64578 + }, + { + "epoch": 0.78, + "grad_norm": 10.412953008487492, + "learning_rate": 1.3989803114545701e-05, + "loss": 1.3746, + "step": 64581 + }, + { + "epoch": 0.78, + "grad_norm": 15.035411765898639, + "learning_rate": 1.3989267440140672e-05, + "loss": 1.3398, + "step": 64584 + }, + { + "epoch": 0.78, + "grad_norm": 5.597360668796977, + "learning_rate": 1.3988731752121706e-05, + "loss": 1.1983, + "step": 64587 + }, + { + "epoch": 0.78, + "grad_norm": 10.409470535345196, + "learning_rate": 1.398819605049064e-05, + "loss": 1.7022, + "step": 64590 + }, + { + "epoch": 0.78, + "grad_norm": 16.617677733109463, + "learning_rate": 1.3987660335249293e-05, + "loss": 1.3317, + "step": 64593 + }, + { + "epoch": 0.78, + "grad_norm": 4.992132864720175, + "learning_rate": 1.39871246063995e-05, + "loss": 1.3759, + "step": 64596 + }, + { + "epoch": 0.78, + "grad_norm": 4.637736193746205, + "learning_rate": 1.3986588863943084e-05, + "loss": 1.391, + "step": 64599 + }, + { + "epoch": 0.78, + "grad_norm": 7.800354687869267, + "learning_rate": 1.398605310788188e-05, + "loss": 1.2479, + "step": 64602 + }, + { + "epoch": 0.78, + "grad_norm": 24.776459019435908, + "learning_rate": 1.3985517338217711e-05, + "loss": 1.4492, + "step": 64605 + }, + { + "epoch": 0.78, + "grad_norm": 22.665406536037057, + "learning_rate": 1.3984981554952406e-05, + "loss": 1.3919, + "step": 64608 + }, + { + "epoch": 0.78, + "grad_norm": 27.278380219365367, + "learning_rate": 1.3984445758087794e-05, + "loss": 0.8922, + "step": 64611 + }, + { + "epoch": 0.78, + "grad_norm": 5.0691344532877505, + "learning_rate": 1.3983909947625707e-05, + "loss": 1.4521, + "step": 64614 + }, + { + "epoch": 0.78, + "grad_norm": 25.017058816152854, + "learning_rate": 1.3983374123567968e-05, + "loss": 1.164, + "step": 64617 + }, + { + "epoch": 0.78, + "grad_norm": 29.87874508878179, + "learning_rate": 1.3982838285916407e-05, + "loss": 1.4725, + "step": 64620 + }, + { + "epoch": 0.78, + "grad_norm": 57.89235025878757, + "learning_rate": 1.3982302434672857e-05, + "loss": 1.3604, + "step": 64623 + }, + { + "epoch": 0.78, + "grad_norm": 8.037435499905817, + "learning_rate": 1.398176656983914e-05, + "loss": 1.452, + "step": 64626 + }, + { + "epoch": 0.78, + "grad_norm": 14.432392890050266, + "learning_rate": 1.3981230691417089e-05, + "loss": 1.3296, + "step": 64629 + }, + { + "epoch": 0.78, + "grad_norm": 7.649897076703691, + "learning_rate": 1.398069479940853e-05, + "loss": 1.3967, + "step": 64632 + }, + { + "epoch": 0.78, + "grad_norm": 13.300324750254006, + "learning_rate": 1.3980158893815294e-05, + "loss": 1.4698, + "step": 64635 + }, + { + "epoch": 0.78, + "grad_norm": 6.905766148566995, + "learning_rate": 1.3979622974639214e-05, + "loss": 0.9257, + "step": 64638 + }, + { + "epoch": 0.78, + "grad_norm": 36.50209921813497, + "learning_rate": 1.3979087041882107e-05, + "loss": 1.6889, + "step": 64641 + }, + { + "epoch": 0.78, + "grad_norm": 18.05011307775366, + "learning_rate": 1.3978551095545816e-05, + "loss": 1.4781, + "step": 64644 + }, + { + "epoch": 0.78, + "grad_norm": 8.667570748472148, + "learning_rate": 1.3978015135632159e-05, + "loss": 1.2069, + "step": 64647 + }, + { + "epoch": 0.78, + "grad_norm": 12.456169696592042, + "learning_rate": 1.397747916214297e-05, + "loss": 1.5519, + "step": 64650 + }, + { + "epoch": 0.78, + "grad_norm": 9.30174285634783, + "learning_rate": 1.3976943175080075e-05, + "loss": 1.2948, + "step": 64653 + }, + { + "epoch": 0.78, + "grad_norm": 5.93740203836966, + "learning_rate": 1.3976407174445312e-05, + "loss": 1.1374, + "step": 64656 + }, + { + "epoch": 0.78, + "grad_norm": 22.691031885596477, + "learning_rate": 1.3975871160240497e-05, + "loss": 1.413, + "step": 64659 + }, + { + "epoch": 0.78, + "grad_norm": 19.078017286440033, + "learning_rate": 1.3975335132467466e-05, + "loss": 1.3631, + "step": 64662 + }, + { + "epoch": 0.78, + "grad_norm": 3.371075827753198, + "learning_rate": 1.3974799091128056e-05, + "loss": 1.2398, + "step": 64665 + }, + { + "epoch": 0.78, + "grad_norm": 14.961737255757683, + "learning_rate": 1.397426303622408e-05, + "loss": 1.2437, + "step": 64668 + }, + { + "epoch": 0.78, + "grad_norm": 20.232781726053055, + "learning_rate": 1.397372696775738e-05, + "loss": 1.2912, + "step": 64671 + }, + { + "epoch": 0.78, + "grad_norm": 9.650342765349185, + "learning_rate": 1.3973190885729778e-05, + "loss": 1.6797, + "step": 64674 + }, + { + "epoch": 0.78, + "grad_norm": 12.95702951930135, + "learning_rate": 1.397265479014311e-05, + "loss": 1.4884, + "step": 64677 + }, + { + "epoch": 0.78, + "grad_norm": 7.644877860048002, + "learning_rate": 1.3972118680999201e-05, + "loss": 1.7506, + "step": 64680 + }, + { + "epoch": 0.78, + "grad_norm": 24.772383136582135, + "learning_rate": 1.397158255829988e-05, + "loss": 1.7922, + "step": 64683 + }, + { + "epoch": 0.78, + "grad_norm": 9.802986750338045, + "learning_rate": 1.3971046422046981e-05, + "loss": 1.7746, + "step": 64686 + }, + { + "epoch": 0.78, + "grad_norm": 9.682679144947548, + "learning_rate": 1.3970510272242329e-05, + "loss": 1.3904, + "step": 64689 + }, + { + "epoch": 0.78, + "grad_norm": 7.870032880711532, + "learning_rate": 1.3969974108887756e-05, + "loss": 1.3194, + "step": 64692 + }, + { + "epoch": 0.78, + "grad_norm": 5.235648565006403, + "learning_rate": 1.396943793198509e-05, + "loss": 1.1258, + "step": 64695 + }, + { + "epoch": 0.78, + "grad_norm": 11.558052103126878, + "learning_rate": 1.3968901741536165e-05, + "loss": 1.1749, + "step": 64698 + }, + { + "epoch": 0.78, + "grad_norm": 3.5723125479439433, + "learning_rate": 1.3968365537542808e-05, + "loss": 1.3572, + "step": 64701 + }, + { + "epoch": 0.78, + "grad_norm": 12.524300168783405, + "learning_rate": 1.3967829320006847e-05, + "loss": 1.5406, + "step": 64704 + }, + { + "epoch": 0.78, + "grad_norm": 44.39828916516476, + "learning_rate": 1.3967293088930115e-05, + "loss": 1.4266, + "step": 64707 + }, + { + "epoch": 0.78, + "grad_norm": 14.30160845725915, + "learning_rate": 1.396675684431444e-05, + "loss": 1.5447, + "step": 64710 + }, + { + "epoch": 0.78, + "grad_norm": 10.835415026593383, + "learning_rate": 1.396622058616165e-05, + "loss": 1.5027, + "step": 64713 + }, + { + "epoch": 0.78, + "grad_norm": 16.97847537515112, + "learning_rate": 1.3965684314473577e-05, + "loss": 1.3037, + "step": 64716 + }, + { + "epoch": 0.78, + "grad_norm": 8.453110407289545, + "learning_rate": 1.396514802925206e-05, + "loss": 1.1171, + "step": 64719 + }, + { + "epoch": 0.78, + "grad_norm": 10.180784583047037, + "learning_rate": 1.3964611730498913e-05, + "loss": 1.6791, + "step": 64722 + }, + { + "epoch": 0.78, + "grad_norm": 9.805113300587792, + "learning_rate": 1.396407541821598e-05, + "loss": 0.942, + "step": 64725 + }, + { + "epoch": 0.78, + "grad_norm": 24.448562919299583, + "learning_rate": 1.3963539092405083e-05, + "loss": 1.4869, + "step": 64728 + }, + { + "epoch": 0.78, + "grad_norm": 5.405711668558091, + "learning_rate": 1.3963002753068056e-05, + "loss": 1.2528, + "step": 64731 + }, + { + "epoch": 0.78, + "grad_norm": 17.23809966379832, + "learning_rate": 1.3962466400206724e-05, + "loss": 1.3363, + "step": 64734 + }, + { + "epoch": 0.78, + "grad_norm": 12.885929409918552, + "learning_rate": 1.3961930033822926e-05, + "loss": 1.4006, + "step": 64737 + }, + { + "epoch": 0.78, + "grad_norm": 53.13812967771354, + "learning_rate": 1.3961393653918484e-05, + "loss": 1.4679, + "step": 64740 + }, + { + "epoch": 0.78, + "grad_norm": 9.60968063497247, + "learning_rate": 1.3960857260495235e-05, + "loss": 1.3306, + "step": 64743 + }, + { + "epoch": 0.78, + "grad_norm": 12.396927632573265, + "learning_rate": 1.3960320853555005e-05, + "loss": 1.2736, + "step": 64746 + }, + { + "epoch": 0.78, + "grad_norm": 13.375516395372916, + "learning_rate": 1.3959784433099626e-05, + "loss": 1.6324, + "step": 64749 + }, + { + "epoch": 0.78, + "grad_norm": 39.40385117061587, + "learning_rate": 1.3959247999130934e-05, + "loss": 1.075, + "step": 64752 + }, + { + "epoch": 0.78, + "grad_norm": 9.04700004262897, + "learning_rate": 1.395871155165075e-05, + "loss": 1.3366, + "step": 64755 + }, + { + "epoch": 0.78, + "grad_norm": 12.303112340433175, + "learning_rate": 1.3958175090660912e-05, + "loss": 1.276, + "step": 64758 + }, + { + "epoch": 0.78, + "grad_norm": 15.10506050127026, + "learning_rate": 1.3957638616163247e-05, + "loss": 0.9931, + "step": 64761 + }, + { + "epoch": 0.78, + "grad_norm": 7.580457985349767, + "learning_rate": 1.3957102128159588e-05, + "loss": 1.5637, + "step": 64764 + }, + { + "epoch": 0.78, + "grad_norm": 35.59789817265648, + "learning_rate": 1.3956565626651761e-05, + "loss": 1.5189, + "step": 64767 + }, + { + "epoch": 0.78, + "grad_norm": 122.64001702550603, + "learning_rate": 1.3956029111641603e-05, + "loss": 1.307, + "step": 64770 + }, + { + "epoch": 0.78, + "grad_norm": 15.36242564373642, + "learning_rate": 1.3955492583130946e-05, + "loss": 1.364, + "step": 64773 + }, + { + "epoch": 0.78, + "grad_norm": 19.70785283252396, + "learning_rate": 1.3954956041121612e-05, + "loss": 1.3718, + "step": 64776 + }, + { + "epoch": 0.78, + "grad_norm": 27.412545985500774, + "learning_rate": 1.395441948561544e-05, + "loss": 1.1613, + "step": 64779 + }, + { + "epoch": 0.78, + "grad_norm": 34.92440963498362, + "learning_rate": 1.3953882916614259e-05, + "loss": 1.322, + "step": 64782 + }, + { + "epoch": 0.78, + "grad_norm": 44.487474346074784, + "learning_rate": 1.3953346334119901e-05, + "loss": 1.407, + "step": 64785 + }, + { + "epoch": 0.78, + "grad_norm": 57.39778252459536, + "learning_rate": 1.3952809738134191e-05, + "loss": 1.5428, + "step": 64788 + }, + { + "epoch": 0.78, + "grad_norm": 23.26016301815022, + "learning_rate": 1.3952273128658971e-05, + "loss": 1.3783, + "step": 64791 + }, + { + "epoch": 0.78, + "grad_norm": 4.156374496180197, + "learning_rate": 1.3951736505696065e-05, + "loss": 1.3196, + "step": 64794 + }, + { + "epoch": 0.78, + "grad_norm": 11.48865858817788, + "learning_rate": 1.3951199869247303e-05, + "loss": 1.0452, + "step": 64797 + }, + { + "epoch": 0.78, + "grad_norm": 19.407569499125017, + "learning_rate": 1.3950663219314522e-05, + "loss": 1.3468, + "step": 64800 + }, + { + "epoch": 0.78, + "grad_norm": 11.417099321167202, + "learning_rate": 1.3950126555899548e-05, + "loss": 1.3286, + "step": 64803 + }, + { + "epoch": 0.78, + "grad_norm": 11.11080389094298, + "learning_rate": 1.3949589879004217e-05, + "loss": 1.0433, + "step": 64806 + }, + { + "epoch": 0.78, + "grad_norm": 14.629461706690062, + "learning_rate": 1.3949053188630358e-05, + "loss": 1.8292, + "step": 64809 + }, + { + "epoch": 0.78, + "grad_norm": 23.218198432902394, + "learning_rate": 1.3948516484779801e-05, + "loss": 1.5266, + "step": 64812 + }, + { + "epoch": 0.78, + "grad_norm": 4.290895566151745, + "learning_rate": 1.394797976745438e-05, + "loss": 1.6865, + "step": 64815 + }, + { + "epoch": 0.78, + "grad_norm": 4.488964292784583, + "learning_rate": 1.3947443036655929e-05, + "loss": 1.2607, + "step": 64818 + }, + { + "epoch": 0.78, + "grad_norm": 11.960169191516282, + "learning_rate": 1.3946906292386273e-05, + "loss": 1.4565, + "step": 64821 + }, + { + "epoch": 0.78, + "grad_norm": 38.03761293580538, + "learning_rate": 1.394636953464725e-05, + "loss": 1.3964, + "step": 64824 + }, + { + "epoch": 0.78, + "grad_norm": 18.830013285535493, + "learning_rate": 1.3945832763440689e-05, + "loss": 1.2737, + "step": 64827 + }, + { + "epoch": 0.78, + "grad_norm": 7.8273311156620275, + "learning_rate": 1.394529597876842e-05, + "loss": 1.5157, + "step": 64830 + }, + { + "epoch": 0.78, + "grad_norm": 12.761065282111508, + "learning_rate": 1.3944759180632278e-05, + "loss": 0.9401, + "step": 64833 + }, + { + "epoch": 0.78, + "grad_norm": 19.146800903958944, + "learning_rate": 1.3944222369034098e-05, + "loss": 1.139, + "step": 64836 + }, + { + "epoch": 0.78, + "grad_norm": 7.024862934488928, + "learning_rate": 1.3943685543975703e-05, + "loss": 1.4873, + "step": 64839 + }, + { + "epoch": 0.78, + "grad_norm": 7.6088504960321375, + "learning_rate": 1.3943148705458929e-05, + "loss": 1.2633, + "step": 64842 + }, + { + "epoch": 0.78, + "grad_norm": 19.271923861483828, + "learning_rate": 1.3942611853485613e-05, + "loss": 1.3243, + "step": 64845 + }, + { + "epoch": 0.78, + "grad_norm": 11.673697253136812, + "learning_rate": 1.3942074988057579e-05, + "loss": 1.3589, + "step": 64848 + }, + { + "epoch": 0.78, + "grad_norm": 7.879080972977438, + "learning_rate": 1.3941538109176664e-05, + "loss": 1.3579, + "step": 64851 + }, + { + "epoch": 0.78, + "grad_norm": 11.56471555831464, + "learning_rate": 1.39410012168447e-05, + "loss": 1.5472, + "step": 64854 + }, + { + "epoch": 0.78, + "grad_norm": 11.356789802477056, + "learning_rate": 1.394046431106352e-05, + "loss": 1.5671, + "step": 64857 + }, + { + "epoch": 0.78, + "grad_norm": 28.440416839175064, + "learning_rate": 1.3939927391834953e-05, + "loss": 1.1481, + "step": 64860 + }, + { + "epoch": 0.78, + "grad_norm": 11.759912165051064, + "learning_rate": 1.3939390459160831e-05, + "loss": 1.4703, + "step": 64863 + }, + { + "epoch": 0.78, + "grad_norm": 15.782710897727451, + "learning_rate": 1.3938853513042989e-05, + "loss": 1.2441, + "step": 64866 + }, + { + "epoch": 0.78, + "grad_norm": 6.498136616963808, + "learning_rate": 1.393831655348326e-05, + "loss": 1.3994, + "step": 64869 + }, + { + "epoch": 0.78, + "grad_norm": 8.666164140542318, + "learning_rate": 1.3937779580483478e-05, + "loss": 1.6881, + "step": 64872 + }, + { + "epoch": 0.78, + "grad_norm": 6.229078695015325, + "learning_rate": 1.3937242594045466e-05, + "loss": 1.2273, + "step": 64875 + }, + { + "epoch": 0.78, + "grad_norm": 37.22168665432201, + "learning_rate": 1.3936705594171066e-05, + "loss": 1.5389, + "step": 64878 + }, + { + "epoch": 0.78, + "grad_norm": 13.797001192351289, + "learning_rate": 1.393616858086211e-05, + "loss": 1.4537, + "step": 64881 + }, + { + "epoch": 0.78, + "grad_norm": 16.51747118871321, + "learning_rate": 1.3935631554120429e-05, + "loss": 1.7244, + "step": 64884 + }, + { + "epoch": 0.78, + "grad_norm": 12.136384050708351, + "learning_rate": 1.393509451394785e-05, + "loss": 1.5289, + "step": 64887 + }, + { + "epoch": 0.78, + "grad_norm": 7.274597914711427, + "learning_rate": 1.3934557460346216e-05, + "loss": 1.2786, + "step": 64890 + }, + { + "epoch": 0.78, + "grad_norm": 9.142238935509742, + "learning_rate": 1.3934020393317354e-05, + "loss": 1.2407, + "step": 64893 + }, + { + "epoch": 0.78, + "grad_norm": 6.9569368774563864, + "learning_rate": 1.3933483312863095e-05, + "loss": 1.5264, + "step": 64896 + }, + { + "epoch": 0.78, + "grad_norm": 9.608094589766077, + "learning_rate": 1.3932946218985275e-05, + "loss": 1.4198, + "step": 64899 + }, + { + "epoch": 0.78, + "grad_norm": 6.5538952179693215, + "learning_rate": 1.3932409111685728e-05, + "loss": 1.3856, + "step": 64902 + }, + { + "epoch": 0.78, + "grad_norm": 6.98762939073568, + "learning_rate": 1.3931871990966281e-05, + "loss": 1.4276, + "step": 64905 + }, + { + "epoch": 0.78, + "grad_norm": 7.307987074521166, + "learning_rate": 1.3931334856828773e-05, + "loss": 1.6683, + "step": 64908 + }, + { + "epoch": 0.78, + "grad_norm": 18.106469072763044, + "learning_rate": 1.3930797709275036e-05, + "loss": 1.3232, + "step": 64911 + }, + { + "epoch": 0.78, + "grad_norm": 35.22709168931902, + "learning_rate": 1.3930260548306904e-05, + "loss": 1.4238, + "step": 64914 + }, + { + "epoch": 0.78, + "grad_norm": 4.982335750175324, + "learning_rate": 1.3929723373926206e-05, + "loss": 1.3382, + "step": 64917 + }, + { + "epoch": 0.78, + "grad_norm": 18.62243157530049, + "learning_rate": 1.3929186186134777e-05, + "loss": 1.1774, + "step": 64920 + }, + { + "epoch": 0.78, + "grad_norm": 12.572439809531877, + "learning_rate": 1.3928648984934455e-05, + "loss": 1.7808, + "step": 64923 + }, + { + "epoch": 0.78, + "grad_norm": 8.81096402298208, + "learning_rate": 1.3928111770327066e-05, + "loss": 1.2347, + "step": 64926 + }, + { + "epoch": 0.78, + "grad_norm": 42.66052004794482, + "learning_rate": 1.3927574542314446e-05, + "loss": 1.4196, + "step": 64929 + }, + { + "epoch": 0.78, + "grad_norm": 12.815202164761452, + "learning_rate": 1.392703730089843e-05, + "loss": 1.4304, + "step": 64932 + }, + { + "epoch": 0.78, + "grad_norm": 7.779845056422956, + "learning_rate": 1.3926500046080848e-05, + "loss": 1.2886, + "step": 64935 + }, + { + "epoch": 0.78, + "grad_norm": 11.85875167428973, + "learning_rate": 1.3925962777863538e-05, + "loss": 1.2282, + "step": 64938 + }, + { + "epoch": 0.78, + "grad_norm": 2.9404186109590986, + "learning_rate": 1.392542549624833e-05, + "loss": 1.014, + "step": 64941 + }, + { + "epoch": 0.78, + "grad_norm": 14.596937622812876, + "learning_rate": 1.3924888201237061e-05, + "loss": 1.394, + "step": 64944 + }, + { + "epoch": 0.78, + "grad_norm": 14.461855596524126, + "learning_rate": 1.392435089283156e-05, + "loss": 0.9934, + "step": 64947 + }, + { + "epoch": 0.78, + "grad_norm": 32.60269276921288, + "learning_rate": 1.3923813571033663e-05, + "loss": 1.2551, + "step": 64950 + }, + { + "epoch": 0.78, + "grad_norm": 8.144270636564226, + "learning_rate": 1.3923276235845205e-05, + "loss": 1.3129, + "step": 64953 + }, + { + "epoch": 0.78, + "grad_norm": 8.982592082371006, + "learning_rate": 1.3922738887268017e-05, + "loss": 1.3651, + "step": 64956 + }, + { + "epoch": 0.78, + "grad_norm": 14.270894637329683, + "learning_rate": 1.3922201525303933e-05, + "loss": 1.3774, + "step": 64959 + }, + { + "epoch": 0.78, + "grad_norm": 13.651275707178145, + "learning_rate": 1.392166414995479e-05, + "loss": 1.0498, + "step": 64962 + }, + { + "epoch": 0.78, + "grad_norm": 29.515097290190614, + "learning_rate": 1.3921126761222419e-05, + "loss": 1.2867, + "step": 64965 + }, + { + "epoch": 0.78, + "grad_norm": 61.738387738288004, + "learning_rate": 1.3920589359108656e-05, + "loss": 1.7206, + "step": 64968 + }, + { + "epoch": 0.78, + "grad_norm": 9.784649816309411, + "learning_rate": 1.3920051943615332e-05, + "loss": 1.4496, + "step": 64971 + }, + { + "epoch": 0.78, + "grad_norm": 2.2776256470429472, + "learning_rate": 1.3919514514744283e-05, + "loss": 1.3019, + "step": 64974 + }, + { + "epoch": 0.78, + "grad_norm": 10.500108542379127, + "learning_rate": 1.3918977072497345e-05, + "loss": 1.2192, + "step": 64977 + }, + { + "epoch": 0.78, + "grad_norm": 8.607818205249046, + "learning_rate": 1.3918439616876349e-05, + "loss": 1.072, + "step": 64980 + }, + { + "epoch": 0.78, + "grad_norm": 15.314627310749234, + "learning_rate": 1.3917902147883126e-05, + "loss": 1.2699, + "step": 64983 + }, + { + "epoch": 0.78, + "grad_norm": 4.70545832099883, + "learning_rate": 1.391736466551952e-05, + "loss": 1.7231, + "step": 64986 + }, + { + "epoch": 0.78, + "grad_norm": 8.072175982533432, + "learning_rate": 1.3916827169787357e-05, + "loss": 1.2327, + "step": 64989 + }, + { + "epoch": 0.78, + "grad_norm": 15.781749992844173, + "learning_rate": 1.3916289660688471e-05, + "loss": 1.4019, + "step": 64992 + }, + { + "epoch": 0.78, + "grad_norm": 3.2239087687719272, + "learning_rate": 1.3915752138224703e-05, + "loss": 1.4015, + "step": 64995 + }, + { + "epoch": 0.78, + "grad_norm": 11.940041495665833, + "learning_rate": 1.3915214602397882e-05, + "loss": 1.5239, + "step": 64998 + }, + { + "epoch": 0.78, + "grad_norm": 26.57302733130567, + "learning_rate": 1.3914677053209844e-05, + "loss": 1.364, + "step": 65001 + }, + { + "epoch": 0.78, + "grad_norm": 9.384425493726592, + "learning_rate": 1.3914139490662421e-05, + "loss": 1.1614, + "step": 65004 + }, + { + "epoch": 0.78, + "grad_norm": 10.909531449931558, + "learning_rate": 1.3913601914757452e-05, + "loss": 1.1418, + "step": 65007 + }, + { + "epoch": 0.78, + "grad_norm": 10.116055318964749, + "learning_rate": 1.3913064325496773e-05, + "loss": 1.9644, + "step": 65010 + }, + { + "epoch": 0.78, + "grad_norm": 11.931315570810083, + "learning_rate": 1.3912526722882209e-05, + "loss": 1.2582, + "step": 65013 + }, + { + "epoch": 0.78, + "grad_norm": 33.868199479440925, + "learning_rate": 1.3911989106915602e-05, + "loss": 1.2548, + "step": 65016 + }, + { + "epoch": 0.78, + "grad_norm": 8.72322352819614, + "learning_rate": 1.3911451477598785e-05, + "loss": 1.1897, + "step": 65019 + }, + { + "epoch": 0.78, + "grad_norm": 3.593560206924166, + "learning_rate": 1.3910913834933596e-05, + "loss": 1.3677, + "step": 65022 + }, + { + "epoch": 0.78, + "grad_norm": 5.736927019275372, + "learning_rate": 1.3910376178921862e-05, + "loss": 1.1149, + "step": 65025 + }, + { + "epoch": 0.78, + "grad_norm": 32.04850020663842, + "learning_rate": 1.3909838509565428e-05, + "loss": 1.5541, + "step": 65028 + }, + { + "epoch": 0.78, + "grad_norm": 68.42188176987945, + "learning_rate": 1.3909300826866122e-05, + "loss": 1.3912, + "step": 65031 + }, + { + "epoch": 0.78, + "grad_norm": 17.794352881045604, + "learning_rate": 1.3908763130825777e-05, + "loss": 1.4153, + "step": 65034 + }, + { + "epoch": 0.78, + "grad_norm": 13.076268049527332, + "learning_rate": 1.3908225421446234e-05, + "loss": 1.466, + "step": 65037 + }, + { + "epoch": 0.78, + "grad_norm": 21.012573267277045, + "learning_rate": 1.3907687698729327e-05, + "loss": 1.2777, + "step": 65040 + }, + { + "epoch": 0.78, + "grad_norm": 18.15812587354238, + "learning_rate": 1.3907149962676885e-05, + "loss": 1.3063, + "step": 65043 + }, + { + "epoch": 0.78, + "grad_norm": 2.4190490869336574, + "learning_rate": 1.390661221329075e-05, + "loss": 1.4598, + "step": 65046 + }, + { + "epoch": 0.78, + "grad_norm": 18.5240749641236, + "learning_rate": 1.3906074450572755e-05, + "loss": 1.5524, + "step": 65049 + }, + { + "epoch": 0.78, + "grad_norm": 9.29006962364843, + "learning_rate": 1.3905536674524736e-05, + "loss": 1.4119, + "step": 65052 + }, + { + "epoch": 0.78, + "grad_norm": 11.384765386335134, + "learning_rate": 1.3904998885148525e-05, + "loss": 1.1934, + "step": 65055 + }, + { + "epoch": 0.78, + "grad_norm": 5.787405061807188, + "learning_rate": 1.3904461082445959e-05, + "loss": 1.3094, + "step": 65058 + }, + { + "epoch": 0.78, + "grad_norm": 9.160596468902494, + "learning_rate": 1.3903923266418878e-05, + "loss": 1.3175, + "step": 65061 + }, + { + "epoch": 0.78, + "grad_norm": 13.939870754281412, + "learning_rate": 1.3903385437069106e-05, + "loss": 1.2319, + "step": 65064 + }, + { + "epoch": 0.78, + "grad_norm": 12.719644079592854, + "learning_rate": 1.3902847594398488e-05, + "loss": 1.4197, + "step": 65067 + }, + { + "epoch": 0.78, + "grad_norm": 25.59670370291791, + "learning_rate": 1.390230973840886e-05, + "loss": 1.3324, + "step": 65070 + }, + { + "epoch": 0.78, + "grad_norm": 9.05421246479713, + "learning_rate": 1.3901771869102052e-05, + "loss": 1.6135, + "step": 65073 + }, + { + "epoch": 0.78, + "grad_norm": 25.0775678386687, + "learning_rate": 1.3901233986479904e-05, + "loss": 1.1088, + "step": 65076 + }, + { + "epoch": 0.78, + "grad_norm": 27.812207368700346, + "learning_rate": 1.3900696090544246e-05, + "loss": 1.3462, + "step": 65079 + }, + { + "epoch": 0.78, + "grad_norm": 36.70420268881782, + "learning_rate": 1.390015818129692e-05, + "loss": 1.6294, + "step": 65082 + }, + { + "epoch": 0.78, + "grad_norm": 13.75207284879714, + "learning_rate": 1.3899620258739759e-05, + "loss": 1.6067, + "step": 65085 + }, + { + "epoch": 0.78, + "grad_norm": 9.576912441802387, + "learning_rate": 1.38990823228746e-05, + "loss": 1.2666, + "step": 65088 + }, + { + "epoch": 0.78, + "grad_norm": 22.486002772423145, + "learning_rate": 1.3898544373703274e-05, + "loss": 1.5868, + "step": 65091 + }, + { + "epoch": 0.78, + "grad_norm": 8.339139368490542, + "learning_rate": 1.3898006411227623e-05, + "loss": 1.1821, + "step": 65094 + }, + { + "epoch": 0.78, + "grad_norm": 11.008728673075618, + "learning_rate": 1.389746843544948e-05, + "loss": 1.3386, + "step": 65097 + }, + { + "epoch": 0.78, + "grad_norm": 8.329491427218175, + "learning_rate": 1.3896930446370677e-05, + "loss": 1.554, + "step": 65100 + }, + { + "epoch": 0.78, + "grad_norm": 16.187273791586165, + "learning_rate": 1.3896392443993061e-05, + "loss": 1.4285, + "step": 65103 + }, + { + "epoch": 0.78, + "grad_norm": 29.026930460059933, + "learning_rate": 1.3895854428318455e-05, + "loss": 1.4407, + "step": 65106 + }, + { + "epoch": 0.78, + "grad_norm": 15.522015153444645, + "learning_rate": 1.3895316399348704e-05, + "loss": 1.0929, + "step": 65109 + }, + { + "epoch": 0.78, + "grad_norm": 169.63780178559824, + "learning_rate": 1.3894778357085639e-05, + "loss": 1.4464, + "step": 65112 + }, + { + "epoch": 0.78, + "grad_norm": 16.9382744835943, + "learning_rate": 1.3894240301531103e-05, + "loss": 1.3499, + "step": 65115 + }, + { + "epoch": 0.78, + "grad_norm": 9.614495257023727, + "learning_rate": 1.3893702232686923e-05, + "loss": 1.2832, + "step": 65118 + }, + { + "epoch": 0.78, + "grad_norm": 42.03671148377515, + "learning_rate": 1.389316415055494e-05, + "loss": 1.0635, + "step": 65121 + }, + { + "epoch": 0.78, + "grad_norm": 36.581672611456256, + "learning_rate": 1.3892626055136992e-05, + "loss": 1.2138, + "step": 65124 + }, + { + "epoch": 0.78, + "grad_norm": 25.32608936609289, + "learning_rate": 1.3892087946434913e-05, + "loss": 1.1929, + "step": 65127 + }, + { + "epoch": 0.78, + "grad_norm": 100.97276452217284, + "learning_rate": 1.389154982445054e-05, + "loss": 1.3711, + "step": 65130 + }, + { + "epoch": 0.78, + "grad_norm": 24.097188247180718, + "learning_rate": 1.389101168918571e-05, + "loss": 1.2346, + "step": 65133 + }, + { + "epoch": 0.78, + "grad_norm": 108.53721372296319, + "learning_rate": 1.3890473540642258e-05, + "loss": 1.4468, + "step": 65136 + }, + { + "epoch": 0.78, + "grad_norm": 18.126204824755025, + "learning_rate": 1.388993537882202e-05, + "loss": 1.4742, + "step": 65139 + }, + { + "epoch": 0.78, + "grad_norm": 44.34505800393275, + "learning_rate": 1.3889397203726833e-05, + "loss": 1.4765, + "step": 65142 + }, + { + "epoch": 0.78, + "grad_norm": 45.85727357236813, + "learning_rate": 1.3888859015358538e-05, + "loss": 1.3844, + "step": 65145 + }, + { + "epoch": 0.78, + "grad_norm": 15.241881434062991, + "learning_rate": 1.3888320813718962e-05, + "loss": 1.3105, + "step": 65148 + }, + { + "epoch": 0.78, + "grad_norm": 13.285064429635353, + "learning_rate": 1.3887782598809954e-05, + "loss": 1.2673, + "step": 65151 + }, + { + "epoch": 0.78, + "grad_norm": 8.783569069733536, + "learning_rate": 1.3887244370633339e-05, + "loss": 1.4177, + "step": 65154 + }, + { + "epoch": 0.78, + "grad_norm": 12.915291202655096, + "learning_rate": 1.3886706129190963e-05, + "loss": 1.3067, + "step": 65157 + }, + { + "epoch": 0.78, + "grad_norm": 9.059206546416107, + "learning_rate": 1.3886167874484657e-05, + "loss": 1.8014, + "step": 65160 + }, + { + "epoch": 0.78, + "grad_norm": 43.788903284593005, + "learning_rate": 1.3885629606516262e-05, + "loss": 1.574, + "step": 65163 + }, + { + "epoch": 0.78, + "grad_norm": 10.294611524011726, + "learning_rate": 1.388509132528761e-05, + "loss": 1.4323, + "step": 65166 + }, + { + "epoch": 0.78, + "grad_norm": 22.444969719131542, + "learning_rate": 1.3884553030800543e-05, + "loss": 1.2862, + "step": 65169 + }, + { + "epoch": 0.78, + "grad_norm": 21.29561465088725, + "learning_rate": 1.3884014723056893e-05, + "loss": 1.4222, + "step": 65172 + }, + { + "epoch": 0.78, + "grad_norm": 46.10718938010901, + "learning_rate": 1.38834764020585e-05, + "loss": 1.3733, + "step": 65175 + }, + { + "epoch": 0.78, + "grad_norm": 12.145640548568887, + "learning_rate": 1.3882938067807206e-05, + "loss": 1.5161, + "step": 65178 + }, + { + "epoch": 0.78, + "grad_norm": 5.418982508721794, + "learning_rate": 1.3882399720304838e-05, + "loss": 1.0873, + "step": 65181 + }, + { + "epoch": 0.78, + "grad_norm": 13.0166502004026, + "learning_rate": 1.388186135955324e-05, + "loss": 1.5171, + "step": 65184 + }, + { + "epoch": 0.78, + "grad_norm": 10.049880446316056, + "learning_rate": 1.3881322985554244e-05, + "loss": 1.1481, + "step": 65187 + }, + { + "epoch": 0.78, + "grad_norm": 10.048218981592582, + "learning_rate": 1.3880784598309695e-05, + "loss": 1.3034, + "step": 65190 + }, + { + "epoch": 0.78, + "grad_norm": 29.33372551593937, + "learning_rate": 1.3880246197821425e-05, + "loss": 1.5473, + "step": 65193 + }, + { + "epoch": 0.78, + "grad_norm": 72.75038388149166, + "learning_rate": 1.3879707784091272e-05, + "loss": 1.1883, + "step": 65196 + }, + { + "epoch": 0.78, + "grad_norm": 9.399502783020328, + "learning_rate": 1.3879169357121074e-05, + "loss": 1.3397, + "step": 65199 + }, + { + "epoch": 0.78, + "grad_norm": 17.513788748937998, + "learning_rate": 1.3878630916912667e-05, + "loss": 1.2076, + "step": 65202 + }, + { + "epoch": 0.78, + "grad_norm": 24.94926368332752, + "learning_rate": 1.3878092463467892e-05, + "loss": 1.4492, + "step": 65205 + }, + { + "epoch": 0.78, + "grad_norm": 17.990021105238704, + "learning_rate": 1.3877553996788581e-05, + "loss": 1.4934, + "step": 65208 + }, + { + "epoch": 0.78, + "grad_norm": 17.356916404580463, + "learning_rate": 1.387701551687658e-05, + "loss": 1.1246, + "step": 65211 + }, + { + "epoch": 0.78, + "grad_norm": 35.37255208015949, + "learning_rate": 1.3876477023733716e-05, + "loss": 1.2941, + "step": 65214 + }, + { + "epoch": 0.78, + "grad_norm": 38.027890043359406, + "learning_rate": 1.3875938517361835e-05, + "loss": 1.1194, + "step": 65217 + }, + { + "epoch": 0.78, + "grad_norm": 4.977219732750929, + "learning_rate": 1.3875399997762774e-05, + "loss": 1.47, + "step": 65220 + }, + { + "epoch": 0.78, + "grad_norm": 10.651263382562297, + "learning_rate": 1.3874861464938367e-05, + "loss": 1.4425, + "step": 65223 + }, + { + "epoch": 0.78, + "grad_norm": 8.625993965921685, + "learning_rate": 1.387432291889045e-05, + "loss": 1.3111, + "step": 65226 + }, + { + "epoch": 0.78, + "grad_norm": 8.466670401088836, + "learning_rate": 1.3873784359620866e-05, + "loss": 1.253, + "step": 65229 + }, + { + "epoch": 0.78, + "grad_norm": 12.283333373688047, + "learning_rate": 1.3873245787131456e-05, + "loss": 1.1234, + "step": 65232 + }, + { + "epoch": 0.78, + "grad_norm": 13.772954259618094, + "learning_rate": 1.387270720142405e-05, + "loss": 0.9605, + "step": 65235 + }, + { + "epoch": 0.78, + "grad_norm": 21.643294502080963, + "learning_rate": 1.3872168602500488e-05, + "loss": 1.3725, + "step": 65238 + }, + { + "epoch": 0.78, + "grad_norm": 24.82628344468288, + "learning_rate": 1.3871629990362611e-05, + "loss": 1.1964, + "step": 65241 + }, + { + "epoch": 0.78, + "grad_norm": 23.058353450595696, + "learning_rate": 1.3871091365012254e-05, + "loss": 1.5855, + "step": 65244 + }, + { + "epoch": 0.78, + "grad_norm": 56.22360089389105, + "learning_rate": 1.3870552726451259e-05, + "loss": 1.2532, + "step": 65247 + }, + { + "epoch": 0.78, + "grad_norm": 37.51446744593753, + "learning_rate": 1.387001407468146e-05, + "loss": 1.2936, + "step": 65250 + }, + { + "epoch": 0.78, + "grad_norm": 17.163566267530694, + "learning_rate": 1.3869475409704695e-05, + "loss": 1.4025, + "step": 65253 + }, + { + "epoch": 0.78, + "grad_norm": 8.097181605079063, + "learning_rate": 1.3868936731522806e-05, + "loss": 1.2056, + "step": 65256 + }, + { + "epoch": 0.78, + "grad_norm": 44.63538627496924, + "learning_rate": 1.3868398040137632e-05, + "loss": 1.5225, + "step": 65259 + }, + { + "epoch": 0.78, + "grad_norm": 9.715525925729711, + "learning_rate": 1.3867859335551005e-05, + "loss": 1.3781, + "step": 65262 + }, + { + "epoch": 0.78, + "grad_norm": 4.003569588631138, + "learning_rate": 1.386732061776477e-05, + "loss": 1.6268, + "step": 65265 + }, + { + "epoch": 0.78, + "grad_norm": 13.731980299867713, + "learning_rate": 1.386678188678076e-05, + "loss": 1.4354, + "step": 65268 + }, + { + "epoch": 0.78, + "grad_norm": 5.95749226366869, + "learning_rate": 1.386624314260082e-05, + "loss": 1.4703, + "step": 65271 + }, + { + "epoch": 0.78, + "grad_norm": 12.31601990974055, + "learning_rate": 1.386570438522678e-05, + "loss": 0.9392, + "step": 65274 + }, + { + "epoch": 0.78, + "grad_norm": 25.455578508020057, + "learning_rate": 1.386516561466049e-05, + "loss": 1.2614, + "step": 65277 + }, + { + "epoch": 0.78, + "grad_norm": 2.175051228420992, + "learning_rate": 1.3864626830903775e-05, + "loss": 1.0201, + "step": 65280 + }, + { + "epoch": 0.79, + "grad_norm": 10.256060390356213, + "learning_rate": 1.3864088033958485e-05, + "loss": 1.2106, + "step": 65283 + }, + { + "epoch": 0.79, + "grad_norm": 16.765870095962914, + "learning_rate": 1.3863549223826453e-05, + "loss": 1.5777, + "step": 65286 + }, + { + "epoch": 0.79, + "grad_norm": 71.88547395569684, + "learning_rate": 1.3863010400509517e-05, + "loss": 1.2374, + "step": 65289 + }, + { + "epoch": 0.79, + "grad_norm": 36.048956244860975, + "learning_rate": 1.3862471564009523e-05, + "loss": 1.4408, + "step": 65292 + }, + { + "epoch": 0.79, + "grad_norm": 6.515942624425835, + "learning_rate": 1.3861932714328302e-05, + "loss": 1.3083, + "step": 65295 + }, + { + "epoch": 0.79, + "grad_norm": 8.345911272588378, + "learning_rate": 1.3861393851467697e-05, + "loss": 1.1931, + "step": 65298 + }, + { + "epoch": 0.79, + "grad_norm": 17.22595288934826, + "learning_rate": 1.3860854975429541e-05, + "loss": 1.2018, + "step": 65301 + }, + { + "epoch": 0.79, + "grad_norm": 20.06833356747115, + "learning_rate": 1.3860316086215681e-05, + "loss": 1.312, + "step": 65304 + }, + { + "epoch": 0.79, + "grad_norm": 5.3892371377807775, + "learning_rate": 1.3859777183827953e-05, + "loss": 1.3592, + "step": 65307 + }, + { + "epoch": 0.79, + "grad_norm": 5.945835716438975, + "learning_rate": 1.3859238268268196e-05, + "loss": 1.3427, + "step": 65310 + }, + { + "epoch": 0.79, + "grad_norm": 19.328902357917027, + "learning_rate": 1.3858699339538248e-05, + "loss": 1.3858, + "step": 65313 + }, + { + "epoch": 0.79, + "grad_norm": 23.227022640337996, + "learning_rate": 1.385816039763995e-05, + "loss": 1.2673, + "step": 65316 + }, + { + "epoch": 0.79, + "grad_norm": 24.36191242939711, + "learning_rate": 1.385762144257514e-05, + "loss": 1.4453, + "step": 65319 + }, + { + "epoch": 0.79, + "grad_norm": 7.615793382214309, + "learning_rate": 1.3857082474345656e-05, + "loss": 1.2343, + "step": 65322 + }, + { + "epoch": 0.79, + "grad_norm": 20.736932351304922, + "learning_rate": 1.385654349295334e-05, + "loss": 1.2821, + "step": 65325 + }, + { + "epoch": 0.79, + "grad_norm": 5.778996514045352, + "learning_rate": 1.385600449840003e-05, + "loss": 1.2426, + "step": 65328 + }, + { + "epoch": 0.79, + "grad_norm": 7.848479833752328, + "learning_rate": 1.3855465490687568e-05, + "loss": 1.2461, + "step": 65331 + }, + { + "epoch": 0.79, + "grad_norm": 12.36997023892076, + "learning_rate": 1.3854926469817786e-05, + "loss": 1.1449, + "step": 65334 + }, + { + "epoch": 0.79, + "grad_norm": 22.412408895303987, + "learning_rate": 1.385438743579253e-05, + "loss": 1.0066, + "step": 65337 + }, + { + "epoch": 0.79, + "grad_norm": 6.556564801794917, + "learning_rate": 1.3853848388613641e-05, + "loss": 1.2145, + "step": 65340 + }, + { + "epoch": 0.79, + "grad_norm": 15.04216956349966, + "learning_rate": 1.3853309328282953e-05, + "loss": 1.3384, + "step": 65343 + }, + { + "epoch": 0.79, + "grad_norm": 6.533834019564422, + "learning_rate": 1.385277025480231e-05, + "loss": 1.3281, + "step": 65346 + }, + { + "epoch": 0.79, + "grad_norm": 13.123242208368426, + "learning_rate": 1.3852231168173549e-05, + "loss": 1.144, + "step": 65349 + }, + { + "epoch": 0.79, + "grad_norm": 14.158451782511053, + "learning_rate": 1.3851692068398512e-05, + "loss": 1.4932, + "step": 65352 + }, + { + "epoch": 0.79, + "grad_norm": 10.413608228559756, + "learning_rate": 1.3851152955479035e-05, + "loss": 1.4025, + "step": 65355 + }, + { + "epoch": 0.79, + "grad_norm": 8.348906551317762, + "learning_rate": 1.385061382941696e-05, + "loss": 1.292, + "step": 65358 + }, + { + "epoch": 0.79, + "grad_norm": 22.89947406185126, + "learning_rate": 1.3850074690214128e-05, + "loss": 1.5088, + "step": 65361 + }, + { + "epoch": 0.79, + "grad_norm": 17.675348730429377, + "learning_rate": 1.3849535537872378e-05, + "loss": 1.0827, + "step": 65364 + }, + { + "epoch": 0.79, + "grad_norm": 16.5514823812967, + "learning_rate": 1.384899637239355e-05, + "loss": 1.1361, + "step": 65367 + }, + { + "epoch": 0.79, + "grad_norm": 28.23256987490786, + "learning_rate": 1.3848457193779483e-05, + "loss": 1.7439, + "step": 65370 + }, + { + "epoch": 0.79, + "grad_norm": 15.188305656127824, + "learning_rate": 1.384791800203202e-05, + "loss": 1.4293, + "step": 65373 + }, + { + "epoch": 0.79, + "grad_norm": 11.464744766667849, + "learning_rate": 1.3847378797152995e-05, + "loss": 1.3378, + "step": 65376 + }, + { + "epoch": 0.79, + "grad_norm": 19.8985673647708, + "learning_rate": 1.3846839579144255e-05, + "loss": 1.2909, + "step": 65379 + }, + { + "epoch": 0.79, + "grad_norm": 23.006024267421793, + "learning_rate": 1.3846300348007637e-05, + "loss": 1.5305, + "step": 65382 + }, + { + "epoch": 0.79, + "grad_norm": 9.190078033770984, + "learning_rate": 1.3845761103744982e-05, + "loss": 1.3878, + "step": 65385 + }, + { + "epoch": 0.79, + "grad_norm": 18.14975524183371, + "learning_rate": 1.3845221846358126e-05, + "loss": 1.294, + "step": 65388 + }, + { + "epoch": 0.79, + "grad_norm": 10.324833833119055, + "learning_rate": 1.3844682575848916e-05, + "loss": 1.1093, + "step": 65391 + }, + { + "epoch": 0.79, + "grad_norm": 76.21477863426469, + "learning_rate": 1.3844143292219187e-05, + "loss": 1.2909, + "step": 65394 + }, + { + "epoch": 0.79, + "grad_norm": 14.291800183616735, + "learning_rate": 1.3843603995470783e-05, + "loss": 1.5977, + "step": 65397 + }, + { + "epoch": 0.79, + "grad_norm": 16.83540875874873, + "learning_rate": 1.3843064685605543e-05, + "loss": 1.3507, + "step": 65400 + }, + { + "epoch": 0.79, + "grad_norm": 5.59376799610713, + "learning_rate": 1.3842525362625308e-05, + "loss": 1.4322, + "step": 65403 + }, + { + "epoch": 0.79, + "grad_norm": 17.44000262639212, + "learning_rate": 1.3841986026531917e-05, + "loss": 1.1882, + "step": 65406 + }, + { + "epoch": 0.79, + "grad_norm": 16.8864990845879, + "learning_rate": 1.3841446677327213e-05, + "loss": 1.5661, + "step": 65409 + }, + { + "epoch": 0.79, + "grad_norm": 8.758997733538441, + "learning_rate": 1.3840907315013033e-05, + "loss": 1.2585, + "step": 65412 + }, + { + "epoch": 0.79, + "grad_norm": 4.104128133584508, + "learning_rate": 1.3840367939591221e-05, + "loss": 1.1281, + "step": 65415 + }, + { + "epoch": 0.79, + "grad_norm": 6.297448478773885, + "learning_rate": 1.3839828551063617e-05, + "loss": 1.2736, + "step": 65418 + }, + { + "epoch": 0.79, + "grad_norm": 19.06200396316679, + "learning_rate": 1.383928914943206e-05, + "loss": 1.5717, + "step": 65421 + }, + { + "epoch": 0.79, + "grad_norm": 14.91122540147664, + "learning_rate": 1.3838749734698393e-05, + "loss": 1.1903, + "step": 65424 + }, + { + "epoch": 0.79, + "grad_norm": 47.31963553949442, + "learning_rate": 1.3838210306864456e-05, + "loss": 1.5045, + "step": 65427 + }, + { + "epoch": 0.79, + "grad_norm": 10.326030816733672, + "learning_rate": 1.383767086593209e-05, + "loss": 1.1641, + "step": 65430 + }, + { + "epoch": 0.79, + "grad_norm": 62.59277822009485, + "learning_rate": 1.3837131411903135e-05, + "loss": 1.8477, + "step": 65433 + }, + { + "epoch": 0.79, + "grad_norm": 11.618770390944828, + "learning_rate": 1.3836591944779432e-05, + "loss": 1.4709, + "step": 65436 + }, + { + "epoch": 0.79, + "grad_norm": 24.017976660463766, + "learning_rate": 1.3836052464562823e-05, + "loss": 1.1806, + "step": 65439 + }, + { + "epoch": 0.79, + "grad_norm": 38.97150944088883, + "learning_rate": 1.383551297125515e-05, + "loss": 1.3383, + "step": 65442 + }, + { + "epoch": 0.79, + "grad_norm": 8.490562658253676, + "learning_rate": 1.3834973464858253e-05, + "loss": 1.6859, + "step": 65445 + }, + { + "epoch": 0.79, + "grad_norm": 8.644460526863558, + "learning_rate": 1.383443394537397e-05, + "loss": 1.172, + "step": 65448 + }, + { + "epoch": 0.79, + "grad_norm": 13.754026783507657, + "learning_rate": 1.3833894412804145e-05, + "loss": 1.0503, + "step": 65451 + }, + { + "epoch": 0.79, + "grad_norm": 8.056699307781846, + "learning_rate": 1.3833354867150623e-05, + "loss": 1.3395, + "step": 65454 + }, + { + "epoch": 0.79, + "grad_norm": 20.156584815041388, + "learning_rate": 1.383281530841524e-05, + "loss": 1.3394, + "step": 65457 + }, + { + "epoch": 0.79, + "grad_norm": 7.370003983846255, + "learning_rate": 1.3832275736599839e-05, + "loss": 1.4619, + "step": 65460 + }, + { + "epoch": 0.79, + "grad_norm": 24.91789775351792, + "learning_rate": 1.3831736151706259e-05, + "loss": 1.5534, + "step": 65463 + }, + { + "epoch": 0.79, + "grad_norm": 8.062356412767643, + "learning_rate": 1.3831196553736346e-05, + "loss": 1.1741, + "step": 65466 + }, + { + "epoch": 0.79, + "grad_norm": 3.8247168497042145, + "learning_rate": 1.383065694269194e-05, + "loss": 1.3657, + "step": 65469 + }, + { + "epoch": 0.79, + "grad_norm": 27.319877665892783, + "learning_rate": 1.3830117318574878e-05, + "loss": 1.6794, + "step": 65472 + }, + { + "epoch": 0.79, + "grad_norm": 13.252977336149925, + "learning_rate": 1.382957768138701e-05, + "loss": 1.6716, + "step": 65475 + }, + { + "epoch": 0.79, + "grad_norm": 14.36201989478699, + "learning_rate": 1.3829038031130168e-05, + "loss": 1.2583, + "step": 65478 + }, + { + "epoch": 0.79, + "grad_norm": 32.63266417856611, + "learning_rate": 1.3828498367806203e-05, + "loss": 1.3545, + "step": 65481 + }, + { + "epoch": 0.79, + "grad_norm": 50.02826490635576, + "learning_rate": 1.382795869141695e-05, + "loss": 1.3889, + "step": 65484 + }, + { + "epoch": 0.79, + "grad_norm": 12.60487932050569, + "learning_rate": 1.3827419001964254e-05, + "loss": 1.5056, + "step": 65487 + }, + { + "epoch": 0.79, + "grad_norm": 8.828029443483405, + "learning_rate": 1.3826879299449954e-05, + "loss": 1.2724, + "step": 65490 + }, + { + "epoch": 0.79, + "grad_norm": 7.411818518323269, + "learning_rate": 1.3826339583875892e-05, + "loss": 1.5109, + "step": 65493 + }, + { + "epoch": 0.79, + "grad_norm": 6.619937213115715, + "learning_rate": 1.3825799855243913e-05, + "loss": 1.6448, + "step": 65496 + }, + { + "epoch": 0.79, + "grad_norm": 21.019339348304996, + "learning_rate": 1.3825260113555858e-05, + "loss": 1.3872, + "step": 65499 + }, + { + "epoch": 0.79, + "grad_norm": 11.58945735179587, + "learning_rate": 1.3824720358813566e-05, + "loss": 1.5342, + "step": 65502 + }, + { + "epoch": 0.79, + "grad_norm": 18.929910686475907, + "learning_rate": 1.3824180591018881e-05, + "loss": 1.3215, + "step": 65505 + }, + { + "epoch": 0.79, + "grad_norm": 12.555123626920054, + "learning_rate": 1.3823640810173648e-05, + "loss": 1.2115, + "step": 65508 + }, + { + "epoch": 0.79, + "grad_norm": 15.355931031848446, + "learning_rate": 1.3823101016279705e-05, + "loss": 1.4193, + "step": 65511 + }, + { + "epoch": 0.79, + "grad_norm": 9.86743867231447, + "learning_rate": 1.3822561209338896e-05, + "loss": 1.5356, + "step": 65514 + }, + { + "epoch": 0.79, + "grad_norm": 6.493639471590526, + "learning_rate": 1.382202138935306e-05, + "loss": 1.2123, + "step": 65517 + }, + { + "epoch": 0.79, + "grad_norm": 18.238071396835785, + "learning_rate": 1.3821481556324043e-05, + "loss": 1.1386, + "step": 65520 + }, + { + "epoch": 0.79, + "grad_norm": 6.8851455502409715, + "learning_rate": 1.3820941710253686e-05, + "loss": 1.2988, + "step": 65523 + }, + { + "epoch": 0.79, + "grad_norm": 9.94370279011785, + "learning_rate": 1.382040185114383e-05, + "loss": 1.5002, + "step": 65526 + }, + { + "epoch": 0.79, + "grad_norm": 11.849830287575397, + "learning_rate": 1.3819861978996318e-05, + "loss": 1.1672, + "step": 65529 + }, + { + "epoch": 0.79, + "grad_norm": 9.378201982790083, + "learning_rate": 1.3819322093812997e-05, + "loss": 1.2879, + "step": 65532 + }, + { + "epoch": 0.79, + "grad_norm": 9.573271046598245, + "learning_rate": 1.3818782195595704e-05, + "loss": 1.3851, + "step": 65535 + }, + { + "epoch": 0.79, + "grad_norm": 5.716397576478543, + "learning_rate": 1.3818242284346281e-05, + "loss": 1.3877, + "step": 65538 + }, + { + "epoch": 0.79, + "grad_norm": 17.66019210163458, + "learning_rate": 1.3817702360066576e-05, + "loss": 1.5144, + "step": 65541 + }, + { + "epoch": 0.79, + "grad_norm": 15.529440771285492, + "learning_rate": 1.3817162422758423e-05, + "loss": 1.2622, + "step": 65544 + }, + { + "epoch": 0.79, + "grad_norm": 10.507829135660963, + "learning_rate": 1.3816622472423673e-05, + "loss": 1.1958, + "step": 65547 + }, + { + "epoch": 0.79, + "grad_norm": 17.620745192476214, + "learning_rate": 1.381608250906416e-05, + "loss": 1.371, + "step": 65550 + }, + { + "epoch": 0.79, + "grad_norm": 16.47441188010679, + "learning_rate": 1.3815542532681741e-05, + "loss": 1.1629, + "step": 65553 + }, + { + "epoch": 0.79, + "grad_norm": 7.4750158583592725, + "learning_rate": 1.3815002543278245e-05, + "loss": 1.2658, + "step": 65556 + }, + { + "epoch": 0.79, + "grad_norm": 5.237279329521076, + "learning_rate": 1.3814462540855518e-05, + "loss": 1.1092, + "step": 65559 + }, + { + "epoch": 0.79, + "grad_norm": 12.564424463575365, + "learning_rate": 1.3813922525415406e-05, + "loss": 1.8025, + "step": 65562 + }, + { + "epoch": 0.79, + "grad_norm": 20.8451322730363, + "learning_rate": 1.3813382496959749e-05, + "loss": 1.307, + "step": 65565 + }, + { + "epoch": 0.79, + "grad_norm": 19.999300447476323, + "learning_rate": 1.3812842455490391e-05, + "loss": 1.2812, + "step": 65568 + }, + { + "epoch": 0.79, + "grad_norm": 5.938038429270073, + "learning_rate": 1.3812302401009177e-05, + "loss": 1.0908, + "step": 65571 + }, + { + "epoch": 0.79, + "grad_norm": 34.30680439051136, + "learning_rate": 1.3811762333517949e-05, + "loss": 1.5538, + "step": 65574 + }, + { + "epoch": 0.79, + "grad_norm": 13.294789677383886, + "learning_rate": 1.3811222253018546e-05, + "loss": 1.1012, + "step": 65577 + }, + { + "epoch": 0.79, + "grad_norm": 17.257793436178723, + "learning_rate": 1.3810682159512813e-05, + "loss": 1.5081, + "step": 65580 + }, + { + "epoch": 0.79, + "grad_norm": 8.671310423090056, + "learning_rate": 1.3810142053002597e-05, + "loss": 1.0777, + "step": 65583 + }, + { + "epoch": 0.79, + "grad_norm": 40.5962220088557, + "learning_rate": 1.3809601933489741e-05, + "loss": 1.7434, + "step": 65586 + }, + { + "epoch": 0.79, + "grad_norm": 7.493537746301122, + "learning_rate": 1.3809061800976083e-05, + "loss": 1.1808, + "step": 65589 + }, + { + "epoch": 0.79, + "grad_norm": 16.568059441622623, + "learning_rate": 1.3808521655463467e-05, + "loss": 1.4204, + "step": 65592 + }, + { + "epoch": 0.79, + "grad_norm": 9.55897870248182, + "learning_rate": 1.3807981496953744e-05, + "loss": 1.188, + "step": 65595 + }, + { + "epoch": 0.79, + "grad_norm": 12.054921303601683, + "learning_rate": 1.3807441325448747e-05, + "loss": 1.543, + "step": 65598 + }, + { + "epoch": 0.79, + "grad_norm": 11.193760088163032, + "learning_rate": 1.3806901140950325e-05, + "loss": 1.3253, + "step": 65601 + }, + { + "epoch": 0.79, + "grad_norm": 18.69907417071391, + "learning_rate": 1.3806360943460323e-05, + "loss": 1.1208, + "step": 65604 + }, + { + "epoch": 0.79, + "grad_norm": 13.203426435316652, + "learning_rate": 1.380582073298058e-05, + "loss": 1.2118, + "step": 65607 + }, + { + "epoch": 0.79, + "grad_norm": 247.56636201262316, + "learning_rate": 1.3805280509512942e-05, + "loss": 1.7436, + "step": 65610 + }, + { + "epoch": 0.79, + "grad_norm": 37.823604170040156, + "learning_rate": 1.3804740273059253e-05, + "loss": 1.5312, + "step": 65613 + }, + { + "epoch": 0.79, + "grad_norm": 20.620157197575903, + "learning_rate": 1.3804200023621358e-05, + "loss": 1.4436, + "step": 65616 + }, + { + "epoch": 0.79, + "grad_norm": 5.522452209627084, + "learning_rate": 1.3803659761201094e-05, + "loss": 1.5595, + "step": 65619 + }, + { + "epoch": 0.79, + "grad_norm": 21.153701092170596, + "learning_rate": 1.3803119485800311e-05, + "loss": 1.5716, + "step": 65622 + }, + { + "epoch": 0.79, + "grad_norm": 5.175744015203894, + "learning_rate": 1.3802579197420849e-05, + "loss": 1.4339, + "step": 65625 + }, + { + "epoch": 0.79, + "grad_norm": 5.740695730379999, + "learning_rate": 1.380203889606456e-05, + "loss": 1.5973, + "step": 65628 + }, + { + "epoch": 0.79, + "grad_norm": 5.123597463356695, + "learning_rate": 1.3801498581733276e-05, + "loss": 1.3848, + "step": 65631 + }, + { + "epoch": 0.79, + "grad_norm": 14.146079329320274, + "learning_rate": 1.3800958254428847e-05, + "loss": 1.5094, + "step": 65634 + }, + { + "epoch": 0.79, + "grad_norm": 17.498417601086434, + "learning_rate": 1.3800417914153121e-05, + "loss": 1.4885, + "step": 65637 + }, + { + "epoch": 0.79, + "grad_norm": 3.071886475788063, + "learning_rate": 1.3799877560907933e-05, + "loss": 1.3404, + "step": 65640 + }, + { + "epoch": 0.79, + "grad_norm": 98.47836791996986, + "learning_rate": 1.3799337194695136e-05, + "loss": 1.4779, + "step": 65643 + }, + { + "epoch": 0.79, + "grad_norm": 5.574340248055012, + "learning_rate": 1.3798796815516564e-05, + "loss": 1.6551, + "step": 65646 + }, + { + "epoch": 0.79, + "grad_norm": 44.48446014193009, + "learning_rate": 1.3798256423374073e-05, + "loss": 1.6052, + "step": 65649 + }, + { + "epoch": 0.79, + "grad_norm": 10.79260470210595, + "learning_rate": 1.3797716018269497e-05, + "loss": 1.5898, + "step": 65652 + }, + { + "epoch": 0.79, + "grad_norm": 9.801249912993521, + "learning_rate": 1.3797175600204684e-05, + "loss": 1.1032, + "step": 65655 + }, + { + "epoch": 0.79, + "grad_norm": 19.5972390364721, + "learning_rate": 1.379663516918148e-05, + "loss": 1.3896, + "step": 65658 + }, + { + "epoch": 0.79, + "grad_norm": 22.01116121804883, + "learning_rate": 1.3796094725201725e-05, + "loss": 1.4078, + "step": 65661 + }, + { + "epoch": 0.79, + "grad_norm": 13.642572114509651, + "learning_rate": 1.3795554268267269e-05, + "loss": 1.5644, + "step": 65664 + }, + { + "epoch": 0.79, + "grad_norm": 10.222447601423102, + "learning_rate": 1.3795013798379952e-05, + "loss": 1.3585, + "step": 65667 + }, + { + "epoch": 0.79, + "grad_norm": 20.174987854060916, + "learning_rate": 1.3794473315541624e-05, + "loss": 1.1069, + "step": 65670 + }, + { + "epoch": 0.79, + "grad_norm": 18.421097490280996, + "learning_rate": 1.3793932819754118e-05, + "loss": 1.258, + "step": 65673 + }, + { + "epoch": 0.79, + "grad_norm": 8.201659653404962, + "learning_rate": 1.3793392311019291e-05, + "loss": 1.4715, + "step": 65676 + }, + { + "epoch": 0.79, + "grad_norm": 27.751481812186892, + "learning_rate": 1.3792851789338981e-05, + "loss": 1.407, + "step": 65679 + }, + { + "epoch": 0.79, + "grad_norm": 9.247049835261166, + "learning_rate": 1.3792311254715033e-05, + "loss": 1.4522, + "step": 65682 + }, + { + "epoch": 0.79, + "grad_norm": 4.646631907281371, + "learning_rate": 1.3791770707149295e-05, + "loss": 1.2491, + "step": 65685 + }, + { + "epoch": 0.79, + "grad_norm": 7.073475411064195, + "learning_rate": 1.3791230146643605e-05, + "loss": 1.3942, + "step": 65688 + }, + { + "epoch": 0.79, + "grad_norm": 4.606862911291284, + "learning_rate": 1.3790689573199818e-05, + "loss": 1.4031, + "step": 65691 + }, + { + "epoch": 0.79, + "grad_norm": 7.16419667849127, + "learning_rate": 1.3790148986819769e-05, + "loss": 1.6908, + "step": 65694 + }, + { + "epoch": 0.79, + "grad_norm": 15.816901693826916, + "learning_rate": 1.378960838750531e-05, + "loss": 1.2781, + "step": 65697 + }, + { + "epoch": 0.79, + "grad_norm": 26.90535021588842, + "learning_rate": 1.378906777525828e-05, + "loss": 1.187, + "step": 65700 + }, + { + "epoch": 0.79, + "grad_norm": 11.373061777958485, + "learning_rate": 1.3788527150080526e-05, + "loss": 1.3986, + "step": 65703 + }, + { + "epoch": 0.79, + "grad_norm": 18.35561354870083, + "learning_rate": 1.3787986511973894e-05, + "loss": 1.1491, + "step": 65706 + }, + { + "epoch": 0.79, + "grad_norm": 19.83732010717161, + "learning_rate": 1.378744586094023e-05, + "loss": 1.3416, + "step": 65709 + }, + { + "epoch": 0.79, + "grad_norm": 6.184559472114761, + "learning_rate": 1.3786905196981376e-05, + "loss": 1.5415, + "step": 65712 + }, + { + "epoch": 0.79, + "grad_norm": 7.545127535135617, + "learning_rate": 1.3786364520099177e-05, + "loss": 1.5363, + "step": 65715 + }, + { + "epoch": 0.79, + "grad_norm": 13.29469875640932, + "learning_rate": 1.3785823830295484e-05, + "loss": 1.2666, + "step": 65718 + }, + { + "epoch": 0.79, + "grad_norm": 9.49985917588164, + "learning_rate": 1.3785283127572135e-05, + "loss": 1.1806, + "step": 65721 + }, + { + "epoch": 0.79, + "grad_norm": 8.20207562460659, + "learning_rate": 1.3784742411930977e-05, + "loss": 1.2214, + "step": 65724 + }, + { + "epoch": 0.79, + "grad_norm": 8.025936964374692, + "learning_rate": 1.378420168337386e-05, + "loss": 1.5688, + "step": 65727 + }, + { + "epoch": 0.79, + "grad_norm": 14.5929012451301, + "learning_rate": 1.3783660941902623e-05, + "loss": 1.5837, + "step": 65730 + }, + { + "epoch": 0.79, + "grad_norm": 50.180858875902985, + "learning_rate": 1.3783120187519115e-05, + "loss": 1.3673, + "step": 65733 + }, + { + "epoch": 0.79, + "grad_norm": 7.124757257914744, + "learning_rate": 1.3782579420225181e-05, + "loss": 1.5058, + "step": 65736 + }, + { + "epoch": 0.79, + "grad_norm": 5.171573854759566, + "learning_rate": 1.3782038640022664e-05, + "loss": 1.0965, + "step": 65739 + }, + { + "epoch": 0.79, + "grad_norm": 11.569745581473237, + "learning_rate": 1.3781497846913414e-05, + "loss": 1.4385, + "step": 65742 + }, + { + "epoch": 0.79, + "grad_norm": 20.452957926537593, + "learning_rate": 1.3780957040899272e-05, + "loss": 1.2924, + "step": 65745 + }, + { + "epoch": 0.79, + "grad_norm": 18.226163315412258, + "learning_rate": 1.3780416221982085e-05, + "loss": 1.3086, + "step": 65748 + }, + { + "epoch": 0.79, + "grad_norm": 38.968675523326375, + "learning_rate": 1.37798753901637e-05, + "loss": 1.4459, + "step": 65751 + }, + { + "epoch": 0.79, + "grad_norm": 3.339572725371317, + "learning_rate": 1.3779334545445963e-05, + "loss": 1.4133, + "step": 65754 + }, + { + "epoch": 0.79, + "grad_norm": 6.419379646809961, + "learning_rate": 1.3778793687830718e-05, + "loss": 1.2103, + "step": 65757 + }, + { + "epoch": 0.79, + "grad_norm": 23.648707034717933, + "learning_rate": 1.3778252817319811e-05, + "loss": 1.2959, + "step": 65760 + }, + { + "epoch": 0.79, + "grad_norm": 14.875945400905499, + "learning_rate": 1.3777711933915087e-05, + "loss": 0.9742, + "step": 65763 + }, + { + "epoch": 0.79, + "grad_norm": 35.39773909362307, + "learning_rate": 1.3777171037618395e-05, + "loss": 1.4115, + "step": 65766 + }, + { + "epoch": 0.79, + "grad_norm": 7.289392245563977, + "learning_rate": 1.3776630128431576e-05, + "loss": 1.1218, + "step": 65769 + }, + { + "epoch": 0.79, + "grad_norm": 8.808609041920826, + "learning_rate": 1.3776089206356482e-05, + "loss": 1.3712, + "step": 65772 + }, + { + "epoch": 0.79, + "grad_norm": 17.186350193478635, + "learning_rate": 1.3775548271394954e-05, + "loss": 1.3392, + "step": 65775 + }, + { + "epoch": 0.79, + "grad_norm": 7.106207290549118, + "learning_rate": 1.3775007323548839e-05, + "loss": 1.4767, + "step": 65778 + }, + { + "epoch": 0.79, + "grad_norm": 6.0881023429876135, + "learning_rate": 1.3774466362819982e-05, + "loss": 1.3526, + "step": 65781 + }, + { + "epoch": 0.79, + "grad_norm": 5.428351760900333, + "learning_rate": 1.3773925389210234e-05, + "loss": 1.1387, + "step": 65784 + }, + { + "epoch": 0.79, + "grad_norm": 9.040880905995637, + "learning_rate": 1.377338440272144e-05, + "loss": 1.6607, + "step": 65787 + }, + { + "epoch": 0.79, + "grad_norm": 50.569705699419195, + "learning_rate": 1.3772843403355442e-05, + "loss": 1.377, + "step": 65790 + }, + { + "epoch": 0.79, + "grad_norm": 12.957383669708783, + "learning_rate": 1.3772302391114084e-05, + "loss": 1.5318, + "step": 65793 + }, + { + "epoch": 0.79, + "grad_norm": 15.210670333348144, + "learning_rate": 1.3771761365999224e-05, + "loss": 1.6193, + "step": 65796 + }, + { + "epoch": 0.79, + "grad_norm": 11.65032709720077, + "learning_rate": 1.3771220328012698e-05, + "loss": 1.4602, + "step": 65799 + }, + { + "epoch": 0.79, + "grad_norm": 43.90124320576912, + "learning_rate": 1.3770679277156354e-05, + "loss": 1.2929, + "step": 65802 + }, + { + "epoch": 0.79, + "grad_norm": 10.335546367398933, + "learning_rate": 1.3770138213432041e-05, + "loss": 1.5467, + "step": 65805 + }, + { + "epoch": 0.79, + "grad_norm": 14.01804929964006, + "learning_rate": 1.3769597136841604e-05, + "loss": 1.6062, + "step": 65808 + }, + { + "epoch": 0.79, + "grad_norm": 11.59115857716804, + "learning_rate": 1.376905604738689e-05, + "loss": 1.2445, + "step": 65811 + }, + { + "epoch": 0.79, + "grad_norm": 8.734414328772564, + "learning_rate": 1.3768514945069744e-05, + "loss": 1.5856, + "step": 65814 + }, + { + "epoch": 0.79, + "grad_norm": 17.294190436608403, + "learning_rate": 1.3767973829892016e-05, + "loss": 1.6184, + "step": 65817 + }, + { + "epoch": 0.79, + "grad_norm": 25.24734393990915, + "learning_rate": 1.376743270185555e-05, + "loss": 1.3548, + "step": 65820 + }, + { + "epoch": 0.79, + "grad_norm": 30.13897972052573, + "learning_rate": 1.3766891560962191e-05, + "loss": 1.628, + "step": 65823 + }, + { + "epoch": 0.79, + "grad_norm": 5.385438634849839, + "learning_rate": 1.376635040721379e-05, + "loss": 1.476, + "step": 65826 + }, + { + "epoch": 0.79, + "grad_norm": 13.724727117703463, + "learning_rate": 1.3765809240612193e-05, + "loss": 1.5715, + "step": 65829 + }, + { + "epoch": 0.79, + "grad_norm": 4.139391279623247, + "learning_rate": 1.3765268061159244e-05, + "loss": 1.393, + "step": 65832 + }, + { + "epoch": 0.79, + "grad_norm": 14.612917226854524, + "learning_rate": 1.376472686885679e-05, + "loss": 1.3787, + "step": 65835 + }, + { + "epoch": 0.79, + "grad_norm": 39.96367579035414, + "learning_rate": 1.3764185663706681e-05, + "loss": 1.5571, + "step": 65838 + }, + { + "epoch": 0.79, + "grad_norm": 18.956945004441177, + "learning_rate": 1.3763644445710761e-05, + "loss": 1.6181, + "step": 65841 + }, + { + "epoch": 0.79, + "grad_norm": 11.107417364105286, + "learning_rate": 1.3763103214870882e-05, + "loss": 1.2925, + "step": 65844 + }, + { + "epoch": 0.79, + "grad_norm": 25.92550893375924, + "learning_rate": 1.3762561971188881e-05, + "loss": 1.1859, + "step": 65847 + }, + { + "epoch": 0.79, + "grad_norm": 13.841595139092233, + "learning_rate": 1.3762020714666617e-05, + "loss": 1.3819, + "step": 65850 + }, + { + "epoch": 0.79, + "grad_norm": 7.499105353332877, + "learning_rate": 1.376147944530593e-05, + "loss": 1.3057, + "step": 65853 + }, + { + "epoch": 0.79, + "grad_norm": 6.9269795386933435, + "learning_rate": 1.3760938163108665e-05, + "loss": 1.1937, + "step": 65856 + }, + { + "epoch": 0.79, + "grad_norm": 9.789208342814382, + "learning_rate": 1.3760396868076675e-05, + "loss": 1.3974, + "step": 65859 + }, + { + "epoch": 0.79, + "grad_norm": 37.857739134451315, + "learning_rate": 1.3759855560211805e-05, + "loss": 1.4171, + "step": 65862 + }, + { + "epoch": 0.79, + "grad_norm": 29.412451618967353, + "learning_rate": 1.3759314239515903e-05, + "loss": 1.284, + "step": 65865 + }, + { + "epoch": 0.79, + "grad_norm": 10.862884899255711, + "learning_rate": 1.3758772905990814e-05, + "loss": 1.4057, + "step": 65868 + }, + { + "epoch": 0.79, + "grad_norm": 22.146183584413357, + "learning_rate": 1.3758231559638388e-05, + "loss": 1.3267, + "step": 65871 + }, + { + "epoch": 0.79, + "grad_norm": 11.46747435176242, + "learning_rate": 1.3757690200460472e-05, + "loss": 1.0402, + "step": 65874 + }, + { + "epoch": 0.79, + "grad_norm": 5.5637180197687535, + "learning_rate": 1.3757148828458911e-05, + "loss": 1.1564, + "step": 65877 + }, + { + "epoch": 0.79, + "grad_norm": 10.465059205150528, + "learning_rate": 1.3756607443635556e-05, + "loss": 1.4279, + "step": 65880 + }, + { + "epoch": 0.79, + "grad_norm": 9.104569667036158, + "learning_rate": 1.3756066045992253e-05, + "loss": 1.2334, + "step": 65883 + }, + { + "epoch": 0.79, + "grad_norm": 20.66027719248454, + "learning_rate": 1.375552463553085e-05, + "loss": 0.9925, + "step": 65886 + }, + { + "epoch": 0.79, + "grad_norm": 14.049471658476065, + "learning_rate": 1.3754983212253192e-05, + "loss": 1.2901, + "step": 65889 + }, + { + "epoch": 0.79, + "grad_norm": 13.449486955248354, + "learning_rate": 1.375444177616113e-05, + "loss": 1.522, + "step": 65892 + }, + { + "epoch": 0.79, + "grad_norm": 5.670366030586704, + "learning_rate": 1.3753900327256511e-05, + "loss": 1.4998, + "step": 65895 + }, + { + "epoch": 0.79, + "grad_norm": 25.05634688332825, + "learning_rate": 1.3753358865541183e-05, + "loss": 1.6253, + "step": 65898 + }, + { + "epoch": 0.79, + "grad_norm": 3.002139538238591, + "learning_rate": 1.375281739101699e-05, + "loss": 1.0947, + "step": 65901 + }, + { + "epoch": 0.79, + "grad_norm": 32.68993117443327, + "learning_rate": 1.3752275903685786e-05, + "loss": 1.3432, + "step": 65904 + }, + { + "epoch": 0.79, + "grad_norm": 13.646106719723509, + "learning_rate": 1.3751734403549416e-05, + "loss": 1.3965, + "step": 65907 + }, + { + "epoch": 0.79, + "grad_norm": 22.084122377555296, + "learning_rate": 1.3751192890609724e-05, + "loss": 1.4199, + "step": 65910 + }, + { + "epoch": 0.79, + "grad_norm": 17.151938526631465, + "learning_rate": 1.3750651364868566e-05, + "loss": 1.4348, + "step": 65913 + }, + { + "epoch": 0.79, + "grad_norm": 22.46826520900208, + "learning_rate": 1.3750109826327785e-05, + "loss": 1.3467, + "step": 65916 + }, + { + "epoch": 0.79, + "grad_norm": 14.85554206232255, + "learning_rate": 1.374956827498923e-05, + "loss": 1.4527, + "step": 65919 + }, + { + "epoch": 0.79, + "grad_norm": 14.579017220521857, + "learning_rate": 1.3749026710854745e-05, + "loss": 1.2166, + "step": 65922 + }, + { + "epoch": 0.79, + "grad_norm": 7.652536251677295, + "learning_rate": 1.3748485133926188e-05, + "loss": 1.021, + "step": 65925 + }, + { + "epoch": 0.79, + "grad_norm": 12.182724165267096, + "learning_rate": 1.3747943544205398e-05, + "loss": 1.3554, + "step": 65928 + }, + { + "epoch": 0.79, + "grad_norm": 21.532835960800416, + "learning_rate": 1.3747401941694227e-05, + "loss": 1.2478, + "step": 65931 + }, + { + "epoch": 0.79, + "grad_norm": 23.548164566999628, + "learning_rate": 1.3746860326394525e-05, + "loss": 1.4837, + "step": 65934 + }, + { + "epoch": 0.79, + "grad_norm": 13.077084510812217, + "learning_rate": 1.3746318698308136e-05, + "loss": 1.4233, + "step": 65937 + }, + { + "epoch": 0.79, + "grad_norm": 24.264566033053825, + "learning_rate": 1.3745777057436913e-05, + "loss": 1.5464, + "step": 65940 + }, + { + "epoch": 0.79, + "grad_norm": 15.321724378064896, + "learning_rate": 1.3745235403782699e-05, + "loss": 1.3519, + "step": 65943 + }, + { + "epoch": 0.79, + "grad_norm": 36.52411004963562, + "learning_rate": 1.3744693737347347e-05, + "loss": 1.472, + "step": 65946 + }, + { + "epoch": 0.79, + "grad_norm": 20.645047767484684, + "learning_rate": 1.3744152058132706e-05, + "loss": 1.5087, + "step": 65949 + }, + { + "epoch": 0.79, + "grad_norm": 50.22734030015934, + "learning_rate": 1.3743610366140622e-05, + "loss": 1.4644, + "step": 65952 + }, + { + "epoch": 0.79, + "grad_norm": 16.475430294392588, + "learning_rate": 1.3743068661372942e-05, + "loss": 1.5115, + "step": 65955 + }, + { + "epoch": 0.79, + "grad_norm": 46.455539758136005, + "learning_rate": 1.374252694383152e-05, + "loss": 1.1852, + "step": 65958 + }, + { + "epoch": 0.79, + "grad_norm": 25.615276051472286, + "learning_rate": 1.37419852135182e-05, + "loss": 1.5727, + "step": 65961 + }, + { + "epoch": 0.79, + "grad_norm": 8.73665762147774, + "learning_rate": 1.3741443470434833e-05, + "loss": 1.3263, + "step": 65964 + }, + { + "epoch": 0.79, + "grad_norm": 20.194622682088262, + "learning_rate": 1.3740901714583266e-05, + "loss": 1.4081, + "step": 65967 + }, + { + "epoch": 0.79, + "grad_norm": 8.31297275635282, + "learning_rate": 1.374035994596535e-05, + "loss": 1.0326, + "step": 65970 + }, + { + "epoch": 0.79, + "grad_norm": 9.606600061065594, + "learning_rate": 1.3739818164582933e-05, + "loss": 1.1907, + "step": 65973 + }, + { + "epoch": 0.79, + "grad_norm": 6.415047558435394, + "learning_rate": 1.3739276370437859e-05, + "loss": 1.3065, + "step": 65976 + }, + { + "epoch": 0.79, + "grad_norm": 11.530120894773413, + "learning_rate": 1.373873456353199e-05, + "loss": 1.4855, + "step": 65979 + }, + { + "epoch": 0.79, + "grad_norm": 28.0762734281196, + "learning_rate": 1.373819274386716e-05, + "loss": 1.4264, + "step": 65982 + }, + { + "epoch": 0.79, + "grad_norm": 8.057689621953505, + "learning_rate": 1.3737650911445226e-05, + "loss": 1.509, + "step": 65985 + }, + { + "epoch": 0.79, + "grad_norm": 10.84359351972258, + "learning_rate": 1.3737109066268037e-05, + "loss": 1.4794, + "step": 65988 + }, + { + "epoch": 0.79, + "grad_norm": 50.02046652713143, + "learning_rate": 1.3736567208337441e-05, + "loss": 1.1691, + "step": 65991 + }, + { + "epoch": 0.79, + "grad_norm": 10.979530675617863, + "learning_rate": 1.3736025337655287e-05, + "loss": 1.4097, + "step": 65994 + }, + { + "epoch": 0.79, + "grad_norm": 5.039120068638647, + "learning_rate": 1.3735483454223421e-05, + "loss": 1.1821, + "step": 65997 + }, + { + "epoch": 0.79, + "grad_norm": 4.731918578431722, + "learning_rate": 1.3734941558043702e-05, + "loss": 1.2244, + "step": 66000 + }, + { + "epoch": 0.79, + "grad_norm": 40.12998538406794, + "learning_rate": 1.3734399649117967e-05, + "loss": 1.5462, + "step": 66003 + }, + { + "epoch": 0.79, + "grad_norm": 8.00191973712989, + "learning_rate": 1.3733857727448072e-05, + "loss": 1.3837, + "step": 66006 + }, + { + "epoch": 0.79, + "grad_norm": 15.488516245428986, + "learning_rate": 1.3733315793035866e-05, + "loss": 1.2656, + "step": 66009 + }, + { + "epoch": 0.79, + "grad_norm": 11.130819249049267, + "learning_rate": 1.3732773845883198e-05, + "loss": 1.4217, + "step": 66012 + }, + { + "epoch": 0.79, + "grad_norm": 12.472036424814128, + "learning_rate": 1.373223188599192e-05, + "loss": 1.3171, + "step": 66015 + }, + { + "epoch": 0.79, + "grad_norm": 4.803674061264886, + "learning_rate": 1.3731689913363875e-05, + "loss": 1.1918, + "step": 66018 + }, + { + "epoch": 0.79, + "grad_norm": 13.771492574156685, + "learning_rate": 1.3731147928000918e-05, + "loss": 1.4016, + "step": 66021 + }, + { + "epoch": 0.79, + "grad_norm": 11.715971141334183, + "learning_rate": 1.3730605929904898e-05, + "loss": 1.3092, + "step": 66024 + }, + { + "epoch": 0.79, + "grad_norm": 10.156045125724127, + "learning_rate": 1.3730063919077665e-05, + "loss": 1.4399, + "step": 66027 + }, + { + "epoch": 0.79, + "grad_norm": 4.293831862143987, + "learning_rate": 1.3729521895521062e-05, + "loss": 1.6582, + "step": 66030 + }, + { + "epoch": 0.79, + "grad_norm": 1.9236266723563964, + "learning_rate": 1.372897985923695e-05, + "loss": 1.258, + "step": 66033 + }, + { + "epoch": 0.79, + "grad_norm": 11.101472353462642, + "learning_rate": 1.372843781022717e-05, + "loss": 1.2422, + "step": 66036 + }, + { + "epoch": 0.79, + "grad_norm": 22.852443337700585, + "learning_rate": 1.3727895748493573e-05, + "loss": 1.2123, + "step": 66039 + }, + { + "epoch": 0.79, + "grad_norm": 6.950332848346592, + "learning_rate": 1.3727353674038013e-05, + "loss": 1.3149, + "step": 66042 + }, + { + "epoch": 0.79, + "grad_norm": 19.1693324923257, + "learning_rate": 1.3726811586862338e-05, + "loss": 1.6061, + "step": 66045 + }, + { + "epoch": 0.79, + "grad_norm": 10.673614669080099, + "learning_rate": 1.3726269486968397e-05, + "loss": 1.2736, + "step": 66048 + }, + { + "epoch": 0.79, + "grad_norm": 36.79008010032276, + "learning_rate": 1.372572737435804e-05, + "loss": 1.7222, + "step": 66051 + }, + { + "epoch": 0.79, + "grad_norm": 7.443351275180341, + "learning_rate": 1.3725185249033119e-05, + "loss": 1.3069, + "step": 66054 + }, + { + "epoch": 0.79, + "grad_norm": 17.48199184658329, + "learning_rate": 1.3724643110995482e-05, + "loss": 1.3441, + "step": 66057 + }, + { + "epoch": 0.79, + "grad_norm": 20.29522716200975, + "learning_rate": 1.3724100960246978e-05, + "loss": 1.383, + "step": 66060 + }, + { + "epoch": 0.79, + "grad_norm": 10.24338914328549, + "learning_rate": 1.3723558796789459e-05, + "loss": 1.4048, + "step": 66063 + }, + { + "epoch": 0.79, + "grad_norm": 6.023696252293837, + "learning_rate": 1.3723016620624777e-05, + "loss": 0.9752, + "step": 66066 + }, + { + "epoch": 0.79, + "grad_norm": 4.079815522228471, + "learning_rate": 1.372247443175478e-05, + "loss": 1.3379, + "step": 66069 + }, + { + "epoch": 0.79, + "grad_norm": 15.377871720168088, + "learning_rate": 1.3721932230181314e-05, + "loss": 1.2249, + "step": 66072 + }, + { + "epoch": 0.79, + "grad_norm": 13.363734322698251, + "learning_rate": 1.372139001590624e-05, + "loss": 1.4604, + "step": 66075 + }, + { + "epoch": 0.79, + "grad_norm": 4.400504209717464, + "learning_rate": 1.37208477889314e-05, + "loss": 1.6188, + "step": 66078 + }, + { + "epoch": 0.79, + "grad_norm": 12.16769073579498, + "learning_rate": 1.3720305549258645e-05, + "loss": 1.2588, + "step": 66081 + }, + { + "epoch": 0.79, + "grad_norm": 5.111310675188535, + "learning_rate": 1.371976329688983e-05, + "loss": 0.7322, + "step": 66084 + }, + { + "epoch": 0.79, + "grad_norm": 21.86342012751177, + "learning_rate": 1.3719221031826804e-05, + "loss": 1.5205, + "step": 66087 + }, + { + "epoch": 0.79, + "grad_norm": 78.8712950585054, + "learning_rate": 1.3718678754071411e-05, + "loss": 1.2293, + "step": 66090 + }, + { + "epoch": 0.79, + "grad_norm": 17.715859196066287, + "learning_rate": 1.3718136463625508e-05, + "loss": 1.4425, + "step": 66093 + }, + { + "epoch": 0.79, + "grad_norm": 19.904290786860468, + "learning_rate": 1.371759416049095e-05, + "loss": 1.4168, + "step": 66096 + }, + { + "epoch": 0.79, + "grad_norm": 31.885293623116276, + "learning_rate": 1.3717051844669578e-05, + "loss": 1.2965, + "step": 66099 + }, + { + "epoch": 0.79, + "grad_norm": 6.067708935258289, + "learning_rate": 1.3716509516163246e-05, + "loss": 1.3451, + "step": 66102 + }, + { + "epoch": 0.79, + "grad_norm": 12.921804258888672, + "learning_rate": 1.3715967174973805e-05, + "loss": 1.1285, + "step": 66105 + }, + { + "epoch": 0.79, + "grad_norm": 25.863977136552815, + "learning_rate": 1.3715424821103112e-05, + "loss": 1.1499, + "step": 66108 + }, + { + "epoch": 0.79, + "grad_norm": 5.151260176561217, + "learning_rate": 1.3714882454553006e-05, + "loss": 1.5135, + "step": 66111 + }, + { + "epoch": 0.8, + "grad_norm": 15.517926467219702, + "learning_rate": 1.371434007532535e-05, + "loss": 1.5262, + "step": 66114 + }, + { + "epoch": 0.8, + "grad_norm": 4.49842751330781, + "learning_rate": 1.3713797683421985e-05, + "loss": 1.0393, + "step": 66117 + }, + { + "epoch": 0.8, + "grad_norm": 7.888324043630009, + "learning_rate": 1.3713255278844767e-05, + "loss": 1.2081, + "step": 66120 + }, + { + "epoch": 0.8, + "grad_norm": 4.4344229250347995, + "learning_rate": 1.3712712861595547e-05, + "loss": 1.0565, + "step": 66123 + }, + { + "epoch": 0.8, + "grad_norm": 21.821031027447514, + "learning_rate": 1.3712170431676174e-05, + "loss": 1.417, + "step": 66126 + }, + { + "epoch": 0.8, + "grad_norm": 40.01974440742804, + "learning_rate": 1.3711627989088506e-05, + "loss": 1.7482, + "step": 66129 + }, + { + "epoch": 0.8, + "grad_norm": 10.030648900129059, + "learning_rate": 1.3711085533834382e-05, + "loss": 1.3011, + "step": 66132 + }, + { + "epoch": 0.8, + "grad_norm": 23.847377710559897, + "learning_rate": 1.3710543065915663e-05, + "loss": 1.3279, + "step": 66135 + }, + { + "epoch": 0.8, + "grad_norm": 4.83156108653753, + "learning_rate": 1.3710000585334196e-05, + "loss": 1.2765, + "step": 66138 + }, + { + "epoch": 0.8, + "grad_norm": 20.996045722096177, + "learning_rate": 1.3709458092091834e-05, + "loss": 1.3682, + "step": 66141 + }, + { + "epoch": 0.8, + "grad_norm": 5.806338935123052, + "learning_rate": 1.3708915586190426e-05, + "loss": 1.5871, + "step": 66144 + }, + { + "epoch": 0.8, + "grad_norm": 10.24344417308976, + "learning_rate": 1.3708373067631824e-05, + "loss": 1.4129, + "step": 66147 + }, + { + "epoch": 0.8, + "grad_norm": 3.1166515569046638, + "learning_rate": 1.3707830536417885e-05, + "loss": 1.5201, + "step": 66150 + }, + { + "epoch": 0.8, + "grad_norm": 2.291910993774523, + "learning_rate": 1.3707287992550453e-05, + "loss": 1.3585, + "step": 66153 + }, + { + "epoch": 0.8, + "grad_norm": 25.59319994913, + "learning_rate": 1.3706745436031384e-05, + "loss": 1.1942, + "step": 66156 + }, + { + "epoch": 0.8, + "grad_norm": 10.926898317916804, + "learning_rate": 1.3706202866862529e-05, + "loss": 1.2775, + "step": 66159 + }, + { + "epoch": 0.8, + "grad_norm": 39.670819842132495, + "learning_rate": 1.3705660285045734e-05, + "loss": 1.1667, + "step": 66162 + }, + { + "epoch": 0.8, + "grad_norm": 10.207160841579595, + "learning_rate": 1.3705117690582857e-05, + "loss": 1.2089, + "step": 66165 + }, + { + "epoch": 0.8, + "grad_norm": 4.562403315237388, + "learning_rate": 1.3704575083475748e-05, + "loss": 1.261, + "step": 66168 + }, + { + "epoch": 0.8, + "grad_norm": 8.645183662832599, + "learning_rate": 1.370403246372626e-05, + "loss": 1.8592, + "step": 66171 + }, + { + "epoch": 0.8, + "grad_norm": 10.309767671632558, + "learning_rate": 1.3703489831336243e-05, + "loss": 1.3766, + "step": 66174 + }, + { + "epoch": 0.8, + "grad_norm": 43.492566851518156, + "learning_rate": 1.3702947186307549e-05, + "loss": 1.1264, + "step": 66177 + }, + { + "epoch": 0.8, + "grad_norm": 8.336215305186883, + "learning_rate": 1.3702404528642028e-05, + "loss": 1.7599, + "step": 66180 + }, + { + "epoch": 0.8, + "grad_norm": 12.427466555696252, + "learning_rate": 1.3701861858341538e-05, + "loss": 1.5319, + "step": 66183 + }, + { + "epoch": 0.8, + "grad_norm": 5.668123552122155, + "learning_rate": 1.3701319175407925e-05, + "loss": 1.3865, + "step": 66186 + }, + { + "epoch": 0.8, + "grad_norm": 15.234249802702314, + "learning_rate": 1.3700776479843044e-05, + "loss": 1.468, + "step": 66189 + }, + { + "epoch": 0.8, + "grad_norm": 43.633856513562264, + "learning_rate": 1.3700233771648743e-05, + "loss": 1.4013, + "step": 66192 + }, + { + "epoch": 0.8, + "grad_norm": 8.290381375646312, + "learning_rate": 1.369969105082688e-05, + "loss": 1.1991, + "step": 66195 + }, + { + "epoch": 0.8, + "grad_norm": 4.766633658110646, + "learning_rate": 1.36991483173793e-05, + "loss": 1.3078, + "step": 66198 + }, + { + "epoch": 0.8, + "grad_norm": 19.79590248815495, + "learning_rate": 1.3698605571307861e-05, + "loss": 1.4383, + "step": 66201 + }, + { + "epoch": 0.8, + "grad_norm": 24.13178203711139, + "learning_rate": 1.3698062812614417e-05, + "loss": 1.7948, + "step": 66204 + }, + { + "epoch": 0.8, + "grad_norm": 8.18285590444838, + "learning_rate": 1.3697520041300812e-05, + "loss": 1.209, + "step": 66207 + }, + { + "epoch": 0.8, + "grad_norm": 12.45314234439774, + "learning_rate": 1.3696977257368906e-05, + "loss": 1.6176, + "step": 66210 + }, + { + "epoch": 0.8, + "grad_norm": 8.920028295936863, + "learning_rate": 1.3696434460820546e-05, + "loss": 1.526, + "step": 66213 + }, + { + "epoch": 0.8, + "grad_norm": 3.5382152873153916, + "learning_rate": 1.3695891651657589e-05, + "loss": 1.8187, + "step": 66216 + }, + { + "epoch": 0.8, + "grad_norm": 13.220391591143233, + "learning_rate": 1.3695348829881882e-05, + "loss": 1.397, + "step": 66219 + }, + { + "epoch": 0.8, + "grad_norm": 12.52441854304524, + "learning_rate": 1.3694805995495283e-05, + "loss": 1.4183, + "step": 66222 + }, + { + "epoch": 0.8, + "grad_norm": 15.112537681472189, + "learning_rate": 1.3694263148499642e-05, + "loss": 1.4881, + "step": 66225 + }, + { + "epoch": 0.8, + "grad_norm": 9.691787591712602, + "learning_rate": 1.369372028889681e-05, + "loss": 1.1831, + "step": 66228 + }, + { + "epoch": 0.8, + "grad_norm": 7.482964817081894, + "learning_rate": 1.3693177416688641e-05, + "loss": 1.2331, + "step": 66231 + }, + { + "epoch": 0.8, + "grad_norm": 90.17264282862585, + "learning_rate": 1.369263453187699e-05, + "loss": 1.3099, + "step": 66234 + }, + { + "epoch": 0.8, + "grad_norm": 18.65995666864214, + "learning_rate": 1.3692091634463706e-05, + "loss": 1.2886, + "step": 66237 + }, + { + "epoch": 0.8, + "grad_norm": 12.143757273977311, + "learning_rate": 1.3691548724450643e-05, + "loss": 1.7103, + "step": 66240 + }, + { + "epoch": 0.8, + "grad_norm": 13.726676145369005, + "learning_rate": 1.3691005801839654e-05, + "loss": 1.2395, + "step": 66243 + }, + { + "epoch": 0.8, + "grad_norm": 54.53944751241216, + "learning_rate": 1.3690462866632592e-05, + "loss": 1.3839, + "step": 66246 + }, + { + "epoch": 0.8, + "grad_norm": 13.708164298476754, + "learning_rate": 1.3689919918831312e-05, + "loss": 1.5525, + "step": 66249 + }, + { + "epoch": 0.8, + "grad_norm": 4.134589981760325, + "learning_rate": 1.3689376958437659e-05, + "loss": 1.2419, + "step": 66252 + }, + { + "epoch": 0.8, + "grad_norm": 10.077329181081073, + "learning_rate": 1.3688833985453493e-05, + "loss": 1.576, + "step": 66255 + }, + { + "epoch": 0.8, + "grad_norm": 16.908932648793243, + "learning_rate": 1.368829099988067e-05, + "loss": 1.4237, + "step": 66258 + }, + { + "epoch": 0.8, + "grad_norm": 2.7327162956440496, + "learning_rate": 1.3687748001721031e-05, + "loss": 1.3155, + "step": 66261 + }, + { + "epoch": 0.8, + "grad_norm": 4.265735789934977, + "learning_rate": 1.3687204990976441e-05, + "loss": 1.529, + "step": 66264 + }, + { + "epoch": 0.8, + "grad_norm": 6.093600511568659, + "learning_rate": 1.3686661967648749e-05, + "loss": 1.2473, + "step": 66267 + }, + { + "epoch": 0.8, + "grad_norm": 6.616963602794991, + "learning_rate": 1.3686118931739807e-05, + "loss": 0.974, + "step": 66270 + }, + { + "epoch": 0.8, + "grad_norm": 15.11145859806039, + "learning_rate": 1.3685575883251464e-05, + "loss": 1.5944, + "step": 66273 + }, + { + "epoch": 0.8, + "grad_norm": 12.698719851765468, + "learning_rate": 1.3685032822185584e-05, + "loss": 1.3851, + "step": 66276 + }, + { + "epoch": 0.8, + "grad_norm": 16.263481362067505, + "learning_rate": 1.3684489748544014e-05, + "loss": 1.2306, + "step": 66279 + }, + { + "epoch": 0.8, + "grad_norm": 22.78199805782858, + "learning_rate": 1.3683946662328604e-05, + "loss": 1.7119, + "step": 66282 + }, + { + "epoch": 0.8, + "grad_norm": 40.517647239279995, + "learning_rate": 1.3683403563541213e-05, + "loss": 1.2634, + "step": 66285 + }, + { + "epoch": 0.8, + "grad_norm": 4.704358615019017, + "learning_rate": 1.3682860452183695e-05, + "loss": 1.5239, + "step": 66288 + }, + { + "epoch": 0.8, + "grad_norm": 6.784819367888241, + "learning_rate": 1.3682317328257897e-05, + "loss": 1.1684, + "step": 66291 + }, + { + "epoch": 0.8, + "grad_norm": 20.442396370263026, + "learning_rate": 1.3681774191765678e-05, + "loss": 1.4345, + "step": 66294 + }, + { + "epoch": 0.8, + "grad_norm": 5.199984251531991, + "learning_rate": 1.3681231042708889e-05, + "loss": 1.5687, + "step": 66297 + }, + { + "epoch": 0.8, + "grad_norm": 11.003659936313195, + "learning_rate": 1.3680687881089385e-05, + "loss": 1.3546, + "step": 66300 + }, + { + "epoch": 0.8, + "grad_norm": 5.363149574541334, + "learning_rate": 1.368014470690902e-05, + "loss": 1.3242, + "step": 66303 + }, + { + "epoch": 0.8, + "grad_norm": 10.849659624269972, + "learning_rate": 1.3679601520169644e-05, + "loss": 1.4265, + "step": 66306 + }, + { + "epoch": 0.8, + "grad_norm": 4.025511462858519, + "learning_rate": 1.3679058320873115e-05, + "loss": 1.4154, + "step": 66309 + }, + { + "epoch": 0.8, + "grad_norm": 6.671288924896419, + "learning_rate": 1.3678515109021286e-05, + "loss": 1.1614, + "step": 66312 + }, + { + "epoch": 0.8, + "grad_norm": 8.694067237288836, + "learning_rate": 1.3677971884616008e-05, + "loss": 1.8082, + "step": 66315 + }, + { + "epoch": 0.8, + "grad_norm": 11.786453086384025, + "learning_rate": 1.3677428647659139e-05, + "loss": 1.3897, + "step": 66318 + }, + { + "epoch": 0.8, + "grad_norm": 8.289102746758946, + "learning_rate": 1.3676885398152528e-05, + "loss": 1.4459, + "step": 66321 + }, + { + "epoch": 0.8, + "grad_norm": 2.115120631893389, + "learning_rate": 1.3676342136098035e-05, + "loss": 1.3888, + "step": 66324 + }, + { + "epoch": 0.8, + "grad_norm": 14.343936732995738, + "learning_rate": 1.3675798861497507e-05, + "loss": 1.5068, + "step": 66327 + }, + { + "epoch": 0.8, + "grad_norm": 9.732057925290887, + "learning_rate": 1.3675255574352804e-05, + "loss": 1.3046, + "step": 66330 + }, + { + "epoch": 0.8, + "grad_norm": 9.501039209282956, + "learning_rate": 1.3674712274665777e-05, + "loss": 1.0483, + "step": 66333 + }, + { + "epoch": 0.8, + "grad_norm": 10.39822112030194, + "learning_rate": 1.3674168962438279e-05, + "loss": 1.0431, + "step": 66336 + }, + { + "epoch": 0.8, + "grad_norm": 5.610477456016652, + "learning_rate": 1.3673625637672168e-05, + "loss": 1.6618, + "step": 66339 + }, + { + "epoch": 0.8, + "grad_norm": 8.961087867146372, + "learning_rate": 1.3673082300369296e-05, + "loss": 1.2869, + "step": 66342 + }, + { + "epoch": 0.8, + "grad_norm": 8.025545589488615, + "learning_rate": 1.3672538950531516e-05, + "loss": 1.3444, + "step": 66345 + }, + { + "epoch": 0.8, + "grad_norm": 8.865918257711764, + "learning_rate": 1.367199558816068e-05, + "loss": 1.4726, + "step": 66348 + }, + { + "epoch": 0.8, + "grad_norm": 4.206052160430535, + "learning_rate": 1.3671452213258652e-05, + "loss": 1.0858, + "step": 66351 + }, + { + "epoch": 0.8, + "grad_norm": 10.024171151168785, + "learning_rate": 1.3670908825827276e-05, + "loss": 1.4346, + "step": 66354 + }, + { + "epoch": 0.8, + "grad_norm": 18.135828885036588, + "learning_rate": 1.3670365425868413e-05, + "loss": 1.2933, + "step": 66357 + }, + { + "epoch": 0.8, + "grad_norm": 8.357786813883063, + "learning_rate": 1.3669822013383911e-05, + "loss": 1.2803, + "step": 66360 + }, + { + "epoch": 0.8, + "grad_norm": 3.7206285659835876, + "learning_rate": 1.3669278588375634e-05, + "loss": 1.0898, + "step": 66363 + }, + { + "epoch": 0.8, + "grad_norm": 8.41774044033275, + "learning_rate": 1.3668735150845427e-05, + "loss": 1.0708, + "step": 66366 + }, + { + "epoch": 0.8, + "grad_norm": 10.413363451634673, + "learning_rate": 1.3668191700795148e-05, + "loss": 1.1407, + "step": 66369 + }, + { + "epoch": 0.8, + "grad_norm": 8.39807386757084, + "learning_rate": 1.3667648238226653e-05, + "loss": 1.5919, + "step": 66372 + }, + { + "epoch": 0.8, + "grad_norm": 9.920728992019967, + "learning_rate": 1.3667104763141797e-05, + "loss": 1.556, + "step": 66375 + }, + { + "epoch": 0.8, + "grad_norm": 9.669516377791311, + "learning_rate": 1.3666561275542432e-05, + "loss": 1.1823, + "step": 66378 + }, + { + "epoch": 0.8, + "grad_norm": 13.150497660140532, + "learning_rate": 1.3666017775430414e-05, + "loss": 1.3518, + "step": 66381 + }, + { + "epoch": 0.8, + "grad_norm": 26.699175245015766, + "learning_rate": 1.3665474262807598e-05, + "loss": 1.3253, + "step": 66384 + }, + { + "epoch": 0.8, + "grad_norm": 6.946043468573246, + "learning_rate": 1.366493073767584e-05, + "loss": 1.2133, + "step": 66387 + }, + { + "epoch": 0.8, + "grad_norm": 3.148363360789712, + "learning_rate": 1.366438720003699e-05, + "loss": 1.2055, + "step": 66390 + }, + { + "epoch": 0.8, + "grad_norm": 9.516124409928155, + "learning_rate": 1.3663843649892909e-05, + "loss": 1.333, + "step": 66393 + }, + { + "epoch": 0.8, + "grad_norm": 8.325815404735572, + "learning_rate": 1.3663300087245447e-05, + "loss": 1.2057, + "step": 66396 + }, + { + "epoch": 0.8, + "grad_norm": 10.522065164419878, + "learning_rate": 1.3662756512096467e-05, + "loss": 1.4351, + "step": 66399 + }, + { + "epoch": 0.8, + "grad_norm": 46.317040168884375, + "learning_rate": 1.366221292444781e-05, + "loss": 1.4095, + "step": 66402 + }, + { + "epoch": 0.8, + "grad_norm": 7.782276739689167, + "learning_rate": 1.3661669324301345e-05, + "loss": 1.3364, + "step": 66405 + }, + { + "epoch": 0.8, + "grad_norm": 11.673833268864106, + "learning_rate": 1.3661125711658922e-05, + "loss": 1.2858, + "step": 66408 + }, + { + "epoch": 0.8, + "grad_norm": 5.839174071866697, + "learning_rate": 1.3660582086522393e-05, + "loss": 1.4504, + "step": 66411 + }, + { + "epoch": 0.8, + "grad_norm": 10.95694845274925, + "learning_rate": 1.3660038448893618e-05, + "loss": 1.1489, + "step": 66414 + }, + { + "epoch": 0.8, + "grad_norm": 13.456882273351937, + "learning_rate": 1.3659494798774448e-05, + "loss": 1.3915, + "step": 66417 + }, + { + "epoch": 0.8, + "grad_norm": 5.109538984542986, + "learning_rate": 1.3658951136166743e-05, + "loss": 1.4392, + "step": 66420 + }, + { + "epoch": 0.8, + "grad_norm": 16.966877971303724, + "learning_rate": 1.3658407461072351e-05, + "loss": 1.24, + "step": 66423 + }, + { + "epoch": 0.8, + "grad_norm": 2.8550923379385202, + "learning_rate": 1.3657863773493136e-05, + "loss": 0.9408, + "step": 66426 + }, + { + "epoch": 0.8, + "grad_norm": 4.008659212903686, + "learning_rate": 1.365732007343095e-05, + "loss": 1.3063, + "step": 66429 + }, + { + "epoch": 0.8, + "grad_norm": 31.64636978113605, + "learning_rate": 1.3656776360887647e-05, + "loss": 1.4131, + "step": 66432 + }, + { + "epoch": 0.8, + "grad_norm": 22.830281189181754, + "learning_rate": 1.3656232635865083e-05, + "loss": 1.4261, + "step": 66435 + }, + { + "epoch": 0.8, + "grad_norm": 3.124245696230815, + "learning_rate": 1.3655688898365113e-05, + "loss": 1.3926, + "step": 66438 + }, + { + "epoch": 0.8, + "grad_norm": 10.223903666005201, + "learning_rate": 1.3655145148389597e-05, + "loss": 1.3978, + "step": 66441 + }, + { + "epoch": 0.8, + "grad_norm": 13.651747478385264, + "learning_rate": 1.3654601385940382e-05, + "loss": 1.4404, + "step": 66444 + }, + { + "epoch": 0.8, + "grad_norm": 5.738317161299628, + "learning_rate": 1.3654057611019333e-05, + "loss": 1.7309, + "step": 66447 + }, + { + "epoch": 0.8, + "grad_norm": 17.849533101612597, + "learning_rate": 1.3653513823628302e-05, + "loss": 1.1295, + "step": 66450 + }, + { + "epoch": 0.8, + "grad_norm": 13.436907741952686, + "learning_rate": 1.3652970023769143e-05, + "loss": 1.5822, + "step": 66453 + }, + { + "epoch": 0.8, + "grad_norm": 4.242134719904104, + "learning_rate": 1.365242621144371e-05, + "loss": 1.1579, + "step": 66456 + }, + { + "epoch": 0.8, + "grad_norm": 11.166271296090484, + "learning_rate": 1.3651882386653865e-05, + "loss": 1.2441, + "step": 66459 + }, + { + "epoch": 0.8, + "grad_norm": 65.25713504295793, + "learning_rate": 1.365133854940146e-05, + "loss": 1.5012, + "step": 66462 + }, + { + "epoch": 0.8, + "grad_norm": 7.370410711275364, + "learning_rate": 1.3650794699688354e-05, + "loss": 1.3535, + "step": 66465 + }, + { + "epoch": 0.8, + "grad_norm": 11.87765423721937, + "learning_rate": 1.3650250837516397e-05, + "loss": 1.5302, + "step": 66468 + }, + { + "epoch": 0.8, + "grad_norm": 4.383401729013524, + "learning_rate": 1.3649706962887453e-05, + "loss": 1.5247, + "step": 66471 + }, + { + "epoch": 0.8, + "grad_norm": 2.639354072012393, + "learning_rate": 1.364916307580337e-05, + "loss": 1.3796, + "step": 66474 + }, + { + "epoch": 0.8, + "grad_norm": 9.646998319056712, + "learning_rate": 1.364861917626601e-05, + "loss": 1.5274, + "step": 66477 + }, + { + "epoch": 0.8, + "grad_norm": 9.26190161196139, + "learning_rate": 1.3648075264277224e-05, + "loss": 1.3583, + "step": 66480 + }, + { + "epoch": 0.8, + "grad_norm": 7.7477706143909115, + "learning_rate": 1.3647531339838876e-05, + "loss": 1.162, + "step": 66483 + }, + { + "epoch": 0.8, + "grad_norm": 26.21775206217401, + "learning_rate": 1.3646987402952814e-05, + "loss": 1.6554, + "step": 66486 + }, + { + "epoch": 0.8, + "grad_norm": 26.752655501606036, + "learning_rate": 1.3646443453620898e-05, + "loss": 1.3411, + "step": 66489 + }, + { + "epoch": 0.8, + "grad_norm": 15.850002844489572, + "learning_rate": 1.3645899491844987e-05, + "loss": 1.0408, + "step": 66492 + }, + { + "epoch": 0.8, + "grad_norm": 15.521604548911508, + "learning_rate": 1.364535551762693e-05, + "loss": 1.3047, + "step": 66495 + }, + { + "epoch": 0.8, + "grad_norm": 10.252092060039795, + "learning_rate": 1.3644811530968587e-05, + "loss": 1.2297, + "step": 66498 + }, + { + "epoch": 0.8, + "grad_norm": 26.75660050365165, + "learning_rate": 1.3644267531871817e-05, + "loss": 1.1404, + "step": 66501 + }, + { + "epoch": 0.8, + "grad_norm": 25.061424546214923, + "learning_rate": 1.3643723520338476e-05, + "loss": 1.1328, + "step": 66504 + }, + { + "epoch": 0.8, + "grad_norm": 7.3731451811207025, + "learning_rate": 1.364317949637042e-05, + "loss": 1.7005, + "step": 66507 + }, + { + "epoch": 0.8, + "grad_norm": 5.110203726086432, + "learning_rate": 1.36426354599695e-05, + "loss": 1.5017, + "step": 66510 + }, + { + "epoch": 0.8, + "grad_norm": 70.29110344328652, + "learning_rate": 1.3642091411137583e-05, + "loss": 1.5083, + "step": 66513 + }, + { + "epoch": 0.8, + "grad_norm": 13.021613363974339, + "learning_rate": 1.3641547349876517e-05, + "loss": 1.388, + "step": 66516 + }, + { + "epoch": 0.8, + "grad_norm": 18.726339170047787, + "learning_rate": 1.364100327618816e-05, + "loss": 1.62, + "step": 66519 + }, + { + "epoch": 0.8, + "grad_norm": 26.567249772491536, + "learning_rate": 1.3640459190074372e-05, + "loss": 1.0715, + "step": 66522 + }, + { + "epoch": 0.8, + "grad_norm": 50.11051012908734, + "learning_rate": 1.3639915091537006e-05, + "loss": 1.4561, + "step": 66525 + }, + { + "epoch": 0.8, + "grad_norm": 20.641391379421037, + "learning_rate": 1.3639370980577923e-05, + "loss": 1.3977, + "step": 66528 + }, + { + "epoch": 0.8, + "grad_norm": 12.689230259528399, + "learning_rate": 1.3638826857198977e-05, + "loss": 1.2882, + "step": 66531 + }, + { + "epoch": 0.8, + "grad_norm": 14.859526608884813, + "learning_rate": 1.3638282721402027e-05, + "loss": 1.2699, + "step": 66534 + }, + { + "epoch": 0.8, + "grad_norm": 10.838288924183573, + "learning_rate": 1.3637738573188927e-05, + "loss": 1.4176, + "step": 66537 + }, + { + "epoch": 0.8, + "grad_norm": 18.833425375849863, + "learning_rate": 1.3637194412561536e-05, + "loss": 1.4802, + "step": 66540 + }, + { + "epoch": 0.8, + "grad_norm": 8.26492188142801, + "learning_rate": 1.3636650239521711e-05, + "loss": 1.8143, + "step": 66543 + }, + { + "epoch": 0.8, + "grad_norm": 13.824980644445823, + "learning_rate": 1.363610605407131e-05, + "loss": 1.3467, + "step": 66546 + }, + { + "epoch": 0.8, + "grad_norm": 12.261071005315797, + "learning_rate": 1.3635561856212184e-05, + "loss": 1.5236, + "step": 66549 + }, + { + "epoch": 0.8, + "grad_norm": 24.805625908540893, + "learning_rate": 1.3635017645946198e-05, + "loss": 1.2574, + "step": 66552 + }, + { + "epoch": 0.8, + "grad_norm": 16.23567528167082, + "learning_rate": 1.3634473423275209e-05, + "loss": 1.6344, + "step": 66555 + }, + { + "epoch": 0.8, + "grad_norm": 5.200748157582912, + "learning_rate": 1.3633929188201067e-05, + "loss": 1.1188, + "step": 66558 + }, + { + "epoch": 0.8, + "grad_norm": 12.099939603652098, + "learning_rate": 1.3633384940725635e-05, + "loss": 1.5043, + "step": 66561 + }, + { + "epoch": 0.8, + "grad_norm": 9.43343299956786, + "learning_rate": 1.363284068085077e-05, + "loss": 1.2911, + "step": 66564 + }, + { + "epoch": 0.8, + "grad_norm": 25.208621662426594, + "learning_rate": 1.3632296408578327e-05, + "loss": 1.5204, + "step": 66567 + }, + { + "epoch": 0.8, + "grad_norm": 24.31599556466382, + "learning_rate": 1.3631752123910165e-05, + "loss": 1.196, + "step": 66570 + }, + { + "epoch": 0.8, + "grad_norm": 14.67417039830943, + "learning_rate": 1.3631207826848144e-05, + "loss": 0.9515, + "step": 66573 + }, + { + "epoch": 0.8, + "grad_norm": 12.217317077534268, + "learning_rate": 1.3630663517394115e-05, + "loss": 1.3462, + "step": 66576 + }, + { + "epoch": 0.8, + "grad_norm": 10.425875010940665, + "learning_rate": 1.3630119195549938e-05, + "loss": 1.459, + "step": 66579 + }, + { + "epoch": 0.8, + "grad_norm": 54.89618030707657, + "learning_rate": 1.3629574861317473e-05, + "loss": 1.5732, + "step": 66582 + }, + { + "epoch": 0.8, + "grad_norm": 53.63048000984661, + "learning_rate": 1.3629030514698576e-05, + "loss": 1.4064, + "step": 66585 + }, + { + "epoch": 0.8, + "grad_norm": 21.152558712495992, + "learning_rate": 1.3628486155695108e-05, + "loss": 1.0383, + "step": 66588 + }, + { + "epoch": 0.8, + "grad_norm": 10.675844578686245, + "learning_rate": 1.3627941784308922e-05, + "loss": 1.3588, + "step": 66591 + }, + { + "epoch": 0.8, + "grad_norm": 14.60479241979965, + "learning_rate": 1.3627397400541877e-05, + "loss": 1.1946, + "step": 66594 + }, + { + "epoch": 0.8, + "grad_norm": 13.43520951331601, + "learning_rate": 1.362685300439583e-05, + "loss": 1.6217, + "step": 66597 + }, + { + "epoch": 0.8, + "grad_norm": 8.91336634395577, + "learning_rate": 1.3626308595872644e-05, + "loss": 1.1222, + "step": 66600 + }, + { + "epoch": 0.8, + "grad_norm": 19.130231081433088, + "learning_rate": 1.362576417497417e-05, + "loss": 1.7021, + "step": 66603 + }, + { + "epoch": 0.8, + "grad_norm": 15.923519583864932, + "learning_rate": 1.3625219741702269e-05, + "loss": 1.1066, + "step": 66606 + }, + { + "epoch": 0.8, + "grad_norm": 10.56252400122656, + "learning_rate": 1.36246752960588e-05, + "loss": 1.1219, + "step": 66609 + }, + { + "epoch": 0.8, + "grad_norm": 77.63999223055184, + "learning_rate": 1.3624130838045617e-05, + "loss": 1.5357, + "step": 66612 + }, + { + "epoch": 0.8, + "grad_norm": 12.928511191557591, + "learning_rate": 1.3623586367664586e-05, + "loss": 1.9531, + "step": 66615 + }, + { + "epoch": 0.8, + "grad_norm": 27.172759693668034, + "learning_rate": 1.3623041884917553e-05, + "loss": 1.7386, + "step": 66618 + }, + { + "epoch": 0.8, + "grad_norm": 8.286520046305306, + "learning_rate": 1.362249738980639e-05, + "loss": 1.2338, + "step": 66621 + }, + { + "epoch": 0.8, + "grad_norm": 14.269902520001738, + "learning_rate": 1.3621952882332945e-05, + "loss": 1.5117, + "step": 66624 + }, + { + "epoch": 0.8, + "grad_norm": 10.583639245766786, + "learning_rate": 1.362140836249908e-05, + "loss": 1.4864, + "step": 66627 + }, + { + "epoch": 0.8, + "grad_norm": 10.907304650398512, + "learning_rate": 1.3620863830306653e-05, + "loss": 1.5809, + "step": 66630 + }, + { + "epoch": 0.8, + "grad_norm": 6.371532484697817, + "learning_rate": 1.362031928575752e-05, + "loss": 1.3753, + "step": 66633 + }, + { + "epoch": 0.8, + "grad_norm": 56.35211088204678, + "learning_rate": 1.3619774728853543e-05, + "loss": 1.3442, + "step": 66636 + }, + { + "epoch": 0.8, + "grad_norm": 11.184384486866637, + "learning_rate": 1.3619230159596578e-05, + "loss": 1.4632, + "step": 66639 + }, + { + "epoch": 0.8, + "grad_norm": 8.732541190183097, + "learning_rate": 1.3618685577988486e-05, + "loss": 1.5505, + "step": 66642 + }, + { + "epoch": 0.8, + "grad_norm": 22.822345791962555, + "learning_rate": 1.3618140984031121e-05, + "loss": 1.3172, + "step": 66645 + }, + { + "epoch": 0.8, + "grad_norm": 14.353988942257972, + "learning_rate": 1.3617596377726346e-05, + "loss": 1.4034, + "step": 66648 + }, + { + "epoch": 0.8, + "grad_norm": 84.95425963236744, + "learning_rate": 1.3617051759076016e-05, + "loss": 1.2543, + "step": 66651 + }, + { + "epoch": 0.8, + "grad_norm": 44.127136029040734, + "learning_rate": 1.3616507128081993e-05, + "loss": 1.2804, + "step": 66654 + }, + { + "epoch": 0.8, + "grad_norm": 13.188518261458896, + "learning_rate": 1.361596248474613e-05, + "loss": 1.4737, + "step": 66657 + }, + { + "epoch": 0.8, + "grad_norm": 4.807295799705889, + "learning_rate": 1.361541782907029e-05, + "loss": 1.8785, + "step": 66660 + }, + { + "epoch": 0.8, + "grad_norm": 11.358109032111434, + "learning_rate": 1.3614873161056337e-05, + "loss": 1.6152, + "step": 66663 + }, + { + "epoch": 0.8, + "grad_norm": 2.6854313687557028, + "learning_rate": 1.3614328480706118e-05, + "loss": 1.3991, + "step": 66666 + }, + { + "epoch": 0.8, + "grad_norm": 5.110428661718126, + "learning_rate": 1.3613783788021502e-05, + "loss": 1.4615, + "step": 66669 + }, + { + "epoch": 0.8, + "grad_norm": 39.239409830794536, + "learning_rate": 1.3613239083004342e-05, + "loss": 1.5381, + "step": 66672 + }, + { + "epoch": 0.8, + "grad_norm": 26.481279325043634, + "learning_rate": 1.3612694365656498e-05, + "loss": 1.5113, + "step": 66675 + }, + { + "epoch": 0.8, + "grad_norm": 11.728827726478865, + "learning_rate": 1.3612149635979828e-05, + "loss": 1.4713, + "step": 66678 + }, + { + "epoch": 0.8, + "grad_norm": 15.735393936896047, + "learning_rate": 1.3611604893976195e-05, + "loss": 1.3822, + "step": 66681 + }, + { + "epoch": 0.8, + "grad_norm": 6.239083328425778, + "learning_rate": 1.3611060139647455e-05, + "loss": 1.1328, + "step": 66684 + }, + { + "epoch": 0.8, + "grad_norm": 7.9801698255993925, + "learning_rate": 1.3610515372995465e-05, + "loss": 1.1402, + "step": 66687 + }, + { + "epoch": 0.8, + "grad_norm": 16.717909432538995, + "learning_rate": 1.360997059402209e-05, + "loss": 1.5783, + "step": 66690 + }, + { + "epoch": 0.8, + "grad_norm": 43.52888310062982, + "learning_rate": 1.3609425802729184e-05, + "loss": 1.0645, + "step": 66693 + }, + { + "epoch": 0.8, + "grad_norm": 14.135816246871208, + "learning_rate": 1.3608880999118607e-05, + "loss": 1.9025, + "step": 66696 + }, + { + "epoch": 0.8, + "grad_norm": 31.154112994307948, + "learning_rate": 1.3608336183192222e-05, + "loss": 1.2085, + "step": 66699 + }, + { + "epoch": 0.8, + "grad_norm": 15.400622748717142, + "learning_rate": 1.3607791354951883e-05, + "loss": 1.5856, + "step": 66702 + }, + { + "epoch": 0.8, + "grad_norm": 10.248749810142197, + "learning_rate": 1.3607246514399453e-05, + "loss": 1.2334, + "step": 66705 + }, + { + "epoch": 0.8, + "grad_norm": 16.331251751606914, + "learning_rate": 1.3606701661536788e-05, + "loss": 1.7436, + "step": 66708 + }, + { + "epoch": 0.8, + "grad_norm": 14.014176345754153, + "learning_rate": 1.3606156796365749e-05, + "loss": 1.3511, + "step": 66711 + }, + { + "epoch": 0.8, + "grad_norm": 15.728547833523901, + "learning_rate": 1.3605611918888198e-05, + "loss": 1.401, + "step": 66714 + }, + { + "epoch": 0.8, + "grad_norm": 20.550750199296555, + "learning_rate": 1.3605067029105991e-05, + "loss": 1.648, + "step": 66717 + }, + { + "epoch": 0.8, + "grad_norm": 24.44611314078912, + "learning_rate": 1.3604522127020986e-05, + "loss": 1.5026, + "step": 66720 + }, + { + "epoch": 0.8, + "grad_norm": 44.58077883025524, + "learning_rate": 1.360397721263505e-05, + "loss": 1.0869, + "step": 66723 + }, + { + "epoch": 0.8, + "grad_norm": 5.714543648120049, + "learning_rate": 1.3603432285950037e-05, + "loss": 1.3015, + "step": 66726 + }, + { + "epoch": 0.8, + "grad_norm": 6.681879511008527, + "learning_rate": 1.3602887346967808e-05, + "loss": 0.9067, + "step": 66729 + }, + { + "epoch": 0.8, + "grad_norm": 21.840430367079243, + "learning_rate": 1.360234239569022e-05, + "loss": 1.4902, + "step": 66732 + }, + { + "epoch": 0.8, + "grad_norm": 8.845803674076105, + "learning_rate": 1.3601797432119138e-05, + "loss": 1.6049, + "step": 66735 + }, + { + "epoch": 0.8, + "grad_norm": 14.097291754792446, + "learning_rate": 1.3601252456256418e-05, + "loss": 1.2514, + "step": 66738 + }, + { + "epoch": 0.8, + "grad_norm": 4.270602904128165, + "learning_rate": 1.3600707468103914e-05, + "loss": 1.5564, + "step": 66741 + }, + { + "epoch": 0.8, + "grad_norm": 9.493210526799356, + "learning_rate": 1.36001624676635e-05, + "loss": 0.8682, + "step": 66744 + }, + { + "epoch": 0.8, + "grad_norm": 12.211516377923783, + "learning_rate": 1.3599617454937026e-05, + "loss": 1.3988, + "step": 66747 + }, + { + "epoch": 0.8, + "grad_norm": 22.145077463025803, + "learning_rate": 1.3599072429926357e-05, + "loss": 1.5383, + "step": 66750 + }, + { + "epoch": 0.8, + "grad_norm": 10.42179318878871, + "learning_rate": 1.3598527392633345e-05, + "loss": 1.2802, + "step": 66753 + }, + { + "epoch": 0.8, + "grad_norm": 24.003083828548412, + "learning_rate": 1.3597982343059857e-05, + "loss": 1.6113, + "step": 66756 + }, + { + "epoch": 0.8, + "grad_norm": 20.109069155121265, + "learning_rate": 1.3597437281207753e-05, + "loss": 1.2884, + "step": 66759 + }, + { + "epoch": 0.8, + "grad_norm": 16.3998813068149, + "learning_rate": 1.3596892207078891e-05, + "loss": 1.6257, + "step": 66762 + }, + { + "epoch": 0.8, + "grad_norm": 5.586708574460763, + "learning_rate": 1.3596347120675128e-05, + "loss": 1.7284, + "step": 66765 + }, + { + "epoch": 0.8, + "grad_norm": 9.633190085279743, + "learning_rate": 1.3595802021998332e-05, + "loss": 1.211, + "step": 66768 + }, + { + "epoch": 0.8, + "grad_norm": 9.089756830470277, + "learning_rate": 1.3595256911050355e-05, + "loss": 1.3292, + "step": 66771 + }, + { + "epoch": 0.8, + "grad_norm": 11.072417461092066, + "learning_rate": 1.3594711787833063e-05, + "loss": 1.6375, + "step": 66774 + }, + { + "epoch": 0.8, + "grad_norm": 13.485624821041542, + "learning_rate": 1.3594166652348318e-05, + "loss": 1.2626, + "step": 66777 + }, + { + "epoch": 0.8, + "grad_norm": 15.377823379063924, + "learning_rate": 1.3593621504597973e-05, + "loss": 1.5616, + "step": 66780 + }, + { + "epoch": 0.8, + "grad_norm": 8.349204346879322, + "learning_rate": 1.3593076344583894e-05, + "loss": 1.1092, + "step": 66783 + }, + { + "epoch": 0.8, + "grad_norm": 5.582333646080133, + "learning_rate": 1.3592531172307937e-05, + "loss": 1.4405, + "step": 66786 + }, + { + "epoch": 0.8, + "grad_norm": 50.62514595449404, + "learning_rate": 1.3591985987771968e-05, + "loss": 1.3919, + "step": 66789 + }, + { + "epoch": 0.8, + "grad_norm": 11.119265027907785, + "learning_rate": 1.3591440790977843e-05, + "loss": 1.5683, + "step": 66792 + }, + { + "epoch": 0.8, + "grad_norm": 19.951104443639377, + "learning_rate": 1.3590895581927424e-05, + "loss": 1.2593, + "step": 66795 + }, + { + "epoch": 0.8, + "grad_norm": 6.836805813549961, + "learning_rate": 1.3590350360622572e-05, + "loss": 1.3757, + "step": 66798 + }, + { + "epoch": 0.8, + "grad_norm": 39.12283970919079, + "learning_rate": 1.3589805127065147e-05, + "loss": 1.2109, + "step": 66801 + }, + { + "epoch": 0.8, + "grad_norm": 3.903219620036172, + "learning_rate": 1.3589259881257014e-05, + "loss": 1.4549, + "step": 66804 + }, + { + "epoch": 0.8, + "grad_norm": 10.0249916971829, + "learning_rate": 1.3588714623200023e-05, + "loss": 1.1032, + "step": 66807 + }, + { + "epoch": 0.8, + "grad_norm": 5.96478897096565, + "learning_rate": 1.3588169352896049e-05, + "loss": 1.4383, + "step": 66810 + }, + { + "epoch": 0.8, + "grad_norm": 10.018933197724959, + "learning_rate": 1.3587624070346941e-05, + "loss": 1.5221, + "step": 66813 + }, + { + "epoch": 0.8, + "grad_norm": 4.471233446900304, + "learning_rate": 1.3587078775554565e-05, + "loss": 1.3008, + "step": 66816 + }, + { + "epoch": 0.8, + "grad_norm": 39.17076418977441, + "learning_rate": 1.3586533468520782e-05, + "loss": 1.4339, + "step": 66819 + }, + { + "epoch": 0.8, + "grad_norm": 50.59331208424343, + "learning_rate": 1.3585988149247451e-05, + "loss": 1.3052, + "step": 66822 + }, + { + "epoch": 0.8, + "grad_norm": 5.256759587883343, + "learning_rate": 1.3585442817736434e-05, + "loss": 1.2804, + "step": 66825 + }, + { + "epoch": 0.8, + "grad_norm": 13.866067215687648, + "learning_rate": 1.3584897473989593e-05, + "loss": 1.1064, + "step": 66828 + }, + { + "epoch": 0.8, + "grad_norm": 9.944521496345585, + "learning_rate": 1.3584352118008788e-05, + "loss": 1.4459, + "step": 66831 + }, + { + "epoch": 0.8, + "grad_norm": 9.336117092335217, + "learning_rate": 1.358380674979588e-05, + "loss": 1.2623, + "step": 66834 + }, + { + "epoch": 0.8, + "grad_norm": 5.871077044146499, + "learning_rate": 1.358326136935273e-05, + "loss": 1.6774, + "step": 66837 + }, + { + "epoch": 0.8, + "grad_norm": 17.53679401173907, + "learning_rate": 1.35827159766812e-05, + "loss": 1.3599, + "step": 66840 + }, + { + "epoch": 0.8, + "grad_norm": 24.301127357979343, + "learning_rate": 1.3582170571783153e-05, + "loss": 1.7623, + "step": 66843 + }, + { + "epoch": 0.8, + "grad_norm": 9.582469260137751, + "learning_rate": 1.3581625154660448e-05, + "loss": 1.1744, + "step": 66846 + }, + { + "epoch": 0.8, + "grad_norm": 6.478262072255594, + "learning_rate": 1.358107972531494e-05, + "loss": 1.3654, + "step": 66849 + }, + { + "epoch": 0.8, + "grad_norm": 6.034732434602565, + "learning_rate": 1.3580534283748503e-05, + "loss": 1.3947, + "step": 66852 + }, + { + "epoch": 0.8, + "grad_norm": 21.8007624106088, + "learning_rate": 1.3579988829962991e-05, + "loss": 1.6087, + "step": 66855 + }, + { + "epoch": 0.8, + "grad_norm": 46.06240052308355, + "learning_rate": 1.357944336396027e-05, + "loss": 1.1327, + "step": 66858 + }, + { + "epoch": 0.8, + "grad_norm": 19.773010568315506, + "learning_rate": 1.357889788574219e-05, + "loss": 1.3914, + "step": 66861 + }, + { + "epoch": 0.8, + "grad_norm": 45.92128828684148, + "learning_rate": 1.3578352395310628e-05, + "loss": 1.2862, + "step": 66864 + }, + { + "epoch": 0.8, + "grad_norm": 27.50348837190527, + "learning_rate": 1.3577806892667436e-05, + "loss": 1.4374, + "step": 66867 + }, + { + "epoch": 0.8, + "grad_norm": 16.272032980572764, + "learning_rate": 1.3577261377814477e-05, + "loss": 1.2398, + "step": 66870 + }, + { + "epoch": 0.8, + "grad_norm": 40.41599277443897, + "learning_rate": 1.3576715850753612e-05, + "loss": 1.5286, + "step": 66873 + }, + { + "epoch": 0.8, + "grad_norm": 12.510908175410687, + "learning_rate": 1.3576170311486707e-05, + "loss": 1.3173, + "step": 66876 + }, + { + "epoch": 0.8, + "grad_norm": 10.172491474427863, + "learning_rate": 1.357562476001562e-05, + "loss": 1.2135, + "step": 66879 + }, + { + "epoch": 0.8, + "grad_norm": 17.38979840419, + "learning_rate": 1.357507919634221e-05, + "loss": 1.5188, + "step": 66882 + }, + { + "epoch": 0.8, + "grad_norm": 32.727680898985675, + "learning_rate": 1.3574533620468348e-05, + "loss": 1.5737, + "step": 66885 + }, + { + "epoch": 0.8, + "grad_norm": 30.080806431199967, + "learning_rate": 1.3573988032395888e-05, + "loss": 1.1343, + "step": 66888 + }, + { + "epoch": 0.8, + "grad_norm": 9.44114657907012, + "learning_rate": 1.3573442432126695e-05, + "loss": 1.3973, + "step": 66891 + }, + { + "epoch": 0.8, + "grad_norm": 9.358235336363151, + "learning_rate": 1.3572896819662628e-05, + "loss": 1.3614, + "step": 66894 + }, + { + "epoch": 0.8, + "grad_norm": 13.468260647952473, + "learning_rate": 1.3572351195005553e-05, + "loss": 1.1782, + "step": 66897 + }, + { + "epoch": 0.8, + "grad_norm": 13.297382565804634, + "learning_rate": 1.3571805558157329e-05, + "loss": 1.1768, + "step": 66900 + }, + { + "epoch": 0.8, + "grad_norm": 9.305287390214547, + "learning_rate": 1.3571259909119818e-05, + "loss": 1.6006, + "step": 66903 + }, + { + "epoch": 0.8, + "grad_norm": 28.406343039068663, + "learning_rate": 1.3570714247894887e-05, + "loss": 1.4664, + "step": 66906 + }, + { + "epoch": 0.8, + "grad_norm": 48.5489988592733, + "learning_rate": 1.3570168574484393e-05, + "loss": 1.6076, + "step": 66909 + }, + { + "epoch": 0.8, + "grad_norm": 4.459663581398397, + "learning_rate": 1.35696228888902e-05, + "loss": 1.6313, + "step": 66912 + }, + { + "epoch": 0.8, + "grad_norm": 11.168894397416999, + "learning_rate": 1.3569077191114166e-05, + "loss": 1.3958, + "step": 66915 + }, + { + "epoch": 0.8, + "grad_norm": 41.359124170235745, + "learning_rate": 1.356853148115816e-05, + "loss": 1.5623, + "step": 66918 + }, + { + "epoch": 0.8, + "grad_norm": 17.97283126291415, + "learning_rate": 1.3567985759024042e-05, + "loss": 1.6637, + "step": 66921 + }, + { + "epoch": 0.8, + "grad_norm": 6.311971671790264, + "learning_rate": 1.3567440024713676e-05, + "loss": 1.3791, + "step": 66924 + }, + { + "epoch": 0.8, + "grad_norm": 53.564125108309916, + "learning_rate": 1.3566894278228918e-05, + "loss": 1.2476, + "step": 66927 + }, + { + "epoch": 0.8, + "grad_norm": 10.536449406406222, + "learning_rate": 1.3566348519571635e-05, + "loss": 1.4204, + "step": 66930 + }, + { + "epoch": 0.8, + "grad_norm": 6.524707108825628, + "learning_rate": 1.356580274874369e-05, + "loss": 1.3568, + "step": 66933 + }, + { + "epoch": 0.8, + "grad_norm": 17.72091881873186, + "learning_rate": 1.3565256965746944e-05, + "loss": 1.671, + "step": 66936 + }, + { + "epoch": 0.8, + "grad_norm": 13.695403030190866, + "learning_rate": 1.3564711170583263e-05, + "loss": 1.2946, + "step": 66939 + }, + { + "epoch": 0.8, + "grad_norm": 21.213551099656016, + "learning_rate": 1.3564165363254505e-05, + "loss": 1.2068, + "step": 66942 + }, + { + "epoch": 0.8, + "grad_norm": 18.422657336543473, + "learning_rate": 1.3563619543762533e-05, + "loss": 1.2749, + "step": 66945 + }, + { + "epoch": 0.81, + "grad_norm": 10.13774505810436, + "learning_rate": 1.3563073712109211e-05, + "loss": 1.7982, + "step": 66948 + }, + { + "epoch": 0.81, + "grad_norm": 12.952065760017463, + "learning_rate": 1.3562527868296407e-05, + "loss": 1.2106, + "step": 66951 + }, + { + "epoch": 0.81, + "grad_norm": 19.319950353072414, + "learning_rate": 1.3561982012325973e-05, + "loss": 1.6924, + "step": 66954 + }, + { + "epoch": 0.81, + "grad_norm": 7.876087809304267, + "learning_rate": 1.3561436144199777e-05, + "loss": 1.5977, + "step": 66957 + }, + { + "epoch": 0.81, + "grad_norm": 11.754663247197204, + "learning_rate": 1.3560890263919686e-05, + "loss": 1.4004, + "step": 66960 + }, + { + "epoch": 0.81, + "grad_norm": 12.347766282266724, + "learning_rate": 1.3560344371487558e-05, + "loss": 1.4187, + "step": 66963 + }, + { + "epoch": 0.81, + "grad_norm": 3.4472719961902585, + "learning_rate": 1.3559798466905257e-05, + "loss": 1.4791, + "step": 66966 + }, + { + "epoch": 0.81, + "grad_norm": 21.921683025683173, + "learning_rate": 1.3559252550174645e-05, + "loss": 1.1278, + "step": 66969 + }, + { + "epoch": 0.81, + "grad_norm": 19.788315353991344, + "learning_rate": 1.3558706621297589e-05, + "loss": 1.215, + "step": 66972 + }, + { + "epoch": 0.81, + "grad_norm": 13.499974445370587, + "learning_rate": 1.3558160680275945e-05, + "loss": 1.5285, + "step": 66975 + }, + { + "epoch": 0.81, + "grad_norm": 8.673501298439692, + "learning_rate": 1.3557614727111582e-05, + "loss": 1.1827, + "step": 66978 + }, + { + "epoch": 0.81, + "grad_norm": 5.487181539207891, + "learning_rate": 1.355706876180636e-05, + "loss": 0.9091, + "step": 66981 + }, + { + "epoch": 0.81, + "grad_norm": 81.08945976452291, + "learning_rate": 1.3556522784362145e-05, + "loss": 1.278, + "step": 66984 + }, + { + "epoch": 0.81, + "grad_norm": 27.037947158291082, + "learning_rate": 1.35559767947808e-05, + "loss": 1.2413, + "step": 66987 + }, + { + "epoch": 0.81, + "grad_norm": 16.153431593421104, + "learning_rate": 1.3555430793064183e-05, + "loss": 1.3878, + "step": 66990 + }, + { + "epoch": 0.81, + "grad_norm": 24.676077381342324, + "learning_rate": 1.3554884779214168e-05, + "loss": 1.4546, + "step": 66993 + }, + { + "epoch": 0.81, + "grad_norm": 7.8011759359111945, + "learning_rate": 1.3554338753232607e-05, + "loss": 1.4089, + "step": 66996 + }, + { + "epoch": 0.81, + "grad_norm": 22.39512627067095, + "learning_rate": 1.355379271512137e-05, + "loss": 1.1513, + "step": 66999 + }, + { + "epoch": 0.81, + "grad_norm": 15.536116002602425, + "learning_rate": 1.3553246664882315e-05, + "loss": 1.3468, + "step": 67002 + }, + { + "epoch": 0.81, + "grad_norm": 21.424040946856678, + "learning_rate": 1.3552700602517315e-05, + "loss": 1.0386, + "step": 67005 + }, + { + "epoch": 0.81, + "grad_norm": 7.752795020493892, + "learning_rate": 1.3552154528028223e-05, + "loss": 1.466, + "step": 67008 + }, + { + "epoch": 0.81, + "grad_norm": 14.15345797002246, + "learning_rate": 1.3551608441416905e-05, + "loss": 0.9968, + "step": 67011 + }, + { + "epoch": 0.81, + "grad_norm": 247.26606925867446, + "learning_rate": 1.3551062342685235e-05, + "loss": 1.574, + "step": 67014 + }, + { + "epoch": 0.81, + "grad_norm": 17.04502948406173, + "learning_rate": 1.355051623183506e-05, + "loss": 1.2309, + "step": 67017 + }, + { + "epoch": 0.81, + "grad_norm": 29.889865818975395, + "learning_rate": 1.3549970108868257e-05, + "loss": 1.0399, + "step": 67020 + }, + { + "epoch": 0.81, + "grad_norm": 37.70722544841323, + "learning_rate": 1.3549423973786682e-05, + "loss": 1.3155, + "step": 67023 + }, + { + "epoch": 0.81, + "grad_norm": 6.2115435977817475, + "learning_rate": 1.3548877826592205e-05, + "loss": 1.3715, + "step": 67026 + }, + { + "epoch": 0.81, + "grad_norm": 21.188367026291093, + "learning_rate": 1.3548331667286681e-05, + "loss": 1.4692, + "step": 67029 + }, + { + "epoch": 0.81, + "grad_norm": 21.794522713122063, + "learning_rate": 1.3547785495871981e-05, + "loss": 1.4107, + "step": 67032 + }, + { + "epoch": 0.81, + "grad_norm": 8.974366719422344, + "learning_rate": 1.3547239312349967e-05, + "loss": 1.2872, + "step": 67035 + }, + { + "epoch": 0.81, + "grad_norm": 172.85462398918995, + "learning_rate": 1.3546693116722505e-05, + "loss": 1.4658, + "step": 67038 + }, + { + "epoch": 0.81, + "grad_norm": 10.447207131449318, + "learning_rate": 1.3546146908991457e-05, + "loss": 1.6057, + "step": 67041 + }, + { + "epoch": 0.81, + "grad_norm": 13.69151032030337, + "learning_rate": 1.3545600689158684e-05, + "loss": 1.1673, + "step": 67044 + }, + { + "epoch": 0.81, + "grad_norm": 4.075413566771362, + "learning_rate": 1.3545054457226057e-05, + "loss": 1.2482, + "step": 67047 + }, + { + "epoch": 0.81, + "grad_norm": 19.97811489887453, + "learning_rate": 1.354450821319543e-05, + "loss": 1.2962, + "step": 67050 + }, + { + "epoch": 0.81, + "grad_norm": 19.85827796192019, + "learning_rate": 1.354396195706868e-05, + "loss": 1.3161, + "step": 67053 + }, + { + "epoch": 0.81, + "grad_norm": 5.981563338664852, + "learning_rate": 1.3543415688847658e-05, + "loss": 1.393, + "step": 67056 + }, + { + "epoch": 0.81, + "grad_norm": 230.074728037293, + "learning_rate": 1.354286940853424e-05, + "loss": 1.1479, + "step": 67059 + }, + { + "epoch": 0.81, + "grad_norm": 47.49791829522735, + "learning_rate": 1.3542323116130282e-05, + "loss": 1.2466, + "step": 67062 + }, + { + "epoch": 0.81, + "grad_norm": 22.719571649845722, + "learning_rate": 1.354177681163765e-05, + "loss": 1.1753, + "step": 67065 + }, + { + "epoch": 0.81, + "grad_norm": 22.24526342397352, + "learning_rate": 1.3541230495058215e-05, + "loss": 1.2167, + "step": 67068 + }, + { + "epoch": 0.81, + "grad_norm": 17.288315026838706, + "learning_rate": 1.3540684166393829e-05, + "loss": 1.4051, + "step": 67071 + }, + { + "epoch": 0.81, + "grad_norm": 20.653429445786514, + "learning_rate": 1.3540137825646369e-05, + "loss": 1.6169, + "step": 67074 + }, + { + "epoch": 0.81, + "grad_norm": 12.039809818313346, + "learning_rate": 1.3539591472817688e-05, + "loss": 1.5183, + "step": 67077 + }, + { + "epoch": 0.81, + "grad_norm": 83.99364861833281, + "learning_rate": 1.3539045107909663e-05, + "loss": 1.1631, + "step": 67080 + }, + { + "epoch": 0.81, + "grad_norm": 10.019805839922892, + "learning_rate": 1.3538498730924146e-05, + "loss": 1.3388, + "step": 67083 + }, + { + "epoch": 0.81, + "grad_norm": 28.290337350896387, + "learning_rate": 1.353795234186301e-05, + "loss": 1.157, + "step": 67086 + }, + { + "epoch": 0.81, + "grad_norm": 27.116743919795226, + "learning_rate": 1.3537405940728117e-05, + "loss": 1.9484, + "step": 67089 + }, + { + "epoch": 0.81, + "grad_norm": 10.083270183355799, + "learning_rate": 1.353685952752133e-05, + "loss": 1.7299, + "step": 67092 + }, + { + "epoch": 0.81, + "grad_norm": 7.907169694581948, + "learning_rate": 1.3536313102244519e-05, + "loss": 1.3131, + "step": 67095 + }, + { + "epoch": 0.81, + "grad_norm": 3.4318612163395725, + "learning_rate": 1.3535766664899539e-05, + "loss": 1.4447, + "step": 67098 + }, + { + "epoch": 0.81, + "grad_norm": 6.086217781942864, + "learning_rate": 1.3535220215488267e-05, + "loss": 1.0972, + "step": 67101 + }, + { + "epoch": 0.81, + "grad_norm": 19.703926507665578, + "learning_rate": 1.353467375401256e-05, + "loss": 1.3331, + "step": 67104 + }, + { + "epoch": 0.81, + "grad_norm": 7.262228907840665, + "learning_rate": 1.3534127280474285e-05, + "loss": 1.3444, + "step": 67107 + }, + { + "epoch": 0.81, + "grad_norm": 17.855107774733813, + "learning_rate": 1.3533580794875305e-05, + "loss": 1.5433, + "step": 67110 + }, + { + "epoch": 0.81, + "grad_norm": 10.501387987600175, + "learning_rate": 1.3533034297217488e-05, + "loss": 1.2888, + "step": 67113 + }, + { + "epoch": 0.81, + "grad_norm": 12.092787027571754, + "learning_rate": 1.3532487787502695e-05, + "loss": 1.2114, + "step": 67116 + }, + { + "epoch": 0.81, + "grad_norm": 45.48071296161603, + "learning_rate": 1.3531941265732795e-05, + "loss": 1.4204, + "step": 67119 + }, + { + "epoch": 0.81, + "grad_norm": 13.55278720476074, + "learning_rate": 1.3531394731909654e-05, + "loss": 1.5255, + "step": 67122 + }, + { + "epoch": 0.81, + "grad_norm": 52.535009875442725, + "learning_rate": 1.3530848186035133e-05, + "loss": 1.4775, + "step": 67125 + }, + { + "epoch": 0.81, + "grad_norm": 23.538495816300493, + "learning_rate": 1.35303016281111e-05, + "loss": 1.4014, + "step": 67128 + }, + { + "epoch": 0.81, + "grad_norm": 8.567963014018511, + "learning_rate": 1.3529755058139417e-05, + "loss": 1.684, + "step": 67131 + }, + { + "epoch": 0.81, + "grad_norm": 7.673787197985866, + "learning_rate": 1.3529208476121954e-05, + "loss": 1.6283, + "step": 67134 + }, + { + "epoch": 0.81, + "grad_norm": 5.59267826627944, + "learning_rate": 1.352866188206057e-05, + "loss": 1.1557, + "step": 67137 + }, + { + "epoch": 0.81, + "grad_norm": 12.57340103693851, + "learning_rate": 1.3528115275957137e-05, + "loss": 1.3029, + "step": 67140 + }, + { + "epoch": 0.81, + "grad_norm": 5.684186599725067, + "learning_rate": 1.3527568657813517e-05, + "loss": 1.1149, + "step": 67143 + }, + { + "epoch": 0.81, + "grad_norm": 10.811337806267167, + "learning_rate": 1.3527022027631573e-05, + "loss": 1.2817, + "step": 67146 + }, + { + "epoch": 0.81, + "grad_norm": 13.573047910438218, + "learning_rate": 1.3526475385413176e-05, + "loss": 1.0561, + "step": 67149 + }, + { + "epoch": 0.81, + "grad_norm": 9.876313283187539, + "learning_rate": 1.3525928731160189e-05, + "loss": 1.514, + "step": 67152 + }, + { + "epoch": 0.81, + "grad_norm": 6.579263882142174, + "learning_rate": 1.3525382064874477e-05, + "loss": 1.116, + "step": 67155 + }, + { + "epoch": 0.81, + "grad_norm": 5.705063576043581, + "learning_rate": 1.3524835386557904e-05, + "loss": 1.4584, + "step": 67158 + }, + { + "epoch": 0.81, + "grad_norm": 7.833909937866897, + "learning_rate": 1.352428869621234e-05, + "loss": 1.127, + "step": 67161 + }, + { + "epoch": 0.81, + "grad_norm": 4.713082228839517, + "learning_rate": 1.3523741993839647e-05, + "loss": 1.9466, + "step": 67164 + }, + { + "epoch": 0.81, + "grad_norm": 6.534226063454681, + "learning_rate": 1.3523195279441695e-05, + "loss": 1.2091, + "step": 67167 + }, + { + "epoch": 0.81, + "grad_norm": 5.997369076425644, + "learning_rate": 1.3522648553020342e-05, + "loss": 1.4647, + "step": 67170 + }, + { + "epoch": 0.81, + "grad_norm": 6.517050180909252, + "learning_rate": 1.3522101814577457e-05, + "loss": 1.2777, + "step": 67173 + }, + { + "epoch": 0.81, + "grad_norm": 18.66191407598645, + "learning_rate": 1.3521555064114913e-05, + "loss": 1.6886, + "step": 67176 + }, + { + "epoch": 0.81, + "grad_norm": 18.351050393840847, + "learning_rate": 1.3521008301634563e-05, + "loss": 1.2992, + "step": 67179 + }, + { + "epoch": 0.81, + "grad_norm": 6.326561415010949, + "learning_rate": 1.3520461527138285e-05, + "loss": 1.1627, + "step": 67182 + }, + { + "epoch": 0.81, + "grad_norm": 10.807156156332018, + "learning_rate": 1.3519914740627939e-05, + "loss": 1.4997, + "step": 67185 + }, + { + "epoch": 0.81, + "grad_norm": 8.802444420178015, + "learning_rate": 1.3519367942105392e-05, + "loss": 1.535, + "step": 67188 + }, + { + "epoch": 0.81, + "grad_norm": 10.483702082094148, + "learning_rate": 1.3518821131572508e-05, + "loss": 1.4261, + "step": 67191 + }, + { + "epoch": 0.81, + "grad_norm": 8.910735331986686, + "learning_rate": 1.3518274309031158e-05, + "loss": 1.2279, + "step": 67194 + }, + { + "epoch": 0.81, + "grad_norm": 11.729301897125687, + "learning_rate": 1.3517727474483205e-05, + "loss": 1.3358, + "step": 67197 + }, + { + "epoch": 0.81, + "grad_norm": 6.207479966674936, + "learning_rate": 1.3517180627930512e-05, + "loss": 1.2101, + "step": 67200 + }, + { + "epoch": 0.81, + "grad_norm": 9.208008131384767, + "learning_rate": 1.351663376937495e-05, + "loss": 1.6049, + "step": 67203 + }, + { + "epoch": 0.81, + "grad_norm": 6.9599195975425765, + "learning_rate": 1.3516086898818384e-05, + "loss": 1.4718, + "step": 67206 + }, + { + "epoch": 0.81, + "grad_norm": 8.957343001577977, + "learning_rate": 1.3515540016262681e-05, + "loss": 1.4042, + "step": 67209 + }, + { + "epoch": 0.81, + "grad_norm": 21.769909693543323, + "learning_rate": 1.3514993121709703e-05, + "loss": 1.59, + "step": 67212 + }, + { + "epoch": 0.81, + "grad_norm": 18.466510502868264, + "learning_rate": 1.351444621516132e-05, + "loss": 1.5166, + "step": 67215 + }, + { + "epoch": 0.81, + "grad_norm": 9.361358154403861, + "learning_rate": 1.3513899296619402e-05, + "loss": 1.1641, + "step": 67218 + }, + { + "epoch": 0.81, + "grad_norm": 3.6246324107611367, + "learning_rate": 1.3513352366085807e-05, + "loss": 1.3546, + "step": 67221 + }, + { + "epoch": 0.81, + "grad_norm": 20.46789404422965, + "learning_rate": 1.3512805423562407e-05, + "loss": 1.4418, + "step": 67224 + }, + { + "epoch": 0.81, + "grad_norm": 38.47737727950265, + "learning_rate": 1.3512258469051069e-05, + "loss": 1.1067, + "step": 67227 + }, + { + "epoch": 0.81, + "grad_norm": 13.216933074573001, + "learning_rate": 1.3511711502553656e-05, + "loss": 1.5853, + "step": 67230 + }, + { + "epoch": 0.81, + "grad_norm": 9.050646928711814, + "learning_rate": 1.3511164524072038e-05, + "loss": 1.3292, + "step": 67233 + }, + { + "epoch": 0.81, + "grad_norm": 10.467749016357596, + "learning_rate": 1.351061753360808e-05, + "loss": 1.091, + "step": 67236 + }, + { + "epoch": 0.81, + "grad_norm": 24.793071340725557, + "learning_rate": 1.3510070531163649e-05, + "loss": 1.1595, + "step": 67239 + }, + { + "epoch": 0.81, + "grad_norm": 3.1844783134054637, + "learning_rate": 1.3509523516740613e-05, + "loss": 1.2534, + "step": 67242 + }, + { + "epoch": 0.81, + "grad_norm": 3.4402081591849867, + "learning_rate": 1.3508976490340832e-05, + "loss": 1.2394, + "step": 67245 + }, + { + "epoch": 0.81, + "grad_norm": 9.977249158175796, + "learning_rate": 1.3508429451966183e-05, + "loss": 0.9537, + "step": 67248 + }, + { + "epoch": 0.81, + "grad_norm": 8.951922316032203, + "learning_rate": 1.3507882401618528e-05, + "loss": 1.1366, + "step": 67251 + }, + { + "epoch": 0.81, + "grad_norm": 16.90460783555665, + "learning_rate": 1.3507335339299733e-05, + "loss": 1.2321, + "step": 67254 + }, + { + "epoch": 0.81, + "grad_norm": 25.306457012053862, + "learning_rate": 1.3506788265011665e-05, + "loss": 1.4147, + "step": 67257 + }, + { + "epoch": 0.81, + "grad_norm": 5.75076118953212, + "learning_rate": 1.3506241178756192e-05, + "loss": 0.8939, + "step": 67260 + }, + { + "epoch": 0.81, + "grad_norm": 6.631525327120197, + "learning_rate": 1.3505694080535182e-05, + "loss": 1.5615, + "step": 67263 + }, + { + "epoch": 0.81, + "grad_norm": 7.120011617289422, + "learning_rate": 1.3505146970350498e-05, + "loss": 1.4788, + "step": 67266 + }, + { + "epoch": 0.81, + "grad_norm": 18.375582010754982, + "learning_rate": 1.3504599848204013e-05, + "loss": 1.3481, + "step": 67269 + }, + { + "epoch": 0.81, + "grad_norm": 8.552637815509714, + "learning_rate": 1.3504052714097591e-05, + "loss": 1.4901, + "step": 67272 + }, + { + "epoch": 0.81, + "grad_norm": 37.61951949709706, + "learning_rate": 1.3503505568033097e-05, + "loss": 1.4521, + "step": 67275 + }, + { + "epoch": 0.81, + "grad_norm": 21.540443969133534, + "learning_rate": 1.3502958410012401e-05, + "loss": 1.3036, + "step": 67278 + }, + { + "epoch": 0.81, + "grad_norm": 23.2634693457437, + "learning_rate": 1.3502411240037372e-05, + "loss": 1.3459, + "step": 67281 + }, + { + "epoch": 0.81, + "grad_norm": 22.06560755163412, + "learning_rate": 1.3501864058109872e-05, + "loss": 1.3017, + "step": 67284 + }, + { + "epoch": 0.81, + "grad_norm": 9.650984702973942, + "learning_rate": 1.3501316864231773e-05, + "loss": 1.3298, + "step": 67287 + }, + { + "epoch": 0.81, + "grad_norm": 12.6375834878712, + "learning_rate": 1.350076965840494e-05, + "loss": 1.7355, + "step": 67290 + }, + { + "epoch": 0.81, + "grad_norm": 21.151918060332, + "learning_rate": 1.350022244063124e-05, + "loss": 1.5477, + "step": 67293 + }, + { + "epoch": 0.81, + "grad_norm": 29.530721629080393, + "learning_rate": 1.3499675210912546e-05, + "loss": 1.1899, + "step": 67296 + }, + { + "epoch": 0.81, + "grad_norm": 15.166652110851677, + "learning_rate": 1.3499127969250716e-05, + "loss": 1.4776, + "step": 67299 + }, + { + "epoch": 0.81, + "grad_norm": 6.829154330573687, + "learning_rate": 1.3498580715647624e-05, + "loss": 1.4686, + "step": 67302 + }, + { + "epoch": 0.81, + "grad_norm": 6.6111475519897285, + "learning_rate": 1.349803345010514e-05, + "loss": 1.3962, + "step": 67305 + }, + { + "epoch": 0.81, + "grad_norm": 11.04115772622918, + "learning_rate": 1.3497486172625122e-05, + "loss": 1.183, + "step": 67308 + }, + { + "epoch": 0.81, + "grad_norm": 15.045952011350646, + "learning_rate": 1.3496938883209447e-05, + "loss": 1.5466, + "step": 67311 + }, + { + "epoch": 0.81, + "grad_norm": 10.925543497742309, + "learning_rate": 1.349639158185998e-05, + "loss": 1.0577, + "step": 67314 + }, + { + "epoch": 0.81, + "grad_norm": 6.237150304156069, + "learning_rate": 1.3495844268578587e-05, + "loss": 1.2655, + "step": 67317 + }, + { + "epoch": 0.81, + "grad_norm": 2.2789794283096616, + "learning_rate": 1.3495296943367133e-05, + "loss": 1.4639, + "step": 67320 + }, + { + "epoch": 0.81, + "grad_norm": 15.977849001537754, + "learning_rate": 1.3494749606227493e-05, + "loss": 1.6123, + "step": 67323 + }, + { + "epoch": 0.81, + "grad_norm": 8.479557387213909, + "learning_rate": 1.3494202257161533e-05, + "loss": 1.3002, + "step": 67326 + }, + { + "epoch": 0.81, + "grad_norm": 16.333918090079887, + "learning_rate": 1.3493654896171118e-05, + "loss": 1.3871, + "step": 67329 + }, + { + "epoch": 0.81, + "grad_norm": 12.034795251302302, + "learning_rate": 1.3493107523258115e-05, + "loss": 1.2855, + "step": 67332 + }, + { + "epoch": 0.81, + "grad_norm": 5.329473744618807, + "learning_rate": 1.3492560138424395e-05, + "loss": 1.4133, + "step": 67335 + }, + { + "epoch": 0.81, + "grad_norm": 12.34644934933224, + "learning_rate": 1.3492012741671827e-05, + "loss": 1.4077, + "step": 67338 + }, + { + "epoch": 0.81, + "grad_norm": 12.480895508485736, + "learning_rate": 1.3491465333002276e-05, + "loss": 1.2828, + "step": 67341 + }, + { + "epoch": 0.81, + "grad_norm": 17.705650970495125, + "learning_rate": 1.3490917912417612e-05, + "loss": 1.2762, + "step": 67344 + }, + { + "epoch": 0.81, + "grad_norm": 2.625565780583041, + "learning_rate": 1.3490370479919703e-05, + "loss": 1.5025, + "step": 67347 + }, + { + "epoch": 0.81, + "grad_norm": 9.21848946064136, + "learning_rate": 1.3489823035510417e-05, + "loss": 1.0289, + "step": 67350 + }, + { + "epoch": 0.81, + "grad_norm": 6.336630673393975, + "learning_rate": 1.3489275579191619e-05, + "loss": 1.0061, + "step": 67353 + }, + { + "epoch": 0.81, + "grad_norm": 26.265478000271763, + "learning_rate": 1.3488728110965184e-05, + "loss": 1.4471, + "step": 67356 + }, + { + "epoch": 0.81, + "grad_norm": 17.400847138844746, + "learning_rate": 1.3488180630832974e-05, + "loss": 1.3291, + "step": 67359 + }, + { + "epoch": 0.81, + "grad_norm": 14.46140316267949, + "learning_rate": 1.3487633138796861e-05, + "loss": 1.3522, + "step": 67362 + }, + { + "epoch": 0.81, + "grad_norm": 13.547374517677406, + "learning_rate": 1.3487085634858711e-05, + "loss": 1.4811, + "step": 67365 + }, + { + "epoch": 0.81, + "grad_norm": 4.75110242200308, + "learning_rate": 1.3486538119020395e-05, + "loss": 1.5126, + "step": 67368 + }, + { + "epoch": 0.81, + "grad_norm": 10.737634703674845, + "learning_rate": 1.3485990591283784e-05, + "loss": 1.3481, + "step": 67371 + }, + { + "epoch": 0.81, + "grad_norm": 9.114088591444359, + "learning_rate": 1.3485443051650737e-05, + "loss": 1.147, + "step": 67374 + }, + { + "epoch": 0.81, + "grad_norm": 20.49865843410054, + "learning_rate": 1.348489550012313e-05, + "loss": 1.3917, + "step": 67377 + }, + { + "epoch": 0.81, + "grad_norm": 15.732730845654169, + "learning_rate": 1.348434793670283e-05, + "loss": 1.114, + "step": 67380 + }, + { + "epoch": 0.81, + "grad_norm": 43.01507192077437, + "learning_rate": 1.3483800361391707e-05, + "loss": 1.507, + "step": 67383 + }, + { + "epoch": 0.81, + "grad_norm": 41.359046642757036, + "learning_rate": 1.3483252774191626e-05, + "loss": 1.3888, + "step": 67386 + }, + { + "epoch": 0.81, + "grad_norm": 10.69487583772947, + "learning_rate": 1.348270517510446e-05, + "loss": 1.5097, + "step": 67389 + }, + { + "epoch": 0.81, + "grad_norm": 8.489319633793631, + "learning_rate": 1.3482157564132076e-05, + "loss": 1.3566, + "step": 67392 + }, + { + "epoch": 0.81, + "grad_norm": 4.302208662163969, + "learning_rate": 1.348160994127634e-05, + "loss": 1.0736, + "step": 67395 + }, + { + "epoch": 0.81, + "grad_norm": 7.547452521900001, + "learning_rate": 1.3481062306539126e-05, + "loss": 1.5691, + "step": 67398 + }, + { + "epoch": 0.81, + "grad_norm": 18.7394374112875, + "learning_rate": 1.34805146599223e-05, + "loss": 1.5927, + "step": 67401 + }, + { + "epoch": 0.81, + "grad_norm": 4.036003875195913, + "learning_rate": 1.347996700142773e-05, + "loss": 1.6415, + "step": 67404 + }, + { + "epoch": 0.81, + "grad_norm": 15.003208455763211, + "learning_rate": 1.3479419331057285e-05, + "loss": 1.2725, + "step": 67407 + }, + { + "epoch": 0.81, + "grad_norm": 6.267236198343758, + "learning_rate": 1.3478871648812836e-05, + "loss": 1.1956, + "step": 67410 + }, + { + "epoch": 0.81, + "grad_norm": 30.999598274024795, + "learning_rate": 1.3478323954696251e-05, + "loss": 1.3837, + "step": 67413 + }, + { + "epoch": 0.81, + "grad_norm": 22.17933366051817, + "learning_rate": 1.3477776248709398e-05, + "loss": 1.6718, + "step": 67416 + }, + { + "epoch": 0.81, + "grad_norm": 14.26553046423511, + "learning_rate": 1.3477228530854149e-05, + "loss": 1.1921, + "step": 67419 + }, + { + "epoch": 0.81, + "grad_norm": 4.164337492848105, + "learning_rate": 1.3476680801132374e-05, + "loss": 1.1304, + "step": 67422 + }, + { + "epoch": 0.81, + "grad_norm": 20.32713080445839, + "learning_rate": 1.3476133059545937e-05, + "loss": 1.0355, + "step": 67425 + }, + { + "epoch": 0.81, + "grad_norm": 16.707335612259524, + "learning_rate": 1.3475585306096708e-05, + "loss": 1.4702, + "step": 67428 + }, + { + "epoch": 0.81, + "grad_norm": 23.396524062508796, + "learning_rate": 1.3475037540786564e-05, + "loss": 1.2397, + "step": 67431 + }, + { + "epoch": 0.81, + "grad_norm": 2.553428228308382, + "learning_rate": 1.3474489763617362e-05, + "loss": 1.3082, + "step": 67434 + }, + { + "epoch": 0.81, + "grad_norm": 11.908557579444395, + "learning_rate": 1.3473941974590983e-05, + "loss": 1.4857, + "step": 67437 + }, + { + "epoch": 0.81, + "grad_norm": 32.54584123137624, + "learning_rate": 1.3473394173709287e-05, + "loss": 1.1223, + "step": 67440 + }, + { + "epoch": 0.81, + "grad_norm": 14.5101691679443, + "learning_rate": 1.347284636097415e-05, + "loss": 1.9757, + "step": 67443 + }, + { + "epoch": 0.81, + "grad_norm": 47.19859035381539, + "learning_rate": 1.347229853638744e-05, + "loss": 1.4352, + "step": 67446 + }, + { + "epoch": 0.81, + "grad_norm": 6.107845653876031, + "learning_rate": 1.3471750699951023e-05, + "loss": 1.3754, + "step": 67449 + }, + { + "epoch": 0.81, + "grad_norm": 19.749225184793442, + "learning_rate": 1.3471202851666776e-05, + "loss": 1.224, + "step": 67452 + }, + { + "epoch": 0.81, + "grad_norm": 11.782090648824225, + "learning_rate": 1.347065499153656e-05, + "loss": 1.0165, + "step": 67455 + }, + { + "epoch": 0.81, + "grad_norm": 6.030986173335963, + "learning_rate": 1.3470107119562252e-05, + "loss": 1.3958, + "step": 67458 + }, + { + "epoch": 0.81, + "grad_norm": 21.942756826824446, + "learning_rate": 1.3469559235745715e-05, + "loss": 1.4834, + "step": 67461 + }, + { + "epoch": 0.81, + "grad_norm": 10.708350152025517, + "learning_rate": 1.3469011340088827e-05, + "loss": 1.5765, + "step": 67464 + }, + { + "epoch": 0.81, + "grad_norm": 13.826330946174911, + "learning_rate": 1.3468463432593448e-05, + "loss": 1.7879, + "step": 67467 + }, + { + "epoch": 0.81, + "grad_norm": 27.109415473561157, + "learning_rate": 1.3467915513261452e-05, + "loss": 1.3299, + "step": 67470 + }, + { + "epoch": 0.81, + "grad_norm": 3.0701049552889907, + "learning_rate": 1.3467367582094713e-05, + "loss": 1.396, + "step": 67473 + }, + { + "epoch": 0.81, + "grad_norm": 17.732155637320357, + "learning_rate": 1.3466819639095095e-05, + "loss": 1.8402, + "step": 67476 + }, + { + "epoch": 0.81, + "grad_norm": 56.11471621490546, + "learning_rate": 1.3466271684264471e-05, + "loss": 1.3404, + "step": 67479 + }, + { + "epoch": 0.81, + "grad_norm": 22.26645241819693, + "learning_rate": 1.346572371760471e-05, + "loss": 1.5659, + "step": 67482 + }, + { + "epoch": 0.81, + "grad_norm": 37.18806308673831, + "learning_rate": 1.3465175739117685e-05, + "loss": 1.3439, + "step": 67485 + }, + { + "epoch": 0.81, + "grad_norm": 31.084277289815795, + "learning_rate": 1.3464627748805259e-05, + "loss": 1.0736, + "step": 67488 + }, + { + "epoch": 0.81, + "grad_norm": 3.8468206238197333, + "learning_rate": 1.3464079746669309e-05, + "loss": 1.3695, + "step": 67491 + }, + { + "epoch": 0.81, + "grad_norm": 27.850586419662037, + "learning_rate": 1.3463531732711701e-05, + "loss": 1.1092, + "step": 67494 + }, + { + "epoch": 0.81, + "grad_norm": 36.13611097689491, + "learning_rate": 1.346298370693431e-05, + "loss": 1.3313, + "step": 67497 + }, + { + "epoch": 0.81, + "grad_norm": 32.83540741979448, + "learning_rate": 1.3462435669339e-05, + "loss": 1.2521, + "step": 67500 + }, + { + "epoch": 0.81, + "grad_norm": 12.640953984714205, + "learning_rate": 1.3461887619927644e-05, + "loss": 1.369, + "step": 67503 + }, + { + "epoch": 0.81, + "grad_norm": 32.608935210828605, + "learning_rate": 1.3461339558702115e-05, + "loss": 1.3562, + "step": 67506 + }, + { + "epoch": 0.81, + "grad_norm": 22.397237314047434, + "learning_rate": 1.3460791485664279e-05, + "loss": 1.1956, + "step": 67509 + }, + { + "epoch": 0.81, + "grad_norm": 7.324910241331386, + "learning_rate": 1.346024340081601e-05, + "loss": 1.3256, + "step": 67512 + }, + { + "epoch": 0.81, + "grad_norm": 28.446909535086796, + "learning_rate": 1.3459695304159173e-05, + "loss": 0.9778, + "step": 67515 + }, + { + "epoch": 0.81, + "grad_norm": 6.163920533634509, + "learning_rate": 1.3459147195695648e-05, + "loss": 1.5545, + "step": 67518 + }, + { + "epoch": 0.81, + "grad_norm": 7.998083257975125, + "learning_rate": 1.3458599075427294e-05, + "loss": 1.0711, + "step": 67521 + }, + { + "epoch": 0.81, + "grad_norm": 6.824890708346734, + "learning_rate": 1.345805094335599e-05, + "loss": 1.239, + "step": 67524 + }, + { + "epoch": 0.81, + "grad_norm": 9.940309526623437, + "learning_rate": 1.3457502799483604e-05, + "loss": 1.417, + "step": 67527 + }, + { + "epoch": 0.81, + "grad_norm": 3.794949090013183, + "learning_rate": 1.3456954643812004e-05, + "loss": 1.3189, + "step": 67530 + }, + { + "epoch": 0.81, + "grad_norm": 18.741909422491048, + "learning_rate": 1.3456406476343064e-05, + "loss": 1.554, + "step": 67533 + }, + { + "epoch": 0.81, + "grad_norm": 14.663830703147156, + "learning_rate": 1.3455858297078652e-05, + "loss": 1.2773, + "step": 67536 + }, + { + "epoch": 0.81, + "grad_norm": 15.544883084877888, + "learning_rate": 1.3455310106020644e-05, + "loss": 1.0593, + "step": 67539 + }, + { + "epoch": 0.81, + "grad_norm": 11.052798253702655, + "learning_rate": 1.3454761903170905e-05, + "loss": 1.491, + "step": 67542 + }, + { + "epoch": 0.81, + "grad_norm": 33.656031219418416, + "learning_rate": 1.345421368853131e-05, + "loss": 1.4163, + "step": 67545 + }, + { + "epoch": 0.81, + "grad_norm": 37.20012884505561, + "learning_rate": 1.3453665462103726e-05, + "loss": 1.3772, + "step": 67548 + }, + { + "epoch": 0.81, + "grad_norm": 17.707629346594857, + "learning_rate": 1.3453117223890027e-05, + "loss": 1.2583, + "step": 67551 + }, + { + "epoch": 0.81, + "grad_norm": 12.823133453289962, + "learning_rate": 1.3452568973892082e-05, + "loss": 1.3532, + "step": 67554 + }, + { + "epoch": 0.81, + "grad_norm": 17.087970604859688, + "learning_rate": 1.345202071211176e-05, + "loss": 1.3475, + "step": 67557 + }, + { + "epoch": 0.81, + "grad_norm": 31.016710108397795, + "learning_rate": 1.345147243855094e-05, + "loss": 1.4785, + "step": 67560 + }, + { + "epoch": 0.81, + "grad_norm": 19.36763210736845, + "learning_rate": 1.3450924153211485e-05, + "loss": 1.3126, + "step": 67563 + }, + { + "epoch": 0.81, + "grad_norm": 7.169909135646204, + "learning_rate": 1.3450375856095267e-05, + "loss": 1.7197, + "step": 67566 + }, + { + "epoch": 0.81, + "grad_norm": 18.365720358487575, + "learning_rate": 1.3449827547204163e-05, + "loss": 1.2159, + "step": 67569 + }, + { + "epoch": 0.81, + "grad_norm": 11.574355888623838, + "learning_rate": 1.344927922654004e-05, + "loss": 1.201, + "step": 67572 + }, + { + "epoch": 0.81, + "grad_norm": 10.782635955099073, + "learning_rate": 1.3448730894104765e-05, + "loss": 1.4293, + "step": 67575 + }, + { + "epoch": 0.81, + "grad_norm": 21.09492555567845, + "learning_rate": 1.3448182549900215e-05, + "loss": 1.4793, + "step": 67578 + }, + { + "epoch": 0.81, + "grad_norm": 7.479021865251064, + "learning_rate": 1.3447634193928262e-05, + "loss": 1.3285, + "step": 67581 + }, + { + "epoch": 0.81, + "grad_norm": 3.2158717347452566, + "learning_rate": 1.3447085826190776e-05, + "loss": 1.3112, + "step": 67584 + }, + { + "epoch": 0.81, + "grad_norm": 3.9127001540794106, + "learning_rate": 1.3446537446689627e-05, + "loss": 1.0356, + "step": 67587 + }, + { + "epoch": 0.81, + "grad_norm": 42.80465057435661, + "learning_rate": 1.3445989055426686e-05, + "loss": 1.5863, + "step": 67590 + }, + { + "epoch": 0.81, + "grad_norm": 7.936549314373272, + "learning_rate": 1.3445440652403827e-05, + "loss": 1.6247, + "step": 67593 + }, + { + "epoch": 0.81, + "grad_norm": 4.5057280895605505, + "learning_rate": 1.3444892237622917e-05, + "loss": 1.2429, + "step": 67596 + }, + { + "epoch": 0.81, + "grad_norm": 11.680052990061299, + "learning_rate": 1.3444343811085833e-05, + "loss": 1.3973, + "step": 67599 + }, + { + "epoch": 0.81, + "grad_norm": 19.717278793321558, + "learning_rate": 1.3443795372794446e-05, + "loss": 1.4686, + "step": 67602 + }, + { + "epoch": 0.81, + "grad_norm": 16.87688482514811, + "learning_rate": 1.3443246922750621e-05, + "loss": 1.5251, + "step": 67605 + }, + { + "epoch": 0.81, + "grad_norm": 18.364751218387212, + "learning_rate": 1.344269846095624e-05, + "loss": 1.031, + "step": 67608 + }, + { + "epoch": 0.81, + "grad_norm": 15.388765831900319, + "learning_rate": 1.3442149987413166e-05, + "loss": 1.0749, + "step": 67611 + }, + { + "epoch": 0.81, + "grad_norm": 10.860682938362386, + "learning_rate": 1.3441601502123275e-05, + "loss": 1.3405, + "step": 67614 + }, + { + "epoch": 0.81, + "grad_norm": 7.634505258112642, + "learning_rate": 1.3441053005088436e-05, + "loss": 1.3868, + "step": 67617 + }, + { + "epoch": 0.81, + "grad_norm": 11.204859125329875, + "learning_rate": 1.3440504496310525e-05, + "loss": 1.0838, + "step": 67620 + }, + { + "epoch": 0.81, + "grad_norm": 10.492191818631706, + "learning_rate": 1.343995597579141e-05, + "loss": 1.3496, + "step": 67623 + }, + { + "epoch": 0.81, + "grad_norm": 30.7227708028237, + "learning_rate": 1.3439407443532966e-05, + "loss": 1.1379, + "step": 67626 + }, + { + "epoch": 0.81, + "grad_norm": 7.098860972729263, + "learning_rate": 1.3438858899537062e-05, + "loss": 1.1327, + "step": 67629 + }, + { + "epoch": 0.81, + "grad_norm": 6.415917335501943, + "learning_rate": 1.3438310343805568e-05, + "loss": 0.746, + "step": 67632 + }, + { + "epoch": 0.81, + "grad_norm": 9.691985563977724, + "learning_rate": 1.3437761776340366e-05, + "loss": 1.2301, + "step": 67635 + }, + { + "epoch": 0.81, + "grad_norm": 17.26109451554833, + "learning_rate": 1.3437213197143315e-05, + "loss": 1.4729, + "step": 67638 + }, + { + "epoch": 0.81, + "grad_norm": 37.53980565702518, + "learning_rate": 1.3436664606216296e-05, + "loss": 1.0087, + "step": 67641 + }, + { + "epoch": 0.81, + "grad_norm": 38.4047157614266, + "learning_rate": 1.343611600356118e-05, + "loss": 1.2993, + "step": 67644 + }, + { + "epoch": 0.81, + "grad_norm": 27.95109150547175, + "learning_rate": 1.3435567389179837e-05, + "loss": 1.4666, + "step": 67647 + }, + { + "epoch": 0.81, + "grad_norm": 6.812720978029432, + "learning_rate": 1.3435018763074139e-05, + "loss": 1.0645, + "step": 67650 + }, + { + "epoch": 0.81, + "grad_norm": 16.269027681516008, + "learning_rate": 1.343447012524596e-05, + "loss": 1.5894, + "step": 67653 + }, + { + "epoch": 0.81, + "grad_norm": 4.207257916472358, + "learning_rate": 1.3433921475697172e-05, + "loss": 1.2542, + "step": 67656 + }, + { + "epoch": 0.81, + "grad_norm": 11.05632742414192, + "learning_rate": 1.3433372814429644e-05, + "loss": 1.3455, + "step": 67659 + }, + { + "epoch": 0.81, + "grad_norm": 20.121749864326688, + "learning_rate": 1.3432824141445253e-05, + "loss": 1.7858, + "step": 67662 + }, + { + "epoch": 0.81, + "grad_norm": 10.684189640624702, + "learning_rate": 1.3432275456745871e-05, + "loss": 1.5945, + "step": 67665 + }, + { + "epoch": 0.81, + "grad_norm": 15.005475518660019, + "learning_rate": 1.343172676033337e-05, + "loss": 1.2899, + "step": 67668 + }, + { + "epoch": 0.81, + "grad_norm": 7.092836274890489, + "learning_rate": 1.343117805220962e-05, + "loss": 1.5745, + "step": 67671 + }, + { + "epoch": 0.81, + "grad_norm": 24.619951535485463, + "learning_rate": 1.3430629332376495e-05, + "loss": 1.5069, + "step": 67674 + }, + { + "epoch": 0.81, + "grad_norm": 8.085337499852828, + "learning_rate": 1.343008060083587e-05, + "loss": 1.3239, + "step": 67677 + }, + { + "epoch": 0.81, + "grad_norm": 16.913121127862134, + "learning_rate": 1.3429531857589613e-05, + "loss": 1.3193, + "step": 67680 + }, + { + "epoch": 0.81, + "grad_norm": 10.244985406455108, + "learning_rate": 1.3428983102639597e-05, + "loss": 1.201, + "step": 67683 + }, + { + "epoch": 0.81, + "grad_norm": 12.473539978166308, + "learning_rate": 1.3428434335987702e-05, + "loss": 1.4599, + "step": 67686 + }, + { + "epoch": 0.81, + "grad_norm": 97.72280740185617, + "learning_rate": 1.3427885557635792e-05, + "loss": 1.6194, + "step": 67689 + }, + { + "epoch": 0.81, + "grad_norm": 4.645766075958806, + "learning_rate": 1.3427336767585742e-05, + "loss": 1.6471, + "step": 67692 + }, + { + "epoch": 0.81, + "grad_norm": 14.81653172307152, + "learning_rate": 1.342678796583943e-05, + "loss": 1.088, + "step": 67695 + }, + { + "epoch": 0.81, + "grad_norm": 15.321851227592388, + "learning_rate": 1.3426239152398724e-05, + "loss": 1.1945, + "step": 67698 + }, + { + "epoch": 0.81, + "grad_norm": 20.671349695579575, + "learning_rate": 1.3425690327265498e-05, + "loss": 1.4547, + "step": 67701 + }, + { + "epoch": 0.81, + "grad_norm": 5.401807303199367, + "learning_rate": 1.342514149044162e-05, + "loss": 1.5531, + "step": 67704 + }, + { + "epoch": 0.81, + "grad_norm": 25.56068872666652, + "learning_rate": 1.3424592641928972e-05, + "loss": 1.2759, + "step": 67707 + }, + { + "epoch": 0.81, + "grad_norm": 7.045773999498888, + "learning_rate": 1.3424043781729426e-05, + "loss": 1.4337, + "step": 67710 + }, + { + "epoch": 0.81, + "grad_norm": 8.713418927741465, + "learning_rate": 1.3423494909844845e-05, + "loss": 1.2078, + "step": 67713 + }, + { + "epoch": 0.81, + "grad_norm": 12.510761530586372, + "learning_rate": 1.3422946026277112e-05, + "loss": 1.5391, + "step": 67716 + }, + { + "epoch": 0.81, + "grad_norm": 17.852535253890768, + "learning_rate": 1.34223971310281e-05, + "loss": 1.1465, + "step": 67719 + }, + { + "epoch": 0.81, + "grad_norm": 7.236868192489584, + "learning_rate": 1.3421848224099678e-05, + "loss": 1.3238, + "step": 67722 + }, + { + "epoch": 0.81, + "grad_norm": 31.7283936562993, + "learning_rate": 1.3421299305493719e-05, + "loss": 1.4716, + "step": 67725 + }, + { + "epoch": 0.81, + "grad_norm": 23.614908563473232, + "learning_rate": 1.3420750375212097e-05, + "loss": 1.227, + "step": 67728 + }, + { + "epoch": 0.81, + "grad_norm": 26.81476989690292, + "learning_rate": 1.342020143325669e-05, + "loss": 1.7078, + "step": 67731 + }, + { + "epoch": 0.81, + "grad_norm": 19.500313945362844, + "learning_rate": 1.3419652479629366e-05, + "loss": 1.2718, + "step": 67734 + }, + { + "epoch": 0.81, + "grad_norm": 2.12301527525668, + "learning_rate": 1.3419103514331997e-05, + "loss": 1.5948, + "step": 67737 + }, + { + "epoch": 0.81, + "grad_norm": 12.156782431911937, + "learning_rate": 1.3418554537366461e-05, + "loss": 1.3858, + "step": 67740 + }, + { + "epoch": 0.81, + "grad_norm": 21.932351300542734, + "learning_rate": 1.341800554873463e-05, + "loss": 1.1736, + "step": 67743 + }, + { + "epoch": 0.81, + "grad_norm": 7.407483742431923, + "learning_rate": 1.3417456548438376e-05, + "loss": 1.2746, + "step": 67746 + }, + { + "epoch": 0.81, + "grad_norm": 24.886769707959225, + "learning_rate": 1.3416907536479575e-05, + "loss": 1.5204, + "step": 67749 + }, + { + "epoch": 0.81, + "grad_norm": 11.882662178789964, + "learning_rate": 1.3416358512860099e-05, + "loss": 1.5151, + "step": 67752 + }, + { + "epoch": 0.81, + "grad_norm": 27.64454221487935, + "learning_rate": 1.3415809477581823e-05, + "loss": 1.4162, + "step": 67755 + }, + { + "epoch": 0.81, + "grad_norm": 9.523803679505333, + "learning_rate": 1.3415260430646616e-05, + "loss": 1.2531, + "step": 67758 + }, + { + "epoch": 0.81, + "grad_norm": 3.4861098892218005, + "learning_rate": 1.3414711372056361e-05, + "loss": 1.3705, + "step": 67761 + }, + { + "epoch": 0.81, + "grad_norm": 11.326191787847655, + "learning_rate": 1.3414162301812924e-05, + "loss": 1.4436, + "step": 67764 + }, + { + "epoch": 0.81, + "grad_norm": 6.350706028002877, + "learning_rate": 1.3413613219918178e-05, + "loss": 1.6185, + "step": 67767 + }, + { + "epoch": 0.81, + "grad_norm": 25.607426004380365, + "learning_rate": 1.3413064126374002e-05, + "loss": 1.4493, + "step": 67770 + }, + { + "epoch": 0.81, + "grad_norm": 13.745397414945913, + "learning_rate": 1.3412515021182268e-05, + "loss": 1.2305, + "step": 67773 + }, + { + "epoch": 0.81, + "grad_norm": 27.737842666400763, + "learning_rate": 1.341196590434485e-05, + "loss": 1.2917, + "step": 67776 + }, + { + "epoch": 0.82, + "grad_norm": 11.624200400043085, + "learning_rate": 1.3411416775863617e-05, + "loss": 1.3822, + "step": 67779 + }, + { + "epoch": 0.82, + "grad_norm": 8.299905782510015, + "learning_rate": 1.3410867635740452e-05, + "loss": 1.2064, + "step": 67782 + }, + { + "epoch": 0.82, + "grad_norm": 11.082385702539089, + "learning_rate": 1.3410318483977223e-05, + "loss": 1.3373, + "step": 67785 + }, + { + "epoch": 0.82, + "grad_norm": 14.907748563942267, + "learning_rate": 1.3409769320575806e-05, + "loss": 1.2715, + "step": 67788 + }, + { + "epoch": 0.82, + "grad_norm": 4.66016568517091, + "learning_rate": 1.3409220145538072e-05, + "loss": 1.2837, + "step": 67791 + }, + { + "epoch": 0.82, + "grad_norm": 27.18942618988597, + "learning_rate": 1.34086709588659e-05, + "loss": 1.1685, + "step": 67794 + }, + { + "epoch": 0.82, + "grad_norm": 14.81190895774382, + "learning_rate": 1.3408121760561162e-05, + "loss": 1.222, + "step": 67797 + }, + { + "epoch": 0.82, + "grad_norm": 12.462182055855225, + "learning_rate": 1.340757255062573e-05, + "loss": 1.2815, + "step": 67800 + }, + { + "epoch": 0.82, + "grad_norm": 11.660090850249551, + "learning_rate": 1.340702332906148e-05, + "loss": 1.3326, + "step": 67803 + }, + { + "epoch": 0.82, + "grad_norm": 14.004201985670306, + "learning_rate": 1.3406474095870287e-05, + "loss": 1.3269, + "step": 67806 + }, + { + "epoch": 0.82, + "grad_norm": 15.157068046868954, + "learning_rate": 1.3405924851054028e-05, + "loss": 0.9658, + "step": 67809 + }, + { + "epoch": 0.82, + "grad_norm": 5.503827563788221, + "learning_rate": 1.3405375594614571e-05, + "loss": 1.5521, + "step": 67812 + }, + { + "epoch": 0.82, + "grad_norm": 6.158119685244015, + "learning_rate": 1.3404826326553795e-05, + "loss": 1.3768, + "step": 67815 + }, + { + "epoch": 0.82, + "grad_norm": 20.743966163783135, + "learning_rate": 1.3404277046873574e-05, + "loss": 1.6263, + "step": 67818 + }, + { + "epoch": 0.82, + "grad_norm": 6.84307802871161, + "learning_rate": 1.3403727755575777e-05, + "loss": 1.4618, + "step": 67821 + }, + { + "epoch": 0.82, + "grad_norm": 8.803126133261532, + "learning_rate": 1.3403178452662288e-05, + "loss": 1.7348, + "step": 67824 + }, + { + "epoch": 0.82, + "grad_norm": 5.183957837076258, + "learning_rate": 1.3402629138134977e-05, + "loss": 1.4796, + "step": 67827 + }, + { + "epoch": 0.82, + "grad_norm": 6.881500710458455, + "learning_rate": 1.3402079811995718e-05, + "loss": 1.4544, + "step": 67830 + }, + { + "epoch": 0.82, + "grad_norm": 16.83390839547877, + "learning_rate": 1.3401530474246382e-05, + "loss": 1.175, + "step": 67833 + }, + { + "epoch": 0.82, + "grad_norm": 23.30004094960719, + "learning_rate": 1.3400981124888852e-05, + "loss": 1.4811, + "step": 67836 + }, + { + "epoch": 0.82, + "grad_norm": 41.38222740454802, + "learning_rate": 1.3400431763924996e-05, + "loss": 1.1522, + "step": 67839 + }, + { + "epoch": 0.82, + "grad_norm": 35.37793868840758, + "learning_rate": 1.3399882391356695e-05, + "loss": 1.0137, + "step": 67842 + }, + { + "epoch": 0.82, + "grad_norm": 15.786075340893149, + "learning_rate": 1.3399333007185815e-05, + "loss": 1.4815, + "step": 67845 + }, + { + "epoch": 0.82, + "grad_norm": 11.821303980922272, + "learning_rate": 1.339878361141424e-05, + "loss": 1.2928, + "step": 67848 + }, + { + "epoch": 0.82, + "grad_norm": 5.718046853561758, + "learning_rate": 1.3398234204043838e-05, + "loss": 1.429, + "step": 67851 + }, + { + "epoch": 0.82, + "grad_norm": 22.820895181502618, + "learning_rate": 1.3397684785076489e-05, + "loss": 1.5131, + "step": 67854 + }, + { + "epoch": 0.82, + "grad_norm": 9.355993317784291, + "learning_rate": 1.3397135354514064e-05, + "loss": 1.7103, + "step": 67857 + }, + { + "epoch": 0.82, + "grad_norm": 6.237132136916537, + "learning_rate": 1.339658591235844e-05, + "loss": 1.4492, + "step": 67860 + }, + { + "epoch": 0.82, + "grad_norm": 45.970903319478694, + "learning_rate": 1.3396036458611492e-05, + "loss": 1.3821, + "step": 67863 + }, + { + "epoch": 0.82, + "grad_norm": 18.432733596076865, + "learning_rate": 1.3395486993275092e-05, + "loss": 1.3014, + "step": 67866 + }, + { + "epoch": 0.82, + "grad_norm": 9.512876969189302, + "learning_rate": 1.3394937516351123e-05, + "loss": 1.2557, + "step": 67869 + }, + { + "epoch": 0.82, + "grad_norm": 36.724019898600915, + "learning_rate": 1.3394388027841453e-05, + "loss": 1.6705, + "step": 67872 + }, + { + "epoch": 0.82, + "grad_norm": 17.96340795902824, + "learning_rate": 1.3393838527747958e-05, + "loss": 1.0627, + "step": 67875 + }, + { + "epoch": 0.82, + "grad_norm": 9.001352098866235, + "learning_rate": 1.3393289016072515e-05, + "loss": 1.5094, + "step": 67878 + }, + { + "epoch": 0.82, + "grad_norm": 4.6185113695433815, + "learning_rate": 1.3392739492816999e-05, + "loss": 0.9095, + "step": 67881 + }, + { + "epoch": 0.82, + "grad_norm": 9.072924963145779, + "learning_rate": 1.3392189957983286e-05, + "loss": 1.1742, + "step": 67884 + }, + { + "epoch": 0.82, + "grad_norm": 12.257202421530968, + "learning_rate": 1.339164041157325e-05, + "loss": 1.7113, + "step": 67887 + }, + { + "epoch": 0.82, + "grad_norm": 6.991223673520937, + "learning_rate": 1.3391090853588766e-05, + "loss": 1.4697, + "step": 67890 + }, + { + "epoch": 0.82, + "grad_norm": 11.075594819452453, + "learning_rate": 1.3390541284031713e-05, + "loss": 1.5288, + "step": 67893 + }, + { + "epoch": 0.82, + "grad_norm": 4.623937416805812, + "learning_rate": 1.3389991702903963e-05, + "loss": 1.055, + "step": 67896 + }, + { + "epoch": 0.82, + "grad_norm": 12.509462800674969, + "learning_rate": 1.338944211020739e-05, + "loss": 1.3855, + "step": 67899 + }, + { + "epoch": 0.82, + "grad_norm": 6.365359598164599, + "learning_rate": 1.3388892505943875e-05, + "loss": 1.2562, + "step": 67902 + }, + { + "epoch": 0.82, + "grad_norm": 15.57547298705376, + "learning_rate": 1.338834289011529e-05, + "loss": 1.4439, + "step": 67905 + }, + { + "epoch": 0.82, + "grad_norm": 10.297605136837864, + "learning_rate": 1.3387793262723508e-05, + "loss": 1.1612, + "step": 67908 + }, + { + "epoch": 0.82, + "grad_norm": 24.787624714599602, + "learning_rate": 1.338724362377041e-05, + "loss": 1.2836, + "step": 67911 + }, + { + "epoch": 0.82, + "grad_norm": 48.74860597104899, + "learning_rate": 1.3386693973257871e-05, + "loss": 1.0878, + "step": 67914 + }, + { + "epoch": 0.82, + "grad_norm": 8.519129699955888, + "learning_rate": 1.3386144311187766e-05, + "loss": 1.4574, + "step": 67917 + }, + { + "epoch": 0.82, + "grad_norm": 46.50957545920361, + "learning_rate": 1.3385594637561965e-05, + "loss": 1.3759, + "step": 67920 + }, + { + "epoch": 0.82, + "grad_norm": 41.51504499159231, + "learning_rate": 1.3385044952382355e-05, + "loss": 1.0555, + "step": 67923 + }, + { + "epoch": 0.82, + "grad_norm": 17.047693903118137, + "learning_rate": 1.3384495255650803e-05, + "loss": 1.23, + "step": 67926 + }, + { + "epoch": 0.82, + "grad_norm": 31.276126100277498, + "learning_rate": 1.3383945547369186e-05, + "loss": 1.2556, + "step": 67929 + }, + { + "epoch": 0.82, + "grad_norm": 24.982460115350783, + "learning_rate": 1.3383395827539384e-05, + "loss": 1.4516, + "step": 67932 + }, + { + "epoch": 0.82, + "grad_norm": 20.696023531224878, + "learning_rate": 1.3382846096163272e-05, + "loss": 1.1986, + "step": 67935 + }, + { + "epoch": 0.82, + "grad_norm": 9.876938869063466, + "learning_rate": 1.3382296353242724e-05, + "loss": 1.4604, + "step": 67938 + }, + { + "epoch": 0.82, + "grad_norm": 7.344237941527781, + "learning_rate": 1.3381746598779612e-05, + "loss": 1.5916, + "step": 67941 + }, + { + "epoch": 0.82, + "grad_norm": 3.841019083974416, + "learning_rate": 1.3381196832775825e-05, + "loss": 1.1321, + "step": 67944 + }, + { + "epoch": 0.82, + "grad_norm": 34.60678589227559, + "learning_rate": 1.3380647055233227e-05, + "loss": 1.335, + "step": 67947 + }, + { + "epoch": 0.82, + "grad_norm": 13.4925953912811, + "learning_rate": 1.3380097266153698e-05, + "loss": 1.3349, + "step": 67950 + }, + { + "epoch": 0.82, + "grad_norm": 23.749936517964823, + "learning_rate": 1.3379547465539113e-05, + "loss": 1.4727, + "step": 67953 + }, + { + "epoch": 0.82, + "grad_norm": 12.078240416876465, + "learning_rate": 1.3378997653391353e-05, + "loss": 1.6597, + "step": 67956 + }, + { + "epoch": 0.82, + "grad_norm": 18.525188371020047, + "learning_rate": 1.3378447829712288e-05, + "loss": 1.5692, + "step": 67959 + }, + { + "epoch": 0.82, + "grad_norm": 37.05615371703006, + "learning_rate": 1.3377897994503799e-05, + "loss": 1.3524, + "step": 67962 + }, + { + "epoch": 0.82, + "grad_norm": 23.598586609564425, + "learning_rate": 1.3377348147767763e-05, + "loss": 1.3487, + "step": 67965 + }, + { + "epoch": 0.82, + "grad_norm": 10.433640698071619, + "learning_rate": 1.3376798289506053e-05, + "loss": 1.4018, + "step": 67968 + }, + { + "epoch": 0.82, + "grad_norm": 6.062570732224384, + "learning_rate": 1.3376248419720545e-05, + "loss": 1.2606, + "step": 67971 + }, + { + "epoch": 0.82, + "grad_norm": 10.232280536236539, + "learning_rate": 1.3375698538413118e-05, + "loss": 1.394, + "step": 67974 + }, + { + "epoch": 0.82, + "grad_norm": 4.221565097682613, + "learning_rate": 1.3375148645585652e-05, + "loss": 1.4498, + "step": 67977 + }, + { + "epoch": 0.82, + "grad_norm": 27.59249455677885, + "learning_rate": 1.3374598741240013e-05, + "loss": 1.3135, + "step": 67980 + }, + { + "epoch": 0.82, + "grad_norm": 18.012000539751952, + "learning_rate": 1.3374048825378085e-05, + "loss": 1.1838, + "step": 67983 + }, + { + "epoch": 0.82, + "grad_norm": 7.765511586567523, + "learning_rate": 1.3373498898001748e-05, + "loss": 1.2268, + "step": 67986 + }, + { + "epoch": 0.82, + "grad_norm": 16.67287598399637, + "learning_rate": 1.337294895911287e-05, + "loss": 1.4017, + "step": 67989 + }, + { + "epoch": 0.82, + "grad_norm": 6.02378180559125, + "learning_rate": 1.3372399008713335e-05, + "loss": 1.3603, + "step": 67992 + }, + { + "epoch": 0.82, + "grad_norm": 15.727129363964467, + "learning_rate": 1.3371849046805015e-05, + "loss": 1.4407, + "step": 67995 + }, + { + "epoch": 0.82, + "grad_norm": 15.877475249197763, + "learning_rate": 1.3371299073389792e-05, + "loss": 1.4337, + "step": 67998 + }, + { + "epoch": 0.82, + "grad_norm": 12.922603820939223, + "learning_rate": 1.3370749088469537e-05, + "loss": 1.0831, + "step": 68001 + }, + { + "epoch": 0.82, + "grad_norm": 7.509631630819806, + "learning_rate": 1.3370199092046129e-05, + "loss": 1.3837, + "step": 68004 + }, + { + "epoch": 0.82, + "grad_norm": 8.318106416562127, + "learning_rate": 1.3369649084121448e-05, + "loss": 1.7621, + "step": 68007 + }, + { + "epoch": 0.82, + "grad_norm": 44.70753594015618, + "learning_rate": 1.3369099064697364e-05, + "loss": 0.9333, + "step": 68010 + }, + { + "epoch": 0.82, + "grad_norm": 6.457979868919684, + "learning_rate": 1.3368549033775762e-05, + "loss": 1.2124, + "step": 68013 + }, + { + "epoch": 0.82, + "grad_norm": 2.9789659678593994, + "learning_rate": 1.3367998991358513e-05, + "loss": 1.3233, + "step": 68016 + }, + { + "epoch": 0.82, + "grad_norm": 30.4144294321394, + "learning_rate": 1.33674489374475e-05, + "loss": 1.5846, + "step": 68019 + }, + { + "epoch": 0.82, + "grad_norm": 17.82180130964719, + "learning_rate": 1.3366898872044594e-05, + "loss": 1.4153, + "step": 68022 + }, + { + "epoch": 0.82, + "grad_norm": 12.749287067587593, + "learning_rate": 1.3366348795151675e-05, + "loss": 1.5718, + "step": 68025 + }, + { + "epoch": 0.82, + "grad_norm": 7.243651494235466, + "learning_rate": 1.3365798706770621e-05, + "loss": 1.4638, + "step": 68028 + }, + { + "epoch": 0.82, + "grad_norm": 20.807387581784084, + "learning_rate": 1.336524860690331e-05, + "loss": 1.2103, + "step": 68031 + }, + { + "epoch": 0.82, + "grad_norm": 39.37963394096755, + "learning_rate": 1.3364698495551614e-05, + "loss": 1.3068, + "step": 68034 + }, + { + "epoch": 0.82, + "grad_norm": 15.516285957011082, + "learning_rate": 1.3364148372717413e-05, + "loss": 1.3406, + "step": 68037 + }, + { + "epoch": 0.82, + "grad_norm": 4.200648401631344, + "learning_rate": 1.3363598238402591e-05, + "loss": 1.3299, + "step": 68040 + }, + { + "epoch": 0.82, + "grad_norm": 10.244094690792789, + "learning_rate": 1.3363048092609016e-05, + "loss": 1.4549, + "step": 68043 + }, + { + "epoch": 0.82, + "grad_norm": 5.77839149406497, + "learning_rate": 1.3362497935338573e-05, + "loss": 1.4537, + "step": 68046 + }, + { + "epoch": 0.82, + "grad_norm": 6.823738847292591, + "learning_rate": 1.3361947766593129e-05, + "loss": 1.3986, + "step": 68049 + }, + { + "epoch": 0.82, + "grad_norm": 8.752159793041654, + "learning_rate": 1.3361397586374576e-05, + "loss": 1.3652, + "step": 68052 + }, + { + "epoch": 0.82, + "grad_norm": 13.272472452536785, + "learning_rate": 1.3360847394684778e-05, + "loss": 1.1891, + "step": 68055 + }, + { + "epoch": 0.82, + "grad_norm": 7.800483636448671, + "learning_rate": 1.336029719152562e-05, + "loss": 1.3022, + "step": 68058 + }, + { + "epoch": 0.82, + "grad_norm": 5.477550604511851, + "learning_rate": 1.335974697689898e-05, + "loss": 1.2215, + "step": 68061 + }, + { + "epoch": 0.82, + "grad_norm": 19.641851830040054, + "learning_rate": 1.3359196750806731e-05, + "loss": 1.3212, + "step": 68064 + }, + { + "epoch": 0.82, + "grad_norm": 41.711715456514746, + "learning_rate": 1.3358646513250755e-05, + "loss": 1.1458, + "step": 68067 + }, + { + "epoch": 0.82, + "grad_norm": 18.35923412229394, + "learning_rate": 1.3358096264232926e-05, + "loss": 1.7463, + "step": 68070 + }, + { + "epoch": 0.82, + "grad_norm": 9.607595888230232, + "learning_rate": 1.3357546003755129e-05, + "loss": 1.5201, + "step": 68073 + }, + { + "epoch": 0.82, + "grad_norm": 10.540581898927414, + "learning_rate": 1.3356995731819232e-05, + "loss": 1.0369, + "step": 68076 + }, + { + "epoch": 0.82, + "grad_norm": 38.90563269049481, + "learning_rate": 1.335644544842712e-05, + "loss": 1.0624, + "step": 68079 + }, + { + "epoch": 0.82, + "grad_norm": 30.646928649234766, + "learning_rate": 1.335589515358067e-05, + "loss": 1.3749, + "step": 68082 + }, + { + "epoch": 0.82, + "grad_norm": 9.944395241564296, + "learning_rate": 1.3355344847281754e-05, + "loss": 1.0578, + "step": 68085 + }, + { + "epoch": 0.82, + "grad_norm": 3.857905917943087, + "learning_rate": 1.3354794529532257e-05, + "loss": 1.4383, + "step": 68088 + }, + { + "epoch": 0.82, + "grad_norm": 15.037177797459142, + "learning_rate": 1.3354244200334055e-05, + "loss": 1.2926, + "step": 68091 + }, + { + "epoch": 0.82, + "grad_norm": 29.128121081648185, + "learning_rate": 1.3353693859689027e-05, + "loss": 1.0987, + "step": 68094 + }, + { + "epoch": 0.82, + "grad_norm": 28.584334315179447, + "learning_rate": 1.335314350759905e-05, + "loss": 1.1172, + "step": 68097 + }, + { + "epoch": 0.82, + "grad_norm": 8.96459563071628, + "learning_rate": 1.3352593144065999e-05, + "loss": 1.3141, + "step": 68100 + }, + { + "epoch": 0.82, + "grad_norm": 12.705758899160172, + "learning_rate": 1.3352042769091757e-05, + "loss": 1.1232, + "step": 68103 + }, + { + "epoch": 0.82, + "grad_norm": 8.073540804613412, + "learning_rate": 1.3351492382678202e-05, + "loss": 1.5528, + "step": 68106 + }, + { + "epoch": 0.82, + "grad_norm": 10.041239281177264, + "learning_rate": 1.3350941984827208e-05, + "loss": 1.4436, + "step": 68109 + }, + { + "epoch": 0.82, + "grad_norm": 6.789089129995132, + "learning_rate": 1.3350391575540658e-05, + "loss": 1.341, + "step": 68112 + }, + { + "epoch": 0.82, + "grad_norm": 6.065255873624518, + "learning_rate": 1.3349841154820427e-05, + "loss": 1.5655, + "step": 68115 + }, + { + "epoch": 0.82, + "grad_norm": 7.7421800810803285, + "learning_rate": 1.3349290722668394e-05, + "loss": 1.213, + "step": 68118 + }, + { + "epoch": 0.82, + "grad_norm": 14.528036538411175, + "learning_rate": 1.334874027908644e-05, + "loss": 1.2484, + "step": 68121 + }, + { + "epoch": 0.82, + "grad_norm": 12.628294159668929, + "learning_rate": 1.334818982407644e-05, + "loss": 1.4838, + "step": 68124 + }, + { + "epoch": 0.82, + "grad_norm": 10.611086384982624, + "learning_rate": 1.3347639357640276e-05, + "loss": 1.5863, + "step": 68127 + }, + { + "epoch": 0.82, + "grad_norm": 21.30374080318242, + "learning_rate": 1.3347088879779824e-05, + "loss": 1.7357, + "step": 68130 + }, + { + "epoch": 0.82, + "grad_norm": 20.380759132022593, + "learning_rate": 1.3346538390496963e-05, + "loss": 1.3737, + "step": 68133 + }, + { + "epoch": 0.82, + "grad_norm": 33.02021604451439, + "learning_rate": 1.3345987889793573e-05, + "loss": 1.6015, + "step": 68136 + }, + { + "epoch": 0.82, + "grad_norm": 24.12530049141091, + "learning_rate": 1.334543737767153e-05, + "loss": 1.3136, + "step": 68139 + }, + { + "epoch": 0.82, + "grad_norm": 22.547393676328618, + "learning_rate": 1.3344886854132713e-05, + "loss": 1.6887, + "step": 68142 + }, + { + "epoch": 0.82, + "grad_norm": 10.67185696943894, + "learning_rate": 1.3344336319179005e-05, + "loss": 1.4544, + "step": 68145 + }, + { + "epoch": 0.82, + "grad_norm": 18.59111797763058, + "learning_rate": 1.3343785772812281e-05, + "loss": 1.5573, + "step": 68148 + }, + { + "epoch": 0.82, + "grad_norm": 4.179248359904746, + "learning_rate": 1.3343235215034418e-05, + "loss": 1.3329, + "step": 68151 + }, + { + "epoch": 0.82, + "grad_norm": 13.496338433419044, + "learning_rate": 1.3342684645847301e-05, + "loss": 1.3631, + "step": 68154 + }, + { + "epoch": 0.82, + "grad_norm": 18.87164260861596, + "learning_rate": 1.3342134065252803e-05, + "loss": 1.3075, + "step": 68157 + }, + { + "epoch": 0.82, + "grad_norm": 6.152421663715517, + "learning_rate": 1.3341583473252808e-05, + "loss": 1.7703, + "step": 68160 + }, + { + "epoch": 0.82, + "grad_norm": 9.754425100400741, + "learning_rate": 1.3341032869849188e-05, + "loss": 1.3074, + "step": 68163 + }, + { + "epoch": 0.82, + "grad_norm": 4.772103526174412, + "learning_rate": 1.3340482255043829e-05, + "loss": 1.4846, + "step": 68166 + }, + { + "epoch": 0.82, + "grad_norm": 24.67836477516034, + "learning_rate": 1.3339931628838607e-05, + "loss": 1.6088, + "step": 68169 + }, + { + "epoch": 0.82, + "grad_norm": 56.36813289308347, + "learning_rate": 1.33393809912354e-05, + "loss": 1.493, + "step": 68172 + }, + { + "epoch": 0.82, + "grad_norm": 21.75830720090923, + "learning_rate": 1.333883034223609e-05, + "loss": 1.6919, + "step": 68175 + }, + { + "epoch": 0.82, + "grad_norm": 16.523536816068717, + "learning_rate": 1.3338279681842556e-05, + "loss": 1.385, + "step": 68178 + }, + { + "epoch": 0.82, + "grad_norm": 14.72865877841047, + "learning_rate": 1.3337729010056675e-05, + "loss": 1.4048, + "step": 68181 + }, + { + "epoch": 0.82, + "grad_norm": 6.7345803220459715, + "learning_rate": 1.3337178326880324e-05, + "loss": 1.2224, + "step": 68184 + }, + { + "epoch": 0.82, + "grad_norm": 21.921560344438625, + "learning_rate": 1.3336627632315388e-05, + "loss": 1.5396, + "step": 68187 + }, + { + "epoch": 0.82, + "grad_norm": 10.143042530722576, + "learning_rate": 1.3336076926363746e-05, + "loss": 1.2241, + "step": 68190 + }, + { + "epoch": 0.82, + "grad_norm": 17.993740281941253, + "learning_rate": 1.3335526209027271e-05, + "loss": 1.1905, + "step": 68193 + }, + { + "epoch": 0.82, + "grad_norm": 6.969667192861153, + "learning_rate": 1.3334975480307848e-05, + "loss": 1.3894, + "step": 68196 + }, + { + "epoch": 0.82, + "grad_norm": 23.9517570659415, + "learning_rate": 1.3334424740207357e-05, + "loss": 1.312, + "step": 68199 + }, + { + "epoch": 0.82, + "grad_norm": 30.92087002549872, + "learning_rate": 1.3333873988727675e-05, + "loss": 1.4172, + "step": 68202 + }, + { + "epoch": 0.82, + "grad_norm": 19.9765057549362, + "learning_rate": 1.3333323225870678e-05, + "loss": 1.5607, + "step": 68205 + }, + { + "epoch": 0.82, + "grad_norm": 3.2382677292150324, + "learning_rate": 1.3332772451638251e-05, + "loss": 1.0979, + "step": 68208 + }, + { + "epoch": 0.82, + "grad_norm": 4.089703620165903, + "learning_rate": 1.3332221666032274e-05, + "loss": 1.4329, + "step": 68211 + }, + { + "epoch": 0.82, + "grad_norm": 15.13649980072986, + "learning_rate": 1.3331670869054624e-05, + "loss": 1.4252, + "step": 68214 + }, + { + "epoch": 0.82, + "grad_norm": 35.50307363974789, + "learning_rate": 1.3331120060707182e-05, + "loss": 1.3712, + "step": 68217 + }, + { + "epoch": 0.82, + "grad_norm": 18.179564863788784, + "learning_rate": 1.3330569240991827e-05, + "loss": 1.6116, + "step": 68220 + }, + { + "epoch": 0.82, + "grad_norm": 10.266377146033527, + "learning_rate": 1.333001840991044e-05, + "loss": 1.6323, + "step": 68223 + }, + { + "epoch": 0.82, + "grad_norm": 48.88216733534378, + "learning_rate": 1.3329467567464893e-05, + "loss": 1.2911, + "step": 68226 + }, + { + "epoch": 0.82, + "grad_norm": 16.270959612463045, + "learning_rate": 1.3328916713657079e-05, + "loss": 1.6879, + "step": 68229 + }, + { + "epoch": 0.82, + "grad_norm": 4.463170054415568, + "learning_rate": 1.3328365848488872e-05, + "loss": 1.5874, + "step": 68232 + }, + { + "epoch": 0.82, + "grad_norm": 3.030823732789915, + "learning_rate": 1.3327814971962148e-05, + "loss": 1.1146, + "step": 68235 + }, + { + "epoch": 0.82, + "grad_norm": 8.294144267370847, + "learning_rate": 1.3327264084078788e-05, + "loss": 1.5719, + "step": 68238 + }, + { + "epoch": 0.82, + "grad_norm": 13.236029101132438, + "learning_rate": 1.3326713184840678e-05, + "loss": 1.8003, + "step": 68241 + }, + { + "epoch": 0.82, + "grad_norm": 16.11273947757304, + "learning_rate": 1.3326162274249694e-05, + "loss": 1.3171, + "step": 68244 + }, + { + "epoch": 0.82, + "grad_norm": 12.745036202891457, + "learning_rate": 1.3325611352307717e-05, + "loss": 1.2629, + "step": 68247 + }, + { + "epoch": 0.82, + "grad_norm": 9.223164360615517, + "learning_rate": 1.3325060419016622e-05, + "loss": 1.1672, + "step": 68250 + }, + { + "epoch": 0.82, + "grad_norm": 16.727814770559704, + "learning_rate": 1.3324509474378297e-05, + "loss": 1.1811, + "step": 68253 + }, + { + "epoch": 0.82, + "grad_norm": 9.505051865908673, + "learning_rate": 1.332395851839462e-05, + "loss": 1.1444, + "step": 68256 + }, + { + "epoch": 0.82, + "grad_norm": 8.559721525280528, + "learning_rate": 1.3323407551067466e-05, + "loss": 1.625, + "step": 68259 + }, + { + "epoch": 0.82, + "grad_norm": 41.981281975237486, + "learning_rate": 1.3322856572398722e-05, + "loss": 1.4351, + "step": 68262 + }, + { + "epoch": 0.82, + "grad_norm": 8.03677073483806, + "learning_rate": 1.3322305582390264e-05, + "loss": 1.188, + "step": 68265 + }, + { + "epoch": 0.82, + "grad_norm": 16.766890336710286, + "learning_rate": 1.3321754581043976e-05, + "loss": 1.0858, + "step": 68268 + }, + { + "epoch": 0.82, + "grad_norm": 11.141950274774365, + "learning_rate": 1.3321203568361734e-05, + "loss": 1.405, + "step": 68271 + }, + { + "epoch": 0.82, + "grad_norm": 18.25236690486029, + "learning_rate": 1.332065254434542e-05, + "loss": 1.4193, + "step": 68274 + }, + { + "epoch": 0.82, + "grad_norm": 8.477047589781765, + "learning_rate": 1.3320101508996917e-05, + "loss": 1.3636, + "step": 68277 + }, + { + "epoch": 0.82, + "grad_norm": 8.200197484818833, + "learning_rate": 1.3319550462318098e-05, + "loss": 1.2327, + "step": 68280 + }, + { + "epoch": 0.82, + "grad_norm": 4.229241279398534, + "learning_rate": 1.3318999404310855e-05, + "loss": 1.2893, + "step": 68283 + }, + { + "epoch": 0.82, + "grad_norm": 16.53744470548124, + "learning_rate": 1.331844833497706e-05, + "loss": 1.2773, + "step": 68286 + }, + { + "epoch": 0.82, + "grad_norm": 18.602434265909412, + "learning_rate": 1.3317897254318598e-05, + "loss": 1.6284, + "step": 68289 + }, + { + "epoch": 0.82, + "grad_norm": 27.006024257106194, + "learning_rate": 1.3317346162337345e-05, + "loss": 1.1485, + "step": 68292 + }, + { + "epoch": 0.82, + "grad_norm": 10.585630910037917, + "learning_rate": 1.3316795059035188e-05, + "loss": 1.4263, + "step": 68295 + }, + { + "epoch": 0.82, + "grad_norm": 3.5865156175548676, + "learning_rate": 1.3316243944414002e-05, + "loss": 1.0983, + "step": 68298 + }, + { + "epoch": 0.82, + "grad_norm": 63.957179910199386, + "learning_rate": 1.3315692818475672e-05, + "loss": 1.4796, + "step": 68301 + }, + { + "epoch": 0.82, + "grad_norm": 36.480906031784755, + "learning_rate": 1.3315141681222073e-05, + "loss": 1.5024, + "step": 68304 + }, + { + "epoch": 0.82, + "grad_norm": 13.811425788716189, + "learning_rate": 1.3314590532655092e-05, + "loss": 1.4685, + "step": 68307 + }, + { + "epoch": 0.82, + "grad_norm": 30.681476216347413, + "learning_rate": 1.3314039372776606e-05, + "loss": 1.6089, + "step": 68310 + }, + { + "epoch": 0.82, + "grad_norm": 29.72420747202602, + "learning_rate": 1.3313488201588496e-05, + "loss": 1.6346, + "step": 68313 + }, + { + "epoch": 0.82, + "grad_norm": 38.330232711927664, + "learning_rate": 1.3312937019092648e-05, + "loss": 1.4601, + "step": 68316 + }, + { + "epoch": 0.82, + "grad_norm": 9.41541671712497, + "learning_rate": 1.3312385825290938e-05, + "loss": 1.4118, + "step": 68319 + }, + { + "epoch": 0.82, + "grad_norm": 74.51062309108727, + "learning_rate": 1.3311834620185249e-05, + "loss": 1.6666, + "step": 68322 + }, + { + "epoch": 0.82, + "grad_norm": 12.55874530057555, + "learning_rate": 1.3311283403777458e-05, + "loss": 1.5345, + "step": 68325 + }, + { + "epoch": 0.82, + "grad_norm": 26.845580863221272, + "learning_rate": 1.3310732176069454e-05, + "loss": 1.4142, + "step": 68328 + }, + { + "epoch": 0.82, + "grad_norm": 25.745295107132993, + "learning_rate": 1.331018093706311e-05, + "loss": 1.7697, + "step": 68331 + }, + { + "epoch": 0.82, + "grad_norm": 5.046653735828955, + "learning_rate": 1.3309629686760312e-05, + "loss": 1.6306, + "step": 68334 + }, + { + "epoch": 0.82, + "grad_norm": 7.526893622280555, + "learning_rate": 1.330907842516294e-05, + "loss": 1.4499, + "step": 68337 + }, + { + "epoch": 0.82, + "grad_norm": 14.752026254086894, + "learning_rate": 1.3308527152272877e-05, + "loss": 1.2339, + "step": 68340 + }, + { + "epoch": 0.82, + "grad_norm": 5.146402239653592, + "learning_rate": 1.3307975868092001e-05, + "loss": 1.4376, + "step": 68343 + }, + { + "epoch": 0.82, + "grad_norm": 13.006323662098271, + "learning_rate": 1.3307424572622194e-05, + "loss": 1.1194, + "step": 68346 + }, + { + "epoch": 0.82, + "grad_norm": 11.178850369950728, + "learning_rate": 1.3306873265865339e-05, + "loss": 1.1664, + "step": 68349 + }, + { + "epoch": 0.82, + "grad_norm": 10.418964758028602, + "learning_rate": 1.3306321947823318e-05, + "loss": 1.2973, + "step": 68352 + }, + { + "epoch": 0.82, + "grad_norm": 43.516692701237886, + "learning_rate": 1.330577061849801e-05, + "loss": 1.666, + "step": 68355 + }, + { + "epoch": 0.82, + "grad_norm": 10.229206321134756, + "learning_rate": 1.3305219277891298e-05, + "loss": 1.4509, + "step": 68358 + }, + { + "epoch": 0.82, + "grad_norm": 23.591827100606526, + "learning_rate": 1.330466792600506e-05, + "loss": 1.2747, + "step": 68361 + }, + { + "epoch": 0.82, + "grad_norm": 5.526918898627657, + "learning_rate": 1.3304116562841185e-05, + "loss": 1.5339, + "step": 68364 + }, + { + "epoch": 0.82, + "grad_norm": 17.190596064197596, + "learning_rate": 1.3303565188401549e-05, + "loss": 1.9809, + "step": 68367 + }, + { + "epoch": 0.82, + "grad_norm": 14.814456185824344, + "learning_rate": 1.3303013802688033e-05, + "loss": 1.1879, + "step": 68370 + }, + { + "epoch": 0.82, + "grad_norm": 7.476686824118683, + "learning_rate": 1.3302462405702523e-05, + "loss": 1.0596, + "step": 68373 + }, + { + "epoch": 0.82, + "grad_norm": 12.20581692235514, + "learning_rate": 1.33019109974469e-05, + "loss": 0.9147, + "step": 68376 + }, + { + "epoch": 0.82, + "grad_norm": 15.817603968414254, + "learning_rate": 1.3301359577923039e-05, + "loss": 1.3875, + "step": 68379 + }, + { + "epoch": 0.82, + "grad_norm": 8.900281556234791, + "learning_rate": 1.3300808147132833e-05, + "loss": 1.3743, + "step": 68382 + }, + { + "epoch": 0.82, + "grad_norm": 32.82002960523005, + "learning_rate": 1.3300256705078152e-05, + "loss": 1.0496, + "step": 68385 + }, + { + "epoch": 0.82, + "grad_norm": 13.86477850404952, + "learning_rate": 1.3299705251760883e-05, + "loss": 1.4544, + "step": 68388 + }, + { + "epoch": 0.82, + "grad_norm": 12.714090083320507, + "learning_rate": 1.3299153787182915e-05, + "loss": 1.5993, + "step": 68391 + }, + { + "epoch": 0.82, + "grad_norm": 10.667045931928133, + "learning_rate": 1.329860231134612e-05, + "loss": 1.9749, + "step": 68394 + }, + { + "epoch": 0.82, + "grad_norm": 10.441835240548917, + "learning_rate": 1.3298050824252385e-05, + "loss": 1.3521, + "step": 68397 + }, + { + "epoch": 0.82, + "grad_norm": 16.91524586884098, + "learning_rate": 1.3297499325903587e-05, + "loss": 1.5549, + "step": 68400 + }, + { + "epoch": 0.82, + "grad_norm": 14.28702873116534, + "learning_rate": 1.3296947816301616e-05, + "loss": 1.1937, + "step": 68403 + }, + { + "epoch": 0.82, + "grad_norm": 18.378205410746332, + "learning_rate": 1.3296396295448344e-05, + "loss": 1.2795, + "step": 68406 + }, + { + "epoch": 0.82, + "grad_norm": 14.92349468168153, + "learning_rate": 1.3295844763345663e-05, + "loss": 1.4289, + "step": 68409 + }, + { + "epoch": 0.82, + "grad_norm": 8.484204646484148, + "learning_rate": 1.3295293219995448e-05, + "loss": 1.5656, + "step": 68412 + }, + { + "epoch": 0.82, + "grad_norm": 7.627638599477931, + "learning_rate": 1.3294741665399587e-05, + "loss": 0.9583, + "step": 68415 + }, + { + "epoch": 0.82, + "grad_norm": 9.68322339006555, + "learning_rate": 1.3294190099559961e-05, + "loss": 1.5589, + "step": 68418 + }, + { + "epoch": 0.82, + "grad_norm": 27.619725865022172, + "learning_rate": 1.3293638522478445e-05, + "loss": 1.0728, + "step": 68421 + }, + { + "epoch": 0.82, + "grad_norm": 40.4689228748809, + "learning_rate": 1.3293086934156933e-05, + "loss": 0.9892, + "step": 68424 + }, + { + "epoch": 0.82, + "grad_norm": 17.235175521253957, + "learning_rate": 1.3292535334597299e-05, + "loss": 1.04, + "step": 68427 + }, + { + "epoch": 0.82, + "grad_norm": 29.47726059814439, + "learning_rate": 1.3291983723801426e-05, + "loss": 1.3013, + "step": 68430 + }, + { + "epoch": 0.82, + "grad_norm": 3.89887613072151, + "learning_rate": 1.32914321017712e-05, + "loss": 1.6353, + "step": 68433 + }, + { + "epoch": 0.82, + "grad_norm": 7.3600693403218544, + "learning_rate": 1.3290880468508504e-05, + "loss": 1.3131, + "step": 68436 + }, + { + "epoch": 0.82, + "grad_norm": 24.07530586255646, + "learning_rate": 1.3290328824015215e-05, + "loss": 1.0414, + "step": 68439 + }, + { + "epoch": 0.82, + "grad_norm": 5.277784774831111, + "learning_rate": 1.3289777168293219e-05, + "loss": 1.2054, + "step": 68442 + }, + { + "epoch": 0.82, + "grad_norm": 46.94540957720134, + "learning_rate": 1.3289225501344403e-05, + "loss": 1.436, + "step": 68445 + }, + { + "epoch": 0.82, + "grad_norm": 8.401079497361707, + "learning_rate": 1.328867382317064e-05, + "loss": 1.3681, + "step": 68448 + }, + { + "epoch": 0.82, + "grad_norm": 21.314042939682764, + "learning_rate": 1.328812213377382e-05, + "loss": 1.4866, + "step": 68451 + }, + { + "epoch": 0.82, + "grad_norm": 13.78843464200703, + "learning_rate": 1.3287570433155821e-05, + "loss": 1.2718, + "step": 68454 + }, + { + "epoch": 0.82, + "grad_norm": 12.075481198725855, + "learning_rate": 1.3287018721318532e-05, + "loss": 1.1551, + "step": 68457 + }, + { + "epoch": 0.82, + "grad_norm": 32.88553851137493, + "learning_rate": 1.328646699826383e-05, + "loss": 1.3124, + "step": 68460 + }, + { + "epoch": 0.82, + "grad_norm": 18.960123094479595, + "learning_rate": 1.32859152639936e-05, + "loss": 1.1797, + "step": 68463 + }, + { + "epoch": 0.82, + "grad_norm": 63.929567157212254, + "learning_rate": 1.3285363518509725e-05, + "loss": 1.5254, + "step": 68466 + }, + { + "epoch": 0.82, + "grad_norm": 3.922760522342883, + "learning_rate": 1.3284811761814088e-05, + "loss": 1.3258, + "step": 68469 + }, + { + "epoch": 0.82, + "grad_norm": 11.600621557238819, + "learning_rate": 1.3284259993908572e-05, + "loss": 1.1196, + "step": 68472 + }, + { + "epoch": 0.82, + "grad_norm": 29.59472852798375, + "learning_rate": 1.328370821479506e-05, + "loss": 1.4132, + "step": 68475 + }, + { + "epoch": 0.82, + "grad_norm": 6.178388197954627, + "learning_rate": 1.3283156424475433e-05, + "loss": 1.3189, + "step": 68478 + }, + { + "epoch": 0.82, + "grad_norm": 5.626178390946285, + "learning_rate": 1.3282604622951575e-05, + "loss": 1.231, + "step": 68481 + }, + { + "epoch": 0.82, + "grad_norm": 3.6259235168905932, + "learning_rate": 1.3282052810225374e-05, + "loss": 1.391, + "step": 68484 + }, + { + "epoch": 0.82, + "grad_norm": 12.069455416726989, + "learning_rate": 1.3281500986298703e-05, + "loss": 1.2421, + "step": 68487 + }, + { + "epoch": 0.82, + "grad_norm": 11.298055980706367, + "learning_rate": 1.3280949151173457e-05, + "loss": 1.5795, + "step": 68490 + }, + { + "epoch": 0.82, + "grad_norm": 38.516555092837855, + "learning_rate": 1.328039730485151e-05, + "loss": 1.3042, + "step": 68493 + }, + { + "epoch": 0.82, + "grad_norm": 5.7096800955847735, + "learning_rate": 1.3279845447334748e-05, + "loss": 1.5733, + "step": 68496 + }, + { + "epoch": 0.82, + "grad_norm": 4.004911933846899, + "learning_rate": 1.3279293578625058e-05, + "loss": 1.2434, + "step": 68499 + }, + { + "epoch": 0.82, + "grad_norm": 11.281584893233587, + "learning_rate": 1.3278741698724318e-05, + "loss": 1.3119, + "step": 68502 + }, + { + "epoch": 0.82, + "grad_norm": 18.927133411589445, + "learning_rate": 1.3278189807634413e-05, + "loss": 1.5547, + "step": 68505 + }, + { + "epoch": 0.82, + "grad_norm": 13.439334331818625, + "learning_rate": 1.3277637905357228e-05, + "loss": 1.2108, + "step": 68508 + }, + { + "epoch": 0.82, + "grad_norm": 18.687060460063222, + "learning_rate": 1.3277085991894647e-05, + "loss": 1.7246, + "step": 68511 + }, + { + "epoch": 0.82, + "grad_norm": 4.130574777601256, + "learning_rate": 1.327653406724855e-05, + "loss": 1.0045, + "step": 68514 + }, + { + "epoch": 0.82, + "grad_norm": 31.872160146574892, + "learning_rate": 1.3275982131420823e-05, + "loss": 1.4674, + "step": 68517 + }, + { + "epoch": 0.82, + "grad_norm": 3.593003223203284, + "learning_rate": 1.3275430184413349e-05, + "loss": 1.3537, + "step": 68520 + }, + { + "epoch": 0.82, + "grad_norm": 8.355470872404664, + "learning_rate": 1.327487822622801e-05, + "loss": 1.8187, + "step": 68523 + }, + { + "epoch": 0.82, + "grad_norm": 16.190313252830954, + "learning_rate": 1.3274326256866692e-05, + "loss": 0.8906, + "step": 68526 + }, + { + "epoch": 0.82, + "grad_norm": 6.266071660429661, + "learning_rate": 1.3273774276331279e-05, + "loss": 1.297, + "step": 68529 + }, + { + "epoch": 0.82, + "grad_norm": 7.761209804803089, + "learning_rate": 1.3273222284623655e-05, + "loss": 1.0832, + "step": 68532 + }, + { + "epoch": 0.82, + "grad_norm": 8.725366052645763, + "learning_rate": 1.3272670281745698e-05, + "loss": 1.3865, + "step": 68535 + }, + { + "epoch": 0.82, + "grad_norm": 11.956170738418967, + "learning_rate": 1.3272118267699302e-05, + "loss": 1.1666, + "step": 68538 + }, + { + "epoch": 0.82, + "grad_norm": 11.924189571552194, + "learning_rate": 1.327156624248634e-05, + "loss": 1.7236, + "step": 68541 + }, + { + "epoch": 0.82, + "grad_norm": 7.999147412760834, + "learning_rate": 1.3271014206108704e-05, + "loss": 1.0608, + "step": 68544 + }, + { + "epoch": 0.82, + "grad_norm": 45.171527681864596, + "learning_rate": 1.327046215856827e-05, + "loss": 1.2837, + "step": 68547 + }, + { + "epoch": 0.82, + "grad_norm": 7.497249005024467, + "learning_rate": 1.326991009986693e-05, + "loss": 1.2123, + "step": 68550 + }, + { + "epoch": 0.82, + "grad_norm": 8.76558362317677, + "learning_rate": 1.3269358030006564e-05, + "loss": 1.1116, + "step": 68553 + }, + { + "epoch": 0.82, + "grad_norm": 16.798102980714535, + "learning_rate": 1.3268805948989056e-05, + "loss": 1.5797, + "step": 68556 + }, + { + "epoch": 0.82, + "grad_norm": 20.78674784917051, + "learning_rate": 1.3268253856816294e-05, + "loss": 1.2475, + "step": 68559 + }, + { + "epoch": 0.82, + "grad_norm": 19.80861550068163, + "learning_rate": 1.3267701753490157e-05, + "loss": 1.7332, + "step": 68562 + }, + { + "epoch": 0.82, + "grad_norm": 4.996798706326659, + "learning_rate": 1.326714963901253e-05, + "loss": 1.3055, + "step": 68565 + }, + { + "epoch": 0.82, + "grad_norm": 24.456636421061514, + "learning_rate": 1.3266597513385298e-05, + "loss": 1.5005, + "step": 68568 + }, + { + "epoch": 0.82, + "grad_norm": 7.05513436422839, + "learning_rate": 1.3266045376610347e-05, + "loss": 1.3756, + "step": 68571 + }, + { + "epoch": 0.82, + "grad_norm": 7.500848603292276, + "learning_rate": 1.3265493228689556e-05, + "loss": 1.5788, + "step": 68574 + }, + { + "epoch": 0.82, + "grad_norm": 11.047081429442764, + "learning_rate": 1.3264941069624815e-05, + "loss": 1.3554, + "step": 68577 + }, + { + "epoch": 0.82, + "grad_norm": 8.286087628465882, + "learning_rate": 1.3264388899418008e-05, + "loss": 1.3239, + "step": 68580 + }, + { + "epoch": 0.82, + "grad_norm": 11.654211047796922, + "learning_rate": 1.3263836718071016e-05, + "loss": 1.5257, + "step": 68583 + }, + { + "epoch": 0.82, + "grad_norm": 9.483554193100584, + "learning_rate": 1.3263284525585726e-05, + "loss": 1.2621, + "step": 68586 + }, + { + "epoch": 0.82, + "grad_norm": 6.040107710039978, + "learning_rate": 1.3262732321964018e-05, + "loss": 0.9858, + "step": 68589 + }, + { + "epoch": 0.82, + "grad_norm": 12.542966499491193, + "learning_rate": 1.3262180107207783e-05, + "loss": 1.2387, + "step": 68592 + }, + { + "epoch": 0.82, + "grad_norm": 11.946111832081932, + "learning_rate": 1.3261627881318902e-05, + "loss": 1.2373, + "step": 68595 + }, + { + "epoch": 0.82, + "grad_norm": 9.463660990139235, + "learning_rate": 1.326107564429926e-05, + "loss": 1.7742, + "step": 68598 + }, + { + "epoch": 0.82, + "grad_norm": 9.06080624803706, + "learning_rate": 1.3260523396150737e-05, + "loss": 1.3839, + "step": 68601 + }, + { + "epoch": 0.82, + "grad_norm": 12.8092142311354, + "learning_rate": 1.3259971136875228e-05, + "loss": 1.3769, + "step": 68604 + }, + { + "epoch": 0.82, + "grad_norm": 11.175651473683255, + "learning_rate": 1.3259418866474611e-05, + "loss": 1.3803, + "step": 68607 + }, + { + "epoch": 0.83, + "grad_norm": 19.257371296807435, + "learning_rate": 1.3258866584950767e-05, + "loss": 1.2666, + "step": 68610 + }, + { + "epoch": 0.83, + "grad_norm": 6.956422176572401, + "learning_rate": 1.325831429230559e-05, + "loss": 1.5864, + "step": 68613 + }, + { + "epoch": 0.83, + "grad_norm": 14.532930325381734, + "learning_rate": 1.3257761988540959e-05, + "loss": 1.5431, + "step": 68616 + }, + { + "epoch": 0.83, + "grad_norm": 10.146495021955499, + "learning_rate": 1.325720967365876e-05, + "loss": 1.452, + "step": 68619 + }, + { + "epoch": 0.83, + "grad_norm": 4.364805416514462, + "learning_rate": 1.3256657347660872e-05, + "loss": 1.2156, + "step": 68622 + }, + { + "epoch": 0.83, + "grad_norm": 12.710251002994971, + "learning_rate": 1.3256105010549191e-05, + "loss": 1.6149, + "step": 68625 + }, + { + "epoch": 0.83, + "grad_norm": 12.519670402958601, + "learning_rate": 1.3255552662325596e-05, + "loss": 1.6266, + "step": 68628 + }, + { + "epoch": 0.83, + "grad_norm": 8.280194729401193, + "learning_rate": 1.3255000302991972e-05, + "loss": 1.2385, + "step": 68631 + }, + { + "epoch": 0.83, + "grad_norm": 5.57065370460376, + "learning_rate": 1.3254447932550205e-05, + "loss": 1.3513, + "step": 68634 + }, + { + "epoch": 0.83, + "grad_norm": 8.858413899794087, + "learning_rate": 1.3253895551002178e-05, + "loss": 0.9968, + "step": 68637 + }, + { + "epoch": 0.83, + "grad_norm": 6.449942090064247, + "learning_rate": 1.325334315834978e-05, + "loss": 1.3175, + "step": 68640 + }, + { + "epoch": 0.83, + "grad_norm": 23.186541179051545, + "learning_rate": 1.325279075459489e-05, + "loss": 1.4387, + "step": 68643 + }, + { + "epoch": 0.83, + "grad_norm": 2.5462077568793684, + "learning_rate": 1.3252238339739398e-05, + "loss": 1.5411, + "step": 68646 + }, + { + "epoch": 0.83, + "grad_norm": 6.038297069890196, + "learning_rate": 1.325168591378519e-05, + "loss": 1.2119, + "step": 68649 + }, + { + "epoch": 0.83, + "grad_norm": 23.508283417007224, + "learning_rate": 1.3251133476734149e-05, + "loss": 1.3273, + "step": 68652 + }, + { + "epoch": 0.83, + "grad_norm": 11.847071339456432, + "learning_rate": 1.3250581028588156e-05, + "loss": 1.4914, + "step": 68655 + }, + { + "epoch": 0.83, + "grad_norm": 7.126435518884075, + "learning_rate": 1.3250028569349106e-05, + "loss": 1.1022, + "step": 68658 + }, + { + "epoch": 0.83, + "grad_norm": 2.6364186591812144, + "learning_rate": 1.3249476099018878e-05, + "loss": 1.4299, + "step": 68661 + }, + { + "epoch": 0.83, + "grad_norm": 16.297397373847165, + "learning_rate": 1.3248923617599356e-05, + "loss": 1.3723, + "step": 68664 + }, + { + "epoch": 0.83, + "grad_norm": 4.34478820861025, + "learning_rate": 1.3248371125092432e-05, + "loss": 1.717, + "step": 68667 + }, + { + "epoch": 0.83, + "grad_norm": 6.499344537820825, + "learning_rate": 1.3247818621499984e-05, + "loss": 1.2255, + "step": 68670 + }, + { + "epoch": 0.83, + "grad_norm": 19.344328642609778, + "learning_rate": 1.3247266106823904e-05, + "loss": 1.0822, + "step": 68673 + }, + { + "epoch": 0.83, + "grad_norm": 10.741192463615493, + "learning_rate": 1.3246713581066071e-05, + "loss": 1.5364, + "step": 68676 + }, + { + "epoch": 0.83, + "grad_norm": 9.073248161104027, + "learning_rate": 1.3246161044228376e-05, + "loss": 1.6767, + "step": 68679 + }, + { + "epoch": 0.83, + "grad_norm": 3.5431134083081095, + "learning_rate": 1.3245608496312704e-05, + "loss": 1.3403, + "step": 68682 + }, + { + "epoch": 0.83, + "grad_norm": 130.04870901309454, + "learning_rate": 1.3245055937320935e-05, + "loss": 1.0976, + "step": 68685 + }, + { + "epoch": 0.83, + "grad_norm": 13.318172100968539, + "learning_rate": 1.3244503367254961e-05, + "loss": 1.3592, + "step": 68688 + }, + { + "epoch": 0.83, + "grad_norm": 7.163664603339309, + "learning_rate": 1.3243950786116668e-05, + "loss": 0.938, + "step": 68691 + }, + { + "epoch": 0.83, + "grad_norm": 14.078142823282615, + "learning_rate": 1.3243398193907938e-05, + "loss": 1.496, + "step": 68694 + }, + { + "epoch": 0.83, + "grad_norm": 28.251358721589398, + "learning_rate": 1.3242845590630658e-05, + "loss": 1.6106, + "step": 68697 + }, + { + "epoch": 0.83, + "grad_norm": 14.403943139680345, + "learning_rate": 1.3242292976286712e-05, + "loss": 1.3925, + "step": 68700 + }, + { + "epoch": 0.83, + "grad_norm": 33.32359280854836, + "learning_rate": 1.3241740350877991e-05, + "loss": 1.0574, + "step": 68703 + }, + { + "epoch": 0.83, + "grad_norm": 156.5683407119478, + "learning_rate": 1.3241187714406376e-05, + "loss": 1.3074, + "step": 68706 + }, + { + "epoch": 0.83, + "grad_norm": 11.400795112099209, + "learning_rate": 1.3240635066873755e-05, + "loss": 1.5112, + "step": 68709 + }, + { + "epoch": 0.83, + "grad_norm": 2.563961006677907, + "learning_rate": 1.3240082408282016e-05, + "loss": 1.6132, + "step": 68712 + }, + { + "epoch": 0.83, + "grad_norm": 13.720436583694553, + "learning_rate": 1.3239529738633042e-05, + "loss": 1.2151, + "step": 68715 + }, + { + "epoch": 0.83, + "grad_norm": 8.045700036324252, + "learning_rate": 1.3238977057928714e-05, + "loss": 1.4394, + "step": 68718 + }, + { + "epoch": 0.83, + "grad_norm": 15.458163486707587, + "learning_rate": 1.3238424366170933e-05, + "loss": 1.5085, + "step": 68721 + }, + { + "epoch": 0.83, + "grad_norm": 6.805707289136381, + "learning_rate": 1.3237871663361572e-05, + "loss": 1.1601, + "step": 68724 + }, + { + "epoch": 0.83, + "grad_norm": 36.59194751532229, + "learning_rate": 1.323731894950252e-05, + "loss": 1.3549, + "step": 68727 + }, + { + "epoch": 0.83, + "grad_norm": 3.576073214238875, + "learning_rate": 1.3236766224595665e-05, + "loss": 1.4868, + "step": 68730 + }, + { + "epoch": 0.83, + "grad_norm": 8.731026723285494, + "learning_rate": 1.3236213488642895e-05, + "loss": 1.3078, + "step": 68733 + }, + { + "epoch": 0.83, + "grad_norm": 42.91158403898595, + "learning_rate": 1.3235660741646092e-05, + "loss": 1.25, + "step": 68736 + }, + { + "epoch": 0.83, + "grad_norm": 5.5733740191522685, + "learning_rate": 1.3235107983607143e-05, + "loss": 1.4982, + "step": 68739 + }, + { + "epoch": 0.83, + "grad_norm": 2.1703451616049834, + "learning_rate": 1.323455521452794e-05, + "loss": 1.4622, + "step": 68742 + }, + { + "epoch": 0.83, + "grad_norm": 5.669505573227329, + "learning_rate": 1.3234002434410362e-05, + "loss": 1.1452, + "step": 68745 + }, + { + "epoch": 0.83, + "grad_norm": 11.134929112811113, + "learning_rate": 1.32334496432563e-05, + "loss": 1.6766, + "step": 68748 + }, + { + "epoch": 0.83, + "grad_norm": 12.195765933536203, + "learning_rate": 1.3232896841067637e-05, + "loss": 1.2681, + "step": 68751 + }, + { + "epoch": 0.83, + "grad_norm": 6.365703033609398, + "learning_rate": 1.3232344027846265e-05, + "loss": 1.4118, + "step": 68754 + }, + { + "epoch": 0.83, + "grad_norm": 32.386183267991214, + "learning_rate": 1.3231791203594064e-05, + "loss": 1.0494, + "step": 68757 + }, + { + "epoch": 0.83, + "grad_norm": 17.34880936989957, + "learning_rate": 1.3231238368312926e-05, + "loss": 1.2165, + "step": 68760 + }, + { + "epoch": 0.83, + "grad_norm": 17.28018246926369, + "learning_rate": 1.3230685522004734e-05, + "loss": 1.3942, + "step": 68763 + }, + { + "epoch": 0.83, + "grad_norm": 16.024458723179556, + "learning_rate": 1.3230132664671375e-05, + "loss": 1.3249, + "step": 68766 + }, + { + "epoch": 0.83, + "grad_norm": 12.221110911515956, + "learning_rate": 1.3229579796314739e-05, + "loss": 1.0439, + "step": 68769 + }, + { + "epoch": 0.83, + "grad_norm": 5.611978861192867, + "learning_rate": 1.3229026916936709e-05, + "loss": 1.0773, + "step": 68772 + }, + { + "epoch": 0.83, + "grad_norm": 27.778100402282053, + "learning_rate": 1.3228474026539173e-05, + "loss": 1.2429, + "step": 68775 + }, + { + "epoch": 0.83, + "grad_norm": 10.170158705614275, + "learning_rate": 1.3227921125124019e-05, + "loss": 1.0593, + "step": 68778 + }, + { + "epoch": 0.83, + "grad_norm": 3.9440929505418465, + "learning_rate": 1.3227368212693135e-05, + "loss": 1.2551, + "step": 68781 + }, + { + "epoch": 0.83, + "grad_norm": 6.5188283900722706, + "learning_rate": 1.32268152892484e-05, + "loss": 1.4594, + "step": 68784 + }, + { + "epoch": 0.83, + "grad_norm": 16.40401249117463, + "learning_rate": 1.322626235479171e-05, + "loss": 1.4636, + "step": 68787 + }, + { + "epoch": 0.83, + "grad_norm": 73.87584321062333, + "learning_rate": 1.3225709409324949e-05, + "loss": 1.286, + "step": 68790 + }, + { + "epoch": 0.83, + "grad_norm": 6.386048610348065, + "learning_rate": 1.3225156452850004e-05, + "loss": 1.352, + "step": 68793 + }, + { + "epoch": 0.83, + "grad_norm": 9.179101572510012, + "learning_rate": 1.3224603485368761e-05, + "loss": 1.3361, + "step": 68796 + }, + { + "epoch": 0.83, + "grad_norm": 21.033251508691183, + "learning_rate": 1.3224050506883109e-05, + "loss": 1.5611, + "step": 68799 + }, + { + "epoch": 0.83, + "grad_norm": 10.696344108566311, + "learning_rate": 1.3223497517394932e-05, + "loss": 1.204, + "step": 68802 + }, + { + "epoch": 0.83, + "grad_norm": 37.24407313520587, + "learning_rate": 1.322294451690612e-05, + "loss": 1.1206, + "step": 68805 + }, + { + "epoch": 0.83, + "grad_norm": 21.518984102445472, + "learning_rate": 1.3222391505418557e-05, + "loss": 1.2869, + "step": 68808 + }, + { + "epoch": 0.83, + "grad_norm": 17.441899257700065, + "learning_rate": 1.3221838482934137e-05, + "loss": 1.2509, + "step": 68811 + }, + { + "epoch": 0.83, + "grad_norm": 5.015767943382486, + "learning_rate": 1.3221285449454739e-05, + "loss": 1.3503, + "step": 68814 + }, + { + "epoch": 0.83, + "grad_norm": 33.07890088402602, + "learning_rate": 1.3220732404982256e-05, + "loss": 1.7612, + "step": 68817 + }, + { + "epoch": 0.83, + "grad_norm": 9.847377542996771, + "learning_rate": 1.3220179349518571e-05, + "loss": 1.4523, + "step": 68820 + }, + { + "epoch": 0.83, + "grad_norm": 14.724384978283792, + "learning_rate": 1.321962628306558e-05, + "loss": 1.2362, + "step": 68823 + }, + { + "epoch": 0.83, + "grad_norm": 9.452993941198903, + "learning_rate": 1.3219073205625157e-05, + "loss": 1.3682, + "step": 68826 + }, + { + "epoch": 0.83, + "grad_norm": 12.936812271625156, + "learning_rate": 1.32185201171992e-05, + "loss": 1.7653, + "step": 68829 + }, + { + "epoch": 0.83, + "grad_norm": 18.09473184407072, + "learning_rate": 1.3217967017789592e-05, + "loss": 1.1856, + "step": 68832 + }, + { + "epoch": 0.83, + "grad_norm": 4.01261285255407, + "learning_rate": 1.3217413907398223e-05, + "loss": 1.3491, + "step": 68835 + }, + { + "epoch": 0.83, + "grad_norm": 8.076068715034607, + "learning_rate": 1.3216860786026977e-05, + "loss": 1.2986, + "step": 68838 + }, + { + "epoch": 0.83, + "grad_norm": 9.441952400460544, + "learning_rate": 1.321630765367775e-05, + "loss": 1.0933, + "step": 68841 + }, + { + "epoch": 0.83, + "grad_norm": 6.523116989732859, + "learning_rate": 1.3215754510352417e-05, + "loss": 1.2193, + "step": 68844 + }, + { + "epoch": 0.83, + "grad_norm": 6.7831017060382806, + "learning_rate": 1.3215201356052872e-05, + "loss": 1.8157, + "step": 68847 + }, + { + "epoch": 0.83, + "grad_norm": 16.44997482269884, + "learning_rate": 1.3214648190781005e-05, + "loss": 1.4889, + "step": 68850 + }, + { + "epoch": 0.83, + "grad_norm": 8.592486701212476, + "learning_rate": 1.3214095014538701e-05, + "loss": 1.6051, + "step": 68853 + }, + { + "epoch": 0.83, + "grad_norm": 10.952390063042008, + "learning_rate": 1.321354182732785e-05, + "loss": 1.1861, + "step": 68856 + }, + { + "epoch": 0.83, + "grad_norm": 12.188106866259135, + "learning_rate": 1.3212988629150336e-05, + "loss": 1.3017, + "step": 68859 + }, + { + "epoch": 0.83, + "grad_norm": 13.904268291182936, + "learning_rate": 1.3212435420008051e-05, + "loss": 1.2282, + "step": 68862 + }, + { + "epoch": 0.83, + "grad_norm": 13.735283926529698, + "learning_rate": 1.3211882199902881e-05, + "loss": 1.2735, + "step": 68865 + }, + { + "epoch": 0.83, + "grad_norm": 13.389479601183616, + "learning_rate": 1.3211328968836714e-05, + "loss": 1.3908, + "step": 68868 + }, + { + "epoch": 0.83, + "grad_norm": 9.395874008094124, + "learning_rate": 1.3210775726811436e-05, + "loss": 1.3921, + "step": 68871 + }, + { + "epoch": 0.83, + "grad_norm": 6.039574786192382, + "learning_rate": 1.321022247382894e-05, + "loss": 1.3835, + "step": 68874 + }, + { + "epoch": 0.83, + "grad_norm": 4.046758356596412, + "learning_rate": 1.3209669209891107e-05, + "loss": 1.3589, + "step": 68877 + }, + { + "epoch": 0.83, + "grad_norm": 10.754747720811107, + "learning_rate": 1.3209115934999832e-05, + "loss": 1.4245, + "step": 68880 + }, + { + "epoch": 0.83, + "grad_norm": 23.766275432076313, + "learning_rate": 1.3208562649157e-05, + "loss": 1.897, + "step": 68883 + }, + { + "epoch": 0.83, + "grad_norm": 33.016282178029655, + "learning_rate": 1.3208009352364499e-05, + "loss": 1.5393, + "step": 68886 + }, + { + "epoch": 0.83, + "grad_norm": 21.269218236105868, + "learning_rate": 1.3207456044624218e-05, + "loss": 1.2772, + "step": 68889 + }, + { + "epoch": 0.83, + "grad_norm": 75.33050998273191, + "learning_rate": 1.3206902725938042e-05, + "loss": 1.2889, + "step": 68892 + }, + { + "epoch": 0.83, + "grad_norm": 9.470710809819003, + "learning_rate": 1.3206349396307868e-05, + "loss": 1.3296, + "step": 68895 + }, + { + "epoch": 0.83, + "grad_norm": 18.573841628855057, + "learning_rate": 1.3205796055735573e-05, + "loss": 1.1117, + "step": 68898 + }, + { + "epoch": 0.83, + "grad_norm": 22.618546540049653, + "learning_rate": 1.3205242704223052e-05, + "loss": 1.3142, + "step": 68901 + }, + { + "epoch": 0.83, + "grad_norm": 6.179150577459071, + "learning_rate": 1.3204689341772193e-05, + "loss": 1.7431, + "step": 68904 + }, + { + "epoch": 0.83, + "grad_norm": 16.295236699636156, + "learning_rate": 1.3204135968384883e-05, + "loss": 1.3487, + "step": 68907 + }, + { + "epoch": 0.83, + "grad_norm": 4.504143214738483, + "learning_rate": 1.3203582584063015e-05, + "loss": 1.7721, + "step": 68910 + }, + { + "epoch": 0.83, + "grad_norm": 4.163678720490402, + "learning_rate": 1.3203029188808467e-05, + "loss": 1.4004, + "step": 68913 + }, + { + "epoch": 0.83, + "grad_norm": 53.717443072008585, + "learning_rate": 1.3202475782623142e-05, + "loss": 1.564, + "step": 68916 + }, + { + "epoch": 0.83, + "grad_norm": 3.6500244844925835, + "learning_rate": 1.3201922365508913e-05, + "loss": 1.3221, + "step": 68919 + }, + { + "epoch": 0.83, + "grad_norm": 16.83131061460115, + "learning_rate": 1.320136893746768e-05, + "loss": 1.5753, + "step": 68922 + }, + { + "epoch": 0.83, + "grad_norm": 12.347959505040148, + "learning_rate": 1.3200815498501324e-05, + "loss": 1.1697, + "step": 68925 + }, + { + "epoch": 0.83, + "grad_norm": 30.80398871942965, + "learning_rate": 1.3200262048611743e-05, + "loss": 1.1548, + "step": 68928 + }, + { + "epoch": 0.83, + "grad_norm": 11.250720876791776, + "learning_rate": 1.319970858780082e-05, + "loss": 1.459, + "step": 68931 + }, + { + "epoch": 0.83, + "grad_norm": 16.7116593917066, + "learning_rate": 1.3199155116070438e-05, + "loss": 1.6362, + "step": 68934 + }, + { + "epoch": 0.83, + "grad_norm": 18.91612768260169, + "learning_rate": 1.31986016334225e-05, + "loss": 1.2881, + "step": 68937 + }, + { + "epoch": 0.83, + "grad_norm": 8.05375103829415, + "learning_rate": 1.3198048139858881e-05, + "loss": 1.6831, + "step": 68940 + }, + { + "epoch": 0.83, + "grad_norm": 25.53713155152485, + "learning_rate": 1.3197494635381478e-05, + "loss": 1.2917, + "step": 68943 + }, + { + "epoch": 0.83, + "grad_norm": 7.361389984886002, + "learning_rate": 1.3196941119992178e-05, + "loss": 1.2389, + "step": 68946 + }, + { + "epoch": 0.83, + "grad_norm": 10.220779101238401, + "learning_rate": 1.319638759369287e-05, + "loss": 1.3789, + "step": 68949 + }, + { + "epoch": 0.83, + "grad_norm": 8.362127272745463, + "learning_rate": 1.3195834056485439e-05, + "loss": 1.5142, + "step": 68952 + }, + { + "epoch": 0.83, + "grad_norm": 5.278221240716144, + "learning_rate": 1.3195280508371778e-05, + "loss": 1.2953, + "step": 68955 + }, + { + "epoch": 0.83, + "grad_norm": 90.31919868516167, + "learning_rate": 1.319472694935378e-05, + "loss": 1.2635, + "step": 68958 + }, + { + "epoch": 0.83, + "grad_norm": 47.84943521956235, + "learning_rate": 1.3194173379433326e-05, + "loss": 1.8637, + "step": 68961 + }, + { + "epoch": 0.83, + "grad_norm": 15.084482622874452, + "learning_rate": 1.319361979861231e-05, + "loss": 1.7575, + "step": 68964 + }, + { + "epoch": 0.83, + "grad_norm": 24.646427179149274, + "learning_rate": 1.3193066206892618e-05, + "loss": 1.3351, + "step": 68967 + }, + { + "epoch": 0.83, + "grad_norm": 27.098103278737703, + "learning_rate": 1.3192512604276146e-05, + "loss": 1.4762, + "step": 68970 + }, + { + "epoch": 0.83, + "grad_norm": 12.585333566238006, + "learning_rate": 1.3191958990764772e-05, + "loss": 1.3728, + "step": 68973 + }, + { + "epoch": 0.83, + "grad_norm": 5.461781478943331, + "learning_rate": 1.3191405366360397e-05, + "loss": 1.6366, + "step": 68976 + }, + { + "epoch": 0.83, + "grad_norm": 9.776540900097729, + "learning_rate": 1.3190851731064904e-05, + "loss": 1.795, + "step": 68979 + }, + { + "epoch": 0.83, + "grad_norm": 19.264513213339132, + "learning_rate": 1.319029808488018e-05, + "loss": 1.1837, + "step": 68982 + }, + { + "epoch": 0.83, + "grad_norm": 28.38227135764784, + "learning_rate": 1.318974442780812e-05, + "loss": 1.5409, + "step": 68985 + }, + { + "epoch": 0.83, + "grad_norm": 21.17423667347356, + "learning_rate": 1.3189190759850609e-05, + "loss": 1.5558, + "step": 68988 + }, + { + "epoch": 0.83, + "grad_norm": 14.92029912222824, + "learning_rate": 1.3188637081009546e-05, + "loss": 1.1127, + "step": 68991 + }, + { + "epoch": 0.83, + "grad_norm": 16.331085515932866, + "learning_rate": 1.3188083391286806e-05, + "loss": 1.4904, + "step": 68994 + }, + { + "epoch": 0.83, + "grad_norm": 22.286437399847085, + "learning_rate": 1.3187529690684289e-05, + "loss": 1.2267, + "step": 68997 + }, + { + "epoch": 0.83, + "grad_norm": 16.559985080245287, + "learning_rate": 1.318697597920388e-05, + "loss": 1.4189, + "step": 69000 + }, + { + "epoch": 0.83, + "grad_norm": 8.96312320556807, + "learning_rate": 1.318642225684747e-05, + "loss": 1.3937, + "step": 69003 + }, + { + "epoch": 0.83, + "grad_norm": 10.954312566905537, + "learning_rate": 1.3185868523616946e-05, + "loss": 1.6494, + "step": 69006 + }, + { + "epoch": 0.83, + "grad_norm": 7.84284169268949, + "learning_rate": 1.3185314779514202e-05, + "loss": 1.2908, + "step": 69009 + }, + { + "epoch": 0.83, + "grad_norm": 12.006378499103171, + "learning_rate": 1.318476102454113e-05, + "loss": 1.4517, + "step": 69012 + }, + { + "epoch": 0.83, + "grad_norm": 3.0521996190368417, + "learning_rate": 1.3184207258699613e-05, + "loss": 1.4916, + "step": 69015 + }, + { + "epoch": 0.83, + "grad_norm": 7.172646309996135, + "learning_rate": 1.3183653481991542e-05, + "loss": 1.2687, + "step": 69018 + }, + { + "epoch": 0.83, + "grad_norm": 2.836031230893266, + "learning_rate": 1.3183099694418811e-05, + "loss": 1.3557, + "step": 69021 + }, + { + "epoch": 0.83, + "grad_norm": 6.487616644327821, + "learning_rate": 1.3182545895983308e-05, + "loss": 1.4721, + "step": 69024 + }, + { + "epoch": 0.83, + "grad_norm": 5.746506151926705, + "learning_rate": 1.3181992086686917e-05, + "loss": 1.289, + "step": 69027 + }, + { + "epoch": 0.83, + "grad_norm": 16.92717970275095, + "learning_rate": 1.3181438266531537e-05, + "loss": 1.2317, + "step": 69030 + }, + { + "epoch": 0.83, + "grad_norm": 11.524946453918583, + "learning_rate": 1.3180884435519055e-05, + "loss": 1.3968, + "step": 69033 + }, + { + "epoch": 0.83, + "grad_norm": 7.487565944409895, + "learning_rate": 1.3180330593651356e-05, + "loss": 1.4586, + "step": 69036 + }, + { + "epoch": 0.83, + "grad_norm": 5.087382099364869, + "learning_rate": 1.3179776740930339e-05, + "loss": 1.2336, + "step": 69039 + }, + { + "epoch": 0.83, + "grad_norm": 23.142522939247463, + "learning_rate": 1.3179222877357886e-05, + "loss": 1.3104, + "step": 69042 + }, + { + "epoch": 0.83, + "grad_norm": 15.76853464180265, + "learning_rate": 1.3178669002935894e-05, + "loss": 1.487, + "step": 69045 + }, + { + "epoch": 0.83, + "grad_norm": 14.841611951899969, + "learning_rate": 1.3178115117666245e-05, + "loss": 1.4266, + "step": 69048 + }, + { + "epoch": 0.83, + "grad_norm": 14.823618334157246, + "learning_rate": 1.3177561221550836e-05, + "loss": 1.6281, + "step": 69051 + }, + { + "epoch": 0.83, + "grad_norm": 49.083863029599684, + "learning_rate": 1.3177007314591557e-05, + "loss": 1.436, + "step": 69054 + }, + { + "epoch": 0.83, + "grad_norm": 50.2598058662714, + "learning_rate": 1.3176453396790297e-05, + "loss": 1.3526, + "step": 69057 + }, + { + "epoch": 0.83, + "grad_norm": 25.171489255545943, + "learning_rate": 1.3175899468148941e-05, + "loss": 1.2195, + "step": 69060 + }, + { + "epoch": 0.83, + "grad_norm": 3.113817155113126, + "learning_rate": 1.3175345528669384e-05, + "loss": 1.278, + "step": 69063 + }, + { + "epoch": 0.83, + "grad_norm": 6.600124768827967, + "learning_rate": 1.317479157835352e-05, + "loss": 1.5645, + "step": 69066 + }, + { + "epoch": 0.83, + "grad_norm": 8.62843347267528, + "learning_rate": 1.3174237617203237e-05, + "loss": 1.1965, + "step": 69069 + }, + { + "epoch": 0.83, + "grad_norm": 10.699302674711888, + "learning_rate": 1.3173683645220421e-05, + "loss": 1.3315, + "step": 69072 + }, + { + "epoch": 0.83, + "grad_norm": 9.291549168891942, + "learning_rate": 1.3173129662406967e-05, + "loss": 1.4131, + "step": 69075 + }, + { + "epoch": 0.83, + "grad_norm": 125.20790160127925, + "learning_rate": 1.3172575668764767e-05, + "loss": 1.6941, + "step": 69078 + }, + { + "epoch": 0.83, + "grad_norm": 4.861202934090306, + "learning_rate": 1.3172021664295702e-05, + "loss": 1.2047, + "step": 69081 + }, + { + "epoch": 0.83, + "grad_norm": 9.938138871481083, + "learning_rate": 1.3171467649001676e-05, + "loss": 1.6993, + "step": 69084 + }, + { + "epoch": 0.83, + "grad_norm": 10.432126881647948, + "learning_rate": 1.3170913622884572e-05, + "loss": 1.3704, + "step": 69087 + }, + { + "epoch": 0.83, + "grad_norm": 14.544839604415147, + "learning_rate": 1.317035958594628e-05, + "loss": 1.0294, + "step": 69090 + }, + { + "epoch": 0.83, + "grad_norm": 21.30487787322942, + "learning_rate": 1.3169805538188696e-05, + "loss": 1.0269, + "step": 69093 + }, + { + "epoch": 0.83, + "grad_norm": 4.784454178454685, + "learning_rate": 1.3169251479613704e-05, + "loss": 1.9996, + "step": 69096 + }, + { + "epoch": 0.83, + "grad_norm": 34.706280322709105, + "learning_rate": 1.3168697410223201e-05, + "loss": 1.6153, + "step": 69099 + }, + { + "epoch": 0.83, + "grad_norm": 8.968172180693363, + "learning_rate": 1.3168143330019072e-05, + "loss": 1.1379, + "step": 69102 + }, + { + "epoch": 0.83, + "grad_norm": 9.999812385900434, + "learning_rate": 1.3167589239003213e-05, + "loss": 1.5355, + "step": 69105 + }, + { + "epoch": 0.83, + "grad_norm": 6.213874022277811, + "learning_rate": 1.3167035137177515e-05, + "loss": 1.1442, + "step": 69108 + }, + { + "epoch": 0.83, + "grad_norm": 32.486140551862015, + "learning_rate": 1.3166481024543865e-05, + "loss": 1.3578, + "step": 69111 + }, + { + "epoch": 0.83, + "grad_norm": 32.98836240781974, + "learning_rate": 1.3165926901104154e-05, + "loss": 1.1248, + "step": 69114 + }, + { + "epoch": 0.83, + "grad_norm": 21.91135256584199, + "learning_rate": 1.3165372766860275e-05, + "loss": 1.0016, + "step": 69117 + }, + { + "epoch": 0.83, + "grad_norm": 9.28270693830942, + "learning_rate": 1.3164818621814121e-05, + "loss": 1.0843, + "step": 69120 + }, + { + "epoch": 0.83, + "grad_norm": 3.614757399561793, + "learning_rate": 1.316426446596758e-05, + "loss": 0.9743, + "step": 69123 + }, + { + "epoch": 0.83, + "grad_norm": 20.98980543141366, + "learning_rate": 1.3163710299322543e-05, + "loss": 1.3794, + "step": 69126 + }, + { + "epoch": 0.83, + "grad_norm": 5.653111251730774, + "learning_rate": 1.3163156121880905e-05, + "loss": 1.2122, + "step": 69129 + }, + { + "epoch": 0.83, + "grad_norm": 47.55241672306585, + "learning_rate": 1.3162601933644554e-05, + "loss": 1.2628, + "step": 69132 + }, + { + "epoch": 0.83, + "grad_norm": 12.038965189661116, + "learning_rate": 1.3162047734615379e-05, + "loss": 1.1942, + "step": 69135 + }, + { + "epoch": 0.83, + "grad_norm": 9.117456870800014, + "learning_rate": 1.3161493524795277e-05, + "loss": 1.3895, + "step": 69138 + }, + { + "epoch": 0.83, + "grad_norm": 20.082122036588096, + "learning_rate": 1.3160939304186136e-05, + "loss": 1.5073, + "step": 69141 + }, + { + "epoch": 0.83, + "grad_norm": 12.104743117363974, + "learning_rate": 1.3160385072789845e-05, + "loss": 1.1296, + "step": 69144 + }, + { + "epoch": 0.83, + "grad_norm": 5.422707439182116, + "learning_rate": 1.31598308306083e-05, + "loss": 1.141, + "step": 69147 + }, + { + "epoch": 0.83, + "grad_norm": 13.818871315063005, + "learning_rate": 1.3159276577643392e-05, + "loss": 1.4448, + "step": 69150 + }, + { + "epoch": 0.83, + "grad_norm": 12.833422387627735, + "learning_rate": 1.315872231389701e-05, + "loss": 1.3367, + "step": 69153 + }, + { + "epoch": 0.83, + "grad_norm": 11.375943161018467, + "learning_rate": 1.3158168039371045e-05, + "loss": 1.1434, + "step": 69156 + }, + { + "epoch": 0.83, + "grad_norm": 8.562913160153053, + "learning_rate": 1.3157613754067393e-05, + "loss": 1.0147, + "step": 69159 + }, + { + "epoch": 0.83, + "grad_norm": 31.091301082222902, + "learning_rate": 1.3157059457987939e-05, + "loss": 1.0091, + "step": 69162 + }, + { + "epoch": 0.83, + "grad_norm": 10.15537545473495, + "learning_rate": 1.3156505151134582e-05, + "loss": 1.2993, + "step": 69165 + }, + { + "epoch": 0.83, + "grad_norm": 10.044057096254372, + "learning_rate": 1.3155950833509206e-05, + "loss": 1.5929, + "step": 69168 + }, + { + "epoch": 0.83, + "grad_norm": 7.150875643308427, + "learning_rate": 1.3155396505113709e-05, + "loss": 1.4883, + "step": 69171 + }, + { + "epoch": 0.83, + "grad_norm": 6.898035255955166, + "learning_rate": 1.3154842165949982e-05, + "loss": 1.1963, + "step": 69174 + }, + { + "epoch": 0.83, + "grad_norm": 19.892164901706458, + "learning_rate": 1.315428781601991e-05, + "loss": 1.4032, + "step": 69177 + }, + { + "epoch": 0.83, + "grad_norm": 11.067735539634315, + "learning_rate": 1.3153733455325393e-05, + "loss": 1.5438, + "step": 69180 + }, + { + "epoch": 0.83, + "grad_norm": 15.500464020480813, + "learning_rate": 1.315317908386832e-05, + "loss": 1.172, + "step": 69183 + }, + { + "epoch": 0.83, + "grad_norm": 13.914456547291223, + "learning_rate": 1.3152624701650583e-05, + "loss": 1.5549, + "step": 69186 + }, + { + "epoch": 0.83, + "grad_norm": 9.136157659751207, + "learning_rate": 1.3152070308674071e-05, + "loss": 1.2414, + "step": 69189 + }, + { + "epoch": 0.83, + "grad_norm": 19.33643378939285, + "learning_rate": 1.3151515904940682e-05, + "loss": 1.7122, + "step": 69192 + }, + { + "epoch": 0.83, + "grad_norm": 4.089059152474941, + "learning_rate": 1.31509614904523e-05, + "loss": 1.464, + "step": 69195 + }, + { + "epoch": 0.83, + "grad_norm": 11.537213230764817, + "learning_rate": 1.3150407065210825e-05, + "loss": 1.6815, + "step": 69198 + }, + { + "epoch": 0.83, + "grad_norm": 11.96883681403586, + "learning_rate": 1.3149852629218143e-05, + "loss": 1.2992, + "step": 69201 + }, + { + "epoch": 0.83, + "grad_norm": 34.9747556504778, + "learning_rate": 1.3149298182476151e-05, + "loss": 1.5939, + "step": 69204 + }, + { + "epoch": 0.83, + "grad_norm": 12.531226380305156, + "learning_rate": 1.3148743724986739e-05, + "loss": 1.4252, + "step": 69207 + }, + { + "epoch": 0.83, + "grad_norm": 5.159197181388223, + "learning_rate": 1.3148189256751796e-05, + "loss": 1.2991, + "step": 69210 + }, + { + "epoch": 0.83, + "grad_norm": 26.22691410532794, + "learning_rate": 1.3147634777773217e-05, + "loss": 1.6134, + "step": 69213 + }, + { + "epoch": 0.83, + "grad_norm": 2.1000703498320097, + "learning_rate": 1.3147080288052895e-05, + "loss": 1.4701, + "step": 69216 + }, + { + "epoch": 0.83, + "grad_norm": 7.632047215992011, + "learning_rate": 1.3146525787592724e-05, + "loss": 1.0604, + "step": 69219 + }, + { + "epoch": 0.83, + "grad_norm": 10.758798095993926, + "learning_rate": 1.314597127639459e-05, + "loss": 1.6885, + "step": 69222 + }, + { + "epoch": 0.83, + "grad_norm": 7.221412469580031, + "learning_rate": 1.3145416754460391e-05, + "loss": 1.3721, + "step": 69225 + }, + { + "epoch": 0.83, + "grad_norm": 11.462292984533093, + "learning_rate": 1.314486222179202e-05, + "loss": 1.2506, + "step": 69228 + }, + { + "epoch": 0.83, + "grad_norm": 10.816682579906416, + "learning_rate": 1.3144307678391362e-05, + "loss": 1.8364, + "step": 69231 + }, + { + "epoch": 0.83, + "grad_norm": 17.821829293321333, + "learning_rate": 1.3143753124260319e-05, + "loss": 1.1361, + "step": 69234 + }, + { + "epoch": 0.83, + "grad_norm": 4.340655049604236, + "learning_rate": 1.3143198559400778e-05, + "loss": 1.296, + "step": 69237 + }, + { + "epoch": 0.83, + "grad_norm": 32.99081246106664, + "learning_rate": 1.314264398381463e-05, + "loss": 1.4897, + "step": 69240 + }, + { + "epoch": 0.83, + "grad_norm": 9.059405812995928, + "learning_rate": 1.3142089397503772e-05, + "loss": 1.2846, + "step": 69243 + }, + { + "epoch": 0.83, + "grad_norm": 9.537488210136942, + "learning_rate": 1.3141534800470095e-05, + "loss": 1.3962, + "step": 69246 + }, + { + "epoch": 0.83, + "grad_norm": 2.9409724642013533, + "learning_rate": 1.3140980192715491e-05, + "loss": 1.3973, + "step": 69249 + }, + { + "epoch": 0.83, + "grad_norm": 10.229510008245267, + "learning_rate": 1.314042557424185e-05, + "loss": 1.1797, + "step": 69252 + }, + { + "epoch": 0.83, + "grad_norm": 3.852175119379856, + "learning_rate": 1.3139870945051072e-05, + "loss": 1.3663, + "step": 69255 + }, + { + "epoch": 0.83, + "grad_norm": 15.810982727689193, + "learning_rate": 1.3139316305145044e-05, + "loss": 1.2357, + "step": 69258 + }, + { + "epoch": 0.83, + "grad_norm": 23.51604624680312, + "learning_rate": 1.313876165452566e-05, + "loss": 1.3724, + "step": 69261 + }, + { + "epoch": 0.83, + "grad_norm": 8.562262311586515, + "learning_rate": 1.3138206993194813e-05, + "loss": 1.1321, + "step": 69264 + }, + { + "epoch": 0.83, + "grad_norm": 20.526508565090058, + "learning_rate": 1.3137652321154395e-05, + "loss": 1.4697, + "step": 69267 + }, + { + "epoch": 0.83, + "grad_norm": 20.032297948106308, + "learning_rate": 1.3137097638406301e-05, + "loss": 1.2303, + "step": 69270 + }, + { + "epoch": 0.83, + "grad_norm": 26.345948974060327, + "learning_rate": 1.3136542944952424e-05, + "loss": 1.3748, + "step": 69273 + }, + { + "epoch": 0.83, + "grad_norm": 5.851940508647357, + "learning_rate": 1.3135988240794653e-05, + "loss": 1.3403, + "step": 69276 + }, + { + "epoch": 0.83, + "grad_norm": 4.993494808960485, + "learning_rate": 1.3135433525934886e-05, + "loss": 1.01, + "step": 69279 + }, + { + "epoch": 0.83, + "grad_norm": 14.365897559135332, + "learning_rate": 1.3134878800375013e-05, + "loss": 1.1093, + "step": 69282 + }, + { + "epoch": 0.83, + "grad_norm": 29.62163907482614, + "learning_rate": 1.3134324064116926e-05, + "loss": 1.4849, + "step": 69285 + }, + { + "epoch": 0.83, + "grad_norm": 5.4065602687238545, + "learning_rate": 1.3133769317162522e-05, + "loss": 1.3334, + "step": 69288 + }, + { + "epoch": 0.83, + "grad_norm": 22.253757372769083, + "learning_rate": 1.3133214559513691e-05, + "loss": 1.2302, + "step": 69291 + }, + { + "epoch": 0.83, + "grad_norm": 5.53161532208283, + "learning_rate": 1.313265979117233e-05, + "loss": 0.9645, + "step": 69294 + }, + { + "epoch": 0.83, + "grad_norm": 9.280713061416753, + "learning_rate": 1.3132105012140326e-05, + "loss": 1.6029, + "step": 69297 + }, + { + "epoch": 0.83, + "grad_norm": 5.644968379721853, + "learning_rate": 1.3131550222419578e-05, + "loss": 1.6387, + "step": 69300 + }, + { + "epoch": 0.83, + "grad_norm": 12.550888992262495, + "learning_rate": 1.3130995422011977e-05, + "loss": 1.1731, + "step": 69303 + }, + { + "epoch": 0.83, + "grad_norm": 10.923982497296333, + "learning_rate": 1.3130440610919413e-05, + "loss": 1.5718, + "step": 69306 + }, + { + "epoch": 0.83, + "grad_norm": 7.159078764714523, + "learning_rate": 1.3129885789143787e-05, + "loss": 1.4901, + "step": 69309 + }, + { + "epoch": 0.83, + "grad_norm": 9.116101990856102, + "learning_rate": 1.3129330956686985e-05, + "loss": 1.6087, + "step": 69312 + }, + { + "epoch": 0.83, + "grad_norm": 15.901596748906256, + "learning_rate": 1.3128776113550907e-05, + "loss": 1.3278, + "step": 69315 + }, + { + "epoch": 0.83, + "grad_norm": 5.007992718449669, + "learning_rate": 1.3128221259737439e-05, + "loss": 1.3367, + "step": 69318 + }, + { + "epoch": 0.83, + "grad_norm": 8.95886001466827, + "learning_rate": 1.3127666395248482e-05, + "loss": 1.2153, + "step": 69321 + }, + { + "epoch": 0.83, + "grad_norm": 27.40579025458894, + "learning_rate": 1.3127111520085926e-05, + "loss": 1.4339, + "step": 69324 + }, + { + "epoch": 0.83, + "grad_norm": 10.293168963052265, + "learning_rate": 1.3126556634251664e-05, + "loss": 1.4667, + "step": 69327 + }, + { + "epoch": 0.83, + "grad_norm": 5.896403610957726, + "learning_rate": 1.312600173774759e-05, + "loss": 1.2245, + "step": 69330 + }, + { + "epoch": 0.83, + "grad_norm": 4.329624641926572, + "learning_rate": 1.3125446830575599e-05, + "loss": 1.4894, + "step": 69333 + }, + { + "epoch": 0.83, + "grad_norm": 33.436317041168806, + "learning_rate": 1.3124891912737583e-05, + "loss": 1.1779, + "step": 69336 + }, + { + "epoch": 0.83, + "grad_norm": 4.346119263084808, + "learning_rate": 1.3124336984235435e-05, + "loss": 1.291, + "step": 69339 + }, + { + "epoch": 0.83, + "grad_norm": 3.8696618555272924, + "learning_rate": 1.3123782045071052e-05, + "loss": 1.2857, + "step": 69342 + }, + { + "epoch": 0.83, + "grad_norm": 11.628951343731297, + "learning_rate": 1.3123227095246325e-05, + "loss": 1.1761, + "step": 69345 + }, + { + "epoch": 0.83, + "grad_norm": 10.551978257622167, + "learning_rate": 1.3122672134763153e-05, + "loss": 1.0587, + "step": 69348 + }, + { + "epoch": 0.83, + "grad_norm": 10.697070199808097, + "learning_rate": 1.312211716362342e-05, + "loss": 1.1575, + "step": 69351 + }, + { + "epoch": 0.83, + "grad_norm": 5.251726180949287, + "learning_rate": 1.312156218182903e-05, + "loss": 1.3787, + "step": 69354 + }, + { + "epoch": 0.83, + "grad_norm": 7.191135533764286, + "learning_rate": 1.3121007189381869e-05, + "loss": 1.1848, + "step": 69357 + }, + { + "epoch": 0.83, + "grad_norm": 58.962045644744705, + "learning_rate": 1.3120452186283836e-05, + "loss": 1.1601, + "step": 69360 + }, + { + "epoch": 0.83, + "grad_norm": 4.1323568804573, + "learning_rate": 1.3119897172536825e-05, + "loss": 1.7127, + "step": 69363 + }, + { + "epoch": 0.83, + "grad_norm": 28.001234052812894, + "learning_rate": 1.3119342148142728e-05, + "loss": 1.4671, + "step": 69366 + }, + { + "epoch": 0.83, + "grad_norm": 24.200622747353115, + "learning_rate": 1.3118787113103442e-05, + "loss": 1.2131, + "step": 69369 + }, + { + "epoch": 0.83, + "grad_norm": 2.608795010892271, + "learning_rate": 1.3118232067420853e-05, + "loss": 1.0698, + "step": 69372 + }, + { + "epoch": 0.83, + "grad_norm": 8.549555247670986, + "learning_rate": 1.3117677011096867e-05, + "loss": 1.4584, + "step": 69375 + }, + { + "epoch": 0.83, + "grad_norm": 567.390039318718, + "learning_rate": 1.3117121944133371e-05, + "loss": 1.5365, + "step": 69378 + }, + { + "epoch": 0.83, + "grad_norm": 27.831359946510712, + "learning_rate": 1.3116566866532258e-05, + "loss": 1.3017, + "step": 69381 + }, + { + "epoch": 0.83, + "grad_norm": 20.176808307287125, + "learning_rate": 1.3116011778295425e-05, + "loss": 1.3863, + "step": 69384 + }, + { + "epoch": 0.83, + "grad_norm": 7.37605464927193, + "learning_rate": 1.311545667942477e-05, + "loss": 1.4376, + "step": 69387 + }, + { + "epoch": 0.83, + "grad_norm": 9.58730207135586, + "learning_rate": 1.311490156992218e-05, + "loss": 1.5487, + "step": 69390 + }, + { + "epoch": 0.83, + "grad_norm": 27.770593987653985, + "learning_rate": 1.3114346449789555e-05, + "loss": 1.1094, + "step": 69393 + }, + { + "epoch": 0.83, + "grad_norm": 7.149190715162022, + "learning_rate": 1.3113791319028788e-05, + "loss": 1.2151, + "step": 69396 + }, + { + "epoch": 0.83, + "grad_norm": 16.22439929209489, + "learning_rate": 1.3113236177641771e-05, + "loss": 1.7523, + "step": 69399 + }, + { + "epoch": 0.83, + "grad_norm": 19.97487569724974, + "learning_rate": 1.31126810256304e-05, + "loss": 1.5358, + "step": 69402 + }, + { + "epoch": 0.83, + "grad_norm": 8.628857723420726, + "learning_rate": 1.311212586299657e-05, + "loss": 1.5497, + "step": 69405 + }, + { + "epoch": 0.83, + "grad_norm": 45.645083983354404, + "learning_rate": 1.311157068974218e-05, + "loss": 1.1146, + "step": 69408 + }, + { + "epoch": 0.83, + "grad_norm": 6.202795208341663, + "learning_rate": 1.3111015505869113e-05, + "loss": 1.4138, + "step": 69411 + }, + { + "epoch": 0.83, + "grad_norm": 9.242390503481317, + "learning_rate": 1.3110460311379272e-05, + "loss": 1.2566, + "step": 69414 + }, + { + "epoch": 0.83, + "grad_norm": 11.640946906343355, + "learning_rate": 1.3109905106274554e-05, + "loss": 1.3054, + "step": 69417 + }, + { + "epoch": 0.83, + "grad_norm": 5.0686994145346045, + "learning_rate": 1.3109349890556848e-05, + "loss": 1.6055, + "step": 69420 + }, + { + "epoch": 0.83, + "grad_norm": 7.4943047090027095, + "learning_rate": 1.310879466422805e-05, + "loss": 1.5034, + "step": 69423 + }, + { + "epoch": 0.83, + "grad_norm": 10.521053669524939, + "learning_rate": 1.3108239427290057e-05, + "loss": 1.4026, + "step": 69426 + }, + { + "epoch": 0.83, + "grad_norm": 20.607010229004036, + "learning_rate": 1.3107684179744763e-05, + "loss": 1.2135, + "step": 69429 + }, + { + "epoch": 0.83, + "grad_norm": 10.07578421275829, + "learning_rate": 1.3107128921594061e-05, + "loss": 1.6028, + "step": 69432 + }, + { + "epoch": 0.83, + "grad_norm": 3.478105902602873, + "learning_rate": 1.3106573652839846e-05, + "loss": 1.4055, + "step": 69435 + }, + { + "epoch": 0.83, + "grad_norm": 49.152698557412506, + "learning_rate": 1.3106018373484017e-05, + "loss": 1.6764, + "step": 69438 + }, + { + "epoch": 0.84, + "grad_norm": 4.29295592016942, + "learning_rate": 1.3105463083528464e-05, + "loss": 1.4127, + "step": 69441 + }, + { + "epoch": 0.84, + "grad_norm": 7.352418796386606, + "learning_rate": 1.3104907782975084e-05, + "loss": 1.2434, + "step": 69444 + }, + { + "epoch": 0.84, + "grad_norm": 41.19229992839416, + "learning_rate": 1.3104352471825773e-05, + "loss": 1.3693, + "step": 69447 + }, + { + "epoch": 0.84, + "grad_norm": 9.33676696735099, + "learning_rate": 1.3103797150082427e-05, + "loss": 1.5256, + "step": 69450 + }, + { + "epoch": 0.84, + "grad_norm": 5.556443566988108, + "learning_rate": 1.3103241817746935e-05, + "loss": 1.2822, + "step": 69453 + }, + { + "epoch": 0.84, + "grad_norm": 10.448781561027582, + "learning_rate": 1.31026864748212e-05, + "loss": 1.6219, + "step": 69456 + }, + { + "epoch": 0.84, + "grad_norm": 18.974183207652114, + "learning_rate": 1.3102131121307111e-05, + "loss": 1.194, + "step": 69459 + }, + { + "epoch": 0.84, + "grad_norm": 15.837788460368746, + "learning_rate": 1.3101575757206569e-05, + "loss": 1.0945, + "step": 69462 + }, + { + "epoch": 0.84, + "grad_norm": 5.642580246698374, + "learning_rate": 1.3101020382521463e-05, + "loss": 1.6361, + "step": 69465 + }, + { + "epoch": 0.84, + "grad_norm": 18.344964981783846, + "learning_rate": 1.310046499725369e-05, + "loss": 1.6014, + "step": 69468 + }, + { + "epoch": 0.84, + "grad_norm": 6.912950287543804, + "learning_rate": 1.3099909601405154e-05, + "loss": 1.5085, + "step": 69471 + }, + { + "epoch": 0.84, + "grad_norm": 4.815498635935818, + "learning_rate": 1.3099354194977736e-05, + "loss": 1.5009, + "step": 69474 + }, + { + "epoch": 0.84, + "grad_norm": 37.24782719104036, + "learning_rate": 1.3098798777973343e-05, + "loss": 1.5431, + "step": 69477 + }, + { + "epoch": 0.84, + "grad_norm": 5.995700232691115, + "learning_rate": 1.3098243350393863e-05, + "loss": 1.1762, + "step": 69480 + }, + { + "epoch": 0.84, + "grad_norm": 6.85737678595407, + "learning_rate": 1.3097687912241197e-05, + "loss": 1.4749, + "step": 69483 + }, + { + "epoch": 0.84, + "grad_norm": 6.156917973504205, + "learning_rate": 1.3097132463517235e-05, + "loss": 1.1389, + "step": 69486 + }, + { + "epoch": 0.84, + "grad_norm": 21.54341357963797, + "learning_rate": 1.3096577004223879e-05, + "loss": 1.5563, + "step": 69489 + }, + { + "epoch": 0.84, + "grad_norm": 15.527620055559346, + "learning_rate": 1.3096021534363019e-05, + "loss": 1.6169, + "step": 69492 + }, + { + "epoch": 0.84, + "grad_norm": 3.0983530340407346, + "learning_rate": 1.309546605393655e-05, + "loss": 1.1743, + "step": 69495 + }, + { + "epoch": 0.84, + "grad_norm": 5.642229880501909, + "learning_rate": 1.3094910562946375e-05, + "loss": 1.4003, + "step": 69498 + }, + { + "epoch": 0.84, + "grad_norm": 6.186948012130336, + "learning_rate": 1.3094355061394381e-05, + "loss": 1.3132, + "step": 69501 + }, + { + "epoch": 0.84, + "grad_norm": 13.452642100028052, + "learning_rate": 1.3093799549282471e-05, + "loss": 1.5663, + "step": 69504 + }, + { + "epoch": 0.84, + "grad_norm": 25.835757784024327, + "learning_rate": 1.3093244026612534e-05, + "loss": 1.3042, + "step": 69507 + }, + { + "epoch": 0.84, + "grad_norm": 45.797365951516326, + "learning_rate": 1.3092688493386475e-05, + "loss": 0.9658, + "step": 69510 + }, + { + "epoch": 0.84, + "grad_norm": 6.831183176076103, + "learning_rate": 1.309213294960618e-05, + "loss": 1.1657, + "step": 69513 + }, + { + "epoch": 0.84, + "grad_norm": 17.258326473813344, + "learning_rate": 1.309157739527355e-05, + "loss": 1.251, + "step": 69516 + }, + { + "epoch": 0.84, + "grad_norm": 5.225822626458428, + "learning_rate": 1.3091021830390477e-05, + "loss": 1.5248, + "step": 69519 + }, + { + "epoch": 0.84, + "grad_norm": 22.97451018987342, + "learning_rate": 1.3090466254958862e-05, + "loss": 1.2141, + "step": 69522 + }, + { + "epoch": 0.84, + "grad_norm": 20.140919987637613, + "learning_rate": 1.3089910668980603e-05, + "loss": 1.3257, + "step": 69525 + }, + { + "epoch": 0.84, + "grad_norm": 5.274320367838162, + "learning_rate": 1.3089355072457588e-05, + "loss": 1.5071, + "step": 69528 + }, + { + "epoch": 0.84, + "grad_norm": 17.20277926427142, + "learning_rate": 1.3088799465391717e-05, + "loss": 1.2784, + "step": 69531 + }, + { + "epoch": 0.84, + "grad_norm": 2.8923715145175533, + "learning_rate": 1.3088243847784888e-05, + "loss": 1.5028, + "step": 69534 + }, + { + "epoch": 0.84, + "grad_norm": 14.81485654441996, + "learning_rate": 1.3087688219638991e-05, + "loss": 1.3264, + "step": 69537 + }, + { + "epoch": 0.84, + "grad_norm": 14.228409650894923, + "learning_rate": 1.3087132580955929e-05, + "loss": 1.1927, + "step": 69540 + }, + { + "epoch": 0.84, + "grad_norm": 18.845898075628824, + "learning_rate": 1.3086576931737596e-05, + "loss": 1.2552, + "step": 69543 + }, + { + "epoch": 0.84, + "grad_norm": 4.8326225383152215, + "learning_rate": 1.3086021271985887e-05, + "loss": 1.6944, + "step": 69546 + }, + { + "epoch": 0.84, + "grad_norm": 24.53774262796848, + "learning_rate": 1.3085465601702696e-05, + "loss": 1.5775, + "step": 69549 + }, + { + "epoch": 0.84, + "grad_norm": 18.469782725869784, + "learning_rate": 1.3084909920889928e-05, + "loss": 1.5078, + "step": 69552 + }, + { + "epoch": 0.84, + "grad_norm": 8.349232692649728, + "learning_rate": 1.308435422954947e-05, + "loss": 1.4327, + "step": 69555 + }, + { + "epoch": 0.84, + "grad_norm": 14.380421803073228, + "learning_rate": 1.3083798527683223e-05, + "loss": 1.6999, + "step": 69558 + }, + { + "epoch": 0.84, + "grad_norm": 10.417645524791583, + "learning_rate": 1.3083242815293083e-05, + "loss": 1.5433, + "step": 69561 + }, + { + "epoch": 0.84, + "grad_norm": 15.837092317816435, + "learning_rate": 1.3082687092380944e-05, + "loss": 1.2591, + "step": 69564 + }, + { + "epoch": 0.84, + "grad_norm": 3.4763851605641984, + "learning_rate": 1.3082131358948707e-05, + "loss": 1.0561, + "step": 69567 + }, + { + "epoch": 0.84, + "grad_norm": 71.72772505282477, + "learning_rate": 1.3081575614998263e-05, + "loss": 1.3531, + "step": 69570 + }, + { + "epoch": 0.84, + "grad_norm": 13.483271247828593, + "learning_rate": 1.3081019860531513e-05, + "loss": 1.2952, + "step": 69573 + }, + { + "epoch": 0.84, + "grad_norm": 5.848689504197648, + "learning_rate": 1.3080464095550352e-05, + "loss": 1.4103, + "step": 69576 + }, + { + "epoch": 0.84, + "grad_norm": 14.663731179599857, + "learning_rate": 1.3079908320056676e-05, + "loss": 1.2156, + "step": 69579 + }, + { + "epoch": 0.84, + "grad_norm": 6.049807794699696, + "learning_rate": 1.307935253405238e-05, + "loss": 0.9779, + "step": 69582 + }, + { + "epoch": 0.84, + "grad_norm": 11.937921550920697, + "learning_rate": 1.3078796737539365e-05, + "loss": 1.4552, + "step": 69585 + }, + { + "epoch": 0.84, + "grad_norm": 7.39975240018927, + "learning_rate": 1.3078240930519527e-05, + "loss": 1.0826, + "step": 69588 + }, + { + "epoch": 0.84, + "grad_norm": 54.798385961995436, + "learning_rate": 1.3077685112994762e-05, + "loss": 1.7053, + "step": 69591 + }, + { + "epoch": 0.84, + "grad_norm": 44.82160198932741, + "learning_rate": 1.3077129284966966e-05, + "loss": 1.293, + "step": 69594 + }, + { + "epoch": 0.84, + "grad_norm": 31.905618078796035, + "learning_rate": 1.3076573446438035e-05, + "loss": 1.6832, + "step": 69597 + }, + { + "epoch": 0.84, + "grad_norm": 6.288836247022667, + "learning_rate": 1.3076017597409867e-05, + "loss": 1.2134, + "step": 69600 + }, + { + "epoch": 0.84, + "grad_norm": 33.056350125006716, + "learning_rate": 1.3075461737884357e-05, + "loss": 1.3334, + "step": 69603 + }, + { + "epoch": 0.84, + "grad_norm": 17.11863913248391, + "learning_rate": 1.3074905867863406e-05, + "loss": 1.5194, + "step": 69606 + }, + { + "epoch": 0.84, + "grad_norm": 6.745027795152064, + "learning_rate": 1.3074349987348909e-05, + "loss": 1.2218, + "step": 69609 + }, + { + "epoch": 0.84, + "grad_norm": 4.6329943032697765, + "learning_rate": 1.3073794096342764e-05, + "loss": 1.5163, + "step": 69612 + }, + { + "epoch": 0.84, + "grad_norm": 6.750953204068119, + "learning_rate": 1.3073238194846864e-05, + "loss": 1.3052, + "step": 69615 + }, + { + "epoch": 0.84, + "grad_norm": 6.87431788561439, + "learning_rate": 1.3072682282863109e-05, + "loss": 1.3443, + "step": 69618 + }, + { + "epoch": 0.84, + "grad_norm": 4.179093259114801, + "learning_rate": 1.30721263603934e-05, + "loss": 1.2862, + "step": 69621 + }, + { + "epoch": 0.84, + "grad_norm": 12.041491321524664, + "learning_rate": 1.3071570427439629e-05, + "loss": 1.107, + "step": 69624 + }, + { + "epoch": 0.84, + "grad_norm": 14.160717004993206, + "learning_rate": 1.3071014484003692e-05, + "loss": 1.4192, + "step": 69627 + }, + { + "epoch": 0.84, + "grad_norm": 15.521606912028306, + "learning_rate": 1.307045853008749e-05, + "loss": 1.1303, + "step": 69630 + }, + { + "epoch": 0.84, + "grad_norm": 5.077604420279488, + "learning_rate": 1.306990256569292e-05, + "loss": 1.4374, + "step": 69633 + }, + { + "epoch": 0.84, + "grad_norm": 23.712384806428673, + "learning_rate": 1.3069346590821877e-05, + "loss": 1.1422, + "step": 69636 + }, + { + "epoch": 0.84, + "grad_norm": 17.630128298581745, + "learning_rate": 1.3068790605476261e-05, + "loss": 1.2404, + "step": 69639 + }, + { + "epoch": 0.84, + "grad_norm": 22.285175608701888, + "learning_rate": 1.3068234609657965e-05, + "loss": 1.9597, + "step": 69642 + }, + { + "epoch": 0.84, + "grad_norm": 11.69850351366166, + "learning_rate": 1.3067678603368894e-05, + "loss": 1.6444, + "step": 69645 + }, + { + "epoch": 0.84, + "grad_norm": 119.12338725813368, + "learning_rate": 1.3067122586610937e-05, + "loss": 1.1769, + "step": 69648 + }, + { + "epoch": 0.84, + "grad_norm": 4.38553037325666, + "learning_rate": 1.3066566559385998e-05, + "loss": 1.4131, + "step": 69651 + }, + { + "epoch": 0.84, + "grad_norm": 6.907310015043061, + "learning_rate": 1.3066010521695972e-05, + "loss": 1.2814, + "step": 69654 + }, + { + "epoch": 0.84, + "grad_norm": 18.21934718543269, + "learning_rate": 1.3065454473542754e-05, + "loss": 1.509, + "step": 69657 + }, + { + "epoch": 0.84, + "grad_norm": 9.472322189216596, + "learning_rate": 1.3064898414928244e-05, + "loss": 1.1883, + "step": 69660 + }, + { + "epoch": 0.84, + "grad_norm": 8.531788668362223, + "learning_rate": 1.3064342345854341e-05, + "loss": 1.5729, + "step": 69663 + }, + { + "epoch": 0.84, + "grad_norm": 18.235812961506394, + "learning_rate": 1.3063786266322944e-05, + "loss": 1.4872, + "step": 69666 + }, + { + "epoch": 0.84, + "grad_norm": 5.534517040950985, + "learning_rate": 1.3063230176335943e-05, + "loss": 1.588, + "step": 69669 + }, + { + "epoch": 0.84, + "grad_norm": 12.236439583640093, + "learning_rate": 1.3062674075895245e-05, + "loss": 1.4482, + "step": 69672 + }, + { + "epoch": 0.84, + "grad_norm": 14.7639084978739, + "learning_rate": 1.306211796500274e-05, + "loss": 1.2516, + "step": 69675 + }, + { + "epoch": 0.84, + "grad_norm": 7.502490269671432, + "learning_rate": 1.3061561843660334e-05, + "loss": 1.5409, + "step": 69678 + }, + { + "epoch": 0.84, + "grad_norm": 17.71360520688674, + "learning_rate": 1.3061005711869914e-05, + "loss": 1.5373, + "step": 69681 + }, + { + "epoch": 0.84, + "grad_norm": 7.152481541157534, + "learning_rate": 1.306044956963339e-05, + "loss": 1.4788, + "step": 69684 + }, + { + "epoch": 0.84, + "grad_norm": 2.876091922723139, + "learning_rate": 1.3059893416952652e-05, + "loss": 1.3023, + "step": 69687 + }, + { + "epoch": 0.84, + "grad_norm": 7.376694407251383, + "learning_rate": 1.3059337253829596e-05, + "loss": 1.3656, + "step": 69690 + }, + { + "epoch": 0.84, + "grad_norm": 12.42488173179398, + "learning_rate": 1.305878108026613e-05, + "loss": 1.1915, + "step": 69693 + }, + { + "epoch": 0.84, + "grad_norm": 11.369005731055296, + "learning_rate": 1.3058224896264142e-05, + "loss": 1.6134, + "step": 69696 + }, + { + "epoch": 0.84, + "grad_norm": 2.9023789288919923, + "learning_rate": 1.3057668701825536e-05, + "loss": 1.3489, + "step": 69699 + }, + { + "epoch": 0.84, + "grad_norm": 19.12832152325841, + "learning_rate": 1.3057112496952206e-05, + "loss": 1.5287, + "step": 69702 + }, + { + "epoch": 0.84, + "grad_norm": 13.383076097278783, + "learning_rate": 1.3056556281646056e-05, + "loss": 1.346, + "step": 69705 + }, + { + "epoch": 0.84, + "grad_norm": 53.2130082797599, + "learning_rate": 1.3056000055908977e-05, + "loss": 1.1439, + "step": 69708 + }, + { + "epoch": 0.84, + "grad_norm": 2.3982926658816393, + "learning_rate": 1.305544381974287e-05, + "loss": 1.1957, + "step": 69711 + }, + { + "epoch": 0.84, + "grad_norm": 41.30029865756221, + "learning_rate": 1.3054887573149635e-05, + "loss": 1.717, + "step": 69714 + }, + { + "epoch": 0.84, + "grad_norm": 15.701142405303823, + "learning_rate": 1.3054331316131171e-05, + "loss": 1.4276, + "step": 69717 + }, + { + "epoch": 0.84, + "grad_norm": 26.762935736240735, + "learning_rate": 1.3053775048689372e-05, + "loss": 1.3564, + "step": 69720 + }, + { + "epoch": 0.84, + "grad_norm": 10.060560699674745, + "learning_rate": 1.305321877082614e-05, + "loss": 1.3601, + "step": 69723 + }, + { + "epoch": 0.84, + "grad_norm": 4.242642048077441, + "learning_rate": 1.305266248254337e-05, + "loss": 1.1695, + "step": 69726 + }, + { + "epoch": 0.84, + "grad_norm": 22.65965268864551, + "learning_rate": 1.3052106183842965e-05, + "loss": 1.3872, + "step": 69729 + }, + { + "epoch": 0.84, + "grad_norm": 7.052357833529118, + "learning_rate": 1.3051549874726821e-05, + "loss": 0.9168, + "step": 69732 + }, + { + "epoch": 0.84, + "grad_norm": 9.637036988865345, + "learning_rate": 1.3050993555196834e-05, + "loss": 1.5972, + "step": 69735 + }, + { + "epoch": 0.84, + "grad_norm": 31.072857176602405, + "learning_rate": 1.3050437225254906e-05, + "loss": 1.2962, + "step": 69738 + }, + { + "epoch": 0.84, + "grad_norm": 55.59458728597385, + "learning_rate": 1.3049880884902937e-05, + "loss": 1.6261, + "step": 69741 + }, + { + "epoch": 0.84, + "grad_norm": 16.869330196959076, + "learning_rate": 1.304932453414282e-05, + "loss": 1.2202, + "step": 69744 + }, + { + "epoch": 0.84, + "grad_norm": 6.02771067175579, + "learning_rate": 1.3048768172976458e-05, + "loss": 1.3398, + "step": 69747 + }, + { + "epoch": 0.84, + "grad_norm": 51.914975553421925, + "learning_rate": 1.3048211801405747e-05, + "loss": 1.5189, + "step": 69750 + }, + { + "epoch": 0.84, + "grad_norm": 23.88363486026298, + "learning_rate": 1.304765541943259e-05, + "loss": 1.5178, + "step": 69753 + }, + { + "epoch": 0.84, + "grad_norm": 7.8191266562604165, + "learning_rate": 1.3047099027058879e-05, + "loss": 1.3492, + "step": 69756 + }, + { + "epoch": 0.84, + "grad_norm": 15.465322969534471, + "learning_rate": 1.304654262428652e-05, + "loss": 1.2416, + "step": 69759 + }, + { + "epoch": 0.84, + "grad_norm": 11.153238506764819, + "learning_rate": 1.3045986211117407e-05, + "loss": 1.1157, + "step": 69762 + }, + { + "epoch": 0.84, + "grad_norm": 36.85627736615964, + "learning_rate": 1.3045429787553438e-05, + "loss": 1.1895, + "step": 69765 + }, + { + "epoch": 0.84, + "grad_norm": 27.93829070482489, + "learning_rate": 1.3044873353596517e-05, + "loss": 1.2771, + "step": 69768 + }, + { + "epoch": 0.84, + "grad_norm": 31.593396327284303, + "learning_rate": 1.304431690924854e-05, + "loss": 1.2968, + "step": 69771 + }, + { + "epoch": 0.84, + "grad_norm": 3.853133563431, + "learning_rate": 1.3043760454511407e-05, + "loss": 1.4859, + "step": 69774 + }, + { + "epoch": 0.84, + "grad_norm": 13.875677753712035, + "learning_rate": 1.3043203989387013e-05, + "loss": 0.9394, + "step": 69777 + }, + { + "epoch": 0.84, + "grad_norm": 14.459121624808077, + "learning_rate": 1.3042647513877262e-05, + "loss": 1.1177, + "step": 69780 + }, + { + "epoch": 0.84, + "grad_norm": 55.55975764152439, + "learning_rate": 1.304209102798405e-05, + "loss": 1.2151, + "step": 69783 + }, + { + "epoch": 0.84, + "grad_norm": 28.136635239892197, + "learning_rate": 1.3041534531709276e-05, + "loss": 1.1064, + "step": 69786 + }, + { + "epoch": 0.84, + "grad_norm": 33.4846518434778, + "learning_rate": 1.304097802505484e-05, + "loss": 1.5787, + "step": 69789 + }, + { + "epoch": 0.84, + "grad_norm": 16.089361172844644, + "learning_rate": 1.3040421508022646e-05, + "loss": 1.4738, + "step": 69792 + }, + { + "epoch": 0.84, + "grad_norm": 8.956372445280621, + "learning_rate": 1.3039864980614585e-05, + "loss": 1.3602, + "step": 69795 + }, + { + "epoch": 0.84, + "grad_norm": 11.62894413817771, + "learning_rate": 1.303930844283256e-05, + "loss": 1.399, + "step": 69798 + }, + { + "epoch": 0.84, + "grad_norm": 9.640606872695571, + "learning_rate": 1.303875189467847e-05, + "loss": 1.2764, + "step": 69801 + }, + { + "epoch": 0.84, + "grad_norm": 4.55606124850367, + "learning_rate": 1.3038195336154213e-05, + "loss": 1.1074, + "step": 69804 + }, + { + "epoch": 0.84, + "grad_norm": 17.242910315776605, + "learning_rate": 1.3037638767261693e-05, + "loss": 1.0625, + "step": 69807 + }, + { + "epoch": 0.84, + "grad_norm": 4.391099159148321, + "learning_rate": 1.3037082188002803e-05, + "loss": 1.5333, + "step": 69810 + }, + { + "epoch": 0.84, + "grad_norm": 7.719723935844558, + "learning_rate": 1.3036525598379447e-05, + "loss": 1.5987, + "step": 69813 + }, + { + "epoch": 0.84, + "grad_norm": 8.0458618598732, + "learning_rate": 1.3035968998393523e-05, + "loss": 1.2183, + "step": 69816 + }, + { + "epoch": 0.84, + "grad_norm": 32.91465202502655, + "learning_rate": 1.3035412388046928e-05, + "loss": 1.5238, + "step": 69819 + }, + { + "epoch": 0.84, + "grad_norm": 13.912065034449295, + "learning_rate": 1.3034855767341565e-05, + "loss": 1.3321, + "step": 69822 + }, + { + "epoch": 0.84, + "grad_norm": 10.283301609043518, + "learning_rate": 1.3034299136279333e-05, + "loss": 1.3769, + "step": 69825 + }, + { + "epoch": 0.84, + "grad_norm": 4.481583228815251, + "learning_rate": 1.3033742494862133e-05, + "loss": 1.2502, + "step": 69828 + }, + { + "epoch": 0.84, + "grad_norm": 10.630436429565444, + "learning_rate": 1.303318584309186e-05, + "loss": 1.1839, + "step": 69831 + }, + { + "epoch": 0.84, + "grad_norm": 15.680976266082032, + "learning_rate": 1.303262918097042e-05, + "loss": 1.4019, + "step": 69834 + }, + { + "epoch": 0.84, + "grad_norm": 20.997942437781482, + "learning_rate": 1.3032072508499704e-05, + "loss": 1.6368, + "step": 69837 + }, + { + "epoch": 0.84, + "grad_norm": 32.40061670895546, + "learning_rate": 1.303151582568162e-05, + "loss": 1.3837, + "step": 69840 + }, + { + "epoch": 0.84, + "grad_norm": 9.79510462864659, + "learning_rate": 1.3030959132518061e-05, + "loss": 1.0131, + "step": 69843 + }, + { + "epoch": 0.84, + "grad_norm": 14.733483437517558, + "learning_rate": 1.3030402429010933e-05, + "loss": 1.4643, + "step": 69846 + }, + { + "epoch": 0.84, + "grad_norm": 7.8858735693743585, + "learning_rate": 1.3029845715162133e-05, + "loss": 1.2383, + "step": 69849 + }, + { + "epoch": 0.84, + "grad_norm": 16.993094419154104, + "learning_rate": 1.3029288990973558e-05, + "loss": 1.2791, + "step": 69852 + }, + { + "epoch": 0.84, + "grad_norm": 39.71336489354274, + "learning_rate": 1.3028732256447116e-05, + "loss": 1.2161, + "step": 69855 + }, + { + "epoch": 0.84, + "grad_norm": 11.219778731875383, + "learning_rate": 1.3028175511584698e-05, + "loss": 1.3307, + "step": 69858 + }, + { + "epoch": 0.84, + "grad_norm": 7.012332858067158, + "learning_rate": 1.3027618756388209e-05, + "loss": 1.3027, + "step": 69861 + }, + { + "epoch": 0.84, + "grad_norm": 4.114662478038204, + "learning_rate": 1.3027061990859546e-05, + "loss": 1.4592, + "step": 69864 + }, + { + "epoch": 0.84, + "grad_norm": 9.074828017586615, + "learning_rate": 1.3026505215000613e-05, + "loss": 1.8023, + "step": 69867 + }, + { + "epoch": 0.84, + "grad_norm": 11.275978802900893, + "learning_rate": 1.3025948428813306e-05, + "loss": 1.3081, + "step": 69870 + }, + { + "epoch": 0.84, + "grad_norm": 9.906567427257546, + "learning_rate": 1.3025391632299527e-05, + "loss": 1.4164, + "step": 69873 + }, + { + "epoch": 0.84, + "grad_norm": 16.46184170172965, + "learning_rate": 1.3024834825461179e-05, + "loss": 1.2975, + "step": 69876 + }, + { + "epoch": 0.84, + "grad_norm": 22.945787488536123, + "learning_rate": 1.3024278008300155e-05, + "loss": 1.3412, + "step": 69879 + }, + { + "epoch": 0.84, + "grad_norm": 48.95801779035109, + "learning_rate": 1.3023721180818366e-05, + "loss": 1.5098, + "step": 69882 + }, + { + "epoch": 0.84, + "grad_norm": 6.95659581818736, + "learning_rate": 1.3023164343017699e-05, + "loss": 0.9609, + "step": 69885 + }, + { + "epoch": 0.84, + "grad_norm": 15.503712820174886, + "learning_rate": 1.3022607494900066e-05, + "loss": 1.5655, + "step": 69888 + }, + { + "epoch": 0.84, + "grad_norm": 61.75340891476536, + "learning_rate": 1.3022050636467358e-05, + "loss": 1.6678, + "step": 69891 + }, + { + "epoch": 0.84, + "grad_norm": 17.991834993917347, + "learning_rate": 1.3021493767721483e-05, + "loss": 1.5223, + "step": 69894 + }, + { + "epoch": 0.84, + "grad_norm": 18.450050275463344, + "learning_rate": 1.3020936888664335e-05, + "loss": 1.1939, + "step": 69897 + }, + { + "epoch": 0.84, + "grad_norm": 11.21570841252115, + "learning_rate": 1.3020379999297818e-05, + "loss": 1.2697, + "step": 69900 + }, + { + "epoch": 0.84, + "grad_norm": 275.8315743581423, + "learning_rate": 1.3019823099623834e-05, + "loss": 1.2401, + "step": 69903 + }, + { + "epoch": 0.84, + "grad_norm": 9.771757749385639, + "learning_rate": 1.301926618964428e-05, + "loss": 1.3442, + "step": 69906 + }, + { + "epoch": 0.84, + "grad_norm": 20.434980545509596, + "learning_rate": 1.301870926936106e-05, + "loss": 1.2464, + "step": 69909 + }, + { + "epoch": 0.84, + "grad_norm": 3.317586354777884, + "learning_rate": 1.3018152338776071e-05, + "loss": 1.2246, + "step": 69912 + }, + { + "epoch": 0.84, + "grad_norm": 35.010842703562076, + "learning_rate": 1.3017595397891217e-05, + "loss": 1.5475, + "step": 69915 + }, + { + "epoch": 0.84, + "grad_norm": 18.831422861148656, + "learning_rate": 1.3017038446708395e-05, + "loss": 1.207, + "step": 69918 + }, + { + "epoch": 0.84, + "grad_norm": 9.271279137020864, + "learning_rate": 1.3016481485229508e-05, + "loss": 1.5949, + "step": 69921 + }, + { + "epoch": 0.84, + "grad_norm": 21.76869788372045, + "learning_rate": 1.3015924513456456e-05, + "loss": 1.3593, + "step": 69924 + }, + { + "epoch": 0.84, + "grad_norm": 4.75726763707972, + "learning_rate": 1.301536753139114e-05, + "loss": 1.5829, + "step": 69927 + }, + { + "epoch": 0.84, + "grad_norm": 7.034838550254608, + "learning_rate": 1.301481053903546e-05, + "loss": 1.3274, + "step": 69930 + }, + { + "epoch": 0.84, + "grad_norm": 15.025097340715744, + "learning_rate": 1.301425353639132e-05, + "loss": 0.8899, + "step": 69933 + }, + { + "epoch": 0.84, + "grad_norm": 15.727523964442195, + "learning_rate": 1.3013696523460615e-05, + "loss": 1.546, + "step": 69936 + }, + { + "epoch": 0.84, + "grad_norm": 16.638189366238887, + "learning_rate": 1.301313950024525e-05, + "loss": 1.4633, + "step": 69939 + }, + { + "epoch": 0.84, + "grad_norm": 6.157183683399492, + "learning_rate": 1.301258246674713e-05, + "loss": 1.4588, + "step": 69942 + }, + { + "epoch": 0.84, + "grad_norm": 204.8130683021292, + "learning_rate": 1.3012025422968147e-05, + "loss": 1.1132, + "step": 69945 + }, + { + "epoch": 0.84, + "grad_norm": 13.687002143192219, + "learning_rate": 1.3011468368910206e-05, + "loss": 1.2698, + "step": 69948 + }, + { + "epoch": 0.84, + "grad_norm": 15.748001942676465, + "learning_rate": 1.3010911304575209e-05, + "loss": 1.1035, + "step": 69951 + }, + { + "epoch": 0.84, + "grad_norm": 27.457777124659486, + "learning_rate": 1.3010354229965055e-05, + "loss": 1.2181, + "step": 69954 + }, + { + "epoch": 0.84, + "grad_norm": 6.431805966362359, + "learning_rate": 1.3009797145081649e-05, + "loss": 1.6836, + "step": 69957 + }, + { + "epoch": 0.84, + "grad_norm": 14.398375821957002, + "learning_rate": 1.3009240049926885e-05, + "loss": 1.42, + "step": 69960 + }, + { + "epoch": 0.84, + "grad_norm": 13.294137114538206, + "learning_rate": 1.3008682944502672e-05, + "loss": 1.1212, + "step": 69963 + }, + { + "epoch": 0.84, + "grad_norm": 3.6715725462486133, + "learning_rate": 1.3008125828810907e-05, + "loss": 1.1484, + "step": 69966 + }, + { + "epoch": 0.84, + "grad_norm": 13.207286194858542, + "learning_rate": 1.3007568702853494e-05, + "loss": 1.4664, + "step": 69969 + }, + { + "epoch": 0.84, + "grad_norm": 14.174478361163809, + "learning_rate": 1.3007011566632332e-05, + "loss": 1.3966, + "step": 69972 + }, + { + "epoch": 0.84, + "grad_norm": 38.54381641040758, + "learning_rate": 1.3006454420149322e-05, + "loss": 1.703, + "step": 69975 + }, + { + "epoch": 0.84, + "grad_norm": 30.051326425625074, + "learning_rate": 1.3005897263406364e-05, + "loss": 1.0929, + "step": 69978 + }, + { + "epoch": 0.84, + "grad_norm": 17.51673620905773, + "learning_rate": 1.3005340096405362e-05, + "loss": 1.3457, + "step": 69981 + }, + { + "epoch": 0.84, + "grad_norm": 17.525933808220984, + "learning_rate": 1.3004782919148218e-05, + "loss": 1.4148, + "step": 69984 + }, + { + "epoch": 0.84, + "grad_norm": 26.554763019074443, + "learning_rate": 1.3004225731636831e-05, + "loss": 1.6009, + "step": 69987 + }, + { + "epoch": 0.84, + "grad_norm": 10.230488861013571, + "learning_rate": 1.3003668533873106e-05, + "loss": 1.1623, + "step": 69990 + }, + { + "epoch": 0.84, + "grad_norm": 26.645145346242273, + "learning_rate": 1.3003111325858941e-05, + "loss": 1.188, + "step": 69993 + }, + { + "epoch": 0.84, + "grad_norm": 16.0548621627384, + "learning_rate": 1.300255410759624e-05, + "loss": 1.2532, + "step": 69996 + }, + { + "epoch": 0.84, + "grad_norm": 9.389380650720609, + "learning_rate": 1.3001996879086898e-05, + "loss": 1.3919, + "step": 69999 + }, + { + "epoch": 0.84, + "grad_norm": 12.158970154273888, + "learning_rate": 1.3001439640332828e-05, + "loss": 1.4552, + "step": 70002 + }, + { + "epoch": 0.84, + "grad_norm": 22.164602390837768, + "learning_rate": 1.3000882391335924e-05, + "loss": 1.5041, + "step": 70005 + }, + { + "epoch": 0.84, + "grad_norm": 7.649879873755944, + "learning_rate": 1.3000325132098086e-05, + "loss": 1.4775, + "step": 70008 + }, + { + "epoch": 0.84, + "grad_norm": 13.928423992980088, + "learning_rate": 1.2999767862621222e-05, + "loss": 1.1301, + "step": 70011 + }, + { + "epoch": 0.84, + "grad_norm": 11.808680434284483, + "learning_rate": 1.2999210582907232e-05, + "loss": 1.129, + "step": 70014 + }, + { + "epoch": 0.84, + "grad_norm": 5.971735320009422, + "learning_rate": 1.2998653292958015e-05, + "loss": 1.177, + "step": 70017 + }, + { + "epoch": 0.84, + "grad_norm": 13.096166620147962, + "learning_rate": 1.2998095992775472e-05, + "loss": 1.5801, + "step": 70020 + }, + { + "epoch": 0.84, + "grad_norm": 14.555952076218867, + "learning_rate": 1.299753868236151e-05, + "loss": 1.5964, + "step": 70023 + }, + { + "epoch": 0.84, + "grad_norm": 14.242041243781118, + "learning_rate": 1.2996981361718031e-05, + "loss": 1.4313, + "step": 70026 + }, + { + "epoch": 0.84, + "grad_norm": 15.8308717910255, + "learning_rate": 1.299642403084693e-05, + "loss": 1.567, + "step": 70029 + }, + { + "epoch": 0.84, + "grad_norm": 9.103084973711438, + "learning_rate": 1.2995866689750112e-05, + "loss": 1.3856, + "step": 70032 + }, + { + "epoch": 0.84, + "grad_norm": 7.579651992757099, + "learning_rate": 1.2995309338429485e-05, + "loss": 1.4089, + "step": 70035 + }, + { + "epoch": 0.84, + "grad_norm": 13.694057819858724, + "learning_rate": 1.2994751976886944e-05, + "loss": 1.7339, + "step": 70038 + }, + { + "epoch": 0.84, + "grad_norm": 13.874421220321157, + "learning_rate": 1.299419460512439e-05, + "loss": 1.489, + "step": 70041 + }, + { + "epoch": 0.84, + "grad_norm": 6.524523725219204, + "learning_rate": 1.2993637223143732e-05, + "loss": 1.3074, + "step": 70044 + }, + { + "epoch": 0.84, + "grad_norm": 15.531775250589536, + "learning_rate": 1.2993079830946869e-05, + "loss": 1.3831, + "step": 70047 + }, + { + "epoch": 0.84, + "grad_norm": 5.30803550968457, + "learning_rate": 1.2992522428535701e-05, + "loss": 1.1452, + "step": 70050 + }, + { + "epoch": 0.84, + "grad_norm": 8.481892131555984, + "learning_rate": 1.2991965015912128e-05, + "loss": 1.5487, + "step": 70053 + }, + { + "epoch": 0.84, + "grad_norm": 23.013143877798942, + "learning_rate": 1.2991407593078063e-05, + "loss": 1.4523, + "step": 70056 + }, + { + "epoch": 0.84, + "grad_norm": 5.475106510489329, + "learning_rate": 1.2990850160035398e-05, + "loss": 1.3277, + "step": 70059 + }, + { + "epoch": 0.84, + "grad_norm": 13.571799606823062, + "learning_rate": 1.2990292716786037e-05, + "loss": 1.6974, + "step": 70062 + }, + { + "epoch": 0.84, + "grad_norm": 25.19730527768482, + "learning_rate": 1.2989735263331889e-05, + "loss": 1.5841, + "step": 70065 + }, + { + "epoch": 0.84, + "grad_norm": 10.577973503096258, + "learning_rate": 1.2989177799674849e-05, + "loss": 1.4885, + "step": 70068 + }, + { + "epoch": 0.84, + "grad_norm": 15.212754310795995, + "learning_rate": 1.2988620325816821e-05, + "loss": 1.2651, + "step": 70071 + }, + { + "epoch": 0.84, + "grad_norm": 114.71114150145287, + "learning_rate": 1.2988062841759708e-05, + "loss": 1.8636, + "step": 70074 + }, + { + "epoch": 0.84, + "grad_norm": 12.345998829075699, + "learning_rate": 1.2987505347505413e-05, + "loss": 1.6358, + "step": 70077 + }, + { + "epoch": 0.84, + "grad_norm": 45.0622404356294, + "learning_rate": 1.2986947843055839e-05, + "loss": 1.4674, + "step": 70080 + }, + { + "epoch": 0.84, + "grad_norm": 18.282248872386916, + "learning_rate": 1.298639032841289e-05, + "loss": 1.2277, + "step": 70083 + }, + { + "epoch": 0.84, + "grad_norm": 12.470078302563056, + "learning_rate": 1.2985832803578462e-05, + "loss": 1.467, + "step": 70086 + }, + { + "epoch": 0.84, + "grad_norm": 14.418750962631183, + "learning_rate": 1.2985275268554465e-05, + "loss": 1.4472, + "step": 70089 + }, + { + "epoch": 0.84, + "grad_norm": 13.344127199260802, + "learning_rate": 1.2984717723342799e-05, + "loss": 1.625, + "step": 70092 + }, + { + "epoch": 0.84, + "grad_norm": 3.636705377230639, + "learning_rate": 1.2984160167945363e-05, + "loss": 1.9882, + "step": 70095 + }, + { + "epoch": 0.84, + "grad_norm": 22.834282470260955, + "learning_rate": 1.2983602602364068e-05, + "loss": 1.2885, + "step": 70098 + }, + { + "epoch": 0.84, + "grad_norm": 21.55921735133312, + "learning_rate": 1.2983045026600811e-05, + "loss": 1.5414, + "step": 70101 + }, + { + "epoch": 0.84, + "grad_norm": 5.250828386488734, + "learning_rate": 1.2982487440657494e-05, + "loss": 1.3965, + "step": 70104 + }, + { + "epoch": 0.84, + "grad_norm": 14.14252248896646, + "learning_rate": 1.298192984453602e-05, + "loss": 1.4621, + "step": 70107 + }, + { + "epoch": 0.84, + "grad_norm": 6.840753104522996, + "learning_rate": 1.2981372238238299e-05, + "loss": 1.2304, + "step": 70110 + }, + { + "epoch": 0.84, + "grad_norm": 9.151665505523393, + "learning_rate": 1.2980814621766225e-05, + "loss": 1.3955, + "step": 70113 + }, + { + "epoch": 0.84, + "grad_norm": 13.012208935991032, + "learning_rate": 1.2980256995121703e-05, + "loss": 1.3008, + "step": 70116 + }, + { + "epoch": 0.84, + "grad_norm": 3.2122313689584074, + "learning_rate": 1.2979699358306639e-05, + "loss": 1.4338, + "step": 70119 + }, + { + "epoch": 0.84, + "grad_norm": 3.8006654486646623, + "learning_rate": 1.2979141711322936e-05, + "loss": 1.2988, + "step": 70122 + }, + { + "epoch": 0.84, + "grad_norm": 11.512758442854926, + "learning_rate": 1.2978584054172495e-05, + "loss": 1.2461, + "step": 70125 + }, + { + "epoch": 0.84, + "grad_norm": 23.740078765773017, + "learning_rate": 1.2978026386857216e-05, + "loss": 1.1149, + "step": 70128 + }, + { + "epoch": 0.84, + "grad_norm": 16.50050300472757, + "learning_rate": 1.2977468709379009e-05, + "loss": 1.1006, + "step": 70131 + }, + { + "epoch": 0.84, + "grad_norm": 11.449400648974608, + "learning_rate": 1.2976911021739773e-05, + "loss": 1.3717, + "step": 70134 + }, + { + "epoch": 0.84, + "grad_norm": 124.01596047456641, + "learning_rate": 1.2976353323941413e-05, + "loss": 1.181, + "step": 70137 + }, + { + "epoch": 0.84, + "grad_norm": 19.5574540595203, + "learning_rate": 1.2975795615985829e-05, + "loss": 1.4744, + "step": 70140 + }, + { + "epoch": 0.84, + "grad_norm": 11.277298457857267, + "learning_rate": 1.2975237897874928e-05, + "loss": 1.2508, + "step": 70143 + }, + { + "epoch": 0.84, + "grad_norm": 2.4607260658756354, + "learning_rate": 1.2974680169610613e-05, + "loss": 1.3108, + "step": 70146 + }, + { + "epoch": 0.84, + "grad_norm": 2.8789604415347765, + "learning_rate": 1.297412243119478e-05, + "loss": 1.1149, + "step": 70149 + }, + { + "epoch": 0.84, + "grad_norm": 2.3171969927404357, + "learning_rate": 1.2973564682629345e-05, + "loss": 1.5155, + "step": 70152 + }, + { + "epoch": 0.84, + "grad_norm": 8.813354907321624, + "learning_rate": 1.2973006923916202e-05, + "loss": 1.3456, + "step": 70155 + }, + { + "epoch": 0.84, + "grad_norm": 4.268302814773929, + "learning_rate": 1.2972449155057259e-05, + "loss": 1.4197, + "step": 70158 + }, + { + "epoch": 0.84, + "grad_norm": 12.959987840940496, + "learning_rate": 1.2971891376054415e-05, + "loss": 1.312, + "step": 70161 + }, + { + "epoch": 0.84, + "grad_norm": 28.97591507575269, + "learning_rate": 1.2971333586909577e-05, + "loss": 1.4424, + "step": 70164 + }, + { + "epoch": 0.84, + "grad_norm": 4.3672526275864625, + "learning_rate": 1.297077578762465e-05, + "loss": 1.1424, + "step": 70167 + }, + { + "epoch": 0.84, + "grad_norm": 5.007190635132231, + "learning_rate": 1.2970217978201532e-05, + "loss": 1.4817, + "step": 70170 + }, + { + "epoch": 0.84, + "grad_norm": 5.829811689479159, + "learning_rate": 1.2969660158642132e-05, + "loss": 1.1878, + "step": 70173 + }, + { + "epoch": 0.84, + "grad_norm": 26.831428409732368, + "learning_rate": 1.2969102328948352e-05, + "loss": 1.4787, + "step": 70176 + }, + { + "epoch": 0.84, + "grad_norm": 16.876057937079274, + "learning_rate": 1.2968544489122094e-05, + "loss": 1.4749, + "step": 70179 + }, + { + "epoch": 0.84, + "grad_norm": 4.78546207655664, + "learning_rate": 1.2967986639165262e-05, + "loss": 1.4286, + "step": 70182 + }, + { + "epoch": 0.84, + "grad_norm": 15.992117536771337, + "learning_rate": 1.2967428779079763e-05, + "loss": 1.5278, + "step": 70185 + }, + { + "epoch": 0.84, + "grad_norm": 61.1158541006807, + "learning_rate": 1.2966870908867497e-05, + "loss": 1.7643, + "step": 70188 + }, + { + "epoch": 0.84, + "grad_norm": 12.525987461289487, + "learning_rate": 1.296631302853037e-05, + "loss": 1.264, + "step": 70191 + }, + { + "epoch": 0.84, + "grad_norm": 64.14582426215875, + "learning_rate": 1.2965755138070284e-05, + "loss": 1.8818, + "step": 70194 + }, + { + "epoch": 0.84, + "grad_norm": 7.215131382090667, + "learning_rate": 1.2965197237489147e-05, + "loss": 0.9819, + "step": 70197 + }, + { + "epoch": 0.84, + "grad_norm": 29.905315656311593, + "learning_rate": 1.2964639326788856e-05, + "loss": 1.5421, + "step": 70200 + }, + { + "epoch": 0.84, + "grad_norm": 18.16126977235202, + "learning_rate": 1.2964081405971318e-05, + "loss": 1.1753, + "step": 70203 + }, + { + "epoch": 0.84, + "grad_norm": 11.57957365133481, + "learning_rate": 1.2963523475038442e-05, + "loss": 1.5007, + "step": 70206 + }, + { + "epoch": 0.84, + "grad_norm": 7.630655001821629, + "learning_rate": 1.2962965533992127e-05, + "loss": 1.4515, + "step": 70209 + }, + { + "epoch": 0.84, + "grad_norm": 7.059576002367565, + "learning_rate": 1.2962407582834278e-05, + "loss": 1.3072, + "step": 70212 + }, + { + "epoch": 0.84, + "grad_norm": 12.62369349191909, + "learning_rate": 1.2961849621566796e-05, + "loss": 1.2294, + "step": 70215 + }, + { + "epoch": 0.84, + "grad_norm": 100.22211473865086, + "learning_rate": 1.296129165019159e-05, + "loss": 1.5288, + "step": 70218 + }, + { + "epoch": 0.84, + "grad_norm": 43.47744615263598, + "learning_rate": 1.2960733668710563e-05, + "loss": 1.3057, + "step": 70221 + }, + { + "epoch": 0.84, + "grad_norm": 6.799045203460555, + "learning_rate": 1.2960175677125617e-05, + "loss": 1.1094, + "step": 70224 + }, + { + "epoch": 0.84, + "grad_norm": 37.84954657342591, + "learning_rate": 1.2959617675438658e-05, + "loss": 1.5343, + "step": 70227 + }, + { + "epoch": 0.84, + "grad_norm": 13.753358850260307, + "learning_rate": 1.2959059663651592e-05, + "loss": 1.4592, + "step": 70230 + }, + { + "epoch": 0.84, + "grad_norm": 16.807708809467982, + "learning_rate": 1.295850164176632e-05, + "loss": 1.6044, + "step": 70233 + }, + { + "epoch": 0.84, + "grad_norm": 3.6970695115573693, + "learning_rate": 1.2957943609784744e-05, + "loss": 1.6108, + "step": 70236 + }, + { + "epoch": 0.84, + "grad_norm": 13.5250861589726, + "learning_rate": 1.2957385567708776e-05, + "loss": 0.8973, + "step": 70239 + }, + { + "epoch": 0.84, + "grad_norm": 8.608955056129998, + "learning_rate": 1.2956827515540315e-05, + "loss": 1.5775, + "step": 70242 + }, + { + "epoch": 0.84, + "grad_norm": 20.334112612759398, + "learning_rate": 1.2956269453281269e-05, + "loss": 1.2395, + "step": 70245 + }, + { + "epoch": 0.84, + "grad_norm": 11.228464776018734, + "learning_rate": 1.2955711380933536e-05, + "loss": 1.5536, + "step": 70248 + }, + { + "epoch": 0.84, + "grad_norm": 54.39625176919403, + "learning_rate": 1.2955153298499028e-05, + "loss": 1.2257, + "step": 70251 + }, + { + "epoch": 0.84, + "grad_norm": 2.96708554190399, + "learning_rate": 1.2954595205979645e-05, + "loss": 1.273, + "step": 70254 + }, + { + "epoch": 0.84, + "grad_norm": 11.18907187783948, + "learning_rate": 1.2954037103377293e-05, + "loss": 1.3499, + "step": 70257 + }, + { + "epoch": 0.84, + "grad_norm": 11.528851281639147, + "learning_rate": 1.2953478990693876e-05, + "loss": 1.4012, + "step": 70260 + }, + { + "epoch": 0.84, + "grad_norm": 18.041830589292548, + "learning_rate": 1.29529208679313e-05, + "loss": 1.2156, + "step": 70263 + }, + { + "epoch": 0.84, + "grad_norm": 6.928957440303451, + "learning_rate": 1.2952362735091468e-05, + "loss": 1.4759, + "step": 70266 + }, + { + "epoch": 0.84, + "grad_norm": 19.89961931340237, + "learning_rate": 1.2951804592176284e-05, + "loss": 1.4155, + "step": 70269 + }, + { + "epoch": 0.84, + "grad_norm": 10.763016384866711, + "learning_rate": 1.2951246439187659e-05, + "loss": 1.4179, + "step": 70272 + }, + { + "epoch": 0.85, + "grad_norm": 14.798368674967707, + "learning_rate": 1.2950688276127488e-05, + "loss": 1.3089, + "step": 70275 + }, + { + "epoch": 0.85, + "grad_norm": 21.348588813218672, + "learning_rate": 1.2950130102997682e-05, + "loss": 1.1883, + "step": 70278 + }, + { + "epoch": 0.85, + "grad_norm": 14.210184217056632, + "learning_rate": 1.2949571919800143e-05, + "loss": 1.2526, + "step": 70281 + }, + { + "epoch": 0.85, + "grad_norm": 18.6561590117791, + "learning_rate": 1.2949013726536781e-05, + "loss": 1.1496, + "step": 70284 + }, + { + "epoch": 0.85, + "grad_norm": 10.86875366243833, + "learning_rate": 1.2948455523209497e-05, + "loss": 1.7266, + "step": 70287 + }, + { + "epoch": 0.85, + "grad_norm": 8.014585220740011, + "learning_rate": 1.2947897309820195e-05, + "loss": 1.3478, + "step": 70290 + }, + { + "epoch": 0.85, + "grad_norm": 9.683977066475567, + "learning_rate": 1.2947339086370783e-05, + "loss": 1.2395, + "step": 70293 + }, + { + "epoch": 0.85, + "grad_norm": 9.090855132017314, + "learning_rate": 1.2946780852863161e-05, + "loss": 1.3683, + "step": 70296 + }, + { + "epoch": 0.85, + "grad_norm": 7.501549164633127, + "learning_rate": 1.2946222609299239e-05, + "loss": 1.3289, + "step": 70299 + }, + { + "epoch": 0.85, + "grad_norm": 6.627015322469626, + "learning_rate": 1.294566435568092e-05, + "loss": 1.3718, + "step": 70302 + }, + { + "epoch": 0.85, + "grad_norm": 6.719843385468887, + "learning_rate": 1.2945106092010111e-05, + "loss": 1.508, + "step": 70305 + }, + { + "epoch": 0.85, + "grad_norm": 12.267334230962579, + "learning_rate": 1.2944547818288714e-05, + "loss": 1.5494, + "step": 70308 + }, + { + "epoch": 0.85, + "grad_norm": 5.991381614531002, + "learning_rate": 1.2943989534518635e-05, + "loss": 1.3183, + "step": 70311 + }, + { + "epoch": 0.85, + "grad_norm": 8.927316351757165, + "learning_rate": 1.2943431240701783e-05, + "loss": 1.8017, + "step": 70314 + }, + { + "epoch": 0.85, + "grad_norm": 4.493005283115264, + "learning_rate": 1.294287293684006e-05, + "loss": 1.342, + "step": 70317 + }, + { + "epoch": 0.85, + "grad_norm": 17.444051415958327, + "learning_rate": 1.2942314622935373e-05, + "loss": 1.4756, + "step": 70320 + }, + { + "epoch": 0.85, + "grad_norm": 24.04996588624106, + "learning_rate": 1.294175629898962e-05, + "loss": 1.3398, + "step": 70323 + }, + { + "epoch": 0.85, + "grad_norm": 3.1460215090596533, + "learning_rate": 1.2941197965004722e-05, + "loss": 1.2214, + "step": 70326 + }, + { + "epoch": 0.85, + "grad_norm": 12.361088018057238, + "learning_rate": 1.2940639620982567e-05, + "loss": 1.8707, + "step": 70329 + }, + { + "epoch": 0.85, + "grad_norm": 5.0436034408855805, + "learning_rate": 1.2940081266925069e-05, + "loss": 1.3772, + "step": 70332 + }, + { + "epoch": 0.85, + "grad_norm": 110.86786840813666, + "learning_rate": 1.2939522902834133e-05, + "loss": 1.4272, + "step": 70335 + }, + { + "epoch": 0.85, + "grad_norm": 26.41700410021252, + "learning_rate": 1.2938964528711666e-05, + "loss": 1.3229, + "step": 70338 + }, + { + "epoch": 0.85, + "grad_norm": 14.596428232613158, + "learning_rate": 1.2938406144559572e-05, + "loss": 1.3818, + "step": 70341 + }, + { + "epoch": 0.85, + "grad_norm": 5.228182975674127, + "learning_rate": 1.2937847750379753e-05, + "loss": 1.0238, + "step": 70344 + }, + { + "epoch": 0.85, + "grad_norm": 20.876273519956317, + "learning_rate": 1.2937289346174123e-05, + "loss": 1.023, + "step": 70347 + }, + { + "epoch": 0.85, + "grad_norm": 15.109868930846732, + "learning_rate": 1.293673093194458e-05, + "loss": 1.3569, + "step": 70350 + }, + { + "epoch": 0.85, + "grad_norm": 7.411960417266696, + "learning_rate": 1.293617250769303e-05, + "loss": 1.0445, + "step": 70353 + }, + { + "epoch": 0.85, + "grad_norm": 6.659730262990709, + "learning_rate": 1.293561407342138e-05, + "loss": 1.2304, + "step": 70356 + }, + { + "epoch": 0.85, + "grad_norm": 32.66256597420538, + "learning_rate": 1.293505562913154e-05, + "loss": 1.3881, + "step": 70359 + }, + { + "epoch": 0.85, + "grad_norm": 6.994340195716558, + "learning_rate": 1.2934497174825413e-05, + "loss": 1.5453, + "step": 70362 + }, + { + "epoch": 0.85, + "grad_norm": 52.48416411279605, + "learning_rate": 1.2933938710504901e-05, + "loss": 1.3512, + "step": 70365 + }, + { + "epoch": 0.85, + "grad_norm": 14.436786915376397, + "learning_rate": 1.2933380236171918e-05, + "loss": 1.4288, + "step": 70368 + }, + { + "epoch": 0.85, + "grad_norm": 6.668651686765196, + "learning_rate": 1.293282175182836e-05, + "loss": 1.2652, + "step": 70371 + }, + { + "epoch": 0.85, + "grad_norm": 14.953451122129977, + "learning_rate": 1.2932263257476136e-05, + "loss": 1.0068, + "step": 70374 + }, + { + "epoch": 0.85, + "grad_norm": 11.02073592294077, + "learning_rate": 1.2931704753117157e-05, + "loss": 1.5588, + "step": 70377 + }, + { + "epoch": 0.85, + "grad_norm": 12.314257168263076, + "learning_rate": 1.2931146238753327e-05, + "loss": 1.2182, + "step": 70380 + }, + { + "epoch": 0.85, + "grad_norm": 2.514511981681224, + "learning_rate": 1.2930587714386548e-05, + "loss": 1.2265, + "step": 70383 + }, + { + "epoch": 0.85, + "grad_norm": 10.400010520519949, + "learning_rate": 1.2930029180018729e-05, + "loss": 1.4469, + "step": 70386 + }, + { + "epoch": 0.85, + "grad_norm": 8.228755959722, + "learning_rate": 1.2929470635651779e-05, + "loss": 1.4728, + "step": 70389 + }, + { + "epoch": 0.85, + "grad_norm": 18.934035879845865, + "learning_rate": 1.2928912081287597e-05, + "loss": 1.3632, + "step": 70392 + }, + { + "epoch": 0.85, + "grad_norm": 28.149939262816122, + "learning_rate": 1.2928353516928093e-05, + "loss": 1.5532, + "step": 70395 + }, + { + "epoch": 0.85, + "grad_norm": 20.963894066289217, + "learning_rate": 1.2927794942575176e-05, + "loss": 1.4417, + "step": 70398 + }, + { + "epoch": 0.85, + "grad_norm": 11.484355482032933, + "learning_rate": 1.2927236358230751e-05, + "loss": 1.2278, + "step": 70401 + }, + { + "epoch": 0.85, + "grad_norm": 15.679731446934577, + "learning_rate": 1.2926677763896718e-05, + "loss": 1.4813, + "step": 70404 + }, + { + "epoch": 0.85, + "grad_norm": 22.61860288570342, + "learning_rate": 1.292611915957499e-05, + "loss": 1.7118, + "step": 70407 + }, + { + "epoch": 0.85, + "grad_norm": 16.21262527626355, + "learning_rate": 1.2925560545267471e-05, + "loss": 1.1951, + "step": 70410 + }, + { + "epoch": 0.85, + "grad_norm": 5.756566288909471, + "learning_rate": 1.2925001920976068e-05, + "loss": 1.1036, + "step": 70413 + }, + { + "epoch": 0.85, + "grad_norm": 24.243061280880983, + "learning_rate": 1.2924443286702686e-05, + "loss": 1.4218, + "step": 70416 + }, + { + "epoch": 0.85, + "grad_norm": 12.993347183971753, + "learning_rate": 1.2923884642449232e-05, + "loss": 1.1727, + "step": 70419 + }, + { + "epoch": 0.85, + "grad_norm": 24.724881734614303, + "learning_rate": 1.2923325988217616e-05, + "loss": 1.4731, + "step": 70422 + }, + { + "epoch": 0.85, + "grad_norm": 15.800288982289223, + "learning_rate": 1.2922767324009739e-05, + "loss": 1.573, + "step": 70425 + }, + { + "epoch": 0.85, + "grad_norm": 23.973855659825247, + "learning_rate": 1.2922208649827512e-05, + "loss": 1.7267, + "step": 70428 + }, + { + "epoch": 0.85, + "grad_norm": 9.509599612576208, + "learning_rate": 1.2921649965672838e-05, + "loss": 1.035, + "step": 70431 + }, + { + "epoch": 0.85, + "grad_norm": 8.480271950139558, + "learning_rate": 1.2921091271547625e-05, + "loss": 1.0019, + "step": 70434 + }, + { + "epoch": 0.85, + "grad_norm": 10.883422786863886, + "learning_rate": 1.2920532567453779e-05, + "loss": 1.2675, + "step": 70437 + }, + { + "epoch": 0.85, + "grad_norm": 10.73205563910678, + "learning_rate": 1.2919973853393207e-05, + "loss": 1.2213, + "step": 70440 + }, + { + "epoch": 0.85, + "grad_norm": 20.41017458107733, + "learning_rate": 1.2919415129367818e-05, + "loss": 1.1346, + "step": 70443 + }, + { + "epoch": 0.85, + "grad_norm": 11.939804439790736, + "learning_rate": 1.2918856395379514e-05, + "loss": 1.296, + "step": 70446 + }, + { + "epoch": 0.85, + "grad_norm": 5.304304819824916, + "learning_rate": 1.2918297651430207e-05, + "loss": 1.5302, + "step": 70449 + }, + { + "epoch": 0.85, + "grad_norm": 20.286208624830895, + "learning_rate": 1.29177388975218e-05, + "loss": 1.2777, + "step": 70452 + }, + { + "epoch": 0.85, + "grad_norm": 61.688715139121406, + "learning_rate": 1.2917180133656203e-05, + "loss": 1.5378, + "step": 70455 + }, + { + "epoch": 0.85, + "grad_norm": 6.758039836626348, + "learning_rate": 1.2916621359835317e-05, + "loss": 1.4059, + "step": 70458 + }, + { + "epoch": 0.85, + "grad_norm": 15.482399313219483, + "learning_rate": 1.2916062576061057e-05, + "loss": 1.6547, + "step": 70461 + }, + { + "epoch": 0.85, + "grad_norm": 18.928765535447813, + "learning_rate": 1.2915503782335326e-05, + "loss": 1.3664, + "step": 70464 + }, + { + "epoch": 0.85, + "grad_norm": 8.3766778038515, + "learning_rate": 1.2914944978660027e-05, + "loss": 1.4022, + "step": 70467 + }, + { + "epoch": 0.85, + "grad_norm": 5.772753998266491, + "learning_rate": 1.2914386165037071e-05, + "loss": 0.8919, + "step": 70470 + }, + { + "epoch": 0.85, + "grad_norm": 7.556070961372706, + "learning_rate": 1.2913827341468368e-05, + "loss": 1.5789, + "step": 70473 + }, + { + "epoch": 0.85, + "grad_norm": 2.3708302196029853, + "learning_rate": 1.291326850795582e-05, + "loss": 1.3642, + "step": 70476 + }, + { + "epoch": 0.85, + "grad_norm": 5.955838573445295, + "learning_rate": 1.2912709664501337e-05, + "loss": 1.5759, + "step": 70479 + }, + { + "epoch": 0.85, + "grad_norm": 10.268362981020816, + "learning_rate": 1.2912150811106825e-05, + "loss": 1.1414, + "step": 70482 + }, + { + "epoch": 0.85, + "grad_norm": 44.47348475652096, + "learning_rate": 1.2911591947774191e-05, + "loss": 1.2047, + "step": 70485 + }, + { + "epoch": 0.85, + "grad_norm": 14.4888111076672, + "learning_rate": 1.291103307450534e-05, + "loss": 1.2133, + "step": 70488 + }, + { + "epoch": 0.85, + "grad_norm": 20.89187768379056, + "learning_rate": 1.2910474191302184e-05, + "loss": 1.4464, + "step": 70491 + }, + { + "epoch": 0.85, + "grad_norm": 8.99570922687112, + "learning_rate": 1.2909915298166627e-05, + "loss": 1.6803, + "step": 70494 + }, + { + "epoch": 0.85, + "grad_norm": 19.62469407935233, + "learning_rate": 1.2909356395100578e-05, + "loss": 1.2079, + "step": 70497 + }, + { + "epoch": 0.85, + "grad_norm": 89.80123879581494, + "learning_rate": 1.2908797482105943e-05, + "loss": 1.2424, + "step": 70500 + }, + { + "epoch": 0.85, + "grad_norm": 18.924022881761225, + "learning_rate": 1.2908238559184631e-05, + "loss": 1.475, + "step": 70503 + }, + { + "epoch": 0.85, + "grad_norm": 12.914999124486057, + "learning_rate": 1.2907679626338548e-05, + "loss": 1.6676, + "step": 70506 + }, + { + "epoch": 0.85, + "grad_norm": 6.025365000237158, + "learning_rate": 1.2907120683569604e-05, + "loss": 1.3538, + "step": 70509 + }, + { + "epoch": 0.85, + "grad_norm": 13.583123670446236, + "learning_rate": 1.2906561730879698e-05, + "loss": 1.0573, + "step": 70512 + }, + { + "epoch": 0.85, + "grad_norm": 7.551603637142646, + "learning_rate": 1.2906002768270749e-05, + "loss": 1.0494, + "step": 70515 + }, + { + "epoch": 0.85, + "grad_norm": 6.4174549807690555, + "learning_rate": 1.290544379574466e-05, + "loss": 1.2168, + "step": 70518 + }, + { + "epoch": 0.85, + "grad_norm": 6.857903270136695, + "learning_rate": 1.2904884813303332e-05, + "loss": 1.0801, + "step": 70521 + }, + { + "epoch": 0.85, + "grad_norm": 4.965897412148585, + "learning_rate": 1.2904325820948682e-05, + "loss": 1.4211, + "step": 70524 + }, + { + "epoch": 0.85, + "grad_norm": 8.559166999396842, + "learning_rate": 1.2903766818682614e-05, + "loss": 1.5458, + "step": 70527 + }, + { + "epoch": 0.85, + "grad_norm": 7.711363246120876, + "learning_rate": 1.2903207806507037e-05, + "loss": 1.3275, + "step": 70530 + }, + { + "epoch": 0.85, + "grad_norm": 4.909633840621802, + "learning_rate": 1.2902648784423855e-05, + "loss": 1.0807, + "step": 70533 + }, + { + "epoch": 0.85, + "grad_norm": 32.96858961047493, + "learning_rate": 1.290208975243498e-05, + "loss": 1.259, + "step": 70536 + }, + { + "epoch": 0.85, + "grad_norm": 17.532884379083377, + "learning_rate": 1.2901530710542318e-05, + "loss": 1.5432, + "step": 70539 + }, + { + "epoch": 0.85, + "grad_norm": 21.70800510104948, + "learning_rate": 1.2900971658747779e-05, + "loss": 1.0181, + "step": 70542 + }, + { + "epoch": 0.85, + "grad_norm": 11.723059681781166, + "learning_rate": 1.2900412597053262e-05, + "loss": 1.3694, + "step": 70545 + }, + { + "epoch": 0.85, + "grad_norm": 5.283146500488186, + "learning_rate": 1.2899853525460687e-05, + "loss": 1.6277, + "step": 70548 + }, + { + "epoch": 0.85, + "grad_norm": 17.8625561258839, + "learning_rate": 1.2899294443971954e-05, + "loss": 1.1802, + "step": 70551 + }, + { + "epoch": 0.85, + "grad_norm": 6.076203320698928, + "learning_rate": 1.2898735352588974e-05, + "loss": 1.4666, + "step": 70554 + }, + { + "epoch": 0.85, + "grad_norm": 7.035839629504321, + "learning_rate": 1.2898176251313656e-05, + "loss": 1.1624, + "step": 70557 + }, + { + "epoch": 0.85, + "grad_norm": 9.54554729043714, + "learning_rate": 1.2897617140147906e-05, + "loss": 1.4589, + "step": 70560 + }, + { + "epoch": 0.85, + "grad_norm": 13.75047076556674, + "learning_rate": 1.2897058019093631e-05, + "loss": 1.1736, + "step": 70563 + }, + { + "epoch": 0.85, + "grad_norm": 9.784964784681083, + "learning_rate": 1.2896498888152742e-05, + "loss": 1.3313, + "step": 70566 + }, + { + "epoch": 0.85, + "grad_norm": 16.00830846144236, + "learning_rate": 1.2895939747327144e-05, + "loss": 1.2925, + "step": 70569 + }, + { + "epoch": 0.85, + "grad_norm": 12.796164568274975, + "learning_rate": 1.289538059661875e-05, + "loss": 1.3188, + "step": 70572 + }, + { + "epoch": 0.85, + "grad_norm": 14.3314891066257, + "learning_rate": 1.2894821436029459e-05, + "loss": 1.292, + "step": 70575 + }, + { + "epoch": 0.85, + "grad_norm": 13.756915767853156, + "learning_rate": 1.289426226556119e-05, + "loss": 1.2581, + "step": 70578 + }, + { + "epoch": 0.85, + "grad_norm": 4.5137129854578735, + "learning_rate": 1.2893703085215844e-05, + "loss": 1.3451, + "step": 70581 + }, + { + "epoch": 0.85, + "grad_norm": 9.319589781131937, + "learning_rate": 1.2893143894995335e-05, + "loss": 1.452, + "step": 70584 + }, + { + "epoch": 0.85, + "grad_norm": 44.58344465831587, + "learning_rate": 1.2892584694901565e-05, + "loss": 1.3191, + "step": 70587 + }, + { + "epoch": 0.85, + "grad_norm": 6.626267837935127, + "learning_rate": 1.2892025484936443e-05, + "loss": 0.9152, + "step": 70590 + }, + { + "epoch": 0.85, + "grad_norm": 20.56551778036955, + "learning_rate": 1.2891466265101884e-05, + "loss": 1.5579, + "step": 70593 + }, + { + "epoch": 0.85, + "grad_norm": 7.7364076656244, + "learning_rate": 1.2890907035399792e-05, + "loss": 1.1241, + "step": 70596 + }, + { + "epoch": 0.85, + "grad_norm": 22.37652573018472, + "learning_rate": 1.2890347795832072e-05, + "loss": 1.3796, + "step": 70599 + }, + { + "epoch": 0.85, + "grad_norm": 6.877726473797557, + "learning_rate": 1.288978854640064e-05, + "loss": 1.2271, + "step": 70602 + }, + { + "epoch": 0.85, + "grad_norm": 22.378259793606723, + "learning_rate": 1.2889229287107399e-05, + "loss": 1.3703, + "step": 70605 + }, + { + "epoch": 0.85, + "grad_norm": 23.811428178381764, + "learning_rate": 1.288867001795426e-05, + "loss": 1.5979, + "step": 70608 + }, + { + "epoch": 0.85, + "grad_norm": 22.573269256941874, + "learning_rate": 1.2888110738943129e-05, + "loss": 1.402, + "step": 70611 + }, + { + "epoch": 0.85, + "grad_norm": 9.11749644625452, + "learning_rate": 1.2887551450075916e-05, + "loss": 1.3325, + "step": 70614 + }, + { + "epoch": 0.85, + "grad_norm": 6.253390621775611, + "learning_rate": 1.2886992151354534e-05, + "loss": 1.5089, + "step": 70617 + }, + { + "epoch": 0.85, + "grad_norm": 5.37844614326318, + "learning_rate": 1.2886432842780883e-05, + "loss": 1.4263, + "step": 70620 + }, + { + "epoch": 0.85, + "grad_norm": 27.530356632133305, + "learning_rate": 1.288587352435688e-05, + "loss": 1.4547, + "step": 70623 + }, + { + "epoch": 0.85, + "grad_norm": 12.295443322825557, + "learning_rate": 1.288531419608443e-05, + "loss": 1.5831, + "step": 70626 + }, + { + "epoch": 0.85, + "grad_norm": 9.66268052428927, + "learning_rate": 1.2884754857965435e-05, + "loss": 1.2719, + "step": 70629 + }, + { + "epoch": 0.85, + "grad_norm": 3.2257320360698887, + "learning_rate": 1.2884195510001818e-05, + "loss": 1.6952, + "step": 70632 + }, + { + "epoch": 0.85, + "grad_norm": 4.805544017718555, + "learning_rate": 1.288363615219548e-05, + "loss": 1.1462, + "step": 70635 + }, + { + "epoch": 0.85, + "grad_norm": 7.38672145896153, + "learning_rate": 1.288307678454833e-05, + "loss": 1.2351, + "step": 70638 + }, + { + "epoch": 0.85, + "grad_norm": 7.639029684109042, + "learning_rate": 1.2882517407062273e-05, + "loss": 1.2194, + "step": 70641 + }, + { + "epoch": 0.85, + "grad_norm": 21.14381978838369, + "learning_rate": 1.2881958019739228e-05, + "loss": 1.3233, + "step": 70644 + }, + { + "epoch": 0.85, + "grad_norm": 14.587128535617575, + "learning_rate": 1.2881398622581096e-05, + "loss": 1.3401, + "step": 70647 + }, + { + "epoch": 0.85, + "grad_norm": 8.157257420347564, + "learning_rate": 1.288083921558979e-05, + "loss": 1.0331, + "step": 70650 + }, + { + "epoch": 0.85, + "grad_norm": 4.639211094959049, + "learning_rate": 1.2880279798767216e-05, + "loss": 1.7265, + "step": 70653 + }, + { + "epoch": 0.85, + "grad_norm": 20.88579264442389, + "learning_rate": 1.2879720372115285e-05, + "loss": 1.2606, + "step": 70656 + }, + { + "epoch": 0.85, + "grad_norm": 21.72919113479426, + "learning_rate": 1.2879160935635906e-05, + "loss": 1.3492, + "step": 70659 + }, + { + "epoch": 0.85, + "grad_norm": 6.242563330994965, + "learning_rate": 1.2878601489330986e-05, + "loss": 1.3681, + "step": 70662 + }, + { + "epoch": 0.85, + "grad_norm": 39.926018406192775, + "learning_rate": 1.2878042033202439e-05, + "loss": 1.4068, + "step": 70665 + }, + { + "epoch": 0.85, + "grad_norm": 44.37948173505245, + "learning_rate": 1.2877482567252168e-05, + "loss": 1.123, + "step": 70668 + }, + { + "epoch": 0.85, + "grad_norm": 21.34054851369706, + "learning_rate": 1.2876923091482087e-05, + "loss": 1.5139, + "step": 70671 + }, + { + "epoch": 0.85, + "grad_norm": 11.578633154703025, + "learning_rate": 1.2876363605894102e-05, + "loss": 1.2484, + "step": 70674 + }, + { + "epoch": 0.85, + "grad_norm": 9.713929347456538, + "learning_rate": 1.2875804110490126e-05, + "loss": 1.2466, + "step": 70677 + }, + { + "epoch": 0.85, + "grad_norm": 2.913186956783695, + "learning_rate": 1.2875244605272069e-05, + "loss": 1.3884, + "step": 70680 + }, + { + "epoch": 0.85, + "grad_norm": 9.644949213146047, + "learning_rate": 1.2874685090241833e-05, + "loss": 1.6244, + "step": 70683 + }, + { + "epoch": 0.85, + "grad_norm": 18.133908930072366, + "learning_rate": 1.2874125565401334e-05, + "loss": 1.6514, + "step": 70686 + }, + { + "epoch": 0.85, + "grad_norm": 3.8522141583639384, + "learning_rate": 1.287356603075248e-05, + "loss": 1.3801, + "step": 70689 + }, + { + "epoch": 0.85, + "grad_norm": 29.966125168055207, + "learning_rate": 1.2873006486297182e-05, + "loss": 1.1469, + "step": 70692 + }, + { + "epoch": 0.85, + "grad_norm": 5.980027635773705, + "learning_rate": 1.2872446932037343e-05, + "loss": 1.1425, + "step": 70695 + }, + { + "epoch": 0.85, + "grad_norm": 89.1847712321215, + "learning_rate": 1.287188736797488e-05, + "loss": 1.6462, + "step": 70698 + }, + { + "epoch": 0.85, + "grad_norm": 17.40817316372151, + "learning_rate": 1.2871327794111701e-05, + "loss": 1.4636, + "step": 70701 + }, + { + "epoch": 0.85, + "grad_norm": 7.970381267975396, + "learning_rate": 1.2870768210449713e-05, + "loss": 1.066, + "step": 70704 + }, + { + "epoch": 0.85, + "grad_norm": 7.936221205791487, + "learning_rate": 1.2870208616990828e-05, + "loss": 1.1864, + "step": 70707 + }, + { + "epoch": 0.85, + "grad_norm": 31.367552645910386, + "learning_rate": 1.2869649013736954e-05, + "loss": 1.4075, + "step": 70710 + }, + { + "epoch": 0.85, + "grad_norm": 16.721427391374867, + "learning_rate": 1.2869089400690003e-05, + "loss": 1.3689, + "step": 70713 + }, + { + "epoch": 0.85, + "grad_norm": 10.121747898205998, + "learning_rate": 1.2868529777851878e-05, + "loss": 1.2954, + "step": 70716 + }, + { + "epoch": 0.85, + "grad_norm": 15.918579096685677, + "learning_rate": 1.28679701452245e-05, + "loss": 1.5099, + "step": 70719 + }, + { + "epoch": 0.85, + "grad_norm": 34.213476696255, + "learning_rate": 1.2867410502809771e-05, + "loss": 1.0191, + "step": 70722 + }, + { + "epoch": 0.85, + "grad_norm": 7.667626653336615, + "learning_rate": 1.2866850850609605e-05, + "loss": 1.0489, + "step": 70725 + }, + { + "epoch": 0.85, + "grad_norm": 12.826013154358618, + "learning_rate": 1.2866291188625905e-05, + "loss": 1.2262, + "step": 70728 + }, + { + "epoch": 0.85, + "grad_norm": 9.927365352273425, + "learning_rate": 1.2865731516860591e-05, + "loss": 1.4908, + "step": 70731 + }, + { + "epoch": 0.85, + "grad_norm": 18.479610178398108, + "learning_rate": 1.2865171835315563e-05, + "loss": 1.2603, + "step": 70734 + }, + { + "epoch": 0.85, + "grad_norm": 18.957198149878124, + "learning_rate": 1.2864612143992736e-05, + "loss": 1.1804, + "step": 70737 + }, + { + "epoch": 0.85, + "grad_norm": 9.055638116436565, + "learning_rate": 1.2864052442894024e-05, + "loss": 1.5588, + "step": 70740 + }, + { + "epoch": 0.85, + "grad_norm": 22.818299348811358, + "learning_rate": 1.286349273202133e-05, + "loss": 1.5259, + "step": 70743 + }, + { + "epoch": 0.85, + "grad_norm": 12.215432298391638, + "learning_rate": 1.2862933011376566e-05, + "loss": 1.5447, + "step": 70746 + }, + { + "epoch": 0.85, + "grad_norm": 6.304847499507832, + "learning_rate": 1.2862373280961642e-05, + "loss": 1.3077, + "step": 70749 + }, + { + "epoch": 0.85, + "grad_norm": 10.042162218599389, + "learning_rate": 1.2861813540778471e-05, + "loss": 1.3652, + "step": 70752 + }, + { + "epoch": 0.85, + "grad_norm": 14.62565532116405, + "learning_rate": 1.286125379082896e-05, + "loss": 1.1682, + "step": 70755 + }, + { + "epoch": 0.85, + "grad_norm": 9.962251203432565, + "learning_rate": 1.2860694031115022e-05, + "loss": 1.7379, + "step": 70758 + }, + { + "epoch": 0.85, + "grad_norm": 14.708058461219487, + "learning_rate": 1.2860134261638564e-05, + "loss": 1.5268, + "step": 70761 + }, + { + "epoch": 0.85, + "grad_norm": 12.286589098527816, + "learning_rate": 1.2859574482401498e-05, + "loss": 1.6276, + "step": 70764 + }, + { + "epoch": 0.85, + "grad_norm": 20.838277443520255, + "learning_rate": 1.2859014693405736e-05, + "loss": 1.6541, + "step": 70767 + }, + { + "epoch": 0.85, + "grad_norm": 12.931523865778528, + "learning_rate": 1.2858454894653185e-05, + "loss": 1.0391, + "step": 70770 + }, + { + "epoch": 0.85, + "grad_norm": 5.785281562748029, + "learning_rate": 1.2857895086145757e-05, + "loss": 1.236, + "step": 70773 + }, + { + "epoch": 0.85, + "grad_norm": 23.75524040703169, + "learning_rate": 1.2857335267885364e-05, + "loss": 1.1371, + "step": 70776 + }, + { + "epoch": 0.85, + "grad_norm": 12.959591089950775, + "learning_rate": 1.2856775439873913e-05, + "loss": 1.0376, + "step": 70779 + }, + { + "epoch": 0.85, + "grad_norm": 11.712026553596276, + "learning_rate": 1.2856215602113318e-05, + "loss": 1.3528, + "step": 70782 + }, + { + "epoch": 0.85, + "grad_norm": 3.662569215687594, + "learning_rate": 1.285565575460549e-05, + "loss": 1.0236, + "step": 70785 + }, + { + "epoch": 0.85, + "grad_norm": 42.47401429317657, + "learning_rate": 1.2855095897352335e-05, + "loss": 1.5789, + "step": 70788 + }, + { + "epoch": 0.85, + "grad_norm": 16.50659198778306, + "learning_rate": 1.2854536030355762e-05, + "loss": 1.3718, + "step": 70791 + }, + { + "epoch": 0.85, + "grad_norm": 25.07423118485598, + "learning_rate": 1.285397615361769e-05, + "loss": 1.1894, + "step": 70794 + }, + { + "epoch": 0.85, + "grad_norm": 38.997215187172905, + "learning_rate": 1.2853416267140026e-05, + "loss": 1.4965, + "step": 70797 + }, + { + "epoch": 0.85, + "grad_norm": 10.485356969092306, + "learning_rate": 1.285285637092468e-05, + "loss": 1.1222, + "step": 70800 + }, + { + "epoch": 0.85, + "grad_norm": 4.992433356895846, + "learning_rate": 1.2852296464973562e-05, + "loss": 1.263, + "step": 70803 + }, + { + "epoch": 0.85, + "grad_norm": 26.69050185578846, + "learning_rate": 1.2851736549288585e-05, + "loss": 1.2699, + "step": 70806 + }, + { + "epoch": 0.85, + "grad_norm": 18.57614581683104, + "learning_rate": 1.2851176623871656e-05, + "loss": 1.5032, + "step": 70809 + }, + { + "epoch": 0.85, + "grad_norm": 18.489807517403012, + "learning_rate": 1.2850616688724689e-05, + "loss": 1.6894, + "step": 70812 + }, + { + "epoch": 0.85, + "grad_norm": 17.150849439085604, + "learning_rate": 1.2850056743849593e-05, + "loss": 1.4038, + "step": 70815 + }, + { + "epoch": 0.85, + "grad_norm": 3.7117983780185666, + "learning_rate": 1.2849496789248282e-05, + "loss": 1.6119, + "step": 70818 + }, + { + "epoch": 0.85, + "grad_norm": 14.704189510729734, + "learning_rate": 1.2848936824922665e-05, + "loss": 1.2799, + "step": 70821 + }, + { + "epoch": 0.85, + "grad_norm": 6.912168320090236, + "learning_rate": 1.2848376850874648e-05, + "loss": 1.3655, + "step": 70824 + }, + { + "epoch": 0.85, + "grad_norm": 7.447261025776363, + "learning_rate": 1.2847816867106154e-05, + "loss": 1.0519, + "step": 70827 + }, + { + "epoch": 0.85, + "grad_norm": 4.948103711748057, + "learning_rate": 1.2847256873619083e-05, + "loss": 1.1315, + "step": 70830 + }, + { + "epoch": 0.85, + "grad_norm": 3.3374857073002366, + "learning_rate": 1.2846696870415349e-05, + "loss": 1.1661, + "step": 70833 + }, + { + "epoch": 0.85, + "grad_norm": 78.54153449983029, + "learning_rate": 1.2846136857496864e-05, + "loss": 1.2072, + "step": 70836 + }, + { + "epoch": 0.85, + "grad_norm": 12.365907699927572, + "learning_rate": 1.2845576834865543e-05, + "loss": 1.259, + "step": 70839 + }, + { + "epoch": 0.85, + "grad_norm": 24.118235959414356, + "learning_rate": 1.2845016802523289e-05, + "loss": 1.2823, + "step": 70842 + }, + { + "epoch": 0.85, + "grad_norm": 9.72415223944401, + "learning_rate": 1.2844456760472018e-05, + "loss": 1.2615, + "step": 70845 + }, + { + "epoch": 0.85, + "grad_norm": 13.60337668703658, + "learning_rate": 1.2843896708713644e-05, + "loss": 1.3598, + "step": 70848 + }, + { + "epoch": 0.85, + "grad_norm": 16.3921383788842, + "learning_rate": 1.284333664725007e-05, + "loss": 1.2207, + "step": 70851 + }, + { + "epoch": 0.85, + "grad_norm": 5.583775689287025, + "learning_rate": 1.2842776576083216e-05, + "loss": 1.2441, + "step": 70854 + }, + { + "epoch": 0.85, + "grad_norm": 19.46073514736116, + "learning_rate": 1.2842216495214984e-05, + "loss": 1.4189, + "step": 70857 + }, + { + "epoch": 0.85, + "grad_norm": 14.87487608874483, + "learning_rate": 1.2841656404647301e-05, + "loss": 1.3156, + "step": 70860 + }, + { + "epoch": 0.85, + "grad_norm": 25.34973787440228, + "learning_rate": 1.284109630438206e-05, + "loss": 1.1308, + "step": 70863 + }, + { + "epoch": 0.85, + "grad_norm": 6.314508958039272, + "learning_rate": 1.2840536194421182e-05, + "loss": 1.3617, + "step": 70866 + }, + { + "epoch": 0.85, + "grad_norm": 16.233650462936982, + "learning_rate": 1.283997607476658e-05, + "loss": 1.4672, + "step": 70869 + }, + { + "epoch": 0.85, + "grad_norm": 9.766525864123187, + "learning_rate": 1.2839415945420158e-05, + "loss": 1.4545, + "step": 70872 + }, + { + "epoch": 0.85, + "grad_norm": 5.567114823831528, + "learning_rate": 1.2838855806383836e-05, + "loss": 1.501, + "step": 70875 + }, + { + "epoch": 0.85, + "grad_norm": 14.674134597973978, + "learning_rate": 1.2838295657659518e-05, + "loss": 1.5634, + "step": 70878 + }, + { + "epoch": 0.85, + "grad_norm": 7.3394548373034105, + "learning_rate": 1.2837735499249125e-05, + "loss": 1.2134, + "step": 70881 + }, + { + "epoch": 0.85, + "grad_norm": 22.968969175270132, + "learning_rate": 1.2837175331154559e-05, + "loss": 1.0922, + "step": 70884 + }, + { + "epoch": 0.85, + "grad_norm": 7.101783122316604, + "learning_rate": 1.2836615153377735e-05, + "loss": 1.3682, + "step": 70887 + }, + { + "epoch": 0.85, + "grad_norm": 13.509774042868004, + "learning_rate": 1.2836054965920569e-05, + "loss": 1.8146, + "step": 70890 + }, + { + "epoch": 0.85, + "grad_norm": 10.69147865511284, + "learning_rate": 1.2835494768784966e-05, + "loss": 1.4532, + "step": 70893 + }, + { + "epoch": 0.85, + "grad_norm": 8.896955691661152, + "learning_rate": 1.2834934561972841e-05, + "loss": 1.1882, + "step": 70896 + }, + { + "epoch": 0.85, + "grad_norm": 22.556283408886898, + "learning_rate": 1.2834374345486102e-05, + "loss": 1.4231, + "step": 70899 + }, + { + "epoch": 0.85, + "grad_norm": 34.39668932320179, + "learning_rate": 1.283381411932667e-05, + "loss": 1.3992, + "step": 70902 + }, + { + "epoch": 0.85, + "grad_norm": 24.22721244562618, + "learning_rate": 1.2833253883496448e-05, + "loss": 1.4477, + "step": 70905 + }, + { + "epoch": 0.85, + "grad_norm": 3.5946361746451734, + "learning_rate": 1.2832693637997353e-05, + "loss": 0.9904, + "step": 70908 + }, + { + "epoch": 0.85, + "grad_norm": 5.8857939912395505, + "learning_rate": 1.2832133382831291e-05, + "loss": 1.3238, + "step": 70911 + }, + { + "epoch": 0.85, + "grad_norm": 14.22647519502113, + "learning_rate": 1.2831573118000184e-05, + "loss": 1.1936, + "step": 70914 + }, + { + "epoch": 0.85, + "grad_norm": 6.553313823966849, + "learning_rate": 1.2831012843505932e-05, + "loss": 1.2736, + "step": 70917 + }, + { + "epoch": 0.85, + "grad_norm": 7.003758907420429, + "learning_rate": 1.2830452559350457e-05, + "loss": 1.4945, + "step": 70920 + }, + { + "epoch": 0.85, + "grad_norm": 15.016135375215834, + "learning_rate": 1.2829892265535666e-05, + "loss": 1.0279, + "step": 70923 + }, + { + "epoch": 0.85, + "grad_norm": 17.99614680629922, + "learning_rate": 1.2829331962063471e-05, + "loss": 1.6401, + "step": 70926 + }, + { + "epoch": 0.85, + "grad_norm": 6.347279279632619, + "learning_rate": 1.2828771648935785e-05, + "loss": 1.6687, + "step": 70929 + }, + { + "epoch": 0.85, + "grad_norm": 22.896458063354125, + "learning_rate": 1.2828211326154521e-05, + "loss": 1.3044, + "step": 70932 + }, + { + "epoch": 0.85, + "grad_norm": 26.87102092342866, + "learning_rate": 1.2827650993721592e-05, + "loss": 1.3325, + "step": 70935 + }, + { + "epoch": 0.85, + "grad_norm": 8.48429668659073, + "learning_rate": 1.2827090651638905e-05, + "loss": 1.6877, + "step": 70938 + }, + { + "epoch": 0.85, + "grad_norm": 19.296696033627306, + "learning_rate": 1.282653029990838e-05, + "loss": 1.1399, + "step": 70941 + }, + { + "epoch": 0.85, + "grad_norm": 14.892350008900674, + "learning_rate": 1.2825969938531924e-05, + "loss": 1.2798, + "step": 70944 + }, + { + "epoch": 0.85, + "grad_norm": 4.4925762304323085, + "learning_rate": 1.2825409567511448e-05, + "loss": 1.5135, + "step": 70947 + }, + { + "epoch": 0.85, + "grad_norm": 39.96490395292163, + "learning_rate": 1.2824849186848868e-05, + "loss": 1.7442, + "step": 70950 + }, + { + "epoch": 0.85, + "grad_norm": 17.945650221525245, + "learning_rate": 1.2824288796546095e-05, + "loss": 1.1713, + "step": 70953 + }, + { + "epoch": 0.85, + "grad_norm": 9.59301391795924, + "learning_rate": 1.2823728396605045e-05, + "loss": 1.1673, + "step": 70956 + }, + { + "epoch": 0.85, + "grad_norm": 4.200449005670431, + "learning_rate": 1.2823167987027623e-05, + "loss": 0.9582, + "step": 70959 + }, + { + "epoch": 0.85, + "grad_norm": 5.508196446590165, + "learning_rate": 1.2822607567815749e-05, + "loss": 1.2149, + "step": 70962 + }, + { + "epoch": 0.85, + "grad_norm": 10.465810858749897, + "learning_rate": 1.2822047138971327e-05, + "loss": 1.5871, + "step": 70965 + }, + { + "epoch": 0.85, + "grad_norm": 2.9822487738290264, + "learning_rate": 1.2821486700496279e-05, + "loss": 1.3, + "step": 70968 + }, + { + "epoch": 0.85, + "grad_norm": 16.967809907211173, + "learning_rate": 1.2820926252392512e-05, + "loss": 1.6456, + "step": 70971 + }, + { + "epoch": 0.85, + "grad_norm": 7.2719428066166385, + "learning_rate": 1.282036579466194e-05, + "loss": 1.5487, + "step": 70974 + }, + { + "epoch": 0.85, + "grad_norm": 10.048728070414267, + "learning_rate": 1.2819805327306476e-05, + "loss": 1.4425, + "step": 70977 + }, + { + "epoch": 0.85, + "grad_norm": 31.034334725340546, + "learning_rate": 1.281924485032803e-05, + "loss": 1.4661, + "step": 70980 + }, + { + "epoch": 0.85, + "grad_norm": 21.43391541182417, + "learning_rate": 1.2818684363728517e-05, + "loss": 1.0989, + "step": 70983 + }, + { + "epoch": 0.85, + "grad_norm": 17.876404713908236, + "learning_rate": 1.2818123867509853e-05, + "loss": 1.5019, + "step": 70986 + }, + { + "epoch": 0.85, + "grad_norm": 17.925735012500724, + "learning_rate": 1.2817563361673945e-05, + "loss": 1.3394, + "step": 70989 + }, + { + "epoch": 0.85, + "grad_norm": 31.701426597973374, + "learning_rate": 1.2817002846222706e-05, + "loss": 1.3751, + "step": 70992 + }, + { + "epoch": 0.85, + "grad_norm": 20.930116560685235, + "learning_rate": 1.2816442321158054e-05, + "loss": 1.1336, + "step": 70995 + }, + { + "epoch": 0.85, + "grad_norm": 3.3299924497840716, + "learning_rate": 1.28158817864819e-05, + "loss": 1.4198, + "step": 70998 + }, + { + "epoch": 0.85, + "grad_norm": 32.45809979104479, + "learning_rate": 1.2815321242196152e-05, + "loss": 1.4597, + "step": 71001 + }, + { + "epoch": 0.85, + "grad_norm": 21.609763688569934, + "learning_rate": 1.2814760688302727e-05, + "loss": 1.1571, + "step": 71004 + }, + { + "epoch": 0.85, + "grad_norm": 10.36854508647862, + "learning_rate": 1.281420012480354e-05, + "loss": 1.475, + "step": 71007 + }, + { + "epoch": 0.85, + "grad_norm": 5.931518782534843, + "learning_rate": 1.2813639551700501e-05, + "loss": 1.4034, + "step": 71010 + }, + { + "epoch": 0.85, + "grad_norm": 18.929681860649907, + "learning_rate": 1.281307896899552e-05, + "loss": 1.2291, + "step": 71013 + }, + { + "epoch": 0.85, + "grad_norm": 27.114230704979366, + "learning_rate": 1.281251837669052e-05, + "loss": 1.5089, + "step": 71016 + }, + { + "epoch": 0.85, + "grad_norm": 7.832694145875391, + "learning_rate": 1.2811957774787404e-05, + "loss": 1.4494, + "step": 71019 + }, + { + "epoch": 0.85, + "grad_norm": 6.280257400106174, + "learning_rate": 1.281139716328809e-05, + "loss": 1.1402, + "step": 71022 + }, + { + "epoch": 0.85, + "grad_norm": 15.25662361074278, + "learning_rate": 1.281083654219449e-05, + "loss": 1.6979, + "step": 71025 + }, + { + "epoch": 0.85, + "grad_norm": 7.576104102979096, + "learning_rate": 1.2810275911508519e-05, + "loss": 1.2744, + "step": 71028 + }, + { + "epoch": 0.85, + "grad_norm": 21.74741416424957, + "learning_rate": 1.2809715271232086e-05, + "loss": 1.0126, + "step": 71031 + }, + { + "epoch": 0.85, + "grad_norm": 20.75192292484387, + "learning_rate": 1.2809154621367105e-05, + "loss": 1.4569, + "step": 71034 + }, + { + "epoch": 0.85, + "grad_norm": 8.872603796104586, + "learning_rate": 1.2808593961915494e-05, + "loss": 1.6746, + "step": 71037 + }, + { + "epoch": 0.85, + "grad_norm": 9.13560159255158, + "learning_rate": 1.2808033292879165e-05, + "loss": 1.4214, + "step": 71040 + }, + { + "epoch": 0.85, + "grad_norm": 130.1812956172109, + "learning_rate": 1.2807472614260029e-05, + "loss": 1.3684, + "step": 71043 + }, + { + "epoch": 0.85, + "grad_norm": 10.4253942931129, + "learning_rate": 1.2806911926059997e-05, + "loss": 1.2359, + "step": 71046 + }, + { + "epoch": 0.85, + "grad_norm": 11.883734657373845, + "learning_rate": 1.280635122828099e-05, + "loss": 1.4133, + "step": 71049 + }, + { + "epoch": 0.85, + "grad_norm": 15.81577076740303, + "learning_rate": 1.2805790520924915e-05, + "loss": 1.1692, + "step": 71052 + }, + { + "epoch": 0.85, + "grad_norm": 21.360774555038198, + "learning_rate": 1.2805229803993689e-05, + "loss": 1.3855, + "step": 71055 + }, + { + "epoch": 0.85, + "grad_norm": 21.818342303986878, + "learning_rate": 1.280466907748922e-05, + "loss": 1.4953, + "step": 71058 + }, + { + "epoch": 0.85, + "grad_norm": 20.762291717270287, + "learning_rate": 1.280410834141343e-05, + "loss": 1.2548, + "step": 71061 + }, + { + "epoch": 0.85, + "grad_norm": 4.85684031027468, + "learning_rate": 1.2803547595768227e-05, + "loss": 1.2791, + "step": 71064 + }, + { + "epoch": 0.85, + "grad_norm": 7.153097825335757, + "learning_rate": 1.2802986840555525e-05, + "loss": 1.2357, + "step": 71067 + }, + { + "epoch": 0.85, + "grad_norm": 17.996074963888166, + "learning_rate": 1.2802426075777239e-05, + "loss": 1.3206, + "step": 71070 + }, + { + "epoch": 0.85, + "grad_norm": 26.840243069440152, + "learning_rate": 1.2801865301435283e-05, + "loss": 1.3785, + "step": 71073 + }, + { + "epoch": 0.85, + "grad_norm": 4.48960419747657, + "learning_rate": 1.2801304517531569e-05, + "loss": 1.1786, + "step": 71076 + }, + { + "epoch": 0.85, + "grad_norm": 21.07057953368005, + "learning_rate": 1.280074372406801e-05, + "loss": 1.4892, + "step": 71079 + }, + { + "epoch": 0.85, + "grad_norm": 3.6508940856407714, + "learning_rate": 1.2800182921046524e-05, + "loss": 1.0862, + "step": 71082 + }, + { + "epoch": 0.85, + "grad_norm": 7.8507925928282765, + "learning_rate": 1.2799622108469024e-05, + "loss": 1.3628, + "step": 71085 + }, + { + "epoch": 0.85, + "grad_norm": 17.0355968024603, + "learning_rate": 1.2799061286337417e-05, + "loss": 1.1104, + "step": 71088 + }, + { + "epoch": 0.85, + "grad_norm": 17.69448866028389, + "learning_rate": 1.2798500454653626e-05, + "loss": 1.3763, + "step": 71091 + }, + { + "epoch": 0.85, + "grad_norm": 17.012740566797323, + "learning_rate": 1.2797939613419561e-05, + "loss": 1.4923, + "step": 71094 + }, + { + "epoch": 0.85, + "grad_norm": 60.406636407101956, + "learning_rate": 1.2797378762637136e-05, + "loss": 1.798, + "step": 71097 + }, + { + "epoch": 0.85, + "grad_norm": 11.838020067669246, + "learning_rate": 1.2796817902308263e-05, + "loss": 1.3571, + "step": 71100 + }, + { + "epoch": 0.85, + "grad_norm": 7.888116010738586, + "learning_rate": 1.2796257032434857e-05, + "loss": 1.3923, + "step": 71103 + }, + { + "epoch": 0.86, + "grad_norm": 15.198080970015626, + "learning_rate": 1.2795696153018837e-05, + "loss": 1.4936, + "step": 71106 + }, + { + "epoch": 0.86, + "grad_norm": 13.631764610274539, + "learning_rate": 1.2795135264062111e-05, + "loss": 1.4145, + "step": 71109 + }, + { + "epoch": 0.86, + "grad_norm": 18.922181600264548, + "learning_rate": 1.2794574365566593e-05, + "loss": 1.3173, + "step": 71112 + }, + { + "epoch": 0.86, + "grad_norm": 5.391005968155792, + "learning_rate": 1.2794013457534201e-05, + "loss": 1.4677, + "step": 71115 + }, + { + "epoch": 0.86, + "grad_norm": 13.131491756301692, + "learning_rate": 1.2793452539966849e-05, + "loss": 1.792, + "step": 71118 + }, + { + "epoch": 0.86, + "grad_norm": 7.412028143478481, + "learning_rate": 1.2792891612866447e-05, + "loss": 1.2215, + "step": 71121 + }, + { + "epoch": 0.86, + "grad_norm": 9.080210557455953, + "learning_rate": 1.2792330676234915e-05, + "loss": 0.9062, + "step": 71124 + }, + { + "epoch": 0.86, + "grad_norm": 11.807293835482742, + "learning_rate": 1.2791769730074162e-05, + "loss": 1.0022, + "step": 71127 + }, + { + "epoch": 0.86, + "grad_norm": 15.225682121096854, + "learning_rate": 1.2791208774386105e-05, + "loss": 1.5953, + "step": 71130 + }, + { + "epoch": 0.86, + "grad_norm": 9.670315656976893, + "learning_rate": 1.2790647809172656e-05, + "loss": 1.2461, + "step": 71133 + }, + { + "epoch": 0.86, + "grad_norm": 2.6314009268987832, + "learning_rate": 1.2790086834435736e-05, + "loss": 1.3594, + "step": 71136 + }, + { + "epoch": 0.86, + "grad_norm": 12.337833907482906, + "learning_rate": 1.2789525850177249e-05, + "loss": 1.0568, + "step": 71139 + }, + { + "epoch": 0.86, + "grad_norm": 11.377300554088464, + "learning_rate": 1.2788964856399118e-05, + "loss": 1.4246, + "step": 71142 + }, + { + "epoch": 0.86, + "grad_norm": 15.7396691289355, + "learning_rate": 1.2788403853103252e-05, + "loss": 1.9743, + "step": 71145 + }, + { + "epoch": 0.86, + "grad_norm": 21.51702564579214, + "learning_rate": 1.2787842840291573e-05, + "loss": 1.2481, + "step": 71148 + }, + { + "epoch": 0.86, + "grad_norm": 13.06952653738905, + "learning_rate": 1.2787281817965989e-05, + "loss": 1.1679, + "step": 71151 + }, + { + "epoch": 0.86, + "grad_norm": 8.862533951874022, + "learning_rate": 1.2786720786128412e-05, + "loss": 0.9121, + "step": 71154 + }, + { + "epoch": 0.86, + "grad_norm": 18.31598785171322, + "learning_rate": 1.2786159744780764e-05, + "loss": 1.1736, + "step": 71157 + }, + { + "epoch": 0.86, + "grad_norm": 9.429715266681901, + "learning_rate": 1.2785598693924959e-05, + "loss": 1.0231, + "step": 71160 + }, + { + "epoch": 0.86, + "grad_norm": 14.314122302456132, + "learning_rate": 1.2785037633562904e-05, + "loss": 1.3276, + "step": 71163 + }, + { + "epoch": 0.86, + "grad_norm": 7.011832880030671, + "learning_rate": 1.278447656369652e-05, + "loss": 1.3831, + "step": 71166 + }, + { + "epoch": 0.86, + "grad_norm": 18.2648360631901, + "learning_rate": 1.2783915484327723e-05, + "loss": 1.0433, + "step": 71169 + }, + { + "epoch": 0.86, + "grad_norm": 29.110073449152722, + "learning_rate": 1.2783354395458422e-05, + "loss": 1.1671, + "step": 71172 + }, + { + "epoch": 0.86, + "grad_norm": 2.374492636224086, + "learning_rate": 1.2782793297090536e-05, + "loss": 1.418, + "step": 71175 + }, + { + "epoch": 0.86, + "grad_norm": 20.180906930176963, + "learning_rate": 1.278223218922598e-05, + "loss": 1.4438, + "step": 71178 + }, + { + "epoch": 0.86, + "grad_norm": 4.066166930339938, + "learning_rate": 1.2781671071866669e-05, + "loss": 1.0774, + "step": 71181 + }, + { + "epoch": 0.86, + "grad_norm": 20.270020145361517, + "learning_rate": 1.2781109945014513e-05, + "loss": 1.5542, + "step": 71184 + }, + { + "epoch": 0.86, + "grad_norm": 14.545777449886256, + "learning_rate": 1.2780548808671432e-05, + "loss": 1.7666, + "step": 71187 + }, + { + "epoch": 0.86, + "grad_norm": 9.581185808828563, + "learning_rate": 1.277998766283934e-05, + "loss": 1.4838, + "step": 71190 + }, + { + "epoch": 0.86, + "grad_norm": 23.484171220934577, + "learning_rate": 1.2779426507520149e-05, + "loss": 1.3562, + "step": 71193 + }, + { + "epoch": 0.86, + "grad_norm": 18.82544581565676, + "learning_rate": 1.2778865342715778e-05, + "loss": 1.5403, + "step": 71196 + }, + { + "epoch": 0.86, + "grad_norm": 9.81671423244433, + "learning_rate": 1.2778304168428141e-05, + "loss": 1.311, + "step": 71199 + }, + { + "epoch": 0.86, + "grad_norm": 21.52805345161366, + "learning_rate": 1.2777742984659153e-05, + "loss": 1.7099, + "step": 71202 + }, + { + "epoch": 0.86, + "grad_norm": 8.673912197944777, + "learning_rate": 1.277718179141073e-05, + "loss": 1.2552, + "step": 71205 + }, + { + "epoch": 0.86, + "grad_norm": 8.331163070487959, + "learning_rate": 1.2776620588684783e-05, + "loss": 1.474, + "step": 71208 + }, + { + "epoch": 0.86, + "grad_norm": 25.303161809975755, + "learning_rate": 1.2776059376483232e-05, + "loss": 1.2067, + "step": 71211 + }, + { + "epoch": 0.86, + "grad_norm": 10.227381024124307, + "learning_rate": 1.277549815480799e-05, + "loss": 1.6369, + "step": 71214 + }, + { + "epoch": 0.86, + "grad_norm": 20.159179516167644, + "learning_rate": 1.2774936923660972e-05, + "loss": 1.2478, + "step": 71217 + }, + { + "epoch": 0.86, + "grad_norm": 5.361533603227299, + "learning_rate": 1.2774375683044093e-05, + "loss": 1.1249, + "step": 71220 + }, + { + "epoch": 0.86, + "grad_norm": 16.13743160674484, + "learning_rate": 1.2773814432959271e-05, + "loss": 1.5546, + "step": 71223 + }, + { + "epoch": 0.86, + "grad_norm": 20.867021973607844, + "learning_rate": 1.277325317340842e-05, + "loss": 0.9578, + "step": 71226 + }, + { + "epoch": 0.86, + "grad_norm": 10.100539192483895, + "learning_rate": 1.277269190439345e-05, + "loss": 1.1734, + "step": 71229 + }, + { + "epoch": 0.86, + "grad_norm": 8.33153142319635, + "learning_rate": 1.2772130625916285e-05, + "loss": 1.0237, + "step": 71232 + }, + { + "epoch": 0.86, + "grad_norm": 11.007204868876617, + "learning_rate": 1.2771569337978838e-05, + "loss": 1.2648, + "step": 71235 + }, + { + "epoch": 0.86, + "grad_norm": 18.04053486735413, + "learning_rate": 1.2771008040583021e-05, + "loss": 0.8939, + "step": 71238 + }, + { + "epoch": 0.86, + "grad_norm": 5.881683011561373, + "learning_rate": 1.277044673373075e-05, + "loss": 1.2876, + "step": 71241 + }, + { + "epoch": 0.86, + "grad_norm": 19.543250773456194, + "learning_rate": 1.2769885417423949e-05, + "loss": 1.3546, + "step": 71244 + }, + { + "epoch": 0.86, + "grad_norm": 4.50529476244378, + "learning_rate": 1.2769324091664519e-05, + "loss": 1.1711, + "step": 71247 + }, + { + "epoch": 0.86, + "grad_norm": 9.460145942580514, + "learning_rate": 1.2768762756454385e-05, + "loss": 1.2924, + "step": 71250 + }, + { + "epoch": 0.86, + "grad_norm": 17.72344028541226, + "learning_rate": 1.2768201411795462e-05, + "loss": 1.5231, + "step": 71253 + }, + { + "epoch": 0.86, + "grad_norm": 4.903616537413955, + "learning_rate": 1.2767640057689668e-05, + "loss": 1.5475, + "step": 71256 + }, + { + "epoch": 0.86, + "grad_norm": 10.093768889378195, + "learning_rate": 1.2767078694138913e-05, + "loss": 1.5284, + "step": 71259 + }, + { + "epoch": 0.86, + "grad_norm": 29.902523872040526, + "learning_rate": 1.276651732114511e-05, + "loss": 1.3312, + "step": 71262 + }, + { + "epoch": 0.86, + "grad_norm": 3.793093423106478, + "learning_rate": 1.2765955938710189e-05, + "loss": 1.3066, + "step": 71265 + }, + { + "epoch": 0.86, + "grad_norm": 27.772186228060846, + "learning_rate": 1.2765394546836051e-05, + "loss": 1.3646, + "step": 71268 + }, + { + "epoch": 0.86, + "grad_norm": 4.557481733978151, + "learning_rate": 1.2764833145524618e-05, + "loss": 1.3483, + "step": 71271 + }, + { + "epoch": 0.86, + "grad_norm": 28.306324887347692, + "learning_rate": 1.2764271734777807e-05, + "loss": 1.2584, + "step": 71274 + }, + { + "epoch": 0.86, + "grad_norm": 2.779649014265736, + "learning_rate": 1.276371031459753e-05, + "loss": 1.6015, + "step": 71277 + }, + { + "epoch": 0.86, + "grad_norm": 95.51174938165936, + "learning_rate": 1.2763148884985708e-05, + "loss": 1.4265, + "step": 71280 + }, + { + "epoch": 0.86, + "grad_norm": 7.267924082405622, + "learning_rate": 1.276258744594425e-05, + "loss": 1.3073, + "step": 71283 + }, + { + "epoch": 0.86, + "grad_norm": 20.606537193193596, + "learning_rate": 1.2762025997475083e-05, + "loss": 1.5783, + "step": 71286 + }, + { + "epoch": 0.86, + "grad_norm": 9.313386180686873, + "learning_rate": 1.276146453958011e-05, + "loss": 1.1069, + "step": 71289 + }, + { + "epoch": 0.86, + "grad_norm": 5.146034752311246, + "learning_rate": 1.2760903072261255e-05, + "loss": 1.0743, + "step": 71292 + }, + { + "epoch": 0.86, + "grad_norm": 20.809369081439065, + "learning_rate": 1.2760341595520431e-05, + "loss": 1.0634, + "step": 71295 + }, + { + "epoch": 0.86, + "grad_norm": 30.551363819760873, + "learning_rate": 1.2759780109359561e-05, + "loss": 1.513, + "step": 71298 + }, + { + "epoch": 0.86, + "grad_norm": 24.716438153470005, + "learning_rate": 1.2759218613780549e-05, + "loss": 1.1931, + "step": 71301 + }, + { + "epoch": 0.86, + "grad_norm": 11.398533293519261, + "learning_rate": 1.2758657108785317e-05, + "loss": 1.3389, + "step": 71304 + }, + { + "epoch": 0.86, + "grad_norm": 13.300463168822013, + "learning_rate": 1.2758095594375788e-05, + "loss": 1.749, + "step": 71307 + }, + { + "epoch": 0.86, + "grad_norm": 7.493124049511455, + "learning_rate": 1.2757534070553868e-05, + "loss": 1.2231, + "step": 71310 + }, + { + "epoch": 0.86, + "grad_norm": 13.093319155878156, + "learning_rate": 1.275697253732148e-05, + "loss": 1.4479, + "step": 71313 + }, + { + "epoch": 0.86, + "grad_norm": 7.887671846834009, + "learning_rate": 1.2756410994680535e-05, + "loss": 1.4087, + "step": 71316 + }, + { + "epoch": 0.86, + "grad_norm": 16.252696271905933, + "learning_rate": 1.2755849442632955e-05, + "loss": 1.3918, + "step": 71319 + }, + { + "epoch": 0.86, + "grad_norm": 12.354039083598458, + "learning_rate": 1.2755287881180651e-05, + "loss": 1.1848, + "step": 71322 + }, + { + "epoch": 0.86, + "grad_norm": 13.146025896759879, + "learning_rate": 1.2754726310325544e-05, + "loss": 1.704, + "step": 71325 + }, + { + "epoch": 0.86, + "grad_norm": 23.885823629549414, + "learning_rate": 1.2754164730069544e-05, + "loss": 1.0844, + "step": 71328 + }, + { + "epoch": 0.86, + "grad_norm": 8.339848396220999, + "learning_rate": 1.2753603140414579e-05, + "loss": 1.3969, + "step": 71331 + }, + { + "epoch": 0.86, + "grad_norm": 17.322772416941813, + "learning_rate": 1.2753041541362553e-05, + "loss": 1.4324, + "step": 71334 + }, + { + "epoch": 0.86, + "grad_norm": 10.330809025167746, + "learning_rate": 1.275247993291539e-05, + "loss": 1.0747, + "step": 71337 + }, + { + "epoch": 0.86, + "grad_norm": 30.944992611081776, + "learning_rate": 1.2751918315075004e-05, + "loss": 1.3836, + "step": 71340 + }, + { + "epoch": 0.86, + "grad_norm": 86.45051927441918, + "learning_rate": 1.2751356687843312e-05, + "loss": 1.4106, + "step": 71343 + }, + { + "epoch": 0.86, + "grad_norm": 9.193910311926171, + "learning_rate": 1.2750795051222232e-05, + "loss": 1.4116, + "step": 71346 + }, + { + "epoch": 0.86, + "grad_norm": 11.574350797918354, + "learning_rate": 1.2750233405213675e-05, + "loss": 1.6556, + "step": 71349 + }, + { + "epoch": 0.86, + "grad_norm": 11.929579103044086, + "learning_rate": 1.2749671749819567e-05, + "loss": 1.0829, + "step": 71352 + }, + { + "epoch": 0.86, + "grad_norm": 15.55245779320808, + "learning_rate": 1.2749110085041815e-05, + "loss": 1.2317, + "step": 71355 + }, + { + "epoch": 0.86, + "grad_norm": 13.064424425382985, + "learning_rate": 1.2748548410882342e-05, + "loss": 1.3453, + "step": 71358 + }, + { + "epoch": 0.86, + "grad_norm": 23.29030870547917, + "learning_rate": 1.2747986727343067e-05, + "loss": 1.1859, + "step": 71361 + }, + { + "epoch": 0.86, + "grad_norm": 2.562463662910246, + "learning_rate": 1.27474250344259e-05, + "loss": 1.2264, + "step": 71364 + }, + { + "epoch": 0.86, + "grad_norm": 12.989348946227038, + "learning_rate": 1.2746863332132763e-05, + "loss": 1.6679, + "step": 71367 + }, + { + "epoch": 0.86, + "grad_norm": 2.58355661985164, + "learning_rate": 1.2746301620465567e-05, + "loss": 1.3389, + "step": 71370 + }, + { + "epoch": 0.86, + "grad_norm": 6.683864134998426, + "learning_rate": 1.2745739899426237e-05, + "loss": 1.0903, + "step": 71373 + }, + { + "epoch": 0.86, + "grad_norm": 4.323553999172257, + "learning_rate": 1.2745178169016683e-05, + "loss": 1.5697, + "step": 71376 + }, + { + "epoch": 0.86, + "grad_norm": 36.04769455557189, + "learning_rate": 1.2744616429238827e-05, + "loss": 1.1734, + "step": 71379 + }, + { + "epoch": 0.86, + "grad_norm": 8.85060144344514, + "learning_rate": 1.2744054680094583e-05, + "loss": 1.6551, + "step": 71382 + }, + { + "epoch": 0.86, + "grad_norm": 7.873631560066964, + "learning_rate": 1.2743492921585867e-05, + "loss": 1.3246, + "step": 71385 + }, + { + "epoch": 0.86, + "grad_norm": 4.872607461453594, + "learning_rate": 1.2742931153714597e-05, + "loss": 1.3305, + "step": 71388 + }, + { + "epoch": 0.86, + "grad_norm": 25.307643580346546, + "learning_rate": 1.2742369376482693e-05, + "loss": 1.5196, + "step": 71391 + }, + { + "epoch": 0.86, + "grad_norm": 14.003729649138743, + "learning_rate": 1.2741807589892072e-05, + "loss": 1.4635, + "step": 71394 + }, + { + "epoch": 0.86, + "grad_norm": 6.95888892314571, + "learning_rate": 1.2741245793944645e-05, + "loss": 1.0947, + "step": 71397 + }, + { + "epoch": 0.86, + "grad_norm": 14.964846619319097, + "learning_rate": 1.2740683988642338e-05, + "loss": 1.3635, + "step": 71400 + }, + { + "epoch": 0.86, + "grad_norm": 10.157961124890615, + "learning_rate": 1.2740122173987061e-05, + "loss": 1.3099, + "step": 71403 + }, + { + "epoch": 0.86, + "grad_norm": 3.7365420376928915, + "learning_rate": 1.2739560349980735e-05, + "loss": 1.502, + "step": 71406 + }, + { + "epoch": 0.86, + "grad_norm": 25.01191588924219, + "learning_rate": 1.2738998516625277e-05, + "loss": 1.3251, + "step": 71409 + }, + { + "epoch": 0.86, + "grad_norm": 2.280765636109395, + "learning_rate": 1.2738436673922601e-05, + "loss": 1.4471, + "step": 71412 + }, + { + "epoch": 0.86, + "grad_norm": 17.22566421356042, + "learning_rate": 1.273787482187463e-05, + "loss": 1.3666, + "step": 71415 + }, + { + "epoch": 0.86, + "grad_norm": 31.593102333050002, + "learning_rate": 1.2737312960483275e-05, + "loss": 1.4459, + "step": 71418 + }, + { + "epoch": 0.86, + "grad_norm": 7.258971277640354, + "learning_rate": 1.2736751089750458e-05, + "loss": 1.4115, + "step": 71421 + }, + { + "epoch": 0.86, + "grad_norm": 15.985701229235094, + "learning_rate": 1.2736189209678098e-05, + "loss": 1.3863, + "step": 71424 + }, + { + "epoch": 0.86, + "grad_norm": 42.691875309583544, + "learning_rate": 1.2735627320268108e-05, + "loss": 1.626, + "step": 71427 + }, + { + "epoch": 0.86, + "grad_norm": 21.527890057136666, + "learning_rate": 1.2735065421522406e-05, + "loss": 1.4117, + "step": 71430 + }, + { + "epoch": 0.86, + "grad_norm": 12.268455747712794, + "learning_rate": 1.2734503513442914e-05, + "loss": 1.6055, + "step": 71433 + }, + { + "epoch": 0.86, + "grad_norm": 11.697790747940754, + "learning_rate": 1.2733941596031544e-05, + "loss": 1.5217, + "step": 71436 + }, + { + "epoch": 0.86, + "grad_norm": 5.457729064012559, + "learning_rate": 1.2733379669290215e-05, + "loss": 1.339, + "step": 71439 + }, + { + "epoch": 0.86, + "grad_norm": 5.910837059573662, + "learning_rate": 1.2732817733220848e-05, + "loss": 1.5559, + "step": 71442 + }, + { + "epoch": 0.86, + "grad_norm": 11.459186745093033, + "learning_rate": 1.2732255787825358e-05, + "loss": 1.0626, + "step": 71445 + }, + { + "epoch": 0.86, + "grad_norm": 13.241932024036329, + "learning_rate": 1.2731693833105665e-05, + "loss": 1.4278, + "step": 71448 + }, + { + "epoch": 0.86, + "grad_norm": 31.971591183798513, + "learning_rate": 1.2731131869063682e-05, + "loss": 1.4039, + "step": 71451 + }, + { + "epoch": 0.86, + "grad_norm": 16.92083166717739, + "learning_rate": 1.2730569895701331e-05, + "loss": 1.4714, + "step": 71454 + }, + { + "epoch": 0.86, + "grad_norm": 9.12237399905034, + "learning_rate": 1.2730007913020528e-05, + "loss": 1.1532, + "step": 71457 + }, + { + "epoch": 0.86, + "grad_norm": 22.33448631384747, + "learning_rate": 1.2729445921023191e-05, + "loss": 1.4976, + "step": 71460 + }, + { + "epoch": 0.86, + "grad_norm": 9.56021201765093, + "learning_rate": 1.2728883919711237e-05, + "loss": 1.392, + "step": 71463 + }, + { + "epoch": 0.86, + "grad_norm": 24.890125139746843, + "learning_rate": 1.272832190908659e-05, + "loss": 1.4159, + "step": 71466 + }, + { + "epoch": 0.86, + "grad_norm": 23.973935861424508, + "learning_rate": 1.2727759889151159e-05, + "loss": 1.3774, + "step": 71469 + }, + { + "epoch": 0.86, + "grad_norm": 12.518687435474064, + "learning_rate": 1.2727197859906863e-05, + "loss": 1.2963, + "step": 71472 + }, + { + "epoch": 0.86, + "grad_norm": 18.319848158632926, + "learning_rate": 1.2726635821355632e-05, + "loss": 1.8356, + "step": 71475 + }, + { + "epoch": 0.86, + "grad_norm": 14.783080438049831, + "learning_rate": 1.2726073773499367e-05, + "loss": 1.1852, + "step": 71478 + }, + { + "epoch": 0.86, + "grad_norm": 9.811010222609509, + "learning_rate": 1.2725511716339999e-05, + "loss": 1.1349, + "step": 71481 + }, + { + "epoch": 0.86, + "grad_norm": 5.533279596547511, + "learning_rate": 1.2724949649879437e-05, + "loss": 1.1068, + "step": 71484 + }, + { + "epoch": 0.86, + "grad_norm": 7.702169728322516, + "learning_rate": 1.2724387574119605e-05, + "loss": 1.2428, + "step": 71487 + }, + { + "epoch": 0.86, + "grad_norm": 12.510453513884444, + "learning_rate": 1.2723825489062423e-05, + "loss": 1.4746, + "step": 71490 + }, + { + "epoch": 0.86, + "grad_norm": 14.888515373968618, + "learning_rate": 1.2723263394709802e-05, + "loss": 1.1755, + "step": 71493 + }, + { + "epoch": 0.86, + "grad_norm": 18.714743411808858, + "learning_rate": 1.2722701291063664e-05, + "loss": 1.1158, + "step": 71496 + }, + { + "epoch": 0.86, + "grad_norm": 7.162948353120359, + "learning_rate": 1.272213917812593e-05, + "loss": 1.3294, + "step": 71499 + }, + { + "epoch": 0.86, + "grad_norm": 16.45855423964549, + "learning_rate": 1.2721577055898514e-05, + "loss": 1.6792, + "step": 71502 + }, + { + "epoch": 0.86, + "grad_norm": 17.51617322630141, + "learning_rate": 1.2721014924383333e-05, + "loss": 1.3048, + "step": 71505 + }, + { + "epoch": 0.86, + "grad_norm": 18.90184575395797, + "learning_rate": 1.272045278358231e-05, + "loss": 1.5756, + "step": 71508 + }, + { + "epoch": 0.86, + "grad_norm": 9.107868733946415, + "learning_rate": 1.2719890633497362e-05, + "loss": 1.1222, + "step": 71511 + }, + { + "epoch": 0.86, + "grad_norm": 6.513516161405301, + "learning_rate": 1.2719328474130411e-05, + "loss": 1.5194, + "step": 71514 + }, + { + "epoch": 0.86, + "grad_norm": 11.661581041532713, + "learning_rate": 1.2718766305483365e-05, + "loss": 1.4807, + "step": 71517 + }, + { + "epoch": 0.86, + "grad_norm": 18.766505010921474, + "learning_rate": 1.2718204127558151e-05, + "loss": 1.3641, + "step": 71520 + }, + { + "epoch": 0.86, + "grad_norm": 52.12784301458139, + "learning_rate": 1.2717641940356687e-05, + "loss": 1.1978, + "step": 71523 + }, + { + "epoch": 0.86, + "grad_norm": 25.309893545763835, + "learning_rate": 1.2717079743880887e-05, + "loss": 1.4775, + "step": 71526 + }, + { + "epoch": 0.86, + "grad_norm": 4.566727005356068, + "learning_rate": 1.2716517538132675e-05, + "loss": 1.4684, + "step": 71529 + }, + { + "epoch": 0.86, + "grad_norm": 25.40350788597232, + "learning_rate": 1.2715955323113967e-05, + "loss": 1.3438, + "step": 71532 + }, + { + "epoch": 0.86, + "grad_norm": 35.38848997873889, + "learning_rate": 1.2715393098826683e-05, + "loss": 1.5738, + "step": 71535 + }, + { + "epoch": 0.86, + "grad_norm": 16.87905648632041, + "learning_rate": 1.2714830865272736e-05, + "loss": 1.5079, + "step": 71538 + }, + { + "epoch": 0.86, + "grad_norm": 5.6715631072117425, + "learning_rate": 1.2714268622454054e-05, + "loss": 1.6414, + "step": 71541 + }, + { + "epoch": 0.86, + "grad_norm": 9.392064522769898, + "learning_rate": 1.2713706370372547e-05, + "loss": 0.9815, + "step": 71544 + }, + { + "epoch": 0.86, + "grad_norm": 4.486796749905889, + "learning_rate": 1.2713144109030138e-05, + "loss": 1.3945, + "step": 71547 + }, + { + "epoch": 0.86, + "grad_norm": 24.54326356439229, + "learning_rate": 1.2712581838428748e-05, + "loss": 1.1858, + "step": 71550 + }, + { + "epoch": 0.86, + "grad_norm": 6.347292966601297, + "learning_rate": 1.2712019558570292e-05, + "loss": 1.2688, + "step": 71553 + }, + { + "epoch": 0.86, + "grad_norm": 10.127114027080037, + "learning_rate": 1.271145726945669e-05, + "loss": 1.3815, + "step": 71556 + }, + { + "epoch": 0.86, + "grad_norm": 14.44365159222195, + "learning_rate": 1.271089497108986e-05, + "loss": 1.4564, + "step": 71559 + }, + { + "epoch": 0.86, + "grad_norm": 13.942850936832833, + "learning_rate": 1.2710332663471724e-05, + "loss": 1.4955, + "step": 71562 + }, + { + "epoch": 0.86, + "grad_norm": 14.125547979818354, + "learning_rate": 1.2709770346604198e-05, + "loss": 1.3077, + "step": 71565 + }, + { + "epoch": 0.86, + "grad_norm": 18.117388070782507, + "learning_rate": 1.2709208020489204e-05, + "loss": 1.3955, + "step": 71568 + }, + { + "epoch": 0.86, + "grad_norm": 8.55228066865213, + "learning_rate": 1.2708645685128653e-05, + "loss": 0.9931, + "step": 71571 + }, + { + "epoch": 0.86, + "grad_norm": 10.480869785966194, + "learning_rate": 1.2708083340524475e-05, + "loss": 0.9711, + "step": 71574 + }, + { + "epoch": 0.86, + "grad_norm": 15.261610350221344, + "learning_rate": 1.2707520986678583e-05, + "loss": 1.7414, + "step": 71577 + }, + { + "epoch": 0.86, + "grad_norm": 7.543129433162891, + "learning_rate": 1.2706958623592895e-05, + "loss": 1.0211, + "step": 71580 + }, + { + "epoch": 0.86, + "grad_norm": 26.748342953531015, + "learning_rate": 1.2706396251269335e-05, + "loss": 1.5286, + "step": 71583 + }, + { + "epoch": 0.86, + "grad_norm": 4.689780685705827, + "learning_rate": 1.2705833869709818e-05, + "loss": 1.2968, + "step": 71586 + }, + { + "epoch": 0.86, + "grad_norm": 13.44193579556887, + "learning_rate": 1.2705271478916266e-05, + "loss": 1.4664, + "step": 71589 + }, + { + "epoch": 0.86, + "grad_norm": 11.186397297136871, + "learning_rate": 1.2704709078890594e-05, + "loss": 1.4301, + "step": 71592 + }, + { + "epoch": 0.86, + "grad_norm": 7.394692040818557, + "learning_rate": 1.2704146669634727e-05, + "loss": 1.4066, + "step": 71595 + }, + { + "epoch": 0.86, + "grad_norm": 7.019354514661095, + "learning_rate": 1.270358425115058e-05, + "loss": 1.5126, + "step": 71598 + }, + { + "epoch": 0.86, + "grad_norm": 14.675244539930796, + "learning_rate": 1.2703021823440074e-05, + "loss": 0.945, + "step": 71601 + }, + { + "epoch": 0.86, + "grad_norm": 8.967367768070375, + "learning_rate": 1.2702459386505129e-05, + "loss": 1.0415, + "step": 71604 + }, + { + "epoch": 0.86, + "grad_norm": 26.24643617157075, + "learning_rate": 1.2701896940347662e-05, + "loss": 1.0619, + "step": 71607 + }, + { + "epoch": 0.86, + "grad_norm": 15.335038981292065, + "learning_rate": 1.2701334484969596e-05, + "loss": 1.4465, + "step": 71610 + }, + { + "epoch": 0.86, + "grad_norm": 6.749089971890475, + "learning_rate": 1.2700772020372845e-05, + "loss": 1.2226, + "step": 71613 + }, + { + "epoch": 0.86, + "grad_norm": 22.719975913724006, + "learning_rate": 1.2700209546559336e-05, + "loss": 1.6341, + "step": 71616 + }, + { + "epoch": 0.86, + "grad_norm": 9.679466870709458, + "learning_rate": 1.2699647063530983e-05, + "loss": 1.4067, + "step": 71619 + }, + { + "epoch": 0.86, + "grad_norm": 10.605393203128852, + "learning_rate": 1.2699084571289707e-05, + "loss": 1.4265, + "step": 71622 + }, + { + "epoch": 0.86, + "grad_norm": 15.611917714491373, + "learning_rate": 1.2698522069837425e-05, + "loss": 1.1595, + "step": 71625 + }, + { + "epoch": 0.86, + "grad_norm": 7.159261386654156, + "learning_rate": 1.2697959559176063e-05, + "loss": 1.2682, + "step": 71628 + }, + { + "epoch": 0.86, + "grad_norm": 3.708071904531351, + "learning_rate": 1.2697397039307535e-05, + "loss": 1.101, + "step": 71631 + }, + { + "epoch": 0.86, + "grad_norm": 2.5311327576741935, + "learning_rate": 1.2696834510233762e-05, + "loss": 1.7072, + "step": 71634 + }, + { + "epoch": 0.86, + "grad_norm": 3.50073833604041, + "learning_rate": 1.2696271971956664e-05, + "loss": 1.2097, + "step": 71637 + }, + { + "epoch": 0.86, + "grad_norm": 2.5041142219438624, + "learning_rate": 1.2695709424478162e-05, + "loss": 1.5259, + "step": 71640 + }, + { + "epoch": 0.86, + "grad_norm": 13.400185745951482, + "learning_rate": 1.2695146867800176e-05, + "loss": 1.5586, + "step": 71643 + }, + { + "epoch": 0.86, + "grad_norm": 9.885702694870414, + "learning_rate": 1.2694584301924621e-05, + "loss": 1.2546, + "step": 71646 + }, + { + "epoch": 0.86, + "grad_norm": 17.544969912336406, + "learning_rate": 1.2694021726853426e-05, + "loss": 1.156, + "step": 71649 + }, + { + "epoch": 0.86, + "grad_norm": 8.32452710364933, + "learning_rate": 1.26934591425885e-05, + "loss": 1.4529, + "step": 71652 + }, + { + "epoch": 0.86, + "grad_norm": 11.835304535779878, + "learning_rate": 1.2692896549131767e-05, + "loss": 1.2408, + "step": 71655 + }, + { + "epoch": 0.86, + "grad_norm": 65.35714218760407, + "learning_rate": 1.2692333946485151e-05, + "loss": 1.3791, + "step": 71658 + }, + { + "epoch": 0.86, + "grad_norm": 4.494713370878438, + "learning_rate": 1.2691771334650571e-05, + "loss": 1.3066, + "step": 71661 + }, + { + "epoch": 0.86, + "grad_norm": 6.541451177445484, + "learning_rate": 1.2691208713629944e-05, + "loss": 1.31, + "step": 71664 + }, + { + "epoch": 0.86, + "grad_norm": 27.3250623921958, + "learning_rate": 1.2690646083425189e-05, + "loss": 1.1024, + "step": 71667 + }, + { + "epoch": 0.86, + "grad_norm": 17.840359728302023, + "learning_rate": 1.2690083444038228e-05, + "loss": 1.234, + "step": 71670 + }, + { + "epoch": 0.86, + "grad_norm": 5.740585151365474, + "learning_rate": 1.2689520795470985e-05, + "loss": 1.272, + "step": 71673 + }, + { + "epoch": 0.86, + "grad_norm": 7.247221105903923, + "learning_rate": 1.2688958137725372e-05, + "loss": 1.3533, + "step": 71676 + }, + { + "epoch": 0.86, + "grad_norm": 25.096840937561083, + "learning_rate": 1.2688395470803314e-05, + "loss": 1.4989, + "step": 71679 + }, + { + "epoch": 0.86, + "grad_norm": 7.1934774604815965, + "learning_rate": 1.2687832794706731e-05, + "loss": 1.0592, + "step": 71682 + }, + { + "epoch": 0.86, + "grad_norm": 11.511157214122713, + "learning_rate": 1.2687270109437544e-05, + "loss": 1.4242, + "step": 71685 + }, + { + "epoch": 0.86, + "grad_norm": 3.9433848131524303, + "learning_rate": 1.268670741499767e-05, + "loss": 1.1205, + "step": 71688 + }, + { + "epoch": 0.86, + "grad_norm": 27.079250024350905, + "learning_rate": 1.2686144711389034e-05, + "loss": 1.4862, + "step": 71691 + }, + { + "epoch": 0.86, + "grad_norm": 9.177402068752242, + "learning_rate": 1.2685581998613553e-05, + "loss": 1.5954, + "step": 71694 + }, + { + "epoch": 0.86, + "grad_norm": 2.8738721418504807, + "learning_rate": 1.2685019276673148e-05, + "loss": 1.1457, + "step": 71697 + }, + { + "epoch": 0.86, + "grad_norm": 4.897017151357767, + "learning_rate": 1.2684456545569737e-05, + "loss": 1.8298, + "step": 71700 + }, + { + "epoch": 0.86, + "grad_norm": 7.105010169465413, + "learning_rate": 1.2683893805305248e-05, + "loss": 1.219, + "step": 71703 + }, + { + "epoch": 0.86, + "grad_norm": 5.070371669614137, + "learning_rate": 1.2683331055881588e-05, + "loss": 1.5848, + "step": 71706 + }, + { + "epoch": 0.86, + "grad_norm": 17.531482792531733, + "learning_rate": 1.268276829730069e-05, + "loss": 1.6687, + "step": 71709 + }, + { + "epoch": 0.86, + "grad_norm": 16.522833137265664, + "learning_rate": 1.268220552956447e-05, + "loss": 1.3753, + "step": 71712 + }, + { + "epoch": 0.86, + "grad_norm": 6.69882157625138, + "learning_rate": 1.268164275267485e-05, + "loss": 1.429, + "step": 71715 + }, + { + "epoch": 0.86, + "grad_norm": 120.34333390477181, + "learning_rate": 1.268107996663375e-05, + "loss": 1.4518, + "step": 71718 + }, + { + "epoch": 0.86, + "grad_norm": 16.90780920194358, + "learning_rate": 1.2680517171443087e-05, + "loss": 1.3108, + "step": 71721 + }, + { + "epoch": 0.86, + "grad_norm": 10.580678905240974, + "learning_rate": 1.2679954367104787e-05, + "loss": 1.4653, + "step": 71724 + }, + { + "epoch": 0.86, + "grad_norm": 15.761611764274644, + "learning_rate": 1.2679391553620766e-05, + "loss": 1.86, + "step": 71727 + }, + { + "epoch": 0.86, + "grad_norm": 97.48880660571601, + "learning_rate": 1.2678828730992947e-05, + "loss": 1.2345, + "step": 71730 + }, + { + "epoch": 0.86, + "grad_norm": 5.710087607426951, + "learning_rate": 1.267826589922325e-05, + "loss": 1.4142, + "step": 71733 + }, + { + "epoch": 0.86, + "grad_norm": 26.31779448835706, + "learning_rate": 1.2677703058313597e-05, + "loss": 1.5554, + "step": 71736 + }, + { + "epoch": 0.86, + "grad_norm": 13.853197208146401, + "learning_rate": 1.2677140208265909e-05, + "loss": 1.0668, + "step": 71739 + }, + { + "epoch": 0.86, + "grad_norm": 41.6743945731506, + "learning_rate": 1.2676577349082102e-05, + "loss": 1.1353, + "step": 71742 + }, + { + "epoch": 0.86, + "grad_norm": 8.83201638484304, + "learning_rate": 1.2676014480764105e-05, + "loss": 1.3602, + "step": 71745 + }, + { + "epoch": 0.86, + "grad_norm": 11.649177004682333, + "learning_rate": 1.2675451603313833e-05, + "loss": 1.4741, + "step": 71748 + }, + { + "epoch": 0.86, + "grad_norm": 13.877078276704289, + "learning_rate": 1.2674888716733208e-05, + "loss": 1.3315, + "step": 71751 + }, + { + "epoch": 0.86, + "grad_norm": 22.084091840830727, + "learning_rate": 1.267432582102415e-05, + "loss": 1.1565, + "step": 71754 + }, + { + "epoch": 0.86, + "grad_norm": 142.33823055537562, + "learning_rate": 1.2673762916188586e-05, + "loss": 1.1636, + "step": 71757 + }, + { + "epoch": 0.86, + "grad_norm": 26.396734527176747, + "learning_rate": 1.2673200002228428e-05, + "loss": 1.371, + "step": 71760 + }, + { + "epoch": 0.86, + "grad_norm": 9.29868104533712, + "learning_rate": 1.26726370791456e-05, + "loss": 1.5742, + "step": 71763 + }, + { + "epoch": 0.86, + "grad_norm": 25.44923288639223, + "learning_rate": 1.2672074146942026e-05, + "loss": 1.1663, + "step": 71766 + }, + { + "epoch": 0.86, + "grad_norm": 16.829136312138573, + "learning_rate": 1.2671511205619627e-05, + "loss": 1.0469, + "step": 71769 + }, + { + "epoch": 0.86, + "grad_norm": 22.000262316641265, + "learning_rate": 1.2670948255180323e-05, + "loss": 1.3503, + "step": 71772 + }, + { + "epoch": 0.86, + "grad_norm": 25.174457637706194, + "learning_rate": 1.267038529562603e-05, + "loss": 1.3837, + "step": 71775 + }, + { + "epoch": 0.86, + "grad_norm": 15.721003311503813, + "learning_rate": 1.266982232695868e-05, + "loss": 1.4675, + "step": 71778 + }, + { + "epoch": 0.86, + "grad_norm": 23.684355336415333, + "learning_rate": 1.2669259349180183e-05, + "loss": 1.544, + "step": 71781 + }, + { + "epoch": 0.86, + "grad_norm": 6.388172233161301, + "learning_rate": 1.266869636229247e-05, + "loss": 1.2695, + "step": 71784 + }, + { + "epoch": 0.86, + "grad_norm": 9.190253519254599, + "learning_rate": 1.2668133366297453e-05, + "loss": 1.4239, + "step": 71787 + }, + { + "epoch": 0.86, + "grad_norm": 10.28071790141051, + "learning_rate": 1.266757036119706e-05, + "loss": 1.5742, + "step": 71790 + }, + { + "epoch": 0.86, + "grad_norm": 27.1846678919755, + "learning_rate": 1.266700734699321e-05, + "loss": 1.326, + "step": 71793 + }, + { + "epoch": 0.86, + "grad_norm": 30.995823620926085, + "learning_rate": 1.2666444323687823e-05, + "loss": 1.3498, + "step": 71796 + }, + { + "epoch": 0.86, + "grad_norm": 33.11639447791553, + "learning_rate": 1.2665881291282826e-05, + "loss": 1.2682, + "step": 71799 + }, + { + "epoch": 0.86, + "grad_norm": 13.025857915166341, + "learning_rate": 1.2665318249780133e-05, + "loss": 1.4326, + "step": 71802 + }, + { + "epoch": 0.86, + "grad_norm": 31.85885615402559, + "learning_rate": 1.2664755199181671e-05, + "loss": 1.4084, + "step": 71805 + }, + { + "epoch": 0.86, + "grad_norm": 10.234726845329414, + "learning_rate": 1.2664192139489355e-05, + "loss": 0.971, + "step": 71808 + }, + { + "epoch": 0.86, + "grad_norm": 11.046411887872374, + "learning_rate": 1.2663629070705118e-05, + "loss": 1.3809, + "step": 71811 + }, + { + "epoch": 0.86, + "grad_norm": 9.531802583007767, + "learning_rate": 1.2663065992830867e-05, + "loss": 1.5999, + "step": 71814 + }, + { + "epoch": 0.86, + "grad_norm": 20.893665953303586, + "learning_rate": 1.2662502905868535e-05, + "loss": 0.9861, + "step": 71817 + }, + { + "epoch": 0.86, + "grad_norm": 20.903374706238893, + "learning_rate": 1.266193980982004e-05, + "loss": 1.3045, + "step": 71820 + }, + { + "epoch": 0.86, + "grad_norm": 31.497094517190018, + "learning_rate": 1.26613767046873e-05, + "loss": 1.3393, + "step": 71823 + }, + { + "epoch": 0.86, + "grad_norm": 6.640648842369192, + "learning_rate": 1.2660813590472246e-05, + "loss": 1.3062, + "step": 71826 + }, + { + "epoch": 0.86, + "grad_norm": 26.725046404966275, + "learning_rate": 1.2660250467176787e-05, + "loss": 1.7637, + "step": 71829 + }, + { + "epoch": 0.86, + "grad_norm": 5.301846080731273, + "learning_rate": 1.2659687334802857e-05, + "loss": 1.1751, + "step": 71832 + }, + { + "epoch": 0.86, + "grad_norm": 4.26657050922342, + "learning_rate": 1.2659124193352368e-05, + "loss": 1.5078, + "step": 71835 + }, + { + "epoch": 0.86, + "grad_norm": 22.06511995356354, + "learning_rate": 1.2658561042827247e-05, + "loss": 1.5049, + "step": 71838 + }, + { + "epoch": 0.86, + "grad_norm": 17.463352872181886, + "learning_rate": 1.2657997883229416e-05, + "loss": 1.5589, + "step": 71841 + }, + { + "epoch": 0.86, + "grad_norm": 17.870653093318708, + "learning_rate": 1.2657434714560792e-05, + "loss": 1.0474, + "step": 71844 + }, + { + "epoch": 0.86, + "grad_norm": 18.943353437328266, + "learning_rate": 1.2656871536823305e-05, + "loss": 1.3149, + "step": 71847 + }, + { + "epoch": 0.86, + "grad_norm": 20.925399885219818, + "learning_rate": 1.2656308350018869e-05, + "loss": 1.4961, + "step": 71850 + }, + { + "epoch": 0.86, + "grad_norm": 3.6598886163757025, + "learning_rate": 1.2655745154149415e-05, + "loss": 1.5714, + "step": 71853 + }, + { + "epoch": 0.86, + "grad_norm": 9.203173821752804, + "learning_rate": 1.2655181949216852e-05, + "loss": 1.4709, + "step": 71856 + }, + { + "epoch": 0.86, + "grad_norm": 12.990990807072832, + "learning_rate": 1.2654618735223114e-05, + "loss": 1.1724, + "step": 71859 + }, + { + "epoch": 0.86, + "grad_norm": 14.675184695679754, + "learning_rate": 1.2654055512170118e-05, + "loss": 1.3965, + "step": 71862 + }, + { + "epoch": 0.86, + "grad_norm": 21.524295554835327, + "learning_rate": 1.2653492280059787e-05, + "loss": 1.2045, + "step": 71865 + }, + { + "epoch": 0.86, + "grad_norm": 11.27754492025086, + "learning_rate": 1.265292903889404e-05, + "loss": 1.4045, + "step": 71868 + }, + { + "epoch": 0.86, + "grad_norm": 8.855116886484286, + "learning_rate": 1.26523657886748e-05, + "loss": 1.4326, + "step": 71871 + }, + { + "epoch": 0.86, + "grad_norm": 13.939641316965645, + "learning_rate": 1.2651802529403998e-05, + "loss": 1.1604, + "step": 71874 + }, + { + "epoch": 0.86, + "grad_norm": 19.490675506538434, + "learning_rate": 1.2651239261083544e-05, + "loss": 1.4595, + "step": 71877 + }, + { + "epoch": 0.86, + "grad_norm": 14.74269029184281, + "learning_rate": 1.2650675983715366e-05, + "loss": 1.1304, + "step": 71880 + }, + { + "epoch": 0.86, + "grad_norm": 9.438357911495993, + "learning_rate": 1.2650112697301389e-05, + "loss": 1.2811, + "step": 71883 + }, + { + "epoch": 0.86, + "grad_norm": 21.082407820370552, + "learning_rate": 1.2649549401843528e-05, + "loss": 1.5569, + "step": 71886 + }, + { + "epoch": 0.86, + "grad_norm": 31.590653528053387, + "learning_rate": 1.264898609734371e-05, + "loss": 1.4298, + "step": 71889 + }, + { + "epoch": 0.86, + "grad_norm": 17.98053482534874, + "learning_rate": 1.2648422783803858e-05, + "loss": 1.707, + "step": 71892 + }, + { + "epoch": 0.86, + "grad_norm": 18.345315917229822, + "learning_rate": 1.2647859461225893e-05, + "loss": 1.5542, + "step": 71895 + }, + { + "epoch": 0.86, + "grad_norm": 9.935306520586309, + "learning_rate": 1.2647296129611736e-05, + "loss": 1.121, + "step": 71898 + }, + { + "epoch": 0.86, + "grad_norm": 32.665946154280874, + "learning_rate": 1.2646732788963309e-05, + "loss": 1.3012, + "step": 71901 + }, + { + "epoch": 0.86, + "grad_norm": 21.010959941565297, + "learning_rate": 1.2646169439282541e-05, + "loss": 1.2721, + "step": 71904 + }, + { + "epoch": 0.86, + "grad_norm": 11.09806562586839, + "learning_rate": 1.2645606080571349e-05, + "loss": 1.1025, + "step": 71907 + }, + { + "epoch": 0.86, + "grad_norm": 9.916534258485266, + "learning_rate": 1.2645042712831653e-05, + "loss": 1.3951, + "step": 71910 + }, + { + "epoch": 0.86, + "grad_norm": 5.768346247187216, + "learning_rate": 1.264447933606538e-05, + "loss": 1.1781, + "step": 71913 + }, + { + "epoch": 0.86, + "grad_norm": 19.40854659666961, + "learning_rate": 1.2643915950274455e-05, + "loss": 1.6899, + "step": 71916 + }, + { + "epoch": 0.86, + "grad_norm": 10.13508739280759, + "learning_rate": 1.2643352555460795e-05, + "loss": 1.5853, + "step": 71919 + }, + { + "epoch": 0.86, + "grad_norm": 14.811266833755521, + "learning_rate": 1.2642789151626323e-05, + "loss": 1.4016, + "step": 71922 + }, + { + "epoch": 0.86, + "grad_norm": 8.890254144201004, + "learning_rate": 1.2642225738772962e-05, + "loss": 1.7291, + "step": 71925 + }, + { + "epoch": 0.86, + "grad_norm": 21.413746245047324, + "learning_rate": 1.2641662316902643e-05, + "loss": 1.4351, + "step": 71928 + }, + { + "epoch": 0.86, + "grad_norm": 13.119381010657143, + "learning_rate": 1.2641098886017277e-05, + "loss": 1.2606, + "step": 71931 + }, + { + "epoch": 0.86, + "grad_norm": 5.260016502497041, + "learning_rate": 1.2640535446118793e-05, + "loss": 1.6928, + "step": 71934 + }, + { + "epoch": 0.87, + "grad_norm": 9.846250432575404, + "learning_rate": 1.2639971997209112e-05, + "loss": 1.1813, + "step": 71937 + }, + { + "epoch": 0.87, + "grad_norm": 4.47312293646955, + "learning_rate": 1.2639408539290162e-05, + "loss": 1.6093, + "step": 71940 + }, + { + "epoch": 0.87, + "grad_norm": 8.75532436570833, + "learning_rate": 1.2638845072363854e-05, + "loss": 1.4357, + "step": 71943 + }, + { + "epoch": 0.87, + "grad_norm": 15.019445028286473, + "learning_rate": 1.2638281596432123e-05, + "loss": 1.3124, + "step": 71946 + }, + { + "epoch": 0.87, + "grad_norm": 10.114927527748529, + "learning_rate": 1.2637718111496888e-05, + "loss": 1.2416, + "step": 71949 + }, + { + "epoch": 0.87, + "grad_norm": 9.642342589989184, + "learning_rate": 1.2637154617560067e-05, + "loss": 1.0757, + "step": 71952 + }, + { + "epoch": 0.87, + "grad_norm": 12.907403300846058, + "learning_rate": 1.263659111462359e-05, + "loss": 1.2073, + "step": 71955 + }, + { + "epoch": 0.87, + "grad_norm": 26.885232293997905, + "learning_rate": 1.2636027602689378e-05, + "loss": 1.4589, + "step": 71958 + }, + { + "epoch": 0.87, + "grad_norm": 40.5495532482708, + "learning_rate": 1.2635464081759352e-05, + "loss": 1.4805, + "step": 71961 + }, + { + "epoch": 0.87, + "grad_norm": 43.67496231578074, + "learning_rate": 1.2634900551835436e-05, + "loss": 1.5344, + "step": 71964 + }, + { + "epoch": 0.87, + "grad_norm": 9.003351957749263, + "learning_rate": 1.2634337012919553e-05, + "loss": 1.2185, + "step": 71967 + }, + { + "epoch": 0.87, + "grad_norm": 7.037559758867756, + "learning_rate": 1.263377346501363e-05, + "loss": 1.286, + "step": 71970 + }, + { + "epoch": 0.87, + "grad_norm": 15.821596996351502, + "learning_rate": 1.2633209908119585e-05, + "loss": 1.397, + "step": 71973 + }, + { + "epoch": 0.87, + "grad_norm": 3.1803043594308993, + "learning_rate": 1.2632646342239342e-05, + "loss": 1.4152, + "step": 71976 + }, + { + "epoch": 0.87, + "grad_norm": 44.97294524999657, + "learning_rate": 1.2632082767374824e-05, + "loss": 1.3258, + "step": 71979 + }, + { + "epoch": 0.87, + "grad_norm": 36.021724649003, + "learning_rate": 1.2631519183527958e-05, + "loss": 1.4272, + "step": 71982 + }, + { + "epoch": 0.87, + "grad_norm": 24.016082181543343, + "learning_rate": 1.2630955590700664e-05, + "loss": 1.464, + "step": 71985 + }, + { + "epoch": 0.87, + "grad_norm": 19.080343548049903, + "learning_rate": 1.2630391988894868e-05, + "loss": 1.0477, + "step": 71988 + }, + { + "epoch": 0.87, + "grad_norm": 16.45219125592664, + "learning_rate": 1.262982837811249e-05, + "loss": 1.6722, + "step": 71991 + }, + { + "epoch": 0.87, + "grad_norm": 84.33092007820753, + "learning_rate": 1.2629264758355455e-05, + "loss": 1.2815, + "step": 71994 + }, + { + "epoch": 0.87, + "grad_norm": 10.645199281988239, + "learning_rate": 1.2628701129625686e-05, + "loss": 1.056, + "step": 71997 + }, + { + "epoch": 0.87, + "grad_norm": 13.392057708180403, + "learning_rate": 1.2628137491925107e-05, + "loss": 1.3779, + "step": 72000 + }, + { + "epoch": 0.87, + "grad_norm": 22.857079075313905, + "learning_rate": 1.2627573845255642e-05, + "loss": 1.5718, + "step": 72003 + }, + { + "epoch": 0.87, + "grad_norm": 20.379319509680297, + "learning_rate": 1.2627010189619213e-05, + "loss": 1.2709, + "step": 72006 + }, + { + "epoch": 0.87, + "grad_norm": 4.831757277043281, + "learning_rate": 1.2626446525017745e-05, + "loss": 1.5393, + "step": 72009 + }, + { + "epoch": 0.87, + "grad_norm": 113.50022835191399, + "learning_rate": 1.2625882851453165e-05, + "loss": 1.279, + "step": 72012 + }, + { + "epoch": 0.87, + "grad_norm": 10.750039220634754, + "learning_rate": 1.2625319168927389e-05, + "loss": 1.5283, + "step": 72015 + }, + { + "epoch": 0.87, + "grad_norm": 3.358523056368613, + "learning_rate": 1.2624755477442343e-05, + "loss": 1.1644, + "step": 72018 + }, + { + "epoch": 0.87, + "grad_norm": 3.2449975805236435, + "learning_rate": 1.2624191776999952e-05, + "loss": 1.149, + "step": 72021 + }, + { + "epoch": 0.87, + "grad_norm": 33.98343523487401, + "learning_rate": 1.2623628067602142e-05, + "loss": 1.7006, + "step": 72024 + }, + { + "epoch": 0.87, + "grad_norm": 8.490124928445443, + "learning_rate": 1.2623064349250833e-05, + "loss": 1.2107, + "step": 72027 + }, + { + "epoch": 0.87, + "grad_norm": 15.357686574484914, + "learning_rate": 1.2622500621947951e-05, + "loss": 1.2709, + "step": 72030 + }, + { + "epoch": 0.87, + "grad_norm": 30.46019133538501, + "learning_rate": 1.2621936885695418e-05, + "loss": 1.2552, + "step": 72033 + }, + { + "epoch": 0.87, + "grad_norm": 21.24530094149496, + "learning_rate": 1.2621373140495161e-05, + "loss": 1.5454, + "step": 72036 + }, + { + "epoch": 0.87, + "grad_norm": 10.975629249070918, + "learning_rate": 1.2620809386349097e-05, + "loss": 1.291, + "step": 72039 + }, + { + "epoch": 0.87, + "grad_norm": 12.08777447364672, + "learning_rate": 1.262024562325916e-05, + "loss": 1.2598, + "step": 72042 + }, + { + "epoch": 0.87, + "grad_norm": 13.932215757364698, + "learning_rate": 1.2619681851227265e-05, + "loss": 1.4799, + "step": 72045 + }, + { + "epoch": 0.87, + "grad_norm": 12.967952933255992, + "learning_rate": 1.2619118070255342e-05, + "loss": 1.4702, + "step": 72048 + }, + { + "epoch": 0.87, + "grad_norm": 22.85600812827872, + "learning_rate": 1.2618554280345309e-05, + "loss": 1.2472, + "step": 72051 + }, + { + "epoch": 0.87, + "grad_norm": 23.208513830203874, + "learning_rate": 1.2617990481499099e-05, + "loss": 1.7471, + "step": 72054 + }, + { + "epoch": 0.87, + "grad_norm": 13.232838044323012, + "learning_rate": 1.2617426673718627e-05, + "loss": 1.6281, + "step": 72057 + }, + { + "epoch": 0.87, + "grad_norm": 52.94636947628399, + "learning_rate": 1.2616862857005819e-05, + "loss": 1.3357, + "step": 72060 + }, + { + "epoch": 0.87, + "grad_norm": 4.074731838763648, + "learning_rate": 1.2616299031362601e-05, + "loss": 1.1261, + "step": 72063 + }, + { + "epoch": 0.87, + "grad_norm": 83.8223942781845, + "learning_rate": 1.26157351967909e-05, + "loss": 1.5055, + "step": 72066 + }, + { + "epoch": 0.87, + "grad_norm": 20.87116464364439, + "learning_rate": 1.2615171353292634e-05, + "loss": 1.2664, + "step": 72069 + }, + { + "epoch": 0.87, + "grad_norm": 9.29240503472934, + "learning_rate": 1.2614607500869732e-05, + "loss": 1.3712, + "step": 72072 + }, + { + "epoch": 0.87, + "grad_norm": 26.246588874101317, + "learning_rate": 1.2614043639524116e-05, + "loss": 1.4152, + "step": 72075 + }, + { + "epoch": 0.87, + "grad_norm": 8.22858672815871, + "learning_rate": 1.2613479769257707e-05, + "loss": 1.2461, + "step": 72078 + }, + { + "epoch": 0.87, + "grad_norm": 27.98147106349277, + "learning_rate": 1.261291589007244e-05, + "loss": 1.5279, + "step": 72081 + }, + { + "epoch": 0.87, + "grad_norm": 10.490285633968925, + "learning_rate": 1.2612352001970228e-05, + "loss": 1.7516, + "step": 72084 + }, + { + "epoch": 0.87, + "grad_norm": 5.847008292948285, + "learning_rate": 1.2611788104952999e-05, + "loss": 0.9416, + "step": 72087 + }, + { + "epoch": 0.87, + "grad_norm": 6.51613982171708, + "learning_rate": 1.2611224199022681e-05, + "loss": 1.2614, + "step": 72090 + }, + { + "epoch": 0.87, + "grad_norm": 24.956537246727123, + "learning_rate": 1.261066028418119e-05, + "loss": 1.4202, + "step": 72093 + }, + { + "epoch": 0.87, + "grad_norm": 11.394848470099314, + "learning_rate": 1.261009636043046e-05, + "loss": 1.0847, + "step": 72096 + }, + { + "epoch": 0.87, + "grad_norm": 37.41168745111685, + "learning_rate": 1.260953242777241e-05, + "loss": 1.1616, + "step": 72099 + }, + { + "epoch": 0.87, + "grad_norm": 30.261861525178176, + "learning_rate": 1.2608968486208965e-05, + "loss": 1.1703, + "step": 72102 + }, + { + "epoch": 0.87, + "grad_norm": 3.734797324010017, + "learning_rate": 1.2608404535742051e-05, + "loss": 1.2109, + "step": 72105 + }, + { + "epoch": 0.87, + "grad_norm": 9.607447427735385, + "learning_rate": 1.2607840576373594e-05, + "loss": 0.9303, + "step": 72108 + }, + { + "epoch": 0.87, + "grad_norm": 13.271647301176483, + "learning_rate": 1.2607276608105513e-05, + "loss": 1.5124, + "step": 72111 + }, + { + "epoch": 0.87, + "grad_norm": 17.009192399190944, + "learning_rate": 1.2606712630939737e-05, + "loss": 1.3376, + "step": 72114 + }, + { + "epoch": 0.87, + "grad_norm": 5.435396551807051, + "learning_rate": 1.2606148644878191e-05, + "loss": 1.1818, + "step": 72117 + }, + { + "epoch": 0.87, + "grad_norm": 18.836195321470328, + "learning_rate": 1.26055846499228e-05, + "loss": 1.3066, + "step": 72120 + }, + { + "epoch": 0.87, + "grad_norm": 19.62131525119184, + "learning_rate": 1.2605020646075485e-05, + "loss": 1.0656, + "step": 72123 + }, + { + "epoch": 0.87, + "grad_norm": 60.09695236454491, + "learning_rate": 1.2604456633338172e-05, + "loss": 1.4804, + "step": 72126 + }, + { + "epoch": 0.87, + "grad_norm": 25.410705041268432, + "learning_rate": 1.2603892611712788e-05, + "loss": 1.5494, + "step": 72129 + }, + { + "epoch": 0.87, + "grad_norm": 15.95863984254975, + "learning_rate": 1.2603328581201255e-05, + "loss": 1.5545, + "step": 72132 + }, + { + "epoch": 0.87, + "grad_norm": 7.673300226826029, + "learning_rate": 1.2602764541805501e-05, + "loss": 1.3493, + "step": 72135 + }, + { + "epoch": 0.87, + "grad_norm": 21.448932365474164, + "learning_rate": 1.2602200493527448e-05, + "loss": 1.2034, + "step": 72138 + }, + { + "epoch": 0.87, + "grad_norm": 4.520293831606909, + "learning_rate": 1.2601636436369023e-05, + "loss": 1.5146, + "step": 72141 + }, + { + "epoch": 0.87, + "grad_norm": 19.19640271927287, + "learning_rate": 1.260107237033215e-05, + "loss": 1.3097, + "step": 72144 + }, + { + "epoch": 0.87, + "grad_norm": 23.467852439302195, + "learning_rate": 1.260050829541875e-05, + "loss": 1.4118, + "step": 72147 + }, + { + "epoch": 0.87, + "grad_norm": 9.600809964951246, + "learning_rate": 1.2599944211630756e-05, + "loss": 1.5272, + "step": 72150 + }, + { + "epoch": 0.87, + "grad_norm": 20.626886236482257, + "learning_rate": 1.2599380118970087e-05, + "loss": 1.7394, + "step": 72153 + }, + { + "epoch": 0.87, + "grad_norm": 24.039371783460663, + "learning_rate": 1.2598816017438673e-05, + "loss": 1.4502, + "step": 72156 + }, + { + "epoch": 0.87, + "grad_norm": 13.493318260608127, + "learning_rate": 1.2598251907038432e-05, + "loss": 1.1169, + "step": 72159 + }, + { + "epoch": 0.87, + "grad_norm": 15.22423220606263, + "learning_rate": 1.2597687787771298e-05, + "loss": 1.2851, + "step": 72162 + }, + { + "epoch": 0.87, + "grad_norm": 14.349094577414096, + "learning_rate": 1.2597123659639188e-05, + "loss": 1.5429, + "step": 72165 + }, + { + "epoch": 0.87, + "grad_norm": 13.922354876247756, + "learning_rate": 1.259655952264403e-05, + "loss": 1.5167, + "step": 72168 + }, + { + "epoch": 0.87, + "grad_norm": 24.706763105983327, + "learning_rate": 1.259599537678775e-05, + "loss": 1.1347, + "step": 72171 + }, + { + "epoch": 0.87, + "grad_norm": 11.005544588524987, + "learning_rate": 1.2595431222072274e-05, + "loss": 1.2275, + "step": 72174 + }, + { + "epoch": 0.87, + "grad_norm": 4.416805860885293, + "learning_rate": 1.2594867058499528e-05, + "loss": 1.0308, + "step": 72177 + }, + { + "epoch": 0.87, + "grad_norm": 7.07858961606499, + "learning_rate": 1.2594302886071428e-05, + "loss": 1.2856, + "step": 72180 + }, + { + "epoch": 0.87, + "grad_norm": 7.3271696119926935, + "learning_rate": 1.2593738704789913e-05, + "loss": 1.4193, + "step": 72183 + }, + { + "epoch": 0.87, + "grad_norm": 17.23704454931595, + "learning_rate": 1.25931745146569e-05, + "loss": 1.132, + "step": 72186 + }, + { + "epoch": 0.87, + "grad_norm": 6.515971818372184, + "learning_rate": 1.2592610315674318e-05, + "loss": 1.3307, + "step": 72189 + }, + { + "epoch": 0.87, + "grad_norm": 9.161056715855413, + "learning_rate": 1.2592046107844089e-05, + "loss": 1.6547, + "step": 72192 + }, + { + "epoch": 0.87, + "grad_norm": 12.679847834149763, + "learning_rate": 1.259148189116814e-05, + "loss": 1.7285, + "step": 72195 + }, + { + "epoch": 0.87, + "grad_norm": 14.717470892722966, + "learning_rate": 1.2590917665648399e-05, + "loss": 1.1081, + "step": 72198 + }, + { + "epoch": 0.87, + "grad_norm": 7.250863656991992, + "learning_rate": 1.2590353431286788e-05, + "loss": 0.9962, + "step": 72201 + }, + { + "epoch": 0.87, + "grad_norm": 9.301307498932923, + "learning_rate": 1.2589789188085236e-05, + "loss": 1.3424, + "step": 72204 + }, + { + "epoch": 0.87, + "grad_norm": 4.444722628705339, + "learning_rate": 1.2589224936045663e-05, + "loss": 1.5351, + "step": 72207 + }, + { + "epoch": 0.87, + "grad_norm": 8.68305145494774, + "learning_rate": 1.2588660675169999e-05, + "loss": 1.1753, + "step": 72210 + }, + { + "epoch": 0.87, + "grad_norm": 4.802673082973187, + "learning_rate": 1.2588096405460169e-05, + "loss": 1.3556, + "step": 72213 + }, + { + "epoch": 0.87, + "grad_norm": 24.935301536515595, + "learning_rate": 1.25875321269181e-05, + "loss": 1.5715, + "step": 72216 + }, + { + "epoch": 0.87, + "grad_norm": 11.766134946653787, + "learning_rate": 1.2586967839545711e-05, + "loss": 1.0627, + "step": 72219 + }, + { + "epoch": 0.87, + "grad_norm": 3.340506698218796, + "learning_rate": 1.2586403543344934e-05, + "loss": 1.2439, + "step": 72222 + }, + { + "epoch": 0.87, + "grad_norm": 8.844182392385383, + "learning_rate": 1.2585839238317696e-05, + "loss": 1.5039, + "step": 72225 + }, + { + "epoch": 0.87, + "grad_norm": 20.06215819687956, + "learning_rate": 1.258527492446592e-05, + "loss": 1.6046, + "step": 72228 + }, + { + "epoch": 0.87, + "grad_norm": 9.821511966578196, + "learning_rate": 1.258471060179153e-05, + "loss": 1.3321, + "step": 72231 + }, + { + "epoch": 0.87, + "grad_norm": 6.660738219015246, + "learning_rate": 1.2584146270296453e-05, + "loss": 1.2492, + "step": 72234 + }, + { + "epoch": 0.87, + "grad_norm": 5.4467262724484895, + "learning_rate": 1.258358192998262e-05, + "loss": 1.4426, + "step": 72237 + }, + { + "epoch": 0.87, + "grad_norm": 19.22870576207462, + "learning_rate": 1.2583017580851949e-05, + "loss": 1.1358, + "step": 72240 + }, + { + "epoch": 0.87, + "grad_norm": 7.363875941181574, + "learning_rate": 1.258245322290637e-05, + "loss": 1.6859, + "step": 72243 + }, + { + "epoch": 0.87, + "grad_norm": 6.866820708970589, + "learning_rate": 1.2581888856147806e-05, + "loss": 1.2056, + "step": 72246 + }, + { + "epoch": 0.87, + "grad_norm": 12.377270829219917, + "learning_rate": 1.2581324480578188e-05, + "loss": 1.682, + "step": 72249 + }, + { + "epoch": 0.87, + "grad_norm": 6.75362327963807, + "learning_rate": 1.258076009619944e-05, + "loss": 1.4103, + "step": 72252 + }, + { + "epoch": 0.87, + "grad_norm": 26.67167116740056, + "learning_rate": 1.2580195703013485e-05, + "loss": 1.1715, + "step": 72255 + }, + { + "epoch": 0.87, + "grad_norm": 18.785237753477958, + "learning_rate": 1.2579631301022256e-05, + "loss": 1.5532, + "step": 72258 + }, + { + "epoch": 0.87, + "grad_norm": 15.95644721766673, + "learning_rate": 1.2579066890227667e-05, + "loss": 1.2508, + "step": 72261 + }, + { + "epoch": 0.87, + "grad_norm": 9.091150980232488, + "learning_rate": 1.2578502470631657e-05, + "loss": 1.425, + "step": 72264 + }, + { + "epoch": 0.87, + "grad_norm": 38.119168602165516, + "learning_rate": 1.2577938042236143e-05, + "loss": 1.2951, + "step": 72267 + }, + { + "epoch": 0.87, + "grad_norm": 5.854293833590822, + "learning_rate": 1.2577373605043061e-05, + "loss": 1.0521, + "step": 72270 + }, + { + "epoch": 0.87, + "grad_norm": 24.123066034462223, + "learning_rate": 1.2576809159054328e-05, + "loss": 1.1359, + "step": 72273 + }, + { + "epoch": 0.87, + "grad_norm": 34.201535552392336, + "learning_rate": 1.2576244704271872e-05, + "loss": 1.3808, + "step": 72276 + }, + { + "epoch": 0.87, + "grad_norm": 12.525888858643238, + "learning_rate": 1.2575680240697625e-05, + "loss": 1.1215, + "step": 72279 + }, + { + "epoch": 0.87, + "grad_norm": 7.616143443894559, + "learning_rate": 1.2575115768333505e-05, + "loss": 1.3718, + "step": 72282 + }, + { + "epoch": 0.87, + "grad_norm": 26.778077929888976, + "learning_rate": 1.2574551287181444e-05, + "loss": 1.5431, + "step": 72285 + }, + { + "epoch": 0.87, + "grad_norm": 7.7984627435646106, + "learning_rate": 1.2573986797243365e-05, + "loss": 1.1509, + "step": 72288 + }, + { + "epoch": 0.87, + "grad_norm": 18.04547717559482, + "learning_rate": 1.2573422298521201e-05, + "loss": 1.3868, + "step": 72291 + }, + { + "epoch": 0.87, + "grad_norm": 13.284572957167937, + "learning_rate": 1.2572857791016868e-05, + "loss": 1.4071, + "step": 72294 + }, + { + "epoch": 0.87, + "grad_norm": 6.387509560339361, + "learning_rate": 1.2572293274732301e-05, + "loss": 1.2517, + "step": 72297 + }, + { + "epoch": 0.87, + "grad_norm": 6.4984737722755455, + "learning_rate": 1.2571728749669426e-05, + "loss": 1.3707, + "step": 72300 + }, + { + "epoch": 0.87, + "grad_norm": 9.839852688930211, + "learning_rate": 1.257116421583016e-05, + "loss": 1.4809, + "step": 72303 + }, + { + "epoch": 0.87, + "grad_norm": 18.98006283446536, + "learning_rate": 1.2570599673216444e-05, + "loss": 1.2289, + "step": 72306 + }, + { + "epoch": 0.87, + "grad_norm": 3.8786715969736645, + "learning_rate": 1.2570035121830193e-05, + "loss": 1.3369, + "step": 72309 + }, + { + "epoch": 0.87, + "grad_norm": 47.163860090330395, + "learning_rate": 1.2569470561673341e-05, + "loss": 1.2088, + "step": 72312 + }, + { + "epoch": 0.87, + "grad_norm": 4.210890009462579, + "learning_rate": 1.2568905992747808e-05, + "loss": 1.1799, + "step": 72315 + }, + { + "epoch": 0.87, + "grad_norm": 14.602903490006916, + "learning_rate": 1.2568341415055524e-05, + "loss": 1.4963, + "step": 72318 + }, + { + "epoch": 0.87, + "grad_norm": 8.215790604861544, + "learning_rate": 1.256777682859842e-05, + "loss": 1.4832, + "step": 72321 + }, + { + "epoch": 0.87, + "grad_norm": 17.61463616824854, + "learning_rate": 1.2567212233378416e-05, + "loss": 1.47, + "step": 72324 + }, + { + "epoch": 0.87, + "grad_norm": 15.658791570998906, + "learning_rate": 1.2566647629397441e-05, + "loss": 1.155, + "step": 72327 + }, + { + "epoch": 0.87, + "grad_norm": 35.02242914271197, + "learning_rate": 1.2566083016657421e-05, + "loss": 0.8847, + "step": 72330 + }, + { + "epoch": 0.87, + "grad_norm": 24.941743126952048, + "learning_rate": 1.2565518395160288e-05, + "loss": 0.9016, + "step": 72333 + }, + { + "epoch": 0.87, + "grad_norm": 16.51412944578483, + "learning_rate": 1.256495376490796e-05, + "loss": 1.1687, + "step": 72336 + }, + { + "epoch": 0.87, + "grad_norm": 146.6360121846189, + "learning_rate": 1.2564389125902372e-05, + "loss": 1.2318, + "step": 72339 + }, + { + "epoch": 0.87, + "grad_norm": 8.54931448355533, + "learning_rate": 1.2563824478145446e-05, + "loss": 1.0714, + "step": 72342 + }, + { + "epoch": 0.87, + "grad_norm": 13.74571208061365, + "learning_rate": 1.2563259821639113e-05, + "loss": 1.2305, + "step": 72345 + }, + { + "epoch": 0.87, + "grad_norm": 5.3067672433468305, + "learning_rate": 1.2562695156385293e-05, + "loss": 1.177, + "step": 72348 + }, + { + "epoch": 0.87, + "grad_norm": 16.71130248513589, + "learning_rate": 1.2562130482385924e-05, + "loss": 1.1893, + "step": 72351 + }, + { + "epoch": 0.87, + "grad_norm": 7.333215264538934, + "learning_rate": 1.2561565799642924e-05, + "loss": 1.1926, + "step": 72354 + }, + { + "epoch": 0.87, + "grad_norm": 24.078389771589215, + "learning_rate": 1.256100110815822e-05, + "loss": 1.5845, + "step": 72357 + }, + { + "epoch": 0.87, + "grad_norm": 8.202059629059194, + "learning_rate": 1.2560436407933743e-05, + "loss": 1.0835, + "step": 72360 + }, + { + "epoch": 0.87, + "grad_norm": 10.058641547360308, + "learning_rate": 1.2559871698971416e-05, + "loss": 1.5126, + "step": 72363 + }, + { + "epoch": 0.87, + "grad_norm": 24.463545548599907, + "learning_rate": 1.2559306981273177e-05, + "loss": 1.4707, + "step": 72366 + }, + { + "epoch": 0.87, + "grad_norm": 25.112660298081764, + "learning_rate": 1.2558742254840937e-05, + "loss": 1.768, + "step": 72369 + }, + { + "epoch": 0.87, + "grad_norm": 18.448318292777255, + "learning_rate": 1.2558177519676633e-05, + "loss": 1.2623, + "step": 72372 + }, + { + "epoch": 0.87, + "grad_norm": 4.584429297512915, + "learning_rate": 1.2557612775782194e-05, + "loss": 1.6469, + "step": 72375 + }, + { + "epoch": 0.87, + "grad_norm": 9.844972347735041, + "learning_rate": 1.255704802315954e-05, + "loss": 1.6108, + "step": 72378 + }, + { + "epoch": 0.87, + "grad_norm": 14.05535551713102, + "learning_rate": 1.2556483261810604e-05, + "loss": 1.2599, + "step": 72381 + }, + { + "epoch": 0.87, + "grad_norm": 12.145820147655222, + "learning_rate": 1.255591849173731e-05, + "loss": 1.207, + "step": 72384 + }, + { + "epoch": 0.87, + "grad_norm": 21.810532477995245, + "learning_rate": 1.2555353712941592e-05, + "loss": 1.8015, + "step": 72387 + }, + { + "epoch": 0.87, + "grad_norm": 14.677916753422636, + "learning_rate": 1.2554788925425365e-05, + "loss": 1.3209, + "step": 72390 + }, + { + "epoch": 0.87, + "grad_norm": 39.68868357290506, + "learning_rate": 1.255422412919057e-05, + "loss": 1.461, + "step": 72393 + }, + { + "epoch": 0.87, + "grad_norm": 4.589333167354618, + "learning_rate": 1.2553659324239124e-05, + "loss": 1.6407, + "step": 72396 + }, + { + "epoch": 0.87, + "grad_norm": 22.54690726496005, + "learning_rate": 1.255309451057296e-05, + "loss": 1.5116, + "step": 72399 + }, + { + "epoch": 0.87, + "grad_norm": 6.585835145281641, + "learning_rate": 1.2552529688194003e-05, + "loss": 1.2405, + "step": 72402 + }, + { + "epoch": 0.87, + "grad_norm": 23.01660021532191, + "learning_rate": 1.2551964857104184e-05, + "loss": 1.9341, + "step": 72405 + }, + { + "epoch": 0.87, + "grad_norm": 9.65882423381941, + "learning_rate": 1.2551400017305427e-05, + "loss": 1.4738, + "step": 72408 + }, + { + "epoch": 0.87, + "grad_norm": 13.259996289371685, + "learning_rate": 1.2550835168799657e-05, + "loss": 1.6621, + "step": 72411 + }, + { + "epoch": 0.87, + "grad_norm": 6.596561648217983, + "learning_rate": 1.255027031158881e-05, + "loss": 1.0789, + "step": 72414 + }, + { + "epoch": 0.87, + "grad_norm": 27.411675805707617, + "learning_rate": 1.2549705445674806e-05, + "loss": 1.4868, + "step": 72417 + }, + { + "epoch": 0.87, + "grad_norm": 9.436517037771003, + "learning_rate": 1.254914057105958e-05, + "loss": 1.3448, + "step": 72420 + }, + { + "epoch": 0.87, + "grad_norm": 14.945283441374679, + "learning_rate": 1.2548575687745051e-05, + "loss": 1.6059, + "step": 72423 + }, + { + "epoch": 0.87, + "grad_norm": 9.46326223253655, + "learning_rate": 1.2548010795733154e-05, + "loss": 1.2553, + "step": 72426 + }, + { + "epoch": 0.87, + "grad_norm": 10.042205199593852, + "learning_rate": 1.2547445895025811e-05, + "loss": 1.4773, + "step": 72429 + }, + { + "epoch": 0.87, + "grad_norm": 3.4690183140714144, + "learning_rate": 1.2546880985624958e-05, + "loss": 1.3088, + "step": 72432 + }, + { + "epoch": 0.87, + "grad_norm": 10.48810020952195, + "learning_rate": 1.2546316067532511e-05, + "loss": 1.1386, + "step": 72435 + }, + { + "epoch": 0.87, + "grad_norm": 163.4707079004434, + "learning_rate": 1.2545751140750408e-05, + "loss": 1.3743, + "step": 72438 + }, + { + "epoch": 0.87, + "grad_norm": 10.059084409478354, + "learning_rate": 1.2545186205280576e-05, + "loss": 1.748, + "step": 72441 + }, + { + "epoch": 0.87, + "grad_norm": 7.7483138227431585, + "learning_rate": 1.2544621261124934e-05, + "loss": 1.4403, + "step": 72444 + }, + { + "epoch": 0.87, + "grad_norm": 35.97440288460584, + "learning_rate": 1.2544056308285422e-05, + "loss": 1.1031, + "step": 72447 + }, + { + "epoch": 0.87, + "grad_norm": 15.442404105120401, + "learning_rate": 1.2543491346763959e-05, + "loss": 1.2521, + "step": 72450 + }, + { + "epoch": 0.87, + "grad_norm": 9.538006685967218, + "learning_rate": 1.2542926376562477e-05, + "loss": 1.3395, + "step": 72453 + }, + { + "epoch": 0.87, + "grad_norm": 2.8434812376814107, + "learning_rate": 1.2542361397682901e-05, + "loss": 1.5944, + "step": 72456 + }, + { + "epoch": 0.87, + "grad_norm": 12.617322669615067, + "learning_rate": 1.2541796410127164e-05, + "loss": 1.2412, + "step": 72459 + }, + { + "epoch": 0.87, + "grad_norm": 33.30576893707991, + "learning_rate": 1.2541231413897192e-05, + "loss": 1.3408, + "step": 72462 + }, + { + "epoch": 0.87, + "grad_norm": 52.46493148894334, + "learning_rate": 1.2540666408994909e-05, + "loss": 1.4653, + "step": 72465 + }, + { + "epoch": 0.87, + "grad_norm": 22.637530992412415, + "learning_rate": 1.254010139542225e-05, + "loss": 1.1539, + "step": 72468 + }, + { + "epoch": 0.87, + "grad_norm": 13.965806314380519, + "learning_rate": 1.253953637318114e-05, + "loss": 1.5704, + "step": 72471 + }, + { + "epoch": 0.87, + "grad_norm": 9.767102663482394, + "learning_rate": 1.2538971342273505e-05, + "loss": 1.477, + "step": 72474 + }, + { + "epoch": 0.87, + "grad_norm": 10.039971723665493, + "learning_rate": 1.2538406302701276e-05, + "loss": 1.5845, + "step": 72477 + }, + { + "epoch": 0.87, + "grad_norm": 15.289836323498672, + "learning_rate": 1.2537841254466381e-05, + "loss": 1.1329, + "step": 72480 + }, + { + "epoch": 0.87, + "grad_norm": 21.168877280841308, + "learning_rate": 1.2537276197570749e-05, + "loss": 1.0889, + "step": 72483 + }, + { + "epoch": 0.87, + "grad_norm": 23.9367396830072, + "learning_rate": 1.2536711132016305e-05, + "loss": 1.4113, + "step": 72486 + }, + { + "epoch": 0.87, + "grad_norm": 67.6417853840997, + "learning_rate": 1.253614605780498e-05, + "loss": 1.3808, + "step": 72489 + }, + { + "epoch": 0.87, + "grad_norm": 3.950067076148626, + "learning_rate": 1.2535580974938704e-05, + "loss": 1.2771, + "step": 72492 + }, + { + "epoch": 0.87, + "grad_norm": 23.59049921074317, + "learning_rate": 1.2535015883419401e-05, + "loss": 1.1708, + "step": 72495 + }, + { + "epoch": 0.87, + "grad_norm": 77.31776056016234, + "learning_rate": 1.2534450783249003e-05, + "loss": 0.9215, + "step": 72498 + }, + { + "epoch": 0.87, + "grad_norm": 11.670456515692818, + "learning_rate": 1.2533885674429438e-05, + "loss": 1.2155, + "step": 72501 + }, + { + "epoch": 0.87, + "grad_norm": 8.83980273299851, + "learning_rate": 1.2533320556962634e-05, + "loss": 1.4494, + "step": 72504 + }, + { + "epoch": 0.87, + "grad_norm": 12.647521678518656, + "learning_rate": 1.253275543085052e-05, + "loss": 1.4561, + "step": 72507 + }, + { + "epoch": 0.87, + "grad_norm": 21.64613746789931, + "learning_rate": 1.2532190296095021e-05, + "loss": 1.6224, + "step": 72510 + }, + { + "epoch": 0.87, + "grad_norm": 17.67575905093471, + "learning_rate": 1.253162515269807e-05, + "loss": 1.1537, + "step": 72513 + }, + { + "epoch": 0.87, + "grad_norm": 21.328803335535817, + "learning_rate": 1.2531060000661595e-05, + "loss": 1.4583, + "step": 72516 + }, + { + "epoch": 0.87, + "grad_norm": 2.896623040549609, + "learning_rate": 1.2530494839987522e-05, + "loss": 1.2528, + "step": 72519 + }, + { + "epoch": 0.87, + "grad_norm": 22.085452576384487, + "learning_rate": 1.2529929670677785e-05, + "loss": 1.345, + "step": 72522 + }, + { + "epoch": 0.87, + "grad_norm": 10.407868005271485, + "learning_rate": 1.252936449273431e-05, + "loss": 1.0803, + "step": 72525 + }, + { + "epoch": 0.87, + "grad_norm": 5.939843635151537, + "learning_rate": 1.252879930615902e-05, + "loss": 1.3355, + "step": 72528 + }, + { + "epoch": 0.87, + "grad_norm": 14.303353729138193, + "learning_rate": 1.2528234110953851e-05, + "loss": 1.5592, + "step": 72531 + }, + { + "epoch": 0.87, + "grad_norm": 5.099612027591676, + "learning_rate": 1.2527668907120729e-05, + "loss": 1.2071, + "step": 72534 + }, + { + "epoch": 0.87, + "grad_norm": 5.685688606245888, + "learning_rate": 1.2527103694661587e-05, + "loss": 1.4462, + "step": 72537 + }, + { + "epoch": 0.87, + "grad_norm": 7.439505842576901, + "learning_rate": 1.2526538473578347e-05, + "loss": 1.5997, + "step": 72540 + }, + { + "epoch": 0.87, + "grad_norm": 28.906887305363583, + "learning_rate": 1.2525973243872942e-05, + "loss": 1.3358, + "step": 72543 + }, + { + "epoch": 0.87, + "grad_norm": 6.745396829696995, + "learning_rate": 1.2525408005547299e-05, + "loss": 1.24, + "step": 72546 + }, + { + "epoch": 0.87, + "grad_norm": 7.322988088484403, + "learning_rate": 1.252484275860335e-05, + "loss": 1.0128, + "step": 72549 + }, + { + "epoch": 0.87, + "grad_norm": 10.7388239934766, + "learning_rate": 1.2524277503043022e-05, + "loss": 1.1454, + "step": 72552 + }, + { + "epoch": 0.87, + "grad_norm": 10.085635737680075, + "learning_rate": 1.2523712238868244e-05, + "loss": 1.5068, + "step": 72555 + }, + { + "epoch": 0.87, + "grad_norm": 7.949674436187838, + "learning_rate": 1.2523146966080946e-05, + "loss": 1.2095, + "step": 72558 + }, + { + "epoch": 0.87, + "grad_norm": 12.111511285952432, + "learning_rate": 1.2522581684683054e-05, + "loss": 1.4833, + "step": 72561 + }, + { + "epoch": 0.87, + "grad_norm": 10.695307730678664, + "learning_rate": 1.25220163946765e-05, + "loss": 1.3114, + "step": 72564 + }, + { + "epoch": 0.87, + "grad_norm": 13.738931297560217, + "learning_rate": 1.2521451096063212e-05, + "loss": 1.4099, + "step": 72567 + }, + { + "epoch": 0.87, + "grad_norm": 41.56973356286722, + "learning_rate": 1.252088578884512e-05, + "loss": 1.247, + "step": 72570 + }, + { + "epoch": 0.87, + "grad_norm": 3.1342700312982643, + "learning_rate": 1.2520320473024153e-05, + "loss": 1.3616, + "step": 72573 + }, + { + "epoch": 0.87, + "grad_norm": 6.6173316617739815, + "learning_rate": 1.2519755148602242e-05, + "loss": 1.5107, + "step": 72576 + }, + { + "epoch": 0.87, + "grad_norm": 11.226901250980767, + "learning_rate": 1.2519189815581311e-05, + "loss": 1.4012, + "step": 72579 + }, + { + "epoch": 0.87, + "grad_norm": 14.141880973846538, + "learning_rate": 1.2518624473963294e-05, + "loss": 1.3014, + "step": 72582 + }, + { + "epoch": 0.87, + "grad_norm": 6.369751997279821, + "learning_rate": 1.2518059123750119e-05, + "loss": 1.3029, + "step": 72585 + }, + { + "epoch": 0.87, + "grad_norm": 28.038957472240366, + "learning_rate": 1.2517493764943716e-05, + "loss": 1.0789, + "step": 72588 + }, + { + "epoch": 0.87, + "grad_norm": 18.905542875152953, + "learning_rate": 1.2516928397546013e-05, + "loss": 1.2805, + "step": 72591 + }, + { + "epoch": 0.87, + "grad_norm": 10.629655287997158, + "learning_rate": 1.2516363021558942e-05, + "loss": 1.3127, + "step": 72594 + }, + { + "epoch": 0.87, + "grad_norm": 16.737962645862936, + "learning_rate": 1.2515797636984426e-05, + "loss": 1.4923, + "step": 72597 + }, + { + "epoch": 0.87, + "grad_norm": 3.9300092347797113, + "learning_rate": 1.25152322438244e-05, + "loss": 1.1704, + "step": 72600 + }, + { + "epoch": 0.87, + "grad_norm": 14.65586621656754, + "learning_rate": 1.2514666842080796e-05, + "loss": 1.3031, + "step": 72603 + }, + { + "epoch": 0.87, + "grad_norm": 6.965240892909672, + "learning_rate": 1.2514101431755535e-05, + "loss": 1.3955, + "step": 72606 + }, + { + "epoch": 0.87, + "grad_norm": 14.784014869357623, + "learning_rate": 1.2513536012850554e-05, + "loss": 1.1471, + "step": 72609 + }, + { + "epoch": 0.87, + "grad_norm": 45.639605030967864, + "learning_rate": 1.2512970585367782e-05, + "loss": 1.2255, + "step": 72612 + }, + { + "epoch": 0.87, + "grad_norm": 23.564514546918495, + "learning_rate": 1.2512405149309143e-05, + "loss": 1.2652, + "step": 72615 + }, + { + "epoch": 0.87, + "grad_norm": 7.483764408542763, + "learning_rate": 1.251183970467657e-05, + "loss": 1.3236, + "step": 72618 + }, + { + "epoch": 0.87, + "grad_norm": 48.530025273714166, + "learning_rate": 1.2511274251471994e-05, + "loss": 1.3609, + "step": 72621 + }, + { + "epoch": 0.87, + "grad_norm": 12.974598759606979, + "learning_rate": 1.2510708789697345e-05, + "loss": 1.6277, + "step": 72624 + }, + { + "epoch": 0.87, + "grad_norm": 22.057972449916587, + "learning_rate": 1.2510143319354549e-05, + "loss": 1.8062, + "step": 72627 + }, + { + "epoch": 0.87, + "grad_norm": 17.094239308480255, + "learning_rate": 1.250957784044554e-05, + "loss": 1.583, + "step": 72630 + }, + { + "epoch": 0.87, + "grad_norm": 4.812585148473151, + "learning_rate": 1.2509012352972242e-05, + "loss": 1.1963, + "step": 72633 + }, + { + "epoch": 0.87, + "grad_norm": 16.643562053787452, + "learning_rate": 1.2508446856936593e-05, + "loss": 1.6302, + "step": 72636 + }, + { + "epoch": 0.87, + "grad_norm": 6.330633991182446, + "learning_rate": 1.2507881352340516e-05, + "loss": 1.3856, + "step": 72639 + }, + { + "epoch": 0.87, + "grad_norm": 3.5309328130181927, + "learning_rate": 1.2507315839185945e-05, + "loss": 1.5577, + "step": 72642 + }, + { + "epoch": 0.87, + "grad_norm": 30.12097512864121, + "learning_rate": 1.2506750317474808e-05, + "loss": 1.6274, + "step": 72645 + }, + { + "epoch": 0.87, + "grad_norm": 3.676012547327999, + "learning_rate": 1.2506184787209035e-05, + "loss": 1.541, + "step": 72648 + }, + { + "epoch": 0.87, + "grad_norm": 11.26338039356805, + "learning_rate": 1.2505619248390553e-05, + "loss": 1.4214, + "step": 72651 + }, + { + "epoch": 0.87, + "grad_norm": 6.2228449169765785, + "learning_rate": 1.2505053701021299e-05, + "loss": 1.1978, + "step": 72654 + }, + { + "epoch": 0.87, + "grad_norm": 8.566432221322486, + "learning_rate": 1.2504488145103197e-05, + "loss": 1.6088, + "step": 72657 + }, + { + "epoch": 0.87, + "grad_norm": 6.221225021410994, + "learning_rate": 1.2503922580638177e-05, + "loss": 1.0586, + "step": 72660 + }, + { + "epoch": 0.87, + "grad_norm": 3.505334531917571, + "learning_rate": 1.2503357007628174e-05, + "loss": 1.6575, + "step": 72663 + }, + { + "epoch": 0.87, + "grad_norm": 18.739159865230274, + "learning_rate": 1.2502791426075117e-05, + "loss": 1.6857, + "step": 72666 + }, + { + "epoch": 0.87, + "grad_norm": 12.425430307427773, + "learning_rate": 1.2502225835980933e-05, + "loss": 1.6864, + "step": 72669 + }, + { + "epoch": 0.87, + "grad_norm": 9.639080835180117, + "learning_rate": 1.2501660237347551e-05, + "loss": 1.3026, + "step": 72672 + }, + { + "epoch": 0.87, + "grad_norm": 5.430582157585007, + "learning_rate": 1.2501094630176907e-05, + "loss": 1.1533, + "step": 72675 + }, + { + "epoch": 0.87, + "grad_norm": 8.798650104102682, + "learning_rate": 1.2500529014470925e-05, + "loss": 1.2524, + "step": 72678 + }, + { + "epoch": 0.87, + "grad_norm": 26.933585084773835, + "learning_rate": 1.2499963390231537e-05, + "loss": 1.3032, + "step": 72681 + }, + { + "epoch": 0.87, + "grad_norm": 8.190429394625248, + "learning_rate": 1.2499397757460678e-05, + "loss": 1.4935, + "step": 72684 + }, + { + "epoch": 0.87, + "grad_norm": 7.456482383739191, + "learning_rate": 1.2498832116160276e-05, + "loss": 1.0304, + "step": 72687 + }, + { + "epoch": 0.87, + "grad_norm": 9.578596426604523, + "learning_rate": 1.2498266466332258e-05, + "loss": 1.212, + "step": 72690 + }, + { + "epoch": 0.87, + "grad_norm": 14.447376461404806, + "learning_rate": 1.2497700807978554e-05, + "loss": 1.2585, + "step": 72693 + }, + { + "epoch": 0.87, + "grad_norm": 37.079504976302715, + "learning_rate": 1.2497135141101101e-05, + "loss": 1.1617, + "step": 72696 + }, + { + "epoch": 0.87, + "grad_norm": 14.336400386933791, + "learning_rate": 1.2496569465701821e-05, + "loss": 1.3933, + "step": 72699 + }, + { + "epoch": 0.87, + "grad_norm": 17.799385680928403, + "learning_rate": 1.2496003781782654e-05, + "loss": 1.15, + "step": 72702 + }, + { + "epoch": 0.87, + "grad_norm": 3.3369517330857805, + "learning_rate": 1.249543808934552e-05, + "loss": 1.8743, + "step": 72705 + }, + { + "epoch": 0.87, + "grad_norm": 6.675567094517696, + "learning_rate": 1.2494872388392356e-05, + "loss": 1.3316, + "step": 72708 + }, + { + "epoch": 0.87, + "grad_norm": 6.970217060771754, + "learning_rate": 1.2494306678925093e-05, + "loss": 1.464, + "step": 72711 + }, + { + "epoch": 0.87, + "grad_norm": 28.959284394149805, + "learning_rate": 1.2493740960945658e-05, + "loss": 1.1386, + "step": 72714 + }, + { + "epoch": 0.87, + "grad_norm": 4.990585646349572, + "learning_rate": 1.2493175234455985e-05, + "loss": 1.168, + "step": 72717 + }, + { + "epoch": 0.87, + "grad_norm": 72.5810097078325, + "learning_rate": 1.2492609499458001e-05, + "loss": 1.5203, + "step": 72720 + }, + { + "epoch": 0.87, + "grad_norm": 26.82323872214275, + "learning_rate": 1.249204375595364e-05, + "loss": 1.3002, + "step": 72723 + }, + { + "epoch": 0.87, + "grad_norm": 11.293850351645249, + "learning_rate": 1.2491478003944831e-05, + "loss": 1.5634, + "step": 72726 + }, + { + "epoch": 0.87, + "grad_norm": 16.352844537038546, + "learning_rate": 1.2490912243433507e-05, + "loss": 1.2194, + "step": 72729 + }, + { + "epoch": 0.87, + "grad_norm": 154.96831141653962, + "learning_rate": 1.2490346474421592e-05, + "loss": 1.5175, + "step": 72732 + }, + { + "epoch": 0.87, + "grad_norm": 11.360649768991383, + "learning_rate": 1.2489780696911022e-05, + "loss": 1.5065, + "step": 72735 + }, + { + "epoch": 0.87, + "grad_norm": 85.81250910776728, + "learning_rate": 1.2489214910903733e-05, + "loss": 1.4145, + "step": 72738 + }, + { + "epoch": 0.87, + "grad_norm": 118.93710133066139, + "learning_rate": 1.2488649116401646e-05, + "loss": 1.1849, + "step": 72741 + }, + { + "epoch": 0.87, + "grad_norm": 19.226236318924393, + "learning_rate": 1.2488083313406695e-05, + "loss": 1.4506, + "step": 72744 + }, + { + "epoch": 0.87, + "grad_norm": 10.671421377753877, + "learning_rate": 1.2487517501920811e-05, + "loss": 1.3692, + "step": 72747 + }, + { + "epoch": 0.87, + "grad_norm": 15.855263751591268, + "learning_rate": 1.2486951681945931e-05, + "loss": 1.2349, + "step": 72750 + }, + { + "epoch": 0.87, + "grad_norm": 14.95572544837717, + "learning_rate": 1.2486385853483978e-05, + "loss": 1.3569, + "step": 72753 + }, + { + "epoch": 0.87, + "grad_norm": 11.13041904897371, + "learning_rate": 1.2485820016536885e-05, + "loss": 1.2375, + "step": 72756 + }, + { + "epoch": 0.87, + "grad_norm": 20.485233585621593, + "learning_rate": 1.2485254171106584e-05, + "loss": 1.4765, + "step": 72759 + }, + { + "epoch": 0.87, + "grad_norm": 7.646643190808668, + "learning_rate": 1.2484688317195006e-05, + "loss": 1.3265, + "step": 72762 + }, + { + "epoch": 0.87, + "grad_norm": 6.134391628096428, + "learning_rate": 1.2484122454804082e-05, + "loss": 1.2637, + "step": 72765 + }, + { + "epoch": 0.88, + "grad_norm": 43.35803516544088, + "learning_rate": 1.248355658393574e-05, + "loss": 1.6002, + "step": 72768 + }, + { + "epoch": 0.88, + "grad_norm": 4.464884482492112, + "learning_rate": 1.248299070459192e-05, + "loss": 1.4507, + "step": 72771 + }, + { + "epoch": 0.88, + "grad_norm": 6.047300280762051, + "learning_rate": 1.2482424816774542e-05, + "loss": 1.832, + "step": 72774 + }, + { + "epoch": 0.88, + "grad_norm": 36.26028700168088, + "learning_rate": 1.2481858920485544e-05, + "loss": 1.6899, + "step": 72777 + }, + { + "epoch": 0.88, + "grad_norm": 17.318440653393807, + "learning_rate": 1.2481293015726853e-05, + "loss": 1.3783, + "step": 72780 + }, + { + "epoch": 0.88, + "grad_norm": 29.472115539659047, + "learning_rate": 1.2480727102500407e-05, + "loss": 1.207, + "step": 72783 + }, + { + "epoch": 0.88, + "grad_norm": 9.312340119050022, + "learning_rate": 1.248016118080813e-05, + "loss": 1.3917, + "step": 72786 + }, + { + "epoch": 0.88, + "grad_norm": 10.767702721886986, + "learning_rate": 1.2479595250651956e-05, + "loss": 1.3755, + "step": 72789 + }, + { + "epoch": 0.88, + "grad_norm": 10.666796159598357, + "learning_rate": 1.2479029312033819e-05, + "loss": 1.1579, + "step": 72792 + }, + { + "epoch": 0.88, + "grad_norm": 16.86748593922053, + "learning_rate": 1.2478463364955646e-05, + "loss": 1.5692, + "step": 72795 + }, + { + "epoch": 0.88, + "grad_norm": 13.823088801837866, + "learning_rate": 1.247789740941937e-05, + "loss": 1.1376, + "step": 72798 + }, + { + "epoch": 0.88, + "grad_norm": 5.866997878342316, + "learning_rate": 1.2477331445426924e-05, + "loss": 1.2724, + "step": 72801 + }, + { + "epoch": 0.88, + "grad_norm": 28.992058320009107, + "learning_rate": 1.247676547298024e-05, + "loss": 1.2896, + "step": 72804 + }, + { + "epoch": 0.88, + "grad_norm": 16.668606026498313, + "learning_rate": 1.2476199492081244e-05, + "loss": 1.2999, + "step": 72807 + }, + { + "epoch": 0.88, + "grad_norm": 7.558121119873989, + "learning_rate": 1.2475633502731872e-05, + "loss": 1.3093, + "step": 72810 + }, + { + "epoch": 0.88, + "grad_norm": 29.26377359814005, + "learning_rate": 1.2475067504934055e-05, + "loss": 0.9938, + "step": 72813 + }, + { + "epoch": 0.88, + "grad_norm": 12.184872957715335, + "learning_rate": 1.2474501498689721e-05, + "loss": 1.2595, + "step": 72816 + }, + { + "epoch": 0.88, + "grad_norm": 2.585637710846543, + "learning_rate": 1.247393548400081e-05, + "loss": 1.1222, + "step": 72819 + }, + { + "epoch": 0.88, + "grad_norm": 11.01911403175581, + "learning_rate": 1.2473369460869244e-05, + "loss": 1.5369, + "step": 72822 + }, + { + "epoch": 0.88, + "grad_norm": 8.687230147088194, + "learning_rate": 1.2472803429296965e-05, + "loss": 1.228, + "step": 72825 + }, + { + "epoch": 0.88, + "grad_norm": 2.9942418591203808, + "learning_rate": 1.2472237389285892e-05, + "loss": 1.2115, + "step": 72828 + }, + { + "epoch": 0.88, + "grad_norm": 8.879348929607794, + "learning_rate": 1.2471671340837966e-05, + "loss": 1.0661, + "step": 72831 + }, + { + "epoch": 0.88, + "grad_norm": 6.817807852932026, + "learning_rate": 1.2471105283955116e-05, + "loss": 1.6889, + "step": 72834 + }, + { + "epoch": 0.88, + "grad_norm": 20.66365644117694, + "learning_rate": 1.2470539218639275e-05, + "loss": 1.2233, + "step": 72837 + }, + { + "epoch": 0.88, + "grad_norm": 14.307044603022934, + "learning_rate": 1.2469973144892369e-05, + "loss": 1.925, + "step": 72840 + }, + { + "epoch": 0.88, + "grad_norm": 6.488241235089305, + "learning_rate": 1.2469407062716337e-05, + "loss": 1.1137, + "step": 72843 + }, + { + "epoch": 0.88, + "grad_norm": 52.434074474823774, + "learning_rate": 1.2468840972113113e-05, + "loss": 1.55, + "step": 72846 + }, + { + "epoch": 0.88, + "grad_norm": 39.205421287088576, + "learning_rate": 1.2468274873084616e-05, + "loss": 1.3593, + "step": 72849 + }, + { + "epoch": 0.88, + "grad_norm": 17.413146399731733, + "learning_rate": 1.246770876563279e-05, + "loss": 1.1006, + "step": 72852 + }, + { + "epoch": 0.88, + "grad_norm": 23.899751865172554, + "learning_rate": 1.2467142649759563e-05, + "loss": 1.119, + "step": 72855 + }, + { + "epoch": 0.88, + "grad_norm": 12.870288504186954, + "learning_rate": 1.2466576525466866e-05, + "loss": 1.3091, + "step": 72858 + }, + { + "epoch": 0.88, + "grad_norm": 98.0443953442231, + "learning_rate": 1.2466010392756631e-05, + "loss": 1.3328, + "step": 72861 + }, + { + "epoch": 0.88, + "grad_norm": 17.83482566165204, + "learning_rate": 1.2465444251630791e-05, + "loss": 1.1807, + "step": 72864 + }, + { + "epoch": 0.88, + "grad_norm": 4.35202444486909, + "learning_rate": 1.246487810209128e-05, + "loss": 1.4255, + "step": 72867 + }, + { + "epoch": 0.88, + "grad_norm": 15.284815645944905, + "learning_rate": 1.2464311944140027e-05, + "loss": 1.1461, + "step": 72870 + }, + { + "epoch": 0.88, + "grad_norm": 19.78071167369147, + "learning_rate": 1.2463745777778965e-05, + "loss": 1.4318, + "step": 72873 + }, + { + "epoch": 0.88, + "grad_norm": 19.541242697144405, + "learning_rate": 1.2463179603010025e-05, + "loss": 1.0584, + "step": 72876 + }, + { + "epoch": 0.88, + "grad_norm": 18.166443972752486, + "learning_rate": 1.2462613419835142e-05, + "loss": 0.798, + "step": 72879 + }, + { + "epoch": 0.88, + "grad_norm": 8.814981203727234, + "learning_rate": 1.2462047228256242e-05, + "loss": 1.3201, + "step": 72882 + }, + { + "epoch": 0.88, + "grad_norm": 14.633547447939748, + "learning_rate": 1.2461481028275266e-05, + "loss": 1.1504, + "step": 72885 + }, + { + "epoch": 0.88, + "grad_norm": 5.6069099702066305, + "learning_rate": 1.2460914819894142e-05, + "loss": 1.4563, + "step": 72888 + }, + { + "epoch": 0.88, + "grad_norm": 9.702298152069469, + "learning_rate": 1.24603486031148e-05, + "loss": 1.3208, + "step": 72891 + }, + { + "epoch": 0.88, + "grad_norm": 13.087175613926107, + "learning_rate": 1.2459782377939174e-05, + "loss": 1.1734, + "step": 72894 + }, + { + "epoch": 0.88, + "grad_norm": 76.35343727111963, + "learning_rate": 1.2459216144369196e-05, + "loss": 1.4784, + "step": 72897 + }, + { + "epoch": 0.88, + "grad_norm": 3.993876109471871, + "learning_rate": 1.2458649902406803e-05, + "loss": 0.9313, + "step": 72900 + }, + { + "epoch": 0.88, + "grad_norm": 7.446960712335193, + "learning_rate": 1.245808365205392e-05, + "loss": 1.3399, + "step": 72903 + }, + { + "epoch": 0.88, + "grad_norm": 15.189069806486282, + "learning_rate": 1.2457517393312485e-05, + "loss": 1.5478, + "step": 72906 + }, + { + "epoch": 0.88, + "grad_norm": 11.759482792312893, + "learning_rate": 1.2456951126184428e-05, + "loss": 1.0726, + "step": 72909 + }, + { + "epoch": 0.88, + "grad_norm": 13.678503762504512, + "learning_rate": 1.2456384850671683e-05, + "loss": 1.4586, + "step": 72912 + }, + { + "epoch": 0.88, + "grad_norm": 10.610418248531568, + "learning_rate": 1.2455818566776179e-05, + "loss": 1.4385, + "step": 72915 + }, + { + "epoch": 0.88, + "grad_norm": 18.19555133890319, + "learning_rate": 1.2455252274499852e-05, + "loss": 1.2361, + "step": 72918 + }, + { + "epoch": 0.88, + "grad_norm": 11.559259124645154, + "learning_rate": 1.2454685973844634e-05, + "loss": 1.4168, + "step": 72921 + }, + { + "epoch": 0.88, + "grad_norm": 8.650824847050043, + "learning_rate": 1.2454119664812454e-05, + "loss": 1.3502, + "step": 72924 + }, + { + "epoch": 0.88, + "grad_norm": 14.539239872115251, + "learning_rate": 1.245355334740525e-05, + "loss": 1.526, + "step": 72927 + }, + { + "epoch": 0.88, + "grad_norm": 16.385922206388912, + "learning_rate": 1.2452987021624951e-05, + "loss": 1.3627, + "step": 72930 + }, + { + "epoch": 0.88, + "grad_norm": 24.362656497919915, + "learning_rate": 1.2452420687473495e-05, + "loss": 1.0852, + "step": 72933 + }, + { + "epoch": 0.88, + "grad_norm": 9.046953526184959, + "learning_rate": 1.2451854344952805e-05, + "loss": 1.5731, + "step": 72936 + }, + { + "epoch": 0.88, + "grad_norm": 5.956007193866554, + "learning_rate": 1.2451287994064821e-05, + "loss": 1.1944, + "step": 72939 + }, + { + "epoch": 0.88, + "grad_norm": 28.838004417283855, + "learning_rate": 1.2450721634811475e-05, + "loss": 1.1545, + "step": 72942 + }, + { + "epoch": 0.88, + "grad_norm": 21.53817165151227, + "learning_rate": 1.2450155267194698e-05, + "loss": 0.9189, + "step": 72945 + }, + { + "epoch": 0.88, + "grad_norm": 13.322970466020044, + "learning_rate": 1.2449588891216423e-05, + "loss": 1.1694, + "step": 72948 + }, + { + "epoch": 0.88, + "grad_norm": 16.276823609349503, + "learning_rate": 1.2449022506878586e-05, + "loss": 1.2095, + "step": 72951 + }, + { + "epoch": 0.88, + "grad_norm": 6.231304627362016, + "learning_rate": 1.2448456114183117e-05, + "loss": 1.2497, + "step": 72954 + }, + { + "epoch": 0.88, + "grad_norm": 15.227498127816936, + "learning_rate": 1.2447889713131944e-05, + "loss": 1.4132, + "step": 72957 + }, + { + "epoch": 0.88, + "grad_norm": 20.10764058736445, + "learning_rate": 1.244732330372701e-05, + "loss": 1.1157, + "step": 72960 + }, + { + "epoch": 0.88, + "grad_norm": 19.505932309951636, + "learning_rate": 1.2446756885970244e-05, + "loss": 1.2681, + "step": 72963 + }, + { + "epoch": 0.88, + "grad_norm": 18.542844899602187, + "learning_rate": 1.2446190459863576e-05, + "loss": 1.3373, + "step": 72966 + }, + { + "epoch": 0.88, + "grad_norm": 11.845740345075757, + "learning_rate": 1.244562402540894e-05, + "loss": 1.4476, + "step": 72969 + }, + { + "epoch": 0.88, + "grad_norm": 5.685362658682823, + "learning_rate": 1.2445057582608271e-05, + "loss": 1.2748, + "step": 72972 + }, + { + "epoch": 0.88, + "grad_norm": 27.21673851823224, + "learning_rate": 1.2444491131463503e-05, + "loss": 1.1648, + "step": 72975 + }, + { + "epoch": 0.88, + "grad_norm": 6.868483596976782, + "learning_rate": 1.2443924671976564e-05, + "loss": 1.4658, + "step": 72978 + }, + { + "epoch": 0.88, + "grad_norm": 41.63176253685119, + "learning_rate": 1.2443358204149391e-05, + "loss": 1.3938, + "step": 72981 + }, + { + "epoch": 0.88, + "grad_norm": 15.018381054097858, + "learning_rate": 1.2442791727983918e-05, + "loss": 1.3392, + "step": 72984 + }, + { + "epoch": 0.88, + "grad_norm": 12.356356901056612, + "learning_rate": 1.2442225243482078e-05, + "loss": 1.1622, + "step": 72987 + }, + { + "epoch": 0.88, + "grad_norm": 27.247684571997333, + "learning_rate": 1.24416587506458e-05, + "loss": 1.1586, + "step": 72990 + }, + { + "epoch": 0.88, + "grad_norm": 4.163986563117845, + "learning_rate": 1.2441092249477022e-05, + "loss": 1.2944, + "step": 72993 + }, + { + "epoch": 0.88, + "grad_norm": 24.708640045688494, + "learning_rate": 1.2440525739977675e-05, + "loss": 1.4071, + "step": 72996 + }, + { + "epoch": 0.88, + "grad_norm": 26.747988201210998, + "learning_rate": 1.2439959222149694e-05, + "loss": 1.7091, + "step": 72999 + }, + { + "epoch": 0.88, + "grad_norm": 9.84310997033373, + "learning_rate": 1.2439392695995007e-05, + "loss": 1.3666, + "step": 73002 + }, + { + "epoch": 0.88, + "grad_norm": 12.441057621273057, + "learning_rate": 1.2438826161515556e-05, + "loss": 1.505, + "step": 73005 + }, + { + "epoch": 0.88, + "grad_norm": 7.3931663993491075, + "learning_rate": 1.2438259618713268e-05, + "loss": 1.4811, + "step": 73008 + }, + { + "epoch": 0.88, + "grad_norm": 22.549314039662534, + "learning_rate": 1.2437693067590077e-05, + "loss": 1.3465, + "step": 73011 + }, + { + "epoch": 0.88, + "grad_norm": 7.943156126784637, + "learning_rate": 1.2437126508147919e-05, + "loss": 1.1201, + "step": 73014 + }, + { + "epoch": 0.88, + "grad_norm": 18.746889175824304, + "learning_rate": 1.2436559940388726e-05, + "loss": 1.3722, + "step": 73017 + }, + { + "epoch": 0.88, + "grad_norm": 8.88700966561594, + "learning_rate": 1.2435993364314432e-05, + "loss": 1.3464, + "step": 73020 + }, + { + "epoch": 0.88, + "grad_norm": 18.107200400795247, + "learning_rate": 1.243542677992697e-05, + "loss": 1.3619, + "step": 73023 + }, + { + "epoch": 0.88, + "grad_norm": 9.087066287356059, + "learning_rate": 1.2434860187228272e-05, + "loss": 1.2465, + "step": 73026 + }, + { + "epoch": 0.88, + "grad_norm": 6.844028098471071, + "learning_rate": 1.2434293586220274e-05, + "loss": 1.2884, + "step": 73029 + }, + { + "epoch": 0.88, + "grad_norm": 8.785089181045869, + "learning_rate": 1.2433726976904908e-05, + "loss": 1.3718, + "step": 73032 + }, + { + "epoch": 0.88, + "grad_norm": 4.966052143206714, + "learning_rate": 1.243316035928411e-05, + "loss": 1.3655, + "step": 73035 + }, + { + "epoch": 0.88, + "grad_norm": 9.665512576892185, + "learning_rate": 1.2432593733359812e-05, + "loss": 1.1892, + "step": 73038 + }, + { + "epoch": 0.88, + "grad_norm": 6.8814166233476755, + "learning_rate": 1.2432027099133947e-05, + "loss": 1.4189, + "step": 73041 + }, + { + "epoch": 0.88, + "grad_norm": 7.264321432007085, + "learning_rate": 1.243146045660845e-05, + "loss": 1.5295, + "step": 73044 + }, + { + "epoch": 0.88, + "grad_norm": 18.161294442658505, + "learning_rate": 1.2430893805785256e-05, + "loss": 1.1271, + "step": 73047 + }, + { + "epoch": 0.88, + "grad_norm": 8.038200718409223, + "learning_rate": 1.2430327146666295e-05, + "loss": 1.4783, + "step": 73050 + }, + { + "epoch": 0.88, + "grad_norm": 19.646830143435807, + "learning_rate": 1.2429760479253503e-05, + "loss": 1.209, + "step": 73053 + }, + { + "epoch": 0.88, + "grad_norm": 2.9168975919892843, + "learning_rate": 1.2429193803548812e-05, + "loss": 1.3582, + "step": 73056 + }, + { + "epoch": 0.88, + "grad_norm": 11.069755529539583, + "learning_rate": 1.2428627119554159e-05, + "loss": 1.5962, + "step": 73059 + }, + { + "epoch": 0.88, + "grad_norm": 41.730443870774295, + "learning_rate": 1.2428060427271478e-05, + "loss": 1.4442, + "step": 73062 + }, + { + "epoch": 0.88, + "grad_norm": 17.175510945270627, + "learning_rate": 1.24274937267027e-05, + "loss": 1.5815, + "step": 73065 + }, + { + "epoch": 0.88, + "grad_norm": 20.897206524244357, + "learning_rate": 1.242692701784976e-05, + "loss": 1.1705, + "step": 73068 + }, + { + "epoch": 0.88, + "grad_norm": 9.884872556761525, + "learning_rate": 1.2426360300714593e-05, + "loss": 1.4825, + "step": 73071 + }, + { + "epoch": 0.88, + "grad_norm": 9.638099791647393, + "learning_rate": 1.2425793575299133e-05, + "loss": 1.1722, + "step": 73074 + }, + { + "epoch": 0.88, + "grad_norm": 5.159954652058385, + "learning_rate": 1.2425226841605311e-05, + "loss": 1.1427, + "step": 73077 + }, + { + "epoch": 0.88, + "grad_norm": 17.954023097978837, + "learning_rate": 1.2424660099635065e-05, + "loss": 1.2289, + "step": 73080 + }, + { + "epoch": 0.88, + "grad_norm": 12.809344038804037, + "learning_rate": 1.2424093349390328e-05, + "loss": 1.2242, + "step": 73083 + }, + { + "epoch": 0.88, + "grad_norm": 50.383932140535826, + "learning_rate": 1.242352659087303e-05, + "loss": 1.6582, + "step": 73086 + }, + { + "epoch": 0.88, + "grad_norm": 11.747069620749114, + "learning_rate": 1.2422959824085113e-05, + "loss": 1.1876, + "step": 73089 + }, + { + "epoch": 0.88, + "grad_norm": 18.762973749069754, + "learning_rate": 1.2422393049028506e-05, + "loss": 1.3795, + "step": 73092 + }, + { + "epoch": 0.88, + "grad_norm": 47.16803792122129, + "learning_rate": 1.2421826265705143e-05, + "loss": 1.2777, + "step": 73095 + }, + { + "epoch": 0.88, + "grad_norm": 23.277863720467792, + "learning_rate": 1.2421259474116958e-05, + "loss": 1.5377, + "step": 73098 + }, + { + "epoch": 0.88, + "grad_norm": 6.507282768150901, + "learning_rate": 1.2420692674265888e-05, + "loss": 1.1835, + "step": 73101 + }, + { + "epoch": 0.88, + "grad_norm": 6.629332815262016, + "learning_rate": 1.2420125866153867e-05, + "loss": 1.5365, + "step": 73104 + }, + { + "epoch": 0.88, + "grad_norm": 18.059025707853884, + "learning_rate": 1.2419559049782828e-05, + "loss": 1.7651, + "step": 73107 + }, + { + "epoch": 0.88, + "grad_norm": 3.79764369372483, + "learning_rate": 1.2418992225154702e-05, + "loss": 1.6995, + "step": 73110 + }, + { + "epoch": 0.88, + "grad_norm": 5.2206442526365695, + "learning_rate": 1.2418425392271432e-05, + "loss": 1.6639, + "step": 73113 + }, + { + "epoch": 0.88, + "grad_norm": 75.78853228046901, + "learning_rate": 1.2417858551134945e-05, + "loss": 1.0533, + "step": 73116 + }, + { + "epoch": 0.88, + "grad_norm": 31.59264412519004, + "learning_rate": 1.2417291701747174e-05, + "loss": 1.2469, + "step": 73119 + }, + { + "epoch": 0.88, + "grad_norm": 15.092269521758247, + "learning_rate": 1.2416724844110062e-05, + "loss": 1.0255, + "step": 73122 + }, + { + "epoch": 0.88, + "grad_norm": 11.943985208932695, + "learning_rate": 1.2416157978225536e-05, + "loss": 1.3319, + "step": 73125 + }, + { + "epoch": 0.88, + "grad_norm": 5.906728845936556, + "learning_rate": 1.2415591104095535e-05, + "loss": 1.1943, + "step": 73128 + }, + { + "epoch": 0.88, + "grad_norm": 39.71955019496662, + "learning_rate": 1.241502422172199e-05, + "loss": 1.6601, + "step": 73131 + }, + { + "epoch": 0.88, + "grad_norm": 12.429888846930151, + "learning_rate": 1.241445733110684e-05, + "loss": 1.8251, + "step": 73134 + }, + { + "epoch": 0.88, + "grad_norm": 11.828600208413237, + "learning_rate": 1.2413890432252013e-05, + "loss": 1.1573, + "step": 73137 + }, + { + "epoch": 0.88, + "grad_norm": 4.036795853773928, + "learning_rate": 1.2413323525159448e-05, + "loss": 1.2189, + "step": 73140 + }, + { + "epoch": 0.88, + "grad_norm": 13.813746015806387, + "learning_rate": 1.2412756609831083e-05, + "loss": 1.2931, + "step": 73143 + }, + { + "epoch": 0.88, + "grad_norm": 13.39579277371603, + "learning_rate": 1.2412189686268844e-05, + "loss": 1.5022, + "step": 73146 + }, + { + "epoch": 0.88, + "grad_norm": 10.366650204433888, + "learning_rate": 1.2411622754474676e-05, + "loss": 1.4799, + "step": 73149 + }, + { + "epoch": 0.88, + "grad_norm": 19.28930931329395, + "learning_rate": 1.2411055814450501e-05, + "loss": 1.2145, + "step": 73152 + }, + { + "epoch": 0.88, + "grad_norm": 13.011449150255682, + "learning_rate": 1.2410488866198267e-05, + "loss": 1.389, + "step": 73155 + }, + { + "epoch": 0.88, + "grad_norm": 23.28378520396615, + "learning_rate": 1.24099219097199e-05, + "loss": 1.3506, + "step": 73158 + }, + { + "epoch": 0.88, + "grad_norm": 6.92571110368025, + "learning_rate": 1.2409354945017338e-05, + "loss": 1.0816, + "step": 73161 + }, + { + "epoch": 0.88, + "grad_norm": 13.852475255370528, + "learning_rate": 1.2408787972092515e-05, + "loss": 1.3391, + "step": 73164 + }, + { + "epoch": 0.88, + "grad_norm": 5.350793400197045, + "learning_rate": 1.2408220990947366e-05, + "loss": 1.371, + "step": 73167 + }, + { + "epoch": 0.88, + "grad_norm": 11.207853231176989, + "learning_rate": 1.2407654001583829e-05, + "loss": 1.2402, + "step": 73170 + }, + { + "epoch": 0.88, + "grad_norm": 16.48269686554874, + "learning_rate": 1.2407087004003833e-05, + "loss": 1.4985, + "step": 73173 + }, + { + "epoch": 0.88, + "grad_norm": 34.168448813164666, + "learning_rate": 1.2406519998209318e-05, + "loss": 1.1538, + "step": 73176 + }, + { + "epoch": 0.88, + "grad_norm": 7.729946177225208, + "learning_rate": 1.2405952984202215e-05, + "loss": 1.4385, + "step": 73179 + }, + { + "epoch": 0.88, + "grad_norm": 19.52270831052387, + "learning_rate": 1.2405385961984463e-05, + "loss": 1.6121, + "step": 73182 + }, + { + "epoch": 0.88, + "grad_norm": 47.87557947753742, + "learning_rate": 1.2404818931557994e-05, + "loss": 0.9983, + "step": 73185 + }, + { + "epoch": 0.88, + "grad_norm": 11.076113166097228, + "learning_rate": 1.2404251892924745e-05, + "loss": 1.402, + "step": 73188 + }, + { + "epoch": 0.88, + "grad_norm": 7.783775332338169, + "learning_rate": 1.240368484608665e-05, + "loss": 1.4961, + "step": 73191 + }, + { + "epoch": 0.88, + "grad_norm": 2.398398243788334, + "learning_rate": 1.2403117791045643e-05, + "loss": 1.1595, + "step": 73194 + }, + { + "epoch": 0.88, + "grad_norm": 40.658811431034216, + "learning_rate": 1.2402550727803662e-05, + "loss": 1.4423, + "step": 73197 + }, + { + "epoch": 0.88, + "grad_norm": 9.647880020275313, + "learning_rate": 1.2401983656362642e-05, + "loss": 1.1886, + "step": 73200 + }, + { + "epoch": 0.88, + "grad_norm": 51.43081558739332, + "learning_rate": 1.2401416576724517e-05, + "loss": 1.3355, + "step": 73203 + }, + { + "epoch": 0.88, + "grad_norm": 6.787263746204008, + "learning_rate": 1.2400849488891217e-05, + "loss": 1.2528, + "step": 73206 + }, + { + "epoch": 0.88, + "grad_norm": 6.594322606812946, + "learning_rate": 1.240028239286469e-05, + "loss": 1.3546, + "step": 73209 + }, + { + "epoch": 0.88, + "grad_norm": 2.9384799735530596, + "learning_rate": 1.2399715288646858e-05, + "loss": 1.5606, + "step": 73212 + }, + { + "epoch": 0.88, + "grad_norm": 3.979202861146471, + "learning_rate": 1.2399148176239666e-05, + "loss": 1.0247, + "step": 73215 + }, + { + "epoch": 0.88, + "grad_norm": 10.014160005810146, + "learning_rate": 1.2398581055645043e-05, + "loss": 1.637, + "step": 73218 + }, + { + "epoch": 0.88, + "grad_norm": 11.688034143077513, + "learning_rate": 1.2398013926864926e-05, + "loss": 1.4451, + "step": 73221 + }, + { + "epoch": 0.88, + "grad_norm": 26.64719699151644, + "learning_rate": 1.2397446789901254e-05, + "loss": 1.2662, + "step": 73224 + }, + { + "epoch": 0.88, + "grad_norm": 12.803153168238333, + "learning_rate": 1.2396879644755958e-05, + "loss": 1.5181, + "step": 73227 + }, + { + "epoch": 0.88, + "grad_norm": 16.476920660708736, + "learning_rate": 1.2396312491430977e-05, + "loss": 1.4391, + "step": 73230 + }, + { + "epoch": 0.88, + "grad_norm": 12.263589171909143, + "learning_rate": 1.239574532992824e-05, + "loss": 1.2276, + "step": 73233 + }, + { + "epoch": 0.88, + "grad_norm": 8.164830274553681, + "learning_rate": 1.2395178160249692e-05, + "loss": 1.2645, + "step": 73236 + }, + { + "epoch": 0.88, + "grad_norm": 5.464952664496858, + "learning_rate": 1.2394610982397261e-05, + "loss": 1.4128, + "step": 73239 + }, + { + "epoch": 0.88, + "grad_norm": 5.348690839602637, + "learning_rate": 1.2394043796372887e-05, + "loss": 1.3296, + "step": 73242 + }, + { + "epoch": 0.88, + "grad_norm": 6.898870828210687, + "learning_rate": 1.2393476602178503e-05, + "loss": 1.3813, + "step": 73245 + }, + { + "epoch": 0.88, + "grad_norm": 6.534425390413608, + "learning_rate": 1.2392909399816044e-05, + "loss": 1.296, + "step": 73248 + }, + { + "epoch": 0.88, + "grad_norm": 18.189651778085565, + "learning_rate": 1.2392342189287452e-05, + "loss": 1.3452, + "step": 73251 + }, + { + "epoch": 0.88, + "grad_norm": 16.87542890812915, + "learning_rate": 1.2391774970594652e-05, + "loss": 1.2191, + "step": 73254 + }, + { + "epoch": 0.88, + "grad_norm": 28.927810748380324, + "learning_rate": 1.2391207743739588e-05, + "loss": 1.1874, + "step": 73257 + }, + { + "epoch": 0.88, + "grad_norm": 31.22133376093573, + "learning_rate": 1.2390640508724192e-05, + "loss": 1.2034, + "step": 73260 + }, + { + "epoch": 0.88, + "grad_norm": 2.663415364581756, + "learning_rate": 1.2390073265550407e-05, + "loss": 1.1136, + "step": 73263 + }, + { + "epoch": 0.88, + "grad_norm": 19.855551728313007, + "learning_rate": 1.2389506014220155e-05, + "loss": 1.3221, + "step": 73266 + }, + { + "epoch": 0.88, + "grad_norm": 5.312598026561036, + "learning_rate": 1.2388938754735385e-05, + "loss": 1.3707, + "step": 73269 + }, + { + "epoch": 0.88, + "grad_norm": 12.891568918013784, + "learning_rate": 1.2388371487098025e-05, + "loss": 1.4874, + "step": 73272 + }, + { + "epoch": 0.88, + "grad_norm": 12.291118991651626, + "learning_rate": 1.2387804211310014e-05, + "loss": 1.4594, + "step": 73275 + }, + { + "epoch": 0.88, + "grad_norm": 11.363190545116984, + "learning_rate": 1.2387236927373284e-05, + "loss": 1.3947, + "step": 73278 + }, + { + "epoch": 0.88, + "grad_norm": 7.2065728693612074, + "learning_rate": 1.2386669635289779e-05, + "loss": 1.4796, + "step": 73281 + }, + { + "epoch": 0.88, + "grad_norm": 9.69529149206455, + "learning_rate": 1.238610233506143e-05, + "loss": 1.3897, + "step": 73284 + }, + { + "epoch": 0.88, + "grad_norm": 12.988107663916685, + "learning_rate": 1.238553502669017e-05, + "loss": 1.1937, + "step": 73287 + }, + { + "epoch": 0.88, + "grad_norm": 36.45377726344995, + "learning_rate": 1.2384967710177941e-05, + "loss": 1.3317, + "step": 73290 + }, + { + "epoch": 0.88, + "grad_norm": 7.635873770521982, + "learning_rate": 1.2384400385526676e-05, + "loss": 1.4293, + "step": 73293 + }, + { + "epoch": 0.88, + "grad_norm": 9.920395217535239, + "learning_rate": 1.238383305273831e-05, + "loss": 1.3837, + "step": 73296 + }, + { + "epoch": 0.88, + "grad_norm": 8.010735999464394, + "learning_rate": 1.238326571181478e-05, + "loss": 1.3169, + "step": 73299 + }, + { + "epoch": 0.88, + "grad_norm": 20.565993041681757, + "learning_rate": 1.2382698362758023e-05, + "loss": 1.6735, + "step": 73302 + }, + { + "epoch": 0.88, + "grad_norm": 6.0185080924183225, + "learning_rate": 1.2382131005569978e-05, + "loss": 1.3442, + "step": 73305 + }, + { + "epoch": 0.88, + "grad_norm": 11.780712635959484, + "learning_rate": 1.2381563640252575e-05, + "loss": 1.2977, + "step": 73308 + }, + { + "epoch": 0.88, + "grad_norm": 3.142977839353885, + "learning_rate": 1.2380996266807751e-05, + "loss": 1.1538, + "step": 73311 + }, + { + "epoch": 0.88, + "grad_norm": 6.132901170540214, + "learning_rate": 1.2380428885237448e-05, + "loss": 1.6082, + "step": 73314 + }, + { + "epoch": 0.88, + "grad_norm": 7.512704780670197, + "learning_rate": 1.23798614955436e-05, + "loss": 1.1781, + "step": 73317 + }, + { + "epoch": 0.88, + "grad_norm": 2.5688305445554303, + "learning_rate": 1.2379294097728136e-05, + "loss": 1.3116, + "step": 73320 + }, + { + "epoch": 0.88, + "grad_norm": 3.296845179319428, + "learning_rate": 1.2378726691793e-05, + "loss": 1.3286, + "step": 73323 + }, + { + "epoch": 0.88, + "grad_norm": 3.0982268159459454, + "learning_rate": 1.2378159277740132e-05, + "loss": 1.6982, + "step": 73326 + }, + { + "epoch": 0.88, + "grad_norm": 36.36554052510147, + "learning_rate": 1.2377591855571458e-05, + "loss": 1.2958, + "step": 73329 + }, + { + "epoch": 0.88, + "grad_norm": 10.444246621728311, + "learning_rate": 1.237702442528892e-05, + "loss": 1.5926, + "step": 73332 + }, + { + "epoch": 0.88, + "grad_norm": 7.168604637296101, + "learning_rate": 1.2376456986894455e-05, + "loss": 1.4307, + "step": 73335 + }, + { + "epoch": 0.88, + "grad_norm": 18.12274684608635, + "learning_rate": 1.2375889540389998e-05, + "loss": 1.2244, + "step": 73338 + }, + { + "epoch": 0.88, + "grad_norm": 30.545593130022983, + "learning_rate": 1.2375322085777483e-05, + "loss": 1.4978, + "step": 73341 + }, + { + "epoch": 0.88, + "grad_norm": 10.640028745830737, + "learning_rate": 1.2374754623058853e-05, + "loss": 1.2086, + "step": 73344 + }, + { + "epoch": 0.88, + "grad_norm": 10.608464590566973, + "learning_rate": 1.2374187152236041e-05, + "loss": 1.528, + "step": 73347 + }, + { + "epoch": 0.88, + "grad_norm": 14.165476290060417, + "learning_rate": 1.2373619673310984e-05, + "loss": 1.4926, + "step": 73350 + }, + { + "epoch": 0.88, + "grad_norm": 37.54711871224283, + "learning_rate": 1.2373052186285612e-05, + "loss": 1.5907, + "step": 73353 + }, + { + "epoch": 0.88, + "grad_norm": 21.700401087452665, + "learning_rate": 1.2372484691161875e-05, + "loss": 1.4192, + "step": 73356 + }, + { + "epoch": 0.88, + "grad_norm": 10.379494722516716, + "learning_rate": 1.2371917187941697e-05, + "loss": 1.435, + "step": 73359 + }, + { + "epoch": 0.88, + "grad_norm": 22.270081637281958, + "learning_rate": 1.2371349676627022e-05, + "loss": 1.4467, + "step": 73362 + }, + { + "epoch": 0.88, + "grad_norm": 3.84224112176038, + "learning_rate": 1.2370782157219785e-05, + "loss": 1.5784, + "step": 73365 + }, + { + "epoch": 0.88, + "grad_norm": 8.879998932669235, + "learning_rate": 1.2370214629721924e-05, + "loss": 1.3205, + "step": 73368 + }, + { + "epoch": 0.88, + "grad_norm": 26.21012626077752, + "learning_rate": 1.2369647094135374e-05, + "loss": 1.3124, + "step": 73371 + }, + { + "epoch": 0.88, + "grad_norm": 15.399715021980285, + "learning_rate": 1.2369079550462066e-05, + "loss": 1.3744, + "step": 73374 + }, + { + "epoch": 0.88, + "grad_norm": 3.8471045323784265, + "learning_rate": 1.236851199870395e-05, + "loss": 1.1674, + "step": 73377 + }, + { + "epoch": 0.88, + "grad_norm": 7.353438908531257, + "learning_rate": 1.2367944438862953e-05, + "loss": 1.4796, + "step": 73380 + }, + { + "epoch": 0.88, + "grad_norm": 4.9218022019794025, + "learning_rate": 1.2367376870941012e-05, + "loss": 1.4892, + "step": 73383 + }, + { + "epoch": 0.88, + "grad_norm": 14.894202904809632, + "learning_rate": 1.236680929494007e-05, + "loss": 1.532, + "step": 73386 + }, + { + "epoch": 0.88, + "grad_norm": 22.009884871538784, + "learning_rate": 1.2366241710862061e-05, + "loss": 1.1836, + "step": 73389 + }, + { + "epoch": 0.88, + "grad_norm": 14.078000501706606, + "learning_rate": 1.2365674118708921e-05, + "loss": 1.3869, + "step": 73392 + }, + { + "epoch": 0.88, + "grad_norm": 11.876294550537585, + "learning_rate": 1.2365106518482583e-05, + "loss": 1.1843, + "step": 73395 + }, + { + "epoch": 0.88, + "grad_norm": 6.871127280660534, + "learning_rate": 1.2364538910184994e-05, + "loss": 1.0913, + "step": 73398 + }, + { + "epoch": 0.88, + "grad_norm": 2.948188393709502, + "learning_rate": 1.2363971293818082e-05, + "loss": 1.584, + "step": 73401 + }, + { + "epoch": 0.88, + "grad_norm": 3.405773480089607, + "learning_rate": 1.236340366938379e-05, + "loss": 1.5548, + "step": 73404 + }, + { + "epoch": 0.88, + "grad_norm": 6.702354203696071, + "learning_rate": 1.236283603688405e-05, + "loss": 1.3529, + "step": 73407 + }, + { + "epoch": 0.88, + "grad_norm": 11.503031963296747, + "learning_rate": 1.2362268396320803e-05, + "loss": 1.3703, + "step": 73410 + }, + { + "epoch": 0.88, + "grad_norm": 35.721236656992296, + "learning_rate": 1.2361700747695983e-05, + "loss": 1.2131, + "step": 73413 + }, + { + "epoch": 0.88, + "grad_norm": 9.659313317678457, + "learning_rate": 1.236113309101153e-05, + "loss": 1.528, + "step": 73416 + }, + { + "epoch": 0.88, + "grad_norm": 19.132746671408153, + "learning_rate": 1.2360565426269382e-05, + "loss": 1.2918, + "step": 73419 + }, + { + "epoch": 0.88, + "grad_norm": 3.300567742414142, + "learning_rate": 1.2359997753471474e-05, + "loss": 1.261, + "step": 73422 + }, + { + "epoch": 0.88, + "grad_norm": 4.382115381132273, + "learning_rate": 1.2359430072619742e-05, + "loss": 1.3986, + "step": 73425 + }, + { + "epoch": 0.88, + "grad_norm": 12.460724978278074, + "learning_rate": 1.2358862383716126e-05, + "loss": 1.3801, + "step": 73428 + }, + { + "epoch": 0.88, + "grad_norm": 12.41756280272217, + "learning_rate": 1.2358294686762562e-05, + "loss": 1.6267, + "step": 73431 + }, + { + "epoch": 0.88, + "grad_norm": 15.670991781877595, + "learning_rate": 1.235772698176099e-05, + "loss": 1.8398, + "step": 73434 + }, + { + "epoch": 0.88, + "grad_norm": 11.76503925392025, + "learning_rate": 1.2357159268713339e-05, + "loss": 1.0796, + "step": 73437 + }, + { + "epoch": 0.88, + "grad_norm": 21.558982751761846, + "learning_rate": 1.2356591547621559e-05, + "loss": 1.466, + "step": 73440 + }, + { + "epoch": 0.88, + "grad_norm": 6.738373715879998, + "learning_rate": 1.2356023818487579e-05, + "loss": 1.3242, + "step": 73443 + }, + { + "epoch": 0.88, + "grad_norm": 6.065721041351441, + "learning_rate": 1.2355456081313337e-05, + "loss": 1.3838, + "step": 73446 + }, + { + "epoch": 0.88, + "grad_norm": 5.97294099089111, + "learning_rate": 1.2354888336100772e-05, + "loss": 1.7127, + "step": 73449 + }, + { + "epoch": 0.88, + "grad_norm": 7.399354956255622, + "learning_rate": 1.2354320582851822e-05, + "loss": 1.4115, + "step": 73452 + }, + { + "epoch": 0.88, + "grad_norm": 14.387203365349801, + "learning_rate": 1.2353752821568425e-05, + "loss": 1.3839, + "step": 73455 + }, + { + "epoch": 0.88, + "grad_norm": 4.5425883469119395, + "learning_rate": 1.2353185052252517e-05, + "loss": 0.9401, + "step": 73458 + }, + { + "epoch": 0.88, + "grad_norm": 8.688521668406162, + "learning_rate": 1.2352617274906033e-05, + "loss": 1.1819, + "step": 73461 + }, + { + "epoch": 0.88, + "grad_norm": 8.77390703055462, + "learning_rate": 1.2352049489530917e-05, + "loss": 1.3604, + "step": 73464 + }, + { + "epoch": 0.88, + "grad_norm": 2.377284529234241, + "learning_rate": 1.2351481696129101e-05, + "loss": 1.2983, + "step": 73467 + }, + { + "epoch": 0.88, + "grad_norm": 9.273266891151138, + "learning_rate": 1.2350913894702526e-05, + "loss": 1.3276, + "step": 73470 + }, + { + "epoch": 0.88, + "grad_norm": 9.544991665918605, + "learning_rate": 1.2350346085253131e-05, + "loss": 1.3709, + "step": 73473 + }, + { + "epoch": 0.88, + "grad_norm": 6.327456199410352, + "learning_rate": 1.2349778267782849e-05, + "loss": 1.3559, + "step": 73476 + }, + { + "epoch": 0.88, + "grad_norm": 7.111702794056244, + "learning_rate": 1.234921044229362e-05, + "loss": 1.1324, + "step": 73479 + }, + { + "epoch": 0.88, + "grad_norm": 6.411369984565012, + "learning_rate": 1.2348642608787383e-05, + "loss": 1.5959, + "step": 73482 + }, + { + "epoch": 0.88, + "grad_norm": 15.279348440909335, + "learning_rate": 1.2348074767266074e-05, + "loss": 1.5576, + "step": 73485 + }, + { + "epoch": 0.88, + "grad_norm": 13.99071330451756, + "learning_rate": 1.2347506917731631e-05, + "loss": 1.4316, + "step": 73488 + }, + { + "epoch": 0.88, + "grad_norm": 7.415950186412928, + "learning_rate": 1.2346939060185992e-05, + "loss": 1.5192, + "step": 73491 + }, + { + "epoch": 0.88, + "grad_norm": 12.85206006187545, + "learning_rate": 1.2346371194631097e-05, + "loss": 1.4717, + "step": 73494 + }, + { + "epoch": 0.88, + "grad_norm": 28.762051058745598, + "learning_rate": 1.2345803321068883e-05, + "loss": 1.2555, + "step": 73497 + }, + { + "epoch": 0.88, + "grad_norm": 24.040733087053326, + "learning_rate": 1.2345235439501287e-05, + "loss": 1.0526, + "step": 73500 + }, + { + "epoch": 0.88, + "grad_norm": 17.83271707575828, + "learning_rate": 1.2344667549930244e-05, + "loss": 1.4887, + "step": 73503 + }, + { + "epoch": 0.88, + "grad_norm": 7.876346199886205, + "learning_rate": 1.2344099652357698e-05, + "loss": 1.1655, + "step": 73506 + }, + { + "epoch": 0.88, + "grad_norm": 18.382747150122615, + "learning_rate": 1.2343531746785584e-05, + "loss": 1.2296, + "step": 73509 + }, + { + "epoch": 0.88, + "grad_norm": 6.030422243791523, + "learning_rate": 1.234296383321584e-05, + "loss": 0.8692, + "step": 73512 + }, + { + "epoch": 0.88, + "grad_norm": 27.094240192027666, + "learning_rate": 1.2342395911650403e-05, + "loss": 1.0938, + "step": 73515 + }, + { + "epoch": 0.88, + "grad_norm": 6.133970162944081, + "learning_rate": 1.2341827982091214e-05, + "loss": 1.1788, + "step": 73518 + }, + { + "epoch": 0.88, + "grad_norm": 4.4070009194755135, + "learning_rate": 1.2341260044540212e-05, + "loss": 1.5041, + "step": 73521 + }, + { + "epoch": 0.88, + "grad_norm": 23.725000160334552, + "learning_rate": 1.2340692098999327e-05, + "loss": 1.4972, + "step": 73524 + }, + { + "epoch": 0.88, + "grad_norm": 3.010879026516235, + "learning_rate": 1.2340124145470507e-05, + "loss": 1.4882, + "step": 73527 + }, + { + "epoch": 0.88, + "grad_norm": 3.7988631401674446, + "learning_rate": 1.2339556183955686e-05, + "loss": 1.2318, + "step": 73530 + }, + { + "epoch": 0.88, + "grad_norm": 35.836386818611786, + "learning_rate": 1.2338988214456801e-05, + "loss": 1.3998, + "step": 73533 + }, + { + "epoch": 0.88, + "grad_norm": 8.832377049246857, + "learning_rate": 1.2338420236975792e-05, + "loss": 1.5014, + "step": 73536 + }, + { + "epoch": 0.88, + "grad_norm": 10.075272649151424, + "learning_rate": 1.2337852251514599e-05, + "loss": 1.1918, + "step": 73539 + }, + { + "epoch": 0.88, + "grad_norm": 17.208295022004414, + "learning_rate": 1.2337284258075158e-05, + "loss": 1.4247, + "step": 73542 + }, + { + "epoch": 0.88, + "grad_norm": 2.758303271738096, + "learning_rate": 1.2336716256659405e-05, + "loss": 1.2314, + "step": 73545 + }, + { + "epoch": 0.88, + "grad_norm": 17.184258812007396, + "learning_rate": 1.2336148247269282e-05, + "loss": 1.0565, + "step": 73548 + }, + { + "epoch": 0.88, + "grad_norm": 5.109752173371861, + "learning_rate": 1.2335580229906727e-05, + "loss": 1.4883, + "step": 73551 + }, + { + "epoch": 0.88, + "grad_norm": 10.373659057964543, + "learning_rate": 1.233501220457368e-05, + "loss": 1.2143, + "step": 73554 + }, + { + "epoch": 0.88, + "grad_norm": 3.2099037908124144, + "learning_rate": 1.2334444171272073e-05, + "loss": 1.3285, + "step": 73557 + }, + { + "epoch": 0.88, + "grad_norm": 8.792884471858551, + "learning_rate": 1.2333876130003852e-05, + "loss": 1.2315, + "step": 73560 + }, + { + "epoch": 0.88, + "grad_norm": 178.0403487284838, + "learning_rate": 1.2333308080770954e-05, + "loss": 1.3429, + "step": 73563 + }, + { + "epoch": 0.88, + "grad_norm": 10.413533817113095, + "learning_rate": 1.2332740023575313e-05, + "loss": 1.2537, + "step": 73566 + }, + { + "epoch": 0.88, + "grad_norm": 17.08714212685175, + "learning_rate": 1.233217195841887e-05, + "loss": 1.4749, + "step": 73569 + }, + { + "epoch": 0.88, + "grad_norm": 9.443556427962468, + "learning_rate": 1.2331603885303567e-05, + "loss": 1.4663, + "step": 73572 + }, + { + "epoch": 0.88, + "grad_norm": 6.210489361566935, + "learning_rate": 1.2331035804231339e-05, + "loss": 1.4392, + "step": 73575 + }, + { + "epoch": 0.88, + "grad_norm": 12.197519794019229, + "learning_rate": 1.2330467715204124e-05, + "loss": 1.7895, + "step": 73578 + }, + { + "epoch": 0.88, + "grad_norm": 14.595081271463593, + "learning_rate": 1.2329899618223862e-05, + "loss": 1.3199, + "step": 73581 + }, + { + "epoch": 0.88, + "grad_norm": 36.33234441094494, + "learning_rate": 1.2329331513292495e-05, + "loss": 1.2161, + "step": 73584 + }, + { + "epoch": 0.88, + "grad_norm": 6.871738464055064, + "learning_rate": 1.2328763400411956e-05, + "loss": 1.1013, + "step": 73587 + }, + { + "epoch": 0.88, + "grad_norm": 41.52822739574011, + "learning_rate": 1.2328195279584185e-05, + "loss": 1.6948, + "step": 73590 + }, + { + "epoch": 0.88, + "grad_norm": 7.118389581924228, + "learning_rate": 1.2327627150811127e-05, + "loss": 1.2729, + "step": 73593 + }, + { + "epoch": 0.88, + "grad_norm": 8.916475455948104, + "learning_rate": 1.2327059014094712e-05, + "loss": 1.1888, + "step": 73596 + }, + { + "epoch": 0.89, + "grad_norm": 19.82965752381223, + "learning_rate": 1.232649086943688e-05, + "loss": 1.306, + "step": 73599 + }, + { + "epoch": 0.89, + "grad_norm": 25.73287545094314, + "learning_rate": 1.2325922716839577e-05, + "loss": 1.3631, + "step": 73602 + }, + { + "epoch": 0.89, + "grad_norm": 14.643746164996886, + "learning_rate": 1.2325354556304738e-05, + "loss": 1.1631, + "step": 73605 + }, + { + "epoch": 0.89, + "grad_norm": 39.57203833292844, + "learning_rate": 1.23247863878343e-05, + "loss": 1.2413, + "step": 73608 + }, + { + "epoch": 0.89, + "grad_norm": 16.64746500734821, + "learning_rate": 1.2324218211430204e-05, + "loss": 1.6229, + "step": 73611 + }, + { + "epoch": 0.89, + "grad_norm": 8.361071657092275, + "learning_rate": 1.2323650027094387e-05, + "loss": 1.1939, + "step": 73614 + }, + { + "epoch": 0.89, + "grad_norm": 8.801400577981978, + "learning_rate": 1.2323081834828792e-05, + "loss": 1.386, + "step": 73617 + }, + { + "epoch": 0.89, + "grad_norm": 10.82593673588669, + "learning_rate": 1.2322513634635353e-05, + "loss": 1.0871, + "step": 73620 + }, + { + "epoch": 0.89, + "grad_norm": 10.061359897495427, + "learning_rate": 1.2321945426516013e-05, + "loss": 1.5173, + "step": 73623 + }, + { + "epoch": 0.89, + "grad_norm": 5.224901212085083, + "learning_rate": 1.232137721047271e-05, + "loss": 1.1537, + "step": 73626 + }, + { + "epoch": 0.89, + "grad_norm": 41.015673707419545, + "learning_rate": 1.2320808986507382e-05, + "loss": 1.3274, + "step": 73629 + }, + { + "epoch": 0.89, + "grad_norm": 21.98149672791187, + "learning_rate": 1.2320240754621967e-05, + "loss": 1.2456, + "step": 73632 + }, + { + "epoch": 0.89, + "grad_norm": 10.744094288707807, + "learning_rate": 1.2319672514818407e-05, + "loss": 1.408, + "step": 73635 + }, + { + "epoch": 0.89, + "grad_norm": 6.7767385499531905, + "learning_rate": 1.2319104267098643e-05, + "loss": 1.4613, + "step": 73638 + }, + { + "epoch": 0.89, + "grad_norm": 6.631143481119982, + "learning_rate": 1.231853601146461e-05, + "loss": 1.1863, + "step": 73641 + }, + { + "epoch": 0.89, + "grad_norm": 2.1858900544499518, + "learning_rate": 1.2317967747918247e-05, + "loss": 1.041, + "step": 73644 + }, + { + "epoch": 0.89, + "grad_norm": 7.293364265350622, + "learning_rate": 1.2317399476461499e-05, + "loss": 1.0105, + "step": 73647 + }, + { + "epoch": 0.89, + "grad_norm": 14.275253986736704, + "learning_rate": 1.2316831197096297e-05, + "loss": 1.0586, + "step": 73650 + }, + { + "epoch": 0.89, + "grad_norm": 12.779136035826046, + "learning_rate": 1.2316262909824584e-05, + "loss": 1.0617, + "step": 73653 + }, + { + "epoch": 0.89, + "grad_norm": 14.641785692303545, + "learning_rate": 1.2315694614648304e-05, + "loss": 1.2535, + "step": 73656 + }, + { + "epoch": 0.89, + "grad_norm": 20.110383980862068, + "learning_rate": 1.231512631156939e-05, + "loss": 1.6633, + "step": 73659 + }, + { + "epoch": 0.89, + "grad_norm": 9.607116503224054, + "learning_rate": 1.2314558000589785e-05, + "loss": 1.1484, + "step": 73662 + }, + { + "epoch": 0.89, + "grad_norm": 60.60662755731379, + "learning_rate": 1.2313989681711424e-05, + "loss": 1.2057, + "step": 73665 + }, + { + "epoch": 0.89, + "grad_norm": 5.803761799114252, + "learning_rate": 1.2313421354936257e-05, + "loss": 1.5609, + "step": 73668 + }, + { + "epoch": 0.89, + "grad_norm": 7.299075035356698, + "learning_rate": 1.2312853020266209e-05, + "loss": 1.6144, + "step": 73671 + }, + { + "epoch": 0.89, + "grad_norm": 4.216474863709585, + "learning_rate": 1.231228467770323e-05, + "loss": 1.3081, + "step": 73674 + }, + { + "epoch": 0.89, + "grad_norm": 3.4448930135041382, + "learning_rate": 1.2311716327249255e-05, + "loss": 1.0579, + "step": 73677 + }, + { + "epoch": 0.89, + "grad_norm": 14.714520628322244, + "learning_rate": 1.2311147968906225e-05, + "loss": 1.4203, + "step": 73680 + }, + { + "epoch": 0.89, + "grad_norm": 6.621344053501755, + "learning_rate": 1.2310579602676081e-05, + "loss": 1.5295, + "step": 73683 + }, + { + "epoch": 0.89, + "grad_norm": 18.609955184240537, + "learning_rate": 1.2310011228560759e-05, + "loss": 1.4993, + "step": 73686 + }, + { + "epoch": 0.89, + "grad_norm": 13.815614715788659, + "learning_rate": 1.2309442846562203e-05, + "loss": 1.2071, + "step": 73689 + }, + { + "epoch": 0.89, + "grad_norm": 21.732032382378733, + "learning_rate": 1.2308874456682349e-05, + "loss": 1.4756, + "step": 73692 + }, + { + "epoch": 0.89, + "grad_norm": 2.8271251177816854, + "learning_rate": 1.2308306058923139e-05, + "loss": 1.5606, + "step": 73695 + }, + { + "epoch": 0.89, + "grad_norm": 18.55923287777274, + "learning_rate": 1.2307737653286509e-05, + "loss": 1.394, + "step": 73698 + }, + { + "epoch": 0.89, + "grad_norm": 9.637350564676357, + "learning_rate": 1.2307169239774405e-05, + "loss": 1.3733, + "step": 73701 + }, + { + "epoch": 0.89, + "grad_norm": 22.609611044318925, + "learning_rate": 1.2306600818388761e-05, + "loss": 1.1117, + "step": 73704 + }, + { + "epoch": 0.89, + "grad_norm": 7.764225691141034, + "learning_rate": 1.230603238913152e-05, + "loss": 1.2656, + "step": 73707 + }, + { + "epoch": 0.89, + "grad_norm": 16.66659831676579, + "learning_rate": 1.2305463952004624e-05, + "loss": 1.3488, + "step": 73710 + }, + { + "epoch": 0.89, + "grad_norm": 20.7502740516024, + "learning_rate": 1.2304895507010006e-05, + "loss": 1.2196, + "step": 73713 + }, + { + "epoch": 0.89, + "grad_norm": 79.941143565013, + "learning_rate": 1.2304327054149612e-05, + "loss": 1.28, + "step": 73716 + }, + { + "epoch": 0.89, + "grad_norm": 33.07297064431213, + "learning_rate": 1.2303758593425376e-05, + "loss": 1.6899, + "step": 73719 + }, + { + "epoch": 0.89, + "grad_norm": 21.319966012363025, + "learning_rate": 1.2303190124839248e-05, + "loss": 1.1487, + "step": 73722 + }, + { + "epoch": 0.89, + "grad_norm": 5.471386089479951, + "learning_rate": 1.2302621648393157e-05, + "loss": 1.5176, + "step": 73725 + }, + { + "epoch": 0.89, + "grad_norm": 24.38437977754084, + "learning_rate": 1.2302053164089052e-05, + "loss": 1.5787, + "step": 73728 + }, + { + "epoch": 0.89, + "grad_norm": 16.77675512499763, + "learning_rate": 1.2301484671928866e-05, + "loss": 1.5268, + "step": 73731 + }, + { + "epoch": 0.89, + "grad_norm": 21.370352499963435, + "learning_rate": 1.2300916171914539e-05, + "loss": 1.5975, + "step": 73734 + }, + { + "epoch": 0.89, + "grad_norm": 7.544694042777553, + "learning_rate": 1.2300347664048018e-05, + "loss": 1.227, + "step": 73737 + }, + { + "epoch": 0.89, + "grad_norm": 4.37318748706403, + "learning_rate": 1.2299779148331235e-05, + "loss": 1.3537, + "step": 73740 + }, + { + "epoch": 0.89, + "grad_norm": 6.692037256565452, + "learning_rate": 1.229921062476614e-05, + "loss": 1.3485, + "step": 73743 + }, + { + "epoch": 0.89, + "grad_norm": 20.044898861109004, + "learning_rate": 1.2298642093354662e-05, + "loss": 1.0338, + "step": 73746 + }, + { + "epoch": 0.89, + "grad_norm": 17.06202968356748, + "learning_rate": 1.229807355409875e-05, + "loss": 1.3911, + "step": 73749 + }, + { + "epoch": 0.89, + "grad_norm": 19.8606812714388, + "learning_rate": 1.2297505007000342e-05, + "loss": 1.2558, + "step": 73752 + }, + { + "epoch": 0.89, + "grad_norm": 10.817346847760513, + "learning_rate": 1.2296936452061371e-05, + "loss": 1.7689, + "step": 73755 + }, + { + "epoch": 0.89, + "grad_norm": 12.333908355357563, + "learning_rate": 1.2296367889283787e-05, + "loss": 1.1157, + "step": 73758 + }, + { + "epoch": 0.89, + "grad_norm": 14.213602227589819, + "learning_rate": 1.2295799318669523e-05, + "loss": 1.5343, + "step": 73761 + }, + { + "epoch": 0.89, + "grad_norm": 31.673512056594845, + "learning_rate": 1.229523074022053e-05, + "loss": 1.3151, + "step": 73764 + }, + { + "epoch": 0.89, + "grad_norm": 7.233814903854343, + "learning_rate": 1.2294662153938737e-05, + "loss": 1.3966, + "step": 73767 + }, + { + "epoch": 0.89, + "grad_norm": 3.352018818519332, + "learning_rate": 1.2294093559826088e-05, + "loss": 1.1643, + "step": 73770 + }, + { + "epoch": 0.89, + "grad_norm": 21.23336889864799, + "learning_rate": 1.2293524957884526e-05, + "loss": 1.5031, + "step": 73773 + }, + { + "epoch": 0.89, + "grad_norm": 7.71019763557629, + "learning_rate": 1.229295634811599e-05, + "loss": 1.1857, + "step": 73776 + }, + { + "epoch": 0.89, + "grad_norm": 14.466120762457301, + "learning_rate": 1.2292387730522416e-05, + "loss": 1.3005, + "step": 73779 + }, + { + "epoch": 0.89, + "grad_norm": 13.013664784169176, + "learning_rate": 1.229181910510575e-05, + "loss": 1.657, + "step": 73782 + }, + { + "epoch": 0.89, + "grad_norm": 19.146746874914303, + "learning_rate": 1.2291250471867933e-05, + "loss": 1.4715, + "step": 73785 + }, + { + "epoch": 0.89, + "grad_norm": 21.55315469554051, + "learning_rate": 1.22906818308109e-05, + "loss": 1.6166, + "step": 73788 + }, + { + "epoch": 0.89, + "grad_norm": 24.67249288039403, + "learning_rate": 1.2290113181936599e-05, + "loss": 1.478, + "step": 73791 + }, + { + "epoch": 0.89, + "grad_norm": 11.667049820883612, + "learning_rate": 1.2289544525246962e-05, + "loss": 1.657, + "step": 73794 + }, + { + "epoch": 0.89, + "grad_norm": 22.651137060108294, + "learning_rate": 1.228897586074394e-05, + "loss": 1.1204, + "step": 73797 + }, + { + "epoch": 0.89, + "grad_norm": 22.85111057826053, + "learning_rate": 1.228840718842946e-05, + "loss": 1.1673, + "step": 73800 + }, + { + "epoch": 0.89, + "grad_norm": 4.699412566718804, + "learning_rate": 1.2287838508305478e-05, + "loss": 1.4678, + "step": 73803 + }, + { + "epoch": 0.89, + "grad_norm": 4.88454698021527, + "learning_rate": 1.2287269820373924e-05, + "loss": 1.4097, + "step": 73806 + }, + { + "epoch": 0.89, + "grad_norm": 61.97319630866149, + "learning_rate": 1.2286701124636744e-05, + "loss": 1.2602, + "step": 73809 + }, + { + "epoch": 0.89, + "grad_norm": 11.282149895234683, + "learning_rate": 1.2286132421095874e-05, + "loss": 1.4984, + "step": 73812 + }, + { + "epoch": 0.89, + "grad_norm": 4.848157089059823, + "learning_rate": 1.2285563709753257e-05, + "loss": 1.5288, + "step": 73815 + }, + { + "epoch": 0.89, + "grad_norm": 12.726960103567288, + "learning_rate": 1.228499499061084e-05, + "loss": 1.2409, + "step": 73818 + }, + { + "epoch": 0.89, + "grad_norm": 45.3402055777046, + "learning_rate": 1.228442626367055e-05, + "loss": 1.2635, + "step": 73821 + }, + { + "epoch": 0.89, + "grad_norm": 12.156083610661845, + "learning_rate": 1.2283857528934342e-05, + "loss": 1.1904, + "step": 73824 + }, + { + "epoch": 0.89, + "grad_norm": 10.087758068978985, + "learning_rate": 1.2283288786404149e-05, + "loss": 1.812, + "step": 73827 + }, + { + "epoch": 0.89, + "grad_norm": 4.751746766129143, + "learning_rate": 1.2282720036081914e-05, + "loss": 1.3757, + "step": 73830 + }, + { + "epoch": 0.89, + "grad_norm": 9.085290507653847, + "learning_rate": 1.2282151277969576e-05, + "loss": 1.4849, + "step": 73833 + }, + { + "epoch": 0.89, + "grad_norm": 5.576453619662017, + "learning_rate": 1.228158251206908e-05, + "loss": 1.4719, + "step": 73836 + }, + { + "epoch": 0.89, + "grad_norm": 25.720578708610844, + "learning_rate": 1.2281013738382366e-05, + "loss": 1.2828, + "step": 73839 + }, + { + "epoch": 0.89, + "grad_norm": 10.874658277499691, + "learning_rate": 1.2280444956911368e-05, + "loss": 1.2423, + "step": 73842 + }, + { + "epoch": 0.89, + "grad_norm": 3.3791273020780914, + "learning_rate": 1.2279876167658037e-05, + "loss": 1.3902, + "step": 73845 + }, + { + "epoch": 0.89, + "grad_norm": 2.0799479009890915, + "learning_rate": 1.2279307370624311e-05, + "loss": 1.374, + "step": 73848 + }, + { + "epoch": 0.89, + "grad_norm": 8.504178601799806, + "learning_rate": 1.2278738565812129e-05, + "loss": 1.5499, + "step": 73851 + }, + { + "epoch": 0.89, + "grad_norm": 19.459606077167095, + "learning_rate": 1.227816975322343e-05, + "loss": 1.3901, + "step": 73854 + }, + { + "epoch": 0.89, + "grad_norm": 9.702900125362799, + "learning_rate": 1.227760093286016e-05, + "loss": 1.2519, + "step": 73857 + }, + { + "epoch": 0.89, + "grad_norm": 20.714196651102426, + "learning_rate": 1.2277032104724262e-05, + "loss": 1.534, + "step": 73860 + }, + { + "epoch": 0.89, + "grad_norm": 10.301714839798457, + "learning_rate": 1.2276463268817672e-05, + "loss": 1.2479, + "step": 73863 + }, + { + "epoch": 0.89, + "grad_norm": 4.82824532707676, + "learning_rate": 1.2275894425142327e-05, + "loss": 1.1159, + "step": 73866 + }, + { + "epoch": 0.89, + "grad_norm": 13.486915071807312, + "learning_rate": 1.2275325573700182e-05, + "loss": 0.9252, + "step": 73869 + }, + { + "epoch": 0.89, + "grad_norm": 25.842508542869147, + "learning_rate": 1.2274756714493166e-05, + "loss": 1.2821, + "step": 73872 + }, + { + "epoch": 0.89, + "grad_norm": 4.16960326174979, + "learning_rate": 1.2274187847523225e-05, + "loss": 1.3855, + "step": 73875 + }, + { + "epoch": 0.89, + "grad_norm": 14.052461517742438, + "learning_rate": 1.22736189727923e-05, + "loss": 1.3039, + "step": 73878 + }, + { + "epoch": 0.89, + "grad_norm": 61.78978936466808, + "learning_rate": 1.2273050090302334e-05, + "loss": 1.2612, + "step": 73881 + }, + { + "epoch": 0.89, + "grad_norm": 19.068741204187276, + "learning_rate": 1.2272481200055267e-05, + "loss": 1.6945, + "step": 73884 + }, + { + "epoch": 0.89, + "grad_norm": 9.446820598038496, + "learning_rate": 1.2271912302053038e-05, + "loss": 1.6399, + "step": 73887 + }, + { + "epoch": 0.89, + "grad_norm": 5.007720659986226, + "learning_rate": 1.227134339629759e-05, + "loss": 1.4311, + "step": 73890 + }, + { + "epoch": 0.89, + "grad_norm": 22.059176444638524, + "learning_rate": 1.2270774482790869e-05, + "loss": 1.3317, + "step": 73893 + }, + { + "epoch": 0.89, + "grad_norm": 7.827919029519328, + "learning_rate": 1.2270205561534809e-05, + "loss": 1.2816, + "step": 73896 + }, + { + "epoch": 0.89, + "grad_norm": 10.722788558886817, + "learning_rate": 1.2269636632531356e-05, + "loss": 1.3087, + "step": 73899 + }, + { + "epoch": 0.89, + "grad_norm": 11.223696719907585, + "learning_rate": 1.2269067695782452e-05, + "loss": 1.3518, + "step": 73902 + }, + { + "epoch": 0.89, + "grad_norm": 13.453090039985373, + "learning_rate": 1.2268498751290037e-05, + "loss": 1.6159, + "step": 73905 + }, + { + "epoch": 0.89, + "grad_norm": 19.172903228066073, + "learning_rate": 1.2267929799056052e-05, + "loss": 1.4872, + "step": 73908 + }, + { + "epoch": 0.89, + "grad_norm": 10.688090486713403, + "learning_rate": 1.2267360839082441e-05, + "loss": 1.5202, + "step": 73911 + }, + { + "epoch": 0.89, + "grad_norm": 22.39855939284673, + "learning_rate": 1.2266791871371143e-05, + "loss": 1.3653, + "step": 73914 + }, + { + "epoch": 0.89, + "grad_norm": 4.72378173964213, + "learning_rate": 1.22662228959241e-05, + "loss": 1.2647, + "step": 73917 + }, + { + "epoch": 0.89, + "grad_norm": 4.557059648237682, + "learning_rate": 1.2265653912743254e-05, + "loss": 1.4955, + "step": 73920 + }, + { + "epoch": 0.89, + "grad_norm": 16.82549180512508, + "learning_rate": 1.2265084921830551e-05, + "loss": 1.3785, + "step": 73923 + }, + { + "epoch": 0.89, + "grad_norm": 7.052178974621409, + "learning_rate": 1.2264515923187928e-05, + "loss": 1.163, + "step": 73926 + }, + { + "epoch": 0.89, + "grad_norm": 11.969478918774238, + "learning_rate": 1.2263946916817324e-05, + "loss": 1.5825, + "step": 73929 + }, + { + "epoch": 0.89, + "grad_norm": 20.30054075263146, + "learning_rate": 1.2263377902720689e-05, + "loss": 1.3433, + "step": 73932 + }, + { + "epoch": 0.89, + "grad_norm": 9.670255249598236, + "learning_rate": 1.226280888089996e-05, + "loss": 1.0176, + "step": 73935 + }, + { + "epoch": 0.89, + "grad_norm": 9.939927506704123, + "learning_rate": 1.2262239851357077e-05, + "loss": 1.3089, + "step": 73938 + }, + { + "epoch": 0.89, + "grad_norm": 5.817750202047302, + "learning_rate": 1.2261670814093982e-05, + "loss": 1.5028, + "step": 73941 + }, + { + "epoch": 0.89, + "grad_norm": 11.124981324995355, + "learning_rate": 1.2261101769112624e-05, + "loss": 1.5431, + "step": 73944 + }, + { + "epoch": 0.89, + "grad_norm": 10.874438299705188, + "learning_rate": 1.2260532716414939e-05, + "loss": 1.1416, + "step": 73947 + }, + { + "epoch": 0.89, + "grad_norm": 3.3284285625474728, + "learning_rate": 1.2259963656002869e-05, + "loss": 1.042, + "step": 73950 + }, + { + "epoch": 0.89, + "grad_norm": 10.343735779305398, + "learning_rate": 1.2259394587878357e-05, + "loss": 1.4578, + "step": 73953 + }, + { + "epoch": 0.89, + "grad_norm": 12.986156947088219, + "learning_rate": 1.2258825512043347e-05, + "loss": 1.738, + "step": 73956 + }, + { + "epoch": 0.89, + "grad_norm": 4.599077105491629, + "learning_rate": 1.2258256428499779e-05, + "loss": 1.8243, + "step": 73959 + }, + { + "epoch": 0.89, + "grad_norm": 8.285526144979636, + "learning_rate": 1.2257687337249592e-05, + "loss": 1.5908, + "step": 73962 + }, + { + "epoch": 0.89, + "grad_norm": 25.782842761715038, + "learning_rate": 1.2257118238294735e-05, + "loss": 1.7788, + "step": 73965 + }, + { + "epoch": 0.89, + "grad_norm": 4.918431624308067, + "learning_rate": 1.2256549131637146e-05, + "loss": 1.0588, + "step": 73968 + }, + { + "epoch": 0.89, + "grad_norm": 6.785899503724586, + "learning_rate": 1.2255980017278765e-05, + "loss": 1.536, + "step": 73971 + }, + { + "epoch": 0.89, + "grad_norm": 21.6682168681313, + "learning_rate": 1.2255410895221538e-05, + "loss": 1.4267, + "step": 73974 + }, + { + "epoch": 0.89, + "grad_norm": 4.4611984693807045, + "learning_rate": 1.2254841765467407e-05, + "loss": 1.1471, + "step": 73977 + }, + { + "epoch": 0.89, + "grad_norm": 16.0969222254875, + "learning_rate": 1.2254272628018314e-05, + "loss": 1.0975, + "step": 73980 + }, + { + "epoch": 0.89, + "grad_norm": 4.064949999015733, + "learning_rate": 1.2253703482876198e-05, + "loss": 1.1282, + "step": 73983 + }, + { + "epoch": 0.89, + "grad_norm": 9.830385536880529, + "learning_rate": 1.2253134330043006e-05, + "loss": 1.1957, + "step": 73986 + }, + { + "epoch": 0.89, + "grad_norm": 9.584615641467328, + "learning_rate": 1.2252565169520676e-05, + "loss": 1.2562, + "step": 73989 + }, + { + "epoch": 0.89, + "grad_norm": 4.929154197082247, + "learning_rate": 1.2251996001311154e-05, + "loss": 1.4955, + "step": 73992 + }, + { + "epoch": 0.89, + "grad_norm": 7.74078249893654, + "learning_rate": 1.2251426825416379e-05, + "loss": 1.3125, + "step": 73995 + }, + { + "epoch": 0.89, + "grad_norm": 5.752177382264097, + "learning_rate": 1.2250857641838298e-05, + "loss": 1.162, + "step": 73998 + }, + { + "epoch": 0.89, + "grad_norm": 19.790237858048062, + "learning_rate": 1.2250288450578848e-05, + "loss": 1.1938, + "step": 74001 + }, + { + "epoch": 0.89, + "grad_norm": 11.125137864972185, + "learning_rate": 1.2249719251639976e-05, + "loss": 1.4589, + "step": 74004 + }, + { + "epoch": 0.89, + "grad_norm": 76.98769311829886, + "learning_rate": 1.224915004502362e-05, + "loss": 1.3774, + "step": 74007 + }, + { + "epoch": 0.89, + "grad_norm": 38.664804472544034, + "learning_rate": 1.2248580830731729e-05, + "loss": 1.5059, + "step": 74010 + }, + { + "epoch": 0.89, + "grad_norm": 22.593714609343188, + "learning_rate": 1.2248011608766239e-05, + "loss": 1.2161, + "step": 74013 + }, + { + "epoch": 0.89, + "grad_norm": 10.655788218466698, + "learning_rate": 1.2247442379129095e-05, + "loss": 1.1034, + "step": 74016 + }, + { + "epoch": 0.89, + "grad_norm": 4.76921691124, + "learning_rate": 1.2246873141822242e-05, + "loss": 1.5208, + "step": 74019 + }, + { + "epoch": 0.89, + "grad_norm": 21.37148527703841, + "learning_rate": 1.2246303896847617e-05, + "loss": 1.3226, + "step": 74022 + }, + { + "epoch": 0.89, + "grad_norm": 4.948385057423461, + "learning_rate": 1.224573464420717e-05, + "loss": 1.2704, + "step": 74025 + }, + { + "epoch": 0.89, + "grad_norm": 28.5551618669481, + "learning_rate": 1.2245165383902835e-05, + "loss": 1.492, + "step": 74028 + }, + { + "epoch": 0.89, + "grad_norm": 5.171109717763916, + "learning_rate": 1.2244596115936563e-05, + "loss": 1.6229, + "step": 74031 + }, + { + "epoch": 0.89, + "grad_norm": 12.001035177111353, + "learning_rate": 1.224402684031029e-05, + "loss": 1.3964, + "step": 74034 + }, + { + "epoch": 0.89, + "grad_norm": 11.14201950223128, + "learning_rate": 1.2243457557025962e-05, + "loss": 1.104, + "step": 74037 + }, + { + "epoch": 0.89, + "grad_norm": 14.057824471965285, + "learning_rate": 1.2242888266085522e-05, + "loss": 1.7389, + "step": 74040 + }, + { + "epoch": 0.89, + "grad_norm": 54.27829714943814, + "learning_rate": 1.2242318967490914e-05, + "loss": 1.3141, + "step": 74043 + }, + { + "epoch": 0.89, + "grad_norm": 6.770642674956182, + "learning_rate": 1.2241749661244077e-05, + "loss": 1.3652, + "step": 74046 + }, + { + "epoch": 0.89, + "grad_norm": 13.453839035558486, + "learning_rate": 1.2241180347346955e-05, + "loss": 1.7582, + "step": 74049 + }, + { + "epoch": 0.89, + "grad_norm": 8.867564579993859, + "learning_rate": 1.2240611025801494e-05, + "loss": 1.151, + "step": 74052 + }, + { + "epoch": 0.89, + "grad_norm": 12.152217535279824, + "learning_rate": 1.2240041696609635e-05, + "loss": 1.4733, + "step": 74055 + }, + { + "epoch": 0.89, + "grad_norm": 34.287439777762536, + "learning_rate": 1.2239472359773316e-05, + "loss": 1.2656, + "step": 74058 + }, + { + "epoch": 0.89, + "grad_norm": 34.826154224288494, + "learning_rate": 1.223890301529449e-05, + "loss": 1.208, + "step": 74061 + }, + { + "epoch": 0.89, + "grad_norm": 15.356987990572799, + "learning_rate": 1.2238333663175093e-05, + "loss": 1.223, + "step": 74064 + }, + { + "epoch": 0.89, + "grad_norm": 2.299141472396778, + "learning_rate": 1.223776430341707e-05, + "loss": 1.4024, + "step": 74067 + }, + { + "epoch": 0.89, + "grad_norm": 7.264790550090241, + "learning_rate": 1.2237194936022361e-05, + "loss": 1.2361, + "step": 74070 + }, + { + "epoch": 0.89, + "grad_norm": 26.592151065781618, + "learning_rate": 1.2236625560992912e-05, + "loss": 1.3979, + "step": 74073 + }, + { + "epoch": 0.89, + "grad_norm": 9.195638429212403, + "learning_rate": 1.2236056178330667e-05, + "loss": 1.4563, + "step": 74076 + }, + { + "epoch": 0.89, + "grad_norm": 9.373715758256749, + "learning_rate": 1.2235486788037567e-05, + "loss": 1.2896, + "step": 74079 + }, + { + "epoch": 0.89, + "grad_norm": 6.989097508954979, + "learning_rate": 1.2234917390115554e-05, + "loss": 1.1964, + "step": 74082 + }, + { + "epoch": 0.89, + "grad_norm": 13.808225982444748, + "learning_rate": 1.2234347984566577e-05, + "loss": 1.1689, + "step": 74085 + }, + { + "epoch": 0.89, + "grad_norm": 12.691006216493376, + "learning_rate": 1.2233778571392574e-05, + "loss": 1.0978, + "step": 74088 + }, + { + "epoch": 0.89, + "grad_norm": 11.053348657965289, + "learning_rate": 1.2233209150595487e-05, + "loss": 1.327, + "step": 74091 + }, + { + "epoch": 0.89, + "grad_norm": 26.891586343481304, + "learning_rate": 1.2232639722177262e-05, + "loss": 1.3879, + "step": 74094 + }, + { + "epoch": 0.89, + "grad_norm": 8.68913844425471, + "learning_rate": 1.2232070286139843e-05, + "loss": 1.2797, + "step": 74097 + }, + { + "epoch": 0.89, + "grad_norm": 7.031735136899811, + "learning_rate": 1.2231500842485173e-05, + "loss": 1.3691, + "step": 74100 + }, + { + "epoch": 0.89, + "grad_norm": 5.338769980274693, + "learning_rate": 1.2230931391215192e-05, + "loss": 1.1849, + "step": 74103 + }, + { + "epoch": 0.89, + "grad_norm": 12.32182194590931, + "learning_rate": 1.2230361932331849e-05, + "loss": 1.4413, + "step": 74106 + }, + { + "epoch": 0.89, + "grad_norm": 5.71797216225574, + "learning_rate": 1.222979246583708e-05, + "loss": 1.4282, + "step": 74109 + }, + { + "epoch": 0.89, + "grad_norm": 14.239765467337012, + "learning_rate": 1.2229222991732834e-05, + "loss": 1.443, + "step": 74112 + }, + { + "epoch": 0.89, + "grad_norm": 6.975009214431334, + "learning_rate": 1.2228653510021053e-05, + "loss": 1.4369, + "step": 74115 + }, + { + "epoch": 0.89, + "grad_norm": 16.927597925882004, + "learning_rate": 1.2228084020703683e-05, + "loss": 1.2554, + "step": 74118 + }, + { + "epoch": 0.89, + "grad_norm": 9.29422660303194, + "learning_rate": 1.2227514523782661e-05, + "loss": 1.5735, + "step": 74121 + }, + { + "epoch": 0.89, + "grad_norm": 16.34300084530313, + "learning_rate": 1.2226945019259935e-05, + "loss": 1.7399, + "step": 74124 + }, + { + "epoch": 0.89, + "grad_norm": 5.677778405817671, + "learning_rate": 1.2226375507137452e-05, + "loss": 1.1308, + "step": 74127 + }, + { + "epoch": 0.89, + "grad_norm": 6.39152918468884, + "learning_rate": 1.2225805987417148e-05, + "loss": 1.1369, + "step": 74130 + }, + { + "epoch": 0.89, + "grad_norm": 2.8419820261734996, + "learning_rate": 1.2225236460100968e-05, + "loss": 1.6295, + "step": 74133 + }, + { + "epoch": 0.89, + "grad_norm": 12.20691108173806, + "learning_rate": 1.2224666925190858e-05, + "loss": 1.4318, + "step": 74136 + }, + { + "epoch": 0.89, + "grad_norm": 8.867664355726697, + "learning_rate": 1.2224097382688764e-05, + "loss": 1.3668, + "step": 74139 + }, + { + "epoch": 0.89, + "grad_norm": 1.878023524900519, + "learning_rate": 1.2223527832596624e-05, + "loss": 1.6294, + "step": 74142 + }, + { + "epoch": 0.89, + "grad_norm": 6.662942477260707, + "learning_rate": 1.2222958274916386e-05, + "loss": 1.5145, + "step": 74145 + }, + { + "epoch": 0.89, + "grad_norm": 19.442080253068188, + "learning_rate": 1.2222388709649992e-05, + "loss": 1.5573, + "step": 74148 + }, + { + "epoch": 0.89, + "grad_norm": 26.84474424083106, + "learning_rate": 1.2221819136799386e-05, + "loss": 1.4141, + "step": 74151 + }, + { + "epoch": 0.89, + "grad_norm": 190.41930737244667, + "learning_rate": 1.2221249556366512e-05, + "loss": 1.647, + "step": 74154 + }, + { + "epoch": 0.89, + "grad_norm": 9.657667872492585, + "learning_rate": 1.2220679968353311e-05, + "loss": 1.728, + "step": 74157 + }, + { + "epoch": 0.89, + "grad_norm": 5.391034479096315, + "learning_rate": 1.2220110372761733e-05, + "loss": 1.288, + "step": 74160 + }, + { + "epoch": 0.89, + "grad_norm": 10.697238519547277, + "learning_rate": 1.2219540769593714e-05, + "loss": 1.4474, + "step": 74163 + }, + { + "epoch": 0.89, + "grad_norm": 5.795762148374539, + "learning_rate": 1.22189711588512e-05, + "loss": 1.0936, + "step": 74166 + }, + { + "epoch": 0.89, + "grad_norm": 16.20170763233237, + "learning_rate": 1.2218401540536145e-05, + "loss": 1.2848, + "step": 74169 + }, + { + "epoch": 0.89, + "grad_norm": 3.317419370269133, + "learning_rate": 1.2217831914650478e-05, + "loss": 1.4912, + "step": 74172 + }, + { + "epoch": 0.89, + "grad_norm": 22.35459403967085, + "learning_rate": 1.2217262281196151e-05, + "loss": 1.2119, + "step": 74175 + }, + { + "epoch": 0.89, + "grad_norm": 28.593294187805014, + "learning_rate": 1.2216692640175105e-05, + "loss": 1.1777, + "step": 74178 + }, + { + "epoch": 0.89, + "grad_norm": 29.250464144539468, + "learning_rate": 1.2216122991589293e-05, + "loss": 1.6042, + "step": 74181 + }, + { + "epoch": 0.89, + "grad_norm": 9.6214182071701, + "learning_rate": 1.2215553335440641e-05, + "loss": 1.6329, + "step": 74184 + }, + { + "epoch": 0.89, + "grad_norm": 17.202462545408004, + "learning_rate": 1.2214983671731111e-05, + "loss": 1.4132, + "step": 74187 + }, + { + "epoch": 0.89, + "grad_norm": 7.95552773453108, + "learning_rate": 1.2214414000462635e-05, + "loss": 1.3663, + "step": 74190 + }, + { + "epoch": 0.89, + "grad_norm": 12.378614479207862, + "learning_rate": 1.2213844321637162e-05, + "loss": 1.2187, + "step": 74193 + }, + { + "epoch": 0.89, + "grad_norm": 19.702054262699964, + "learning_rate": 1.2213274635256637e-05, + "loss": 1.15, + "step": 74196 + }, + { + "epoch": 0.89, + "grad_norm": 2.228199001962741, + "learning_rate": 1.2212704941323003e-05, + "loss": 1.2911, + "step": 74199 + }, + { + "epoch": 0.89, + "grad_norm": 25.011488727438927, + "learning_rate": 1.2212135239838208e-05, + "loss": 1.5492, + "step": 74202 + }, + { + "epoch": 0.89, + "grad_norm": 5.741652051762253, + "learning_rate": 1.2211565530804185e-05, + "loss": 1.2717, + "step": 74205 + }, + { + "epoch": 0.89, + "grad_norm": 14.874288248212403, + "learning_rate": 1.221099581422289e-05, + "loss": 1.227, + "step": 74208 + }, + { + "epoch": 0.89, + "grad_norm": 8.473776724690786, + "learning_rate": 1.221042609009626e-05, + "loss": 1.2284, + "step": 74211 + }, + { + "epoch": 0.89, + "grad_norm": 27.968894725136643, + "learning_rate": 1.2209856358426247e-05, + "loss": 1.5253, + "step": 74214 + }, + { + "epoch": 0.89, + "grad_norm": 6.440297345846609, + "learning_rate": 1.2209286619214785e-05, + "loss": 1.3879, + "step": 74217 + }, + { + "epoch": 0.89, + "grad_norm": 8.689057570559191, + "learning_rate": 1.2208716872463823e-05, + "loss": 1.15, + "step": 74220 + }, + { + "epoch": 0.89, + "grad_norm": 55.241583314950525, + "learning_rate": 1.220814711817531e-05, + "loss": 1.6379, + "step": 74223 + }, + { + "epoch": 0.89, + "grad_norm": 16.25673294732259, + "learning_rate": 1.2207577356351183e-05, + "loss": 1.1826, + "step": 74226 + }, + { + "epoch": 0.89, + "grad_norm": 19.85871431294942, + "learning_rate": 1.2207007586993393e-05, + "loss": 1.2895, + "step": 74229 + }, + { + "epoch": 0.89, + "grad_norm": 6.89967258332652, + "learning_rate": 1.2206437810103878e-05, + "loss": 1.4196, + "step": 74232 + }, + { + "epoch": 0.89, + "grad_norm": 13.31470492056446, + "learning_rate": 1.2205868025684589e-05, + "loss": 1.2644, + "step": 74235 + }, + { + "epoch": 0.89, + "grad_norm": 6.874248111090277, + "learning_rate": 1.2205298233737462e-05, + "loss": 1.0027, + "step": 74238 + }, + { + "epoch": 0.89, + "grad_norm": 9.114253940578344, + "learning_rate": 1.220472843426445e-05, + "loss": 1.3973, + "step": 74241 + }, + { + "epoch": 0.89, + "grad_norm": 28.838333881812513, + "learning_rate": 1.2204158627267492e-05, + "loss": 1.3766, + "step": 74244 + }, + { + "epoch": 0.89, + "grad_norm": 35.31546421844518, + "learning_rate": 1.2203588812748535e-05, + "loss": 1.468, + "step": 74247 + }, + { + "epoch": 0.89, + "grad_norm": 6.596216648839841, + "learning_rate": 1.2203018990709526e-05, + "loss": 1.8764, + "step": 74250 + }, + { + "epoch": 0.89, + "grad_norm": 3.2981873968064237, + "learning_rate": 1.2202449161152402e-05, + "loss": 1.2727, + "step": 74253 + }, + { + "epoch": 0.89, + "grad_norm": 14.591721050635442, + "learning_rate": 1.2201879324079117e-05, + "loss": 1.1534, + "step": 74256 + }, + { + "epoch": 0.89, + "grad_norm": 31.468358583388778, + "learning_rate": 1.2201309479491608e-05, + "loss": 1.4856, + "step": 74259 + }, + { + "epoch": 0.89, + "grad_norm": 15.844692560140217, + "learning_rate": 1.2200739627391822e-05, + "loss": 1.4888, + "step": 74262 + }, + { + "epoch": 0.89, + "grad_norm": 6.4298141970437905, + "learning_rate": 1.2200169767781706e-05, + "loss": 1.4609, + "step": 74265 + }, + { + "epoch": 0.89, + "grad_norm": 8.514916736855755, + "learning_rate": 1.2199599900663204e-05, + "loss": 0.9033, + "step": 74268 + }, + { + "epoch": 0.89, + "grad_norm": 20.120877533895065, + "learning_rate": 1.2199030026038257e-05, + "loss": 1.3622, + "step": 74271 + }, + { + "epoch": 0.89, + "grad_norm": 28.960428450586242, + "learning_rate": 1.2198460143908813e-05, + "loss": 1.4556, + "step": 74274 + }, + { + "epoch": 0.89, + "grad_norm": 14.166650105377549, + "learning_rate": 1.219789025427682e-05, + "loss": 1.0906, + "step": 74277 + }, + { + "epoch": 0.89, + "grad_norm": 20.34971206747622, + "learning_rate": 1.2197320357144216e-05, + "loss": 1.1356, + "step": 74280 + }, + { + "epoch": 0.89, + "grad_norm": 22.617238047052915, + "learning_rate": 1.219675045251295e-05, + "loss": 1.4252, + "step": 74283 + }, + { + "epoch": 0.89, + "grad_norm": 5.147615520466322, + "learning_rate": 1.2196180540384968e-05, + "loss": 1.53, + "step": 74286 + }, + { + "epoch": 0.89, + "grad_norm": 14.846337340290553, + "learning_rate": 1.219561062076221e-05, + "loss": 1.3462, + "step": 74289 + }, + { + "epoch": 0.89, + "grad_norm": 15.297884651772167, + "learning_rate": 1.2195040693646623e-05, + "loss": 1.1791, + "step": 74292 + }, + { + "epoch": 0.89, + "grad_norm": 11.085532610895749, + "learning_rate": 1.2194470759040156e-05, + "loss": 1.1246, + "step": 74295 + }, + { + "epoch": 0.89, + "grad_norm": 10.273308787223039, + "learning_rate": 1.219390081694475e-05, + "loss": 1.1093, + "step": 74298 + }, + { + "epoch": 0.89, + "grad_norm": 7.689894560212753, + "learning_rate": 1.2193330867362347e-05, + "loss": 1.8221, + "step": 74301 + }, + { + "epoch": 0.89, + "grad_norm": 32.13407607214977, + "learning_rate": 1.21927609102949e-05, + "loss": 1.0959, + "step": 74304 + }, + { + "epoch": 0.89, + "grad_norm": 6.536337742228385, + "learning_rate": 1.2192190945744348e-05, + "loss": 1.1376, + "step": 74307 + }, + { + "epoch": 0.89, + "grad_norm": 2.225317087815023, + "learning_rate": 1.2191620973712639e-05, + "loss": 1.3076, + "step": 74310 + }, + { + "epoch": 0.89, + "grad_norm": 22.901217120115188, + "learning_rate": 1.2191050994201716e-05, + "loss": 1.3395, + "step": 74313 + }, + { + "epoch": 0.89, + "grad_norm": 18.109446501981598, + "learning_rate": 1.2190481007213527e-05, + "loss": 1.5726, + "step": 74316 + }, + { + "epoch": 0.89, + "grad_norm": 9.110419771326253, + "learning_rate": 1.2189911012750013e-05, + "loss": 1.4515, + "step": 74319 + }, + { + "epoch": 0.89, + "grad_norm": 13.431771638377148, + "learning_rate": 1.2189341010813123e-05, + "loss": 1.3466, + "step": 74322 + }, + { + "epoch": 0.89, + "grad_norm": 17.202063947165936, + "learning_rate": 1.21887710014048e-05, + "loss": 1.3449, + "step": 74325 + }, + { + "epoch": 0.89, + "grad_norm": 12.038251453653402, + "learning_rate": 1.218820098452699e-05, + "loss": 1.1974, + "step": 74328 + }, + { + "epoch": 0.89, + "grad_norm": 5.345702965687078, + "learning_rate": 1.2187630960181638e-05, + "loss": 1.6505, + "step": 74331 + }, + { + "epoch": 0.89, + "grad_norm": 8.444696086129495, + "learning_rate": 1.2187060928370688e-05, + "loss": 1.2678, + "step": 74334 + }, + { + "epoch": 0.89, + "grad_norm": 10.114962371607488, + "learning_rate": 1.218649088909609e-05, + "loss": 1.0494, + "step": 74337 + }, + { + "epoch": 0.89, + "grad_norm": 22.137614885452532, + "learning_rate": 1.2185920842359786e-05, + "loss": 1.6498, + "step": 74340 + }, + { + "epoch": 0.89, + "grad_norm": 8.880654341653575, + "learning_rate": 1.218535078816372e-05, + "loss": 1.2622, + "step": 74343 + }, + { + "epoch": 0.89, + "grad_norm": 8.000937295914303, + "learning_rate": 1.218478072650984e-05, + "loss": 1.5282, + "step": 74346 + }, + { + "epoch": 0.89, + "grad_norm": 14.770927406433119, + "learning_rate": 1.2184210657400089e-05, + "loss": 0.936, + "step": 74349 + }, + { + "epoch": 0.89, + "grad_norm": 5.76538920901333, + "learning_rate": 1.2183640580836415e-05, + "loss": 1.5293, + "step": 74352 + }, + { + "epoch": 0.89, + "grad_norm": 20.26213299822411, + "learning_rate": 1.2183070496820761e-05, + "loss": 1.354, + "step": 74355 + }, + { + "epoch": 0.89, + "grad_norm": 10.09099506317279, + "learning_rate": 1.2182500405355073e-05, + "loss": 1.4029, + "step": 74358 + }, + { + "epoch": 0.89, + "grad_norm": 5.901005748538122, + "learning_rate": 1.21819303064413e-05, + "loss": 1.4401, + "step": 74361 + }, + { + "epoch": 0.89, + "grad_norm": 6.748211322701696, + "learning_rate": 1.2181360200081385e-05, + "loss": 1.0939, + "step": 74364 + }, + { + "epoch": 0.89, + "grad_norm": 21.864455268038967, + "learning_rate": 1.218079008627727e-05, + "loss": 1.1969, + "step": 74367 + }, + { + "epoch": 0.89, + "grad_norm": 2.86505950102518, + "learning_rate": 1.2180219965030906e-05, + "loss": 1.1088, + "step": 74370 + }, + { + "epoch": 0.89, + "grad_norm": 47.4044147129168, + "learning_rate": 1.2179649836344237e-05, + "loss": 1.2535, + "step": 74373 + }, + { + "epoch": 0.89, + "grad_norm": 10.764219946162498, + "learning_rate": 1.217907970021921e-05, + "loss": 1.5925, + "step": 74376 + }, + { + "epoch": 0.89, + "grad_norm": 20.306709939890247, + "learning_rate": 1.2178509556657762e-05, + "loss": 1.2926, + "step": 74379 + }, + { + "epoch": 0.89, + "grad_norm": 19.44266724330003, + "learning_rate": 1.2177939405661852e-05, + "loss": 1.2982, + "step": 74382 + }, + { + "epoch": 0.89, + "grad_norm": 4.585495508085835, + "learning_rate": 1.2177369247233416e-05, + "loss": 1.451, + "step": 74385 + }, + { + "epoch": 0.89, + "grad_norm": 6.468012154426237, + "learning_rate": 1.2176799081374403e-05, + "loss": 1.2136, + "step": 74388 + }, + { + "epoch": 0.89, + "grad_norm": 13.508822825173434, + "learning_rate": 1.2176228908086762e-05, + "loss": 1.4542, + "step": 74391 + }, + { + "epoch": 0.89, + "grad_norm": 11.943909674332605, + "learning_rate": 1.2175658727372435e-05, + "loss": 1.3224, + "step": 74394 + }, + { + "epoch": 0.89, + "grad_norm": 8.226016654887008, + "learning_rate": 1.2175088539233368e-05, + "loss": 1.2311, + "step": 74397 + }, + { + "epoch": 0.89, + "grad_norm": 11.45663232545666, + "learning_rate": 1.2174518343671503e-05, + "loss": 1.4964, + "step": 74400 + }, + { + "epoch": 0.89, + "grad_norm": 3.1961736227515933, + "learning_rate": 1.2173948140688796e-05, + "loss": 1.6816, + "step": 74403 + }, + { + "epoch": 0.89, + "grad_norm": 11.440269573418908, + "learning_rate": 1.2173377930287185e-05, + "loss": 1.3499, + "step": 74406 + }, + { + "epoch": 0.89, + "grad_norm": 2.3913587665625866, + "learning_rate": 1.2172807712468616e-05, + "loss": 1.5516, + "step": 74409 + }, + { + "epoch": 0.89, + "grad_norm": 3.682423534018282, + "learning_rate": 1.2172237487235038e-05, + "loss": 1.4079, + "step": 74412 + }, + { + "epoch": 0.89, + "grad_norm": 14.86243751640635, + "learning_rate": 1.2171667254588399e-05, + "loss": 0.9678, + "step": 74415 + }, + { + "epoch": 0.89, + "grad_norm": 31.976583018731187, + "learning_rate": 1.217109701453064e-05, + "loss": 1.3045, + "step": 74418 + }, + { + "epoch": 0.89, + "grad_norm": 13.746982566053084, + "learning_rate": 1.2170526767063705e-05, + "loss": 1.1879, + "step": 74421 + }, + { + "epoch": 0.89, + "grad_norm": 15.769449147258861, + "learning_rate": 1.216995651218955e-05, + "loss": 1.2396, + "step": 74424 + }, + { + "epoch": 0.89, + "grad_norm": 6.9239595229739175, + "learning_rate": 1.2169386249910111e-05, + "loss": 1.268, + "step": 74427 + }, + { + "epoch": 0.89, + "grad_norm": 7.292327282103173, + "learning_rate": 1.2168815980227342e-05, + "loss": 1.2968, + "step": 74430 + }, + { + "epoch": 0.9, + "grad_norm": 13.809953141277921, + "learning_rate": 1.216824570314318e-05, + "loss": 1.1667, + "step": 74433 + }, + { + "epoch": 0.9, + "grad_norm": 10.864043379831653, + "learning_rate": 1.2167675418659579e-05, + "loss": 1.2949, + "step": 74436 + }, + { + "epoch": 0.9, + "grad_norm": 4.726625612289434, + "learning_rate": 1.2167105126778485e-05, + "loss": 1.4239, + "step": 74439 + }, + { + "epoch": 0.9, + "grad_norm": 7.174891621111259, + "learning_rate": 1.2166534827501837e-05, + "loss": 1.5682, + "step": 74442 + }, + { + "epoch": 0.9, + "grad_norm": 19.360167124535746, + "learning_rate": 1.216596452083159e-05, + "loss": 1.4023, + "step": 74445 + }, + { + "epoch": 0.9, + "grad_norm": 38.40103029973788, + "learning_rate": 1.2165394206769684e-05, + "loss": 1.3145, + "step": 74448 + }, + { + "epoch": 0.9, + "grad_norm": 6.421820107905783, + "learning_rate": 1.216482388531807e-05, + "loss": 1.0053, + "step": 74451 + }, + { + "epoch": 0.9, + "grad_norm": 15.380780252009224, + "learning_rate": 1.2164253556478689e-05, + "loss": 1.3023, + "step": 74454 + }, + { + "epoch": 0.9, + "grad_norm": 8.547789634074169, + "learning_rate": 1.216368322025349e-05, + "loss": 1.2851, + "step": 74457 + }, + { + "epoch": 0.9, + "grad_norm": 5.413624580118624, + "learning_rate": 1.2163112876644423e-05, + "loss": 1.5653, + "step": 74460 + }, + { + "epoch": 0.9, + "grad_norm": 2.7047092068134315, + "learning_rate": 1.2162542525653427e-05, + "loss": 1.2019, + "step": 74463 + }, + { + "epoch": 0.9, + "grad_norm": 3.9602874580527767, + "learning_rate": 1.2161972167282454e-05, + "loss": 1.0975, + "step": 74466 + }, + { + "epoch": 0.9, + "grad_norm": 20.12929095883587, + "learning_rate": 1.216140180153345e-05, + "loss": 1.4376, + "step": 74469 + }, + { + "epoch": 0.9, + "grad_norm": 8.568714803647287, + "learning_rate": 1.216083142840836e-05, + "loss": 1.2683, + "step": 74472 + }, + { + "epoch": 0.9, + "grad_norm": 5.7297407432087235, + "learning_rate": 1.2160261047909128e-05, + "loss": 1.2964, + "step": 74475 + }, + { + "epoch": 0.9, + "grad_norm": 27.70856684026252, + "learning_rate": 1.2159690660037704e-05, + "loss": 1.3156, + "step": 74478 + }, + { + "epoch": 0.9, + "grad_norm": 11.128988429967599, + "learning_rate": 1.2159120264796036e-05, + "loss": 1.3487, + "step": 74481 + }, + { + "epoch": 0.9, + "grad_norm": 5.196579320120912, + "learning_rate": 1.2158549862186066e-05, + "loss": 1.1882, + "step": 74484 + }, + { + "epoch": 0.9, + "grad_norm": 9.766919320393832, + "learning_rate": 1.2157979452209742e-05, + "loss": 1.2869, + "step": 74487 + }, + { + "epoch": 0.9, + "grad_norm": 10.3795035816798, + "learning_rate": 1.2157409034869014e-05, + "loss": 1.083, + "step": 74490 + }, + { + "epoch": 0.9, + "grad_norm": 10.331623380037028, + "learning_rate": 1.2156838610165826e-05, + "loss": 0.8848, + "step": 74493 + }, + { + "epoch": 0.9, + "grad_norm": 13.002083969812587, + "learning_rate": 1.2156268178102125e-05, + "loss": 0.8828, + "step": 74496 + }, + { + "epoch": 0.9, + "grad_norm": 26.144646304594097, + "learning_rate": 1.2155697738679855e-05, + "loss": 1.2483, + "step": 74499 + }, + { + "epoch": 0.9, + "grad_norm": 5.9004656383841505, + "learning_rate": 1.2155127291900968e-05, + "loss": 1.5775, + "step": 74502 + }, + { + "epoch": 0.9, + "grad_norm": 15.761133306889292, + "learning_rate": 1.2154556837767406e-05, + "loss": 1.5387, + "step": 74505 + }, + { + "epoch": 0.9, + "grad_norm": 15.712871808431803, + "learning_rate": 1.2153986376281115e-05, + "loss": 1.1791, + "step": 74508 + }, + { + "epoch": 0.9, + "grad_norm": 10.366479121859074, + "learning_rate": 1.215341590744405e-05, + "loss": 1.2944, + "step": 74511 + }, + { + "epoch": 0.9, + "grad_norm": 15.653753512620025, + "learning_rate": 1.2152845431258146e-05, + "loss": 1.4052, + "step": 74514 + }, + { + "epoch": 0.9, + "grad_norm": 3.835369735412278, + "learning_rate": 1.215227494772536e-05, + "loss": 1.0873, + "step": 74517 + }, + { + "epoch": 0.9, + "grad_norm": 19.499004502738927, + "learning_rate": 1.2151704456847635e-05, + "loss": 1.3098, + "step": 74520 + }, + { + "epoch": 0.9, + "grad_norm": 9.784912482083024, + "learning_rate": 1.2151133958626917e-05, + "loss": 1.3515, + "step": 74523 + }, + { + "epoch": 0.9, + "grad_norm": 24.70609359094652, + "learning_rate": 1.2150563453065154e-05, + "loss": 1.6157, + "step": 74526 + }, + { + "epoch": 0.9, + "grad_norm": 4.730115925131623, + "learning_rate": 1.214999294016429e-05, + "loss": 1.18, + "step": 74529 + }, + { + "epoch": 0.9, + "grad_norm": 20.110885249795736, + "learning_rate": 1.2149422419926278e-05, + "loss": 1.198, + "step": 74532 + }, + { + "epoch": 0.9, + "grad_norm": 10.243839438547122, + "learning_rate": 1.214885189235306e-05, + "loss": 1.1432, + "step": 74535 + }, + { + "epoch": 0.9, + "grad_norm": 15.53413893385121, + "learning_rate": 1.2148281357446582e-05, + "loss": 1.0631, + "step": 74538 + }, + { + "epoch": 0.9, + "grad_norm": 15.52739478868286, + "learning_rate": 1.2147710815208798e-05, + "loss": 1.2705, + "step": 74541 + }, + { + "epoch": 0.9, + "grad_norm": 18.466602546055825, + "learning_rate": 1.2147140265641649e-05, + "loss": 1.0784, + "step": 74544 + }, + { + "epoch": 0.9, + "grad_norm": 3.8810567261542825, + "learning_rate": 1.2146569708747082e-05, + "loss": 1.3655, + "step": 74547 + }, + { + "epoch": 0.9, + "grad_norm": 12.35772950076805, + "learning_rate": 1.2145999144527047e-05, + "loss": 1.5519, + "step": 74550 + }, + { + "epoch": 0.9, + "grad_norm": 5.778293481072378, + "learning_rate": 1.2145428572983491e-05, + "loss": 1.1979, + "step": 74553 + }, + { + "epoch": 0.9, + "grad_norm": 6.826774783869867, + "learning_rate": 1.214485799411836e-05, + "loss": 1.5004, + "step": 74556 + }, + { + "epoch": 0.9, + "grad_norm": 19.886415212204263, + "learning_rate": 1.21442874079336e-05, + "loss": 1.7937, + "step": 74559 + }, + { + "epoch": 0.9, + "grad_norm": 3.1887030621609904, + "learning_rate": 1.214371681443116e-05, + "loss": 1.2786, + "step": 74562 + }, + { + "epoch": 0.9, + "grad_norm": 3.8962858357458003, + "learning_rate": 1.2143146213612985e-05, + "loss": 1.8, + "step": 74565 + }, + { + "epoch": 0.9, + "grad_norm": 17.475512502712828, + "learning_rate": 1.2142575605481026e-05, + "loss": 1.4358, + "step": 74568 + }, + { + "epoch": 0.9, + "grad_norm": 7.856387356161822, + "learning_rate": 1.2142004990037226e-05, + "loss": 1.8424, + "step": 74571 + }, + { + "epoch": 0.9, + "grad_norm": 5.040490172678819, + "learning_rate": 1.2141434367283535e-05, + "loss": 0.999, + "step": 74574 + }, + { + "epoch": 0.9, + "grad_norm": 7.328858695354695, + "learning_rate": 1.2140863737221903e-05, + "loss": 1.3707, + "step": 74577 + }, + { + "epoch": 0.9, + "grad_norm": 11.262423098329913, + "learning_rate": 1.2140293099854272e-05, + "loss": 1.3868, + "step": 74580 + }, + { + "epoch": 0.9, + "grad_norm": 16.6577480381797, + "learning_rate": 1.2139722455182592e-05, + "loss": 1.5445, + "step": 74583 + }, + { + "epoch": 0.9, + "grad_norm": 21.629013738714796, + "learning_rate": 1.2139151803208811e-05, + "loss": 1.299, + "step": 74586 + }, + { + "epoch": 0.9, + "grad_norm": 11.302722131441833, + "learning_rate": 1.2138581143934873e-05, + "loss": 1.264, + "step": 74589 + }, + { + "epoch": 0.9, + "grad_norm": 11.479544507491111, + "learning_rate": 1.2138010477362728e-05, + "loss": 1.1539, + "step": 74592 + }, + { + "epoch": 0.9, + "grad_norm": 8.766572865304036, + "learning_rate": 1.2137439803494324e-05, + "loss": 1.4089, + "step": 74595 + }, + { + "epoch": 0.9, + "grad_norm": 7.416383494032829, + "learning_rate": 1.213686912233161e-05, + "loss": 1.0562, + "step": 74598 + }, + { + "epoch": 0.9, + "grad_norm": 22.837927319720194, + "learning_rate": 1.213629843387653e-05, + "loss": 1.213, + "step": 74601 + }, + { + "epoch": 0.9, + "grad_norm": 25.32149777372958, + "learning_rate": 1.213572773813103e-05, + "loss": 1.4803, + "step": 74604 + }, + { + "epoch": 0.9, + "grad_norm": 9.159324170081387, + "learning_rate": 1.2135157035097065e-05, + "loss": 1.3162, + "step": 74607 + }, + { + "epoch": 0.9, + "grad_norm": 15.532138145182326, + "learning_rate": 1.2134586324776577e-05, + "loss": 1.1944, + "step": 74610 + }, + { + "epoch": 0.9, + "grad_norm": 9.352354199583884, + "learning_rate": 1.2134015607171511e-05, + "loss": 1.1848, + "step": 74613 + }, + { + "epoch": 0.9, + "grad_norm": 19.76746179418506, + "learning_rate": 1.2133444882283821e-05, + "loss": 1.3644, + "step": 74616 + }, + { + "epoch": 0.9, + "grad_norm": 10.988966154056342, + "learning_rate": 1.2132874150115452e-05, + "loss": 1.6287, + "step": 74619 + }, + { + "epoch": 0.9, + "grad_norm": 10.15937708422261, + "learning_rate": 1.2132303410668353e-05, + "loss": 0.9227, + "step": 74622 + }, + { + "epoch": 0.9, + "grad_norm": 25.53715685408876, + "learning_rate": 1.2131732663944469e-05, + "loss": 1.5758, + "step": 74625 + }, + { + "epoch": 0.9, + "grad_norm": 6.050724708605315, + "learning_rate": 1.213116190994575e-05, + "loss": 1.4411, + "step": 74628 + }, + { + "epoch": 0.9, + "grad_norm": 9.239931773097487, + "learning_rate": 1.2130591148674144e-05, + "loss": 1.3976, + "step": 74631 + }, + { + "epoch": 0.9, + "grad_norm": 3.0774486795125466, + "learning_rate": 1.2130020380131597e-05, + "loss": 1.2299, + "step": 74634 + }, + { + "epoch": 0.9, + "grad_norm": 9.836021795234872, + "learning_rate": 1.2129449604320056e-05, + "loss": 1.3883, + "step": 74637 + }, + { + "epoch": 0.9, + "grad_norm": 11.11454418018397, + "learning_rate": 1.2128878821241475e-05, + "loss": 1.2479, + "step": 74640 + }, + { + "epoch": 0.9, + "grad_norm": 11.660101802130976, + "learning_rate": 1.2128308030897792e-05, + "loss": 1.462, + "step": 74643 + }, + { + "epoch": 0.9, + "grad_norm": 11.076460075935582, + "learning_rate": 1.2127737233290962e-05, + "loss": 1.3301, + "step": 74646 + }, + { + "epoch": 0.9, + "grad_norm": 22.653775115694504, + "learning_rate": 1.2127166428422932e-05, + "loss": 1.7648, + "step": 74649 + }, + { + "epoch": 0.9, + "grad_norm": 10.744044728360079, + "learning_rate": 1.2126595616295651e-05, + "loss": 1.387, + "step": 74652 + }, + { + "epoch": 0.9, + "grad_norm": 12.84403142030199, + "learning_rate": 1.2126024796911063e-05, + "loss": 1.4008, + "step": 74655 + }, + { + "epoch": 0.9, + "grad_norm": 2.3697409715919147, + "learning_rate": 1.2125453970271117e-05, + "loss": 1.3761, + "step": 74658 + }, + { + "epoch": 0.9, + "grad_norm": 13.479714575312112, + "learning_rate": 1.2124883136377768e-05, + "loss": 1.3158, + "step": 74661 + }, + { + "epoch": 0.9, + "grad_norm": 46.0924986508087, + "learning_rate": 1.2124312295232952e-05, + "loss": 1.0979, + "step": 74664 + }, + { + "epoch": 0.9, + "grad_norm": 23.04088031003447, + "learning_rate": 1.2123741446838624e-05, + "loss": 1.1112, + "step": 74667 + }, + { + "epoch": 0.9, + "grad_norm": 23.271155858203322, + "learning_rate": 1.2123170591196731e-05, + "loss": 1.4521, + "step": 74670 + }, + { + "epoch": 0.9, + "grad_norm": 25.805626861646836, + "learning_rate": 1.2122599728309223e-05, + "loss": 1.4767, + "step": 74673 + }, + { + "epoch": 0.9, + "grad_norm": 5.312974144055547, + "learning_rate": 1.2122028858178048e-05, + "loss": 1.3005, + "step": 74676 + }, + { + "epoch": 0.9, + "grad_norm": 11.609523394253976, + "learning_rate": 1.2121457980805152e-05, + "loss": 1.5832, + "step": 74679 + }, + { + "epoch": 0.9, + "grad_norm": 18.325843780571464, + "learning_rate": 1.2120887096192484e-05, + "loss": 1.3264, + "step": 74682 + }, + { + "epoch": 0.9, + "grad_norm": 35.349653349487916, + "learning_rate": 1.2120316204341991e-05, + "loss": 1.6629, + "step": 74685 + }, + { + "epoch": 0.9, + "grad_norm": 8.337257927295983, + "learning_rate": 1.2119745305255624e-05, + "loss": 1.0989, + "step": 74688 + }, + { + "epoch": 0.9, + "grad_norm": 19.304452748573453, + "learning_rate": 1.2119174398935326e-05, + "loss": 1.5768, + "step": 74691 + }, + { + "epoch": 0.9, + "grad_norm": 14.054396878488092, + "learning_rate": 1.2118603485383058e-05, + "loss": 1.3562, + "step": 74694 + }, + { + "epoch": 0.9, + "grad_norm": 4.730221999691739, + "learning_rate": 1.2118032564600752e-05, + "loss": 1.2534, + "step": 74697 + }, + { + "epoch": 0.9, + "grad_norm": 4.907037517742494, + "learning_rate": 1.2117461636590365e-05, + "loss": 1.4352, + "step": 74700 + }, + { + "epoch": 0.9, + "grad_norm": 5.944054857405081, + "learning_rate": 1.2116890701353845e-05, + "loss": 0.8946, + "step": 74703 + }, + { + "epoch": 0.9, + "grad_norm": 10.46784863121651, + "learning_rate": 1.2116319758893138e-05, + "loss": 1.0451, + "step": 74706 + }, + { + "epoch": 0.9, + "grad_norm": 52.95547623439735, + "learning_rate": 1.2115748809210197e-05, + "loss": 1.5529, + "step": 74709 + }, + { + "epoch": 0.9, + "grad_norm": 5.110238251645221, + "learning_rate": 1.2115177852306963e-05, + "loss": 1.2768, + "step": 74712 + }, + { + "epoch": 0.9, + "grad_norm": 11.483663526150144, + "learning_rate": 1.2114606888185395e-05, + "loss": 1.5499, + "step": 74715 + }, + { + "epoch": 0.9, + "grad_norm": 17.481785420643092, + "learning_rate": 1.2114035916847431e-05, + "loss": 1.4001, + "step": 74718 + }, + { + "epoch": 0.9, + "grad_norm": 21.44620893759168, + "learning_rate": 1.2113464938295025e-05, + "loss": 1.4862, + "step": 74721 + }, + { + "epoch": 0.9, + "grad_norm": 6.798518528103484, + "learning_rate": 1.2112893952530125e-05, + "loss": 1.1696, + "step": 74724 + }, + { + "epoch": 0.9, + "grad_norm": 19.489159251167575, + "learning_rate": 1.2112322959554677e-05, + "loss": 1.1861, + "step": 74727 + }, + { + "epoch": 0.9, + "grad_norm": 30.614089688624915, + "learning_rate": 1.2111751959370632e-05, + "loss": 0.9227, + "step": 74730 + }, + { + "epoch": 0.9, + "grad_norm": 19.508382678137657, + "learning_rate": 1.2111180951979937e-05, + "loss": 1.1492, + "step": 74733 + }, + { + "epoch": 0.9, + "grad_norm": 30.825624434413292, + "learning_rate": 1.2110609937384548e-05, + "loss": 1.4162, + "step": 74736 + }, + { + "epoch": 0.9, + "grad_norm": 6.878025099308505, + "learning_rate": 1.2110038915586402e-05, + "loss": 1.303, + "step": 74739 + }, + { + "epoch": 0.9, + "grad_norm": 10.868798263124326, + "learning_rate": 1.2109467886587454e-05, + "loss": 1.4223, + "step": 74742 + }, + { + "epoch": 0.9, + "grad_norm": 5.729315067960338, + "learning_rate": 1.2108896850389653e-05, + "loss": 1.0264, + "step": 74745 + }, + { + "epoch": 0.9, + "grad_norm": 17.723698337801558, + "learning_rate": 1.2108325806994947e-05, + "loss": 0.8835, + "step": 74748 + }, + { + "epoch": 0.9, + "grad_norm": 12.591676753905872, + "learning_rate": 1.2107754756405283e-05, + "loss": 1.1145, + "step": 74751 + }, + { + "epoch": 0.9, + "grad_norm": 19.695718190708554, + "learning_rate": 1.2107183698622609e-05, + "loss": 1.2712, + "step": 74754 + }, + { + "epoch": 0.9, + "grad_norm": 17.473520216168914, + "learning_rate": 1.2106612633648881e-05, + "loss": 1.1846, + "step": 74757 + }, + { + "epoch": 0.9, + "grad_norm": 6.676086756636326, + "learning_rate": 1.210604156148604e-05, + "loss": 1.0917, + "step": 74760 + }, + { + "epoch": 0.9, + "grad_norm": 20.315356709884597, + "learning_rate": 1.2105470482136038e-05, + "loss": 1.2923, + "step": 74763 + }, + { + "epoch": 0.9, + "grad_norm": 19.591852540747265, + "learning_rate": 1.2104899395600826e-05, + "loss": 1.2543, + "step": 74766 + }, + { + "epoch": 0.9, + "grad_norm": 37.65517836044732, + "learning_rate": 1.210432830188235e-05, + "loss": 1.3736, + "step": 74769 + }, + { + "epoch": 0.9, + "grad_norm": 7.3688058086355746, + "learning_rate": 1.2103757200982555e-05, + "loss": 1.2004, + "step": 74772 + }, + { + "epoch": 0.9, + "grad_norm": 28.821477200688232, + "learning_rate": 1.2103186092903398e-05, + "loss": 1.5996, + "step": 74775 + }, + { + "epoch": 0.9, + "grad_norm": 9.25858142982135, + "learning_rate": 1.2102614977646825e-05, + "loss": 1.3481, + "step": 74778 + }, + { + "epoch": 0.9, + "grad_norm": 33.85506317490283, + "learning_rate": 1.2102043855214782e-05, + "loss": 1.4066, + "step": 74781 + }, + { + "epoch": 0.9, + "grad_norm": 11.303389332613358, + "learning_rate": 1.2101472725609222e-05, + "loss": 1.0266, + "step": 74784 + }, + { + "epoch": 0.9, + "grad_norm": 23.09181428280858, + "learning_rate": 1.2100901588832095e-05, + "loss": 1.5677, + "step": 74787 + }, + { + "epoch": 0.9, + "grad_norm": 16.849912608541015, + "learning_rate": 1.2100330444885345e-05, + "loss": 1.3246, + "step": 74790 + }, + { + "epoch": 0.9, + "grad_norm": 16.307146105171412, + "learning_rate": 1.2099759293770923e-05, + "loss": 1.2106, + "step": 74793 + }, + { + "epoch": 0.9, + "grad_norm": 7.996778661616097, + "learning_rate": 1.209918813549078e-05, + "loss": 1.3427, + "step": 74796 + }, + { + "epoch": 0.9, + "grad_norm": 8.711642171387627, + "learning_rate": 1.2098616970046863e-05, + "loss": 1.1422, + "step": 74799 + }, + { + "epoch": 0.9, + "grad_norm": 12.262313677671125, + "learning_rate": 1.2098045797441125e-05, + "loss": 1.4644, + "step": 74802 + }, + { + "epoch": 0.9, + "grad_norm": 4.978478312489895, + "learning_rate": 1.2097474617675508e-05, + "loss": 1.0767, + "step": 74805 + }, + { + "epoch": 0.9, + "grad_norm": 11.47831262061394, + "learning_rate": 1.2096903430751967e-05, + "loss": 1.3532, + "step": 74808 + }, + { + "epoch": 0.9, + "grad_norm": 2.5989189799728623, + "learning_rate": 1.2096332236672455e-05, + "loss": 1.6674, + "step": 74811 + }, + { + "epoch": 0.9, + "grad_norm": 28.08140489114206, + "learning_rate": 1.2095761035438908e-05, + "loss": 1.6315, + "step": 74814 + }, + { + "epoch": 0.9, + "grad_norm": 21.92355217669491, + "learning_rate": 1.2095189827053289e-05, + "loss": 1.372, + "step": 74817 + }, + { + "epoch": 0.9, + "grad_norm": 2.5618809694841564, + "learning_rate": 1.209461861151754e-05, + "loss": 1.1116, + "step": 74820 + }, + { + "epoch": 0.9, + "grad_norm": 12.076113697979865, + "learning_rate": 1.2094047388833614e-05, + "loss": 1.2389, + "step": 74823 + }, + { + "epoch": 0.9, + "grad_norm": 25.1835716001138, + "learning_rate": 1.2093476159003454e-05, + "loss": 1.1758, + "step": 74826 + }, + { + "epoch": 0.9, + "grad_norm": 7.221478634430066, + "learning_rate": 1.2092904922029019e-05, + "loss": 1.1547, + "step": 74829 + }, + { + "epoch": 0.9, + "grad_norm": 6.583088580195856, + "learning_rate": 1.2092333677912251e-05, + "loss": 1.3938, + "step": 74832 + }, + { + "epoch": 0.9, + "grad_norm": 5.761047546517611, + "learning_rate": 1.2091762426655101e-05, + "loss": 1.6718, + "step": 74835 + }, + { + "epoch": 0.9, + "grad_norm": 17.145367425788027, + "learning_rate": 1.2091191168259521e-05, + "loss": 1.258, + "step": 74838 + }, + { + "epoch": 0.9, + "grad_norm": 29.72527610468979, + "learning_rate": 1.2090619902727458e-05, + "loss": 1.0123, + "step": 74841 + }, + { + "epoch": 0.9, + "grad_norm": 11.744798725673599, + "learning_rate": 1.2090048630060864e-05, + "loss": 1.1608, + "step": 74844 + }, + { + "epoch": 0.9, + "grad_norm": 42.51765346270779, + "learning_rate": 1.2089477350261685e-05, + "loss": 1.2593, + "step": 74847 + }, + { + "epoch": 0.9, + "grad_norm": 27.865984178305762, + "learning_rate": 1.2088906063331871e-05, + "loss": 1.256, + "step": 74850 + }, + { + "epoch": 0.9, + "grad_norm": 6.252826138820738, + "learning_rate": 1.2088334769273375e-05, + "loss": 1.2554, + "step": 74853 + }, + { + "epoch": 0.9, + "grad_norm": 4.158225477117098, + "learning_rate": 1.2087763468088147e-05, + "loss": 1.528, + "step": 74856 + }, + { + "epoch": 0.9, + "grad_norm": 7.237342301240244, + "learning_rate": 1.2087192159778129e-05, + "loss": 1.2478, + "step": 74859 + }, + { + "epoch": 0.9, + "grad_norm": 27.884711456761337, + "learning_rate": 1.208662084434528e-05, + "loss": 1.6311, + "step": 74862 + }, + { + "epoch": 0.9, + "grad_norm": 16.278992241216024, + "learning_rate": 1.2086049521791545e-05, + "loss": 1.0892, + "step": 74865 + }, + { + "epoch": 0.9, + "grad_norm": 9.906837409457284, + "learning_rate": 1.2085478192118871e-05, + "loss": 1.3978, + "step": 74868 + }, + { + "epoch": 0.9, + "grad_norm": 6.134986177773175, + "learning_rate": 1.2084906855329215e-05, + "loss": 1.555, + "step": 74871 + }, + { + "epoch": 0.9, + "grad_norm": 14.882747786293544, + "learning_rate": 1.2084335511424524e-05, + "loss": 0.9902, + "step": 74874 + }, + { + "epoch": 0.9, + "grad_norm": 22.520792676815887, + "learning_rate": 1.2083764160406743e-05, + "loss": 1.1854, + "step": 74877 + }, + { + "epoch": 0.9, + "grad_norm": 9.414491739270849, + "learning_rate": 1.2083192802277825e-05, + "loss": 1.3298, + "step": 74880 + }, + { + "epoch": 0.9, + "grad_norm": 13.170695818077837, + "learning_rate": 1.2082621437039723e-05, + "loss": 1.6274, + "step": 74883 + }, + { + "epoch": 0.9, + "grad_norm": 6.086965281760304, + "learning_rate": 1.2082050064694383e-05, + "loss": 1.4491, + "step": 74886 + }, + { + "epoch": 0.9, + "grad_norm": 8.087299191134369, + "learning_rate": 1.2081478685243757e-05, + "loss": 1.1587, + "step": 74889 + }, + { + "epoch": 0.9, + "grad_norm": 6.9159378010197115, + "learning_rate": 1.2080907298689795e-05, + "loss": 1.4434, + "step": 74892 + }, + { + "epoch": 0.9, + "grad_norm": 15.280471185418534, + "learning_rate": 1.2080335905034444e-05, + "loss": 1.5752, + "step": 74895 + }, + { + "epoch": 0.9, + "grad_norm": 3.363384797938944, + "learning_rate": 1.2079764504279656e-05, + "loss": 1.3793, + "step": 74898 + }, + { + "epoch": 0.9, + "grad_norm": 12.639435658794696, + "learning_rate": 1.207919309642738e-05, + "loss": 1.5279, + "step": 74901 + }, + { + "epoch": 0.9, + "grad_norm": 42.50412959442351, + "learning_rate": 1.207862168147957e-05, + "loss": 1.1649, + "step": 74904 + }, + { + "epoch": 0.9, + "grad_norm": 6.618520451692335, + "learning_rate": 1.207805025943817e-05, + "loss": 1.2811, + "step": 74907 + }, + { + "epoch": 0.9, + "grad_norm": 18.195453568235767, + "learning_rate": 1.2077478830305136e-05, + "loss": 1.5549, + "step": 74910 + }, + { + "epoch": 0.9, + "grad_norm": 12.15325915295898, + "learning_rate": 1.2076907394082411e-05, + "loss": 1.8065, + "step": 74913 + }, + { + "epoch": 0.9, + "grad_norm": 5.22395711433016, + "learning_rate": 1.2076335950771951e-05, + "loss": 1.5911, + "step": 74916 + }, + { + "epoch": 0.9, + "grad_norm": 11.868100890021164, + "learning_rate": 1.2075764500375707e-05, + "loss": 1.4612, + "step": 74919 + }, + { + "epoch": 0.9, + "grad_norm": 16.386342716240144, + "learning_rate": 1.207519304289562e-05, + "loss": 1.3866, + "step": 74922 + }, + { + "epoch": 0.9, + "grad_norm": 3.6594098038274097, + "learning_rate": 1.2074621578333651e-05, + "loss": 1.1897, + "step": 74925 + }, + { + "epoch": 0.9, + "grad_norm": 7.384876628433488, + "learning_rate": 1.2074050106691747e-05, + "loss": 1.1143, + "step": 74928 + }, + { + "epoch": 0.9, + "grad_norm": 26.68286229739949, + "learning_rate": 1.2073478627971855e-05, + "loss": 1.4076, + "step": 74931 + }, + { + "epoch": 0.9, + "grad_norm": 18.19804211165011, + "learning_rate": 1.2072907142175927e-05, + "loss": 1.3824, + "step": 74934 + }, + { + "epoch": 0.9, + "grad_norm": 9.62040087698752, + "learning_rate": 1.2072335649305915e-05, + "loss": 1.1246, + "step": 74937 + }, + { + "epoch": 0.9, + "grad_norm": 10.605866322745673, + "learning_rate": 1.2071764149363767e-05, + "loss": 1.0567, + "step": 74940 + }, + { + "epoch": 0.9, + "grad_norm": 5.859525108993114, + "learning_rate": 1.2071192642351432e-05, + "loss": 1.3075, + "step": 74943 + }, + { + "epoch": 0.9, + "grad_norm": 6.437094752484019, + "learning_rate": 1.2070621128270868e-05, + "loss": 1.4484, + "step": 74946 + }, + { + "epoch": 0.9, + "grad_norm": 11.741452102614552, + "learning_rate": 1.2070049607124016e-05, + "loss": 1.4771, + "step": 74949 + }, + { + "epoch": 0.9, + "grad_norm": 14.270545347340487, + "learning_rate": 1.2069478078912834e-05, + "loss": 1.1469, + "step": 74952 + }, + { + "epoch": 0.9, + "grad_norm": 13.652499515372588, + "learning_rate": 1.2068906543639263e-05, + "loss": 1.4391, + "step": 74955 + }, + { + "epoch": 0.9, + "grad_norm": 10.177359470944605, + "learning_rate": 1.2068335001305263e-05, + "loss": 1.4351, + "step": 74958 + }, + { + "epoch": 0.9, + "grad_norm": 21.21186033496345, + "learning_rate": 1.206776345191278e-05, + "loss": 1.1782, + "step": 74961 + }, + { + "epoch": 0.9, + "grad_norm": 211.38869188224396, + "learning_rate": 1.2067191895463766e-05, + "loss": 1.4987, + "step": 74964 + }, + { + "epoch": 0.9, + "grad_norm": 22.168530186099108, + "learning_rate": 1.2066620331960168e-05, + "loss": 1.4953, + "step": 74967 + }, + { + "epoch": 0.9, + "grad_norm": 12.7502626913087, + "learning_rate": 1.2066048761403941e-05, + "loss": 1.5372, + "step": 74970 + }, + { + "epoch": 0.9, + "grad_norm": 27.7628180643645, + "learning_rate": 1.2065477183797034e-05, + "loss": 1.4448, + "step": 74973 + }, + { + "epoch": 0.9, + "grad_norm": 6.763776661192181, + "learning_rate": 1.2064905599141395e-05, + "loss": 1.3491, + "step": 74976 + }, + { + "epoch": 0.9, + "grad_norm": 8.67341813570985, + "learning_rate": 1.206433400743898e-05, + "loss": 1.2237, + "step": 74979 + }, + { + "epoch": 0.9, + "grad_norm": 50.97527644877002, + "learning_rate": 1.2063762408691734e-05, + "loss": 1.3333, + "step": 74982 + }, + { + "epoch": 0.9, + "grad_norm": 8.235632076959034, + "learning_rate": 1.2063190802901613e-05, + "loss": 1.5617, + "step": 74985 + }, + { + "epoch": 0.9, + "grad_norm": 3.2141648922216097, + "learning_rate": 1.2062619190070564e-05, + "loss": 1.4862, + "step": 74988 + }, + { + "epoch": 0.9, + "grad_norm": 11.177646185886482, + "learning_rate": 1.206204757020054e-05, + "loss": 1.2783, + "step": 74991 + }, + { + "epoch": 0.9, + "grad_norm": 2.8295427328269596, + "learning_rate": 1.2061475943293488e-05, + "loss": 1.5974, + "step": 74994 + }, + { + "epoch": 0.9, + "grad_norm": 18.950866479095303, + "learning_rate": 1.206090430935136e-05, + "loss": 2.0779, + "step": 74997 + }, + { + "epoch": 0.9, + "grad_norm": 4.296034614864359, + "learning_rate": 1.2060332668376109e-05, + "loss": 1.2163, + "step": 75000 + }, + { + "epoch": 0.9, + "grad_norm": 5.7232739686817515, + "learning_rate": 1.2059761020369686e-05, + "loss": 1.5544, + "step": 75003 + }, + { + "epoch": 0.9, + "grad_norm": 9.705458931994649, + "learning_rate": 1.2059189365334043e-05, + "loss": 1.372, + "step": 75006 + }, + { + "epoch": 0.9, + "grad_norm": 5.550020264572746, + "learning_rate": 1.2058617703271122e-05, + "loss": 1.1758, + "step": 75009 + }, + { + "epoch": 0.9, + "grad_norm": 9.571576768362565, + "learning_rate": 1.2058046034182885e-05, + "loss": 1.4991, + "step": 75012 + }, + { + "epoch": 0.9, + "grad_norm": 12.021339959925074, + "learning_rate": 1.2057474358071278e-05, + "loss": 1.2831, + "step": 75015 + }, + { + "epoch": 0.9, + "grad_norm": 36.8207498465064, + "learning_rate": 1.2056902674938251e-05, + "loss": 1.3907, + "step": 75018 + }, + { + "epoch": 0.9, + "grad_norm": 6.706404920387894, + "learning_rate": 1.2056330984785755e-05, + "loss": 1.325, + "step": 75021 + }, + { + "epoch": 0.9, + "grad_norm": 7.749056130141393, + "learning_rate": 1.2055759287615745e-05, + "loss": 1.3196, + "step": 75024 + }, + { + "epoch": 0.9, + "grad_norm": 80.47802209862051, + "learning_rate": 1.2055187583430165e-05, + "loss": 1.3112, + "step": 75027 + }, + { + "epoch": 0.9, + "grad_norm": 9.95530073367355, + "learning_rate": 1.2054615872230972e-05, + "loss": 1.5904, + "step": 75030 + }, + { + "epoch": 0.9, + "grad_norm": 6.958467683181611, + "learning_rate": 1.2054044154020116e-05, + "loss": 1.1641, + "step": 75033 + }, + { + "epoch": 0.9, + "grad_norm": 14.543660390807588, + "learning_rate": 1.2053472428799546e-05, + "loss": 1.1872, + "step": 75036 + }, + { + "epoch": 0.9, + "grad_norm": 11.132615575320179, + "learning_rate": 1.2052900696571216e-05, + "loss": 1.4124, + "step": 75039 + }, + { + "epoch": 0.9, + "grad_norm": 27.583429879730115, + "learning_rate": 1.2052328957337074e-05, + "loss": 1.1249, + "step": 75042 + }, + { + "epoch": 0.9, + "grad_norm": 13.77300424706146, + "learning_rate": 1.2051757211099073e-05, + "loss": 1.049, + "step": 75045 + }, + { + "epoch": 0.9, + "grad_norm": 7.809782311321696, + "learning_rate": 1.2051185457859163e-05, + "loss": 1.3217, + "step": 75048 + }, + { + "epoch": 0.9, + "grad_norm": 12.253426091127519, + "learning_rate": 1.2050613697619295e-05, + "loss": 1.4291, + "step": 75051 + }, + { + "epoch": 0.9, + "grad_norm": 3.704275671919323, + "learning_rate": 1.2050041930381422e-05, + "loss": 1.2118, + "step": 75054 + }, + { + "epoch": 0.9, + "grad_norm": 9.86065197284997, + "learning_rate": 1.2049470156147497e-05, + "loss": 1.4055, + "step": 75057 + }, + { + "epoch": 0.9, + "grad_norm": 8.002940319626074, + "learning_rate": 1.2048898374919466e-05, + "loss": 1.2792, + "step": 75060 + }, + { + "epoch": 0.9, + "grad_norm": 17.459875378531017, + "learning_rate": 1.204832658669928e-05, + "loss": 1.2794, + "step": 75063 + }, + { + "epoch": 0.9, + "grad_norm": 25.974742910457252, + "learning_rate": 1.2047754791488897e-05, + "loss": 1.3884, + "step": 75066 + }, + { + "epoch": 0.9, + "grad_norm": 11.709992988533939, + "learning_rate": 1.2047182989290265e-05, + "loss": 1.5689, + "step": 75069 + }, + { + "epoch": 0.9, + "grad_norm": 25.657562565965094, + "learning_rate": 1.2046611180105335e-05, + "loss": 0.9485, + "step": 75072 + }, + { + "epoch": 0.9, + "grad_norm": 25.605497364655058, + "learning_rate": 1.2046039363936055e-05, + "loss": 1.3434, + "step": 75075 + }, + { + "epoch": 0.9, + "grad_norm": 12.14435205594786, + "learning_rate": 1.2045467540784382e-05, + "loss": 1.9213, + "step": 75078 + }, + { + "epoch": 0.9, + "grad_norm": 10.765677125405368, + "learning_rate": 1.2044895710652264e-05, + "loss": 1.2009, + "step": 75081 + }, + { + "epoch": 0.9, + "grad_norm": 19.477694003913548, + "learning_rate": 1.2044323873541655e-05, + "loss": 1.4818, + "step": 75084 + }, + { + "epoch": 0.9, + "grad_norm": 13.693571616685723, + "learning_rate": 1.2043752029454504e-05, + "loss": 1.2349, + "step": 75087 + }, + { + "epoch": 0.9, + "grad_norm": 7.186257163858981, + "learning_rate": 1.2043180178392764e-05, + "loss": 1.4998, + "step": 75090 + }, + { + "epoch": 0.9, + "grad_norm": 23.372434778675604, + "learning_rate": 1.2042608320358385e-05, + "loss": 1.4352, + "step": 75093 + }, + { + "epoch": 0.9, + "grad_norm": 3.7907371972718433, + "learning_rate": 1.204203645535332e-05, + "loss": 1.5407, + "step": 75096 + }, + { + "epoch": 0.9, + "grad_norm": 28.597603521435634, + "learning_rate": 1.204146458337952e-05, + "loss": 1.0542, + "step": 75099 + }, + { + "epoch": 0.9, + "grad_norm": 7.911798809423211, + "learning_rate": 1.2040892704438935e-05, + "loss": 1.3607, + "step": 75102 + }, + { + "epoch": 0.9, + "grad_norm": 23.08725494274426, + "learning_rate": 1.204032081853352e-05, + "loss": 1.314, + "step": 75105 + }, + { + "epoch": 0.9, + "grad_norm": 24.356125884426827, + "learning_rate": 1.2039748925665227e-05, + "loss": 1.7763, + "step": 75108 + }, + { + "epoch": 0.9, + "grad_norm": 6.290673685343704, + "learning_rate": 1.2039177025836004e-05, + "loss": 0.8804, + "step": 75111 + }, + { + "epoch": 0.9, + "grad_norm": 11.974161648811682, + "learning_rate": 1.2038605119047803e-05, + "loss": 1.2208, + "step": 75114 + }, + { + "epoch": 0.9, + "grad_norm": 5.815765683486658, + "learning_rate": 1.2038033205302578e-05, + "loss": 1.3531, + "step": 75117 + }, + { + "epoch": 0.9, + "grad_norm": 10.328817121373145, + "learning_rate": 1.203746128460228e-05, + "loss": 1.493, + "step": 75120 + }, + { + "epoch": 0.9, + "grad_norm": 5.330457767185018, + "learning_rate": 1.203688935694886e-05, + "loss": 1.3539, + "step": 75123 + }, + { + "epoch": 0.9, + "grad_norm": 10.24616563773819, + "learning_rate": 1.2036317422344273e-05, + "loss": 1.4825, + "step": 75126 + }, + { + "epoch": 0.9, + "grad_norm": 23.237352392421773, + "learning_rate": 1.2035745480790463e-05, + "loss": 1.5287, + "step": 75129 + }, + { + "epoch": 0.9, + "grad_norm": 3.600962411392128, + "learning_rate": 1.2035173532289391e-05, + "loss": 0.9583, + "step": 75132 + }, + { + "epoch": 0.9, + "grad_norm": 4.566960634338481, + "learning_rate": 1.2034601576843004e-05, + "loss": 0.9002, + "step": 75135 + }, + { + "epoch": 0.9, + "grad_norm": 20.40786172071809, + "learning_rate": 1.2034029614453253e-05, + "loss": 1.4158, + "step": 75138 + }, + { + "epoch": 0.9, + "grad_norm": 14.083157158254915, + "learning_rate": 1.2033457645122095e-05, + "loss": 1.4969, + "step": 75141 + }, + { + "epoch": 0.9, + "grad_norm": 11.372185274484481, + "learning_rate": 1.2032885668851474e-05, + "loss": 1.4548, + "step": 75144 + }, + { + "epoch": 0.9, + "grad_norm": 4.987906113721698, + "learning_rate": 1.2032313685643349e-05, + "loss": 1.3241, + "step": 75147 + }, + { + "epoch": 0.9, + "grad_norm": 9.944325884807192, + "learning_rate": 1.2031741695499669e-05, + "loss": 1.1635, + "step": 75150 + }, + { + "epoch": 0.9, + "grad_norm": 6.651238822663614, + "learning_rate": 1.2031169698422388e-05, + "loss": 1.3302, + "step": 75153 + }, + { + "epoch": 0.9, + "grad_norm": 15.678332268349205, + "learning_rate": 1.2030597694413455e-05, + "loss": 1.5154, + "step": 75156 + }, + { + "epoch": 0.9, + "grad_norm": 26.98677339952287, + "learning_rate": 1.2030025683474822e-05, + "loss": 1.312, + "step": 75159 + }, + { + "epoch": 0.9, + "grad_norm": 13.334751068403968, + "learning_rate": 1.2029453665608446e-05, + "loss": 1.1715, + "step": 75162 + }, + { + "epoch": 0.9, + "grad_norm": 2.7814093062140475, + "learning_rate": 1.2028881640816272e-05, + "loss": 1.2353, + "step": 75165 + }, + { + "epoch": 0.9, + "grad_norm": 8.781222517677449, + "learning_rate": 1.202830960910026e-05, + "loss": 1.295, + "step": 75168 + }, + { + "epoch": 0.9, + "grad_norm": 7.174353005286557, + "learning_rate": 1.2027737570462353e-05, + "loss": 1.2135, + "step": 75171 + }, + { + "epoch": 0.9, + "grad_norm": 13.695436935579552, + "learning_rate": 1.2027165524904511e-05, + "loss": 1.5446, + "step": 75174 + }, + { + "epoch": 0.9, + "grad_norm": 23.998989054700047, + "learning_rate": 1.2026593472428683e-05, + "loss": 1.6656, + "step": 75177 + }, + { + "epoch": 0.9, + "grad_norm": 15.115502660324921, + "learning_rate": 1.2026021413036822e-05, + "loss": 1.2771, + "step": 75180 + }, + { + "epoch": 0.9, + "grad_norm": 4.396823539032373, + "learning_rate": 1.202544934673088e-05, + "loss": 1.3929, + "step": 75183 + }, + { + "epoch": 0.9, + "grad_norm": 11.797869510919748, + "learning_rate": 1.2024877273512805e-05, + "loss": 1.308, + "step": 75186 + }, + { + "epoch": 0.9, + "grad_norm": 15.452165575403548, + "learning_rate": 1.2024305193384555e-05, + "loss": 1.6692, + "step": 75189 + }, + { + "epoch": 0.9, + "grad_norm": 34.38752676080205, + "learning_rate": 1.2023733106348082e-05, + "loss": 1.5554, + "step": 75192 + }, + { + "epoch": 0.9, + "grad_norm": 12.623329552351485, + "learning_rate": 1.2023161012405338e-05, + "loss": 1.2902, + "step": 75195 + }, + { + "epoch": 0.9, + "grad_norm": 15.533511330055337, + "learning_rate": 1.202258891155827e-05, + "loss": 1.2742, + "step": 75198 + }, + { + "epoch": 0.9, + "grad_norm": 34.59004069797991, + "learning_rate": 1.202201680380884e-05, + "loss": 1.7581, + "step": 75201 + }, + { + "epoch": 0.9, + "grad_norm": 6.2560693916409775, + "learning_rate": 1.2021444689158993e-05, + "loss": 1.0601, + "step": 75204 + }, + { + "epoch": 0.9, + "grad_norm": 8.509938445714228, + "learning_rate": 1.2020872567610682e-05, + "loss": 1.2811, + "step": 75207 + }, + { + "epoch": 0.9, + "grad_norm": 2.5440300430897262, + "learning_rate": 1.2020300439165859e-05, + "loss": 1.157, + "step": 75210 + }, + { + "epoch": 0.9, + "grad_norm": 12.417808391084607, + "learning_rate": 1.201972830382648e-05, + "loss": 1.6045, + "step": 75213 + }, + { + "epoch": 0.9, + "grad_norm": 8.32784093233093, + "learning_rate": 1.2019156161594499e-05, + "loss": 1.4992, + "step": 75216 + }, + { + "epoch": 0.9, + "grad_norm": 19.795740656281062, + "learning_rate": 1.201858401247186e-05, + "loss": 1.4761, + "step": 75219 + }, + { + "epoch": 0.9, + "grad_norm": 9.221104213089834, + "learning_rate": 1.2018011856460525e-05, + "loss": 1.6838, + "step": 75222 + }, + { + "epoch": 0.9, + "grad_norm": 3.5997627606589617, + "learning_rate": 1.201743969356244e-05, + "loss": 1.5683, + "step": 75225 + }, + { + "epoch": 0.9, + "grad_norm": 12.74064318631087, + "learning_rate": 1.2016867523779565e-05, + "loss": 1.197, + "step": 75228 + }, + { + "epoch": 0.9, + "grad_norm": 6.375024113409684, + "learning_rate": 1.2016295347113843e-05, + "loss": 1.3313, + "step": 75231 + }, + { + "epoch": 0.9, + "grad_norm": 18.507137656269567, + "learning_rate": 1.2015723163567233e-05, + "loss": 1.6092, + "step": 75234 + }, + { + "epoch": 0.9, + "grad_norm": 23.896309037032655, + "learning_rate": 1.2015150973141682e-05, + "loss": 1.5116, + "step": 75237 + }, + { + "epoch": 0.9, + "grad_norm": 11.987316923856019, + "learning_rate": 1.201457877583915e-05, + "loss": 1.1419, + "step": 75240 + }, + { + "epoch": 0.9, + "grad_norm": 7.198962444746753, + "learning_rate": 1.2014006571661584e-05, + "loss": 1.4496, + "step": 75243 + }, + { + "epoch": 0.9, + "grad_norm": 65.40770684133422, + "learning_rate": 1.201343436061094e-05, + "loss": 1.4914, + "step": 75246 + }, + { + "epoch": 0.9, + "grad_norm": 6.372254055051533, + "learning_rate": 1.2012862142689174e-05, + "loss": 1.1837, + "step": 75249 + }, + { + "epoch": 0.9, + "grad_norm": 21.83776008177806, + "learning_rate": 1.201228991789823e-05, + "loss": 1.6014, + "step": 75252 + }, + { + "epoch": 0.9, + "grad_norm": 10.79300411854074, + "learning_rate": 1.2011717686240068e-05, + "loss": 1.311, + "step": 75255 + }, + { + "epoch": 0.9, + "grad_norm": 65.52003934581427, + "learning_rate": 1.2011145447716634e-05, + "loss": 1.1385, + "step": 75258 + }, + { + "epoch": 0.9, + "grad_norm": 11.270949082887688, + "learning_rate": 1.2010573202329889e-05, + "loss": 1.37, + "step": 75261 + }, + { + "epoch": 0.91, + "grad_norm": 46.13373009838014, + "learning_rate": 1.201000095008178e-05, + "loss": 1.3455, + "step": 75264 + }, + { + "epoch": 0.91, + "grad_norm": 3.205341392519245, + "learning_rate": 1.2009428690974261e-05, + "loss": 1.3711, + "step": 75267 + }, + { + "epoch": 0.91, + "grad_norm": 33.38820798245061, + "learning_rate": 1.200885642500929e-05, + "loss": 1.6109, + "step": 75270 + }, + { + "epoch": 0.91, + "grad_norm": 7.229234829161961, + "learning_rate": 1.200828415218881e-05, + "loss": 1.4691, + "step": 75273 + }, + { + "epoch": 0.91, + "grad_norm": 5.904080781890889, + "learning_rate": 1.2007711872514782e-05, + "loss": 1.0652, + "step": 75276 + }, + { + "epoch": 0.91, + "grad_norm": 9.128536167253962, + "learning_rate": 1.2007139585989159e-05, + "loss": 1.5413, + "step": 75279 + }, + { + "epoch": 0.91, + "grad_norm": 27.133721631033325, + "learning_rate": 1.2006567292613889e-05, + "loss": 1.4129, + "step": 75282 + }, + { + "epoch": 0.91, + "grad_norm": 3.9223479969529467, + "learning_rate": 1.2005994992390926e-05, + "loss": 1.2486, + "step": 75285 + }, + { + "epoch": 0.91, + "grad_norm": 23.552516020617382, + "learning_rate": 1.2005422685322225e-05, + "loss": 1.2482, + "step": 75288 + }, + { + "epoch": 0.91, + "grad_norm": 5.6398924950639255, + "learning_rate": 1.2004850371409742e-05, + "loss": 1.4078, + "step": 75291 + }, + { + "epoch": 0.91, + "grad_norm": 2.8931730759786234, + "learning_rate": 1.2004278050655422e-05, + "loss": 1.1687, + "step": 75294 + }, + { + "epoch": 0.91, + "grad_norm": 32.926403612636086, + "learning_rate": 1.2003705723061227e-05, + "loss": 1.2035, + "step": 75297 + }, + { + "epoch": 0.91, + "grad_norm": 5.870954285762647, + "learning_rate": 1.2003133388629107e-05, + "loss": 1.4752, + "step": 75300 + }, + { + "epoch": 0.91, + "grad_norm": 16.822465251553485, + "learning_rate": 1.2002561047361013e-05, + "loss": 1.7889, + "step": 75303 + }, + { + "epoch": 0.91, + "grad_norm": 8.896422669577307, + "learning_rate": 1.2001988699258895e-05, + "loss": 1.368, + "step": 75306 + }, + { + "epoch": 0.91, + "grad_norm": 9.465631794644723, + "learning_rate": 1.2001416344324716e-05, + "loss": 1.4078, + "step": 75309 + }, + { + "epoch": 0.91, + "grad_norm": 25.336548197202376, + "learning_rate": 1.2000843982560424e-05, + "loss": 1.1174, + "step": 75312 + }, + { + "epoch": 0.91, + "grad_norm": 7.320723223822847, + "learning_rate": 1.2000271613967971e-05, + "loss": 1.3719, + "step": 75315 + }, + { + "epoch": 0.91, + "grad_norm": 24.09648933690968, + "learning_rate": 1.1999699238549306e-05, + "loss": 1.491, + "step": 75318 + }, + { + "epoch": 0.91, + "grad_norm": 9.796469240414636, + "learning_rate": 1.1999126856306396e-05, + "loss": 1.0595, + "step": 75321 + }, + { + "epoch": 0.91, + "grad_norm": 6.139502448312938, + "learning_rate": 1.1998554467241183e-05, + "loss": 1.234, + "step": 75324 + }, + { + "epoch": 0.91, + "grad_norm": 8.986386686431235, + "learning_rate": 1.1997982071355623e-05, + "loss": 1.5582, + "step": 75327 + }, + { + "epoch": 0.91, + "grad_norm": 30.523858722662318, + "learning_rate": 1.1997409668651669e-05, + "loss": 1.2376, + "step": 75330 + }, + { + "epoch": 0.91, + "grad_norm": 23.255167494865375, + "learning_rate": 1.1996837259131278e-05, + "loss": 1.419, + "step": 75333 + }, + { + "epoch": 0.91, + "grad_norm": 15.158894818615563, + "learning_rate": 1.1996264842796399e-05, + "loss": 1.75, + "step": 75336 + }, + { + "epoch": 0.91, + "grad_norm": 22.018595912284873, + "learning_rate": 1.1995692419648985e-05, + "loss": 1.3513, + "step": 75339 + }, + { + "epoch": 0.91, + "grad_norm": 14.601992972131105, + "learning_rate": 1.1995119989690995e-05, + "loss": 1.425, + "step": 75342 + }, + { + "epoch": 0.91, + "grad_norm": 11.660701630751243, + "learning_rate": 1.1994547552924379e-05, + "loss": 1.1415, + "step": 75345 + }, + { + "epoch": 0.91, + "grad_norm": 8.373751339402999, + "learning_rate": 1.1993975109351088e-05, + "loss": 1.4553, + "step": 75348 + }, + { + "epoch": 0.91, + "grad_norm": 14.044197348500061, + "learning_rate": 1.1993402658973081e-05, + "loss": 1.6799, + "step": 75351 + }, + { + "epoch": 0.91, + "grad_norm": 16.759327852958428, + "learning_rate": 1.1992830201792307e-05, + "loss": 1.2489, + "step": 75354 + }, + { + "epoch": 0.91, + "grad_norm": 6.572008562654409, + "learning_rate": 1.1992257737810725e-05, + "loss": 1.379, + "step": 75357 + }, + { + "epoch": 0.91, + "grad_norm": 11.736651603062462, + "learning_rate": 1.199168526703028e-05, + "loss": 1.7141, + "step": 75360 + }, + { + "epoch": 0.91, + "grad_norm": 4.934151495275098, + "learning_rate": 1.1991112789452932e-05, + "loss": 1.1338, + "step": 75363 + }, + { + "epoch": 0.91, + "grad_norm": 20.11903397895261, + "learning_rate": 1.1990540305080633e-05, + "loss": 1.1257, + "step": 75366 + }, + { + "epoch": 0.91, + "grad_norm": 5.716681728046648, + "learning_rate": 1.198996781391534e-05, + "loss": 1.0995, + "step": 75369 + }, + { + "epoch": 0.91, + "grad_norm": 36.53396681929097, + "learning_rate": 1.1989395315959e-05, + "loss": 1.2573, + "step": 75372 + }, + { + "epoch": 0.91, + "grad_norm": 15.003328158775707, + "learning_rate": 1.1988822811213572e-05, + "loss": 1.33, + "step": 75375 + }, + { + "epoch": 0.91, + "grad_norm": 3.2493042972167396, + "learning_rate": 1.1988250299681007e-05, + "loss": 1.1987, + "step": 75378 + }, + { + "epoch": 0.91, + "grad_norm": 15.562384762895402, + "learning_rate": 1.1987677781363261e-05, + "loss": 1.4081, + "step": 75381 + }, + { + "epoch": 0.91, + "grad_norm": 11.048696027466962, + "learning_rate": 1.1987105256262286e-05, + "loss": 1.4548, + "step": 75384 + }, + { + "epoch": 0.91, + "grad_norm": 13.991122827710365, + "learning_rate": 1.1986532724380037e-05, + "loss": 1.2345, + "step": 75387 + }, + { + "epoch": 0.91, + "grad_norm": 18.78603855170711, + "learning_rate": 1.1985960185718468e-05, + "loss": 1.3696, + "step": 75390 + }, + { + "epoch": 0.91, + "grad_norm": 5.250502066154317, + "learning_rate": 1.1985387640279532e-05, + "loss": 1.4295, + "step": 75393 + }, + { + "epoch": 0.91, + "grad_norm": 25.107828259104963, + "learning_rate": 1.198481508806518e-05, + "loss": 1.8223, + "step": 75396 + }, + { + "epoch": 0.91, + "grad_norm": 7.1131840944444145, + "learning_rate": 1.1984242529077373e-05, + "loss": 1.2821, + "step": 75399 + }, + { + "epoch": 0.91, + "grad_norm": 10.73126528577502, + "learning_rate": 1.1983669963318059e-05, + "loss": 1.4085, + "step": 75402 + }, + { + "epoch": 0.91, + "grad_norm": 11.875288308384677, + "learning_rate": 1.1983097390789193e-05, + "loss": 1.3428, + "step": 75405 + }, + { + "epoch": 0.91, + "grad_norm": 28.617000425829133, + "learning_rate": 1.1982524811492732e-05, + "loss": 0.9345, + "step": 75408 + }, + { + "epoch": 0.91, + "grad_norm": 18.44491879680986, + "learning_rate": 1.198195222543063e-05, + "loss": 1.5995, + "step": 75411 + }, + { + "epoch": 0.91, + "grad_norm": 17.267473793832004, + "learning_rate": 1.1981379632604835e-05, + "loss": 1.4735, + "step": 75414 + }, + { + "epoch": 0.91, + "grad_norm": 15.719524815633815, + "learning_rate": 1.1980807033017305e-05, + "loss": 1.081, + "step": 75417 + }, + { + "epoch": 0.91, + "grad_norm": 58.36995441192338, + "learning_rate": 1.1980234426669997e-05, + "loss": 1.143, + "step": 75420 + }, + { + "epoch": 0.91, + "grad_norm": 5.4156980625271816, + "learning_rate": 1.197966181356486e-05, + "loss": 1.5857, + "step": 75423 + }, + { + "epoch": 0.91, + "grad_norm": 20.7023910054278, + "learning_rate": 1.197908919370385e-05, + "loss": 1.7028, + "step": 75426 + }, + { + "epoch": 0.91, + "grad_norm": 14.590720766277526, + "learning_rate": 1.1978516567088922e-05, + "loss": 1.3551, + "step": 75429 + }, + { + "epoch": 0.91, + "grad_norm": 15.131186094484661, + "learning_rate": 1.1977943933722031e-05, + "loss": 0.8893, + "step": 75432 + }, + { + "epoch": 0.91, + "grad_norm": 7.514196549488626, + "learning_rate": 1.1977371293605127e-05, + "loss": 1.3178, + "step": 75435 + }, + { + "epoch": 0.91, + "grad_norm": 3.7767861595555376, + "learning_rate": 1.1976798646740169e-05, + "loss": 1.5832, + "step": 75438 + }, + { + "epoch": 0.91, + "grad_norm": 7.9082025186305325, + "learning_rate": 1.197622599312911e-05, + "loss": 1.3664, + "step": 75441 + }, + { + "epoch": 0.91, + "grad_norm": 17.6117166036375, + "learning_rate": 1.1975653332773903e-05, + "loss": 1.2361, + "step": 75444 + }, + { + "epoch": 0.91, + "grad_norm": 14.233757777187085, + "learning_rate": 1.1975080665676502e-05, + "loss": 1.2266, + "step": 75447 + }, + { + "epoch": 0.91, + "grad_norm": 4.646995999450765, + "learning_rate": 1.1974507991838862e-05, + "loss": 1.416, + "step": 75450 + }, + { + "epoch": 0.91, + "grad_norm": 6.4077218889641285, + "learning_rate": 1.1973935311262939e-05, + "loss": 1.7086, + "step": 75453 + }, + { + "epoch": 0.91, + "grad_norm": 3.253136998403617, + "learning_rate": 1.1973362623950682e-05, + "loss": 1.3734, + "step": 75456 + }, + { + "epoch": 0.91, + "grad_norm": 11.885588888628615, + "learning_rate": 1.1972789929904054e-05, + "loss": 1.1432, + "step": 75459 + }, + { + "epoch": 0.91, + "grad_norm": 4.599020510129688, + "learning_rate": 1.1972217229125003e-05, + "loss": 1.6978, + "step": 75462 + }, + { + "epoch": 0.91, + "grad_norm": 9.22681515465336, + "learning_rate": 1.1971644521615484e-05, + "loss": 1.3082, + "step": 75465 + }, + { + "epoch": 0.91, + "grad_norm": 35.90253882167688, + "learning_rate": 1.197107180737745e-05, + "loss": 1.2776, + "step": 75468 + }, + { + "epoch": 0.91, + "grad_norm": 10.300246420947172, + "learning_rate": 1.1970499086412862e-05, + "loss": 1.2328, + "step": 75471 + }, + { + "epoch": 0.91, + "grad_norm": 16.84577500096588, + "learning_rate": 1.1969926358723669e-05, + "loss": 1.518, + "step": 75474 + }, + { + "epoch": 0.91, + "grad_norm": 2.6192538069989277, + "learning_rate": 1.1969353624311828e-05, + "loss": 1.3082, + "step": 75477 + }, + { + "epoch": 0.91, + "grad_norm": 14.662471866024669, + "learning_rate": 1.1968780883179289e-05, + "loss": 1.4385, + "step": 75480 + }, + { + "epoch": 0.91, + "grad_norm": 13.099185526890846, + "learning_rate": 1.1968208135328015e-05, + "loss": 1.2854, + "step": 75483 + }, + { + "epoch": 0.91, + "grad_norm": 5.874766638981322, + "learning_rate": 1.1967635380759952e-05, + "loss": 1.4697, + "step": 75486 + }, + { + "epoch": 0.91, + "grad_norm": 7.0445300931303585, + "learning_rate": 1.1967062619477058e-05, + "loss": 1.5398, + "step": 75489 + }, + { + "epoch": 0.91, + "grad_norm": 5.189718381727047, + "learning_rate": 1.1966489851481291e-05, + "loss": 1.0565, + "step": 75492 + }, + { + "epoch": 0.91, + "grad_norm": 10.075665980425597, + "learning_rate": 1.19659170767746e-05, + "loss": 1.5804, + "step": 75495 + }, + { + "epoch": 0.91, + "grad_norm": 8.053978107622733, + "learning_rate": 1.1965344295358943e-05, + "loss": 1.0357, + "step": 75498 + }, + { + "epoch": 0.91, + "grad_norm": 11.748756789854122, + "learning_rate": 1.1964771507236272e-05, + "loss": 1.3409, + "step": 75501 + }, + { + "epoch": 0.91, + "grad_norm": 57.85639208497903, + "learning_rate": 1.1964198712408545e-05, + "loss": 1.1128, + "step": 75504 + }, + { + "epoch": 0.91, + "grad_norm": 41.53655975759362, + "learning_rate": 1.1963625910877717e-05, + "loss": 1.5003, + "step": 75507 + }, + { + "epoch": 0.91, + "grad_norm": 4.651203434303718, + "learning_rate": 1.1963053102645737e-05, + "loss": 1.1667, + "step": 75510 + }, + { + "epoch": 0.91, + "grad_norm": 3.9603647623182145, + "learning_rate": 1.1962480287714566e-05, + "loss": 1.5051, + "step": 75513 + }, + { + "epoch": 0.91, + "grad_norm": 19.77939776872543, + "learning_rate": 1.1961907466086158e-05, + "loss": 1.5, + "step": 75516 + }, + { + "epoch": 0.91, + "grad_norm": 8.265350355977503, + "learning_rate": 1.1961334637762465e-05, + "loss": 1.6929, + "step": 75519 + }, + { + "epoch": 0.91, + "grad_norm": 21.25457128036211, + "learning_rate": 1.1960761802745444e-05, + "loss": 1.1376, + "step": 75522 + }, + { + "epoch": 0.91, + "grad_norm": 8.94066005976238, + "learning_rate": 1.196018896103705e-05, + "loss": 1.2633, + "step": 75525 + }, + { + "epoch": 0.91, + "grad_norm": 3.8108697485871645, + "learning_rate": 1.1959616112639236e-05, + "loss": 1.2161, + "step": 75528 + }, + { + "epoch": 0.91, + "grad_norm": 5.5533905389072515, + "learning_rate": 1.1959043257553957e-05, + "loss": 1.2469, + "step": 75531 + }, + { + "epoch": 0.91, + "grad_norm": 8.965137129457222, + "learning_rate": 1.195847039578317e-05, + "loss": 1.5135, + "step": 75534 + }, + { + "epoch": 0.91, + "grad_norm": 14.1361936724784, + "learning_rate": 1.195789752732883e-05, + "loss": 1.498, + "step": 75537 + }, + { + "epoch": 0.91, + "grad_norm": 4.413699923886649, + "learning_rate": 1.195732465219289e-05, + "loss": 1.5592, + "step": 75540 + }, + { + "epoch": 0.91, + "grad_norm": 18.535731921007148, + "learning_rate": 1.1956751770377304e-05, + "loss": 1.5813, + "step": 75543 + }, + { + "epoch": 0.91, + "grad_norm": 14.30820887994323, + "learning_rate": 1.1956178881884034e-05, + "loss": 1.217, + "step": 75546 + }, + { + "epoch": 0.91, + "grad_norm": 6.9456491095517645, + "learning_rate": 1.1955605986715026e-05, + "loss": 1.3762, + "step": 75549 + }, + { + "epoch": 0.91, + "grad_norm": 6.945259215014207, + "learning_rate": 1.195503308487224e-05, + "loss": 1.0595, + "step": 75552 + }, + { + "epoch": 0.91, + "grad_norm": 2.7196466248498834, + "learning_rate": 1.195446017635763e-05, + "loss": 1.1637, + "step": 75555 + }, + { + "epoch": 0.91, + "grad_norm": 3.695153090822849, + "learning_rate": 1.1953887261173153e-05, + "loss": 1.1163, + "step": 75558 + }, + { + "epoch": 0.91, + "grad_norm": 25.807691230439374, + "learning_rate": 1.1953314339320761e-05, + "loss": 1.8169, + "step": 75561 + }, + { + "epoch": 0.91, + "grad_norm": 3.0802358160693935, + "learning_rate": 1.1952741410802411e-05, + "loss": 1.4796, + "step": 75564 + }, + { + "epoch": 0.91, + "grad_norm": 6.405432247599337, + "learning_rate": 1.1952168475620058e-05, + "loss": 1.6151, + "step": 75567 + }, + { + "epoch": 0.91, + "grad_norm": 27.143138458904417, + "learning_rate": 1.1951595533775658e-05, + "loss": 1.3023, + "step": 75570 + }, + { + "epoch": 0.91, + "grad_norm": 10.970497516715046, + "learning_rate": 1.1951022585271165e-05, + "loss": 1.5279, + "step": 75573 + }, + { + "epoch": 0.91, + "grad_norm": 9.213727878169633, + "learning_rate": 1.1950449630108534e-05, + "loss": 1.1918, + "step": 75576 + }, + { + "epoch": 0.91, + "grad_norm": 9.133178650473633, + "learning_rate": 1.1949876668289724e-05, + "loss": 1.1195, + "step": 75579 + }, + { + "epoch": 0.91, + "grad_norm": 7.029502537687345, + "learning_rate": 1.1949303699816684e-05, + "loss": 1.3272, + "step": 75582 + }, + { + "epoch": 0.91, + "grad_norm": 5.7221878339674666, + "learning_rate": 1.1948730724691373e-05, + "loss": 1.1226, + "step": 75585 + }, + { + "epoch": 0.91, + "grad_norm": 6.8349539243151, + "learning_rate": 1.1948157742915745e-05, + "loss": 1.3019, + "step": 75588 + }, + { + "epoch": 0.91, + "grad_norm": 14.74571806292541, + "learning_rate": 1.1947584754491759e-05, + "loss": 0.9899, + "step": 75591 + }, + { + "epoch": 0.91, + "grad_norm": 2.676400003838422, + "learning_rate": 1.1947011759421366e-05, + "loss": 1.5157, + "step": 75594 + }, + { + "epoch": 0.91, + "grad_norm": 2.9113197408541285, + "learning_rate": 1.1946438757706522e-05, + "loss": 1.6784, + "step": 75597 + }, + { + "epoch": 0.91, + "grad_norm": 10.389215317702174, + "learning_rate": 1.1945865749349189e-05, + "loss": 1.272, + "step": 75600 + }, + { + "epoch": 0.91, + "grad_norm": 8.343224468489634, + "learning_rate": 1.1945292734351311e-05, + "loss": 1.0545, + "step": 75603 + }, + { + "epoch": 0.91, + "grad_norm": 11.993891083031714, + "learning_rate": 1.1944719712714851e-05, + "loss": 1.3366, + "step": 75606 + }, + { + "epoch": 0.91, + "grad_norm": 12.535997995738748, + "learning_rate": 1.1944146684441765e-05, + "loss": 1.8318, + "step": 75609 + }, + { + "epoch": 0.91, + "grad_norm": 9.805124439696296, + "learning_rate": 1.1943573649534007e-05, + "loss": 1.2653, + "step": 75612 + }, + { + "epoch": 0.91, + "grad_norm": 7.43901043953444, + "learning_rate": 1.1943000607993528e-05, + "loss": 1.4873, + "step": 75615 + }, + { + "epoch": 0.91, + "grad_norm": 12.204670799962303, + "learning_rate": 1.194242755982229e-05, + "loss": 1.4871, + "step": 75618 + }, + { + "epoch": 0.91, + "grad_norm": 18.624646332573374, + "learning_rate": 1.194185450502225e-05, + "loss": 1.1803, + "step": 75621 + }, + { + "epoch": 0.91, + "grad_norm": 2.832568501421807, + "learning_rate": 1.1941281443595356e-05, + "loss": 1.7073, + "step": 75624 + }, + { + "epoch": 0.91, + "grad_norm": 8.039505956442182, + "learning_rate": 1.1940708375543568e-05, + "loss": 1.3015, + "step": 75627 + }, + { + "epoch": 0.91, + "grad_norm": 14.713035426045272, + "learning_rate": 1.1940135300868842e-05, + "loss": 1.3775, + "step": 75630 + }, + { + "epoch": 0.91, + "grad_norm": 8.90701584163776, + "learning_rate": 1.1939562219573135e-05, + "loss": 1.2858, + "step": 75633 + }, + { + "epoch": 0.91, + "grad_norm": 10.52982773056225, + "learning_rate": 1.1938989131658399e-05, + "loss": 1.5439, + "step": 75636 + }, + { + "epoch": 0.91, + "grad_norm": 25.220235931724293, + "learning_rate": 1.193841603712659e-05, + "loss": 1.3169, + "step": 75639 + }, + { + "epoch": 0.91, + "grad_norm": 3.94546635246013, + "learning_rate": 1.1937842935979666e-05, + "loss": 1.2752, + "step": 75642 + }, + { + "epoch": 0.91, + "grad_norm": 9.575433503708629, + "learning_rate": 1.1937269828219583e-05, + "loss": 1.3541, + "step": 75645 + }, + { + "epoch": 0.91, + "grad_norm": 16.666581230171367, + "learning_rate": 1.1936696713848297e-05, + "loss": 1.2658, + "step": 75648 + }, + { + "epoch": 0.91, + "grad_norm": 14.026118808203547, + "learning_rate": 1.1936123592867759e-05, + "loss": 1.4279, + "step": 75651 + }, + { + "epoch": 0.91, + "grad_norm": 5.077170894910626, + "learning_rate": 1.1935550465279934e-05, + "loss": 1.0656, + "step": 75654 + }, + { + "epoch": 0.91, + "grad_norm": 17.61057871233437, + "learning_rate": 1.1934977331086768e-05, + "loss": 1.4083, + "step": 75657 + }, + { + "epoch": 0.91, + "grad_norm": 28.129784939990195, + "learning_rate": 1.1934404190290224e-05, + "loss": 1.274, + "step": 75660 + }, + { + "epoch": 0.91, + "grad_norm": 8.594584485917924, + "learning_rate": 1.1933831042892252e-05, + "loss": 1.5284, + "step": 75663 + }, + { + "epoch": 0.91, + "grad_norm": 3.4096926889903387, + "learning_rate": 1.1933257888894815e-05, + "loss": 1.065, + "step": 75666 + }, + { + "epoch": 0.91, + "grad_norm": 6.467066494944799, + "learning_rate": 1.1932684728299861e-05, + "loss": 1.7152, + "step": 75669 + }, + { + "epoch": 0.91, + "grad_norm": 13.917779698514183, + "learning_rate": 1.1932111561109352e-05, + "loss": 1.076, + "step": 75672 + }, + { + "epoch": 0.91, + "grad_norm": 5.680105973516289, + "learning_rate": 1.1931538387325246e-05, + "loss": 1.1952, + "step": 75675 + }, + { + "epoch": 0.91, + "grad_norm": 6.217328857491271, + "learning_rate": 1.193096520694949e-05, + "loss": 1.0462, + "step": 75678 + }, + { + "epoch": 0.91, + "grad_norm": 19.610711976245984, + "learning_rate": 1.1930392019984047e-05, + "loss": 0.9689, + "step": 75681 + }, + { + "epoch": 0.91, + "grad_norm": 8.505480004328689, + "learning_rate": 1.192981882643087e-05, + "loss": 1.4628, + "step": 75684 + }, + { + "epoch": 0.91, + "grad_norm": 13.692573110544553, + "learning_rate": 1.1929245626291922e-05, + "loss": 1.2518, + "step": 75687 + }, + { + "epoch": 0.91, + "grad_norm": 41.628858957193565, + "learning_rate": 1.1928672419569148e-05, + "loss": 1.3193, + "step": 75690 + }, + { + "epoch": 0.91, + "grad_norm": 23.633854901409613, + "learning_rate": 1.1928099206264512e-05, + "loss": 1.1054, + "step": 75693 + }, + { + "epoch": 0.91, + "grad_norm": 14.58271072638862, + "learning_rate": 1.1927525986379966e-05, + "loss": 1.1587, + "step": 75696 + }, + { + "epoch": 0.91, + "grad_norm": 29.21290946579051, + "learning_rate": 1.192695275991747e-05, + "loss": 1.1862, + "step": 75699 + }, + { + "epoch": 0.91, + "grad_norm": 7.734413787184924, + "learning_rate": 1.1926379526878977e-05, + "loss": 1.6146, + "step": 75702 + }, + { + "epoch": 0.91, + "grad_norm": 3.5136427486851685, + "learning_rate": 1.1925806287266443e-05, + "loss": 1.6092, + "step": 75705 + }, + { + "epoch": 0.91, + "grad_norm": 11.023359509568422, + "learning_rate": 1.1925233041081832e-05, + "loss": 1.2645, + "step": 75708 + }, + { + "epoch": 0.91, + "grad_norm": 8.753598481683678, + "learning_rate": 1.1924659788327088e-05, + "loss": 1.116, + "step": 75711 + }, + { + "epoch": 0.91, + "grad_norm": 5.675746349605047, + "learning_rate": 1.1924086529004178e-05, + "loss": 1.6027, + "step": 75714 + }, + { + "epoch": 0.91, + "grad_norm": 2.521323725416252, + "learning_rate": 1.1923513263115051e-05, + "loss": 1.2605, + "step": 75717 + }, + { + "epoch": 0.91, + "grad_norm": 14.813800802277106, + "learning_rate": 1.1922939990661667e-05, + "loss": 1.0739, + "step": 75720 + }, + { + "epoch": 0.91, + "grad_norm": 6.105083991081752, + "learning_rate": 1.1922366711645979e-05, + "loss": 1.4134, + "step": 75723 + }, + { + "epoch": 0.91, + "grad_norm": 9.782096083952931, + "learning_rate": 1.1921793426069947e-05, + "loss": 0.9733, + "step": 75726 + }, + { + "epoch": 0.91, + "grad_norm": 4.783157692478318, + "learning_rate": 1.192122013393553e-05, + "loss": 1.5112, + "step": 75729 + }, + { + "epoch": 0.91, + "grad_norm": 10.860191096737823, + "learning_rate": 1.1920646835244676e-05, + "loss": 1.7318, + "step": 75732 + }, + { + "epoch": 0.91, + "grad_norm": 5.038404754287034, + "learning_rate": 1.1920073529999348e-05, + "loss": 1.1355, + "step": 75735 + }, + { + "epoch": 0.91, + "grad_norm": 40.441426500447214, + "learning_rate": 1.1919500218201501e-05, + "loss": 1.3352, + "step": 75738 + }, + { + "epoch": 0.91, + "grad_norm": 13.021129069336078, + "learning_rate": 1.1918926899853092e-05, + "loss": 1.2122, + "step": 75741 + }, + { + "epoch": 0.91, + "grad_norm": 4.634254729249175, + "learning_rate": 1.1918353574956072e-05, + "loss": 1.3621, + "step": 75744 + }, + { + "epoch": 0.91, + "grad_norm": 17.36807256722545, + "learning_rate": 1.1917780243512408e-05, + "loss": 0.984, + "step": 75747 + }, + { + "epoch": 0.91, + "grad_norm": 148.98176234133533, + "learning_rate": 1.1917206905524048e-05, + "loss": 1.3457, + "step": 75750 + }, + { + "epoch": 0.91, + "grad_norm": 12.880119440485823, + "learning_rate": 1.1916633560992949e-05, + "loss": 1.1704, + "step": 75753 + }, + { + "epoch": 0.91, + "grad_norm": 7.429546727768349, + "learning_rate": 1.1916060209921075e-05, + "loss": 1.2844, + "step": 75756 + }, + { + "epoch": 0.91, + "grad_norm": 14.658440376246324, + "learning_rate": 1.1915486852310374e-05, + "loss": 1.3081, + "step": 75759 + }, + { + "epoch": 0.91, + "grad_norm": 5.6554074177289495, + "learning_rate": 1.191491348816281e-05, + "loss": 1.1723, + "step": 75762 + }, + { + "epoch": 0.91, + "grad_norm": 8.531081522701168, + "learning_rate": 1.1914340117480332e-05, + "loss": 1.1037, + "step": 75765 + }, + { + "epoch": 0.91, + "grad_norm": 10.723972667987644, + "learning_rate": 1.1913766740264903e-05, + "loss": 1.2317, + "step": 75768 + }, + { + "epoch": 0.91, + "grad_norm": 15.736072749143867, + "learning_rate": 1.1913193356518477e-05, + "loss": 1.3386, + "step": 75771 + }, + { + "epoch": 0.91, + "grad_norm": 9.68039806106847, + "learning_rate": 1.1912619966243013e-05, + "loss": 1.2777, + "step": 75774 + }, + { + "epoch": 0.91, + "grad_norm": 9.083885889193326, + "learning_rate": 1.1912046569440463e-05, + "loss": 1.288, + "step": 75777 + }, + { + "epoch": 0.91, + "grad_norm": 17.216062189893563, + "learning_rate": 1.1911473166112789e-05, + "loss": 1.384, + "step": 75780 + }, + { + "epoch": 0.91, + "grad_norm": 38.52527252531633, + "learning_rate": 1.1910899756261944e-05, + "loss": 1.3358, + "step": 75783 + }, + { + "epoch": 0.91, + "grad_norm": 5.682217552508422, + "learning_rate": 1.1910326339889884e-05, + "loss": 1.3148, + "step": 75786 + }, + { + "epoch": 0.91, + "grad_norm": 14.160304465745869, + "learning_rate": 1.1909752916998571e-05, + "loss": 1.5019, + "step": 75789 + }, + { + "epoch": 0.91, + "grad_norm": 5.04505902857672, + "learning_rate": 1.1909179487589961e-05, + "loss": 1.2986, + "step": 75792 + }, + { + "epoch": 0.91, + "grad_norm": 14.581925399359193, + "learning_rate": 1.1908606051666009e-05, + "loss": 1.268, + "step": 75795 + }, + { + "epoch": 0.91, + "grad_norm": 6.749591252017042, + "learning_rate": 1.1908032609228669e-05, + "loss": 1.0739, + "step": 75798 + }, + { + "epoch": 0.91, + "grad_norm": 14.437146869261268, + "learning_rate": 1.1907459160279901e-05, + "loss": 1.6384, + "step": 75801 + }, + { + "epoch": 0.91, + "grad_norm": 23.96059355939532, + "learning_rate": 1.1906885704821667e-05, + "loss": 1.7201, + "step": 75804 + }, + { + "epoch": 0.91, + "grad_norm": 25.219897876141218, + "learning_rate": 1.1906312242855912e-05, + "loss": 1.1992, + "step": 75807 + }, + { + "epoch": 0.91, + "grad_norm": 34.8843261203895, + "learning_rate": 1.1905738774384606e-05, + "loss": 1.3278, + "step": 75810 + }, + { + "epoch": 0.91, + "grad_norm": 15.031805650939244, + "learning_rate": 1.1905165299409696e-05, + "loss": 1.1545, + "step": 75813 + }, + { + "epoch": 0.91, + "grad_norm": 5.144789638618342, + "learning_rate": 1.1904591817933147e-05, + "loss": 1.2653, + "step": 75816 + }, + { + "epoch": 0.91, + "grad_norm": 8.548224853598166, + "learning_rate": 1.1904018329956908e-05, + "loss": 1.0787, + "step": 75819 + }, + { + "epoch": 0.91, + "grad_norm": 32.30624085477884, + "learning_rate": 1.190344483548294e-05, + "loss": 1.647, + "step": 75822 + }, + { + "epoch": 0.91, + "grad_norm": 22.466254394444125, + "learning_rate": 1.1902871334513204e-05, + "loss": 1.3842, + "step": 75825 + }, + { + "epoch": 0.91, + "grad_norm": 29.19602356074145, + "learning_rate": 1.1902297827049654e-05, + "loss": 1.3233, + "step": 75828 + }, + { + "epoch": 0.91, + "grad_norm": 13.577005245147468, + "learning_rate": 1.190172431309424e-05, + "loss": 1.2447, + "step": 75831 + }, + { + "epoch": 0.91, + "grad_norm": 9.673362363757997, + "learning_rate": 1.1901150792648933e-05, + "loss": 1.0612, + "step": 75834 + }, + { + "epoch": 0.91, + "grad_norm": 18.639570907845208, + "learning_rate": 1.190057726571568e-05, + "loss": 1.284, + "step": 75837 + }, + { + "epoch": 0.91, + "grad_norm": 6.61393786405183, + "learning_rate": 1.1900003732296442e-05, + "loss": 1.3493, + "step": 75840 + }, + { + "epoch": 0.91, + "grad_norm": 13.502663485046803, + "learning_rate": 1.1899430192393175e-05, + "loss": 1.4325, + "step": 75843 + }, + { + "epoch": 0.91, + "grad_norm": 24.496694660950276, + "learning_rate": 1.189885664600784e-05, + "loss": 1.789, + "step": 75846 + }, + { + "epoch": 0.91, + "grad_norm": 23.568470725132524, + "learning_rate": 1.1898283093142388e-05, + "loss": 1.4769, + "step": 75849 + }, + { + "epoch": 0.91, + "grad_norm": 6.764370650180572, + "learning_rate": 1.1897709533798779e-05, + "loss": 1.2772, + "step": 75852 + }, + { + "epoch": 0.91, + "grad_norm": 10.651248509426413, + "learning_rate": 1.1897135967978972e-05, + "loss": 1.3302, + "step": 75855 + }, + { + "epoch": 0.91, + "grad_norm": 9.266851053192626, + "learning_rate": 1.1896562395684926e-05, + "loss": 1.3188, + "step": 75858 + }, + { + "epoch": 0.91, + "grad_norm": 14.456627083554135, + "learning_rate": 1.189598881691859e-05, + "loss": 1.5808, + "step": 75861 + }, + { + "epoch": 0.91, + "grad_norm": 4.417352738851542, + "learning_rate": 1.189541523168193e-05, + "loss": 1.1363, + "step": 75864 + }, + { + "epoch": 0.91, + "grad_norm": 5.492888639176051, + "learning_rate": 1.1894841639976903e-05, + "loss": 1.2346, + "step": 75867 + }, + { + "epoch": 0.91, + "grad_norm": 10.234052288126824, + "learning_rate": 1.1894268041805462e-05, + "loss": 1.2261, + "step": 75870 + }, + { + "epoch": 0.91, + "grad_norm": 45.97450811890815, + "learning_rate": 1.1893694437169565e-05, + "loss": 1.7623, + "step": 75873 + }, + { + "epoch": 0.91, + "grad_norm": 5.244020181477392, + "learning_rate": 1.1893120826071172e-05, + "loss": 1.4417, + "step": 75876 + }, + { + "epoch": 0.91, + "grad_norm": 15.940102784577626, + "learning_rate": 1.1892547208512243e-05, + "loss": 1.1012, + "step": 75879 + }, + { + "epoch": 0.91, + "grad_norm": 6.713939284943749, + "learning_rate": 1.1891973584494726e-05, + "loss": 1.0711, + "step": 75882 + }, + { + "epoch": 0.91, + "grad_norm": 40.920680404186676, + "learning_rate": 1.1891399954020588e-05, + "loss": 1.6007, + "step": 75885 + }, + { + "epoch": 0.91, + "grad_norm": 27.422080770319226, + "learning_rate": 1.1890826317091784e-05, + "loss": 1.1955, + "step": 75888 + }, + { + "epoch": 0.91, + "grad_norm": 7.639955893506465, + "learning_rate": 1.189025267371027e-05, + "loss": 1.4198, + "step": 75891 + }, + { + "epoch": 0.91, + "grad_norm": 9.48960884900334, + "learning_rate": 1.1889679023878003e-05, + "loss": 1.1038, + "step": 75894 + }, + { + "epoch": 0.91, + "grad_norm": 5.860132321435421, + "learning_rate": 1.1889105367596942e-05, + "loss": 1.0269, + "step": 75897 + }, + { + "epoch": 0.91, + "grad_norm": 42.147085439109276, + "learning_rate": 1.1888531704869046e-05, + "loss": 1.5566, + "step": 75900 + }, + { + "epoch": 0.91, + "grad_norm": 97.77629217425995, + "learning_rate": 1.1887958035696274e-05, + "loss": 1.3633, + "step": 75903 + }, + { + "epoch": 0.91, + "grad_norm": 13.600950066506169, + "learning_rate": 1.1887384360080576e-05, + "loss": 1.5507, + "step": 75906 + }, + { + "epoch": 0.91, + "grad_norm": 22.590030653738122, + "learning_rate": 1.188681067802392e-05, + "loss": 1.568, + "step": 75909 + }, + { + "epoch": 0.91, + "grad_norm": 65.6398627517276, + "learning_rate": 1.1886236989528256e-05, + "loss": 1.1844, + "step": 75912 + }, + { + "epoch": 0.91, + "grad_norm": 32.55291769406154, + "learning_rate": 1.1885663294595544e-05, + "loss": 1.4394, + "step": 75915 + }, + { + "epoch": 0.91, + "grad_norm": 8.68686108609956, + "learning_rate": 1.1885089593227745e-05, + "loss": 1.3704, + "step": 75918 + }, + { + "epoch": 0.91, + "grad_norm": 6.067461027278127, + "learning_rate": 1.1884515885426813e-05, + "loss": 1.4842, + "step": 75921 + }, + { + "epoch": 0.91, + "grad_norm": 4.287705029222466, + "learning_rate": 1.1883942171194706e-05, + "loss": 1.2145, + "step": 75924 + }, + { + "epoch": 0.91, + "grad_norm": 5.262946072673577, + "learning_rate": 1.1883368450533384e-05, + "loss": 1.4454, + "step": 75927 + }, + { + "epoch": 0.91, + "grad_norm": 31.48649733001724, + "learning_rate": 1.1882794723444804e-05, + "loss": 1.2855, + "step": 75930 + }, + { + "epoch": 0.91, + "grad_norm": 11.95782573570306, + "learning_rate": 1.1882220989930927e-05, + "loss": 1.3897, + "step": 75933 + }, + { + "epoch": 0.91, + "grad_norm": 21.306314428810914, + "learning_rate": 1.1881647249993702e-05, + "loss": 1.3956, + "step": 75936 + }, + { + "epoch": 0.91, + "grad_norm": 7.793264341153012, + "learning_rate": 1.1881073503635094e-05, + "loss": 1.4544, + "step": 75939 + }, + { + "epoch": 0.91, + "grad_norm": 39.25766229107364, + "learning_rate": 1.1880499750857062e-05, + "loss": 1.3048, + "step": 75942 + }, + { + "epoch": 0.91, + "grad_norm": 11.568117157987933, + "learning_rate": 1.1879925991661561e-05, + "loss": 1.7303, + "step": 75945 + }, + { + "epoch": 0.91, + "grad_norm": 14.79406715354991, + "learning_rate": 1.1879352226050546e-05, + "loss": 1.2133, + "step": 75948 + }, + { + "epoch": 0.91, + "grad_norm": 3.6043663443463414, + "learning_rate": 1.1878778454025982e-05, + "loss": 1.1588, + "step": 75951 + }, + { + "epoch": 0.91, + "grad_norm": 19.787334658324497, + "learning_rate": 1.1878204675589826e-05, + "loss": 1.2348, + "step": 75954 + }, + { + "epoch": 0.91, + "grad_norm": 7.284891299487082, + "learning_rate": 1.1877630890744032e-05, + "loss": 1.5107, + "step": 75957 + }, + { + "epoch": 0.91, + "grad_norm": 23.853984332621145, + "learning_rate": 1.1877057099490558e-05, + "loss": 1.371, + "step": 75960 + }, + { + "epoch": 0.91, + "grad_norm": 7.86256838980432, + "learning_rate": 1.1876483301831367e-05, + "loss": 1.3093, + "step": 75963 + }, + { + "epoch": 0.91, + "grad_norm": 18.154780146106965, + "learning_rate": 1.1875909497768414e-05, + "loss": 1.4723, + "step": 75966 + }, + { + "epoch": 0.91, + "grad_norm": 8.4663231613412, + "learning_rate": 1.1875335687303657e-05, + "loss": 1.209, + "step": 75969 + }, + { + "epoch": 0.91, + "grad_norm": 16.76096070227251, + "learning_rate": 1.1874761870439056e-05, + "loss": 1.5079, + "step": 75972 + }, + { + "epoch": 0.91, + "grad_norm": 13.321577143579624, + "learning_rate": 1.1874188047176567e-05, + "loss": 1.4936, + "step": 75975 + }, + { + "epoch": 0.91, + "grad_norm": 13.35064030759926, + "learning_rate": 1.187361421751815e-05, + "loss": 1.1491, + "step": 75978 + }, + { + "epoch": 0.91, + "grad_norm": 13.570197191247749, + "learning_rate": 1.187304038146576e-05, + "loss": 1.1927, + "step": 75981 + }, + { + "epoch": 0.91, + "grad_norm": 20.206667338390425, + "learning_rate": 1.187246653902136e-05, + "loss": 1.4106, + "step": 75984 + }, + { + "epoch": 0.91, + "grad_norm": 42.87846536491369, + "learning_rate": 1.1871892690186906e-05, + "loss": 1.1923, + "step": 75987 + }, + { + "epoch": 0.91, + "grad_norm": 27.194805319091284, + "learning_rate": 1.1871318834964356e-05, + "loss": 0.8493, + "step": 75990 + }, + { + "epoch": 0.91, + "grad_norm": 16.64738892620637, + "learning_rate": 1.1870744973355668e-05, + "loss": 1.3103, + "step": 75993 + }, + { + "epoch": 0.91, + "grad_norm": 10.550282411798305, + "learning_rate": 1.1870171105362802e-05, + "loss": 1.325, + "step": 75996 + }, + { + "epoch": 0.91, + "grad_norm": 35.73664166432236, + "learning_rate": 1.1869597230987717e-05, + "loss": 1.2772, + "step": 75999 + }, + { + "epoch": 0.91, + "grad_norm": 15.742538683524169, + "learning_rate": 1.1869023350232368e-05, + "loss": 1.3662, + "step": 76002 + }, + { + "epoch": 0.91, + "grad_norm": 26.36233801493472, + "learning_rate": 1.1868449463098717e-05, + "loss": 1.3389, + "step": 76005 + }, + { + "epoch": 0.91, + "grad_norm": 13.394865490948286, + "learning_rate": 1.1867875569588722e-05, + "loss": 1.6111, + "step": 76008 + }, + { + "epoch": 0.91, + "grad_norm": 14.050582717706417, + "learning_rate": 1.1867301669704338e-05, + "loss": 1.288, + "step": 76011 + }, + { + "epoch": 0.91, + "grad_norm": 41.72613104183762, + "learning_rate": 1.1866727763447528e-05, + "loss": 1.3855, + "step": 76014 + }, + { + "epoch": 0.91, + "grad_norm": 14.283323943916594, + "learning_rate": 1.1866153850820248e-05, + "loss": 1.2888, + "step": 76017 + }, + { + "epoch": 0.91, + "grad_norm": 9.184554373543618, + "learning_rate": 1.1865579931824455e-05, + "loss": 1.33, + "step": 76020 + }, + { + "epoch": 0.91, + "grad_norm": 19.47059568912093, + "learning_rate": 1.1865006006462111e-05, + "loss": 1.36, + "step": 76023 + }, + { + "epoch": 0.91, + "grad_norm": 8.498151199460613, + "learning_rate": 1.1864432074735174e-05, + "loss": 1.2414, + "step": 76026 + }, + { + "epoch": 0.91, + "grad_norm": 27.539267340816963, + "learning_rate": 1.1863858136645603e-05, + "loss": 1.1259, + "step": 76029 + }, + { + "epoch": 0.91, + "grad_norm": 9.338555839922051, + "learning_rate": 1.1863284192195355e-05, + "loss": 1.4701, + "step": 76032 + }, + { + "epoch": 0.91, + "grad_norm": 15.110040022730951, + "learning_rate": 1.1862710241386385e-05, + "loss": 1.4418, + "step": 76035 + }, + { + "epoch": 0.91, + "grad_norm": 29.6451432828364, + "learning_rate": 1.1862136284220662e-05, + "loss": 0.9888, + "step": 76038 + }, + { + "epoch": 0.91, + "grad_norm": 20.79193837095094, + "learning_rate": 1.1861562320700134e-05, + "loss": 1.0885, + "step": 76041 + }, + { + "epoch": 0.91, + "grad_norm": 6.238825038633445, + "learning_rate": 1.1860988350826767e-05, + "loss": 0.987, + "step": 76044 + }, + { + "epoch": 0.91, + "grad_norm": 12.536458722866128, + "learning_rate": 1.1860414374602516e-05, + "loss": 1.454, + "step": 76047 + }, + { + "epoch": 0.91, + "grad_norm": 26.14094304572381, + "learning_rate": 1.185984039202934e-05, + "loss": 1.552, + "step": 76050 + }, + { + "epoch": 0.91, + "grad_norm": 9.24469870379007, + "learning_rate": 1.1859266403109202e-05, + "loss": 1.2695, + "step": 76053 + }, + { + "epoch": 0.91, + "grad_norm": 5.194657250684387, + "learning_rate": 1.1858692407844052e-05, + "loss": 1.0011, + "step": 76056 + }, + { + "epoch": 0.91, + "grad_norm": 6.922553358538238, + "learning_rate": 1.185811840623586e-05, + "loss": 1.2612, + "step": 76059 + }, + { + "epoch": 0.91, + "grad_norm": 7.449849112553473, + "learning_rate": 1.1857544398286576e-05, + "loss": 1.3333, + "step": 76062 + }, + { + "epoch": 0.91, + "grad_norm": 2.4119933706883763, + "learning_rate": 1.1856970383998163e-05, + "loss": 1.1813, + "step": 76065 + }, + { + "epoch": 0.91, + "grad_norm": 29.43096866792503, + "learning_rate": 1.1856396363372577e-05, + "loss": 1.6747, + "step": 76068 + }, + { + "epoch": 0.91, + "grad_norm": 48.91446613535111, + "learning_rate": 1.1855822336411784e-05, + "loss": 1.091, + "step": 76071 + }, + { + "epoch": 0.91, + "grad_norm": 30.67955074688279, + "learning_rate": 1.1855248303117732e-05, + "loss": 1.0648, + "step": 76074 + }, + { + "epoch": 0.91, + "grad_norm": 19.464212509460676, + "learning_rate": 1.1854674263492388e-05, + "loss": 1.6748, + "step": 76077 + }, + { + "epoch": 0.91, + "grad_norm": 8.346494013417667, + "learning_rate": 1.185410021753771e-05, + "loss": 1.2285, + "step": 76080 + }, + { + "epoch": 0.91, + "grad_norm": 12.399642031212922, + "learning_rate": 1.1853526165255654e-05, + "loss": 1.6886, + "step": 76083 + }, + { + "epoch": 0.91, + "grad_norm": 62.194879646910984, + "learning_rate": 1.185295210664818e-05, + "loss": 1.1629, + "step": 76086 + }, + { + "epoch": 0.91, + "grad_norm": 11.351844605862325, + "learning_rate": 1.185237804171725e-05, + "loss": 1.7294, + "step": 76089 + }, + { + "epoch": 0.91, + "grad_norm": 22.156991384569253, + "learning_rate": 1.1851803970464823e-05, + "loss": 1.4778, + "step": 76092 + }, + { + "epoch": 0.92, + "grad_norm": 6.833886414893191, + "learning_rate": 1.1851229892892853e-05, + "loss": 1.6581, + "step": 76095 + }, + { + "epoch": 0.92, + "grad_norm": 38.45251931563954, + "learning_rate": 1.1850655809003303e-05, + "loss": 1.6998, + "step": 76098 + }, + { + "epoch": 0.92, + "grad_norm": 5.842698312900821, + "learning_rate": 1.185008171879813e-05, + "loss": 1.5397, + "step": 76101 + }, + { + "epoch": 0.92, + "grad_norm": 50.99560138135778, + "learning_rate": 1.1849507622279295e-05, + "loss": 1.4562, + "step": 76104 + }, + { + "epoch": 0.92, + "grad_norm": 3.549568834216086, + "learning_rate": 1.1848933519448758e-05, + "loss": 1.4247, + "step": 76107 + }, + { + "epoch": 0.92, + "grad_norm": 4.182874968609963, + "learning_rate": 1.1848359410308475e-05, + "loss": 1.5098, + "step": 76110 + }, + { + "epoch": 0.92, + "grad_norm": 13.478113773781663, + "learning_rate": 1.184778529486041e-05, + "loss": 1.5434, + "step": 76113 + }, + { + "epoch": 0.92, + "grad_norm": 21.776691710826533, + "learning_rate": 1.1847211173106517e-05, + "loss": 1.507, + "step": 76116 + }, + { + "epoch": 0.92, + "grad_norm": 2.896221961812782, + "learning_rate": 1.1846637045048758e-05, + "loss": 1.4967, + "step": 76119 + }, + { + "epoch": 0.92, + "grad_norm": 5.408523974853067, + "learning_rate": 1.184606291068909e-05, + "loss": 1.0427, + "step": 76122 + }, + { + "epoch": 0.92, + "grad_norm": 20.001856092286552, + "learning_rate": 1.1845488770029481e-05, + "loss": 1.2101, + "step": 76125 + }, + { + "epoch": 0.92, + "grad_norm": 24.551036193932287, + "learning_rate": 1.1844914623071877e-05, + "loss": 1.5353, + "step": 76128 + }, + { + "epoch": 0.92, + "grad_norm": 16.583218455567177, + "learning_rate": 1.1844340469818243e-05, + "loss": 1.1132, + "step": 76131 + }, + { + "epoch": 0.92, + "grad_norm": 17.425922103828924, + "learning_rate": 1.1843766310270546e-05, + "loss": 1.1858, + "step": 76134 + }, + { + "epoch": 0.92, + "grad_norm": 34.8356842550288, + "learning_rate": 1.1843192144430732e-05, + "loss": 1.3784, + "step": 76137 + }, + { + "epoch": 0.92, + "grad_norm": 7.488869789635238, + "learning_rate": 1.1842617972300772e-05, + "loss": 1.2498, + "step": 76140 + }, + { + "epoch": 0.92, + "grad_norm": 20.47713763225802, + "learning_rate": 1.1842043793882616e-05, + "loss": 1.3936, + "step": 76143 + }, + { + "epoch": 0.92, + "grad_norm": 14.927391610938189, + "learning_rate": 1.1841469609178234e-05, + "loss": 1.2629, + "step": 76146 + }, + { + "epoch": 0.92, + "grad_norm": 10.580708936007813, + "learning_rate": 1.1840895418189575e-05, + "loss": 1.4488, + "step": 76149 + }, + { + "epoch": 0.92, + "grad_norm": 6.4655777536504475, + "learning_rate": 1.1840321220918605e-05, + "loss": 1.2098, + "step": 76152 + }, + { + "epoch": 0.92, + "grad_norm": 10.816813012049781, + "learning_rate": 1.183974701736728e-05, + "loss": 1.1068, + "step": 76155 + }, + { + "epoch": 0.92, + "grad_norm": 8.337106531184519, + "learning_rate": 1.183917280753756e-05, + "loss": 1.4796, + "step": 76158 + }, + { + "epoch": 0.92, + "grad_norm": 12.463731800332877, + "learning_rate": 1.183859859143141e-05, + "loss": 1.2791, + "step": 76161 + }, + { + "epoch": 0.92, + "grad_norm": 4.393528155005999, + "learning_rate": 1.183802436905078e-05, + "loss": 1.3648, + "step": 76164 + }, + { + "epoch": 0.92, + "grad_norm": 7.2210720036555704, + "learning_rate": 1.183745014039764e-05, + "loss": 1.3734, + "step": 76167 + }, + { + "epoch": 0.92, + "grad_norm": 12.772521208551435, + "learning_rate": 1.1836875905473941e-05, + "loss": 1.3743, + "step": 76170 + }, + { + "epoch": 0.92, + "grad_norm": 12.709042760885628, + "learning_rate": 1.1836301664281647e-05, + "loss": 1.4655, + "step": 76173 + }, + { + "epoch": 0.92, + "grad_norm": 7.966343865408469, + "learning_rate": 1.1835727416822718e-05, + "loss": 1.4868, + "step": 76176 + }, + { + "epoch": 0.92, + "grad_norm": 8.969695937113908, + "learning_rate": 1.1835153163099114e-05, + "loss": 1.9293, + "step": 76179 + }, + { + "epoch": 0.92, + "grad_norm": 8.435378760558265, + "learning_rate": 1.1834578903112788e-05, + "loss": 0.9627, + "step": 76182 + }, + { + "epoch": 0.92, + "grad_norm": 61.63955248732206, + "learning_rate": 1.1834004636865706e-05, + "loss": 1.6374, + "step": 76185 + }, + { + "epoch": 0.92, + "grad_norm": 10.465591199971128, + "learning_rate": 1.1833430364359833e-05, + "loss": 1.4232, + "step": 76188 + }, + { + "epoch": 0.92, + "grad_norm": 20.612230425988127, + "learning_rate": 1.1832856085597116e-05, + "loss": 1.6679, + "step": 76191 + }, + { + "epoch": 0.92, + "grad_norm": 2.9361262024720425, + "learning_rate": 1.1832281800579524e-05, + "loss": 1.2757, + "step": 76194 + }, + { + "epoch": 0.92, + "grad_norm": 6.42155877675029, + "learning_rate": 1.1831707509309013e-05, + "loss": 1.1189, + "step": 76197 + }, + { + "epoch": 0.92, + "grad_norm": 67.27429434470815, + "learning_rate": 1.1831133211787547e-05, + "loss": 1.7319, + "step": 76200 + }, + { + "epoch": 0.92, + "grad_norm": 34.14554870032878, + "learning_rate": 1.183055890801708e-05, + "loss": 1.5052, + "step": 76203 + }, + { + "epoch": 0.92, + "grad_norm": 7.628152733017371, + "learning_rate": 1.1829984597999575e-05, + "loss": 1.4042, + "step": 76206 + }, + { + "epoch": 0.92, + "grad_norm": 4.277899818799936, + "learning_rate": 1.1829410281736993e-05, + "loss": 1.5393, + "step": 76209 + }, + { + "epoch": 0.92, + "grad_norm": 11.622843791783819, + "learning_rate": 1.182883595923129e-05, + "loss": 1.2547, + "step": 76212 + }, + { + "epoch": 0.92, + "grad_norm": 20.345269243521706, + "learning_rate": 1.1828261630484432e-05, + "loss": 1.3961, + "step": 76215 + }, + { + "epoch": 0.92, + "grad_norm": 13.01158280847304, + "learning_rate": 1.1827687295498375e-05, + "loss": 1.6605, + "step": 76218 + }, + { + "epoch": 0.92, + "grad_norm": 12.886212663591524, + "learning_rate": 1.1827112954275079e-05, + "loss": 1.3449, + "step": 76221 + }, + { + "epoch": 0.92, + "grad_norm": 12.966596815340289, + "learning_rate": 1.1826538606816502e-05, + "loss": 1.2921, + "step": 76224 + }, + { + "epoch": 0.92, + "grad_norm": 6.686849433008033, + "learning_rate": 1.1825964253124612e-05, + "loss": 1.0845, + "step": 76227 + }, + { + "epoch": 0.92, + "grad_norm": 39.635217054148484, + "learning_rate": 1.182538989320136e-05, + "loss": 1.2472, + "step": 76230 + }, + { + "epoch": 0.92, + "grad_norm": 9.893253449383693, + "learning_rate": 1.1824815527048711e-05, + "loss": 1.5455, + "step": 76233 + }, + { + "epoch": 0.92, + "grad_norm": 4.791202613824734, + "learning_rate": 1.1824241154668623e-05, + "loss": 1.2736, + "step": 76236 + }, + { + "epoch": 0.92, + "grad_norm": 7.004982641734167, + "learning_rate": 1.1823666776063057e-05, + "loss": 1.5799, + "step": 76239 + }, + { + "epoch": 0.92, + "grad_norm": 7.679759686655241, + "learning_rate": 1.1823092391233975e-05, + "loss": 1.4846, + "step": 76242 + }, + { + "epoch": 0.92, + "grad_norm": 7.48114987706708, + "learning_rate": 1.1822518000183333e-05, + "loss": 1.0998, + "step": 76245 + }, + { + "epoch": 0.92, + "grad_norm": 35.56088788463801, + "learning_rate": 1.1821943602913095e-05, + "loss": 0.9353, + "step": 76248 + }, + { + "epoch": 0.92, + "grad_norm": 5.5654320875673475, + "learning_rate": 1.182136919942522e-05, + "loss": 1.4079, + "step": 76251 + }, + { + "epoch": 0.92, + "grad_norm": 4.914138293726393, + "learning_rate": 1.1820794789721669e-05, + "loss": 1.1112, + "step": 76254 + }, + { + "epoch": 0.92, + "grad_norm": 10.692919752488047, + "learning_rate": 1.1820220373804397e-05, + "loss": 1.4491, + "step": 76257 + }, + { + "epoch": 0.92, + "grad_norm": 119.73723872605017, + "learning_rate": 1.1819645951675371e-05, + "loss": 1.6179, + "step": 76260 + }, + { + "epoch": 0.92, + "grad_norm": 10.936278903587626, + "learning_rate": 1.181907152333655e-05, + "loss": 1.4582, + "step": 76263 + }, + { + "epoch": 0.92, + "grad_norm": 7.040204036369796, + "learning_rate": 1.181849708878989e-05, + "loss": 1.2956, + "step": 76266 + }, + { + "epoch": 0.92, + "grad_norm": 22.66548950369023, + "learning_rate": 1.1817922648037357e-05, + "loss": 1.3288, + "step": 76269 + }, + { + "epoch": 0.92, + "grad_norm": 5.728786010004436, + "learning_rate": 1.181734820108091e-05, + "loss": 1.5713, + "step": 76272 + }, + { + "epoch": 0.92, + "grad_norm": 9.967877510997882, + "learning_rate": 1.1816773747922508e-05, + "loss": 1.2878, + "step": 76275 + }, + { + "epoch": 0.92, + "grad_norm": 18.076010503493773, + "learning_rate": 1.1816199288564107e-05, + "loss": 1.3236, + "step": 76278 + }, + { + "epoch": 0.92, + "grad_norm": 14.677016565109561, + "learning_rate": 1.1815624823007676e-05, + "loss": 1.1841, + "step": 76281 + }, + { + "epoch": 0.92, + "grad_norm": 9.305388490341606, + "learning_rate": 1.1815050351255171e-05, + "loss": 1.287, + "step": 76284 + }, + { + "epoch": 0.92, + "grad_norm": 20.535813877674986, + "learning_rate": 1.1814475873308553e-05, + "loss": 1.0537, + "step": 76287 + }, + { + "epoch": 0.92, + "grad_norm": 5.240317116958467, + "learning_rate": 1.1813901389169779e-05, + "loss": 1.2288, + "step": 76290 + }, + { + "epoch": 0.92, + "grad_norm": 5.972175949424702, + "learning_rate": 1.1813326898840818e-05, + "loss": 1.4854, + "step": 76293 + }, + { + "epoch": 0.92, + "grad_norm": 13.854640583181121, + "learning_rate": 1.1812752402323625e-05, + "loss": 1.5901, + "step": 76296 + }, + { + "epoch": 0.92, + "grad_norm": 14.864919500284367, + "learning_rate": 1.1812177899620157e-05, + "loss": 1.4067, + "step": 76299 + }, + { + "epoch": 0.92, + "grad_norm": 9.493990054776235, + "learning_rate": 1.181160339073238e-05, + "loss": 1.3932, + "step": 76302 + }, + { + "epoch": 0.92, + "grad_norm": 12.506043122014454, + "learning_rate": 1.1811028875662255e-05, + "loss": 1.0653, + "step": 76305 + }, + { + "epoch": 0.92, + "grad_norm": 8.000520276030436, + "learning_rate": 1.181045435441174e-05, + "loss": 1.3581, + "step": 76308 + }, + { + "epoch": 0.92, + "grad_norm": 8.139422660522609, + "learning_rate": 1.1809879826982796e-05, + "loss": 1.4226, + "step": 76311 + }, + { + "epoch": 0.92, + "grad_norm": 4.133459460520702, + "learning_rate": 1.1809305293377384e-05, + "loss": 1.5613, + "step": 76314 + }, + { + "epoch": 0.92, + "grad_norm": 21.364206430769414, + "learning_rate": 1.1808730753597466e-05, + "loss": 1.5245, + "step": 76317 + }, + { + "epoch": 0.92, + "grad_norm": 9.322190458625132, + "learning_rate": 1.1808156207644999e-05, + "loss": 1.0744, + "step": 76320 + }, + { + "epoch": 0.92, + "grad_norm": 13.520094724715248, + "learning_rate": 1.1807581655521947e-05, + "loss": 1.5942, + "step": 76323 + }, + { + "epoch": 0.92, + "grad_norm": 6.146150207529958, + "learning_rate": 1.1807007097230271e-05, + "loss": 1.3238, + "step": 76326 + }, + { + "epoch": 0.92, + "grad_norm": 16.279691370353234, + "learning_rate": 1.1806432532771932e-05, + "loss": 1.3159, + "step": 76329 + }, + { + "epoch": 0.92, + "grad_norm": 33.22616046217233, + "learning_rate": 1.1805857962148886e-05, + "loss": 1.4792, + "step": 76332 + }, + { + "epoch": 0.92, + "grad_norm": 15.517095155642062, + "learning_rate": 1.1805283385363098e-05, + "loss": 1.6843, + "step": 76335 + }, + { + "epoch": 0.92, + "grad_norm": 25.353234768681375, + "learning_rate": 1.1804708802416531e-05, + "loss": 1.236, + "step": 76338 + }, + { + "epoch": 0.92, + "grad_norm": 9.301211145572482, + "learning_rate": 1.180413421331114e-05, + "loss": 1.2485, + "step": 76341 + }, + { + "epoch": 0.92, + "grad_norm": 11.69028146347937, + "learning_rate": 1.1803559618048887e-05, + "loss": 1.1535, + "step": 76344 + }, + { + "epoch": 0.92, + "grad_norm": 19.38224454672622, + "learning_rate": 1.1802985016631739e-05, + "loss": 1.7524, + "step": 76347 + }, + { + "epoch": 0.92, + "grad_norm": 16.89606779626623, + "learning_rate": 1.1802410409061651e-05, + "loss": 1.3717, + "step": 76350 + }, + { + "epoch": 0.92, + "grad_norm": 10.37147110308485, + "learning_rate": 1.1801835795340583e-05, + "loss": 1.0431, + "step": 76353 + }, + { + "epoch": 0.92, + "grad_norm": 9.316185378234142, + "learning_rate": 1.1801261175470502e-05, + "loss": 1.321, + "step": 76356 + }, + { + "epoch": 0.92, + "grad_norm": 5.080402278978662, + "learning_rate": 1.1800686549453367e-05, + "loss": 1.3645, + "step": 76359 + }, + { + "epoch": 0.92, + "grad_norm": 7.750698295412144, + "learning_rate": 1.1800111917291134e-05, + "loss": 1.344, + "step": 76362 + }, + { + "epoch": 0.92, + "grad_norm": 32.900922633968, + "learning_rate": 1.1799537278985767e-05, + "loss": 1.4454, + "step": 76365 + }, + { + "epoch": 0.92, + "grad_norm": 5.153912750250826, + "learning_rate": 1.1798962634539228e-05, + "loss": 1.0874, + "step": 76368 + }, + { + "epoch": 0.92, + "grad_norm": 21.095591102864148, + "learning_rate": 1.1798387983953478e-05, + "loss": 1.1638, + "step": 76371 + }, + { + "epoch": 0.92, + "grad_norm": 9.177686082453594, + "learning_rate": 1.1797813327230476e-05, + "loss": 1.4926, + "step": 76374 + }, + { + "epoch": 0.92, + "grad_norm": 21.18339519896067, + "learning_rate": 1.1797238664372188e-05, + "loss": 1.5996, + "step": 76377 + }, + { + "epoch": 0.92, + "grad_norm": 13.354070336901906, + "learning_rate": 1.179666399538057e-05, + "loss": 1.6075, + "step": 76380 + }, + { + "epoch": 0.92, + "grad_norm": 2.716492107123295, + "learning_rate": 1.1796089320257586e-05, + "loss": 1.2109, + "step": 76383 + }, + { + "epoch": 0.92, + "grad_norm": 2.4145268624241174, + "learning_rate": 1.1795514639005195e-05, + "loss": 1.6644, + "step": 76386 + }, + { + "epoch": 0.92, + "grad_norm": 8.78734230600509, + "learning_rate": 1.179493995162536e-05, + "loss": 1.2486, + "step": 76389 + }, + { + "epoch": 0.92, + "grad_norm": 12.389757369561224, + "learning_rate": 1.1794365258120039e-05, + "loss": 1.459, + "step": 76392 + }, + { + "epoch": 0.92, + "grad_norm": 2.714970810126414, + "learning_rate": 1.1793790558491199e-05, + "loss": 1.4342, + "step": 76395 + }, + { + "epoch": 0.92, + "grad_norm": 13.92781852215983, + "learning_rate": 1.1793215852740794e-05, + "loss": 1.7772, + "step": 76398 + }, + { + "epoch": 0.92, + "grad_norm": 13.899790145171501, + "learning_rate": 1.1792641140870793e-05, + "loss": 1.4141, + "step": 76401 + }, + { + "epoch": 0.92, + "grad_norm": 26.331106258041697, + "learning_rate": 1.1792066422883153e-05, + "loss": 0.9834, + "step": 76404 + }, + { + "epoch": 0.92, + "grad_norm": 40.12567080739326, + "learning_rate": 1.1791491698779833e-05, + "loss": 1.1802, + "step": 76407 + }, + { + "epoch": 0.92, + "grad_norm": 13.13667420722248, + "learning_rate": 1.17909169685628e-05, + "loss": 1.3529, + "step": 76410 + }, + { + "epoch": 0.92, + "grad_norm": 22.8093389359918, + "learning_rate": 1.1790342232234013e-05, + "loss": 1.3291, + "step": 76413 + }, + { + "epoch": 0.92, + "grad_norm": 8.98230653922312, + "learning_rate": 1.1789767489795431e-05, + "loss": 1.2344, + "step": 76416 + }, + { + "epoch": 0.92, + "grad_norm": 3.785596023450154, + "learning_rate": 1.1789192741249015e-05, + "loss": 1.4796, + "step": 76419 + }, + { + "epoch": 0.92, + "grad_norm": 11.774461364130678, + "learning_rate": 1.1788617986596734e-05, + "loss": 1.3511, + "step": 76422 + }, + { + "epoch": 0.92, + "grad_norm": 10.36792140431813, + "learning_rate": 1.1788043225840541e-05, + "loss": 1.3837, + "step": 76425 + }, + { + "epoch": 0.92, + "grad_norm": 6.234599570705856, + "learning_rate": 1.1787468458982398e-05, + "loss": 1.2256, + "step": 76428 + }, + { + "epoch": 0.92, + "grad_norm": 6.459365315069522, + "learning_rate": 1.1786893686024274e-05, + "loss": 1.1456, + "step": 76431 + }, + { + "epoch": 0.92, + "grad_norm": 6.33127813402371, + "learning_rate": 1.1786318906968125e-05, + "loss": 1.0477, + "step": 76434 + }, + { + "epoch": 0.92, + "grad_norm": 46.97103849355247, + "learning_rate": 1.178574412181591e-05, + "loss": 1.2027, + "step": 76437 + }, + { + "epoch": 0.92, + "grad_norm": 8.877034477926216, + "learning_rate": 1.1785169330569594e-05, + "loss": 1.3366, + "step": 76440 + }, + { + "epoch": 0.92, + "grad_norm": 19.053260766625804, + "learning_rate": 1.178459453323114e-05, + "loss": 1.4699, + "step": 76443 + }, + { + "epoch": 0.92, + "grad_norm": 13.473051487592757, + "learning_rate": 1.1784019729802508e-05, + "loss": 1.137, + "step": 76446 + }, + { + "epoch": 0.92, + "grad_norm": 6.06019009401704, + "learning_rate": 1.1783444920285658e-05, + "loss": 1.1104, + "step": 76449 + }, + { + "epoch": 0.92, + "grad_norm": 23.67780626866841, + "learning_rate": 1.178287010468255e-05, + "loss": 1.4259, + "step": 76452 + }, + { + "epoch": 0.92, + "grad_norm": 9.095418471756975, + "learning_rate": 1.1782295282995153e-05, + "loss": 1.4744, + "step": 76455 + }, + { + "epoch": 0.92, + "grad_norm": 11.34565119073454, + "learning_rate": 1.1781720455225422e-05, + "loss": 1.0428, + "step": 76458 + }, + { + "epoch": 0.92, + "grad_norm": 37.48182560189832, + "learning_rate": 1.1781145621375321e-05, + "loss": 1.3681, + "step": 76461 + }, + { + "epoch": 0.92, + "grad_norm": 15.235646221077614, + "learning_rate": 1.1780570781446812e-05, + "loss": 1.2664, + "step": 76464 + }, + { + "epoch": 0.92, + "grad_norm": 14.399369761888776, + "learning_rate": 1.1779995935441857e-05, + "loss": 0.9485, + "step": 76467 + }, + { + "epoch": 0.92, + "grad_norm": 59.6183113437512, + "learning_rate": 1.1779421083362416e-05, + "loss": 1.2721, + "step": 76470 + }, + { + "epoch": 0.92, + "grad_norm": 2.7626144403398847, + "learning_rate": 1.1778846225210452e-05, + "loss": 0.9747, + "step": 76473 + }, + { + "epoch": 0.92, + "grad_norm": 15.495213524404376, + "learning_rate": 1.1778271360987925e-05, + "loss": 1.3057, + "step": 76476 + }, + { + "epoch": 0.92, + "grad_norm": 7.176805114517522, + "learning_rate": 1.1777696490696801e-05, + "loss": 1.6775, + "step": 76479 + }, + { + "epoch": 0.92, + "grad_norm": 17.515693513694583, + "learning_rate": 1.1777121614339038e-05, + "loss": 1.279, + "step": 76482 + }, + { + "epoch": 0.92, + "grad_norm": 19.14311496339749, + "learning_rate": 1.1776546731916598e-05, + "loss": 1.0974, + "step": 76485 + }, + { + "epoch": 0.92, + "grad_norm": 12.347060768671339, + "learning_rate": 1.1775971843431446e-05, + "loss": 1.1393, + "step": 76488 + }, + { + "epoch": 0.92, + "grad_norm": 38.47371775634574, + "learning_rate": 1.1775396948885542e-05, + "loss": 1.3522, + "step": 76491 + }, + { + "epoch": 0.92, + "grad_norm": 18.934424025739844, + "learning_rate": 1.1774822048280845e-05, + "loss": 1.5327, + "step": 76494 + }, + { + "epoch": 0.92, + "grad_norm": 10.441484509512891, + "learning_rate": 1.1774247141619321e-05, + "loss": 1.292, + "step": 76497 + }, + { + "epoch": 0.92, + "grad_norm": 13.980907975421538, + "learning_rate": 1.1773672228902933e-05, + "loss": 1.5138, + "step": 76500 + }, + { + "epoch": 0.92, + "grad_norm": 13.548551670639016, + "learning_rate": 1.1773097310133639e-05, + "loss": 1.5845, + "step": 76503 + }, + { + "epoch": 0.92, + "grad_norm": 11.191408491128549, + "learning_rate": 1.17725223853134e-05, + "loss": 1.4349, + "step": 76506 + }, + { + "epoch": 0.92, + "grad_norm": 10.874062660288397, + "learning_rate": 1.1771947454444184e-05, + "loss": 1.7337, + "step": 76509 + }, + { + "epoch": 0.92, + "grad_norm": 23.496786714466115, + "learning_rate": 1.1771372517527948e-05, + "loss": 1.4209, + "step": 76512 + }, + { + "epoch": 0.92, + "grad_norm": 22.60150766809856, + "learning_rate": 1.1770797574566654e-05, + "loss": 1.2676, + "step": 76515 + }, + { + "epoch": 0.92, + "grad_norm": 5.883757203911471, + "learning_rate": 1.1770222625562268e-05, + "loss": 1.1747, + "step": 76518 + }, + { + "epoch": 0.92, + "grad_norm": 9.64536060166802, + "learning_rate": 1.1769647670516751e-05, + "loss": 1.0115, + "step": 76521 + }, + { + "epoch": 0.92, + "grad_norm": 9.911089576075156, + "learning_rate": 1.1769072709432061e-05, + "loss": 1.3673, + "step": 76524 + }, + { + "epoch": 0.92, + "grad_norm": 14.926874829564305, + "learning_rate": 1.1768497742310164e-05, + "loss": 1.5016, + "step": 76527 + }, + { + "epoch": 0.92, + "grad_norm": 22.348720484630153, + "learning_rate": 1.1767922769153024e-05, + "loss": 1.1424, + "step": 76530 + }, + { + "epoch": 0.92, + "grad_norm": 16.40134589755416, + "learning_rate": 1.1767347789962595e-05, + "loss": 1.445, + "step": 76533 + }, + { + "epoch": 0.92, + "grad_norm": 3.707328968647709, + "learning_rate": 1.1766772804740847e-05, + "loss": 1.5609, + "step": 76536 + }, + { + "epoch": 0.92, + "grad_norm": 16.763766502497177, + "learning_rate": 1.1766197813489742e-05, + "loss": 1.238, + "step": 76539 + }, + { + "epoch": 0.92, + "grad_norm": 15.013738246054466, + "learning_rate": 1.1765622816211238e-05, + "loss": 1.3741, + "step": 76542 + }, + { + "epoch": 0.92, + "grad_norm": 13.2561480439873, + "learning_rate": 1.17650478129073e-05, + "loss": 1.3638, + "step": 76545 + }, + { + "epoch": 0.92, + "grad_norm": 4.0623783635878015, + "learning_rate": 1.1764472803579887e-05, + "loss": 1.456, + "step": 76548 + }, + { + "epoch": 0.92, + "grad_norm": 20.480840611646247, + "learning_rate": 1.1763897788230969e-05, + "loss": 1.6206, + "step": 76551 + }, + { + "epoch": 0.92, + "grad_norm": 20.552262702035442, + "learning_rate": 1.17633227668625e-05, + "loss": 1.2007, + "step": 76554 + }, + { + "epoch": 0.92, + "grad_norm": 16.49655515065677, + "learning_rate": 1.1762747739476445e-05, + "loss": 1.1263, + "step": 76557 + }, + { + "epoch": 0.92, + "grad_norm": 7.391804292902434, + "learning_rate": 1.1762172706074766e-05, + "loss": 1.4436, + "step": 76560 + }, + { + "epoch": 0.92, + "grad_norm": 4.265031958133299, + "learning_rate": 1.176159766665943e-05, + "loss": 1.2142, + "step": 76563 + }, + { + "epoch": 0.92, + "grad_norm": 10.720116361690987, + "learning_rate": 1.1761022621232395e-05, + "loss": 1.5308, + "step": 76566 + }, + { + "epoch": 0.92, + "grad_norm": 19.462476631078285, + "learning_rate": 1.1760447569795618e-05, + "loss": 0.9656, + "step": 76569 + }, + { + "epoch": 0.92, + "grad_norm": 15.968464961656561, + "learning_rate": 1.1759872512351076e-05, + "loss": 1.3655, + "step": 76572 + }, + { + "epoch": 0.92, + "grad_norm": 12.884590162748975, + "learning_rate": 1.1759297448900719e-05, + "loss": 1.4439, + "step": 76575 + }, + { + "epoch": 0.92, + "grad_norm": 12.930383903263092, + "learning_rate": 1.1758722379446515e-05, + "loss": 1.2169, + "step": 76578 + }, + { + "epoch": 0.92, + "grad_norm": 10.700353739782285, + "learning_rate": 1.1758147303990422e-05, + "loss": 1.5623, + "step": 76581 + }, + { + "epoch": 0.92, + "grad_norm": 51.149436754735326, + "learning_rate": 1.1757572222534411e-05, + "loss": 1.4466, + "step": 76584 + }, + { + "epoch": 0.92, + "grad_norm": 24.37583744458784, + "learning_rate": 1.1756997135080433e-05, + "loss": 1.2923, + "step": 76587 + }, + { + "epoch": 0.92, + "grad_norm": 15.136919385646527, + "learning_rate": 1.1756422041630458e-05, + "loss": 1.2964, + "step": 76590 + }, + { + "epoch": 0.92, + "grad_norm": 11.571291659744139, + "learning_rate": 1.1755846942186451e-05, + "loss": 1.2498, + "step": 76593 + }, + { + "epoch": 0.92, + "grad_norm": 8.708931406106972, + "learning_rate": 1.1755271836750367e-05, + "loss": 1.1989, + "step": 76596 + }, + { + "epoch": 0.92, + "grad_norm": 8.058177393568469, + "learning_rate": 1.1754696725324172e-05, + "loss": 1.3451, + "step": 76599 + }, + { + "epoch": 0.92, + "grad_norm": 4.649754979656283, + "learning_rate": 1.175412160790983e-05, + "loss": 1.6449, + "step": 76602 + }, + { + "epoch": 0.92, + "grad_norm": 20.996180626981, + "learning_rate": 1.1753546484509306e-05, + "loss": 1.2772, + "step": 76605 + }, + { + "epoch": 0.92, + "grad_norm": 7.804953352335855, + "learning_rate": 1.1752971355124557e-05, + "loss": 1.3297, + "step": 76608 + }, + { + "epoch": 0.92, + "grad_norm": 16.888835089278746, + "learning_rate": 1.1752396219757547e-05, + "loss": 1.2349, + "step": 76611 + }, + { + "epoch": 0.92, + "grad_norm": 11.984239703857506, + "learning_rate": 1.1751821078410242e-05, + "loss": 1.3491, + "step": 76614 + }, + { + "epoch": 0.92, + "grad_norm": 33.5117581417912, + "learning_rate": 1.1751245931084601e-05, + "loss": 1.352, + "step": 76617 + }, + { + "epoch": 0.92, + "grad_norm": 6.872653107072385, + "learning_rate": 1.1750670777782588e-05, + "loss": 1.2953, + "step": 76620 + }, + { + "epoch": 0.92, + "grad_norm": 4.710024736424262, + "learning_rate": 1.1750095618506168e-05, + "loss": 1.6133, + "step": 76623 + }, + { + "epoch": 0.92, + "grad_norm": 13.069667956018982, + "learning_rate": 1.1749520453257304e-05, + "loss": 1.171, + "step": 76626 + }, + { + "epoch": 0.92, + "grad_norm": 8.104120771518346, + "learning_rate": 1.1748945282037953e-05, + "loss": 1.522, + "step": 76629 + }, + { + "epoch": 0.92, + "grad_norm": 7.183001899141741, + "learning_rate": 1.1748370104850082e-05, + "loss": 1.4824, + "step": 76632 + }, + { + "epoch": 0.92, + "grad_norm": 9.57364463218817, + "learning_rate": 1.1747794921695657e-05, + "loss": 1.5216, + "step": 76635 + }, + { + "epoch": 0.92, + "grad_norm": 5.157221811982693, + "learning_rate": 1.1747219732576636e-05, + "loss": 1.6303, + "step": 76638 + }, + { + "epoch": 0.92, + "grad_norm": 22.787355014301365, + "learning_rate": 1.174664453749498e-05, + "loss": 1.3206, + "step": 76641 + }, + { + "epoch": 0.92, + "grad_norm": 10.078883330366459, + "learning_rate": 1.1746069336452657e-05, + "loss": 0.9617, + "step": 76644 + }, + { + "epoch": 0.92, + "grad_norm": 11.029547435915589, + "learning_rate": 1.1745494129451632e-05, + "loss": 1.3792, + "step": 76647 + }, + { + "epoch": 0.92, + "grad_norm": 6.924411335349225, + "learning_rate": 1.174491891649386e-05, + "loss": 1.3398, + "step": 76650 + }, + { + "epoch": 0.92, + "grad_norm": 19.1138245439745, + "learning_rate": 1.174434369758131e-05, + "loss": 1.3145, + "step": 76653 + }, + { + "epoch": 0.92, + "grad_norm": 25.977533469173153, + "learning_rate": 1.1743768472715942e-05, + "loss": 1.4154, + "step": 76656 + }, + { + "epoch": 0.92, + "grad_norm": 6.868165256613846, + "learning_rate": 1.1743193241899723e-05, + "loss": 1.6469, + "step": 76659 + }, + { + "epoch": 0.92, + "grad_norm": 13.670508843735835, + "learning_rate": 1.174261800513461e-05, + "loss": 1.2957, + "step": 76662 + }, + { + "epoch": 0.92, + "grad_norm": 12.137775960018846, + "learning_rate": 1.1742042762422572e-05, + "loss": 1.245, + "step": 76665 + }, + { + "epoch": 0.92, + "grad_norm": 3.2812539262250437, + "learning_rate": 1.174146751376557e-05, + "loss": 1.2612, + "step": 76668 + }, + { + "epoch": 0.92, + "grad_norm": 19.588106767984065, + "learning_rate": 1.1740892259165562e-05, + "loss": 1.3938, + "step": 76671 + }, + { + "epoch": 0.92, + "grad_norm": 5.257931408104107, + "learning_rate": 1.1740316998624518e-05, + "loss": 1.3787, + "step": 76674 + }, + { + "epoch": 0.92, + "grad_norm": 10.78935187335044, + "learning_rate": 1.1739741732144401e-05, + "loss": 1.127, + "step": 76677 + }, + { + "epoch": 0.92, + "grad_norm": 6.359365067866678, + "learning_rate": 1.1739166459727174e-05, + "loss": 1.4791, + "step": 76680 + }, + { + "epoch": 0.92, + "grad_norm": 8.397635445465813, + "learning_rate": 1.1738591181374794e-05, + "loss": 1.239, + "step": 76683 + }, + { + "epoch": 0.92, + "grad_norm": 12.280203016501734, + "learning_rate": 1.173801589708923e-05, + "loss": 1.5503, + "step": 76686 + }, + { + "epoch": 0.92, + "grad_norm": 8.846295713852303, + "learning_rate": 1.1737440606872444e-05, + "loss": 1.2512, + "step": 76689 + }, + { + "epoch": 0.92, + "grad_norm": 15.05398251495617, + "learning_rate": 1.17368653107264e-05, + "loss": 1.2371, + "step": 76692 + }, + { + "epoch": 0.92, + "grad_norm": 9.504609833359769, + "learning_rate": 1.1736290008653057e-05, + "loss": 1.2228, + "step": 76695 + }, + { + "epoch": 0.92, + "grad_norm": 10.80249222205036, + "learning_rate": 1.1735714700654382e-05, + "loss": 1.6996, + "step": 76698 + }, + { + "epoch": 0.92, + "grad_norm": 7.363802798503267, + "learning_rate": 1.1735139386732342e-05, + "loss": 1.2047, + "step": 76701 + }, + { + "epoch": 0.92, + "grad_norm": 11.126718288077935, + "learning_rate": 1.1734564066888893e-05, + "loss": 1.3089, + "step": 76704 + }, + { + "epoch": 0.92, + "grad_norm": 20.81979562340559, + "learning_rate": 1.1733988741126004e-05, + "loss": 1.4891, + "step": 76707 + }, + { + "epoch": 0.92, + "grad_norm": 3.255993705842156, + "learning_rate": 1.1733413409445636e-05, + "loss": 1.0772, + "step": 76710 + }, + { + "epoch": 0.92, + "grad_norm": 3.774678755590187, + "learning_rate": 1.173283807184975e-05, + "loss": 1.3013, + "step": 76713 + }, + { + "epoch": 0.92, + "grad_norm": 23.97373729779271, + "learning_rate": 1.1732262728340311e-05, + "loss": 1.1577, + "step": 76716 + }, + { + "epoch": 0.92, + "grad_norm": 5.267520774660001, + "learning_rate": 1.1731687378919285e-05, + "loss": 1.5934, + "step": 76719 + }, + { + "epoch": 0.92, + "grad_norm": 17.727097162590262, + "learning_rate": 1.1731112023588636e-05, + "loss": 1.7075, + "step": 76722 + }, + { + "epoch": 0.92, + "grad_norm": 91.91888352891185, + "learning_rate": 1.1730536662350321e-05, + "loss": 1.371, + "step": 76725 + }, + { + "epoch": 0.92, + "grad_norm": 13.740888409401434, + "learning_rate": 1.172996129520631e-05, + "loss": 1.748, + "step": 76728 + }, + { + "epoch": 0.92, + "grad_norm": 8.226475446930014, + "learning_rate": 1.1729385922158563e-05, + "loss": 1.6604, + "step": 76731 + }, + { + "epoch": 0.92, + "grad_norm": 22.60449012462593, + "learning_rate": 1.1728810543209045e-05, + "loss": 1.2699, + "step": 76734 + }, + { + "epoch": 0.92, + "grad_norm": 57.83303289839631, + "learning_rate": 1.172823515835972e-05, + "loss": 1.8008, + "step": 76737 + }, + { + "epoch": 0.92, + "grad_norm": 18.197508777110748, + "learning_rate": 1.1727659767612552e-05, + "loss": 1.1854, + "step": 76740 + }, + { + "epoch": 0.92, + "grad_norm": 4.664625990802745, + "learning_rate": 1.1727084370969504e-05, + "loss": 1.2913, + "step": 76743 + }, + { + "epoch": 0.92, + "grad_norm": 10.868808252581166, + "learning_rate": 1.1726508968432537e-05, + "loss": 1.0884, + "step": 76746 + }, + { + "epoch": 0.92, + "grad_norm": 6.4108746550997315, + "learning_rate": 1.1725933560003615e-05, + "loss": 1.1971, + "step": 76749 + }, + { + "epoch": 0.92, + "grad_norm": 3.7596299468103105, + "learning_rate": 1.1725358145684706e-05, + "loss": 1.4052, + "step": 76752 + }, + { + "epoch": 0.92, + "grad_norm": 5.08064880294589, + "learning_rate": 1.1724782725477772e-05, + "loss": 1.1356, + "step": 76755 + }, + { + "epoch": 0.92, + "grad_norm": 13.628544568878855, + "learning_rate": 1.1724207299384774e-05, + "loss": 1.1908, + "step": 76758 + }, + { + "epoch": 0.92, + "grad_norm": 4.406986640641216, + "learning_rate": 1.1723631867407678e-05, + "loss": 1.0376, + "step": 76761 + }, + { + "epoch": 0.92, + "grad_norm": 14.573987196708666, + "learning_rate": 1.172305642954845e-05, + "loss": 1.4108, + "step": 76764 + }, + { + "epoch": 0.92, + "grad_norm": 12.048586510751958, + "learning_rate": 1.1722480985809049e-05, + "loss": 1.6092, + "step": 76767 + }, + { + "epoch": 0.92, + "grad_norm": 18.499592575018863, + "learning_rate": 1.1721905536191438e-05, + "loss": 1.2182, + "step": 76770 + }, + { + "epoch": 0.92, + "grad_norm": 9.881462713890077, + "learning_rate": 1.1721330080697586e-05, + "loss": 1.3711, + "step": 76773 + }, + { + "epoch": 0.92, + "grad_norm": 7.120847062135313, + "learning_rate": 1.1720754619329455e-05, + "loss": 1.8476, + "step": 76776 + }, + { + "epoch": 0.92, + "grad_norm": 2.665061218521194, + "learning_rate": 1.1720179152089007e-05, + "loss": 1.2432, + "step": 76779 + }, + { + "epoch": 0.92, + "grad_norm": 20.96153954257949, + "learning_rate": 1.1719603678978209e-05, + "loss": 1.497, + "step": 76782 + }, + { + "epoch": 0.92, + "grad_norm": 6.482281027810728, + "learning_rate": 1.1719028199999021e-05, + "loss": 1.1816, + "step": 76785 + }, + { + "epoch": 0.92, + "grad_norm": 20.65893790006452, + "learning_rate": 1.1718452715153412e-05, + "loss": 1.0878, + "step": 76788 + }, + { + "epoch": 0.92, + "grad_norm": 5.622556882247135, + "learning_rate": 1.1717877224443338e-05, + "loss": 1.7282, + "step": 76791 + }, + { + "epoch": 0.92, + "grad_norm": 3.571287547795154, + "learning_rate": 1.1717301727870772e-05, + "loss": 1.1599, + "step": 76794 + }, + { + "epoch": 0.92, + "grad_norm": 17.024713985159504, + "learning_rate": 1.1716726225437674e-05, + "loss": 1.4407, + "step": 76797 + }, + { + "epoch": 0.92, + "grad_norm": 8.607110402932692, + "learning_rate": 1.1716150717146006e-05, + "loss": 1.2111, + "step": 76800 + }, + { + "epoch": 0.92, + "grad_norm": 7.9082165421406305, + "learning_rate": 1.1715575202997731e-05, + "loss": 1.5543, + "step": 76803 + }, + { + "epoch": 0.92, + "grad_norm": 2.793934258888997, + "learning_rate": 1.1714999682994822e-05, + "loss": 0.8934, + "step": 76806 + }, + { + "epoch": 0.92, + "grad_norm": 53.810313151028936, + "learning_rate": 1.1714424157139233e-05, + "loss": 1.2121, + "step": 76809 + }, + { + "epoch": 0.92, + "grad_norm": 7.324786043495891, + "learning_rate": 1.171384862543293e-05, + "loss": 1.1182, + "step": 76812 + }, + { + "epoch": 0.92, + "grad_norm": 27.79040195430007, + "learning_rate": 1.171327308787788e-05, + "loss": 1.3681, + "step": 76815 + }, + { + "epoch": 0.92, + "grad_norm": 10.7526076135439, + "learning_rate": 1.171269754447605e-05, + "loss": 1.2545, + "step": 76818 + }, + { + "epoch": 0.92, + "grad_norm": 15.764057045764162, + "learning_rate": 1.1712121995229398e-05, + "loss": 1.2455, + "step": 76821 + }, + { + "epoch": 0.92, + "grad_norm": 9.13037076549354, + "learning_rate": 1.1711546440139889e-05, + "loss": 1.6291, + "step": 76824 + }, + { + "epoch": 0.92, + "grad_norm": 5.0444702118615234, + "learning_rate": 1.1710970879209489e-05, + "loss": 1.476, + "step": 76827 + }, + { + "epoch": 0.92, + "grad_norm": 51.108768876099255, + "learning_rate": 1.1710395312440163e-05, + "loss": 1.3875, + "step": 76830 + }, + { + "epoch": 0.92, + "grad_norm": 13.674393721907686, + "learning_rate": 1.1709819739833872e-05, + "loss": 1.4126, + "step": 76833 + }, + { + "epoch": 0.92, + "grad_norm": 43.92811042964104, + "learning_rate": 1.1709244161392584e-05, + "loss": 1.1676, + "step": 76836 + }, + { + "epoch": 0.92, + "grad_norm": 15.671909372413792, + "learning_rate": 1.1708668577118261e-05, + "loss": 1.2315, + "step": 76839 + }, + { + "epoch": 0.92, + "grad_norm": 8.199830649456468, + "learning_rate": 1.1708092987012867e-05, + "loss": 1.665, + "step": 76842 + }, + { + "epoch": 0.92, + "grad_norm": 15.835249467844271, + "learning_rate": 1.1707517391078365e-05, + "loss": 1.2917, + "step": 76845 + }, + { + "epoch": 0.92, + "grad_norm": 6.63307519741678, + "learning_rate": 1.1706941789316724e-05, + "loss": 1.1611, + "step": 76848 + }, + { + "epoch": 0.92, + "grad_norm": 10.842306587207194, + "learning_rate": 1.1706366181729905e-05, + "loss": 1.2692, + "step": 76851 + }, + { + "epoch": 0.92, + "grad_norm": 4.021355928178205, + "learning_rate": 1.1705790568319873e-05, + "loss": 1.4762, + "step": 76854 + }, + { + "epoch": 0.92, + "grad_norm": 3.0972318172404556, + "learning_rate": 1.170521494908859e-05, + "loss": 1.0335, + "step": 76857 + }, + { + "epoch": 0.92, + "grad_norm": 6.991660024196468, + "learning_rate": 1.1704639324038025e-05, + "loss": 1.1351, + "step": 76860 + }, + { + "epoch": 0.92, + "grad_norm": 5.614649404967022, + "learning_rate": 1.170406369317014e-05, + "loss": 1.5227, + "step": 76863 + }, + { + "epoch": 0.92, + "grad_norm": 8.214869879297696, + "learning_rate": 1.1703488056486899e-05, + "loss": 1.7675, + "step": 76866 + }, + { + "epoch": 0.92, + "grad_norm": 33.75380920471971, + "learning_rate": 1.1702912413990266e-05, + "loss": 0.8986, + "step": 76869 + }, + { + "epoch": 0.92, + "grad_norm": 2.981249875082057, + "learning_rate": 1.1702336765682207e-05, + "loss": 1.3652, + "step": 76872 + }, + { + "epoch": 0.92, + "grad_norm": 12.416473588343692, + "learning_rate": 1.1701761111564687e-05, + "loss": 0.9406, + "step": 76875 + }, + { + "epoch": 0.92, + "grad_norm": 20.541180039899658, + "learning_rate": 1.1701185451639665e-05, + "loss": 1.2524, + "step": 76878 + }, + { + "epoch": 0.92, + "grad_norm": 9.243950629347914, + "learning_rate": 1.1700609785909114e-05, + "loss": 1.2855, + "step": 76881 + }, + { + "epoch": 0.92, + "grad_norm": 8.262367051036732, + "learning_rate": 1.1700034114374994e-05, + "loss": 1.584, + "step": 76884 + }, + { + "epoch": 0.92, + "grad_norm": 5.996607125639224, + "learning_rate": 1.1699458437039265e-05, + "loss": 1.3675, + "step": 76887 + }, + { + "epoch": 0.92, + "grad_norm": 25.05738757459179, + "learning_rate": 1.1698882753903904e-05, + "loss": 1.4646, + "step": 76890 + }, + { + "epoch": 0.92, + "grad_norm": 10.975127844081333, + "learning_rate": 1.1698307064970865e-05, + "loss": 1.2755, + "step": 76893 + }, + { + "epoch": 0.92, + "grad_norm": 4.236207804750401, + "learning_rate": 1.1697731370242116e-05, + "loss": 1.2474, + "step": 76896 + }, + { + "epoch": 0.92, + "grad_norm": 14.466257583590453, + "learning_rate": 1.169715566971962e-05, + "loss": 1.4965, + "step": 76899 + }, + { + "epoch": 0.92, + "grad_norm": 44.1472825189965, + "learning_rate": 1.1696579963405344e-05, + "loss": 1.5424, + "step": 76902 + }, + { + "epoch": 0.92, + "grad_norm": 11.789442542816511, + "learning_rate": 1.1696004251301252e-05, + "loss": 1.4075, + "step": 76905 + }, + { + "epoch": 0.92, + "grad_norm": 14.311435697895492, + "learning_rate": 1.1695428533409308e-05, + "loss": 1.2126, + "step": 76908 + }, + { + "epoch": 0.92, + "grad_norm": 5.595925016744858, + "learning_rate": 1.1694852809731474e-05, + "loss": 1.1499, + "step": 76911 + }, + { + "epoch": 0.92, + "grad_norm": 47.42288555843983, + "learning_rate": 1.1694277080269722e-05, + "loss": 1.4082, + "step": 76914 + }, + { + "epoch": 0.92, + "grad_norm": 3.680999069941543, + "learning_rate": 1.1693701345026012e-05, + "loss": 1.2608, + "step": 76917 + }, + { + "epoch": 0.92, + "grad_norm": 2.839410253096357, + "learning_rate": 1.1693125604002307e-05, + "loss": 1.4399, + "step": 76920 + }, + { + "epoch": 0.92, + "grad_norm": 9.717424200337515, + "learning_rate": 1.1692549857200578e-05, + "loss": 1.5305, + "step": 76923 + }, + { + "epoch": 0.93, + "grad_norm": 7.425720482833185, + "learning_rate": 1.1691974104622784e-05, + "loss": 1.4099, + "step": 76926 + }, + { + "epoch": 0.93, + "grad_norm": 11.632382547056245, + "learning_rate": 1.1691398346270893e-05, + "loss": 1.3729, + "step": 76929 + }, + { + "epoch": 0.93, + "grad_norm": 34.985189906579244, + "learning_rate": 1.1690822582146866e-05, + "loss": 1.5432, + "step": 76932 + }, + { + "epoch": 0.93, + "grad_norm": 9.840815643367627, + "learning_rate": 1.169024681225267e-05, + "loss": 1.3078, + "step": 76935 + }, + { + "epoch": 0.93, + "grad_norm": 5.4436475885643665, + "learning_rate": 1.1689671036590273e-05, + "loss": 1.0137, + "step": 76938 + }, + { + "epoch": 0.93, + "grad_norm": 7.70642740002137, + "learning_rate": 1.1689095255161636e-05, + "loss": 1.4802, + "step": 76941 + }, + { + "epoch": 0.93, + "grad_norm": 22.887639155646042, + "learning_rate": 1.1688519467968726e-05, + "loss": 1.7968, + "step": 76944 + }, + { + "epoch": 0.93, + "grad_norm": 9.154923595576138, + "learning_rate": 1.1687943675013508e-05, + "loss": 1.1694, + "step": 76947 + }, + { + "epoch": 0.93, + "grad_norm": 9.91043077038839, + "learning_rate": 1.1687367876297948e-05, + "loss": 1.1637, + "step": 76950 + }, + { + "epoch": 0.93, + "grad_norm": 6.256887982408525, + "learning_rate": 1.1686792071824004e-05, + "loss": 1.3907, + "step": 76953 + }, + { + "epoch": 0.93, + "grad_norm": 11.491387241789093, + "learning_rate": 1.1686216261593651e-05, + "loss": 1.6268, + "step": 76956 + }, + { + "epoch": 0.93, + "grad_norm": 9.339677063519492, + "learning_rate": 1.1685640445608848e-05, + "loss": 1.1253, + "step": 76959 + }, + { + "epoch": 0.93, + "grad_norm": 3.2926664999445294, + "learning_rate": 1.168506462387156e-05, + "loss": 1.5362, + "step": 76962 + }, + { + "epoch": 0.93, + "grad_norm": 6.130245533442927, + "learning_rate": 1.1684488796383753e-05, + "loss": 1.3605, + "step": 76965 + }, + { + "epoch": 0.93, + "grad_norm": 4.257154374332889, + "learning_rate": 1.1683912963147395e-05, + "loss": 1.2827, + "step": 76968 + }, + { + "epoch": 0.93, + "grad_norm": 5.99832100364972, + "learning_rate": 1.1683337124164447e-05, + "loss": 1.3234, + "step": 76971 + }, + { + "epoch": 0.93, + "grad_norm": 6.969446746395116, + "learning_rate": 1.1682761279436873e-05, + "loss": 1.3236, + "step": 76974 + }, + { + "epoch": 0.93, + "grad_norm": 13.656504252534084, + "learning_rate": 1.1682185428966647e-05, + "loss": 0.9629, + "step": 76977 + }, + { + "epoch": 0.93, + "grad_norm": 7.561967972995368, + "learning_rate": 1.1681609572755723e-05, + "loss": 1.3993, + "step": 76980 + }, + { + "epoch": 0.93, + "grad_norm": 4.048072155209299, + "learning_rate": 1.1681033710806074e-05, + "loss": 0.9051, + "step": 76983 + }, + { + "epoch": 0.93, + "grad_norm": 11.966642009517003, + "learning_rate": 1.1680457843119659e-05, + "loss": 1.428, + "step": 76986 + }, + { + "epoch": 0.93, + "grad_norm": 8.042170745614673, + "learning_rate": 1.1679881969698452e-05, + "loss": 1.4643, + "step": 76989 + }, + { + "epoch": 0.93, + "grad_norm": 10.226995317488086, + "learning_rate": 1.1679306090544408e-05, + "loss": 1.3759, + "step": 76992 + }, + { + "epoch": 0.93, + "grad_norm": 8.983105422445869, + "learning_rate": 1.1678730205659499e-05, + "loss": 1.3169, + "step": 76995 + }, + { + "epoch": 0.93, + "grad_norm": 21.118692410547073, + "learning_rate": 1.1678154315045689e-05, + "loss": 1.0721, + "step": 76998 + }, + { + "epoch": 0.93, + "grad_norm": 4.554545766621562, + "learning_rate": 1.1677578418704945e-05, + "loss": 1.3123, + "step": 77001 + }, + { + "epoch": 0.93, + "grad_norm": 4.7572413935863604, + "learning_rate": 1.1677002516639226e-05, + "loss": 1.3505, + "step": 77004 + }, + { + "epoch": 0.93, + "grad_norm": 19.969825831096966, + "learning_rate": 1.1676426608850501e-05, + "loss": 1.1552, + "step": 77007 + }, + { + "epoch": 0.93, + "grad_norm": 35.883155812196414, + "learning_rate": 1.1675850695340742e-05, + "loss": 1.4981, + "step": 77010 + }, + { + "epoch": 0.93, + "grad_norm": 7.644920483139784, + "learning_rate": 1.1675274776111902e-05, + "loss": 1.5562, + "step": 77013 + }, + { + "epoch": 0.93, + "grad_norm": 68.04203812776703, + "learning_rate": 1.1674698851165958e-05, + "loss": 1.4742, + "step": 77016 + }, + { + "epoch": 0.93, + "grad_norm": 7.426018518740904, + "learning_rate": 1.1674122920504864e-05, + "loss": 1.6628, + "step": 77019 + }, + { + "epoch": 0.93, + "grad_norm": 9.728194427890356, + "learning_rate": 1.1673546984130595e-05, + "loss": 1.2749, + "step": 77022 + }, + { + "epoch": 0.93, + "grad_norm": 46.62540902784199, + "learning_rate": 1.1672971042045113e-05, + "loss": 1.5178, + "step": 77025 + }, + { + "epoch": 0.93, + "grad_norm": 17.678340748439286, + "learning_rate": 1.1672395094250381e-05, + "loss": 1.1314, + "step": 77028 + }, + { + "epoch": 0.93, + "grad_norm": 14.918880219035758, + "learning_rate": 1.167181914074837e-05, + "loss": 1.5556, + "step": 77031 + }, + { + "epoch": 0.93, + "grad_norm": 8.931861312031982, + "learning_rate": 1.1671243181541041e-05, + "loss": 1.6, + "step": 77034 + }, + { + "epoch": 0.93, + "grad_norm": 8.003527926230312, + "learning_rate": 1.1670667216630363e-05, + "loss": 1.6241, + "step": 77037 + }, + { + "epoch": 0.93, + "grad_norm": 12.49060015964158, + "learning_rate": 1.1670091246018296e-05, + "loss": 1.2136, + "step": 77040 + }, + { + "epoch": 0.93, + "grad_norm": 10.454920048776785, + "learning_rate": 1.1669515269706812e-05, + "loss": 1.418, + "step": 77043 + }, + { + "epoch": 0.93, + "grad_norm": 14.901190987666375, + "learning_rate": 1.166893928769787e-05, + "loss": 1.2964, + "step": 77046 + }, + { + "epoch": 0.93, + "grad_norm": 3.105147078932577, + "learning_rate": 1.166836329999344e-05, + "loss": 1.4535, + "step": 77049 + }, + { + "epoch": 0.93, + "grad_norm": 6.489017019601052, + "learning_rate": 1.1667787306595493e-05, + "loss": 1.4193, + "step": 77052 + }, + { + "epoch": 0.93, + "grad_norm": 8.479586479445693, + "learning_rate": 1.1667211307505983e-05, + "loss": 1.4869, + "step": 77055 + }, + { + "epoch": 0.93, + "grad_norm": 11.223870112328802, + "learning_rate": 1.1666635302726884e-05, + "loss": 0.9938, + "step": 77058 + }, + { + "epoch": 0.93, + "grad_norm": 4.7087220052216345, + "learning_rate": 1.1666059292260156e-05, + "loss": 1.1021, + "step": 77061 + }, + { + "epoch": 0.93, + "grad_norm": 13.836882836326467, + "learning_rate": 1.1665483276107774e-05, + "loss": 1.3312, + "step": 77064 + }, + { + "epoch": 0.93, + "grad_norm": 11.399570386697098, + "learning_rate": 1.1664907254271691e-05, + "loss": 1.4552, + "step": 77067 + }, + { + "epoch": 0.93, + "grad_norm": 12.602237871624233, + "learning_rate": 1.1664331226753882e-05, + "loss": 1.1921, + "step": 77070 + }, + { + "epoch": 0.93, + "grad_norm": 24.91543843765017, + "learning_rate": 1.1663755193556307e-05, + "loss": 1.3233, + "step": 77073 + }, + { + "epoch": 0.93, + "grad_norm": 26.44044428989655, + "learning_rate": 1.166317915468094e-05, + "loss": 1.0649, + "step": 77076 + }, + { + "epoch": 0.93, + "grad_norm": 6.369879804160333, + "learning_rate": 1.1662603110129737e-05, + "loss": 1.0857, + "step": 77079 + }, + { + "epoch": 0.93, + "grad_norm": 2.7486638876636453, + "learning_rate": 1.166202705990467e-05, + "loss": 1.0923, + "step": 77082 + }, + { + "epoch": 0.93, + "grad_norm": 10.074783030104545, + "learning_rate": 1.1661451004007707e-05, + "loss": 1.2055, + "step": 77085 + }, + { + "epoch": 0.93, + "grad_norm": 30.575702076779024, + "learning_rate": 1.1660874942440805e-05, + "loss": 1.1794, + "step": 77088 + }, + { + "epoch": 0.93, + "grad_norm": 19.717297201937082, + "learning_rate": 1.1660298875205936e-05, + "loss": 1.0604, + "step": 77091 + }, + { + "epoch": 0.93, + "grad_norm": 26.54528971697669, + "learning_rate": 1.1659722802305064e-05, + "loss": 1.2175, + "step": 77094 + }, + { + "epoch": 0.93, + "grad_norm": 8.638829765836176, + "learning_rate": 1.165914672374016e-05, + "loss": 1.4317, + "step": 77097 + }, + { + "epoch": 0.93, + "grad_norm": 13.036281263968919, + "learning_rate": 1.1658570639513182e-05, + "loss": 0.9863, + "step": 77100 + }, + { + "epoch": 0.93, + "grad_norm": 4.657628614097729, + "learning_rate": 1.1657994549626101e-05, + "loss": 1.701, + "step": 77103 + }, + { + "epoch": 0.93, + "grad_norm": 17.324767744310318, + "learning_rate": 1.1657418454080884e-05, + "loss": 1.5786, + "step": 77106 + }, + { + "epoch": 0.93, + "grad_norm": 15.84235986824571, + "learning_rate": 1.1656842352879494e-05, + "loss": 1.663, + "step": 77109 + }, + { + "epoch": 0.93, + "grad_norm": 11.108648184709129, + "learning_rate": 1.1656266246023895e-05, + "loss": 1.1666, + "step": 77112 + }, + { + "epoch": 0.93, + "grad_norm": 23.65741934908886, + "learning_rate": 1.1655690133516057e-05, + "loss": 1.3451, + "step": 77115 + }, + { + "epoch": 0.93, + "grad_norm": 22.930420094566486, + "learning_rate": 1.1655114015357949e-05, + "loss": 1.2944, + "step": 77118 + }, + { + "epoch": 0.93, + "grad_norm": 33.26312599083179, + "learning_rate": 1.1654537891551528e-05, + "loss": 1.4317, + "step": 77121 + }, + { + "epoch": 0.93, + "grad_norm": 17.75671049658173, + "learning_rate": 1.1653961762098767e-05, + "loss": 1.5956, + "step": 77124 + }, + { + "epoch": 0.93, + "grad_norm": 8.231565144185867, + "learning_rate": 1.1653385627001633e-05, + "loss": 0.8457, + "step": 77127 + }, + { + "epoch": 0.93, + "grad_norm": 10.273733345718291, + "learning_rate": 1.1652809486262085e-05, + "loss": 1.3828, + "step": 77130 + }, + { + "epoch": 0.93, + "grad_norm": 4.915951839394378, + "learning_rate": 1.1652233339882092e-05, + "loss": 1.3163, + "step": 77133 + }, + { + "epoch": 0.93, + "grad_norm": 10.265522712786629, + "learning_rate": 1.1651657187863625e-05, + "loss": 1.1993, + "step": 77136 + }, + { + "epoch": 0.93, + "grad_norm": 25.917306852586055, + "learning_rate": 1.1651081030208651e-05, + "loss": 1.5005, + "step": 77139 + }, + { + "epoch": 0.93, + "grad_norm": 17.880713457628687, + "learning_rate": 1.1650504866919125e-05, + "loss": 1.3977, + "step": 77142 + }, + { + "epoch": 0.93, + "grad_norm": 12.933307366886984, + "learning_rate": 1.1649928697997024e-05, + "loss": 1.4061, + "step": 77145 + }, + { + "epoch": 0.93, + "grad_norm": 17.030680879119576, + "learning_rate": 1.164935252344431e-05, + "loss": 1.3888, + "step": 77148 + }, + { + "epoch": 0.93, + "grad_norm": 5.6342491777408945, + "learning_rate": 1.1648776343262952e-05, + "loss": 1.3104, + "step": 77151 + }, + { + "epoch": 0.93, + "grad_norm": 7.731538031361613, + "learning_rate": 1.164820015745491e-05, + "loss": 1.4372, + "step": 77154 + }, + { + "epoch": 0.93, + "grad_norm": 16.155135535726824, + "learning_rate": 1.1647623966022155e-05, + "loss": 1.3565, + "step": 77157 + }, + { + "epoch": 0.93, + "grad_norm": 16.24730241150902, + "learning_rate": 1.1647047768966658e-05, + "loss": 1.5382, + "step": 77160 + }, + { + "epoch": 0.93, + "grad_norm": 24.079973002902985, + "learning_rate": 1.1646471566290374e-05, + "loss": 1.3852, + "step": 77163 + }, + { + "epoch": 0.93, + "grad_norm": 5.1967971520358045, + "learning_rate": 1.1645895357995281e-05, + "loss": 1.272, + "step": 77166 + }, + { + "epoch": 0.93, + "grad_norm": 5.839122189344295, + "learning_rate": 1.1645319144083337e-05, + "loss": 1.4446, + "step": 77169 + }, + { + "epoch": 0.93, + "grad_norm": 9.719613306340662, + "learning_rate": 1.1644742924556512e-05, + "loss": 1.4773, + "step": 77172 + }, + { + "epoch": 0.93, + "grad_norm": 55.04167646836957, + "learning_rate": 1.164416669941677e-05, + "loss": 1.4835, + "step": 77175 + }, + { + "epoch": 0.93, + "grad_norm": 4.940407668543396, + "learning_rate": 1.1643590468666082e-05, + "loss": 1.6405, + "step": 77178 + }, + { + "epoch": 0.93, + "grad_norm": 6.93925840428645, + "learning_rate": 1.1643014232306411e-05, + "loss": 1.4262, + "step": 77181 + }, + { + "epoch": 0.93, + "grad_norm": 13.298246018787522, + "learning_rate": 1.1642437990339723e-05, + "loss": 1.2554, + "step": 77184 + }, + { + "epoch": 0.93, + "grad_norm": 4.757809960903234, + "learning_rate": 1.1641861742767985e-05, + "loss": 1.5181, + "step": 77187 + }, + { + "epoch": 0.93, + "grad_norm": 14.34854715848812, + "learning_rate": 1.1641285489593169e-05, + "loss": 1.321, + "step": 77190 + }, + { + "epoch": 0.93, + "grad_norm": 19.675603199388462, + "learning_rate": 1.1640709230817233e-05, + "loss": 1.1614, + "step": 77193 + }, + { + "epoch": 0.93, + "grad_norm": 4.8794747100366616, + "learning_rate": 1.1640132966442148e-05, + "loss": 0.9844, + "step": 77196 + }, + { + "epoch": 0.93, + "grad_norm": 15.5089580677881, + "learning_rate": 1.163955669646988e-05, + "loss": 1.2887, + "step": 77199 + }, + { + "epoch": 0.93, + "grad_norm": 9.0877218017704, + "learning_rate": 1.1638980420902395e-05, + "loss": 1.1829, + "step": 77202 + }, + { + "epoch": 0.93, + "grad_norm": 6.591109180333855, + "learning_rate": 1.1638404139741662e-05, + "loss": 1.019, + "step": 77205 + }, + { + "epoch": 0.93, + "grad_norm": 8.961080518238607, + "learning_rate": 1.1637827852989643e-05, + "loss": 1.3978, + "step": 77208 + }, + { + "epoch": 0.93, + "grad_norm": 18.234516236664344, + "learning_rate": 1.163725156064831e-05, + "loss": 1.1198, + "step": 77211 + }, + { + "epoch": 0.93, + "grad_norm": 23.26554427918974, + "learning_rate": 1.1636675262719628e-05, + "loss": 1.2605, + "step": 77214 + }, + { + "epoch": 0.93, + "grad_norm": 27.690339090396094, + "learning_rate": 1.163609895920556e-05, + "loss": 1.5694, + "step": 77217 + }, + { + "epoch": 0.93, + "grad_norm": 14.538239735215043, + "learning_rate": 1.1635522650108076e-05, + "loss": 1.3354, + "step": 77220 + }, + { + "epoch": 0.93, + "grad_norm": 7.605187564679401, + "learning_rate": 1.1634946335429142e-05, + "loss": 1.228, + "step": 77223 + }, + { + "epoch": 0.93, + "grad_norm": 10.65237808631693, + "learning_rate": 1.1634370015170729e-05, + "loss": 1.5578, + "step": 77226 + }, + { + "epoch": 0.93, + "grad_norm": 6.411882503255693, + "learning_rate": 1.1633793689334794e-05, + "loss": 1.5089, + "step": 77229 + }, + { + "epoch": 0.93, + "grad_norm": 17.356280759842143, + "learning_rate": 1.1633217357923315e-05, + "loss": 1.4676, + "step": 77232 + }, + { + "epoch": 0.93, + "grad_norm": 4.75571278632569, + "learning_rate": 1.163264102093825e-05, + "loss": 1.6252, + "step": 77235 + }, + { + "epoch": 0.93, + "grad_norm": 15.277518065109296, + "learning_rate": 1.1632064678381569e-05, + "loss": 1.3583, + "step": 77238 + }, + { + "epoch": 0.93, + "grad_norm": 20.391427602832145, + "learning_rate": 1.163148833025524e-05, + "loss": 1.2957, + "step": 77241 + }, + { + "epoch": 0.93, + "grad_norm": 21.03112117370075, + "learning_rate": 1.163091197656123e-05, + "loss": 1.2985, + "step": 77244 + }, + { + "epoch": 0.93, + "grad_norm": 6.677768879991374, + "learning_rate": 1.1630335617301505e-05, + "loss": 1.5183, + "step": 77247 + }, + { + "epoch": 0.93, + "grad_norm": 5.943204833105513, + "learning_rate": 1.162975925247803e-05, + "loss": 1.3053, + "step": 77250 + }, + { + "epoch": 0.93, + "grad_norm": 17.78624084258632, + "learning_rate": 1.1629182882092775e-05, + "loss": 1.6896, + "step": 77253 + }, + { + "epoch": 0.93, + "grad_norm": 24.12103607862217, + "learning_rate": 1.1628606506147706e-05, + "loss": 1.4284, + "step": 77256 + }, + { + "epoch": 0.93, + "grad_norm": 17.728248166413817, + "learning_rate": 1.162803012464479e-05, + "loss": 1.4385, + "step": 77259 + }, + { + "epoch": 0.93, + "grad_norm": 21.33287883643555, + "learning_rate": 1.1627453737585992e-05, + "loss": 1.3928, + "step": 77262 + }, + { + "epoch": 0.93, + "grad_norm": 9.647402678411472, + "learning_rate": 1.162687734497328e-05, + "loss": 1.384, + "step": 77265 + }, + { + "epoch": 0.93, + "grad_norm": 7.253579075312659, + "learning_rate": 1.1626300946808625e-05, + "loss": 1.3148, + "step": 77268 + }, + { + "epoch": 0.93, + "grad_norm": 13.471555605089502, + "learning_rate": 1.1625724543093985e-05, + "loss": 1.3413, + "step": 77271 + }, + { + "epoch": 0.93, + "grad_norm": 19.43546922865804, + "learning_rate": 1.1625148133831339e-05, + "loss": 1.4938, + "step": 77274 + }, + { + "epoch": 0.93, + "grad_norm": 12.681720460464721, + "learning_rate": 1.1624571719022643e-05, + "loss": 1.3693, + "step": 77277 + }, + { + "epoch": 0.93, + "grad_norm": 19.27454999487831, + "learning_rate": 1.1623995298669872e-05, + "loss": 1.5629, + "step": 77280 + }, + { + "epoch": 0.93, + "grad_norm": 14.706356011189124, + "learning_rate": 1.1623418872774987e-05, + "loss": 1.4589, + "step": 77283 + }, + { + "epoch": 0.93, + "grad_norm": 6.110069231535841, + "learning_rate": 1.1622842441339961e-05, + "loss": 1.388, + "step": 77286 + }, + { + "epoch": 0.93, + "grad_norm": 18.16223613822536, + "learning_rate": 1.1622266004366757e-05, + "loss": 1.2164, + "step": 77289 + }, + { + "epoch": 0.93, + "grad_norm": 6.381586700732483, + "learning_rate": 1.1621689561857343e-05, + "loss": 1.4622, + "step": 77292 + }, + { + "epoch": 0.93, + "grad_norm": 3.4883203741234716, + "learning_rate": 1.162111311381369e-05, + "loss": 1.3382, + "step": 77295 + }, + { + "epoch": 0.93, + "grad_norm": 25.295754786305995, + "learning_rate": 1.162053666023776e-05, + "loss": 1.4902, + "step": 77298 + }, + { + "epoch": 0.93, + "grad_norm": 37.34347946806139, + "learning_rate": 1.1619960201131522e-05, + "loss": 1.014, + "step": 77301 + }, + { + "epoch": 0.93, + "grad_norm": 8.515540039161788, + "learning_rate": 1.1619383736496941e-05, + "loss": 1.2866, + "step": 77304 + }, + { + "epoch": 0.93, + "grad_norm": 8.249254519276038, + "learning_rate": 1.1618807266335987e-05, + "loss": 1.5006, + "step": 77307 + }, + { + "epoch": 0.93, + "grad_norm": 11.309572494809174, + "learning_rate": 1.1618230790650632e-05, + "loss": 1.356, + "step": 77310 + }, + { + "epoch": 0.93, + "grad_norm": 9.077244281929934, + "learning_rate": 1.1617654309442835e-05, + "loss": 1.192, + "step": 77313 + }, + { + "epoch": 0.93, + "grad_norm": 8.718045034918777, + "learning_rate": 1.1617077822714562e-05, + "loss": 1.2388, + "step": 77316 + }, + { + "epoch": 0.93, + "grad_norm": 14.309024951464496, + "learning_rate": 1.161650133046779e-05, + "loss": 1.6112, + "step": 77319 + }, + { + "epoch": 0.93, + "grad_norm": 7.598123568791792, + "learning_rate": 1.161592483270448e-05, + "loss": 1.1785, + "step": 77322 + }, + { + "epoch": 0.93, + "grad_norm": 6.348704401072124, + "learning_rate": 1.1615348329426599e-05, + "loss": 1.174, + "step": 77325 + }, + { + "epoch": 0.93, + "grad_norm": 4.415543993812032, + "learning_rate": 1.161477182063612e-05, + "loss": 1.3479, + "step": 77328 + }, + { + "epoch": 0.93, + "grad_norm": 2.2761667279952356, + "learning_rate": 1.1614195306335003e-05, + "loss": 1.4544, + "step": 77331 + }, + { + "epoch": 0.93, + "grad_norm": 7.499290622061059, + "learning_rate": 1.161361878652522e-05, + "loss": 1.2223, + "step": 77334 + }, + { + "epoch": 0.93, + "grad_norm": 4.886384157847994, + "learning_rate": 1.1613042261208736e-05, + "loss": 1.2717, + "step": 77337 + }, + { + "epoch": 0.93, + "grad_norm": 21.277593725874596, + "learning_rate": 1.161246573038752e-05, + "loss": 1.0765, + "step": 77340 + }, + { + "epoch": 0.93, + "grad_norm": 6.699576129324728, + "learning_rate": 1.1611889194063544e-05, + "loss": 1.3186, + "step": 77343 + }, + { + "epoch": 0.93, + "grad_norm": 10.85074825368484, + "learning_rate": 1.1611312652238764e-05, + "loss": 1.3292, + "step": 77346 + }, + { + "epoch": 0.93, + "grad_norm": 4.525800856800069, + "learning_rate": 1.1610736104915157e-05, + "loss": 1.5397, + "step": 77349 + }, + { + "epoch": 0.93, + "grad_norm": 10.672589557103034, + "learning_rate": 1.1610159552094688e-05, + "loss": 0.9146, + "step": 77352 + }, + { + "epoch": 0.93, + "grad_norm": 8.231209880771955, + "learning_rate": 1.1609582993779327e-05, + "loss": 1.0552, + "step": 77355 + }, + { + "epoch": 0.93, + "grad_norm": 16.079136753954405, + "learning_rate": 1.1609006429971034e-05, + "loss": 0.9845, + "step": 77358 + }, + { + "epoch": 0.93, + "grad_norm": 17.774618830138547, + "learning_rate": 1.1608429860671785e-05, + "loss": 1.2763, + "step": 77361 + }, + { + "epoch": 0.93, + "grad_norm": 23.344748138576, + "learning_rate": 1.1607853285883546e-05, + "loss": 1.4799, + "step": 77364 + }, + { + "epoch": 0.93, + "grad_norm": 17.825667904259166, + "learning_rate": 1.1607276705608278e-05, + "loss": 1.4138, + "step": 77367 + }, + { + "epoch": 0.93, + "grad_norm": 9.36410632241091, + "learning_rate": 1.1606700119847956e-05, + "loss": 1.2448, + "step": 77370 + }, + { + "epoch": 0.93, + "grad_norm": 40.13051619051536, + "learning_rate": 1.1606123528604546e-05, + "loss": 1.5027, + "step": 77373 + }, + { + "epoch": 0.93, + "grad_norm": 11.163412027302476, + "learning_rate": 1.1605546931880014e-05, + "loss": 1.331, + "step": 77376 + }, + { + "epoch": 0.93, + "grad_norm": 19.278341876423788, + "learning_rate": 1.1604970329676329e-05, + "loss": 1.3001, + "step": 77379 + }, + { + "epoch": 0.93, + "grad_norm": 12.883445160093773, + "learning_rate": 1.1604393721995457e-05, + "loss": 1.3088, + "step": 77382 + }, + { + "epoch": 0.93, + "grad_norm": 46.96921468402046, + "learning_rate": 1.1603817108839369e-05, + "loss": 1.1709, + "step": 77385 + }, + { + "epoch": 0.93, + "grad_norm": 7.469952784940086, + "learning_rate": 1.160324049021003e-05, + "loss": 1.2715, + "step": 77388 + }, + { + "epoch": 0.93, + "grad_norm": 3.957027960379429, + "learning_rate": 1.1602663866109407e-05, + "loss": 1.1642, + "step": 77391 + }, + { + "epoch": 0.93, + "grad_norm": 13.236043128235007, + "learning_rate": 1.1602087236539473e-05, + "loss": 1.2208, + "step": 77394 + }, + { + "epoch": 0.93, + "grad_norm": 39.07121798902239, + "learning_rate": 1.160151060150219e-05, + "loss": 1.2153, + "step": 77397 + }, + { + "epoch": 0.93, + "grad_norm": 6.991560201162705, + "learning_rate": 1.1600933960999527e-05, + "loss": 1.2851, + "step": 77400 + }, + { + "epoch": 0.93, + "grad_norm": 15.249206418466947, + "learning_rate": 1.1600357315033456e-05, + "loss": 1.0724, + "step": 77403 + }, + { + "epoch": 0.93, + "grad_norm": 17.750932161827887, + "learning_rate": 1.1599780663605942e-05, + "loss": 1.2943, + "step": 77406 + }, + { + "epoch": 0.93, + "grad_norm": 23.378853756265432, + "learning_rate": 1.1599204006718952e-05, + "loss": 1.1547, + "step": 77409 + }, + { + "epoch": 0.93, + "grad_norm": 26.902567020905845, + "learning_rate": 1.1598627344374453e-05, + "loss": 1.137, + "step": 77412 + }, + { + "epoch": 0.93, + "grad_norm": 8.295777229867399, + "learning_rate": 1.1598050676574417e-05, + "loss": 1.1468, + "step": 77415 + }, + { + "epoch": 0.93, + "grad_norm": 17.045038290983, + "learning_rate": 1.1597474003320807e-05, + "loss": 1.2452, + "step": 77418 + }, + { + "epoch": 0.93, + "grad_norm": 16.000179014267285, + "learning_rate": 1.1596897324615596e-05, + "loss": 1.2906, + "step": 77421 + }, + { + "epoch": 0.93, + "grad_norm": 13.011313161739562, + "learning_rate": 1.159632064046075e-05, + "loss": 1.3168, + "step": 77424 + }, + { + "epoch": 0.93, + "grad_norm": 11.772722779513515, + "learning_rate": 1.1595743950858235e-05, + "loss": 1.4238, + "step": 77427 + }, + { + "epoch": 0.93, + "grad_norm": 11.565441844247069, + "learning_rate": 1.1595167255810023e-05, + "loss": 1.245, + "step": 77430 + }, + { + "epoch": 0.93, + "grad_norm": 7.431227498804948, + "learning_rate": 1.1594590555318075e-05, + "loss": 1.6851, + "step": 77433 + }, + { + "epoch": 0.93, + "grad_norm": 14.383805945784482, + "learning_rate": 1.1594013849384368e-05, + "loss": 1.3143, + "step": 77436 + }, + { + "epoch": 0.93, + "grad_norm": 2.5295780600630864, + "learning_rate": 1.1593437138010865e-05, + "loss": 1.4246, + "step": 77439 + }, + { + "epoch": 0.93, + "grad_norm": 9.292406379884032, + "learning_rate": 1.1592860421199536e-05, + "loss": 1.4879, + "step": 77442 + }, + { + "epoch": 0.93, + "grad_norm": 5.825179709344377, + "learning_rate": 1.1592283698952344e-05, + "loss": 1.0293, + "step": 77445 + }, + { + "epoch": 0.93, + "grad_norm": 10.9501434022309, + "learning_rate": 1.1591706971271269e-05, + "loss": 1.3617, + "step": 77448 + }, + { + "epoch": 0.93, + "grad_norm": 3.1041305229690552, + "learning_rate": 1.1591130238158264e-05, + "loss": 1.554, + "step": 77451 + }, + { + "epoch": 0.93, + "grad_norm": 14.161593583828818, + "learning_rate": 1.1590553499615305e-05, + "loss": 1.252, + "step": 77454 + }, + { + "epoch": 0.93, + "grad_norm": 9.400866595893339, + "learning_rate": 1.1589976755644364e-05, + "loss": 1.8064, + "step": 77457 + }, + { + "epoch": 0.93, + "grad_norm": 12.694433015894845, + "learning_rate": 1.1589400006247403e-05, + "loss": 1.3579, + "step": 77460 + }, + { + "epoch": 0.93, + "grad_norm": 6.093456311551937, + "learning_rate": 1.1588823251426393e-05, + "loss": 1.0971, + "step": 77463 + }, + { + "epoch": 0.93, + "grad_norm": 2.5533701137829965, + "learning_rate": 1.15882464911833e-05, + "loss": 1.3026, + "step": 77466 + }, + { + "epoch": 0.93, + "grad_norm": 11.27734843155401, + "learning_rate": 1.1587669725520097e-05, + "loss": 1.373, + "step": 77469 + }, + { + "epoch": 0.93, + "grad_norm": 7.210655988212734, + "learning_rate": 1.1587092954438744e-05, + "loss": 1.3888, + "step": 77472 + }, + { + "epoch": 0.93, + "grad_norm": 11.713478606859479, + "learning_rate": 1.1586516177941218e-05, + "loss": 1.4835, + "step": 77475 + }, + { + "epoch": 0.93, + "grad_norm": 7.310139466176368, + "learning_rate": 1.1585939396029482e-05, + "loss": 1.4228, + "step": 77478 + }, + { + "epoch": 0.93, + "grad_norm": 28.789634818146556, + "learning_rate": 1.1585362608705508e-05, + "loss": 1.4569, + "step": 77481 + }, + { + "epoch": 0.93, + "grad_norm": 2.4371807478051473, + "learning_rate": 1.1584785815971262e-05, + "loss": 1.4043, + "step": 77484 + }, + { + "epoch": 0.93, + "grad_norm": 11.582659848953291, + "learning_rate": 1.158420901782871e-05, + "loss": 1.5791, + "step": 77487 + }, + { + "epoch": 0.93, + "grad_norm": 23.583159758829556, + "learning_rate": 1.1583632214279828e-05, + "loss": 1.6195, + "step": 77490 + }, + { + "epoch": 0.93, + "grad_norm": 13.900481886595625, + "learning_rate": 1.1583055405326576e-05, + "loss": 1.3474, + "step": 77493 + }, + { + "epoch": 0.93, + "grad_norm": 4.790998396266763, + "learning_rate": 1.158247859097093e-05, + "loss": 1.186, + "step": 77496 + }, + { + "epoch": 0.93, + "grad_norm": 9.112411582958645, + "learning_rate": 1.158190177121485e-05, + "loss": 1.1133, + "step": 77499 + }, + { + "epoch": 0.93, + "grad_norm": 14.372892739944444, + "learning_rate": 1.1581324946060312e-05, + "loss": 1.019, + "step": 77502 + }, + { + "epoch": 0.93, + "grad_norm": 18.24425042830143, + "learning_rate": 1.1580748115509279e-05, + "loss": 1.6202, + "step": 77505 + }, + { + "epoch": 0.93, + "grad_norm": 9.524827468744352, + "learning_rate": 1.1580171279563723e-05, + "loss": 1.4719, + "step": 77508 + }, + { + "epoch": 0.93, + "grad_norm": 11.731387297786426, + "learning_rate": 1.1579594438225613e-05, + "loss": 1.2512, + "step": 77511 + }, + { + "epoch": 0.93, + "grad_norm": 7.782040006699373, + "learning_rate": 1.1579017591496916e-05, + "loss": 1.3455, + "step": 77514 + }, + { + "epoch": 0.93, + "grad_norm": 8.14788769104548, + "learning_rate": 1.15784407393796e-05, + "loss": 1.3534, + "step": 77517 + }, + { + "epoch": 0.93, + "grad_norm": 8.465694082813556, + "learning_rate": 1.1577863881875632e-05, + "loss": 1.155, + "step": 77520 + }, + { + "epoch": 0.93, + "grad_norm": 10.04233270632943, + "learning_rate": 1.1577287018986988e-05, + "loss": 1.3561, + "step": 77523 + }, + { + "epoch": 0.93, + "grad_norm": 9.097466182271644, + "learning_rate": 1.157671015071563e-05, + "loss": 1.2444, + "step": 77526 + }, + { + "epoch": 0.93, + "grad_norm": 5.114968037864007, + "learning_rate": 1.1576133277063526e-05, + "loss": 1.1018, + "step": 77529 + }, + { + "epoch": 0.93, + "grad_norm": 8.446069779962682, + "learning_rate": 1.1575556398032647e-05, + "loss": 1.1897, + "step": 77532 + }, + { + "epoch": 0.93, + "grad_norm": 8.004858267209576, + "learning_rate": 1.1574979513624963e-05, + "loss": 1.2985, + "step": 77535 + }, + { + "epoch": 0.93, + "grad_norm": 6.460351170039702, + "learning_rate": 1.157440262384244e-05, + "loss": 1.084, + "step": 77538 + }, + { + "epoch": 0.93, + "grad_norm": 10.306695513765755, + "learning_rate": 1.1573825728687046e-05, + "loss": 1.5352, + "step": 77541 + }, + { + "epoch": 0.93, + "grad_norm": 8.214881839998043, + "learning_rate": 1.157324882816076e-05, + "loss": 1.4751, + "step": 77544 + }, + { + "epoch": 0.93, + "grad_norm": 23.00821073770805, + "learning_rate": 1.1572671922265532e-05, + "loss": 1.3271, + "step": 77547 + }, + { + "epoch": 0.93, + "grad_norm": 9.75975247650397, + "learning_rate": 1.1572095011003348e-05, + "loss": 1.383, + "step": 77550 + }, + { + "epoch": 0.93, + "grad_norm": 19.913273092158022, + "learning_rate": 1.1571518094376166e-05, + "loss": 1.2448, + "step": 77553 + }, + { + "epoch": 0.93, + "grad_norm": 11.594741947933162, + "learning_rate": 1.1570941172385964e-05, + "loss": 1.0686, + "step": 77556 + }, + { + "epoch": 0.93, + "grad_norm": 12.683837899565598, + "learning_rate": 1.15703642450347e-05, + "loss": 1.3633, + "step": 77559 + }, + { + "epoch": 0.93, + "grad_norm": 9.807236926010287, + "learning_rate": 1.156978731232435e-05, + "loss": 1.383, + "step": 77562 + }, + { + "epoch": 0.93, + "grad_norm": 6.266773540849871, + "learning_rate": 1.1569210374256885e-05, + "loss": 1.5187, + "step": 77565 + }, + { + "epoch": 0.93, + "grad_norm": 24.00619502428843, + "learning_rate": 1.1568633430834264e-05, + "loss": 1.6752, + "step": 77568 + }, + { + "epoch": 0.93, + "grad_norm": 8.78142173815208, + "learning_rate": 1.1568056482058468e-05, + "loss": 1.5293, + "step": 77571 + }, + { + "epoch": 0.93, + "grad_norm": 7.8019789547275895, + "learning_rate": 1.1567479527931455e-05, + "loss": 1.3012, + "step": 77574 + }, + { + "epoch": 0.93, + "grad_norm": 6.882964943126112, + "learning_rate": 1.1566902568455205e-05, + "loss": 1.4213, + "step": 77577 + }, + { + "epoch": 0.93, + "grad_norm": 11.171263327836428, + "learning_rate": 1.1566325603631673e-05, + "loss": 1.3009, + "step": 77580 + }, + { + "epoch": 0.93, + "grad_norm": 14.087671181860015, + "learning_rate": 1.1565748633462844e-05, + "loss": 1.5815, + "step": 77583 + }, + { + "epoch": 0.93, + "grad_norm": 16.603829210155613, + "learning_rate": 1.1565171657950675e-05, + "loss": 1.1605, + "step": 77586 + }, + { + "epoch": 0.93, + "grad_norm": 11.5039287349933, + "learning_rate": 1.1564594677097136e-05, + "loss": 1.5033, + "step": 77589 + }, + { + "epoch": 0.93, + "grad_norm": 5.765589762207086, + "learning_rate": 1.1564017690904204e-05, + "loss": 1.4656, + "step": 77592 + }, + { + "epoch": 0.93, + "grad_norm": 20.792155377047806, + "learning_rate": 1.1563440699373838e-05, + "loss": 1.3845, + "step": 77595 + }, + { + "epoch": 0.93, + "grad_norm": 17.775176781928213, + "learning_rate": 1.1562863702508018e-05, + "loss": 1.4442, + "step": 77598 + }, + { + "epoch": 0.93, + "grad_norm": 4.496844470295612, + "learning_rate": 1.1562286700308705e-05, + "loss": 1.2714, + "step": 77601 + }, + { + "epoch": 0.93, + "grad_norm": 7.889807118862447, + "learning_rate": 1.1561709692777869e-05, + "loss": 0.929, + "step": 77604 + }, + { + "epoch": 0.93, + "grad_norm": 15.355186272296915, + "learning_rate": 1.1561132679917481e-05, + "loss": 1.4609, + "step": 77607 + }, + { + "epoch": 0.93, + "grad_norm": 65.26506309277416, + "learning_rate": 1.1560555661729509e-05, + "loss": 1.4621, + "step": 77610 + }, + { + "epoch": 0.93, + "grad_norm": 17.540044903458444, + "learning_rate": 1.1559978638215923e-05, + "loss": 1.4938, + "step": 77613 + }, + { + "epoch": 0.93, + "grad_norm": 27.310999310996092, + "learning_rate": 1.1559401609378688e-05, + "loss": 1.286, + "step": 77616 + }, + { + "epoch": 0.93, + "grad_norm": 11.322038601427, + "learning_rate": 1.1558824575219784e-05, + "loss": 1.5306, + "step": 77619 + }, + { + "epoch": 0.93, + "grad_norm": 22.316820324276993, + "learning_rate": 1.1558247535741169e-05, + "loss": 1.3843, + "step": 77622 + }, + { + "epoch": 0.93, + "grad_norm": 9.381614220717363, + "learning_rate": 1.1557670490944817e-05, + "loss": 1.2567, + "step": 77625 + }, + { + "epoch": 0.93, + "grad_norm": 32.02146629419398, + "learning_rate": 1.15570934408327e-05, + "loss": 1.345, + "step": 77628 + }, + { + "epoch": 0.93, + "grad_norm": 16.948115858846815, + "learning_rate": 1.155651638540678e-05, + "loss": 1.3757, + "step": 77631 + }, + { + "epoch": 0.93, + "grad_norm": 9.75735818956224, + "learning_rate": 1.155593932466903e-05, + "loss": 1.3303, + "step": 77634 + }, + { + "epoch": 0.93, + "grad_norm": 15.810528269673156, + "learning_rate": 1.1555362258621421e-05, + "loss": 1.411, + "step": 77637 + }, + { + "epoch": 0.93, + "grad_norm": 6.039123616771686, + "learning_rate": 1.155478518726592e-05, + "loss": 1.4315, + "step": 77640 + }, + { + "epoch": 0.93, + "grad_norm": 20.082604615379175, + "learning_rate": 1.1554208110604497e-05, + "loss": 1.2933, + "step": 77643 + }, + { + "epoch": 0.93, + "grad_norm": 5.1815729358251685, + "learning_rate": 1.1553631028639123e-05, + "loss": 1.4165, + "step": 77646 + }, + { + "epoch": 0.93, + "grad_norm": 4.310144914002116, + "learning_rate": 1.1553053941371766e-05, + "loss": 1.2934, + "step": 77649 + }, + { + "epoch": 0.93, + "grad_norm": 9.82800138147533, + "learning_rate": 1.1552476848804394e-05, + "loss": 1.3495, + "step": 77652 + }, + { + "epoch": 0.93, + "grad_norm": 8.582615352583534, + "learning_rate": 1.1551899750938978e-05, + "loss": 1.2456, + "step": 77655 + }, + { + "epoch": 0.93, + "grad_norm": 8.469274594950932, + "learning_rate": 1.1551322647777487e-05, + "loss": 1.285, + "step": 77658 + }, + { + "epoch": 0.93, + "grad_norm": 46.474430180165236, + "learning_rate": 1.1550745539321892e-05, + "loss": 1.478, + "step": 77661 + }, + { + "epoch": 0.93, + "grad_norm": 5.925033459977693, + "learning_rate": 1.155016842557416e-05, + "loss": 1.3559, + "step": 77664 + }, + { + "epoch": 0.93, + "grad_norm": 4.2043520987206255, + "learning_rate": 1.154959130653626e-05, + "loss": 1.4473, + "step": 77667 + }, + { + "epoch": 0.93, + "grad_norm": 12.995658079330124, + "learning_rate": 1.1549014182210162e-05, + "loss": 1.171, + "step": 77670 + }, + { + "epoch": 0.93, + "grad_norm": 7.270275822838659, + "learning_rate": 1.1548437052597843e-05, + "loss": 1.1797, + "step": 77673 + }, + { + "epoch": 0.93, + "grad_norm": 5.981687315817044, + "learning_rate": 1.1547859917701261e-05, + "loss": 1.077, + "step": 77676 + }, + { + "epoch": 0.93, + "grad_norm": 10.037836025312021, + "learning_rate": 1.1547282777522392e-05, + "loss": 1.3008, + "step": 77679 + }, + { + "epoch": 0.93, + "grad_norm": 9.047460094848557, + "learning_rate": 1.1546705632063205e-05, + "loss": 1.2472, + "step": 77682 + }, + { + "epoch": 0.93, + "grad_norm": 15.344090992299884, + "learning_rate": 1.1546128481325669e-05, + "loss": 1.2196, + "step": 77685 + }, + { + "epoch": 0.93, + "grad_norm": 10.107475496369508, + "learning_rate": 1.1545551325311751e-05, + "loss": 1.3464, + "step": 77688 + }, + { + "epoch": 0.93, + "grad_norm": 7.801510559344298, + "learning_rate": 1.1544974164023423e-05, + "loss": 1.2211, + "step": 77691 + }, + { + "epoch": 0.93, + "grad_norm": 12.633030521744212, + "learning_rate": 1.1544396997462656e-05, + "loss": 1.3366, + "step": 77694 + }, + { + "epoch": 0.93, + "grad_norm": 60.49938721059182, + "learning_rate": 1.154381982563142e-05, + "loss": 1.4224, + "step": 77697 + }, + { + "epoch": 0.93, + "grad_norm": 7.828329537083123, + "learning_rate": 1.154324264853168e-05, + "loss": 1.2806, + "step": 77700 + }, + { + "epoch": 0.93, + "grad_norm": 6.808192812321816, + "learning_rate": 1.154266546616541e-05, + "loss": 1.0821, + "step": 77703 + }, + { + "epoch": 0.93, + "grad_norm": 8.299530037486978, + "learning_rate": 1.154208827853458e-05, + "loss": 1.734, + "step": 77706 + }, + { + "epoch": 0.93, + "grad_norm": 14.063916886962849, + "learning_rate": 1.1541511085641155e-05, + "loss": 1.4692, + "step": 77709 + }, + { + "epoch": 0.93, + "grad_norm": 16.28058204918854, + "learning_rate": 1.154093388748711e-05, + "loss": 1.2421, + "step": 77712 + }, + { + "epoch": 0.93, + "grad_norm": 6.861034659533583, + "learning_rate": 1.1540356684074415e-05, + "loss": 1.0935, + "step": 77715 + }, + { + "epoch": 0.93, + "grad_norm": 21.528332116784913, + "learning_rate": 1.1539779475405033e-05, + "loss": 1.2472, + "step": 77718 + }, + { + "epoch": 0.93, + "grad_norm": 7.042346833398156, + "learning_rate": 1.153920226148094e-05, + "loss": 1.0552, + "step": 77721 + }, + { + "epoch": 0.93, + "grad_norm": 6.797331609401715, + "learning_rate": 1.1538625042304103e-05, + "loss": 1.42, + "step": 77724 + }, + { + "epoch": 0.93, + "grad_norm": 13.448609807872062, + "learning_rate": 1.1538047817876495e-05, + "loss": 1.4839, + "step": 77727 + }, + { + "epoch": 0.93, + "grad_norm": 12.06532162524222, + "learning_rate": 1.153747058820008e-05, + "loss": 1.2148, + "step": 77730 + }, + { + "epoch": 0.93, + "grad_norm": 11.054839572014933, + "learning_rate": 1.1536893353276836e-05, + "loss": 1.6284, + "step": 77733 + }, + { + "epoch": 0.93, + "grad_norm": 26.090622749366958, + "learning_rate": 1.1536316113108728e-05, + "loss": 1.4384, + "step": 77736 + }, + { + "epoch": 0.93, + "grad_norm": 64.86917005061227, + "learning_rate": 1.1535738867697725e-05, + "loss": 1.1604, + "step": 77739 + }, + { + "epoch": 0.93, + "grad_norm": 8.556105476412831, + "learning_rate": 1.1535161617045798e-05, + "loss": 1.2851, + "step": 77742 + }, + { + "epoch": 0.93, + "grad_norm": 4.5563480065450985, + "learning_rate": 1.1534584361154919e-05, + "loss": 1.2646, + "step": 77745 + }, + { + "epoch": 0.93, + "grad_norm": 25.73180477014578, + "learning_rate": 1.1534007100027057e-05, + "loss": 1.4726, + "step": 77748 + }, + { + "epoch": 0.93, + "grad_norm": 3.483510530365477, + "learning_rate": 1.1533429833664179e-05, + "loss": 1.3175, + "step": 77751 + }, + { + "epoch": 0.93, + "grad_norm": 6.180256326754811, + "learning_rate": 1.1532852562068258e-05, + "loss": 1.157, + "step": 77754 + }, + { + "epoch": 0.94, + "grad_norm": 4.41716212695526, + "learning_rate": 1.1532275285241263e-05, + "loss": 1.7243, + "step": 77757 + }, + { + "epoch": 0.94, + "grad_norm": 71.59053053086777, + "learning_rate": 1.1531698003185165e-05, + "loss": 1.2121, + "step": 77760 + }, + { + "epoch": 0.94, + "grad_norm": 24.489382758025343, + "learning_rate": 1.1531120715901934e-05, + "loss": 1.5232, + "step": 77763 + }, + { + "epoch": 0.94, + "grad_norm": 22.53487460496232, + "learning_rate": 1.1530543423393539e-05, + "loss": 1.0836, + "step": 77766 + }, + { + "epoch": 0.94, + "grad_norm": 4.275447965167067, + "learning_rate": 1.152996612566195e-05, + "loss": 1.1885, + "step": 77769 + }, + { + "epoch": 0.94, + "grad_norm": 3.036255161544036, + "learning_rate": 1.152938882270914e-05, + "loss": 1.3538, + "step": 77772 + }, + { + "epoch": 0.94, + "grad_norm": 8.630066837821111, + "learning_rate": 1.1528811514537073e-05, + "loss": 1.2347, + "step": 77775 + }, + { + "epoch": 0.94, + "grad_norm": 3.833758313497795, + "learning_rate": 1.1528234201147725e-05, + "loss": 1.3343, + "step": 77778 + }, + { + "epoch": 0.94, + "grad_norm": 21.382453047705035, + "learning_rate": 1.1527656882543067e-05, + "loss": 1.227, + "step": 77781 + }, + { + "epoch": 0.94, + "grad_norm": 16.58150305161152, + "learning_rate": 1.1527079558725061e-05, + "loss": 1.2094, + "step": 77784 + }, + { + "epoch": 0.94, + "grad_norm": 2.5267959972767144, + "learning_rate": 1.1526502229695687e-05, + "loss": 1.5076, + "step": 77787 + }, + { + "epoch": 0.94, + "grad_norm": 12.57270570377342, + "learning_rate": 1.1525924895456908e-05, + "loss": 1.1489, + "step": 77790 + }, + { + "epoch": 0.94, + "grad_norm": 10.022975132144225, + "learning_rate": 1.1525347556010697e-05, + "loss": 1.2307, + "step": 77793 + }, + { + "epoch": 0.94, + "grad_norm": 27.423603014190647, + "learning_rate": 1.1524770211359025e-05, + "loss": 1.1592, + "step": 77796 + }, + { + "epoch": 0.94, + "grad_norm": 8.416450759338256, + "learning_rate": 1.1524192861503862e-05, + "loss": 0.8103, + "step": 77799 + }, + { + "epoch": 0.94, + "grad_norm": 12.11168003947061, + "learning_rate": 1.1523615506447176e-05, + "loss": 1.2254, + "step": 77802 + }, + { + "epoch": 0.94, + "grad_norm": 6.879035349720384, + "learning_rate": 1.152303814619094e-05, + "loss": 1.3844, + "step": 77805 + }, + { + "epoch": 0.94, + "grad_norm": 14.063127188597084, + "learning_rate": 1.1522460780737124e-05, + "loss": 1.2797, + "step": 77808 + }, + { + "epoch": 0.94, + "grad_norm": 10.087404676730424, + "learning_rate": 1.1521883410087697e-05, + "loss": 1.6642, + "step": 77811 + }, + { + "epoch": 0.94, + "grad_norm": 13.462286867816285, + "learning_rate": 1.1521306034244633e-05, + "loss": 1.491, + "step": 77814 + }, + { + "epoch": 0.94, + "grad_norm": 13.001265612479656, + "learning_rate": 1.1520728653209894e-05, + "loss": 1.239, + "step": 77817 + }, + { + "epoch": 0.94, + "grad_norm": 8.457055855902995, + "learning_rate": 1.152015126698546e-05, + "loss": 1.541, + "step": 77820 + }, + { + "epoch": 0.94, + "grad_norm": 20.74260978317525, + "learning_rate": 1.1519573875573297e-05, + "loss": 1.5537, + "step": 77823 + }, + { + "epoch": 0.94, + "grad_norm": 9.081560833804078, + "learning_rate": 1.1518996478975375e-05, + "loss": 1.3844, + "step": 77826 + }, + { + "epoch": 0.94, + "grad_norm": 5.658296078790744, + "learning_rate": 1.1518419077193663e-05, + "loss": 1.601, + "step": 77829 + }, + { + "epoch": 0.94, + "grad_norm": 16.030438315114793, + "learning_rate": 1.1517841670230136e-05, + "loss": 1.2211, + "step": 77832 + }, + { + "epoch": 0.94, + "grad_norm": 5.431747493002514, + "learning_rate": 1.1517264258086764e-05, + "loss": 1.5537, + "step": 77835 + }, + { + "epoch": 0.94, + "grad_norm": 6.313269942377391, + "learning_rate": 1.1516686840765513e-05, + "loss": 1.5253, + "step": 77838 + }, + { + "epoch": 0.94, + "grad_norm": 19.066541344615565, + "learning_rate": 1.1516109418268355e-05, + "loss": 1.184, + "step": 77841 + }, + { + "epoch": 0.94, + "grad_norm": 4.005204548124792, + "learning_rate": 1.1515531990597264e-05, + "loss": 1.2123, + "step": 77844 + }, + { + "epoch": 0.94, + "grad_norm": 3.022574089605048, + "learning_rate": 1.1514954557754208e-05, + "loss": 1.2344, + "step": 77847 + }, + { + "epoch": 0.94, + "grad_norm": 8.466519365472505, + "learning_rate": 1.1514377119741156e-05, + "loss": 1.3463, + "step": 77850 + }, + { + "epoch": 0.94, + "grad_norm": 12.861985339853979, + "learning_rate": 1.1513799676560083e-05, + "loss": 1.264, + "step": 77853 + }, + { + "epoch": 0.94, + "grad_norm": 5.335151881877965, + "learning_rate": 1.1513222228212955e-05, + "loss": 1.3804, + "step": 77856 + }, + { + "epoch": 0.94, + "grad_norm": 5.945776501347113, + "learning_rate": 1.1512644774701742e-05, + "loss": 1.1759, + "step": 77859 + }, + { + "epoch": 0.94, + "grad_norm": 10.662323168631357, + "learning_rate": 1.1512067316028422e-05, + "loss": 1.4606, + "step": 77862 + }, + { + "epoch": 0.94, + "grad_norm": 13.499003094519471, + "learning_rate": 1.1511489852194959e-05, + "loss": 0.9456, + "step": 77865 + }, + { + "epoch": 0.94, + "grad_norm": 40.82711397664282, + "learning_rate": 1.1510912383203328e-05, + "loss": 1.1819, + "step": 77868 + }, + { + "epoch": 0.94, + "grad_norm": 15.894784565592552, + "learning_rate": 1.1510334909055492e-05, + "loss": 1.696, + "step": 77871 + }, + { + "epoch": 0.94, + "grad_norm": 9.755587065496593, + "learning_rate": 1.150975742975343e-05, + "loss": 1.4268, + "step": 77874 + }, + { + "epoch": 0.94, + "grad_norm": 4.206693435106879, + "learning_rate": 1.1509179945299112e-05, + "loss": 1.5066, + "step": 77877 + }, + { + "epoch": 0.94, + "grad_norm": 7.442884677007586, + "learning_rate": 1.1508602455694504e-05, + "loss": 1.282, + "step": 77880 + }, + { + "epoch": 0.94, + "grad_norm": 5.538541496357797, + "learning_rate": 1.1508024960941577e-05, + "loss": 1.1577, + "step": 77883 + }, + { + "epoch": 0.94, + "grad_norm": 10.728388172712336, + "learning_rate": 1.1507447461042308e-05, + "loss": 1.2669, + "step": 77886 + }, + { + "epoch": 0.94, + "grad_norm": 8.697092719129836, + "learning_rate": 1.1506869955998662e-05, + "loss": 1.5958, + "step": 77889 + }, + { + "epoch": 0.94, + "grad_norm": 4.290298176987197, + "learning_rate": 1.150629244581261e-05, + "loss": 1.3632, + "step": 77892 + }, + { + "epoch": 0.94, + "grad_norm": 18.08483318330058, + "learning_rate": 1.1505714930486127e-05, + "loss": 1.2032, + "step": 77895 + }, + { + "epoch": 0.94, + "grad_norm": 15.390505815994226, + "learning_rate": 1.1505137410021181e-05, + "loss": 1.6859, + "step": 77898 + }, + { + "epoch": 0.94, + "grad_norm": 6.445126156416963, + "learning_rate": 1.1504559884419744e-05, + "loss": 1.2224, + "step": 77901 + }, + { + "epoch": 0.94, + "grad_norm": 4.400036154581407, + "learning_rate": 1.1503982353683782e-05, + "loss": 1.2297, + "step": 77904 + }, + { + "epoch": 0.94, + "grad_norm": 8.78485202435493, + "learning_rate": 1.1503404817815276e-05, + "loss": 1.1712, + "step": 77907 + }, + { + "epoch": 0.94, + "grad_norm": 27.371075517214734, + "learning_rate": 1.1502827276816186e-05, + "loss": 1.4658, + "step": 77910 + }, + { + "epoch": 0.94, + "grad_norm": 8.575748012717064, + "learning_rate": 1.1502249730688488e-05, + "loss": 1.3455, + "step": 77913 + }, + { + "epoch": 0.94, + "grad_norm": 6.56478431555519, + "learning_rate": 1.1501672179434156e-05, + "loss": 1.6277, + "step": 77916 + }, + { + "epoch": 0.94, + "grad_norm": 7.09650900960737, + "learning_rate": 1.1501094623055155e-05, + "loss": 1.2409, + "step": 77919 + }, + { + "epoch": 0.94, + "grad_norm": 14.295579466261657, + "learning_rate": 1.150051706155346e-05, + "loss": 1.5147, + "step": 77922 + }, + { + "epoch": 0.94, + "grad_norm": 12.007615702301763, + "learning_rate": 1.1499939494931041e-05, + "loss": 1.6313, + "step": 77925 + }, + { + "epoch": 0.94, + "grad_norm": 6.830582566995112, + "learning_rate": 1.1499361923189866e-05, + "loss": 1.5014, + "step": 77928 + }, + { + "epoch": 0.94, + "grad_norm": 7.010191180616088, + "learning_rate": 1.1498784346331913e-05, + "loss": 1.4239, + "step": 77931 + }, + { + "epoch": 0.94, + "grad_norm": 31.989383251322735, + "learning_rate": 1.1498206764359148e-05, + "loss": 1.4571, + "step": 77934 + }, + { + "epoch": 0.94, + "grad_norm": 8.781529646155256, + "learning_rate": 1.149762917727354e-05, + "loss": 1.5152, + "step": 77937 + }, + { + "epoch": 0.94, + "grad_norm": 36.73263048101617, + "learning_rate": 1.1497051585077066e-05, + "loss": 1.3542, + "step": 77940 + }, + { + "epoch": 0.94, + "grad_norm": 16.658386591340673, + "learning_rate": 1.1496473987771692e-05, + "loss": 1.4457, + "step": 77943 + }, + { + "epoch": 0.94, + "grad_norm": 22.89360250435378, + "learning_rate": 1.1495896385359391e-05, + "loss": 1.3898, + "step": 77946 + }, + { + "epoch": 0.94, + "grad_norm": 17.670119821182958, + "learning_rate": 1.1495318777842137e-05, + "loss": 1.1337, + "step": 77949 + }, + { + "epoch": 0.94, + "grad_norm": 4.504334021562517, + "learning_rate": 1.1494741165221898e-05, + "loss": 1.2382, + "step": 77952 + }, + { + "epoch": 0.94, + "grad_norm": 27.66689887973172, + "learning_rate": 1.1494163547500645e-05, + "loss": 1.2952, + "step": 77955 + }, + { + "epoch": 0.94, + "grad_norm": 4.850345404131202, + "learning_rate": 1.1493585924680351e-05, + "loss": 1.1999, + "step": 77958 + }, + { + "epoch": 0.94, + "grad_norm": 17.94917281600128, + "learning_rate": 1.1493008296762986e-05, + "loss": 1.2024, + "step": 77961 + }, + { + "epoch": 0.94, + "grad_norm": 5.6011766780766274, + "learning_rate": 1.149243066375052e-05, + "loss": 1.5867, + "step": 77964 + }, + { + "epoch": 0.94, + "grad_norm": 4.165994425955852, + "learning_rate": 1.1491853025644926e-05, + "loss": 1.2227, + "step": 77967 + }, + { + "epoch": 0.94, + "grad_norm": 4.906360995042228, + "learning_rate": 1.1491275382448176e-05, + "loss": 1.4259, + "step": 77970 + }, + { + "epoch": 0.94, + "grad_norm": 8.289421285806341, + "learning_rate": 1.149069773416224e-05, + "loss": 1.4183, + "step": 77973 + }, + { + "epoch": 0.94, + "grad_norm": 8.10287409507072, + "learning_rate": 1.1490120080789091e-05, + "loss": 1.2963, + "step": 77976 + }, + { + "epoch": 0.94, + "grad_norm": 11.152883332043476, + "learning_rate": 1.1489542422330696e-05, + "loss": 1.2936, + "step": 77979 + }, + { + "epoch": 0.94, + "grad_norm": 30.22850237224158, + "learning_rate": 1.1488964758789032e-05, + "loss": 1.401, + "step": 77982 + }, + { + "epoch": 0.94, + "grad_norm": 13.923268349388053, + "learning_rate": 1.1488387090166064e-05, + "loss": 1.7226, + "step": 77985 + }, + { + "epoch": 0.94, + "grad_norm": 35.468180213349946, + "learning_rate": 1.148780941646377e-05, + "loss": 1.2386, + "step": 77988 + }, + { + "epoch": 0.94, + "grad_norm": 6.7073059721223895, + "learning_rate": 1.1487231737684116e-05, + "loss": 1.058, + "step": 77991 + }, + { + "epoch": 0.94, + "grad_norm": 10.286841316717457, + "learning_rate": 1.148665405382908e-05, + "loss": 1.3305, + "step": 77994 + }, + { + "epoch": 0.94, + "grad_norm": 15.792399914852819, + "learning_rate": 1.1486076364900628e-05, + "loss": 1.311, + "step": 77997 + }, + { + "epoch": 0.94, + "grad_norm": 8.396015183058145, + "learning_rate": 1.1485498670900728e-05, + "loss": 1.4868, + "step": 78000 + }, + { + "epoch": 0.94, + "grad_norm": 4.492819188919563, + "learning_rate": 1.1484920971831364e-05, + "loss": 1.2356, + "step": 78003 + }, + { + "epoch": 0.94, + "grad_norm": 7.981308346098126, + "learning_rate": 1.148434326769449e-05, + "loss": 1.186, + "step": 78006 + }, + { + "epoch": 0.94, + "grad_norm": 15.02198626523493, + "learning_rate": 1.1483765558492095e-05, + "loss": 1.6594, + "step": 78009 + }, + { + "epoch": 0.94, + "grad_norm": 9.176894947687241, + "learning_rate": 1.1483187844226137e-05, + "loss": 1.3587, + "step": 78012 + }, + { + "epoch": 0.94, + "grad_norm": 6.828099324397339, + "learning_rate": 1.14826101248986e-05, + "loss": 1.5294, + "step": 78015 + }, + { + "epoch": 0.94, + "grad_norm": 19.29037606074118, + "learning_rate": 1.1482032400511442e-05, + "loss": 1.5421, + "step": 78018 + }, + { + "epoch": 0.94, + "grad_norm": 7.173979665191071, + "learning_rate": 1.1481454671066643e-05, + "loss": 1.2733, + "step": 78021 + }, + { + "epoch": 0.94, + "grad_norm": 15.343067082752615, + "learning_rate": 1.1480876936566174e-05, + "loss": 1.3988, + "step": 78024 + }, + { + "epoch": 0.94, + "grad_norm": 11.818321877437578, + "learning_rate": 1.1480299197012003e-05, + "loss": 1.1983, + "step": 78027 + }, + { + "epoch": 0.94, + "grad_norm": 7.52761769862169, + "learning_rate": 1.147972145240611e-05, + "loss": 1.5064, + "step": 78030 + }, + { + "epoch": 0.94, + "grad_norm": 10.420020755531695, + "learning_rate": 1.1479143702750452e-05, + "loss": 1.3347, + "step": 78033 + }, + { + "epoch": 0.94, + "grad_norm": 4.201666764410366, + "learning_rate": 1.1478565948047018e-05, + "loss": 1.3513, + "step": 78036 + }, + { + "epoch": 0.94, + "grad_norm": 19.448034088609226, + "learning_rate": 1.1477988188297767e-05, + "loss": 1.43, + "step": 78039 + }, + { + "epoch": 0.94, + "grad_norm": 16.39029770398327, + "learning_rate": 1.1477410423504673e-05, + "loss": 1.4461, + "step": 78042 + }, + { + "epoch": 0.94, + "grad_norm": 7.756449099380436, + "learning_rate": 1.1476832653669712e-05, + "loss": 0.9554, + "step": 78045 + }, + { + "epoch": 0.94, + "grad_norm": 24.500970185840124, + "learning_rate": 1.147625487879485e-05, + "loss": 1.4778, + "step": 78048 + }, + { + "epoch": 0.94, + "grad_norm": 16.58972559343688, + "learning_rate": 1.1475677098882065e-05, + "loss": 1.4788, + "step": 78051 + }, + { + "epoch": 0.94, + "grad_norm": 17.66204954118651, + "learning_rate": 1.1475099313933324e-05, + "loss": 1.3023, + "step": 78054 + }, + { + "epoch": 0.94, + "grad_norm": 12.573626661460828, + "learning_rate": 1.1474521523950604e-05, + "loss": 1.2497, + "step": 78057 + }, + { + "epoch": 0.94, + "grad_norm": 6.217092012747682, + "learning_rate": 1.1473943728935868e-05, + "loss": 1.2707, + "step": 78060 + }, + { + "epoch": 0.94, + "grad_norm": 3.55484860300061, + "learning_rate": 1.1473365928891096e-05, + "loss": 1.2213, + "step": 78063 + }, + { + "epoch": 0.94, + "grad_norm": 21.915760554518496, + "learning_rate": 1.1472788123818257e-05, + "loss": 1.4949, + "step": 78066 + }, + { + "epoch": 0.94, + "grad_norm": 9.59991198504227, + "learning_rate": 1.147221031371932e-05, + "loss": 1.4222, + "step": 78069 + }, + { + "epoch": 0.94, + "grad_norm": 67.14194846634868, + "learning_rate": 1.147163249859626e-05, + "loss": 1.4588, + "step": 78072 + }, + { + "epoch": 0.94, + "grad_norm": 20.81238527584584, + "learning_rate": 1.1471054678451048e-05, + "loss": 1.4186, + "step": 78075 + }, + { + "epoch": 0.94, + "grad_norm": 9.060147119204531, + "learning_rate": 1.1470476853285662e-05, + "loss": 1.4643, + "step": 78078 + }, + { + "epoch": 0.94, + "grad_norm": 6.096913184567077, + "learning_rate": 1.1469899023102062e-05, + "loss": 1.428, + "step": 78081 + }, + { + "epoch": 0.94, + "grad_norm": 24.51418509685202, + "learning_rate": 1.1469321187902228e-05, + "loss": 1.0481, + "step": 78084 + }, + { + "epoch": 0.94, + "grad_norm": 10.228945092331395, + "learning_rate": 1.1468743347688128e-05, + "loss": 1.1705, + "step": 78087 + }, + { + "epoch": 0.94, + "grad_norm": 11.20845373240937, + "learning_rate": 1.146816550246174e-05, + "loss": 1.1669, + "step": 78090 + }, + { + "epoch": 0.94, + "grad_norm": 7.194602751860106, + "learning_rate": 1.146758765222503e-05, + "loss": 1.4262, + "step": 78093 + }, + { + "epoch": 0.94, + "grad_norm": 15.59312956222715, + "learning_rate": 1.1467009796979971e-05, + "loss": 1.4279, + "step": 78096 + }, + { + "epoch": 0.94, + "grad_norm": 7.532035450956601, + "learning_rate": 1.1466431936728536e-05, + "loss": 1.0557, + "step": 78099 + }, + { + "epoch": 0.94, + "grad_norm": 20.320217216719726, + "learning_rate": 1.1465854071472697e-05, + "loss": 1.3785, + "step": 78102 + }, + { + "epoch": 0.94, + "grad_norm": 42.680913889679566, + "learning_rate": 1.1465276201214428e-05, + "loss": 1.1746, + "step": 78105 + }, + { + "epoch": 0.94, + "grad_norm": 18.359008262976054, + "learning_rate": 1.1464698325955697e-05, + "loss": 1.3485, + "step": 78108 + }, + { + "epoch": 0.94, + "grad_norm": 15.640057329076324, + "learning_rate": 1.1464120445698482e-05, + "loss": 1.293, + "step": 78111 + }, + { + "epoch": 0.94, + "grad_norm": 11.20856351490486, + "learning_rate": 1.1463542560444746e-05, + "loss": 1.5238, + "step": 78114 + }, + { + "epoch": 0.94, + "grad_norm": 28.750450777718896, + "learning_rate": 1.1462964670196471e-05, + "loss": 1.0696, + "step": 78117 + }, + { + "epoch": 0.94, + "grad_norm": 11.719755455642446, + "learning_rate": 1.1462386774955623e-05, + "loss": 1.4656, + "step": 78120 + }, + { + "epoch": 0.94, + "grad_norm": 16.488653125331552, + "learning_rate": 1.1461808874724173e-05, + "loss": 1.2225, + "step": 78123 + }, + { + "epoch": 0.94, + "grad_norm": 23.998348685085205, + "learning_rate": 1.1461230969504098e-05, + "loss": 0.9473, + "step": 78126 + }, + { + "epoch": 0.94, + "grad_norm": 24.06289800862422, + "learning_rate": 1.1460653059297366e-05, + "loss": 1.5346, + "step": 78129 + }, + { + "epoch": 0.94, + "grad_norm": 22.829519157600657, + "learning_rate": 1.1460075144105958e-05, + "loss": 1.1467, + "step": 78132 + }, + { + "epoch": 0.94, + "grad_norm": 6.228183201875889, + "learning_rate": 1.1459497223931833e-05, + "loss": 1.4888, + "step": 78135 + }, + { + "epoch": 0.94, + "grad_norm": 12.414264103086918, + "learning_rate": 1.1458919298776972e-05, + "loss": 1.2637, + "step": 78138 + }, + { + "epoch": 0.94, + "grad_norm": 10.539753190347112, + "learning_rate": 1.1458341368643344e-05, + "loss": 1.6793, + "step": 78141 + }, + { + "epoch": 0.94, + "grad_norm": 7.557590984988952, + "learning_rate": 1.1457763433532922e-05, + "loss": 1.6097, + "step": 78144 + }, + { + "epoch": 0.94, + "grad_norm": 17.838700261871118, + "learning_rate": 1.1457185493447678e-05, + "loss": 1.4094, + "step": 78147 + }, + { + "epoch": 0.94, + "grad_norm": 3.1375892064741566, + "learning_rate": 1.1456607548389586e-05, + "loss": 1.2463, + "step": 78150 + }, + { + "epoch": 0.94, + "grad_norm": 9.187431619796964, + "learning_rate": 1.145602959836062e-05, + "loss": 1.7485, + "step": 78153 + }, + { + "epoch": 0.94, + "grad_norm": 6.1886578977830515, + "learning_rate": 1.1455451643362743e-05, + "loss": 1.5843, + "step": 78156 + }, + { + "epoch": 0.94, + "grad_norm": 4.381206063135946, + "learning_rate": 1.1454873683397938e-05, + "loss": 1.0314, + "step": 78159 + }, + { + "epoch": 0.94, + "grad_norm": 14.348348632768468, + "learning_rate": 1.1454295718468172e-05, + "loss": 1.7644, + "step": 78162 + }, + { + "epoch": 0.94, + "grad_norm": 13.79369648428079, + "learning_rate": 1.145371774857542e-05, + "loss": 1.6134, + "step": 78165 + }, + { + "epoch": 0.94, + "grad_norm": 22.40004984540254, + "learning_rate": 1.1453139773721648e-05, + "loss": 1.4625, + "step": 78168 + }, + { + "epoch": 0.94, + "grad_norm": 6.174189629293825, + "learning_rate": 1.145256179390884e-05, + "loss": 1.4589, + "step": 78171 + }, + { + "epoch": 0.94, + "grad_norm": 9.355219835000804, + "learning_rate": 1.145198380913896e-05, + "loss": 1.333, + "step": 78174 + }, + { + "epoch": 0.94, + "grad_norm": 9.007237622689843, + "learning_rate": 1.1451405819413982e-05, + "loss": 1.6973, + "step": 78177 + }, + { + "epoch": 0.94, + "grad_norm": 9.211726770900096, + "learning_rate": 1.1450827824735875e-05, + "loss": 1.0643, + "step": 78180 + }, + { + "epoch": 0.94, + "grad_norm": 4.840280394131273, + "learning_rate": 1.1450249825106618e-05, + "loss": 1.3159, + "step": 78183 + }, + { + "epoch": 0.94, + "grad_norm": 35.1093939858535, + "learning_rate": 1.1449671820528182e-05, + "loss": 1.0744, + "step": 78186 + }, + { + "epoch": 0.94, + "grad_norm": 16.111113893687488, + "learning_rate": 1.1449093811002537e-05, + "loss": 1.3601, + "step": 78189 + }, + { + "epoch": 0.94, + "grad_norm": 4.640228304713573, + "learning_rate": 1.1448515796531657e-05, + "loss": 1.3266, + "step": 78192 + }, + { + "epoch": 0.94, + "grad_norm": 8.29024963395462, + "learning_rate": 1.1447937777117515e-05, + "loss": 1.251, + "step": 78195 + }, + { + "epoch": 0.94, + "grad_norm": 14.883810464292212, + "learning_rate": 1.1447359752762081e-05, + "loss": 1.2899, + "step": 78198 + }, + { + "epoch": 0.94, + "grad_norm": 9.248364710167426, + "learning_rate": 1.144678172346733e-05, + "loss": 1.4847, + "step": 78201 + }, + { + "epoch": 0.94, + "grad_norm": 12.139493139506795, + "learning_rate": 1.1446203689235236e-05, + "loss": 1.2764, + "step": 78204 + }, + { + "epoch": 0.94, + "grad_norm": 5.618541623441156, + "learning_rate": 1.144562565006777e-05, + "loss": 1.4603, + "step": 78207 + }, + { + "epoch": 0.94, + "grad_norm": 4.307300717090548, + "learning_rate": 1.14450476059669e-05, + "loss": 1.4022, + "step": 78210 + }, + { + "epoch": 0.94, + "grad_norm": 44.721261809598, + "learning_rate": 1.1444469556934607e-05, + "loss": 1.03, + "step": 78213 + }, + { + "epoch": 0.94, + "grad_norm": 2.1586967659573557, + "learning_rate": 1.144389150297286e-05, + "loss": 1.4009, + "step": 78216 + }, + { + "epoch": 0.94, + "grad_norm": 29.695792405928145, + "learning_rate": 1.144331344408363e-05, + "loss": 1.2003, + "step": 78219 + }, + { + "epoch": 0.94, + "grad_norm": 6.996617307759762, + "learning_rate": 1.1442735380268888e-05, + "loss": 1.1951, + "step": 78222 + }, + { + "epoch": 0.94, + "grad_norm": 8.323529653612647, + "learning_rate": 1.1442157311530615e-05, + "loss": 1.6432, + "step": 78225 + }, + { + "epoch": 0.94, + "grad_norm": 37.03024906942197, + "learning_rate": 1.1441579237870777e-05, + "loss": 1.3722, + "step": 78228 + }, + { + "epoch": 0.94, + "grad_norm": 16.250377010982362, + "learning_rate": 1.1441001159291348e-05, + "loss": 1.6035, + "step": 78231 + }, + { + "epoch": 0.94, + "grad_norm": 30.8307472830031, + "learning_rate": 1.1440423075794298e-05, + "loss": 1.4063, + "step": 78234 + }, + { + "epoch": 0.94, + "grad_norm": 6.210330794191861, + "learning_rate": 1.1439844987381608e-05, + "loss": 1.4129, + "step": 78237 + }, + { + "epoch": 0.94, + "grad_norm": 24.777377910162752, + "learning_rate": 1.1439266894055244e-05, + "loss": 1.4489, + "step": 78240 + }, + { + "epoch": 0.94, + "grad_norm": 14.615520242084289, + "learning_rate": 1.1438688795817176e-05, + "loss": 1.0764, + "step": 78243 + }, + { + "epoch": 0.94, + "grad_norm": 31.740979865613976, + "learning_rate": 1.1438110692669389e-05, + "loss": 1.5766, + "step": 78246 + }, + { + "epoch": 0.94, + "grad_norm": 19.811857837105535, + "learning_rate": 1.1437532584613844e-05, + "loss": 1.1028, + "step": 78249 + }, + { + "epoch": 0.94, + "grad_norm": 70.22712690749702, + "learning_rate": 1.1436954471652519e-05, + "loss": 1.2635, + "step": 78252 + }, + { + "epoch": 0.94, + "grad_norm": 13.285047241314647, + "learning_rate": 1.1436376353787384e-05, + "loss": 1.2865, + "step": 78255 + }, + { + "epoch": 0.94, + "grad_norm": 16.035824540937746, + "learning_rate": 1.1435798231020416e-05, + "loss": 1.2999, + "step": 78258 + }, + { + "epoch": 0.94, + "grad_norm": 9.318294551799353, + "learning_rate": 1.1435220103353586e-05, + "loss": 0.7632, + "step": 78261 + }, + { + "epoch": 0.94, + "grad_norm": 8.822542723202874, + "learning_rate": 1.1434641970788865e-05, + "loss": 0.9996, + "step": 78264 + }, + { + "epoch": 0.94, + "grad_norm": 31.616673680244702, + "learning_rate": 1.1434063833328229e-05, + "loss": 1.4715, + "step": 78267 + }, + { + "epoch": 0.94, + "grad_norm": 17.590997218241125, + "learning_rate": 1.143348569097365e-05, + "loss": 1.3146, + "step": 78270 + }, + { + "epoch": 0.94, + "grad_norm": 12.765057470089397, + "learning_rate": 1.1432907543727101e-05, + "loss": 1.2748, + "step": 78273 + }, + { + "epoch": 0.94, + "grad_norm": 35.57837330804405, + "learning_rate": 1.1432329391590552e-05, + "loss": 1.1996, + "step": 78276 + }, + { + "epoch": 0.94, + "grad_norm": 4.984630584642728, + "learning_rate": 1.143175123456598e-05, + "loss": 1.2658, + "step": 78279 + }, + { + "epoch": 0.94, + "grad_norm": 26.953082207524158, + "learning_rate": 1.1431173072655357e-05, + "loss": 1.2246, + "step": 78282 + }, + { + "epoch": 0.94, + "grad_norm": 20.00828697441896, + "learning_rate": 1.1430594905860657e-05, + "loss": 1.4515, + "step": 78285 + }, + { + "epoch": 0.94, + "grad_norm": 47.24648164625302, + "learning_rate": 1.1430016734183852e-05, + "loss": 1.4147, + "step": 78288 + }, + { + "epoch": 0.94, + "grad_norm": 15.485648686536859, + "learning_rate": 1.1429438557626912e-05, + "loss": 1.2451, + "step": 78291 + }, + { + "epoch": 0.94, + "grad_norm": 2.32591210703215, + "learning_rate": 1.1428860376191815e-05, + "loss": 1.6488, + "step": 78294 + }, + { + "epoch": 0.94, + "grad_norm": 19.277585683669717, + "learning_rate": 1.142828218988053e-05, + "loss": 1.5013, + "step": 78297 + }, + { + "epoch": 0.94, + "grad_norm": 6.873977662017458, + "learning_rate": 1.1427703998695037e-05, + "loss": 1.3385, + "step": 78300 + }, + { + "epoch": 0.94, + "grad_norm": 11.97589001122687, + "learning_rate": 1.1427125802637302e-05, + "loss": 1.1595, + "step": 78303 + }, + { + "epoch": 0.94, + "grad_norm": 30.180190155953486, + "learning_rate": 1.1426547601709302e-05, + "loss": 1.3681, + "step": 78306 + }, + { + "epoch": 0.94, + "grad_norm": 3.137717597479155, + "learning_rate": 1.1425969395913005e-05, + "loss": 1.2049, + "step": 78309 + }, + { + "epoch": 0.94, + "grad_norm": 6.732710345743392, + "learning_rate": 1.1425391185250391e-05, + "loss": 1.0707, + "step": 78312 + }, + { + "epoch": 0.94, + "grad_norm": 13.030854343435404, + "learning_rate": 1.142481296972343e-05, + "loss": 1.2393, + "step": 78315 + }, + { + "epoch": 0.94, + "grad_norm": 11.082717078910036, + "learning_rate": 1.1424234749334095e-05, + "loss": 1.2153, + "step": 78318 + }, + { + "epoch": 0.94, + "grad_norm": 7.827619523180857, + "learning_rate": 1.1423656524084362e-05, + "loss": 1.5122, + "step": 78321 + }, + { + "epoch": 0.94, + "grad_norm": 6.9694590391212605, + "learning_rate": 1.14230782939762e-05, + "loss": 1.0791, + "step": 78324 + }, + { + "epoch": 0.94, + "grad_norm": 10.76280474393756, + "learning_rate": 1.1422500059011586e-05, + "loss": 1.1843, + "step": 78327 + }, + { + "epoch": 0.94, + "grad_norm": 18.04589132261315, + "learning_rate": 1.142192181919249e-05, + "loss": 1.2953, + "step": 78330 + }, + { + "epoch": 0.94, + "grad_norm": 11.902318079063798, + "learning_rate": 1.1421343574520887e-05, + "loss": 1.4497, + "step": 78333 + }, + { + "epoch": 0.94, + "grad_norm": 33.1422710620591, + "learning_rate": 1.1420765324998751e-05, + "loss": 1.4747, + "step": 78336 + }, + { + "epoch": 0.94, + "grad_norm": 47.23022428203449, + "learning_rate": 1.1420187070628056e-05, + "loss": 1.4605, + "step": 78339 + }, + { + "epoch": 0.94, + "grad_norm": 14.905774462115483, + "learning_rate": 1.1419608811410772e-05, + "loss": 1.4807, + "step": 78342 + }, + { + "epoch": 0.94, + "grad_norm": 6.2160853144648796, + "learning_rate": 1.1419030547348874e-05, + "loss": 1.4177, + "step": 78345 + }, + { + "epoch": 0.94, + "grad_norm": 250.05047765862292, + "learning_rate": 1.1418452278444339e-05, + "loss": 1.3224, + "step": 78348 + }, + { + "epoch": 0.94, + "grad_norm": 17.854281611950007, + "learning_rate": 1.1417874004699132e-05, + "loss": 1.344, + "step": 78351 + }, + { + "epoch": 0.94, + "grad_norm": 21.47296312768936, + "learning_rate": 1.1417295726115236e-05, + "loss": 1.4366, + "step": 78354 + }, + { + "epoch": 0.94, + "grad_norm": 5.535284683748018, + "learning_rate": 1.1416717442694621e-05, + "loss": 1.0636, + "step": 78357 + }, + { + "epoch": 0.94, + "grad_norm": 12.799725581492591, + "learning_rate": 1.1416139154439257e-05, + "loss": 1.1526, + "step": 78360 + }, + { + "epoch": 0.94, + "grad_norm": 10.824481850277495, + "learning_rate": 1.1415560861351121e-05, + "loss": 0.9443, + "step": 78363 + }, + { + "epoch": 0.94, + "grad_norm": 24.203324811707635, + "learning_rate": 1.1414982563432184e-05, + "loss": 1.0002, + "step": 78366 + }, + { + "epoch": 0.94, + "grad_norm": 11.355398491242157, + "learning_rate": 1.1414404260684425e-05, + "loss": 1.183, + "step": 78369 + }, + { + "epoch": 0.94, + "grad_norm": 7.366344792347657, + "learning_rate": 1.141382595310981e-05, + "loss": 1.2245, + "step": 78372 + }, + { + "epoch": 0.94, + "grad_norm": 19.60096309804077, + "learning_rate": 1.1413247640710319e-05, + "loss": 1.419, + "step": 78375 + }, + { + "epoch": 0.94, + "grad_norm": 23.61266163760101, + "learning_rate": 1.1412669323487922e-05, + "loss": 1.291, + "step": 78378 + }, + { + "epoch": 0.94, + "grad_norm": 8.775032442912185, + "learning_rate": 1.1412091001444593e-05, + "loss": 1.3719, + "step": 78381 + }, + { + "epoch": 0.94, + "grad_norm": 9.956057580906954, + "learning_rate": 1.1411512674582306e-05, + "loss": 1.4398, + "step": 78384 + }, + { + "epoch": 0.94, + "grad_norm": 23.96795597173401, + "learning_rate": 1.1410934342903034e-05, + "loss": 1.2648, + "step": 78387 + }, + { + "epoch": 0.94, + "grad_norm": 21.69369982124897, + "learning_rate": 1.1410356006408754e-05, + "loss": 1.6011, + "step": 78390 + }, + { + "epoch": 0.94, + "grad_norm": 34.4289791726859, + "learning_rate": 1.1409777665101433e-05, + "loss": 1.4263, + "step": 78393 + }, + { + "epoch": 0.94, + "grad_norm": 31.504211293515915, + "learning_rate": 1.1409199318983051e-05, + "loss": 1.1457, + "step": 78396 + }, + { + "epoch": 0.94, + "grad_norm": 9.307249521442758, + "learning_rate": 1.1408620968055579e-05, + "loss": 1.1697, + "step": 78399 + }, + { + "epoch": 0.94, + "grad_norm": 15.724784863066558, + "learning_rate": 1.1408042612320993e-05, + "loss": 1.3353, + "step": 78402 + }, + { + "epoch": 0.94, + "grad_norm": 88.2073504483865, + "learning_rate": 1.1407464251781259e-05, + "loss": 1.2058, + "step": 78405 + }, + { + "epoch": 0.94, + "grad_norm": 7.976275271003148, + "learning_rate": 1.1406885886438361e-05, + "loss": 1.2572, + "step": 78408 + }, + { + "epoch": 0.94, + "grad_norm": 10.472302161980863, + "learning_rate": 1.1406307516294271e-05, + "loss": 1.2121, + "step": 78411 + }, + { + "epoch": 0.94, + "grad_norm": 8.53580762657213, + "learning_rate": 1.1405729141350957e-05, + "loss": 1.302, + "step": 78414 + }, + { + "epoch": 0.94, + "grad_norm": 14.318024387848832, + "learning_rate": 1.1405150761610393e-05, + "loss": 1.7136, + "step": 78417 + }, + { + "epoch": 0.94, + "grad_norm": 21.533168031555675, + "learning_rate": 1.1404572377074561e-05, + "loss": 1.2714, + "step": 78420 + }, + { + "epoch": 0.94, + "grad_norm": 5.113401398163937, + "learning_rate": 1.1403993987745424e-05, + "loss": 1.3704, + "step": 78423 + }, + { + "epoch": 0.94, + "grad_norm": 22.52096523099942, + "learning_rate": 1.1403415593624963e-05, + "loss": 1.5133, + "step": 78426 + }, + { + "epoch": 0.94, + "grad_norm": 25.421558436360453, + "learning_rate": 1.1402837194715151e-05, + "loss": 0.938, + "step": 78429 + }, + { + "epoch": 0.94, + "grad_norm": 30.661352662524134, + "learning_rate": 1.140225879101796e-05, + "loss": 0.9894, + "step": 78432 + }, + { + "epoch": 0.94, + "grad_norm": 7.385814566060641, + "learning_rate": 1.1401680382535364e-05, + "loss": 1.356, + "step": 78435 + }, + { + "epoch": 0.94, + "grad_norm": 8.865202767395777, + "learning_rate": 1.1401101969269339e-05, + "loss": 1.0276, + "step": 78438 + }, + { + "epoch": 0.94, + "grad_norm": 10.104851273473546, + "learning_rate": 1.140052355122186e-05, + "loss": 1.3396, + "step": 78441 + }, + { + "epoch": 0.94, + "grad_norm": 9.170233103708593, + "learning_rate": 1.1399945128394896e-05, + "loss": 1.1715, + "step": 78444 + }, + { + "epoch": 0.94, + "grad_norm": 18.348229222856183, + "learning_rate": 1.1399366700790424e-05, + "loss": 1.0522, + "step": 78447 + }, + { + "epoch": 0.94, + "grad_norm": 4.977327450650522, + "learning_rate": 1.1398788268410417e-05, + "loss": 1.9035, + "step": 78450 + }, + { + "epoch": 0.94, + "grad_norm": 7.846632582112846, + "learning_rate": 1.139820983125685e-05, + "loss": 1.1578, + "step": 78453 + }, + { + "epoch": 0.94, + "grad_norm": 7.936686932547267, + "learning_rate": 1.1397631389331697e-05, + "loss": 1.3175, + "step": 78456 + }, + { + "epoch": 0.94, + "grad_norm": 14.227080258538978, + "learning_rate": 1.1397052942636928e-05, + "loss": 1.4657, + "step": 78459 + }, + { + "epoch": 0.94, + "grad_norm": 24.505208136558, + "learning_rate": 1.1396474491174527e-05, + "loss": 1.452, + "step": 78462 + }, + { + "epoch": 0.94, + "grad_norm": 15.290621786104744, + "learning_rate": 1.1395896034946456e-05, + "loss": 1.0864, + "step": 78465 + }, + { + "epoch": 0.94, + "grad_norm": 8.249018315323376, + "learning_rate": 1.1395317573954698e-05, + "loss": 1.3422, + "step": 78468 + }, + { + "epoch": 0.94, + "grad_norm": 13.51436074644936, + "learning_rate": 1.139473910820122e-05, + "loss": 1.5099, + "step": 78471 + }, + { + "epoch": 0.94, + "grad_norm": 93.8946935526526, + "learning_rate": 1.1394160637688005e-05, + "loss": 1.16, + "step": 78474 + }, + { + "epoch": 0.94, + "grad_norm": 23.8298531541571, + "learning_rate": 1.1393582162417019e-05, + "loss": 1.2973, + "step": 78477 + }, + { + "epoch": 0.94, + "grad_norm": 23.277645290405296, + "learning_rate": 1.1393003682390236e-05, + "loss": 1.4422, + "step": 78480 + }, + { + "epoch": 0.94, + "grad_norm": 10.98309140923778, + "learning_rate": 1.139242519760964e-05, + "loss": 1.2464, + "step": 78483 + }, + { + "epoch": 0.94, + "grad_norm": 26.651945136427546, + "learning_rate": 1.1391846708077191e-05, + "loss": 1.1568, + "step": 78486 + }, + { + "epoch": 0.94, + "grad_norm": 16.220027627675027, + "learning_rate": 1.1391268213794876e-05, + "loss": 1.4053, + "step": 78489 + }, + { + "epoch": 0.94, + "grad_norm": 9.643769196459761, + "learning_rate": 1.139068971476466e-05, + "loss": 1.3472, + "step": 78492 + }, + { + "epoch": 0.94, + "grad_norm": 2.115674597235875, + "learning_rate": 1.1390111210988526e-05, + "loss": 1.4189, + "step": 78495 + }, + { + "epoch": 0.94, + "grad_norm": 15.674855707325516, + "learning_rate": 1.1389532702468437e-05, + "loss": 1.4092, + "step": 78498 + }, + { + "epoch": 0.94, + "grad_norm": 22.178741030030583, + "learning_rate": 1.1388954189206379e-05, + "loss": 1.2923, + "step": 78501 + }, + { + "epoch": 0.94, + "grad_norm": 18.108506924503896, + "learning_rate": 1.1388375671204318e-05, + "loss": 1.2207, + "step": 78504 + }, + { + "epoch": 0.94, + "grad_norm": 16.700882595372057, + "learning_rate": 1.138779714846423e-05, + "loss": 1.4658, + "step": 78507 + }, + { + "epoch": 0.94, + "grad_norm": 78.8202371063095, + "learning_rate": 1.1387218620988091e-05, + "loss": 1.3069, + "step": 78510 + }, + { + "epoch": 0.94, + "grad_norm": 12.489469874673757, + "learning_rate": 1.1386640088777874e-05, + "loss": 1.5921, + "step": 78513 + }, + { + "epoch": 0.94, + "grad_norm": 8.65983822839281, + "learning_rate": 1.1386061551835557e-05, + "loss": 1.2211, + "step": 78516 + }, + { + "epoch": 0.94, + "grad_norm": 16.869079266662652, + "learning_rate": 1.1385483010163108e-05, + "loss": 1.4118, + "step": 78519 + }, + { + "epoch": 0.94, + "grad_norm": 16.74327749948381, + "learning_rate": 1.1384904463762505e-05, + "loss": 1.405, + "step": 78522 + }, + { + "epoch": 0.94, + "grad_norm": 34.57145540828336, + "learning_rate": 1.1384325912635719e-05, + "loss": 1.4462, + "step": 78525 + }, + { + "epoch": 0.94, + "grad_norm": 26.198849132270006, + "learning_rate": 1.1383747356784734e-05, + "loss": 1.5975, + "step": 78528 + }, + { + "epoch": 0.94, + "grad_norm": 8.808897395462226, + "learning_rate": 1.1383168796211513e-05, + "loss": 1.4594, + "step": 78531 + }, + { + "epoch": 0.94, + "grad_norm": 10.374254285394382, + "learning_rate": 1.1382590230918033e-05, + "loss": 1.3516, + "step": 78534 + }, + { + "epoch": 0.94, + "grad_norm": 8.481186352488349, + "learning_rate": 1.1382011660906279e-05, + "loss": 1.2219, + "step": 78537 + }, + { + "epoch": 0.94, + "grad_norm": 8.808568151634141, + "learning_rate": 1.1381433086178209e-05, + "loss": 1.0793, + "step": 78540 + }, + { + "epoch": 0.94, + "grad_norm": 3.827076577140839, + "learning_rate": 1.1380854506735808e-05, + "loss": 1.497, + "step": 78543 + }, + { + "epoch": 0.94, + "grad_norm": 9.713494012205905, + "learning_rate": 1.1380275922581047e-05, + "loss": 1.3078, + "step": 78546 + }, + { + "epoch": 0.94, + "grad_norm": 7.499020967955586, + "learning_rate": 1.1379697333715906e-05, + "loss": 1.3372, + "step": 78549 + }, + { + "epoch": 0.94, + "grad_norm": 69.17708247703493, + "learning_rate": 1.137911874014235e-05, + "loss": 1.1215, + "step": 78552 + }, + { + "epoch": 0.94, + "grad_norm": 11.950259938545498, + "learning_rate": 1.137854014186236e-05, + "loss": 1.1341, + "step": 78555 + }, + { + "epoch": 0.94, + "grad_norm": 31.40960077847979, + "learning_rate": 1.1377961538877911e-05, + "loss": 1.3965, + "step": 78558 + }, + { + "epoch": 0.94, + "grad_norm": 12.601584251148301, + "learning_rate": 1.1377382931190973e-05, + "loss": 1.3126, + "step": 78561 + }, + { + "epoch": 0.94, + "grad_norm": 12.37904436585688, + "learning_rate": 1.1376804318803523e-05, + "loss": 1.0805, + "step": 78564 + }, + { + "epoch": 0.94, + "grad_norm": 38.541757703177126, + "learning_rate": 1.1376225701717533e-05, + "loss": 1.3027, + "step": 78567 + }, + { + "epoch": 0.94, + "grad_norm": 2.6341096346324653, + "learning_rate": 1.1375647079934989e-05, + "loss": 1.4061, + "step": 78570 + }, + { + "epoch": 0.94, + "grad_norm": 24.815589087196514, + "learning_rate": 1.137506845345785e-05, + "loss": 1.1641, + "step": 78573 + }, + { + "epoch": 0.94, + "grad_norm": 22.019722683272928, + "learning_rate": 1.13744898222881e-05, + "loss": 1.5014, + "step": 78576 + }, + { + "epoch": 0.94, + "grad_norm": 21.229957063773515, + "learning_rate": 1.1373911186427713e-05, + "loss": 1.1276, + "step": 78579 + }, + { + "epoch": 0.94, + "grad_norm": 7.44384945413091, + "learning_rate": 1.137333254587866e-05, + "loss": 1.4825, + "step": 78582 + }, + { + "epoch": 0.94, + "grad_norm": 12.421916729938207, + "learning_rate": 1.1372753900642916e-05, + "loss": 1.4646, + "step": 78585 + }, + { + "epoch": 0.94, + "grad_norm": 5.516906682319132, + "learning_rate": 1.1372175250722456e-05, + "loss": 1.1782, + "step": 78588 + }, + { + "epoch": 0.95, + "grad_norm": 11.455471834190641, + "learning_rate": 1.1371596596119263e-05, + "loss": 1.2641, + "step": 78591 + }, + { + "epoch": 0.95, + "grad_norm": 22.736728561239612, + "learning_rate": 1.13710179368353e-05, + "loss": 1.454, + "step": 78594 + }, + { + "epoch": 0.95, + "grad_norm": 3.440094951263277, + "learning_rate": 1.1370439272872549e-05, + "loss": 1.3845, + "step": 78597 + }, + { + "epoch": 0.95, + "grad_norm": 6.6092690026706675, + "learning_rate": 1.1369860604232982e-05, + "loss": 1.509, + "step": 78600 + }, + { + "epoch": 0.95, + "grad_norm": 14.964983883270396, + "learning_rate": 1.1369281930918575e-05, + "loss": 1.3058, + "step": 78603 + }, + { + "epoch": 0.95, + "grad_norm": 5.555700904074656, + "learning_rate": 1.1368703252931297e-05, + "loss": 1.2927, + "step": 78606 + }, + { + "epoch": 0.95, + "grad_norm": 7.329705893852648, + "learning_rate": 1.1368124570273134e-05, + "loss": 1.3077, + "step": 78609 + }, + { + "epoch": 0.95, + "grad_norm": 6.463405461658767, + "learning_rate": 1.1367545882946052e-05, + "loss": 1.1349, + "step": 78612 + }, + { + "epoch": 0.95, + "grad_norm": 14.917938256255338, + "learning_rate": 1.1366967190952026e-05, + "loss": 1.1686, + "step": 78615 + }, + { + "epoch": 0.95, + "grad_norm": 4.267499769683272, + "learning_rate": 1.1366388494293035e-05, + "loss": 1.0387, + "step": 78618 + }, + { + "epoch": 0.95, + "grad_norm": 6.881574327201719, + "learning_rate": 1.1365809792971054e-05, + "loss": 1.2416, + "step": 78621 + }, + { + "epoch": 0.95, + "grad_norm": 8.470635144993938, + "learning_rate": 1.1365231086988055e-05, + "loss": 1.3341, + "step": 78624 + }, + { + "epoch": 0.95, + "grad_norm": 26.91167183029843, + "learning_rate": 1.1364652376346012e-05, + "loss": 1.6399, + "step": 78627 + }, + { + "epoch": 0.95, + "grad_norm": 13.910177445648017, + "learning_rate": 1.1364073661046906e-05, + "loss": 1.4561, + "step": 78630 + }, + { + "epoch": 0.95, + "grad_norm": 5.779797762353487, + "learning_rate": 1.1363494941092708e-05, + "loss": 1.5577, + "step": 78633 + }, + { + "epoch": 0.95, + "grad_norm": 12.160134502506557, + "learning_rate": 1.1362916216485392e-05, + "loss": 1.4504, + "step": 78636 + }, + { + "epoch": 0.95, + "grad_norm": 7.991878507623753, + "learning_rate": 1.1362337487226928e-05, + "loss": 1.6155, + "step": 78639 + }, + { + "epoch": 0.95, + "grad_norm": 20.179944565651052, + "learning_rate": 1.1361758753319303e-05, + "loss": 1.3489, + "step": 78642 + }, + { + "epoch": 0.95, + "grad_norm": 13.425655496396292, + "learning_rate": 1.1361180014764487e-05, + "loss": 1.3095, + "step": 78645 + }, + { + "epoch": 0.95, + "grad_norm": 6.581003284353473, + "learning_rate": 1.1360601271564449e-05, + "loss": 1.3951, + "step": 78648 + }, + { + "epoch": 0.95, + "grad_norm": 45.67643282123648, + "learning_rate": 1.1360022523721172e-05, + "loss": 1.2746, + "step": 78651 + }, + { + "epoch": 0.95, + "grad_norm": 11.1711679741333, + "learning_rate": 1.1359443771236627e-05, + "loss": 1.4249, + "step": 78654 + }, + { + "epoch": 0.95, + "grad_norm": 5.119696857664689, + "learning_rate": 1.1358865014112792e-05, + "loss": 1.1161, + "step": 78657 + }, + { + "epoch": 0.95, + "grad_norm": 27.57128313227083, + "learning_rate": 1.1358286252351638e-05, + "loss": 1.0746, + "step": 78660 + }, + { + "epoch": 0.95, + "grad_norm": 20.943503051113513, + "learning_rate": 1.1357707485955144e-05, + "loss": 1.3426, + "step": 78663 + }, + { + "epoch": 0.95, + "grad_norm": 19.22268141938474, + "learning_rate": 1.135712871492528e-05, + "loss": 1.2645, + "step": 78666 + }, + { + "epoch": 0.95, + "grad_norm": 4.373614933007041, + "learning_rate": 1.1356549939264025e-05, + "loss": 1.5531, + "step": 78669 + }, + { + "epoch": 0.95, + "grad_norm": 3.2218650788598064, + "learning_rate": 1.1355971158973358e-05, + "loss": 1.314, + "step": 78672 + }, + { + "epoch": 0.95, + "grad_norm": 6.906298147716678, + "learning_rate": 1.1355392374055248e-05, + "loss": 1.2626, + "step": 78675 + }, + { + "epoch": 0.95, + "grad_norm": 6.1299076589448465, + "learning_rate": 1.135481358451167e-05, + "loss": 1.3365, + "step": 78678 + }, + { + "epoch": 0.95, + "grad_norm": 10.576453423455492, + "learning_rate": 1.1354234790344603e-05, + "loss": 1.7043, + "step": 78681 + }, + { + "epoch": 0.95, + "grad_norm": 6.97361390544268, + "learning_rate": 1.135365599155602e-05, + "loss": 1.7648, + "step": 78684 + }, + { + "epoch": 0.95, + "grad_norm": 10.059806444064796, + "learning_rate": 1.1353077188147897e-05, + "loss": 0.9749, + "step": 78687 + }, + { + "epoch": 0.95, + "grad_norm": 13.036830548865238, + "learning_rate": 1.135249838012221e-05, + "loss": 1.3449, + "step": 78690 + }, + { + "epoch": 0.95, + "grad_norm": 12.390590090262524, + "learning_rate": 1.1351919567480932e-05, + "loss": 1.1937, + "step": 78693 + }, + { + "epoch": 0.95, + "grad_norm": 13.978186197580536, + "learning_rate": 1.1351340750226038e-05, + "loss": 1.4121, + "step": 78696 + }, + { + "epoch": 0.95, + "grad_norm": 9.876510296961287, + "learning_rate": 1.1350761928359506e-05, + "loss": 1.6027, + "step": 78699 + }, + { + "epoch": 0.95, + "grad_norm": 13.396636319929785, + "learning_rate": 1.135018310188331e-05, + "loss": 1.347, + "step": 78702 + }, + { + "epoch": 0.95, + "grad_norm": 33.544044288444795, + "learning_rate": 1.1349604270799426e-05, + "loss": 1.1841, + "step": 78705 + }, + { + "epoch": 0.95, + "grad_norm": 11.571857477096415, + "learning_rate": 1.1349025435109828e-05, + "loss": 1.3164, + "step": 78708 + }, + { + "epoch": 0.95, + "grad_norm": 20.859285119746175, + "learning_rate": 1.1348446594816494e-05, + "loss": 1.2295, + "step": 78711 + }, + { + "epoch": 0.95, + "grad_norm": 12.151936288991923, + "learning_rate": 1.1347867749921396e-05, + "loss": 1.1522, + "step": 78714 + }, + { + "epoch": 0.95, + "grad_norm": 7.558235228927271, + "learning_rate": 1.134728890042651e-05, + "loss": 1.3856, + "step": 78717 + }, + { + "epoch": 0.95, + "grad_norm": 19.19099157454658, + "learning_rate": 1.1346710046333814e-05, + "loss": 1.4246, + "step": 78720 + }, + { + "epoch": 0.95, + "grad_norm": 11.207607735079515, + "learning_rate": 1.134613118764528e-05, + "loss": 1.4922, + "step": 78723 + }, + { + "epoch": 0.95, + "grad_norm": 16.096383020048613, + "learning_rate": 1.1345552324362886e-05, + "loss": 1.2403, + "step": 78726 + }, + { + "epoch": 0.95, + "grad_norm": 8.237706052770909, + "learning_rate": 1.1344973456488607e-05, + "loss": 1.0601, + "step": 78729 + }, + { + "epoch": 0.95, + "grad_norm": 19.622878625813758, + "learning_rate": 1.1344394584024419e-05, + "loss": 1.8167, + "step": 78732 + }, + { + "epoch": 0.95, + "grad_norm": 5.424523793161692, + "learning_rate": 1.1343815706972293e-05, + "loss": 1.5112, + "step": 78735 + }, + { + "epoch": 0.95, + "grad_norm": 17.114089854761552, + "learning_rate": 1.134323682533421e-05, + "loss": 1.4473, + "step": 78738 + }, + { + "epoch": 0.95, + "grad_norm": 12.09882514651031, + "learning_rate": 1.1342657939112145e-05, + "loss": 1.2461, + "step": 78741 + }, + { + "epoch": 0.95, + "grad_norm": 32.756034722982015, + "learning_rate": 1.134207904830807e-05, + "loss": 2.0059, + "step": 78744 + }, + { + "epoch": 0.95, + "grad_norm": 4.873132657025323, + "learning_rate": 1.1341500152923963e-05, + "loss": 1.5919, + "step": 78747 + }, + { + "epoch": 0.95, + "grad_norm": 14.653216716139971, + "learning_rate": 1.1340921252961798e-05, + "loss": 1.331, + "step": 78750 + }, + { + "epoch": 0.95, + "grad_norm": 14.094438370638686, + "learning_rate": 1.1340342348423556e-05, + "loss": 1.4044, + "step": 78753 + }, + { + "epoch": 0.95, + "grad_norm": 2.934593975889254, + "learning_rate": 1.1339763439311202e-05, + "loss": 1.176, + "step": 78756 + }, + { + "epoch": 0.95, + "grad_norm": 15.62811741919354, + "learning_rate": 1.1339184525626723e-05, + "loss": 1.057, + "step": 78759 + }, + { + "epoch": 0.95, + "grad_norm": 13.660948572437208, + "learning_rate": 1.133860560737209e-05, + "loss": 1.2712, + "step": 78762 + }, + { + "epoch": 0.95, + "grad_norm": 20.787760977694237, + "learning_rate": 1.1338026684549277e-05, + "loss": 1.2471, + "step": 78765 + }, + { + "epoch": 0.95, + "grad_norm": 10.809472930282189, + "learning_rate": 1.1337447757160257e-05, + "loss": 1.3453, + "step": 78768 + }, + { + "epoch": 0.95, + "grad_norm": 39.55635113701145, + "learning_rate": 1.1336868825207015e-05, + "loss": 1.205, + "step": 78771 + }, + { + "epoch": 0.95, + "grad_norm": 11.899510430238623, + "learning_rate": 1.1336289888691518e-05, + "loss": 1.5656, + "step": 78774 + }, + { + "epoch": 0.95, + "grad_norm": 9.706049925451302, + "learning_rate": 1.1335710947615746e-05, + "loss": 1.4413, + "step": 78777 + }, + { + "epoch": 0.95, + "grad_norm": 7.069208968602721, + "learning_rate": 1.1335132001981674e-05, + "loss": 1.4375, + "step": 78780 + }, + { + "epoch": 0.95, + "grad_norm": 16.595902194766236, + "learning_rate": 1.1334553051791277e-05, + "loss": 1.4103, + "step": 78783 + }, + { + "epoch": 0.95, + "grad_norm": 9.458102576665265, + "learning_rate": 1.1333974097046534e-05, + "loss": 1.2601, + "step": 78786 + }, + { + "epoch": 0.95, + "grad_norm": 19.243348291682405, + "learning_rate": 1.1333395137749413e-05, + "loss": 1.8276, + "step": 78789 + }, + { + "epoch": 0.95, + "grad_norm": 14.98783815531291, + "learning_rate": 1.1332816173901896e-05, + "loss": 1.6283, + "step": 78792 + }, + { + "epoch": 0.95, + "grad_norm": 4.323004775912735, + "learning_rate": 1.133223720550596e-05, + "loss": 1.2941, + "step": 78795 + }, + { + "epoch": 0.95, + "grad_norm": 10.709049210481053, + "learning_rate": 1.1331658232563579e-05, + "loss": 1.3513, + "step": 78798 + }, + { + "epoch": 0.95, + "grad_norm": 19.001536490753363, + "learning_rate": 1.1331079255076723e-05, + "loss": 1.6023, + "step": 78801 + }, + { + "epoch": 0.95, + "grad_norm": 5.3117142456052395, + "learning_rate": 1.1330500273047375e-05, + "loss": 1.2888, + "step": 78804 + }, + { + "epoch": 0.95, + "grad_norm": 17.70933300188624, + "learning_rate": 1.132992128647751e-05, + "loss": 1.0732, + "step": 78807 + }, + { + "epoch": 0.95, + "grad_norm": 24.104116809696464, + "learning_rate": 1.1329342295369101e-05, + "loss": 1.2718, + "step": 78810 + }, + { + "epoch": 0.95, + "grad_norm": 7.269580481454983, + "learning_rate": 1.1328763299724127e-05, + "loss": 1.3995, + "step": 78813 + }, + { + "epoch": 0.95, + "grad_norm": 27.13374432368122, + "learning_rate": 1.1328184299544563e-05, + "loss": 1.9008, + "step": 78816 + }, + { + "epoch": 0.95, + "grad_norm": 9.06736336010811, + "learning_rate": 1.1327605294832384e-05, + "loss": 1.0837, + "step": 78819 + }, + { + "epoch": 0.95, + "grad_norm": 18.005686117849866, + "learning_rate": 1.1327026285589563e-05, + "loss": 1.1805, + "step": 78822 + }, + { + "epoch": 0.95, + "grad_norm": 5.510152786532311, + "learning_rate": 1.1326447271818084e-05, + "loss": 1.0749, + "step": 78825 + }, + { + "epoch": 0.95, + "grad_norm": 20.110551061731183, + "learning_rate": 1.1325868253519917e-05, + "loss": 1.0902, + "step": 78828 + }, + { + "epoch": 0.95, + "grad_norm": 76.03942551026924, + "learning_rate": 1.1325289230697038e-05, + "loss": 0.8789, + "step": 78831 + }, + { + "epoch": 0.95, + "grad_norm": 6.052048479409023, + "learning_rate": 1.1324710203351426e-05, + "loss": 1.2058, + "step": 78834 + }, + { + "epoch": 0.95, + "grad_norm": 19.16682024357207, + "learning_rate": 1.1324131171485053e-05, + "loss": 1.2721, + "step": 78837 + }, + { + "epoch": 0.95, + "grad_norm": 5.268827072539059, + "learning_rate": 1.13235521350999e-05, + "loss": 1.6783, + "step": 78840 + }, + { + "epoch": 0.95, + "grad_norm": 46.15124421227939, + "learning_rate": 1.1322973094197937e-05, + "loss": 1.2543, + "step": 78843 + }, + { + "epoch": 0.95, + "grad_norm": 18.016566031740922, + "learning_rate": 1.1322394048781146e-05, + "loss": 1.8244, + "step": 78846 + }, + { + "epoch": 0.95, + "grad_norm": 4.6225643874448, + "learning_rate": 1.13218149988515e-05, + "loss": 1.1067, + "step": 78849 + }, + { + "epoch": 0.95, + "grad_norm": 4.891734136864974, + "learning_rate": 1.1321235944410975e-05, + "loss": 1.5921, + "step": 78852 + }, + { + "epoch": 0.95, + "grad_norm": 21.868415412320317, + "learning_rate": 1.1320656885461547e-05, + "loss": 1.3719, + "step": 78855 + }, + { + "epoch": 0.95, + "grad_norm": 59.916346888619934, + "learning_rate": 1.1320077822005193e-05, + "loss": 1.0669, + "step": 78858 + }, + { + "epoch": 0.95, + "grad_norm": 18.891638146237337, + "learning_rate": 1.1319498754043892e-05, + "loss": 1.472, + "step": 78861 + }, + { + "epoch": 0.95, + "grad_norm": 6.234508549777917, + "learning_rate": 1.1318919681579613e-05, + "loss": 1.2869, + "step": 78864 + }, + { + "epoch": 0.95, + "grad_norm": 12.0490566967561, + "learning_rate": 1.1318340604614338e-05, + "loss": 1.5825, + "step": 78867 + }, + { + "epoch": 0.95, + "grad_norm": 5.4040228751379376, + "learning_rate": 1.1317761523150039e-05, + "loss": 1.3822, + "step": 78870 + }, + { + "epoch": 0.95, + "grad_norm": 36.906599882572635, + "learning_rate": 1.1317182437188698e-05, + "loss": 1.8309, + "step": 78873 + }, + { + "epoch": 0.95, + "grad_norm": 14.18268348980541, + "learning_rate": 1.1316603346732284e-05, + "loss": 1.2535, + "step": 78876 + }, + { + "epoch": 0.95, + "grad_norm": 16.855329175844233, + "learning_rate": 1.1316024251782782e-05, + "loss": 0.9881, + "step": 78879 + }, + { + "epoch": 0.95, + "grad_norm": 12.354727179814535, + "learning_rate": 1.1315445152342159e-05, + "loss": 1.1408, + "step": 78882 + }, + { + "epoch": 0.95, + "grad_norm": 3.030978730518429, + "learning_rate": 1.1314866048412396e-05, + "loss": 1.2307, + "step": 78885 + }, + { + "epoch": 0.95, + "grad_norm": 26.496944758492912, + "learning_rate": 1.1314286939995472e-05, + "loss": 1.3317, + "step": 78888 + }, + { + "epoch": 0.95, + "grad_norm": 7.651508246138696, + "learning_rate": 1.1313707827093357e-05, + "loss": 1.196, + "step": 78891 + }, + { + "epoch": 0.95, + "grad_norm": 5.6333839133555825, + "learning_rate": 1.1313128709708032e-05, + "loss": 1.3907, + "step": 78894 + }, + { + "epoch": 0.95, + "grad_norm": 12.425444384661589, + "learning_rate": 1.131254958784147e-05, + "loss": 1.4184, + "step": 78897 + }, + { + "epoch": 0.95, + "grad_norm": 8.324088887678638, + "learning_rate": 1.1311970461495651e-05, + "loss": 1.2154, + "step": 78900 + }, + { + "epoch": 0.95, + "grad_norm": 11.571711759705112, + "learning_rate": 1.1311391330672547e-05, + "loss": 1.4045, + "step": 78903 + }, + { + "epoch": 0.95, + "grad_norm": 15.682523208145879, + "learning_rate": 1.1310812195374141e-05, + "loss": 1.3283, + "step": 78906 + }, + { + "epoch": 0.95, + "grad_norm": 12.238917403433932, + "learning_rate": 1.13102330556024e-05, + "loss": 1.502, + "step": 78909 + }, + { + "epoch": 0.95, + "grad_norm": 4.076048901172263, + "learning_rate": 1.1309653911359309e-05, + "loss": 1.0517, + "step": 78912 + }, + { + "epoch": 0.95, + "grad_norm": 4.968214487232372, + "learning_rate": 1.130907476264684e-05, + "loss": 1.2755, + "step": 78915 + }, + { + "epoch": 0.95, + "grad_norm": 3.9115731958208286, + "learning_rate": 1.1308495609466967e-05, + "loss": 0.9065, + "step": 78918 + }, + { + "epoch": 0.95, + "grad_norm": 5.110720492957733, + "learning_rate": 1.1307916451821676e-05, + "loss": 1.5001, + "step": 78921 + }, + { + "epoch": 0.95, + "grad_norm": 4.232618509908726, + "learning_rate": 1.1307337289712934e-05, + "loss": 1.3055, + "step": 78924 + }, + { + "epoch": 0.95, + "grad_norm": 49.8564933769613, + "learning_rate": 1.130675812314272e-05, + "loss": 1.2474, + "step": 78927 + }, + { + "epoch": 0.95, + "grad_norm": 19.27984205721076, + "learning_rate": 1.1306178952113012e-05, + "loss": 1.1494, + "step": 78930 + }, + { + "epoch": 0.95, + "grad_norm": 2.4924127904846274, + "learning_rate": 1.1305599776625788e-05, + "loss": 1.2485, + "step": 78933 + }, + { + "epoch": 0.95, + "grad_norm": 3.3147623573600167, + "learning_rate": 1.1305020596683018e-05, + "loss": 1.3878, + "step": 78936 + }, + { + "epoch": 0.95, + "grad_norm": 15.454534613091264, + "learning_rate": 1.1304441412286684e-05, + "loss": 1.4698, + "step": 78939 + }, + { + "epoch": 0.95, + "grad_norm": 15.813690244457625, + "learning_rate": 1.1303862223438763e-05, + "loss": 1.131, + "step": 78942 + }, + { + "epoch": 0.95, + "grad_norm": 2.114247746969587, + "learning_rate": 1.130328303014123e-05, + "loss": 1.2512, + "step": 78945 + }, + { + "epoch": 0.95, + "grad_norm": 14.355360682995581, + "learning_rate": 1.130270383239606e-05, + "loss": 1.112, + "step": 78948 + }, + { + "epoch": 0.95, + "grad_norm": 14.465034127076871, + "learning_rate": 1.130212463020523e-05, + "loss": 1.3131, + "step": 78951 + }, + { + "epoch": 0.95, + "grad_norm": 7.097779868361138, + "learning_rate": 1.1301545423570723e-05, + "loss": 1.2607, + "step": 78954 + }, + { + "epoch": 0.95, + "grad_norm": 5.218437721694946, + "learning_rate": 1.1300966212494504e-05, + "loss": 1.1377, + "step": 78957 + }, + { + "epoch": 0.95, + "grad_norm": 24.85682760524128, + "learning_rate": 1.1300386996978561e-05, + "loss": 1.4085, + "step": 78960 + }, + { + "epoch": 0.95, + "grad_norm": 9.495768274438124, + "learning_rate": 1.129980777702486e-05, + "loss": 1.2599, + "step": 78963 + }, + { + "epoch": 0.95, + "grad_norm": 17.9567702561739, + "learning_rate": 1.1299228552635386e-05, + "loss": 1.4191, + "step": 78966 + }, + { + "epoch": 0.95, + "grad_norm": 19.56350157668452, + "learning_rate": 1.1298649323812115e-05, + "loss": 1.0992, + "step": 78969 + }, + { + "epoch": 0.95, + "grad_norm": 4.242801561460808, + "learning_rate": 1.1298070090557018e-05, + "loss": 1.1793, + "step": 78972 + }, + { + "epoch": 0.95, + "grad_norm": 4.458259009177544, + "learning_rate": 1.129749085287208e-05, + "loss": 1.2565, + "step": 78975 + }, + { + "epoch": 0.95, + "grad_norm": 8.837764525842527, + "learning_rate": 1.129691161075927e-05, + "loss": 1.3355, + "step": 78978 + }, + { + "epoch": 0.95, + "grad_norm": 11.456375727384874, + "learning_rate": 1.1296332364220567e-05, + "loss": 1.591, + "step": 78981 + }, + { + "epoch": 0.95, + "grad_norm": 12.238679029767228, + "learning_rate": 1.1295753113257949e-05, + "loss": 1.9586, + "step": 78984 + }, + { + "epoch": 0.95, + "grad_norm": 5.246177761538569, + "learning_rate": 1.1295173857873396e-05, + "loss": 1.526, + "step": 78987 + }, + { + "epoch": 0.95, + "grad_norm": 5.5462579743296825, + "learning_rate": 1.1294594598068875e-05, + "loss": 1.5723, + "step": 78990 + }, + { + "epoch": 0.95, + "grad_norm": 3.0509664372191, + "learning_rate": 1.129401533384637e-05, + "loss": 1.3122, + "step": 78993 + }, + { + "epoch": 0.95, + "grad_norm": 6.3720687243527, + "learning_rate": 1.1293436065207864e-05, + "loss": 1.362, + "step": 78996 + }, + { + "epoch": 0.95, + "grad_norm": 5.144630145396441, + "learning_rate": 1.1292856792155319e-05, + "loss": 1.4081, + "step": 78999 + }, + { + "epoch": 0.95, + "grad_norm": 18.81851610025398, + "learning_rate": 1.1292277514690721e-05, + "loss": 1.3861, + "step": 79002 + }, + { + "epoch": 0.95, + "grad_norm": 43.47808953820299, + "learning_rate": 1.1291698232816047e-05, + "loss": 1.3078, + "step": 79005 + }, + { + "epoch": 0.95, + "grad_norm": 3.456254344747846, + "learning_rate": 1.1291118946533273e-05, + "loss": 1.5894, + "step": 79008 + }, + { + "epoch": 0.95, + "grad_norm": 5.677394761692514, + "learning_rate": 1.1290539655844372e-05, + "loss": 1.1885, + "step": 79011 + }, + { + "epoch": 0.95, + "grad_norm": 19.81775656865898, + "learning_rate": 1.1289960360751326e-05, + "loss": 1.0492, + "step": 79014 + }, + { + "epoch": 0.95, + "grad_norm": 11.403704910856352, + "learning_rate": 1.128938106125611e-05, + "loss": 1.1156, + "step": 79017 + }, + { + "epoch": 0.95, + "grad_norm": 24.914409075155216, + "learning_rate": 1.1288801757360699e-05, + "loss": 1.2416, + "step": 79020 + }, + { + "epoch": 0.95, + "grad_norm": 8.574128506620637, + "learning_rate": 1.1288222449067075e-05, + "loss": 1.3201, + "step": 79023 + }, + { + "epoch": 0.95, + "grad_norm": 13.65832047991241, + "learning_rate": 1.1287643136377207e-05, + "loss": 1.2233, + "step": 79026 + }, + { + "epoch": 0.95, + "grad_norm": 7.748973859487625, + "learning_rate": 1.1287063819293082e-05, + "loss": 1.2659, + "step": 79029 + }, + { + "epoch": 0.95, + "grad_norm": 7.373529032760566, + "learning_rate": 1.1286484497816668e-05, + "loss": 1.3854, + "step": 79032 + }, + { + "epoch": 0.95, + "grad_norm": 11.017858347766188, + "learning_rate": 1.1285905171949948e-05, + "loss": 1.012, + "step": 79035 + }, + { + "epoch": 0.95, + "grad_norm": 8.605661338326666, + "learning_rate": 1.1285325841694897e-05, + "loss": 1.2522, + "step": 79038 + }, + { + "epoch": 0.95, + "grad_norm": 13.839476236395363, + "learning_rate": 1.1284746507053492e-05, + "loss": 1.3899, + "step": 79041 + }, + { + "epoch": 0.95, + "grad_norm": 15.074064924613962, + "learning_rate": 1.1284167168027707e-05, + "loss": 1.0265, + "step": 79044 + }, + { + "epoch": 0.95, + "grad_norm": 7.6452489516216335, + "learning_rate": 1.1283587824619521e-05, + "loss": 1.183, + "step": 79047 + }, + { + "epoch": 0.95, + "grad_norm": 24.76320452229208, + "learning_rate": 1.1283008476830918e-05, + "loss": 1.2295, + "step": 79050 + }, + { + "epoch": 0.95, + "grad_norm": 15.042424731564063, + "learning_rate": 1.1282429124663868e-05, + "loss": 1.4532, + "step": 79053 + }, + { + "epoch": 0.95, + "grad_norm": 14.681685346637883, + "learning_rate": 1.1281849768120346e-05, + "loss": 1.3879, + "step": 79056 + }, + { + "epoch": 0.95, + "grad_norm": 15.80610766507596, + "learning_rate": 1.1281270407202336e-05, + "loss": 1.4574, + "step": 79059 + }, + { + "epoch": 0.95, + "grad_norm": 15.646195670894086, + "learning_rate": 1.128069104191181e-05, + "loss": 1.5099, + "step": 79062 + }, + { + "epoch": 0.95, + "grad_norm": 94.20868570040096, + "learning_rate": 1.1280111672250744e-05, + "loss": 1.4163, + "step": 79065 + }, + { + "epoch": 0.95, + "grad_norm": 7.860924089763627, + "learning_rate": 1.127953229822112e-05, + "loss": 1.0666, + "step": 79068 + }, + { + "epoch": 0.95, + "grad_norm": 7.1016145881050905, + "learning_rate": 1.1278952919824914e-05, + "loss": 1.1274, + "step": 79071 + }, + { + "epoch": 0.95, + "grad_norm": 6.910389797496105, + "learning_rate": 1.1278373537064103e-05, + "loss": 1.2543, + "step": 79074 + }, + { + "epoch": 0.95, + "grad_norm": 3.2152362560101886, + "learning_rate": 1.1277794149940662e-05, + "loss": 1.0104, + "step": 79077 + }, + { + "epoch": 0.95, + "grad_norm": 33.99644084877288, + "learning_rate": 1.1277214758456572e-05, + "loss": 1.1196, + "step": 79080 + }, + { + "epoch": 0.95, + "grad_norm": 10.984183821762525, + "learning_rate": 1.1276635362613806e-05, + "loss": 0.9911, + "step": 79083 + }, + { + "epoch": 0.95, + "grad_norm": 9.213521061234353, + "learning_rate": 1.1276055962414343e-05, + "loss": 1.1006, + "step": 79086 + }, + { + "epoch": 0.95, + "grad_norm": 16.133734205721684, + "learning_rate": 1.1275476557860161e-05, + "loss": 1.5844, + "step": 79089 + }, + { + "epoch": 0.95, + "grad_norm": 4.281342681498471, + "learning_rate": 1.1274897148953239e-05, + "loss": 1.4566, + "step": 79092 + }, + { + "epoch": 0.95, + "grad_norm": 6.569224354539641, + "learning_rate": 1.127431773569555e-05, + "loss": 1.1754, + "step": 79095 + }, + { + "epoch": 0.95, + "grad_norm": 10.500737139750305, + "learning_rate": 1.1273738318089072e-05, + "loss": 1.4635, + "step": 79098 + }, + { + "epoch": 0.95, + "grad_norm": 30.757289409027823, + "learning_rate": 1.1273158896135787e-05, + "loss": 1.105, + "step": 79101 + }, + { + "epoch": 0.95, + "grad_norm": 9.597528650323222, + "learning_rate": 1.127257946983767e-05, + "loss": 1.5047, + "step": 79104 + }, + { + "epoch": 0.95, + "grad_norm": 69.46710707253747, + "learning_rate": 1.1272000039196693e-05, + "loss": 1.2455, + "step": 79107 + }, + { + "epoch": 0.95, + "grad_norm": 12.649899185585975, + "learning_rate": 1.1271420604214842e-05, + "loss": 1.1228, + "step": 79110 + }, + { + "epoch": 0.95, + "grad_norm": 25.4197773110971, + "learning_rate": 1.127084116489409e-05, + "loss": 1.4135, + "step": 79113 + }, + { + "epoch": 0.95, + "grad_norm": 24.501895249353307, + "learning_rate": 1.1270261721236414e-05, + "loss": 1.5951, + "step": 79116 + }, + { + "epoch": 0.95, + "grad_norm": 54.569929958122536, + "learning_rate": 1.1269682273243792e-05, + "loss": 1.6641, + "step": 79119 + }, + { + "epoch": 0.95, + "grad_norm": 17.010715238922916, + "learning_rate": 1.1269102820918202e-05, + "loss": 1.4751, + "step": 79122 + }, + { + "epoch": 0.95, + "grad_norm": 7.29396359429788, + "learning_rate": 1.1268523364261623e-05, + "loss": 1.1512, + "step": 79125 + }, + { + "epoch": 0.95, + "grad_norm": 81.47169349167478, + "learning_rate": 1.1267943903276028e-05, + "loss": 1.4945, + "step": 79128 + }, + { + "epoch": 0.95, + "grad_norm": 15.792975322821555, + "learning_rate": 1.12673644379634e-05, + "loss": 1.1519, + "step": 79131 + }, + { + "epoch": 0.95, + "grad_norm": 10.913529422272887, + "learning_rate": 1.1266784968325712e-05, + "loss": 1.1201, + "step": 79134 + }, + { + "epoch": 0.95, + "grad_norm": 2.3629879925336184, + "learning_rate": 1.1266205494364943e-05, + "loss": 1.5114, + "step": 79137 + }, + { + "epoch": 0.95, + "grad_norm": 7.165308761651588, + "learning_rate": 1.126562601608307e-05, + "loss": 1.1262, + "step": 79140 + }, + { + "epoch": 0.95, + "grad_norm": 6.479757055440886, + "learning_rate": 1.1265046533482074e-05, + "loss": 1.2878, + "step": 79143 + }, + { + "epoch": 0.95, + "grad_norm": 14.934715237273402, + "learning_rate": 1.1264467046563927e-05, + "loss": 1.4122, + "step": 79146 + }, + { + "epoch": 0.95, + "grad_norm": 11.320389460732892, + "learning_rate": 1.1263887555330613e-05, + "loss": 1.5661, + "step": 79149 + }, + { + "epoch": 0.95, + "grad_norm": 6.521219777507231, + "learning_rate": 1.1263308059784102e-05, + "loss": 1.5456, + "step": 79152 + }, + { + "epoch": 0.95, + "grad_norm": 7.212071853560859, + "learning_rate": 1.1262728559926378e-05, + "loss": 1.101, + "step": 79155 + }, + { + "epoch": 0.95, + "grad_norm": 11.305503615007957, + "learning_rate": 1.1262149055759414e-05, + "loss": 1.2491, + "step": 79158 + }, + { + "epoch": 0.95, + "grad_norm": 11.723994681991211, + "learning_rate": 1.1261569547285191e-05, + "loss": 1.3769, + "step": 79161 + }, + { + "epoch": 0.95, + "grad_norm": 10.696299475144496, + "learning_rate": 1.1260990034505685e-05, + "loss": 1.4917, + "step": 79164 + }, + { + "epoch": 0.95, + "grad_norm": 4.596596976781942, + "learning_rate": 1.1260410517422877e-05, + "loss": 1.3464, + "step": 79167 + }, + { + "epoch": 0.95, + "grad_norm": 7.782957663009039, + "learning_rate": 1.125983099603874e-05, + "loss": 1.2164, + "step": 79170 + }, + { + "epoch": 0.95, + "grad_norm": 12.073745395015107, + "learning_rate": 1.1259251470355253e-05, + "loss": 1.524, + "step": 79173 + }, + { + "epoch": 0.95, + "grad_norm": 31.344997721154378, + "learning_rate": 1.1258671940374394e-05, + "loss": 1.1264, + "step": 79176 + }, + { + "epoch": 0.95, + "grad_norm": 20.17202107408646, + "learning_rate": 1.1258092406098145e-05, + "loss": 1.3377, + "step": 79179 + }, + { + "epoch": 0.95, + "grad_norm": 6.045417415058041, + "learning_rate": 1.1257512867528474e-05, + "loss": 1.1405, + "step": 79182 + }, + { + "epoch": 0.95, + "grad_norm": 19.894607917016714, + "learning_rate": 1.1256933324667367e-05, + "loss": 1.3183, + "step": 79185 + }, + { + "epoch": 0.95, + "grad_norm": 10.48711397724247, + "learning_rate": 1.1256353777516802e-05, + "loss": 1.3345, + "step": 79188 + }, + { + "epoch": 0.95, + "grad_norm": 12.22976351771054, + "learning_rate": 1.1255774226078752e-05, + "loss": 1.4578, + "step": 79191 + }, + { + "epoch": 0.95, + "grad_norm": 12.186560005515462, + "learning_rate": 1.1255194670355194e-05, + "loss": 1.2009, + "step": 79194 + }, + { + "epoch": 0.95, + "grad_norm": 30.165965102726556, + "learning_rate": 1.1254615110348113e-05, + "loss": 1.1514, + "step": 79197 + }, + { + "epoch": 0.95, + "grad_norm": 48.93257859443523, + "learning_rate": 1.1254035546059481e-05, + "loss": 1.428, + "step": 79200 + }, + { + "epoch": 0.95, + "grad_norm": 8.667194077009022, + "learning_rate": 1.1253455977491278e-05, + "loss": 1.1956, + "step": 79203 + }, + { + "epoch": 0.95, + "grad_norm": 9.829528074615828, + "learning_rate": 1.1252876404645479e-05, + "loss": 1.2967, + "step": 79206 + }, + { + "epoch": 0.95, + "grad_norm": 37.78864797012118, + "learning_rate": 1.1252296827524065e-05, + "loss": 1.4262, + "step": 79209 + }, + { + "epoch": 0.95, + "grad_norm": 5.908583752820597, + "learning_rate": 1.1251717246129015e-05, + "loss": 1.0583, + "step": 79212 + }, + { + "epoch": 0.95, + "grad_norm": 10.461485114774925, + "learning_rate": 1.1251137660462302e-05, + "loss": 1.6726, + "step": 79215 + }, + { + "epoch": 0.95, + "grad_norm": 16.40244319291179, + "learning_rate": 1.125055807052591e-05, + "loss": 1.5075, + "step": 79218 + }, + { + "epoch": 0.95, + "grad_norm": 6.258834610485345, + "learning_rate": 1.1249978476321813e-05, + "loss": 1.2926, + "step": 79221 + }, + { + "epoch": 0.95, + "grad_norm": 15.312941451101251, + "learning_rate": 1.124939887785199e-05, + "loss": 1.1813, + "step": 79224 + }, + { + "epoch": 0.95, + "grad_norm": 26.061945037869656, + "learning_rate": 1.1248819275118414e-05, + "loss": 1.4507, + "step": 79227 + }, + { + "epoch": 0.95, + "grad_norm": 5.770293856898047, + "learning_rate": 1.1248239668123072e-05, + "loss": 1.4081, + "step": 79230 + }, + { + "epoch": 0.95, + "grad_norm": 17.5647182085003, + "learning_rate": 1.1247660056867937e-05, + "loss": 1.1701, + "step": 79233 + }, + { + "epoch": 0.95, + "grad_norm": 15.318507326367001, + "learning_rate": 1.1247080441354986e-05, + "loss": 1.2045, + "step": 79236 + }, + { + "epoch": 0.95, + "grad_norm": 17.833774718780216, + "learning_rate": 1.1246500821586202e-05, + "loss": 1.3529, + "step": 79239 + }, + { + "epoch": 0.95, + "grad_norm": 30.725407397852887, + "learning_rate": 1.1245921197563556e-05, + "loss": 1.4003, + "step": 79242 + }, + { + "epoch": 0.95, + "grad_norm": 33.115503688838594, + "learning_rate": 1.1245341569289032e-05, + "loss": 1.5416, + "step": 79245 + }, + { + "epoch": 0.95, + "grad_norm": 20.49877697251467, + "learning_rate": 1.1244761936764603e-05, + "loss": 1.7111, + "step": 79248 + }, + { + "epoch": 0.95, + "grad_norm": 17.437235972939494, + "learning_rate": 1.1244182299992255e-05, + "loss": 1.197, + "step": 79251 + }, + { + "epoch": 0.95, + "grad_norm": 7.4323398141018675, + "learning_rate": 1.1243602658973958e-05, + "loss": 1.4508, + "step": 79254 + }, + { + "epoch": 0.95, + "grad_norm": 20.867962506626643, + "learning_rate": 1.1243023013711693e-05, + "loss": 1.3186, + "step": 79257 + }, + { + "epoch": 0.95, + "grad_norm": 10.265849568160728, + "learning_rate": 1.1242443364207439e-05, + "loss": 1.2152, + "step": 79260 + }, + { + "epoch": 0.95, + "grad_norm": 28.188755097572177, + "learning_rate": 1.1241863710463173e-05, + "loss": 1.1525, + "step": 79263 + }, + { + "epoch": 0.95, + "grad_norm": 24.685251390644616, + "learning_rate": 1.1241284052480872e-05, + "loss": 1.2615, + "step": 79266 + }, + { + "epoch": 0.95, + "grad_norm": 13.560171958990079, + "learning_rate": 1.1240704390262516e-05, + "loss": 1.9447, + "step": 79269 + }, + { + "epoch": 0.95, + "grad_norm": 14.813981965042373, + "learning_rate": 1.1240124723810084e-05, + "loss": 1.1636, + "step": 79272 + }, + { + "epoch": 0.95, + "grad_norm": 11.681054452638735, + "learning_rate": 1.1239545053125554e-05, + "loss": 1.545, + "step": 79275 + }, + { + "epoch": 0.95, + "grad_norm": 20.789893220459113, + "learning_rate": 1.1238965378210902e-05, + "loss": 1.5574, + "step": 79278 + }, + { + "epoch": 0.95, + "grad_norm": 16.261814707072787, + "learning_rate": 1.1238385699068104e-05, + "loss": 1.2064, + "step": 79281 + }, + { + "epoch": 0.95, + "grad_norm": 15.764577247096401, + "learning_rate": 1.1237806015699147e-05, + "loss": 1.2855, + "step": 79284 + }, + { + "epoch": 0.95, + "grad_norm": 9.702104391290751, + "learning_rate": 1.1237226328106e-05, + "loss": 1.6883, + "step": 79287 + }, + { + "epoch": 0.95, + "grad_norm": 17.26101638472248, + "learning_rate": 1.1236646636290646e-05, + "loss": 1.6934, + "step": 79290 + }, + { + "epoch": 0.95, + "grad_norm": 30.479911720048328, + "learning_rate": 1.1236066940255065e-05, + "loss": 1.1089, + "step": 79293 + }, + { + "epoch": 0.95, + "grad_norm": 11.263259531158022, + "learning_rate": 1.1235487240001233e-05, + "loss": 1.3056, + "step": 79296 + }, + { + "epoch": 0.95, + "grad_norm": 5.332774232380375, + "learning_rate": 1.1234907535531125e-05, + "loss": 1.094, + "step": 79299 + }, + { + "epoch": 0.95, + "grad_norm": 19.312315045916915, + "learning_rate": 1.1234327826846723e-05, + "loss": 1.1489, + "step": 79302 + }, + { + "epoch": 0.95, + "grad_norm": 9.71089653885029, + "learning_rate": 1.1233748113950004e-05, + "loss": 1.6232, + "step": 79305 + }, + { + "epoch": 0.95, + "grad_norm": 4.984127866297618, + "learning_rate": 1.123316839684295e-05, + "loss": 1.2614, + "step": 79308 + }, + { + "epoch": 0.95, + "grad_norm": 25.96434365472898, + "learning_rate": 1.1232588675527536e-05, + "loss": 1.164, + "step": 79311 + }, + { + "epoch": 0.95, + "grad_norm": 13.479043474520013, + "learning_rate": 1.123200895000574e-05, + "loss": 1.115, + "step": 79314 + }, + { + "epoch": 0.95, + "grad_norm": 10.417121663808219, + "learning_rate": 1.1231429220279539e-05, + "loss": 1.4074, + "step": 79317 + }, + { + "epoch": 0.95, + "grad_norm": 12.622620495987507, + "learning_rate": 1.123084948635092e-05, + "loss": 1.4746, + "step": 79320 + }, + { + "epoch": 0.95, + "grad_norm": 3.980713925349812, + "learning_rate": 1.1230269748221846e-05, + "loss": 1.1854, + "step": 79323 + }, + { + "epoch": 0.95, + "grad_norm": 7.8420836320720735, + "learning_rate": 1.1229690005894312e-05, + "loss": 1.3564, + "step": 79326 + }, + { + "epoch": 0.95, + "grad_norm": 7.947873398680663, + "learning_rate": 1.1229110259370285e-05, + "loss": 1.2688, + "step": 79329 + }, + { + "epoch": 0.95, + "grad_norm": 17.229603696205274, + "learning_rate": 1.122853050865175e-05, + "loss": 1.6771, + "step": 79332 + }, + { + "epoch": 0.95, + "grad_norm": 8.306551114531434, + "learning_rate": 1.1227950753740679e-05, + "loss": 1.6667, + "step": 79335 + }, + { + "epoch": 0.95, + "grad_norm": 7.895737798340753, + "learning_rate": 1.122737099463906e-05, + "loss": 1.7066, + "step": 79338 + }, + { + "epoch": 0.95, + "grad_norm": 4.1603207909725315, + "learning_rate": 1.1226791231348861e-05, + "loss": 1.3074, + "step": 79341 + }, + { + "epoch": 0.95, + "grad_norm": 70.32744337599597, + "learning_rate": 1.1226211463872065e-05, + "loss": 1.3703, + "step": 79344 + }, + { + "epoch": 0.95, + "grad_norm": 3.163560025321647, + "learning_rate": 1.1225631692210655e-05, + "loss": 1.1983, + "step": 79347 + }, + { + "epoch": 0.95, + "grad_norm": 7.9693233715959675, + "learning_rate": 1.1225051916366603e-05, + "loss": 1.1882, + "step": 79350 + }, + { + "epoch": 0.95, + "grad_norm": 23.694235953317484, + "learning_rate": 1.122447213634189e-05, + "loss": 1.4043, + "step": 79353 + }, + { + "epoch": 0.95, + "grad_norm": 14.576429344549606, + "learning_rate": 1.1223892352138495e-05, + "loss": 1.0892, + "step": 79356 + }, + { + "epoch": 0.95, + "grad_norm": 6.137983676020402, + "learning_rate": 1.1223312563758397e-05, + "loss": 1.104, + "step": 79359 + }, + { + "epoch": 0.95, + "grad_norm": 23.97285017225866, + "learning_rate": 1.1222732771203576e-05, + "loss": 1.4668, + "step": 79362 + }, + { + "epoch": 0.95, + "grad_norm": 6.275310618263059, + "learning_rate": 1.1222152974476004e-05, + "loss": 1.3182, + "step": 79365 + }, + { + "epoch": 0.95, + "grad_norm": 6.5189975578468005, + "learning_rate": 1.1221573173577665e-05, + "loss": 1.0119, + "step": 79368 + }, + { + "epoch": 0.95, + "grad_norm": 8.756839380455164, + "learning_rate": 1.1220993368510538e-05, + "loss": 1.159, + "step": 79371 + }, + { + "epoch": 0.95, + "grad_norm": 6.233718925178835, + "learning_rate": 1.1220413559276601e-05, + "loss": 1.436, + "step": 79374 + }, + { + "epoch": 0.95, + "grad_norm": 8.691520875504345, + "learning_rate": 1.121983374587783e-05, + "loss": 1.3402, + "step": 79377 + }, + { + "epoch": 0.95, + "grad_norm": 19.566643547445835, + "learning_rate": 1.1219253928316207e-05, + "loss": 1.5333, + "step": 79380 + }, + { + "epoch": 0.95, + "grad_norm": 4.932944659277154, + "learning_rate": 1.121867410659371e-05, + "loss": 1.2893, + "step": 79383 + }, + { + "epoch": 0.95, + "grad_norm": 7.941532014490775, + "learning_rate": 1.1218094280712316e-05, + "loss": 1.4146, + "step": 79386 + }, + { + "epoch": 0.95, + "grad_norm": 16.829434896673884, + "learning_rate": 1.1217514450674007e-05, + "loss": 1.3032, + "step": 79389 + }, + { + "epoch": 0.95, + "grad_norm": 23.481922438969598, + "learning_rate": 1.1216934616480761e-05, + "loss": 1.7665, + "step": 79392 + }, + { + "epoch": 0.95, + "grad_norm": 15.376056703356928, + "learning_rate": 1.1216354778134551e-05, + "loss": 1.3784, + "step": 79395 + }, + { + "epoch": 0.95, + "grad_norm": 8.490778879300885, + "learning_rate": 1.121577493563736e-05, + "loss": 1.3505, + "step": 79398 + }, + { + "epoch": 0.95, + "grad_norm": 16.859104623012158, + "learning_rate": 1.121519508899117e-05, + "loss": 1.4401, + "step": 79401 + }, + { + "epoch": 0.95, + "grad_norm": 16.982346018113756, + "learning_rate": 1.1214615238197959e-05, + "loss": 1.4961, + "step": 79404 + }, + { + "epoch": 0.95, + "grad_norm": 7.360008933933341, + "learning_rate": 1.12140353832597e-05, + "loss": 1.4242, + "step": 79407 + }, + { + "epoch": 0.95, + "grad_norm": 9.473381520029903, + "learning_rate": 1.1213455524178376e-05, + "loss": 1.253, + "step": 79410 + }, + { + "epoch": 0.95, + "grad_norm": 9.001631795662263, + "learning_rate": 1.1212875660955969e-05, + "loss": 0.9928, + "step": 79413 + }, + { + "epoch": 0.95, + "grad_norm": 100.31349709618391, + "learning_rate": 1.1212295793594447e-05, + "loss": 1.4445, + "step": 79416 + }, + { + "epoch": 0.95, + "grad_norm": 11.03652295568675, + "learning_rate": 1.1211715922095803e-05, + "loss": 1.2335, + "step": 79419 + }, + { + "epoch": 0.96, + "grad_norm": 10.932274782444583, + "learning_rate": 1.1211136046462004e-05, + "loss": 1.2586, + "step": 79422 + }, + { + "epoch": 0.96, + "grad_norm": 132.985663287573, + "learning_rate": 1.1210556166695037e-05, + "loss": 1.5572, + "step": 79425 + }, + { + "epoch": 0.96, + "grad_norm": 9.924907102616645, + "learning_rate": 1.1209976282796878e-05, + "loss": 1.4367, + "step": 79428 + }, + { + "epoch": 0.96, + "grad_norm": 22.255092570182516, + "learning_rate": 1.1209396394769502e-05, + "loss": 1.7085, + "step": 79431 + }, + { + "epoch": 0.96, + "grad_norm": 9.821384331904275, + "learning_rate": 1.12088165026149e-05, + "loss": 1.1904, + "step": 79434 + }, + { + "epoch": 0.96, + "grad_norm": 39.6528712742858, + "learning_rate": 1.1208236606335034e-05, + "loss": 1.3145, + "step": 79437 + }, + { + "epoch": 0.96, + "grad_norm": 22.81898166084217, + "learning_rate": 1.1207656705931896e-05, + "loss": 1.64, + "step": 79440 + }, + { + "epoch": 0.96, + "grad_norm": 26.525833420217563, + "learning_rate": 1.1207076801407457e-05, + "loss": 1.3579, + "step": 79443 + }, + { + "epoch": 0.96, + "grad_norm": 10.287773371318032, + "learning_rate": 1.1206496892763705e-05, + "loss": 1.2518, + "step": 79446 + }, + { + "epoch": 0.96, + "grad_norm": 10.987208888888318, + "learning_rate": 1.120591698000261e-05, + "loss": 1.1888, + "step": 79449 + }, + { + "epoch": 0.96, + "grad_norm": 14.114625896311145, + "learning_rate": 1.1205337063126154e-05, + "loss": 1.2612, + "step": 79452 + }, + { + "epoch": 0.96, + "grad_norm": 14.501977026368307, + "learning_rate": 1.120475714213632e-05, + "loss": 1.494, + "step": 79455 + }, + { + "epoch": 0.96, + "grad_norm": 8.211947492465823, + "learning_rate": 1.1204177217035082e-05, + "loss": 1.1588, + "step": 79458 + }, + { + "epoch": 0.96, + "grad_norm": 6.323113690180361, + "learning_rate": 1.1203597287824422e-05, + "loss": 1.2553, + "step": 79461 + }, + { + "epoch": 0.96, + "grad_norm": 18.935401594991347, + "learning_rate": 1.1203017354506313e-05, + "loss": 1.5533, + "step": 79464 + }, + { + "epoch": 0.96, + "grad_norm": 19.277201780026584, + "learning_rate": 1.1202437417082748e-05, + "loss": 1.5712, + "step": 79467 + }, + { + "epoch": 0.96, + "grad_norm": 14.173279142544475, + "learning_rate": 1.120185747555569e-05, + "loss": 1.3501, + "step": 79470 + }, + { + "epoch": 0.96, + "grad_norm": 4.370224416802547, + "learning_rate": 1.1201277529927128e-05, + "loss": 1.3583, + "step": 79473 + }, + { + "epoch": 0.96, + "grad_norm": 11.957555970433448, + "learning_rate": 1.1200697580199039e-05, + "loss": 1.3252, + "step": 79476 + }, + { + "epoch": 0.96, + "grad_norm": 7.20280308769144, + "learning_rate": 1.1200117626373398e-05, + "loss": 1.1268, + "step": 79479 + }, + { + "epoch": 0.96, + "grad_norm": 13.280708918319485, + "learning_rate": 1.1199537668452191e-05, + "loss": 1.3943, + "step": 79482 + }, + { + "epoch": 0.96, + "grad_norm": 10.151754253438487, + "learning_rate": 1.1198957706437394e-05, + "loss": 1.1748, + "step": 79485 + }, + { + "epoch": 0.96, + "grad_norm": 14.312219112702447, + "learning_rate": 1.1198377740330989e-05, + "loss": 1.2045, + "step": 79488 + }, + { + "epoch": 0.96, + "grad_norm": 9.337815054773134, + "learning_rate": 1.1197797770134948e-05, + "loss": 1.457, + "step": 79491 + }, + { + "epoch": 0.96, + "grad_norm": 24.124906490665108, + "learning_rate": 1.1197217795851256e-05, + "loss": 1.2016, + "step": 79494 + }, + { + "epoch": 0.96, + "grad_norm": 65.35845546389132, + "learning_rate": 1.1196637817481892e-05, + "loss": 1.4183, + "step": 79497 + }, + { + "epoch": 0.96, + "grad_norm": 8.840157011850911, + "learning_rate": 1.1196057835028834e-05, + "loss": 1.3148, + "step": 79500 + }, + { + "epoch": 0.96, + "grad_norm": 19.02319088554088, + "learning_rate": 1.1195477848494059e-05, + "loss": 1.4579, + "step": 79503 + }, + { + "epoch": 0.96, + "grad_norm": 15.050883613326812, + "learning_rate": 1.1194897857879548e-05, + "loss": 1.5772, + "step": 79506 + }, + { + "epoch": 0.96, + "grad_norm": 23.092086407508614, + "learning_rate": 1.1194317863187286e-05, + "loss": 1.5683, + "step": 79509 + }, + { + "epoch": 0.96, + "grad_norm": 5.064445922547523, + "learning_rate": 1.1193737864419243e-05, + "loss": 1.6188, + "step": 79512 + }, + { + "epoch": 0.96, + "grad_norm": 6.04515024192456, + "learning_rate": 1.1193157861577405e-05, + "loss": 1.3917, + "step": 79515 + }, + { + "epoch": 0.96, + "grad_norm": 14.365912692561439, + "learning_rate": 1.1192577854663749e-05, + "loss": 1.3499, + "step": 79518 + }, + { + "epoch": 0.96, + "grad_norm": 11.640193567462086, + "learning_rate": 1.1191997843680253e-05, + "loss": 1.2607, + "step": 79521 + }, + { + "epoch": 0.96, + "grad_norm": 30.305480499085036, + "learning_rate": 1.1191417828628897e-05, + "loss": 1.2348, + "step": 79524 + }, + { + "epoch": 0.96, + "grad_norm": 18.481150799292294, + "learning_rate": 1.1190837809511664e-05, + "loss": 1.1567, + "step": 79527 + }, + { + "epoch": 0.96, + "grad_norm": 3.050260755195484, + "learning_rate": 1.1190257786330527e-05, + "loss": 1.322, + "step": 79530 + }, + { + "epoch": 0.96, + "grad_norm": 64.78699277300636, + "learning_rate": 1.1189677759087469e-05, + "loss": 1.2881, + "step": 79533 + }, + { + "epoch": 0.96, + "grad_norm": 2.9093735363479354, + "learning_rate": 1.1189097727784472e-05, + "loss": 1.286, + "step": 79536 + }, + { + "epoch": 0.96, + "grad_norm": 11.114527874787173, + "learning_rate": 1.1188517692423511e-05, + "loss": 1.2826, + "step": 79539 + }, + { + "epoch": 0.96, + "grad_norm": 14.007443753151888, + "learning_rate": 1.118793765300657e-05, + "loss": 1.258, + "step": 79542 + }, + { + "epoch": 0.96, + "grad_norm": 57.9618008864522, + "learning_rate": 1.1187357609535623e-05, + "loss": 1.2096, + "step": 79545 + }, + { + "epoch": 0.96, + "grad_norm": 52.70700702858153, + "learning_rate": 1.1186777562012654e-05, + "loss": 1.3164, + "step": 79548 + }, + { + "epoch": 0.96, + "grad_norm": 5.349615905053985, + "learning_rate": 1.118619751043964e-05, + "loss": 1.3593, + "step": 79551 + }, + { + "epoch": 0.96, + "grad_norm": 12.576924862229196, + "learning_rate": 1.1185617454818561e-05, + "loss": 1.4037, + "step": 79554 + }, + { + "epoch": 0.96, + "grad_norm": 9.771887005735303, + "learning_rate": 1.1185037395151395e-05, + "loss": 1.2349, + "step": 79557 + }, + { + "epoch": 0.96, + "grad_norm": 6.642447746332852, + "learning_rate": 1.1184457331440121e-05, + "loss": 1.3874, + "step": 79560 + }, + { + "epoch": 0.96, + "grad_norm": 5.7922915443338425, + "learning_rate": 1.1183877263686729e-05, + "loss": 1.1977, + "step": 79563 + }, + { + "epoch": 0.96, + "grad_norm": 13.607809967056562, + "learning_rate": 1.1183297191893183e-05, + "loss": 1.2307, + "step": 79566 + }, + { + "epoch": 0.96, + "grad_norm": 14.121295590271604, + "learning_rate": 1.1182717116061475e-05, + "loss": 1.3277, + "step": 79569 + }, + { + "epoch": 0.96, + "grad_norm": 16.731598098148517, + "learning_rate": 1.1182137036193576e-05, + "loss": 1.5128, + "step": 79572 + }, + { + "epoch": 0.96, + "grad_norm": 22.173990913585314, + "learning_rate": 1.1181556952291473e-05, + "loss": 1.3041, + "step": 79575 + }, + { + "epoch": 0.96, + "grad_norm": 5.991181799695892, + "learning_rate": 1.1180976864357135e-05, + "loss": 1.241, + "step": 79578 + }, + { + "epoch": 0.96, + "grad_norm": 11.298829590694929, + "learning_rate": 1.1180396772392554e-05, + "loss": 1.347, + "step": 79581 + }, + { + "epoch": 0.96, + "grad_norm": 6.773123566509448, + "learning_rate": 1.1179816676399701e-05, + "loss": 1.6229, + "step": 79584 + }, + { + "epoch": 0.96, + "grad_norm": 25.16112979319133, + "learning_rate": 1.117923657638056e-05, + "loss": 1.5765, + "step": 79587 + }, + { + "epoch": 0.96, + "grad_norm": 20.106826085700998, + "learning_rate": 1.117865647233711e-05, + "loss": 1.4933, + "step": 79590 + }, + { + "epoch": 0.96, + "grad_norm": 22.731368548132103, + "learning_rate": 1.1178076364271328e-05, + "loss": 1.2926, + "step": 79593 + }, + { + "epoch": 0.96, + "grad_norm": 16.525595308066425, + "learning_rate": 1.11774962521852e-05, + "loss": 1.458, + "step": 79596 + }, + { + "epoch": 0.96, + "grad_norm": 10.856729065982663, + "learning_rate": 1.1176916136080695e-05, + "loss": 1.0503, + "step": 79599 + }, + { + "epoch": 0.96, + "grad_norm": 51.289940565699204, + "learning_rate": 1.1176336015959803e-05, + "loss": 1.199, + "step": 79602 + }, + { + "epoch": 0.96, + "grad_norm": 17.586557058633648, + "learning_rate": 1.11757558918245e-05, + "loss": 1.5544, + "step": 79605 + }, + { + "epoch": 0.96, + "grad_norm": 19.192420614004835, + "learning_rate": 1.1175175763676766e-05, + "loss": 1.51, + "step": 79608 + }, + { + "epoch": 0.96, + "grad_norm": 14.849237681787358, + "learning_rate": 1.117459563151858e-05, + "loss": 1.2346, + "step": 79611 + }, + { + "epoch": 0.96, + "grad_norm": 14.539857693536327, + "learning_rate": 1.1174015495351923e-05, + "loss": 1.0624, + "step": 79614 + }, + { + "epoch": 0.96, + "grad_norm": 11.341783735370216, + "learning_rate": 1.1173435355178771e-05, + "loss": 0.9743, + "step": 79617 + }, + { + "epoch": 0.96, + "grad_norm": 8.884950555407345, + "learning_rate": 1.1172855211001108e-05, + "loss": 1.3087, + "step": 79620 + }, + { + "epoch": 0.96, + "grad_norm": 10.080075930834177, + "learning_rate": 1.1172275062820911e-05, + "loss": 1.5287, + "step": 79623 + }, + { + "epoch": 0.96, + "grad_norm": 12.844651799542651, + "learning_rate": 1.1171694910640165e-05, + "loss": 1.4049, + "step": 79626 + }, + { + "epoch": 0.96, + "grad_norm": 35.7150225895987, + "learning_rate": 1.1171114754460846e-05, + "loss": 1.3516, + "step": 79629 + }, + { + "epoch": 0.96, + "grad_norm": 5.123164469969902, + "learning_rate": 1.1170534594284932e-05, + "loss": 1.429, + "step": 79632 + }, + { + "epoch": 0.96, + "grad_norm": 70.43874371408943, + "learning_rate": 1.1169954430114407e-05, + "loss": 1.3097, + "step": 79635 + }, + { + "epoch": 0.96, + "grad_norm": 14.019718593864132, + "learning_rate": 1.1169374261951248e-05, + "loss": 1.5508, + "step": 79638 + }, + { + "epoch": 0.96, + "grad_norm": 2.253098190632896, + "learning_rate": 1.1168794089797435e-05, + "loss": 1.3765, + "step": 79641 + }, + { + "epoch": 0.96, + "grad_norm": 22.416801494709883, + "learning_rate": 1.1168213913654951e-05, + "loss": 1.8099, + "step": 79644 + }, + { + "epoch": 0.96, + "grad_norm": 18.614579701581402, + "learning_rate": 1.1167633733525772e-05, + "loss": 1.3948, + "step": 79647 + }, + { + "epoch": 0.96, + "grad_norm": 6.501536636277624, + "learning_rate": 1.1167053549411882e-05, + "loss": 1.6539, + "step": 79650 + }, + { + "epoch": 0.96, + "grad_norm": 2.683078510483184, + "learning_rate": 1.1166473361315253e-05, + "loss": 1.0844, + "step": 79653 + }, + { + "epoch": 0.96, + "grad_norm": 9.958201448077906, + "learning_rate": 1.1165893169237876e-05, + "loss": 0.935, + "step": 79656 + }, + { + "epoch": 0.96, + "grad_norm": 3.2548116041487285, + "learning_rate": 1.1165312973181724e-05, + "loss": 1.1339, + "step": 79659 + }, + { + "epoch": 0.96, + "grad_norm": 19.266007511478765, + "learning_rate": 1.1164732773148778e-05, + "loss": 1.0873, + "step": 79662 + }, + { + "epoch": 0.96, + "grad_norm": 10.757218802573657, + "learning_rate": 1.1164152569141018e-05, + "loss": 1.5229, + "step": 79665 + }, + { + "epoch": 0.96, + "grad_norm": 3.6888779350506105, + "learning_rate": 1.1163572361160426e-05, + "loss": 1.3635, + "step": 79668 + }, + { + "epoch": 0.96, + "grad_norm": 9.486635684421843, + "learning_rate": 1.116299214920898e-05, + "loss": 1.261, + "step": 79671 + }, + { + "epoch": 0.96, + "grad_norm": 17.546088866498472, + "learning_rate": 1.1162411933288659e-05, + "loss": 1.2025, + "step": 79674 + }, + { + "epoch": 0.96, + "grad_norm": 24.014481509952066, + "learning_rate": 1.1161831713401447e-05, + "loss": 1.4313, + "step": 79677 + }, + { + "epoch": 0.96, + "grad_norm": 6.53947726595052, + "learning_rate": 1.1161251489549323e-05, + "loss": 1.2618, + "step": 79680 + }, + { + "epoch": 0.96, + "grad_norm": 3.122590233204833, + "learning_rate": 1.1160671261734265e-05, + "loss": 1.2963, + "step": 79683 + }, + { + "epoch": 0.96, + "grad_norm": 8.602361161372057, + "learning_rate": 1.1160091029958253e-05, + "loss": 1.4143, + "step": 79686 + }, + { + "epoch": 0.96, + "grad_norm": 101.80930068965233, + "learning_rate": 1.115951079422327e-05, + "loss": 1.512, + "step": 79689 + }, + { + "epoch": 0.96, + "grad_norm": 10.317919091387257, + "learning_rate": 1.1158930554531293e-05, + "loss": 1.1595, + "step": 79692 + }, + { + "epoch": 0.96, + "grad_norm": 7.861605229627184, + "learning_rate": 1.11583503108843e-05, + "loss": 1.2472, + "step": 79695 + }, + { + "epoch": 0.96, + "grad_norm": 16.17184800017516, + "learning_rate": 1.1157770063284279e-05, + "loss": 1.0037, + "step": 79698 + }, + { + "epoch": 0.96, + "grad_norm": 21.17683360778833, + "learning_rate": 1.1157189811733207e-05, + "loss": 1.4743, + "step": 79701 + }, + { + "epoch": 0.96, + "grad_norm": 49.91332849711456, + "learning_rate": 1.115660955623306e-05, + "loss": 1.2883, + "step": 79704 + }, + { + "epoch": 0.96, + "grad_norm": 14.555926920706694, + "learning_rate": 1.1156029296785822e-05, + "loss": 1.6193, + "step": 79707 + }, + { + "epoch": 0.96, + "grad_norm": 7.316138391205684, + "learning_rate": 1.1155449033393473e-05, + "loss": 1.4487, + "step": 79710 + }, + { + "epoch": 0.96, + "grad_norm": 8.139745376869106, + "learning_rate": 1.1154868766057991e-05, + "loss": 1.4037, + "step": 79713 + }, + { + "epoch": 0.96, + "grad_norm": 8.44707403822863, + "learning_rate": 1.115428849478136e-05, + "loss": 1.5365, + "step": 79716 + }, + { + "epoch": 0.96, + "grad_norm": 5.180150460273317, + "learning_rate": 1.1153708219565556e-05, + "loss": 1.4992, + "step": 79719 + }, + { + "epoch": 0.96, + "grad_norm": 7.687584004397149, + "learning_rate": 1.1153127940412562e-05, + "loss": 1.3376, + "step": 79722 + }, + { + "epoch": 0.96, + "grad_norm": 42.088014081116015, + "learning_rate": 1.115254765732436e-05, + "loss": 1.4671, + "step": 79725 + }, + { + "epoch": 0.96, + "grad_norm": 3.6124984133984994, + "learning_rate": 1.1151967370302925e-05, + "loss": 1.2938, + "step": 79728 + }, + { + "epoch": 0.96, + "grad_norm": 5.947108042104679, + "learning_rate": 1.1151387079350242e-05, + "loss": 1.5548, + "step": 79731 + }, + { + "epoch": 0.96, + "grad_norm": 6.744984195299092, + "learning_rate": 1.1150806784468288e-05, + "loss": 1.2586, + "step": 79734 + }, + { + "epoch": 0.96, + "grad_norm": 11.142116765716091, + "learning_rate": 1.1150226485659047e-05, + "loss": 1.2037, + "step": 79737 + }, + { + "epoch": 0.96, + "grad_norm": 3.818909127261761, + "learning_rate": 1.1149646182924497e-05, + "loss": 1.0073, + "step": 79740 + }, + { + "epoch": 0.96, + "grad_norm": 9.24352358416749, + "learning_rate": 1.1149065876266618e-05, + "loss": 1.1923, + "step": 79743 + }, + { + "epoch": 0.96, + "grad_norm": 16.12336689559153, + "learning_rate": 1.1148485565687392e-05, + "loss": 1.1975, + "step": 79746 + }, + { + "epoch": 0.96, + "grad_norm": 20.805569171429976, + "learning_rate": 1.1147905251188795e-05, + "loss": 1.506, + "step": 79749 + }, + { + "epoch": 0.96, + "grad_norm": 14.494880382727898, + "learning_rate": 1.1147324932772814e-05, + "loss": 1.4678, + "step": 79752 + }, + { + "epoch": 0.96, + "grad_norm": 88.96163367956763, + "learning_rate": 1.1146744610441427e-05, + "loss": 1.0736, + "step": 79755 + }, + { + "epoch": 0.96, + "grad_norm": 18.43157613389844, + "learning_rate": 1.1146164284196613e-05, + "loss": 1.6383, + "step": 79758 + }, + { + "epoch": 0.96, + "grad_norm": 15.26286518608441, + "learning_rate": 1.1145583954040351e-05, + "loss": 1.3888, + "step": 79761 + }, + { + "epoch": 0.96, + "grad_norm": 22.346029016109988, + "learning_rate": 1.1145003619974626e-05, + "loss": 1.5313, + "step": 79764 + }, + { + "epoch": 0.96, + "grad_norm": 29.73009166474725, + "learning_rate": 1.1144423282001414e-05, + "loss": 1.3583, + "step": 79767 + }, + { + "epoch": 0.96, + "grad_norm": 15.683989915892035, + "learning_rate": 1.1143842940122701e-05, + "loss": 1.168, + "step": 79770 + }, + { + "epoch": 0.96, + "grad_norm": 9.750835965152664, + "learning_rate": 1.114326259434046e-05, + "loss": 1.331, + "step": 79773 + }, + { + "epoch": 0.96, + "grad_norm": 11.530786842480795, + "learning_rate": 1.114268224465668e-05, + "loss": 1.4333, + "step": 79776 + }, + { + "epoch": 0.96, + "grad_norm": 5.852663926330328, + "learning_rate": 1.1142101891073333e-05, + "loss": 1.0223, + "step": 79779 + }, + { + "epoch": 0.96, + "grad_norm": 18.758340521834587, + "learning_rate": 1.1141521533592404e-05, + "loss": 1.1613, + "step": 79782 + }, + { + "epoch": 0.96, + "grad_norm": 4.2490633232651795, + "learning_rate": 1.1140941172215875e-05, + "loss": 1.2041, + "step": 79785 + }, + { + "epoch": 0.96, + "grad_norm": 10.464012132191202, + "learning_rate": 1.1140360806945725e-05, + "loss": 1.118, + "step": 79788 + }, + { + "epoch": 0.96, + "grad_norm": 9.720563873268441, + "learning_rate": 1.1139780437783934e-05, + "loss": 1.5344, + "step": 79791 + }, + { + "epoch": 0.96, + "grad_norm": 17.14301763763529, + "learning_rate": 1.113920006473248e-05, + "loss": 1.3332, + "step": 79794 + }, + { + "epoch": 0.96, + "grad_norm": 6.412791445782356, + "learning_rate": 1.1138619687793351e-05, + "loss": 1.027, + "step": 79797 + }, + { + "epoch": 0.96, + "grad_norm": 14.76631808797315, + "learning_rate": 1.113803930696852e-05, + "loss": 1.1715, + "step": 79800 + }, + { + "epoch": 0.96, + "grad_norm": 9.978524540408648, + "learning_rate": 1.1137458922259971e-05, + "loss": 1.3224, + "step": 79803 + }, + { + "epoch": 0.96, + "grad_norm": 15.018803142558133, + "learning_rate": 1.1136878533669686e-05, + "loss": 0.8064, + "step": 79806 + }, + { + "epoch": 0.96, + "grad_norm": 7.498569515016764, + "learning_rate": 1.1136298141199643e-05, + "loss": 1.0189, + "step": 79809 + }, + { + "epoch": 0.96, + "grad_norm": 23.01245011121089, + "learning_rate": 1.1135717744851825e-05, + "loss": 1.7218, + "step": 79812 + }, + { + "epoch": 0.96, + "grad_norm": 29.370810540796505, + "learning_rate": 1.1135137344628208e-05, + "loss": 1.0864, + "step": 79815 + }, + { + "epoch": 0.96, + "grad_norm": 69.91512691142431, + "learning_rate": 1.1134556940530782e-05, + "loss": 1.1919, + "step": 79818 + }, + { + "epoch": 0.96, + "grad_norm": 22.522302236488734, + "learning_rate": 1.1133976532561516e-05, + "loss": 1.1969, + "step": 79821 + }, + { + "epoch": 0.96, + "grad_norm": 9.226822717862586, + "learning_rate": 1.11333961207224e-05, + "loss": 1.294, + "step": 79824 + }, + { + "epoch": 0.96, + "grad_norm": 10.847922003768053, + "learning_rate": 1.113281570501541e-05, + "loss": 1.3366, + "step": 79827 + }, + { + "epoch": 0.96, + "grad_norm": 9.449976105427561, + "learning_rate": 1.1132235285442528e-05, + "loss": 1.3929, + "step": 79830 + }, + { + "epoch": 0.96, + "grad_norm": 47.91236197719874, + "learning_rate": 1.1131654862005735e-05, + "loss": 1.0817, + "step": 79833 + }, + { + "epoch": 0.96, + "grad_norm": 12.077629433436506, + "learning_rate": 1.113107443470701e-05, + "loss": 1.603, + "step": 79836 + }, + { + "epoch": 0.96, + "grad_norm": 29.755309397697502, + "learning_rate": 1.1130494003548338e-05, + "loss": 1.1725, + "step": 79839 + }, + { + "epoch": 0.96, + "grad_norm": 13.207379365490395, + "learning_rate": 1.1129913568531696e-05, + "loss": 1.1419, + "step": 79842 + }, + { + "epoch": 0.96, + "grad_norm": 9.43915106341081, + "learning_rate": 1.1129333129659068e-05, + "loss": 1.528, + "step": 79845 + }, + { + "epoch": 0.96, + "grad_norm": 13.661271896492874, + "learning_rate": 1.1128752686932429e-05, + "loss": 1.1212, + "step": 79848 + }, + { + "epoch": 0.96, + "grad_norm": 10.753720122840708, + "learning_rate": 1.1128172240353767e-05, + "loss": 1.2403, + "step": 79851 + }, + { + "epoch": 0.96, + "grad_norm": 8.87844151600561, + "learning_rate": 1.1127591789925056e-05, + "loss": 1.3405, + "step": 79854 + }, + { + "epoch": 0.96, + "grad_norm": 8.97317382908035, + "learning_rate": 1.1127011335648281e-05, + "loss": 1.2929, + "step": 79857 + }, + { + "epoch": 0.96, + "grad_norm": 9.007894670382374, + "learning_rate": 1.1126430877525425e-05, + "loss": 1.3478, + "step": 79860 + }, + { + "epoch": 0.96, + "grad_norm": 4.081005989483668, + "learning_rate": 1.1125850415558466e-05, + "loss": 1.4465, + "step": 79863 + }, + { + "epoch": 0.96, + "grad_norm": 9.504820826175653, + "learning_rate": 1.1125269949749384e-05, + "loss": 1.3006, + "step": 79866 + }, + { + "epoch": 0.96, + "grad_norm": 5.6978311947309015, + "learning_rate": 1.1124689480100159e-05, + "loss": 1.5321, + "step": 79869 + }, + { + "epoch": 0.96, + "grad_norm": 11.05105445317947, + "learning_rate": 1.112410900661278e-05, + "loss": 1.215, + "step": 79872 + }, + { + "epoch": 0.96, + "grad_norm": 13.19100566906737, + "learning_rate": 1.1123528529289216e-05, + "loss": 1.0623, + "step": 79875 + }, + { + "epoch": 0.96, + "grad_norm": 14.869643907599984, + "learning_rate": 1.1122948048131454e-05, + "loss": 1.2972, + "step": 79878 + }, + { + "epoch": 0.96, + "grad_norm": 12.023204269395853, + "learning_rate": 1.1122367563141476e-05, + "loss": 1.2468, + "step": 79881 + }, + { + "epoch": 0.96, + "grad_norm": 37.419731133833125, + "learning_rate": 1.1121787074321261e-05, + "loss": 1.1931, + "step": 79884 + }, + { + "epoch": 0.96, + "grad_norm": 18.849069489642773, + "learning_rate": 1.1121206581672794e-05, + "loss": 1.3984, + "step": 79887 + }, + { + "epoch": 0.96, + "grad_norm": 14.274220928029301, + "learning_rate": 1.112062608519805e-05, + "loss": 1.2091, + "step": 79890 + }, + { + "epoch": 0.96, + "grad_norm": 9.556040007438542, + "learning_rate": 1.1120045584899015e-05, + "loss": 1.3688, + "step": 79893 + }, + { + "epoch": 0.96, + "grad_norm": 14.139701106260388, + "learning_rate": 1.1119465080777665e-05, + "loss": 1.1724, + "step": 79896 + }, + { + "epoch": 0.96, + "grad_norm": 33.364038277490934, + "learning_rate": 1.1118884572835986e-05, + "loss": 1.3764, + "step": 79899 + }, + { + "epoch": 0.96, + "grad_norm": 15.95637487737347, + "learning_rate": 1.1118304061075955e-05, + "loss": 1.2785, + "step": 79902 + }, + { + "epoch": 0.96, + "grad_norm": 15.721406678400932, + "learning_rate": 1.1117723545499558e-05, + "loss": 1.3971, + "step": 79905 + }, + { + "epoch": 0.96, + "grad_norm": 13.910925829358158, + "learning_rate": 1.1117143026108769e-05, + "loss": 1.0387, + "step": 79908 + }, + { + "epoch": 0.96, + "grad_norm": 31.1588168586542, + "learning_rate": 1.1116562502905574e-05, + "loss": 1.1918, + "step": 79911 + }, + { + "epoch": 0.96, + "grad_norm": 14.49779540213655, + "learning_rate": 1.1115981975891959e-05, + "loss": 1.4297, + "step": 79914 + }, + { + "epoch": 0.96, + "grad_norm": 15.833817767566597, + "learning_rate": 1.1115401445069892e-05, + "loss": 1.219, + "step": 79917 + }, + { + "epoch": 0.96, + "grad_norm": 10.515402828093837, + "learning_rate": 1.1114820910441368e-05, + "loss": 1.1269, + "step": 79920 + }, + { + "epoch": 0.96, + "grad_norm": 8.225135822006985, + "learning_rate": 1.1114240372008358e-05, + "loss": 1.4608, + "step": 79923 + }, + { + "epoch": 0.96, + "grad_norm": 48.7127938335253, + "learning_rate": 1.111365982977285e-05, + "loss": 1.1742, + "step": 79926 + }, + { + "epoch": 0.96, + "grad_norm": 17.24362421988669, + "learning_rate": 1.1113079283736818e-05, + "loss": 1.2146, + "step": 79929 + }, + { + "epoch": 0.96, + "grad_norm": 11.059920466812132, + "learning_rate": 1.1112498733902251e-05, + "loss": 1.6087, + "step": 79932 + }, + { + "epoch": 0.96, + "grad_norm": 14.55831228437192, + "learning_rate": 1.1111918180271124e-05, + "loss": 1.5223, + "step": 79935 + }, + { + "epoch": 0.96, + "grad_norm": 41.99520122918898, + "learning_rate": 1.111133762284542e-05, + "loss": 1.2147, + "step": 79938 + }, + { + "epoch": 0.96, + "grad_norm": 7.965183354561916, + "learning_rate": 1.1110757061627125e-05, + "loss": 1.1575, + "step": 79941 + }, + { + "epoch": 0.96, + "grad_norm": 11.099276536393647, + "learning_rate": 1.111017649661821e-05, + "loss": 1.1858, + "step": 79944 + }, + { + "epoch": 0.96, + "grad_norm": 12.035052498524232, + "learning_rate": 1.110959592782067e-05, + "loss": 1.2014, + "step": 79947 + }, + { + "epoch": 0.96, + "grad_norm": 6.123336887630901, + "learning_rate": 1.1109015355236474e-05, + "loss": 1.5465, + "step": 79950 + }, + { + "epoch": 0.96, + "grad_norm": 11.678949582413052, + "learning_rate": 1.110843477886761e-05, + "loss": 1.2323, + "step": 79953 + }, + { + "epoch": 0.96, + "grad_norm": 59.98512105178703, + "learning_rate": 1.1107854198716055e-05, + "loss": 1.5111, + "step": 79956 + }, + { + "epoch": 0.96, + "grad_norm": 20.9460484559321, + "learning_rate": 1.1107273614783798e-05, + "loss": 1.1835, + "step": 79959 + }, + { + "epoch": 0.96, + "grad_norm": 9.553883853367129, + "learning_rate": 1.1106693027072809e-05, + "loss": 1.3872, + "step": 79962 + }, + { + "epoch": 0.96, + "grad_norm": 20.48148693615772, + "learning_rate": 1.1106112435585075e-05, + "loss": 1.4066, + "step": 79965 + }, + { + "epoch": 0.96, + "grad_norm": 5.377938549212227, + "learning_rate": 1.1105531840322583e-05, + "loss": 1.4979, + "step": 79968 + }, + { + "epoch": 0.96, + "grad_norm": 16.343819283278986, + "learning_rate": 1.1104951241287305e-05, + "loss": 1.077, + "step": 79971 + }, + { + "epoch": 0.96, + "grad_norm": 13.597422703745261, + "learning_rate": 1.1104370638481228e-05, + "loss": 1.2173, + "step": 79974 + }, + { + "epoch": 0.96, + "grad_norm": 14.686452188900418, + "learning_rate": 1.110379003190633e-05, + "loss": 1.5897, + "step": 79977 + }, + { + "epoch": 0.96, + "grad_norm": 4.838545582113251, + "learning_rate": 1.1103209421564598e-05, + "loss": 1.1462, + "step": 79980 + }, + { + "epoch": 0.96, + "grad_norm": 9.556341580234596, + "learning_rate": 1.1102628807458007e-05, + "loss": 1.1542, + "step": 79983 + }, + { + "epoch": 0.96, + "grad_norm": 9.49963735480119, + "learning_rate": 1.1102048189588541e-05, + "loss": 1.1928, + "step": 79986 + }, + { + "epoch": 0.96, + "grad_norm": 5.60994744250597, + "learning_rate": 1.1101467567958183e-05, + "loss": 1.7519, + "step": 79989 + }, + { + "epoch": 0.96, + "grad_norm": 17.395339607097675, + "learning_rate": 1.110088694256891e-05, + "loss": 1.345, + "step": 79992 + }, + { + "epoch": 0.96, + "grad_norm": 10.390952793195243, + "learning_rate": 1.1100306313422708e-05, + "loss": 1.3742, + "step": 79995 + }, + { + "epoch": 0.96, + "grad_norm": 5.022935344406562, + "learning_rate": 1.1099725680521555e-05, + "loss": 1.2358, + "step": 79998 + }, + { + "epoch": 0.96, + "grad_norm": 13.01708405506821, + "learning_rate": 1.1099145043867442e-05, + "loss": 1.8418, + "step": 80001 + }, + { + "epoch": 0.96, + "grad_norm": 11.692638718859246, + "learning_rate": 1.1098564403462336e-05, + "loss": 1.5647, + "step": 80004 + }, + { + "epoch": 0.96, + "grad_norm": 10.888488549227722, + "learning_rate": 1.1097983759308226e-05, + "loss": 1.505, + "step": 80007 + }, + { + "epoch": 0.96, + "grad_norm": 7.6781411894269604, + "learning_rate": 1.1097403111407094e-05, + "loss": 1.5712, + "step": 80010 + }, + { + "epoch": 0.96, + "grad_norm": 7.894178365446107, + "learning_rate": 1.109682245976092e-05, + "loss": 1.2913, + "step": 80013 + }, + { + "epoch": 0.96, + "grad_norm": 7.560715271704673, + "learning_rate": 1.1096241804371685e-05, + "loss": 1.1023, + "step": 80016 + }, + { + "epoch": 0.96, + "grad_norm": 17.66984313902982, + "learning_rate": 1.1095661145241373e-05, + "loss": 1.3168, + "step": 80019 + }, + { + "epoch": 0.96, + "grad_norm": 12.355713517940014, + "learning_rate": 1.1095080482371965e-05, + "loss": 1.2957, + "step": 80022 + }, + { + "epoch": 0.96, + "grad_norm": 7.016275794161473, + "learning_rate": 1.1094499815765438e-05, + "loss": 1.3709, + "step": 80025 + }, + { + "epoch": 0.96, + "grad_norm": 5.0257625389957346, + "learning_rate": 1.109391914542378e-05, + "loss": 0.9311, + "step": 80028 + }, + { + "epoch": 0.96, + "grad_norm": 20.592312848951416, + "learning_rate": 1.1093338471348971e-05, + "loss": 1.1412, + "step": 80031 + }, + { + "epoch": 0.96, + "grad_norm": 20.08059836105867, + "learning_rate": 1.1092757793542992e-05, + "loss": 1.294, + "step": 80034 + }, + { + "epoch": 0.96, + "grad_norm": 5.0572916228380205, + "learning_rate": 1.1092177112007821e-05, + "loss": 1.5384, + "step": 80037 + }, + { + "epoch": 0.96, + "grad_norm": 11.056252414480044, + "learning_rate": 1.1091596426745446e-05, + "loss": 1.1226, + "step": 80040 + }, + { + "epoch": 0.96, + "grad_norm": 2.208512496326632, + "learning_rate": 1.1091015737757844e-05, + "loss": 1.523, + "step": 80043 + }, + { + "epoch": 0.96, + "grad_norm": 7.920507019286625, + "learning_rate": 1.1090435045046998e-05, + "loss": 1.2453, + "step": 80046 + }, + { + "epoch": 0.96, + "grad_norm": 16.634468440833558, + "learning_rate": 1.108985434861489e-05, + "loss": 1.3745, + "step": 80049 + }, + { + "epoch": 0.96, + "grad_norm": 10.240641798992797, + "learning_rate": 1.1089273648463503e-05, + "loss": 1.5072, + "step": 80052 + }, + { + "epoch": 0.96, + "grad_norm": 11.87042674283431, + "learning_rate": 1.108869294459482e-05, + "loss": 1.3235, + "step": 80055 + }, + { + "epoch": 0.96, + "grad_norm": 7.231744338502009, + "learning_rate": 1.1088112237010812e-05, + "loss": 1.1245, + "step": 80058 + }, + { + "epoch": 0.96, + "grad_norm": 13.19558705889107, + "learning_rate": 1.1087531525713475e-05, + "loss": 1.3148, + "step": 80061 + }, + { + "epoch": 0.96, + "grad_norm": 21.90214353964397, + "learning_rate": 1.1086950810704784e-05, + "loss": 1.3497, + "step": 80064 + }, + { + "epoch": 0.96, + "grad_norm": 5.5630829151971115, + "learning_rate": 1.1086370091986722e-05, + "loss": 1.336, + "step": 80067 + }, + { + "epoch": 0.96, + "grad_norm": 4.2746198482212066, + "learning_rate": 1.1085789369561266e-05, + "loss": 1.3072, + "step": 80070 + }, + { + "epoch": 0.96, + "grad_norm": 6.35153264466079, + "learning_rate": 1.1085208643430407e-05, + "loss": 1.4216, + "step": 80073 + }, + { + "epoch": 0.96, + "grad_norm": 23.63222712190207, + "learning_rate": 1.108462791359612e-05, + "loss": 1.683, + "step": 80076 + }, + { + "epoch": 0.96, + "grad_norm": 10.808115237045874, + "learning_rate": 1.1084047180060386e-05, + "loss": 1.1178, + "step": 80079 + }, + { + "epoch": 0.96, + "grad_norm": 16.871305564781785, + "learning_rate": 1.1083466442825193e-05, + "loss": 1.4713, + "step": 80082 + }, + { + "epoch": 0.96, + "grad_norm": 6.656245755550099, + "learning_rate": 1.1082885701892518e-05, + "loss": 1.5819, + "step": 80085 + }, + { + "epoch": 0.96, + "grad_norm": 12.886499082120558, + "learning_rate": 1.1082304957264344e-05, + "loss": 1.2848, + "step": 80088 + }, + { + "epoch": 0.96, + "grad_norm": 4.644965327864699, + "learning_rate": 1.1081724208942652e-05, + "loss": 1.0564, + "step": 80091 + }, + { + "epoch": 0.96, + "grad_norm": 11.704726941806747, + "learning_rate": 1.1081143456929427e-05, + "loss": 1.3958, + "step": 80094 + }, + { + "epoch": 0.96, + "grad_norm": 16.62773796190375, + "learning_rate": 1.1080562701226649e-05, + "loss": 1.3178, + "step": 80097 + }, + { + "epoch": 0.96, + "grad_norm": 7.134694802175012, + "learning_rate": 1.1079981941836297e-05, + "loss": 1.4071, + "step": 80100 + }, + { + "epoch": 0.96, + "grad_norm": 8.262285670865577, + "learning_rate": 1.1079401178760358e-05, + "loss": 1.4334, + "step": 80103 + }, + { + "epoch": 0.96, + "grad_norm": 4.713639347091028, + "learning_rate": 1.1078820412000811e-05, + "loss": 1.4006, + "step": 80106 + }, + { + "epoch": 0.96, + "grad_norm": 3.6147820283575776, + "learning_rate": 1.1078239641559639e-05, + "loss": 1.3928, + "step": 80109 + }, + { + "epoch": 0.96, + "grad_norm": 6.953367904327196, + "learning_rate": 1.107765886743882e-05, + "loss": 1.5828, + "step": 80112 + }, + { + "epoch": 0.96, + "grad_norm": 59.63983443983928, + "learning_rate": 1.1077078089640346e-05, + "loss": 1.0877, + "step": 80115 + }, + { + "epoch": 0.96, + "grad_norm": 16.655628886674513, + "learning_rate": 1.1076497308166187e-05, + "loss": 1.3447, + "step": 80118 + }, + { + "epoch": 0.96, + "grad_norm": 9.731551367930532, + "learning_rate": 1.1075916523018332e-05, + "loss": 1.3108, + "step": 80121 + }, + { + "epoch": 0.96, + "grad_norm": 15.667336758161627, + "learning_rate": 1.1075335734198763e-05, + "loss": 1.1652, + "step": 80124 + }, + { + "epoch": 0.96, + "grad_norm": 62.94191869959095, + "learning_rate": 1.1074754941709457e-05, + "loss": 1.4341, + "step": 80127 + }, + { + "epoch": 0.96, + "grad_norm": 12.598455341082774, + "learning_rate": 1.1074174145552404e-05, + "loss": 1.4303, + "step": 80130 + }, + { + "epoch": 0.96, + "grad_norm": 5.586505669252681, + "learning_rate": 1.1073593345729577e-05, + "loss": 1.091, + "step": 80133 + }, + { + "epoch": 0.96, + "grad_norm": 11.292202808830126, + "learning_rate": 1.1073012542242964e-05, + "loss": 1.5255, + "step": 80136 + }, + { + "epoch": 0.96, + "grad_norm": 9.052595736641662, + "learning_rate": 1.1072431735094549e-05, + "loss": 1.3901, + "step": 80139 + }, + { + "epoch": 0.96, + "grad_norm": 53.279179349358714, + "learning_rate": 1.1071850924286307e-05, + "loss": 1.4464, + "step": 80142 + }, + { + "epoch": 0.96, + "grad_norm": 8.164566258453329, + "learning_rate": 1.1071270109820225e-05, + "loss": 1.4447, + "step": 80145 + }, + { + "epoch": 0.96, + "grad_norm": 2.58339211077852, + "learning_rate": 1.1070689291698285e-05, + "loss": 1.5477, + "step": 80148 + }, + { + "epoch": 0.96, + "grad_norm": 8.709278121488675, + "learning_rate": 1.1070108469922467e-05, + "loss": 1.3564, + "step": 80151 + }, + { + "epoch": 0.96, + "grad_norm": 12.876818364772145, + "learning_rate": 1.1069527644494752e-05, + "loss": 1.6723, + "step": 80154 + }, + { + "epoch": 0.96, + "grad_norm": 13.775695303896777, + "learning_rate": 1.1068946815417128e-05, + "loss": 1.0466, + "step": 80157 + }, + { + "epoch": 0.96, + "grad_norm": 8.931105497359116, + "learning_rate": 1.1068365982691574e-05, + "loss": 1.3995, + "step": 80160 + }, + { + "epoch": 0.96, + "grad_norm": 22.461361335724696, + "learning_rate": 1.106778514632007e-05, + "loss": 1.1311, + "step": 80163 + }, + { + "epoch": 0.96, + "grad_norm": 34.83777009947861, + "learning_rate": 1.1067204306304597e-05, + "loss": 1.462, + "step": 80166 + }, + { + "epoch": 0.96, + "grad_norm": 3.743726374621094, + "learning_rate": 1.1066623462647146e-05, + "loss": 1.5141, + "step": 80169 + }, + { + "epoch": 0.96, + "grad_norm": 28.58292980488643, + "learning_rate": 1.106604261534969e-05, + "loss": 1.2987, + "step": 80172 + }, + { + "epoch": 0.96, + "grad_norm": 5.976916563128109, + "learning_rate": 1.1065461764414215e-05, + "loss": 0.9423, + "step": 80175 + }, + { + "epoch": 0.96, + "grad_norm": 13.119270829564957, + "learning_rate": 1.1064880909842702e-05, + "loss": 1.2767, + "step": 80178 + }, + { + "epoch": 0.96, + "grad_norm": 3.3353499893264216, + "learning_rate": 1.1064300051637136e-05, + "loss": 1.4866, + "step": 80181 + }, + { + "epoch": 0.96, + "grad_norm": 6.001372165513141, + "learning_rate": 1.1063719189799496e-05, + "loss": 1.2174, + "step": 80184 + }, + { + "epoch": 0.96, + "grad_norm": 19.795250898394666, + "learning_rate": 1.1063138324331763e-05, + "loss": 1.7812, + "step": 80187 + }, + { + "epoch": 0.96, + "grad_norm": 9.10413466062267, + "learning_rate": 1.1062557455235925e-05, + "loss": 1.4836, + "step": 80190 + }, + { + "epoch": 0.96, + "grad_norm": 57.20433278784758, + "learning_rate": 1.1061976582513963e-05, + "loss": 1.3935, + "step": 80193 + }, + { + "epoch": 0.96, + "grad_norm": 11.131441561298772, + "learning_rate": 1.1061395706167855e-05, + "loss": 1.1464, + "step": 80196 + }, + { + "epoch": 0.96, + "grad_norm": 30.833951575377103, + "learning_rate": 1.1060814826199583e-05, + "loss": 1.0641, + "step": 80199 + }, + { + "epoch": 0.96, + "grad_norm": 30.23935561372538, + "learning_rate": 1.1060233942611135e-05, + "loss": 1.4065, + "step": 80202 + }, + { + "epoch": 0.96, + "grad_norm": 29.19151146129116, + "learning_rate": 1.1059653055404493e-05, + "loss": 1.1267, + "step": 80205 + }, + { + "epoch": 0.96, + "grad_norm": 12.357251122601676, + "learning_rate": 1.1059072164581633e-05, + "loss": 1.4515, + "step": 80208 + }, + { + "epoch": 0.96, + "grad_norm": 2.8898855541800494, + "learning_rate": 1.1058491270144543e-05, + "loss": 1.2056, + "step": 80211 + }, + { + "epoch": 0.96, + "grad_norm": 19.57412848550996, + "learning_rate": 1.1057910372095203e-05, + "loss": 1.5625, + "step": 80214 + }, + { + "epoch": 0.96, + "grad_norm": 16.671025139730144, + "learning_rate": 1.1057329470435596e-05, + "loss": 1.4826, + "step": 80217 + }, + { + "epoch": 0.96, + "grad_norm": 16.3440180439932, + "learning_rate": 1.1056748565167703e-05, + "loss": 1.1144, + "step": 80220 + }, + { + "epoch": 0.96, + "grad_norm": 19.625419607229833, + "learning_rate": 1.1056167656293508e-05, + "loss": 1.4094, + "step": 80223 + }, + { + "epoch": 0.96, + "grad_norm": 20.2304094257218, + "learning_rate": 1.1055586743814996e-05, + "loss": 1.0662, + "step": 80226 + }, + { + "epoch": 0.96, + "grad_norm": 11.609379208612458, + "learning_rate": 1.1055005827734147e-05, + "loss": 1.3063, + "step": 80229 + }, + { + "epoch": 0.96, + "grad_norm": 18.628915124906733, + "learning_rate": 1.105442490805294e-05, + "loss": 1.113, + "step": 80232 + }, + { + "epoch": 0.96, + "grad_norm": 19.17022844837448, + "learning_rate": 1.105384398477336e-05, + "loss": 1.3918, + "step": 80235 + }, + { + "epoch": 0.96, + "grad_norm": 20.961622919726764, + "learning_rate": 1.1053263057897392e-05, + "loss": 1.5704, + "step": 80238 + }, + { + "epoch": 0.96, + "grad_norm": 15.612793059095694, + "learning_rate": 1.1052682127427014e-05, + "loss": 1.4437, + "step": 80241 + }, + { + "epoch": 0.96, + "grad_norm": 11.700165888502452, + "learning_rate": 1.1052101193364215e-05, + "loss": 1.3496, + "step": 80244 + }, + { + "epoch": 0.96, + "grad_norm": 2.3215644815553547, + "learning_rate": 1.1051520255710972e-05, + "loss": 1.4181, + "step": 80247 + }, + { + "epoch": 0.96, + "grad_norm": 22.904102594088926, + "learning_rate": 1.1050939314469272e-05, + "loss": 0.956, + "step": 80250 + }, + { + "epoch": 0.97, + "grad_norm": 6.879358385744035, + "learning_rate": 1.1050358369641087e-05, + "loss": 0.8978, + "step": 80253 + }, + { + "epoch": 0.97, + "grad_norm": 8.83134489099982, + "learning_rate": 1.1049777421228413e-05, + "loss": 1.2819, + "step": 80256 + }, + { + "epoch": 0.97, + "grad_norm": 12.554384690428003, + "learning_rate": 1.1049196469233227e-05, + "loss": 1.3479, + "step": 80259 + }, + { + "epoch": 0.97, + "grad_norm": 5.8671346442241665, + "learning_rate": 1.1048615513657509e-05, + "loss": 1.2141, + "step": 80262 + }, + { + "epoch": 0.97, + "grad_norm": 11.841479280011082, + "learning_rate": 1.1048034554503246e-05, + "loss": 1.2341, + "step": 80265 + }, + { + "epoch": 0.97, + "grad_norm": 23.316391090996717, + "learning_rate": 1.1047453591772417e-05, + "loss": 1.2175, + "step": 80268 + }, + { + "epoch": 0.97, + "grad_norm": 10.764825514466215, + "learning_rate": 1.1046872625467008e-05, + "loss": 1.2007, + "step": 80271 + }, + { + "epoch": 0.97, + "grad_norm": 12.126248301861628, + "learning_rate": 1.1046291655588997e-05, + "loss": 1.0251, + "step": 80274 + }, + { + "epoch": 0.97, + "grad_norm": 5.630229587985899, + "learning_rate": 1.1045710682140372e-05, + "loss": 1.1187, + "step": 80277 + }, + { + "epoch": 0.97, + "grad_norm": 51.25490528328959, + "learning_rate": 1.1045129705123112e-05, + "loss": 1.1812, + "step": 80280 + }, + { + "epoch": 0.97, + "grad_norm": 9.611510931910296, + "learning_rate": 1.1044548724539202e-05, + "loss": 1.1598, + "step": 80283 + }, + { + "epoch": 0.97, + "grad_norm": 7.145949724826262, + "learning_rate": 1.1043967740390623e-05, + "loss": 1.4547, + "step": 80286 + }, + { + "epoch": 0.97, + "grad_norm": 225.43803218943577, + "learning_rate": 1.1043386752679357e-05, + "loss": 1.0613, + "step": 80289 + }, + { + "epoch": 0.97, + "grad_norm": 5.100585560529549, + "learning_rate": 1.1042805761407389e-05, + "loss": 1.435, + "step": 80292 + }, + { + "epoch": 0.97, + "grad_norm": 22.596907309545244, + "learning_rate": 1.1042224766576697e-05, + "loss": 1.0976, + "step": 80295 + }, + { + "epoch": 0.97, + "grad_norm": 10.22055725903474, + "learning_rate": 1.1041643768189271e-05, + "loss": 1.2454, + "step": 80298 + }, + { + "epoch": 0.97, + "grad_norm": 10.934329906366708, + "learning_rate": 1.104106276624709e-05, + "loss": 1.6882, + "step": 80301 + }, + { + "epoch": 0.97, + "grad_norm": 18.30897633196261, + "learning_rate": 1.1040481760752137e-05, + "loss": 1.1427, + "step": 80304 + }, + { + "epoch": 0.97, + "grad_norm": 12.956175982459046, + "learning_rate": 1.1039900751706391e-05, + "loss": 1.0531, + "step": 80307 + }, + { + "epoch": 0.97, + "grad_norm": 45.54575865677155, + "learning_rate": 1.1039319739111844e-05, + "loss": 1.3767, + "step": 80310 + }, + { + "epoch": 0.97, + "grad_norm": 10.866879262084511, + "learning_rate": 1.1038738722970468e-05, + "loss": 1.2732, + "step": 80313 + }, + { + "epoch": 0.97, + "grad_norm": 3.3940222044853057, + "learning_rate": 1.103815770328425e-05, + "loss": 1.3108, + "step": 80316 + }, + { + "epoch": 0.97, + "grad_norm": 6.969298320072298, + "learning_rate": 1.103757668005518e-05, + "loss": 1.3784, + "step": 80319 + }, + { + "epoch": 0.97, + "grad_norm": 13.200190397758897, + "learning_rate": 1.1036995653285229e-05, + "loss": 1.3675, + "step": 80322 + }, + { + "epoch": 0.97, + "grad_norm": 29.86794621238926, + "learning_rate": 1.103641462297639e-05, + "loss": 1.2921, + "step": 80325 + }, + { + "epoch": 0.97, + "grad_norm": 14.39234880277871, + "learning_rate": 1.1035833589130636e-05, + "loss": 1.558, + "step": 80328 + }, + { + "epoch": 0.97, + "grad_norm": 9.50613450359891, + "learning_rate": 1.1035252551749962e-05, + "loss": 0.9297, + "step": 80331 + }, + { + "epoch": 0.97, + "grad_norm": 11.710508703212811, + "learning_rate": 1.1034671510836336e-05, + "loss": 0.7997, + "step": 80334 + }, + { + "epoch": 0.97, + "grad_norm": 10.133961866453632, + "learning_rate": 1.1034090466391754e-05, + "loss": 1.4417, + "step": 80337 + }, + { + "epoch": 0.97, + "grad_norm": 8.969146851645393, + "learning_rate": 1.103350941841819e-05, + "loss": 1.5646, + "step": 80340 + }, + { + "epoch": 0.97, + "grad_norm": 17.812740173691466, + "learning_rate": 1.1032928366917634e-05, + "loss": 1.2785, + "step": 80343 + }, + { + "epoch": 0.97, + "grad_norm": 3.5193864344340215, + "learning_rate": 1.1032347311892064e-05, + "loss": 1.6902, + "step": 80346 + }, + { + "epoch": 0.97, + "grad_norm": 13.699712686214532, + "learning_rate": 1.1031766253343464e-05, + "loss": 1.2152, + "step": 80349 + }, + { + "epoch": 0.97, + "grad_norm": 18.07230258413646, + "learning_rate": 1.1031185191273822e-05, + "loss": 1.2943, + "step": 80352 + }, + { + "epoch": 0.97, + "grad_norm": 29.656628050341215, + "learning_rate": 1.1030604125685109e-05, + "loss": 1.5589, + "step": 80355 + }, + { + "epoch": 0.97, + "grad_norm": 64.12327032826039, + "learning_rate": 1.103002305657932e-05, + "loss": 1.3667, + "step": 80358 + }, + { + "epoch": 0.97, + "grad_norm": 20.628623481974046, + "learning_rate": 1.102944198395843e-05, + "loss": 1.4404, + "step": 80361 + }, + { + "epoch": 0.97, + "grad_norm": 8.050085920134707, + "learning_rate": 1.1028860907824432e-05, + "loss": 1.4224, + "step": 80364 + }, + { + "epoch": 0.97, + "grad_norm": 44.45548929433496, + "learning_rate": 1.1028279828179297e-05, + "loss": 1.4552, + "step": 80367 + }, + { + "epoch": 0.97, + "grad_norm": 16.79180985105246, + "learning_rate": 1.102769874502501e-05, + "loss": 1.5291, + "step": 80370 + }, + { + "epoch": 0.97, + "grad_norm": 18.3467040446681, + "learning_rate": 1.1027117658363564e-05, + "loss": 1.2389, + "step": 80373 + }, + { + "epoch": 0.97, + "grad_norm": 19.125644010313103, + "learning_rate": 1.1026536568196933e-05, + "loss": 1.3591, + "step": 80376 + }, + { + "epoch": 0.97, + "grad_norm": 7.084431415406455, + "learning_rate": 1.1025955474527104e-05, + "loss": 1.7315, + "step": 80379 + }, + { + "epoch": 0.97, + "grad_norm": 7.99453384925603, + "learning_rate": 1.1025374377356055e-05, + "loss": 1.412, + "step": 80382 + }, + { + "epoch": 0.97, + "grad_norm": 14.29979109389013, + "learning_rate": 1.1024793276685776e-05, + "loss": 1.4013, + "step": 80385 + }, + { + "epoch": 0.97, + "grad_norm": 30.033441960276196, + "learning_rate": 1.1024212172518244e-05, + "loss": 1.3628, + "step": 80388 + }, + { + "epoch": 0.97, + "grad_norm": 12.293927502410678, + "learning_rate": 1.1023631064855446e-05, + "loss": 1.3939, + "step": 80391 + }, + { + "epoch": 0.97, + "grad_norm": 6.925281907204708, + "learning_rate": 1.1023049953699362e-05, + "loss": 1.6011, + "step": 80394 + }, + { + "epoch": 0.97, + "grad_norm": 8.184198946867458, + "learning_rate": 1.102246883905198e-05, + "loss": 1.4777, + "step": 80397 + }, + { + "epoch": 0.97, + "grad_norm": 16.707981682091482, + "learning_rate": 1.102188772091528e-05, + "loss": 0.9268, + "step": 80400 + }, + { + "epoch": 0.97, + "grad_norm": 4.187394549814836, + "learning_rate": 1.1021306599291242e-05, + "loss": 1.4236, + "step": 80403 + }, + { + "epoch": 0.97, + "grad_norm": 13.321753063810494, + "learning_rate": 1.1020725474181857e-05, + "loss": 1.3245, + "step": 80406 + }, + { + "epoch": 0.97, + "grad_norm": 15.233933876182173, + "learning_rate": 1.10201443455891e-05, + "loss": 1.475, + "step": 80409 + }, + { + "epoch": 0.97, + "grad_norm": 8.744043787602603, + "learning_rate": 1.101956321351496e-05, + "loss": 0.9916, + "step": 80412 + }, + { + "epoch": 0.97, + "grad_norm": 8.71493798722391, + "learning_rate": 1.1018982077961415e-05, + "loss": 1.2476, + "step": 80415 + }, + { + "epoch": 0.97, + "grad_norm": 7.020416395177053, + "learning_rate": 1.1018400938930458e-05, + "loss": 0.9329, + "step": 80418 + }, + { + "epoch": 0.97, + "grad_norm": 9.52486397325595, + "learning_rate": 1.1017819796424059e-05, + "loss": 1.1455, + "step": 80421 + }, + { + "epoch": 0.97, + "grad_norm": 10.59844627778406, + "learning_rate": 1.1017238650444206e-05, + "loss": 1.1352, + "step": 80424 + }, + { + "epoch": 0.97, + "grad_norm": 17.912712011750877, + "learning_rate": 1.1016657500992891e-05, + "loss": 1.4488, + "step": 80427 + }, + { + "epoch": 0.97, + "grad_norm": 26.701689422061186, + "learning_rate": 1.1016076348072085e-05, + "loss": 1.0679, + "step": 80430 + }, + { + "epoch": 0.97, + "grad_norm": 4.557429214913834, + "learning_rate": 1.1015495191683779e-05, + "loss": 1.2886, + "step": 80433 + }, + { + "epoch": 0.97, + "grad_norm": 5.4465245060195775, + "learning_rate": 1.101491403182995e-05, + "loss": 1.502, + "step": 80436 + }, + { + "epoch": 0.97, + "grad_norm": 5.721663391503158, + "learning_rate": 1.1014332868512591e-05, + "loss": 1.0844, + "step": 80439 + }, + { + "epoch": 0.97, + "grad_norm": 14.041034339647883, + "learning_rate": 1.1013751701733675e-05, + "loss": 1.3941, + "step": 80442 + }, + { + "epoch": 0.97, + "grad_norm": 9.856721374734843, + "learning_rate": 1.101317053149519e-05, + "loss": 1.188, + "step": 80445 + }, + { + "epoch": 0.97, + "grad_norm": 6.882192810891647, + "learning_rate": 1.101258935779912e-05, + "loss": 1.2447, + "step": 80448 + }, + { + "epoch": 0.97, + "grad_norm": 6.34889039475332, + "learning_rate": 1.1012008180647444e-05, + "loss": 1.5608, + "step": 80451 + }, + { + "epoch": 0.97, + "grad_norm": 15.443853696121762, + "learning_rate": 1.1011427000042152e-05, + "loss": 1.2027, + "step": 80454 + }, + { + "epoch": 0.97, + "grad_norm": 11.899475114004414, + "learning_rate": 1.1010845815985223e-05, + "loss": 1.2247, + "step": 80457 + }, + { + "epoch": 0.97, + "grad_norm": 25.386463091469913, + "learning_rate": 1.1010264628478643e-05, + "loss": 1.424, + "step": 80460 + }, + { + "epoch": 0.97, + "grad_norm": 6.180646853438665, + "learning_rate": 1.1009683437524392e-05, + "loss": 1.242, + "step": 80463 + }, + { + "epoch": 0.97, + "grad_norm": 18.411846750642418, + "learning_rate": 1.1009102243124455e-05, + "loss": 1.4138, + "step": 80466 + }, + { + "epoch": 0.97, + "grad_norm": 7.058396640919044, + "learning_rate": 1.1008521045280818e-05, + "loss": 1.3131, + "step": 80469 + }, + { + "epoch": 0.97, + "grad_norm": 42.89126940704266, + "learning_rate": 1.1007939843995458e-05, + "loss": 1.3122, + "step": 80472 + }, + { + "epoch": 0.97, + "grad_norm": 9.370343961917355, + "learning_rate": 1.1007358639270364e-05, + "loss": 1.3508, + "step": 80475 + }, + { + "epoch": 0.97, + "grad_norm": 17.692480642076184, + "learning_rate": 1.1006777431107517e-05, + "loss": 1.7347, + "step": 80478 + }, + { + "epoch": 0.97, + "grad_norm": 15.967997366417164, + "learning_rate": 1.1006196219508904e-05, + "loss": 1.0922, + "step": 80481 + }, + { + "epoch": 0.97, + "grad_norm": 35.51260228097135, + "learning_rate": 1.1005615004476503e-05, + "loss": 1.5627, + "step": 80484 + }, + { + "epoch": 0.97, + "grad_norm": 22.954757800452946, + "learning_rate": 1.1005033786012303e-05, + "loss": 1.4686, + "step": 80487 + }, + { + "epoch": 0.97, + "grad_norm": 15.184586059184998, + "learning_rate": 1.1004452564118283e-05, + "loss": 1.4255, + "step": 80490 + }, + { + "epoch": 0.97, + "grad_norm": 15.17476432783127, + "learning_rate": 1.1003871338796428e-05, + "loss": 1.5368, + "step": 80493 + }, + { + "epoch": 0.97, + "grad_norm": 3.4407696252045628, + "learning_rate": 1.1003290110048719e-05, + "loss": 1.4256, + "step": 80496 + }, + { + "epoch": 0.97, + "grad_norm": 3.9986447017994737, + "learning_rate": 1.1002708877877146e-05, + "loss": 1.4863, + "step": 80499 + }, + { + "epoch": 0.97, + "grad_norm": 10.016712109093135, + "learning_rate": 1.100212764228369e-05, + "loss": 1.3586, + "step": 80502 + }, + { + "epoch": 0.97, + "grad_norm": 12.462031221054591, + "learning_rate": 1.1001546403270328e-05, + "loss": 1.1545, + "step": 80505 + }, + { + "epoch": 0.97, + "grad_norm": 43.506515667498014, + "learning_rate": 1.1000965160839052e-05, + "loss": 1.5095, + "step": 80508 + }, + { + "epoch": 0.97, + "grad_norm": 16.522226682805492, + "learning_rate": 1.1000383914991843e-05, + "loss": 1.1943, + "step": 80511 + }, + { + "epoch": 0.97, + "grad_norm": 3.608661984464051, + "learning_rate": 1.0999802665730686e-05, + "loss": 1.268, + "step": 80514 + }, + { + "epoch": 0.97, + "grad_norm": 10.060850560786184, + "learning_rate": 1.0999221413057557e-05, + "loss": 1.4584, + "step": 80517 + }, + { + "epoch": 0.97, + "grad_norm": 4.2013773236747385, + "learning_rate": 1.099864015697445e-05, + "loss": 1.4108, + "step": 80520 + }, + { + "epoch": 0.97, + "grad_norm": 11.754029334418798, + "learning_rate": 1.0998058897483342e-05, + "loss": 1.4565, + "step": 80523 + }, + { + "epoch": 0.97, + "grad_norm": 8.08768365233197, + "learning_rate": 1.099747763458622e-05, + "loss": 1.4199, + "step": 80526 + }, + { + "epoch": 0.97, + "grad_norm": 11.43126789165471, + "learning_rate": 1.0996896368285064e-05, + "loss": 1.3244, + "step": 80529 + }, + { + "epoch": 0.97, + "grad_norm": 32.41535242085388, + "learning_rate": 1.099631509858186e-05, + "loss": 1.3412, + "step": 80532 + }, + { + "epoch": 0.97, + "grad_norm": 10.430553242430225, + "learning_rate": 1.0995733825478592e-05, + "loss": 1.5367, + "step": 80535 + }, + { + "epoch": 0.97, + "grad_norm": 43.35269783107882, + "learning_rate": 1.0995152548977242e-05, + "loss": 1.3344, + "step": 80538 + }, + { + "epoch": 0.97, + "grad_norm": 12.257276852918434, + "learning_rate": 1.0994571269079796e-05, + "loss": 1.2454, + "step": 80541 + }, + { + "epoch": 0.97, + "grad_norm": 13.297760315229427, + "learning_rate": 1.0993989985788238e-05, + "loss": 0.9461, + "step": 80544 + }, + { + "epoch": 0.97, + "grad_norm": 12.056073004380002, + "learning_rate": 1.099340869910455e-05, + "loss": 1.5899, + "step": 80547 + }, + { + "epoch": 0.97, + "grad_norm": 2.8582106393290547, + "learning_rate": 1.0992827409030712e-05, + "loss": 0.9683, + "step": 80550 + }, + { + "epoch": 0.97, + "grad_norm": 4.324795321880965, + "learning_rate": 1.0992246115568715e-05, + "loss": 1.3659, + "step": 80553 + }, + { + "epoch": 0.97, + "grad_norm": 13.25963677895972, + "learning_rate": 1.099166481872054e-05, + "loss": 1.1142, + "step": 80556 + }, + { + "epoch": 0.97, + "grad_norm": 14.922565849553276, + "learning_rate": 1.0991083518488167e-05, + "loss": 1.5003, + "step": 80559 + }, + { + "epoch": 0.97, + "grad_norm": 4.759199158767955, + "learning_rate": 1.0990502214873585e-05, + "loss": 1.4158, + "step": 80562 + }, + { + "epoch": 0.97, + "grad_norm": 26.824689747027538, + "learning_rate": 1.0989920907878777e-05, + "loss": 1.2631, + "step": 80565 + }, + { + "epoch": 0.97, + "grad_norm": 37.070323669219064, + "learning_rate": 1.0989339597505723e-05, + "loss": 1.7569, + "step": 80568 + }, + { + "epoch": 0.97, + "grad_norm": 11.20468788428996, + "learning_rate": 1.0988758283756409e-05, + "loss": 1.5316, + "step": 80571 + }, + { + "epoch": 0.97, + "grad_norm": 11.011423670583124, + "learning_rate": 1.0988176966632822e-05, + "loss": 1.0775, + "step": 80574 + }, + { + "epoch": 0.97, + "grad_norm": 12.222018110060073, + "learning_rate": 1.0987595646136942e-05, + "loss": 1.309, + "step": 80577 + }, + { + "epoch": 0.97, + "grad_norm": 13.097924660960402, + "learning_rate": 1.0987014322270752e-05, + "loss": 1.0214, + "step": 80580 + }, + { + "epoch": 0.97, + "grad_norm": 21.73973221614665, + "learning_rate": 1.0986432995036239e-05, + "loss": 1.1959, + "step": 80583 + }, + { + "epoch": 0.97, + "grad_norm": 7.434129551879867, + "learning_rate": 1.0985851664435385e-05, + "loss": 1.3326, + "step": 80586 + }, + { + "epoch": 0.97, + "grad_norm": 16.547316615866052, + "learning_rate": 1.0985270330470175e-05, + "loss": 1.1387, + "step": 80589 + }, + { + "epoch": 0.97, + "grad_norm": 5.600629703719318, + "learning_rate": 1.0984688993142592e-05, + "loss": 1.1001, + "step": 80592 + }, + { + "epoch": 0.97, + "grad_norm": 24.743021573677645, + "learning_rate": 1.098410765245462e-05, + "loss": 1.327, + "step": 80595 + }, + { + "epoch": 0.97, + "grad_norm": 4.51459121863127, + "learning_rate": 1.0983526308408244e-05, + "loss": 1.5829, + "step": 80598 + }, + { + "epoch": 0.97, + "grad_norm": 5.537818711344128, + "learning_rate": 1.0982944961005447e-05, + "loss": 1.3262, + "step": 80601 + }, + { + "epoch": 0.97, + "grad_norm": 24.65620225334131, + "learning_rate": 1.098236361024821e-05, + "loss": 1.705, + "step": 80604 + }, + { + "epoch": 0.97, + "grad_norm": 7.517844991421341, + "learning_rate": 1.0981782256138523e-05, + "loss": 1.376, + "step": 80607 + }, + { + "epoch": 0.97, + "grad_norm": 17.427014473247997, + "learning_rate": 1.0981200898678366e-05, + "loss": 1.265, + "step": 80610 + }, + { + "epoch": 0.97, + "grad_norm": 12.613491181305141, + "learning_rate": 1.0980619537869721e-05, + "loss": 1.6563, + "step": 80613 + }, + { + "epoch": 0.97, + "grad_norm": 6.689097863791755, + "learning_rate": 1.0980038173714578e-05, + "loss": 1.3311, + "step": 80616 + }, + { + "epoch": 0.97, + "grad_norm": 4.44832194400768, + "learning_rate": 1.097945680621492e-05, + "loss": 1.3999, + "step": 80619 + }, + { + "epoch": 0.97, + "grad_norm": 14.610565662366545, + "learning_rate": 1.0978875435372725e-05, + "loss": 1.3211, + "step": 80622 + }, + { + "epoch": 0.97, + "grad_norm": 7.650106324240366, + "learning_rate": 1.0978294061189978e-05, + "loss": 1.5299, + "step": 80625 + }, + { + "epoch": 0.97, + "grad_norm": 11.380083045132517, + "learning_rate": 1.097771268366867e-05, + "loss": 1.0545, + "step": 80628 + }, + { + "epoch": 0.97, + "grad_norm": 20.20837416657992, + "learning_rate": 1.0977131302810782e-05, + "loss": 1.2252, + "step": 80631 + }, + { + "epoch": 0.97, + "grad_norm": 18.406529318147705, + "learning_rate": 1.0976549918618293e-05, + "loss": 1.5833, + "step": 80634 + }, + { + "epoch": 0.97, + "grad_norm": 10.18447728089702, + "learning_rate": 1.0975968531093192e-05, + "loss": 1.3487, + "step": 80637 + }, + { + "epoch": 0.97, + "grad_norm": 4.969694473956226, + "learning_rate": 1.0975387140237463e-05, + "loss": 1.0671, + "step": 80640 + }, + { + "epoch": 0.97, + "grad_norm": 17.133592715334192, + "learning_rate": 1.097480574605309e-05, + "loss": 1.3088, + "step": 80643 + }, + { + "epoch": 0.97, + "grad_norm": 10.29129345525132, + "learning_rate": 1.0974224348542053e-05, + "loss": 1.6519, + "step": 80646 + }, + { + "epoch": 0.97, + "grad_norm": 17.62397288198508, + "learning_rate": 1.097364294770634e-05, + "loss": 1.1976, + "step": 80649 + }, + { + "epoch": 0.97, + "grad_norm": 7.235094993787205, + "learning_rate": 1.0973061543547935e-05, + "loss": 1.4273, + "step": 80652 + }, + { + "epoch": 0.97, + "grad_norm": 14.571318634153394, + "learning_rate": 1.0972480136068823e-05, + "loss": 1.4108, + "step": 80655 + }, + { + "epoch": 0.97, + "grad_norm": 36.182134335515094, + "learning_rate": 1.0971898725270984e-05, + "loss": 1.3265, + "step": 80658 + }, + { + "epoch": 0.97, + "grad_norm": 9.95604234439763, + "learning_rate": 1.0971317311156406e-05, + "loss": 1.4222, + "step": 80661 + }, + { + "epoch": 0.97, + "grad_norm": 13.762077591283676, + "learning_rate": 1.0970735893727074e-05, + "loss": 1.221, + "step": 80664 + }, + { + "epoch": 0.97, + "grad_norm": 7.327783882914382, + "learning_rate": 1.0970154472984966e-05, + "loss": 1.2255, + "step": 80667 + }, + { + "epoch": 0.97, + "grad_norm": 4.727370649061556, + "learning_rate": 1.0969573048932072e-05, + "loss": 1.5024, + "step": 80670 + }, + { + "epoch": 0.97, + "grad_norm": 16.880857435827856, + "learning_rate": 1.0968991621570375e-05, + "loss": 1.4018, + "step": 80673 + }, + { + "epoch": 0.97, + "grad_norm": 7.673921663123402, + "learning_rate": 1.0968410190901859e-05, + "loss": 1.4114, + "step": 80676 + }, + { + "epoch": 0.97, + "grad_norm": 8.709882963956487, + "learning_rate": 1.0967828756928506e-05, + "loss": 1.1321, + "step": 80679 + }, + { + "epoch": 0.97, + "grad_norm": 19.605779754012232, + "learning_rate": 1.0967247319652303e-05, + "loss": 1.3062, + "step": 80682 + }, + { + "epoch": 0.97, + "grad_norm": 6.955532278045184, + "learning_rate": 1.0966665879075235e-05, + "loss": 1.5167, + "step": 80685 + }, + { + "epoch": 0.97, + "grad_norm": 6.096999811916637, + "learning_rate": 1.0966084435199282e-05, + "loss": 1.2867, + "step": 80688 + }, + { + "epoch": 0.97, + "grad_norm": 21.457809557014283, + "learning_rate": 1.096550298802643e-05, + "loss": 1.0912, + "step": 80691 + }, + { + "epoch": 0.97, + "grad_norm": 24.418179899981837, + "learning_rate": 1.0964921537558669e-05, + "loss": 1.379, + "step": 80694 + }, + { + "epoch": 0.97, + "grad_norm": 3.2629296343625707, + "learning_rate": 1.0964340083797975e-05, + "loss": 1.366, + "step": 80697 + }, + { + "epoch": 0.97, + "grad_norm": 20.908761046889673, + "learning_rate": 1.0963758626746332e-05, + "loss": 1.2832, + "step": 80700 + }, + { + "epoch": 0.97, + "grad_norm": 6.882540127617943, + "learning_rate": 1.0963177166405734e-05, + "loss": 1.1698, + "step": 80703 + }, + { + "epoch": 0.97, + "grad_norm": 8.402809572369685, + "learning_rate": 1.0962595702778158e-05, + "loss": 1.3729, + "step": 80706 + }, + { + "epoch": 0.97, + "grad_norm": 23.223318372165664, + "learning_rate": 1.0962014235865588e-05, + "loss": 1.3961, + "step": 80709 + }, + { + "epoch": 0.97, + "grad_norm": 58.785433530028676, + "learning_rate": 1.096143276567001e-05, + "loss": 1.2211, + "step": 80712 + }, + { + "epoch": 0.97, + "grad_norm": 20.81235614500842, + "learning_rate": 1.0960851292193408e-05, + "loss": 1.2352, + "step": 80715 + }, + { + "epoch": 0.97, + "grad_norm": 12.058485232774487, + "learning_rate": 1.0960269815437769e-05, + "loss": 1.1391, + "step": 80718 + }, + { + "epoch": 0.97, + "grad_norm": 21.202323308142685, + "learning_rate": 1.0959688335405072e-05, + "loss": 1.3609, + "step": 80721 + }, + { + "epoch": 0.97, + "grad_norm": 7.235612374350229, + "learning_rate": 1.0959106852097308e-05, + "loss": 1.0188, + "step": 80724 + }, + { + "epoch": 0.97, + "grad_norm": 3.957534803988434, + "learning_rate": 1.0958525365516455e-05, + "loss": 1.3707, + "step": 80727 + }, + { + "epoch": 0.97, + "grad_norm": 25.959519920753348, + "learning_rate": 1.09579438756645e-05, + "loss": 1.2681, + "step": 80730 + }, + { + "epoch": 0.97, + "grad_norm": 9.73237292290273, + "learning_rate": 1.0957362382543429e-05, + "loss": 1.3807, + "step": 80733 + }, + { + "epoch": 0.97, + "grad_norm": 17.69676948939373, + "learning_rate": 1.0956780886155226e-05, + "loss": 1.0414, + "step": 80736 + }, + { + "epoch": 0.97, + "grad_norm": 35.81787496200951, + "learning_rate": 1.0956199386501872e-05, + "loss": 1.291, + "step": 80739 + }, + { + "epoch": 0.97, + "grad_norm": 88.95768730840378, + "learning_rate": 1.0955617883585355e-05, + "loss": 1.2962, + "step": 80742 + }, + { + "epoch": 0.97, + "grad_norm": 16.889087234452365, + "learning_rate": 1.0955036377407657e-05, + "loss": 1.0779, + "step": 80745 + }, + { + "epoch": 0.97, + "grad_norm": 28.555266736356742, + "learning_rate": 1.0954454867970766e-05, + "loss": 1.5572, + "step": 80748 + }, + { + "epoch": 0.97, + "grad_norm": 46.94074142118792, + "learning_rate": 1.0953873355276663e-05, + "loss": 1.3941, + "step": 80751 + }, + { + "epoch": 0.97, + "grad_norm": 5.326948031539383, + "learning_rate": 1.0953291839327334e-05, + "loss": 1.115, + "step": 80754 + }, + { + "epoch": 0.97, + "grad_norm": 20.64867259002088, + "learning_rate": 1.0952710320124762e-05, + "loss": 1.2437, + "step": 80757 + }, + { + "epoch": 0.97, + "grad_norm": 8.99329481415587, + "learning_rate": 1.0952128797670936e-05, + "loss": 1.1844, + "step": 80760 + }, + { + "epoch": 0.97, + "grad_norm": 3.4070174414627328, + "learning_rate": 1.0951547271967838e-05, + "loss": 1.5966, + "step": 80763 + }, + { + "epoch": 0.97, + "grad_norm": 26.51253915605803, + "learning_rate": 1.0950965743017445e-05, + "loss": 1.4934, + "step": 80766 + }, + { + "epoch": 0.97, + "grad_norm": 9.424875842438865, + "learning_rate": 1.0950384210821756e-05, + "loss": 1.3658, + "step": 80769 + }, + { + "epoch": 0.97, + "grad_norm": 4.23214563478336, + "learning_rate": 1.0949802675382746e-05, + "loss": 1.7382, + "step": 80772 + }, + { + "epoch": 0.97, + "grad_norm": 134.87067726553633, + "learning_rate": 1.0949221136702398e-05, + "loss": 1.6146, + "step": 80775 + }, + { + "epoch": 0.97, + "grad_norm": 31.77344536995443, + "learning_rate": 1.0948639594782704e-05, + "loss": 1.1232, + "step": 80778 + }, + { + "epoch": 0.97, + "grad_norm": 29.054687014656217, + "learning_rate": 1.0948058049625642e-05, + "loss": 1.4511, + "step": 80781 + }, + { + "epoch": 0.97, + "grad_norm": 18.524016943028357, + "learning_rate": 1.0947476501233203e-05, + "loss": 1.4364, + "step": 80784 + }, + { + "epoch": 0.97, + "grad_norm": 57.85836148019462, + "learning_rate": 1.0946894949607365e-05, + "loss": 1.2873, + "step": 80787 + }, + { + "epoch": 0.97, + "grad_norm": 9.30546453902644, + "learning_rate": 1.0946313394750116e-05, + "loss": 1.243, + "step": 80790 + }, + { + "epoch": 0.97, + "grad_norm": 7.806418377858255, + "learning_rate": 1.0945731836663442e-05, + "loss": 1.2388, + "step": 80793 + }, + { + "epoch": 0.97, + "grad_norm": 10.218565649981649, + "learning_rate": 1.0945150275349325e-05, + "loss": 1.2253, + "step": 80796 + }, + { + "epoch": 0.97, + "grad_norm": 6.739912680792697, + "learning_rate": 1.0944568710809749e-05, + "loss": 1.3079, + "step": 80799 + }, + { + "epoch": 0.97, + "grad_norm": 8.29547830028814, + "learning_rate": 1.0943987143046702e-05, + "loss": 1.1965, + "step": 80802 + }, + { + "epoch": 0.97, + "grad_norm": 90.53824058394255, + "learning_rate": 1.0943405572062168e-05, + "loss": 1.1937, + "step": 80805 + }, + { + "epoch": 0.97, + "grad_norm": 8.474479162326801, + "learning_rate": 1.0942823997858127e-05, + "loss": 1.3993, + "step": 80808 + }, + { + "epoch": 0.97, + "grad_norm": 11.072362017284052, + "learning_rate": 1.0942242420436569e-05, + "loss": 1.4164, + "step": 80811 + }, + { + "epoch": 0.97, + "grad_norm": 13.636257255598599, + "learning_rate": 1.094166083979948e-05, + "loss": 1.2887, + "step": 80814 + }, + { + "epoch": 0.97, + "grad_norm": 10.497079584607476, + "learning_rate": 1.0941079255948839e-05, + "loss": 1.7283, + "step": 80817 + }, + { + "epoch": 0.97, + "grad_norm": 13.910509729003573, + "learning_rate": 1.0940497668886631e-05, + "loss": 1.0172, + "step": 80820 + }, + { + "epoch": 0.97, + "grad_norm": 5.3187126150008845, + "learning_rate": 1.093991607861485e-05, + "loss": 1.0697, + "step": 80823 + }, + { + "epoch": 0.97, + "grad_norm": 9.214215710548348, + "learning_rate": 1.0939334485135466e-05, + "loss": 0.9835, + "step": 80826 + }, + { + "epoch": 0.97, + "grad_norm": 3.169166346902451, + "learning_rate": 1.0938752888450475e-05, + "loss": 1.1525, + "step": 80829 + }, + { + "epoch": 0.97, + "grad_norm": 41.21807479777265, + "learning_rate": 1.0938171288561859e-05, + "loss": 1.4149, + "step": 80832 + }, + { + "epoch": 0.97, + "grad_norm": 26.903436399005493, + "learning_rate": 1.0937589685471604e-05, + "loss": 1.3626, + "step": 80835 + }, + { + "epoch": 0.97, + "grad_norm": 8.56265915850383, + "learning_rate": 1.0937008079181691e-05, + "loss": 0.9838, + "step": 80838 + }, + { + "epoch": 0.97, + "grad_norm": 10.257336840066968, + "learning_rate": 1.0936426469694106e-05, + "loss": 1.5076, + "step": 80841 + }, + { + "epoch": 0.97, + "grad_norm": 8.03964341670653, + "learning_rate": 1.093584485701084e-05, + "loss": 1.57, + "step": 80844 + }, + { + "epoch": 0.97, + "grad_norm": 9.234641899865254, + "learning_rate": 1.0935263241133867e-05, + "loss": 1.5071, + "step": 80847 + }, + { + "epoch": 0.97, + "grad_norm": 12.375975389480805, + "learning_rate": 1.093468162206518e-05, + "loss": 1.0064, + "step": 80850 + }, + { + "epoch": 0.97, + "grad_norm": 7.399028179394949, + "learning_rate": 1.0934099999806757e-05, + "loss": 1.1508, + "step": 80853 + }, + { + "epoch": 0.97, + "grad_norm": 45.09446609412807, + "learning_rate": 1.0933518374360592e-05, + "loss": 1.3087, + "step": 80856 + }, + { + "epoch": 0.97, + "grad_norm": 7.840322474067834, + "learning_rate": 1.0932936745728664e-05, + "loss": 1.3975, + "step": 80859 + }, + { + "epoch": 0.97, + "grad_norm": 11.875850770676676, + "learning_rate": 1.0932355113912957e-05, + "loss": 1.1236, + "step": 80862 + }, + { + "epoch": 0.97, + "grad_norm": 14.376003855887562, + "learning_rate": 1.0931773478915461e-05, + "loss": 1.5392, + "step": 80865 + }, + { + "epoch": 0.97, + "grad_norm": 19.21288143417276, + "learning_rate": 1.0931191840738156e-05, + "loss": 1.4669, + "step": 80868 + }, + { + "epoch": 0.97, + "grad_norm": 19.863402504139692, + "learning_rate": 1.0930610199383029e-05, + "loss": 0.9288, + "step": 80871 + }, + { + "epoch": 0.97, + "grad_norm": 7.4430107292617524, + "learning_rate": 1.0930028554852063e-05, + "loss": 1.409, + "step": 80874 + }, + { + "epoch": 0.97, + "grad_norm": 4.348206917145999, + "learning_rate": 1.092944690714725e-05, + "loss": 1.3991, + "step": 80877 + }, + { + "epoch": 0.97, + "grad_norm": 9.378873710568357, + "learning_rate": 1.0928865256270562e-05, + "loss": 1.3434, + "step": 80880 + }, + { + "epoch": 0.97, + "grad_norm": 7.536402370403472, + "learning_rate": 1.0928283602223995e-05, + "loss": 1.7443, + "step": 80883 + }, + { + "epoch": 0.97, + "grad_norm": 14.003476815371734, + "learning_rate": 1.0927701945009533e-05, + "loss": 0.9552, + "step": 80886 + }, + { + "epoch": 0.97, + "grad_norm": 16.17268633499318, + "learning_rate": 1.0927120284629154e-05, + "loss": 1.3487, + "step": 80889 + }, + { + "epoch": 0.97, + "grad_norm": 28.74249178263427, + "learning_rate": 1.092653862108485e-05, + "loss": 1.2729, + "step": 80892 + }, + { + "epoch": 0.97, + "grad_norm": 5.224925618596821, + "learning_rate": 1.09259569543786e-05, + "loss": 1.3011, + "step": 80895 + }, + { + "epoch": 0.97, + "grad_norm": 9.13953085466348, + "learning_rate": 1.09253752845124e-05, + "loss": 1.3697, + "step": 80898 + }, + { + "epoch": 0.97, + "grad_norm": 10.332932309152168, + "learning_rate": 1.0924793611488218e-05, + "loss": 1.4911, + "step": 80901 + }, + { + "epoch": 0.97, + "grad_norm": 5.903383908572872, + "learning_rate": 1.0924211935308056e-05, + "loss": 1.5918, + "step": 80904 + }, + { + "epoch": 0.97, + "grad_norm": 7.071257758120201, + "learning_rate": 1.0923630255973887e-05, + "loss": 1.3026, + "step": 80907 + }, + { + "epoch": 0.97, + "grad_norm": 9.151974663424072, + "learning_rate": 1.0923048573487702e-05, + "loss": 1.4731, + "step": 80910 + }, + { + "epoch": 0.97, + "grad_norm": 12.315755985924838, + "learning_rate": 1.0922466887851483e-05, + "loss": 1.44, + "step": 80913 + }, + { + "epoch": 0.97, + "grad_norm": 49.8345535319941, + "learning_rate": 1.0921885199067219e-05, + "loss": 1.364, + "step": 80916 + }, + { + "epoch": 0.97, + "grad_norm": 17.92913973514305, + "learning_rate": 1.0921303507136896e-05, + "loss": 1.0135, + "step": 80919 + }, + { + "epoch": 0.97, + "grad_norm": 14.249441267119154, + "learning_rate": 1.0920721812062492e-05, + "loss": 1.2382, + "step": 80922 + }, + { + "epoch": 0.97, + "grad_norm": 10.800096041126332, + "learning_rate": 1.0920140113845995e-05, + "loss": 1.303, + "step": 80925 + }, + { + "epoch": 0.97, + "grad_norm": 4.958497495605078, + "learning_rate": 1.0919558412489395e-05, + "loss": 1.2396, + "step": 80928 + }, + { + "epoch": 0.97, + "grad_norm": 12.512751818557087, + "learning_rate": 1.091897670799467e-05, + "loss": 1.5857, + "step": 80931 + }, + { + "epoch": 0.97, + "grad_norm": 91.41599198275686, + "learning_rate": 1.0918395000363809e-05, + "loss": 1.2147, + "step": 80934 + }, + { + "epoch": 0.97, + "grad_norm": 10.39281643096648, + "learning_rate": 1.0917813289598796e-05, + "loss": 1.2878, + "step": 80937 + }, + { + "epoch": 0.97, + "grad_norm": 20.86345260147837, + "learning_rate": 1.091723157570162e-05, + "loss": 1.3712, + "step": 80940 + }, + { + "epoch": 0.97, + "grad_norm": 4.485306196798413, + "learning_rate": 1.091664985867426e-05, + "loss": 1.5234, + "step": 80943 + }, + { + "epoch": 0.97, + "grad_norm": 25.66062869364371, + "learning_rate": 1.0916068138518708e-05, + "loss": 1.1621, + "step": 80946 + }, + { + "epoch": 0.97, + "grad_norm": 17.690776546788523, + "learning_rate": 1.0915486415236943e-05, + "loss": 1.3442, + "step": 80949 + }, + { + "epoch": 0.97, + "grad_norm": 43.529330553650375, + "learning_rate": 1.0914904688830952e-05, + "loss": 1.2137, + "step": 80952 + }, + { + "epoch": 0.97, + "grad_norm": 9.894342925521487, + "learning_rate": 1.091432295930272e-05, + "loss": 1.2517, + "step": 80955 + }, + { + "epoch": 0.97, + "grad_norm": 20.545524405714776, + "learning_rate": 1.0913741226654235e-05, + "loss": 1.2824, + "step": 80958 + }, + { + "epoch": 0.97, + "grad_norm": 9.62582569030147, + "learning_rate": 1.0913159490887479e-05, + "loss": 1.2871, + "step": 80961 + }, + { + "epoch": 0.97, + "grad_norm": 8.484955458191992, + "learning_rate": 1.0912577752004438e-05, + "loss": 1.2709, + "step": 80964 + }, + { + "epoch": 0.97, + "grad_norm": 15.618697868892669, + "learning_rate": 1.09119960100071e-05, + "loss": 1.4745, + "step": 80967 + }, + { + "epoch": 0.97, + "grad_norm": 20.507037834084496, + "learning_rate": 1.0911414264897447e-05, + "loss": 1.3151, + "step": 80970 + }, + { + "epoch": 0.97, + "grad_norm": 11.015213576672371, + "learning_rate": 1.0910832516677468e-05, + "loss": 1.3554, + "step": 80973 + }, + { + "epoch": 0.97, + "grad_norm": 34.08246039219094, + "learning_rate": 1.0910250765349141e-05, + "loss": 1.4692, + "step": 80976 + }, + { + "epoch": 0.97, + "grad_norm": 15.812153813217327, + "learning_rate": 1.0909669010914458e-05, + "loss": 1.5978, + "step": 80979 + }, + { + "epoch": 0.97, + "grad_norm": 23.397476407802575, + "learning_rate": 1.0909087253375405e-05, + "loss": 1.232, + "step": 80982 + }, + { + "epoch": 0.97, + "grad_norm": 14.302506737373493, + "learning_rate": 1.0908505492733962e-05, + "loss": 1.1141, + "step": 80985 + }, + { + "epoch": 0.97, + "grad_norm": 15.577232118679321, + "learning_rate": 1.0907923728992115e-05, + "loss": 1.2889, + "step": 80988 + }, + { + "epoch": 0.97, + "grad_norm": 5.688133141861953, + "learning_rate": 1.0907341962151854e-05, + "loss": 1.3461, + "step": 80991 + }, + { + "epoch": 0.97, + "grad_norm": 2.038274138409865, + "learning_rate": 1.0906760192215165e-05, + "loss": 1.3741, + "step": 80994 + }, + { + "epoch": 0.97, + "grad_norm": 3.0092461279758185, + "learning_rate": 1.0906178419184024e-05, + "loss": 1.1162, + "step": 80997 + }, + { + "epoch": 0.97, + "grad_norm": 9.323035116735465, + "learning_rate": 1.0905596643060426e-05, + "loss": 1.3783, + "step": 81000 + }, + { + "epoch": 0.97, + "grad_norm": 5.417088966309866, + "learning_rate": 1.0905014863846353e-05, + "loss": 1.2395, + "step": 81003 + }, + { + "epoch": 0.97, + "grad_norm": 8.443008803093143, + "learning_rate": 1.0904433081543788e-05, + "loss": 1.619, + "step": 81006 + }, + { + "epoch": 0.97, + "grad_norm": 13.109921468987459, + "learning_rate": 1.090385129615472e-05, + "loss": 1.4635, + "step": 81009 + }, + { + "epoch": 0.97, + "grad_norm": 8.821772253885817, + "learning_rate": 1.0903269507681133e-05, + "loss": 1.1312, + "step": 81012 + }, + { + "epoch": 0.97, + "grad_norm": 8.222149615792318, + "learning_rate": 1.0902687716125012e-05, + "loss": 1.5922, + "step": 81015 + }, + { + "epoch": 0.97, + "grad_norm": 10.235740002522675, + "learning_rate": 1.0902105921488342e-05, + "loss": 1.4362, + "step": 81018 + }, + { + "epoch": 0.97, + "grad_norm": 28.13387525010548, + "learning_rate": 1.090152412377311e-05, + "loss": 1.0327, + "step": 81021 + }, + { + "epoch": 0.97, + "grad_norm": 7.768715587284806, + "learning_rate": 1.0900942322981302e-05, + "loss": 1.4804, + "step": 81024 + }, + { + "epoch": 0.97, + "grad_norm": 47.50010814876286, + "learning_rate": 1.0900360519114903e-05, + "loss": 1.4338, + "step": 81027 + }, + { + "epoch": 0.97, + "grad_norm": 4.123177482601579, + "learning_rate": 1.0899778712175897e-05, + "loss": 1.7721, + "step": 81030 + }, + { + "epoch": 0.97, + "grad_norm": 5.823191528830231, + "learning_rate": 1.089919690216627e-05, + "loss": 1.5656, + "step": 81033 + }, + { + "epoch": 0.97, + "grad_norm": 62.21911478404807, + "learning_rate": 1.0898615089088009e-05, + "loss": 1.3242, + "step": 81036 + }, + { + "epoch": 0.97, + "grad_norm": 9.861347370841182, + "learning_rate": 1.0898033272943098e-05, + "loss": 1.265, + "step": 81039 + }, + { + "epoch": 0.97, + "grad_norm": 6.082546286051712, + "learning_rate": 1.0897451453733521e-05, + "loss": 1.2383, + "step": 81042 + }, + { + "epoch": 0.97, + "grad_norm": 23.88453682099086, + "learning_rate": 1.0896869631461267e-05, + "loss": 1.4885, + "step": 81045 + }, + { + "epoch": 0.97, + "grad_norm": 3.713986886381834, + "learning_rate": 1.089628780612832e-05, + "loss": 1.4592, + "step": 81048 + }, + { + "epoch": 0.97, + "grad_norm": 5.559866369690797, + "learning_rate": 1.0895705977736663e-05, + "loss": 1.147, + "step": 81051 + }, + { + "epoch": 0.97, + "grad_norm": 7.579882074881289, + "learning_rate": 1.089512414628829e-05, + "loss": 1.7053, + "step": 81054 + }, + { + "epoch": 0.97, + "grad_norm": 7.615569021551017, + "learning_rate": 1.0894542311785176e-05, + "loss": 1.1861, + "step": 81057 + }, + { + "epoch": 0.97, + "grad_norm": 14.379353032453047, + "learning_rate": 1.0893960474229312e-05, + "loss": 1.5946, + "step": 81060 + }, + { + "epoch": 0.97, + "grad_norm": 7.777962201670378, + "learning_rate": 1.0893378633622684e-05, + "loss": 1.0195, + "step": 81063 + }, + { + "epoch": 0.97, + "grad_norm": 10.713913866193817, + "learning_rate": 1.0892796789967276e-05, + "loss": 1.3091, + "step": 81066 + }, + { + "epoch": 0.97, + "grad_norm": 6.6137484624673135, + "learning_rate": 1.0892214943265076e-05, + "loss": 1.3591, + "step": 81069 + }, + { + "epoch": 0.97, + "grad_norm": 7.537387213187975, + "learning_rate": 1.0891633093518063e-05, + "loss": 0.9864, + "step": 81072 + }, + { + "epoch": 0.97, + "grad_norm": 7.33863658518383, + "learning_rate": 1.0891051240728231e-05, + "loss": 1.5799, + "step": 81075 + }, + { + "epoch": 0.97, + "grad_norm": 8.670184621009362, + "learning_rate": 1.0890469384897562e-05, + "loss": 1.3021, + "step": 81078 + }, + { + "epoch": 0.97, + "grad_norm": 5.3989576067926786, + "learning_rate": 1.0889887526028043e-05, + "loss": 1.323, + "step": 81081 + }, + { + "epoch": 0.98, + "grad_norm": 61.25114221476106, + "learning_rate": 1.0889305664121654e-05, + "loss": 1.4994, + "step": 81084 + }, + { + "epoch": 0.98, + "grad_norm": 7.115246555712933, + "learning_rate": 1.0888723799180388e-05, + "loss": 1.4264, + "step": 81087 + }, + { + "epoch": 0.98, + "grad_norm": 2.4823760226507225, + "learning_rate": 1.0888141931206229e-05, + "loss": 1.6511, + "step": 81090 + }, + { + "epoch": 0.98, + "grad_norm": 23.814955880792983, + "learning_rate": 1.0887560060201159e-05, + "loss": 1.5447, + "step": 81093 + }, + { + "epoch": 0.98, + "grad_norm": 22.29338336477069, + "learning_rate": 1.0886978186167165e-05, + "loss": 1.4533, + "step": 81096 + }, + { + "epoch": 0.98, + "grad_norm": 5.936501568545617, + "learning_rate": 1.0886396309106238e-05, + "loss": 1.015, + "step": 81099 + }, + { + "epoch": 0.98, + "grad_norm": 6.336397431872456, + "learning_rate": 1.0885814429020358e-05, + "loss": 1.5911, + "step": 81102 + }, + { + "epoch": 0.98, + "grad_norm": 18.84530764421659, + "learning_rate": 1.0885232545911511e-05, + "loss": 1.189, + "step": 81105 + }, + { + "epoch": 0.98, + "grad_norm": 9.14957187287234, + "learning_rate": 1.0884650659781685e-05, + "loss": 1.5982, + "step": 81108 + }, + { + "epoch": 0.98, + "grad_norm": 9.232324590604883, + "learning_rate": 1.0884068770632865e-05, + "loss": 1.2476, + "step": 81111 + }, + { + "epoch": 0.98, + "grad_norm": 25.33509117359218, + "learning_rate": 1.0883486878467038e-05, + "loss": 1.6984, + "step": 81114 + }, + { + "epoch": 0.98, + "grad_norm": 8.847940894291504, + "learning_rate": 1.0882904983286187e-05, + "loss": 1.4708, + "step": 81117 + }, + { + "epoch": 0.98, + "grad_norm": 9.72595689054997, + "learning_rate": 1.0882323085092298e-05, + "loss": 1.4228, + "step": 81120 + }, + { + "epoch": 0.98, + "grad_norm": 5.708107486541164, + "learning_rate": 1.0881741183887361e-05, + "loss": 1.1233, + "step": 81123 + }, + { + "epoch": 0.98, + "grad_norm": 18.329875030016314, + "learning_rate": 1.0881159279673357e-05, + "loss": 1.3006, + "step": 81126 + }, + { + "epoch": 0.98, + "grad_norm": 2.983971300750768, + "learning_rate": 1.0880577372452277e-05, + "loss": 1.4132, + "step": 81129 + }, + { + "epoch": 0.98, + "grad_norm": 49.38272912628786, + "learning_rate": 1.08799954622261e-05, + "loss": 1.2485, + "step": 81132 + }, + { + "epoch": 0.98, + "grad_norm": 9.771119701698623, + "learning_rate": 1.0879413548996817e-05, + "loss": 1.3473, + "step": 81135 + }, + { + "epoch": 0.98, + "grad_norm": 21.825452682834708, + "learning_rate": 1.0878831632766412e-05, + "loss": 1.2203, + "step": 81138 + }, + { + "epoch": 0.98, + "grad_norm": 7.6106049030747425, + "learning_rate": 1.087824971353687e-05, + "loss": 1.2019, + "step": 81141 + }, + { + "epoch": 0.98, + "grad_norm": 15.796294020717877, + "learning_rate": 1.087766779131018e-05, + "loss": 1.0151, + "step": 81144 + }, + { + "epoch": 0.98, + "grad_norm": 6.809280494529101, + "learning_rate": 1.0877085866088326e-05, + "loss": 1.2739, + "step": 81147 + }, + { + "epoch": 0.98, + "grad_norm": 31.312527658264308, + "learning_rate": 1.0876503937873294e-05, + "loss": 1.2695, + "step": 81150 + }, + { + "epoch": 0.98, + "grad_norm": 11.291911372886446, + "learning_rate": 1.0875922006667068e-05, + "loss": 1.4071, + "step": 81153 + }, + { + "epoch": 0.98, + "grad_norm": 8.186659345597977, + "learning_rate": 1.0875340072471636e-05, + "loss": 1.4376, + "step": 81156 + }, + { + "epoch": 0.98, + "grad_norm": 11.424971794345113, + "learning_rate": 1.0874758135288983e-05, + "loss": 1.2167, + "step": 81159 + }, + { + "epoch": 0.98, + "grad_norm": 12.471300720528415, + "learning_rate": 1.0874176195121098e-05, + "loss": 1.4301, + "step": 81162 + }, + { + "epoch": 0.98, + "grad_norm": 33.857942926452516, + "learning_rate": 1.0873594251969963e-05, + "loss": 1.5002, + "step": 81165 + }, + { + "epoch": 0.98, + "grad_norm": 15.242685215803526, + "learning_rate": 1.0873012305837566e-05, + "loss": 1.0932, + "step": 81168 + }, + { + "epoch": 0.98, + "grad_norm": 9.053889279736644, + "learning_rate": 1.0872430356725891e-05, + "loss": 1.3315, + "step": 81171 + }, + { + "epoch": 0.98, + "grad_norm": 33.03940504879895, + "learning_rate": 1.0871848404636929e-05, + "loss": 1.0063, + "step": 81174 + }, + { + "epoch": 0.98, + "grad_norm": 5.464106794239687, + "learning_rate": 1.087126644957266e-05, + "loss": 1.1857, + "step": 81177 + }, + { + "epoch": 0.98, + "grad_norm": 8.029189616394829, + "learning_rate": 1.0870684491535071e-05, + "loss": 1.3324, + "step": 81180 + }, + { + "epoch": 0.98, + "grad_norm": 8.872895586585054, + "learning_rate": 1.0870102530526152e-05, + "loss": 1.1136, + "step": 81183 + }, + { + "epoch": 0.98, + "grad_norm": 7.945060342366477, + "learning_rate": 1.0869520566547886e-05, + "loss": 1.5246, + "step": 81186 + }, + { + "epoch": 0.98, + "grad_norm": 11.218437300916312, + "learning_rate": 1.086893859960226e-05, + "loss": 1.2963, + "step": 81189 + }, + { + "epoch": 0.98, + "grad_norm": 11.862803489509924, + "learning_rate": 1.0868356629691256e-05, + "loss": 1.3189, + "step": 81192 + }, + { + "epoch": 0.98, + "grad_norm": 60.0884235020527, + "learning_rate": 1.0867774656816867e-05, + "loss": 1.6165, + "step": 81195 + }, + { + "epoch": 0.98, + "grad_norm": 5.813139490695401, + "learning_rate": 1.0867192680981076e-05, + "loss": 1.1692, + "step": 81198 + }, + { + "epoch": 0.98, + "grad_norm": 10.122905474392745, + "learning_rate": 1.0866610702185867e-05, + "loss": 1.5028, + "step": 81201 + }, + { + "epoch": 0.98, + "grad_norm": 16.054911037445873, + "learning_rate": 1.0866028720433228e-05, + "loss": 1.226, + "step": 81204 + }, + { + "epoch": 0.98, + "grad_norm": 7.139484815550108, + "learning_rate": 1.0865446735725144e-05, + "loss": 1.4616, + "step": 81207 + }, + { + "epoch": 0.98, + "grad_norm": 11.646266445486875, + "learning_rate": 1.0864864748063607e-05, + "loss": 1.301, + "step": 81210 + }, + { + "epoch": 0.98, + "grad_norm": 4.473270888459227, + "learning_rate": 1.086428275745059e-05, + "loss": 1.3133, + "step": 81213 + }, + { + "epoch": 0.98, + "grad_norm": 10.700712277866899, + "learning_rate": 1.0863700763888094e-05, + "loss": 1.5394, + "step": 81216 + }, + { + "epoch": 0.98, + "grad_norm": 5.371498361880972, + "learning_rate": 1.0863118767378096e-05, + "loss": 1.3701, + "step": 81219 + }, + { + "epoch": 0.98, + "grad_norm": 69.16937282018056, + "learning_rate": 1.0862536767922584e-05, + "loss": 1.5103, + "step": 81222 + }, + { + "epoch": 0.98, + "grad_norm": 8.444012023244847, + "learning_rate": 1.0861954765523546e-05, + "loss": 1.2815, + "step": 81225 + }, + { + "epoch": 0.98, + "grad_norm": 4.276379160669883, + "learning_rate": 1.0861372760182967e-05, + "loss": 1.1435, + "step": 81228 + }, + { + "epoch": 0.98, + "grad_norm": 22.584971461663176, + "learning_rate": 1.0860790751902832e-05, + "loss": 1.1949, + "step": 81231 + }, + { + "epoch": 0.98, + "grad_norm": 8.897257137200839, + "learning_rate": 1.0860208740685128e-05, + "loss": 1.2704, + "step": 81234 + }, + { + "epoch": 0.98, + "grad_norm": 7.894518189272508, + "learning_rate": 1.0859626726531843e-05, + "loss": 1.2217, + "step": 81237 + }, + { + "epoch": 0.98, + "grad_norm": 15.972989907038908, + "learning_rate": 1.085904470944496e-05, + "loss": 1.1569, + "step": 81240 + }, + { + "epoch": 0.98, + "grad_norm": 19.008781365400186, + "learning_rate": 1.0858462689426467e-05, + "loss": 1.4384, + "step": 81243 + }, + { + "epoch": 0.98, + "grad_norm": 8.506568549540491, + "learning_rate": 1.0857880666478349e-05, + "loss": 1.0609, + "step": 81246 + }, + { + "epoch": 0.98, + "grad_norm": 30.340839801768873, + "learning_rate": 1.0857298640602597e-05, + "loss": 1.5325, + "step": 81249 + }, + { + "epoch": 0.98, + "grad_norm": 41.54859025547029, + "learning_rate": 1.085671661180119e-05, + "loss": 1.3415, + "step": 81252 + }, + { + "epoch": 0.98, + "grad_norm": 10.922348993058774, + "learning_rate": 1.085613458007612e-05, + "loss": 1.46, + "step": 81255 + }, + { + "epoch": 0.98, + "grad_norm": 15.115177367483623, + "learning_rate": 1.0855552545429369e-05, + "loss": 1.2322, + "step": 81258 + }, + { + "epoch": 0.98, + "grad_norm": 14.688200576416994, + "learning_rate": 1.0854970507862927e-05, + "loss": 1.7078, + "step": 81261 + }, + { + "epoch": 0.98, + "grad_norm": 6.578386078519204, + "learning_rate": 1.0854388467378778e-05, + "loss": 1.5048, + "step": 81264 + }, + { + "epoch": 0.98, + "grad_norm": 8.903391342684722, + "learning_rate": 1.0853806423978907e-05, + "loss": 1.3741, + "step": 81267 + }, + { + "epoch": 0.98, + "grad_norm": 25.332800635126418, + "learning_rate": 1.0853224377665305e-05, + "loss": 1.5047, + "step": 81270 + }, + { + "epoch": 0.98, + "grad_norm": 18.803292735550308, + "learning_rate": 1.0852642328439954e-05, + "loss": 1.2522, + "step": 81273 + }, + { + "epoch": 0.98, + "grad_norm": 4.072910532537182, + "learning_rate": 1.0852060276304846e-05, + "loss": 1.5212, + "step": 81276 + }, + { + "epoch": 0.98, + "grad_norm": 32.89906788374013, + "learning_rate": 1.0851478221261958e-05, + "loss": 1.6208, + "step": 81279 + }, + { + "epoch": 0.98, + "grad_norm": 9.686888264397627, + "learning_rate": 1.0850896163313284e-05, + "loss": 1.5187, + "step": 81282 + }, + { + "epoch": 0.98, + "grad_norm": 46.76347406143051, + "learning_rate": 1.0850314102460807e-05, + "loss": 1.162, + "step": 81285 + }, + { + "epoch": 0.98, + "grad_norm": 10.768583530370266, + "learning_rate": 1.0849732038706514e-05, + "loss": 1.4565, + "step": 81288 + }, + { + "epoch": 0.98, + "grad_norm": 21.339366029030373, + "learning_rate": 1.0849149972052391e-05, + "loss": 1.2558, + "step": 81291 + }, + { + "epoch": 0.98, + "grad_norm": 29.274710525095777, + "learning_rate": 1.0848567902500427e-05, + "loss": 1.4422, + "step": 81294 + }, + { + "epoch": 0.98, + "grad_norm": 43.006862603321494, + "learning_rate": 1.0847985830052606e-05, + "loss": 1.383, + "step": 81297 + }, + { + "epoch": 0.98, + "grad_norm": 55.728087939872836, + "learning_rate": 1.0847403754710912e-05, + "loss": 1.2061, + "step": 81300 + }, + { + "epoch": 0.98, + "grad_norm": 8.338398853807732, + "learning_rate": 1.0846821676477339e-05, + "loss": 1.3511, + "step": 81303 + }, + { + "epoch": 0.98, + "grad_norm": 11.320014056536184, + "learning_rate": 1.0846239595353864e-05, + "loss": 1.209, + "step": 81306 + }, + { + "epoch": 0.98, + "grad_norm": 19.65371893224816, + "learning_rate": 1.0845657511342482e-05, + "loss": 1.1866, + "step": 81309 + }, + { + "epoch": 0.98, + "grad_norm": 9.82076586520123, + "learning_rate": 1.0845075424445171e-05, + "loss": 1.352, + "step": 81312 + }, + { + "epoch": 0.98, + "grad_norm": 6.565994502033077, + "learning_rate": 1.0844493334663925e-05, + "loss": 1.1451, + "step": 81315 + }, + { + "epoch": 0.98, + "grad_norm": 15.199532557628753, + "learning_rate": 1.0843911242000728e-05, + "loss": 1.5543, + "step": 81318 + }, + { + "epoch": 0.98, + "grad_norm": 15.205867150254083, + "learning_rate": 1.0843329146457564e-05, + "loss": 1.1592, + "step": 81321 + }, + { + "epoch": 0.98, + "grad_norm": 9.358389871752406, + "learning_rate": 1.0842747048036427e-05, + "loss": 1.4089, + "step": 81324 + }, + { + "epoch": 0.98, + "grad_norm": 84.22707509655417, + "learning_rate": 1.084216494673929e-05, + "loss": 1.6716, + "step": 81327 + }, + { + "epoch": 0.98, + "grad_norm": 4.453004924215254, + "learning_rate": 1.084158284256815e-05, + "loss": 1.2837, + "step": 81330 + }, + { + "epoch": 0.98, + "grad_norm": 18.858714479260982, + "learning_rate": 1.0841000735524992e-05, + "loss": 1.365, + "step": 81333 + }, + { + "epoch": 0.98, + "grad_norm": 11.39198444882083, + "learning_rate": 1.0840418625611804e-05, + "loss": 1.4286, + "step": 81336 + }, + { + "epoch": 0.98, + "grad_norm": 7.023295339289432, + "learning_rate": 1.0839836512830564e-05, + "loss": 1.434, + "step": 81339 + }, + { + "epoch": 0.98, + "grad_norm": 6.921794063864865, + "learning_rate": 1.0839254397183265e-05, + "loss": 1.0327, + "step": 81342 + }, + { + "epoch": 0.98, + "grad_norm": 22.669291430886943, + "learning_rate": 1.08386722786719e-05, + "loss": 1.5169, + "step": 81345 + }, + { + "epoch": 0.98, + "grad_norm": 4.400104301020429, + "learning_rate": 1.0838090157298441e-05, + "loss": 1.5748, + "step": 81348 + }, + { + "epoch": 0.98, + "grad_norm": 8.800012897632739, + "learning_rate": 1.0837508033064888e-05, + "loss": 1.139, + "step": 81351 + }, + { + "epoch": 0.98, + "grad_norm": 7.121648600026208, + "learning_rate": 1.0836925905973214e-05, + "loss": 1.312, + "step": 81354 + }, + { + "epoch": 0.98, + "grad_norm": 15.560495560709416, + "learning_rate": 1.0836343776025422e-05, + "loss": 1.2537, + "step": 81357 + }, + { + "epoch": 0.98, + "grad_norm": 6.101383433256805, + "learning_rate": 1.0835761643223485e-05, + "loss": 1.2077, + "step": 81360 + }, + { + "epoch": 0.98, + "grad_norm": 10.42324486908012, + "learning_rate": 1.0835179507569398e-05, + "loss": 1.3655, + "step": 81363 + }, + { + "epoch": 0.98, + "grad_norm": 13.37265458340057, + "learning_rate": 1.083459736906514e-05, + "loss": 1.6246, + "step": 81366 + }, + { + "epoch": 0.98, + "grad_norm": 9.18317878145978, + "learning_rate": 1.0834015227712705e-05, + "loss": 1.4183, + "step": 81369 + }, + { + "epoch": 0.98, + "grad_norm": 7.195384071880628, + "learning_rate": 1.0833433083514073e-05, + "loss": 1.3306, + "step": 81372 + }, + { + "epoch": 0.98, + "grad_norm": 17.3265808826699, + "learning_rate": 1.0832850936471235e-05, + "loss": 1.7002, + "step": 81375 + }, + { + "epoch": 0.98, + "grad_norm": 9.058034989695502, + "learning_rate": 1.0832268786586181e-05, + "loss": 1.1183, + "step": 81378 + }, + { + "epoch": 0.98, + "grad_norm": 8.012835547637026, + "learning_rate": 1.0831686633860889e-05, + "loss": 1.1385, + "step": 81381 + }, + { + "epoch": 0.98, + "grad_norm": 9.076357160219322, + "learning_rate": 1.0831104478297352e-05, + "loss": 1.3948, + "step": 81384 + }, + { + "epoch": 0.98, + "grad_norm": 11.379113016318344, + "learning_rate": 1.0830522319897553e-05, + "loss": 1.1739, + "step": 81387 + }, + { + "epoch": 0.98, + "grad_norm": 14.660833045908, + "learning_rate": 1.0829940158663485e-05, + "loss": 1.2275, + "step": 81390 + }, + { + "epoch": 0.98, + "grad_norm": 2.9304034326925197, + "learning_rate": 1.0829357994597124e-05, + "loss": 1.3169, + "step": 81393 + }, + { + "epoch": 0.98, + "grad_norm": 8.995279868222248, + "learning_rate": 1.0828775827700466e-05, + "loss": 1.2174, + "step": 81396 + }, + { + "epoch": 0.98, + "grad_norm": 12.121701037322893, + "learning_rate": 1.0828193657975497e-05, + "loss": 1.1261, + "step": 81399 + }, + { + "epoch": 0.98, + "grad_norm": 4.539091863217431, + "learning_rate": 1.0827611485424197e-05, + "loss": 1.4396, + "step": 81402 + }, + { + "epoch": 0.98, + "grad_norm": 2.297359666628732, + "learning_rate": 1.0827029310048561e-05, + "loss": 1.1444, + "step": 81405 + }, + { + "epoch": 0.98, + "grad_norm": 5.320613214306147, + "learning_rate": 1.0826447131850569e-05, + "loss": 1.2766, + "step": 81408 + }, + { + "epoch": 0.98, + "grad_norm": 6.430866867897558, + "learning_rate": 1.0825864950832213e-05, + "loss": 1.2956, + "step": 81411 + }, + { + "epoch": 0.98, + "grad_norm": 19.10501762666797, + "learning_rate": 1.0825282766995473e-05, + "loss": 1.4018, + "step": 81414 + }, + { + "epoch": 0.98, + "grad_norm": 15.705023761920772, + "learning_rate": 1.0824700580342345e-05, + "loss": 1.325, + "step": 81417 + }, + { + "epoch": 0.98, + "grad_norm": 12.865202579979572, + "learning_rate": 1.082411839087481e-05, + "loss": 1.0671, + "step": 81420 + }, + { + "epoch": 0.98, + "grad_norm": 40.23098409231473, + "learning_rate": 1.0823536198594853e-05, + "loss": 1.0179, + "step": 81423 + }, + { + "epoch": 0.98, + "grad_norm": 12.338349402237752, + "learning_rate": 1.0822954003504464e-05, + "loss": 1.4468, + "step": 81426 + }, + { + "epoch": 0.98, + "grad_norm": 30.187125644682805, + "learning_rate": 1.0822371805605629e-05, + "loss": 1.1091, + "step": 81429 + }, + { + "epoch": 0.98, + "grad_norm": 3.579279016049837, + "learning_rate": 1.0821789604900341e-05, + "loss": 1.6002, + "step": 81432 + }, + { + "epoch": 0.98, + "grad_norm": 2.788009093590457, + "learning_rate": 1.0821207401390577e-05, + "loss": 1.4983, + "step": 81435 + }, + { + "epoch": 0.98, + "grad_norm": 9.426646936761, + "learning_rate": 1.0820625195078326e-05, + "loss": 1.0, + "step": 81438 + }, + { + "epoch": 0.98, + "grad_norm": 11.624514005044006, + "learning_rate": 1.082004298596558e-05, + "loss": 1.285, + "step": 81441 + }, + { + "epoch": 0.98, + "grad_norm": 4.767959032829818, + "learning_rate": 1.0819460774054322e-05, + "loss": 1.4942, + "step": 81444 + }, + { + "epoch": 0.98, + "grad_norm": 4.970129084006106, + "learning_rate": 1.0818878559346537e-05, + "loss": 1.2657, + "step": 81447 + }, + { + "epoch": 0.98, + "grad_norm": 17.725887723310624, + "learning_rate": 1.0818296341844217e-05, + "loss": 1.1666, + "step": 81450 + }, + { + "epoch": 0.98, + "grad_norm": 4.735925555466369, + "learning_rate": 1.0817714121549348e-05, + "loss": 0.9312, + "step": 81453 + }, + { + "epoch": 0.98, + "grad_norm": 16.229000640687975, + "learning_rate": 1.081713189846391e-05, + "loss": 1.6954, + "step": 81456 + }, + { + "epoch": 0.98, + "grad_norm": 3.576697029287397, + "learning_rate": 1.08165496725899e-05, + "loss": 1.6619, + "step": 81459 + }, + { + "epoch": 0.98, + "grad_norm": 13.033275572553514, + "learning_rate": 1.0815967443929298e-05, + "loss": 1.2363, + "step": 81462 + }, + { + "epoch": 0.98, + "grad_norm": 13.668679102590385, + "learning_rate": 1.0815385212484095e-05, + "loss": 1.4849, + "step": 81465 + }, + { + "epoch": 0.98, + "grad_norm": 6.3906972966944835, + "learning_rate": 1.0814802978256273e-05, + "loss": 1.0529, + "step": 81468 + }, + { + "epoch": 0.98, + "grad_norm": 7.208014441289013, + "learning_rate": 1.0814220741247823e-05, + "loss": 1.0573, + "step": 81471 + }, + { + "epoch": 0.98, + "grad_norm": 20.207921271900403, + "learning_rate": 1.0813638501460733e-05, + "loss": 1.0086, + "step": 81474 + }, + { + "epoch": 0.98, + "grad_norm": 7.699205968048694, + "learning_rate": 1.0813056258896984e-05, + "loss": 1.1046, + "step": 81477 + }, + { + "epoch": 0.98, + "grad_norm": 12.881547685526067, + "learning_rate": 1.0812474013558569e-05, + "loss": 1.5479, + "step": 81480 + }, + { + "epoch": 0.98, + "grad_norm": 11.987103191861246, + "learning_rate": 1.0811891765447473e-05, + "loss": 1.342, + "step": 81483 + }, + { + "epoch": 0.98, + "grad_norm": 19.51485590288263, + "learning_rate": 1.0811309514565684e-05, + "loss": 0.9424, + "step": 81486 + }, + { + "epoch": 0.98, + "grad_norm": 12.474677582400272, + "learning_rate": 1.0810727260915184e-05, + "loss": 1.4249, + "step": 81489 + }, + { + "epoch": 0.98, + "grad_norm": 16.58662233121577, + "learning_rate": 1.0810145004497967e-05, + "loss": 1.5393, + "step": 81492 + }, + { + "epoch": 0.98, + "grad_norm": 6.157826295387031, + "learning_rate": 1.0809562745316018e-05, + "loss": 1.2316, + "step": 81495 + }, + { + "epoch": 0.98, + "grad_norm": 43.271895375356834, + "learning_rate": 1.0808980483371321e-05, + "loss": 1.4427, + "step": 81498 + }, + { + "epoch": 0.98, + "grad_norm": 6.92693323757538, + "learning_rate": 1.0808398218665863e-05, + "loss": 1.3071, + "step": 81501 + }, + { + "epoch": 0.98, + "grad_norm": 15.56377001143155, + "learning_rate": 1.0807815951201636e-05, + "loss": 1.4258, + "step": 81504 + }, + { + "epoch": 0.98, + "grad_norm": 2.0646222834812353, + "learning_rate": 1.0807233680980624e-05, + "loss": 1.1891, + "step": 81507 + }, + { + "epoch": 0.98, + "grad_norm": 12.888477004580693, + "learning_rate": 1.080665140800481e-05, + "loss": 1.5503, + "step": 81510 + }, + { + "epoch": 0.98, + "grad_norm": 44.30639455134428, + "learning_rate": 1.0806069132276192e-05, + "loss": 1.3308, + "step": 81513 + }, + { + "epoch": 0.98, + "grad_norm": 28.42591869704936, + "learning_rate": 1.0805486853796748e-05, + "loss": 1.3205, + "step": 81516 + }, + { + "epoch": 0.98, + "grad_norm": 30.7026055386882, + "learning_rate": 1.0804904572568466e-05, + "loss": 1.2678, + "step": 81519 + }, + { + "epoch": 0.98, + "grad_norm": 10.899849070621123, + "learning_rate": 1.0804322288593333e-05, + "loss": 1.3264, + "step": 81522 + }, + { + "epoch": 0.98, + "grad_norm": 6.691443241949737, + "learning_rate": 1.0803740001873341e-05, + "loss": 1.1874, + "step": 81525 + }, + { + "epoch": 0.98, + "grad_norm": 10.20694930086295, + "learning_rate": 1.0803157712410475e-05, + "loss": 1.3203, + "step": 81528 + }, + { + "epoch": 0.98, + "grad_norm": 6.751772022563384, + "learning_rate": 1.0802575420206718e-05, + "loss": 1.325, + "step": 81531 + }, + { + "epoch": 0.98, + "grad_norm": 9.771562132683211, + "learning_rate": 1.080199312526406e-05, + "loss": 1.7112, + "step": 81534 + }, + { + "epoch": 0.98, + "grad_norm": 3.4419080789622676, + "learning_rate": 1.0801410827584492e-05, + "loss": 1.2089, + "step": 81537 + }, + { + "epoch": 0.98, + "grad_norm": 4.893156702886213, + "learning_rate": 1.0800828527169996e-05, + "loss": 1.1541, + "step": 81540 + }, + { + "epoch": 0.98, + "grad_norm": 32.00293956726678, + "learning_rate": 1.0800246224022557e-05, + "loss": 1.0564, + "step": 81543 + }, + { + "epoch": 0.98, + "grad_norm": 17.789083391171186, + "learning_rate": 1.079966391814417e-05, + "loss": 1.3209, + "step": 81546 + }, + { + "epoch": 0.98, + "grad_norm": 18.8663849428427, + "learning_rate": 1.0799081609536817e-05, + "loss": 1.055, + "step": 81549 + }, + { + "epoch": 0.98, + "grad_norm": 30.53123194176584, + "learning_rate": 1.0798499298202485e-05, + "loss": 1.5563, + "step": 81552 + }, + { + "epoch": 0.98, + "grad_norm": 5.618510454950511, + "learning_rate": 1.0797916984143162e-05, + "loss": 1.4826, + "step": 81555 + }, + { + "epoch": 0.98, + "grad_norm": 10.5335723385481, + "learning_rate": 1.0797334667360839e-05, + "loss": 1.0658, + "step": 81558 + }, + { + "epoch": 0.98, + "grad_norm": 24.208635924352457, + "learning_rate": 1.0796752347857497e-05, + "loss": 1.1651, + "step": 81561 + }, + { + "epoch": 0.98, + "grad_norm": 10.017141790536606, + "learning_rate": 1.0796170025635127e-05, + "loss": 1.4152, + "step": 81564 + }, + { + "epoch": 0.98, + "grad_norm": 42.318088710283504, + "learning_rate": 1.0795587700695718e-05, + "loss": 1.22, + "step": 81567 + }, + { + "epoch": 0.98, + "grad_norm": 5.78652024741248, + "learning_rate": 1.0795005373041251e-05, + "loss": 1.0903, + "step": 81570 + }, + { + "epoch": 0.98, + "grad_norm": 25.18398632574471, + "learning_rate": 1.079442304267372e-05, + "loss": 1.2973, + "step": 81573 + }, + { + "epoch": 0.98, + "grad_norm": 13.00590472070998, + "learning_rate": 1.0793840709595107e-05, + "loss": 1.5538, + "step": 81576 + }, + { + "epoch": 0.98, + "grad_norm": 13.243589652951231, + "learning_rate": 1.0793258373807402e-05, + "loss": 1.081, + "step": 81579 + }, + { + "epoch": 0.98, + "grad_norm": 2.9515934769654, + "learning_rate": 1.0792676035312595e-05, + "loss": 1.3434, + "step": 81582 + }, + { + "epoch": 0.98, + "grad_norm": 14.738296762506723, + "learning_rate": 1.0792093694112667e-05, + "loss": 1.4663, + "step": 81585 + }, + { + "epoch": 0.98, + "grad_norm": 13.535778732818667, + "learning_rate": 1.0791511350209609e-05, + "loss": 1.0863, + "step": 81588 + }, + { + "epoch": 0.98, + "grad_norm": 12.718006424616338, + "learning_rate": 1.0790929003605409e-05, + "loss": 1.2496, + "step": 81591 + }, + { + "epoch": 0.98, + "grad_norm": 19.3939614090871, + "learning_rate": 1.0790346654302054e-05, + "loss": 1.4396, + "step": 81594 + }, + { + "epoch": 0.98, + "grad_norm": 31.587128915437052, + "learning_rate": 1.0789764302301525e-05, + "loss": 1.7195, + "step": 81597 + }, + { + "epoch": 0.98, + "grad_norm": 18.648476665195915, + "learning_rate": 1.078918194760582e-05, + "loss": 1.3054, + "step": 81600 + }, + { + "epoch": 0.98, + "grad_norm": 6.434395023974136, + "learning_rate": 1.0788599590216919e-05, + "loss": 1.0012, + "step": 81603 + }, + { + "epoch": 0.98, + "grad_norm": 18.386247656685796, + "learning_rate": 1.0788017230136814e-05, + "loss": 1.3533, + "step": 81606 + }, + { + "epoch": 0.98, + "grad_norm": 13.391871000946331, + "learning_rate": 1.0787434867367488e-05, + "loss": 1.2954, + "step": 81609 + }, + { + "epoch": 0.98, + "grad_norm": 55.45042009986292, + "learning_rate": 1.0786852501910932e-05, + "loss": 1.31, + "step": 81612 + }, + { + "epoch": 0.98, + "grad_norm": 22.634596332102674, + "learning_rate": 1.078627013376913e-05, + "loss": 1.6027, + "step": 81615 + }, + { + "epoch": 0.98, + "grad_norm": 37.18260938829996, + "learning_rate": 1.0785687762944072e-05, + "loss": 1.4101, + "step": 81618 + }, + { + "epoch": 0.98, + "grad_norm": 16.467280674015704, + "learning_rate": 1.0785105389437744e-05, + "loss": 1.237, + "step": 81621 + }, + { + "epoch": 0.98, + "grad_norm": 16.843380876883025, + "learning_rate": 1.0784523013252136e-05, + "loss": 1.374, + "step": 81624 + }, + { + "epoch": 0.98, + "grad_norm": 15.387412679003294, + "learning_rate": 1.0783940634389234e-05, + "loss": 1.7458, + "step": 81627 + }, + { + "epoch": 0.98, + "grad_norm": 17.466912371402895, + "learning_rate": 1.0783358252851021e-05, + "loss": 1.2314, + "step": 81630 + }, + { + "epoch": 0.98, + "grad_norm": 14.078166216049029, + "learning_rate": 1.0782775868639492e-05, + "loss": 1.3101, + "step": 81633 + }, + { + "epoch": 0.98, + "grad_norm": 17.46993321233906, + "learning_rate": 1.078219348175663e-05, + "loss": 1.1538, + "step": 81636 + }, + { + "epoch": 0.98, + "grad_norm": 8.895707125262321, + "learning_rate": 1.0781611092204422e-05, + "loss": 1.6275, + "step": 81639 + }, + { + "epoch": 0.98, + "grad_norm": 24.38576113471228, + "learning_rate": 1.0781028699984859e-05, + "loss": 1.2041, + "step": 81642 + }, + { + "epoch": 0.98, + "grad_norm": 10.19563481524365, + "learning_rate": 1.0780446305099928e-05, + "loss": 1.4506, + "step": 81645 + }, + { + "epoch": 0.98, + "grad_norm": 15.4037747907706, + "learning_rate": 1.077986390755161e-05, + "loss": 0.9526, + "step": 81648 + }, + { + "epoch": 0.98, + "grad_norm": 36.98113791469378, + "learning_rate": 1.0779281507341902e-05, + "loss": 1.4817, + "step": 81651 + }, + { + "epoch": 0.98, + "grad_norm": 98.14571523799685, + "learning_rate": 1.0778699104472782e-05, + "loss": 1.7061, + "step": 81654 + }, + { + "epoch": 0.98, + "grad_norm": 20.137101743363043, + "learning_rate": 1.0778116698946248e-05, + "loss": 1.108, + "step": 81657 + }, + { + "epoch": 0.98, + "grad_norm": 15.324265215957254, + "learning_rate": 1.077753429076428e-05, + "loss": 1.1377, + "step": 81660 + }, + { + "epoch": 0.98, + "grad_norm": 25.078765847829473, + "learning_rate": 1.0776951879928866e-05, + "loss": 1.3089, + "step": 81663 + }, + { + "epoch": 0.98, + "grad_norm": 11.8698187583696, + "learning_rate": 1.0776369466441993e-05, + "loss": 1.1499, + "step": 81666 + }, + { + "epoch": 0.98, + "grad_norm": 38.08804189746525, + "learning_rate": 1.0775787050305655e-05, + "loss": 1.4203, + "step": 81669 + }, + { + "epoch": 0.98, + "grad_norm": 10.505214067683818, + "learning_rate": 1.0775204631521832e-05, + "loss": 1.2456, + "step": 81672 + }, + { + "epoch": 0.98, + "grad_norm": 6.837071183908404, + "learning_rate": 1.0774622210092518e-05, + "loss": 1.1455, + "step": 81675 + }, + { + "epoch": 0.98, + "grad_norm": 10.176468658073555, + "learning_rate": 1.0774039786019696e-05, + "loss": 0.8983, + "step": 81678 + }, + { + "epoch": 0.98, + "grad_norm": 11.386235057600357, + "learning_rate": 1.0773457359305356e-05, + "loss": 1.5881, + "step": 81681 + }, + { + "epoch": 0.98, + "grad_norm": 5.898814636059536, + "learning_rate": 1.0772874929951483e-05, + "loss": 0.9571, + "step": 81684 + }, + { + "epoch": 0.98, + "grad_norm": 26.083975323598555, + "learning_rate": 1.0772292497960066e-05, + "loss": 1.3532, + "step": 81687 + }, + { + "epoch": 0.98, + "grad_norm": 55.462204872443245, + "learning_rate": 1.0771710063333093e-05, + "loss": 1.4973, + "step": 81690 + }, + { + "epoch": 0.98, + "grad_norm": 15.282419048657465, + "learning_rate": 1.0771127626072553e-05, + "loss": 1.3974, + "step": 81693 + }, + { + "epoch": 0.98, + "grad_norm": 19.6207982283104, + "learning_rate": 1.077054518618043e-05, + "loss": 1.2167, + "step": 81696 + }, + { + "epoch": 0.98, + "grad_norm": 21.655402033903986, + "learning_rate": 1.0769962743658717e-05, + "loss": 1.0293, + "step": 81699 + }, + { + "epoch": 0.98, + "grad_norm": 9.780056956264747, + "learning_rate": 1.0769380298509397e-05, + "loss": 1.0219, + "step": 81702 + }, + { + "epoch": 0.98, + "grad_norm": 5.03210943280578, + "learning_rate": 1.0768797850734459e-05, + "loss": 1.4889, + "step": 81705 + }, + { + "epoch": 0.98, + "grad_norm": 15.03998599680377, + "learning_rate": 1.0768215400335891e-05, + "loss": 1.3745, + "step": 81708 + }, + { + "epoch": 0.98, + "grad_norm": 13.128471963538104, + "learning_rate": 1.0767632947315681e-05, + "loss": 1.3306, + "step": 81711 + }, + { + "epoch": 0.98, + "grad_norm": 16.97061422314595, + "learning_rate": 1.0767050491675814e-05, + "loss": 1.2437, + "step": 81714 + }, + { + "epoch": 0.98, + "grad_norm": 28.4029720273342, + "learning_rate": 1.076646803341828e-05, + "loss": 1.5032, + "step": 81717 + }, + { + "epoch": 0.98, + "grad_norm": 20.724240149437485, + "learning_rate": 1.076588557254507e-05, + "loss": 1.3421, + "step": 81720 + }, + { + "epoch": 0.98, + "grad_norm": 5.847293181194464, + "learning_rate": 1.0765303109058167e-05, + "loss": 1.2719, + "step": 81723 + }, + { + "epoch": 0.98, + "grad_norm": 13.133449879412602, + "learning_rate": 1.076472064295956e-05, + "loss": 1.3962, + "step": 81726 + }, + { + "epoch": 0.98, + "grad_norm": 10.902639490614686, + "learning_rate": 1.0764138174251235e-05, + "loss": 1.426, + "step": 81729 + }, + { + "epoch": 0.98, + "grad_norm": 6.269966315843265, + "learning_rate": 1.0763555702935184e-05, + "loss": 1.0515, + "step": 81732 + }, + { + "epoch": 0.98, + "grad_norm": 9.92233795871834, + "learning_rate": 1.0762973229013392e-05, + "loss": 1.5847, + "step": 81735 + }, + { + "epoch": 0.98, + "grad_norm": 16.135086164375068, + "learning_rate": 1.0762390752487846e-05, + "loss": 1.0285, + "step": 81738 + }, + { + "epoch": 0.98, + "grad_norm": 7.547000106890662, + "learning_rate": 1.0761808273360539e-05, + "loss": 1.096, + "step": 81741 + }, + { + "epoch": 0.98, + "grad_norm": 24.022658825299995, + "learning_rate": 1.076122579163345e-05, + "loss": 0.9646, + "step": 81744 + }, + { + "epoch": 0.98, + "grad_norm": 4.65343784332596, + "learning_rate": 1.0760643307308572e-05, + "loss": 1.6468, + "step": 81747 + }, + { + "epoch": 0.98, + "grad_norm": 3.6057618223267114, + "learning_rate": 1.0760060820387892e-05, + "loss": 1.4212, + "step": 81750 + }, + { + "epoch": 0.98, + "grad_norm": 35.67509037758211, + "learning_rate": 1.0759478330873401e-05, + "loss": 1.3282, + "step": 81753 + }, + { + "epoch": 0.98, + "grad_norm": 8.798790789809201, + "learning_rate": 1.0758895838767086e-05, + "loss": 1.4724, + "step": 81756 + }, + { + "epoch": 0.98, + "grad_norm": 4.39723276973049, + "learning_rate": 1.0758313344070925e-05, + "loss": 1.3422, + "step": 81759 + }, + { + "epoch": 0.98, + "grad_norm": 27.104072418033766, + "learning_rate": 1.0757730846786922e-05, + "loss": 1.3159, + "step": 81762 + }, + { + "epoch": 0.98, + "grad_norm": 11.78504242099747, + "learning_rate": 1.0757148346917052e-05, + "loss": 1.1921, + "step": 81765 + }, + { + "epoch": 0.98, + "grad_norm": 4.55764061948976, + "learning_rate": 1.0756565844463309e-05, + "loss": 1.2515, + "step": 81768 + }, + { + "epoch": 0.98, + "grad_norm": 15.593437167066185, + "learning_rate": 1.0755983339427676e-05, + "loss": 1.4417, + "step": 81771 + }, + { + "epoch": 0.98, + "grad_norm": 7.1563174231974225, + "learning_rate": 1.0755400831812148e-05, + "loss": 1.5469, + "step": 81774 + }, + { + "epoch": 0.98, + "grad_norm": 9.178682319134328, + "learning_rate": 1.0754818321618708e-05, + "loss": 1.6695, + "step": 81777 + }, + { + "epoch": 0.98, + "grad_norm": 9.442852380231717, + "learning_rate": 1.0754235808849343e-05, + "loss": 1.3342, + "step": 81780 + }, + { + "epoch": 0.98, + "grad_norm": 9.491419307805552, + "learning_rate": 1.0753653293506047e-05, + "loss": 1.3538, + "step": 81783 + }, + { + "epoch": 0.98, + "grad_norm": 15.019001870187255, + "learning_rate": 1.0753070775590802e-05, + "loss": 1.373, + "step": 81786 + }, + { + "epoch": 0.98, + "grad_norm": 28.78655896516582, + "learning_rate": 1.0752488255105595e-05, + "loss": 1.2444, + "step": 81789 + }, + { + "epoch": 0.98, + "grad_norm": 8.056606929864596, + "learning_rate": 1.0751905732052419e-05, + "loss": 1.2632, + "step": 81792 + }, + { + "epoch": 0.98, + "grad_norm": 3.8110724572644354, + "learning_rate": 1.0751323206433262e-05, + "loss": 1.3653, + "step": 81795 + }, + { + "epoch": 0.98, + "grad_norm": 78.346357494343, + "learning_rate": 1.0750740678250104e-05, + "loss": 1.4509, + "step": 81798 + }, + { + "epoch": 0.98, + "grad_norm": 11.326314147164132, + "learning_rate": 1.075015814750494e-05, + "loss": 1.4827, + "step": 81801 + }, + { + "epoch": 0.98, + "grad_norm": 12.278074367136153, + "learning_rate": 1.074957561419976e-05, + "loss": 0.9971, + "step": 81804 + }, + { + "epoch": 0.98, + "grad_norm": 14.624267732809964, + "learning_rate": 1.0748993078336545e-05, + "loss": 1.1401, + "step": 81807 + }, + { + "epoch": 0.98, + "grad_norm": 5.688457563154303, + "learning_rate": 1.0748410539917289e-05, + "loss": 1.1431, + "step": 81810 + }, + { + "epoch": 0.98, + "grad_norm": 13.4020473162876, + "learning_rate": 1.0747827998943974e-05, + "loss": 1.6958, + "step": 81813 + }, + { + "epoch": 0.98, + "grad_norm": 15.261713713514023, + "learning_rate": 1.0747245455418595e-05, + "loss": 1.1755, + "step": 81816 + }, + { + "epoch": 0.98, + "grad_norm": 12.5732309811781, + "learning_rate": 1.0746662909343135e-05, + "loss": 1.8785, + "step": 81819 + }, + { + "epoch": 0.98, + "grad_norm": 4.537421364655145, + "learning_rate": 1.0746080360719584e-05, + "loss": 1.6037, + "step": 81822 + }, + { + "epoch": 0.98, + "grad_norm": 4.192411191709955, + "learning_rate": 1.0745497809549928e-05, + "loss": 1.2314, + "step": 81825 + }, + { + "epoch": 0.98, + "grad_norm": 6.277870143818559, + "learning_rate": 1.0744915255836156e-05, + "loss": 1.2928, + "step": 81828 + }, + { + "epoch": 0.98, + "grad_norm": 61.1028280425759, + "learning_rate": 1.0744332699580258e-05, + "loss": 1.224, + "step": 81831 + }, + { + "epoch": 0.98, + "grad_norm": 9.316695163700155, + "learning_rate": 1.074375014078422e-05, + "loss": 1.1102, + "step": 81834 + }, + { + "epoch": 0.98, + "grad_norm": 11.140169275850905, + "learning_rate": 1.0743167579450034e-05, + "loss": 1.3195, + "step": 81837 + }, + { + "epoch": 0.98, + "grad_norm": 12.314409106592981, + "learning_rate": 1.074258501557968e-05, + "loss": 0.9132, + "step": 81840 + }, + { + "epoch": 0.98, + "grad_norm": 11.592573350405111, + "learning_rate": 1.0742002449175153e-05, + "loss": 1.2296, + "step": 81843 + }, + { + "epoch": 0.98, + "grad_norm": 2.5767722918916, + "learning_rate": 1.0741419880238437e-05, + "loss": 1.0916, + "step": 81846 + }, + { + "epoch": 0.98, + "grad_norm": 29.230849208061617, + "learning_rate": 1.0740837308771526e-05, + "loss": 1.3203, + "step": 81849 + }, + { + "epoch": 0.98, + "grad_norm": 10.965585213748476, + "learning_rate": 1.0740254734776401e-05, + "loss": 1.0958, + "step": 81852 + }, + { + "epoch": 0.98, + "grad_norm": 3.611236950958945, + "learning_rate": 1.0739672158255052e-05, + "loss": 1.3577, + "step": 81855 + }, + { + "epoch": 0.98, + "grad_norm": 8.493604672827367, + "learning_rate": 1.0739089579209471e-05, + "loss": 1.1611, + "step": 81858 + }, + { + "epoch": 0.98, + "grad_norm": 5.091855336519364, + "learning_rate": 1.0738506997641641e-05, + "loss": 1.8469, + "step": 81861 + }, + { + "epoch": 0.98, + "grad_norm": 7.3592761731181975, + "learning_rate": 1.0737924413553555e-05, + "loss": 1.4415, + "step": 81864 + }, + { + "epoch": 0.98, + "grad_norm": 13.701860933939216, + "learning_rate": 1.07373418269472e-05, + "loss": 1.3404, + "step": 81867 + }, + { + "epoch": 0.98, + "grad_norm": 16.800794010084356, + "learning_rate": 1.073675923782456e-05, + "loss": 1.3665, + "step": 81870 + }, + { + "epoch": 0.98, + "grad_norm": 20.498351296926277, + "learning_rate": 1.0736176646187629e-05, + "loss": 1.624, + "step": 81873 + }, + { + "epoch": 0.98, + "grad_norm": 4.804938570495952, + "learning_rate": 1.073559405203839e-05, + "loss": 1.2798, + "step": 81876 + }, + { + "epoch": 0.98, + "grad_norm": 11.393959891549983, + "learning_rate": 1.0735011455378834e-05, + "loss": 1.2868, + "step": 81879 + }, + { + "epoch": 0.98, + "grad_norm": 7.199210459516859, + "learning_rate": 1.0734428856210947e-05, + "loss": 1.3731, + "step": 81882 + }, + { + "epoch": 0.98, + "grad_norm": 8.090492097140274, + "learning_rate": 1.0733846254536722e-05, + "loss": 1.2477, + "step": 81885 + }, + { + "epoch": 0.98, + "grad_norm": 24.749164127007806, + "learning_rate": 1.073326365035814e-05, + "loss": 1.0381, + "step": 81888 + }, + { + "epoch": 0.98, + "grad_norm": 27.48110812546877, + "learning_rate": 1.0732681043677197e-05, + "loss": 1.3236, + "step": 81891 + }, + { + "epoch": 0.98, + "grad_norm": 9.499778402545275, + "learning_rate": 1.0732098434495875e-05, + "loss": 0.9047, + "step": 81894 + }, + { + "epoch": 0.98, + "grad_norm": 10.187402292393678, + "learning_rate": 1.0731515822816167e-05, + "loss": 1.1023, + "step": 81897 + }, + { + "epoch": 0.98, + "grad_norm": 10.917955889514428, + "learning_rate": 1.0730933208640059e-05, + "loss": 1.1331, + "step": 81900 + }, + { + "epoch": 0.98, + "grad_norm": 39.33404671270456, + "learning_rate": 1.0730350591969539e-05, + "loss": 1.4332, + "step": 81903 + }, + { + "epoch": 0.98, + "grad_norm": 25.9866396527391, + "learning_rate": 1.0729767972806593e-05, + "loss": 1.3223, + "step": 81906 + }, + { + "epoch": 0.98, + "grad_norm": 9.757196063145562, + "learning_rate": 1.072918535115321e-05, + "loss": 1.2335, + "step": 81909 + }, + { + "epoch": 0.98, + "grad_norm": 7.7812998703331315, + "learning_rate": 1.0728602727011389e-05, + "loss": 1.3273, + "step": 81912 + }, + { + "epoch": 0.99, + "grad_norm": 4.990729075250919, + "learning_rate": 1.07280201003831e-05, + "loss": 1.2253, + "step": 81915 + }, + { + "epoch": 0.99, + "grad_norm": 9.58083570003906, + "learning_rate": 1.0727437471270346e-05, + "loss": 1.3594, + "step": 81918 + }, + { + "epoch": 0.99, + "grad_norm": 5.755527953398171, + "learning_rate": 1.0726854839675109e-05, + "loss": 1.4879, + "step": 81921 + }, + { + "epoch": 0.99, + "grad_norm": 5.941730866902871, + "learning_rate": 1.072627220559938e-05, + "loss": 1.5498, + "step": 81924 + }, + { + "epoch": 0.99, + "grad_norm": 10.641844587412763, + "learning_rate": 1.0725689569045141e-05, + "loss": 1.432, + "step": 81927 + }, + { + "epoch": 0.99, + "grad_norm": 26.173132910041364, + "learning_rate": 1.0725106930014386e-05, + "loss": 1.6063, + "step": 81930 + }, + { + "epoch": 0.99, + "grad_norm": 8.617787477536021, + "learning_rate": 1.0724524288509104e-05, + "loss": 1.1412, + "step": 81933 + }, + { + "epoch": 0.99, + "grad_norm": 32.90922299360931, + "learning_rate": 1.072394164453128e-05, + "loss": 1.1527, + "step": 81936 + }, + { + "epoch": 0.99, + "grad_norm": 2.8739175281040414, + "learning_rate": 1.0723358998082906e-05, + "loss": 1.5851, + "step": 81939 + }, + { + "epoch": 0.99, + "grad_norm": 10.86457241832795, + "learning_rate": 1.0722776349165966e-05, + "loss": 0.9964, + "step": 81942 + }, + { + "epoch": 0.99, + "grad_norm": 37.690436297297225, + "learning_rate": 1.0722193697782455e-05, + "loss": 1.3668, + "step": 81945 + }, + { + "epoch": 0.99, + "grad_norm": 21.817795504090963, + "learning_rate": 1.072161104393435e-05, + "loss": 1.2107, + "step": 81948 + }, + { + "epoch": 0.99, + "grad_norm": 5.931889462077331, + "learning_rate": 1.072102838762365e-05, + "loss": 1.3366, + "step": 81951 + }, + { + "epoch": 0.99, + "grad_norm": 8.264232217285798, + "learning_rate": 1.0720445728852342e-05, + "loss": 1.3992, + "step": 81954 + }, + { + "epoch": 0.99, + "grad_norm": 4.459695330442133, + "learning_rate": 1.0719863067622412e-05, + "loss": 1.4756, + "step": 81957 + }, + { + "epoch": 0.99, + "grad_norm": 4.757028666015242, + "learning_rate": 1.0719280403935846e-05, + "loss": 1.0976, + "step": 81960 + }, + { + "epoch": 0.99, + "grad_norm": 30.73512370124467, + "learning_rate": 1.0718697737794636e-05, + "loss": 1.0796, + "step": 81963 + }, + { + "epoch": 0.99, + "grad_norm": 11.916427073998605, + "learning_rate": 1.071811506920077e-05, + "loss": 1.2391, + "step": 81966 + }, + { + "epoch": 0.99, + "grad_norm": 11.667950428020985, + "learning_rate": 1.0717532398156234e-05, + "loss": 1.0162, + "step": 81969 + }, + { + "epoch": 0.99, + "grad_norm": 10.109830792821983, + "learning_rate": 1.0716949724663019e-05, + "loss": 1.2255, + "step": 81972 + }, + { + "epoch": 0.99, + "grad_norm": 6.085190531552813, + "learning_rate": 1.0716367048723115e-05, + "loss": 1.3776, + "step": 81975 + }, + { + "epoch": 0.99, + "grad_norm": 9.786572643734878, + "learning_rate": 1.071578437033851e-05, + "loss": 1.602, + "step": 81978 + }, + { + "epoch": 0.99, + "grad_norm": 3.0370167240304697, + "learning_rate": 1.0715201689511184e-05, + "loss": 1.1675, + "step": 81981 + }, + { + "epoch": 0.99, + "grad_norm": 11.248899834370246, + "learning_rate": 1.0714619006243137e-05, + "loss": 1.2179, + "step": 81984 + }, + { + "epoch": 0.99, + "grad_norm": 6.158410837990407, + "learning_rate": 1.0714036320536352e-05, + "loss": 1.4566, + "step": 81987 + }, + { + "epoch": 0.99, + "grad_norm": 8.164325961195996, + "learning_rate": 1.0713453632392816e-05, + "loss": 1.2018, + "step": 81990 + }, + { + "epoch": 0.99, + "grad_norm": 19.306805186579243, + "learning_rate": 1.0712870941814525e-05, + "loss": 1.4021, + "step": 81993 + }, + { + "epoch": 0.99, + "grad_norm": 33.261601954167546, + "learning_rate": 1.0712288248803458e-05, + "loss": 1.1584, + "step": 81996 + }, + { + "epoch": 0.99, + "grad_norm": 10.858586249303485, + "learning_rate": 1.0711705553361608e-05, + "loss": 1.0745, + "step": 81999 + }, + { + "epoch": 0.99, + "grad_norm": 9.689283709590496, + "learning_rate": 1.0711122855490963e-05, + "loss": 1.239, + "step": 82002 + }, + { + "epoch": 0.99, + "grad_norm": 30.301954390217798, + "learning_rate": 1.0710540155193513e-05, + "loss": 1.225, + "step": 82005 + }, + { + "epoch": 0.99, + "grad_norm": 7.517086798041868, + "learning_rate": 1.0709957452471246e-05, + "loss": 1.6112, + "step": 82008 + }, + { + "epoch": 0.99, + "grad_norm": 11.97308776212428, + "learning_rate": 1.0709374747326151e-05, + "loss": 1.06, + "step": 82011 + }, + { + "epoch": 0.99, + "grad_norm": 12.29223222875128, + "learning_rate": 1.0708792039760212e-05, + "loss": 1.2381, + "step": 82014 + }, + { + "epoch": 0.99, + "grad_norm": 8.972046219405655, + "learning_rate": 1.0708209329775424e-05, + "loss": 1.1403, + "step": 82017 + }, + { + "epoch": 0.99, + "grad_norm": 7.691991859372429, + "learning_rate": 1.0707626617373772e-05, + "loss": 1.054, + "step": 82020 + }, + { + "epoch": 0.99, + "grad_norm": 6.646668558225121, + "learning_rate": 1.0707043902557241e-05, + "loss": 1.2785, + "step": 82023 + }, + { + "epoch": 0.99, + "grad_norm": 9.23789280647386, + "learning_rate": 1.070646118532783e-05, + "loss": 1.1941, + "step": 82026 + }, + { + "epoch": 0.99, + "grad_norm": 6.903145362131037, + "learning_rate": 1.0705878465687518e-05, + "loss": 1.0471, + "step": 82029 + }, + { + "epoch": 0.99, + "grad_norm": 8.862224018934432, + "learning_rate": 1.07052957436383e-05, + "loss": 1.3109, + "step": 82032 + }, + { + "epoch": 0.99, + "grad_norm": 20.365909918679076, + "learning_rate": 1.0704713019182159e-05, + "loss": 1.2213, + "step": 82035 + }, + { + "epoch": 0.99, + "grad_norm": 8.732392631334372, + "learning_rate": 1.0704130292321087e-05, + "loss": 1.4361, + "step": 82038 + }, + { + "epoch": 0.99, + "grad_norm": 12.915230869713312, + "learning_rate": 1.0703547563057071e-05, + "loss": 1.2408, + "step": 82041 + }, + { + "epoch": 0.99, + "grad_norm": 7.611178293960525, + "learning_rate": 1.07029648313921e-05, + "loss": 1.5699, + "step": 82044 + }, + { + "epoch": 0.99, + "grad_norm": 15.41415987929577, + "learning_rate": 1.0702382097328165e-05, + "loss": 1.5077, + "step": 82047 + }, + { + "epoch": 0.99, + "grad_norm": 8.549132915551352, + "learning_rate": 1.0701799360867252e-05, + "loss": 1.0632, + "step": 82050 + }, + { + "epoch": 0.99, + "grad_norm": 50.40017295618396, + "learning_rate": 1.0701216622011354e-05, + "loss": 1.3951, + "step": 82053 + }, + { + "epoch": 0.99, + "grad_norm": 6.120877277290232, + "learning_rate": 1.0700633880762452e-05, + "loss": 1.3747, + "step": 82056 + }, + { + "epoch": 0.99, + "grad_norm": 10.85676661802377, + "learning_rate": 1.070005113712254e-05, + "loss": 1.5374, + "step": 82059 + }, + { + "epoch": 0.99, + "grad_norm": 30.44497675723126, + "learning_rate": 1.0699468391093606e-05, + "loss": 1.1156, + "step": 82062 + }, + { + "epoch": 0.99, + "grad_norm": 12.798902711072838, + "learning_rate": 1.0698885642677639e-05, + "loss": 0.8759, + "step": 82065 + }, + { + "epoch": 0.99, + "grad_norm": 13.396034931266506, + "learning_rate": 1.0698302891876626e-05, + "loss": 1.4711, + "step": 82068 + }, + { + "epoch": 0.99, + "grad_norm": 4.92421687287079, + "learning_rate": 1.0697720138692557e-05, + "loss": 1.3167, + "step": 82071 + }, + { + "epoch": 0.99, + "grad_norm": 21.934154359336052, + "learning_rate": 1.0697137383127423e-05, + "loss": 1.5535, + "step": 82074 + }, + { + "epoch": 0.99, + "grad_norm": 12.348076316300155, + "learning_rate": 1.0696554625183205e-05, + "loss": 1.0424, + "step": 82077 + }, + { + "epoch": 0.99, + "grad_norm": 15.981837530640822, + "learning_rate": 1.0695971864861901e-05, + "loss": 1.5541, + "step": 82080 + }, + { + "epoch": 0.99, + "grad_norm": 11.830942620247251, + "learning_rate": 1.0695389102165495e-05, + "loss": 1.2231, + "step": 82083 + }, + { + "epoch": 0.99, + "grad_norm": 3.3687913196780452, + "learning_rate": 1.0694806337095975e-05, + "loss": 1.4004, + "step": 82086 + }, + { + "epoch": 0.99, + "grad_norm": 13.302525542931953, + "learning_rate": 1.069422356965533e-05, + "loss": 1.3716, + "step": 82089 + }, + { + "epoch": 0.99, + "grad_norm": 13.485616697326385, + "learning_rate": 1.0693640799845555e-05, + "loss": 1.1812, + "step": 82092 + }, + { + "epoch": 0.99, + "grad_norm": 12.760729974911392, + "learning_rate": 1.069305802766863e-05, + "loss": 1.5178, + "step": 82095 + }, + { + "epoch": 0.99, + "grad_norm": 12.441075854023135, + "learning_rate": 1.0692475253126548e-05, + "loss": 1.6855, + "step": 82098 + }, + { + "epoch": 0.99, + "grad_norm": 17.31694718091119, + "learning_rate": 1.06918924762213e-05, + "loss": 1.3694, + "step": 82101 + }, + { + "epoch": 0.99, + "grad_norm": 11.965700273745894, + "learning_rate": 1.069130969695487e-05, + "loss": 1.1433, + "step": 82104 + }, + { + "epoch": 0.99, + "grad_norm": 7.81775953075285, + "learning_rate": 1.069072691532925e-05, + "loss": 1.3163, + "step": 82107 + }, + { + "epoch": 0.99, + "grad_norm": 12.526567572376607, + "learning_rate": 1.0690144131346424e-05, + "loss": 1.3249, + "step": 82110 + }, + { + "epoch": 0.99, + "grad_norm": 16.86844446862535, + "learning_rate": 1.068956134500839e-05, + "loss": 1.3113, + "step": 82113 + }, + { + "epoch": 0.99, + "grad_norm": 14.646508224814571, + "learning_rate": 1.0688978556317128e-05, + "loss": 1.2494, + "step": 82116 + }, + { + "epoch": 0.99, + "grad_norm": 21.07982159400534, + "learning_rate": 1.0688395765274632e-05, + "loss": 1.7374, + "step": 82119 + }, + { + "epoch": 0.99, + "grad_norm": 8.747298473643967, + "learning_rate": 1.0687812971882889e-05, + "loss": 0.9525, + "step": 82122 + }, + { + "epoch": 0.99, + "grad_norm": 4.564765439926206, + "learning_rate": 1.0687230176143888e-05, + "loss": 1.3298, + "step": 82125 + }, + { + "epoch": 0.99, + "grad_norm": 5.706750829292893, + "learning_rate": 1.0686647378059618e-05, + "loss": 1.3863, + "step": 82128 + }, + { + "epoch": 0.99, + "grad_norm": 8.905141548334951, + "learning_rate": 1.0686064577632068e-05, + "loss": 1.3542, + "step": 82131 + }, + { + "epoch": 0.99, + "grad_norm": 10.264772860226637, + "learning_rate": 1.0685481774863224e-05, + "loss": 1.3588, + "step": 82134 + }, + { + "epoch": 0.99, + "grad_norm": 12.822293638786187, + "learning_rate": 1.0684898969755082e-05, + "loss": 1.1901, + "step": 82137 + }, + { + "epoch": 0.99, + "grad_norm": 19.51370318903196, + "learning_rate": 1.0684316162309625e-05, + "loss": 1.1321, + "step": 82140 + }, + { + "epoch": 0.99, + "grad_norm": 5.991147881119902, + "learning_rate": 1.0683733352528842e-05, + "loss": 1.3316, + "step": 82143 + }, + { + "epoch": 0.99, + "grad_norm": 5.907111408236527, + "learning_rate": 1.0683150540414724e-05, + "loss": 1.1588, + "step": 82146 + }, + { + "epoch": 0.99, + "grad_norm": 6.722566841621678, + "learning_rate": 1.0682567725969259e-05, + "loss": 1.0606, + "step": 82149 + }, + { + "epoch": 0.99, + "grad_norm": 20.546588961535026, + "learning_rate": 1.0681984909194435e-05, + "loss": 1.4962, + "step": 82152 + }, + { + "epoch": 0.99, + "grad_norm": 38.92076834715488, + "learning_rate": 1.0681402090092245e-05, + "loss": 1.4313, + "step": 82155 + }, + { + "epoch": 0.99, + "grad_norm": 27.092603249923272, + "learning_rate": 1.0680819268664675e-05, + "loss": 1.3989, + "step": 82158 + }, + { + "epoch": 0.99, + "grad_norm": 18.58403856622269, + "learning_rate": 1.0680236444913713e-05, + "loss": 1.5431, + "step": 82161 + }, + { + "epoch": 0.99, + "grad_norm": 16.774234639057468, + "learning_rate": 1.0679653618841348e-05, + "loss": 1.4385, + "step": 82164 + }, + { + "epoch": 0.99, + "grad_norm": 3.0544721243896955, + "learning_rate": 1.0679070790449573e-05, + "loss": 1.1395, + "step": 82167 + }, + { + "epoch": 0.99, + "grad_norm": 15.619500571720174, + "learning_rate": 1.0678487959740373e-05, + "loss": 1.2101, + "step": 82170 + }, + { + "epoch": 0.99, + "grad_norm": 13.171756700806744, + "learning_rate": 1.0677905126715738e-05, + "loss": 1.134, + "step": 82173 + }, + { + "epoch": 0.99, + "grad_norm": 3.752521355281665, + "learning_rate": 1.0677322291377654e-05, + "loss": 1.2556, + "step": 82176 + }, + { + "epoch": 0.99, + "grad_norm": 7.676459163328011, + "learning_rate": 1.0676739453728115e-05, + "loss": 1.2434, + "step": 82179 + }, + { + "epoch": 0.99, + "grad_norm": 16.071631839082166, + "learning_rate": 1.0676156613769109e-05, + "loss": 1.6611, + "step": 82182 + }, + { + "epoch": 0.99, + "grad_norm": 9.63124068007226, + "learning_rate": 1.067557377150262e-05, + "loss": 1.2655, + "step": 82185 + }, + { + "epoch": 0.99, + "grad_norm": 15.953930887477924, + "learning_rate": 1.0674990926930646e-05, + "loss": 1.1146, + "step": 82188 + }, + { + "epoch": 0.99, + "grad_norm": 288.9358701739878, + "learning_rate": 1.067440808005517e-05, + "loss": 1.3594, + "step": 82191 + }, + { + "epoch": 0.99, + "grad_norm": 13.257465843517977, + "learning_rate": 1.0673825230878184e-05, + "loss": 1.1216, + "step": 82194 + }, + { + "epoch": 0.99, + "grad_norm": 7.474943663788322, + "learning_rate": 1.067324237940167e-05, + "loss": 1.2335, + "step": 82197 + }, + { + "epoch": 0.99, + "grad_norm": 8.774500805560457, + "learning_rate": 1.0672659525627627e-05, + "loss": 1.2013, + "step": 82200 + }, + { + "epoch": 0.99, + "grad_norm": 19.302578941116348, + "learning_rate": 1.0672076669558035e-05, + "loss": 1.478, + "step": 82203 + }, + { + "epoch": 0.99, + "grad_norm": 8.215398743754763, + "learning_rate": 1.0671493811194889e-05, + "loss": 1.3801, + "step": 82206 + }, + { + "epoch": 0.99, + "grad_norm": 18.52831648972554, + "learning_rate": 1.0670910950540176e-05, + "loss": 1.3936, + "step": 82209 + }, + { + "epoch": 0.99, + "grad_norm": 14.191488124392817, + "learning_rate": 1.0670328087595889e-05, + "loss": 1.2883, + "step": 82212 + }, + { + "epoch": 0.99, + "grad_norm": 11.660823869179326, + "learning_rate": 1.0669745222364011e-05, + "loss": 1.6866, + "step": 82215 + }, + { + "epoch": 0.99, + "grad_norm": 7.14068891489138, + "learning_rate": 1.0669162354846534e-05, + "loss": 0.8948, + "step": 82218 + }, + { + "epoch": 0.99, + "grad_norm": 8.213346159046303, + "learning_rate": 1.0668579485045447e-05, + "loss": 1.6601, + "step": 82221 + }, + { + "epoch": 0.99, + "grad_norm": 2.1573159784779445, + "learning_rate": 1.0667996612962739e-05, + "loss": 1.0893, + "step": 82224 + }, + { + "epoch": 0.99, + "grad_norm": 34.15846985514783, + "learning_rate": 1.06674137386004e-05, + "loss": 1.3542, + "step": 82227 + }, + { + "epoch": 0.99, + "grad_norm": 10.18038460387223, + "learning_rate": 1.0666830861960415e-05, + "loss": 1.4638, + "step": 82230 + }, + { + "epoch": 0.99, + "grad_norm": 4.663432870041932, + "learning_rate": 1.0666247983044782e-05, + "loss": 1.4178, + "step": 82233 + }, + { + "epoch": 0.99, + "grad_norm": 14.837181725693954, + "learning_rate": 1.0665665101855483e-05, + "loss": 1.4008, + "step": 82236 + }, + { + "epoch": 0.99, + "grad_norm": 28.791384394432928, + "learning_rate": 1.0665082218394504e-05, + "loss": 1.6179, + "step": 82239 + }, + { + "epoch": 0.99, + "grad_norm": 11.511478397050292, + "learning_rate": 1.0664499332663842e-05, + "loss": 1.1225, + "step": 82242 + }, + { + "epoch": 0.99, + "grad_norm": 11.831703250748001, + "learning_rate": 1.0663916444665485e-05, + "loss": 0.8895, + "step": 82245 + }, + { + "epoch": 0.99, + "grad_norm": 5.186897043592093, + "learning_rate": 1.0663333554401421e-05, + "loss": 1.5962, + "step": 82248 + }, + { + "epoch": 0.99, + "grad_norm": 19.093210656839062, + "learning_rate": 1.0662750661873633e-05, + "loss": 1.7365, + "step": 82251 + }, + { + "epoch": 0.99, + "grad_norm": 25.133818922764924, + "learning_rate": 1.0662167767084122e-05, + "loss": 1.844, + "step": 82254 + }, + { + "epoch": 0.99, + "grad_norm": 6.770030056035524, + "learning_rate": 1.0661584870034867e-05, + "loss": 1.2788, + "step": 82257 + }, + { + "epoch": 0.99, + "grad_norm": 29.656606378143852, + "learning_rate": 1.066100197072786e-05, + "loss": 1.4195, + "step": 82260 + }, + { + "epoch": 0.99, + "grad_norm": 9.014139592338362, + "learning_rate": 1.0660419069165096e-05, + "loss": 1.4939, + "step": 82263 + }, + { + "epoch": 0.99, + "grad_norm": 7.635021856706855, + "learning_rate": 1.0659836165348556e-05, + "loss": 1.3144, + "step": 82266 + }, + { + "epoch": 0.99, + "grad_norm": 3.149674363712058, + "learning_rate": 1.0659253259280235e-05, + "loss": 1.5354, + "step": 82269 + }, + { + "epoch": 0.99, + "grad_norm": 9.086594773352502, + "learning_rate": 1.0658670350962116e-05, + "loss": 1.3069, + "step": 82272 + }, + { + "epoch": 0.99, + "grad_norm": 11.98950759320211, + "learning_rate": 1.0658087440396199e-05, + "loss": 0.8616, + "step": 82275 + }, + { + "epoch": 0.99, + "grad_norm": 6.888325117181336, + "learning_rate": 1.0657504527584461e-05, + "loss": 1.2787, + "step": 82278 + }, + { + "epoch": 0.99, + "grad_norm": 9.029838838157309, + "learning_rate": 1.0656921612528901e-05, + "loss": 1.0819, + "step": 82281 + }, + { + "epoch": 0.99, + "grad_norm": 11.632438494488062, + "learning_rate": 1.0656338695231501e-05, + "loss": 1.3734, + "step": 82284 + }, + { + "epoch": 0.99, + "grad_norm": 11.037671937978487, + "learning_rate": 1.0655755775694254e-05, + "loss": 1.4044, + "step": 82287 + }, + { + "epoch": 0.99, + "grad_norm": 12.838537373402696, + "learning_rate": 1.0655172853919148e-05, + "loss": 1.518, + "step": 82290 + }, + { + "epoch": 0.99, + "grad_norm": 14.301029150746698, + "learning_rate": 1.0654589929908175e-05, + "loss": 1.2934, + "step": 82293 + }, + { + "epoch": 0.99, + "grad_norm": 20.229489439432133, + "learning_rate": 1.0654007003663325e-05, + "loss": 1.2582, + "step": 82296 + }, + { + "epoch": 0.99, + "grad_norm": 8.82065103518507, + "learning_rate": 1.0653424075186577e-05, + "loss": 1.3782, + "step": 82299 + }, + { + "epoch": 0.99, + "grad_norm": 14.915596803278248, + "learning_rate": 1.0652841144479936e-05, + "loss": 1.3522, + "step": 82302 + }, + { + "epoch": 0.99, + "grad_norm": 54.36956649799725, + "learning_rate": 1.0652258211545377e-05, + "loss": 1.3214, + "step": 82305 + }, + { + "epoch": 0.99, + "grad_norm": 10.495549255959373, + "learning_rate": 1.06516752763849e-05, + "loss": 1.1175, + "step": 82308 + }, + { + "epoch": 0.99, + "grad_norm": 4.014748362445671, + "learning_rate": 1.0651092339000487e-05, + "loss": 0.9303, + "step": 82311 + }, + { + "epoch": 0.99, + "grad_norm": 3.873205497009608, + "learning_rate": 1.0650509399394131e-05, + "loss": 1.4022, + "step": 82314 + }, + { + "epoch": 0.99, + "grad_norm": 10.114855143553996, + "learning_rate": 1.0649926457567822e-05, + "loss": 1.5736, + "step": 82317 + }, + { + "epoch": 0.99, + "grad_norm": 3.1622880821240824, + "learning_rate": 1.0649343513523547e-05, + "loss": 1.221, + "step": 82320 + }, + { + "epoch": 0.99, + "grad_norm": 5.244989910030433, + "learning_rate": 1.0648760567263299e-05, + "loss": 1.4344, + "step": 82323 + }, + { + "epoch": 0.99, + "grad_norm": 3.8300484081912067, + "learning_rate": 1.064817761878906e-05, + "loss": 1.4197, + "step": 82326 + }, + { + "epoch": 0.99, + "grad_norm": 4.321688934170245, + "learning_rate": 1.0647594668102829e-05, + "loss": 1.6419, + "step": 82329 + }, + { + "epoch": 0.99, + "grad_norm": 5.160335376489899, + "learning_rate": 1.0647011715206587e-05, + "loss": 1.0793, + "step": 82332 + }, + { + "epoch": 0.99, + "grad_norm": 5.401150214854103, + "learning_rate": 1.064642876010233e-05, + "loss": 1.0475, + "step": 82335 + }, + { + "epoch": 0.99, + "grad_norm": 28.977119931491032, + "learning_rate": 1.0645845802792046e-05, + "loss": 1.5178, + "step": 82338 + }, + { + "epoch": 0.99, + "grad_norm": 10.691835816747366, + "learning_rate": 1.0645262843277717e-05, + "loss": 1.3159, + "step": 82341 + }, + { + "epoch": 0.99, + "grad_norm": 33.28476447708441, + "learning_rate": 1.0644679881561342e-05, + "loss": 1.3238, + "step": 82344 + }, + { + "epoch": 0.99, + "grad_norm": 46.81566603485137, + "learning_rate": 1.0644096917644906e-05, + "loss": 1.4036, + "step": 82347 + }, + { + "epoch": 0.99, + "grad_norm": 14.555150389068395, + "learning_rate": 1.0643513951530402e-05, + "loss": 1.0683, + "step": 82350 + }, + { + "epoch": 0.99, + "grad_norm": 38.170129089945675, + "learning_rate": 1.0642930983219812e-05, + "loss": 1.3774, + "step": 82353 + }, + { + "epoch": 0.99, + "grad_norm": 24.50282258362453, + "learning_rate": 1.0642348012715133e-05, + "loss": 1.6125, + "step": 82356 + }, + { + "epoch": 0.99, + "grad_norm": 33.86018316520157, + "learning_rate": 1.0641765040018352e-05, + "loss": 1.3633, + "step": 82359 + }, + { + "epoch": 0.99, + "grad_norm": 29.787708325899274, + "learning_rate": 1.0641182065131456e-05, + "loss": 1.4352, + "step": 82362 + }, + { + "epoch": 0.99, + "grad_norm": 21.995270614155473, + "learning_rate": 1.0640599088056437e-05, + "loss": 1.504, + "step": 82365 + }, + { + "epoch": 0.99, + "grad_norm": 3.390609289579308, + "learning_rate": 1.0640016108795284e-05, + "loss": 1.2183, + "step": 82368 + }, + { + "epoch": 0.99, + "grad_norm": 7.642608524840516, + "learning_rate": 1.063943312734999e-05, + "loss": 1.7802, + "step": 82371 + }, + { + "epoch": 0.99, + "grad_norm": 7.689935986696429, + "learning_rate": 1.0638850143722537e-05, + "loss": 1.31, + "step": 82374 + }, + { + "epoch": 0.99, + "grad_norm": 2.9754420564743156, + "learning_rate": 1.0638267157914918e-05, + "loss": 1.1631, + "step": 82377 + }, + { + "epoch": 0.99, + "grad_norm": 21.380869942631012, + "learning_rate": 1.0637684169929129e-05, + "loss": 1.4222, + "step": 82380 + }, + { + "epoch": 0.99, + "grad_norm": 19.596712834792918, + "learning_rate": 1.0637101179767149e-05, + "loss": 1.2083, + "step": 82383 + }, + { + "epoch": 0.99, + "grad_norm": 2.744693792200793, + "learning_rate": 1.0636518187430971e-05, + "loss": 1.5662, + "step": 82386 + }, + { + "epoch": 0.99, + "grad_norm": 7.30596052759995, + "learning_rate": 1.0635935192922586e-05, + "loss": 1.5556, + "step": 82389 + }, + { + "epoch": 0.99, + "grad_norm": 4.524147661658081, + "learning_rate": 1.0635352196243987e-05, + "loss": 1.5156, + "step": 82392 + }, + { + "epoch": 0.99, + "grad_norm": 35.010306778873876, + "learning_rate": 1.0634769197397154e-05, + "loss": 1.4503, + "step": 82395 + }, + { + "epoch": 0.99, + "grad_norm": 11.31670657385857, + "learning_rate": 1.0634186196384088e-05, + "loss": 1.3081, + "step": 82398 + }, + { + "epoch": 0.99, + "grad_norm": 4.0403456510602425, + "learning_rate": 1.063360319320677e-05, + "loss": 1.5125, + "step": 82401 + }, + { + "epoch": 0.99, + "grad_norm": 11.09683941515318, + "learning_rate": 1.0633020187867196e-05, + "loss": 1.2084, + "step": 82404 + }, + { + "epoch": 0.99, + "grad_norm": 12.32945738483692, + "learning_rate": 1.0632437180367348e-05, + "loss": 1.326, + "step": 82407 + }, + { + "epoch": 0.99, + "grad_norm": 13.765511149344949, + "learning_rate": 1.0631854170709221e-05, + "loss": 1.7187, + "step": 82410 + }, + { + "epoch": 0.99, + "grad_norm": 8.271561979840447, + "learning_rate": 1.0631271158894806e-05, + "loss": 1.2425, + "step": 82413 + }, + { + "epoch": 0.99, + "grad_norm": 6.86763027468246, + "learning_rate": 1.0630688144926084e-05, + "loss": 1.4542, + "step": 82416 + }, + { + "epoch": 0.99, + "grad_norm": 7.1888319652957335, + "learning_rate": 1.0630105128805055e-05, + "loss": 1.5597, + "step": 82419 + }, + { + "epoch": 0.99, + "grad_norm": 17.416065644039115, + "learning_rate": 1.06295221105337e-05, + "loss": 1.3679, + "step": 82422 + }, + { + "epoch": 0.99, + "grad_norm": 12.415269366519974, + "learning_rate": 1.0628939090114018e-05, + "loss": 1.347, + "step": 82425 + }, + { + "epoch": 0.99, + "grad_norm": 4.213636009699072, + "learning_rate": 1.0628356067547992e-05, + "loss": 1.1773, + "step": 82428 + }, + { + "epoch": 0.99, + "grad_norm": 5.490626433890228, + "learning_rate": 1.0627773042837612e-05, + "loss": 1.442, + "step": 82431 + }, + { + "epoch": 0.99, + "grad_norm": 13.155052670588507, + "learning_rate": 1.062719001598487e-05, + "loss": 1.9315, + "step": 82434 + }, + { + "epoch": 0.99, + "grad_norm": 29.645946902002454, + "learning_rate": 1.0626606986991754e-05, + "loss": 1.3393, + "step": 82437 + }, + { + "epoch": 0.99, + "grad_norm": 11.593014209040163, + "learning_rate": 1.0626023955860251e-05, + "loss": 1.2116, + "step": 82440 + }, + { + "epoch": 0.99, + "grad_norm": 8.283815812002647, + "learning_rate": 1.0625440922592358e-05, + "loss": 1.2148, + "step": 82443 + }, + { + "epoch": 0.99, + "grad_norm": 3.319696843363533, + "learning_rate": 1.0624857887190059e-05, + "loss": 1.2111, + "step": 82446 + }, + { + "epoch": 0.99, + "grad_norm": 12.3424100329996, + "learning_rate": 1.0624274849655342e-05, + "loss": 1.1675, + "step": 82449 + }, + { + "epoch": 0.99, + "grad_norm": 9.529609342196533, + "learning_rate": 1.0623691809990204e-05, + "loss": 1.1917, + "step": 82452 + }, + { + "epoch": 0.99, + "grad_norm": 7.460118387528809, + "learning_rate": 1.0623108768196628e-05, + "loss": 1.1404, + "step": 82455 + }, + { + "epoch": 0.99, + "grad_norm": 9.830131327982736, + "learning_rate": 1.062252572427661e-05, + "loss": 1.5681, + "step": 82458 + }, + { + "epoch": 0.99, + "grad_norm": 20.487307973820823, + "learning_rate": 1.0621942678232131e-05, + "loss": 1.6107, + "step": 82461 + }, + { + "epoch": 0.99, + "grad_norm": 52.683955986307495, + "learning_rate": 1.062135963006519e-05, + "loss": 1.3243, + "step": 82464 + }, + { + "epoch": 0.99, + "grad_norm": 2.927733199086251, + "learning_rate": 1.062077657977777e-05, + "loss": 1.086, + "step": 82467 + }, + { + "epoch": 0.99, + "grad_norm": 12.253547073067837, + "learning_rate": 1.0620193527371862e-05, + "loss": 1.3275, + "step": 82470 + }, + { + "epoch": 0.99, + "grad_norm": 21.88168770347239, + "learning_rate": 1.0619610472849458e-05, + "loss": 1.1325, + "step": 82473 + }, + { + "epoch": 0.99, + "grad_norm": 13.243363197877894, + "learning_rate": 1.061902741621255e-05, + "loss": 1.2498, + "step": 82476 + }, + { + "epoch": 0.99, + "grad_norm": 8.590595910059653, + "learning_rate": 1.061844435746312e-05, + "loss": 1.1432, + "step": 82479 + }, + { + "epoch": 0.99, + "grad_norm": 15.548654331519609, + "learning_rate": 1.0617861296603163e-05, + "loss": 1.5287, + "step": 82482 + }, + { + "epoch": 0.99, + "grad_norm": 18.155651895784064, + "learning_rate": 1.0617278233634667e-05, + "loss": 1.1453, + "step": 82485 + }, + { + "epoch": 0.99, + "grad_norm": 13.626971626530716, + "learning_rate": 1.0616695168559625e-05, + "loss": 1.3663, + "step": 82488 + }, + { + "epoch": 0.99, + "grad_norm": 13.220354131210525, + "learning_rate": 1.0616112101380024e-05, + "loss": 1.2449, + "step": 82491 + }, + { + "epoch": 0.99, + "grad_norm": 36.59790347463286, + "learning_rate": 1.0615529032097854e-05, + "loss": 1.3303, + "step": 82494 + }, + { + "epoch": 0.99, + "grad_norm": 216.45400568601954, + "learning_rate": 1.0614945960715105e-05, + "loss": 1.6091, + "step": 82497 + }, + { + "epoch": 0.99, + "grad_norm": 24.094473476988473, + "learning_rate": 1.0614362887233767e-05, + "loss": 1.3148, + "step": 82500 + }, + { + "epoch": 0.99, + "grad_norm": 4.140919945348374, + "learning_rate": 1.061377981165583e-05, + "loss": 1.2862, + "step": 82503 + }, + { + "epoch": 0.99, + "grad_norm": 6.753162070412579, + "learning_rate": 1.0613196733983283e-05, + "loss": 1.3099, + "step": 82506 + }, + { + "epoch": 0.99, + "grad_norm": 12.925040996910134, + "learning_rate": 1.0612613654218117e-05, + "loss": 1.2053, + "step": 82509 + }, + { + "epoch": 0.99, + "grad_norm": 5.125857103944722, + "learning_rate": 1.0612030572362322e-05, + "loss": 1.3029, + "step": 82512 + }, + { + "epoch": 0.99, + "grad_norm": 18.291851169577964, + "learning_rate": 1.0611447488417885e-05, + "loss": 1.4612, + "step": 82515 + }, + { + "epoch": 0.99, + "grad_norm": 11.03959269587506, + "learning_rate": 1.06108644023868e-05, + "loss": 1.3886, + "step": 82518 + }, + { + "epoch": 0.99, + "grad_norm": 9.3968657944277, + "learning_rate": 1.0610281314271054e-05, + "loss": 1.5459, + "step": 82521 + }, + { + "epoch": 0.99, + "grad_norm": 8.651558501734312, + "learning_rate": 1.0609698224072638e-05, + "loss": 1.1032, + "step": 82524 + }, + { + "epoch": 0.99, + "grad_norm": 11.41113296716586, + "learning_rate": 1.060911513179354e-05, + "loss": 1.4481, + "step": 82527 + }, + { + "epoch": 0.99, + "grad_norm": 4.259153642224572, + "learning_rate": 1.0608532037435752e-05, + "loss": 1.442, + "step": 82530 + }, + { + "epoch": 0.99, + "grad_norm": 8.613798926447966, + "learning_rate": 1.0607948941001266e-05, + "loss": 1.6095, + "step": 82533 + }, + { + "epoch": 0.99, + "grad_norm": 40.823357500278895, + "learning_rate": 1.0607365842492065e-05, + "loss": 1.1275, + "step": 82536 + }, + { + "epoch": 0.99, + "grad_norm": 8.163098899886485, + "learning_rate": 1.0606782741910147e-05, + "loss": 1.1318, + "step": 82539 + }, + { + "epoch": 0.99, + "grad_norm": 11.05096596779795, + "learning_rate": 1.0606199639257497e-05, + "loss": 1.0957, + "step": 82542 + }, + { + "epoch": 0.99, + "grad_norm": 10.297433752923185, + "learning_rate": 1.0605616534536107e-05, + "loss": 1.1886, + "step": 82545 + }, + { + "epoch": 0.99, + "grad_norm": 15.277171287387116, + "learning_rate": 1.0605033427747962e-05, + "loss": 0.9962, + "step": 82548 + }, + { + "epoch": 0.99, + "grad_norm": 9.062134409256174, + "learning_rate": 1.0604450318895058e-05, + "loss": 1.1496, + "step": 82551 + }, + { + "epoch": 0.99, + "grad_norm": 4.242864243751871, + "learning_rate": 1.0603867207979384e-05, + "loss": 1.3003, + "step": 82554 + }, + { + "epoch": 0.99, + "grad_norm": 20.729004168164895, + "learning_rate": 1.0603284095002926e-05, + "loss": 1.3899, + "step": 82557 + }, + { + "epoch": 0.99, + "grad_norm": 12.940853937964599, + "learning_rate": 1.060270097996768e-05, + "loss": 1.5994, + "step": 82560 + }, + { + "epoch": 0.99, + "grad_norm": 8.73392952617277, + "learning_rate": 1.060211786287563e-05, + "loss": 1.4168, + "step": 82563 + }, + { + "epoch": 0.99, + "grad_norm": 7.449091841404295, + "learning_rate": 1.0601534743728771e-05, + "loss": 0.8637, + "step": 82566 + }, + { + "epoch": 0.99, + "grad_norm": 28.510210452085413, + "learning_rate": 1.060095162252909e-05, + "loss": 1.554, + "step": 82569 + }, + { + "epoch": 0.99, + "grad_norm": 4.018720321056271, + "learning_rate": 1.0600368499278577e-05, + "loss": 1.5523, + "step": 82572 + }, + { + "epoch": 0.99, + "grad_norm": 10.491788242706694, + "learning_rate": 1.0599785373979223e-05, + "loss": 1.1091, + "step": 82575 + }, + { + "epoch": 0.99, + "grad_norm": 5.596974458557761, + "learning_rate": 1.0599202246633018e-05, + "loss": 1.3871, + "step": 82578 + }, + { + "epoch": 0.99, + "grad_norm": 12.355396155177536, + "learning_rate": 1.0598619117241948e-05, + "loss": 1.4425, + "step": 82581 + }, + { + "epoch": 0.99, + "grad_norm": 19.88870621990585, + "learning_rate": 1.0598035985808011e-05, + "loss": 1.4684, + "step": 82584 + }, + { + "epoch": 0.99, + "grad_norm": 11.834724697930943, + "learning_rate": 1.059745285233319e-05, + "loss": 1.5316, + "step": 82587 + }, + { + "epoch": 0.99, + "grad_norm": 32.98953960369521, + "learning_rate": 1.0596869716819477e-05, + "loss": 1.3231, + "step": 82590 + }, + { + "epoch": 0.99, + "grad_norm": 12.539850507611208, + "learning_rate": 1.0596286579268864e-05, + "loss": 1.3663, + "step": 82593 + }, + { + "epoch": 0.99, + "grad_norm": 5.273310067117091, + "learning_rate": 1.059570343968334e-05, + "loss": 1.4807, + "step": 82596 + }, + { + "epoch": 0.99, + "grad_norm": 5.3360072148045665, + "learning_rate": 1.0595120298064896e-05, + "loss": 1.4286, + "step": 82599 + }, + { + "epoch": 0.99, + "grad_norm": 41.489549877349674, + "learning_rate": 1.0594537154415519e-05, + "loss": 1.2064, + "step": 82602 + }, + { + "epoch": 0.99, + "grad_norm": 13.79322519509738, + "learning_rate": 1.0593954008737199e-05, + "loss": 1.1344, + "step": 82605 + }, + { + "epoch": 0.99, + "grad_norm": 21.33274251607885, + "learning_rate": 1.0593370861031932e-05, + "loss": 1.1504, + "step": 82608 + }, + { + "epoch": 0.99, + "grad_norm": 7.106475567773723, + "learning_rate": 1.05927877113017e-05, + "loss": 1.2586, + "step": 82611 + }, + { + "epoch": 0.99, + "grad_norm": 39.32543622404858, + "learning_rate": 1.05922045595485e-05, + "loss": 1.1731, + "step": 82614 + }, + { + "epoch": 0.99, + "grad_norm": 37.848511120578046, + "learning_rate": 1.059162140577432e-05, + "loss": 0.9032, + "step": 82617 + }, + { + "epoch": 0.99, + "grad_norm": 11.78648605319366, + "learning_rate": 1.0591038249981147e-05, + "loss": 1.4283, + "step": 82620 + }, + { + "epoch": 0.99, + "grad_norm": 21.486721742408672, + "learning_rate": 1.0590455092170973e-05, + "loss": 1.4403, + "step": 82623 + }, + { + "epoch": 0.99, + "grad_norm": 8.741140027607026, + "learning_rate": 1.058987193234579e-05, + "loss": 1.4068, + "step": 82626 + }, + { + "epoch": 0.99, + "grad_norm": 7.370086144716972, + "learning_rate": 1.0589288770507587e-05, + "loss": 1.3696, + "step": 82629 + }, + { + "epoch": 0.99, + "grad_norm": 11.897652753907378, + "learning_rate": 1.0588705606658352e-05, + "loss": 1.2591, + "step": 82632 + }, + { + "epoch": 0.99, + "grad_norm": 39.565016339928285, + "learning_rate": 1.0588122440800076e-05, + "loss": 1.2166, + "step": 82635 + }, + { + "epoch": 0.99, + "grad_norm": 8.11561775077135, + "learning_rate": 1.058753927293475e-05, + "loss": 1.296, + "step": 82638 + }, + { + "epoch": 0.99, + "grad_norm": 5.081089475256267, + "learning_rate": 1.0586956103064367e-05, + "loss": 1.3357, + "step": 82641 + }, + { + "epoch": 0.99, + "grad_norm": 17.29332101853748, + "learning_rate": 1.0586372931190911e-05, + "loss": 1.5075, + "step": 82644 + }, + { + "epoch": 0.99, + "grad_norm": 10.663112123618209, + "learning_rate": 1.0585789757316377e-05, + "loss": 1.3901, + "step": 82647 + }, + { + "epoch": 0.99, + "grad_norm": 4.2724575195753, + "learning_rate": 1.0585206581442756e-05, + "loss": 1.4214, + "step": 82650 + }, + { + "epoch": 0.99, + "grad_norm": 12.501340909300477, + "learning_rate": 1.0584623403572032e-05, + "loss": 2.1379, + "step": 82653 + }, + { + "epoch": 0.99, + "grad_norm": 2.8949824908537156, + "learning_rate": 1.05840402237062e-05, + "loss": 1.1204, + "step": 82656 + }, + { + "epoch": 0.99, + "grad_norm": 18.330309868281226, + "learning_rate": 1.058345704184725e-05, + "loss": 1.4885, + "step": 82659 + }, + { + "epoch": 0.99, + "grad_norm": 11.460172672787875, + "learning_rate": 1.058287385799717e-05, + "loss": 1.2455, + "step": 82662 + }, + { + "epoch": 0.99, + "grad_norm": 9.919306727373774, + "learning_rate": 1.0582290672157952e-05, + "loss": 1.3239, + "step": 82665 + }, + { + "epoch": 0.99, + "grad_norm": 9.667694225366589, + "learning_rate": 1.0581707484331584e-05, + "loss": 1.3526, + "step": 82668 + }, + { + "epoch": 0.99, + "grad_norm": 9.1933788885106, + "learning_rate": 1.058112429452006e-05, + "loss": 1.17, + "step": 82671 + }, + { + "epoch": 0.99, + "grad_norm": 4.925967969341952, + "learning_rate": 1.058054110272537e-05, + "loss": 1.0871, + "step": 82674 + }, + { + "epoch": 0.99, + "grad_norm": 10.095313695515333, + "learning_rate": 1.0579957908949498e-05, + "loss": 1.2383, + "step": 82677 + }, + { + "epoch": 0.99, + "grad_norm": 29.38863439583016, + "learning_rate": 1.0579374713194441e-05, + "loss": 1.606, + "step": 82680 + }, + { + "epoch": 0.99, + "grad_norm": 19.165081862981683, + "learning_rate": 1.0578791515462186e-05, + "loss": 1.2445, + "step": 82683 + }, + { + "epoch": 0.99, + "grad_norm": 26.24088591881522, + "learning_rate": 1.0578208315754726e-05, + "loss": 1.2049, + "step": 82686 + }, + { + "epoch": 0.99, + "grad_norm": 10.02116141173596, + "learning_rate": 1.0577625114074046e-05, + "loss": 1.4003, + "step": 82689 + }, + { + "epoch": 0.99, + "grad_norm": 10.015840264832152, + "learning_rate": 1.0577041910422141e-05, + "loss": 1.5499, + "step": 82692 + }, + { + "epoch": 0.99, + "grad_norm": 23.33329297730386, + "learning_rate": 1.0576458704801e-05, + "loss": 1.318, + "step": 82695 + }, + { + "epoch": 0.99, + "grad_norm": 8.736535438664527, + "learning_rate": 1.0575875497212614e-05, + "loss": 1.2214, + "step": 82698 + }, + { + "epoch": 0.99, + "grad_norm": 9.053092834797656, + "learning_rate": 1.0575292287658972e-05, + "loss": 1.3653, + "step": 82701 + }, + { + "epoch": 0.99, + "grad_norm": 12.810749954879736, + "learning_rate": 1.0574709076142065e-05, + "loss": 1.2391, + "step": 82704 + }, + { + "epoch": 0.99, + "grad_norm": 25.368032470162383, + "learning_rate": 1.0574125862663883e-05, + "loss": 1.3356, + "step": 82707 + }, + { + "epoch": 0.99, + "grad_norm": 18.60324691972324, + "learning_rate": 1.0573542647226411e-05, + "loss": 1.368, + "step": 82710 + }, + { + "epoch": 0.99, + "grad_norm": 12.24902411631832, + "learning_rate": 1.0572959429831653e-05, + "loss": 1.0402, + "step": 82713 + }, + { + "epoch": 0.99, + "grad_norm": 11.03766349773707, + "learning_rate": 1.0572376210481585e-05, + "loss": 1.3185, + "step": 82716 + }, + { + "epoch": 0.99, + "grad_norm": 3.0203880979339788, + "learning_rate": 1.0571792989178202e-05, + "loss": 1.4032, + "step": 82719 + }, + { + "epoch": 0.99, + "grad_norm": 4.068429927613593, + "learning_rate": 1.05712097659235e-05, + "loss": 1.5121, + "step": 82722 + }, + { + "epoch": 0.99, + "grad_norm": 8.202048366671821, + "learning_rate": 1.0570626540719465e-05, + "loss": 1.4314, + "step": 82725 + }, + { + "epoch": 0.99, + "grad_norm": 5.16662683239903, + "learning_rate": 1.0570043313568083e-05, + "loss": 1.5695, + "step": 82728 + }, + { + "epoch": 0.99, + "grad_norm": 22.822909394715268, + "learning_rate": 1.056946008447135e-05, + "loss": 1.3225, + "step": 82731 + }, + { + "epoch": 0.99, + "grad_norm": 13.790752380939427, + "learning_rate": 1.0568876853431259e-05, + "loss": 1.107, + "step": 82734 + }, + { + "epoch": 0.99, + "grad_norm": 11.955777733340813, + "learning_rate": 1.056829362044979e-05, + "loss": 0.9969, + "step": 82737 + }, + { + "epoch": 0.99, + "grad_norm": 7.899985548336549, + "learning_rate": 1.0567710385528944e-05, + "loss": 1.1399, + "step": 82740 + }, + { + "epoch": 0.99, + "grad_norm": 2.743007483589155, + "learning_rate": 1.0567127148670704e-05, + "loss": 1.3385, + "step": 82743 + }, + { + "epoch": 0.99, + "grad_norm": 9.383403614612526, + "learning_rate": 1.0566543909877065e-05, + "loss": 1.1606, + "step": 82746 + }, + { + "epoch": 1.0, + "grad_norm": 8.748082532299083, + "learning_rate": 1.0565960669150015e-05, + "loss": 1.3678, + "step": 82749 + }, + { + "epoch": 1.0, + "grad_norm": 10.148552792810046, + "learning_rate": 1.0565377426491546e-05, + "loss": 1.2048, + "step": 82752 + }, + { + "epoch": 1.0, + "grad_norm": 11.127343131762792, + "learning_rate": 1.0564794181903649e-05, + "loss": 1.2458, + "step": 82755 + }, + { + "epoch": 1.0, + "grad_norm": 13.36031112807222, + "learning_rate": 1.0564210935388308e-05, + "loss": 1.4685, + "step": 82758 + }, + { + "epoch": 1.0, + "grad_norm": 28.750864106222647, + "learning_rate": 1.0563627686947522e-05, + "loss": 1.3182, + "step": 82761 + }, + { + "epoch": 1.0, + "grad_norm": 10.511325451486151, + "learning_rate": 1.0563044436583276e-05, + "loss": 1.4589, + "step": 82764 + }, + { + "epoch": 1.0, + "grad_norm": 9.255299809468811, + "learning_rate": 1.0562461184297565e-05, + "loss": 1.2132, + "step": 82767 + }, + { + "epoch": 1.0, + "grad_norm": 12.48362675209038, + "learning_rate": 1.0561877930092375e-05, + "loss": 0.9469, + "step": 82770 + }, + { + "epoch": 1.0, + "grad_norm": 12.114465474864776, + "learning_rate": 1.0561294673969695e-05, + "loss": 1.3393, + "step": 82773 + }, + { + "epoch": 1.0, + "grad_norm": 8.09500006214965, + "learning_rate": 1.0560711415931524e-05, + "loss": 1.174, + "step": 82776 + }, + { + "epoch": 1.0, + "grad_norm": 6.500410714063341, + "learning_rate": 1.0560128155979842e-05, + "loss": 1.4597, + "step": 82779 + }, + { + "epoch": 1.0, + "grad_norm": 8.322883639383486, + "learning_rate": 1.0559544894116647e-05, + "loss": 1.3204, + "step": 82782 + }, + { + "epoch": 1.0, + "grad_norm": 28.00430599004416, + "learning_rate": 1.0558961630343925e-05, + "loss": 1.8772, + "step": 82785 + }, + { + "epoch": 1.0, + "grad_norm": 5.846660798413902, + "learning_rate": 1.0558378364663671e-05, + "loss": 1.1087, + "step": 82788 + }, + { + "epoch": 1.0, + "grad_norm": 8.835202578919668, + "learning_rate": 1.055779509707787e-05, + "loss": 1.6504, + "step": 82791 + }, + { + "epoch": 1.0, + "grad_norm": 18.837119868464413, + "learning_rate": 1.0557211827588518e-05, + "loss": 1.6084, + "step": 82794 + }, + { + "epoch": 1.0, + "grad_norm": 13.198394080203277, + "learning_rate": 1.0556628556197601e-05, + "loss": 0.9104, + "step": 82797 + }, + { + "epoch": 1.0, + "grad_norm": 9.524662190189416, + "learning_rate": 1.055604528290711e-05, + "loss": 0.9859, + "step": 82800 + }, + { + "epoch": 1.0, + "grad_norm": 7.752085525446686, + "learning_rate": 1.055546200771904e-05, + "loss": 1.5081, + "step": 82803 + }, + { + "epoch": 1.0, + "grad_norm": 4.366942585773405, + "learning_rate": 1.0554878730635375e-05, + "loss": 1.7571, + "step": 82806 + }, + { + "epoch": 1.0, + "grad_norm": 11.567166058199955, + "learning_rate": 1.0554295451658111e-05, + "loss": 1.3969, + "step": 82809 + }, + { + "epoch": 1.0, + "grad_norm": 9.895787777044847, + "learning_rate": 1.0553712170789235e-05, + "loss": 1.2149, + "step": 82812 + }, + { + "epoch": 1.0, + "grad_norm": 5.879589807320254, + "learning_rate": 1.0553128888030738e-05, + "loss": 1.1681, + "step": 82815 + }, + { + "epoch": 1.0, + "grad_norm": 10.932445751369679, + "learning_rate": 1.0552545603384612e-05, + "loss": 1.188, + "step": 82818 + }, + { + "epoch": 1.0, + "grad_norm": 15.670791488198255, + "learning_rate": 1.0551962316852851e-05, + "loss": 1.206, + "step": 82821 + }, + { + "epoch": 1.0, + "grad_norm": 12.016846105458441, + "learning_rate": 1.0551379028437437e-05, + "loss": 1.4946, + "step": 82824 + }, + { + "epoch": 1.0, + "grad_norm": 2.332539781770528, + "learning_rate": 1.0550795738140362e-05, + "loss": 1.3368, + "step": 82827 + }, + { + "epoch": 1.0, + "grad_norm": 3.420045144907408, + "learning_rate": 1.0550212445963628e-05, + "loss": 2.0835, + "step": 82830 + }, + { + "epoch": 1.0, + "grad_norm": 8.65157342145106, + "learning_rate": 1.054962915190921e-05, + "loss": 1.2543, + "step": 82833 + }, + { + "epoch": 1.0, + "grad_norm": 15.250123599327482, + "learning_rate": 1.054904585597911e-05, + "loss": 1.1359, + "step": 82836 + }, + { + "epoch": 1.0, + "grad_norm": 27.31618013473958, + "learning_rate": 1.054846255817531e-05, + "loss": 0.8452, + "step": 82839 + }, + { + "epoch": 1.0, + "grad_norm": 18.465356758888817, + "learning_rate": 1.054787925849981e-05, + "loss": 1.5004, + "step": 82842 + }, + { + "epoch": 1.0, + "grad_norm": 3.095190881428495, + "learning_rate": 1.054729595695459e-05, + "loss": 1.4262, + "step": 82845 + }, + { + "epoch": 1.0, + "grad_norm": 11.539829718644425, + "learning_rate": 1.0546712653541652e-05, + "loss": 1.1464, + "step": 82848 + }, + { + "epoch": 1.0, + "grad_norm": 11.139637441941266, + "learning_rate": 1.0546129348262976e-05, + "loss": 1.3835, + "step": 82851 + }, + { + "epoch": 1.0, + "grad_norm": 17.838676471904048, + "learning_rate": 1.0545546041120558e-05, + "loss": 1.1613, + "step": 82854 + }, + { + "epoch": 1.0, + "grad_norm": 12.35177012383852, + "learning_rate": 1.0544962732116388e-05, + "loss": 1.2785, + "step": 82857 + }, + { + "epoch": 1.0, + "grad_norm": 14.423486013362616, + "learning_rate": 1.0544379421252457e-05, + "loss": 1.3557, + "step": 82860 + }, + { + "epoch": 1.0, + "grad_norm": 15.441557344832646, + "learning_rate": 1.0543796108530756e-05, + "loss": 1.5838, + "step": 82863 + }, + { + "epoch": 1.0, + "grad_norm": 8.209286519211515, + "learning_rate": 1.0543212793953274e-05, + "loss": 1.3029, + "step": 82866 + }, + { + "epoch": 1.0, + "grad_norm": 10.139405931300594, + "learning_rate": 1.0542629477522002e-05, + "loss": 1.8746, + "step": 82869 + }, + { + "epoch": 1.0, + "grad_norm": 11.582904884012645, + "learning_rate": 1.0542046159238932e-05, + "loss": 1.4269, + "step": 82872 + }, + { + "epoch": 1.0, + "grad_norm": 217.44481529110243, + "learning_rate": 1.0541462839106054e-05, + "loss": 1.7364, + "step": 82875 + }, + { + "epoch": 1.0, + "grad_norm": 26.876376089018873, + "learning_rate": 1.0540879517125356e-05, + "loss": 1.4618, + "step": 82878 + }, + { + "epoch": 1.0, + "grad_norm": 2.138948250324114, + "learning_rate": 1.0540296193298831e-05, + "loss": 1.4012, + "step": 82881 + }, + { + "epoch": 1.0, + "grad_norm": 7.553333083444172, + "learning_rate": 1.0539712867628473e-05, + "loss": 1.4459, + "step": 82884 + }, + { + "epoch": 1.0, + "grad_norm": 22.314694832579107, + "learning_rate": 1.0539129540116266e-05, + "loss": 1.6072, + "step": 82887 + }, + { + "epoch": 1.0, + "grad_norm": 9.766095175003827, + "learning_rate": 1.0538546210764206e-05, + "loss": 1.1784, + "step": 82890 + }, + { + "epoch": 1.0, + "grad_norm": 9.108317688055012, + "learning_rate": 1.0537962879574284e-05, + "loss": 1.2381, + "step": 82893 + }, + { + "epoch": 1.0, + "grad_norm": 7.468283680903308, + "learning_rate": 1.0537379546548485e-05, + "loss": 1.5088, + "step": 82896 + }, + { + "epoch": 1.0, + "grad_norm": 21.94779006004716, + "learning_rate": 1.0536796211688802e-05, + "loss": 1.4623, + "step": 82899 + }, + { + "epoch": 1.0, + "grad_norm": 10.081932791085398, + "learning_rate": 1.0536212874997232e-05, + "loss": 1.5603, + "step": 82902 + }, + { + "epoch": 1.0, + "grad_norm": 13.32319053984159, + "learning_rate": 1.0535629536475758e-05, + "loss": 1.4936, + "step": 82905 + }, + { + "epoch": 1.0, + "grad_norm": 15.69245511691406, + "learning_rate": 1.053504619612637e-05, + "loss": 1.0204, + "step": 82908 + }, + { + "epoch": 1.0, + "grad_norm": 14.25416452204323, + "learning_rate": 1.0534462853951067e-05, + "loss": 1.1315, + "step": 82911 + }, + { + "epoch": 1.0, + "grad_norm": 17.648995379235078, + "learning_rate": 1.0533879509951832e-05, + "loss": 1.3771, + "step": 82914 + }, + { + "epoch": 1.0, + "grad_norm": 10.279339094410288, + "learning_rate": 1.053329616413066e-05, + "loss": 1.0431, + "step": 82917 + }, + { + "epoch": 1.0, + "grad_norm": 2.568966946890639, + "learning_rate": 1.0532712816489537e-05, + "loss": 1.6096, + "step": 82920 + }, + { + "epoch": 1.0, + "grad_norm": 6.701142511492499, + "learning_rate": 1.0532129467030462e-05, + "loss": 1.2749, + "step": 82923 + }, + { + "epoch": 1.0, + "grad_norm": 6.962753523518163, + "learning_rate": 1.0531546115755417e-05, + "loss": 1.2559, + "step": 82926 + }, + { + "epoch": 1.0, + "grad_norm": 5.344652098667398, + "learning_rate": 1.05309627626664e-05, + "loss": 1.5982, + "step": 82929 + }, + { + "epoch": 1.0, + "grad_norm": 24.801510331148425, + "learning_rate": 1.0530379407765394e-05, + "loss": 1.2645, + "step": 82932 + }, + { + "epoch": 1.0, + "grad_norm": 12.037888884873947, + "learning_rate": 1.0529796051054397e-05, + "loss": 1.4269, + "step": 82935 + }, + { + "epoch": 1.0, + "grad_norm": 7.421772615057355, + "learning_rate": 1.0529212692535398e-05, + "loss": 1.5589, + "step": 82938 + }, + { + "epoch": 1.0, + "grad_norm": 16.35604489917899, + "learning_rate": 1.0528629332210383e-05, + "loss": 1.0378, + "step": 82941 + }, + { + "epoch": 1.0, + "grad_norm": 2.8052436888832304, + "learning_rate": 1.0528045970081349e-05, + "loss": 1.1066, + "step": 82944 + }, + { + "epoch": 1.0, + "grad_norm": 6.669717489469178, + "learning_rate": 1.0527462606150282e-05, + "loss": 1.2696, + "step": 82947 + }, + { + "epoch": 1.0, + "grad_norm": 10.293612544723802, + "learning_rate": 1.0526879240419179e-05, + "loss": 1.3078, + "step": 82950 + }, + { + "epoch": 1.0, + "grad_norm": 11.017336883462944, + "learning_rate": 1.0526295872890025e-05, + "loss": 1.3236, + "step": 82953 + }, + { + "epoch": 1.0, + "grad_norm": 13.26215116046952, + "learning_rate": 1.0525712503564812e-05, + "loss": 1.459, + "step": 82956 + }, + { + "epoch": 1.0, + "grad_norm": 19.178023423355047, + "learning_rate": 1.0525129132445532e-05, + "loss": 1.3885, + "step": 82959 + }, + { + "epoch": 1.0, + "grad_norm": 19.468526423332577, + "learning_rate": 1.0524545759534175e-05, + "loss": 1.2508, + "step": 82962 + }, + { + "epoch": 1.0, + "grad_norm": 9.922137960371286, + "learning_rate": 1.0523962384832734e-05, + "loss": 1.5196, + "step": 82965 + }, + { + "epoch": 1.0, + "grad_norm": 5.822061472818698, + "learning_rate": 1.0523379008343197e-05, + "loss": 1.0444, + "step": 82968 + }, + { + "epoch": 1.0, + "grad_norm": 7.11583022390884, + "learning_rate": 1.0522795630067555e-05, + "loss": 1.4355, + "step": 82971 + }, + { + "epoch": 1.0, + "grad_norm": 21.280662448191695, + "learning_rate": 1.0522212250007799e-05, + "loss": 1.372, + "step": 82974 + }, + { + "epoch": 1.0, + "grad_norm": 9.733421639703643, + "learning_rate": 1.0521628868165926e-05, + "loss": 1.4186, + "step": 82977 + }, + { + "epoch": 1.0, + "grad_norm": 5.488651200070034, + "learning_rate": 1.0521045484543916e-05, + "loss": 1.4188, + "step": 82980 + }, + { + "epoch": 1.0, + "grad_norm": 9.579470236592035, + "learning_rate": 1.052046209914377e-05, + "loss": 1.4551, + "step": 82983 + }, + { + "epoch": 1.0, + "grad_norm": 8.219860815025422, + "learning_rate": 1.0519878711967468e-05, + "loss": 1.2256, + "step": 82986 + }, + { + "epoch": 1.0, + "grad_norm": 20.72945817207792, + "learning_rate": 1.0519295323017013e-05, + "loss": 1.3661, + "step": 82989 + }, + { + "epoch": 1.0, + "grad_norm": 8.121365768237402, + "learning_rate": 1.0518711932294387e-05, + "loss": 1.4761, + "step": 82992 + }, + { + "epoch": 1.0, + "grad_norm": 11.350678968941548, + "learning_rate": 1.0518128539801585e-05, + "loss": 1.3062, + "step": 82995 + }, + { + "epoch": 1.0, + "grad_norm": 17.75791346917324, + "learning_rate": 1.0517545145540598e-05, + "loss": 1.3379, + "step": 82998 + }, + { + "epoch": 1.0, + "grad_norm": 14.63102584665141, + "learning_rate": 1.0516961749513415e-05, + "loss": 1.4505, + "step": 83001 + }, + { + "epoch": 1.0, + "grad_norm": 5.954879141435297, + "learning_rate": 1.0516378351722027e-05, + "loss": 1.6422, + "step": 83004 + }, + { + "epoch": 1.0, + "grad_norm": 2.672813042523531, + "learning_rate": 1.0515794952168425e-05, + "loss": 1.5299, + "step": 83007 + }, + { + "epoch": 1.0, + "grad_norm": 16.152793879561123, + "learning_rate": 1.0515211550854603e-05, + "loss": 1.6186, + "step": 83010 + }, + { + "epoch": 1.0, + "grad_norm": 34.61204661503339, + "learning_rate": 1.051462814778255e-05, + "loss": 1.3754, + "step": 83013 + }, + { + "epoch": 1.0, + "grad_norm": 51.327703171715974, + "learning_rate": 1.0514044742954253e-05, + "loss": 1.4367, + "step": 83016 + }, + { + "epoch": 1.0, + "grad_norm": 22.038320830323098, + "learning_rate": 1.0513461336371708e-05, + "loss": 1.2653, + "step": 83019 + }, + { + "epoch": 1.0, + "grad_norm": 18.496425945353693, + "learning_rate": 1.0512877928036906e-05, + "loss": 1.1049, + "step": 83022 + }, + { + "epoch": 1.0, + "grad_norm": 2.550295925430467, + "learning_rate": 1.0512294517951838e-05, + "loss": 1.617, + "step": 83025 + }, + { + "epoch": 1.0, + "grad_norm": 56.98299358293828, + "learning_rate": 1.0511711106118488e-05, + "loss": 1.3136, + "step": 83028 + }, + { + "epoch": 1.0, + "grad_norm": 7.698349152360634, + "learning_rate": 1.0511127692538856e-05, + "loss": 1.5533, + "step": 83031 + }, + { + "epoch": 1.0, + "grad_norm": 25.955834665736035, + "learning_rate": 1.0510544277214926e-05, + "loss": 1.6447, + "step": 83034 + }, + { + "epoch": 1.0, + "grad_norm": 6.956122287141531, + "learning_rate": 1.0509960860148698e-05, + "loss": 1.2951, + "step": 83037 + }, + { + "epoch": 1.0, + "grad_norm": 10.61667540005078, + "learning_rate": 1.050937744134215e-05, + "loss": 1.2026, + "step": 83040 + }, + { + "epoch": 1.0, + "grad_norm": 29.322955325504385, + "learning_rate": 1.0508794020797287e-05, + "loss": 1.5641, + "step": 83043 + }, + { + "epoch": 1.0, + "grad_norm": 38.782834170953194, + "learning_rate": 1.0508210598516089e-05, + "loss": 1.2997, + "step": 83046 + }, + { + "epoch": 1.0, + "grad_norm": 7.927142124875097, + "learning_rate": 1.0507627174500552e-05, + "loss": 1.4822, + "step": 83049 + }, + { + "epoch": 1.0, + "grad_norm": 32.19475217296812, + "learning_rate": 1.0507043748752667e-05, + "loss": 1.2765, + "step": 83052 + }, + { + "epoch": 1.0, + "grad_norm": 28.055124477091972, + "learning_rate": 1.0506460321274426e-05, + "loss": 0.9779, + "step": 83055 + }, + { + "epoch": 1.0, + "grad_norm": 19.889321223877168, + "learning_rate": 1.0505876892067817e-05, + "loss": 1.5242, + "step": 83058 + }, + { + "epoch": 1.0, + "grad_norm": 6.191183280349238, + "learning_rate": 1.050529346113483e-05, + "loss": 1.4226, + "step": 83061 + }, + { + "epoch": 1.0, + "grad_norm": 8.170790647504548, + "learning_rate": 1.0504710028477461e-05, + "loss": 1.4925, + "step": 83064 + }, + { + "epoch": 1.0, + "grad_norm": 20.032333441810074, + "learning_rate": 1.0504126594097697e-05, + "loss": 1.8281, + "step": 83067 + }, + { + "epoch": 1.0, + "grad_norm": 13.695231078471359, + "learning_rate": 1.0503543157997531e-05, + "loss": 0.9725, + "step": 83070 + }, + { + "epoch": 1.0, + "grad_norm": 4.462112396893103, + "learning_rate": 1.0502959720178954e-05, + "loss": 1.2975, + "step": 83073 + }, + { + "epoch": 1.0, + "grad_norm": 6.454740604045372, + "learning_rate": 1.0502376280643958e-05, + "loss": 1.2352, + "step": 83076 + }, + { + "epoch": 1.0, + "grad_norm": 38.455297058335425, + "learning_rate": 1.0501792839394531e-05, + "loss": 1.2381, + "step": 83079 + }, + { + "epoch": 1.0, + "grad_norm": 4.628092095085108, + "learning_rate": 1.0501209396432663e-05, + "loss": 1.3612, + "step": 83082 + }, + { + "epoch": 1.0, + "grad_norm": 22.24177941188859, + "learning_rate": 1.0500625951760352e-05, + "loss": 1.5404, + "step": 83085 + }, + { + "epoch": 1.0, + "grad_norm": 14.05127907221463, + "learning_rate": 1.0500042505379587e-05, + "loss": 1.3443, + "step": 83088 + }, + { + "epoch": 1.0, + "grad_norm": 6.192318570319999, + "learning_rate": 1.0499459057292354e-05, + "loss": 1.4034, + "step": 83091 + }, + { + "epoch": 1.0, + "grad_norm": 17.71382944737277, + "learning_rate": 1.0498875607500644e-05, + "loss": 1.3889, + "step": 83094 + }, + { + "epoch": 1.0, + "grad_norm": 10.420308324530652, + "learning_rate": 1.0498292156006456e-05, + "loss": 1.7093, + "step": 83097 + }, + { + "epoch": 1.0, + "grad_norm": 3.413933657141158, + "learning_rate": 1.0497708702811776e-05, + "loss": 1.3084, + "step": 83100 + }, + { + "epoch": 1.0, + "grad_norm": 5.945789952638551, + "learning_rate": 1.0497125247918592e-05, + "loss": 1.2937, + "step": 83103 + }, + { + "epoch": 1.0, + "grad_norm": 21.06415246364301, + "learning_rate": 1.04965417913289e-05, + "loss": 1.2501, + "step": 83106 + }, + { + "epoch": 1.0, + "grad_norm": 6.368420951214879, + "learning_rate": 1.049595833304469e-05, + "loss": 1.1809, + "step": 83109 + }, + { + "epoch": 1.0, + "grad_norm": 5.597908044887006, + "learning_rate": 1.0495374873067953e-05, + "loss": 1.3639, + "step": 83112 + }, + { + "epoch": 1.0, + "grad_norm": 5.831729543311872, + "learning_rate": 1.049479141140068e-05, + "loss": 1.2343, + "step": 83115 + }, + { + "epoch": 1.0, + "grad_norm": 6.307317374365867, + "learning_rate": 1.0494207948044863e-05, + "loss": 1.2023, + "step": 83118 + }, + { + "epoch": 1.0, + "grad_norm": 23.711671505684535, + "learning_rate": 1.0493624483002494e-05, + "loss": 1.2937, + "step": 83121 + }, + { + "epoch": 1.0, + "grad_norm": 4.562980903731294, + "learning_rate": 1.0493041016275558e-05, + "loss": 1.2845, + "step": 83124 + }, + { + "epoch": 1.0, + "grad_norm": 17.996326185481724, + "learning_rate": 1.0492457547866052e-05, + "loss": 1.2247, + "step": 83127 + }, + { + "epoch": 1.0, + "grad_norm": 7.2261664115710715, + "learning_rate": 1.0491874077775967e-05, + "loss": 1.4768, + "step": 83130 + }, + { + "epoch": 1.0, + "grad_norm": 7.414586260112242, + "learning_rate": 1.0491290606007294e-05, + "loss": 1.4404, + "step": 83133 + }, + { + "epoch": 1.0, + "grad_norm": 5.69198990659866, + "learning_rate": 1.0490707132562018e-05, + "loss": 1.6573, + "step": 83136 + }, + { + "epoch": 1.0, + "grad_norm": 5.527722130212611, + "learning_rate": 1.0490123657442141e-05, + "loss": 1.6372, + "step": 83139 + }, + { + "epoch": 1.0, + "grad_norm": 12.009422436153098, + "learning_rate": 1.0489540180649647e-05, + "loss": 1.4876, + "step": 83142 + }, + { + "epoch": 1.0, + "grad_norm": 11.737440760754938, + "learning_rate": 1.048895670218653e-05, + "loss": 1.2008, + "step": 83145 + }, + { + "epoch": 1.0, + "grad_norm": 8.460798736064918, + "learning_rate": 1.0488373222054775e-05, + "loss": 1.0711, + "step": 83148 + }, + { + "epoch": 1.0, + "grad_norm": 6.700948838202026, + "learning_rate": 1.0487789740256382e-05, + "loss": 1.256, + "step": 83151 + }, + { + "epoch": 1.0, + "grad_norm": 3.268177423290983, + "learning_rate": 1.0487206256793339e-05, + "loss": 1.044, + "step": 83154 + }, + { + "epoch": 1.0, + "grad_norm": 14.288277033173632, + "learning_rate": 1.0486622771667632e-05, + "loss": 0.963, + "step": 83157 + }, + { + "epoch": 1.0, + "grad_norm": 13.8399545083853, + "learning_rate": 1.048603928488126e-05, + "loss": 1.2234, + "step": 83160 + }, + { + "epoch": 1.0, + "grad_norm": 8.916459854582953, + "learning_rate": 1.0485455796436212e-05, + "loss": 1.7288, + "step": 83163 + }, + { + "epoch": 1.0, + "grad_norm": 69.25489026488872, + "learning_rate": 1.0484872306334477e-05, + "loss": 1.1632, + "step": 83166 + }, + { + "epoch": 1.0, + "grad_norm": 39.867972796181874, + "learning_rate": 1.0484288814578046e-05, + "loss": 1.5403, + "step": 83169 + }, + { + "epoch": 1.0, + "grad_norm": 8.381553381738815, + "learning_rate": 1.0483705321168915e-05, + "loss": 1.0858, + "step": 83172 + }, + { + "epoch": 1.0, + "grad_norm": 16.95809007247903, + "learning_rate": 1.048312182610907e-05, + "loss": 1.3053, + "step": 83175 + }, + { + "epoch": 1.0, + "grad_norm": 23.11229340846609, + "learning_rate": 1.0482538329400502e-05, + "loss": 1.0659, + "step": 83178 + }, + { + "epoch": 1.0, + "grad_norm": 17.31128180715004, + "learning_rate": 1.0481954831045207e-05, + "loss": 1.272, + "step": 83181 + }, + { + "epoch": 1.0, + "grad_norm": 2.3629446735208646, + "learning_rate": 1.0481371331045175e-05, + "loss": 1.2709, + "step": 83184 + }, + { + "epoch": 1.0, + "grad_norm": 2.9269677655126474, + "learning_rate": 1.0480787829402394e-05, + "loss": 1.092, + "step": 83187 + }, + { + "epoch": 1.0, + "grad_norm": 34.12110770175551, + "learning_rate": 1.0480204326118856e-05, + "loss": 1.1679, + "step": 83190 + }, + { + "epoch": 1.0, + "grad_norm": 8.759761780011768, + "learning_rate": 1.0479620821196559e-05, + "loss": 1.1486, + "step": 83193 + }, + { + "epoch": 1.0, + "grad_norm": 5.880603568347509, + "learning_rate": 1.0479037314637483e-05, + "loss": 1.4854, + "step": 83196 + }, + { + "epoch": 1.0, + "grad_norm": 6.995270131200509, + "learning_rate": 1.047845380644363e-05, + "loss": 1.1081, + "step": 83199 + }, + { + "epoch": 1.0, + "grad_norm": 12.286323137567942, + "learning_rate": 1.0477870296616984e-05, + "loss": 1.0569, + "step": 83202 + }, + { + "epoch": 1.0, + "grad_norm": 4.764518022780131, + "learning_rate": 1.0477286785159537e-05, + "loss": 1.2093, + "step": 83205 + }, + { + "epoch": 1.0, + "grad_norm": 12.611174995231565, + "learning_rate": 1.0476703272073285e-05, + "loss": 1.2271, + "step": 83208 + }, + { + "epoch": 1.0, + "grad_norm": 4.5713380081220585, + "learning_rate": 1.0476119757360215e-05, + "loss": 1.196, + "step": 83211 + }, + { + "epoch": 1.0, + "grad_norm": 17.711090916167073, + "learning_rate": 1.0475536241022322e-05, + "loss": 1.3295, + "step": 83214 + }, + { + "epoch": 1.0, + "grad_norm": 6.684731348501579, + "learning_rate": 1.047495272306159e-05, + "loss": 1.1525, + "step": 83217 + }, + { + "epoch": 1.0, + "grad_norm": 12.203669118225427, + "learning_rate": 1.0474369203480022e-05, + "loss": 1.1464, + "step": 83220 + }, + { + "epoch": 1.0, + "grad_norm": 7.542864918683101, + "learning_rate": 1.0473785682279598e-05, + "loss": 1.4766, + "step": 83223 + }, + { + "epoch": 1.0, + "grad_norm": 15.050441549471369, + "learning_rate": 1.0473202159462316e-05, + "loss": 1.1329, + "step": 83226 + }, + { + "epoch": 1.0, + "grad_norm": 6.937969181728156, + "learning_rate": 1.0472618635030165e-05, + "loss": 0.9696, + "step": 83229 + }, + { + "epoch": 1.0, + "grad_norm": 11.988884932944215, + "learning_rate": 1.0472035108985135e-05, + "loss": 1.0057, + "step": 83232 + }, + { + "epoch": 1.0, + "grad_norm": 3.8710879061071686, + "learning_rate": 1.0471451581329222e-05, + "loss": 1.3093, + "step": 83235 + }, + { + "epoch": 1.0, + "grad_norm": 26.523283904917523, + "learning_rate": 1.0470868052064416e-05, + "loss": 0.8287, + "step": 83238 + }, + { + "epoch": 1.0, + "grad_norm": 8.167417248450592, + "learning_rate": 1.0470284521192705e-05, + "loss": 1.0571, + "step": 83241 + }, + { + "epoch": 1.0, + "grad_norm": 20.82417067720935, + "learning_rate": 1.0469700988716078e-05, + "loss": 1.2101, + "step": 83244 + }, + { + "epoch": 1.0, + "grad_norm": 19.824442952563203, + "learning_rate": 1.046911745463654e-05, + "loss": 1.1307, + "step": 83247 + }, + { + "epoch": 1.0, + "grad_norm": 18.102996682431453, + "learning_rate": 1.0468533918956065e-05, + "loss": 1.2177, + "step": 83250 + }, + { + "epoch": 1.0, + "grad_norm": 14.029272365648907, + "learning_rate": 1.0467950381676656e-05, + "loss": 1.1834, + "step": 83253 + }, + { + "epoch": 1.0, + "grad_norm": 14.156036527805455, + "learning_rate": 1.04673668428003e-05, + "loss": 1.7026, + "step": 83256 + }, + { + "epoch": 1.0, + "grad_norm": 5.680261966062984, + "learning_rate": 1.046678330232899e-05, + "loss": 1.1997, + "step": 83259 + }, + { + "epoch": 1.0, + "grad_norm": 11.641050077560681, + "learning_rate": 1.0466199760264715e-05, + "loss": 1.5053, + "step": 83262 + }, + { + "epoch": 1.0, + "grad_norm": 14.857484180550037, + "learning_rate": 1.046561621660947e-05, + "loss": 1.2003, + "step": 83265 + }, + { + "epoch": 1.0, + "grad_norm": 4.5701687693864, + "learning_rate": 1.0465032671365244e-05, + "loss": 1.419, + "step": 83268 + }, + { + "epoch": 1.0, + "grad_norm": 9.029338013043846, + "learning_rate": 1.0464449124534028e-05, + "loss": 1.3692, + "step": 83271 + }, + { + "epoch": 1.0, + "grad_norm": 9.460173467623438, + "learning_rate": 1.0463865576117818e-05, + "loss": 1.3523, + "step": 83274 + }, + { + "epoch": 1.0, + "grad_norm": 4.671484087283426, + "learning_rate": 1.0463282026118596e-05, + "loss": 1.1134, + "step": 83277 + }, + { + "epoch": 1.0, + "grad_norm": 21.154969549159834, + "learning_rate": 1.0462698474538366e-05, + "loss": 1.2931, + "step": 83280 + }, + { + "epoch": 1.0, + "grad_norm": 8.58910003612136, + "learning_rate": 1.0462114921379107e-05, + "loss": 1.0039, + "step": 83283 + }, + { + "epoch": 1.0, + "grad_norm": 18.92038904569465, + "learning_rate": 1.0461531366642817e-05, + "loss": 1.1207, + "step": 83286 + }, + { + "epoch": 1.0, + "grad_norm": 9.514451481055925, + "learning_rate": 1.0460947810331493e-05, + "loss": 1.3294, + "step": 83289 + }, + { + "epoch": 1.0, + "grad_norm": 3.4725460371819965, + "learning_rate": 1.0460364252447113e-05, + "loss": 1.8872, + "step": 83292 + }, + { + "epoch": 1.0, + "grad_norm": 30.586320834253268, + "learning_rate": 1.0459780692991677e-05, + "loss": 1.2113, + "step": 83295 + }, + { + "epoch": 1.0, + "grad_norm": 3.8592619340723124, + "learning_rate": 1.0459197131967176e-05, + "loss": 1.2125, + "step": 83298 + }, + { + "epoch": 1.0, + "grad_norm": 5.776100296633168, + "learning_rate": 1.0458613569375602e-05, + "loss": 0.9772, + "step": 83301 + }, + { + "epoch": 1.0, + "grad_norm": 6.446355022836182, + "learning_rate": 1.0458030005218942e-05, + "loss": 0.9443, + "step": 83304 + }, + { + "epoch": 1.0, + "grad_norm": 24.926479390583765, + "learning_rate": 1.0457446439499194e-05, + "loss": 1.3078, + "step": 83307 + }, + { + "epoch": 1.0, + "grad_norm": 17.70012072252278, + "learning_rate": 1.0456862872218345e-05, + "loss": 1.2421, + "step": 83310 + }, + { + "epoch": 1.0, + "grad_norm": 6.042193564515159, + "learning_rate": 1.0456279303378385e-05, + "loss": 1.2972, + "step": 83313 + }, + { + "epoch": 1.0, + "grad_norm": 5.413153569177476, + "learning_rate": 1.0455695732981313e-05, + "loss": 1.1736, + "step": 83316 + }, + { + "epoch": 1.0, + "grad_norm": 5.576604719210642, + "learning_rate": 1.0455112161029109e-05, + "loss": 1.2479, + "step": 83319 + }, + { + "epoch": 1.0, + "grad_norm": 5.976915627878201, + "learning_rate": 1.0454528587523778e-05, + "loss": 1.0696, + "step": 83322 + }, + { + "epoch": 1.0, + "grad_norm": 4.36214205031923, + "learning_rate": 1.04539450124673e-05, + "loss": 1.086, + "step": 83325 + }, + { + "epoch": 1.0, + "grad_norm": 6.0541392752356735, + "learning_rate": 1.0453361435861673e-05, + "loss": 1.1442, + "step": 83328 + }, + { + "epoch": 1.0, + "grad_norm": 3.0741384762896415, + "learning_rate": 1.0452777857708888e-05, + "loss": 1.128, + "step": 83331 + }, + { + "epoch": 1.0, + "grad_norm": 9.94836664977598, + "learning_rate": 1.0452194278010935e-05, + "loss": 0.9904, + "step": 83334 + }, + { + "epoch": 1.0, + "grad_norm": 9.750920454604431, + "learning_rate": 1.0451610696769801e-05, + "loss": 1.2361, + "step": 83337 + }, + { + "epoch": 1.0, + "grad_norm": 27.703778641688487, + "learning_rate": 1.0451027113987485e-05, + "loss": 1.4657, + "step": 83340 + }, + { + "epoch": 1.0, + "grad_norm": 6.655308709472749, + "learning_rate": 1.0450443529665979e-05, + "loss": 1.2576, + "step": 83343 + }, + { + "epoch": 1.0, + "grad_norm": 24.3858456811903, + "learning_rate": 1.0449859943807267e-05, + "loss": 1.0805, + "step": 83346 + }, + { + "epoch": 1.0, + "grad_norm": 8.452885949147596, + "learning_rate": 1.0449276356413349e-05, + "loss": 1.422, + "step": 83349 + }, + { + "epoch": 1.0, + "grad_norm": 16.78023376220807, + "learning_rate": 1.0448692767486213e-05, + "loss": 1.0797, + "step": 83352 + }, + { + "epoch": 1.0, + "grad_norm": 6.444428006967768, + "learning_rate": 1.0448109177027847e-05, + "loss": 1.158, + "step": 83355 + }, + { + "epoch": 1.0, + "grad_norm": 6.203341850689107, + "learning_rate": 1.0447525585040245e-05, + "loss": 0.9179, + "step": 83358 + }, + { + "epoch": 1.0, + "grad_norm": 3.1129041414877796, + "learning_rate": 1.0446941991525403e-05, + "loss": 1.432, + "step": 83361 + }, + { + "epoch": 1.0, + "grad_norm": 9.303504397637026, + "learning_rate": 1.0446358396485308e-05, + "loss": 1.0969, + "step": 83364 + }, + { + "epoch": 1.0, + "grad_norm": 8.426382367294405, + "learning_rate": 1.044577479992195e-05, + "loss": 1.3362, + "step": 83367 + }, + { + "epoch": 1.0, + "grad_norm": 8.553286168779723, + "learning_rate": 1.0445191201837326e-05, + "loss": 0.7341, + "step": 83370 + }, + { + "epoch": 1.0, + "grad_norm": 12.583155795452178, + "learning_rate": 1.0444607602233423e-05, + "loss": 0.8901, + "step": 83373 + }, + { + "epoch": 1.0, + "grad_norm": 5.1764479198245255, + "learning_rate": 1.0444024001112237e-05, + "loss": 1.3393, + "step": 83376 + }, + { + "epoch": 1.0, + "grad_norm": 13.226240247855282, + "learning_rate": 1.0443440398475752e-05, + "loss": 1.0526, + "step": 83379 + }, + { + "epoch": 1.0, + "grad_norm": 23.341203848906673, + "learning_rate": 1.0442856794325969e-05, + "loss": 0.9742, + "step": 83382 + }, + { + "epoch": 1.0, + "grad_norm": 9.714380094402967, + "learning_rate": 1.0442273188664876e-05, + "loss": 1.0234, + "step": 83385 + }, + { + "epoch": 1.0, + "grad_norm": 15.194375480699685, + "learning_rate": 1.0441689581494461e-05, + "loss": 0.9178, + "step": 83388 + }, + { + "epoch": 1.0, + "grad_norm": 5.318718088877644, + "learning_rate": 1.044110597281672e-05, + "loss": 1.0305, + "step": 83391 + }, + { + "epoch": 1.0, + "grad_norm": 17.918157225647917, + "learning_rate": 1.0440522362633642e-05, + "loss": 1.2152, + "step": 83394 + }, + { + "epoch": 1.0, + "grad_norm": 7.593587049340328, + "learning_rate": 1.043993875094722e-05, + "loss": 1.0061, + "step": 83397 + }, + { + "epoch": 1.0, + "grad_norm": 13.463679602281402, + "learning_rate": 1.0439355137759445e-05, + "loss": 1.4715, + "step": 83400 + }, + { + "epoch": 1.0, + "grad_norm": 4.608124448219482, + "learning_rate": 1.0438771523072313e-05, + "loss": 1.5558, + "step": 83403 + }, + { + "epoch": 1.0, + "grad_norm": 9.554047679660904, + "learning_rate": 1.043818790688781e-05, + "loss": 1.2951, + "step": 83406 + }, + { + "epoch": 1.0, + "grad_norm": 2.9915819131604175, + "learning_rate": 1.0437604289207928e-05, + "loss": 0.9288, + "step": 83409 + }, + { + "epoch": 1.0, + "grad_norm": 14.157229104073561, + "learning_rate": 1.0437020670034659e-05, + "loss": 1.0533, + "step": 83412 + }, + { + "epoch": 1.0, + "grad_norm": 6.734187718539394, + "learning_rate": 1.0436437049369998e-05, + "loss": 0.8758, + "step": 83415 + }, + { + "epoch": 1.0, + "grad_norm": 4.1723996741240414, + "learning_rate": 1.0435853427215937e-05, + "loss": 0.756, + "step": 83418 + }, + { + "epoch": 1.0, + "grad_norm": 4.5971022582957675, + "learning_rate": 1.043526980357446e-05, + "loss": 1.6393, + "step": 83421 + }, + { + "epoch": 1.0, + "grad_norm": 9.103014275799174, + "learning_rate": 1.0434686178447567e-05, + "loss": 1.1965, + "step": 83424 + }, + { + "epoch": 1.0, + "grad_norm": 9.177382270480969, + "learning_rate": 1.0434102551837247e-05, + "loss": 1.2918, + "step": 83427 + }, + { + "epoch": 1.0, + "grad_norm": 11.948189358545482, + "learning_rate": 1.0433518923745491e-05, + "loss": 1.3096, + "step": 83430 + }, + { + "epoch": 1.0, + "grad_norm": 11.918600806086044, + "learning_rate": 1.0432935294174289e-05, + "loss": 1.4811, + "step": 83433 + }, + { + "epoch": 1.0, + "grad_norm": 3.970519867151957, + "learning_rate": 1.0432351663125638e-05, + "loss": 0.9576, + "step": 83436 + }, + { + "epoch": 1.0, + "grad_norm": 16.321105850027383, + "learning_rate": 1.0431768030601523e-05, + "loss": 1.2874, + "step": 83439 + }, + { + "epoch": 1.0, + "grad_norm": 4.10217747194296, + "learning_rate": 1.0431184396603942e-05, + "loss": 1.0797, + "step": 83442 + }, + { + "epoch": 1.0, + "grad_norm": 7.40083423526283, + "learning_rate": 1.0430600761134882e-05, + "loss": 0.7663, + "step": 83445 + }, + { + "epoch": 1.0, + "grad_norm": 8.257672901503796, + "learning_rate": 1.043001712419634e-05, + "loss": 1.2071, + "step": 83448 + }, + { + "epoch": 1.0, + "grad_norm": 10.292216809890931, + "learning_rate": 1.0429433485790301e-05, + "loss": 1.3596, + "step": 83451 + }, + { + "epoch": 1.0, + "grad_norm": 21.070065968405252, + "learning_rate": 1.042884984591876e-05, + "loss": 1.644, + "step": 83454 + }, + { + "epoch": 1.0, + "grad_norm": 19.48498245444674, + "learning_rate": 1.042826620458371e-05, + "loss": 0.9551, + "step": 83457 + }, + { + "epoch": 1.0, + "grad_norm": 4.602307693482198, + "learning_rate": 1.0427682561787143e-05, + "loss": 0.864, + "step": 83460 + }, + { + "epoch": 1.0, + "grad_norm": 21.191031977586775, + "learning_rate": 1.0427098917531049e-05, + "loss": 1.1529, + "step": 83463 + }, + { + "epoch": 1.0, + "grad_norm": 5.443224993674127, + "learning_rate": 1.0426515271817418e-05, + "loss": 1.2877, + "step": 83466 + }, + { + "epoch": 1.0, + "grad_norm": 10.562439712917898, + "learning_rate": 1.0425931624648246e-05, + "loss": 1.1232, + "step": 83469 + }, + { + "epoch": 1.0, + "grad_norm": 6.318840100941687, + "learning_rate": 1.0425347976025523e-05, + "loss": 1.2164, + "step": 83472 + }, + { + "epoch": 1.0, + "grad_norm": 15.817897270686402, + "learning_rate": 1.042476432595124e-05, + "loss": 1.0578, + "step": 83475 + }, + { + "epoch": 1.0, + "grad_norm": 5.319573843316944, + "learning_rate": 1.0424180674427388e-05, + "loss": 1.1772, + "step": 83478 + }, + { + "epoch": 1.0, + "grad_norm": 5.742126266935401, + "learning_rate": 1.0423597021455964e-05, + "loss": 1.3094, + "step": 83481 + }, + { + "epoch": 1.0, + "grad_norm": 16.07105445136594, + "learning_rate": 1.0423013367038955e-05, + "loss": 1.2644, + "step": 83484 + }, + { + "epoch": 1.0, + "grad_norm": 32.840515125347565, + "learning_rate": 1.042242971117835e-05, + "loss": 1.1719, + "step": 83487 + }, + { + "epoch": 1.0, + "grad_norm": 4.223371493496445, + "learning_rate": 1.0421846053876147e-05, + "loss": 1.6662, + "step": 83490 + }, + { + "epoch": 1.0, + "grad_norm": 7.627134553090195, + "learning_rate": 1.0421262395134335e-05, + "loss": 1.1688, + "step": 83493 + }, + { + "epoch": 1.0, + "grad_norm": 16.969857833435608, + "learning_rate": 1.0420678734954908e-05, + "loss": 1.0394, + "step": 83496 + }, + { + "epoch": 1.0, + "grad_norm": 17.45735311932637, + "learning_rate": 1.0420095073339853e-05, + "loss": 1.1031, + "step": 83499 + }, + { + "epoch": 1.0, + "grad_norm": 2.2117600410006917, + "learning_rate": 1.0419511410291165e-05, + "loss": 1.4228, + "step": 83502 + }, + { + "epoch": 1.0, + "grad_norm": 7.5835948236057416, + "learning_rate": 1.0418927745810838e-05, + "loss": 1.2193, + "step": 83505 + }, + { + "epoch": 1.0, + "grad_norm": 27.770456019791347, + "learning_rate": 1.0418344079900859e-05, + "loss": 1.0207, + "step": 83508 + }, + { + "epoch": 1.0, + "grad_norm": 8.45467984128809, + "learning_rate": 1.0417760412563224e-05, + "loss": 0.692, + "step": 83511 + }, + { + "epoch": 1.0, + "grad_norm": 43.29007661024513, + "learning_rate": 1.0417176743799924e-05, + "loss": 1.1372, + "step": 83514 + }, + { + "epoch": 1.0, + "grad_norm": 7.852300117600095, + "learning_rate": 1.041659307361295e-05, + "loss": 1.2729, + "step": 83517 + }, + { + "epoch": 1.0, + "grad_norm": 4.792718876633365, + "learning_rate": 1.0416009402004293e-05, + "loss": 1.3353, + "step": 83520 + }, + { + "epoch": 1.0, + "grad_norm": 4.098701501441322, + "learning_rate": 1.0415425728975945e-05, + "loss": 1.0689, + "step": 83523 + }, + { + "epoch": 1.0, + "grad_norm": 6.711767994040417, + "learning_rate": 1.04148420545299e-05, + "loss": 0.9398, + "step": 83526 + }, + { + "epoch": 1.0, + "grad_norm": 9.515424620525922, + "learning_rate": 1.0414258378668145e-05, + "loss": 1.1216, + "step": 83529 + }, + { + "epoch": 1.0, + "grad_norm": 12.049236065622832, + "learning_rate": 1.041367470139268e-05, + "loss": 1.129, + "step": 83532 + }, + { + "epoch": 1.0, + "grad_norm": 3.32684893355296, + "learning_rate": 1.041309102270549e-05, + "loss": 1.2235, + "step": 83535 + }, + { + "epoch": 1.0, + "grad_norm": 25.629840132032516, + "learning_rate": 1.041250734260857e-05, + "loss": 1.4597, + "step": 83538 + }, + { + "epoch": 1.0, + "grad_norm": 7.744107522335026, + "learning_rate": 1.0411923661103908e-05, + "loss": 1.0571, + "step": 83541 + }, + { + "epoch": 1.0, + "grad_norm": 6.809958940947934, + "learning_rate": 1.0411339978193504e-05, + "loss": 1.344, + "step": 83544 + }, + { + "epoch": 1.0, + "grad_norm": 20.15825890133192, + "learning_rate": 1.0410756293879343e-05, + "loss": 1.3904, + "step": 83547 + }, + { + "epoch": 1.0, + "grad_norm": 8.264453216119293, + "learning_rate": 1.0410172608163418e-05, + "loss": 1.2232, + "step": 83550 + }, + { + "epoch": 1.0, + "grad_norm": 19.053914561238976, + "learning_rate": 1.040958892104772e-05, + "loss": 1.2164, + "step": 83553 + }, + { + "epoch": 1.0, + "grad_norm": 5.023032057704403, + "learning_rate": 1.0409005232534244e-05, + "loss": 1.1164, + "step": 83556 + }, + { + "epoch": 1.0, + "grad_norm": 9.195243981166374, + "learning_rate": 1.0408421542624982e-05, + "loss": 1.1802, + "step": 83559 + }, + { + "epoch": 1.0, + "grad_norm": 4.89869465327709, + "learning_rate": 1.040783785132192e-05, + "loss": 1.7884, + "step": 83562 + }, + { + "epoch": 1.0, + "grad_norm": 8.516279552507719, + "learning_rate": 1.0407254158627059e-05, + "loss": 0.9094, + "step": 83565 + }, + { + "epoch": 1.0, + "grad_norm": 8.720234885220806, + "learning_rate": 1.0406670464542385e-05, + "loss": 1.426, + "step": 83568 + }, + { + "epoch": 1.0, + "grad_norm": 16.46269741721285, + "learning_rate": 1.0406086769069892e-05, + "loss": 0.8386, + "step": 83571 + }, + { + "epoch": 1.0, + "grad_norm": 6.056026744113149, + "learning_rate": 1.0405503072211567e-05, + "loss": 1.5011, + "step": 83574 + }, + { + "epoch": 1.0, + "grad_norm": 16.165438979894162, + "learning_rate": 1.0404919373969411e-05, + "loss": 1.2515, + "step": 83577 + }, + { + "epoch": 1.01, + "grad_norm": 5.704190838911902, + "learning_rate": 1.040433567434541e-05, + "loss": 1.2838, + "step": 83580 + }, + { + "epoch": 1.01, + "grad_norm": 16.54084568382424, + "learning_rate": 1.0403751973341555e-05, + "loss": 0.9378, + "step": 83583 + }, + { + "epoch": 1.01, + "grad_norm": 3.026379854102857, + "learning_rate": 1.0403168270959841e-05, + "loss": 1.3696, + "step": 83586 + }, + { + "epoch": 1.01, + "grad_norm": 11.415172033956914, + "learning_rate": 1.0402584567202258e-05, + "loss": 0.923, + "step": 83589 + }, + { + "epoch": 1.01, + "grad_norm": 3.90013992263717, + "learning_rate": 1.04020008620708e-05, + "loss": 0.9355, + "step": 83592 + }, + { + "epoch": 1.01, + "grad_norm": 12.801996370664408, + "learning_rate": 1.0401417155567457e-05, + "loss": 1.2666, + "step": 83595 + }, + { + "epoch": 1.01, + "grad_norm": 3.5933901335833807, + "learning_rate": 1.0400833447694223e-05, + "loss": 1.2884, + "step": 83598 + }, + { + "epoch": 1.01, + "grad_norm": 15.208022062696132, + "learning_rate": 1.0400249738453088e-05, + "loss": 1.4938, + "step": 83601 + }, + { + "epoch": 1.01, + "grad_norm": 10.483844608234849, + "learning_rate": 1.0399666027846047e-05, + "loss": 1.1195, + "step": 83604 + }, + { + "epoch": 1.01, + "grad_norm": 7.401567105926169, + "learning_rate": 1.0399082315875084e-05, + "loss": 1.1529, + "step": 83607 + }, + { + "epoch": 1.01, + "grad_norm": 7.9079445726515, + "learning_rate": 1.0398498602542201e-05, + "loss": 0.7813, + "step": 83610 + }, + { + "epoch": 1.01, + "grad_norm": 5.951527460960364, + "learning_rate": 1.0397914887849387e-05, + "loss": 1.2597, + "step": 83613 + }, + { + "epoch": 1.01, + "grad_norm": 6.235977562777209, + "learning_rate": 1.039733117179863e-05, + "loss": 1.3666, + "step": 83616 + }, + { + "epoch": 1.01, + "grad_norm": 21.00127940516573, + "learning_rate": 1.0396747454391926e-05, + "loss": 0.9092, + "step": 83619 + }, + { + "epoch": 1.01, + "grad_norm": 5.256147470805138, + "learning_rate": 1.0396163735631268e-05, + "loss": 1.4846, + "step": 83622 + }, + { + "epoch": 1.01, + "grad_norm": 5.269706727011647, + "learning_rate": 1.0395580015518644e-05, + "loss": 1.4796, + "step": 83625 + }, + { + "epoch": 1.01, + "grad_norm": 9.06983997056626, + "learning_rate": 1.0394996294056046e-05, + "loss": 1.2351, + "step": 83628 + }, + { + "epoch": 1.01, + "grad_norm": 14.498854308008275, + "learning_rate": 1.0394412571245473e-05, + "loss": 1.2798, + "step": 83631 + }, + { + "epoch": 1.01, + "grad_norm": 3.362015482928231, + "learning_rate": 1.0393828847088908e-05, + "loss": 1.259, + "step": 83634 + }, + { + "epoch": 1.01, + "grad_norm": 15.185983166668043, + "learning_rate": 1.0393245121588344e-05, + "loss": 1.4291, + "step": 83637 + }, + { + "epoch": 1.01, + "grad_norm": 17.86027408658117, + "learning_rate": 1.039266139474578e-05, + "loss": 1.2762, + "step": 83640 + }, + { + "epoch": 1.01, + "grad_norm": 5.605316442560263, + "learning_rate": 1.0392077666563205e-05, + "loss": 1.2984, + "step": 83643 + }, + { + "epoch": 1.01, + "grad_norm": 3.994572510288069, + "learning_rate": 1.039149393704261e-05, + "loss": 1.1241, + "step": 83646 + }, + { + "epoch": 1.01, + "grad_norm": 6.181763370263747, + "learning_rate": 1.0390910206185985e-05, + "loss": 1.124, + "step": 83649 + }, + { + "epoch": 1.01, + "grad_norm": 8.012587455086441, + "learning_rate": 1.0390326473995326e-05, + "loss": 1.236, + "step": 83652 + }, + { + "epoch": 1.01, + "grad_norm": 11.157312408178285, + "learning_rate": 1.0389742740472621e-05, + "loss": 0.9503, + "step": 83655 + }, + { + "epoch": 1.01, + "grad_norm": 30.29803053333128, + "learning_rate": 1.0389159005619869e-05, + "loss": 1.4308, + "step": 83658 + }, + { + "epoch": 1.01, + "grad_norm": 6.00848000363227, + "learning_rate": 1.0388575269439052e-05, + "loss": 1.1021, + "step": 83661 + }, + { + "epoch": 1.01, + "grad_norm": 14.76331738977945, + "learning_rate": 1.0387991531932171e-05, + "loss": 1.04, + "step": 83664 + }, + { + "epoch": 1.01, + "grad_norm": 10.701381148329862, + "learning_rate": 1.0387407793101213e-05, + "loss": 1.1054, + "step": 83667 + }, + { + "epoch": 1.01, + "grad_norm": 5.729060693628784, + "learning_rate": 1.0386824052948173e-05, + "loss": 1.3897, + "step": 83670 + }, + { + "epoch": 1.01, + "grad_norm": 14.772112297375696, + "learning_rate": 1.0386240311475041e-05, + "loss": 1.3325, + "step": 83673 + }, + { + "epoch": 1.01, + "grad_norm": 11.739372388326556, + "learning_rate": 1.038565656868381e-05, + "loss": 0.8732, + "step": 83676 + }, + { + "epoch": 1.01, + "grad_norm": 7.227703121169419, + "learning_rate": 1.0385072824576474e-05, + "loss": 1.0821, + "step": 83679 + }, + { + "epoch": 1.01, + "grad_norm": 4.032726487958254, + "learning_rate": 1.038448907915502e-05, + "loss": 1.1716, + "step": 83682 + }, + { + "epoch": 1.01, + "grad_norm": 13.707024950376965, + "learning_rate": 1.0383905332421449e-05, + "loss": 0.871, + "step": 83685 + }, + { + "epoch": 1.01, + "grad_norm": 19.744294283592062, + "learning_rate": 1.038332158437774e-05, + "loss": 1.3245, + "step": 83688 + }, + { + "epoch": 1.01, + "grad_norm": 4.229077407201864, + "learning_rate": 1.0382737835025895e-05, + "loss": 1.0906, + "step": 83691 + }, + { + "epoch": 1.01, + "grad_norm": 7.408084090750113, + "learning_rate": 1.0382154084367903e-05, + "loss": 1.1845, + "step": 83694 + }, + { + "epoch": 1.01, + "grad_norm": 10.475217054501552, + "learning_rate": 1.038157033240576e-05, + "loss": 1.2458, + "step": 83697 + }, + { + "epoch": 1.01, + "grad_norm": 9.447505116409662, + "learning_rate": 1.0380986579141454e-05, + "loss": 0.9258, + "step": 83700 + }, + { + "epoch": 1.01, + "grad_norm": 5.784254449481988, + "learning_rate": 1.0380402824576976e-05, + "loss": 1.2845, + "step": 83703 + }, + { + "epoch": 1.01, + "grad_norm": 35.64766258608924, + "learning_rate": 1.0379819068714323e-05, + "loss": 1.1746, + "step": 83706 + }, + { + "epoch": 1.01, + "grad_norm": 8.003977559821434, + "learning_rate": 1.0379235311555482e-05, + "loss": 1.2218, + "step": 83709 + }, + { + "epoch": 1.01, + "grad_norm": 19.794125836183387, + "learning_rate": 1.0378651553102447e-05, + "loss": 1.509, + "step": 83712 + }, + { + "epoch": 1.01, + "grad_norm": 29.334299934506742, + "learning_rate": 1.0378067793357211e-05, + "loss": 1.2021, + "step": 83715 + }, + { + "epoch": 1.01, + "grad_norm": 11.974309708955248, + "learning_rate": 1.0377484032321767e-05, + "loss": 1.0308, + "step": 83718 + }, + { + "epoch": 1.01, + "grad_norm": 5.941335162938728, + "learning_rate": 1.0376900269998107e-05, + "loss": 1.1271, + "step": 83721 + }, + { + "epoch": 1.01, + "grad_norm": 6.6971891776892685, + "learning_rate": 1.0376316506388217e-05, + "loss": 0.9789, + "step": 83724 + }, + { + "epoch": 1.01, + "grad_norm": 9.364242092517069, + "learning_rate": 1.0375732741494103e-05, + "loss": 1.0371, + "step": 83727 + }, + { + "epoch": 1.01, + "grad_norm": 5.970917860560911, + "learning_rate": 1.0375148975317742e-05, + "loss": 1.7375, + "step": 83730 + }, + { + "epoch": 1.01, + "grad_norm": 20.326656379042852, + "learning_rate": 1.0374565207861133e-05, + "loss": 1.0116, + "step": 83733 + }, + { + "epoch": 1.01, + "grad_norm": 2.4492567056765844, + "learning_rate": 1.0373981439126268e-05, + "loss": 1.345, + "step": 83736 + }, + { + "epoch": 1.01, + "grad_norm": 4.333118759409336, + "learning_rate": 1.0373397669115142e-05, + "loss": 1.0184, + "step": 83739 + }, + { + "epoch": 1.01, + "grad_norm": 6.296295726848292, + "learning_rate": 1.037281389782974e-05, + "loss": 1.2889, + "step": 83742 + }, + { + "epoch": 1.01, + "grad_norm": 6.222005094378804, + "learning_rate": 1.037223012527206e-05, + "loss": 1.266, + "step": 83745 + }, + { + "epoch": 1.01, + "grad_norm": 10.115644756151307, + "learning_rate": 1.0371646351444095e-05, + "loss": 0.8693, + "step": 83748 + }, + { + "epoch": 1.01, + "grad_norm": 11.394448999547862, + "learning_rate": 1.0371062576347832e-05, + "loss": 1.0774, + "step": 83751 + }, + { + "epoch": 1.01, + "grad_norm": 16.86928728156643, + "learning_rate": 1.0370478799985268e-05, + "loss": 1.1824, + "step": 83754 + }, + { + "epoch": 1.01, + "grad_norm": 16.177247170153976, + "learning_rate": 1.0369895022358392e-05, + "loss": 1.315, + "step": 83757 + }, + { + "epoch": 1.01, + "grad_norm": 6.613583155172398, + "learning_rate": 1.03693112434692e-05, + "loss": 1.0805, + "step": 83760 + }, + { + "epoch": 1.01, + "grad_norm": 27.384573660321426, + "learning_rate": 1.0368727463319678e-05, + "loss": 1.3224, + "step": 83763 + }, + { + "epoch": 1.01, + "grad_norm": 8.349023686587909, + "learning_rate": 1.0368143681911824e-05, + "loss": 1.2668, + "step": 83766 + }, + { + "epoch": 1.01, + "grad_norm": 2.8614665964445005, + "learning_rate": 1.0367559899247631e-05, + "loss": 1.5028, + "step": 83769 + }, + { + "epoch": 1.01, + "grad_norm": 5.32784176487719, + "learning_rate": 1.0366976115329082e-05, + "loss": 1.2583, + "step": 83772 + }, + { + "epoch": 1.01, + "grad_norm": 4.190512884543261, + "learning_rate": 1.0366392330158182e-05, + "loss": 1.45, + "step": 83775 + }, + { + "epoch": 1.01, + "grad_norm": 15.011834711240265, + "learning_rate": 1.036580854373691e-05, + "loss": 1.3195, + "step": 83778 + }, + { + "epoch": 1.01, + "grad_norm": 6.07961722486803, + "learning_rate": 1.0365224756067273e-05, + "loss": 1.434, + "step": 83781 + }, + { + "epoch": 1.01, + "grad_norm": 10.803036903772714, + "learning_rate": 1.0364640967151249e-05, + "loss": 1.0552, + "step": 83784 + }, + { + "epoch": 1.01, + "grad_norm": 2.5987723830967506, + "learning_rate": 1.0364057176990843e-05, + "loss": 1.3325, + "step": 83787 + }, + { + "epoch": 1.01, + "grad_norm": 13.39793842792077, + "learning_rate": 1.0363473385588037e-05, + "loss": 1.1162, + "step": 83790 + }, + { + "epoch": 1.01, + "grad_norm": 16.786467504387808, + "learning_rate": 1.0362889592944829e-05, + "loss": 1.1394, + "step": 83793 + }, + { + "epoch": 1.01, + "grad_norm": 8.47305100027734, + "learning_rate": 1.0362305799063206e-05, + "loss": 0.9183, + "step": 83796 + }, + { + "epoch": 1.01, + "grad_norm": 11.500256898132761, + "learning_rate": 1.0361722003945166e-05, + "loss": 1.334, + "step": 83799 + }, + { + "epoch": 1.01, + "grad_norm": 27.732933589058803, + "learning_rate": 1.03611382075927e-05, + "loss": 1.0904, + "step": 83802 + }, + { + "epoch": 1.01, + "grad_norm": 15.465747707575833, + "learning_rate": 1.0360554410007797e-05, + "loss": 1.1944, + "step": 83805 + }, + { + "epoch": 1.01, + "grad_norm": 7.6118079264130465, + "learning_rate": 1.0359970611192453e-05, + "loss": 1.4132, + "step": 83808 + }, + { + "epoch": 1.01, + "grad_norm": 13.804024730804693, + "learning_rate": 1.0359386811148662e-05, + "loss": 1.0454, + "step": 83811 + }, + { + "epoch": 1.01, + "grad_norm": 33.17841345224011, + "learning_rate": 1.0358803009878411e-05, + "loss": 1.1688, + "step": 83814 + }, + { + "epoch": 1.01, + "grad_norm": 6.6329785781575765, + "learning_rate": 1.0358219207383693e-05, + "loss": 1.4941, + "step": 83817 + }, + { + "epoch": 1.01, + "grad_norm": 16.84731940300692, + "learning_rate": 1.0357635403666502e-05, + "loss": 1.0227, + "step": 83820 + }, + { + "epoch": 1.01, + "grad_norm": 20.59053572772907, + "learning_rate": 1.035705159872883e-05, + "loss": 1.147, + "step": 83823 + }, + { + "epoch": 1.01, + "grad_norm": 61.09951708646134, + "learning_rate": 1.035646779257267e-05, + "loss": 1.2981, + "step": 83826 + }, + { + "epoch": 1.01, + "grad_norm": 142.58213092735667, + "learning_rate": 1.0355883985200017e-05, + "loss": 1.3235, + "step": 83829 + }, + { + "epoch": 1.01, + "grad_norm": 5.885317543007085, + "learning_rate": 1.0355300176612855e-05, + "loss": 1.1684, + "step": 83832 + }, + { + "epoch": 1.01, + "grad_norm": 7.454805828904102, + "learning_rate": 1.0354716366813186e-05, + "loss": 1.0344, + "step": 83835 + }, + { + "epoch": 1.01, + "grad_norm": 18.46897030840531, + "learning_rate": 1.0354132555802997e-05, + "loss": 1.443, + "step": 83838 + }, + { + "epoch": 1.01, + "grad_norm": 5.052948214873234, + "learning_rate": 1.0353548743584278e-05, + "loss": 1.1843, + "step": 83841 + }, + { + "epoch": 1.01, + "grad_norm": 5.372116668052041, + "learning_rate": 1.0352964930159028e-05, + "loss": 1.1214, + "step": 83844 + }, + { + "epoch": 1.01, + "grad_norm": 8.466187652394156, + "learning_rate": 1.0352381115529235e-05, + "loss": 1.0618, + "step": 83847 + }, + { + "epoch": 1.01, + "grad_norm": 7.6789768180817815, + "learning_rate": 1.035179729969689e-05, + "loss": 1.2136, + "step": 83850 + }, + { + "epoch": 1.01, + "grad_norm": 18.6936834431653, + "learning_rate": 1.0351213482663989e-05, + "loss": 1.5199, + "step": 83853 + }, + { + "epoch": 1.01, + "grad_norm": 2.7060432440084305, + "learning_rate": 1.0350629664432525e-05, + "loss": 1.0245, + "step": 83856 + }, + { + "epoch": 1.01, + "grad_norm": 9.82087828664761, + "learning_rate": 1.0350045845004484e-05, + "loss": 1.483, + "step": 83859 + }, + { + "epoch": 1.01, + "grad_norm": 5.637799501996743, + "learning_rate": 1.0349462024381865e-05, + "loss": 1.2706, + "step": 83862 + }, + { + "epoch": 1.01, + "grad_norm": 7.563446267427706, + "learning_rate": 1.0348878202566659e-05, + "loss": 1.4106, + "step": 83865 + }, + { + "epoch": 1.01, + "grad_norm": 4.3791817597554425, + "learning_rate": 1.0348294379560857e-05, + "loss": 1.1861, + "step": 83868 + }, + { + "epoch": 1.01, + "grad_norm": 8.034444197876624, + "learning_rate": 1.0347710555366449e-05, + "loss": 0.9918, + "step": 83871 + }, + { + "epoch": 1.01, + "grad_norm": 14.3774763331281, + "learning_rate": 1.0347126729985432e-05, + "loss": 1.0913, + "step": 83874 + }, + { + "epoch": 1.01, + "grad_norm": 13.10167461543742, + "learning_rate": 1.0346542903419798e-05, + "loss": 1.2687, + "step": 83877 + }, + { + "epoch": 1.01, + "grad_norm": 10.799824801935841, + "learning_rate": 1.0345959075671535e-05, + "loss": 1.567, + "step": 83880 + }, + { + "epoch": 1.01, + "grad_norm": 14.522971939466082, + "learning_rate": 1.034537524674264e-05, + "loss": 1.0748, + "step": 83883 + }, + { + "epoch": 1.01, + "grad_norm": 8.976043808048134, + "learning_rate": 1.0344791416635105e-05, + "loss": 1.1107, + "step": 83886 + }, + { + "epoch": 1.01, + "grad_norm": 10.001706604027707, + "learning_rate": 1.0344207585350918e-05, + "loss": 1.0888, + "step": 83889 + }, + { + "epoch": 1.01, + "grad_norm": 9.820174014133856, + "learning_rate": 1.0343623752892076e-05, + "loss": 1.6102, + "step": 83892 + }, + { + "epoch": 1.01, + "grad_norm": 49.243345580394056, + "learning_rate": 1.0343039919260569e-05, + "loss": 1.1319, + "step": 83895 + }, + { + "epoch": 1.01, + "grad_norm": 12.090832434650574, + "learning_rate": 1.0342456084458391e-05, + "loss": 1.1116, + "step": 83898 + }, + { + "epoch": 1.01, + "grad_norm": 5.683186492069419, + "learning_rate": 1.0341872248487537e-05, + "loss": 1.057, + "step": 83901 + }, + { + "epoch": 1.01, + "grad_norm": 16.47437806251746, + "learning_rate": 1.034128841134999e-05, + "loss": 0.9826, + "step": 83904 + }, + { + "epoch": 1.01, + "grad_norm": 11.66289008559158, + "learning_rate": 1.0340704573047754e-05, + "loss": 1.4744, + "step": 83907 + }, + { + "epoch": 1.01, + "grad_norm": 5.636155524203348, + "learning_rate": 1.0340120733582815e-05, + "loss": 1.3219, + "step": 83910 + }, + { + "epoch": 1.01, + "grad_norm": 54.40571349628466, + "learning_rate": 1.0339536892957163e-05, + "loss": 1.172, + "step": 83913 + }, + { + "epoch": 1.01, + "grad_norm": 3.3029600846261964, + "learning_rate": 1.0338953051172796e-05, + "loss": 1.3007, + "step": 83916 + }, + { + "epoch": 1.01, + "grad_norm": 7.3351376166297495, + "learning_rate": 1.0338369208231704e-05, + "loss": 1.3466, + "step": 83919 + }, + { + "epoch": 1.01, + "grad_norm": 6.824271771025918, + "learning_rate": 1.0337785364135881e-05, + "loss": 1.3654, + "step": 83922 + }, + { + "epoch": 1.01, + "grad_norm": 6.406549963324064, + "learning_rate": 1.0337201518887316e-05, + "loss": 1.5362, + "step": 83925 + }, + { + "epoch": 1.01, + "grad_norm": 15.494616956904895, + "learning_rate": 1.0336617672488005e-05, + "loss": 1.1977, + "step": 83928 + }, + { + "epoch": 1.01, + "grad_norm": 8.211289002100298, + "learning_rate": 1.0336033824939942e-05, + "loss": 0.9962, + "step": 83931 + }, + { + "epoch": 1.01, + "grad_norm": 15.36082968152698, + "learning_rate": 1.0335449976245111e-05, + "loss": 1.0157, + "step": 83934 + }, + { + "epoch": 1.01, + "grad_norm": 17.779877357956945, + "learning_rate": 1.0334866126405514e-05, + "loss": 1.2559, + "step": 83937 + }, + { + "epoch": 1.01, + "grad_norm": 9.86336771446466, + "learning_rate": 1.0334282275423138e-05, + "loss": 1.1241, + "step": 83940 + }, + { + "epoch": 1.01, + "grad_norm": 5.137559126487393, + "learning_rate": 1.0333698423299979e-05, + "loss": 1.2642, + "step": 83943 + }, + { + "epoch": 1.01, + "grad_norm": 7.121624908742714, + "learning_rate": 1.0333114570038023e-05, + "loss": 1.0304, + "step": 83946 + }, + { + "epoch": 1.01, + "grad_norm": 5.070569919817766, + "learning_rate": 1.033253071563927e-05, + "loss": 1.1547, + "step": 83949 + }, + { + "epoch": 1.01, + "grad_norm": 3.2569015309921543, + "learning_rate": 1.033194686010571e-05, + "loss": 1.1339, + "step": 83952 + }, + { + "epoch": 1.01, + "grad_norm": 10.9458938938015, + "learning_rate": 1.0331363003439336e-05, + "loss": 1.0435, + "step": 83955 + }, + { + "epoch": 1.01, + "grad_norm": 9.118532991517611, + "learning_rate": 1.0330779145642135e-05, + "loss": 1.2228, + "step": 83958 + }, + { + "epoch": 1.01, + "grad_norm": 11.84340321838539, + "learning_rate": 1.0330195286716107e-05, + "loss": 1.0894, + "step": 83961 + }, + { + "epoch": 1.01, + "grad_norm": 234.80851753493744, + "learning_rate": 1.0329611426663242e-05, + "loss": 1.1543, + "step": 83964 + }, + { + "epoch": 1.01, + "grad_norm": 8.516718689633372, + "learning_rate": 1.0329027565485529e-05, + "loss": 0.8608, + "step": 83967 + }, + { + "epoch": 1.01, + "grad_norm": 6.806491309126017, + "learning_rate": 1.0328443703184967e-05, + "loss": 1.1447, + "step": 83970 + }, + { + "epoch": 1.01, + "grad_norm": 41.03556970146521, + "learning_rate": 1.0327859839763544e-05, + "loss": 0.8759, + "step": 83973 + }, + { + "epoch": 1.01, + "grad_norm": 6.02924600696256, + "learning_rate": 1.0327275975223252e-05, + "loss": 0.9519, + "step": 83976 + }, + { + "epoch": 1.01, + "grad_norm": 9.110712061349739, + "learning_rate": 1.0326692109566085e-05, + "loss": 1.4148, + "step": 83979 + }, + { + "epoch": 1.01, + "grad_norm": 7.654008381027682, + "learning_rate": 1.0326108242794035e-05, + "loss": 1.3632, + "step": 83982 + }, + { + "epoch": 1.01, + "grad_norm": 6.164806452157621, + "learning_rate": 1.0325524374909097e-05, + "loss": 1.163, + "step": 83985 + }, + { + "epoch": 1.01, + "grad_norm": 18.003787453084783, + "learning_rate": 1.0324940505913262e-05, + "loss": 1.4641, + "step": 83988 + }, + { + "epoch": 1.01, + "grad_norm": 52.731045753989015, + "learning_rate": 1.032435663580852e-05, + "loss": 0.9438, + "step": 83991 + }, + { + "epoch": 1.01, + "grad_norm": 12.373156045818083, + "learning_rate": 1.0323772764596866e-05, + "loss": 1.5186, + "step": 83994 + }, + { + "epoch": 1.01, + "grad_norm": 18.292723850418955, + "learning_rate": 1.0323188892280294e-05, + "loss": 1.3829, + "step": 83997 + }, + { + "epoch": 1.01, + "grad_norm": 6.8732903100731955, + "learning_rate": 1.0322605018860792e-05, + "loss": 1.6633, + "step": 84000 + }, + { + "epoch": 1.01, + "grad_norm": 14.621546651381017, + "learning_rate": 1.0322021144340357e-05, + "loss": 0.8552, + "step": 84003 + }, + { + "epoch": 1.01, + "grad_norm": 4.388642000569976, + "learning_rate": 1.032143726872098e-05, + "loss": 1.0473, + "step": 84006 + }, + { + "epoch": 1.01, + "grad_norm": 40.1275859431481, + "learning_rate": 1.0320853392004655e-05, + "loss": 1.5602, + "step": 84009 + }, + { + "epoch": 1.01, + "grad_norm": 17.73927303164488, + "learning_rate": 1.0320269514193369e-05, + "loss": 1.7256, + "step": 84012 + }, + { + "epoch": 1.01, + "grad_norm": 5.233543576561511, + "learning_rate": 1.0319685635289121e-05, + "loss": 1.1996, + "step": 84015 + }, + { + "epoch": 1.01, + "grad_norm": 5.946915958513234, + "learning_rate": 1.03191017552939e-05, + "loss": 1.3344, + "step": 84018 + }, + { + "epoch": 1.01, + "grad_norm": 17.3011950678445, + "learning_rate": 1.0318517874209699e-05, + "loss": 1.2829, + "step": 84021 + }, + { + "epoch": 1.01, + "grad_norm": 6.390866025006737, + "learning_rate": 1.0317933992038513e-05, + "loss": 1.2341, + "step": 84024 + }, + { + "epoch": 1.01, + "grad_norm": 20.80938979624518, + "learning_rate": 1.0317350108782331e-05, + "loss": 1.156, + "step": 84027 + }, + { + "epoch": 1.01, + "grad_norm": 11.722737711651906, + "learning_rate": 1.0316766224443151e-05, + "loss": 1.3244, + "step": 84030 + }, + { + "epoch": 1.01, + "grad_norm": 10.564169211316432, + "learning_rate": 1.0316182339022957e-05, + "loss": 1.0415, + "step": 84033 + }, + { + "epoch": 1.01, + "grad_norm": 4.331195110723718, + "learning_rate": 1.031559845252375e-05, + "loss": 0.8487, + "step": 84036 + }, + { + "epoch": 1.01, + "grad_norm": 9.268296554045497, + "learning_rate": 1.0315014564947518e-05, + "loss": 1.2834, + "step": 84039 + }, + { + "epoch": 1.01, + "grad_norm": 11.31811545344057, + "learning_rate": 1.0314430676296255e-05, + "loss": 1.1297, + "step": 84042 + }, + { + "epoch": 1.01, + "grad_norm": 10.131960347819712, + "learning_rate": 1.0313846786571953e-05, + "loss": 1.1471, + "step": 84045 + }, + { + "epoch": 1.01, + "grad_norm": 10.110339298085508, + "learning_rate": 1.0313262895776606e-05, + "loss": 1.1397, + "step": 84048 + }, + { + "epoch": 1.01, + "grad_norm": 2.3768846156220476, + "learning_rate": 1.0312679003912204e-05, + "loss": 1.3126, + "step": 84051 + }, + { + "epoch": 1.01, + "grad_norm": 10.091358729826384, + "learning_rate": 1.0312095110980742e-05, + "loss": 1.4732, + "step": 84054 + }, + { + "epoch": 1.01, + "grad_norm": 14.781317953815373, + "learning_rate": 1.0311511216984211e-05, + "loss": 1.2172, + "step": 84057 + }, + { + "epoch": 1.01, + "grad_norm": 16.70891586113313, + "learning_rate": 1.0310927321924606e-05, + "loss": 1.1601, + "step": 84060 + }, + { + "epoch": 1.01, + "grad_norm": 33.48446273961648, + "learning_rate": 1.031034342580392e-05, + "loss": 1.0837, + "step": 84063 + }, + { + "epoch": 1.01, + "grad_norm": 8.556650837338331, + "learning_rate": 1.0309759528624138e-05, + "loss": 1.0214, + "step": 84066 + }, + { + "epoch": 1.01, + "grad_norm": 7.318482551231358, + "learning_rate": 1.0309175630387262e-05, + "loss": 1.1793, + "step": 84069 + }, + { + "epoch": 1.01, + "grad_norm": 36.619062419825354, + "learning_rate": 1.030859173109528e-05, + "loss": 1.108, + "step": 84072 + }, + { + "epoch": 1.01, + "grad_norm": 8.061440875239246, + "learning_rate": 1.0308007830750183e-05, + "loss": 1.3979, + "step": 84075 + }, + { + "epoch": 1.01, + "grad_norm": 6.553492840906934, + "learning_rate": 1.0307423929353972e-05, + "loss": 1.3089, + "step": 84078 + }, + { + "epoch": 1.01, + "grad_norm": 3.7194942726761044, + "learning_rate": 1.0306840026908631e-05, + "loss": 1.0834, + "step": 84081 + }, + { + "epoch": 1.01, + "grad_norm": 5.896941659754804, + "learning_rate": 1.0306256123416156e-05, + "loss": 1.1968, + "step": 84084 + }, + { + "epoch": 1.01, + "grad_norm": 14.735762993680162, + "learning_rate": 1.0305672218878537e-05, + "loss": 1.2442, + "step": 84087 + }, + { + "epoch": 1.01, + "grad_norm": 34.73234152614074, + "learning_rate": 1.0305088313297771e-05, + "loss": 1.0957, + "step": 84090 + }, + { + "epoch": 1.01, + "grad_norm": 6.641788925590674, + "learning_rate": 1.0304504406675848e-05, + "loss": 1.1777, + "step": 84093 + }, + { + "epoch": 1.01, + "grad_norm": 6.563737961313762, + "learning_rate": 1.030392049901476e-05, + "loss": 1.1292, + "step": 84096 + }, + { + "epoch": 1.01, + "grad_norm": 4.7641896351375586, + "learning_rate": 1.0303336590316502e-05, + "loss": 1.2307, + "step": 84099 + }, + { + "epoch": 1.01, + "grad_norm": 3.834800898536189, + "learning_rate": 1.0302752680583066e-05, + "loss": 1.2183, + "step": 84102 + }, + { + "epoch": 1.01, + "grad_norm": 30.222281216178903, + "learning_rate": 1.0302168769816445e-05, + "loss": 1.3555, + "step": 84105 + }, + { + "epoch": 1.01, + "grad_norm": 5.639184207182997, + "learning_rate": 1.0301584858018627e-05, + "loss": 1.5127, + "step": 84108 + }, + { + "epoch": 1.01, + "grad_norm": 5.021771790055027, + "learning_rate": 1.0301000945191611e-05, + "loss": 1.1593, + "step": 84111 + }, + { + "epoch": 1.01, + "grad_norm": 4.679596683906922, + "learning_rate": 1.0300417031337389e-05, + "loss": 1.1407, + "step": 84114 + }, + { + "epoch": 1.01, + "grad_norm": 3.092003660954768, + "learning_rate": 1.029983311645795e-05, + "loss": 0.8049, + "step": 84117 + }, + { + "epoch": 1.01, + "grad_norm": 31.26700604801497, + "learning_rate": 1.0299249200555286e-05, + "loss": 1.4493, + "step": 84120 + }, + { + "epoch": 1.01, + "grad_norm": 16.021313557265028, + "learning_rate": 1.0298665283631394e-05, + "loss": 1.2444, + "step": 84123 + }, + { + "epoch": 1.01, + "grad_norm": 24.82390413832033, + "learning_rate": 1.0298081365688269e-05, + "loss": 1.4798, + "step": 84126 + }, + { + "epoch": 1.01, + "grad_norm": 6.642362674303739, + "learning_rate": 1.0297497446727893e-05, + "loss": 1.0534, + "step": 84129 + }, + { + "epoch": 1.01, + "grad_norm": 61.19667017885358, + "learning_rate": 1.029691352675227e-05, + "loss": 1.0771, + "step": 84132 + }, + { + "epoch": 1.01, + "grad_norm": 7.666888270869635, + "learning_rate": 1.0296329605763387e-05, + "loss": 1.4255, + "step": 84135 + }, + { + "epoch": 1.01, + "grad_norm": 4.906097268182711, + "learning_rate": 1.0295745683763239e-05, + "loss": 1.2351, + "step": 84138 + }, + { + "epoch": 1.01, + "grad_norm": 20.26720334983518, + "learning_rate": 1.0295161760753813e-05, + "loss": 0.854, + "step": 84141 + }, + { + "epoch": 1.01, + "grad_norm": 10.731716447038515, + "learning_rate": 1.0294577836737111e-05, + "loss": 1.0904, + "step": 84144 + }, + { + "epoch": 1.01, + "grad_norm": 3.129622917745253, + "learning_rate": 1.0293993911715118e-05, + "loss": 1.4446, + "step": 84147 + }, + { + "epoch": 1.01, + "grad_norm": 7.960279149372275, + "learning_rate": 1.029340998568983e-05, + "loss": 1.0473, + "step": 84150 + }, + { + "epoch": 1.01, + "grad_norm": 7.042603845452997, + "learning_rate": 1.029282605866324e-05, + "loss": 1.3574, + "step": 84153 + }, + { + "epoch": 1.01, + "grad_norm": 3.2933192749945954, + "learning_rate": 1.0292242130637342e-05, + "loss": 1.0828, + "step": 84156 + }, + { + "epoch": 1.01, + "grad_norm": 8.292736590717794, + "learning_rate": 1.0291658201614128e-05, + "loss": 1.1566, + "step": 84159 + }, + { + "epoch": 1.01, + "grad_norm": 5.636298242204335, + "learning_rate": 1.0291074271595584e-05, + "loss": 1.5903, + "step": 84162 + }, + { + "epoch": 1.01, + "grad_norm": 2.4105902993863197, + "learning_rate": 1.0290490340583713e-05, + "loss": 1.3415, + "step": 84165 + }, + { + "epoch": 1.01, + "grad_norm": 14.62039617510096, + "learning_rate": 1.02899064085805e-05, + "loss": 0.9767, + "step": 84168 + }, + { + "epoch": 1.01, + "grad_norm": 10.608999894897607, + "learning_rate": 1.0289322475587944e-05, + "loss": 1.3789, + "step": 84171 + }, + { + "epoch": 1.01, + "grad_norm": 12.80279315813125, + "learning_rate": 1.0288738541608033e-05, + "loss": 1.3069, + "step": 84174 + }, + { + "epoch": 1.01, + "grad_norm": 6.043883076133766, + "learning_rate": 1.0288154606642759e-05, + "loss": 1.2441, + "step": 84177 + }, + { + "epoch": 1.01, + "grad_norm": 11.331543170277753, + "learning_rate": 1.028757067069412e-05, + "loss": 1.2059, + "step": 84180 + }, + { + "epoch": 1.01, + "grad_norm": 12.923781975716942, + "learning_rate": 1.0286986733764103e-05, + "loss": 1.2545, + "step": 84183 + }, + { + "epoch": 1.01, + "grad_norm": 10.005432498848801, + "learning_rate": 1.028640279585471e-05, + "loss": 1.232, + "step": 84186 + }, + { + "epoch": 1.01, + "grad_norm": 27.75736502513153, + "learning_rate": 1.0285818856967922e-05, + "loss": 1.2464, + "step": 84189 + }, + { + "epoch": 1.01, + "grad_norm": 8.477781008793988, + "learning_rate": 1.0285234917105739e-05, + "loss": 1.1314, + "step": 84192 + }, + { + "epoch": 1.01, + "grad_norm": 31.70472073639876, + "learning_rate": 1.028465097627015e-05, + "loss": 1.4439, + "step": 84195 + }, + { + "epoch": 1.01, + "grad_norm": 17.08566974114935, + "learning_rate": 1.0284067034463155e-05, + "loss": 1.2548, + "step": 84198 + }, + { + "epoch": 1.01, + "grad_norm": 2.413216139664266, + "learning_rate": 1.0283483091686735e-05, + "loss": 1.4118, + "step": 84201 + }, + { + "epoch": 1.01, + "grad_norm": 4.155801535553856, + "learning_rate": 1.0282899147942891e-05, + "loss": 1.0947, + "step": 84204 + }, + { + "epoch": 1.01, + "grad_norm": 22.746595259156038, + "learning_rate": 1.0282315203233617e-05, + "loss": 1.1865, + "step": 84207 + }, + { + "epoch": 1.01, + "grad_norm": 57.500181002068146, + "learning_rate": 1.0281731257560901e-05, + "loss": 1.1031, + "step": 84210 + }, + { + "epoch": 1.01, + "grad_norm": 11.270151829296442, + "learning_rate": 1.0281147310926737e-05, + "loss": 1.3328, + "step": 84213 + }, + { + "epoch": 1.01, + "grad_norm": 13.704744366697156, + "learning_rate": 1.0280563363333117e-05, + "loss": 1.0775, + "step": 84216 + }, + { + "epoch": 1.01, + "grad_norm": 4.850696269138052, + "learning_rate": 1.0279979414782042e-05, + "loss": 1.1532, + "step": 84219 + }, + { + "epoch": 1.01, + "grad_norm": 2.2400897127859714, + "learning_rate": 1.0279395465275493e-05, + "loss": 1.2976, + "step": 84222 + }, + { + "epoch": 1.01, + "grad_norm": 4.870236621966525, + "learning_rate": 1.0278811514815467e-05, + "loss": 0.9931, + "step": 84225 + }, + { + "epoch": 1.01, + "grad_norm": 16.506469717818753, + "learning_rate": 1.027822756340396e-05, + "loss": 1.2157, + "step": 84228 + }, + { + "epoch": 1.01, + "grad_norm": 19.08339329303319, + "learning_rate": 1.027764361104296e-05, + "loss": 1.2384, + "step": 84231 + }, + { + "epoch": 1.01, + "grad_norm": 25.89694532093884, + "learning_rate": 1.0277059657734465e-05, + "loss": 1.2381, + "step": 84234 + }, + { + "epoch": 1.01, + "grad_norm": 4.809074366301664, + "learning_rate": 1.0276475703480464e-05, + "loss": 1.2703, + "step": 84237 + }, + { + "epoch": 1.01, + "grad_norm": 6.158847207715076, + "learning_rate": 1.0275891748282954e-05, + "loss": 1.394, + "step": 84240 + }, + { + "epoch": 1.01, + "grad_norm": 13.399737400478315, + "learning_rate": 1.027530779214392e-05, + "loss": 1.43, + "step": 84243 + }, + { + "epoch": 1.01, + "grad_norm": 13.715203294797389, + "learning_rate": 1.0274723835065364e-05, + "loss": 1.0547, + "step": 84246 + }, + { + "epoch": 1.01, + "grad_norm": 17.12565600100045, + "learning_rate": 1.0274139877049271e-05, + "loss": 1.1921, + "step": 84249 + }, + { + "epoch": 1.01, + "grad_norm": 12.814391685126225, + "learning_rate": 1.0273555918097639e-05, + "loss": 1.2453, + "step": 84252 + }, + { + "epoch": 1.01, + "grad_norm": 26.603208994285414, + "learning_rate": 1.027297195821246e-05, + "loss": 1.1095, + "step": 84255 + }, + { + "epoch": 1.01, + "grad_norm": 8.907077859898408, + "learning_rate": 1.0272387997395723e-05, + "loss": 1.2897, + "step": 84258 + }, + { + "epoch": 1.01, + "grad_norm": 2.7898768734116555, + "learning_rate": 1.027180403564943e-05, + "loss": 1.2269, + "step": 84261 + }, + { + "epoch": 1.01, + "grad_norm": 5.08229164519671, + "learning_rate": 1.027122007297556e-05, + "loss": 1.1194, + "step": 84264 + }, + { + "epoch": 1.01, + "grad_norm": 8.288973742843313, + "learning_rate": 1.0270636109376117e-05, + "loss": 1.1178, + "step": 84267 + }, + { + "epoch": 1.01, + "grad_norm": 13.921801085786369, + "learning_rate": 1.0270052144853089e-05, + "loss": 1.2872, + "step": 84270 + }, + { + "epoch": 1.01, + "grad_norm": 30.340349852171972, + "learning_rate": 1.0269468179408474e-05, + "loss": 1.3721, + "step": 84273 + }, + { + "epoch": 1.01, + "grad_norm": 24.10623146060855, + "learning_rate": 1.0268884213044259e-05, + "loss": 1.1896, + "step": 84276 + }, + { + "epoch": 1.01, + "grad_norm": 110.8944896663593, + "learning_rate": 1.0268300245762438e-05, + "loss": 0.8935, + "step": 84279 + }, + { + "epoch": 1.01, + "grad_norm": 6.40102086470596, + "learning_rate": 1.0267716277565008e-05, + "loss": 1.3202, + "step": 84282 + }, + { + "epoch": 1.01, + "grad_norm": 14.062746611881488, + "learning_rate": 1.0267132308453956e-05, + "loss": 1.5734, + "step": 84285 + }, + { + "epoch": 1.01, + "grad_norm": 4.855054363176399, + "learning_rate": 1.026654833843128e-05, + "loss": 1.0, + "step": 84288 + }, + { + "epoch": 1.01, + "grad_norm": 9.251615980623255, + "learning_rate": 1.0265964367498966e-05, + "loss": 0.9298, + "step": 84291 + }, + { + "epoch": 1.01, + "grad_norm": 9.643267705419849, + "learning_rate": 1.0265380395659017e-05, + "loss": 1.2821, + "step": 84294 + }, + { + "epoch": 1.01, + "grad_norm": 13.594316476720252, + "learning_rate": 1.0264796422913416e-05, + "loss": 1.333, + "step": 84297 + }, + { + "epoch": 1.01, + "grad_norm": 3.827883756551065, + "learning_rate": 1.0264212449264161e-05, + "loss": 1.4846, + "step": 84300 + }, + { + "epoch": 1.01, + "grad_norm": 16.248232937172375, + "learning_rate": 1.0263628474713248e-05, + "loss": 0.8073, + "step": 84303 + }, + { + "epoch": 1.01, + "grad_norm": 6.053603864240969, + "learning_rate": 1.0263044499262663e-05, + "loss": 1.2111, + "step": 84306 + }, + { + "epoch": 1.01, + "grad_norm": 8.805298084480018, + "learning_rate": 1.0262460522914398e-05, + "loss": 1.4857, + "step": 84309 + }, + { + "epoch": 1.01, + "grad_norm": 3.248543361183157, + "learning_rate": 1.0261876545670453e-05, + "loss": 1.3079, + "step": 84312 + }, + { + "epoch": 1.01, + "grad_norm": 3.333677023690389, + "learning_rate": 1.0261292567532821e-05, + "loss": 1.4625, + "step": 84315 + }, + { + "epoch": 1.01, + "grad_norm": 3.1790086053368936, + "learning_rate": 1.0260708588503487e-05, + "loss": 1.3523, + "step": 84318 + }, + { + "epoch": 1.01, + "grad_norm": 9.792063589536296, + "learning_rate": 1.0260124608584452e-05, + "loss": 1.2934, + "step": 84321 + }, + { + "epoch": 1.01, + "grad_norm": 5.740703566053078, + "learning_rate": 1.0259540627777705e-05, + "loss": 1.3866, + "step": 84324 + }, + { + "epoch": 1.01, + "grad_norm": 5.7570004482572505, + "learning_rate": 1.0258956646085239e-05, + "loss": 1.2279, + "step": 84327 + }, + { + "epoch": 1.01, + "grad_norm": 64.56098629651433, + "learning_rate": 1.0258372663509044e-05, + "loss": 1.0997, + "step": 84330 + }, + { + "epoch": 1.01, + "grad_norm": 7.047993580661065, + "learning_rate": 1.025778868005112e-05, + "loss": 1.3045, + "step": 84333 + }, + { + "epoch": 1.01, + "grad_norm": 13.78733892000758, + "learning_rate": 1.0257204695713454e-05, + "loss": 1.5196, + "step": 84336 + }, + { + "epoch": 1.01, + "grad_norm": 13.378114344777849, + "learning_rate": 1.025662071049804e-05, + "loss": 1.2371, + "step": 84339 + }, + { + "epoch": 1.01, + "grad_norm": 18.125528665687394, + "learning_rate": 1.0256036724406874e-05, + "loss": 1.3064, + "step": 84342 + }, + { + "epoch": 1.01, + "grad_norm": 2.425629151518693, + "learning_rate": 1.0255452737441947e-05, + "loss": 0.9508, + "step": 84345 + }, + { + "epoch": 1.01, + "grad_norm": 44.30722441547674, + "learning_rate": 1.025486874960525e-05, + "loss": 1.2906, + "step": 84348 + }, + { + "epoch": 1.01, + "grad_norm": 7.526695206622946, + "learning_rate": 1.0254284760898779e-05, + "loss": 1.4899, + "step": 84351 + }, + { + "epoch": 1.01, + "grad_norm": 3.3708827698787034, + "learning_rate": 1.0253700771324526e-05, + "loss": 1.4679, + "step": 84354 + }, + { + "epoch": 1.01, + "grad_norm": 6.03427099165627, + "learning_rate": 1.0253116780884484e-05, + "loss": 0.9662, + "step": 84357 + }, + { + "epoch": 1.01, + "grad_norm": 15.775438753321147, + "learning_rate": 1.0252532789580647e-05, + "loss": 1.4252, + "step": 84360 + }, + { + "epoch": 1.01, + "grad_norm": 10.753438541788768, + "learning_rate": 1.0251948797415001e-05, + "loss": 1.5423, + "step": 84363 + }, + { + "epoch": 1.01, + "grad_norm": 5.888630501170888, + "learning_rate": 1.025136480438955e-05, + "loss": 1.2051, + "step": 84366 + }, + { + "epoch": 1.01, + "grad_norm": 11.074098208667955, + "learning_rate": 1.025078081050628e-05, + "loss": 1.4954, + "step": 84369 + }, + { + "epoch": 1.01, + "grad_norm": 18.46556339649463, + "learning_rate": 1.0250196815767184e-05, + "loss": 1.2148, + "step": 84372 + }, + { + "epoch": 1.01, + "grad_norm": 6.666998116323423, + "learning_rate": 1.0249612820174258e-05, + "loss": 1.1742, + "step": 84375 + }, + { + "epoch": 1.01, + "grad_norm": 6.674894938188308, + "learning_rate": 1.0249028823729492e-05, + "loss": 1.3134, + "step": 84378 + }, + { + "epoch": 1.01, + "grad_norm": 15.690030645038528, + "learning_rate": 1.0248444826434882e-05, + "loss": 1.3833, + "step": 84381 + }, + { + "epoch": 1.01, + "grad_norm": 12.508819229249239, + "learning_rate": 1.0247860828292417e-05, + "loss": 1.4463, + "step": 84384 + }, + { + "epoch": 1.01, + "grad_norm": 12.83956197817154, + "learning_rate": 1.0247276829304094e-05, + "loss": 1.066, + "step": 84387 + }, + { + "epoch": 1.01, + "grad_norm": 11.136437886519401, + "learning_rate": 1.0246692829471906e-05, + "loss": 1.5734, + "step": 84390 + }, + { + "epoch": 1.01, + "grad_norm": 9.155149765103475, + "learning_rate": 1.0246108828797838e-05, + "loss": 1.087, + "step": 84393 + }, + { + "epoch": 1.01, + "grad_norm": 10.156867217637927, + "learning_rate": 1.0245524827283894e-05, + "loss": 1.5078, + "step": 84396 + }, + { + "epoch": 1.01, + "grad_norm": 7.440322487977511, + "learning_rate": 1.0244940824932062e-05, + "loss": 1.2034, + "step": 84399 + }, + { + "epoch": 1.01, + "grad_norm": 16.984100768065602, + "learning_rate": 1.0244356821744334e-05, + "loss": 1.3197, + "step": 84402 + }, + { + "epoch": 1.01, + "grad_norm": 4.11964958385666, + "learning_rate": 1.0243772817722704e-05, + "loss": 1.3489, + "step": 84405 + }, + { + "epoch": 1.01, + "grad_norm": 2.5893956487798953, + "learning_rate": 1.0243188812869167e-05, + "loss": 1.2433, + "step": 84408 + }, + { + "epoch": 1.02, + "grad_norm": 2.773498192289432, + "learning_rate": 1.0242604807185714e-05, + "loss": 1.0581, + "step": 84411 + }, + { + "epoch": 1.02, + "grad_norm": 13.857733302130274, + "learning_rate": 1.0242020800674337e-05, + "loss": 1.2867, + "step": 84414 + }, + { + "epoch": 1.02, + "grad_norm": 10.833832503612555, + "learning_rate": 1.024143679333703e-05, + "loss": 1.3661, + "step": 84417 + }, + { + "epoch": 1.02, + "grad_norm": 8.928299620131558, + "learning_rate": 1.0240852785175787e-05, + "loss": 1.1333, + "step": 84420 + }, + { + "epoch": 1.02, + "grad_norm": 11.211548075533383, + "learning_rate": 1.0240268776192602e-05, + "loss": 1.2095, + "step": 84423 + }, + { + "epoch": 1.02, + "grad_norm": 4.541737096559567, + "learning_rate": 1.0239684766389462e-05, + "loss": 1.2464, + "step": 84426 + }, + { + "epoch": 1.02, + "grad_norm": 20.589570058077147, + "learning_rate": 1.0239100755768366e-05, + "loss": 0.9217, + "step": 84429 + }, + { + "epoch": 1.02, + "grad_norm": 25.547776449754565, + "learning_rate": 1.0238516744331306e-05, + "loss": 1.2751, + "step": 84432 + }, + { + "epoch": 1.02, + "grad_norm": 16.76350249176966, + "learning_rate": 1.0237932732080276e-05, + "loss": 0.8739, + "step": 84435 + }, + { + "epoch": 1.02, + "grad_norm": 12.909529745802866, + "learning_rate": 1.0237348719017262e-05, + "loss": 1.2674, + "step": 84438 + }, + { + "epoch": 1.02, + "grad_norm": 13.910787789250024, + "learning_rate": 1.0236764705144266e-05, + "loss": 1.3228, + "step": 84441 + }, + { + "epoch": 1.02, + "grad_norm": 8.760087051340218, + "learning_rate": 1.0236180690463278e-05, + "loss": 1.1086, + "step": 84444 + }, + { + "epoch": 1.02, + "grad_norm": 4.731751888420956, + "learning_rate": 1.0235596674976286e-05, + "loss": 1.0983, + "step": 84447 + }, + { + "epoch": 1.02, + "grad_norm": 6.881685580203124, + "learning_rate": 1.0235012658685292e-05, + "loss": 0.9046, + "step": 84450 + }, + { + "epoch": 1.02, + "grad_norm": 24.889071332484093, + "learning_rate": 1.0234428641592285e-05, + "loss": 0.9597, + "step": 84453 + }, + { + "epoch": 1.02, + "grad_norm": 5.823167025612883, + "learning_rate": 1.0233844623699255e-05, + "loss": 1.3478, + "step": 84456 + }, + { + "epoch": 1.02, + "grad_norm": 3.8092733593216486, + "learning_rate": 1.0233260605008197e-05, + "loss": 1.672, + "step": 84459 + }, + { + "epoch": 1.02, + "grad_norm": 11.178859661270716, + "learning_rate": 1.0232676585521107e-05, + "loss": 1.2717, + "step": 84462 + }, + { + "epoch": 1.02, + "grad_norm": 18.050800533630586, + "learning_rate": 1.0232092565239974e-05, + "loss": 1.0514, + "step": 84465 + }, + { + "epoch": 1.02, + "grad_norm": 3.572595062451694, + "learning_rate": 1.0231508544166793e-05, + "loss": 1.0314, + "step": 84468 + }, + { + "epoch": 1.02, + "grad_norm": 14.505629804973909, + "learning_rate": 1.0230924522303556e-05, + "loss": 1.5872, + "step": 84471 + }, + { + "epoch": 1.02, + "grad_norm": 13.878511180854852, + "learning_rate": 1.0230340499652257e-05, + "loss": 1.2097, + "step": 84474 + }, + { + "epoch": 1.02, + "grad_norm": 3.5146906202217405, + "learning_rate": 1.0229756476214892e-05, + "loss": 0.8733, + "step": 84477 + }, + { + "epoch": 1.02, + "grad_norm": 10.921936505060968, + "learning_rate": 1.0229172451993446e-05, + "loss": 1.4376, + "step": 84480 + }, + { + "epoch": 1.02, + "grad_norm": 18.998785091309916, + "learning_rate": 1.0228588426989918e-05, + "loss": 1.2879, + "step": 84483 + }, + { + "epoch": 1.02, + "grad_norm": 2.988528324166531, + "learning_rate": 1.0228004401206302e-05, + "loss": 1.2108, + "step": 84486 + }, + { + "epoch": 1.02, + "grad_norm": 5.392519454581731, + "learning_rate": 1.022742037464459e-05, + "loss": 1.2843, + "step": 84489 + }, + { + "epoch": 1.02, + "grad_norm": 23.47033273051004, + "learning_rate": 1.022683634730677e-05, + "loss": 1.0593, + "step": 84492 + }, + { + "epoch": 1.02, + "grad_norm": 6.264236634511704, + "learning_rate": 1.0226252319194843e-05, + "loss": 1.1419, + "step": 84495 + }, + { + "epoch": 1.02, + "grad_norm": 34.8618377728834, + "learning_rate": 1.0225668290310797e-05, + "loss": 1.2152, + "step": 84498 + }, + { + "epoch": 1.02, + "grad_norm": 17.292330620933054, + "learning_rate": 1.0225084260656627e-05, + "loss": 1.4375, + "step": 84501 + }, + { + "epoch": 1.02, + "grad_norm": 7.150292519069851, + "learning_rate": 1.0224500230234324e-05, + "loss": 1.459, + "step": 84504 + }, + { + "epoch": 1.02, + "grad_norm": 18.66291244281606, + "learning_rate": 1.0223916199045884e-05, + "loss": 1.3686, + "step": 84507 + }, + { + "epoch": 1.02, + "grad_norm": 7.576391127076673, + "learning_rate": 1.0223332167093298e-05, + "loss": 1.2074, + "step": 84510 + }, + { + "epoch": 1.02, + "grad_norm": 13.261613745893047, + "learning_rate": 1.022274813437856e-05, + "loss": 1.2402, + "step": 84513 + }, + { + "epoch": 1.02, + "grad_norm": 11.557913119333566, + "learning_rate": 1.0222164100903662e-05, + "loss": 1.2797, + "step": 84516 + }, + { + "epoch": 1.02, + "grad_norm": 16.51604390139008, + "learning_rate": 1.02215800666706e-05, + "loss": 1.1701, + "step": 84519 + }, + { + "epoch": 1.02, + "grad_norm": 5.9932229720599715, + "learning_rate": 1.0220996031681365e-05, + "loss": 1.0978, + "step": 84522 + }, + { + "epoch": 1.02, + "grad_norm": 6.403599406181281, + "learning_rate": 1.0220411995937946e-05, + "loss": 1.024, + "step": 84525 + }, + { + "epoch": 1.02, + "grad_norm": 8.14286225722578, + "learning_rate": 1.0219827959442346e-05, + "loss": 1.357, + "step": 84528 + }, + { + "epoch": 1.02, + "grad_norm": 6.28286772555774, + "learning_rate": 1.0219243922196548e-05, + "loss": 1.3175, + "step": 84531 + }, + { + "epoch": 1.02, + "grad_norm": 25.17981253088603, + "learning_rate": 1.021865988420255e-05, + "loss": 1.2445, + "step": 84534 + }, + { + "epoch": 1.02, + "grad_norm": 18.848661170580296, + "learning_rate": 1.0218075845462348e-05, + "loss": 1.2276, + "step": 84537 + }, + { + "epoch": 1.02, + "grad_norm": 18.419197794436776, + "learning_rate": 1.0217491805977928e-05, + "loss": 1.2429, + "step": 84540 + }, + { + "epoch": 1.02, + "grad_norm": 41.52754116337086, + "learning_rate": 1.0216907765751289e-05, + "loss": 0.9688, + "step": 84543 + }, + { + "epoch": 1.02, + "grad_norm": 6.223851444958418, + "learning_rate": 1.0216323724784421e-05, + "loss": 1.0806, + "step": 84546 + }, + { + "epoch": 1.02, + "grad_norm": 31.695092367525852, + "learning_rate": 1.0215739683079318e-05, + "loss": 1.6777, + "step": 84549 + }, + { + "epoch": 1.02, + "grad_norm": 7.158198024937208, + "learning_rate": 1.0215155640637973e-05, + "loss": 1.5457, + "step": 84552 + }, + { + "epoch": 1.02, + "grad_norm": 17.209053565171907, + "learning_rate": 1.0214571597462376e-05, + "loss": 1.1689, + "step": 84555 + }, + { + "epoch": 1.02, + "grad_norm": 4.1868973125996405, + "learning_rate": 1.021398755355453e-05, + "loss": 0.8926, + "step": 84558 + }, + { + "epoch": 1.02, + "grad_norm": 4.451496448850095, + "learning_rate": 1.021340350891642e-05, + "loss": 1.3593, + "step": 84561 + }, + { + "epoch": 1.02, + "grad_norm": 10.691546181309286, + "learning_rate": 1.021281946355004e-05, + "loss": 1.1391, + "step": 84564 + }, + { + "epoch": 1.02, + "grad_norm": 6.651788402371478, + "learning_rate": 1.0212235417457379e-05, + "loss": 1.2905, + "step": 84567 + }, + { + "epoch": 1.02, + "grad_norm": 8.867758217485784, + "learning_rate": 1.021165137064044e-05, + "loss": 1.4539, + "step": 84570 + }, + { + "epoch": 1.02, + "grad_norm": 16.76235007656778, + "learning_rate": 1.0211067323101212e-05, + "loss": 1.535, + "step": 84573 + }, + { + "epoch": 1.02, + "grad_norm": 14.599783893016856, + "learning_rate": 1.0210483274841685e-05, + "loss": 1.3566, + "step": 84576 + }, + { + "epoch": 1.02, + "grad_norm": 15.393736081033532, + "learning_rate": 1.0209899225863854e-05, + "loss": 1.1837, + "step": 84579 + }, + { + "epoch": 1.02, + "grad_norm": 24.082743965715636, + "learning_rate": 1.0209315176169714e-05, + "loss": 1.5436, + "step": 84582 + }, + { + "epoch": 1.02, + "grad_norm": 26.572983394198623, + "learning_rate": 1.0208731125761256e-05, + "loss": 1.0067, + "step": 84585 + }, + { + "epoch": 1.02, + "grad_norm": 30.032992316128013, + "learning_rate": 1.0208147074640474e-05, + "loss": 1.3653, + "step": 84588 + }, + { + "epoch": 1.02, + "grad_norm": 24.77987631062018, + "learning_rate": 1.020756302280936e-05, + "loss": 1.533, + "step": 84591 + }, + { + "epoch": 1.02, + "grad_norm": 15.346943102215437, + "learning_rate": 1.020697897026991e-05, + "loss": 1.3387, + "step": 84594 + }, + { + "epoch": 1.02, + "grad_norm": 19.778694760967227, + "learning_rate": 1.0206394917024115e-05, + "loss": 1.1587, + "step": 84597 + }, + { + "epoch": 1.02, + "grad_norm": 10.48609105933022, + "learning_rate": 1.0205810863073966e-05, + "loss": 1.1813, + "step": 84600 + }, + { + "epoch": 1.02, + "grad_norm": 10.319189317741703, + "learning_rate": 1.0205226808421462e-05, + "loss": 1.0173, + "step": 84603 + }, + { + "epoch": 1.02, + "grad_norm": 9.383902655109319, + "learning_rate": 1.020464275306859e-05, + "loss": 1.2595, + "step": 84606 + }, + { + "epoch": 1.02, + "grad_norm": 17.76448344809569, + "learning_rate": 1.0204058697017347e-05, + "loss": 0.9494, + "step": 84609 + }, + { + "epoch": 1.02, + "grad_norm": 15.976155917266324, + "learning_rate": 1.0203474640269727e-05, + "loss": 1.1168, + "step": 84612 + }, + { + "epoch": 1.02, + "grad_norm": 10.58318077319906, + "learning_rate": 1.0202890582827717e-05, + "loss": 1.1242, + "step": 84615 + }, + { + "epoch": 1.02, + "grad_norm": 25.816229597130942, + "learning_rate": 1.020230652469332e-05, + "loss": 1.2411, + "step": 84618 + }, + { + "epoch": 1.02, + "grad_norm": 9.550541125600489, + "learning_rate": 1.020172246586852e-05, + "loss": 1.212, + "step": 84621 + }, + { + "epoch": 1.02, + "grad_norm": 18.58371145053266, + "learning_rate": 1.0201138406355317e-05, + "loss": 1.025, + "step": 84624 + }, + { + "epoch": 1.02, + "grad_norm": 23.029013983775748, + "learning_rate": 1.0200554346155697e-05, + "loss": 1.0167, + "step": 84627 + }, + { + "epoch": 1.02, + "grad_norm": 6.177047552562036, + "learning_rate": 1.019997028527166e-05, + "loss": 1.4351, + "step": 84630 + }, + { + "epoch": 1.02, + "grad_norm": 7.968040975988464, + "learning_rate": 1.0199386223705194e-05, + "loss": 1.2798, + "step": 84633 + }, + { + "epoch": 1.02, + "grad_norm": 12.11926661760149, + "learning_rate": 1.0198802161458298e-05, + "loss": 0.9615, + "step": 84636 + }, + { + "epoch": 1.02, + "grad_norm": 3.3601140825503144, + "learning_rate": 1.0198218098532962e-05, + "loss": 1.1519, + "step": 84639 + }, + { + "epoch": 1.02, + "grad_norm": 8.27011653829041, + "learning_rate": 1.0197634034931175e-05, + "loss": 1.247, + "step": 84642 + }, + { + "epoch": 1.02, + "grad_norm": 18.095377892759338, + "learning_rate": 1.0197049970654937e-05, + "loss": 1.3427, + "step": 84645 + }, + { + "epoch": 1.02, + "grad_norm": 10.969494615096318, + "learning_rate": 1.0196465905706239e-05, + "loss": 1.3416, + "step": 84648 + }, + { + "epoch": 1.02, + "grad_norm": 6.990965624574638, + "learning_rate": 1.0195881840087072e-05, + "loss": 1.1392, + "step": 84651 + }, + { + "epoch": 1.02, + "grad_norm": 53.270273853346374, + "learning_rate": 1.0195297773799431e-05, + "loss": 1.1976, + "step": 84654 + }, + { + "epoch": 1.02, + "grad_norm": 7.819384503122607, + "learning_rate": 1.0194713706845311e-05, + "loss": 1.3722, + "step": 84657 + }, + { + "epoch": 1.02, + "grad_norm": 2.99114449074628, + "learning_rate": 1.0194129639226699e-05, + "loss": 1.1473, + "step": 84660 + }, + { + "epoch": 1.02, + "grad_norm": 16.230997779653677, + "learning_rate": 1.0193545570945594e-05, + "loss": 1.3102, + "step": 84663 + }, + { + "epoch": 1.02, + "grad_norm": 9.965475675913181, + "learning_rate": 1.0192961502003994e-05, + "loss": 0.9986, + "step": 84666 + }, + { + "epoch": 1.02, + "grad_norm": 11.823223761483467, + "learning_rate": 1.0192377432403877e-05, + "loss": 1.2834, + "step": 84669 + }, + { + "epoch": 1.02, + "grad_norm": 4.036633889728718, + "learning_rate": 1.0191793362147248e-05, + "loss": 1.1878, + "step": 84672 + }, + { + "epoch": 1.02, + "grad_norm": 6.624328849591447, + "learning_rate": 1.0191209291236097e-05, + "loss": 1.1034, + "step": 84675 + }, + { + "epoch": 1.02, + "grad_norm": 24.468764059214443, + "learning_rate": 1.0190625219672423e-05, + "loss": 1.3195, + "step": 84678 + }, + { + "epoch": 1.02, + "grad_norm": 13.182725201897666, + "learning_rate": 1.0190041147458207e-05, + "loss": 1.339, + "step": 84681 + }, + { + "epoch": 1.02, + "grad_norm": 7.336597479863353, + "learning_rate": 1.0189457074595453e-05, + "loss": 1.4228, + "step": 84684 + }, + { + "epoch": 1.02, + "grad_norm": 15.114585696666687, + "learning_rate": 1.0188873001086146e-05, + "loss": 1.3728, + "step": 84687 + }, + { + "epoch": 1.02, + "grad_norm": 10.415092940321832, + "learning_rate": 1.0188288926932289e-05, + "loss": 1.0863, + "step": 84690 + }, + { + "epoch": 1.02, + "grad_norm": 14.528833407299556, + "learning_rate": 1.0187704852135867e-05, + "loss": 0.886, + "step": 84693 + }, + { + "epoch": 1.02, + "grad_norm": 12.262888710200906, + "learning_rate": 1.0187120776698873e-05, + "loss": 0.8055, + "step": 84696 + }, + { + "epoch": 1.02, + "grad_norm": 26.583708564202215, + "learning_rate": 1.0186536700623309e-05, + "loss": 1.3813, + "step": 84699 + }, + { + "epoch": 1.02, + "grad_norm": 16.206918034158715, + "learning_rate": 1.0185952623911158e-05, + "loss": 1.0582, + "step": 84702 + }, + { + "epoch": 1.02, + "grad_norm": 9.999150466138353, + "learning_rate": 1.018536854656442e-05, + "loss": 0.9309, + "step": 84705 + }, + { + "epoch": 1.02, + "grad_norm": 9.171573343129456, + "learning_rate": 1.0184784468585083e-05, + "loss": 1.3852, + "step": 84708 + }, + { + "epoch": 1.02, + "grad_norm": 5.3627535970495614, + "learning_rate": 1.018420038997515e-05, + "loss": 1.1967, + "step": 84711 + }, + { + "epoch": 1.02, + "grad_norm": 16.594565584352015, + "learning_rate": 1.0183616310736599e-05, + "loss": 1.2517, + "step": 84714 + }, + { + "epoch": 1.02, + "grad_norm": 11.634047342129788, + "learning_rate": 1.0183032230871435e-05, + "loss": 1.2421, + "step": 84717 + }, + { + "epoch": 1.02, + "grad_norm": 10.753755116733188, + "learning_rate": 1.0182448150381649e-05, + "loss": 1.4785, + "step": 84720 + }, + { + "epoch": 1.02, + "grad_norm": 9.763806249094209, + "learning_rate": 1.0181864069269231e-05, + "loss": 1.0672, + "step": 84723 + }, + { + "epoch": 1.02, + "grad_norm": 50.99734938063786, + "learning_rate": 1.0181279987536182e-05, + "loss": 1.3298, + "step": 84726 + }, + { + "epoch": 1.02, + "grad_norm": 7.599922773535082, + "learning_rate": 1.0180695905184482e-05, + "loss": 1.1748, + "step": 84729 + }, + { + "epoch": 1.02, + "grad_norm": 6.210853793476033, + "learning_rate": 1.0180111822216138e-05, + "loss": 1.0592, + "step": 84732 + }, + { + "epoch": 1.02, + "grad_norm": 10.59585050566672, + "learning_rate": 1.0179527738633135e-05, + "loss": 1.0488, + "step": 84735 + }, + { + "epoch": 1.02, + "grad_norm": 9.433887279838219, + "learning_rate": 1.0178943654437468e-05, + "loss": 1.0197, + "step": 84738 + }, + { + "epoch": 1.02, + "grad_norm": 11.151165132769362, + "learning_rate": 1.0178359569631131e-05, + "loss": 1.5182, + "step": 84741 + }, + { + "epoch": 1.02, + "grad_norm": 19.634789132228953, + "learning_rate": 1.0177775484216114e-05, + "loss": 1.1078, + "step": 84744 + }, + { + "epoch": 1.02, + "grad_norm": 5.727410146037258, + "learning_rate": 1.0177191398194419e-05, + "loss": 1.2705, + "step": 84747 + }, + { + "epoch": 1.02, + "grad_norm": 2.3572186057069473, + "learning_rate": 1.017660731156803e-05, + "loss": 1.1, + "step": 84750 + }, + { + "epoch": 1.02, + "grad_norm": 7.7107873377267655, + "learning_rate": 1.0176023224338946e-05, + "loss": 1.0084, + "step": 84753 + }, + { + "epoch": 1.02, + "grad_norm": 6.1853688915030105, + "learning_rate": 1.0175439136509155e-05, + "loss": 1.1932, + "step": 84756 + }, + { + "epoch": 1.02, + "grad_norm": 6.07347765577976, + "learning_rate": 1.0174855048080656e-05, + "loss": 1.3409, + "step": 84759 + }, + { + "epoch": 1.02, + "grad_norm": 46.03402589433869, + "learning_rate": 1.0174270959055441e-05, + "loss": 1.2555, + "step": 84762 + }, + { + "epoch": 1.02, + "grad_norm": 8.465394219542889, + "learning_rate": 1.0173686869435502e-05, + "loss": 1.0311, + "step": 84765 + }, + { + "epoch": 1.02, + "grad_norm": 4.635728111463941, + "learning_rate": 1.0173102779222828e-05, + "loss": 1.3138, + "step": 84768 + }, + { + "epoch": 1.02, + "grad_norm": 2.7241470063376254, + "learning_rate": 1.0172518688419416e-05, + "loss": 1.3855, + "step": 84771 + }, + { + "epoch": 1.02, + "grad_norm": 2.8503243066623223, + "learning_rate": 1.0171934597027266e-05, + "loss": 1.3929, + "step": 84774 + }, + { + "epoch": 1.02, + "grad_norm": 16.377053622441032, + "learning_rate": 1.017135050504836e-05, + "loss": 1.1537, + "step": 84777 + }, + { + "epoch": 1.02, + "grad_norm": 11.886344755366657, + "learning_rate": 1.0170766412484701e-05, + "loss": 1.4938, + "step": 84780 + }, + { + "epoch": 1.02, + "grad_norm": 2.709783406159376, + "learning_rate": 1.0170182319338276e-05, + "loss": 0.9655, + "step": 84783 + }, + { + "epoch": 1.02, + "grad_norm": 8.698927009519217, + "learning_rate": 1.0169598225611079e-05, + "loss": 1.3629, + "step": 84786 + }, + { + "epoch": 1.02, + "grad_norm": 5.24649542380088, + "learning_rate": 1.0169014131305101e-05, + "loss": 1.5302, + "step": 84789 + }, + { + "epoch": 1.02, + "grad_norm": 8.321884153869567, + "learning_rate": 1.0168430036422343e-05, + "loss": 1.15, + "step": 84792 + }, + { + "epoch": 1.02, + "grad_norm": 12.27063132749526, + "learning_rate": 1.0167845940964796e-05, + "loss": 1.1142, + "step": 84795 + }, + { + "epoch": 1.02, + "grad_norm": 8.210022519265708, + "learning_rate": 1.0167261844934447e-05, + "loss": 1.248, + "step": 84798 + }, + { + "epoch": 1.02, + "grad_norm": 7.661248026681783, + "learning_rate": 1.0166677748333293e-05, + "loss": 1.4523, + "step": 84801 + }, + { + "epoch": 1.02, + "grad_norm": 20.739991441058702, + "learning_rate": 1.016609365116333e-05, + "loss": 1.1998, + "step": 84804 + }, + { + "epoch": 1.02, + "grad_norm": 5.34241750179672, + "learning_rate": 1.0165509553426551e-05, + "loss": 1.3332, + "step": 84807 + }, + { + "epoch": 1.02, + "grad_norm": 19.58165019089616, + "learning_rate": 1.0164925455124943e-05, + "loss": 1.1933, + "step": 84810 + }, + { + "epoch": 1.02, + "grad_norm": 19.139836331876687, + "learning_rate": 1.0164341356260508e-05, + "loss": 1.2544, + "step": 84813 + }, + { + "epoch": 1.02, + "grad_norm": 2.677002320685511, + "learning_rate": 1.0163757256835235e-05, + "loss": 1.3116, + "step": 84816 + }, + { + "epoch": 1.02, + "grad_norm": 15.76717302084235, + "learning_rate": 1.0163173156851116e-05, + "loss": 1.1843, + "step": 84819 + }, + { + "epoch": 1.02, + "grad_norm": 14.087711947544838, + "learning_rate": 1.0162589056310144e-05, + "loss": 1.1098, + "step": 84822 + }, + { + "epoch": 1.02, + "grad_norm": 5.337464149975048, + "learning_rate": 1.0162004955214318e-05, + "loss": 1.2624, + "step": 84825 + }, + { + "epoch": 1.02, + "grad_norm": 6.266647194434942, + "learning_rate": 1.0161420853565625e-05, + "loss": 1.3458, + "step": 84828 + }, + { + "epoch": 1.02, + "grad_norm": 3.075526178219873, + "learning_rate": 1.0160836751366059e-05, + "loss": 0.9245, + "step": 84831 + }, + { + "epoch": 1.02, + "grad_norm": 7.865036362945836, + "learning_rate": 1.0160252648617616e-05, + "loss": 1.0625, + "step": 84834 + }, + { + "epoch": 1.02, + "grad_norm": 76.25604723751061, + "learning_rate": 1.0159668545322293e-05, + "loss": 1.0265, + "step": 84837 + }, + { + "epoch": 1.02, + "grad_norm": 3.574452963924273, + "learning_rate": 1.0159084441482075e-05, + "loss": 1.1267, + "step": 84840 + }, + { + "epoch": 1.02, + "grad_norm": 8.673921732972723, + "learning_rate": 1.0158500337098958e-05, + "loss": 1.261, + "step": 84843 + }, + { + "epoch": 1.02, + "grad_norm": 72.93778827252743, + "learning_rate": 1.0157916232174938e-05, + "loss": 1.355, + "step": 84846 + }, + { + "epoch": 1.02, + "grad_norm": 7.007278742094071, + "learning_rate": 1.0157332126712007e-05, + "loss": 1.5036, + "step": 84849 + }, + { + "epoch": 1.02, + "grad_norm": 5.921252077093303, + "learning_rate": 1.0156748020712158e-05, + "loss": 1.2555, + "step": 84852 + }, + { + "epoch": 1.02, + "grad_norm": 12.269480284924391, + "learning_rate": 1.0156163914177384e-05, + "loss": 0.9201, + "step": 84855 + }, + { + "epoch": 1.02, + "grad_norm": 8.313179767330926, + "learning_rate": 1.015557980710968e-05, + "loss": 1.2296, + "step": 84858 + }, + { + "epoch": 1.02, + "grad_norm": 4.674711108271382, + "learning_rate": 1.0154995699511036e-05, + "loss": 0.9032, + "step": 84861 + }, + { + "epoch": 1.02, + "grad_norm": 6.635592142536199, + "learning_rate": 1.015441159138345e-05, + "loss": 1.2353, + "step": 84864 + }, + { + "epoch": 1.02, + "grad_norm": 14.029535783056797, + "learning_rate": 1.0153827482728911e-05, + "loss": 1.289, + "step": 84867 + }, + { + "epoch": 1.02, + "grad_norm": 17.03085703095606, + "learning_rate": 1.0153243373549415e-05, + "loss": 1.3922, + "step": 84870 + }, + { + "epoch": 1.02, + "grad_norm": 6.977592659137581, + "learning_rate": 1.0152659263846956e-05, + "loss": 1.2409, + "step": 84873 + }, + { + "epoch": 1.02, + "grad_norm": 6.548909445193196, + "learning_rate": 1.0152075153623522e-05, + "loss": 0.7599, + "step": 84876 + }, + { + "epoch": 1.02, + "grad_norm": 19.96798616469941, + "learning_rate": 1.0151491042881114e-05, + "loss": 1.6216, + "step": 84879 + }, + { + "epoch": 1.02, + "grad_norm": 3.1371717568279074, + "learning_rate": 1.015090693162172e-05, + "loss": 1.32, + "step": 84882 + }, + { + "epoch": 1.02, + "grad_norm": 9.812486133379378, + "learning_rate": 1.0150322819847334e-05, + "loss": 1.0935, + "step": 84885 + }, + { + "epoch": 1.02, + "grad_norm": 12.72842012669202, + "learning_rate": 1.0149738707559953e-05, + "loss": 1.4494, + "step": 84888 + }, + { + "epoch": 1.02, + "grad_norm": 21.84302805847071, + "learning_rate": 1.0149154594761565e-05, + "loss": 1.3064, + "step": 84891 + }, + { + "epoch": 1.02, + "grad_norm": 12.030004480702699, + "learning_rate": 1.014857048145417e-05, + "loss": 1.4411, + "step": 84894 + }, + { + "epoch": 1.02, + "grad_norm": 13.74698121749161, + "learning_rate": 1.0147986367639753e-05, + "loss": 1.3377, + "step": 84897 + }, + { + "epoch": 1.02, + "grad_norm": 7.920399503489492, + "learning_rate": 1.0147402253320313e-05, + "loss": 1.3103, + "step": 84900 + }, + { + "epoch": 1.02, + "grad_norm": 6.9314104970507735, + "learning_rate": 1.0146818138497844e-05, + "loss": 1.1316, + "step": 84903 + }, + { + "epoch": 1.02, + "grad_norm": 9.098106348155078, + "learning_rate": 1.0146234023174334e-05, + "loss": 1.1777, + "step": 84906 + }, + { + "epoch": 1.02, + "grad_norm": 12.944802242735392, + "learning_rate": 1.0145649907351785e-05, + "loss": 0.9173, + "step": 84909 + }, + { + "epoch": 1.02, + "grad_norm": 10.15103459032793, + "learning_rate": 1.0145065791032182e-05, + "loss": 1.1143, + "step": 84912 + }, + { + "epoch": 1.02, + "grad_norm": 9.038329540589476, + "learning_rate": 1.0144481674217523e-05, + "loss": 1.2437, + "step": 84915 + }, + { + "epoch": 1.02, + "grad_norm": 8.32117880867014, + "learning_rate": 1.0143897556909799e-05, + "loss": 1.3676, + "step": 84918 + }, + { + "epoch": 1.02, + "grad_norm": 10.975295331710864, + "learning_rate": 1.0143313439111006e-05, + "loss": 0.9431, + "step": 84921 + }, + { + "epoch": 1.02, + "grad_norm": 9.973508893421485, + "learning_rate": 1.0142729320823136e-05, + "loss": 0.9466, + "step": 84924 + }, + { + "epoch": 1.02, + "grad_norm": 6.289039366324501, + "learning_rate": 1.0142145202048182e-05, + "loss": 1.3489, + "step": 84927 + }, + { + "epoch": 1.02, + "grad_norm": 6.585609538304382, + "learning_rate": 1.0141561082788134e-05, + "loss": 1.1992, + "step": 84930 + }, + { + "epoch": 1.02, + "grad_norm": 6.656486449788701, + "learning_rate": 1.0140976963044993e-05, + "loss": 1.1128, + "step": 84933 + }, + { + "epoch": 1.02, + "grad_norm": 14.59577183525722, + "learning_rate": 1.0140392842820747e-05, + "loss": 0.9736, + "step": 84936 + }, + { + "epoch": 1.02, + "grad_norm": 9.97778603059969, + "learning_rate": 1.013980872211739e-05, + "loss": 1.1118, + "step": 84939 + }, + { + "epoch": 1.02, + "grad_norm": 7.010664750411222, + "learning_rate": 1.0139224600936916e-05, + "loss": 1.1182, + "step": 84942 + }, + { + "epoch": 1.02, + "grad_norm": 15.356876215195827, + "learning_rate": 1.013864047928132e-05, + "loss": 1.3594, + "step": 84945 + }, + { + "epoch": 1.02, + "grad_norm": 15.472273722010357, + "learning_rate": 1.0138056357152598e-05, + "loss": 1.2795, + "step": 84948 + }, + { + "epoch": 1.02, + "grad_norm": 4.352451454734664, + "learning_rate": 1.0137472234552733e-05, + "loss": 1.0681, + "step": 84951 + }, + { + "epoch": 1.02, + "grad_norm": 6.591614696957857, + "learning_rate": 1.0136888111483727e-05, + "loss": 1.25, + "step": 84954 + }, + { + "epoch": 1.02, + "grad_norm": 5.05638270249918, + "learning_rate": 1.013630398794757e-05, + "loss": 1.2168, + "step": 84957 + }, + { + "epoch": 1.02, + "grad_norm": 13.496526336229275, + "learning_rate": 1.0135719863946256e-05, + "loss": 0.926, + "step": 84960 + }, + { + "epoch": 1.02, + "grad_norm": 20.34950613223148, + "learning_rate": 1.0135135739481782e-05, + "loss": 1.1914, + "step": 84963 + }, + { + "epoch": 1.02, + "grad_norm": 17.998694860598825, + "learning_rate": 1.0134551614556138e-05, + "loss": 1.1751, + "step": 84966 + }, + { + "epoch": 1.02, + "grad_norm": 5.026442864986859, + "learning_rate": 1.0133967489171317e-05, + "loss": 1.2163, + "step": 84969 + }, + { + "epoch": 1.02, + "grad_norm": 5.722887807820825, + "learning_rate": 1.0133383363329313e-05, + "loss": 1.1145, + "step": 84972 + }, + { + "epoch": 1.02, + "grad_norm": 13.2169888360065, + "learning_rate": 1.013279923703212e-05, + "loss": 1.1978, + "step": 84975 + }, + { + "epoch": 1.02, + "grad_norm": 8.251175157272797, + "learning_rate": 1.0132215110281732e-05, + "loss": 1.2404, + "step": 84978 + }, + { + "epoch": 1.02, + "grad_norm": 15.16344078220077, + "learning_rate": 1.0131630983080139e-05, + "loss": 0.994, + "step": 84981 + }, + { + "epoch": 1.02, + "grad_norm": 18.147880120578392, + "learning_rate": 1.0131046855429337e-05, + "loss": 1.0614, + "step": 84984 + }, + { + "epoch": 1.02, + "grad_norm": 4.299845254207952, + "learning_rate": 1.013046272733132e-05, + "loss": 1.1605, + "step": 84987 + }, + { + "epoch": 1.02, + "grad_norm": 5.030107024615406, + "learning_rate": 1.0129878598788083e-05, + "loss": 0.9903, + "step": 84990 + }, + { + "epoch": 1.02, + "grad_norm": 22.434953335696132, + "learning_rate": 1.0129294469801613e-05, + "loss": 1.2162, + "step": 84993 + }, + { + "epoch": 1.02, + "grad_norm": 11.728898813606731, + "learning_rate": 1.012871034037391e-05, + "loss": 1.2317, + "step": 84996 + }, + { + "epoch": 1.02, + "grad_norm": 14.389003294204143, + "learning_rate": 1.0128126210506966e-05, + "loss": 0.977, + "step": 84999 + }, + { + "epoch": 1.02, + "grad_norm": 12.234480415643127, + "learning_rate": 1.0127542080202772e-05, + "loss": 1.2586, + "step": 85002 + }, + { + "epoch": 1.02, + "grad_norm": 5.306926907827116, + "learning_rate": 1.012695794946332e-05, + "loss": 1.1582, + "step": 85005 + }, + { + "epoch": 1.02, + "grad_norm": 60.55204876932592, + "learning_rate": 1.0126373818290612e-05, + "loss": 0.9929, + "step": 85008 + }, + { + "epoch": 1.02, + "grad_norm": 12.913014622518695, + "learning_rate": 1.0125789686686633e-05, + "loss": 1.3549, + "step": 85011 + }, + { + "epoch": 1.02, + "grad_norm": 45.23649407402916, + "learning_rate": 1.0125205554653377e-05, + "loss": 1.463, + "step": 85014 + }, + { + "epoch": 1.02, + "grad_norm": 9.780797471390882, + "learning_rate": 1.0124621422192841e-05, + "loss": 1.2903, + "step": 85017 + }, + { + "epoch": 1.02, + "grad_norm": 8.178776857449579, + "learning_rate": 1.012403728930702e-05, + "loss": 1.3677, + "step": 85020 + }, + { + "epoch": 1.02, + "grad_norm": 25.524824611478323, + "learning_rate": 1.0123453155997902e-05, + "loss": 0.8947, + "step": 85023 + }, + { + "epoch": 1.02, + "grad_norm": 10.103716779983948, + "learning_rate": 1.0122869022267481e-05, + "loss": 1.0919, + "step": 85026 + }, + { + "epoch": 1.02, + "grad_norm": 15.208911315493658, + "learning_rate": 1.0122284888117754e-05, + "loss": 1.5243, + "step": 85029 + }, + { + "epoch": 1.02, + "grad_norm": 3.416309959532323, + "learning_rate": 1.0121700753550713e-05, + "loss": 1.3343, + "step": 85032 + }, + { + "epoch": 1.02, + "grad_norm": 13.03241158758391, + "learning_rate": 1.0121116618568352e-05, + "loss": 1.2577, + "step": 85035 + }, + { + "epoch": 1.02, + "grad_norm": 8.850786943238866, + "learning_rate": 1.0120532483172662e-05, + "loss": 1.0728, + "step": 85038 + }, + { + "epoch": 1.02, + "grad_norm": 3.3995933798553555, + "learning_rate": 1.0119948347365637e-05, + "loss": 0.97, + "step": 85041 + }, + { + "epoch": 1.02, + "grad_norm": 8.664216842965455, + "learning_rate": 1.0119364211149276e-05, + "loss": 1.3801, + "step": 85044 + }, + { + "epoch": 1.02, + "grad_norm": 4.365494576608179, + "learning_rate": 1.0118780074525563e-05, + "loss": 0.8604, + "step": 85047 + }, + { + "epoch": 1.02, + "grad_norm": 11.220880197179486, + "learning_rate": 1.0118195937496498e-05, + "loss": 1.1493, + "step": 85050 + }, + { + "epoch": 1.02, + "grad_norm": 16.566129585577436, + "learning_rate": 1.0117611800064075e-05, + "loss": 1.1012, + "step": 85053 + }, + { + "epoch": 1.02, + "grad_norm": 12.555169744840338, + "learning_rate": 1.0117027662230282e-05, + "loss": 1.2431, + "step": 85056 + }, + { + "epoch": 1.02, + "grad_norm": 6.894999862733121, + "learning_rate": 1.0116443523997117e-05, + "loss": 1.1513, + "step": 85059 + }, + { + "epoch": 1.02, + "grad_norm": 8.184531369068095, + "learning_rate": 1.0115859385366576e-05, + "loss": 1.0527, + "step": 85062 + }, + { + "epoch": 1.02, + "grad_norm": 3.9679588135527992, + "learning_rate": 1.0115275246340643e-05, + "loss": 1.6936, + "step": 85065 + }, + { + "epoch": 1.02, + "grad_norm": 9.50741542700174, + "learning_rate": 1.0114691106921317e-05, + "loss": 1.378, + "step": 85068 + }, + { + "epoch": 1.02, + "grad_norm": 8.073955391897613, + "learning_rate": 1.0114106967110594e-05, + "loss": 0.9577, + "step": 85071 + }, + { + "epoch": 1.02, + "grad_norm": 5.739182396397968, + "learning_rate": 1.0113522826910467e-05, + "loss": 1.2736, + "step": 85074 + }, + { + "epoch": 1.02, + "grad_norm": 6.068158841077927, + "learning_rate": 1.0112938686322926e-05, + "loss": 1.1397, + "step": 85077 + }, + { + "epoch": 1.02, + "grad_norm": 1.7543573987340624, + "learning_rate": 1.0112354545349963e-05, + "loss": 0.9966, + "step": 85080 + }, + { + "epoch": 1.02, + "grad_norm": 18.05979919005698, + "learning_rate": 1.0111770403993579e-05, + "loss": 1.0427, + "step": 85083 + }, + { + "epoch": 1.02, + "grad_norm": 11.964489672078711, + "learning_rate": 1.0111186262255758e-05, + "loss": 1.0472, + "step": 85086 + }, + { + "epoch": 1.02, + "grad_norm": 13.016231367412072, + "learning_rate": 1.0110602120138501e-05, + "loss": 1.3228, + "step": 85089 + }, + { + "epoch": 1.02, + "grad_norm": 13.134548123595014, + "learning_rate": 1.0110017977643798e-05, + "loss": 1.2669, + "step": 85092 + }, + { + "epoch": 1.02, + "grad_norm": 9.767320615355837, + "learning_rate": 1.0109433834773645e-05, + "loss": 0.9189, + "step": 85095 + }, + { + "epoch": 1.02, + "grad_norm": 22.51305105200722, + "learning_rate": 1.0108849691530034e-05, + "loss": 1.5887, + "step": 85098 + }, + { + "epoch": 1.02, + "grad_norm": 7.014557303679311, + "learning_rate": 1.0108265547914955e-05, + "loss": 1.3637, + "step": 85101 + }, + { + "epoch": 1.02, + "grad_norm": 12.110806856191939, + "learning_rate": 1.0107681403930407e-05, + "loss": 1.3262, + "step": 85104 + }, + { + "epoch": 1.02, + "grad_norm": 7.364708986295819, + "learning_rate": 1.0107097259578382e-05, + "loss": 0.9242, + "step": 85107 + }, + { + "epoch": 1.02, + "grad_norm": 9.17311274644233, + "learning_rate": 1.0106513114860872e-05, + "loss": 0.8941, + "step": 85110 + }, + { + "epoch": 1.02, + "grad_norm": 11.042024982736839, + "learning_rate": 1.0105928969779867e-05, + "loss": 1.3316, + "step": 85113 + }, + { + "epoch": 1.02, + "grad_norm": 16.336086471014294, + "learning_rate": 1.010534482433737e-05, + "loss": 1.221, + "step": 85116 + }, + { + "epoch": 1.02, + "grad_norm": 4.786860622481065, + "learning_rate": 1.0104760678535367e-05, + "loss": 1.0968, + "step": 85119 + }, + { + "epoch": 1.02, + "grad_norm": 7.3244493825652155, + "learning_rate": 1.0104176532375854e-05, + "loss": 1.2184, + "step": 85122 + }, + { + "epoch": 1.02, + "grad_norm": 2.397215518957126, + "learning_rate": 1.0103592385860823e-05, + "loss": 0.8302, + "step": 85125 + }, + { + "epoch": 1.02, + "grad_norm": 4.613789914484635, + "learning_rate": 1.0103008238992269e-05, + "loss": 1.259, + "step": 85128 + }, + { + "epoch": 1.02, + "grad_norm": 7.988295286664194, + "learning_rate": 1.0102424091772185e-05, + "loss": 1.1089, + "step": 85131 + }, + { + "epoch": 1.02, + "grad_norm": 12.716452332088304, + "learning_rate": 1.0101839944202565e-05, + "loss": 1.1513, + "step": 85134 + }, + { + "epoch": 1.02, + "grad_norm": 3.7248358864514386, + "learning_rate": 1.0101255796285402e-05, + "loss": 1.474, + "step": 85137 + }, + { + "epoch": 1.02, + "grad_norm": 12.25877876928373, + "learning_rate": 1.0100671648022689e-05, + "loss": 1.2572, + "step": 85140 + }, + { + "epoch": 1.02, + "grad_norm": 3.767038450516626, + "learning_rate": 1.010008749941642e-05, + "loss": 0.8786, + "step": 85143 + }, + { + "epoch": 1.02, + "grad_norm": 10.769380371877787, + "learning_rate": 1.0099503350468586e-05, + "loss": 1.1716, + "step": 85146 + }, + { + "epoch": 1.02, + "grad_norm": 4.021683582756964, + "learning_rate": 1.0098919201181188e-05, + "loss": 1.0913, + "step": 85149 + }, + { + "epoch": 1.02, + "grad_norm": 20.622039424862137, + "learning_rate": 1.0098335051556212e-05, + "loss": 1.1802, + "step": 85152 + }, + { + "epoch": 1.02, + "grad_norm": 13.96944496999393, + "learning_rate": 1.009775090159565e-05, + "loss": 0.8432, + "step": 85155 + }, + { + "epoch": 1.02, + "grad_norm": 33.44552114316424, + "learning_rate": 1.0097166751301506e-05, + "loss": 0.9688, + "step": 85158 + }, + { + "epoch": 1.02, + "grad_norm": 23.300141218533604, + "learning_rate": 1.0096582600675762e-05, + "loss": 1.1062, + "step": 85161 + }, + { + "epoch": 1.02, + "grad_norm": 11.483886819893787, + "learning_rate": 1.009599844972042e-05, + "loss": 1.2555, + "step": 85164 + }, + { + "epoch": 1.02, + "grad_norm": 7.593716169495026, + "learning_rate": 1.0095414298437464e-05, + "loss": 1.3025, + "step": 85167 + }, + { + "epoch": 1.02, + "grad_norm": 3.697773308639347, + "learning_rate": 1.0094830146828902e-05, + "loss": 1.4245, + "step": 85170 + }, + { + "epoch": 1.02, + "grad_norm": 6.787293788587154, + "learning_rate": 1.0094245994896713e-05, + "loss": 1.0654, + "step": 85173 + }, + { + "epoch": 1.02, + "grad_norm": 7.2767348373058605, + "learning_rate": 1.0093661842642897e-05, + "loss": 1.1351, + "step": 85176 + }, + { + "epoch": 1.02, + "grad_norm": 7.439724456223983, + "learning_rate": 1.0093077690069448e-05, + "loss": 1.3209, + "step": 85179 + }, + { + "epoch": 1.02, + "grad_norm": 16.140658694519818, + "learning_rate": 1.0092493537178359e-05, + "loss": 1.1476, + "step": 85182 + }, + { + "epoch": 1.02, + "grad_norm": 10.113428079894899, + "learning_rate": 1.0091909383971621e-05, + "loss": 1.4226, + "step": 85185 + }, + { + "epoch": 1.02, + "grad_norm": 10.785110925879632, + "learning_rate": 1.0091325230451229e-05, + "loss": 1.0726, + "step": 85188 + }, + { + "epoch": 1.02, + "grad_norm": 10.145978845076687, + "learning_rate": 1.0090741076619179e-05, + "loss": 1.5515, + "step": 85191 + }, + { + "epoch": 1.02, + "grad_norm": 9.327855864816103, + "learning_rate": 1.009015692247746e-05, + "loss": 1.1756, + "step": 85194 + }, + { + "epoch": 1.02, + "grad_norm": 26.815670777885387, + "learning_rate": 1.0089572768028071e-05, + "loss": 1.3422, + "step": 85197 + }, + { + "epoch": 1.02, + "grad_norm": 62.67439161213309, + "learning_rate": 1.0088988613273e-05, + "loss": 1.2024, + "step": 85200 + }, + { + "epoch": 1.02, + "grad_norm": 5.573264995935641, + "learning_rate": 1.0088404458214242e-05, + "loss": 1.4927, + "step": 85203 + }, + { + "epoch": 1.02, + "grad_norm": 6.348997313749252, + "learning_rate": 1.0087820302853794e-05, + "loss": 1.2959, + "step": 85206 + }, + { + "epoch": 1.02, + "grad_norm": 6.588695026370165, + "learning_rate": 1.0087236147193644e-05, + "loss": 1.6415, + "step": 85209 + }, + { + "epoch": 1.02, + "grad_norm": 7.278262748545764, + "learning_rate": 1.0086651991235794e-05, + "loss": 1.0631, + "step": 85212 + }, + { + "epoch": 1.02, + "grad_norm": 3.5957786531877365, + "learning_rate": 1.0086067834982228e-05, + "loss": 1.1144, + "step": 85215 + }, + { + "epoch": 1.02, + "grad_norm": 11.838191430163187, + "learning_rate": 1.0085483678434944e-05, + "loss": 1.0514, + "step": 85218 + }, + { + "epoch": 1.02, + "grad_norm": 6.566115151235129, + "learning_rate": 1.0084899521595934e-05, + "loss": 1.2996, + "step": 85221 + }, + { + "epoch": 1.02, + "grad_norm": 29.873013857979664, + "learning_rate": 1.0084315364467195e-05, + "loss": 1.4396, + "step": 85224 + }, + { + "epoch": 1.02, + "grad_norm": 6.692781597563336, + "learning_rate": 1.0083731207050715e-05, + "loss": 1.102, + "step": 85227 + }, + { + "epoch": 1.02, + "grad_norm": 6.957436987912215, + "learning_rate": 1.0083147049348489e-05, + "loss": 1.4261, + "step": 85230 + }, + { + "epoch": 1.02, + "grad_norm": 7.281183444847375, + "learning_rate": 1.008256289136252e-05, + "loss": 1.4225, + "step": 85233 + }, + { + "epoch": 1.02, + "grad_norm": 44.52477113968149, + "learning_rate": 1.0081978733094786e-05, + "loss": 1.1604, + "step": 85236 + }, + { + "epoch": 1.02, + "grad_norm": 34.03054280113851, + "learning_rate": 1.0081394574547291e-05, + "loss": 0.8388, + "step": 85239 + }, + { + "epoch": 1.03, + "grad_norm": 7.141656813701021, + "learning_rate": 1.0080810415722026e-05, + "loss": 1.0023, + "step": 85242 + }, + { + "epoch": 1.03, + "grad_norm": 14.768169139982213, + "learning_rate": 1.0080226256620984e-05, + "loss": 1.3568, + "step": 85245 + }, + { + "epoch": 1.03, + "grad_norm": 8.662436770629634, + "learning_rate": 1.0079642097246156e-05, + "loss": 1.3873, + "step": 85248 + }, + { + "epoch": 1.03, + "grad_norm": 51.89245103363658, + "learning_rate": 1.0079057937599542e-05, + "loss": 1.4529, + "step": 85251 + }, + { + "epoch": 1.03, + "grad_norm": 12.549734438181899, + "learning_rate": 1.007847377768313e-05, + "loss": 1.2376, + "step": 85254 + }, + { + "epoch": 1.03, + "grad_norm": 24.58201194205639, + "learning_rate": 1.0077889617498915e-05, + "loss": 1.3835, + "step": 85257 + }, + { + "epoch": 1.03, + "grad_norm": 11.126605625755749, + "learning_rate": 1.0077305457048891e-05, + "loss": 0.7298, + "step": 85260 + }, + { + "epoch": 1.03, + "grad_norm": 30.539978584302187, + "learning_rate": 1.0076721296335053e-05, + "loss": 1.1002, + "step": 85263 + }, + { + "epoch": 1.03, + "grad_norm": 12.229871665454388, + "learning_rate": 1.0076137135359392e-05, + "loss": 1.4504, + "step": 85266 + }, + { + "epoch": 1.03, + "grad_norm": 20.96717889593007, + "learning_rate": 1.00755529741239e-05, + "loss": 1.3367, + "step": 85269 + }, + { + "epoch": 1.03, + "grad_norm": 9.183830021166068, + "learning_rate": 1.0074968812630574e-05, + "loss": 1.0091, + "step": 85272 + }, + { + "epoch": 1.03, + "grad_norm": 7.976186378402363, + "learning_rate": 1.0074384650881409e-05, + "loss": 1.1191, + "step": 85275 + }, + { + "epoch": 1.03, + "grad_norm": 9.293483553196777, + "learning_rate": 1.0073800488878393e-05, + "loss": 1.4857, + "step": 85278 + }, + { + "epoch": 1.03, + "grad_norm": 19.17659313231674, + "learning_rate": 1.0073216326623524e-05, + "loss": 1.2896, + "step": 85281 + }, + { + "epoch": 1.03, + "grad_norm": 6.5725533931592395, + "learning_rate": 1.0072632164118791e-05, + "loss": 1.4764, + "step": 85284 + }, + { + "epoch": 1.03, + "grad_norm": 33.634635778663856, + "learning_rate": 1.0072048001366197e-05, + "loss": 1.0524, + "step": 85287 + }, + { + "epoch": 1.03, + "grad_norm": 19.801917111784878, + "learning_rate": 1.0071463838367723e-05, + "loss": 1.3824, + "step": 85290 + }, + { + "epoch": 1.03, + "grad_norm": 11.773887652669282, + "learning_rate": 1.007087967512537e-05, + "loss": 1.0849, + "step": 85293 + }, + { + "epoch": 1.03, + "grad_norm": 14.001679651975753, + "learning_rate": 1.0070295511641133e-05, + "loss": 1.1663, + "step": 85296 + }, + { + "epoch": 1.03, + "grad_norm": 22.681289587525495, + "learning_rate": 1.0069711347917002e-05, + "loss": 1.1142, + "step": 85299 + }, + { + "epoch": 1.03, + "grad_norm": 7.773769906001962, + "learning_rate": 1.0069127183954967e-05, + "loss": 1.3343, + "step": 85302 + }, + { + "epoch": 1.03, + "grad_norm": 42.100789551747205, + "learning_rate": 1.0068543019757031e-05, + "loss": 0.9774, + "step": 85305 + }, + { + "epoch": 1.03, + "grad_norm": 45.58991715561575, + "learning_rate": 1.0067958855325182e-05, + "loss": 1.4762, + "step": 85308 + }, + { + "epoch": 1.03, + "grad_norm": 81.0124705686933, + "learning_rate": 1.0067374690661409e-05, + "loss": 1.0016, + "step": 85311 + }, + { + "epoch": 1.03, + "grad_norm": 3.673917444759441, + "learning_rate": 1.0066790525767715e-05, + "loss": 1.3872, + "step": 85314 + }, + { + "epoch": 1.03, + "grad_norm": 6.3345961286722545, + "learning_rate": 1.0066206360646088e-05, + "loss": 1.0445, + "step": 85317 + }, + { + "epoch": 1.03, + "grad_norm": 6.460613196946547, + "learning_rate": 1.0065622195298524e-05, + "loss": 1.3376, + "step": 85320 + }, + { + "epoch": 1.03, + "grad_norm": 20.997547631364053, + "learning_rate": 1.0065038029727013e-05, + "loss": 1.3827, + "step": 85323 + }, + { + "epoch": 1.03, + "grad_norm": 16.228742823422156, + "learning_rate": 1.006445386393355e-05, + "loss": 1.5774, + "step": 85326 + }, + { + "epoch": 1.03, + "grad_norm": 5.172362315722681, + "learning_rate": 1.006386969792013e-05, + "loss": 0.8817, + "step": 85329 + }, + { + "epoch": 1.03, + "grad_norm": 17.71657717780651, + "learning_rate": 1.0063285531688747e-05, + "loss": 1.3725, + "step": 85332 + }, + { + "epoch": 1.03, + "grad_norm": 4.587454331260239, + "learning_rate": 1.0062701365241392e-05, + "loss": 1.2203, + "step": 85335 + }, + { + "epoch": 1.03, + "grad_norm": 17.99938347791738, + "learning_rate": 1.0062117198580059e-05, + "loss": 1.5144, + "step": 85338 + }, + { + "epoch": 1.03, + "grad_norm": 12.054893568996368, + "learning_rate": 1.0061533031706744e-05, + "loss": 1.1417, + "step": 85341 + }, + { + "epoch": 1.03, + "grad_norm": 4.74626501343382, + "learning_rate": 1.0060948864623436e-05, + "loss": 1.1573, + "step": 85344 + }, + { + "epoch": 1.03, + "grad_norm": 27.812027176639237, + "learning_rate": 1.0060364697332136e-05, + "loss": 1.2566, + "step": 85347 + }, + { + "epoch": 1.03, + "grad_norm": 16.605510931182092, + "learning_rate": 1.0059780529834831e-05, + "loss": 1.6252, + "step": 85350 + }, + { + "epoch": 1.03, + "grad_norm": 22.722351399505953, + "learning_rate": 1.0059196362133516e-05, + "loss": 1.0517, + "step": 85353 + }, + { + "epoch": 1.03, + "grad_norm": 7.606939867583954, + "learning_rate": 1.0058612194230186e-05, + "loss": 1.1775, + "step": 85356 + }, + { + "epoch": 1.03, + "grad_norm": 4.894296185903937, + "learning_rate": 1.0058028026126834e-05, + "loss": 1.5352, + "step": 85359 + }, + { + "epoch": 1.03, + "grad_norm": 6.609846832651612, + "learning_rate": 1.0057443857825454e-05, + "loss": 1.3533, + "step": 85362 + }, + { + "epoch": 1.03, + "grad_norm": 5.191132908909788, + "learning_rate": 1.0056859689328033e-05, + "loss": 1.3405, + "step": 85365 + }, + { + "epoch": 1.03, + "grad_norm": 20.353275066940583, + "learning_rate": 1.0056275520636576e-05, + "loss": 1.0181, + "step": 85368 + }, + { + "epoch": 1.03, + "grad_norm": 7.646642695000768, + "learning_rate": 1.0055691351753069e-05, + "loss": 0.9143, + "step": 85371 + }, + { + "epoch": 1.03, + "grad_norm": 11.471234245602023, + "learning_rate": 1.0055107182679512e-05, + "loss": 1.2236, + "step": 85374 + }, + { + "epoch": 1.03, + "grad_norm": 13.511677632524334, + "learning_rate": 1.0054523013417887e-05, + "loss": 1.3019, + "step": 85377 + }, + { + "epoch": 1.03, + "grad_norm": 5.430407174921219, + "learning_rate": 1.0053938843970199e-05, + "loss": 1.0538, + "step": 85380 + }, + { + "epoch": 1.03, + "grad_norm": 8.476032120485247, + "learning_rate": 1.0053354674338435e-05, + "loss": 1.6831, + "step": 85383 + }, + { + "epoch": 1.03, + "grad_norm": 8.118764984457334, + "learning_rate": 1.0052770504524592e-05, + "loss": 1.5606, + "step": 85386 + }, + { + "epoch": 1.03, + "grad_norm": 15.23428885473868, + "learning_rate": 1.005218633453066e-05, + "loss": 1.2644, + "step": 85389 + }, + { + "epoch": 1.03, + "grad_norm": 13.205993253249773, + "learning_rate": 1.0051602164358638e-05, + "loss": 1.1231, + "step": 85392 + }, + { + "epoch": 1.03, + "grad_norm": 9.98637544618989, + "learning_rate": 1.0051017994010517e-05, + "loss": 0.9969, + "step": 85395 + }, + { + "epoch": 1.03, + "grad_norm": 48.48436825138453, + "learning_rate": 1.0050433823488286e-05, + "loss": 1.2907, + "step": 85398 + }, + { + "epoch": 1.03, + "grad_norm": 9.274237935380333, + "learning_rate": 1.0049849652793944e-05, + "loss": 1.338, + "step": 85401 + }, + { + "epoch": 1.03, + "grad_norm": 7.827917404089423, + "learning_rate": 1.0049265481929485e-05, + "loss": 1.3138, + "step": 85404 + }, + { + "epoch": 1.03, + "grad_norm": 8.979263951108091, + "learning_rate": 1.00486813108969e-05, + "loss": 1.4106, + "step": 85407 + }, + { + "epoch": 1.03, + "grad_norm": 10.42327797139252, + "learning_rate": 1.004809713969818e-05, + "loss": 1.2373, + "step": 85410 + }, + { + "epoch": 1.03, + "grad_norm": 2.3037682364209298, + "learning_rate": 1.0047512968335324e-05, + "loss": 1.325, + "step": 85413 + }, + { + "epoch": 1.03, + "grad_norm": 3.6888772642846983, + "learning_rate": 1.0046928796810326e-05, + "loss": 1.0327, + "step": 85416 + }, + { + "epoch": 1.03, + "grad_norm": 31.75546230481384, + "learning_rate": 1.0046344625125173e-05, + "loss": 1.2502, + "step": 85419 + }, + { + "epoch": 1.03, + "grad_norm": 10.804142733606, + "learning_rate": 1.0045760453281865e-05, + "loss": 1.3591, + "step": 85422 + }, + { + "epoch": 1.03, + "grad_norm": 5.145726092729981, + "learning_rate": 1.0045176281282392e-05, + "loss": 1.1859, + "step": 85425 + }, + { + "epoch": 1.03, + "grad_norm": 7.390431562649442, + "learning_rate": 1.004459210912875e-05, + "loss": 1.3468, + "step": 85428 + }, + { + "epoch": 1.03, + "grad_norm": 22.28198016821026, + "learning_rate": 1.0044007936822929e-05, + "loss": 0.9465, + "step": 85431 + }, + { + "epoch": 1.03, + "grad_norm": 8.73737218975032, + "learning_rate": 1.0043423764366927e-05, + "loss": 1.1596, + "step": 85434 + }, + { + "epoch": 1.03, + "grad_norm": 6.599184241996177, + "learning_rate": 1.0042839591762731e-05, + "loss": 1.4567, + "step": 85437 + }, + { + "epoch": 1.03, + "grad_norm": 5.745125855277944, + "learning_rate": 1.0042255419012346e-05, + "loss": 1.1088, + "step": 85440 + }, + { + "epoch": 1.03, + "grad_norm": 8.70922031780551, + "learning_rate": 1.0041671246117752e-05, + "loss": 1.2188, + "step": 85443 + }, + { + "epoch": 1.03, + "grad_norm": 32.781151220345826, + "learning_rate": 1.0041087073080951e-05, + "loss": 0.8907, + "step": 85446 + }, + { + "epoch": 1.03, + "grad_norm": 6.329674967214983, + "learning_rate": 1.0040502899903936e-05, + "loss": 1.0297, + "step": 85449 + }, + { + "epoch": 1.03, + "grad_norm": 8.24856903172381, + "learning_rate": 1.0039918726588697e-05, + "loss": 1.3569, + "step": 85452 + }, + { + "epoch": 1.03, + "grad_norm": 10.157988882489663, + "learning_rate": 1.0039334553137233e-05, + "loss": 1.1289, + "step": 85455 + }, + { + "epoch": 1.03, + "grad_norm": 9.188570802572036, + "learning_rate": 1.0038750379551531e-05, + "loss": 1.6984, + "step": 85458 + }, + { + "epoch": 1.03, + "grad_norm": 5.479724333643483, + "learning_rate": 1.003816620583359e-05, + "loss": 1.4833, + "step": 85461 + }, + { + "epoch": 1.03, + "grad_norm": 7.716838454013847, + "learning_rate": 1.00375820319854e-05, + "loss": 0.9778, + "step": 85464 + }, + { + "epoch": 1.03, + "grad_norm": 3.898623202285963, + "learning_rate": 1.0036997858008957e-05, + "loss": 1.3261, + "step": 85467 + }, + { + "epoch": 1.03, + "grad_norm": 15.968686584702272, + "learning_rate": 1.0036413683906255e-05, + "loss": 1.2955, + "step": 85470 + }, + { + "epoch": 1.03, + "grad_norm": 6.512111833030704, + "learning_rate": 1.0035829509679283e-05, + "loss": 1.1595, + "step": 85473 + }, + { + "epoch": 1.03, + "grad_norm": 16.613522947178783, + "learning_rate": 1.0035245335330039e-05, + "loss": 1.5352, + "step": 85476 + }, + { + "epoch": 1.03, + "grad_norm": 9.846191992965613, + "learning_rate": 1.0034661160860517e-05, + "loss": 0.886, + "step": 85479 + }, + { + "epoch": 1.03, + "grad_norm": 10.736716832461097, + "learning_rate": 1.003407698627271e-05, + "loss": 1.1768, + "step": 85482 + }, + { + "epoch": 1.03, + "grad_norm": 16.160735281155446, + "learning_rate": 1.0033492811568606e-05, + "loss": 1.4631, + "step": 85485 + }, + { + "epoch": 1.03, + "grad_norm": 2.3928453174288244, + "learning_rate": 1.0032908636750205e-05, + "loss": 0.8445, + "step": 85488 + }, + { + "epoch": 1.03, + "grad_norm": 12.322467820931543, + "learning_rate": 1.0032324461819501e-05, + "loss": 1.1818, + "step": 85491 + }, + { + "epoch": 1.03, + "grad_norm": 5.615270169429205, + "learning_rate": 1.0031740286778482e-05, + "loss": 1.1299, + "step": 85494 + }, + { + "epoch": 1.03, + "grad_norm": 5.937100473717122, + "learning_rate": 1.0031156111629145e-05, + "loss": 1.2892, + "step": 85497 + }, + { + "epoch": 1.03, + "grad_norm": 48.95450250458415, + "learning_rate": 1.0030571936373486e-05, + "loss": 1.5176, + "step": 85500 + }, + { + "epoch": 1.03, + "grad_norm": 11.864890840472054, + "learning_rate": 1.0029987761013495e-05, + "loss": 1.1414, + "step": 85503 + }, + { + "epoch": 1.03, + "grad_norm": 11.981250218465215, + "learning_rate": 1.0029403585551165e-05, + "loss": 1.3, + "step": 85506 + }, + { + "epoch": 1.03, + "grad_norm": 15.811468011960061, + "learning_rate": 1.0028819409988493e-05, + "loss": 1.153, + "step": 85509 + }, + { + "epoch": 1.03, + "grad_norm": 11.316276274403211, + "learning_rate": 1.0028235234327471e-05, + "loss": 1.0332, + "step": 85512 + }, + { + "epoch": 1.03, + "grad_norm": 10.714104030518955, + "learning_rate": 1.0027651058570092e-05, + "loss": 1.1505, + "step": 85515 + }, + { + "epoch": 1.03, + "grad_norm": 3.7151411769065263, + "learning_rate": 1.0027066882718347e-05, + "loss": 1.0983, + "step": 85518 + }, + { + "epoch": 1.03, + "grad_norm": 18.230865410424794, + "learning_rate": 1.0026482706774234e-05, + "loss": 1.4611, + "step": 85521 + }, + { + "epoch": 1.03, + "grad_norm": 5.976885973900104, + "learning_rate": 1.0025898530739748e-05, + "loss": 1.4824, + "step": 85524 + }, + { + "epoch": 1.03, + "grad_norm": 19.315677818242648, + "learning_rate": 1.0025314354616874e-05, + "loss": 1.4274, + "step": 85527 + }, + { + "epoch": 1.03, + "grad_norm": 19.087155470505934, + "learning_rate": 1.0024730178407615e-05, + "loss": 1.09, + "step": 85530 + }, + { + "epoch": 1.03, + "grad_norm": 10.016119134019931, + "learning_rate": 1.0024146002113964e-05, + "loss": 1.2201, + "step": 85533 + }, + { + "epoch": 1.03, + "grad_norm": 5.846123703240417, + "learning_rate": 1.0023561825737907e-05, + "loss": 1.4246, + "step": 85536 + }, + { + "epoch": 1.03, + "grad_norm": 23.448635074785305, + "learning_rate": 1.0022977649281443e-05, + "loss": 1.1358, + "step": 85539 + }, + { + "epoch": 1.03, + "grad_norm": 18.8014980215574, + "learning_rate": 1.0022393472746564e-05, + "loss": 1.1814, + "step": 85542 + }, + { + "epoch": 1.03, + "grad_norm": 6.305699885194027, + "learning_rate": 1.0021809296135265e-05, + "loss": 1.2353, + "step": 85545 + }, + { + "epoch": 1.03, + "grad_norm": 9.2340877492867, + "learning_rate": 1.0021225119449538e-05, + "loss": 1.177, + "step": 85548 + }, + { + "epoch": 1.03, + "grad_norm": 14.400178447789804, + "learning_rate": 1.0020640942691378e-05, + "loss": 1.4996, + "step": 85551 + }, + { + "epoch": 1.03, + "grad_norm": 14.840363394539647, + "learning_rate": 1.0020056765862775e-05, + "loss": 1.3444, + "step": 85554 + }, + { + "epoch": 1.03, + "grad_norm": 7.204714486052466, + "learning_rate": 1.0019472588965732e-05, + "loss": 1.3417, + "step": 85557 + }, + { + "epoch": 1.03, + "grad_norm": 9.237056104090273, + "learning_rate": 1.0018888412002228e-05, + "loss": 1.489, + "step": 85560 + }, + { + "epoch": 1.03, + "grad_norm": 21.959777263304616, + "learning_rate": 1.001830423497427e-05, + "loss": 1.3936, + "step": 85563 + }, + { + "epoch": 1.03, + "grad_norm": 20.366025634991214, + "learning_rate": 1.0017720057883847e-05, + "loss": 1.0981, + "step": 85566 + }, + { + "epoch": 1.03, + "grad_norm": 4.982653973829294, + "learning_rate": 1.001713588073295e-05, + "loss": 1.1979, + "step": 85569 + }, + { + "epoch": 1.03, + "grad_norm": 5.050499828116948, + "learning_rate": 1.0016551703523571e-05, + "loss": 1.2129, + "step": 85572 + }, + { + "epoch": 1.03, + "grad_norm": 7.337298000741719, + "learning_rate": 1.0015967526257713e-05, + "loss": 1.3029, + "step": 85575 + }, + { + "epoch": 1.03, + "grad_norm": 21.78104780194273, + "learning_rate": 1.001538334893736e-05, + "loss": 1.289, + "step": 85578 + }, + { + "epoch": 1.03, + "grad_norm": 8.649296993938869, + "learning_rate": 1.001479917156451e-05, + "loss": 1.0464, + "step": 85581 + }, + { + "epoch": 1.03, + "grad_norm": 14.215047438385296, + "learning_rate": 1.0014214994141154e-05, + "loss": 1.057, + "step": 85584 + }, + { + "epoch": 1.03, + "grad_norm": 20.26361761828112, + "learning_rate": 1.001363081666929e-05, + "loss": 1.5232, + "step": 85587 + }, + { + "epoch": 1.03, + "grad_norm": 3.088797345740112, + "learning_rate": 1.0013046639150907e-05, + "loss": 0.8574, + "step": 85590 + }, + { + "epoch": 1.03, + "grad_norm": 6.902638485731304, + "learning_rate": 1.0012462461588001e-05, + "loss": 1.648, + "step": 85593 + }, + { + "epoch": 1.03, + "grad_norm": 3.723054996648584, + "learning_rate": 1.0011878283982567e-05, + "loss": 0.946, + "step": 85596 + }, + { + "epoch": 1.03, + "grad_norm": 5.044528723140593, + "learning_rate": 1.0011294106336595e-05, + "loss": 1.5093, + "step": 85599 + }, + { + "epoch": 1.03, + "grad_norm": 17.985111637659987, + "learning_rate": 1.0010709928652083e-05, + "loss": 1.2626, + "step": 85602 + }, + { + "epoch": 1.03, + "grad_norm": 4.213458396197536, + "learning_rate": 1.0010125750931016e-05, + "loss": 1.0819, + "step": 85605 + }, + { + "epoch": 1.03, + "grad_norm": 4.241453351424551, + "learning_rate": 1.0009541573175398e-05, + "loss": 1.1077, + "step": 85608 + }, + { + "epoch": 1.03, + "grad_norm": 6.787294435100865, + "learning_rate": 1.0008957395387219e-05, + "loss": 1.4339, + "step": 85611 + }, + { + "epoch": 1.03, + "grad_norm": 8.668583091094444, + "learning_rate": 1.0008373217568466e-05, + "loss": 1.1121, + "step": 85614 + }, + { + "epoch": 1.03, + "grad_norm": 12.641080467687848, + "learning_rate": 1.0007789039721145e-05, + "loss": 1.2584, + "step": 85617 + }, + { + "epoch": 1.03, + "grad_norm": 7.147759608994226, + "learning_rate": 1.0007204861847237e-05, + "loss": 1.4785, + "step": 85620 + }, + { + "epoch": 1.03, + "grad_norm": 7.865830713408092, + "learning_rate": 1.0006620683948747e-05, + "loss": 1.2279, + "step": 85623 + }, + { + "epoch": 1.03, + "grad_norm": 20.173804450030136, + "learning_rate": 1.0006036506027656e-05, + "loss": 1.1258, + "step": 85626 + }, + { + "epoch": 1.03, + "grad_norm": 12.202308192244688, + "learning_rate": 1.0005452328085972e-05, + "loss": 1.0491, + "step": 85629 + }, + { + "epoch": 1.03, + "grad_norm": 8.249309434157967, + "learning_rate": 1.0004868150125677e-05, + "loss": 1.4592, + "step": 85632 + }, + { + "epoch": 1.03, + "grad_norm": 22.88230095695248, + "learning_rate": 1.0004283972148767e-05, + "loss": 1.2286, + "step": 85635 + }, + { + "epoch": 1.03, + "grad_norm": 5.864465607956402, + "learning_rate": 1.0003699794157242e-05, + "loss": 1.2468, + "step": 85638 + }, + { + "epoch": 1.03, + "grad_norm": 3.7691152713045475, + "learning_rate": 1.000311561615309e-05, + "loss": 1.0056, + "step": 85641 + }, + { + "epoch": 1.03, + "grad_norm": 26.59732422587097, + "learning_rate": 1.0002531438138303e-05, + "loss": 1.3549, + "step": 85644 + }, + { + "epoch": 1.03, + "grad_norm": 8.822443797166319, + "learning_rate": 1.0001947260114877e-05, + "loss": 1.0108, + "step": 85647 + }, + { + "epoch": 1.03, + "grad_norm": 4.036376402511291, + "learning_rate": 1.000136308208481e-05, + "loss": 1.4538, + "step": 85650 + }, + { + "epoch": 1.03, + "grad_norm": 32.761056927769516, + "learning_rate": 1.0000778904050088e-05, + "loss": 1.2631, + "step": 85653 + }, + { + "epoch": 1.03, + "grad_norm": 7.910881722889523, + "learning_rate": 1.0000194726012708e-05, + "loss": 1.2462, + "step": 85656 + }, + { + "epoch": 1.03, + "grad_norm": 23.637523865893773, + "learning_rate": 9.999610547974662e-06, + "loss": 1.3619, + "step": 85659 + }, + { + "epoch": 1.03, + "grad_norm": 14.54602881979626, + "learning_rate": 9.99902636993795e-06, + "loss": 0.9352, + "step": 85662 + }, + { + "epoch": 1.03, + "grad_norm": 36.82078918002373, + "learning_rate": 9.998442191904556e-06, + "loss": 1.2037, + "step": 85665 + }, + { + "epoch": 1.03, + "grad_norm": 20.800038971716248, + "learning_rate": 9.997858013876478e-06, + "loss": 1.1777, + "step": 85668 + }, + { + "epoch": 1.03, + "grad_norm": 8.833628856205488, + "learning_rate": 9.997273835855712e-06, + "loss": 1.3426, + "step": 85671 + }, + { + "epoch": 1.03, + "grad_norm": 11.475979107812222, + "learning_rate": 9.996689657844253e-06, + "loss": 1.1931, + "step": 85674 + }, + { + "epoch": 1.03, + "grad_norm": 5.249047637808942, + "learning_rate": 9.996105479844084e-06, + "loss": 0.9073, + "step": 85677 + }, + { + "epoch": 1.03, + "grad_norm": 23.029969883453912, + "learning_rate": 9.99552130185721e-06, + "loss": 1.3551, + "step": 85680 + }, + { + "epoch": 1.03, + "grad_norm": 14.007532245519382, + "learning_rate": 9.994937123885617e-06, + "loss": 1.3114, + "step": 85683 + }, + { + "epoch": 1.03, + "grad_norm": 7.569802938814566, + "learning_rate": 9.994352945931302e-06, + "loss": 1.0923, + "step": 85686 + }, + { + "epoch": 1.03, + "grad_norm": 9.029603517504386, + "learning_rate": 9.993768767996262e-06, + "loss": 0.844, + "step": 85689 + }, + { + "epoch": 1.03, + "grad_norm": 11.771110388012529, + "learning_rate": 9.993184590082485e-06, + "loss": 1.0531, + "step": 85692 + }, + { + "epoch": 1.03, + "grad_norm": 5.643174390642436, + "learning_rate": 9.992600412191965e-06, + "loss": 1.0774, + "step": 85695 + }, + { + "epoch": 1.03, + "grad_norm": 10.271799162315228, + "learning_rate": 9.992016234326697e-06, + "loss": 1.0908, + "step": 85698 + }, + { + "epoch": 1.03, + "grad_norm": 12.725253257744823, + "learning_rate": 9.99143205648868e-06, + "loss": 1.306, + "step": 85701 + }, + { + "epoch": 1.03, + "grad_norm": 9.804986028952603, + "learning_rate": 9.990847878679898e-06, + "loss": 1.0428, + "step": 85704 + }, + { + "epoch": 1.03, + "grad_norm": 39.19762493882909, + "learning_rate": 9.99026370090235e-06, + "loss": 0.8761, + "step": 85707 + }, + { + "epoch": 1.03, + "grad_norm": 4.956217987024694, + "learning_rate": 9.989679523158024e-06, + "loss": 1.4095, + "step": 85710 + }, + { + "epoch": 1.03, + "grad_norm": 5.814079042657329, + "learning_rate": 9.989095345448923e-06, + "loss": 0.9779, + "step": 85713 + }, + { + "epoch": 1.03, + "grad_norm": 5.629259803872819, + "learning_rate": 9.988511167777038e-06, + "loss": 0.8916, + "step": 85716 + }, + { + "epoch": 1.03, + "grad_norm": 13.708313783149844, + "learning_rate": 9.987926990144355e-06, + "loss": 1.288, + "step": 85719 + }, + { + "epoch": 1.03, + "grad_norm": 18.98797159674687, + "learning_rate": 9.987342812552872e-06, + "loss": 1.2552, + "step": 85722 + }, + { + "epoch": 1.03, + "grad_norm": 9.72446652914158, + "learning_rate": 9.986758635004587e-06, + "loss": 1.6414, + "step": 85725 + }, + { + "epoch": 1.03, + "grad_norm": 5.443003621603042, + "learning_rate": 9.986174457501494e-06, + "loss": 1.2182, + "step": 85728 + }, + { + "epoch": 1.03, + "grad_norm": 17.25172223410565, + "learning_rate": 9.985590280045575e-06, + "loss": 1.316, + "step": 85731 + }, + { + "epoch": 1.03, + "grad_norm": 11.634128830249443, + "learning_rate": 9.985006102638834e-06, + "loss": 0.851, + "step": 85734 + }, + { + "epoch": 1.03, + "grad_norm": 20.763973923940934, + "learning_rate": 9.98442192528326e-06, + "loss": 1.2043, + "step": 85737 + }, + { + "epoch": 1.03, + "grad_norm": 14.6261843096917, + "learning_rate": 9.983837747980849e-06, + "loss": 1.0337, + "step": 85740 + }, + { + "epoch": 1.03, + "grad_norm": 4.398543760350934, + "learning_rate": 9.983253570733598e-06, + "loss": 1.0795, + "step": 85743 + }, + { + "epoch": 1.03, + "grad_norm": 5.559656446137687, + "learning_rate": 9.982669393543493e-06, + "loss": 1.057, + "step": 85746 + }, + { + "epoch": 1.03, + "grad_norm": 3.036140672430355, + "learning_rate": 9.982085216412532e-06, + "loss": 1.2912, + "step": 85749 + }, + { + "epoch": 1.03, + "grad_norm": 11.8734813506269, + "learning_rate": 9.981501039342705e-06, + "loss": 1.2942, + "step": 85752 + }, + { + "epoch": 1.03, + "grad_norm": 16.788388597046694, + "learning_rate": 9.980916862336014e-06, + "loss": 1.3891, + "step": 85755 + }, + { + "epoch": 1.03, + "grad_norm": 16.066309061135314, + "learning_rate": 9.980332685394444e-06, + "loss": 1.1653, + "step": 85758 + }, + { + "epoch": 1.03, + "grad_norm": 10.583667669135428, + "learning_rate": 9.979748508519989e-06, + "loss": 1.2716, + "step": 85761 + }, + { + "epoch": 1.03, + "grad_norm": 10.71533661300311, + "learning_rate": 9.979164331714646e-06, + "loss": 1.2908, + "step": 85764 + }, + { + "epoch": 1.03, + "grad_norm": 48.638687884453915, + "learning_rate": 9.978580154980409e-06, + "loss": 1.26, + "step": 85767 + }, + { + "epoch": 1.03, + "grad_norm": 3.764094866013613, + "learning_rate": 9.977995978319273e-06, + "loss": 1.2043, + "step": 85770 + }, + { + "epoch": 1.03, + "grad_norm": 15.0745089515974, + "learning_rate": 9.977411801733223e-06, + "loss": 1.0798, + "step": 85773 + }, + { + "epoch": 1.03, + "grad_norm": 7.659263315585828, + "learning_rate": 9.976827625224261e-06, + "loss": 1.4634, + "step": 85776 + }, + { + "epoch": 1.03, + "grad_norm": 4.193022216328874, + "learning_rate": 9.976243448794377e-06, + "loss": 1.3775, + "step": 85779 + }, + { + "epoch": 1.03, + "grad_norm": 7.139133948166948, + "learning_rate": 9.975659272445569e-06, + "loss": 1.4602, + "step": 85782 + }, + { + "epoch": 1.03, + "grad_norm": 6.330532375351445, + "learning_rate": 9.975075096179824e-06, + "loss": 1.1797, + "step": 85785 + }, + { + "epoch": 1.03, + "grad_norm": 15.686204711179991, + "learning_rate": 9.97449091999914e-06, + "loss": 1.4547, + "step": 85788 + }, + { + "epoch": 1.03, + "grad_norm": 10.750508083608361, + "learning_rate": 9.973906743905508e-06, + "loss": 1.0953, + "step": 85791 + }, + { + "epoch": 1.03, + "grad_norm": 5.464513492077461, + "learning_rate": 9.97332256790092e-06, + "loss": 0.8821, + "step": 85794 + }, + { + "epoch": 1.03, + "grad_norm": 10.740391210695954, + "learning_rate": 9.97273839198738e-06, + "loss": 1.2239, + "step": 85797 + }, + { + "epoch": 1.03, + "grad_norm": 13.051666114103016, + "learning_rate": 9.97215421616687e-06, + "loss": 1.4348, + "step": 85800 + }, + { + "epoch": 1.03, + "grad_norm": 13.321997820873856, + "learning_rate": 9.971570040441386e-06, + "loss": 1.3479, + "step": 85803 + }, + { + "epoch": 1.03, + "grad_norm": 6.676472478579087, + "learning_rate": 9.970985864812925e-06, + "loss": 1.3035, + "step": 85806 + }, + { + "epoch": 1.03, + "grad_norm": 10.972621929400802, + "learning_rate": 9.97040168928348e-06, + "loss": 1.2271, + "step": 85809 + }, + { + "epoch": 1.03, + "grad_norm": 4.573964895399558, + "learning_rate": 9.969817513855043e-06, + "loss": 1.2067, + "step": 85812 + }, + { + "epoch": 1.03, + "grad_norm": 8.855054055937712, + "learning_rate": 9.969233338529605e-06, + "loss": 1.1917, + "step": 85815 + }, + { + "epoch": 1.03, + "grad_norm": 4.496242044556738, + "learning_rate": 9.968649163309166e-06, + "loss": 1.1525, + "step": 85818 + }, + { + "epoch": 1.03, + "grad_norm": 18.612876993013852, + "learning_rate": 9.968064988195713e-06, + "loss": 1.096, + "step": 85821 + }, + { + "epoch": 1.03, + "grad_norm": 4.18434861868155, + "learning_rate": 9.967480813191248e-06, + "loss": 1.1662, + "step": 85824 + }, + { + "epoch": 1.03, + "grad_norm": 5.277403705434774, + "learning_rate": 9.966896638297754e-06, + "loss": 0.9648, + "step": 85827 + }, + { + "epoch": 1.03, + "grad_norm": 10.931761510903561, + "learning_rate": 9.966312463517231e-06, + "loss": 1.0218, + "step": 85830 + }, + { + "epoch": 1.03, + "grad_norm": 8.074346237859848, + "learning_rate": 9.96572828885167e-06, + "loss": 0.8923, + "step": 85833 + }, + { + "epoch": 1.03, + "grad_norm": 15.95074681204804, + "learning_rate": 9.965144114303072e-06, + "loss": 1.0386, + "step": 85836 + }, + { + "epoch": 1.03, + "grad_norm": 14.346783821345618, + "learning_rate": 9.964559939873419e-06, + "loss": 1.5443, + "step": 85839 + }, + { + "epoch": 1.03, + "grad_norm": 5.370521655586365, + "learning_rate": 9.963975765564712e-06, + "loss": 1.3251, + "step": 85842 + }, + { + "epoch": 1.03, + "grad_norm": 7.139642133147064, + "learning_rate": 9.963391591378941e-06, + "loss": 1.4897, + "step": 85845 + }, + { + "epoch": 1.03, + "grad_norm": 16.998782854520748, + "learning_rate": 9.962807417318101e-06, + "loss": 1.2846, + "step": 85848 + }, + { + "epoch": 1.03, + "grad_norm": 10.05719980273902, + "learning_rate": 9.962223243384191e-06, + "loss": 1.3157, + "step": 85851 + }, + { + "epoch": 1.03, + "grad_norm": 21.21583682505408, + "learning_rate": 9.961639069579196e-06, + "loss": 0.8905, + "step": 85854 + }, + { + "epoch": 1.03, + "grad_norm": 88.22732022624793, + "learning_rate": 9.961054895905113e-06, + "loss": 1.0387, + "step": 85857 + }, + { + "epoch": 1.03, + "grad_norm": 9.013383340679678, + "learning_rate": 9.960470722363935e-06, + "loss": 1.5461, + "step": 85860 + }, + { + "epoch": 1.03, + "grad_norm": 4.5260537749508565, + "learning_rate": 9.959886548957658e-06, + "loss": 1.086, + "step": 85863 + }, + { + "epoch": 1.03, + "grad_norm": 6.200408589098322, + "learning_rate": 9.959302375688274e-06, + "loss": 1.24, + "step": 85866 + }, + { + "epoch": 1.03, + "grad_norm": 15.023650022510527, + "learning_rate": 9.958718202557773e-06, + "loss": 1.2718, + "step": 85869 + }, + { + "epoch": 1.03, + "grad_norm": 21.952644651320202, + "learning_rate": 9.958134029568153e-06, + "loss": 1.0351, + "step": 85872 + }, + { + "epoch": 1.03, + "grad_norm": 13.06881435449095, + "learning_rate": 9.957549856721406e-06, + "loss": 1.0541, + "step": 85875 + }, + { + "epoch": 1.03, + "grad_norm": 5.903031999308935, + "learning_rate": 9.956965684019531e-06, + "loss": 1.0373, + "step": 85878 + }, + { + "epoch": 1.03, + "grad_norm": 5.461003515966601, + "learning_rate": 9.956381511464512e-06, + "loss": 1.2322, + "step": 85881 + }, + { + "epoch": 1.03, + "grad_norm": 8.69237738701354, + "learning_rate": 9.955797339058348e-06, + "loss": 1.1721, + "step": 85884 + }, + { + "epoch": 1.03, + "grad_norm": 110.21175301875583, + "learning_rate": 9.955213166803032e-06, + "loss": 1.1858, + "step": 85887 + }, + { + "epoch": 1.03, + "grad_norm": 24.583595705677784, + "learning_rate": 9.95462899470056e-06, + "loss": 1.1123, + "step": 85890 + }, + { + "epoch": 1.03, + "grad_norm": 6.552697176818476, + "learning_rate": 9.95404482275292e-06, + "loss": 1.1909, + "step": 85893 + }, + { + "epoch": 1.03, + "grad_norm": 17.57988997961102, + "learning_rate": 9.953460650962107e-06, + "loss": 1.2555, + "step": 85896 + }, + { + "epoch": 1.03, + "grad_norm": 10.781298857073683, + "learning_rate": 9.952876479330117e-06, + "loss": 1.0371, + "step": 85899 + }, + { + "epoch": 1.03, + "grad_norm": 6.156142340246759, + "learning_rate": 9.952292307858942e-06, + "loss": 1.4744, + "step": 85902 + }, + { + "epoch": 1.03, + "grad_norm": 30.142884054416317, + "learning_rate": 9.95170813655058e-06, + "loss": 1.5584, + "step": 85905 + }, + { + "epoch": 1.03, + "grad_norm": 10.647369647525487, + "learning_rate": 9.95112396540702e-06, + "loss": 1.5189, + "step": 85908 + }, + { + "epoch": 1.03, + "grad_norm": 5.871860480237783, + "learning_rate": 9.950539794430252e-06, + "loss": 1.3249, + "step": 85911 + }, + { + "epoch": 1.03, + "grad_norm": 6.776272615524995, + "learning_rate": 9.949955623622278e-06, + "loss": 1.1499, + "step": 85914 + }, + { + "epoch": 1.03, + "grad_norm": 7.885382282071238, + "learning_rate": 9.949371452985088e-06, + "loss": 1.3578, + "step": 85917 + }, + { + "epoch": 1.03, + "grad_norm": 12.975297512521744, + "learning_rate": 9.948787282520672e-06, + "loss": 1.6064, + "step": 85920 + }, + { + "epoch": 1.03, + "grad_norm": 5.604224177004354, + "learning_rate": 9.948203112231026e-06, + "loss": 1.1601, + "step": 85923 + }, + { + "epoch": 1.03, + "grad_norm": 6.8384979894509765, + "learning_rate": 9.947618942118144e-06, + "loss": 1.196, + "step": 85926 + }, + { + "epoch": 1.03, + "grad_norm": 8.852789707013512, + "learning_rate": 9.94703477218402e-06, + "loss": 1.2589, + "step": 85929 + }, + { + "epoch": 1.03, + "grad_norm": 10.174691121374062, + "learning_rate": 9.946450602430652e-06, + "loss": 1.2589, + "step": 85932 + }, + { + "epoch": 1.03, + "grad_norm": 9.967302033719372, + "learning_rate": 9.945866432860022e-06, + "loss": 1.1639, + "step": 85935 + }, + { + "epoch": 1.03, + "grad_norm": 40.957312118641326, + "learning_rate": 9.945282263474135e-06, + "loss": 1.0652, + "step": 85938 + }, + { + "epoch": 1.03, + "grad_norm": 21.67486073851175, + "learning_rate": 9.944698094274978e-06, + "loss": 0.9253, + "step": 85941 + }, + { + "epoch": 1.03, + "grad_norm": 10.218673384233472, + "learning_rate": 9.944113925264549e-06, + "loss": 0.8618, + "step": 85944 + }, + { + "epoch": 1.03, + "grad_norm": 15.170749214614553, + "learning_rate": 9.943529756444837e-06, + "loss": 0.9236, + "step": 85947 + }, + { + "epoch": 1.03, + "grad_norm": 8.206097983073063, + "learning_rate": 9.942945587817837e-06, + "loss": 1.2539, + "step": 85950 + }, + { + "epoch": 1.03, + "grad_norm": 8.43349938371194, + "learning_rate": 9.942361419385543e-06, + "loss": 1.5724, + "step": 85953 + }, + { + "epoch": 1.03, + "grad_norm": 6.0256874307205734, + "learning_rate": 9.94177725114995e-06, + "loss": 1.2884, + "step": 85956 + }, + { + "epoch": 1.03, + "grad_norm": 6.669963432821464, + "learning_rate": 9.94119308311305e-06, + "loss": 1.2501, + "step": 85959 + }, + { + "epoch": 1.03, + "grad_norm": 2.1986583849540433, + "learning_rate": 9.940608915276838e-06, + "loss": 1.3124, + "step": 85962 + }, + { + "epoch": 1.03, + "grad_norm": 7.29081594117179, + "learning_rate": 9.940024747643303e-06, + "loss": 1.3538, + "step": 85965 + }, + { + "epoch": 1.03, + "grad_norm": 3.9148626925675916, + "learning_rate": 9.939440580214444e-06, + "loss": 1.4492, + "step": 85968 + }, + { + "epoch": 1.03, + "grad_norm": 8.301864570050947, + "learning_rate": 9.938856412992256e-06, + "loss": 1.086, + "step": 85971 + }, + { + "epoch": 1.03, + "grad_norm": 13.360430808101226, + "learning_rate": 9.938272245978725e-06, + "loss": 0.9796, + "step": 85974 + }, + { + "epoch": 1.03, + "grad_norm": 15.682725927698678, + "learning_rate": 9.937688079175846e-06, + "loss": 1.1866, + "step": 85977 + }, + { + "epoch": 1.03, + "grad_norm": 9.904147987189035, + "learning_rate": 9.93710391258562e-06, + "loss": 1.5363, + "step": 85980 + }, + { + "epoch": 1.03, + "grad_norm": 4.989622411963386, + "learning_rate": 9.936519746210032e-06, + "loss": 1.2439, + "step": 85983 + }, + { + "epoch": 1.03, + "grad_norm": 33.45431871861028, + "learning_rate": 9.935935580051084e-06, + "loss": 1.4316, + "step": 85986 + }, + { + "epoch": 1.03, + "grad_norm": 7.904556800861756, + "learning_rate": 9.93535141411076e-06, + "loss": 1.262, + "step": 85989 + }, + { + "epoch": 1.03, + "grad_norm": 13.446953560773906, + "learning_rate": 9.934767248391061e-06, + "loss": 0.9384, + "step": 85992 + }, + { + "epoch": 1.03, + "grad_norm": 7.624728483546544, + "learning_rate": 9.934183082893975e-06, + "loss": 1.1917, + "step": 85995 + }, + { + "epoch": 1.03, + "grad_norm": 30.909504342225176, + "learning_rate": 9.933598917621504e-06, + "loss": 1.1587, + "step": 85998 + }, + { + "epoch": 1.03, + "grad_norm": 9.70738861736029, + "learning_rate": 9.933014752575632e-06, + "loss": 1.1329, + "step": 86001 + }, + { + "epoch": 1.03, + "grad_norm": 9.374603187330525, + "learning_rate": 9.932430587758356e-06, + "loss": 1.3868, + "step": 86004 + }, + { + "epoch": 1.03, + "grad_norm": 54.74641352057304, + "learning_rate": 9.93184642317167e-06, + "loss": 1.0791, + "step": 86007 + }, + { + "epoch": 1.03, + "grad_norm": 3.4050810059345658, + "learning_rate": 9.93126225881757e-06, + "loss": 1.7982, + "step": 86010 + }, + { + "epoch": 1.03, + "grad_norm": 6.284323214707101, + "learning_rate": 9.93067809469805e-06, + "loss": 1.1956, + "step": 86013 + }, + { + "epoch": 1.03, + "grad_norm": 15.147583301205357, + "learning_rate": 9.930093930815093e-06, + "loss": 0.8373, + "step": 86016 + }, + { + "epoch": 1.03, + "grad_norm": 5.22824966945976, + "learning_rate": 9.929509767170703e-06, + "loss": 1.2431, + "step": 86019 + }, + { + "epoch": 1.03, + "grad_norm": 20.876093449357597, + "learning_rate": 9.928925603766873e-06, + "loss": 1.174, + "step": 86022 + }, + { + "epoch": 1.03, + "grad_norm": 7.320432706390303, + "learning_rate": 9.928341440605594e-06, + "loss": 1.2874, + "step": 86025 + }, + { + "epoch": 1.03, + "grad_norm": 21.1213342132754, + "learning_rate": 9.927757277688858e-06, + "loss": 1.0229, + "step": 86028 + }, + { + "epoch": 1.03, + "grad_norm": 29.921780047293407, + "learning_rate": 9.92717311501866e-06, + "loss": 1.1355, + "step": 86031 + }, + { + "epoch": 1.03, + "grad_norm": 35.083709173556265, + "learning_rate": 9.926588952596994e-06, + "loss": 1.1574, + "step": 86034 + }, + { + "epoch": 1.03, + "grad_norm": 18.880902541179925, + "learning_rate": 9.926004790425854e-06, + "loss": 1.1904, + "step": 86037 + }, + { + "epoch": 1.03, + "grad_norm": 13.769188269559624, + "learning_rate": 9.925420628507237e-06, + "loss": 0.9035, + "step": 86040 + }, + { + "epoch": 1.03, + "grad_norm": 21.18137407386203, + "learning_rate": 9.924836466843127e-06, + "loss": 1.3152, + "step": 86043 + }, + { + "epoch": 1.03, + "grad_norm": 13.874532387575197, + "learning_rate": 9.924252305435527e-06, + "loss": 1.2878, + "step": 86046 + }, + { + "epoch": 1.03, + "grad_norm": 5.671177223402072, + "learning_rate": 9.923668144286422e-06, + "loss": 1.2164, + "step": 86049 + }, + { + "epoch": 1.03, + "grad_norm": 3.9276213108389375, + "learning_rate": 9.923083983397817e-06, + "loss": 1.2648, + "step": 86052 + }, + { + "epoch": 1.03, + "grad_norm": 5.786569406502759, + "learning_rate": 9.922499822771693e-06, + "loss": 1.4689, + "step": 86055 + }, + { + "epoch": 1.03, + "grad_norm": 9.75305837801219, + "learning_rate": 9.92191566241005e-06, + "loss": 1.4495, + "step": 86058 + }, + { + "epoch": 1.03, + "grad_norm": 4.60384339736049, + "learning_rate": 9.92133150231488e-06, + "loss": 1.0733, + "step": 86061 + }, + { + "epoch": 1.03, + "grad_norm": 2.639034699485281, + "learning_rate": 9.920747342488179e-06, + "loss": 0.9944, + "step": 86064 + }, + { + "epoch": 1.03, + "grad_norm": 8.698487161901738, + "learning_rate": 9.920163182931943e-06, + "loss": 1.5358, + "step": 86067 + }, + { + "epoch": 1.03, + "grad_norm": 16.54996807663129, + "learning_rate": 9.919579023648154e-06, + "loss": 0.9249, + "step": 86070 + }, + { + "epoch": 1.04, + "grad_norm": 84.07548445740593, + "learning_rate": 9.918994864638815e-06, + "loss": 1.5047, + "step": 86073 + }, + { + "epoch": 1.04, + "grad_norm": 17.901278150812708, + "learning_rate": 9.918410705905917e-06, + "loss": 1.1652, + "step": 86076 + }, + { + "epoch": 1.04, + "grad_norm": 5.58950941643167, + "learning_rate": 9.917826547451458e-06, + "loss": 0.8544, + "step": 86079 + }, + { + "epoch": 1.04, + "grad_norm": 10.239476931650636, + "learning_rate": 9.917242389277422e-06, + "loss": 1.3543, + "step": 86082 + }, + { + "epoch": 1.04, + "grad_norm": 11.539786225010365, + "learning_rate": 9.91665823138581e-06, + "loss": 1.1366, + "step": 86085 + }, + { + "epoch": 1.04, + "grad_norm": 7.5728196519641635, + "learning_rate": 9.916074073778615e-06, + "loss": 0.9639, + "step": 86088 + }, + { + "epoch": 1.04, + "grad_norm": 15.168911538155502, + "learning_rate": 9.915489916457824e-06, + "loss": 1.4063, + "step": 86091 + }, + { + "epoch": 1.04, + "grad_norm": 7.457287366077279, + "learning_rate": 9.914905759425443e-06, + "loss": 1.4518, + "step": 86094 + }, + { + "epoch": 1.04, + "grad_norm": 21.058750961280737, + "learning_rate": 9.914321602683454e-06, + "loss": 1.5273, + "step": 86097 + }, + { + "epoch": 1.04, + "grad_norm": 8.044995491730488, + "learning_rate": 9.913737446233852e-06, + "loss": 1.1998, + "step": 86100 + }, + { + "epoch": 1.04, + "grad_norm": 5.932621448048179, + "learning_rate": 9.913153290078635e-06, + "loss": 0.9886, + "step": 86103 + }, + { + "epoch": 1.04, + "grad_norm": 6.816137970343937, + "learning_rate": 9.912569134219798e-06, + "loss": 1.3398, + "step": 86106 + }, + { + "epoch": 1.04, + "grad_norm": 9.522926632389357, + "learning_rate": 9.911984978659328e-06, + "loss": 1.2277, + "step": 86109 + }, + { + "epoch": 1.04, + "grad_norm": 11.586610049867412, + "learning_rate": 9.911400823399222e-06, + "loss": 1.2305, + "step": 86112 + }, + { + "epoch": 1.04, + "grad_norm": 5.949578360509421, + "learning_rate": 9.91081666844147e-06, + "loss": 1.299, + "step": 86115 + }, + { + "epoch": 1.04, + "grad_norm": 5.272286639635921, + "learning_rate": 9.910232513788071e-06, + "loss": 1.0749, + "step": 86118 + }, + { + "epoch": 1.04, + "grad_norm": 16.374191213574264, + "learning_rate": 9.90964835944102e-06, + "loss": 1.4565, + "step": 86121 + }, + { + "epoch": 1.04, + "grad_norm": 7.77792361479321, + "learning_rate": 9.909064205402303e-06, + "loss": 1.4392, + "step": 86124 + }, + { + "epoch": 1.04, + "grad_norm": 4.782182925464225, + "learning_rate": 9.908480051673914e-06, + "loss": 1.2734, + "step": 86127 + }, + { + "epoch": 1.04, + "grad_norm": 23.98759528381816, + "learning_rate": 9.907895898257853e-06, + "loss": 1.3468, + "step": 86130 + }, + { + "epoch": 1.04, + "grad_norm": 10.848254491855531, + "learning_rate": 9.907311745156115e-06, + "loss": 1.3446, + "step": 86133 + }, + { + "epoch": 1.04, + "grad_norm": 5.841217594221368, + "learning_rate": 9.90672759237068e-06, + "loss": 1.0096, + "step": 86136 + }, + { + "epoch": 1.04, + "grad_norm": 3.3924520996075254, + "learning_rate": 9.906143439903553e-06, + "loss": 1.498, + "step": 86139 + }, + { + "epoch": 1.04, + "grad_norm": 11.793526594483197, + "learning_rate": 9.905559287756725e-06, + "loss": 1.3451, + "step": 86142 + }, + { + "epoch": 1.04, + "grad_norm": 10.783472075195126, + "learning_rate": 9.904975135932189e-06, + "loss": 1.0629, + "step": 86145 + }, + { + "epoch": 1.04, + "grad_norm": 12.116806357344007, + "learning_rate": 9.904390984431941e-06, + "loss": 1.5398, + "step": 86148 + }, + { + "epoch": 1.04, + "grad_norm": 4.463407514521961, + "learning_rate": 9.90380683325797e-06, + "loss": 1.3861, + "step": 86151 + }, + { + "epoch": 1.04, + "grad_norm": 7.6000146707536445, + "learning_rate": 9.90322268241227e-06, + "loss": 1.5099, + "step": 86154 + }, + { + "epoch": 1.04, + "grad_norm": 8.15050843260091, + "learning_rate": 9.902638531896838e-06, + "loss": 1.3656, + "step": 86157 + }, + { + "epoch": 1.04, + "grad_norm": 33.1875267800086, + "learning_rate": 9.902054381713668e-06, + "loss": 1.1118, + "step": 86160 + }, + { + "epoch": 1.04, + "grad_norm": 11.436684698736416, + "learning_rate": 9.901470231864749e-06, + "loss": 1.0928, + "step": 86163 + }, + { + "epoch": 1.04, + "grad_norm": 27.834470674450774, + "learning_rate": 9.900886082352074e-06, + "loss": 1.5544, + "step": 86166 + }, + { + "epoch": 1.04, + "grad_norm": 9.167857205563182, + "learning_rate": 9.900301933177641e-06, + "loss": 1.1123, + "step": 86169 + }, + { + "epoch": 1.04, + "grad_norm": 11.786335833853318, + "learning_rate": 9.899717784343442e-06, + "loss": 1.4497, + "step": 86172 + }, + { + "epoch": 1.04, + "grad_norm": 8.280261588035671, + "learning_rate": 9.899133635851474e-06, + "loss": 1.0476, + "step": 86175 + }, + { + "epoch": 1.04, + "grad_norm": 19.942054724284407, + "learning_rate": 9.89854948770372e-06, + "loss": 0.9872, + "step": 86178 + }, + { + "epoch": 1.04, + "grad_norm": 6.5782809487162845, + "learning_rate": 9.897965339902182e-06, + "loss": 1.007, + "step": 86181 + }, + { + "epoch": 1.04, + "grad_norm": 3.038816318746159, + "learning_rate": 9.897381192448853e-06, + "loss": 1.085, + "step": 86184 + }, + { + "epoch": 1.04, + "grad_norm": 11.25211869846068, + "learning_rate": 9.896797045345726e-06, + "loss": 1.5595, + "step": 86187 + }, + { + "epoch": 1.04, + "grad_norm": 9.970826361322885, + "learning_rate": 9.89621289859479e-06, + "loss": 0.9712, + "step": 86190 + }, + { + "epoch": 1.04, + "grad_norm": 34.189110556996276, + "learning_rate": 9.895628752198045e-06, + "loss": 1.1339, + "step": 86193 + }, + { + "epoch": 1.04, + "grad_norm": 4.134740444754565, + "learning_rate": 9.895044606157478e-06, + "loss": 1.3401, + "step": 86196 + }, + { + "epoch": 1.04, + "grad_norm": 2.3753589384388296, + "learning_rate": 9.894460460475086e-06, + "loss": 1.3792, + "step": 86199 + }, + { + "epoch": 1.04, + "grad_norm": 7.884325666788037, + "learning_rate": 9.89387631515287e-06, + "loss": 1.1344, + "step": 86202 + }, + { + "epoch": 1.04, + "grad_norm": 11.53936544751531, + "learning_rate": 9.89329217019281e-06, + "loss": 1.3188, + "step": 86205 + }, + { + "epoch": 1.04, + "grad_norm": 45.785644030400036, + "learning_rate": 9.892708025596905e-06, + "loss": 1.2439, + "step": 86208 + }, + { + "epoch": 1.04, + "grad_norm": 8.314475598815717, + "learning_rate": 9.892123881367148e-06, + "loss": 1.2052, + "step": 86211 + }, + { + "epoch": 1.04, + "grad_norm": 13.50840504801906, + "learning_rate": 9.891539737505538e-06, + "loss": 1.526, + "step": 86214 + }, + { + "epoch": 1.04, + "grad_norm": 29.52942845563075, + "learning_rate": 9.890955594014061e-06, + "loss": 1.4343, + "step": 86217 + }, + { + "epoch": 1.04, + "grad_norm": 12.071912767072968, + "learning_rate": 9.890371450894711e-06, + "loss": 1.3319, + "step": 86220 + }, + { + "epoch": 1.04, + "grad_norm": 9.125535541669755, + "learning_rate": 9.889787308149486e-06, + "loss": 1.2323, + "step": 86223 + }, + { + "epoch": 1.04, + "grad_norm": 5.510800240091907, + "learning_rate": 9.889203165780377e-06, + "loss": 1.1734, + "step": 86226 + }, + { + "epoch": 1.04, + "grad_norm": 19.080140999536372, + "learning_rate": 9.88861902378938e-06, + "loss": 1.2029, + "step": 86229 + }, + { + "epoch": 1.04, + "grad_norm": 13.814966095868085, + "learning_rate": 9.888034882178483e-06, + "loss": 1.2662, + "step": 86232 + }, + { + "epoch": 1.04, + "grad_norm": 12.292564518081594, + "learning_rate": 9.887450740949685e-06, + "loss": 1.2363, + "step": 86235 + }, + { + "epoch": 1.04, + "grad_norm": 8.155596748267682, + "learning_rate": 9.886866600104975e-06, + "loss": 1.1837, + "step": 86238 + }, + { + "epoch": 1.04, + "grad_norm": 15.883621327210927, + "learning_rate": 9.88628245964635e-06, + "loss": 1.1252, + "step": 86241 + }, + { + "epoch": 1.04, + "grad_norm": 5.015056329455227, + "learning_rate": 9.8856983195758e-06, + "loss": 1.2371, + "step": 86244 + }, + { + "epoch": 1.04, + "grad_norm": 4.141258421232244, + "learning_rate": 9.885114179895323e-06, + "loss": 1.3036, + "step": 86247 + }, + { + "epoch": 1.04, + "grad_norm": 4.353080727231007, + "learning_rate": 9.884530040606907e-06, + "loss": 0.9111, + "step": 86250 + }, + { + "epoch": 1.04, + "grad_norm": 6.2762137831684495, + "learning_rate": 9.883945901712548e-06, + "loss": 0.9662, + "step": 86253 + }, + { + "epoch": 1.04, + "grad_norm": 17.667115978694632, + "learning_rate": 9.883361763214246e-06, + "loss": 1.3234, + "step": 86256 + }, + { + "epoch": 1.04, + "grad_norm": 36.599257633873734, + "learning_rate": 9.882777625113984e-06, + "loss": 1.1524, + "step": 86259 + }, + { + "epoch": 1.04, + "grad_norm": 10.725053745450545, + "learning_rate": 9.882193487413758e-06, + "loss": 0.9223, + "step": 86262 + }, + { + "epoch": 1.04, + "grad_norm": 6.888159501777561, + "learning_rate": 9.881609350115563e-06, + "loss": 1.4164, + "step": 86265 + }, + { + "epoch": 1.04, + "grad_norm": 12.412830805794169, + "learning_rate": 9.881025213221397e-06, + "loss": 1.3467, + "step": 86268 + }, + { + "epoch": 1.04, + "grad_norm": 7.525291073815757, + "learning_rate": 9.880441076733248e-06, + "loss": 1.0936, + "step": 86271 + }, + { + "epoch": 1.04, + "grad_norm": 9.949028920030235, + "learning_rate": 9.879856940653108e-06, + "loss": 1.2836, + "step": 86274 + }, + { + "epoch": 1.04, + "grad_norm": 3.088689866584229, + "learning_rate": 9.879272804982976e-06, + "loss": 0.872, + "step": 86277 + }, + { + "epoch": 1.04, + "grad_norm": 9.837800761059114, + "learning_rate": 9.878688669724838e-06, + "loss": 1.2086, + "step": 86280 + }, + { + "epoch": 1.04, + "grad_norm": 9.331158185693324, + "learning_rate": 9.878104534880698e-06, + "loss": 1.1729, + "step": 86283 + }, + { + "epoch": 1.04, + "grad_norm": 25.339221192817604, + "learning_rate": 9.877520400452539e-06, + "loss": 1.127, + "step": 86286 + }, + { + "epoch": 1.04, + "grad_norm": 2.4825221760198244, + "learning_rate": 9.876936266442359e-06, + "loss": 1.3347, + "step": 86289 + }, + { + "epoch": 1.04, + "grad_norm": 16.532379773499358, + "learning_rate": 9.876352132852152e-06, + "loss": 1.1099, + "step": 86292 + }, + { + "epoch": 1.04, + "grad_norm": 11.930102090095328, + "learning_rate": 9.875767999683913e-06, + "loss": 1.3674, + "step": 86295 + }, + { + "epoch": 1.04, + "grad_norm": 3.857173224896706, + "learning_rate": 9.875183866939628e-06, + "loss": 1.2264, + "step": 86298 + }, + { + "epoch": 1.04, + "grad_norm": 5.866114879546895, + "learning_rate": 9.8745997346213e-06, + "loss": 1.3663, + "step": 86301 + }, + { + "epoch": 1.04, + "grad_norm": 3.1700881229262095, + "learning_rate": 9.874015602730914e-06, + "loss": 1.3422, + "step": 86304 + }, + { + "epoch": 1.04, + "grad_norm": 13.855280008304215, + "learning_rate": 9.873431471270467e-06, + "loss": 1.3838, + "step": 86307 + }, + { + "epoch": 1.04, + "grad_norm": 8.540355406771766, + "learning_rate": 9.872847340241959e-06, + "loss": 1.1854, + "step": 86310 + }, + { + "epoch": 1.04, + "grad_norm": 6.486246894120979, + "learning_rate": 9.872263209647374e-06, + "loss": 1.0523, + "step": 86313 + }, + { + "epoch": 1.04, + "grad_norm": 6.020514476627005, + "learning_rate": 9.871679079488706e-06, + "loss": 1.3684, + "step": 86316 + }, + { + "epoch": 1.04, + "grad_norm": 9.65931395123624, + "learning_rate": 9.871094949767952e-06, + "loss": 1.0744, + "step": 86319 + }, + { + "epoch": 1.04, + "grad_norm": 4.341477956006761, + "learning_rate": 9.870510820487108e-06, + "loss": 1.2762, + "step": 86322 + }, + { + "epoch": 1.04, + "grad_norm": 6.214306693582119, + "learning_rate": 9.869926691648163e-06, + "loss": 1.2269, + "step": 86325 + }, + { + "epoch": 1.04, + "grad_norm": 8.644383630162112, + "learning_rate": 9.869342563253109e-06, + "loss": 1.3549, + "step": 86328 + }, + { + "epoch": 1.04, + "grad_norm": 27.668753907819926, + "learning_rate": 9.868758435303943e-06, + "loss": 1.3579, + "step": 86331 + }, + { + "epoch": 1.04, + "grad_norm": 6.530291957955677, + "learning_rate": 9.868174307802654e-06, + "loss": 1.1542, + "step": 86334 + }, + { + "epoch": 1.04, + "grad_norm": 20.993923426467543, + "learning_rate": 9.867590180751245e-06, + "loss": 1.1641, + "step": 86337 + }, + { + "epoch": 1.04, + "grad_norm": 8.358541508569399, + "learning_rate": 9.8670060541517e-06, + "loss": 0.9862, + "step": 86340 + }, + { + "epoch": 1.04, + "grad_norm": 14.59167862971384, + "learning_rate": 9.866421928006014e-06, + "loss": 1.3925, + "step": 86343 + }, + { + "epoch": 1.04, + "grad_norm": 17.796744262775366, + "learning_rate": 9.865837802316181e-06, + "loss": 1.4164, + "step": 86346 + }, + { + "epoch": 1.04, + "grad_norm": 7.119400479768585, + "learning_rate": 9.8652536770842e-06, + "loss": 1.1661, + "step": 86349 + }, + { + "epoch": 1.04, + "grad_norm": 54.01225978373008, + "learning_rate": 9.864669552312056e-06, + "loss": 1.1443, + "step": 86352 + }, + { + "epoch": 1.04, + "grad_norm": 3.8462873933458295, + "learning_rate": 9.864085428001747e-06, + "loss": 1.5772, + "step": 86355 + }, + { + "epoch": 1.04, + "grad_norm": 28.304707338933156, + "learning_rate": 9.863501304155265e-06, + "loss": 1.0198, + "step": 86358 + }, + { + "epoch": 1.04, + "grad_norm": 23.04627622418507, + "learning_rate": 9.862917180774602e-06, + "loss": 1.3383, + "step": 86361 + }, + { + "epoch": 1.04, + "grad_norm": 4.019509887394363, + "learning_rate": 9.862333057861756e-06, + "loss": 1.2541, + "step": 86364 + }, + { + "epoch": 1.04, + "grad_norm": 20.02100659127197, + "learning_rate": 9.86174893541872e-06, + "loss": 1.0079, + "step": 86367 + }, + { + "epoch": 1.04, + "grad_norm": 2.573095682261397, + "learning_rate": 9.86116481344748e-06, + "loss": 1.4358, + "step": 86370 + }, + { + "epoch": 1.04, + "grad_norm": 18.385986821774857, + "learning_rate": 9.860580691950038e-06, + "loss": 1.2775, + "step": 86373 + }, + { + "epoch": 1.04, + "grad_norm": 12.640493599109398, + "learning_rate": 9.859996570928383e-06, + "loss": 0.8898, + "step": 86376 + }, + { + "epoch": 1.04, + "grad_norm": 9.001429564237823, + "learning_rate": 9.85941245038451e-06, + "loss": 1.0599, + "step": 86379 + }, + { + "epoch": 1.04, + "grad_norm": 3.4376949887709944, + "learning_rate": 9.858828330320408e-06, + "loss": 1.1088, + "step": 86382 + }, + { + "epoch": 1.04, + "grad_norm": 14.617588166174498, + "learning_rate": 9.858244210738077e-06, + "loss": 1.2804, + "step": 86385 + }, + { + "epoch": 1.04, + "grad_norm": 6.231798184274679, + "learning_rate": 9.857660091639506e-06, + "loss": 1.2289, + "step": 86388 + }, + { + "epoch": 1.04, + "grad_norm": 6.439352361633444, + "learning_rate": 9.857075973026693e-06, + "loss": 1.2784, + "step": 86391 + }, + { + "epoch": 1.04, + "grad_norm": 13.96073646519543, + "learning_rate": 9.856491854901626e-06, + "loss": 1.196, + "step": 86394 + }, + { + "epoch": 1.04, + "grad_norm": 5.279275999201672, + "learning_rate": 9.8559077372663e-06, + "loss": 1.2638, + "step": 86397 + }, + { + "epoch": 1.04, + "grad_norm": 5.567747767056973, + "learning_rate": 9.855323620122709e-06, + "loss": 1.3948, + "step": 86400 + }, + { + "epoch": 1.04, + "grad_norm": 8.1254576022914, + "learning_rate": 9.85473950347285e-06, + "loss": 1.2037, + "step": 86403 + }, + { + "epoch": 1.04, + "grad_norm": 6.090943049636875, + "learning_rate": 9.854155387318709e-06, + "loss": 1.2332, + "step": 86406 + }, + { + "epoch": 1.04, + "grad_norm": 12.052964549892284, + "learning_rate": 9.853571271662283e-06, + "loss": 1.4043, + "step": 86409 + }, + { + "epoch": 1.04, + "grad_norm": 21.63915676321051, + "learning_rate": 9.852987156505566e-06, + "loss": 1.1523, + "step": 86412 + }, + { + "epoch": 1.04, + "grad_norm": 12.332769768007013, + "learning_rate": 9.852403041850552e-06, + "loss": 1.1145, + "step": 86415 + }, + { + "epoch": 1.04, + "grad_norm": 21.291765405778282, + "learning_rate": 9.851818927699232e-06, + "loss": 1.0263, + "step": 86418 + }, + { + "epoch": 1.04, + "grad_norm": 25.081330210433613, + "learning_rate": 9.851234814053602e-06, + "loss": 1.1935, + "step": 86421 + }, + { + "epoch": 1.04, + "grad_norm": 15.697170155068296, + "learning_rate": 9.850650700915651e-06, + "loss": 0.9295, + "step": 86424 + }, + { + "epoch": 1.04, + "grad_norm": 11.292588212698602, + "learning_rate": 9.85006658828738e-06, + "loss": 1.1249, + "step": 86427 + }, + { + "epoch": 1.04, + "grad_norm": 11.703068331178388, + "learning_rate": 9.849482476170778e-06, + "loss": 1.4661, + "step": 86430 + }, + { + "epoch": 1.04, + "grad_norm": 84.07833809367668, + "learning_rate": 9.848898364567833e-06, + "loss": 1.081, + "step": 86433 + }, + { + "epoch": 1.04, + "grad_norm": 8.9029782992294, + "learning_rate": 9.848314253480544e-06, + "loss": 1.1781, + "step": 86436 + }, + { + "epoch": 1.04, + "grad_norm": 3.9663763853067073, + "learning_rate": 9.847730142910907e-06, + "loss": 1.3777, + "step": 86439 + }, + { + "epoch": 1.04, + "grad_norm": 4.99312151049835, + "learning_rate": 9.84714603286091e-06, + "loss": 1.1818, + "step": 86442 + }, + { + "epoch": 1.04, + "grad_norm": 9.458773800627347, + "learning_rate": 9.846561923332552e-06, + "loss": 0.9612, + "step": 86445 + }, + { + "epoch": 1.04, + "grad_norm": 2.1854623839625313, + "learning_rate": 9.845977814327818e-06, + "loss": 1.1897, + "step": 86448 + }, + { + "epoch": 1.04, + "grad_norm": 12.709558996603183, + "learning_rate": 9.84539370584871e-06, + "loss": 0.9458, + "step": 86451 + }, + { + "epoch": 1.04, + "grad_norm": 9.166785071919374, + "learning_rate": 9.844809597897214e-06, + "loss": 1.2718, + "step": 86454 + }, + { + "epoch": 1.04, + "grad_norm": 3.9806724116516534, + "learning_rate": 9.844225490475332e-06, + "loss": 1.3474, + "step": 86457 + }, + { + "epoch": 1.04, + "grad_norm": 12.179673372257998, + "learning_rate": 9.843641383585047e-06, + "loss": 1.1115, + "step": 86460 + }, + { + "epoch": 1.04, + "grad_norm": 24.116600069181402, + "learning_rate": 9.84305727722836e-06, + "loss": 1.168, + "step": 86463 + }, + { + "epoch": 1.04, + "grad_norm": 4.479414776516501, + "learning_rate": 9.842473171407261e-06, + "loss": 1.1149, + "step": 86466 + }, + { + "epoch": 1.04, + "grad_norm": 5.237126984981284, + "learning_rate": 9.841889066123747e-06, + "loss": 1.1275, + "step": 86469 + }, + { + "epoch": 1.04, + "grad_norm": 11.576872035605842, + "learning_rate": 9.84130496137981e-06, + "loss": 1.2332, + "step": 86472 + }, + { + "epoch": 1.04, + "grad_norm": 12.28413622859297, + "learning_rate": 9.840720857177438e-06, + "loss": 1.2303, + "step": 86475 + }, + { + "epoch": 1.04, + "grad_norm": 3.972426147209776, + "learning_rate": 9.840136753518627e-06, + "loss": 1.0065, + "step": 86478 + }, + { + "epoch": 1.04, + "grad_norm": 14.046813220542012, + "learning_rate": 9.839552650405374e-06, + "loss": 1.0584, + "step": 86481 + }, + { + "epoch": 1.04, + "grad_norm": 10.067994647807996, + "learning_rate": 9.838968547839673e-06, + "loss": 1.1093, + "step": 86484 + }, + { + "epoch": 1.04, + "grad_norm": 17.765209068723287, + "learning_rate": 9.83838444582351e-06, + "loss": 1.055, + "step": 86487 + }, + { + "epoch": 1.04, + "grad_norm": 18.042788918137926, + "learning_rate": 9.837800344358882e-06, + "loss": 1.0334, + "step": 86490 + }, + { + "epoch": 1.04, + "grad_norm": 3.267476159143958, + "learning_rate": 9.837216243447786e-06, + "loss": 1.0993, + "step": 86493 + }, + { + "epoch": 1.04, + "grad_norm": 5.577047433340887, + "learning_rate": 9.836632143092209e-06, + "loss": 0.7461, + "step": 86496 + }, + { + "epoch": 1.04, + "grad_norm": 10.971574265437715, + "learning_rate": 9.836048043294153e-06, + "loss": 1.3483, + "step": 86499 + }, + { + "epoch": 1.04, + "grad_norm": 7.156520453385618, + "learning_rate": 9.835463944055602e-06, + "loss": 0.7564, + "step": 86502 + }, + { + "epoch": 1.04, + "grad_norm": 12.464467123143834, + "learning_rate": 9.834879845378554e-06, + "loss": 1.2063, + "step": 86505 + }, + { + "epoch": 1.04, + "grad_norm": 10.492627735173954, + "learning_rate": 9.834295747265002e-06, + "loss": 0.9137, + "step": 86508 + }, + { + "epoch": 1.04, + "grad_norm": 5.538910125701033, + "learning_rate": 9.833711649716941e-06, + "loss": 0.9034, + "step": 86511 + }, + { + "epoch": 1.04, + "grad_norm": 8.189097724052298, + "learning_rate": 9.83312755273636e-06, + "loss": 1.3922, + "step": 86514 + }, + { + "epoch": 1.04, + "grad_norm": 10.791145953859468, + "learning_rate": 9.832543456325253e-06, + "loss": 1.4525, + "step": 86517 + }, + { + "epoch": 1.04, + "grad_norm": 5.890903284551094, + "learning_rate": 9.831959360485614e-06, + "loss": 1.4862, + "step": 86520 + }, + { + "epoch": 1.04, + "grad_norm": 16.263418082449522, + "learning_rate": 9.831375265219442e-06, + "loss": 1.1356, + "step": 86523 + }, + { + "epoch": 1.04, + "grad_norm": 13.540359183337195, + "learning_rate": 9.830791170528725e-06, + "loss": 1.3818, + "step": 86526 + }, + { + "epoch": 1.04, + "grad_norm": 10.03131059578212, + "learning_rate": 9.830207076415454e-06, + "loss": 1.4064, + "step": 86529 + }, + { + "epoch": 1.04, + "grad_norm": 9.86568466122246, + "learning_rate": 9.829622982881623e-06, + "loss": 0.9224, + "step": 86532 + }, + { + "epoch": 1.04, + "grad_norm": 2.7170314837280554, + "learning_rate": 9.829038889929233e-06, + "loss": 1.0281, + "step": 86535 + }, + { + "epoch": 1.04, + "grad_norm": 3.8480994833106257, + "learning_rate": 9.82845479756027e-06, + "loss": 0.8916, + "step": 86538 + }, + { + "epoch": 1.04, + "grad_norm": 7.485524020208618, + "learning_rate": 9.827870705776728e-06, + "loss": 1.2801, + "step": 86541 + }, + { + "epoch": 1.04, + "grad_norm": 15.129347432265336, + "learning_rate": 9.8272866145806e-06, + "loss": 1.2581, + "step": 86544 + }, + { + "epoch": 1.04, + "grad_norm": 8.913211935986252, + "learning_rate": 9.826702523973882e-06, + "loss": 0.9969, + "step": 86547 + }, + { + "epoch": 1.04, + "grad_norm": 20.081243500941202, + "learning_rate": 9.826118433958564e-06, + "loss": 1.1584, + "step": 86550 + }, + { + "epoch": 1.04, + "grad_norm": 5.797775337419519, + "learning_rate": 9.825534344536646e-06, + "loss": 1.1316, + "step": 86553 + }, + { + "epoch": 1.04, + "grad_norm": 27.300807311522682, + "learning_rate": 9.824950255710112e-06, + "loss": 1.3522, + "step": 86556 + }, + { + "epoch": 1.04, + "grad_norm": 93.80779587610482, + "learning_rate": 9.82436616748096e-06, + "loss": 1.213, + "step": 86559 + }, + { + "epoch": 1.04, + "grad_norm": 13.193215423780234, + "learning_rate": 9.823782079851185e-06, + "loss": 1.1295, + "step": 86562 + }, + { + "epoch": 1.04, + "grad_norm": 7.833818726294701, + "learning_rate": 9.823197992822779e-06, + "loss": 1.2723, + "step": 86565 + }, + { + "epoch": 1.04, + "grad_norm": 10.599829492359424, + "learning_rate": 9.822613906397733e-06, + "loss": 1.4134, + "step": 86568 + }, + { + "epoch": 1.04, + "grad_norm": 9.581804337880884, + "learning_rate": 9.822029820578041e-06, + "loss": 1.45, + "step": 86571 + }, + { + "epoch": 1.04, + "grad_norm": 5.655386289533402, + "learning_rate": 9.821445735365697e-06, + "loss": 1.1032, + "step": 86574 + }, + { + "epoch": 1.04, + "grad_norm": 10.155015558214824, + "learning_rate": 9.820861650762695e-06, + "loss": 1.2297, + "step": 86577 + }, + { + "epoch": 1.04, + "grad_norm": 13.126882418452224, + "learning_rate": 9.820277566771031e-06, + "loss": 0.8218, + "step": 86580 + }, + { + "epoch": 1.04, + "grad_norm": 8.682377410094288, + "learning_rate": 9.81969348339269e-06, + "loss": 1.0226, + "step": 86583 + }, + { + "epoch": 1.04, + "grad_norm": 30.580139333122734, + "learning_rate": 9.81910940062967e-06, + "loss": 1.4887, + "step": 86586 + }, + { + "epoch": 1.04, + "grad_norm": 8.409870291076759, + "learning_rate": 9.818525318483967e-06, + "loss": 1.0655, + "step": 86589 + }, + { + "epoch": 1.04, + "grad_norm": 8.189250667572882, + "learning_rate": 9.817941236957576e-06, + "loss": 1.1109, + "step": 86592 + }, + { + "epoch": 1.04, + "grad_norm": 12.626380817381856, + "learning_rate": 9.817357156052479e-06, + "loss": 1.0689, + "step": 86595 + }, + { + "epoch": 1.04, + "grad_norm": 14.163476048968482, + "learning_rate": 9.816773075770677e-06, + "loss": 1.54, + "step": 86598 + }, + { + "epoch": 1.04, + "grad_norm": 8.841261397905027, + "learning_rate": 9.816188996114164e-06, + "loss": 1.0421, + "step": 86601 + }, + { + "epoch": 1.04, + "grad_norm": 7.547160258691588, + "learning_rate": 9.815604917084929e-06, + "loss": 0.7064, + "step": 86604 + }, + { + "epoch": 1.04, + "grad_norm": 10.58268153900303, + "learning_rate": 9.815020838684975e-06, + "loss": 1.2711, + "step": 86607 + }, + { + "epoch": 1.04, + "grad_norm": 5.014880718272636, + "learning_rate": 9.814436760916285e-06, + "loss": 1.1902, + "step": 86610 + }, + { + "epoch": 1.04, + "grad_norm": 10.708946779022936, + "learning_rate": 9.813852683780853e-06, + "loss": 1.5037, + "step": 86613 + }, + { + "epoch": 1.04, + "grad_norm": 14.98396648319474, + "learning_rate": 9.813268607280674e-06, + "loss": 1.2721, + "step": 86616 + }, + { + "epoch": 1.04, + "grad_norm": 5.46726508178486, + "learning_rate": 9.812684531417747e-06, + "loss": 1.1026, + "step": 86619 + }, + { + "epoch": 1.04, + "grad_norm": 7.466409851541535, + "learning_rate": 9.812100456194058e-06, + "loss": 1.2513, + "step": 86622 + }, + { + "epoch": 1.04, + "grad_norm": 14.883616471321005, + "learning_rate": 9.811516381611602e-06, + "loss": 1.2104, + "step": 86625 + }, + { + "epoch": 1.04, + "grad_norm": 8.510829851769294, + "learning_rate": 9.810932307672369e-06, + "loss": 0.8421, + "step": 86628 + }, + { + "epoch": 1.04, + "grad_norm": 17.35289183981192, + "learning_rate": 9.810348234378361e-06, + "loss": 1.0991, + "step": 86631 + }, + { + "epoch": 1.04, + "grad_norm": 4.725639733199752, + "learning_rate": 9.809764161731568e-06, + "loss": 1.3313, + "step": 86634 + }, + { + "epoch": 1.04, + "grad_norm": 16.184634739475985, + "learning_rate": 9.809180089733975e-06, + "loss": 1.1955, + "step": 86637 + }, + { + "epoch": 1.04, + "grad_norm": 23.953352071417186, + "learning_rate": 9.808596018387585e-06, + "loss": 1.5186, + "step": 86640 + }, + { + "epoch": 1.04, + "grad_norm": 16.090696718234586, + "learning_rate": 9.808011947694387e-06, + "loss": 1.3419, + "step": 86643 + }, + { + "epoch": 1.04, + "grad_norm": 5.935745319672721, + "learning_rate": 9.807427877656377e-06, + "loss": 1.3914, + "step": 86646 + }, + { + "epoch": 1.04, + "grad_norm": 7.693673262262596, + "learning_rate": 9.806843808275545e-06, + "loss": 1.2735, + "step": 86649 + }, + { + "epoch": 1.04, + "grad_norm": 15.514397617740824, + "learning_rate": 9.806259739553886e-06, + "loss": 1.2027, + "step": 86652 + }, + { + "epoch": 1.04, + "grad_norm": 12.885075314579957, + "learning_rate": 9.80567567149339e-06, + "loss": 1.2262, + "step": 86655 + }, + { + "epoch": 1.04, + "grad_norm": 8.488464839377517, + "learning_rate": 9.805091604096054e-06, + "loss": 1.5086, + "step": 86658 + }, + { + "epoch": 1.04, + "grad_norm": 5.794875924604411, + "learning_rate": 9.804507537363875e-06, + "loss": 1.2498, + "step": 86661 + }, + { + "epoch": 1.04, + "grad_norm": 13.027842180959466, + "learning_rate": 9.803923471298838e-06, + "loss": 0.9562, + "step": 86664 + }, + { + "epoch": 1.04, + "grad_norm": 18.449289842961416, + "learning_rate": 9.803339405902937e-06, + "loss": 1.1264, + "step": 86667 + }, + { + "epoch": 1.04, + "grad_norm": 12.040471365212253, + "learning_rate": 9.802755341178168e-06, + "loss": 1.3109, + "step": 86670 + }, + { + "epoch": 1.04, + "grad_norm": 2.6977941274813757, + "learning_rate": 9.802171277126529e-06, + "loss": 0.9966, + "step": 86673 + }, + { + "epoch": 1.04, + "grad_norm": 6.1736851351635975, + "learning_rate": 9.801587213750005e-06, + "loss": 0.9977, + "step": 86676 + }, + { + "epoch": 1.04, + "grad_norm": 7.8832933583128195, + "learning_rate": 9.80100315105059e-06, + "loss": 0.8706, + "step": 86679 + }, + { + "epoch": 1.04, + "grad_norm": 17.696024184757615, + "learning_rate": 9.800419089030282e-06, + "loss": 1.5599, + "step": 86682 + }, + { + "epoch": 1.04, + "grad_norm": 10.79383512770612, + "learning_rate": 9.799835027691072e-06, + "loss": 1.4067, + "step": 86685 + }, + { + "epoch": 1.04, + "grad_norm": 5.188980704235686, + "learning_rate": 9.799250967034955e-06, + "loss": 0.8915, + "step": 86688 + }, + { + "epoch": 1.04, + "grad_norm": 16.055369806988487, + "learning_rate": 9.798666907063919e-06, + "loss": 1.3965, + "step": 86691 + }, + { + "epoch": 1.04, + "grad_norm": 3.259912069754453, + "learning_rate": 9.798082847779962e-06, + "loss": 1.333, + "step": 86694 + }, + { + "epoch": 1.04, + "grad_norm": 46.13916049467129, + "learning_rate": 9.797498789185073e-06, + "loss": 1.2698, + "step": 86697 + }, + { + "epoch": 1.04, + "grad_norm": 15.13438437799071, + "learning_rate": 9.796914731281253e-06, + "loss": 1.3023, + "step": 86700 + }, + { + "epoch": 1.04, + "grad_norm": 5.4316537528467, + "learning_rate": 9.796330674070483e-06, + "loss": 1.0502, + "step": 86703 + }, + { + "epoch": 1.04, + "grad_norm": 7.952060290598965, + "learning_rate": 9.795746617554769e-06, + "loss": 1.246, + "step": 86706 + }, + { + "epoch": 1.04, + "grad_norm": 6.82677734037209, + "learning_rate": 9.795162561736093e-06, + "loss": 1.415, + "step": 86709 + }, + { + "epoch": 1.04, + "grad_norm": 11.61238201633441, + "learning_rate": 9.794578506616454e-06, + "loss": 1.3371, + "step": 86712 + }, + { + "epoch": 1.04, + "grad_norm": 14.156892757743933, + "learning_rate": 9.79399445219785e-06, + "loss": 1.1429, + "step": 86715 + }, + { + "epoch": 1.04, + "grad_norm": 15.691946253354981, + "learning_rate": 9.793410398482265e-06, + "loss": 1.3134, + "step": 86718 + }, + { + "epoch": 1.04, + "grad_norm": 2.6466323127915117, + "learning_rate": 9.792826345471695e-06, + "loss": 0.7878, + "step": 86721 + }, + { + "epoch": 1.04, + "grad_norm": 17.799962880224555, + "learning_rate": 9.792242293168134e-06, + "loss": 0.9, + "step": 86724 + }, + { + "epoch": 1.04, + "grad_norm": 8.334285043208425, + "learning_rate": 9.791658241573578e-06, + "loss": 1.1681, + "step": 86727 + }, + { + "epoch": 1.04, + "grad_norm": 37.08159688207101, + "learning_rate": 9.791074190690016e-06, + "loss": 1.3778, + "step": 86730 + }, + { + "epoch": 1.04, + "grad_norm": 6.786960430838977, + "learning_rate": 9.790490140519442e-06, + "loss": 1.3069, + "step": 86733 + }, + { + "epoch": 1.04, + "grad_norm": 2.8813175619453784, + "learning_rate": 9.789906091063847e-06, + "loss": 1.1026, + "step": 86736 + }, + { + "epoch": 1.04, + "grad_norm": 37.70532102299736, + "learning_rate": 9.789322042325232e-06, + "loss": 1.0004, + "step": 86739 + }, + { + "epoch": 1.04, + "grad_norm": 11.73426316167802, + "learning_rate": 9.788737994305584e-06, + "loss": 1.5475, + "step": 86742 + }, + { + "epoch": 1.04, + "grad_norm": 13.740569095014141, + "learning_rate": 9.788153947006896e-06, + "loss": 1.2361, + "step": 86745 + }, + { + "epoch": 1.04, + "grad_norm": 6.547715864855941, + "learning_rate": 9.787569900431162e-06, + "loss": 1.4384, + "step": 86748 + }, + { + "epoch": 1.04, + "grad_norm": 32.78678727838862, + "learning_rate": 9.786985854580374e-06, + "loss": 1.5904, + "step": 86751 + }, + { + "epoch": 1.04, + "grad_norm": 3.600750447271652, + "learning_rate": 9.786401809456532e-06, + "loss": 1.0505, + "step": 86754 + }, + { + "epoch": 1.04, + "grad_norm": 7.296087394284862, + "learning_rate": 9.785817765061619e-06, + "loss": 0.8349, + "step": 86757 + }, + { + "epoch": 1.04, + "grad_norm": 9.89505234843679, + "learning_rate": 9.785233721397633e-06, + "loss": 1.2665, + "step": 86760 + }, + { + "epoch": 1.04, + "grad_norm": 4.385109698272467, + "learning_rate": 9.784649678466566e-06, + "loss": 0.9622, + "step": 86763 + }, + { + "epoch": 1.04, + "grad_norm": 5.968747073743874, + "learning_rate": 9.784065636270412e-06, + "loss": 1.376, + "step": 86766 + }, + { + "epoch": 1.04, + "grad_norm": 11.588734138733255, + "learning_rate": 9.783481594811169e-06, + "loss": 0.9581, + "step": 86769 + }, + { + "epoch": 1.04, + "grad_norm": 10.273903969518889, + "learning_rate": 9.782897554090822e-06, + "loss": 1.0936, + "step": 86772 + }, + { + "epoch": 1.04, + "grad_norm": 20.927736927484844, + "learning_rate": 9.782313514111367e-06, + "loss": 1.0141, + "step": 86775 + }, + { + "epoch": 1.04, + "grad_norm": 8.687566336765343, + "learning_rate": 9.781729474874798e-06, + "loss": 1.0439, + "step": 86778 + }, + { + "epoch": 1.04, + "grad_norm": 5.548043343558296, + "learning_rate": 9.781145436383109e-06, + "loss": 1.1555, + "step": 86781 + }, + { + "epoch": 1.04, + "grad_norm": 8.406009782121734, + "learning_rate": 9.780561398638292e-06, + "loss": 1.1419, + "step": 86784 + }, + { + "epoch": 1.04, + "grad_norm": 5.262093540375779, + "learning_rate": 9.779977361642336e-06, + "loss": 1.2992, + "step": 86787 + }, + { + "epoch": 1.04, + "grad_norm": 4.264514382294244, + "learning_rate": 9.779393325397242e-06, + "loss": 1.1049, + "step": 86790 + }, + { + "epoch": 1.04, + "grad_norm": 61.630480684928095, + "learning_rate": 9.778809289904995e-06, + "loss": 1.2434, + "step": 86793 + }, + { + "epoch": 1.04, + "grad_norm": 4.178834855878112, + "learning_rate": 9.778225255167599e-06, + "loss": 1.092, + "step": 86796 + }, + { + "epoch": 1.04, + "grad_norm": 6.605161266679774, + "learning_rate": 9.777641221187035e-06, + "loss": 1.0951, + "step": 86799 + }, + { + "epoch": 1.04, + "grad_norm": 6.100041757763016, + "learning_rate": 9.777057187965304e-06, + "loss": 1.3157, + "step": 86802 + }, + { + "epoch": 1.04, + "grad_norm": 7.261154354827531, + "learning_rate": 9.776473155504392e-06, + "loss": 1.2653, + "step": 86805 + }, + { + "epoch": 1.04, + "grad_norm": 13.508191468171866, + "learning_rate": 9.775889123806304e-06, + "loss": 1.5031, + "step": 86808 + }, + { + "epoch": 1.04, + "grad_norm": 3.8130135816139217, + "learning_rate": 9.77530509287302e-06, + "loss": 1.0999, + "step": 86811 + }, + { + "epoch": 1.04, + "grad_norm": 19.72450465859233, + "learning_rate": 9.77472106270654e-06, + "loss": 1.1222, + "step": 86814 + }, + { + "epoch": 1.04, + "grad_norm": 9.370700846145608, + "learning_rate": 9.774137033308855e-06, + "loss": 1.6366, + "step": 86817 + }, + { + "epoch": 1.04, + "grad_norm": 10.270122156897735, + "learning_rate": 9.773553004681958e-06, + "loss": 1.5775, + "step": 86820 + }, + { + "epoch": 1.04, + "grad_norm": 11.492123914595174, + "learning_rate": 9.772968976827848e-06, + "loss": 0.8913, + "step": 86823 + }, + { + "epoch": 1.04, + "grad_norm": 16.62994802108245, + "learning_rate": 9.77238494974851e-06, + "loss": 1.1091, + "step": 86826 + }, + { + "epoch": 1.04, + "grad_norm": 9.717280294992236, + "learning_rate": 9.77180092344594e-06, + "loss": 1.1819, + "step": 86829 + }, + { + "epoch": 1.04, + "grad_norm": 7.899223761577368, + "learning_rate": 9.771216897922132e-06, + "loss": 1.1937, + "step": 86832 + }, + { + "epoch": 1.04, + "grad_norm": 8.589342649153135, + "learning_rate": 9.770632873179079e-06, + "loss": 1.4694, + "step": 86835 + }, + { + "epoch": 1.04, + "grad_norm": 4.7977051491106, + "learning_rate": 9.770048849218773e-06, + "loss": 1.2581, + "step": 86838 + }, + { + "epoch": 1.04, + "grad_norm": 6.050793545594052, + "learning_rate": 9.769464826043205e-06, + "loss": 1.2397, + "step": 86841 + }, + { + "epoch": 1.04, + "grad_norm": 74.5340713478652, + "learning_rate": 9.768880803654373e-06, + "loss": 1.0481, + "step": 86844 + }, + { + "epoch": 1.04, + "grad_norm": 10.466355625018883, + "learning_rate": 9.768296782054267e-06, + "loss": 1.1724, + "step": 86847 + }, + { + "epoch": 1.04, + "grad_norm": 40.39795420890514, + "learning_rate": 9.767712761244882e-06, + "loss": 1.1604, + "step": 86850 + }, + { + "epoch": 1.04, + "grad_norm": 7.350104204817819, + "learning_rate": 9.767128741228207e-06, + "loss": 1.012, + "step": 86853 + }, + { + "epoch": 1.04, + "grad_norm": 13.936810281489945, + "learning_rate": 9.76654472200624e-06, + "loss": 1.3149, + "step": 86856 + }, + { + "epoch": 1.04, + "grad_norm": 10.746478735409472, + "learning_rate": 9.765960703580968e-06, + "loss": 0.9655, + "step": 86859 + }, + { + "epoch": 1.04, + "grad_norm": 5.475840931039299, + "learning_rate": 9.765376685954395e-06, + "loss": 1.3852, + "step": 86862 + }, + { + "epoch": 1.04, + "grad_norm": 6.6183825880821825, + "learning_rate": 9.7647926691285e-06, + "loss": 1.3658, + "step": 86865 + }, + { + "epoch": 1.04, + "grad_norm": 9.970544252157106, + "learning_rate": 9.764208653105287e-06, + "loss": 1.2955, + "step": 86868 + }, + { + "epoch": 1.04, + "grad_norm": 6.828696515066499, + "learning_rate": 9.763624637886741e-06, + "loss": 1.3774, + "step": 86871 + }, + { + "epoch": 1.04, + "grad_norm": 4.613056817231557, + "learning_rate": 9.763040623474863e-06, + "loss": 1.3756, + "step": 86874 + }, + { + "epoch": 1.04, + "grad_norm": 9.504987819525041, + "learning_rate": 9.762456609871638e-06, + "loss": 1.6754, + "step": 86877 + }, + { + "epoch": 1.04, + "grad_norm": 3.5714403739737217, + "learning_rate": 9.761872597079068e-06, + "loss": 0.9452, + "step": 86880 + }, + { + "epoch": 1.04, + "grad_norm": 6.671705004062291, + "learning_rate": 9.761288585099136e-06, + "loss": 0.9992, + "step": 86883 + }, + { + "epoch": 1.04, + "grad_norm": 5.913310160545605, + "learning_rate": 9.760704573933844e-06, + "loss": 1.0346, + "step": 86886 + }, + { + "epoch": 1.04, + "grad_norm": 33.723696765533774, + "learning_rate": 9.760120563585182e-06, + "loss": 1.3464, + "step": 86889 + }, + { + "epoch": 1.04, + "grad_norm": 22.985220355002543, + "learning_rate": 9.759536554055137e-06, + "loss": 1.3128, + "step": 86892 + }, + { + "epoch": 1.04, + "grad_norm": 38.54422205971087, + "learning_rate": 9.758952545345708e-06, + "loss": 0.9421, + "step": 86895 + }, + { + "epoch": 1.04, + "grad_norm": 18.261981814836112, + "learning_rate": 9.75836853745889e-06, + "loss": 1.3916, + "step": 86898 + }, + { + "epoch": 1.04, + "grad_norm": 11.083251540319216, + "learning_rate": 9.75778453039667e-06, + "loss": 1.5365, + "step": 86901 + }, + { + "epoch": 1.04, + "grad_norm": 8.086982384679184, + "learning_rate": 9.757200524161049e-06, + "loss": 1.4793, + "step": 86904 + }, + { + "epoch": 1.05, + "grad_norm": 10.074210960231678, + "learning_rate": 9.75661651875401e-06, + "loss": 1.1043, + "step": 86907 + }, + { + "epoch": 1.05, + "grad_norm": 7.410750594113653, + "learning_rate": 9.756032514177554e-06, + "loss": 0.9639, + "step": 86910 + }, + { + "epoch": 1.05, + "grad_norm": 5.8259279105939346, + "learning_rate": 9.75544851043367e-06, + "loss": 1.1467, + "step": 86913 + }, + { + "epoch": 1.05, + "grad_norm": 11.086753018627066, + "learning_rate": 9.754864507524355e-06, + "loss": 1.4701, + "step": 86916 + }, + { + "epoch": 1.05, + "grad_norm": 4.489795597898434, + "learning_rate": 9.754280505451596e-06, + "loss": 1.442, + "step": 86919 + }, + { + "epoch": 1.05, + "grad_norm": 4.0229955775237025, + "learning_rate": 9.75369650421739e-06, + "loss": 1.3158, + "step": 86922 + }, + { + "epoch": 1.05, + "grad_norm": 2.905241712275011, + "learning_rate": 9.753112503823728e-06, + "loss": 1.111, + "step": 86925 + }, + { + "epoch": 1.05, + "grad_norm": 11.249453342962932, + "learning_rate": 9.752528504272605e-06, + "loss": 1.1455, + "step": 86928 + }, + { + "epoch": 1.05, + "grad_norm": 17.230640564559824, + "learning_rate": 9.751944505566018e-06, + "loss": 1.1843, + "step": 86931 + }, + { + "epoch": 1.05, + "grad_norm": 42.65552661997684, + "learning_rate": 9.751360507705948e-06, + "loss": 1.2081, + "step": 86934 + }, + { + "epoch": 1.05, + "grad_norm": 16.13490870732242, + "learning_rate": 9.750776510694396e-06, + "loss": 1.2877, + "step": 86937 + }, + { + "epoch": 1.05, + "grad_norm": 3.586103310911347, + "learning_rate": 9.750192514533357e-06, + "loss": 0.9797, + "step": 86940 + }, + { + "epoch": 1.05, + "grad_norm": 9.554567683156344, + "learning_rate": 9.749608519224821e-06, + "loss": 1.8012, + "step": 86943 + }, + { + "epoch": 1.05, + "grad_norm": 10.96674739135773, + "learning_rate": 9.749024524770779e-06, + "loss": 1.198, + "step": 86946 + }, + { + "epoch": 1.05, + "grad_norm": 14.859476326493656, + "learning_rate": 9.748440531173224e-06, + "loss": 1.1635, + "step": 86949 + }, + { + "epoch": 1.05, + "grad_norm": 15.409628882648324, + "learning_rate": 9.747856538434153e-06, + "loss": 1.3116, + "step": 86952 + }, + { + "epoch": 1.05, + "grad_norm": 4.615383758865233, + "learning_rate": 9.747272546555555e-06, + "loss": 1.5498, + "step": 86955 + }, + { + "epoch": 1.05, + "grad_norm": 10.86211195980153, + "learning_rate": 9.74668855553943e-06, + "loss": 1.0666, + "step": 86958 + }, + { + "epoch": 1.05, + "grad_norm": 11.983544924953287, + "learning_rate": 9.74610456538776e-06, + "loss": 1.1035, + "step": 86961 + }, + { + "epoch": 1.05, + "grad_norm": 4.4247726210136085, + "learning_rate": 9.745520576102547e-06, + "loss": 1.2892, + "step": 86964 + }, + { + "epoch": 1.05, + "grad_norm": 87.28553779897547, + "learning_rate": 9.744936587685778e-06, + "loss": 1.3851, + "step": 86967 + }, + { + "epoch": 1.05, + "grad_norm": 9.452679291984662, + "learning_rate": 9.744352600139453e-06, + "loss": 1.1122, + "step": 86970 + }, + { + "epoch": 1.05, + "grad_norm": 9.425961379812533, + "learning_rate": 9.743768613465556e-06, + "loss": 1.384, + "step": 86973 + }, + { + "epoch": 1.05, + "grad_norm": 15.379907683758345, + "learning_rate": 9.743184627666088e-06, + "loss": 1.4008, + "step": 86976 + }, + { + "epoch": 1.05, + "grad_norm": 6.5582811034181505, + "learning_rate": 9.742600642743034e-06, + "loss": 1.4833, + "step": 86979 + }, + { + "epoch": 1.05, + "grad_norm": 8.829484889616294, + "learning_rate": 9.742016658698395e-06, + "loss": 1.6021, + "step": 86982 + }, + { + "epoch": 1.05, + "grad_norm": 4.467799104903766, + "learning_rate": 9.741432675534161e-06, + "loss": 1.1985, + "step": 86985 + }, + { + "epoch": 1.05, + "grad_norm": 26.871468013946973, + "learning_rate": 9.740848693252321e-06, + "loss": 1.4994, + "step": 86988 + }, + { + "epoch": 1.05, + "grad_norm": 9.404100075349644, + "learning_rate": 9.74026471185487e-06, + "loss": 1.7725, + "step": 86991 + }, + { + "epoch": 1.05, + "grad_norm": 11.27733870341892, + "learning_rate": 9.739680731343805e-06, + "loss": 1.0976, + "step": 86994 + }, + { + "epoch": 1.05, + "grad_norm": 7.90367308459963, + "learning_rate": 9.739096751721118e-06, + "loss": 1.4658, + "step": 86997 + }, + { + "epoch": 1.05, + "grad_norm": 3.3043517003388105, + "learning_rate": 9.738512772988796e-06, + "loss": 1.3169, + "step": 87000 + }, + { + "epoch": 1.05, + "grad_norm": 11.13491455005935, + "learning_rate": 9.737928795148834e-06, + "loss": 1.2252, + "step": 87003 + }, + { + "epoch": 1.05, + "grad_norm": 7.275978144095964, + "learning_rate": 9.73734481820323e-06, + "loss": 0.9844, + "step": 87006 + }, + { + "epoch": 1.05, + "grad_norm": 5.972020324216951, + "learning_rate": 9.736760842153972e-06, + "loss": 1.5522, + "step": 87009 + }, + { + "epoch": 1.05, + "grad_norm": 16.578179228556543, + "learning_rate": 9.736176867003058e-06, + "loss": 1.3569, + "step": 87012 + }, + { + "epoch": 1.05, + "grad_norm": 178.57091052800465, + "learning_rate": 9.735592892752472e-06, + "loss": 1.1317, + "step": 87015 + }, + { + "epoch": 1.05, + "grad_norm": 17.037109834849083, + "learning_rate": 9.735008919404216e-06, + "loss": 1.3366, + "step": 87018 + }, + { + "epoch": 1.05, + "grad_norm": 2.4766527315996676, + "learning_rate": 9.734424946960275e-06, + "loss": 1.0471, + "step": 87021 + }, + { + "epoch": 1.05, + "grad_norm": 6.195507187373478, + "learning_rate": 9.733840975422653e-06, + "loss": 1.2697, + "step": 87024 + }, + { + "epoch": 1.05, + "grad_norm": 13.558537070017737, + "learning_rate": 9.733257004793332e-06, + "loss": 1.1528, + "step": 87027 + }, + { + "epoch": 1.05, + "grad_norm": 7.977510621122991, + "learning_rate": 9.732673035074307e-06, + "loss": 1.2996, + "step": 87030 + }, + { + "epoch": 1.05, + "grad_norm": 6.360159844882094, + "learning_rate": 9.732089066267571e-06, + "loss": 1.1554, + "step": 87033 + }, + { + "epoch": 1.05, + "grad_norm": 79.84610070517478, + "learning_rate": 9.731505098375123e-06, + "loss": 0.9719, + "step": 87036 + }, + { + "epoch": 1.05, + "grad_norm": 9.754759115313025, + "learning_rate": 9.730921131398953e-06, + "loss": 1.1622, + "step": 87039 + }, + { + "epoch": 1.05, + "grad_norm": 4.448126588683539, + "learning_rate": 9.730337165341047e-06, + "loss": 1.2272, + "step": 87042 + }, + { + "epoch": 1.05, + "grad_norm": 20.58084647115098, + "learning_rate": 9.729753200203404e-06, + "loss": 1.2911, + "step": 87045 + }, + { + "epoch": 1.05, + "grad_norm": 21.253204297428887, + "learning_rate": 9.72916923598802e-06, + "loss": 0.9831, + "step": 87048 + }, + { + "epoch": 1.05, + "grad_norm": 10.779319064581745, + "learning_rate": 9.728585272696881e-06, + "loss": 1.1201, + "step": 87051 + }, + { + "epoch": 1.05, + "grad_norm": 34.17730432024248, + "learning_rate": 9.728001310331982e-06, + "loss": 0.8891, + "step": 87054 + }, + { + "epoch": 1.05, + "grad_norm": 4.585602837601101, + "learning_rate": 9.727417348895316e-06, + "loss": 1.235, + "step": 87057 + }, + { + "epoch": 1.05, + "grad_norm": 6.9802987410759645, + "learning_rate": 9.726833388388879e-06, + "loss": 1.2535, + "step": 87060 + }, + { + "epoch": 1.05, + "grad_norm": 6.297523215458904, + "learning_rate": 9.726249428814659e-06, + "loss": 1.218, + "step": 87063 + }, + { + "epoch": 1.05, + "grad_norm": 13.951932094533005, + "learning_rate": 9.725665470174655e-06, + "loss": 1.312, + "step": 87066 + }, + { + "epoch": 1.05, + "grad_norm": 4.299319099350837, + "learning_rate": 9.725081512470853e-06, + "loss": 1.221, + "step": 87069 + }, + { + "epoch": 1.05, + "grad_norm": 9.248841556621851, + "learning_rate": 9.724497555705248e-06, + "loss": 1.043, + "step": 87072 + }, + { + "epoch": 1.05, + "grad_norm": 23.12843702191691, + "learning_rate": 9.723913599879834e-06, + "loss": 1.5314, + "step": 87075 + }, + { + "epoch": 1.05, + "grad_norm": 3.9984165640928464, + "learning_rate": 9.723329644996606e-06, + "loss": 1.2619, + "step": 87078 + }, + { + "epoch": 1.05, + "grad_norm": 12.470911529859205, + "learning_rate": 9.722745691057552e-06, + "loss": 1.1939, + "step": 87081 + }, + { + "epoch": 1.05, + "grad_norm": 17.90142372212032, + "learning_rate": 9.722161738064666e-06, + "loss": 1.2324, + "step": 87084 + }, + { + "epoch": 1.05, + "grad_norm": 50.34487861237242, + "learning_rate": 9.721577786019942e-06, + "loss": 1.1406, + "step": 87087 + }, + { + "epoch": 1.05, + "grad_norm": 2.8109295907799057, + "learning_rate": 9.720993834925373e-06, + "loss": 1.225, + "step": 87090 + }, + { + "epoch": 1.05, + "grad_norm": 9.458487907703502, + "learning_rate": 9.720409884782957e-06, + "loss": 1.1546, + "step": 87093 + }, + { + "epoch": 1.05, + "grad_norm": 15.365827024393422, + "learning_rate": 9.719825935594675e-06, + "loss": 1.3258, + "step": 87096 + }, + { + "epoch": 1.05, + "grad_norm": 5.704436813001684, + "learning_rate": 9.719241987362526e-06, + "loss": 1.1285, + "step": 87099 + }, + { + "epoch": 1.05, + "grad_norm": 6.3743864112357285, + "learning_rate": 9.718658040088505e-06, + "loss": 1.2039, + "step": 87102 + }, + { + "epoch": 1.05, + "grad_norm": 8.997872123016238, + "learning_rate": 9.718074093774606e-06, + "loss": 1.1619, + "step": 87105 + }, + { + "epoch": 1.05, + "grad_norm": 3.1667528497668, + "learning_rate": 9.717490148422814e-06, + "loss": 1.1638, + "step": 87108 + }, + { + "epoch": 1.05, + "grad_norm": 5.796002869256192, + "learning_rate": 9.716906204035129e-06, + "loss": 1.3914, + "step": 87111 + }, + { + "epoch": 1.05, + "grad_norm": 6.8697775953216915, + "learning_rate": 9.716322260613538e-06, + "loss": 1.2934, + "step": 87114 + }, + { + "epoch": 1.05, + "grad_norm": 10.196072894103835, + "learning_rate": 9.715738318160037e-06, + "loss": 1.3419, + "step": 87117 + }, + { + "epoch": 1.05, + "grad_norm": 2.154264888501351, + "learning_rate": 9.715154376676623e-06, + "loss": 1.1415, + "step": 87120 + }, + { + "epoch": 1.05, + "grad_norm": 9.988490149164415, + "learning_rate": 9.714570436165284e-06, + "loss": 1.4875, + "step": 87123 + }, + { + "epoch": 1.05, + "grad_norm": 9.687531290998079, + "learning_rate": 9.713986496628008e-06, + "loss": 1.3075, + "step": 87126 + }, + { + "epoch": 1.05, + "grad_norm": 6.266044776923588, + "learning_rate": 9.713402558066796e-06, + "loss": 1.0906, + "step": 87129 + }, + { + "epoch": 1.05, + "grad_norm": 13.157512874786164, + "learning_rate": 9.71281862048364e-06, + "loss": 1.2986, + "step": 87132 + }, + { + "epoch": 1.05, + "grad_norm": 23.388759607438413, + "learning_rate": 9.71223468388053e-06, + "loss": 1.431, + "step": 87135 + }, + { + "epoch": 1.05, + "grad_norm": 18.277201574108613, + "learning_rate": 9.711650748259457e-06, + "loss": 1.2523, + "step": 87138 + }, + { + "epoch": 1.05, + "grad_norm": 10.616812331370193, + "learning_rate": 9.711066813622415e-06, + "loss": 1.2276, + "step": 87141 + }, + { + "epoch": 1.05, + "grad_norm": 8.423831236097158, + "learning_rate": 9.710482879971401e-06, + "loss": 1.007, + "step": 87144 + }, + { + "epoch": 1.05, + "grad_norm": 3.1148743157090903, + "learning_rate": 9.709898947308407e-06, + "loss": 1.3559, + "step": 87147 + }, + { + "epoch": 1.05, + "grad_norm": 20.259912249003488, + "learning_rate": 9.709315015635418e-06, + "loss": 1.3287, + "step": 87150 + }, + { + "epoch": 1.05, + "grad_norm": 16.17449622939204, + "learning_rate": 9.708731084954434e-06, + "loss": 1.2661, + "step": 87153 + }, + { + "epoch": 1.05, + "grad_norm": 10.598020617071137, + "learning_rate": 9.708147155267446e-06, + "loss": 1.1168, + "step": 87156 + }, + { + "epoch": 1.05, + "grad_norm": 76.42215478180985, + "learning_rate": 9.70756322657645e-06, + "loss": 0.9905, + "step": 87159 + }, + { + "epoch": 1.05, + "grad_norm": 40.30887994630278, + "learning_rate": 9.70697929888343e-06, + "loss": 1.3851, + "step": 87162 + }, + { + "epoch": 1.05, + "grad_norm": 5.884841600736092, + "learning_rate": 9.706395372190387e-06, + "loss": 1.2458, + "step": 87165 + }, + { + "epoch": 1.05, + "grad_norm": 27.322234435612614, + "learning_rate": 9.70581144649931e-06, + "loss": 1.141, + "step": 87168 + }, + { + "epoch": 1.05, + "grad_norm": 17.69110443942097, + "learning_rate": 9.705227521812191e-06, + "loss": 1.3787, + "step": 87171 + }, + { + "epoch": 1.05, + "grad_norm": 23.323638119816355, + "learning_rate": 9.70464359813103e-06, + "loss": 1.0431, + "step": 87174 + }, + { + "epoch": 1.05, + "grad_norm": 11.219979453102093, + "learning_rate": 9.70405967545781e-06, + "loss": 1.4378, + "step": 87177 + }, + { + "epoch": 1.05, + "grad_norm": 5.868345194184566, + "learning_rate": 9.703475753794526e-06, + "loss": 1.1805, + "step": 87180 + }, + { + "epoch": 1.05, + "grad_norm": 4.098222381983178, + "learning_rate": 9.702891833143173e-06, + "loss": 1.0511, + "step": 87183 + }, + { + "epoch": 1.05, + "grad_norm": 16.14358610859366, + "learning_rate": 9.702307913505748e-06, + "loss": 1.0096, + "step": 87186 + }, + { + "epoch": 1.05, + "grad_norm": 3.981949199908855, + "learning_rate": 9.701723994884236e-06, + "loss": 1.2855, + "step": 87189 + }, + { + "epoch": 1.05, + "grad_norm": 7.248479648344746, + "learning_rate": 9.70114007728063e-06, + "loss": 1.1386, + "step": 87192 + }, + { + "epoch": 1.05, + "grad_norm": 14.95094919736981, + "learning_rate": 9.700556160696925e-06, + "loss": 0.7871, + "step": 87195 + }, + { + "epoch": 1.05, + "grad_norm": 14.112181257435063, + "learning_rate": 9.699972245135116e-06, + "loss": 0.9397, + "step": 87198 + }, + { + "epoch": 1.05, + "grad_norm": 6.658625169727604, + "learning_rate": 9.699388330597198e-06, + "loss": 1.13, + "step": 87201 + }, + { + "epoch": 1.05, + "grad_norm": 15.10541139217331, + "learning_rate": 9.698804417085153e-06, + "loss": 1.0446, + "step": 87204 + }, + { + "epoch": 1.05, + "grad_norm": 7.386855039470322, + "learning_rate": 9.698220504600982e-06, + "loss": 1.0325, + "step": 87207 + }, + { + "epoch": 1.05, + "grad_norm": 2.8742087744486637, + "learning_rate": 9.697636593146675e-06, + "loss": 1.0033, + "step": 87210 + }, + { + "epoch": 1.05, + "grad_norm": 19.600397616964138, + "learning_rate": 9.69705268272423e-06, + "loss": 1.1487, + "step": 87213 + }, + { + "epoch": 1.05, + "grad_norm": 11.256861790923088, + "learning_rate": 9.69646877333563e-06, + "loss": 1.6413, + "step": 87216 + }, + { + "epoch": 1.05, + "grad_norm": 20.738416965927065, + "learning_rate": 9.695884864982874e-06, + "loss": 1.0609, + "step": 87219 + }, + { + "epoch": 1.05, + "grad_norm": 4.105030936088687, + "learning_rate": 9.69530095766795e-06, + "loss": 1.0754, + "step": 87222 + }, + { + "epoch": 1.05, + "grad_norm": 10.538107691563253, + "learning_rate": 9.694717051392857e-06, + "loss": 1.246, + "step": 87225 + }, + { + "epoch": 1.05, + "grad_norm": 10.668153227729427, + "learning_rate": 9.694133146159588e-06, + "loss": 1.0603, + "step": 87228 + }, + { + "epoch": 1.05, + "grad_norm": 7.279822934775569, + "learning_rate": 9.693549241970132e-06, + "loss": 1.2274, + "step": 87231 + }, + { + "epoch": 1.05, + "grad_norm": 10.232778036202149, + "learning_rate": 9.692965338826478e-06, + "loss": 1.4192, + "step": 87234 + }, + { + "epoch": 1.05, + "grad_norm": 4.403967325273754, + "learning_rate": 9.692381436730622e-06, + "loss": 1.3242, + "step": 87237 + }, + { + "epoch": 1.05, + "grad_norm": 7.4102284326476555, + "learning_rate": 9.691797535684562e-06, + "loss": 1.1914, + "step": 87240 + }, + { + "epoch": 1.05, + "grad_norm": 11.804714405948012, + "learning_rate": 9.691213635690285e-06, + "loss": 0.9777, + "step": 87243 + }, + { + "epoch": 1.05, + "grad_norm": 32.747230580463786, + "learning_rate": 9.690629736749782e-06, + "loss": 1.3421, + "step": 87246 + }, + { + "epoch": 1.05, + "grad_norm": 14.296312619012133, + "learning_rate": 9.690045838865051e-06, + "loss": 1.0349, + "step": 87249 + }, + { + "epoch": 1.05, + "grad_norm": 6.225161772698656, + "learning_rate": 9.689461942038078e-06, + "loss": 1.3769, + "step": 87252 + }, + { + "epoch": 1.05, + "grad_norm": 21.131755897942924, + "learning_rate": 9.688878046270865e-06, + "loss": 1.2112, + "step": 87255 + }, + { + "epoch": 1.05, + "grad_norm": 10.03795364922722, + "learning_rate": 9.688294151565396e-06, + "loss": 1.1893, + "step": 87258 + }, + { + "epoch": 1.05, + "grad_norm": 24.068888259018692, + "learning_rate": 9.687710257923666e-06, + "loss": 1.1543, + "step": 87261 + }, + { + "epoch": 1.05, + "grad_norm": 12.464490021892152, + "learning_rate": 9.68712636534767e-06, + "loss": 1.2606, + "step": 87264 + }, + { + "epoch": 1.05, + "grad_norm": 12.156116402964047, + "learning_rate": 9.686542473839402e-06, + "loss": 1.5375, + "step": 87267 + }, + { + "epoch": 1.05, + "grad_norm": 10.171352998303652, + "learning_rate": 9.685958583400846e-06, + "loss": 1.13, + "step": 87270 + }, + { + "epoch": 1.05, + "grad_norm": 17.426988992388825, + "learning_rate": 9.685374694034002e-06, + "loss": 1.2374, + "step": 87273 + }, + { + "epoch": 1.05, + "grad_norm": 3.6924044605349726, + "learning_rate": 9.68479080574086e-06, + "loss": 1.0305, + "step": 87276 + }, + { + "epoch": 1.05, + "grad_norm": 12.623824347255841, + "learning_rate": 9.684206918523413e-06, + "loss": 1.597, + "step": 87279 + }, + { + "epoch": 1.05, + "grad_norm": 7.034511938402267, + "learning_rate": 9.683623032383658e-06, + "loss": 1.3753, + "step": 87282 + }, + { + "epoch": 1.05, + "grad_norm": 8.629933353749331, + "learning_rate": 9.683039147323584e-06, + "loss": 0.98, + "step": 87285 + }, + { + "epoch": 1.05, + "grad_norm": 14.632770504599737, + "learning_rate": 9.682455263345179e-06, + "loss": 1.5046, + "step": 87288 + }, + { + "epoch": 1.05, + "grad_norm": 14.0974185333478, + "learning_rate": 9.68187138045044e-06, + "loss": 1.4792, + "step": 87291 + }, + { + "epoch": 1.05, + "grad_norm": 7.930134828841702, + "learning_rate": 9.681287498641363e-06, + "loss": 1.0876, + "step": 87294 + }, + { + "epoch": 1.05, + "grad_norm": 12.781193475023091, + "learning_rate": 9.680703617919934e-06, + "loss": 1.3227, + "step": 87297 + }, + { + "epoch": 1.05, + "grad_norm": 6.88163194985256, + "learning_rate": 9.680119738288148e-06, + "loss": 0.9459, + "step": 87300 + }, + { + "epoch": 1.05, + "grad_norm": 30.684065593715626, + "learning_rate": 9.679535859748001e-06, + "loss": 1.0786, + "step": 87303 + }, + { + "epoch": 1.05, + "grad_norm": 27.78895232270435, + "learning_rate": 9.678951982301479e-06, + "loss": 1.1123, + "step": 87306 + }, + { + "epoch": 1.05, + "grad_norm": 5.535276332382793, + "learning_rate": 9.678368105950584e-06, + "loss": 1.0247, + "step": 87309 + }, + { + "epoch": 1.05, + "grad_norm": 8.289981936015929, + "learning_rate": 9.677784230697298e-06, + "loss": 0.983, + "step": 87312 + }, + { + "epoch": 1.05, + "grad_norm": 2.935545727234428, + "learning_rate": 9.67720035654362e-06, + "loss": 0.9439, + "step": 87315 + }, + { + "epoch": 1.05, + "grad_norm": 10.66672926072368, + "learning_rate": 9.676616483491538e-06, + "loss": 1.3307, + "step": 87318 + }, + { + "epoch": 1.05, + "grad_norm": 7.590559198222894, + "learning_rate": 9.676032611543052e-06, + "loss": 1.2343, + "step": 87321 + }, + { + "epoch": 1.05, + "grad_norm": 6.473811212497985, + "learning_rate": 9.675448740700148e-06, + "loss": 1.1676, + "step": 87324 + }, + { + "epoch": 1.05, + "grad_norm": 65.0542051974952, + "learning_rate": 9.674864870964819e-06, + "loss": 1.4993, + "step": 87327 + }, + { + "epoch": 1.05, + "grad_norm": 8.14122131595592, + "learning_rate": 9.67428100233906e-06, + "loss": 1.6115, + "step": 87330 + }, + { + "epoch": 1.05, + "grad_norm": 9.4664491977448, + "learning_rate": 9.67369713482486e-06, + "loss": 1.2054, + "step": 87333 + }, + { + "epoch": 1.05, + "grad_norm": 38.17565754211461, + "learning_rate": 9.673113268424222e-06, + "loss": 1.2225, + "step": 87336 + }, + { + "epoch": 1.05, + "grad_norm": 10.97778424769896, + "learning_rate": 9.672529403139126e-06, + "loss": 1.5246, + "step": 87339 + }, + { + "epoch": 1.05, + "grad_norm": 9.793497973679944, + "learning_rate": 9.671945538971568e-06, + "loss": 1.1163, + "step": 87342 + }, + { + "epoch": 1.05, + "grad_norm": 16.46833952905425, + "learning_rate": 9.671361675923542e-06, + "loss": 1.2453, + "step": 87345 + }, + { + "epoch": 1.05, + "grad_norm": 19.71556770354049, + "learning_rate": 9.670777813997046e-06, + "loss": 1.4407, + "step": 87348 + }, + { + "epoch": 1.05, + "grad_norm": 40.778510208098105, + "learning_rate": 9.670193953194058e-06, + "loss": 1.48, + "step": 87351 + }, + { + "epoch": 1.05, + "grad_norm": 3.235740063273613, + "learning_rate": 9.669610093516583e-06, + "loss": 1.0038, + "step": 87354 + }, + { + "epoch": 1.05, + "grad_norm": 8.015152102267447, + "learning_rate": 9.66902623496661e-06, + "loss": 1.3328, + "step": 87357 + }, + { + "epoch": 1.05, + "grad_norm": 10.843118635005563, + "learning_rate": 9.66844237754613e-06, + "loss": 0.9827, + "step": 87360 + }, + { + "epoch": 1.05, + "grad_norm": 10.668117787515854, + "learning_rate": 9.667858521257141e-06, + "loss": 1.0995, + "step": 87363 + }, + { + "epoch": 1.05, + "grad_norm": 13.282001955639995, + "learning_rate": 9.667274666101626e-06, + "loss": 1.3054, + "step": 87366 + }, + { + "epoch": 1.05, + "grad_norm": 14.550177687655104, + "learning_rate": 9.666690812081586e-06, + "loss": 1.1812, + "step": 87369 + }, + { + "epoch": 1.05, + "grad_norm": 8.211901622904858, + "learning_rate": 9.666106959199007e-06, + "loss": 0.9193, + "step": 87372 + }, + { + "epoch": 1.05, + "grad_norm": 33.33722201536033, + "learning_rate": 9.66552310745589e-06, + "loss": 1.3908, + "step": 87375 + }, + { + "epoch": 1.05, + "grad_norm": 23.482734718969873, + "learning_rate": 9.664939256854216e-06, + "loss": 1.5799, + "step": 87378 + }, + { + "epoch": 1.05, + "grad_norm": 5.762979687967724, + "learning_rate": 9.664355407395987e-06, + "loss": 0.9878, + "step": 87381 + }, + { + "epoch": 1.05, + "grad_norm": 2.940838170708348, + "learning_rate": 9.66377155908319e-06, + "loss": 1.1147, + "step": 87384 + }, + { + "epoch": 1.05, + "grad_norm": 26.27875011675324, + "learning_rate": 9.663187711917821e-06, + "loss": 1.2509, + "step": 87387 + }, + { + "epoch": 1.05, + "grad_norm": 33.328612188601205, + "learning_rate": 9.662603865901873e-06, + "loss": 1.4033, + "step": 87390 + }, + { + "epoch": 1.05, + "grad_norm": 4.121138805012598, + "learning_rate": 9.662020021037335e-06, + "loss": 1.0644, + "step": 87393 + }, + { + "epoch": 1.05, + "grad_norm": 5.60260954672941, + "learning_rate": 9.661436177326199e-06, + "loss": 1.0402, + "step": 87396 + }, + { + "epoch": 1.05, + "grad_norm": 27.558248495791744, + "learning_rate": 9.660852334770461e-06, + "loss": 0.9087, + "step": 87399 + }, + { + "epoch": 1.05, + "grad_norm": 19.560961626744326, + "learning_rate": 9.660268493372115e-06, + "loss": 1.2124, + "step": 87402 + }, + { + "epoch": 1.05, + "grad_norm": 11.938649801993849, + "learning_rate": 9.659684653133144e-06, + "loss": 1.2566, + "step": 87405 + }, + { + "epoch": 1.05, + "grad_norm": 8.345024096362469, + "learning_rate": 9.65910081405555e-06, + "loss": 1.2323, + "step": 87408 + }, + { + "epoch": 1.05, + "grad_norm": 3.5929910426068754, + "learning_rate": 9.65851697614132e-06, + "loss": 1.0215, + "step": 87411 + }, + { + "epoch": 1.05, + "grad_norm": 7.987268355502171, + "learning_rate": 9.657933139392451e-06, + "loss": 1.362, + "step": 87414 + }, + { + "epoch": 1.05, + "grad_norm": 6.121523259006024, + "learning_rate": 9.657349303810934e-06, + "loss": 1.3357, + "step": 87417 + }, + { + "epoch": 1.05, + "grad_norm": 10.973730375680018, + "learning_rate": 9.656765469398757e-06, + "loss": 1.2802, + "step": 87420 + }, + { + "epoch": 1.05, + "grad_norm": 25.13278357513919, + "learning_rate": 9.656181636157919e-06, + "loss": 1.5116, + "step": 87423 + }, + { + "epoch": 1.05, + "grad_norm": 10.68990146616114, + "learning_rate": 9.655597804090406e-06, + "loss": 1.5488, + "step": 87426 + }, + { + "epoch": 1.05, + "grad_norm": 5.675328172152399, + "learning_rate": 9.655013973198217e-06, + "loss": 1.0024, + "step": 87429 + }, + { + "epoch": 1.05, + "grad_norm": 26.447881599020704, + "learning_rate": 9.654430143483338e-06, + "loss": 1.4897, + "step": 87432 + }, + { + "epoch": 1.05, + "grad_norm": 5.0629889653916385, + "learning_rate": 9.653846314947766e-06, + "loss": 1.5876, + "step": 87435 + }, + { + "epoch": 1.05, + "grad_norm": 17.039211520423663, + "learning_rate": 9.653262487593492e-06, + "loss": 1.1954, + "step": 87438 + }, + { + "epoch": 1.05, + "grad_norm": 7.12593207122544, + "learning_rate": 9.652678661422507e-06, + "loss": 0.9307, + "step": 87441 + }, + { + "epoch": 1.05, + "grad_norm": 7.7322720388953154, + "learning_rate": 9.65209483643681e-06, + "loss": 1.5985, + "step": 87444 + }, + { + "epoch": 1.05, + "grad_norm": 6.626304170127657, + "learning_rate": 9.651511012638381e-06, + "loss": 1.163, + "step": 87447 + }, + { + "epoch": 1.05, + "grad_norm": 11.92508794882151, + "learning_rate": 9.650927190029221e-06, + "loss": 1.0708, + "step": 87450 + }, + { + "epoch": 1.05, + "grad_norm": 11.442720350281283, + "learning_rate": 9.650343368611323e-06, + "loss": 0.9066, + "step": 87453 + }, + { + "epoch": 1.05, + "grad_norm": 7.455589778828041, + "learning_rate": 9.649759548386679e-06, + "loss": 1.596, + "step": 87456 + }, + { + "epoch": 1.05, + "grad_norm": 6.3211027910513, + "learning_rate": 9.649175729357275e-06, + "loss": 1.4812, + "step": 87459 + }, + { + "epoch": 1.05, + "grad_norm": 18.27739361496896, + "learning_rate": 9.648591911525107e-06, + "loss": 1.0296, + "step": 87462 + }, + { + "epoch": 1.05, + "grad_norm": 80.5490835790519, + "learning_rate": 9.648008094892171e-06, + "loss": 0.9957, + "step": 87465 + }, + { + "epoch": 1.05, + "grad_norm": 4.908643346310861, + "learning_rate": 9.647424279460455e-06, + "loss": 0.9731, + "step": 87468 + }, + { + "epoch": 1.05, + "grad_norm": 21.449789845361543, + "learning_rate": 9.646840465231955e-06, + "loss": 0.9622, + "step": 87471 + }, + { + "epoch": 1.05, + "grad_norm": 7.315054307675574, + "learning_rate": 9.64625665220866e-06, + "loss": 1.0706, + "step": 87474 + }, + { + "epoch": 1.05, + "grad_norm": 15.701821908149432, + "learning_rate": 9.645672840392565e-06, + "loss": 1.1893, + "step": 87477 + }, + { + "epoch": 1.05, + "grad_norm": 4.834325971508353, + "learning_rate": 9.645089029785658e-06, + "loss": 1.303, + "step": 87480 + }, + { + "epoch": 1.05, + "grad_norm": 18.137284271196975, + "learning_rate": 9.644505220389939e-06, + "loss": 1.1216, + "step": 87483 + }, + { + "epoch": 1.05, + "grad_norm": 5.135071701831096, + "learning_rate": 9.643921412207393e-06, + "loss": 0.9389, + "step": 87486 + }, + { + "epoch": 1.05, + "grad_norm": 13.963669236170173, + "learning_rate": 9.643337605240012e-06, + "loss": 1.2933, + "step": 87489 + }, + { + "epoch": 1.05, + "grad_norm": 9.807093495204205, + "learning_rate": 9.642753799489794e-06, + "loss": 1.2307, + "step": 87492 + }, + { + "epoch": 1.05, + "grad_norm": 4.819749324099109, + "learning_rate": 9.64216999495873e-06, + "loss": 1.1878, + "step": 87495 + }, + { + "epoch": 1.05, + "grad_norm": 4.297034592364866, + "learning_rate": 9.641586191648812e-06, + "loss": 1.2309, + "step": 87498 + }, + { + "epoch": 1.05, + "grad_norm": 8.535703467313766, + "learning_rate": 9.641002389562026e-06, + "loss": 1.0131, + "step": 87501 + }, + { + "epoch": 1.05, + "grad_norm": 19.475598707256463, + "learning_rate": 9.640418588700373e-06, + "loss": 1.2334, + "step": 87504 + }, + { + "epoch": 1.05, + "grad_norm": 43.54070914936917, + "learning_rate": 9.639834789065841e-06, + "loss": 1.1338, + "step": 87507 + }, + { + "epoch": 1.05, + "grad_norm": 5.4918831758783515, + "learning_rate": 9.639250990660425e-06, + "loss": 1.196, + "step": 87510 + }, + { + "epoch": 1.05, + "grad_norm": 5.660028381797816, + "learning_rate": 9.638667193486113e-06, + "loss": 1.2665, + "step": 87513 + }, + { + "epoch": 1.05, + "grad_norm": 6.425970068614961, + "learning_rate": 9.638083397544899e-06, + "loss": 0.97, + "step": 87516 + }, + { + "epoch": 1.05, + "grad_norm": 15.159045113926071, + "learning_rate": 9.637499602838777e-06, + "loss": 1.6035, + "step": 87519 + }, + { + "epoch": 1.05, + "grad_norm": 10.970622289217319, + "learning_rate": 9.636915809369738e-06, + "loss": 1.0153, + "step": 87522 + }, + { + "epoch": 1.05, + "grad_norm": 5.415145907404554, + "learning_rate": 9.636332017139778e-06, + "loss": 1.2677, + "step": 87525 + }, + { + "epoch": 1.05, + "grad_norm": 16.458943352314172, + "learning_rate": 9.635748226150885e-06, + "loss": 1.0356, + "step": 87528 + }, + { + "epoch": 1.05, + "grad_norm": 3.9823424108192564, + "learning_rate": 9.635164436405048e-06, + "loss": 1.4704, + "step": 87531 + }, + { + "epoch": 1.05, + "grad_norm": 69.08594309267623, + "learning_rate": 9.634580647904264e-06, + "loss": 1.3369, + "step": 87534 + }, + { + "epoch": 1.05, + "grad_norm": 5.6448602837056185, + "learning_rate": 9.633996860650529e-06, + "loss": 0.9306, + "step": 87537 + }, + { + "epoch": 1.05, + "grad_norm": 17.07893266683463, + "learning_rate": 9.63341307464583e-06, + "loss": 1.145, + "step": 87540 + }, + { + "epoch": 1.05, + "grad_norm": 33.114554054609826, + "learning_rate": 9.632829289892155e-06, + "loss": 0.9719, + "step": 87543 + }, + { + "epoch": 1.05, + "grad_norm": 4.10223922968809, + "learning_rate": 9.632245506391502e-06, + "loss": 1.3464, + "step": 87546 + }, + { + "epoch": 1.05, + "grad_norm": 4.186255234898888, + "learning_rate": 9.631661724145868e-06, + "loss": 1.3122, + "step": 87549 + }, + { + "epoch": 1.05, + "grad_norm": 10.80757790673894, + "learning_rate": 9.631077943157238e-06, + "loss": 0.9871, + "step": 87552 + }, + { + "epoch": 1.05, + "grad_norm": 11.600134629982193, + "learning_rate": 9.630494163427605e-06, + "loss": 0.9871, + "step": 87555 + }, + { + "epoch": 1.05, + "grad_norm": 12.049476917260801, + "learning_rate": 9.629910384958961e-06, + "loss": 1.1732, + "step": 87558 + }, + { + "epoch": 1.05, + "grad_norm": 9.388296005302747, + "learning_rate": 9.629326607753303e-06, + "loss": 1.3098, + "step": 87561 + }, + { + "epoch": 1.05, + "grad_norm": 9.326021078559195, + "learning_rate": 9.62874283181262e-06, + "loss": 1.4373, + "step": 87564 + }, + { + "epoch": 1.05, + "grad_norm": 6.237396101049408, + "learning_rate": 9.6281590571389e-06, + "loss": 1.1013, + "step": 87567 + }, + { + "epoch": 1.05, + "grad_norm": 12.376697420259793, + "learning_rate": 9.627575283734139e-06, + "loss": 1.328, + "step": 87570 + }, + { + "epoch": 1.05, + "grad_norm": 9.45903345251924, + "learning_rate": 9.626991511600333e-06, + "loss": 1.211, + "step": 87573 + }, + { + "epoch": 1.05, + "grad_norm": 5.716906020320441, + "learning_rate": 9.626407740739466e-06, + "loss": 1.2206, + "step": 87576 + }, + { + "epoch": 1.05, + "grad_norm": 2.5948453489529126, + "learning_rate": 9.625823971153542e-06, + "loss": 1.2698, + "step": 87579 + }, + { + "epoch": 1.05, + "grad_norm": 10.496956333952303, + "learning_rate": 9.625240202844543e-06, + "loss": 0.9472, + "step": 87582 + }, + { + "epoch": 1.05, + "grad_norm": 10.817145812793118, + "learning_rate": 9.62465643581446e-06, + "loss": 1.3225, + "step": 87585 + }, + { + "epoch": 1.05, + "grad_norm": 6.94354677842944, + "learning_rate": 9.624072670065293e-06, + "loss": 1.3162, + "step": 87588 + }, + { + "epoch": 1.05, + "grad_norm": 14.462396114978109, + "learning_rate": 9.62348890559903e-06, + "loss": 1.393, + "step": 87591 + }, + { + "epoch": 1.05, + "grad_norm": 17.899088017791414, + "learning_rate": 9.622905142417666e-06, + "loss": 1.3412, + "step": 87594 + }, + { + "epoch": 1.05, + "grad_norm": 7.945180225170188, + "learning_rate": 9.622321380523187e-06, + "loss": 0.8595, + "step": 87597 + }, + { + "epoch": 1.05, + "grad_norm": 45.33393322736496, + "learning_rate": 9.621737619917589e-06, + "loss": 1.1836, + "step": 87600 + }, + { + "epoch": 1.05, + "grad_norm": 15.894479937608889, + "learning_rate": 9.621153860602866e-06, + "loss": 1.3255, + "step": 87603 + }, + { + "epoch": 1.05, + "grad_norm": 7.7975342752777195, + "learning_rate": 9.620570102581013e-06, + "loss": 1.2098, + "step": 87606 + }, + { + "epoch": 1.05, + "grad_norm": 9.277375981449088, + "learning_rate": 9.619986345854011e-06, + "loss": 0.9432, + "step": 87609 + }, + { + "epoch": 1.05, + "grad_norm": 7.255528877738414, + "learning_rate": 9.619402590423859e-06, + "loss": 1.2358, + "step": 87612 + }, + { + "epoch": 1.05, + "grad_norm": 4.554955032509019, + "learning_rate": 9.618818836292552e-06, + "loss": 1.5581, + "step": 87615 + }, + { + "epoch": 1.05, + "grad_norm": 6.414625375187739, + "learning_rate": 9.61823508346208e-06, + "loss": 1.5025, + "step": 87618 + }, + { + "epoch": 1.05, + "grad_norm": 12.579566828478457, + "learning_rate": 9.61765133193443e-06, + "loss": 1.4446, + "step": 87621 + }, + { + "epoch": 1.05, + "grad_norm": 5.910164864248913, + "learning_rate": 9.6170675817116e-06, + "loss": 1.4534, + "step": 87624 + }, + { + "epoch": 1.05, + "grad_norm": 16.461964561419236, + "learning_rate": 9.61648383279558e-06, + "loss": 1.2261, + "step": 87627 + }, + { + "epoch": 1.05, + "grad_norm": 15.372673009478367, + "learning_rate": 9.61590008518836e-06, + "loss": 1.4022, + "step": 87630 + }, + { + "epoch": 1.05, + "grad_norm": 4.956164771283212, + "learning_rate": 9.61531633889194e-06, + "loss": 1.1202, + "step": 87633 + }, + { + "epoch": 1.05, + "grad_norm": 14.727046845728806, + "learning_rate": 9.614732593908307e-06, + "loss": 1.1132, + "step": 87636 + }, + { + "epoch": 1.05, + "grad_norm": 8.078436155042715, + "learning_rate": 9.61414885023945e-06, + "loss": 1.4378, + "step": 87639 + }, + { + "epoch": 1.05, + "grad_norm": 6.496870721580175, + "learning_rate": 9.613565107887362e-06, + "loss": 1.2933, + "step": 87642 + }, + { + "epoch": 1.05, + "grad_norm": 35.11477619840754, + "learning_rate": 9.612981366854041e-06, + "loss": 1.2642, + "step": 87645 + }, + { + "epoch": 1.05, + "grad_norm": 17.186763501569256, + "learning_rate": 9.612397627141474e-06, + "loss": 1.1273, + "step": 87648 + }, + { + "epoch": 1.05, + "grad_norm": 6.992716999465578, + "learning_rate": 9.611813888751653e-06, + "loss": 1.0815, + "step": 87651 + }, + { + "epoch": 1.05, + "grad_norm": 4.956424292274528, + "learning_rate": 9.61123015168657e-06, + "loss": 1.1962, + "step": 87654 + }, + { + "epoch": 1.05, + "grad_norm": 10.707795897108308, + "learning_rate": 9.610646415948222e-06, + "loss": 1.255, + "step": 87657 + }, + { + "epoch": 1.05, + "grad_norm": 6.908082971060663, + "learning_rate": 9.610062681538599e-06, + "loss": 1.155, + "step": 87660 + }, + { + "epoch": 1.05, + "grad_norm": 12.304059496460408, + "learning_rate": 9.609478948459688e-06, + "loss": 1.0534, + "step": 87663 + }, + { + "epoch": 1.05, + "grad_norm": 14.912864841784952, + "learning_rate": 9.608895216713485e-06, + "loss": 1.511, + "step": 87666 + }, + { + "epoch": 1.05, + "grad_norm": 7.865399217879783, + "learning_rate": 9.60831148630198e-06, + "loss": 1.7303, + "step": 87669 + }, + { + "epoch": 1.05, + "grad_norm": 19.20885186564899, + "learning_rate": 9.607727757227174e-06, + "loss": 1.4844, + "step": 87672 + }, + { + "epoch": 1.05, + "grad_norm": 12.09952654109106, + "learning_rate": 9.607144029491045e-06, + "loss": 1.0001, + "step": 87675 + }, + { + "epoch": 1.05, + "grad_norm": 9.30146107020093, + "learning_rate": 9.606560303095595e-06, + "loss": 1.3593, + "step": 87678 + }, + { + "epoch": 1.05, + "grad_norm": 2.5474471197258235, + "learning_rate": 9.605976578042809e-06, + "loss": 1.3454, + "step": 87681 + }, + { + "epoch": 1.05, + "grad_norm": 7.459299043316654, + "learning_rate": 9.605392854334686e-06, + "loss": 1.2666, + "step": 87684 + }, + { + "epoch": 1.05, + "grad_norm": 16.67208760205638, + "learning_rate": 9.604809131973218e-06, + "loss": 1.3342, + "step": 87687 + }, + { + "epoch": 1.05, + "grad_norm": 62.40840136470322, + "learning_rate": 9.604225410960391e-06, + "loss": 1.591, + "step": 87690 + }, + { + "epoch": 1.05, + "grad_norm": 19.513975776646422, + "learning_rate": 9.603641691298198e-06, + "loss": 1.1255, + "step": 87693 + }, + { + "epoch": 1.05, + "grad_norm": 7.817516235659797, + "learning_rate": 9.603057972988634e-06, + "loss": 1.3495, + "step": 87696 + }, + { + "epoch": 1.05, + "grad_norm": 5.7111989956467655, + "learning_rate": 9.602474256033693e-06, + "loss": 1.2737, + "step": 87699 + }, + { + "epoch": 1.05, + "grad_norm": 9.703610091668132, + "learning_rate": 9.601890540435365e-06, + "loss": 1.2812, + "step": 87702 + }, + { + "epoch": 1.05, + "grad_norm": 14.041939125799198, + "learning_rate": 9.601306826195637e-06, + "loss": 1.3408, + "step": 87705 + }, + { + "epoch": 1.05, + "grad_norm": 13.54803683066578, + "learning_rate": 9.600723113316507e-06, + "loss": 1.1279, + "step": 87708 + }, + { + "epoch": 1.05, + "grad_norm": 17.517328884504856, + "learning_rate": 9.600139401799963e-06, + "loss": 1.3786, + "step": 87711 + }, + { + "epoch": 1.05, + "grad_norm": 16.87099218775374, + "learning_rate": 9.599555691648005e-06, + "loss": 1.4362, + "step": 87714 + }, + { + "epoch": 1.05, + "grad_norm": 10.109192170765118, + "learning_rate": 9.598971982862613e-06, + "loss": 1.1812, + "step": 87717 + }, + { + "epoch": 1.05, + "grad_norm": 3.1965234190886576, + "learning_rate": 9.598388275445789e-06, + "loss": 0.8621, + "step": 87720 + }, + { + "epoch": 1.05, + "grad_norm": 6.788079112657287, + "learning_rate": 9.597804569399519e-06, + "loss": 1.36, + "step": 87723 + }, + { + "epoch": 1.05, + "grad_norm": 3.9694599071010153, + "learning_rate": 9.597220864725802e-06, + "loss": 1.5072, + "step": 87726 + }, + { + "epoch": 1.05, + "grad_norm": 7.9094148489759775, + "learning_rate": 9.59663716142662e-06, + "loss": 1.6252, + "step": 87729 + }, + { + "epoch": 1.05, + "grad_norm": 9.705592163119144, + "learning_rate": 9.59605345950397e-06, + "loss": 1.1733, + "step": 87732 + }, + { + "epoch": 1.05, + "grad_norm": 22.71703263622432, + "learning_rate": 9.595469758959844e-06, + "loss": 1.3724, + "step": 87735 + }, + { + "epoch": 1.06, + "grad_norm": 5.952143945507231, + "learning_rate": 9.594886059796234e-06, + "loss": 1.5918, + "step": 87738 + }, + { + "epoch": 1.06, + "grad_norm": 3.9935930600622425, + "learning_rate": 9.594302362015135e-06, + "loss": 1.3032, + "step": 87741 + }, + { + "epoch": 1.06, + "grad_norm": 18.945642626700007, + "learning_rate": 9.593718665618536e-06, + "loss": 1.375, + "step": 87744 + }, + { + "epoch": 1.06, + "grad_norm": 19.152388798923738, + "learning_rate": 9.593134970608425e-06, + "loss": 1.0581, + "step": 87747 + }, + { + "epoch": 1.06, + "grad_norm": 9.034411972898216, + "learning_rate": 9.592551276986799e-06, + "loss": 1.3286, + "step": 87750 + }, + { + "epoch": 1.06, + "grad_norm": 2.806464487813962, + "learning_rate": 9.591967584755652e-06, + "loss": 1.4438, + "step": 87753 + }, + { + "epoch": 1.06, + "grad_norm": 7.653826420679986, + "learning_rate": 9.591383893916971e-06, + "loss": 1.4661, + "step": 87756 + }, + { + "epoch": 1.06, + "grad_norm": 21.289844925454265, + "learning_rate": 9.590800204472747e-06, + "loss": 1.4359, + "step": 87759 + }, + { + "epoch": 1.06, + "grad_norm": 10.909680564142914, + "learning_rate": 9.590216516424975e-06, + "loss": 1.2863, + "step": 87762 + }, + { + "epoch": 1.06, + "grad_norm": 22.18530704928252, + "learning_rate": 9.589632829775648e-06, + "loss": 1.1824, + "step": 87765 + }, + { + "epoch": 1.06, + "grad_norm": 42.773599761260755, + "learning_rate": 9.589049144526758e-06, + "loss": 1.0805, + "step": 87768 + }, + { + "epoch": 1.06, + "grad_norm": 24.392291600609973, + "learning_rate": 9.588465460680292e-06, + "loss": 1.427, + "step": 87771 + }, + { + "epoch": 1.06, + "grad_norm": 19.46791937952064, + "learning_rate": 9.587881778238247e-06, + "loss": 1.2288, + "step": 87774 + }, + { + "epoch": 1.06, + "grad_norm": 27.25594020298538, + "learning_rate": 9.587298097202611e-06, + "loss": 1.0745, + "step": 87777 + }, + { + "epoch": 1.06, + "grad_norm": 17.506745738973645, + "learning_rate": 9.586714417575382e-06, + "loss": 1.2686, + "step": 87780 + }, + { + "epoch": 1.06, + "grad_norm": 5.3166962897295225, + "learning_rate": 9.586130739358544e-06, + "loss": 1.2622, + "step": 87783 + }, + { + "epoch": 1.06, + "grad_norm": 5.683242617001313, + "learning_rate": 9.585547062554095e-06, + "loss": 1.0509, + "step": 87786 + }, + { + "epoch": 1.06, + "grad_norm": 11.447113243907584, + "learning_rate": 9.584963387164021e-06, + "loss": 1.2239, + "step": 87789 + }, + { + "epoch": 1.06, + "grad_norm": 11.503955270305479, + "learning_rate": 9.584379713190319e-06, + "loss": 1.3991, + "step": 87792 + }, + { + "epoch": 1.06, + "grad_norm": 12.01003209791297, + "learning_rate": 9.583796040634985e-06, + "loss": 1.1566, + "step": 87795 + }, + { + "epoch": 1.06, + "grad_norm": 5.06641376089957, + "learning_rate": 9.5832123695e-06, + "loss": 0.9335, + "step": 87798 + }, + { + "epoch": 1.06, + "grad_norm": 6.750364280539912, + "learning_rate": 9.582628699787359e-06, + "loss": 0.7539, + "step": 87801 + }, + { + "epoch": 1.06, + "grad_norm": 9.09321411672297, + "learning_rate": 9.58204503149906e-06, + "loss": 1.3335, + "step": 87804 + }, + { + "epoch": 1.06, + "grad_norm": 9.125709426502556, + "learning_rate": 9.58146136463709e-06, + "loss": 1.0021, + "step": 87807 + }, + { + "epoch": 1.06, + "grad_norm": 12.76899298161244, + "learning_rate": 9.580877699203442e-06, + "loss": 1.0306, + "step": 87810 + }, + { + "epoch": 1.06, + "grad_norm": 4.568665822449119, + "learning_rate": 9.580294035200103e-06, + "loss": 1.013, + "step": 87813 + }, + { + "epoch": 1.06, + "grad_norm": 8.54812379117168, + "learning_rate": 9.579710372629073e-06, + "loss": 1.2226, + "step": 87816 + }, + { + "epoch": 1.06, + "grad_norm": 6.279732527699836, + "learning_rate": 9.57912671149234e-06, + "loss": 0.8121, + "step": 87819 + }, + { + "epoch": 1.06, + "grad_norm": 7.590662706967861, + "learning_rate": 9.578543051791898e-06, + "loss": 1.2948, + "step": 87822 + }, + { + "epoch": 1.06, + "grad_norm": 2.168624782160589, + "learning_rate": 9.577959393529732e-06, + "loss": 1.2934, + "step": 87825 + }, + { + "epoch": 1.06, + "grad_norm": 16.626760331951022, + "learning_rate": 9.577375736707842e-06, + "loss": 0.9947, + "step": 87828 + }, + { + "epoch": 1.06, + "grad_norm": 14.71153804180084, + "learning_rate": 9.576792081328215e-06, + "loss": 1.2561, + "step": 87831 + }, + { + "epoch": 1.06, + "grad_norm": 15.171469997537153, + "learning_rate": 9.576208427392847e-06, + "loss": 1.386, + "step": 87834 + }, + { + "epoch": 1.06, + "grad_norm": 3.1639198113148868, + "learning_rate": 9.575624774903722e-06, + "loss": 1.1718, + "step": 87837 + }, + { + "epoch": 1.06, + "grad_norm": 26.402676737722704, + "learning_rate": 9.57504112386284e-06, + "loss": 1.2377, + "step": 87840 + }, + { + "epoch": 1.06, + "grad_norm": 13.66476639178875, + "learning_rate": 9.574457474272187e-06, + "loss": 1.3062, + "step": 87843 + }, + { + "epoch": 1.06, + "grad_norm": 19.563193971834963, + "learning_rate": 9.57387382613376e-06, + "loss": 0.8574, + "step": 87846 + }, + { + "epoch": 1.06, + "grad_norm": 4.388119076543419, + "learning_rate": 9.573290179449549e-06, + "loss": 1.2804, + "step": 87849 + }, + { + "epoch": 1.06, + "grad_norm": 5.3371326287563345, + "learning_rate": 9.572706534221542e-06, + "loss": 0.7276, + "step": 87852 + }, + { + "epoch": 1.06, + "grad_norm": 10.744855646695248, + "learning_rate": 9.572122890451735e-06, + "loss": 1.3208, + "step": 87855 + }, + { + "epoch": 1.06, + "grad_norm": 13.849362162782178, + "learning_rate": 9.571539248142118e-06, + "loss": 1.107, + "step": 87858 + }, + { + "epoch": 1.06, + "grad_norm": 12.141424859958518, + "learning_rate": 9.570955607294687e-06, + "loss": 1.3743, + "step": 87861 + }, + { + "epoch": 1.06, + "grad_norm": 18.987796204348466, + "learning_rate": 9.570371967911426e-06, + "loss": 1.2856, + "step": 87864 + }, + { + "epoch": 1.06, + "grad_norm": 3.4476747369579255, + "learning_rate": 9.56978832999433e-06, + "loss": 1.3501, + "step": 87867 + }, + { + "epoch": 1.06, + "grad_norm": 6.62416379012574, + "learning_rate": 9.569204693545392e-06, + "loss": 1.3524, + "step": 87870 + }, + { + "epoch": 1.06, + "grad_norm": 8.54892591491607, + "learning_rate": 9.568621058566603e-06, + "loss": 0.9454, + "step": 87873 + }, + { + "epoch": 1.06, + "grad_norm": 7.951026158899724, + "learning_rate": 9.56803742505996e-06, + "loss": 1.1099, + "step": 87876 + }, + { + "epoch": 1.06, + "grad_norm": 9.103205670342668, + "learning_rate": 9.567453793027444e-06, + "loss": 1.204, + "step": 87879 + }, + { + "epoch": 1.06, + "grad_norm": 27.050908392802754, + "learning_rate": 9.566870162471054e-06, + "loss": 1.2501, + "step": 87882 + }, + { + "epoch": 1.06, + "grad_norm": 11.769545484078126, + "learning_rate": 9.56628653339278e-06, + "loss": 1.1496, + "step": 87885 + }, + { + "epoch": 1.06, + "grad_norm": 14.979253220057075, + "learning_rate": 9.565702905794616e-06, + "loss": 1.3243, + "step": 87888 + }, + { + "epoch": 1.06, + "grad_norm": 17.14543340830945, + "learning_rate": 9.565119279678547e-06, + "loss": 1.5757, + "step": 87891 + }, + { + "epoch": 1.06, + "grad_norm": 3.977750634240016, + "learning_rate": 9.564535655046572e-06, + "loss": 1.1832, + "step": 87894 + }, + { + "epoch": 1.06, + "grad_norm": 8.323070596626653, + "learning_rate": 9.56395203190068e-06, + "loss": 0.7879, + "step": 87897 + }, + { + "epoch": 1.06, + "grad_norm": 55.73464240100427, + "learning_rate": 9.563368410242862e-06, + "loss": 1.2069, + "step": 87900 + }, + { + "epoch": 1.06, + "grad_norm": 4.493350380187181, + "learning_rate": 9.562784790075113e-06, + "loss": 1.5164, + "step": 87903 + }, + { + "epoch": 1.06, + "grad_norm": 9.099878166143055, + "learning_rate": 9.562201171399418e-06, + "loss": 1.1206, + "step": 87906 + }, + { + "epoch": 1.06, + "grad_norm": 10.589437329369973, + "learning_rate": 9.561617554217774e-06, + "loss": 1.5689, + "step": 87909 + }, + { + "epoch": 1.06, + "grad_norm": 7.045276398810894, + "learning_rate": 9.561033938532173e-06, + "loss": 1.4994, + "step": 87912 + }, + { + "epoch": 1.06, + "grad_norm": 9.53507040498742, + "learning_rate": 9.560450324344607e-06, + "loss": 1.3629, + "step": 87915 + }, + { + "epoch": 1.06, + "grad_norm": 6.863822349276762, + "learning_rate": 9.55986671165706e-06, + "loss": 1.0616, + "step": 87918 + }, + { + "epoch": 1.06, + "grad_norm": 2.7735852285414624, + "learning_rate": 9.559283100471532e-06, + "loss": 1.3216, + "step": 87921 + }, + { + "epoch": 1.06, + "grad_norm": 13.435550086790217, + "learning_rate": 9.558699490790012e-06, + "loss": 1.3402, + "step": 87924 + }, + { + "epoch": 1.06, + "grad_norm": 27.906745376548105, + "learning_rate": 9.558115882614491e-06, + "loss": 1.1926, + "step": 87927 + }, + { + "epoch": 1.06, + "grad_norm": 14.131254033811423, + "learning_rate": 9.557532275946963e-06, + "loss": 1.2621, + "step": 87930 + }, + { + "epoch": 1.06, + "grad_norm": 11.605661772317406, + "learning_rate": 9.556948670789416e-06, + "loss": 1.468, + "step": 87933 + }, + { + "epoch": 1.06, + "grad_norm": 9.644651687324497, + "learning_rate": 9.556365067143846e-06, + "loss": 1.1429, + "step": 87936 + }, + { + "epoch": 1.06, + "grad_norm": 5.202458542974545, + "learning_rate": 9.555781465012239e-06, + "loss": 1.1004, + "step": 87939 + }, + { + "epoch": 1.06, + "grad_norm": 13.30788125345922, + "learning_rate": 9.555197864396596e-06, + "loss": 1.0287, + "step": 87942 + }, + { + "epoch": 1.06, + "grad_norm": 8.132940523350237, + "learning_rate": 9.554614265298898e-06, + "loss": 1.3212, + "step": 87945 + }, + { + "epoch": 1.06, + "grad_norm": 9.064821331561932, + "learning_rate": 9.55403066772114e-06, + "loss": 1.1759, + "step": 87948 + }, + { + "epoch": 1.06, + "grad_norm": 9.046184305331868, + "learning_rate": 9.553447071665314e-06, + "loss": 0.9263, + "step": 87951 + }, + { + "epoch": 1.06, + "grad_norm": 19.968656116607626, + "learning_rate": 9.552863477133415e-06, + "loss": 0.781, + "step": 87954 + }, + { + "epoch": 1.06, + "grad_norm": 8.125428265826361, + "learning_rate": 9.552279884127433e-06, + "loss": 1.021, + "step": 87957 + }, + { + "epoch": 1.06, + "grad_norm": 5.278625220812145, + "learning_rate": 9.551696292649356e-06, + "loss": 1.664, + "step": 87960 + }, + { + "epoch": 1.06, + "grad_norm": 4.014985134404684, + "learning_rate": 9.551112702701176e-06, + "loss": 1.0142, + "step": 87963 + }, + { + "epoch": 1.06, + "grad_norm": 8.499512903135864, + "learning_rate": 9.550529114284891e-06, + "loss": 1.2926, + "step": 87966 + }, + { + "epoch": 1.06, + "grad_norm": 3.1246025192772033, + "learning_rate": 9.54994552740249e-06, + "loss": 1.1047, + "step": 87969 + }, + { + "epoch": 1.06, + "grad_norm": 10.210047414885963, + "learning_rate": 9.549361942055956e-06, + "loss": 1.1233, + "step": 87972 + }, + { + "epoch": 1.06, + "grad_norm": 5.112780518167597, + "learning_rate": 9.54877835824729e-06, + "loss": 1.4809, + "step": 87975 + }, + { + "epoch": 1.06, + "grad_norm": 14.42828968193799, + "learning_rate": 9.548194775978484e-06, + "loss": 1.0738, + "step": 87978 + }, + { + "epoch": 1.06, + "grad_norm": 11.761424567803093, + "learning_rate": 9.547611195251521e-06, + "loss": 1.0884, + "step": 87981 + }, + { + "epoch": 1.06, + "grad_norm": 15.054706689118904, + "learning_rate": 9.547027616068405e-06, + "loss": 1.0529, + "step": 87984 + }, + { + "epoch": 1.06, + "grad_norm": 18.553011271529186, + "learning_rate": 9.546444038431116e-06, + "loss": 0.8724, + "step": 87987 + }, + { + "epoch": 1.06, + "grad_norm": 9.90091325033857, + "learning_rate": 9.54586046234165e-06, + "loss": 0.9475, + "step": 87990 + }, + { + "epoch": 1.06, + "grad_norm": 11.771819105345742, + "learning_rate": 9.545276887802e-06, + "loss": 1.241, + "step": 87993 + }, + { + "epoch": 1.06, + "grad_norm": 13.704593000516697, + "learning_rate": 9.544693314814157e-06, + "loss": 1.2064, + "step": 87996 + }, + { + "epoch": 1.06, + "grad_norm": 8.62696904197176, + "learning_rate": 9.544109743380111e-06, + "loss": 1.2043, + "step": 87999 + }, + { + "epoch": 1.06, + "grad_norm": 46.00225644254303, + "learning_rate": 9.543526173501852e-06, + "loss": 1.1762, + "step": 88002 + }, + { + "epoch": 1.06, + "grad_norm": 5.097270983112146, + "learning_rate": 9.542942605181374e-06, + "loss": 1.3425, + "step": 88005 + }, + { + "epoch": 1.06, + "grad_norm": 16.902185436757076, + "learning_rate": 9.54235903842067e-06, + "loss": 1.1133, + "step": 88008 + }, + { + "epoch": 1.06, + "grad_norm": 6.781361167940591, + "learning_rate": 9.541775473221733e-06, + "loss": 1.1951, + "step": 88011 + }, + { + "epoch": 1.06, + "grad_norm": 13.610113370285012, + "learning_rate": 9.541191909586547e-06, + "loss": 1.0236, + "step": 88014 + }, + { + "epoch": 1.06, + "grad_norm": 19.289561440600504, + "learning_rate": 9.540608347517106e-06, + "loss": 1.128, + "step": 88017 + }, + { + "epoch": 1.06, + "grad_norm": 32.27270207881756, + "learning_rate": 9.540024787015407e-06, + "loss": 1.3094, + "step": 88020 + }, + { + "epoch": 1.06, + "grad_norm": 43.15839757814743, + "learning_rate": 9.53944122808344e-06, + "loss": 1.1108, + "step": 88023 + }, + { + "epoch": 1.06, + "grad_norm": 5.687172254811444, + "learning_rate": 9.538857670723189e-06, + "loss": 1.3576, + "step": 88026 + }, + { + "epoch": 1.06, + "grad_norm": 15.141134045363149, + "learning_rate": 9.53827411493665e-06, + "loss": 0.8122, + "step": 88029 + }, + { + "epoch": 1.06, + "grad_norm": 9.528742978711014, + "learning_rate": 9.537690560725819e-06, + "loss": 1.4078, + "step": 88032 + }, + { + "epoch": 1.06, + "grad_norm": 17.43856673554071, + "learning_rate": 9.537107008092681e-06, + "loss": 1.1212, + "step": 88035 + }, + { + "epoch": 1.06, + "grad_norm": 4.601414972950885, + "learning_rate": 9.536523457039233e-06, + "loss": 1.3563, + "step": 88038 + }, + { + "epoch": 1.06, + "grad_norm": 7.551933204478397, + "learning_rate": 9.535939907567463e-06, + "loss": 0.9899, + "step": 88041 + }, + { + "epoch": 1.06, + "grad_norm": 8.944139082780367, + "learning_rate": 9.53535635967936e-06, + "loss": 1.1267, + "step": 88044 + }, + { + "epoch": 1.06, + "grad_norm": 4.741969508146198, + "learning_rate": 9.534772813376919e-06, + "loss": 0.8393, + "step": 88047 + }, + { + "epoch": 1.06, + "grad_norm": 6.483230102161856, + "learning_rate": 9.534189268662135e-06, + "loss": 1.5793, + "step": 88050 + }, + { + "epoch": 1.06, + "grad_norm": 8.442919899660225, + "learning_rate": 9.533605725536993e-06, + "loss": 1.4455, + "step": 88053 + }, + { + "epoch": 1.06, + "grad_norm": 11.872800124679111, + "learning_rate": 9.533022184003481e-06, + "loss": 1.1058, + "step": 88056 + }, + { + "epoch": 1.06, + "grad_norm": 9.854756553118289, + "learning_rate": 9.532438644063602e-06, + "loss": 1.2456, + "step": 88059 + }, + { + "epoch": 1.06, + "grad_norm": 6.360874112332968, + "learning_rate": 9.53185510571934e-06, + "loss": 1.2734, + "step": 88062 + }, + { + "epoch": 1.06, + "grad_norm": 20.946979230806036, + "learning_rate": 9.53127156897269e-06, + "loss": 1.1336, + "step": 88065 + }, + { + "epoch": 1.06, + "grad_norm": 45.09992217094336, + "learning_rate": 9.53068803382564e-06, + "loss": 1.0695, + "step": 88068 + }, + { + "epoch": 1.06, + "grad_norm": 11.069969704519046, + "learning_rate": 9.53010450028018e-06, + "loss": 1.7373, + "step": 88071 + }, + { + "epoch": 1.06, + "grad_norm": 11.626512002661622, + "learning_rate": 9.529520968338306e-06, + "loss": 1.4828, + "step": 88074 + }, + { + "epoch": 1.06, + "grad_norm": 7.75090614123616, + "learning_rate": 9.52893743800201e-06, + "loss": 1.1906, + "step": 88077 + }, + { + "epoch": 1.06, + "grad_norm": 6.907165809793612, + "learning_rate": 9.528353909273279e-06, + "loss": 1.2773, + "step": 88080 + }, + { + "epoch": 1.06, + "grad_norm": 12.766015516659806, + "learning_rate": 9.527770382154105e-06, + "loss": 1.3234, + "step": 88083 + }, + { + "epoch": 1.06, + "grad_norm": 7.485964884962783, + "learning_rate": 9.527186856646479e-06, + "loss": 1.2157, + "step": 88086 + }, + { + "epoch": 1.06, + "grad_norm": 6.450144483126829, + "learning_rate": 9.526603332752395e-06, + "loss": 1.3523, + "step": 88089 + }, + { + "epoch": 1.06, + "grad_norm": 13.475068019446308, + "learning_rate": 9.526019810473848e-06, + "loss": 1.3246, + "step": 88092 + }, + { + "epoch": 1.06, + "grad_norm": 11.808510050332048, + "learning_rate": 9.525436289812821e-06, + "loss": 1.5833, + "step": 88095 + }, + { + "epoch": 1.06, + "grad_norm": 8.370836711441484, + "learning_rate": 9.52485277077131e-06, + "loss": 1.1302, + "step": 88098 + }, + { + "epoch": 1.06, + "grad_norm": 5.085190829966752, + "learning_rate": 9.524269253351302e-06, + "loss": 1.2349, + "step": 88101 + }, + { + "epoch": 1.06, + "grad_norm": 8.77974305018128, + "learning_rate": 9.523685737554798e-06, + "loss": 1.4246, + "step": 88104 + }, + { + "epoch": 1.06, + "grad_norm": 17.693430199755365, + "learning_rate": 9.523102223383778e-06, + "loss": 1.1209, + "step": 88107 + }, + { + "epoch": 1.06, + "grad_norm": 29.199425790258417, + "learning_rate": 9.522518710840239e-06, + "loss": 1.3239, + "step": 88110 + }, + { + "epoch": 1.06, + "grad_norm": 8.408890824480782, + "learning_rate": 9.521935199926171e-06, + "loss": 1.3732, + "step": 88113 + }, + { + "epoch": 1.06, + "grad_norm": 9.355378400943174, + "learning_rate": 9.521351690643568e-06, + "loss": 1.5847, + "step": 88116 + }, + { + "epoch": 1.06, + "grad_norm": 4.875709682899641, + "learning_rate": 9.52076818299442e-06, + "loss": 1.2191, + "step": 88119 + }, + { + "epoch": 1.06, + "grad_norm": 2.2739664149418273, + "learning_rate": 9.520184676980716e-06, + "loss": 1.3098, + "step": 88122 + }, + { + "epoch": 1.06, + "grad_norm": 4.698831638483373, + "learning_rate": 9.51960117260445e-06, + "loss": 1.0829, + "step": 88125 + }, + { + "epoch": 1.06, + "grad_norm": 11.307463654746297, + "learning_rate": 9.51901766986761e-06, + "loss": 1.0524, + "step": 88128 + }, + { + "epoch": 1.06, + "grad_norm": 11.820288308970282, + "learning_rate": 9.518434168772192e-06, + "loss": 1.1328, + "step": 88131 + }, + { + "epoch": 1.06, + "grad_norm": 7.919386353377307, + "learning_rate": 9.517850669320182e-06, + "loss": 1.2634, + "step": 88134 + }, + { + "epoch": 1.06, + "grad_norm": 5.1759166709497375, + "learning_rate": 9.517267171513576e-06, + "loss": 1.0856, + "step": 88137 + }, + { + "epoch": 1.06, + "grad_norm": 3.928001743155209, + "learning_rate": 9.516683675354361e-06, + "loss": 1.0575, + "step": 88140 + }, + { + "epoch": 1.06, + "grad_norm": 2.289459111806088, + "learning_rate": 9.516100180844531e-06, + "loss": 1.3341, + "step": 88143 + }, + { + "epoch": 1.06, + "grad_norm": 6.066109905776389, + "learning_rate": 9.51551668798608e-06, + "loss": 1.1525, + "step": 88146 + }, + { + "epoch": 1.06, + "grad_norm": 5.340453938556, + "learning_rate": 9.514933196780995e-06, + "loss": 1.2362, + "step": 88149 + }, + { + "epoch": 1.06, + "grad_norm": 6.596239956041528, + "learning_rate": 9.514349707231266e-06, + "loss": 1.1597, + "step": 88152 + }, + { + "epoch": 1.06, + "grad_norm": 16.937004977997173, + "learning_rate": 9.513766219338886e-06, + "loss": 1.106, + "step": 88155 + }, + { + "epoch": 1.06, + "grad_norm": 6.850439166786785, + "learning_rate": 9.513182733105852e-06, + "loss": 1.1197, + "step": 88158 + }, + { + "epoch": 1.06, + "grad_norm": 3.7671907803867635, + "learning_rate": 9.512599248534147e-06, + "loss": 1.475, + "step": 88161 + }, + { + "epoch": 1.06, + "grad_norm": 8.17622894690193, + "learning_rate": 9.512015765625763e-06, + "loss": 1.1923, + "step": 88164 + }, + { + "epoch": 1.06, + "grad_norm": 14.626512934713071, + "learning_rate": 9.511432284382694e-06, + "loss": 1.0505, + "step": 88167 + }, + { + "epoch": 1.06, + "grad_norm": 1.8579062044479806, + "learning_rate": 9.510848804806932e-06, + "loss": 1.162, + "step": 88170 + }, + { + "epoch": 1.06, + "grad_norm": 3.1860236030795646, + "learning_rate": 9.510265326900469e-06, + "loss": 1.4693, + "step": 88173 + }, + { + "epoch": 1.06, + "grad_norm": 12.285983450195959, + "learning_rate": 9.50968185066529e-06, + "loss": 1.4557, + "step": 88176 + }, + { + "epoch": 1.06, + "grad_norm": 16.460256475302064, + "learning_rate": 9.509098376103392e-06, + "loss": 1.269, + "step": 88179 + }, + { + "epoch": 1.06, + "grad_norm": 7.499792242874227, + "learning_rate": 9.508514903216763e-06, + "loss": 1.2489, + "step": 88182 + }, + { + "epoch": 1.06, + "grad_norm": 3.65836161934152, + "learning_rate": 9.5079314320074e-06, + "loss": 0.876, + "step": 88185 + }, + { + "epoch": 1.06, + "grad_norm": 2.511368986988855, + "learning_rate": 9.507347962477285e-06, + "loss": 1.3689, + "step": 88188 + }, + { + "epoch": 1.06, + "grad_norm": 13.446905632365462, + "learning_rate": 9.506764494628417e-06, + "loss": 1.1642, + "step": 88191 + }, + { + "epoch": 1.06, + "grad_norm": 16.53895142606982, + "learning_rate": 9.50618102846278e-06, + "loss": 1.189, + "step": 88194 + }, + { + "epoch": 1.06, + "grad_norm": 7.948118946910644, + "learning_rate": 9.505597563982371e-06, + "loss": 1.2227, + "step": 88197 + }, + { + "epoch": 1.06, + "grad_norm": 14.12619968809441, + "learning_rate": 9.505014101189183e-06, + "loss": 1.2855, + "step": 88200 + }, + { + "epoch": 1.06, + "grad_norm": 6.9715321059406286, + "learning_rate": 9.504430640085202e-06, + "loss": 1.3446, + "step": 88203 + }, + { + "epoch": 1.06, + "grad_norm": 17.143113666591177, + "learning_rate": 9.503847180672418e-06, + "loss": 1.6206, + "step": 88206 + }, + { + "epoch": 1.06, + "grad_norm": 6.79443981810446, + "learning_rate": 9.503263722952825e-06, + "loss": 1.2648, + "step": 88209 + }, + { + "epoch": 1.06, + "grad_norm": 4.226600675520406, + "learning_rate": 9.502680266928418e-06, + "loss": 1.1144, + "step": 88212 + }, + { + "epoch": 1.06, + "grad_norm": 14.075131844746124, + "learning_rate": 9.502096812601182e-06, + "loss": 0.9379, + "step": 88215 + }, + { + "epoch": 1.06, + "grad_norm": 5.670916429061997, + "learning_rate": 9.50151335997311e-06, + "loss": 1.1814, + "step": 88218 + }, + { + "epoch": 1.06, + "grad_norm": 11.442853223093422, + "learning_rate": 9.500929909046193e-06, + "loss": 1.0864, + "step": 88221 + }, + { + "epoch": 1.06, + "grad_norm": 14.197646262919754, + "learning_rate": 9.50034645982242e-06, + "loss": 1.1803, + "step": 88224 + }, + { + "epoch": 1.06, + "grad_norm": 12.652780637339506, + "learning_rate": 9.499763012303791e-06, + "loss": 1.158, + "step": 88227 + }, + { + "epoch": 1.06, + "grad_norm": 8.813669442021254, + "learning_rate": 9.499179566492286e-06, + "loss": 1.0472, + "step": 88230 + }, + { + "epoch": 1.06, + "grad_norm": 15.755223070562625, + "learning_rate": 9.498596122389901e-06, + "loss": 1.0445, + "step": 88233 + }, + { + "epoch": 1.06, + "grad_norm": 3.493704356084187, + "learning_rate": 9.498012679998627e-06, + "loss": 1.2252, + "step": 88236 + }, + { + "epoch": 1.06, + "grad_norm": 4.138571963700667, + "learning_rate": 9.497429239320458e-06, + "loss": 1.1128, + "step": 88239 + }, + { + "epoch": 1.06, + "grad_norm": 8.834264515342202, + "learning_rate": 9.496845800357378e-06, + "loss": 1.3027, + "step": 88242 + }, + { + "epoch": 1.06, + "grad_norm": 23.94318685036318, + "learning_rate": 9.496262363111383e-06, + "loss": 1.3747, + "step": 88245 + }, + { + "epoch": 1.06, + "grad_norm": 3.08874360564522, + "learning_rate": 9.495678927584464e-06, + "loss": 1.3747, + "step": 88248 + }, + { + "epoch": 1.06, + "grad_norm": 17.13720832215497, + "learning_rate": 9.495095493778608e-06, + "loss": 1.2389, + "step": 88251 + }, + { + "epoch": 1.06, + "grad_norm": 6.001834046233393, + "learning_rate": 9.494512061695815e-06, + "loss": 1.1308, + "step": 88254 + }, + { + "epoch": 1.06, + "grad_norm": 6.046551401549236, + "learning_rate": 9.493928631338068e-06, + "loss": 1.491, + "step": 88257 + }, + { + "epoch": 1.06, + "grad_norm": 12.445509090398224, + "learning_rate": 9.493345202707359e-06, + "loss": 1.0184, + "step": 88260 + }, + { + "epoch": 1.06, + "grad_norm": 4.179190048043172, + "learning_rate": 9.49276177580568e-06, + "loss": 1.1114, + "step": 88263 + }, + { + "epoch": 1.06, + "grad_norm": 8.707571615954137, + "learning_rate": 9.492178350635025e-06, + "loss": 1.4707, + "step": 88266 + }, + { + "epoch": 1.06, + "grad_norm": 6.5610449221109235, + "learning_rate": 9.49159492719738e-06, + "loss": 1.4665, + "step": 88269 + }, + { + "epoch": 1.06, + "grad_norm": 75.96974945251789, + "learning_rate": 9.491011505494738e-06, + "loss": 1.4877, + "step": 88272 + }, + { + "epoch": 1.06, + "grad_norm": 11.16775538817046, + "learning_rate": 9.49042808552909e-06, + "loss": 1.6151, + "step": 88275 + }, + { + "epoch": 1.06, + "grad_norm": 3.247732130291066, + "learning_rate": 9.489844667302428e-06, + "loss": 0.9846, + "step": 88278 + }, + { + "epoch": 1.06, + "grad_norm": 13.65780433295068, + "learning_rate": 9.489261250816746e-06, + "loss": 1.163, + "step": 88281 + }, + { + "epoch": 1.06, + "grad_norm": 10.340571650602495, + "learning_rate": 9.488677836074028e-06, + "loss": 1.3523, + "step": 88284 + }, + { + "epoch": 1.06, + "grad_norm": 3.2538882333771992, + "learning_rate": 9.488094423076269e-06, + "loss": 1.2103, + "step": 88287 + }, + { + "epoch": 1.06, + "grad_norm": 9.636600668289566, + "learning_rate": 9.487511011825458e-06, + "loss": 1.2636, + "step": 88290 + }, + { + "epoch": 1.06, + "grad_norm": 51.67163732318268, + "learning_rate": 9.486927602323592e-06, + "loss": 0.7554, + "step": 88293 + }, + { + "epoch": 1.06, + "grad_norm": 3.733058227923869, + "learning_rate": 9.486344194572651e-06, + "loss": 1.3498, + "step": 88296 + }, + { + "epoch": 1.06, + "grad_norm": 4.20813298364782, + "learning_rate": 9.485760788574637e-06, + "loss": 1.1143, + "step": 88299 + }, + { + "epoch": 1.06, + "grad_norm": 13.844904906930783, + "learning_rate": 9.485177384331532e-06, + "loss": 1.2532, + "step": 88302 + }, + { + "epoch": 1.06, + "grad_norm": 3.7197846231160328, + "learning_rate": 9.484593981845334e-06, + "loss": 1.1959, + "step": 88305 + }, + { + "epoch": 1.06, + "grad_norm": 10.947164194484884, + "learning_rate": 9.484010581118031e-06, + "loss": 1.1494, + "step": 88308 + }, + { + "epoch": 1.06, + "grad_norm": 6.775761689669471, + "learning_rate": 9.483427182151615e-06, + "loss": 1.1969, + "step": 88311 + }, + { + "epoch": 1.06, + "grad_norm": 7.257231565102305, + "learning_rate": 9.482843784948071e-06, + "loss": 1.1169, + "step": 88314 + }, + { + "epoch": 1.06, + "grad_norm": 30.080145315226897, + "learning_rate": 9.4822603895094e-06, + "loss": 1.1656, + "step": 88317 + }, + { + "epoch": 1.06, + "grad_norm": 3.1601947025608803, + "learning_rate": 9.481676995837588e-06, + "loss": 1.077, + "step": 88320 + }, + { + "epoch": 1.06, + "grad_norm": 4.2666082845435875, + "learning_rate": 9.481093603934624e-06, + "loss": 1.1277, + "step": 88323 + }, + { + "epoch": 1.06, + "grad_norm": 10.602122424238287, + "learning_rate": 9.480510213802499e-06, + "loss": 0.966, + "step": 88326 + }, + { + "epoch": 1.06, + "grad_norm": 25.497074962944247, + "learning_rate": 9.479926825443206e-06, + "loss": 1.2939, + "step": 88329 + }, + { + "epoch": 1.06, + "grad_norm": 5.594023935577034, + "learning_rate": 9.479343438858736e-06, + "loss": 1.2941, + "step": 88332 + }, + { + "epoch": 1.06, + "grad_norm": 36.38033652760194, + "learning_rate": 9.478760054051082e-06, + "loss": 1.4825, + "step": 88335 + }, + { + "epoch": 1.06, + "grad_norm": 9.726321523915146, + "learning_rate": 9.47817667102223e-06, + "loss": 1.0226, + "step": 88338 + }, + { + "epoch": 1.06, + "grad_norm": 4.941529572866425, + "learning_rate": 9.477593289774172e-06, + "loss": 1.257, + "step": 88341 + }, + { + "epoch": 1.06, + "grad_norm": 5.501252324553812, + "learning_rate": 9.4770099103089e-06, + "loss": 1.4394, + "step": 88344 + }, + { + "epoch": 1.06, + "grad_norm": 12.623476250345892, + "learning_rate": 9.476426532628408e-06, + "loss": 1.4056, + "step": 88347 + }, + { + "epoch": 1.06, + "grad_norm": 6.537985564321063, + "learning_rate": 9.47584315673468e-06, + "loss": 0.9144, + "step": 88350 + }, + { + "epoch": 1.06, + "grad_norm": 25.622289875572097, + "learning_rate": 9.475259782629712e-06, + "loss": 0.9496, + "step": 88353 + }, + { + "epoch": 1.06, + "grad_norm": 4.671255075059446, + "learning_rate": 9.474676410315493e-06, + "loss": 1.1738, + "step": 88356 + }, + { + "epoch": 1.06, + "grad_norm": 17.94066867129066, + "learning_rate": 9.474093039794015e-06, + "loss": 0.9731, + "step": 88359 + }, + { + "epoch": 1.06, + "grad_norm": 11.126589643720497, + "learning_rate": 9.47350967106727e-06, + "loss": 1.6162, + "step": 88362 + }, + { + "epoch": 1.06, + "grad_norm": 13.550565483949638, + "learning_rate": 9.472926304137242e-06, + "loss": 0.9182, + "step": 88365 + }, + { + "epoch": 1.06, + "grad_norm": 24.88673980828463, + "learning_rate": 9.472342939005927e-06, + "loss": 1.3848, + "step": 88368 + }, + { + "epoch": 1.06, + "grad_norm": 5.621383986899228, + "learning_rate": 9.471759575675318e-06, + "loss": 0.9302, + "step": 88371 + }, + { + "epoch": 1.06, + "grad_norm": 32.351836159931, + "learning_rate": 9.471176214147406e-06, + "loss": 1.2171, + "step": 88374 + }, + { + "epoch": 1.06, + "grad_norm": 14.76493727717123, + "learning_rate": 9.470592854424174e-06, + "loss": 1.3168, + "step": 88377 + }, + { + "epoch": 1.06, + "grad_norm": 10.82640843503219, + "learning_rate": 9.470009496507619e-06, + "loss": 1.203, + "step": 88380 + }, + { + "epoch": 1.06, + "grad_norm": 4.624346960019035, + "learning_rate": 9.469426140399732e-06, + "loss": 1.3588, + "step": 88383 + }, + { + "epoch": 1.06, + "grad_norm": 4.529253129550986, + "learning_rate": 9.4688427861025e-06, + "loss": 1.0255, + "step": 88386 + }, + { + "epoch": 1.06, + "grad_norm": 2.6420661810719395, + "learning_rate": 9.468259433617921e-06, + "loss": 1.296, + "step": 88389 + }, + { + "epoch": 1.06, + "grad_norm": 6.932968894325179, + "learning_rate": 9.467676082947977e-06, + "loss": 1.2535, + "step": 88392 + }, + { + "epoch": 1.06, + "grad_norm": 7.492615437756891, + "learning_rate": 9.467092734094666e-06, + "loss": 1.2412, + "step": 88395 + }, + { + "epoch": 1.06, + "grad_norm": 5.136430783774969, + "learning_rate": 9.46650938705997e-06, + "loss": 1.3686, + "step": 88398 + }, + { + "epoch": 1.06, + "grad_norm": 40.71057593996881, + "learning_rate": 9.465926041845892e-06, + "loss": 1.1806, + "step": 88401 + }, + { + "epoch": 1.06, + "grad_norm": 4.098976902887741, + "learning_rate": 9.465342698454412e-06, + "loss": 1.0014, + "step": 88404 + }, + { + "epoch": 1.06, + "grad_norm": 7.562736737317492, + "learning_rate": 9.464759356887524e-06, + "loss": 1.1257, + "step": 88407 + }, + { + "epoch": 1.06, + "grad_norm": 5.897940788649049, + "learning_rate": 9.46417601714722e-06, + "loss": 1.18, + "step": 88410 + }, + { + "epoch": 1.06, + "grad_norm": 7.03507207545411, + "learning_rate": 9.463592679235494e-06, + "loss": 1.5674, + "step": 88413 + }, + { + "epoch": 1.06, + "grad_norm": 5.271034803608211, + "learning_rate": 9.463009343154333e-06, + "loss": 0.9242, + "step": 88416 + }, + { + "epoch": 1.06, + "grad_norm": 10.69564816447628, + "learning_rate": 9.462426008905724e-06, + "loss": 1.1068, + "step": 88419 + }, + { + "epoch": 1.06, + "grad_norm": 4.1211371079999735, + "learning_rate": 9.46184267649166e-06, + "loss": 1.0367, + "step": 88422 + }, + { + "epoch": 1.06, + "grad_norm": 10.343498594838945, + "learning_rate": 9.461259345914137e-06, + "loss": 1.3111, + "step": 88425 + }, + { + "epoch": 1.06, + "grad_norm": 19.13726333529669, + "learning_rate": 9.460676017175141e-06, + "loss": 1.5172, + "step": 88428 + }, + { + "epoch": 1.06, + "grad_norm": 4.301426598132894, + "learning_rate": 9.460092690276664e-06, + "loss": 1.2558, + "step": 88431 + }, + { + "epoch": 1.06, + "grad_norm": 4.989641392304444, + "learning_rate": 9.459509365220693e-06, + "loss": 1.4642, + "step": 88434 + }, + { + "epoch": 1.06, + "grad_norm": 4.751042003680369, + "learning_rate": 9.458926042009224e-06, + "loss": 1.1822, + "step": 88437 + }, + { + "epoch": 1.06, + "grad_norm": 6.672573340388523, + "learning_rate": 9.458342720644243e-06, + "loss": 1.4572, + "step": 88440 + }, + { + "epoch": 1.06, + "grad_norm": 11.16718730713745, + "learning_rate": 9.457759401127749e-06, + "loss": 1.4199, + "step": 88443 + }, + { + "epoch": 1.06, + "grad_norm": 5.99533295434128, + "learning_rate": 9.457176083461721e-06, + "loss": 1.3022, + "step": 88446 + }, + { + "epoch": 1.06, + "grad_norm": 4.382178928662663, + "learning_rate": 9.456592767648159e-06, + "loss": 0.8981, + "step": 88449 + }, + { + "epoch": 1.06, + "grad_norm": 8.477795971861148, + "learning_rate": 9.456009453689048e-06, + "loss": 0.8845, + "step": 88452 + }, + { + "epoch": 1.06, + "grad_norm": 3.7882117619075784, + "learning_rate": 9.455426141586384e-06, + "loss": 1.2259, + "step": 88455 + }, + { + "epoch": 1.06, + "grad_norm": 2.649014414608646, + "learning_rate": 9.454842831342154e-06, + "loss": 1.2296, + "step": 88458 + }, + { + "epoch": 1.06, + "grad_norm": 11.295531327137427, + "learning_rate": 9.454259522958346e-06, + "loss": 1.3781, + "step": 88461 + }, + { + "epoch": 1.06, + "grad_norm": 11.188968559228385, + "learning_rate": 9.453676216436954e-06, + "loss": 1.1599, + "step": 88464 + }, + { + "epoch": 1.06, + "grad_norm": 11.57412025221209, + "learning_rate": 9.453092911779969e-06, + "loss": 1.1684, + "step": 88467 + }, + { + "epoch": 1.06, + "grad_norm": 7.2607285331307505, + "learning_rate": 9.452509608989383e-06, + "loss": 0.9627, + "step": 88470 + }, + { + "epoch": 1.06, + "grad_norm": 7.05071286990416, + "learning_rate": 9.451926308067182e-06, + "loss": 1.3246, + "step": 88473 + }, + { + "epoch": 1.06, + "grad_norm": 15.574395122158844, + "learning_rate": 9.45134300901536e-06, + "loss": 1.2108, + "step": 88476 + }, + { + "epoch": 1.06, + "grad_norm": 12.71068359340996, + "learning_rate": 9.450759711835907e-06, + "loss": 1.14, + "step": 88479 + }, + { + "epoch": 1.06, + "grad_norm": 18.482137824765648, + "learning_rate": 9.450176416530814e-06, + "loss": 1.163, + "step": 88482 + }, + { + "epoch": 1.06, + "grad_norm": 14.527570479091336, + "learning_rate": 9.449593123102069e-06, + "loss": 1.5295, + "step": 88485 + }, + { + "epoch": 1.06, + "grad_norm": 25.57032751951017, + "learning_rate": 9.449009831551664e-06, + "loss": 1.3556, + "step": 88488 + }, + { + "epoch": 1.06, + "grad_norm": 13.79096736609109, + "learning_rate": 9.44842654188159e-06, + "loss": 1.0487, + "step": 88491 + }, + { + "epoch": 1.06, + "grad_norm": 11.436026714873368, + "learning_rate": 9.447843254093839e-06, + "loss": 1.166, + "step": 88494 + }, + { + "epoch": 1.06, + "grad_norm": 6.667128682581583, + "learning_rate": 9.4472599681904e-06, + "loss": 1.3514, + "step": 88497 + }, + { + "epoch": 1.06, + "grad_norm": 7.823719438496617, + "learning_rate": 9.446676684173265e-06, + "loss": 1.0276, + "step": 88500 + }, + { + "epoch": 1.06, + "grad_norm": 7.2078587892661155, + "learning_rate": 9.446093402044419e-06, + "loss": 1.1343, + "step": 88503 + }, + { + "epoch": 1.06, + "grad_norm": 10.057660052497514, + "learning_rate": 9.445510121805859e-06, + "loss": 1.1524, + "step": 88506 + }, + { + "epoch": 1.06, + "grad_norm": 3.1349540251112464, + "learning_rate": 9.444926843459576e-06, + "loss": 1.3721, + "step": 88509 + }, + { + "epoch": 1.06, + "grad_norm": 13.996622894282828, + "learning_rate": 9.444343567007556e-06, + "loss": 1.301, + "step": 88512 + }, + { + "epoch": 1.06, + "grad_norm": 2.546895490429119, + "learning_rate": 9.443760292451788e-06, + "loss": 1.3928, + "step": 88515 + }, + { + "epoch": 1.06, + "grad_norm": 6.625415493950806, + "learning_rate": 9.443177019794267e-06, + "loss": 1.3169, + "step": 88518 + }, + { + "epoch": 1.06, + "grad_norm": 15.033200076287567, + "learning_rate": 9.442593749036984e-06, + "loss": 1.0864, + "step": 88521 + }, + { + "epoch": 1.06, + "grad_norm": 9.383199034832062, + "learning_rate": 9.44201048018193e-06, + "loss": 1.438, + "step": 88524 + }, + { + "epoch": 1.06, + "grad_norm": 15.960986697038578, + "learning_rate": 9.44142721323109e-06, + "loss": 1.3457, + "step": 88527 + }, + { + "epoch": 1.06, + "grad_norm": 3.5559659417311487, + "learning_rate": 9.440843948186456e-06, + "loss": 1.3638, + "step": 88530 + }, + { + "epoch": 1.06, + "grad_norm": 4.21784694683074, + "learning_rate": 9.440260685050023e-06, + "loss": 1.0719, + "step": 88533 + }, + { + "epoch": 1.06, + "grad_norm": 3.777804645207605, + "learning_rate": 9.43967742382378e-06, + "loss": 1.1993, + "step": 88536 + }, + { + "epoch": 1.06, + "grad_norm": 15.69989606734536, + "learning_rate": 9.439094164509713e-06, + "loss": 1.2432, + "step": 88539 + }, + { + "epoch": 1.06, + "grad_norm": 8.858294477083914, + "learning_rate": 9.438510907109816e-06, + "loss": 1.6281, + "step": 88542 + }, + { + "epoch": 1.06, + "grad_norm": 15.41060194331248, + "learning_rate": 9.43792765162608e-06, + "loss": 0.7721, + "step": 88545 + }, + { + "epoch": 1.06, + "grad_norm": 55.11458032732869, + "learning_rate": 9.437344398060491e-06, + "loss": 1.198, + "step": 88548 + }, + { + "epoch": 1.06, + "grad_norm": 8.678489766215069, + "learning_rate": 9.43676114641505e-06, + "loss": 1.3044, + "step": 88551 + }, + { + "epoch": 1.06, + "grad_norm": 13.272378871958058, + "learning_rate": 9.436177896691738e-06, + "loss": 1.2442, + "step": 88554 + }, + { + "epoch": 1.06, + "grad_norm": 5.870693717104553, + "learning_rate": 9.435594648892544e-06, + "loss": 1.331, + "step": 88557 + }, + { + "epoch": 1.06, + "grad_norm": 13.418776346227649, + "learning_rate": 9.435011403019463e-06, + "loss": 1.1544, + "step": 88560 + }, + { + "epoch": 1.06, + "grad_norm": 6.640942690004588, + "learning_rate": 9.434428159074487e-06, + "loss": 0.9011, + "step": 88563 + }, + { + "epoch": 1.06, + "grad_norm": 9.324283996495653, + "learning_rate": 9.433844917059605e-06, + "loss": 1.2279, + "step": 88566 + }, + { + "epoch": 1.07, + "grad_norm": 5.985792244513816, + "learning_rate": 9.433261676976802e-06, + "loss": 1.4309, + "step": 88569 + }, + { + "epoch": 1.07, + "grad_norm": 16.316622903651627, + "learning_rate": 9.432678438828073e-06, + "loss": 1.5354, + "step": 88572 + }, + { + "epoch": 1.07, + "grad_norm": 2.677236848005725, + "learning_rate": 9.432095202615413e-06, + "loss": 0.8606, + "step": 88575 + }, + { + "epoch": 1.07, + "grad_norm": 6.658192572891367, + "learning_rate": 9.431511968340806e-06, + "loss": 1.5029, + "step": 88578 + }, + { + "epoch": 1.07, + "grad_norm": 20.55313635936904, + "learning_rate": 9.430928736006242e-06, + "loss": 1.159, + "step": 88581 + }, + { + "epoch": 1.07, + "grad_norm": 64.74876331609406, + "learning_rate": 9.430345505613712e-06, + "loss": 1.4025, + "step": 88584 + }, + { + "epoch": 1.07, + "grad_norm": 13.333550803838822, + "learning_rate": 9.42976227716521e-06, + "loss": 1.3883, + "step": 88587 + }, + { + "epoch": 1.07, + "grad_norm": 11.223543485155348, + "learning_rate": 9.429179050662725e-06, + "loss": 1.1284, + "step": 88590 + }, + { + "epoch": 1.07, + "grad_norm": 14.566459237915627, + "learning_rate": 9.428595826108245e-06, + "loss": 1.5106, + "step": 88593 + }, + { + "epoch": 1.07, + "grad_norm": 13.792940494258017, + "learning_rate": 9.428012603503761e-06, + "loss": 0.9798, + "step": 88596 + }, + { + "epoch": 1.07, + "grad_norm": 11.743013452564977, + "learning_rate": 9.427429382851264e-06, + "loss": 1.0111, + "step": 88599 + }, + { + "epoch": 1.07, + "grad_norm": 7.680501374241334, + "learning_rate": 9.426846164152742e-06, + "loss": 1.2038, + "step": 88602 + }, + { + "epoch": 1.07, + "grad_norm": 4.943245760927607, + "learning_rate": 9.426262947410193e-06, + "loss": 0.9616, + "step": 88605 + }, + { + "epoch": 1.07, + "grad_norm": 18.78217111144158, + "learning_rate": 9.425679732625599e-06, + "loss": 1.5208, + "step": 88608 + }, + { + "epoch": 1.07, + "grad_norm": 22.671835878852765, + "learning_rate": 9.425096519800951e-06, + "loss": 1.2112, + "step": 88611 + }, + { + "epoch": 1.07, + "grad_norm": 10.477891922483407, + "learning_rate": 9.424513308938244e-06, + "loss": 1.3788, + "step": 88614 + }, + { + "epoch": 1.07, + "grad_norm": 8.805033223279258, + "learning_rate": 9.423930100039467e-06, + "loss": 1.2132, + "step": 88617 + }, + { + "epoch": 1.07, + "grad_norm": 8.69752638050604, + "learning_rate": 9.423346893106609e-06, + "loss": 1.435, + "step": 88620 + }, + { + "epoch": 1.07, + "grad_norm": 14.70117089215965, + "learning_rate": 9.422763688141658e-06, + "loss": 1.362, + "step": 88623 + }, + { + "epoch": 1.07, + "grad_norm": 13.386827442708451, + "learning_rate": 9.422180485146604e-06, + "loss": 1.6784, + "step": 88626 + }, + { + "epoch": 1.07, + "grad_norm": 4.614277776104495, + "learning_rate": 9.421597284123444e-06, + "loss": 1.0633, + "step": 88629 + }, + { + "epoch": 1.07, + "grad_norm": 10.233552971671163, + "learning_rate": 9.421014085074166e-06, + "loss": 0.843, + "step": 88632 + }, + { + "epoch": 1.07, + "grad_norm": 3.596756710490051, + "learning_rate": 9.420430888000755e-06, + "loss": 0.9858, + "step": 88635 + }, + { + "epoch": 1.07, + "grad_norm": 6.1565068764042366, + "learning_rate": 9.419847692905207e-06, + "loss": 1.4714, + "step": 88638 + }, + { + "epoch": 1.07, + "grad_norm": 10.29637814231686, + "learning_rate": 9.419264499789507e-06, + "loss": 1.7758, + "step": 88641 + }, + { + "epoch": 1.07, + "grad_norm": 3.8327078621352046, + "learning_rate": 9.418681308655651e-06, + "loss": 1.3912, + "step": 88644 + }, + { + "epoch": 1.07, + "grad_norm": 7.255608151050537, + "learning_rate": 9.418098119505624e-06, + "loss": 1.1735, + "step": 88647 + }, + { + "epoch": 1.07, + "grad_norm": 8.926974855896992, + "learning_rate": 9.41751493234142e-06, + "loss": 1.5321, + "step": 88650 + }, + { + "epoch": 1.07, + "grad_norm": 12.812411415584625, + "learning_rate": 9.416931747165026e-06, + "loss": 1.4007, + "step": 88653 + }, + { + "epoch": 1.07, + "grad_norm": 17.75134424241113, + "learning_rate": 9.416348563978432e-06, + "loss": 0.8388, + "step": 88656 + }, + { + "epoch": 1.07, + "grad_norm": 5.701108615310943, + "learning_rate": 9.415765382783636e-06, + "loss": 1.1384, + "step": 88659 + }, + { + "epoch": 1.07, + "grad_norm": 8.206544379124676, + "learning_rate": 9.41518220358262e-06, + "loss": 1.2424, + "step": 88662 + }, + { + "epoch": 1.07, + "grad_norm": 3.0615492564501086, + "learning_rate": 9.414599026377375e-06, + "loss": 1.0074, + "step": 88665 + }, + { + "epoch": 1.07, + "grad_norm": 13.165483448545244, + "learning_rate": 9.414015851169893e-06, + "loss": 1.0201, + "step": 88668 + }, + { + "epoch": 1.07, + "grad_norm": 2.916064528895285, + "learning_rate": 9.413432677962167e-06, + "loss": 1.3152, + "step": 88671 + }, + { + "epoch": 1.07, + "grad_norm": 12.144189282252471, + "learning_rate": 9.412849506756182e-06, + "loss": 1.3717, + "step": 88674 + }, + { + "epoch": 1.07, + "grad_norm": 3.900424726178629, + "learning_rate": 9.412266337553927e-06, + "loss": 1.1346, + "step": 88677 + }, + { + "epoch": 1.07, + "grad_norm": 27.35572860497167, + "learning_rate": 9.4116831703574e-06, + "loss": 1.2833, + "step": 88680 + }, + { + "epoch": 1.07, + "grad_norm": 4.1964780171693805, + "learning_rate": 9.41110000516858e-06, + "loss": 1.5121, + "step": 88683 + }, + { + "epoch": 1.07, + "grad_norm": 4.15601139234597, + "learning_rate": 9.410516841989472e-06, + "loss": 1.3381, + "step": 88686 + }, + { + "epoch": 1.07, + "grad_norm": 9.146262162696463, + "learning_rate": 9.409933680822052e-06, + "loss": 1.0912, + "step": 88689 + }, + { + "epoch": 1.07, + "grad_norm": 7.068397020258264, + "learning_rate": 9.409350521668317e-06, + "loss": 1.04, + "step": 88692 + }, + { + "epoch": 1.07, + "grad_norm": 18.847380989777513, + "learning_rate": 9.408767364530255e-06, + "loss": 1.4833, + "step": 88695 + }, + { + "epoch": 1.07, + "grad_norm": 12.191782707055532, + "learning_rate": 9.40818420940986e-06, + "loss": 1.1854, + "step": 88698 + }, + { + "epoch": 1.07, + "grad_norm": 12.52314929180301, + "learning_rate": 9.407601056309116e-06, + "loss": 1.0768, + "step": 88701 + }, + { + "epoch": 1.07, + "grad_norm": 7.642190385873095, + "learning_rate": 9.40701790523002e-06, + "loss": 1.0502, + "step": 88704 + }, + { + "epoch": 1.07, + "grad_norm": 3.1642167878494547, + "learning_rate": 9.406434756174552e-06, + "loss": 1.4689, + "step": 88707 + }, + { + "epoch": 1.07, + "grad_norm": 14.471234704822644, + "learning_rate": 9.405851609144711e-06, + "loss": 1.1104, + "step": 88710 + }, + { + "epoch": 1.07, + "grad_norm": 5.4093664015536245, + "learning_rate": 9.40526846414249e-06, + "loss": 0.928, + "step": 88713 + }, + { + "epoch": 1.07, + "grad_norm": 10.746774149354266, + "learning_rate": 9.40468532116987e-06, + "loss": 1.2026, + "step": 88716 + }, + { + "epoch": 1.07, + "grad_norm": 4.304680527169825, + "learning_rate": 9.40410218022884e-06, + "loss": 1.228, + "step": 88719 + }, + { + "epoch": 1.07, + "grad_norm": 6.51426097932379, + "learning_rate": 9.4035190413214e-06, + "loss": 1.3999, + "step": 88722 + }, + { + "epoch": 1.07, + "grad_norm": 8.810294060206017, + "learning_rate": 9.402935904449533e-06, + "loss": 1.0474, + "step": 88725 + }, + { + "epoch": 1.07, + "grad_norm": 7.160372496921631, + "learning_rate": 9.402352769615232e-06, + "loss": 1.3998, + "step": 88728 + }, + { + "epoch": 1.07, + "grad_norm": 14.813145722926839, + "learning_rate": 9.401769636820483e-06, + "loss": 0.9616, + "step": 88731 + }, + { + "epoch": 1.07, + "grad_norm": 22.706833483712895, + "learning_rate": 9.40118650606728e-06, + "loss": 1.4957, + "step": 88734 + }, + { + "epoch": 1.07, + "grad_norm": 5.711189672349961, + "learning_rate": 9.40060337735761e-06, + "loss": 1.08, + "step": 88737 + }, + { + "epoch": 1.07, + "grad_norm": 18.316515552548232, + "learning_rate": 9.400020250693471e-06, + "loss": 1.301, + "step": 88740 + }, + { + "epoch": 1.07, + "grad_norm": 4.856692826143683, + "learning_rate": 9.39943712607684e-06, + "loss": 1.2596, + "step": 88743 + }, + { + "epoch": 1.07, + "grad_norm": 10.212751297104012, + "learning_rate": 9.398854003509718e-06, + "loss": 1.1231, + "step": 88746 + }, + { + "epoch": 1.07, + "grad_norm": 11.894223847090124, + "learning_rate": 9.398270882994088e-06, + "loss": 1.2432, + "step": 88749 + }, + { + "epoch": 1.07, + "grad_norm": 11.982029969073965, + "learning_rate": 9.397687764531946e-06, + "loss": 0.9941, + "step": 88752 + }, + { + "epoch": 1.07, + "grad_norm": 15.437988931617117, + "learning_rate": 9.397104648125275e-06, + "loss": 1.4839, + "step": 88755 + }, + { + "epoch": 1.07, + "grad_norm": 11.946146024666064, + "learning_rate": 9.39652153377607e-06, + "loss": 1.338, + "step": 88758 + }, + { + "epoch": 1.07, + "grad_norm": 12.683657602034659, + "learning_rate": 9.395938421486319e-06, + "loss": 1.2363, + "step": 88761 + }, + { + "epoch": 1.07, + "grad_norm": 6.162286113436955, + "learning_rate": 9.395355311258011e-06, + "loss": 1.341, + "step": 88764 + }, + { + "epoch": 1.07, + "grad_norm": 37.042981339547225, + "learning_rate": 9.394772203093143e-06, + "loss": 1.4884, + "step": 88767 + }, + { + "epoch": 1.07, + "grad_norm": 14.401040439789588, + "learning_rate": 9.394189096993698e-06, + "loss": 1.2142, + "step": 88770 + }, + { + "epoch": 1.07, + "grad_norm": 7.778397537737938, + "learning_rate": 9.393605992961663e-06, + "loss": 1.3524, + "step": 88773 + }, + { + "epoch": 1.07, + "grad_norm": 6.834482510126328, + "learning_rate": 9.393022890999036e-06, + "loss": 0.8816, + "step": 88776 + }, + { + "epoch": 1.07, + "grad_norm": 12.157536878632193, + "learning_rate": 9.392439791107805e-06, + "loss": 1.0328, + "step": 88779 + }, + { + "epoch": 1.07, + "grad_norm": 3.3148988519225573, + "learning_rate": 9.391856693289954e-06, + "loss": 1.5003, + "step": 88782 + }, + { + "epoch": 1.07, + "grad_norm": 7.776590713610265, + "learning_rate": 9.391273597547477e-06, + "loss": 1.5528, + "step": 88785 + }, + { + "epoch": 1.07, + "grad_norm": 8.011378443345397, + "learning_rate": 9.390690503882366e-06, + "loss": 1.3436, + "step": 88788 + }, + { + "epoch": 1.07, + "grad_norm": 6.59882172446726, + "learning_rate": 9.390107412296607e-06, + "loss": 1.3731, + "step": 88791 + }, + { + "epoch": 1.07, + "grad_norm": 34.008987668557964, + "learning_rate": 9.389524322792196e-06, + "loss": 1.0977, + "step": 88794 + }, + { + "epoch": 1.07, + "grad_norm": 8.139530346618695, + "learning_rate": 9.388941235371115e-06, + "loss": 1.6368, + "step": 88797 + }, + { + "epoch": 1.07, + "grad_norm": 6.059525575837778, + "learning_rate": 9.388358150035357e-06, + "loss": 1.1081, + "step": 88800 + }, + { + "epoch": 1.07, + "grad_norm": 3.280746332605137, + "learning_rate": 9.387775066786913e-06, + "loss": 1.218, + "step": 88803 + }, + { + "epoch": 1.07, + "grad_norm": 11.106155424003347, + "learning_rate": 9.387191985627776e-06, + "loss": 1.0399, + "step": 88806 + }, + { + "epoch": 1.07, + "grad_norm": 14.97826534858268, + "learning_rate": 9.386608906559927e-06, + "loss": 0.9205, + "step": 88809 + }, + { + "epoch": 1.07, + "grad_norm": 17.05927507103483, + "learning_rate": 9.386025829585362e-06, + "loss": 1.1889, + "step": 88812 + }, + { + "epoch": 1.07, + "grad_norm": 8.703106086941332, + "learning_rate": 9.38544275470607e-06, + "loss": 1.495, + "step": 88815 + }, + { + "epoch": 1.07, + "grad_norm": 6.312079533783171, + "learning_rate": 9.38485968192404e-06, + "loss": 1.1829, + "step": 88818 + }, + { + "epoch": 1.07, + "grad_norm": 19.95989820141865, + "learning_rate": 9.384276611241264e-06, + "loss": 1.3065, + "step": 88821 + }, + { + "epoch": 1.07, + "grad_norm": 19.07326436096018, + "learning_rate": 9.383693542659729e-06, + "loss": 0.9746, + "step": 88824 + }, + { + "epoch": 1.07, + "grad_norm": 4.942561944927924, + "learning_rate": 9.383110476181425e-06, + "loss": 1.1326, + "step": 88827 + }, + { + "epoch": 1.07, + "grad_norm": 11.378833660018298, + "learning_rate": 9.382527411808345e-06, + "loss": 1.4463, + "step": 88830 + }, + { + "epoch": 1.07, + "grad_norm": 6.595286972388106, + "learning_rate": 9.381944349542479e-06, + "loss": 0.8616, + "step": 88833 + }, + { + "epoch": 1.07, + "grad_norm": 13.891218993882775, + "learning_rate": 9.381361289385808e-06, + "loss": 1.3106, + "step": 88836 + }, + { + "epoch": 1.07, + "grad_norm": 26.695718648620733, + "learning_rate": 9.380778231340329e-06, + "loss": 1.2373, + "step": 88839 + }, + { + "epoch": 1.07, + "grad_norm": 5.086191290690416, + "learning_rate": 9.380195175408033e-06, + "loss": 1.2573, + "step": 88842 + }, + { + "epoch": 1.07, + "grad_norm": 25.618330537663287, + "learning_rate": 9.379612121590906e-06, + "loss": 1.0972, + "step": 88845 + }, + { + "epoch": 1.07, + "grad_norm": 22.02046078519227, + "learning_rate": 9.379029069890942e-06, + "loss": 1.161, + "step": 88848 + }, + { + "epoch": 1.07, + "grad_norm": 10.435908407512803, + "learning_rate": 9.378446020310126e-06, + "loss": 1.2851, + "step": 88851 + }, + { + "epoch": 1.07, + "grad_norm": 6.590247117351858, + "learning_rate": 9.37786297285045e-06, + "loss": 1.052, + "step": 88854 + }, + { + "epoch": 1.07, + "grad_norm": 30.34868226312036, + "learning_rate": 9.377279927513903e-06, + "loss": 1.2732, + "step": 88857 + }, + { + "epoch": 1.07, + "grad_norm": 8.96673541592325, + "learning_rate": 9.37669688430248e-06, + "loss": 1.1627, + "step": 88860 + }, + { + "epoch": 1.07, + "grad_norm": 18.466934709056527, + "learning_rate": 9.37611384321816e-06, + "loss": 1.2438, + "step": 88863 + }, + { + "epoch": 1.07, + "grad_norm": 10.777722953564211, + "learning_rate": 9.375530804262943e-06, + "loss": 1.1168, + "step": 88866 + }, + { + "epoch": 1.07, + "grad_norm": 10.386094842129404, + "learning_rate": 9.37494776743881e-06, + "loss": 1.1118, + "step": 88869 + }, + { + "epoch": 1.07, + "grad_norm": 5.0871952039013, + "learning_rate": 9.37436473274776e-06, + "loss": 1.3987, + "step": 88872 + }, + { + "epoch": 1.07, + "grad_norm": 4.7652711333911695, + "learning_rate": 9.373781700191778e-06, + "loss": 1.0995, + "step": 88875 + }, + { + "epoch": 1.07, + "grad_norm": 11.298088211229924, + "learning_rate": 9.37319866977285e-06, + "loss": 1.2125, + "step": 88878 + }, + { + "epoch": 1.07, + "grad_norm": 8.363443952619866, + "learning_rate": 9.372615641492967e-06, + "loss": 1.5467, + "step": 88881 + }, + { + "epoch": 1.07, + "grad_norm": 9.575603829673554, + "learning_rate": 9.372032615354125e-06, + "loss": 1.2124, + "step": 88884 + }, + { + "epoch": 1.07, + "grad_norm": 35.87314844382939, + "learning_rate": 9.371449591358312e-06, + "loss": 1.9226, + "step": 88887 + }, + { + "epoch": 1.07, + "grad_norm": 5.3721023659094085, + "learning_rate": 9.370866569507509e-06, + "loss": 1.0695, + "step": 88890 + }, + { + "epoch": 1.07, + "grad_norm": 189.76644063796093, + "learning_rate": 9.370283549803713e-06, + "loss": 1.2641, + "step": 88893 + }, + { + "epoch": 1.07, + "grad_norm": 12.303444324857004, + "learning_rate": 9.369700532248915e-06, + "loss": 0.9203, + "step": 88896 + }, + { + "epoch": 1.07, + "grad_norm": 7.639990626287281, + "learning_rate": 9.3691175168451e-06, + "loss": 1.3236, + "step": 88899 + }, + { + "epoch": 1.07, + "grad_norm": 9.099933985172498, + "learning_rate": 9.368534503594264e-06, + "loss": 1.1826, + "step": 88902 + }, + { + "epoch": 1.07, + "grad_norm": 32.471955607605, + "learning_rate": 9.367951492498386e-06, + "loss": 1.0666, + "step": 88905 + }, + { + "epoch": 1.07, + "grad_norm": 13.261978962932728, + "learning_rate": 9.367368483559467e-06, + "loss": 1.1909, + "step": 88908 + }, + { + "epoch": 1.07, + "grad_norm": 9.380453618085252, + "learning_rate": 9.366785476779486e-06, + "loss": 1.3421, + "step": 88911 + }, + { + "epoch": 1.07, + "grad_norm": 8.608059722998242, + "learning_rate": 9.366202472160445e-06, + "loss": 1.3168, + "step": 88914 + }, + { + "epoch": 1.07, + "grad_norm": 7.511135692772621, + "learning_rate": 9.365619469704324e-06, + "loss": 0.9347, + "step": 88917 + }, + { + "epoch": 1.07, + "grad_norm": 6.3524665031951235, + "learning_rate": 9.365036469413113e-06, + "loss": 1.0139, + "step": 88920 + }, + { + "epoch": 1.07, + "grad_norm": 21.773810921534086, + "learning_rate": 9.364453471288803e-06, + "loss": 1.163, + "step": 88923 + }, + { + "epoch": 1.07, + "grad_norm": 25.943022228873414, + "learning_rate": 9.363870475333387e-06, + "loss": 1.4072, + "step": 88926 + }, + { + "epoch": 1.07, + "grad_norm": 10.861737169596534, + "learning_rate": 9.363287481548854e-06, + "loss": 1.3563, + "step": 88929 + }, + { + "epoch": 1.07, + "grad_norm": 18.11908642095987, + "learning_rate": 9.362704489937188e-06, + "loss": 1.0765, + "step": 88932 + }, + { + "epoch": 1.07, + "grad_norm": 12.086758684230363, + "learning_rate": 9.362121500500382e-06, + "loss": 1.127, + "step": 88935 + }, + { + "epoch": 1.07, + "grad_norm": 21.89130470141232, + "learning_rate": 9.361538513240426e-06, + "loss": 1.1601, + "step": 88938 + }, + { + "epoch": 1.07, + "grad_norm": 12.007125180393434, + "learning_rate": 9.360955528159313e-06, + "loss": 1.2019, + "step": 88941 + }, + { + "epoch": 1.07, + "grad_norm": 8.670604464039084, + "learning_rate": 9.360372545259023e-06, + "loss": 1.0162, + "step": 88944 + }, + { + "epoch": 1.07, + "grad_norm": 7.147611639621648, + "learning_rate": 9.359789564541552e-06, + "loss": 1.2257, + "step": 88947 + }, + { + "epoch": 1.07, + "grad_norm": 36.864449181499964, + "learning_rate": 9.35920658600889e-06, + "loss": 1.1459, + "step": 88950 + }, + { + "epoch": 1.07, + "grad_norm": 13.79003954872505, + "learning_rate": 9.358623609663023e-06, + "loss": 1.0416, + "step": 88953 + }, + { + "epoch": 1.07, + "grad_norm": 11.934085106861072, + "learning_rate": 9.358040635505949e-06, + "loss": 1.1424, + "step": 88956 + }, + { + "epoch": 1.07, + "grad_norm": 3.415747053940344, + "learning_rate": 9.357457663539646e-06, + "loss": 1.3032, + "step": 88959 + }, + { + "epoch": 1.07, + "grad_norm": 10.922803561529767, + "learning_rate": 9.356874693766109e-06, + "loss": 0.8664, + "step": 88962 + }, + { + "epoch": 1.07, + "grad_norm": 6.768174752228726, + "learning_rate": 9.356291726187325e-06, + "loss": 0.9027, + "step": 88965 + }, + { + "epoch": 1.07, + "grad_norm": 5.684111094009238, + "learning_rate": 9.35570876080529e-06, + "loss": 0.949, + "step": 88968 + }, + { + "epoch": 1.07, + "grad_norm": 15.701066023418612, + "learning_rate": 9.355125797621988e-06, + "loss": 1.176, + "step": 88971 + }, + { + "epoch": 1.07, + "grad_norm": 13.270788190788094, + "learning_rate": 9.354542836639406e-06, + "loss": 1.247, + "step": 88974 + }, + { + "epoch": 1.07, + "grad_norm": 9.243278787777967, + "learning_rate": 9.353959877859537e-06, + "loss": 1.3555, + "step": 88977 + }, + { + "epoch": 1.07, + "grad_norm": 11.02380281257609, + "learning_rate": 9.353376921284372e-06, + "loss": 1.1073, + "step": 88980 + }, + { + "epoch": 1.07, + "grad_norm": 7.4051117292539095, + "learning_rate": 9.352793966915903e-06, + "loss": 1.2689, + "step": 88983 + }, + { + "epoch": 1.07, + "grad_norm": 16.205180750695927, + "learning_rate": 9.35221101475611e-06, + "loss": 1.24, + "step": 88986 + }, + { + "epoch": 1.07, + "grad_norm": 11.225113141965927, + "learning_rate": 9.351628064806986e-06, + "loss": 1.0977, + "step": 88989 + }, + { + "epoch": 1.07, + "grad_norm": 8.472404308135774, + "learning_rate": 9.351045117070525e-06, + "loss": 1.2867, + "step": 88992 + }, + { + "epoch": 1.07, + "grad_norm": 22.104264749187983, + "learning_rate": 9.350462171548714e-06, + "loss": 1.275, + "step": 88995 + }, + { + "epoch": 1.07, + "grad_norm": 13.248578029264829, + "learning_rate": 9.349879228243539e-06, + "loss": 1.2536, + "step": 88998 + }, + { + "epoch": 1.07, + "grad_norm": 12.005828345753377, + "learning_rate": 9.349296287156991e-06, + "loss": 1.1772, + "step": 89001 + }, + { + "epoch": 1.07, + "grad_norm": 15.967866385889037, + "learning_rate": 9.348713348291064e-06, + "loss": 0.8155, + "step": 89004 + }, + { + "epoch": 1.07, + "grad_norm": 22.019059436641516, + "learning_rate": 9.34813041164774e-06, + "loss": 1.0701, + "step": 89007 + }, + { + "epoch": 1.07, + "grad_norm": 8.655527732773088, + "learning_rate": 9.347547477229018e-06, + "loss": 0.9698, + "step": 89010 + }, + { + "epoch": 1.07, + "grad_norm": 6.5517926289526685, + "learning_rate": 9.346964545036879e-06, + "loss": 1.0106, + "step": 89013 + }, + { + "epoch": 1.07, + "grad_norm": 7.8535796216714076, + "learning_rate": 9.34638161507331e-06, + "loss": 1.289, + "step": 89016 + }, + { + "epoch": 1.07, + "grad_norm": 9.049798632758952, + "learning_rate": 9.345798687340309e-06, + "loss": 1.4623, + "step": 89019 + }, + { + "epoch": 1.07, + "grad_norm": 15.406476362169482, + "learning_rate": 9.345215761839863e-06, + "loss": 1.4401, + "step": 89022 + }, + { + "epoch": 1.07, + "grad_norm": 4.131615852159249, + "learning_rate": 9.344632838573959e-06, + "loss": 1.045, + "step": 89025 + }, + { + "epoch": 1.07, + "grad_norm": 4.266198675314362, + "learning_rate": 9.344049917544584e-06, + "loss": 1.0438, + "step": 89028 + }, + { + "epoch": 1.07, + "grad_norm": 10.818308720557246, + "learning_rate": 9.343466998753731e-06, + "loss": 1.3079, + "step": 89031 + }, + { + "epoch": 1.07, + "grad_norm": 22.009978917378895, + "learning_rate": 9.34288408220339e-06, + "loss": 1.022, + "step": 89034 + }, + { + "epoch": 1.07, + "grad_norm": 15.303832015028005, + "learning_rate": 9.342301167895552e-06, + "loss": 1.2535, + "step": 89037 + }, + { + "epoch": 1.07, + "grad_norm": 13.068640657621753, + "learning_rate": 9.341718255832199e-06, + "loss": 1.1988, + "step": 89040 + }, + { + "epoch": 1.07, + "grad_norm": 10.319828936668088, + "learning_rate": 9.341135346015322e-06, + "loss": 1.3203, + "step": 89043 + }, + { + "epoch": 1.07, + "grad_norm": 2.5038957894021507, + "learning_rate": 9.340552438446919e-06, + "loss": 1.1679, + "step": 89046 + }, + { + "epoch": 1.07, + "grad_norm": 5.069489787836212, + "learning_rate": 9.339969533128972e-06, + "loss": 1.5998, + "step": 89049 + }, + { + "epoch": 1.07, + "grad_norm": 4.526987509564913, + "learning_rate": 9.339386630063468e-06, + "loss": 0.9159, + "step": 89052 + }, + { + "epoch": 1.07, + "grad_norm": 7.600808793989795, + "learning_rate": 9.338803729252402e-06, + "loss": 0.8416, + "step": 89055 + }, + { + "epoch": 1.07, + "grad_norm": 8.580506159561864, + "learning_rate": 9.338220830697757e-06, + "loss": 1.1646, + "step": 89058 + }, + { + "epoch": 1.07, + "grad_norm": 13.526872538507453, + "learning_rate": 9.337637934401527e-06, + "loss": 1.1292, + "step": 89061 + }, + { + "epoch": 1.07, + "grad_norm": 8.624986771633031, + "learning_rate": 9.337055040365705e-06, + "loss": 1.0032, + "step": 89064 + }, + { + "epoch": 1.07, + "grad_norm": 8.340845800673259, + "learning_rate": 9.336472148592274e-06, + "loss": 1.2517, + "step": 89067 + }, + { + "epoch": 1.07, + "grad_norm": 4.233492633825603, + "learning_rate": 9.335889259083221e-06, + "loss": 1.0865, + "step": 89070 + }, + { + "epoch": 1.07, + "grad_norm": 15.442802296524416, + "learning_rate": 9.335306371840538e-06, + "loss": 1.2431, + "step": 89073 + }, + { + "epoch": 1.07, + "grad_norm": 15.5905281822027, + "learning_rate": 9.33472348686622e-06, + "loss": 1.0899, + "step": 89076 + }, + { + "epoch": 1.07, + "grad_norm": 4.427161133830219, + "learning_rate": 9.334140604162249e-06, + "loss": 1.4687, + "step": 89079 + }, + { + "epoch": 1.07, + "grad_norm": 15.280133117305637, + "learning_rate": 9.333557723730616e-06, + "loss": 1.4568, + "step": 89082 + }, + { + "epoch": 1.07, + "grad_norm": 29.922580767983977, + "learning_rate": 9.332974845573307e-06, + "loss": 1.3904, + "step": 89085 + }, + { + "epoch": 1.07, + "grad_norm": 11.786408374157702, + "learning_rate": 9.33239196969232e-06, + "loss": 1.2685, + "step": 89088 + }, + { + "epoch": 1.07, + "grad_norm": 11.01976376267588, + "learning_rate": 9.331809096089637e-06, + "loss": 0.8756, + "step": 89091 + }, + { + "epoch": 1.07, + "grad_norm": 9.389769054902024, + "learning_rate": 9.331226224767247e-06, + "loss": 1.3462, + "step": 89094 + }, + { + "epoch": 1.07, + "grad_norm": 5.150317874176814, + "learning_rate": 9.330643355727146e-06, + "loss": 1.2885, + "step": 89097 + }, + { + "epoch": 1.07, + "grad_norm": 3.857181426322432, + "learning_rate": 9.33006048897131e-06, + "loss": 1.0377, + "step": 89100 + }, + { + "epoch": 1.07, + "grad_norm": 3.212789859932743, + "learning_rate": 9.329477624501745e-06, + "loss": 1.0636, + "step": 89103 + }, + { + "epoch": 1.07, + "grad_norm": 18.77615832600963, + "learning_rate": 9.328894762320425e-06, + "loss": 1.1494, + "step": 89106 + }, + { + "epoch": 1.07, + "grad_norm": 8.353620566590312, + "learning_rate": 9.32831190242935e-06, + "loss": 1.1972, + "step": 89109 + }, + { + "epoch": 1.07, + "grad_norm": 7.787457561942953, + "learning_rate": 9.3277290448305e-06, + "loss": 1.3183, + "step": 89112 + }, + { + "epoch": 1.07, + "grad_norm": 2.6918548007539673, + "learning_rate": 9.327146189525868e-06, + "loss": 1.5426, + "step": 89115 + }, + { + "epoch": 1.07, + "grad_norm": 3.26554069417275, + "learning_rate": 9.326563336517449e-06, + "loss": 1.1023, + "step": 89118 + }, + { + "epoch": 1.07, + "grad_norm": 7.945768399752882, + "learning_rate": 9.325980485807226e-06, + "loss": 1.2679, + "step": 89121 + }, + { + "epoch": 1.07, + "grad_norm": 6.054703562393767, + "learning_rate": 9.325397637397184e-06, + "loss": 1.3162, + "step": 89124 + }, + { + "epoch": 1.07, + "grad_norm": 12.736273908792604, + "learning_rate": 9.324814791289319e-06, + "loss": 1.3698, + "step": 89127 + }, + { + "epoch": 1.07, + "grad_norm": 3.2483618410962514, + "learning_rate": 9.324231947485621e-06, + "loss": 0.99, + "step": 89130 + }, + { + "epoch": 1.07, + "grad_norm": 3.2452613873807623, + "learning_rate": 9.323649105988075e-06, + "loss": 1.2135, + "step": 89133 + }, + { + "epoch": 1.07, + "grad_norm": 2.3789855836580593, + "learning_rate": 9.323066266798669e-06, + "loss": 1.1053, + "step": 89136 + }, + { + "epoch": 1.07, + "grad_norm": 96.94374304285685, + "learning_rate": 9.322483429919395e-06, + "loss": 1.0855, + "step": 89139 + }, + { + "epoch": 1.07, + "grad_norm": 9.062303014900847, + "learning_rate": 9.32190059535224e-06, + "loss": 1.1057, + "step": 89142 + }, + { + "epoch": 1.07, + "grad_norm": 9.719721413053424, + "learning_rate": 9.321317763099195e-06, + "loss": 1.1126, + "step": 89145 + }, + { + "epoch": 1.07, + "grad_norm": 31.90064861365799, + "learning_rate": 9.320734933162247e-06, + "loss": 1.2024, + "step": 89148 + }, + { + "epoch": 1.07, + "grad_norm": 15.949374884749778, + "learning_rate": 9.320152105543388e-06, + "loss": 1.056, + "step": 89151 + }, + { + "epoch": 1.07, + "grad_norm": 9.048562069330147, + "learning_rate": 9.319569280244603e-06, + "loss": 1.1345, + "step": 89154 + }, + { + "epoch": 1.07, + "grad_norm": 5.253656019256866, + "learning_rate": 9.318986457267886e-06, + "loss": 1.0682, + "step": 89157 + }, + { + "epoch": 1.07, + "grad_norm": 9.568709291822701, + "learning_rate": 9.318403636615218e-06, + "loss": 1.344, + "step": 89160 + }, + { + "epoch": 1.07, + "grad_norm": 9.299448267569977, + "learning_rate": 9.317820818288597e-06, + "loss": 1.167, + "step": 89163 + }, + { + "epoch": 1.07, + "grad_norm": 12.83640043775908, + "learning_rate": 9.317238002290005e-06, + "loss": 1.2413, + "step": 89166 + }, + { + "epoch": 1.07, + "grad_norm": 7.1736450389566135, + "learning_rate": 9.316655188621432e-06, + "loss": 1.7033, + "step": 89169 + }, + { + "epoch": 1.07, + "grad_norm": 4.589140597509683, + "learning_rate": 9.316072377284876e-06, + "loss": 1.0113, + "step": 89172 + }, + { + "epoch": 1.07, + "grad_norm": 3.72944988009892, + "learning_rate": 9.315489568282315e-06, + "loss": 1.0537, + "step": 89175 + }, + { + "epoch": 1.07, + "grad_norm": 4.565765936563589, + "learning_rate": 9.31490676161574e-06, + "loss": 1.1045, + "step": 89178 + }, + { + "epoch": 1.07, + "grad_norm": 8.894108697796046, + "learning_rate": 9.31432395728714e-06, + "loss": 1.1388, + "step": 89181 + }, + { + "epoch": 1.07, + "grad_norm": 8.23806362529093, + "learning_rate": 9.31374115529851e-06, + "loss": 1.0123, + "step": 89184 + }, + { + "epoch": 1.07, + "grad_norm": 8.446553084593146, + "learning_rate": 9.313158355651834e-06, + "loss": 1.0015, + "step": 89187 + }, + { + "epoch": 1.07, + "grad_norm": 32.40650903449209, + "learning_rate": 9.312575558349097e-06, + "loss": 1.2039, + "step": 89190 + }, + { + "epoch": 1.07, + "grad_norm": 9.250055210444982, + "learning_rate": 9.311992763392294e-06, + "loss": 1.3576, + "step": 89193 + }, + { + "epoch": 1.07, + "grad_norm": 5.395496611783145, + "learning_rate": 9.311409970783412e-06, + "loss": 1.0488, + "step": 89196 + }, + { + "epoch": 1.07, + "grad_norm": 12.541653473013259, + "learning_rate": 9.310827180524442e-06, + "loss": 1.4446, + "step": 89199 + }, + { + "epoch": 1.07, + "grad_norm": 9.717143287971576, + "learning_rate": 9.310244392617367e-06, + "loss": 1.66, + "step": 89202 + }, + { + "epoch": 1.07, + "grad_norm": 15.798800240800274, + "learning_rate": 9.309661607064182e-06, + "loss": 0.9975, + "step": 89205 + }, + { + "epoch": 1.07, + "grad_norm": 18.865980396707933, + "learning_rate": 9.309078823866869e-06, + "loss": 1.181, + "step": 89208 + }, + { + "epoch": 1.07, + "grad_norm": 8.354066519339364, + "learning_rate": 9.308496043027427e-06, + "loss": 1.2992, + "step": 89211 + }, + { + "epoch": 1.07, + "grad_norm": 12.600008396391202, + "learning_rate": 9.307913264547836e-06, + "loss": 1.1871, + "step": 89214 + }, + { + "epoch": 1.07, + "grad_norm": 8.699722918716086, + "learning_rate": 9.307330488430087e-06, + "loss": 1.1248, + "step": 89217 + }, + { + "epoch": 1.07, + "grad_norm": 7.973612215205209, + "learning_rate": 9.30674771467617e-06, + "loss": 1.2341, + "step": 89220 + }, + { + "epoch": 1.07, + "grad_norm": 35.3789505355611, + "learning_rate": 9.306164943288071e-06, + "loss": 1.326, + "step": 89223 + }, + { + "epoch": 1.07, + "grad_norm": 26.527043126145344, + "learning_rate": 9.305582174267788e-06, + "loss": 1.033, + "step": 89226 + }, + { + "epoch": 1.07, + "grad_norm": 3.383929594017714, + "learning_rate": 9.3049994076173e-06, + "loss": 1.746, + "step": 89229 + }, + { + "epoch": 1.07, + "grad_norm": 6.4276239454520345, + "learning_rate": 9.304416643338595e-06, + "loss": 0.9091, + "step": 89232 + }, + { + "epoch": 1.07, + "grad_norm": 2.870066084209719, + "learning_rate": 9.303833881433668e-06, + "loss": 1.3084, + "step": 89235 + }, + { + "epoch": 1.07, + "grad_norm": 11.89412929737297, + "learning_rate": 9.303251121904506e-06, + "loss": 1.3884, + "step": 89238 + }, + { + "epoch": 1.07, + "grad_norm": 9.559940720127159, + "learning_rate": 9.302668364753096e-06, + "loss": 1.4013, + "step": 89241 + }, + { + "epoch": 1.07, + "grad_norm": 2.8215302401685647, + "learning_rate": 9.302085609981427e-06, + "loss": 0.7718, + "step": 89244 + }, + { + "epoch": 1.07, + "grad_norm": 4.629539981586016, + "learning_rate": 9.30150285759149e-06, + "loss": 1.4809, + "step": 89247 + }, + { + "epoch": 1.07, + "grad_norm": 16.31386654695957, + "learning_rate": 9.300920107585269e-06, + "loss": 1.2859, + "step": 89250 + }, + { + "epoch": 1.07, + "grad_norm": 5.20206705031188, + "learning_rate": 9.300337359964761e-06, + "loss": 1.6231, + "step": 89253 + }, + { + "epoch": 1.07, + "grad_norm": 21.074115716806602, + "learning_rate": 9.299754614731947e-06, + "loss": 1.5341, + "step": 89256 + }, + { + "epoch": 1.07, + "grad_norm": 6.689454005676129, + "learning_rate": 9.299171871888819e-06, + "loss": 1.1169, + "step": 89259 + }, + { + "epoch": 1.07, + "grad_norm": 14.61285114682204, + "learning_rate": 9.298589131437361e-06, + "loss": 1.3686, + "step": 89262 + }, + { + "epoch": 1.07, + "grad_norm": 10.675593050922235, + "learning_rate": 9.298006393379571e-06, + "loss": 1.211, + "step": 89265 + }, + { + "epoch": 1.07, + "grad_norm": 21.498418176776696, + "learning_rate": 9.297423657717429e-06, + "loss": 1.0965, + "step": 89268 + }, + { + "epoch": 1.07, + "grad_norm": 8.466819774871508, + "learning_rate": 9.296840924452929e-06, + "loss": 1.276, + "step": 89271 + }, + { + "epoch": 1.07, + "grad_norm": 5.750418523816944, + "learning_rate": 9.296258193588055e-06, + "loss": 1.2596, + "step": 89274 + }, + { + "epoch": 1.07, + "grad_norm": 24.00173021881464, + "learning_rate": 9.2956754651248e-06, + "loss": 1.3173, + "step": 89277 + }, + { + "epoch": 1.07, + "grad_norm": 8.820903367795454, + "learning_rate": 9.295092739065152e-06, + "loss": 1.0548, + "step": 89280 + }, + { + "epoch": 1.07, + "grad_norm": 8.878922787275261, + "learning_rate": 9.294510015411098e-06, + "loss": 1.305, + "step": 89283 + }, + { + "epoch": 1.07, + "grad_norm": 7.146222844661285, + "learning_rate": 9.293927294164625e-06, + "loss": 1.158, + "step": 89286 + }, + { + "epoch": 1.07, + "grad_norm": 10.430219794574372, + "learning_rate": 9.293344575327726e-06, + "loss": 1.2185, + "step": 89289 + }, + { + "epoch": 1.07, + "grad_norm": 7.2464551584597645, + "learning_rate": 9.29276185890239e-06, + "loss": 0.9205, + "step": 89292 + }, + { + "epoch": 1.07, + "grad_norm": 4.193966536680064, + "learning_rate": 9.292179144890598e-06, + "loss": 0.971, + "step": 89295 + }, + { + "epoch": 1.07, + "grad_norm": 4.992403225674177, + "learning_rate": 9.291596433294344e-06, + "loss": 1.0144, + "step": 89298 + }, + { + "epoch": 1.07, + "grad_norm": 5.971525141805151, + "learning_rate": 9.291013724115619e-06, + "loss": 1.0218, + "step": 89301 + }, + { + "epoch": 1.07, + "grad_norm": 8.138419783152893, + "learning_rate": 9.290431017356406e-06, + "loss": 1.3149, + "step": 89304 + }, + { + "epoch": 1.07, + "grad_norm": 6.843211138872861, + "learning_rate": 9.289848313018701e-06, + "loss": 1.019, + "step": 89307 + }, + { + "epoch": 1.07, + "grad_norm": 3.4911508424140645, + "learning_rate": 9.289265611104483e-06, + "loss": 1.1302, + "step": 89310 + }, + { + "epoch": 1.07, + "grad_norm": 11.037059540336497, + "learning_rate": 9.288682911615747e-06, + "loss": 1.5327, + "step": 89313 + }, + { + "epoch": 1.07, + "grad_norm": 3.9454827273233968, + "learning_rate": 9.288100214554479e-06, + "loss": 1.6095, + "step": 89316 + }, + { + "epoch": 1.07, + "grad_norm": 20.621894147352414, + "learning_rate": 9.287517519922672e-06, + "loss": 1.2141, + "step": 89319 + }, + { + "epoch": 1.07, + "grad_norm": 9.63067893654758, + "learning_rate": 9.286934827722308e-06, + "loss": 1.2016, + "step": 89322 + }, + { + "epoch": 1.07, + "grad_norm": 3.929038200942458, + "learning_rate": 9.28635213795538e-06, + "loss": 1.3137, + "step": 89325 + }, + { + "epoch": 1.07, + "grad_norm": 5.652160985197983, + "learning_rate": 9.285769450623875e-06, + "loss": 1.2857, + "step": 89328 + }, + { + "epoch": 1.07, + "grad_norm": 14.15921699879952, + "learning_rate": 9.28518676572978e-06, + "loss": 1.2572, + "step": 89331 + }, + { + "epoch": 1.07, + "grad_norm": 7.622333861505051, + "learning_rate": 9.284604083275089e-06, + "loss": 1.1729, + "step": 89334 + }, + { + "epoch": 1.07, + "grad_norm": 10.280758811120617, + "learning_rate": 9.284021403261783e-06, + "loss": 1.1636, + "step": 89337 + }, + { + "epoch": 1.07, + "grad_norm": 10.240749808696457, + "learning_rate": 9.283438725691853e-06, + "loss": 1.357, + "step": 89340 + }, + { + "epoch": 1.07, + "grad_norm": 2.2209957875662045, + "learning_rate": 9.282856050567291e-06, + "loss": 1.0789, + "step": 89343 + }, + { + "epoch": 1.07, + "grad_norm": 5.412988183577372, + "learning_rate": 9.282273377890085e-06, + "loss": 1.1787, + "step": 89346 + }, + { + "epoch": 1.07, + "grad_norm": 9.39513249352096, + "learning_rate": 9.281690707662218e-06, + "loss": 1.2065, + "step": 89349 + }, + { + "epoch": 1.07, + "grad_norm": 8.681570173640619, + "learning_rate": 9.28110803988568e-06, + "loss": 0.996, + "step": 89352 + }, + { + "epoch": 1.07, + "grad_norm": 9.337176670148356, + "learning_rate": 9.280525374562466e-06, + "loss": 0.9957, + "step": 89355 + }, + { + "epoch": 1.07, + "grad_norm": 18.68683701190792, + "learning_rate": 9.279942711694555e-06, + "loss": 1.1936, + "step": 89358 + }, + { + "epoch": 1.07, + "grad_norm": 9.573646761755061, + "learning_rate": 9.279360051283947e-06, + "loss": 1.2727, + "step": 89361 + }, + { + "epoch": 1.07, + "grad_norm": 3.9349879699423456, + "learning_rate": 9.278777393332618e-06, + "loss": 1.0655, + "step": 89364 + }, + { + "epoch": 1.07, + "grad_norm": 18.186845099310474, + "learning_rate": 9.278194737842564e-06, + "loss": 1.0848, + "step": 89367 + }, + { + "epoch": 1.07, + "grad_norm": 2.723492216402876, + "learning_rate": 9.277612084815769e-06, + "loss": 0.9456, + "step": 89370 + }, + { + "epoch": 1.07, + "grad_norm": 5.1281987174401085, + "learning_rate": 9.27702943425423e-06, + "loss": 1.2949, + "step": 89373 + }, + { + "epoch": 1.07, + "grad_norm": 16.675493801081142, + "learning_rate": 9.276446786159924e-06, + "loss": 1.4682, + "step": 89376 + }, + { + "epoch": 1.07, + "grad_norm": 7.60300861323345, + "learning_rate": 9.275864140534843e-06, + "loss": 1.0102, + "step": 89379 + }, + { + "epoch": 1.07, + "grad_norm": 15.805938918599992, + "learning_rate": 9.275281497380978e-06, + "loss": 1.0782, + "step": 89382 + }, + { + "epoch": 1.07, + "grad_norm": 15.353282473569683, + "learning_rate": 9.274698856700318e-06, + "loss": 1.1813, + "step": 89385 + }, + { + "epoch": 1.07, + "grad_norm": 8.793435244315136, + "learning_rate": 9.274116218494851e-06, + "loss": 1.1917, + "step": 89388 + }, + { + "epoch": 1.07, + "grad_norm": 6.065785153825382, + "learning_rate": 9.273533582766561e-06, + "loss": 1.4249, + "step": 89391 + }, + { + "epoch": 1.07, + "grad_norm": 16.88080909377357, + "learning_rate": 9.272950949517438e-06, + "loss": 1.1548, + "step": 89394 + }, + { + "epoch": 1.07, + "grad_norm": 3.3711657256544063, + "learning_rate": 9.272368318749474e-06, + "loss": 1.1639, + "step": 89397 + }, + { + "epoch": 1.08, + "grad_norm": 6.386752274750486, + "learning_rate": 9.271785690464657e-06, + "loss": 1.2082, + "step": 89400 + }, + { + "epoch": 1.08, + "grad_norm": 5.9085673730067185, + "learning_rate": 9.27120306466497e-06, + "loss": 1.2275, + "step": 89403 + }, + { + "epoch": 1.08, + "grad_norm": 6.059935980818698, + "learning_rate": 9.270620441352403e-06, + "loss": 1.1913, + "step": 89406 + }, + { + "epoch": 1.08, + "grad_norm": 7.779981775930935, + "learning_rate": 9.270037820528948e-06, + "loss": 1.1646, + "step": 89409 + }, + { + "epoch": 1.08, + "grad_norm": 4.63499607669863, + "learning_rate": 9.269455202196589e-06, + "loss": 1.191, + "step": 89412 + }, + { + "epoch": 1.08, + "grad_norm": 3.2502191261764333, + "learning_rate": 9.268872586357321e-06, + "loss": 1.244, + "step": 89415 + }, + { + "epoch": 1.08, + "grad_norm": 2.4505247870248867, + "learning_rate": 9.268289973013122e-06, + "loss": 0.9014, + "step": 89418 + }, + { + "epoch": 1.08, + "grad_norm": 4.474786449101657, + "learning_rate": 9.26770736216599e-06, + "loss": 1.3246, + "step": 89421 + }, + { + "epoch": 1.08, + "grad_norm": 11.754327677994583, + "learning_rate": 9.267124753817905e-06, + "loss": 0.947, + "step": 89424 + }, + { + "epoch": 1.08, + "grad_norm": 6.809896723473706, + "learning_rate": 9.266542147970864e-06, + "loss": 1.5483, + "step": 89427 + }, + { + "epoch": 1.08, + "grad_norm": 19.20776982556842, + "learning_rate": 9.265959544626849e-06, + "loss": 1.5831, + "step": 89430 + }, + { + "epoch": 1.08, + "grad_norm": 6.721999378727695, + "learning_rate": 9.265376943787848e-06, + "loss": 1.5941, + "step": 89433 + }, + { + "epoch": 1.08, + "grad_norm": 11.591144559291825, + "learning_rate": 9.264794345455848e-06, + "loss": 1.213, + "step": 89436 + }, + { + "epoch": 1.08, + "grad_norm": 2.5542165099681235, + "learning_rate": 9.264211749632844e-06, + "loss": 1.2414, + "step": 89439 + }, + { + "epoch": 1.08, + "grad_norm": 20.004744951315253, + "learning_rate": 9.263629156320823e-06, + "loss": 1.1003, + "step": 89442 + }, + { + "epoch": 1.08, + "grad_norm": 8.154111557730307, + "learning_rate": 9.263046565521766e-06, + "loss": 1.2269, + "step": 89445 + }, + { + "epoch": 1.08, + "grad_norm": 34.68079366927153, + "learning_rate": 9.262463977237665e-06, + "loss": 0.9132, + "step": 89448 + }, + { + "epoch": 1.08, + "grad_norm": 31.956467669805097, + "learning_rate": 9.26188139147051e-06, + "loss": 1.3778, + "step": 89451 + }, + { + "epoch": 1.08, + "grad_norm": 9.391828291729066, + "learning_rate": 9.261298808222292e-06, + "loss": 1.3659, + "step": 89454 + }, + { + "epoch": 1.08, + "grad_norm": 10.023741286816065, + "learning_rate": 9.26071622749499e-06, + "loss": 1.0719, + "step": 89457 + }, + { + "epoch": 1.08, + "grad_norm": 25.835468901329918, + "learning_rate": 9.260133649290598e-06, + "loss": 1.0146, + "step": 89460 + }, + { + "epoch": 1.08, + "grad_norm": 5.725431299411326, + "learning_rate": 9.259551073611105e-06, + "loss": 1.4115, + "step": 89463 + }, + { + "epoch": 1.08, + "grad_norm": 3.4733647726474546, + "learning_rate": 9.258968500458495e-06, + "loss": 1.2091, + "step": 89466 + }, + { + "epoch": 1.08, + "grad_norm": 10.326934038962344, + "learning_rate": 9.258385929834763e-06, + "loss": 1.3414, + "step": 89469 + }, + { + "epoch": 1.08, + "grad_norm": 3.539773391692772, + "learning_rate": 9.257803361741891e-06, + "loss": 0.9366, + "step": 89472 + }, + { + "epoch": 1.08, + "grad_norm": 49.6905018989187, + "learning_rate": 9.257220796181864e-06, + "loss": 1.3308, + "step": 89475 + }, + { + "epoch": 1.08, + "grad_norm": 7.858158394320103, + "learning_rate": 9.256638233156679e-06, + "loss": 1.2366, + "step": 89478 + }, + { + "epoch": 1.08, + "grad_norm": 20.0758231098652, + "learning_rate": 9.256055672668321e-06, + "loss": 1.1641, + "step": 89481 + }, + { + "epoch": 1.08, + "grad_norm": 9.85199118695297, + "learning_rate": 9.255473114718777e-06, + "loss": 1.1269, + "step": 89484 + }, + { + "epoch": 1.08, + "grad_norm": 21.641663191594088, + "learning_rate": 9.254890559310032e-06, + "loss": 1.2225, + "step": 89487 + }, + { + "epoch": 1.08, + "grad_norm": 16.20666645014975, + "learning_rate": 9.254308006444077e-06, + "loss": 1.2224, + "step": 89490 + }, + { + "epoch": 1.08, + "grad_norm": 28.026563810276606, + "learning_rate": 9.253725456122903e-06, + "loss": 1.0565, + "step": 89493 + }, + { + "epoch": 1.08, + "grad_norm": 9.30061347604212, + "learning_rate": 9.253142908348496e-06, + "loss": 0.9508, + "step": 89496 + }, + { + "epoch": 1.08, + "grad_norm": 8.448944831925944, + "learning_rate": 9.25256036312284e-06, + "loss": 1.0924, + "step": 89499 + }, + { + "epoch": 1.08, + "grad_norm": 8.426758728778944, + "learning_rate": 9.251977820447926e-06, + "loss": 1.2271, + "step": 89502 + }, + { + "epoch": 1.08, + "grad_norm": 3.904019330321786, + "learning_rate": 9.251395280325746e-06, + "loss": 1.1282, + "step": 89505 + }, + { + "epoch": 1.08, + "grad_norm": 7.175954156423301, + "learning_rate": 9.250812742758285e-06, + "loss": 1.3411, + "step": 89508 + }, + { + "epoch": 1.08, + "grad_norm": 14.85131667771382, + "learning_rate": 9.250230207747526e-06, + "loss": 1.4017, + "step": 89511 + }, + { + "epoch": 1.08, + "grad_norm": 7.768339541036479, + "learning_rate": 9.249647675295463e-06, + "loss": 1.0434, + "step": 89514 + }, + { + "epoch": 1.08, + "grad_norm": 16.938358629785135, + "learning_rate": 9.24906514540408e-06, + "loss": 1.2156, + "step": 89517 + }, + { + "epoch": 1.08, + "grad_norm": 11.468531132768375, + "learning_rate": 9.248482618075368e-06, + "loss": 1.2728, + "step": 89520 + }, + { + "epoch": 1.08, + "grad_norm": 6.406512202935944, + "learning_rate": 9.247900093311318e-06, + "loss": 1.1846, + "step": 89523 + }, + { + "epoch": 1.08, + "grad_norm": 5.467545591889672, + "learning_rate": 9.247317571113911e-06, + "loss": 1.0156, + "step": 89526 + }, + { + "epoch": 1.08, + "grad_norm": 6.575339602763436, + "learning_rate": 9.246735051485137e-06, + "loss": 1.535, + "step": 89529 + }, + { + "epoch": 1.08, + "grad_norm": 6.666286354866019, + "learning_rate": 9.246152534426985e-06, + "loss": 1.4081, + "step": 89532 + }, + { + "epoch": 1.08, + "grad_norm": 7.1952625642260015, + "learning_rate": 9.245570019941447e-06, + "loss": 1.2438, + "step": 89535 + }, + { + "epoch": 1.08, + "grad_norm": 15.025850442612498, + "learning_rate": 9.244987508030505e-06, + "loss": 1.302, + "step": 89538 + }, + { + "epoch": 1.08, + "grad_norm": 3.65842756142595, + "learning_rate": 9.244404998696146e-06, + "loss": 1.1606, + "step": 89541 + }, + { + "epoch": 1.08, + "grad_norm": 8.944696260303534, + "learning_rate": 9.243822491940359e-06, + "loss": 1.3151, + "step": 89544 + }, + { + "epoch": 1.08, + "grad_norm": 14.896039032381772, + "learning_rate": 9.243239987765139e-06, + "loss": 1.256, + "step": 89547 + }, + { + "epoch": 1.08, + "grad_norm": 8.351309361707132, + "learning_rate": 9.242657486172468e-06, + "loss": 1.3748, + "step": 89550 + }, + { + "epoch": 1.08, + "grad_norm": 7.192370867346874, + "learning_rate": 9.242074987164331e-06, + "loss": 1.2468, + "step": 89553 + }, + { + "epoch": 1.08, + "grad_norm": 100.41780357481771, + "learning_rate": 9.24149249074272e-06, + "loss": 1.2337, + "step": 89556 + }, + { + "epoch": 1.08, + "grad_norm": 5.724531313691277, + "learning_rate": 9.240909996909621e-06, + "loss": 1.2022, + "step": 89559 + }, + { + "epoch": 1.08, + "grad_norm": 3.919124495868843, + "learning_rate": 9.240327505667027e-06, + "loss": 1.6632, + "step": 89562 + }, + { + "epoch": 1.08, + "grad_norm": 28.81310059897418, + "learning_rate": 9.239745017016916e-06, + "loss": 1.2717, + "step": 89565 + }, + { + "epoch": 1.08, + "grad_norm": 7.651500496661885, + "learning_rate": 9.239162530961286e-06, + "loss": 0.9876, + "step": 89568 + }, + { + "epoch": 1.08, + "grad_norm": 17.85465275866031, + "learning_rate": 9.238580047502116e-06, + "loss": 1.0326, + "step": 89571 + }, + { + "epoch": 1.08, + "grad_norm": 13.952356875997566, + "learning_rate": 9.237997566641398e-06, + "loss": 1.0424, + "step": 89574 + }, + { + "epoch": 1.08, + "grad_norm": 5.441754709456006, + "learning_rate": 9.237415088381125e-06, + "loss": 1.2704, + "step": 89577 + }, + { + "epoch": 1.08, + "grad_norm": 8.374470559140958, + "learning_rate": 9.236832612723278e-06, + "loss": 1.004, + "step": 89580 + }, + { + "epoch": 1.08, + "grad_norm": 4.851330294292857, + "learning_rate": 9.236250139669843e-06, + "loss": 1.1059, + "step": 89583 + }, + { + "epoch": 1.08, + "grad_norm": 5.079330321186687, + "learning_rate": 9.235667669222812e-06, + "loss": 1.0753, + "step": 89586 + }, + { + "epoch": 1.08, + "grad_norm": 9.774580319743002, + "learning_rate": 9.235085201384175e-06, + "loss": 1.3124, + "step": 89589 + }, + { + "epoch": 1.08, + "grad_norm": 31.233417970133317, + "learning_rate": 9.234502736155916e-06, + "loss": 1.1603, + "step": 89592 + }, + { + "epoch": 1.08, + "grad_norm": 4.681811165777408, + "learning_rate": 9.23392027354002e-06, + "loss": 1.1528, + "step": 89595 + }, + { + "epoch": 1.08, + "grad_norm": 6.795311689424216, + "learning_rate": 9.23333781353848e-06, + "loss": 1.4559, + "step": 89598 + }, + { + "epoch": 1.08, + "grad_norm": 8.212084518569124, + "learning_rate": 9.232755356153283e-06, + "loss": 1.3142, + "step": 89601 + }, + { + "epoch": 1.08, + "grad_norm": 15.4979836259235, + "learning_rate": 9.232172901386417e-06, + "loss": 1.2932, + "step": 89604 + }, + { + "epoch": 1.08, + "grad_norm": 8.6246878157295, + "learning_rate": 9.231590449239864e-06, + "loss": 1.16, + "step": 89607 + }, + { + "epoch": 1.08, + "grad_norm": 13.822265507341662, + "learning_rate": 9.23100799971562e-06, + "loss": 1.1925, + "step": 89610 + }, + { + "epoch": 1.08, + "grad_norm": 11.747542645581234, + "learning_rate": 9.230425552815666e-06, + "loss": 1.1243, + "step": 89613 + }, + { + "epoch": 1.08, + "grad_norm": 5.584236995878647, + "learning_rate": 9.229843108541995e-06, + "loss": 1.0664, + "step": 89616 + }, + { + "epoch": 1.08, + "grad_norm": 7.492438100126823, + "learning_rate": 9.229260666896589e-06, + "loss": 1.2093, + "step": 89619 + }, + { + "epoch": 1.08, + "grad_norm": 5.196292692952653, + "learning_rate": 9.22867822788144e-06, + "loss": 1.0173, + "step": 89622 + }, + { + "epoch": 1.08, + "grad_norm": 8.947802573676773, + "learning_rate": 9.228095791498533e-06, + "loss": 1.0115, + "step": 89625 + }, + { + "epoch": 1.08, + "grad_norm": 9.206636956607577, + "learning_rate": 9.227513357749857e-06, + "loss": 1.3505, + "step": 89628 + }, + { + "epoch": 1.08, + "grad_norm": 18.80280681668475, + "learning_rate": 9.226930926637405e-06, + "loss": 1.0956, + "step": 89631 + }, + { + "epoch": 1.08, + "grad_norm": 19.103496423361257, + "learning_rate": 9.226348498163156e-06, + "loss": 1.231, + "step": 89634 + }, + { + "epoch": 1.08, + "grad_norm": 4.5055806203845, + "learning_rate": 9.225766072329098e-06, + "loss": 1.1869, + "step": 89637 + }, + { + "epoch": 1.08, + "grad_norm": 27.4226114530349, + "learning_rate": 9.225183649137222e-06, + "loss": 1.2219, + "step": 89640 + }, + { + "epoch": 1.08, + "grad_norm": 7.6538259075347295, + "learning_rate": 9.224601228589521e-06, + "loss": 1.3855, + "step": 89643 + }, + { + "epoch": 1.08, + "grad_norm": 5.6326289856159875, + "learning_rate": 9.224018810687974e-06, + "loss": 1.2608, + "step": 89646 + }, + { + "epoch": 1.08, + "grad_norm": 8.266787534301951, + "learning_rate": 9.223436395434567e-06, + "loss": 1.2338, + "step": 89649 + }, + { + "epoch": 1.08, + "grad_norm": 14.763311765226533, + "learning_rate": 9.222853982831296e-06, + "loss": 1.5043, + "step": 89652 + }, + { + "epoch": 1.08, + "grad_norm": 6.282761495884856, + "learning_rate": 9.222271572880141e-06, + "loss": 1.498, + "step": 89655 + }, + { + "epoch": 1.08, + "grad_norm": 10.634776822539646, + "learning_rate": 9.221689165583098e-06, + "loss": 1.3767, + "step": 89658 + }, + { + "epoch": 1.08, + "grad_norm": 50.66776261536961, + "learning_rate": 9.221106760942145e-06, + "loss": 1.1674, + "step": 89661 + }, + { + "epoch": 1.08, + "grad_norm": 15.234513648566912, + "learning_rate": 9.220524358959278e-06, + "loss": 0.9597, + "step": 89664 + }, + { + "epoch": 1.08, + "grad_norm": 10.311297657464483, + "learning_rate": 9.219941959636477e-06, + "loss": 1.2113, + "step": 89667 + }, + { + "epoch": 1.08, + "grad_norm": 9.323515344456695, + "learning_rate": 9.219359562975737e-06, + "loss": 0.7516, + "step": 89670 + }, + { + "epoch": 1.08, + "grad_norm": 2.6692446170933057, + "learning_rate": 9.218777168979037e-06, + "loss": 1.2285, + "step": 89673 + }, + { + "epoch": 1.08, + "grad_norm": 5.428241069472246, + "learning_rate": 9.218194777648373e-06, + "loss": 1.1978, + "step": 89676 + }, + { + "epoch": 1.08, + "grad_norm": 4.107566230749577, + "learning_rate": 9.217612388985725e-06, + "loss": 1.345, + "step": 89679 + }, + { + "epoch": 1.08, + "grad_norm": 9.81889144250608, + "learning_rate": 9.217030002993085e-06, + "loss": 1.3224, + "step": 89682 + }, + { + "epoch": 1.08, + "grad_norm": 6.735890028846919, + "learning_rate": 9.216447619672442e-06, + "loss": 1.2047, + "step": 89685 + }, + { + "epoch": 1.08, + "grad_norm": 2.649905992852791, + "learning_rate": 9.215865239025781e-06, + "loss": 1.3939, + "step": 89688 + }, + { + "epoch": 1.08, + "grad_norm": 8.866198184044158, + "learning_rate": 9.215282861055087e-06, + "loss": 0.9846, + "step": 89691 + }, + { + "epoch": 1.08, + "grad_norm": 6.844335286552321, + "learning_rate": 9.214700485762352e-06, + "loss": 1.3051, + "step": 89694 + }, + { + "epoch": 1.08, + "grad_norm": 6.042317767516292, + "learning_rate": 9.214118113149561e-06, + "loss": 1.068, + "step": 89697 + }, + { + "epoch": 1.08, + "grad_norm": 14.276603600939689, + "learning_rate": 9.213535743218702e-06, + "loss": 1.2044, + "step": 89700 + }, + { + "epoch": 1.08, + "grad_norm": 14.765456466522274, + "learning_rate": 9.21295337597176e-06, + "loss": 1.2733, + "step": 89703 + }, + { + "epoch": 1.08, + "grad_norm": 7.4341306897600195, + "learning_rate": 9.212371011410727e-06, + "loss": 1.0012, + "step": 89706 + }, + { + "epoch": 1.08, + "grad_norm": 5.948698243944818, + "learning_rate": 9.211788649537586e-06, + "loss": 1.111, + "step": 89709 + }, + { + "epoch": 1.08, + "grad_norm": 6.442799174620198, + "learning_rate": 9.21120629035433e-06, + "loss": 1.2873, + "step": 89712 + }, + { + "epoch": 1.08, + "grad_norm": 21.916487920319476, + "learning_rate": 9.21062393386294e-06, + "loss": 1.6512, + "step": 89715 + }, + { + "epoch": 1.08, + "grad_norm": 2.6569393323484913, + "learning_rate": 9.210041580065407e-06, + "loss": 1.4517, + "step": 89718 + }, + { + "epoch": 1.08, + "grad_norm": 8.021580999291638, + "learning_rate": 9.209459228963714e-06, + "loss": 1.3322, + "step": 89721 + }, + { + "epoch": 1.08, + "grad_norm": 17.876569575162563, + "learning_rate": 9.208876880559858e-06, + "loss": 1.2086, + "step": 89724 + }, + { + "epoch": 1.08, + "grad_norm": 24.825607673752916, + "learning_rate": 9.208294534855817e-06, + "loss": 1.1551, + "step": 89727 + }, + { + "epoch": 1.08, + "grad_norm": 23.914842752057286, + "learning_rate": 9.207712191853582e-06, + "loss": 1.1025, + "step": 89730 + }, + { + "epoch": 1.08, + "grad_norm": 9.74645695105887, + "learning_rate": 9.207129851555137e-06, + "loss": 1.0842, + "step": 89733 + }, + { + "epoch": 1.08, + "grad_norm": 24.022457207775116, + "learning_rate": 9.206547513962476e-06, + "loss": 1.1776, + "step": 89736 + }, + { + "epoch": 1.08, + "grad_norm": 20.43270054365606, + "learning_rate": 9.205965179077584e-06, + "loss": 0.997, + "step": 89739 + }, + { + "epoch": 1.08, + "grad_norm": 14.370932778344702, + "learning_rate": 9.205382846902444e-06, + "loss": 1.5551, + "step": 89742 + }, + { + "epoch": 1.08, + "grad_norm": 38.03558982726539, + "learning_rate": 9.204800517439045e-06, + "loss": 1.0753, + "step": 89745 + }, + { + "epoch": 1.08, + "grad_norm": 7.843215149035022, + "learning_rate": 9.204218190689378e-06, + "loss": 1.037, + "step": 89748 + }, + { + "epoch": 1.08, + "grad_norm": 8.290214839243225, + "learning_rate": 9.20363586665543e-06, + "loss": 1.3973, + "step": 89751 + }, + { + "epoch": 1.08, + "grad_norm": 19.09705026580909, + "learning_rate": 9.20305354533918e-06, + "loss": 0.9889, + "step": 89754 + }, + { + "epoch": 1.08, + "grad_norm": 58.69686262890676, + "learning_rate": 9.202471226742622e-06, + "loss": 0.9712, + "step": 89757 + }, + { + "epoch": 1.08, + "grad_norm": 4.03947993449126, + "learning_rate": 9.201888910867745e-06, + "loss": 0.9754, + "step": 89760 + }, + { + "epoch": 1.08, + "grad_norm": 5.818184950289821, + "learning_rate": 9.201306597716532e-06, + "loss": 1.2872, + "step": 89763 + }, + { + "epoch": 1.08, + "grad_norm": 6.002654734487332, + "learning_rate": 9.200724287290976e-06, + "loss": 1.4585, + "step": 89766 + }, + { + "epoch": 1.08, + "grad_norm": 18.66770848732039, + "learning_rate": 9.200141979593054e-06, + "loss": 0.9916, + "step": 89769 + }, + { + "epoch": 1.08, + "grad_norm": 8.921456938732847, + "learning_rate": 9.199559674624763e-06, + "loss": 1.5025, + "step": 89772 + }, + { + "epoch": 1.08, + "grad_norm": 6.82911120098199, + "learning_rate": 9.198977372388084e-06, + "loss": 1.1039, + "step": 89775 + }, + { + "epoch": 1.08, + "grad_norm": 21.752515044578942, + "learning_rate": 9.198395072885011e-06, + "loss": 1.0718, + "step": 89778 + }, + { + "epoch": 1.08, + "grad_norm": 22.736017404231475, + "learning_rate": 9.197812776117522e-06, + "loss": 0.849, + "step": 89781 + }, + { + "epoch": 1.08, + "grad_norm": 30.62915221311035, + "learning_rate": 9.197230482087612e-06, + "loss": 1.2459, + "step": 89784 + }, + { + "epoch": 1.08, + "grad_norm": 14.04686152319225, + "learning_rate": 9.196648190797264e-06, + "loss": 0.7621, + "step": 89787 + }, + { + "epoch": 1.08, + "grad_norm": 10.437363752879284, + "learning_rate": 9.196065902248468e-06, + "loss": 1.4253, + "step": 89790 + }, + { + "epoch": 1.08, + "grad_norm": 12.729431092913535, + "learning_rate": 9.19548361644321e-06, + "loss": 1.0905, + "step": 89793 + }, + { + "epoch": 1.08, + "grad_norm": 6.362905858273269, + "learning_rate": 9.194901333383474e-06, + "loss": 0.7924, + "step": 89796 + }, + { + "epoch": 1.08, + "grad_norm": 6.338775051101609, + "learning_rate": 9.194319053071249e-06, + "loss": 1.3582, + "step": 89799 + }, + { + "epoch": 1.08, + "grad_norm": 7.149570489872362, + "learning_rate": 9.193736775508525e-06, + "loss": 1.2137, + "step": 89802 + }, + { + "epoch": 1.08, + "grad_norm": 7.291398470694233, + "learning_rate": 9.19315450069729e-06, + "loss": 1.1956, + "step": 89805 + }, + { + "epoch": 1.08, + "grad_norm": 7.613680945125706, + "learning_rate": 9.192572228639523e-06, + "loss": 1.1662, + "step": 89808 + }, + { + "epoch": 1.08, + "grad_norm": 19.39953901665966, + "learning_rate": 9.191989959337216e-06, + "loss": 1.1845, + "step": 89811 + }, + { + "epoch": 1.08, + "grad_norm": 6.155301386346501, + "learning_rate": 9.19140769279236e-06, + "loss": 1.1827, + "step": 89814 + }, + { + "epoch": 1.08, + "grad_norm": 7.787302782241116, + "learning_rate": 9.190825429006933e-06, + "loss": 1.542, + "step": 89817 + }, + { + "epoch": 1.08, + "grad_norm": 8.217245382026011, + "learning_rate": 9.190243167982934e-06, + "loss": 1.0453, + "step": 89820 + }, + { + "epoch": 1.08, + "grad_norm": 14.272492007380393, + "learning_rate": 9.189660909722339e-06, + "loss": 1.2015, + "step": 89823 + }, + { + "epoch": 1.08, + "grad_norm": 6.980026658478144, + "learning_rate": 9.18907865422714e-06, + "loss": 1.021, + "step": 89826 + }, + { + "epoch": 1.08, + "grad_norm": 6.504382267726964, + "learning_rate": 9.188496401499324e-06, + "loss": 1.346, + "step": 89829 + }, + { + "epoch": 1.08, + "grad_norm": 29.33585817776279, + "learning_rate": 9.18791415154088e-06, + "loss": 1.3526, + "step": 89832 + }, + { + "epoch": 1.08, + "grad_norm": 27.276305693500575, + "learning_rate": 9.187331904353789e-06, + "loss": 1.0267, + "step": 89835 + }, + { + "epoch": 1.08, + "grad_norm": 12.315148090273317, + "learning_rate": 9.186749659940043e-06, + "loss": 1.4394, + "step": 89838 + }, + { + "epoch": 1.08, + "grad_norm": 5.536486904746148, + "learning_rate": 9.186167418301624e-06, + "loss": 1.3397, + "step": 89841 + }, + { + "epoch": 1.08, + "grad_norm": 3.9067157463530013, + "learning_rate": 9.185585179440526e-06, + "loss": 1.2465, + "step": 89844 + }, + { + "epoch": 1.08, + "grad_norm": 33.567667002926726, + "learning_rate": 9.185002943358735e-06, + "loss": 0.9469, + "step": 89847 + }, + { + "epoch": 1.08, + "grad_norm": 12.467190230072267, + "learning_rate": 9.18442071005823e-06, + "loss": 1.3024, + "step": 89850 + }, + { + "epoch": 1.08, + "grad_norm": 16.26057721938739, + "learning_rate": 9.183838479541004e-06, + "loss": 1.2564, + "step": 89853 + }, + { + "epoch": 1.08, + "grad_norm": 7.414236584069428, + "learning_rate": 9.183256251809047e-06, + "loss": 1.1926, + "step": 89856 + }, + { + "epoch": 1.08, + "grad_norm": 18.007674816564293, + "learning_rate": 9.182674026864343e-06, + "loss": 1.2273, + "step": 89859 + }, + { + "epoch": 1.08, + "grad_norm": 11.82551485377053, + "learning_rate": 9.182091804708873e-06, + "loss": 1.0239, + "step": 89862 + }, + { + "epoch": 1.08, + "grad_norm": 8.807175902582806, + "learning_rate": 9.181509585344629e-06, + "loss": 1.2685, + "step": 89865 + }, + { + "epoch": 1.08, + "grad_norm": 5.952798731978603, + "learning_rate": 9.180927368773601e-06, + "loss": 1.1841, + "step": 89868 + }, + { + "epoch": 1.08, + "grad_norm": 4.417722961475974, + "learning_rate": 9.18034515499777e-06, + "loss": 1.2467, + "step": 89871 + }, + { + "epoch": 1.08, + "grad_norm": 17.982334755996437, + "learning_rate": 9.17976294401913e-06, + "loss": 1.0857, + "step": 89874 + }, + { + "epoch": 1.08, + "grad_norm": 2.923895699301868, + "learning_rate": 9.179180735839661e-06, + "loss": 1.3854, + "step": 89877 + }, + { + "epoch": 1.08, + "grad_norm": 69.74432020753815, + "learning_rate": 9.178598530461353e-06, + "loss": 1.1115, + "step": 89880 + }, + { + "epoch": 1.08, + "grad_norm": 5.009832334034481, + "learning_rate": 9.178016327886191e-06, + "loss": 1.1853, + "step": 89883 + }, + { + "epoch": 1.08, + "grad_norm": 6.717982292400094, + "learning_rate": 9.177434128116167e-06, + "loss": 1.2684, + "step": 89886 + }, + { + "epoch": 1.08, + "grad_norm": 3.3084974947841865, + "learning_rate": 9.176851931153263e-06, + "loss": 1.5745, + "step": 89889 + }, + { + "epoch": 1.08, + "grad_norm": 3.529187098334392, + "learning_rate": 9.176269736999463e-06, + "loss": 1.2667, + "step": 89892 + }, + { + "epoch": 1.08, + "grad_norm": 19.722842784425136, + "learning_rate": 9.175687545656758e-06, + "loss": 1.1935, + "step": 89895 + }, + { + "epoch": 1.08, + "grad_norm": 14.785289711216485, + "learning_rate": 9.175105357127139e-06, + "loss": 1.33, + "step": 89898 + }, + { + "epoch": 1.08, + "grad_norm": 6.315663788105738, + "learning_rate": 9.174523171412588e-06, + "loss": 1.0963, + "step": 89901 + }, + { + "epoch": 1.08, + "grad_norm": 6.100192648449498, + "learning_rate": 9.173940988515088e-06, + "loss": 1.0749, + "step": 89904 + }, + { + "epoch": 1.08, + "grad_norm": 6.326672150049339, + "learning_rate": 9.17335880843663e-06, + "loss": 1.1462, + "step": 89907 + }, + { + "epoch": 1.08, + "grad_norm": 4.057095293228827, + "learning_rate": 9.172776631179203e-06, + "loss": 1.0287, + "step": 89910 + }, + { + "epoch": 1.08, + "grad_norm": 19.439158467841786, + "learning_rate": 9.172194456744794e-06, + "loss": 1.3319, + "step": 89913 + }, + { + "epoch": 1.08, + "grad_norm": 6.133580625287186, + "learning_rate": 9.171612285135383e-06, + "loss": 1.2664, + "step": 89916 + }, + { + "epoch": 1.08, + "grad_norm": 9.798455417036065, + "learning_rate": 9.17103011635296e-06, + "loss": 1.3825, + "step": 89919 + }, + { + "epoch": 1.08, + "grad_norm": 15.323377156201069, + "learning_rate": 9.170447950399515e-06, + "loss": 0.9589, + "step": 89922 + }, + { + "epoch": 1.08, + "grad_norm": 8.281898401338648, + "learning_rate": 9.16986578727703e-06, + "loss": 1.1451, + "step": 89925 + }, + { + "epoch": 1.08, + "grad_norm": 14.34432591606182, + "learning_rate": 9.169283626987498e-06, + "loss": 1.3109, + "step": 89928 + }, + { + "epoch": 1.08, + "grad_norm": 7.537808755638396, + "learning_rate": 9.1687014695329e-06, + "loss": 1.3892, + "step": 89931 + }, + { + "epoch": 1.08, + "grad_norm": 10.229562554116931, + "learning_rate": 9.168119314915223e-06, + "loss": 1.0236, + "step": 89934 + }, + { + "epoch": 1.08, + "grad_norm": 11.656478041323693, + "learning_rate": 9.167537163136455e-06, + "loss": 1.1518, + "step": 89937 + }, + { + "epoch": 1.08, + "grad_norm": 4.984798035686343, + "learning_rate": 9.166955014198587e-06, + "loss": 1.1794, + "step": 89940 + }, + { + "epoch": 1.08, + "grad_norm": 17.287342773375883, + "learning_rate": 9.1663728681036e-06, + "loss": 1.2882, + "step": 89943 + }, + { + "epoch": 1.08, + "grad_norm": 7.042213033818246, + "learning_rate": 9.165790724853478e-06, + "loss": 1.2845, + "step": 89946 + }, + { + "epoch": 1.08, + "grad_norm": 14.887426781015519, + "learning_rate": 9.165208584450213e-06, + "loss": 1.0691, + "step": 89949 + }, + { + "epoch": 1.08, + "grad_norm": 10.141517476689184, + "learning_rate": 9.164626446895792e-06, + "loss": 1.413, + "step": 89952 + }, + { + "epoch": 1.08, + "grad_norm": 7.023297479359792, + "learning_rate": 9.164044312192203e-06, + "loss": 1.0126, + "step": 89955 + }, + { + "epoch": 1.08, + "grad_norm": 3.2093296127721875, + "learning_rate": 9.163462180341425e-06, + "loss": 0.9562, + "step": 89958 + }, + { + "epoch": 1.08, + "grad_norm": 8.844385011052356, + "learning_rate": 9.16288005134545e-06, + "loss": 1.297, + "step": 89961 + }, + { + "epoch": 1.08, + "grad_norm": 4.022369725067203, + "learning_rate": 9.162297925206266e-06, + "loss": 1.428, + "step": 89964 + }, + { + "epoch": 1.08, + "grad_norm": 19.63530747840167, + "learning_rate": 9.161715801925858e-06, + "loss": 1.2223, + "step": 89967 + }, + { + "epoch": 1.08, + "grad_norm": 12.862820455252741, + "learning_rate": 9.161133681506207e-06, + "loss": 1.4855, + "step": 89970 + }, + { + "epoch": 1.08, + "grad_norm": 6.814027374478658, + "learning_rate": 9.16055156394931e-06, + "loss": 1.241, + "step": 89973 + }, + { + "epoch": 1.08, + "grad_norm": 3.134624668873251, + "learning_rate": 9.159969449257143e-06, + "loss": 1.2258, + "step": 89976 + }, + { + "epoch": 1.08, + "grad_norm": 10.243362375192598, + "learning_rate": 9.1593873374317e-06, + "loss": 0.8763, + "step": 89979 + }, + { + "epoch": 1.08, + "grad_norm": 13.657082396843563, + "learning_rate": 9.158805228474967e-06, + "loss": 1.2322, + "step": 89982 + }, + { + "epoch": 1.08, + "grad_norm": 5.278983747458231, + "learning_rate": 9.15822312238893e-06, + "loss": 1.4036, + "step": 89985 + }, + { + "epoch": 1.08, + "grad_norm": 12.43889006380608, + "learning_rate": 9.15764101917557e-06, + "loss": 1.2882, + "step": 89988 + }, + { + "epoch": 1.08, + "grad_norm": 10.177346565109508, + "learning_rate": 9.157058918836878e-06, + "loss": 1.1746, + "step": 89991 + }, + { + "epoch": 1.08, + "grad_norm": 30.91726618900137, + "learning_rate": 9.156476821374843e-06, + "loss": 1.306, + "step": 89994 + }, + { + "epoch": 1.08, + "grad_norm": 5.5142600791975935, + "learning_rate": 9.155894726791449e-06, + "loss": 1.3458, + "step": 89997 + }, + { + "epoch": 1.08, + "grad_norm": 5.517839243982771, + "learning_rate": 9.15531263508868e-06, + "loss": 1.4348, + "step": 90000 + }, + { + "epoch": 1.08, + "grad_norm": 2.8503360021049797, + "learning_rate": 9.154730546268523e-06, + "loss": 1.2923, + "step": 90003 + }, + { + "epoch": 1.08, + "grad_norm": 14.69572397668834, + "learning_rate": 9.15414846033297e-06, + "loss": 1.0175, + "step": 90006 + }, + { + "epoch": 1.08, + "grad_norm": 12.171400124661277, + "learning_rate": 9.153566377284004e-06, + "loss": 1.459, + "step": 90009 + }, + { + "epoch": 1.08, + "grad_norm": 17.77386513353285, + "learning_rate": 9.15298429712361e-06, + "loss": 1.3509, + "step": 90012 + }, + { + "epoch": 1.08, + "grad_norm": 4.140203287083574, + "learning_rate": 9.152402219853772e-06, + "loss": 1.3773, + "step": 90015 + }, + { + "epoch": 1.08, + "grad_norm": 7.909384303201182, + "learning_rate": 9.151820145476485e-06, + "loss": 1.4038, + "step": 90018 + }, + { + "epoch": 1.08, + "grad_norm": 8.695778975888995, + "learning_rate": 9.15123807399373e-06, + "loss": 1.0406, + "step": 90021 + }, + { + "epoch": 1.08, + "grad_norm": 25.030072835398183, + "learning_rate": 9.15065600540749e-06, + "loss": 1.0483, + "step": 90024 + }, + { + "epoch": 1.08, + "grad_norm": 4.999816944880091, + "learning_rate": 9.150073939719758e-06, + "loss": 1.1953, + "step": 90027 + }, + { + "epoch": 1.08, + "grad_norm": 11.744831219673404, + "learning_rate": 9.149491876932516e-06, + "loss": 1.3398, + "step": 90030 + }, + { + "epoch": 1.08, + "grad_norm": 29.251400970315853, + "learning_rate": 9.14890981704775e-06, + "loss": 0.943, + "step": 90033 + }, + { + "epoch": 1.08, + "grad_norm": 10.056798013795868, + "learning_rate": 9.148327760067454e-06, + "loss": 1.5297, + "step": 90036 + }, + { + "epoch": 1.08, + "grad_norm": 3.7493759825439508, + "learning_rate": 9.147745705993605e-06, + "loss": 1.3683, + "step": 90039 + }, + { + "epoch": 1.08, + "grad_norm": 3.4040097063059642, + "learning_rate": 9.147163654828191e-06, + "loss": 1.7281, + "step": 90042 + }, + { + "epoch": 1.08, + "grad_norm": 7.973377356972806, + "learning_rate": 9.146581606573202e-06, + "loss": 1.2619, + "step": 90045 + }, + { + "epoch": 1.08, + "grad_norm": 15.554666563865574, + "learning_rate": 9.145999561230625e-06, + "loss": 1.0639, + "step": 90048 + }, + { + "epoch": 1.08, + "grad_norm": 6.554057542613621, + "learning_rate": 9.145417518802443e-06, + "loss": 0.9478, + "step": 90051 + }, + { + "epoch": 1.08, + "grad_norm": 27.104927264511375, + "learning_rate": 9.14483547929064e-06, + "loss": 1.645, + "step": 90054 + }, + { + "epoch": 1.08, + "grad_norm": 2.892840887486808, + "learning_rate": 9.144253442697206e-06, + "loss": 1.4448, + "step": 90057 + }, + { + "epoch": 1.08, + "grad_norm": 7.229626605486757, + "learning_rate": 9.14367140902413e-06, + "loss": 1.0634, + "step": 90060 + }, + { + "epoch": 1.08, + "grad_norm": 16.992695764555055, + "learning_rate": 9.143089378273395e-06, + "loss": 1.0744, + "step": 90063 + }, + { + "epoch": 1.08, + "grad_norm": 9.286277159739448, + "learning_rate": 9.142507350446985e-06, + "loss": 1.3193, + "step": 90066 + }, + { + "epoch": 1.08, + "grad_norm": 107.22111843045039, + "learning_rate": 9.14192532554689e-06, + "loss": 1.3608, + "step": 90069 + }, + { + "epoch": 1.08, + "grad_norm": 60.82740561620539, + "learning_rate": 9.141343303575091e-06, + "loss": 1.2167, + "step": 90072 + }, + { + "epoch": 1.08, + "grad_norm": 5.826180019604495, + "learning_rate": 9.140761284533585e-06, + "loss": 1.0533, + "step": 90075 + }, + { + "epoch": 1.08, + "grad_norm": 3.6071017090638207, + "learning_rate": 9.140179268424345e-06, + "loss": 1.3313, + "step": 90078 + }, + { + "epoch": 1.08, + "grad_norm": 8.409850167047514, + "learning_rate": 9.139597255249367e-06, + "loss": 1.0602, + "step": 90081 + }, + { + "epoch": 1.08, + "grad_norm": 20.136545816303965, + "learning_rate": 9.13901524501063e-06, + "loss": 1.2836, + "step": 90084 + }, + { + "epoch": 1.08, + "grad_norm": 150.11204626187686, + "learning_rate": 9.138433237710124e-06, + "loss": 1.3983, + "step": 90087 + }, + { + "epoch": 1.08, + "grad_norm": 6.375320972911142, + "learning_rate": 9.137851233349842e-06, + "loss": 1.0812, + "step": 90090 + }, + { + "epoch": 1.08, + "grad_norm": 7.356841405633584, + "learning_rate": 9.137269231931758e-06, + "loss": 1.2917, + "step": 90093 + }, + { + "epoch": 1.08, + "grad_norm": 13.16556346921253, + "learning_rate": 9.136687233457864e-06, + "loss": 1.111, + "step": 90096 + }, + { + "epoch": 1.08, + "grad_norm": 25.414165449188754, + "learning_rate": 9.136105237930141e-06, + "loss": 1.1411, + "step": 90099 + }, + { + "epoch": 1.08, + "grad_norm": 7.268082011944324, + "learning_rate": 9.135523245350588e-06, + "loss": 1.2426, + "step": 90102 + }, + { + "epoch": 1.08, + "grad_norm": 9.007121908479556, + "learning_rate": 9.13494125572118e-06, + "loss": 1.1639, + "step": 90105 + }, + { + "epoch": 1.08, + "grad_norm": 13.843260989864307, + "learning_rate": 9.134359269043901e-06, + "loss": 1.0205, + "step": 90108 + }, + { + "epoch": 1.08, + "grad_norm": 42.118906864195786, + "learning_rate": 9.133777285320747e-06, + "loss": 1.3702, + "step": 90111 + }, + { + "epoch": 1.08, + "grad_norm": 7.301109625327795, + "learning_rate": 9.133195304553697e-06, + "loss": 0.8498, + "step": 90114 + }, + { + "epoch": 1.08, + "grad_norm": 13.641404903160065, + "learning_rate": 9.132613326744742e-06, + "loss": 1.1399, + "step": 90117 + }, + { + "epoch": 1.08, + "grad_norm": 7.979853719508621, + "learning_rate": 9.132031351895862e-06, + "loss": 1.1968, + "step": 90120 + }, + { + "epoch": 1.08, + "grad_norm": 16.462637749747692, + "learning_rate": 9.13144938000905e-06, + "loss": 1.033, + "step": 90123 + }, + { + "epoch": 1.08, + "grad_norm": 36.69449464352989, + "learning_rate": 9.130867411086283e-06, + "loss": 1.6215, + "step": 90126 + }, + { + "epoch": 1.08, + "grad_norm": 11.79905030537751, + "learning_rate": 9.130285445129558e-06, + "loss": 1.1673, + "step": 90129 + }, + { + "epoch": 1.08, + "grad_norm": 6.784096703682611, + "learning_rate": 9.129703482140852e-06, + "loss": 1.5984, + "step": 90132 + }, + { + "epoch": 1.08, + "grad_norm": 25.658984881937727, + "learning_rate": 9.129121522122157e-06, + "loss": 1.2148, + "step": 90135 + }, + { + "epoch": 1.08, + "grad_norm": 7.516264587855809, + "learning_rate": 9.128539565075453e-06, + "loss": 1.165, + "step": 90138 + }, + { + "epoch": 1.08, + "grad_norm": 3.2315162047759483, + "learning_rate": 9.12795761100273e-06, + "loss": 1.0618, + "step": 90141 + }, + { + "epoch": 1.08, + "grad_norm": 9.86274713589164, + "learning_rate": 9.127375659905976e-06, + "loss": 1.1023, + "step": 90144 + }, + { + "epoch": 1.08, + "grad_norm": 3.4938412792877593, + "learning_rate": 9.126793711787176e-06, + "loss": 1.7621, + "step": 90147 + }, + { + "epoch": 1.08, + "grad_norm": 7.631731939229492, + "learning_rate": 9.12621176664831e-06, + "loss": 1.348, + "step": 90150 + }, + { + "epoch": 1.08, + "grad_norm": 10.207215257740158, + "learning_rate": 9.125629824491372e-06, + "loss": 1.2096, + "step": 90153 + }, + { + "epoch": 1.08, + "grad_norm": 9.083405206818624, + "learning_rate": 9.125047885318344e-06, + "loss": 1.2584, + "step": 90156 + }, + { + "epoch": 1.08, + "grad_norm": 10.768974892264456, + "learning_rate": 9.124465949131212e-06, + "loss": 1.486, + "step": 90159 + }, + { + "epoch": 1.08, + "grad_norm": 17.36601394564647, + "learning_rate": 9.12388401593196e-06, + "loss": 1.1966, + "step": 90162 + }, + { + "epoch": 1.08, + "grad_norm": 2.765333478973029, + "learning_rate": 9.12330208572258e-06, + "loss": 0.9121, + "step": 90165 + }, + { + "epoch": 1.08, + "grad_norm": 30.697752192623597, + "learning_rate": 9.122720158505051e-06, + "loss": 0.9945, + "step": 90168 + }, + { + "epoch": 1.08, + "grad_norm": 10.530187839588441, + "learning_rate": 9.122138234281366e-06, + "loss": 0.9198, + "step": 90171 + }, + { + "epoch": 1.08, + "grad_norm": 7.23452787620084, + "learning_rate": 9.121556313053503e-06, + "loss": 1.113, + "step": 90174 + }, + { + "epoch": 1.08, + "grad_norm": 43.438872354045024, + "learning_rate": 9.120974394823455e-06, + "loss": 1.1062, + "step": 90177 + }, + { + "epoch": 1.08, + "grad_norm": 4.486062827887406, + "learning_rate": 9.1203924795932e-06, + "loss": 1.1007, + "step": 90180 + }, + { + "epoch": 1.08, + "grad_norm": 18.87361293390727, + "learning_rate": 9.119810567364736e-06, + "loss": 0.9152, + "step": 90183 + }, + { + "epoch": 1.08, + "grad_norm": 7.090239070538091, + "learning_rate": 9.119228658140035e-06, + "loss": 1.4633, + "step": 90186 + }, + { + "epoch": 1.08, + "grad_norm": 16.362751589426626, + "learning_rate": 9.118646751921092e-06, + "loss": 1.238, + "step": 90189 + }, + { + "epoch": 1.08, + "grad_norm": 3.3461314104844573, + "learning_rate": 9.11806484870989e-06, + "loss": 1.079, + "step": 90192 + }, + { + "epoch": 1.08, + "grad_norm": 3.5385900503141743, + "learning_rate": 9.117482948508412e-06, + "loss": 1.2934, + "step": 90195 + }, + { + "epoch": 1.08, + "grad_norm": 7.5513444875167215, + "learning_rate": 9.116901051318653e-06, + "loss": 1.2699, + "step": 90198 + }, + { + "epoch": 1.08, + "grad_norm": 9.774836563175008, + "learning_rate": 9.116319157142589e-06, + "loss": 1.4498, + "step": 90201 + }, + { + "epoch": 1.08, + "grad_norm": 11.698455648975436, + "learning_rate": 9.115737265982209e-06, + "loss": 0.7933, + "step": 90204 + }, + { + "epoch": 1.08, + "grad_norm": 12.401717611821866, + "learning_rate": 9.1151553778395e-06, + "loss": 1.2727, + "step": 90207 + }, + { + "epoch": 1.08, + "grad_norm": 6.308627116385388, + "learning_rate": 9.11457349271645e-06, + "loss": 1.0808, + "step": 90210 + }, + { + "epoch": 1.08, + "grad_norm": 5.352535269341761, + "learning_rate": 9.113991610615037e-06, + "loss": 1.226, + "step": 90213 + }, + { + "epoch": 1.08, + "grad_norm": 5.995494644299637, + "learning_rate": 9.113409731537252e-06, + "loss": 0.8706, + "step": 90216 + }, + { + "epoch": 1.08, + "grad_norm": 5.61445776294096, + "learning_rate": 9.112827855485082e-06, + "loss": 1.5606, + "step": 90219 + }, + { + "epoch": 1.08, + "grad_norm": 10.8111941148317, + "learning_rate": 9.112245982460509e-06, + "loss": 1.2015, + "step": 90222 + }, + { + "epoch": 1.08, + "grad_norm": 11.538136705591086, + "learning_rate": 9.111664112465524e-06, + "loss": 1.0601, + "step": 90225 + }, + { + "epoch": 1.08, + "grad_norm": 13.363155129339361, + "learning_rate": 9.111082245502106e-06, + "loss": 1.1562, + "step": 90228 + }, + { + "epoch": 1.08, + "grad_norm": 44.80862457217331, + "learning_rate": 9.110500381572248e-06, + "loss": 1.0778, + "step": 90231 + }, + { + "epoch": 1.09, + "grad_norm": 13.290511708789367, + "learning_rate": 9.109918520677925e-06, + "loss": 1.4967, + "step": 90234 + }, + { + "epoch": 1.09, + "grad_norm": 3.4111174840988774, + "learning_rate": 9.109336662821138e-06, + "loss": 1.1602, + "step": 90237 + }, + { + "epoch": 1.09, + "grad_norm": 9.553669537861682, + "learning_rate": 9.108754808003859e-06, + "loss": 1.3238, + "step": 90240 + }, + { + "epoch": 1.09, + "grad_norm": 11.266808691545043, + "learning_rate": 9.10817295622808e-06, + "loss": 1.287, + "step": 90243 + }, + { + "epoch": 1.09, + "grad_norm": 3.359826069872179, + "learning_rate": 9.107591107495784e-06, + "loss": 1.0139, + "step": 90246 + }, + { + "epoch": 1.09, + "grad_norm": 7.16698389133419, + "learning_rate": 9.107009261808959e-06, + "loss": 1.3828, + "step": 90249 + }, + { + "epoch": 1.09, + "grad_norm": 4.606173941960671, + "learning_rate": 9.10642741916959e-06, + "loss": 1.0908, + "step": 90252 + }, + { + "epoch": 1.09, + "grad_norm": 9.152708105269465, + "learning_rate": 9.105845579579665e-06, + "loss": 1.1692, + "step": 90255 + }, + { + "epoch": 1.09, + "grad_norm": 12.819181596875497, + "learning_rate": 9.105263743041162e-06, + "loss": 1.1357, + "step": 90258 + }, + { + "epoch": 1.09, + "grad_norm": 10.037220000952331, + "learning_rate": 9.104681909556075e-06, + "loss": 1.2982, + "step": 90261 + }, + { + "epoch": 1.09, + "grad_norm": 15.02291085248569, + "learning_rate": 9.104100079126387e-06, + "loss": 1.307, + "step": 90264 + }, + { + "epoch": 1.09, + "grad_norm": 8.217720750461133, + "learning_rate": 9.10351825175408e-06, + "loss": 1.0429, + "step": 90267 + }, + { + "epoch": 1.09, + "grad_norm": 9.324742291957199, + "learning_rate": 9.102936427441142e-06, + "loss": 1.3447, + "step": 90270 + }, + { + "epoch": 1.09, + "grad_norm": 6.87545509188531, + "learning_rate": 9.102354606189561e-06, + "loss": 1.2333, + "step": 90273 + }, + { + "epoch": 1.09, + "grad_norm": 5.71032167582111, + "learning_rate": 9.101772788001317e-06, + "loss": 1.1973, + "step": 90276 + }, + { + "epoch": 1.09, + "grad_norm": 25.94181011883036, + "learning_rate": 9.101190972878404e-06, + "loss": 1.2301, + "step": 90279 + }, + { + "epoch": 1.09, + "grad_norm": 6.8296033608954865, + "learning_rate": 9.100609160822798e-06, + "loss": 0.9102, + "step": 90282 + }, + { + "epoch": 1.09, + "grad_norm": 13.232227166936244, + "learning_rate": 9.100027351836492e-06, + "loss": 1.1442, + "step": 90285 + }, + { + "epoch": 1.09, + "grad_norm": 13.116226664819262, + "learning_rate": 9.099445545921465e-06, + "loss": 1.068, + "step": 90288 + }, + { + "epoch": 1.09, + "grad_norm": 12.108392336381455, + "learning_rate": 9.098863743079711e-06, + "loss": 1.2488, + "step": 90291 + }, + { + "epoch": 1.09, + "grad_norm": 4.413456573951665, + "learning_rate": 9.098281943313207e-06, + "loss": 0.8326, + "step": 90294 + }, + { + "epoch": 1.09, + "grad_norm": 7.775978625682672, + "learning_rate": 9.097700146623944e-06, + "loss": 1.1126, + "step": 90297 + }, + { + "epoch": 1.09, + "grad_norm": 5.518472618757126, + "learning_rate": 9.097118353013902e-06, + "loss": 1.2635, + "step": 90300 + }, + { + "epoch": 1.09, + "grad_norm": 11.441708897614538, + "learning_rate": 9.096536562485072e-06, + "loss": 1.2297, + "step": 90303 + }, + { + "epoch": 1.09, + "grad_norm": 58.525274321900184, + "learning_rate": 9.095954775039441e-06, + "loss": 1.578, + "step": 90306 + }, + { + "epoch": 1.09, + "grad_norm": 6.785392931439679, + "learning_rate": 9.095372990678984e-06, + "loss": 1.2094, + "step": 90309 + }, + { + "epoch": 1.09, + "grad_norm": 7.803011399229446, + "learning_rate": 9.094791209405696e-06, + "loss": 1.3335, + "step": 90312 + }, + { + "epoch": 1.09, + "grad_norm": 25.38648023264346, + "learning_rate": 9.09420943122156e-06, + "loss": 1.0727, + "step": 90315 + }, + { + "epoch": 1.09, + "grad_norm": 4.05556223481955, + "learning_rate": 9.093627656128563e-06, + "loss": 1.4223, + "step": 90318 + }, + { + "epoch": 1.09, + "grad_norm": 45.28809039259894, + "learning_rate": 9.093045884128685e-06, + "loss": 1.3006, + "step": 90321 + }, + { + "epoch": 1.09, + "grad_norm": 17.546544309657556, + "learning_rate": 9.092464115223914e-06, + "loss": 1.1435, + "step": 90324 + }, + { + "epoch": 1.09, + "grad_norm": 10.823958917096919, + "learning_rate": 9.091882349416238e-06, + "loss": 1.076, + "step": 90327 + }, + { + "epoch": 1.09, + "grad_norm": 9.579981953852043, + "learning_rate": 9.091300586707637e-06, + "loss": 0.9743, + "step": 90330 + }, + { + "epoch": 1.09, + "grad_norm": 14.294404403189114, + "learning_rate": 9.090718827100107e-06, + "loss": 1.2187, + "step": 90333 + }, + { + "epoch": 1.09, + "grad_norm": 3.5634724309684125, + "learning_rate": 9.09013707059562e-06, + "loss": 0.9992, + "step": 90336 + }, + { + "epoch": 1.09, + "grad_norm": 6.934209126078252, + "learning_rate": 9.08955531719617e-06, + "loss": 0.8847, + "step": 90339 + }, + { + "epoch": 1.09, + "grad_norm": 4.713144531227437, + "learning_rate": 9.088973566903739e-06, + "loss": 1.3675, + "step": 90342 + }, + { + "epoch": 1.09, + "grad_norm": 16.469810857386538, + "learning_rate": 9.088391819720315e-06, + "loss": 0.7807, + "step": 90345 + }, + { + "epoch": 1.09, + "grad_norm": 15.545776395565932, + "learning_rate": 9.087810075647881e-06, + "loss": 1.3929, + "step": 90348 + }, + { + "epoch": 1.09, + "grad_norm": 8.456193844195935, + "learning_rate": 9.087228334688418e-06, + "loss": 1.4412, + "step": 90351 + }, + { + "epoch": 1.09, + "grad_norm": 5.946639214157132, + "learning_rate": 9.086646596843919e-06, + "loss": 1.2137, + "step": 90354 + }, + { + "epoch": 1.09, + "grad_norm": 15.473500673294536, + "learning_rate": 9.086064862116365e-06, + "loss": 1.4405, + "step": 90357 + }, + { + "epoch": 1.09, + "grad_norm": 6.4340980800887495, + "learning_rate": 9.085483130507748e-06, + "loss": 0.8874, + "step": 90360 + }, + { + "epoch": 1.09, + "grad_norm": 13.923382420392315, + "learning_rate": 9.084901402020042e-06, + "loss": 1.5299, + "step": 90363 + }, + { + "epoch": 1.09, + "grad_norm": 7.489474918855169, + "learning_rate": 9.084319676655237e-06, + "loss": 0.8568, + "step": 90366 + }, + { + "epoch": 1.09, + "grad_norm": 13.031718800827793, + "learning_rate": 9.083737954415322e-06, + "loss": 1.2242, + "step": 90369 + }, + { + "epoch": 1.09, + "grad_norm": 7.550426717887429, + "learning_rate": 9.083156235302283e-06, + "loss": 1.0371, + "step": 90372 + }, + { + "epoch": 1.09, + "grad_norm": 5.230045046073648, + "learning_rate": 9.082574519318094e-06, + "loss": 1.2788, + "step": 90375 + }, + { + "epoch": 1.09, + "grad_norm": 9.908208874195267, + "learning_rate": 9.081992806464752e-06, + "loss": 1.2752, + "step": 90378 + }, + { + "epoch": 1.09, + "grad_norm": 10.412657185419656, + "learning_rate": 9.081411096744237e-06, + "loss": 0.9329, + "step": 90381 + }, + { + "epoch": 1.09, + "grad_norm": 48.078837736324836, + "learning_rate": 9.080829390158533e-06, + "loss": 1.3318, + "step": 90384 + }, + { + "epoch": 1.09, + "grad_norm": 7.6184879636902405, + "learning_rate": 9.080247686709631e-06, + "loss": 1.1974, + "step": 90387 + }, + { + "epoch": 1.09, + "grad_norm": 14.886151153182858, + "learning_rate": 9.079665986399511e-06, + "loss": 1.1765, + "step": 90390 + }, + { + "epoch": 1.09, + "grad_norm": 6.585078908615171, + "learning_rate": 9.079084289230158e-06, + "loss": 0.8182, + "step": 90393 + }, + { + "epoch": 1.09, + "grad_norm": 9.13552242325569, + "learning_rate": 9.078502595203557e-06, + "loss": 1.2729, + "step": 90396 + }, + { + "epoch": 1.09, + "grad_norm": 9.864639444090287, + "learning_rate": 9.077920904321702e-06, + "loss": 0.9385, + "step": 90399 + }, + { + "epoch": 1.09, + "grad_norm": 6.830116058492362, + "learning_rate": 9.077339216586566e-06, + "loss": 1.4666, + "step": 90402 + }, + { + "epoch": 1.09, + "grad_norm": 21.38624764135244, + "learning_rate": 9.076757532000137e-06, + "loss": 1.1594, + "step": 90405 + }, + { + "epoch": 1.09, + "grad_norm": 3.1570516227794174, + "learning_rate": 9.076175850564403e-06, + "loss": 1.2136, + "step": 90408 + }, + { + "epoch": 1.09, + "grad_norm": 25.37830113105421, + "learning_rate": 9.07559417228135e-06, + "loss": 1.2799, + "step": 90411 + }, + { + "epoch": 1.09, + "grad_norm": 11.970446010141627, + "learning_rate": 9.075012497152963e-06, + "loss": 0.8949, + "step": 90414 + }, + { + "epoch": 1.09, + "grad_norm": 4.85246420327987, + "learning_rate": 9.074430825181221e-06, + "loss": 1.0082, + "step": 90417 + }, + { + "epoch": 1.09, + "grad_norm": 70.81440463174866, + "learning_rate": 9.073849156368113e-06, + "loss": 1.2667, + "step": 90420 + }, + { + "epoch": 1.09, + "grad_norm": 36.655242470159294, + "learning_rate": 9.073267490715627e-06, + "loss": 1.104, + "step": 90423 + }, + { + "epoch": 1.09, + "grad_norm": 11.247708255724332, + "learning_rate": 9.072685828225747e-06, + "loss": 1.5485, + "step": 90426 + }, + { + "epoch": 1.09, + "grad_norm": 10.506645375477625, + "learning_rate": 9.07210416890045e-06, + "loss": 1.1786, + "step": 90429 + }, + { + "epoch": 1.09, + "grad_norm": 4.675172100018446, + "learning_rate": 9.071522512741731e-06, + "loss": 1.1092, + "step": 90432 + }, + { + "epoch": 1.09, + "grad_norm": 3.750953796370493, + "learning_rate": 9.070940859751571e-06, + "loss": 1.1427, + "step": 90435 + }, + { + "epoch": 1.09, + "grad_norm": 17.96248128971269, + "learning_rate": 9.070359209931954e-06, + "loss": 1.3802, + "step": 90438 + }, + { + "epoch": 1.09, + "grad_norm": 3.410364375426827, + "learning_rate": 9.06977756328487e-06, + "loss": 1.3331, + "step": 90441 + }, + { + "epoch": 1.09, + "grad_norm": 33.684346295611476, + "learning_rate": 9.069195919812297e-06, + "loss": 1.4055, + "step": 90444 + }, + { + "epoch": 1.09, + "grad_norm": 3.9144189860300753, + "learning_rate": 9.068614279516222e-06, + "loss": 1.0829, + "step": 90447 + }, + { + "epoch": 1.09, + "grad_norm": 20.904291283850565, + "learning_rate": 9.068032642398631e-06, + "loss": 1.2413, + "step": 90450 + }, + { + "epoch": 1.09, + "grad_norm": 10.164436484354594, + "learning_rate": 9.067451008461514e-06, + "loss": 1.4159, + "step": 90453 + }, + { + "epoch": 1.09, + "grad_norm": 15.45104853912788, + "learning_rate": 9.066869377706846e-06, + "loss": 1.1107, + "step": 90456 + }, + { + "epoch": 1.09, + "grad_norm": 7.978358519235992, + "learning_rate": 9.066287750136616e-06, + "loss": 0.8832, + "step": 90459 + }, + { + "epoch": 1.09, + "grad_norm": 10.510041996678295, + "learning_rate": 9.06570612575281e-06, + "loss": 1.2175, + "step": 90462 + }, + { + "epoch": 1.09, + "grad_norm": 10.25691727381504, + "learning_rate": 9.065124504557414e-06, + "loss": 1.2853, + "step": 90465 + }, + { + "epoch": 1.09, + "grad_norm": 8.206052429900586, + "learning_rate": 9.064542886552414e-06, + "loss": 1.1827, + "step": 90468 + }, + { + "epoch": 1.09, + "grad_norm": 4.459692664650586, + "learning_rate": 9.063961271739789e-06, + "loss": 1.1053, + "step": 90471 + }, + { + "epoch": 1.09, + "grad_norm": 9.792612411304024, + "learning_rate": 9.063379660121525e-06, + "loss": 0.9874, + "step": 90474 + }, + { + "epoch": 1.09, + "grad_norm": 5.1826159319098135, + "learning_rate": 9.062798051699612e-06, + "loss": 1.1364, + "step": 90477 + }, + { + "epoch": 1.09, + "grad_norm": 10.969272557073854, + "learning_rate": 9.062216446476035e-06, + "loss": 1.3246, + "step": 90480 + }, + { + "epoch": 1.09, + "grad_norm": 14.275672876294712, + "learning_rate": 9.061634844452769e-06, + "loss": 1.0836, + "step": 90483 + }, + { + "epoch": 1.09, + "grad_norm": 36.46149318103197, + "learning_rate": 9.061053245631808e-06, + "loss": 1.4052, + "step": 90486 + }, + { + "epoch": 1.09, + "grad_norm": 11.154155380885706, + "learning_rate": 9.060471650015132e-06, + "loss": 0.9196, + "step": 90489 + }, + { + "epoch": 1.09, + "grad_norm": 5.8835989288766095, + "learning_rate": 9.059890057604727e-06, + "loss": 1.151, + "step": 90492 + }, + { + "epoch": 1.09, + "grad_norm": 3.0947533448321596, + "learning_rate": 9.059308468402584e-06, + "loss": 1.132, + "step": 90495 + }, + { + "epoch": 1.09, + "grad_norm": 10.103376867481682, + "learning_rate": 9.05872688241068e-06, + "loss": 1.0016, + "step": 90498 + }, + { + "epoch": 1.09, + "grad_norm": 8.184660654654213, + "learning_rate": 9.058145299631e-06, + "loss": 1.057, + "step": 90501 + }, + { + "epoch": 1.09, + "grad_norm": 9.237994368885293, + "learning_rate": 9.057563720065532e-06, + "loss": 1.3256, + "step": 90504 + }, + { + "epoch": 1.09, + "grad_norm": 48.71581133469654, + "learning_rate": 9.056982143716262e-06, + "loss": 0.9826, + "step": 90507 + }, + { + "epoch": 1.09, + "grad_norm": 13.33274924962153, + "learning_rate": 9.056400570585171e-06, + "loss": 1.1432, + "step": 90510 + }, + { + "epoch": 1.09, + "grad_norm": 9.671552782413166, + "learning_rate": 9.055819000674243e-06, + "loss": 1.4869, + "step": 90513 + }, + { + "epoch": 1.09, + "grad_norm": 20.32507898433826, + "learning_rate": 9.055237433985464e-06, + "loss": 1.2276, + "step": 90516 + }, + { + "epoch": 1.09, + "grad_norm": 18.948188152540073, + "learning_rate": 9.054655870520822e-06, + "loss": 1.5506, + "step": 90519 + }, + { + "epoch": 1.09, + "grad_norm": 9.424709199754668, + "learning_rate": 9.054074310282301e-06, + "loss": 1.3415, + "step": 90522 + }, + { + "epoch": 1.09, + "grad_norm": 19.72318638501903, + "learning_rate": 9.053492753271879e-06, + "loss": 1.0842, + "step": 90525 + }, + { + "epoch": 1.09, + "grad_norm": 11.454787964600195, + "learning_rate": 9.052911199491549e-06, + "loss": 1.2162, + "step": 90528 + }, + { + "epoch": 1.09, + "grad_norm": 19.988229013651488, + "learning_rate": 9.052329648943287e-06, + "loss": 0.7574, + "step": 90531 + }, + { + "epoch": 1.09, + "grad_norm": 12.088084957787645, + "learning_rate": 9.05174810162909e-06, + "loss": 1.3151, + "step": 90534 + }, + { + "epoch": 1.09, + "grad_norm": 40.56399470554746, + "learning_rate": 9.05116655755093e-06, + "loss": 1.1622, + "step": 90537 + }, + { + "epoch": 1.09, + "grad_norm": 15.879252744645775, + "learning_rate": 9.050585016710796e-06, + "loss": 1.3945, + "step": 90540 + }, + { + "epoch": 1.09, + "grad_norm": 11.472477302950633, + "learning_rate": 9.050003479110674e-06, + "loss": 1.2073, + "step": 90543 + }, + { + "epoch": 1.09, + "grad_norm": 27.744687175922362, + "learning_rate": 9.049421944752547e-06, + "loss": 1.37, + "step": 90546 + }, + { + "epoch": 1.09, + "grad_norm": 9.064962958908188, + "learning_rate": 9.048840413638406e-06, + "loss": 1.5302, + "step": 90549 + }, + { + "epoch": 1.09, + "grad_norm": 27.030627235227108, + "learning_rate": 9.048258885770227e-06, + "loss": 1.1042, + "step": 90552 + }, + { + "epoch": 1.09, + "grad_norm": 8.299787978184055, + "learning_rate": 9.047677361149995e-06, + "loss": 1.1717, + "step": 90555 + }, + { + "epoch": 1.09, + "grad_norm": 7.410888811488014, + "learning_rate": 9.047095839779699e-06, + "loss": 1.2146, + "step": 90558 + }, + { + "epoch": 1.09, + "grad_norm": 41.533361093912745, + "learning_rate": 9.046514321661325e-06, + "loss": 1.2764, + "step": 90561 + }, + { + "epoch": 1.09, + "grad_norm": 49.986240822920614, + "learning_rate": 9.045932806796851e-06, + "loss": 1.1404, + "step": 90564 + }, + { + "epoch": 1.09, + "grad_norm": 12.757428903626701, + "learning_rate": 9.045351295188262e-06, + "loss": 1.1185, + "step": 90567 + }, + { + "epoch": 1.09, + "grad_norm": 8.907745389538096, + "learning_rate": 9.04476978683755e-06, + "loss": 1.299, + "step": 90570 + }, + { + "epoch": 1.09, + "grad_norm": 3.9956457562327485, + "learning_rate": 9.044188281746692e-06, + "loss": 1.4825, + "step": 90573 + }, + { + "epoch": 1.09, + "grad_norm": 10.059802785793881, + "learning_rate": 9.043606779917676e-06, + "loss": 1.2726, + "step": 90576 + }, + { + "epoch": 1.09, + "grad_norm": 18.234330541358496, + "learning_rate": 9.043025281352484e-06, + "loss": 1.0592, + "step": 90579 + }, + { + "epoch": 1.09, + "grad_norm": 7.231037959011974, + "learning_rate": 9.042443786053105e-06, + "loss": 1.7913, + "step": 90582 + }, + { + "epoch": 1.09, + "grad_norm": 14.041197349810362, + "learning_rate": 9.041862294021516e-06, + "loss": 1.2124, + "step": 90585 + }, + { + "epoch": 1.09, + "grad_norm": 18.549625105127127, + "learning_rate": 9.041280805259713e-06, + "loss": 0.9578, + "step": 90588 + }, + { + "epoch": 1.09, + "grad_norm": 3.8084033787468616, + "learning_rate": 9.040699319769666e-06, + "loss": 0.7515, + "step": 90591 + }, + { + "epoch": 1.09, + "grad_norm": 10.946269178303922, + "learning_rate": 9.040117837553371e-06, + "loss": 1.0964, + "step": 90594 + }, + { + "epoch": 1.09, + "grad_norm": 5.842644297535991, + "learning_rate": 9.039536358612803e-06, + "loss": 1.0469, + "step": 90597 + }, + { + "epoch": 1.09, + "grad_norm": 17.80016580006006, + "learning_rate": 9.038954882949955e-06, + "loss": 1.3431, + "step": 90600 + }, + { + "epoch": 1.09, + "grad_norm": 9.3112753423171, + "learning_rate": 9.038373410566811e-06, + "loss": 1.1763, + "step": 90603 + }, + { + "epoch": 1.09, + "grad_norm": 6.63928328237091, + "learning_rate": 9.03779194146535e-06, + "loss": 1.1904, + "step": 90606 + }, + { + "epoch": 1.09, + "grad_norm": 2.586798836320749, + "learning_rate": 9.037210475647555e-06, + "loss": 1.3405, + "step": 90609 + }, + { + "epoch": 1.09, + "grad_norm": 14.265857982747264, + "learning_rate": 9.036629013115416e-06, + "loss": 1.3468, + "step": 90612 + }, + { + "epoch": 1.09, + "grad_norm": 25.77345094905005, + "learning_rate": 9.036047553870917e-06, + "loss": 1.1451, + "step": 90615 + }, + { + "epoch": 1.09, + "grad_norm": 12.652034714770295, + "learning_rate": 9.03546609791604e-06, + "loss": 1.2536, + "step": 90618 + }, + { + "epoch": 1.09, + "grad_norm": 12.543276236425138, + "learning_rate": 9.034884645252767e-06, + "loss": 1.2651, + "step": 90621 + }, + { + "epoch": 1.09, + "grad_norm": 3.70366577479384, + "learning_rate": 9.034303195883088e-06, + "loss": 0.8289, + "step": 90624 + }, + { + "epoch": 1.09, + "grad_norm": 5.969878592537242, + "learning_rate": 9.033721749808981e-06, + "loss": 1.2097, + "step": 90627 + }, + { + "epoch": 1.09, + "grad_norm": 10.033778749903297, + "learning_rate": 9.03314030703244e-06, + "loss": 1.1253, + "step": 90630 + }, + { + "epoch": 1.09, + "grad_norm": 10.073898005055034, + "learning_rate": 9.032558867555436e-06, + "loss": 1.1792, + "step": 90633 + }, + { + "epoch": 1.09, + "grad_norm": 61.781175809528634, + "learning_rate": 9.031977431379965e-06, + "loss": 0.9789, + "step": 90636 + }, + { + "epoch": 1.09, + "grad_norm": 41.34878603452974, + "learning_rate": 9.031395998508001e-06, + "loss": 1.4898, + "step": 90639 + }, + { + "epoch": 1.09, + "grad_norm": 31.423796140645536, + "learning_rate": 9.030814568941541e-06, + "loss": 1.0533, + "step": 90642 + }, + { + "epoch": 1.09, + "grad_norm": 18.54072955363052, + "learning_rate": 9.030233142682555e-06, + "loss": 1.2462, + "step": 90645 + }, + { + "epoch": 1.09, + "grad_norm": 7.672960868299703, + "learning_rate": 9.029651719733038e-06, + "loss": 1.161, + "step": 90648 + }, + { + "epoch": 1.09, + "grad_norm": 5.6034873981695865, + "learning_rate": 9.029070300094967e-06, + "loss": 1.273, + "step": 90651 + }, + { + "epoch": 1.09, + "grad_norm": 14.236587971967232, + "learning_rate": 9.028488883770329e-06, + "loss": 1.3234, + "step": 90654 + }, + { + "epoch": 1.09, + "grad_norm": 2.8917125972491853, + "learning_rate": 9.027907470761113e-06, + "loss": 1.3393, + "step": 90657 + }, + { + "epoch": 1.09, + "grad_norm": 18.862993947338147, + "learning_rate": 9.027326061069297e-06, + "loss": 1.5912, + "step": 90660 + }, + { + "epoch": 1.09, + "grad_norm": 7.671826078868766, + "learning_rate": 9.026744654696866e-06, + "loss": 1.3108, + "step": 90663 + }, + { + "epoch": 1.09, + "grad_norm": 6.9903713038123625, + "learning_rate": 9.026163251645805e-06, + "loss": 0.8282, + "step": 90666 + }, + { + "epoch": 1.09, + "grad_norm": 9.504007107610548, + "learning_rate": 9.025581851918098e-06, + "loss": 1.0756, + "step": 90669 + }, + { + "epoch": 1.09, + "grad_norm": 9.793154259132479, + "learning_rate": 9.02500045551573e-06, + "loss": 1.1305, + "step": 90672 + }, + { + "epoch": 1.09, + "grad_norm": 8.683041415197005, + "learning_rate": 9.024419062440682e-06, + "loss": 1.3704, + "step": 90675 + }, + { + "epoch": 1.09, + "grad_norm": 5.950696732660057, + "learning_rate": 9.023837672694942e-06, + "loss": 1.4024, + "step": 90678 + }, + { + "epoch": 1.09, + "grad_norm": 3.514127787873856, + "learning_rate": 9.02325628628049e-06, + "loss": 1.257, + "step": 90681 + }, + { + "epoch": 1.09, + "grad_norm": 7.351064759922315, + "learning_rate": 9.022674903199317e-06, + "loss": 1.5777, + "step": 90684 + }, + { + "epoch": 1.09, + "grad_norm": 13.01863364324877, + "learning_rate": 9.0220935234534e-06, + "loss": 1.082, + "step": 90687 + }, + { + "epoch": 1.09, + "grad_norm": 8.232882674902882, + "learning_rate": 9.021512147044726e-06, + "loss": 1.4478, + "step": 90690 + }, + { + "epoch": 1.09, + "grad_norm": 12.329642070192495, + "learning_rate": 9.020930773975277e-06, + "loss": 1.4113, + "step": 90693 + }, + { + "epoch": 1.09, + "grad_norm": 18.877418346706616, + "learning_rate": 9.020349404247043e-06, + "loss": 1.1321, + "step": 90696 + }, + { + "epoch": 1.09, + "grad_norm": 12.581460199681354, + "learning_rate": 9.019768037861999e-06, + "loss": 1.0429, + "step": 90699 + }, + { + "epoch": 1.09, + "grad_norm": 6.645662363176122, + "learning_rate": 9.019186674822135e-06, + "loss": 1.4952, + "step": 90702 + }, + { + "epoch": 1.09, + "grad_norm": 20.40054299809602, + "learning_rate": 9.018605315129434e-06, + "loss": 0.9847, + "step": 90705 + }, + { + "epoch": 1.09, + "grad_norm": 10.878637672374918, + "learning_rate": 9.018023958785879e-06, + "loss": 1.1349, + "step": 90708 + }, + { + "epoch": 1.09, + "grad_norm": 5.977222791347772, + "learning_rate": 9.017442605793455e-06, + "loss": 1.5727, + "step": 90711 + }, + { + "epoch": 1.09, + "grad_norm": 7.522145575967814, + "learning_rate": 9.016861256154146e-06, + "loss": 1.0628, + "step": 90714 + }, + { + "epoch": 1.09, + "grad_norm": 8.912732477609293, + "learning_rate": 9.016279909869932e-06, + "loss": 1.1513, + "step": 90717 + }, + { + "epoch": 1.09, + "grad_norm": 2.9848893972153543, + "learning_rate": 9.015698566942804e-06, + "loss": 1.2195, + "step": 90720 + }, + { + "epoch": 1.09, + "grad_norm": 8.455735760972937, + "learning_rate": 9.015117227374744e-06, + "loss": 1.4111, + "step": 90723 + }, + { + "epoch": 1.09, + "grad_norm": 8.375759915565961, + "learning_rate": 9.01453589116773e-06, + "loss": 1.0006, + "step": 90726 + }, + { + "epoch": 1.09, + "grad_norm": 9.06481423800086, + "learning_rate": 9.01395455832375e-06, + "loss": 1.2306, + "step": 90729 + }, + { + "epoch": 1.09, + "grad_norm": 8.592935798835832, + "learning_rate": 9.01337322884479e-06, + "loss": 1.5111, + "step": 90732 + }, + { + "epoch": 1.09, + "grad_norm": 9.008570733340958, + "learning_rate": 9.012791902732831e-06, + "loss": 1.1494, + "step": 90735 + }, + { + "epoch": 1.09, + "grad_norm": 6.141052038026454, + "learning_rate": 9.01221057998986e-06, + "loss": 1.6023, + "step": 90738 + }, + { + "epoch": 1.09, + "grad_norm": 9.740772612807525, + "learning_rate": 9.011629260617855e-06, + "loss": 1.2094, + "step": 90741 + }, + { + "epoch": 1.09, + "grad_norm": 8.865335991218325, + "learning_rate": 9.011047944618804e-06, + "loss": 1.4313, + "step": 90744 + }, + { + "epoch": 1.09, + "grad_norm": 8.669122933288122, + "learning_rate": 9.01046663199469e-06, + "loss": 1.2538, + "step": 90747 + }, + { + "epoch": 1.09, + "grad_norm": 7.765165812542521, + "learning_rate": 9.0098853227475e-06, + "loss": 1.2892, + "step": 90750 + }, + { + "epoch": 1.09, + "grad_norm": 6.014133712347344, + "learning_rate": 9.009304016879211e-06, + "loss": 1.2444, + "step": 90753 + }, + { + "epoch": 1.09, + "grad_norm": 9.935133269972079, + "learning_rate": 9.008722714391814e-06, + "loss": 1.1752, + "step": 90756 + }, + { + "epoch": 1.09, + "grad_norm": 15.040329797224569, + "learning_rate": 9.008141415287284e-06, + "loss": 1.6981, + "step": 90759 + }, + { + "epoch": 1.09, + "grad_norm": 19.0438133219411, + "learning_rate": 9.007560119567614e-06, + "loss": 1.0349, + "step": 90762 + }, + { + "epoch": 1.09, + "grad_norm": 9.542925671390085, + "learning_rate": 9.006978827234787e-06, + "loss": 1.537, + "step": 90765 + }, + { + "epoch": 1.09, + "grad_norm": 5.38352845750246, + "learning_rate": 9.006397538290779e-06, + "loss": 1.1032, + "step": 90768 + }, + { + "epoch": 1.09, + "grad_norm": 27.33976560711598, + "learning_rate": 9.005816252737577e-06, + "loss": 1.1039, + "step": 90771 + }, + { + "epoch": 1.09, + "grad_norm": 353.3454063648616, + "learning_rate": 9.005234970577168e-06, + "loss": 1.033, + "step": 90774 + }, + { + "epoch": 1.09, + "grad_norm": 3.121919509363564, + "learning_rate": 9.004653691811537e-06, + "loss": 1.2481, + "step": 90777 + }, + { + "epoch": 1.09, + "grad_norm": 6.034495975560084, + "learning_rate": 9.00407241644266e-06, + "loss": 1.6621, + "step": 90780 + }, + { + "epoch": 1.09, + "grad_norm": 15.831011391465832, + "learning_rate": 9.003491144472524e-06, + "loss": 1.3936, + "step": 90783 + }, + { + "epoch": 1.09, + "grad_norm": 24.07483656592775, + "learning_rate": 9.002909875903118e-06, + "loss": 1.2897, + "step": 90786 + }, + { + "epoch": 1.09, + "grad_norm": 12.52634757677314, + "learning_rate": 9.002328610736418e-06, + "loss": 1.087, + "step": 90789 + }, + { + "epoch": 1.09, + "grad_norm": 7.0505588467526605, + "learning_rate": 9.001747348974416e-06, + "loss": 1.3307, + "step": 90792 + }, + { + "epoch": 1.09, + "grad_norm": 7.786865177113198, + "learning_rate": 9.001166090619086e-06, + "loss": 1.4748, + "step": 90795 + }, + { + "epoch": 1.09, + "grad_norm": 10.604714984695343, + "learning_rate": 9.000584835672417e-06, + "loss": 1.3202, + "step": 90798 + }, + { + "epoch": 1.09, + "grad_norm": 11.622211359523858, + "learning_rate": 9.000003584136392e-06, + "loss": 1.3705, + "step": 90801 + }, + { + "epoch": 1.09, + "grad_norm": 7.723312451486184, + "learning_rate": 8.999422336012999e-06, + "loss": 1.0367, + "step": 90804 + }, + { + "epoch": 1.09, + "grad_norm": 7.961435058364422, + "learning_rate": 8.998841091304214e-06, + "loss": 1.038, + "step": 90807 + }, + { + "epoch": 1.09, + "grad_norm": 10.87613721387705, + "learning_rate": 8.99825985001202e-06, + "loss": 0.8926, + "step": 90810 + }, + { + "epoch": 1.09, + "grad_norm": 11.216268765321768, + "learning_rate": 8.997678612138407e-06, + "loss": 1.1103, + "step": 90813 + }, + { + "epoch": 1.09, + "grad_norm": 37.689989175442484, + "learning_rate": 8.997097377685358e-06, + "loss": 1.3465, + "step": 90816 + }, + { + "epoch": 1.09, + "grad_norm": 15.200096620510262, + "learning_rate": 8.996516146654854e-06, + "loss": 0.9658, + "step": 90819 + }, + { + "epoch": 1.09, + "grad_norm": 12.74779876566268, + "learning_rate": 8.995934919048877e-06, + "loss": 1.2414, + "step": 90822 + }, + { + "epoch": 1.09, + "grad_norm": 11.229511179224746, + "learning_rate": 8.995353694869413e-06, + "loss": 1.249, + "step": 90825 + }, + { + "epoch": 1.09, + "grad_norm": 13.486438807927634, + "learning_rate": 8.994772474118445e-06, + "loss": 1.2559, + "step": 90828 + }, + { + "epoch": 1.09, + "grad_norm": 13.323717850566346, + "learning_rate": 8.99419125679796e-06, + "loss": 1.1883, + "step": 90831 + }, + { + "epoch": 1.09, + "grad_norm": 10.362721868542287, + "learning_rate": 8.993610042909932e-06, + "loss": 1.1597, + "step": 90834 + }, + { + "epoch": 1.09, + "grad_norm": 13.445032156597813, + "learning_rate": 8.993028832456352e-06, + "loss": 1.5739, + "step": 90837 + }, + { + "epoch": 1.09, + "grad_norm": 7.950582752630227, + "learning_rate": 8.992447625439203e-06, + "loss": 1.0835, + "step": 90840 + }, + { + "epoch": 1.09, + "grad_norm": 8.267149171962732, + "learning_rate": 8.991866421860466e-06, + "loss": 1.082, + "step": 90843 + }, + { + "epoch": 1.09, + "grad_norm": 12.202199744731164, + "learning_rate": 8.99128522172213e-06, + "loss": 1.0343, + "step": 90846 + }, + { + "epoch": 1.09, + "grad_norm": 10.03264326713289, + "learning_rate": 8.99070402502617e-06, + "loss": 1.1656, + "step": 90849 + }, + { + "epoch": 1.09, + "grad_norm": 6.231892444008149, + "learning_rate": 8.990122831774574e-06, + "loss": 1.2624, + "step": 90852 + }, + { + "epoch": 1.09, + "grad_norm": 3.1514267607296853, + "learning_rate": 8.989541641969327e-06, + "loss": 1.3053, + "step": 90855 + }, + { + "epoch": 1.09, + "grad_norm": 9.404159282031282, + "learning_rate": 8.98896045561241e-06, + "loss": 0.928, + "step": 90858 + }, + { + "epoch": 1.09, + "grad_norm": 9.552678142158895, + "learning_rate": 8.988379272705808e-06, + "loss": 1.1209, + "step": 90861 + }, + { + "epoch": 1.09, + "grad_norm": 8.505901268997558, + "learning_rate": 8.987798093251499e-06, + "loss": 1.1184, + "step": 90864 + }, + { + "epoch": 1.09, + "grad_norm": 12.076380201507318, + "learning_rate": 8.987216917251472e-06, + "loss": 1.2536, + "step": 90867 + }, + { + "epoch": 1.09, + "grad_norm": 18.48082757112339, + "learning_rate": 8.98663574470771e-06, + "loss": 1.3031, + "step": 90870 + }, + { + "epoch": 1.09, + "grad_norm": 12.512145307808664, + "learning_rate": 8.986054575622198e-06, + "loss": 1.2142, + "step": 90873 + }, + { + "epoch": 1.09, + "grad_norm": 14.371848608816528, + "learning_rate": 8.985473409996911e-06, + "loss": 1.2388, + "step": 90876 + }, + { + "epoch": 1.09, + "grad_norm": 17.200889245473547, + "learning_rate": 8.984892247833838e-06, + "loss": 0.9588, + "step": 90879 + }, + { + "epoch": 1.09, + "grad_norm": 8.403656412107116, + "learning_rate": 8.984311089134966e-06, + "loss": 1.1797, + "step": 90882 + }, + { + "epoch": 1.09, + "grad_norm": 7.880320257663668, + "learning_rate": 8.983729933902276e-06, + "loss": 1.3007, + "step": 90885 + }, + { + "epoch": 1.09, + "grad_norm": 7.564433261206776, + "learning_rate": 8.983148782137743e-06, + "loss": 1.1904, + "step": 90888 + }, + { + "epoch": 1.09, + "grad_norm": 10.692820835523678, + "learning_rate": 8.98256763384336e-06, + "loss": 1.3612, + "step": 90891 + }, + { + "epoch": 1.09, + "grad_norm": 3.1454810839859637, + "learning_rate": 8.981986489021109e-06, + "loss": 1.0618, + "step": 90894 + }, + { + "epoch": 1.09, + "grad_norm": 12.152265746197092, + "learning_rate": 8.981405347672966e-06, + "loss": 1.1709, + "step": 90897 + }, + { + "epoch": 1.09, + "grad_norm": 19.069546920286726, + "learning_rate": 8.980824209800928e-06, + "loss": 1.3864, + "step": 90900 + }, + { + "epoch": 1.09, + "grad_norm": 7.531019216458916, + "learning_rate": 8.980243075406966e-06, + "loss": 0.8663, + "step": 90903 + }, + { + "epoch": 1.09, + "grad_norm": 15.301770830315718, + "learning_rate": 8.979661944493064e-06, + "loss": 1.2632, + "step": 90906 + }, + { + "epoch": 1.09, + "grad_norm": 10.599830597918558, + "learning_rate": 8.97908081706121e-06, + "loss": 1.2517, + "step": 90909 + }, + { + "epoch": 1.09, + "grad_norm": 13.683801880159557, + "learning_rate": 8.978499693113389e-06, + "loss": 1.3991, + "step": 90912 + }, + { + "epoch": 1.09, + "grad_norm": 9.458685470085904, + "learning_rate": 8.977918572651578e-06, + "loss": 0.9585, + "step": 90915 + }, + { + "epoch": 1.09, + "grad_norm": 9.294298538267082, + "learning_rate": 8.977337455677761e-06, + "loss": 1.2847, + "step": 90918 + }, + { + "epoch": 1.09, + "grad_norm": 16.96153346851611, + "learning_rate": 8.976756342193923e-06, + "loss": 1.0582, + "step": 90921 + }, + { + "epoch": 1.09, + "grad_norm": 36.38693012975579, + "learning_rate": 8.976175232202048e-06, + "loss": 0.8543, + "step": 90924 + }, + { + "epoch": 1.09, + "grad_norm": 14.389356683086255, + "learning_rate": 8.975594125704122e-06, + "loss": 1.265, + "step": 90927 + }, + { + "epoch": 1.09, + "grad_norm": 3.1921329856708778, + "learning_rate": 8.97501302270212e-06, + "loss": 1.2558, + "step": 90930 + }, + { + "epoch": 1.09, + "grad_norm": 13.9687151401017, + "learning_rate": 8.974431923198028e-06, + "loss": 1.1625, + "step": 90933 + }, + { + "epoch": 1.09, + "grad_norm": 18.527188975521995, + "learning_rate": 8.973850827193834e-06, + "loss": 1.0628, + "step": 90936 + }, + { + "epoch": 1.09, + "grad_norm": 24.50670609150858, + "learning_rate": 8.97326973469152e-06, + "loss": 1.2691, + "step": 90939 + }, + { + "epoch": 1.09, + "grad_norm": 10.50989876930219, + "learning_rate": 8.97268864569306e-06, + "loss": 1.2858, + "step": 90942 + }, + { + "epoch": 1.09, + "grad_norm": 6.278084455783773, + "learning_rate": 8.972107560200446e-06, + "loss": 1.3445, + "step": 90945 + }, + { + "epoch": 1.09, + "grad_norm": 7.022228977313614, + "learning_rate": 8.971526478215659e-06, + "loss": 1.5237, + "step": 90948 + }, + { + "epoch": 1.09, + "grad_norm": 12.614183277567513, + "learning_rate": 8.970945399740681e-06, + "loss": 1.0453, + "step": 90951 + }, + { + "epoch": 1.09, + "grad_norm": 5.858977968682196, + "learning_rate": 8.970364324777499e-06, + "loss": 1.2419, + "step": 90954 + }, + { + "epoch": 1.09, + "grad_norm": 3.907933429574127, + "learning_rate": 8.96978325332809e-06, + "loss": 1.1697, + "step": 90957 + }, + { + "epoch": 1.09, + "grad_norm": 8.732519204380347, + "learning_rate": 8.969202185394437e-06, + "loss": 1.2936, + "step": 90960 + }, + { + "epoch": 1.09, + "grad_norm": 6.894788089211333, + "learning_rate": 8.968621120978528e-06, + "loss": 1.3823, + "step": 90963 + }, + { + "epoch": 1.09, + "grad_norm": 10.862152927434584, + "learning_rate": 8.968040060082346e-06, + "loss": 0.9856, + "step": 90966 + }, + { + "epoch": 1.09, + "grad_norm": 8.006210639931675, + "learning_rate": 8.96745900270787e-06, + "loss": 1.4297, + "step": 90969 + }, + { + "epoch": 1.09, + "grad_norm": 15.888230049132838, + "learning_rate": 8.966877948857083e-06, + "loss": 1.1279, + "step": 90972 + }, + { + "epoch": 1.09, + "grad_norm": 16.834021172724302, + "learning_rate": 8.96629689853197e-06, + "loss": 1.2075, + "step": 90975 + }, + { + "epoch": 1.09, + "grad_norm": 3.2923361222564034, + "learning_rate": 8.965715851734514e-06, + "loss": 1.3484, + "step": 90978 + }, + { + "epoch": 1.09, + "grad_norm": 5.836824714033092, + "learning_rate": 8.9651348084667e-06, + "loss": 0.9244, + "step": 90981 + }, + { + "epoch": 1.09, + "grad_norm": 8.491947263913767, + "learning_rate": 8.964553768730505e-06, + "loss": 0.9845, + "step": 90984 + }, + { + "epoch": 1.09, + "grad_norm": 5.73069327515066, + "learning_rate": 8.963972732527914e-06, + "loss": 1.0977, + "step": 90987 + }, + { + "epoch": 1.09, + "grad_norm": 34.82821085087994, + "learning_rate": 8.963391699860912e-06, + "loss": 0.9017, + "step": 90990 + }, + { + "epoch": 1.09, + "grad_norm": 10.92594055322589, + "learning_rate": 8.962810670731483e-06, + "loss": 1.0024, + "step": 90993 + }, + { + "epoch": 1.09, + "grad_norm": 12.1109489398224, + "learning_rate": 8.962229645141604e-06, + "loss": 1.2472, + "step": 90996 + }, + { + "epoch": 1.09, + "grad_norm": 8.286270265947227, + "learning_rate": 8.961648623093262e-06, + "loss": 1.2595, + "step": 90999 + }, + { + "epoch": 1.09, + "grad_norm": 9.955636451610856, + "learning_rate": 8.96106760458844e-06, + "loss": 0.9582, + "step": 91002 + }, + { + "epoch": 1.09, + "grad_norm": 8.478765283740286, + "learning_rate": 8.960486589629118e-06, + "loss": 1.0502, + "step": 91005 + }, + { + "epoch": 1.09, + "grad_norm": 10.219779778963254, + "learning_rate": 8.959905578217286e-06, + "loss": 1.3835, + "step": 91008 + }, + { + "epoch": 1.09, + "grad_norm": 7.91291312755911, + "learning_rate": 8.959324570354919e-06, + "loss": 0.9028, + "step": 91011 + }, + { + "epoch": 1.09, + "grad_norm": 11.746705742621117, + "learning_rate": 8.958743566044001e-06, + "loss": 0.8998, + "step": 91014 + }, + { + "epoch": 1.09, + "grad_norm": 7.030425193832082, + "learning_rate": 8.958162565286515e-06, + "loss": 1.1198, + "step": 91017 + }, + { + "epoch": 1.09, + "grad_norm": 17.17910404401927, + "learning_rate": 8.95758156808445e-06, + "loss": 0.8558, + "step": 91020 + }, + { + "epoch": 1.09, + "grad_norm": 21.45941666392879, + "learning_rate": 8.957000574439782e-06, + "loss": 1.1627, + "step": 91023 + }, + { + "epoch": 1.09, + "grad_norm": 11.762678093136707, + "learning_rate": 8.956419584354493e-06, + "loss": 0.9581, + "step": 91026 + }, + { + "epoch": 1.09, + "grad_norm": 10.608037259254328, + "learning_rate": 8.955838597830567e-06, + "loss": 0.8795, + "step": 91029 + }, + { + "epoch": 1.09, + "grad_norm": 15.199734656993106, + "learning_rate": 8.955257614869991e-06, + "loss": 1.4474, + "step": 91032 + }, + { + "epoch": 1.09, + "grad_norm": 7.0007902695534465, + "learning_rate": 8.954676635474746e-06, + "loss": 0.7784, + "step": 91035 + }, + { + "epoch": 1.09, + "grad_norm": 9.28871386498251, + "learning_rate": 8.954095659646809e-06, + "loss": 1.1839, + "step": 91038 + }, + { + "epoch": 1.09, + "grad_norm": 22.189746496910214, + "learning_rate": 8.95351468738817e-06, + "loss": 1.5579, + "step": 91041 + }, + { + "epoch": 1.09, + "grad_norm": 5.719256715109521, + "learning_rate": 8.952933718700804e-06, + "loss": 1.1383, + "step": 91044 + }, + { + "epoch": 1.09, + "grad_norm": 9.724699481189727, + "learning_rate": 8.952352753586703e-06, + "loss": 1.4858, + "step": 91047 + }, + { + "epoch": 1.09, + "grad_norm": 4.8581049646353, + "learning_rate": 8.951771792047843e-06, + "loss": 1.418, + "step": 91050 + }, + { + "epoch": 1.09, + "grad_norm": 5.728690930621754, + "learning_rate": 8.951190834086207e-06, + "loss": 1.2239, + "step": 91053 + }, + { + "epoch": 1.09, + "grad_norm": 44.80955433406518, + "learning_rate": 8.950609879703777e-06, + "loss": 1.3001, + "step": 91056 + }, + { + "epoch": 1.09, + "grad_norm": 16.85373836907492, + "learning_rate": 8.95002892890254e-06, + "loss": 1.4907, + "step": 91059 + }, + { + "epoch": 1.09, + "grad_norm": 18.10067482384878, + "learning_rate": 8.949447981684479e-06, + "loss": 1.0354, + "step": 91062 + }, + { + "epoch": 1.1, + "grad_norm": 12.601078670739948, + "learning_rate": 8.94886703805157e-06, + "loss": 1.1071, + "step": 91065 + }, + { + "epoch": 1.1, + "grad_norm": 6.199164913030652, + "learning_rate": 8.9482860980058e-06, + "loss": 1.2459, + "step": 91068 + }, + { + "epoch": 1.1, + "grad_norm": 4.184861302137598, + "learning_rate": 8.947705161549149e-06, + "loss": 0.9122, + "step": 91071 + }, + { + "epoch": 1.1, + "grad_norm": 10.444753329773786, + "learning_rate": 8.947124228683605e-06, + "loss": 0.8931, + "step": 91074 + }, + { + "epoch": 1.1, + "grad_norm": 1.523478694648967, + "learning_rate": 8.946543299411145e-06, + "loss": 1.1846, + "step": 91077 + }, + { + "epoch": 1.1, + "grad_norm": 12.027334779399014, + "learning_rate": 8.945962373733752e-06, + "loss": 1.3416, + "step": 91080 + }, + { + "epoch": 1.1, + "grad_norm": 2.4402545087240863, + "learning_rate": 8.945381451653413e-06, + "loss": 1.3749, + "step": 91083 + }, + { + "epoch": 1.1, + "grad_norm": 18.907363263407067, + "learning_rate": 8.944800533172101e-06, + "loss": 0.87, + "step": 91086 + }, + { + "epoch": 1.1, + "grad_norm": 14.420708728204591, + "learning_rate": 8.944219618291814e-06, + "loss": 1.1843, + "step": 91089 + }, + { + "epoch": 1.1, + "grad_norm": 6.150506570502741, + "learning_rate": 8.943638707014517e-06, + "loss": 1.4776, + "step": 91092 + }, + { + "epoch": 1.1, + "grad_norm": 21.085448627368685, + "learning_rate": 8.943057799342204e-06, + "loss": 1.0336, + "step": 91095 + }, + { + "epoch": 1.1, + "grad_norm": 8.576411317901956, + "learning_rate": 8.942476895276853e-06, + "loss": 1.3108, + "step": 91098 + }, + { + "epoch": 1.1, + "grad_norm": 10.520944872059479, + "learning_rate": 8.94189599482045e-06, + "loss": 1.3309, + "step": 91101 + }, + { + "epoch": 1.1, + "grad_norm": 13.598955064276955, + "learning_rate": 8.94131509797497e-06, + "loss": 1.3498, + "step": 91104 + }, + { + "epoch": 1.1, + "grad_norm": 25.330927024419417, + "learning_rate": 8.940734204742406e-06, + "loss": 0.8058, + "step": 91107 + }, + { + "epoch": 1.1, + "grad_norm": 12.472956444609778, + "learning_rate": 8.94015331512473e-06, + "loss": 1.1477, + "step": 91110 + }, + { + "epoch": 1.1, + "grad_norm": 10.835649953897795, + "learning_rate": 8.939572429123928e-06, + "loss": 1.3851, + "step": 91113 + }, + { + "epoch": 1.1, + "grad_norm": 4.162339642094705, + "learning_rate": 8.938991546741989e-06, + "loss": 1.1583, + "step": 91116 + }, + { + "epoch": 1.1, + "grad_norm": 9.508617742263645, + "learning_rate": 8.938410667980887e-06, + "loss": 1.1382, + "step": 91119 + }, + { + "epoch": 1.1, + "grad_norm": 24.767727161023387, + "learning_rate": 8.937829792842605e-06, + "loss": 1.423, + "step": 91122 + }, + { + "epoch": 1.1, + "grad_norm": 7.833564568402202, + "learning_rate": 8.937248921329129e-06, + "loss": 1.2325, + "step": 91125 + }, + { + "epoch": 1.1, + "grad_norm": 4.801595406365414, + "learning_rate": 8.93666805344244e-06, + "loss": 1.0247, + "step": 91128 + }, + { + "epoch": 1.1, + "grad_norm": 44.998797984502794, + "learning_rate": 8.936087189184521e-06, + "loss": 1.2257, + "step": 91131 + }, + { + "epoch": 1.1, + "grad_norm": 7.0084916486366735, + "learning_rate": 8.935506328557349e-06, + "loss": 0.8875, + "step": 91134 + }, + { + "epoch": 1.1, + "grad_norm": 7.583003373523236, + "learning_rate": 8.934925471562915e-06, + "loss": 0.7236, + "step": 91137 + }, + { + "epoch": 1.1, + "grad_norm": 8.032759691253412, + "learning_rate": 8.934344618203194e-06, + "loss": 1.574, + "step": 91140 + }, + { + "epoch": 1.1, + "grad_norm": 12.24162702956848, + "learning_rate": 8.933763768480173e-06, + "loss": 1.2379, + "step": 91143 + }, + { + "epoch": 1.1, + "grad_norm": 50.55162681483415, + "learning_rate": 8.93318292239583e-06, + "loss": 1.1066, + "step": 91146 + }, + { + "epoch": 1.1, + "grad_norm": 17.603667865337133, + "learning_rate": 8.932602079952152e-06, + "loss": 1.2375, + "step": 91149 + }, + { + "epoch": 1.1, + "grad_norm": 16.133750734771624, + "learning_rate": 8.932021241151115e-06, + "loss": 1.1928, + "step": 91152 + }, + { + "epoch": 1.1, + "grad_norm": 6.093646503844971, + "learning_rate": 8.93144040599471e-06, + "loss": 1.4316, + "step": 91155 + }, + { + "epoch": 1.1, + "grad_norm": 7.351168399245702, + "learning_rate": 8.93085957448491e-06, + "loss": 1.0349, + "step": 91158 + }, + { + "epoch": 1.1, + "grad_norm": 4.551211919323801, + "learning_rate": 8.930278746623701e-06, + "loss": 1.2979, + "step": 91161 + }, + { + "epoch": 1.1, + "grad_norm": 19.747474121225935, + "learning_rate": 8.929697922413066e-06, + "loss": 0.9283, + "step": 91164 + }, + { + "epoch": 1.1, + "grad_norm": 4.070781973438525, + "learning_rate": 8.929117101854987e-06, + "loss": 0.875, + "step": 91167 + }, + { + "epoch": 1.1, + "grad_norm": 5.628552486255137, + "learning_rate": 8.928536284951446e-06, + "loss": 1.1721, + "step": 91170 + }, + { + "epoch": 1.1, + "grad_norm": 36.43221120375014, + "learning_rate": 8.927955471704425e-06, + "loss": 1.1752, + "step": 91173 + }, + { + "epoch": 1.1, + "grad_norm": 21.461162884380123, + "learning_rate": 8.927374662115905e-06, + "loss": 1.2564, + "step": 91176 + }, + { + "epoch": 1.1, + "grad_norm": 12.31636080041842, + "learning_rate": 8.926793856187869e-06, + "loss": 1.0032, + "step": 91179 + }, + { + "epoch": 1.1, + "grad_norm": 12.48039564801879, + "learning_rate": 8.926213053922302e-06, + "loss": 1.3865, + "step": 91182 + }, + { + "epoch": 1.1, + "grad_norm": 19.470050737704973, + "learning_rate": 8.925632255321177e-06, + "loss": 1.1743, + "step": 91185 + }, + { + "epoch": 1.1, + "grad_norm": 9.18582188245335, + "learning_rate": 8.925051460386484e-06, + "loss": 1.0457, + "step": 91188 + }, + { + "epoch": 1.1, + "grad_norm": 8.846380127609523, + "learning_rate": 8.924470669120206e-06, + "loss": 1.2535, + "step": 91191 + }, + { + "epoch": 1.1, + "grad_norm": 9.421898570284641, + "learning_rate": 8.923889881524318e-06, + "loss": 1.1179, + "step": 91194 + }, + { + "epoch": 1.1, + "grad_norm": 7.493758226352472, + "learning_rate": 8.923309097600812e-06, + "loss": 1.0414, + "step": 91197 + }, + { + "epoch": 1.1, + "grad_norm": 4.5799163627601445, + "learning_rate": 8.92272831735166e-06, + "loss": 1.1887, + "step": 91200 + }, + { + "epoch": 1.1, + "grad_norm": 7.224924811185924, + "learning_rate": 8.92214754077885e-06, + "loss": 1.1297, + "step": 91203 + }, + { + "epoch": 1.1, + "grad_norm": 4.104148811031889, + "learning_rate": 8.921566767884359e-06, + "loss": 1.0789, + "step": 91206 + }, + { + "epoch": 1.1, + "grad_norm": 10.152713017686324, + "learning_rate": 8.920985998670177e-06, + "loss": 1.4526, + "step": 91209 + }, + { + "epoch": 1.1, + "grad_norm": 3.1850922349885016, + "learning_rate": 8.920405233138277e-06, + "loss": 1.0148, + "step": 91212 + }, + { + "epoch": 1.1, + "grad_norm": 10.727438105368787, + "learning_rate": 8.919824471290648e-06, + "loss": 1.1438, + "step": 91215 + }, + { + "epoch": 1.1, + "grad_norm": 12.543018924454852, + "learning_rate": 8.919243713129266e-06, + "loss": 1.0793, + "step": 91218 + }, + { + "epoch": 1.1, + "grad_norm": 6.193383182900239, + "learning_rate": 8.918662958656117e-06, + "loss": 1.2492, + "step": 91221 + }, + { + "epoch": 1.1, + "grad_norm": 5.492342828115595, + "learning_rate": 8.918082207873187e-06, + "loss": 1.2224, + "step": 91224 + }, + { + "epoch": 1.1, + "grad_norm": 3.5975932436324594, + "learning_rate": 8.917501460782445e-06, + "loss": 1.0941, + "step": 91227 + }, + { + "epoch": 1.1, + "grad_norm": 6.244261971505746, + "learning_rate": 8.916920717385883e-06, + "loss": 1.3719, + "step": 91230 + }, + { + "epoch": 1.1, + "grad_norm": 8.86630759968894, + "learning_rate": 8.916339977685482e-06, + "loss": 1.1703, + "step": 91233 + }, + { + "epoch": 1.1, + "grad_norm": 12.010493345906998, + "learning_rate": 8.915759241683225e-06, + "loss": 1.3395, + "step": 91236 + }, + { + "epoch": 1.1, + "grad_norm": 6.735968943400345, + "learning_rate": 8.915178509381087e-06, + "loss": 0.8705, + "step": 91239 + }, + { + "epoch": 1.1, + "grad_norm": 3.574376898784145, + "learning_rate": 8.914597780781052e-06, + "loss": 1.4087, + "step": 91242 + }, + { + "epoch": 1.1, + "grad_norm": 26.264337758525468, + "learning_rate": 8.914017055885107e-06, + "loss": 1.2092, + "step": 91245 + }, + { + "epoch": 1.1, + "grad_norm": 25.763796623014684, + "learning_rate": 8.913436334695229e-06, + "loss": 1.1068, + "step": 91248 + }, + { + "epoch": 1.1, + "grad_norm": 6.40943141475664, + "learning_rate": 8.912855617213406e-06, + "loss": 1.2426, + "step": 91251 + }, + { + "epoch": 1.1, + "grad_norm": 9.393830785446408, + "learning_rate": 8.91227490344161e-06, + "loss": 1.2522, + "step": 91254 + }, + { + "epoch": 1.1, + "grad_norm": 37.120180785803456, + "learning_rate": 8.91169419338183e-06, + "loss": 1.1307, + "step": 91257 + }, + { + "epoch": 1.1, + "grad_norm": 16.14833803317401, + "learning_rate": 8.911113487036044e-06, + "loss": 1.3472, + "step": 91260 + }, + { + "epoch": 1.1, + "grad_norm": 18.863311533739395, + "learning_rate": 8.910532784406241e-06, + "loss": 0.9695, + "step": 91263 + }, + { + "epoch": 1.1, + "grad_norm": 14.392461289580288, + "learning_rate": 8.909952085494391e-06, + "loss": 1.3328, + "step": 91266 + }, + { + "epoch": 1.1, + "grad_norm": 11.73536300503253, + "learning_rate": 8.909371390302484e-06, + "loss": 1.018, + "step": 91269 + }, + { + "epoch": 1.1, + "grad_norm": 5.258894082218101, + "learning_rate": 8.9087906988325e-06, + "loss": 1.3239, + "step": 91272 + }, + { + "epoch": 1.1, + "grad_norm": 2.9338761156423603, + "learning_rate": 8.908210011086419e-06, + "loss": 1.4774, + "step": 91275 + }, + { + "epoch": 1.1, + "grad_norm": 4.943191757332032, + "learning_rate": 8.907629327066228e-06, + "loss": 1.432, + "step": 91278 + }, + { + "epoch": 1.1, + "grad_norm": 13.94355855147831, + "learning_rate": 8.9070486467739e-06, + "loss": 1.2846, + "step": 91281 + }, + { + "epoch": 1.1, + "grad_norm": 6.271546307932512, + "learning_rate": 8.906467970211421e-06, + "loss": 1.0065, + "step": 91284 + }, + { + "epoch": 1.1, + "grad_norm": 11.167898445788069, + "learning_rate": 8.905887297380776e-06, + "loss": 1.3147, + "step": 91287 + }, + { + "epoch": 1.1, + "grad_norm": 18.48477501769404, + "learning_rate": 8.905306628283945e-06, + "loss": 1.2757, + "step": 91290 + }, + { + "epoch": 1.1, + "grad_norm": 19.30520926980379, + "learning_rate": 8.904725962922904e-06, + "loss": 1.1746, + "step": 91293 + }, + { + "epoch": 1.1, + "grad_norm": 5.515686026145575, + "learning_rate": 8.904145301299638e-06, + "loss": 1.4505, + "step": 91296 + }, + { + "epoch": 1.1, + "grad_norm": 11.731510758252961, + "learning_rate": 8.903564643416132e-06, + "loss": 1.2179, + "step": 91299 + }, + { + "epoch": 1.1, + "grad_norm": 13.550987569145786, + "learning_rate": 8.902983989274365e-06, + "loss": 1.4099, + "step": 91302 + }, + { + "epoch": 1.1, + "grad_norm": 20.843124629189607, + "learning_rate": 8.90240333887632e-06, + "loss": 1.3182, + "step": 91305 + }, + { + "epoch": 1.1, + "grad_norm": 4.219356847228079, + "learning_rate": 8.901822692223973e-06, + "loss": 1.0507, + "step": 91308 + }, + { + "epoch": 1.1, + "grad_norm": 12.115938743060177, + "learning_rate": 8.901242049319312e-06, + "loss": 1.3716, + "step": 91311 + }, + { + "epoch": 1.1, + "grad_norm": 19.851644546223667, + "learning_rate": 8.900661410164315e-06, + "loss": 1.0868, + "step": 91314 + }, + { + "epoch": 1.1, + "grad_norm": 17.889106389882006, + "learning_rate": 8.900080774760969e-06, + "loss": 1.1864, + "step": 91317 + }, + { + "epoch": 1.1, + "grad_norm": 8.12826912404563, + "learning_rate": 8.899500143111248e-06, + "loss": 1.1649, + "step": 91320 + }, + { + "epoch": 1.1, + "grad_norm": 9.851808731304137, + "learning_rate": 8.898919515217134e-06, + "loss": 1.0321, + "step": 91323 + }, + { + "epoch": 1.1, + "grad_norm": 6.592396718327739, + "learning_rate": 8.898338891080612e-06, + "loss": 1.6373, + "step": 91326 + }, + { + "epoch": 1.1, + "grad_norm": 8.776386664247575, + "learning_rate": 8.897758270703664e-06, + "loss": 1.2641, + "step": 91329 + }, + { + "epoch": 1.1, + "grad_norm": 6.982080879347138, + "learning_rate": 8.897177654088273e-06, + "loss": 1.6446, + "step": 91332 + }, + { + "epoch": 1.1, + "grad_norm": 8.153608648497974, + "learning_rate": 8.896597041236412e-06, + "loss": 1.61, + "step": 91335 + }, + { + "epoch": 1.1, + "grad_norm": 6.541260884361361, + "learning_rate": 8.896016432150068e-06, + "loss": 1.3991, + "step": 91338 + }, + { + "epoch": 1.1, + "grad_norm": 13.843079287764082, + "learning_rate": 8.895435826831225e-06, + "loss": 1.2347, + "step": 91341 + }, + { + "epoch": 1.1, + "grad_norm": 5.777102100682265, + "learning_rate": 8.894855225281864e-06, + "loss": 1.2327, + "step": 91344 + }, + { + "epoch": 1.1, + "grad_norm": 5.230922324492739, + "learning_rate": 8.89427462750396e-06, + "loss": 1.448, + "step": 91347 + }, + { + "epoch": 1.1, + "grad_norm": 71.48045857040657, + "learning_rate": 8.893694033499499e-06, + "loss": 0.9559, + "step": 91350 + }, + { + "epoch": 1.1, + "grad_norm": 9.904372536358057, + "learning_rate": 8.893113443270463e-06, + "loss": 1.3877, + "step": 91353 + }, + { + "epoch": 1.1, + "grad_norm": 16.051518778742775, + "learning_rate": 8.892532856818828e-06, + "loss": 1.1686, + "step": 91356 + }, + { + "epoch": 1.1, + "grad_norm": 20.716069157704748, + "learning_rate": 8.891952274146587e-06, + "loss": 1.2558, + "step": 91359 + }, + { + "epoch": 1.1, + "grad_norm": 8.451255718271174, + "learning_rate": 8.891371695255709e-06, + "loss": 1.333, + "step": 91362 + }, + { + "epoch": 1.1, + "grad_norm": 6.344346903578724, + "learning_rate": 8.89079112014818e-06, + "loss": 1.4836, + "step": 91365 + }, + { + "epoch": 1.1, + "grad_norm": 10.913388596257452, + "learning_rate": 8.89021054882598e-06, + "loss": 1.5854, + "step": 91368 + }, + { + "epoch": 1.1, + "grad_norm": 19.415608309655177, + "learning_rate": 8.889629981291096e-06, + "loss": 1.259, + "step": 91371 + }, + { + "epoch": 1.1, + "grad_norm": 11.811238252064667, + "learning_rate": 8.889049417545504e-06, + "loss": 1.0011, + "step": 91374 + }, + { + "epoch": 1.1, + "grad_norm": 6.132083028611731, + "learning_rate": 8.888468857591183e-06, + "loss": 1.2988, + "step": 91377 + }, + { + "epoch": 1.1, + "grad_norm": 19.065746884204273, + "learning_rate": 8.887888301430119e-06, + "loss": 1.0564, + "step": 91380 + }, + { + "epoch": 1.1, + "grad_norm": 7.097680884165932, + "learning_rate": 8.887307749064291e-06, + "loss": 1.2693, + "step": 91383 + }, + { + "epoch": 1.1, + "grad_norm": 13.557059894036668, + "learning_rate": 8.886727200495685e-06, + "loss": 1.5351, + "step": 91386 + }, + { + "epoch": 1.1, + "grad_norm": 11.713377744812313, + "learning_rate": 8.886146655726274e-06, + "loss": 1.3705, + "step": 91389 + }, + { + "epoch": 1.1, + "grad_norm": 18.32743282669731, + "learning_rate": 8.885566114758042e-06, + "loss": 1.0794, + "step": 91392 + }, + { + "epoch": 1.1, + "grad_norm": 11.174559353153748, + "learning_rate": 8.884985577592974e-06, + "loss": 0.8003, + "step": 91395 + }, + { + "epoch": 1.1, + "grad_norm": 5.975009432880824, + "learning_rate": 8.884405044233051e-06, + "loss": 1.297, + "step": 91398 + }, + { + "epoch": 1.1, + "grad_norm": 11.725073465175468, + "learning_rate": 8.883824514680246e-06, + "loss": 1.0048, + "step": 91401 + }, + { + "epoch": 1.1, + "grad_norm": 10.095163951850262, + "learning_rate": 8.88324398893655e-06, + "loss": 1.3767, + "step": 91404 + }, + { + "epoch": 1.1, + "grad_norm": 12.887369887988768, + "learning_rate": 8.882663467003937e-06, + "loss": 1.7126, + "step": 91407 + }, + { + "epoch": 1.1, + "grad_norm": 16.90408515620405, + "learning_rate": 8.882082948884392e-06, + "loss": 1.1771, + "step": 91410 + }, + { + "epoch": 1.1, + "grad_norm": 10.243178079855438, + "learning_rate": 8.8815024345799e-06, + "loss": 1.1948, + "step": 91413 + }, + { + "epoch": 1.1, + "grad_norm": 4.0450372999262125, + "learning_rate": 8.880921924092433e-06, + "loss": 1.0774, + "step": 91416 + }, + { + "epoch": 1.1, + "grad_norm": 12.52852492815482, + "learning_rate": 8.880341417423975e-06, + "loss": 1.0337, + "step": 91419 + }, + { + "epoch": 1.1, + "grad_norm": 16.048164731499526, + "learning_rate": 8.87976091457651e-06, + "loss": 1.4819, + "step": 91422 + }, + { + "epoch": 1.1, + "grad_norm": 20.458895416086712, + "learning_rate": 8.87918041555202e-06, + "loss": 1.4445, + "step": 91425 + }, + { + "epoch": 1.1, + "grad_norm": 205.7311465779493, + "learning_rate": 8.878599920352482e-06, + "loss": 1.3169, + "step": 91428 + }, + { + "epoch": 1.1, + "grad_norm": 4.712528709375757, + "learning_rate": 8.878019428979876e-06, + "loss": 1.2199, + "step": 91431 + }, + { + "epoch": 1.1, + "grad_norm": 16.965757796118353, + "learning_rate": 8.877438941436185e-06, + "loss": 1.3641, + "step": 91434 + }, + { + "epoch": 1.1, + "grad_norm": 8.755269588072272, + "learning_rate": 8.876858457723394e-06, + "loss": 1.0112, + "step": 91437 + }, + { + "epoch": 1.1, + "grad_norm": 10.346220693528673, + "learning_rate": 8.876277977843481e-06, + "loss": 0.9569, + "step": 91440 + }, + { + "epoch": 1.1, + "grad_norm": 10.01698820591698, + "learning_rate": 8.875697501798424e-06, + "loss": 1.3293, + "step": 91443 + }, + { + "epoch": 1.1, + "grad_norm": 23.40165701207353, + "learning_rate": 8.875117029590207e-06, + "loss": 1.2255, + "step": 91446 + }, + { + "epoch": 1.1, + "grad_norm": 9.5643679336434, + "learning_rate": 8.87453656122081e-06, + "loss": 1.322, + "step": 91449 + }, + { + "epoch": 1.1, + "grad_norm": 11.780778132991099, + "learning_rate": 8.873956096692219e-06, + "loss": 1.0149, + "step": 91452 + }, + { + "epoch": 1.1, + "grad_norm": 15.109482544356009, + "learning_rate": 8.873375636006405e-06, + "loss": 1.4171, + "step": 91455 + }, + { + "epoch": 1.1, + "grad_norm": 4.234407938378036, + "learning_rate": 8.872795179165355e-06, + "loss": 1.3568, + "step": 91458 + }, + { + "epoch": 1.1, + "grad_norm": 10.759373785725812, + "learning_rate": 8.872214726171049e-06, + "loss": 0.9183, + "step": 91461 + }, + { + "epoch": 1.1, + "grad_norm": 7.219760802974894, + "learning_rate": 8.871634277025466e-06, + "loss": 1.2491, + "step": 91464 + }, + { + "epoch": 1.1, + "grad_norm": 3.9676504324483575, + "learning_rate": 8.871053831730594e-06, + "loss": 0.894, + "step": 91467 + }, + { + "epoch": 1.1, + "grad_norm": 15.963597788277847, + "learning_rate": 8.870473390288407e-06, + "loss": 1.2275, + "step": 91470 + }, + { + "epoch": 1.1, + "grad_norm": 2.8328546666831773, + "learning_rate": 8.869892952700884e-06, + "loss": 1.1207, + "step": 91473 + }, + { + "epoch": 1.1, + "grad_norm": 9.794607803965416, + "learning_rate": 8.86931251897001e-06, + "loss": 1.3853, + "step": 91476 + }, + { + "epoch": 1.1, + "grad_norm": 7.414405018754385, + "learning_rate": 8.86873208909777e-06, + "loss": 1.4143, + "step": 91479 + }, + { + "epoch": 1.1, + "grad_norm": 10.776460043600158, + "learning_rate": 8.868151663086138e-06, + "loss": 0.9799, + "step": 91482 + }, + { + "epoch": 1.1, + "grad_norm": 9.71056305414876, + "learning_rate": 8.867571240937092e-06, + "loss": 1.3104, + "step": 91485 + }, + { + "epoch": 1.1, + "grad_norm": 3.623251247494822, + "learning_rate": 8.866990822652621e-06, + "loss": 1.1187, + "step": 91488 + }, + { + "epoch": 1.1, + "grad_norm": 6.213585785283053, + "learning_rate": 8.866410408234701e-06, + "loss": 1.1241, + "step": 91491 + }, + { + "epoch": 1.1, + "grad_norm": 35.70215513304299, + "learning_rate": 8.865829997685318e-06, + "loss": 1.1179, + "step": 91494 + }, + { + "epoch": 1.1, + "grad_norm": 4.0305076636383985, + "learning_rate": 8.865249591006444e-06, + "loss": 0.8684, + "step": 91497 + }, + { + "epoch": 1.1, + "grad_norm": 7.715923416978497, + "learning_rate": 8.864669188200067e-06, + "loss": 1.0442, + "step": 91500 + }, + { + "epoch": 1.1, + "grad_norm": 8.373037940632148, + "learning_rate": 8.864088789268162e-06, + "loss": 1.2377, + "step": 91503 + }, + { + "epoch": 1.1, + "grad_norm": 7.292627611199188, + "learning_rate": 8.86350839421272e-06, + "loss": 1.2442, + "step": 91506 + }, + { + "epoch": 1.1, + "grad_norm": 10.209001006748014, + "learning_rate": 8.862928003035706e-06, + "loss": 1.4084, + "step": 91509 + }, + { + "epoch": 1.1, + "grad_norm": 12.332572059417211, + "learning_rate": 8.862347615739114e-06, + "loss": 0.8123, + "step": 91512 + }, + { + "epoch": 1.1, + "grad_norm": 14.684908772664057, + "learning_rate": 8.861767232324915e-06, + "loss": 0.9678, + "step": 91515 + }, + { + "epoch": 1.1, + "grad_norm": 10.580757282339622, + "learning_rate": 8.861186852795098e-06, + "loss": 0.9783, + "step": 91518 + }, + { + "epoch": 1.1, + "grad_norm": 2.533894343925447, + "learning_rate": 8.860606477151643e-06, + "loss": 1.5809, + "step": 91521 + }, + { + "epoch": 1.1, + "grad_norm": 16.78732687068603, + "learning_rate": 8.860026105396524e-06, + "loss": 0.8415, + "step": 91524 + }, + { + "epoch": 1.1, + "grad_norm": 28.465483175941614, + "learning_rate": 8.859445737531725e-06, + "loss": 1.2229, + "step": 91527 + }, + { + "epoch": 1.1, + "grad_norm": 23.804950448825604, + "learning_rate": 8.858865373559226e-06, + "loss": 1.2388, + "step": 91530 + }, + { + "epoch": 1.1, + "grad_norm": 9.609764035055203, + "learning_rate": 8.858285013481012e-06, + "loss": 1.0685, + "step": 91533 + }, + { + "epoch": 1.1, + "grad_norm": 30.878388923797672, + "learning_rate": 8.85770465729906e-06, + "loss": 1.1448, + "step": 91536 + }, + { + "epoch": 1.1, + "grad_norm": 13.032186921095093, + "learning_rate": 8.857124305015347e-06, + "loss": 1.0458, + "step": 91539 + }, + { + "epoch": 1.1, + "grad_norm": 11.202665525852444, + "learning_rate": 8.85654395663186e-06, + "loss": 1.3825, + "step": 91542 + }, + { + "epoch": 1.1, + "grad_norm": 5.536195232280782, + "learning_rate": 8.855963612150574e-06, + "loss": 0.9853, + "step": 91545 + }, + { + "epoch": 1.1, + "grad_norm": 37.690760212532226, + "learning_rate": 8.855383271573476e-06, + "loss": 1.4288, + "step": 91548 + }, + { + "epoch": 1.1, + "grad_norm": 7.816630482888693, + "learning_rate": 8.854802934902538e-06, + "loss": 1.5069, + "step": 91551 + }, + { + "epoch": 1.1, + "grad_norm": 3.3291767725471533, + "learning_rate": 8.854222602139748e-06, + "loss": 1.3288, + "step": 91554 + }, + { + "epoch": 1.1, + "grad_norm": 10.053600045149292, + "learning_rate": 8.853642273287082e-06, + "loss": 0.9136, + "step": 91557 + }, + { + "epoch": 1.1, + "grad_norm": 162.81241560438409, + "learning_rate": 8.853061948346526e-06, + "loss": 1.0783, + "step": 91560 + }, + { + "epoch": 1.1, + "grad_norm": 11.455768917540967, + "learning_rate": 8.852481627320051e-06, + "loss": 1.2719, + "step": 91563 + }, + { + "epoch": 1.1, + "grad_norm": 9.783585763002376, + "learning_rate": 8.851901310209646e-06, + "loss": 1.2557, + "step": 91566 + }, + { + "epoch": 1.1, + "grad_norm": 6.143718960779315, + "learning_rate": 8.851320997017286e-06, + "loss": 1.2672, + "step": 91569 + }, + { + "epoch": 1.1, + "grad_norm": 5.984838892204617, + "learning_rate": 8.850740687744953e-06, + "loss": 1.3282, + "step": 91572 + }, + { + "epoch": 1.1, + "grad_norm": 120.6633900671181, + "learning_rate": 8.850160382394633e-06, + "loss": 1.1913, + "step": 91575 + }, + { + "epoch": 1.1, + "grad_norm": 18.522587116126154, + "learning_rate": 8.849580080968299e-06, + "loss": 1.2986, + "step": 91578 + }, + { + "epoch": 1.1, + "grad_norm": 141.97672100463762, + "learning_rate": 8.848999783467932e-06, + "loss": 1.1487, + "step": 91581 + }, + { + "epoch": 1.1, + "grad_norm": 7.298330956736403, + "learning_rate": 8.848419489895515e-06, + "loss": 1.3186, + "step": 91584 + }, + { + "epoch": 1.1, + "grad_norm": 15.643306930382295, + "learning_rate": 8.84783920025303e-06, + "loss": 1.3105, + "step": 91587 + }, + { + "epoch": 1.1, + "grad_norm": 15.91798732972055, + "learning_rate": 8.847258914542453e-06, + "loss": 0.9303, + "step": 91590 + }, + { + "epoch": 1.1, + "grad_norm": 6.2403001441557535, + "learning_rate": 8.846678632765764e-06, + "loss": 1.2473, + "step": 91593 + }, + { + "epoch": 1.1, + "grad_norm": 11.616744597201848, + "learning_rate": 8.846098354924948e-06, + "loss": 0.9965, + "step": 91596 + }, + { + "epoch": 1.1, + "grad_norm": 15.727580784769438, + "learning_rate": 8.84551808102198e-06, + "loss": 0.8877, + "step": 91599 + }, + { + "epoch": 1.1, + "grad_norm": 29.596751874901063, + "learning_rate": 8.844937811058848e-06, + "loss": 1.2415, + "step": 91602 + }, + { + "epoch": 1.1, + "grad_norm": 8.218608981247488, + "learning_rate": 8.844357545037523e-06, + "loss": 1.3256, + "step": 91605 + }, + { + "epoch": 1.1, + "grad_norm": 13.318008598526943, + "learning_rate": 8.843777282959991e-06, + "loss": 1.2279, + "step": 91608 + }, + { + "epoch": 1.1, + "grad_norm": 10.575055651552182, + "learning_rate": 8.84319702482823e-06, + "loss": 1.5992, + "step": 91611 + }, + { + "epoch": 1.1, + "grad_norm": 23.826122517649626, + "learning_rate": 8.842616770644224e-06, + "loss": 1.3161, + "step": 91614 + }, + { + "epoch": 1.1, + "grad_norm": 16.992198799432394, + "learning_rate": 8.842036520409944e-06, + "loss": 1.0765, + "step": 91617 + }, + { + "epoch": 1.1, + "grad_norm": 20.96762714583294, + "learning_rate": 8.841456274127381e-06, + "loss": 1.3075, + "step": 91620 + }, + { + "epoch": 1.1, + "grad_norm": 13.182390733139712, + "learning_rate": 8.840876031798508e-06, + "loss": 1.1336, + "step": 91623 + }, + { + "epoch": 1.1, + "grad_norm": 12.384484514916274, + "learning_rate": 8.840295793425306e-06, + "loss": 0.9243, + "step": 91626 + }, + { + "epoch": 1.1, + "grad_norm": 11.484317700772467, + "learning_rate": 8.839715559009763e-06, + "loss": 1.0152, + "step": 91629 + }, + { + "epoch": 1.1, + "grad_norm": 8.648647442956374, + "learning_rate": 8.83913532855385e-06, + "loss": 1.5718, + "step": 91632 + }, + { + "epoch": 1.1, + "grad_norm": 39.61944666403946, + "learning_rate": 8.838555102059548e-06, + "loss": 1.2301, + "step": 91635 + }, + { + "epoch": 1.1, + "grad_norm": 5.665431042052733, + "learning_rate": 8.837974879528839e-06, + "loss": 1.1239, + "step": 91638 + }, + { + "epoch": 1.1, + "grad_norm": 7.822846774418267, + "learning_rate": 8.837394660963707e-06, + "loss": 1.1225, + "step": 91641 + }, + { + "epoch": 1.1, + "grad_norm": 5.626078351932479, + "learning_rate": 8.836814446366124e-06, + "loss": 1.3559, + "step": 91644 + }, + { + "epoch": 1.1, + "grad_norm": 9.258973911845288, + "learning_rate": 8.836234235738075e-06, + "loss": 0.8473, + "step": 91647 + }, + { + "epoch": 1.1, + "grad_norm": 18.194930980122248, + "learning_rate": 8.835654029081541e-06, + "loss": 1.1677, + "step": 91650 + }, + { + "epoch": 1.1, + "grad_norm": 47.466544486910465, + "learning_rate": 8.835073826398499e-06, + "loss": 1.4342, + "step": 91653 + }, + { + "epoch": 1.1, + "grad_norm": 18.598401817798543, + "learning_rate": 8.834493627690934e-06, + "loss": 1.3963, + "step": 91656 + }, + { + "epoch": 1.1, + "grad_norm": 23.71425496152869, + "learning_rate": 8.833913432960817e-06, + "loss": 1.125, + "step": 91659 + }, + { + "epoch": 1.1, + "grad_norm": 4.835567116084661, + "learning_rate": 8.833333242210138e-06, + "loss": 1.6088, + "step": 91662 + }, + { + "epoch": 1.1, + "grad_norm": 4.138349649964422, + "learning_rate": 8.832753055440869e-06, + "loss": 1.1536, + "step": 91665 + }, + { + "epoch": 1.1, + "grad_norm": 11.673560639503279, + "learning_rate": 8.832172872654996e-06, + "loss": 1.4337, + "step": 91668 + }, + { + "epoch": 1.1, + "grad_norm": 14.429310721223542, + "learning_rate": 8.831592693854494e-06, + "loss": 1.2317, + "step": 91671 + }, + { + "epoch": 1.1, + "grad_norm": 7.2826886473620345, + "learning_rate": 8.831012519041346e-06, + "loss": 1.1594, + "step": 91674 + }, + { + "epoch": 1.1, + "grad_norm": 7.082575924170931, + "learning_rate": 8.83043234821753e-06, + "loss": 1.252, + "step": 91677 + }, + { + "epoch": 1.1, + "grad_norm": 2.2914281039428634, + "learning_rate": 8.82985218138503e-06, + "loss": 1.1934, + "step": 91680 + }, + { + "epoch": 1.1, + "grad_norm": 10.84821650978113, + "learning_rate": 8.829272018545822e-06, + "loss": 1.2187, + "step": 91683 + }, + { + "epoch": 1.1, + "grad_norm": 11.515269486347192, + "learning_rate": 8.828691859701888e-06, + "loss": 0.9752, + "step": 91686 + }, + { + "epoch": 1.1, + "grad_norm": 7.014221113132716, + "learning_rate": 8.828111704855204e-06, + "loss": 1.0963, + "step": 91689 + }, + { + "epoch": 1.1, + "grad_norm": 3.44194430052588, + "learning_rate": 8.827531554007755e-06, + "loss": 1.3068, + "step": 91692 + }, + { + "epoch": 1.1, + "grad_norm": 11.083248455785611, + "learning_rate": 8.826951407161519e-06, + "loss": 1.3394, + "step": 91695 + }, + { + "epoch": 1.1, + "grad_norm": 5.552298386845284, + "learning_rate": 8.826371264318472e-06, + "loss": 1.2101, + "step": 91698 + }, + { + "epoch": 1.1, + "grad_norm": 3.36384054860583, + "learning_rate": 8.825791125480599e-06, + "loss": 1.116, + "step": 91701 + }, + { + "epoch": 1.1, + "grad_norm": 4.18833314883953, + "learning_rate": 8.825210990649878e-06, + "loss": 1.0387, + "step": 91704 + }, + { + "epoch": 1.1, + "grad_norm": 3.6994712629019, + "learning_rate": 8.824630859828287e-06, + "loss": 1.0361, + "step": 91707 + }, + { + "epoch": 1.1, + "grad_norm": 8.752573271263593, + "learning_rate": 8.824050733017812e-06, + "loss": 1.3804, + "step": 91710 + }, + { + "epoch": 1.1, + "grad_norm": 2.5124032111169297, + "learning_rate": 8.823470610220424e-06, + "loss": 1.3476, + "step": 91713 + }, + { + "epoch": 1.1, + "grad_norm": 4.917260041877356, + "learning_rate": 8.822890491438108e-06, + "loss": 1.1264, + "step": 91716 + }, + { + "epoch": 1.1, + "grad_norm": 15.0211807282437, + "learning_rate": 8.822310376672842e-06, + "loss": 1.0089, + "step": 91719 + }, + { + "epoch": 1.1, + "grad_norm": 17.593494539628907, + "learning_rate": 8.821730265926611e-06, + "loss": 1.5864, + "step": 91722 + }, + { + "epoch": 1.1, + "grad_norm": 8.0028061230193, + "learning_rate": 8.821150159201385e-06, + "loss": 1.4112, + "step": 91725 + }, + { + "epoch": 1.1, + "grad_norm": 10.32702005485786, + "learning_rate": 8.820570056499152e-06, + "loss": 1.1472, + "step": 91728 + }, + { + "epoch": 1.1, + "grad_norm": 17.279671898089035, + "learning_rate": 8.819989957821886e-06, + "loss": 1.258, + "step": 91731 + }, + { + "epoch": 1.1, + "grad_norm": 4.544109753813919, + "learning_rate": 8.819409863171571e-06, + "loss": 1.0522, + "step": 91734 + }, + { + "epoch": 1.1, + "grad_norm": 56.60677672291436, + "learning_rate": 8.818829772550186e-06, + "loss": 1.3556, + "step": 91737 + }, + { + "epoch": 1.1, + "grad_norm": 19.31698725396447, + "learning_rate": 8.818249685959707e-06, + "loss": 1.0071, + "step": 91740 + }, + { + "epoch": 1.1, + "grad_norm": 4.650701256744056, + "learning_rate": 8.817669603402116e-06, + "loss": 1.4317, + "step": 91743 + }, + { + "epoch": 1.1, + "grad_norm": 11.909180721646873, + "learning_rate": 8.817089524879396e-06, + "loss": 1.3094, + "step": 91746 + }, + { + "epoch": 1.1, + "grad_norm": 8.806781678993465, + "learning_rate": 8.816509450393523e-06, + "loss": 1.1286, + "step": 91749 + }, + { + "epoch": 1.1, + "grad_norm": 4.441310752709155, + "learning_rate": 8.815929379946473e-06, + "loss": 1.1341, + "step": 91752 + }, + { + "epoch": 1.1, + "grad_norm": 3.952591193762816, + "learning_rate": 8.81534931354023e-06, + "loss": 1.3355, + "step": 91755 + }, + { + "epoch": 1.1, + "grad_norm": 6.522662475288598, + "learning_rate": 8.814769251176776e-06, + "loss": 1.2378, + "step": 91758 + }, + { + "epoch": 1.1, + "grad_norm": 11.078737866303728, + "learning_rate": 8.814189192858085e-06, + "loss": 0.9997, + "step": 91761 + }, + { + "epoch": 1.1, + "grad_norm": 8.641486529531047, + "learning_rate": 8.813609138586145e-06, + "loss": 1.2325, + "step": 91764 + }, + { + "epoch": 1.1, + "grad_norm": 13.947779190586267, + "learning_rate": 8.813029088362924e-06, + "loss": 1.274, + "step": 91767 + }, + { + "epoch": 1.1, + "grad_norm": 4.125720082193439, + "learning_rate": 8.812449042190407e-06, + "loss": 1.1853, + "step": 91770 + }, + { + "epoch": 1.1, + "grad_norm": 8.751643956670412, + "learning_rate": 8.811869000070574e-06, + "loss": 1.3995, + "step": 91773 + }, + { + "epoch": 1.1, + "grad_norm": 11.151211329654343, + "learning_rate": 8.811288962005409e-06, + "loss": 1.1601, + "step": 91776 + }, + { + "epoch": 1.1, + "grad_norm": 6.295070221971297, + "learning_rate": 8.810708927996883e-06, + "loss": 1.2958, + "step": 91779 + }, + { + "epoch": 1.1, + "grad_norm": 12.152093353317833, + "learning_rate": 8.810128898046977e-06, + "loss": 1.365, + "step": 91782 + }, + { + "epoch": 1.1, + "grad_norm": 7.4297085328084975, + "learning_rate": 8.809548872157675e-06, + "loss": 1.2748, + "step": 91785 + }, + { + "epoch": 1.1, + "grad_norm": 7.421913316671481, + "learning_rate": 8.808968850330952e-06, + "loss": 1.1815, + "step": 91788 + }, + { + "epoch": 1.1, + "grad_norm": 8.869988464254186, + "learning_rate": 8.808388832568793e-06, + "loss": 1.0854, + "step": 91791 + }, + { + "epoch": 1.1, + "grad_norm": 11.755484871428676, + "learning_rate": 8.807808818873172e-06, + "loss": 0.8798, + "step": 91794 + }, + { + "epoch": 1.1, + "grad_norm": 20.80550647119529, + "learning_rate": 8.807228809246066e-06, + "loss": 1.2406, + "step": 91797 + }, + { + "epoch": 1.1, + "grad_norm": 9.029046109144673, + "learning_rate": 8.806648803689464e-06, + "loss": 1.3612, + "step": 91800 + }, + { + "epoch": 1.1, + "grad_norm": 35.87119153793572, + "learning_rate": 8.806068802205339e-06, + "loss": 1.3906, + "step": 91803 + }, + { + "epoch": 1.1, + "grad_norm": 3.512578184135437, + "learning_rate": 8.805488804795669e-06, + "loss": 1.3374, + "step": 91806 + }, + { + "epoch": 1.1, + "grad_norm": 10.49778827298209, + "learning_rate": 8.804908811462435e-06, + "loss": 1.2034, + "step": 91809 + }, + { + "epoch": 1.1, + "grad_norm": 9.734781941235394, + "learning_rate": 8.804328822207619e-06, + "loss": 1.115, + "step": 91812 + }, + { + "epoch": 1.1, + "grad_norm": 15.178078641471352, + "learning_rate": 8.803748837033194e-06, + "loss": 1.616, + "step": 91815 + }, + { + "epoch": 1.1, + "grad_norm": 8.596877059545527, + "learning_rate": 8.80316885594115e-06, + "loss": 1.0812, + "step": 91818 + }, + { + "epoch": 1.1, + "grad_norm": 19.304225922465143, + "learning_rate": 8.802588878933456e-06, + "loss": 1.3868, + "step": 91821 + }, + { + "epoch": 1.1, + "grad_norm": 8.936160468922905, + "learning_rate": 8.802008906012094e-06, + "loss": 1.062, + "step": 91824 + }, + { + "epoch": 1.1, + "grad_norm": 10.810737108929022, + "learning_rate": 8.801428937179044e-06, + "loss": 1.0375, + "step": 91827 + }, + { + "epoch": 1.1, + "grad_norm": 16.347395652823806, + "learning_rate": 8.800848972436287e-06, + "loss": 1.0516, + "step": 91830 + }, + { + "epoch": 1.1, + "grad_norm": 8.257527359888382, + "learning_rate": 8.8002690117858e-06, + "loss": 1.2992, + "step": 91833 + }, + { + "epoch": 1.1, + "grad_norm": 51.908906307612824, + "learning_rate": 8.799689055229562e-06, + "loss": 0.9949, + "step": 91836 + }, + { + "epoch": 1.1, + "grad_norm": 16.918234605806038, + "learning_rate": 8.799109102769552e-06, + "loss": 1.2618, + "step": 91839 + }, + { + "epoch": 1.1, + "grad_norm": 8.525629341100533, + "learning_rate": 8.798529154407752e-06, + "loss": 1.2636, + "step": 91842 + }, + { + "epoch": 1.1, + "grad_norm": 9.806428304063635, + "learning_rate": 8.797949210146142e-06, + "loss": 1.3013, + "step": 91845 + }, + { + "epoch": 1.1, + "grad_norm": 31.682637973912765, + "learning_rate": 8.797369269986693e-06, + "loss": 1.5737, + "step": 91848 + }, + { + "epoch": 1.1, + "grad_norm": 7.321853028387233, + "learning_rate": 8.796789333931391e-06, + "loss": 1.083, + "step": 91851 + }, + { + "epoch": 1.1, + "grad_norm": 13.904447714387413, + "learning_rate": 8.796209401982215e-06, + "loss": 1.1317, + "step": 91854 + }, + { + "epoch": 1.1, + "grad_norm": 5.734708228392484, + "learning_rate": 8.795629474141145e-06, + "loss": 1.1856, + "step": 91857 + }, + { + "epoch": 1.1, + "grad_norm": 2.147227601691294, + "learning_rate": 8.795049550410151e-06, + "loss": 1.1867, + "step": 91860 + }, + { + "epoch": 1.1, + "grad_norm": 23.3607480760572, + "learning_rate": 8.794469630791222e-06, + "loss": 1.3096, + "step": 91863 + }, + { + "epoch": 1.1, + "grad_norm": 13.244279003382582, + "learning_rate": 8.793889715286334e-06, + "loss": 1.0125, + "step": 91866 + }, + { + "epoch": 1.1, + "grad_norm": 4.303917025089799, + "learning_rate": 8.793309803897466e-06, + "loss": 1.1823, + "step": 91869 + }, + { + "epoch": 1.1, + "grad_norm": 8.46095160725044, + "learning_rate": 8.7927298966266e-06, + "loss": 1.005, + "step": 91872 + }, + { + "epoch": 1.1, + "grad_norm": 5.4336843293154775, + "learning_rate": 8.79214999347571e-06, + "loss": 1.2256, + "step": 91875 + }, + { + "epoch": 1.1, + "grad_norm": 2.551910543564114, + "learning_rate": 8.791570094446776e-06, + "loss": 1.4734, + "step": 91878 + }, + { + "epoch": 1.1, + "grad_norm": 19.259446121980353, + "learning_rate": 8.790990199541776e-06, + "loss": 1.1042, + "step": 91881 + }, + { + "epoch": 1.1, + "grad_norm": 6.772571905817864, + "learning_rate": 8.790410308762695e-06, + "loss": 1.1026, + "step": 91884 + }, + { + "epoch": 1.1, + "grad_norm": 16.45225659298318, + "learning_rate": 8.789830422111509e-06, + "loss": 1.1689, + "step": 91887 + }, + { + "epoch": 1.1, + "grad_norm": 23.390430940695563, + "learning_rate": 8.78925053959019e-06, + "loss": 1.0805, + "step": 91890 + }, + { + "epoch": 1.1, + "grad_norm": 25.331127324933945, + "learning_rate": 8.788670661200724e-06, + "loss": 1.0315, + "step": 91893 + }, + { + "epoch": 1.11, + "grad_norm": 8.183549200382084, + "learning_rate": 8.788090786945091e-06, + "loss": 1.2935, + "step": 91896 + }, + { + "epoch": 1.11, + "grad_norm": 8.908346233182563, + "learning_rate": 8.787510916825271e-06, + "loss": 1.2065, + "step": 91899 + }, + { + "epoch": 1.11, + "grad_norm": 11.758353953150745, + "learning_rate": 8.786931050843234e-06, + "loss": 0.9718, + "step": 91902 + }, + { + "epoch": 1.11, + "grad_norm": 12.827370402233253, + "learning_rate": 8.786351189000966e-06, + "loss": 1.0202, + "step": 91905 + }, + { + "epoch": 1.11, + "grad_norm": 3.4499192658466757, + "learning_rate": 8.785771331300444e-06, + "loss": 1.1288, + "step": 91908 + }, + { + "epoch": 1.11, + "grad_norm": 12.176160192012745, + "learning_rate": 8.78519147774365e-06, + "loss": 1.075, + "step": 91911 + }, + { + "epoch": 1.11, + "grad_norm": 7.321186655913857, + "learning_rate": 8.784611628332556e-06, + "loss": 1.3892, + "step": 91914 + }, + { + "epoch": 1.11, + "grad_norm": 8.256332395311611, + "learning_rate": 8.784031783069147e-06, + "loss": 1.0762, + "step": 91917 + }, + { + "epoch": 1.11, + "grad_norm": 24.126808163703725, + "learning_rate": 8.783451941955397e-06, + "loss": 1.3015, + "step": 91920 + }, + { + "epoch": 1.11, + "grad_norm": 8.969309917483573, + "learning_rate": 8.782872104993288e-06, + "loss": 1.2863, + "step": 91923 + }, + { + "epoch": 1.11, + "grad_norm": 12.597401030194163, + "learning_rate": 8.782292272184802e-06, + "loss": 1.5449, + "step": 91926 + }, + { + "epoch": 1.11, + "grad_norm": 32.95400196823069, + "learning_rate": 8.781712443531912e-06, + "loss": 1.1139, + "step": 91929 + }, + { + "epoch": 1.11, + "grad_norm": 11.165529678860972, + "learning_rate": 8.781132619036595e-06, + "loss": 0.8355, + "step": 91932 + }, + { + "epoch": 1.11, + "grad_norm": 15.75999600605974, + "learning_rate": 8.780552798700835e-06, + "loss": 1.5281, + "step": 91935 + }, + { + "epoch": 1.11, + "grad_norm": 4.348975329098861, + "learning_rate": 8.779972982526612e-06, + "loss": 1.2614, + "step": 91938 + }, + { + "epoch": 1.11, + "grad_norm": 18.300896262248486, + "learning_rate": 8.779393170515901e-06, + "loss": 1.0443, + "step": 91941 + }, + { + "epoch": 1.11, + "grad_norm": 20.858549235872022, + "learning_rate": 8.778813362670679e-06, + "loss": 0.9752, + "step": 91944 + }, + { + "epoch": 1.11, + "grad_norm": 12.22624214800294, + "learning_rate": 8.778233558992926e-06, + "loss": 1.1372, + "step": 91947 + }, + { + "epoch": 1.11, + "grad_norm": 69.80192503386863, + "learning_rate": 8.777653759484625e-06, + "loss": 1.0402, + "step": 91950 + }, + { + "epoch": 1.11, + "grad_norm": 4.133954997467751, + "learning_rate": 8.777073964147753e-06, + "loss": 1.3146, + "step": 91953 + }, + { + "epoch": 1.11, + "grad_norm": 11.528778629855774, + "learning_rate": 8.776494172984282e-06, + "loss": 1.201, + "step": 91956 + }, + { + "epoch": 1.11, + "grad_norm": 5.44637008297467, + "learning_rate": 8.775914385996199e-06, + "loss": 1.1436, + "step": 91959 + }, + { + "epoch": 1.11, + "grad_norm": 6.002751763838709, + "learning_rate": 8.775334603185477e-06, + "loss": 1.1138, + "step": 91962 + }, + { + "epoch": 1.11, + "grad_norm": 10.075145117581382, + "learning_rate": 8.774754824554101e-06, + "loss": 0.9778, + "step": 91965 + }, + { + "epoch": 1.11, + "grad_norm": 52.79391047170582, + "learning_rate": 8.774175050104042e-06, + "loss": 1.2152, + "step": 91968 + }, + { + "epoch": 1.11, + "grad_norm": 4.825994229112197, + "learning_rate": 8.773595279837283e-06, + "loss": 1.2592, + "step": 91971 + }, + { + "epoch": 1.11, + "grad_norm": 11.014554020436506, + "learning_rate": 8.773015513755801e-06, + "loss": 1.0474, + "step": 91974 + }, + { + "epoch": 1.11, + "grad_norm": 30.203209339493828, + "learning_rate": 8.772435751861574e-06, + "loss": 0.978, + "step": 91977 + }, + { + "epoch": 1.11, + "grad_norm": 24.308009492668262, + "learning_rate": 8.771855994156587e-06, + "loss": 1.2282, + "step": 91980 + }, + { + "epoch": 1.11, + "grad_norm": 70.49302686873993, + "learning_rate": 8.771276240642809e-06, + "loss": 1.2953, + "step": 91983 + }, + { + "epoch": 1.11, + "grad_norm": 17.04566596651335, + "learning_rate": 8.77069649132222e-06, + "loss": 1.4119, + "step": 91986 + }, + { + "epoch": 1.11, + "grad_norm": 41.185018293681615, + "learning_rate": 8.770116746196804e-06, + "loss": 1.3478, + "step": 91989 + }, + { + "epoch": 1.11, + "grad_norm": 6.174993464821019, + "learning_rate": 8.769537005268537e-06, + "loss": 1.184, + "step": 91992 + }, + { + "epoch": 1.11, + "grad_norm": 12.698475839399949, + "learning_rate": 8.768957268539398e-06, + "loss": 1.0419, + "step": 91995 + }, + { + "epoch": 1.11, + "grad_norm": 6.069123059676869, + "learning_rate": 8.768377536011363e-06, + "loss": 0.9397, + "step": 91998 + }, + { + "epoch": 1.11, + "grad_norm": 13.230046257720844, + "learning_rate": 8.767797807686411e-06, + "loss": 1.3536, + "step": 92001 + }, + { + "epoch": 1.11, + "grad_norm": 4.024142351306801, + "learning_rate": 8.767218083566521e-06, + "loss": 1.4188, + "step": 92004 + }, + { + "epoch": 1.11, + "grad_norm": 3.311491613448634, + "learning_rate": 8.766638363653676e-06, + "loss": 1.1449, + "step": 92007 + }, + { + "epoch": 1.11, + "grad_norm": 8.346512451231604, + "learning_rate": 8.766058647949844e-06, + "loss": 1.198, + "step": 92010 + }, + { + "epoch": 1.11, + "grad_norm": 21.178351245328265, + "learning_rate": 8.765478936457013e-06, + "loss": 1.1677, + "step": 92013 + }, + { + "epoch": 1.11, + "grad_norm": 13.08346507473269, + "learning_rate": 8.764899229177155e-06, + "loss": 1.3752, + "step": 92016 + }, + { + "epoch": 1.11, + "grad_norm": 5.306820989020207, + "learning_rate": 8.764319526112256e-06, + "loss": 1.1131, + "step": 92019 + }, + { + "epoch": 1.11, + "grad_norm": 17.072712686934842, + "learning_rate": 8.763739827264283e-06, + "loss": 1.0119, + "step": 92022 + }, + { + "epoch": 1.11, + "grad_norm": 6.303446864505051, + "learning_rate": 8.763160132635226e-06, + "loss": 1.3897, + "step": 92025 + }, + { + "epoch": 1.11, + "grad_norm": 3.6477847931774336, + "learning_rate": 8.762580442227054e-06, + "loss": 1.3494, + "step": 92028 + }, + { + "epoch": 1.11, + "grad_norm": 8.820418302323423, + "learning_rate": 8.76200075604175e-06, + "loss": 1.2441, + "step": 92031 + }, + { + "epoch": 1.11, + "grad_norm": 4.655152903607065, + "learning_rate": 8.761421074081295e-06, + "loss": 0.9812, + "step": 92034 + }, + { + "epoch": 1.11, + "grad_norm": 9.471505426272888, + "learning_rate": 8.760841396347661e-06, + "loss": 1.0871, + "step": 92037 + }, + { + "epoch": 1.11, + "grad_norm": 39.986772035329516, + "learning_rate": 8.760261722842828e-06, + "loss": 1.3371, + "step": 92040 + }, + { + "epoch": 1.11, + "grad_norm": 8.69800125617804, + "learning_rate": 8.759682053568773e-06, + "loss": 0.8963, + "step": 92043 + }, + { + "epoch": 1.11, + "grad_norm": 230.11201789267244, + "learning_rate": 8.759102388527485e-06, + "loss": 1.0119, + "step": 92046 + }, + { + "epoch": 1.11, + "grad_norm": 15.083759193042036, + "learning_rate": 8.758522727720928e-06, + "loss": 1.0217, + "step": 92049 + }, + { + "epoch": 1.11, + "grad_norm": 22.48444777500834, + "learning_rate": 8.757943071151085e-06, + "loss": 1.3019, + "step": 92052 + }, + { + "epoch": 1.11, + "grad_norm": 4.469313685371669, + "learning_rate": 8.757363418819936e-06, + "loss": 1.3156, + "step": 92055 + }, + { + "epoch": 1.11, + "grad_norm": 7.8829824791131, + "learning_rate": 8.756783770729457e-06, + "loss": 0.961, + "step": 92058 + }, + { + "epoch": 1.11, + "grad_norm": 5.01268524211347, + "learning_rate": 8.756204126881631e-06, + "loss": 1.1156, + "step": 92061 + }, + { + "epoch": 1.11, + "grad_norm": 8.529631358347993, + "learning_rate": 8.755624487278427e-06, + "loss": 1.1277, + "step": 92064 + }, + { + "epoch": 1.11, + "grad_norm": 15.970175731747055, + "learning_rate": 8.755044851921834e-06, + "loss": 1.3981, + "step": 92067 + }, + { + "epoch": 1.11, + "grad_norm": 36.09195869159173, + "learning_rate": 8.754465220813818e-06, + "loss": 0.9059, + "step": 92070 + }, + { + "epoch": 1.11, + "grad_norm": 8.247886468555274, + "learning_rate": 8.75388559395637e-06, + "loss": 1.2187, + "step": 92073 + }, + { + "epoch": 1.11, + "grad_norm": 13.483855682406025, + "learning_rate": 8.753305971351458e-06, + "loss": 1.3678, + "step": 92076 + }, + { + "epoch": 1.11, + "grad_norm": 18.62535432645153, + "learning_rate": 8.752726353001065e-06, + "loss": 1.1411, + "step": 92079 + }, + { + "epoch": 1.11, + "grad_norm": 39.158252082434096, + "learning_rate": 8.752146738907165e-06, + "loss": 1.3887, + "step": 92082 + }, + { + "epoch": 1.11, + "grad_norm": 8.997552172506168, + "learning_rate": 8.75156712907174e-06, + "loss": 1.5625, + "step": 92085 + }, + { + "epoch": 1.11, + "grad_norm": 15.203288032347785, + "learning_rate": 8.75098752349677e-06, + "loss": 1.1635, + "step": 92088 + }, + { + "epoch": 1.11, + "grad_norm": 11.667989438768222, + "learning_rate": 8.750407922184227e-06, + "loss": 1.2464, + "step": 92091 + }, + { + "epoch": 1.11, + "grad_norm": 12.761211996479885, + "learning_rate": 8.749828325136091e-06, + "loss": 1.0435, + "step": 92094 + }, + { + "epoch": 1.11, + "grad_norm": 6.735272779553814, + "learning_rate": 8.749248732354341e-06, + "loss": 1.2118, + "step": 92097 + }, + { + "epoch": 1.11, + "grad_norm": 46.47575116301996, + "learning_rate": 8.748669143840957e-06, + "loss": 1.5428, + "step": 92100 + }, + { + "epoch": 1.11, + "grad_norm": 15.101266858735212, + "learning_rate": 8.748089559597912e-06, + "loss": 1.1266, + "step": 92103 + }, + { + "epoch": 1.11, + "grad_norm": 10.478015715982096, + "learning_rate": 8.747509979627185e-06, + "loss": 0.9988, + "step": 92106 + }, + { + "epoch": 1.11, + "grad_norm": 10.051128180725886, + "learning_rate": 8.746930403930757e-06, + "loss": 1.3047, + "step": 92109 + }, + { + "epoch": 1.11, + "grad_norm": 7.241474451264488, + "learning_rate": 8.746350832510604e-06, + "loss": 1.2888, + "step": 92112 + }, + { + "epoch": 1.11, + "grad_norm": 2.5256650547267205, + "learning_rate": 8.745771265368708e-06, + "loss": 1.0349, + "step": 92115 + }, + { + "epoch": 1.11, + "grad_norm": 17.002340900585054, + "learning_rate": 8.745191702507037e-06, + "loss": 1.2544, + "step": 92118 + }, + { + "epoch": 1.11, + "grad_norm": 8.156277212841227, + "learning_rate": 8.744612143927578e-06, + "loss": 1.039, + "step": 92121 + }, + { + "epoch": 1.11, + "grad_norm": 15.350796604845222, + "learning_rate": 8.744032589632303e-06, + "loss": 1.3599, + "step": 92124 + }, + { + "epoch": 1.11, + "grad_norm": 13.762247119632663, + "learning_rate": 8.743453039623197e-06, + "loss": 1.5549, + "step": 92127 + }, + { + "epoch": 1.11, + "grad_norm": 11.018886106790411, + "learning_rate": 8.742873493902228e-06, + "loss": 1.2533, + "step": 92130 + }, + { + "epoch": 1.11, + "grad_norm": 10.200462195915808, + "learning_rate": 8.742293952471382e-06, + "loss": 1.207, + "step": 92133 + }, + { + "epoch": 1.11, + "grad_norm": 17.481438564064778, + "learning_rate": 8.74171441533263e-06, + "loss": 1.2839, + "step": 92136 + }, + { + "epoch": 1.11, + "grad_norm": 14.969761251850054, + "learning_rate": 8.741134882487957e-06, + "loss": 1.1995, + "step": 92139 + }, + { + "epoch": 1.11, + "grad_norm": 12.570496187498032, + "learning_rate": 8.740555353939337e-06, + "loss": 1.3418, + "step": 92142 + }, + { + "epoch": 1.11, + "grad_norm": 11.592554353805989, + "learning_rate": 8.739975829688748e-06, + "loss": 1.0423, + "step": 92145 + }, + { + "epoch": 1.11, + "grad_norm": 3.1936502912248237, + "learning_rate": 8.739396309738167e-06, + "loss": 1.4751, + "step": 92148 + }, + { + "epoch": 1.11, + "grad_norm": 11.814793242083791, + "learning_rate": 8.738816794089572e-06, + "loss": 1.2222, + "step": 92151 + }, + { + "epoch": 1.11, + "grad_norm": 24.44494352332181, + "learning_rate": 8.738237282744944e-06, + "loss": 0.7319, + "step": 92154 + }, + { + "epoch": 1.11, + "grad_norm": 6.67995387342365, + "learning_rate": 8.737657775706253e-06, + "loss": 0.9458, + "step": 92157 + }, + { + "epoch": 1.11, + "grad_norm": 11.413499979031027, + "learning_rate": 8.737078272975483e-06, + "loss": 1.0873, + "step": 92160 + }, + { + "epoch": 1.11, + "grad_norm": 11.122538318758899, + "learning_rate": 8.736498774554611e-06, + "loss": 1.0876, + "step": 92163 + }, + { + "epoch": 1.11, + "grad_norm": 11.581485384807992, + "learning_rate": 8.73591928044561e-06, + "loss": 1.3388, + "step": 92166 + }, + { + "epoch": 1.11, + "grad_norm": 5.070868226896694, + "learning_rate": 8.735339790650468e-06, + "loss": 1.3731, + "step": 92169 + }, + { + "epoch": 1.11, + "grad_norm": 4.3548061354258, + "learning_rate": 8.73476030517115e-06, + "loss": 1.1671, + "step": 92172 + }, + { + "epoch": 1.11, + "grad_norm": 8.427850816364877, + "learning_rate": 8.734180824009642e-06, + "loss": 1.4011, + "step": 92175 + }, + { + "epoch": 1.11, + "grad_norm": 5.7134270938609655, + "learning_rate": 8.733601347167915e-06, + "loss": 1.0203, + "step": 92178 + }, + { + "epoch": 1.11, + "grad_norm": 4.26954651460153, + "learning_rate": 8.733021874647956e-06, + "loss": 1.1196, + "step": 92181 + }, + { + "epoch": 1.11, + "grad_norm": 26.725648926798822, + "learning_rate": 8.732442406451734e-06, + "loss": 1.5319, + "step": 92184 + }, + { + "epoch": 1.11, + "grad_norm": 15.10488715342064, + "learning_rate": 8.731862942581231e-06, + "loss": 1.0625, + "step": 92187 + }, + { + "epoch": 1.11, + "grad_norm": 9.06523179798364, + "learning_rate": 8.73128348303842e-06, + "loss": 1.3782, + "step": 92190 + }, + { + "epoch": 1.11, + "grad_norm": 17.946424966257375, + "learning_rate": 8.730704027825283e-06, + "loss": 1.1096, + "step": 92193 + }, + { + "epoch": 1.11, + "grad_norm": 7.6982505056128785, + "learning_rate": 8.730124576943799e-06, + "loss": 1.2569, + "step": 92196 + }, + { + "epoch": 1.11, + "grad_norm": 22.368468038718586, + "learning_rate": 8.72954513039594e-06, + "loss": 0.9774, + "step": 92199 + }, + { + "epoch": 1.11, + "grad_norm": 9.9081746574389, + "learning_rate": 8.728965688183683e-06, + "loss": 1.1202, + "step": 92202 + }, + { + "epoch": 1.11, + "grad_norm": 7.330761954741646, + "learning_rate": 8.728386250309011e-06, + "loss": 1.0536, + "step": 92205 + }, + { + "epoch": 1.11, + "grad_norm": 10.16835246054572, + "learning_rate": 8.727806816773903e-06, + "loss": 1.1827, + "step": 92208 + }, + { + "epoch": 1.11, + "grad_norm": 6.499273714462334, + "learning_rate": 8.727227387580326e-06, + "loss": 1.1844, + "step": 92211 + }, + { + "epoch": 1.11, + "grad_norm": 10.801763815430302, + "learning_rate": 8.726647962730262e-06, + "loss": 1.3587, + "step": 92214 + }, + { + "epoch": 1.11, + "grad_norm": 52.43865477239944, + "learning_rate": 8.726068542225693e-06, + "loss": 1.2268, + "step": 92217 + }, + { + "epoch": 1.11, + "grad_norm": 7.873569505485272, + "learning_rate": 8.725489126068594e-06, + "loss": 0.8871, + "step": 92220 + }, + { + "epoch": 1.11, + "grad_norm": 9.276425751112477, + "learning_rate": 8.724909714260942e-06, + "loss": 1.2333, + "step": 92223 + }, + { + "epoch": 1.11, + "grad_norm": 7.540827233905148, + "learning_rate": 8.724330306804711e-06, + "loss": 1.0837, + "step": 92226 + }, + { + "epoch": 1.11, + "grad_norm": 15.089304422724515, + "learning_rate": 8.723750903701883e-06, + "loss": 1.1486, + "step": 92229 + }, + { + "epoch": 1.11, + "grad_norm": 23.263618363107817, + "learning_rate": 8.723171504954433e-06, + "loss": 1.2966, + "step": 92232 + }, + { + "epoch": 1.11, + "grad_norm": 8.829402517388392, + "learning_rate": 8.722592110564342e-06, + "loss": 1.0156, + "step": 92235 + }, + { + "epoch": 1.11, + "grad_norm": 8.076641530723972, + "learning_rate": 8.722012720533581e-06, + "loss": 1.3449, + "step": 92238 + }, + { + "epoch": 1.11, + "grad_norm": 9.124209451080391, + "learning_rate": 8.72143333486413e-06, + "loss": 1.5137, + "step": 92241 + }, + { + "epoch": 1.11, + "grad_norm": 11.831065300891403, + "learning_rate": 8.720853953557963e-06, + "loss": 1.3465, + "step": 92244 + }, + { + "epoch": 1.11, + "grad_norm": 5.639156094471902, + "learning_rate": 8.720274576617066e-06, + "loss": 1.1786, + "step": 92247 + }, + { + "epoch": 1.11, + "grad_norm": 14.383442770859151, + "learning_rate": 8.719695204043412e-06, + "loss": 1.4273, + "step": 92250 + }, + { + "epoch": 1.11, + "grad_norm": 15.249904215930112, + "learning_rate": 8.719115835838973e-06, + "loss": 1.2433, + "step": 92253 + }, + { + "epoch": 1.11, + "grad_norm": 6.275388031214312, + "learning_rate": 8.718536472005732e-06, + "loss": 1.2227, + "step": 92256 + }, + { + "epoch": 1.11, + "grad_norm": 14.641524088574533, + "learning_rate": 8.717957112545663e-06, + "loss": 1.2601, + "step": 92259 + }, + { + "epoch": 1.11, + "grad_norm": 12.313149046516063, + "learning_rate": 8.717377757460749e-06, + "loss": 1.1365, + "step": 92262 + }, + { + "epoch": 1.11, + "grad_norm": 8.659270835214627, + "learning_rate": 8.716798406752957e-06, + "loss": 1.1956, + "step": 92265 + }, + { + "epoch": 1.11, + "grad_norm": 18.86145649916493, + "learning_rate": 8.716219060424271e-06, + "loss": 1.1375, + "step": 92268 + }, + { + "epoch": 1.11, + "grad_norm": 5.525416628881388, + "learning_rate": 8.71563971847667e-06, + "loss": 0.9091, + "step": 92271 + }, + { + "epoch": 1.11, + "grad_norm": 15.648678098311501, + "learning_rate": 8.715060380912125e-06, + "loss": 1.3571, + "step": 92274 + }, + { + "epoch": 1.11, + "grad_norm": 20.73952192997933, + "learning_rate": 8.71448104773262e-06, + "loss": 1.0326, + "step": 92277 + }, + { + "epoch": 1.11, + "grad_norm": 18.342532022713623, + "learning_rate": 8.713901718940123e-06, + "loss": 1.2072, + "step": 92280 + }, + { + "epoch": 1.11, + "grad_norm": 11.776709530575724, + "learning_rate": 8.713322394536619e-06, + "loss": 0.8758, + "step": 92283 + }, + { + "epoch": 1.11, + "grad_norm": 16.24293352033776, + "learning_rate": 8.712743074524081e-06, + "loss": 1.2968, + "step": 92286 + }, + { + "epoch": 1.11, + "grad_norm": 9.187749106264587, + "learning_rate": 8.71216375890449e-06, + "loss": 1.1564, + "step": 92289 + }, + { + "epoch": 1.11, + "grad_norm": 9.516475625994213, + "learning_rate": 8.711584447679819e-06, + "loss": 1.1942, + "step": 92292 + }, + { + "epoch": 1.11, + "grad_norm": 18.737416246323807, + "learning_rate": 8.711005140852045e-06, + "loss": 1.4591, + "step": 92295 + }, + { + "epoch": 1.11, + "grad_norm": 8.285639574902923, + "learning_rate": 8.710425838423146e-06, + "loss": 1.3155, + "step": 92298 + }, + { + "epoch": 1.11, + "grad_norm": 11.247806731746362, + "learning_rate": 8.7098465403951e-06, + "loss": 1.165, + "step": 92301 + }, + { + "epoch": 1.11, + "grad_norm": 29.592108081292977, + "learning_rate": 8.709267246769886e-06, + "loss": 0.7904, + "step": 92304 + }, + { + "epoch": 1.11, + "grad_norm": 10.824112858571592, + "learning_rate": 8.708687957549472e-06, + "loss": 1.0224, + "step": 92307 + }, + { + "epoch": 1.11, + "grad_norm": 10.65335943818443, + "learning_rate": 8.708108672735843e-06, + "loss": 1.0968, + "step": 92310 + }, + { + "epoch": 1.11, + "grad_norm": 19.520917220727053, + "learning_rate": 8.707529392330974e-06, + "loss": 1.4714, + "step": 92313 + }, + { + "epoch": 1.11, + "grad_norm": 6.273299518397336, + "learning_rate": 8.706950116336846e-06, + "loss": 1.1195, + "step": 92316 + }, + { + "epoch": 1.11, + "grad_norm": 7.515099297704466, + "learning_rate": 8.706370844755425e-06, + "loss": 1.0475, + "step": 92319 + }, + { + "epoch": 1.11, + "grad_norm": 4.91357862492657, + "learning_rate": 8.705791577588696e-06, + "loss": 1.2751, + "step": 92322 + }, + { + "epoch": 1.11, + "grad_norm": 4.235184110979162, + "learning_rate": 8.705212314838635e-06, + "loss": 1.2631, + "step": 92325 + }, + { + "epoch": 1.11, + "grad_norm": 4.9026829903929725, + "learning_rate": 8.704633056507216e-06, + "loss": 1.0365, + "step": 92328 + }, + { + "epoch": 1.11, + "grad_norm": 5.661453512222085, + "learning_rate": 8.704053802596423e-06, + "loss": 1.0459, + "step": 92331 + }, + { + "epoch": 1.11, + "grad_norm": 27.242136135115878, + "learning_rate": 8.703474553108226e-06, + "loss": 1.7274, + "step": 92334 + }, + { + "epoch": 1.11, + "grad_norm": 14.024249645860372, + "learning_rate": 8.7028953080446e-06, + "loss": 0.7676, + "step": 92337 + }, + { + "epoch": 1.11, + "grad_norm": 7.242587923486996, + "learning_rate": 8.702316067407526e-06, + "loss": 1.0283, + "step": 92340 + }, + { + "epoch": 1.11, + "grad_norm": 5.609966936939655, + "learning_rate": 8.701736831198983e-06, + "loss": 1.5561, + "step": 92343 + }, + { + "epoch": 1.11, + "grad_norm": 20.70636563053719, + "learning_rate": 8.701157599420942e-06, + "loss": 1.2032, + "step": 92346 + }, + { + "epoch": 1.11, + "grad_norm": 13.991227659341256, + "learning_rate": 8.700578372075383e-06, + "loss": 1.5341, + "step": 92349 + }, + { + "epoch": 1.11, + "grad_norm": 16.97353106483495, + "learning_rate": 8.69999914916428e-06, + "loss": 1.3998, + "step": 92352 + }, + { + "epoch": 1.11, + "grad_norm": 7.1848494291303036, + "learning_rate": 8.699419930689613e-06, + "loss": 0.9466, + "step": 92355 + }, + { + "epoch": 1.11, + "grad_norm": 5.51013313164598, + "learning_rate": 8.69884071665336e-06, + "loss": 1.254, + "step": 92358 + }, + { + "epoch": 1.11, + "grad_norm": 4.427318254843078, + "learning_rate": 8.698261507057491e-06, + "loss": 1.0828, + "step": 92361 + }, + { + "epoch": 1.11, + "grad_norm": 11.878783239710225, + "learning_rate": 8.697682301903986e-06, + "loss": 1.1531, + "step": 92364 + }, + { + "epoch": 1.11, + "grad_norm": 9.600554670928574, + "learning_rate": 8.697103101194826e-06, + "loss": 1.1354, + "step": 92367 + }, + { + "epoch": 1.11, + "grad_norm": 3.669206089038608, + "learning_rate": 8.696523904931984e-06, + "loss": 1.2106, + "step": 92370 + }, + { + "epoch": 1.11, + "grad_norm": 9.47313563975922, + "learning_rate": 8.695944713117434e-06, + "loss": 0.8364, + "step": 92373 + }, + { + "epoch": 1.11, + "grad_norm": 2.6872058113396826, + "learning_rate": 8.695365525753156e-06, + "loss": 1.1273, + "step": 92376 + }, + { + "epoch": 1.11, + "grad_norm": 10.95971223858525, + "learning_rate": 8.694786342841122e-06, + "loss": 1.3253, + "step": 92379 + }, + { + "epoch": 1.11, + "grad_norm": 19.039315038493246, + "learning_rate": 8.694207164383314e-06, + "loss": 1.1533, + "step": 92382 + }, + { + "epoch": 1.11, + "grad_norm": 7.473970584105371, + "learning_rate": 8.69362799038171e-06, + "loss": 1.3223, + "step": 92385 + }, + { + "epoch": 1.11, + "grad_norm": 3.7262327701081652, + "learning_rate": 8.693048820838283e-06, + "loss": 1.0825, + "step": 92388 + }, + { + "epoch": 1.11, + "grad_norm": 7.095686232099183, + "learning_rate": 8.692469655755005e-06, + "loss": 0.9005, + "step": 92391 + }, + { + "epoch": 1.11, + "grad_norm": 16.296091349786735, + "learning_rate": 8.691890495133858e-06, + "loss": 1.3288, + "step": 92394 + }, + { + "epoch": 1.11, + "grad_norm": 18.70559194340835, + "learning_rate": 8.691311338976822e-06, + "loss": 1.0761, + "step": 92397 + }, + { + "epoch": 1.11, + "grad_norm": 18.89759035048204, + "learning_rate": 8.690732187285866e-06, + "loss": 1.2081, + "step": 92400 + }, + { + "epoch": 1.11, + "grad_norm": 19.437140065441866, + "learning_rate": 8.690153040062968e-06, + "loss": 1.3644, + "step": 92403 + }, + { + "epoch": 1.11, + "grad_norm": 4.262783045393909, + "learning_rate": 8.689573897310106e-06, + "loss": 0.877, + "step": 92406 + }, + { + "epoch": 1.11, + "grad_norm": 47.70449757776182, + "learning_rate": 8.68899475902926e-06, + "loss": 1.2434, + "step": 92409 + }, + { + "epoch": 1.11, + "grad_norm": 23.63689177540174, + "learning_rate": 8.688415625222402e-06, + "loss": 1.475, + "step": 92412 + }, + { + "epoch": 1.11, + "grad_norm": 5.926485096779174, + "learning_rate": 8.687836495891506e-06, + "loss": 1.1686, + "step": 92415 + }, + { + "epoch": 1.11, + "grad_norm": 21.08283179513609, + "learning_rate": 8.687257371038554e-06, + "loss": 1.1736, + "step": 92418 + }, + { + "epoch": 1.11, + "grad_norm": 9.049222000097103, + "learning_rate": 8.686678250665515e-06, + "loss": 1.2611, + "step": 92421 + }, + { + "epoch": 1.11, + "grad_norm": 6.186679906876765, + "learning_rate": 8.686099134774377e-06, + "loss": 1.4303, + "step": 92424 + }, + { + "epoch": 1.11, + "grad_norm": 5.412748566335621, + "learning_rate": 8.685520023367105e-06, + "loss": 1.1704, + "step": 92427 + }, + { + "epoch": 1.11, + "grad_norm": 19.502798427211495, + "learning_rate": 8.684940916445681e-06, + "loss": 1.0846, + "step": 92430 + }, + { + "epoch": 1.11, + "grad_norm": 6.4261679077021325, + "learning_rate": 8.684361814012077e-06, + "loss": 0.8597, + "step": 92433 + }, + { + "epoch": 1.11, + "grad_norm": 4.916927805045132, + "learning_rate": 8.683782716068274e-06, + "loss": 1.1474, + "step": 92436 + }, + { + "epoch": 1.11, + "grad_norm": 11.458658190624968, + "learning_rate": 8.68320362261625e-06, + "loss": 1.2313, + "step": 92439 + }, + { + "epoch": 1.11, + "grad_norm": 9.38159735584321, + "learning_rate": 8.682624533657977e-06, + "loss": 1.0417, + "step": 92442 + }, + { + "epoch": 1.11, + "grad_norm": 20.86286744902771, + "learning_rate": 8.682045449195428e-06, + "loss": 1.1641, + "step": 92445 + }, + { + "epoch": 1.11, + "grad_norm": 3.6857824924223306, + "learning_rate": 8.681466369230583e-06, + "loss": 1.0564, + "step": 92448 + }, + { + "epoch": 1.11, + "grad_norm": 8.54530227666256, + "learning_rate": 8.680887293765424e-06, + "loss": 1.3236, + "step": 92451 + }, + { + "epoch": 1.11, + "grad_norm": 12.187765902742507, + "learning_rate": 8.68030822280192e-06, + "loss": 1.1019, + "step": 92454 + }, + { + "epoch": 1.11, + "grad_norm": 26.93039922044298, + "learning_rate": 8.679729156342043e-06, + "loss": 1.1799, + "step": 92457 + }, + { + "epoch": 1.11, + "grad_norm": 9.801781837809793, + "learning_rate": 8.679150094387778e-06, + "loss": 1.3837, + "step": 92460 + }, + { + "epoch": 1.11, + "grad_norm": 9.115277768676998, + "learning_rate": 8.678571036941098e-06, + "loss": 1.1374, + "step": 92463 + }, + { + "epoch": 1.11, + "grad_norm": 35.513264157076264, + "learning_rate": 8.677991984003983e-06, + "loss": 0.998, + "step": 92466 + }, + { + "epoch": 1.11, + "grad_norm": 16.795738030502086, + "learning_rate": 8.677412935578399e-06, + "loss": 1.3294, + "step": 92469 + }, + { + "epoch": 1.11, + "grad_norm": 7.994474047255095, + "learning_rate": 8.676833891666332e-06, + "loss": 1.2098, + "step": 92472 + }, + { + "epoch": 1.11, + "grad_norm": 24.96116150160192, + "learning_rate": 8.676254852269752e-06, + "loss": 1.186, + "step": 92475 + }, + { + "epoch": 1.11, + "grad_norm": 2.3423561734070844, + "learning_rate": 8.675675817390642e-06, + "loss": 1.1108, + "step": 92478 + }, + { + "epoch": 1.11, + "grad_norm": 46.10655629301725, + "learning_rate": 8.675096787030968e-06, + "loss": 0.9808, + "step": 92481 + }, + { + "epoch": 1.11, + "grad_norm": 2.864121525933077, + "learning_rate": 8.674517761192715e-06, + "loss": 1.0937, + "step": 92484 + }, + { + "epoch": 1.11, + "grad_norm": 24.32792474036833, + "learning_rate": 8.673938739877852e-06, + "loss": 1.3278, + "step": 92487 + }, + { + "epoch": 1.11, + "grad_norm": 6.307283555207742, + "learning_rate": 8.67335972308836e-06, + "loss": 1.1783, + "step": 92490 + }, + { + "epoch": 1.11, + "grad_norm": 12.516687551924802, + "learning_rate": 8.672780710826217e-06, + "loss": 1.2418, + "step": 92493 + }, + { + "epoch": 1.11, + "grad_norm": 9.748379417806058, + "learning_rate": 8.672201703093392e-06, + "loss": 1.4719, + "step": 92496 + }, + { + "epoch": 1.11, + "grad_norm": 14.74153353752134, + "learning_rate": 8.671622699891863e-06, + "loss": 1.4209, + "step": 92499 + }, + { + "epoch": 1.11, + "grad_norm": 10.367182904693587, + "learning_rate": 8.671043701223608e-06, + "loss": 1.245, + "step": 92502 + }, + { + "epoch": 1.11, + "grad_norm": 4.963749127090099, + "learning_rate": 8.670464707090605e-06, + "loss": 1.3043, + "step": 92505 + }, + { + "epoch": 1.11, + "grad_norm": 9.507365871778358, + "learning_rate": 8.669885717494825e-06, + "loss": 1.1449, + "step": 92508 + }, + { + "epoch": 1.11, + "grad_norm": 18.160493584661637, + "learning_rate": 8.669306732438246e-06, + "loss": 1.3807, + "step": 92511 + }, + { + "epoch": 1.11, + "grad_norm": 16.202762809102513, + "learning_rate": 8.668727751922842e-06, + "loss": 1.0547, + "step": 92514 + }, + { + "epoch": 1.11, + "grad_norm": 19.34518349779038, + "learning_rate": 8.668148775950592e-06, + "loss": 0.7819, + "step": 92517 + }, + { + "epoch": 1.11, + "grad_norm": 11.659650177494985, + "learning_rate": 8.667569804523474e-06, + "loss": 0.9252, + "step": 92520 + }, + { + "epoch": 1.11, + "grad_norm": 11.302319165799709, + "learning_rate": 8.666990837643456e-06, + "loss": 1.0007, + "step": 92523 + }, + { + "epoch": 1.11, + "grad_norm": 30.581873313761154, + "learning_rate": 8.666411875312518e-06, + "loss": 0.9016, + "step": 92526 + }, + { + "epoch": 1.11, + "grad_norm": 24.972369596452744, + "learning_rate": 8.665832917532638e-06, + "loss": 0.9665, + "step": 92529 + }, + { + "epoch": 1.11, + "grad_norm": 2.795791730902685, + "learning_rate": 8.665253964305792e-06, + "loss": 1.3756, + "step": 92532 + }, + { + "epoch": 1.11, + "grad_norm": 16.055634059606472, + "learning_rate": 8.664675015633948e-06, + "loss": 1.399, + "step": 92535 + }, + { + "epoch": 1.11, + "grad_norm": 6.211704328005464, + "learning_rate": 8.664096071519091e-06, + "loss": 1.0744, + "step": 92538 + }, + { + "epoch": 1.11, + "grad_norm": 3.7554829684723954, + "learning_rate": 8.663517131963189e-06, + "loss": 1.4419, + "step": 92541 + }, + { + "epoch": 1.11, + "grad_norm": 12.058174403668518, + "learning_rate": 8.662938196968222e-06, + "loss": 1.1396, + "step": 92544 + }, + { + "epoch": 1.11, + "grad_norm": 9.186694349830333, + "learning_rate": 8.66235926653617e-06, + "loss": 1.3228, + "step": 92547 + }, + { + "epoch": 1.11, + "grad_norm": 4.125631352472471, + "learning_rate": 8.661780340669004e-06, + "loss": 1.3209, + "step": 92550 + }, + { + "epoch": 1.11, + "grad_norm": 11.158740483347467, + "learning_rate": 8.661201419368694e-06, + "loss": 1.1058, + "step": 92553 + }, + { + "epoch": 1.11, + "grad_norm": 3.308877212293965, + "learning_rate": 8.660622502637227e-06, + "loss": 1.2451, + "step": 92556 + }, + { + "epoch": 1.11, + "grad_norm": 10.561518263692982, + "learning_rate": 8.66004359047657e-06, + "loss": 1.1478, + "step": 92559 + }, + { + "epoch": 1.11, + "grad_norm": 10.359188266084868, + "learning_rate": 8.659464682888703e-06, + "loss": 1.0439, + "step": 92562 + }, + { + "epoch": 1.11, + "grad_norm": 8.836629233691761, + "learning_rate": 8.658885779875598e-06, + "loss": 1.0839, + "step": 92565 + }, + { + "epoch": 1.11, + "grad_norm": 8.113637508764779, + "learning_rate": 8.658306881439234e-06, + "loss": 1.3158, + "step": 92568 + }, + { + "epoch": 1.11, + "grad_norm": 12.389597949130325, + "learning_rate": 8.657727987581584e-06, + "loss": 0.8867, + "step": 92571 + }, + { + "epoch": 1.11, + "grad_norm": 10.878195821108344, + "learning_rate": 8.657149098304629e-06, + "loss": 1.0477, + "step": 92574 + }, + { + "epoch": 1.11, + "grad_norm": 11.981161068562459, + "learning_rate": 8.656570213610336e-06, + "loss": 1.4084, + "step": 92577 + }, + { + "epoch": 1.11, + "grad_norm": 17.456544369574875, + "learning_rate": 8.655991333500687e-06, + "loss": 1.5059, + "step": 92580 + }, + { + "epoch": 1.11, + "grad_norm": 3.315197842076012, + "learning_rate": 8.655412457977653e-06, + "loss": 1.2842, + "step": 92583 + }, + { + "epoch": 1.11, + "grad_norm": 9.029297621803016, + "learning_rate": 8.654833587043217e-06, + "loss": 1.2013, + "step": 92586 + }, + { + "epoch": 1.11, + "grad_norm": 7.645163389183129, + "learning_rate": 8.654254720699346e-06, + "loss": 1.2566, + "step": 92589 + }, + { + "epoch": 1.11, + "grad_norm": 9.916335158552748, + "learning_rate": 8.653675858948019e-06, + "loss": 1.0785, + "step": 92592 + }, + { + "epoch": 1.11, + "grad_norm": 8.158094332373823, + "learning_rate": 8.65309700179121e-06, + "loss": 1.3232, + "step": 92595 + }, + { + "epoch": 1.11, + "grad_norm": 13.45874858138619, + "learning_rate": 8.652518149230898e-06, + "loss": 1.2644, + "step": 92598 + }, + { + "epoch": 1.11, + "grad_norm": 4.470835343909254, + "learning_rate": 8.651939301269056e-06, + "loss": 1.3869, + "step": 92601 + }, + { + "epoch": 1.11, + "grad_norm": 4.697243693028476, + "learning_rate": 8.65136045790766e-06, + "loss": 1.0357, + "step": 92604 + }, + { + "epoch": 1.11, + "grad_norm": 24.264250121232852, + "learning_rate": 8.650781619148684e-06, + "loss": 1.3659, + "step": 92607 + }, + { + "epoch": 1.11, + "grad_norm": 14.458014279673348, + "learning_rate": 8.650202784994106e-06, + "loss": 1.1623, + "step": 92610 + }, + { + "epoch": 1.11, + "grad_norm": 18.599918237671876, + "learning_rate": 8.649623955445901e-06, + "loss": 1.4847, + "step": 92613 + }, + { + "epoch": 1.11, + "grad_norm": 5.828148324039103, + "learning_rate": 8.649045130506037e-06, + "loss": 1.1018, + "step": 92616 + }, + { + "epoch": 1.11, + "grad_norm": 5.556759655294418, + "learning_rate": 8.648466310176498e-06, + "loss": 1.2726, + "step": 92619 + }, + { + "epoch": 1.11, + "grad_norm": 3.1078892035667627, + "learning_rate": 8.647887494459259e-06, + "loss": 0.9116, + "step": 92622 + }, + { + "epoch": 1.11, + "grad_norm": 29.6498889399761, + "learning_rate": 8.64730868335629e-06, + "loss": 1.4421, + "step": 92625 + }, + { + "epoch": 1.11, + "grad_norm": 33.466284915590094, + "learning_rate": 8.646729876869573e-06, + "loss": 1.2599, + "step": 92628 + }, + { + "epoch": 1.11, + "grad_norm": 11.50530783510583, + "learning_rate": 8.646151075001074e-06, + "loss": 1.1881, + "step": 92631 + }, + { + "epoch": 1.11, + "grad_norm": 10.671961371451207, + "learning_rate": 8.645572277752778e-06, + "loss": 1.3984, + "step": 92634 + }, + { + "epoch": 1.11, + "grad_norm": 5.473608348702408, + "learning_rate": 8.644993485126654e-06, + "loss": 1.1544, + "step": 92637 + }, + { + "epoch": 1.11, + "grad_norm": 10.578534196445911, + "learning_rate": 8.644414697124682e-06, + "loss": 1.2001, + "step": 92640 + }, + { + "epoch": 1.11, + "grad_norm": 7.819781484455753, + "learning_rate": 8.64383591374883e-06, + "loss": 1.3278, + "step": 92643 + }, + { + "epoch": 1.11, + "grad_norm": 13.603683845837528, + "learning_rate": 8.643257135001082e-06, + "loss": 1.1397, + "step": 92646 + }, + { + "epoch": 1.11, + "grad_norm": 5.540012899474239, + "learning_rate": 8.642678360883404e-06, + "loss": 1.0414, + "step": 92649 + }, + { + "epoch": 1.11, + "grad_norm": 3.0051468449193717, + "learning_rate": 8.642099591397779e-06, + "loss": 1.315, + "step": 92652 + }, + { + "epoch": 1.11, + "grad_norm": 5.684515207505569, + "learning_rate": 8.641520826546181e-06, + "loss": 1.2962, + "step": 92655 + }, + { + "epoch": 1.11, + "grad_norm": 5.695740016835192, + "learning_rate": 8.640942066330578e-06, + "loss": 0.9509, + "step": 92658 + }, + { + "epoch": 1.11, + "grad_norm": 17.17738801404256, + "learning_rate": 8.640363310752953e-06, + "loss": 1.5048, + "step": 92661 + }, + { + "epoch": 1.11, + "grad_norm": 10.292202236102144, + "learning_rate": 8.639784559815277e-06, + "loss": 1.2955, + "step": 92664 + }, + { + "epoch": 1.11, + "grad_norm": 7.790317554327582, + "learning_rate": 8.639205813519532e-06, + "loss": 1.1232, + "step": 92667 + }, + { + "epoch": 1.11, + "grad_norm": 10.76055747291327, + "learning_rate": 8.63862707186768e-06, + "loss": 1.0999, + "step": 92670 + }, + { + "epoch": 1.11, + "grad_norm": 22.625799719379806, + "learning_rate": 8.638048334861705e-06, + "loss": 1.3593, + "step": 92673 + }, + { + "epoch": 1.11, + "grad_norm": 68.89938284477704, + "learning_rate": 8.63746960250358e-06, + "loss": 0.9482, + "step": 92676 + }, + { + "epoch": 1.11, + "grad_norm": 23.549027048007478, + "learning_rate": 8.636890874795282e-06, + "loss": 1.1638, + "step": 92679 + }, + { + "epoch": 1.11, + "grad_norm": 19.163852001771833, + "learning_rate": 8.636312151738787e-06, + "loss": 1.1665, + "step": 92682 + }, + { + "epoch": 1.11, + "grad_norm": 12.501171909768905, + "learning_rate": 8.635733433336064e-06, + "loss": 0.9565, + "step": 92685 + }, + { + "epoch": 1.11, + "grad_norm": 54.759803155933405, + "learning_rate": 8.635154719589092e-06, + "loss": 1.4399, + "step": 92688 + }, + { + "epoch": 1.11, + "grad_norm": 2.7695616870453352, + "learning_rate": 8.634576010499844e-06, + "loss": 1.0074, + "step": 92691 + }, + { + "epoch": 1.11, + "grad_norm": 7.075389745148031, + "learning_rate": 8.633997306070301e-06, + "loss": 1.0862, + "step": 92694 + }, + { + "epoch": 1.11, + "grad_norm": 2.460452884388993, + "learning_rate": 8.633418606302428e-06, + "loss": 1.2408, + "step": 92697 + }, + { + "epoch": 1.11, + "grad_norm": 52.938939429122954, + "learning_rate": 8.632839911198208e-06, + "loss": 1.1568, + "step": 92700 + }, + { + "epoch": 1.11, + "grad_norm": 12.33040559029796, + "learning_rate": 8.63226122075961e-06, + "loss": 1.0078, + "step": 92703 + }, + { + "epoch": 1.11, + "grad_norm": 15.13246788944391, + "learning_rate": 8.631682534988615e-06, + "loss": 1.1084, + "step": 92706 + }, + { + "epoch": 1.11, + "grad_norm": 9.921945225032495, + "learning_rate": 8.631103853887196e-06, + "loss": 1.4014, + "step": 92709 + }, + { + "epoch": 1.11, + "grad_norm": 3.554555549299684, + "learning_rate": 8.630525177457322e-06, + "loss": 1.2609, + "step": 92712 + }, + { + "epoch": 1.11, + "grad_norm": 9.690965866802042, + "learning_rate": 8.629946505700973e-06, + "loss": 1.3439, + "step": 92715 + }, + { + "epoch": 1.11, + "grad_norm": 17.99128424888057, + "learning_rate": 8.629367838620125e-06, + "loss": 1.3199, + "step": 92718 + }, + { + "epoch": 1.11, + "grad_norm": 20.210268949597534, + "learning_rate": 8.628789176216754e-06, + "loss": 1.434, + "step": 92721 + }, + { + "epoch": 1.11, + "grad_norm": 12.756392291434715, + "learning_rate": 8.628210518492825e-06, + "loss": 1.03, + "step": 92724 + }, + { + "epoch": 1.12, + "grad_norm": 5.168492131749504, + "learning_rate": 8.627631865450322e-06, + "loss": 1.1702, + "step": 92727 + }, + { + "epoch": 1.12, + "grad_norm": 24.829317325193646, + "learning_rate": 8.627053217091216e-06, + "loss": 1.3377, + "step": 92730 + }, + { + "epoch": 1.12, + "grad_norm": 10.56022999531873, + "learning_rate": 8.626474573417483e-06, + "loss": 1.2018, + "step": 92733 + }, + { + "epoch": 1.12, + "grad_norm": 23.85092411179477, + "learning_rate": 8.625895934431102e-06, + "loss": 1.2087, + "step": 92736 + }, + { + "epoch": 1.12, + "grad_norm": 8.211143466031169, + "learning_rate": 8.625317300134038e-06, + "loss": 1.5945, + "step": 92739 + }, + { + "epoch": 1.12, + "grad_norm": 6.852033635643961, + "learning_rate": 8.624738670528274e-06, + "loss": 1.4462, + "step": 92742 + }, + { + "epoch": 1.12, + "grad_norm": 2.68617375846317, + "learning_rate": 8.624160045615779e-06, + "loss": 1.1496, + "step": 92745 + }, + { + "epoch": 1.12, + "grad_norm": 16.116434869967417, + "learning_rate": 8.623581425398535e-06, + "loss": 1.3914, + "step": 92748 + }, + { + "epoch": 1.12, + "grad_norm": 11.470103043576641, + "learning_rate": 8.623002809878508e-06, + "loss": 1.4136, + "step": 92751 + }, + { + "epoch": 1.12, + "grad_norm": 81.49296935384679, + "learning_rate": 8.622424199057675e-06, + "loss": 1.3335, + "step": 92754 + }, + { + "epoch": 1.12, + "grad_norm": 9.662300144802733, + "learning_rate": 8.621845592938012e-06, + "loss": 1.6181, + "step": 92757 + }, + { + "epoch": 1.12, + "grad_norm": 7.845068088959358, + "learning_rate": 8.621266991521496e-06, + "loss": 1.5076, + "step": 92760 + }, + { + "epoch": 1.12, + "grad_norm": 21.692213441512457, + "learning_rate": 8.620688394810101e-06, + "loss": 1.3024, + "step": 92763 + }, + { + "epoch": 1.12, + "grad_norm": 7.123961483133522, + "learning_rate": 8.620109802805796e-06, + "loss": 1.113, + "step": 92766 + }, + { + "epoch": 1.12, + "grad_norm": 17.09284811886956, + "learning_rate": 8.61953121551056e-06, + "loss": 1.1101, + "step": 92769 + }, + { + "epoch": 1.12, + "grad_norm": 72.89525080872913, + "learning_rate": 8.618952632926368e-06, + "loss": 0.9671, + "step": 92772 + }, + { + "epoch": 1.12, + "grad_norm": 7.295235207563473, + "learning_rate": 8.618374055055194e-06, + "loss": 0.8443, + "step": 92775 + }, + { + "epoch": 1.12, + "grad_norm": 10.53217378703924, + "learning_rate": 8.61779548189901e-06, + "loss": 0.8869, + "step": 92778 + }, + { + "epoch": 1.12, + "grad_norm": 7.933533008227519, + "learning_rate": 8.61721691345979e-06, + "loss": 1.2112, + "step": 92781 + }, + { + "epoch": 1.12, + "grad_norm": 12.849948816083394, + "learning_rate": 8.616638349739513e-06, + "loss": 1.1885, + "step": 92784 + }, + { + "epoch": 1.12, + "grad_norm": 25.291646184762655, + "learning_rate": 8.616059790740149e-06, + "loss": 1.2883, + "step": 92787 + }, + { + "epoch": 1.12, + "grad_norm": 15.98734304501569, + "learning_rate": 8.61548123646368e-06, + "loss": 1.1424, + "step": 92790 + }, + { + "epoch": 1.12, + "grad_norm": 25.408248833671383, + "learning_rate": 8.614902686912072e-06, + "loss": 1.3958, + "step": 92793 + }, + { + "epoch": 1.12, + "grad_norm": 10.306685446975553, + "learning_rate": 8.614324142087297e-06, + "loss": 1.4488, + "step": 92796 + }, + { + "epoch": 1.12, + "grad_norm": 19.449055394070786, + "learning_rate": 8.613745601991338e-06, + "loss": 1.114, + "step": 92799 + }, + { + "epoch": 1.12, + "grad_norm": 10.965839640495787, + "learning_rate": 8.61316706662617e-06, + "loss": 0.9976, + "step": 92802 + }, + { + "epoch": 1.12, + "grad_norm": 8.36636128115086, + "learning_rate": 8.612588535993762e-06, + "loss": 1.2828, + "step": 92805 + }, + { + "epoch": 1.12, + "grad_norm": 5.177517682699399, + "learning_rate": 8.612010010096086e-06, + "loss": 1.336, + "step": 92808 + }, + { + "epoch": 1.12, + "grad_norm": 16.746897810192017, + "learning_rate": 8.611431488935119e-06, + "loss": 0.831, + "step": 92811 + }, + { + "epoch": 1.12, + "grad_norm": 12.269562378166357, + "learning_rate": 8.61085297251284e-06, + "loss": 1.1809, + "step": 92814 + }, + { + "epoch": 1.12, + "grad_norm": 14.803471465832525, + "learning_rate": 8.610274460831221e-06, + "loss": 1.3679, + "step": 92817 + }, + { + "epoch": 1.12, + "grad_norm": 16.06696679218614, + "learning_rate": 8.60969595389223e-06, + "loss": 1.2125, + "step": 92820 + }, + { + "epoch": 1.12, + "grad_norm": 8.314163578483198, + "learning_rate": 8.609117451697846e-06, + "loss": 1.7844, + "step": 92823 + }, + { + "epoch": 1.12, + "grad_norm": 10.633458204255655, + "learning_rate": 8.608538954250045e-06, + "loss": 1.0223, + "step": 92826 + }, + { + "epoch": 1.12, + "grad_norm": 8.41133766786086, + "learning_rate": 8.607960461550803e-06, + "loss": 0.9686, + "step": 92829 + }, + { + "epoch": 1.12, + "grad_norm": 9.722834752878944, + "learning_rate": 8.607381973602085e-06, + "loss": 1.448, + "step": 92832 + }, + { + "epoch": 1.12, + "grad_norm": 16.75023688075116, + "learning_rate": 8.606803490405873e-06, + "loss": 1.3385, + "step": 92835 + }, + { + "epoch": 1.12, + "grad_norm": 8.47864425885319, + "learning_rate": 8.606225011964136e-06, + "loss": 1.1434, + "step": 92838 + }, + { + "epoch": 1.12, + "grad_norm": 16.462494504106235, + "learning_rate": 8.605646538278852e-06, + "loss": 1.1001, + "step": 92841 + }, + { + "epoch": 1.12, + "grad_norm": 25.20769645569098, + "learning_rate": 8.605068069351999e-06, + "loss": 1.4228, + "step": 92844 + }, + { + "epoch": 1.12, + "grad_norm": 10.67341807031223, + "learning_rate": 8.604489605185542e-06, + "loss": 1.3932, + "step": 92847 + }, + { + "epoch": 1.12, + "grad_norm": 8.088472541199227, + "learning_rate": 8.603911145781459e-06, + "loss": 1.1393, + "step": 92850 + }, + { + "epoch": 1.12, + "grad_norm": 11.88476889849324, + "learning_rate": 8.603332691141724e-06, + "loss": 1.0297, + "step": 92853 + }, + { + "epoch": 1.12, + "grad_norm": 12.891119947076954, + "learning_rate": 8.602754241268317e-06, + "loss": 0.9881, + "step": 92856 + }, + { + "epoch": 1.12, + "grad_norm": 28.11875915424003, + "learning_rate": 8.602175796163203e-06, + "loss": 1.1478, + "step": 92859 + }, + { + "epoch": 1.12, + "grad_norm": 9.843184634781633, + "learning_rate": 8.601597355828356e-06, + "loss": 1.5522, + "step": 92862 + }, + { + "epoch": 1.12, + "grad_norm": 10.569932208290929, + "learning_rate": 8.601018920265756e-06, + "loss": 1.1241, + "step": 92865 + }, + { + "epoch": 1.12, + "grad_norm": 23.30372522895102, + "learning_rate": 8.600440489477375e-06, + "loss": 1.2732, + "step": 92868 + }, + { + "epoch": 1.12, + "grad_norm": 10.225516777495912, + "learning_rate": 8.59986206346519e-06, + "loss": 1.4696, + "step": 92871 + }, + { + "epoch": 1.12, + "grad_norm": 17.077407432712445, + "learning_rate": 8.599283642231166e-06, + "loss": 1.1117, + "step": 92874 + }, + { + "epoch": 1.12, + "grad_norm": 5.053291189350817, + "learning_rate": 8.598705225777282e-06, + "loss": 1.2378, + "step": 92877 + }, + { + "epoch": 1.12, + "grad_norm": 12.856034137588104, + "learning_rate": 8.598126814105515e-06, + "loss": 1.3891, + "step": 92880 + }, + { + "epoch": 1.12, + "grad_norm": 6.832727002852374, + "learning_rate": 8.59754840721784e-06, + "loss": 1.108, + "step": 92883 + }, + { + "epoch": 1.12, + "grad_norm": 20.548243025364656, + "learning_rate": 8.59697000511622e-06, + "loss": 1.1774, + "step": 92886 + }, + { + "epoch": 1.12, + "grad_norm": 7.678499215432755, + "learning_rate": 8.59639160780264e-06, + "loss": 1.2728, + "step": 92889 + }, + { + "epoch": 1.12, + "grad_norm": 11.851350753968655, + "learning_rate": 8.595813215279067e-06, + "loss": 1.315, + "step": 92892 + }, + { + "epoch": 1.12, + "grad_norm": 26.33084045100471, + "learning_rate": 8.595234827547478e-06, + "loss": 1.3508, + "step": 92895 + }, + { + "epoch": 1.12, + "grad_norm": 119.806080045959, + "learning_rate": 8.59465644460985e-06, + "loss": 1.2776, + "step": 92898 + }, + { + "epoch": 1.12, + "grad_norm": 24.18897772476286, + "learning_rate": 8.594078066468152e-06, + "loss": 1.3067, + "step": 92901 + }, + { + "epoch": 1.12, + "grad_norm": 4.297668746808721, + "learning_rate": 8.593499693124358e-06, + "loss": 1.0077, + "step": 92904 + }, + { + "epoch": 1.12, + "grad_norm": 5.337658909704279, + "learning_rate": 8.592921324580441e-06, + "loss": 1.5079, + "step": 92907 + }, + { + "epoch": 1.12, + "grad_norm": 3.866780411843506, + "learning_rate": 8.592342960838382e-06, + "loss": 1.0371, + "step": 92910 + }, + { + "epoch": 1.12, + "grad_norm": 20.247934870387855, + "learning_rate": 8.591764601900148e-06, + "loss": 1.266, + "step": 92913 + }, + { + "epoch": 1.12, + "grad_norm": 7.101861108362758, + "learning_rate": 8.591186247767711e-06, + "loss": 1.0244, + "step": 92916 + }, + { + "epoch": 1.12, + "grad_norm": 5.193014493351942, + "learning_rate": 8.590607898443049e-06, + "loss": 1.0244, + "step": 92919 + }, + { + "epoch": 1.12, + "grad_norm": 23.97618834946305, + "learning_rate": 8.590029553928135e-06, + "loss": 1.2803, + "step": 92922 + }, + { + "epoch": 1.12, + "grad_norm": 14.40636735142393, + "learning_rate": 8.589451214224945e-06, + "loss": 1.2567, + "step": 92925 + }, + { + "epoch": 1.12, + "grad_norm": 11.313994413061396, + "learning_rate": 8.588872879335447e-06, + "loss": 1.1337, + "step": 92928 + }, + { + "epoch": 1.12, + "grad_norm": 4.512652482135099, + "learning_rate": 8.588294549261617e-06, + "loss": 0.7455, + "step": 92931 + }, + { + "epoch": 1.12, + "grad_norm": 15.305828848741946, + "learning_rate": 8.58771622400543e-06, + "loss": 1.0377, + "step": 92934 + }, + { + "epoch": 1.12, + "grad_norm": 6.180050496370779, + "learning_rate": 8.58713790356886e-06, + "loss": 1.0201, + "step": 92937 + }, + { + "epoch": 1.12, + "grad_norm": 6.08082551304199, + "learning_rate": 8.586559587953878e-06, + "loss": 1.3321, + "step": 92940 + }, + { + "epoch": 1.12, + "grad_norm": 7.808777236649724, + "learning_rate": 8.58598127716246e-06, + "loss": 1.0122, + "step": 92943 + }, + { + "epoch": 1.12, + "grad_norm": 18.13741954822018, + "learning_rate": 8.585402971196578e-06, + "loss": 1.5176, + "step": 92946 + }, + { + "epoch": 1.12, + "grad_norm": 12.359698437720231, + "learning_rate": 8.584824670058203e-06, + "loss": 0.8673, + "step": 92949 + }, + { + "epoch": 1.12, + "grad_norm": 14.59064567914665, + "learning_rate": 8.584246373749318e-06, + "loss": 1.2866, + "step": 92952 + }, + { + "epoch": 1.12, + "grad_norm": 7.813686536981429, + "learning_rate": 8.583668082271888e-06, + "loss": 1.2726, + "step": 92955 + }, + { + "epoch": 1.12, + "grad_norm": 4.481710121643184, + "learning_rate": 8.583089795627886e-06, + "loss": 1.1432, + "step": 92958 + }, + { + "epoch": 1.12, + "grad_norm": 10.349965278160498, + "learning_rate": 8.582511513819288e-06, + "loss": 1.3893, + "step": 92961 + }, + { + "epoch": 1.12, + "grad_norm": 3.9348336342141383, + "learning_rate": 8.581933236848073e-06, + "loss": 1.2363, + "step": 92964 + }, + { + "epoch": 1.12, + "grad_norm": 3.964438762848782, + "learning_rate": 8.581354964716205e-06, + "loss": 1.5381, + "step": 92967 + }, + { + "epoch": 1.12, + "grad_norm": 9.92593031574584, + "learning_rate": 8.580776697425662e-06, + "loss": 1.199, + "step": 92970 + }, + { + "epoch": 1.12, + "grad_norm": 11.61474910786206, + "learning_rate": 8.580198434978416e-06, + "loss": 1.1941, + "step": 92973 + }, + { + "epoch": 1.12, + "grad_norm": 3.785301276955539, + "learning_rate": 8.57962017737644e-06, + "loss": 1.3271, + "step": 92976 + }, + { + "epoch": 1.12, + "grad_norm": 5.772364997010064, + "learning_rate": 8.579041924621715e-06, + "loss": 1.3931, + "step": 92979 + }, + { + "epoch": 1.12, + "grad_norm": 38.18959248913475, + "learning_rate": 8.578463676716202e-06, + "loss": 1.0345, + "step": 92982 + }, + { + "epoch": 1.12, + "grad_norm": 13.06719407881227, + "learning_rate": 8.577885433661884e-06, + "loss": 1.2813, + "step": 92985 + }, + { + "epoch": 1.12, + "grad_norm": 6.310952072174638, + "learning_rate": 8.577307195460727e-06, + "loss": 1.1669, + "step": 92988 + }, + { + "epoch": 1.12, + "grad_norm": 9.941284166761962, + "learning_rate": 8.576728962114713e-06, + "loss": 1.3089, + "step": 92991 + }, + { + "epoch": 1.12, + "grad_norm": 13.573749080325083, + "learning_rate": 8.576150733625806e-06, + "loss": 1.1365, + "step": 92994 + }, + { + "epoch": 1.12, + "grad_norm": 7.205665406418131, + "learning_rate": 8.575572509995986e-06, + "loss": 1.3569, + "step": 92997 + }, + { + "epoch": 1.12, + "grad_norm": 3.4769895103693407, + "learning_rate": 8.574994291227222e-06, + "loss": 1.1461, + "step": 93000 + }, + { + "epoch": 1.12, + "grad_norm": 4.838948819815804, + "learning_rate": 8.57441607732149e-06, + "loss": 1.2164, + "step": 93003 + }, + { + "epoch": 1.12, + "grad_norm": 8.781814496421916, + "learning_rate": 8.573837868280766e-06, + "loss": 1.215, + "step": 93006 + }, + { + "epoch": 1.12, + "grad_norm": 13.417813131395581, + "learning_rate": 8.573259664107017e-06, + "loss": 1.2302, + "step": 93009 + }, + { + "epoch": 1.12, + "grad_norm": 6.198853460670032, + "learning_rate": 8.572681464802218e-06, + "loss": 1.525, + "step": 93012 + }, + { + "epoch": 1.12, + "grad_norm": 8.875986525654312, + "learning_rate": 8.572103270368343e-06, + "loss": 0.9782, + "step": 93015 + }, + { + "epoch": 1.12, + "grad_norm": 7.184132451143153, + "learning_rate": 8.571525080807366e-06, + "loss": 1.343, + "step": 93018 + }, + { + "epoch": 1.12, + "grad_norm": 21.766657703677467, + "learning_rate": 8.57094689612126e-06, + "loss": 1.2453, + "step": 93021 + }, + { + "epoch": 1.12, + "grad_norm": 11.320813122059802, + "learning_rate": 8.570368716311995e-06, + "loss": 1.2576, + "step": 93024 + }, + { + "epoch": 1.12, + "grad_norm": 11.091547312907464, + "learning_rate": 8.56979054138155e-06, + "loss": 1.472, + "step": 93027 + }, + { + "epoch": 1.12, + "grad_norm": 4.0581278624498776, + "learning_rate": 8.56921237133189e-06, + "loss": 1.154, + "step": 93030 + }, + { + "epoch": 1.12, + "grad_norm": 9.649308791902014, + "learning_rate": 8.568634206165e-06, + "loss": 1.1617, + "step": 93033 + }, + { + "epoch": 1.12, + "grad_norm": 17.41331150233712, + "learning_rate": 8.56805604588284e-06, + "loss": 0.9578, + "step": 93036 + }, + { + "epoch": 1.12, + "grad_norm": 17.89668225864532, + "learning_rate": 8.567477890487392e-06, + "loss": 1.0759, + "step": 93039 + }, + { + "epoch": 1.12, + "grad_norm": 5.193806973213431, + "learning_rate": 8.566899739980622e-06, + "loss": 1.1892, + "step": 93042 + }, + { + "epoch": 1.12, + "grad_norm": 18.163517445087663, + "learning_rate": 8.566321594364514e-06, + "loss": 1.2499, + "step": 93045 + }, + { + "epoch": 1.12, + "grad_norm": 10.014352281398399, + "learning_rate": 8.565743453641027e-06, + "loss": 1.2541, + "step": 93048 + }, + { + "epoch": 1.12, + "grad_norm": 14.90771387619373, + "learning_rate": 8.565165317812144e-06, + "loss": 1.0727, + "step": 93051 + }, + { + "epoch": 1.12, + "grad_norm": 6.110235915053856, + "learning_rate": 8.564587186879833e-06, + "loss": 1.1638, + "step": 93054 + }, + { + "epoch": 1.12, + "grad_norm": 11.996559555333715, + "learning_rate": 8.564009060846067e-06, + "loss": 1.4308, + "step": 93057 + }, + { + "epoch": 1.12, + "grad_norm": 17.333784858654518, + "learning_rate": 8.56343093971283e-06, + "loss": 1.1638, + "step": 93060 + }, + { + "epoch": 1.12, + "grad_norm": 38.19498551799819, + "learning_rate": 8.56285282348208e-06, + "loss": 1.3911, + "step": 93063 + }, + { + "epoch": 1.12, + "grad_norm": 10.466155077477334, + "learning_rate": 8.562274712155793e-06, + "loss": 1.1469, + "step": 93066 + }, + { + "epoch": 1.12, + "grad_norm": 10.232109528087145, + "learning_rate": 8.561696605735948e-06, + "loss": 0.9299, + "step": 93069 + }, + { + "epoch": 1.12, + "grad_norm": 8.174432854774276, + "learning_rate": 8.561118504224517e-06, + "loss": 1.4523, + "step": 93072 + }, + { + "epoch": 1.12, + "grad_norm": 12.794195439479353, + "learning_rate": 8.560540407623464e-06, + "loss": 1.1837, + "step": 93075 + }, + { + "epoch": 1.12, + "grad_norm": 9.07572734684219, + "learning_rate": 8.55996231593477e-06, + "loss": 1.1297, + "step": 93078 + }, + { + "epoch": 1.12, + "grad_norm": 6.081199585595496, + "learning_rate": 8.559384229160408e-06, + "loss": 1.3252, + "step": 93081 + }, + { + "epoch": 1.12, + "grad_norm": 22.950818349249197, + "learning_rate": 8.558806147302345e-06, + "loss": 1.0596, + "step": 93084 + }, + { + "epoch": 1.12, + "grad_norm": 16.401808217472478, + "learning_rate": 8.558228070362563e-06, + "loss": 1.0524, + "step": 93087 + }, + { + "epoch": 1.12, + "grad_norm": 6.605051075804091, + "learning_rate": 8.557649998343025e-06, + "loss": 1.275, + "step": 93090 + }, + { + "epoch": 1.12, + "grad_norm": 10.63392614732676, + "learning_rate": 8.55707193124571e-06, + "loss": 1.1235, + "step": 93093 + }, + { + "epoch": 1.12, + "grad_norm": 28.982683411152635, + "learning_rate": 8.556493869072586e-06, + "loss": 1.3955, + "step": 93096 + }, + { + "epoch": 1.12, + "grad_norm": 3.234458955454091, + "learning_rate": 8.555915811825635e-06, + "loss": 0.9855, + "step": 93099 + }, + { + "epoch": 1.12, + "grad_norm": 11.429332413550073, + "learning_rate": 8.555337759506816e-06, + "loss": 0.9835, + "step": 93102 + }, + { + "epoch": 1.12, + "grad_norm": 9.2525278198976, + "learning_rate": 8.554759712118113e-06, + "loss": 1.1392, + "step": 93105 + }, + { + "epoch": 1.12, + "grad_norm": 7.825105305086947, + "learning_rate": 8.554181669661492e-06, + "loss": 1.2799, + "step": 93108 + }, + { + "epoch": 1.12, + "grad_norm": 2.4831749975334194, + "learning_rate": 8.553603632138929e-06, + "loss": 1.2097, + "step": 93111 + }, + { + "epoch": 1.12, + "grad_norm": 5.312471487726923, + "learning_rate": 8.553025599552396e-06, + "loss": 1.2949, + "step": 93114 + }, + { + "epoch": 1.12, + "grad_norm": 39.214163771354045, + "learning_rate": 8.552447571903868e-06, + "loss": 1.2317, + "step": 93117 + }, + { + "epoch": 1.12, + "grad_norm": 23.22804574936018, + "learning_rate": 8.551869549195311e-06, + "loss": 1.0786, + "step": 93120 + }, + { + "epoch": 1.12, + "grad_norm": 5.995292271304497, + "learning_rate": 8.551291531428704e-06, + "loss": 1.3084, + "step": 93123 + }, + { + "epoch": 1.12, + "grad_norm": 5.807926339746982, + "learning_rate": 8.550713518606018e-06, + "loss": 0.8765, + "step": 93126 + }, + { + "epoch": 1.12, + "grad_norm": 14.878863406828769, + "learning_rate": 8.550135510729222e-06, + "loss": 1.0742, + "step": 93129 + }, + { + "epoch": 1.12, + "grad_norm": 7.754308104169995, + "learning_rate": 8.549557507800292e-06, + "loss": 1.4105, + "step": 93132 + }, + { + "epoch": 1.12, + "grad_norm": 15.253726099746412, + "learning_rate": 8.548979509821201e-06, + "loss": 1.3224, + "step": 93135 + }, + { + "epoch": 1.12, + "grad_norm": 90.28660924915292, + "learning_rate": 8.548401516793917e-06, + "loss": 1.4548, + "step": 93138 + }, + { + "epoch": 1.12, + "grad_norm": 20.24719583636287, + "learning_rate": 8.547823528720421e-06, + "loss": 1.1359, + "step": 93141 + }, + { + "epoch": 1.12, + "grad_norm": 12.497219315581027, + "learning_rate": 8.547245545602676e-06, + "loss": 1.0738, + "step": 93144 + }, + { + "epoch": 1.12, + "grad_norm": 9.692949139339547, + "learning_rate": 8.54666756744266e-06, + "loss": 0.9088, + "step": 93147 + }, + { + "epoch": 1.12, + "grad_norm": 5.923869395597586, + "learning_rate": 8.546089594242343e-06, + "loss": 1.1612, + "step": 93150 + }, + { + "epoch": 1.12, + "grad_norm": 12.516783661192166, + "learning_rate": 8.545511626003703e-06, + "loss": 0.9642, + "step": 93153 + }, + { + "epoch": 1.12, + "grad_norm": 18.03772155709524, + "learning_rate": 8.544933662728702e-06, + "loss": 1.3263, + "step": 93156 + }, + { + "epoch": 1.12, + "grad_norm": 9.727929587624136, + "learning_rate": 8.544355704419322e-06, + "loss": 1.3332, + "step": 93159 + }, + { + "epoch": 1.12, + "grad_norm": 2.055524039997583, + "learning_rate": 8.54377775107753e-06, + "loss": 1.4622, + "step": 93162 + }, + { + "epoch": 1.12, + "grad_norm": 12.9551057365114, + "learning_rate": 8.543199802705301e-06, + "loss": 1.3185, + "step": 93165 + }, + { + "epoch": 1.12, + "grad_norm": 52.48438813436388, + "learning_rate": 8.542621859304609e-06, + "loss": 1.1454, + "step": 93168 + }, + { + "epoch": 1.12, + "grad_norm": 9.161456397852639, + "learning_rate": 8.542043920877418e-06, + "loss": 1.0741, + "step": 93171 + }, + { + "epoch": 1.12, + "grad_norm": 11.475378652579604, + "learning_rate": 8.541465987425708e-06, + "loss": 0.9438, + "step": 93174 + }, + { + "epoch": 1.12, + "grad_norm": 4.583863430703663, + "learning_rate": 8.54088805895145e-06, + "loss": 0.7387, + "step": 93177 + }, + { + "epoch": 1.12, + "grad_norm": 8.967098186863906, + "learning_rate": 8.540310135456619e-06, + "loss": 1.4639, + "step": 93180 + }, + { + "epoch": 1.12, + "grad_norm": 27.781960588397872, + "learning_rate": 8.539732216943178e-06, + "loss": 0.8233, + "step": 93183 + }, + { + "epoch": 1.12, + "grad_norm": 3.7022083746264545, + "learning_rate": 8.539154303413106e-06, + "loss": 1.0665, + "step": 93186 + }, + { + "epoch": 1.12, + "grad_norm": 8.270974084291451, + "learning_rate": 8.538576394868375e-06, + "loss": 1.4803, + "step": 93189 + }, + { + "epoch": 1.12, + "grad_norm": 7.3005542317773715, + "learning_rate": 8.537998491310954e-06, + "loss": 0.8256, + "step": 93192 + }, + { + "epoch": 1.12, + "grad_norm": 19.24475305687266, + "learning_rate": 8.537420592742825e-06, + "loss": 1.2457, + "step": 93195 + }, + { + "epoch": 1.12, + "grad_norm": 2.4876337298783877, + "learning_rate": 8.536842699165946e-06, + "loss": 0.8993, + "step": 93198 + }, + { + "epoch": 1.12, + "grad_norm": 19.70093729535404, + "learning_rate": 8.536264810582297e-06, + "loss": 1.381, + "step": 93201 + }, + { + "epoch": 1.12, + "grad_norm": 7.431977815693235, + "learning_rate": 8.535686926993849e-06, + "loss": 1.3203, + "step": 93204 + }, + { + "epoch": 1.12, + "grad_norm": 10.96023617340228, + "learning_rate": 8.535109048402577e-06, + "loss": 1.0942, + "step": 93207 + }, + { + "epoch": 1.12, + "grad_norm": 7.360518379438733, + "learning_rate": 8.534531174810449e-06, + "loss": 1.3763, + "step": 93210 + }, + { + "epoch": 1.12, + "grad_norm": 6.580533624762253, + "learning_rate": 8.533953306219436e-06, + "loss": 1.1417, + "step": 93213 + }, + { + "epoch": 1.12, + "grad_norm": 14.816792520055449, + "learning_rate": 8.53337544263151e-06, + "loss": 1.4448, + "step": 93216 + }, + { + "epoch": 1.12, + "grad_norm": 14.816871993746519, + "learning_rate": 8.53279758404865e-06, + "loss": 1.4343, + "step": 93219 + }, + { + "epoch": 1.12, + "grad_norm": 7.7873418798921925, + "learning_rate": 8.532219730472824e-06, + "loss": 1.1179, + "step": 93222 + }, + { + "epoch": 1.12, + "grad_norm": 22.99286119080968, + "learning_rate": 8.531641881906e-06, + "loss": 0.9948, + "step": 93225 + }, + { + "epoch": 1.12, + "grad_norm": 5.039889617855216, + "learning_rate": 8.531064038350154e-06, + "loss": 1.2832, + "step": 93228 + }, + { + "epoch": 1.12, + "grad_norm": 11.0350402198206, + "learning_rate": 8.530486199807258e-06, + "loss": 1.2043, + "step": 93231 + }, + { + "epoch": 1.12, + "grad_norm": 7.082868090709982, + "learning_rate": 8.529908366279287e-06, + "loss": 1.2318, + "step": 93234 + }, + { + "epoch": 1.12, + "grad_norm": 3.4350246077930096, + "learning_rate": 8.529330537768203e-06, + "loss": 1.1734, + "step": 93237 + }, + { + "epoch": 1.12, + "grad_norm": 10.854058224786053, + "learning_rate": 8.528752714275987e-06, + "loss": 1.4764, + "step": 93240 + }, + { + "epoch": 1.12, + "grad_norm": 16.871367191366907, + "learning_rate": 8.528174895804608e-06, + "loss": 1.5638, + "step": 93243 + }, + { + "epoch": 1.12, + "grad_norm": 18.454263110002504, + "learning_rate": 8.527597082356036e-06, + "loss": 1.2139, + "step": 93246 + }, + { + "epoch": 1.12, + "grad_norm": 8.271750667797114, + "learning_rate": 8.52701927393225e-06, + "loss": 0.9205, + "step": 93249 + }, + { + "epoch": 1.12, + "grad_norm": 8.628852086891282, + "learning_rate": 8.526441470535212e-06, + "loss": 1.4987, + "step": 93252 + }, + { + "epoch": 1.12, + "grad_norm": 8.029900752592702, + "learning_rate": 8.525863672166898e-06, + "loss": 1.169, + "step": 93255 + }, + { + "epoch": 1.12, + "grad_norm": 11.313568817940762, + "learning_rate": 8.525285878829282e-06, + "loss": 0.9739, + "step": 93258 + }, + { + "epoch": 1.12, + "grad_norm": 7.671673584440915, + "learning_rate": 8.524708090524336e-06, + "loss": 1.2715, + "step": 93261 + }, + { + "epoch": 1.12, + "grad_norm": 8.857147382095587, + "learning_rate": 8.524130307254028e-06, + "loss": 1.7547, + "step": 93264 + }, + { + "epoch": 1.12, + "grad_norm": 21.594076342025758, + "learning_rate": 8.52355252902033e-06, + "loss": 1.5366, + "step": 93267 + }, + { + "epoch": 1.12, + "grad_norm": 3.6097223774702223, + "learning_rate": 8.522974755825215e-06, + "loss": 1.0516, + "step": 93270 + }, + { + "epoch": 1.12, + "grad_norm": 11.888190795009042, + "learning_rate": 8.52239698767066e-06, + "loss": 1.2683, + "step": 93273 + }, + { + "epoch": 1.12, + "grad_norm": 4.616053971979683, + "learning_rate": 8.52181922455863e-06, + "loss": 1.223, + "step": 93276 + }, + { + "epoch": 1.12, + "grad_norm": 9.05103247254442, + "learning_rate": 8.521241466491096e-06, + "loss": 1.2417, + "step": 93279 + }, + { + "epoch": 1.12, + "grad_norm": 12.196889691913436, + "learning_rate": 8.520663713470033e-06, + "loss": 1.0929, + "step": 93282 + }, + { + "epoch": 1.12, + "grad_norm": 13.228007646725052, + "learning_rate": 8.520085965497414e-06, + "loss": 1.2258, + "step": 93285 + }, + { + "epoch": 1.12, + "grad_norm": 10.23279885713832, + "learning_rate": 8.51950822257521e-06, + "loss": 1.3222, + "step": 93288 + }, + { + "epoch": 1.12, + "grad_norm": 3.63088876061491, + "learning_rate": 8.518930484705386e-06, + "loss": 1.2006, + "step": 93291 + }, + { + "epoch": 1.12, + "grad_norm": 21.193622741901663, + "learning_rate": 8.51835275188992e-06, + "loss": 1.2575, + "step": 93294 + }, + { + "epoch": 1.12, + "grad_norm": 10.551656239503448, + "learning_rate": 8.517775024130784e-06, + "loss": 1.3464, + "step": 93297 + }, + { + "epoch": 1.12, + "grad_norm": 27.85075639178114, + "learning_rate": 8.517197301429946e-06, + "loss": 1.4674, + "step": 93300 + }, + { + "epoch": 1.12, + "grad_norm": 8.01600291738502, + "learning_rate": 8.516619583789384e-06, + "loss": 1.0519, + "step": 93303 + }, + { + "epoch": 1.12, + "grad_norm": 5.872411327939731, + "learning_rate": 8.516041871211062e-06, + "loss": 1.2099, + "step": 93306 + }, + { + "epoch": 1.12, + "grad_norm": 17.724752462282378, + "learning_rate": 8.515464163696954e-06, + "loss": 1.2186, + "step": 93309 + }, + { + "epoch": 1.12, + "grad_norm": 10.08769006628507, + "learning_rate": 8.51488646124903e-06, + "loss": 1.5293, + "step": 93312 + }, + { + "epoch": 1.12, + "grad_norm": 7.9071577792139145, + "learning_rate": 8.51430876386927e-06, + "loss": 1.0717, + "step": 93315 + }, + { + "epoch": 1.12, + "grad_norm": 6.291037327012728, + "learning_rate": 8.513731071559635e-06, + "loss": 1.0278, + "step": 93318 + }, + { + "epoch": 1.12, + "grad_norm": 31.748328151901518, + "learning_rate": 8.513153384322099e-06, + "loss": 0.9827, + "step": 93321 + }, + { + "epoch": 1.12, + "grad_norm": 9.749549296580897, + "learning_rate": 8.512575702158636e-06, + "loss": 1.7513, + "step": 93324 + }, + { + "epoch": 1.12, + "grad_norm": 13.797048741526478, + "learning_rate": 8.511998025071217e-06, + "loss": 1.1155, + "step": 93327 + }, + { + "epoch": 1.12, + "grad_norm": 5.7025009680583905, + "learning_rate": 8.511420353061817e-06, + "loss": 1.7116, + "step": 93330 + }, + { + "epoch": 1.12, + "grad_norm": 10.93804183381154, + "learning_rate": 8.510842686132396e-06, + "loss": 0.8671, + "step": 93333 + }, + { + "epoch": 1.12, + "grad_norm": 8.21939863704492, + "learning_rate": 8.510265024284936e-06, + "loss": 0.9973, + "step": 93336 + }, + { + "epoch": 1.12, + "grad_norm": 13.431495341154832, + "learning_rate": 8.509687367521404e-06, + "loss": 1.1918, + "step": 93339 + }, + { + "epoch": 1.12, + "grad_norm": 2.721942007773412, + "learning_rate": 8.509109715843775e-06, + "loss": 1.016, + "step": 93342 + }, + { + "epoch": 1.12, + "grad_norm": 15.913904907698798, + "learning_rate": 8.508532069254014e-06, + "loss": 1.2259, + "step": 93345 + }, + { + "epoch": 1.12, + "grad_norm": 12.930233841708036, + "learning_rate": 8.507954427754098e-06, + "loss": 1.2161, + "step": 93348 + }, + { + "epoch": 1.12, + "grad_norm": 16.046392726481244, + "learning_rate": 8.507376791345993e-06, + "loss": 1.3509, + "step": 93351 + }, + { + "epoch": 1.12, + "grad_norm": 11.202762410527638, + "learning_rate": 8.506799160031673e-06, + "loss": 1.1892, + "step": 93354 + }, + { + "epoch": 1.12, + "grad_norm": 52.948007583023184, + "learning_rate": 8.506221533813115e-06, + "loss": 1.2966, + "step": 93357 + }, + { + "epoch": 1.12, + "grad_norm": 12.947469539586388, + "learning_rate": 8.505643912692283e-06, + "loss": 0.9085, + "step": 93360 + }, + { + "epoch": 1.12, + "grad_norm": 11.622602090774844, + "learning_rate": 8.505066296671148e-06, + "loss": 0.8912, + "step": 93363 + }, + { + "epoch": 1.12, + "grad_norm": 13.487846501705475, + "learning_rate": 8.504488685751682e-06, + "loss": 1.0999, + "step": 93366 + }, + { + "epoch": 1.12, + "grad_norm": 10.504234756367417, + "learning_rate": 8.503911079935863e-06, + "loss": 1.2599, + "step": 93369 + }, + { + "epoch": 1.12, + "grad_norm": 10.61430258402351, + "learning_rate": 8.503333479225653e-06, + "loss": 1.3995, + "step": 93372 + }, + { + "epoch": 1.12, + "grad_norm": 6.463354392624035, + "learning_rate": 8.502755883623027e-06, + "loss": 1.1114, + "step": 93375 + }, + { + "epoch": 1.12, + "grad_norm": 5.064830129550256, + "learning_rate": 8.502178293129954e-06, + "loss": 1.2698, + "step": 93378 + }, + { + "epoch": 1.12, + "grad_norm": 7.620771658370106, + "learning_rate": 8.50160070774841e-06, + "loss": 1.0278, + "step": 93381 + }, + { + "epoch": 1.12, + "grad_norm": 4.920177904799746, + "learning_rate": 8.501023127480363e-06, + "loss": 1.3784, + "step": 93384 + }, + { + "epoch": 1.12, + "grad_norm": 3.3212190730768643, + "learning_rate": 8.500445552327783e-06, + "loss": 1.4373, + "step": 93387 + }, + { + "epoch": 1.12, + "grad_norm": 43.52203828673676, + "learning_rate": 8.499867982292643e-06, + "loss": 1.3074, + "step": 93390 + }, + { + "epoch": 1.12, + "grad_norm": 5.527881162492409, + "learning_rate": 8.49929041737691e-06, + "loss": 1.3461, + "step": 93393 + }, + { + "epoch": 1.12, + "grad_norm": 2.6069573844686276, + "learning_rate": 8.498712857582565e-06, + "loss": 1.0566, + "step": 93396 + }, + { + "epoch": 1.12, + "grad_norm": 8.201147632262717, + "learning_rate": 8.498135302911565e-06, + "loss": 1.2152, + "step": 93399 + }, + { + "epoch": 1.12, + "grad_norm": 22.37293733848336, + "learning_rate": 8.497557753365891e-06, + "loss": 1.2138, + "step": 93402 + }, + { + "epoch": 1.12, + "grad_norm": 26.490382750434488, + "learning_rate": 8.496980208947511e-06, + "loss": 1.183, + "step": 93405 + }, + { + "epoch": 1.12, + "grad_norm": 29.128031000965574, + "learning_rate": 8.496402669658394e-06, + "loss": 1.3098, + "step": 93408 + }, + { + "epoch": 1.12, + "grad_norm": 15.520349775696914, + "learning_rate": 8.495825135500519e-06, + "loss": 1.6883, + "step": 93411 + }, + { + "epoch": 1.12, + "grad_norm": 13.221621524301959, + "learning_rate": 8.495247606475848e-06, + "loss": 1.105, + "step": 93414 + }, + { + "epoch": 1.12, + "grad_norm": 18.067381437541265, + "learning_rate": 8.494670082586354e-06, + "loss": 1.5547, + "step": 93417 + }, + { + "epoch": 1.12, + "grad_norm": 11.235692366971158, + "learning_rate": 8.494092563834007e-06, + "loss": 0.8798, + "step": 93420 + }, + { + "epoch": 1.12, + "grad_norm": 13.831999844651495, + "learning_rate": 8.493515050220785e-06, + "loss": 1.2402, + "step": 93423 + }, + { + "epoch": 1.12, + "grad_norm": 6.579313692213909, + "learning_rate": 8.492937541748652e-06, + "loss": 1.1837, + "step": 93426 + }, + { + "epoch": 1.12, + "grad_norm": 25.93882801018075, + "learning_rate": 8.492360038419577e-06, + "loss": 1.1656, + "step": 93429 + }, + { + "epoch": 1.12, + "grad_norm": 25.171851141438808, + "learning_rate": 8.491782540235537e-06, + "loss": 1.2992, + "step": 93432 + }, + { + "epoch": 1.12, + "grad_norm": 3.4148550957124706, + "learning_rate": 8.491205047198497e-06, + "loss": 0.8221, + "step": 93435 + }, + { + "epoch": 1.12, + "grad_norm": 22.411959474200955, + "learning_rate": 8.490627559310434e-06, + "loss": 1.2295, + "step": 93438 + }, + { + "epoch": 1.12, + "grad_norm": 8.96901899982755, + "learning_rate": 8.490050076573313e-06, + "loss": 1.4208, + "step": 93441 + }, + { + "epoch": 1.12, + "grad_norm": 5.952884238946839, + "learning_rate": 8.48947259898911e-06, + "loss": 1.2987, + "step": 93444 + }, + { + "epoch": 1.12, + "grad_norm": 8.63711110563434, + "learning_rate": 8.48889512655979e-06, + "loss": 1.1011, + "step": 93447 + }, + { + "epoch": 1.12, + "grad_norm": 14.357262503376546, + "learning_rate": 8.48831765928733e-06, + "loss": 1.1164, + "step": 93450 + }, + { + "epoch": 1.12, + "grad_norm": 5.104683101366748, + "learning_rate": 8.487740197173694e-06, + "loss": 0.9667, + "step": 93453 + }, + { + "epoch": 1.12, + "grad_norm": 8.929438287003851, + "learning_rate": 8.487162740220858e-06, + "loss": 1.2382, + "step": 93456 + }, + { + "epoch": 1.12, + "grad_norm": 8.780692427614232, + "learning_rate": 8.486585288430787e-06, + "loss": 1.2957, + "step": 93459 + }, + { + "epoch": 1.12, + "grad_norm": 2.67990790343444, + "learning_rate": 8.486007841805457e-06, + "loss": 1.2438, + "step": 93462 + }, + { + "epoch": 1.12, + "grad_norm": 15.103151510845182, + "learning_rate": 8.485430400346842e-06, + "loss": 1.0268, + "step": 93465 + }, + { + "epoch": 1.12, + "grad_norm": 25.631560902771866, + "learning_rate": 8.484852964056903e-06, + "loss": 1.3005, + "step": 93468 + }, + { + "epoch": 1.12, + "grad_norm": 4.116970814768388, + "learning_rate": 8.484275532937615e-06, + "loss": 1.061, + "step": 93471 + }, + { + "epoch": 1.12, + "grad_norm": 5.837150977548384, + "learning_rate": 8.483698106990947e-06, + "loss": 1.1423, + "step": 93474 + }, + { + "epoch": 1.12, + "grad_norm": 13.110637976107833, + "learning_rate": 8.483120686218877e-06, + "loss": 1.4006, + "step": 93477 + }, + { + "epoch": 1.12, + "grad_norm": 3.536249780256475, + "learning_rate": 8.482543270623367e-06, + "loss": 1.3436, + "step": 93480 + }, + { + "epoch": 1.12, + "grad_norm": 9.743602424421022, + "learning_rate": 8.481965860206387e-06, + "loss": 1.0645, + "step": 93483 + }, + { + "epoch": 1.12, + "grad_norm": 6.092070375998646, + "learning_rate": 8.481388454969914e-06, + "loss": 1.2305, + "step": 93486 + }, + { + "epoch": 1.12, + "grad_norm": 4.219689292027788, + "learning_rate": 8.480811054915913e-06, + "loss": 1.154, + "step": 93489 + }, + { + "epoch": 1.12, + "grad_norm": 19.45861297143789, + "learning_rate": 8.480233660046361e-06, + "loss": 0.9161, + "step": 93492 + }, + { + "epoch": 1.12, + "grad_norm": 14.89639831267446, + "learning_rate": 8.479656270363219e-06, + "loss": 1.3562, + "step": 93495 + }, + { + "epoch": 1.12, + "grad_norm": 2.861789134486248, + "learning_rate": 8.479078885868466e-06, + "loss": 1.1479, + "step": 93498 + }, + { + "epoch": 1.12, + "grad_norm": 9.107083796798998, + "learning_rate": 8.478501506564066e-06, + "loss": 1.1707, + "step": 93501 + }, + { + "epoch": 1.12, + "grad_norm": 7.9579157996040255, + "learning_rate": 8.477924132451995e-06, + "loss": 1.2805, + "step": 93504 + }, + { + "epoch": 1.12, + "grad_norm": 16.55870103131258, + "learning_rate": 8.477346763534218e-06, + "loss": 1.3946, + "step": 93507 + }, + { + "epoch": 1.12, + "grad_norm": 8.623472557703183, + "learning_rate": 8.47676939981271e-06, + "loss": 1.0061, + "step": 93510 + }, + { + "epoch": 1.12, + "grad_norm": 39.35189216521326, + "learning_rate": 8.476192041289438e-06, + "loss": 1.2895, + "step": 93513 + }, + { + "epoch": 1.12, + "grad_norm": 32.66261955479693, + "learning_rate": 8.475614687966374e-06, + "loss": 1.2897, + "step": 93516 + }, + { + "epoch": 1.12, + "grad_norm": 29.70081322171474, + "learning_rate": 8.47503733984549e-06, + "loss": 1.1304, + "step": 93519 + }, + { + "epoch": 1.12, + "grad_norm": 4.199657908282196, + "learning_rate": 8.474459996928754e-06, + "loss": 1.1559, + "step": 93522 + }, + { + "epoch": 1.12, + "grad_norm": 3.075661999097063, + "learning_rate": 8.473882659218134e-06, + "loss": 1.2749, + "step": 93525 + }, + { + "epoch": 1.12, + "grad_norm": 6.185355097083879, + "learning_rate": 8.473305326715605e-06, + "loss": 1.6618, + "step": 93528 + }, + { + "epoch": 1.12, + "grad_norm": 36.40436493509173, + "learning_rate": 8.472727999423134e-06, + "loss": 1.1275, + "step": 93531 + }, + { + "epoch": 1.12, + "grad_norm": 20.419555761470097, + "learning_rate": 8.472150677342695e-06, + "loss": 1.0566, + "step": 93534 + }, + { + "epoch": 1.12, + "grad_norm": 15.89750311602247, + "learning_rate": 8.471573360476251e-06, + "loss": 1.2112, + "step": 93537 + }, + { + "epoch": 1.12, + "grad_norm": 7.698356508868736, + "learning_rate": 8.470996048825779e-06, + "loss": 1.4841, + "step": 93540 + }, + { + "epoch": 1.12, + "grad_norm": 19.5928884256514, + "learning_rate": 8.470418742393243e-06, + "loss": 1.7216, + "step": 93543 + }, + { + "epoch": 1.12, + "grad_norm": 7.777654548222956, + "learning_rate": 8.469841441180624e-06, + "loss": 1.3356, + "step": 93546 + }, + { + "epoch": 1.12, + "grad_norm": 10.541166463959794, + "learning_rate": 8.469264145189879e-06, + "loss": 1.2564, + "step": 93549 + }, + { + "epoch": 1.12, + "grad_norm": 9.531761178948402, + "learning_rate": 8.468686854422987e-06, + "loss": 0.9059, + "step": 93552 + }, + { + "epoch": 1.12, + "grad_norm": 24.100394985272295, + "learning_rate": 8.468109568881912e-06, + "loss": 1.4617, + "step": 93555 + }, + { + "epoch": 1.13, + "grad_norm": 16.6908355277405, + "learning_rate": 8.467532288568634e-06, + "loss": 1.2184, + "step": 93558 + }, + { + "epoch": 1.13, + "grad_norm": 8.575526126220712, + "learning_rate": 8.466955013485109e-06, + "loss": 1.5616, + "step": 93561 + }, + { + "epoch": 1.13, + "grad_norm": 11.304833975770551, + "learning_rate": 8.466377743633319e-06, + "loss": 1.1533, + "step": 93564 + }, + { + "epoch": 1.13, + "grad_norm": 7.528003431167032, + "learning_rate": 8.465800479015225e-06, + "loss": 1.0429, + "step": 93567 + }, + { + "epoch": 1.13, + "grad_norm": 14.163253362953476, + "learning_rate": 8.465223219632805e-06, + "loss": 1.0736, + "step": 93570 + }, + { + "epoch": 1.13, + "grad_norm": 12.886705395550198, + "learning_rate": 8.464645965488025e-06, + "loss": 1.2095, + "step": 93573 + }, + { + "epoch": 1.13, + "grad_norm": 16.478270976700557, + "learning_rate": 8.464068716582857e-06, + "loss": 1.2362, + "step": 93576 + }, + { + "epoch": 1.13, + "grad_norm": 13.888336489961283, + "learning_rate": 8.463491472919264e-06, + "loss": 1.3082, + "step": 93579 + }, + { + "epoch": 1.13, + "grad_norm": 15.257072938703407, + "learning_rate": 8.462914234499225e-06, + "loss": 1.297, + "step": 93582 + }, + { + "epoch": 1.13, + "grad_norm": 14.390454014967938, + "learning_rate": 8.462337001324707e-06, + "loss": 1.3598, + "step": 93585 + }, + { + "epoch": 1.13, + "grad_norm": 10.438579081734447, + "learning_rate": 8.461759773397675e-06, + "loss": 1.176, + "step": 93588 + }, + { + "epoch": 1.13, + "grad_norm": 3.543771679059328, + "learning_rate": 8.461182550720104e-06, + "loss": 1.4066, + "step": 93591 + }, + { + "epoch": 1.13, + "grad_norm": 7.064066514816289, + "learning_rate": 8.460605333293963e-06, + "loss": 1.5235, + "step": 93594 + }, + { + "epoch": 1.13, + "grad_norm": 11.403342769095959, + "learning_rate": 8.46002812112122e-06, + "loss": 0.921, + "step": 93597 + }, + { + "epoch": 1.13, + "grad_norm": 17.872369263063753, + "learning_rate": 8.459450914203853e-06, + "loss": 1.408, + "step": 93600 + }, + { + "epoch": 1.13, + "grad_norm": 21.720191281304135, + "learning_rate": 8.458873712543818e-06, + "loss": 0.9966, + "step": 93603 + }, + { + "epoch": 1.13, + "grad_norm": 30.10108068584492, + "learning_rate": 8.458296516143095e-06, + "loss": 1.108, + "step": 93606 + }, + { + "epoch": 1.13, + "grad_norm": 5.681627544021341, + "learning_rate": 8.457719325003649e-06, + "loss": 0.9362, + "step": 93609 + }, + { + "epoch": 1.13, + "grad_norm": 4.894783992802964, + "learning_rate": 8.457142139127454e-06, + "loss": 1.5392, + "step": 93612 + }, + { + "epoch": 1.13, + "grad_norm": 7.044156495429403, + "learning_rate": 8.456564958516476e-06, + "loss": 1.021, + "step": 93615 + }, + { + "epoch": 1.13, + "grad_norm": 53.059012333279476, + "learning_rate": 8.455987783172685e-06, + "loss": 1.3456, + "step": 93618 + }, + { + "epoch": 1.13, + "grad_norm": 20.825055651898133, + "learning_rate": 8.45541061309805e-06, + "loss": 1.0816, + "step": 93621 + }, + { + "epoch": 1.13, + "grad_norm": 9.79237307331197, + "learning_rate": 8.454833448294545e-06, + "loss": 1.2168, + "step": 93624 + }, + { + "epoch": 1.13, + "grad_norm": 8.703574738906763, + "learning_rate": 8.45425628876414e-06, + "loss": 1.2072, + "step": 93627 + }, + { + "epoch": 1.13, + "grad_norm": 8.2294322346576, + "learning_rate": 8.453679134508796e-06, + "loss": 1.3126, + "step": 93630 + }, + { + "epoch": 1.13, + "grad_norm": 4.9826461692770305, + "learning_rate": 8.453101985530488e-06, + "loss": 1.3157, + "step": 93633 + }, + { + "epoch": 1.13, + "grad_norm": 23.29013656618887, + "learning_rate": 8.452524841831187e-06, + "loss": 0.8978, + "step": 93636 + }, + { + "epoch": 1.13, + "grad_norm": 13.214772005501347, + "learning_rate": 8.451947703412864e-06, + "loss": 0.8171, + "step": 93639 + }, + { + "epoch": 1.13, + "grad_norm": 8.954255086978632, + "learning_rate": 8.451370570277483e-06, + "loss": 1.2371, + "step": 93642 + }, + { + "epoch": 1.13, + "grad_norm": 10.975738016876038, + "learning_rate": 8.450793442427016e-06, + "loss": 0.9773, + "step": 93645 + }, + { + "epoch": 1.13, + "grad_norm": 12.648724344644007, + "learning_rate": 8.450216319863435e-06, + "loss": 1.3771, + "step": 93648 + }, + { + "epoch": 1.13, + "grad_norm": 21.90739070428694, + "learning_rate": 8.449639202588705e-06, + "loss": 0.938, + "step": 93651 + }, + { + "epoch": 1.13, + "grad_norm": 5.84384842850082, + "learning_rate": 8.449062090604801e-06, + "loss": 1.176, + "step": 93654 + }, + { + "epoch": 1.13, + "grad_norm": 7.975081648573274, + "learning_rate": 8.448484983913687e-06, + "loss": 1.1983, + "step": 93657 + }, + { + "epoch": 1.13, + "grad_norm": 3.6846502721110235, + "learning_rate": 8.447907882517336e-06, + "loss": 1.1625, + "step": 93660 + }, + { + "epoch": 1.13, + "grad_norm": 7.9763531794545734, + "learning_rate": 8.447330786417714e-06, + "loss": 1.2279, + "step": 93663 + }, + { + "epoch": 1.13, + "grad_norm": 4.377442185565584, + "learning_rate": 8.446753695616799e-06, + "loss": 1.4535, + "step": 93666 + }, + { + "epoch": 1.13, + "grad_norm": 29.115574370469783, + "learning_rate": 8.44617661011655e-06, + "loss": 1.2149, + "step": 93669 + }, + { + "epoch": 1.13, + "grad_norm": 6.382182337865355, + "learning_rate": 8.44559952991894e-06, + "loss": 1.1141, + "step": 93672 + }, + { + "epoch": 1.13, + "grad_norm": 18.369279098129688, + "learning_rate": 8.445022455025938e-06, + "loss": 1.2645, + "step": 93675 + }, + { + "epoch": 1.13, + "grad_norm": 37.18283384310354, + "learning_rate": 8.444445385439517e-06, + "loss": 1.2154, + "step": 93678 + }, + { + "epoch": 1.13, + "grad_norm": 31.217792915675393, + "learning_rate": 8.443868321161644e-06, + "loss": 1.0748, + "step": 93681 + }, + { + "epoch": 1.13, + "grad_norm": 3.4686604221597057, + "learning_rate": 8.443291262194285e-06, + "loss": 1.3042, + "step": 93684 + }, + { + "epoch": 1.13, + "grad_norm": 10.319613143454477, + "learning_rate": 8.442714208539411e-06, + "loss": 1.0886, + "step": 93687 + }, + { + "epoch": 1.13, + "grad_norm": 7.397962090071504, + "learning_rate": 8.442137160198995e-06, + "loss": 1.1976, + "step": 93690 + }, + { + "epoch": 1.13, + "grad_norm": 2.193490115887012, + "learning_rate": 8.441560117175007e-06, + "loss": 1.2455, + "step": 93693 + }, + { + "epoch": 1.13, + "grad_norm": 19.20825788690145, + "learning_rate": 8.44098307946941e-06, + "loss": 1.4602, + "step": 93696 + }, + { + "epoch": 1.13, + "grad_norm": 3.9365315863074533, + "learning_rate": 8.440406047084172e-06, + "loss": 1.1602, + "step": 93699 + }, + { + "epoch": 1.13, + "grad_norm": 17.093649401317624, + "learning_rate": 8.439829020021271e-06, + "loss": 1.2313, + "step": 93702 + }, + { + "epoch": 1.13, + "grad_norm": 5.909801303281867, + "learning_rate": 8.439251998282669e-06, + "loss": 1.2615, + "step": 93705 + }, + { + "epoch": 1.13, + "grad_norm": 9.924416372710498, + "learning_rate": 8.438674981870343e-06, + "loss": 1.1514, + "step": 93708 + }, + { + "epoch": 1.13, + "grad_norm": 4.149240877446641, + "learning_rate": 8.438097970786251e-06, + "loss": 1.1852, + "step": 93711 + }, + { + "epoch": 1.13, + "grad_norm": 9.283539382178851, + "learning_rate": 8.437520965032372e-06, + "loss": 1.1194, + "step": 93714 + }, + { + "epoch": 1.13, + "grad_norm": 4.644155329287178, + "learning_rate": 8.43694396461067e-06, + "loss": 1.1271, + "step": 93717 + }, + { + "epoch": 1.13, + "grad_norm": 5.959338700096501, + "learning_rate": 8.436366969523117e-06, + "loss": 1.3148, + "step": 93720 + }, + { + "epoch": 1.13, + "grad_norm": 10.658527218604682, + "learning_rate": 8.435789979771678e-06, + "loss": 1.5245, + "step": 93723 + }, + { + "epoch": 1.13, + "grad_norm": 14.839882370304437, + "learning_rate": 8.435212995358323e-06, + "loss": 1.3113, + "step": 93726 + }, + { + "epoch": 1.13, + "grad_norm": 12.27853917039176, + "learning_rate": 8.434636016285022e-06, + "loss": 1.1195, + "step": 93729 + }, + { + "epoch": 1.13, + "grad_norm": 28.430614104397687, + "learning_rate": 8.434059042553747e-06, + "loss": 1.0719, + "step": 93732 + }, + { + "epoch": 1.13, + "grad_norm": 15.084825213796694, + "learning_rate": 8.433482074166466e-06, + "loss": 1.342, + "step": 93735 + }, + { + "epoch": 1.13, + "grad_norm": 16.45357762722038, + "learning_rate": 8.432905111125143e-06, + "loss": 0.9659, + "step": 93738 + }, + { + "epoch": 1.13, + "grad_norm": 6.160457599170353, + "learning_rate": 8.432328153431752e-06, + "loss": 1.3597, + "step": 93741 + }, + { + "epoch": 1.13, + "grad_norm": 10.304625525079688, + "learning_rate": 8.43175120108826e-06, + "loss": 1.2257, + "step": 93744 + }, + { + "epoch": 1.13, + "grad_norm": 17.34869704629294, + "learning_rate": 8.431174254096639e-06, + "loss": 1.2383, + "step": 93747 + }, + { + "epoch": 1.13, + "grad_norm": 6.052305782732428, + "learning_rate": 8.43059731245885e-06, + "loss": 1.1391, + "step": 93750 + }, + { + "epoch": 1.13, + "grad_norm": 13.476129024763507, + "learning_rate": 8.430020376176868e-06, + "loss": 1.1098, + "step": 93753 + }, + { + "epoch": 1.13, + "grad_norm": 4.37054798664731, + "learning_rate": 8.429443445252664e-06, + "loss": 0.9876, + "step": 93756 + }, + { + "epoch": 1.13, + "grad_norm": 2.9233435515283794, + "learning_rate": 8.4288665196882e-06, + "loss": 0.9776, + "step": 93759 + }, + { + "epoch": 1.13, + "grad_norm": 9.731045103488821, + "learning_rate": 8.428289599485454e-06, + "loss": 1.1537, + "step": 93762 + }, + { + "epoch": 1.13, + "grad_norm": 12.17514889185242, + "learning_rate": 8.427712684646387e-06, + "loss": 1.3215, + "step": 93765 + }, + { + "epoch": 1.13, + "grad_norm": 4.579690247830863, + "learning_rate": 8.42713577517297e-06, + "loss": 1.1348, + "step": 93768 + }, + { + "epoch": 1.13, + "grad_norm": 10.131766971154667, + "learning_rate": 8.426558871067168e-06, + "loss": 1.2631, + "step": 93771 + }, + { + "epoch": 1.13, + "grad_norm": 15.576254499361093, + "learning_rate": 8.425981972330962e-06, + "loss": 1.0798, + "step": 93774 + }, + { + "epoch": 1.13, + "grad_norm": 10.47518363478538, + "learning_rate": 8.425405078966308e-06, + "loss": 1.3047, + "step": 93777 + }, + { + "epoch": 1.13, + "grad_norm": 2.8017395166661916, + "learning_rate": 8.424828190975178e-06, + "loss": 1.3853, + "step": 93780 + }, + { + "epoch": 1.13, + "grad_norm": 6.951228054129175, + "learning_rate": 8.424251308359542e-06, + "loss": 1.3618, + "step": 93783 + }, + { + "epoch": 1.13, + "grad_norm": 7.983879763592824, + "learning_rate": 8.42367443112137e-06, + "loss": 1.3652, + "step": 93786 + }, + { + "epoch": 1.13, + "grad_norm": 16.931434274465058, + "learning_rate": 8.423097559262634e-06, + "loss": 1.0676, + "step": 93789 + }, + { + "epoch": 1.13, + "grad_norm": 8.50099585986351, + "learning_rate": 8.42252069278529e-06, + "loss": 1.2581, + "step": 93792 + }, + { + "epoch": 1.13, + "grad_norm": 8.756269843467436, + "learning_rate": 8.421943831691318e-06, + "loss": 1.2206, + "step": 93795 + }, + { + "epoch": 1.13, + "grad_norm": 6.323509529064818, + "learning_rate": 8.421366975982682e-06, + "loss": 1.2742, + "step": 93798 + }, + { + "epoch": 1.13, + "grad_norm": 7.671351430595515, + "learning_rate": 8.420790125661358e-06, + "loss": 1.3365, + "step": 93801 + }, + { + "epoch": 1.13, + "grad_norm": 4.711295032496855, + "learning_rate": 8.420213280729302e-06, + "loss": 1.0887, + "step": 93804 + }, + { + "epoch": 1.13, + "grad_norm": 18.011271359229635, + "learning_rate": 8.419636441188491e-06, + "loss": 1.2962, + "step": 93807 + }, + { + "epoch": 1.13, + "grad_norm": 6.1551489168450315, + "learning_rate": 8.41905960704089e-06, + "loss": 1.133, + "step": 93810 + }, + { + "epoch": 1.13, + "grad_norm": 38.942382933569945, + "learning_rate": 8.41848277828847e-06, + "loss": 1.0121, + "step": 93813 + }, + { + "epoch": 1.13, + "grad_norm": 12.666896439981802, + "learning_rate": 8.417905954933201e-06, + "loss": 1.1368, + "step": 93816 + }, + { + "epoch": 1.13, + "grad_norm": 2.391916613066591, + "learning_rate": 8.417329136977048e-06, + "loss": 1.3853, + "step": 93819 + }, + { + "epoch": 1.13, + "grad_norm": 9.33755218554048, + "learning_rate": 8.41675232442198e-06, + "loss": 1.4261, + "step": 93822 + }, + { + "epoch": 1.13, + "grad_norm": 8.42040186167588, + "learning_rate": 8.416175517269964e-06, + "loss": 1.0145, + "step": 93825 + }, + { + "epoch": 1.13, + "grad_norm": 11.467070897547352, + "learning_rate": 8.415598715522975e-06, + "loss": 1.226, + "step": 93828 + }, + { + "epoch": 1.13, + "grad_norm": 9.153960641963186, + "learning_rate": 8.415021919182976e-06, + "loss": 1.4354, + "step": 93831 + }, + { + "epoch": 1.13, + "grad_norm": 10.026277196059942, + "learning_rate": 8.414445128251932e-06, + "loss": 1.288, + "step": 93834 + }, + { + "epoch": 1.13, + "grad_norm": 15.658992746209961, + "learning_rate": 8.413868342731818e-06, + "loss": 1.0809, + "step": 93837 + }, + { + "epoch": 1.13, + "grad_norm": 10.815417712743173, + "learning_rate": 8.4132915626246e-06, + "loss": 1.0758, + "step": 93840 + }, + { + "epoch": 1.13, + "grad_norm": 15.385470645236573, + "learning_rate": 8.412714787932248e-06, + "loss": 1.1553, + "step": 93843 + }, + { + "epoch": 1.13, + "grad_norm": 12.473753934741946, + "learning_rate": 8.412138018656727e-06, + "loss": 1.1902, + "step": 93846 + }, + { + "epoch": 1.13, + "grad_norm": 7.044032705195903, + "learning_rate": 8.411561254800007e-06, + "loss": 1.3977, + "step": 93849 + }, + { + "epoch": 1.13, + "grad_norm": 19.012083117824428, + "learning_rate": 8.410984496364056e-06, + "loss": 1.266, + "step": 93852 + }, + { + "epoch": 1.13, + "grad_norm": 7.858128869174761, + "learning_rate": 8.410407743350845e-06, + "loss": 1.1693, + "step": 93855 + }, + { + "epoch": 1.13, + "grad_norm": 2.6184339406190604, + "learning_rate": 8.409830995762337e-06, + "loss": 1.0484, + "step": 93858 + }, + { + "epoch": 1.13, + "grad_norm": 16.78916045922439, + "learning_rate": 8.409254253600506e-06, + "loss": 1.0228, + "step": 93861 + }, + { + "epoch": 1.13, + "grad_norm": 18.30129070670482, + "learning_rate": 8.408677516867312e-06, + "loss": 1.3332, + "step": 93864 + }, + { + "epoch": 1.13, + "grad_norm": 4.015953209112338, + "learning_rate": 8.40810078556473e-06, + "loss": 1.4956, + "step": 93867 + }, + { + "epoch": 1.13, + "grad_norm": 3.0375456033193386, + "learning_rate": 8.407524059694731e-06, + "loss": 1.1629, + "step": 93870 + }, + { + "epoch": 1.13, + "grad_norm": 5.529499233978845, + "learning_rate": 8.406947339259277e-06, + "loss": 1.0698, + "step": 93873 + }, + { + "epoch": 1.13, + "grad_norm": 11.984707686454643, + "learning_rate": 8.406370624260335e-06, + "loss": 1.1811, + "step": 93876 + }, + { + "epoch": 1.13, + "grad_norm": 8.866256399713112, + "learning_rate": 8.405793914699876e-06, + "loss": 1.1504, + "step": 93879 + }, + { + "epoch": 1.13, + "grad_norm": 4.964498976102068, + "learning_rate": 8.405217210579875e-06, + "loss": 1.0804, + "step": 93882 + }, + { + "epoch": 1.13, + "grad_norm": 3.6207558204195713, + "learning_rate": 8.404640511902288e-06, + "loss": 1.0489, + "step": 93885 + }, + { + "epoch": 1.13, + "grad_norm": 7.528623380308246, + "learning_rate": 8.404063818669087e-06, + "loss": 1.39, + "step": 93888 + }, + { + "epoch": 1.13, + "grad_norm": 4.913929845679775, + "learning_rate": 8.403487130882241e-06, + "loss": 1.1988, + "step": 93891 + }, + { + "epoch": 1.13, + "grad_norm": 7.610247509767298, + "learning_rate": 8.402910448543722e-06, + "loss": 1.1275, + "step": 93894 + }, + { + "epoch": 1.13, + "grad_norm": 15.547463786722552, + "learning_rate": 8.402333771655495e-06, + "loss": 1.1277, + "step": 93897 + }, + { + "epoch": 1.13, + "grad_norm": 5.8541481007299465, + "learning_rate": 8.401757100219524e-06, + "loss": 1.1345, + "step": 93900 + }, + { + "epoch": 1.13, + "grad_norm": 11.915790065570116, + "learning_rate": 8.401180434237782e-06, + "loss": 1.3821, + "step": 93903 + }, + { + "epoch": 1.13, + "grad_norm": 5.128583824002754, + "learning_rate": 8.400603773712232e-06, + "loss": 1.2568, + "step": 93906 + }, + { + "epoch": 1.13, + "grad_norm": 42.98669357424562, + "learning_rate": 8.400027118644852e-06, + "loss": 1.179, + "step": 93909 + }, + { + "epoch": 1.13, + "grad_norm": 5.909629501949201, + "learning_rate": 8.399450469037598e-06, + "loss": 1.2344, + "step": 93912 + }, + { + "epoch": 1.13, + "grad_norm": 11.949181200823878, + "learning_rate": 8.398873824892444e-06, + "loss": 1.182, + "step": 93915 + }, + { + "epoch": 1.13, + "grad_norm": 5.919654234306395, + "learning_rate": 8.398297186211356e-06, + "loss": 1.3357, + "step": 93918 + }, + { + "epoch": 1.13, + "grad_norm": 10.649585704957797, + "learning_rate": 8.397720552996302e-06, + "loss": 1.2744, + "step": 93921 + }, + { + "epoch": 1.13, + "grad_norm": 13.025393445672425, + "learning_rate": 8.397143925249254e-06, + "loss": 1.3156, + "step": 93924 + }, + { + "epoch": 1.13, + "grad_norm": 4.769761654887809, + "learning_rate": 8.396567302972176e-06, + "loss": 1.397, + "step": 93927 + }, + { + "epoch": 1.13, + "grad_norm": 8.280131029275028, + "learning_rate": 8.395990686167035e-06, + "loss": 1.4031, + "step": 93930 + }, + { + "epoch": 1.13, + "grad_norm": 8.305338028780485, + "learning_rate": 8.395414074835799e-06, + "loss": 1.1155, + "step": 93933 + }, + { + "epoch": 1.13, + "grad_norm": 5.301095834897262, + "learning_rate": 8.39483746898044e-06, + "loss": 1.1241, + "step": 93936 + }, + { + "epoch": 1.13, + "grad_norm": 6.57218581834864, + "learning_rate": 8.39426086860292e-06, + "loss": 1.1309, + "step": 93939 + }, + { + "epoch": 1.13, + "grad_norm": 5.8732455373764765, + "learning_rate": 8.393684273705209e-06, + "loss": 1.4526, + "step": 93942 + }, + { + "epoch": 1.13, + "grad_norm": 9.041847508904853, + "learning_rate": 8.393107684289277e-06, + "loss": 1.3572, + "step": 93945 + }, + { + "epoch": 1.13, + "grad_norm": 2.8323438458060624, + "learning_rate": 8.392531100357086e-06, + "loss": 1.5045, + "step": 93948 + }, + { + "epoch": 1.13, + "grad_norm": 6.685349769934678, + "learning_rate": 8.391954521910612e-06, + "loss": 1.2276, + "step": 93951 + }, + { + "epoch": 1.13, + "grad_norm": 23.433822616538137, + "learning_rate": 8.391377948951815e-06, + "loss": 1.393, + "step": 93954 + }, + { + "epoch": 1.13, + "grad_norm": 17.027768733763644, + "learning_rate": 8.390801381482667e-06, + "loss": 1.1158, + "step": 93957 + }, + { + "epoch": 1.13, + "grad_norm": 7.108093522798905, + "learning_rate": 8.390224819505132e-06, + "loss": 1.2146, + "step": 93960 + }, + { + "epoch": 1.13, + "grad_norm": 6.498814264415353, + "learning_rate": 8.389648263021184e-06, + "loss": 0.8005, + "step": 93963 + }, + { + "epoch": 1.13, + "grad_norm": 12.38351410108846, + "learning_rate": 8.389071712032783e-06, + "loss": 1.1228, + "step": 93966 + }, + { + "epoch": 1.13, + "grad_norm": 4.2962159559543265, + "learning_rate": 8.388495166541901e-06, + "loss": 1.293, + "step": 93969 + }, + { + "epoch": 1.13, + "grad_norm": 5.927358310749035, + "learning_rate": 8.387918626550504e-06, + "loss": 1.2407, + "step": 93972 + }, + { + "epoch": 1.13, + "grad_norm": 14.357501627581682, + "learning_rate": 8.387342092060557e-06, + "loss": 1.1672, + "step": 93975 + }, + { + "epoch": 1.13, + "grad_norm": 3.4658485347326207, + "learning_rate": 8.386765563074037e-06, + "loss": 1.2791, + "step": 93978 + }, + { + "epoch": 1.13, + "grad_norm": 2.977893060209298, + "learning_rate": 8.386189039592903e-06, + "loss": 1.2203, + "step": 93981 + }, + { + "epoch": 1.13, + "grad_norm": 2.7084743022633977, + "learning_rate": 8.385612521619122e-06, + "loss": 1.5787, + "step": 93984 + }, + { + "epoch": 1.13, + "grad_norm": 3.8417684659926805, + "learning_rate": 8.385036009154666e-06, + "loss": 1.1108, + "step": 93987 + }, + { + "epoch": 1.13, + "grad_norm": 40.24256804379916, + "learning_rate": 8.384459502201501e-06, + "loss": 1.2022, + "step": 93990 + }, + { + "epoch": 1.13, + "grad_norm": 4.811235748576005, + "learning_rate": 8.383883000761594e-06, + "loss": 1.3045, + "step": 93993 + }, + { + "epoch": 1.13, + "grad_norm": 4.232201709463307, + "learning_rate": 8.38330650483691e-06, + "loss": 1.3583, + "step": 93996 + }, + { + "epoch": 1.13, + "grad_norm": 14.23420274637687, + "learning_rate": 8.38273001442942e-06, + "loss": 1.0871, + "step": 93999 + }, + { + "epoch": 1.13, + "grad_norm": 21.418893991054464, + "learning_rate": 8.382153529541087e-06, + "loss": 0.9027, + "step": 94002 + }, + { + "epoch": 1.13, + "grad_norm": 7.7407259029297, + "learning_rate": 8.381577050173886e-06, + "loss": 1.3797, + "step": 94005 + }, + { + "epoch": 1.13, + "grad_norm": 12.649439089080282, + "learning_rate": 8.381000576329776e-06, + "loss": 1.2106, + "step": 94008 + }, + { + "epoch": 1.13, + "grad_norm": 5.476904788145983, + "learning_rate": 8.38042410801073e-06, + "loss": 0.7945, + "step": 94011 + }, + { + "epoch": 1.13, + "grad_norm": 13.974964831981568, + "learning_rate": 8.379847645218712e-06, + "loss": 1.308, + "step": 94014 + }, + { + "epoch": 1.13, + "grad_norm": 16.752946923434877, + "learning_rate": 8.379271187955693e-06, + "loss": 1.3734, + "step": 94017 + }, + { + "epoch": 1.13, + "grad_norm": 7.767269594840944, + "learning_rate": 8.378694736223633e-06, + "loss": 1.0565, + "step": 94020 + }, + { + "epoch": 1.13, + "grad_norm": 6.306584592092458, + "learning_rate": 8.378118290024508e-06, + "loss": 1.0515, + "step": 94023 + }, + { + "epoch": 1.13, + "grad_norm": 4.327640443443836, + "learning_rate": 8.377541849360278e-06, + "loss": 1.2106, + "step": 94026 + }, + { + "epoch": 1.13, + "grad_norm": 4.177013941039119, + "learning_rate": 8.376965414232915e-06, + "loss": 1.2463, + "step": 94029 + }, + { + "epoch": 1.13, + "grad_norm": 9.27230350528209, + "learning_rate": 8.376388984644385e-06, + "loss": 1.3139, + "step": 94032 + }, + { + "epoch": 1.13, + "grad_norm": 9.688129119421161, + "learning_rate": 8.375812560596654e-06, + "loss": 0.9731, + "step": 94035 + }, + { + "epoch": 1.13, + "grad_norm": 11.472176424404136, + "learning_rate": 8.375236142091687e-06, + "loss": 1.028, + "step": 94038 + }, + { + "epoch": 1.13, + "grad_norm": 15.973500449210082, + "learning_rate": 8.374659729131457e-06, + "loss": 1.127, + "step": 94041 + }, + { + "epoch": 1.13, + "grad_norm": 10.277410541428127, + "learning_rate": 8.37408332171793e-06, + "loss": 1.0617, + "step": 94044 + }, + { + "epoch": 1.13, + "grad_norm": 13.33337098731292, + "learning_rate": 8.373506919853066e-06, + "loss": 0.9658, + "step": 94047 + }, + { + "epoch": 1.13, + "grad_norm": 5.184457527588113, + "learning_rate": 8.372930523538838e-06, + "loss": 1.1488, + "step": 94050 + }, + { + "epoch": 1.13, + "grad_norm": 10.574248164946844, + "learning_rate": 8.372354132777214e-06, + "loss": 1.0688, + "step": 94053 + }, + { + "epoch": 1.13, + "grad_norm": 5.326811328836146, + "learning_rate": 8.371777747570156e-06, + "loss": 1.3939, + "step": 94056 + }, + { + "epoch": 1.13, + "grad_norm": 15.037467859110231, + "learning_rate": 8.37120136791964e-06, + "loss": 1.2436, + "step": 94059 + }, + { + "epoch": 1.13, + "grad_norm": 13.820614187137137, + "learning_rate": 8.370624993827621e-06, + "loss": 1.1323, + "step": 94062 + }, + { + "epoch": 1.13, + "grad_norm": 9.439002812339648, + "learning_rate": 8.370048625296074e-06, + "loss": 1.1376, + "step": 94065 + }, + { + "epoch": 1.13, + "grad_norm": 6.3633488664897495, + "learning_rate": 8.369472262326963e-06, + "loss": 1.2749, + "step": 94068 + }, + { + "epoch": 1.13, + "grad_norm": 4.32618483114058, + "learning_rate": 8.36889590492226e-06, + "loss": 1.1805, + "step": 94071 + }, + { + "epoch": 1.13, + "grad_norm": 3.7672422483731585, + "learning_rate": 8.368319553083923e-06, + "loss": 1.3048, + "step": 94074 + }, + { + "epoch": 1.13, + "grad_norm": 4.606890846958802, + "learning_rate": 8.367743206813926e-06, + "loss": 1.139, + "step": 94077 + }, + { + "epoch": 1.13, + "grad_norm": 13.334833661549885, + "learning_rate": 8.367166866114232e-06, + "loss": 1.0632, + "step": 94080 + }, + { + "epoch": 1.13, + "grad_norm": 4.903582929224713, + "learning_rate": 8.366590530986811e-06, + "loss": 1.4525, + "step": 94083 + }, + { + "epoch": 1.13, + "grad_norm": 4.540456694030976, + "learning_rate": 8.36601420143363e-06, + "loss": 1.3383, + "step": 94086 + }, + { + "epoch": 1.13, + "grad_norm": 6.5992204949708295, + "learning_rate": 8.36543787745665e-06, + "loss": 1.0815, + "step": 94089 + }, + { + "epoch": 1.13, + "grad_norm": 7.819793651269894, + "learning_rate": 8.364861559057842e-06, + "loss": 1.2185, + "step": 94092 + }, + { + "epoch": 1.13, + "grad_norm": 10.072287821391456, + "learning_rate": 8.364285246239174e-06, + "loss": 1.1912, + "step": 94095 + }, + { + "epoch": 1.13, + "grad_norm": 16.97999605226597, + "learning_rate": 8.363708939002614e-06, + "loss": 1.1174, + "step": 94098 + }, + { + "epoch": 1.13, + "grad_norm": 6.532282126785535, + "learning_rate": 8.363132637350121e-06, + "loss": 1.1791, + "step": 94101 + }, + { + "epoch": 1.13, + "grad_norm": 9.275661653101992, + "learning_rate": 8.362556341283667e-06, + "loss": 1.6785, + "step": 94104 + }, + { + "epoch": 1.13, + "grad_norm": 7.906185137197657, + "learning_rate": 8.36198005080522e-06, + "loss": 1.1579, + "step": 94107 + }, + { + "epoch": 1.13, + "grad_norm": 5.430090676068287, + "learning_rate": 8.361403765916745e-06, + "loss": 0.9376, + "step": 94110 + }, + { + "epoch": 1.13, + "grad_norm": 16.44336688022109, + "learning_rate": 8.360827486620212e-06, + "loss": 1.3105, + "step": 94113 + }, + { + "epoch": 1.13, + "grad_norm": 5.45452698956952, + "learning_rate": 8.360251212917579e-06, + "loss": 1.0163, + "step": 94116 + }, + { + "epoch": 1.13, + "grad_norm": 8.498726452040133, + "learning_rate": 8.35967494481082e-06, + "loss": 1.2645, + "step": 94119 + }, + { + "epoch": 1.13, + "grad_norm": 7.342415475470283, + "learning_rate": 8.359098682301899e-06, + "loss": 1.2383, + "step": 94122 + }, + { + "epoch": 1.13, + "grad_norm": 16.833787975179394, + "learning_rate": 8.358522425392787e-06, + "loss": 1.3104, + "step": 94125 + }, + { + "epoch": 1.13, + "grad_norm": 11.129748883215903, + "learning_rate": 8.357946174085441e-06, + "loss": 0.8522, + "step": 94128 + }, + { + "epoch": 1.13, + "grad_norm": 10.809913407095774, + "learning_rate": 8.357369928381838e-06, + "loss": 1.5141, + "step": 94131 + }, + { + "epoch": 1.13, + "grad_norm": 7.658832224263862, + "learning_rate": 8.356793688283935e-06, + "loss": 1.3707, + "step": 94134 + }, + { + "epoch": 1.13, + "grad_norm": 26.984568206152584, + "learning_rate": 8.356217453793707e-06, + "loss": 1.1448, + "step": 94137 + }, + { + "epoch": 1.13, + "grad_norm": 2.9525236923425098, + "learning_rate": 8.355641224913118e-06, + "loss": 0.9569, + "step": 94140 + }, + { + "epoch": 1.13, + "grad_norm": 9.265821274956885, + "learning_rate": 8.35506500164413e-06, + "loss": 1.1905, + "step": 94143 + }, + { + "epoch": 1.13, + "grad_norm": 4.220233721293586, + "learning_rate": 8.354488783988712e-06, + "loss": 1.2821, + "step": 94146 + }, + { + "epoch": 1.13, + "grad_norm": 3.238894087284461, + "learning_rate": 8.353912571948832e-06, + "loss": 1.2369, + "step": 94149 + }, + { + "epoch": 1.13, + "grad_norm": 15.118310793689002, + "learning_rate": 8.35333636552646e-06, + "loss": 1.242, + "step": 94152 + }, + { + "epoch": 1.13, + "grad_norm": 8.724708744970748, + "learning_rate": 8.352760164723552e-06, + "loss": 1.0735, + "step": 94155 + }, + { + "epoch": 1.13, + "grad_norm": 10.108238773122771, + "learning_rate": 8.35218396954208e-06, + "loss": 1.2843, + "step": 94158 + }, + { + "epoch": 1.13, + "grad_norm": 18.43052096338735, + "learning_rate": 8.351607779984015e-06, + "loss": 1.3505, + "step": 94161 + }, + { + "epoch": 1.13, + "grad_norm": 4.097046711193919, + "learning_rate": 8.351031596051313e-06, + "loss": 1.1698, + "step": 94164 + }, + { + "epoch": 1.13, + "grad_norm": 7.62430364191168, + "learning_rate": 8.350455417745953e-06, + "loss": 1.2443, + "step": 94167 + }, + { + "epoch": 1.13, + "grad_norm": 15.296312726062691, + "learning_rate": 8.34987924506989e-06, + "loss": 1.4237, + "step": 94170 + }, + { + "epoch": 1.13, + "grad_norm": 31.8610984715728, + "learning_rate": 8.349303078025097e-06, + "loss": 1.3791, + "step": 94173 + }, + { + "epoch": 1.13, + "grad_norm": 8.8643479631231, + "learning_rate": 8.348726916613536e-06, + "loss": 1.4251, + "step": 94176 + }, + { + "epoch": 1.13, + "grad_norm": 13.268780168160621, + "learning_rate": 8.34815076083718e-06, + "loss": 1.6064, + "step": 94179 + }, + { + "epoch": 1.13, + "grad_norm": 10.238954646442696, + "learning_rate": 8.347574610697987e-06, + "loss": 0.8599, + "step": 94182 + }, + { + "epoch": 1.13, + "grad_norm": 15.930296058970885, + "learning_rate": 8.346998466197925e-06, + "loss": 1.3145, + "step": 94185 + }, + { + "epoch": 1.13, + "grad_norm": 11.766218899772948, + "learning_rate": 8.346422327338961e-06, + "loss": 1.1161, + "step": 94188 + }, + { + "epoch": 1.13, + "grad_norm": 28.860221442536602, + "learning_rate": 8.345846194123063e-06, + "loss": 1.2265, + "step": 94191 + }, + { + "epoch": 1.13, + "grad_norm": 9.459351463808591, + "learning_rate": 8.345270066552201e-06, + "loss": 1.2684, + "step": 94194 + }, + { + "epoch": 1.13, + "grad_norm": 24.886640511999595, + "learning_rate": 8.344693944628332e-06, + "loss": 1.4214, + "step": 94197 + }, + { + "epoch": 1.13, + "grad_norm": 5.37218834963248, + "learning_rate": 8.344117828353425e-06, + "loss": 1.6647, + "step": 94200 + }, + { + "epoch": 1.13, + "grad_norm": 7.829763030507433, + "learning_rate": 8.343541717729449e-06, + "loss": 1.2845, + "step": 94203 + }, + { + "epoch": 1.13, + "grad_norm": 13.885775643312018, + "learning_rate": 8.342965612758373e-06, + "loss": 1.6284, + "step": 94206 + }, + { + "epoch": 1.13, + "grad_norm": 11.244813585743536, + "learning_rate": 8.342389513442153e-06, + "loss": 1.4184, + "step": 94209 + }, + { + "epoch": 1.13, + "grad_norm": 6.613593855302264, + "learning_rate": 8.34181341978276e-06, + "loss": 1.2137, + "step": 94212 + }, + { + "epoch": 1.13, + "grad_norm": 3.523084619507374, + "learning_rate": 8.341237331782162e-06, + "loss": 1.3168, + "step": 94215 + }, + { + "epoch": 1.13, + "grad_norm": 13.777503526708845, + "learning_rate": 8.340661249442324e-06, + "loss": 1.3144, + "step": 94218 + }, + { + "epoch": 1.13, + "grad_norm": 7.001149739091543, + "learning_rate": 8.340085172765213e-06, + "loss": 1.4353, + "step": 94221 + }, + { + "epoch": 1.13, + "grad_norm": 4.631662448534289, + "learning_rate": 8.339509101752792e-06, + "loss": 1.2588, + "step": 94224 + }, + { + "epoch": 1.13, + "grad_norm": 3.373648314546725, + "learning_rate": 8.338933036407027e-06, + "loss": 1.3309, + "step": 94227 + }, + { + "epoch": 1.13, + "grad_norm": 10.165281451830042, + "learning_rate": 8.338356976729884e-06, + "loss": 1.4214, + "step": 94230 + }, + { + "epoch": 1.13, + "grad_norm": 12.835888175202387, + "learning_rate": 8.337780922723336e-06, + "loss": 1.093, + "step": 94233 + }, + { + "epoch": 1.13, + "grad_norm": 10.817569139190214, + "learning_rate": 8.33720487438934e-06, + "loss": 1.2096, + "step": 94236 + }, + { + "epoch": 1.13, + "grad_norm": 6.757830651636007, + "learning_rate": 8.336628831729863e-06, + "loss": 1.0058, + "step": 94239 + }, + { + "epoch": 1.13, + "grad_norm": 3.482727245291952, + "learning_rate": 8.336052794746872e-06, + "loss": 1.1395, + "step": 94242 + }, + { + "epoch": 1.13, + "grad_norm": 8.602536102795836, + "learning_rate": 8.335476763442336e-06, + "loss": 0.8883, + "step": 94245 + }, + { + "epoch": 1.13, + "grad_norm": 14.683294785014883, + "learning_rate": 8.33490073781822e-06, + "loss": 1.1955, + "step": 94248 + }, + { + "epoch": 1.13, + "grad_norm": 7.190174362993439, + "learning_rate": 8.334324717876484e-06, + "loss": 1.5938, + "step": 94251 + }, + { + "epoch": 1.13, + "grad_norm": 6.349279243920942, + "learning_rate": 8.333748703619098e-06, + "loss": 0.9326, + "step": 94254 + }, + { + "epoch": 1.13, + "grad_norm": 9.397444168809354, + "learning_rate": 8.33317269504803e-06, + "loss": 1.2716, + "step": 94257 + }, + { + "epoch": 1.13, + "grad_norm": 5.452701452739684, + "learning_rate": 8.332596692165244e-06, + "loss": 1.2381, + "step": 94260 + }, + { + "epoch": 1.13, + "grad_norm": 13.524984249963675, + "learning_rate": 8.332020694972702e-06, + "loss": 1.2914, + "step": 94263 + }, + { + "epoch": 1.13, + "grad_norm": 10.727375970349119, + "learning_rate": 8.331444703472374e-06, + "loss": 1.2145, + "step": 94266 + }, + { + "epoch": 1.13, + "grad_norm": 12.529818087546818, + "learning_rate": 8.330868717666221e-06, + "loss": 1.3561, + "step": 94269 + }, + { + "epoch": 1.13, + "grad_norm": 11.785987609035377, + "learning_rate": 8.330292737556214e-06, + "loss": 1.1387, + "step": 94272 + }, + { + "epoch": 1.13, + "grad_norm": 8.91797299384948, + "learning_rate": 8.32971676314432e-06, + "loss": 1.1777, + "step": 94275 + }, + { + "epoch": 1.13, + "grad_norm": 12.106096573796611, + "learning_rate": 8.329140794432498e-06, + "loss": 0.9125, + "step": 94278 + }, + { + "epoch": 1.13, + "grad_norm": 8.71943069783542, + "learning_rate": 8.328564831422716e-06, + "loss": 1.352, + "step": 94281 + }, + { + "epoch": 1.13, + "grad_norm": 7.819529428067942, + "learning_rate": 8.327988874116939e-06, + "loss": 0.9411, + "step": 94284 + }, + { + "epoch": 1.13, + "grad_norm": 18.13097423881798, + "learning_rate": 8.32741292251714e-06, + "loss": 1.5367, + "step": 94287 + }, + { + "epoch": 1.13, + "grad_norm": 9.460168863526567, + "learning_rate": 8.326836976625273e-06, + "loss": 1.1489, + "step": 94290 + }, + { + "epoch": 1.13, + "grad_norm": 12.805961950758181, + "learning_rate": 8.326261036443308e-06, + "loss": 0.9593, + "step": 94293 + }, + { + "epoch": 1.13, + "grad_norm": 12.805712552864298, + "learning_rate": 8.32568510197321e-06, + "loss": 1.4205, + "step": 94296 + }, + { + "epoch": 1.13, + "grad_norm": 16.562449387186074, + "learning_rate": 8.325109173216949e-06, + "loss": 1.258, + "step": 94299 + }, + { + "epoch": 1.13, + "grad_norm": 6.866781214058433, + "learning_rate": 8.32453325017649e-06, + "loss": 1.594, + "step": 94302 + }, + { + "epoch": 1.13, + "grad_norm": 2.3466029362000156, + "learning_rate": 8.323957332853791e-06, + "loss": 0.8797, + "step": 94305 + }, + { + "epoch": 1.13, + "grad_norm": 4.001483213447053, + "learning_rate": 8.323381421250822e-06, + "loss": 1.179, + "step": 94308 + }, + { + "epoch": 1.13, + "grad_norm": 10.057824571635221, + "learning_rate": 8.32280551536955e-06, + "loss": 0.8908, + "step": 94311 + }, + { + "epoch": 1.13, + "grad_norm": 19.707951941047302, + "learning_rate": 8.32222961521194e-06, + "loss": 0.8577, + "step": 94314 + }, + { + "epoch": 1.13, + "grad_norm": 11.265340881935273, + "learning_rate": 8.321653720779952e-06, + "loss": 1.0696, + "step": 94317 + }, + { + "epoch": 1.13, + "grad_norm": 6.369765214818941, + "learning_rate": 8.321077832075558e-06, + "loss": 1.0061, + "step": 94320 + }, + { + "epoch": 1.13, + "grad_norm": 4.470825933400908, + "learning_rate": 8.32050194910072e-06, + "loss": 1.1026, + "step": 94323 + }, + { + "epoch": 1.13, + "grad_norm": 8.52936839112168, + "learning_rate": 8.3199260718574e-06, + "loss": 1.1672, + "step": 94326 + }, + { + "epoch": 1.13, + "grad_norm": 5.914676451553275, + "learning_rate": 8.319350200347575e-06, + "loss": 1.0462, + "step": 94329 + }, + { + "epoch": 1.13, + "grad_norm": 15.27631775474731, + "learning_rate": 8.3187743345732e-06, + "loss": 1.0312, + "step": 94332 + }, + { + "epoch": 1.13, + "grad_norm": 4.1137567542082225, + "learning_rate": 8.318198474536238e-06, + "loss": 1.3524, + "step": 94335 + }, + { + "epoch": 1.13, + "grad_norm": 8.929949645054336, + "learning_rate": 8.31762262023866e-06, + "loss": 1.267, + "step": 94338 + }, + { + "epoch": 1.13, + "grad_norm": 4.595823464583354, + "learning_rate": 8.317046771682434e-06, + "loss": 1.1394, + "step": 94341 + }, + { + "epoch": 1.13, + "grad_norm": 3.702138581574941, + "learning_rate": 8.31647092886952e-06, + "loss": 1.5439, + "step": 94344 + }, + { + "epoch": 1.13, + "grad_norm": 2.677619563228951, + "learning_rate": 8.315895091801882e-06, + "loss": 1.0586, + "step": 94347 + }, + { + "epoch": 1.13, + "grad_norm": 28.059950249679023, + "learning_rate": 8.315319260481486e-06, + "loss": 1.2493, + "step": 94350 + }, + { + "epoch": 1.13, + "grad_norm": 5.937120438264475, + "learning_rate": 8.314743434910303e-06, + "loss": 1.3853, + "step": 94353 + }, + { + "epoch": 1.13, + "grad_norm": 5.678358758523519, + "learning_rate": 8.314167615090294e-06, + "loss": 1.4743, + "step": 94356 + }, + { + "epoch": 1.13, + "grad_norm": 14.85972759099449, + "learning_rate": 8.31359180102342e-06, + "loss": 1.5223, + "step": 94359 + }, + { + "epoch": 1.13, + "grad_norm": 16.366636542679533, + "learning_rate": 8.313015992711653e-06, + "loss": 1.2831, + "step": 94362 + }, + { + "epoch": 1.13, + "grad_norm": 13.697128679422717, + "learning_rate": 8.312440190156952e-06, + "loss": 1.215, + "step": 94365 + }, + { + "epoch": 1.13, + "grad_norm": 17.71054694336823, + "learning_rate": 8.311864393361288e-06, + "loss": 1.4194, + "step": 94368 + }, + { + "epoch": 1.13, + "grad_norm": 14.91903564572041, + "learning_rate": 8.311288602326618e-06, + "loss": 1.2485, + "step": 94371 + }, + { + "epoch": 1.13, + "grad_norm": 12.668653079011202, + "learning_rate": 8.310712817054916e-06, + "loss": 1.1837, + "step": 94374 + }, + { + "epoch": 1.13, + "grad_norm": 13.042941137168912, + "learning_rate": 8.310137037548139e-06, + "loss": 1.133, + "step": 94377 + }, + { + "epoch": 1.13, + "grad_norm": 7.148972798916011, + "learning_rate": 8.309561263808256e-06, + "loss": 1.3098, + "step": 94380 + }, + { + "epoch": 1.13, + "grad_norm": 7.441066834615705, + "learning_rate": 8.308985495837236e-06, + "loss": 0.9114, + "step": 94383 + }, + { + "epoch": 1.13, + "grad_norm": 4.293187524832036, + "learning_rate": 8.308409733637038e-06, + "loss": 1.1176, + "step": 94386 + }, + { + "epoch": 1.13, + "grad_norm": 3.594228227745269, + "learning_rate": 8.307833977209623e-06, + "loss": 1.1099, + "step": 94389 + }, + { + "epoch": 1.14, + "grad_norm": 4.3829612885863325, + "learning_rate": 8.307258226556964e-06, + "loss": 1.174, + "step": 94392 + }, + { + "epoch": 1.14, + "grad_norm": 11.958493729963283, + "learning_rate": 8.306682481681027e-06, + "loss": 1.2738, + "step": 94395 + }, + { + "epoch": 1.14, + "grad_norm": 413.2520667762035, + "learning_rate": 8.306106742583771e-06, + "loss": 1.1125, + "step": 94398 + }, + { + "epoch": 1.14, + "grad_norm": 26.27932800589909, + "learning_rate": 8.305531009267159e-06, + "loss": 1.5238, + "step": 94401 + }, + { + "epoch": 1.14, + "grad_norm": 5.4586366613910355, + "learning_rate": 8.304955281733163e-06, + "loss": 1.2955, + "step": 94404 + }, + { + "epoch": 1.14, + "grad_norm": 12.289690722723218, + "learning_rate": 8.30437955998374e-06, + "loss": 1.2309, + "step": 94407 + }, + { + "epoch": 1.14, + "grad_norm": 13.054293962905001, + "learning_rate": 8.303803844020864e-06, + "loss": 1.5018, + "step": 94410 + }, + { + "epoch": 1.14, + "grad_norm": 3.866476582878092, + "learning_rate": 8.30322813384649e-06, + "loss": 1.1668, + "step": 94413 + }, + { + "epoch": 1.14, + "grad_norm": 6.645237709292185, + "learning_rate": 8.30265242946259e-06, + "loss": 1.1655, + "step": 94416 + }, + { + "epoch": 1.14, + "grad_norm": 13.387523517939142, + "learning_rate": 8.302076730871124e-06, + "loss": 1.13, + "step": 94419 + }, + { + "epoch": 1.14, + "grad_norm": 3.81182573322559, + "learning_rate": 8.301501038074063e-06, + "loss": 1.6555, + "step": 94422 + }, + { + "epoch": 1.14, + "grad_norm": 5.870570769404845, + "learning_rate": 8.300925351073361e-06, + "loss": 1.2483, + "step": 94425 + }, + { + "epoch": 1.14, + "grad_norm": 5.943919166562631, + "learning_rate": 8.300349669870992e-06, + "loss": 1.3235, + "step": 94428 + }, + { + "epoch": 1.14, + "grad_norm": 9.176101417194742, + "learning_rate": 8.299773994468914e-06, + "loss": 0.9879, + "step": 94431 + }, + { + "epoch": 1.14, + "grad_norm": 7.087334613101196, + "learning_rate": 8.299198324869099e-06, + "loss": 1.686, + "step": 94434 + }, + { + "epoch": 1.14, + "grad_norm": 2.703526668838931, + "learning_rate": 8.298622661073508e-06, + "loss": 1.1235, + "step": 94437 + }, + { + "epoch": 1.14, + "grad_norm": 10.10035702060681, + "learning_rate": 8.298047003084104e-06, + "loss": 1.5868, + "step": 94440 + }, + { + "epoch": 1.14, + "grad_norm": 13.8656292945976, + "learning_rate": 8.29747135090285e-06, + "loss": 1.2782, + "step": 94443 + }, + { + "epoch": 1.14, + "grad_norm": 12.458404224186138, + "learning_rate": 8.296895704531715e-06, + "loss": 1.0917, + "step": 94446 + }, + { + "epoch": 1.14, + "grad_norm": 10.20978059992443, + "learning_rate": 8.296320063972662e-06, + "loss": 1.3924, + "step": 94449 + }, + { + "epoch": 1.14, + "grad_norm": 7.946911850565521, + "learning_rate": 8.295744429227654e-06, + "loss": 1.0883, + "step": 94452 + }, + { + "epoch": 1.14, + "grad_norm": 58.635352719680384, + "learning_rate": 8.295168800298657e-06, + "loss": 1.3125, + "step": 94455 + }, + { + "epoch": 1.14, + "grad_norm": 9.63030906446731, + "learning_rate": 8.294593177187635e-06, + "loss": 1.1759, + "step": 94458 + }, + { + "epoch": 1.14, + "grad_norm": 5.477571468375494, + "learning_rate": 8.29401755989655e-06, + "loss": 0.9696, + "step": 94461 + }, + { + "epoch": 1.14, + "grad_norm": 18.11839465240317, + "learning_rate": 8.293441948427372e-06, + "loss": 1.4365, + "step": 94464 + }, + { + "epoch": 1.14, + "grad_norm": 4.904012231194503, + "learning_rate": 8.292866342782059e-06, + "loss": 1.1333, + "step": 94467 + }, + { + "epoch": 1.14, + "grad_norm": 41.0451054971641, + "learning_rate": 8.292290742962579e-06, + "loss": 1.4076, + "step": 94470 + }, + { + "epoch": 1.14, + "grad_norm": 29.595749331468625, + "learning_rate": 8.291715148970896e-06, + "loss": 1.2692, + "step": 94473 + }, + { + "epoch": 1.14, + "grad_norm": 5.6898180054027465, + "learning_rate": 8.291139560808975e-06, + "loss": 1.0651, + "step": 94476 + }, + { + "epoch": 1.14, + "grad_norm": 8.944620150631192, + "learning_rate": 8.290563978478778e-06, + "loss": 1.1689, + "step": 94479 + }, + { + "epoch": 1.14, + "grad_norm": 10.492828262422758, + "learning_rate": 8.289988401982271e-06, + "loss": 1.0153, + "step": 94482 + }, + { + "epoch": 1.14, + "grad_norm": 22.004705582654577, + "learning_rate": 8.289412831321415e-06, + "loss": 1.2386, + "step": 94485 + }, + { + "epoch": 1.14, + "grad_norm": 5.16603905916595, + "learning_rate": 8.288837266498177e-06, + "loss": 1.0408, + "step": 94488 + }, + { + "epoch": 1.14, + "grad_norm": 2.2254678645765846, + "learning_rate": 8.288261707514527e-06, + "loss": 1.0179, + "step": 94491 + }, + { + "epoch": 1.14, + "grad_norm": 11.430644195294716, + "learning_rate": 8.287686154372419e-06, + "loss": 1.2876, + "step": 94494 + }, + { + "epoch": 1.14, + "grad_norm": 4.5265433750718955, + "learning_rate": 8.28711060707382e-06, + "loss": 1.2301, + "step": 94497 + }, + { + "epoch": 1.14, + "grad_norm": 7.235611449795115, + "learning_rate": 8.286535065620698e-06, + "loss": 1.1345, + "step": 94500 + }, + { + "epoch": 1.14, + "grad_norm": 9.995131951837022, + "learning_rate": 8.285959530015017e-06, + "loss": 0.9842, + "step": 94503 + }, + { + "epoch": 1.14, + "grad_norm": 16.02364934781651, + "learning_rate": 8.285384000258735e-06, + "loss": 1.3371, + "step": 94506 + }, + { + "epoch": 1.14, + "grad_norm": 23.614124383723073, + "learning_rate": 8.284808476353817e-06, + "loss": 1.1636, + "step": 94509 + }, + { + "epoch": 1.14, + "grad_norm": 24.65589841094704, + "learning_rate": 8.284232958302235e-06, + "loss": 1.1394, + "step": 94512 + }, + { + "epoch": 1.14, + "grad_norm": 8.840561518400413, + "learning_rate": 8.283657446105943e-06, + "loss": 1.063, + "step": 94515 + }, + { + "epoch": 1.14, + "grad_norm": 21.24204031150313, + "learning_rate": 8.283081939766916e-06, + "loss": 1.3695, + "step": 94518 + }, + { + "epoch": 1.14, + "grad_norm": 4.838102897353319, + "learning_rate": 8.282506439287108e-06, + "loss": 1.4531, + "step": 94521 + }, + { + "epoch": 1.14, + "grad_norm": 15.22078639706279, + "learning_rate": 8.281930944668488e-06, + "loss": 1.1027, + "step": 94524 + }, + { + "epoch": 1.14, + "grad_norm": 8.574722628468063, + "learning_rate": 8.281355455913018e-06, + "loss": 0.826, + "step": 94527 + }, + { + "epoch": 1.14, + "grad_norm": 10.122840583909403, + "learning_rate": 8.280779973022665e-06, + "loss": 1.4014, + "step": 94530 + }, + { + "epoch": 1.14, + "grad_norm": 2.72104614442553, + "learning_rate": 8.280204495999387e-06, + "loss": 1.5403, + "step": 94533 + }, + { + "epoch": 1.14, + "grad_norm": 5.611105335942591, + "learning_rate": 8.279629024845156e-06, + "loss": 1.6405, + "step": 94536 + }, + { + "epoch": 1.14, + "grad_norm": 18.621029625087992, + "learning_rate": 8.279053559561927e-06, + "loss": 1.3397, + "step": 94539 + }, + { + "epoch": 1.14, + "grad_norm": 6.8890552682044905, + "learning_rate": 8.27847810015167e-06, + "loss": 1.2909, + "step": 94542 + }, + { + "epoch": 1.14, + "grad_norm": 13.044805747198877, + "learning_rate": 8.27790264661635e-06, + "loss": 1.0481, + "step": 94545 + }, + { + "epoch": 1.14, + "grad_norm": 13.144924381096086, + "learning_rate": 8.277327198957926e-06, + "loss": 1.1121, + "step": 94548 + }, + { + "epoch": 1.14, + "grad_norm": 5.984314054154225, + "learning_rate": 8.27675175717836e-06, + "loss": 1.5006, + "step": 94551 + }, + { + "epoch": 1.14, + "grad_norm": 7.419539322288058, + "learning_rate": 8.276176321279624e-06, + "loss": 1.3877, + "step": 94554 + }, + { + "epoch": 1.14, + "grad_norm": 5.102160102252743, + "learning_rate": 8.27560089126368e-06, + "loss": 1.0199, + "step": 94557 + }, + { + "epoch": 1.14, + "grad_norm": 7.992082778548372, + "learning_rate": 8.275025467132483e-06, + "loss": 1.2032, + "step": 94560 + }, + { + "epoch": 1.14, + "grad_norm": 7.389908815816575, + "learning_rate": 8.274450048888002e-06, + "loss": 1.2993, + "step": 94563 + }, + { + "epoch": 1.14, + "grad_norm": 4.538878940125143, + "learning_rate": 8.273874636532206e-06, + "loss": 1.2643, + "step": 94566 + }, + { + "epoch": 1.14, + "grad_norm": 21.830128267121882, + "learning_rate": 8.273299230067052e-06, + "loss": 1.0828, + "step": 94569 + }, + { + "epoch": 1.14, + "grad_norm": 7.989824468455614, + "learning_rate": 8.27272382949451e-06, + "loss": 1.1012, + "step": 94572 + }, + { + "epoch": 1.14, + "grad_norm": 7.905597907370432, + "learning_rate": 8.272148434816533e-06, + "loss": 1.1398, + "step": 94575 + }, + { + "epoch": 1.14, + "grad_norm": 4.626988210713392, + "learning_rate": 8.271573046035094e-06, + "loss": 1.1487, + "step": 94578 + }, + { + "epoch": 1.14, + "grad_norm": 4.829556884750733, + "learning_rate": 8.270997663152151e-06, + "loss": 1.1944, + "step": 94581 + }, + { + "epoch": 1.14, + "grad_norm": 15.278262371053387, + "learning_rate": 8.270422286169676e-06, + "loss": 1.0164, + "step": 94584 + }, + { + "epoch": 1.14, + "grad_norm": 2.677246341487733, + "learning_rate": 8.269846915089621e-06, + "loss": 1.4204, + "step": 94587 + }, + { + "epoch": 1.14, + "grad_norm": 11.556020116212245, + "learning_rate": 8.269271549913958e-06, + "loss": 1.2232, + "step": 94590 + }, + { + "epoch": 1.14, + "grad_norm": 10.03167231056864, + "learning_rate": 8.268696190644648e-06, + "loss": 0.9908, + "step": 94593 + }, + { + "epoch": 1.14, + "grad_norm": 18.435772858091358, + "learning_rate": 8.268120837283653e-06, + "loss": 1.0315, + "step": 94596 + }, + { + "epoch": 1.14, + "grad_norm": 7.359797723913302, + "learning_rate": 8.267545489832941e-06, + "loss": 1.3742, + "step": 94599 + }, + { + "epoch": 1.14, + "grad_norm": 26.125309401860356, + "learning_rate": 8.266970148294468e-06, + "loss": 1.2735, + "step": 94602 + }, + { + "epoch": 1.14, + "grad_norm": 18.52337528986557, + "learning_rate": 8.266394812670203e-06, + "loss": 0.7541, + "step": 94605 + }, + { + "epoch": 1.14, + "grad_norm": 6.677810121006406, + "learning_rate": 8.265819482962108e-06, + "loss": 1.1603, + "step": 94608 + }, + { + "epoch": 1.14, + "grad_norm": 32.3481508974736, + "learning_rate": 8.26524415917215e-06, + "loss": 1.1078, + "step": 94611 + }, + { + "epoch": 1.14, + "grad_norm": 20.894563487033835, + "learning_rate": 8.264668841302283e-06, + "loss": 1.3301, + "step": 94614 + }, + { + "epoch": 1.14, + "grad_norm": 5.779987096776814, + "learning_rate": 8.264093529354477e-06, + "loss": 1.6184, + "step": 94617 + }, + { + "epoch": 1.14, + "grad_norm": 29.694037270593867, + "learning_rate": 8.263518223330698e-06, + "loss": 1.2706, + "step": 94620 + }, + { + "epoch": 1.14, + "grad_norm": 6.862037676596654, + "learning_rate": 8.262942923232902e-06, + "loss": 1.4027, + "step": 94623 + }, + { + "epoch": 1.14, + "grad_norm": 6.166779418115147, + "learning_rate": 8.26236762906306e-06, + "loss": 1.1239, + "step": 94626 + }, + { + "epoch": 1.14, + "grad_norm": 22.150578405129956, + "learning_rate": 8.26179234082313e-06, + "loss": 1.0455, + "step": 94629 + }, + { + "epoch": 1.14, + "grad_norm": 6.0843028798285035, + "learning_rate": 8.261217058515077e-06, + "loss": 0.921, + "step": 94632 + }, + { + "epoch": 1.14, + "grad_norm": 8.6359246767852, + "learning_rate": 8.260641782140861e-06, + "loss": 0.9925, + "step": 94635 + }, + { + "epoch": 1.14, + "grad_norm": 6.687271732094089, + "learning_rate": 8.260066511702452e-06, + "loss": 1.2055, + "step": 94638 + }, + { + "epoch": 1.14, + "grad_norm": 14.587639845426585, + "learning_rate": 8.25949124720181e-06, + "loss": 0.8902, + "step": 94641 + }, + { + "epoch": 1.14, + "grad_norm": 2.358984082917668, + "learning_rate": 8.258915988640892e-06, + "loss": 1.0487, + "step": 94644 + }, + { + "epoch": 1.14, + "grad_norm": 5.90133765581934, + "learning_rate": 8.258340736021668e-06, + "loss": 0.9533, + "step": 94647 + }, + { + "epoch": 1.14, + "grad_norm": 3.730497328804637, + "learning_rate": 8.257765489346102e-06, + "loss": 1.195, + "step": 94650 + }, + { + "epoch": 1.14, + "grad_norm": 10.592377372207046, + "learning_rate": 8.257190248616158e-06, + "loss": 0.98, + "step": 94653 + }, + { + "epoch": 1.14, + "grad_norm": 31.693317527735744, + "learning_rate": 8.25661501383379e-06, + "loss": 0.8764, + "step": 94656 + }, + { + "epoch": 1.14, + "grad_norm": 7.962110458750776, + "learning_rate": 8.256039785000967e-06, + "loss": 1.4192, + "step": 94659 + }, + { + "epoch": 1.14, + "grad_norm": 9.81346460767255, + "learning_rate": 8.255464562119655e-06, + "loss": 1.1896, + "step": 94662 + }, + { + "epoch": 1.14, + "grad_norm": 20.749626636078315, + "learning_rate": 8.254889345191816e-06, + "loss": 0.8291, + "step": 94665 + }, + { + "epoch": 1.14, + "grad_norm": 8.233299680645079, + "learning_rate": 8.254314134219406e-06, + "loss": 1.3109, + "step": 94668 + }, + { + "epoch": 1.14, + "grad_norm": 3.2667705132312346, + "learning_rate": 8.253738929204395e-06, + "loss": 1.2781, + "step": 94671 + }, + { + "epoch": 1.14, + "grad_norm": 9.806510140134925, + "learning_rate": 8.253163730148744e-06, + "loss": 1.1363, + "step": 94674 + }, + { + "epoch": 1.14, + "grad_norm": 10.194086185120634, + "learning_rate": 8.252588537054415e-06, + "loss": 1.2678, + "step": 94677 + }, + { + "epoch": 1.14, + "grad_norm": 2.958443508151346, + "learning_rate": 8.252013349923375e-06, + "loss": 1.0692, + "step": 94680 + }, + { + "epoch": 1.14, + "grad_norm": 29.56426302051352, + "learning_rate": 8.251438168757584e-06, + "loss": 1.0274, + "step": 94683 + }, + { + "epoch": 1.14, + "grad_norm": 5.039300324443897, + "learning_rate": 8.250862993559002e-06, + "loss": 1.3892, + "step": 94686 + }, + { + "epoch": 1.14, + "grad_norm": 11.281396957595517, + "learning_rate": 8.250287824329592e-06, + "loss": 0.9073, + "step": 94689 + }, + { + "epoch": 1.14, + "grad_norm": 6.285764440047181, + "learning_rate": 8.249712661071327e-06, + "loss": 1.1979, + "step": 94692 + }, + { + "epoch": 1.14, + "grad_norm": 12.04305293666902, + "learning_rate": 8.249137503786157e-06, + "loss": 1.2176, + "step": 94695 + }, + { + "epoch": 1.14, + "grad_norm": 6.832491339587311, + "learning_rate": 8.248562352476051e-06, + "loss": 1.4193, + "step": 94698 + }, + { + "epoch": 1.14, + "grad_norm": 7.62748420254335, + "learning_rate": 8.247987207142968e-06, + "loss": 1.2815, + "step": 94701 + }, + { + "epoch": 1.14, + "grad_norm": 5.214948891345563, + "learning_rate": 8.247412067788876e-06, + "loss": 1.435, + "step": 94704 + }, + { + "epoch": 1.14, + "grad_norm": 13.205180751271493, + "learning_rate": 8.24683693441574e-06, + "loss": 1.2818, + "step": 94707 + }, + { + "epoch": 1.14, + "grad_norm": 6.497715195367462, + "learning_rate": 8.246261807025512e-06, + "loss": 1.3917, + "step": 94710 + }, + { + "epoch": 1.14, + "grad_norm": 4.618667721679847, + "learning_rate": 8.245686685620161e-06, + "loss": 1.2704, + "step": 94713 + }, + { + "epoch": 1.14, + "grad_norm": 11.14364879699561, + "learning_rate": 8.245111570201652e-06, + "loss": 1.2739, + "step": 94716 + }, + { + "epoch": 1.14, + "grad_norm": 4.27972641124457, + "learning_rate": 8.244536460771946e-06, + "loss": 1.1033, + "step": 94719 + }, + { + "epoch": 1.14, + "grad_norm": 9.593898428607238, + "learning_rate": 8.243961357333001e-06, + "loss": 1.1107, + "step": 94722 + }, + { + "epoch": 1.14, + "grad_norm": 7.886587650086926, + "learning_rate": 8.243386259886784e-06, + "loss": 1.0493, + "step": 94725 + }, + { + "epoch": 1.14, + "grad_norm": 18.61070942398451, + "learning_rate": 8.242811168435258e-06, + "loss": 1.4544, + "step": 94728 + }, + { + "epoch": 1.14, + "grad_norm": 17.732523840920074, + "learning_rate": 8.242236082980385e-06, + "loss": 1.1599, + "step": 94731 + }, + { + "epoch": 1.14, + "grad_norm": 11.699486469826555, + "learning_rate": 8.241661003524129e-06, + "loss": 1.4333, + "step": 94734 + }, + { + "epoch": 1.14, + "grad_norm": 11.12822698518508, + "learning_rate": 8.241085930068448e-06, + "loss": 1.3382, + "step": 94737 + }, + { + "epoch": 1.14, + "grad_norm": 4.5815081140977645, + "learning_rate": 8.240510862615307e-06, + "loss": 0.9588, + "step": 94740 + }, + { + "epoch": 1.14, + "grad_norm": 4.388750462720142, + "learning_rate": 8.239935801166667e-06, + "loss": 1.0837, + "step": 94743 + }, + { + "epoch": 1.14, + "grad_norm": 9.56843292433543, + "learning_rate": 8.239360745724498e-06, + "loss": 1.333, + "step": 94746 + }, + { + "epoch": 1.14, + "grad_norm": 23.540027779802305, + "learning_rate": 8.238785696290755e-06, + "loss": 1.0408, + "step": 94749 + }, + { + "epoch": 1.14, + "grad_norm": 16.48329635457845, + "learning_rate": 8.238210652867398e-06, + "loss": 1.3417, + "step": 94752 + }, + { + "epoch": 1.14, + "grad_norm": 5.65183748111272, + "learning_rate": 8.237635615456392e-06, + "loss": 1.349, + "step": 94755 + }, + { + "epoch": 1.14, + "grad_norm": 29.620648828053767, + "learning_rate": 8.237060584059707e-06, + "loss": 1.6035, + "step": 94758 + }, + { + "epoch": 1.14, + "grad_norm": 10.246841150635712, + "learning_rate": 8.236485558679299e-06, + "loss": 1.1742, + "step": 94761 + }, + { + "epoch": 1.14, + "grad_norm": 6.134957938223892, + "learning_rate": 8.235910539317125e-06, + "loss": 1.0167, + "step": 94764 + }, + { + "epoch": 1.14, + "grad_norm": 10.541035392584272, + "learning_rate": 8.235335525975156e-06, + "loss": 0.9913, + "step": 94767 + }, + { + "epoch": 1.14, + "grad_norm": 4.392274602045803, + "learning_rate": 8.234760518655352e-06, + "loss": 1.2894, + "step": 94770 + }, + { + "epoch": 1.14, + "grad_norm": 14.851820198174876, + "learning_rate": 8.234185517359676e-06, + "loss": 1.2139, + "step": 94773 + }, + { + "epoch": 1.14, + "grad_norm": 6.1389372070150285, + "learning_rate": 8.233610522090085e-06, + "loss": 1.2791, + "step": 94776 + }, + { + "epoch": 1.14, + "grad_norm": 15.115941924480765, + "learning_rate": 8.233035532848549e-06, + "loss": 1.4336, + "step": 94779 + }, + { + "epoch": 1.14, + "grad_norm": 5.5056862341551485, + "learning_rate": 8.23246054963702e-06, + "loss": 1.3009, + "step": 94782 + }, + { + "epoch": 1.14, + "grad_norm": 9.33193105951866, + "learning_rate": 8.23188557245747e-06, + "loss": 1.3283, + "step": 94785 + }, + { + "epoch": 1.14, + "grad_norm": 12.284107172993298, + "learning_rate": 8.23131060131186e-06, + "loss": 1.2594, + "step": 94788 + }, + { + "epoch": 1.14, + "grad_norm": 7.507510395000246, + "learning_rate": 8.230735636202149e-06, + "loss": 1.136, + "step": 94791 + }, + { + "epoch": 1.14, + "grad_norm": 7.307012042022388, + "learning_rate": 8.230160677130297e-06, + "loss": 1.1852, + "step": 94794 + }, + { + "epoch": 1.14, + "grad_norm": 12.766210132155141, + "learning_rate": 8.229585724098269e-06, + "loss": 1.01, + "step": 94797 + }, + { + "epoch": 1.14, + "grad_norm": 4.106398230356514, + "learning_rate": 8.229010777108032e-06, + "loss": 1.0688, + "step": 94800 + }, + { + "epoch": 1.14, + "grad_norm": 13.768930353579817, + "learning_rate": 8.228435836161542e-06, + "loss": 1.4445, + "step": 94803 + }, + { + "epoch": 1.14, + "grad_norm": 19.137706897116445, + "learning_rate": 8.227860901260759e-06, + "loss": 1.3042, + "step": 94806 + }, + { + "epoch": 1.14, + "grad_norm": 7.0011742273839594, + "learning_rate": 8.227285972407648e-06, + "loss": 1.1772, + "step": 94809 + }, + { + "epoch": 1.14, + "grad_norm": 14.367256547282329, + "learning_rate": 8.226711049604174e-06, + "loss": 1.1683, + "step": 94812 + }, + { + "epoch": 1.14, + "grad_norm": 13.2530134691104, + "learning_rate": 8.2261361328523e-06, + "loss": 1.2952, + "step": 94815 + }, + { + "epoch": 1.14, + "grad_norm": 12.325645121007142, + "learning_rate": 8.225561222153979e-06, + "loss": 1.5571, + "step": 94818 + }, + { + "epoch": 1.14, + "grad_norm": 12.539785464089766, + "learning_rate": 8.224986317511181e-06, + "loss": 1.08, + "step": 94821 + }, + { + "epoch": 1.14, + "grad_norm": 30.18781171263004, + "learning_rate": 8.224411418925862e-06, + "loss": 1.4199, + "step": 94824 + }, + { + "epoch": 1.14, + "grad_norm": 13.678449905637594, + "learning_rate": 8.223836526399993e-06, + "loss": 1.2091, + "step": 94827 + }, + { + "epoch": 1.14, + "grad_norm": 2.624862608105079, + "learning_rate": 8.223261639935524e-06, + "loss": 1.2299, + "step": 94830 + }, + { + "epoch": 1.14, + "grad_norm": 8.741054369632229, + "learning_rate": 8.222686759534427e-06, + "loss": 1.5298, + "step": 94833 + }, + { + "epoch": 1.14, + "grad_norm": 5.192153760450747, + "learning_rate": 8.222111885198658e-06, + "loss": 0.8031, + "step": 94836 + }, + { + "epoch": 1.14, + "grad_norm": 5.7532112499958465, + "learning_rate": 8.22153701693018e-06, + "loss": 1.0975, + "step": 94839 + }, + { + "epoch": 1.14, + "grad_norm": 3.8330273246827513, + "learning_rate": 8.22096215473096e-06, + "loss": 1.3478, + "step": 94842 + }, + { + "epoch": 1.14, + "grad_norm": 11.62681562643364, + "learning_rate": 8.220387298602952e-06, + "loss": 1.0537, + "step": 94845 + }, + { + "epoch": 1.14, + "grad_norm": 10.148434140494517, + "learning_rate": 8.219812448548122e-06, + "loss": 1.2952, + "step": 94848 + }, + { + "epoch": 1.14, + "grad_norm": 14.260100537870015, + "learning_rate": 8.219237604568428e-06, + "loss": 1.4744, + "step": 94851 + }, + { + "epoch": 1.14, + "grad_norm": 7.222091506799937, + "learning_rate": 8.218662766665839e-06, + "loss": 1.4746, + "step": 94854 + }, + { + "epoch": 1.14, + "grad_norm": 11.387232631325524, + "learning_rate": 8.218087934842311e-06, + "loss": 1.1096, + "step": 94857 + }, + { + "epoch": 1.14, + "grad_norm": 4.003137947528622, + "learning_rate": 8.217513109099805e-06, + "loss": 1.3179, + "step": 94860 + }, + { + "epoch": 1.14, + "grad_norm": 15.361503791130785, + "learning_rate": 8.216938289440287e-06, + "loss": 1.0, + "step": 94863 + }, + { + "epoch": 1.14, + "grad_norm": 12.054942245375107, + "learning_rate": 8.216363475865714e-06, + "loss": 1.1595, + "step": 94866 + }, + { + "epoch": 1.14, + "grad_norm": 4.79327800243904, + "learning_rate": 8.215788668378055e-06, + "loss": 1.1871, + "step": 94869 + }, + { + "epoch": 1.14, + "grad_norm": 19.41169899159478, + "learning_rate": 8.215213866979261e-06, + "loss": 1.0644, + "step": 94872 + }, + { + "epoch": 1.14, + "grad_norm": 15.923394736693057, + "learning_rate": 8.214639071671302e-06, + "loss": 1.1426, + "step": 94875 + }, + { + "epoch": 1.14, + "grad_norm": 6.767555273739499, + "learning_rate": 8.214064282456135e-06, + "loss": 1.1826, + "step": 94878 + }, + { + "epoch": 1.14, + "grad_norm": 13.489618509220856, + "learning_rate": 8.213489499335726e-06, + "loss": 1.067, + "step": 94881 + }, + { + "epoch": 1.14, + "grad_norm": 4.9680806720356845, + "learning_rate": 8.21291472231203e-06, + "loss": 1.2391, + "step": 94884 + }, + { + "epoch": 1.14, + "grad_norm": 5.496589249425542, + "learning_rate": 8.212339951387016e-06, + "loss": 1.1544, + "step": 94887 + }, + { + "epoch": 1.14, + "grad_norm": 25.93772020343729, + "learning_rate": 8.21176518656264e-06, + "loss": 1.0185, + "step": 94890 + }, + { + "epoch": 1.14, + "grad_norm": 16.291286436534158, + "learning_rate": 8.211190427840863e-06, + "loss": 1.5327, + "step": 94893 + }, + { + "epoch": 1.14, + "grad_norm": 7.508038480642259, + "learning_rate": 8.210615675223654e-06, + "loss": 1.3997, + "step": 94896 + }, + { + "epoch": 1.14, + "grad_norm": 9.19433311782963, + "learning_rate": 8.210040928712968e-06, + "loss": 1.2962, + "step": 94899 + }, + { + "epoch": 1.14, + "grad_norm": 10.441747505475249, + "learning_rate": 8.209466188310764e-06, + "loss": 1.0919, + "step": 94902 + }, + { + "epoch": 1.14, + "grad_norm": 20.036136986926508, + "learning_rate": 8.208891454019008e-06, + "loss": 1.3859, + "step": 94905 + }, + { + "epoch": 1.14, + "grad_norm": 7.6375002420368165, + "learning_rate": 8.208316725839664e-06, + "loss": 1.1437, + "step": 94908 + }, + { + "epoch": 1.14, + "grad_norm": 11.028604686734367, + "learning_rate": 8.207742003774688e-06, + "loss": 1.3697, + "step": 94911 + }, + { + "epoch": 1.14, + "grad_norm": 14.892545421930622, + "learning_rate": 8.20716728782604e-06, + "loss": 1.3926, + "step": 94914 + }, + { + "epoch": 1.14, + "grad_norm": 10.679888479160244, + "learning_rate": 8.206592577995688e-06, + "loss": 0.9467, + "step": 94917 + }, + { + "epoch": 1.14, + "grad_norm": 10.877113109041133, + "learning_rate": 8.206017874285586e-06, + "loss": 0.9336, + "step": 94920 + }, + { + "epoch": 1.14, + "grad_norm": 8.58818183086923, + "learning_rate": 8.205443176697702e-06, + "loss": 1.1879, + "step": 94923 + }, + { + "epoch": 1.14, + "grad_norm": 12.278535759307585, + "learning_rate": 8.204868485233992e-06, + "loss": 1.4463, + "step": 94926 + }, + { + "epoch": 1.14, + "grad_norm": 5.622217922946001, + "learning_rate": 8.20429379989642e-06, + "loss": 1.113, + "step": 94929 + }, + { + "epoch": 1.14, + "grad_norm": 4.6464905696718075, + "learning_rate": 8.203719120686946e-06, + "loss": 1.2972, + "step": 94932 + }, + { + "epoch": 1.14, + "grad_norm": 12.057661318196828, + "learning_rate": 8.203144447607535e-06, + "loss": 1.2758, + "step": 94935 + }, + { + "epoch": 1.14, + "grad_norm": 6.0837465511058255, + "learning_rate": 8.20256978066014e-06, + "loss": 1.2646, + "step": 94938 + }, + { + "epoch": 1.14, + "grad_norm": 7.625137251577744, + "learning_rate": 8.201995119846729e-06, + "loss": 1.0595, + "step": 94941 + }, + { + "epoch": 1.14, + "grad_norm": 12.287764908097452, + "learning_rate": 8.20142046516926e-06, + "loss": 1.351, + "step": 94944 + }, + { + "epoch": 1.14, + "grad_norm": 12.007050321894075, + "learning_rate": 8.200845816629695e-06, + "loss": 0.9314, + "step": 94947 + }, + { + "epoch": 1.14, + "grad_norm": 8.348193246601863, + "learning_rate": 8.200271174229997e-06, + "loss": 1.22, + "step": 94950 + }, + { + "epoch": 1.14, + "grad_norm": 8.902180557334823, + "learning_rate": 8.199696537972125e-06, + "loss": 0.9891, + "step": 94953 + }, + { + "epoch": 1.14, + "grad_norm": 22.05424743314358, + "learning_rate": 8.19912190785804e-06, + "loss": 0.8142, + "step": 94956 + }, + { + "epoch": 1.14, + "grad_norm": 26.679739025624656, + "learning_rate": 8.198547283889702e-06, + "loss": 1.357, + "step": 94959 + }, + { + "epoch": 1.14, + "grad_norm": 5.2660818759071475, + "learning_rate": 8.197972666069078e-06, + "loss": 1.2991, + "step": 94962 + }, + { + "epoch": 1.14, + "grad_norm": 12.439389925942713, + "learning_rate": 8.197398054398118e-06, + "loss": 1.4266, + "step": 94965 + }, + { + "epoch": 1.14, + "grad_norm": 9.51893828146344, + "learning_rate": 8.19682344887879e-06, + "loss": 1.0714, + "step": 94968 + }, + { + "epoch": 1.14, + "grad_norm": 8.502738810883457, + "learning_rate": 8.196248849513056e-06, + "loss": 1.3853, + "step": 94971 + }, + { + "epoch": 1.14, + "grad_norm": 32.83909261569412, + "learning_rate": 8.195674256302872e-06, + "loss": 1.0324, + "step": 94974 + }, + { + "epoch": 1.14, + "grad_norm": 11.158826289944633, + "learning_rate": 8.195099669250208e-06, + "loss": 1.5438, + "step": 94977 + }, + { + "epoch": 1.14, + "grad_norm": 2.2296908514523475, + "learning_rate": 8.194525088357013e-06, + "loss": 1.3398, + "step": 94980 + }, + { + "epoch": 1.14, + "grad_norm": 3.8062056412462013, + "learning_rate": 8.193950513625256e-06, + "loss": 1.254, + "step": 94983 + }, + { + "epoch": 1.14, + "grad_norm": 5.247259737085978, + "learning_rate": 8.193375945056894e-06, + "loss": 1.0757, + "step": 94986 + }, + { + "epoch": 1.14, + "grad_norm": 11.437909394933477, + "learning_rate": 8.192801382653892e-06, + "loss": 1.19, + "step": 94989 + }, + { + "epoch": 1.14, + "grad_norm": 31.216186734290844, + "learning_rate": 8.192226826418205e-06, + "loss": 1.1569, + "step": 94992 + }, + { + "epoch": 1.14, + "grad_norm": 8.93417636493588, + "learning_rate": 8.191652276351797e-06, + "loss": 1.0686, + "step": 94995 + }, + { + "epoch": 1.14, + "grad_norm": 6.128717046816758, + "learning_rate": 8.191077732456629e-06, + "loss": 1.1136, + "step": 94998 + }, + { + "epoch": 1.14, + "grad_norm": 7.109722010544633, + "learning_rate": 8.19050319473466e-06, + "loss": 1.294, + "step": 95001 + }, + { + "epoch": 1.14, + "grad_norm": 16.371905194932246, + "learning_rate": 8.189928663187855e-06, + "loss": 1.0736, + "step": 95004 + }, + { + "epoch": 1.14, + "grad_norm": 6.035201416918275, + "learning_rate": 8.18935413781817e-06, + "loss": 0.9563, + "step": 95007 + }, + { + "epoch": 1.14, + "grad_norm": 11.894856858965522, + "learning_rate": 8.188779618627565e-06, + "loss": 1.3988, + "step": 95010 + }, + { + "epoch": 1.14, + "grad_norm": 8.180307636186809, + "learning_rate": 8.188205105618006e-06, + "loss": 1.315, + "step": 95013 + }, + { + "epoch": 1.14, + "grad_norm": 19.619922325954782, + "learning_rate": 8.187630598791451e-06, + "loss": 1.31, + "step": 95016 + }, + { + "epoch": 1.14, + "grad_norm": 8.320313056473681, + "learning_rate": 8.187056098149856e-06, + "loss": 1.0666, + "step": 95019 + }, + { + "epoch": 1.14, + "grad_norm": 9.81465094300236, + "learning_rate": 8.186481603695186e-06, + "loss": 0.8974, + "step": 95022 + }, + { + "epoch": 1.14, + "grad_norm": 5.74964927703913, + "learning_rate": 8.185907115429403e-06, + "loss": 1.3191, + "step": 95025 + }, + { + "epoch": 1.14, + "grad_norm": 5.3706811388956135, + "learning_rate": 8.185332633354463e-06, + "loss": 1.2211, + "step": 95028 + }, + { + "epoch": 1.14, + "grad_norm": 8.83341903040392, + "learning_rate": 8.184758157472334e-06, + "loss": 1.3365, + "step": 95031 + }, + { + "epoch": 1.14, + "grad_norm": 10.510008987969746, + "learning_rate": 8.184183687784967e-06, + "loss": 1.5279, + "step": 95034 + }, + { + "epoch": 1.14, + "grad_norm": 3.6713143132441566, + "learning_rate": 8.183609224294328e-06, + "loss": 0.8589, + "step": 95037 + }, + { + "epoch": 1.14, + "grad_norm": 2.8685038343602907, + "learning_rate": 8.183034767002375e-06, + "loss": 1.2102, + "step": 95040 + }, + { + "epoch": 1.14, + "grad_norm": 15.103992919559008, + "learning_rate": 8.182460315911076e-06, + "loss": 1.2909, + "step": 95043 + }, + { + "epoch": 1.14, + "grad_norm": 13.396496707963834, + "learning_rate": 8.18188587102238e-06, + "loss": 1.0006, + "step": 95046 + }, + { + "epoch": 1.14, + "grad_norm": 8.46836931162215, + "learning_rate": 8.181311432338253e-06, + "loss": 1.2815, + "step": 95049 + }, + { + "epoch": 1.14, + "grad_norm": 13.10153210303775, + "learning_rate": 8.180736999860655e-06, + "loss": 1.5685, + "step": 95052 + }, + { + "epoch": 1.14, + "grad_norm": 5.486098431926329, + "learning_rate": 8.180162573591548e-06, + "loss": 1.4728, + "step": 95055 + }, + { + "epoch": 1.14, + "grad_norm": 9.781450034760608, + "learning_rate": 8.179588153532893e-06, + "loss": 1.3801, + "step": 95058 + }, + { + "epoch": 1.14, + "grad_norm": 5.1921264862182195, + "learning_rate": 8.179013739686644e-06, + "loss": 1.3272, + "step": 95061 + }, + { + "epoch": 1.14, + "grad_norm": 7.371460128486965, + "learning_rate": 8.178439332054764e-06, + "loss": 1.3105, + "step": 95064 + }, + { + "epoch": 1.14, + "grad_norm": 7.580316953246673, + "learning_rate": 8.177864930639216e-06, + "loss": 1.1287, + "step": 95067 + }, + { + "epoch": 1.14, + "grad_norm": 13.024971488803367, + "learning_rate": 8.177290535441962e-06, + "loss": 1.0227, + "step": 95070 + }, + { + "epoch": 1.14, + "grad_norm": 2.8907327832921204, + "learning_rate": 8.176716146464954e-06, + "loss": 1.1556, + "step": 95073 + }, + { + "epoch": 1.14, + "grad_norm": 13.690249887560308, + "learning_rate": 8.176141763710157e-06, + "loss": 1.3244, + "step": 95076 + }, + { + "epoch": 1.14, + "grad_norm": 11.508729000304353, + "learning_rate": 8.175567387179534e-06, + "loss": 0.9295, + "step": 95079 + }, + { + "epoch": 1.14, + "grad_norm": 11.423632193105057, + "learning_rate": 8.174993016875039e-06, + "loss": 0.9294, + "step": 95082 + }, + { + "epoch": 1.14, + "grad_norm": 10.209794657360778, + "learning_rate": 8.174418652798641e-06, + "loss": 1.217, + "step": 95085 + }, + { + "epoch": 1.14, + "grad_norm": 12.703510740832455, + "learning_rate": 8.17384429495229e-06, + "loss": 1.1478, + "step": 95088 + }, + { + "epoch": 1.14, + "grad_norm": 15.269855964647785, + "learning_rate": 8.173269943337954e-06, + "loss": 1.362, + "step": 95091 + }, + { + "epoch": 1.14, + "grad_norm": 20.247672991954396, + "learning_rate": 8.172695597957585e-06, + "loss": 0.8587, + "step": 95094 + }, + { + "epoch": 1.14, + "grad_norm": 3.7712707032766395, + "learning_rate": 8.172121258813153e-06, + "loss": 1.0339, + "step": 95097 + }, + { + "epoch": 1.14, + "grad_norm": 13.792474203416452, + "learning_rate": 8.171546925906611e-06, + "loss": 1.1819, + "step": 95100 + }, + { + "epoch": 1.14, + "grad_norm": 11.68555129239652, + "learning_rate": 8.17097259923992e-06, + "loss": 1.2978, + "step": 95103 + }, + { + "epoch": 1.14, + "grad_norm": 6.685853462098797, + "learning_rate": 8.170398278815038e-06, + "loss": 1.3287, + "step": 95106 + }, + { + "epoch": 1.14, + "grad_norm": 6.377491499030881, + "learning_rate": 8.169823964633932e-06, + "loss": 1.4371, + "step": 95109 + }, + { + "epoch": 1.14, + "grad_norm": 17.389614665179202, + "learning_rate": 8.169249656698558e-06, + "loss": 0.9757, + "step": 95112 + }, + { + "epoch": 1.14, + "grad_norm": 4.556659431488291, + "learning_rate": 8.168675355010872e-06, + "loss": 0.7348, + "step": 95115 + }, + { + "epoch": 1.14, + "grad_norm": 13.765024386021251, + "learning_rate": 8.168101059572837e-06, + "loss": 1.3728, + "step": 95118 + }, + { + "epoch": 1.14, + "grad_norm": 7.83088700667091, + "learning_rate": 8.167526770386415e-06, + "loss": 0.99, + "step": 95121 + }, + { + "epoch": 1.14, + "grad_norm": 16.334426347534013, + "learning_rate": 8.166952487453568e-06, + "loss": 1.239, + "step": 95124 + }, + { + "epoch": 1.14, + "grad_norm": 16.33453765321461, + "learning_rate": 8.166378210776246e-06, + "loss": 1.3233, + "step": 95127 + }, + { + "epoch": 1.14, + "grad_norm": 7.604186447064581, + "learning_rate": 8.165803940356415e-06, + "loss": 1.2143, + "step": 95130 + }, + { + "epoch": 1.14, + "grad_norm": 13.434319692312013, + "learning_rate": 8.165229676196036e-06, + "loss": 1.0724, + "step": 95133 + }, + { + "epoch": 1.14, + "grad_norm": 9.042516984781843, + "learning_rate": 8.164655418297067e-06, + "loss": 1.1918, + "step": 95136 + }, + { + "epoch": 1.14, + "grad_norm": 8.733182235409911, + "learning_rate": 8.164081166661471e-06, + "loss": 1.0807, + "step": 95139 + }, + { + "epoch": 1.14, + "grad_norm": 109.91472285501169, + "learning_rate": 8.163506921291201e-06, + "loss": 1.3189, + "step": 95142 + }, + { + "epoch": 1.14, + "grad_norm": 5.73986717493447, + "learning_rate": 8.162932682188222e-06, + "loss": 0.9237, + "step": 95145 + }, + { + "epoch": 1.14, + "grad_norm": 3.3470457260664337, + "learning_rate": 8.162358449354489e-06, + "loss": 1.298, + "step": 95148 + }, + { + "epoch": 1.14, + "grad_norm": 11.120417706244183, + "learning_rate": 8.16178422279197e-06, + "loss": 1.3648, + "step": 95151 + }, + { + "epoch": 1.14, + "grad_norm": 13.814725745162376, + "learning_rate": 8.161210002502618e-06, + "loss": 1.2981, + "step": 95154 + }, + { + "epoch": 1.14, + "grad_norm": 21.61505609322556, + "learning_rate": 8.160635788488392e-06, + "loss": 1.3593, + "step": 95157 + }, + { + "epoch": 1.14, + "grad_norm": 26.538708161172515, + "learning_rate": 8.16006158075125e-06, + "loss": 1.1559, + "step": 95160 + }, + { + "epoch": 1.14, + "grad_norm": 28.189209031845568, + "learning_rate": 8.159487379293161e-06, + "loss": 1.1777, + "step": 95163 + }, + { + "epoch": 1.14, + "grad_norm": 4.928626867673514, + "learning_rate": 8.15891318411608e-06, + "loss": 1.0643, + "step": 95166 + }, + { + "epoch": 1.14, + "grad_norm": 6.791186035536612, + "learning_rate": 8.15833899522196e-06, + "loss": 1.0192, + "step": 95169 + }, + { + "epoch": 1.14, + "grad_norm": 3.9714852786184114, + "learning_rate": 8.157764812612766e-06, + "loss": 1.0245, + "step": 95172 + }, + { + "epoch": 1.14, + "grad_norm": 60.703557391475435, + "learning_rate": 8.157190636290459e-06, + "loss": 1.4827, + "step": 95175 + }, + { + "epoch": 1.14, + "grad_norm": 3.458873871631177, + "learning_rate": 8.156616466256998e-06, + "loss": 1.1461, + "step": 95178 + }, + { + "epoch": 1.14, + "grad_norm": 13.387886856325267, + "learning_rate": 8.156042302514339e-06, + "loss": 1.4672, + "step": 95181 + }, + { + "epoch": 1.14, + "grad_norm": 7.1301824245754455, + "learning_rate": 8.15546814506444e-06, + "loss": 1.3089, + "step": 95184 + }, + { + "epoch": 1.14, + "grad_norm": 62.07319126022817, + "learning_rate": 8.15489399390927e-06, + "loss": 1.4418, + "step": 95187 + }, + { + "epoch": 1.14, + "grad_norm": 1.99299105277211, + "learning_rate": 8.154319849050777e-06, + "loss": 0.7445, + "step": 95190 + }, + { + "epoch": 1.14, + "grad_norm": 4.667962876910906, + "learning_rate": 8.153745710490932e-06, + "loss": 1.2932, + "step": 95193 + }, + { + "epoch": 1.14, + "grad_norm": 24.9286704492404, + "learning_rate": 8.153171578231684e-06, + "loss": 1.5864, + "step": 95196 + }, + { + "epoch": 1.14, + "grad_norm": 2.4657880758333706, + "learning_rate": 8.152597452274994e-06, + "loss": 1.1845, + "step": 95199 + }, + { + "epoch": 1.14, + "grad_norm": 22.116867595269266, + "learning_rate": 8.152023332622825e-06, + "loss": 1.246, + "step": 95202 + }, + { + "epoch": 1.14, + "grad_norm": 17.916195310531823, + "learning_rate": 8.151449219277139e-06, + "loss": 1.8169, + "step": 95205 + }, + { + "epoch": 1.14, + "grad_norm": 5.6517394826275655, + "learning_rate": 8.150875112239887e-06, + "loss": 1.3654, + "step": 95208 + }, + { + "epoch": 1.14, + "grad_norm": 2.3866186638800735, + "learning_rate": 8.150301011513031e-06, + "loss": 1.3383, + "step": 95211 + }, + { + "epoch": 1.14, + "grad_norm": 6.743876742502486, + "learning_rate": 8.149726917098532e-06, + "loss": 1.1554, + "step": 95214 + }, + { + "epoch": 1.14, + "grad_norm": 9.493046942146664, + "learning_rate": 8.14915282899835e-06, + "loss": 1.1884, + "step": 95217 + }, + { + "epoch": 1.14, + "grad_norm": 11.653111428615484, + "learning_rate": 8.148578747214444e-06, + "loss": 1.2159, + "step": 95220 + }, + { + "epoch": 1.15, + "grad_norm": 12.003570537914783, + "learning_rate": 8.14800467174877e-06, + "loss": 1.401, + "step": 95223 + }, + { + "epoch": 1.15, + "grad_norm": 30.800844753902144, + "learning_rate": 8.147430602603287e-06, + "loss": 1.3333, + "step": 95226 + }, + { + "epoch": 1.15, + "grad_norm": 6.110355867114731, + "learning_rate": 8.14685653977996e-06, + "loss": 1.3679, + "step": 95229 + }, + { + "epoch": 1.15, + "grad_norm": 22.181670992609483, + "learning_rate": 8.146282483280744e-06, + "loss": 1.4375, + "step": 95232 + }, + { + "epoch": 1.15, + "grad_norm": 31.986594627661653, + "learning_rate": 8.145708433107595e-06, + "loss": 1.1519, + "step": 95235 + }, + { + "epoch": 1.15, + "grad_norm": 5.305664300611685, + "learning_rate": 8.145134389262477e-06, + "loss": 1.0686, + "step": 95238 + }, + { + "epoch": 1.15, + "grad_norm": 11.97882223886896, + "learning_rate": 8.144560351747345e-06, + "loss": 1.0688, + "step": 95241 + }, + { + "epoch": 1.15, + "grad_norm": 9.784843942299815, + "learning_rate": 8.143986320564161e-06, + "loss": 0.8959, + "step": 95244 + }, + { + "epoch": 1.15, + "grad_norm": 5.508020005556684, + "learning_rate": 8.143412295714887e-06, + "loss": 1.257, + "step": 95247 + }, + { + "epoch": 1.15, + "grad_norm": 10.362257330341738, + "learning_rate": 8.142838277201476e-06, + "loss": 1.2186, + "step": 95250 + }, + { + "epoch": 1.15, + "grad_norm": 8.656749205841196, + "learning_rate": 8.142264265025887e-06, + "loss": 1.2629, + "step": 95253 + }, + { + "epoch": 1.15, + "grad_norm": 4.164398394006707, + "learning_rate": 8.14169025919008e-06, + "loss": 1.3157, + "step": 95256 + }, + { + "epoch": 1.15, + "grad_norm": 5.864567488674035, + "learning_rate": 8.14111625969602e-06, + "loss": 1.0527, + "step": 95259 + }, + { + "epoch": 1.15, + "grad_norm": 8.10397822799045, + "learning_rate": 8.140542266545659e-06, + "loss": 1.1399, + "step": 95262 + }, + { + "epoch": 1.15, + "grad_norm": 48.73492958206974, + "learning_rate": 8.139968279740955e-06, + "loss": 1.4625, + "step": 95265 + }, + { + "epoch": 1.15, + "grad_norm": 15.754765072736722, + "learning_rate": 8.139394299283868e-06, + "loss": 1.2906, + "step": 95268 + }, + { + "epoch": 1.15, + "grad_norm": 10.627657987879399, + "learning_rate": 8.138820325176362e-06, + "loss": 0.9742, + "step": 95271 + }, + { + "epoch": 1.15, + "grad_norm": 9.608767775831135, + "learning_rate": 8.138246357420392e-06, + "loss": 1.2453, + "step": 95274 + }, + { + "epoch": 1.15, + "grad_norm": 10.964738427004052, + "learning_rate": 8.137672396017913e-06, + "loss": 1.2105, + "step": 95277 + }, + { + "epoch": 1.15, + "grad_norm": 5.696612571123359, + "learning_rate": 8.13709844097089e-06, + "loss": 1.2468, + "step": 95280 + }, + { + "epoch": 1.15, + "grad_norm": 7.667515299736085, + "learning_rate": 8.136524492281278e-06, + "loss": 1.0892, + "step": 95283 + }, + { + "epoch": 1.15, + "grad_norm": 3.702551856456233, + "learning_rate": 8.13595054995104e-06, + "loss": 1.0525, + "step": 95286 + }, + { + "epoch": 1.15, + "grad_norm": 2.6773310472155765, + "learning_rate": 8.135376613982126e-06, + "loss": 1.3715, + "step": 95289 + }, + { + "epoch": 1.15, + "grad_norm": 5.8972932752434, + "learning_rate": 8.134802684376503e-06, + "loss": 1.0799, + "step": 95292 + }, + { + "epoch": 1.15, + "grad_norm": 29.565947997264416, + "learning_rate": 8.134228761136125e-06, + "loss": 1.2411, + "step": 95295 + }, + { + "epoch": 1.15, + "grad_norm": 44.959200470863095, + "learning_rate": 8.13365484426295e-06, + "loss": 1.3391, + "step": 95298 + }, + { + "epoch": 1.15, + "grad_norm": 8.968267048106199, + "learning_rate": 8.133080933758945e-06, + "loss": 0.9333, + "step": 95301 + }, + { + "epoch": 1.15, + "grad_norm": 4.77735056951186, + "learning_rate": 8.13250702962606e-06, + "loss": 1.0444, + "step": 95304 + }, + { + "epoch": 1.15, + "grad_norm": 13.587992516205746, + "learning_rate": 8.131933131866255e-06, + "loss": 1.0098, + "step": 95307 + }, + { + "epoch": 1.15, + "grad_norm": 8.56952251596459, + "learning_rate": 8.131359240481488e-06, + "loss": 1.236, + "step": 95310 + }, + { + "epoch": 1.15, + "grad_norm": 13.492230959035615, + "learning_rate": 8.130785355473721e-06, + "loss": 0.9067, + "step": 95313 + }, + { + "epoch": 1.15, + "grad_norm": 15.659991965586544, + "learning_rate": 8.13021147684491e-06, + "loss": 1.3724, + "step": 95316 + }, + { + "epoch": 1.15, + "grad_norm": 7.872965830422657, + "learning_rate": 8.129637604597012e-06, + "loss": 1.2242, + "step": 95319 + }, + { + "epoch": 1.15, + "grad_norm": 53.057564526777234, + "learning_rate": 8.129063738731986e-06, + "loss": 1.4475, + "step": 95322 + }, + { + "epoch": 1.15, + "grad_norm": 2.700059034995624, + "learning_rate": 8.128489879251795e-06, + "loss": 1.2183, + "step": 95325 + }, + { + "epoch": 1.15, + "grad_norm": 14.920536061870981, + "learning_rate": 8.127916026158396e-06, + "loss": 1.5911, + "step": 95328 + }, + { + "epoch": 1.15, + "grad_norm": 9.527436296906368, + "learning_rate": 8.12734217945374e-06, + "loss": 1.3379, + "step": 95331 + }, + { + "epoch": 1.15, + "grad_norm": 15.376374041367603, + "learning_rate": 8.126768339139794e-06, + "loss": 1.171, + "step": 95334 + }, + { + "epoch": 1.15, + "grad_norm": 2.883890359245659, + "learning_rate": 8.12619450521851e-06, + "loss": 1.4421, + "step": 95337 + }, + { + "epoch": 1.15, + "grad_norm": 9.70561916482383, + "learning_rate": 8.125620677691854e-06, + "loss": 0.9769, + "step": 95340 + }, + { + "epoch": 1.15, + "grad_norm": 6.5048424497712345, + "learning_rate": 8.125046856561773e-06, + "loss": 1.1105, + "step": 95343 + }, + { + "epoch": 1.15, + "grad_norm": 5.726890048697678, + "learning_rate": 8.124473041830237e-06, + "loss": 0.9654, + "step": 95346 + }, + { + "epoch": 1.15, + "grad_norm": 13.33273545244192, + "learning_rate": 8.123899233499195e-06, + "loss": 1.1968, + "step": 95349 + }, + { + "epoch": 1.15, + "grad_norm": 3.3417267939899897, + "learning_rate": 8.12332543157061e-06, + "loss": 1.4586, + "step": 95352 + }, + { + "epoch": 1.15, + "grad_norm": 12.877746436371902, + "learning_rate": 8.122751636046443e-06, + "loss": 0.9944, + "step": 95355 + }, + { + "epoch": 1.15, + "grad_norm": 13.174963642104007, + "learning_rate": 8.122177846928646e-06, + "loss": 1.6061, + "step": 95358 + }, + { + "epoch": 1.15, + "grad_norm": 5.8551657091131775, + "learning_rate": 8.121604064219178e-06, + "loss": 1.0363, + "step": 95361 + }, + { + "epoch": 1.15, + "grad_norm": 12.546614714367278, + "learning_rate": 8.121030287919998e-06, + "loss": 1.2504, + "step": 95364 + }, + { + "epoch": 1.15, + "grad_norm": 2.9093831673403017, + "learning_rate": 8.120456518033071e-06, + "loss": 1.2697, + "step": 95367 + }, + { + "epoch": 1.15, + "grad_norm": 4.1230857333273825, + "learning_rate": 8.119882754560345e-06, + "loss": 1.449, + "step": 95370 + }, + { + "epoch": 1.15, + "grad_norm": 15.331100767071812, + "learning_rate": 8.11930899750378e-06, + "loss": 1.2292, + "step": 95373 + }, + { + "epoch": 1.15, + "grad_norm": 23.44906718006387, + "learning_rate": 8.11873524686534e-06, + "loss": 1.3979, + "step": 95376 + }, + { + "epoch": 1.15, + "grad_norm": 6.620260198968721, + "learning_rate": 8.118161502646976e-06, + "loss": 1.3512, + "step": 95379 + }, + { + "epoch": 1.15, + "grad_norm": 6.888641005562595, + "learning_rate": 8.117587764850652e-06, + "loss": 0.9641, + "step": 95382 + }, + { + "epoch": 1.15, + "grad_norm": 13.679570961511308, + "learning_rate": 8.117014033478319e-06, + "loss": 1.3874, + "step": 95385 + }, + { + "epoch": 1.15, + "grad_norm": 5.036687374673415, + "learning_rate": 8.116440308531941e-06, + "loss": 1.5177, + "step": 95388 + }, + { + "epoch": 1.15, + "grad_norm": 7.206695964807436, + "learning_rate": 8.115866590013471e-06, + "loss": 1.046, + "step": 95391 + }, + { + "epoch": 1.15, + "grad_norm": 11.957692379915738, + "learning_rate": 8.115292877924876e-06, + "loss": 0.8753, + "step": 95394 + }, + { + "epoch": 1.15, + "grad_norm": 35.655204705455596, + "learning_rate": 8.114719172268101e-06, + "loss": 1.3893, + "step": 95397 + }, + { + "epoch": 1.15, + "grad_norm": 6.014790043716638, + "learning_rate": 8.114145473045115e-06, + "loss": 0.992, + "step": 95400 + }, + { + "epoch": 1.15, + "grad_norm": 7.576919387678904, + "learning_rate": 8.113571780257868e-06, + "loss": 0.8895, + "step": 95403 + }, + { + "epoch": 1.15, + "grad_norm": 7.068241631718074, + "learning_rate": 8.11299809390832e-06, + "loss": 1.2426, + "step": 95406 + }, + { + "epoch": 1.15, + "grad_norm": 13.119929989290751, + "learning_rate": 8.112424413998435e-06, + "loss": 1.0187, + "step": 95409 + }, + { + "epoch": 1.15, + "grad_norm": 14.879525122127477, + "learning_rate": 8.111850740530163e-06, + "loss": 1.1439, + "step": 95412 + }, + { + "epoch": 1.15, + "grad_norm": 16.734223430637073, + "learning_rate": 8.111277073505462e-06, + "loss": 1.0935, + "step": 95415 + }, + { + "epoch": 1.15, + "grad_norm": 5.728593109195092, + "learning_rate": 8.110703412926295e-06, + "loss": 1.0879, + "step": 95418 + }, + { + "epoch": 1.15, + "grad_norm": 7.897646714932255, + "learning_rate": 8.110129758794616e-06, + "loss": 1.0698, + "step": 95421 + }, + { + "epoch": 1.15, + "grad_norm": 10.819292365737974, + "learning_rate": 8.109556111112385e-06, + "loss": 0.9548, + "step": 95424 + }, + { + "epoch": 1.15, + "grad_norm": 3.1413576221395547, + "learning_rate": 8.108982469881553e-06, + "loss": 1.0394, + "step": 95427 + }, + { + "epoch": 1.15, + "grad_norm": 20.592339208067017, + "learning_rate": 8.108408835104086e-06, + "loss": 1.2539, + "step": 95430 + }, + { + "epoch": 1.15, + "grad_norm": 1.91498284119866, + "learning_rate": 8.107835206781938e-06, + "loss": 1.4374, + "step": 95433 + }, + { + "epoch": 1.15, + "grad_norm": 16.847852787043653, + "learning_rate": 8.107261584917068e-06, + "loss": 1.5137, + "step": 95436 + }, + { + "epoch": 1.15, + "grad_norm": 47.15926405568624, + "learning_rate": 8.10668796951143e-06, + "loss": 1.1142, + "step": 95439 + }, + { + "epoch": 1.15, + "grad_norm": 20.707402417711794, + "learning_rate": 8.106114360566988e-06, + "loss": 1.1781, + "step": 95442 + }, + { + "epoch": 1.15, + "grad_norm": 7.00039709547902, + "learning_rate": 8.10554075808569e-06, + "loss": 0.8795, + "step": 95445 + }, + { + "epoch": 1.15, + "grad_norm": 10.579361118587272, + "learning_rate": 8.104967162069506e-06, + "loss": 1.2783, + "step": 95448 + }, + { + "epoch": 1.15, + "grad_norm": 5.845264230209366, + "learning_rate": 8.10439357252038e-06, + "loss": 1.1283, + "step": 95451 + }, + { + "epoch": 1.15, + "grad_norm": 3.835569701349506, + "learning_rate": 8.103819989440279e-06, + "loss": 1.1225, + "step": 95454 + }, + { + "epoch": 1.15, + "grad_norm": 5.226421455818433, + "learning_rate": 8.103246412831156e-06, + "loss": 1.3038, + "step": 95457 + }, + { + "epoch": 1.15, + "grad_norm": 8.37627242957869, + "learning_rate": 8.102672842694971e-06, + "loss": 0.9416, + "step": 95460 + }, + { + "epoch": 1.15, + "grad_norm": 6.76460465768912, + "learning_rate": 8.10209927903368e-06, + "loss": 1.1501, + "step": 95463 + }, + { + "epoch": 1.15, + "grad_norm": 10.948422094781286, + "learning_rate": 8.101525721849242e-06, + "loss": 1.0181, + "step": 95466 + }, + { + "epoch": 1.15, + "grad_norm": 4.0570950208327945, + "learning_rate": 8.10095217114361e-06, + "loss": 1.1192, + "step": 95469 + }, + { + "epoch": 1.15, + "grad_norm": 56.47561046902366, + "learning_rate": 8.100378626918745e-06, + "loss": 1.1422, + "step": 95472 + }, + { + "epoch": 1.15, + "grad_norm": 16.113739526262673, + "learning_rate": 8.099805089176606e-06, + "loss": 1.035, + "step": 95475 + }, + { + "epoch": 1.15, + "grad_norm": 2.985715001293303, + "learning_rate": 8.099231557919145e-06, + "loss": 1.514, + "step": 95478 + }, + { + "epoch": 1.15, + "grad_norm": 3.9083022930241067, + "learning_rate": 8.09865803314832e-06, + "loss": 1.0774, + "step": 95481 + }, + { + "epoch": 1.15, + "grad_norm": 3.6881931438980047, + "learning_rate": 8.098084514866095e-06, + "loss": 1.4192, + "step": 95484 + }, + { + "epoch": 1.15, + "grad_norm": 13.395839819670568, + "learning_rate": 8.097511003074418e-06, + "loss": 1.0789, + "step": 95487 + }, + { + "epoch": 1.15, + "grad_norm": 21.83780677231539, + "learning_rate": 8.096937497775257e-06, + "loss": 0.9865, + "step": 95490 + }, + { + "epoch": 1.15, + "grad_norm": 13.032223255208532, + "learning_rate": 8.096363998970556e-06, + "loss": 1.2259, + "step": 95493 + }, + { + "epoch": 1.15, + "grad_norm": 10.741304591522969, + "learning_rate": 8.095790506662284e-06, + "loss": 1.4664, + "step": 95496 + }, + { + "epoch": 1.15, + "grad_norm": 15.994605548508634, + "learning_rate": 8.095217020852389e-06, + "loss": 0.9531, + "step": 95499 + }, + { + "epoch": 1.15, + "grad_norm": 6.3968577361437955, + "learning_rate": 8.094643541542837e-06, + "loss": 1.6674, + "step": 95502 + }, + { + "epoch": 1.15, + "grad_norm": 11.300303308475922, + "learning_rate": 8.094070068735576e-06, + "loss": 1.1426, + "step": 95505 + }, + { + "epoch": 1.15, + "grad_norm": 41.64718899068936, + "learning_rate": 8.093496602432569e-06, + "loss": 1.1116, + "step": 95508 + }, + { + "epoch": 1.15, + "grad_norm": 5.845406571431255, + "learning_rate": 8.09292314263577e-06, + "loss": 1.0656, + "step": 95511 + }, + { + "epoch": 1.15, + "grad_norm": 13.685801666105666, + "learning_rate": 8.09234968934714e-06, + "loss": 1.4397, + "step": 95514 + }, + { + "epoch": 1.15, + "grad_norm": 5.698358269589174, + "learning_rate": 8.091776242568634e-06, + "loss": 1.067, + "step": 95517 + }, + { + "epoch": 1.15, + "grad_norm": 46.30451420847018, + "learning_rate": 8.091202802302205e-06, + "loss": 0.8761, + "step": 95520 + }, + { + "epoch": 1.15, + "grad_norm": 5.706601883910375, + "learning_rate": 8.090629368549813e-06, + "loss": 1.172, + "step": 95523 + }, + { + "epoch": 1.15, + "grad_norm": 11.827463488962877, + "learning_rate": 8.090055941313417e-06, + "loss": 1.27, + "step": 95526 + }, + { + "epoch": 1.15, + "grad_norm": 7.652370451815293, + "learning_rate": 8.089482520594976e-06, + "loss": 1.1621, + "step": 95529 + }, + { + "epoch": 1.15, + "grad_norm": 7.6755811704409584, + "learning_rate": 8.088909106396436e-06, + "loss": 1.2164, + "step": 95532 + }, + { + "epoch": 1.15, + "grad_norm": 34.680615457149926, + "learning_rate": 8.088335698719764e-06, + "loss": 0.9866, + "step": 95535 + }, + { + "epoch": 1.15, + "grad_norm": 6.332472009931912, + "learning_rate": 8.087762297566913e-06, + "loss": 1.3929, + "step": 95538 + }, + { + "epoch": 1.15, + "grad_norm": 9.388494049676016, + "learning_rate": 8.087188902939839e-06, + "loss": 1.4077, + "step": 95541 + }, + { + "epoch": 1.15, + "grad_norm": 7.536145564839204, + "learning_rate": 8.086615514840506e-06, + "loss": 1.3688, + "step": 95544 + }, + { + "epoch": 1.15, + "grad_norm": 11.425098212491772, + "learning_rate": 8.08604213327086e-06, + "loss": 1.005, + "step": 95547 + }, + { + "epoch": 1.15, + "grad_norm": 3.885326114365537, + "learning_rate": 8.085468758232866e-06, + "loss": 1.1694, + "step": 95550 + }, + { + "epoch": 1.15, + "grad_norm": 13.782690019377139, + "learning_rate": 8.084895389728472e-06, + "loss": 1.2309, + "step": 95553 + }, + { + "epoch": 1.15, + "grad_norm": 15.331272093346536, + "learning_rate": 8.084322027759648e-06, + "loss": 0.9883, + "step": 95556 + }, + { + "epoch": 1.15, + "grad_norm": 17.602362554619294, + "learning_rate": 8.08374867232834e-06, + "loss": 1.2881, + "step": 95559 + }, + { + "epoch": 1.15, + "grad_norm": 7.109610789478691, + "learning_rate": 8.083175323436504e-06, + "loss": 1.2701, + "step": 95562 + }, + { + "epoch": 1.15, + "grad_norm": 11.649646142377756, + "learning_rate": 8.082601981086102e-06, + "loss": 1.1472, + "step": 95565 + }, + { + "epoch": 1.15, + "grad_norm": 14.091234359351143, + "learning_rate": 8.08202864527909e-06, + "loss": 1.5324, + "step": 95568 + }, + { + "epoch": 1.15, + "grad_norm": 1.9833800043197767, + "learning_rate": 8.081455316017424e-06, + "loss": 1.3417, + "step": 95571 + }, + { + "epoch": 1.15, + "grad_norm": 8.314029365762044, + "learning_rate": 8.080881993303058e-06, + "loss": 1.3631, + "step": 95574 + }, + { + "epoch": 1.15, + "grad_norm": 4.036917586422021, + "learning_rate": 8.08030867713795e-06, + "loss": 0.8986, + "step": 95577 + }, + { + "epoch": 1.15, + "grad_norm": 11.017899901581785, + "learning_rate": 8.079735367524058e-06, + "loss": 1.4027, + "step": 95580 + }, + { + "epoch": 1.15, + "grad_norm": 3.5194627629770676, + "learning_rate": 8.07916206446334e-06, + "loss": 1.0792, + "step": 95583 + }, + { + "epoch": 1.15, + "grad_norm": 5.0310840519990325, + "learning_rate": 8.078588767957747e-06, + "loss": 1.1525, + "step": 95586 + }, + { + "epoch": 1.15, + "grad_norm": 18.8344659607977, + "learning_rate": 8.078015478009236e-06, + "loss": 1.2795, + "step": 95589 + }, + { + "epoch": 1.15, + "grad_norm": 8.368452273062257, + "learning_rate": 8.077442194619772e-06, + "loss": 1.1343, + "step": 95592 + }, + { + "epoch": 1.15, + "grad_norm": 10.385460136904085, + "learning_rate": 8.0768689177913e-06, + "loss": 1.2759, + "step": 95595 + }, + { + "epoch": 1.15, + "grad_norm": 27.43340592926914, + "learning_rate": 8.076295647525786e-06, + "loss": 1.3733, + "step": 95598 + }, + { + "epoch": 1.15, + "grad_norm": 14.832128475062571, + "learning_rate": 8.075722383825179e-06, + "loss": 1.1094, + "step": 95601 + }, + { + "epoch": 1.15, + "grad_norm": 12.107503680162042, + "learning_rate": 8.07514912669144e-06, + "loss": 0.7556, + "step": 95604 + }, + { + "epoch": 1.15, + "grad_norm": 13.709960430595693, + "learning_rate": 8.074575876126522e-06, + "loss": 1.1965, + "step": 95607 + }, + { + "epoch": 1.15, + "grad_norm": 9.253292063644105, + "learning_rate": 8.074002632132386e-06, + "loss": 1.0731, + "step": 95610 + }, + { + "epoch": 1.15, + "grad_norm": 6.565118726651764, + "learning_rate": 8.073429394710983e-06, + "loss": 0.8767, + "step": 95613 + }, + { + "epoch": 1.15, + "grad_norm": 10.71176009317848, + "learning_rate": 8.072856163864271e-06, + "loss": 1.0794, + "step": 95616 + }, + { + "epoch": 1.15, + "grad_norm": 11.260005700039459, + "learning_rate": 8.072282939594205e-06, + "loss": 1.3316, + "step": 95619 + }, + { + "epoch": 1.15, + "grad_norm": 14.931891500279303, + "learning_rate": 8.071709721902748e-06, + "loss": 0.9991, + "step": 95622 + }, + { + "epoch": 1.15, + "grad_norm": 16.72371011079018, + "learning_rate": 8.07113651079185e-06, + "loss": 1.27, + "step": 95625 + }, + { + "epoch": 1.15, + "grad_norm": 12.018932480908001, + "learning_rate": 8.070563306263467e-06, + "loss": 1.2614, + "step": 95628 + }, + { + "epoch": 1.15, + "grad_norm": 13.40467212476544, + "learning_rate": 8.069990108319554e-06, + "loss": 1.4224, + "step": 95631 + }, + { + "epoch": 1.15, + "grad_norm": 13.859773923998056, + "learning_rate": 8.069416916962075e-06, + "loss": 1.2645, + "step": 95634 + }, + { + "epoch": 1.15, + "grad_norm": 4.020969324011683, + "learning_rate": 8.06884373219298e-06, + "loss": 1.3392, + "step": 95637 + }, + { + "epoch": 1.15, + "grad_norm": 3.556853181885099, + "learning_rate": 8.068270554014222e-06, + "loss": 1.3264, + "step": 95640 + }, + { + "epoch": 1.15, + "grad_norm": 14.105416106965006, + "learning_rate": 8.06769738242776e-06, + "loss": 1.0278, + "step": 95643 + }, + { + "epoch": 1.15, + "grad_norm": 11.09540946935605, + "learning_rate": 8.067124217435553e-06, + "loss": 1.6573, + "step": 95646 + }, + { + "epoch": 1.15, + "grad_norm": 20.102484165790244, + "learning_rate": 8.066551059039554e-06, + "loss": 1.5427, + "step": 95649 + }, + { + "epoch": 1.15, + "grad_norm": 18.614634085267525, + "learning_rate": 8.065977907241724e-06, + "loss": 1.4935, + "step": 95652 + }, + { + "epoch": 1.15, + "grad_norm": 11.386388752920592, + "learning_rate": 8.065404762044011e-06, + "loss": 1.3986, + "step": 95655 + }, + { + "epoch": 1.15, + "grad_norm": 7.4423616604055765, + "learning_rate": 8.064831623448375e-06, + "loss": 0.9801, + "step": 95658 + }, + { + "epoch": 1.15, + "grad_norm": 16.220385498795295, + "learning_rate": 8.06425849145677e-06, + "loss": 1.3058, + "step": 95661 + }, + { + "epoch": 1.15, + "grad_norm": 18.83346622220354, + "learning_rate": 8.063685366071158e-06, + "loss": 1.1682, + "step": 95664 + }, + { + "epoch": 1.15, + "grad_norm": 8.384870973018606, + "learning_rate": 8.063112247293487e-06, + "loss": 1.0645, + "step": 95667 + }, + { + "epoch": 1.15, + "grad_norm": 7.570261319206471, + "learning_rate": 8.062539135125717e-06, + "loss": 1.0589, + "step": 95670 + }, + { + "epoch": 1.15, + "grad_norm": 9.860691666361662, + "learning_rate": 8.0619660295698e-06, + "loss": 0.8561, + "step": 95673 + }, + { + "epoch": 1.15, + "grad_norm": 5.387267810911414, + "learning_rate": 8.0613929306277e-06, + "loss": 0.9942, + "step": 95676 + }, + { + "epoch": 1.15, + "grad_norm": 24.145396516720016, + "learning_rate": 8.060819838301369e-06, + "loss": 0.9562, + "step": 95679 + }, + { + "epoch": 1.15, + "grad_norm": 8.210343415495215, + "learning_rate": 8.060246752592755e-06, + "loss": 1.5457, + "step": 95682 + }, + { + "epoch": 1.15, + "grad_norm": 9.436506316498761, + "learning_rate": 8.059673673503823e-06, + "loss": 1.199, + "step": 95685 + }, + { + "epoch": 1.15, + "grad_norm": 11.597536272548911, + "learning_rate": 8.059100601036526e-06, + "loss": 1.0193, + "step": 95688 + }, + { + "epoch": 1.15, + "grad_norm": 21.362667501514878, + "learning_rate": 8.058527535192822e-06, + "loss": 1.3171, + "step": 95691 + }, + { + "epoch": 1.15, + "grad_norm": 7.790362532215912, + "learning_rate": 8.057954475974661e-06, + "loss": 1.0561, + "step": 95694 + }, + { + "epoch": 1.15, + "grad_norm": 3.787390777028874, + "learning_rate": 8.057381423384003e-06, + "loss": 1.3407, + "step": 95697 + }, + { + "epoch": 1.15, + "grad_norm": 4.273938146050188, + "learning_rate": 8.0568083774228e-06, + "loss": 1.3677, + "step": 95700 + }, + { + "epoch": 1.15, + "grad_norm": 7.194406969180405, + "learning_rate": 8.056235338093011e-06, + "loss": 1.0483, + "step": 95703 + }, + { + "epoch": 1.15, + "grad_norm": 14.33712952383317, + "learning_rate": 8.055662305396595e-06, + "loss": 1.3379, + "step": 95706 + }, + { + "epoch": 1.15, + "grad_norm": 7.129949169563408, + "learning_rate": 8.0550892793355e-06, + "loss": 1.1513, + "step": 95709 + }, + { + "epoch": 1.15, + "grad_norm": 3.3095965967528067, + "learning_rate": 8.054516259911683e-06, + "loss": 1.4416, + "step": 95712 + }, + { + "epoch": 1.15, + "grad_norm": 3.2888317503087445, + "learning_rate": 8.0539432471271e-06, + "loss": 1.1163, + "step": 95715 + }, + { + "epoch": 1.15, + "grad_norm": 11.35337638952475, + "learning_rate": 8.053370240983714e-06, + "loss": 1.2416, + "step": 95718 + }, + { + "epoch": 1.15, + "grad_norm": 3.5741223325849463, + "learning_rate": 8.052797241483473e-06, + "loss": 1.0701, + "step": 95721 + }, + { + "epoch": 1.15, + "grad_norm": 5.538694540811837, + "learning_rate": 8.052224248628329e-06, + "loss": 1.4368, + "step": 95724 + }, + { + "epoch": 1.15, + "grad_norm": 8.900825550865592, + "learning_rate": 8.051651262420243e-06, + "loss": 1.2163, + "step": 95727 + }, + { + "epoch": 1.15, + "grad_norm": 25.23875297208897, + "learning_rate": 8.051078282861172e-06, + "loss": 1.0316, + "step": 95730 + }, + { + "epoch": 1.15, + "grad_norm": 2.890848584119175, + "learning_rate": 8.050505309953071e-06, + "loss": 1.1341, + "step": 95733 + }, + { + "epoch": 1.15, + "grad_norm": 9.350371176050654, + "learning_rate": 8.049932343697889e-06, + "loss": 1.5178, + "step": 95736 + }, + { + "epoch": 1.15, + "grad_norm": 10.739212994290476, + "learning_rate": 8.049359384097585e-06, + "loss": 1.3637, + "step": 95739 + }, + { + "epoch": 1.15, + "grad_norm": 30.49368659377929, + "learning_rate": 8.048786431154117e-06, + "loss": 1.1463, + "step": 95742 + }, + { + "epoch": 1.15, + "grad_norm": 5.209389061621458, + "learning_rate": 8.04821348486944e-06, + "loss": 0.9998, + "step": 95745 + }, + { + "epoch": 1.15, + "grad_norm": 7.415983011082451, + "learning_rate": 8.047640545245503e-06, + "loss": 1.0275, + "step": 95748 + }, + { + "epoch": 1.15, + "grad_norm": 11.06219362592268, + "learning_rate": 8.047067612284267e-06, + "loss": 1.3193, + "step": 95751 + }, + { + "epoch": 1.15, + "grad_norm": 3.0466449198706016, + "learning_rate": 8.046494685987685e-06, + "loss": 1.0936, + "step": 95754 + }, + { + "epoch": 1.15, + "grad_norm": 3.980891341479214, + "learning_rate": 8.045921766357712e-06, + "loss": 0.9033, + "step": 95757 + }, + { + "epoch": 1.15, + "grad_norm": 14.122489715260501, + "learning_rate": 8.04534885339631e-06, + "loss": 1.2353, + "step": 95760 + }, + { + "epoch": 1.15, + "grad_norm": 7.035428019237439, + "learning_rate": 8.044775947105426e-06, + "loss": 1.1799, + "step": 95763 + }, + { + "epoch": 1.15, + "grad_norm": 3.549376920389567, + "learning_rate": 8.044203047487013e-06, + "loss": 1.3658, + "step": 95766 + }, + { + "epoch": 1.15, + "grad_norm": 125.0356764034693, + "learning_rate": 8.043630154543032e-06, + "loss": 1.2134, + "step": 95769 + }, + { + "epoch": 1.15, + "grad_norm": 13.735743252696258, + "learning_rate": 8.043057268275441e-06, + "loss": 1.6254, + "step": 95772 + }, + { + "epoch": 1.15, + "grad_norm": 4.457851623296107, + "learning_rate": 8.042484388686188e-06, + "loss": 0.9875, + "step": 95775 + }, + { + "epoch": 1.15, + "grad_norm": 18.94437847906567, + "learning_rate": 8.04191151577723e-06, + "loss": 1.305, + "step": 95778 + }, + { + "epoch": 1.15, + "grad_norm": 21.13941278281549, + "learning_rate": 8.04133864955052e-06, + "loss": 1.2733, + "step": 95781 + }, + { + "epoch": 1.15, + "grad_norm": 11.520966437016751, + "learning_rate": 8.04076579000802e-06, + "loss": 1.5478, + "step": 95784 + }, + { + "epoch": 1.15, + "grad_norm": 6.8023976379143525, + "learning_rate": 8.040192937151682e-06, + "loss": 0.9227, + "step": 95787 + }, + { + "epoch": 1.15, + "grad_norm": 15.380568094564362, + "learning_rate": 8.039620090983456e-06, + "loss": 1.4193, + "step": 95790 + }, + { + "epoch": 1.15, + "grad_norm": 7.4748625505046435, + "learning_rate": 8.039047251505303e-06, + "loss": 1.2154, + "step": 95793 + }, + { + "epoch": 1.15, + "grad_norm": 7.413550605243022, + "learning_rate": 8.038474418719172e-06, + "loss": 1.1674, + "step": 95796 + }, + { + "epoch": 1.15, + "grad_norm": 9.487899521999624, + "learning_rate": 8.037901592627026e-06, + "loss": 1.3815, + "step": 95799 + }, + { + "epoch": 1.15, + "grad_norm": 33.463420358512074, + "learning_rate": 8.03732877323081e-06, + "loss": 1.0226, + "step": 95802 + }, + { + "epoch": 1.15, + "grad_norm": 9.652976356929901, + "learning_rate": 8.036755960532487e-06, + "loss": 1.0419, + "step": 95805 + }, + { + "epoch": 1.15, + "grad_norm": 19.41368713572812, + "learning_rate": 8.036183154534007e-06, + "loss": 1.2671, + "step": 95808 + }, + { + "epoch": 1.15, + "grad_norm": 5.675633885804371, + "learning_rate": 8.035610355237326e-06, + "loss": 1.1059, + "step": 95811 + }, + { + "epoch": 1.15, + "grad_norm": 12.130451310606704, + "learning_rate": 8.035037562644405e-06, + "loss": 1.1958, + "step": 95814 + }, + { + "epoch": 1.15, + "grad_norm": 24.09936082629555, + "learning_rate": 8.034464776757189e-06, + "loss": 1.0896, + "step": 95817 + }, + { + "epoch": 1.15, + "grad_norm": 14.545208030723652, + "learning_rate": 8.033891997577634e-06, + "loss": 1.3897, + "step": 95820 + }, + { + "epoch": 1.15, + "grad_norm": 6.963433330784425, + "learning_rate": 8.0333192251077e-06, + "loss": 1.4647, + "step": 95823 + }, + { + "epoch": 1.15, + "grad_norm": 5.764209371455971, + "learning_rate": 8.032746459349341e-06, + "loss": 1.1553, + "step": 95826 + }, + { + "epoch": 1.15, + "grad_norm": 3.838818662277664, + "learning_rate": 8.032173700304508e-06, + "loss": 1.0182, + "step": 95829 + }, + { + "epoch": 1.15, + "grad_norm": 12.00961293993373, + "learning_rate": 8.031600947975157e-06, + "loss": 1.3107, + "step": 95832 + }, + { + "epoch": 1.15, + "grad_norm": 21.83469860359368, + "learning_rate": 8.031028202363244e-06, + "loss": 1.1736, + "step": 95835 + }, + { + "epoch": 1.15, + "grad_norm": 8.612305330663753, + "learning_rate": 8.03045546347072e-06, + "loss": 1.0209, + "step": 95838 + }, + { + "epoch": 1.15, + "grad_norm": 5.291721872219061, + "learning_rate": 8.029882731299546e-06, + "loss": 1.0142, + "step": 95841 + }, + { + "epoch": 1.15, + "grad_norm": 6.93297697321686, + "learning_rate": 8.02931000585167e-06, + "loss": 1.125, + "step": 95844 + }, + { + "epoch": 1.15, + "grad_norm": 9.685093275807459, + "learning_rate": 8.028737287129049e-06, + "loss": 1.2614, + "step": 95847 + }, + { + "epoch": 1.15, + "grad_norm": 4.781578357941217, + "learning_rate": 8.028164575133637e-06, + "loss": 1.0839, + "step": 95850 + }, + { + "epoch": 1.15, + "grad_norm": 22.684522049773314, + "learning_rate": 8.027591869867393e-06, + "loss": 1.2213, + "step": 95853 + }, + { + "epoch": 1.15, + "grad_norm": 4.484112063938693, + "learning_rate": 8.027019171332263e-06, + "loss": 1.3445, + "step": 95856 + }, + { + "epoch": 1.15, + "grad_norm": 3.2272173332710077, + "learning_rate": 8.026446479530208e-06, + "loss": 1.7056, + "step": 95859 + }, + { + "epoch": 1.15, + "grad_norm": 5.944332153044108, + "learning_rate": 8.025873794463179e-06, + "loss": 1.1193, + "step": 95862 + }, + { + "epoch": 1.15, + "grad_norm": 6.679111325019053, + "learning_rate": 8.02530111613313e-06, + "loss": 1.2604, + "step": 95865 + }, + { + "epoch": 1.15, + "grad_norm": 4.393022277068093, + "learning_rate": 8.024728444542023e-06, + "loss": 1.0941, + "step": 95868 + }, + { + "epoch": 1.15, + "grad_norm": 11.797580173162174, + "learning_rate": 8.024155779691802e-06, + "loss": 1.2221, + "step": 95871 + }, + { + "epoch": 1.15, + "grad_norm": 11.591220063453719, + "learning_rate": 8.023583121584426e-06, + "loss": 1.0637, + "step": 95874 + }, + { + "epoch": 1.15, + "grad_norm": 2.3071856545579874, + "learning_rate": 8.02301047022185e-06, + "loss": 1.3891, + "step": 95877 + }, + { + "epoch": 1.15, + "grad_norm": 13.04525567704135, + "learning_rate": 8.022437825606027e-06, + "loss": 1.2668, + "step": 95880 + }, + { + "epoch": 1.15, + "grad_norm": 19.009543908413104, + "learning_rate": 8.021865187738911e-06, + "loss": 1.3714, + "step": 95883 + }, + { + "epoch": 1.15, + "grad_norm": 17.467361213971163, + "learning_rate": 8.021292556622455e-06, + "loss": 0.8579, + "step": 95886 + }, + { + "epoch": 1.15, + "grad_norm": 12.832224817917707, + "learning_rate": 8.020719932258617e-06, + "loss": 1.326, + "step": 95889 + }, + { + "epoch": 1.15, + "grad_norm": 16.159086226698307, + "learning_rate": 8.020147314649346e-06, + "loss": 1.0856, + "step": 95892 + }, + { + "epoch": 1.15, + "grad_norm": 7.387701167264064, + "learning_rate": 8.019574703796605e-06, + "loss": 1.5419, + "step": 95895 + }, + { + "epoch": 1.15, + "grad_norm": 14.407506211273809, + "learning_rate": 8.019002099702338e-06, + "loss": 1.1452, + "step": 95898 + }, + { + "epoch": 1.15, + "grad_norm": 8.963336628699503, + "learning_rate": 8.018429502368504e-06, + "loss": 1.759, + "step": 95901 + }, + { + "epoch": 1.15, + "grad_norm": 6.456960863745935, + "learning_rate": 8.017856911797054e-06, + "loss": 1.2779, + "step": 95904 + }, + { + "epoch": 1.15, + "grad_norm": 6.441160715547535, + "learning_rate": 8.01728432798995e-06, + "loss": 1.1516, + "step": 95907 + }, + { + "epoch": 1.15, + "grad_norm": 16.218078649420743, + "learning_rate": 8.016711750949135e-06, + "loss": 1.3582, + "step": 95910 + }, + { + "epoch": 1.15, + "grad_norm": 5.6254363936764635, + "learning_rate": 8.01613918067657e-06, + "loss": 1.1332, + "step": 95913 + }, + { + "epoch": 1.15, + "grad_norm": 7.529623720502542, + "learning_rate": 8.015566617174208e-06, + "loss": 0.9487, + "step": 95916 + }, + { + "epoch": 1.15, + "grad_norm": 3.0376325691404062, + "learning_rate": 8.014994060443999e-06, + "loss": 1.0972, + "step": 95919 + }, + { + "epoch": 1.15, + "grad_norm": 8.256765365807048, + "learning_rate": 8.014421510487906e-06, + "loss": 1.3214, + "step": 95922 + }, + { + "epoch": 1.15, + "grad_norm": 8.091262050372173, + "learning_rate": 8.013848967307876e-06, + "loss": 1.059, + "step": 95925 + }, + { + "epoch": 1.15, + "grad_norm": 21.519282922986374, + "learning_rate": 8.013276430905861e-06, + "loss": 1.0418, + "step": 95928 + }, + { + "epoch": 1.15, + "grad_norm": 15.786198629266922, + "learning_rate": 8.01270390128382e-06, + "loss": 1.2706, + "step": 95931 + }, + { + "epoch": 1.15, + "grad_norm": 16.244842342401647, + "learning_rate": 8.012131378443706e-06, + "loss": 1.1615, + "step": 95934 + }, + { + "epoch": 1.15, + "grad_norm": 13.759947950035803, + "learning_rate": 8.011558862387469e-06, + "loss": 1.3029, + "step": 95937 + }, + { + "epoch": 1.15, + "grad_norm": 6.380243933866684, + "learning_rate": 8.010986353117065e-06, + "loss": 1.1571, + "step": 95940 + }, + { + "epoch": 1.15, + "grad_norm": 3.3708137491489643, + "learning_rate": 8.010413850634451e-06, + "loss": 1.3215, + "step": 95943 + }, + { + "epoch": 1.15, + "grad_norm": 26.779837498665966, + "learning_rate": 8.009841354941574e-06, + "loss": 1.4944, + "step": 95946 + }, + { + "epoch": 1.15, + "grad_norm": 3.224649744487699, + "learning_rate": 8.009268866040398e-06, + "loss": 1.0309, + "step": 95949 + }, + { + "epoch": 1.15, + "grad_norm": 25.87985284392228, + "learning_rate": 8.008696383932863e-06, + "loss": 1.0833, + "step": 95952 + }, + { + "epoch": 1.15, + "grad_norm": 9.93896044816089, + "learning_rate": 8.008123908620933e-06, + "loss": 1.5234, + "step": 95955 + }, + { + "epoch": 1.15, + "grad_norm": 11.334365237348196, + "learning_rate": 8.007551440106558e-06, + "loss": 1.1984, + "step": 95958 + }, + { + "epoch": 1.15, + "grad_norm": 5.419898908117643, + "learning_rate": 8.006978978391695e-06, + "loss": 1.2202, + "step": 95961 + }, + { + "epoch": 1.15, + "grad_norm": 29.557199516105445, + "learning_rate": 8.006406523478293e-06, + "loss": 1.4442, + "step": 95964 + }, + { + "epoch": 1.15, + "grad_norm": 10.627374943597513, + "learning_rate": 8.005834075368309e-06, + "loss": 1.2863, + "step": 95967 + }, + { + "epoch": 1.15, + "grad_norm": 14.476275680289017, + "learning_rate": 8.00526163406369e-06, + "loss": 1.3724, + "step": 95970 + }, + { + "epoch": 1.15, + "grad_norm": 7.54309468408649, + "learning_rate": 8.004689199566398e-06, + "loss": 1.1327, + "step": 95973 + }, + { + "epoch": 1.15, + "grad_norm": 19.7644244973731, + "learning_rate": 8.004116771878388e-06, + "loss": 1.4033, + "step": 95976 + }, + { + "epoch": 1.15, + "grad_norm": 7.093008834429702, + "learning_rate": 8.003544351001601e-06, + "loss": 0.8265, + "step": 95979 + }, + { + "epoch": 1.15, + "grad_norm": 29.40261741937424, + "learning_rate": 8.002971936937998e-06, + "loss": 1.1577, + "step": 95982 + }, + { + "epoch": 1.15, + "grad_norm": 5.215146627261371, + "learning_rate": 8.002399529689537e-06, + "loss": 1.4434, + "step": 95985 + }, + { + "epoch": 1.15, + "grad_norm": 7.306561784538569, + "learning_rate": 8.001827129258167e-06, + "loss": 1.3467, + "step": 95988 + }, + { + "epoch": 1.15, + "grad_norm": 9.589200509838003, + "learning_rate": 8.001254735645838e-06, + "loss": 1.3058, + "step": 95991 + }, + { + "epoch": 1.15, + "grad_norm": 16.22330093610375, + "learning_rate": 8.000682348854506e-06, + "loss": 0.9798, + "step": 95994 + }, + { + "epoch": 1.15, + "grad_norm": 5.51790412161033, + "learning_rate": 8.000109968886127e-06, + "loss": 1.4227, + "step": 95997 + }, + { + "epoch": 1.15, + "grad_norm": 21.95229361582208, + "learning_rate": 7.999537595742651e-06, + "loss": 0.8866, + "step": 96000 + }, + { + "epoch": 1.15, + "grad_norm": 6.926365516390119, + "learning_rate": 7.998965229426036e-06, + "loss": 0.9521, + "step": 96003 + }, + { + "epoch": 1.15, + "grad_norm": 8.23964840154153, + "learning_rate": 7.998392869938229e-06, + "loss": 1.0427, + "step": 96006 + }, + { + "epoch": 1.15, + "grad_norm": 7.728854107800637, + "learning_rate": 7.997820517281186e-06, + "loss": 1.1363, + "step": 96009 + }, + { + "epoch": 1.15, + "grad_norm": 4.5543105180246055, + "learning_rate": 7.99724817145686e-06, + "loss": 1.2493, + "step": 96012 + }, + { + "epoch": 1.15, + "grad_norm": 22.184291033048723, + "learning_rate": 7.996675832467209e-06, + "loss": 0.9224, + "step": 96015 + }, + { + "epoch": 1.15, + "grad_norm": 41.077672589895606, + "learning_rate": 7.996103500314176e-06, + "loss": 1.0351, + "step": 96018 + }, + { + "epoch": 1.15, + "grad_norm": 8.299434355089378, + "learning_rate": 7.995531174999724e-06, + "loss": 1.2008, + "step": 96021 + }, + { + "epoch": 1.15, + "grad_norm": 6.6030896486943265, + "learning_rate": 7.994958856525799e-06, + "loss": 1.3783, + "step": 96024 + }, + { + "epoch": 1.15, + "grad_norm": 23.728331638340045, + "learning_rate": 7.99438654489436e-06, + "loss": 1.194, + "step": 96027 + }, + { + "epoch": 1.15, + "grad_norm": 9.985610642526018, + "learning_rate": 7.993814240107358e-06, + "loss": 1.4725, + "step": 96030 + }, + { + "epoch": 1.15, + "grad_norm": 9.255777600417435, + "learning_rate": 7.993241942166742e-06, + "loss": 1.247, + "step": 96033 + }, + { + "epoch": 1.15, + "grad_norm": 6.409835493288495, + "learning_rate": 7.992669651074468e-06, + "loss": 1.122, + "step": 96036 + }, + { + "epoch": 1.15, + "grad_norm": 6.537926435916999, + "learning_rate": 7.992097366832491e-06, + "loss": 1.42, + "step": 96039 + }, + { + "epoch": 1.15, + "grad_norm": 11.608904817764614, + "learning_rate": 7.991525089442767e-06, + "loss": 1.1747, + "step": 96042 + }, + { + "epoch": 1.15, + "grad_norm": 6.021081728423249, + "learning_rate": 7.990952818907237e-06, + "loss": 1.2578, + "step": 96045 + }, + { + "epoch": 1.15, + "grad_norm": 9.591642664321492, + "learning_rate": 7.990380555227865e-06, + "loss": 1.1233, + "step": 96048 + }, + { + "epoch": 1.15, + "grad_norm": 9.909597502769318, + "learning_rate": 7.9898082984066e-06, + "loss": 1.0789, + "step": 96051 + }, + { + "epoch": 1.16, + "grad_norm": 4.825271809392355, + "learning_rate": 7.989236048445394e-06, + "loss": 0.9412, + "step": 96054 + }, + { + "epoch": 1.16, + "grad_norm": 3.2662181234777203, + "learning_rate": 7.988663805346205e-06, + "loss": 1.3076, + "step": 96057 + }, + { + "epoch": 1.16, + "grad_norm": 2.947948204469338, + "learning_rate": 7.988091569110977e-06, + "loss": 1.0972, + "step": 96060 + }, + { + "epoch": 1.16, + "grad_norm": 3.8998038894878824, + "learning_rate": 7.98751933974167e-06, + "loss": 1.5164, + "step": 96063 + }, + { + "epoch": 1.16, + "grad_norm": 5.369002933734958, + "learning_rate": 7.986947117240234e-06, + "loss": 1.6242, + "step": 96066 + }, + { + "epoch": 1.16, + "grad_norm": 2.985869318633122, + "learning_rate": 7.986374901608626e-06, + "loss": 1.0954, + "step": 96069 + }, + { + "epoch": 1.16, + "grad_norm": 13.679310753362167, + "learning_rate": 7.985802692848791e-06, + "loss": 1.242, + "step": 96072 + }, + { + "epoch": 1.16, + "grad_norm": 4.248795578136698, + "learning_rate": 7.985230490962686e-06, + "loss": 1.3037, + "step": 96075 + }, + { + "epoch": 1.16, + "grad_norm": 2.61727993688707, + "learning_rate": 7.984658295952263e-06, + "loss": 0.9724, + "step": 96078 + }, + { + "epoch": 1.16, + "grad_norm": 6.3426746338645845, + "learning_rate": 7.984086107819478e-06, + "loss": 1.3473, + "step": 96081 + }, + { + "epoch": 1.16, + "grad_norm": 10.488777765160942, + "learning_rate": 7.983513926566283e-06, + "loss": 1.2134, + "step": 96084 + }, + { + "epoch": 1.16, + "grad_norm": 16.574024527651847, + "learning_rate": 7.982941752194624e-06, + "loss": 1.3729, + "step": 96087 + }, + { + "epoch": 1.16, + "grad_norm": 6.876726752849689, + "learning_rate": 7.982369584706457e-06, + "loss": 1.3062, + "step": 96090 + }, + { + "epoch": 1.16, + "grad_norm": 9.979824605124175, + "learning_rate": 7.98179742410374e-06, + "loss": 0.9825, + "step": 96093 + }, + { + "epoch": 1.16, + "grad_norm": 12.386746215790241, + "learning_rate": 7.981225270388423e-06, + "loss": 1.201, + "step": 96096 + }, + { + "epoch": 1.16, + "grad_norm": 22.78404900473416, + "learning_rate": 7.980653123562454e-06, + "loss": 1.1081, + "step": 96099 + }, + { + "epoch": 1.16, + "grad_norm": 16.510945800988555, + "learning_rate": 7.980080983627786e-06, + "loss": 1.2384, + "step": 96102 + }, + { + "epoch": 1.16, + "grad_norm": 17.52531467334217, + "learning_rate": 7.979508850586376e-06, + "loss": 1.0972, + "step": 96105 + }, + { + "epoch": 1.16, + "grad_norm": 9.708603433694382, + "learning_rate": 7.978936724440174e-06, + "loss": 1.1521, + "step": 96108 + }, + { + "epoch": 1.16, + "grad_norm": 25.1383920047169, + "learning_rate": 7.978364605191137e-06, + "loss": 1.0063, + "step": 96111 + }, + { + "epoch": 1.16, + "grad_norm": 21.81509107787079, + "learning_rate": 7.977792492841213e-06, + "loss": 1.087, + "step": 96114 + }, + { + "epoch": 1.16, + "grad_norm": 30.920144941468052, + "learning_rate": 7.97722038739235e-06, + "loss": 1.1974, + "step": 96117 + }, + { + "epoch": 1.16, + "grad_norm": 33.1350932508132, + "learning_rate": 7.976648288846506e-06, + "loss": 1.6653, + "step": 96120 + }, + { + "epoch": 1.16, + "grad_norm": 11.378246507604066, + "learning_rate": 7.976076197205637e-06, + "loss": 1.5268, + "step": 96123 + }, + { + "epoch": 1.16, + "grad_norm": 5.3737259047565455, + "learning_rate": 7.975504112471689e-06, + "loss": 1.4914, + "step": 96126 + }, + { + "epoch": 1.16, + "grad_norm": 3.8987348156999824, + "learning_rate": 7.974932034646614e-06, + "loss": 1.1705, + "step": 96129 + }, + { + "epoch": 1.16, + "grad_norm": 6.007682518848358, + "learning_rate": 7.974359963732365e-06, + "loss": 1.2628, + "step": 96132 + }, + { + "epoch": 1.16, + "grad_norm": 2.88452174841637, + "learning_rate": 7.9737878997309e-06, + "loss": 1.1921, + "step": 96135 + }, + { + "epoch": 1.16, + "grad_norm": 7.102192085581919, + "learning_rate": 7.97321584264417e-06, + "loss": 1.2704, + "step": 96138 + }, + { + "epoch": 1.16, + "grad_norm": 8.265715494490076, + "learning_rate": 7.972643792474117e-06, + "loss": 0.9004, + "step": 96141 + }, + { + "epoch": 1.16, + "grad_norm": 4.046818533695352, + "learning_rate": 7.9720717492227e-06, + "loss": 1.2716, + "step": 96144 + }, + { + "epoch": 1.16, + "grad_norm": 12.610968469598106, + "learning_rate": 7.971499712891877e-06, + "loss": 1.2385, + "step": 96147 + }, + { + "epoch": 1.16, + "grad_norm": 10.718988118805237, + "learning_rate": 7.970927683483596e-06, + "loss": 0.9609, + "step": 96150 + }, + { + "epoch": 1.16, + "grad_norm": 6.8773088906970905, + "learning_rate": 7.970355660999803e-06, + "loss": 1.3869, + "step": 96153 + }, + { + "epoch": 1.16, + "grad_norm": 60.3398118243489, + "learning_rate": 7.969783645442458e-06, + "loss": 1.3429, + "step": 96156 + }, + { + "epoch": 1.16, + "grad_norm": 2.225843276436241, + "learning_rate": 7.969211636813507e-06, + "loss": 1.1511, + "step": 96159 + }, + { + "epoch": 1.16, + "grad_norm": 7.198239327292548, + "learning_rate": 7.968639635114906e-06, + "loss": 1.2554, + "step": 96162 + }, + { + "epoch": 1.16, + "grad_norm": 24.945357513198314, + "learning_rate": 7.968067640348611e-06, + "loss": 1.1798, + "step": 96165 + }, + { + "epoch": 1.16, + "grad_norm": 6.6691884728879485, + "learning_rate": 7.967495652516567e-06, + "loss": 1.2615, + "step": 96168 + }, + { + "epoch": 1.16, + "grad_norm": 5.384627823581925, + "learning_rate": 7.966923671620724e-06, + "loss": 1.2769, + "step": 96171 + }, + { + "epoch": 1.16, + "grad_norm": 32.8675028325508, + "learning_rate": 7.966351697663039e-06, + "loss": 1.2817, + "step": 96174 + }, + { + "epoch": 1.16, + "grad_norm": 15.874447924506228, + "learning_rate": 7.965779730645469e-06, + "loss": 1.2883, + "step": 96177 + }, + { + "epoch": 1.16, + "grad_norm": 25.801304226457972, + "learning_rate": 7.965207770569957e-06, + "loss": 1.3744, + "step": 96180 + }, + { + "epoch": 1.16, + "grad_norm": 16.066677935248283, + "learning_rate": 7.964635817438457e-06, + "loss": 1.3536, + "step": 96183 + }, + { + "epoch": 1.16, + "grad_norm": 19.85759710026654, + "learning_rate": 7.96406387125292e-06, + "loss": 1.3738, + "step": 96186 + }, + { + "epoch": 1.16, + "grad_norm": 8.505218129865806, + "learning_rate": 7.963491932015303e-06, + "loss": 1.4007, + "step": 96189 + }, + { + "epoch": 1.16, + "grad_norm": 33.073539053436946, + "learning_rate": 7.962919999727555e-06, + "loss": 1.2689, + "step": 96192 + }, + { + "epoch": 1.16, + "grad_norm": 9.650668946208079, + "learning_rate": 7.962348074391625e-06, + "loss": 1.4801, + "step": 96195 + }, + { + "epoch": 1.16, + "grad_norm": 7.565173972687762, + "learning_rate": 7.961776156009466e-06, + "loss": 0.8068, + "step": 96198 + }, + { + "epoch": 1.16, + "grad_norm": 5.750643961831448, + "learning_rate": 7.961204244583032e-06, + "loss": 0.924, + "step": 96201 + }, + { + "epoch": 1.16, + "grad_norm": 8.824930579044535, + "learning_rate": 7.960632340114278e-06, + "loss": 1.4302, + "step": 96204 + }, + { + "epoch": 1.16, + "grad_norm": 5.784524937228596, + "learning_rate": 7.960060442605145e-06, + "loss": 1.2992, + "step": 96207 + }, + { + "epoch": 1.16, + "grad_norm": 12.714554527525932, + "learning_rate": 7.959488552057595e-06, + "loss": 1.3443, + "step": 96210 + }, + { + "epoch": 1.16, + "grad_norm": 7.218565970367054, + "learning_rate": 7.958916668473572e-06, + "loss": 1.0292, + "step": 96213 + }, + { + "epoch": 1.16, + "grad_norm": 10.660948774764925, + "learning_rate": 7.95834479185503e-06, + "loss": 1.3476, + "step": 96216 + }, + { + "epoch": 1.16, + "grad_norm": 9.809264956904455, + "learning_rate": 7.957772922203928e-06, + "loss": 1.3523, + "step": 96219 + }, + { + "epoch": 1.16, + "grad_norm": 10.540961799235198, + "learning_rate": 7.95720105952221e-06, + "loss": 1.2208, + "step": 96222 + }, + { + "epoch": 1.16, + "grad_norm": 2.3121177392440333, + "learning_rate": 7.956629203811825e-06, + "loss": 1.383, + "step": 96225 + }, + { + "epoch": 1.16, + "grad_norm": 8.846495014685598, + "learning_rate": 7.956057355074728e-06, + "loss": 1.3179, + "step": 96228 + }, + { + "epoch": 1.16, + "grad_norm": 9.637611195895357, + "learning_rate": 7.955485513312878e-06, + "loss": 1.2425, + "step": 96231 + }, + { + "epoch": 1.16, + "grad_norm": 4.26921908600552, + "learning_rate": 7.954913678528215e-06, + "loss": 1.0854, + "step": 96234 + }, + { + "epoch": 1.16, + "grad_norm": 5.617428875297374, + "learning_rate": 7.954341850722694e-06, + "loss": 0.6839, + "step": 96237 + }, + { + "epoch": 1.16, + "grad_norm": 5.071951630374985, + "learning_rate": 7.953770029898269e-06, + "loss": 1.234, + "step": 96240 + }, + { + "epoch": 1.16, + "grad_norm": 24.393443712302922, + "learning_rate": 7.953198216056888e-06, + "loss": 1.2141, + "step": 96243 + }, + { + "epoch": 1.16, + "grad_norm": 26.02373474700959, + "learning_rate": 7.95262640920051e-06, + "loss": 1.1218, + "step": 96246 + }, + { + "epoch": 1.16, + "grad_norm": 74.16763248186932, + "learning_rate": 7.952054609331074e-06, + "loss": 1.1183, + "step": 96249 + }, + { + "epoch": 1.16, + "grad_norm": 7.871871940841245, + "learning_rate": 7.95148281645054e-06, + "loss": 1.4377, + "step": 96252 + }, + { + "epoch": 1.16, + "grad_norm": 12.546400159071768, + "learning_rate": 7.950911030560858e-06, + "loss": 0.9874, + "step": 96255 + }, + { + "epoch": 1.16, + "grad_norm": 58.4114267610316, + "learning_rate": 7.950339251663979e-06, + "loss": 1.185, + "step": 96258 + }, + { + "epoch": 1.16, + "grad_norm": 3.6300939507490124, + "learning_rate": 7.949767479761853e-06, + "loss": 1.2008, + "step": 96261 + }, + { + "epoch": 1.16, + "grad_norm": 15.230123268592893, + "learning_rate": 7.949195714856432e-06, + "loss": 1.0079, + "step": 96264 + }, + { + "epoch": 1.16, + "grad_norm": 22.452895169088634, + "learning_rate": 7.948623956949667e-06, + "loss": 0.9438, + "step": 96267 + }, + { + "epoch": 1.16, + "grad_norm": 2.79359575520591, + "learning_rate": 7.948052206043509e-06, + "loss": 1.8916, + "step": 96270 + }, + { + "epoch": 1.16, + "grad_norm": 28.19514274499773, + "learning_rate": 7.947480462139912e-06, + "loss": 1.2851, + "step": 96273 + }, + { + "epoch": 1.16, + "grad_norm": 9.226075242772781, + "learning_rate": 7.946908725240824e-06, + "loss": 1.2082, + "step": 96276 + }, + { + "epoch": 1.16, + "grad_norm": 12.569007088730757, + "learning_rate": 7.946336995348195e-06, + "loss": 1.34, + "step": 96279 + }, + { + "epoch": 1.16, + "grad_norm": 22.60889735281662, + "learning_rate": 7.945765272463979e-06, + "loss": 1.2187, + "step": 96282 + }, + { + "epoch": 1.16, + "grad_norm": 4.982874140426279, + "learning_rate": 7.94519355659013e-06, + "loss": 1.2606, + "step": 96285 + }, + { + "epoch": 1.16, + "grad_norm": 9.090491310578772, + "learning_rate": 7.944621847728593e-06, + "loss": 1.4035, + "step": 96288 + }, + { + "epoch": 1.16, + "grad_norm": 4.335775032304314, + "learning_rate": 7.944050145881317e-06, + "loss": 1.2175, + "step": 96291 + }, + { + "epoch": 1.16, + "grad_norm": 8.278378603864013, + "learning_rate": 7.943478451050261e-06, + "loss": 0.9033, + "step": 96294 + }, + { + "epoch": 1.16, + "grad_norm": 11.179392847126048, + "learning_rate": 7.942906763237372e-06, + "loss": 1.2304, + "step": 96297 + }, + { + "epoch": 1.16, + "grad_norm": 3.157392094627232, + "learning_rate": 7.942335082444603e-06, + "loss": 1.104, + "step": 96300 + }, + { + "epoch": 1.16, + "grad_norm": 10.778734050755489, + "learning_rate": 7.9417634086739e-06, + "loss": 1.1191, + "step": 96303 + }, + { + "epoch": 1.16, + "grad_norm": 16.89357132154241, + "learning_rate": 7.941191741927219e-06, + "loss": 1.5716, + "step": 96306 + }, + { + "epoch": 1.16, + "grad_norm": 7.841926116009393, + "learning_rate": 7.940620082206507e-06, + "loss": 1.2068, + "step": 96309 + }, + { + "epoch": 1.16, + "grad_norm": 3.4536564134654464, + "learning_rate": 7.94004842951372e-06, + "loss": 1.1659, + "step": 96312 + }, + { + "epoch": 1.16, + "grad_norm": 8.526550606988826, + "learning_rate": 7.939476783850805e-06, + "loss": 1.2482, + "step": 96315 + }, + { + "epoch": 1.16, + "grad_norm": 8.623872076077026, + "learning_rate": 7.938905145219713e-06, + "loss": 1.1075, + "step": 96318 + }, + { + "epoch": 1.16, + "grad_norm": 5.5810163694582755, + "learning_rate": 7.938333513622394e-06, + "loss": 0.9632, + "step": 96321 + }, + { + "epoch": 1.16, + "grad_norm": 14.653892664601907, + "learning_rate": 7.937761889060798e-06, + "loss": 1.4214, + "step": 96324 + }, + { + "epoch": 1.16, + "grad_norm": 10.110568208369136, + "learning_rate": 7.937190271536884e-06, + "loss": 1.2364, + "step": 96327 + }, + { + "epoch": 1.16, + "grad_norm": 13.337011135238109, + "learning_rate": 7.936618661052595e-06, + "loss": 1.4857, + "step": 96330 + }, + { + "epoch": 1.16, + "grad_norm": 29.040212563449188, + "learning_rate": 7.93604705760988e-06, + "loss": 1.0608, + "step": 96333 + }, + { + "epoch": 1.16, + "grad_norm": 9.94355435366581, + "learning_rate": 7.935475461210696e-06, + "loss": 0.7924, + "step": 96336 + }, + { + "epoch": 1.16, + "grad_norm": 10.7399438528151, + "learning_rate": 7.93490387185699e-06, + "loss": 1.0146, + "step": 96339 + }, + { + "epoch": 1.16, + "grad_norm": 4.706110078025285, + "learning_rate": 7.934332289550712e-06, + "loss": 1.0906, + "step": 96342 + }, + { + "epoch": 1.16, + "grad_norm": 6.635625099759149, + "learning_rate": 7.933760714293815e-06, + "loss": 1.3971, + "step": 96345 + }, + { + "epoch": 1.16, + "grad_norm": 5.108317733152859, + "learning_rate": 7.933189146088247e-06, + "loss": 1.4225, + "step": 96348 + }, + { + "epoch": 1.16, + "grad_norm": 10.530317137206472, + "learning_rate": 7.93261758493596e-06, + "loss": 1.1086, + "step": 96351 + }, + { + "epoch": 1.16, + "grad_norm": 7.490070736510484, + "learning_rate": 7.932046030838908e-06, + "loss": 1.2586, + "step": 96354 + }, + { + "epoch": 1.16, + "grad_norm": 3.677797348666098, + "learning_rate": 7.931474483799035e-06, + "loss": 1.4259, + "step": 96357 + }, + { + "epoch": 1.16, + "grad_norm": 6.1595002485357035, + "learning_rate": 7.930902943818296e-06, + "loss": 0.9369, + "step": 96360 + }, + { + "epoch": 1.16, + "grad_norm": 3.7666139490732244, + "learning_rate": 7.930331410898636e-06, + "loss": 1.0332, + "step": 96363 + }, + { + "epoch": 1.16, + "grad_norm": 5.952233948895658, + "learning_rate": 7.929759885042015e-06, + "loss": 1.3452, + "step": 96366 + }, + { + "epoch": 1.16, + "grad_norm": 4.339295035339757, + "learning_rate": 7.929188366250373e-06, + "loss": 1.4453, + "step": 96369 + }, + { + "epoch": 1.16, + "grad_norm": 7.239819744957411, + "learning_rate": 7.928616854525668e-06, + "loss": 1.1786, + "step": 96372 + }, + { + "epoch": 1.16, + "grad_norm": 14.779458411543107, + "learning_rate": 7.928045349869846e-06, + "loss": 1.2794, + "step": 96375 + }, + { + "epoch": 1.16, + "grad_norm": 3.585052963357124, + "learning_rate": 7.927473852284858e-06, + "loss": 1.15, + "step": 96378 + }, + { + "epoch": 1.16, + "grad_norm": 10.694470246110564, + "learning_rate": 7.92690236177266e-06, + "loss": 0.793, + "step": 96381 + }, + { + "epoch": 1.16, + "grad_norm": 9.409337642136625, + "learning_rate": 7.926330878335195e-06, + "loss": 1.4778, + "step": 96384 + }, + { + "epoch": 1.16, + "grad_norm": 2.591017787790116, + "learning_rate": 7.925759401974414e-06, + "loss": 1.321, + "step": 96387 + }, + { + "epoch": 1.16, + "grad_norm": 4.150820171187996, + "learning_rate": 7.92518793269227e-06, + "loss": 1.2405, + "step": 96390 + }, + { + "epoch": 1.16, + "grad_norm": 10.300716071828054, + "learning_rate": 7.924616470490718e-06, + "loss": 0.7658, + "step": 96393 + }, + { + "epoch": 1.16, + "grad_norm": 18.321859355523483, + "learning_rate": 7.924045015371694e-06, + "loss": 1.0521, + "step": 96396 + }, + { + "epoch": 1.16, + "grad_norm": 8.89334806002527, + "learning_rate": 7.923473567337158e-06, + "loss": 0.9159, + "step": 96399 + }, + { + "epoch": 1.16, + "grad_norm": 12.590794864945915, + "learning_rate": 7.922902126389061e-06, + "loss": 1.2361, + "step": 96402 + }, + { + "epoch": 1.16, + "grad_norm": 13.017719490674265, + "learning_rate": 7.92233069252935e-06, + "loss": 1.3582, + "step": 96405 + }, + { + "epoch": 1.16, + "grad_norm": 6.690303203559339, + "learning_rate": 7.92175926575998e-06, + "loss": 1.2055, + "step": 96408 + }, + { + "epoch": 1.16, + "grad_norm": 6.459700997155608, + "learning_rate": 7.921187846082889e-06, + "loss": 0.9512, + "step": 96411 + }, + { + "epoch": 1.16, + "grad_norm": 11.825223114599039, + "learning_rate": 7.920616433500041e-06, + "loss": 0.9457, + "step": 96414 + }, + { + "epoch": 1.16, + "grad_norm": 8.857983606026915, + "learning_rate": 7.920045028013377e-06, + "loss": 1.1158, + "step": 96417 + }, + { + "epoch": 1.16, + "grad_norm": 20.531126162309867, + "learning_rate": 7.919473629624854e-06, + "loss": 1.2015, + "step": 96420 + }, + { + "epoch": 1.16, + "grad_norm": 10.366349363486338, + "learning_rate": 7.918902238336415e-06, + "loss": 0.7898, + "step": 96423 + }, + { + "epoch": 1.16, + "grad_norm": 6.303557916336879, + "learning_rate": 7.918330854150013e-06, + "loss": 1.1909, + "step": 96426 + }, + { + "epoch": 1.16, + "grad_norm": 15.80240033767494, + "learning_rate": 7.917759477067598e-06, + "loss": 0.9235, + "step": 96429 + }, + { + "epoch": 1.16, + "grad_norm": 3.363429186819492, + "learning_rate": 7.917188107091122e-06, + "loss": 1.3856, + "step": 96432 + }, + { + "epoch": 1.16, + "grad_norm": 5.6771210186958045, + "learning_rate": 7.916616744222531e-06, + "loss": 1.3321, + "step": 96435 + }, + { + "epoch": 1.16, + "grad_norm": 9.183052870183058, + "learning_rate": 7.916045388463778e-06, + "loss": 1.4341, + "step": 96438 + }, + { + "epoch": 1.16, + "grad_norm": 9.808821740479996, + "learning_rate": 7.91547403981681e-06, + "loss": 1.0305, + "step": 96441 + }, + { + "epoch": 1.16, + "grad_norm": 8.57874108594841, + "learning_rate": 7.914902698283579e-06, + "loss": 1.2329, + "step": 96444 + }, + { + "epoch": 1.16, + "grad_norm": 10.094630569667158, + "learning_rate": 7.914331363866036e-06, + "loss": 0.9803, + "step": 96447 + }, + { + "epoch": 1.16, + "grad_norm": 8.175548507116885, + "learning_rate": 7.913760036566125e-06, + "loss": 1.2158, + "step": 96450 + }, + { + "epoch": 1.16, + "grad_norm": 4.316194817107968, + "learning_rate": 7.9131887163858e-06, + "loss": 1.4987, + "step": 96453 + }, + { + "epoch": 1.16, + "grad_norm": 15.0665243317859, + "learning_rate": 7.912617403327012e-06, + "loss": 1.3912, + "step": 96456 + }, + { + "epoch": 1.16, + "grad_norm": 8.813993543608557, + "learning_rate": 7.912046097391707e-06, + "loss": 1.2074, + "step": 96459 + }, + { + "epoch": 1.16, + "grad_norm": 16.561727457937057, + "learning_rate": 7.91147479858184e-06, + "loss": 1.4761, + "step": 96462 + }, + { + "epoch": 1.16, + "grad_norm": 13.229702298225913, + "learning_rate": 7.910903506899355e-06, + "loss": 1.1723, + "step": 96465 + }, + { + "epoch": 1.16, + "grad_norm": 30.666207171953573, + "learning_rate": 7.910332222346203e-06, + "loss": 1.2796, + "step": 96468 + }, + { + "epoch": 1.16, + "grad_norm": 17.909325896500313, + "learning_rate": 7.909760944924334e-06, + "loss": 1.1913, + "step": 96471 + }, + { + "epoch": 1.16, + "grad_norm": 8.912715970525692, + "learning_rate": 7.909189674635702e-06, + "loss": 0.9728, + "step": 96474 + }, + { + "epoch": 1.16, + "grad_norm": 6.104017634813369, + "learning_rate": 7.90861841148225e-06, + "loss": 1.0773, + "step": 96477 + }, + { + "epoch": 1.16, + "grad_norm": 7.416387676232407, + "learning_rate": 7.90804715546593e-06, + "loss": 1.3446, + "step": 96480 + }, + { + "epoch": 1.16, + "grad_norm": 5.9384683316046365, + "learning_rate": 7.90747590658869e-06, + "loss": 1.0623, + "step": 96483 + }, + { + "epoch": 1.16, + "grad_norm": 25.80267478113431, + "learning_rate": 7.906904664852482e-06, + "loss": 1.2359, + "step": 96486 + }, + { + "epoch": 1.16, + "grad_norm": 18.50051783921476, + "learning_rate": 7.906333430259259e-06, + "loss": 1.2196, + "step": 96489 + }, + { + "epoch": 1.16, + "grad_norm": 20.76335522983382, + "learning_rate": 7.905762202810961e-06, + "loss": 1.3415, + "step": 96492 + }, + { + "epoch": 1.16, + "grad_norm": 4.775948670333234, + "learning_rate": 7.905190982509541e-06, + "loss": 1.1539, + "step": 96495 + }, + { + "epoch": 1.16, + "grad_norm": 6.230372062981808, + "learning_rate": 7.904619769356952e-06, + "loss": 1.5568, + "step": 96498 + }, + { + "epoch": 1.16, + "grad_norm": 8.719629053668934, + "learning_rate": 7.904048563355144e-06, + "loss": 1.3323, + "step": 96501 + }, + { + "epoch": 1.16, + "grad_norm": 8.022365138588173, + "learning_rate": 7.903477364506058e-06, + "loss": 1.2184, + "step": 96504 + }, + { + "epoch": 1.16, + "grad_norm": 3.284181288039269, + "learning_rate": 7.902906172811648e-06, + "loss": 1.176, + "step": 96507 + }, + { + "epoch": 1.16, + "grad_norm": 13.97893389722988, + "learning_rate": 7.902334988273867e-06, + "loss": 0.9029, + "step": 96510 + }, + { + "epoch": 1.16, + "grad_norm": 10.102224089671743, + "learning_rate": 7.901763810894661e-06, + "loss": 1.0411, + "step": 96513 + }, + { + "epoch": 1.16, + "grad_norm": 3.346420195799892, + "learning_rate": 7.901192640675982e-06, + "loss": 1.3116, + "step": 96516 + }, + { + "epoch": 1.16, + "grad_norm": 19.853266686439333, + "learning_rate": 7.900621477619769e-06, + "loss": 1.3012, + "step": 96519 + }, + { + "epoch": 1.16, + "grad_norm": 6.07203247613834, + "learning_rate": 7.900050321727986e-06, + "loss": 1.269, + "step": 96522 + }, + { + "epoch": 1.16, + "grad_norm": 7.683577705990207, + "learning_rate": 7.89947917300257e-06, + "loss": 1.1375, + "step": 96525 + }, + { + "epoch": 1.16, + "grad_norm": 4.729488085332168, + "learning_rate": 7.89890803144548e-06, + "loss": 0.9567, + "step": 96528 + }, + { + "epoch": 1.16, + "grad_norm": 7.829587674114825, + "learning_rate": 7.898336897058658e-06, + "loss": 1.1957, + "step": 96531 + }, + { + "epoch": 1.16, + "grad_norm": 7.017832192148959, + "learning_rate": 7.897765769844053e-06, + "loss": 1.0923, + "step": 96534 + }, + { + "epoch": 1.16, + "grad_norm": 5.15389731572278, + "learning_rate": 7.897194649803614e-06, + "loss": 1.4197, + "step": 96537 + }, + { + "epoch": 1.16, + "grad_norm": 9.417247139228794, + "learning_rate": 7.896623536939297e-06, + "loss": 1.3179, + "step": 96540 + }, + { + "epoch": 1.16, + "grad_norm": 10.596039624892924, + "learning_rate": 7.896052431253048e-06, + "loss": 1.2955, + "step": 96543 + }, + { + "epoch": 1.16, + "grad_norm": 16.264452265335258, + "learning_rate": 7.895481332746809e-06, + "loss": 1.0098, + "step": 96546 + }, + { + "epoch": 1.16, + "grad_norm": 27.315579249829923, + "learning_rate": 7.894910241422535e-06, + "loss": 1.346, + "step": 96549 + }, + { + "epoch": 1.16, + "grad_norm": 2.3734821612898203, + "learning_rate": 7.894339157282177e-06, + "loss": 1.2005, + "step": 96552 + }, + { + "epoch": 1.16, + "grad_norm": 18.00668878158654, + "learning_rate": 7.893768080327681e-06, + "loss": 1.3966, + "step": 96555 + }, + { + "epoch": 1.16, + "grad_norm": 14.982107151169238, + "learning_rate": 7.893197010560994e-06, + "loss": 1.5651, + "step": 96558 + }, + { + "epoch": 1.16, + "grad_norm": 16.953005806503636, + "learning_rate": 7.892625947984063e-06, + "loss": 1.3595, + "step": 96561 + }, + { + "epoch": 1.16, + "grad_norm": 7.557712725581497, + "learning_rate": 7.892054892598847e-06, + "loss": 1.6181, + "step": 96564 + }, + { + "epoch": 1.16, + "grad_norm": 7.084140231011369, + "learning_rate": 7.891483844407283e-06, + "loss": 1.1482, + "step": 96567 + }, + { + "epoch": 1.16, + "grad_norm": 6.895363200295716, + "learning_rate": 7.890912803411332e-06, + "loss": 1.1019, + "step": 96570 + }, + { + "epoch": 1.16, + "grad_norm": 11.05849467225764, + "learning_rate": 7.890341769612933e-06, + "loss": 1.0941, + "step": 96573 + }, + { + "epoch": 1.16, + "grad_norm": 17.014906102143996, + "learning_rate": 7.889770743014035e-06, + "loss": 1.1295, + "step": 96576 + }, + { + "epoch": 1.16, + "grad_norm": 10.795483193657972, + "learning_rate": 7.889199723616589e-06, + "loss": 1.1879, + "step": 96579 + }, + { + "epoch": 1.16, + "grad_norm": 4.820085440744451, + "learning_rate": 7.888628711422548e-06, + "loss": 1.7335, + "step": 96582 + }, + { + "epoch": 1.16, + "grad_norm": 9.871221113703587, + "learning_rate": 7.888057706433856e-06, + "loss": 1.6745, + "step": 96585 + }, + { + "epoch": 1.16, + "grad_norm": 32.97375717189734, + "learning_rate": 7.88748670865246e-06, + "loss": 1.2281, + "step": 96588 + }, + { + "epoch": 1.16, + "grad_norm": 6.7217058701003305, + "learning_rate": 7.886915718080311e-06, + "loss": 1.3449, + "step": 96591 + }, + { + "epoch": 1.16, + "grad_norm": 9.240497152865164, + "learning_rate": 7.886344734719358e-06, + "loss": 1.3869, + "step": 96594 + }, + { + "epoch": 1.16, + "grad_norm": 7.204852609275229, + "learning_rate": 7.885773758571551e-06, + "loss": 1.2194, + "step": 96597 + }, + { + "epoch": 1.16, + "grad_norm": 11.962677689328428, + "learning_rate": 7.885202789638836e-06, + "loss": 1.2652, + "step": 96600 + }, + { + "epoch": 1.16, + "grad_norm": 12.016893660817843, + "learning_rate": 7.88463182792316e-06, + "loss": 1.1185, + "step": 96603 + }, + { + "epoch": 1.16, + "grad_norm": 6.10188011419617, + "learning_rate": 7.884060873426474e-06, + "loss": 1.1345, + "step": 96606 + }, + { + "epoch": 1.16, + "grad_norm": 5.995564059257896, + "learning_rate": 7.88348992615073e-06, + "loss": 1.3053, + "step": 96609 + }, + { + "epoch": 1.16, + "grad_norm": 7.660047877774891, + "learning_rate": 7.882918986097869e-06, + "loss": 1.1382, + "step": 96612 + }, + { + "epoch": 1.16, + "grad_norm": 5.689079579111631, + "learning_rate": 7.882348053269841e-06, + "loss": 1.3081, + "step": 96615 + }, + { + "epoch": 1.16, + "grad_norm": 3.014474033593283, + "learning_rate": 7.881777127668599e-06, + "loss": 1.1253, + "step": 96618 + }, + { + "epoch": 1.16, + "grad_norm": 4.653124898109236, + "learning_rate": 7.881206209296088e-06, + "loss": 1.4851, + "step": 96621 + }, + { + "epoch": 1.16, + "grad_norm": 4.588363210185443, + "learning_rate": 7.88063529815426e-06, + "loss": 1.1129, + "step": 96624 + }, + { + "epoch": 1.16, + "grad_norm": 6.3332321742771205, + "learning_rate": 7.880064394245059e-06, + "loss": 1.2436, + "step": 96627 + }, + { + "epoch": 1.16, + "grad_norm": 10.307889605076847, + "learning_rate": 7.879493497570432e-06, + "loss": 1.2963, + "step": 96630 + }, + { + "epoch": 1.16, + "grad_norm": 10.084630300452254, + "learning_rate": 7.87892260813233e-06, + "loss": 1.0411, + "step": 96633 + }, + { + "epoch": 1.16, + "grad_norm": 8.069780203975514, + "learning_rate": 7.878351725932704e-06, + "loss": 1.2123, + "step": 96636 + }, + { + "epoch": 1.16, + "grad_norm": 5.117758804528068, + "learning_rate": 7.877780850973498e-06, + "loss": 1.336, + "step": 96639 + }, + { + "epoch": 1.16, + "grad_norm": 7.886158282650577, + "learning_rate": 7.877209983256661e-06, + "loss": 1.1834, + "step": 96642 + }, + { + "epoch": 1.16, + "grad_norm": 8.192266522582075, + "learning_rate": 7.87663912278414e-06, + "loss": 1.4589, + "step": 96645 + }, + { + "epoch": 1.16, + "grad_norm": 6.590857616861575, + "learning_rate": 7.876068269557888e-06, + "loss": 0.8888, + "step": 96648 + }, + { + "epoch": 1.16, + "grad_norm": 3.199903086333823, + "learning_rate": 7.875497423579852e-06, + "loss": 1.4916, + "step": 96651 + }, + { + "epoch": 1.16, + "grad_norm": 23.292827856069916, + "learning_rate": 7.874926584851973e-06, + "loss": 1.138, + "step": 96654 + }, + { + "epoch": 1.16, + "grad_norm": 8.689009275623267, + "learning_rate": 7.874355753376205e-06, + "loss": 1.1429, + "step": 96657 + }, + { + "epoch": 1.16, + "grad_norm": 7.833016636332273, + "learning_rate": 7.873784929154496e-06, + "loss": 1.1575, + "step": 96660 + }, + { + "epoch": 1.16, + "grad_norm": 6.236950731193374, + "learning_rate": 7.873214112188797e-06, + "loss": 1.1892, + "step": 96663 + }, + { + "epoch": 1.16, + "grad_norm": 12.22810002978966, + "learning_rate": 7.872643302481047e-06, + "loss": 1.0838, + "step": 96666 + }, + { + "epoch": 1.16, + "grad_norm": 4.393788125593726, + "learning_rate": 7.8720725000332e-06, + "loss": 1.4484, + "step": 96669 + }, + { + "epoch": 1.16, + "grad_norm": 57.61167542226641, + "learning_rate": 7.871501704847203e-06, + "loss": 1.2268, + "step": 96672 + }, + { + "epoch": 1.16, + "grad_norm": 18.975590789762393, + "learning_rate": 7.870930916925003e-06, + "loss": 1.3513, + "step": 96675 + }, + { + "epoch": 1.16, + "grad_norm": 10.874581162417192, + "learning_rate": 7.870360136268554e-06, + "loss": 1.442, + "step": 96678 + }, + { + "epoch": 1.16, + "grad_norm": 8.99343651891392, + "learning_rate": 7.869789362879796e-06, + "loss": 1.4259, + "step": 96681 + }, + { + "epoch": 1.16, + "grad_norm": 4.751145613931663, + "learning_rate": 7.869218596760676e-06, + "loss": 1.5255, + "step": 96684 + }, + { + "epoch": 1.16, + "grad_norm": 10.472461218640062, + "learning_rate": 7.868647837913148e-06, + "loss": 1.5355, + "step": 96687 + }, + { + "epoch": 1.16, + "grad_norm": 5.517741583251109, + "learning_rate": 7.868077086339162e-06, + "loss": 1.0401, + "step": 96690 + }, + { + "epoch": 1.16, + "grad_norm": 5.203248696331991, + "learning_rate": 7.867506342040656e-06, + "loss": 1.0634, + "step": 96693 + }, + { + "epoch": 1.16, + "grad_norm": 13.454889098197878, + "learning_rate": 7.866935605019583e-06, + "loss": 1.3546, + "step": 96696 + }, + { + "epoch": 1.16, + "grad_norm": 1.9880335070408433, + "learning_rate": 7.866364875277889e-06, + "loss": 1.4674, + "step": 96699 + }, + { + "epoch": 1.16, + "grad_norm": 9.466490296886064, + "learning_rate": 7.865794152817526e-06, + "loss": 1.3918, + "step": 96702 + }, + { + "epoch": 1.16, + "grad_norm": 19.89467213818782, + "learning_rate": 7.86522343764044e-06, + "loss": 1.2772, + "step": 96705 + }, + { + "epoch": 1.16, + "grad_norm": 8.99566537015131, + "learning_rate": 7.864652729748572e-06, + "loss": 1.2998, + "step": 96708 + }, + { + "epoch": 1.16, + "grad_norm": 13.616195940623227, + "learning_rate": 7.86408202914388e-06, + "loss": 1.2131, + "step": 96711 + }, + { + "epoch": 1.16, + "grad_norm": 4.713455678927719, + "learning_rate": 7.863511335828305e-06, + "loss": 1.2097, + "step": 96714 + }, + { + "epoch": 1.16, + "grad_norm": 6.045876820162057, + "learning_rate": 7.862940649803799e-06, + "loss": 1.3009, + "step": 96717 + }, + { + "epoch": 1.16, + "grad_norm": 14.45266082234278, + "learning_rate": 7.862369971072301e-06, + "loss": 1.5845, + "step": 96720 + }, + { + "epoch": 1.16, + "grad_norm": 19.459884260154112, + "learning_rate": 7.861799299635768e-06, + "loss": 1.1056, + "step": 96723 + }, + { + "epoch": 1.16, + "grad_norm": 8.021853648876146, + "learning_rate": 7.861228635496142e-06, + "loss": 1.1361, + "step": 96726 + }, + { + "epoch": 1.16, + "grad_norm": 26.518718917275883, + "learning_rate": 7.860657978655372e-06, + "loss": 1.1614, + "step": 96729 + }, + { + "epoch": 1.16, + "grad_norm": 43.636833834672984, + "learning_rate": 7.86008732911541e-06, + "loss": 1.0074, + "step": 96732 + }, + { + "epoch": 1.16, + "grad_norm": 13.653520336984311, + "learning_rate": 7.859516686878197e-06, + "loss": 0.9177, + "step": 96735 + }, + { + "epoch": 1.16, + "grad_norm": 3.976300340411407, + "learning_rate": 7.85894605194568e-06, + "loss": 1.0922, + "step": 96738 + }, + { + "epoch": 1.16, + "grad_norm": 9.40930032664075, + "learning_rate": 7.85837542431981e-06, + "loss": 1.6222, + "step": 96741 + }, + { + "epoch": 1.16, + "grad_norm": 16.53691573728128, + "learning_rate": 7.857804804002537e-06, + "loss": 1.1467, + "step": 96744 + }, + { + "epoch": 1.16, + "grad_norm": 13.376427072682013, + "learning_rate": 7.857234190995802e-06, + "loss": 1.2646, + "step": 96747 + }, + { + "epoch": 1.16, + "grad_norm": 17.736609717757236, + "learning_rate": 7.856663585301553e-06, + "loss": 0.9613, + "step": 96750 + }, + { + "epoch": 1.16, + "grad_norm": 2.5554032316967183, + "learning_rate": 7.856092986921742e-06, + "loss": 1.287, + "step": 96753 + }, + { + "epoch": 1.16, + "grad_norm": 25.737896857875512, + "learning_rate": 7.85552239585831e-06, + "loss": 0.812, + "step": 96756 + }, + { + "epoch": 1.16, + "grad_norm": 10.87351854709453, + "learning_rate": 7.854951812113212e-06, + "loss": 1.3701, + "step": 96759 + }, + { + "epoch": 1.16, + "grad_norm": 14.378897882966191, + "learning_rate": 7.854381235688388e-06, + "loss": 1.0448, + "step": 96762 + }, + { + "epoch": 1.16, + "grad_norm": 6.385917840419892, + "learning_rate": 7.85381066658579e-06, + "loss": 1.288, + "step": 96765 + }, + { + "epoch": 1.16, + "grad_norm": 17.106482639162657, + "learning_rate": 7.85324010480736e-06, + "loss": 1.22, + "step": 96768 + }, + { + "epoch": 1.16, + "grad_norm": 12.134519564692226, + "learning_rate": 7.852669550355052e-06, + "loss": 1.0845, + "step": 96771 + }, + { + "epoch": 1.16, + "grad_norm": 9.70613796488791, + "learning_rate": 7.852099003230807e-06, + "loss": 1.3025, + "step": 96774 + }, + { + "epoch": 1.16, + "grad_norm": 9.725326024068625, + "learning_rate": 7.851528463436575e-06, + "loss": 1.5472, + "step": 96777 + }, + { + "epoch": 1.16, + "grad_norm": 17.730229365783337, + "learning_rate": 7.850957930974301e-06, + "loss": 0.9698, + "step": 96780 + }, + { + "epoch": 1.16, + "grad_norm": 3.451243936594347, + "learning_rate": 7.850387405845933e-06, + "loss": 1.6131, + "step": 96783 + }, + { + "epoch": 1.16, + "grad_norm": 5.66235208856686, + "learning_rate": 7.849816888053422e-06, + "loss": 0.8408, + "step": 96786 + }, + { + "epoch": 1.16, + "grad_norm": 3.044909903648988, + "learning_rate": 7.849246377598711e-06, + "loss": 1.059, + "step": 96789 + }, + { + "epoch": 1.16, + "grad_norm": 4.529761987609719, + "learning_rate": 7.848675874483745e-06, + "loss": 1.1379, + "step": 96792 + }, + { + "epoch": 1.16, + "grad_norm": 17.4479194523972, + "learning_rate": 7.848105378710473e-06, + "loss": 1.661, + "step": 96795 + }, + { + "epoch": 1.16, + "grad_norm": 11.909077857162226, + "learning_rate": 7.847534890280847e-06, + "loss": 1.3556, + "step": 96798 + }, + { + "epoch": 1.16, + "grad_norm": 6.884464569859545, + "learning_rate": 7.846964409196807e-06, + "loss": 1.1606, + "step": 96801 + }, + { + "epoch": 1.16, + "grad_norm": 39.30568590134909, + "learning_rate": 7.846393935460298e-06, + "loss": 1.0887, + "step": 96804 + }, + { + "epoch": 1.16, + "grad_norm": 11.959750264642361, + "learning_rate": 7.845823469073275e-06, + "loss": 1.4241, + "step": 96807 + }, + { + "epoch": 1.16, + "grad_norm": 6.130898367141622, + "learning_rate": 7.845253010037677e-06, + "loss": 1.2072, + "step": 96810 + }, + { + "epoch": 1.16, + "grad_norm": 9.23122744431043, + "learning_rate": 7.84468255835546e-06, + "loss": 1.864, + "step": 96813 + }, + { + "epoch": 1.16, + "grad_norm": 42.952178524739935, + "learning_rate": 7.844112114028559e-06, + "loss": 1.0684, + "step": 96816 + }, + { + "epoch": 1.16, + "grad_norm": 4.406500371012087, + "learning_rate": 7.84354167705893e-06, + "loss": 1.4223, + "step": 96819 + }, + { + "epoch": 1.16, + "grad_norm": 7.80050389021075, + "learning_rate": 7.842971247448515e-06, + "loss": 1.7183, + "step": 96822 + }, + { + "epoch": 1.16, + "grad_norm": 11.998140004553475, + "learning_rate": 7.842400825199265e-06, + "loss": 1.2246, + "step": 96825 + }, + { + "epoch": 1.16, + "grad_norm": 11.811199074128156, + "learning_rate": 7.841830410313121e-06, + "loss": 0.9752, + "step": 96828 + }, + { + "epoch": 1.16, + "grad_norm": 7.5642531807198194, + "learning_rate": 7.841260002792034e-06, + "loss": 0.9248, + "step": 96831 + }, + { + "epoch": 1.16, + "grad_norm": 5.047099594179063, + "learning_rate": 7.840689602637947e-06, + "loss": 0.9861, + "step": 96834 + }, + { + "epoch": 1.16, + "grad_norm": 10.817738721598614, + "learning_rate": 7.840119209852807e-06, + "loss": 1.5396, + "step": 96837 + }, + { + "epoch": 1.16, + "grad_norm": 24.807787515300436, + "learning_rate": 7.83954882443857e-06, + "loss": 1.107, + "step": 96840 + }, + { + "epoch": 1.16, + "grad_norm": 24.650633446002335, + "learning_rate": 7.83897844639717e-06, + "loss": 1.2586, + "step": 96843 + }, + { + "epoch": 1.16, + "grad_norm": 12.342547649583436, + "learning_rate": 7.838408075730556e-06, + "loss": 1.2308, + "step": 96846 + }, + { + "epoch": 1.16, + "grad_norm": 10.716414132243187, + "learning_rate": 7.837837712440678e-06, + "loss": 1.4135, + "step": 96849 + }, + { + "epoch": 1.16, + "grad_norm": 13.294542611527188, + "learning_rate": 7.837267356529483e-06, + "loss": 1.2913, + "step": 96852 + }, + { + "epoch": 1.16, + "grad_norm": 8.897492886035923, + "learning_rate": 7.836697007998916e-06, + "loss": 1.1479, + "step": 96855 + }, + { + "epoch": 1.16, + "grad_norm": 7.503739332952797, + "learning_rate": 7.836126666850918e-06, + "loss": 1.3862, + "step": 96858 + }, + { + "epoch": 1.16, + "grad_norm": 15.738644560040024, + "learning_rate": 7.835556333087446e-06, + "loss": 1.1313, + "step": 96861 + }, + { + "epoch": 1.16, + "grad_norm": 15.30604186490166, + "learning_rate": 7.834986006710436e-06, + "loss": 1.4583, + "step": 96864 + }, + { + "epoch": 1.16, + "grad_norm": 12.008858256412427, + "learning_rate": 7.834415687721844e-06, + "loss": 1.2423, + "step": 96867 + }, + { + "epoch": 1.16, + "grad_norm": 15.777415916346923, + "learning_rate": 7.833845376123606e-06, + "loss": 1.1009, + "step": 96870 + }, + { + "epoch": 1.16, + "grad_norm": 12.21904584131503, + "learning_rate": 7.833275071917675e-06, + "loss": 1.3108, + "step": 96873 + }, + { + "epoch": 1.16, + "grad_norm": 17.748216772517825, + "learning_rate": 7.832704775105996e-06, + "loss": 1.1596, + "step": 96876 + }, + { + "epoch": 1.16, + "grad_norm": 3.8975652181754343, + "learning_rate": 7.832134485690517e-06, + "loss": 1.2686, + "step": 96879 + }, + { + "epoch": 1.16, + "grad_norm": 8.397865263334063, + "learning_rate": 7.831564203673178e-06, + "loss": 1.327, + "step": 96882 + }, + { + "epoch": 1.17, + "grad_norm": 11.06794915907594, + "learning_rate": 7.830993929055934e-06, + "loss": 1.0389, + "step": 96885 + }, + { + "epoch": 1.17, + "grad_norm": 4.5878356315817435, + "learning_rate": 7.830423661840723e-06, + "loss": 1.051, + "step": 96888 + }, + { + "epoch": 1.17, + "grad_norm": 29.15925178053024, + "learning_rate": 7.829853402029497e-06, + "loss": 1.1308, + "step": 96891 + }, + { + "epoch": 1.17, + "grad_norm": 6.298155304944311, + "learning_rate": 7.829283149624198e-06, + "loss": 1.2145, + "step": 96894 + }, + { + "epoch": 1.17, + "grad_norm": 2.876962909940948, + "learning_rate": 7.828712904626774e-06, + "loss": 1.1248, + "step": 96897 + }, + { + "epoch": 1.17, + "grad_norm": 2.9235281073863275, + "learning_rate": 7.82814266703917e-06, + "loss": 1.095, + "step": 96900 + }, + { + "epoch": 1.17, + "grad_norm": 8.862411299605396, + "learning_rate": 7.827572436863334e-06, + "loss": 1.299, + "step": 96903 + }, + { + "epoch": 1.17, + "grad_norm": 40.29777379227539, + "learning_rate": 7.827002214101213e-06, + "loss": 1.4023, + "step": 96906 + }, + { + "epoch": 1.17, + "grad_norm": 6.843678528446429, + "learning_rate": 7.826431998754747e-06, + "loss": 1.2174, + "step": 96909 + }, + { + "epoch": 1.17, + "grad_norm": 6.926709744211904, + "learning_rate": 7.825861790825885e-06, + "loss": 1.556, + "step": 96912 + }, + { + "epoch": 1.17, + "grad_norm": 6.885631528814326, + "learning_rate": 7.825291590316576e-06, + "loss": 1.3169, + "step": 96915 + }, + { + "epoch": 1.17, + "grad_norm": 14.080498335040101, + "learning_rate": 7.824721397228761e-06, + "loss": 1.1214, + "step": 96918 + }, + { + "epoch": 1.17, + "grad_norm": 5.772673655459286, + "learning_rate": 7.824151211564393e-06, + "loss": 1.0032, + "step": 96921 + }, + { + "epoch": 1.17, + "grad_norm": 6.090075141937399, + "learning_rate": 7.823581033325409e-06, + "loss": 1.1579, + "step": 96924 + }, + { + "epoch": 1.17, + "grad_norm": 2.9079614514665417, + "learning_rate": 7.82301086251376e-06, + "loss": 0.9465, + "step": 96927 + }, + { + "epoch": 1.17, + "grad_norm": 9.162720562784488, + "learning_rate": 7.82244069913139e-06, + "loss": 1.371, + "step": 96930 + }, + { + "epoch": 1.17, + "grad_norm": 11.217460384226403, + "learning_rate": 7.82187054318025e-06, + "loss": 1.306, + "step": 96933 + }, + { + "epoch": 1.17, + "grad_norm": 19.003246757403637, + "learning_rate": 7.821300394662277e-06, + "loss": 1.3663, + "step": 96936 + }, + { + "epoch": 1.17, + "grad_norm": 14.612882465134502, + "learning_rate": 7.820730253579421e-06, + "loss": 1.0976, + "step": 96939 + }, + { + "epoch": 1.17, + "grad_norm": 11.915757837740143, + "learning_rate": 7.820160119933628e-06, + "loss": 1.1253, + "step": 96942 + }, + { + "epoch": 1.17, + "grad_norm": 7.510284069046425, + "learning_rate": 7.819589993726845e-06, + "loss": 1.1978, + "step": 96945 + }, + { + "epoch": 1.17, + "grad_norm": 10.348671661403689, + "learning_rate": 7.819019874961017e-06, + "loss": 1.4803, + "step": 96948 + }, + { + "epoch": 1.17, + "grad_norm": 10.551269934221812, + "learning_rate": 7.818449763638085e-06, + "loss": 1.4298, + "step": 96951 + }, + { + "epoch": 1.17, + "grad_norm": 16.67615601195489, + "learning_rate": 7.817879659759998e-06, + "loss": 1.1959, + "step": 96954 + }, + { + "epoch": 1.17, + "grad_norm": 8.342512718515664, + "learning_rate": 7.817309563328704e-06, + "loss": 1.3014, + "step": 96957 + }, + { + "epoch": 1.17, + "grad_norm": 7.057246055840619, + "learning_rate": 7.81673947434615e-06, + "loss": 1.3961, + "step": 96960 + }, + { + "epoch": 1.17, + "grad_norm": 7.440929861482254, + "learning_rate": 7.81616939281427e-06, + "loss": 1.1588, + "step": 96963 + }, + { + "epoch": 1.17, + "grad_norm": 7.7487145840527445, + "learning_rate": 7.81559931873502e-06, + "loss": 0.9985, + "step": 96966 + }, + { + "epoch": 1.17, + "grad_norm": 6.734933865049942, + "learning_rate": 7.815029252110344e-06, + "loss": 1.4408, + "step": 96969 + }, + { + "epoch": 1.17, + "grad_norm": 11.075624701183319, + "learning_rate": 7.814459192942183e-06, + "loss": 1.3237, + "step": 96972 + }, + { + "epoch": 1.17, + "grad_norm": 13.677419313884576, + "learning_rate": 7.813889141232491e-06, + "loss": 1.3169, + "step": 96975 + }, + { + "epoch": 1.17, + "grad_norm": 8.183431285128279, + "learning_rate": 7.813319096983205e-06, + "loss": 1.2656, + "step": 96978 + }, + { + "epoch": 1.17, + "grad_norm": 12.062706698375816, + "learning_rate": 7.812749060196273e-06, + "loss": 1.2564, + "step": 96981 + }, + { + "epoch": 1.17, + "grad_norm": 2.2199375276379514, + "learning_rate": 7.812179030873638e-06, + "loss": 1.273, + "step": 96984 + }, + { + "epoch": 1.17, + "grad_norm": 3.3306889111462294, + "learning_rate": 7.811609009017255e-06, + "loss": 1.4817, + "step": 96987 + }, + { + "epoch": 1.17, + "grad_norm": 10.842789175380181, + "learning_rate": 7.81103899462906e-06, + "loss": 1.3482, + "step": 96990 + }, + { + "epoch": 1.17, + "grad_norm": 7.254707691108085, + "learning_rate": 7.810468987710995e-06, + "loss": 1.3622, + "step": 96993 + }, + { + "epoch": 1.17, + "grad_norm": 8.81089486665046, + "learning_rate": 7.809898988265013e-06, + "loss": 1.3678, + "step": 96996 + }, + { + "epoch": 1.17, + "grad_norm": 8.312131600606628, + "learning_rate": 7.809328996293058e-06, + "loss": 0.9849, + "step": 96999 + }, + { + "epoch": 1.17, + "grad_norm": 7.683829681943055, + "learning_rate": 7.808759011797079e-06, + "loss": 1.0968, + "step": 97002 + }, + { + "epoch": 1.17, + "grad_norm": 2.6993877418679046, + "learning_rate": 7.808189034779008e-06, + "loss": 1.3009, + "step": 97005 + }, + { + "epoch": 1.17, + "grad_norm": 5.495483823017087, + "learning_rate": 7.8076190652408e-06, + "loss": 1.3894, + "step": 97008 + }, + { + "epoch": 1.17, + "grad_norm": 13.434935923046796, + "learning_rate": 7.8070491031844e-06, + "loss": 1.0431, + "step": 97011 + }, + { + "epoch": 1.17, + "grad_norm": 10.572135609032411, + "learning_rate": 7.806479148611756e-06, + "loss": 1.4131, + "step": 97014 + }, + { + "epoch": 1.17, + "grad_norm": 7.012658494158467, + "learning_rate": 7.805909201524802e-06, + "loss": 1.1056, + "step": 97017 + }, + { + "epoch": 1.17, + "grad_norm": 2.8722352468445482, + "learning_rate": 7.80533926192549e-06, + "loss": 1.2502, + "step": 97020 + }, + { + "epoch": 1.17, + "grad_norm": 55.80121921785146, + "learning_rate": 7.804769329815767e-06, + "loss": 1.2606, + "step": 97023 + }, + { + "epoch": 1.17, + "grad_norm": 34.97587455274908, + "learning_rate": 7.804199405197573e-06, + "loss": 1.1535, + "step": 97026 + }, + { + "epoch": 1.17, + "grad_norm": 7.662943526326957, + "learning_rate": 7.803629488072861e-06, + "loss": 1.0828, + "step": 97029 + }, + { + "epoch": 1.17, + "grad_norm": 4.604466375349558, + "learning_rate": 7.803059578443564e-06, + "loss": 1.2533, + "step": 97032 + }, + { + "epoch": 1.17, + "grad_norm": 7.4832213041081115, + "learning_rate": 7.802489676311637e-06, + "loss": 0.9604, + "step": 97035 + }, + { + "epoch": 1.17, + "grad_norm": 14.76444880087377, + "learning_rate": 7.80191978167902e-06, + "loss": 1.2451, + "step": 97038 + }, + { + "epoch": 1.17, + "grad_norm": 8.92790437255163, + "learning_rate": 7.801349894547662e-06, + "loss": 1.0807, + "step": 97041 + }, + { + "epoch": 1.17, + "grad_norm": 12.585582656194049, + "learning_rate": 7.800780014919503e-06, + "loss": 1.2882, + "step": 97044 + }, + { + "epoch": 1.17, + "grad_norm": 21.01296615085694, + "learning_rate": 7.800210142796488e-06, + "loss": 0.9394, + "step": 97047 + }, + { + "epoch": 1.17, + "grad_norm": 17.660893817152544, + "learning_rate": 7.799640278180562e-06, + "loss": 1.0826, + "step": 97050 + }, + { + "epoch": 1.17, + "grad_norm": 47.72440802900383, + "learning_rate": 7.799070421073674e-06, + "loss": 1.0399, + "step": 97053 + }, + { + "epoch": 1.17, + "grad_norm": 3.2226472786207876, + "learning_rate": 7.798500571477768e-06, + "loss": 0.9863, + "step": 97056 + }, + { + "epoch": 1.17, + "grad_norm": 5.653253699412214, + "learning_rate": 7.797930729394783e-06, + "loss": 1.1959, + "step": 97059 + }, + { + "epoch": 1.17, + "grad_norm": 16.703053562741253, + "learning_rate": 7.797360894826666e-06, + "loss": 1.0519, + "step": 97062 + }, + { + "epoch": 1.17, + "grad_norm": 9.38041843086253, + "learning_rate": 7.796791067775365e-06, + "loss": 1.0961, + "step": 97065 + }, + { + "epoch": 1.17, + "grad_norm": 13.162981344196913, + "learning_rate": 7.796221248242825e-06, + "loss": 1.0692, + "step": 97068 + }, + { + "epoch": 1.17, + "grad_norm": 11.894775312934376, + "learning_rate": 7.795651436230985e-06, + "loss": 1.1993, + "step": 97071 + }, + { + "epoch": 1.17, + "grad_norm": 5.674630671253251, + "learning_rate": 7.795081631741793e-06, + "loss": 1.2856, + "step": 97074 + }, + { + "epoch": 1.17, + "grad_norm": 25.363592126084427, + "learning_rate": 7.794511834777193e-06, + "loss": 1.4363, + "step": 97077 + }, + { + "epoch": 1.17, + "grad_norm": 9.911053908327839, + "learning_rate": 7.793942045339129e-06, + "loss": 1.1386, + "step": 97080 + }, + { + "epoch": 1.17, + "grad_norm": 16.259369730157044, + "learning_rate": 7.79337226342955e-06, + "loss": 1.2531, + "step": 97083 + }, + { + "epoch": 1.17, + "grad_norm": 26.051057100592928, + "learning_rate": 7.792802489050394e-06, + "loss": 1.2226, + "step": 97086 + }, + { + "epoch": 1.17, + "grad_norm": 5.013589999370948, + "learning_rate": 7.792232722203607e-06, + "loss": 1.2021, + "step": 97089 + }, + { + "epoch": 1.17, + "grad_norm": 17.810028973484666, + "learning_rate": 7.791662962891133e-06, + "loss": 1.3998, + "step": 97092 + }, + { + "epoch": 1.17, + "grad_norm": 8.344721002681936, + "learning_rate": 7.791093211114924e-06, + "loss": 1.2541, + "step": 97095 + }, + { + "epoch": 1.17, + "grad_norm": 14.00766635258007, + "learning_rate": 7.790523466876914e-06, + "loss": 0.8698, + "step": 97098 + }, + { + "epoch": 1.17, + "grad_norm": 25.715097187093377, + "learning_rate": 7.789953730179051e-06, + "loss": 1.2864, + "step": 97101 + }, + { + "epoch": 1.17, + "grad_norm": 3.9802985766706014, + "learning_rate": 7.78938400102328e-06, + "loss": 1.1578, + "step": 97104 + }, + { + "epoch": 1.17, + "grad_norm": 9.944067549829551, + "learning_rate": 7.788814279411547e-06, + "loss": 1.6376, + "step": 97107 + }, + { + "epoch": 1.17, + "grad_norm": 2.807546465181462, + "learning_rate": 7.788244565345796e-06, + "loss": 0.8433, + "step": 97110 + }, + { + "epoch": 1.17, + "grad_norm": 9.316861754302975, + "learning_rate": 7.787674858827966e-06, + "loss": 1.3344, + "step": 97113 + }, + { + "epoch": 1.17, + "grad_norm": 6.580428808297592, + "learning_rate": 7.787105159860004e-06, + "loss": 1.0083, + "step": 97116 + }, + { + "epoch": 1.17, + "grad_norm": 8.138091903571453, + "learning_rate": 7.786535468443858e-06, + "loss": 0.9996, + "step": 97119 + }, + { + "epoch": 1.17, + "grad_norm": 8.769781042028509, + "learning_rate": 7.785965784581471e-06, + "loss": 1.4788, + "step": 97122 + }, + { + "epoch": 1.17, + "grad_norm": 6.424688081583678, + "learning_rate": 7.78539610827478e-06, + "loss": 1.2261, + "step": 97125 + }, + { + "epoch": 1.17, + "grad_norm": 5.314296803752743, + "learning_rate": 7.784826439525738e-06, + "loss": 1.6573, + "step": 97128 + }, + { + "epoch": 1.17, + "grad_norm": 15.709085871312833, + "learning_rate": 7.784256778336284e-06, + "loss": 1.1775, + "step": 97131 + }, + { + "epoch": 1.17, + "grad_norm": 16.95306225557738, + "learning_rate": 7.783687124708362e-06, + "loss": 1.2104, + "step": 97134 + }, + { + "epoch": 1.17, + "grad_norm": 5.468467976738142, + "learning_rate": 7.783117478643922e-06, + "loss": 0.906, + "step": 97137 + }, + { + "epoch": 1.17, + "grad_norm": 3.6434754781327365, + "learning_rate": 7.782547840144901e-06, + "loss": 1.233, + "step": 97140 + }, + { + "epoch": 1.17, + "grad_norm": 11.019248592287514, + "learning_rate": 7.781978209213244e-06, + "loss": 1.5256, + "step": 97143 + }, + { + "epoch": 1.17, + "grad_norm": 25.279324854722308, + "learning_rate": 7.781408585850897e-06, + "loss": 1.0617, + "step": 97146 + }, + { + "epoch": 1.17, + "grad_norm": 13.794190279362567, + "learning_rate": 7.780838970059806e-06, + "loss": 1.123, + "step": 97149 + }, + { + "epoch": 1.17, + "grad_norm": 6.047085090749825, + "learning_rate": 7.780269361841912e-06, + "loss": 0.7814, + "step": 97152 + }, + { + "epoch": 1.17, + "grad_norm": 14.178296002702227, + "learning_rate": 7.779699761199155e-06, + "loss": 1.447, + "step": 97155 + }, + { + "epoch": 1.17, + "grad_norm": 7.8438999609677165, + "learning_rate": 7.779130168133486e-06, + "loss": 1.2982, + "step": 97158 + }, + { + "epoch": 1.17, + "grad_norm": 3.040014387428746, + "learning_rate": 7.778560582646845e-06, + "loss": 1.366, + "step": 97161 + }, + { + "epoch": 1.17, + "grad_norm": 5.724581891528448, + "learning_rate": 7.77799100474118e-06, + "loss": 1.0209, + "step": 97164 + }, + { + "epoch": 1.17, + "grad_norm": 5.3496025836449475, + "learning_rate": 7.777421434418427e-06, + "loss": 1.1741, + "step": 97167 + }, + { + "epoch": 1.17, + "grad_norm": 11.66912161332359, + "learning_rate": 7.776851871680535e-06, + "loss": 1.2449, + "step": 97170 + }, + { + "epoch": 1.17, + "grad_norm": 347.88452337549137, + "learning_rate": 7.776282316529446e-06, + "loss": 1.0627, + "step": 97173 + }, + { + "epoch": 1.17, + "grad_norm": 10.269257230166316, + "learning_rate": 7.775712768967108e-06, + "loss": 1.2097, + "step": 97176 + }, + { + "epoch": 1.17, + "grad_norm": 9.424049950216409, + "learning_rate": 7.775143228995458e-06, + "loss": 1.2841, + "step": 97179 + }, + { + "epoch": 1.17, + "grad_norm": 16.42940738404081, + "learning_rate": 7.774573696616443e-06, + "loss": 0.9701, + "step": 97182 + }, + { + "epoch": 1.17, + "grad_norm": 4.125784170437203, + "learning_rate": 7.774004171832005e-06, + "loss": 1.1867, + "step": 97185 + }, + { + "epoch": 1.17, + "grad_norm": 3.9189339230870868, + "learning_rate": 7.77343465464409e-06, + "loss": 1.3419, + "step": 97188 + }, + { + "epoch": 1.17, + "grad_norm": 8.568866095366007, + "learning_rate": 7.772865145054644e-06, + "loss": 1.4432, + "step": 97191 + }, + { + "epoch": 1.17, + "grad_norm": 8.364242189514156, + "learning_rate": 7.772295643065606e-06, + "loss": 1.4978, + "step": 97194 + }, + { + "epoch": 1.17, + "grad_norm": 2.3601986991818538, + "learning_rate": 7.771726148678916e-06, + "loss": 1.2973, + "step": 97197 + }, + { + "epoch": 1.17, + "grad_norm": 6.2815105905124415, + "learning_rate": 7.771156661896525e-06, + "loss": 1.4298, + "step": 97200 + }, + { + "epoch": 1.17, + "grad_norm": 5.0923595664863575, + "learning_rate": 7.770587182720374e-06, + "loss": 1.3747, + "step": 97203 + }, + { + "epoch": 1.17, + "grad_norm": 18.759375356453038, + "learning_rate": 7.770017711152408e-06, + "loss": 0.9805, + "step": 97206 + }, + { + "epoch": 1.17, + "grad_norm": 10.193374542231155, + "learning_rate": 7.769448247194565e-06, + "loss": 0.956, + "step": 97209 + }, + { + "epoch": 1.17, + "grad_norm": 3.019107627769214, + "learning_rate": 7.76887879084879e-06, + "loss": 1.4477, + "step": 97212 + }, + { + "epoch": 1.17, + "grad_norm": 10.160112126132654, + "learning_rate": 7.76830934211703e-06, + "loss": 1.2513, + "step": 97215 + }, + { + "epoch": 1.17, + "grad_norm": 6.489068094771074, + "learning_rate": 7.76773990100123e-06, + "loss": 1.2212, + "step": 97218 + }, + { + "epoch": 1.17, + "grad_norm": 6.8002513000171705, + "learning_rate": 7.767170467503325e-06, + "loss": 1.3107, + "step": 97221 + }, + { + "epoch": 1.17, + "grad_norm": 5.814594324720152, + "learning_rate": 7.766601041625266e-06, + "loss": 0.9357, + "step": 97224 + }, + { + "epoch": 1.17, + "grad_norm": 13.987391139339273, + "learning_rate": 7.76603162336899e-06, + "loss": 1.0498, + "step": 97227 + }, + { + "epoch": 1.17, + "grad_norm": 10.365965867694433, + "learning_rate": 7.765462212736448e-06, + "loss": 1.2894, + "step": 97230 + }, + { + "epoch": 1.17, + "grad_norm": 3.108396937498625, + "learning_rate": 7.764892809729573e-06, + "loss": 1.3002, + "step": 97233 + }, + { + "epoch": 1.17, + "grad_norm": 3.0138050140634434, + "learning_rate": 7.764323414350316e-06, + "loss": 1.2897, + "step": 97236 + }, + { + "epoch": 1.17, + "grad_norm": 17.58395963043583, + "learning_rate": 7.763754026600617e-06, + "loss": 1.0512, + "step": 97239 + }, + { + "epoch": 1.17, + "grad_norm": 6.626883124812611, + "learning_rate": 7.76318464648242e-06, + "loss": 1.5116, + "step": 97242 + }, + { + "epoch": 1.17, + "grad_norm": 9.112847044817855, + "learning_rate": 7.762615273997673e-06, + "loss": 1.2546, + "step": 97245 + }, + { + "epoch": 1.17, + "grad_norm": 20.896216666832586, + "learning_rate": 7.76204590914831e-06, + "loss": 1.407, + "step": 97248 + }, + { + "epoch": 1.17, + "grad_norm": 16.22945277965431, + "learning_rate": 7.761476551936278e-06, + "loss": 0.9548, + "step": 97251 + }, + { + "epoch": 1.17, + "grad_norm": 35.83347684833698, + "learning_rate": 7.76090720236352e-06, + "loss": 0.8375, + "step": 97254 + }, + { + "epoch": 1.17, + "grad_norm": 17.819373094412406, + "learning_rate": 7.760337860431982e-06, + "loss": 0.9527, + "step": 97257 + }, + { + "epoch": 1.17, + "grad_norm": 10.345492437322319, + "learning_rate": 7.759768526143603e-06, + "loss": 1.444, + "step": 97260 + }, + { + "epoch": 1.17, + "grad_norm": 6.190535505654694, + "learning_rate": 7.759199199500324e-06, + "loss": 0.9531, + "step": 97263 + }, + { + "epoch": 1.17, + "grad_norm": 8.44390632441593, + "learning_rate": 7.758629880504094e-06, + "loss": 1.1399, + "step": 97266 + }, + { + "epoch": 1.17, + "grad_norm": 8.313201417336627, + "learning_rate": 7.758060569156852e-06, + "loss": 0.8584, + "step": 97269 + }, + { + "epoch": 1.17, + "grad_norm": 16.566168322471675, + "learning_rate": 7.757491265460544e-06, + "loss": 1.063, + "step": 97272 + }, + { + "epoch": 1.17, + "grad_norm": 2.930534797616689, + "learning_rate": 7.756921969417107e-06, + "loss": 1.3451, + "step": 97275 + }, + { + "epoch": 1.17, + "grad_norm": 16.98633277976079, + "learning_rate": 7.756352681028489e-06, + "loss": 1.0359, + "step": 97278 + }, + { + "epoch": 1.17, + "grad_norm": 14.338414105979554, + "learning_rate": 7.75578340029663e-06, + "loss": 1.1607, + "step": 97281 + }, + { + "epoch": 1.17, + "grad_norm": 7.343588025171376, + "learning_rate": 7.755214127223478e-06, + "loss": 1.1136, + "step": 97284 + }, + { + "epoch": 1.17, + "grad_norm": 30.818884469384905, + "learning_rate": 7.754644861810967e-06, + "loss": 0.8609, + "step": 97287 + }, + { + "epoch": 1.17, + "grad_norm": 276.79219535899665, + "learning_rate": 7.754075604061046e-06, + "loss": 1.6003, + "step": 97290 + }, + { + "epoch": 1.17, + "grad_norm": 12.87436024576827, + "learning_rate": 7.753506353975656e-06, + "loss": 1.356, + "step": 97293 + }, + { + "epoch": 1.17, + "grad_norm": 15.023402675902817, + "learning_rate": 7.752937111556737e-06, + "loss": 1.302, + "step": 97296 + }, + { + "epoch": 1.17, + "grad_norm": 9.10086778611243, + "learning_rate": 7.752367876806239e-06, + "loss": 1.2049, + "step": 97299 + }, + { + "epoch": 1.17, + "grad_norm": 9.204837287234744, + "learning_rate": 7.7517986497261e-06, + "loss": 1.4632, + "step": 97302 + }, + { + "epoch": 1.17, + "grad_norm": 11.028232827357497, + "learning_rate": 7.751229430318257e-06, + "loss": 1.0964, + "step": 97305 + }, + { + "epoch": 1.17, + "grad_norm": 6.725952298614236, + "learning_rate": 7.750660218584662e-06, + "loss": 1.3586, + "step": 97308 + }, + { + "epoch": 1.17, + "grad_norm": 21.40352358810947, + "learning_rate": 7.750091014527253e-06, + "loss": 0.8305, + "step": 97311 + }, + { + "epoch": 1.17, + "grad_norm": 12.612943043159776, + "learning_rate": 7.749521818147974e-06, + "loss": 1.4378, + "step": 97314 + }, + { + "epoch": 1.17, + "grad_norm": 10.84437626091353, + "learning_rate": 7.748952629448762e-06, + "loss": 1.0336, + "step": 97317 + }, + { + "epoch": 1.17, + "grad_norm": 4.4418553112950505, + "learning_rate": 7.748383448431569e-06, + "loss": 1.3151, + "step": 97320 + }, + { + "epoch": 1.17, + "grad_norm": 4.768076479548929, + "learning_rate": 7.747814275098326e-06, + "loss": 1.5952, + "step": 97323 + }, + { + "epoch": 1.17, + "grad_norm": 8.375694795968219, + "learning_rate": 7.74724510945099e-06, + "loss": 1.2603, + "step": 97326 + }, + { + "epoch": 1.17, + "grad_norm": 8.67030971852544, + "learning_rate": 7.74667595149149e-06, + "loss": 1.2922, + "step": 97329 + }, + { + "epoch": 1.17, + "grad_norm": 11.060195548075676, + "learning_rate": 7.746106801221773e-06, + "loss": 1.4196, + "step": 97332 + }, + { + "epoch": 1.17, + "grad_norm": 12.393291012975917, + "learning_rate": 7.745537658643782e-06, + "loss": 1.3709, + "step": 97335 + }, + { + "epoch": 1.17, + "grad_norm": 22.71620242526617, + "learning_rate": 7.744968523759462e-06, + "loss": 1.1589, + "step": 97338 + }, + { + "epoch": 1.17, + "grad_norm": 9.671018845012577, + "learning_rate": 7.744399396570747e-06, + "loss": 0.992, + "step": 97341 + }, + { + "epoch": 1.17, + "grad_norm": 17.6591086615621, + "learning_rate": 7.743830277079589e-06, + "loss": 1.1853, + "step": 97344 + }, + { + "epoch": 1.17, + "grad_norm": 6.465629247146886, + "learning_rate": 7.743261165287921e-06, + "loss": 0.8615, + "step": 97347 + }, + { + "epoch": 1.17, + "grad_norm": 22.005470165021237, + "learning_rate": 7.742692061197693e-06, + "loss": 1.337, + "step": 97350 + }, + { + "epoch": 1.17, + "grad_norm": 7.9875762570014155, + "learning_rate": 7.742122964810844e-06, + "loss": 1.1719, + "step": 97353 + }, + { + "epoch": 1.17, + "grad_norm": 9.471606340383168, + "learning_rate": 7.741553876129317e-06, + "loss": 1.4544, + "step": 97356 + }, + { + "epoch": 1.17, + "grad_norm": 80.01512486106841, + "learning_rate": 7.74098479515505e-06, + "loss": 1.203, + "step": 97359 + }, + { + "epoch": 1.17, + "grad_norm": 21.94526052874996, + "learning_rate": 7.74041572188999e-06, + "loss": 0.9389, + "step": 97362 + }, + { + "epoch": 1.17, + "grad_norm": 6.822161121780187, + "learning_rate": 7.739846656336079e-06, + "loss": 1.0163, + "step": 97365 + }, + { + "epoch": 1.17, + "grad_norm": 32.05656327820233, + "learning_rate": 7.739277598495253e-06, + "loss": 1.44, + "step": 97368 + }, + { + "epoch": 1.17, + "grad_norm": 3.4081359512086173, + "learning_rate": 7.73870854836946e-06, + "loss": 1.2808, + "step": 97371 + }, + { + "epoch": 1.17, + "grad_norm": 8.64001868649804, + "learning_rate": 7.73813950596064e-06, + "loss": 0.7448, + "step": 97374 + }, + { + "epoch": 1.17, + "grad_norm": 6.584053674679125, + "learning_rate": 7.737570471270734e-06, + "loss": 1.3405, + "step": 97377 + }, + { + "epoch": 1.17, + "grad_norm": 4.604770082813063, + "learning_rate": 7.73700144430169e-06, + "loss": 0.6483, + "step": 97380 + }, + { + "epoch": 1.17, + "grad_norm": 11.966291425596907, + "learning_rate": 7.73643242505544e-06, + "loss": 1.0662, + "step": 97383 + }, + { + "epoch": 1.17, + "grad_norm": 11.221634108679188, + "learning_rate": 7.735863413533933e-06, + "loss": 1.4625, + "step": 97386 + }, + { + "epoch": 1.17, + "grad_norm": 7.55930168933416, + "learning_rate": 7.735294409739107e-06, + "loss": 0.8796, + "step": 97389 + }, + { + "epoch": 1.17, + "grad_norm": 12.0369209414137, + "learning_rate": 7.73472541367291e-06, + "loss": 1.1446, + "step": 97392 + }, + { + "epoch": 1.17, + "grad_norm": 6.047732144045989, + "learning_rate": 7.734156425337275e-06, + "loss": 1.2933, + "step": 97395 + }, + { + "epoch": 1.17, + "grad_norm": 15.814379765321002, + "learning_rate": 7.733587444734149e-06, + "loss": 1.2499, + "step": 97398 + }, + { + "epoch": 1.17, + "grad_norm": 7.944314670986778, + "learning_rate": 7.733018471865471e-06, + "loss": 1.3334, + "step": 97401 + }, + { + "epoch": 1.17, + "grad_norm": 10.732072738869302, + "learning_rate": 7.732449506733186e-06, + "loss": 1.3162, + "step": 97404 + }, + { + "epoch": 1.17, + "grad_norm": 10.79267193866298, + "learning_rate": 7.731880549339237e-06, + "loss": 1.2407, + "step": 97407 + }, + { + "epoch": 1.17, + "grad_norm": 6.163337514230113, + "learning_rate": 7.731311599685557e-06, + "loss": 0.9067, + "step": 97410 + }, + { + "epoch": 1.17, + "grad_norm": 10.376803483286416, + "learning_rate": 7.730742657774095e-06, + "loss": 1.0116, + "step": 97413 + }, + { + "epoch": 1.17, + "grad_norm": 8.293230801912754, + "learning_rate": 7.730173723606793e-06, + "loss": 1.5354, + "step": 97416 + }, + { + "epoch": 1.17, + "grad_norm": 2.789646993763046, + "learning_rate": 7.729604797185593e-06, + "loss": 1.2597, + "step": 97419 + }, + { + "epoch": 1.17, + "grad_norm": 6.86899115195856, + "learning_rate": 7.72903587851243e-06, + "loss": 0.8988, + "step": 97422 + }, + { + "epoch": 1.17, + "grad_norm": 2.395206187369039, + "learning_rate": 7.728466967589247e-06, + "loss": 1.0662, + "step": 97425 + }, + { + "epoch": 1.17, + "grad_norm": 8.687622743411694, + "learning_rate": 7.727898064417991e-06, + "loss": 1.2356, + "step": 97428 + }, + { + "epoch": 1.17, + "grad_norm": 30.073048607547335, + "learning_rate": 7.727329169000599e-06, + "loss": 1.1192, + "step": 97431 + }, + { + "epoch": 1.17, + "grad_norm": 29.370590143627307, + "learning_rate": 7.726760281339018e-06, + "loss": 1.2007, + "step": 97434 + }, + { + "epoch": 1.17, + "grad_norm": 7.184715075318393, + "learning_rate": 7.726191401435183e-06, + "loss": 1.2603, + "step": 97437 + }, + { + "epoch": 1.17, + "grad_norm": 6.367416838089957, + "learning_rate": 7.725622529291037e-06, + "loss": 1.1115, + "step": 97440 + }, + { + "epoch": 1.17, + "grad_norm": 5.842312189896483, + "learning_rate": 7.725053664908522e-06, + "loss": 1.0754, + "step": 97443 + }, + { + "epoch": 1.17, + "grad_norm": 3.3264255132579343, + "learning_rate": 7.724484808289582e-06, + "loss": 1.3469, + "step": 97446 + }, + { + "epoch": 1.17, + "grad_norm": 9.440666122802552, + "learning_rate": 7.723915959436152e-06, + "loss": 0.9947, + "step": 97449 + }, + { + "epoch": 1.17, + "grad_norm": 8.585797418945615, + "learning_rate": 7.723347118350179e-06, + "loss": 1.6319, + "step": 97452 + }, + { + "epoch": 1.17, + "grad_norm": 19.31024318073638, + "learning_rate": 7.7227782850336e-06, + "loss": 1.389, + "step": 97455 + }, + { + "epoch": 1.17, + "grad_norm": 11.103072310830274, + "learning_rate": 7.722209459488362e-06, + "loss": 0.9007, + "step": 97458 + }, + { + "epoch": 1.17, + "grad_norm": 11.22871225999722, + "learning_rate": 7.721640641716405e-06, + "loss": 1.5661, + "step": 97461 + }, + { + "epoch": 1.17, + "grad_norm": 7.21085692071956, + "learning_rate": 7.721071831719663e-06, + "loss": 1.0182, + "step": 97464 + }, + { + "epoch": 1.17, + "grad_norm": 10.010659605977736, + "learning_rate": 7.72050302950008e-06, + "loss": 1.421, + "step": 97467 + }, + { + "epoch": 1.17, + "grad_norm": 9.39291959997368, + "learning_rate": 7.719934235059603e-06, + "loss": 0.8712, + "step": 97470 + }, + { + "epoch": 1.17, + "grad_norm": 14.26781133596421, + "learning_rate": 7.71936544840017e-06, + "loss": 1.1947, + "step": 97473 + }, + { + "epoch": 1.17, + "grad_norm": 82.30130651893492, + "learning_rate": 7.718796669523719e-06, + "loss": 1.0515, + "step": 97476 + }, + { + "epoch": 1.17, + "grad_norm": 23.34558797051178, + "learning_rate": 7.718227898432193e-06, + "loss": 1.5507, + "step": 97479 + }, + { + "epoch": 1.17, + "grad_norm": 11.290481701097844, + "learning_rate": 7.717659135127534e-06, + "loss": 1.0223, + "step": 97482 + }, + { + "epoch": 1.17, + "grad_norm": 17.940418647695644, + "learning_rate": 7.71709037961168e-06, + "loss": 1.2716, + "step": 97485 + }, + { + "epoch": 1.17, + "grad_norm": 18.008570446756835, + "learning_rate": 7.71652163188658e-06, + "loss": 1.1875, + "step": 97488 + }, + { + "epoch": 1.17, + "grad_norm": 21.340759719485668, + "learning_rate": 7.715952891954163e-06, + "loss": 1.089, + "step": 97491 + }, + { + "epoch": 1.17, + "grad_norm": 2.823160013165265, + "learning_rate": 7.715384159816381e-06, + "loss": 1.4059, + "step": 97494 + }, + { + "epoch": 1.17, + "grad_norm": 6.461160744370371, + "learning_rate": 7.714815435475165e-06, + "loss": 1.2274, + "step": 97497 + }, + { + "epoch": 1.17, + "grad_norm": 6.785934442663806, + "learning_rate": 7.714246718932467e-06, + "loss": 1.2402, + "step": 97500 + }, + { + "epoch": 1.17, + "grad_norm": 17.112235360177063, + "learning_rate": 7.713678010190218e-06, + "loss": 1.274, + "step": 97503 + }, + { + "epoch": 1.17, + "grad_norm": 10.98750330318373, + "learning_rate": 7.713109309250362e-06, + "loss": 1.1984, + "step": 97506 + }, + { + "epoch": 1.17, + "grad_norm": 8.77735065744507, + "learning_rate": 7.71254061611484e-06, + "loss": 0.8851, + "step": 97509 + }, + { + "epoch": 1.17, + "grad_norm": 18.78107141409159, + "learning_rate": 7.711971930785594e-06, + "loss": 1.469, + "step": 97512 + }, + { + "epoch": 1.17, + "grad_norm": 26.843433388648194, + "learning_rate": 7.711403253264564e-06, + "loss": 1.2299, + "step": 97515 + }, + { + "epoch": 1.17, + "grad_norm": 5.911960606638643, + "learning_rate": 7.71083458355369e-06, + "loss": 0.9877, + "step": 97518 + }, + { + "epoch": 1.17, + "grad_norm": 12.843666090679216, + "learning_rate": 7.71026592165491e-06, + "loss": 1.2101, + "step": 97521 + }, + { + "epoch": 1.17, + "grad_norm": 9.173819754197089, + "learning_rate": 7.709697267570171e-06, + "loss": 1.297, + "step": 97524 + }, + { + "epoch": 1.17, + "grad_norm": 3.5988974691410394, + "learning_rate": 7.709128621301413e-06, + "loss": 1.4364, + "step": 97527 + }, + { + "epoch": 1.17, + "grad_norm": 22.88499912392368, + "learning_rate": 7.70855998285057e-06, + "loss": 1.0576, + "step": 97530 + }, + { + "epoch": 1.17, + "grad_norm": 8.578874445350097, + "learning_rate": 7.707991352219585e-06, + "loss": 1.3273, + "step": 97533 + }, + { + "epoch": 1.17, + "grad_norm": 10.534180536932878, + "learning_rate": 7.707422729410402e-06, + "loss": 1.0335, + "step": 97536 + }, + { + "epoch": 1.17, + "grad_norm": 8.677373045766084, + "learning_rate": 7.706854114424957e-06, + "loss": 1.2718, + "step": 97539 + }, + { + "epoch": 1.17, + "grad_norm": 20.820045047755535, + "learning_rate": 7.706285507265198e-06, + "loss": 1.2308, + "step": 97542 + }, + { + "epoch": 1.17, + "grad_norm": 16.28602291141266, + "learning_rate": 7.705716907933058e-06, + "loss": 1.0473, + "step": 97545 + }, + { + "epoch": 1.17, + "grad_norm": 25.08944038515849, + "learning_rate": 7.705148316430477e-06, + "loss": 0.9397, + "step": 97548 + }, + { + "epoch": 1.17, + "grad_norm": 18.379030463206927, + "learning_rate": 7.7045797327594e-06, + "loss": 1.4616, + "step": 97551 + }, + { + "epoch": 1.17, + "grad_norm": 7.996319328405605, + "learning_rate": 7.704011156921769e-06, + "loss": 1.0951, + "step": 97554 + }, + { + "epoch": 1.17, + "grad_norm": 3.872358891289848, + "learning_rate": 7.70344258891952e-06, + "loss": 1.0535, + "step": 97557 + }, + { + "epoch": 1.17, + "grad_norm": 7.4924794584222045, + "learning_rate": 7.70287402875459e-06, + "loss": 0.9058, + "step": 97560 + }, + { + "epoch": 1.17, + "grad_norm": 4.196286944049267, + "learning_rate": 7.702305476428922e-06, + "loss": 1.5614, + "step": 97563 + }, + { + "epoch": 1.17, + "grad_norm": 7.599644688727848, + "learning_rate": 7.701736931944464e-06, + "loss": 1.0209, + "step": 97566 + }, + { + "epoch": 1.17, + "grad_norm": 8.844953365851476, + "learning_rate": 7.701168395303149e-06, + "loss": 1.1165, + "step": 97569 + }, + { + "epoch": 1.17, + "grad_norm": 4.378411831997266, + "learning_rate": 7.700599866506917e-06, + "loss": 1.0532, + "step": 97572 + }, + { + "epoch": 1.17, + "grad_norm": 15.068323373781295, + "learning_rate": 7.700031345557706e-06, + "loss": 1.1959, + "step": 97575 + }, + { + "epoch": 1.17, + "grad_norm": 7.311595174468573, + "learning_rate": 7.699462832457464e-06, + "loss": 1.2175, + "step": 97578 + }, + { + "epoch": 1.17, + "grad_norm": 14.935042499133196, + "learning_rate": 7.698894327208128e-06, + "loss": 1.2448, + "step": 97581 + }, + { + "epoch": 1.17, + "grad_norm": 11.492348317703707, + "learning_rate": 7.698325829811633e-06, + "loss": 1.4444, + "step": 97584 + }, + { + "epoch": 1.17, + "grad_norm": 9.830491900235728, + "learning_rate": 7.697757340269925e-06, + "loss": 0.9903, + "step": 97587 + }, + { + "epoch": 1.17, + "grad_norm": 15.381292206833852, + "learning_rate": 7.697188858584939e-06, + "loss": 0.9222, + "step": 97590 + }, + { + "epoch": 1.17, + "grad_norm": 17.590644552405205, + "learning_rate": 7.69662038475862e-06, + "loss": 1.1184, + "step": 97593 + }, + { + "epoch": 1.17, + "grad_norm": 12.519886059780681, + "learning_rate": 7.696051918792908e-06, + "loss": 1.1764, + "step": 97596 + }, + { + "epoch": 1.17, + "grad_norm": 4.895812933896538, + "learning_rate": 7.695483460689739e-06, + "loss": 0.9383, + "step": 97599 + }, + { + "epoch": 1.17, + "grad_norm": 22.410557417338232, + "learning_rate": 7.694915010451054e-06, + "loss": 1.2123, + "step": 97602 + }, + { + "epoch": 1.17, + "grad_norm": 10.146470306837132, + "learning_rate": 7.694346568078792e-06, + "loss": 1.1347, + "step": 97605 + }, + { + "epoch": 1.17, + "grad_norm": 7.658463715545877, + "learning_rate": 7.693778133574901e-06, + "loss": 1.2427, + "step": 97608 + }, + { + "epoch": 1.17, + "grad_norm": 9.865380252558612, + "learning_rate": 7.693209706941312e-06, + "loss": 1.0913, + "step": 97611 + }, + { + "epoch": 1.17, + "grad_norm": 3.2320751076955383, + "learning_rate": 7.692641288179962e-06, + "loss": 1.4562, + "step": 97614 + }, + { + "epoch": 1.17, + "grad_norm": 8.298734453321744, + "learning_rate": 7.6920728772928e-06, + "loss": 1.445, + "step": 97617 + }, + { + "epoch": 1.17, + "grad_norm": 7.821696631684021, + "learning_rate": 7.691504474281764e-06, + "loss": 1.4931, + "step": 97620 + }, + { + "epoch": 1.17, + "grad_norm": 10.820056720209614, + "learning_rate": 7.690936079148791e-06, + "loss": 1.0538, + "step": 97623 + }, + { + "epoch": 1.17, + "grad_norm": 3.9777715789288153, + "learning_rate": 7.690367691895819e-06, + "loss": 1.3503, + "step": 97626 + }, + { + "epoch": 1.17, + "grad_norm": 32.149766814916525, + "learning_rate": 7.68979931252479e-06, + "loss": 1.011, + "step": 97629 + }, + { + "epoch": 1.17, + "grad_norm": 8.801787474462797, + "learning_rate": 7.689230941037646e-06, + "loss": 1.2355, + "step": 97632 + }, + { + "epoch": 1.17, + "grad_norm": 6.654152274945561, + "learning_rate": 7.688662577436329e-06, + "loss": 1.2629, + "step": 97635 + }, + { + "epoch": 1.17, + "grad_norm": 6.033356369550221, + "learning_rate": 7.688094221722767e-06, + "loss": 1.1329, + "step": 97638 + }, + { + "epoch": 1.17, + "grad_norm": 7.5386728930988305, + "learning_rate": 7.687525873898908e-06, + "loss": 1.3024, + "step": 97641 + }, + { + "epoch": 1.17, + "grad_norm": 2.870430240303868, + "learning_rate": 7.686957533966688e-06, + "loss": 1.1662, + "step": 97644 + }, + { + "epoch": 1.17, + "grad_norm": 26.90930460785647, + "learning_rate": 7.686389201928052e-06, + "loss": 1.0657, + "step": 97647 + }, + { + "epoch": 1.17, + "grad_norm": 9.610843145460692, + "learning_rate": 7.685820877784939e-06, + "loss": 1.1101, + "step": 97650 + }, + { + "epoch": 1.17, + "grad_norm": 4.926449894421247, + "learning_rate": 7.685252561539282e-06, + "loss": 0.9756, + "step": 97653 + }, + { + "epoch": 1.17, + "grad_norm": 3.599805057779246, + "learning_rate": 7.684684253193024e-06, + "loss": 1.2519, + "step": 97656 + }, + { + "epoch": 1.17, + "grad_norm": 3.0613002634140667, + "learning_rate": 7.684115952748103e-06, + "loss": 1.3753, + "step": 97659 + }, + { + "epoch": 1.17, + "grad_norm": 6.0425642126430334, + "learning_rate": 7.683547660206467e-06, + "loss": 1.2656, + "step": 97662 + }, + { + "epoch": 1.17, + "grad_norm": 12.036189418106567, + "learning_rate": 7.682979375570045e-06, + "loss": 1.3787, + "step": 97665 + }, + { + "epoch": 1.17, + "grad_norm": 7.819980025061169, + "learning_rate": 7.682411098840776e-06, + "loss": 1.1646, + "step": 97668 + }, + { + "epoch": 1.17, + "grad_norm": 4.7311094443728345, + "learning_rate": 7.681842830020605e-06, + "loss": 1.1188, + "step": 97671 + }, + { + "epoch": 1.17, + "grad_norm": 11.658723327656158, + "learning_rate": 7.681274569111472e-06, + "loss": 1.0295, + "step": 97674 + }, + { + "epoch": 1.17, + "grad_norm": 15.787787549029183, + "learning_rate": 7.680706316115314e-06, + "loss": 1.2245, + "step": 97677 + }, + { + "epoch": 1.17, + "grad_norm": 5.120071809728451, + "learning_rate": 7.680138071034067e-06, + "loss": 1.2239, + "step": 97680 + }, + { + "epoch": 1.17, + "grad_norm": 14.605751279067075, + "learning_rate": 7.679569833869674e-06, + "loss": 1.187, + "step": 97683 + }, + { + "epoch": 1.17, + "grad_norm": 7.435626752368641, + "learning_rate": 7.679001604624073e-06, + "loss": 1.1268, + "step": 97686 + }, + { + "epoch": 1.17, + "grad_norm": 9.361187645678367, + "learning_rate": 7.678433383299206e-06, + "loss": 1.2752, + "step": 97689 + }, + { + "epoch": 1.17, + "grad_norm": 3.6358165673310725, + "learning_rate": 7.677865169897007e-06, + "loss": 1.1028, + "step": 97692 + }, + { + "epoch": 1.17, + "grad_norm": 7.821221030868105, + "learning_rate": 7.677296964419419e-06, + "loss": 1.1903, + "step": 97695 + }, + { + "epoch": 1.17, + "grad_norm": 5.0830104293215, + "learning_rate": 7.676728766868377e-06, + "loss": 1.1568, + "step": 97698 + }, + { + "epoch": 1.17, + "grad_norm": 2.7399946380090636, + "learning_rate": 7.676160577245825e-06, + "loss": 1.0686, + "step": 97701 + }, + { + "epoch": 1.17, + "grad_norm": 16.74110317811334, + "learning_rate": 7.675592395553703e-06, + "loss": 0.8505, + "step": 97704 + }, + { + "epoch": 1.17, + "grad_norm": 10.791807893128361, + "learning_rate": 7.675024221793943e-06, + "loss": 1.0875, + "step": 97707 + }, + { + "epoch": 1.17, + "grad_norm": 11.538120181091942, + "learning_rate": 7.674456055968488e-06, + "loss": 1.1499, + "step": 97710 + }, + { + "epoch": 1.17, + "grad_norm": 7.959266499371376, + "learning_rate": 7.673887898079278e-06, + "loss": 1.0821, + "step": 97713 + }, + { + "epoch": 1.18, + "grad_norm": 14.143321589432817, + "learning_rate": 7.673319748128253e-06, + "loss": 0.9415, + "step": 97716 + }, + { + "epoch": 1.18, + "grad_norm": 9.007518950076234, + "learning_rate": 7.672751606117348e-06, + "loss": 1.5462, + "step": 97719 + }, + { + "epoch": 1.18, + "grad_norm": 37.29053715792359, + "learning_rate": 7.6721834720485e-06, + "loss": 1.3207, + "step": 97722 + }, + { + "epoch": 1.18, + "grad_norm": 11.171136432604065, + "learning_rate": 7.671615345923655e-06, + "loss": 1.4045, + "step": 97725 + }, + { + "epoch": 1.18, + "grad_norm": 6.103333227192128, + "learning_rate": 7.671047227744745e-06, + "loss": 1.274, + "step": 97728 + }, + { + "epoch": 1.18, + "grad_norm": 16.446380384713418, + "learning_rate": 7.670479117513717e-06, + "loss": 1.0088, + "step": 97731 + }, + { + "epoch": 1.18, + "grad_norm": 10.985554914106544, + "learning_rate": 7.669911015232501e-06, + "loss": 1.3288, + "step": 97734 + }, + { + "epoch": 1.18, + "grad_norm": 4.838052931705938, + "learning_rate": 7.66934292090304e-06, + "loss": 1.3772, + "step": 97737 + }, + { + "epoch": 1.18, + "grad_norm": 9.951430446283233, + "learning_rate": 7.66877483452727e-06, + "loss": 1.099, + "step": 97740 + }, + { + "epoch": 1.18, + "grad_norm": 9.781448450492356, + "learning_rate": 7.668206756107137e-06, + "loss": 1.0886, + "step": 97743 + }, + { + "epoch": 1.18, + "grad_norm": 8.544827984386112, + "learning_rate": 7.667638685644571e-06, + "loss": 0.9736, + "step": 97746 + }, + { + "epoch": 1.18, + "grad_norm": 4.793626746889023, + "learning_rate": 7.667070623141515e-06, + "loss": 1.4076, + "step": 97749 + }, + { + "epoch": 1.18, + "grad_norm": 9.49416611857335, + "learning_rate": 7.666502568599905e-06, + "loss": 1.0703, + "step": 97752 + }, + { + "epoch": 1.18, + "grad_norm": 8.894797724243867, + "learning_rate": 7.66593452202168e-06, + "loss": 0.7829, + "step": 97755 + }, + { + "epoch": 1.18, + "grad_norm": 12.408246272277083, + "learning_rate": 7.665366483408786e-06, + "loss": 1.6619, + "step": 97758 + }, + { + "epoch": 1.18, + "grad_norm": 6.84636040542808, + "learning_rate": 7.664798452763152e-06, + "loss": 1.0499, + "step": 97761 + }, + { + "epoch": 1.18, + "grad_norm": 12.055583376211175, + "learning_rate": 7.664230430086716e-06, + "loss": 1.3256, + "step": 97764 + }, + { + "epoch": 1.18, + "grad_norm": 9.341335852388601, + "learning_rate": 7.663662415381424e-06, + "loss": 1.4483, + "step": 97767 + }, + { + "epoch": 1.18, + "grad_norm": 11.083187561593407, + "learning_rate": 7.66309440864921e-06, + "loss": 0.9337, + "step": 97770 + }, + { + "epoch": 1.18, + "grad_norm": 15.80165878348035, + "learning_rate": 7.662526409892014e-06, + "loss": 1.3901, + "step": 97773 + }, + { + "epoch": 1.18, + "grad_norm": 11.731726113317162, + "learning_rate": 7.66195841911177e-06, + "loss": 1.2817, + "step": 97776 + }, + { + "epoch": 1.18, + "grad_norm": 5.167438955984575, + "learning_rate": 7.661390436310424e-06, + "loss": 1.161, + "step": 97779 + }, + { + "epoch": 1.18, + "grad_norm": 8.965181964400877, + "learning_rate": 7.660822461489907e-06, + "loss": 1.1037, + "step": 97782 + }, + { + "epoch": 1.18, + "grad_norm": 7.528339845955968, + "learning_rate": 7.660254494652163e-06, + "loss": 1.1765, + "step": 97785 + }, + { + "epoch": 1.18, + "grad_norm": 4.0702753147704565, + "learning_rate": 7.659686535799126e-06, + "loss": 1.085, + "step": 97788 + }, + { + "epoch": 1.18, + "grad_norm": 12.133793244993, + "learning_rate": 7.659118584932737e-06, + "loss": 1.1915, + "step": 97791 + }, + { + "epoch": 1.18, + "grad_norm": 16.457836263452716, + "learning_rate": 7.658550642054931e-06, + "loss": 1.1749, + "step": 97794 + }, + { + "epoch": 1.18, + "grad_norm": 9.240369722105433, + "learning_rate": 7.657982707167651e-06, + "loss": 1.4831, + "step": 97797 + }, + { + "epoch": 1.18, + "grad_norm": 2.2204576460373735, + "learning_rate": 7.65741478027283e-06, + "loss": 1.333, + "step": 97800 + }, + { + "epoch": 1.18, + "grad_norm": 4.432087699784907, + "learning_rate": 7.65684686137241e-06, + "loss": 1.1165, + "step": 97803 + }, + { + "epoch": 1.18, + "grad_norm": 3.278849770016691, + "learning_rate": 7.656278950468326e-06, + "loss": 1.0058, + "step": 97806 + }, + { + "epoch": 1.18, + "grad_norm": 7.366827836056629, + "learning_rate": 7.655711047562518e-06, + "loss": 1.1895, + "step": 97809 + }, + { + "epoch": 1.18, + "grad_norm": 4.349318764910201, + "learning_rate": 7.655143152656926e-06, + "loss": 1.1089, + "step": 97812 + }, + { + "epoch": 1.18, + "grad_norm": 15.436193061442012, + "learning_rate": 7.654575265753485e-06, + "loss": 1.0604, + "step": 97815 + }, + { + "epoch": 1.18, + "grad_norm": 5.487690661647474, + "learning_rate": 7.654007386854132e-06, + "loss": 1.0238, + "step": 97818 + }, + { + "epoch": 1.18, + "grad_norm": 3.3651058274261616, + "learning_rate": 7.65343951596081e-06, + "loss": 0.8839, + "step": 97821 + }, + { + "epoch": 1.18, + "grad_norm": 8.341794077237429, + "learning_rate": 7.652871653075453e-06, + "loss": 1.3036, + "step": 97824 + }, + { + "epoch": 1.18, + "grad_norm": 6.595713208970973, + "learning_rate": 7.652303798199996e-06, + "loss": 0.9963, + "step": 97827 + }, + { + "epoch": 1.18, + "grad_norm": 6.3111675419357445, + "learning_rate": 7.651735951336382e-06, + "loss": 1.2163, + "step": 97830 + }, + { + "epoch": 1.18, + "grad_norm": 8.492035590984003, + "learning_rate": 7.65116811248655e-06, + "loss": 1.0525, + "step": 97833 + }, + { + "epoch": 1.18, + "grad_norm": 15.758995425374973, + "learning_rate": 7.650600281652432e-06, + "loss": 1.3141, + "step": 97836 + }, + { + "epoch": 1.18, + "grad_norm": 5.093407990237378, + "learning_rate": 7.650032458835973e-06, + "loss": 1.2848, + "step": 97839 + }, + { + "epoch": 1.18, + "grad_norm": 19.23568501356218, + "learning_rate": 7.649464644039101e-06, + "loss": 0.9933, + "step": 97842 + }, + { + "epoch": 1.18, + "grad_norm": 3.261983287657454, + "learning_rate": 7.648896837263764e-06, + "loss": 1.0571, + "step": 97845 + }, + { + "epoch": 1.18, + "grad_norm": 6.006367902334539, + "learning_rate": 7.648329038511893e-06, + "loss": 1.264, + "step": 97848 + }, + { + "epoch": 1.18, + "grad_norm": 22.158289911463385, + "learning_rate": 7.647761247785431e-06, + "loss": 1.4137, + "step": 97851 + }, + { + "epoch": 1.18, + "grad_norm": 5.380416182872319, + "learning_rate": 7.647193465086309e-06, + "loss": 1.2357, + "step": 97854 + }, + { + "epoch": 1.18, + "grad_norm": 12.564728595057627, + "learning_rate": 7.64662569041647e-06, + "loss": 1.2231, + "step": 97857 + }, + { + "epoch": 1.18, + "grad_norm": 10.730724690602166, + "learning_rate": 7.646057923777848e-06, + "loss": 0.9759, + "step": 97860 + }, + { + "epoch": 1.18, + "grad_norm": 10.429155264603503, + "learning_rate": 7.645490165172384e-06, + "loss": 0.9195, + "step": 97863 + }, + { + "epoch": 1.18, + "grad_norm": 4.518311255239726, + "learning_rate": 7.644922414602015e-06, + "loss": 0.9636, + "step": 97866 + }, + { + "epoch": 1.18, + "grad_norm": 11.455410496896423, + "learning_rate": 7.644354672068677e-06, + "loss": 1.2551, + "step": 97869 + }, + { + "epoch": 1.18, + "grad_norm": 2.7629129382619633, + "learning_rate": 7.643786937574306e-06, + "loss": 0.7361, + "step": 97872 + }, + { + "epoch": 1.18, + "grad_norm": 8.449903545571342, + "learning_rate": 7.643219211120843e-06, + "loss": 1.1021, + "step": 97875 + }, + { + "epoch": 1.18, + "grad_norm": 6.112961308837551, + "learning_rate": 7.642651492710227e-06, + "loss": 1.184, + "step": 97878 + }, + { + "epoch": 1.18, + "grad_norm": 27.090130425570194, + "learning_rate": 7.642083782344387e-06, + "loss": 1.351, + "step": 97881 + }, + { + "epoch": 1.18, + "grad_norm": 6.129629327119453, + "learning_rate": 7.641516080025266e-06, + "loss": 0.9447, + "step": 97884 + }, + { + "epoch": 1.18, + "grad_norm": 13.351139719295228, + "learning_rate": 7.640948385754804e-06, + "loss": 1.5091, + "step": 97887 + }, + { + "epoch": 1.18, + "grad_norm": 8.758813056521177, + "learning_rate": 7.640380699534934e-06, + "loss": 1.4884, + "step": 97890 + }, + { + "epoch": 1.18, + "grad_norm": 14.979733802996451, + "learning_rate": 7.639813021367597e-06, + "loss": 1.0681, + "step": 97893 + }, + { + "epoch": 1.18, + "grad_norm": 5.465869903125296, + "learning_rate": 7.639245351254725e-06, + "loss": 1.2545, + "step": 97896 + }, + { + "epoch": 1.18, + "grad_norm": 15.634976877700511, + "learning_rate": 7.63867768919826e-06, + "loss": 1.1681, + "step": 97899 + }, + { + "epoch": 1.18, + "grad_norm": 5.2400451406405395, + "learning_rate": 7.638110035200134e-06, + "loss": 1.3145, + "step": 97902 + }, + { + "epoch": 1.18, + "grad_norm": 7.168264322957541, + "learning_rate": 7.637542389262294e-06, + "loss": 1.2295, + "step": 97905 + }, + { + "epoch": 1.18, + "grad_norm": 13.956701013822435, + "learning_rate": 7.636974751386667e-06, + "loss": 1.5483, + "step": 97908 + }, + { + "epoch": 1.18, + "grad_norm": 7.78404990257073, + "learning_rate": 7.636407121575194e-06, + "loss": 1.3452, + "step": 97911 + }, + { + "epoch": 1.18, + "grad_norm": 14.632845890696297, + "learning_rate": 7.635839499829811e-06, + "loss": 1.3596, + "step": 97914 + }, + { + "epoch": 1.18, + "grad_norm": 9.237837552238911, + "learning_rate": 7.63527188615246e-06, + "loss": 1.0865, + "step": 97917 + }, + { + "epoch": 1.18, + "grad_norm": 7.0521679292011035, + "learning_rate": 7.634704280545074e-06, + "loss": 1.141, + "step": 97920 + }, + { + "epoch": 1.18, + "grad_norm": 32.816238203851285, + "learning_rate": 7.634136683009586e-06, + "loss": 0.919, + "step": 97923 + }, + { + "epoch": 1.18, + "grad_norm": 95.66111849666947, + "learning_rate": 7.63356909354794e-06, + "loss": 0.8788, + "step": 97926 + }, + { + "epoch": 1.18, + "grad_norm": 29.314372543388284, + "learning_rate": 7.633001512162069e-06, + "loss": 1.2896, + "step": 97929 + }, + { + "epoch": 1.18, + "grad_norm": 56.972410495926184, + "learning_rate": 7.632433938853916e-06, + "loss": 1.297, + "step": 97932 + }, + { + "epoch": 1.18, + "grad_norm": 11.312404137638818, + "learning_rate": 7.631866373625407e-06, + "loss": 1.5096, + "step": 97935 + }, + { + "epoch": 1.18, + "grad_norm": 8.061658370943531, + "learning_rate": 7.631298816478487e-06, + "loss": 1.1006, + "step": 97938 + }, + { + "epoch": 1.18, + "grad_norm": 7.363034853233976, + "learning_rate": 7.630731267415092e-06, + "loss": 0.896, + "step": 97941 + }, + { + "epoch": 1.18, + "grad_norm": 21.76981098258564, + "learning_rate": 7.630163726437156e-06, + "loss": 0.9906, + "step": 97944 + }, + { + "epoch": 1.18, + "grad_norm": 40.274232526869916, + "learning_rate": 7.629596193546622e-06, + "loss": 1.3435, + "step": 97947 + }, + { + "epoch": 1.18, + "grad_norm": 12.758417613085633, + "learning_rate": 7.629028668745417e-06, + "loss": 1.5162, + "step": 97950 + }, + { + "epoch": 1.18, + "grad_norm": 4.922151025615115, + "learning_rate": 7.628461152035486e-06, + "loss": 0.8385, + "step": 97953 + }, + { + "epoch": 1.18, + "grad_norm": 7.083808076848387, + "learning_rate": 7.62789364341876e-06, + "loss": 0.9149, + "step": 97956 + }, + { + "epoch": 1.18, + "grad_norm": 7.446000052373365, + "learning_rate": 7.627326142897185e-06, + "loss": 1.2613, + "step": 97959 + }, + { + "epoch": 1.18, + "grad_norm": 5.447762877764543, + "learning_rate": 7.626758650472688e-06, + "loss": 1.1939, + "step": 97962 + }, + { + "epoch": 1.18, + "grad_norm": 4.9309458399083645, + "learning_rate": 7.626191166147205e-06, + "loss": 1.3059, + "step": 97965 + }, + { + "epoch": 1.18, + "grad_norm": 9.38230530729987, + "learning_rate": 7.625623689922679e-06, + "loss": 1.1873, + "step": 97968 + }, + { + "epoch": 1.18, + "grad_norm": 5.3510896869554285, + "learning_rate": 7.625056221801044e-06, + "loss": 0.9643, + "step": 97971 + }, + { + "epoch": 1.18, + "grad_norm": 6.185049715339866, + "learning_rate": 7.62448876178424e-06, + "loss": 0.9572, + "step": 97974 + }, + { + "epoch": 1.18, + "grad_norm": 30.06883976266151, + "learning_rate": 7.623921309874196e-06, + "loss": 1.2521, + "step": 97977 + }, + { + "epoch": 1.18, + "grad_norm": 7.037592618256587, + "learning_rate": 7.623353866072853e-06, + "loss": 0.8829, + "step": 97980 + }, + { + "epoch": 1.18, + "grad_norm": 7.798701967762116, + "learning_rate": 7.6227864303821484e-06, + "loss": 0.7242, + "step": 97983 + }, + { + "epoch": 1.18, + "grad_norm": 6.945077569713278, + "learning_rate": 7.622219002804021e-06, + "loss": 1.0826, + "step": 97986 + }, + { + "epoch": 1.18, + "grad_norm": 13.940425666277465, + "learning_rate": 7.6216515833403984e-06, + "loss": 1.03, + "step": 97989 + }, + { + "epoch": 1.18, + "grad_norm": 13.176968286445181, + "learning_rate": 7.6210841719932216e-06, + "loss": 1.4531, + "step": 97992 + }, + { + "epoch": 1.18, + "grad_norm": 8.230057963430705, + "learning_rate": 7.620516768764431e-06, + "loss": 1.2336, + "step": 97995 + }, + { + "epoch": 1.18, + "grad_norm": 22.270982837678552, + "learning_rate": 7.6199493736559575e-06, + "loss": 1.2395, + "step": 97998 + }, + { + "epoch": 1.18, + "grad_norm": 3.220067507436454, + "learning_rate": 7.619381986669743e-06, + "loss": 1.1978, + "step": 98001 + }, + { + "epoch": 1.18, + "grad_norm": 12.284621616150748, + "learning_rate": 7.618814607807719e-06, + "loss": 1.2838, + "step": 98004 + }, + { + "epoch": 1.18, + "grad_norm": 6.737416047190653, + "learning_rate": 7.618247237071819e-06, + "loss": 0.8351, + "step": 98007 + }, + { + "epoch": 1.18, + "grad_norm": 2.8427162978536344, + "learning_rate": 7.617679874463985e-06, + "loss": 1.2433, + "step": 98010 + }, + { + "epoch": 1.18, + "grad_norm": 5.604651994278733, + "learning_rate": 7.617112519986155e-06, + "loss": 1.3466, + "step": 98013 + }, + { + "epoch": 1.18, + "grad_norm": 6.798365696842357, + "learning_rate": 7.61654517364026e-06, + "loss": 0.9671, + "step": 98016 + }, + { + "epoch": 1.18, + "grad_norm": 2.596985757176232, + "learning_rate": 7.615977835428235e-06, + "loss": 0.9866, + "step": 98019 + }, + { + "epoch": 1.18, + "grad_norm": 3.6398933707774437, + "learning_rate": 7.61541050535202e-06, + "loss": 1.0454, + "step": 98022 + }, + { + "epoch": 1.18, + "grad_norm": 23.71993247042555, + "learning_rate": 7.614843183413552e-06, + "loss": 1.2943, + "step": 98025 + }, + { + "epoch": 1.18, + "grad_norm": 18.174722127593085, + "learning_rate": 7.614275869614767e-06, + "loss": 1.5439, + "step": 98028 + }, + { + "epoch": 1.18, + "grad_norm": 7.06609813484478, + "learning_rate": 7.613708563957595e-06, + "loss": 1.0979, + "step": 98031 + }, + { + "epoch": 1.18, + "grad_norm": 13.00720891277921, + "learning_rate": 7.613141266443975e-06, + "loss": 1.5092, + "step": 98034 + }, + { + "epoch": 1.18, + "grad_norm": 6.286244865957048, + "learning_rate": 7.612573977075848e-06, + "loss": 1.4482, + "step": 98037 + }, + { + "epoch": 1.18, + "grad_norm": 9.246863842398225, + "learning_rate": 7.612006695855146e-06, + "loss": 0.8283, + "step": 98040 + }, + { + "epoch": 1.18, + "grad_norm": 9.60242968288982, + "learning_rate": 7.6114394227838025e-06, + "loss": 1.1811, + "step": 98043 + }, + { + "epoch": 1.18, + "grad_norm": 15.943900154971258, + "learning_rate": 7.610872157863756e-06, + "loss": 1.3027, + "step": 98046 + }, + { + "epoch": 1.18, + "grad_norm": 11.763096250689545, + "learning_rate": 7.6103049010969434e-06, + "loss": 1.3093, + "step": 98049 + }, + { + "epoch": 1.18, + "grad_norm": 6.114132253387017, + "learning_rate": 7.609737652485297e-06, + "loss": 1.1996, + "step": 98052 + }, + { + "epoch": 1.18, + "grad_norm": 20.67402067923259, + "learning_rate": 7.609170412030761e-06, + "loss": 1.1504, + "step": 98055 + }, + { + "epoch": 1.18, + "grad_norm": 9.526649868097772, + "learning_rate": 7.608603179735261e-06, + "loss": 1.0538, + "step": 98058 + }, + { + "epoch": 1.18, + "grad_norm": 6.536482276275186, + "learning_rate": 7.608035955600737e-06, + "loss": 1.2384, + "step": 98061 + }, + { + "epoch": 1.18, + "grad_norm": 12.637986110316199, + "learning_rate": 7.6074687396291225e-06, + "loss": 1.2595, + "step": 98064 + }, + { + "epoch": 1.18, + "grad_norm": 27.10192013830747, + "learning_rate": 7.606901531822362e-06, + "loss": 1.4542, + "step": 98067 + }, + { + "epoch": 1.18, + "grad_norm": 3.6961743713094983, + "learning_rate": 7.6063343321823805e-06, + "loss": 1.2679, + "step": 98070 + }, + { + "epoch": 1.18, + "grad_norm": 7.122603221913184, + "learning_rate": 7.605767140711116e-06, + "loss": 1.1855, + "step": 98073 + }, + { + "epoch": 1.18, + "grad_norm": 26.741146483341208, + "learning_rate": 7.605199957410506e-06, + "loss": 1.1733, + "step": 98076 + }, + { + "epoch": 1.18, + "grad_norm": 7.658150107869851, + "learning_rate": 7.6046327822824886e-06, + "loss": 1.0755, + "step": 98079 + }, + { + "epoch": 1.18, + "grad_norm": 4.336925920161585, + "learning_rate": 7.604065615328998e-06, + "loss": 1.3423, + "step": 98082 + }, + { + "epoch": 1.18, + "grad_norm": 15.198804327821678, + "learning_rate": 7.603498456551965e-06, + "loss": 1.2924, + "step": 98085 + }, + { + "epoch": 1.18, + "grad_norm": 16.94924073681557, + "learning_rate": 7.602931305953328e-06, + "loss": 1.0831, + "step": 98088 + }, + { + "epoch": 1.18, + "grad_norm": 10.332141250092736, + "learning_rate": 7.602364163535026e-06, + "loss": 1.0867, + "step": 98091 + }, + { + "epoch": 1.18, + "grad_norm": 4.3111068858901405, + "learning_rate": 7.601797029298993e-06, + "loss": 1.103, + "step": 98094 + }, + { + "epoch": 1.18, + "grad_norm": 8.70983578242624, + "learning_rate": 7.601229903247158e-06, + "loss": 1.5541, + "step": 98097 + }, + { + "epoch": 1.18, + "grad_norm": 19.7241910355709, + "learning_rate": 7.600662785381464e-06, + "loss": 1.1724, + "step": 98100 + }, + { + "epoch": 1.18, + "grad_norm": 6.490343045933166, + "learning_rate": 7.600095675703843e-06, + "loss": 1.1159, + "step": 98103 + }, + { + "epoch": 1.18, + "grad_norm": 2.611122583911337, + "learning_rate": 7.599528574216228e-06, + "loss": 0.7565, + "step": 98106 + }, + { + "epoch": 1.18, + "grad_norm": 6.161642346077273, + "learning_rate": 7.5989614809205635e-06, + "loss": 1.0413, + "step": 98109 + }, + { + "epoch": 1.18, + "grad_norm": 6.41362921656865, + "learning_rate": 7.598394395818777e-06, + "loss": 1.1484, + "step": 98112 + }, + { + "epoch": 1.18, + "grad_norm": 5.128323125659922, + "learning_rate": 7.597827318912803e-06, + "loss": 1.0532, + "step": 98115 + }, + { + "epoch": 1.18, + "grad_norm": 10.1411929557851, + "learning_rate": 7.597260250204578e-06, + "loss": 1.2749, + "step": 98118 + }, + { + "epoch": 1.18, + "grad_norm": 8.909226544819088, + "learning_rate": 7.596693189696043e-06, + "loss": 1.0099, + "step": 98121 + }, + { + "epoch": 1.18, + "grad_norm": 5.594287703272457, + "learning_rate": 7.596126137389126e-06, + "loss": 0.843, + "step": 98124 + }, + { + "epoch": 1.18, + "grad_norm": 6.618302575479485, + "learning_rate": 7.595559093285763e-06, + "loss": 1.252, + "step": 98127 + }, + { + "epoch": 1.18, + "grad_norm": 4.012246731945083, + "learning_rate": 7.594992057387891e-06, + "loss": 1.3501, + "step": 98130 + }, + { + "epoch": 1.18, + "grad_norm": 4.284559871804083, + "learning_rate": 7.594425029697447e-06, + "loss": 1.2, + "step": 98133 + }, + { + "epoch": 1.18, + "grad_norm": 8.768376683579376, + "learning_rate": 7.5938580102163654e-06, + "loss": 1.1359, + "step": 98136 + }, + { + "epoch": 1.18, + "grad_norm": 16.245039214471408, + "learning_rate": 7.593290998946575e-06, + "loss": 1.324, + "step": 98139 + }, + { + "epoch": 1.18, + "grad_norm": 17.211830896177204, + "learning_rate": 7.592723995890019e-06, + "loss": 1.0658, + "step": 98142 + }, + { + "epoch": 1.18, + "grad_norm": 22.54703962053782, + "learning_rate": 7.592157001048626e-06, + "loss": 1.4388, + "step": 98145 + }, + { + "epoch": 1.18, + "grad_norm": 9.770008120664135, + "learning_rate": 7.591590014424338e-06, + "loss": 1.6875, + "step": 98148 + }, + { + "epoch": 1.18, + "grad_norm": 9.923497472969094, + "learning_rate": 7.591023036019082e-06, + "loss": 0.8743, + "step": 98151 + }, + { + "epoch": 1.18, + "grad_norm": 24.669634015631054, + "learning_rate": 7.590456065834799e-06, + "loss": 1.5368, + "step": 98154 + }, + { + "epoch": 1.18, + "grad_norm": 11.765833992969561, + "learning_rate": 7.589889103873419e-06, + "loss": 0.9922, + "step": 98157 + }, + { + "epoch": 1.18, + "grad_norm": 5.3875795756845335, + "learning_rate": 7.589322150136879e-06, + "loss": 1.2615, + "step": 98160 + }, + { + "epoch": 1.18, + "grad_norm": 5.632442485642884, + "learning_rate": 7.5887552046271184e-06, + "loss": 1.4092, + "step": 98163 + }, + { + "epoch": 1.18, + "grad_norm": 9.568977124777698, + "learning_rate": 7.588188267346066e-06, + "loss": 1.2265, + "step": 98166 + }, + { + "epoch": 1.18, + "grad_norm": 19.87387995512604, + "learning_rate": 7.587621338295656e-06, + "loss": 1.3471, + "step": 98169 + }, + { + "epoch": 1.18, + "grad_norm": 26.104027206861293, + "learning_rate": 7.587054417477826e-06, + "loss": 1.3663, + "step": 98172 + }, + { + "epoch": 1.18, + "grad_norm": 13.261308680485396, + "learning_rate": 7.586487504894514e-06, + "loss": 1.3254, + "step": 98175 + }, + { + "epoch": 1.18, + "grad_norm": 14.108616020511583, + "learning_rate": 7.585920600547649e-06, + "loss": 1.534, + "step": 98178 + }, + { + "epoch": 1.18, + "grad_norm": 10.95364888646584, + "learning_rate": 7.585353704439165e-06, + "loss": 1.1778, + "step": 98181 + }, + { + "epoch": 1.18, + "grad_norm": 9.009159470582032, + "learning_rate": 7.584786816571001e-06, + "loss": 1.3098, + "step": 98184 + }, + { + "epoch": 1.18, + "grad_norm": 13.43207989686817, + "learning_rate": 7.584219936945088e-06, + "loss": 0.9788, + "step": 98187 + }, + { + "epoch": 1.18, + "grad_norm": 12.31346231182523, + "learning_rate": 7.583653065563367e-06, + "loss": 1.4353, + "step": 98190 + }, + { + "epoch": 1.18, + "grad_norm": 12.80199675597166, + "learning_rate": 7.583086202427762e-06, + "loss": 1.2365, + "step": 98193 + }, + { + "epoch": 1.18, + "grad_norm": 8.434024275446012, + "learning_rate": 7.582519347540216e-06, + "loss": 1.0266, + "step": 98196 + }, + { + "epoch": 1.18, + "grad_norm": 12.383120619476237, + "learning_rate": 7.581952500902658e-06, + "loss": 1.0457, + "step": 98199 + }, + { + "epoch": 1.18, + "grad_norm": 6.351429064318734, + "learning_rate": 7.5813856625170304e-06, + "loss": 1.473, + "step": 98202 + }, + { + "epoch": 1.18, + "grad_norm": 6.18094052010338, + "learning_rate": 7.580818832385257e-06, + "loss": 1.0522, + "step": 98205 + }, + { + "epoch": 1.18, + "grad_norm": 6.419832687262627, + "learning_rate": 7.580252010509281e-06, + "loss": 0.9079, + "step": 98208 + }, + { + "epoch": 1.18, + "grad_norm": 2.275298048985323, + "learning_rate": 7.579685196891031e-06, + "loss": 1.2919, + "step": 98211 + }, + { + "epoch": 1.18, + "grad_norm": 11.785525782633037, + "learning_rate": 7.579118391532443e-06, + "loss": 1.4123, + "step": 98214 + }, + { + "epoch": 1.18, + "grad_norm": 5.751242363256058, + "learning_rate": 7.578551594435456e-06, + "loss": 1.332, + "step": 98217 + }, + { + "epoch": 1.18, + "grad_norm": 8.654363096081216, + "learning_rate": 7.577984805601999e-06, + "loss": 1.4295, + "step": 98220 + }, + { + "epoch": 1.18, + "grad_norm": 8.361620016642432, + "learning_rate": 7.577418025034004e-06, + "loss": 1.139, + "step": 98223 + }, + { + "epoch": 1.18, + "grad_norm": 20.63419234934428, + "learning_rate": 7.5768512527334094e-06, + "loss": 1.3885, + "step": 98226 + }, + { + "epoch": 1.18, + "grad_norm": 9.42595142024936, + "learning_rate": 7.5762844887021525e-06, + "loss": 1.2616, + "step": 98229 + }, + { + "epoch": 1.18, + "grad_norm": 13.366870630097967, + "learning_rate": 7.575717732942163e-06, + "loss": 0.8664, + "step": 98232 + }, + { + "epoch": 1.18, + "grad_norm": 9.05804829335151, + "learning_rate": 7.575150985455372e-06, + "loss": 0.9657, + "step": 98235 + }, + { + "epoch": 1.18, + "grad_norm": 14.357761465982554, + "learning_rate": 7.5745842462437194e-06, + "loss": 1.3146, + "step": 98238 + }, + { + "epoch": 1.18, + "grad_norm": 4.775977765929045, + "learning_rate": 7.574017515309136e-06, + "loss": 1.6832, + "step": 98241 + }, + { + "epoch": 1.18, + "grad_norm": 23.33096350474986, + "learning_rate": 7.57345079265356e-06, + "loss": 1.4768, + "step": 98244 + }, + { + "epoch": 1.18, + "grad_norm": 5.960523162488028, + "learning_rate": 7.5728840782789195e-06, + "loss": 1.371, + "step": 98247 + }, + { + "epoch": 1.18, + "grad_norm": 6.102782929832441, + "learning_rate": 7.572317372187152e-06, + "loss": 1.359, + "step": 98250 + }, + { + "epoch": 1.18, + "grad_norm": 23.361399074276534, + "learning_rate": 7.5717506743801896e-06, + "loss": 1.3396, + "step": 98253 + }, + { + "epoch": 1.18, + "grad_norm": 3.4817280674293545, + "learning_rate": 7.571183984859972e-06, + "loss": 0.9265, + "step": 98256 + }, + { + "epoch": 1.18, + "grad_norm": 8.189022754037573, + "learning_rate": 7.5706173036284245e-06, + "loss": 0.926, + "step": 98259 + }, + { + "epoch": 1.18, + "grad_norm": 6.328643500307459, + "learning_rate": 7.570050630687486e-06, + "loss": 1.0616, + "step": 98262 + }, + { + "epoch": 1.18, + "grad_norm": 16.015840141898565, + "learning_rate": 7.569483966039089e-06, + "loss": 1.1621, + "step": 98265 + }, + { + "epoch": 1.18, + "grad_norm": 14.831971614749097, + "learning_rate": 7.568917309685166e-06, + "loss": 1.2415, + "step": 98268 + }, + { + "epoch": 1.18, + "grad_norm": 5.861221850221625, + "learning_rate": 7.568350661627658e-06, + "loss": 1.1151, + "step": 98271 + }, + { + "epoch": 1.18, + "grad_norm": 5.089118403899847, + "learning_rate": 7.567784021868491e-06, + "loss": 0.966, + "step": 98274 + }, + { + "epoch": 1.18, + "grad_norm": 11.381022493177719, + "learning_rate": 7.567217390409599e-06, + "loss": 1.1766, + "step": 98277 + }, + { + "epoch": 1.18, + "grad_norm": 10.676327321854032, + "learning_rate": 7.566650767252919e-06, + "loss": 1.3129, + "step": 98280 + }, + { + "epoch": 1.18, + "grad_norm": 6.053078963454853, + "learning_rate": 7.566084152400384e-06, + "loss": 1.2717, + "step": 98283 + }, + { + "epoch": 1.18, + "grad_norm": 8.146262648784656, + "learning_rate": 7.565517545853927e-06, + "loss": 1.572, + "step": 98286 + }, + { + "epoch": 1.18, + "grad_norm": 7.40302710753483, + "learning_rate": 7.56495094761548e-06, + "loss": 1.1732, + "step": 98289 + }, + { + "epoch": 1.18, + "grad_norm": 13.363259704291789, + "learning_rate": 7.56438435768698e-06, + "loss": 1.0856, + "step": 98292 + }, + { + "epoch": 1.18, + "grad_norm": 7.771612153277555, + "learning_rate": 7.5638177760703575e-06, + "loss": 1.4362, + "step": 98295 + }, + { + "epoch": 1.18, + "grad_norm": 5.709031996074695, + "learning_rate": 7.563251202767552e-06, + "loss": 1.3552, + "step": 98298 + }, + { + "epoch": 1.18, + "grad_norm": 9.88060062489101, + "learning_rate": 7.562684637780486e-06, + "loss": 1.4786, + "step": 98301 + }, + { + "epoch": 1.18, + "grad_norm": 9.343408921280941, + "learning_rate": 7.562118081111104e-06, + "loss": 1.0199, + "step": 98304 + }, + { + "epoch": 1.18, + "grad_norm": 77.90693331242194, + "learning_rate": 7.561551532761331e-06, + "loss": 0.9847, + "step": 98307 + }, + { + "epoch": 1.18, + "grad_norm": 5.5573088979061955, + "learning_rate": 7.5609849927331094e-06, + "loss": 0.9541, + "step": 98310 + }, + { + "epoch": 1.18, + "grad_norm": 7.906052062488325, + "learning_rate": 7.5604184610283625e-06, + "loss": 1.1781, + "step": 98313 + }, + { + "epoch": 1.18, + "grad_norm": 8.012221951942625, + "learning_rate": 7.559851937649032e-06, + "loss": 1.034, + "step": 98316 + }, + { + "epoch": 1.18, + "grad_norm": 13.212693510143518, + "learning_rate": 7.559285422597044e-06, + "loss": 1.2239, + "step": 98319 + }, + { + "epoch": 1.18, + "grad_norm": 7.175703461802143, + "learning_rate": 7.558718915874339e-06, + "loss": 1.2306, + "step": 98322 + }, + { + "epoch": 1.18, + "grad_norm": 5.245661136538084, + "learning_rate": 7.558152417482848e-06, + "loss": 1.078, + "step": 98325 + }, + { + "epoch": 1.18, + "grad_norm": 18.571957428866078, + "learning_rate": 7.557585927424502e-06, + "loss": 1.3025, + "step": 98328 + }, + { + "epoch": 1.18, + "grad_norm": 8.666841930920782, + "learning_rate": 7.557019445701234e-06, + "loss": 1.0479, + "step": 98331 + }, + { + "epoch": 1.18, + "grad_norm": 13.391824121721287, + "learning_rate": 7.556452972314979e-06, + "loss": 1.2538, + "step": 98334 + }, + { + "epoch": 1.18, + "grad_norm": 2.9969999447457636, + "learning_rate": 7.555886507267674e-06, + "loss": 0.8481, + "step": 98337 + }, + { + "epoch": 1.18, + "grad_norm": 13.015288963422238, + "learning_rate": 7.555320050561243e-06, + "loss": 1.144, + "step": 98340 + }, + { + "epoch": 1.18, + "grad_norm": 10.760710794803261, + "learning_rate": 7.554753602197624e-06, + "loss": 1.204, + "step": 98343 + }, + { + "epoch": 1.18, + "grad_norm": 8.315807727188908, + "learning_rate": 7.554187162178752e-06, + "loss": 1.5367, + "step": 98346 + }, + { + "epoch": 1.18, + "grad_norm": 8.599945428736163, + "learning_rate": 7.553620730506557e-06, + "loss": 1.166, + "step": 98349 + }, + { + "epoch": 1.18, + "grad_norm": 6.077128241509708, + "learning_rate": 7.553054307182976e-06, + "loss": 1.6283, + "step": 98352 + }, + { + "epoch": 1.18, + "grad_norm": 2.6804800042099335, + "learning_rate": 7.552487892209936e-06, + "loss": 1.2064, + "step": 98355 + }, + { + "epoch": 1.18, + "grad_norm": 44.313821324444184, + "learning_rate": 7.551921485589376e-06, + "loss": 1.4128, + "step": 98358 + }, + { + "epoch": 1.18, + "grad_norm": 20.649475609910862, + "learning_rate": 7.551355087323224e-06, + "loss": 1.0695, + "step": 98361 + }, + { + "epoch": 1.18, + "grad_norm": 4.804763807797207, + "learning_rate": 7.550788697413418e-06, + "loss": 1.176, + "step": 98364 + }, + { + "epoch": 1.18, + "grad_norm": 12.006988213725078, + "learning_rate": 7.550222315861885e-06, + "loss": 0.9559, + "step": 98367 + }, + { + "epoch": 1.18, + "grad_norm": 17.03439023906074, + "learning_rate": 7.5496559426705625e-06, + "loss": 1.1695, + "step": 98370 + }, + { + "epoch": 1.18, + "grad_norm": 5.3342646456788385, + "learning_rate": 7.54908957784138e-06, + "loss": 0.9531, + "step": 98373 + }, + { + "epoch": 1.18, + "grad_norm": 16.09666866501474, + "learning_rate": 7.548523221376274e-06, + "loss": 1.3668, + "step": 98376 + }, + { + "epoch": 1.18, + "grad_norm": 5.482452661587724, + "learning_rate": 7.5479568732771756e-06, + "loss": 1.0496, + "step": 98379 + }, + { + "epoch": 1.18, + "grad_norm": 12.398279188798794, + "learning_rate": 7.5473905335460154e-06, + "loss": 1.4196, + "step": 98382 + }, + { + "epoch": 1.18, + "grad_norm": 9.033782995049373, + "learning_rate": 7.546824202184726e-06, + "loss": 0.9394, + "step": 98385 + }, + { + "epoch": 1.18, + "grad_norm": 4.776623536262188, + "learning_rate": 7.546257879195245e-06, + "loss": 1.2852, + "step": 98388 + }, + { + "epoch": 1.18, + "grad_norm": 8.98958099171367, + "learning_rate": 7.5456915645795034e-06, + "loss": 1.4097, + "step": 98391 + }, + { + "epoch": 1.18, + "grad_norm": 6.199038775887947, + "learning_rate": 7.54512525833943e-06, + "loss": 0.9775, + "step": 98394 + }, + { + "epoch": 1.18, + "grad_norm": 24.533902625913704, + "learning_rate": 7.544558960476957e-06, + "loss": 1.1757, + "step": 98397 + }, + { + "epoch": 1.18, + "grad_norm": 15.250737219610745, + "learning_rate": 7.5439926709940245e-06, + "loss": 1.3612, + "step": 98400 + }, + { + "epoch": 1.18, + "grad_norm": 8.094262845086375, + "learning_rate": 7.543426389892557e-06, + "loss": 1.0944, + "step": 98403 + }, + { + "epoch": 1.18, + "grad_norm": 15.613863009897146, + "learning_rate": 7.542860117174495e-06, + "loss": 1.277, + "step": 98406 + }, + { + "epoch": 1.18, + "grad_norm": 11.189065667169086, + "learning_rate": 7.542293852841762e-06, + "loss": 1.1564, + "step": 98409 + }, + { + "epoch": 1.18, + "grad_norm": 7.454047168312598, + "learning_rate": 7.541727596896298e-06, + "loss": 1.1059, + "step": 98412 + }, + { + "epoch": 1.18, + "grad_norm": 8.521703314917366, + "learning_rate": 7.541161349340028e-06, + "loss": 1.2605, + "step": 98415 + }, + { + "epoch": 1.18, + "grad_norm": 19.475959929042126, + "learning_rate": 7.540595110174894e-06, + "loss": 1.0079, + "step": 98418 + }, + { + "epoch": 1.18, + "grad_norm": 8.903649422181395, + "learning_rate": 7.540028879402821e-06, + "loss": 1.1618, + "step": 98421 + }, + { + "epoch": 1.18, + "grad_norm": 12.544065898387158, + "learning_rate": 7.539462657025741e-06, + "loss": 1.1765, + "step": 98424 + }, + { + "epoch": 1.18, + "grad_norm": 35.67661128856333, + "learning_rate": 7.538896443045589e-06, + "loss": 1.1824, + "step": 98427 + }, + { + "epoch": 1.18, + "grad_norm": 11.778202990141375, + "learning_rate": 7.5383302374643e-06, + "loss": 1.5245, + "step": 98430 + }, + { + "epoch": 1.18, + "grad_norm": 6.420688785242526, + "learning_rate": 7.5377640402838035e-06, + "loss": 1.2471, + "step": 98433 + }, + { + "epoch": 1.18, + "grad_norm": 4.730160356232949, + "learning_rate": 7.537197851506029e-06, + "loss": 1.1156, + "step": 98436 + }, + { + "epoch": 1.18, + "grad_norm": 6.554380198036362, + "learning_rate": 7.536631671132909e-06, + "loss": 1.4836, + "step": 98439 + }, + { + "epoch": 1.18, + "grad_norm": 5.8803811900055445, + "learning_rate": 7.536065499166381e-06, + "loss": 1.3639, + "step": 98442 + }, + { + "epoch": 1.18, + "grad_norm": 5.4409781065479805, + "learning_rate": 7.535499335608376e-06, + "loss": 1.2008, + "step": 98445 + }, + { + "epoch": 1.18, + "grad_norm": 18.034127490585675, + "learning_rate": 7.534933180460819e-06, + "loss": 1.1677, + "step": 98448 + }, + { + "epoch": 1.18, + "grad_norm": 27.830929770084897, + "learning_rate": 7.534367033725649e-06, + "loss": 1.2919, + "step": 98451 + }, + { + "epoch": 1.18, + "grad_norm": 2.1194906367877384, + "learning_rate": 7.533800895404797e-06, + "loss": 1.3038, + "step": 98454 + }, + { + "epoch": 1.18, + "grad_norm": 6.6829045745437545, + "learning_rate": 7.533234765500192e-06, + "loss": 1.2366, + "step": 98457 + }, + { + "epoch": 1.18, + "grad_norm": 10.252302005425177, + "learning_rate": 7.5326686440137745e-06, + "loss": 1.1703, + "step": 98460 + }, + { + "epoch": 1.18, + "grad_norm": 19.07594480846434, + "learning_rate": 7.532102530947464e-06, + "loss": 1.0314, + "step": 98463 + }, + { + "epoch": 1.18, + "grad_norm": 2.8720526596302367, + "learning_rate": 7.531536426303201e-06, + "loss": 1.4649, + "step": 98466 + }, + { + "epoch": 1.18, + "grad_norm": 10.266930543861314, + "learning_rate": 7.530970330082913e-06, + "loss": 1.3709, + "step": 98469 + }, + { + "epoch": 1.18, + "grad_norm": 3.621036947043638, + "learning_rate": 7.530404242288538e-06, + "loss": 1.2736, + "step": 98472 + }, + { + "epoch": 1.18, + "grad_norm": 9.408351696351144, + "learning_rate": 7.529838162922002e-06, + "loss": 1.1978, + "step": 98475 + }, + { + "epoch": 1.18, + "grad_norm": 3.9177806658478893, + "learning_rate": 7.529272091985235e-06, + "loss": 1.1385, + "step": 98478 + }, + { + "epoch": 1.18, + "grad_norm": 5.98023810351731, + "learning_rate": 7.528706029480175e-06, + "loss": 1.1634, + "step": 98481 + }, + { + "epoch": 1.18, + "grad_norm": 6.512643238611918, + "learning_rate": 7.528139975408752e-06, + "loss": 1.1982, + "step": 98484 + }, + { + "epoch": 1.18, + "grad_norm": 8.682655537753428, + "learning_rate": 7.527573929772899e-06, + "loss": 0.8804, + "step": 98487 + }, + { + "epoch": 1.18, + "grad_norm": 10.86036249609364, + "learning_rate": 7.52700789257454e-06, + "loss": 0.891, + "step": 98490 + }, + { + "epoch": 1.18, + "grad_norm": 71.54476815938033, + "learning_rate": 7.526441863815614e-06, + "loss": 1.4679, + "step": 98493 + }, + { + "epoch": 1.18, + "grad_norm": 9.257315618327521, + "learning_rate": 7.525875843498054e-06, + "loss": 1.0024, + "step": 98496 + }, + { + "epoch": 1.18, + "grad_norm": 11.078383099603366, + "learning_rate": 7.5253098316237885e-06, + "loss": 1.2746, + "step": 98499 + }, + { + "epoch": 1.18, + "grad_norm": 7.4717974605537, + "learning_rate": 7.524743828194746e-06, + "loss": 1.0431, + "step": 98502 + }, + { + "epoch": 1.18, + "grad_norm": 23.488667883406684, + "learning_rate": 7.524177833212861e-06, + "loss": 1.0233, + "step": 98505 + }, + { + "epoch": 1.18, + "grad_norm": 6.405315300604527, + "learning_rate": 7.523611846680068e-06, + "loss": 1.163, + "step": 98508 + }, + { + "epoch": 1.18, + "grad_norm": 6.277635835428639, + "learning_rate": 7.523045868598293e-06, + "loss": 1.2601, + "step": 98511 + }, + { + "epoch": 1.18, + "grad_norm": 7.124493745316654, + "learning_rate": 7.522479898969475e-06, + "loss": 1.2121, + "step": 98514 + }, + { + "epoch": 1.18, + "grad_norm": 13.010261234920655, + "learning_rate": 7.5219139377955376e-06, + "loss": 0.9643, + "step": 98517 + }, + { + "epoch": 1.18, + "grad_norm": 15.54618371197408, + "learning_rate": 7.521347985078414e-06, + "loss": 1.3361, + "step": 98520 + }, + { + "epoch": 1.18, + "grad_norm": 3.3342222502312304, + "learning_rate": 7.520782040820037e-06, + "loss": 0.8268, + "step": 98523 + }, + { + "epoch": 1.18, + "grad_norm": 9.475414226027961, + "learning_rate": 7.520216105022342e-06, + "loss": 1.2916, + "step": 98526 + }, + { + "epoch": 1.18, + "grad_norm": 9.451300721333572, + "learning_rate": 7.519650177687255e-06, + "loss": 1.0997, + "step": 98529 + }, + { + "epoch": 1.18, + "grad_norm": 9.171603700261072, + "learning_rate": 7.519084258816705e-06, + "loss": 1.1248, + "step": 98532 + }, + { + "epoch": 1.18, + "grad_norm": 4.069793541197175, + "learning_rate": 7.518518348412626e-06, + "loss": 1.0797, + "step": 98535 + }, + { + "epoch": 1.18, + "grad_norm": 18.73289557620144, + "learning_rate": 7.517952446476954e-06, + "loss": 0.9874, + "step": 98538 + }, + { + "epoch": 1.18, + "grad_norm": 14.147788603302919, + "learning_rate": 7.517386553011617e-06, + "loss": 0.7836, + "step": 98541 + }, + { + "epoch": 1.18, + "grad_norm": 9.445635103823877, + "learning_rate": 7.516820668018543e-06, + "loss": 1.3656, + "step": 98544 + }, + { + "epoch": 1.18, + "grad_norm": 7.9048589399909455, + "learning_rate": 7.516254791499664e-06, + "loss": 1.4423, + "step": 98547 + }, + { + "epoch": 1.19, + "grad_norm": 16.325701840733338, + "learning_rate": 7.5156889234569155e-06, + "loss": 1.3508, + "step": 98550 + }, + { + "epoch": 1.19, + "grad_norm": 7.022740366248589, + "learning_rate": 7.515123063892228e-06, + "loss": 1.5724, + "step": 98553 + }, + { + "epoch": 1.19, + "grad_norm": 9.501930462671758, + "learning_rate": 7.514557212807526e-06, + "loss": 1.2563, + "step": 98556 + }, + { + "epoch": 1.19, + "grad_norm": 4.1692838494658515, + "learning_rate": 7.5139913702047476e-06, + "loss": 1.0655, + "step": 98559 + }, + { + "epoch": 1.19, + "grad_norm": 8.355489200476947, + "learning_rate": 7.513425536085819e-06, + "loss": 0.9885, + "step": 98562 + }, + { + "epoch": 1.19, + "grad_norm": 14.369878786337043, + "learning_rate": 7.5128597104526725e-06, + "loss": 0.9144, + "step": 98565 + }, + { + "epoch": 1.19, + "grad_norm": 24.539259388678598, + "learning_rate": 7.512293893307245e-06, + "loss": 1.0635, + "step": 98568 + }, + { + "epoch": 1.19, + "grad_norm": 6.9709529751155666, + "learning_rate": 7.511728084651459e-06, + "loss": 1.0636, + "step": 98571 + }, + { + "epoch": 1.19, + "grad_norm": 13.834553388620032, + "learning_rate": 7.511162284487249e-06, + "loss": 0.9669, + "step": 98574 + }, + { + "epoch": 1.19, + "grad_norm": 4.481958674200597, + "learning_rate": 7.510596492816544e-06, + "loss": 1.2213, + "step": 98577 + }, + { + "epoch": 1.19, + "grad_norm": 11.480181093368692, + "learning_rate": 7.510030709641281e-06, + "loss": 1.3333, + "step": 98580 + }, + { + "epoch": 1.19, + "grad_norm": 10.729667663150467, + "learning_rate": 7.509464934963384e-06, + "loss": 1.1953, + "step": 98583 + }, + { + "epoch": 1.19, + "grad_norm": 4.1765003300199215, + "learning_rate": 7.508899168784784e-06, + "loss": 1.224, + "step": 98586 + }, + { + "epoch": 1.19, + "grad_norm": 9.934162211598556, + "learning_rate": 7.508333411107415e-06, + "loss": 1.2259, + "step": 98589 + }, + { + "epoch": 1.19, + "grad_norm": 11.762224985597907, + "learning_rate": 7.507767661933207e-06, + "loss": 1.3468, + "step": 98592 + }, + { + "epoch": 1.19, + "grad_norm": 4.158043820617115, + "learning_rate": 7.507201921264093e-06, + "loss": 1.1806, + "step": 98595 + }, + { + "epoch": 1.19, + "grad_norm": 7.904321763956366, + "learning_rate": 7.506636189101998e-06, + "loss": 1.4198, + "step": 98598 + }, + { + "epoch": 1.19, + "grad_norm": 23.158394446582612, + "learning_rate": 7.506070465448856e-06, + "loss": 1.3967, + "step": 98601 + }, + { + "epoch": 1.19, + "grad_norm": 9.247893461330415, + "learning_rate": 7.5055047503065955e-06, + "loss": 1.2659, + "step": 98604 + }, + { + "epoch": 1.19, + "grad_norm": 5.404045204297207, + "learning_rate": 7.504939043677153e-06, + "loss": 1.0652, + "step": 98607 + }, + { + "epoch": 1.19, + "grad_norm": 9.118215872174884, + "learning_rate": 7.504373345562451e-06, + "loss": 1.4025, + "step": 98610 + }, + { + "epoch": 1.19, + "grad_norm": 9.407052382213202, + "learning_rate": 7.503807655964425e-06, + "loss": 1.0234, + "step": 98613 + }, + { + "epoch": 1.19, + "grad_norm": 9.381992757248897, + "learning_rate": 7.503241974885004e-06, + "loss": 1.1297, + "step": 98616 + }, + { + "epoch": 1.19, + "grad_norm": 10.03125180339112, + "learning_rate": 7.502676302326117e-06, + "loss": 1.0708, + "step": 98619 + }, + { + "epoch": 1.19, + "grad_norm": 8.275388737216646, + "learning_rate": 7.502110638289701e-06, + "loss": 1.2082, + "step": 98622 + }, + { + "epoch": 1.19, + "grad_norm": 33.51227890395093, + "learning_rate": 7.501544982777681e-06, + "loss": 1.114, + "step": 98625 + }, + { + "epoch": 1.19, + "grad_norm": 6.87806223604677, + "learning_rate": 7.500979335791985e-06, + "loss": 0.8943, + "step": 98628 + }, + { + "epoch": 1.19, + "grad_norm": 17.80341660184497, + "learning_rate": 7.500413697334545e-06, + "loss": 0.961, + "step": 98631 + }, + { + "epoch": 1.19, + "grad_norm": 12.368002687696109, + "learning_rate": 7.499848067407298e-06, + "loss": 0.9372, + "step": 98634 + }, + { + "epoch": 1.19, + "grad_norm": 16.34539372910003, + "learning_rate": 7.499282446012167e-06, + "loss": 1.2362, + "step": 98637 + }, + { + "epoch": 1.19, + "grad_norm": 9.764986195488113, + "learning_rate": 7.498716833151083e-06, + "loss": 1.0807, + "step": 98640 + }, + { + "epoch": 1.19, + "grad_norm": 18.004932377569975, + "learning_rate": 7.498151228825976e-06, + "loss": 1.3201, + "step": 98643 + }, + { + "epoch": 1.19, + "grad_norm": 7.553046423635492, + "learning_rate": 7.49758563303878e-06, + "loss": 1.1454, + "step": 98646 + }, + { + "epoch": 1.19, + "grad_norm": 23.604574709946032, + "learning_rate": 7.497020045791425e-06, + "loss": 1.3028, + "step": 98649 + }, + { + "epoch": 1.19, + "grad_norm": 19.049598287055325, + "learning_rate": 7.496454467085836e-06, + "loss": 0.9115, + "step": 98652 + }, + { + "epoch": 1.19, + "grad_norm": 10.134677857239401, + "learning_rate": 7.495888896923946e-06, + "loss": 1.3391, + "step": 98655 + }, + { + "epoch": 1.19, + "grad_norm": 23.102824756841322, + "learning_rate": 7.495323335307685e-06, + "loss": 1.0986, + "step": 98658 + }, + { + "epoch": 1.19, + "grad_norm": 15.14941167441997, + "learning_rate": 7.494757782238987e-06, + "loss": 0.9358, + "step": 98661 + }, + { + "epoch": 1.19, + "grad_norm": 9.678274054198864, + "learning_rate": 7.4941922377197754e-06, + "loss": 1.3564, + "step": 98664 + }, + { + "epoch": 1.19, + "grad_norm": 6.489151578694178, + "learning_rate": 7.493626701751982e-06, + "loss": 1.1883, + "step": 98667 + }, + { + "epoch": 1.19, + "grad_norm": 8.690258659656157, + "learning_rate": 7.4930611743375395e-06, + "loss": 1.0686, + "step": 98670 + }, + { + "epoch": 1.19, + "grad_norm": 5.9699600307872736, + "learning_rate": 7.4924956554783736e-06, + "loss": 1.1114, + "step": 98673 + }, + { + "epoch": 1.19, + "grad_norm": 8.784689151865203, + "learning_rate": 7.4919301451764215e-06, + "loss": 1.3002, + "step": 98676 + }, + { + "epoch": 1.19, + "grad_norm": 2.7067578351495554, + "learning_rate": 7.491364643433608e-06, + "loss": 1.4929, + "step": 98679 + }, + { + "epoch": 1.19, + "grad_norm": 4.816786005120998, + "learning_rate": 7.4907991502518585e-06, + "loss": 0.9726, + "step": 98682 + }, + { + "epoch": 1.19, + "grad_norm": 2.7356169570029643, + "learning_rate": 7.490233665633108e-06, + "loss": 1.6175, + "step": 98685 + }, + { + "epoch": 1.19, + "grad_norm": 15.171574266423553, + "learning_rate": 7.489668189579292e-06, + "loss": 1.3772, + "step": 98688 + }, + { + "epoch": 1.19, + "grad_norm": 10.662887324429136, + "learning_rate": 7.489102722092332e-06, + "loss": 1.3977, + "step": 98691 + }, + { + "epoch": 1.19, + "grad_norm": 13.130174393854634, + "learning_rate": 7.488537263174156e-06, + "loss": 1.0307, + "step": 98694 + }, + { + "epoch": 1.19, + "grad_norm": 5.0479905567017544, + "learning_rate": 7.487971812826702e-06, + "loss": 0.8516, + "step": 98697 + }, + { + "epoch": 1.19, + "grad_norm": 6.20016276813425, + "learning_rate": 7.487406371051893e-06, + "loss": 0.9437, + "step": 98700 + }, + { + "epoch": 1.19, + "grad_norm": 12.962082067895766, + "learning_rate": 7.4868409378516635e-06, + "loss": 1.1749, + "step": 98703 + }, + { + "epoch": 1.19, + "grad_norm": 10.522095132867934, + "learning_rate": 7.4862755132279385e-06, + "loss": 1.1276, + "step": 98706 + }, + { + "epoch": 1.19, + "grad_norm": 8.593891736427592, + "learning_rate": 7.48571009718265e-06, + "loss": 1.0396, + "step": 98709 + }, + { + "epoch": 1.19, + "grad_norm": 5.627422797271525, + "learning_rate": 7.485144689717726e-06, + "loss": 1.0551, + "step": 98712 + }, + { + "epoch": 1.19, + "grad_norm": 10.158604812617195, + "learning_rate": 7.484579290835101e-06, + "loss": 1.1417, + "step": 98715 + }, + { + "epoch": 1.19, + "grad_norm": 6.900824481164165, + "learning_rate": 7.484013900536698e-06, + "loss": 1.2808, + "step": 98718 + }, + { + "epoch": 1.19, + "grad_norm": 3.887803050013755, + "learning_rate": 7.48344851882445e-06, + "loss": 1.4247, + "step": 98721 + }, + { + "epoch": 1.19, + "grad_norm": 5.761635867138403, + "learning_rate": 7.482883145700285e-06, + "loss": 1.2519, + "step": 98724 + }, + { + "epoch": 1.19, + "grad_norm": 21.129727324606456, + "learning_rate": 7.4823177811661315e-06, + "loss": 1.453, + "step": 98727 + }, + { + "epoch": 1.19, + "grad_norm": 7.049082992382218, + "learning_rate": 7.481752425223927e-06, + "loss": 1.2261, + "step": 98730 + }, + { + "epoch": 1.19, + "grad_norm": 9.867099737050532, + "learning_rate": 7.48118707787559e-06, + "loss": 1.598, + "step": 98733 + }, + { + "epoch": 1.19, + "grad_norm": 36.38449606360568, + "learning_rate": 7.480621739123053e-06, + "loss": 1.0841, + "step": 98736 + }, + { + "epoch": 1.19, + "grad_norm": 5.009739937022775, + "learning_rate": 7.480056408968249e-06, + "loss": 1.3022, + "step": 98739 + }, + { + "epoch": 1.19, + "grad_norm": 23.24236491080458, + "learning_rate": 7.479491087413104e-06, + "loss": 0.8834, + "step": 98742 + }, + { + "epoch": 1.19, + "grad_norm": 11.306423844826002, + "learning_rate": 7.478925774459549e-06, + "loss": 1.4146, + "step": 98745 + }, + { + "epoch": 1.19, + "grad_norm": 17.51640782560647, + "learning_rate": 7.478360470109509e-06, + "loss": 1.2146, + "step": 98748 + }, + { + "epoch": 1.19, + "grad_norm": 7.875571994495166, + "learning_rate": 7.47779517436492e-06, + "loss": 0.9621, + "step": 98751 + }, + { + "epoch": 1.19, + "grad_norm": 6.024925309682602, + "learning_rate": 7.477229887227705e-06, + "loss": 1.1249, + "step": 98754 + }, + { + "epoch": 1.19, + "grad_norm": 21.219881139179407, + "learning_rate": 7.4766646086997994e-06, + "loss": 1.3982, + "step": 98757 + }, + { + "epoch": 1.19, + "grad_norm": 20.053756010127053, + "learning_rate": 7.4760993387831245e-06, + "loss": 1.2427, + "step": 98760 + }, + { + "epoch": 1.19, + "grad_norm": 5.108963165479232, + "learning_rate": 7.475534077479616e-06, + "loss": 1.283, + "step": 98763 + }, + { + "epoch": 1.19, + "grad_norm": 9.574232474055956, + "learning_rate": 7.474968824791197e-06, + "loss": 1.53, + "step": 98766 + }, + { + "epoch": 1.19, + "grad_norm": 10.348314202808796, + "learning_rate": 7.474403580719806e-06, + "loss": 0.8114, + "step": 98769 + }, + { + "epoch": 1.19, + "grad_norm": 5.521683156272356, + "learning_rate": 7.47383834526736e-06, + "loss": 1.1404, + "step": 98772 + }, + { + "epoch": 1.19, + "grad_norm": 34.35059067823658, + "learning_rate": 7.473273118435796e-06, + "loss": 0.9853, + "step": 98775 + }, + { + "epoch": 1.19, + "grad_norm": 2.6164087976008115, + "learning_rate": 7.4727079002270395e-06, + "loss": 1.2009, + "step": 98778 + }, + { + "epoch": 1.19, + "grad_norm": 9.68979426849962, + "learning_rate": 7.472142690643023e-06, + "loss": 1.4019, + "step": 98781 + }, + { + "epoch": 1.19, + "grad_norm": 29.572888960158906, + "learning_rate": 7.471577489685673e-06, + "loss": 1.3506, + "step": 98784 + }, + { + "epoch": 1.19, + "grad_norm": 10.582794257053557, + "learning_rate": 7.471012297356917e-06, + "loss": 1.4435, + "step": 98787 + }, + { + "epoch": 1.19, + "grad_norm": 2.964561043076222, + "learning_rate": 7.470447113658683e-06, + "loss": 1.3549, + "step": 98790 + }, + { + "epoch": 1.19, + "grad_norm": 10.494551253711636, + "learning_rate": 7.469881938592904e-06, + "loss": 1.3181, + "step": 98793 + }, + { + "epoch": 1.19, + "grad_norm": 5.9275937115899335, + "learning_rate": 7.46931677216151e-06, + "loss": 1.195, + "step": 98796 + }, + { + "epoch": 1.19, + "grad_norm": 16.61160315705927, + "learning_rate": 7.468751614366422e-06, + "loss": 1.1932, + "step": 98799 + }, + { + "epoch": 1.19, + "grad_norm": 8.460079615566835, + "learning_rate": 7.468186465209572e-06, + "loss": 1.0888, + "step": 98802 + }, + { + "epoch": 1.19, + "grad_norm": 5.145905272457661, + "learning_rate": 7.4676213246928934e-06, + "loss": 1.2741, + "step": 98805 + }, + { + "epoch": 1.19, + "grad_norm": 10.17320781097036, + "learning_rate": 7.467056192818307e-06, + "loss": 1.2521, + "step": 98808 + }, + { + "epoch": 1.19, + "grad_norm": 6.446907793496297, + "learning_rate": 7.46649106958775e-06, + "loss": 1.416, + "step": 98811 + }, + { + "epoch": 1.19, + "grad_norm": 6.711471631673248, + "learning_rate": 7.465925955003143e-06, + "loss": 1.0269, + "step": 98814 + }, + { + "epoch": 1.19, + "grad_norm": 7.084979345615003, + "learning_rate": 7.465360849066419e-06, + "loss": 1.2124, + "step": 98817 + }, + { + "epoch": 1.19, + "grad_norm": 6.565796348481527, + "learning_rate": 7.4647957517795045e-06, + "loss": 1.3587, + "step": 98820 + }, + { + "epoch": 1.19, + "grad_norm": 4.242823764665511, + "learning_rate": 7.4642306631443314e-06, + "loss": 1.0793, + "step": 98823 + }, + { + "epoch": 1.19, + "grad_norm": 10.846512099553475, + "learning_rate": 7.463665583162825e-06, + "loss": 1.2828, + "step": 98826 + }, + { + "epoch": 1.19, + "grad_norm": 50.616208625387905, + "learning_rate": 7.463100511836914e-06, + "loss": 1.1021, + "step": 98829 + }, + { + "epoch": 1.19, + "grad_norm": 44.44356688864858, + "learning_rate": 7.462535449168524e-06, + "loss": 1.1297, + "step": 98832 + }, + { + "epoch": 1.19, + "grad_norm": 14.077263189847223, + "learning_rate": 7.4619703951595905e-06, + "loss": 1.1185, + "step": 98835 + }, + { + "epoch": 1.19, + "grad_norm": 7.166153237666228, + "learning_rate": 7.461405349812039e-06, + "loss": 1.2802, + "step": 98838 + }, + { + "epoch": 1.19, + "grad_norm": 6.826805317344639, + "learning_rate": 7.4608403131277925e-06, + "loss": 1.0175, + "step": 98841 + }, + { + "epoch": 1.19, + "grad_norm": 5.85853732608283, + "learning_rate": 7.460275285108783e-06, + "loss": 1.2353, + "step": 98844 + }, + { + "epoch": 1.19, + "grad_norm": 5.409493451809546, + "learning_rate": 7.459710265756943e-06, + "loss": 1.324, + "step": 98847 + }, + { + "epoch": 1.19, + "grad_norm": 27.865007007151902, + "learning_rate": 7.459145255074198e-06, + "loss": 1.5263, + "step": 98850 + }, + { + "epoch": 1.19, + "grad_norm": 15.102357769441996, + "learning_rate": 7.458580253062471e-06, + "loss": 1.5003, + "step": 98853 + }, + { + "epoch": 1.19, + "grad_norm": 5.843895904557891, + "learning_rate": 7.458015259723693e-06, + "loss": 1.2372, + "step": 98856 + }, + { + "epoch": 1.19, + "grad_norm": 18.001818928324745, + "learning_rate": 7.457450275059797e-06, + "loss": 0.9076, + "step": 98859 + }, + { + "epoch": 1.19, + "grad_norm": 11.747985629109163, + "learning_rate": 7.456885299072703e-06, + "loss": 1.2853, + "step": 98862 + }, + { + "epoch": 1.19, + "grad_norm": 8.616639716386938, + "learning_rate": 7.456320331764351e-06, + "loss": 1.2092, + "step": 98865 + }, + { + "epoch": 1.19, + "grad_norm": 8.629130468561023, + "learning_rate": 7.455755373136655e-06, + "loss": 1.0514, + "step": 98868 + }, + { + "epoch": 1.19, + "grad_norm": 4.778600142174983, + "learning_rate": 7.455190423191552e-06, + "loss": 1.1588, + "step": 98871 + }, + { + "epoch": 1.19, + "grad_norm": 21.61922076421169, + "learning_rate": 7.454625481930965e-06, + "loss": 1.3651, + "step": 98874 + }, + { + "epoch": 1.19, + "grad_norm": 6.664585443936309, + "learning_rate": 7.45406054935683e-06, + "loss": 0.8573, + "step": 98877 + }, + { + "epoch": 1.19, + "grad_norm": 18.719390785744736, + "learning_rate": 7.453495625471063e-06, + "loss": 1.2015, + "step": 98880 + }, + { + "epoch": 1.19, + "grad_norm": 4.775879837465358, + "learning_rate": 7.452930710275601e-06, + "loss": 0.945, + "step": 98883 + }, + { + "epoch": 1.19, + "grad_norm": 3.9627559790881612, + "learning_rate": 7.4523658037723676e-06, + "loss": 1.1188, + "step": 98886 + }, + { + "epoch": 1.19, + "grad_norm": 9.997532944217589, + "learning_rate": 7.451800905963293e-06, + "loss": 1.2945, + "step": 98889 + }, + { + "epoch": 1.19, + "grad_norm": 7.318739440749297, + "learning_rate": 7.451236016850308e-06, + "loss": 1.5384, + "step": 98892 + }, + { + "epoch": 1.19, + "grad_norm": 12.95432406907328, + "learning_rate": 7.450671136435331e-06, + "loss": 1.3893, + "step": 98895 + }, + { + "epoch": 1.19, + "grad_norm": 7.811563308245245, + "learning_rate": 7.450106264720296e-06, + "loss": 1.0715, + "step": 98898 + }, + { + "epoch": 1.19, + "grad_norm": 11.679040047376226, + "learning_rate": 7.449541401707131e-06, + "loss": 0.9212, + "step": 98901 + }, + { + "epoch": 1.19, + "grad_norm": 16.844020751930778, + "learning_rate": 7.448976547397765e-06, + "loss": 1.1647, + "step": 98904 + }, + { + "epoch": 1.19, + "grad_norm": 10.375674187228519, + "learning_rate": 7.4484117017941185e-06, + "loss": 1.114, + "step": 98907 + }, + { + "epoch": 1.19, + "grad_norm": 4.493821725637852, + "learning_rate": 7.447846864898125e-06, + "loss": 1.1875, + "step": 98910 + }, + { + "epoch": 1.19, + "grad_norm": 11.63039185146992, + "learning_rate": 7.447282036711711e-06, + "loss": 1.4481, + "step": 98913 + }, + { + "epoch": 1.19, + "grad_norm": 7.37343320516193, + "learning_rate": 7.446717217236803e-06, + "loss": 1.2292, + "step": 98916 + }, + { + "epoch": 1.19, + "grad_norm": 7.330568613562959, + "learning_rate": 7.446152406475334e-06, + "loss": 1.3243, + "step": 98919 + }, + { + "epoch": 1.19, + "grad_norm": 3.1151845856468223, + "learning_rate": 7.445587604429223e-06, + "loss": 0.973, + "step": 98922 + }, + { + "epoch": 1.19, + "grad_norm": 4.551978502669483, + "learning_rate": 7.445022811100402e-06, + "loss": 1.3168, + "step": 98925 + }, + { + "epoch": 1.19, + "grad_norm": 5.499970869535435, + "learning_rate": 7.444458026490798e-06, + "loss": 1.3126, + "step": 98928 + }, + { + "epoch": 1.19, + "grad_norm": 4.044890137450549, + "learning_rate": 7.443893250602341e-06, + "loss": 1.204, + "step": 98931 + }, + { + "epoch": 1.19, + "grad_norm": 11.802809955273544, + "learning_rate": 7.4433284834369534e-06, + "loss": 1.1771, + "step": 98934 + }, + { + "epoch": 1.19, + "grad_norm": 11.463492522830553, + "learning_rate": 7.4427637249965625e-06, + "loss": 1.4356, + "step": 98937 + }, + { + "epoch": 1.19, + "grad_norm": 3.299753013995924, + "learning_rate": 7.4421989752830994e-06, + "loss": 1.0954, + "step": 98940 + }, + { + "epoch": 1.19, + "grad_norm": 16.40444715628753, + "learning_rate": 7.44163423429849e-06, + "loss": 1.4744, + "step": 98943 + }, + { + "epoch": 1.19, + "grad_norm": 7.931076066834219, + "learning_rate": 7.441069502044664e-06, + "loss": 1.1459, + "step": 98946 + }, + { + "epoch": 1.19, + "grad_norm": 3.173650691691471, + "learning_rate": 7.440504778523543e-06, + "loss": 1.2907, + "step": 98949 + }, + { + "epoch": 1.19, + "grad_norm": 10.275257100259255, + "learning_rate": 7.439940063737057e-06, + "loss": 1.0773, + "step": 98952 + }, + { + "epoch": 1.19, + "grad_norm": 34.311859531736836, + "learning_rate": 7.439375357687134e-06, + "loss": 0.9075, + "step": 98955 + }, + { + "epoch": 1.19, + "grad_norm": 12.336894128445577, + "learning_rate": 7.438810660375704e-06, + "loss": 1.0189, + "step": 98958 + }, + { + "epoch": 1.19, + "grad_norm": 25.75889705619112, + "learning_rate": 7.438245971804687e-06, + "loss": 1.1761, + "step": 98961 + }, + { + "epoch": 1.19, + "grad_norm": 8.30102845975069, + "learning_rate": 7.437681291976012e-06, + "loss": 1.1607, + "step": 98964 + }, + { + "epoch": 1.19, + "grad_norm": 9.237404916574024, + "learning_rate": 7.4371166208916115e-06, + "loss": 1.2302, + "step": 98967 + }, + { + "epoch": 1.19, + "grad_norm": 12.049955015066736, + "learning_rate": 7.436551958553406e-06, + "loss": 1.1064, + "step": 98970 + }, + { + "epoch": 1.19, + "grad_norm": 6.0916593366168375, + "learning_rate": 7.43598730496333e-06, + "loss": 1.4914, + "step": 98973 + }, + { + "epoch": 1.19, + "grad_norm": 5.956088275533506, + "learning_rate": 7.435422660123304e-06, + "loss": 1.3413, + "step": 98976 + }, + { + "epoch": 1.19, + "grad_norm": 4.556328405671122, + "learning_rate": 7.434858024035253e-06, + "loss": 1.3093, + "step": 98979 + }, + { + "epoch": 1.19, + "grad_norm": 5.595698858058858, + "learning_rate": 7.434293396701109e-06, + "loss": 1.0516, + "step": 98982 + }, + { + "epoch": 1.19, + "grad_norm": 18.587260182302423, + "learning_rate": 7.433728778122801e-06, + "loss": 1.3224, + "step": 98985 + }, + { + "epoch": 1.19, + "grad_norm": 10.14308279328768, + "learning_rate": 7.433164168302249e-06, + "loss": 1.5061, + "step": 98988 + }, + { + "epoch": 1.19, + "grad_norm": 7.559386177424669, + "learning_rate": 7.432599567241383e-06, + "loss": 1.0809, + "step": 98991 + }, + { + "epoch": 1.19, + "grad_norm": 21.18651905064724, + "learning_rate": 7.43203497494213e-06, + "loss": 1.3354, + "step": 98994 + }, + { + "epoch": 1.19, + "grad_norm": 8.865085594003803, + "learning_rate": 7.431470391406418e-06, + "loss": 1.1925, + "step": 98997 + }, + { + "epoch": 1.19, + "grad_norm": 9.642460797298142, + "learning_rate": 7.4309058166361745e-06, + "loss": 1.1562, + "step": 99000 + }, + { + "epoch": 1.19, + "grad_norm": 33.53748303817405, + "learning_rate": 7.430341250633319e-06, + "loss": 1.5381, + "step": 99003 + }, + { + "epoch": 1.19, + "grad_norm": 7.031182494709887, + "learning_rate": 7.429776693399785e-06, + "loss": 1.2199, + "step": 99006 + }, + { + "epoch": 1.19, + "grad_norm": 16.948193002220165, + "learning_rate": 7.4292121449374986e-06, + "loss": 1.342, + "step": 99009 + }, + { + "epoch": 1.19, + "grad_norm": 4.267350640215074, + "learning_rate": 7.428647605248387e-06, + "loss": 1.2265, + "step": 99012 + }, + { + "epoch": 1.19, + "grad_norm": 11.808020548190262, + "learning_rate": 7.428083074334372e-06, + "loss": 1.4415, + "step": 99015 + }, + { + "epoch": 1.19, + "grad_norm": 6.8099957535918545, + "learning_rate": 7.427518552197383e-06, + "loss": 1.2697, + "step": 99018 + }, + { + "epoch": 1.19, + "grad_norm": 6.708436159895389, + "learning_rate": 7.426954038839347e-06, + "loss": 1.3739, + "step": 99021 + }, + { + "epoch": 1.19, + "grad_norm": 12.20403596758964, + "learning_rate": 7.426389534262188e-06, + "loss": 1.3862, + "step": 99024 + }, + { + "epoch": 1.19, + "grad_norm": 10.278713330437874, + "learning_rate": 7.42582503846784e-06, + "loss": 1.1011, + "step": 99027 + }, + { + "epoch": 1.19, + "grad_norm": 6.728915798658283, + "learning_rate": 7.425260551458221e-06, + "loss": 1.1588, + "step": 99030 + }, + { + "epoch": 1.19, + "grad_norm": 5.933918661765066, + "learning_rate": 7.424696073235258e-06, + "loss": 0.9605, + "step": 99033 + }, + { + "epoch": 1.19, + "grad_norm": 9.909166271093307, + "learning_rate": 7.42413160380088e-06, + "loss": 1.0597, + "step": 99036 + }, + { + "epoch": 1.19, + "grad_norm": 12.155040862912445, + "learning_rate": 7.423567143157017e-06, + "loss": 1.1872, + "step": 99039 + }, + { + "epoch": 1.19, + "grad_norm": 3.4749658895330673, + "learning_rate": 7.42300269130559e-06, + "loss": 1.1499, + "step": 99042 + }, + { + "epoch": 1.19, + "grad_norm": 10.946526267915184, + "learning_rate": 7.422438248248525e-06, + "loss": 1.2427, + "step": 99045 + }, + { + "epoch": 1.19, + "grad_norm": 11.49517176284716, + "learning_rate": 7.421873813987748e-06, + "loss": 1.1018, + "step": 99048 + }, + { + "epoch": 1.19, + "grad_norm": 12.705256169351864, + "learning_rate": 7.421309388525191e-06, + "loss": 1.0836, + "step": 99051 + }, + { + "epoch": 1.19, + "grad_norm": 6.646601347920979, + "learning_rate": 7.4207449718627765e-06, + "loss": 1.3221, + "step": 99054 + }, + { + "epoch": 1.19, + "grad_norm": 9.170139791033998, + "learning_rate": 7.4201805640024275e-06, + "loss": 1.2095, + "step": 99057 + }, + { + "epoch": 1.19, + "grad_norm": 3.2354133368776434, + "learning_rate": 7.419616164946072e-06, + "loss": 1.255, + "step": 99060 + }, + { + "epoch": 1.19, + "grad_norm": 8.643207217874677, + "learning_rate": 7.419051774695641e-06, + "loss": 1.2005, + "step": 99063 + }, + { + "epoch": 1.19, + "grad_norm": 45.75461415976953, + "learning_rate": 7.418487393253057e-06, + "loss": 1.3915, + "step": 99066 + }, + { + "epoch": 1.19, + "grad_norm": 8.158898436369505, + "learning_rate": 7.417923020620241e-06, + "loss": 1.3019, + "step": 99069 + }, + { + "epoch": 1.19, + "grad_norm": 8.125818697143934, + "learning_rate": 7.417358656799128e-06, + "loss": 0.9713, + "step": 99072 + }, + { + "epoch": 1.19, + "grad_norm": 21.20571187132744, + "learning_rate": 7.416794301791635e-06, + "loss": 1.2003, + "step": 99075 + }, + { + "epoch": 1.19, + "grad_norm": 5.254234970940699, + "learning_rate": 7.416229955599694e-06, + "loss": 0.8956, + "step": 99078 + }, + { + "epoch": 1.19, + "grad_norm": 7.794812043012604, + "learning_rate": 7.415665618225234e-06, + "loss": 1.164, + "step": 99081 + }, + { + "epoch": 1.19, + "grad_norm": 31.80607934783783, + "learning_rate": 7.415101289670173e-06, + "loss": 0.9613, + "step": 99084 + }, + { + "epoch": 1.19, + "grad_norm": 3.708915927426752, + "learning_rate": 7.414536969936439e-06, + "loss": 1.4643, + "step": 99087 + }, + { + "epoch": 1.19, + "grad_norm": 6.345196616029877, + "learning_rate": 7.413972659025959e-06, + "loss": 1.1455, + "step": 99090 + }, + { + "epoch": 1.19, + "grad_norm": 14.894647866827896, + "learning_rate": 7.413408356940663e-06, + "loss": 1.2866, + "step": 99093 + }, + { + "epoch": 1.19, + "grad_norm": 22.550495581350784, + "learning_rate": 7.4128440636824695e-06, + "loss": 1.2907, + "step": 99096 + }, + { + "epoch": 1.19, + "grad_norm": 8.401036661691196, + "learning_rate": 7.412279779253307e-06, + "loss": 1.0222, + "step": 99099 + }, + { + "epoch": 1.19, + "grad_norm": 4.081867206602174, + "learning_rate": 7.4117155036551e-06, + "loss": 1.3631, + "step": 99102 + }, + { + "epoch": 1.19, + "grad_norm": 18.321551426578363, + "learning_rate": 7.41115123688978e-06, + "loss": 1.2452, + "step": 99105 + }, + { + "epoch": 1.19, + "grad_norm": 22.621633783464436, + "learning_rate": 7.410586978959268e-06, + "loss": 1.0922, + "step": 99108 + }, + { + "epoch": 1.19, + "grad_norm": 13.095914490950369, + "learning_rate": 7.410022729865487e-06, + "loss": 1.4371, + "step": 99111 + }, + { + "epoch": 1.19, + "grad_norm": 5.745311859421275, + "learning_rate": 7.4094584896103665e-06, + "loss": 1.0065, + "step": 99114 + }, + { + "epoch": 1.19, + "grad_norm": 35.009196480660144, + "learning_rate": 7.40889425819583e-06, + "loss": 1.2587, + "step": 99117 + }, + { + "epoch": 1.19, + "grad_norm": 17.711017773297513, + "learning_rate": 7.408330035623807e-06, + "loss": 1.0135, + "step": 99120 + }, + { + "epoch": 1.19, + "grad_norm": 13.479259113995179, + "learning_rate": 7.407765821896218e-06, + "loss": 1.1445, + "step": 99123 + }, + { + "epoch": 1.19, + "grad_norm": 8.691348401963063, + "learning_rate": 7.407201617014992e-06, + "loss": 1.4243, + "step": 99126 + }, + { + "epoch": 1.19, + "grad_norm": 3.864402862936312, + "learning_rate": 7.40663742098205e-06, + "loss": 0.97, + "step": 99129 + }, + { + "epoch": 1.19, + "grad_norm": 9.796221743701237, + "learning_rate": 7.40607323379932e-06, + "loss": 1.558, + "step": 99132 + }, + { + "epoch": 1.19, + "grad_norm": 3.176084408310324, + "learning_rate": 7.405509055468734e-06, + "loss": 0.9842, + "step": 99135 + }, + { + "epoch": 1.19, + "grad_norm": 8.191424773348787, + "learning_rate": 7.404944885992207e-06, + "loss": 1.3601, + "step": 99138 + }, + { + "epoch": 1.19, + "grad_norm": 4.389904427804221, + "learning_rate": 7.404380725371667e-06, + "loss": 1.1548, + "step": 99141 + }, + { + "epoch": 1.19, + "grad_norm": 8.710316442577657, + "learning_rate": 7.40381657360904e-06, + "loss": 0.9976, + "step": 99144 + }, + { + "epoch": 1.19, + "grad_norm": 3.860757583799337, + "learning_rate": 7.403252430706257e-06, + "loss": 1.2363, + "step": 99147 + }, + { + "epoch": 1.19, + "grad_norm": 10.871951952826965, + "learning_rate": 7.4026882966652345e-06, + "loss": 1.1203, + "step": 99150 + }, + { + "epoch": 1.19, + "grad_norm": 10.815829100896071, + "learning_rate": 7.402124171487901e-06, + "loss": 1.2252, + "step": 99153 + }, + { + "epoch": 1.19, + "grad_norm": 15.204611109513896, + "learning_rate": 7.401560055176184e-06, + "loss": 1.0441, + "step": 99156 + }, + { + "epoch": 1.19, + "grad_norm": 11.181256143117913, + "learning_rate": 7.400995947732004e-06, + "loss": 1.1982, + "step": 99159 + }, + { + "epoch": 1.19, + "grad_norm": 9.087946102456426, + "learning_rate": 7.4004318491572925e-06, + "loss": 1.0019, + "step": 99162 + }, + { + "epoch": 1.19, + "grad_norm": 8.301550148557086, + "learning_rate": 7.399867759453967e-06, + "loss": 1.0419, + "step": 99165 + }, + { + "epoch": 1.19, + "grad_norm": 22.55277097921266, + "learning_rate": 7.399303678623957e-06, + "loss": 1.2019, + "step": 99168 + }, + { + "epoch": 1.19, + "grad_norm": 3.7746918213908485, + "learning_rate": 7.398739606669186e-06, + "loss": 1.1503, + "step": 99171 + }, + { + "epoch": 1.19, + "grad_norm": 7.732298801758708, + "learning_rate": 7.3981755435915835e-06, + "loss": 1.3066, + "step": 99174 + }, + { + "epoch": 1.19, + "grad_norm": 5.820654194350502, + "learning_rate": 7.397611489393067e-06, + "loss": 1.1079, + "step": 99177 + }, + { + "epoch": 1.19, + "grad_norm": 17.62110712648736, + "learning_rate": 7.397047444075567e-06, + "loss": 1.2479, + "step": 99180 + }, + { + "epoch": 1.19, + "grad_norm": 18.111030002914738, + "learning_rate": 7.396483407641004e-06, + "loss": 0.6925, + "step": 99183 + }, + { + "epoch": 1.19, + "grad_norm": 13.993575517743384, + "learning_rate": 7.395919380091306e-06, + "loss": 1.1942, + "step": 99186 + }, + { + "epoch": 1.19, + "grad_norm": 21.733878015971662, + "learning_rate": 7.395355361428403e-06, + "loss": 0.8678, + "step": 99189 + }, + { + "epoch": 1.19, + "grad_norm": 26.84569962197166, + "learning_rate": 7.394791351654209e-06, + "loss": 1.1534, + "step": 99192 + }, + { + "epoch": 1.19, + "grad_norm": 10.005454162633947, + "learning_rate": 7.394227350770653e-06, + "loss": 1.3665, + "step": 99195 + }, + { + "epoch": 1.19, + "grad_norm": 7.487097844933699, + "learning_rate": 7.393663358779661e-06, + "loss": 1.6015, + "step": 99198 + }, + { + "epoch": 1.19, + "grad_norm": 6.273390477247978, + "learning_rate": 7.3930993756831596e-06, + "loss": 1.1109, + "step": 99201 + }, + { + "epoch": 1.19, + "grad_norm": 2.8286737122769035, + "learning_rate": 7.392535401483069e-06, + "loss": 1.5779, + "step": 99204 + }, + { + "epoch": 1.19, + "grad_norm": 8.35660386776482, + "learning_rate": 7.391971436181314e-06, + "loss": 1.046, + "step": 99207 + }, + { + "epoch": 1.19, + "grad_norm": 17.01747027812052, + "learning_rate": 7.391407479779824e-06, + "loss": 1.2651, + "step": 99210 + }, + { + "epoch": 1.19, + "grad_norm": 4.938740973280001, + "learning_rate": 7.390843532280518e-06, + "loss": 1.0573, + "step": 99213 + }, + { + "epoch": 1.19, + "grad_norm": 28.966561390027127, + "learning_rate": 7.390279593685328e-06, + "loss": 1.3122, + "step": 99216 + }, + { + "epoch": 1.19, + "grad_norm": 16.677428476068645, + "learning_rate": 7.389715663996169e-06, + "loss": 1.3052, + "step": 99219 + }, + { + "epoch": 1.19, + "grad_norm": 4.0086877095416735, + "learning_rate": 7.389151743214973e-06, + "loss": 1.2259, + "step": 99222 + }, + { + "epoch": 1.19, + "grad_norm": 4.252085144364798, + "learning_rate": 7.3885878313436585e-06, + "loss": 1.2742, + "step": 99225 + }, + { + "epoch": 1.19, + "grad_norm": 8.617800678304995, + "learning_rate": 7.388023928384158e-06, + "loss": 1.0694, + "step": 99228 + }, + { + "epoch": 1.19, + "grad_norm": 10.74814146558831, + "learning_rate": 7.387460034338388e-06, + "loss": 1.3139, + "step": 99231 + }, + { + "epoch": 1.19, + "grad_norm": 13.767064181002299, + "learning_rate": 7.386896149208276e-06, + "loss": 1.3128, + "step": 99234 + }, + { + "epoch": 1.19, + "grad_norm": 6.658206249869504, + "learning_rate": 7.386332272995744e-06, + "loss": 1.4949, + "step": 99237 + }, + { + "epoch": 1.19, + "grad_norm": 8.220482878083786, + "learning_rate": 7.38576840570272e-06, + "loss": 1.2713, + "step": 99240 + }, + { + "epoch": 1.19, + "grad_norm": 10.03657662460241, + "learning_rate": 7.3852045473311294e-06, + "loss": 1.2945, + "step": 99243 + }, + { + "epoch": 1.19, + "grad_norm": 5.290976395529662, + "learning_rate": 7.384640697882892e-06, + "loss": 1.3261, + "step": 99246 + }, + { + "epoch": 1.19, + "grad_norm": 68.91483353943201, + "learning_rate": 7.3840768573599335e-06, + "loss": 1.0114, + "step": 99249 + }, + { + "epoch": 1.19, + "grad_norm": 7.675587275823236, + "learning_rate": 7.383513025764179e-06, + "loss": 1.2243, + "step": 99252 + }, + { + "epoch": 1.19, + "grad_norm": 9.633445835321114, + "learning_rate": 7.382949203097554e-06, + "loss": 1.4537, + "step": 99255 + }, + { + "epoch": 1.19, + "grad_norm": 2.627935550506675, + "learning_rate": 7.382385389361978e-06, + "loss": 1.2969, + "step": 99258 + }, + { + "epoch": 1.19, + "grad_norm": 9.064599052995856, + "learning_rate": 7.381821584559377e-06, + "loss": 1.0144, + "step": 99261 + }, + { + "epoch": 1.19, + "grad_norm": 9.90140207918693, + "learning_rate": 7.381257788691678e-06, + "loss": 1.628, + "step": 99264 + }, + { + "epoch": 1.19, + "grad_norm": 20.17479101674989, + "learning_rate": 7.3806940017608e-06, + "loss": 1.323, + "step": 99267 + }, + { + "epoch": 1.19, + "grad_norm": 15.046675511123809, + "learning_rate": 7.3801302237686755e-06, + "loss": 1.2464, + "step": 99270 + }, + { + "epoch": 1.19, + "grad_norm": 5.035062561974826, + "learning_rate": 7.379566454717218e-06, + "loss": 1.1319, + "step": 99273 + }, + { + "epoch": 1.19, + "grad_norm": 11.320523106692594, + "learning_rate": 7.379002694608357e-06, + "loss": 1.1747, + "step": 99276 + }, + { + "epoch": 1.19, + "grad_norm": 7.510624255951006, + "learning_rate": 7.378438943444015e-06, + "loss": 1.2591, + "step": 99279 + }, + { + "epoch": 1.19, + "grad_norm": 3.28875010694998, + "learning_rate": 7.377875201226122e-06, + "loss": 1.0447, + "step": 99282 + }, + { + "epoch": 1.19, + "grad_norm": 8.129453463591558, + "learning_rate": 7.377311467956591e-06, + "loss": 0.8493, + "step": 99285 + }, + { + "epoch": 1.19, + "grad_norm": 7.992383979825447, + "learning_rate": 7.3767477436373534e-06, + "loss": 1.3438, + "step": 99288 + }, + { + "epoch": 1.19, + "grad_norm": 7.023469592036835, + "learning_rate": 7.376184028270328e-06, + "loss": 0.9306, + "step": 99291 + }, + { + "epoch": 1.19, + "grad_norm": 2.751834555852429, + "learning_rate": 7.375620321857444e-06, + "loss": 1.1145, + "step": 99294 + }, + { + "epoch": 1.19, + "grad_norm": 13.537272183962344, + "learning_rate": 7.375056624400623e-06, + "loss": 0.9278, + "step": 99297 + }, + { + "epoch": 1.19, + "grad_norm": 5.548380040102186, + "learning_rate": 7.374492935901789e-06, + "loss": 1.0242, + "step": 99300 + }, + { + "epoch": 1.19, + "grad_norm": 8.233644143444595, + "learning_rate": 7.373929256362861e-06, + "loss": 0.9736, + "step": 99303 + }, + { + "epoch": 1.19, + "grad_norm": 11.02868411586611, + "learning_rate": 7.37336558578577e-06, + "loss": 1.114, + "step": 99306 + }, + { + "epoch": 1.19, + "grad_norm": 10.009818938760137, + "learning_rate": 7.372801924172437e-06, + "loss": 1.4198, + "step": 99309 + }, + { + "epoch": 1.19, + "grad_norm": 11.202747256921016, + "learning_rate": 7.372238271524782e-06, + "loss": 1.2664, + "step": 99312 + }, + { + "epoch": 1.19, + "grad_norm": 6.704689036089447, + "learning_rate": 7.37167462784473e-06, + "loss": 1.1846, + "step": 99315 + }, + { + "epoch": 1.19, + "grad_norm": 4.01493685408144, + "learning_rate": 7.37111099313421e-06, + "loss": 0.9893, + "step": 99318 + }, + { + "epoch": 1.19, + "grad_norm": 16.53719769524406, + "learning_rate": 7.370547367395135e-06, + "loss": 1.2671, + "step": 99321 + }, + { + "epoch": 1.19, + "grad_norm": 3.092852381178289, + "learning_rate": 7.369983750629443e-06, + "loss": 1.3874, + "step": 99324 + }, + { + "epoch": 1.19, + "grad_norm": 14.207941034845174, + "learning_rate": 7.3694201428390434e-06, + "loss": 1.3791, + "step": 99327 + }, + { + "epoch": 1.19, + "grad_norm": 10.676981355125559, + "learning_rate": 7.368856544025867e-06, + "loss": 1.1922, + "step": 99330 + }, + { + "epoch": 1.19, + "grad_norm": 10.253883055827119, + "learning_rate": 7.368292954191833e-06, + "loss": 1.2511, + "step": 99333 + }, + { + "epoch": 1.19, + "grad_norm": 8.852236320748611, + "learning_rate": 7.367729373338874e-06, + "loss": 1.1289, + "step": 99336 + }, + { + "epoch": 1.19, + "grad_norm": 13.881614650926334, + "learning_rate": 7.367165801468901e-06, + "loss": 1.4112, + "step": 99339 + }, + { + "epoch": 1.19, + "grad_norm": 4.518007000063903, + "learning_rate": 7.3666022385838435e-06, + "loss": 1.1349, + "step": 99342 + }, + { + "epoch": 1.19, + "grad_norm": 13.54629267778213, + "learning_rate": 7.366038684685623e-06, + "loss": 1.2661, + "step": 99345 + }, + { + "epoch": 1.19, + "grad_norm": 9.63527252520437, + "learning_rate": 7.365475139776166e-06, + "loss": 1.2459, + "step": 99348 + }, + { + "epoch": 1.19, + "grad_norm": 9.642901267271823, + "learning_rate": 7.364911603857394e-06, + "loss": 1.307, + "step": 99351 + }, + { + "epoch": 1.19, + "grad_norm": 10.10787831442285, + "learning_rate": 7.364348076931227e-06, + "loss": 1.1934, + "step": 99354 + }, + { + "epoch": 1.19, + "grad_norm": 6.790391926949847, + "learning_rate": 7.363784558999589e-06, + "loss": 0.9758, + "step": 99357 + }, + { + "epoch": 1.19, + "grad_norm": 15.299975364602542, + "learning_rate": 7.363221050064408e-06, + "loss": 1.3485, + "step": 99360 + }, + { + "epoch": 1.19, + "grad_norm": 16.992749716498796, + "learning_rate": 7.362657550127607e-06, + "loss": 1.1178, + "step": 99363 + }, + { + "epoch": 1.19, + "grad_norm": 9.951867590034714, + "learning_rate": 7.362094059191101e-06, + "loss": 1.163, + "step": 99366 + }, + { + "epoch": 1.19, + "grad_norm": 18.927894861826452, + "learning_rate": 7.361530577256817e-06, + "loss": 1.2911, + "step": 99369 + }, + { + "epoch": 1.19, + "grad_norm": 13.158392213108138, + "learning_rate": 7.3609671043266816e-06, + "loss": 1.2731, + "step": 99372 + }, + { + "epoch": 1.19, + "grad_norm": 7.695310342004523, + "learning_rate": 7.360403640402611e-06, + "loss": 1.4295, + "step": 99375 + }, + { + "epoch": 1.19, + "grad_norm": 8.418691553642017, + "learning_rate": 7.359840185486537e-06, + "loss": 1.0476, + "step": 99378 + }, + { + "epoch": 1.2, + "grad_norm": 12.446944393106937, + "learning_rate": 7.359276739580375e-06, + "loss": 0.7825, + "step": 99381 + }, + { + "epoch": 1.2, + "grad_norm": 9.780187100883003, + "learning_rate": 7.3587133026860515e-06, + "loss": 1.4179, + "step": 99384 + }, + { + "epoch": 1.2, + "grad_norm": 3.726680128452392, + "learning_rate": 7.358149874805486e-06, + "loss": 1.2658, + "step": 99387 + }, + { + "epoch": 1.2, + "grad_norm": 9.629032435763007, + "learning_rate": 7.3575864559406065e-06, + "loss": 1.1491, + "step": 99390 + }, + { + "epoch": 1.2, + "grad_norm": 12.63539766331321, + "learning_rate": 7.357023046093332e-06, + "loss": 1.1886, + "step": 99393 + }, + { + "epoch": 1.2, + "grad_norm": 21.467911099177062, + "learning_rate": 7.356459645265584e-06, + "loss": 1.3864, + "step": 99396 + }, + { + "epoch": 1.2, + "grad_norm": 7.927595884840027, + "learning_rate": 7.355896253459285e-06, + "loss": 1.2286, + "step": 99399 + }, + { + "epoch": 1.2, + "grad_norm": 18.44593698497087, + "learning_rate": 7.355332870676364e-06, + "loss": 1.2605, + "step": 99402 + }, + { + "epoch": 1.2, + "grad_norm": 24.80136376971534, + "learning_rate": 7.354769496918739e-06, + "loss": 0.8801, + "step": 99405 + }, + { + "epoch": 1.2, + "grad_norm": 11.498849564634051, + "learning_rate": 7.354206132188331e-06, + "loss": 0.9492, + "step": 99408 + }, + { + "epoch": 1.2, + "grad_norm": 8.720499346017219, + "learning_rate": 7.353642776487062e-06, + "loss": 1.2917, + "step": 99411 + }, + { + "epoch": 1.2, + "grad_norm": 17.609209736622883, + "learning_rate": 7.353079429816861e-06, + "loss": 1.002, + "step": 99414 + }, + { + "epoch": 1.2, + "grad_norm": 66.15100234103251, + "learning_rate": 7.352516092179649e-06, + "loss": 1.3091, + "step": 99417 + }, + { + "epoch": 1.2, + "grad_norm": 8.670356372755787, + "learning_rate": 7.351952763577341e-06, + "loss": 1.1365, + "step": 99420 + }, + { + "epoch": 1.2, + "grad_norm": 13.210200419588922, + "learning_rate": 7.351389444011864e-06, + "loss": 1.6019, + "step": 99423 + }, + { + "epoch": 1.2, + "grad_norm": 33.71202539506104, + "learning_rate": 7.350826133485143e-06, + "loss": 1.3295, + "step": 99426 + }, + { + "epoch": 1.2, + "grad_norm": 29.34441948689391, + "learning_rate": 7.350262831999097e-06, + "loss": 1.4617, + "step": 99429 + }, + { + "epoch": 1.2, + "grad_norm": 7.2770695503136915, + "learning_rate": 7.349699539555652e-06, + "loss": 1.7742, + "step": 99432 + }, + { + "epoch": 1.2, + "grad_norm": 7.023740634259068, + "learning_rate": 7.349136256156728e-06, + "loss": 1.0954, + "step": 99435 + }, + { + "epoch": 1.2, + "grad_norm": 104.97699958861507, + "learning_rate": 7.348572981804243e-06, + "loss": 1.2546, + "step": 99438 + }, + { + "epoch": 1.2, + "grad_norm": 2.7660035903597753, + "learning_rate": 7.348009716500124e-06, + "loss": 1.2331, + "step": 99441 + }, + { + "epoch": 1.2, + "grad_norm": 6.225832891947064, + "learning_rate": 7.3474464602462966e-06, + "loss": 1.1694, + "step": 99444 + }, + { + "epoch": 1.2, + "grad_norm": 15.919050955322366, + "learning_rate": 7.3468832130446775e-06, + "loss": 0.9409, + "step": 99447 + }, + { + "epoch": 1.2, + "grad_norm": 9.626340466005871, + "learning_rate": 7.346319974897189e-06, + "loss": 1.4156, + "step": 99450 + }, + { + "epoch": 1.2, + "grad_norm": 3.8194279103493396, + "learning_rate": 7.345756745805753e-06, + "loss": 1.3408, + "step": 99453 + }, + { + "epoch": 1.2, + "grad_norm": 4.986828679319281, + "learning_rate": 7.345193525772296e-06, + "loss": 1.0732, + "step": 99456 + }, + { + "epoch": 1.2, + "grad_norm": 13.498553284263656, + "learning_rate": 7.34463031479874e-06, + "loss": 1.4909, + "step": 99459 + }, + { + "epoch": 1.2, + "grad_norm": 27.662614735448326, + "learning_rate": 7.344067112886999e-06, + "loss": 1.2767, + "step": 99462 + }, + { + "epoch": 1.2, + "grad_norm": 7.188713681304645, + "learning_rate": 7.343503920038999e-06, + "loss": 1.5427, + "step": 99465 + }, + { + "epoch": 1.2, + "grad_norm": 13.099385680021703, + "learning_rate": 7.342940736256667e-06, + "loss": 0.985, + "step": 99468 + }, + { + "epoch": 1.2, + "grad_norm": 8.901913562851473, + "learning_rate": 7.3423775615419245e-06, + "loss": 1.0221, + "step": 99471 + }, + { + "epoch": 1.2, + "grad_norm": 15.908078621746036, + "learning_rate": 7.341814395896685e-06, + "loss": 1.1607, + "step": 99474 + }, + { + "epoch": 1.2, + "grad_norm": 8.11640614111056, + "learning_rate": 7.341251239322874e-06, + "loss": 1.0829, + "step": 99477 + }, + { + "epoch": 1.2, + "grad_norm": 9.571127585811503, + "learning_rate": 7.340688091822419e-06, + "loss": 1.1134, + "step": 99480 + }, + { + "epoch": 1.2, + "grad_norm": 3.1819466929806084, + "learning_rate": 7.340124953397234e-06, + "loss": 1.2955, + "step": 99483 + }, + { + "epoch": 1.2, + "grad_norm": 4.978288784005012, + "learning_rate": 7.339561824049249e-06, + "loss": 1.2191, + "step": 99486 + }, + { + "epoch": 1.2, + "grad_norm": 5.672958682094151, + "learning_rate": 7.33899870378038e-06, + "loss": 1.2428, + "step": 99489 + }, + { + "epoch": 1.2, + "grad_norm": 19.455764657064023, + "learning_rate": 7.338435592592546e-06, + "loss": 1.3304, + "step": 99492 + }, + { + "epoch": 1.2, + "grad_norm": 14.571252680867948, + "learning_rate": 7.337872490487674e-06, + "loss": 1.3462, + "step": 99495 + }, + { + "epoch": 1.2, + "grad_norm": 18.114024526994122, + "learning_rate": 7.337309397467689e-06, + "loss": 1.4495, + "step": 99498 + }, + { + "epoch": 1.2, + "grad_norm": 21.02372686946249, + "learning_rate": 7.336746313534505e-06, + "loss": 1.4022, + "step": 99501 + }, + { + "epoch": 1.2, + "grad_norm": 19.892967457909783, + "learning_rate": 7.336183238690044e-06, + "loss": 1.6594, + "step": 99504 + }, + { + "epoch": 1.2, + "grad_norm": 12.37924220195899, + "learning_rate": 7.335620172936231e-06, + "loss": 1.2687, + "step": 99507 + }, + { + "epoch": 1.2, + "grad_norm": 5.646164784660148, + "learning_rate": 7.3350571162749886e-06, + "loss": 1.3871, + "step": 99510 + }, + { + "epoch": 1.2, + "grad_norm": 5.41366903605801, + "learning_rate": 7.334494068708238e-06, + "loss": 0.9059, + "step": 99513 + }, + { + "epoch": 1.2, + "grad_norm": 8.518874492811356, + "learning_rate": 7.3339310302378955e-06, + "loss": 1.0768, + "step": 99516 + }, + { + "epoch": 1.2, + "grad_norm": 4.687732460076332, + "learning_rate": 7.333368000865886e-06, + "loss": 1.0877, + "step": 99519 + }, + { + "epoch": 1.2, + "grad_norm": 9.158834500856386, + "learning_rate": 7.332804980594133e-06, + "loss": 1.3917, + "step": 99522 + }, + { + "epoch": 1.2, + "grad_norm": 13.253694827724864, + "learning_rate": 7.3322419694245574e-06, + "loss": 1.6092, + "step": 99525 + }, + { + "epoch": 1.2, + "grad_norm": 13.387296469665127, + "learning_rate": 7.331678967359075e-06, + "loss": 1.2209, + "step": 99528 + }, + { + "epoch": 1.2, + "grad_norm": 7.436029374418091, + "learning_rate": 7.331115974399614e-06, + "loss": 1.2598, + "step": 99531 + }, + { + "epoch": 1.2, + "grad_norm": 12.688422513332686, + "learning_rate": 7.33055299054809e-06, + "loss": 1.4922, + "step": 99534 + }, + { + "epoch": 1.2, + "grad_norm": 4.45565621655596, + "learning_rate": 7.329990015806427e-06, + "loss": 1.0427, + "step": 99537 + }, + { + "epoch": 1.2, + "grad_norm": 6.130738631136424, + "learning_rate": 7.3294270501765515e-06, + "loss": 1.358, + "step": 99540 + }, + { + "epoch": 1.2, + "grad_norm": 48.1784605186399, + "learning_rate": 7.328864093660377e-06, + "loss": 1.2187, + "step": 99543 + }, + { + "epoch": 1.2, + "grad_norm": 33.192708168544534, + "learning_rate": 7.328301146259824e-06, + "loss": 1.203, + "step": 99546 + }, + { + "epoch": 1.2, + "grad_norm": 10.043307878580165, + "learning_rate": 7.327738207976817e-06, + "loss": 1.0808, + "step": 99549 + }, + { + "epoch": 1.2, + "grad_norm": 7.482775327178816, + "learning_rate": 7.327175278813283e-06, + "loss": 1.3153, + "step": 99552 + }, + { + "epoch": 1.2, + "grad_norm": 6.912990175826878, + "learning_rate": 7.326612358771135e-06, + "loss": 1.2268, + "step": 99555 + }, + { + "epoch": 1.2, + "grad_norm": 9.924134734720226, + "learning_rate": 7.326049447852292e-06, + "loss": 1.0442, + "step": 99558 + }, + { + "epoch": 1.2, + "grad_norm": 5.456249404523082, + "learning_rate": 7.32548654605868e-06, + "loss": 1.2377, + "step": 99561 + }, + { + "epoch": 1.2, + "grad_norm": 9.411228595896217, + "learning_rate": 7.32492365339222e-06, + "loss": 1.2049, + "step": 99564 + }, + { + "epoch": 1.2, + "grad_norm": 11.727678834514991, + "learning_rate": 7.324360769854836e-06, + "loss": 1.2913, + "step": 99567 + }, + { + "epoch": 1.2, + "grad_norm": 7.330085938952844, + "learning_rate": 7.32379789544844e-06, + "loss": 1.2537, + "step": 99570 + }, + { + "epoch": 1.2, + "grad_norm": 21.39158483636092, + "learning_rate": 7.32323503017496e-06, + "loss": 1.2512, + "step": 99573 + }, + { + "epoch": 1.2, + "grad_norm": 7.419233860958353, + "learning_rate": 7.322672174036312e-06, + "loss": 0.9348, + "step": 99576 + }, + { + "epoch": 1.2, + "grad_norm": 12.269528543136834, + "learning_rate": 7.322109327034424e-06, + "loss": 1.3308, + "step": 99579 + }, + { + "epoch": 1.2, + "grad_norm": 4.479598269195477, + "learning_rate": 7.321546489171209e-06, + "loss": 1.4221, + "step": 99582 + }, + { + "epoch": 1.2, + "grad_norm": 3.598460736261043, + "learning_rate": 7.3209836604485915e-06, + "loss": 1.2585, + "step": 99585 + }, + { + "epoch": 1.2, + "grad_norm": 35.150381961784895, + "learning_rate": 7.3204208408684904e-06, + "loss": 1.2566, + "step": 99588 + }, + { + "epoch": 1.2, + "grad_norm": 10.791263553758206, + "learning_rate": 7.319858030432828e-06, + "loss": 1.2652, + "step": 99591 + }, + { + "epoch": 1.2, + "grad_norm": 20.079277108873107, + "learning_rate": 7.31929522914353e-06, + "loss": 1.191, + "step": 99594 + }, + { + "epoch": 1.2, + "grad_norm": 12.396239799226516, + "learning_rate": 7.318732437002508e-06, + "loss": 1.1578, + "step": 99597 + }, + { + "epoch": 1.2, + "grad_norm": 40.292079803526995, + "learning_rate": 7.318169654011684e-06, + "loss": 0.7986, + "step": 99600 + }, + { + "epoch": 1.2, + "grad_norm": 23.705784190556535, + "learning_rate": 7.317606880172982e-06, + "loss": 1.3342, + "step": 99603 + }, + { + "epoch": 1.2, + "grad_norm": 5.211664742327666, + "learning_rate": 7.317044115488327e-06, + "loss": 1.1859, + "step": 99606 + }, + { + "epoch": 1.2, + "grad_norm": 9.732566778761194, + "learning_rate": 7.316481359959631e-06, + "loss": 1.2026, + "step": 99609 + }, + { + "epoch": 1.2, + "grad_norm": 8.378637401103015, + "learning_rate": 7.3159186135888135e-06, + "loss": 1.146, + "step": 99612 + }, + { + "epoch": 1.2, + "grad_norm": 7.297258748777649, + "learning_rate": 7.315355876377803e-06, + "loss": 1.2293, + "step": 99615 + }, + { + "epoch": 1.2, + "grad_norm": 23.962606716954003, + "learning_rate": 7.314793148328514e-06, + "loss": 1.4237, + "step": 99618 + }, + { + "epoch": 1.2, + "grad_norm": 6.33733191310364, + "learning_rate": 7.314230429442872e-06, + "loss": 1.1737, + "step": 99621 + }, + { + "epoch": 1.2, + "grad_norm": 11.265575240409609, + "learning_rate": 7.3136677197227904e-06, + "loss": 1.6529, + "step": 99624 + }, + { + "epoch": 1.2, + "grad_norm": 13.234815564373434, + "learning_rate": 7.313105019170194e-06, + "loss": 1.2329, + "step": 99627 + }, + { + "epoch": 1.2, + "grad_norm": 6.128850509341856, + "learning_rate": 7.312542327787003e-06, + "loss": 1.0325, + "step": 99630 + }, + { + "epoch": 1.2, + "grad_norm": 3.5201002895578846, + "learning_rate": 7.311979645575139e-06, + "loss": 1.126, + "step": 99633 + }, + { + "epoch": 1.2, + "grad_norm": 9.109474474944768, + "learning_rate": 7.311416972536516e-06, + "loss": 1.3881, + "step": 99636 + }, + { + "epoch": 1.2, + "grad_norm": 8.934525248560044, + "learning_rate": 7.310854308673061e-06, + "loss": 1.3721, + "step": 99639 + }, + { + "epoch": 1.2, + "grad_norm": 8.072900152432782, + "learning_rate": 7.310291653986691e-06, + "loss": 1.614, + "step": 99642 + }, + { + "epoch": 1.2, + "grad_norm": 7.427431986173235, + "learning_rate": 7.309729008479325e-06, + "loss": 1.2629, + "step": 99645 + }, + { + "epoch": 1.2, + "grad_norm": 9.159012334570946, + "learning_rate": 7.309166372152889e-06, + "loss": 1.4384, + "step": 99648 + }, + { + "epoch": 1.2, + "grad_norm": 8.756664598858597, + "learning_rate": 7.308603745009298e-06, + "loss": 1.2412, + "step": 99651 + }, + { + "epoch": 1.2, + "grad_norm": 6.44562505540904, + "learning_rate": 7.30804112705047e-06, + "loss": 1.2152, + "step": 99654 + }, + { + "epoch": 1.2, + "grad_norm": 10.466796596041931, + "learning_rate": 7.307478518278328e-06, + "loss": 1.021, + "step": 99657 + }, + { + "epoch": 1.2, + "grad_norm": 26.901308014844435, + "learning_rate": 7.306915918694797e-06, + "loss": 1.0873, + "step": 99660 + }, + { + "epoch": 1.2, + "grad_norm": 13.007373589718396, + "learning_rate": 7.3063533283017895e-06, + "loss": 1.1204, + "step": 99663 + }, + { + "epoch": 1.2, + "grad_norm": 4.4926164409188125, + "learning_rate": 7.3057907471012255e-06, + "loss": 1.267, + "step": 99666 + }, + { + "epoch": 1.2, + "grad_norm": 2.7558692600744394, + "learning_rate": 7.30522817509503e-06, + "loss": 1.1269, + "step": 99669 + }, + { + "epoch": 1.2, + "grad_norm": 12.276187646538308, + "learning_rate": 7.304665612285118e-06, + "loss": 1.1082, + "step": 99672 + }, + { + "epoch": 1.2, + "grad_norm": 7.157966378934237, + "learning_rate": 7.304103058673414e-06, + "loss": 1.3544, + "step": 99675 + }, + { + "epoch": 1.2, + "grad_norm": 14.010856622427712, + "learning_rate": 7.303540514261832e-06, + "loss": 1.6757, + "step": 99678 + }, + { + "epoch": 1.2, + "grad_norm": 7.799413184043556, + "learning_rate": 7.302977979052297e-06, + "loss": 1.2095, + "step": 99681 + }, + { + "epoch": 1.2, + "grad_norm": 15.665546258192258, + "learning_rate": 7.302415453046725e-06, + "loss": 0.6844, + "step": 99684 + }, + { + "epoch": 1.2, + "grad_norm": 17.21892461208639, + "learning_rate": 7.301852936247041e-06, + "loss": 1.2828, + "step": 99687 + }, + { + "epoch": 1.2, + "grad_norm": 9.904173228376829, + "learning_rate": 7.3012904286551566e-06, + "loss": 1.2234, + "step": 99690 + }, + { + "epoch": 1.2, + "grad_norm": 6.89741933403923, + "learning_rate": 7.300727930272999e-06, + "loss": 1.3459, + "step": 99693 + }, + { + "epoch": 1.2, + "grad_norm": 5.432703338508576, + "learning_rate": 7.300165441102483e-06, + "loss": 1.2459, + "step": 99696 + }, + { + "epoch": 1.2, + "grad_norm": 2.743751585615949, + "learning_rate": 7.299602961145529e-06, + "loss": 1.2257, + "step": 99699 + }, + { + "epoch": 1.2, + "grad_norm": 8.677866735703104, + "learning_rate": 7.299040490404061e-06, + "loss": 1.1486, + "step": 99702 + }, + { + "epoch": 1.2, + "grad_norm": 7.0835201170533635, + "learning_rate": 7.2984780288799935e-06, + "loss": 1.3023, + "step": 99705 + }, + { + "epoch": 1.2, + "grad_norm": 2.3835196928433, + "learning_rate": 7.297915576575245e-06, + "loss": 1.1624, + "step": 99708 + }, + { + "epoch": 1.2, + "grad_norm": 3.17801220067864, + "learning_rate": 7.29735313349174e-06, + "loss": 1.1485, + "step": 99711 + }, + { + "epoch": 1.2, + "grad_norm": 17.956417423056728, + "learning_rate": 7.296790699631396e-06, + "loss": 1.2189, + "step": 99714 + }, + { + "epoch": 1.2, + "grad_norm": 6.866236808812696, + "learning_rate": 7.296228274996129e-06, + "loss": 1.1518, + "step": 99717 + }, + { + "epoch": 1.2, + "grad_norm": 15.713192618370945, + "learning_rate": 7.2956658595878606e-06, + "loss": 0.9093, + "step": 99720 + }, + { + "epoch": 1.2, + "grad_norm": 4.269553905896492, + "learning_rate": 7.295103453408512e-06, + "loss": 1.1254, + "step": 99723 + }, + { + "epoch": 1.2, + "grad_norm": 8.944498052855803, + "learning_rate": 7.294541056460001e-06, + "loss": 1.0731, + "step": 99726 + }, + { + "epoch": 1.2, + "grad_norm": 7.747612923528532, + "learning_rate": 7.293978668744248e-06, + "loss": 1.1874, + "step": 99729 + }, + { + "epoch": 1.2, + "grad_norm": 18.48662267071465, + "learning_rate": 7.293416290263169e-06, + "loss": 1.3765, + "step": 99732 + }, + { + "epoch": 1.2, + "grad_norm": 4.338339864533051, + "learning_rate": 7.2928539210186865e-06, + "loss": 0.8524, + "step": 99735 + }, + { + "epoch": 1.2, + "grad_norm": 4.882665041824571, + "learning_rate": 7.292291561012716e-06, + "loss": 1.103, + "step": 99738 + }, + { + "epoch": 1.2, + "grad_norm": 2.455713313248423, + "learning_rate": 7.291729210247184e-06, + "loss": 1.2422, + "step": 99741 + }, + { + "epoch": 1.2, + "grad_norm": 8.48888966704603, + "learning_rate": 7.291166868724001e-06, + "loss": 1.608, + "step": 99744 + }, + { + "epoch": 1.2, + "grad_norm": 12.728787395183163, + "learning_rate": 7.290604536445092e-06, + "loss": 1.4853, + "step": 99747 + }, + { + "epoch": 1.2, + "grad_norm": 4.316267644431836, + "learning_rate": 7.290042213412371e-06, + "loss": 1.4067, + "step": 99750 + }, + { + "epoch": 1.2, + "grad_norm": 10.107021288547315, + "learning_rate": 7.289479899627762e-06, + "loss": 1.2414, + "step": 99753 + }, + { + "epoch": 1.2, + "grad_norm": 4.814271704769895, + "learning_rate": 7.2889175950931814e-06, + "loss": 1.4545, + "step": 99756 + }, + { + "epoch": 1.2, + "grad_norm": 7.324602923739828, + "learning_rate": 7.288355299810549e-06, + "loss": 1.2092, + "step": 99759 + }, + { + "epoch": 1.2, + "grad_norm": 3.8727248447713674, + "learning_rate": 7.287793013781781e-06, + "loss": 1.4712, + "step": 99762 + }, + { + "epoch": 1.2, + "grad_norm": 3.3170682501373396, + "learning_rate": 7.2872307370088e-06, + "loss": 1.1257, + "step": 99765 + }, + { + "epoch": 1.2, + "grad_norm": 13.034471929697622, + "learning_rate": 7.286668469493526e-06, + "loss": 1.4267, + "step": 99768 + }, + { + "epoch": 1.2, + "grad_norm": 10.615675102307193, + "learning_rate": 7.286106211237871e-06, + "loss": 1.4066, + "step": 99771 + }, + { + "epoch": 1.2, + "grad_norm": 8.388023652227963, + "learning_rate": 7.285543962243757e-06, + "loss": 1.2792, + "step": 99774 + }, + { + "epoch": 1.2, + "grad_norm": 5.445827717163656, + "learning_rate": 7.284981722513108e-06, + "loss": 1.272, + "step": 99777 + }, + { + "epoch": 1.2, + "grad_norm": 13.002183798767959, + "learning_rate": 7.284419492047835e-06, + "loss": 1.0812, + "step": 99780 + }, + { + "epoch": 1.2, + "grad_norm": 10.332982924696672, + "learning_rate": 7.283857270849865e-06, + "loss": 1.5357, + "step": 99783 + }, + { + "epoch": 1.2, + "grad_norm": 38.55053372329583, + "learning_rate": 7.283295058921107e-06, + "loss": 1.4118, + "step": 99786 + }, + { + "epoch": 1.2, + "grad_norm": 14.794938055724396, + "learning_rate": 7.282732856263486e-06, + "loss": 1.8709, + "step": 99789 + }, + { + "epoch": 1.2, + "grad_norm": 11.2899695849268, + "learning_rate": 7.282170662878918e-06, + "loss": 1.2619, + "step": 99792 + }, + { + "epoch": 1.2, + "grad_norm": 2.4670083070933004, + "learning_rate": 7.281608478769326e-06, + "loss": 1.0234, + "step": 99795 + }, + { + "epoch": 1.2, + "grad_norm": 6.826517598274125, + "learning_rate": 7.28104630393662e-06, + "loss": 1.3707, + "step": 99798 + }, + { + "epoch": 1.2, + "grad_norm": 16.935270433131535, + "learning_rate": 7.2804841383827275e-06, + "loss": 0.8972, + "step": 99801 + }, + { + "epoch": 1.2, + "grad_norm": 8.103399792775141, + "learning_rate": 7.27992198210956e-06, + "loss": 1.3889, + "step": 99804 + }, + { + "epoch": 1.2, + "grad_norm": 7.661373477466106, + "learning_rate": 7.279359835119042e-06, + "loss": 1.0076, + "step": 99807 + }, + { + "epoch": 1.2, + "grad_norm": 20.34427830671243, + "learning_rate": 7.278797697413089e-06, + "loss": 1.4552, + "step": 99810 + }, + { + "epoch": 1.2, + "grad_norm": 19.629555341844828, + "learning_rate": 7.2782355689936166e-06, + "loss": 1.2383, + "step": 99813 + }, + { + "epoch": 1.2, + "grad_norm": 64.69005580035369, + "learning_rate": 7.277673449862547e-06, + "loss": 1.3378, + "step": 99816 + }, + { + "epoch": 1.2, + "grad_norm": 14.960305870129476, + "learning_rate": 7.277111340021798e-06, + "loss": 1.1134, + "step": 99819 + }, + { + "epoch": 1.2, + "grad_norm": 10.949871162988796, + "learning_rate": 7.276549239473289e-06, + "loss": 1.1102, + "step": 99822 + }, + { + "epoch": 1.2, + "grad_norm": 20.478290877024712, + "learning_rate": 7.2759871482189325e-06, + "loss": 1.1505, + "step": 99825 + }, + { + "epoch": 1.2, + "grad_norm": 8.919965835630741, + "learning_rate": 7.275425066260651e-06, + "loss": 1.4754, + "step": 99828 + }, + { + "epoch": 1.2, + "grad_norm": 8.373585401266006, + "learning_rate": 7.274862993600365e-06, + "loss": 1.3526, + "step": 99831 + }, + { + "epoch": 1.2, + "grad_norm": 5.144906894900409, + "learning_rate": 7.274300930239987e-06, + "loss": 1.5739, + "step": 99834 + }, + { + "epoch": 1.2, + "grad_norm": 19.315967791412074, + "learning_rate": 7.273738876181442e-06, + "loss": 1.2693, + "step": 99837 + }, + { + "epoch": 1.2, + "grad_norm": 6.449919441797521, + "learning_rate": 7.273176831426641e-06, + "loss": 1.1563, + "step": 99840 + }, + { + "epoch": 1.2, + "grad_norm": 18.831668335712656, + "learning_rate": 7.272614795977507e-06, + "loss": 1.268, + "step": 99843 + }, + { + "epoch": 1.2, + "grad_norm": 13.574706078945717, + "learning_rate": 7.272052769835954e-06, + "loss": 1.1614, + "step": 99846 + }, + { + "epoch": 1.2, + "grad_norm": 14.653642304303949, + "learning_rate": 7.271490753003907e-06, + "loss": 1.1464, + "step": 99849 + }, + { + "epoch": 1.2, + "grad_norm": 5.954274236055424, + "learning_rate": 7.270928745483277e-06, + "loss": 1.4399, + "step": 99852 + }, + { + "epoch": 1.2, + "grad_norm": 6.471555768400485, + "learning_rate": 7.270366747275982e-06, + "loss": 1.1874, + "step": 99855 + }, + { + "epoch": 1.2, + "grad_norm": 5.820548541352418, + "learning_rate": 7.269804758383943e-06, + "loss": 1.1448, + "step": 99858 + }, + { + "epoch": 1.2, + "grad_norm": 5.727609549006326, + "learning_rate": 7.269242778809077e-06, + "loss": 1.1359, + "step": 99861 + }, + { + "epoch": 1.2, + "grad_norm": 4.566481584385315, + "learning_rate": 7.268680808553306e-06, + "loss": 1.0612, + "step": 99864 + }, + { + "epoch": 1.2, + "grad_norm": 3.2221048373213317, + "learning_rate": 7.2681188476185375e-06, + "loss": 1.2863, + "step": 99867 + }, + { + "epoch": 1.2, + "grad_norm": 41.19454790232551, + "learning_rate": 7.267556896006696e-06, + "loss": 1.1656, + "step": 99870 + }, + { + "epoch": 1.2, + "grad_norm": 10.911392390805243, + "learning_rate": 7.266994953719702e-06, + "loss": 0.9455, + "step": 99873 + }, + { + "epoch": 1.2, + "grad_norm": 10.76077837505379, + "learning_rate": 7.266433020759469e-06, + "loss": 1.4251, + "step": 99876 + }, + { + "epoch": 1.2, + "grad_norm": 7.63299142135978, + "learning_rate": 7.265871097127914e-06, + "loss": 1.1265, + "step": 99879 + }, + { + "epoch": 1.2, + "grad_norm": 6.139203479008657, + "learning_rate": 7.265309182826954e-06, + "loss": 1.2931, + "step": 99882 + }, + { + "epoch": 1.2, + "grad_norm": 11.197714268135673, + "learning_rate": 7.264747277858512e-06, + "loss": 1.209, + "step": 99885 + }, + { + "epoch": 1.2, + "grad_norm": 5.989475778027595, + "learning_rate": 7.264185382224501e-06, + "loss": 1.285, + "step": 99888 + }, + { + "epoch": 1.2, + "grad_norm": 6.883357955100043, + "learning_rate": 7.263623495926843e-06, + "loss": 1.383, + "step": 99891 + }, + { + "epoch": 1.2, + "grad_norm": 4.361404517098497, + "learning_rate": 7.263061618967448e-06, + "loss": 0.9878, + "step": 99894 + }, + { + "epoch": 1.2, + "grad_norm": 6.766186833454597, + "learning_rate": 7.262499751348241e-06, + "loss": 1.1804, + "step": 99897 + }, + { + "epoch": 1.2, + "grad_norm": 6.81722063810998, + "learning_rate": 7.261937893071134e-06, + "loss": 1.37, + "step": 99900 + }, + { + "epoch": 1.2, + "grad_norm": 10.823673623921408, + "learning_rate": 7.261376044138051e-06, + "loss": 1.1002, + "step": 99903 + }, + { + "epoch": 1.2, + "grad_norm": 8.859882401731193, + "learning_rate": 7.260814204550901e-06, + "loss": 1.3085, + "step": 99906 + }, + { + "epoch": 1.2, + "grad_norm": 9.31881643441527, + "learning_rate": 7.260252374311605e-06, + "loss": 1.0763, + "step": 99909 + }, + { + "epoch": 1.2, + "grad_norm": 14.25593229783884, + "learning_rate": 7.259690553422081e-06, + "loss": 1.2174, + "step": 99912 + }, + { + "epoch": 1.2, + "grad_norm": 2.9092702793028904, + "learning_rate": 7.259128741884248e-06, + "loss": 1.6978, + "step": 99915 + }, + { + "epoch": 1.2, + "grad_norm": 16.36253310114934, + "learning_rate": 7.2585669397000235e-06, + "loss": 1.1191, + "step": 99918 + }, + { + "epoch": 1.2, + "grad_norm": 12.19739856087672, + "learning_rate": 7.258005146871319e-06, + "loss": 1.0372, + "step": 99921 + }, + { + "epoch": 1.2, + "grad_norm": 9.999606439081717, + "learning_rate": 7.257443363400054e-06, + "loss": 1.1794, + "step": 99924 + }, + { + "epoch": 1.2, + "grad_norm": 7.368330220960729, + "learning_rate": 7.2568815892881495e-06, + "loss": 0.9071, + "step": 99927 + }, + { + "epoch": 1.2, + "grad_norm": 7.114344288343082, + "learning_rate": 7.256319824537522e-06, + "loss": 1.117, + "step": 99930 + }, + { + "epoch": 1.2, + "grad_norm": 16.26339016760461, + "learning_rate": 7.255758069150083e-06, + "loss": 1.2758, + "step": 99933 + }, + { + "epoch": 1.2, + "grad_norm": 7.692922087130826, + "learning_rate": 7.255196323127752e-06, + "loss": 1.1854, + "step": 99936 + }, + { + "epoch": 1.2, + "grad_norm": 1.9680112311015727, + "learning_rate": 7.254634586472451e-06, + "loss": 1.2469, + "step": 99939 + }, + { + "epoch": 1.2, + "grad_norm": 6.604763421525624, + "learning_rate": 7.25407285918609e-06, + "loss": 1.2625, + "step": 99942 + }, + { + "epoch": 1.2, + "grad_norm": 5.519939847263719, + "learning_rate": 7.253511141270594e-06, + "loss": 1.3901, + "step": 99945 + }, + { + "epoch": 1.2, + "grad_norm": 110.70087619068151, + "learning_rate": 7.252949432727873e-06, + "loss": 1.1323, + "step": 99948 + }, + { + "epoch": 1.2, + "grad_norm": 15.833900687929615, + "learning_rate": 7.252387733559844e-06, + "loss": 1.221, + "step": 99951 + }, + { + "epoch": 1.2, + "grad_norm": 35.1482308322501, + "learning_rate": 7.251826043768426e-06, + "loss": 1.133, + "step": 99954 + }, + { + "epoch": 1.2, + "grad_norm": 15.459602703912546, + "learning_rate": 7.25126436335554e-06, + "loss": 1.211, + "step": 99957 + }, + { + "epoch": 1.2, + "grad_norm": 13.27639574031707, + "learning_rate": 7.250702692323097e-06, + "loss": 1.0672, + "step": 99960 + }, + { + "epoch": 1.2, + "grad_norm": 16.959780401556447, + "learning_rate": 7.250141030673013e-06, + "loss": 1.1475, + "step": 99963 + }, + { + "epoch": 1.2, + "grad_norm": 3.5402600716174937, + "learning_rate": 7.249579378407206e-06, + "loss": 1.3158, + "step": 99966 + }, + { + "epoch": 1.2, + "grad_norm": 12.722726689805853, + "learning_rate": 7.249017735527597e-06, + "loss": 1.2785, + "step": 99969 + }, + { + "epoch": 1.2, + "grad_norm": 10.773854378710391, + "learning_rate": 7.248456102036103e-06, + "loss": 1.0761, + "step": 99972 + }, + { + "epoch": 1.2, + "grad_norm": 8.485421828065334, + "learning_rate": 7.24789447793463e-06, + "loss": 1.0891, + "step": 99975 + }, + { + "epoch": 1.2, + "grad_norm": 4.457648555002537, + "learning_rate": 7.247332863225105e-06, + "loss": 0.8657, + "step": 99978 + }, + { + "epoch": 1.2, + "grad_norm": 23.134007984429076, + "learning_rate": 7.2467712579094415e-06, + "loss": 1.4321, + "step": 99981 + }, + { + "epoch": 1.2, + "grad_norm": 12.519559665287343, + "learning_rate": 7.2462096619895585e-06, + "loss": 1.5613, + "step": 99984 + }, + { + "epoch": 1.2, + "grad_norm": 4.577538159424078, + "learning_rate": 7.245648075467366e-06, + "loss": 1.0499, + "step": 99987 + }, + { + "epoch": 1.2, + "grad_norm": 6.222704016562103, + "learning_rate": 7.245086498344787e-06, + "loss": 0.8608, + "step": 99990 + }, + { + "epoch": 1.2, + "grad_norm": 26.547304066634435, + "learning_rate": 7.244524930623733e-06, + "loss": 1.0285, + "step": 99993 + }, + { + "epoch": 1.2, + "grad_norm": 7.723938829696334, + "learning_rate": 7.243963372306122e-06, + "loss": 1.2015, + "step": 99996 + }, + { + "epoch": 1.2, + "grad_norm": 13.88819651338016, + "learning_rate": 7.243401823393877e-06, + "loss": 1.2203, + "step": 99999 + }, + { + "epoch": 1.2, + "grad_norm": 10.023800217535939, + "learning_rate": 7.242840283888905e-06, + "loss": 1.2334, + "step": 100002 + }, + { + "epoch": 1.2, + "grad_norm": 3.494618058903597, + "learning_rate": 7.242278753793126e-06, + "loss": 1.1047, + "step": 100005 + }, + { + "epoch": 1.2, + "grad_norm": 12.896737435640974, + "learning_rate": 7.241717233108454e-06, + "loss": 1.2275, + "step": 100008 + }, + { + "epoch": 1.2, + "grad_norm": 7.852049289941328, + "learning_rate": 7.2411557218368135e-06, + "loss": 1.379, + "step": 100011 + }, + { + "epoch": 1.2, + "grad_norm": 3.5368299875171267, + "learning_rate": 7.240594219980111e-06, + "loss": 1.2165, + "step": 100014 + }, + { + "epoch": 1.2, + "grad_norm": 19.747280274865332, + "learning_rate": 7.240032727540266e-06, + "loss": 1.2952, + "step": 100017 + }, + { + "epoch": 1.2, + "grad_norm": 17.71309634074192, + "learning_rate": 7.239471244519194e-06, + "loss": 1.2967, + "step": 100020 + }, + { + "epoch": 1.2, + "grad_norm": 4.394543544275164, + "learning_rate": 7.238909770918816e-06, + "loss": 1.2503, + "step": 100023 + }, + { + "epoch": 1.2, + "grad_norm": 4.292370561857112, + "learning_rate": 7.238348306741046e-06, + "loss": 1.2503, + "step": 100026 + }, + { + "epoch": 1.2, + "grad_norm": 12.925227773865602, + "learning_rate": 7.237786851987794e-06, + "loss": 1.2712, + "step": 100029 + }, + { + "epoch": 1.2, + "grad_norm": 6.496547527575599, + "learning_rate": 7.237225406660981e-06, + "loss": 1.1897, + "step": 100032 + }, + { + "epoch": 1.2, + "grad_norm": 8.626412904924953, + "learning_rate": 7.236663970762522e-06, + "loss": 0.8615, + "step": 100035 + }, + { + "epoch": 1.2, + "grad_norm": 3.476547516575093, + "learning_rate": 7.236102544294338e-06, + "loss": 1.1123, + "step": 100038 + }, + { + "epoch": 1.2, + "grad_norm": 9.27895698684674, + "learning_rate": 7.235541127258335e-06, + "loss": 0.9741, + "step": 100041 + }, + { + "epoch": 1.2, + "grad_norm": 13.868063477793356, + "learning_rate": 7.234979719656438e-06, + "loss": 1.3715, + "step": 100044 + }, + { + "epoch": 1.2, + "grad_norm": 17.833516585114765, + "learning_rate": 7.234418321490556e-06, + "loss": 1.0767, + "step": 100047 + }, + { + "epoch": 1.2, + "grad_norm": 11.250720657129317, + "learning_rate": 7.233856932762609e-06, + "loss": 1.3235, + "step": 100050 + }, + { + "epoch": 1.2, + "grad_norm": 9.463603758877003, + "learning_rate": 7.233295553474516e-06, + "loss": 1.1098, + "step": 100053 + }, + { + "epoch": 1.2, + "grad_norm": 13.536858999220039, + "learning_rate": 7.232734183628185e-06, + "loss": 1.0385, + "step": 100056 + }, + { + "epoch": 1.2, + "grad_norm": 12.737863293956439, + "learning_rate": 7.232172823225536e-06, + "loss": 0.9266, + "step": 100059 + }, + { + "epoch": 1.2, + "grad_norm": 6.996869579438184, + "learning_rate": 7.231611472268483e-06, + "loss": 1.2487, + "step": 100062 + }, + { + "epoch": 1.2, + "grad_norm": 7.074645899203167, + "learning_rate": 7.231050130758947e-06, + "loss": 1.0996, + "step": 100065 + }, + { + "epoch": 1.2, + "grad_norm": 5.049668974623689, + "learning_rate": 7.230488798698836e-06, + "loss": 1.1154, + "step": 100068 + }, + { + "epoch": 1.2, + "grad_norm": 7.570334524222563, + "learning_rate": 7.2299274760900686e-06, + "loss": 1.2616, + "step": 100071 + }, + { + "epoch": 1.2, + "grad_norm": 40.021920285595144, + "learning_rate": 7.2293661629345616e-06, + "loss": 0.8503, + "step": 100074 + }, + { + "epoch": 1.2, + "grad_norm": 18.38975817890804, + "learning_rate": 7.228804859234231e-06, + "loss": 1.6196, + "step": 100077 + }, + { + "epoch": 1.2, + "grad_norm": 20.280068249609542, + "learning_rate": 7.2282435649909936e-06, + "loss": 1.1927, + "step": 100080 + }, + { + "epoch": 1.2, + "grad_norm": 7.020198995991492, + "learning_rate": 7.227682280206758e-06, + "loss": 1.2245, + "step": 100083 + }, + { + "epoch": 1.2, + "grad_norm": 9.265129988420654, + "learning_rate": 7.227121004883446e-06, + "loss": 0.9854, + "step": 100086 + }, + { + "epoch": 1.2, + "grad_norm": 16.901201396557717, + "learning_rate": 7.22655973902297e-06, + "loss": 1.093, + "step": 100089 + }, + { + "epoch": 1.2, + "grad_norm": 4.642560042937971, + "learning_rate": 7.22599848262725e-06, + "loss": 1.4529, + "step": 100092 + }, + { + "epoch": 1.2, + "grad_norm": 19.093271530102857, + "learning_rate": 7.225437235698195e-06, + "loss": 1.4574, + "step": 100095 + }, + { + "epoch": 1.2, + "grad_norm": 13.839084096093735, + "learning_rate": 7.2248759982377245e-06, + "loss": 1.1013, + "step": 100098 + }, + { + "epoch": 1.2, + "grad_norm": 11.112913312148711, + "learning_rate": 7.224314770247751e-06, + "loss": 1.0219, + "step": 100101 + }, + { + "epoch": 1.2, + "grad_norm": 45.62984655708869, + "learning_rate": 7.223753551730191e-06, + "loss": 1.2437, + "step": 100104 + }, + { + "epoch": 1.2, + "grad_norm": 9.331366483148544, + "learning_rate": 7.223192342686964e-06, + "loss": 1.2367, + "step": 100107 + }, + { + "epoch": 1.2, + "grad_norm": 5.429618022731663, + "learning_rate": 7.222631143119981e-06, + "loss": 1.0848, + "step": 100110 + }, + { + "epoch": 1.2, + "grad_norm": 10.636807432298173, + "learning_rate": 7.2220699530311535e-06, + "loss": 1.1338, + "step": 100113 + }, + { + "epoch": 1.2, + "grad_norm": 32.910169239602, + "learning_rate": 7.221508772422401e-06, + "loss": 0.8807, + "step": 100116 + }, + { + "epoch": 1.2, + "grad_norm": 11.323820609842068, + "learning_rate": 7.220947601295642e-06, + "loss": 1.104, + "step": 100119 + }, + { + "epoch": 1.2, + "grad_norm": 5.120132807126399, + "learning_rate": 7.220386439652787e-06, + "loss": 1.1619, + "step": 100122 + }, + { + "epoch": 1.2, + "grad_norm": 7.174985336614597, + "learning_rate": 7.2198252874957496e-06, + "loss": 1.0689, + "step": 100125 + }, + { + "epoch": 1.2, + "grad_norm": 4.912817722303113, + "learning_rate": 7.2192641448264475e-06, + "loss": 1.4174, + "step": 100128 + }, + { + "epoch": 1.2, + "grad_norm": 11.52461449497518, + "learning_rate": 7.218703011646795e-06, + "loss": 1.2371, + "step": 100131 + }, + { + "epoch": 1.2, + "grad_norm": 8.300844691570525, + "learning_rate": 7.218141887958712e-06, + "loss": 0.9379, + "step": 100134 + }, + { + "epoch": 1.2, + "grad_norm": 19.28771772441598, + "learning_rate": 7.217580773764103e-06, + "loss": 1.3979, + "step": 100137 + }, + { + "epoch": 1.2, + "grad_norm": 16.856493834106253, + "learning_rate": 7.217019669064891e-06, + "loss": 1.0747, + "step": 100140 + }, + { + "epoch": 1.2, + "grad_norm": 15.350848923226492, + "learning_rate": 7.216458573862986e-06, + "loss": 1.2457, + "step": 100143 + }, + { + "epoch": 1.2, + "grad_norm": 7.237212277105599, + "learning_rate": 7.21589748816031e-06, + "loss": 1.3796, + "step": 100146 + }, + { + "epoch": 1.2, + "grad_norm": 9.691039654470554, + "learning_rate": 7.215336411958769e-06, + "loss": 1.0681, + "step": 100149 + }, + { + "epoch": 1.2, + "grad_norm": 4.5381039111441455, + "learning_rate": 7.214775345260283e-06, + "loss": 1.3103, + "step": 100152 + }, + { + "epoch": 1.2, + "grad_norm": 50.10841077478795, + "learning_rate": 7.214214288066765e-06, + "loss": 1.2689, + "step": 100155 + }, + { + "epoch": 1.2, + "grad_norm": 4.892255691710489, + "learning_rate": 7.21365324038013e-06, + "loss": 1.1205, + "step": 100158 + }, + { + "epoch": 1.2, + "grad_norm": 3.049382744159641, + "learning_rate": 7.213092202202295e-06, + "loss": 1.1801, + "step": 100161 + }, + { + "epoch": 1.2, + "grad_norm": 13.564029666276552, + "learning_rate": 7.212531173535172e-06, + "loss": 1.1413, + "step": 100164 + }, + { + "epoch": 1.2, + "grad_norm": 7.754812193419272, + "learning_rate": 7.211970154380674e-06, + "loss": 1.2852, + "step": 100167 + }, + { + "epoch": 1.2, + "grad_norm": 2.3228303731788595, + "learning_rate": 7.2114091447407195e-06, + "loss": 0.9177, + "step": 100170 + }, + { + "epoch": 1.2, + "grad_norm": 3.6627400209018193, + "learning_rate": 7.210848144617221e-06, + "loss": 1.1181, + "step": 100173 + }, + { + "epoch": 1.2, + "grad_norm": 9.677737637248937, + "learning_rate": 7.210287154012092e-06, + "loss": 1.0422, + "step": 100176 + }, + { + "epoch": 1.2, + "grad_norm": 5.786309695115055, + "learning_rate": 7.209726172927248e-06, + "loss": 1.169, + "step": 100179 + }, + { + "epoch": 1.2, + "grad_norm": 4.029559035858057, + "learning_rate": 7.209165201364605e-06, + "loss": 1.0901, + "step": 100182 + }, + { + "epoch": 1.2, + "grad_norm": 23.206198271771807, + "learning_rate": 7.2086042393260735e-06, + "loss": 1.3208, + "step": 100185 + }, + { + "epoch": 1.2, + "grad_norm": 8.774202452991398, + "learning_rate": 7.208043286813575e-06, + "loss": 1.0945, + "step": 100188 + }, + { + "epoch": 1.2, + "grad_norm": 19.251668943783244, + "learning_rate": 7.207482343829014e-06, + "loss": 1.2451, + "step": 100191 + }, + { + "epoch": 1.2, + "grad_norm": 14.914142995711785, + "learning_rate": 7.206921410374312e-06, + "loss": 1.3828, + "step": 100194 + }, + { + "epoch": 1.2, + "grad_norm": 5.288180612404789, + "learning_rate": 7.206360486451381e-06, + "loss": 1.045, + "step": 100197 + }, + { + "epoch": 1.2, + "grad_norm": 10.989323108062777, + "learning_rate": 7.205799572062138e-06, + "loss": 1.3028, + "step": 100200 + }, + { + "epoch": 1.2, + "grad_norm": 37.093240590518896, + "learning_rate": 7.205238667208489e-06, + "loss": 1.3807, + "step": 100203 + }, + { + "epoch": 1.2, + "grad_norm": 9.731838155134382, + "learning_rate": 7.204677771892359e-06, + "loss": 1.5614, + "step": 100206 + }, + { + "epoch": 1.2, + "grad_norm": 5.198894631385159, + "learning_rate": 7.204116886115653e-06, + "loss": 0.9215, + "step": 100209 + }, + { + "epoch": 1.21, + "grad_norm": 43.6951372437036, + "learning_rate": 7.203556009880291e-06, + "loss": 1.4019, + "step": 100212 + }, + { + "epoch": 1.21, + "grad_norm": 5.332315459643836, + "learning_rate": 7.202995143188185e-06, + "loss": 1.0766, + "step": 100215 + }, + { + "epoch": 1.21, + "grad_norm": 4.81571105003941, + "learning_rate": 7.202434286041249e-06, + "loss": 1.1902, + "step": 100218 + }, + { + "epoch": 1.21, + "grad_norm": 4.752994863723298, + "learning_rate": 7.201873438441396e-06, + "loss": 1.5261, + "step": 100221 + }, + { + "epoch": 1.21, + "grad_norm": 14.202447206776426, + "learning_rate": 7.201312600390542e-06, + "loss": 1.2182, + "step": 100224 + }, + { + "epoch": 1.21, + "grad_norm": 20.368661963963497, + "learning_rate": 7.200751771890604e-06, + "loss": 1.0446, + "step": 100227 + }, + { + "epoch": 1.21, + "grad_norm": 15.013337586768557, + "learning_rate": 7.200190952943486e-06, + "loss": 1.1571, + "step": 100230 + }, + { + "epoch": 1.21, + "grad_norm": 12.941754890463717, + "learning_rate": 7.199630143551109e-06, + "loss": 1.421, + "step": 100233 + }, + { + "epoch": 1.21, + "grad_norm": 6.113648279898289, + "learning_rate": 7.199069343715386e-06, + "loss": 1.3659, + "step": 100236 + }, + { + "epoch": 1.21, + "grad_norm": 9.63711191415724, + "learning_rate": 7.198508553438229e-06, + "loss": 0.9524, + "step": 100239 + }, + { + "epoch": 1.21, + "grad_norm": 10.414261695941251, + "learning_rate": 7.197947772721559e-06, + "loss": 1.2943, + "step": 100242 + }, + { + "epoch": 1.21, + "grad_norm": 12.88908034387387, + "learning_rate": 7.197387001567278e-06, + "loss": 0.9334, + "step": 100245 + }, + { + "epoch": 1.21, + "grad_norm": 7.946455143905621, + "learning_rate": 7.196826239977308e-06, + "loss": 1.1369, + "step": 100248 + }, + { + "epoch": 1.21, + "grad_norm": 9.841476458160589, + "learning_rate": 7.196265487953557e-06, + "loss": 1.1754, + "step": 100251 + }, + { + "epoch": 1.21, + "grad_norm": 10.20131716272573, + "learning_rate": 7.195704745497949e-06, + "loss": 1.0632, + "step": 100254 + }, + { + "epoch": 1.21, + "grad_norm": 2.8265061706974057, + "learning_rate": 7.195144012612385e-06, + "loss": 1.2215, + "step": 100257 + }, + { + "epoch": 1.21, + "grad_norm": 13.74477108355974, + "learning_rate": 7.194583289298786e-06, + "loss": 1.3894, + "step": 100260 + }, + { + "epoch": 1.21, + "grad_norm": 8.098292774639402, + "learning_rate": 7.194022575559062e-06, + "loss": 1.2689, + "step": 100263 + }, + { + "epoch": 1.21, + "grad_norm": 15.59275374392516, + "learning_rate": 7.193461871395131e-06, + "loss": 1.3303, + "step": 100266 + }, + { + "epoch": 1.21, + "grad_norm": 19.101117598113618, + "learning_rate": 7.192901176808904e-06, + "loss": 1.1364, + "step": 100269 + }, + { + "epoch": 1.21, + "grad_norm": 3.154224538415319, + "learning_rate": 7.192340491802292e-06, + "loss": 1.43, + "step": 100272 + }, + { + "epoch": 1.21, + "grad_norm": 17.577007456645028, + "learning_rate": 7.191779816377209e-06, + "loss": 1.2135, + "step": 100275 + }, + { + "epoch": 1.21, + "grad_norm": 7.044995092067752, + "learning_rate": 7.191219150535573e-06, + "loss": 1.2604, + "step": 100278 + }, + { + "epoch": 1.21, + "grad_norm": 16.197189144958593, + "learning_rate": 7.1906584942792964e-06, + "loss": 1.2714, + "step": 100281 + }, + { + "epoch": 1.21, + "grad_norm": 3.4848057382081548, + "learning_rate": 7.190097847610287e-06, + "loss": 0.9889, + "step": 100284 + }, + { + "epoch": 1.21, + "grad_norm": 10.774211132216843, + "learning_rate": 7.18953721053046e-06, + "loss": 1.193, + "step": 100287 + }, + { + "epoch": 1.21, + "grad_norm": 3.1352383060661118, + "learning_rate": 7.188976583041732e-06, + "loss": 1.2576, + "step": 100290 + }, + { + "epoch": 1.21, + "grad_norm": 10.510203488934529, + "learning_rate": 7.188415965146014e-06, + "loss": 1.36, + "step": 100293 + }, + { + "epoch": 1.21, + "grad_norm": 12.804154506065391, + "learning_rate": 7.1878553568452225e-06, + "loss": 1.0689, + "step": 100296 + }, + { + "epoch": 1.21, + "grad_norm": 15.787614816842812, + "learning_rate": 7.1872947581412635e-06, + "loss": 1.2053, + "step": 100299 + }, + { + "epoch": 1.21, + "grad_norm": 3.8325015144940116, + "learning_rate": 7.186734169036058e-06, + "loss": 1.5847, + "step": 100302 + }, + { + "epoch": 1.21, + "grad_norm": 12.806743105890515, + "learning_rate": 7.186173589531511e-06, + "loss": 1.356, + "step": 100305 + }, + { + "epoch": 1.21, + "grad_norm": 12.297516816353605, + "learning_rate": 7.185613019629547e-06, + "loss": 1.0832, + "step": 100308 + }, + { + "epoch": 1.21, + "grad_norm": 3.0924871925642843, + "learning_rate": 7.185052459332066e-06, + "loss": 1.2618, + "step": 100311 + }, + { + "epoch": 1.21, + "grad_norm": 7.243919181623426, + "learning_rate": 7.184491908640989e-06, + "loss": 1.104, + "step": 100314 + }, + { + "epoch": 1.21, + "grad_norm": 7.143185927979307, + "learning_rate": 7.183931367558226e-06, + "loss": 1.2248, + "step": 100317 + }, + { + "epoch": 1.21, + "grad_norm": 3.092952027938858, + "learning_rate": 7.183370836085692e-06, + "loss": 1.2545, + "step": 100320 + }, + { + "epoch": 1.21, + "grad_norm": 4.23986933495322, + "learning_rate": 7.1828103142253015e-06, + "loss": 1.0932, + "step": 100323 + }, + { + "epoch": 1.21, + "grad_norm": 8.900514179107825, + "learning_rate": 7.18224980197896e-06, + "loss": 1.4591, + "step": 100326 + }, + { + "epoch": 1.21, + "grad_norm": 7.305103810802193, + "learning_rate": 7.181689299348584e-06, + "loss": 1.3774, + "step": 100329 + }, + { + "epoch": 1.21, + "grad_norm": 23.21963224216031, + "learning_rate": 7.181128806336091e-06, + "loss": 0.9653, + "step": 100332 + }, + { + "epoch": 1.21, + "grad_norm": 14.831989106220648, + "learning_rate": 7.180568322943391e-06, + "loss": 1.2624, + "step": 100335 + }, + { + "epoch": 1.21, + "grad_norm": 5.453916397024872, + "learning_rate": 7.180007849172392e-06, + "loss": 1.171, + "step": 100338 + }, + { + "epoch": 1.21, + "grad_norm": 7.2099127802134095, + "learning_rate": 7.179447385025011e-06, + "loss": 1.2696, + "step": 100341 + }, + { + "epoch": 1.21, + "grad_norm": 45.00935773896699, + "learning_rate": 7.178886930503162e-06, + "loss": 1.1407, + "step": 100344 + }, + { + "epoch": 1.21, + "grad_norm": 15.403062865149371, + "learning_rate": 7.178326485608753e-06, + "loss": 1.2275, + "step": 100347 + }, + { + "epoch": 1.21, + "grad_norm": 5.428616072090912, + "learning_rate": 7.177766050343704e-06, + "loss": 0.9149, + "step": 100350 + }, + { + "epoch": 1.21, + "grad_norm": 13.800618440148948, + "learning_rate": 7.17720562470992e-06, + "loss": 0.96, + "step": 100353 + }, + { + "epoch": 1.21, + "grad_norm": 9.930156551714097, + "learning_rate": 7.176645208709316e-06, + "loss": 1.1944, + "step": 100356 + }, + { + "epoch": 1.21, + "grad_norm": 7.446949090891585, + "learning_rate": 7.176084802343806e-06, + "loss": 1.2827, + "step": 100359 + }, + { + "epoch": 1.21, + "grad_norm": 13.637181859520759, + "learning_rate": 7.175524405615303e-06, + "loss": 1.1349, + "step": 100362 + }, + { + "epoch": 1.21, + "grad_norm": 27.476082340907727, + "learning_rate": 7.1749640185257165e-06, + "loss": 1.1425, + "step": 100365 + }, + { + "epoch": 1.21, + "grad_norm": 12.327439941276012, + "learning_rate": 7.174403641076957e-06, + "loss": 0.8045, + "step": 100368 + }, + { + "epoch": 1.21, + "grad_norm": 8.070303052379968, + "learning_rate": 7.173843273270942e-06, + "loss": 1.9637, + "step": 100371 + }, + { + "epoch": 1.21, + "grad_norm": 7.3263682640958026, + "learning_rate": 7.173282915109583e-06, + "loss": 1.2327, + "step": 100374 + }, + { + "epoch": 1.21, + "grad_norm": 10.072939557541583, + "learning_rate": 7.172722566594794e-06, + "loss": 1.1907, + "step": 100377 + }, + { + "epoch": 1.21, + "grad_norm": 8.042814079796509, + "learning_rate": 7.172162227728479e-06, + "loss": 1.2659, + "step": 100380 + }, + { + "epoch": 1.21, + "grad_norm": 20.20665558583576, + "learning_rate": 7.1716018985125566e-06, + "loss": 1.3214, + "step": 100383 + }, + { + "epoch": 1.21, + "grad_norm": 11.518051948404334, + "learning_rate": 7.171041578948941e-06, + "loss": 1.4282, + "step": 100386 + }, + { + "epoch": 1.21, + "grad_norm": 6.715688178349343, + "learning_rate": 7.170481269039542e-06, + "loss": 0.8737, + "step": 100389 + }, + { + "epoch": 1.21, + "grad_norm": 28.939292969422638, + "learning_rate": 7.169920968786268e-06, + "loss": 1.2869, + "step": 100392 + }, + { + "epoch": 1.21, + "grad_norm": 8.743172951869138, + "learning_rate": 7.169360678191034e-06, + "loss": 1.1409, + "step": 100395 + }, + { + "epoch": 1.21, + "grad_norm": 28.70483529591111, + "learning_rate": 7.168800397255755e-06, + "loss": 1.3902, + "step": 100398 + }, + { + "epoch": 1.21, + "grad_norm": 5.936254814767914, + "learning_rate": 7.168240125982337e-06, + "loss": 0.9795, + "step": 100401 + }, + { + "epoch": 1.21, + "grad_norm": 5.739643096895569, + "learning_rate": 7.167679864372702e-06, + "loss": 0.8195, + "step": 100404 + }, + { + "epoch": 1.21, + "grad_norm": 17.779808911569674, + "learning_rate": 7.167119612428752e-06, + "loss": 1.6726, + "step": 100407 + }, + { + "epoch": 1.21, + "grad_norm": 11.442977145457094, + "learning_rate": 7.1665593701524e-06, + "loss": 1.0129, + "step": 100410 + }, + { + "epoch": 1.21, + "grad_norm": 26.315323266314643, + "learning_rate": 7.165999137545561e-06, + "loss": 1.14, + "step": 100413 + }, + { + "epoch": 1.21, + "grad_norm": 10.742382982727994, + "learning_rate": 7.1654389146101496e-06, + "loss": 0.9346, + "step": 100416 + }, + { + "epoch": 1.21, + "grad_norm": 18.047746969936117, + "learning_rate": 7.1648787013480724e-06, + "loss": 1.029, + "step": 100419 + }, + { + "epoch": 1.21, + "grad_norm": 8.40011944389833, + "learning_rate": 7.16431849776124e-06, + "loss": 1.6461, + "step": 100422 + }, + { + "epoch": 1.21, + "grad_norm": 4.083124728760746, + "learning_rate": 7.163758303851569e-06, + "loss": 1.2462, + "step": 100425 + }, + { + "epoch": 1.21, + "grad_norm": 10.865825589167535, + "learning_rate": 7.163198119620971e-06, + "loss": 1.1701, + "step": 100428 + }, + { + "epoch": 1.21, + "grad_norm": 152.27711953565927, + "learning_rate": 7.162637945071357e-06, + "loss": 1.1825, + "step": 100431 + }, + { + "epoch": 1.21, + "grad_norm": 23.37899851691048, + "learning_rate": 7.162077780204634e-06, + "loss": 1.052, + "step": 100434 + }, + { + "epoch": 1.21, + "grad_norm": 21.322741744326333, + "learning_rate": 7.161517625022718e-06, + "loss": 1.0851, + "step": 100437 + }, + { + "epoch": 1.21, + "grad_norm": 2.9502476371328004, + "learning_rate": 7.16095747952752e-06, + "loss": 1.2551, + "step": 100440 + }, + { + "epoch": 1.21, + "grad_norm": 10.208297607439505, + "learning_rate": 7.160397343720955e-06, + "loss": 1.1416, + "step": 100443 + }, + { + "epoch": 1.21, + "grad_norm": 3.8420993621976804, + "learning_rate": 7.159837217604928e-06, + "loss": 1.248, + "step": 100446 + }, + { + "epoch": 1.21, + "grad_norm": 67.19270869007295, + "learning_rate": 7.159277101181354e-06, + "loss": 0.7829, + "step": 100449 + }, + { + "epoch": 1.21, + "grad_norm": 2.7092821543916226, + "learning_rate": 7.158716994452142e-06, + "loss": 1.367, + "step": 100452 + }, + { + "epoch": 1.21, + "grad_norm": 12.062924551504134, + "learning_rate": 7.158156897419206e-06, + "loss": 0.9968, + "step": 100455 + }, + { + "epoch": 1.21, + "grad_norm": 4.539139828272919, + "learning_rate": 7.157596810084462e-06, + "loss": 1.2162, + "step": 100458 + }, + { + "epoch": 1.21, + "grad_norm": 4.560065078115152, + "learning_rate": 7.157036732449813e-06, + "loss": 1.1839, + "step": 100461 + }, + { + "epoch": 1.21, + "grad_norm": 7.43109601774303, + "learning_rate": 7.156476664517171e-06, + "loss": 0.8556, + "step": 100464 + }, + { + "epoch": 1.21, + "grad_norm": 121.07031633357984, + "learning_rate": 7.155916606288451e-06, + "loss": 0.8222, + "step": 100467 + }, + { + "epoch": 1.21, + "grad_norm": 8.368033853332113, + "learning_rate": 7.155356557765567e-06, + "loss": 1.31, + "step": 100470 + }, + { + "epoch": 1.21, + "grad_norm": 9.818996833986466, + "learning_rate": 7.154796518950424e-06, + "loss": 1.3765, + "step": 100473 + }, + { + "epoch": 1.21, + "grad_norm": 9.826581951449334, + "learning_rate": 7.154236489844934e-06, + "loss": 1.1618, + "step": 100476 + }, + { + "epoch": 1.21, + "grad_norm": 13.795288345806902, + "learning_rate": 7.15367647045101e-06, + "loss": 1.336, + "step": 100479 + }, + { + "epoch": 1.21, + "grad_norm": 34.92835464574361, + "learning_rate": 7.153116460770564e-06, + "loss": 0.8771, + "step": 100482 + }, + { + "epoch": 1.21, + "grad_norm": 8.090993476951285, + "learning_rate": 7.152556460805509e-06, + "loss": 1.1726, + "step": 100485 + }, + { + "epoch": 1.21, + "grad_norm": 19.10752807899822, + "learning_rate": 7.151996470557748e-06, + "loss": 0.7417, + "step": 100488 + }, + { + "epoch": 1.21, + "grad_norm": 9.420802277732793, + "learning_rate": 7.151436490029198e-06, + "loss": 1.3003, + "step": 100491 + }, + { + "epoch": 1.21, + "grad_norm": 8.875913162101797, + "learning_rate": 7.150876519221772e-06, + "loss": 1.1652, + "step": 100494 + }, + { + "epoch": 1.21, + "grad_norm": 16.02820197088645, + "learning_rate": 7.15031655813738e-06, + "loss": 1.0182, + "step": 100497 + }, + { + "epoch": 1.21, + "grad_norm": 46.61766141740607, + "learning_rate": 7.149756606777927e-06, + "loss": 1.1965, + "step": 100500 + }, + { + "epoch": 1.21, + "grad_norm": 19.981210632047066, + "learning_rate": 7.149196665145328e-06, + "loss": 1.3637, + "step": 100503 + }, + { + "epoch": 1.21, + "grad_norm": 5.722490161609169, + "learning_rate": 7.148636733241494e-06, + "loss": 1.162, + "step": 100506 + }, + { + "epoch": 1.21, + "grad_norm": 10.688047953386008, + "learning_rate": 7.148076811068336e-06, + "loss": 1.1155, + "step": 100509 + }, + { + "epoch": 1.21, + "grad_norm": 18.085363682275496, + "learning_rate": 7.147516898627767e-06, + "loss": 1.2954, + "step": 100512 + }, + { + "epoch": 1.21, + "grad_norm": 19.50270543802929, + "learning_rate": 7.146956995921694e-06, + "loss": 1.036, + "step": 100515 + }, + { + "epoch": 1.21, + "grad_norm": 7.874518315110318, + "learning_rate": 7.146397102952028e-06, + "loss": 1.0651, + "step": 100518 + }, + { + "epoch": 1.21, + "grad_norm": 11.06316750041052, + "learning_rate": 7.14583721972068e-06, + "loss": 0.9901, + "step": 100521 + }, + { + "epoch": 1.21, + "grad_norm": 20.927664381630414, + "learning_rate": 7.145277346229567e-06, + "loss": 1.3907, + "step": 100524 + }, + { + "epoch": 1.21, + "grad_norm": 4.198494242581524, + "learning_rate": 7.14471748248059e-06, + "loss": 0.9322, + "step": 100527 + }, + { + "epoch": 1.21, + "grad_norm": 12.541782393695314, + "learning_rate": 7.144157628475663e-06, + "loss": 0.9981, + "step": 100530 + }, + { + "epoch": 1.21, + "grad_norm": 9.479208942300971, + "learning_rate": 7.143597784216697e-06, + "loss": 0.9247, + "step": 100533 + }, + { + "epoch": 1.21, + "grad_norm": 3.997480871889, + "learning_rate": 7.143037949705604e-06, + "loss": 1.2969, + "step": 100536 + }, + { + "epoch": 1.21, + "grad_norm": 13.57689794313538, + "learning_rate": 7.1424781249442965e-06, + "loss": 0.8639, + "step": 100539 + }, + { + "epoch": 1.21, + "grad_norm": 13.562578573099414, + "learning_rate": 7.141918309934679e-06, + "loss": 1.1525, + "step": 100542 + }, + { + "epoch": 1.21, + "grad_norm": 15.468753414080913, + "learning_rate": 7.141358504678665e-06, + "loss": 1.2377, + "step": 100545 + }, + { + "epoch": 1.21, + "grad_norm": 2.669549175133409, + "learning_rate": 7.140798709178163e-06, + "loss": 1.4345, + "step": 100548 + }, + { + "epoch": 1.21, + "grad_norm": 6.089762579587427, + "learning_rate": 7.14023892343509e-06, + "loss": 1.0511, + "step": 100551 + }, + { + "epoch": 1.21, + "grad_norm": 9.919848122728137, + "learning_rate": 7.139679147451346e-06, + "loss": 1.4549, + "step": 100554 + }, + { + "epoch": 1.21, + "grad_norm": 12.926070383177604, + "learning_rate": 7.1391193812288495e-06, + "loss": 1.3814, + "step": 100557 + }, + { + "epoch": 1.21, + "grad_norm": 22.306513921420308, + "learning_rate": 7.138559624769508e-06, + "loss": 1.0815, + "step": 100560 + }, + { + "epoch": 1.21, + "grad_norm": 18.789497284309242, + "learning_rate": 7.137999878075229e-06, + "loss": 1.0389, + "step": 100563 + }, + { + "epoch": 1.21, + "grad_norm": 2.89873397609211, + "learning_rate": 7.137440141147931e-06, + "loss": 1.2299, + "step": 100566 + }, + { + "epoch": 1.21, + "grad_norm": 5.3031543442297, + "learning_rate": 7.136880413989515e-06, + "loss": 1.1972, + "step": 100569 + }, + { + "epoch": 1.21, + "grad_norm": 6.971962324790116, + "learning_rate": 7.136320696601895e-06, + "loss": 1.3732, + "step": 100572 + }, + { + "epoch": 1.21, + "grad_norm": 10.761036505429567, + "learning_rate": 7.13576098898698e-06, + "loss": 1.5287, + "step": 100575 + }, + { + "epoch": 1.21, + "grad_norm": 11.141268355857498, + "learning_rate": 7.135201291146685e-06, + "loss": 1.3211, + "step": 100578 + }, + { + "epoch": 1.21, + "grad_norm": 10.172146650722683, + "learning_rate": 7.134641603082915e-06, + "loss": 1.3169, + "step": 100581 + }, + { + "epoch": 1.21, + "grad_norm": 5.229901713301131, + "learning_rate": 7.134081924797578e-06, + "loss": 1.2472, + "step": 100584 + }, + { + "epoch": 1.21, + "grad_norm": 6.333717842231082, + "learning_rate": 7.133522256292589e-06, + "loss": 1.0089, + "step": 100587 + }, + { + "epoch": 1.21, + "grad_norm": 16.955006684044765, + "learning_rate": 7.132962597569855e-06, + "loss": 1.0616, + "step": 100590 + }, + { + "epoch": 1.21, + "grad_norm": 11.805767870153165, + "learning_rate": 7.132402948631291e-06, + "loss": 1.2464, + "step": 100593 + }, + { + "epoch": 1.21, + "grad_norm": 9.265877251309371, + "learning_rate": 7.131843309478798e-06, + "loss": 1.3191, + "step": 100596 + }, + { + "epoch": 1.21, + "grad_norm": 7.833541895947724, + "learning_rate": 7.131283680114293e-06, + "loss": 1.2937, + "step": 100599 + }, + { + "epoch": 1.21, + "grad_norm": 29.242098888181516, + "learning_rate": 7.130724060539681e-06, + "loss": 1.1391, + "step": 100602 + }, + { + "epoch": 1.21, + "grad_norm": 7.021928522667998, + "learning_rate": 7.130164450756879e-06, + "loss": 1.4254, + "step": 100605 + }, + { + "epoch": 1.21, + "grad_norm": 8.984815940218807, + "learning_rate": 7.129604850767787e-06, + "loss": 0.8142, + "step": 100608 + }, + { + "epoch": 1.21, + "grad_norm": 4.308763756386548, + "learning_rate": 7.129045260574323e-06, + "loss": 1.1045, + "step": 100611 + }, + { + "epoch": 1.21, + "grad_norm": 8.265505946449164, + "learning_rate": 7.128485680178391e-06, + "loss": 1.2875, + "step": 100614 + }, + { + "epoch": 1.21, + "grad_norm": 6.867743273064045, + "learning_rate": 7.127926109581903e-06, + "loss": 1.054, + "step": 100617 + }, + { + "epoch": 1.21, + "grad_norm": 12.235515078475622, + "learning_rate": 7.127366548786772e-06, + "loss": 1.4101, + "step": 100620 + }, + { + "epoch": 1.21, + "grad_norm": 14.730483795370763, + "learning_rate": 7.126806997794904e-06, + "loss": 0.9675, + "step": 100623 + }, + { + "epoch": 1.21, + "grad_norm": 16.4817831729627, + "learning_rate": 7.126247456608205e-06, + "loss": 1.0439, + "step": 100626 + }, + { + "epoch": 1.21, + "grad_norm": 12.52599093266223, + "learning_rate": 7.125687925228591e-06, + "loss": 1.4949, + "step": 100629 + }, + { + "epoch": 1.21, + "grad_norm": 5.553654238170918, + "learning_rate": 7.125128403657968e-06, + "loss": 0.9281, + "step": 100632 + }, + { + "epoch": 1.21, + "grad_norm": 3.885124103582497, + "learning_rate": 7.124568891898248e-06, + "loss": 1.2129, + "step": 100635 + }, + { + "epoch": 1.21, + "grad_norm": 27.447384518098396, + "learning_rate": 7.1240093899513365e-06, + "loss": 1.3385, + "step": 100638 + }, + { + "epoch": 1.21, + "grad_norm": 82.55894923438105, + "learning_rate": 7.1234498978191456e-06, + "loss": 1.1954, + "step": 100641 + }, + { + "epoch": 1.21, + "grad_norm": 6.425840482893921, + "learning_rate": 7.122890415503583e-06, + "loss": 0.9776, + "step": 100644 + }, + { + "epoch": 1.21, + "grad_norm": 5.736073359023903, + "learning_rate": 7.122330943006562e-06, + "loss": 1.0725, + "step": 100647 + }, + { + "epoch": 1.21, + "grad_norm": 30.286417830182373, + "learning_rate": 7.121771480329986e-06, + "loss": 1.1162, + "step": 100650 + }, + { + "epoch": 1.21, + "grad_norm": 21.78245869901247, + "learning_rate": 7.121212027475768e-06, + "loss": 1.0071, + "step": 100653 + }, + { + "epoch": 1.21, + "grad_norm": 8.383937546405033, + "learning_rate": 7.120652584445815e-06, + "loss": 1.6108, + "step": 100656 + }, + { + "epoch": 1.21, + "grad_norm": 9.324561659922054, + "learning_rate": 7.120093151242042e-06, + "loss": 1.3051, + "step": 100659 + }, + { + "epoch": 1.21, + "grad_norm": 3.4054592292544323, + "learning_rate": 7.11953372786635e-06, + "loss": 1.1294, + "step": 100662 + }, + { + "epoch": 1.21, + "grad_norm": 7.6141568117935075, + "learning_rate": 7.118974314320655e-06, + "loss": 1.1974, + "step": 100665 + }, + { + "epoch": 1.21, + "grad_norm": 8.55026015883358, + "learning_rate": 7.118414910606859e-06, + "loss": 1.4329, + "step": 100668 + }, + { + "epoch": 1.21, + "grad_norm": 10.21396946064595, + "learning_rate": 7.117855516726875e-06, + "loss": 1.3044, + "step": 100671 + }, + { + "epoch": 1.21, + "grad_norm": 4.65817419553912, + "learning_rate": 7.117296132682617e-06, + "loss": 1.2033, + "step": 100674 + }, + { + "epoch": 1.21, + "grad_norm": 6.9473882629685555, + "learning_rate": 7.1167367584759865e-06, + "loss": 1.0226, + "step": 100677 + }, + { + "epoch": 1.21, + "grad_norm": 6.121160477146031, + "learning_rate": 7.116177394108892e-06, + "loss": 1.2455, + "step": 100680 + }, + { + "epoch": 1.21, + "grad_norm": 8.948050556250532, + "learning_rate": 7.115618039583247e-06, + "loss": 1.4321, + "step": 100683 + }, + { + "epoch": 1.21, + "grad_norm": 18.737542032620272, + "learning_rate": 7.115058694900963e-06, + "loss": 0.9672, + "step": 100686 + }, + { + "epoch": 1.21, + "grad_norm": 5.568247740605919, + "learning_rate": 7.114499360063938e-06, + "loss": 0.9664, + "step": 100689 + }, + { + "epoch": 1.21, + "grad_norm": 7.631922681738445, + "learning_rate": 7.113940035074089e-06, + "loss": 0.9359, + "step": 100692 + }, + { + "epoch": 1.21, + "grad_norm": 25.272407702351224, + "learning_rate": 7.113380719933325e-06, + "loss": 1.0677, + "step": 100695 + }, + { + "epoch": 1.21, + "grad_norm": 22.20179295947839, + "learning_rate": 7.11282141464355e-06, + "loss": 1.0387, + "step": 100698 + }, + { + "epoch": 1.21, + "grad_norm": 7.946035724122921, + "learning_rate": 7.112262119206681e-06, + "loss": 1.0766, + "step": 100701 + }, + { + "epoch": 1.21, + "grad_norm": 65.57621723307011, + "learning_rate": 7.111702833624616e-06, + "loss": 1.4024, + "step": 100704 + }, + { + "epoch": 1.21, + "grad_norm": 16.91260457336681, + "learning_rate": 7.111143557899272e-06, + "loss": 1.1921, + "step": 100707 + }, + { + "epoch": 1.21, + "grad_norm": 8.947222132651035, + "learning_rate": 7.110584292032549e-06, + "loss": 1.2528, + "step": 100710 + }, + { + "epoch": 1.21, + "grad_norm": 9.130774143338623, + "learning_rate": 7.110025036026369e-06, + "loss": 1.3321, + "step": 100713 + }, + { + "epoch": 1.21, + "grad_norm": 1.8957172949253156, + "learning_rate": 7.109465789882628e-06, + "loss": 0.7344, + "step": 100716 + }, + { + "epoch": 1.21, + "grad_norm": 10.21905726171058, + "learning_rate": 7.10890655360324e-06, + "loss": 1.4522, + "step": 100719 + }, + { + "epoch": 1.21, + "grad_norm": 9.228565155768804, + "learning_rate": 7.10834732719011e-06, + "loss": 1.3792, + "step": 100722 + }, + { + "epoch": 1.21, + "grad_norm": 5.33274471871315, + "learning_rate": 7.107788110645153e-06, + "loss": 1.0959, + "step": 100725 + }, + { + "epoch": 1.21, + "grad_norm": 65.54738913708769, + "learning_rate": 7.107228903970271e-06, + "loss": 1.5043, + "step": 100728 + }, + { + "epoch": 1.21, + "grad_norm": 12.670943112790834, + "learning_rate": 7.1066697071673775e-06, + "loss": 1.2088, + "step": 100731 + }, + { + "epoch": 1.21, + "grad_norm": 8.29387981678274, + "learning_rate": 7.106110520238374e-06, + "loss": 0.9537, + "step": 100734 + }, + { + "epoch": 1.21, + "grad_norm": 17.92728876517942, + "learning_rate": 7.105551343185175e-06, + "loss": 1.5339, + "step": 100737 + }, + { + "epoch": 1.21, + "grad_norm": 15.27305684599867, + "learning_rate": 7.104992176009691e-06, + "loss": 1.3615, + "step": 100740 + }, + { + "epoch": 1.21, + "grad_norm": 3.2196563323019314, + "learning_rate": 7.10443301871382e-06, + "loss": 1.2603, + "step": 100743 + }, + { + "epoch": 1.21, + "grad_norm": 8.981869422216882, + "learning_rate": 7.103873871299476e-06, + "loss": 1.165, + "step": 100746 + }, + { + "epoch": 1.21, + "grad_norm": 6.0696148434091794, + "learning_rate": 7.103314733768569e-06, + "loss": 0.9662, + "step": 100749 + }, + { + "epoch": 1.21, + "grad_norm": 13.293770563359606, + "learning_rate": 7.102755606123004e-06, + "loss": 1.4608, + "step": 100752 + }, + { + "epoch": 1.21, + "grad_norm": 33.747568932539465, + "learning_rate": 7.1021964883646955e-06, + "loss": 1.3311, + "step": 100755 + }, + { + "epoch": 1.21, + "grad_norm": 5.192409488591224, + "learning_rate": 7.101637380495542e-06, + "loss": 1.3829, + "step": 100758 + }, + { + "epoch": 1.21, + "grad_norm": 11.98450943852443, + "learning_rate": 7.101078282517457e-06, + "loss": 1.67, + "step": 100761 + }, + { + "epoch": 1.21, + "grad_norm": 5.774073822874115, + "learning_rate": 7.100519194432347e-06, + "loss": 1.2226, + "step": 100764 + }, + { + "epoch": 1.21, + "grad_norm": 9.566271598685374, + "learning_rate": 7.099960116242124e-06, + "loss": 1.0283, + "step": 100767 + }, + { + "epoch": 1.21, + "grad_norm": 3.6536228555035266, + "learning_rate": 7.099401047948688e-06, + "loss": 1.3069, + "step": 100770 + }, + { + "epoch": 1.21, + "grad_norm": 37.70683047082725, + "learning_rate": 7.098841989553955e-06, + "loss": 1.2772, + "step": 100773 + }, + { + "epoch": 1.21, + "grad_norm": 3.461268319080594, + "learning_rate": 7.098282941059826e-06, + "loss": 1.1573, + "step": 100776 + }, + { + "epoch": 1.21, + "grad_norm": 6.7833342464296535, + "learning_rate": 7.097723902468214e-06, + "loss": 1.0684, + "step": 100779 + }, + { + "epoch": 1.21, + "grad_norm": 8.25708134058825, + "learning_rate": 7.097164873781028e-06, + "loss": 1.075, + "step": 100782 + }, + { + "epoch": 1.21, + "grad_norm": 6.541217436125839, + "learning_rate": 7.096605855000169e-06, + "loss": 1.3362, + "step": 100785 + }, + { + "epoch": 1.21, + "grad_norm": 6.561557932740957, + "learning_rate": 7.0960468461275464e-06, + "loss": 1.0812, + "step": 100788 + }, + { + "epoch": 1.21, + "grad_norm": 2.8744383419168034, + "learning_rate": 7.095487847165075e-06, + "loss": 1.2511, + "step": 100791 + }, + { + "epoch": 1.21, + "grad_norm": 6.537719835634235, + "learning_rate": 7.094928858114656e-06, + "loss": 1.0134, + "step": 100794 + }, + { + "epoch": 1.21, + "grad_norm": 7.717098958853254, + "learning_rate": 7.0943698789781956e-06, + "loss": 1.2277, + "step": 100797 + }, + { + "epoch": 1.21, + "grad_norm": 3.9536627427674245, + "learning_rate": 7.093810909757606e-06, + "loss": 1.3106, + "step": 100800 + }, + { + "epoch": 1.21, + "grad_norm": 3.5982133214362055, + "learning_rate": 7.093251950454792e-06, + "loss": 1.1558, + "step": 100803 + }, + { + "epoch": 1.21, + "grad_norm": 14.943705172414917, + "learning_rate": 7.092693001071662e-06, + "loss": 1.0639, + "step": 100806 + }, + { + "epoch": 1.21, + "grad_norm": 12.705061606049073, + "learning_rate": 7.092134061610127e-06, + "loss": 1.085, + "step": 100809 + }, + { + "epoch": 1.21, + "grad_norm": 5.472269222026581, + "learning_rate": 7.091575132072087e-06, + "loss": 1.1442, + "step": 100812 + }, + { + "epoch": 1.21, + "grad_norm": 12.557652373049756, + "learning_rate": 7.091016212459456e-06, + "loss": 1.0843, + "step": 100815 + }, + { + "epoch": 1.21, + "grad_norm": 10.306912649167716, + "learning_rate": 7.090457302774136e-06, + "loss": 1.1306, + "step": 100818 + }, + { + "epoch": 1.21, + "grad_norm": 6.3865803145782705, + "learning_rate": 7.0898984030180426e-06, + "loss": 1.0905, + "step": 100821 + }, + { + "epoch": 1.21, + "grad_norm": 4.553910310306823, + "learning_rate": 7.089339513193074e-06, + "loss": 1.2461, + "step": 100824 + }, + { + "epoch": 1.21, + "grad_norm": 39.379918640893706, + "learning_rate": 7.08878063330114e-06, + "loss": 1.2129, + "step": 100827 + }, + { + "epoch": 1.21, + "grad_norm": 18.22001268544341, + "learning_rate": 7.08822176334415e-06, + "loss": 1.3636, + "step": 100830 + }, + { + "epoch": 1.21, + "grad_norm": 12.460359014259863, + "learning_rate": 7.087662903324011e-06, + "loss": 1.2877, + "step": 100833 + }, + { + "epoch": 1.21, + "grad_norm": 43.037104734530374, + "learning_rate": 7.087104053242631e-06, + "loss": 0.9153, + "step": 100836 + }, + { + "epoch": 1.21, + "grad_norm": 5.881182955253433, + "learning_rate": 7.086545213101914e-06, + "loss": 1.0144, + "step": 100839 + }, + { + "epoch": 1.21, + "grad_norm": 8.556448320631349, + "learning_rate": 7.085986382903767e-06, + "loss": 1.1576, + "step": 100842 + }, + { + "epoch": 1.21, + "grad_norm": 7.506378658334156, + "learning_rate": 7.0854275626501e-06, + "loss": 1.3751, + "step": 100845 + }, + { + "epoch": 1.21, + "grad_norm": 4.590569977811575, + "learning_rate": 7.084868752342823e-06, + "loss": 1.2929, + "step": 100848 + }, + { + "epoch": 1.21, + "grad_norm": 12.657416551864898, + "learning_rate": 7.084309951983834e-06, + "loss": 1.1112, + "step": 100851 + }, + { + "epoch": 1.21, + "grad_norm": 21.068184659895067, + "learning_rate": 7.083751161575046e-06, + "loss": 0.9598, + "step": 100854 + }, + { + "epoch": 1.21, + "grad_norm": 8.343633385602459, + "learning_rate": 7.083192381118365e-06, + "loss": 1.1912, + "step": 100857 + }, + { + "epoch": 1.21, + "grad_norm": 5.30021996346263, + "learning_rate": 7.082633610615697e-06, + "loss": 1.0999, + "step": 100860 + }, + { + "epoch": 1.21, + "grad_norm": 2.4724526318959152, + "learning_rate": 7.082074850068951e-06, + "loss": 1.5763, + "step": 100863 + }, + { + "epoch": 1.21, + "grad_norm": 20.894323279128834, + "learning_rate": 7.081516099480035e-06, + "loss": 1.11, + "step": 100866 + }, + { + "epoch": 1.21, + "grad_norm": 9.252264299779972, + "learning_rate": 7.080957358850848e-06, + "loss": 1.3072, + "step": 100869 + }, + { + "epoch": 1.21, + "grad_norm": 13.986149712607771, + "learning_rate": 7.080398628183303e-06, + "loss": 0.9689, + "step": 100872 + }, + { + "epoch": 1.21, + "grad_norm": 44.23281203337138, + "learning_rate": 7.07983990747931e-06, + "loss": 1.1718, + "step": 100875 + }, + { + "epoch": 1.21, + "grad_norm": 6.921624355977189, + "learning_rate": 7.07928119674077e-06, + "loss": 1.3769, + "step": 100878 + }, + { + "epoch": 1.21, + "grad_norm": 18.404981328852685, + "learning_rate": 7.078722495969588e-06, + "loss": 1.4246, + "step": 100881 + }, + { + "epoch": 1.21, + "grad_norm": 6.859507537475367, + "learning_rate": 7.0781638051676746e-06, + "loss": 1.2701, + "step": 100884 + }, + { + "epoch": 1.21, + "grad_norm": 16.41886084472994, + "learning_rate": 7.077605124336938e-06, + "loss": 1.1248, + "step": 100887 + }, + { + "epoch": 1.21, + "grad_norm": 10.746292527140845, + "learning_rate": 7.0770464534792834e-06, + "loss": 1.1195, + "step": 100890 + }, + { + "epoch": 1.21, + "grad_norm": 5.701860813600944, + "learning_rate": 7.076487792596613e-06, + "loss": 1.1546, + "step": 100893 + }, + { + "epoch": 1.21, + "grad_norm": 6.618481083386417, + "learning_rate": 7.075929141690838e-06, + "loss": 1.1144, + "step": 100896 + }, + { + "epoch": 1.21, + "grad_norm": 16.94320814949837, + "learning_rate": 7.0753705007638655e-06, + "loss": 1.2357, + "step": 100899 + }, + { + "epoch": 1.21, + "grad_norm": 6.098284422301099, + "learning_rate": 7.0748118698176004e-06, + "loss": 1.3165, + "step": 100902 + }, + { + "epoch": 1.21, + "grad_norm": 16.865094705479983, + "learning_rate": 7.074253248853945e-06, + "loss": 1.7596, + "step": 100905 + }, + { + "epoch": 1.21, + "grad_norm": 12.17238112906283, + "learning_rate": 7.07369463787481e-06, + "loss": 1.5978, + "step": 100908 + }, + { + "epoch": 1.21, + "grad_norm": 5.037465676606441, + "learning_rate": 7.073136036882103e-06, + "loss": 1.2663, + "step": 100911 + }, + { + "epoch": 1.21, + "grad_norm": 21.621201684541948, + "learning_rate": 7.072577445877726e-06, + "loss": 1.2894, + "step": 100914 + }, + { + "epoch": 1.21, + "grad_norm": 10.22109055817799, + "learning_rate": 7.0720188648635925e-06, + "loss": 1.5243, + "step": 100917 + }, + { + "epoch": 1.21, + "grad_norm": 10.732274477523093, + "learning_rate": 7.071460293841601e-06, + "loss": 1.2957, + "step": 100920 + }, + { + "epoch": 1.21, + "grad_norm": 41.507974400297826, + "learning_rate": 7.07090173281366e-06, + "loss": 0.8368, + "step": 100923 + }, + { + "epoch": 1.21, + "grad_norm": 12.751145291857394, + "learning_rate": 7.070343181781676e-06, + "loss": 1.0914, + "step": 100926 + }, + { + "epoch": 1.21, + "grad_norm": 2.535940792319476, + "learning_rate": 7.069784640747559e-06, + "loss": 0.9802, + "step": 100929 + }, + { + "epoch": 1.21, + "grad_norm": 23.670854430235057, + "learning_rate": 7.06922610971321e-06, + "loss": 1.3364, + "step": 100932 + }, + { + "epoch": 1.21, + "grad_norm": 10.754682182815609, + "learning_rate": 7.068667588680534e-06, + "loss": 1.1084, + "step": 100935 + }, + { + "epoch": 1.21, + "grad_norm": 6.376450990109269, + "learning_rate": 7.068109077651439e-06, + "loss": 0.7533, + "step": 100938 + }, + { + "epoch": 1.21, + "grad_norm": 8.988142325688905, + "learning_rate": 7.067550576627835e-06, + "loss": 1.399, + "step": 100941 + }, + { + "epoch": 1.21, + "grad_norm": 4.537841106796412, + "learning_rate": 7.066992085611626e-06, + "loss": 1.2539, + "step": 100944 + }, + { + "epoch": 1.21, + "grad_norm": 39.06959442073717, + "learning_rate": 7.066433604604712e-06, + "loss": 1.1196, + "step": 100947 + }, + { + "epoch": 1.21, + "grad_norm": 11.366261266644575, + "learning_rate": 7.065875133609005e-06, + "loss": 1.3327, + "step": 100950 + }, + { + "epoch": 1.21, + "grad_norm": 5.88752510056263, + "learning_rate": 7.06531667262641e-06, + "loss": 1.0862, + "step": 100953 + }, + { + "epoch": 1.21, + "grad_norm": 15.265884113715813, + "learning_rate": 7.064758221658835e-06, + "loss": 1.1448, + "step": 100956 + }, + { + "epoch": 1.21, + "grad_norm": 11.394183741920669, + "learning_rate": 7.0641997807081785e-06, + "loss": 0.9443, + "step": 100959 + }, + { + "epoch": 1.21, + "grad_norm": 13.712237053354846, + "learning_rate": 7.0636413497763535e-06, + "loss": 0.9996, + "step": 100962 + }, + { + "epoch": 1.21, + "grad_norm": 8.134918448667467, + "learning_rate": 7.0630829288652595e-06, + "loss": 1.4013, + "step": 100965 + }, + { + "epoch": 1.21, + "grad_norm": 17.89228830253378, + "learning_rate": 7.062524517976806e-06, + "loss": 0.9807, + "step": 100968 + }, + { + "epoch": 1.21, + "grad_norm": 22.377328320195044, + "learning_rate": 7.061966117112903e-06, + "loss": 1.1547, + "step": 100971 + }, + { + "epoch": 1.21, + "grad_norm": 11.887983357004629, + "learning_rate": 7.0614077262754485e-06, + "loss": 1.3165, + "step": 100974 + }, + { + "epoch": 1.21, + "grad_norm": 37.76164601482668, + "learning_rate": 7.060849345466349e-06, + "loss": 1.2876, + "step": 100977 + }, + { + "epoch": 1.21, + "grad_norm": 15.497258309913216, + "learning_rate": 7.060290974687512e-06, + "loss": 1.234, + "step": 100980 + }, + { + "epoch": 1.21, + "grad_norm": 22.215262254264786, + "learning_rate": 7.059732613940849e-06, + "loss": 1.4779, + "step": 100983 + }, + { + "epoch": 1.21, + "grad_norm": 17.579519511859854, + "learning_rate": 7.059174263228255e-06, + "loss": 1.303, + "step": 100986 + }, + { + "epoch": 1.21, + "grad_norm": 10.117528203015084, + "learning_rate": 7.058615922551638e-06, + "loss": 1.2249, + "step": 100989 + }, + { + "epoch": 1.21, + "grad_norm": 33.47556746414955, + "learning_rate": 7.058057591912906e-06, + "loss": 1.1405, + "step": 100992 + }, + { + "epoch": 1.21, + "grad_norm": 11.577227709887955, + "learning_rate": 7.057499271313966e-06, + "loss": 1.3812, + "step": 100995 + }, + { + "epoch": 1.21, + "grad_norm": 11.028114986576067, + "learning_rate": 7.0569409607567245e-06, + "loss": 1.0692, + "step": 100998 + }, + { + "epoch": 1.21, + "grad_norm": 3.49725662211796, + "learning_rate": 7.056382660243077e-06, + "loss": 1.1858, + "step": 101001 + }, + { + "epoch": 1.21, + "grad_norm": 2.49279259160925, + "learning_rate": 7.055824369774937e-06, + "loss": 1.1355, + "step": 101004 + }, + { + "epoch": 1.21, + "grad_norm": 11.839133979418893, + "learning_rate": 7.055266089354208e-06, + "loss": 1.4678, + "step": 101007 + }, + { + "epoch": 1.21, + "grad_norm": 17.616680377641273, + "learning_rate": 7.054707818982798e-06, + "loss": 1.4908, + "step": 101010 + }, + { + "epoch": 1.21, + "grad_norm": 10.094012505382848, + "learning_rate": 7.054149558662606e-06, + "loss": 1.1615, + "step": 101013 + }, + { + "epoch": 1.21, + "grad_norm": 1.6953253401324966, + "learning_rate": 7.053591308395542e-06, + "loss": 1.0305, + "step": 101016 + }, + { + "epoch": 1.21, + "grad_norm": 13.608004466515764, + "learning_rate": 7.0530330681835065e-06, + "loss": 1.3666, + "step": 101019 + }, + { + "epoch": 1.21, + "grad_norm": 15.096793446298507, + "learning_rate": 7.052474838028409e-06, + "loss": 1.1172, + "step": 101022 + }, + { + "epoch": 1.21, + "grad_norm": 18.206622071475227, + "learning_rate": 7.051916617932157e-06, + "loss": 1.1697, + "step": 101025 + }, + { + "epoch": 1.21, + "grad_norm": 4.97682550839387, + "learning_rate": 7.051358407896649e-06, + "loss": 0.9142, + "step": 101028 + }, + { + "epoch": 1.21, + "grad_norm": 6.696163860813569, + "learning_rate": 7.050800207923791e-06, + "loss": 1.3709, + "step": 101031 + }, + { + "epoch": 1.21, + "grad_norm": 6.475121820591005, + "learning_rate": 7.050242018015491e-06, + "loss": 0.9962, + "step": 101034 + }, + { + "epoch": 1.21, + "grad_norm": 8.32890121919173, + "learning_rate": 7.049683838173656e-06, + "loss": 1.2295, + "step": 101037 + }, + { + "epoch": 1.21, + "grad_norm": 2.760612860856553, + "learning_rate": 7.0491256684001844e-06, + "loss": 1.3157, + "step": 101040 + }, + { + "epoch": 1.22, + "grad_norm": 4.411263002557661, + "learning_rate": 7.048567508696982e-06, + "loss": 0.8227, + "step": 101043 + }, + { + "epoch": 1.22, + "grad_norm": 11.623561756042792, + "learning_rate": 7.0480093590659575e-06, + "loss": 1.4985, + "step": 101046 + }, + { + "epoch": 1.22, + "grad_norm": 12.316698681574836, + "learning_rate": 7.047451219509012e-06, + "loss": 1.1887, + "step": 101049 + }, + { + "epoch": 1.22, + "grad_norm": 39.780042896350196, + "learning_rate": 7.046893090028057e-06, + "loss": 1.2074, + "step": 101052 + }, + { + "epoch": 1.22, + "grad_norm": 6.68382136667997, + "learning_rate": 7.046334970624986e-06, + "loss": 1.1647, + "step": 101055 + }, + { + "epoch": 1.22, + "grad_norm": 16.6599708554432, + "learning_rate": 7.045776861301714e-06, + "loss": 1.5749, + "step": 101058 + }, + { + "epoch": 1.22, + "grad_norm": 24.977765475832154, + "learning_rate": 7.045218762060138e-06, + "loss": 1.3096, + "step": 101061 + }, + { + "epoch": 1.22, + "grad_norm": 90.08127449383836, + "learning_rate": 7.044660672902171e-06, + "loss": 1.3411, + "step": 101064 + }, + { + "epoch": 1.22, + "grad_norm": 8.573192622296414, + "learning_rate": 7.044102593829708e-06, + "loss": 1.1868, + "step": 101067 + }, + { + "epoch": 1.22, + "grad_norm": 8.254761157946959, + "learning_rate": 7.043544524844661e-06, + "loss": 1.4024, + "step": 101070 + }, + { + "epoch": 1.22, + "grad_norm": 12.725013352421064, + "learning_rate": 7.042986465948929e-06, + "loss": 1.4001, + "step": 101073 + }, + { + "epoch": 1.22, + "grad_norm": 36.06666452418038, + "learning_rate": 7.042428417144418e-06, + "loss": 1.3807, + "step": 101076 + }, + { + "epoch": 1.22, + "grad_norm": 19.02983674169937, + "learning_rate": 7.04187037843304e-06, + "loss": 1.0568, + "step": 101079 + }, + { + "epoch": 1.22, + "grad_norm": 31.753525960755493, + "learning_rate": 7.041312349816689e-06, + "loss": 1.1722, + "step": 101082 + }, + { + "epoch": 1.22, + "grad_norm": 4.614083736631049, + "learning_rate": 7.040754331297271e-06, + "loss": 1.2693, + "step": 101085 + }, + { + "epoch": 1.22, + "grad_norm": 3.0777790152885944, + "learning_rate": 7.0401963228766934e-06, + "loss": 1.0497, + "step": 101088 + }, + { + "epoch": 1.22, + "grad_norm": 4.844479291470774, + "learning_rate": 7.039638324556863e-06, + "loss": 1.5436, + "step": 101091 + }, + { + "epoch": 1.22, + "grad_norm": 3.0602092786211945, + "learning_rate": 7.039080336339681e-06, + "loss": 1.3785, + "step": 101094 + }, + { + "epoch": 1.22, + "grad_norm": 5.709403172906268, + "learning_rate": 7.038522358227047e-06, + "loss": 1.3225, + "step": 101097 + }, + { + "epoch": 1.22, + "grad_norm": 15.242207727303397, + "learning_rate": 7.0379643902208715e-06, + "loss": 1.0708, + "step": 101100 + }, + { + "epoch": 1.22, + "grad_norm": 17.523418134204817, + "learning_rate": 7.037406432323055e-06, + "loss": 1.0649, + "step": 101103 + }, + { + "epoch": 1.22, + "grad_norm": 3.259572513205224, + "learning_rate": 7.036848484535508e-06, + "loss": 1.2692, + "step": 101106 + }, + { + "epoch": 1.22, + "grad_norm": 6.69195098844697, + "learning_rate": 7.036290546860125e-06, + "loss": 1.1596, + "step": 101109 + }, + { + "epoch": 1.22, + "grad_norm": 9.076217958672704, + "learning_rate": 7.035732619298816e-06, + "loss": 0.8464, + "step": 101112 + }, + { + "epoch": 1.22, + "grad_norm": 4.161375012314767, + "learning_rate": 7.0351747018534836e-06, + "loss": 1.6713, + "step": 101115 + }, + { + "epoch": 1.22, + "grad_norm": 18.49206475901445, + "learning_rate": 7.034616794526035e-06, + "loss": 0.7605, + "step": 101118 + }, + { + "epoch": 1.22, + "grad_norm": 10.751896559738734, + "learning_rate": 7.034058897318368e-06, + "loss": 0.978, + "step": 101121 + }, + { + "epoch": 1.22, + "grad_norm": 6.50184467893146, + "learning_rate": 7.033501010232391e-06, + "loss": 0.9143, + "step": 101124 + }, + { + "epoch": 1.22, + "grad_norm": 5.968233822053769, + "learning_rate": 7.032943133270004e-06, + "loss": 1.1173, + "step": 101127 + }, + { + "epoch": 1.22, + "grad_norm": 24.67446969261684, + "learning_rate": 7.032385266433114e-06, + "loss": 1.1871, + "step": 101130 + }, + { + "epoch": 1.22, + "grad_norm": 2.273151577389946, + "learning_rate": 7.031827409723629e-06, + "loss": 0.9579, + "step": 101133 + }, + { + "epoch": 1.22, + "grad_norm": 42.502277300006234, + "learning_rate": 7.031269563143444e-06, + "loss": 1.4458, + "step": 101136 + }, + { + "epoch": 1.22, + "grad_norm": 21.78351670416569, + "learning_rate": 7.030711726694466e-06, + "loss": 1.0653, + "step": 101139 + }, + { + "epoch": 1.22, + "grad_norm": 10.633090388673077, + "learning_rate": 7.0301539003786e-06, + "loss": 1.1564, + "step": 101142 + }, + { + "epoch": 1.22, + "grad_norm": 7.3978338988440555, + "learning_rate": 7.029596084197751e-06, + "loss": 1.32, + "step": 101145 + }, + { + "epoch": 1.22, + "grad_norm": 24.063594803165202, + "learning_rate": 7.029038278153819e-06, + "loss": 1.1123, + "step": 101148 + }, + { + "epoch": 1.22, + "grad_norm": 7.14460436094733, + "learning_rate": 7.028480482248707e-06, + "loss": 1.0989, + "step": 101151 + }, + { + "epoch": 1.22, + "grad_norm": 4.360106147624355, + "learning_rate": 7.0279226964843235e-06, + "loss": 1.2045, + "step": 101154 + }, + { + "epoch": 1.22, + "grad_norm": 4.09669947554003, + "learning_rate": 7.027364920862568e-06, + "loss": 1.1005, + "step": 101157 + }, + { + "epoch": 1.22, + "grad_norm": 8.346996240620232, + "learning_rate": 7.026807155385349e-06, + "loss": 1.2846, + "step": 101160 + }, + { + "epoch": 1.22, + "grad_norm": 27.81758328133361, + "learning_rate": 7.026249400054561e-06, + "loss": 1.2659, + "step": 101163 + }, + { + "epoch": 1.22, + "grad_norm": 4.5294611458613145, + "learning_rate": 7.025691654872117e-06, + "loss": 1.3783, + "step": 101166 + }, + { + "epoch": 1.22, + "grad_norm": 6.707527453401214, + "learning_rate": 7.0251339198399126e-06, + "loss": 1.1278, + "step": 101169 + }, + { + "epoch": 1.22, + "grad_norm": 5.627325888784953, + "learning_rate": 7.024576194959858e-06, + "loss": 1.3758, + "step": 101172 + }, + { + "epoch": 1.22, + "grad_norm": 14.67025643178988, + "learning_rate": 7.02401848023385e-06, + "loss": 1.4557, + "step": 101175 + }, + { + "epoch": 1.22, + "grad_norm": 17.2176858330096, + "learning_rate": 7.023460775663797e-06, + "loss": 1.4822, + "step": 101178 + }, + { + "epoch": 1.22, + "grad_norm": 6.966953529737707, + "learning_rate": 7.022903081251597e-06, + "loss": 1.2558, + "step": 101181 + }, + { + "epoch": 1.22, + "grad_norm": 5.784078170246296, + "learning_rate": 7.02234539699916e-06, + "loss": 1.1853, + "step": 101184 + }, + { + "epoch": 1.22, + "grad_norm": 13.072441984547524, + "learning_rate": 7.021787722908384e-06, + "loss": 1.3012, + "step": 101187 + }, + { + "epoch": 1.22, + "grad_norm": 9.114737982717742, + "learning_rate": 7.021230058981175e-06, + "loss": 1.4236, + "step": 101190 + }, + { + "epoch": 1.22, + "grad_norm": 6.97017275432692, + "learning_rate": 7.020672405219433e-06, + "loss": 1.1165, + "step": 101193 + }, + { + "epoch": 1.22, + "grad_norm": 3.523967102831234, + "learning_rate": 7.020114761625065e-06, + "loss": 1.1512, + "step": 101196 + }, + { + "epoch": 1.22, + "grad_norm": 8.792889067874109, + "learning_rate": 7.019557128199974e-06, + "loss": 1.0832, + "step": 101199 + }, + { + "epoch": 1.22, + "grad_norm": 24.540257670711984, + "learning_rate": 7.018999504946056e-06, + "loss": 1.4362, + "step": 101202 + }, + { + "epoch": 1.22, + "grad_norm": 10.644610180138882, + "learning_rate": 7.01844189186522e-06, + "loss": 1.2188, + "step": 101205 + }, + { + "epoch": 1.22, + "grad_norm": 5.46147099384169, + "learning_rate": 7.017884288959369e-06, + "loss": 1.2522, + "step": 101208 + }, + { + "epoch": 1.22, + "grad_norm": 15.298012474934643, + "learning_rate": 7.017326696230402e-06, + "loss": 1.4746, + "step": 101211 + }, + { + "epoch": 1.22, + "grad_norm": 18.52845600939016, + "learning_rate": 7.0167691136802305e-06, + "loss": 1.2764, + "step": 101214 + }, + { + "epoch": 1.22, + "grad_norm": 10.805215720169905, + "learning_rate": 7.016211541310747e-06, + "loss": 1.223, + "step": 101217 + }, + { + "epoch": 1.22, + "grad_norm": 8.610368464709726, + "learning_rate": 7.0156539791238596e-06, + "loss": 1.0488, + "step": 101220 + }, + { + "epoch": 1.22, + "grad_norm": 9.872949600126837, + "learning_rate": 7.015096427121469e-06, + "loss": 1.2212, + "step": 101223 + }, + { + "epoch": 1.22, + "grad_norm": 9.48755337404208, + "learning_rate": 7.014538885305484e-06, + "loss": 1.4409, + "step": 101226 + }, + { + "epoch": 1.22, + "grad_norm": 6.502286963413701, + "learning_rate": 7.013981353677797e-06, + "loss": 0.7005, + "step": 101229 + }, + { + "epoch": 1.22, + "grad_norm": 6.6399713388147195, + "learning_rate": 7.013423832240318e-06, + "loss": 1.3049, + "step": 101232 + }, + { + "epoch": 1.22, + "grad_norm": 13.18236965741476, + "learning_rate": 7.012866320994947e-06, + "loss": 1.5628, + "step": 101235 + }, + { + "epoch": 1.22, + "grad_norm": 3.2557024480342496, + "learning_rate": 7.0123088199435884e-06, + "loss": 0.9741, + "step": 101238 + }, + { + "epoch": 1.22, + "grad_norm": 11.54980969880763, + "learning_rate": 7.011751329088146e-06, + "loss": 1.1873, + "step": 101241 + }, + { + "epoch": 1.22, + "grad_norm": 18.968593909975798, + "learning_rate": 7.011193848430515e-06, + "loss": 0.9614, + "step": 101244 + }, + { + "epoch": 1.22, + "grad_norm": 19.66249742025671, + "learning_rate": 7.010636377972604e-06, + "loss": 1.2376, + "step": 101247 + }, + { + "epoch": 1.22, + "grad_norm": 5.754536587359656, + "learning_rate": 7.010078917716317e-06, + "loss": 1.1563, + "step": 101250 + }, + { + "epoch": 1.22, + "grad_norm": 8.532673863277665, + "learning_rate": 7.009521467663553e-06, + "loss": 0.9817, + "step": 101253 + }, + { + "epoch": 1.22, + "grad_norm": 4.433322533489429, + "learning_rate": 7.008964027816214e-06, + "loss": 1.136, + "step": 101256 + }, + { + "epoch": 1.22, + "grad_norm": 3.7010585279819157, + "learning_rate": 7.008406598176202e-06, + "loss": 0.9427, + "step": 101259 + }, + { + "epoch": 1.22, + "grad_norm": 7.6777765921222345, + "learning_rate": 7.007849178745422e-06, + "loss": 0.8988, + "step": 101262 + }, + { + "epoch": 1.22, + "grad_norm": 7.146671795856572, + "learning_rate": 7.0072917695257744e-06, + "loss": 1.1744, + "step": 101265 + }, + { + "epoch": 1.22, + "grad_norm": 17.16175162401076, + "learning_rate": 7.006734370519166e-06, + "loss": 1.1414, + "step": 101268 + }, + { + "epoch": 1.22, + "grad_norm": 12.184648434687015, + "learning_rate": 7.0061769817274905e-06, + "loss": 1.0024, + "step": 101271 + }, + { + "epoch": 1.22, + "grad_norm": 6.539367510874448, + "learning_rate": 7.005619603152656e-06, + "loss": 1.1922, + "step": 101274 + }, + { + "epoch": 1.22, + "grad_norm": 6.823720158744518, + "learning_rate": 7.005062234796562e-06, + "loss": 1.3367, + "step": 101277 + }, + { + "epoch": 1.22, + "grad_norm": 5.295090870804091, + "learning_rate": 7.004504876661116e-06, + "loss": 1.4746, + "step": 101280 + }, + { + "epoch": 1.22, + "grad_norm": 3.2136781118980964, + "learning_rate": 7.003947528748213e-06, + "loss": 1.1082, + "step": 101283 + }, + { + "epoch": 1.22, + "grad_norm": 14.290564982428785, + "learning_rate": 7.003390191059756e-06, + "loss": 1.3104, + "step": 101286 + }, + { + "epoch": 1.22, + "grad_norm": 10.55544649533929, + "learning_rate": 7.002832863597649e-06, + "loss": 1.1632, + "step": 101289 + }, + { + "epoch": 1.22, + "grad_norm": 7.259406950822022, + "learning_rate": 7.002275546363797e-06, + "loss": 1.2052, + "step": 101292 + }, + { + "epoch": 1.22, + "grad_norm": 2.750812828919261, + "learning_rate": 7.0017182393601e-06, + "loss": 1.4342, + "step": 101295 + }, + { + "epoch": 1.22, + "grad_norm": 47.18733144716303, + "learning_rate": 7.001160942588454e-06, + "loss": 1.2867, + "step": 101298 + }, + { + "epoch": 1.22, + "grad_norm": 4.106135809171496, + "learning_rate": 7.000603656050766e-06, + "loss": 0.751, + "step": 101301 + }, + { + "epoch": 1.22, + "grad_norm": 10.287412489270954, + "learning_rate": 7.0000463797489394e-06, + "loss": 1.1576, + "step": 101304 + }, + { + "epoch": 1.22, + "grad_norm": 10.02461986191503, + "learning_rate": 6.999489113684876e-06, + "loss": 1.1571, + "step": 101307 + }, + { + "epoch": 1.22, + "grad_norm": 10.344962411236526, + "learning_rate": 6.998931857860472e-06, + "loss": 1.3366, + "step": 101310 + }, + { + "epoch": 1.22, + "grad_norm": 4.1532725941917645, + "learning_rate": 6.9983746122776316e-06, + "loss": 0.9382, + "step": 101313 + }, + { + "epoch": 1.22, + "grad_norm": 6.562215873545907, + "learning_rate": 6.997817376938261e-06, + "loss": 1.065, + "step": 101316 + }, + { + "epoch": 1.22, + "grad_norm": 15.339007667366184, + "learning_rate": 6.997260151844254e-06, + "loss": 1.1569, + "step": 101319 + }, + { + "epoch": 1.22, + "grad_norm": 12.261275248777144, + "learning_rate": 6.996702936997522e-06, + "loss": 0.7831, + "step": 101322 + }, + { + "epoch": 1.22, + "grad_norm": 3.558431797484096, + "learning_rate": 6.996145732399957e-06, + "loss": 1.5295, + "step": 101325 + }, + { + "epoch": 1.22, + "grad_norm": 18.604554803526568, + "learning_rate": 6.995588538053466e-06, + "loss": 1.0001, + "step": 101328 + }, + { + "epoch": 1.22, + "grad_norm": 10.234293827064983, + "learning_rate": 6.995031353959948e-06, + "loss": 1.5935, + "step": 101331 + }, + { + "epoch": 1.22, + "grad_norm": 9.269584095406318, + "learning_rate": 6.994474180121309e-06, + "loss": 1.1519, + "step": 101334 + }, + { + "epoch": 1.22, + "grad_norm": 5.550854005532001, + "learning_rate": 6.993917016539446e-06, + "loss": 1.1159, + "step": 101337 + }, + { + "epoch": 1.22, + "grad_norm": 5.12295791597999, + "learning_rate": 6.993359863216258e-06, + "loss": 1.3041, + "step": 101340 + }, + { + "epoch": 1.22, + "grad_norm": 3.115955776028865, + "learning_rate": 6.992802720153651e-06, + "loss": 1.2447, + "step": 101343 + }, + { + "epoch": 1.22, + "grad_norm": 6.077345790335601, + "learning_rate": 6.992245587353525e-06, + "loss": 1.2364, + "step": 101346 + }, + { + "epoch": 1.22, + "grad_norm": 9.22776911072096, + "learning_rate": 6.991688464817787e-06, + "loss": 1.1022, + "step": 101349 + }, + { + "epoch": 1.22, + "grad_norm": 26.092422341929538, + "learning_rate": 6.991131352548326e-06, + "loss": 1.1096, + "step": 101352 + }, + { + "epoch": 1.22, + "grad_norm": 6.168952631479733, + "learning_rate": 6.990574250547051e-06, + "loss": 1.2079, + "step": 101355 + }, + { + "epoch": 1.22, + "grad_norm": 8.036356009445404, + "learning_rate": 6.990017158815863e-06, + "loss": 1.1844, + "step": 101358 + }, + { + "epoch": 1.22, + "grad_norm": 28.289856343911538, + "learning_rate": 6.989460077356666e-06, + "loss": 1.41, + "step": 101361 + }, + { + "epoch": 1.22, + "grad_norm": 16.269637431570967, + "learning_rate": 6.9889030061713524e-06, + "loss": 1.2044, + "step": 101364 + }, + { + "epoch": 1.22, + "grad_norm": 63.25142281439878, + "learning_rate": 6.988345945261828e-06, + "loss": 1.1302, + "step": 101367 + }, + { + "epoch": 1.22, + "grad_norm": 2.5904192156774553, + "learning_rate": 6.987788894629997e-06, + "loss": 1.3295, + "step": 101370 + }, + { + "epoch": 1.22, + "grad_norm": 3.5831615627847393, + "learning_rate": 6.987231854277754e-06, + "loss": 1.3823, + "step": 101373 + }, + { + "epoch": 1.22, + "grad_norm": 5.373127884290406, + "learning_rate": 6.986674824207008e-06, + "loss": 1.3273, + "step": 101376 + }, + { + "epoch": 1.22, + "grad_norm": 19.10669594358548, + "learning_rate": 6.9861178044196545e-06, + "loss": 1.1743, + "step": 101379 + }, + { + "epoch": 1.22, + "grad_norm": 5.601062235216746, + "learning_rate": 6.985560794917592e-06, + "loss": 1.3941, + "step": 101382 + }, + { + "epoch": 1.22, + "grad_norm": 11.001853383044644, + "learning_rate": 6.985003795702725e-06, + "loss": 1.3024, + "step": 101385 + }, + { + "epoch": 1.22, + "grad_norm": 11.25080343069378, + "learning_rate": 6.984446806776959e-06, + "loss": 0.9081, + "step": 101388 + }, + { + "epoch": 1.22, + "grad_norm": 5.392957818220105, + "learning_rate": 6.9838898281421865e-06, + "loss": 1.2625, + "step": 101391 + }, + { + "epoch": 1.22, + "grad_norm": 11.272455379336305, + "learning_rate": 6.98333285980031e-06, + "loss": 1.6138, + "step": 101394 + }, + { + "epoch": 1.22, + "grad_norm": 8.861271315349859, + "learning_rate": 6.982775901753232e-06, + "loss": 1.0257, + "step": 101397 + }, + { + "epoch": 1.22, + "grad_norm": 9.260666310264197, + "learning_rate": 6.982218954002855e-06, + "loss": 1.1453, + "step": 101400 + }, + { + "epoch": 1.22, + "grad_norm": 9.132030602926337, + "learning_rate": 6.98166201655108e-06, + "loss": 1.0861, + "step": 101403 + }, + { + "epoch": 1.22, + "grad_norm": 8.5603929693892, + "learning_rate": 6.981105089399802e-06, + "loss": 1.1775, + "step": 101406 + }, + { + "epoch": 1.22, + "grad_norm": 2.6396086504703478, + "learning_rate": 6.980548172550923e-06, + "loss": 1.3267, + "step": 101409 + }, + { + "epoch": 1.22, + "grad_norm": 9.144865951564803, + "learning_rate": 6.97999126600635e-06, + "loss": 1.1961, + "step": 101412 + }, + { + "epoch": 1.22, + "grad_norm": 4.734346499752536, + "learning_rate": 6.979434369767979e-06, + "loss": 1.0835, + "step": 101415 + }, + { + "epoch": 1.22, + "grad_norm": 8.594754843619768, + "learning_rate": 6.978877483837707e-06, + "loss": 1.3104, + "step": 101418 + }, + { + "epoch": 1.22, + "grad_norm": 7.791904309961366, + "learning_rate": 6.97832060821744e-06, + "loss": 1.1365, + "step": 101421 + }, + { + "epoch": 1.22, + "grad_norm": 37.03492794213102, + "learning_rate": 6.977763742909074e-06, + "loss": 1.2775, + "step": 101424 + }, + { + "epoch": 1.22, + "grad_norm": 5.832737120702172, + "learning_rate": 6.977206887914513e-06, + "loss": 1.0065, + "step": 101427 + }, + { + "epoch": 1.22, + "grad_norm": 15.215017814883943, + "learning_rate": 6.97665004323566e-06, + "loss": 0.9033, + "step": 101430 + }, + { + "epoch": 1.22, + "grad_norm": 8.122256938406963, + "learning_rate": 6.976093208874408e-06, + "loss": 1.1842, + "step": 101433 + }, + { + "epoch": 1.22, + "grad_norm": 6.390623243148509, + "learning_rate": 6.97553638483266e-06, + "loss": 1.1208, + "step": 101436 + }, + { + "epoch": 1.22, + "grad_norm": 12.127850470481382, + "learning_rate": 6.974979571112315e-06, + "loss": 1.168, + "step": 101439 + }, + { + "epoch": 1.22, + "grad_norm": 6.228474143246815, + "learning_rate": 6.974422767715281e-06, + "loss": 1.1071, + "step": 101442 + }, + { + "epoch": 1.22, + "grad_norm": 16.940249252137857, + "learning_rate": 6.97386597464345e-06, + "loss": 1.4318, + "step": 101445 + }, + { + "epoch": 1.22, + "grad_norm": 21.318989336426206, + "learning_rate": 6.97330919189872e-06, + "loss": 1.4046, + "step": 101448 + }, + { + "epoch": 1.22, + "grad_norm": 4.528170636337612, + "learning_rate": 6.972752419482998e-06, + "loss": 1.4659, + "step": 101451 + }, + { + "epoch": 1.22, + "grad_norm": 2.651412196734252, + "learning_rate": 6.972195657398184e-06, + "loss": 1.4412, + "step": 101454 + }, + { + "epoch": 1.22, + "grad_norm": 10.136907855651799, + "learning_rate": 6.971638905646176e-06, + "loss": 1.25, + "step": 101457 + }, + { + "epoch": 1.22, + "grad_norm": 8.836111887165305, + "learning_rate": 6.971082164228871e-06, + "loss": 1.0596, + "step": 101460 + }, + { + "epoch": 1.22, + "grad_norm": 6.579249488477518, + "learning_rate": 6.970525433148173e-06, + "loss": 1.4213, + "step": 101463 + }, + { + "epoch": 1.22, + "grad_norm": 7.836324838763254, + "learning_rate": 6.969968712405979e-06, + "loss": 1.0629, + "step": 101466 + }, + { + "epoch": 1.22, + "grad_norm": 14.05399414801391, + "learning_rate": 6.969412002004194e-06, + "loss": 1.2928, + "step": 101469 + }, + { + "epoch": 1.22, + "grad_norm": 16.499922815216845, + "learning_rate": 6.96885530194471e-06, + "loss": 1.2595, + "step": 101472 + }, + { + "epoch": 1.22, + "grad_norm": 2.879417802400088, + "learning_rate": 6.968298612229433e-06, + "loss": 1.351, + "step": 101475 + }, + { + "epoch": 1.22, + "grad_norm": 9.287507871087396, + "learning_rate": 6.967741932860259e-06, + "loss": 1.3117, + "step": 101478 + }, + { + "epoch": 1.22, + "grad_norm": 7.877844855862109, + "learning_rate": 6.967185263839089e-06, + "loss": 0.7048, + "step": 101481 + }, + { + "epoch": 1.22, + "grad_norm": 11.647920885156381, + "learning_rate": 6.9666286051678275e-06, + "loss": 1.3418, + "step": 101484 + }, + { + "epoch": 1.22, + "grad_norm": 24.357092753701323, + "learning_rate": 6.966071956848368e-06, + "loss": 1.3009, + "step": 101487 + }, + { + "epoch": 1.22, + "grad_norm": 9.236338109125033, + "learning_rate": 6.96551531888261e-06, + "loss": 1.2435, + "step": 101490 + }, + { + "epoch": 1.22, + "grad_norm": 25.55613371796078, + "learning_rate": 6.964958691272456e-06, + "loss": 1.2218, + "step": 101493 + }, + { + "epoch": 1.22, + "grad_norm": 4.595523795641649, + "learning_rate": 6.964402074019809e-06, + "loss": 1.2908, + "step": 101496 + }, + { + "epoch": 1.22, + "grad_norm": 4.327942167461801, + "learning_rate": 6.963845467126561e-06, + "loss": 1.3401, + "step": 101499 + }, + { + "epoch": 1.22, + "grad_norm": 9.886264905466616, + "learning_rate": 6.963288870594613e-06, + "loss": 1.2426, + "step": 101502 + }, + { + "epoch": 1.22, + "grad_norm": 20.637695625191324, + "learning_rate": 6.962732284425867e-06, + "loss": 1.1759, + "step": 101505 + }, + { + "epoch": 1.22, + "grad_norm": 17.621384887401078, + "learning_rate": 6.962175708622223e-06, + "loss": 1.1014, + "step": 101508 + }, + { + "epoch": 1.22, + "grad_norm": 7.034134721783511, + "learning_rate": 6.961619143185582e-06, + "loss": 1.3962, + "step": 101511 + }, + { + "epoch": 1.22, + "grad_norm": 8.4601461408264, + "learning_rate": 6.961062588117835e-06, + "loss": 1.269, + "step": 101514 + }, + { + "epoch": 1.22, + "grad_norm": 15.12739337043541, + "learning_rate": 6.96050604342089e-06, + "loss": 1.2304, + "step": 101517 + }, + { + "epoch": 1.22, + "grad_norm": 7.038775098466598, + "learning_rate": 6.9599495090966405e-06, + "loss": 1.1051, + "step": 101520 + }, + { + "epoch": 1.22, + "grad_norm": 4.557229430859457, + "learning_rate": 6.959392985146993e-06, + "loss": 1.235, + "step": 101523 + }, + { + "epoch": 1.22, + "grad_norm": 11.382983325252011, + "learning_rate": 6.958836471573838e-06, + "loss": 0.9316, + "step": 101526 + }, + { + "epoch": 1.22, + "grad_norm": 17.411238453804476, + "learning_rate": 6.95827996837908e-06, + "loss": 1.3825, + "step": 101529 + }, + { + "epoch": 1.22, + "grad_norm": 9.570026085919302, + "learning_rate": 6.957723475564615e-06, + "loss": 1.3334, + "step": 101532 + }, + { + "epoch": 1.22, + "grad_norm": 6.795838128783869, + "learning_rate": 6.957166993132344e-06, + "loss": 1.3881, + "step": 101535 + }, + { + "epoch": 1.22, + "grad_norm": 6.531325538135528, + "learning_rate": 6.9566105210841705e-06, + "loss": 1.1448, + "step": 101538 + }, + { + "epoch": 1.22, + "grad_norm": 3.460276091828411, + "learning_rate": 6.956054059421986e-06, + "loss": 1.0603, + "step": 101541 + }, + { + "epoch": 1.22, + "grad_norm": 22.734669402149596, + "learning_rate": 6.955497608147691e-06, + "loss": 0.9889, + "step": 101544 + }, + { + "epoch": 1.22, + "grad_norm": 3.869923046997723, + "learning_rate": 6.954941167263186e-06, + "loss": 1.1269, + "step": 101547 + }, + { + "epoch": 1.22, + "grad_norm": 5.030392573554158, + "learning_rate": 6.954384736770374e-06, + "loss": 1.0137, + "step": 101550 + }, + { + "epoch": 1.22, + "grad_norm": 17.491276063385722, + "learning_rate": 6.953828316671148e-06, + "loss": 1.2337, + "step": 101553 + }, + { + "epoch": 1.22, + "grad_norm": 6.239619950518631, + "learning_rate": 6.953271906967407e-06, + "loss": 0.8986, + "step": 101556 + }, + { + "epoch": 1.22, + "grad_norm": 7.3855308277551925, + "learning_rate": 6.952715507661054e-06, + "loss": 1.1617, + "step": 101559 + }, + { + "epoch": 1.22, + "grad_norm": 8.554967016059743, + "learning_rate": 6.952159118753982e-06, + "loss": 1.0723, + "step": 101562 + }, + { + "epoch": 1.22, + "grad_norm": 7.436820373147705, + "learning_rate": 6.9516027402480975e-06, + "loss": 1.12, + "step": 101565 + }, + { + "epoch": 1.22, + "grad_norm": 4.311148672963685, + "learning_rate": 6.951046372145292e-06, + "loss": 1.2501, + "step": 101568 + }, + { + "epoch": 1.22, + "grad_norm": 23.479735900075084, + "learning_rate": 6.950490014447468e-06, + "loss": 1.0454, + "step": 101571 + }, + { + "epoch": 1.22, + "grad_norm": 9.458154711738157, + "learning_rate": 6.949933667156521e-06, + "loss": 1.1508, + "step": 101574 + }, + { + "epoch": 1.22, + "grad_norm": 11.024480904893881, + "learning_rate": 6.949377330274356e-06, + "loss": 1.2061, + "step": 101577 + }, + { + "epoch": 1.22, + "grad_norm": 10.32059022965385, + "learning_rate": 6.948821003802864e-06, + "loss": 1.2356, + "step": 101580 + }, + { + "epoch": 1.22, + "grad_norm": 20.564439698094723, + "learning_rate": 6.9482646877439486e-06, + "loss": 1.5356, + "step": 101583 + }, + { + "epoch": 1.22, + "grad_norm": 6.36342340202827, + "learning_rate": 6.947708382099503e-06, + "loss": 1.2155, + "step": 101586 + }, + { + "epoch": 1.22, + "grad_norm": 5.183236705449972, + "learning_rate": 6.947152086871431e-06, + "loss": 1.0809, + "step": 101589 + }, + { + "epoch": 1.22, + "grad_norm": 17.076604505994908, + "learning_rate": 6.946595802061633e-06, + "loss": 0.9987, + "step": 101592 + }, + { + "epoch": 1.22, + "grad_norm": 5.204995937813945, + "learning_rate": 6.946039527672001e-06, + "loss": 1.075, + "step": 101595 + }, + { + "epoch": 1.22, + "grad_norm": 9.639689443508551, + "learning_rate": 6.945483263704435e-06, + "loss": 0.9136, + "step": 101598 + }, + { + "epoch": 1.22, + "grad_norm": 14.961175033180261, + "learning_rate": 6.944927010160836e-06, + "loss": 1.1645, + "step": 101601 + }, + { + "epoch": 1.22, + "grad_norm": 4.46267219400727, + "learning_rate": 6.944370767043101e-06, + "loss": 1.0544, + "step": 101604 + }, + { + "epoch": 1.22, + "grad_norm": 10.68748557024998, + "learning_rate": 6.943814534353127e-06, + "loss": 1.3258, + "step": 101607 + }, + { + "epoch": 1.22, + "grad_norm": 9.0497192662945, + "learning_rate": 6.943258312092811e-06, + "loss": 0.9137, + "step": 101610 + }, + { + "epoch": 1.22, + "grad_norm": 5.103310338895252, + "learning_rate": 6.942702100264056e-06, + "loss": 1.1455, + "step": 101613 + }, + { + "epoch": 1.22, + "grad_norm": 10.922896676133211, + "learning_rate": 6.942145898868756e-06, + "loss": 0.8952, + "step": 101616 + }, + { + "epoch": 1.22, + "grad_norm": 13.592500122111229, + "learning_rate": 6.9415897079088145e-06, + "loss": 1.5163, + "step": 101619 + }, + { + "epoch": 1.22, + "grad_norm": 9.192000324998522, + "learning_rate": 6.94103352738612e-06, + "loss": 1.1483, + "step": 101622 + }, + { + "epoch": 1.22, + "grad_norm": 6.942796056470094, + "learning_rate": 6.940477357302582e-06, + "loss": 1.1908, + "step": 101625 + }, + { + "epoch": 1.22, + "grad_norm": 14.867403400821912, + "learning_rate": 6.939921197660087e-06, + "loss": 1.0429, + "step": 101628 + }, + { + "epoch": 1.22, + "grad_norm": 6.777928208327214, + "learning_rate": 6.939365048460545e-06, + "loss": 1.1464, + "step": 101631 + }, + { + "epoch": 1.22, + "grad_norm": 11.387454052831751, + "learning_rate": 6.938808909705842e-06, + "loss": 0.8437, + "step": 101634 + }, + { + "epoch": 1.22, + "grad_norm": 3.9693446903749243, + "learning_rate": 6.938252781397885e-06, + "loss": 1.0822, + "step": 101637 + }, + { + "epoch": 1.22, + "grad_norm": 4.17773453196834, + "learning_rate": 6.937696663538565e-06, + "loss": 1.0246, + "step": 101640 + }, + { + "epoch": 1.22, + "grad_norm": 37.80626186377561, + "learning_rate": 6.937140556129787e-06, + "loss": 1.0667, + "step": 101643 + }, + { + "epoch": 1.22, + "grad_norm": 25.30110837447924, + "learning_rate": 6.936584459173443e-06, + "loss": 1.4071, + "step": 101646 + }, + { + "epoch": 1.22, + "grad_norm": 3.889157852252599, + "learning_rate": 6.936028372671434e-06, + "loss": 1.1605, + "step": 101649 + }, + { + "epoch": 1.22, + "grad_norm": 16.340929845913656, + "learning_rate": 6.9354722966256535e-06, + "loss": 1.3654, + "step": 101652 + }, + { + "epoch": 1.22, + "grad_norm": 5.803750005200916, + "learning_rate": 6.934916231038006e-06, + "loss": 1.2489, + "step": 101655 + }, + { + "epoch": 1.22, + "grad_norm": 5.262240293034435, + "learning_rate": 6.934360175910386e-06, + "loss": 1.2738, + "step": 101658 + }, + { + "epoch": 1.22, + "grad_norm": 7.393996833372951, + "learning_rate": 6.933804131244688e-06, + "loss": 1.2148, + "step": 101661 + }, + { + "epoch": 1.22, + "grad_norm": 5.9550236266965495, + "learning_rate": 6.933248097042811e-06, + "loss": 1.1309, + "step": 101664 + }, + { + "epoch": 1.22, + "grad_norm": 21.843051754489558, + "learning_rate": 6.932692073306655e-06, + "loss": 1.3962, + "step": 101667 + }, + { + "epoch": 1.22, + "grad_norm": 14.736936318857733, + "learning_rate": 6.932136060038115e-06, + "loss": 1.4026, + "step": 101670 + }, + { + "epoch": 1.22, + "grad_norm": 5.455020853208602, + "learning_rate": 6.9315800572390935e-06, + "loss": 0.9937, + "step": 101673 + }, + { + "epoch": 1.22, + "grad_norm": 9.429458482183113, + "learning_rate": 6.931024064911479e-06, + "loss": 0.9183, + "step": 101676 + }, + { + "epoch": 1.22, + "grad_norm": 8.682662317885944, + "learning_rate": 6.930468083057177e-06, + "loss": 1.5326, + "step": 101679 + }, + { + "epoch": 1.22, + "grad_norm": 5.254590133113963, + "learning_rate": 6.929912111678079e-06, + "loss": 1.2883, + "step": 101682 + }, + { + "epoch": 1.22, + "grad_norm": 12.786631043950331, + "learning_rate": 6.92935615077609e-06, + "loss": 1.1817, + "step": 101685 + }, + { + "epoch": 1.22, + "grad_norm": 11.902688270677139, + "learning_rate": 6.928800200353099e-06, + "loss": 0.925, + "step": 101688 + }, + { + "epoch": 1.22, + "grad_norm": 3.8426571637877767, + "learning_rate": 6.928244260411007e-06, + "loss": 1.0051, + "step": 101691 + }, + { + "epoch": 1.22, + "grad_norm": 7.347614783408368, + "learning_rate": 6.92768833095171e-06, + "loss": 0.854, + "step": 101694 + }, + { + "epoch": 1.22, + "grad_norm": 9.29678896058829, + "learning_rate": 6.927132411977107e-06, + "loss": 1.2184, + "step": 101697 + }, + { + "epoch": 1.22, + "grad_norm": 6.2031973149952195, + "learning_rate": 6.926576503489098e-06, + "loss": 1.0336, + "step": 101700 + }, + { + "epoch": 1.22, + "grad_norm": 4.264506340168972, + "learning_rate": 6.926020605489569e-06, + "loss": 1.2929, + "step": 101703 + }, + { + "epoch": 1.22, + "grad_norm": 6.226817339419521, + "learning_rate": 6.925464717980428e-06, + "loss": 0.9675, + "step": 101706 + }, + { + "epoch": 1.22, + "grad_norm": 12.958985261501352, + "learning_rate": 6.924908840963569e-06, + "loss": 1.5786, + "step": 101709 + }, + { + "epoch": 1.22, + "grad_norm": 18.15353513113443, + "learning_rate": 6.924352974440891e-06, + "loss": 1.0154, + "step": 101712 + }, + { + "epoch": 1.22, + "grad_norm": 11.946306465659468, + "learning_rate": 6.923797118414283e-06, + "loss": 1.4176, + "step": 101715 + }, + { + "epoch": 1.22, + "grad_norm": 6.49406737142203, + "learning_rate": 6.92324127288565e-06, + "loss": 0.9239, + "step": 101718 + }, + { + "epoch": 1.22, + "grad_norm": 16.899181732083218, + "learning_rate": 6.922685437856887e-06, + "loss": 1.016, + "step": 101721 + }, + { + "epoch": 1.22, + "grad_norm": 3.5767338649675855, + "learning_rate": 6.922129613329886e-06, + "loss": 1.5543, + "step": 101724 + }, + { + "epoch": 1.22, + "grad_norm": 6.664451960136109, + "learning_rate": 6.921573799306556e-06, + "loss": 1.1812, + "step": 101727 + }, + { + "epoch": 1.22, + "grad_norm": 9.468128237718291, + "learning_rate": 6.921017995788779e-06, + "loss": 1.3639, + "step": 101730 + }, + { + "epoch": 1.22, + "grad_norm": 10.305962679532467, + "learning_rate": 6.920462202778462e-06, + "loss": 1.4572, + "step": 101733 + }, + { + "epoch": 1.22, + "grad_norm": 12.08604075495288, + "learning_rate": 6.919906420277494e-06, + "loss": 1.0992, + "step": 101736 + }, + { + "epoch": 1.22, + "grad_norm": 19.06610196565076, + "learning_rate": 6.919350648287783e-06, + "loss": 1.1587, + "step": 101739 + }, + { + "epoch": 1.22, + "grad_norm": 11.37379738157654, + "learning_rate": 6.918794886811212e-06, + "loss": 1.1108, + "step": 101742 + }, + { + "epoch": 1.22, + "grad_norm": 3.2069885415119894, + "learning_rate": 6.918239135849687e-06, + "loss": 1.5354, + "step": 101745 + }, + { + "epoch": 1.22, + "grad_norm": 23.040299206260364, + "learning_rate": 6.9176833954051e-06, + "loss": 1.415, + "step": 101748 + }, + { + "epoch": 1.22, + "grad_norm": 4.249606811210251, + "learning_rate": 6.917127665479352e-06, + "loss": 1.2797, + "step": 101751 + }, + { + "epoch": 1.22, + "grad_norm": 15.472765198190684, + "learning_rate": 6.916571946074339e-06, + "loss": 1.2112, + "step": 101754 + }, + { + "epoch": 1.22, + "grad_norm": 17.62011154744526, + "learning_rate": 6.916016237191949e-06, + "loss": 1.4885, + "step": 101757 + }, + { + "epoch": 1.22, + "grad_norm": 63.65202393064678, + "learning_rate": 6.915460538834087e-06, + "loss": 1.2734, + "step": 101760 + }, + { + "epoch": 1.22, + "grad_norm": 12.031269292242303, + "learning_rate": 6.914904851002648e-06, + "loss": 1.4881, + "step": 101763 + }, + { + "epoch": 1.22, + "grad_norm": 3.9262384611289067, + "learning_rate": 6.91434917369953e-06, + "loss": 1.1227, + "step": 101766 + }, + { + "epoch": 1.22, + "grad_norm": 8.504266135770845, + "learning_rate": 6.9137935069266224e-06, + "loss": 1.0697, + "step": 101769 + }, + { + "epoch": 1.22, + "grad_norm": 13.241609138438518, + "learning_rate": 6.9132378506858265e-06, + "loss": 0.8701, + "step": 101772 + }, + { + "epoch": 1.22, + "grad_norm": 10.653999889887865, + "learning_rate": 6.91268220497904e-06, + "loss": 1.4528, + "step": 101775 + }, + { + "epoch": 1.22, + "grad_norm": 5.022073366265837, + "learning_rate": 6.912126569808155e-06, + "loss": 1.2625, + "step": 101778 + }, + { + "epoch": 1.22, + "grad_norm": 10.640111908761577, + "learning_rate": 6.911570945175073e-06, + "loss": 1.1162, + "step": 101781 + }, + { + "epoch": 1.22, + "grad_norm": 4.289431778026039, + "learning_rate": 6.911015331081684e-06, + "loss": 1.0651, + "step": 101784 + }, + { + "epoch": 1.22, + "grad_norm": 9.429127969668158, + "learning_rate": 6.910459727529888e-06, + "loss": 1.559, + "step": 101787 + }, + { + "epoch": 1.22, + "grad_norm": 2.7056587967982573, + "learning_rate": 6.909904134521579e-06, + "loss": 1.0768, + "step": 101790 + }, + { + "epoch": 1.22, + "grad_norm": 30.2978730545343, + "learning_rate": 6.909348552058658e-06, + "loss": 1.0357, + "step": 101793 + }, + { + "epoch": 1.22, + "grad_norm": 12.6193119228079, + "learning_rate": 6.908792980143016e-06, + "loss": 1.287, + "step": 101796 + }, + { + "epoch": 1.22, + "grad_norm": 20.927440905166996, + "learning_rate": 6.908237418776546e-06, + "loss": 1.2888, + "step": 101799 + }, + { + "epoch": 1.22, + "grad_norm": 8.344640662546912, + "learning_rate": 6.907681867961149e-06, + "loss": 1.2434, + "step": 101802 + }, + { + "epoch": 1.22, + "grad_norm": 5.684530484309863, + "learning_rate": 6.907126327698721e-06, + "loss": 1.2418, + "step": 101805 + }, + { + "epoch": 1.22, + "grad_norm": 14.674458406185373, + "learning_rate": 6.9065707979911614e-06, + "loss": 1.2116, + "step": 101808 + }, + { + "epoch": 1.22, + "grad_norm": 11.110891618463754, + "learning_rate": 6.906015278840354e-06, + "loss": 1.0286, + "step": 101811 + }, + { + "epoch": 1.22, + "grad_norm": 2.8597588125718154, + "learning_rate": 6.905459770248204e-06, + "loss": 1.3036, + "step": 101814 + }, + { + "epoch": 1.22, + "grad_norm": 4.706736704996497, + "learning_rate": 6.904904272216608e-06, + "loss": 1.4832, + "step": 101817 + }, + { + "epoch": 1.22, + "grad_norm": 17.368454094545648, + "learning_rate": 6.904348784747459e-06, + "loss": 1.4294, + "step": 101820 + }, + { + "epoch": 1.22, + "grad_norm": 21.536950347072892, + "learning_rate": 6.90379330784265e-06, + "loss": 1.0533, + "step": 101823 + }, + { + "epoch": 1.22, + "grad_norm": 10.74329042322749, + "learning_rate": 6.9032378415040775e-06, + "loss": 1.2682, + "step": 101826 + }, + { + "epoch": 1.22, + "grad_norm": 31.455980594157552, + "learning_rate": 6.902682385733643e-06, + "loss": 1.1617, + "step": 101829 + }, + { + "epoch": 1.22, + "grad_norm": 18.475540755006257, + "learning_rate": 6.902126940533234e-06, + "loss": 1.0641, + "step": 101832 + }, + { + "epoch": 1.22, + "grad_norm": 8.479665936247736, + "learning_rate": 6.901571505904755e-06, + "loss": 0.8996, + "step": 101835 + }, + { + "epoch": 1.22, + "grad_norm": 9.399852454966744, + "learning_rate": 6.901016081850094e-06, + "loss": 1.0095, + "step": 101838 + }, + { + "epoch": 1.22, + "grad_norm": 15.910531519597857, + "learning_rate": 6.900460668371147e-06, + "loss": 1.3843, + "step": 101841 + }, + { + "epoch": 1.22, + "grad_norm": 4.709744802334666, + "learning_rate": 6.899905265469811e-06, + "loss": 0.8488, + "step": 101844 + }, + { + "epoch": 1.22, + "grad_norm": 8.996915196982014, + "learning_rate": 6.899349873147987e-06, + "loss": 1.4407, + "step": 101847 + }, + { + "epoch": 1.22, + "grad_norm": 6.221266507129188, + "learning_rate": 6.898794491407561e-06, + "loss": 0.977, + "step": 101850 + }, + { + "epoch": 1.22, + "grad_norm": 6.385417394856782, + "learning_rate": 6.898239120250432e-06, + "loss": 1.2017, + "step": 101853 + }, + { + "epoch": 1.22, + "grad_norm": 25.07729801750652, + "learning_rate": 6.897683759678494e-06, + "loss": 0.9058, + "step": 101856 + }, + { + "epoch": 1.22, + "grad_norm": 4.122550963851537, + "learning_rate": 6.897128409693648e-06, + "loss": 1.1135, + "step": 101859 + }, + { + "epoch": 1.22, + "grad_norm": 11.266593519128591, + "learning_rate": 6.896573070297785e-06, + "loss": 0.8416, + "step": 101862 + }, + { + "epoch": 1.22, + "grad_norm": 10.748459405699641, + "learning_rate": 6.896017741492797e-06, + "loss": 0.8915, + "step": 101865 + }, + { + "epoch": 1.22, + "grad_norm": 15.420136208101335, + "learning_rate": 6.895462423280582e-06, + "loss": 1.2789, + "step": 101868 + }, + { + "epoch": 1.22, + "grad_norm": 4.658460239154021, + "learning_rate": 6.894907115663039e-06, + "loss": 1.4595, + "step": 101871 + }, + { + "epoch": 1.23, + "grad_norm": 10.830711297150861, + "learning_rate": 6.89435181864206e-06, + "loss": 1.1275, + "step": 101874 + }, + { + "epoch": 1.23, + "grad_norm": 20.81625096437959, + "learning_rate": 6.893796532219536e-06, + "loss": 0.8392, + "step": 101877 + }, + { + "epoch": 1.23, + "grad_norm": 4.63587737857731, + "learning_rate": 6.893241256397369e-06, + "loss": 1.0819, + "step": 101880 + }, + { + "epoch": 1.23, + "grad_norm": 30.268091338074505, + "learning_rate": 6.8926859911774456e-06, + "loss": 1.1791, + "step": 101883 + }, + { + "epoch": 1.23, + "grad_norm": 6.422418573606809, + "learning_rate": 6.8921307365616686e-06, + "loss": 1.0998, + "step": 101886 + }, + { + "epoch": 1.23, + "grad_norm": 8.91158609753383, + "learning_rate": 6.891575492551932e-06, + "loss": 1.4296, + "step": 101889 + }, + { + "epoch": 1.23, + "grad_norm": 22.73952113628641, + "learning_rate": 6.891020259150128e-06, + "loss": 1.0882, + "step": 101892 + }, + { + "epoch": 1.23, + "grad_norm": 2.796158304963767, + "learning_rate": 6.890465036358149e-06, + "loss": 1.0347, + "step": 101895 + }, + { + "epoch": 1.23, + "grad_norm": 21.079691414701806, + "learning_rate": 6.889909824177893e-06, + "loss": 1.1405, + "step": 101898 + }, + { + "epoch": 1.23, + "grad_norm": 7.00938852279086, + "learning_rate": 6.889354622611258e-06, + "loss": 1.1739, + "step": 101901 + }, + { + "epoch": 1.23, + "grad_norm": 15.136869743581228, + "learning_rate": 6.8887994316601345e-06, + "loss": 0.9667, + "step": 101904 + }, + { + "epoch": 1.23, + "grad_norm": 43.095139954663296, + "learning_rate": 6.888244251326414e-06, + "loss": 1.4061, + "step": 101907 + }, + { + "epoch": 1.23, + "grad_norm": 38.5045245346439, + "learning_rate": 6.887689081611996e-06, + "loss": 0.9935, + "step": 101910 + }, + { + "epoch": 1.23, + "grad_norm": 8.349874247877013, + "learning_rate": 6.8871339225187786e-06, + "loss": 1.3817, + "step": 101913 + }, + { + "epoch": 1.23, + "grad_norm": 6.897321872198681, + "learning_rate": 6.886578774048651e-06, + "loss": 1.1317, + "step": 101916 + }, + { + "epoch": 1.23, + "grad_norm": 7.85747882282682, + "learning_rate": 6.886023636203505e-06, + "loss": 1.1484, + "step": 101919 + }, + { + "epoch": 1.23, + "grad_norm": 6.46878213885603, + "learning_rate": 6.885468508985239e-06, + "loss": 1.3553, + "step": 101922 + }, + { + "epoch": 1.23, + "grad_norm": 3.049740796004158, + "learning_rate": 6.884913392395749e-06, + "loss": 1.2528, + "step": 101925 + }, + { + "epoch": 1.23, + "grad_norm": 8.430578039098807, + "learning_rate": 6.8843582864369294e-06, + "loss": 0.9224, + "step": 101928 + }, + { + "epoch": 1.23, + "grad_norm": 17.61431890885584, + "learning_rate": 6.883803191110669e-06, + "loss": 1.0362, + "step": 101931 + }, + { + "epoch": 1.23, + "grad_norm": 7.730345942392915, + "learning_rate": 6.883248106418868e-06, + "loss": 1.2222, + "step": 101934 + }, + { + "epoch": 1.23, + "grad_norm": 13.758076953350685, + "learning_rate": 6.882693032363416e-06, + "loss": 1.2627, + "step": 101937 + }, + { + "epoch": 1.23, + "grad_norm": 6.634669982258493, + "learning_rate": 6.88213796894621e-06, + "loss": 1.0153, + "step": 101940 + }, + { + "epoch": 1.23, + "grad_norm": 22.6334783611121, + "learning_rate": 6.88158291616915e-06, + "loss": 0.9399, + "step": 101943 + }, + { + "epoch": 1.23, + "grad_norm": 25.05643934683881, + "learning_rate": 6.8810278740341184e-06, + "loss": 1.3454, + "step": 101946 + }, + { + "epoch": 1.23, + "grad_norm": 3.8456359305975045, + "learning_rate": 6.880472842543016e-06, + "loss": 1.1488, + "step": 101949 + }, + { + "epoch": 1.23, + "grad_norm": 9.323292623906545, + "learning_rate": 6.879917821697735e-06, + "loss": 1.357, + "step": 101952 + }, + { + "epoch": 1.23, + "grad_norm": 10.588785049419949, + "learning_rate": 6.879362811500177e-06, + "loss": 0.9738, + "step": 101955 + }, + { + "epoch": 1.23, + "grad_norm": 8.149143622747433, + "learning_rate": 6.878807811952224e-06, + "loss": 1.2309, + "step": 101958 + }, + { + "epoch": 1.23, + "grad_norm": 4.479224334319711, + "learning_rate": 6.878252823055776e-06, + "loss": 1.4104, + "step": 101961 + }, + { + "epoch": 1.23, + "grad_norm": 14.511917568903732, + "learning_rate": 6.877697844812725e-06, + "loss": 1.2527, + "step": 101964 + }, + { + "epoch": 1.23, + "grad_norm": 3.5951463607274974, + "learning_rate": 6.8771428772249704e-06, + "loss": 1.0338, + "step": 101967 + }, + { + "epoch": 1.23, + "grad_norm": 12.433296962493944, + "learning_rate": 6.876587920294404e-06, + "loss": 0.7264, + "step": 101970 + }, + { + "epoch": 1.23, + "grad_norm": 4.666718883836916, + "learning_rate": 6.876032974022913e-06, + "loss": 1.0777, + "step": 101973 + }, + { + "epoch": 1.23, + "grad_norm": 8.482704770193381, + "learning_rate": 6.8754780384124e-06, + "loss": 1.413, + "step": 101976 + }, + { + "epoch": 1.23, + "grad_norm": 16.96614273943061, + "learning_rate": 6.874923113464751e-06, + "loss": 1.2407, + "step": 101979 + }, + { + "epoch": 1.23, + "grad_norm": 2.405420848218955, + "learning_rate": 6.874368199181869e-06, + "loss": 1.0242, + "step": 101982 + }, + { + "epoch": 1.23, + "grad_norm": 11.351144621007771, + "learning_rate": 6.873813295565638e-06, + "loss": 0.9994, + "step": 101985 + }, + { + "epoch": 1.23, + "grad_norm": 13.03311910271902, + "learning_rate": 6.873258402617958e-06, + "loss": 1.4117, + "step": 101988 + }, + { + "epoch": 1.23, + "grad_norm": 5.35596583788947, + "learning_rate": 6.8727035203407185e-06, + "loss": 1.3785, + "step": 101991 + }, + { + "epoch": 1.23, + "grad_norm": 4.908042007261615, + "learning_rate": 6.872148648735817e-06, + "loss": 1.0457, + "step": 101994 + }, + { + "epoch": 1.23, + "grad_norm": 12.250513326780696, + "learning_rate": 6.871593787805149e-06, + "loss": 0.8495, + "step": 101997 + }, + { + "epoch": 1.23, + "grad_norm": 13.495175268334366, + "learning_rate": 6.871038937550601e-06, + "loss": 1.1952, + "step": 102000 + }, + { + "epoch": 1.23, + "grad_norm": 19.13709390876305, + "learning_rate": 6.870484097974069e-06, + "loss": 1.2033, + "step": 102003 + }, + { + "epoch": 1.23, + "grad_norm": 25.872342026080457, + "learning_rate": 6.869929269077448e-06, + "loss": 1.0931, + "step": 102006 + }, + { + "epoch": 1.23, + "grad_norm": 11.900863239920062, + "learning_rate": 6.8693744508626346e-06, + "loss": 1.0724, + "step": 102009 + }, + { + "epoch": 1.23, + "grad_norm": 5.79918871370188, + "learning_rate": 6.868819643331517e-06, + "loss": 1.2219, + "step": 102012 + }, + { + "epoch": 1.23, + "grad_norm": 13.885586167916587, + "learning_rate": 6.868264846485988e-06, + "loss": 1.1256, + "step": 102015 + }, + { + "epoch": 1.23, + "grad_norm": 5.558147363024365, + "learning_rate": 6.867710060327943e-06, + "loss": 1.1389, + "step": 102018 + }, + { + "epoch": 1.23, + "grad_norm": 17.36776240168554, + "learning_rate": 6.867155284859276e-06, + "loss": 1.3195, + "step": 102021 + }, + { + "epoch": 1.23, + "grad_norm": 7.289386566367442, + "learning_rate": 6.866600520081883e-06, + "loss": 1.2916, + "step": 102024 + }, + { + "epoch": 1.23, + "grad_norm": 8.56676981199415, + "learning_rate": 6.866045765997649e-06, + "loss": 0.9105, + "step": 102027 + }, + { + "epoch": 1.23, + "grad_norm": 6.769274274660411, + "learning_rate": 6.865491022608475e-06, + "loss": 1.0295, + "step": 102030 + }, + { + "epoch": 1.23, + "grad_norm": 6.007752948322164, + "learning_rate": 6.864936289916249e-06, + "loss": 1.0187, + "step": 102033 + }, + { + "epoch": 1.23, + "grad_norm": 14.003399359414264, + "learning_rate": 6.864381567922869e-06, + "loss": 1.3918, + "step": 102036 + }, + { + "epoch": 1.23, + "grad_norm": 4.176346695223808, + "learning_rate": 6.8638268566302216e-06, + "loss": 1.0186, + "step": 102039 + }, + { + "epoch": 1.23, + "grad_norm": 5.164405805708493, + "learning_rate": 6.863272156040206e-06, + "loss": 1.0015, + "step": 102042 + }, + { + "epoch": 1.23, + "grad_norm": 8.353564321468815, + "learning_rate": 6.8627174661547106e-06, + "loss": 1.2922, + "step": 102045 + }, + { + "epoch": 1.23, + "grad_norm": 9.253739186020681, + "learning_rate": 6.86216278697563e-06, + "loss": 1.3217, + "step": 102048 + }, + { + "epoch": 1.23, + "grad_norm": 9.036467263382939, + "learning_rate": 6.861608118504861e-06, + "loss": 1.3792, + "step": 102051 + }, + { + "epoch": 1.23, + "grad_norm": 7.878963527466611, + "learning_rate": 6.861053460744293e-06, + "loss": 1.2655, + "step": 102054 + }, + { + "epoch": 1.23, + "grad_norm": 5.933773252746012, + "learning_rate": 6.860498813695815e-06, + "loss": 1.3086, + "step": 102057 + }, + { + "epoch": 1.23, + "grad_norm": 8.845837419503345, + "learning_rate": 6.859944177361327e-06, + "loss": 1.3314, + "step": 102060 + }, + { + "epoch": 1.23, + "grad_norm": 12.841965457493425, + "learning_rate": 6.859389551742719e-06, + "loss": 1.2496, + "step": 102063 + }, + { + "epoch": 1.23, + "grad_norm": 3.4542285019665293, + "learning_rate": 6.858834936841881e-06, + "loss": 0.8807, + "step": 102066 + }, + { + "epoch": 1.23, + "grad_norm": 8.4201869584072, + "learning_rate": 6.858280332660707e-06, + "loss": 1.0326, + "step": 102069 + }, + { + "epoch": 1.23, + "grad_norm": 8.10868633921704, + "learning_rate": 6.857725739201093e-06, + "loss": 1.6708, + "step": 102072 + }, + { + "epoch": 1.23, + "grad_norm": 9.287462451069798, + "learning_rate": 6.857171156464928e-06, + "loss": 1.4242, + "step": 102075 + }, + { + "epoch": 1.23, + "grad_norm": 28.254264626151468, + "learning_rate": 6.856616584454108e-06, + "loss": 1.1852, + "step": 102078 + }, + { + "epoch": 1.23, + "grad_norm": 2.8365462234991146, + "learning_rate": 6.85606202317052e-06, + "loss": 1.1103, + "step": 102081 + }, + { + "epoch": 1.23, + "grad_norm": 10.375021158461742, + "learning_rate": 6.855507472616062e-06, + "loss": 1.1988, + "step": 102084 + }, + { + "epoch": 1.23, + "grad_norm": 25.70791449402535, + "learning_rate": 6.8549529327926225e-06, + "loss": 1.3674, + "step": 102087 + }, + { + "epoch": 1.23, + "grad_norm": 12.620395794669626, + "learning_rate": 6.854398403702101e-06, + "loss": 1.5727, + "step": 102090 + }, + { + "epoch": 1.23, + "grad_norm": 20.107839938900828, + "learning_rate": 6.853843885346378e-06, + "loss": 1.0486, + "step": 102093 + }, + { + "epoch": 1.23, + "grad_norm": 56.569410723072274, + "learning_rate": 6.853289377727356e-06, + "loss": 1.6649, + "step": 102096 + }, + { + "epoch": 1.23, + "grad_norm": 13.848737239958657, + "learning_rate": 6.852734880846922e-06, + "loss": 1.4674, + "step": 102099 + }, + { + "epoch": 1.23, + "grad_norm": 10.28978055905428, + "learning_rate": 6.85218039470697e-06, + "loss": 1.6037, + "step": 102102 + }, + { + "epoch": 1.23, + "grad_norm": 8.079591949257246, + "learning_rate": 6.851625919309397e-06, + "loss": 1.4082, + "step": 102105 + }, + { + "epoch": 1.23, + "grad_norm": 24.02604223088346, + "learning_rate": 6.8510714546560875e-06, + "loss": 1.1954, + "step": 102108 + }, + { + "epoch": 1.23, + "grad_norm": 5.360673640632434, + "learning_rate": 6.850517000748935e-06, + "loss": 1.1928, + "step": 102111 + }, + { + "epoch": 1.23, + "grad_norm": 3.2206874073005594, + "learning_rate": 6.849962557589837e-06, + "loss": 1.2886, + "step": 102114 + }, + { + "epoch": 1.23, + "grad_norm": 24.984155997625955, + "learning_rate": 6.849408125180683e-06, + "loss": 1.0487, + "step": 102117 + }, + { + "epoch": 1.23, + "grad_norm": 19.923729333009412, + "learning_rate": 6.848853703523361e-06, + "loss": 1.0416, + "step": 102120 + }, + { + "epoch": 1.23, + "grad_norm": 18.481934722987194, + "learning_rate": 6.848299292619765e-06, + "loss": 1.5141, + "step": 102123 + }, + { + "epoch": 1.23, + "grad_norm": 20.505731848045972, + "learning_rate": 6.847744892471792e-06, + "loss": 0.9617, + "step": 102126 + }, + { + "epoch": 1.23, + "grad_norm": 11.645014770060119, + "learning_rate": 6.847190503081326e-06, + "loss": 1.2396, + "step": 102129 + }, + { + "epoch": 1.23, + "grad_norm": 15.655692965427031, + "learning_rate": 6.84663612445027e-06, + "loss": 1.4424, + "step": 102132 + }, + { + "epoch": 1.23, + "grad_norm": 3.4282833870729017, + "learning_rate": 6.846081756580503e-06, + "loss": 1.5051, + "step": 102135 + }, + { + "epoch": 1.23, + "grad_norm": 11.914779225298282, + "learning_rate": 6.845527399473926e-06, + "loss": 1.0699, + "step": 102138 + }, + { + "epoch": 1.23, + "grad_norm": 10.16938805032738, + "learning_rate": 6.844973053132424e-06, + "loss": 0.8828, + "step": 102141 + }, + { + "epoch": 1.23, + "grad_norm": 6.395887030037406, + "learning_rate": 6.844418717557899e-06, + "loss": 1.0351, + "step": 102144 + }, + { + "epoch": 1.23, + "grad_norm": 9.660614355559046, + "learning_rate": 6.843864392752231e-06, + "loss": 1.1903, + "step": 102147 + }, + { + "epoch": 1.23, + "grad_norm": 28.583542182171158, + "learning_rate": 6.843310078717318e-06, + "loss": 1.0557, + "step": 102150 + }, + { + "epoch": 1.23, + "grad_norm": 12.623989266315084, + "learning_rate": 6.84275577545505e-06, + "loss": 0.9961, + "step": 102153 + }, + { + "epoch": 1.23, + "grad_norm": 12.91275390286833, + "learning_rate": 6.842201482967321e-06, + "loss": 1.2279, + "step": 102156 + }, + { + "epoch": 1.23, + "grad_norm": 65.63221153822023, + "learning_rate": 6.841647201256021e-06, + "loss": 1.3976, + "step": 102159 + }, + { + "epoch": 1.23, + "grad_norm": 11.533393882693165, + "learning_rate": 6.8410929303230414e-06, + "loss": 1.5284, + "step": 102162 + }, + { + "epoch": 1.23, + "grad_norm": 12.24628874491013, + "learning_rate": 6.840538670170271e-06, + "loss": 1.3998, + "step": 102165 + }, + { + "epoch": 1.23, + "grad_norm": 7.8687605541633845, + "learning_rate": 6.839984420799608e-06, + "loss": 1.1696, + "step": 102168 + }, + { + "epoch": 1.23, + "grad_norm": 10.364692071070774, + "learning_rate": 6.83943018221294e-06, + "loss": 1.1334, + "step": 102171 + }, + { + "epoch": 1.23, + "grad_norm": 4.317090714392425, + "learning_rate": 6.838875954412157e-06, + "loss": 1.0594, + "step": 102174 + }, + { + "epoch": 1.23, + "grad_norm": 54.25431589029237, + "learning_rate": 6.83832173739915e-06, + "loss": 0.9444, + "step": 102177 + }, + { + "epoch": 1.23, + "grad_norm": 13.366289034491853, + "learning_rate": 6.837767531175813e-06, + "loss": 1.2863, + "step": 102180 + }, + { + "epoch": 1.23, + "grad_norm": 8.029268415740507, + "learning_rate": 6.837213335744035e-06, + "loss": 1.0686, + "step": 102183 + }, + { + "epoch": 1.23, + "grad_norm": 6.564635005706666, + "learning_rate": 6.836659151105714e-06, + "loss": 1.1999, + "step": 102186 + }, + { + "epoch": 1.23, + "grad_norm": 13.272055473117932, + "learning_rate": 6.8361049772627295e-06, + "loss": 1.0294, + "step": 102189 + }, + { + "epoch": 1.23, + "grad_norm": 9.886496206973465, + "learning_rate": 6.8355508142169835e-06, + "loss": 1.6176, + "step": 102192 + }, + { + "epoch": 1.23, + "grad_norm": 7.8555699448000995, + "learning_rate": 6.834996661970359e-06, + "loss": 1.3177, + "step": 102195 + }, + { + "epoch": 1.23, + "grad_norm": 12.537896096435063, + "learning_rate": 6.834442520524756e-06, + "loss": 1.0953, + "step": 102198 + }, + { + "epoch": 1.23, + "grad_norm": 11.077454361327229, + "learning_rate": 6.833888389882056e-06, + "loss": 1.1376, + "step": 102201 + }, + { + "epoch": 1.23, + "grad_norm": 12.654842613152386, + "learning_rate": 6.833334270044156e-06, + "loss": 1.0968, + "step": 102204 + }, + { + "epoch": 1.23, + "grad_norm": 3.870305638484363, + "learning_rate": 6.832780161012944e-06, + "loss": 0.8836, + "step": 102207 + }, + { + "epoch": 1.23, + "grad_norm": 12.524590161573403, + "learning_rate": 6.832226062790315e-06, + "loss": 1.0285, + "step": 102210 + }, + { + "epoch": 1.23, + "grad_norm": 10.100314837950155, + "learning_rate": 6.83167197537816e-06, + "loss": 1.2375, + "step": 102213 + }, + { + "epoch": 1.23, + "grad_norm": 8.41763593941541, + "learning_rate": 6.8311178987783615e-06, + "loss": 1.146, + "step": 102216 + }, + { + "epoch": 1.23, + "grad_norm": 9.816439156364229, + "learning_rate": 6.830563832992817e-06, + "loss": 0.8841, + "step": 102219 + }, + { + "epoch": 1.23, + "grad_norm": 11.824883480165036, + "learning_rate": 6.830009778023418e-06, + "loss": 0.9426, + "step": 102222 + }, + { + "epoch": 1.23, + "grad_norm": 4.499436321297428, + "learning_rate": 6.829455733872057e-06, + "loss": 1.0804, + "step": 102225 + }, + { + "epoch": 1.23, + "grad_norm": 10.38222146258777, + "learning_rate": 6.828901700540617e-06, + "loss": 1.2522, + "step": 102228 + }, + { + "epoch": 1.23, + "grad_norm": 13.04516088922658, + "learning_rate": 6.828347678030993e-06, + "loss": 0.9042, + "step": 102231 + }, + { + "epoch": 1.23, + "grad_norm": 7.8734345729401, + "learning_rate": 6.827793666345078e-06, + "loss": 1.0993, + "step": 102234 + }, + { + "epoch": 1.23, + "grad_norm": 4.093209839416008, + "learning_rate": 6.8272396654847585e-06, + "loss": 0.8967, + "step": 102237 + }, + { + "epoch": 1.23, + "grad_norm": 3.4495895626264006, + "learning_rate": 6.826685675451932e-06, + "loss": 1.3424, + "step": 102240 + }, + { + "epoch": 1.23, + "grad_norm": 6.729555923359456, + "learning_rate": 6.826131696248479e-06, + "loss": 1.3659, + "step": 102243 + }, + { + "epoch": 1.23, + "grad_norm": 9.205391731253853, + "learning_rate": 6.825577727876298e-06, + "loss": 1.4526, + "step": 102246 + }, + { + "epoch": 1.23, + "grad_norm": 8.973368350322431, + "learning_rate": 6.8250237703372745e-06, + "loss": 0.9702, + "step": 102249 + }, + { + "epoch": 1.23, + "grad_norm": 25.797785747682003, + "learning_rate": 6.824469823633306e-06, + "loss": 1.4316, + "step": 102252 + }, + { + "epoch": 1.23, + "grad_norm": 15.065014587096917, + "learning_rate": 6.823915887766275e-06, + "loss": 0.95, + "step": 102255 + }, + { + "epoch": 1.23, + "grad_norm": 14.337128938733871, + "learning_rate": 6.8233619627380745e-06, + "loss": 1.2463, + "step": 102258 + }, + { + "epoch": 1.23, + "grad_norm": 8.780686602357267, + "learning_rate": 6.822808048550594e-06, + "loss": 1.1089, + "step": 102261 + }, + { + "epoch": 1.23, + "grad_norm": 4.027100052787361, + "learning_rate": 6.822254145205728e-06, + "loss": 1.0687, + "step": 102264 + }, + { + "epoch": 1.23, + "grad_norm": 12.246914885465735, + "learning_rate": 6.821700252705366e-06, + "loss": 0.9961, + "step": 102267 + }, + { + "epoch": 1.23, + "grad_norm": 4.241670654344556, + "learning_rate": 6.8211463710513924e-06, + "loss": 1.3816, + "step": 102270 + }, + { + "epoch": 1.23, + "grad_norm": 43.03517358439387, + "learning_rate": 6.8205925002456996e-06, + "loss": 1.1, + "step": 102273 + }, + { + "epoch": 1.23, + "grad_norm": 3.4933274894890634, + "learning_rate": 6.820038640290183e-06, + "loss": 1.3236, + "step": 102276 + }, + { + "epoch": 1.23, + "grad_norm": 9.376669279237143, + "learning_rate": 6.819484791186731e-06, + "loss": 1.7111, + "step": 102279 + }, + { + "epoch": 1.23, + "grad_norm": 2.972374226284696, + "learning_rate": 6.818930952937227e-06, + "loss": 1.306, + "step": 102282 + }, + { + "epoch": 1.23, + "grad_norm": 8.220461715705047, + "learning_rate": 6.818377125543567e-06, + "loss": 1.4156, + "step": 102285 + }, + { + "epoch": 1.23, + "grad_norm": 10.454291501624164, + "learning_rate": 6.817823309007641e-06, + "loss": 0.9683, + "step": 102288 + }, + { + "epoch": 1.23, + "grad_norm": 4.350362241110337, + "learning_rate": 6.817269503331336e-06, + "loss": 1.2331, + "step": 102291 + }, + { + "epoch": 1.23, + "grad_norm": 4.182095519176329, + "learning_rate": 6.816715708516548e-06, + "loss": 1.0951, + "step": 102294 + }, + { + "epoch": 1.23, + "grad_norm": 14.114076548250846, + "learning_rate": 6.816161924565161e-06, + "loss": 1.2817, + "step": 102297 + }, + { + "epoch": 1.23, + "grad_norm": 7.805819399130199, + "learning_rate": 6.815608151479063e-06, + "loss": 1.6191, + "step": 102300 + }, + { + "epoch": 1.23, + "grad_norm": 9.927979451157148, + "learning_rate": 6.815054389260148e-06, + "loss": 1.1794, + "step": 102303 + }, + { + "epoch": 1.23, + "grad_norm": 20.178836802171634, + "learning_rate": 6.814500637910311e-06, + "loss": 1.3176, + "step": 102306 + }, + { + "epoch": 1.23, + "grad_norm": 5.758439965553689, + "learning_rate": 6.8139468974314314e-06, + "loss": 1.048, + "step": 102309 + }, + { + "epoch": 1.23, + "grad_norm": 14.429090741310828, + "learning_rate": 6.8133931678254024e-06, + "loss": 1.0308, + "step": 102312 + }, + { + "epoch": 1.23, + "grad_norm": 13.386485109704456, + "learning_rate": 6.812839449094115e-06, + "loss": 1.3275, + "step": 102315 + }, + { + "epoch": 1.23, + "grad_norm": 12.950710307475354, + "learning_rate": 6.81228574123946e-06, + "loss": 1.364, + "step": 102318 + }, + { + "epoch": 1.23, + "grad_norm": 5.58657988939078, + "learning_rate": 6.811732044263328e-06, + "loss": 1.276, + "step": 102321 + }, + { + "epoch": 1.23, + "grad_norm": 11.735466568633068, + "learning_rate": 6.811178358167603e-06, + "loss": 1.2737, + "step": 102324 + }, + { + "epoch": 1.23, + "grad_norm": 7.863974039159601, + "learning_rate": 6.810624682954177e-06, + "loss": 1.3838, + "step": 102327 + }, + { + "epoch": 1.23, + "grad_norm": 5.829253858551501, + "learning_rate": 6.810071018624941e-06, + "loss": 1.6209, + "step": 102330 + }, + { + "epoch": 1.23, + "grad_norm": 12.777317341937229, + "learning_rate": 6.809517365181786e-06, + "loss": 1.5154, + "step": 102333 + }, + { + "epoch": 1.23, + "grad_norm": 8.189895496575403, + "learning_rate": 6.808963722626596e-06, + "loss": 1.0697, + "step": 102336 + }, + { + "epoch": 1.23, + "grad_norm": 15.345141836436564, + "learning_rate": 6.808410090961263e-06, + "loss": 1.1063, + "step": 102339 + }, + { + "epoch": 1.23, + "grad_norm": 8.139269349897901, + "learning_rate": 6.807856470187678e-06, + "loss": 1.345, + "step": 102342 + }, + { + "epoch": 1.23, + "grad_norm": 5.202745572669587, + "learning_rate": 6.807302860307728e-06, + "loss": 1.1675, + "step": 102345 + }, + { + "epoch": 1.23, + "grad_norm": 13.299434439658082, + "learning_rate": 6.806749261323307e-06, + "loss": 1.4029, + "step": 102348 + }, + { + "epoch": 1.23, + "grad_norm": 16.575894354638148, + "learning_rate": 6.8061956732362985e-06, + "loss": 1.0581, + "step": 102351 + }, + { + "epoch": 1.23, + "grad_norm": 7.102793731565877, + "learning_rate": 6.8056420960485925e-06, + "loss": 1.1269, + "step": 102354 + }, + { + "epoch": 1.23, + "grad_norm": 28.68604509238215, + "learning_rate": 6.805088529762079e-06, + "loss": 1.0621, + "step": 102357 + }, + { + "epoch": 1.23, + "grad_norm": 8.617826774927945, + "learning_rate": 6.804534974378651e-06, + "loss": 0.962, + "step": 102360 + }, + { + "epoch": 1.23, + "grad_norm": 3.2020718398657873, + "learning_rate": 6.803981429900193e-06, + "loss": 1.1055, + "step": 102363 + }, + { + "epoch": 1.23, + "grad_norm": 13.289908030486515, + "learning_rate": 6.8034278963285915e-06, + "loss": 1.1829, + "step": 102366 + }, + { + "epoch": 1.23, + "grad_norm": 6.7694572339669845, + "learning_rate": 6.80287437366574e-06, + "loss": 1.3945, + "step": 102369 + }, + { + "epoch": 1.23, + "grad_norm": 9.701019388044449, + "learning_rate": 6.8023208619135295e-06, + "loss": 1.3892, + "step": 102372 + }, + { + "epoch": 1.23, + "grad_norm": 4.374816981821841, + "learning_rate": 6.801767361073848e-06, + "loss": 1.093, + "step": 102375 + }, + { + "epoch": 1.23, + "grad_norm": 6.241904599213644, + "learning_rate": 6.8012138711485774e-06, + "loss": 1.7781, + "step": 102378 + }, + { + "epoch": 1.23, + "grad_norm": 9.270648386447405, + "learning_rate": 6.800660392139613e-06, + "loss": 1.3649, + "step": 102381 + }, + { + "epoch": 1.23, + "grad_norm": 15.777456465190918, + "learning_rate": 6.800106924048844e-06, + "loss": 1.2854, + "step": 102384 + }, + { + "epoch": 1.23, + "grad_norm": 7.454151136005063, + "learning_rate": 6.7995534668781585e-06, + "loss": 1.3292, + "step": 102387 + }, + { + "epoch": 1.23, + "grad_norm": 5.981883550180213, + "learning_rate": 6.7990000206294405e-06, + "loss": 1.262, + "step": 102390 + }, + { + "epoch": 1.23, + "grad_norm": 6.874881160255529, + "learning_rate": 6.7984465853045845e-06, + "loss": 1.3321, + "step": 102393 + }, + { + "epoch": 1.23, + "grad_norm": 28.98134178049899, + "learning_rate": 6.797893160905475e-06, + "loss": 0.9782, + "step": 102396 + }, + { + "epoch": 1.23, + "grad_norm": 24.447991373765543, + "learning_rate": 6.797339747434004e-06, + "loss": 1.1822, + "step": 102399 + }, + { + "epoch": 1.23, + "grad_norm": 11.937272229795479, + "learning_rate": 6.7967863448920615e-06, + "loss": 1.1254, + "step": 102402 + }, + { + "epoch": 1.23, + "grad_norm": 8.25626021859625, + "learning_rate": 6.7962329532815316e-06, + "loss": 1.2613, + "step": 102405 + }, + { + "epoch": 1.23, + "grad_norm": 24.51023037291509, + "learning_rate": 6.795679572604304e-06, + "loss": 0.8371, + "step": 102408 + }, + { + "epoch": 1.23, + "grad_norm": 11.860243364730563, + "learning_rate": 6.7951262028622655e-06, + "loss": 1.4661, + "step": 102411 + }, + { + "epoch": 1.23, + "grad_norm": 5.90069366641943, + "learning_rate": 6.794572844057312e-06, + "loss": 1.3688, + "step": 102414 + }, + { + "epoch": 1.23, + "grad_norm": 13.844276148161766, + "learning_rate": 6.794019496191326e-06, + "loss": 1.3665, + "step": 102417 + }, + { + "epoch": 1.23, + "grad_norm": 16.16462469843771, + "learning_rate": 6.793466159266193e-06, + "loss": 1.1435, + "step": 102420 + }, + { + "epoch": 1.23, + "grad_norm": 12.235499839502758, + "learning_rate": 6.792912833283806e-06, + "loss": 1.2644, + "step": 102423 + }, + { + "epoch": 1.23, + "grad_norm": 7.024355318580928, + "learning_rate": 6.792359518246053e-06, + "loss": 1.2902, + "step": 102426 + }, + { + "epoch": 1.23, + "grad_norm": 27.56156447750141, + "learning_rate": 6.7918062141548256e-06, + "loss": 1.2508, + "step": 102429 + }, + { + "epoch": 1.23, + "grad_norm": 5.228964308102961, + "learning_rate": 6.791252921012003e-06, + "loss": 1.4625, + "step": 102432 + }, + { + "epoch": 1.23, + "grad_norm": 9.351790509135046, + "learning_rate": 6.7906996388194805e-06, + "loss": 1.0455, + "step": 102435 + }, + { + "epoch": 1.23, + "grad_norm": 21.736531308969827, + "learning_rate": 6.790146367579142e-06, + "loss": 1.202, + "step": 102438 + }, + { + "epoch": 1.23, + "grad_norm": 4.60294946589995, + "learning_rate": 6.78959310729288e-06, + "loss": 1.0896, + "step": 102441 + }, + { + "epoch": 1.23, + "grad_norm": 4.918595077475108, + "learning_rate": 6.789039857962578e-06, + "loss": 0.9367, + "step": 102444 + }, + { + "epoch": 1.23, + "grad_norm": 6.89738875041752, + "learning_rate": 6.788486619590129e-06, + "loss": 1.2721, + "step": 102447 + }, + { + "epoch": 1.23, + "grad_norm": 10.711855031307769, + "learning_rate": 6.787933392177416e-06, + "loss": 1.282, + "step": 102450 + }, + { + "epoch": 1.23, + "grad_norm": 11.302762600460575, + "learning_rate": 6.787380175726329e-06, + "loss": 1.7292, + "step": 102453 + }, + { + "epoch": 1.23, + "grad_norm": 12.872886143631115, + "learning_rate": 6.78682697023876e-06, + "loss": 1.195, + "step": 102456 + }, + { + "epoch": 1.23, + "grad_norm": 7.9723340120054065, + "learning_rate": 6.7862737757165916e-06, + "loss": 0.9968, + "step": 102459 + }, + { + "epoch": 1.23, + "grad_norm": 4.221958084085894, + "learning_rate": 6.7857205921617116e-06, + "loss": 1.1087, + "step": 102462 + }, + { + "epoch": 1.23, + "grad_norm": 10.387688926667746, + "learning_rate": 6.785167419576009e-06, + "loss": 1.5167, + "step": 102465 + }, + { + "epoch": 1.23, + "grad_norm": 5.022436424059397, + "learning_rate": 6.784614257961377e-06, + "loss": 1.1009, + "step": 102468 + }, + { + "epoch": 1.23, + "grad_norm": 23.10173165760141, + "learning_rate": 6.784061107319695e-06, + "loss": 1.1152, + "step": 102471 + }, + { + "epoch": 1.23, + "grad_norm": 31.511761102939907, + "learning_rate": 6.7835079676528525e-06, + "loss": 0.9717, + "step": 102474 + }, + { + "epoch": 1.23, + "grad_norm": 5.621627153219825, + "learning_rate": 6.782954838962741e-06, + "loss": 0.8248, + "step": 102477 + }, + { + "epoch": 1.23, + "grad_norm": 12.843292716358533, + "learning_rate": 6.782401721251244e-06, + "loss": 1.0905, + "step": 102480 + }, + { + "epoch": 1.23, + "grad_norm": 7.466858633851014, + "learning_rate": 6.781848614520255e-06, + "loss": 1.2506, + "step": 102483 + }, + { + "epoch": 1.23, + "grad_norm": 3.772672257458304, + "learning_rate": 6.781295518771651e-06, + "loss": 1.1931, + "step": 102486 + }, + { + "epoch": 1.23, + "grad_norm": 7.928780663130869, + "learning_rate": 6.78074243400733e-06, + "loss": 0.8958, + "step": 102489 + }, + { + "epoch": 1.23, + "grad_norm": 7.384954475801369, + "learning_rate": 6.780189360229174e-06, + "loss": 1.0697, + "step": 102492 + }, + { + "epoch": 1.23, + "grad_norm": 36.14781764251171, + "learning_rate": 6.779636297439076e-06, + "loss": 1.5394, + "step": 102495 + }, + { + "epoch": 1.23, + "grad_norm": 3.213713315918769, + "learning_rate": 6.779083245638915e-06, + "loss": 1.0598, + "step": 102498 + }, + { + "epoch": 1.23, + "grad_norm": 11.47977327483631, + "learning_rate": 6.778530204830585e-06, + "loss": 1.0289, + "step": 102501 + }, + { + "epoch": 1.23, + "grad_norm": 3.312333691489754, + "learning_rate": 6.777977175015968e-06, + "loss": 1.2937, + "step": 102504 + }, + { + "epoch": 1.23, + "grad_norm": 9.485539652803235, + "learning_rate": 6.777424156196954e-06, + "loss": 1.1341, + "step": 102507 + }, + { + "epoch": 1.23, + "grad_norm": 9.042575080631579, + "learning_rate": 6.776871148375437e-06, + "loss": 1.0936, + "step": 102510 + }, + { + "epoch": 1.23, + "grad_norm": 12.096927604872063, + "learning_rate": 6.776318151553294e-06, + "loss": 0.7808, + "step": 102513 + }, + { + "epoch": 1.23, + "grad_norm": 26.857896807524508, + "learning_rate": 6.775765165732414e-06, + "loss": 1.1616, + "step": 102516 + }, + { + "epoch": 1.23, + "grad_norm": 11.937655363454603, + "learning_rate": 6.775212190914686e-06, + "loss": 1.2197, + "step": 102519 + }, + { + "epoch": 1.23, + "grad_norm": 3.7542161294268612, + "learning_rate": 6.774659227102003e-06, + "loss": 1.0685, + "step": 102522 + }, + { + "epoch": 1.23, + "grad_norm": 12.07965126727538, + "learning_rate": 6.774106274296243e-06, + "loss": 1.0192, + "step": 102525 + }, + { + "epoch": 1.23, + "grad_norm": 3.260912094559793, + "learning_rate": 6.773553332499294e-06, + "loss": 1.0942, + "step": 102528 + }, + { + "epoch": 1.23, + "grad_norm": 8.908618265333645, + "learning_rate": 6.773000401713048e-06, + "loss": 1.2878, + "step": 102531 + }, + { + "epoch": 1.23, + "grad_norm": 4.660378026918454, + "learning_rate": 6.772447481939386e-06, + "loss": 1.1953, + "step": 102534 + }, + { + "epoch": 1.23, + "grad_norm": 13.066760928759898, + "learning_rate": 6.771894573180203e-06, + "loss": 0.7595, + "step": 102537 + }, + { + "epoch": 1.23, + "grad_norm": 3.9136278445992256, + "learning_rate": 6.771341675437378e-06, + "loss": 1.2374, + "step": 102540 + }, + { + "epoch": 1.23, + "grad_norm": 15.664366761658659, + "learning_rate": 6.770788788712801e-06, + "loss": 1.205, + "step": 102543 + }, + { + "epoch": 1.23, + "grad_norm": 3.734134440679381, + "learning_rate": 6.770235913008359e-06, + "loss": 1.2402, + "step": 102546 + }, + { + "epoch": 1.23, + "grad_norm": 7.054156027336238, + "learning_rate": 6.769683048325941e-06, + "loss": 1.2278, + "step": 102549 + }, + { + "epoch": 1.23, + "grad_norm": 8.776998430876828, + "learning_rate": 6.769130194667427e-06, + "loss": 1.231, + "step": 102552 + }, + { + "epoch": 1.23, + "grad_norm": 4.403636967640633, + "learning_rate": 6.768577352034711e-06, + "loss": 1.0694, + "step": 102555 + }, + { + "epoch": 1.23, + "grad_norm": 14.553246691784219, + "learning_rate": 6.768024520429674e-06, + "loss": 1.1526, + "step": 102558 + }, + { + "epoch": 1.23, + "grad_norm": 9.6949953418946, + "learning_rate": 6.767471699854205e-06, + "loss": 0.715, + "step": 102561 + }, + { + "epoch": 1.23, + "grad_norm": 4.469359798850819, + "learning_rate": 6.7669188903101946e-06, + "loss": 1.1712, + "step": 102564 + }, + { + "epoch": 1.23, + "grad_norm": 8.493960501799544, + "learning_rate": 6.766366091799523e-06, + "loss": 1.0717, + "step": 102567 + }, + { + "epoch": 1.23, + "grad_norm": 4.035520261277247, + "learning_rate": 6.765813304324078e-06, + "loss": 1.2179, + "step": 102570 + }, + { + "epoch": 1.23, + "grad_norm": 5.417073186441987, + "learning_rate": 6.765260527885748e-06, + "loss": 1.0486, + "step": 102573 + }, + { + "epoch": 1.23, + "grad_norm": 6.124524785599086, + "learning_rate": 6.764707762486421e-06, + "loss": 0.8831, + "step": 102576 + }, + { + "epoch": 1.23, + "grad_norm": 8.015887181364272, + "learning_rate": 6.76415500812798e-06, + "loss": 1.3937, + "step": 102579 + }, + { + "epoch": 1.23, + "grad_norm": 7.441972168785116, + "learning_rate": 6.76360226481231e-06, + "loss": 1.2723, + "step": 102582 + }, + { + "epoch": 1.23, + "grad_norm": 8.889905689356546, + "learning_rate": 6.7630495325413015e-06, + "loss": 1.2299, + "step": 102585 + }, + { + "epoch": 1.23, + "grad_norm": 16.309016335863085, + "learning_rate": 6.762496811316837e-06, + "loss": 1.0236, + "step": 102588 + }, + { + "epoch": 1.23, + "grad_norm": 7.778595080431493, + "learning_rate": 6.761944101140809e-06, + "loss": 1.4741, + "step": 102591 + }, + { + "epoch": 1.23, + "grad_norm": 9.640822939870159, + "learning_rate": 6.761391402015096e-06, + "loss": 1.4065, + "step": 102594 + }, + { + "epoch": 1.23, + "grad_norm": 6.103762275325087, + "learning_rate": 6.76083871394159e-06, + "loss": 1.4067, + "step": 102597 + }, + { + "epoch": 1.23, + "grad_norm": 11.125077421645935, + "learning_rate": 6.7602860369221724e-06, + "loss": 0.9378, + "step": 102600 + }, + { + "epoch": 1.23, + "grad_norm": 15.098626397144727, + "learning_rate": 6.759733370958734e-06, + "loss": 1.3732, + "step": 102603 + }, + { + "epoch": 1.23, + "grad_norm": 14.118581471100814, + "learning_rate": 6.759180716053155e-06, + "loss": 1.1299, + "step": 102606 + }, + { + "epoch": 1.23, + "grad_norm": 25.339727672567534, + "learning_rate": 6.758628072207329e-06, + "loss": 1.0372, + "step": 102609 + }, + { + "epoch": 1.23, + "grad_norm": 29.054458734053124, + "learning_rate": 6.758075439423134e-06, + "loss": 1.3529, + "step": 102612 + }, + { + "epoch": 1.23, + "grad_norm": 21.132107940218628, + "learning_rate": 6.757522817702462e-06, + "loss": 0.8924, + "step": 102615 + }, + { + "epoch": 1.23, + "grad_norm": 16.87161399422371, + "learning_rate": 6.756970207047197e-06, + "loss": 1.2792, + "step": 102618 + }, + { + "epoch": 1.23, + "grad_norm": 8.496604869513268, + "learning_rate": 6.756417607459225e-06, + "loss": 1.1598, + "step": 102621 + }, + { + "epoch": 1.23, + "grad_norm": 12.557351121206368, + "learning_rate": 6.755865018940428e-06, + "loss": 1.3783, + "step": 102624 + }, + { + "epoch": 1.23, + "grad_norm": 23.710948640108754, + "learning_rate": 6.755312441492699e-06, + "loss": 1.5064, + "step": 102627 + }, + { + "epoch": 1.23, + "grad_norm": 6.627924706713798, + "learning_rate": 6.75475987511792e-06, + "loss": 1.1589, + "step": 102630 + }, + { + "epoch": 1.23, + "grad_norm": 6.335472381512895, + "learning_rate": 6.754207319817973e-06, + "loss": 1.3997, + "step": 102633 + }, + { + "epoch": 1.23, + "grad_norm": 10.01073540864187, + "learning_rate": 6.753654775594748e-06, + "loss": 1.0812, + "step": 102636 + }, + { + "epoch": 1.23, + "grad_norm": 4.921741052618214, + "learning_rate": 6.753102242450131e-06, + "loss": 0.9649, + "step": 102639 + }, + { + "epoch": 1.23, + "grad_norm": 22.210998107885025, + "learning_rate": 6.752549720386006e-06, + "loss": 1.4985, + "step": 102642 + }, + { + "epoch": 1.23, + "grad_norm": 6.201428392264997, + "learning_rate": 6.751997209404262e-06, + "loss": 1.3305, + "step": 102645 + }, + { + "epoch": 1.23, + "grad_norm": 13.667855698431733, + "learning_rate": 6.751444709506778e-06, + "loss": 1.0313, + "step": 102648 + }, + { + "epoch": 1.23, + "grad_norm": 61.06570414406456, + "learning_rate": 6.750892220695444e-06, + "loss": 1.0372, + "step": 102651 + }, + { + "epoch": 1.23, + "grad_norm": 13.3840991767891, + "learning_rate": 6.750339742972145e-06, + "loss": 1.3346, + "step": 102654 + }, + { + "epoch": 1.23, + "grad_norm": 10.770121706744973, + "learning_rate": 6.7497872763387684e-06, + "loss": 1.0821, + "step": 102657 + }, + { + "epoch": 1.23, + "grad_norm": 5.3259780439677336, + "learning_rate": 6.749234820797193e-06, + "loss": 1.0571, + "step": 102660 + }, + { + "epoch": 1.23, + "grad_norm": 4.311363242019201, + "learning_rate": 6.7486823763493095e-06, + "loss": 1.107, + "step": 102663 + }, + { + "epoch": 1.23, + "grad_norm": 8.442312965935598, + "learning_rate": 6.748129942997001e-06, + "loss": 1.1649, + "step": 102666 + }, + { + "epoch": 1.23, + "grad_norm": 12.783209405801932, + "learning_rate": 6.747577520742156e-06, + "loss": 1.0108, + "step": 102669 + }, + { + "epoch": 1.23, + "grad_norm": 17.42555289533121, + "learning_rate": 6.747025109586658e-06, + "loss": 1.0488, + "step": 102672 + }, + { + "epoch": 1.23, + "grad_norm": 5.7292203445980485, + "learning_rate": 6.746472709532389e-06, + "loss": 1.1417, + "step": 102675 + }, + { + "epoch": 1.23, + "grad_norm": 2.4865244994556885, + "learning_rate": 6.745920320581237e-06, + "loss": 1.0491, + "step": 102678 + }, + { + "epoch": 1.23, + "grad_norm": 13.280577283926851, + "learning_rate": 6.745367942735087e-06, + "loss": 1.4458, + "step": 102681 + }, + { + "epoch": 1.23, + "grad_norm": 3.1431975288610676, + "learning_rate": 6.744815575995827e-06, + "loss": 0.8126, + "step": 102684 + }, + { + "epoch": 1.23, + "grad_norm": 8.511117825526108, + "learning_rate": 6.7442632203653346e-06, + "loss": 1.4327, + "step": 102687 + }, + { + "epoch": 1.23, + "grad_norm": 11.191924150240675, + "learning_rate": 6.743710875845499e-06, + "loss": 1.379, + "step": 102690 + }, + { + "epoch": 1.23, + "grad_norm": 5.243563537669413, + "learning_rate": 6.743158542438208e-06, + "loss": 1.2236, + "step": 102693 + }, + { + "epoch": 1.23, + "grad_norm": 4.433242426881576, + "learning_rate": 6.742606220145341e-06, + "loss": 1.3824, + "step": 102696 + }, + { + "epoch": 1.23, + "grad_norm": 10.747857587243065, + "learning_rate": 6.742053908968791e-06, + "loss": 1.0731, + "step": 102699 + }, + { + "epoch": 1.23, + "grad_norm": 10.080591747817548, + "learning_rate": 6.741501608910432e-06, + "loss": 1.0221, + "step": 102702 + }, + { + "epoch": 1.23, + "grad_norm": 3.0846338773756865, + "learning_rate": 6.740949319972157e-06, + "loss": 1.347, + "step": 102705 + }, + { + "epoch": 1.24, + "grad_norm": 11.003002361941682, + "learning_rate": 6.740397042155846e-06, + "loss": 0.8613, + "step": 102708 + }, + { + "epoch": 1.24, + "grad_norm": 10.21171093693973, + "learning_rate": 6.73984477546339e-06, + "loss": 1.4072, + "step": 102711 + }, + { + "epoch": 1.24, + "grad_norm": 5.89522391707343, + "learning_rate": 6.739292519896667e-06, + "loss": 1.0378, + "step": 102714 + }, + { + "epoch": 1.24, + "grad_norm": 9.492125316989144, + "learning_rate": 6.738740275457563e-06, + "loss": 1.5336, + "step": 102717 + }, + { + "epoch": 1.24, + "grad_norm": 15.158831292909296, + "learning_rate": 6.738188042147962e-06, + "loss": 1.1131, + "step": 102720 + }, + { + "epoch": 1.24, + "grad_norm": 20.66984851830857, + "learning_rate": 6.7376358199697546e-06, + "loss": 0.9637, + "step": 102723 + }, + { + "epoch": 1.24, + "grad_norm": 10.540092545808088, + "learning_rate": 6.737083608924821e-06, + "loss": 1.0826, + "step": 102726 + }, + { + "epoch": 1.24, + "grad_norm": 13.72996878564045, + "learning_rate": 6.736531409015042e-06, + "loss": 1.3406, + "step": 102729 + }, + { + "epoch": 1.24, + "grad_norm": 44.612912966798696, + "learning_rate": 6.735979220242307e-06, + "loss": 1.2711, + "step": 102732 + }, + { + "epoch": 1.24, + "grad_norm": 6.524962585822639, + "learning_rate": 6.7354270426085e-06, + "loss": 0.8741, + "step": 102735 + }, + { + "epoch": 1.24, + "grad_norm": 31.17266182186863, + "learning_rate": 6.734874876115508e-06, + "loss": 1.3078, + "step": 102738 + }, + { + "epoch": 1.24, + "grad_norm": 9.890526921802785, + "learning_rate": 6.734322720765207e-06, + "loss": 0.9036, + "step": 102741 + }, + { + "epoch": 1.24, + "grad_norm": 6.518641690584873, + "learning_rate": 6.733770576559486e-06, + "loss": 1.2565, + "step": 102744 + }, + { + "epoch": 1.24, + "grad_norm": 11.289192181553492, + "learning_rate": 6.7332184435002325e-06, + "loss": 1.3853, + "step": 102747 + }, + { + "epoch": 1.24, + "grad_norm": 5.235509353979741, + "learning_rate": 6.732666321589325e-06, + "loss": 1.0472, + "step": 102750 + }, + { + "epoch": 1.24, + "grad_norm": 3.724035332475827, + "learning_rate": 6.732114210828655e-06, + "loss": 1.4435, + "step": 102753 + }, + { + "epoch": 1.24, + "grad_norm": 13.567249043188578, + "learning_rate": 6.731562111220098e-06, + "loss": 1.2779, + "step": 102756 + }, + { + "epoch": 1.24, + "grad_norm": 10.99481563411823, + "learning_rate": 6.731010022765543e-06, + "loss": 0.9432, + "step": 102759 + }, + { + "epoch": 1.24, + "grad_norm": 33.99343204230765, + "learning_rate": 6.730457945466873e-06, + "loss": 1.2149, + "step": 102762 + }, + { + "epoch": 1.24, + "grad_norm": 3.0249576984845628, + "learning_rate": 6.729905879325975e-06, + "loss": 1.0284, + "step": 102765 + }, + { + "epoch": 1.24, + "grad_norm": 14.838115815721803, + "learning_rate": 6.72935382434473e-06, + "loss": 1.2477, + "step": 102768 + }, + { + "epoch": 1.24, + "grad_norm": 8.794357989323359, + "learning_rate": 6.728801780525018e-06, + "loss": 1.3694, + "step": 102771 + }, + { + "epoch": 1.24, + "grad_norm": 5.121369918792811, + "learning_rate": 6.728249747868729e-06, + "loss": 1.3403, + "step": 102774 + }, + { + "epoch": 1.24, + "grad_norm": 10.648692511552474, + "learning_rate": 6.7276977263777475e-06, + "loss": 1.1933, + "step": 102777 + }, + { + "epoch": 1.24, + "grad_norm": 6.832568110611606, + "learning_rate": 6.727145716053956e-06, + "loss": 1.3904, + "step": 102780 + }, + { + "epoch": 1.24, + "grad_norm": 62.929111061776794, + "learning_rate": 6.726593716899234e-06, + "loss": 0.8477, + "step": 102783 + }, + { + "epoch": 1.24, + "grad_norm": 5.767032543020133, + "learning_rate": 6.7260417289154675e-06, + "loss": 1.4968, + "step": 102786 + }, + { + "epoch": 1.24, + "grad_norm": 17.421891998744673, + "learning_rate": 6.725489752104544e-06, + "loss": 1.0479, + "step": 102789 + }, + { + "epoch": 1.24, + "grad_norm": 13.600547584670544, + "learning_rate": 6.724937786468346e-06, + "loss": 1.2396, + "step": 102792 + }, + { + "epoch": 1.24, + "grad_norm": 18.248241609100596, + "learning_rate": 6.724385832008753e-06, + "loss": 1.3153, + "step": 102795 + }, + { + "epoch": 1.24, + "grad_norm": 10.152602189116356, + "learning_rate": 6.72383388872765e-06, + "loss": 1.4977, + "step": 102798 + }, + { + "epoch": 1.24, + "grad_norm": 5.239431147720881, + "learning_rate": 6.723281956626924e-06, + "loss": 1.3684, + "step": 102801 + }, + { + "epoch": 1.24, + "grad_norm": 10.764009079286211, + "learning_rate": 6.722730035708455e-06, + "loss": 1.1069, + "step": 102804 + }, + { + "epoch": 1.24, + "grad_norm": 4.644085801163032, + "learning_rate": 6.722178125974131e-06, + "loss": 0.9107, + "step": 102807 + }, + { + "epoch": 1.24, + "grad_norm": 12.877218319980628, + "learning_rate": 6.72162622742583e-06, + "loss": 1.1391, + "step": 102810 + }, + { + "epoch": 1.24, + "grad_norm": 3.433576873991791, + "learning_rate": 6.721074340065438e-06, + "loss": 1.2383, + "step": 102813 + }, + { + "epoch": 1.24, + "grad_norm": 44.23074289163248, + "learning_rate": 6.720522463894836e-06, + "loss": 1.3339, + "step": 102816 + }, + { + "epoch": 1.24, + "grad_norm": 6.349705787012437, + "learning_rate": 6.719970598915915e-06, + "loss": 0.8799, + "step": 102819 + }, + { + "epoch": 1.24, + "grad_norm": 10.067997514828097, + "learning_rate": 6.719418745130551e-06, + "loss": 1.0425, + "step": 102822 + }, + { + "epoch": 1.24, + "grad_norm": 7.267721810949627, + "learning_rate": 6.718866902540626e-06, + "loss": 1.2146, + "step": 102825 + }, + { + "epoch": 1.24, + "grad_norm": 3.2004357264509453, + "learning_rate": 6.718315071148027e-06, + "loss": 1.4945, + "step": 102828 + }, + { + "epoch": 1.24, + "grad_norm": 27.9842906637403, + "learning_rate": 6.717763250954639e-06, + "loss": 1.0218, + "step": 102831 + }, + { + "epoch": 1.24, + "grad_norm": 7.116149965474208, + "learning_rate": 6.717211441962344e-06, + "loss": 1.2152, + "step": 102834 + }, + { + "epoch": 1.24, + "grad_norm": 14.974444406596287, + "learning_rate": 6.71665964417302e-06, + "loss": 1.2028, + "step": 102837 + }, + { + "epoch": 1.24, + "grad_norm": 6.348617810285029, + "learning_rate": 6.716107857588554e-06, + "loss": 1.1655, + "step": 102840 + }, + { + "epoch": 1.24, + "grad_norm": 9.74762068439731, + "learning_rate": 6.715556082210833e-06, + "loss": 1.1198, + "step": 102843 + }, + { + "epoch": 1.24, + "grad_norm": 8.693413696455172, + "learning_rate": 6.715004318041736e-06, + "loss": 1.16, + "step": 102846 + }, + { + "epoch": 1.24, + "grad_norm": 17.54921695145082, + "learning_rate": 6.714452565083142e-06, + "loss": 0.9285, + "step": 102849 + }, + { + "epoch": 1.24, + "grad_norm": 10.616320185210506, + "learning_rate": 6.71390082333694e-06, + "loss": 1.0692, + "step": 102852 + }, + { + "epoch": 1.24, + "grad_norm": 10.577794159565329, + "learning_rate": 6.713349092805009e-06, + "loss": 0.8361, + "step": 102855 + }, + { + "epoch": 1.24, + "grad_norm": 8.5932953498716, + "learning_rate": 6.7127973734892335e-06, + "loss": 1.3754, + "step": 102858 + }, + { + "epoch": 1.24, + "grad_norm": 6.84702787567128, + "learning_rate": 6.712245665391501e-06, + "loss": 1.0007, + "step": 102861 + }, + { + "epoch": 1.24, + "grad_norm": 18.836552392267762, + "learning_rate": 6.711693968513688e-06, + "loss": 1.2742, + "step": 102864 + }, + { + "epoch": 1.24, + "grad_norm": 8.21793249417136, + "learning_rate": 6.711142282857677e-06, + "loss": 1.0723, + "step": 102867 + }, + { + "epoch": 1.24, + "grad_norm": 7.906233120456625, + "learning_rate": 6.7105906084253516e-06, + "loss": 1.1637, + "step": 102870 + }, + { + "epoch": 1.24, + "grad_norm": 15.194104541584496, + "learning_rate": 6.7100389452186e-06, + "loss": 1.2317, + "step": 102873 + }, + { + "epoch": 1.24, + "grad_norm": 10.924712864470125, + "learning_rate": 6.7094872932393e-06, + "loss": 1.173, + "step": 102876 + }, + { + "epoch": 1.24, + "grad_norm": 16.70349637766508, + "learning_rate": 6.7089356524893305e-06, + "loss": 1.0494, + "step": 102879 + }, + { + "epoch": 1.24, + "grad_norm": 14.719970066003718, + "learning_rate": 6.70838402297058e-06, + "loss": 1.5605, + "step": 102882 + }, + { + "epoch": 1.24, + "grad_norm": 2.119845973991613, + "learning_rate": 6.70783240468493e-06, + "loss": 1.2401, + "step": 102885 + }, + { + "epoch": 1.24, + "grad_norm": 8.770221808593856, + "learning_rate": 6.707280797634265e-06, + "loss": 1.1689, + "step": 102888 + }, + { + "epoch": 1.24, + "grad_norm": 9.04198706837304, + "learning_rate": 6.706729201820459e-06, + "loss": 1.0767, + "step": 102891 + }, + { + "epoch": 1.24, + "grad_norm": 9.960857067835175, + "learning_rate": 6.706177617245404e-06, + "loss": 1.4944, + "step": 102894 + }, + { + "epoch": 1.24, + "grad_norm": 32.69153286113662, + "learning_rate": 6.705626043910976e-06, + "loss": 1.368, + "step": 102897 + }, + { + "epoch": 1.24, + "grad_norm": 2.9408998433063185, + "learning_rate": 6.705074481819063e-06, + "loss": 0.9881, + "step": 102900 + }, + { + "epoch": 1.24, + "grad_norm": 15.077077814173737, + "learning_rate": 6.70452293097154e-06, + "loss": 1.3309, + "step": 102903 + }, + { + "epoch": 1.24, + "grad_norm": 16.303899055715437, + "learning_rate": 6.7039713913702955e-06, + "loss": 1.4899, + "step": 102906 + }, + { + "epoch": 1.24, + "grad_norm": 8.863404539385693, + "learning_rate": 6.703419863017206e-06, + "loss": 0.991, + "step": 102909 + }, + { + "epoch": 1.24, + "grad_norm": 7.898784230838777, + "learning_rate": 6.70286834591416e-06, + "loss": 1.1851, + "step": 102912 + }, + { + "epoch": 1.24, + "grad_norm": 5.449177711883924, + "learning_rate": 6.702316840063039e-06, + "loss": 1.2413, + "step": 102915 + }, + { + "epoch": 1.24, + "grad_norm": 6.689353641618293, + "learning_rate": 6.701765345465721e-06, + "loss": 0.8838, + "step": 102918 + }, + { + "epoch": 1.24, + "grad_norm": 14.564962634449694, + "learning_rate": 6.701213862124088e-06, + "loss": 1.1088, + "step": 102921 + }, + { + "epoch": 1.24, + "grad_norm": 5.246016227667572, + "learning_rate": 6.7006623900400225e-06, + "loss": 1.289, + "step": 102924 + }, + { + "epoch": 1.24, + "grad_norm": 5.439026238155601, + "learning_rate": 6.700110929215413e-06, + "loss": 1.3618, + "step": 102927 + }, + { + "epoch": 1.24, + "grad_norm": 7.802053083944518, + "learning_rate": 6.6995594796521335e-06, + "loss": 1.132, + "step": 102930 + }, + { + "epoch": 1.24, + "grad_norm": 11.340848895105069, + "learning_rate": 6.699008041352069e-06, + "loss": 0.9503, + "step": 102933 + }, + { + "epoch": 1.24, + "grad_norm": 10.72051545636784, + "learning_rate": 6.698456614317099e-06, + "loss": 1.2778, + "step": 102936 + }, + { + "epoch": 1.24, + "grad_norm": 8.133398353626596, + "learning_rate": 6.69790519854911e-06, + "loss": 0.8217, + "step": 102939 + }, + { + "epoch": 1.24, + "grad_norm": 7.586661912545793, + "learning_rate": 6.697353794049983e-06, + "loss": 1.0678, + "step": 102942 + }, + { + "epoch": 1.24, + "grad_norm": 5.946747035147715, + "learning_rate": 6.696802400821593e-06, + "loss": 1.227, + "step": 102945 + }, + { + "epoch": 1.24, + "grad_norm": 10.654874667882527, + "learning_rate": 6.696251018865829e-06, + "loss": 0.9243, + "step": 102948 + }, + { + "epoch": 1.24, + "grad_norm": 8.442706267250836, + "learning_rate": 6.695699648184569e-06, + "loss": 1.0358, + "step": 102951 + }, + { + "epoch": 1.24, + "grad_norm": 9.892683462533387, + "learning_rate": 6.6951482887797e-06, + "loss": 1.0105, + "step": 102954 + }, + { + "epoch": 1.24, + "grad_norm": 4.847872360992101, + "learning_rate": 6.694596940653095e-06, + "loss": 0.936, + "step": 102957 + }, + { + "epoch": 1.24, + "grad_norm": 3.266103828406449, + "learning_rate": 6.694045603806642e-06, + "loss": 1.375, + "step": 102960 + }, + { + "epoch": 1.24, + "grad_norm": 15.32538804736807, + "learning_rate": 6.6934942782422185e-06, + "loss": 1.2456, + "step": 102963 + }, + { + "epoch": 1.24, + "grad_norm": 3.7778922586017876, + "learning_rate": 6.6929429639617085e-06, + "loss": 1.3453, + "step": 102966 + }, + { + "epoch": 1.24, + "grad_norm": 7.6637132164040445, + "learning_rate": 6.692391660966997e-06, + "loss": 1.4536, + "step": 102969 + }, + { + "epoch": 1.24, + "grad_norm": 21.5132850540807, + "learning_rate": 6.691840369259959e-06, + "loss": 1.2756, + "step": 102972 + }, + { + "epoch": 1.24, + "grad_norm": 11.928981694127234, + "learning_rate": 6.691289088842477e-06, + "loss": 1.389, + "step": 102975 + }, + { + "epoch": 1.24, + "grad_norm": 10.23277341978126, + "learning_rate": 6.690737819716433e-06, + "loss": 1.1915, + "step": 102978 + }, + { + "epoch": 1.24, + "grad_norm": 9.476535150878405, + "learning_rate": 6.690186561883713e-06, + "loss": 1.1657, + "step": 102981 + }, + { + "epoch": 1.24, + "grad_norm": 6.005611714262276, + "learning_rate": 6.6896353153461914e-06, + "loss": 1.3081, + "step": 102984 + }, + { + "epoch": 1.24, + "grad_norm": 20.724877263547917, + "learning_rate": 6.68908408010575e-06, + "loss": 1.2024, + "step": 102987 + }, + { + "epoch": 1.24, + "grad_norm": 4.053082948462987, + "learning_rate": 6.688532856164275e-06, + "loss": 1.4045, + "step": 102990 + }, + { + "epoch": 1.24, + "grad_norm": 13.526996707475487, + "learning_rate": 6.687981643523642e-06, + "loss": 1.2234, + "step": 102993 + }, + { + "epoch": 1.24, + "grad_norm": 3.150630748173776, + "learning_rate": 6.687430442185738e-06, + "loss": 1.5371, + "step": 102996 + }, + { + "epoch": 1.24, + "grad_norm": 6.784640825962084, + "learning_rate": 6.686879252152438e-06, + "loss": 0.9117, + "step": 102999 + }, + { + "epoch": 1.24, + "grad_norm": 6.570225490491427, + "learning_rate": 6.686328073425628e-06, + "loss": 1.3254, + "step": 103002 + }, + { + "epoch": 1.24, + "grad_norm": 8.74266680766347, + "learning_rate": 6.685776906007183e-06, + "loss": 1.0354, + "step": 103005 + }, + { + "epoch": 1.24, + "grad_norm": 44.127640310930055, + "learning_rate": 6.6852257498989926e-06, + "loss": 1.1766, + "step": 103008 + }, + { + "epoch": 1.24, + "grad_norm": 10.316491175714793, + "learning_rate": 6.684674605102929e-06, + "loss": 1.1964, + "step": 103011 + }, + { + "epoch": 1.24, + "grad_norm": 2.926479771884032, + "learning_rate": 6.684123471620878e-06, + "loss": 1.1473, + "step": 103014 + }, + { + "epoch": 1.24, + "grad_norm": 3.7333957369903508, + "learning_rate": 6.683572349454718e-06, + "loss": 1.4925, + "step": 103017 + }, + { + "epoch": 1.24, + "grad_norm": 83.61950542096054, + "learning_rate": 6.683021238606331e-06, + "loss": 1.3091, + "step": 103020 + }, + { + "epoch": 1.24, + "grad_norm": 3.065637670241157, + "learning_rate": 6.682470139077601e-06, + "loss": 1.1382, + "step": 103023 + }, + { + "epoch": 1.24, + "grad_norm": 24.378534016864155, + "learning_rate": 6.681919050870405e-06, + "loss": 1.2015, + "step": 103026 + }, + { + "epoch": 1.24, + "grad_norm": 5.586252113375959, + "learning_rate": 6.68136797398662e-06, + "loss": 1.0904, + "step": 103029 + }, + { + "epoch": 1.24, + "grad_norm": 4.503289699571878, + "learning_rate": 6.680816908428134e-06, + "loss": 1.0653, + "step": 103032 + }, + { + "epoch": 1.24, + "grad_norm": 4.8595340792568935, + "learning_rate": 6.680265854196824e-06, + "loss": 1.0595, + "step": 103035 + }, + { + "epoch": 1.24, + "grad_norm": 4.9256758632360365, + "learning_rate": 6.679714811294571e-06, + "loss": 1.4651, + "step": 103038 + }, + { + "epoch": 1.24, + "grad_norm": 20.201812516677588, + "learning_rate": 6.679163779723252e-06, + "loss": 1.0198, + "step": 103041 + }, + { + "epoch": 1.24, + "grad_norm": 16.869513162828426, + "learning_rate": 6.678612759484755e-06, + "loss": 1.2889, + "step": 103044 + }, + { + "epoch": 1.24, + "grad_norm": 10.75021639613975, + "learning_rate": 6.678061750580953e-06, + "loss": 1.1099, + "step": 103047 + }, + { + "epoch": 1.24, + "grad_norm": 3.321374586051933, + "learning_rate": 6.677510753013735e-06, + "loss": 1.2493, + "step": 103050 + }, + { + "epoch": 1.24, + "grad_norm": 3.227739760397712, + "learning_rate": 6.676959766784971e-06, + "loss": 0.7826, + "step": 103053 + }, + { + "epoch": 1.24, + "grad_norm": 45.947312382091624, + "learning_rate": 6.676408791896549e-06, + "loss": 1.116, + "step": 103056 + }, + { + "epoch": 1.24, + "grad_norm": 18.092601718093697, + "learning_rate": 6.675857828350344e-06, + "loss": 1.0125, + "step": 103059 + }, + { + "epoch": 1.24, + "grad_norm": 7.783780975480839, + "learning_rate": 6.675306876148243e-06, + "loss": 0.9346, + "step": 103062 + }, + { + "epoch": 1.24, + "grad_norm": 15.190560949822837, + "learning_rate": 6.674755935292119e-06, + "loss": 0.9838, + "step": 103065 + }, + { + "epoch": 1.24, + "grad_norm": 10.161414820769846, + "learning_rate": 6.674205005783856e-06, + "loss": 1.1691, + "step": 103068 + }, + { + "epoch": 1.24, + "grad_norm": 28.469449728985037, + "learning_rate": 6.6736540876253315e-06, + "loss": 1.0278, + "step": 103071 + }, + { + "epoch": 1.24, + "grad_norm": 22.398810844898268, + "learning_rate": 6.67310318081843e-06, + "loss": 1.3211, + "step": 103074 + }, + { + "epoch": 1.24, + "grad_norm": 10.124091236815378, + "learning_rate": 6.672552285365028e-06, + "loss": 1.024, + "step": 103077 + }, + { + "epoch": 1.24, + "grad_norm": 20.124561544209076, + "learning_rate": 6.672001401267008e-06, + "loss": 1.1755, + "step": 103080 + }, + { + "epoch": 1.24, + "grad_norm": 5.861544279728637, + "learning_rate": 6.671450528526245e-06, + "loss": 1.3483, + "step": 103083 + }, + { + "epoch": 1.24, + "grad_norm": 16.718247990703727, + "learning_rate": 6.6708996671446255e-06, + "loss": 1.7169, + "step": 103086 + }, + { + "epoch": 1.24, + "grad_norm": 12.576118123197597, + "learning_rate": 6.670348817124028e-06, + "loss": 1.1648, + "step": 103089 + }, + { + "epoch": 1.24, + "grad_norm": 13.035692987011535, + "learning_rate": 6.6697979784663265e-06, + "loss": 1.3757, + "step": 103092 + }, + { + "epoch": 1.24, + "grad_norm": 15.904614814260986, + "learning_rate": 6.6692471511734055e-06, + "loss": 1.0537, + "step": 103095 + }, + { + "epoch": 1.24, + "grad_norm": 3.4202212627113404, + "learning_rate": 6.668696335247146e-06, + "loss": 1.4313, + "step": 103098 + }, + { + "epoch": 1.24, + "grad_norm": 8.63773079211504, + "learning_rate": 6.6681455306894225e-06, + "loss": 1.1045, + "step": 103101 + }, + { + "epoch": 1.24, + "grad_norm": 5.3417423530324, + "learning_rate": 6.667594737502125e-06, + "loss": 0.8716, + "step": 103104 + }, + { + "epoch": 1.24, + "grad_norm": 20.190377229760532, + "learning_rate": 6.66704395568712e-06, + "loss": 1.2833, + "step": 103107 + }, + { + "epoch": 1.24, + "grad_norm": 4.3463045205626285, + "learning_rate": 6.666493185246297e-06, + "loss": 1.3319, + "step": 103110 + }, + { + "epoch": 1.24, + "grad_norm": 9.874519605156184, + "learning_rate": 6.665942426181529e-06, + "loss": 0.8252, + "step": 103113 + }, + { + "epoch": 1.24, + "grad_norm": 12.316188602981114, + "learning_rate": 6.665391678494702e-06, + "loss": 1.3329, + "step": 103116 + }, + { + "epoch": 1.24, + "grad_norm": 66.08857506018872, + "learning_rate": 6.6648409421876896e-06, + "loss": 1.0616, + "step": 103119 + }, + { + "epoch": 1.24, + "grad_norm": 14.31728594492075, + "learning_rate": 6.664290217262375e-06, + "loss": 1.0378, + "step": 103122 + }, + { + "epoch": 1.24, + "grad_norm": 9.34091261664632, + "learning_rate": 6.6637395037206345e-06, + "loss": 1.1408, + "step": 103125 + }, + { + "epoch": 1.24, + "grad_norm": 21.60107998219549, + "learning_rate": 6.663188801564351e-06, + "loss": 1.1727, + "step": 103128 + }, + { + "epoch": 1.24, + "grad_norm": 4.968860358329605, + "learning_rate": 6.662638110795405e-06, + "loss": 1.0403, + "step": 103131 + }, + { + "epoch": 1.24, + "grad_norm": 15.617423642699485, + "learning_rate": 6.6620874314156695e-06, + "loss": 1.118, + "step": 103134 + }, + { + "epoch": 1.24, + "grad_norm": 7.360323318032104, + "learning_rate": 6.661536763427026e-06, + "loss": 1.2783, + "step": 103137 + }, + { + "epoch": 1.24, + "grad_norm": 15.201241348297966, + "learning_rate": 6.6609861068313565e-06, + "loss": 1.2587, + "step": 103140 + }, + { + "epoch": 1.24, + "grad_norm": 4.622617165122887, + "learning_rate": 6.660435461630541e-06, + "loss": 1.3554, + "step": 103143 + }, + { + "epoch": 1.24, + "grad_norm": 5.228835905775444, + "learning_rate": 6.659884827826454e-06, + "loss": 0.9525, + "step": 103146 + }, + { + "epoch": 1.24, + "grad_norm": 12.193886766197426, + "learning_rate": 6.659334205420976e-06, + "loss": 1.1444, + "step": 103149 + }, + { + "epoch": 1.24, + "grad_norm": 9.256543364442651, + "learning_rate": 6.658783594415988e-06, + "loss": 0.9704, + "step": 103152 + }, + { + "epoch": 1.24, + "grad_norm": 15.635904012469135, + "learning_rate": 6.658232994813367e-06, + "loss": 1.064, + "step": 103155 + }, + { + "epoch": 1.24, + "grad_norm": 6.8760024043491175, + "learning_rate": 6.657682406614998e-06, + "loss": 1.3153, + "step": 103158 + }, + { + "epoch": 1.24, + "grad_norm": 17.422742568891834, + "learning_rate": 6.65713182982275e-06, + "loss": 1.2905, + "step": 103161 + }, + { + "epoch": 1.24, + "grad_norm": 12.289841661252607, + "learning_rate": 6.656581264438509e-06, + "loss": 1.3944, + "step": 103164 + }, + { + "epoch": 1.24, + "grad_norm": 51.00716116589514, + "learning_rate": 6.65603071046415e-06, + "loss": 1.444, + "step": 103167 + }, + { + "epoch": 1.24, + "grad_norm": 12.757299314699297, + "learning_rate": 6.655480167901558e-06, + "loss": 1.063, + "step": 103170 + }, + { + "epoch": 1.24, + "grad_norm": 9.662946104929459, + "learning_rate": 6.654929636752603e-06, + "loss": 1.3095, + "step": 103173 + }, + { + "epoch": 1.24, + "grad_norm": 6.926698141442414, + "learning_rate": 6.65437911701917e-06, + "loss": 1.0251, + "step": 103176 + }, + { + "epoch": 1.24, + "grad_norm": 9.242211623951036, + "learning_rate": 6.653828608703135e-06, + "loss": 1.0575, + "step": 103179 + }, + { + "epoch": 1.24, + "grad_norm": 4.221608203134335, + "learning_rate": 6.653278111806378e-06, + "loss": 1.1702, + "step": 103182 + }, + { + "epoch": 1.24, + "grad_norm": 8.599097164366716, + "learning_rate": 6.6527276263307815e-06, + "loss": 1.632, + "step": 103185 + }, + { + "epoch": 1.24, + "grad_norm": 18.88260733494272, + "learning_rate": 6.6521771522782145e-06, + "loss": 0.868, + "step": 103188 + }, + { + "epoch": 1.24, + "grad_norm": 8.918327463769371, + "learning_rate": 6.651626689650561e-06, + "loss": 1.0627, + "step": 103191 + }, + { + "epoch": 1.24, + "grad_norm": 7.364570486787987, + "learning_rate": 6.651076238449703e-06, + "loss": 1.2361, + "step": 103194 + }, + { + "epoch": 1.24, + "grad_norm": 6.301495373673019, + "learning_rate": 6.650525798677516e-06, + "loss": 0.9952, + "step": 103197 + }, + { + "epoch": 1.24, + "grad_norm": 14.642642526894607, + "learning_rate": 6.649975370335875e-06, + "loss": 1.3487, + "step": 103200 + }, + { + "epoch": 1.24, + "grad_norm": 6.5283101468126805, + "learning_rate": 6.649424953426659e-06, + "loss": 1.1211, + "step": 103203 + }, + { + "epoch": 1.24, + "grad_norm": 10.636519480168294, + "learning_rate": 6.648874547951753e-06, + "loss": 1.4675, + "step": 103206 + }, + { + "epoch": 1.24, + "grad_norm": 10.69883972921734, + "learning_rate": 6.64832415391303e-06, + "loss": 1.1038, + "step": 103209 + }, + { + "epoch": 1.24, + "grad_norm": 10.451867631888247, + "learning_rate": 6.647773771312371e-06, + "loss": 1.3857, + "step": 103212 + }, + { + "epoch": 1.24, + "grad_norm": 5.089229064229416, + "learning_rate": 6.64722340015165e-06, + "loss": 1.0817, + "step": 103215 + }, + { + "epoch": 1.24, + "grad_norm": 16.175077238521045, + "learning_rate": 6.64667304043275e-06, + "loss": 1.1909, + "step": 103218 + }, + { + "epoch": 1.24, + "grad_norm": 10.28675377632212, + "learning_rate": 6.646122692157544e-06, + "loss": 1.1999, + "step": 103221 + }, + { + "epoch": 1.24, + "grad_norm": 4.549938730918835, + "learning_rate": 6.645572355327918e-06, + "loss": 1.6659, + "step": 103224 + }, + { + "epoch": 1.24, + "grad_norm": 3.002320921966119, + "learning_rate": 6.645022029945742e-06, + "loss": 0.8691, + "step": 103227 + }, + { + "epoch": 1.24, + "grad_norm": 9.48611319155924, + "learning_rate": 6.644471716012897e-06, + "loss": 1.1453, + "step": 103230 + }, + { + "epoch": 1.24, + "grad_norm": 6.389208910700121, + "learning_rate": 6.643921413531261e-06, + "loss": 1.2567, + "step": 103233 + }, + { + "epoch": 1.24, + "grad_norm": 24.53959432634798, + "learning_rate": 6.643371122502716e-06, + "loss": 1.473, + "step": 103236 + }, + { + "epoch": 1.24, + "grad_norm": 20.708433365753784, + "learning_rate": 6.642820842929135e-06, + "loss": 1.6076, + "step": 103239 + }, + { + "epoch": 1.24, + "grad_norm": 5.914270426954437, + "learning_rate": 6.642270574812395e-06, + "loss": 1.012, + "step": 103242 + }, + { + "epoch": 1.24, + "grad_norm": 8.245200942883331, + "learning_rate": 6.641720318154376e-06, + "loss": 1.3867, + "step": 103245 + }, + { + "epoch": 1.24, + "grad_norm": 3.653383693484206, + "learning_rate": 6.641170072956957e-06, + "loss": 1.2695, + "step": 103248 + }, + { + "epoch": 1.24, + "grad_norm": 15.057319888410898, + "learning_rate": 6.6406198392220176e-06, + "loss": 1.3312, + "step": 103251 + }, + { + "epoch": 1.24, + "grad_norm": 26.688507444498825, + "learning_rate": 6.640069616951428e-06, + "loss": 1.2195, + "step": 103254 + }, + { + "epoch": 1.24, + "grad_norm": 20.3099400898302, + "learning_rate": 6.639519406147071e-06, + "loss": 1.3227, + "step": 103257 + }, + { + "epoch": 1.24, + "grad_norm": 8.838141078866787, + "learning_rate": 6.6389692068108254e-06, + "loss": 1.249, + "step": 103260 + }, + { + "epoch": 1.24, + "grad_norm": 2.166980721619557, + "learning_rate": 6.638419018944566e-06, + "loss": 1.38, + "step": 103263 + }, + { + "epoch": 1.24, + "grad_norm": 9.79482944353207, + "learning_rate": 6.637868842550176e-06, + "loss": 1.0472, + "step": 103266 + }, + { + "epoch": 1.24, + "grad_norm": 8.115044198013543, + "learning_rate": 6.637318677629525e-06, + "loss": 1.1657, + "step": 103269 + }, + { + "epoch": 1.24, + "grad_norm": 38.95075730681609, + "learning_rate": 6.636768524184493e-06, + "loss": 1.351, + "step": 103272 + }, + { + "epoch": 1.24, + "grad_norm": 9.381327529015268, + "learning_rate": 6.636218382216959e-06, + "loss": 1.7201, + "step": 103275 + }, + { + "epoch": 1.24, + "grad_norm": 5.297200160449751, + "learning_rate": 6.635668251728804e-06, + "loss": 1.0732, + "step": 103278 + }, + { + "epoch": 1.24, + "grad_norm": 2.5395284784539545, + "learning_rate": 6.6351181327218994e-06, + "loss": 0.7817, + "step": 103281 + }, + { + "epoch": 1.24, + "grad_norm": 10.652653705530579, + "learning_rate": 6.634568025198122e-06, + "loss": 1.0403, + "step": 103284 + }, + { + "epoch": 1.24, + "grad_norm": 7.525513540831253, + "learning_rate": 6.634017929159352e-06, + "loss": 1.1283, + "step": 103287 + }, + { + "epoch": 1.24, + "grad_norm": 10.12142957090914, + "learning_rate": 6.633467844607468e-06, + "loss": 1.0906, + "step": 103290 + }, + { + "epoch": 1.24, + "grad_norm": 8.398661141042368, + "learning_rate": 6.632917771544349e-06, + "loss": 1.2671, + "step": 103293 + }, + { + "epoch": 1.24, + "grad_norm": 16.25336316250642, + "learning_rate": 6.6323677099718635e-06, + "loss": 0.8387, + "step": 103296 + }, + { + "epoch": 1.24, + "grad_norm": 36.56700754053823, + "learning_rate": 6.631817659891895e-06, + "loss": 1.0794, + "step": 103299 + }, + { + "epoch": 1.24, + "grad_norm": 7.731113082369868, + "learning_rate": 6.631267621306321e-06, + "loss": 1.1934, + "step": 103302 + }, + { + "epoch": 1.24, + "grad_norm": 4.577352060401858, + "learning_rate": 6.630717594217019e-06, + "loss": 1.306, + "step": 103305 + }, + { + "epoch": 1.24, + "grad_norm": 9.204135288378156, + "learning_rate": 6.63016757862586e-06, + "loss": 1.0056, + "step": 103308 + }, + { + "epoch": 1.24, + "grad_norm": 29.13832271903771, + "learning_rate": 6.629617574534728e-06, + "loss": 0.9394, + "step": 103311 + }, + { + "epoch": 1.24, + "grad_norm": 14.012798956110915, + "learning_rate": 6.629067581945494e-06, + "loss": 1.2385, + "step": 103314 + }, + { + "epoch": 1.24, + "grad_norm": 9.981442312908113, + "learning_rate": 6.62851760086004e-06, + "loss": 1.1367, + "step": 103317 + }, + { + "epoch": 1.24, + "grad_norm": 8.576464287925178, + "learning_rate": 6.627967631280243e-06, + "loss": 1.0296, + "step": 103320 + }, + { + "epoch": 1.24, + "grad_norm": 13.066732358685398, + "learning_rate": 6.6274176732079766e-06, + "loss": 1.0826, + "step": 103323 + }, + { + "epoch": 1.24, + "grad_norm": 10.31541038293196, + "learning_rate": 6.626867726645117e-06, + "loss": 1.1589, + "step": 103326 + }, + { + "epoch": 1.24, + "grad_norm": 4.37665655395491, + "learning_rate": 6.626317791593544e-06, + "loss": 1.2099, + "step": 103329 + }, + { + "epoch": 1.24, + "grad_norm": 9.00620595549318, + "learning_rate": 6.625767868055136e-06, + "loss": 1.1729, + "step": 103332 + }, + { + "epoch": 1.24, + "grad_norm": 4.256956362588271, + "learning_rate": 6.6252179560317655e-06, + "loss": 1.2129, + "step": 103335 + }, + { + "epoch": 1.24, + "grad_norm": 10.524816760636849, + "learning_rate": 6.624668055525307e-06, + "loss": 1.1971, + "step": 103338 + }, + { + "epoch": 1.24, + "grad_norm": 8.670248550066828, + "learning_rate": 6.624118166537643e-06, + "loss": 1.1464, + "step": 103341 + }, + { + "epoch": 1.24, + "grad_norm": 10.935454125627079, + "learning_rate": 6.623568289070648e-06, + "loss": 0.857, + "step": 103344 + }, + { + "epoch": 1.24, + "grad_norm": 4.718848272464833, + "learning_rate": 6.623018423126201e-06, + "loss": 1.1251, + "step": 103347 + }, + { + "epoch": 1.24, + "grad_norm": 4.940268643077085, + "learning_rate": 6.6224685687061705e-06, + "loss": 1.0965, + "step": 103350 + }, + { + "epoch": 1.24, + "grad_norm": 17.574055406256498, + "learning_rate": 6.621918725812439e-06, + "loss": 1.1374, + "step": 103353 + }, + { + "epoch": 1.24, + "grad_norm": 12.818456068252564, + "learning_rate": 6.621368894446884e-06, + "loss": 1.2842, + "step": 103356 + }, + { + "epoch": 1.24, + "grad_norm": 4.074298681504689, + "learning_rate": 6.620819074611383e-06, + "loss": 1.1958, + "step": 103359 + }, + { + "epoch": 1.24, + "grad_norm": 17.826132251609888, + "learning_rate": 6.620269266307805e-06, + "loss": 1.0995, + "step": 103362 + }, + { + "epoch": 1.24, + "grad_norm": 27.75074037052656, + "learning_rate": 6.619719469538032e-06, + "loss": 1.2988, + "step": 103365 + }, + { + "epoch": 1.24, + "grad_norm": 7.235282355833982, + "learning_rate": 6.619169684303936e-06, + "loss": 0.8735, + "step": 103368 + }, + { + "epoch": 1.24, + "grad_norm": 13.604380184508665, + "learning_rate": 6.618619910607398e-06, + "loss": 1.2285, + "step": 103371 + }, + { + "epoch": 1.24, + "grad_norm": 3.4951219413824175, + "learning_rate": 6.618070148450295e-06, + "loss": 1.0084, + "step": 103374 + }, + { + "epoch": 1.24, + "grad_norm": 7.769633146697299, + "learning_rate": 6.617520397834499e-06, + "loss": 1.0446, + "step": 103377 + }, + { + "epoch": 1.24, + "grad_norm": 8.63321828451223, + "learning_rate": 6.616970658761885e-06, + "loss": 0.881, + "step": 103380 + }, + { + "epoch": 1.24, + "grad_norm": 7.920080803308684, + "learning_rate": 6.616420931234333e-06, + "loss": 1.182, + "step": 103383 + }, + { + "epoch": 1.24, + "grad_norm": 5.453481550713518, + "learning_rate": 6.615871215253719e-06, + "loss": 0.9591, + "step": 103386 + }, + { + "epoch": 1.24, + "grad_norm": 20.212744833796076, + "learning_rate": 6.615321510821918e-06, + "loss": 1.085, + "step": 103389 + }, + { + "epoch": 1.24, + "grad_norm": 19.950963406165133, + "learning_rate": 6.614771817940802e-06, + "loss": 1.3878, + "step": 103392 + }, + { + "epoch": 1.24, + "grad_norm": 5.725183158433125, + "learning_rate": 6.6142221366122515e-06, + "loss": 1.2308, + "step": 103395 + }, + { + "epoch": 1.24, + "grad_norm": 5.825492946242594, + "learning_rate": 6.613672466838144e-06, + "loss": 1.257, + "step": 103398 + }, + { + "epoch": 1.24, + "grad_norm": 20.896432944094116, + "learning_rate": 6.613122808620353e-06, + "loss": 1.2736, + "step": 103401 + }, + { + "epoch": 1.24, + "grad_norm": 4.107384275943529, + "learning_rate": 6.61257316196075e-06, + "loss": 1.1992, + "step": 103404 + }, + { + "epoch": 1.24, + "grad_norm": 3.8642975915113946, + "learning_rate": 6.612023526861218e-06, + "loss": 0.7776, + "step": 103407 + }, + { + "epoch": 1.24, + "grad_norm": 8.840847225724293, + "learning_rate": 6.611473903323627e-06, + "loss": 1.0328, + "step": 103410 + }, + { + "epoch": 1.24, + "grad_norm": 15.871439619933316, + "learning_rate": 6.6109242913498574e-06, + "loss": 1.6945, + "step": 103413 + }, + { + "epoch": 1.24, + "grad_norm": 29.038100659446727, + "learning_rate": 6.610374690941778e-06, + "loss": 1.3905, + "step": 103416 + }, + { + "epoch": 1.24, + "grad_norm": 8.948805138979354, + "learning_rate": 6.609825102101274e-06, + "loss": 1.2239, + "step": 103419 + }, + { + "epoch": 1.24, + "grad_norm": 5.305989194075293, + "learning_rate": 6.609275524830212e-06, + "loss": 1.2733, + "step": 103422 + }, + { + "epoch": 1.24, + "grad_norm": 14.361930259175027, + "learning_rate": 6.608725959130471e-06, + "loss": 1.323, + "step": 103425 + }, + { + "epoch": 1.24, + "grad_norm": 3.7972620758174815, + "learning_rate": 6.608176405003932e-06, + "loss": 1.3851, + "step": 103428 + }, + { + "epoch": 1.24, + "grad_norm": 4.98406792051907, + "learning_rate": 6.607626862452462e-06, + "loss": 1.1544, + "step": 103431 + }, + { + "epoch": 1.24, + "grad_norm": 7.571168150363757, + "learning_rate": 6.607077331477938e-06, + "loss": 1.1398, + "step": 103434 + }, + { + "epoch": 1.24, + "grad_norm": 8.128587787268057, + "learning_rate": 6.6065278120822374e-06, + "loss": 1.1021, + "step": 103437 + }, + { + "epoch": 1.24, + "grad_norm": 17.372285710398163, + "learning_rate": 6.605978304267237e-06, + "loss": 1.2532, + "step": 103440 + }, + { + "epoch": 1.24, + "grad_norm": 6.142577224783432, + "learning_rate": 6.60542880803481e-06, + "loss": 1.3232, + "step": 103443 + }, + { + "epoch": 1.24, + "grad_norm": 41.58035341712889, + "learning_rate": 6.60487932338683e-06, + "loss": 1.3139, + "step": 103446 + }, + { + "epoch": 1.24, + "grad_norm": 5.993255422961999, + "learning_rate": 6.604329850325175e-06, + "loss": 1.0424, + "step": 103449 + }, + { + "epoch": 1.24, + "grad_norm": 10.113853697889807, + "learning_rate": 6.6037803888517185e-06, + "loss": 1.573, + "step": 103452 + }, + { + "epoch": 1.24, + "grad_norm": 2.5065588742147797, + "learning_rate": 6.603230938968339e-06, + "loss": 1.1241, + "step": 103455 + }, + { + "epoch": 1.24, + "grad_norm": 5.148094997950581, + "learning_rate": 6.602681500676905e-06, + "loss": 1.3706, + "step": 103458 + }, + { + "epoch": 1.24, + "grad_norm": 5.355782557180625, + "learning_rate": 6.602132073979297e-06, + "loss": 1.4167, + "step": 103461 + }, + { + "epoch": 1.24, + "grad_norm": 10.32784875709612, + "learning_rate": 6.601582658877388e-06, + "loss": 1.3503, + "step": 103464 + }, + { + "epoch": 1.24, + "grad_norm": 10.777666968010216, + "learning_rate": 6.601033255373055e-06, + "loss": 1.0963, + "step": 103467 + }, + { + "epoch": 1.24, + "grad_norm": 4.674422473724578, + "learning_rate": 6.600483863468168e-06, + "loss": 0.9182, + "step": 103470 + }, + { + "epoch": 1.24, + "grad_norm": 4.083043403526388, + "learning_rate": 6.599934483164607e-06, + "loss": 1.4383, + "step": 103473 + }, + { + "epoch": 1.24, + "grad_norm": 9.790457704814397, + "learning_rate": 6.599385114464244e-06, + "loss": 1.2092, + "step": 103476 + }, + { + "epoch": 1.24, + "grad_norm": 7.634668884821228, + "learning_rate": 6.598835757368953e-06, + "loss": 1.1157, + "step": 103479 + }, + { + "epoch": 1.24, + "grad_norm": 17.46313858848756, + "learning_rate": 6.598286411880617e-06, + "loss": 1.1682, + "step": 103482 + }, + { + "epoch": 1.24, + "grad_norm": 13.507379976872079, + "learning_rate": 6.5977370780011e-06, + "loss": 1.1468, + "step": 103485 + }, + { + "epoch": 1.24, + "grad_norm": 30.794313671328123, + "learning_rate": 6.597187755732279e-06, + "loss": 1.0275, + "step": 103488 + }, + { + "epoch": 1.24, + "grad_norm": 13.290595287855847, + "learning_rate": 6.596638445076033e-06, + "loss": 1.2882, + "step": 103491 + }, + { + "epoch": 1.24, + "grad_norm": 7.542095479025238, + "learning_rate": 6.596089146034235e-06, + "loss": 1.2995, + "step": 103494 + }, + { + "epoch": 1.24, + "grad_norm": 15.684646168831854, + "learning_rate": 6.595539858608757e-06, + "loss": 1.0839, + "step": 103497 + }, + { + "epoch": 1.24, + "grad_norm": 22.863700969235815, + "learning_rate": 6.594990582801473e-06, + "loss": 1.3415, + "step": 103500 + }, + { + "epoch": 1.24, + "grad_norm": 8.066236022821863, + "learning_rate": 6.594441318614263e-06, + "loss": 1.2509, + "step": 103503 + }, + { + "epoch": 1.24, + "grad_norm": 8.720170988431862, + "learning_rate": 6.593892066048994e-06, + "loss": 1.0479, + "step": 103506 + }, + { + "epoch": 1.24, + "grad_norm": 5.905917411189286, + "learning_rate": 6.593342825107552e-06, + "loss": 0.822, + "step": 103509 + }, + { + "epoch": 1.24, + "grad_norm": 9.080845207417743, + "learning_rate": 6.5927935957917975e-06, + "loss": 1.2267, + "step": 103512 + }, + { + "epoch": 1.24, + "grad_norm": 16.612258238128828, + "learning_rate": 6.592244378103614e-06, + "loss": 1.0065, + "step": 103515 + }, + { + "epoch": 1.24, + "grad_norm": 67.57063478954443, + "learning_rate": 6.59169517204487e-06, + "loss": 1.3558, + "step": 103518 + }, + { + "epoch": 1.24, + "grad_norm": 12.671473238913059, + "learning_rate": 6.591145977617448e-06, + "loss": 1.0988, + "step": 103521 + }, + { + "epoch": 1.24, + "grad_norm": 5.638961370529391, + "learning_rate": 6.590596794823212e-06, + "loss": 1.0381, + "step": 103524 + }, + { + "epoch": 1.24, + "grad_norm": 12.99038680786036, + "learning_rate": 6.590047623664043e-06, + "loss": 1.2612, + "step": 103527 + }, + { + "epoch": 1.24, + "grad_norm": 12.592286475758627, + "learning_rate": 6.5894984641418125e-06, + "loss": 1.1408, + "step": 103530 + }, + { + "epoch": 1.24, + "grad_norm": 6.555778785187149, + "learning_rate": 6.588949316258393e-06, + "loss": 1.1832, + "step": 103533 + }, + { + "epoch": 1.24, + "grad_norm": 6.544632358176496, + "learning_rate": 6.588400180015666e-06, + "loss": 1.6305, + "step": 103536 + }, + { + "epoch": 1.25, + "grad_norm": 6.220192589499191, + "learning_rate": 6.587851055415499e-06, + "loss": 1.2042, + "step": 103539 + }, + { + "epoch": 1.25, + "grad_norm": 7.00082535868014, + "learning_rate": 6.587301942459765e-06, + "loss": 1.5164, + "step": 103542 + }, + { + "epoch": 1.25, + "grad_norm": 16.791146106568046, + "learning_rate": 6.586752841150342e-06, + "loss": 1.0204, + "step": 103545 + }, + { + "epoch": 1.25, + "grad_norm": 11.034027817006674, + "learning_rate": 6.586203751489104e-06, + "loss": 0.9621, + "step": 103548 + }, + { + "epoch": 1.25, + "grad_norm": 7.845832238798617, + "learning_rate": 6.585654673477919e-06, + "loss": 1.4542, + "step": 103551 + }, + { + "epoch": 1.25, + "grad_norm": 12.928923622699967, + "learning_rate": 6.585105607118665e-06, + "loss": 1.2717, + "step": 103554 + }, + { + "epoch": 1.25, + "grad_norm": 25.061736045771376, + "learning_rate": 6.584556552413217e-06, + "loss": 1.4322, + "step": 103557 + }, + { + "epoch": 1.25, + "grad_norm": 7.143698010369117, + "learning_rate": 6.584007509363446e-06, + "loss": 1.2383, + "step": 103560 + }, + { + "epoch": 1.25, + "grad_norm": 2.977455072355138, + "learning_rate": 6.583458477971231e-06, + "loss": 1.048, + "step": 103563 + }, + { + "epoch": 1.25, + "grad_norm": 3.4718935596119755, + "learning_rate": 6.582909458238437e-06, + "loss": 1.1689, + "step": 103566 + }, + { + "epoch": 1.25, + "grad_norm": 8.527527892224828, + "learning_rate": 6.5823604501669444e-06, + "loss": 1.1746, + "step": 103569 + }, + { + "epoch": 1.25, + "grad_norm": 12.741119521714738, + "learning_rate": 6.581811453758622e-06, + "loss": 1.1392, + "step": 103572 + }, + { + "epoch": 1.25, + "grad_norm": 16.48532446684939, + "learning_rate": 6.58126246901535e-06, + "loss": 0.8791, + "step": 103575 + }, + { + "epoch": 1.25, + "grad_norm": 6.0251510011654865, + "learning_rate": 6.580713495938995e-06, + "loss": 1.056, + "step": 103578 + }, + { + "epoch": 1.25, + "grad_norm": 3.816359811028654, + "learning_rate": 6.580164534531435e-06, + "loss": 1.2561, + "step": 103581 + }, + { + "epoch": 1.25, + "grad_norm": 6.366827696958492, + "learning_rate": 6.5796155847945375e-06, + "loss": 1.3145, + "step": 103584 + }, + { + "epoch": 1.25, + "grad_norm": 16.59606395336586, + "learning_rate": 6.579066646730184e-06, + "loss": 0.9003, + "step": 103587 + }, + { + "epoch": 1.25, + "grad_norm": 7.8266432530529, + "learning_rate": 6.578517720340243e-06, + "loss": 1.4214, + "step": 103590 + }, + { + "epoch": 1.25, + "grad_norm": 8.003132580262095, + "learning_rate": 6.577968805626589e-06, + "loss": 1.6595, + "step": 103593 + }, + { + "epoch": 1.25, + "grad_norm": 5.328801871995173, + "learning_rate": 6.577419902591094e-06, + "loss": 1.6652, + "step": 103596 + }, + { + "epoch": 1.25, + "grad_norm": 33.09207561266568, + "learning_rate": 6.5768710112356315e-06, + "loss": 1.1378, + "step": 103599 + }, + { + "epoch": 1.25, + "grad_norm": 5.658702110877757, + "learning_rate": 6.576322131562078e-06, + "loss": 1.1513, + "step": 103602 + }, + { + "epoch": 1.25, + "grad_norm": 17.769167572776418, + "learning_rate": 6.5757732635723e-06, + "loss": 1.3189, + "step": 103605 + }, + { + "epoch": 1.25, + "grad_norm": 4.26080356140845, + "learning_rate": 6.575224407268176e-06, + "loss": 1.4766, + "step": 103608 + }, + { + "epoch": 1.25, + "grad_norm": 11.021759703551982, + "learning_rate": 6.5746755626515775e-06, + "loss": 1.4284, + "step": 103611 + }, + { + "epoch": 1.25, + "grad_norm": 8.78268468933132, + "learning_rate": 6.574126729724377e-06, + "loss": 1.3585, + "step": 103614 + }, + { + "epoch": 1.25, + "grad_norm": 5.657069350612067, + "learning_rate": 6.573577908488451e-06, + "loss": 1.0922, + "step": 103617 + }, + { + "epoch": 1.25, + "grad_norm": 5.499695806353658, + "learning_rate": 6.573029098945666e-06, + "loss": 1.3144, + "step": 103620 + }, + { + "epoch": 1.25, + "grad_norm": 15.91555333013542, + "learning_rate": 6.5724803010979e-06, + "loss": 1.1877, + "step": 103623 + }, + { + "epoch": 1.25, + "grad_norm": 5.296612664094685, + "learning_rate": 6.571931514947022e-06, + "loss": 1.0489, + "step": 103626 + }, + { + "epoch": 1.25, + "grad_norm": 6.236715820059444, + "learning_rate": 6.571382740494911e-06, + "loss": 1.3174, + "step": 103629 + }, + { + "epoch": 1.25, + "grad_norm": 18.846774930544132, + "learning_rate": 6.570833977743431e-06, + "loss": 1.0398, + "step": 103632 + }, + { + "epoch": 1.25, + "grad_norm": 8.425337322048483, + "learning_rate": 6.570285226694464e-06, + "loss": 1.0279, + "step": 103635 + }, + { + "epoch": 1.25, + "grad_norm": 6.646263051692449, + "learning_rate": 6.569736487349874e-06, + "loss": 1.3264, + "step": 103638 + }, + { + "epoch": 1.25, + "grad_norm": 17.54445202821549, + "learning_rate": 6.56918775971154e-06, + "loss": 1.3491, + "step": 103641 + }, + { + "epoch": 1.25, + "grad_norm": 3.9948384627272397, + "learning_rate": 6.568639043781335e-06, + "loss": 0.9783, + "step": 103644 + }, + { + "epoch": 1.25, + "grad_norm": 17.252222371117092, + "learning_rate": 6.568090339561124e-06, + "loss": 1.0313, + "step": 103647 + }, + { + "epoch": 1.25, + "grad_norm": 6.022335626001992, + "learning_rate": 6.567541647052786e-06, + "loss": 0.9263, + "step": 103650 + }, + { + "epoch": 1.25, + "grad_norm": 6.335207674808363, + "learning_rate": 6.566992966258192e-06, + "loss": 0.9122, + "step": 103653 + }, + { + "epoch": 1.25, + "grad_norm": 30.258342089179084, + "learning_rate": 6.566444297179218e-06, + "loss": 1.3226, + "step": 103656 + }, + { + "epoch": 1.25, + "grad_norm": 21.659107126041683, + "learning_rate": 6.565895639817729e-06, + "loss": 1.0983, + "step": 103659 + }, + { + "epoch": 1.25, + "grad_norm": 8.488581378879807, + "learning_rate": 6.565346994175601e-06, + "loss": 1.2279, + "step": 103662 + }, + { + "epoch": 1.25, + "grad_norm": 4.177808955549184, + "learning_rate": 6.564798360254707e-06, + "loss": 1.263, + "step": 103665 + }, + { + "epoch": 1.25, + "grad_norm": 14.298479570538207, + "learning_rate": 6.5642497380569185e-06, + "loss": 1.1851, + "step": 103668 + }, + { + "epoch": 1.25, + "grad_norm": 16.234149674188824, + "learning_rate": 6.563701127584112e-06, + "loss": 1.2442, + "step": 103671 + }, + { + "epoch": 1.25, + "grad_norm": 4.038512648594039, + "learning_rate": 6.56315252883815e-06, + "loss": 1.2023, + "step": 103674 + }, + { + "epoch": 1.25, + "grad_norm": 11.338576654133755, + "learning_rate": 6.562603941820915e-06, + "loss": 0.9475, + "step": 103677 + }, + { + "epoch": 1.25, + "grad_norm": 5.819050207584827, + "learning_rate": 6.5620553665342714e-06, + "loss": 0.8166, + "step": 103680 + }, + { + "epoch": 1.25, + "grad_norm": 13.39083373157857, + "learning_rate": 6.561506802980099e-06, + "loss": 1.3053, + "step": 103683 + }, + { + "epoch": 1.25, + "grad_norm": 8.741561883421507, + "learning_rate": 6.5609582511602635e-06, + "loss": 1.0029, + "step": 103686 + }, + { + "epoch": 1.25, + "grad_norm": 11.97423562949993, + "learning_rate": 6.560409711076635e-06, + "loss": 1.3451, + "step": 103689 + }, + { + "epoch": 1.25, + "grad_norm": 187.54936963214837, + "learning_rate": 6.559861182731091e-06, + "loss": 1.0368, + "step": 103692 + }, + { + "epoch": 1.25, + "grad_norm": 8.808456644945576, + "learning_rate": 6.559312666125502e-06, + "loss": 1.0577, + "step": 103695 + }, + { + "epoch": 1.25, + "grad_norm": 7.445882010915813, + "learning_rate": 6.558764161261744e-06, + "loss": 1.3512, + "step": 103698 + }, + { + "epoch": 1.25, + "grad_norm": 13.777630695959177, + "learning_rate": 6.558215668141679e-06, + "loss": 1.0074, + "step": 103701 + }, + { + "epoch": 1.25, + "grad_norm": 6.1761640430461044, + "learning_rate": 6.557667186767184e-06, + "loss": 1.0631, + "step": 103704 + }, + { + "epoch": 1.25, + "grad_norm": 6.366424629819209, + "learning_rate": 6.557118717140133e-06, + "loss": 0.9824, + "step": 103707 + }, + { + "epoch": 1.25, + "grad_norm": 4.729936794679899, + "learning_rate": 6.556570259262398e-06, + "loss": 1.1149, + "step": 103710 + }, + { + "epoch": 1.25, + "grad_norm": 15.991183802809283, + "learning_rate": 6.556021813135844e-06, + "loss": 1.1951, + "step": 103713 + }, + { + "epoch": 1.25, + "grad_norm": 10.687735899575442, + "learning_rate": 6.555473378762347e-06, + "loss": 1.0896, + "step": 103716 + }, + { + "epoch": 1.25, + "grad_norm": 16.79409587967676, + "learning_rate": 6.554924956143781e-06, + "loss": 1.3005, + "step": 103719 + }, + { + "epoch": 1.25, + "grad_norm": 16.183056704290554, + "learning_rate": 6.554376545282013e-06, + "loss": 1.4512, + "step": 103722 + }, + { + "epoch": 1.25, + "grad_norm": 12.06606796885475, + "learning_rate": 6.553828146178921e-06, + "loss": 1.2399, + "step": 103725 + }, + { + "epoch": 1.25, + "grad_norm": 4.747885777345855, + "learning_rate": 6.5532797588363706e-06, + "loss": 1.1291, + "step": 103728 + }, + { + "epoch": 1.25, + "grad_norm": 6.082933108366627, + "learning_rate": 6.5527313832562325e-06, + "loss": 1.3773, + "step": 103731 + }, + { + "epoch": 1.25, + "grad_norm": 37.16649628871341, + "learning_rate": 6.552183019440379e-06, + "loss": 0.8535, + "step": 103734 + }, + { + "epoch": 1.25, + "grad_norm": 11.536887973325944, + "learning_rate": 6.5516346673906895e-06, + "loss": 1.1601, + "step": 103737 + }, + { + "epoch": 1.25, + "grad_norm": 11.337670576778006, + "learning_rate": 6.551086327109025e-06, + "loss": 1.2147, + "step": 103740 + }, + { + "epoch": 1.25, + "grad_norm": 5.563750413309458, + "learning_rate": 6.550537998597259e-06, + "loss": 0.9159, + "step": 103743 + }, + { + "epoch": 1.25, + "grad_norm": 17.441603899696066, + "learning_rate": 6.549989681857265e-06, + "loss": 1.3129, + "step": 103746 + }, + { + "epoch": 1.25, + "grad_norm": 6.303163365837733, + "learning_rate": 6.549441376890915e-06, + "loss": 1.1098, + "step": 103749 + }, + { + "epoch": 1.25, + "grad_norm": 16.321908315198694, + "learning_rate": 6.548893083700081e-06, + "loss": 1.123, + "step": 103752 + }, + { + "epoch": 1.25, + "grad_norm": 9.738852313314297, + "learning_rate": 6.548344802286628e-06, + "loss": 0.9273, + "step": 103755 + }, + { + "epoch": 1.25, + "grad_norm": 8.699060411404254, + "learning_rate": 6.54779653265243e-06, + "loss": 1.3423, + "step": 103758 + }, + { + "epoch": 1.25, + "grad_norm": 6.164261892759574, + "learning_rate": 6.54724827479936e-06, + "loss": 1.424, + "step": 103761 + }, + { + "epoch": 1.25, + "grad_norm": 16.22887223874407, + "learning_rate": 6.546700028729293e-06, + "loss": 1.2616, + "step": 103764 + }, + { + "epoch": 1.25, + "grad_norm": 19.175037568790863, + "learning_rate": 6.546151794444089e-06, + "loss": 1.3113, + "step": 103767 + }, + { + "epoch": 1.25, + "grad_norm": 19.17661274513879, + "learning_rate": 6.545603571945624e-06, + "loss": 1.1117, + "step": 103770 + }, + { + "epoch": 1.25, + "grad_norm": 11.58358104589091, + "learning_rate": 6.5450553612357746e-06, + "loss": 1.2481, + "step": 103773 + }, + { + "epoch": 1.25, + "grad_norm": 14.785823012471406, + "learning_rate": 6.5445071623164025e-06, + "loss": 1.379, + "step": 103776 + }, + { + "epoch": 1.25, + "grad_norm": 5.959779251871953, + "learning_rate": 6.5439589751893876e-06, + "loss": 0.9716, + "step": 103779 + }, + { + "epoch": 1.25, + "grad_norm": 17.070426141750133, + "learning_rate": 6.543410799856593e-06, + "loss": 1.4425, + "step": 103782 + }, + { + "epoch": 1.25, + "grad_norm": 3.6182937280929837, + "learning_rate": 6.542862636319891e-06, + "loss": 1.3285, + "step": 103785 + }, + { + "epoch": 1.25, + "grad_norm": 10.961982755950523, + "learning_rate": 6.542314484581154e-06, + "loss": 1.0786, + "step": 103788 + }, + { + "epoch": 1.25, + "grad_norm": 6.8411003257144225, + "learning_rate": 6.541766344642256e-06, + "loss": 1.0692, + "step": 103791 + }, + { + "epoch": 1.25, + "grad_norm": 11.214083860485148, + "learning_rate": 6.5412182165050605e-06, + "loss": 1.3827, + "step": 103794 + }, + { + "epoch": 1.25, + "grad_norm": 15.660137490904551, + "learning_rate": 6.5406701001714414e-06, + "loss": 1.381, + "step": 103797 + }, + { + "epoch": 1.25, + "grad_norm": 30.12414670740495, + "learning_rate": 6.540121995643267e-06, + "loss": 1.1426, + "step": 103800 + }, + { + "epoch": 1.25, + "grad_norm": 5.175911723615834, + "learning_rate": 6.539573902922413e-06, + "loss": 1.3335, + "step": 103803 + }, + { + "epoch": 1.25, + "grad_norm": 15.609175955874015, + "learning_rate": 6.539025822010749e-06, + "loss": 1.4956, + "step": 103806 + }, + { + "epoch": 1.25, + "grad_norm": 15.35265320801917, + "learning_rate": 6.538477752910141e-06, + "loss": 1.5414, + "step": 103809 + }, + { + "epoch": 1.25, + "grad_norm": 14.17277430959608, + "learning_rate": 6.5379296956224595e-06, + "loss": 1.2004, + "step": 103812 + }, + { + "epoch": 1.25, + "grad_norm": 7.990982144194415, + "learning_rate": 6.537381650149579e-06, + "loss": 1.2877, + "step": 103815 + }, + { + "epoch": 1.25, + "grad_norm": 9.917613036844804, + "learning_rate": 6.53683361649337e-06, + "loss": 1.3041, + "step": 103818 + }, + { + "epoch": 1.25, + "grad_norm": 11.927216783291074, + "learning_rate": 6.5362855946556954e-06, + "loss": 0.981, + "step": 103821 + }, + { + "epoch": 1.25, + "grad_norm": 7.4849255623511, + "learning_rate": 6.535737584638435e-06, + "loss": 0.9588, + "step": 103824 + }, + { + "epoch": 1.25, + "grad_norm": 13.46869855946841, + "learning_rate": 6.53518958644345e-06, + "loss": 1.0222, + "step": 103827 + }, + { + "epoch": 1.25, + "grad_norm": 4.522115443578858, + "learning_rate": 6.5346416000726155e-06, + "loss": 1.5023, + "step": 103830 + }, + { + "epoch": 1.25, + "grad_norm": 3.9326984570536987, + "learning_rate": 6.5340936255278065e-06, + "loss": 1.0923, + "step": 103833 + }, + { + "epoch": 1.25, + "grad_norm": 22.74477775563528, + "learning_rate": 6.533545662810884e-06, + "loss": 1.3722, + "step": 103836 + }, + { + "epoch": 1.25, + "grad_norm": 11.199983337331368, + "learning_rate": 6.53299771192372e-06, + "loss": 1.1784, + "step": 103839 + }, + { + "epoch": 1.25, + "grad_norm": 9.494513634359818, + "learning_rate": 6.532449772868186e-06, + "loss": 1.5799, + "step": 103842 + }, + { + "epoch": 1.25, + "grad_norm": 4.923531567725103, + "learning_rate": 6.531901845646156e-06, + "loss": 1.2166, + "step": 103845 + }, + { + "epoch": 1.25, + "grad_norm": 12.388514524432559, + "learning_rate": 6.531353930259493e-06, + "loss": 1.1934, + "step": 103848 + }, + { + "epoch": 1.25, + "grad_norm": 33.118718395943574, + "learning_rate": 6.530806026710068e-06, + "loss": 1.0661, + "step": 103851 + }, + { + "epoch": 1.25, + "grad_norm": 11.986127234132992, + "learning_rate": 6.530258134999752e-06, + "loss": 0.7428, + "step": 103854 + }, + { + "epoch": 1.25, + "grad_norm": 10.698933298987386, + "learning_rate": 6.529710255130419e-06, + "loss": 1.3212, + "step": 103857 + }, + { + "epoch": 1.25, + "grad_norm": 4.105812238390225, + "learning_rate": 6.529162387103935e-06, + "loss": 0.8649, + "step": 103860 + }, + { + "epoch": 1.25, + "grad_norm": 3.035775879262536, + "learning_rate": 6.528614530922166e-06, + "loss": 1.0602, + "step": 103863 + }, + { + "epoch": 1.25, + "grad_norm": 8.98066715363193, + "learning_rate": 6.528066686586988e-06, + "loss": 1.3422, + "step": 103866 + }, + { + "epoch": 1.25, + "grad_norm": 11.420493672871427, + "learning_rate": 6.527518854100266e-06, + "loss": 1.1613, + "step": 103869 + }, + { + "epoch": 1.25, + "grad_norm": 9.363744835488102, + "learning_rate": 6.526971033463874e-06, + "loss": 1.3942, + "step": 103872 + }, + { + "epoch": 1.25, + "grad_norm": 10.33074365356551, + "learning_rate": 6.5264232246796765e-06, + "loss": 0.9239, + "step": 103875 + }, + { + "epoch": 1.25, + "grad_norm": 5.60973868504503, + "learning_rate": 6.525875427749546e-06, + "loss": 1.3197, + "step": 103878 + }, + { + "epoch": 1.25, + "grad_norm": 6.352950843264014, + "learning_rate": 6.52532764267535e-06, + "loss": 1.1784, + "step": 103881 + }, + { + "epoch": 1.25, + "grad_norm": 3.3217458013300782, + "learning_rate": 6.524779869458958e-06, + "loss": 1.2677, + "step": 103884 + }, + { + "epoch": 1.25, + "grad_norm": 14.803619721479702, + "learning_rate": 6.524232108102246e-06, + "loss": 1.1303, + "step": 103887 + }, + { + "epoch": 1.25, + "grad_norm": 13.655044398789991, + "learning_rate": 6.523684358607075e-06, + "loss": 1.4082, + "step": 103890 + }, + { + "epoch": 1.25, + "grad_norm": 85.81599687594381, + "learning_rate": 6.523136620975316e-06, + "loss": 1.042, + "step": 103893 + }, + { + "epoch": 1.25, + "grad_norm": 11.303194815406474, + "learning_rate": 6.522588895208839e-06, + "loss": 1.3262, + "step": 103896 + }, + { + "epoch": 1.25, + "grad_norm": 19.777215532409702, + "learning_rate": 6.5220411813095175e-06, + "loss": 1.1883, + "step": 103899 + }, + { + "epoch": 1.25, + "grad_norm": 13.208832865859005, + "learning_rate": 6.521493479279214e-06, + "loss": 1.2245, + "step": 103902 + }, + { + "epoch": 1.25, + "grad_norm": 15.295095143101392, + "learning_rate": 6.520945789119799e-06, + "loss": 0.9274, + "step": 103905 + }, + { + "epoch": 1.25, + "grad_norm": 2.693877430143303, + "learning_rate": 6.520398110833144e-06, + "loss": 1.3887, + "step": 103908 + }, + { + "epoch": 1.25, + "grad_norm": 6.669121604939963, + "learning_rate": 6.519850444421116e-06, + "loss": 1.3175, + "step": 103911 + }, + { + "epoch": 1.25, + "grad_norm": 4.699705469572086, + "learning_rate": 6.5193027898855885e-06, + "loss": 1.0558, + "step": 103914 + }, + { + "epoch": 1.25, + "grad_norm": 7.913123305973457, + "learning_rate": 6.518755147228423e-06, + "loss": 1.04, + "step": 103917 + }, + { + "epoch": 1.25, + "grad_norm": 9.449814782655976, + "learning_rate": 6.518207516451493e-06, + "loss": 1.0329, + "step": 103920 + }, + { + "epoch": 1.25, + "grad_norm": 7.96423013866517, + "learning_rate": 6.5176598975566675e-06, + "loss": 1.3863, + "step": 103923 + }, + { + "epoch": 1.25, + "grad_norm": 6.6009569286649885, + "learning_rate": 6.517112290545816e-06, + "loss": 1.044, + "step": 103926 + }, + { + "epoch": 1.25, + "grad_norm": 11.022285296270368, + "learning_rate": 6.516564695420802e-06, + "loss": 1.1225, + "step": 103929 + }, + { + "epoch": 1.25, + "grad_norm": 7.389963427894154, + "learning_rate": 6.516017112183501e-06, + "loss": 1.0286, + "step": 103932 + }, + { + "epoch": 1.25, + "grad_norm": 8.465940816145126, + "learning_rate": 6.515469540835775e-06, + "loss": 1.2458, + "step": 103935 + }, + { + "epoch": 1.25, + "grad_norm": 7.2311076177204425, + "learning_rate": 6.514921981379499e-06, + "loss": 1.2712, + "step": 103938 + }, + { + "epoch": 1.25, + "grad_norm": 10.402123780036861, + "learning_rate": 6.51437443381654e-06, + "loss": 1.159, + "step": 103941 + }, + { + "epoch": 1.25, + "grad_norm": 11.271558847596431, + "learning_rate": 6.513826898148765e-06, + "loss": 1.2589, + "step": 103944 + }, + { + "epoch": 1.25, + "grad_norm": 6.87746736655024, + "learning_rate": 6.513279374378041e-06, + "loss": 1.3324, + "step": 103947 + }, + { + "epoch": 1.25, + "grad_norm": 13.143922923805736, + "learning_rate": 6.512731862506239e-06, + "loss": 0.993, + "step": 103950 + }, + { + "epoch": 1.25, + "grad_norm": 12.551830647249771, + "learning_rate": 6.51218436253523e-06, + "loss": 1.4324, + "step": 103953 + }, + { + "epoch": 1.25, + "grad_norm": 8.506865662242332, + "learning_rate": 6.511636874466878e-06, + "loss": 1.2985, + "step": 103956 + }, + { + "epoch": 1.25, + "grad_norm": 5.216622554733289, + "learning_rate": 6.511089398303051e-06, + "loss": 1.1266, + "step": 103959 + }, + { + "epoch": 1.25, + "grad_norm": 4.458105384537499, + "learning_rate": 6.510541934045621e-06, + "loss": 1.0404, + "step": 103962 + }, + { + "epoch": 1.25, + "grad_norm": 2.8203845536362304, + "learning_rate": 6.509994481696452e-06, + "loss": 0.9936, + "step": 103965 + }, + { + "epoch": 1.25, + "grad_norm": 14.999942690848991, + "learning_rate": 6.50944704125742e-06, + "loss": 1.0737, + "step": 103968 + }, + { + "epoch": 1.25, + "grad_norm": 9.346155705500506, + "learning_rate": 6.5088996127303825e-06, + "loss": 0.9611, + "step": 103971 + }, + { + "epoch": 1.25, + "grad_norm": 11.157377401720929, + "learning_rate": 6.508352196117216e-06, + "loss": 1.23, + "step": 103974 + }, + { + "epoch": 1.25, + "grad_norm": 21.44376792229634, + "learning_rate": 6.507804791419784e-06, + "loss": 1.0149, + "step": 103977 + }, + { + "epoch": 1.25, + "grad_norm": 7.774634490280275, + "learning_rate": 6.50725739863996e-06, + "loss": 1.039, + "step": 103980 + }, + { + "epoch": 1.25, + "grad_norm": 10.872125893222465, + "learning_rate": 6.506710017779605e-06, + "loss": 1.0694, + "step": 103983 + }, + { + "epoch": 1.25, + "grad_norm": 11.994977309573972, + "learning_rate": 6.50616264884059e-06, + "loss": 1.3229, + "step": 103986 + }, + { + "epoch": 1.25, + "grad_norm": 13.903064442347201, + "learning_rate": 6.505615291824785e-06, + "loss": 1.0481, + "step": 103989 + }, + { + "epoch": 1.25, + "grad_norm": 8.277414011572464, + "learning_rate": 6.505067946734054e-06, + "loss": 0.8567, + "step": 103992 + }, + { + "epoch": 1.25, + "grad_norm": 4.912192555041272, + "learning_rate": 6.504520613570271e-06, + "loss": 1.1395, + "step": 103995 + }, + { + "epoch": 1.25, + "grad_norm": 7.275442932911573, + "learning_rate": 6.5039732923353e-06, + "loss": 0.8834, + "step": 103998 + }, + { + "epoch": 1.25, + "grad_norm": 13.554849765034346, + "learning_rate": 6.503425983031005e-06, + "loss": 1.3972, + "step": 104001 + }, + { + "epoch": 1.25, + "grad_norm": 12.69725899610884, + "learning_rate": 6.502878685659261e-06, + "loss": 1.3005, + "step": 104004 + }, + { + "epoch": 1.25, + "grad_norm": 25.62968284478604, + "learning_rate": 6.502331400221932e-06, + "loss": 1.0972, + "step": 104007 + }, + { + "epoch": 1.25, + "grad_norm": 21.110376701489795, + "learning_rate": 6.501784126720885e-06, + "loss": 1.3878, + "step": 104010 + }, + { + "epoch": 1.25, + "grad_norm": 3.381259483617042, + "learning_rate": 6.5012368651579885e-06, + "loss": 1.0075, + "step": 104013 + }, + { + "epoch": 1.25, + "grad_norm": 4.168617718051742, + "learning_rate": 6.500689615535112e-06, + "loss": 1.2678, + "step": 104016 + }, + { + "epoch": 1.25, + "grad_norm": 10.152360402158898, + "learning_rate": 6.500142377854117e-06, + "loss": 1.0666, + "step": 104019 + }, + { + "epoch": 1.25, + "grad_norm": 7.737675267444187, + "learning_rate": 6.499595152116882e-06, + "loss": 0.8272, + "step": 104022 + }, + { + "epoch": 1.25, + "grad_norm": 4.516515075211884, + "learning_rate": 6.499047938325262e-06, + "loss": 1.3688, + "step": 104025 + }, + { + "epoch": 1.25, + "grad_norm": 7.055508278324781, + "learning_rate": 6.498500736481135e-06, + "loss": 0.8662, + "step": 104028 + }, + { + "epoch": 1.25, + "grad_norm": 11.125445773005406, + "learning_rate": 6.4979535465863595e-06, + "loss": 1.2449, + "step": 104031 + }, + { + "epoch": 1.25, + "grad_norm": 14.377876469544711, + "learning_rate": 6.4974063686428125e-06, + "loss": 1.1764, + "step": 104034 + }, + { + "epoch": 1.25, + "grad_norm": 16.244132185897648, + "learning_rate": 6.496859202652351e-06, + "loss": 0.9347, + "step": 104037 + }, + { + "epoch": 1.25, + "grad_norm": 8.297159011099822, + "learning_rate": 6.4963120486168505e-06, + "loss": 1.1227, + "step": 104040 + }, + { + "epoch": 1.25, + "grad_norm": 3.1313781393533793, + "learning_rate": 6.495764906538171e-06, + "loss": 1.297, + "step": 104043 + }, + { + "epoch": 1.25, + "grad_norm": 3.4097150532986236, + "learning_rate": 6.495217776418188e-06, + "loss": 1.0128, + "step": 104046 + }, + { + "epoch": 1.25, + "grad_norm": 37.72019213835322, + "learning_rate": 6.494670658258762e-06, + "loss": 1.4989, + "step": 104049 + }, + { + "epoch": 1.25, + "grad_norm": 4.261850685658995, + "learning_rate": 6.494123552061764e-06, + "loss": 1.017, + "step": 104052 + }, + { + "epoch": 1.25, + "grad_norm": 7.142146193204637, + "learning_rate": 6.493576457829056e-06, + "loss": 0.9057, + "step": 104055 + }, + { + "epoch": 1.25, + "grad_norm": 3.9971102674238517, + "learning_rate": 6.493029375562512e-06, + "loss": 0.9244, + "step": 104058 + }, + { + "epoch": 1.25, + "grad_norm": 12.160701139758949, + "learning_rate": 6.492482305263996e-06, + "loss": 0.7559, + "step": 104061 + }, + { + "epoch": 1.25, + "grad_norm": 64.03379507437334, + "learning_rate": 6.491935246935372e-06, + "loss": 1.7744, + "step": 104064 + }, + { + "epoch": 1.25, + "grad_norm": 5.618278504329795, + "learning_rate": 6.491388200578509e-06, + "loss": 1.1401, + "step": 104067 + }, + { + "epoch": 1.25, + "grad_norm": 9.585063754823867, + "learning_rate": 6.490841166195276e-06, + "loss": 1.3272, + "step": 104070 + }, + { + "epoch": 1.25, + "grad_norm": 9.592606311736112, + "learning_rate": 6.4902941437875365e-06, + "loss": 0.9135, + "step": 104073 + }, + { + "epoch": 1.25, + "grad_norm": 4.041342574638298, + "learning_rate": 6.489747133357163e-06, + "loss": 1.3023, + "step": 104076 + }, + { + "epoch": 1.25, + "grad_norm": 8.580929040459827, + "learning_rate": 6.489200134906013e-06, + "loss": 1.1319, + "step": 104079 + }, + { + "epoch": 1.25, + "grad_norm": 13.778904512835123, + "learning_rate": 6.488653148435961e-06, + "loss": 1.323, + "step": 104082 + }, + { + "epoch": 1.25, + "grad_norm": 7.487097165759683, + "learning_rate": 6.488106173948869e-06, + "loss": 1.1139, + "step": 104085 + }, + { + "epoch": 1.25, + "grad_norm": 7.350931317114269, + "learning_rate": 6.48755921144661e-06, + "loss": 1.0526, + "step": 104088 + }, + { + "epoch": 1.25, + "grad_norm": 18.184717199268327, + "learning_rate": 6.487012260931042e-06, + "loss": 1.2379, + "step": 104091 + }, + { + "epoch": 1.25, + "grad_norm": 8.867657191674926, + "learning_rate": 6.486465322404039e-06, + "loss": 1.5929, + "step": 104094 + }, + { + "epoch": 1.25, + "grad_norm": 14.86015689967588, + "learning_rate": 6.485918395867461e-06, + "loss": 1.0004, + "step": 104097 + }, + { + "epoch": 1.25, + "grad_norm": 12.140768163488545, + "learning_rate": 6.4853714813231795e-06, + "loss": 1.5115, + "step": 104100 + }, + { + "epoch": 1.25, + "grad_norm": 21.137445829851035, + "learning_rate": 6.484824578773061e-06, + "loss": 1.2117, + "step": 104103 + }, + { + "epoch": 1.25, + "grad_norm": 11.249267351744482, + "learning_rate": 6.484277688218968e-06, + "loss": 1.2788, + "step": 104106 + }, + { + "epoch": 1.25, + "grad_norm": 16.997238340048803, + "learning_rate": 6.483730809662767e-06, + "loss": 1.1548, + "step": 104109 + }, + { + "epoch": 1.25, + "grad_norm": 33.62479180679619, + "learning_rate": 6.483183943106328e-06, + "loss": 1.4004, + "step": 104112 + }, + { + "epoch": 1.25, + "grad_norm": 17.118035665603838, + "learning_rate": 6.482637088551518e-06, + "loss": 1.1325, + "step": 104115 + }, + { + "epoch": 1.25, + "grad_norm": 5.977552562274094, + "learning_rate": 6.482090246000196e-06, + "loss": 1.2023, + "step": 104118 + }, + { + "epoch": 1.25, + "grad_norm": 7.680119608831055, + "learning_rate": 6.481543415454234e-06, + "loss": 1.0182, + "step": 104121 + }, + { + "epoch": 1.25, + "grad_norm": 3.3228528847277103, + "learning_rate": 6.480996596915499e-06, + "loss": 1.2112, + "step": 104124 + }, + { + "epoch": 1.25, + "grad_norm": 7.056032827829139, + "learning_rate": 6.480449790385853e-06, + "loss": 1.1842, + "step": 104127 + }, + { + "epoch": 1.25, + "grad_norm": 19.04167189639198, + "learning_rate": 6.479902995867169e-06, + "loss": 1.276, + "step": 104130 + }, + { + "epoch": 1.25, + "grad_norm": 6.709548758106165, + "learning_rate": 6.4793562133613015e-06, + "loss": 1.1812, + "step": 104133 + }, + { + "epoch": 1.25, + "grad_norm": 14.870695285706931, + "learning_rate": 6.478809442870126e-06, + "loss": 0.9263, + "step": 104136 + }, + { + "epoch": 1.25, + "grad_norm": 7.8907884557207355, + "learning_rate": 6.478262684395505e-06, + "loss": 1.2912, + "step": 104139 + }, + { + "epoch": 1.25, + "grad_norm": 7.634292730829269, + "learning_rate": 6.477715937939308e-06, + "loss": 0.9332, + "step": 104142 + }, + { + "epoch": 1.25, + "grad_norm": 3.0742733596440392, + "learning_rate": 6.4771692035033965e-06, + "loss": 1.4049, + "step": 104145 + }, + { + "epoch": 1.25, + "grad_norm": 13.944465386974821, + "learning_rate": 6.4766224810896335e-06, + "loss": 1.4666, + "step": 104148 + }, + { + "epoch": 1.25, + "grad_norm": 20.322081171257153, + "learning_rate": 6.476075770699891e-06, + "loss": 1.3217, + "step": 104151 + }, + { + "epoch": 1.25, + "grad_norm": 9.584955824714202, + "learning_rate": 6.475529072336035e-06, + "loss": 1.183, + "step": 104154 + }, + { + "epoch": 1.25, + "grad_norm": 7.318922525177728, + "learning_rate": 6.474982385999928e-06, + "loss": 1.1128, + "step": 104157 + }, + { + "epoch": 1.25, + "grad_norm": 11.692107766979834, + "learning_rate": 6.474435711693434e-06, + "loss": 0.8795, + "step": 104160 + }, + { + "epoch": 1.25, + "grad_norm": 14.373185487602035, + "learning_rate": 6.473889049418423e-06, + "loss": 0.6288, + "step": 104163 + }, + { + "epoch": 1.25, + "grad_norm": 10.434623043540178, + "learning_rate": 6.473342399176758e-06, + "loss": 1.4253, + "step": 104166 + }, + { + "epoch": 1.25, + "grad_norm": 5.660215616986804, + "learning_rate": 6.4727957609703075e-06, + "loss": 1.4861, + "step": 104169 + }, + { + "epoch": 1.25, + "grad_norm": 2.6763228847120852, + "learning_rate": 6.472249134800931e-06, + "loss": 1.5894, + "step": 104172 + }, + { + "epoch": 1.25, + "grad_norm": 3.9103540520537896, + "learning_rate": 6.471702520670498e-06, + "loss": 1.153, + "step": 104175 + }, + { + "epoch": 1.25, + "grad_norm": 16.536396264702095, + "learning_rate": 6.4711559185808735e-06, + "loss": 1.0528, + "step": 104178 + }, + { + "epoch": 1.25, + "grad_norm": 12.76554842687169, + "learning_rate": 6.4706093285339224e-06, + "loss": 1.4259, + "step": 104181 + }, + { + "epoch": 1.25, + "grad_norm": 12.369924449915032, + "learning_rate": 6.470062750531514e-06, + "loss": 1.0386, + "step": 104184 + }, + { + "epoch": 1.25, + "grad_norm": 4.737207294289763, + "learning_rate": 6.469516184575506e-06, + "loss": 1.0104, + "step": 104187 + }, + { + "epoch": 1.25, + "grad_norm": 11.203614798021238, + "learning_rate": 6.46896963066777e-06, + "loss": 1.2876, + "step": 104190 + }, + { + "epoch": 1.25, + "grad_norm": 4.459600942222428, + "learning_rate": 6.4684230888101655e-06, + "loss": 1.0182, + "step": 104193 + }, + { + "epoch": 1.25, + "grad_norm": 7.275858377563368, + "learning_rate": 6.467876559004567e-06, + "loss": 0.9291, + "step": 104196 + }, + { + "epoch": 1.25, + "grad_norm": 6.219454210535356, + "learning_rate": 6.467330041252829e-06, + "loss": 1.2271, + "step": 104199 + }, + { + "epoch": 1.25, + "grad_norm": 19.32001292685369, + "learning_rate": 6.46678353555682e-06, + "loss": 1.1613, + "step": 104202 + }, + { + "epoch": 1.25, + "grad_norm": 9.64751124083111, + "learning_rate": 6.466237041918407e-06, + "loss": 1.2283, + "step": 104205 + }, + { + "epoch": 1.25, + "grad_norm": 12.451378735667062, + "learning_rate": 6.465690560339456e-06, + "loss": 1.1374, + "step": 104208 + }, + { + "epoch": 1.25, + "grad_norm": 9.93144454216481, + "learning_rate": 6.465144090821831e-06, + "loss": 1.3557, + "step": 104211 + }, + { + "epoch": 1.25, + "grad_norm": 2.636943009093681, + "learning_rate": 6.464597633367393e-06, + "loss": 1.3261, + "step": 104214 + }, + { + "epoch": 1.25, + "grad_norm": 12.982779333824123, + "learning_rate": 6.4640511879780085e-06, + "loss": 1.2759, + "step": 104217 + }, + { + "epoch": 1.25, + "grad_norm": 9.685386207770255, + "learning_rate": 6.463504754655548e-06, + "loss": 1.5155, + "step": 104220 + }, + { + "epoch": 1.25, + "grad_norm": 15.094440122314493, + "learning_rate": 6.4629583334018715e-06, + "loss": 1.3484, + "step": 104223 + }, + { + "epoch": 1.25, + "grad_norm": 6.108112500626936, + "learning_rate": 6.462411924218842e-06, + "loss": 0.8991, + "step": 104226 + }, + { + "epoch": 1.25, + "grad_norm": 6.557072712912465, + "learning_rate": 6.461865527108326e-06, + "loss": 1.24, + "step": 104229 + }, + { + "epoch": 1.25, + "grad_norm": 24.84153785967239, + "learning_rate": 6.46131914207219e-06, + "loss": 1.1316, + "step": 104232 + }, + { + "epoch": 1.25, + "grad_norm": 37.09351323531894, + "learning_rate": 6.460772769112296e-06, + "loss": 1.0843, + "step": 104235 + }, + { + "epoch": 1.25, + "grad_norm": 16.353472752418746, + "learning_rate": 6.460226408230512e-06, + "loss": 1.3428, + "step": 104238 + }, + { + "epoch": 1.25, + "grad_norm": 8.755474770880326, + "learning_rate": 6.459680059428699e-06, + "loss": 1.0718, + "step": 104241 + }, + { + "epoch": 1.25, + "grad_norm": 11.583069093747826, + "learning_rate": 6.459133722708722e-06, + "loss": 1.125, + "step": 104244 + }, + { + "epoch": 1.25, + "grad_norm": 17.485484072422643, + "learning_rate": 6.458587398072445e-06, + "loss": 1.2919, + "step": 104247 + }, + { + "epoch": 1.25, + "grad_norm": 9.872114231672247, + "learning_rate": 6.458041085521738e-06, + "loss": 1.189, + "step": 104250 + }, + { + "epoch": 1.25, + "grad_norm": 5.760667669283396, + "learning_rate": 6.457494785058458e-06, + "loss": 1.0223, + "step": 104253 + }, + { + "epoch": 1.25, + "grad_norm": 11.102528672427153, + "learning_rate": 6.456948496684472e-06, + "loss": 1.394, + "step": 104256 + }, + { + "epoch": 1.25, + "grad_norm": 24.371845702179137, + "learning_rate": 6.456402220401643e-06, + "loss": 1.1409, + "step": 104259 + }, + { + "epoch": 1.25, + "grad_norm": 5.438762654923002, + "learning_rate": 6.45585595621184e-06, + "loss": 1.3801, + "step": 104262 + }, + { + "epoch": 1.25, + "grad_norm": 6.418743513915556, + "learning_rate": 6.455309704116926e-06, + "loss": 1.3761, + "step": 104265 + }, + { + "epoch": 1.25, + "grad_norm": 10.195051488660528, + "learning_rate": 6.454763464118758e-06, + "loss": 1.2298, + "step": 104268 + }, + { + "epoch": 1.25, + "grad_norm": 12.124799320113524, + "learning_rate": 6.454217236219206e-06, + "loss": 1.1558, + "step": 104271 + }, + { + "epoch": 1.25, + "grad_norm": 2.2913406775772667, + "learning_rate": 6.453671020420134e-06, + "loss": 1.1606, + "step": 104274 + }, + { + "epoch": 1.25, + "grad_norm": 21.643932829900532, + "learning_rate": 6.4531248167234105e-06, + "loss": 1.2398, + "step": 104277 + }, + { + "epoch": 1.25, + "grad_norm": 17.536516319049653, + "learning_rate": 6.4525786251308875e-06, + "loss": 1.0911, + "step": 104280 + }, + { + "epoch": 1.25, + "grad_norm": 10.799511816665781, + "learning_rate": 6.45203244564444e-06, + "loss": 1.3647, + "step": 104283 + }, + { + "epoch": 1.25, + "grad_norm": 8.160312780339824, + "learning_rate": 6.451486278265924e-06, + "loss": 1.234, + "step": 104286 + }, + { + "epoch": 1.25, + "grad_norm": 10.683870075760222, + "learning_rate": 6.450940122997209e-06, + "loss": 1.4135, + "step": 104289 + }, + { + "epoch": 1.25, + "grad_norm": 11.2821251848943, + "learning_rate": 6.45039397984016e-06, + "loss": 1.2187, + "step": 104292 + }, + { + "epoch": 1.25, + "grad_norm": 3.95506564301874, + "learning_rate": 6.449847848796636e-06, + "loss": 1.1728, + "step": 104295 + }, + { + "epoch": 1.25, + "grad_norm": 8.840385831247461, + "learning_rate": 6.4493017298685e-06, + "loss": 1.0116, + "step": 104298 + }, + { + "epoch": 1.25, + "grad_norm": 10.901361971998718, + "learning_rate": 6.448755623057619e-06, + "loss": 1.5569, + "step": 104301 + }, + { + "epoch": 1.25, + "grad_norm": 8.275024792385377, + "learning_rate": 6.448209528365858e-06, + "loss": 1.0907, + "step": 104304 + }, + { + "epoch": 1.25, + "grad_norm": 17.468371131527164, + "learning_rate": 6.447663445795078e-06, + "loss": 1.0385, + "step": 104307 + }, + { + "epoch": 1.25, + "grad_norm": 7.442709729119991, + "learning_rate": 6.447117375347143e-06, + "loss": 1.2258, + "step": 104310 + }, + { + "epoch": 1.25, + "grad_norm": 10.534292482257683, + "learning_rate": 6.446571317023913e-06, + "loss": 0.844, + "step": 104313 + }, + { + "epoch": 1.25, + "grad_norm": 8.15453416087726, + "learning_rate": 6.44602527082726e-06, + "loss": 1.1439, + "step": 104316 + }, + { + "epoch": 1.25, + "grad_norm": 2.326862252598477, + "learning_rate": 6.445479236759043e-06, + "loss": 1.4249, + "step": 104319 + }, + { + "epoch": 1.25, + "grad_norm": 3.918125943176477, + "learning_rate": 6.444933214821122e-06, + "loss": 1.064, + "step": 104322 + }, + { + "epoch": 1.25, + "grad_norm": 10.388578753405985, + "learning_rate": 6.444387205015366e-06, + "loss": 1.1697, + "step": 104325 + }, + { + "epoch": 1.25, + "grad_norm": 6.375525688147721, + "learning_rate": 6.443841207343632e-06, + "loss": 1.0203, + "step": 104328 + }, + { + "epoch": 1.25, + "grad_norm": 5.476639998569348, + "learning_rate": 6.443295221807792e-06, + "loss": 1.2293, + "step": 104331 + }, + { + "epoch": 1.25, + "grad_norm": 9.147410290093864, + "learning_rate": 6.4427492484097e-06, + "loss": 1.2006, + "step": 104334 + }, + { + "epoch": 1.25, + "grad_norm": 10.912084277081227, + "learning_rate": 6.4422032871512255e-06, + "loss": 1.4241, + "step": 104337 + }, + { + "epoch": 1.25, + "grad_norm": 10.755418747350822, + "learning_rate": 6.4416573380342275e-06, + "loss": 1.3696, + "step": 104340 + }, + { + "epoch": 1.25, + "grad_norm": 8.74339731689569, + "learning_rate": 6.4411114010605715e-06, + "loss": 1.1381, + "step": 104343 + }, + { + "epoch": 1.25, + "grad_norm": 11.839903876050432, + "learning_rate": 6.440565476232125e-06, + "loss": 1.0507, + "step": 104346 + }, + { + "epoch": 1.25, + "grad_norm": 13.555436590527641, + "learning_rate": 6.4400195635507435e-06, + "loss": 1.2419, + "step": 104349 + }, + { + "epoch": 1.25, + "grad_norm": 65.33881201987546, + "learning_rate": 6.439473663018291e-06, + "loss": 0.9985, + "step": 104352 + }, + { + "epoch": 1.25, + "grad_norm": 6.770736866367743, + "learning_rate": 6.438927774636632e-06, + "loss": 1.134, + "step": 104355 + }, + { + "epoch": 1.25, + "grad_norm": 11.52448692696083, + "learning_rate": 6.438381898407635e-06, + "loss": 1.1941, + "step": 104358 + }, + { + "epoch": 1.25, + "grad_norm": 5.422101501545002, + "learning_rate": 6.4378360343331555e-06, + "loss": 1.2259, + "step": 104361 + }, + { + "epoch": 1.25, + "grad_norm": 12.62866250335136, + "learning_rate": 6.437290182415057e-06, + "loss": 1.2347, + "step": 104364 + }, + { + "epoch": 1.25, + "grad_norm": 15.337338917697938, + "learning_rate": 6.436744342655203e-06, + "loss": 0.9698, + "step": 104367 + }, + { + "epoch": 1.26, + "grad_norm": 34.81473670902964, + "learning_rate": 6.436198515055459e-06, + "loss": 1.386, + "step": 104370 + }, + { + "epoch": 1.26, + "grad_norm": 14.242275716177264, + "learning_rate": 6.4356526996176875e-06, + "loss": 1.129, + "step": 104373 + }, + { + "epoch": 1.26, + "grad_norm": 12.975263032736706, + "learning_rate": 6.4351068963437455e-06, + "loss": 1.4036, + "step": 104376 + }, + { + "epoch": 1.26, + "grad_norm": 11.326201688854447, + "learning_rate": 6.4345611052355014e-06, + "loss": 1.1512, + "step": 104379 + }, + { + "epoch": 1.26, + "grad_norm": 3.2698549659138565, + "learning_rate": 6.434015326294815e-06, + "loss": 1.4356, + "step": 104382 + }, + { + "epoch": 1.26, + "grad_norm": 5.704345248184099, + "learning_rate": 6.433469559523553e-06, + "loss": 0.9177, + "step": 104385 + }, + { + "epoch": 1.26, + "grad_norm": 5.3988248282847575, + "learning_rate": 6.432923804923572e-06, + "loss": 1.3543, + "step": 104388 + }, + { + "epoch": 1.26, + "grad_norm": 18.871164748886855, + "learning_rate": 6.432378062496738e-06, + "loss": 1.3157, + "step": 104391 + }, + { + "epoch": 1.26, + "grad_norm": 12.246562820398626, + "learning_rate": 6.431832332244912e-06, + "loss": 1.0189, + "step": 104394 + }, + { + "epoch": 1.26, + "grad_norm": 34.16296968250054, + "learning_rate": 6.431286614169955e-06, + "loss": 1.0989, + "step": 104397 + }, + { + "epoch": 1.26, + "grad_norm": 8.495633821457703, + "learning_rate": 6.430740908273736e-06, + "loss": 1.625, + "step": 104400 + }, + { + "epoch": 1.26, + "grad_norm": 23.34223622551838, + "learning_rate": 6.430195214558111e-06, + "loss": 0.9101, + "step": 104403 + }, + { + "epoch": 1.26, + "grad_norm": 18.66041260399034, + "learning_rate": 6.429649533024942e-06, + "loss": 1.0128, + "step": 104406 + }, + { + "epoch": 1.26, + "grad_norm": 6.418088518265144, + "learning_rate": 6.4291038636760925e-06, + "loss": 1.0138, + "step": 104409 + }, + { + "epoch": 1.26, + "grad_norm": 10.656630167450759, + "learning_rate": 6.42855820651343e-06, + "loss": 1.0963, + "step": 104412 + }, + { + "epoch": 1.26, + "grad_norm": 20.24068760624543, + "learning_rate": 6.42801256153881e-06, + "loss": 1.3911, + "step": 104415 + }, + { + "epoch": 1.26, + "grad_norm": 19.911601315378874, + "learning_rate": 6.427466928754093e-06, + "loss": 1.0653, + "step": 104418 + }, + { + "epoch": 1.26, + "grad_norm": 10.217603465696731, + "learning_rate": 6.426921308161149e-06, + "loss": 1.2097, + "step": 104421 + }, + { + "epoch": 1.26, + "grad_norm": 8.706512856647889, + "learning_rate": 6.426375699761831e-06, + "loss": 1.0517, + "step": 104424 + }, + { + "epoch": 1.26, + "grad_norm": 7.419897915604972, + "learning_rate": 6.425830103558012e-06, + "loss": 0.9837, + "step": 104427 + }, + { + "epoch": 1.26, + "grad_norm": 6.978617936990266, + "learning_rate": 6.4252845195515425e-06, + "loss": 1.3566, + "step": 104430 + }, + { + "epoch": 1.26, + "grad_norm": 13.926612171517393, + "learning_rate": 6.42473894774429e-06, + "loss": 1.6724, + "step": 104433 + }, + { + "epoch": 1.26, + "grad_norm": 15.457955888686513, + "learning_rate": 6.424193388138116e-06, + "loss": 0.9088, + "step": 104436 + }, + { + "epoch": 1.26, + "grad_norm": 8.646067018635367, + "learning_rate": 6.423647840734884e-06, + "loss": 0.9223, + "step": 104439 + }, + { + "epoch": 1.26, + "grad_norm": 15.01674793100407, + "learning_rate": 6.42310230553645e-06, + "loss": 1.1031, + "step": 104442 + }, + { + "epoch": 1.26, + "grad_norm": 2.9814642993101157, + "learning_rate": 6.422556782544682e-06, + "loss": 1.3064, + "step": 104445 + }, + { + "epoch": 1.26, + "grad_norm": 15.309184173179533, + "learning_rate": 6.422011271761437e-06, + "loss": 1.4535, + "step": 104448 + }, + { + "epoch": 1.26, + "grad_norm": 5.804807377723742, + "learning_rate": 6.421465773188578e-06, + "loss": 1.0157, + "step": 104451 + }, + { + "epoch": 1.26, + "grad_norm": 11.726065012146238, + "learning_rate": 6.420920286827972e-06, + "loss": 1.2529, + "step": 104454 + }, + { + "epoch": 1.26, + "grad_norm": 13.12012789376203, + "learning_rate": 6.4203748126814745e-06, + "loss": 1.4441, + "step": 104457 + }, + { + "epoch": 1.26, + "grad_norm": 6.235334687647315, + "learning_rate": 6.419829350750945e-06, + "loss": 1.2492, + "step": 104460 + }, + { + "epoch": 1.26, + "grad_norm": 6.062339352565418, + "learning_rate": 6.419283901038252e-06, + "loss": 1.1442, + "step": 104463 + }, + { + "epoch": 1.26, + "grad_norm": 10.063900428387663, + "learning_rate": 6.418738463545252e-06, + "loss": 1.1695, + "step": 104466 + }, + { + "epoch": 1.26, + "grad_norm": 8.178757008033866, + "learning_rate": 6.418193038273808e-06, + "loss": 1.583, + "step": 104469 + }, + { + "epoch": 1.26, + "grad_norm": 4.691845758010226, + "learning_rate": 6.4176476252257805e-06, + "loss": 1.067, + "step": 104472 + }, + { + "epoch": 1.26, + "grad_norm": 7.522512661207018, + "learning_rate": 6.417102224403031e-06, + "loss": 1.0518, + "step": 104475 + }, + { + "epoch": 1.26, + "grad_norm": 5.887017835864629, + "learning_rate": 6.41655683580742e-06, + "loss": 1.3082, + "step": 104478 + }, + { + "epoch": 1.26, + "grad_norm": 31.14596132102543, + "learning_rate": 6.416011459440814e-06, + "loss": 0.9861, + "step": 104481 + }, + { + "epoch": 1.26, + "grad_norm": 12.775319225574233, + "learning_rate": 6.415466095305065e-06, + "loss": 1.1649, + "step": 104484 + }, + { + "epoch": 1.26, + "grad_norm": 16.17064646323546, + "learning_rate": 6.414920743402042e-06, + "loss": 1.2074, + "step": 104487 + }, + { + "epoch": 1.26, + "grad_norm": 7.90695894006565, + "learning_rate": 6.4143754037336014e-06, + "loss": 1.0277, + "step": 104490 + }, + { + "epoch": 1.26, + "grad_norm": 24.227429435346625, + "learning_rate": 6.413830076301608e-06, + "loss": 1.3354, + "step": 104493 + }, + { + "epoch": 1.26, + "grad_norm": 32.54014954646069, + "learning_rate": 6.413284761107919e-06, + "loss": 1.224, + "step": 104496 + }, + { + "epoch": 1.26, + "grad_norm": 6.146393669217391, + "learning_rate": 6.412739458154399e-06, + "loss": 1.1023, + "step": 104499 + }, + { + "epoch": 1.26, + "grad_norm": 12.51878893916731, + "learning_rate": 6.412194167442904e-06, + "loss": 1.0315, + "step": 104502 + }, + { + "epoch": 1.26, + "grad_norm": 5.346620704911892, + "learning_rate": 6.411648888975302e-06, + "loss": 1.2415, + "step": 104505 + }, + { + "epoch": 1.26, + "grad_norm": 6.963079908942383, + "learning_rate": 6.411103622753447e-06, + "loss": 1.3546, + "step": 104508 + }, + { + "epoch": 1.26, + "grad_norm": 8.397980912077076, + "learning_rate": 6.410558368779205e-06, + "loss": 1.246, + "step": 104511 + }, + { + "epoch": 1.26, + "grad_norm": 8.637143692509836, + "learning_rate": 6.410013127054431e-06, + "loss": 1.1788, + "step": 104514 + }, + { + "epoch": 1.26, + "grad_norm": 4.937003909665457, + "learning_rate": 6.409467897580991e-06, + "loss": 1.1226, + "step": 104517 + }, + { + "epoch": 1.26, + "grad_norm": 22.0544030618076, + "learning_rate": 6.408922680360746e-06, + "loss": 1.5743, + "step": 104520 + }, + { + "epoch": 1.26, + "grad_norm": 20.5134076413254, + "learning_rate": 6.408377475395551e-06, + "loss": 0.9924, + "step": 104523 + }, + { + "epoch": 1.26, + "grad_norm": 10.57951968235348, + "learning_rate": 6.40783228268727e-06, + "loss": 1.0872, + "step": 104526 + }, + { + "epoch": 1.26, + "grad_norm": 15.659555374189194, + "learning_rate": 6.407287102237764e-06, + "loss": 1.0156, + "step": 104529 + }, + { + "epoch": 1.26, + "grad_norm": 9.783925532522757, + "learning_rate": 6.406741934048892e-06, + "loss": 1.0395, + "step": 104532 + }, + { + "epoch": 1.26, + "grad_norm": 11.680416821274903, + "learning_rate": 6.406196778122521e-06, + "loss": 1.6484, + "step": 104535 + }, + { + "epoch": 1.26, + "grad_norm": 2.640673961378997, + "learning_rate": 6.4056516344605005e-06, + "loss": 1.0583, + "step": 104538 + }, + { + "epoch": 1.26, + "grad_norm": 10.656404870910375, + "learning_rate": 6.4051065030646975e-06, + "loss": 1.086, + "step": 104541 + }, + { + "epoch": 1.26, + "grad_norm": 16.46640846645858, + "learning_rate": 6.404561383936972e-06, + "loss": 1.4129, + "step": 104544 + }, + { + "epoch": 1.26, + "grad_norm": 48.74781901330951, + "learning_rate": 6.404016277079184e-06, + "loss": 1.1886, + "step": 104547 + }, + { + "epoch": 1.26, + "grad_norm": 9.060915188236198, + "learning_rate": 6.4034711824931905e-06, + "loss": 0.9263, + "step": 104550 + }, + { + "epoch": 1.26, + "grad_norm": 9.739349499812535, + "learning_rate": 6.402926100180857e-06, + "loss": 1.1902, + "step": 104553 + }, + { + "epoch": 1.26, + "grad_norm": 4.461713088861162, + "learning_rate": 6.402381030144038e-06, + "loss": 1.2982, + "step": 104556 + }, + { + "epoch": 1.26, + "grad_norm": 15.200678885166221, + "learning_rate": 6.4018359723846e-06, + "loss": 1.1977, + "step": 104559 + }, + { + "epoch": 1.26, + "grad_norm": 7.728652309132278, + "learning_rate": 6.4012909269044e-06, + "loss": 1.2802, + "step": 104562 + }, + { + "epoch": 1.26, + "grad_norm": 11.011007897301834, + "learning_rate": 6.4007458937052954e-06, + "loss": 1.1429, + "step": 104565 + }, + { + "epoch": 1.26, + "grad_norm": 12.890276202326366, + "learning_rate": 6.400200872789148e-06, + "loss": 1.1671, + "step": 104568 + }, + { + "epoch": 1.26, + "grad_norm": 17.529293421595277, + "learning_rate": 6.399655864157821e-06, + "loss": 1.1998, + "step": 104571 + }, + { + "epoch": 1.26, + "grad_norm": 5.425093440671835, + "learning_rate": 6.399110867813173e-06, + "loss": 1.3265, + "step": 104574 + }, + { + "epoch": 1.26, + "grad_norm": 7.6395245702003365, + "learning_rate": 6.398565883757059e-06, + "loss": 1.2258, + "step": 104577 + }, + { + "epoch": 1.26, + "grad_norm": 9.029502675276662, + "learning_rate": 6.398020911991342e-06, + "loss": 1.1257, + "step": 104580 + }, + { + "epoch": 1.26, + "grad_norm": 5.551500405666164, + "learning_rate": 6.397475952517885e-06, + "loss": 1.1789, + "step": 104583 + }, + { + "epoch": 1.26, + "grad_norm": 6.969109656799531, + "learning_rate": 6.3969310053385415e-06, + "loss": 0.8781, + "step": 104586 + }, + { + "epoch": 1.26, + "grad_norm": 4.582205880204483, + "learning_rate": 6.396386070455179e-06, + "loss": 1.0008, + "step": 104589 + }, + { + "epoch": 1.26, + "grad_norm": 18.691565109590385, + "learning_rate": 6.395841147869649e-06, + "loss": 1.3305, + "step": 104592 + }, + { + "epoch": 1.26, + "grad_norm": 9.698309873222057, + "learning_rate": 6.395296237583818e-06, + "loss": 1.4465, + "step": 104595 + }, + { + "epoch": 1.26, + "grad_norm": 9.581209475804936, + "learning_rate": 6.394751339599538e-06, + "loss": 1.1974, + "step": 104598 + }, + { + "epoch": 1.26, + "grad_norm": 7.858699433161625, + "learning_rate": 6.394206453918681e-06, + "loss": 1.6414, + "step": 104601 + }, + { + "epoch": 1.26, + "grad_norm": 7.955122248775364, + "learning_rate": 6.3936615805430915e-06, + "loss": 1.5503, + "step": 104604 + }, + { + "epoch": 1.26, + "grad_norm": 13.498903447698744, + "learning_rate": 6.393116719474639e-06, + "loss": 1.0326, + "step": 104607 + }, + { + "epoch": 1.26, + "grad_norm": 15.267182583700485, + "learning_rate": 6.392571870715178e-06, + "loss": 1.2334, + "step": 104610 + }, + { + "epoch": 1.26, + "grad_norm": 26.176827958256816, + "learning_rate": 6.392027034266571e-06, + "loss": 1.1289, + "step": 104613 + }, + { + "epoch": 1.26, + "grad_norm": 6.277738186135341, + "learning_rate": 6.39148221013068e-06, + "loss": 1.0093, + "step": 104616 + }, + { + "epoch": 1.26, + "grad_norm": 4.398922534368718, + "learning_rate": 6.390937398309355e-06, + "loss": 1.3275, + "step": 104619 + }, + { + "epoch": 1.26, + "grad_norm": 34.977247411018844, + "learning_rate": 6.3903925988044605e-06, + "loss": 1.476, + "step": 104622 + }, + { + "epoch": 1.26, + "grad_norm": 5.01112900089088, + "learning_rate": 6.389847811617859e-06, + "loss": 1.2742, + "step": 104625 + }, + { + "epoch": 1.26, + "grad_norm": 9.006887905647046, + "learning_rate": 6.3893030367514076e-06, + "loss": 1.1829, + "step": 104628 + }, + { + "epoch": 1.26, + "grad_norm": 9.883334523506297, + "learning_rate": 6.388758274206961e-06, + "loss": 1.142, + "step": 104631 + }, + { + "epoch": 1.26, + "grad_norm": 8.334320266681862, + "learning_rate": 6.388213523986381e-06, + "loss": 1.0286, + "step": 104634 + }, + { + "epoch": 1.26, + "grad_norm": 25.784231402505096, + "learning_rate": 6.3876687860915294e-06, + "loss": 1.1233, + "step": 104637 + }, + { + "epoch": 1.26, + "grad_norm": 16.00919939689284, + "learning_rate": 6.3871240605242616e-06, + "loss": 1.4956, + "step": 104640 + }, + { + "epoch": 1.26, + "grad_norm": 11.603583227739692, + "learning_rate": 6.386579347286441e-06, + "loss": 1.2851, + "step": 104643 + }, + { + "epoch": 1.26, + "grad_norm": 18.73406364395731, + "learning_rate": 6.3860346463799206e-06, + "loss": 0.9651, + "step": 104646 + }, + { + "epoch": 1.26, + "grad_norm": 14.567127864283847, + "learning_rate": 6.385489957806563e-06, + "loss": 1.0328, + "step": 104649 + }, + { + "epoch": 1.26, + "grad_norm": 27.132083057131773, + "learning_rate": 6.384945281568225e-06, + "loss": 0.8861, + "step": 104652 + }, + { + "epoch": 1.26, + "grad_norm": 9.331441953324834, + "learning_rate": 6.38440061766677e-06, + "loss": 0.9894, + "step": 104655 + }, + { + "epoch": 1.26, + "grad_norm": 3.4961210536207, + "learning_rate": 6.3838559661040535e-06, + "loss": 1.4077, + "step": 104658 + }, + { + "epoch": 1.26, + "grad_norm": 39.799434588654286, + "learning_rate": 6.38331132688193e-06, + "loss": 1.3343, + "step": 104661 + }, + { + "epoch": 1.26, + "grad_norm": 20.21529561247433, + "learning_rate": 6.382766700002263e-06, + "loss": 1.1852, + "step": 104664 + }, + { + "epoch": 1.26, + "grad_norm": 9.661077355272495, + "learning_rate": 6.3822220854669125e-06, + "loss": 1.2571, + "step": 104667 + }, + { + "epoch": 1.26, + "grad_norm": 12.131865195984338, + "learning_rate": 6.381677483277737e-06, + "loss": 0.8222, + "step": 104670 + }, + { + "epoch": 1.26, + "grad_norm": 5.6785011119516655, + "learning_rate": 6.381132893436588e-06, + "loss": 1.286, + "step": 104673 + }, + { + "epoch": 1.26, + "grad_norm": 13.115086514629565, + "learning_rate": 6.380588315945328e-06, + "loss": 1.174, + "step": 104676 + }, + { + "epoch": 1.26, + "grad_norm": 13.742315822562253, + "learning_rate": 6.380043750805821e-06, + "loss": 0.995, + "step": 104679 + }, + { + "epoch": 1.26, + "grad_norm": 7.023179405771177, + "learning_rate": 6.379499198019922e-06, + "loss": 1.087, + "step": 104682 + }, + { + "epoch": 1.26, + "grad_norm": 5.02068230827223, + "learning_rate": 6.378954657589484e-06, + "loss": 1.1254, + "step": 104685 + }, + { + "epoch": 1.26, + "grad_norm": 4.296261783636379, + "learning_rate": 6.378410129516368e-06, + "loss": 1.2296, + "step": 104688 + }, + { + "epoch": 1.26, + "grad_norm": 5.4942576997577035, + "learning_rate": 6.3778656138024376e-06, + "loss": 1.4642, + "step": 104691 + }, + { + "epoch": 1.26, + "grad_norm": 3.1789645729139764, + "learning_rate": 6.377321110449544e-06, + "loss": 1.0797, + "step": 104694 + }, + { + "epoch": 1.26, + "grad_norm": 10.11214192904131, + "learning_rate": 6.376776619459552e-06, + "loss": 1.0407, + "step": 104697 + }, + { + "epoch": 1.26, + "grad_norm": 22.092579903028643, + "learning_rate": 6.3762321408343174e-06, + "loss": 1.2881, + "step": 104700 + }, + { + "epoch": 1.26, + "grad_norm": 5.961736260358208, + "learning_rate": 6.3756876745756925e-06, + "loss": 1.3347, + "step": 104703 + }, + { + "epoch": 1.26, + "grad_norm": 8.12602108879544, + "learning_rate": 6.37514322068554e-06, + "loss": 1.0761, + "step": 104706 + }, + { + "epoch": 1.26, + "grad_norm": 9.352935665407651, + "learning_rate": 6.3745987791657235e-06, + "loss": 1.3727, + "step": 104709 + }, + { + "epoch": 1.26, + "grad_norm": 8.580918344944237, + "learning_rate": 6.374054350018093e-06, + "loss": 1.1609, + "step": 104712 + }, + { + "epoch": 1.26, + "grad_norm": 6.386781286744379, + "learning_rate": 6.373509933244506e-06, + "loss": 0.9725, + "step": 104715 + }, + { + "epoch": 1.26, + "grad_norm": 5.931170651929367, + "learning_rate": 6.372965528846824e-06, + "loss": 1.229, + "step": 104718 + }, + { + "epoch": 1.26, + "grad_norm": 6.020074994722758, + "learning_rate": 6.372421136826907e-06, + "loss": 1.1046, + "step": 104721 + }, + { + "epoch": 1.26, + "grad_norm": 8.675450808736285, + "learning_rate": 6.371876757186612e-06, + "loss": 1.078, + "step": 104724 + }, + { + "epoch": 1.26, + "grad_norm": 22.698668332114654, + "learning_rate": 6.371332389927789e-06, + "loss": 1.1173, + "step": 104727 + }, + { + "epoch": 1.26, + "grad_norm": 4.915444197485422, + "learning_rate": 6.3707880350523025e-06, + "loss": 1.0528, + "step": 104730 + }, + { + "epoch": 1.26, + "grad_norm": 3.941703688184085, + "learning_rate": 6.3702436925620116e-06, + "loss": 1.4952, + "step": 104733 + }, + { + "epoch": 1.26, + "grad_norm": 10.457748406588864, + "learning_rate": 6.3696993624587736e-06, + "loss": 1.5299, + "step": 104736 + }, + { + "epoch": 1.26, + "grad_norm": 63.580963754712144, + "learning_rate": 6.369155044744439e-06, + "loss": 1.638, + "step": 104739 + }, + { + "epoch": 1.26, + "grad_norm": 9.957965368601055, + "learning_rate": 6.368610739420873e-06, + "loss": 1.2426, + "step": 104742 + }, + { + "epoch": 1.26, + "grad_norm": 4.229001529105719, + "learning_rate": 6.368066446489929e-06, + "loss": 1.1488, + "step": 104745 + }, + { + "epoch": 1.26, + "grad_norm": 5.916191389527621, + "learning_rate": 6.3675221659534655e-06, + "loss": 1.4488, + "step": 104748 + }, + { + "epoch": 1.26, + "grad_norm": 19.479432789739874, + "learning_rate": 6.366977897813345e-06, + "loss": 1.0193, + "step": 104751 + }, + { + "epoch": 1.26, + "grad_norm": 7.920163276279407, + "learning_rate": 6.366433642071418e-06, + "loss": 1.2574, + "step": 104754 + }, + { + "epoch": 1.26, + "grad_norm": 17.040561408865223, + "learning_rate": 6.365889398729541e-06, + "loss": 1.0859, + "step": 104757 + }, + { + "epoch": 1.26, + "grad_norm": 6.158977993233459, + "learning_rate": 6.365345167789576e-06, + "loss": 1.3794, + "step": 104760 + }, + { + "epoch": 1.26, + "grad_norm": 25.787479525051612, + "learning_rate": 6.364800949253383e-06, + "loss": 1.4233, + "step": 104763 + }, + { + "epoch": 1.26, + "grad_norm": 7.456254273791363, + "learning_rate": 6.364256743122812e-06, + "loss": 1.074, + "step": 104766 + }, + { + "epoch": 1.26, + "grad_norm": 5.855486411400507, + "learning_rate": 6.363712549399721e-06, + "loss": 1.5149, + "step": 104769 + }, + { + "epoch": 1.26, + "grad_norm": 13.428498083050464, + "learning_rate": 6.36316836808597e-06, + "loss": 1.3474, + "step": 104772 + }, + { + "epoch": 1.26, + "grad_norm": 5.235945134942164, + "learning_rate": 6.362624199183417e-06, + "loss": 1.0607, + "step": 104775 + }, + { + "epoch": 1.26, + "grad_norm": 13.665301399069042, + "learning_rate": 6.362080042693919e-06, + "loss": 1.8148, + "step": 104778 + }, + { + "epoch": 1.26, + "grad_norm": 5.808492617550346, + "learning_rate": 6.361535898619329e-06, + "loss": 1.2432, + "step": 104781 + }, + { + "epoch": 1.26, + "grad_norm": 4.245392006780524, + "learning_rate": 6.360991766961504e-06, + "loss": 1.2981, + "step": 104784 + }, + { + "epoch": 1.26, + "grad_norm": 47.705462757753004, + "learning_rate": 6.360447647722307e-06, + "loss": 1.5555, + "step": 104787 + }, + { + "epoch": 1.26, + "grad_norm": 9.97766058769752, + "learning_rate": 6.359903540903594e-06, + "loss": 1.2972, + "step": 104790 + }, + { + "epoch": 1.26, + "grad_norm": 13.337867355512463, + "learning_rate": 6.359359446507214e-06, + "loss": 1.0709, + "step": 104793 + }, + { + "epoch": 1.26, + "grad_norm": 6.089762463167354, + "learning_rate": 6.35881536453503e-06, + "loss": 0.9331, + "step": 104796 + }, + { + "epoch": 1.26, + "grad_norm": 5.379011843578792, + "learning_rate": 6.358271294988896e-06, + "loss": 1.2758, + "step": 104799 + }, + { + "epoch": 1.26, + "grad_norm": 45.712551473254464, + "learning_rate": 6.357727237870671e-06, + "loss": 1.246, + "step": 104802 + }, + { + "epoch": 1.26, + "grad_norm": 55.20311762373281, + "learning_rate": 6.357183193182215e-06, + "loss": 1.089, + "step": 104805 + }, + { + "epoch": 1.26, + "grad_norm": 14.843442685245993, + "learning_rate": 6.3566391609253784e-06, + "loss": 0.9732, + "step": 104808 + }, + { + "epoch": 1.26, + "grad_norm": 5.946234653642702, + "learning_rate": 6.3560951411020176e-06, + "loss": 1.1254, + "step": 104811 + }, + { + "epoch": 1.26, + "grad_norm": 19.014253959007956, + "learning_rate": 6.355551133713993e-06, + "loss": 1.2897, + "step": 104814 + }, + { + "epoch": 1.26, + "grad_norm": 16.433831663029988, + "learning_rate": 6.355007138763162e-06, + "loss": 1.2011, + "step": 104817 + }, + { + "epoch": 1.26, + "grad_norm": 12.509030524365295, + "learning_rate": 6.354463156251377e-06, + "loss": 0.9851, + "step": 104820 + }, + { + "epoch": 1.26, + "grad_norm": 15.048041484455531, + "learning_rate": 6.353919186180495e-06, + "loss": 1.1903, + "step": 104823 + }, + { + "epoch": 1.26, + "grad_norm": 5.12021611558747, + "learning_rate": 6.353375228552372e-06, + "loss": 1.1867, + "step": 104826 + }, + { + "epoch": 1.26, + "grad_norm": 6.826405544594883, + "learning_rate": 6.35283128336887e-06, + "loss": 1.1258, + "step": 104829 + }, + { + "epoch": 1.26, + "grad_norm": 12.019576003904925, + "learning_rate": 6.35228735063184e-06, + "loss": 1.1647, + "step": 104832 + }, + { + "epoch": 1.26, + "grad_norm": 3.7388730788947075, + "learning_rate": 6.351743430343138e-06, + "loss": 1.2127, + "step": 104835 + }, + { + "epoch": 1.26, + "grad_norm": 5.4765499159505575, + "learning_rate": 6.351199522504623e-06, + "loss": 1.2373, + "step": 104838 + }, + { + "epoch": 1.26, + "grad_norm": 12.494262691538893, + "learning_rate": 6.350655627118148e-06, + "loss": 1.0432, + "step": 104841 + }, + { + "epoch": 1.26, + "grad_norm": 9.228594327875022, + "learning_rate": 6.3501117441855746e-06, + "loss": 1.1058, + "step": 104844 + }, + { + "epoch": 1.26, + "grad_norm": 10.730437909035844, + "learning_rate": 6.3495678737087506e-06, + "loss": 0.8779, + "step": 104847 + }, + { + "epoch": 1.26, + "grad_norm": 16.88855982766265, + "learning_rate": 6.34902401568954e-06, + "loss": 0.9254, + "step": 104850 + }, + { + "epoch": 1.26, + "grad_norm": 36.353829931846896, + "learning_rate": 6.348480170129792e-06, + "loss": 1.1312, + "step": 104853 + }, + { + "epoch": 1.26, + "grad_norm": 2.6112644882083687, + "learning_rate": 6.347936337031366e-06, + "loss": 1.2694, + "step": 104856 + }, + { + "epoch": 1.26, + "grad_norm": 3.78068488830966, + "learning_rate": 6.3473925163961225e-06, + "loss": 1.5372, + "step": 104859 + }, + { + "epoch": 1.26, + "grad_norm": 8.299812121654906, + "learning_rate": 6.34684870822591e-06, + "loss": 1.1561, + "step": 104862 + }, + { + "epoch": 1.26, + "grad_norm": 12.642819736430049, + "learning_rate": 6.346304912522585e-06, + "loss": 1.6083, + "step": 104865 + }, + { + "epoch": 1.26, + "grad_norm": 12.564826185373263, + "learning_rate": 6.345761129288005e-06, + "loss": 1.2377, + "step": 104868 + }, + { + "epoch": 1.26, + "grad_norm": 6.191327530751275, + "learning_rate": 6.345217358524031e-06, + "loss": 0.9953, + "step": 104871 + }, + { + "epoch": 1.26, + "grad_norm": 11.64002932469999, + "learning_rate": 6.344673600232512e-06, + "loss": 1.2486, + "step": 104874 + }, + { + "epoch": 1.26, + "grad_norm": 5.421185771791672, + "learning_rate": 6.344129854415302e-06, + "loss": 1.0007, + "step": 104877 + }, + { + "epoch": 1.26, + "grad_norm": 8.84086220571218, + "learning_rate": 6.343586121074262e-06, + "loss": 0.9947, + "step": 104880 + }, + { + "epoch": 1.26, + "grad_norm": 13.56451357209888, + "learning_rate": 6.343042400211244e-06, + "loss": 1.0181, + "step": 104883 + }, + { + "epoch": 1.26, + "grad_norm": 8.159056266072527, + "learning_rate": 6.34249869182811e-06, + "loss": 0.9949, + "step": 104886 + }, + { + "epoch": 1.26, + "grad_norm": 11.320573167561873, + "learning_rate": 6.341954995926705e-06, + "loss": 1.2313, + "step": 104889 + }, + { + "epoch": 1.26, + "grad_norm": 6.181319693339455, + "learning_rate": 6.341411312508893e-06, + "loss": 1.5864, + "step": 104892 + }, + { + "epoch": 1.26, + "grad_norm": 28.85213580009882, + "learning_rate": 6.340867641576523e-06, + "loss": 0.9253, + "step": 104895 + }, + { + "epoch": 1.26, + "grad_norm": 7.636928785178668, + "learning_rate": 6.34032398313146e-06, + "loss": 1.2367, + "step": 104898 + }, + { + "epoch": 1.26, + "grad_norm": 20.387794869945456, + "learning_rate": 6.3397803371755476e-06, + "loss": 1.3725, + "step": 104901 + }, + { + "epoch": 1.26, + "grad_norm": 4.141653983654445, + "learning_rate": 6.339236703710648e-06, + "loss": 1.2263, + "step": 104904 + }, + { + "epoch": 1.26, + "grad_norm": 11.973027655009892, + "learning_rate": 6.338693082738614e-06, + "loss": 1.3606, + "step": 104907 + }, + { + "epoch": 1.26, + "grad_norm": 18.790248086075632, + "learning_rate": 6.338149474261301e-06, + "loss": 1.1227, + "step": 104910 + }, + { + "epoch": 1.26, + "grad_norm": 11.318252639108326, + "learning_rate": 6.337605878280569e-06, + "loss": 1.1479, + "step": 104913 + }, + { + "epoch": 1.26, + "grad_norm": 10.385094723026285, + "learning_rate": 6.337062294798267e-06, + "loss": 1.1863, + "step": 104916 + }, + { + "epoch": 1.26, + "grad_norm": 4.956616788191384, + "learning_rate": 6.3365187238162506e-06, + "loss": 1.2547, + "step": 104919 + }, + { + "epoch": 1.26, + "grad_norm": 32.620909307533445, + "learning_rate": 6.3359751653363775e-06, + "loss": 1.1696, + "step": 104922 + }, + { + "epoch": 1.26, + "grad_norm": 9.822840211339669, + "learning_rate": 6.3354316193605014e-06, + "loss": 1.3486, + "step": 104925 + }, + { + "epoch": 1.26, + "grad_norm": 48.10264776814668, + "learning_rate": 6.334888085890478e-06, + "loss": 1.1652, + "step": 104928 + }, + { + "epoch": 1.26, + "grad_norm": 10.389315494450294, + "learning_rate": 6.334344564928159e-06, + "loss": 1.3461, + "step": 104931 + }, + { + "epoch": 1.26, + "grad_norm": 8.814329137905258, + "learning_rate": 6.333801056475404e-06, + "loss": 1.4849, + "step": 104934 + }, + { + "epoch": 1.26, + "grad_norm": 25.4159647950403, + "learning_rate": 6.333257560534062e-06, + "loss": 1.1169, + "step": 104937 + }, + { + "epoch": 1.26, + "grad_norm": 14.990228347668207, + "learning_rate": 6.332714077105997e-06, + "loss": 1.481, + "step": 104940 + }, + { + "epoch": 1.26, + "grad_norm": 10.07768906776405, + "learning_rate": 6.332170606193052e-06, + "loss": 1.2554, + "step": 104943 + }, + { + "epoch": 1.26, + "grad_norm": 9.895730065901, + "learning_rate": 6.33162714779709e-06, + "loss": 1.2269, + "step": 104946 + }, + { + "epoch": 1.26, + "grad_norm": 18.642132303913222, + "learning_rate": 6.331083701919962e-06, + "loss": 1.2385, + "step": 104949 + }, + { + "epoch": 1.26, + "grad_norm": 9.330651293898446, + "learning_rate": 6.330540268563527e-06, + "loss": 1.2091, + "step": 104952 + }, + { + "epoch": 1.26, + "grad_norm": 17.37970510933801, + "learning_rate": 6.329996847729634e-06, + "loss": 1.197, + "step": 104955 + }, + { + "epoch": 1.26, + "grad_norm": 5.757957437354025, + "learning_rate": 6.329453439420141e-06, + "loss": 0.9645, + "step": 104958 + }, + { + "epoch": 1.26, + "grad_norm": 5.962741261452332, + "learning_rate": 6.328910043636899e-06, + "loss": 1.2049, + "step": 104961 + }, + { + "epoch": 1.26, + "grad_norm": 4.162726699021174, + "learning_rate": 6.328366660381764e-06, + "loss": 1.2216, + "step": 104964 + }, + { + "epoch": 1.26, + "grad_norm": 10.495538969404762, + "learning_rate": 6.327823289656596e-06, + "loss": 1.3459, + "step": 104967 + }, + { + "epoch": 1.26, + "grad_norm": 3.602093061856658, + "learning_rate": 6.327279931463243e-06, + "loss": 1.1908, + "step": 104970 + }, + { + "epoch": 1.26, + "grad_norm": 3.1768659012761877, + "learning_rate": 6.326736585803556e-06, + "loss": 1.1174, + "step": 104973 + }, + { + "epoch": 1.26, + "grad_norm": 10.764769096885734, + "learning_rate": 6.326193252679399e-06, + "loss": 1.232, + "step": 104976 + }, + { + "epoch": 1.26, + "grad_norm": 31.226067183524872, + "learning_rate": 6.3256499320926225e-06, + "loss": 1.3184, + "step": 104979 + }, + { + "epoch": 1.26, + "grad_norm": 8.013965254135599, + "learning_rate": 6.325106624045074e-06, + "loss": 1.3087, + "step": 104982 + }, + { + "epoch": 1.26, + "grad_norm": 5.961216197355903, + "learning_rate": 6.324563328538613e-06, + "loss": 1.4643, + "step": 104985 + }, + { + "epoch": 1.26, + "grad_norm": 13.498086657154113, + "learning_rate": 6.324020045575097e-06, + "loss": 1.2789, + "step": 104988 + }, + { + "epoch": 1.26, + "grad_norm": 10.468157192894248, + "learning_rate": 6.323476775156374e-06, + "loss": 0.9182, + "step": 104991 + }, + { + "epoch": 1.26, + "grad_norm": 5.94412406553287, + "learning_rate": 6.3229335172843045e-06, + "loss": 1.1341, + "step": 104994 + }, + { + "epoch": 1.26, + "grad_norm": 12.909987322179479, + "learning_rate": 6.322390271960733e-06, + "loss": 0.9031, + "step": 104997 + }, + { + "epoch": 1.26, + "grad_norm": 3.0728711724158995, + "learning_rate": 6.321847039187522e-06, + "loss": 0.9999, + "step": 105000 + }, + { + "epoch": 1.26, + "grad_norm": 2.6144137578989923, + "learning_rate": 6.32130381896652e-06, + "loss": 0.9688, + "step": 105003 + }, + { + "epoch": 1.26, + "grad_norm": 7.7472306071735835, + "learning_rate": 6.320760611299588e-06, + "loss": 0.9574, + "step": 105006 + }, + { + "epoch": 1.26, + "grad_norm": 16.044612935853465, + "learning_rate": 6.32021741618857e-06, + "loss": 1.5947, + "step": 105009 + }, + { + "epoch": 1.26, + "grad_norm": 8.771516732363803, + "learning_rate": 6.319674233635325e-06, + "loss": 1.2782, + "step": 105012 + }, + { + "epoch": 1.26, + "grad_norm": 19.46252853033759, + "learning_rate": 6.319131063641706e-06, + "loss": 1.3063, + "step": 105015 + }, + { + "epoch": 1.26, + "grad_norm": 18.964513215519947, + "learning_rate": 6.318587906209568e-06, + "loss": 1.1392, + "step": 105018 + }, + { + "epoch": 1.26, + "grad_norm": 18.304497414400874, + "learning_rate": 6.318044761340765e-06, + "loss": 1.2597, + "step": 105021 + }, + { + "epoch": 1.26, + "grad_norm": 9.176926250665682, + "learning_rate": 6.3175016290371474e-06, + "loss": 1.1637, + "step": 105024 + }, + { + "epoch": 1.26, + "grad_norm": 13.335891187339142, + "learning_rate": 6.316958509300568e-06, + "loss": 0.9517, + "step": 105027 + }, + { + "epoch": 1.26, + "grad_norm": 6.1913748562527955, + "learning_rate": 6.316415402132885e-06, + "loss": 0.8997, + "step": 105030 + }, + { + "epoch": 1.26, + "grad_norm": 8.195273984830001, + "learning_rate": 6.315872307535953e-06, + "loss": 1.2278, + "step": 105033 + }, + { + "epoch": 1.26, + "grad_norm": 17.573759270499465, + "learning_rate": 6.315329225511615e-06, + "loss": 1.3703, + "step": 105036 + }, + { + "epoch": 1.26, + "grad_norm": 3.9259844967356066, + "learning_rate": 6.314786156061734e-06, + "loss": 1.1329, + "step": 105039 + }, + { + "epoch": 1.26, + "grad_norm": 5.559691409658454, + "learning_rate": 6.31424309918816e-06, + "loss": 1.0816, + "step": 105042 + }, + { + "epoch": 1.26, + "grad_norm": 4.499066487609231, + "learning_rate": 6.313700054892746e-06, + "loss": 1.0461, + "step": 105045 + }, + { + "epoch": 1.26, + "grad_norm": 8.418446366300437, + "learning_rate": 6.31315702317735e-06, + "loss": 1.107, + "step": 105048 + }, + { + "epoch": 1.26, + "grad_norm": 4.138467680539871, + "learning_rate": 6.312614004043817e-06, + "loss": 1.1435, + "step": 105051 + }, + { + "epoch": 1.26, + "grad_norm": 23.952573796930643, + "learning_rate": 6.312070997494006e-06, + "loss": 1.0722, + "step": 105054 + }, + { + "epoch": 1.26, + "grad_norm": 13.565596745869254, + "learning_rate": 6.311528003529765e-06, + "loss": 1.5513, + "step": 105057 + }, + { + "epoch": 1.26, + "grad_norm": 3.904963431332999, + "learning_rate": 6.310985022152957e-06, + "loss": 1.3027, + "step": 105060 + }, + { + "epoch": 1.26, + "grad_norm": 7.599658034144466, + "learning_rate": 6.3104420533654224e-06, + "loss": 1.0292, + "step": 105063 + }, + { + "epoch": 1.26, + "grad_norm": 2.8333985281497536, + "learning_rate": 6.309899097169024e-06, + "loss": 1.3427, + "step": 105066 + }, + { + "epoch": 1.26, + "grad_norm": 6.671587098911475, + "learning_rate": 6.309356153565607e-06, + "loss": 1.034, + "step": 105069 + }, + { + "epoch": 1.26, + "grad_norm": 9.18536288902289, + "learning_rate": 6.3088132225570296e-06, + "loss": 0.9465, + "step": 105072 + }, + { + "epoch": 1.26, + "grad_norm": 5.622522501995478, + "learning_rate": 6.308270304145147e-06, + "loss": 1.3004, + "step": 105075 + }, + { + "epoch": 1.26, + "grad_norm": 20.874803673849822, + "learning_rate": 6.3077273983318025e-06, + "loss": 1.0868, + "step": 105078 + }, + { + "epoch": 1.26, + "grad_norm": 10.703673085812007, + "learning_rate": 6.307184505118855e-06, + "loss": 1.5967, + "step": 105081 + }, + { + "epoch": 1.26, + "grad_norm": 10.120733738049879, + "learning_rate": 6.306641624508159e-06, + "loss": 1.2505, + "step": 105084 + }, + { + "epoch": 1.26, + "grad_norm": 8.820107506923247, + "learning_rate": 6.306098756501566e-06, + "loss": 1.3222, + "step": 105087 + }, + { + "epoch": 1.26, + "grad_norm": 15.235316356412241, + "learning_rate": 6.305555901100925e-06, + "loss": 0.8806, + "step": 105090 + }, + { + "epoch": 1.26, + "grad_norm": 6.078062168405383, + "learning_rate": 6.305013058308089e-06, + "loss": 0.9291, + "step": 105093 + }, + { + "epoch": 1.26, + "grad_norm": 18.277155579608625, + "learning_rate": 6.304470228124916e-06, + "loss": 1.21, + "step": 105096 + }, + { + "epoch": 1.26, + "grad_norm": 3.032165872607766, + "learning_rate": 6.303927410553252e-06, + "loss": 1.144, + "step": 105099 + }, + { + "epoch": 1.26, + "grad_norm": 6.569591782504111, + "learning_rate": 6.303384605594958e-06, + "loss": 1.2458, + "step": 105102 + }, + { + "epoch": 1.26, + "grad_norm": 8.681090215668588, + "learning_rate": 6.3028418132518745e-06, + "loss": 1.2433, + "step": 105105 + }, + { + "epoch": 1.26, + "grad_norm": 13.63506038994883, + "learning_rate": 6.3022990335258634e-06, + "loss": 1.1739, + "step": 105108 + }, + { + "epoch": 1.26, + "grad_norm": 6.793148269395496, + "learning_rate": 6.301756266418772e-06, + "loss": 1.2576, + "step": 105111 + }, + { + "epoch": 1.26, + "grad_norm": 21.941577069019228, + "learning_rate": 6.301213511932458e-06, + "loss": 0.972, + "step": 105114 + }, + { + "epoch": 1.26, + "grad_norm": 6.384792792829404, + "learning_rate": 6.300670770068768e-06, + "loss": 1.3774, + "step": 105117 + }, + { + "epoch": 1.26, + "grad_norm": 9.51207650797833, + "learning_rate": 6.300128040829554e-06, + "loss": 1.3261, + "step": 105120 + }, + { + "epoch": 1.26, + "grad_norm": 13.900007566614107, + "learning_rate": 6.299585324216671e-06, + "loss": 1.3746, + "step": 105123 + }, + { + "epoch": 1.26, + "grad_norm": 17.774112706563912, + "learning_rate": 6.299042620231972e-06, + "loss": 1.1336, + "step": 105126 + }, + { + "epoch": 1.26, + "grad_norm": 19.836890733158114, + "learning_rate": 6.29849992887731e-06, + "loss": 1.3292, + "step": 105129 + }, + { + "epoch": 1.26, + "grad_norm": 14.711760077562241, + "learning_rate": 6.2979572501545295e-06, + "loss": 1.1944, + "step": 105132 + }, + { + "epoch": 1.26, + "grad_norm": 26.69491760594415, + "learning_rate": 6.297414584065489e-06, + "loss": 1.1529, + "step": 105135 + }, + { + "epoch": 1.26, + "grad_norm": 11.748424144252464, + "learning_rate": 6.296871930612038e-06, + "loss": 1.236, + "step": 105138 + }, + { + "epoch": 1.26, + "grad_norm": 9.490803643925474, + "learning_rate": 6.296329289796034e-06, + "loss": 1.0752, + "step": 105141 + }, + { + "epoch": 1.26, + "grad_norm": 17.61851903767519, + "learning_rate": 6.295786661619319e-06, + "loss": 1.2913, + "step": 105144 + }, + { + "epoch": 1.26, + "grad_norm": 11.22873601294716, + "learning_rate": 6.29524404608375e-06, + "loss": 0.9542, + "step": 105147 + }, + { + "epoch": 1.26, + "grad_norm": 17.01611995487054, + "learning_rate": 6.29470144319118e-06, + "loss": 1.2705, + "step": 105150 + }, + { + "epoch": 1.26, + "grad_norm": 14.017594923043779, + "learning_rate": 6.294158852943458e-06, + "loss": 0.8135, + "step": 105153 + }, + { + "epoch": 1.26, + "grad_norm": 4.471789510811291, + "learning_rate": 6.293616275342441e-06, + "loss": 0.8742, + "step": 105156 + }, + { + "epoch": 1.26, + "grad_norm": 12.577174854285975, + "learning_rate": 6.2930737103899745e-06, + "loss": 1.1284, + "step": 105159 + }, + { + "epoch": 1.26, + "grad_norm": 103.67455991518764, + "learning_rate": 6.29253115808791e-06, + "loss": 1.0667, + "step": 105162 + }, + { + "epoch": 1.26, + "grad_norm": 9.503404808675239, + "learning_rate": 6.291988618438101e-06, + "loss": 1.2494, + "step": 105165 + }, + { + "epoch": 1.26, + "grad_norm": 13.982363932552298, + "learning_rate": 6.291446091442404e-06, + "loss": 1.2666, + "step": 105168 + }, + { + "epoch": 1.26, + "grad_norm": 6.5361596642235815, + "learning_rate": 6.290903577102663e-06, + "loss": 1.27, + "step": 105171 + }, + { + "epoch": 1.26, + "grad_norm": 5.674021963978622, + "learning_rate": 6.29036107542073e-06, + "loss": 0.7434, + "step": 105174 + }, + { + "epoch": 1.26, + "grad_norm": 17.640898140781044, + "learning_rate": 6.289818586398459e-06, + "loss": 1.0542, + "step": 105177 + }, + { + "epoch": 1.26, + "grad_norm": 10.505032825209309, + "learning_rate": 6.289276110037703e-06, + "loss": 0.9732, + "step": 105180 + }, + { + "epoch": 1.26, + "grad_norm": 4.663183410155326, + "learning_rate": 6.288733646340312e-06, + "loss": 1.2642, + "step": 105183 + }, + { + "epoch": 1.26, + "grad_norm": 14.942964588870728, + "learning_rate": 6.288191195308133e-06, + "loss": 1.3434, + "step": 105186 + }, + { + "epoch": 1.26, + "grad_norm": 12.521009189169078, + "learning_rate": 6.287648756943021e-06, + "loss": 0.9777, + "step": 105189 + }, + { + "epoch": 1.26, + "grad_norm": 4.253301252021766, + "learning_rate": 6.2871063312468285e-06, + "loss": 1.8226, + "step": 105192 + }, + { + "epoch": 1.26, + "grad_norm": 4.976451988324736, + "learning_rate": 6.286563918221407e-06, + "loss": 0.9429, + "step": 105195 + }, + { + "epoch": 1.26, + "grad_norm": 15.221904730718878, + "learning_rate": 6.286021517868599e-06, + "loss": 1.4205, + "step": 105198 + }, + { + "epoch": 1.27, + "grad_norm": 13.050723538076793, + "learning_rate": 6.285479130190264e-06, + "loss": 1.1647, + "step": 105201 + }, + { + "epoch": 1.27, + "grad_norm": 7.922295634829617, + "learning_rate": 6.2849367551882516e-06, + "loss": 1.0724, + "step": 105204 + }, + { + "epoch": 1.27, + "grad_norm": 13.586811688267723, + "learning_rate": 6.28439439286441e-06, + "loss": 1.1627, + "step": 105207 + }, + { + "epoch": 1.27, + "grad_norm": 6.605338479222489, + "learning_rate": 6.283852043220598e-06, + "loss": 1.0882, + "step": 105210 + }, + { + "epoch": 1.27, + "grad_norm": 6.651109753462283, + "learning_rate": 6.283309706258656e-06, + "loss": 1.1569, + "step": 105213 + }, + { + "epoch": 1.27, + "grad_norm": 9.682187866795156, + "learning_rate": 6.282767381980437e-06, + "loss": 1.013, + "step": 105216 + }, + { + "epoch": 1.27, + "grad_norm": 16.51927485081085, + "learning_rate": 6.282225070387795e-06, + "loss": 1.2597, + "step": 105219 + }, + { + "epoch": 1.27, + "grad_norm": 6.930876916287853, + "learning_rate": 6.281682771482585e-06, + "loss": 1.0548, + "step": 105222 + }, + { + "epoch": 1.27, + "grad_norm": 6.479278216773513, + "learning_rate": 6.281140485266648e-06, + "loss": 1.2001, + "step": 105225 + }, + { + "epoch": 1.27, + "grad_norm": 6.214601037660451, + "learning_rate": 6.280598211741837e-06, + "loss": 1.2758, + "step": 105228 + }, + { + "epoch": 1.27, + "grad_norm": 19.015134562954465, + "learning_rate": 6.280055950910006e-06, + "loss": 1.086, + "step": 105231 + }, + { + "epoch": 1.27, + "grad_norm": 4.412537558087335, + "learning_rate": 6.279513702773005e-06, + "loss": 1.2633, + "step": 105234 + }, + { + "epoch": 1.27, + "grad_norm": 6.118198235927276, + "learning_rate": 6.278971467332688e-06, + "loss": 0.9094, + "step": 105237 + }, + { + "epoch": 1.27, + "grad_norm": 5.200175567938256, + "learning_rate": 6.278429244590896e-06, + "loss": 1.0771, + "step": 105240 + }, + { + "epoch": 1.27, + "grad_norm": 6.396165089331045, + "learning_rate": 6.277887034549484e-06, + "loss": 1.1447, + "step": 105243 + }, + { + "epoch": 1.27, + "grad_norm": 11.652937150991992, + "learning_rate": 6.277344837210304e-06, + "loss": 1.2784, + "step": 105246 + }, + { + "epoch": 1.27, + "grad_norm": 5.929630506775239, + "learning_rate": 6.276802652575207e-06, + "loss": 1.0626, + "step": 105249 + }, + { + "epoch": 1.27, + "grad_norm": 4.5377293896491695, + "learning_rate": 6.276260480646039e-06, + "loss": 1.1664, + "step": 105252 + }, + { + "epoch": 1.27, + "grad_norm": 5.169074156581524, + "learning_rate": 6.275718321424654e-06, + "loss": 1.2009, + "step": 105255 + }, + { + "epoch": 1.27, + "grad_norm": 26.038710593960698, + "learning_rate": 6.275176174912898e-06, + "loss": 1.4253, + "step": 105258 + }, + { + "epoch": 1.27, + "grad_norm": 7.155224906360653, + "learning_rate": 6.274634041112626e-06, + "loss": 1.2464, + "step": 105261 + }, + { + "epoch": 1.27, + "grad_norm": 12.51661088905377, + "learning_rate": 6.274091920025689e-06, + "loss": 1.0995, + "step": 105264 + }, + { + "epoch": 1.27, + "grad_norm": 4.142448939653422, + "learning_rate": 6.273549811653932e-06, + "loss": 1.2408, + "step": 105267 + }, + { + "epoch": 1.27, + "grad_norm": 4.923769278107335, + "learning_rate": 6.273007715999207e-06, + "loss": 1.3105, + "step": 105270 + }, + { + "epoch": 1.27, + "grad_norm": 14.40678975311871, + "learning_rate": 6.272465633063361e-06, + "loss": 1.3876, + "step": 105273 + }, + { + "epoch": 1.27, + "grad_norm": 4.813497307824291, + "learning_rate": 6.271923562848254e-06, + "loss": 1.1651, + "step": 105276 + }, + { + "epoch": 1.27, + "grad_norm": 7.207276414454729, + "learning_rate": 6.271381505355726e-06, + "loss": 1.0827, + "step": 105279 + }, + { + "epoch": 1.27, + "grad_norm": 13.449237360353036, + "learning_rate": 6.270839460587628e-06, + "loss": 1.2815, + "step": 105282 + }, + { + "epoch": 1.27, + "grad_norm": 17.965138934592684, + "learning_rate": 6.2702974285458106e-06, + "loss": 0.9561, + "step": 105285 + }, + { + "epoch": 1.27, + "grad_norm": 17.92151963794241, + "learning_rate": 6.269755409232128e-06, + "loss": 1.2723, + "step": 105288 + }, + { + "epoch": 1.27, + "grad_norm": 8.654356386829994, + "learning_rate": 6.2692134026484285e-06, + "loss": 1.154, + "step": 105291 + }, + { + "epoch": 1.27, + "grad_norm": 13.295113830310548, + "learning_rate": 6.268671408796555e-06, + "loss": 1.2304, + "step": 105294 + }, + { + "epoch": 1.27, + "grad_norm": 10.246900439790465, + "learning_rate": 6.2681294276783644e-06, + "loss": 1.2731, + "step": 105297 + }, + { + "epoch": 1.27, + "grad_norm": 8.458294130474881, + "learning_rate": 6.267587459295702e-06, + "loss": 1.3117, + "step": 105300 + }, + { + "epoch": 1.27, + "grad_norm": 11.542052167077523, + "learning_rate": 6.267045503650423e-06, + "loss": 1.2382, + "step": 105303 + }, + { + "epoch": 1.27, + "grad_norm": 3.2054640763618343, + "learning_rate": 6.2665035607443694e-06, + "loss": 1.1273, + "step": 105306 + }, + { + "epoch": 1.27, + "grad_norm": 10.719511918204775, + "learning_rate": 6.265961630579396e-06, + "loss": 1.2124, + "step": 105309 + }, + { + "epoch": 1.27, + "grad_norm": 8.990411496481123, + "learning_rate": 6.265419713157348e-06, + "loss": 1.0091, + "step": 105312 + }, + { + "epoch": 1.27, + "grad_norm": 22.638913101097973, + "learning_rate": 6.264877808480079e-06, + "loss": 1.5997, + "step": 105315 + }, + { + "epoch": 1.27, + "grad_norm": 4.119713926680869, + "learning_rate": 6.26433591654944e-06, + "loss": 1.346, + "step": 105318 + }, + { + "epoch": 1.27, + "grad_norm": 21.54706173156656, + "learning_rate": 6.263794037367275e-06, + "loss": 1.0366, + "step": 105321 + }, + { + "epoch": 1.27, + "grad_norm": 9.806869824687514, + "learning_rate": 6.2632521709354324e-06, + "loss": 1.0983, + "step": 105324 + }, + { + "epoch": 1.27, + "grad_norm": 28.164338703934657, + "learning_rate": 6.262710317255764e-06, + "loss": 1.2587, + "step": 105327 + }, + { + "epoch": 1.27, + "grad_norm": 69.84884049079594, + "learning_rate": 6.262168476330124e-06, + "loss": 0.9991, + "step": 105330 + }, + { + "epoch": 1.27, + "grad_norm": 11.666671387175885, + "learning_rate": 6.261626648160354e-06, + "loss": 1.4131, + "step": 105333 + }, + { + "epoch": 1.27, + "grad_norm": 3.754186879174207, + "learning_rate": 6.261084832748303e-06, + "loss": 1.1986, + "step": 105336 + }, + { + "epoch": 1.27, + "grad_norm": 5.85119467503288, + "learning_rate": 6.260543030095827e-06, + "loss": 1.5587, + "step": 105339 + }, + { + "epoch": 1.27, + "grad_norm": 5.765924993658018, + "learning_rate": 6.260001240204767e-06, + "loss": 1.2787, + "step": 105342 + }, + { + "epoch": 1.27, + "grad_norm": 20.148060213917795, + "learning_rate": 6.25945946307698e-06, + "loss": 1.1448, + "step": 105345 + }, + { + "epoch": 1.27, + "grad_norm": 17.149842900995548, + "learning_rate": 6.258917698714306e-06, + "loss": 1.1807, + "step": 105348 + }, + { + "epoch": 1.27, + "grad_norm": 4.6770331578449875, + "learning_rate": 6.258375947118601e-06, + "loss": 1.0835, + "step": 105351 + }, + { + "epoch": 1.27, + "grad_norm": 11.912973070953717, + "learning_rate": 6.257834208291709e-06, + "loss": 1.0374, + "step": 105354 + }, + { + "epoch": 1.27, + "grad_norm": 5.146897676606606, + "learning_rate": 6.257292482235485e-06, + "loss": 1.3235, + "step": 105357 + }, + { + "epoch": 1.27, + "grad_norm": 6.100864031931781, + "learning_rate": 6.2567507689517695e-06, + "loss": 1.2357, + "step": 105360 + }, + { + "epoch": 1.27, + "grad_norm": 13.548247947307177, + "learning_rate": 6.256209068442418e-06, + "loss": 1.6135, + "step": 105363 + }, + { + "epoch": 1.27, + "grad_norm": 15.922665417516365, + "learning_rate": 6.255667380709274e-06, + "loss": 1.156, + "step": 105366 + }, + { + "epoch": 1.27, + "grad_norm": 26.305086565679325, + "learning_rate": 6.255125705754188e-06, + "loss": 1.0797, + "step": 105369 + }, + { + "epoch": 1.27, + "grad_norm": 5.923531059325469, + "learning_rate": 6.254584043579014e-06, + "loss": 1.2156, + "step": 105372 + }, + { + "epoch": 1.27, + "grad_norm": 12.711904762661996, + "learning_rate": 6.254042394185593e-06, + "loss": 0.8599, + "step": 105375 + }, + { + "epoch": 1.27, + "grad_norm": 13.748348601613523, + "learning_rate": 6.2535007575757745e-06, + "loss": 1.2004, + "step": 105378 + }, + { + "epoch": 1.27, + "grad_norm": 7.593978216362713, + "learning_rate": 6.252959133751407e-06, + "loss": 1.3665, + "step": 105381 + }, + { + "epoch": 1.27, + "grad_norm": 7.259977201922198, + "learning_rate": 6.2524175227143455e-06, + "loss": 1.4496, + "step": 105384 + }, + { + "epoch": 1.27, + "grad_norm": 10.80596249712355, + "learning_rate": 6.25187592446643e-06, + "loss": 0.8737, + "step": 105387 + }, + { + "epoch": 1.27, + "grad_norm": 14.115152371077277, + "learning_rate": 6.251334339009511e-06, + "loss": 1.1288, + "step": 105390 + }, + { + "epoch": 1.27, + "grad_norm": 6.737227944821448, + "learning_rate": 6.25079276634544e-06, + "loss": 1.3056, + "step": 105393 + }, + { + "epoch": 1.27, + "grad_norm": 4.577075262767244, + "learning_rate": 6.250251206476061e-06, + "loss": 1.2069, + "step": 105396 + }, + { + "epoch": 1.27, + "grad_norm": 2.459320710705359, + "learning_rate": 6.249709659403227e-06, + "loss": 1.1591, + "step": 105399 + }, + { + "epoch": 1.27, + "grad_norm": 40.234637624721636, + "learning_rate": 6.249168125128779e-06, + "loss": 1.2973, + "step": 105402 + }, + { + "epoch": 1.27, + "grad_norm": 10.370770938278461, + "learning_rate": 6.248626603654572e-06, + "loss": 0.8786, + "step": 105405 + }, + { + "epoch": 1.27, + "grad_norm": 8.526568379526662, + "learning_rate": 6.248085094982448e-06, + "loss": 1.1975, + "step": 105408 + }, + { + "epoch": 1.27, + "grad_norm": 4.694102708841032, + "learning_rate": 6.247543599114263e-06, + "loss": 1.2363, + "step": 105411 + }, + { + "epoch": 1.27, + "grad_norm": 3.967250517570689, + "learning_rate": 6.247002116051857e-06, + "loss": 1.25, + "step": 105414 + }, + { + "epoch": 1.27, + "grad_norm": 33.85315811523727, + "learning_rate": 6.246460645797081e-06, + "loss": 1.2308, + "step": 105417 + }, + { + "epoch": 1.27, + "grad_norm": 6.97976750551674, + "learning_rate": 6.245919188351781e-06, + "loss": 1.3884, + "step": 105420 + }, + { + "epoch": 1.27, + "grad_norm": 7.287627447100159, + "learning_rate": 6.245377743717808e-06, + "loss": 1.0831, + "step": 105423 + }, + { + "epoch": 1.27, + "grad_norm": 7.075411929902162, + "learning_rate": 6.244836311897011e-06, + "loss": 0.8979, + "step": 105426 + }, + { + "epoch": 1.27, + "grad_norm": 19.18454036165832, + "learning_rate": 6.244294892891234e-06, + "loss": 1.3028, + "step": 105429 + }, + { + "epoch": 1.27, + "grad_norm": 7.236771019262624, + "learning_rate": 6.2437534867023244e-06, + "loss": 1.3671, + "step": 105432 + }, + { + "epoch": 1.27, + "grad_norm": 3.437694938899371, + "learning_rate": 6.243212093332131e-06, + "loss": 1.4417, + "step": 105435 + }, + { + "epoch": 1.27, + "grad_norm": 15.024468454887506, + "learning_rate": 6.242670712782505e-06, + "loss": 1.6447, + "step": 105438 + }, + { + "epoch": 1.27, + "grad_norm": 6.551253066290168, + "learning_rate": 6.242129345055286e-06, + "loss": 1.299, + "step": 105441 + }, + { + "epoch": 1.27, + "grad_norm": 18.677019078152366, + "learning_rate": 6.241587990152326e-06, + "loss": 1.2689, + "step": 105444 + }, + { + "epoch": 1.27, + "grad_norm": 12.270737987526786, + "learning_rate": 6.241046648075475e-06, + "loss": 1.2528, + "step": 105447 + }, + { + "epoch": 1.27, + "grad_norm": 31.10169780300609, + "learning_rate": 6.240505318826575e-06, + "loss": 1.3046, + "step": 105450 + }, + { + "epoch": 1.27, + "grad_norm": 13.850035613350533, + "learning_rate": 6.2399640024074805e-06, + "loss": 1.1078, + "step": 105453 + }, + { + "epoch": 1.27, + "grad_norm": 9.208943376972053, + "learning_rate": 6.23942269882003e-06, + "loss": 1.5826, + "step": 105456 + }, + { + "epoch": 1.27, + "grad_norm": 6.37438649618059, + "learning_rate": 6.238881408066078e-06, + "loss": 1.0143, + "step": 105459 + }, + { + "epoch": 1.27, + "grad_norm": 9.34071464821155, + "learning_rate": 6.2383401301474665e-06, + "loss": 0.7189, + "step": 105462 + }, + { + "epoch": 1.27, + "grad_norm": 11.66056374043889, + "learning_rate": 6.237798865066049e-06, + "loss": 1.1026, + "step": 105465 + }, + { + "epoch": 1.27, + "grad_norm": 8.84855954319172, + "learning_rate": 6.237257612823666e-06, + "loss": 1.1535, + "step": 105468 + }, + { + "epoch": 1.27, + "grad_norm": 19.362771228305576, + "learning_rate": 6.236716373422169e-06, + "loss": 1.2584, + "step": 105471 + }, + { + "epoch": 1.27, + "grad_norm": 10.266268473612653, + "learning_rate": 6.236175146863401e-06, + "loss": 0.9433, + "step": 105474 + }, + { + "epoch": 1.27, + "grad_norm": 5.17234515382607, + "learning_rate": 6.235633933149214e-06, + "loss": 1.5661, + "step": 105477 + }, + { + "epoch": 1.27, + "grad_norm": 9.081510764525891, + "learning_rate": 6.235092732281452e-06, + "loss": 1.5302, + "step": 105480 + }, + { + "epoch": 1.27, + "grad_norm": 16.22176183235805, + "learning_rate": 6.234551544261963e-06, + "loss": 1.1892, + "step": 105483 + }, + { + "epoch": 1.27, + "grad_norm": 12.215751842785924, + "learning_rate": 6.2340103690925915e-06, + "loss": 0.7984, + "step": 105486 + }, + { + "epoch": 1.27, + "grad_norm": 11.244043058716107, + "learning_rate": 6.233469206775187e-06, + "loss": 1.2377, + "step": 105489 + }, + { + "epoch": 1.27, + "grad_norm": 16.784987371673083, + "learning_rate": 6.232928057311598e-06, + "loss": 1.2782, + "step": 105492 + }, + { + "epoch": 1.27, + "grad_norm": 3.1145988547828685, + "learning_rate": 6.232386920703664e-06, + "loss": 1.1564, + "step": 105495 + }, + { + "epoch": 1.27, + "grad_norm": 8.75935372082447, + "learning_rate": 6.231845796953237e-06, + "loss": 1.0507, + "step": 105498 + }, + { + "epoch": 1.27, + "grad_norm": 11.043993664278222, + "learning_rate": 6.231304686062165e-06, + "loss": 1.6426, + "step": 105501 + }, + { + "epoch": 1.27, + "grad_norm": 9.949095159143653, + "learning_rate": 6.23076358803229e-06, + "loss": 1.4653, + "step": 105504 + }, + { + "epoch": 1.27, + "grad_norm": 35.48274574585724, + "learning_rate": 6.230222502865466e-06, + "loss": 1.5119, + "step": 105507 + }, + { + "epoch": 1.27, + "grad_norm": 6.741597517056667, + "learning_rate": 6.22968143056353e-06, + "loss": 1.1916, + "step": 105510 + }, + { + "epoch": 1.27, + "grad_norm": 17.748908139243188, + "learning_rate": 6.229140371128337e-06, + "loss": 1.6652, + "step": 105513 + }, + { + "epoch": 1.27, + "grad_norm": 16.165730496877874, + "learning_rate": 6.228599324561726e-06, + "loss": 1.3337, + "step": 105516 + }, + { + "epoch": 1.27, + "grad_norm": 14.342191509066694, + "learning_rate": 6.228058290865551e-06, + "loss": 1.0708, + "step": 105519 + }, + { + "epoch": 1.27, + "grad_norm": 8.142291117547195, + "learning_rate": 6.2275172700416506e-06, + "loss": 1.3372, + "step": 105522 + }, + { + "epoch": 1.27, + "grad_norm": 7.297230546612314, + "learning_rate": 6.226976262091877e-06, + "loss": 0.9655, + "step": 105525 + }, + { + "epoch": 1.27, + "grad_norm": 6.569254657895472, + "learning_rate": 6.226435267018073e-06, + "loss": 1.2688, + "step": 105528 + }, + { + "epoch": 1.27, + "grad_norm": 13.952444983330347, + "learning_rate": 6.225894284822087e-06, + "loss": 1.091, + "step": 105531 + }, + { + "epoch": 1.27, + "grad_norm": 3.3174120003221743, + "learning_rate": 6.2253533155057675e-06, + "loss": 1.3933, + "step": 105534 + }, + { + "epoch": 1.27, + "grad_norm": 4.372956345005131, + "learning_rate": 6.224812359070953e-06, + "loss": 1.3209, + "step": 105537 + }, + { + "epoch": 1.27, + "grad_norm": 21.211869230997806, + "learning_rate": 6.224271415519494e-06, + "loss": 1.0838, + "step": 105540 + }, + { + "epoch": 1.27, + "grad_norm": 13.975000775254898, + "learning_rate": 6.223730484853239e-06, + "loss": 0.8589, + "step": 105543 + }, + { + "epoch": 1.27, + "grad_norm": 11.046408998027738, + "learning_rate": 6.223189567074034e-06, + "loss": 1.4704, + "step": 105546 + }, + { + "epoch": 1.27, + "grad_norm": 5.273171085501881, + "learning_rate": 6.222648662183718e-06, + "loss": 1.3046, + "step": 105549 + }, + { + "epoch": 1.27, + "grad_norm": 5.0514071342905575, + "learning_rate": 6.222107770184142e-06, + "loss": 1.4552, + "step": 105552 + }, + { + "epoch": 1.27, + "grad_norm": 4.240476675014386, + "learning_rate": 6.221566891077152e-06, + "loss": 1.1752, + "step": 105555 + }, + { + "epoch": 1.27, + "grad_norm": 10.057263568119719, + "learning_rate": 6.2210260248645926e-06, + "loss": 1.4165, + "step": 105558 + }, + { + "epoch": 1.27, + "grad_norm": 17.63383666803814, + "learning_rate": 6.220485171548314e-06, + "loss": 0.9634, + "step": 105561 + }, + { + "epoch": 1.27, + "grad_norm": 15.849543589410008, + "learning_rate": 6.219944331130155e-06, + "loss": 1.104, + "step": 105564 + }, + { + "epoch": 1.27, + "grad_norm": 22.766620289850497, + "learning_rate": 6.219403503611964e-06, + "loss": 1.7299, + "step": 105567 + }, + { + "epoch": 1.27, + "grad_norm": 9.62611833373681, + "learning_rate": 6.2188626889955865e-06, + "loss": 1.3628, + "step": 105570 + }, + { + "epoch": 1.27, + "grad_norm": 22.82737879042515, + "learning_rate": 6.218321887282872e-06, + "loss": 1.1429, + "step": 105573 + }, + { + "epoch": 1.27, + "grad_norm": 9.486720062893179, + "learning_rate": 6.217781098475661e-06, + "loss": 0.9195, + "step": 105576 + }, + { + "epoch": 1.27, + "grad_norm": 18.137034600274834, + "learning_rate": 6.217240322575799e-06, + "loss": 1.1963, + "step": 105579 + }, + { + "epoch": 1.27, + "grad_norm": 5.0503430182654805, + "learning_rate": 6.2166995595851345e-06, + "loss": 1.4518, + "step": 105582 + }, + { + "epoch": 1.27, + "grad_norm": 9.568052824509, + "learning_rate": 6.2161588095055125e-06, + "loss": 1.0481, + "step": 105585 + }, + { + "epoch": 1.27, + "grad_norm": 5.706857286537176, + "learning_rate": 6.215618072338781e-06, + "loss": 1.1586, + "step": 105588 + }, + { + "epoch": 1.27, + "grad_norm": 9.873357589320703, + "learning_rate": 6.2150773480867756e-06, + "loss": 1.1831, + "step": 105591 + }, + { + "epoch": 1.27, + "grad_norm": 7.50260660145881, + "learning_rate": 6.214536636751349e-06, + "loss": 1.4113, + "step": 105594 + }, + { + "epoch": 1.27, + "grad_norm": 16.85674066714633, + "learning_rate": 6.213995938334346e-06, + "loss": 1.1744, + "step": 105597 + }, + { + "epoch": 1.27, + "grad_norm": 4.290573210682322, + "learning_rate": 6.2134552528376146e-06, + "loss": 1.1314, + "step": 105600 + }, + { + "epoch": 1.27, + "grad_norm": 64.36516733210593, + "learning_rate": 6.212914580262992e-06, + "loss": 1.0422, + "step": 105603 + }, + { + "epoch": 1.27, + "grad_norm": 5.082361187805964, + "learning_rate": 6.212373920612328e-06, + "loss": 1.1675, + "step": 105606 + }, + { + "epoch": 1.27, + "grad_norm": 5.331351555456231, + "learning_rate": 6.21183327388747e-06, + "loss": 1.1835, + "step": 105609 + }, + { + "epoch": 1.27, + "grad_norm": 3.4360923161766523, + "learning_rate": 6.211292640090259e-06, + "loss": 1.1421, + "step": 105612 + }, + { + "epoch": 1.27, + "grad_norm": 10.072974482638775, + "learning_rate": 6.210752019222543e-06, + "loss": 1.0601, + "step": 105615 + }, + { + "epoch": 1.27, + "grad_norm": 25.038266982415013, + "learning_rate": 6.2102114112861636e-06, + "loss": 1.0461, + "step": 105618 + }, + { + "epoch": 1.27, + "grad_norm": 3.394730437334088, + "learning_rate": 6.209670816282969e-06, + "loss": 1.1325, + "step": 105621 + }, + { + "epoch": 1.27, + "grad_norm": 15.578768377812304, + "learning_rate": 6.2091302342148e-06, + "loss": 1.1279, + "step": 105624 + }, + { + "epoch": 1.27, + "grad_norm": 46.33810176472677, + "learning_rate": 6.208589665083508e-06, + "loss": 1.3521, + "step": 105627 + }, + { + "epoch": 1.27, + "grad_norm": 7.398626634314373, + "learning_rate": 6.208049108890934e-06, + "loss": 1.3853, + "step": 105630 + }, + { + "epoch": 1.27, + "grad_norm": 9.743963830404144, + "learning_rate": 6.2075085656389186e-06, + "loss": 1.4921, + "step": 105633 + }, + { + "epoch": 1.27, + "grad_norm": 12.547319968786773, + "learning_rate": 6.2069680353293105e-06, + "loss": 1.0222, + "step": 105636 + }, + { + "epoch": 1.27, + "grad_norm": 9.249413996503447, + "learning_rate": 6.2064275179639575e-06, + "loss": 1.3538, + "step": 105639 + }, + { + "epoch": 1.27, + "grad_norm": 11.165957056772704, + "learning_rate": 6.205887013544703e-06, + "loss": 0.9807, + "step": 105642 + }, + { + "epoch": 1.27, + "grad_norm": 16.210690557772846, + "learning_rate": 6.205346522073383e-06, + "loss": 1.2225, + "step": 105645 + }, + { + "epoch": 1.27, + "grad_norm": 11.56627652369628, + "learning_rate": 6.20480604355185e-06, + "loss": 1.3142, + "step": 105648 + }, + { + "epoch": 1.27, + "grad_norm": 6.648635214839411, + "learning_rate": 6.204265577981949e-06, + "loss": 1.2702, + "step": 105651 + }, + { + "epoch": 1.27, + "grad_norm": 10.063264371963657, + "learning_rate": 6.203725125365525e-06, + "loss": 0.9184, + "step": 105654 + }, + { + "epoch": 1.27, + "grad_norm": 15.49306350346924, + "learning_rate": 6.203184685704415e-06, + "loss": 0.9962, + "step": 105657 + }, + { + "epoch": 1.27, + "grad_norm": 17.86659949056967, + "learning_rate": 6.202644259000468e-06, + "loss": 1.3292, + "step": 105660 + }, + { + "epoch": 1.27, + "grad_norm": 16.57508775454216, + "learning_rate": 6.202103845255532e-06, + "loss": 1.2427, + "step": 105663 + }, + { + "epoch": 1.27, + "grad_norm": 14.208947582856858, + "learning_rate": 6.201563444471443e-06, + "loss": 0.9194, + "step": 105666 + }, + { + "epoch": 1.27, + "grad_norm": 12.765868317659132, + "learning_rate": 6.201023056650055e-06, + "loss": 0.9856, + "step": 105669 + }, + { + "epoch": 1.27, + "grad_norm": 7.829076855905335, + "learning_rate": 6.200482681793205e-06, + "loss": 1.1461, + "step": 105672 + }, + { + "epoch": 1.27, + "grad_norm": 6.323612536810719, + "learning_rate": 6.199942319902737e-06, + "loss": 0.8316, + "step": 105675 + }, + { + "epoch": 1.27, + "grad_norm": 2.7630482706679826, + "learning_rate": 6.199401970980496e-06, + "loss": 1.2825, + "step": 105678 + }, + { + "epoch": 1.27, + "grad_norm": 14.125475706635138, + "learning_rate": 6.198861635028332e-06, + "loss": 1.6076, + "step": 105681 + }, + { + "epoch": 1.27, + "grad_norm": 12.109147620369319, + "learning_rate": 6.1983213120480825e-06, + "loss": 1.3296, + "step": 105684 + }, + { + "epoch": 1.27, + "grad_norm": 14.911914621489883, + "learning_rate": 6.197781002041591e-06, + "loss": 1.0966, + "step": 105687 + }, + { + "epoch": 1.27, + "grad_norm": 12.830297736462144, + "learning_rate": 6.197240705010703e-06, + "loss": 1.0915, + "step": 105690 + }, + { + "epoch": 1.27, + "grad_norm": 8.283144789325656, + "learning_rate": 6.196700420957265e-06, + "loss": 1.1906, + "step": 105693 + }, + { + "epoch": 1.27, + "grad_norm": 5.312738932159276, + "learning_rate": 6.196160149883119e-06, + "loss": 1.0829, + "step": 105696 + }, + { + "epoch": 1.27, + "grad_norm": 6.13647900519511, + "learning_rate": 6.195619891790105e-06, + "loss": 1.1391, + "step": 105699 + }, + { + "epoch": 1.27, + "grad_norm": 6.651801212442262, + "learning_rate": 6.195079646680071e-06, + "loss": 1.6088, + "step": 105702 + }, + { + "epoch": 1.27, + "grad_norm": 33.57542909603983, + "learning_rate": 6.1945394145548606e-06, + "loss": 1.07, + "step": 105705 + }, + { + "epoch": 1.27, + "grad_norm": 5.571103383207024, + "learning_rate": 6.19399919541632e-06, + "loss": 1.3765, + "step": 105708 + }, + { + "epoch": 1.27, + "grad_norm": 13.225172664340521, + "learning_rate": 6.1934589892662846e-06, + "loss": 1.0563, + "step": 105711 + }, + { + "epoch": 1.27, + "grad_norm": 12.579044538033674, + "learning_rate": 6.192918796106604e-06, + "loss": 1.1866, + "step": 105714 + }, + { + "epoch": 1.27, + "grad_norm": 5.438380402933482, + "learning_rate": 6.192378615939118e-06, + "loss": 0.9608, + "step": 105717 + }, + { + "epoch": 1.27, + "grad_norm": 12.318628457370702, + "learning_rate": 6.191838448765672e-06, + "loss": 1.2336, + "step": 105720 + }, + { + "epoch": 1.27, + "grad_norm": 11.100266653080906, + "learning_rate": 6.191298294588115e-06, + "loss": 1.7114, + "step": 105723 + }, + { + "epoch": 1.27, + "grad_norm": 5.462675558485684, + "learning_rate": 6.190758153408283e-06, + "loss": 0.9301, + "step": 105726 + }, + { + "epoch": 1.27, + "grad_norm": 14.274669534946776, + "learning_rate": 6.190218025228019e-06, + "loss": 1.0893, + "step": 105729 + }, + { + "epoch": 1.27, + "grad_norm": 3.097489340981483, + "learning_rate": 6.189677910049168e-06, + "loss": 1.2449, + "step": 105732 + }, + { + "epoch": 1.27, + "grad_norm": 10.449755809474619, + "learning_rate": 6.1891378078735775e-06, + "loss": 1.1266, + "step": 105735 + }, + { + "epoch": 1.27, + "grad_norm": 9.563515325365895, + "learning_rate": 6.188597718703086e-06, + "loss": 1.1963, + "step": 105738 + }, + { + "epoch": 1.27, + "grad_norm": 12.220121286486625, + "learning_rate": 6.188057642539536e-06, + "loss": 1.1564, + "step": 105741 + }, + { + "epoch": 1.27, + "grad_norm": 11.691597672061258, + "learning_rate": 6.187517579384771e-06, + "loss": 1.2708, + "step": 105744 + }, + { + "epoch": 1.27, + "grad_norm": 3.3127801158408534, + "learning_rate": 6.186977529240637e-06, + "loss": 1.0998, + "step": 105747 + }, + { + "epoch": 1.27, + "grad_norm": 7.530441515091021, + "learning_rate": 6.186437492108979e-06, + "loss": 0.8503, + "step": 105750 + }, + { + "epoch": 1.27, + "grad_norm": 6.556754589268028, + "learning_rate": 6.1858974679916305e-06, + "loss": 0.9033, + "step": 105753 + }, + { + "epoch": 1.27, + "grad_norm": 4.003782954659554, + "learning_rate": 6.185357456890444e-06, + "loss": 1.3405, + "step": 105756 + }, + { + "epoch": 1.27, + "grad_norm": 7.510319773349725, + "learning_rate": 6.1848174588072555e-06, + "loss": 1.329, + "step": 105759 + }, + { + "epoch": 1.27, + "grad_norm": 4.745357235502551, + "learning_rate": 6.184277473743914e-06, + "loss": 1.1962, + "step": 105762 + }, + { + "epoch": 1.27, + "grad_norm": 8.690774079439132, + "learning_rate": 6.183737501702256e-06, + "loss": 1.2588, + "step": 105765 + }, + { + "epoch": 1.27, + "grad_norm": 6.144146384306209, + "learning_rate": 6.183197542684128e-06, + "loss": 0.9262, + "step": 105768 + }, + { + "epoch": 1.27, + "grad_norm": 7.662997669167871, + "learning_rate": 6.18265759669137e-06, + "loss": 1.5939, + "step": 105771 + }, + { + "epoch": 1.27, + "grad_norm": 18.36696375335221, + "learning_rate": 6.182117663725828e-06, + "loss": 1.2182, + "step": 105774 + }, + { + "epoch": 1.27, + "grad_norm": 10.08983627391397, + "learning_rate": 6.181577743789348e-06, + "loss": 0.9966, + "step": 105777 + }, + { + "epoch": 1.27, + "grad_norm": 5.242500593734782, + "learning_rate": 6.181037836883764e-06, + "loss": 1.4069, + "step": 105780 + }, + { + "epoch": 1.27, + "grad_norm": 13.799797283485798, + "learning_rate": 6.180497943010921e-06, + "loss": 1.0439, + "step": 105783 + }, + { + "epoch": 1.27, + "grad_norm": 6.551656814748723, + "learning_rate": 6.1799580621726616e-06, + "loss": 1.3861, + "step": 105786 + }, + { + "epoch": 1.27, + "grad_norm": 9.816813159428454, + "learning_rate": 6.1794181943708355e-06, + "loss": 1.0684, + "step": 105789 + }, + { + "epoch": 1.27, + "grad_norm": 7.337754978775311, + "learning_rate": 6.178878339607275e-06, + "loss": 1.1843, + "step": 105792 + }, + { + "epoch": 1.27, + "grad_norm": 20.03767849588534, + "learning_rate": 6.1783384978838255e-06, + "loss": 1.1615, + "step": 105795 + }, + { + "epoch": 1.27, + "grad_norm": 22.130914278237587, + "learning_rate": 6.17779866920233e-06, + "loss": 0.9458, + "step": 105798 + }, + { + "epoch": 1.27, + "grad_norm": 4.620987214942332, + "learning_rate": 6.177258853564632e-06, + "loss": 1.1711, + "step": 105801 + }, + { + "epoch": 1.27, + "grad_norm": 5.151920074472768, + "learning_rate": 6.176719050972575e-06, + "loss": 1.3389, + "step": 105804 + }, + { + "epoch": 1.27, + "grad_norm": 3.7813171808397, + "learning_rate": 6.176179261427996e-06, + "loss": 1.1936, + "step": 105807 + }, + { + "epoch": 1.27, + "grad_norm": 4.44422482978717, + "learning_rate": 6.17563948493274e-06, + "loss": 1.0955, + "step": 105810 + }, + { + "epoch": 1.27, + "grad_norm": 3.095613333709409, + "learning_rate": 6.175099721488649e-06, + "loss": 1.0999, + "step": 105813 + }, + { + "epoch": 1.27, + "grad_norm": 9.296996172959673, + "learning_rate": 6.174559971097567e-06, + "loss": 1.4244, + "step": 105816 + }, + { + "epoch": 1.27, + "grad_norm": 3.1954165843891102, + "learning_rate": 6.174020233761331e-06, + "loss": 1.3677, + "step": 105819 + }, + { + "epoch": 1.27, + "grad_norm": 4.746964526793277, + "learning_rate": 6.173480509481786e-06, + "loss": 1.0233, + "step": 105822 + }, + { + "epoch": 1.27, + "grad_norm": 12.932736563983427, + "learning_rate": 6.172940798260773e-06, + "loss": 1.4503, + "step": 105825 + }, + { + "epoch": 1.27, + "grad_norm": 9.405491985692038, + "learning_rate": 6.172401100100136e-06, + "loss": 0.9729, + "step": 105828 + }, + { + "epoch": 1.27, + "grad_norm": 6.345263569410651, + "learning_rate": 6.171861415001717e-06, + "loss": 0.7647, + "step": 105831 + }, + { + "epoch": 1.27, + "grad_norm": 14.99818092081988, + "learning_rate": 6.171321742967356e-06, + "loss": 1.1928, + "step": 105834 + }, + { + "epoch": 1.27, + "grad_norm": 18.169006158389994, + "learning_rate": 6.170782083998892e-06, + "loss": 1.1767, + "step": 105837 + }, + { + "epoch": 1.27, + "grad_norm": 12.039202564044192, + "learning_rate": 6.170242438098168e-06, + "loss": 0.8581, + "step": 105840 + }, + { + "epoch": 1.27, + "grad_norm": 2.527798246741207, + "learning_rate": 6.169702805267033e-06, + "loss": 1.2486, + "step": 105843 + }, + { + "epoch": 1.27, + "grad_norm": 6.320476026179262, + "learning_rate": 6.169163185507321e-06, + "loss": 0.935, + "step": 105846 + }, + { + "epoch": 1.27, + "grad_norm": 18.457904872561265, + "learning_rate": 6.168623578820872e-06, + "loss": 1.3995, + "step": 105849 + }, + { + "epoch": 1.27, + "grad_norm": 13.537503591531655, + "learning_rate": 6.168083985209533e-06, + "loss": 1.1596, + "step": 105852 + }, + { + "epoch": 1.27, + "grad_norm": 41.61026211207681, + "learning_rate": 6.167544404675141e-06, + "loss": 1.2744, + "step": 105855 + }, + { + "epoch": 1.27, + "grad_norm": 4.269470235834033, + "learning_rate": 6.167004837219543e-06, + "loss": 0.8812, + "step": 105858 + }, + { + "epoch": 1.27, + "grad_norm": 26.4825186355477, + "learning_rate": 6.166465282844573e-06, + "loss": 1.0147, + "step": 105861 + }, + { + "epoch": 1.27, + "grad_norm": 5.676271440111554, + "learning_rate": 6.16592574155208e-06, + "loss": 1.1925, + "step": 105864 + }, + { + "epoch": 1.27, + "grad_norm": 11.690516182308931, + "learning_rate": 6.165386213343898e-06, + "loss": 1.3892, + "step": 105867 + }, + { + "epoch": 1.27, + "grad_norm": 34.38585092449948, + "learning_rate": 6.164846698221876e-06, + "loss": 1.0826, + "step": 105870 + }, + { + "epoch": 1.27, + "grad_norm": 2.43395891823907, + "learning_rate": 6.164307196187846e-06, + "loss": 1.3053, + "step": 105873 + }, + { + "epoch": 1.27, + "grad_norm": 13.121303592751888, + "learning_rate": 6.1637677072436575e-06, + "loss": 1.1327, + "step": 105876 + }, + { + "epoch": 1.27, + "grad_norm": 9.03141958583917, + "learning_rate": 6.163228231391144e-06, + "loss": 1.4635, + "step": 105879 + }, + { + "epoch": 1.27, + "grad_norm": 22.37840346654893, + "learning_rate": 6.162688768632151e-06, + "loss": 1.3492, + "step": 105882 + }, + { + "epoch": 1.27, + "grad_norm": 17.840845480834048, + "learning_rate": 6.162149318968525e-06, + "loss": 1.3418, + "step": 105885 + }, + { + "epoch": 1.27, + "grad_norm": 9.8710652156678, + "learning_rate": 6.161609882402096e-06, + "loss": 1.0605, + "step": 105888 + }, + { + "epoch": 1.27, + "grad_norm": 9.712321333488603, + "learning_rate": 6.1610704589347106e-06, + "loss": 1.306, + "step": 105891 + }, + { + "epoch": 1.27, + "grad_norm": 3.027902968166395, + "learning_rate": 6.160531048568209e-06, + "loss": 0.9848, + "step": 105894 + }, + { + "epoch": 1.27, + "grad_norm": 4.619909884299856, + "learning_rate": 6.159991651304434e-06, + "loss": 1.0753, + "step": 105897 + }, + { + "epoch": 1.27, + "grad_norm": 12.527283973239294, + "learning_rate": 6.159452267145222e-06, + "loss": 0.8366, + "step": 105900 + }, + { + "epoch": 1.27, + "grad_norm": 8.859077070573075, + "learning_rate": 6.158912896092415e-06, + "loss": 1.2031, + "step": 105903 + }, + { + "epoch": 1.27, + "grad_norm": 6.2185103030750195, + "learning_rate": 6.158373538147857e-06, + "loss": 1.1387, + "step": 105906 + }, + { + "epoch": 1.27, + "grad_norm": 7.5372588009259145, + "learning_rate": 6.157834193313383e-06, + "loss": 1.2097, + "step": 105909 + }, + { + "epoch": 1.27, + "grad_norm": 5.676346411009399, + "learning_rate": 6.157294861590844e-06, + "loss": 1.0859, + "step": 105912 + }, + { + "epoch": 1.27, + "grad_norm": 9.553603137019664, + "learning_rate": 6.156755542982067e-06, + "loss": 1.1233, + "step": 105915 + }, + { + "epoch": 1.27, + "grad_norm": 9.991022119025654, + "learning_rate": 6.1562162374889014e-06, + "loss": 1.0274, + "step": 105918 + }, + { + "epoch": 1.27, + "grad_norm": 31.443676113264676, + "learning_rate": 6.155676945113183e-06, + "loss": 0.9626, + "step": 105921 + }, + { + "epoch": 1.27, + "grad_norm": 17.450366172162873, + "learning_rate": 6.155137665856759e-06, + "loss": 1.2457, + "step": 105924 + }, + { + "epoch": 1.27, + "grad_norm": 33.71855676134443, + "learning_rate": 6.154598399721461e-06, + "loss": 1.2452, + "step": 105927 + }, + { + "epoch": 1.27, + "grad_norm": 8.125714989357688, + "learning_rate": 6.154059146709136e-06, + "loss": 1.2047, + "step": 105930 + }, + { + "epoch": 1.27, + "grad_norm": 14.26712849871438, + "learning_rate": 6.153519906821619e-06, + "loss": 0.937, + "step": 105933 + }, + { + "epoch": 1.27, + "grad_norm": 7.232887487568349, + "learning_rate": 6.152980680060756e-06, + "loss": 1.2213, + "step": 105936 + }, + { + "epoch": 1.27, + "grad_norm": 8.049278502764954, + "learning_rate": 6.152441466428385e-06, + "loss": 1.0046, + "step": 105939 + }, + { + "epoch": 1.27, + "grad_norm": 18.366523601015146, + "learning_rate": 6.151902265926344e-06, + "loss": 1.4063, + "step": 105942 + }, + { + "epoch": 1.27, + "grad_norm": 8.143031887152109, + "learning_rate": 6.151363078556471e-06, + "loss": 1.4409, + "step": 105945 + }, + { + "epoch": 1.27, + "grad_norm": 4.665593895348289, + "learning_rate": 6.1508239043206155e-06, + "loss": 1.3096, + "step": 105948 + }, + { + "epoch": 1.27, + "grad_norm": 7.763098241406073, + "learning_rate": 6.15028474322061e-06, + "loss": 0.9987, + "step": 105951 + }, + { + "epoch": 1.27, + "grad_norm": 20.023574091525273, + "learning_rate": 6.149745595258294e-06, + "loss": 1.2516, + "step": 105954 + }, + { + "epoch": 1.27, + "grad_norm": 12.582212685045763, + "learning_rate": 6.149206460435508e-06, + "loss": 0.9998, + "step": 105957 + }, + { + "epoch": 1.27, + "grad_norm": 16.91965766642107, + "learning_rate": 6.148667338754097e-06, + "loss": 0.9361, + "step": 105960 + }, + { + "epoch": 1.27, + "grad_norm": 9.717577638923151, + "learning_rate": 6.148128230215894e-06, + "loss": 1.206, + "step": 105963 + }, + { + "epoch": 1.27, + "grad_norm": 13.398499022201978, + "learning_rate": 6.147589134822747e-06, + "loss": 1.3236, + "step": 105966 + }, + { + "epoch": 1.27, + "grad_norm": 13.021420853308644, + "learning_rate": 6.147050052576484e-06, + "loss": 1.3055, + "step": 105969 + }, + { + "epoch": 1.27, + "grad_norm": 3.0786337888985527, + "learning_rate": 6.146510983478956e-06, + "loss": 1.1909, + "step": 105972 + }, + { + "epoch": 1.27, + "grad_norm": 6.429455709804911, + "learning_rate": 6.145971927531995e-06, + "loss": 1.1384, + "step": 105975 + }, + { + "epoch": 1.27, + "grad_norm": 6.767961173984209, + "learning_rate": 6.145432884737448e-06, + "loss": 1.4023, + "step": 105978 + }, + { + "epoch": 1.27, + "grad_norm": 9.154777434351892, + "learning_rate": 6.144893855097146e-06, + "loss": 1.2054, + "step": 105981 + }, + { + "epoch": 1.27, + "grad_norm": 3.118340359928554, + "learning_rate": 6.144354838612934e-06, + "loss": 1.175, + "step": 105984 + }, + { + "epoch": 1.27, + "grad_norm": 6.58388886311109, + "learning_rate": 6.143815835286648e-06, + "loss": 1.6182, + "step": 105987 + }, + { + "epoch": 1.27, + "grad_norm": 28.100713797734418, + "learning_rate": 6.143276845120133e-06, + "loss": 1.2667, + "step": 105990 + }, + { + "epoch": 1.27, + "grad_norm": 10.778795591829692, + "learning_rate": 6.142737868115227e-06, + "loss": 0.9737, + "step": 105993 + }, + { + "epoch": 1.27, + "grad_norm": 17.765305147956532, + "learning_rate": 6.1421989042737616e-06, + "loss": 1.273, + "step": 105996 + }, + { + "epoch": 1.27, + "grad_norm": 10.469957985512478, + "learning_rate": 6.141659953597583e-06, + "loss": 1.5354, + "step": 105999 + }, + { + "epoch": 1.27, + "grad_norm": 23.268756203433338, + "learning_rate": 6.141121016088531e-06, + "loss": 0.89, + "step": 106002 + }, + { + "epoch": 1.27, + "grad_norm": 11.323014459075244, + "learning_rate": 6.140582091748445e-06, + "loss": 1.3676, + "step": 106005 + }, + { + "epoch": 1.27, + "grad_norm": 14.381081483088616, + "learning_rate": 6.140043180579158e-06, + "loss": 1.414, + "step": 106008 + }, + { + "epoch": 1.27, + "grad_norm": 13.797794584339709, + "learning_rate": 6.139504282582513e-06, + "loss": 1.3235, + "step": 106011 + }, + { + "epoch": 1.27, + "grad_norm": 5.1916616389023496, + "learning_rate": 6.1389653977603516e-06, + "loss": 0.8056, + "step": 106014 + }, + { + "epoch": 1.27, + "grad_norm": 11.061998988128172, + "learning_rate": 6.138426526114508e-06, + "loss": 1.2181, + "step": 106017 + }, + { + "epoch": 1.27, + "grad_norm": 2.5410568652293875, + "learning_rate": 6.1378876676468295e-06, + "loss": 0.9833, + "step": 106020 + }, + { + "epoch": 1.27, + "grad_norm": 8.817663547691046, + "learning_rate": 6.137348822359144e-06, + "loss": 1.086, + "step": 106023 + }, + { + "epoch": 1.27, + "grad_norm": 7.382577529407804, + "learning_rate": 6.136809990253297e-06, + "loss": 1.0899, + "step": 106026 + }, + { + "epoch": 1.27, + "grad_norm": 5.2575119856210515, + "learning_rate": 6.136271171331125e-06, + "loss": 1.067, + "step": 106029 + }, + { + "epoch": 1.28, + "grad_norm": 2.845182395531437, + "learning_rate": 6.135732365594471e-06, + "loss": 1.3344, + "step": 106032 + }, + { + "epoch": 1.28, + "grad_norm": 15.7410899676157, + "learning_rate": 6.135193573045169e-06, + "loss": 1.35, + "step": 106035 + }, + { + "epoch": 1.28, + "grad_norm": 6.19638202474795, + "learning_rate": 6.134654793685058e-06, + "loss": 1.1155, + "step": 106038 + }, + { + "epoch": 1.28, + "grad_norm": 18.54133327969964, + "learning_rate": 6.134116027515977e-06, + "loss": 1.152, + "step": 106041 + }, + { + "epoch": 1.28, + "grad_norm": 6.456253034756095, + "learning_rate": 6.133577274539768e-06, + "loss": 1.2189, + "step": 106044 + }, + { + "epoch": 1.28, + "grad_norm": 9.958964269524238, + "learning_rate": 6.133038534758269e-06, + "loss": 1.4773, + "step": 106047 + }, + { + "epoch": 1.28, + "grad_norm": 8.88163066819657, + "learning_rate": 6.132499808173311e-06, + "loss": 1.2344, + "step": 106050 + }, + { + "epoch": 1.28, + "grad_norm": 12.807306508523777, + "learning_rate": 6.13196109478674e-06, + "loss": 0.9553, + "step": 106053 + }, + { + "epoch": 1.28, + "grad_norm": 100.33018080679584, + "learning_rate": 6.131422394600395e-06, + "loss": 1.0009, + "step": 106056 + }, + { + "epoch": 1.28, + "grad_norm": 31.26639825119275, + "learning_rate": 6.130883707616112e-06, + "loss": 0.9414, + "step": 106059 + }, + { + "epoch": 1.28, + "grad_norm": 5.6067983666817325, + "learning_rate": 6.1303450338357265e-06, + "loss": 1.026, + "step": 106062 + }, + { + "epoch": 1.28, + "grad_norm": 18.668715692271498, + "learning_rate": 6.129806373261079e-06, + "loss": 1.0464, + "step": 106065 + }, + { + "epoch": 1.28, + "grad_norm": 11.624255682670714, + "learning_rate": 6.129267725894011e-06, + "loss": 1.3796, + "step": 106068 + }, + { + "epoch": 1.28, + "grad_norm": 8.182308036806601, + "learning_rate": 6.1287290917363554e-06, + "loss": 1.4983, + "step": 106071 + }, + { + "epoch": 1.28, + "grad_norm": 9.057201392128432, + "learning_rate": 6.128190470789956e-06, + "loss": 0.9659, + "step": 106074 + }, + { + "epoch": 1.28, + "grad_norm": 9.47860491647399, + "learning_rate": 6.127651863056645e-06, + "loss": 1.3718, + "step": 106077 + }, + { + "epoch": 1.28, + "grad_norm": 20.331544191683694, + "learning_rate": 6.127113268538266e-06, + "loss": 0.9869, + "step": 106080 + }, + { + "epoch": 1.28, + "grad_norm": 7.044873506865194, + "learning_rate": 6.126574687236653e-06, + "loss": 1.3315, + "step": 106083 + }, + { + "epoch": 1.28, + "grad_norm": 6.365173671805208, + "learning_rate": 6.126036119153646e-06, + "loss": 1.1727, + "step": 106086 + }, + { + "epoch": 1.28, + "grad_norm": 4.4246045092120525, + "learning_rate": 6.125497564291084e-06, + "loss": 1.1229, + "step": 106089 + }, + { + "epoch": 1.28, + "grad_norm": 25.993808032536954, + "learning_rate": 6.1249590226507996e-06, + "loss": 0.8, + "step": 106092 + }, + { + "epoch": 1.28, + "grad_norm": 9.599886963399316, + "learning_rate": 6.124420494234634e-06, + "loss": 1.0772, + "step": 106095 + }, + { + "epoch": 1.28, + "grad_norm": 14.383808355878633, + "learning_rate": 6.1238819790444285e-06, + "loss": 0.9626, + "step": 106098 + }, + { + "epoch": 1.28, + "grad_norm": 10.094364647173522, + "learning_rate": 6.123343477082018e-06, + "loss": 1.1826, + "step": 106101 + }, + { + "epoch": 1.28, + "grad_norm": 3.2354737818888544, + "learning_rate": 6.1228049883492376e-06, + "loss": 1.1666, + "step": 106104 + }, + { + "epoch": 1.28, + "grad_norm": 17.194451495968007, + "learning_rate": 6.122266512847925e-06, + "loss": 1.3692, + "step": 106107 + }, + { + "epoch": 1.28, + "grad_norm": 17.17485425088104, + "learning_rate": 6.121728050579925e-06, + "loss": 1.1504, + "step": 106110 + }, + { + "epoch": 1.28, + "grad_norm": 15.432215848494366, + "learning_rate": 6.1211896015470704e-06, + "loss": 1.3017, + "step": 106113 + }, + { + "epoch": 1.28, + "grad_norm": 8.144461180346337, + "learning_rate": 6.120651165751195e-06, + "loss": 0.9903, + "step": 106116 + }, + { + "epoch": 1.28, + "grad_norm": 12.739926181828737, + "learning_rate": 6.1201127431941385e-06, + "loss": 1.3957, + "step": 106119 + }, + { + "epoch": 1.28, + "grad_norm": 8.29809347442853, + "learning_rate": 6.119574333877743e-06, + "loss": 0.8086, + "step": 106122 + }, + { + "epoch": 1.28, + "grad_norm": 6.361191991285942, + "learning_rate": 6.11903593780384e-06, + "loss": 1.1555, + "step": 106125 + }, + { + "epoch": 1.28, + "grad_norm": 111.00344322788308, + "learning_rate": 6.118497554974274e-06, + "loss": 1.162, + "step": 106128 + }, + { + "epoch": 1.28, + "grad_norm": 17.040913192161977, + "learning_rate": 6.117959185390876e-06, + "loss": 1.1387, + "step": 106131 + }, + { + "epoch": 1.28, + "grad_norm": 6.788160875238023, + "learning_rate": 6.1174208290554825e-06, + "loss": 1.214, + "step": 106134 + }, + { + "epoch": 1.28, + "grad_norm": 10.492630583483354, + "learning_rate": 6.116882485969933e-06, + "loss": 0.9356, + "step": 106137 + }, + { + "epoch": 1.28, + "grad_norm": 10.842929154898062, + "learning_rate": 6.116344156136069e-06, + "loss": 1.0984, + "step": 106140 + }, + { + "epoch": 1.28, + "grad_norm": 30.137473948834646, + "learning_rate": 6.115805839555722e-06, + "loss": 1.0061, + "step": 106143 + }, + { + "epoch": 1.28, + "grad_norm": 12.735400296475959, + "learning_rate": 6.115267536230726e-06, + "loss": 1.0711, + "step": 106146 + }, + { + "epoch": 1.28, + "grad_norm": 9.645876140096993, + "learning_rate": 6.114729246162926e-06, + "loss": 1.2962, + "step": 106149 + }, + { + "epoch": 1.28, + "grad_norm": 29.59349913793639, + "learning_rate": 6.114190969354157e-06, + "loss": 1.211, + "step": 106152 + }, + { + "epoch": 1.28, + "grad_norm": 7.4024419008535, + "learning_rate": 6.113652705806255e-06, + "loss": 1.5043, + "step": 106155 + }, + { + "epoch": 1.28, + "grad_norm": 7.036662282703152, + "learning_rate": 6.1131144555210544e-06, + "loss": 1.1833, + "step": 106158 + }, + { + "epoch": 1.28, + "grad_norm": 18.222160599113856, + "learning_rate": 6.112576218500392e-06, + "loss": 1.1093, + "step": 106161 + }, + { + "epoch": 1.28, + "grad_norm": 12.258708347857594, + "learning_rate": 6.11203799474611e-06, + "loss": 0.9074, + "step": 106164 + }, + { + "epoch": 1.28, + "grad_norm": 5.711052413620394, + "learning_rate": 6.111499784260044e-06, + "loss": 0.8329, + "step": 106167 + }, + { + "epoch": 1.28, + "grad_norm": 8.365286965538985, + "learning_rate": 6.110961587044024e-06, + "loss": 0.7733, + "step": 106170 + }, + { + "epoch": 1.28, + "grad_norm": 14.663481936500384, + "learning_rate": 6.110423403099894e-06, + "loss": 1.2268, + "step": 106173 + }, + { + "epoch": 1.28, + "grad_norm": 4.111743383088893, + "learning_rate": 6.109885232429486e-06, + "loss": 1.2287, + "step": 106176 + }, + { + "epoch": 1.28, + "grad_norm": 11.661659568524586, + "learning_rate": 6.109347075034638e-06, + "loss": 1.2709, + "step": 106179 + }, + { + "epoch": 1.28, + "grad_norm": 6.120745754963478, + "learning_rate": 6.108808930917191e-06, + "loss": 1.1647, + "step": 106182 + }, + { + "epoch": 1.28, + "grad_norm": 13.00712850603112, + "learning_rate": 6.108270800078974e-06, + "loss": 1.2803, + "step": 106185 + }, + { + "epoch": 1.28, + "grad_norm": 36.22900111971514, + "learning_rate": 6.107732682521827e-06, + "loss": 1.1003, + "step": 106188 + }, + { + "epoch": 1.28, + "grad_norm": 13.24885714060873, + "learning_rate": 6.107194578247586e-06, + "loss": 1.0219, + "step": 106191 + }, + { + "epoch": 1.28, + "grad_norm": 11.179961085004685, + "learning_rate": 6.106656487258092e-06, + "loss": 1.517, + "step": 106194 + }, + { + "epoch": 1.28, + "grad_norm": 9.470192465883363, + "learning_rate": 6.106118409555175e-06, + "loss": 0.8816, + "step": 106197 + }, + { + "epoch": 1.28, + "grad_norm": 12.60254687118756, + "learning_rate": 6.105580345140672e-06, + "loss": 0.916, + "step": 106200 + }, + { + "epoch": 1.28, + "grad_norm": 20.681751714732258, + "learning_rate": 6.105042294016419e-06, + "loss": 1.0761, + "step": 106203 + }, + { + "epoch": 1.28, + "grad_norm": 9.87719694463132, + "learning_rate": 6.104504256184256e-06, + "loss": 1.0736, + "step": 106206 + }, + { + "epoch": 1.28, + "grad_norm": 15.607472308232284, + "learning_rate": 6.103966231646019e-06, + "loss": 1.0983, + "step": 106209 + }, + { + "epoch": 1.28, + "grad_norm": 5.525592238292855, + "learning_rate": 6.103428220403538e-06, + "loss": 1.2078, + "step": 106212 + }, + { + "epoch": 1.28, + "grad_norm": 13.102357086671768, + "learning_rate": 6.102890222458653e-06, + "loss": 1.3107, + "step": 106215 + }, + { + "epoch": 1.28, + "grad_norm": 7.822407634661743, + "learning_rate": 6.1023522378132e-06, + "loss": 1.442, + "step": 106218 + }, + { + "epoch": 1.28, + "grad_norm": 9.673348418605888, + "learning_rate": 6.1018142664690196e-06, + "loss": 1.4063, + "step": 106221 + }, + { + "epoch": 1.28, + "grad_norm": 25.88005589679436, + "learning_rate": 6.101276308427938e-06, + "loss": 1.4813, + "step": 106224 + }, + { + "epoch": 1.28, + "grad_norm": 3.470143623488786, + "learning_rate": 6.100738363691798e-06, + "loss": 1.2811, + "step": 106227 + }, + { + "epoch": 1.28, + "grad_norm": 4.181712574187768, + "learning_rate": 6.100200432262432e-06, + "loss": 1.0628, + "step": 106230 + }, + { + "epoch": 1.28, + "grad_norm": 11.085925336548632, + "learning_rate": 6.099662514141677e-06, + "loss": 1.2107, + "step": 106233 + }, + { + "epoch": 1.28, + "grad_norm": 10.571011297956533, + "learning_rate": 6.099124609331372e-06, + "loss": 1.2497, + "step": 106236 + }, + { + "epoch": 1.28, + "grad_norm": 4.3506580483116455, + "learning_rate": 6.098586717833348e-06, + "loss": 1.2457, + "step": 106239 + }, + { + "epoch": 1.28, + "grad_norm": 4.923035319541353, + "learning_rate": 6.098048839649442e-06, + "loss": 0.8862, + "step": 106242 + }, + { + "epoch": 1.28, + "grad_norm": 6.377859456243837, + "learning_rate": 6.097510974781488e-06, + "loss": 1.4861, + "step": 106245 + }, + { + "epoch": 1.28, + "grad_norm": 10.33916932888887, + "learning_rate": 6.096973123231327e-06, + "loss": 1.053, + "step": 106248 + }, + { + "epoch": 1.28, + "grad_norm": 8.018378933083955, + "learning_rate": 6.09643528500079e-06, + "loss": 1.2465, + "step": 106251 + }, + { + "epoch": 1.28, + "grad_norm": 5.996328850649027, + "learning_rate": 6.095897460091711e-06, + "loss": 1.261, + "step": 106254 + }, + { + "epoch": 1.28, + "grad_norm": 8.897398258148703, + "learning_rate": 6.095359648505927e-06, + "loss": 1.0401, + "step": 106257 + }, + { + "epoch": 1.28, + "grad_norm": 10.814196075432134, + "learning_rate": 6.094821850245277e-06, + "loss": 1.1538, + "step": 106260 + }, + { + "epoch": 1.28, + "grad_norm": 10.401865838954722, + "learning_rate": 6.094284065311595e-06, + "loss": 1.0474, + "step": 106263 + }, + { + "epoch": 1.28, + "grad_norm": 4.161303617767943, + "learning_rate": 6.093746293706712e-06, + "loss": 1.1188, + "step": 106266 + }, + { + "epoch": 1.28, + "grad_norm": 4.913154820103684, + "learning_rate": 6.093208535432467e-06, + "loss": 0.9439, + "step": 106269 + }, + { + "epoch": 1.28, + "grad_norm": 18.39159427046319, + "learning_rate": 6.0926707904906915e-06, + "loss": 1.1156, + "step": 106272 + }, + { + "epoch": 1.28, + "grad_norm": 2.1574745518053704, + "learning_rate": 6.0921330588832274e-06, + "loss": 1.2301, + "step": 106275 + }, + { + "epoch": 1.28, + "grad_norm": 2.832470861251944, + "learning_rate": 6.091595340611901e-06, + "loss": 1.1071, + "step": 106278 + }, + { + "epoch": 1.28, + "grad_norm": 13.253555324109238, + "learning_rate": 6.0910576356785545e-06, + "loss": 1.1739, + "step": 106281 + }, + { + "epoch": 1.28, + "grad_norm": 3.2931972740891178, + "learning_rate": 6.090519944085018e-06, + "loss": 1.4487, + "step": 106284 + }, + { + "epoch": 1.28, + "grad_norm": 13.917288112834523, + "learning_rate": 6.089982265833129e-06, + "loss": 1.577, + "step": 106287 + }, + { + "epoch": 1.28, + "grad_norm": 19.622570260816232, + "learning_rate": 6.089444600924726e-06, + "loss": 1.3764, + "step": 106290 + }, + { + "epoch": 1.28, + "grad_norm": 7.386740615129121, + "learning_rate": 6.088906949361638e-06, + "loss": 1.242, + "step": 106293 + }, + { + "epoch": 1.28, + "grad_norm": 4.745444625137405, + "learning_rate": 6.088369311145701e-06, + "loss": 1.4597, + "step": 106296 + }, + { + "epoch": 1.28, + "grad_norm": 9.424834365433444, + "learning_rate": 6.087831686278749e-06, + "loss": 1.0734, + "step": 106299 + }, + { + "epoch": 1.28, + "grad_norm": 12.014381455772837, + "learning_rate": 6.0872940747626215e-06, + "loss": 1.3155, + "step": 106302 + }, + { + "epoch": 1.28, + "grad_norm": 6.216358350827337, + "learning_rate": 6.0867564765991504e-06, + "loss": 0.848, + "step": 106305 + }, + { + "epoch": 1.28, + "grad_norm": 82.6917158887355, + "learning_rate": 6.086218891790168e-06, + "loss": 1.1935, + "step": 106308 + }, + { + "epoch": 1.28, + "grad_norm": 4.457196819522843, + "learning_rate": 6.085681320337511e-06, + "loss": 1.188, + "step": 106311 + }, + { + "epoch": 1.28, + "grad_norm": 11.04891802805755, + "learning_rate": 6.085143762243012e-06, + "loss": 0.9862, + "step": 106314 + }, + { + "epoch": 1.28, + "grad_norm": 22.547961225985897, + "learning_rate": 6.0846062175085105e-06, + "loss": 1.5231, + "step": 106317 + }, + { + "epoch": 1.28, + "grad_norm": 5.75506457258934, + "learning_rate": 6.084068686135835e-06, + "loss": 1.3826, + "step": 106320 + }, + { + "epoch": 1.28, + "grad_norm": 30.530549659121114, + "learning_rate": 6.083531168126825e-06, + "loss": 1.125, + "step": 106323 + }, + { + "epoch": 1.28, + "grad_norm": 10.673019050818132, + "learning_rate": 6.08299366348331e-06, + "loss": 1.0788, + "step": 106326 + }, + { + "epoch": 1.28, + "grad_norm": 24.51632476221215, + "learning_rate": 6.08245617220713e-06, + "loss": 0.9107, + "step": 106329 + }, + { + "epoch": 1.28, + "grad_norm": 11.21919599045777, + "learning_rate": 6.081918694300113e-06, + "loss": 1.4973, + "step": 106332 + }, + { + "epoch": 1.28, + "grad_norm": 7.36495377922803, + "learning_rate": 6.081381229764096e-06, + "loss": 1.2763, + "step": 106335 + }, + { + "epoch": 1.28, + "grad_norm": 3.5554368755265497, + "learning_rate": 6.080843778600914e-06, + "loss": 1.3785, + "step": 106338 + }, + { + "epoch": 1.28, + "grad_norm": 5.138163473418203, + "learning_rate": 6.080306340812398e-06, + "loss": 1.3156, + "step": 106341 + }, + { + "epoch": 1.28, + "grad_norm": 20.72812963212718, + "learning_rate": 6.079768916400392e-06, + "loss": 1.2743, + "step": 106344 + }, + { + "epoch": 1.28, + "grad_norm": 10.894260783030266, + "learning_rate": 6.079231505366718e-06, + "loss": 0.9796, + "step": 106347 + }, + { + "epoch": 1.28, + "grad_norm": 5.705909880478663, + "learning_rate": 6.0786941077132134e-06, + "loss": 0.849, + "step": 106350 + }, + { + "epoch": 1.28, + "grad_norm": 7.3361959421769365, + "learning_rate": 6.078156723441715e-06, + "loss": 0.9579, + "step": 106353 + }, + { + "epoch": 1.28, + "grad_norm": 21.781002022306524, + "learning_rate": 6.077619352554056e-06, + "loss": 1.1633, + "step": 106356 + }, + { + "epoch": 1.28, + "grad_norm": 18.434920166125714, + "learning_rate": 6.077081995052069e-06, + "loss": 1.4093, + "step": 106359 + }, + { + "epoch": 1.28, + "grad_norm": 14.350553531257594, + "learning_rate": 6.076544650937586e-06, + "loss": 1.2987, + "step": 106362 + }, + { + "epoch": 1.28, + "grad_norm": 5.479183958541839, + "learning_rate": 6.0760073202124446e-06, + "loss": 0.94, + "step": 106365 + }, + { + "epoch": 1.28, + "grad_norm": 8.28943225928434, + "learning_rate": 6.075470002878474e-06, + "loss": 0.8212, + "step": 106368 + }, + { + "epoch": 1.28, + "grad_norm": 14.48731610648964, + "learning_rate": 6.0749326989375154e-06, + "loss": 1.4984, + "step": 106371 + }, + { + "epoch": 1.28, + "grad_norm": 8.954880723111584, + "learning_rate": 6.074395408391394e-06, + "loss": 1.2275, + "step": 106374 + }, + { + "epoch": 1.28, + "grad_norm": 8.0430452970201, + "learning_rate": 6.07385813124195e-06, + "loss": 1.0778, + "step": 106377 + }, + { + "epoch": 1.28, + "grad_norm": 12.37195459171385, + "learning_rate": 6.07332086749101e-06, + "loss": 1.1194, + "step": 106380 + }, + { + "epoch": 1.28, + "grad_norm": 14.489718078718225, + "learning_rate": 6.072783617140417e-06, + "loss": 1.1882, + "step": 106383 + }, + { + "epoch": 1.28, + "grad_norm": 2.4874932446088898, + "learning_rate": 6.072246380191994e-06, + "loss": 0.9592, + "step": 106386 + }, + { + "epoch": 1.28, + "grad_norm": 5.753907251642224, + "learning_rate": 6.071709156647582e-06, + "loss": 1.5087, + "step": 106389 + }, + { + "epoch": 1.28, + "grad_norm": 2.628996132862126, + "learning_rate": 6.071171946509009e-06, + "loss": 1.232, + "step": 106392 + }, + { + "epoch": 1.28, + "grad_norm": 12.684005015475751, + "learning_rate": 6.070634749778111e-06, + "loss": 1.3995, + "step": 106395 + }, + { + "epoch": 1.28, + "grad_norm": 5.309217755108022, + "learning_rate": 6.070097566456725e-06, + "loss": 1.1023, + "step": 106398 + }, + { + "epoch": 1.28, + "grad_norm": 6.771182104636598, + "learning_rate": 6.0695603965466795e-06, + "loss": 1.1657, + "step": 106401 + }, + { + "epoch": 1.28, + "grad_norm": 8.650291278693379, + "learning_rate": 6.0690232400498075e-06, + "loss": 1.2523, + "step": 106404 + }, + { + "epoch": 1.28, + "grad_norm": 16.555191111370377, + "learning_rate": 6.068486096967943e-06, + "loss": 1.2667, + "step": 106407 + }, + { + "epoch": 1.28, + "grad_norm": 4.34424128115169, + "learning_rate": 6.067948967302922e-06, + "loss": 1.1374, + "step": 106410 + }, + { + "epoch": 1.28, + "grad_norm": 19.360491504759473, + "learning_rate": 6.067411851056571e-06, + "loss": 1.2377, + "step": 106413 + }, + { + "epoch": 1.28, + "grad_norm": 2.379955610429534, + "learning_rate": 6.066874748230728e-06, + "loss": 1.3377, + "step": 106416 + }, + { + "epoch": 1.28, + "grad_norm": 2.711790629701405, + "learning_rate": 6.066337658827226e-06, + "loss": 0.9935, + "step": 106419 + }, + { + "epoch": 1.28, + "grad_norm": 7.459984755677153, + "learning_rate": 6.065800582847894e-06, + "loss": 1.2694, + "step": 106422 + }, + { + "epoch": 1.28, + "grad_norm": 13.134922828634764, + "learning_rate": 6.065263520294572e-06, + "loss": 1.1257, + "step": 106425 + }, + { + "epoch": 1.28, + "grad_norm": 19.64191536666894, + "learning_rate": 6.0647264711690844e-06, + "loss": 1.3365, + "step": 106428 + }, + { + "epoch": 1.28, + "grad_norm": 9.154974001478358, + "learning_rate": 6.064189435473269e-06, + "loss": 1.3686, + "step": 106431 + }, + { + "epoch": 1.28, + "grad_norm": 3.882675572298094, + "learning_rate": 6.0636524132089555e-06, + "loss": 0.9311, + "step": 106434 + }, + { + "epoch": 1.28, + "grad_norm": 5.758323560331605, + "learning_rate": 6.063115404377984e-06, + "loss": 1.4569, + "step": 106437 + }, + { + "epoch": 1.28, + "grad_norm": 9.48153352025718, + "learning_rate": 6.062578408982176e-06, + "loss": 1.2521, + "step": 106440 + }, + { + "epoch": 1.28, + "grad_norm": 17.4302962521696, + "learning_rate": 6.062041427023373e-06, + "loss": 1.4827, + "step": 106443 + }, + { + "epoch": 1.28, + "grad_norm": 7.760294223958815, + "learning_rate": 6.061504458503402e-06, + "loss": 1.1841, + "step": 106446 + }, + { + "epoch": 1.28, + "grad_norm": 14.417611443433774, + "learning_rate": 6.060967503424098e-06, + "loss": 1.4161, + "step": 106449 + }, + { + "epoch": 1.28, + "grad_norm": 13.331230889845425, + "learning_rate": 6.060430561787296e-06, + "loss": 1.0256, + "step": 106452 + }, + { + "epoch": 1.28, + "grad_norm": 6.6076697624308, + "learning_rate": 6.059893633594822e-06, + "loss": 1.5122, + "step": 106455 + }, + { + "epoch": 1.28, + "grad_norm": 18.771784564039766, + "learning_rate": 6.059356718848513e-06, + "loss": 1.3041, + "step": 106458 + }, + { + "epoch": 1.28, + "grad_norm": 4.604573966586857, + "learning_rate": 6.058819817550199e-06, + "loss": 0.8917, + "step": 106461 + }, + { + "epoch": 1.28, + "grad_norm": 4.914629788474408, + "learning_rate": 6.058282929701718e-06, + "loss": 1.1434, + "step": 106464 + }, + { + "epoch": 1.28, + "grad_norm": 9.258651772755634, + "learning_rate": 6.0577460553048915e-06, + "loss": 1.289, + "step": 106467 + }, + { + "epoch": 1.28, + "grad_norm": 7.410885773002207, + "learning_rate": 6.05720919436156e-06, + "loss": 1.1951, + "step": 106470 + }, + { + "epoch": 1.28, + "grad_norm": 12.04306569625375, + "learning_rate": 6.056672346873553e-06, + "loss": 0.97, + "step": 106473 + }, + { + "epoch": 1.28, + "grad_norm": 6.025208617231858, + "learning_rate": 6.0561355128427025e-06, + "loss": 1.1576, + "step": 106476 + }, + { + "epoch": 1.28, + "grad_norm": 7.313872629748014, + "learning_rate": 6.055598692270843e-06, + "loss": 1.2922, + "step": 106479 + }, + { + "epoch": 1.28, + "grad_norm": 13.79025372470923, + "learning_rate": 6.055061885159803e-06, + "loss": 1.1788, + "step": 106482 + }, + { + "epoch": 1.28, + "grad_norm": 17.752041923270053, + "learning_rate": 6.054525091511416e-06, + "loss": 1.3687, + "step": 106485 + }, + { + "epoch": 1.28, + "grad_norm": 6.753500709402286, + "learning_rate": 6.0539883113275125e-06, + "loss": 1.1927, + "step": 106488 + }, + { + "epoch": 1.28, + "grad_norm": 6.113827010752445, + "learning_rate": 6.05345154460993e-06, + "loss": 1.2708, + "step": 106491 + }, + { + "epoch": 1.28, + "grad_norm": 4.1476420844958914, + "learning_rate": 6.0529147913604905e-06, + "loss": 1.343, + "step": 106494 + }, + { + "epoch": 1.28, + "grad_norm": 7.9317182921887355, + "learning_rate": 6.052378051581034e-06, + "loss": 1.1522, + "step": 106497 + }, + { + "epoch": 1.28, + "grad_norm": 12.110768975408932, + "learning_rate": 6.051841325273387e-06, + "loss": 1.4929, + "step": 106500 + }, + { + "epoch": 1.28, + "grad_norm": 4.254882715146746, + "learning_rate": 6.051304612439385e-06, + "loss": 1.0289, + "step": 106503 + }, + { + "epoch": 1.28, + "grad_norm": 7.452976781791153, + "learning_rate": 6.050767913080861e-06, + "loss": 1.2478, + "step": 106506 + }, + { + "epoch": 1.28, + "grad_norm": 31.7296804463662, + "learning_rate": 6.050231227199639e-06, + "loss": 1.2914, + "step": 106509 + }, + { + "epoch": 1.28, + "grad_norm": 15.219742778634007, + "learning_rate": 6.049694554797555e-06, + "loss": 0.6544, + "step": 106512 + }, + { + "epoch": 1.28, + "grad_norm": 11.58414071702725, + "learning_rate": 6.049157895876443e-06, + "loss": 0.9737, + "step": 106515 + }, + { + "epoch": 1.28, + "grad_norm": 7.977589043558917, + "learning_rate": 6.0486212504381335e-06, + "loss": 1.4375, + "step": 106518 + }, + { + "epoch": 1.28, + "grad_norm": 8.500480201997313, + "learning_rate": 6.048084618484453e-06, + "loss": 0.8309, + "step": 106521 + }, + { + "epoch": 1.28, + "grad_norm": 5.251727621057967, + "learning_rate": 6.047548000017236e-06, + "loss": 1.2775, + "step": 106524 + }, + { + "epoch": 1.28, + "grad_norm": 13.128392269932094, + "learning_rate": 6.047011395038316e-06, + "loss": 1.4938, + "step": 106527 + }, + { + "epoch": 1.28, + "grad_norm": 4.55228043521285, + "learning_rate": 6.04647480354952e-06, + "loss": 1.2496, + "step": 106530 + }, + { + "epoch": 1.28, + "grad_norm": 25.69703093300969, + "learning_rate": 6.045938225552686e-06, + "loss": 0.9481, + "step": 106533 + }, + { + "epoch": 1.28, + "grad_norm": 7.290631736087504, + "learning_rate": 6.045401661049636e-06, + "loss": 1.4052, + "step": 106536 + }, + { + "epoch": 1.28, + "grad_norm": 14.375980212572458, + "learning_rate": 6.044865110042209e-06, + "loss": 1.2603, + "step": 106539 + }, + { + "epoch": 1.28, + "grad_norm": 6.627248038208554, + "learning_rate": 6.044328572532228e-06, + "loss": 1.0326, + "step": 106542 + }, + { + "epoch": 1.28, + "grad_norm": 9.759545411026362, + "learning_rate": 6.043792048521535e-06, + "loss": 1.4459, + "step": 106545 + }, + { + "epoch": 1.28, + "grad_norm": 6.915347966384042, + "learning_rate": 6.043255538011952e-06, + "loss": 0.8229, + "step": 106548 + }, + { + "epoch": 1.28, + "grad_norm": 12.433375523801958, + "learning_rate": 6.042719041005311e-06, + "loss": 1.1916, + "step": 106551 + }, + { + "epoch": 1.28, + "grad_norm": 7.097088759309612, + "learning_rate": 6.042182557503445e-06, + "loss": 1.5337, + "step": 106554 + }, + { + "epoch": 1.28, + "grad_norm": 7.916280115897051, + "learning_rate": 6.041646087508184e-06, + "loss": 1.4115, + "step": 106557 + }, + { + "epoch": 1.28, + "grad_norm": 17.444123450018246, + "learning_rate": 6.041109631021363e-06, + "loss": 0.9177, + "step": 106560 + }, + { + "epoch": 1.28, + "grad_norm": 14.39372234733089, + "learning_rate": 6.040573188044805e-06, + "loss": 1.492, + "step": 106563 + }, + { + "epoch": 1.28, + "grad_norm": 3.3927249425792625, + "learning_rate": 6.040036758580343e-06, + "loss": 1.3877, + "step": 106566 + }, + { + "epoch": 1.28, + "grad_norm": 23.709256309815242, + "learning_rate": 6.039500342629814e-06, + "loss": 1.1721, + "step": 106569 + }, + { + "epoch": 1.28, + "grad_norm": 18.3084640324761, + "learning_rate": 6.038963940195044e-06, + "loss": 1.07, + "step": 106572 + }, + { + "epoch": 1.28, + "grad_norm": 205.5653372588094, + "learning_rate": 6.038427551277859e-06, + "loss": 1.1968, + "step": 106575 + }, + { + "epoch": 1.28, + "grad_norm": 12.478090833892784, + "learning_rate": 6.037891175880094e-06, + "loss": 1.1379, + "step": 106578 + }, + { + "epoch": 1.28, + "grad_norm": 9.949023246495313, + "learning_rate": 6.037354814003582e-06, + "loss": 1.5141, + "step": 106581 + }, + { + "epoch": 1.28, + "grad_norm": 4.321554118233519, + "learning_rate": 6.036818465650147e-06, + "loss": 1.0112, + "step": 106584 + }, + { + "epoch": 1.28, + "grad_norm": 12.762655474797224, + "learning_rate": 6.036282130821629e-06, + "loss": 1.4046, + "step": 106587 + }, + { + "epoch": 1.28, + "grad_norm": 6.167673402116169, + "learning_rate": 6.035745809519849e-06, + "loss": 1.1116, + "step": 106590 + }, + { + "epoch": 1.28, + "grad_norm": 6.481049056532838, + "learning_rate": 6.035209501746639e-06, + "loss": 1.2438, + "step": 106593 + }, + { + "epoch": 1.28, + "grad_norm": 3.7241491849601522, + "learning_rate": 6.034673207503832e-06, + "loss": 1.5361, + "step": 106596 + }, + { + "epoch": 1.28, + "grad_norm": 9.87937195531063, + "learning_rate": 6.034136926793259e-06, + "loss": 0.9923, + "step": 106599 + }, + { + "epoch": 1.28, + "grad_norm": 29.492577844651887, + "learning_rate": 6.033600659616748e-06, + "loss": 1.3701, + "step": 106602 + }, + { + "epoch": 1.28, + "grad_norm": 9.28985667707134, + "learning_rate": 6.033064405976125e-06, + "loss": 1.0422, + "step": 106605 + }, + { + "epoch": 1.28, + "grad_norm": 12.614876106673734, + "learning_rate": 6.032528165873227e-06, + "loss": 1.0815, + "step": 106608 + }, + { + "epoch": 1.28, + "grad_norm": 17.795768264864222, + "learning_rate": 6.0319919393098824e-06, + "loss": 1.3731, + "step": 106611 + }, + { + "epoch": 1.28, + "grad_norm": 4.811720476276468, + "learning_rate": 6.031455726287921e-06, + "loss": 1.1453, + "step": 106614 + }, + { + "epoch": 1.28, + "grad_norm": 11.823964614556548, + "learning_rate": 6.030919526809169e-06, + "loss": 1.4091, + "step": 106617 + }, + { + "epoch": 1.28, + "grad_norm": 13.995217398327751, + "learning_rate": 6.0303833408754585e-06, + "loss": 1.0483, + "step": 106620 + }, + { + "epoch": 1.28, + "grad_norm": 10.539736814882462, + "learning_rate": 6.029847168488621e-06, + "loss": 1.0773, + "step": 106623 + }, + { + "epoch": 1.28, + "grad_norm": 5.36220054602026, + "learning_rate": 6.02931100965049e-06, + "loss": 1.1653, + "step": 106626 + }, + { + "epoch": 1.28, + "grad_norm": 8.204917844815846, + "learning_rate": 6.0287748643628825e-06, + "loss": 1.1869, + "step": 106629 + }, + { + "epoch": 1.28, + "grad_norm": 5.012854712087031, + "learning_rate": 6.028238732627638e-06, + "loss": 1.3408, + "step": 106632 + }, + { + "epoch": 1.28, + "grad_norm": 6.3945911025252435, + "learning_rate": 6.027702614446586e-06, + "loss": 1.5172, + "step": 106635 + }, + { + "epoch": 1.28, + "grad_norm": 26.48776982413174, + "learning_rate": 6.027166509821552e-06, + "loss": 1.1295, + "step": 106638 + }, + { + "epoch": 1.28, + "grad_norm": 12.657225491857082, + "learning_rate": 6.026630418754371e-06, + "loss": 1.1101, + "step": 106641 + }, + { + "epoch": 1.28, + "grad_norm": 6.207303608907748, + "learning_rate": 6.026094341246867e-06, + "loss": 0.9243, + "step": 106644 + }, + { + "epoch": 1.28, + "grad_norm": 14.361365434495992, + "learning_rate": 6.025558277300872e-06, + "loss": 1.0934, + "step": 106647 + }, + { + "epoch": 1.28, + "grad_norm": 7.067082485598308, + "learning_rate": 6.025022226918212e-06, + "loss": 0.9902, + "step": 106650 + }, + { + "epoch": 1.28, + "grad_norm": 12.846017526377956, + "learning_rate": 6.024486190100725e-06, + "loss": 1.2417, + "step": 106653 + }, + { + "epoch": 1.28, + "grad_norm": 7.023412350241805, + "learning_rate": 6.023950166850232e-06, + "loss": 1.1385, + "step": 106656 + }, + { + "epoch": 1.28, + "grad_norm": 9.78741270808988, + "learning_rate": 6.023414157168563e-06, + "loss": 1.0194, + "step": 106659 + }, + { + "epoch": 1.28, + "grad_norm": 14.122519929492661, + "learning_rate": 6.02287816105755e-06, + "loss": 1.2507, + "step": 106662 + }, + { + "epoch": 1.28, + "grad_norm": 8.624642833893859, + "learning_rate": 6.022342178519023e-06, + "loss": 1.0689, + "step": 106665 + }, + { + "epoch": 1.28, + "grad_norm": 8.022112317989814, + "learning_rate": 6.0218062095548105e-06, + "loss": 1.1339, + "step": 106668 + }, + { + "epoch": 1.28, + "grad_norm": 20.80989408972209, + "learning_rate": 6.021270254166738e-06, + "loss": 1.2535, + "step": 106671 + }, + { + "epoch": 1.28, + "grad_norm": 5.815071983024109, + "learning_rate": 6.020734312356635e-06, + "loss": 1.0831, + "step": 106674 + }, + { + "epoch": 1.28, + "grad_norm": 16.118450850702818, + "learning_rate": 6.020198384126337e-06, + "loss": 0.9723, + "step": 106677 + }, + { + "epoch": 1.28, + "grad_norm": 21.14610452810846, + "learning_rate": 6.0196624694776676e-06, + "loss": 1.1645, + "step": 106680 + }, + { + "epoch": 1.28, + "grad_norm": 11.049817981720661, + "learning_rate": 6.0191265684124546e-06, + "loss": 1.1448, + "step": 106683 + }, + { + "epoch": 1.28, + "grad_norm": 6.075902018722932, + "learning_rate": 6.01859068093253e-06, + "loss": 1.6652, + "step": 106686 + }, + { + "epoch": 1.28, + "grad_norm": 16.383722563248398, + "learning_rate": 6.018054807039719e-06, + "loss": 1.4429, + "step": 106689 + }, + { + "epoch": 1.28, + "grad_norm": 13.244444639095942, + "learning_rate": 6.017518946735852e-06, + "loss": 1.1736, + "step": 106692 + }, + { + "epoch": 1.28, + "grad_norm": 12.461178127275502, + "learning_rate": 6.016983100022764e-06, + "loss": 1.1255, + "step": 106695 + }, + { + "epoch": 1.28, + "grad_norm": 6.419055034154156, + "learning_rate": 6.016447266902275e-06, + "loss": 1.4244, + "step": 106698 + }, + { + "epoch": 1.28, + "grad_norm": 45.91627938495915, + "learning_rate": 6.015911447376215e-06, + "loss": 1.4964, + "step": 106701 + }, + { + "epoch": 1.28, + "grad_norm": 4.94782923255991, + "learning_rate": 6.0153756414464136e-06, + "loss": 1.2097, + "step": 106704 + }, + { + "epoch": 1.28, + "grad_norm": 13.267304729328337, + "learning_rate": 6.014839849114704e-06, + "loss": 1.406, + "step": 106707 + }, + { + "epoch": 1.28, + "grad_norm": 13.51673295710298, + "learning_rate": 6.01430407038291e-06, + "loss": 0.9287, + "step": 106710 + }, + { + "epoch": 1.28, + "grad_norm": 9.267360666210685, + "learning_rate": 6.013768305252856e-06, + "loss": 1.115, + "step": 106713 + }, + { + "epoch": 1.28, + "grad_norm": 7.238906281770459, + "learning_rate": 6.013232553726377e-06, + "loss": 1.6729, + "step": 106716 + }, + { + "epoch": 1.28, + "grad_norm": 45.19622336442631, + "learning_rate": 6.012696815805301e-06, + "loss": 1.1558, + "step": 106719 + }, + { + "epoch": 1.28, + "grad_norm": 12.265089811341118, + "learning_rate": 6.0121610914914555e-06, + "loss": 1.3274, + "step": 106722 + }, + { + "epoch": 1.28, + "grad_norm": 7.095109799676498, + "learning_rate": 6.011625380786665e-06, + "loss": 1.3345, + "step": 106725 + }, + { + "epoch": 1.28, + "grad_norm": 6.182856215441453, + "learning_rate": 6.011089683692761e-06, + "loss": 1.1374, + "step": 106728 + }, + { + "epoch": 1.28, + "grad_norm": 3.3057274757957695, + "learning_rate": 6.010554000211571e-06, + "loss": 1.3813, + "step": 106731 + }, + { + "epoch": 1.28, + "grad_norm": 6.243051133783874, + "learning_rate": 6.0100183303449265e-06, + "loss": 1.1289, + "step": 106734 + }, + { + "epoch": 1.28, + "grad_norm": 13.147737932554708, + "learning_rate": 6.009482674094648e-06, + "loss": 1.0886, + "step": 106737 + }, + { + "epoch": 1.28, + "grad_norm": 28.56743172752565, + "learning_rate": 6.008947031462568e-06, + "loss": 1.1123, + "step": 106740 + }, + { + "epoch": 1.28, + "grad_norm": 21.570951385897924, + "learning_rate": 6.008411402450516e-06, + "loss": 1.2664, + "step": 106743 + }, + { + "epoch": 1.28, + "grad_norm": 7.942385061052955, + "learning_rate": 6.0078757870603156e-06, + "loss": 1.2353, + "step": 106746 + }, + { + "epoch": 1.28, + "grad_norm": 9.69226081508706, + "learning_rate": 6.0073401852938015e-06, + "loss": 1.1872, + "step": 106749 + }, + { + "epoch": 1.28, + "grad_norm": 10.211465028392478, + "learning_rate": 6.006804597152796e-06, + "loss": 1.1964, + "step": 106752 + }, + { + "epoch": 1.28, + "grad_norm": 8.703246041834612, + "learning_rate": 6.006269022639124e-06, + "loss": 1.1758, + "step": 106755 + }, + { + "epoch": 1.28, + "grad_norm": 12.507620493432414, + "learning_rate": 6.005733461754619e-06, + "loss": 1.3774, + "step": 106758 + }, + { + "epoch": 1.28, + "grad_norm": 22.383064270091094, + "learning_rate": 6.005197914501111e-06, + "loss": 1.1523, + "step": 106761 + }, + { + "epoch": 1.28, + "grad_norm": 5.046687113888556, + "learning_rate": 6.004662380880422e-06, + "loss": 0.8646, + "step": 106764 + }, + { + "epoch": 1.28, + "grad_norm": 13.310745439922387, + "learning_rate": 6.004126860894377e-06, + "loss": 1.0141, + "step": 106767 + }, + { + "epoch": 1.28, + "grad_norm": 20.003833342734765, + "learning_rate": 6.003591354544811e-06, + "loss": 1.1107, + "step": 106770 + }, + { + "epoch": 1.28, + "grad_norm": 5.067862939007101, + "learning_rate": 6.0030558618335465e-06, + "loss": 1.0122, + "step": 106773 + }, + { + "epoch": 1.28, + "grad_norm": 5.188697776629745, + "learning_rate": 6.002520382762418e-06, + "loss": 1.1971, + "step": 106776 + }, + { + "epoch": 1.28, + "grad_norm": 14.630782356009039, + "learning_rate": 6.001984917333241e-06, + "loss": 1.1832, + "step": 106779 + }, + { + "epoch": 1.28, + "grad_norm": 6.1688562259318385, + "learning_rate": 6.0014494655478525e-06, + "loss": 1.0939, + "step": 106782 + }, + { + "epoch": 1.28, + "grad_norm": 11.467187484150982, + "learning_rate": 6.000914027408075e-06, + "loss": 1.1613, + "step": 106785 + }, + { + "epoch": 1.28, + "grad_norm": 5.564921799991305, + "learning_rate": 6.000378602915742e-06, + "loss": 1.4745, + "step": 106788 + }, + { + "epoch": 1.28, + "grad_norm": 3.7195086030860103, + "learning_rate": 5.99984319207267e-06, + "loss": 1.5515, + "step": 106791 + }, + { + "epoch": 1.28, + "grad_norm": 6.627527871091642, + "learning_rate": 5.999307794880695e-06, + "loss": 1.0593, + "step": 106794 + }, + { + "epoch": 1.28, + "grad_norm": 10.423365013005512, + "learning_rate": 5.9987724113416415e-06, + "loss": 1.2164, + "step": 106797 + }, + { + "epoch": 1.28, + "grad_norm": 7.3940518707069165, + "learning_rate": 5.9982370414573356e-06, + "loss": 1.463, + "step": 106800 + }, + { + "epoch": 1.28, + "grad_norm": 8.316714430941554, + "learning_rate": 5.997701685229608e-06, + "loss": 0.9363, + "step": 106803 + }, + { + "epoch": 1.28, + "grad_norm": 15.96632161473542, + "learning_rate": 5.997166342660281e-06, + "loss": 1.2472, + "step": 106806 + }, + { + "epoch": 1.28, + "grad_norm": 8.592759748629078, + "learning_rate": 5.996631013751182e-06, + "loss": 0.9532, + "step": 106809 + }, + { + "epoch": 1.28, + "grad_norm": 11.782403281449037, + "learning_rate": 5.996095698504141e-06, + "loss": 1.5167, + "step": 106812 + }, + { + "epoch": 1.28, + "grad_norm": 30.084683800585786, + "learning_rate": 5.9955603969209854e-06, + "loss": 1.2902, + "step": 106815 + }, + { + "epoch": 1.28, + "grad_norm": 5.187346904051016, + "learning_rate": 5.995025109003539e-06, + "loss": 1.0947, + "step": 106818 + }, + { + "epoch": 1.28, + "grad_norm": 13.094964191742658, + "learning_rate": 5.994489834753625e-06, + "loss": 1.1336, + "step": 106821 + }, + { + "epoch": 1.28, + "grad_norm": 6.002030297799599, + "learning_rate": 5.993954574173078e-06, + "loss": 1.3386, + "step": 106824 + }, + { + "epoch": 1.28, + "grad_norm": 8.304237561090458, + "learning_rate": 5.993419327263719e-06, + "loss": 1.1767, + "step": 106827 + }, + { + "epoch": 1.28, + "grad_norm": 7.3751183816286945, + "learning_rate": 5.99288409402738e-06, + "loss": 1.2435, + "step": 106830 + }, + { + "epoch": 1.28, + "grad_norm": 14.642848552236822, + "learning_rate": 5.992348874465881e-06, + "loss": 1.2305, + "step": 106833 + }, + { + "epoch": 1.28, + "grad_norm": 9.37357817996551, + "learning_rate": 5.991813668581054e-06, + "loss": 1.1076, + "step": 106836 + }, + { + "epoch": 1.28, + "grad_norm": 12.584941253029184, + "learning_rate": 5.99127847637472e-06, + "loss": 1.1841, + "step": 106839 + }, + { + "epoch": 1.28, + "grad_norm": 9.940638788079406, + "learning_rate": 5.990743297848713e-06, + "loss": 1.0249, + "step": 106842 + }, + { + "epoch": 1.28, + "grad_norm": 10.509309922142021, + "learning_rate": 5.990208133004851e-06, + "loss": 1.227, + "step": 106845 + }, + { + "epoch": 1.28, + "grad_norm": 14.219929364530968, + "learning_rate": 5.989672981844968e-06, + "loss": 1.1955, + "step": 106848 + }, + { + "epoch": 1.28, + "grad_norm": 9.382957960379885, + "learning_rate": 5.989137844370882e-06, + "loss": 1.0608, + "step": 106851 + }, + { + "epoch": 1.28, + "grad_norm": 8.957820875750194, + "learning_rate": 5.988602720584426e-06, + "loss": 1.1468, + "step": 106854 + }, + { + "epoch": 1.28, + "grad_norm": 6.44622825664954, + "learning_rate": 5.988067610487427e-06, + "loss": 1.2845, + "step": 106857 + }, + { + "epoch": 1.28, + "grad_norm": 5.335646494450792, + "learning_rate": 5.987532514081706e-06, + "loss": 0.9815, + "step": 106860 + }, + { + "epoch": 1.28, + "grad_norm": 11.090201144599273, + "learning_rate": 5.98699743136909e-06, + "loss": 1.1676, + "step": 106863 + }, + { + "epoch": 1.29, + "grad_norm": 5.126671377477575, + "learning_rate": 5.9864623623514075e-06, + "loss": 1.2756, + "step": 106866 + }, + { + "epoch": 1.29, + "grad_norm": 2.917321928068222, + "learning_rate": 5.985927307030485e-06, + "loss": 1.0922, + "step": 106869 + }, + { + "epoch": 1.29, + "grad_norm": 10.713861899321552, + "learning_rate": 5.985392265408143e-06, + "loss": 0.7792, + "step": 106872 + }, + { + "epoch": 1.29, + "grad_norm": 18.068763664956258, + "learning_rate": 5.984857237486212e-06, + "loss": 1.07, + "step": 106875 + }, + { + "epoch": 1.29, + "grad_norm": 6.848100363215998, + "learning_rate": 5.984322223266519e-06, + "loss": 1.0182, + "step": 106878 + }, + { + "epoch": 1.29, + "grad_norm": 36.539681123249736, + "learning_rate": 5.983787222750885e-06, + "loss": 1.165, + "step": 106881 + }, + { + "epoch": 1.29, + "grad_norm": 3.5143097861076416, + "learning_rate": 5.983252235941143e-06, + "loss": 1.1757, + "step": 106884 + }, + { + "epoch": 1.29, + "grad_norm": 14.410158765329871, + "learning_rate": 5.9827172628391094e-06, + "loss": 0.8693, + "step": 106887 + }, + { + "epoch": 1.29, + "grad_norm": 6.274632727502036, + "learning_rate": 5.9821823034466175e-06, + "loss": 1.0791, + "step": 106890 + }, + { + "epoch": 1.29, + "grad_norm": 6.551754035151482, + "learning_rate": 5.981647357765488e-06, + "loss": 1.1674, + "step": 106893 + }, + { + "epoch": 1.29, + "grad_norm": 8.773103798056658, + "learning_rate": 5.9811124257975525e-06, + "loss": 1.5334, + "step": 106896 + }, + { + "epoch": 1.29, + "grad_norm": 9.886505755526414, + "learning_rate": 5.980577507544629e-06, + "loss": 0.9177, + "step": 106899 + }, + { + "epoch": 1.29, + "grad_norm": 3.271834516879352, + "learning_rate": 5.980042603008549e-06, + "loss": 1.3547, + "step": 106902 + }, + { + "epoch": 1.29, + "grad_norm": 6.894396923021636, + "learning_rate": 5.979507712191134e-06, + "loss": 0.9303, + "step": 106905 + }, + { + "epoch": 1.29, + "grad_norm": 3.75027334928315, + "learning_rate": 5.978972835094213e-06, + "loss": 1.1781, + "step": 106908 + }, + { + "epoch": 1.29, + "grad_norm": 18.279304842264175, + "learning_rate": 5.978437971719609e-06, + "loss": 1.0954, + "step": 106911 + }, + { + "epoch": 1.29, + "grad_norm": 8.144373952443804, + "learning_rate": 5.977903122069147e-06, + "loss": 1.3882, + "step": 106914 + }, + { + "epoch": 1.29, + "grad_norm": 9.909637618786832, + "learning_rate": 5.9773682861446504e-06, + "loss": 1.3143, + "step": 106917 + }, + { + "epoch": 1.29, + "grad_norm": 5.03898586988094, + "learning_rate": 5.976833463947949e-06, + "loss": 1.1537, + "step": 106920 + }, + { + "epoch": 1.29, + "grad_norm": 4.784407533169307, + "learning_rate": 5.97629865548087e-06, + "loss": 1.215, + "step": 106923 + }, + { + "epoch": 1.29, + "grad_norm": 12.606922551972616, + "learning_rate": 5.975763860745228e-06, + "loss": 1.4003, + "step": 106926 + }, + { + "epoch": 1.29, + "grad_norm": 41.28304454758091, + "learning_rate": 5.975229079742856e-06, + "loss": 1.3711, + "step": 106929 + }, + { + "epoch": 1.29, + "grad_norm": 5.089568384681897, + "learning_rate": 5.974694312475579e-06, + "loss": 0.9105, + "step": 106932 + }, + { + "epoch": 1.29, + "grad_norm": 12.65072030761272, + "learning_rate": 5.974159558945217e-06, + "loss": 1.3304, + "step": 106935 + }, + { + "epoch": 1.29, + "grad_norm": 22.965647193817013, + "learning_rate": 5.973624819153604e-06, + "loss": 1.5507, + "step": 106938 + }, + { + "epoch": 1.29, + "grad_norm": 7.3316316714230565, + "learning_rate": 5.9730900931025545e-06, + "loss": 0.8416, + "step": 106941 + }, + { + "epoch": 1.29, + "grad_norm": 14.918055431885083, + "learning_rate": 5.972555380793901e-06, + "loss": 0.9225, + "step": 106944 + }, + { + "epoch": 1.29, + "grad_norm": 10.907328011062566, + "learning_rate": 5.972020682229462e-06, + "loss": 0.846, + "step": 106947 + }, + { + "epoch": 1.29, + "grad_norm": 6.98276842845462, + "learning_rate": 5.9714859974110704e-06, + "loss": 0.9351, + "step": 106950 + }, + { + "epoch": 1.29, + "grad_norm": 25.790713511865526, + "learning_rate": 5.970951326340542e-06, + "loss": 1.0287, + "step": 106953 + }, + { + "epoch": 1.29, + "grad_norm": 29.753120117916335, + "learning_rate": 5.970416669019706e-06, + "loss": 1.2642, + "step": 106956 + }, + { + "epoch": 1.29, + "grad_norm": 12.392541603129176, + "learning_rate": 5.969882025450387e-06, + "loss": 1.3786, + "step": 106959 + }, + { + "epoch": 1.29, + "grad_norm": 50.78793972959066, + "learning_rate": 5.969347395634408e-06, + "loss": 1.1995, + "step": 106962 + }, + { + "epoch": 1.29, + "grad_norm": 9.565571722660659, + "learning_rate": 5.968812779573598e-06, + "loss": 1.2945, + "step": 106965 + }, + { + "epoch": 1.29, + "grad_norm": 15.945755689437012, + "learning_rate": 5.968278177269775e-06, + "loss": 1.2566, + "step": 106968 + }, + { + "epoch": 1.29, + "grad_norm": 9.988675421914497, + "learning_rate": 5.967743588724766e-06, + "loss": 1.1891, + "step": 106971 + }, + { + "epoch": 1.29, + "grad_norm": 21.468321151114942, + "learning_rate": 5.967209013940396e-06, + "loss": 1.2345, + "step": 106974 + }, + { + "epoch": 1.29, + "grad_norm": 14.949779212827366, + "learning_rate": 5.966674452918493e-06, + "loss": 1.0876, + "step": 106977 + }, + { + "epoch": 1.29, + "grad_norm": 23.759799791193366, + "learning_rate": 5.966139905660873e-06, + "loss": 1.4738, + "step": 106980 + }, + { + "epoch": 1.29, + "grad_norm": 4.987156123274059, + "learning_rate": 5.965605372169363e-06, + "loss": 1.2105, + "step": 106983 + }, + { + "epoch": 1.29, + "grad_norm": 77.65375657472852, + "learning_rate": 5.965070852445791e-06, + "loss": 1.2093, + "step": 106986 + }, + { + "epoch": 1.29, + "grad_norm": 4.007599767183575, + "learning_rate": 5.9645363464919785e-06, + "loss": 0.9763, + "step": 106989 + }, + { + "epoch": 1.29, + "grad_norm": 15.401175665204395, + "learning_rate": 5.964001854309753e-06, + "loss": 1.2117, + "step": 106992 + }, + { + "epoch": 1.29, + "grad_norm": 17.152048591670212, + "learning_rate": 5.963467375900931e-06, + "loss": 1.2012, + "step": 106995 + }, + { + "epoch": 1.29, + "grad_norm": 9.948121863670197, + "learning_rate": 5.962932911267343e-06, + "loss": 1.2398, + "step": 106998 + }, + { + "epoch": 1.29, + "grad_norm": 17.326588409483747, + "learning_rate": 5.96239846041081e-06, + "loss": 1.2196, + "step": 107001 + }, + { + "epoch": 1.29, + "grad_norm": 11.91088410965351, + "learning_rate": 5.961864023333159e-06, + "loss": 1.6787, + "step": 107004 + }, + { + "epoch": 1.29, + "grad_norm": 14.14517147262033, + "learning_rate": 5.96132960003621e-06, + "loss": 1.2295, + "step": 107007 + }, + { + "epoch": 1.29, + "grad_norm": 3.2531379742562336, + "learning_rate": 5.960795190521786e-06, + "loss": 1.5648, + "step": 107010 + }, + { + "epoch": 1.29, + "grad_norm": 15.215927075075506, + "learning_rate": 5.960260794791714e-06, + "loss": 1.4015, + "step": 107013 + }, + { + "epoch": 1.29, + "grad_norm": 11.646658702247286, + "learning_rate": 5.959726412847819e-06, + "loss": 1.1494, + "step": 107016 + }, + { + "epoch": 1.29, + "grad_norm": 16.07987722118261, + "learning_rate": 5.959192044691923e-06, + "loss": 1.191, + "step": 107019 + }, + { + "epoch": 1.29, + "grad_norm": 3.1109652255069964, + "learning_rate": 5.9586576903258475e-06, + "loss": 1.1663, + "step": 107022 + }, + { + "epoch": 1.29, + "grad_norm": 5.309441564202986, + "learning_rate": 5.958123349751417e-06, + "loss": 1.1046, + "step": 107025 + }, + { + "epoch": 1.29, + "grad_norm": 3.7511737470886435, + "learning_rate": 5.9575890229704555e-06, + "loss": 1.4211, + "step": 107028 + }, + { + "epoch": 1.29, + "grad_norm": 64.38566765876817, + "learning_rate": 5.957054709984791e-06, + "loss": 1.3741, + "step": 107031 + }, + { + "epoch": 1.29, + "grad_norm": 5.005148162815409, + "learning_rate": 5.956520410796236e-06, + "loss": 1.2247, + "step": 107034 + }, + { + "epoch": 1.29, + "grad_norm": 6.483041338531045, + "learning_rate": 5.955986125406623e-06, + "loss": 1.1589, + "step": 107037 + }, + { + "epoch": 1.29, + "grad_norm": 6.541936009218756, + "learning_rate": 5.955451853817773e-06, + "loss": 1.4857, + "step": 107040 + }, + { + "epoch": 1.29, + "grad_norm": 4.016319267120609, + "learning_rate": 5.954917596031507e-06, + "loss": 1.1337, + "step": 107043 + }, + { + "epoch": 1.29, + "grad_norm": 7.737092608167415, + "learning_rate": 5.9543833520496555e-06, + "loss": 1.206, + "step": 107046 + }, + { + "epoch": 1.29, + "grad_norm": 3.2490723333942584, + "learning_rate": 5.953849121874033e-06, + "loss": 1.0584, + "step": 107049 + }, + { + "epoch": 1.29, + "grad_norm": 21.106187884025264, + "learning_rate": 5.953314905506464e-06, + "loss": 1.6334, + "step": 107052 + }, + { + "epoch": 1.29, + "grad_norm": 10.017751748899197, + "learning_rate": 5.952780702948775e-06, + "loss": 1.0693, + "step": 107055 + }, + { + "epoch": 1.29, + "grad_norm": 9.346660594583783, + "learning_rate": 5.952246514202791e-06, + "loss": 1.2197, + "step": 107058 + }, + { + "epoch": 1.29, + "grad_norm": 6.828922947645639, + "learning_rate": 5.95171233927033e-06, + "loss": 1.3446, + "step": 107061 + }, + { + "epoch": 1.29, + "grad_norm": 16.03625487543507, + "learning_rate": 5.951178178153214e-06, + "loss": 1.2908, + "step": 107064 + }, + { + "epoch": 1.29, + "grad_norm": 8.443048730785828, + "learning_rate": 5.950644030853267e-06, + "loss": 1.341, + "step": 107067 + }, + { + "epoch": 1.29, + "grad_norm": 22.52565546710302, + "learning_rate": 5.950109897372318e-06, + "loss": 1.3637, + "step": 107070 + }, + { + "epoch": 1.29, + "grad_norm": 8.008543344911907, + "learning_rate": 5.9495757777121864e-06, + "loss": 1.4004, + "step": 107073 + }, + { + "epoch": 1.29, + "grad_norm": 5.7073834881369905, + "learning_rate": 5.949041671874689e-06, + "loss": 1.1118, + "step": 107076 + }, + { + "epoch": 1.29, + "grad_norm": 6.420552918720729, + "learning_rate": 5.948507579861654e-06, + "loss": 1.2325, + "step": 107079 + }, + { + "epoch": 1.29, + "grad_norm": 4.205832375739283, + "learning_rate": 5.947973501674905e-06, + "loss": 1.0713, + "step": 107082 + }, + { + "epoch": 1.29, + "grad_norm": 14.042944814275733, + "learning_rate": 5.947439437316265e-06, + "loss": 0.9684, + "step": 107085 + }, + { + "epoch": 1.29, + "grad_norm": 34.82018348971164, + "learning_rate": 5.9469053867875505e-06, + "loss": 1.1014, + "step": 107088 + }, + { + "epoch": 1.29, + "grad_norm": 11.96325228070115, + "learning_rate": 5.946371350090588e-06, + "loss": 1.4097, + "step": 107091 + }, + { + "epoch": 1.29, + "grad_norm": 11.220550542969146, + "learning_rate": 5.945837327227202e-06, + "loss": 1.0846, + "step": 107094 + }, + { + "epoch": 1.29, + "grad_norm": 17.87440583406039, + "learning_rate": 5.94530331819921e-06, + "loss": 1.0963, + "step": 107097 + }, + { + "epoch": 1.29, + "grad_norm": 31.71511326753589, + "learning_rate": 5.944769323008442e-06, + "loss": 1.1729, + "step": 107100 + }, + { + "epoch": 1.29, + "grad_norm": 25.456104380105014, + "learning_rate": 5.944235341656715e-06, + "loss": 1.2874, + "step": 107103 + }, + { + "epoch": 1.29, + "grad_norm": 8.063814977759327, + "learning_rate": 5.943701374145847e-06, + "loss": 1.3117, + "step": 107106 + }, + { + "epoch": 1.29, + "grad_norm": 3.4099196649407904, + "learning_rate": 5.9431674204776665e-06, + "loss": 1.448, + "step": 107109 + }, + { + "epoch": 1.29, + "grad_norm": 13.430379874675218, + "learning_rate": 5.942633480653998e-06, + "loss": 1.0076, + "step": 107112 + }, + { + "epoch": 1.29, + "grad_norm": 9.498350443708222, + "learning_rate": 5.942099554676659e-06, + "loss": 1.11, + "step": 107115 + }, + { + "epoch": 1.29, + "grad_norm": 6.509780769574936, + "learning_rate": 5.941565642547471e-06, + "loss": 1.3343, + "step": 107118 + }, + { + "epoch": 1.29, + "grad_norm": 2.347049945416087, + "learning_rate": 5.9410317442682565e-06, + "loss": 1.5325, + "step": 107121 + }, + { + "epoch": 1.29, + "grad_norm": 31.783108841143534, + "learning_rate": 5.940497859840841e-06, + "loss": 1.0705, + "step": 107124 + }, + { + "epoch": 1.29, + "grad_norm": 11.019482949357633, + "learning_rate": 5.939963989267045e-06, + "loss": 1.5272, + "step": 107127 + }, + { + "epoch": 1.29, + "grad_norm": 5.5797819693651105, + "learning_rate": 5.939430132548687e-06, + "loss": 1.1484, + "step": 107130 + }, + { + "epoch": 1.29, + "grad_norm": 11.184366878986662, + "learning_rate": 5.93889628968759e-06, + "loss": 0.7301, + "step": 107133 + }, + { + "epoch": 1.29, + "grad_norm": 4.745462198914555, + "learning_rate": 5.93836246068558e-06, + "loss": 1.5309, + "step": 107136 + }, + { + "epoch": 1.29, + "grad_norm": 28.580095340675594, + "learning_rate": 5.937828645544479e-06, + "loss": 1.1152, + "step": 107139 + }, + { + "epoch": 1.29, + "grad_norm": 5.346297077427529, + "learning_rate": 5.937294844266099e-06, + "loss": 1.286, + "step": 107142 + }, + { + "epoch": 1.29, + "grad_norm": 11.528421283566853, + "learning_rate": 5.9367610568522725e-06, + "loss": 1.0491, + "step": 107145 + }, + { + "epoch": 1.29, + "grad_norm": 2.781417701969097, + "learning_rate": 5.936227283304814e-06, + "loss": 1.1325, + "step": 107148 + }, + { + "epoch": 1.29, + "grad_norm": 13.795080113908416, + "learning_rate": 5.9356935236255475e-06, + "loss": 1.5802, + "step": 107151 + }, + { + "epoch": 1.29, + "grad_norm": 10.23766401846687, + "learning_rate": 5.9351597778163005e-06, + "loss": 1.2691, + "step": 107154 + }, + { + "epoch": 1.29, + "grad_norm": 7.528783251009797, + "learning_rate": 5.934626045878886e-06, + "loss": 1.1579, + "step": 107157 + }, + { + "epoch": 1.29, + "grad_norm": 9.420637280183376, + "learning_rate": 5.934092327815126e-06, + "loss": 1.2768, + "step": 107160 + }, + { + "epoch": 1.29, + "grad_norm": 8.872519437496848, + "learning_rate": 5.9335586236268465e-06, + "loss": 1.3437, + "step": 107163 + }, + { + "epoch": 1.29, + "grad_norm": 8.956140127818394, + "learning_rate": 5.93302493331587e-06, + "loss": 1.1113, + "step": 107166 + }, + { + "epoch": 1.29, + "grad_norm": 8.272243335939555, + "learning_rate": 5.932491256884011e-06, + "loss": 1.5758, + "step": 107169 + }, + { + "epoch": 1.29, + "grad_norm": 11.23350368990188, + "learning_rate": 5.931957594333093e-06, + "loss": 1.111, + "step": 107172 + }, + { + "epoch": 1.29, + "grad_norm": 8.549925112703079, + "learning_rate": 5.931423945664938e-06, + "loss": 1.039, + "step": 107175 + }, + { + "epoch": 1.29, + "grad_norm": 6.6122405254512735, + "learning_rate": 5.93089031088137e-06, + "loss": 1.3342, + "step": 107178 + }, + { + "epoch": 1.29, + "grad_norm": 6.109577365341487, + "learning_rate": 5.930356689984209e-06, + "loss": 1.4316, + "step": 107181 + }, + { + "epoch": 1.29, + "grad_norm": 10.273173653567738, + "learning_rate": 5.929823082975272e-06, + "loss": 1.2026, + "step": 107184 + }, + { + "epoch": 1.29, + "grad_norm": 11.51477277115564, + "learning_rate": 5.929289489856385e-06, + "loss": 1.2067, + "step": 107187 + }, + { + "epoch": 1.29, + "grad_norm": 27.883098168091866, + "learning_rate": 5.9287559106293635e-06, + "loss": 1.0696, + "step": 107190 + }, + { + "epoch": 1.29, + "grad_norm": 6.509519573857011, + "learning_rate": 5.928222345296035e-06, + "loss": 1.2223, + "step": 107193 + }, + { + "epoch": 1.29, + "grad_norm": 9.160177577070689, + "learning_rate": 5.927688793858215e-06, + "loss": 1.3516, + "step": 107196 + }, + { + "epoch": 1.29, + "grad_norm": 16.208817884920705, + "learning_rate": 5.927155256317727e-06, + "loss": 1.1372, + "step": 107199 + }, + { + "epoch": 1.29, + "grad_norm": 37.24770876169787, + "learning_rate": 5.9266217326763895e-06, + "loss": 1.0787, + "step": 107202 + }, + { + "epoch": 1.29, + "grad_norm": 14.353640594884808, + "learning_rate": 5.926088222936024e-06, + "loss": 1.3406, + "step": 107205 + }, + { + "epoch": 1.29, + "grad_norm": 10.659147831267752, + "learning_rate": 5.925554727098457e-06, + "loss": 1.1201, + "step": 107208 + }, + { + "epoch": 1.29, + "grad_norm": 3.087302993155105, + "learning_rate": 5.925021245165502e-06, + "loss": 1.1118, + "step": 107211 + }, + { + "epoch": 1.29, + "grad_norm": 6.164651488811171, + "learning_rate": 5.92448777713898e-06, + "loss": 1.0762, + "step": 107214 + }, + { + "epoch": 1.29, + "grad_norm": 4.73866185838463, + "learning_rate": 5.923954323020713e-06, + "loss": 0.881, + "step": 107217 + }, + { + "epoch": 1.29, + "grad_norm": 9.51853253925832, + "learning_rate": 5.923420882812525e-06, + "loss": 1.4736, + "step": 107220 + }, + { + "epoch": 1.29, + "grad_norm": 11.648687123723494, + "learning_rate": 5.922887456516231e-06, + "loss": 0.9989, + "step": 107223 + }, + { + "epoch": 1.29, + "grad_norm": 4.391799086137098, + "learning_rate": 5.9223540441336525e-06, + "loss": 1.1702, + "step": 107226 + }, + { + "epoch": 1.29, + "grad_norm": 7.729977012275098, + "learning_rate": 5.921820645666611e-06, + "loss": 1.3974, + "step": 107229 + }, + { + "epoch": 1.29, + "grad_norm": 7.441431536504485, + "learning_rate": 5.9212872611169285e-06, + "loss": 1.4781, + "step": 107232 + }, + { + "epoch": 1.29, + "grad_norm": 6.0014448978709005, + "learning_rate": 5.9207538904864234e-06, + "loss": 1.5248, + "step": 107235 + }, + { + "epoch": 1.29, + "grad_norm": 9.657290371670955, + "learning_rate": 5.920220533776915e-06, + "loss": 1.1648, + "step": 107238 + }, + { + "epoch": 1.29, + "grad_norm": 23.48669614307818, + "learning_rate": 5.9196871909902245e-06, + "loss": 1.1406, + "step": 107241 + }, + { + "epoch": 1.29, + "grad_norm": 9.303157028794015, + "learning_rate": 5.919153862128171e-06, + "loss": 1.2606, + "step": 107244 + }, + { + "epoch": 1.29, + "grad_norm": 5.33264344870883, + "learning_rate": 5.9186205471925785e-06, + "loss": 1.1385, + "step": 107247 + }, + { + "epoch": 1.29, + "grad_norm": 32.26041879667676, + "learning_rate": 5.91808724618526e-06, + "loss": 1.0966, + "step": 107250 + }, + { + "epoch": 1.29, + "grad_norm": 17.37712173535571, + "learning_rate": 5.917553959108042e-06, + "loss": 0.9691, + "step": 107253 + }, + { + "epoch": 1.29, + "grad_norm": 3.094835251907473, + "learning_rate": 5.91702068596274e-06, + "loss": 1.1424, + "step": 107256 + }, + { + "epoch": 1.29, + "grad_norm": 4.240955236571928, + "learning_rate": 5.916487426751174e-06, + "loss": 1.3178, + "step": 107259 + }, + { + "epoch": 1.29, + "grad_norm": 23.575737030170387, + "learning_rate": 5.915954181475172e-06, + "loss": 1.3562, + "step": 107262 + }, + { + "epoch": 1.29, + "grad_norm": 5.2167672707146755, + "learning_rate": 5.9154209501365435e-06, + "loss": 1.4679, + "step": 107265 + }, + { + "epoch": 1.29, + "grad_norm": 5.659953718568165, + "learning_rate": 5.914887732737112e-06, + "loss": 1.1302, + "step": 107268 + }, + { + "epoch": 1.29, + "grad_norm": 4.504484775349374, + "learning_rate": 5.914354529278694e-06, + "loss": 1.1404, + "step": 107271 + }, + { + "epoch": 1.29, + "grad_norm": 4.839566448478727, + "learning_rate": 5.9138213397631195e-06, + "loss": 1.3161, + "step": 107274 + }, + { + "epoch": 1.29, + "grad_norm": 9.338299280915763, + "learning_rate": 5.9132881641921965e-06, + "loss": 0.9136, + "step": 107277 + }, + { + "epoch": 1.29, + "grad_norm": 7.290509162999023, + "learning_rate": 5.91275500256775e-06, + "loss": 1.3228, + "step": 107280 + }, + { + "epoch": 1.29, + "grad_norm": 10.785239681724093, + "learning_rate": 5.912221854891598e-06, + "loss": 0.9078, + "step": 107283 + }, + { + "epoch": 1.29, + "grad_norm": 19.059936535879405, + "learning_rate": 5.911688721165558e-06, + "loss": 0.9625, + "step": 107286 + }, + { + "epoch": 1.29, + "grad_norm": 10.489480430363603, + "learning_rate": 5.911155601391458e-06, + "loss": 1.2749, + "step": 107289 + }, + { + "epoch": 1.29, + "grad_norm": 6.39094540012225, + "learning_rate": 5.910622495571106e-06, + "loss": 1.272, + "step": 107292 + }, + { + "epoch": 1.29, + "grad_norm": 9.420784628947509, + "learning_rate": 5.910089403706327e-06, + "loss": 0.8525, + "step": 107295 + }, + { + "epoch": 1.29, + "grad_norm": 11.002543341854196, + "learning_rate": 5.909556325798939e-06, + "loss": 1.0846, + "step": 107298 + }, + { + "epoch": 1.29, + "grad_norm": 4.031224818014495, + "learning_rate": 5.909023261850767e-06, + "loss": 0.8752, + "step": 107301 + }, + { + "epoch": 1.29, + "grad_norm": 10.78212193740626, + "learning_rate": 5.90849021186362e-06, + "loss": 1.5179, + "step": 107304 + }, + { + "epoch": 1.29, + "grad_norm": 28.59341841315574, + "learning_rate": 5.907957175839324e-06, + "loss": 1.2502, + "step": 107307 + }, + { + "epoch": 1.29, + "grad_norm": 7.3991398429221755, + "learning_rate": 5.907424153779695e-06, + "loss": 1.2691, + "step": 107310 + }, + { + "epoch": 1.29, + "grad_norm": 10.663455118682489, + "learning_rate": 5.906891145686552e-06, + "loss": 1.0367, + "step": 107313 + }, + { + "epoch": 1.29, + "grad_norm": 3.144713868710465, + "learning_rate": 5.9063581515617195e-06, + "loss": 1.1324, + "step": 107316 + }, + { + "epoch": 1.29, + "grad_norm": 8.39131794228351, + "learning_rate": 5.905825171407011e-06, + "loss": 1.161, + "step": 107319 + }, + { + "epoch": 1.29, + "grad_norm": 7.668536318041749, + "learning_rate": 5.905292205224243e-06, + "loss": 1.1445, + "step": 107322 + }, + { + "epoch": 1.29, + "grad_norm": 13.512251487808504, + "learning_rate": 5.904759253015242e-06, + "loss": 1.1233, + "step": 107325 + }, + { + "epoch": 1.29, + "grad_norm": 7.533694484043789, + "learning_rate": 5.90422631478182e-06, + "loss": 1.4131, + "step": 107328 + }, + { + "epoch": 1.29, + "grad_norm": 10.561214558468938, + "learning_rate": 5.9036933905258e-06, + "loss": 1.0128, + "step": 107331 + }, + { + "epoch": 1.29, + "grad_norm": 7.486269306483709, + "learning_rate": 5.9031604802489975e-06, + "loss": 1.1901, + "step": 107334 + }, + { + "epoch": 1.29, + "grad_norm": 12.372067953696051, + "learning_rate": 5.902627583953233e-06, + "loss": 1.2689, + "step": 107337 + }, + { + "epoch": 1.29, + "grad_norm": 5.55224722402498, + "learning_rate": 5.902094701640323e-06, + "loss": 1.0042, + "step": 107340 + }, + { + "epoch": 1.29, + "grad_norm": 7.326978249303419, + "learning_rate": 5.901561833312093e-06, + "loss": 1.0991, + "step": 107343 + }, + { + "epoch": 1.29, + "grad_norm": 7.361957004398795, + "learning_rate": 5.90102897897035e-06, + "loss": 1.2467, + "step": 107346 + }, + { + "epoch": 1.29, + "grad_norm": 28.54264274349868, + "learning_rate": 5.900496138616923e-06, + "loss": 1.1468, + "step": 107349 + }, + { + "epoch": 1.29, + "grad_norm": 22.52084980170616, + "learning_rate": 5.899963312253623e-06, + "loss": 1.226, + "step": 107352 + }, + { + "epoch": 1.29, + "grad_norm": 12.663029383805776, + "learning_rate": 5.899430499882275e-06, + "loss": 1.2535, + "step": 107355 + }, + { + "epoch": 1.29, + "grad_norm": 8.30035492666214, + "learning_rate": 5.898897701504689e-06, + "loss": 1.177, + "step": 107358 + }, + { + "epoch": 1.29, + "grad_norm": 6.740547175393265, + "learning_rate": 5.898364917122692e-06, + "loss": 1.0216, + "step": 107361 + }, + { + "epoch": 1.29, + "grad_norm": 17.55836439805084, + "learning_rate": 5.8978321467380935e-06, + "loss": 1.1484, + "step": 107364 + }, + { + "epoch": 1.29, + "grad_norm": 13.710148951807609, + "learning_rate": 5.897299390352721e-06, + "loss": 1.2398, + "step": 107367 + }, + { + "epoch": 1.29, + "grad_norm": 73.65426161359984, + "learning_rate": 5.896766647968386e-06, + "loss": 1.0901, + "step": 107370 + }, + { + "epoch": 1.29, + "grad_norm": 10.842499987119725, + "learning_rate": 5.89623391958691e-06, + "loss": 1.1745, + "step": 107373 + }, + { + "epoch": 1.29, + "grad_norm": 7.813997575434093, + "learning_rate": 5.895701205210107e-06, + "loss": 1.436, + "step": 107376 + }, + { + "epoch": 1.29, + "grad_norm": 13.160236881896415, + "learning_rate": 5.8951685048397984e-06, + "loss": 1.2478, + "step": 107379 + }, + { + "epoch": 1.29, + "grad_norm": 3.147895428180915, + "learning_rate": 5.894635818477804e-06, + "loss": 1.0443, + "step": 107382 + }, + { + "epoch": 1.29, + "grad_norm": 5.648846413787783, + "learning_rate": 5.8941031461259345e-06, + "loss": 1.4829, + "step": 107385 + }, + { + "epoch": 1.29, + "grad_norm": 17.9122432317839, + "learning_rate": 5.893570487786013e-06, + "loss": 1.2093, + "step": 107388 + }, + { + "epoch": 1.29, + "grad_norm": 4.289573334492556, + "learning_rate": 5.893037843459857e-06, + "loss": 0.9682, + "step": 107391 + }, + { + "epoch": 1.29, + "grad_norm": 10.918380749087602, + "learning_rate": 5.892505213149282e-06, + "loss": 0.9719, + "step": 107394 + }, + { + "epoch": 1.29, + "grad_norm": 10.204697854011357, + "learning_rate": 5.89197259685611e-06, + "loss": 1.3366, + "step": 107397 + }, + { + "epoch": 1.29, + "grad_norm": 4.788156954046016, + "learning_rate": 5.891439994582154e-06, + "loss": 0.8696, + "step": 107400 + }, + { + "epoch": 1.29, + "grad_norm": 1.9510542903098442, + "learning_rate": 5.890907406329234e-06, + "loss": 0.8518, + "step": 107403 + }, + { + "epoch": 1.29, + "grad_norm": 6.546266625253329, + "learning_rate": 5.890374832099166e-06, + "loss": 1.1382, + "step": 107406 + }, + { + "epoch": 1.29, + "grad_norm": 11.445285127936357, + "learning_rate": 5.889842271893771e-06, + "loss": 1.579, + "step": 107409 + }, + { + "epoch": 1.29, + "grad_norm": 6.2020465059636, + "learning_rate": 5.889309725714861e-06, + "loss": 0.9785, + "step": 107412 + }, + { + "epoch": 1.29, + "grad_norm": 19.275151263105734, + "learning_rate": 5.888777193564258e-06, + "loss": 1.242, + "step": 107415 + }, + { + "epoch": 1.29, + "grad_norm": 3.575556858583337, + "learning_rate": 5.888244675443775e-06, + "loss": 1.3156, + "step": 107418 + }, + { + "epoch": 1.29, + "grad_norm": 5.753555793226296, + "learning_rate": 5.887712171355235e-06, + "loss": 0.7643, + "step": 107421 + }, + { + "epoch": 1.29, + "grad_norm": 12.347820836529948, + "learning_rate": 5.887179681300453e-06, + "loss": 0.9527, + "step": 107424 + }, + { + "epoch": 1.29, + "grad_norm": 12.550939330187205, + "learning_rate": 5.886647205281242e-06, + "loss": 1.0069, + "step": 107427 + }, + { + "epoch": 1.29, + "grad_norm": 10.708012621980792, + "learning_rate": 5.886114743299423e-06, + "loss": 1.2174, + "step": 107430 + }, + { + "epoch": 1.29, + "grad_norm": 35.47717310099211, + "learning_rate": 5.885582295356813e-06, + "loss": 0.9999, + "step": 107433 + }, + { + "epoch": 1.29, + "grad_norm": 4.255169468136307, + "learning_rate": 5.885049861455232e-06, + "loss": 1.1949, + "step": 107436 + }, + { + "epoch": 1.29, + "grad_norm": 10.591331837670745, + "learning_rate": 5.884517441596489e-06, + "loss": 1.2807, + "step": 107439 + }, + { + "epoch": 1.29, + "grad_norm": 37.79616825993829, + "learning_rate": 5.883985035782404e-06, + "loss": 1.4887, + "step": 107442 + }, + { + "epoch": 1.29, + "grad_norm": 14.353836315668016, + "learning_rate": 5.8834526440148e-06, + "loss": 1.3299, + "step": 107445 + }, + { + "epoch": 1.29, + "grad_norm": 23.603370876541966, + "learning_rate": 5.882920266295486e-06, + "loss": 1.2544, + "step": 107448 + }, + { + "epoch": 1.29, + "grad_norm": 8.989893808646036, + "learning_rate": 5.882387902626288e-06, + "loss": 1.0295, + "step": 107451 + }, + { + "epoch": 1.29, + "grad_norm": 9.26325489621545, + "learning_rate": 5.88185555300901e-06, + "loss": 0.8605, + "step": 107454 + }, + { + "epoch": 1.29, + "grad_norm": 18.20633867177151, + "learning_rate": 5.881323217445481e-06, + "loss": 1.1361, + "step": 107457 + }, + { + "epoch": 1.29, + "grad_norm": 6.485981665799933, + "learning_rate": 5.880790895937507e-06, + "loss": 1.2189, + "step": 107460 + }, + { + "epoch": 1.29, + "grad_norm": 7.266300559243452, + "learning_rate": 5.880258588486916e-06, + "loss": 1.17, + "step": 107463 + }, + { + "epoch": 1.29, + "grad_norm": 10.442121925232918, + "learning_rate": 5.879726295095516e-06, + "loss": 1.5357, + "step": 107466 + }, + { + "epoch": 1.29, + "grad_norm": 9.545443031448212, + "learning_rate": 5.8791940157651236e-06, + "loss": 1.5126, + "step": 107469 + }, + { + "epoch": 1.29, + "grad_norm": 5.249760289342383, + "learning_rate": 5.878661750497559e-06, + "loss": 1.2209, + "step": 107472 + }, + { + "epoch": 1.29, + "grad_norm": 5.475492648491133, + "learning_rate": 5.878129499294638e-06, + "loss": 1.2503, + "step": 107475 + }, + { + "epoch": 1.29, + "grad_norm": 10.123238186634575, + "learning_rate": 5.877597262158179e-06, + "loss": 1.5786, + "step": 107478 + }, + { + "epoch": 1.29, + "grad_norm": 10.986383601172921, + "learning_rate": 5.87706503908999e-06, + "loss": 1.1462, + "step": 107481 + }, + { + "epoch": 1.29, + "grad_norm": 5.8701383486735335, + "learning_rate": 5.876532830091896e-06, + "loss": 0.8757, + "step": 107484 + }, + { + "epoch": 1.29, + "grad_norm": 34.47754954473513, + "learning_rate": 5.876000635165711e-06, + "loss": 1.2093, + "step": 107487 + }, + { + "epoch": 1.29, + "grad_norm": 8.573839946060069, + "learning_rate": 5.8754684543132515e-06, + "loss": 1.3868, + "step": 107490 + }, + { + "epoch": 1.29, + "grad_norm": 8.918387014449031, + "learning_rate": 5.874936287536329e-06, + "loss": 0.9227, + "step": 107493 + }, + { + "epoch": 1.29, + "grad_norm": 32.47019877202406, + "learning_rate": 5.874404134836763e-06, + "loss": 1.2053, + "step": 107496 + }, + { + "epoch": 1.29, + "grad_norm": 7.898523701486515, + "learning_rate": 5.8738719962163714e-06, + "loss": 1.0936, + "step": 107499 + }, + { + "epoch": 1.29, + "grad_norm": 6.289275078890863, + "learning_rate": 5.873339871676967e-06, + "loss": 1.4679, + "step": 107502 + }, + { + "epoch": 1.29, + "grad_norm": 28.07203933929787, + "learning_rate": 5.872807761220371e-06, + "loss": 1.1332, + "step": 107505 + }, + { + "epoch": 1.29, + "grad_norm": 7.706264638675556, + "learning_rate": 5.872275664848391e-06, + "loss": 1.0638, + "step": 107508 + }, + { + "epoch": 1.29, + "grad_norm": 8.608620910813288, + "learning_rate": 5.87174358256285e-06, + "loss": 1.1785, + "step": 107511 + }, + { + "epoch": 1.29, + "grad_norm": 10.600064870107968, + "learning_rate": 5.87121151436556e-06, + "loss": 1.5168, + "step": 107514 + }, + { + "epoch": 1.29, + "grad_norm": 8.091482738361849, + "learning_rate": 5.870679460258341e-06, + "loss": 1.0609, + "step": 107517 + }, + { + "epoch": 1.29, + "grad_norm": 6.73713292193929, + "learning_rate": 5.870147420243003e-06, + "loss": 1.2446, + "step": 107520 + }, + { + "epoch": 1.29, + "grad_norm": 6.077521393197829, + "learning_rate": 5.869615394321363e-06, + "loss": 1.3867, + "step": 107523 + }, + { + "epoch": 1.29, + "grad_norm": 16.17365954407901, + "learning_rate": 5.869083382495238e-06, + "loss": 1.3237, + "step": 107526 + }, + { + "epoch": 1.29, + "grad_norm": 16.208126999208083, + "learning_rate": 5.868551384766445e-06, + "loss": 1.5086, + "step": 107529 + }, + { + "epoch": 1.29, + "grad_norm": 5.180904515668032, + "learning_rate": 5.8680194011368e-06, + "loss": 1.1404, + "step": 107532 + }, + { + "epoch": 1.29, + "grad_norm": 26.630560678416696, + "learning_rate": 5.867487431608112e-06, + "loss": 0.9226, + "step": 107535 + }, + { + "epoch": 1.29, + "grad_norm": 4.539971620195815, + "learning_rate": 5.8669554761822025e-06, + "loss": 0.9966, + "step": 107538 + }, + { + "epoch": 1.29, + "grad_norm": 17.73664005943798, + "learning_rate": 5.866423534860886e-06, + "loss": 1.2919, + "step": 107541 + }, + { + "epoch": 1.29, + "grad_norm": 14.120968150838921, + "learning_rate": 5.865891607645979e-06, + "loss": 1.3861, + "step": 107544 + }, + { + "epoch": 1.29, + "grad_norm": 8.19128804235869, + "learning_rate": 5.865359694539292e-06, + "loss": 1.4294, + "step": 107547 + }, + { + "epoch": 1.29, + "grad_norm": 7.5666300369040345, + "learning_rate": 5.864827795542643e-06, + "loss": 1.0665, + "step": 107550 + }, + { + "epoch": 1.29, + "grad_norm": 16.295410838561313, + "learning_rate": 5.864295910657847e-06, + "loss": 1.3803, + "step": 107553 + }, + { + "epoch": 1.29, + "grad_norm": 5.931914548148159, + "learning_rate": 5.863764039886719e-06, + "loss": 1.4403, + "step": 107556 + }, + { + "epoch": 1.29, + "grad_norm": 7.32046060891621, + "learning_rate": 5.863232183231079e-06, + "loss": 1.5507, + "step": 107559 + }, + { + "epoch": 1.29, + "grad_norm": 7.578422846107573, + "learning_rate": 5.8627003406927355e-06, + "loss": 1.0737, + "step": 107562 + }, + { + "epoch": 1.29, + "grad_norm": 19.64560589598488, + "learning_rate": 5.862168512273503e-06, + "loss": 1.1381, + "step": 107565 + }, + { + "epoch": 1.29, + "grad_norm": 9.369118638794964, + "learning_rate": 5.861636697975197e-06, + "loss": 1.3092, + "step": 107568 + }, + { + "epoch": 1.29, + "grad_norm": 36.83831267739082, + "learning_rate": 5.8611048977996406e-06, + "loss": 1.1375, + "step": 107571 + }, + { + "epoch": 1.29, + "grad_norm": 86.32317384933475, + "learning_rate": 5.86057311174864e-06, + "loss": 1.1346, + "step": 107574 + }, + { + "epoch": 1.29, + "grad_norm": 7.129047677974718, + "learning_rate": 5.86004133982401e-06, + "loss": 1.1976, + "step": 107577 + }, + { + "epoch": 1.29, + "grad_norm": 11.329277862027856, + "learning_rate": 5.859509582027568e-06, + "loss": 1.2021, + "step": 107580 + }, + { + "epoch": 1.29, + "grad_norm": 16.07220552510685, + "learning_rate": 5.85897783836113e-06, + "loss": 1.2074, + "step": 107583 + }, + { + "epoch": 1.29, + "grad_norm": 6.605954844653639, + "learning_rate": 5.858446108826512e-06, + "loss": 1.0882, + "step": 107586 + }, + { + "epoch": 1.29, + "grad_norm": 42.14056936300683, + "learning_rate": 5.857914393425521e-06, + "loss": 1.0215, + "step": 107589 + }, + { + "epoch": 1.29, + "grad_norm": 8.62091404846283, + "learning_rate": 5.857382692159976e-06, + "loss": 0.9071, + "step": 107592 + }, + { + "epoch": 1.29, + "grad_norm": 29.860181763449866, + "learning_rate": 5.856851005031694e-06, + "loss": 1.5229, + "step": 107595 + }, + { + "epoch": 1.29, + "grad_norm": 8.677412873543476, + "learning_rate": 5.856319332042489e-06, + "loss": 1.3027, + "step": 107598 + }, + { + "epoch": 1.29, + "grad_norm": 12.979028708666041, + "learning_rate": 5.85578767319417e-06, + "loss": 1.1546, + "step": 107601 + }, + { + "epoch": 1.29, + "grad_norm": 4.973313785757337, + "learning_rate": 5.8552560284885566e-06, + "loss": 1.0683, + "step": 107604 + }, + { + "epoch": 1.29, + "grad_norm": 30.503621675212948, + "learning_rate": 5.854724397927459e-06, + "loss": 1.426, + "step": 107607 + }, + { + "epoch": 1.29, + "grad_norm": 6.5346709374120415, + "learning_rate": 5.854192781512695e-06, + "loss": 1.072, + "step": 107610 + }, + { + "epoch": 1.29, + "grad_norm": 4.851295697708048, + "learning_rate": 5.85366117924608e-06, + "loss": 0.9295, + "step": 107613 + }, + { + "epoch": 1.29, + "grad_norm": 4.714112541744495, + "learning_rate": 5.853129591129425e-06, + "loss": 1.1925, + "step": 107616 + }, + { + "epoch": 1.29, + "grad_norm": 17.63215373459555, + "learning_rate": 5.852598017164543e-06, + "loss": 1.0848, + "step": 107619 + }, + { + "epoch": 1.29, + "grad_norm": 5.5604164790498904, + "learning_rate": 5.852066457353252e-06, + "loss": 0.9842, + "step": 107622 + }, + { + "epoch": 1.29, + "grad_norm": 8.639154572892412, + "learning_rate": 5.8515349116973655e-06, + "loss": 1.3184, + "step": 107625 + }, + { + "epoch": 1.29, + "grad_norm": 6.607798213731596, + "learning_rate": 5.851003380198694e-06, + "loss": 1.1217, + "step": 107628 + }, + { + "epoch": 1.29, + "grad_norm": 4.0243347647198995, + "learning_rate": 5.850471862859053e-06, + "loss": 1.3878, + "step": 107631 + }, + { + "epoch": 1.29, + "grad_norm": 9.946747305946698, + "learning_rate": 5.849940359680255e-06, + "loss": 1.1244, + "step": 107634 + }, + { + "epoch": 1.29, + "grad_norm": 7.060149873412417, + "learning_rate": 5.849408870664119e-06, + "loss": 1.2759, + "step": 107637 + }, + { + "epoch": 1.29, + "grad_norm": 59.26078320426487, + "learning_rate": 5.848877395812458e-06, + "loss": 1.049, + "step": 107640 + }, + { + "epoch": 1.29, + "grad_norm": 8.806101772932314, + "learning_rate": 5.848345935127078e-06, + "loss": 1.1827, + "step": 107643 + }, + { + "epoch": 1.29, + "grad_norm": 8.473398713433491, + "learning_rate": 5.847814488609801e-06, + "loss": 1.3218, + "step": 107646 + }, + { + "epoch": 1.29, + "grad_norm": 32.289153971031844, + "learning_rate": 5.847283056262434e-06, + "loss": 1.266, + "step": 107649 + }, + { + "epoch": 1.29, + "grad_norm": 18.53589790309542, + "learning_rate": 5.8467516380868e-06, + "loss": 0.8817, + "step": 107652 + }, + { + "epoch": 1.29, + "grad_norm": 13.415269115633643, + "learning_rate": 5.846220234084701e-06, + "loss": 1.1855, + "step": 107655 + }, + { + "epoch": 1.29, + "grad_norm": 8.946350426170598, + "learning_rate": 5.845688844257959e-06, + "loss": 1.2649, + "step": 107658 + }, + { + "epoch": 1.29, + "grad_norm": 5.131540551311063, + "learning_rate": 5.845157468608382e-06, + "loss": 0.9677, + "step": 107661 + }, + { + "epoch": 1.29, + "grad_norm": 6.954093023508777, + "learning_rate": 5.8446261071377875e-06, + "loss": 1.2282, + "step": 107664 + }, + { + "epoch": 1.29, + "grad_norm": 10.845299606845675, + "learning_rate": 5.84409475984799e-06, + "loss": 1.3603, + "step": 107667 + }, + { + "epoch": 1.29, + "grad_norm": 3.996962650989716, + "learning_rate": 5.843563426740799e-06, + "loss": 1.5356, + "step": 107670 + }, + { + "epoch": 1.29, + "grad_norm": 4.4578209245156035, + "learning_rate": 5.843032107818027e-06, + "loss": 1.1308, + "step": 107673 + }, + { + "epoch": 1.29, + "grad_norm": 6.568634660872961, + "learning_rate": 5.842500803081488e-06, + "loss": 1.117, + "step": 107676 + }, + { + "epoch": 1.29, + "grad_norm": 5.5393088726690864, + "learning_rate": 5.841969512533001e-06, + "loss": 1.423, + "step": 107679 + }, + { + "epoch": 1.29, + "grad_norm": 17.294645593161334, + "learning_rate": 5.841438236174372e-06, + "loss": 1.1593, + "step": 107682 + }, + { + "epoch": 1.29, + "grad_norm": 7.122193228580987, + "learning_rate": 5.8409069740074144e-06, + "loss": 1.4827, + "step": 107685 + }, + { + "epoch": 1.29, + "grad_norm": 32.802966922796614, + "learning_rate": 5.8403757260339426e-06, + "loss": 1.0247, + "step": 107688 + }, + { + "epoch": 1.29, + "grad_norm": 9.177164754956738, + "learning_rate": 5.839844492255773e-06, + "loss": 1.1312, + "step": 107691 + }, + { + "epoch": 1.29, + "grad_norm": 6.978664307940243, + "learning_rate": 5.839313272674717e-06, + "loss": 1.0945, + "step": 107694 + }, + { + "epoch": 1.3, + "grad_norm": 8.431418016856288, + "learning_rate": 5.838782067292583e-06, + "loss": 1.333, + "step": 107697 + }, + { + "epoch": 1.3, + "grad_norm": 20.73526939832835, + "learning_rate": 5.838250876111189e-06, + "loss": 1.0056, + "step": 107700 + }, + { + "epoch": 1.3, + "grad_norm": 13.446914699537572, + "learning_rate": 5.8377196991323426e-06, + "loss": 1.3649, + "step": 107703 + }, + { + "epoch": 1.3, + "grad_norm": 6.6528614961350065, + "learning_rate": 5.837188536357864e-06, + "loss": 1.2369, + "step": 107706 + }, + { + "epoch": 1.3, + "grad_norm": 9.797563432395958, + "learning_rate": 5.836657387789557e-06, + "loss": 1.2572, + "step": 107709 + }, + { + "epoch": 1.3, + "grad_norm": 17.87197674057814, + "learning_rate": 5.836126253429239e-06, + "loss": 1.6028, + "step": 107712 + }, + { + "epoch": 1.3, + "grad_norm": 12.208400065350553, + "learning_rate": 5.835595133278729e-06, + "loss": 1.0584, + "step": 107715 + }, + { + "epoch": 1.3, + "grad_norm": 11.011308779314504, + "learning_rate": 5.835064027339825e-06, + "loss": 1.2012, + "step": 107718 + }, + { + "epoch": 1.3, + "grad_norm": 5.610750686017891, + "learning_rate": 5.834532935614353e-06, + "loss": 1.1503, + "step": 107721 + }, + { + "epoch": 1.3, + "grad_norm": 6.658068131576056, + "learning_rate": 5.834001858104116e-06, + "loss": 1.1238, + "step": 107724 + }, + { + "epoch": 1.3, + "grad_norm": 14.017179110343442, + "learning_rate": 5.833470794810929e-06, + "loss": 1.5151, + "step": 107727 + }, + { + "epoch": 1.3, + "grad_norm": 2.7887365081568776, + "learning_rate": 5.832939745736606e-06, + "loss": 1.2177, + "step": 107730 + }, + { + "epoch": 1.3, + "grad_norm": 20.528051554571245, + "learning_rate": 5.832408710882963e-06, + "loss": 0.9999, + "step": 107733 + }, + { + "epoch": 1.3, + "grad_norm": 4.921748698014895, + "learning_rate": 5.831877690251804e-06, + "loss": 1.3128, + "step": 107736 + }, + { + "epoch": 1.3, + "grad_norm": 6.003780914844585, + "learning_rate": 5.831346683844948e-06, + "loss": 1.1898, + "step": 107739 + }, + { + "epoch": 1.3, + "grad_norm": 17.880117442657046, + "learning_rate": 5.830815691664201e-06, + "loss": 0.9858, + "step": 107742 + }, + { + "epoch": 1.3, + "grad_norm": 2.9576934085147095, + "learning_rate": 5.830284713711378e-06, + "loss": 1.1039, + "step": 107745 + }, + { + "epoch": 1.3, + "grad_norm": 7.960354376807008, + "learning_rate": 5.829753749988296e-06, + "loss": 0.8219, + "step": 107748 + }, + { + "epoch": 1.3, + "grad_norm": 26.405847459602573, + "learning_rate": 5.8292228004967574e-06, + "loss": 1.4099, + "step": 107751 + }, + { + "epoch": 1.3, + "grad_norm": 6.6560117280897755, + "learning_rate": 5.828691865238581e-06, + "loss": 1.307, + "step": 107754 + }, + { + "epoch": 1.3, + "grad_norm": 3.7944715431216753, + "learning_rate": 5.828160944215574e-06, + "loss": 1.121, + "step": 107757 + }, + { + "epoch": 1.3, + "grad_norm": 7.969083260408582, + "learning_rate": 5.827630037429557e-06, + "loss": 1.2295, + "step": 107760 + }, + { + "epoch": 1.3, + "grad_norm": 11.220799778555026, + "learning_rate": 5.827099144882335e-06, + "loss": 1.3821, + "step": 107763 + }, + { + "epoch": 1.3, + "grad_norm": 12.90179053767907, + "learning_rate": 5.8265682665757166e-06, + "loss": 1.0879, + "step": 107766 + }, + { + "epoch": 1.3, + "grad_norm": 8.135666711476782, + "learning_rate": 5.826037402511517e-06, + "loss": 1.3448, + "step": 107769 + }, + { + "epoch": 1.3, + "grad_norm": 10.00261181133232, + "learning_rate": 5.825506552691549e-06, + "loss": 1.5609, + "step": 107772 + }, + { + "epoch": 1.3, + "grad_norm": 10.925374626136565, + "learning_rate": 5.824975717117627e-06, + "loss": 0.9906, + "step": 107775 + }, + { + "epoch": 1.3, + "grad_norm": 110.02293636380313, + "learning_rate": 5.824444895791554e-06, + "loss": 0.9756, + "step": 107778 + }, + { + "epoch": 1.3, + "grad_norm": 30.751568279483088, + "learning_rate": 5.823914088715152e-06, + "loss": 1.1493, + "step": 107781 + }, + { + "epoch": 1.3, + "grad_norm": 7.089978169093589, + "learning_rate": 5.823383295890223e-06, + "loss": 1.2605, + "step": 107784 + }, + { + "epoch": 1.3, + "grad_norm": 12.588392775394144, + "learning_rate": 5.822852517318587e-06, + "loss": 1.161, + "step": 107787 + }, + { + "epoch": 1.3, + "grad_norm": 6.544607141896369, + "learning_rate": 5.822321753002047e-06, + "loss": 1.1879, + "step": 107790 + }, + { + "epoch": 1.3, + "grad_norm": 5.5984273890114284, + "learning_rate": 5.8217910029424166e-06, + "loss": 1.4726, + "step": 107793 + }, + { + "epoch": 1.3, + "grad_norm": 15.67726901562306, + "learning_rate": 5.821260267141508e-06, + "loss": 1.2128, + "step": 107796 + }, + { + "epoch": 1.3, + "grad_norm": 3.634123730720044, + "learning_rate": 5.820729545601135e-06, + "loss": 1.4637, + "step": 107799 + }, + { + "epoch": 1.3, + "grad_norm": 18.976161718591275, + "learning_rate": 5.82019883832311e-06, + "loss": 1.3272, + "step": 107802 + }, + { + "epoch": 1.3, + "grad_norm": 10.48709692797322, + "learning_rate": 5.819668145309239e-06, + "loss": 1.1209, + "step": 107805 + }, + { + "epoch": 1.3, + "grad_norm": 17.534287318594846, + "learning_rate": 5.819137466561333e-06, + "loss": 0.9442, + "step": 107808 + }, + { + "epoch": 1.3, + "grad_norm": 7.978227046925816, + "learning_rate": 5.818606802081204e-06, + "loss": 0.9823, + "step": 107811 + }, + { + "epoch": 1.3, + "grad_norm": 14.566759842318575, + "learning_rate": 5.818076151870668e-06, + "loss": 1.065, + "step": 107814 + }, + { + "epoch": 1.3, + "grad_norm": 7.493073808961109, + "learning_rate": 5.8175455159315285e-06, + "loss": 0.9609, + "step": 107817 + }, + { + "epoch": 1.3, + "grad_norm": 8.787154379478393, + "learning_rate": 5.817014894265598e-06, + "loss": 1.1383, + "step": 107820 + }, + { + "epoch": 1.3, + "grad_norm": 10.108698942752387, + "learning_rate": 5.8164842868746954e-06, + "loss": 1.1229, + "step": 107823 + }, + { + "epoch": 1.3, + "grad_norm": 18.440435116465896, + "learning_rate": 5.815953693760619e-06, + "loss": 1.1236, + "step": 107826 + }, + { + "epoch": 1.3, + "grad_norm": 6.134733521307535, + "learning_rate": 5.8154231149251896e-06, + "loss": 1.1672, + "step": 107829 + }, + { + "epoch": 1.3, + "grad_norm": 7.136444665572464, + "learning_rate": 5.8148925503702105e-06, + "loss": 1.0885, + "step": 107832 + }, + { + "epoch": 1.3, + "grad_norm": 11.253824441561644, + "learning_rate": 5.814362000097495e-06, + "loss": 1.056, + "step": 107835 + }, + { + "epoch": 1.3, + "grad_norm": 36.15218693360638, + "learning_rate": 5.813831464108855e-06, + "loss": 1.643, + "step": 107838 + }, + { + "epoch": 1.3, + "grad_norm": 12.01029895975363, + "learning_rate": 5.813300942406104e-06, + "loss": 1.5416, + "step": 107841 + }, + { + "epoch": 1.3, + "grad_norm": 11.298106551529816, + "learning_rate": 5.812770434991045e-06, + "loss": 1.2952, + "step": 107844 + }, + { + "epoch": 1.3, + "grad_norm": 5.768648475748898, + "learning_rate": 5.8122399418654965e-06, + "loss": 1.1993, + "step": 107847 + }, + { + "epoch": 1.3, + "grad_norm": 7.356110562388737, + "learning_rate": 5.811709463031261e-06, + "loss": 1.2288, + "step": 107850 + }, + { + "epoch": 1.3, + "grad_norm": 6.1155269490170845, + "learning_rate": 5.811178998490152e-06, + "loss": 1.1957, + "step": 107853 + }, + { + "epoch": 1.3, + "grad_norm": 23.435581833461196, + "learning_rate": 5.810648548243985e-06, + "loss": 1.0922, + "step": 107856 + }, + { + "epoch": 1.3, + "grad_norm": 7.014725391987698, + "learning_rate": 5.810118112294559e-06, + "loss": 1.1443, + "step": 107859 + }, + { + "epoch": 1.3, + "grad_norm": 8.771245214240272, + "learning_rate": 5.809587690643693e-06, + "loss": 1.1337, + "step": 107862 + }, + { + "epoch": 1.3, + "grad_norm": 11.48868184804189, + "learning_rate": 5.809057283293198e-06, + "loss": 0.8397, + "step": 107865 + }, + { + "epoch": 1.3, + "grad_norm": 15.800065166213459, + "learning_rate": 5.808526890244881e-06, + "loss": 1.4214, + "step": 107868 + }, + { + "epoch": 1.3, + "grad_norm": 7.492031132528137, + "learning_rate": 5.807996511500549e-06, + "loss": 1.0298, + "step": 107871 + }, + { + "epoch": 1.3, + "grad_norm": 8.455831139520535, + "learning_rate": 5.807466147062013e-06, + "loss": 1.0159, + "step": 107874 + }, + { + "epoch": 1.3, + "grad_norm": 8.841848659021752, + "learning_rate": 5.806935796931087e-06, + "loss": 1.3233, + "step": 107877 + }, + { + "epoch": 1.3, + "grad_norm": 7.0837658557877505, + "learning_rate": 5.806405461109577e-06, + "loss": 1.2628, + "step": 107880 + }, + { + "epoch": 1.3, + "grad_norm": 7.777375063645745, + "learning_rate": 5.8058751395993e-06, + "loss": 1.2368, + "step": 107883 + }, + { + "epoch": 1.3, + "grad_norm": 12.26801388319618, + "learning_rate": 5.805344832402055e-06, + "loss": 1.0501, + "step": 107886 + }, + { + "epoch": 1.3, + "grad_norm": 3.4552992402633658, + "learning_rate": 5.804814539519663e-06, + "loss": 0.8784, + "step": 107889 + }, + { + "epoch": 1.3, + "grad_norm": 22.429868354666407, + "learning_rate": 5.804284260953923e-06, + "loss": 1.2885, + "step": 107892 + }, + { + "epoch": 1.3, + "grad_norm": 14.732138710176182, + "learning_rate": 5.803753996706654e-06, + "loss": 1.2285, + "step": 107895 + }, + { + "epoch": 1.3, + "grad_norm": 8.186322839473428, + "learning_rate": 5.803223746779656e-06, + "loss": 1.3684, + "step": 107898 + }, + { + "epoch": 1.3, + "grad_norm": 53.95639409055088, + "learning_rate": 5.802693511174744e-06, + "loss": 1.1995, + "step": 107901 + }, + { + "epoch": 1.3, + "grad_norm": 8.15390208042003, + "learning_rate": 5.802163289893728e-06, + "loss": 1.3183, + "step": 107904 + }, + { + "epoch": 1.3, + "grad_norm": 8.902059891945349, + "learning_rate": 5.801633082938419e-06, + "loss": 1.3953, + "step": 107907 + }, + { + "epoch": 1.3, + "grad_norm": 4.840344084899903, + "learning_rate": 5.801102890310626e-06, + "loss": 0.7151, + "step": 107910 + }, + { + "epoch": 1.3, + "grad_norm": 2.5532862391505717, + "learning_rate": 5.800572712012152e-06, + "loss": 1.2288, + "step": 107913 + }, + { + "epoch": 1.3, + "grad_norm": 14.002987467975512, + "learning_rate": 5.800042548044808e-06, + "loss": 1.4329, + "step": 107916 + }, + { + "epoch": 1.3, + "grad_norm": 13.17950923050446, + "learning_rate": 5.79951239841041e-06, + "loss": 1.1971, + "step": 107919 + }, + { + "epoch": 1.3, + "grad_norm": 5.564315685589921, + "learning_rate": 5.7989822631107635e-06, + "loss": 1.1109, + "step": 107922 + }, + { + "epoch": 1.3, + "grad_norm": 20.939574965700317, + "learning_rate": 5.798452142147677e-06, + "loss": 1.144, + "step": 107925 + }, + { + "epoch": 1.3, + "grad_norm": 3.527177492514761, + "learning_rate": 5.7979220355229586e-06, + "loss": 1.1573, + "step": 107928 + }, + { + "epoch": 1.3, + "grad_norm": 11.939773992812702, + "learning_rate": 5.797391943238422e-06, + "loss": 1.1833, + "step": 107931 + }, + { + "epoch": 1.3, + "grad_norm": 16.416722909939278, + "learning_rate": 5.796861865295868e-06, + "loss": 0.8948, + "step": 107934 + }, + { + "epoch": 1.3, + "grad_norm": 7.543484275181716, + "learning_rate": 5.796331801697116e-06, + "loss": 1.1083, + "step": 107937 + }, + { + "epoch": 1.3, + "grad_norm": 10.68173473420628, + "learning_rate": 5.795801752443965e-06, + "loss": 1.1416, + "step": 107940 + }, + { + "epoch": 1.3, + "grad_norm": 12.03907317185278, + "learning_rate": 5.795271717538229e-06, + "loss": 1.0845, + "step": 107943 + }, + { + "epoch": 1.3, + "grad_norm": 16.054572907241266, + "learning_rate": 5.794741696981715e-06, + "loss": 1.0328, + "step": 107946 + }, + { + "epoch": 1.3, + "grad_norm": 4.621353037404138, + "learning_rate": 5.794211690776237e-06, + "loss": 1.1179, + "step": 107949 + }, + { + "epoch": 1.3, + "grad_norm": 6.723675863206913, + "learning_rate": 5.7936816989236e-06, + "loss": 1.308, + "step": 107952 + }, + { + "epoch": 1.3, + "grad_norm": 6.576287840895237, + "learning_rate": 5.793151721425607e-06, + "loss": 1.2164, + "step": 107955 + }, + { + "epoch": 1.3, + "grad_norm": 6.302191598150033, + "learning_rate": 5.7926217582840734e-06, + "loss": 1.4436, + "step": 107958 + }, + { + "epoch": 1.3, + "grad_norm": 7.013650648055663, + "learning_rate": 5.792091809500806e-06, + "loss": 1.0707, + "step": 107961 + }, + { + "epoch": 1.3, + "grad_norm": 16.183117597105827, + "learning_rate": 5.791561875077618e-06, + "loss": 0.816, + "step": 107964 + }, + { + "epoch": 1.3, + "grad_norm": 2.9947986919152196, + "learning_rate": 5.791031955016309e-06, + "loss": 1.8184, + "step": 107967 + }, + { + "epoch": 1.3, + "grad_norm": 9.919924450112813, + "learning_rate": 5.790502049318691e-06, + "loss": 1.0971, + "step": 107970 + }, + { + "epoch": 1.3, + "grad_norm": 27.600554811058327, + "learning_rate": 5.789972157986578e-06, + "loss": 1.2815, + "step": 107973 + }, + { + "epoch": 1.3, + "grad_norm": 7.310660539242356, + "learning_rate": 5.789442281021774e-06, + "loss": 1.0039, + "step": 107976 + }, + { + "epoch": 1.3, + "grad_norm": 6.755932731977946, + "learning_rate": 5.788912418426083e-06, + "loss": 1.4764, + "step": 107979 + }, + { + "epoch": 1.3, + "grad_norm": 11.984571508824724, + "learning_rate": 5.7883825702013165e-06, + "loss": 1.3594, + "step": 107982 + }, + { + "epoch": 1.3, + "grad_norm": 5.727720031918444, + "learning_rate": 5.787852736349282e-06, + "loss": 0.9749, + "step": 107985 + }, + { + "epoch": 1.3, + "grad_norm": 9.387570554448663, + "learning_rate": 5.787322916871792e-06, + "loss": 1.0608, + "step": 107988 + }, + { + "epoch": 1.3, + "grad_norm": 4.651489831021767, + "learning_rate": 5.786793111770652e-06, + "loss": 1.3049, + "step": 107991 + }, + { + "epoch": 1.3, + "grad_norm": 3.5335081613452317, + "learning_rate": 5.786263321047667e-06, + "loss": 0.9906, + "step": 107994 + }, + { + "epoch": 1.3, + "grad_norm": 7.890089751542023, + "learning_rate": 5.785733544704653e-06, + "loss": 1.4299, + "step": 107997 + }, + { + "epoch": 1.3, + "grad_norm": 10.477813896698326, + "learning_rate": 5.785203782743407e-06, + "loss": 1.3736, + "step": 108000 + }, + { + "epoch": 1.3, + "grad_norm": 17.130970673954575, + "learning_rate": 5.784674035165746e-06, + "loss": 1.4113, + "step": 108003 + }, + { + "epoch": 1.3, + "grad_norm": 11.460387063675675, + "learning_rate": 5.784144301973471e-06, + "loss": 1.4016, + "step": 108006 + }, + { + "epoch": 1.3, + "grad_norm": 43.80478391809315, + "learning_rate": 5.783614583168394e-06, + "loss": 0.8616, + "step": 108009 + }, + { + "epoch": 1.3, + "grad_norm": 5.846908621762376, + "learning_rate": 5.783084878752319e-06, + "loss": 1.2194, + "step": 108012 + }, + { + "epoch": 1.3, + "grad_norm": 7.180642532237637, + "learning_rate": 5.782555188727062e-06, + "loss": 1.1566, + "step": 108015 + }, + { + "epoch": 1.3, + "grad_norm": 8.283681404782747, + "learning_rate": 5.782025513094425e-06, + "loss": 1.417, + "step": 108018 + }, + { + "epoch": 1.3, + "grad_norm": 11.019674239331833, + "learning_rate": 5.7814958518562105e-06, + "loss": 1.1264, + "step": 108021 + }, + { + "epoch": 1.3, + "grad_norm": 5.634915955580746, + "learning_rate": 5.780966205014231e-06, + "loss": 1.1839, + "step": 108024 + }, + { + "epoch": 1.3, + "grad_norm": 4.0214213180528695, + "learning_rate": 5.780436572570296e-06, + "loss": 1.0845, + "step": 108027 + }, + { + "epoch": 1.3, + "grad_norm": 6.364104484460208, + "learning_rate": 5.779906954526214e-06, + "loss": 1.3406, + "step": 108030 + }, + { + "epoch": 1.3, + "grad_norm": 5.985772208098422, + "learning_rate": 5.779377350883786e-06, + "loss": 0.9823, + "step": 108033 + }, + { + "epoch": 1.3, + "grad_norm": 9.131106869650283, + "learning_rate": 5.778847761644822e-06, + "loss": 1.4226, + "step": 108036 + }, + { + "epoch": 1.3, + "grad_norm": 7.349701364023876, + "learning_rate": 5.778318186811135e-06, + "loss": 1.467, + "step": 108039 + }, + { + "epoch": 1.3, + "grad_norm": 14.318292643718504, + "learning_rate": 5.777788626384524e-06, + "loss": 1.1453, + "step": 108042 + }, + { + "epoch": 1.3, + "grad_norm": 7.4452697084151085, + "learning_rate": 5.777259080366801e-06, + "loss": 0.98, + "step": 108045 + }, + { + "epoch": 1.3, + "grad_norm": 20.95269183310229, + "learning_rate": 5.77672954875977e-06, + "loss": 1.5046, + "step": 108048 + }, + { + "epoch": 1.3, + "grad_norm": 7.749135888181394, + "learning_rate": 5.77620003156524e-06, + "loss": 1.1164, + "step": 108051 + }, + { + "epoch": 1.3, + "grad_norm": 2.7816341393550275, + "learning_rate": 5.775670528785017e-06, + "loss": 1.4823, + "step": 108054 + }, + { + "epoch": 1.3, + "grad_norm": 9.942296339577625, + "learning_rate": 5.775141040420913e-06, + "loss": 0.9658, + "step": 108057 + }, + { + "epoch": 1.3, + "grad_norm": 10.302303173872396, + "learning_rate": 5.77461156647473e-06, + "loss": 1.1086, + "step": 108060 + }, + { + "epoch": 1.3, + "grad_norm": 13.32878725586886, + "learning_rate": 5.774082106948273e-06, + "loss": 1.2757, + "step": 108063 + }, + { + "epoch": 1.3, + "grad_norm": 33.52176066430547, + "learning_rate": 5.773552661843351e-06, + "loss": 1.2515, + "step": 108066 + }, + { + "epoch": 1.3, + "grad_norm": 17.364608835813115, + "learning_rate": 5.773023231161772e-06, + "loss": 1.3562, + "step": 108069 + }, + { + "epoch": 1.3, + "grad_norm": 16.94970204587479, + "learning_rate": 5.772493814905345e-06, + "loss": 1.1367, + "step": 108072 + }, + { + "epoch": 1.3, + "grad_norm": 19.27796534166833, + "learning_rate": 5.771964413075871e-06, + "loss": 1.3691, + "step": 108075 + }, + { + "epoch": 1.3, + "grad_norm": 4.590268383135605, + "learning_rate": 5.77143502567516e-06, + "loss": 1.0162, + "step": 108078 + }, + { + "epoch": 1.3, + "grad_norm": 4.758559924450428, + "learning_rate": 5.770905652705021e-06, + "loss": 1.1932, + "step": 108081 + }, + { + "epoch": 1.3, + "grad_norm": 14.146468551902148, + "learning_rate": 5.770376294167254e-06, + "loss": 1.1546, + "step": 108084 + }, + { + "epoch": 1.3, + "grad_norm": 6.553412675184601, + "learning_rate": 5.769846950063674e-06, + "loss": 1.0449, + "step": 108087 + }, + { + "epoch": 1.3, + "grad_norm": 52.78165487228924, + "learning_rate": 5.769317620396077e-06, + "loss": 1.4155, + "step": 108090 + }, + { + "epoch": 1.3, + "grad_norm": 5.126754719367094, + "learning_rate": 5.768788305166275e-06, + "loss": 1.3462, + "step": 108093 + }, + { + "epoch": 1.3, + "grad_norm": 10.017447408172025, + "learning_rate": 5.768259004376075e-06, + "loss": 1.375, + "step": 108096 + }, + { + "epoch": 1.3, + "grad_norm": 14.632332981572587, + "learning_rate": 5.767729718027288e-06, + "loss": 1.4315, + "step": 108099 + }, + { + "epoch": 1.3, + "grad_norm": 27.054261511098588, + "learning_rate": 5.767200446121715e-06, + "loss": 1.1816, + "step": 108102 + }, + { + "epoch": 1.3, + "grad_norm": 4.700278654150956, + "learning_rate": 5.766671188661157e-06, + "loss": 1.3019, + "step": 108105 + }, + { + "epoch": 1.3, + "grad_norm": 10.74443119314784, + "learning_rate": 5.766141945647425e-06, + "loss": 1.2299, + "step": 108108 + }, + { + "epoch": 1.3, + "grad_norm": 12.758013109271685, + "learning_rate": 5.765612717082327e-06, + "loss": 1.6218, + "step": 108111 + }, + { + "epoch": 1.3, + "grad_norm": 12.227700554059167, + "learning_rate": 5.76508350296767e-06, + "loss": 1.406, + "step": 108114 + }, + { + "epoch": 1.3, + "grad_norm": 11.874442332745264, + "learning_rate": 5.764554303305254e-06, + "loss": 1.4663, + "step": 108117 + }, + { + "epoch": 1.3, + "grad_norm": 10.300069620274067, + "learning_rate": 5.76402511809689e-06, + "loss": 1.1729, + "step": 108120 + }, + { + "epoch": 1.3, + "grad_norm": 12.059877844956532, + "learning_rate": 5.7634959473443856e-06, + "loss": 1.2334, + "step": 108123 + }, + { + "epoch": 1.3, + "grad_norm": 9.46324149851761, + "learning_rate": 5.762966791049544e-06, + "loss": 1.1511, + "step": 108126 + }, + { + "epoch": 1.3, + "grad_norm": 20.473352979173388, + "learning_rate": 5.762437649214165e-06, + "loss": 1.1441, + "step": 108129 + }, + { + "epoch": 1.3, + "grad_norm": 7.745005375013283, + "learning_rate": 5.761908521840061e-06, + "loss": 1.4576, + "step": 108132 + }, + { + "epoch": 1.3, + "grad_norm": 11.904463652769392, + "learning_rate": 5.761379408929038e-06, + "loss": 1.4916, + "step": 108135 + }, + { + "epoch": 1.3, + "grad_norm": 11.102320096810145, + "learning_rate": 5.7608503104828974e-06, + "loss": 0.9791, + "step": 108138 + }, + { + "epoch": 1.3, + "grad_norm": 13.140634146128502, + "learning_rate": 5.760321226503454e-06, + "loss": 1.1444, + "step": 108141 + }, + { + "epoch": 1.3, + "grad_norm": 10.316358286691608, + "learning_rate": 5.759792156992506e-06, + "loss": 0.9531, + "step": 108144 + }, + { + "epoch": 1.3, + "grad_norm": 5.529190352923787, + "learning_rate": 5.759263101951857e-06, + "loss": 0.9244, + "step": 108147 + }, + { + "epoch": 1.3, + "grad_norm": 13.066663114705582, + "learning_rate": 5.758734061383316e-06, + "loss": 0.7701, + "step": 108150 + }, + { + "epoch": 1.3, + "grad_norm": 8.953472582583348, + "learning_rate": 5.75820503528869e-06, + "loss": 1.1943, + "step": 108153 + }, + { + "epoch": 1.3, + "grad_norm": 2.7344631510631086, + "learning_rate": 5.7576760236697785e-06, + "loss": 1.2091, + "step": 108156 + }, + { + "epoch": 1.3, + "grad_norm": 6.387330326754257, + "learning_rate": 5.7571470265283915e-06, + "loss": 1.4555, + "step": 108159 + }, + { + "epoch": 1.3, + "grad_norm": 8.383696476578583, + "learning_rate": 5.756618043866333e-06, + "loss": 1.1874, + "step": 108162 + }, + { + "epoch": 1.3, + "grad_norm": 4.921416756099704, + "learning_rate": 5.756089075685414e-06, + "loss": 1.0001, + "step": 108165 + }, + { + "epoch": 1.3, + "grad_norm": 5.210274876120754, + "learning_rate": 5.755560121987434e-06, + "loss": 1.3355, + "step": 108168 + }, + { + "epoch": 1.3, + "grad_norm": 12.415086235016267, + "learning_rate": 5.755031182774193e-06, + "loss": 1.0629, + "step": 108171 + }, + { + "epoch": 1.3, + "grad_norm": 12.85518646835301, + "learning_rate": 5.754502258047503e-06, + "loss": 0.9564, + "step": 108174 + }, + { + "epoch": 1.3, + "grad_norm": 31.01382869871176, + "learning_rate": 5.753973347809167e-06, + "loss": 1.0284, + "step": 108177 + }, + { + "epoch": 1.3, + "grad_norm": 4.121428741518417, + "learning_rate": 5.7534444520609946e-06, + "loss": 1.3052, + "step": 108180 + }, + { + "epoch": 1.3, + "grad_norm": 18.266235986315376, + "learning_rate": 5.752915570804782e-06, + "loss": 1.5051, + "step": 108183 + }, + { + "epoch": 1.3, + "grad_norm": 14.878559816507865, + "learning_rate": 5.752386704042343e-06, + "loss": 1.1196, + "step": 108186 + }, + { + "epoch": 1.3, + "grad_norm": 8.99670345769935, + "learning_rate": 5.751857851775475e-06, + "loss": 1.2002, + "step": 108189 + }, + { + "epoch": 1.3, + "grad_norm": 11.777534150848457, + "learning_rate": 5.751329014005985e-06, + "loss": 1.6202, + "step": 108192 + }, + { + "epoch": 1.3, + "grad_norm": 4.715265955386366, + "learning_rate": 5.750800190735684e-06, + "loss": 0.9193, + "step": 108195 + }, + { + "epoch": 1.3, + "grad_norm": 15.944630186276182, + "learning_rate": 5.7502713819663656e-06, + "loss": 1.2537, + "step": 108198 + }, + { + "epoch": 1.3, + "grad_norm": 8.54169688484992, + "learning_rate": 5.749742587699841e-06, + "loss": 1.3077, + "step": 108201 + }, + { + "epoch": 1.3, + "grad_norm": 21.253236456295255, + "learning_rate": 5.749213807937914e-06, + "loss": 1.0667, + "step": 108204 + }, + { + "epoch": 1.3, + "grad_norm": 3.1335289999492977, + "learning_rate": 5.748685042682394e-06, + "loss": 1.3155, + "step": 108207 + }, + { + "epoch": 1.3, + "grad_norm": 9.339572402559, + "learning_rate": 5.7481562919350806e-06, + "loss": 1.4311, + "step": 108210 + }, + { + "epoch": 1.3, + "grad_norm": 34.34464520441354, + "learning_rate": 5.747627555697774e-06, + "loss": 1.1973, + "step": 108213 + }, + { + "epoch": 1.3, + "grad_norm": 11.635820004090252, + "learning_rate": 5.747098833972282e-06, + "loss": 1.3392, + "step": 108216 + }, + { + "epoch": 1.3, + "grad_norm": 11.944794570137496, + "learning_rate": 5.74657012676041e-06, + "loss": 0.8012, + "step": 108219 + }, + { + "epoch": 1.3, + "grad_norm": 12.2875451258218, + "learning_rate": 5.746041434063967e-06, + "loss": 1.1174, + "step": 108222 + }, + { + "epoch": 1.3, + "grad_norm": 7.757412823336065, + "learning_rate": 5.745512755884748e-06, + "loss": 1.0372, + "step": 108225 + }, + { + "epoch": 1.3, + "grad_norm": 24.77601172904791, + "learning_rate": 5.744984092224565e-06, + "loss": 0.8729, + "step": 108228 + }, + { + "epoch": 1.3, + "grad_norm": 9.50423991654581, + "learning_rate": 5.744455443085215e-06, + "loss": 1.2329, + "step": 108231 + }, + { + "epoch": 1.3, + "grad_norm": 10.904805205166623, + "learning_rate": 5.743926808468509e-06, + "loss": 1.0764, + "step": 108234 + }, + { + "epoch": 1.3, + "grad_norm": 8.199052395909662, + "learning_rate": 5.743398188376245e-06, + "loss": 1.1325, + "step": 108237 + }, + { + "epoch": 1.3, + "grad_norm": 12.89052243104211, + "learning_rate": 5.742869582810229e-06, + "loss": 0.9423, + "step": 108240 + }, + { + "epoch": 1.3, + "grad_norm": 5.284781016051059, + "learning_rate": 5.742340991772266e-06, + "loss": 1.1393, + "step": 108243 + }, + { + "epoch": 1.3, + "grad_norm": 12.225992702581006, + "learning_rate": 5.741812415264159e-06, + "loss": 1.2725, + "step": 108246 + }, + { + "epoch": 1.3, + "grad_norm": 11.770303921550177, + "learning_rate": 5.741283853287717e-06, + "loss": 1.1964, + "step": 108249 + }, + { + "epoch": 1.3, + "grad_norm": 9.432909613660062, + "learning_rate": 5.7407553058447385e-06, + "loss": 0.9251, + "step": 108252 + }, + { + "epoch": 1.3, + "grad_norm": 6.492686290178801, + "learning_rate": 5.740226772937025e-06, + "loss": 1.3226, + "step": 108255 + }, + { + "epoch": 1.3, + "grad_norm": 4.855261878794914, + "learning_rate": 5.739698254566382e-06, + "loss": 1.2731, + "step": 108258 + }, + { + "epoch": 1.3, + "grad_norm": 4.079552736357933, + "learning_rate": 5.73916975073462e-06, + "loss": 1.0803, + "step": 108261 + }, + { + "epoch": 1.3, + "grad_norm": 9.590096148314103, + "learning_rate": 5.738641261443532e-06, + "loss": 1.1674, + "step": 108264 + }, + { + "epoch": 1.3, + "grad_norm": 7.406338265544732, + "learning_rate": 5.738112786694927e-06, + "loss": 1.1302, + "step": 108267 + }, + { + "epoch": 1.3, + "grad_norm": 5.972367729128023, + "learning_rate": 5.737584326490611e-06, + "loss": 1.2022, + "step": 108270 + }, + { + "epoch": 1.3, + "grad_norm": 5.990386355774354, + "learning_rate": 5.737055880832381e-06, + "loss": 1.3902, + "step": 108273 + }, + { + "epoch": 1.3, + "grad_norm": 31.796677658281745, + "learning_rate": 5.736527449722047e-06, + "loss": 1.3494, + "step": 108276 + }, + { + "epoch": 1.3, + "grad_norm": 6.883582470994366, + "learning_rate": 5.7359990331614055e-06, + "loss": 0.8905, + "step": 108279 + }, + { + "epoch": 1.3, + "grad_norm": 4.780304073032642, + "learning_rate": 5.735470631152264e-06, + "loss": 0.9971, + "step": 108282 + }, + { + "epoch": 1.3, + "grad_norm": 15.023448621017781, + "learning_rate": 5.7349422436964245e-06, + "loss": 0.8218, + "step": 108285 + }, + { + "epoch": 1.3, + "grad_norm": 17.240705252266515, + "learning_rate": 5.7344138707956945e-06, + "loss": 1.0839, + "step": 108288 + }, + { + "epoch": 1.3, + "grad_norm": 6.451900604087444, + "learning_rate": 5.73388551245187e-06, + "loss": 1.3939, + "step": 108291 + }, + { + "epoch": 1.3, + "grad_norm": 10.746345098239722, + "learning_rate": 5.733357168666762e-06, + "loss": 1.4487, + "step": 108294 + }, + { + "epoch": 1.3, + "grad_norm": 23.410245015601053, + "learning_rate": 5.7328288394421636e-06, + "loss": 1.3423, + "step": 108297 + }, + { + "epoch": 1.3, + "grad_norm": 10.061068119886892, + "learning_rate": 5.732300524779886e-06, + "loss": 1.5176, + "step": 108300 + }, + { + "epoch": 1.3, + "grad_norm": 6.538290304384563, + "learning_rate": 5.731772224681731e-06, + "loss": 1.341, + "step": 108303 + }, + { + "epoch": 1.3, + "grad_norm": 6.661369854546284, + "learning_rate": 5.731243939149498e-06, + "loss": 1.1509, + "step": 108306 + }, + { + "epoch": 1.3, + "grad_norm": 9.418136210326296, + "learning_rate": 5.73071566818499e-06, + "loss": 0.9836, + "step": 108309 + }, + { + "epoch": 1.3, + "grad_norm": 5.638428675483102, + "learning_rate": 5.730187411790017e-06, + "loss": 0.8545, + "step": 108312 + }, + { + "epoch": 1.3, + "grad_norm": 4.541142073759627, + "learning_rate": 5.729659169966371e-06, + "loss": 0.9635, + "step": 108315 + }, + { + "epoch": 1.3, + "grad_norm": 7.939581885309135, + "learning_rate": 5.729130942715866e-06, + "loss": 1.3019, + "step": 108318 + }, + { + "epoch": 1.3, + "grad_norm": 38.6762317230518, + "learning_rate": 5.728602730040293e-06, + "loss": 1.331, + "step": 108321 + }, + { + "epoch": 1.3, + "grad_norm": 22.12954467618636, + "learning_rate": 5.728074531941461e-06, + "loss": 1.2249, + "step": 108324 + }, + { + "epoch": 1.3, + "grad_norm": 13.54568550311025, + "learning_rate": 5.727546348421172e-06, + "loss": 0.9553, + "step": 108327 + }, + { + "epoch": 1.3, + "grad_norm": 6.576844645982318, + "learning_rate": 5.727018179481232e-06, + "loss": 1.4657, + "step": 108330 + }, + { + "epoch": 1.3, + "grad_norm": 10.015768018067387, + "learning_rate": 5.726490025123437e-06, + "loss": 1.1995, + "step": 108333 + }, + { + "epoch": 1.3, + "grad_norm": 3.96087340785421, + "learning_rate": 5.725961885349595e-06, + "loss": 1.3127, + "step": 108336 + }, + { + "epoch": 1.3, + "grad_norm": 5.393939036521107, + "learning_rate": 5.725433760161502e-06, + "loss": 0.8147, + "step": 108339 + }, + { + "epoch": 1.3, + "grad_norm": 4.982136412180259, + "learning_rate": 5.7249056495609676e-06, + "loss": 1.4461, + "step": 108342 + }, + { + "epoch": 1.3, + "grad_norm": 7.49394507370245, + "learning_rate": 5.724377553549787e-06, + "loss": 1.2266, + "step": 108345 + }, + { + "epoch": 1.3, + "grad_norm": 8.046095440578933, + "learning_rate": 5.723849472129765e-06, + "loss": 1.0727, + "step": 108348 + }, + { + "epoch": 1.3, + "grad_norm": 5.693021003972713, + "learning_rate": 5.7233214053027045e-06, + "loss": 1.0429, + "step": 108351 + }, + { + "epoch": 1.3, + "grad_norm": 3.443559393431074, + "learning_rate": 5.722793353070408e-06, + "loss": 1.1473, + "step": 108354 + }, + { + "epoch": 1.3, + "grad_norm": 12.76146524349597, + "learning_rate": 5.722265315434681e-06, + "loss": 1.0905, + "step": 108357 + }, + { + "epoch": 1.3, + "grad_norm": 12.06653231078487, + "learning_rate": 5.721737292397321e-06, + "loss": 1.184, + "step": 108360 + }, + { + "epoch": 1.3, + "grad_norm": 9.69140626457185, + "learning_rate": 5.721209283960127e-06, + "loss": 0.8633, + "step": 108363 + }, + { + "epoch": 1.3, + "grad_norm": 12.115393486142189, + "learning_rate": 5.720681290124905e-06, + "loss": 1.2109, + "step": 108366 + }, + { + "epoch": 1.3, + "grad_norm": 32.539135531423234, + "learning_rate": 5.720153310893461e-06, + "loss": 1.0713, + "step": 108369 + }, + { + "epoch": 1.3, + "grad_norm": 21.440244350563873, + "learning_rate": 5.719625346267587e-06, + "loss": 1.2714, + "step": 108372 + }, + { + "epoch": 1.3, + "grad_norm": 11.302420789316024, + "learning_rate": 5.719097396249089e-06, + "loss": 1.2188, + "step": 108375 + }, + { + "epoch": 1.3, + "grad_norm": 8.955170394516017, + "learning_rate": 5.718569460839776e-06, + "loss": 1.2478, + "step": 108378 + }, + { + "epoch": 1.3, + "grad_norm": 13.119378714558705, + "learning_rate": 5.7180415400414365e-06, + "loss": 1.2716, + "step": 108381 + }, + { + "epoch": 1.3, + "grad_norm": 20.906639506592917, + "learning_rate": 5.717513633855886e-06, + "loss": 1.512, + "step": 108384 + }, + { + "epoch": 1.3, + "grad_norm": 7.438428092508609, + "learning_rate": 5.716985742284913e-06, + "loss": 1.1954, + "step": 108387 + }, + { + "epoch": 1.3, + "grad_norm": 8.075870285874252, + "learning_rate": 5.716457865330325e-06, + "loss": 1.1062, + "step": 108390 + }, + { + "epoch": 1.3, + "grad_norm": 11.537625646421766, + "learning_rate": 5.715930002993924e-06, + "loss": 1.2261, + "step": 108393 + }, + { + "epoch": 1.3, + "grad_norm": 6.610021906533096, + "learning_rate": 5.715402155277514e-06, + "loss": 1.4071, + "step": 108396 + }, + { + "epoch": 1.3, + "grad_norm": 11.292230597944222, + "learning_rate": 5.71487432218289e-06, + "loss": 1.1642, + "step": 108399 + }, + { + "epoch": 1.3, + "grad_norm": 6.0390686196173355, + "learning_rate": 5.7143465037118595e-06, + "loss": 1.1347, + "step": 108402 + }, + { + "epoch": 1.3, + "grad_norm": 7.012500913357455, + "learning_rate": 5.713818699866218e-06, + "loss": 1.1762, + "step": 108405 + }, + { + "epoch": 1.3, + "grad_norm": 98.93274304047286, + "learning_rate": 5.713290910647769e-06, + "loss": 1.3514, + "step": 108408 + }, + { + "epoch": 1.3, + "grad_norm": 4.197346104460857, + "learning_rate": 5.712763136058317e-06, + "loss": 0.9307, + "step": 108411 + }, + { + "epoch": 1.3, + "grad_norm": 12.438981421240417, + "learning_rate": 5.7122353760996565e-06, + "loss": 1.0276, + "step": 108414 + }, + { + "epoch": 1.3, + "grad_norm": 88.65927209500195, + "learning_rate": 5.711707630773593e-06, + "loss": 1.4161, + "step": 108417 + }, + { + "epoch": 1.3, + "grad_norm": 3.3571999475627052, + "learning_rate": 5.711179900081929e-06, + "loss": 1.4075, + "step": 108420 + }, + { + "epoch": 1.3, + "grad_norm": 2.539501081237949, + "learning_rate": 5.710652184026465e-06, + "loss": 1.2681, + "step": 108423 + }, + { + "epoch": 1.3, + "grad_norm": 3.0596541776605077, + "learning_rate": 5.710124482608997e-06, + "loss": 1.2424, + "step": 108426 + }, + { + "epoch": 1.3, + "grad_norm": 9.127215367694005, + "learning_rate": 5.709596795831326e-06, + "loss": 1.0053, + "step": 108429 + }, + { + "epoch": 1.3, + "grad_norm": 4.18493007535653, + "learning_rate": 5.7090691236952565e-06, + "loss": 1.2463, + "step": 108432 + }, + { + "epoch": 1.3, + "grad_norm": 6.018421551104539, + "learning_rate": 5.708541466202591e-06, + "loss": 1.075, + "step": 108435 + }, + { + "epoch": 1.3, + "grad_norm": 3.945262558681431, + "learning_rate": 5.70801382335513e-06, + "loss": 0.8461, + "step": 108438 + }, + { + "epoch": 1.3, + "grad_norm": 1.943278096899775, + "learning_rate": 5.7074861951546665e-06, + "loss": 0.8544, + "step": 108441 + }, + { + "epoch": 1.3, + "grad_norm": 13.167178344409171, + "learning_rate": 5.706958581603011e-06, + "loss": 1.5341, + "step": 108444 + }, + { + "epoch": 1.3, + "grad_norm": 9.477801374624306, + "learning_rate": 5.7064309827019574e-06, + "loss": 1.0991, + "step": 108447 + }, + { + "epoch": 1.3, + "grad_norm": 13.06773367393434, + "learning_rate": 5.7059033984533106e-06, + "loss": 1.2288, + "step": 108450 + }, + { + "epoch": 1.3, + "grad_norm": 17.441197914229065, + "learning_rate": 5.705375828858866e-06, + "loss": 1.01, + "step": 108453 + }, + { + "epoch": 1.3, + "grad_norm": 9.679897949197006, + "learning_rate": 5.704848273920425e-06, + "loss": 1.3657, + "step": 108456 + }, + { + "epoch": 1.3, + "grad_norm": 8.756832434205016, + "learning_rate": 5.704320733639791e-06, + "loss": 1.2023, + "step": 108459 + }, + { + "epoch": 1.3, + "grad_norm": 7.1285243892641095, + "learning_rate": 5.703793208018766e-06, + "loss": 1.1754, + "step": 108462 + }, + { + "epoch": 1.3, + "grad_norm": 16.54500651677069, + "learning_rate": 5.70326569705915e-06, + "loss": 1.1253, + "step": 108465 + }, + { + "epoch": 1.3, + "grad_norm": 3.9907330080972203, + "learning_rate": 5.702738200762734e-06, + "loss": 1.3774, + "step": 108468 + }, + { + "epoch": 1.3, + "grad_norm": 7.455932924475877, + "learning_rate": 5.702210719131325e-06, + "loss": 1.0476, + "step": 108471 + }, + { + "epoch": 1.3, + "grad_norm": 8.862784032716748, + "learning_rate": 5.701683252166723e-06, + "loss": 1.0514, + "step": 108474 + }, + { + "epoch": 1.3, + "grad_norm": 8.70158512068166, + "learning_rate": 5.701155799870732e-06, + "loss": 0.7913, + "step": 108477 + }, + { + "epoch": 1.3, + "grad_norm": 13.811101435618284, + "learning_rate": 5.700628362245144e-06, + "loss": 1.2376, + "step": 108480 + }, + { + "epoch": 1.3, + "grad_norm": 16.093079796844, + "learning_rate": 5.700100939291761e-06, + "loss": 0.9887, + "step": 108483 + }, + { + "epoch": 1.3, + "grad_norm": 4.988993912302875, + "learning_rate": 5.6995735310123915e-06, + "loss": 1.2979, + "step": 108486 + }, + { + "epoch": 1.3, + "grad_norm": 10.023584506721877, + "learning_rate": 5.699046137408824e-06, + "loss": 1.3104, + "step": 108489 + }, + { + "epoch": 1.3, + "grad_norm": 16.884055599819927, + "learning_rate": 5.698518758482865e-06, + "loss": 0.7642, + "step": 108492 + }, + { + "epoch": 1.3, + "grad_norm": 3.928083212374762, + "learning_rate": 5.697991394236309e-06, + "loss": 1.1128, + "step": 108495 + }, + { + "epoch": 1.3, + "grad_norm": 12.999289541177713, + "learning_rate": 5.69746404467096e-06, + "loss": 1.0011, + "step": 108498 + }, + { + "epoch": 1.3, + "grad_norm": 7.521378377995353, + "learning_rate": 5.6969367097886145e-06, + "loss": 1.295, + "step": 108501 + }, + { + "epoch": 1.3, + "grad_norm": 4.601183080080133, + "learning_rate": 5.6964093895910785e-06, + "loss": 1.0373, + "step": 108504 + }, + { + "epoch": 1.3, + "grad_norm": 27.130905931347204, + "learning_rate": 5.695882084080148e-06, + "loss": 1.3057, + "step": 108507 + }, + { + "epoch": 1.3, + "grad_norm": 7.028873697444966, + "learning_rate": 5.695354793257617e-06, + "loss": 1.3447, + "step": 108510 + }, + { + "epoch": 1.3, + "grad_norm": 8.912011466747545, + "learning_rate": 5.69482751712529e-06, + "loss": 0.9485, + "step": 108513 + }, + { + "epoch": 1.3, + "grad_norm": 15.637139779790546, + "learning_rate": 5.6943002556849655e-06, + "loss": 0.9274, + "step": 108516 + }, + { + "epoch": 1.3, + "grad_norm": 5.301256565402014, + "learning_rate": 5.693773008938448e-06, + "loss": 1.1379, + "step": 108519 + }, + { + "epoch": 1.3, + "grad_norm": 8.985168650174275, + "learning_rate": 5.693245776887527e-06, + "loss": 1.068, + "step": 108522 + }, + { + "epoch": 1.3, + "grad_norm": 14.459356059777692, + "learning_rate": 5.6927185595340075e-06, + "loss": 1.2469, + "step": 108525 + }, + { + "epoch": 1.31, + "grad_norm": 19.631013636201523, + "learning_rate": 5.692191356879693e-06, + "loss": 1.4586, + "step": 108528 + }, + { + "epoch": 1.31, + "grad_norm": 11.621769442864643, + "learning_rate": 5.691664168926376e-06, + "loss": 0.9448, + "step": 108531 + }, + { + "epoch": 1.31, + "grad_norm": 3.630055821368703, + "learning_rate": 5.6911369956758544e-06, + "loss": 1.1174, + "step": 108534 + }, + { + "epoch": 1.31, + "grad_norm": 2.686100451813704, + "learning_rate": 5.69060983712993e-06, + "loss": 0.9949, + "step": 108537 + }, + { + "epoch": 1.31, + "grad_norm": 4.384674818374589, + "learning_rate": 5.690082693290402e-06, + "loss": 1.1837, + "step": 108540 + }, + { + "epoch": 1.31, + "grad_norm": 8.783090602162263, + "learning_rate": 5.689555564159068e-06, + "loss": 1.1392, + "step": 108543 + }, + { + "epoch": 1.31, + "grad_norm": 2.820672449445196, + "learning_rate": 5.689028449737735e-06, + "loss": 1.2463, + "step": 108546 + }, + { + "epoch": 1.31, + "grad_norm": 4.03040687231986, + "learning_rate": 5.6885013500281925e-06, + "loss": 1.3922, + "step": 108549 + }, + { + "epoch": 1.31, + "grad_norm": 5.059796867090821, + "learning_rate": 5.687974265032239e-06, + "loss": 1.3636, + "step": 108552 + }, + { + "epoch": 1.31, + "grad_norm": 10.310245724438689, + "learning_rate": 5.687447194751676e-06, + "loss": 1.2427, + "step": 108555 + }, + { + "epoch": 1.31, + "grad_norm": 5.393983265639359, + "learning_rate": 5.6869201391883065e-06, + "loss": 1.1815, + "step": 108558 + }, + { + "epoch": 1.31, + "grad_norm": 16.63445735351684, + "learning_rate": 5.686393098343921e-06, + "loss": 1.3205, + "step": 108561 + }, + { + "epoch": 1.31, + "grad_norm": 16.559248106377776, + "learning_rate": 5.685866072220323e-06, + "loss": 1.0123, + "step": 108564 + }, + { + "epoch": 1.31, + "grad_norm": 21.86902259543483, + "learning_rate": 5.685339060819308e-06, + "loss": 1.1879, + "step": 108567 + }, + { + "epoch": 1.31, + "grad_norm": 8.71289629218145, + "learning_rate": 5.684812064142682e-06, + "loss": 0.9513, + "step": 108570 + }, + { + "epoch": 1.31, + "grad_norm": 12.277418539432452, + "learning_rate": 5.684285082192238e-06, + "loss": 1.1895, + "step": 108573 + }, + { + "epoch": 1.31, + "grad_norm": 5.918050209684352, + "learning_rate": 5.6837581149697706e-06, + "loss": 1.2843, + "step": 108576 + }, + { + "epoch": 1.31, + "grad_norm": 22.22435240359438, + "learning_rate": 5.683231162477082e-06, + "loss": 1.0442, + "step": 108579 + }, + { + "epoch": 1.31, + "grad_norm": 8.414733224304051, + "learning_rate": 5.6827042247159714e-06, + "loss": 0.9055, + "step": 108582 + }, + { + "epoch": 1.31, + "grad_norm": 3.6560215088998067, + "learning_rate": 5.682177301688239e-06, + "loss": 1.2971, + "step": 108585 + }, + { + "epoch": 1.31, + "grad_norm": 12.609434164136728, + "learning_rate": 5.681650393395677e-06, + "loss": 1.0989, + "step": 108588 + }, + { + "epoch": 1.31, + "grad_norm": 8.586792046498577, + "learning_rate": 5.681123499840085e-06, + "loss": 1.2487, + "step": 108591 + }, + { + "epoch": 1.31, + "grad_norm": 3.8071281861797126, + "learning_rate": 5.680596621023269e-06, + "loss": 1.2935, + "step": 108594 + }, + { + "epoch": 1.31, + "grad_norm": 100.08795515038051, + "learning_rate": 5.680069756947015e-06, + "loss": 0.9802, + "step": 108597 + }, + { + "epoch": 1.31, + "grad_norm": 9.980483944515003, + "learning_rate": 5.679542907613131e-06, + "loss": 1.0535, + "step": 108600 + }, + { + "epoch": 1.31, + "grad_norm": 7.78734785576107, + "learning_rate": 5.679016073023409e-06, + "loss": 1.1868, + "step": 108603 + }, + { + "epoch": 1.31, + "grad_norm": 22.898823816547168, + "learning_rate": 5.678489253179646e-06, + "loss": 0.8362, + "step": 108606 + }, + { + "epoch": 1.31, + "grad_norm": 8.667125633395814, + "learning_rate": 5.677962448083645e-06, + "loss": 1.3003, + "step": 108609 + }, + { + "epoch": 1.31, + "grad_norm": 7.789247462544745, + "learning_rate": 5.677435657737205e-06, + "loss": 1.175, + "step": 108612 + }, + { + "epoch": 1.31, + "grad_norm": 24.456533270975847, + "learning_rate": 5.676908882142119e-06, + "loss": 1.7081, + "step": 108615 + }, + { + "epoch": 1.31, + "grad_norm": 5.95820372716178, + "learning_rate": 5.676382121300183e-06, + "loss": 0.9853, + "step": 108618 + }, + { + "epoch": 1.31, + "grad_norm": 9.774471933348059, + "learning_rate": 5.675855375213196e-06, + "loss": 0.9964, + "step": 108621 + }, + { + "epoch": 1.31, + "grad_norm": 6.458988766269565, + "learning_rate": 5.675328643882958e-06, + "loss": 1.6359, + "step": 108624 + }, + { + "epoch": 1.31, + "grad_norm": 9.839626910355129, + "learning_rate": 5.674801927311269e-06, + "loss": 1.0861, + "step": 108627 + }, + { + "epoch": 1.31, + "grad_norm": 5.516378415846338, + "learning_rate": 5.67427522549992e-06, + "loss": 1.2356, + "step": 108630 + }, + { + "epoch": 1.31, + "grad_norm": 10.123208655141463, + "learning_rate": 5.673748538450712e-06, + "loss": 1.4608, + "step": 108633 + }, + { + "epoch": 1.31, + "grad_norm": 5.2657760286220405, + "learning_rate": 5.673221866165444e-06, + "loss": 1.005, + "step": 108636 + }, + { + "epoch": 1.31, + "grad_norm": 14.35773272517171, + "learning_rate": 5.672695208645913e-06, + "loss": 1.2526, + "step": 108639 + }, + { + "epoch": 1.31, + "grad_norm": 4.317015375864443, + "learning_rate": 5.672168565893909e-06, + "loss": 1.0041, + "step": 108642 + }, + { + "epoch": 1.31, + "grad_norm": 21.45096058709898, + "learning_rate": 5.671641937911236e-06, + "loss": 0.9497, + "step": 108645 + }, + { + "epoch": 1.31, + "grad_norm": 15.099089080565333, + "learning_rate": 5.67111532469969e-06, + "loss": 1.3666, + "step": 108648 + }, + { + "epoch": 1.31, + "grad_norm": 36.73146682227362, + "learning_rate": 5.670588726261067e-06, + "loss": 1.1076, + "step": 108651 + }, + { + "epoch": 1.31, + "grad_norm": 3.8622299853840802, + "learning_rate": 5.67006214259717e-06, + "loss": 1.1539, + "step": 108654 + }, + { + "epoch": 1.31, + "grad_norm": 6.57064824479297, + "learning_rate": 5.6695355737097914e-06, + "loss": 1.2468, + "step": 108657 + }, + { + "epoch": 1.31, + "grad_norm": 8.3275934742981, + "learning_rate": 5.669009019600724e-06, + "loss": 1.2326, + "step": 108660 + }, + { + "epoch": 1.31, + "grad_norm": 46.36601320928848, + "learning_rate": 5.668482480271769e-06, + "loss": 1.3569, + "step": 108663 + }, + { + "epoch": 1.31, + "grad_norm": 6.302600374247268, + "learning_rate": 5.667955955724727e-06, + "loss": 1.1221, + "step": 108666 + }, + { + "epoch": 1.31, + "grad_norm": 42.26821276679666, + "learning_rate": 5.667429445961385e-06, + "loss": 1.1132, + "step": 108669 + }, + { + "epoch": 1.31, + "grad_norm": 14.75635358548014, + "learning_rate": 5.666902950983548e-06, + "loss": 1.2894, + "step": 108672 + }, + { + "epoch": 1.31, + "grad_norm": 15.997626263486557, + "learning_rate": 5.666376470793011e-06, + "loss": 1.3468, + "step": 108675 + }, + { + "epoch": 1.31, + "grad_norm": 7.240617463674843, + "learning_rate": 5.665850005391573e-06, + "loss": 0.9895, + "step": 108678 + }, + { + "epoch": 1.31, + "grad_norm": 6.7366325198005175, + "learning_rate": 5.665323554781028e-06, + "loss": 1.1027, + "step": 108681 + }, + { + "epoch": 1.31, + "grad_norm": 10.219834749137396, + "learning_rate": 5.664797118963168e-06, + "loss": 1.8399, + "step": 108684 + }, + { + "epoch": 1.31, + "grad_norm": 15.273298890044934, + "learning_rate": 5.664270697939793e-06, + "loss": 0.9294, + "step": 108687 + }, + { + "epoch": 1.31, + "grad_norm": 5.8103781635715706, + "learning_rate": 5.663744291712703e-06, + "loss": 1.0952, + "step": 108690 + }, + { + "epoch": 1.31, + "grad_norm": 11.242302973479271, + "learning_rate": 5.663217900283693e-06, + "loss": 1.0314, + "step": 108693 + }, + { + "epoch": 1.31, + "grad_norm": 5.2246609514811295, + "learning_rate": 5.662691523654556e-06, + "loss": 1.1663, + "step": 108696 + }, + { + "epoch": 1.31, + "grad_norm": 13.826194444761173, + "learning_rate": 5.662165161827094e-06, + "loss": 1.3085, + "step": 108699 + }, + { + "epoch": 1.31, + "grad_norm": 20.226255021484654, + "learning_rate": 5.661638814803096e-06, + "loss": 1.1896, + "step": 108702 + }, + { + "epoch": 1.31, + "grad_norm": 7.8142053251028925, + "learning_rate": 5.661112482584361e-06, + "loss": 0.9859, + "step": 108705 + }, + { + "epoch": 1.31, + "grad_norm": 9.904205258739287, + "learning_rate": 5.660586165172692e-06, + "loss": 1.1609, + "step": 108708 + }, + { + "epoch": 1.31, + "grad_norm": 3.82397304093727, + "learning_rate": 5.660059862569874e-06, + "loss": 1.0887, + "step": 108711 + }, + { + "epoch": 1.31, + "grad_norm": 3.891609251937291, + "learning_rate": 5.659533574777709e-06, + "loss": 1.2717, + "step": 108714 + }, + { + "epoch": 1.31, + "grad_norm": 11.78862875468345, + "learning_rate": 5.659007301797993e-06, + "loss": 1.2074, + "step": 108717 + }, + { + "epoch": 1.31, + "grad_norm": 9.989062428280574, + "learning_rate": 5.6584810436325245e-06, + "loss": 1.1402, + "step": 108720 + }, + { + "epoch": 1.31, + "grad_norm": 9.122867221660785, + "learning_rate": 5.6579548002830964e-06, + "loss": 0.953, + "step": 108723 + }, + { + "epoch": 1.31, + "grad_norm": 6.498441567023025, + "learning_rate": 5.657428571751502e-06, + "loss": 1.2538, + "step": 108726 + }, + { + "epoch": 1.31, + "grad_norm": 9.488288062605413, + "learning_rate": 5.656902358039538e-06, + "loss": 0.8754, + "step": 108729 + }, + { + "epoch": 1.31, + "grad_norm": 5.632033542516013, + "learning_rate": 5.656376159149003e-06, + "loss": 1.0404, + "step": 108732 + }, + { + "epoch": 1.31, + "grad_norm": 12.900791600435605, + "learning_rate": 5.655849975081695e-06, + "loss": 1.2403, + "step": 108735 + }, + { + "epoch": 1.31, + "grad_norm": 25.32948311735507, + "learning_rate": 5.6553238058394034e-06, + "loss": 1.1412, + "step": 108738 + }, + { + "epoch": 1.31, + "grad_norm": 39.37417029215169, + "learning_rate": 5.654797651423929e-06, + "loss": 1.2866, + "step": 108741 + }, + { + "epoch": 1.31, + "grad_norm": 6.734531415692221, + "learning_rate": 5.654271511837062e-06, + "loss": 1.3588, + "step": 108744 + }, + { + "epoch": 1.31, + "grad_norm": 11.72425188831095, + "learning_rate": 5.653745387080605e-06, + "loss": 1.1553, + "step": 108747 + }, + { + "epoch": 1.31, + "grad_norm": 6.623068400541649, + "learning_rate": 5.653219277156345e-06, + "loss": 1.4767, + "step": 108750 + }, + { + "epoch": 1.31, + "grad_norm": 10.483990586286028, + "learning_rate": 5.652693182066082e-06, + "loss": 1.3352, + "step": 108753 + }, + { + "epoch": 1.31, + "grad_norm": 6.201656859278815, + "learning_rate": 5.652167101811612e-06, + "loss": 1.1503, + "step": 108756 + }, + { + "epoch": 1.31, + "grad_norm": 18.093436421005237, + "learning_rate": 5.651641036394728e-06, + "loss": 1.2246, + "step": 108759 + }, + { + "epoch": 1.31, + "grad_norm": 4.209135515899039, + "learning_rate": 5.651114985817232e-06, + "loss": 1.2212, + "step": 108762 + }, + { + "epoch": 1.31, + "grad_norm": 10.33354823106063, + "learning_rate": 5.650588950080915e-06, + "loss": 1.2245, + "step": 108765 + }, + { + "epoch": 1.31, + "grad_norm": 30.03434219001215, + "learning_rate": 5.650062929187565e-06, + "loss": 1.2774, + "step": 108768 + }, + { + "epoch": 1.31, + "grad_norm": 14.790470548371147, + "learning_rate": 5.6495369231389855e-06, + "loss": 1.2566, + "step": 108771 + }, + { + "epoch": 1.31, + "grad_norm": 24.106975329079063, + "learning_rate": 5.649010931936974e-06, + "loss": 0.6617, + "step": 108774 + }, + { + "epoch": 1.31, + "grad_norm": 19.669555264998642, + "learning_rate": 5.648484955583314e-06, + "loss": 1.3178, + "step": 108777 + }, + { + "epoch": 1.31, + "grad_norm": 54.01585790961907, + "learning_rate": 5.647958994079809e-06, + "loss": 0.9714, + "step": 108780 + }, + { + "epoch": 1.31, + "grad_norm": 8.29960168083936, + "learning_rate": 5.647433047428257e-06, + "loss": 1.2695, + "step": 108783 + }, + { + "epoch": 1.31, + "grad_norm": 10.348894410903569, + "learning_rate": 5.646907115630443e-06, + "loss": 1.1638, + "step": 108786 + }, + { + "epoch": 1.31, + "grad_norm": 27.65500351651785, + "learning_rate": 5.646381198688172e-06, + "loss": 1.3363, + "step": 108789 + }, + { + "epoch": 1.31, + "grad_norm": 7.82808788629279, + "learning_rate": 5.645855296603229e-06, + "loss": 1.2359, + "step": 108792 + }, + { + "epoch": 1.31, + "grad_norm": 8.137430569514194, + "learning_rate": 5.645329409377414e-06, + "loss": 1.0808, + "step": 108795 + }, + { + "epoch": 1.31, + "grad_norm": 17.955355765773632, + "learning_rate": 5.644803537012522e-06, + "loss": 1.4884, + "step": 108798 + }, + { + "epoch": 1.31, + "grad_norm": 13.492645086726984, + "learning_rate": 5.64427767951035e-06, + "loss": 0.985, + "step": 108801 + }, + { + "epoch": 1.31, + "grad_norm": 18.917107164475183, + "learning_rate": 5.643751836872685e-06, + "loss": 1.0895, + "step": 108804 + }, + { + "epoch": 1.31, + "grad_norm": 8.85161352239473, + "learning_rate": 5.64322600910133e-06, + "loss": 1.2247, + "step": 108807 + }, + { + "epoch": 1.31, + "grad_norm": 10.644839865468427, + "learning_rate": 5.642700196198072e-06, + "loss": 1.08, + "step": 108810 + }, + { + "epoch": 1.31, + "grad_norm": 3.3191724975322536, + "learning_rate": 5.642174398164708e-06, + "loss": 0.8731, + "step": 108813 + }, + { + "epoch": 1.31, + "grad_norm": 9.610560343305057, + "learning_rate": 5.641648615003037e-06, + "loss": 1.269, + "step": 108816 + }, + { + "epoch": 1.31, + "grad_norm": 13.718927940293536, + "learning_rate": 5.641122846714846e-06, + "loss": 0.9306, + "step": 108819 + }, + { + "epoch": 1.31, + "grad_norm": 11.148520449128316, + "learning_rate": 5.640597093301932e-06, + "loss": 1.0899, + "step": 108822 + }, + { + "epoch": 1.31, + "grad_norm": 11.055607530686821, + "learning_rate": 5.640071354766094e-06, + "loss": 1.5543, + "step": 108825 + }, + { + "epoch": 1.31, + "grad_norm": 4.442257118055515, + "learning_rate": 5.639545631109119e-06, + "loss": 1.3451, + "step": 108828 + }, + { + "epoch": 1.31, + "grad_norm": 5.017228169403793, + "learning_rate": 5.639019922332807e-06, + "loss": 1.2446, + "step": 108831 + }, + { + "epoch": 1.31, + "grad_norm": 9.580011470012881, + "learning_rate": 5.638494228438946e-06, + "loss": 0.8788, + "step": 108834 + }, + { + "epoch": 1.31, + "grad_norm": 12.71350119777323, + "learning_rate": 5.637968549429333e-06, + "loss": 1.3742, + "step": 108837 + }, + { + "epoch": 1.31, + "grad_norm": 5.221494979585088, + "learning_rate": 5.637442885305762e-06, + "loss": 1.0547, + "step": 108840 + }, + { + "epoch": 1.31, + "grad_norm": 22.963307834942135, + "learning_rate": 5.636917236070031e-06, + "loss": 1.1799, + "step": 108843 + }, + { + "epoch": 1.31, + "grad_norm": 18.195764045844655, + "learning_rate": 5.6363916017239255e-06, + "loss": 1.2561, + "step": 108846 + }, + { + "epoch": 1.31, + "grad_norm": 3.3347644485088925, + "learning_rate": 5.635865982269248e-06, + "loss": 1.2476, + "step": 108849 + }, + { + "epoch": 1.31, + "grad_norm": 6.794754096951948, + "learning_rate": 5.635340377707785e-06, + "loss": 1.2495, + "step": 108852 + }, + { + "epoch": 1.31, + "grad_norm": 17.15927413806852, + "learning_rate": 5.634814788041337e-06, + "loss": 1.5068, + "step": 108855 + }, + { + "epoch": 1.31, + "grad_norm": 11.094732201154754, + "learning_rate": 5.634289213271688e-06, + "loss": 1.1222, + "step": 108858 + }, + { + "epoch": 1.31, + "grad_norm": 66.98953927202835, + "learning_rate": 5.633763653400638e-06, + "loss": 1.3268, + "step": 108861 + }, + { + "epoch": 1.31, + "grad_norm": 9.23478730440728, + "learning_rate": 5.6332381084299805e-06, + "loss": 1.0453, + "step": 108864 + }, + { + "epoch": 1.31, + "grad_norm": 14.493643967133881, + "learning_rate": 5.632712578361512e-06, + "loss": 1.2386, + "step": 108867 + }, + { + "epoch": 1.31, + "grad_norm": 9.021221830969267, + "learning_rate": 5.632187063197018e-06, + "loss": 1.1755, + "step": 108870 + }, + { + "epoch": 1.31, + "grad_norm": 14.627961357127147, + "learning_rate": 5.631661562938302e-06, + "loss": 1.1713, + "step": 108873 + }, + { + "epoch": 1.31, + "grad_norm": 9.45280161470661, + "learning_rate": 5.631136077587147e-06, + "loss": 1.262, + "step": 108876 + }, + { + "epoch": 1.31, + "grad_norm": 4.993363801719522, + "learning_rate": 5.63061060714535e-06, + "loss": 1.6398, + "step": 108879 + }, + { + "epoch": 1.31, + "grad_norm": 16.500135477438985, + "learning_rate": 5.630085151614709e-06, + "loss": 1.0755, + "step": 108882 + }, + { + "epoch": 1.31, + "grad_norm": 9.57437438742756, + "learning_rate": 5.62955971099701e-06, + "loss": 0.8941, + "step": 108885 + }, + { + "epoch": 1.31, + "grad_norm": 14.401958053469565, + "learning_rate": 5.6290342852940504e-06, + "loss": 1.2622, + "step": 108888 + }, + { + "epoch": 1.31, + "grad_norm": 9.891574119232821, + "learning_rate": 5.628508874507625e-06, + "loss": 0.9215, + "step": 108891 + }, + { + "epoch": 1.31, + "grad_norm": 5.268896718681436, + "learning_rate": 5.627983478639521e-06, + "loss": 1.0902, + "step": 108894 + }, + { + "epoch": 1.31, + "grad_norm": 7.162886899218658, + "learning_rate": 5.627458097691538e-06, + "loss": 1.0241, + "step": 108897 + }, + { + "epoch": 1.31, + "grad_norm": 9.255741007575937, + "learning_rate": 5.626932731665462e-06, + "loss": 1.0658, + "step": 108900 + }, + { + "epoch": 1.31, + "grad_norm": 9.142029998996225, + "learning_rate": 5.626407380563088e-06, + "loss": 1.1188, + "step": 108903 + }, + { + "epoch": 1.31, + "grad_norm": 20.44184528329574, + "learning_rate": 5.625882044386212e-06, + "loss": 1.2729, + "step": 108906 + }, + { + "epoch": 1.31, + "grad_norm": 8.773771645110108, + "learning_rate": 5.625356723136628e-06, + "loss": 1.272, + "step": 108909 + }, + { + "epoch": 1.31, + "grad_norm": 6.649157807144679, + "learning_rate": 5.6248314168161225e-06, + "loss": 0.958, + "step": 108912 + }, + { + "epoch": 1.31, + "grad_norm": 4.887279931732074, + "learning_rate": 5.624306125426495e-06, + "loss": 1.4632, + "step": 108915 + }, + { + "epoch": 1.31, + "grad_norm": 9.332642006192676, + "learning_rate": 5.623780848969531e-06, + "loss": 1.229, + "step": 108918 + }, + { + "epoch": 1.31, + "grad_norm": 5.217998723882898, + "learning_rate": 5.623255587447027e-06, + "loss": 1.2208, + "step": 108921 + }, + { + "epoch": 1.31, + "grad_norm": 5.44819216949054, + "learning_rate": 5.622730340860778e-06, + "loss": 1.1857, + "step": 108924 + }, + { + "epoch": 1.31, + "grad_norm": 9.366222572045897, + "learning_rate": 5.6222051092125705e-06, + "loss": 1.1966, + "step": 108927 + }, + { + "epoch": 1.31, + "grad_norm": 13.031289353347189, + "learning_rate": 5.621679892504199e-06, + "loss": 0.9742, + "step": 108930 + }, + { + "epoch": 1.31, + "grad_norm": 13.134454504008533, + "learning_rate": 5.621154690737463e-06, + "loss": 1.4703, + "step": 108933 + }, + { + "epoch": 1.31, + "grad_norm": 16.343083890571027, + "learning_rate": 5.620629503914147e-06, + "loss": 1.083, + "step": 108936 + }, + { + "epoch": 1.31, + "grad_norm": 3.6213218750925487, + "learning_rate": 5.62010433203604e-06, + "loss": 0.6782, + "step": 108939 + }, + { + "epoch": 1.31, + "grad_norm": 12.356454559949059, + "learning_rate": 5.619579175104942e-06, + "loss": 0.9062, + "step": 108942 + }, + { + "epoch": 1.31, + "grad_norm": 7.320080668476146, + "learning_rate": 5.619054033122642e-06, + "loss": 1.3333, + "step": 108945 + }, + { + "epoch": 1.31, + "grad_norm": 11.389874301372112, + "learning_rate": 5.618528906090932e-06, + "loss": 1.0657, + "step": 108948 + }, + { + "epoch": 1.31, + "grad_norm": 6.287444199619243, + "learning_rate": 5.618003794011608e-06, + "loss": 1.1799, + "step": 108951 + }, + { + "epoch": 1.31, + "grad_norm": 6.268500323156886, + "learning_rate": 5.617478696886456e-06, + "loss": 1.0292, + "step": 108954 + }, + { + "epoch": 1.31, + "grad_norm": 5.141170883058939, + "learning_rate": 5.616953614717273e-06, + "loss": 0.8437, + "step": 108957 + }, + { + "epoch": 1.31, + "grad_norm": 11.194838785163803, + "learning_rate": 5.616428547505845e-06, + "loss": 1.226, + "step": 108960 + }, + { + "epoch": 1.31, + "grad_norm": 4.033998041591934, + "learning_rate": 5.615903495253971e-06, + "loss": 0.999, + "step": 108963 + }, + { + "epoch": 1.31, + "grad_norm": 8.544017038608274, + "learning_rate": 5.615378457963437e-06, + "loss": 1.4457, + "step": 108966 + }, + { + "epoch": 1.31, + "grad_norm": 2.824470992508356, + "learning_rate": 5.614853435636036e-06, + "loss": 1.5523, + "step": 108969 + }, + { + "epoch": 1.31, + "grad_norm": 3.0098991696147728, + "learning_rate": 5.614328428273561e-06, + "loss": 1.0945, + "step": 108972 + }, + { + "epoch": 1.31, + "grad_norm": 10.293708423142844, + "learning_rate": 5.613803435877807e-06, + "loss": 1.1261, + "step": 108975 + }, + { + "epoch": 1.31, + "grad_norm": 32.34842256032612, + "learning_rate": 5.613278458450562e-06, + "loss": 1.1322, + "step": 108978 + }, + { + "epoch": 1.31, + "grad_norm": 20.35567889474076, + "learning_rate": 5.612753495993615e-06, + "loss": 1.1353, + "step": 108981 + }, + { + "epoch": 1.31, + "grad_norm": 10.319880592525566, + "learning_rate": 5.612228548508758e-06, + "loss": 1.1261, + "step": 108984 + }, + { + "epoch": 1.31, + "grad_norm": 9.017420732252202, + "learning_rate": 5.611703615997785e-06, + "loss": 1.1082, + "step": 108987 + }, + { + "epoch": 1.31, + "grad_norm": 8.31506613488572, + "learning_rate": 5.611178698462492e-06, + "loss": 1.0642, + "step": 108990 + }, + { + "epoch": 1.31, + "grad_norm": 9.441143957875203, + "learning_rate": 5.610653795904659e-06, + "loss": 1.2952, + "step": 108993 + }, + { + "epoch": 1.31, + "grad_norm": 6.752063180609582, + "learning_rate": 5.610128908326085e-06, + "loss": 1.1172, + "step": 108996 + }, + { + "epoch": 1.31, + "grad_norm": 10.582436913031154, + "learning_rate": 5.609604035728564e-06, + "loss": 1.2427, + "step": 108999 + }, + { + "epoch": 1.31, + "grad_norm": 93.14983923521888, + "learning_rate": 5.609079178113878e-06, + "loss": 1.3878, + "step": 109002 + }, + { + "epoch": 1.31, + "grad_norm": 6.968575176300571, + "learning_rate": 5.608554335483829e-06, + "loss": 0.9863, + "step": 109005 + }, + { + "epoch": 1.31, + "grad_norm": 28.709697089952883, + "learning_rate": 5.608029507840196e-06, + "loss": 0.9309, + "step": 109008 + }, + { + "epoch": 1.31, + "grad_norm": 7.963216918232315, + "learning_rate": 5.6075046951847776e-06, + "loss": 1.4423, + "step": 109011 + }, + { + "epoch": 1.31, + "grad_norm": 18.019949235812117, + "learning_rate": 5.606979897519363e-06, + "loss": 1.3322, + "step": 109014 + }, + { + "epoch": 1.31, + "grad_norm": 18.480208853250925, + "learning_rate": 5.606455114845748e-06, + "loss": 1.3191, + "step": 109017 + }, + { + "epoch": 1.31, + "grad_norm": 26.38072129902048, + "learning_rate": 5.60593034716572e-06, + "loss": 1.1891, + "step": 109020 + }, + { + "epoch": 1.31, + "grad_norm": 4.624102012327663, + "learning_rate": 5.605405594481063e-06, + "loss": 1.0201, + "step": 109023 + }, + { + "epoch": 1.31, + "grad_norm": 11.644771065078077, + "learning_rate": 5.604880856793575e-06, + "loss": 1.2733, + "step": 109026 + }, + { + "epoch": 1.31, + "grad_norm": 28.80449486597949, + "learning_rate": 5.604356134105047e-06, + "loss": 1.2864, + "step": 109029 + }, + { + "epoch": 1.31, + "grad_norm": 12.098396834933835, + "learning_rate": 5.60383142641727e-06, + "loss": 1.1983, + "step": 109032 + }, + { + "epoch": 1.31, + "grad_norm": 40.82555412307248, + "learning_rate": 5.603306733732029e-06, + "loss": 1.0645, + "step": 109035 + }, + { + "epoch": 1.31, + "grad_norm": 13.729222309523337, + "learning_rate": 5.602782056051119e-06, + "loss": 1.127, + "step": 109038 + }, + { + "epoch": 1.31, + "grad_norm": 7.953151199873333, + "learning_rate": 5.602257393376334e-06, + "loss": 1.5822, + "step": 109041 + }, + { + "epoch": 1.31, + "grad_norm": 8.70849294673643, + "learning_rate": 5.60173274570946e-06, + "loss": 1.0741, + "step": 109044 + }, + { + "epoch": 1.31, + "grad_norm": 5.741894694794674, + "learning_rate": 5.601208113052285e-06, + "loss": 1.2813, + "step": 109047 + }, + { + "epoch": 1.31, + "grad_norm": 3.8578332093183425, + "learning_rate": 5.6006834954066e-06, + "loss": 0.9245, + "step": 109050 + }, + { + "epoch": 1.31, + "grad_norm": 12.449698264045692, + "learning_rate": 5.6001588927742e-06, + "loss": 0.9553, + "step": 109053 + }, + { + "epoch": 1.31, + "grad_norm": 19.649722737400523, + "learning_rate": 5.599634305156871e-06, + "loss": 1.0498, + "step": 109056 + }, + { + "epoch": 1.31, + "grad_norm": 14.733325275198919, + "learning_rate": 5.599109732556411e-06, + "loss": 1.4284, + "step": 109059 + }, + { + "epoch": 1.31, + "grad_norm": 15.737058682105042, + "learning_rate": 5.598585174974604e-06, + "loss": 1.1481, + "step": 109062 + }, + { + "epoch": 1.31, + "grad_norm": 5.479970857149406, + "learning_rate": 5.598060632413236e-06, + "loss": 0.8845, + "step": 109065 + }, + { + "epoch": 1.31, + "grad_norm": 14.471717299364704, + "learning_rate": 5.597536104874101e-06, + "loss": 1.1682, + "step": 109068 + }, + { + "epoch": 1.31, + "grad_norm": 14.497949069417881, + "learning_rate": 5.597011592358994e-06, + "loss": 1.0706, + "step": 109071 + }, + { + "epoch": 1.31, + "grad_norm": 19.474670404521866, + "learning_rate": 5.596487094869696e-06, + "loss": 1.3454, + "step": 109074 + }, + { + "epoch": 1.31, + "grad_norm": 16.831560763387, + "learning_rate": 5.595962612408001e-06, + "loss": 1.3417, + "step": 109077 + }, + { + "epoch": 1.31, + "grad_norm": 21.9777029816224, + "learning_rate": 5.5954381449757e-06, + "loss": 1.139, + "step": 109080 + }, + { + "epoch": 1.31, + "grad_norm": 6.129388180476146, + "learning_rate": 5.594913692574586e-06, + "loss": 1.3723, + "step": 109083 + }, + { + "epoch": 1.31, + "grad_norm": 6.411153708603865, + "learning_rate": 5.594389255206445e-06, + "loss": 1.4739, + "step": 109086 + }, + { + "epoch": 1.31, + "grad_norm": 3.602943354600673, + "learning_rate": 5.5938648328730625e-06, + "loss": 1.1991, + "step": 109089 + }, + { + "epoch": 1.31, + "grad_norm": 38.228696959201706, + "learning_rate": 5.593340425576232e-06, + "loss": 1.3147, + "step": 109092 + }, + { + "epoch": 1.31, + "grad_norm": 15.172291479500009, + "learning_rate": 5.592816033317745e-06, + "loss": 1.1671, + "step": 109095 + }, + { + "epoch": 1.31, + "grad_norm": 7.2202749548997405, + "learning_rate": 5.592291656099391e-06, + "loss": 1.1289, + "step": 109098 + }, + { + "epoch": 1.31, + "grad_norm": 8.091180852897123, + "learning_rate": 5.591767293922955e-06, + "loss": 0.9525, + "step": 109101 + }, + { + "epoch": 1.31, + "grad_norm": 13.892274706296027, + "learning_rate": 5.591242946790234e-06, + "loss": 1.3901, + "step": 109104 + }, + { + "epoch": 1.31, + "grad_norm": 3.539860247400226, + "learning_rate": 5.590718614703008e-06, + "loss": 1.2279, + "step": 109107 + }, + { + "epoch": 1.31, + "grad_norm": 2.9514348594699307, + "learning_rate": 5.590194297663072e-06, + "loss": 1.1545, + "step": 109110 + }, + { + "epoch": 1.31, + "grad_norm": 7.510043711399795, + "learning_rate": 5.589669995672217e-06, + "loss": 1.2257, + "step": 109113 + }, + { + "epoch": 1.31, + "grad_norm": 9.36395583561774, + "learning_rate": 5.589145708732227e-06, + "loss": 1.0873, + "step": 109116 + }, + { + "epoch": 1.31, + "grad_norm": 4.8399039812874625, + "learning_rate": 5.588621436844892e-06, + "loss": 1.265, + "step": 109119 + }, + { + "epoch": 1.31, + "grad_norm": 4.642091730040722, + "learning_rate": 5.588097180012005e-06, + "loss": 1.3095, + "step": 109122 + }, + { + "epoch": 1.31, + "grad_norm": 5.812903060875065, + "learning_rate": 5.587572938235357e-06, + "loss": 1.0339, + "step": 109125 + }, + { + "epoch": 1.31, + "grad_norm": 4.582445498318973, + "learning_rate": 5.587048711516733e-06, + "loss": 1.6619, + "step": 109128 + }, + { + "epoch": 1.31, + "grad_norm": 9.35945602266409, + "learning_rate": 5.586524499857917e-06, + "loss": 1.0753, + "step": 109131 + }, + { + "epoch": 1.31, + "grad_norm": 7.879972799576439, + "learning_rate": 5.586000303260704e-06, + "loss": 1.182, + "step": 109134 + }, + { + "epoch": 1.31, + "grad_norm": 7.513510547105469, + "learning_rate": 5.585476121726882e-06, + "loss": 1.2216, + "step": 109137 + }, + { + "epoch": 1.31, + "grad_norm": 4.307194825751459, + "learning_rate": 5.584951955258245e-06, + "loss": 1.348, + "step": 109140 + }, + { + "epoch": 1.31, + "grad_norm": 14.610095540383808, + "learning_rate": 5.584427803856571e-06, + "loss": 1.161, + "step": 109143 + }, + { + "epoch": 1.31, + "grad_norm": 22.124314405003364, + "learning_rate": 5.58390366752366e-06, + "loss": 1.4027, + "step": 109146 + }, + { + "epoch": 1.31, + "grad_norm": 10.670795442770387, + "learning_rate": 5.583379546261291e-06, + "loss": 1.2115, + "step": 109149 + }, + { + "epoch": 1.31, + "grad_norm": 5.996410293489204, + "learning_rate": 5.58285544007126e-06, + "loss": 1.3514, + "step": 109152 + }, + { + "epoch": 1.31, + "grad_norm": 11.839864883036695, + "learning_rate": 5.582331348955348e-06, + "loss": 1.0912, + "step": 109155 + }, + { + "epoch": 1.31, + "grad_norm": 14.653664510038274, + "learning_rate": 5.5818072729153485e-06, + "loss": 1.0382, + "step": 109158 + }, + { + "epoch": 1.31, + "grad_norm": 5.78242323999124, + "learning_rate": 5.581283211953049e-06, + "loss": 1.3214, + "step": 109161 + }, + { + "epoch": 1.31, + "grad_norm": 26.862764642376785, + "learning_rate": 5.580759166070239e-06, + "loss": 1.2539, + "step": 109164 + }, + { + "epoch": 1.31, + "grad_norm": 11.760826600187894, + "learning_rate": 5.580235135268709e-06, + "loss": 1.1463, + "step": 109167 + }, + { + "epoch": 1.31, + "grad_norm": 9.387223701871976, + "learning_rate": 5.579711119550245e-06, + "loss": 1.1925, + "step": 109170 + }, + { + "epoch": 1.31, + "grad_norm": 8.04412695589891, + "learning_rate": 5.579187118916631e-06, + "loss": 1.4196, + "step": 109173 + }, + { + "epoch": 1.31, + "grad_norm": 11.589581574122496, + "learning_rate": 5.578663133369659e-06, + "loss": 0.9286, + "step": 109176 + }, + { + "epoch": 1.31, + "grad_norm": 50.57773229294936, + "learning_rate": 5.578139162911121e-06, + "loss": 1.3361, + "step": 109179 + }, + { + "epoch": 1.31, + "grad_norm": 13.963150829854143, + "learning_rate": 5.577615207542797e-06, + "loss": 1.3087, + "step": 109182 + }, + { + "epoch": 1.31, + "grad_norm": 4.547220375361439, + "learning_rate": 5.57709126726648e-06, + "loss": 1.3294, + "step": 109185 + }, + { + "epoch": 1.31, + "grad_norm": 10.989022679088716, + "learning_rate": 5.576567342083956e-06, + "loss": 1.0484, + "step": 109188 + }, + { + "epoch": 1.31, + "grad_norm": 12.19907111672468, + "learning_rate": 5.576043431997021e-06, + "loss": 1.4124, + "step": 109191 + }, + { + "epoch": 1.31, + "grad_norm": 9.182256995959658, + "learning_rate": 5.575519537007454e-06, + "loss": 1.2207, + "step": 109194 + }, + { + "epoch": 1.31, + "grad_norm": 5.9005754259688254, + "learning_rate": 5.574995657117042e-06, + "loss": 1.2104, + "step": 109197 + }, + { + "epoch": 1.31, + "grad_norm": 3.270203239626934, + "learning_rate": 5.574471792327576e-06, + "loss": 0.7562, + "step": 109200 + }, + { + "epoch": 1.31, + "grad_norm": 8.325831298253128, + "learning_rate": 5.573947942640843e-06, + "loss": 1.2764, + "step": 109203 + }, + { + "epoch": 1.31, + "grad_norm": 11.692313087700416, + "learning_rate": 5.573424108058636e-06, + "loss": 1.3102, + "step": 109206 + }, + { + "epoch": 1.31, + "grad_norm": 13.812372787542634, + "learning_rate": 5.572900288582735e-06, + "loss": 0.8194, + "step": 109209 + }, + { + "epoch": 1.31, + "grad_norm": 8.686765657267186, + "learning_rate": 5.572376484214932e-06, + "loss": 1.1929, + "step": 109212 + }, + { + "epoch": 1.31, + "grad_norm": 10.48253064517386, + "learning_rate": 5.571852694957011e-06, + "loss": 1.2869, + "step": 109215 + }, + { + "epoch": 1.31, + "grad_norm": 6.884214822287293, + "learning_rate": 5.5713289208107615e-06, + "loss": 1.0852, + "step": 109218 + }, + { + "epoch": 1.31, + "grad_norm": 6.307245405597394, + "learning_rate": 5.570805161777976e-06, + "loss": 1.009, + "step": 109221 + }, + { + "epoch": 1.31, + "grad_norm": 13.62248267559125, + "learning_rate": 5.570281417860433e-06, + "loss": 1.0611, + "step": 109224 + }, + { + "epoch": 1.31, + "grad_norm": 11.107977464331395, + "learning_rate": 5.569757689059922e-06, + "loss": 1.3008, + "step": 109227 + }, + { + "epoch": 1.31, + "grad_norm": 5.94270783406207, + "learning_rate": 5.569233975378234e-06, + "loss": 1.1402, + "step": 109230 + }, + { + "epoch": 1.31, + "grad_norm": 10.374366311660902, + "learning_rate": 5.568710276817157e-06, + "loss": 1.2802, + "step": 109233 + }, + { + "epoch": 1.31, + "grad_norm": 19.11441740418596, + "learning_rate": 5.5681865933784775e-06, + "loss": 1.1535, + "step": 109236 + }, + { + "epoch": 1.31, + "grad_norm": 2.7644309575203776, + "learning_rate": 5.5676629250639766e-06, + "loss": 1.2129, + "step": 109239 + }, + { + "epoch": 1.31, + "grad_norm": 6.213203153715824, + "learning_rate": 5.567139271875444e-06, + "loss": 1.0506, + "step": 109242 + }, + { + "epoch": 1.31, + "grad_norm": 3.377319050482725, + "learning_rate": 5.566615633814669e-06, + "loss": 1.1148, + "step": 109245 + }, + { + "epoch": 1.31, + "grad_norm": 13.481007195641721, + "learning_rate": 5.566092010883444e-06, + "loss": 0.8975, + "step": 109248 + }, + { + "epoch": 1.31, + "grad_norm": 50.565065822348686, + "learning_rate": 5.565568403083543e-06, + "loss": 1.0348, + "step": 109251 + }, + { + "epoch": 1.31, + "grad_norm": 7.400137887703597, + "learning_rate": 5.565044810416766e-06, + "loss": 1.0766, + "step": 109254 + }, + { + "epoch": 1.31, + "grad_norm": 6.274493952924423, + "learning_rate": 5.564521232884888e-06, + "loss": 1.3173, + "step": 109257 + }, + { + "epoch": 1.31, + "grad_norm": 7.144327654240645, + "learning_rate": 5.563997670489707e-06, + "loss": 1.4813, + "step": 109260 + }, + { + "epoch": 1.31, + "grad_norm": 13.710738046873775, + "learning_rate": 5.563474123232999e-06, + "loss": 1.2267, + "step": 109263 + }, + { + "epoch": 1.31, + "grad_norm": 4.37519744443475, + "learning_rate": 5.562950591116556e-06, + "loss": 1.2874, + "step": 109266 + }, + { + "epoch": 1.31, + "grad_norm": 59.91572072162484, + "learning_rate": 5.5624270741421635e-06, + "loss": 1.2023, + "step": 109269 + }, + { + "epoch": 1.31, + "grad_norm": 12.077931402647806, + "learning_rate": 5.56190357231161e-06, + "loss": 1.3035, + "step": 109272 + }, + { + "epoch": 1.31, + "grad_norm": 5.314292307375537, + "learning_rate": 5.561380085626684e-06, + "loss": 1.1374, + "step": 109275 + }, + { + "epoch": 1.31, + "grad_norm": 5.439581359587972, + "learning_rate": 5.5608566140891705e-06, + "loss": 1.0912, + "step": 109278 + }, + { + "epoch": 1.31, + "grad_norm": 8.349481251440793, + "learning_rate": 5.56033315770085e-06, + "loss": 1.147, + "step": 109281 + }, + { + "epoch": 1.31, + "grad_norm": 22.20677230926829, + "learning_rate": 5.559809716463513e-06, + "loss": 1.1075, + "step": 109284 + }, + { + "epoch": 1.31, + "grad_norm": 13.325809555470578, + "learning_rate": 5.559286290378949e-06, + "loss": 1.2508, + "step": 109287 + }, + { + "epoch": 1.31, + "grad_norm": 14.100292693735643, + "learning_rate": 5.558762879448939e-06, + "loss": 1.3019, + "step": 109290 + }, + { + "epoch": 1.31, + "grad_norm": 8.325613121308304, + "learning_rate": 5.558239483675271e-06, + "loss": 1.2881, + "step": 109293 + }, + { + "epoch": 1.31, + "grad_norm": 9.484685107836922, + "learning_rate": 5.557716103059736e-06, + "loss": 1.1213, + "step": 109296 + }, + { + "epoch": 1.31, + "grad_norm": 6.916228223274743, + "learning_rate": 5.557192737604112e-06, + "loss": 1.3455, + "step": 109299 + }, + { + "epoch": 1.31, + "grad_norm": 8.307084364304146, + "learning_rate": 5.556669387310192e-06, + "loss": 1.1616, + "step": 109302 + }, + { + "epoch": 1.31, + "grad_norm": 7.843025583831376, + "learning_rate": 5.556146052179755e-06, + "loss": 0.9015, + "step": 109305 + }, + { + "epoch": 1.31, + "grad_norm": 3.44410248270486, + "learning_rate": 5.555622732214592e-06, + "loss": 1.447, + "step": 109308 + }, + { + "epoch": 1.31, + "grad_norm": 25.980376762846863, + "learning_rate": 5.555099427416486e-06, + "loss": 1.2511, + "step": 109311 + }, + { + "epoch": 1.31, + "grad_norm": 8.857938138367965, + "learning_rate": 5.55457613778723e-06, + "loss": 1.2754, + "step": 109314 + }, + { + "epoch": 1.31, + "grad_norm": 10.539324321247578, + "learning_rate": 5.5540528633286e-06, + "loss": 1.1696, + "step": 109317 + }, + { + "epoch": 1.31, + "grad_norm": 14.334517206767094, + "learning_rate": 5.5535296040423894e-06, + "loss": 1.1176, + "step": 109320 + }, + { + "epoch": 1.31, + "grad_norm": 7.884156359674835, + "learning_rate": 5.5530063599303775e-06, + "loss": 1.1672, + "step": 109323 + }, + { + "epoch": 1.31, + "grad_norm": 3.160142710401647, + "learning_rate": 5.5524831309943525e-06, + "loss": 1.5412, + "step": 109326 + }, + { + "epoch": 1.31, + "grad_norm": 8.475554909339506, + "learning_rate": 5.551959917236106e-06, + "loss": 1.1508, + "step": 109329 + }, + { + "epoch": 1.31, + "grad_norm": 8.83162168126428, + "learning_rate": 5.551436718657412e-06, + "loss": 1.2439, + "step": 109332 + }, + { + "epoch": 1.31, + "grad_norm": 5.6764895033246185, + "learning_rate": 5.5509135352600655e-06, + "loss": 1.4119, + "step": 109335 + }, + { + "epoch": 1.31, + "grad_norm": 4.256114468794471, + "learning_rate": 5.55039036704585e-06, + "loss": 1.2596, + "step": 109338 + }, + { + "epoch": 1.31, + "grad_norm": 7.426495690721443, + "learning_rate": 5.549867214016549e-06, + "loss": 0.9837, + "step": 109341 + }, + { + "epoch": 1.31, + "grad_norm": 11.150360446115222, + "learning_rate": 5.549344076173946e-06, + "loss": 1.0205, + "step": 109344 + }, + { + "epoch": 1.31, + "grad_norm": 4.061070539616263, + "learning_rate": 5.548820953519827e-06, + "loss": 1.2071, + "step": 109347 + }, + { + "epoch": 1.31, + "grad_norm": 4.306039586651997, + "learning_rate": 5.548297846055979e-06, + "loss": 1.3691, + "step": 109350 + }, + { + "epoch": 1.31, + "grad_norm": 6.3590609910845135, + "learning_rate": 5.547774753784187e-06, + "loss": 1.2108, + "step": 109353 + }, + { + "epoch": 1.31, + "grad_norm": 5.026141609333255, + "learning_rate": 5.54725167670624e-06, + "loss": 1.5125, + "step": 109356 + }, + { + "epoch": 1.32, + "grad_norm": 9.387931888292243, + "learning_rate": 5.546728614823916e-06, + "loss": 1.3166, + "step": 109359 + }, + { + "epoch": 1.32, + "grad_norm": 6.545480740666218, + "learning_rate": 5.546205568139007e-06, + "loss": 1.3826, + "step": 109362 + }, + { + "epoch": 1.32, + "grad_norm": 6.089819659327949, + "learning_rate": 5.54568253665329e-06, + "loss": 1.5579, + "step": 109365 + }, + { + "epoch": 1.32, + "grad_norm": 12.21430686386422, + "learning_rate": 5.545159520368558e-06, + "loss": 1.0448, + "step": 109368 + }, + { + "epoch": 1.32, + "grad_norm": 10.593886249037496, + "learning_rate": 5.544636519286588e-06, + "loss": 1.3332, + "step": 109371 + }, + { + "epoch": 1.32, + "grad_norm": 14.99135868960991, + "learning_rate": 5.544113533409168e-06, + "loss": 1.1636, + "step": 109374 + }, + { + "epoch": 1.32, + "grad_norm": 18.168144222178526, + "learning_rate": 5.543590562738085e-06, + "loss": 0.9111, + "step": 109377 + }, + { + "epoch": 1.32, + "grad_norm": 10.493038949043745, + "learning_rate": 5.543067607275126e-06, + "loss": 1.4259, + "step": 109380 + }, + { + "epoch": 1.32, + "grad_norm": 13.97155390344931, + "learning_rate": 5.54254466702207e-06, + "loss": 1.0033, + "step": 109383 + }, + { + "epoch": 1.32, + "grad_norm": 9.94410020874895, + "learning_rate": 5.542021741980701e-06, + "loss": 1.1265, + "step": 109386 + }, + { + "epoch": 1.32, + "grad_norm": 28.09039046270572, + "learning_rate": 5.541498832152805e-06, + "loss": 1.2761, + "step": 109389 + }, + { + "epoch": 1.32, + "grad_norm": 11.429641944300817, + "learning_rate": 5.540975937540169e-06, + "loss": 1.1343, + "step": 109392 + }, + { + "epoch": 1.32, + "grad_norm": 13.4161619167132, + "learning_rate": 5.5404530581445795e-06, + "loss": 1.0829, + "step": 109395 + }, + { + "epoch": 1.32, + "grad_norm": 7.859143238980629, + "learning_rate": 5.539930193967813e-06, + "loss": 0.7227, + "step": 109398 + }, + { + "epoch": 1.32, + "grad_norm": 3.416943958077465, + "learning_rate": 5.539407345011658e-06, + "loss": 1.2514, + "step": 109401 + }, + { + "epoch": 1.32, + "grad_norm": 15.638531896254273, + "learning_rate": 5.538884511277905e-06, + "loss": 0.8319, + "step": 109404 + }, + { + "epoch": 1.32, + "grad_norm": 4.664480391180682, + "learning_rate": 5.5383616927683255e-06, + "loss": 1.272, + "step": 109407 + }, + { + "epoch": 1.32, + "grad_norm": 14.277256438415701, + "learning_rate": 5.537838889484716e-06, + "loss": 1.2119, + "step": 109410 + }, + { + "epoch": 1.32, + "grad_norm": 5.710786834558155, + "learning_rate": 5.537316101428851e-06, + "loss": 1.1309, + "step": 109413 + }, + { + "epoch": 1.32, + "grad_norm": 8.202879423737969, + "learning_rate": 5.536793328602519e-06, + "loss": 0.9062, + "step": 109416 + }, + { + "epoch": 1.32, + "grad_norm": 7.881246224390757, + "learning_rate": 5.536270571007503e-06, + "loss": 1.1763, + "step": 109419 + }, + { + "epoch": 1.32, + "grad_norm": 12.732348528994068, + "learning_rate": 5.535747828645591e-06, + "loss": 1.2743, + "step": 109422 + }, + { + "epoch": 1.32, + "grad_norm": 6.075039809837638, + "learning_rate": 5.535225101518561e-06, + "loss": 1.4788, + "step": 109425 + }, + { + "epoch": 1.32, + "grad_norm": 31.641691010985497, + "learning_rate": 5.534702389628203e-06, + "loss": 1.2157, + "step": 109428 + }, + { + "epoch": 1.32, + "grad_norm": 3.9903222972702492, + "learning_rate": 5.5341796929762935e-06, + "loss": 0.9646, + "step": 109431 + }, + { + "epoch": 1.32, + "grad_norm": 6.311765885451225, + "learning_rate": 5.5336570115646195e-06, + "loss": 1.3571, + "step": 109434 + }, + { + "epoch": 1.32, + "grad_norm": 3.9628050223272338, + "learning_rate": 5.533134345394971e-06, + "loss": 1.033, + "step": 109437 + }, + { + "epoch": 1.32, + "grad_norm": 12.726046833883032, + "learning_rate": 5.532611694469121e-06, + "loss": 1.0497, + "step": 109440 + }, + { + "epoch": 1.32, + "grad_norm": 11.908457489920124, + "learning_rate": 5.532089058788858e-06, + "loss": 1.6924, + "step": 109443 + }, + { + "epoch": 1.32, + "grad_norm": 4.642100188454169, + "learning_rate": 5.531566438355968e-06, + "loss": 1.0803, + "step": 109446 + }, + { + "epoch": 1.32, + "grad_norm": 12.811332370394036, + "learning_rate": 5.531043833172236e-06, + "loss": 1.2949, + "step": 109449 + }, + { + "epoch": 1.32, + "grad_norm": 41.63560435139374, + "learning_rate": 5.530521243239435e-06, + "loss": 1.1499, + "step": 109452 + }, + { + "epoch": 1.32, + "grad_norm": 13.510019161804202, + "learning_rate": 5.529998668559355e-06, + "loss": 1.3345, + "step": 109455 + }, + { + "epoch": 1.32, + "grad_norm": 10.194046868287373, + "learning_rate": 5.529476109133781e-06, + "loss": 1.2543, + "step": 109458 + }, + { + "epoch": 1.32, + "grad_norm": 9.19670890561437, + "learning_rate": 5.528953564964494e-06, + "loss": 1.0615, + "step": 109461 + }, + { + "epoch": 1.32, + "grad_norm": 9.868669841921607, + "learning_rate": 5.528431036053282e-06, + "loss": 1.0619, + "step": 109464 + }, + { + "epoch": 1.32, + "grad_norm": 5.048701041860552, + "learning_rate": 5.52790852240192e-06, + "loss": 1.0956, + "step": 109467 + }, + { + "epoch": 1.32, + "grad_norm": 4.73130285890726, + "learning_rate": 5.5273860240122e-06, + "loss": 1.0097, + "step": 109470 + }, + { + "epoch": 1.32, + "grad_norm": 28.632584425411444, + "learning_rate": 5.526863540885896e-06, + "loss": 0.9485, + "step": 109473 + }, + { + "epoch": 1.32, + "grad_norm": 30.327334554618783, + "learning_rate": 5.526341073024799e-06, + "loss": 1.5313, + "step": 109476 + }, + { + "epoch": 1.32, + "grad_norm": 12.445753825581528, + "learning_rate": 5.525818620430685e-06, + "loss": 1.3583, + "step": 109479 + }, + { + "epoch": 1.32, + "grad_norm": 9.981983217638396, + "learning_rate": 5.525296183105342e-06, + "loss": 1.3946, + "step": 109482 + }, + { + "epoch": 1.32, + "grad_norm": 5.673042621885876, + "learning_rate": 5.52477376105055e-06, + "loss": 1.1552, + "step": 109485 + }, + { + "epoch": 1.32, + "grad_norm": 5.733172893750187, + "learning_rate": 5.524251354268096e-06, + "loss": 1.3553, + "step": 109488 + }, + { + "epoch": 1.32, + "grad_norm": 1.8421313283013436, + "learning_rate": 5.523728962759762e-06, + "loss": 1.136, + "step": 109491 + }, + { + "epoch": 1.32, + "grad_norm": 27.39268988597339, + "learning_rate": 5.523206586527324e-06, + "loss": 1.0545, + "step": 109494 + }, + { + "epoch": 1.32, + "grad_norm": 35.240140601677616, + "learning_rate": 5.522684225572568e-06, + "loss": 1.0635, + "step": 109497 + }, + { + "epoch": 1.32, + "grad_norm": 22.02506847547512, + "learning_rate": 5.5221618798972795e-06, + "loss": 1.1015, + "step": 109500 + }, + { + "epoch": 1.32, + "grad_norm": 9.491073749027732, + "learning_rate": 5.5216395495032434e-06, + "loss": 1.0568, + "step": 109503 + }, + { + "epoch": 1.32, + "grad_norm": 6.7772113290826494, + "learning_rate": 5.521117234392235e-06, + "loss": 1.3193, + "step": 109506 + }, + { + "epoch": 1.32, + "grad_norm": 11.34199006543325, + "learning_rate": 5.520594934566039e-06, + "loss": 1.2534, + "step": 109509 + }, + { + "epoch": 1.32, + "grad_norm": 12.5525406846946, + "learning_rate": 5.520072650026445e-06, + "loss": 1.2142, + "step": 109512 + }, + { + "epoch": 1.32, + "grad_norm": 36.58358342623247, + "learning_rate": 5.519550380775223e-06, + "loss": 1.3395, + "step": 109515 + }, + { + "epoch": 1.32, + "grad_norm": 3.271057732241152, + "learning_rate": 5.519028126814166e-06, + "loss": 1.0759, + "step": 109518 + }, + { + "epoch": 1.32, + "grad_norm": 12.991200740825509, + "learning_rate": 5.518505888145049e-06, + "loss": 1.1461, + "step": 109521 + }, + { + "epoch": 1.32, + "grad_norm": 7.272221331710196, + "learning_rate": 5.517983664769657e-06, + "loss": 1.1523, + "step": 109524 + }, + { + "epoch": 1.32, + "grad_norm": 13.87804510125759, + "learning_rate": 5.517461456689772e-06, + "loss": 1.2396, + "step": 109527 + }, + { + "epoch": 1.32, + "grad_norm": 17.183106937586246, + "learning_rate": 5.51693926390718e-06, + "loss": 1.3314, + "step": 109530 + }, + { + "epoch": 1.32, + "grad_norm": 3.0532809372278917, + "learning_rate": 5.51641708642366e-06, + "loss": 1.1135, + "step": 109533 + }, + { + "epoch": 1.32, + "grad_norm": 11.454292150211035, + "learning_rate": 5.51589492424099e-06, + "loss": 1.1969, + "step": 109536 + }, + { + "epoch": 1.32, + "grad_norm": 5.662772165720739, + "learning_rate": 5.515372777360954e-06, + "loss": 0.9732, + "step": 109539 + }, + { + "epoch": 1.32, + "grad_norm": 3.2959153306530116, + "learning_rate": 5.514850645785337e-06, + "loss": 1.0927, + "step": 109542 + }, + { + "epoch": 1.32, + "grad_norm": 13.793579294115617, + "learning_rate": 5.514328529515922e-06, + "loss": 1.0945, + "step": 109545 + }, + { + "epoch": 1.32, + "grad_norm": 12.636978142045256, + "learning_rate": 5.513806428554486e-06, + "loss": 1.3163, + "step": 109548 + }, + { + "epoch": 1.32, + "grad_norm": 11.95667534429518, + "learning_rate": 5.513284342902811e-06, + "loss": 0.9564, + "step": 109551 + }, + { + "epoch": 1.32, + "grad_norm": 8.777696671348384, + "learning_rate": 5.512762272562684e-06, + "loss": 1.1215, + "step": 109554 + }, + { + "epoch": 1.32, + "grad_norm": 6.056158228893961, + "learning_rate": 5.512240217535885e-06, + "loss": 0.9871, + "step": 109557 + }, + { + "epoch": 1.32, + "grad_norm": 12.319618745800032, + "learning_rate": 5.511718177824188e-06, + "loss": 0.8856, + "step": 109560 + }, + { + "epoch": 1.32, + "grad_norm": 6.9392078638422525, + "learning_rate": 5.511196153429381e-06, + "loss": 1.2186, + "step": 109563 + }, + { + "epoch": 1.32, + "grad_norm": 8.48808982954841, + "learning_rate": 5.510674144353243e-06, + "loss": 1.0692, + "step": 109566 + }, + { + "epoch": 1.32, + "grad_norm": 14.342493218487363, + "learning_rate": 5.5101521505975585e-06, + "loss": 0.9999, + "step": 109569 + }, + { + "epoch": 1.32, + "grad_norm": 8.812098961553916, + "learning_rate": 5.509630172164112e-06, + "loss": 1.2657, + "step": 109572 + }, + { + "epoch": 1.32, + "grad_norm": 11.774271401354065, + "learning_rate": 5.50910820905468e-06, + "loss": 1.2026, + "step": 109575 + }, + { + "epoch": 1.32, + "grad_norm": 9.773451749564076, + "learning_rate": 5.50858626127104e-06, + "loss": 1.075, + "step": 109578 + }, + { + "epoch": 1.32, + "grad_norm": 8.488238944256663, + "learning_rate": 5.508064328814977e-06, + "loss": 0.873, + "step": 109581 + }, + { + "epoch": 1.32, + "grad_norm": 13.44167581414934, + "learning_rate": 5.507542411688276e-06, + "loss": 1.4302, + "step": 109584 + }, + { + "epoch": 1.32, + "grad_norm": 9.369257568575733, + "learning_rate": 5.507020509892711e-06, + "loss": 1.1439, + "step": 109587 + }, + { + "epoch": 1.32, + "grad_norm": 8.599399312389846, + "learning_rate": 5.506498623430067e-06, + "loss": 1.3686, + "step": 109590 + }, + { + "epoch": 1.32, + "grad_norm": 14.093961700516696, + "learning_rate": 5.505976752302125e-06, + "loss": 1.2937, + "step": 109593 + }, + { + "epoch": 1.32, + "grad_norm": 4.045841896124144, + "learning_rate": 5.505454896510668e-06, + "loss": 1.4669, + "step": 109596 + }, + { + "epoch": 1.32, + "grad_norm": 9.478738021911141, + "learning_rate": 5.5049330560574755e-06, + "loss": 1.0713, + "step": 109599 + }, + { + "epoch": 1.32, + "grad_norm": 14.723206040806442, + "learning_rate": 5.504411230944323e-06, + "loss": 1.014, + "step": 109602 + }, + { + "epoch": 1.32, + "grad_norm": 4.992442693320502, + "learning_rate": 5.503889421172997e-06, + "loss": 1.2128, + "step": 109605 + }, + { + "epoch": 1.32, + "grad_norm": 5.0975236076729775, + "learning_rate": 5.503367626745275e-06, + "loss": 1.0576, + "step": 109608 + }, + { + "epoch": 1.32, + "grad_norm": 27.48940970596735, + "learning_rate": 5.502845847662946e-06, + "loss": 1.4728, + "step": 109611 + }, + { + "epoch": 1.32, + "grad_norm": 8.276968410820643, + "learning_rate": 5.502324083927777e-06, + "loss": 1.3531, + "step": 109614 + }, + { + "epoch": 1.32, + "grad_norm": 27.321665661697754, + "learning_rate": 5.501802335541562e-06, + "loss": 1.1861, + "step": 109617 + }, + { + "epoch": 1.32, + "grad_norm": 4.488367907003731, + "learning_rate": 5.501280602506072e-06, + "loss": 1.1542, + "step": 109620 + }, + { + "epoch": 1.32, + "grad_norm": 7.954619186908737, + "learning_rate": 5.50075888482309e-06, + "loss": 1.137, + "step": 109623 + }, + { + "epoch": 1.32, + "grad_norm": 5.458271385563171, + "learning_rate": 5.500237182494401e-06, + "loss": 1.1517, + "step": 109626 + }, + { + "epoch": 1.32, + "grad_norm": 8.155907238157718, + "learning_rate": 5.499715495521778e-06, + "loss": 0.9048, + "step": 109629 + }, + { + "epoch": 1.32, + "grad_norm": 3.9559989366961243, + "learning_rate": 5.4991938239070045e-06, + "loss": 1.3765, + "step": 109632 + }, + { + "epoch": 1.32, + "grad_norm": 11.619540568539465, + "learning_rate": 5.498672167651862e-06, + "loss": 1.1907, + "step": 109635 + }, + { + "epoch": 1.32, + "grad_norm": 4.906211423582264, + "learning_rate": 5.498150526758132e-06, + "loss": 1.4742, + "step": 109638 + }, + { + "epoch": 1.32, + "grad_norm": 8.581772804233976, + "learning_rate": 5.497628901227596e-06, + "loss": 1.0895, + "step": 109641 + }, + { + "epoch": 1.32, + "grad_norm": 6.437654397986971, + "learning_rate": 5.4971072910620246e-06, + "loss": 1.1732, + "step": 109644 + }, + { + "epoch": 1.32, + "grad_norm": 9.49510425518742, + "learning_rate": 5.496585696263204e-06, + "loss": 1.2387, + "step": 109647 + }, + { + "epoch": 1.32, + "grad_norm": 5.884428587659618, + "learning_rate": 5.496064116832915e-06, + "loss": 1.1581, + "step": 109650 + }, + { + "epoch": 1.32, + "grad_norm": 18.859445706368724, + "learning_rate": 5.495542552772941e-06, + "loss": 1.1203, + "step": 109653 + }, + { + "epoch": 1.32, + "grad_norm": 9.458893424717363, + "learning_rate": 5.495021004085055e-06, + "loss": 1.0442, + "step": 109656 + }, + { + "epoch": 1.32, + "grad_norm": 3.9215874285256085, + "learning_rate": 5.494499470771042e-06, + "loss": 1.0309, + "step": 109659 + }, + { + "epoch": 1.32, + "grad_norm": 9.10308715887184, + "learning_rate": 5.493977952832676e-06, + "loss": 0.8819, + "step": 109662 + }, + { + "epoch": 1.32, + "grad_norm": 23.298690349275926, + "learning_rate": 5.493456450271744e-06, + "loss": 0.9918, + "step": 109665 + }, + { + "epoch": 1.32, + "grad_norm": 11.961028432929579, + "learning_rate": 5.492934963090018e-06, + "loss": 0.7757, + "step": 109668 + }, + { + "epoch": 1.32, + "grad_norm": 31.577298222103803, + "learning_rate": 5.4924134912892804e-06, + "loss": 1.1805, + "step": 109671 + }, + { + "epoch": 1.32, + "grad_norm": 9.019064499619198, + "learning_rate": 5.4918920348713136e-06, + "loss": 1.2429, + "step": 109674 + }, + { + "epoch": 1.32, + "grad_norm": 9.106366999682692, + "learning_rate": 5.4913705938378935e-06, + "loss": 1.162, + "step": 109677 + }, + { + "epoch": 1.32, + "grad_norm": 6.509878738054574, + "learning_rate": 5.490849168190807e-06, + "loss": 1.1686, + "step": 109680 + }, + { + "epoch": 1.32, + "grad_norm": 9.58795918099767, + "learning_rate": 5.490327757931828e-06, + "loss": 1.6551, + "step": 109683 + }, + { + "epoch": 1.32, + "grad_norm": 15.807313990645596, + "learning_rate": 5.489806363062732e-06, + "loss": 1.2303, + "step": 109686 + }, + { + "epoch": 1.32, + "grad_norm": 14.569958302104038, + "learning_rate": 5.489284983585301e-06, + "loss": 1.1926, + "step": 109689 + }, + { + "epoch": 1.32, + "grad_norm": 6.929857165560279, + "learning_rate": 5.488763619501322e-06, + "loss": 1.337, + "step": 109692 + }, + { + "epoch": 1.32, + "grad_norm": 20.54839668968212, + "learning_rate": 5.4882422708125626e-06, + "loss": 1.0613, + "step": 109695 + }, + { + "epoch": 1.32, + "grad_norm": 16.36297219645441, + "learning_rate": 5.487720937520806e-06, + "loss": 1.2274, + "step": 109698 + }, + { + "epoch": 1.32, + "grad_norm": 5.982833674447837, + "learning_rate": 5.487199619627839e-06, + "loss": 1.3436, + "step": 109701 + }, + { + "epoch": 1.32, + "grad_norm": 10.966362207415052, + "learning_rate": 5.486678317135427e-06, + "loss": 0.8406, + "step": 109704 + }, + { + "epoch": 1.32, + "grad_norm": 10.450627926814132, + "learning_rate": 5.486157030045364e-06, + "loss": 1.2373, + "step": 109707 + }, + { + "epoch": 1.32, + "grad_norm": 8.055163363125006, + "learning_rate": 5.4856357583594155e-06, + "loss": 1.0979, + "step": 109710 + }, + { + "epoch": 1.32, + "grad_norm": 7.978718322668668, + "learning_rate": 5.485114502079365e-06, + "loss": 0.8899, + "step": 109713 + }, + { + "epoch": 1.32, + "grad_norm": 10.134866115977655, + "learning_rate": 5.4845932612069945e-06, + "loss": 1.1586, + "step": 109716 + }, + { + "epoch": 1.32, + "grad_norm": 9.73218629010379, + "learning_rate": 5.484072035744082e-06, + "loss": 1.2475, + "step": 109719 + }, + { + "epoch": 1.32, + "grad_norm": 10.878448749684651, + "learning_rate": 5.483550825692403e-06, + "loss": 1.0608, + "step": 109722 + }, + { + "epoch": 1.32, + "grad_norm": 4.5768735134731955, + "learning_rate": 5.483029631053742e-06, + "loss": 1.2634, + "step": 109725 + }, + { + "epoch": 1.32, + "grad_norm": 7.190508342357429, + "learning_rate": 5.48250845182987e-06, + "loss": 1.252, + "step": 109728 + }, + { + "epoch": 1.32, + "grad_norm": 7.201348914304001, + "learning_rate": 5.481987288022569e-06, + "loss": 1.1801, + "step": 109731 + }, + { + "epoch": 1.32, + "grad_norm": 13.010843556162019, + "learning_rate": 5.481466139633622e-06, + "loss": 1.2983, + "step": 109734 + }, + { + "epoch": 1.32, + "grad_norm": 8.111135646489062, + "learning_rate": 5.480945006664799e-06, + "loss": 1.3441, + "step": 109737 + }, + { + "epoch": 1.32, + "grad_norm": 17.327598570434052, + "learning_rate": 5.480423889117884e-06, + "loss": 1.2441, + "step": 109740 + }, + { + "epoch": 1.32, + "grad_norm": 11.264430400245285, + "learning_rate": 5.479902786994658e-06, + "loss": 1.0679, + "step": 109743 + }, + { + "epoch": 1.32, + "grad_norm": 14.991424425987898, + "learning_rate": 5.4793817002968915e-06, + "loss": 1.6087, + "step": 109746 + }, + { + "epoch": 1.32, + "grad_norm": 12.405870382678573, + "learning_rate": 5.478860629026371e-06, + "loss": 1.6088, + "step": 109749 + }, + { + "epoch": 1.32, + "grad_norm": 3.273711411494502, + "learning_rate": 5.4783395731848675e-06, + "loss": 0.9247, + "step": 109752 + }, + { + "epoch": 1.32, + "grad_norm": 13.058414661890344, + "learning_rate": 5.47781853277416e-06, + "loss": 1.2365, + "step": 109755 + }, + { + "epoch": 1.32, + "grad_norm": 10.358406438822819, + "learning_rate": 5.477297507796031e-06, + "loss": 1.0855, + "step": 109758 + }, + { + "epoch": 1.32, + "grad_norm": 27.272922966792475, + "learning_rate": 5.476776498252261e-06, + "loss": 1.4355, + "step": 109761 + }, + { + "epoch": 1.32, + "grad_norm": 19.383633805525385, + "learning_rate": 5.476255504144619e-06, + "loss": 1.0007, + "step": 109764 + }, + { + "epoch": 1.32, + "grad_norm": 29.404976364088228, + "learning_rate": 5.47573452547489e-06, + "loss": 1.0721, + "step": 109767 + }, + { + "epoch": 1.32, + "grad_norm": 18.756345234334887, + "learning_rate": 5.475213562244846e-06, + "loss": 1.1727, + "step": 109770 + }, + { + "epoch": 1.32, + "grad_norm": 37.018861413711214, + "learning_rate": 5.474692614456274e-06, + "loss": 1.209, + "step": 109773 + }, + { + "epoch": 1.32, + "grad_norm": 18.23109794187647, + "learning_rate": 5.474171682110941e-06, + "loss": 0.9187, + "step": 109776 + }, + { + "epoch": 1.32, + "grad_norm": 5.008367273274203, + "learning_rate": 5.47365076521063e-06, + "loss": 1.1029, + "step": 109779 + }, + { + "epoch": 1.32, + "grad_norm": 7.518728408107114, + "learning_rate": 5.473129863757118e-06, + "loss": 1.2053, + "step": 109782 + }, + { + "epoch": 1.32, + "grad_norm": 5.7876164275307245, + "learning_rate": 5.472608977752183e-06, + "loss": 1.2006, + "step": 109785 + }, + { + "epoch": 1.32, + "grad_norm": 9.834663831585845, + "learning_rate": 5.4720881071976075e-06, + "loss": 0.9703, + "step": 109788 + }, + { + "epoch": 1.32, + "grad_norm": 27.313027802006708, + "learning_rate": 5.471567252095165e-06, + "loss": 1.1844, + "step": 109791 + }, + { + "epoch": 1.32, + "grad_norm": 20.16662608397984, + "learning_rate": 5.471046412446625e-06, + "loss": 1.2309, + "step": 109794 + }, + { + "epoch": 1.32, + "grad_norm": 14.529452893733174, + "learning_rate": 5.470525588253777e-06, + "loss": 1.2976, + "step": 109797 + }, + { + "epoch": 1.32, + "grad_norm": 13.160612795917853, + "learning_rate": 5.470004779518394e-06, + "loss": 0.8842, + "step": 109800 + }, + { + "epoch": 1.32, + "grad_norm": 17.05109812847414, + "learning_rate": 5.4694839862422515e-06, + "loss": 0.9181, + "step": 109803 + }, + { + "epoch": 1.32, + "grad_norm": 6.349449491043512, + "learning_rate": 5.468963208427127e-06, + "loss": 1.4561, + "step": 109806 + }, + { + "epoch": 1.32, + "grad_norm": 8.474837789985378, + "learning_rate": 5.468442446074803e-06, + "loss": 1.2241, + "step": 109809 + }, + { + "epoch": 1.32, + "grad_norm": 11.675777628567614, + "learning_rate": 5.467921699187048e-06, + "loss": 1.3932, + "step": 109812 + }, + { + "epoch": 1.32, + "grad_norm": 3.275928037515348, + "learning_rate": 5.467400967765649e-06, + "loss": 1.0767, + "step": 109815 + }, + { + "epoch": 1.32, + "grad_norm": 9.103978958283216, + "learning_rate": 5.466880251812375e-06, + "loss": 1.1258, + "step": 109818 + }, + { + "epoch": 1.32, + "grad_norm": 6.787002771612568, + "learning_rate": 5.466359551329004e-06, + "loss": 0.949, + "step": 109821 + }, + { + "epoch": 1.32, + "grad_norm": 12.626402732381859, + "learning_rate": 5.465838866317314e-06, + "loss": 1.2362, + "step": 109824 + }, + { + "epoch": 1.32, + "grad_norm": 7.0816171129260255, + "learning_rate": 5.465318196779088e-06, + "loss": 1.2974, + "step": 109827 + }, + { + "epoch": 1.32, + "grad_norm": 7.38381414023683, + "learning_rate": 5.464797542716094e-06, + "loss": 1.01, + "step": 109830 + }, + { + "epoch": 1.32, + "grad_norm": 17.913956816097294, + "learning_rate": 5.4642769041301165e-06, + "loss": 0.8523, + "step": 109833 + }, + { + "epoch": 1.32, + "grad_norm": 7.362456057716265, + "learning_rate": 5.463756281022923e-06, + "loss": 1.0825, + "step": 109836 + }, + { + "epoch": 1.32, + "grad_norm": 8.34533002790971, + "learning_rate": 5.463235673396297e-06, + "loss": 1.2446, + "step": 109839 + }, + { + "epoch": 1.32, + "grad_norm": 12.071681296395358, + "learning_rate": 5.462715081252015e-06, + "loss": 1.2939, + "step": 109842 + }, + { + "epoch": 1.32, + "grad_norm": 8.996021632197545, + "learning_rate": 5.462194504591851e-06, + "loss": 1.0236, + "step": 109845 + }, + { + "epoch": 1.32, + "grad_norm": 6.190493514587923, + "learning_rate": 5.46167394341758e-06, + "loss": 1.1884, + "step": 109848 + }, + { + "epoch": 1.32, + "grad_norm": 8.265936009932261, + "learning_rate": 5.4611533977309875e-06, + "loss": 0.8724, + "step": 109851 + }, + { + "epoch": 1.32, + "grad_norm": 5.120172935999618, + "learning_rate": 5.4606328675338414e-06, + "loss": 1.119, + "step": 109854 + }, + { + "epoch": 1.32, + "grad_norm": 3.6534180221838737, + "learning_rate": 5.4601123528279175e-06, + "loss": 1.1552, + "step": 109857 + }, + { + "epoch": 1.32, + "grad_norm": 5.549973033004327, + "learning_rate": 5.459591853614996e-06, + "loss": 0.9448, + "step": 109860 + }, + { + "epoch": 1.32, + "grad_norm": 16.64081577689475, + "learning_rate": 5.45907136989685e-06, + "loss": 0.9521, + "step": 109863 + }, + { + "epoch": 1.32, + "grad_norm": 7.630181290676865, + "learning_rate": 5.458550901675259e-06, + "loss": 1.2666, + "step": 109866 + }, + { + "epoch": 1.32, + "grad_norm": 17.214106480917636, + "learning_rate": 5.458030448952001e-06, + "loss": 1.4422, + "step": 109869 + }, + { + "epoch": 1.32, + "grad_norm": 23.081265287188653, + "learning_rate": 5.457510011728846e-06, + "loss": 0.7727, + "step": 109872 + }, + { + "epoch": 1.32, + "grad_norm": 7.2940799239442295, + "learning_rate": 5.456989590007576e-06, + "loss": 1.0541, + "step": 109875 + }, + { + "epoch": 1.32, + "grad_norm": 23.674089749561325, + "learning_rate": 5.456469183789961e-06, + "loss": 1.0778, + "step": 109878 + }, + { + "epoch": 1.32, + "grad_norm": 12.663908251784246, + "learning_rate": 5.455948793077785e-06, + "loss": 1.3976, + "step": 109881 + }, + { + "epoch": 1.32, + "grad_norm": 8.394497180553572, + "learning_rate": 5.455428417872814e-06, + "loss": 1.2932, + "step": 109884 + }, + { + "epoch": 1.32, + "grad_norm": 12.289339093189902, + "learning_rate": 5.454908058176829e-06, + "loss": 0.864, + "step": 109887 + }, + { + "epoch": 1.32, + "grad_norm": 6.874054547413275, + "learning_rate": 5.454387713991605e-06, + "loss": 1.3726, + "step": 109890 + }, + { + "epoch": 1.32, + "grad_norm": 5.213461674790392, + "learning_rate": 5.4538673853189225e-06, + "loss": 1.2151, + "step": 109893 + }, + { + "epoch": 1.32, + "grad_norm": 9.5872699667707, + "learning_rate": 5.453347072160553e-06, + "loss": 1.3445, + "step": 109896 + }, + { + "epoch": 1.32, + "grad_norm": 6.760509002206228, + "learning_rate": 5.4528267745182694e-06, + "loss": 0.9062, + "step": 109899 + }, + { + "epoch": 1.32, + "grad_norm": 14.019826516655366, + "learning_rate": 5.45230649239385e-06, + "loss": 1.7325, + "step": 109902 + }, + { + "epoch": 1.32, + "grad_norm": 7.147979212562315, + "learning_rate": 5.45178622578907e-06, + "loss": 1.1302, + "step": 109905 + }, + { + "epoch": 1.32, + "grad_norm": 9.179017296570532, + "learning_rate": 5.451265974705708e-06, + "loss": 1.2211, + "step": 109908 + }, + { + "epoch": 1.32, + "grad_norm": 7.458672192892056, + "learning_rate": 5.450745739145536e-06, + "loss": 0.9866, + "step": 109911 + }, + { + "epoch": 1.32, + "grad_norm": 16.00704606601368, + "learning_rate": 5.4502255191103284e-06, + "loss": 0.9985, + "step": 109914 + }, + { + "epoch": 1.32, + "grad_norm": 4.7464275767404684, + "learning_rate": 5.449705314601865e-06, + "loss": 1.0002, + "step": 109917 + }, + { + "epoch": 1.32, + "grad_norm": 9.719891438658244, + "learning_rate": 5.4491851256219166e-06, + "loss": 1.2524, + "step": 109920 + }, + { + "epoch": 1.32, + "grad_norm": 17.622466786518054, + "learning_rate": 5.448664952172263e-06, + "loss": 1.39, + "step": 109923 + }, + { + "epoch": 1.32, + "grad_norm": 2.6057467036022595, + "learning_rate": 5.4481447942546715e-06, + "loss": 1.5477, + "step": 109926 + }, + { + "epoch": 1.32, + "grad_norm": 5.740497664807395, + "learning_rate": 5.447624651870924e-06, + "loss": 1.1943, + "step": 109929 + }, + { + "epoch": 1.32, + "grad_norm": 6.7260784759400005, + "learning_rate": 5.447104525022793e-06, + "loss": 0.9583, + "step": 109932 + }, + { + "epoch": 1.32, + "grad_norm": 5.486755455287186, + "learning_rate": 5.446584413712058e-06, + "loss": 1.2764, + "step": 109935 + }, + { + "epoch": 1.32, + "grad_norm": 4.053763635520831, + "learning_rate": 5.4460643179404915e-06, + "loss": 1.4179, + "step": 109938 + }, + { + "epoch": 1.32, + "grad_norm": 3.778181893454342, + "learning_rate": 5.445544237709861e-06, + "loss": 1.3206, + "step": 109941 + }, + { + "epoch": 1.32, + "grad_norm": 9.23424759961315, + "learning_rate": 5.445024173021949e-06, + "loss": 1.2925, + "step": 109944 + }, + { + "epoch": 1.32, + "grad_norm": 5.570210016254523, + "learning_rate": 5.444504123878529e-06, + "loss": 1.3371, + "step": 109947 + }, + { + "epoch": 1.32, + "grad_norm": 17.467884776069212, + "learning_rate": 5.44398409028138e-06, + "loss": 1.1585, + "step": 109950 + }, + { + "epoch": 1.32, + "grad_norm": 4.378204459750933, + "learning_rate": 5.443464072232267e-06, + "loss": 1.0331, + "step": 109953 + }, + { + "epoch": 1.32, + "grad_norm": 7.058730255353666, + "learning_rate": 5.44294406973297e-06, + "loss": 1.4711, + "step": 109956 + }, + { + "epoch": 1.32, + "grad_norm": 4.0443038294964975, + "learning_rate": 5.442424082785268e-06, + "loss": 1.3372, + "step": 109959 + }, + { + "epoch": 1.32, + "grad_norm": 7.077754557856446, + "learning_rate": 5.441904111390931e-06, + "loss": 1.1258, + "step": 109962 + }, + { + "epoch": 1.32, + "grad_norm": 7.370734768181361, + "learning_rate": 5.44138415555173e-06, + "loss": 1.0489, + "step": 109965 + }, + { + "epoch": 1.32, + "grad_norm": 12.337807946282052, + "learning_rate": 5.440864215269441e-06, + "loss": 1.2405, + "step": 109968 + }, + { + "epoch": 1.32, + "grad_norm": 10.039941205518529, + "learning_rate": 5.440344290545841e-06, + "loss": 1.3925, + "step": 109971 + }, + { + "epoch": 1.32, + "grad_norm": 30.106347739746234, + "learning_rate": 5.439824381382704e-06, + "loss": 1.0118, + "step": 109974 + }, + { + "epoch": 1.32, + "grad_norm": 9.010503071967603, + "learning_rate": 5.439304487781808e-06, + "loss": 1.0022, + "step": 109977 + }, + { + "epoch": 1.32, + "grad_norm": 4.034030879722497, + "learning_rate": 5.438784609744923e-06, + "loss": 1.1078, + "step": 109980 + }, + { + "epoch": 1.32, + "grad_norm": 6.01488442825364, + "learning_rate": 5.438264747273818e-06, + "loss": 1.0782, + "step": 109983 + }, + { + "epoch": 1.32, + "grad_norm": 13.033643206625207, + "learning_rate": 5.437744900370274e-06, + "loss": 1.1583, + "step": 109986 + }, + { + "epoch": 1.32, + "grad_norm": 12.311850455342478, + "learning_rate": 5.437225069036067e-06, + "loss": 1.2987, + "step": 109989 + }, + { + "epoch": 1.32, + "grad_norm": 14.348840992457509, + "learning_rate": 5.436705253272963e-06, + "loss": 1.1024, + "step": 109992 + }, + { + "epoch": 1.32, + "grad_norm": 14.8247375429013, + "learning_rate": 5.436185453082739e-06, + "loss": 1.2234, + "step": 109995 + }, + { + "epoch": 1.32, + "grad_norm": 9.342616585171273, + "learning_rate": 5.4356656684671716e-06, + "loss": 1.0897, + "step": 109998 + }, + { + "epoch": 1.32, + "grad_norm": 9.326574548961627, + "learning_rate": 5.435145899428037e-06, + "loss": 1.1077, + "step": 110001 + }, + { + "epoch": 1.32, + "grad_norm": 11.015371896682813, + "learning_rate": 5.434626145967106e-06, + "loss": 1.262, + "step": 110004 + }, + { + "epoch": 1.32, + "grad_norm": 6.014670343079868, + "learning_rate": 5.434106408086146e-06, + "loss": 0.8342, + "step": 110007 + }, + { + "epoch": 1.32, + "grad_norm": 7.405834951780102, + "learning_rate": 5.433586685786937e-06, + "loss": 1.4231, + "step": 110010 + }, + { + "epoch": 1.32, + "grad_norm": 5.218286058877753, + "learning_rate": 5.433066979071251e-06, + "loss": 1.0763, + "step": 110013 + }, + { + "epoch": 1.32, + "grad_norm": 14.57681760540574, + "learning_rate": 5.432547287940868e-06, + "loss": 1.1615, + "step": 110016 + }, + { + "epoch": 1.32, + "grad_norm": 6.404700153339323, + "learning_rate": 5.43202761239755e-06, + "loss": 1.3633, + "step": 110019 + }, + { + "epoch": 1.32, + "grad_norm": 9.54754907201547, + "learning_rate": 5.431507952443078e-06, + "loss": 0.9138, + "step": 110022 + }, + { + "epoch": 1.32, + "grad_norm": 10.952544290313519, + "learning_rate": 5.430988308079227e-06, + "loss": 1.2928, + "step": 110025 + }, + { + "epoch": 1.32, + "grad_norm": 4.049742591840021, + "learning_rate": 5.430468679307763e-06, + "loss": 1.2848, + "step": 110028 + }, + { + "epoch": 1.32, + "grad_norm": 6.624451300234722, + "learning_rate": 5.429949066130469e-06, + "loss": 1.4678, + "step": 110031 + }, + { + "epoch": 1.32, + "grad_norm": 3.124366753046203, + "learning_rate": 5.429429468549107e-06, + "loss": 0.9712, + "step": 110034 + }, + { + "epoch": 1.32, + "grad_norm": 25.881161257247438, + "learning_rate": 5.428909886565457e-06, + "loss": 1.6442, + "step": 110037 + }, + { + "epoch": 1.32, + "grad_norm": 10.44868397675946, + "learning_rate": 5.42839032018129e-06, + "loss": 1.0352, + "step": 110040 + }, + { + "epoch": 1.32, + "grad_norm": 5.932212561395338, + "learning_rate": 5.4278707693983845e-06, + "loss": 1.5428, + "step": 110043 + }, + { + "epoch": 1.32, + "grad_norm": 8.33273561121748, + "learning_rate": 5.42735123421851e-06, + "loss": 1.1462, + "step": 110046 + }, + { + "epoch": 1.32, + "grad_norm": 5.751748451296535, + "learning_rate": 5.426831714643432e-06, + "loss": 1.363, + "step": 110049 + }, + { + "epoch": 1.32, + "grad_norm": 5.438082368438814, + "learning_rate": 5.426312210674934e-06, + "loss": 1.0032, + "step": 110052 + }, + { + "epoch": 1.32, + "grad_norm": 11.543824203533061, + "learning_rate": 5.425792722314782e-06, + "loss": 0.9971, + "step": 110055 + }, + { + "epoch": 1.32, + "grad_norm": 10.809838416681009, + "learning_rate": 5.425273249564758e-06, + "loss": 0.9932, + "step": 110058 + }, + { + "epoch": 1.32, + "grad_norm": 40.54842287089631, + "learning_rate": 5.424753792426623e-06, + "loss": 1.2875, + "step": 110061 + }, + { + "epoch": 1.32, + "grad_norm": 11.837216077940923, + "learning_rate": 5.424234350902155e-06, + "loss": 1.2534, + "step": 110064 + }, + { + "epoch": 1.32, + "grad_norm": 10.106571809798902, + "learning_rate": 5.423714924993132e-06, + "loss": 1.3671, + "step": 110067 + }, + { + "epoch": 1.32, + "grad_norm": 4.377938073715674, + "learning_rate": 5.423195514701322e-06, + "loss": 1.2616, + "step": 110070 + }, + { + "epoch": 1.32, + "grad_norm": 14.191931073997056, + "learning_rate": 5.422676120028492e-06, + "loss": 1.0328, + "step": 110073 + }, + { + "epoch": 1.32, + "grad_norm": 3.8524982426955825, + "learning_rate": 5.422156740976421e-06, + "loss": 1.2367, + "step": 110076 + }, + { + "epoch": 1.32, + "grad_norm": 6.859881091208015, + "learning_rate": 5.421637377546878e-06, + "loss": 0.9931, + "step": 110079 + }, + { + "epoch": 1.32, + "grad_norm": 14.792790826566605, + "learning_rate": 5.421118029741639e-06, + "loss": 1.0139, + "step": 110082 + }, + { + "epoch": 1.32, + "grad_norm": 11.777240631528134, + "learning_rate": 5.420598697562479e-06, + "loss": 0.9859, + "step": 110085 + }, + { + "epoch": 1.32, + "grad_norm": 21.335632568112008, + "learning_rate": 5.420079381011165e-06, + "loss": 1.0046, + "step": 110088 + }, + { + "epoch": 1.32, + "grad_norm": 14.482876194072702, + "learning_rate": 5.419560080089468e-06, + "loss": 1.3608, + "step": 110091 + }, + { + "epoch": 1.32, + "grad_norm": 10.3909732400511, + "learning_rate": 5.419040794799162e-06, + "loss": 1.1475, + "step": 110094 + }, + { + "epoch": 1.32, + "grad_norm": 4.78151728189624, + "learning_rate": 5.418521525142023e-06, + "loss": 1.3409, + "step": 110097 + }, + { + "epoch": 1.32, + "grad_norm": 17.157394861440455, + "learning_rate": 5.418002271119817e-06, + "loss": 1.4032, + "step": 110100 + }, + { + "epoch": 1.32, + "grad_norm": 10.726722483690974, + "learning_rate": 5.417483032734318e-06, + "loss": 1.1943, + "step": 110103 + }, + { + "epoch": 1.32, + "grad_norm": 18.167013965930092, + "learning_rate": 5.416963809987299e-06, + "loss": 1.033, + "step": 110106 + }, + { + "epoch": 1.32, + "grad_norm": 34.742879712506536, + "learning_rate": 5.416444602880535e-06, + "loss": 1.1374, + "step": 110109 + }, + { + "epoch": 1.32, + "grad_norm": 4.958273094731298, + "learning_rate": 5.415925411415796e-06, + "loss": 1.2954, + "step": 110112 + }, + { + "epoch": 1.32, + "grad_norm": 8.150523540326317, + "learning_rate": 5.415406235594846e-06, + "loss": 1.395, + "step": 110115 + }, + { + "epoch": 1.32, + "grad_norm": 45.26104412025352, + "learning_rate": 5.414887075419466e-06, + "loss": 1.1505, + "step": 110118 + }, + { + "epoch": 1.32, + "grad_norm": 8.16086887845758, + "learning_rate": 5.414367930891423e-06, + "loss": 1.1217, + "step": 110121 + }, + { + "epoch": 1.32, + "grad_norm": 3.6842101337460647, + "learning_rate": 5.413848802012496e-06, + "loss": 1.1385, + "step": 110124 + }, + { + "epoch": 1.32, + "grad_norm": 8.472657962269318, + "learning_rate": 5.413329688784445e-06, + "loss": 1.273, + "step": 110127 + }, + { + "epoch": 1.32, + "grad_norm": 7.9384164190536755, + "learning_rate": 5.412810591209054e-06, + "loss": 1.0219, + "step": 110130 + }, + { + "epoch": 1.32, + "grad_norm": 15.758568184595516, + "learning_rate": 5.412291509288084e-06, + "loss": 1.273, + "step": 110133 + }, + { + "epoch": 1.32, + "grad_norm": 4.334077182818512, + "learning_rate": 5.411772443023308e-06, + "loss": 1.2545, + "step": 110136 + }, + { + "epoch": 1.32, + "grad_norm": 4.732102168335408, + "learning_rate": 5.411253392416507e-06, + "loss": 0.8287, + "step": 110139 + }, + { + "epoch": 1.32, + "grad_norm": 12.30324008770025, + "learning_rate": 5.410734357469438e-06, + "loss": 1.2184, + "step": 110142 + }, + { + "epoch": 1.32, + "grad_norm": 15.457540071655357, + "learning_rate": 5.410215338183881e-06, + "loss": 1.2048, + "step": 110145 + }, + { + "epoch": 1.32, + "grad_norm": 10.604532883455695, + "learning_rate": 5.409696334561607e-06, + "loss": 1.212, + "step": 110148 + }, + { + "epoch": 1.32, + "grad_norm": 2.371086696286842, + "learning_rate": 5.409177346604391e-06, + "loss": 1.2834, + "step": 110151 + }, + { + "epoch": 1.32, + "grad_norm": 5.083045150912853, + "learning_rate": 5.408658374313996e-06, + "loss": 1.1759, + "step": 110154 + }, + { + "epoch": 1.32, + "grad_norm": 5.166029715449522, + "learning_rate": 5.4081394176921946e-06, + "loss": 1.3392, + "step": 110157 + }, + { + "epoch": 1.32, + "grad_norm": 25.82378170810902, + "learning_rate": 5.407620476740758e-06, + "loss": 1.328, + "step": 110160 + }, + { + "epoch": 1.32, + "grad_norm": 9.488458136964441, + "learning_rate": 5.407101551461459e-06, + "loss": 1.1531, + "step": 110163 + }, + { + "epoch": 1.32, + "grad_norm": 17.316996019109215, + "learning_rate": 5.406582641856072e-06, + "loss": 1.3568, + "step": 110166 + }, + { + "epoch": 1.32, + "grad_norm": 10.397924208998887, + "learning_rate": 5.40606374792636e-06, + "loss": 1.2587, + "step": 110169 + }, + { + "epoch": 1.32, + "grad_norm": 8.762532141418099, + "learning_rate": 5.405544869674102e-06, + "loss": 1.2707, + "step": 110172 + }, + { + "epoch": 1.32, + "grad_norm": 16.803832758035092, + "learning_rate": 5.405026007101059e-06, + "loss": 0.9233, + "step": 110175 + }, + { + "epoch": 1.32, + "grad_norm": 12.409579892059973, + "learning_rate": 5.404507160209012e-06, + "loss": 1.4279, + "step": 110178 + }, + { + "epoch": 1.32, + "grad_norm": 9.38702620098022, + "learning_rate": 5.403988328999723e-06, + "loss": 1.3709, + "step": 110181 + }, + { + "epoch": 1.32, + "grad_norm": 4.503367933636861, + "learning_rate": 5.403469513474967e-06, + "loss": 1.3042, + "step": 110184 + }, + { + "epoch": 1.32, + "grad_norm": 26.189736766883424, + "learning_rate": 5.402950713636513e-06, + "loss": 1.0089, + "step": 110187 + }, + { + "epoch": 1.33, + "grad_norm": 8.149462992573996, + "learning_rate": 5.402431929486134e-06, + "loss": 1.3528, + "step": 110190 + }, + { + "epoch": 1.33, + "grad_norm": 17.571897020195536, + "learning_rate": 5.4019131610256e-06, + "loss": 1.5653, + "step": 110193 + }, + { + "epoch": 1.33, + "grad_norm": 3.0725903168641713, + "learning_rate": 5.401394408256682e-06, + "loss": 1.2472, + "step": 110196 + }, + { + "epoch": 1.33, + "grad_norm": 3.5004840993523434, + "learning_rate": 5.400875671181144e-06, + "loss": 1.2348, + "step": 110199 + }, + { + "epoch": 1.33, + "grad_norm": 21.080095903922835, + "learning_rate": 5.40035694980076e-06, + "loss": 1.1633, + "step": 110202 + }, + { + "epoch": 1.33, + "grad_norm": 4.570459849313614, + "learning_rate": 5.399838244117306e-06, + "loss": 1.2067, + "step": 110205 + }, + { + "epoch": 1.33, + "grad_norm": 14.283689112960852, + "learning_rate": 5.3993195541325426e-06, + "loss": 1.2256, + "step": 110208 + }, + { + "epoch": 1.33, + "grad_norm": 7.149015038091601, + "learning_rate": 5.398800879848245e-06, + "loss": 1.1702, + "step": 110211 + }, + { + "epoch": 1.33, + "grad_norm": 2.1224959191324007, + "learning_rate": 5.398282221266187e-06, + "loss": 1.2987, + "step": 110214 + }, + { + "epoch": 1.33, + "grad_norm": 7.888700695211616, + "learning_rate": 5.397763578388129e-06, + "loss": 1.7077, + "step": 110217 + }, + { + "epoch": 1.33, + "grad_norm": 9.858310665521913, + "learning_rate": 5.39724495121585e-06, + "loss": 1.1097, + "step": 110220 + }, + { + "epoch": 1.33, + "grad_norm": 16.02201535314276, + "learning_rate": 5.396726339751113e-06, + "loss": 1.1719, + "step": 110223 + }, + { + "epoch": 1.33, + "grad_norm": 6.466148508168248, + "learning_rate": 5.396207743995691e-06, + "loss": 1.1871, + "step": 110226 + }, + { + "epoch": 1.33, + "grad_norm": 6.698271334714341, + "learning_rate": 5.395689163951353e-06, + "loss": 1.2414, + "step": 110229 + }, + { + "epoch": 1.33, + "grad_norm": 8.236250672799345, + "learning_rate": 5.395170599619874e-06, + "loss": 1.1634, + "step": 110232 + }, + { + "epoch": 1.33, + "grad_norm": 16.64860845037243, + "learning_rate": 5.3946520510030155e-06, + "loss": 0.9624, + "step": 110235 + }, + { + "epoch": 1.33, + "grad_norm": 3.625384979474979, + "learning_rate": 5.394133518102553e-06, + "loss": 0.9509, + "step": 110238 + }, + { + "epoch": 1.33, + "grad_norm": 11.17481221959453, + "learning_rate": 5.393615000920252e-06, + "loss": 0.9661, + "step": 110241 + }, + { + "epoch": 1.33, + "grad_norm": 9.770311877555159, + "learning_rate": 5.393096499457882e-06, + "loss": 1.2691, + "step": 110244 + }, + { + "epoch": 1.33, + "grad_norm": 5.823097360095494, + "learning_rate": 5.392578013717219e-06, + "loss": 1.1431, + "step": 110247 + }, + { + "epoch": 1.33, + "grad_norm": 15.557170856801525, + "learning_rate": 5.3920595437000235e-06, + "loss": 1.062, + "step": 110250 + }, + { + "epoch": 1.33, + "grad_norm": 5.536045601909863, + "learning_rate": 5.3915410894080685e-06, + "loss": 1.2682, + "step": 110253 + }, + { + "epoch": 1.33, + "grad_norm": 6.666910267769601, + "learning_rate": 5.391022650843129e-06, + "loss": 1.286, + "step": 110256 + }, + { + "epoch": 1.33, + "grad_norm": 16.80148772830381, + "learning_rate": 5.390504228006964e-06, + "loss": 1.1391, + "step": 110259 + }, + { + "epoch": 1.33, + "grad_norm": 25.02031387940397, + "learning_rate": 5.389985820901352e-06, + "loss": 1.1305, + "step": 110262 + }, + { + "epoch": 1.33, + "grad_norm": 6.723065927232971, + "learning_rate": 5.389467429528055e-06, + "loss": 1.2413, + "step": 110265 + }, + { + "epoch": 1.33, + "grad_norm": 13.579595235122872, + "learning_rate": 5.3889490538888435e-06, + "loss": 1.2728, + "step": 110268 + }, + { + "epoch": 1.33, + "grad_norm": 12.80929808663451, + "learning_rate": 5.388430693985489e-06, + "loss": 1.0472, + "step": 110271 + }, + { + "epoch": 1.33, + "grad_norm": 4.318475447192184, + "learning_rate": 5.387912349819761e-06, + "loss": 1.3859, + "step": 110274 + }, + { + "epoch": 1.33, + "grad_norm": 9.117566168714637, + "learning_rate": 5.387394021393426e-06, + "loss": 1.1359, + "step": 110277 + }, + { + "epoch": 1.33, + "grad_norm": 2.254072365524401, + "learning_rate": 5.386875708708257e-06, + "loss": 1.3967, + "step": 110280 + }, + { + "epoch": 1.33, + "grad_norm": 8.523667619587803, + "learning_rate": 5.386357411766015e-06, + "loss": 1.521, + "step": 110283 + }, + { + "epoch": 1.33, + "grad_norm": 18.3633330691395, + "learning_rate": 5.385839130568477e-06, + "loss": 1.2629, + "step": 110286 + }, + { + "epoch": 1.33, + "grad_norm": 8.366083896067233, + "learning_rate": 5.385320865117405e-06, + "loss": 1.1641, + "step": 110289 + }, + { + "epoch": 1.33, + "grad_norm": 12.168592312295868, + "learning_rate": 5.3848026154145705e-06, + "loss": 0.9859, + "step": 110292 + }, + { + "epoch": 1.33, + "grad_norm": 36.53034551530958, + "learning_rate": 5.384284381461742e-06, + "loss": 0.9846, + "step": 110295 + }, + { + "epoch": 1.33, + "grad_norm": 17.817333117792135, + "learning_rate": 5.383766163260691e-06, + "loss": 1.1895, + "step": 110298 + }, + { + "epoch": 1.33, + "grad_norm": 13.611054999222869, + "learning_rate": 5.38324796081318e-06, + "loss": 0.7458, + "step": 110301 + }, + { + "epoch": 1.33, + "grad_norm": 3.743366649591921, + "learning_rate": 5.382729774120988e-06, + "loss": 1.3782, + "step": 110304 + }, + { + "epoch": 1.33, + "grad_norm": 8.192747186102684, + "learning_rate": 5.3822116031858676e-06, + "loss": 0.989, + "step": 110307 + }, + { + "epoch": 1.33, + "grad_norm": 7.353324411418939, + "learning_rate": 5.381693448009597e-06, + "loss": 1.1927, + "step": 110310 + }, + { + "epoch": 1.33, + "grad_norm": 20.11945760493341, + "learning_rate": 5.381175308593949e-06, + "loss": 1.0848, + "step": 110313 + }, + { + "epoch": 1.33, + "grad_norm": 11.536366195797125, + "learning_rate": 5.380657184940681e-06, + "loss": 1.049, + "step": 110316 + }, + { + "epoch": 1.33, + "grad_norm": 4.8462994597289315, + "learning_rate": 5.3801390770515645e-06, + "loss": 1.1701, + "step": 110319 + }, + { + "epoch": 1.33, + "grad_norm": 8.096423333145115, + "learning_rate": 5.379620984928375e-06, + "loss": 0.9986, + "step": 110322 + }, + { + "epoch": 1.33, + "grad_norm": 12.878561332353438, + "learning_rate": 5.379102908572871e-06, + "loss": 0.7872, + "step": 110325 + }, + { + "epoch": 1.33, + "grad_norm": 9.019662370987579, + "learning_rate": 5.3785848479868275e-06, + "loss": 0.9694, + "step": 110328 + }, + { + "epoch": 1.33, + "grad_norm": 6.799272192433042, + "learning_rate": 5.378066803172006e-06, + "loss": 1.1357, + "step": 110331 + }, + { + "epoch": 1.33, + "grad_norm": 11.974209534796605, + "learning_rate": 5.377548774130177e-06, + "loss": 1.2887, + "step": 110334 + }, + { + "epoch": 1.33, + "grad_norm": 6.935714126527965, + "learning_rate": 5.377030760863109e-06, + "loss": 1.2418, + "step": 110337 + }, + { + "epoch": 1.33, + "grad_norm": 10.076680932083702, + "learning_rate": 5.376512763372574e-06, + "loss": 1.2366, + "step": 110340 + }, + { + "epoch": 1.33, + "grad_norm": 17.59268477845025, + "learning_rate": 5.375994781660332e-06, + "loss": 1.1798, + "step": 110343 + }, + { + "epoch": 1.33, + "grad_norm": 9.261235150227073, + "learning_rate": 5.375476815728158e-06, + "loss": 0.9344, + "step": 110346 + }, + { + "epoch": 1.33, + "grad_norm": 9.90896751477577, + "learning_rate": 5.374958865577813e-06, + "loss": 0.9754, + "step": 110349 + }, + { + "epoch": 1.33, + "grad_norm": 7.130440066333441, + "learning_rate": 5.3744409312110655e-06, + "loss": 1.1591, + "step": 110352 + }, + { + "epoch": 1.33, + "grad_norm": 7.663439483783012, + "learning_rate": 5.373923012629689e-06, + "loss": 1.1732, + "step": 110355 + }, + { + "epoch": 1.33, + "grad_norm": 9.300231773876073, + "learning_rate": 5.3734051098354446e-06, + "loss": 1.1532, + "step": 110358 + }, + { + "epoch": 1.33, + "grad_norm": 7.001667476369764, + "learning_rate": 5.372887222830101e-06, + "loss": 1.2333, + "step": 110361 + }, + { + "epoch": 1.33, + "grad_norm": 10.36551660682424, + "learning_rate": 5.372369351615432e-06, + "loss": 1.3705, + "step": 110364 + }, + { + "epoch": 1.33, + "grad_norm": 7.019895764765653, + "learning_rate": 5.371851496193197e-06, + "loss": 1.0222, + "step": 110367 + }, + { + "epoch": 1.33, + "grad_norm": 6.608174696593981, + "learning_rate": 5.371333656565163e-06, + "loss": 1.4817, + "step": 110370 + }, + { + "epoch": 1.33, + "grad_norm": 17.00472408405864, + "learning_rate": 5.3708158327331e-06, + "loss": 1.3951, + "step": 110373 + }, + { + "epoch": 1.33, + "grad_norm": 4.380187635230711, + "learning_rate": 5.3702980246987764e-06, + "loss": 1.4174, + "step": 110376 + }, + { + "epoch": 1.33, + "grad_norm": 2.844769754491443, + "learning_rate": 5.369780232463957e-06, + "loss": 1.4181, + "step": 110379 + }, + { + "epoch": 1.33, + "grad_norm": 7.900042613045101, + "learning_rate": 5.369262456030413e-06, + "loss": 1.1927, + "step": 110382 + }, + { + "epoch": 1.33, + "grad_norm": 3.2664953156297742, + "learning_rate": 5.368744695399905e-06, + "loss": 1.063, + "step": 110385 + }, + { + "epoch": 1.33, + "grad_norm": 6.722265033873343, + "learning_rate": 5.3682269505742064e-06, + "loss": 1.4788, + "step": 110388 + }, + { + "epoch": 1.33, + "grad_norm": 5.675996427455102, + "learning_rate": 5.36770922155508e-06, + "loss": 1.0032, + "step": 110391 + }, + { + "epoch": 1.33, + "grad_norm": 12.183244964078947, + "learning_rate": 5.367191508344294e-06, + "loss": 1.2712, + "step": 110394 + }, + { + "epoch": 1.33, + "grad_norm": 14.089739061405943, + "learning_rate": 5.366673810943612e-06, + "loss": 1.3416, + "step": 110397 + }, + { + "epoch": 1.33, + "grad_norm": 6.030221089318145, + "learning_rate": 5.366156129354803e-06, + "loss": 1.247, + "step": 110400 + }, + { + "epoch": 1.33, + "grad_norm": 29.30521680253981, + "learning_rate": 5.365638463579634e-06, + "loss": 1.1313, + "step": 110403 + }, + { + "epoch": 1.33, + "grad_norm": 3.661904121498804, + "learning_rate": 5.365120813619876e-06, + "loss": 1.2755, + "step": 110406 + }, + { + "epoch": 1.33, + "grad_norm": 9.393346176267313, + "learning_rate": 5.364603179477291e-06, + "loss": 1.0062, + "step": 110409 + }, + { + "epoch": 1.33, + "grad_norm": 5.226307820278936, + "learning_rate": 5.364085561153642e-06, + "loss": 1.0752, + "step": 110412 + }, + { + "epoch": 1.33, + "grad_norm": 5.222387511031595, + "learning_rate": 5.3635679586507e-06, + "loss": 1.0488, + "step": 110415 + }, + { + "epoch": 1.33, + "grad_norm": 9.625889229726134, + "learning_rate": 5.363050371970229e-06, + "loss": 0.868, + "step": 110418 + }, + { + "epoch": 1.33, + "grad_norm": 4.1639423977132, + "learning_rate": 5.362532801114e-06, + "loss": 1.0492, + "step": 110421 + }, + { + "epoch": 1.33, + "grad_norm": 13.396516759257736, + "learning_rate": 5.362015246083773e-06, + "loss": 1.2836, + "step": 110424 + }, + { + "epoch": 1.33, + "grad_norm": 7.984497717803233, + "learning_rate": 5.361497706881319e-06, + "loss": 0.8775, + "step": 110427 + }, + { + "epoch": 1.33, + "grad_norm": 3.213498505300916, + "learning_rate": 5.360980183508405e-06, + "loss": 1.3857, + "step": 110430 + }, + { + "epoch": 1.33, + "grad_norm": 10.887881492583828, + "learning_rate": 5.360462675966791e-06, + "loss": 1.1641, + "step": 110433 + }, + { + "epoch": 1.33, + "grad_norm": 12.355019357792056, + "learning_rate": 5.359945184258251e-06, + "loss": 1.1828, + "step": 110436 + }, + { + "epoch": 1.33, + "grad_norm": 2.8368872396753035, + "learning_rate": 5.359427708384541e-06, + "loss": 1.431, + "step": 110439 + }, + { + "epoch": 1.33, + "grad_norm": 24.17417876187987, + "learning_rate": 5.3589102483474355e-06, + "loss": 1.2068, + "step": 110442 + }, + { + "epoch": 1.33, + "grad_norm": 8.462004921190989, + "learning_rate": 5.358392804148696e-06, + "loss": 1.1489, + "step": 110445 + }, + { + "epoch": 1.33, + "grad_norm": 14.673617327898365, + "learning_rate": 5.357875375790094e-06, + "loss": 1.224, + "step": 110448 + }, + { + "epoch": 1.33, + "grad_norm": 7.159234022798593, + "learning_rate": 5.3573579632733916e-06, + "loss": 1.4117, + "step": 110451 + }, + { + "epoch": 1.33, + "grad_norm": 9.192475880555682, + "learning_rate": 5.35684056660035e-06, + "loss": 1.1812, + "step": 110454 + }, + { + "epoch": 1.33, + "grad_norm": 3.8898793754260246, + "learning_rate": 5.356323185772739e-06, + "loss": 1.4419, + "step": 110457 + }, + { + "epoch": 1.33, + "grad_norm": 16.448731949617773, + "learning_rate": 5.355805820792325e-06, + "loss": 1.2839, + "step": 110460 + }, + { + "epoch": 1.33, + "grad_norm": 6.0141546386662466, + "learning_rate": 5.355288471660876e-06, + "loss": 1.2676, + "step": 110463 + }, + { + "epoch": 1.33, + "grad_norm": 7.946114107432635, + "learning_rate": 5.35477113838015e-06, + "loss": 1.1231, + "step": 110466 + }, + { + "epoch": 1.33, + "grad_norm": 9.652819697396987, + "learning_rate": 5.354253820951919e-06, + "loss": 1.0955, + "step": 110469 + }, + { + "epoch": 1.33, + "grad_norm": 27.225221005484777, + "learning_rate": 5.353736519377949e-06, + "loss": 0.9244, + "step": 110472 + }, + { + "epoch": 1.33, + "grad_norm": 5.308433923862655, + "learning_rate": 5.3532192336600035e-06, + "loss": 1.2645, + "step": 110475 + }, + { + "epoch": 1.33, + "grad_norm": 4.354114396377478, + "learning_rate": 5.352701963799841e-06, + "loss": 1.2505, + "step": 110478 + }, + { + "epoch": 1.33, + "grad_norm": 9.05524213280774, + "learning_rate": 5.352184709799234e-06, + "loss": 1.1589, + "step": 110481 + }, + { + "epoch": 1.33, + "grad_norm": 6.387906417416488, + "learning_rate": 5.351667471659947e-06, + "loss": 1.5132, + "step": 110484 + }, + { + "epoch": 1.33, + "grad_norm": 3.598870019512081, + "learning_rate": 5.351150249383743e-06, + "loss": 1.0949, + "step": 110487 + }, + { + "epoch": 1.33, + "grad_norm": 20.3113524772096, + "learning_rate": 5.350633042972395e-06, + "loss": 1.021, + "step": 110490 + }, + { + "epoch": 1.33, + "grad_norm": 17.278674328492617, + "learning_rate": 5.3501158524276595e-06, + "loss": 0.9852, + "step": 110493 + }, + { + "epoch": 1.33, + "grad_norm": 4.114676307611205, + "learning_rate": 5.349598677751301e-06, + "loss": 1.1234, + "step": 110496 + }, + { + "epoch": 1.33, + "grad_norm": 9.213636270123166, + "learning_rate": 5.3490815189450874e-06, + "loss": 1.3589, + "step": 110499 + }, + { + "epoch": 1.33, + "grad_norm": 13.563787300334015, + "learning_rate": 5.348564376010786e-06, + "loss": 1.0865, + "step": 110502 + }, + { + "epoch": 1.33, + "grad_norm": 15.936546608434798, + "learning_rate": 5.3480472489501556e-06, + "loss": 1.2836, + "step": 110505 + }, + { + "epoch": 1.33, + "grad_norm": 2.826293446695498, + "learning_rate": 5.347530137764964e-06, + "loss": 1.3551, + "step": 110508 + }, + { + "epoch": 1.33, + "grad_norm": 11.616178574215061, + "learning_rate": 5.347013042456976e-06, + "loss": 1.4154, + "step": 110511 + }, + { + "epoch": 1.33, + "grad_norm": 9.694134623408798, + "learning_rate": 5.3464959630279625e-06, + "loss": 1.2956, + "step": 110514 + }, + { + "epoch": 1.33, + "grad_norm": 2.0845273677008795, + "learning_rate": 5.34597889947968e-06, + "loss": 1.1493, + "step": 110517 + }, + { + "epoch": 1.33, + "grad_norm": 10.435303679244738, + "learning_rate": 5.3454618518138925e-06, + "loss": 1.2925, + "step": 110520 + }, + { + "epoch": 1.33, + "grad_norm": 12.176268606683257, + "learning_rate": 5.344944820032366e-06, + "loss": 1.4758, + "step": 110523 + }, + { + "epoch": 1.33, + "grad_norm": 7.57159105955871, + "learning_rate": 5.344427804136867e-06, + "loss": 1.3636, + "step": 110526 + }, + { + "epoch": 1.33, + "grad_norm": 12.258380389987519, + "learning_rate": 5.343910804129162e-06, + "loss": 0.9154, + "step": 110529 + }, + { + "epoch": 1.33, + "grad_norm": 3.97978697728968, + "learning_rate": 5.343393820011009e-06, + "loss": 1.1766, + "step": 110532 + }, + { + "epoch": 1.33, + "grad_norm": 14.309577165951472, + "learning_rate": 5.34287685178418e-06, + "loss": 1.1237, + "step": 110535 + }, + { + "epoch": 1.33, + "grad_norm": 9.717444979410551, + "learning_rate": 5.34235989945043e-06, + "loss": 1.1242, + "step": 110538 + }, + { + "epoch": 1.33, + "grad_norm": 16.95653309814816, + "learning_rate": 5.341842963011528e-06, + "loss": 1.5973, + "step": 110541 + }, + { + "epoch": 1.33, + "grad_norm": 10.630682739020063, + "learning_rate": 5.341326042469243e-06, + "loss": 1.0499, + "step": 110544 + }, + { + "epoch": 1.33, + "grad_norm": 38.752220099530966, + "learning_rate": 5.340809137825329e-06, + "loss": 1.3979, + "step": 110547 + }, + { + "epoch": 1.33, + "grad_norm": 3.051105780143244, + "learning_rate": 5.340292249081555e-06, + "loss": 1.2151, + "step": 110550 + }, + { + "epoch": 1.33, + "grad_norm": 18.13645381981412, + "learning_rate": 5.339775376239686e-06, + "loss": 0.8988, + "step": 110553 + }, + { + "epoch": 1.33, + "grad_norm": 11.956710946435132, + "learning_rate": 5.339258519301489e-06, + "loss": 1.3142, + "step": 110556 + }, + { + "epoch": 1.33, + "grad_norm": 9.072503043338518, + "learning_rate": 5.338741678268724e-06, + "loss": 1.0207, + "step": 110559 + }, + { + "epoch": 1.33, + "grad_norm": 10.042900462363242, + "learning_rate": 5.3382248531431505e-06, + "loss": 1.1479, + "step": 110562 + }, + { + "epoch": 1.33, + "grad_norm": 13.090536681554646, + "learning_rate": 5.337708043926537e-06, + "loss": 1.0135, + "step": 110565 + }, + { + "epoch": 1.33, + "grad_norm": 6.887377257148443, + "learning_rate": 5.337191250620646e-06, + "loss": 1.3449, + "step": 110568 + }, + { + "epoch": 1.33, + "grad_norm": 4.5646984840370965, + "learning_rate": 5.3366744732272455e-06, + "loss": 1.0451, + "step": 110571 + }, + { + "epoch": 1.33, + "grad_norm": 21.995150107796512, + "learning_rate": 5.336157711748091e-06, + "loss": 1.2114, + "step": 110574 + }, + { + "epoch": 1.33, + "grad_norm": 9.877717960084757, + "learning_rate": 5.335640966184955e-06, + "loss": 0.8651, + "step": 110577 + }, + { + "epoch": 1.33, + "grad_norm": 18.822033862285895, + "learning_rate": 5.335124236539592e-06, + "loss": 0.8814, + "step": 110580 + }, + { + "epoch": 1.33, + "grad_norm": 7.212629185358841, + "learning_rate": 5.334607522813774e-06, + "loss": 1.3362, + "step": 110583 + }, + { + "epoch": 1.33, + "grad_norm": 19.749850135301468, + "learning_rate": 5.334090825009255e-06, + "loss": 1.0823, + "step": 110586 + }, + { + "epoch": 1.33, + "grad_norm": 14.900851616431337, + "learning_rate": 5.3335741431278045e-06, + "loss": 1.2131, + "step": 110589 + }, + { + "epoch": 1.33, + "grad_norm": 2.4398490564991073, + "learning_rate": 5.333057477171184e-06, + "loss": 1.1602, + "step": 110592 + }, + { + "epoch": 1.33, + "grad_norm": 19.437577986846165, + "learning_rate": 5.332540827141158e-06, + "loss": 1.2553, + "step": 110595 + }, + { + "epoch": 1.33, + "grad_norm": 10.391915313196531, + "learning_rate": 5.332024193039494e-06, + "loss": 1.5092, + "step": 110598 + }, + { + "epoch": 1.33, + "grad_norm": 11.618357871414727, + "learning_rate": 5.3315075748679465e-06, + "loss": 1.1347, + "step": 110601 + }, + { + "epoch": 1.33, + "grad_norm": 6.332532017108295, + "learning_rate": 5.33099097262828e-06, + "loss": 1.4221, + "step": 110604 + }, + { + "epoch": 1.33, + "grad_norm": 9.486005343170472, + "learning_rate": 5.3304743863222605e-06, + "loss": 1.2156, + "step": 110607 + }, + { + "epoch": 1.33, + "grad_norm": 8.583925028224488, + "learning_rate": 5.329957815951654e-06, + "loss": 0.9447, + "step": 110610 + }, + { + "epoch": 1.33, + "grad_norm": 13.834979304401122, + "learning_rate": 5.329441261518214e-06, + "loss": 1.5234, + "step": 110613 + }, + { + "epoch": 1.33, + "grad_norm": 3.982107382226469, + "learning_rate": 5.328924723023709e-06, + "loss": 1.3795, + "step": 110616 + }, + { + "epoch": 1.33, + "grad_norm": 5.846107204492988, + "learning_rate": 5.3284082004699015e-06, + "loss": 1.494, + "step": 110619 + }, + { + "epoch": 1.33, + "grad_norm": 21.033330861679882, + "learning_rate": 5.327891693858558e-06, + "loss": 1.4194, + "step": 110622 + }, + { + "epoch": 1.33, + "grad_norm": 6.380931721696667, + "learning_rate": 5.327375203191439e-06, + "loss": 1.0867, + "step": 110625 + }, + { + "epoch": 1.33, + "grad_norm": 8.721466199344476, + "learning_rate": 5.326858728470299e-06, + "loss": 1.168, + "step": 110628 + }, + { + "epoch": 1.33, + "grad_norm": 5.904336504976291, + "learning_rate": 5.326342269696907e-06, + "loss": 1.0409, + "step": 110631 + }, + { + "epoch": 1.33, + "grad_norm": 29.686097382935007, + "learning_rate": 5.325825826873026e-06, + "loss": 1.2365, + "step": 110634 + }, + { + "epoch": 1.33, + "grad_norm": 12.29223370517356, + "learning_rate": 5.3253094000004225e-06, + "loss": 1.0913, + "step": 110637 + }, + { + "epoch": 1.33, + "grad_norm": 5.799702996335315, + "learning_rate": 5.324792989080849e-06, + "loss": 1.1059, + "step": 110640 + }, + { + "epoch": 1.33, + "grad_norm": 24.805248368882282, + "learning_rate": 5.324276594116078e-06, + "loss": 1.1514, + "step": 110643 + }, + { + "epoch": 1.33, + "grad_norm": 13.128718209250625, + "learning_rate": 5.323760215107863e-06, + "loss": 1.3306, + "step": 110646 + }, + { + "epoch": 1.33, + "grad_norm": 5.099274660463035, + "learning_rate": 5.323243852057969e-06, + "loss": 0.7667, + "step": 110649 + }, + { + "epoch": 1.33, + "grad_norm": 18.01898905700171, + "learning_rate": 5.3227275049681634e-06, + "loss": 1.049, + "step": 110652 + }, + { + "epoch": 1.33, + "grad_norm": 6.783980619787276, + "learning_rate": 5.322211173840199e-06, + "loss": 0.9866, + "step": 110655 + }, + { + "epoch": 1.33, + "grad_norm": 5.525938813644166, + "learning_rate": 5.321694858675845e-06, + "loss": 1.1754, + "step": 110658 + }, + { + "epoch": 1.33, + "grad_norm": 12.602940613152898, + "learning_rate": 5.321178559476861e-06, + "loss": 1.2036, + "step": 110661 + }, + { + "epoch": 1.33, + "grad_norm": 18.66211706659528, + "learning_rate": 5.3206622762450124e-06, + "loss": 1.099, + "step": 110664 + }, + { + "epoch": 1.33, + "grad_norm": 2.9366299075150355, + "learning_rate": 5.320146008982059e-06, + "loss": 1.3312, + "step": 110667 + }, + { + "epoch": 1.33, + "grad_norm": 8.8009948240831, + "learning_rate": 5.319629757689757e-06, + "loss": 1.3768, + "step": 110670 + }, + { + "epoch": 1.33, + "grad_norm": 13.579900831544334, + "learning_rate": 5.319113522369872e-06, + "loss": 1.0747, + "step": 110673 + }, + { + "epoch": 1.33, + "grad_norm": 7.107613557643236, + "learning_rate": 5.318597303024168e-06, + "loss": 1.1775, + "step": 110676 + }, + { + "epoch": 1.33, + "grad_norm": 9.031195605229849, + "learning_rate": 5.3180810996544084e-06, + "loss": 0.935, + "step": 110679 + }, + { + "epoch": 1.33, + "grad_norm": 8.610829869828532, + "learning_rate": 5.317564912262348e-06, + "loss": 1.3343, + "step": 110682 + }, + { + "epoch": 1.33, + "grad_norm": 39.16341442234429, + "learning_rate": 5.317048740849755e-06, + "loss": 1.0869, + "step": 110685 + }, + { + "epoch": 1.33, + "grad_norm": 3.8313774465183594, + "learning_rate": 5.316532585418383e-06, + "loss": 1.0724, + "step": 110688 + }, + { + "epoch": 1.33, + "grad_norm": 7.04819193264248, + "learning_rate": 5.316016445970005e-06, + "loss": 1.2591, + "step": 110691 + }, + { + "epoch": 1.33, + "grad_norm": 16.704681940217824, + "learning_rate": 5.315500322506371e-06, + "loss": 1.1033, + "step": 110694 + }, + { + "epoch": 1.33, + "grad_norm": 10.907178117673249, + "learning_rate": 5.314984215029246e-06, + "loss": 1.1628, + "step": 110697 + }, + { + "epoch": 1.33, + "grad_norm": 5.909647886393245, + "learning_rate": 5.314468123540393e-06, + "loss": 1.3347, + "step": 110700 + }, + { + "epoch": 1.33, + "grad_norm": 10.450727318151605, + "learning_rate": 5.313952048041573e-06, + "loss": 0.9822, + "step": 110703 + }, + { + "epoch": 1.33, + "grad_norm": 4.592369139703383, + "learning_rate": 5.31343598853455e-06, + "loss": 1.2694, + "step": 110706 + }, + { + "epoch": 1.33, + "grad_norm": 8.352695351187878, + "learning_rate": 5.312919945021081e-06, + "loss": 1.4464, + "step": 110709 + }, + { + "epoch": 1.33, + "grad_norm": 12.965969274630464, + "learning_rate": 5.312403917502925e-06, + "loss": 1.0374, + "step": 110712 + }, + { + "epoch": 1.33, + "grad_norm": 13.040692454457904, + "learning_rate": 5.311887905981846e-06, + "loss": 1.2617, + "step": 110715 + }, + { + "epoch": 1.33, + "grad_norm": 8.842980772225488, + "learning_rate": 5.311371910459608e-06, + "loss": 1.1525, + "step": 110718 + }, + { + "epoch": 1.33, + "grad_norm": 6.657842287029486, + "learning_rate": 5.310855930937966e-06, + "loss": 0.8247, + "step": 110721 + }, + { + "epoch": 1.33, + "grad_norm": 6.164162344332884, + "learning_rate": 5.310339967418684e-06, + "loss": 1.1033, + "step": 110724 + }, + { + "epoch": 1.33, + "grad_norm": 16.52339400965061, + "learning_rate": 5.309824019903527e-06, + "loss": 1.0316, + "step": 110727 + }, + { + "epoch": 1.33, + "grad_norm": 10.241705103319774, + "learning_rate": 5.3093080883942445e-06, + "loss": 0.8931, + "step": 110730 + }, + { + "epoch": 1.33, + "grad_norm": 5.6223189892863035, + "learning_rate": 5.308792172892609e-06, + "loss": 1.3556, + "step": 110733 + }, + { + "epoch": 1.33, + "grad_norm": 10.220583304903297, + "learning_rate": 5.308276273400373e-06, + "loss": 1.1096, + "step": 110736 + }, + { + "epoch": 1.33, + "grad_norm": 12.10501108807455, + "learning_rate": 5.3077603899193005e-06, + "loss": 1.2247, + "step": 110739 + }, + { + "epoch": 1.33, + "grad_norm": 25.11513975796924, + "learning_rate": 5.30724452245115e-06, + "loss": 0.7629, + "step": 110742 + }, + { + "epoch": 1.33, + "grad_norm": 7.78840732092527, + "learning_rate": 5.306728670997689e-06, + "loss": 0.8569, + "step": 110745 + }, + { + "epoch": 1.33, + "grad_norm": 8.274801966026082, + "learning_rate": 5.306212835560668e-06, + "loss": 1.3117, + "step": 110748 + }, + { + "epoch": 1.33, + "grad_norm": 7.824240678920502, + "learning_rate": 5.305697016141855e-06, + "loss": 1.1939, + "step": 110751 + }, + { + "epoch": 1.33, + "grad_norm": 18.549121466073082, + "learning_rate": 5.305181212743005e-06, + "loss": 1.4672, + "step": 110754 + }, + { + "epoch": 1.33, + "grad_norm": 6.420125037751186, + "learning_rate": 5.304665425365881e-06, + "loss": 1.1708, + "step": 110757 + }, + { + "epoch": 1.33, + "grad_norm": 5.360226556316425, + "learning_rate": 5.3041496540122435e-06, + "loss": 1.0009, + "step": 110760 + }, + { + "epoch": 1.33, + "grad_norm": 5.091978034377723, + "learning_rate": 5.30363389868385e-06, + "loss": 1.4551, + "step": 110763 + }, + { + "epoch": 1.33, + "grad_norm": 12.20940078682403, + "learning_rate": 5.303118159382461e-06, + "loss": 0.9514, + "step": 110766 + }, + { + "epoch": 1.33, + "grad_norm": 32.175645021599465, + "learning_rate": 5.302602436109843e-06, + "loss": 1.2244, + "step": 110769 + }, + { + "epoch": 1.33, + "grad_norm": 12.90498815796097, + "learning_rate": 5.302086728867751e-06, + "loss": 1.479, + "step": 110772 + }, + { + "epoch": 1.33, + "grad_norm": 5.282656005260256, + "learning_rate": 5.30157103765794e-06, + "loss": 1.191, + "step": 110775 + }, + { + "epoch": 1.33, + "grad_norm": 5.406625909107936, + "learning_rate": 5.301055362482176e-06, + "loss": 1.1911, + "step": 110778 + }, + { + "epoch": 1.33, + "grad_norm": 5.762259931446406, + "learning_rate": 5.300539703342217e-06, + "loss": 1.1394, + "step": 110781 + }, + { + "epoch": 1.33, + "grad_norm": 13.96380316662631, + "learning_rate": 5.300024060239823e-06, + "loss": 1.4597, + "step": 110784 + }, + { + "epoch": 1.33, + "grad_norm": 6.938151107515881, + "learning_rate": 5.2995084331767565e-06, + "loss": 0.7119, + "step": 110787 + }, + { + "epoch": 1.33, + "grad_norm": 12.571282453431584, + "learning_rate": 5.298992822154774e-06, + "loss": 1.1337, + "step": 110790 + }, + { + "epoch": 1.33, + "grad_norm": 13.643210609692005, + "learning_rate": 5.2984772271756355e-06, + "loss": 1.1148, + "step": 110793 + }, + { + "epoch": 1.33, + "grad_norm": 6.277908777374369, + "learning_rate": 5.2979616482411e-06, + "loss": 1.5034, + "step": 110796 + }, + { + "epoch": 1.33, + "grad_norm": 9.894872835172574, + "learning_rate": 5.297446085352931e-06, + "loss": 1.8173, + "step": 110799 + }, + { + "epoch": 1.33, + "grad_norm": 5.683808224788543, + "learning_rate": 5.29693053851288e-06, + "loss": 1.1147, + "step": 110802 + }, + { + "epoch": 1.33, + "grad_norm": 54.04583285767907, + "learning_rate": 5.296415007722712e-06, + "loss": 1.2198, + "step": 110805 + }, + { + "epoch": 1.33, + "grad_norm": 4.7949487666086075, + "learning_rate": 5.295899492984184e-06, + "loss": 0.9283, + "step": 110808 + }, + { + "epoch": 1.33, + "grad_norm": 5.903269490283274, + "learning_rate": 5.295383994299061e-06, + "loss": 1.0385, + "step": 110811 + }, + { + "epoch": 1.33, + "grad_norm": 16.27372103282686, + "learning_rate": 5.294868511669098e-06, + "loss": 1.5661, + "step": 110814 + }, + { + "epoch": 1.33, + "grad_norm": 13.187141710945236, + "learning_rate": 5.2943530450960505e-06, + "loss": 1.0711, + "step": 110817 + }, + { + "epoch": 1.33, + "grad_norm": 6.15873282928199, + "learning_rate": 5.293837594581681e-06, + "loss": 1.4199, + "step": 110820 + }, + { + "epoch": 1.33, + "grad_norm": 12.897639892486254, + "learning_rate": 5.293322160127748e-06, + "loss": 1.005, + "step": 110823 + }, + { + "epoch": 1.33, + "grad_norm": 6.937225593815537, + "learning_rate": 5.292806741736016e-06, + "loss": 1.1606, + "step": 110826 + }, + { + "epoch": 1.33, + "grad_norm": 6.321073901966041, + "learning_rate": 5.2922913394082345e-06, + "loss": 1.2226, + "step": 110829 + }, + { + "epoch": 1.33, + "grad_norm": 18.546271241290498, + "learning_rate": 5.291775953146168e-06, + "loss": 1.0989, + "step": 110832 + }, + { + "epoch": 1.33, + "grad_norm": 16.64385524397264, + "learning_rate": 5.291260582951576e-06, + "loss": 1.4749, + "step": 110835 + }, + { + "epoch": 1.33, + "grad_norm": 32.36207772771451, + "learning_rate": 5.290745228826213e-06, + "loss": 1.2134, + "step": 110838 + }, + { + "epoch": 1.33, + "grad_norm": 14.686981191413649, + "learning_rate": 5.290229890771844e-06, + "loss": 1.1095, + "step": 110841 + }, + { + "epoch": 1.33, + "grad_norm": 5.536465482424291, + "learning_rate": 5.289714568790221e-06, + "loss": 1.2155, + "step": 110844 + }, + { + "epoch": 1.33, + "grad_norm": 31.016407070735234, + "learning_rate": 5.289199262883105e-06, + "loss": 1.1641, + "step": 110847 + }, + { + "epoch": 1.33, + "grad_norm": 13.09043682675051, + "learning_rate": 5.2886839730522535e-06, + "loss": 1.1447, + "step": 110850 + }, + { + "epoch": 1.33, + "grad_norm": 37.55419479157588, + "learning_rate": 5.288168699299432e-06, + "loss": 1.0551, + "step": 110853 + }, + { + "epoch": 1.33, + "grad_norm": 3.5142533204323136, + "learning_rate": 5.2876534416263895e-06, + "loss": 1.248, + "step": 110856 + }, + { + "epoch": 1.33, + "grad_norm": 10.336951476095374, + "learning_rate": 5.287138200034891e-06, + "loss": 1.1863, + "step": 110859 + }, + { + "epoch": 1.33, + "grad_norm": 15.522680445748044, + "learning_rate": 5.28662297452669e-06, + "loss": 1.2635, + "step": 110862 + }, + { + "epoch": 1.33, + "grad_norm": 7.520476328270716, + "learning_rate": 5.2861077651035474e-06, + "loss": 1.2223, + "step": 110865 + }, + { + "epoch": 1.33, + "grad_norm": 7.124241691459448, + "learning_rate": 5.285592571767224e-06, + "loss": 0.9975, + "step": 110868 + }, + { + "epoch": 1.33, + "grad_norm": 7.452979372600273, + "learning_rate": 5.285077394519471e-06, + "loss": 1.2559, + "step": 110871 + }, + { + "epoch": 1.33, + "grad_norm": 10.163399525299363, + "learning_rate": 5.284562233362051e-06, + "loss": 1.3705, + "step": 110874 + }, + { + "epoch": 1.33, + "grad_norm": 8.956595558417106, + "learning_rate": 5.284047088296725e-06, + "loss": 1.3354, + "step": 110877 + }, + { + "epoch": 1.33, + "grad_norm": 10.442916168497433, + "learning_rate": 5.283531959325248e-06, + "loss": 1.257, + "step": 110880 + }, + { + "epoch": 1.33, + "grad_norm": 4.787733959955367, + "learning_rate": 5.283016846449373e-06, + "loss": 1.3763, + "step": 110883 + }, + { + "epoch": 1.33, + "grad_norm": 6.285497424631143, + "learning_rate": 5.282501749670864e-06, + "loss": 1.0535, + "step": 110886 + }, + { + "epoch": 1.33, + "grad_norm": 7.235241242184173, + "learning_rate": 5.2819866689914765e-06, + "loss": 1.2644, + "step": 110889 + }, + { + "epoch": 1.33, + "grad_norm": 5.801613209985538, + "learning_rate": 5.2814716044129685e-06, + "loss": 0.9165, + "step": 110892 + }, + { + "epoch": 1.33, + "grad_norm": 2.323973620323876, + "learning_rate": 5.280956555937102e-06, + "loss": 1.1845, + "step": 110895 + }, + { + "epoch": 1.33, + "grad_norm": 2.549255993019774, + "learning_rate": 5.280441523565629e-06, + "loss": 1.4744, + "step": 110898 + }, + { + "epoch": 1.33, + "grad_norm": 8.18766148207391, + "learning_rate": 5.279926507300311e-06, + "loss": 1.0566, + "step": 110901 + }, + { + "epoch": 1.33, + "grad_norm": 9.14589887582685, + "learning_rate": 5.2794115071429005e-06, + "loss": 1.0193, + "step": 110904 + }, + { + "epoch": 1.33, + "grad_norm": 4.8951815300883155, + "learning_rate": 5.278896523095163e-06, + "loss": 1.1519, + "step": 110907 + }, + { + "epoch": 1.33, + "grad_norm": 7.703115582988242, + "learning_rate": 5.278381555158846e-06, + "loss": 1.2944, + "step": 110910 + }, + { + "epoch": 1.33, + "grad_norm": 20.917685144794433, + "learning_rate": 5.277866603335712e-06, + "loss": 1.216, + "step": 110913 + }, + { + "epoch": 1.33, + "grad_norm": 2.528038334756555, + "learning_rate": 5.277351667627518e-06, + "loss": 1.1384, + "step": 110916 + }, + { + "epoch": 1.33, + "grad_norm": 9.045768742205475, + "learning_rate": 5.276836748036027e-06, + "loss": 0.9591, + "step": 110919 + }, + { + "epoch": 1.33, + "grad_norm": 5.72687029609504, + "learning_rate": 5.2763218445629904e-06, + "loss": 0.966, + "step": 110922 + }, + { + "epoch": 1.33, + "grad_norm": 6.422101645823994, + "learning_rate": 5.275806957210161e-06, + "loss": 1.0395, + "step": 110925 + }, + { + "epoch": 1.33, + "grad_norm": 7.973775507596144, + "learning_rate": 5.275292085979302e-06, + "loss": 1.2417, + "step": 110928 + }, + { + "epoch": 1.33, + "grad_norm": 9.598011537915513, + "learning_rate": 5.274777230872169e-06, + "loss": 1.2689, + "step": 110931 + }, + { + "epoch": 1.33, + "grad_norm": 9.658209603639955, + "learning_rate": 5.274262391890523e-06, + "loss": 1.3516, + "step": 110934 + }, + { + "epoch": 1.33, + "grad_norm": 24.409098068669866, + "learning_rate": 5.273747569036114e-06, + "loss": 1.1897, + "step": 110937 + }, + { + "epoch": 1.33, + "grad_norm": 8.654025110255214, + "learning_rate": 5.273232762310701e-06, + "loss": 0.8355, + "step": 110940 + }, + { + "epoch": 1.33, + "grad_norm": 6.668579284422909, + "learning_rate": 5.272717971716046e-06, + "loss": 1.371, + "step": 110943 + }, + { + "epoch": 1.33, + "grad_norm": 8.503305013021523, + "learning_rate": 5.272203197253898e-06, + "loss": 1.2814, + "step": 110946 + }, + { + "epoch": 1.33, + "grad_norm": 30.925902541272325, + "learning_rate": 5.271688438926023e-06, + "loss": 1.2934, + "step": 110949 + }, + { + "epoch": 1.33, + "grad_norm": 9.623416097405173, + "learning_rate": 5.2711736967341666e-06, + "loss": 1.0535, + "step": 110952 + }, + { + "epoch": 1.33, + "grad_norm": 145.36891619053267, + "learning_rate": 5.2706589706800915e-06, + "loss": 1.3866, + "step": 110955 + }, + { + "epoch": 1.33, + "grad_norm": 6.32628078579908, + "learning_rate": 5.270144260765554e-06, + "loss": 1.1482, + "step": 110958 + }, + { + "epoch": 1.33, + "grad_norm": 2.746664997055812, + "learning_rate": 5.269629566992314e-06, + "loss": 1.2715, + "step": 110961 + }, + { + "epoch": 1.33, + "grad_norm": 8.65818441711246, + "learning_rate": 5.2691148893621246e-06, + "loss": 1.1885, + "step": 110964 + }, + { + "epoch": 1.33, + "grad_norm": 7.578437059052927, + "learning_rate": 5.268600227876739e-06, + "loss": 1.1933, + "step": 110967 + }, + { + "epoch": 1.33, + "grad_norm": 6.060760465535614, + "learning_rate": 5.268085582537915e-06, + "loss": 1.2446, + "step": 110970 + }, + { + "epoch": 1.33, + "grad_norm": 5.356263491780078, + "learning_rate": 5.267570953347412e-06, + "loss": 0.9594, + "step": 110973 + }, + { + "epoch": 1.33, + "grad_norm": 25.447479320213652, + "learning_rate": 5.267056340306987e-06, + "loss": 1.3022, + "step": 110976 + }, + { + "epoch": 1.33, + "grad_norm": 13.245076412875598, + "learning_rate": 5.266541743418392e-06, + "loss": 1.5432, + "step": 110979 + }, + { + "epoch": 1.33, + "grad_norm": 11.350278597645417, + "learning_rate": 5.266027162683383e-06, + "loss": 1.4506, + "step": 110982 + }, + { + "epoch": 1.33, + "grad_norm": 7.447445053208555, + "learning_rate": 5.265512598103723e-06, + "loss": 1.254, + "step": 110985 + }, + { + "epoch": 1.33, + "grad_norm": 5.229886717758419, + "learning_rate": 5.264998049681164e-06, + "loss": 1.0958, + "step": 110988 + }, + { + "epoch": 1.33, + "grad_norm": 7.520746216524955, + "learning_rate": 5.2644835174174555e-06, + "loss": 1.0275, + "step": 110991 + }, + { + "epoch": 1.33, + "grad_norm": 15.813056583237007, + "learning_rate": 5.26396900131436e-06, + "loss": 1.3899, + "step": 110994 + }, + { + "epoch": 1.33, + "grad_norm": 10.675825178681489, + "learning_rate": 5.263454501373633e-06, + "loss": 1.2015, + "step": 110997 + }, + { + "epoch": 1.33, + "grad_norm": 7.424500284449303, + "learning_rate": 5.262940017597029e-06, + "loss": 1.4062, + "step": 111000 + }, + { + "epoch": 1.33, + "grad_norm": 4.7163075904834235, + "learning_rate": 5.262425549986307e-06, + "loss": 1.1443, + "step": 111003 + }, + { + "epoch": 1.33, + "grad_norm": 23.8982603537864, + "learning_rate": 5.261911098543222e-06, + "loss": 1.0309, + "step": 111006 + }, + { + "epoch": 1.33, + "grad_norm": 5.86174243922006, + "learning_rate": 5.261396663269524e-06, + "loss": 1.2886, + "step": 111009 + }, + { + "epoch": 1.33, + "grad_norm": 5.970588171980419, + "learning_rate": 5.260882244166972e-06, + "loss": 1.3781, + "step": 111012 + }, + { + "epoch": 1.33, + "grad_norm": 7.549950557314602, + "learning_rate": 5.260367841237324e-06, + "loss": 1.113, + "step": 111015 + }, + { + "epoch": 1.33, + "grad_norm": 6.9505753471517, + "learning_rate": 5.259853454482331e-06, + "loss": 1.0137, + "step": 111018 + }, + { + "epoch": 1.33, + "grad_norm": 3.835624186868604, + "learning_rate": 5.259339083903751e-06, + "loss": 1.2731, + "step": 111021 + }, + { + "epoch": 1.34, + "grad_norm": 7.862003305921949, + "learning_rate": 5.258824729503338e-06, + "loss": 1.2358, + "step": 111024 + }, + { + "epoch": 1.34, + "grad_norm": 21.900226581772984, + "learning_rate": 5.258310391282853e-06, + "loss": 1.2871, + "step": 111027 + }, + { + "epoch": 1.34, + "grad_norm": 7.497294259376946, + "learning_rate": 5.257796069244048e-06, + "loss": 0.7399, + "step": 111030 + }, + { + "epoch": 1.34, + "grad_norm": 29.78219422879711, + "learning_rate": 5.257281763388671e-06, + "loss": 1.2781, + "step": 111033 + }, + { + "epoch": 1.34, + "grad_norm": 12.686872115346004, + "learning_rate": 5.256767473718485e-06, + "loss": 1.1111, + "step": 111036 + }, + { + "epoch": 1.34, + "grad_norm": 10.54725478996696, + "learning_rate": 5.256253200235242e-06, + "loss": 1.0246, + "step": 111039 + }, + { + "epoch": 1.34, + "grad_norm": 3.9702105925653868, + "learning_rate": 5.2557389429407e-06, + "loss": 1.1561, + "step": 111042 + }, + { + "epoch": 1.34, + "grad_norm": 5.456918395671036, + "learning_rate": 5.255224701836611e-06, + "loss": 1.3011, + "step": 111045 + }, + { + "epoch": 1.34, + "grad_norm": 17.228557027332428, + "learning_rate": 5.254710476924734e-06, + "loss": 1.0293, + "step": 111048 + }, + { + "epoch": 1.34, + "grad_norm": 14.29607940275281, + "learning_rate": 5.254196268206817e-06, + "loss": 1.3932, + "step": 111051 + }, + { + "epoch": 1.34, + "grad_norm": 6.105059838936719, + "learning_rate": 5.253682075684618e-06, + "loss": 1.6198, + "step": 111054 + }, + { + "epoch": 1.34, + "grad_norm": 9.777595058290721, + "learning_rate": 5.253167899359899e-06, + "loss": 1.0955, + "step": 111057 + }, + { + "epoch": 1.34, + "grad_norm": 26.985938486178416, + "learning_rate": 5.252653739234401e-06, + "loss": 1.4686, + "step": 111060 + }, + { + "epoch": 1.34, + "grad_norm": 12.353704507596818, + "learning_rate": 5.2521395953098885e-06, + "loss": 1.1751, + "step": 111063 + }, + { + "epoch": 1.34, + "grad_norm": 6.474746957439526, + "learning_rate": 5.251625467588111e-06, + "loss": 0.8291, + "step": 111066 + }, + { + "epoch": 1.34, + "grad_norm": 7.358986366441885, + "learning_rate": 5.25111135607083e-06, + "loss": 1.2505, + "step": 111069 + }, + { + "epoch": 1.34, + "grad_norm": 4.733181094959724, + "learning_rate": 5.250597260759797e-06, + "loss": 1.3486, + "step": 111072 + }, + { + "epoch": 1.34, + "grad_norm": 5.907140911183503, + "learning_rate": 5.2500831816567595e-06, + "loss": 1.0012, + "step": 111075 + }, + { + "epoch": 1.34, + "grad_norm": 4.082635463681627, + "learning_rate": 5.249569118763478e-06, + "loss": 0.9603, + "step": 111078 + }, + { + "epoch": 1.34, + "grad_norm": 18.906076701119925, + "learning_rate": 5.249055072081705e-06, + "loss": 1.264, + "step": 111081 + }, + { + "epoch": 1.34, + "grad_norm": 8.739057932043044, + "learning_rate": 5.2485410416132e-06, + "loss": 0.9157, + "step": 111084 + }, + { + "epoch": 1.34, + "grad_norm": 3.720533837989208, + "learning_rate": 5.248027027359709e-06, + "loss": 1.2086, + "step": 111087 + }, + { + "epoch": 1.34, + "grad_norm": 7.770030773638946, + "learning_rate": 5.247513029322995e-06, + "loss": 1.168, + "step": 111090 + }, + { + "epoch": 1.34, + "grad_norm": 18.64053623762942, + "learning_rate": 5.2469990475048015e-06, + "loss": 0.754, + "step": 111093 + }, + { + "epoch": 1.34, + "grad_norm": 26.6572413747408, + "learning_rate": 5.246485081906894e-06, + "loss": 1.5034, + "step": 111096 + }, + { + "epoch": 1.34, + "grad_norm": 5.8804815931764, + "learning_rate": 5.245971132531014e-06, + "loss": 1.0196, + "step": 111099 + }, + { + "epoch": 1.34, + "grad_norm": 10.172997582560532, + "learning_rate": 5.245457199378925e-06, + "loss": 1.3739, + "step": 111102 + }, + { + "epoch": 1.34, + "grad_norm": 14.305639879597084, + "learning_rate": 5.244943282452377e-06, + "loss": 1.3132, + "step": 111105 + }, + { + "epoch": 1.34, + "grad_norm": 6.538116108219948, + "learning_rate": 5.244429381753125e-06, + "loss": 1.2992, + "step": 111108 + }, + { + "epoch": 1.34, + "grad_norm": 14.224738219724959, + "learning_rate": 5.243915497282926e-06, + "loss": 1.1368, + "step": 111111 + }, + { + "epoch": 1.34, + "grad_norm": 5.178401200894092, + "learning_rate": 5.243401629043531e-06, + "loss": 1.085, + "step": 111114 + }, + { + "epoch": 1.34, + "grad_norm": 10.934168667388342, + "learning_rate": 5.242887777036688e-06, + "loss": 1.1088, + "step": 111117 + }, + { + "epoch": 1.34, + "grad_norm": 10.102789535647014, + "learning_rate": 5.242373941264156e-06, + "loss": 1.4381, + "step": 111120 + }, + { + "epoch": 1.34, + "grad_norm": 4.830400744903038, + "learning_rate": 5.241860121727693e-06, + "loss": 1.1912, + "step": 111123 + }, + { + "epoch": 1.34, + "grad_norm": 9.066444598836544, + "learning_rate": 5.2413463184290415e-06, + "loss": 1.325, + "step": 111126 + }, + { + "epoch": 1.34, + "grad_norm": 7.477963898897228, + "learning_rate": 5.240832531369962e-06, + "loss": 1.0101, + "step": 111129 + }, + { + "epoch": 1.34, + "grad_norm": 16.105869016288473, + "learning_rate": 5.240318760552212e-06, + "loss": 1.3941, + "step": 111132 + }, + { + "epoch": 1.34, + "grad_norm": 21.52472178727057, + "learning_rate": 5.239805005977534e-06, + "loss": 1.3758, + "step": 111135 + }, + { + "epoch": 1.34, + "grad_norm": 15.696163989239052, + "learning_rate": 5.239291267647691e-06, + "loss": 1.2623, + "step": 111138 + }, + { + "epoch": 1.34, + "grad_norm": 10.296851005359471, + "learning_rate": 5.238777545564429e-06, + "loss": 1.1033, + "step": 111141 + }, + { + "epoch": 1.34, + "grad_norm": 3.2342486751408397, + "learning_rate": 5.238263839729504e-06, + "loss": 1.2971, + "step": 111144 + }, + { + "epoch": 1.34, + "grad_norm": 9.922452978640935, + "learning_rate": 5.237750150144669e-06, + "loss": 1.2053, + "step": 111147 + }, + { + "epoch": 1.34, + "grad_norm": 7.0861802257213675, + "learning_rate": 5.237236476811682e-06, + "loss": 1.4687, + "step": 111150 + }, + { + "epoch": 1.34, + "grad_norm": 21.354656557379048, + "learning_rate": 5.236722819732287e-06, + "loss": 1.2467, + "step": 111153 + }, + { + "epoch": 1.34, + "grad_norm": 12.150628793400447, + "learning_rate": 5.236209178908245e-06, + "loss": 1.1148, + "step": 111156 + }, + { + "epoch": 1.34, + "grad_norm": 1.986825082036266, + "learning_rate": 5.235695554341301e-06, + "loss": 1.2716, + "step": 111159 + }, + { + "epoch": 1.34, + "grad_norm": 14.334933789733146, + "learning_rate": 5.235181946033212e-06, + "loss": 0.8566, + "step": 111162 + }, + { + "epoch": 1.34, + "grad_norm": 16.064773077546654, + "learning_rate": 5.234668353985735e-06, + "loss": 1.0838, + "step": 111165 + }, + { + "epoch": 1.34, + "grad_norm": 15.537201606992946, + "learning_rate": 5.234154778200615e-06, + "loss": 1.2001, + "step": 111168 + }, + { + "epoch": 1.34, + "grad_norm": 10.219921463157567, + "learning_rate": 5.233641218679608e-06, + "loss": 1.5378, + "step": 111171 + }, + { + "epoch": 1.34, + "grad_norm": 7.168633934365735, + "learning_rate": 5.23312767542447e-06, + "loss": 1.1797, + "step": 111174 + }, + { + "epoch": 1.34, + "grad_norm": 17.243133134370257, + "learning_rate": 5.232614148436947e-06, + "loss": 0.9751, + "step": 111177 + }, + { + "epoch": 1.34, + "grad_norm": 15.778361520898033, + "learning_rate": 5.232100637718798e-06, + "loss": 1.2756, + "step": 111180 + }, + { + "epoch": 1.34, + "grad_norm": 18.577643203562438, + "learning_rate": 5.231587143271768e-06, + "loss": 1.2917, + "step": 111183 + }, + { + "epoch": 1.34, + "grad_norm": 1.9002861174822716, + "learning_rate": 5.231073665097615e-06, + "loss": 1.3104, + "step": 111186 + }, + { + "epoch": 1.34, + "grad_norm": 2.579912512930592, + "learning_rate": 5.230560203198087e-06, + "loss": 1.2944, + "step": 111189 + }, + { + "epoch": 1.34, + "grad_norm": 3.6136437458763395, + "learning_rate": 5.230046757574946e-06, + "loss": 1.1282, + "step": 111192 + }, + { + "epoch": 1.34, + "grad_norm": 11.769494518855131, + "learning_rate": 5.2295333282299324e-06, + "loss": 1.464, + "step": 111195 + }, + { + "epoch": 1.34, + "grad_norm": 3.231539156981894, + "learning_rate": 5.229019915164806e-06, + "loss": 1.066, + "step": 111198 + }, + { + "epoch": 1.34, + "grad_norm": 3.7505765466100867, + "learning_rate": 5.228506518381313e-06, + "loss": 1.0689, + "step": 111201 + }, + { + "epoch": 1.34, + "grad_norm": 8.280285001442984, + "learning_rate": 5.227993137881213e-06, + "loss": 1.4127, + "step": 111204 + }, + { + "epoch": 1.34, + "grad_norm": 15.378916766676111, + "learning_rate": 5.227479773666249e-06, + "loss": 0.9974, + "step": 111207 + }, + { + "epoch": 1.34, + "grad_norm": 9.37235499711384, + "learning_rate": 5.226966425738178e-06, + "loss": 1.6424, + "step": 111210 + }, + { + "epoch": 1.34, + "grad_norm": 9.462544318561958, + "learning_rate": 5.2264530940987515e-06, + "loss": 1.2285, + "step": 111213 + }, + { + "epoch": 1.34, + "grad_norm": 2.7664426684933883, + "learning_rate": 5.22593977874972e-06, + "loss": 1.2352, + "step": 111216 + }, + { + "epoch": 1.34, + "grad_norm": 8.930770009880371, + "learning_rate": 5.225426479692841e-06, + "loss": 1.0505, + "step": 111219 + }, + { + "epoch": 1.34, + "grad_norm": 5.532337531850927, + "learning_rate": 5.2249131969298615e-06, + "loss": 1.4609, + "step": 111222 + }, + { + "epoch": 1.34, + "grad_norm": 2.972170549897462, + "learning_rate": 5.224399930462529e-06, + "loss": 1.3135, + "step": 111225 + }, + { + "epoch": 1.34, + "grad_norm": 12.82361482330791, + "learning_rate": 5.2238866802926e-06, + "loss": 1.3393, + "step": 111228 + }, + { + "epoch": 1.34, + "grad_norm": 9.549761986433708, + "learning_rate": 5.223373446421829e-06, + "loss": 0.6853, + "step": 111231 + }, + { + "epoch": 1.34, + "grad_norm": 5.437564313682916, + "learning_rate": 5.222860228851961e-06, + "loss": 1.3155, + "step": 111234 + }, + { + "epoch": 1.34, + "grad_norm": 8.657688299573229, + "learning_rate": 5.222347027584749e-06, + "loss": 1.1633, + "step": 111237 + }, + { + "epoch": 1.34, + "grad_norm": 14.919911684651343, + "learning_rate": 5.22183384262195e-06, + "loss": 1.1286, + "step": 111240 + }, + { + "epoch": 1.34, + "grad_norm": 6.902325664146667, + "learning_rate": 5.221320673965308e-06, + "loss": 0.8732, + "step": 111243 + }, + { + "epoch": 1.34, + "grad_norm": 14.632457740706347, + "learning_rate": 5.2208075216165796e-06, + "loss": 1.3512, + "step": 111246 + }, + { + "epoch": 1.34, + "grad_norm": 5.100421702705988, + "learning_rate": 5.220294385577511e-06, + "loss": 1.1499, + "step": 111249 + }, + { + "epoch": 1.34, + "grad_norm": 5.142413038889209, + "learning_rate": 5.219781265849856e-06, + "loss": 0.791, + "step": 111252 + }, + { + "epoch": 1.34, + "grad_norm": 9.501648099342994, + "learning_rate": 5.219268162435364e-06, + "loss": 1.0944, + "step": 111255 + }, + { + "epoch": 1.34, + "grad_norm": 49.112369379408285, + "learning_rate": 5.218755075335794e-06, + "loss": 1.0142, + "step": 111258 + }, + { + "epoch": 1.34, + "grad_norm": 7.497873989797661, + "learning_rate": 5.218242004552886e-06, + "loss": 0.8758, + "step": 111261 + }, + { + "epoch": 1.34, + "grad_norm": 20.082060571795793, + "learning_rate": 5.217728950088399e-06, + "loss": 1.2316, + "step": 111264 + }, + { + "epoch": 1.34, + "grad_norm": 229.63012652308055, + "learning_rate": 5.217215911944077e-06, + "loss": 1.0169, + "step": 111267 + }, + { + "epoch": 1.34, + "grad_norm": 8.95213769271469, + "learning_rate": 5.216702890121674e-06, + "loss": 1.3407, + "step": 111270 + }, + { + "epoch": 1.34, + "grad_norm": 7.42062824106697, + "learning_rate": 5.2161898846229464e-06, + "loss": 1.1102, + "step": 111273 + }, + { + "epoch": 1.34, + "grad_norm": 7.592908299896149, + "learning_rate": 5.215676895449634e-06, + "loss": 1.1604, + "step": 111276 + }, + { + "epoch": 1.34, + "grad_norm": 7.091593580292197, + "learning_rate": 5.215163922603493e-06, + "loss": 1.3283, + "step": 111279 + }, + { + "epoch": 1.34, + "grad_norm": 8.629508865948207, + "learning_rate": 5.214650966086278e-06, + "loss": 1.1781, + "step": 111282 + }, + { + "epoch": 1.34, + "grad_norm": 6.331931185242558, + "learning_rate": 5.214138025899738e-06, + "loss": 1.5505, + "step": 111285 + }, + { + "epoch": 1.34, + "grad_norm": 7.357812744648124, + "learning_rate": 5.213625102045614e-06, + "loss": 1.1807, + "step": 111288 + }, + { + "epoch": 1.34, + "grad_norm": 12.276341877764105, + "learning_rate": 5.2131121945256645e-06, + "loss": 0.7433, + "step": 111291 + }, + { + "epoch": 1.34, + "grad_norm": 16.017864403865225, + "learning_rate": 5.21259930334164e-06, + "loss": 1.3809, + "step": 111294 + }, + { + "epoch": 1.34, + "grad_norm": 13.05326913579657, + "learning_rate": 5.212086428495289e-06, + "loss": 1.1031, + "step": 111297 + }, + { + "epoch": 1.34, + "grad_norm": 17.558951934515953, + "learning_rate": 5.211573569988367e-06, + "loss": 1.4008, + "step": 111300 + }, + { + "epoch": 1.34, + "grad_norm": 5.326801389908519, + "learning_rate": 5.211060727822614e-06, + "loss": 1.0761, + "step": 111303 + }, + { + "epoch": 1.34, + "grad_norm": 5.25478192642357, + "learning_rate": 5.210547901999791e-06, + "loss": 1.1746, + "step": 111306 + }, + { + "epoch": 1.34, + "grad_norm": 4.769881086453732, + "learning_rate": 5.210035092521638e-06, + "loss": 1.048, + "step": 111309 + }, + { + "epoch": 1.34, + "grad_norm": 13.995888198744451, + "learning_rate": 5.209522299389914e-06, + "loss": 1.2903, + "step": 111312 + }, + { + "epoch": 1.34, + "grad_norm": 4.936325930673296, + "learning_rate": 5.2090095226063604e-06, + "loss": 1.3981, + "step": 111315 + }, + { + "epoch": 1.34, + "grad_norm": 67.30878456339084, + "learning_rate": 5.208496762172735e-06, + "loss": 0.8794, + "step": 111318 + }, + { + "epoch": 1.34, + "grad_norm": 7.05998999338426, + "learning_rate": 5.20798401809078e-06, + "loss": 1.3654, + "step": 111321 + }, + { + "epoch": 1.34, + "grad_norm": 5.251695717386912, + "learning_rate": 5.207471290362256e-06, + "loss": 1.1203, + "step": 111324 + }, + { + "epoch": 1.34, + "grad_norm": 9.519810607308713, + "learning_rate": 5.206958578988906e-06, + "loss": 1.1455, + "step": 111327 + }, + { + "epoch": 1.34, + "grad_norm": 13.959818733420093, + "learning_rate": 5.206445883972474e-06, + "loss": 1.1699, + "step": 111330 + }, + { + "epoch": 1.34, + "grad_norm": 6.4519882815709035, + "learning_rate": 5.205933205314717e-06, + "loss": 1.3878, + "step": 111333 + }, + { + "epoch": 1.34, + "grad_norm": 19.875915951238277, + "learning_rate": 5.2054205430173835e-06, + "loss": 1.2077, + "step": 111336 + }, + { + "epoch": 1.34, + "grad_norm": 11.116285499225699, + "learning_rate": 5.204907897082225e-06, + "loss": 1.3158, + "step": 111339 + }, + { + "epoch": 1.34, + "grad_norm": 8.019168896092333, + "learning_rate": 5.2043952675109866e-06, + "loss": 0.9101, + "step": 111342 + }, + { + "epoch": 1.34, + "grad_norm": 8.81007215170438, + "learning_rate": 5.203882654305419e-06, + "loss": 1.4673, + "step": 111345 + }, + { + "epoch": 1.34, + "grad_norm": 10.273952804089259, + "learning_rate": 5.203370057467276e-06, + "loss": 1.197, + "step": 111348 + }, + { + "epoch": 1.34, + "grad_norm": 7.570427967824072, + "learning_rate": 5.2028574769983e-06, + "loss": 1.3796, + "step": 111351 + }, + { + "epoch": 1.34, + "grad_norm": 6.962282534467474, + "learning_rate": 5.202344912900248e-06, + "loss": 1.4741, + "step": 111354 + }, + { + "epoch": 1.34, + "grad_norm": 5.343767325017926, + "learning_rate": 5.201832365174859e-06, + "loss": 1.0957, + "step": 111357 + }, + { + "epoch": 1.34, + "grad_norm": 47.55724390355884, + "learning_rate": 5.201319833823889e-06, + "loss": 1.3069, + "step": 111360 + }, + { + "epoch": 1.34, + "grad_norm": 7.890699577688918, + "learning_rate": 5.200807318849086e-06, + "loss": 0.9228, + "step": 111363 + }, + { + "epoch": 1.34, + "grad_norm": 4.123037916874809, + "learning_rate": 5.200294820252203e-06, + "loss": 1.1588, + "step": 111366 + }, + { + "epoch": 1.34, + "grad_norm": 5.415287166047078, + "learning_rate": 5.199782338034984e-06, + "loss": 1.4645, + "step": 111369 + }, + { + "epoch": 1.34, + "grad_norm": 5.619249501313167, + "learning_rate": 5.199269872199175e-06, + "loss": 1.357, + "step": 111372 + }, + { + "epoch": 1.34, + "grad_norm": 6.923858064737137, + "learning_rate": 5.19875742274653e-06, + "loss": 1.2787, + "step": 111375 + }, + { + "epoch": 1.34, + "grad_norm": 12.825002729602746, + "learning_rate": 5.198244989678795e-06, + "loss": 1.2001, + "step": 111378 + }, + { + "epoch": 1.34, + "grad_norm": 3.4660335140420795, + "learning_rate": 5.1977325729977246e-06, + "loss": 1.0317, + "step": 111381 + }, + { + "epoch": 1.34, + "grad_norm": 23.995072084453, + "learning_rate": 5.19722017270506e-06, + "loss": 1.2253, + "step": 111384 + }, + { + "epoch": 1.34, + "grad_norm": 10.770900469117628, + "learning_rate": 5.196707788802552e-06, + "loss": 0.9672, + "step": 111387 + }, + { + "epoch": 1.34, + "grad_norm": 6.084231088354313, + "learning_rate": 5.196195421291954e-06, + "loss": 1.1062, + "step": 111390 + }, + { + "epoch": 1.34, + "grad_norm": 6.941471803951774, + "learning_rate": 5.195683070175011e-06, + "loss": 1.5517, + "step": 111393 + }, + { + "epoch": 1.34, + "grad_norm": 12.40411358714706, + "learning_rate": 5.1951707354534685e-06, + "loss": 1.0211, + "step": 111396 + }, + { + "epoch": 1.34, + "grad_norm": 7.729966533216057, + "learning_rate": 5.194658417129077e-06, + "loss": 1.0748, + "step": 111399 + }, + { + "epoch": 1.34, + "grad_norm": 8.768706616600442, + "learning_rate": 5.194146115203584e-06, + "loss": 1.4222, + "step": 111402 + }, + { + "epoch": 1.34, + "grad_norm": 12.525772614580797, + "learning_rate": 5.193633829678741e-06, + "loss": 1.2155, + "step": 111405 + }, + { + "epoch": 1.34, + "grad_norm": 9.031541306157212, + "learning_rate": 5.193121560556298e-06, + "loss": 1.2369, + "step": 111408 + }, + { + "epoch": 1.34, + "grad_norm": 12.667172883028817, + "learning_rate": 5.192609307837999e-06, + "loss": 1.2382, + "step": 111411 + }, + { + "epoch": 1.34, + "grad_norm": 7.413484776231034, + "learning_rate": 5.192097071525589e-06, + "loss": 1.1629, + "step": 111414 + }, + { + "epoch": 1.34, + "grad_norm": 5.988246458351125, + "learning_rate": 5.1915848516208216e-06, + "loss": 1.1217, + "step": 111417 + }, + { + "epoch": 1.34, + "grad_norm": 6.250686563439196, + "learning_rate": 5.191072648125445e-06, + "loss": 1.1118, + "step": 111420 + }, + { + "epoch": 1.34, + "grad_norm": 13.238367697312233, + "learning_rate": 5.190560461041201e-06, + "loss": 1.308, + "step": 111423 + }, + { + "epoch": 1.34, + "grad_norm": 9.89826930905792, + "learning_rate": 5.190048290369843e-06, + "loss": 1.5112, + "step": 111426 + }, + { + "epoch": 1.34, + "grad_norm": 7.328478205014588, + "learning_rate": 5.189536136113118e-06, + "loss": 0.9138, + "step": 111429 + }, + { + "epoch": 1.34, + "grad_norm": 49.373892666790496, + "learning_rate": 5.1890239982727774e-06, + "loss": 1.2786, + "step": 111432 + }, + { + "epoch": 1.34, + "grad_norm": 9.680972402240306, + "learning_rate": 5.188511876850564e-06, + "loss": 1.1848, + "step": 111435 + }, + { + "epoch": 1.34, + "grad_norm": 16.446649998211115, + "learning_rate": 5.1879997718482224e-06, + "loss": 1.1576, + "step": 111438 + }, + { + "epoch": 1.34, + "grad_norm": 9.758582149113243, + "learning_rate": 5.187487683267505e-06, + "loss": 1.1569, + "step": 111441 + }, + { + "epoch": 1.34, + "grad_norm": 16.3532464539808, + "learning_rate": 5.1869756111101585e-06, + "loss": 1.1098, + "step": 111444 + }, + { + "epoch": 1.34, + "grad_norm": 5.063472314970553, + "learning_rate": 5.1864635553779345e-06, + "loss": 1.1415, + "step": 111447 + }, + { + "epoch": 1.34, + "grad_norm": 6.47383168496805, + "learning_rate": 5.185951516072574e-06, + "loss": 1.4161, + "step": 111450 + }, + { + "epoch": 1.34, + "grad_norm": 8.781108684131576, + "learning_rate": 5.185439493195825e-06, + "loss": 1.1065, + "step": 111453 + }, + { + "epoch": 1.34, + "grad_norm": 12.610023890101159, + "learning_rate": 5.1849274867494404e-06, + "loss": 1.2247, + "step": 111456 + }, + { + "epoch": 1.34, + "grad_norm": 17.83128502707829, + "learning_rate": 5.184415496735162e-06, + "loss": 1.0265, + "step": 111459 + }, + { + "epoch": 1.34, + "grad_norm": 13.763852040519783, + "learning_rate": 5.183903523154741e-06, + "loss": 1.3396, + "step": 111462 + }, + { + "epoch": 1.34, + "grad_norm": 8.639168058747277, + "learning_rate": 5.18339156600992e-06, + "loss": 0.9909, + "step": 111465 + }, + { + "epoch": 1.34, + "grad_norm": 52.45903906377756, + "learning_rate": 5.182879625302448e-06, + "loss": 1.2558, + "step": 111468 + }, + { + "epoch": 1.34, + "grad_norm": 15.801962777783253, + "learning_rate": 5.182367701034072e-06, + "loss": 1.2772, + "step": 111471 + }, + { + "epoch": 1.34, + "grad_norm": 25.2097985985671, + "learning_rate": 5.181855793206545e-06, + "loss": 1.4881, + "step": 111474 + }, + { + "epoch": 1.34, + "grad_norm": 4.246152731887955, + "learning_rate": 5.181343901821608e-06, + "loss": 1.2275, + "step": 111477 + }, + { + "epoch": 1.34, + "grad_norm": 6.669523252219599, + "learning_rate": 5.180832026881005e-06, + "loss": 1.1274, + "step": 111480 + }, + { + "epoch": 1.34, + "grad_norm": 15.486638593774089, + "learning_rate": 5.180320168386486e-06, + "loss": 0.8573, + "step": 111483 + }, + { + "epoch": 1.34, + "grad_norm": 3.7030962583019225, + "learning_rate": 5.179808326339799e-06, + "loss": 1.1473, + "step": 111486 + }, + { + "epoch": 1.34, + "grad_norm": 6.807581220037323, + "learning_rate": 5.179296500742693e-06, + "loss": 1.0759, + "step": 111489 + }, + { + "epoch": 1.34, + "grad_norm": 5.728372317415663, + "learning_rate": 5.178784691596908e-06, + "loss": 1.0368, + "step": 111492 + }, + { + "epoch": 1.34, + "grad_norm": 43.56534542600985, + "learning_rate": 5.178272898904196e-06, + "loss": 0.9845, + "step": 111495 + }, + { + "epoch": 1.34, + "grad_norm": 7.133804215332986, + "learning_rate": 5.177761122666302e-06, + "loss": 1.1934, + "step": 111498 + }, + { + "epoch": 1.34, + "grad_norm": 16.766278352004697, + "learning_rate": 5.177249362884975e-06, + "loss": 1.5478, + "step": 111501 + }, + { + "epoch": 1.34, + "grad_norm": 15.991053360743265, + "learning_rate": 5.176737619561955e-06, + "loss": 1.0832, + "step": 111504 + }, + { + "epoch": 1.34, + "grad_norm": 6.319533400297261, + "learning_rate": 5.176225892698992e-06, + "loss": 1.155, + "step": 111507 + }, + { + "epoch": 1.34, + "grad_norm": 8.081458274581378, + "learning_rate": 5.175714182297831e-06, + "loss": 1.1432, + "step": 111510 + }, + { + "epoch": 1.34, + "grad_norm": 12.169869661392413, + "learning_rate": 5.17520248836022e-06, + "loss": 1.4333, + "step": 111513 + }, + { + "epoch": 1.34, + "grad_norm": 8.929075221133585, + "learning_rate": 5.17469081088791e-06, + "loss": 1.1752, + "step": 111516 + }, + { + "epoch": 1.34, + "grad_norm": 5.939969077356356, + "learning_rate": 5.174179149882642e-06, + "loss": 0.9824, + "step": 111519 + }, + { + "epoch": 1.34, + "grad_norm": 9.197486934112082, + "learning_rate": 5.173667505346158e-06, + "loss": 1.1633, + "step": 111522 + }, + { + "epoch": 1.34, + "grad_norm": 6.722766217929888, + "learning_rate": 5.173155877280209e-06, + "loss": 1.3756, + "step": 111525 + }, + { + "epoch": 1.34, + "grad_norm": 15.65073449243331, + "learning_rate": 5.172644265686544e-06, + "loss": 1.0928, + "step": 111528 + }, + { + "epoch": 1.34, + "grad_norm": 3.278982041734184, + "learning_rate": 5.172132670566901e-06, + "loss": 1.1094, + "step": 111531 + }, + { + "epoch": 1.34, + "grad_norm": 11.697322191608135, + "learning_rate": 5.171621091923031e-06, + "loss": 1.7841, + "step": 111534 + }, + { + "epoch": 1.34, + "grad_norm": 13.812816172405094, + "learning_rate": 5.171109529756678e-06, + "loss": 0.8968, + "step": 111537 + }, + { + "epoch": 1.34, + "grad_norm": 16.29334899867876, + "learning_rate": 5.170597984069594e-06, + "loss": 1.1587, + "step": 111540 + }, + { + "epoch": 1.34, + "grad_norm": 5.679045314782657, + "learning_rate": 5.170086454863519e-06, + "loss": 0.7493, + "step": 111543 + }, + { + "epoch": 1.34, + "grad_norm": 10.314993531304431, + "learning_rate": 5.1695749421401945e-06, + "loss": 1.1252, + "step": 111546 + }, + { + "epoch": 1.34, + "grad_norm": 4.556829440433519, + "learning_rate": 5.169063445901372e-06, + "loss": 1.3584, + "step": 111549 + }, + { + "epoch": 1.34, + "grad_norm": 12.653528428343828, + "learning_rate": 5.1685519661487944e-06, + "loss": 0.8391, + "step": 111552 + }, + { + "epoch": 1.34, + "grad_norm": 6.627007361185729, + "learning_rate": 5.168040502884214e-06, + "loss": 0.874, + "step": 111555 + }, + { + "epoch": 1.34, + "grad_norm": 10.612926970961905, + "learning_rate": 5.167529056109366e-06, + "loss": 1.3343, + "step": 111558 + }, + { + "epoch": 1.34, + "grad_norm": 11.638902055730911, + "learning_rate": 5.167017625826005e-06, + "loss": 1.0775, + "step": 111561 + }, + { + "epoch": 1.34, + "grad_norm": 10.818763977225048, + "learning_rate": 5.166506212035869e-06, + "loss": 1.2338, + "step": 111564 + }, + { + "epoch": 1.34, + "grad_norm": 3.0813400983511032, + "learning_rate": 5.165994814740704e-06, + "loss": 1.194, + "step": 111567 + }, + { + "epoch": 1.34, + "grad_norm": 11.2314710650375, + "learning_rate": 5.165483433942264e-06, + "loss": 1.0326, + "step": 111570 + }, + { + "epoch": 1.34, + "grad_norm": 8.413968828475635, + "learning_rate": 5.164972069642282e-06, + "loss": 1.1029, + "step": 111573 + }, + { + "epoch": 1.34, + "grad_norm": 6.619214885787385, + "learning_rate": 5.16446072184251e-06, + "loss": 1.223, + "step": 111576 + }, + { + "epoch": 1.34, + "grad_norm": 13.561314592322006, + "learning_rate": 5.163949390544692e-06, + "loss": 0.7702, + "step": 111579 + }, + { + "epoch": 1.34, + "grad_norm": 11.2567425113402, + "learning_rate": 5.163438075750575e-06, + "loss": 1.0222, + "step": 111582 + }, + { + "epoch": 1.34, + "grad_norm": 4.826919546073282, + "learning_rate": 5.162926777461903e-06, + "loss": 1.2009, + "step": 111585 + }, + { + "epoch": 1.34, + "grad_norm": 11.052556655803508, + "learning_rate": 5.1624154956804155e-06, + "loss": 1.0438, + "step": 111588 + }, + { + "epoch": 1.34, + "grad_norm": 5.886492628244939, + "learning_rate": 5.161904230407861e-06, + "loss": 0.9154, + "step": 111591 + }, + { + "epoch": 1.34, + "grad_norm": 8.7479608311839, + "learning_rate": 5.1613929816459855e-06, + "loss": 1.2837, + "step": 111594 + }, + { + "epoch": 1.34, + "grad_norm": 12.818209636296588, + "learning_rate": 5.160881749396536e-06, + "loss": 1.2726, + "step": 111597 + }, + { + "epoch": 1.34, + "grad_norm": 10.369483504889985, + "learning_rate": 5.16037053366125e-06, + "loss": 1.1943, + "step": 111600 + }, + { + "epoch": 1.34, + "grad_norm": 3.3309728870464435, + "learning_rate": 5.159859334441879e-06, + "loss": 1.2443, + "step": 111603 + }, + { + "epoch": 1.34, + "grad_norm": 10.397203221234905, + "learning_rate": 5.1593481517401625e-06, + "loss": 1.5016, + "step": 111606 + }, + { + "epoch": 1.34, + "grad_norm": 19.347387894985783, + "learning_rate": 5.158836985557851e-06, + "loss": 1.0753, + "step": 111609 + }, + { + "epoch": 1.34, + "grad_norm": 10.104979541301017, + "learning_rate": 5.15832583589668e-06, + "loss": 1.1907, + "step": 111612 + }, + { + "epoch": 1.34, + "grad_norm": 11.103406027306445, + "learning_rate": 5.157814702758401e-06, + "loss": 1.0813, + "step": 111615 + }, + { + "epoch": 1.34, + "grad_norm": 13.016484720756695, + "learning_rate": 5.157303586144755e-06, + "loss": 0.8296, + "step": 111618 + }, + { + "epoch": 1.34, + "grad_norm": 10.316988161946195, + "learning_rate": 5.156792486057487e-06, + "loss": 1.348, + "step": 111621 + }, + { + "epoch": 1.34, + "grad_norm": 7.984030840972759, + "learning_rate": 5.156281402498347e-06, + "loss": 1.511, + "step": 111624 + }, + { + "epoch": 1.34, + "grad_norm": 8.303566292992612, + "learning_rate": 5.155770335469073e-06, + "loss": 0.9093, + "step": 111627 + }, + { + "epoch": 1.34, + "grad_norm": 13.494191360763947, + "learning_rate": 5.155259284971406e-06, + "loss": 0.9868, + "step": 111630 + }, + { + "epoch": 1.34, + "grad_norm": 4.550827058251288, + "learning_rate": 5.1547482510070945e-06, + "loss": 0.9658, + "step": 111633 + }, + { + "epoch": 1.34, + "grad_norm": 8.999055379999579, + "learning_rate": 5.154237233577885e-06, + "loss": 1.2497, + "step": 111636 + }, + { + "epoch": 1.34, + "grad_norm": 12.995610340145157, + "learning_rate": 5.153726232685516e-06, + "loss": 0.9986, + "step": 111639 + }, + { + "epoch": 1.34, + "grad_norm": 3.354569114581503, + "learning_rate": 5.153215248331733e-06, + "loss": 1.4677, + "step": 111642 + }, + { + "epoch": 1.34, + "grad_norm": 9.97647097698344, + "learning_rate": 5.1527042805182835e-06, + "loss": 1.0817, + "step": 111645 + }, + { + "epoch": 1.34, + "grad_norm": 6.656349501195412, + "learning_rate": 5.152193329246906e-06, + "loss": 1.0365, + "step": 111648 + }, + { + "epoch": 1.34, + "grad_norm": 6.65145066610606, + "learning_rate": 5.1516823945193485e-06, + "loss": 1.2232, + "step": 111651 + }, + { + "epoch": 1.34, + "grad_norm": 4.372694639341284, + "learning_rate": 5.151171476337351e-06, + "loss": 1.0544, + "step": 111654 + }, + { + "epoch": 1.34, + "grad_norm": 21.633658014243544, + "learning_rate": 5.150660574702657e-06, + "loss": 1.3862, + "step": 111657 + }, + { + "epoch": 1.34, + "grad_norm": 5.509195457048118, + "learning_rate": 5.150149689617012e-06, + "loss": 1.2053, + "step": 111660 + }, + { + "epoch": 1.34, + "grad_norm": 11.699839697840437, + "learning_rate": 5.149638821082164e-06, + "loss": 0.9488, + "step": 111663 + }, + { + "epoch": 1.34, + "grad_norm": 4.7023065435036004, + "learning_rate": 5.149127969099846e-06, + "loss": 1.4359, + "step": 111666 + }, + { + "epoch": 1.34, + "grad_norm": 6.402213763742527, + "learning_rate": 5.148617133671813e-06, + "loss": 1.38, + "step": 111669 + }, + { + "epoch": 1.34, + "grad_norm": 10.541791802945735, + "learning_rate": 5.148106314799797e-06, + "loss": 1.0253, + "step": 111672 + }, + { + "epoch": 1.34, + "grad_norm": 8.121589272943574, + "learning_rate": 5.147595512485546e-06, + "loss": 0.829, + "step": 111675 + }, + { + "epoch": 1.34, + "grad_norm": 18.220434946598584, + "learning_rate": 5.147084726730809e-06, + "loss": 1.3492, + "step": 111678 + }, + { + "epoch": 1.34, + "grad_norm": 10.98534159458994, + "learning_rate": 5.14657395753732e-06, + "loss": 1.2334, + "step": 111681 + }, + { + "epoch": 1.34, + "grad_norm": 28.714202460074624, + "learning_rate": 5.146063204906824e-06, + "loss": 1.3011, + "step": 111684 + }, + { + "epoch": 1.34, + "grad_norm": 10.603682630640188, + "learning_rate": 5.145552468841071e-06, + "loss": 0.8924, + "step": 111687 + }, + { + "epoch": 1.34, + "grad_norm": 3.5985449370941276, + "learning_rate": 5.145041749341799e-06, + "loss": 1.2641, + "step": 111690 + }, + { + "epoch": 1.34, + "grad_norm": 11.170710001757307, + "learning_rate": 5.144531046410746e-06, + "loss": 0.9822, + "step": 111693 + }, + { + "epoch": 1.34, + "grad_norm": 2.668686525192959, + "learning_rate": 5.144020360049662e-06, + "loss": 1.3056, + "step": 111696 + }, + { + "epoch": 1.34, + "grad_norm": 6.129830730685256, + "learning_rate": 5.143509690260284e-06, + "loss": 0.9916, + "step": 111699 + }, + { + "epoch": 1.34, + "grad_norm": 46.50290530620643, + "learning_rate": 5.14299903704436e-06, + "loss": 1.3982, + "step": 111702 + }, + { + "epoch": 1.34, + "grad_norm": 7.954948609311717, + "learning_rate": 5.1424884004036355e-06, + "loss": 1.0771, + "step": 111705 + }, + { + "epoch": 1.34, + "grad_norm": 26.84375893378858, + "learning_rate": 5.141977780339844e-06, + "loss": 1.2433, + "step": 111708 + }, + { + "epoch": 1.34, + "grad_norm": 39.584241187024055, + "learning_rate": 5.141467176854735e-06, + "loss": 1.5741, + "step": 111711 + }, + { + "epoch": 1.34, + "grad_norm": 45.464735781688894, + "learning_rate": 5.140956589950046e-06, + "loss": 1.0934, + "step": 111714 + }, + { + "epoch": 1.34, + "grad_norm": 7.833989825566451, + "learning_rate": 5.140446019627525e-06, + "loss": 0.9597, + "step": 111717 + }, + { + "epoch": 1.34, + "grad_norm": 16.438205348098716, + "learning_rate": 5.139935465888908e-06, + "loss": 1.1259, + "step": 111720 + }, + { + "epoch": 1.34, + "grad_norm": 3.9984905285069683, + "learning_rate": 5.139424928735941e-06, + "loss": 1.1472, + "step": 111723 + }, + { + "epoch": 1.34, + "grad_norm": 6.752549477359261, + "learning_rate": 5.138914408170366e-06, + "loss": 1.3198, + "step": 111726 + }, + { + "epoch": 1.34, + "grad_norm": 31.269733627090872, + "learning_rate": 5.138403904193927e-06, + "loss": 1.354, + "step": 111729 + }, + { + "epoch": 1.34, + "grad_norm": 15.83160449099017, + "learning_rate": 5.137893416808363e-06, + "loss": 1.2202, + "step": 111732 + }, + { + "epoch": 1.34, + "grad_norm": 12.086142384487458, + "learning_rate": 5.137382946015419e-06, + "loss": 1.2494, + "step": 111735 + }, + { + "epoch": 1.34, + "grad_norm": 13.123514103452516, + "learning_rate": 5.136872491816833e-06, + "loss": 1.1126, + "step": 111738 + }, + { + "epoch": 1.34, + "grad_norm": 6.910565850122072, + "learning_rate": 5.13636205421435e-06, + "loss": 0.885, + "step": 111741 + }, + { + "epoch": 1.34, + "grad_norm": 3.1844668829463587, + "learning_rate": 5.135851633209714e-06, + "loss": 1.1325, + "step": 111744 + }, + { + "epoch": 1.34, + "grad_norm": 4.072316076644681, + "learning_rate": 5.1353412288046625e-06, + "loss": 1.1624, + "step": 111747 + }, + { + "epoch": 1.34, + "grad_norm": 13.886444289668187, + "learning_rate": 5.1348308410009376e-06, + "loss": 1.3407, + "step": 111750 + }, + { + "epoch": 1.34, + "grad_norm": 8.103119470726128, + "learning_rate": 5.134320469800287e-06, + "loss": 0.9844, + "step": 111753 + }, + { + "epoch": 1.34, + "grad_norm": 11.842080924983238, + "learning_rate": 5.133810115204443e-06, + "loss": 1.0723, + "step": 111756 + }, + { + "epoch": 1.34, + "grad_norm": 5.996201078620794, + "learning_rate": 5.133299777215157e-06, + "loss": 0.9049, + "step": 111759 + }, + { + "epoch": 1.34, + "grad_norm": 4.39662403516218, + "learning_rate": 5.132789455834162e-06, + "loss": 0.9125, + "step": 111762 + }, + { + "epoch": 1.34, + "grad_norm": 3.7754885113397045, + "learning_rate": 5.132279151063203e-06, + "loss": 0.9677, + "step": 111765 + }, + { + "epoch": 1.34, + "grad_norm": 11.171492913776225, + "learning_rate": 5.131768862904023e-06, + "loss": 1.2512, + "step": 111768 + }, + { + "epoch": 1.34, + "grad_norm": 4.391461296321922, + "learning_rate": 5.131258591358366e-06, + "loss": 1.1651, + "step": 111771 + }, + { + "epoch": 1.34, + "grad_norm": 10.107762191338958, + "learning_rate": 5.130748336427964e-06, + "loss": 1.1347, + "step": 111774 + }, + { + "epoch": 1.34, + "grad_norm": 7.206778257067236, + "learning_rate": 5.130238098114571e-06, + "loss": 1.1744, + "step": 111777 + }, + { + "epoch": 1.34, + "grad_norm": 33.28349719836782, + "learning_rate": 5.129727876419914e-06, + "loss": 0.84, + "step": 111780 + }, + { + "epoch": 1.34, + "grad_norm": 11.293434269193783, + "learning_rate": 5.129217671345743e-06, + "loss": 1.3001, + "step": 111783 + }, + { + "epoch": 1.34, + "grad_norm": 31.846468615424904, + "learning_rate": 5.128707482893802e-06, + "loss": 1.2742, + "step": 111786 + }, + { + "epoch": 1.34, + "grad_norm": 6.7823942449473025, + "learning_rate": 5.128197311065823e-06, + "loss": 1.2623, + "step": 111789 + }, + { + "epoch": 1.34, + "grad_norm": 14.393912717862397, + "learning_rate": 5.127687155863554e-06, + "loss": 1.194, + "step": 111792 + }, + { + "epoch": 1.34, + "grad_norm": 6.707666454565219, + "learning_rate": 5.1271770172887355e-06, + "loss": 0.997, + "step": 111795 + }, + { + "epoch": 1.34, + "grad_norm": 19.75155469671497, + "learning_rate": 5.126666895343106e-06, + "loss": 1.155, + "step": 111798 + }, + { + "epoch": 1.34, + "grad_norm": 2.869364120016213, + "learning_rate": 5.126156790028405e-06, + "loss": 1.2195, + "step": 111801 + }, + { + "epoch": 1.34, + "grad_norm": 3.546654788891486, + "learning_rate": 5.125646701346374e-06, + "loss": 0.8907, + "step": 111804 + }, + { + "epoch": 1.34, + "grad_norm": 14.41842729392897, + "learning_rate": 5.125136629298757e-06, + "loss": 1.5788, + "step": 111807 + }, + { + "epoch": 1.34, + "grad_norm": 16.671296367446782, + "learning_rate": 5.1246265738872905e-06, + "loss": 1.5536, + "step": 111810 + }, + { + "epoch": 1.34, + "grad_norm": 5.687482195676109, + "learning_rate": 5.124116535113722e-06, + "loss": 1.5188, + "step": 111813 + }, + { + "epoch": 1.34, + "grad_norm": 9.673384726952294, + "learning_rate": 5.1236065129797845e-06, + "loss": 1.0198, + "step": 111816 + }, + { + "epoch": 1.34, + "grad_norm": 3.5787409569174797, + "learning_rate": 5.123096507487224e-06, + "loss": 1.2123, + "step": 111819 + }, + { + "epoch": 1.34, + "grad_norm": 5.1338653498515, + "learning_rate": 5.1225865186377755e-06, + "loss": 1.0342, + "step": 111822 + }, + { + "epoch": 1.34, + "grad_norm": 7.49490118981287, + "learning_rate": 5.122076546433185e-06, + "loss": 1.1045, + "step": 111825 + }, + { + "epoch": 1.34, + "grad_norm": 21.218675346097207, + "learning_rate": 5.121566590875189e-06, + "loss": 1.4477, + "step": 111828 + }, + { + "epoch": 1.34, + "grad_norm": 15.727001136004395, + "learning_rate": 5.121056651965527e-06, + "loss": 1.2602, + "step": 111831 + }, + { + "epoch": 1.34, + "grad_norm": 5.169430387600509, + "learning_rate": 5.120546729705942e-06, + "loss": 1.531, + "step": 111834 + }, + { + "epoch": 1.34, + "grad_norm": 40.402486465690366, + "learning_rate": 5.120036824098176e-06, + "loss": 1.5639, + "step": 111837 + }, + { + "epoch": 1.34, + "grad_norm": 6.30701734058455, + "learning_rate": 5.119526935143968e-06, + "loss": 0.9966, + "step": 111840 + }, + { + "epoch": 1.34, + "grad_norm": 18.632578487388226, + "learning_rate": 5.119017062845053e-06, + "loss": 1.3576, + "step": 111843 + }, + { + "epoch": 1.34, + "grad_norm": 12.361411997758964, + "learning_rate": 5.118507207203174e-06, + "loss": 1.1446, + "step": 111846 + }, + { + "epoch": 1.34, + "grad_norm": 8.964073368925982, + "learning_rate": 5.117997368220072e-06, + "loss": 1.4628, + "step": 111849 + }, + { + "epoch": 1.34, + "grad_norm": 11.734394423823339, + "learning_rate": 5.117487545897488e-06, + "loss": 0.9275, + "step": 111852 + }, + { + "epoch": 1.35, + "grad_norm": 10.805959748947599, + "learning_rate": 5.116977740237159e-06, + "loss": 1.2019, + "step": 111855 + }, + { + "epoch": 1.35, + "grad_norm": 9.044704490727728, + "learning_rate": 5.116467951240825e-06, + "loss": 1.3789, + "step": 111858 + }, + { + "epoch": 1.35, + "grad_norm": 19.63171236944227, + "learning_rate": 5.115958178910232e-06, + "loss": 1.2187, + "step": 111861 + }, + { + "epoch": 1.35, + "grad_norm": 12.020368107630544, + "learning_rate": 5.115448423247109e-06, + "loss": 1.3146, + "step": 111864 + }, + { + "epoch": 1.35, + "grad_norm": 5.778622029928666, + "learning_rate": 5.114938684253205e-06, + "loss": 1.1134, + "step": 111867 + }, + { + "epoch": 1.35, + "grad_norm": 43.13520902005329, + "learning_rate": 5.114428961930253e-06, + "loss": 1.1661, + "step": 111870 + }, + { + "epoch": 1.35, + "grad_norm": 35.94770241548059, + "learning_rate": 5.113919256279996e-06, + "loss": 1.0714, + "step": 111873 + }, + { + "epoch": 1.35, + "grad_norm": 4.409538001123893, + "learning_rate": 5.113409567304171e-06, + "loss": 0.8373, + "step": 111876 + }, + { + "epoch": 1.35, + "grad_norm": 7.708730471827563, + "learning_rate": 5.112899895004524e-06, + "loss": 0.8944, + "step": 111879 + }, + { + "epoch": 1.35, + "grad_norm": 16.070000850343952, + "learning_rate": 5.112390239382787e-06, + "loss": 1.2821, + "step": 111882 + }, + { + "epoch": 1.35, + "grad_norm": 14.186107926603142, + "learning_rate": 5.1118806004406994e-06, + "loss": 1.1718, + "step": 111885 + }, + { + "epoch": 1.35, + "grad_norm": 5.702398562479662, + "learning_rate": 5.111370978180003e-06, + "loss": 1.3487, + "step": 111888 + }, + { + "epoch": 1.35, + "grad_norm": 4.9133277517890255, + "learning_rate": 5.110861372602436e-06, + "loss": 1.3218, + "step": 111891 + }, + { + "epoch": 1.35, + "grad_norm": 6.2449761726568145, + "learning_rate": 5.110351783709743e-06, + "loss": 1.22, + "step": 111894 + }, + { + "epoch": 1.35, + "grad_norm": 11.89658137336269, + "learning_rate": 5.109842211503653e-06, + "loss": 1.1705, + "step": 111897 + }, + { + "epoch": 1.35, + "grad_norm": 12.719943470130367, + "learning_rate": 5.10933265598591e-06, + "loss": 1.3384, + "step": 111900 + }, + { + "epoch": 1.35, + "grad_norm": 8.973068552994224, + "learning_rate": 5.108823117158258e-06, + "loss": 1.0112, + "step": 111903 + }, + { + "epoch": 1.35, + "grad_norm": 14.328123240441593, + "learning_rate": 5.10831359502243e-06, + "loss": 1.4422, + "step": 111906 + }, + { + "epoch": 1.35, + "grad_norm": 3.860646301797918, + "learning_rate": 5.107804089580162e-06, + "loss": 0.903, + "step": 111909 + }, + { + "epoch": 1.35, + "grad_norm": 16.063158485192062, + "learning_rate": 5.107294600833197e-06, + "loss": 1.2099, + "step": 111912 + }, + { + "epoch": 1.35, + "grad_norm": 53.529071037402964, + "learning_rate": 5.106785128783273e-06, + "loss": 1.0366, + "step": 111915 + }, + { + "epoch": 1.35, + "grad_norm": 14.304241261832264, + "learning_rate": 5.106275673432128e-06, + "loss": 1.3695, + "step": 111918 + }, + { + "epoch": 1.35, + "grad_norm": 10.390505749238498, + "learning_rate": 5.105766234781506e-06, + "loss": 1.2612, + "step": 111921 + }, + { + "epoch": 1.35, + "grad_norm": 6.441552069451008, + "learning_rate": 5.105256812833141e-06, + "loss": 0.9826, + "step": 111924 + }, + { + "epoch": 1.35, + "grad_norm": 4.672234977467462, + "learning_rate": 5.1047474075887685e-06, + "loss": 1.0982, + "step": 111927 + }, + { + "epoch": 1.35, + "grad_norm": 20.171220260686056, + "learning_rate": 5.1042380190501276e-06, + "loss": 1.1848, + "step": 111930 + }, + { + "epoch": 1.35, + "grad_norm": 16.3180357655147, + "learning_rate": 5.103728647218964e-06, + "loss": 1.0967, + "step": 111933 + }, + { + "epoch": 1.35, + "grad_norm": 12.266762853134578, + "learning_rate": 5.103219292097007e-06, + "loss": 1.312, + "step": 111936 + }, + { + "epoch": 1.35, + "grad_norm": 3.1360072220562003, + "learning_rate": 5.102709953685998e-06, + "loss": 1.3164, + "step": 111939 + }, + { + "epoch": 1.35, + "grad_norm": 22.868471690212168, + "learning_rate": 5.102200631987677e-06, + "loss": 1.451, + "step": 111942 + }, + { + "epoch": 1.35, + "grad_norm": 3.1950748822738038, + "learning_rate": 5.101691327003785e-06, + "loss": 1.3022, + "step": 111945 + }, + { + "epoch": 1.35, + "grad_norm": 17.37625880915713, + "learning_rate": 5.101182038736055e-06, + "loss": 1.2297, + "step": 111948 + }, + { + "epoch": 1.35, + "grad_norm": 2.836160749977239, + "learning_rate": 5.100672767186223e-06, + "loss": 1.2866, + "step": 111951 + }, + { + "epoch": 1.35, + "grad_norm": 3.63603302936534, + "learning_rate": 5.100163512356031e-06, + "loss": 1.2803, + "step": 111954 + }, + { + "epoch": 1.35, + "grad_norm": 13.288725443960939, + "learning_rate": 5.099654274247215e-06, + "loss": 1.0546, + "step": 111957 + }, + { + "epoch": 1.35, + "grad_norm": 6.09545219428182, + "learning_rate": 5.099145052861517e-06, + "loss": 1.2285, + "step": 111960 + }, + { + "epoch": 1.35, + "grad_norm": 4.703774236743106, + "learning_rate": 5.098635848200668e-06, + "loss": 0.9888, + "step": 111963 + }, + { + "epoch": 1.35, + "grad_norm": 16.564960378673153, + "learning_rate": 5.098126660266415e-06, + "loss": 1.4426, + "step": 111966 + }, + { + "epoch": 1.35, + "grad_norm": 13.781272401647424, + "learning_rate": 5.0976174890604844e-06, + "loss": 1.0762, + "step": 111969 + }, + { + "epoch": 1.35, + "grad_norm": 7.046626456014808, + "learning_rate": 5.09710833458462e-06, + "loss": 1.2959, + "step": 111972 + }, + { + "epoch": 1.35, + "grad_norm": 6.620220315045784, + "learning_rate": 5.096599196840564e-06, + "loss": 1.0581, + "step": 111975 + }, + { + "epoch": 1.35, + "grad_norm": 30.437070133305497, + "learning_rate": 5.096090075830042e-06, + "loss": 1.1663, + "step": 111978 + }, + { + "epoch": 1.35, + "grad_norm": 14.272758432294646, + "learning_rate": 5.095580971554801e-06, + "loss": 1.202, + "step": 111981 + }, + { + "epoch": 1.35, + "grad_norm": 5.31033942598348, + "learning_rate": 5.0950718840165745e-06, + "loss": 1.3075, + "step": 111984 + }, + { + "epoch": 1.35, + "grad_norm": 20.135801655074822, + "learning_rate": 5.0945628132171055e-06, + "loss": 1.2877, + "step": 111987 + }, + { + "epoch": 1.35, + "grad_norm": 28.385754632552587, + "learning_rate": 5.0940537591581255e-06, + "loss": 1.5203, + "step": 111990 + }, + { + "epoch": 1.35, + "grad_norm": 22.32704437575799, + "learning_rate": 5.093544721841369e-06, + "loss": 1.0686, + "step": 111993 + }, + { + "epoch": 1.35, + "grad_norm": 9.118969179856359, + "learning_rate": 5.093035701268578e-06, + "loss": 1.8566, + "step": 111996 + }, + { + "epoch": 1.35, + "grad_norm": 4.007309087079911, + "learning_rate": 5.092526697441488e-06, + "loss": 1.2535, + "step": 111999 + }, + { + "epoch": 1.35, + "grad_norm": 8.388050850353338, + "learning_rate": 5.09201771036184e-06, + "loss": 1.4544, + "step": 112002 + }, + { + "epoch": 1.35, + "grad_norm": 12.928914714462477, + "learning_rate": 5.091508740031366e-06, + "loss": 1.0621, + "step": 112005 + }, + { + "epoch": 1.35, + "grad_norm": 15.929287802606074, + "learning_rate": 5.0909997864518064e-06, + "loss": 1.5127, + "step": 112008 + }, + { + "epoch": 1.35, + "grad_norm": 6.93470621596991, + "learning_rate": 5.090490849624894e-06, + "loss": 0.9736, + "step": 112011 + }, + { + "epoch": 1.35, + "grad_norm": 5.6166667032244995, + "learning_rate": 5.08998192955237e-06, + "loss": 1.2855, + "step": 112014 + }, + { + "epoch": 1.35, + "grad_norm": 14.516355338048584, + "learning_rate": 5.089473026235967e-06, + "loss": 1.5339, + "step": 112017 + }, + { + "epoch": 1.35, + "grad_norm": 5.5932733962783034, + "learning_rate": 5.0889641396774234e-06, + "loss": 1.2727, + "step": 112020 + }, + { + "epoch": 1.35, + "grad_norm": 11.397759707203912, + "learning_rate": 5.088455269878476e-06, + "loss": 1.144, + "step": 112023 + }, + { + "epoch": 1.35, + "grad_norm": 7.066614056094593, + "learning_rate": 5.087946416840861e-06, + "loss": 1.2386, + "step": 112026 + }, + { + "epoch": 1.35, + "grad_norm": 36.717665821194934, + "learning_rate": 5.087437580566319e-06, + "loss": 1.2226, + "step": 112029 + }, + { + "epoch": 1.35, + "grad_norm": 7.462327779680963, + "learning_rate": 5.086928761056584e-06, + "loss": 1.1434, + "step": 112032 + }, + { + "epoch": 1.35, + "grad_norm": 8.542582974952118, + "learning_rate": 5.0864199583133885e-06, + "loss": 1.28, + "step": 112035 + }, + { + "epoch": 1.35, + "grad_norm": 24.092334762868134, + "learning_rate": 5.085911172338471e-06, + "loss": 1.1424, + "step": 112038 + }, + { + "epoch": 1.35, + "grad_norm": 12.352577963363764, + "learning_rate": 5.085402403133573e-06, + "loss": 1.3819, + "step": 112041 + }, + { + "epoch": 1.35, + "grad_norm": 6.14706128487662, + "learning_rate": 5.084893650700421e-06, + "loss": 1.2071, + "step": 112044 + }, + { + "epoch": 1.35, + "grad_norm": 9.368670884056883, + "learning_rate": 5.084384915040758e-06, + "loss": 1.6529, + "step": 112047 + }, + { + "epoch": 1.35, + "grad_norm": 5.965027980380938, + "learning_rate": 5.083876196156318e-06, + "loss": 1.2107, + "step": 112050 + }, + { + "epoch": 1.35, + "grad_norm": 7.194902178973437, + "learning_rate": 5.083367494048841e-06, + "loss": 0.9463, + "step": 112053 + }, + { + "epoch": 1.35, + "grad_norm": 9.486730529602442, + "learning_rate": 5.082858808720062e-06, + "loss": 1.4499, + "step": 112056 + }, + { + "epoch": 1.35, + "grad_norm": 7.186433070703786, + "learning_rate": 5.082350140171708e-06, + "loss": 1.0523, + "step": 112059 + }, + { + "epoch": 1.35, + "grad_norm": 15.382096302645321, + "learning_rate": 5.081841488405524e-06, + "loss": 0.9034, + "step": 112062 + }, + { + "epoch": 1.35, + "grad_norm": 21.17674255886422, + "learning_rate": 5.081332853423242e-06, + "loss": 1.0205, + "step": 112065 + }, + { + "epoch": 1.35, + "grad_norm": 19.678579747345932, + "learning_rate": 5.080824235226604e-06, + "loss": 1.3737, + "step": 112068 + }, + { + "epoch": 1.35, + "grad_norm": 8.102517242988522, + "learning_rate": 5.0803156338173365e-06, + "loss": 1.208, + "step": 112071 + }, + { + "epoch": 1.35, + "grad_norm": 7.827393091215274, + "learning_rate": 5.079807049197184e-06, + "loss": 1.1478, + "step": 112074 + }, + { + "epoch": 1.35, + "grad_norm": 7.110712475944115, + "learning_rate": 5.079298481367874e-06, + "loss": 1.1878, + "step": 112077 + }, + { + "epoch": 1.35, + "grad_norm": 4.827046518684274, + "learning_rate": 5.078789930331145e-06, + "loss": 1.2584, + "step": 112080 + }, + { + "epoch": 1.35, + "grad_norm": 11.544163427853116, + "learning_rate": 5.078281396088738e-06, + "loss": 1.22, + "step": 112083 + }, + { + "epoch": 1.35, + "grad_norm": 7.229448407021339, + "learning_rate": 5.07777287864238e-06, + "loss": 0.8957, + "step": 112086 + }, + { + "epoch": 1.35, + "grad_norm": 7.24095364203373, + "learning_rate": 5.0772643779938105e-06, + "loss": 1.0201, + "step": 112089 + }, + { + "epoch": 1.35, + "grad_norm": 51.35566870385054, + "learning_rate": 5.0767558941447636e-06, + "loss": 1.3001, + "step": 112092 + }, + { + "epoch": 1.35, + "grad_norm": 6.2900011233643545, + "learning_rate": 5.076247427096979e-06, + "loss": 1.4396, + "step": 112095 + }, + { + "epoch": 1.35, + "grad_norm": 9.121182617594997, + "learning_rate": 5.07573897685219e-06, + "loss": 1.044, + "step": 112098 + }, + { + "epoch": 1.35, + "grad_norm": 5.935221547973339, + "learning_rate": 5.075230543412126e-06, + "loss": 1.2046, + "step": 112101 + }, + { + "epoch": 1.35, + "grad_norm": 9.165076391285146, + "learning_rate": 5.074722126778525e-06, + "loss": 1.0886, + "step": 112104 + }, + { + "epoch": 1.35, + "grad_norm": 7.889345359761385, + "learning_rate": 5.074213726953124e-06, + "loss": 1.1829, + "step": 112107 + }, + { + "epoch": 1.35, + "grad_norm": 9.737993848410428, + "learning_rate": 5.073705343937662e-06, + "loss": 1.385, + "step": 112110 + }, + { + "epoch": 1.35, + "grad_norm": 11.983476470973372, + "learning_rate": 5.073196977733864e-06, + "loss": 0.7863, + "step": 112113 + }, + { + "epoch": 1.35, + "grad_norm": 6.659530151088243, + "learning_rate": 5.072688628343474e-06, + "loss": 0.977, + "step": 112116 + }, + { + "epoch": 1.35, + "grad_norm": 4.781769342494349, + "learning_rate": 5.072180295768221e-06, + "loss": 1.4404, + "step": 112119 + }, + { + "epoch": 1.35, + "grad_norm": 12.423138665154895, + "learning_rate": 5.071671980009844e-06, + "loss": 1.1698, + "step": 112122 + }, + { + "epoch": 1.35, + "grad_norm": 5.2266064148376055, + "learning_rate": 5.0711636810700705e-06, + "loss": 1.2928, + "step": 112125 + }, + { + "epoch": 1.35, + "grad_norm": 4.45848093292018, + "learning_rate": 5.070655398950641e-06, + "loss": 1.2338, + "step": 112128 + }, + { + "epoch": 1.35, + "grad_norm": 6.869677774472026, + "learning_rate": 5.070147133653289e-06, + "loss": 1.088, + "step": 112131 + }, + { + "epoch": 1.35, + "grad_norm": 10.648218610144855, + "learning_rate": 5.06963888517975e-06, + "loss": 1.0998, + "step": 112134 + }, + { + "epoch": 1.35, + "grad_norm": 8.384342278892035, + "learning_rate": 5.06913065353176e-06, + "loss": 0.9778, + "step": 112137 + }, + { + "epoch": 1.35, + "grad_norm": 16.088648284894504, + "learning_rate": 5.068622438711053e-06, + "loss": 1.088, + "step": 112140 + }, + { + "epoch": 1.35, + "grad_norm": 8.312421364794815, + "learning_rate": 5.068114240719355e-06, + "loss": 1.279, + "step": 112143 + }, + { + "epoch": 1.35, + "grad_norm": 10.129509112224099, + "learning_rate": 5.067606059558408e-06, + "loss": 1.0663, + "step": 112146 + }, + { + "epoch": 1.35, + "grad_norm": 3.0088675000016103, + "learning_rate": 5.067097895229949e-06, + "loss": 1.1074, + "step": 112149 + }, + { + "epoch": 1.35, + "grad_norm": 11.735280251157956, + "learning_rate": 5.066589747735704e-06, + "loss": 0.9407, + "step": 112152 + }, + { + "epoch": 1.35, + "grad_norm": 10.3390591329716, + "learning_rate": 5.06608161707741e-06, + "loss": 1.3328, + "step": 112155 + }, + { + "epoch": 1.35, + "grad_norm": 5.589735500223601, + "learning_rate": 5.065573503256806e-06, + "loss": 0.9304, + "step": 112158 + }, + { + "epoch": 1.35, + "grad_norm": 15.431480662783184, + "learning_rate": 5.065065406275621e-06, + "loss": 1.0337, + "step": 112161 + }, + { + "epoch": 1.35, + "grad_norm": 6.034801950867164, + "learning_rate": 5.064557326135593e-06, + "loss": 1.1593, + "step": 112164 + }, + { + "epoch": 1.35, + "grad_norm": 14.843125209388162, + "learning_rate": 5.064049262838449e-06, + "loss": 1.0613, + "step": 112167 + }, + { + "epoch": 1.35, + "grad_norm": 7.087691717704196, + "learning_rate": 5.0635412163859255e-06, + "loss": 0.8913, + "step": 112170 + }, + { + "epoch": 1.35, + "grad_norm": 5.016611111571574, + "learning_rate": 5.06303318677976e-06, + "loss": 1.0144, + "step": 112173 + }, + { + "epoch": 1.35, + "grad_norm": 13.946330204371803, + "learning_rate": 5.062525174021686e-06, + "loss": 0.9311, + "step": 112176 + }, + { + "epoch": 1.35, + "grad_norm": 9.184828605394006, + "learning_rate": 5.062017178113432e-06, + "loss": 1.1088, + "step": 112179 + }, + { + "epoch": 1.35, + "grad_norm": 5.6278571631123775, + "learning_rate": 5.061509199056737e-06, + "loss": 1.6622, + "step": 112182 + }, + { + "epoch": 1.35, + "grad_norm": 4.8620224820635665, + "learning_rate": 5.061001236853332e-06, + "loss": 1.0558, + "step": 112185 + }, + { + "epoch": 1.35, + "grad_norm": 6.001429040317967, + "learning_rate": 5.060493291504949e-06, + "loss": 1.392, + "step": 112188 + }, + { + "epoch": 1.35, + "grad_norm": 12.511986332248878, + "learning_rate": 5.059985363013327e-06, + "loss": 1.2904, + "step": 112191 + }, + { + "epoch": 1.35, + "grad_norm": 26.578877868724316, + "learning_rate": 5.05947745138019e-06, + "loss": 1.0693, + "step": 112194 + }, + { + "epoch": 1.35, + "grad_norm": 4.315371220115177, + "learning_rate": 5.058969556607278e-06, + "loss": 1.1524, + "step": 112197 + }, + { + "epoch": 1.35, + "grad_norm": 30.85621360632167, + "learning_rate": 5.058461678696328e-06, + "loss": 1.6389, + "step": 112200 + }, + { + "epoch": 1.35, + "grad_norm": 8.881363338780462, + "learning_rate": 5.057953817649068e-06, + "loss": 0.9533, + "step": 112203 + }, + { + "epoch": 1.35, + "grad_norm": 6.637940956550692, + "learning_rate": 5.0574459734672275e-06, + "loss": 0.9774, + "step": 112206 + }, + { + "epoch": 1.35, + "grad_norm": 45.20756148048451, + "learning_rate": 5.056938146152544e-06, + "loss": 1.0727, + "step": 112209 + }, + { + "epoch": 1.35, + "grad_norm": 9.07645276862752, + "learning_rate": 5.056430335706749e-06, + "loss": 1.3845, + "step": 112212 + }, + { + "epoch": 1.35, + "grad_norm": 6.029041022518467, + "learning_rate": 5.055922542131577e-06, + "loss": 1.4733, + "step": 112215 + }, + { + "epoch": 1.35, + "grad_norm": 9.126878550432714, + "learning_rate": 5.0554147654287655e-06, + "loss": 1.2012, + "step": 112218 + }, + { + "epoch": 1.35, + "grad_norm": 6.865369781589052, + "learning_rate": 5.054907005600037e-06, + "loss": 1.3073, + "step": 112221 + }, + { + "epoch": 1.35, + "grad_norm": 5.372721122858113, + "learning_rate": 5.0543992626471335e-06, + "loss": 1.1943, + "step": 112224 + }, + { + "epoch": 1.35, + "grad_norm": 7.68444870957156, + "learning_rate": 5.053891536571781e-06, + "loss": 1.1599, + "step": 112227 + }, + { + "epoch": 1.35, + "grad_norm": 16.700740762562123, + "learning_rate": 5.05338382737572e-06, + "loss": 1.0017, + "step": 112230 + }, + { + "epoch": 1.35, + "grad_norm": 5.092128056293266, + "learning_rate": 5.052876135060671e-06, + "loss": 1.1121, + "step": 112233 + }, + { + "epoch": 1.35, + "grad_norm": 5.113997237729515, + "learning_rate": 5.052368459628376e-06, + "loss": 1.128, + "step": 112236 + }, + { + "epoch": 1.35, + "grad_norm": 3.450121718165302, + "learning_rate": 5.051860801080566e-06, + "loss": 1.2816, + "step": 112239 + }, + { + "epoch": 1.35, + "grad_norm": 12.051628650277856, + "learning_rate": 5.051353159418976e-06, + "loss": 1.2174, + "step": 112242 + }, + { + "epoch": 1.35, + "grad_norm": 8.528043235340702, + "learning_rate": 5.050845534645334e-06, + "loss": 1.2031, + "step": 112245 + }, + { + "epoch": 1.35, + "grad_norm": 15.715301372458082, + "learning_rate": 5.05033792676137e-06, + "loss": 1.2299, + "step": 112248 + }, + { + "epoch": 1.35, + "grad_norm": 13.6703179061733, + "learning_rate": 5.049830335768821e-06, + "loss": 1.1309, + "step": 112251 + }, + { + "epoch": 1.35, + "grad_norm": 8.551261150370099, + "learning_rate": 5.049322761669418e-06, + "loss": 1.2238, + "step": 112254 + }, + { + "epoch": 1.35, + "grad_norm": 12.12159935869834, + "learning_rate": 5.048815204464896e-06, + "loss": 1.0982, + "step": 112257 + }, + { + "epoch": 1.35, + "grad_norm": 9.25247089987857, + "learning_rate": 5.048307664156982e-06, + "loss": 1.308, + "step": 112260 + }, + { + "epoch": 1.35, + "grad_norm": 11.567431390965355, + "learning_rate": 5.047800140747409e-06, + "loss": 0.8779, + "step": 112263 + }, + { + "epoch": 1.35, + "grad_norm": 19.657106635526777, + "learning_rate": 5.047292634237915e-06, + "loss": 1.2369, + "step": 112266 + }, + { + "epoch": 1.35, + "grad_norm": 3.1961917466291005, + "learning_rate": 5.046785144630222e-06, + "loss": 1.2396, + "step": 112269 + }, + { + "epoch": 1.35, + "grad_norm": 8.516414081226243, + "learning_rate": 5.0462776719260716e-06, + "loss": 1.1059, + "step": 112272 + }, + { + "epoch": 1.35, + "grad_norm": 24.105589968885706, + "learning_rate": 5.045770216127188e-06, + "loss": 1.003, + "step": 112275 + }, + { + "epoch": 1.35, + "grad_norm": 7.031522535308978, + "learning_rate": 5.0452627772353055e-06, + "loss": 1.2777, + "step": 112278 + }, + { + "epoch": 1.35, + "grad_norm": 2.5678754146544107, + "learning_rate": 5.044755355252158e-06, + "loss": 0.8421, + "step": 112281 + }, + { + "epoch": 1.35, + "grad_norm": 7.7160537737726, + "learning_rate": 5.044247950179477e-06, + "loss": 1.244, + "step": 112284 + }, + { + "epoch": 1.35, + "grad_norm": 12.383847810556505, + "learning_rate": 5.043740562018995e-06, + "loss": 0.9462, + "step": 112287 + }, + { + "epoch": 1.35, + "grad_norm": 3.962125115545449, + "learning_rate": 5.043233190772436e-06, + "loss": 1.1232, + "step": 112290 + }, + { + "epoch": 1.35, + "grad_norm": 14.321752970718869, + "learning_rate": 5.042725836441536e-06, + "loss": 1.1953, + "step": 112293 + }, + { + "epoch": 1.35, + "grad_norm": 11.02118523367983, + "learning_rate": 5.042218499028028e-06, + "loss": 1.4352, + "step": 112296 + }, + { + "epoch": 1.35, + "grad_norm": 4.75108606629288, + "learning_rate": 5.041711178533647e-06, + "loss": 1.1664, + "step": 112299 + }, + { + "epoch": 1.35, + "grad_norm": 9.000005005621704, + "learning_rate": 5.041203874960116e-06, + "loss": 1.3525, + "step": 112302 + }, + { + "epoch": 1.35, + "grad_norm": 5.794510806214161, + "learning_rate": 5.0406965883091685e-06, + "loss": 0.8576, + "step": 112305 + }, + { + "epoch": 1.35, + "grad_norm": 74.93025078420706, + "learning_rate": 5.040189318582543e-06, + "loss": 1.408, + "step": 112308 + }, + { + "epoch": 1.35, + "grad_norm": 5.9326567257903315, + "learning_rate": 5.039682065781964e-06, + "loss": 1.132, + "step": 112311 + }, + { + "epoch": 1.35, + "grad_norm": 3.890948072698564, + "learning_rate": 5.039174829909158e-06, + "loss": 0.9749, + "step": 112314 + }, + { + "epoch": 1.35, + "grad_norm": 5.2840134677066235, + "learning_rate": 5.038667610965863e-06, + "loss": 1.5512, + "step": 112317 + }, + { + "epoch": 1.35, + "grad_norm": 17.197594846192956, + "learning_rate": 5.038160408953808e-06, + "loss": 0.8631, + "step": 112320 + }, + { + "epoch": 1.35, + "grad_norm": 3.172467538029758, + "learning_rate": 5.0376532238747245e-06, + "loss": 1.0704, + "step": 112323 + }, + { + "epoch": 1.35, + "grad_norm": 11.207527376428159, + "learning_rate": 5.037146055730346e-06, + "loss": 0.9996, + "step": 112326 + }, + { + "epoch": 1.35, + "grad_norm": 22.08511935772257, + "learning_rate": 5.036638904522398e-06, + "loss": 1.3949, + "step": 112329 + }, + { + "epoch": 1.35, + "grad_norm": 2.4061214121568395, + "learning_rate": 5.036131770252617e-06, + "loss": 1.3034, + "step": 112332 + }, + { + "epoch": 1.35, + "grad_norm": 13.315214261792322, + "learning_rate": 5.035624652922726e-06, + "loss": 1.3789, + "step": 112335 + }, + { + "epoch": 1.35, + "grad_norm": 4.627499418183236, + "learning_rate": 5.0351175525344655e-06, + "loss": 1.5464, + "step": 112338 + }, + { + "epoch": 1.35, + "grad_norm": 13.156308091195406, + "learning_rate": 5.034610469089554e-06, + "loss": 1.3172, + "step": 112341 + }, + { + "epoch": 1.35, + "grad_norm": 7.8021397928920475, + "learning_rate": 5.03410340258973e-06, + "loss": 1.2492, + "step": 112344 + }, + { + "epoch": 1.35, + "grad_norm": 6.18552402848124, + "learning_rate": 5.0335963530367225e-06, + "loss": 1.4941, + "step": 112347 + }, + { + "epoch": 1.35, + "grad_norm": 2.6311306533485337, + "learning_rate": 5.033089320432265e-06, + "loss": 1.4743, + "step": 112350 + }, + { + "epoch": 1.35, + "grad_norm": 11.245886792773018, + "learning_rate": 5.0325823047780845e-06, + "loss": 1.0217, + "step": 112353 + }, + { + "epoch": 1.35, + "grad_norm": 10.25496471375675, + "learning_rate": 5.032075306075907e-06, + "loss": 1.3435, + "step": 112356 + }, + { + "epoch": 1.35, + "grad_norm": 20.122248521828922, + "learning_rate": 5.031568324327468e-06, + "loss": 1.151, + "step": 112359 + }, + { + "epoch": 1.35, + "grad_norm": 7.39795033319935, + "learning_rate": 5.031061359534497e-06, + "loss": 1.0289, + "step": 112362 + }, + { + "epoch": 1.35, + "grad_norm": 6.655142494949009, + "learning_rate": 5.030554411698727e-06, + "loss": 1.1299, + "step": 112365 + }, + { + "epoch": 1.35, + "grad_norm": 14.588467965568986, + "learning_rate": 5.03004748082188e-06, + "loss": 0.9713, + "step": 112368 + }, + { + "epoch": 1.35, + "grad_norm": 9.20939849903909, + "learning_rate": 5.029540566905693e-06, + "loss": 1.3988, + "step": 112371 + }, + { + "epoch": 1.35, + "grad_norm": 4.728486980602279, + "learning_rate": 5.029033669951896e-06, + "loss": 1.3092, + "step": 112374 + }, + { + "epoch": 1.35, + "grad_norm": 11.397583273302358, + "learning_rate": 5.028526789962214e-06, + "loss": 0.806, + "step": 112377 + }, + { + "epoch": 1.35, + "grad_norm": 19.117969611531727, + "learning_rate": 5.028019926938381e-06, + "loss": 1.2336, + "step": 112380 + }, + { + "epoch": 1.35, + "grad_norm": 12.87420437522931, + "learning_rate": 5.027513080882123e-06, + "loss": 0.7503, + "step": 112383 + }, + { + "epoch": 1.35, + "grad_norm": 30.170577689270306, + "learning_rate": 5.027006251795171e-06, + "loss": 1.5311, + "step": 112386 + }, + { + "epoch": 1.35, + "grad_norm": 13.338851992187747, + "learning_rate": 5.0264994396792555e-06, + "loss": 1.2939, + "step": 112389 + }, + { + "epoch": 1.35, + "grad_norm": 20.410007180035286, + "learning_rate": 5.025992644536111e-06, + "loss": 0.9171, + "step": 112392 + }, + { + "epoch": 1.35, + "grad_norm": 7.533249049572708, + "learning_rate": 5.02548586636746e-06, + "loss": 1.1812, + "step": 112395 + }, + { + "epoch": 1.35, + "grad_norm": 14.795061505023817, + "learning_rate": 5.024979105175031e-06, + "loss": 1.4555, + "step": 112398 + }, + { + "epoch": 1.35, + "grad_norm": 10.923812158212254, + "learning_rate": 5.024472360960556e-06, + "loss": 0.983, + "step": 112401 + }, + { + "epoch": 1.35, + "grad_norm": 4.0785598572179635, + "learning_rate": 5.023965633725765e-06, + "loss": 1.1611, + "step": 112404 + }, + { + "epoch": 1.35, + "grad_norm": 6.790941969167415, + "learning_rate": 5.023458923472391e-06, + "loss": 1.3236, + "step": 112407 + }, + { + "epoch": 1.35, + "grad_norm": 6.023736566206723, + "learning_rate": 5.022952230202155e-06, + "loss": 0.9781, + "step": 112410 + }, + { + "epoch": 1.35, + "grad_norm": 4.482765606040187, + "learning_rate": 5.02244555391679e-06, + "loss": 0.9284, + "step": 112413 + }, + { + "epoch": 1.35, + "grad_norm": 5.591389603789904, + "learning_rate": 5.02193889461803e-06, + "loss": 1.1683, + "step": 112416 + }, + { + "epoch": 1.35, + "grad_norm": 25.007870612159643, + "learning_rate": 5.021432252307599e-06, + "loss": 1.4336, + "step": 112419 + }, + { + "epoch": 1.35, + "grad_norm": 10.698294598768236, + "learning_rate": 5.020925626987223e-06, + "loss": 1.2151, + "step": 112422 + }, + { + "epoch": 1.35, + "grad_norm": 12.513508630325733, + "learning_rate": 5.020419018658635e-06, + "loss": 1.4158, + "step": 112425 + }, + { + "epoch": 1.35, + "grad_norm": 4.2623526676043895, + "learning_rate": 5.019912427323561e-06, + "loss": 0.945, + "step": 112428 + }, + { + "epoch": 1.35, + "grad_norm": 16.29978320695068, + "learning_rate": 5.019405852983734e-06, + "loss": 0.8599, + "step": 112431 + }, + { + "epoch": 1.35, + "grad_norm": 12.305136594047717, + "learning_rate": 5.018899295640885e-06, + "loss": 1.1343, + "step": 112434 + }, + { + "epoch": 1.35, + "grad_norm": 10.984360579380082, + "learning_rate": 5.018392755296737e-06, + "loss": 1.0896, + "step": 112437 + }, + { + "epoch": 1.35, + "grad_norm": 6.6710714766377635, + "learning_rate": 5.017886231953017e-06, + "loss": 1.2443, + "step": 112440 + }, + { + "epoch": 1.35, + "grad_norm": 11.522869461987266, + "learning_rate": 5.017379725611456e-06, + "loss": 1.2206, + "step": 112443 + }, + { + "epoch": 1.35, + "grad_norm": 15.332244105723596, + "learning_rate": 5.016873236273787e-06, + "loss": 1.2941, + "step": 112446 + }, + { + "epoch": 1.35, + "grad_norm": 11.538440645883593, + "learning_rate": 5.016366763941733e-06, + "loss": 1.041, + "step": 112449 + }, + { + "epoch": 1.35, + "grad_norm": 4.230652450126485, + "learning_rate": 5.015860308617022e-06, + "loss": 1.1342, + "step": 112452 + }, + { + "epoch": 1.35, + "grad_norm": 10.9180333852688, + "learning_rate": 5.0153538703013845e-06, + "loss": 1.4154, + "step": 112455 + }, + { + "epoch": 1.35, + "grad_norm": 3.824221760884586, + "learning_rate": 5.014847448996554e-06, + "loss": 1.3174, + "step": 112458 + }, + { + "epoch": 1.35, + "grad_norm": 45.03786858963454, + "learning_rate": 5.014341044704254e-06, + "loss": 1.1913, + "step": 112461 + }, + { + "epoch": 1.35, + "grad_norm": 3.057836000447877, + "learning_rate": 5.013834657426207e-06, + "loss": 1.2081, + "step": 112464 + }, + { + "epoch": 1.35, + "grad_norm": 4.77694808836551, + "learning_rate": 5.013328287164146e-06, + "loss": 1.2867, + "step": 112467 + }, + { + "epoch": 1.35, + "grad_norm": 8.117826718258938, + "learning_rate": 5.0128219339198e-06, + "loss": 1.368, + "step": 112470 + }, + { + "epoch": 1.35, + "grad_norm": 7.949907049352342, + "learning_rate": 5.012315597694902e-06, + "loss": 1.0383, + "step": 112473 + }, + { + "epoch": 1.35, + "grad_norm": 9.87150280117967, + "learning_rate": 5.0118092784911665e-06, + "loss": 0.788, + "step": 112476 + }, + { + "epoch": 1.35, + "grad_norm": 4.151996055820295, + "learning_rate": 5.011302976310335e-06, + "loss": 1.0129, + "step": 112479 + }, + { + "epoch": 1.35, + "grad_norm": 6.068605709805277, + "learning_rate": 5.010796691154127e-06, + "loss": 1.1348, + "step": 112482 + }, + { + "epoch": 1.35, + "grad_norm": 9.199506959435027, + "learning_rate": 5.010290423024271e-06, + "loss": 1.287, + "step": 112485 + }, + { + "epoch": 1.35, + "grad_norm": 23.754294757342162, + "learning_rate": 5.0097841719225e-06, + "loss": 1.0521, + "step": 112488 + }, + { + "epoch": 1.35, + "grad_norm": 4.4415674932220695, + "learning_rate": 5.009277937850537e-06, + "loss": 0.9287, + "step": 112491 + }, + { + "epoch": 1.35, + "grad_norm": 9.542840681917212, + "learning_rate": 5.008771720810108e-06, + "loss": 1.0952, + "step": 112494 + }, + { + "epoch": 1.35, + "grad_norm": 5.611305065117825, + "learning_rate": 5.008265520802944e-06, + "loss": 0.9865, + "step": 112497 + }, + { + "epoch": 1.35, + "grad_norm": 4.8184603768608145, + "learning_rate": 5.007759337830777e-06, + "loss": 1.2131, + "step": 112500 + }, + { + "epoch": 1.35, + "grad_norm": 19.06714609910828, + "learning_rate": 5.007253171895327e-06, + "loss": 1.1453, + "step": 112503 + }, + { + "epoch": 1.35, + "grad_norm": 5.171569523847849, + "learning_rate": 5.006747022998322e-06, + "loss": 1.3365, + "step": 112506 + }, + { + "epoch": 1.35, + "grad_norm": 7.770873313225824, + "learning_rate": 5.00624089114149e-06, + "loss": 1.4468, + "step": 112509 + }, + { + "epoch": 1.35, + "grad_norm": 7.5080289587783, + "learning_rate": 5.0057347763265585e-06, + "loss": 1.2756, + "step": 112512 + }, + { + "epoch": 1.35, + "grad_norm": 6.09865881341389, + "learning_rate": 5.005228678555258e-06, + "loss": 1.0357, + "step": 112515 + }, + { + "epoch": 1.35, + "grad_norm": 15.618025823384876, + "learning_rate": 5.004722597829312e-06, + "loss": 0.7887, + "step": 112518 + }, + { + "epoch": 1.35, + "grad_norm": 2.851587776812692, + "learning_rate": 5.00421653415045e-06, + "loss": 1.0537, + "step": 112521 + }, + { + "epoch": 1.35, + "grad_norm": 11.538987201319172, + "learning_rate": 5.003710487520395e-06, + "loss": 1.1523, + "step": 112524 + }, + { + "epoch": 1.35, + "grad_norm": 10.44711775173713, + "learning_rate": 5.003204457940879e-06, + "loss": 1.4513, + "step": 112527 + }, + { + "epoch": 1.35, + "grad_norm": 3.377345188714468, + "learning_rate": 5.002698445413623e-06, + "loss": 1.1645, + "step": 112530 + }, + { + "epoch": 1.35, + "grad_norm": 6.237236194709916, + "learning_rate": 5.0021924499403575e-06, + "loss": 1.1628, + "step": 112533 + }, + { + "epoch": 1.35, + "grad_norm": 46.27077069887246, + "learning_rate": 5.001686471522809e-06, + "loss": 1.4198, + "step": 112536 + }, + { + "epoch": 1.35, + "grad_norm": 8.980615403369969, + "learning_rate": 5.001180510162704e-06, + "loss": 0.9514, + "step": 112539 + }, + { + "epoch": 1.35, + "grad_norm": 7.598361619288464, + "learning_rate": 5.000674565861774e-06, + "loss": 0.9199, + "step": 112542 + }, + { + "epoch": 1.35, + "grad_norm": 12.748362312950901, + "learning_rate": 5.00016863862174e-06, + "loss": 1.0386, + "step": 112545 + }, + { + "epoch": 1.35, + "grad_norm": 8.342911049108132, + "learning_rate": 4.999662728444326e-06, + "loss": 1.1876, + "step": 112548 + }, + { + "epoch": 1.35, + "grad_norm": 113.54407808716932, + "learning_rate": 4.999156835331262e-06, + "loss": 1.1582, + "step": 112551 + }, + { + "epoch": 1.35, + "grad_norm": 7.2182681014632335, + "learning_rate": 4.998650959284277e-06, + "loss": 1.2162, + "step": 112554 + }, + { + "epoch": 1.35, + "grad_norm": 15.38346236369857, + "learning_rate": 4.998145100305092e-06, + "loss": 1.0801, + "step": 112557 + }, + { + "epoch": 1.35, + "grad_norm": 7.3151508755090955, + "learning_rate": 4.997639258395437e-06, + "loss": 1.4154, + "step": 112560 + }, + { + "epoch": 1.35, + "grad_norm": 29.425543014377222, + "learning_rate": 4.99713343355704e-06, + "loss": 1.3179, + "step": 112563 + }, + { + "epoch": 1.35, + "grad_norm": 13.96636048804799, + "learning_rate": 4.996627625791621e-06, + "loss": 1.3605, + "step": 112566 + }, + { + "epoch": 1.35, + "grad_norm": 6.181335025315605, + "learning_rate": 4.996121835100913e-06, + "loss": 1.0778, + "step": 112569 + }, + { + "epoch": 1.35, + "grad_norm": 7.832791169876299, + "learning_rate": 4.995616061486636e-06, + "loss": 1.1002, + "step": 112572 + }, + { + "epoch": 1.35, + "grad_norm": 4.957231571764765, + "learning_rate": 4.995110304950519e-06, + "loss": 1.0503, + "step": 112575 + }, + { + "epoch": 1.35, + "grad_norm": 9.695739889471248, + "learning_rate": 4.994604565494288e-06, + "loss": 1.2742, + "step": 112578 + }, + { + "epoch": 1.35, + "grad_norm": 12.622074455838705, + "learning_rate": 4.994098843119672e-06, + "loss": 1.3573, + "step": 112581 + }, + { + "epoch": 1.35, + "grad_norm": 11.92731643146511, + "learning_rate": 4.993593137828389e-06, + "loss": 1.1377, + "step": 112584 + }, + { + "epoch": 1.35, + "grad_norm": 4.614384918650437, + "learning_rate": 4.993087449622173e-06, + "loss": 1.142, + "step": 112587 + }, + { + "epoch": 1.35, + "grad_norm": 22.20756873869533, + "learning_rate": 4.992581778502743e-06, + "loss": 1.0516, + "step": 112590 + }, + { + "epoch": 1.35, + "grad_norm": 19.839388557020555, + "learning_rate": 4.992076124471827e-06, + "loss": 1.1079, + "step": 112593 + }, + { + "epoch": 1.35, + "grad_norm": 16.25876535900366, + "learning_rate": 4.991570487531156e-06, + "loss": 1.5451, + "step": 112596 + }, + { + "epoch": 1.35, + "grad_norm": 3.574373744989751, + "learning_rate": 4.991064867682447e-06, + "loss": 1.4599, + "step": 112599 + }, + { + "epoch": 1.35, + "grad_norm": 14.714061828675359, + "learning_rate": 4.99055926492743e-06, + "loss": 1.1601, + "step": 112602 + }, + { + "epoch": 1.35, + "grad_norm": 17.492915155590293, + "learning_rate": 4.990053679267831e-06, + "loss": 1.1453, + "step": 112605 + }, + { + "epoch": 1.35, + "grad_norm": 4.547703696487199, + "learning_rate": 4.989548110705373e-06, + "loss": 1.1219, + "step": 112608 + }, + { + "epoch": 1.35, + "grad_norm": 5.387631593821883, + "learning_rate": 4.9890425592417855e-06, + "loss": 1.1835, + "step": 112611 + }, + { + "epoch": 1.35, + "grad_norm": 4.671299515374618, + "learning_rate": 4.988537024878787e-06, + "loss": 0.9626, + "step": 112614 + }, + { + "epoch": 1.35, + "grad_norm": 3.2901154057726743, + "learning_rate": 4.988031507618107e-06, + "loss": 1.117, + "step": 112617 + }, + { + "epoch": 1.35, + "grad_norm": 4.398576177977995, + "learning_rate": 4.98752600746147e-06, + "loss": 1.4565, + "step": 112620 + }, + { + "epoch": 1.35, + "grad_norm": 7.710775726978409, + "learning_rate": 4.987020524410604e-06, + "loss": 1.161, + "step": 112623 + }, + { + "epoch": 1.35, + "grad_norm": 5.544685070208056, + "learning_rate": 4.986515058467228e-06, + "loss": 1.0034, + "step": 112626 + }, + { + "epoch": 1.35, + "grad_norm": 10.413124186369469, + "learning_rate": 4.986009609633075e-06, + "loss": 1.1029, + "step": 112629 + }, + { + "epoch": 1.35, + "grad_norm": 8.924994135553264, + "learning_rate": 4.98550417790986e-06, + "loss": 1.2544, + "step": 112632 + }, + { + "epoch": 1.35, + "grad_norm": 39.51647388944592, + "learning_rate": 4.984998763299318e-06, + "loss": 1.2931, + "step": 112635 + }, + { + "epoch": 1.35, + "grad_norm": 2.9472491465363304, + "learning_rate": 4.984493365803165e-06, + "loss": 1.0949, + "step": 112638 + }, + { + "epoch": 1.35, + "grad_norm": 5.590046069646416, + "learning_rate": 4.9839879854231285e-06, + "loss": 0.9398, + "step": 112641 + }, + { + "epoch": 1.35, + "grad_norm": 103.59351772996833, + "learning_rate": 4.983482622160936e-06, + "loss": 1.4608, + "step": 112644 + }, + { + "epoch": 1.35, + "grad_norm": 7.976845780287313, + "learning_rate": 4.982977276018308e-06, + "loss": 1.3164, + "step": 112647 + }, + { + "epoch": 1.35, + "grad_norm": 17.172595422405422, + "learning_rate": 4.982471946996977e-06, + "loss": 1.124, + "step": 112650 + }, + { + "epoch": 1.35, + "grad_norm": 5.343398905663699, + "learning_rate": 4.981966635098662e-06, + "loss": 1.291, + "step": 112653 + }, + { + "epoch": 1.35, + "grad_norm": 7.124102531049529, + "learning_rate": 4.981461340325082e-06, + "loss": 1.0504, + "step": 112656 + }, + { + "epoch": 1.35, + "grad_norm": 6.4157268419437194, + "learning_rate": 4.980956062677969e-06, + "loss": 1.4044, + "step": 112659 + }, + { + "epoch": 1.35, + "grad_norm": 5.625797468692187, + "learning_rate": 4.980450802159047e-06, + "loss": 1.2799, + "step": 112662 + }, + { + "epoch": 1.35, + "grad_norm": 3.243540483627065, + "learning_rate": 4.979945558770035e-06, + "loss": 0.9552, + "step": 112665 + }, + { + "epoch": 1.35, + "grad_norm": 16.42494006528769, + "learning_rate": 4.97944033251266e-06, + "loss": 1.1293, + "step": 112668 + }, + { + "epoch": 1.35, + "grad_norm": 8.243511789764515, + "learning_rate": 4.978935123388653e-06, + "loss": 1.2757, + "step": 112671 + }, + { + "epoch": 1.35, + "grad_norm": 13.291009296958899, + "learning_rate": 4.978429931399727e-06, + "loss": 1.5413, + "step": 112674 + }, + { + "epoch": 1.35, + "grad_norm": 36.92500402218921, + "learning_rate": 4.977924756547613e-06, + "loss": 0.8352, + "step": 112677 + }, + { + "epoch": 1.35, + "grad_norm": 9.260313288266058, + "learning_rate": 4.97741959883403e-06, + "loss": 1.1301, + "step": 112680 + }, + { + "epoch": 1.35, + "grad_norm": 11.565160082519853, + "learning_rate": 4.976914458260705e-06, + "loss": 0.9869, + "step": 112683 + }, + { + "epoch": 1.36, + "grad_norm": 3.9347553119465943, + "learning_rate": 4.976409334829362e-06, + "loss": 1.0503, + "step": 112686 + }, + { + "epoch": 1.36, + "grad_norm": 15.714315791099532, + "learning_rate": 4.975904228541728e-06, + "loss": 1.192, + "step": 112689 + }, + { + "epoch": 1.36, + "grad_norm": 14.770961591400795, + "learning_rate": 4.975399139399519e-06, + "loss": 1.0141, + "step": 112692 + }, + { + "epoch": 1.36, + "grad_norm": 3.7899443824888643, + "learning_rate": 4.974894067404468e-06, + "loss": 1.1797, + "step": 112695 + }, + { + "epoch": 1.36, + "grad_norm": 3.1054298924478307, + "learning_rate": 4.9743890125582884e-06, + "loss": 0.9988, + "step": 112698 + }, + { + "epoch": 1.36, + "grad_norm": 8.707419280399316, + "learning_rate": 4.973883974862709e-06, + "loss": 0.9126, + "step": 112701 + }, + { + "epoch": 1.36, + "grad_norm": 10.120408379920157, + "learning_rate": 4.973378954319457e-06, + "loss": 1.0207, + "step": 112704 + }, + { + "epoch": 1.36, + "grad_norm": 6.73919465207122, + "learning_rate": 4.972873950930248e-06, + "loss": 1.2372, + "step": 112707 + }, + { + "epoch": 1.36, + "grad_norm": 7.431788392361726, + "learning_rate": 4.97236896469681e-06, + "loss": 1.5076, + "step": 112710 + }, + { + "epoch": 1.36, + "grad_norm": 16.83547517745899, + "learning_rate": 4.971863995620869e-06, + "loss": 0.8966, + "step": 112713 + }, + { + "epoch": 1.36, + "grad_norm": 6.485929298841862, + "learning_rate": 4.971359043704146e-06, + "loss": 1.0705, + "step": 112716 + }, + { + "epoch": 1.36, + "grad_norm": 18.168093119324368, + "learning_rate": 4.9708541089483585e-06, + "loss": 1.1225, + "step": 112719 + }, + { + "epoch": 1.36, + "grad_norm": 5.853432027062623, + "learning_rate": 4.9703491913552345e-06, + "loss": 0.8608, + "step": 112722 + }, + { + "epoch": 1.36, + "grad_norm": 5.690618186098008, + "learning_rate": 4.969844290926499e-06, + "loss": 1.472, + "step": 112725 + }, + { + "epoch": 1.36, + "grad_norm": 5.293358514005489, + "learning_rate": 4.969339407663871e-06, + "loss": 1.1178, + "step": 112728 + }, + { + "epoch": 1.36, + "grad_norm": 6.169347622773352, + "learning_rate": 4.9688345415690795e-06, + "loss": 1.0993, + "step": 112731 + }, + { + "epoch": 1.36, + "grad_norm": 36.353002596724515, + "learning_rate": 4.968329692643841e-06, + "loss": 1.2505, + "step": 112734 + }, + { + "epoch": 1.36, + "grad_norm": 11.44829292198215, + "learning_rate": 4.967824860889883e-06, + "loss": 1.2624, + "step": 112737 + }, + { + "epoch": 1.36, + "grad_norm": 5.424104175020466, + "learning_rate": 4.967320046308923e-06, + "loss": 0.9849, + "step": 112740 + }, + { + "epoch": 1.36, + "grad_norm": 16.31552367980438, + "learning_rate": 4.966815248902691e-06, + "loss": 1.1487, + "step": 112743 + }, + { + "epoch": 1.36, + "grad_norm": 7.817238065777168, + "learning_rate": 4.966310468672901e-06, + "loss": 1.3187, + "step": 112746 + }, + { + "epoch": 1.36, + "grad_norm": 15.378489990300244, + "learning_rate": 4.9658057056212815e-06, + "loss": 1.4687, + "step": 112749 + }, + { + "epoch": 1.36, + "grad_norm": 7.4352452922857015, + "learning_rate": 4.9653009597495525e-06, + "loss": 1.5248, + "step": 112752 + }, + { + "epoch": 1.36, + "grad_norm": 24.458224981056194, + "learning_rate": 4.964796231059443e-06, + "loss": 1.1313, + "step": 112755 + }, + { + "epoch": 1.36, + "grad_norm": 4.20821357516495, + "learning_rate": 4.96429151955267e-06, + "loss": 1.3453, + "step": 112758 + }, + { + "epoch": 1.36, + "grad_norm": 27.487700494894266, + "learning_rate": 4.963786825230952e-06, + "loss": 1.0076, + "step": 112761 + }, + { + "epoch": 1.36, + "grad_norm": 3.70406745687109, + "learning_rate": 4.963282148096015e-06, + "loss": 1.1179, + "step": 112764 + }, + { + "epoch": 1.36, + "grad_norm": 4.6941649210435505, + "learning_rate": 4.9627774881495824e-06, + "loss": 1.1564, + "step": 112767 + }, + { + "epoch": 1.36, + "grad_norm": 13.57293347072813, + "learning_rate": 4.96227284539338e-06, + "loss": 0.953, + "step": 112770 + }, + { + "epoch": 1.36, + "grad_norm": 20.236078909551736, + "learning_rate": 4.961768219829122e-06, + "loss": 1.1952, + "step": 112773 + }, + { + "epoch": 1.36, + "grad_norm": 9.553640479204345, + "learning_rate": 4.961263611458533e-06, + "loss": 1.1209, + "step": 112776 + }, + { + "epoch": 1.36, + "grad_norm": 4.969061235363208, + "learning_rate": 4.960759020283341e-06, + "loss": 0.955, + "step": 112779 + }, + { + "epoch": 1.36, + "grad_norm": 8.453171195788414, + "learning_rate": 4.9602544463052596e-06, + "loss": 1.1252, + "step": 112782 + }, + { + "epoch": 1.36, + "grad_norm": 11.584699031136823, + "learning_rate": 4.959749889526019e-06, + "loss": 1.4069, + "step": 112785 + }, + { + "epoch": 1.36, + "grad_norm": 3.318130304508905, + "learning_rate": 4.959245349947332e-06, + "loss": 1.4649, + "step": 112788 + }, + { + "epoch": 1.36, + "grad_norm": 12.564830537045438, + "learning_rate": 4.958740827570924e-06, + "loss": 1.2836, + "step": 112791 + }, + { + "epoch": 1.36, + "grad_norm": 19.43761462277003, + "learning_rate": 4.9582363223985185e-06, + "loss": 0.9085, + "step": 112794 + }, + { + "epoch": 1.36, + "grad_norm": 9.360835594451423, + "learning_rate": 4.95773183443184e-06, + "loss": 1.1885, + "step": 112797 + }, + { + "epoch": 1.36, + "grad_norm": 27.5759922928756, + "learning_rate": 4.957227363672608e-06, + "loss": 1.3097, + "step": 112800 + }, + { + "epoch": 1.36, + "grad_norm": 4.660203237494713, + "learning_rate": 4.9567229101225375e-06, + "loss": 1.1866, + "step": 112803 + }, + { + "epoch": 1.36, + "grad_norm": 12.11581567267515, + "learning_rate": 4.956218473783355e-06, + "loss": 0.8314, + "step": 112806 + }, + { + "epoch": 1.36, + "grad_norm": 17.895449873638526, + "learning_rate": 4.955714054656783e-06, + "loss": 1.5425, + "step": 112809 + }, + { + "epoch": 1.36, + "grad_norm": 7.355624235952285, + "learning_rate": 4.955209652744545e-06, + "loss": 0.9616, + "step": 112812 + }, + { + "epoch": 1.36, + "grad_norm": 4.080815353811163, + "learning_rate": 4.954705268048356e-06, + "loss": 1.0679, + "step": 112815 + }, + { + "epoch": 1.36, + "grad_norm": 10.731445586552526, + "learning_rate": 4.95420090056994e-06, + "loss": 1.5495, + "step": 112818 + }, + { + "epoch": 1.36, + "grad_norm": 7.036319306430376, + "learning_rate": 4.953696550311022e-06, + "loss": 1.1335, + "step": 112821 + }, + { + "epoch": 1.36, + "grad_norm": 3.896617572409183, + "learning_rate": 4.953192217273322e-06, + "loss": 1.2122, + "step": 112824 + }, + { + "epoch": 1.36, + "grad_norm": 12.36330567368311, + "learning_rate": 4.952687901458554e-06, + "loss": 1.045, + "step": 112827 + }, + { + "epoch": 1.36, + "grad_norm": 11.837836494120733, + "learning_rate": 4.9521836028684445e-06, + "loss": 1.3327, + "step": 112830 + }, + { + "epoch": 1.36, + "grad_norm": 14.001392382998928, + "learning_rate": 4.951679321504714e-06, + "loss": 1.1208, + "step": 112833 + }, + { + "epoch": 1.36, + "grad_norm": 6.685494893468294, + "learning_rate": 4.951175057369083e-06, + "loss": 1.2973, + "step": 112836 + }, + { + "epoch": 1.36, + "grad_norm": 34.62988835232103, + "learning_rate": 4.950670810463278e-06, + "loss": 1.208, + "step": 112839 + }, + { + "epoch": 1.36, + "grad_norm": 7.531284318285376, + "learning_rate": 4.9501665807890145e-06, + "loss": 1.0701, + "step": 112842 + }, + { + "epoch": 1.36, + "grad_norm": 22.700761111335385, + "learning_rate": 4.9496623683480095e-06, + "loss": 1.2952, + "step": 112845 + }, + { + "epoch": 1.36, + "grad_norm": 7.058814699642818, + "learning_rate": 4.949158173141987e-06, + "loss": 1.3391, + "step": 112848 + }, + { + "epoch": 1.36, + "grad_norm": 19.07991357892922, + "learning_rate": 4.948653995172673e-06, + "loss": 1.2729, + "step": 112851 + }, + { + "epoch": 1.36, + "grad_norm": 8.546193412148279, + "learning_rate": 4.948149834441779e-06, + "loss": 1.1949, + "step": 112854 + }, + { + "epoch": 1.36, + "grad_norm": 20.75274326223834, + "learning_rate": 4.947645690951031e-06, + "loss": 0.8722, + "step": 112857 + }, + { + "epoch": 1.36, + "grad_norm": 6.024644075579576, + "learning_rate": 4.947141564702147e-06, + "loss": 1.3743, + "step": 112860 + }, + { + "epoch": 1.36, + "grad_norm": 6.3917317743712925, + "learning_rate": 4.946637455696853e-06, + "loss": 1.1343, + "step": 112863 + }, + { + "epoch": 1.36, + "grad_norm": 9.137130308849787, + "learning_rate": 4.946133363936866e-06, + "loss": 1.0831, + "step": 112866 + }, + { + "epoch": 1.36, + "grad_norm": 12.196822430848899, + "learning_rate": 4.945629289423901e-06, + "loss": 1.1498, + "step": 112869 + }, + { + "epoch": 1.36, + "grad_norm": 8.79831229542965, + "learning_rate": 4.945125232159683e-06, + "loss": 1.4374, + "step": 112872 + }, + { + "epoch": 1.36, + "grad_norm": 17.922967019489224, + "learning_rate": 4.944621192145932e-06, + "loss": 1.1753, + "step": 112875 + }, + { + "epoch": 1.36, + "grad_norm": 11.36184891310753, + "learning_rate": 4.944117169384371e-06, + "loss": 1.4275, + "step": 112878 + }, + { + "epoch": 1.36, + "grad_norm": 11.813700298586694, + "learning_rate": 4.943613163876714e-06, + "loss": 1.258, + "step": 112881 + }, + { + "epoch": 1.36, + "grad_norm": 5.106474257265074, + "learning_rate": 4.943109175624688e-06, + "loss": 1.3298, + "step": 112884 + }, + { + "epoch": 1.36, + "grad_norm": 2.861367140898954, + "learning_rate": 4.942605204630003e-06, + "loss": 0.9519, + "step": 112887 + }, + { + "epoch": 1.36, + "grad_norm": 14.2220580176203, + "learning_rate": 4.942101250894387e-06, + "loss": 1.0304, + "step": 112890 + }, + { + "epoch": 1.36, + "grad_norm": 6.505437545427548, + "learning_rate": 4.9415973144195604e-06, + "loss": 1.3668, + "step": 112893 + }, + { + "epoch": 1.36, + "grad_norm": 6.812303446903833, + "learning_rate": 4.941093395207236e-06, + "loss": 1.072, + "step": 112896 + }, + { + "epoch": 1.36, + "grad_norm": 3.138420491785395, + "learning_rate": 4.940589493259138e-06, + "loss": 1.0822, + "step": 112899 + }, + { + "epoch": 1.36, + "grad_norm": 8.750680671334006, + "learning_rate": 4.940085608576987e-06, + "loss": 0.9564, + "step": 112902 + }, + { + "epoch": 1.36, + "grad_norm": 11.612627079140056, + "learning_rate": 4.939581741162502e-06, + "loss": 1.451, + "step": 112905 + }, + { + "epoch": 1.36, + "grad_norm": 14.115378661643627, + "learning_rate": 4.939077891017404e-06, + "loss": 1.0541, + "step": 112908 + }, + { + "epoch": 1.36, + "grad_norm": 9.190938397665304, + "learning_rate": 4.938574058143406e-06, + "loss": 1.4144, + "step": 112911 + }, + { + "epoch": 1.36, + "grad_norm": 4.444304909348429, + "learning_rate": 4.938070242542232e-06, + "loss": 1.3698, + "step": 112914 + }, + { + "epoch": 1.36, + "grad_norm": 5.418110505254236, + "learning_rate": 4.937566444215601e-06, + "loss": 1.1142, + "step": 112917 + }, + { + "epoch": 1.36, + "grad_norm": 25.271872912312276, + "learning_rate": 4.937062663165235e-06, + "loss": 1.1967, + "step": 112920 + }, + { + "epoch": 1.36, + "grad_norm": 5.994340973217275, + "learning_rate": 4.936558899392848e-06, + "loss": 1.1001, + "step": 112923 + }, + { + "epoch": 1.36, + "grad_norm": 8.41637191772328, + "learning_rate": 4.936055152900161e-06, + "loss": 1.4681, + "step": 112926 + }, + { + "epoch": 1.36, + "grad_norm": 6.291596312425765, + "learning_rate": 4.9355514236888964e-06, + "loss": 1.0561, + "step": 112929 + }, + { + "epoch": 1.36, + "grad_norm": 10.72387522240718, + "learning_rate": 4.935047711760772e-06, + "loss": 1.1138, + "step": 112932 + }, + { + "epoch": 1.36, + "grad_norm": 2.768480354176391, + "learning_rate": 4.934544017117502e-06, + "loss": 0.9716, + "step": 112935 + }, + { + "epoch": 1.36, + "grad_norm": 5.728938344419759, + "learning_rate": 4.934040339760807e-06, + "loss": 0.919, + "step": 112938 + }, + { + "epoch": 1.36, + "grad_norm": 7.848633312072771, + "learning_rate": 4.933536679692409e-06, + "loss": 1.2431, + "step": 112941 + }, + { + "epoch": 1.36, + "grad_norm": 63.68685987477435, + "learning_rate": 4.933033036914026e-06, + "loss": 1.2207, + "step": 112944 + }, + { + "epoch": 1.36, + "grad_norm": 6.228143476678124, + "learning_rate": 4.932529411427378e-06, + "loss": 1.6653, + "step": 112947 + }, + { + "epoch": 1.36, + "grad_norm": 14.458029541303137, + "learning_rate": 4.932025803234183e-06, + "loss": 1.3378, + "step": 112950 + }, + { + "epoch": 1.36, + "grad_norm": 16.529147290724023, + "learning_rate": 4.931522212336154e-06, + "loss": 1.0211, + "step": 112953 + }, + { + "epoch": 1.36, + "grad_norm": 10.722244418111616, + "learning_rate": 4.9310186387350145e-06, + "loss": 1.3728, + "step": 112956 + }, + { + "epoch": 1.36, + "grad_norm": 16.262423739504637, + "learning_rate": 4.930515082432486e-06, + "loss": 1.0868, + "step": 112959 + }, + { + "epoch": 1.36, + "grad_norm": 6.067228757179509, + "learning_rate": 4.9300115434302795e-06, + "loss": 0.9751, + "step": 112962 + }, + { + "epoch": 1.36, + "grad_norm": 4.786770323754339, + "learning_rate": 4.929508021730118e-06, + "loss": 1.3436, + "step": 112965 + }, + { + "epoch": 1.36, + "grad_norm": 8.194534425889215, + "learning_rate": 4.929004517333719e-06, + "loss": 1.3895, + "step": 112968 + }, + { + "epoch": 1.36, + "grad_norm": 8.763067249196542, + "learning_rate": 4.928501030242805e-06, + "loss": 0.7919, + "step": 112971 + }, + { + "epoch": 1.36, + "grad_norm": 4.473432897718892, + "learning_rate": 4.92799756045909e-06, + "loss": 1.026, + "step": 112974 + }, + { + "epoch": 1.36, + "grad_norm": 23.000039196880113, + "learning_rate": 4.9274941079842884e-06, + "loss": 1.204, + "step": 112977 + }, + { + "epoch": 1.36, + "grad_norm": 3.423778299492101, + "learning_rate": 4.926990672820123e-06, + "loss": 1.2731, + "step": 112980 + }, + { + "epoch": 1.36, + "grad_norm": 15.561751987897301, + "learning_rate": 4.926487254968311e-06, + "loss": 1.2017, + "step": 112983 + }, + { + "epoch": 1.36, + "grad_norm": 5.411555851637703, + "learning_rate": 4.925983854430574e-06, + "loss": 1.1716, + "step": 112986 + }, + { + "epoch": 1.36, + "grad_norm": 16.550785857608062, + "learning_rate": 4.925480471208622e-06, + "loss": 1.1303, + "step": 112989 + }, + { + "epoch": 1.36, + "grad_norm": 4.492445611306191, + "learning_rate": 4.924977105304183e-06, + "loss": 1.2544, + "step": 112992 + }, + { + "epoch": 1.36, + "grad_norm": 6.954110574846276, + "learning_rate": 4.924473756718963e-06, + "loss": 1.4421, + "step": 112995 + }, + { + "epoch": 1.36, + "grad_norm": 7.445477749954206, + "learning_rate": 4.923970425454687e-06, + "loss": 1.1652, + "step": 112998 + }, + { + "epoch": 1.36, + "grad_norm": 6.73613743897287, + "learning_rate": 4.923467111513076e-06, + "loss": 1.1253, + "step": 113001 + }, + { + "epoch": 1.36, + "grad_norm": 9.353791107927227, + "learning_rate": 4.922963814895839e-06, + "loss": 1.3516, + "step": 113004 + }, + { + "epoch": 1.36, + "grad_norm": 6.45575632056171, + "learning_rate": 4.922460535604697e-06, + "loss": 1.1342, + "step": 113007 + }, + { + "epoch": 1.36, + "grad_norm": 4.81466806259935, + "learning_rate": 4.921957273641369e-06, + "loss": 1.1281, + "step": 113010 + }, + { + "epoch": 1.36, + "grad_norm": 19.176061588612885, + "learning_rate": 4.921454029007575e-06, + "loss": 1.2195, + "step": 113013 + }, + { + "epoch": 1.36, + "grad_norm": 7.888611115179419, + "learning_rate": 4.92095080170503e-06, + "loss": 1.1467, + "step": 113016 + }, + { + "epoch": 1.36, + "grad_norm": 7.4239385185260485, + "learning_rate": 4.9204475917354456e-06, + "loss": 1.2401, + "step": 113019 + }, + { + "epoch": 1.36, + "grad_norm": 12.311161817869316, + "learning_rate": 4.919944399100544e-06, + "loss": 1.0501, + "step": 113022 + }, + { + "epoch": 1.36, + "grad_norm": 7.667946937298295, + "learning_rate": 4.919441223802044e-06, + "loss": 1.1832, + "step": 113025 + }, + { + "epoch": 1.36, + "grad_norm": 3.446054625456942, + "learning_rate": 4.918938065841663e-06, + "loss": 0.9442, + "step": 113028 + }, + { + "epoch": 1.36, + "grad_norm": 11.52871580808612, + "learning_rate": 4.9184349252211135e-06, + "loss": 1.1043, + "step": 113031 + }, + { + "epoch": 1.36, + "grad_norm": 15.242298045227344, + "learning_rate": 4.917931801942119e-06, + "loss": 1.4191, + "step": 113034 + }, + { + "epoch": 1.36, + "grad_norm": 8.02147812882811, + "learning_rate": 4.917428696006388e-06, + "loss": 1.1242, + "step": 113037 + }, + { + "epoch": 1.36, + "grad_norm": 14.35521488675866, + "learning_rate": 4.916925607415647e-06, + "loss": 1.2468, + "step": 113040 + }, + { + "epoch": 1.36, + "grad_norm": 4.103338731649699, + "learning_rate": 4.9164225361716034e-06, + "loss": 1.0825, + "step": 113043 + }, + { + "epoch": 1.36, + "grad_norm": 12.217804592643116, + "learning_rate": 4.9159194822759785e-06, + "loss": 1.6001, + "step": 113046 + }, + { + "epoch": 1.36, + "grad_norm": 8.9846374914201, + "learning_rate": 4.915416445730489e-06, + "loss": 0.7365, + "step": 113049 + }, + { + "epoch": 1.36, + "grad_norm": 13.858714318581375, + "learning_rate": 4.914913426536852e-06, + "loss": 1.211, + "step": 113052 + }, + { + "epoch": 1.36, + "grad_norm": 8.770148546119188, + "learning_rate": 4.914410424696788e-06, + "loss": 0.876, + "step": 113055 + }, + { + "epoch": 1.36, + "grad_norm": 17.872960620151837, + "learning_rate": 4.913907440212009e-06, + "loss": 1.0189, + "step": 113058 + }, + { + "epoch": 1.36, + "grad_norm": 8.45743946241345, + "learning_rate": 4.913404473084229e-06, + "loss": 1.2183, + "step": 113061 + }, + { + "epoch": 1.36, + "grad_norm": 7.013519303226256, + "learning_rate": 4.912901523315166e-06, + "loss": 1.0173, + "step": 113064 + }, + { + "epoch": 1.36, + "grad_norm": 55.83571861306796, + "learning_rate": 4.912398590906542e-06, + "loss": 0.8291, + "step": 113067 + }, + { + "epoch": 1.36, + "grad_norm": 14.629371151130906, + "learning_rate": 4.911895675860065e-06, + "loss": 1.141, + "step": 113070 + }, + { + "epoch": 1.36, + "grad_norm": 22.074047720433903, + "learning_rate": 4.911392778177456e-06, + "loss": 1.307, + "step": 113073 + }, + { + "epoch": 1.36, + "grad_norm": 11.618157897990477, + "learning_rate": 4.910889897860435e-06, + "loss": 0.9528, + "step": 113076 + }, + { + "epoch": 1.36, + "grad_norm": 30.61326419408397, + "learning_rate": 4.9103870349107084e-06, + "loss": 1.3609, + "step": 113079 + }, + { + "epoch": 1.36, + "grad_norm": 8.049072290293168, + "learning_rate": 4.909884189330002e-06, + "loss": 1.2684, + "step": 113082 + }, + { + "epoch": 1.36, + "grad_norm": 8.118045564330435, + "learning_rate": 4.909381361120023e-06, + "loss": 1.1985, + "step": 113085 + }, + { + "epoch": 1.36, + "grad_norm": 9.079214419793145, + "learning_rate": 4.908878550282493e-06, + "loss": 1.2879, + "step": 113088 + }, + { + "epoch": 1.36, + "grad_norm": 11.84535748204205, + "learning_rate": 4.908375756819126e-06, + "loss": 1.1643, + "step": 113091 + }, + { + "epoch": 1.36, + "grad_norm": 19.40994848348683, + "learning_rate": 4.907872980731642e-06, + "loss": 1.3961, + "step": 113094 + }, + { + "epoch": 1.36, + "grad_norm": 11.398235089272394, + "learning_rate": 4.90737022202175e-06, + "loss": 0.8288, + "step": 113097 + }, + { + "epoch": 1.36, + "grad_norm": 8.537580903839988, + "learning_rate": 4.906867480691173e-06, + "loss": 0.9947, + "step": 113100 + }, + { + "epoch": 1.36, + "grad_norm": 5.438318472771827, + "learning_rate": 4.9063647567416195e-06, + "loss": 1.3067, + "step": 113103 + }, + { + "epoch": 1.36, + "grad_norm": 3.345448500549709, + "learning_rate": 4.9058620501748065e-06, + "loss": 1.2336, + "step": 113106 + }, + { + "epoch": 1.36, + "grad_norm": 3.42991523241425, + "learning_rate": 4.905359360992456e-06, + "loss": 1.3555, + "step": 113109 + }, + { + "epoch": 1.36, + "grad_norm": 9.907950870164917, + "learning_rate": 4.904856689196277e-06, + "loss": 1.0051, + "step": 113112 + }, + { + "epoch": 1.36, + "grad_norm": 4.462148729126442, + "learning_rate": 4.9043540347879845e-06, + "loss": 1.1931, + "step": 113115 + }, + { + "epoch": 1.36, + "grad_norm": 27.13174801925048, + "learning_rate": 4.903851397769301e-06, + "loss": 1.1218, + "step": 113118 + }, + { + "epoch": 1.36, + "grad_norm": 9.196393211095783, + "learning_rate": 4.903348778141937e-06, + "loss": 1.1435, + "step": 113121 + }, + { + "epoch": 1.36, + "grad_norm": 7.672749450683885, + "learning_rate": 4.902846175907604e-06, + "loss": 1.2128, + "step": 113124 + }, + { + "epoch": 1.36, + "grad_norm": 19.39931947908954, + "learning_rate": 4.902343591068021e-06, + "loss": 1.1482, + "step": 113127 + }, + { + "epoch": 1.36, + "grad_norm": 9.333511447350038, + "learning_rate": 4.9018410236249024e-06, + "loss": 1.155, + "step": 113130 + }, + { + "epoch": 1.36, + "grad_norm": 6.583001109455622, + "learning_rate": 4.901338473579965e-06, + "loss": 1.0671, + "step": 113133 + }, + { + "epoch": 1.36, + "grad_norm": 4.350119596612077, + "learning_rate": 4.900835940934927e-06, + "loss": 1.0923, + "step": 113136 + }, + { + "epoch": 1.36, + "grad_norm": 16.842061568101382, + "learning_rate": 4.900333425691495e-06, + "loss": 1.0558, + "step": 113139 + }, + { + "epoch": 1.36, + "grad_norm": 25.79422795428795, + "learning_rate": 4.899830927851391e-06, + "loss": 1.1617, + "step": 113142 + }, + { + "epoch": 1.36, + "grad_norm": 18.305957310216122, + "learning_rate": 4.899328447416324e-06, + "loss": 0.8148, + "step": 113145 + }, + { + "epoch": 1.36, + "grad_norm": 6.983656974238611, + "learning_rate": 4.898825984388016e-06, + "loss": 1.3683, + "step": 113148 + }, + { + "epoch": 1.36, + "grad_norm": 5.67343559636567, + "learning_rate": 4.898323538768172e-06, + "loss": 0.9568, + "step": 113151 + }, + { + "epoch": 1.36, + "grad_norm": 20.69550757322185, + "learning_rate": 4.897821110558514e-06, + "loss": 1.1141, + "step": 113154 + }, + { + "epoch": 1.36, + "grad_norm": 6.328036340538138, + "learning_rate": 4.897318699760755e-06, + "loss": 1.3485, + "step": 113157 + }, + { + "epoch": 1.36, + "grad_norm": 3.918328142741643, + "learning_rate": 4.896816306376612e-06, + "loss": 1.9102, + "step": 113160 + }, + { + "epoch": 1.36, + "grad_norm": 11.824174807272964, + "learning_rate": 4.8963139304077925e-06, + "loss": 1.4914, + "step": 113163 + }, + { + "epoch": 1.36, + "grad_norm": 13.64256994281266, + "learning_rate": 4.8958115718560195e-06, + "loss": 1.2659, + "step": 113166 + }, + { + "epoch": 1.36, + "grad_norm": 7.475167619976177, + "learning_rate": 4.8953092307229996e-06, + "loss": 0.8581, + "step": 113169 + }, + { + "epoch": 1.36, + "grad_norm": 5.0361343795958335, + "learning_rate": 4.8948069070104506e-06, + "loss": 1.1877, + "step": 113172 + }, + { + "epoch": 1.36, + "grad_norm": 17.951178476734622, + "learning_rate": 4.89430460072009e-06, + "loss": 1.3004, + "step": 113175 + }, + { + "epoch": 1.36, + "grad_norm": 19.52856782729008, + "learning_rate": 4.893802311853626e-06, + "loss": 1.4084, + "step": 113178 + }, + { + "epoch": 1.36, + "grad_norm": 6.399638310501003, + "learning_rate": 4.893300040412774e-06, + "loss": 1.2897, + "step": 113181 + }, + { + "epoch": 1.36, + "grad_norm": 14.858649347475106, + "learning_rate": 4.892797786399255e-06, + "loss": 1.2891, + "step": 113184 + }, + { + "epoch": 1.36, + "grad_norm": 4.523557033636128, + "learning_rate": 4.892295549814772e-06, + "loss": 1.4021, + "step": 113187 + }, + { + "epoch": 1.36, + "grad_norm": 14.789598145787586, + "learning_rate": 4.8917933306610496e-06, + "loss": 1.5343, + "step": 113190 + }, + { + "epoch": 1.36, + "grad_norm": 9.86174635162079, + "learning_rate": 4.8912911289397925e-06, + "loss": 0.8292, + "step": 113193 + }, + { + "epoch": 1.36, + "grad_norm": 4.989000021636479, + "learning_rate": 4.890788944652718e-06, + "loss": 1.2548, + "step": 113196 + }, + { + "epoch": 1.36, + "grad_norm": 6.508055093411643, + "learning_rate": 4.8902867778015416e-06, + "loss": 0.952, + "step": 113199 + }, + { + "epoch": 1.36, + "grad_norm": 7.04639266649238, + "learning_rate": 4.889784628387979e-06, + "loss": 1.093, + "step": 113202 + }, + { + "epoch": 1.36, + "grad_norm": 9.929192749491007, + "learning_rate": 4.889282496413737e-06, + "loss": 1.3296, + "step": 113205 + }, + { + "epoch": 1.36, + "grad_norm": 4.346545587926601, + "learning_rate": 4.888780381880537e-06, + "loss": 1.24, + "step": 113208 + }, + { + "epoch": 1.36, + "grad_norm": 10.5657544950626, + "learning_rate": 4.888278284790084e-06, + "loss": 1.1363, + "step": 113211 + }, + { + "epoch": 1.36, + "grad_norm": 9.125180440197498, + "learning_rate": 4.887776205144097e-06, + "loss": 1.518, + "step": 113214 + }, + { + "epoch": 1.36, + "grad_norm": 3.442211389734092, + "learning_rate": 4.887274142944291e-06, + "loss": 0.8637, + "step": 113217 + }, + { + "epoch": 1.36, + "grad_norm": 8.465231878571162, + "learning_rate": 4.886772098192374e-06, + "loss": 1.5458, + "step": 113220 + }, + { + "epoch": 1.36, + "grad_norm": 11.514420082272412, + "learning_rate": 4.886270070890062e-06, + "loss": 1.1845, + "step": 113223 + }, + { + "epoch": 1.36, + "grad_norm": 5.9033856800864095, + "learning_rate": 4.885768061039072e-06, + "loss": 1.4571, + "step": 113226 + }, + { + "epoch": 1.36, + "grad_norm": 4.646965216744728, + "learning_rate": 4.885266068641114e-06, + "loss": 1.3996, + "step": 113229 + }, + { + "epoch": 1.36, + "grad_norm": 7.561166409503368, + "learning_rate": 4.884764093697895e-06, + "loss": 1.1433, + "step": 113232 + }, + { + "epoch": 1.36, + "grad_norm": 17.54293431394349, + "learning_rate": 4.8842621362111355e-06, + "loss": 1.1006, + "step": 113235 + }, + { + "epoch": 1.36, + "grad_norm": 15.143576270294313, + "learning_rate": 4.8837601961825455e-06, + "loss": 1.1219, + "step": 113238 + }, + { + "epoch": 1.36, + "grad_norm": 11.538729231503545, + "learning_rate": 4.883258273613839e-06, + "loss": 0.9626, + "step": 113241 + }, + { + "epoch": 1.36, + "grad_norm": 6.234522305358057, + "learning_rate": 4.882756368506733e-06, + "loss": 1.2648, + "step": 113244 + }, + { + "epoch": 1.36, + "grad_norm": 5.3047438280970844, + "learning_rate": 4.882254480862933e-06, + "loss": 1.3236, + "step": 113247 + }, + { + "epoch": 1.36, + "grad_norm": 9.609834943385296, + "learning_rate": 4.88175261068416e-06, + "loss": 1.3029, + "step": 113250 + }, + { + "epoch": 1.36, + "grad_norm": 14.979737602087987, + "learning_rate": 4.881250757972116e-06, + "loss": 1.079, + "step": 113253 + }, + { + "epoch": 1.36, + "grad_norm": 38.132649951335004, + "learning_rate": 4.880748922728523e-06, + "loss": 1.3803, + "step": 113256 + }, + { + "epoch": 1.36, + "grad_norm": 3.8056066764159984, + "learning_rate": 4.880247104955087e-06, + "loss": 1.0244, + "step": 113259 + }, + { + "epoch": 1.36, + "grad_norm": 10.395687029038308, + "learning_rate": 4.879745304653524e-06, + "loss": 1.0282, + "step": 113262 + }, + { + "epoch": 1.36, + "grad_norm": 13.539848422802688, + "learning_rate": 4.879243521825545e-06, + "loss": 1.0865, + "step": 113265 + }, + { + "epoch": 1.36, + "grad_norm": 6.6844953860878205, + "learning_rate": 4.878741756472868e-06, + "loss": 1.0107, + "step": 113268 + }, + { + "epoch": 1.36, + "grad_norm": 3.7354194016369533, + "learning_rate": 4.8782400085972e-06, + "loss": 1.4265, + "step": 113271 + }, + { + "epoch": 1.36, + "grad_norm": 10.530262480990961, + "learning_rate": 4.8777382782002505e-06, + "loss": 1.4358, + "step": 113274 + }, + { + "epoch": 1.36, + "grad_norm": 13.335265308087042, + "learning_rate": 4.877236565283735e-06, + "loss": 1.1374, + "step": 113277 + }, + { + "epoch": 1.36, + "grad_norm": 13.216898209258492, + "learning_rate": 4.876734869849367e-06, + "loss": 1.3532, + "step": 113280 + }, + { + "epoch": 1.36, + "grad_norm": 24.801184438635467, + "learning_rate": 4.876233191898859e-06, + "loss": 1.2025, + "step": 113283 + }, + { + "epoch": 1.36, + "grad_norm": 7.371159261911814, + "learning_rate": 4.8757315314339205e-06, + "loss": 1.0075, + "step": 113286 + }, + { + "epoch": 1.36, + "grad_norm": 5.805153005961684, + "learning_rate": 4.875229888456262e-06, + "loss": 0.8859, + "step": 113289 + }, + { + "epoch": 1.36, + "grad_norm": 83.3137450703746, + "learning_rate": 4.874728262967604e-06, + "loss": 1.5137, + "step": 113292 + }, + { + "epoch": 1.36, + "grad_norm": 9.444011378165511, + "learning_rate": 4.874226654969647e-06, + "loss": 1.4379, + "step": 113295 + }, + { + "epoch": 1.36, + "grad_norm": 3.4295469756924377, + "learning_rate": 4.873725064464112e-06, + "loss": 1.1207, + "step": 113298 + }, + { + "epoch": 1.36, + "grad_norm": 5.185881826241835, + "learning_rate": 4.873223491452704e-06, + "loss": 1.2391, + "step": 113301 + }, + { + "epoch": 1.36, + "grad_norm": 7.359942309909615, + "learning_rate": 4.872721935937137e-06, + "loss": 1.2524, + "step": 113304 + }, + { + "epoch": 1.36, + "grad_norm": 12.203660119028042, + "learning_rate": 4.872220397919123e-06, + "loss": 1.2837, + "step": 113307 + }, + { + "epoch": 1.36, + "grad_norm": 20.985458077498265, + "learning_rate": 4.871718877400377e-06, + "loss": 1.232, + "step": 113310 + }, + { + "epoch": 1.36, + "grad_norm": 43.16401698872067, + "learning_rate": 4.871217374382609e-06, + "loss": 1.5217, + "step": 113313 + }, + { + "epoch": 1.36, + "grad_norm": 12.271677275706004, + "learning_rate": 4.870715888867524e-06, + "loss": 1.2115, + "step": 113316 + }, + { + "epoch": 1.36, + "grad_norm": 5.267602641336093, + "learning_rate": 4.870214420856838e-06, + "loss": 1.437, + "step": 113319 + }, + { + "epoch": 1.36, + "grad_norm": 5.36382780481294, + "learning_rate": 4.869712970352263e-06, + "loss": 1.1757, + "step": 113322 + }, + { + "epoch": 1.36, + "grad_norm": 2.5278447184026014, + "learning_rate": 4.869211537355513e-06, + "loss": 1.3778, + "step": 113325 + }, + { + "epoch": 1.36, + "grad_norm": 23.41445612155565, + "learning_rate": 4.868710121868292e-06, + "loss": 1.1335, + "step": 113328 + }, + { + "epoch": 1.36, + "grad_norm": 2.67036408087404, + "learning_rate": 4.868208723892315e-06, + "loss": 0.9643, + "step": 113331 + }, + { + "epoch": 1.36, + "grad_norm": 4.847186264172118, + "learning_rate": 4.8677073434292975e-06, + "loss": 1.0378, + "step": 113334 + }, + { + "epoch": 1.36, + "grad_norm": 7.194716268387238, + "learning_rate": 4.867205980480946e-06, + "loss": 1.1107, + "step": 113337 + }, + { + "epoch": 1.36, + "grad_norm": 5.972143456625661, + "learning_rate": 4.866704635048968e-06, + "loss": 1.1087, + "step": 113340 + }, + { + "epoch": 1.36, + "grad_norm": 93.13374005093436, + "learning_rate": 4.8662033071350775e-06, + "loss": 0.9507, + "step": 113343 + }, + { + "epoch": 1.36, + "grad_norm": 9.68841910830801, + "learning_rate": 4.865701996740986e-06, + "loss": 1.3856, + "step": 113346 + }, + { + "epoch": 1.36, + "grad_norm": 7.431399234149506, + "learning_rate": 4.8652007038684046e-06, + "loss": 0.8763, + "step": 113349 + }, + { + "epoch": 1.36, + "grad_norm": 12.292877048712715, + "learning_rate": 4.864699428519048e-06, + "loss": 1.113, + "step": 113352 + }, + { + "epoch": 1.36, + "grad_norm": 9.650290490337088, + "learning_rate": 4.864198170694622e-06, + "loss": 0.9731, + "step": 113355 + }, + { + "epoch": 1.36, + "grad_norm": 5.637858159085114, + "learning_rate": 4.863696930396833e-06, + "loss": 0.8751, + "step": 113358 + }, + { + "epoch": 1.36, + "grad_norm": 22.642121069394406, + "learning_rate": 4.863195707627397e-06, + "loss": 1.4266, + "step": 113361 + }, + { + "epoch": 1.36, + "grad_norm": 6.493819463806007, + "learning_rate": 4.862694502388028e-06, + "loss": 0.9866, + "step": 113364 + }, + { + "epoch": 1.36, + "grad_norm": 13.540493982294239, + "learning_rate": 4.862193314680428e-06, + "loss": 1.1028, + "step": 113367 + }, + { + "epoch": 1.36, + "grad_norm": 5.851664114419512, + "learning_rate": 4.861692144506311e-06, + "loss": 0.8266, + "step": 113370 + }, + { + "epoch": 1.36, + "grad_norm": 12.711804499817834, + "learning_rate": 4.861190991867389e-06, + "loss": 0.898, + "step": 113373 + }, + { + "epoch": 1.36, + "grad_norm": 13.978261936950611, + "learning_rate": 4.860689856765375e-06, + "loss": 0.782, + "step": 113376 + }, + { + "epoch": 1.36, + "grad_norm": 9.405584464119853, + "learning_rate": 4.860188739201975e-06, + "loss": 1.3738, + "step": 113379 + }, + { + "epoch": 1.36, + "grad_norm": 19.607841467600608, + "learning_rate": 4.859687639178895e-06, + "loss": 1.0029, + "step": 113382 + }, + { + "epoch": 1.36, + "grad_norm": 12.288627875730489, + "learning_rate": 4.85918655669785e-06, + "loss": 1.4136, + "step": 113385 + }, + { + "epoch": 1.36, + "grad_norm": 20.217346594455886, + "learning_rate": 4.85868549176055e-06, + "loss": 1.262, + "step": 113388 + }, + { + "epoch": 1.36, + "grad_norm": 5.1569301943670265, + "learning_rate": 4.858184444368709e-06, + "loss": 1.4101, + "step": 113391 + }, + { + "epoch": 1.36, + "grad_norm": 19.52750259870871, + "learning_rate": 4.857683414524027e-06, + "loss": 1.0203, + "step": 113394 + }, + { + "epoch": 1.36, + "grad_norm": 11.929844945916392, + "learning_rate": 4.857182402228223e-06, + "loss": 1.1638, + "step": 113397 + }, + { + "epoch": 1.36, + "grad_norm": 3.8809458633866973, + "learning_rate": 4.856681407483001e-06, + "loss": 0.9945, + "step": 113400 + }, + { + "epoch": 1.36, + "grad_norm": 10.853640756351817, + "learning_rate": 4.85618043029007e-06, + "loss": 1.2173, + "step": 113403 + }, + { + "epoch": 1.36, + "grad_norm": 28.468216920501767, + "learning_rate": 4.855679470651149e-06, + "loss": 1.1277, + "step": 113406 + }, + { + "epoch": 1.36, + "grad_norm": 7.357542651776915, + "learning_rate": 4.855178528567936e-06, + "loss": 1.6812, + "step": 113409 + }, + { + "epoch": 1.36, + "grad_norm": 12.330498588852272, + "learning_rate": 4.854677604042145e-06, + "loss": 1.0662, + "step": 113412 + }, + { + "epoch": 1.36, + "grad_norm": 7.304918757353396, + "learning_rate": 4.854176697075486e-06, + "loss": 0.8192, + "step": 113415 + }, + { + "epoch": 1.36, + "grad_norm": 22.158060364315123, + "learning_rate": 4.853675807669674e-06, + "loss": 1.5643, + "step": 113418 + }, + { + "epoch": 1.36, + "grad_norm": 6.565016766569385, + "learning_rate": 4.853174935826412e-06, + "loss": 0.9726, + "step": 113421 + }, + { + "epoch": 1.36, + "grad_norm": 8.455891123275165, + "learning_rate": 4.852674081547405e-06, + "loss": 0.8966, + "step": 113424 + }, + { + "epoch": 1.36, + "grad_norm": 84.61377793912135, + "learning_rate": 4.852173244834369e-06, + "loss": 0.9273, + "step": 113427 + }, + { + "epoch": 1.36, + "grad_norm": 7.403468803044069, + "learning_rate": 4.85167242568901e-06, + "loss": 1.3156, + "step": 113430 + }, + { + "epoch": 1.36, + "grad_norm": 24.818521260891426, + "learning_rate": 4.851171624113043e-06, + "loss": 1.4309, + "step": 113433 + }, + { + "epoch": 1.36, + "grad_norm": 10.621317627596811, + "learning_rate": 4.85067084010817e-06, + "loss": 1.1523, + "step": 113436 + }, + { + "epoch": 1.36, + "grad_norm": 4.066927812224176, + "learning_rate": 4.850170073676103e-06, + "loss": 0.9765, + "step": 113439 + }, + { + "epoch": 1.36, + "grad_norm": 34.15772812104488, + "learning_rate": 4.84966932481855e-06, + "loss": 0.9836, + "step": 113442 + }, + { + "epoch": 1.36, + "grad_norm": 11.231132259652405, + "learning_rate": 4.849168593537224e-06, + "loss": 1.0444, + "step": 113445 + }, + { + "epoch": 1.36, + "grad_norm": 4.6696900788547095, + "learning_rate": 4.848667879833825e-06, + "loss": 1.4234, + "step": 113448 + }, + { + "epoch": 1.36, + "grad_norm": 11.733582333048878, + "learning_rate": 4.848167183710068e-06, + "loss": 1.1203, + "step": 113451 + }, + { + "epoch": 1.36, + "grad_norm": 7.942895969227992, + "learning_rate": 4.847666505167661e-06, + "loss": 1.2747, + "step": 113454 + }, + { + "epoch": 1.36, + "grad_norm": 13.367992535274619, + "learning_rate": 4.847165844208312e-06, + "loss": 1.1619, + "step": 113457 + }, + { + "epoch": 1.36, + "grad_norm": 25.948844634986415, + "learning_rate": 4.846665200833733e-06, + "loss": 1.0462, + "step": 113460 + }, + { + "epoch": 1.36, + "grad_norm": 5.722954139138195, + "learning_rate": 4.84616457504563e-06, + "loss": 0.9646, + "step": 113463 + }, + { + "epoch": 1.36, + "grad_norm": 8.659046387039485, + "learning_rate": 4.845663966845707e-06, + "loss": 0.9316, + "step": 113466 + }, + { + "epoch": 1.36, + "grad_norm": 8.195118645084749, + "learning_rate": 4.845163376235677e-06, + "loss": 1.0701, + "step": 113469 + }, + { + "epoch": 1.36, + "grad_norm": 9.482304700431966, + "learning_rate": 4.844662803217251e-06, + "loss": 1.166, + "step": 113472 + }, + { + "epoch": 1.36, + "grad_norm": 7.862576453577314, + "learning_rate": 4.84416224779213e-06, + "loss": 1.5605, + "step": 113475 + }, + { + "epoch": 1.36, + "grad_norm": 8.591039347522875, + "learning_rate": 4.843661709962025e-06, + "loss": 1.2427, + "step": 113478 + }, + { + "epoch": 1.36, + "grad_norm": 8.88202397641027, + "learning_rate": 4.843161189728651e-06, + "loss": 1.1465, + "step": 113481 + }, + { + "epoch": 1.36, + "grad_norm": 4.326198372416181, + "learning_rate": 4.842660687093705e-06, + "loss": 1.0094, + "step": 113484 + }, + { + "epoch": 1.36, + "grad_norm": 5.225798549261039, + "learning_rate": 4.8421602020589055e-06, + "loss": 1.0601, + "step": 113487 + }, + { + "epoch": 1.36, + "grad_norm": 8.047851536620497, + "learning_rate": 4.841659734625952e-06, + "loss": 1.3358, + "step": 113490 + }, + { + "epoch": 1.36, + "grad_norm": 5.599965397435966, + "learning_rate": 4.841159284796554e-06, + "loss": 1.1434, + "step": 113493 + }, + { + "epoch": 1.36, + "grad_norm": 7.634948087915998, + "learning_rate": 4.840658852572422e-06, + "loss": 1.3143, + "step": 113496 + }, + { + "epoch": 1.36, + "grad_norm": 10.742541373527917, + "learning_rate": 4.8401584379552666e-06, + "loss": 0.9969, + "step": 113499 + }, + { + "epoch": 1.36, + "grad_norm": 8.378592548874428, + "learning_rate": 4.839658040946789e-06, + "loss": 1.0235, + "step": 113502 + }, + { + "epoch": 1.36, + "grad_norm": 3.4038404332415135, + "learning_rate": 4.839157661548703e-06, + "loss": 1.2455, + "step": 113505 + }, + { + "epoch": 1.36, + "grad_norm": 16.36732425423512, + "learning_rate": 4.838657299762709e-06, + "loss": 0.8723, + "step": 113508 + }, + { + "epoch": 1.36, + "grad_norm": 7.673533709952139, + "learning_rate": 4.838156955590519e-06, + "loss": 1.078, + "step": 113511 + }, + { + "epoch": 1.36, + "grad_norm": 3.713567355502054, + "learning_rate": 4.837656629033842e-06, + "loss": 1.1978, + "step": 113514 + }, + { + "epoch": 1.37, + "grad_norm": 20.566152609293162, + "learning_rate": 4.8371563200943815e-06, + "loss": 1.236, + "step": 113517 + }, + { + "epoch": 1.37, + "grad_norm": 13.455698192613433, + "learning_rate": 4.836656028773846e-06, + "loss": 1.2128, + "step": 113520 + }, + { + "epoch": 1.37, + "grad_norm": 7.535410045744264, + "learning_rate": 4.836155755073944e-06, + "loss": 1.061, + "step": 113523 + }, + { + "epoch": 1.37, + "grad_norm": 7.202496094890047, + "learning_rate": 4.835655498996386e-06, + "loss": 0.8835, + "step": 113526 + }, + { + "epoch": 1.37, + "grad_norm": 6.951425197495433, + "learning_rate": 4.835155260542877e-06, + "loss": 1.2704, + "step": 113529 + }, + { + "epoch": 1.37, + "grad_norm": 14.79011674296764, + "learning_rate": 4.8346550397151174e-06, + "loss": 1.1957, + "step": 113532 + }, + { + "epoch": 1.37, + "grad_norm": 10.854088091393516, + "learning_rate": 4.834154836514819e-06, + "loss": 1.313, + "step": 113535 + }, + { + "epoch": 1.37, + "grad_norm": 35.262913341548035, + "learning_rate": 4.833654650943691e-06, + "loss": 1.0472, + "step": 113538 + }, + { + "epoch": 1.37, + "grad_norm": 14.960418869029635, + "learning_rate": 4.833154483003441e-06, + "loss": 1.4679, + "step": 113541 + }, + { + "epoch": 1.37, + "grad_norm": 7.153138699040866, + "learning_rate": 4.832654332695772e-06, + "loss": 0.9861, + "step": 113544 + }, + { + "epoch": 1.37, + "grad_norm": 16.57091391907743, + "learning_rate": 4.832154200022394e-06, + "loss": 0.9615, + "step": 113547 + }, + { + "epoch": 1.37, + "grad_norm": 9.672714999361007, + "learning_rate": 4.83165408498501e-06, + "loss": 1.2082, + "step": 113550 + }, + { + "epoch": 1.37, + "grad_norm": 9.905527362067417, + "learning_rate": 4.831153987585332e-06, + "loss": 1.0752, + "step": 113553 + }, + { + "epoch": 1.37, + "grad_norm": 21.077194858173545, + "learning_rate": 4.83065390782506e-06, + "loss": 1.0763, + "step": 113556 + }, + { + "epoch": 1.37, + "grad_norm": 6.5200898649417995, + "learning_rate": 4.830153845705905e-06, + "loss": 0.9747, + "step": 113559 + }, + { + "epoch": 1.37, + "grad_norm": 7.43890604273843, + "learning_rate": 4.829653801229572e-06, + "loss": 1.2672, + "step": 113562 + }, + { + "epoch": 1.37, + "grad_norm": 7.8755647685277745, + "learning_rate": 4.829153774397768e-06, + "loss": 0.9082, + "step": 113565 + }, + { + "epoch": 1.37, + "grad_norm": 6.613815551841793, + "learning_rate": 4.8286537652122045e-06, + "loss": 1.3966, + "step": 113568 + }, + { + "epoch": 1.37, + "grad_norm": 6.291312422979984, + "learning_rate": 4.828153773674582e-06, + "loss": 1.1305, + "step": 113571 + }, + { + "epoch": 1.37, + "grad_norm": 12.564802821382523, + "learning_rate": 4.827653799786603e-06, + "loss": 0.7366, + "step": 113574 + }, + { + "epoch": 1.37, + "grad_norm": 6.998456077694646, + "learning_rate": 4.827153843549981e-06, + "loss": 1.5553, + "step": 113577 + }, + { + "epoch": 1.37, + "grad_norm": 8.259612048957777, + "learning_rate": 4.826653904966422e-06, + "loss": 1.6134, + "step": 113580 + }, + { + "epoch": 1.37, + "grad_norm": 2.9861618238255105, + "learning_rate": 4.826153984037626e-06, + "loss": 1.2957, + "step": 113583 + }, + { + "epoch": 1.37, + "grad_norm": 11.326702240078149, + "learning_rate": 4.825654080765303e-06, + "loss": 1.2429, + "step": 113586 + }, + { + "epoch": 1.37, + "grad_norm": 46.41709165258819, + "learning_rate": 4.825154195151162e-06, + "loss": 0.9047, + "step": 113589 + }, + { + "epoch": 1.37, + "grad_norm": 13.37687416143955, + "learning_rate": 4.8246543271969036e-06, + "loss": 1.2113, + "step": 113592 + }, + { + "epoch": 1.37, + "grad_norm": 10.478159339237573, + "learning_rate": 4.824154476904239e-06, + "loss": 1.0553, + "step": 113595 + }, + { + "epoch": 1.37, + "grad_norm": 11.019349699600514, + "learning_rate": 4.823654644274867e-06, + "loss": 1.1605, + "step": 113598 + }, + { + "epoch": 1.37, + "grad_norm": 6.10406348215645, + "learning_rate": 4.823154829310497e-06, + "loss": 1.1602, + "step": 113601 + }, + { + "epoch": 1.37, + "grad_norm": 14.586244405934846, + "learning_rate": 4.822655032012834e-06, + "loss": 1.1471, + "step": 113604 + }, + { + "epoch": 1.37, + "grad_norm": 9.07509190720763, + "learning_rate": 4.822155252383589e-06, + "loss": 1.1219, + "step": 113607 + }, + { + "epoch": 1.37, + "grad_norm": 10.386809764963333, + "learning_rate": 4.821655490424459e-06, + "loss": 1.2306, + "step": 113610 + }, + { + "epoch": 1.37, + "grad_norm": 4.455130743013195, + "learning_rate": 4.821155746137158e-06, + "loss": 1.1769, + "step": 113613 + }, + { + "epoch": 1.37, + "grad_norm": 8.151577627048017, + "learning_rate": 4.8206560195233834e-06, + "loss": 1.2142, + "step": 113616 + }, + { + "epoch": 1.37, + "grad_norm": 8.125776663737929, + "learning_rate": 4.820156310584843e-06, + "loss": 0.6949, + "step": 113619 + }, + { + "epoch": 1.37, + "grad_norm": 3.065884766106849, + "learning_rate": 4.819656619323248e-06, + "loss": 1.1737, + "step": 113622 + }, + { + "epoch": 1.37, + "grad_norm": 11.25667558803416, + "learning_rate": 4.819156945740295e-06, + "loss": 1.2015, + "step": 113625 + }, + { + "epoch": 1.37, + "grad_norm": 7.9361047209587525, + "learning_rate": 4.818657289837694e-06, + "loss": 1.3425, + "step": 113628 + }, + { + "epoch": 1.37, + "grad_norm": 15.402552510473464, + "learning_rate": 4.818157651617152e-06, + "loss": 1.3462, + "step": 113631 + }, + { + "epoch": 1.37, + "grad_norm": 3.0243897751271094, + "learning_rate": 4.817658031080372e-06, + "loss": 1.1688, + "step": 113634 + }, + { + "epoch": 1.37, + "grad_norm": 8.987604935274016, + "learning_rate": 4.817158428229055e-06, + "loss": 1.427, + "step": 113637 + }, + { + "epoch": 1.37, + "grad_norm": 9.267635624632872, + "learning_rate": 4.81665884306491e-06, + "loss": 1.1162, + "step": 113640 + }, + { + "epoch": 1.37, + "grad_norm": 15.484824772326807, + "learning_rate": 4.8161592755896395e-06, + "loss": 1.219, + "step": 113643 + }, + { + "epoch": 1.37, + "grad_norm": 8.181200444968969, + "learning_rate": 4.8156597258049505e-06, + "loss": 1.2748, + "step": 113646 + }, + { + "epoch": 1.37, + "grad_norm": 10.273390485651714, + "learning_rate": 4.815160193712553e-06, + "loss": 1.1524, + "step": 113649 + }, + { + "epoch": 1.37, + "grad_norm": 137.1170270270728, + "learning_rate": 4.81466067931414e-06, + "loss": 1.0311, + "step": 113652 + }, + { + "epoch": 1.37, + "grad_norm": 13.716285394376312, + "learning_rate": 4.814161182611428e-06, + "loss": 1.1954, + "step": 113655 + }, + { + "epoch": 1.37, + "grad_norm": 12.39271285819301, + "learning_rate": 4.813661703606112e-06, + "loss": 1.0921, + "step": 113658 + }, + { + "epoch": 1.37, + "grad_norm": 2.9307868786600295, + "learning_rate": 4.813162242299904e-06, + "loss": 1.0325, + "step": 113661 + }, + { + "epoch": 1.37, + "grad_norm": 4.900347780476516, + "learning_rate": 4.8126627986945006e-06, + "loss": 1.2939, + "step": 113664 + }, + { + "epoch": 1.37, + "grad_norm": 4.337720279108778, + "learning_rate": 4.812163372791611e-06, + "loss": 1.3481, + "step": 113667 + }, + { + "epoch": 1.37, + "grad_norm": 26.931595519192733, + "learning_rate": 4.8116639645929385e-06, + "loss": 1.4259, + "step": 113670 + }, + { + "epoch": 1.37, + "grad_norm": 8.496744749917925, + "learning_rate": 4.811164574100193e-06, + "loss": 1.2951, + "step": 113673 + }, + { + "epoch": 1.37, + "grad_norm": 2.3205954402351554, + "learning_rate": 4.810665201315073e-06, + "loss": 1.1562, + "step": 113676 + }, + { + "epoch": 1.37, + "grad_norm": 4.503071969224452, + "learning_rate": 4.81016584623928e-06, + "loss": 1.2143, + "step": 113679 + }, + { + "epoch": 1.37, + "grad_norm": 11.781796615369258, + "learning_rate": 4.809666508874522e-06, + "loss": 0.9933, + "step": 113682 + }, + { + "epoch": 1.37, + "grad_norm": 5.937570986743939, + "learning_rate": 4.809167189222502e-06, + "loss": 0.9252, + "step": 113685 + }, + { + "epoch": 1.37, + "grad_norm": 9.59230160494272, + "learning_rate": 4.808667887284929e-06, + "loss": 1.5317, + "step": 113688 + }, + { + "epoch": 1.37, + "grad_norm": 23.27148949789811, + "learning_rate": 4.8081686030634975e-06, + "loss": 0.9424, + "step": 113691 + }, + { + "epoch": 1.37, + "grad_norm": 4.361104107886062, + "learning_rate": 4.807669336559918e-06, + "loss": 1.1345, + "step": 113694 + }, + { + "epoch": 1.37, + "grad_norm": 4.782059486524593, + "learning_rate": 4.8071700877758965e-06, + "loss": 0.9342, + "step": 113697 + }, + { + "epoch": 1.37, + "grad_norm": 22.382827324745097, + "learning_rate": 4.806670856713128e-06, + "loss": 1.0901, + "step": 113700 + }, + { + "epoch": 1.37, + "grad_norm": 11.987150903675118, + "learning_rate": 4.806171643373326e-06, + "loss": 1.2637, + "step": 113703 + }, + { + "epoch": 1.37, + "grad_norm": 157.50546338707193, + "learning_rate": 4.805672447758186e-06, + "loss": 1.223, + "step": 113706 + }, + { + "epoch": 1.37, + "grad_norm": 3.2168320595922397, + "learning_rate": 4.805173269869415e-06, + "loss": 1.0691, + "step": 113709 + }, + { + "epoch": 1.37, + "grad_norm": 9.394924055699699, + "learning_rate": 4.804674109708715e-06, + "loss": 1.4976, + "step": 113712 + }, + { + "epoch": 1.37, + "grad_norm": 3.1981283694126086, + "learning_rate": 4.804174967277796e-06, + "loss": 1.4021, + "step": 113715 + }, + { + "epoch": 1.37, + "grad_norm": 19.850702566006472, + "learning_rate": 4.803675842578355e-06, + "loss": 1.2886, + "step": 113718 + }, + { + "epoch": 1.37, + "grad_norm": 7.542043122873211, + "learning_rate": 4.803176735612094e-06, + "loss": 0.8783, + "step": 113721 + }, + { + "epoch": 1.37, + "grad_norm": 17.102267076470223, + "learning_rate": 4.802677646380719e-06, + "loss": 1.3051, + "step": 113724 + }, + { + "epoch": 1.37, + "grad_norm": 6.268433663098694, + "learning_rate": 4.802178574885932e-06, + "loss": 1.056, + "step": 113727 + }, + { + "epoch": 1.37, + "grad_norm": 7.510166647020826, + "learning_rate": 4.801679521129442e-06, + "loss": 1.0665, + "step": 113730 + }, + { + "epoch": 1.37, + "grad_norm": 20.013804005285287, + "learning_rate": 4.801180485112943e-06, + "loss": 1.2686, + "step": 113733 + }, + { + "epoch": 1.37, + "grad_norm": 6.629684068257282, + "learning_rate": 4.800681466838143e-06, + "loss": 1.1631, + "step": 113736 + }, + { + "epoch": 1.37, + "grad_norm": 32.24192539014388, + "learning_rate": 4.800182466306748e-06, + "loss": 1.1537, + "step": 113739 + }, + { + "epoch": 1.37, + "grad_norm": 11.769582307057203, + "learning_rate": 4.799683483520457e-06, + "loss": 1.2279, + "step": 113742 + }, + { + "epoch": 1.37, + "grad_norm": 7.0043813297944615, + "learning_rate": 4.7991845184809695e-06, + "loss": 1.5467, + "step": 113745 + }, + { + "epoch": 1.37, + "grad_norm": 5.99790468491356, + "learning_rate": 4.798685571189993e-06, + "loss": 1.4266, + "step": 113748 + }, + { + "epoch": 1.37, + "grad_norm": 18.26756898006013, + "learning_rate": 4.798186641649227e-06, + "loss": 1.0217, + "step": 113751 + }, + { + "epoch": 1.37, + "grad_norm": 6.300153357470957, + "learning_rate": 4.797687729860378e-06, + "loss": 0.8958, + "step": 113754 + }, + { + "epoch": 1.37, + "grad_norm": 10.137856613328948, + "learning_rate": 4.79718883582515e-06, + "loss": 1.1858, + "step": 113757 + }, + { + "epoch": 1.37, + "grad_norm": 12.373599731590135, + "learning_rate": 4.796689959545238e-06, + "loss": 1.4897, + "step": 113760 + }, + { + "epoch": 1.37, + "grad_norm": 10.378771127897823, + "learning_rate": 4.796191101022354e-06, + "loss": 1.1936, + "step": 113763 + }, + { + "epoch": 1.37, + "grad_norm": 27.937889859396147, + "learning_rate": 4.795692260258191e-06, + "loss": 1.3699, + "step": 113766 + }, + { + "epoch": 1.37, + "grad_norm": 6.959144609581961, + "learning_rate": 4.79519343725446e-06, + "loss": 1.0722, + "step": 113769 + }, + { + "epoch": 1.37, + "grad_norm": 9.286747759805472, + "learning_rate": 4.794694632012854e-06, + "loss": 1.3733, + "step": 113772 + }, + { + "epoch": 1.37, + "grad_norm": 14.264548656575883, + "learning_rate": 4.794195844535081e-06, + "loss": 1.1802, + "step": 113775 + }, + { + "epoch": 1.37, + "grad_norm": 8.631606436870733, + "learning_rate": 4.793697074822843e-06, + "loss": 1.2247, + "step": 113778 + }, + { + "epoch": 1.37, + "grad_norm": 9.029065133055193, + "learning_rate": 4.793198322877845e-06, + "loss": 1.0251, + "step": 113781 + }, + { + "epoch": 1.37, + "grad_norm": 22.215560131957954, + "learning_rate": 4.792699588701786e-06, + "loss": 1.3934, + "step": 113784 + }, + { + "epoch": 1.37, + "grad_norm": 10.908806228350148, + "learning_rate": 4.7922008722963635e-06, + "loss": 1.1148, + "step": 113787 + }, + { + "epoch": 1.37, + "grad_norm": 12.410197340685906, + "learning_rate": 4.791702173663283e-06, + "loss": 1.1112, + "step": 113790 + }, + { + "epoch": 1.37, + "grad_norm": 7.57762422044946, + "learning_rate": 4.791203492804247e-06, + "loss": 0.9493, + "step": 113793 + }, + { + "epoch": 1.37, + "grad_norm": 22.483603422357536, + "learning_rate": 4.7907048297209624e-06, + "loss": 1.6003, + "step": 113796 + }, + { + "epoch": 1.37, + "grad_norm": 7.454753050685102, + "learning_rate": 4.790206184415121e-06, + "loss": 1.0589, + "step": 113799 + }, + { + "epoch": 1.37, + "grad_norm": 9.657389841086907, + "learning_rate": 4.78970755688843e-06, + "loss": 1.2146, + "step": 113802 + }, + { + "epoch": 1.37, + "grad_norm": 6.8737246748541265, + "learning_rate": 4.789208947142593e-06, + "loss": 0.8339, + "step": 113805 + }, + { + "epoch": 1.37, + "grad_norm": 7.463066741100885, + "learning_rate": 4.788710355179306e-06, + "loss": 1.1032, + "step": 113808 + }, + { + "epoch": 1.37, + "grad_norm": 12.1714595471646, + "learning_rate": 4.7882117810002774e-06, + "loss": 1.4609, + "step": 113811 + }, + { + "epoch": 1.37, + "grad_norm": 26.32310562301878, + "learning_rate": 4.787713224607201e-06, + "loss": 1.5711, + "step": 113814 + }, + { + "epoch": 1.37, + "grad_norm": 48.925216931561685, + "learning_rate": 4.787214686001781e-06, + "loss": 1.2957, + "step": 113817 + }, + { + "epoch": 1.37, + "grad_norm": 6.643307762970957, + "learning_rate": 4.786716165185721e-06, + "loss": 1.4071, + "step": 113820 + }, + { + "epoch": 1.37, + "grad_norm": 13.063306924535075, + "learning_rate": 4.786217662160724e-06, + "loss": 1.1508, + "step": 113823 + }, + { + "epoch": 1.37, + "grad_norm": 6.529707598090925, + "learning_rate": 4.785719176928487e-06, + "loss": 1.258, + "step": 113826 + }, + { + "epoch": 1.37, + "grad_norm": 23.93574731516621, + "learning_rate": 4.78522070949071e-06, + "loss": 1.4036, + "step": 113829 + }, + { + "epoch": 1.37, + "grad_norm": 7.824951215783509, + "learning_rate": 4.784722259849095e-06, + "loss": 1.7054, + "step": 113832 + }, + { + "epoch": 1.37, + "grad_norm": 10.200641213158091, + "learning_rate": 4.784223828005345e-06, + "loss": 1.3386, + "step": 113835 + }, + { + "epoch": 1.37, + "grad_norm": 3.300217470939808, + "learning_rate": 4.783725413961166e-06, + "loss": 1.1129, + "step": 113838 + }, + { + "epoch": 1.37, + "grad_norm": 6.139389000132617, + "learning_rate": 4.783227017718247e-06, + "loss": 1.2426, + "step": 113841 + }, + { + "epoch": 1.37, + "grad_norm": 8.94054758732058, + "learning_rate": 4.782728639278295e-06, + "loss": 1.3766, + "step": 113844 + }, + { + "epoch": 1.37, + "grad_norm": 13.715029175269668, + "learning_rate": 4.782230278643016e-06, + "loss": 1.0643, + "step": 113847 + }, + { + "epoch": 1.37, + "grad_norm": 10.044728898857452, + "learning_rate": 4.781731935814106e-06, + "loss": 1.1473, + "step": 113850 + }, + { + "epoch": 1.37, + "grad_norm": 5.505098977644796, + "learning_rate": 4.781233610793261e-06, + "loss": 1.4402, + "step": 113853 + }, + { + "epoch": 1.37, + "grad_norm": 9.341843767175751, + "learning_rate": 4.780735303582186e-06, + "loss": 1.1406, + "step": 113856 + }, + { + "epoch": 1.37, + "grad_norm": 14.884298726555176, + "learning_rate": 4.780237014182581e-06, + "loss": 1.2938, + "step": 113859 + }, + { + "epoch": 1.37, + "grad_norm": 7.837377349642809, + "learning_rate": 4.779738742596147e-06, + "loss": 1.5282, + "step": 113862 + }, + { + "epoch": 1.37, + "grad_norm": 10.763667876695857, + "learning_rate": 4.779240488824588e-06, + "loss": 1.1246, + "step": 113865 + }, + { + "epoch": 1.37, + "grad_norm": 7.189148665729935, + "learning_rate": 4.7787422528696005e-06, + "loss": 1.2965, + "step": 113868 + }, + { + "epoch": 1.37, + "grad_norm": 4.155412926902802, + "learning_rate": 4.778244034732881e-06, + "loss": 1.0937, + "step": 113871 + }, + { + "epoch": 1.37, + "grad_norm": 7.061583783283914, + "learning_rate": 4.7777458344161355e-06, + "loss": 0.9396, + "step": 113874 + }, + { + "epoch": 1.37, + "grad_norm": 3.2565928893654648, + "learning_rate": 4.777247651921065e-06, + "loss": 0.9932, + "step": 113877 + }, + { + "epoch": 1.37, + "grad_norm": 6.8405283704608655, + "learning_rate": 4.776749487249364e-06, + "loss": 1.0144, + "step": 113880 + }, + { + "epoch": 1.37, + "grad_norm": 9.435031945422002, + "learning_rate": 4.776251340402734e-06, + "loss": 1.3465, + "step": 113883 + }, + { + "epoch": 1.37, + "grad_norm": 8.067427985735394, + "learning_rate": 4.775753211382877e-06, + "loss": 1.3716, + "step": 113886 + }, + { + "epoch": 1.37, + "grad_norm": 11.411818713011115, + "learning_rate": 4.775255100191497e-06, + "loss": 1.3368, + "step": 113889 + }, + { + "epoch": 1.37, + "grad_norm": 6.548536996957072, + "learning_rate": 4.774757006830289e-06, + "loss": 1.1546, + "step": 113892 + }, + { + "epoch": 1.37, + "grad_norm": 4.883308933862232, + "learning_rate": 4.77425893130095e-06, + "loss": 1.3314, + "step": 113895 + }, + { + "epoch": 1.37, + "grad_norm": 12.339052284599477, + "learning_rate": 4.773760873605184e-06, + "loss": 1.1301, + "step": 113898 + }, + { + "epoch": 1.37, + "grad_norm": 64.10549443352996, + "learning_rate": 4.7732628337446875e-06, + "loss": 1.1075, + "step": 113901 + }, + { + "epoch": 1.37, + "grad_norm": 19.653891536803656, + "learning_rate": 4.772764811721167e-06, + "loss": 0.7995, + "step": 113904 + }, + { + "epoch": 1.37, + "grad_norm": 8.667895674370842, + "learning_rate": 4.772266807536313e-06, + "loss": 0.9152, + "step": 113907 + }, + { + "epoch": 1.37, + "grad_norm": 45.773605583916535, + "learning_rate": 4.7717688211918336e-06, + "loss": 1.3279, + "step": 113910 + }, + { + "epoch": 1.37, + "grad_norm": 13.139125712062727, + "learning_rate": 4.7712708526894205e-06, + "loss": 1.0214, + "step": 113913 + }, + { + "epoch": 1.37, + "grad_norm": 4.849805543159089, + "learning_rate": 4.7707729020307755e-06, + "loss": 1.2745, + "step": 113916 + }, + { + "epoch": 1.37, + "grad_norm": 6.8414257493282, + "learning_rate": 4.7702749692176045e-06, + "loss": 1.0324, + "step": 113919 + }, + { + "epoch": 1.37, + "grad_norm": 22.103738238641462, + "learning_rate": 4.7697770542515955e-06, + "loss": 1.099, + "step": 113922 + }, + { + "epoch": 1.37, + "grad_norm": 17.466954737412973, + "learning_rate": 4.769279157134455e-06, + "loss": 1.0312, + "step": 113925 + }, + { + "epoch": 1.37, + "grad_norm": 17.41955540364114, + "learning_rate": 4.76878127786788e-06, + "loss": 1.289, + "step": 113928 + }, + { + "epoch": 1.37, + "grad_norm": 13.24723419019034, + "learning_rate": 4.768283416453574e-06, + "loss": 1.0782, + "step": 113931 + }, + { + "epoch": 1.37, + "grad_norm": 4.059286937183192, + "learning_rate": 4.767785572893234e-06, + "loss": 1.2857, + "step": 113934 + }, + { + "epoch": 1.37, + "grad_norm": 8.63336084023448, + "learning_rate": 4.76728774718855e-06, + "loss": 1.3232, + "step": 113937 + }, + { + "epoch": 1.37, + "grad_norm": 6.343238058548419, + "learning_rate": 4.766789939341231e-06, + "loss": 1.1727, + "step": 113940 + }, + { + "epoch": 1.37, + "grad_norm": 3.6933715489250454, + "learning_rate": 4.766292149352971e-06, + "loss": 1.1938, + "step": 113943 + }, + { + "epoch": 1.37, + "grad_norm": 6.441688681732744, + "learning_rate": 4.765794377225476e-06, + "loss": 1.2485, + "step": 113946 + }, + { + "epoch": 1.37, + "grad_norm": 5.030332423023572, + "learning_rate": 4.765296622960434e-06, + "loss": 1.1352, + "step": 113949 + }, + { + "epoch": 1.37, + "grad_norm": 10.41623497422798, + "learning_rate": 4.764798886559555e-06, + "loss": 1.3767, + "step": 113952 + }, + { + "epoch": 1.37, + "grad_norm": 18.26767155517434, + "learning_rate": 4.764301168024527e-06, + "loss": 1.1504, + "step": 113955 + }, + { + "epoch": 1.37, + "grad_norm": 4.649907718761181, + "learning_rate": 4.763803467357057e-06, + "loss": 0.7711, + "step": 113958 + }, + { + "epoch": 1.37, + "grad_norm": 10.300676576278109, + "learning_rate": 4.763305784558836e-06, + "loss": 1.1292, + "step": 113961 + }, + { + "epoch": 1.37, + "grad_norm": 6.72663189138825, + "learning_rate": 4.762808119631565e-06, + "loss": 1.0171, + "step": 113964 + }, + { + "epoch": 1.37, + "grad_norm": 13.510475614325008, + "learning_rate": 4.7623104725769455e-06, + "loss": 1.123, + "step": 113967 + }, + { + "epoch": 1.37, + "grad_norm": 4.66253905551893, + "learning_rate": 4.761812843396672e-06, + "loss": 1.6333, + "step": 113970 + }, + { + "epoch": 1.37, + "grad_norm": 4.749706068583314, + "learning_rate": 4.761315232092449e-06, + "loss": 1.1103, + "step": 113973 + }, + { + "epoch": 1.37, + "grad_norm": 5.401374855032755, + "learning_rate": 4.7608176386659695e-06, + "loss": 1.0279, + "step": 113976 + }, + { + "epoch": 1.37, + "grad_norm": 7.976384868306738, + "learning_rate": 4.76032006311893e-06, + "loss": 1.3766, + "step": 113979 + }, + { + "epoch": 1.37, + "grad_norm": 7.633163991871166, + "learning_rate": 4.759822505453029e-06, + "loss": 1.2686, + "step": 113982 + }, + { + "epoch": 1.37, + "grad_norm": 12.513786765730524, + "learning_rate": 4.759324965669971e-06, + "loss": 1.2102, + "step": 113985 + }, + { + "epoch": 1.37, + "grad_norm": 8.201011766088024, + "learning_rate": 4.758827443771445e-06, + "loss": 1.2777, + "step": 113988 + }, + { + "epoch": 1.37, + "grad_norm": 7.530283120217461, + "learning_rate": 4.758329939759154e-06, + "loss": 0.9867, + "step": 113991 + }, + { + "epoch": 1.37, + "grad_norm": 7.212228099594957, + "learning_rate": 4.757832453634797e-06, + "loss": 1.0239, + "step": 113994 + }, + { + "epoch": 1.37, + "grad_norm": 5.176013381444546, + "learning_rate": 4.757334985400068e-06, + "loss": 0.8139, + "step": 113997 + }, + { + "epoch": 1.37, + "grad_norm": 16.359068979504176, + "learning_rate": 4.7568375350566684e-06, + "loss": 1.0672, + "step": 114000 + }, + { + "epoch": 1.37, + "grad_norm": 3.5294046574145996, + "learning_rate": 4.756340102606291e-06, + "loss": 1.0164, + "step": 114003 + }, + { + "epoch": 1.37, + "grad_norm": 7.070013683154259, + "learning_rate": 4.755842688050636e-06, + "loss": 1.36, + "step": 114006 + }, + { + "epoch": 1.37, + "grad_norm": 7.607292777156739, + "learning_rate": 4.7553452913914e-06, + "loss": 0.8529, + "step": 114009 + }, + { + "epoch": 1.37, + "grad_norm": 6.958297540483429, + "learning_rate": 4.754847912630286e-06, + "loss": 0.8564, + "step": 114012 + }, + { + "epoch": 1.37, + "grad_norm": 17.373016713857243, + "learning_rate": 4.754350551768983e-06, + "loss": 1.3795, + "step": 114015 + }, + { + "epoch": 1.37, + "grad_norm": 6.6005116216610675, + "learning_rate": 4.753853208809195e-06, + "loss": 1.2037, + "step": 114018 + }, + { + "epoch": 1.37, + "grad_norm": 6.425682233206135, + "learning_rate": 4.753355883752613e-06, + "loss": 1.044, + "step": 114021 + }, + { + "epoch": 1.37, + "grad_norm": 5.496515994187007, + "learning_rate": 4.752858576600938e-06, + "loss": 1.832, + "step": 114024 + }, + { + "epoch": 1.37, + "grad_norm": 5.657263271206984, + "learning_rate": 4.75236128735587e-06, + "loss": 1.3682, + "step": 114027 + }, + { + "epoch": 1.37, + "grad_norm": 9.413811020149344, + "learning_rate": 4.751864016019099e-06, + "loss": 1.53, + "step": 114030 + }, + { + "epoch": 1.37, + "grad_norm": 4.309033730494244, + "learning_rate": 4.751366762592325e-06, + "loss": 0.9748, + "step": 114033 + }, + { + "epoch": 1.37, + "grad_norm": 8.965618727915338, + "learning_rate": 4.750869527077251e-06, + "loss": 1.0073, + "step": 114036 + }, + { + "epoch": 1.37, + "grad_norm": 12.268380546132658, + "learning_rate": 4.750372309475564e-06, + "loss": 1.4947, + "step": 114039 + }, + { + "epoch": 1.37, + "grad_norm": 12.281510374559659, + "learning_rate": 4.749875109788969e-06, + "loss": 1.1846, + "step": 114042 + }, + { + "epoch": 1.37, + "grad_norm": 5.296596143130728, + "learning_rate": 4.749377928019157e-06, + "loss": 1.0363, + "step": 114045 + }, + { + "epoch": 1.37, + "grad_norm": 6.211669159959964, + "learning_rate": 4.748880764167825e-06, + "loss": 1.3033, + "step": 114048 + }, + { + "epoch": 1.37, + "grad_norm": 7.865255513609301, + "learning_rate": 4.748383618236672e-06, + "loss": 0.9994, + "step": 114051 + }, + { + "epoch": 1.37, + "grad_norm": 15.056978543681938, + "learning_rate": 4.747886490227398e-06, + "loss": 1.6001, + "step": 114054 + }, + { + "epoch": 1.37, + "grad_norm": 11.038995562341537, + "learning_rate": 4.747389380141691e-06, + "loss": 0.837, + "step": 114057 + }, + { + "epoch": 1.37, + "grad_norm": 22.161227625754897, + "learning_rate": 4.746892287981257e-06, + "loss": 0.9573, + "step": 114060 + }, + { + "epoch": 1.37, + "grad_norm": 25.60647392925668, + "learning_rate": 4.746395213747783e-06, + "loss": 1.1407, + "step": 114063 + }, + { + "epoch": 1.37, + "grad_norm": 5.936243271699791, + "learning_rate": 4.745898157442974e-06, + "loss": 1.2237, + "step": 114066 + }, + { + "epoch": 1.37, + "grad_norm": 3.58330843695601, + "learning_rate": 4.745401119068518e-06, + "loss": 1.0366, + "step": 114069 + }, + { + "epoch": 1.37, + "grad_norm": 2.723667806595638, + "learning_rate": 4.744904098626115e-06, + "loss": 1.265, + "step": 114072 + }, + { + "epoch": 1.37, + "grad_norm": 35.275348997511756, + "learning_rate": 4.7444070961174625e-06, + "loss": 0.9337, + "step": 114075 + }, + { + "epoch": 1.37, + "grad_norm": 10.62455122988441, + "learning_rate": 4.743910111544259e-06, + "loss": 0.9062, + "step": 114078 + }, + { + "epoch": 1.37, + "grad_norm": 8.699638139358978, + "learning_rate": 4.7434131449081925e-06, + "loss": 0.9388, + "step": 114081 + }, + { + "epoch": 1.37, + "grad_norm": 11.631037554426713, + "learning_rate": 4.742916196210967e-06, + "loss": 1.0459, + "step": 114084 + }, + { + "epoch": 1.37, + "grad_norm": 22.712086722903944, + "learning_rate": 4.742419265454272e-06, + "loss": 1.2353, + "step": 114087 + }, + { + "epoch": 1.37, + "grad_norm": 4.2801773587463, + "learning_rate": 4.741922352639806e-06, + "loss": 1.0971, + "step": 114090 + }, + { + "epoch": 1.37, + "grad_norm": 15.544532240653236, + "learning_rate": 4.74142545776927e-06, + "loss": 1.0607, + "step": 114093 + }, + { + "epoch": 1.37, + "grad_norm": 17.519356814340078, + "learning_rate": 4.740928580844349e-06, + "loss": 1.1902, + "step": 114096 + }, + { + "epoch": 1.37, + "grad_norm": 4.719223061395493, + "learning_rate": 4.7404317218667465e-06, + "loss": 1.431, + "step": 114099 + }, + { + "epoch": 1.37, + "grad_norm": 43.22074960813541, + "learning_rate": 4.739934880838158e-06, + "loss": 0.9443, + "step": 114102 + }, + { + "epoch": 1.37, + "grad_norm": 5.84379350301889, + "learning_rate": 4.739438057760274e-06, + "loss": 1.0202, + "step": 114105 + }, + { + "epoch": 1.37, + "grad_norm": 5.834971528501187, + "learning_rate": 4.738941252634797e-06, + "loss": 1.3097, + "step": 114108 + }, + { + "epoch": 1.37, + "grad_norm": 19.961415803952306, + "learning_rate": 4.738444465463414e-06, + "loss": 0.9395, + "step": 114111 + }, + { + "epoch": 1.37, + "grad_norm": 8.33716112265884, + "learning_rate": 4.737947696247827e-06, + "loss": 1.0, + "step": 114114 + }, + { + "epoch": 1.37, + "grad_norm": 24.988108473108646, + "learning_rate": 4.7374509449897265e-06, + "loss": 0.9732, + "step": 114117 + }, + { + "epoch": 1.37, + "grad_norm": 8.931669336829913, + "learning_rate": 4.736954211690815e-06, + "loss": 0.8251, + "step": 114120 + }, + { + "epoch": 1.37, + "grad_norm": 6.601839887520246, + "learning_rate": 4.73645749635278e-06, + "loss": 1.0372, + "step": 114123 + }, + { + "epoch": 1.37, + "grad_norm": 5.428187641604985, + "learning_rate": 4.735960798977322e-06, + "loss": 1.0004, + "step": 114126 + }, + { + "epoch": 1.37, + "grad_norm": 4.639437233076746, + "learning_rate": 4.735464119566131e-06, + "loss": 1.2856, + "step": 114129 + }, + { + "epoch": 1.37, + "grad_norm": 13.928704902401993, + "learning_rate": 4.7349674581209045e-06, + "loss": 0.9668, + "step": 114132 + }, + { + "epoch": 1.37, + "grad_norm": 9.329235832661114, + "learning_rate": 4.734470814643341e-06, + "loss": 1.1151, + "step": 114135 + }, + { + "epoch": 1.37, + "grad_norm": 13.979495458520345, + "learning_rate": 4.73397418913513e-06, + "loss": 1.4829, + "step": 114138 + }, + { + "epoch": 1.37, + "grad_norm": 9.371798828661511, + "learning_rate": 4.733477581597966e-06, + "loss": 1.0955, + "step": 114141 + }, + { + "epoch": 1.37, + "grad_norm": 8.502518691354979, + "learning_rate": 4.732980992033551e-06, + "loss": 1.0974, + "step": 114144 + }, + { + "epoch": 1.37, + "grad_norm": 1.9726581832510193, + "learning_rate": 4.732484420443574e-06, + "loss": 2.0436, + "step": 114147 + }, + { + "epoch": 1.37, + "grad_norm": 18.66016936584009, + "learning_rate": 4.731987866829727e-06, + "loss": 0.9841, + "step": 114150 + }, + { + "epoch": 1.37, + "grad_norm": 19.445106295037796, + "learning_rate": 4.731491331193708e-06, + "loss": 1.441, + "step": 114153 + }, + { + "epoch": 1.37, + "grad_norm": 11.747452377492632, + "learning_rate": 4.730994813537211e-06, + "loss": 1.2399, + "step": 114156 + }, + { + "epoch": 1.37, + "grad_norm": 5.769867844502962, + "learning_rate": 4.7304983138619305e-06, + "loss": 1.3493, + "step": 114159 + }, + { + "epoch": 1.37, + "grad_norm": 14.5993623823149, + "learning_rate": 4.730001832169566e-06, + "loss": 1.1226, + "step": 114162 + }, + { + "epoch": 1.37, + "grad_norm": 2.916321498608961, + "learning_rate": 4.729505368461803e-06, + "loss": 1.2305, + "step": 114165 + }, + { + "epoch": 1.37, + "grad_norm": 5.1907340416873815, + "learning_rate": 4.729008922740342e-06, + "loss": 1.5537, + "step": 114168 + }, + { + "epoch": 1.37, + "grad_norm": 15.196085314635324, + "learning_rate": 4.728512495006873e-06, + "loss": 1.352, + "step": 114171 + }, + { + "epoch": 1.37, + "grad_norm": 9.134131050149195, + "learning_rate": 4.728016085263095e-06, + "loss": 1.0691, + "step": 114174 + }, + { + "epoch": 1.37, + "grad_norm": 21.545117293303992, + "learning_rate": 4.727519693510696e-06, + "loss": 1.3932, + "step": 114177 + }, + { + "epoch": 1.37, + "grad_norm": 5.919937354809489, + "learning_rate": 4.727023319751373e-06, + "loss": 0.8471, + "step": 114180 + }, + { + "epoch": 1.37, + "grad_norm": 4.198187349552446, + "learning_rate": 4.72652696398682e-06, + "loss": 1.1225, + "step": 114183 + }, + { + "epoch": 1.37, + "grad_norm": 3.5386480327194456, + "learning_rate": 4.726030626218734e-06, + "loss": 0.986, + "step": 114186 + }, + { + "epoch": 1.37, + "grad_norm": 6.009536205444403, + "learning_rate": 4.725534306448807e-06, + "loss": 1.1298, + "step": 114189 + }, + { + "epoch": 1.37, + "grad_norm": 13.278918352581696, + "learning_rate": 4.725038004678727e-06, + "loss": 1.0417, + "step": 114192 + }, + { + "epoch": 1.37, + "grad_norm": 8.098080275663277, + "learning_rate": 4.724541720910192e-06, + "loss": 0.9803, + "step": 114195 + }, + { + "epoch": 1.37, + "grad_norm": 7.653776274963083, + "learning_rate": 4.724045455144896e-06, + "loss": 1.1228, + "step": 114198 + }, + { + "epoch": 1.37, + "grad_norm": 9.953985369204506, + "learning_rate": 4.723549207384537e-06, + "loss": 1.1419, + "step": 114201 + }, + { + "epoch": 1.37, + "grad_norm": 78.86366696131292, + "learning_rate": 4.7230529776307996e-06, + "loss": 1.5125, + "step": 114204 + }, + { + "epoch": 1.37, + "grad_norm": 16.727617705330964, + "learning_rate": 4.7225567658853824e-06, + "loss": 1.2687, + "step": 114207 + }, + { + "epoch": 1.37, + "grad_norm": 9.380333635109194, + "learning_rate": 4.7220605721499805e-06, + "loss": 1.0341, + "step": 114210 + }, + { + "epoch": 1.37, + "grad_norm": 21.391049443139117, + "learning_rate": 4.721564396426281e-06, + "loss": 0.9405, + "step": 114213 + }, + { + "epoch": 1.37, + "grad_norm": 5.4400492646099305, + "learning_rate": 4.721068238715986e-06, + "loss": 1.234, + "step": 114216 + }, + { + "epoch": 1.37, + "grad_norm": 10.526418019615368, + "learning_rate": 4.720572099020779e-06, + "loss": 1.2436, + "step": 114219 + }, + { + "epoch": 1.37, + "grad_norm": 7.307398740863628, + "learning_rate": 4.720075977342359e-06, + "loss": 1.1562, + "step": 114222 + }, + { + "epoch": 1.37, + "grad_norm": 11.503840584764962, + "learning_rate": 4.719579873682416e-06, + "loss": 1.1522, + "step": 114225 + }, + { + "epoch": 1.37, + "grad_norm": 4.505392406210422, + "learning_rate": 4.71908378804265e-06, + "loss": 0.9899, + "step": 114228 + }, + { + "epoch": 1.37, + "grad_norm": 3.1804198083825463, + "learning_rate": 4.7185877204247484e-06, + "loss": 1.1421, + "step": 114231 + }, + { + "epoch": 1.37, + "grad_norm": 12.213228271181215, + "learning_rate": 4.7180916708304e-06, + "loss": 1.1536, + "step": 114234 + }, + { + "epoch": 1.37, + "grad_norm": 11.770004618109963, + "learning_rate": 4.717595639261303e-06, + "loss": 1.2579, + "step": 114237 + }, + { + "epoch": 1.37, + "grad_norm": 6.231431183979836, + "learning_rate": 4.71709962571915e-06, + "loss": 1.0526, + "step": 114240 + }, + { + "epoch": 1.37, + "grad_norm": 8.897240863285216, + "learning_rate": 4.7166036302056364e-06, + "loss": 1.5203, + "step": 114243 + }, + { + "epoch": 1.37, + "grad_norm": 3.6892415804850716, + "learning_rate": 4.716107652722447e-06, + "loss": 1.1741, + "step": 114246 + }, + { + "epoch": 1.37, + "grad_norm": 15.796188894940457, + "learning_rate": 4.71561169327128e-06, + "loss": 1.1701, + "step": 114249 + }, + { + "epoch": 1.37, + "grad_norm": 43.10149687071441, + "learning_rate": 4.71511575185383e-06, + "loss": 1.5488, + "step": 114252 + }, + { + "epoch": 1.37, + "grad_norm": 5.3200147003498826, + "learning_rate": 4.714619828471787e-06, + "loss": 1.4275, + "step": 114255 + }, + { + "epoch": 1.37, + "grad_norm": 6.977503348477615, + "learning_rate": 4.714123923126838e-06, + "loss": 1.2889, + "step": 114258 + }, + { + "epoch": 1.37, + "grad_norm": 6.95913334147423, + "learning_rate": 4.713628035820681e-06, + "loss": 1.1253, + "step": 114261 + }, + { + "epoch": 1.37, + "grad_norm": 28.166266833476364, + "learning_rate": 4.713132166555006e-06, + "loss": 1.3768, + "step": 114264 + }, + { + "epoch": 1.37, + "grad_norm": 34.83564850917226, + "learning_rate": 4.712636315331508e-06, + "loss": 0.9458, + "step": 114267 + }, + { + "epoch": 1.37, + "grad_norm": 23.04900185169963, + "learning_rate": 4.712140482151881e-06, + "loss": 1.2606, + "step": 114270 + }, + { + "epoch": 1.37, + "grad_norm": 19.12002885032882, + "learning_rate": 4.711644667017814e-06, + "loss": 1.5786, + "step": 114273 + }, + { + "epoch": 1.37, + "grad_norm": 53.07270146395922, + "learning_rate": 4.711148869930995e-06, + "loss": 1.1058, + "step": 114276 + }, + { + "epoch": 1.37, + "grad_norm": 21.87459669309242, + "learning_rate": 4.71065309089312e-06, + "loss": 0.9287, + "step": 114279 + }, + { + "epoch": 1.37, + "grad_norm": 14.086142098338966, + "learning_rate": 4.710157329905885e-06, + "loss": 0.9087, + "step": 114282 + }, + { + "epoch": 1.37, + "grad_norm": 4.120082864073795, + "learning_rate": 4.709661586970973e-06, + "loss": 0.9388, + "step": 114285 + }, + { + "epoch": 1.37, + "grad_norm": 7.624873880444258, + "learning_rate": 4.70916586209008e-06, + "loss": 1.0857, + "step": 114288 + }, + { + "epoch": 1.37, + "grad_norm": 10.12974589429963, + "learning_rate": 4.708670155264899e-06, + "loss": 0.9836, + "step": 114291 + }, + { + "epoch": 1.37, + "grad_norm": 18.471945505466326, + "learning_rate": 4.708174466497125e-06, + "loss": 1.2534, + "step": 114294 + }, + { + "epoch": 1.37, + "grad_norm": 15.616401622605197, + "learning_rate": 4.7076787957884455e-06, + "loss": 1.1678, + "step": 114297 + }, + { + "epoch": 1.37, + "grad_norm": 3.8114292360791886, + "learning_rate": 4.7071831431405464e-06, + "loss": 1.2578, + "step": 114300 + }, + { + "epoch": 1.37, + "grad_norm": 7.86029284074614, + "learning_rate": 4.706687508555127e-06, + "loss": 1.0168, + "step": 114303 + }, + { + "epoch": 1.37, + "grad_norm": 14.520765846951933, + "learning_rate": 4.706191892033876e-06, + "loss": 1.1553, + "step": 114306 + }, + { + "epoch": 1.37, + "grad_norm": 7.740341528731637, + "learning_rate": 4.705696293578488e-06, + "loss": 0.9952, + "step": 114309 + }, + { + "epoch": 1.37, + "grad_norm": 11.883849525994595, + "learning_rate": 4.705200713190648e-06, + "loss": 1.1432, + "step": 114312 + }, + { + "epoch": 1.37, + "grad_norm": 3.281632554717111, + "learning_rate": 4.7047051508720554e-06, + "loss": 1.1337, + "step": 114315 + }, + { + "epoch": 1.37, + "grad_norm": 7.308625631191254, + "learning_rate": 4.704209606624392e-06, + "loss": 1.0374, + "step": 114318 + }, + { + "epoch": 1.37, + "grad_norm": 35.291415253312096, + "learning_rate": 4.703714080449354e-06, + "loss": 1.0294, + "step": 114321 + }, + { + "epoch": 1.37, + "grad_norm": 3.282137154616004, + "learning_rate": 4.703218572348636e-06, + "loss": 1.1897, + "step": 114324 + }, + { + "epoch": 1.37, + "grad_norm": 6.504438875373213, + "learning_rate": 4.702723082323922e-06, + "loss": 1.1367, + "step": 114327 + }, + { + "epoch": 1.37, + "grad_norm": 83.07516142335257, + "learning_rate": 4.702227610376905e-06, + "loss": 1.077, + "step": 114330 + }, + { + "epoch": 1.37, + "grad_norm": 12.289932379700312, + "learning_rate": 4.701732156509278e-06, + "loss": 1.2119, + "step": 114333 + }, + { + "epoch": 1.37, + "grad_norm": 3.678558276977689, + "learning_rate": 4.701236720722734e-06, + "loss": 1.1431, + "step": 114336 + }, + { + "epoch": 1.37, + "grad_norm": 12.397871718409773, + "learning_rate": 4.700741303018959e-06, + "loss": 1.2345, + "step": 114339 + }, + { + "epoch": 1.37, + "grad_norm": 12.690334335101282, + "learning_rate": 4.700245903399644e-06, + "loss": 1.126, + "step": 114342 + }, + { + "epoch": 1.37, + "grad_norm": 17.054575989336396, + "learning_rate": 4.69975052186648e-06, + "loss": 1.0545, + "step": 114345 + }, + { + "epoch": 1.37, + "grad_norm": 6.409203970004594, + "learning_rate": 4.699255158421157e-06, + "loss": 1.1113, + "step": 114348 + }, + { + "epoch": 1.38, + "grad_norm": 9.780144941936395, + "learning_rate": 4.698759813065371e-06, + "loss": 1.4738, + "step": 114351 + }, + { + "epoch": 1.38, + "grad_norm": 2.653264818026185, + "learning_rate": 4.6982644858008055e-06, + "loss": 1.2055, + "step": 114354 + }, + { + "epoch": 1.38, + "grad_norm": 41.09771206689763, + "learning_rate": 4.697769176629153e-06, + "loss": 1.5834, + "step": 114357 + }, + { + "epoch": 1.38, + "grad_norm": 7.680589937932209, + "learning_rate": 4.697273885552108e-06, + "loss": 1.1789, + "step": 114360 + }, + { + "epoch": 1.38, + "grad_norm": 22.627196625961286, + "learning_rate": 4.696778612571358e-06, + "loss": 1.5301, + "step": 114363 + }, + { + "epoch": 1.38, + "grad_norm": 8.996549504878299, + "learning_rate": 4.696283357688588e-06, + "loss": 1.3104, + "step": 114366 + }, + { + "epoch": 1.38, + "grad_norm": 10.503396997046943, + "learning_rate": 4.695788120905492e-06, + "loss": 1.1601, + "step": 114369 + }, + { + "epoch": 1.38, + "grad_norm": 8.376979605649048, + "learning_rate": 4.695292902223761e-06, + "loss": 1.048, + "step": 114372 + }, + { + "epoch": 1.38, + "grad_norm": 7.051416024470689, + "learning_rate": 4.694797701645085e-06, + "loss": 1.1671, + "step": 114375 + }, + { + "epoch": 1.38, + "grad_norm": 11.797143383523755, + "learning_rate": 4.694302519171157e-06, + "loss": 1.0817, + "step": 114378 + }, + { + "epoch": 1.38, + "grad_norm": 4.335768213660416, + "learning_rate": 4.693807354803663e-06, + "loss": 1.3568, + "step": 114381 + }, + { + "epoch": 1.38, + "grad_norm": 37.3559533926882, + "learning_rate": 4.6933122085442905e-06, + "loss": 1.2095, + "step": 114384 + }, + { + "epoch": 1.38, + "grad_norm": 4.3171147189920545, + "learning_rate": 4.692817080394731e-06, + "loss": 1.0995, + "step": 114387 + }, + { + "epoch": 1.38, + "grad_norm": 3.908167540285937, + "learning_rate": 4.6923219703566795e-06, + "loss": 0.8566, + "step": 114390 + }, + { + "epoch": 1.38, + "grad_norm": 8.298272463884823, + "learning_rate": 4.691826878431817e-06, + "loss": 1.0133, + "step": 114393 + }, + { + "epoch": 1.38, + "grad_norm": 10.114375733086998, + "learning_rate": 4.691331804621838e-06, + "loss": 1.3806, + "step": 114396 + }, + { + "epoch": 1.38, + "grad_norm": 26.063317283355182, + "learning_rate": 4.690836748928431e-06, + "loss": 1.0015, + "step": 114399 + }, + { + "epoch": 1.38, + "grad_norm": 12.94117490468699, + "learning_rate": 4.69034171135329e-06, + "loss": 1.0338, + "step": 114402 + }, + { + "epoch": 1.38, + "grad_norm": 10.602109957129167, + "learning_rate": 4.689846691898099e-06, + "loss": 1.2664, + "step": 114405 + }, + { + "epoch": 1.38, + "grad_norm": 6.354683871133596, + "learning_rate": 4.689351690564546e-06, + "loss": 1.3014, + "step": 114408 + }, + { + "epoch": 1.38, + "grad_norm": 14.571796789010328, + "learning_rate": 4.688856707354322e-06, + "loss": 1.1205, + "step": 114411 + }, + { + "epoch": 1.38, + "grad_norm": 9.525579571046084, + "learning_rate": 4.688361742269118e-06, + "loss": 1.2122, + "step": 114414 + }, + { + "epoch": 1.38, + "grad_norm": 9.608637335396889, + "learning_rate": 4.687866795310625e-06, + "loss": 1.3932, + "step": 114417 + }, + { + "epoch": 1.38, + "grad_norm": 7.227633532056955, + "learning_rate": 4.687371866480526e-06, + "loss": 1.1139, + "step": 114420 + }, + { + "epoch": 1.38, + "grad_norm": 6.923654615545286, + "learning_rate": 4.6868769557805175e-06, + "loss": 1.3663, + "step": 114423 + }, + { + "epoch": 1.38, + "grad_norm": 14.16376821759797, + "learning_rate": 4.6863820632122796e-06, + "loss": 0.9846, + "step": 114426 + }, + { + "epoch": 1.38, + "grad_norm": 3.928827957507467, + "learning_rate": 4.685887188777505e-06, + "loss": 1.213, + "step": 114429 + }, + { + "epoch": 1.38, + "grad_norm": 18.420380358622445, + "learning_rate": 4.685392332477888e-06, + "loss": 1.0828, + "step": 114432 + }, + { + "epoch": 1.38, + "grad_norm": 4.619055967302844, + "learning_rate": 4.684897494315108e-06, + "loss": 1.2036, + "step": 114435 + }, + { + "epoch": 1.38, + "grad_norm": 8.355033009594678, + "learning_rate": 4.68440267429086e-06, + "loss": 1.2146, + "step": 114438 + }, + { + "epoch": 1.38, + "grad_norm": 7.022960578987352, + "learning_rate": 4.68390787240683e-06, + "loss": 1.3443, + "step": 114441 + }, + { + "epoch": 1.38, + "grad_norm": 12.042809029181033, + "learning_rate": 4.683413088664711e-06, + "loss": 1.0369, + "step": 114444 + }, + { + "epoch": 1.38, + "grad_norm": 5.776546741572503, + "learning_rate": 4.682918323066189e-06, + "loss": 1.147, + "step": 114447 + }, + { + "epoch": 1.38, + "grad_norm": 10.400711181854778, + "learning_rate": 4.682423575612947e-06, + "loss": 1.199, + "step": 114450 + }, + { + "epoch": 1.38, + "grad_norm": 14.01312227747925, + "learning_rate": 4.681928846306678e-06, + "loss": 1.1167, + "step": 114453 + }, + { + "epoch": 1.38, + "grad_norm": 7.513223258927665, + "learning_rate": 4.6814341351490715e-06, + "loss": 1.1131, + "step": 114456 + }, + { + "epoch": 1.38, + "grad_norm": 3.5728675556952045, + "learning_rate": 4.680939442141816e-06, + "loss": 1.189, + "step": 114459 + }, + { + "epoch": 1.38, + "grad_norm": 5.884461546758613, + "learning_rate": 4.680444767286597e-06, + "loss": 0.9811, + "step": 114462 + }, + { + "epoch": 1.38, + "grad_norm": 5.610755529998196, + "learning_rate": 4.679950110585106e-06, + "loss": 1.039, + "step": 114465 + }, + { + "epoch": 1.38, + "grad_norm": 9.036392314648388, + "learning_rate": 4.679455472039026e-06, + "loss": 0.9989, + "step": 114468 + }, + { + "epoch": 1.38, + "grad_norm": 11.715247061576202, + "learning_rate": 4.678960851650052e-06, + "loss": 1.451, + "step": 114471 + }, + { + "epoch": 1.38, + "grad_norm": 13.444453003879435, + "learning_rate": 4.678466249419864e-06, + "loss": 1.1394, + "step": 114474 + }, + { + "epoch": 1.38, + "grad_norm": 6.791407244021685, + "learning_rate": 4.677971665350153e-06, + "loss": 0.8295, + "step": 114477 + }, + { + "epoch": 1.38, + "grad_norm": 4.80593624528642, + "learning_rate": 4.677477099442609e-06, + "loss": 1.0302, + "step": 114480 + }, + { + "epoch": 1.38, + "grad_norm": 9.849936285134048, + "learning_rate": 4.6769825516989176e-06, + "loss": 1.634, + "step": 114483 + }, + { + "epoch": 1.38, + "grad_norm": 13.775748424736973, + "learning_rate": 4.676488022120772e-06, + "loss": 1.0771, + "step": 114486 + }, + { + "epoch": 1.38, + "grad_norm": 10.22497971290491, + "learning_rate": 4.6759935107098544e-06, + "loss": 1.0743, + "step": 114489 + }, + { + "epoch": 1.38, + "grad_norm": 7.264956464384888, + "learning_rate": 4.675499017467849e-06, + "loss": 0.8941, + "step": 114492 + }, + { + "epoch": 1.38, + "grad_norm": 17.984676709026157, + "learning_rate": 4.675004542396449e-06, + "loss": 1.3248, + "step": 114495 + }, + { + "epoch": 1.38, + "grad_norm": 5.410721981325649, + "learning_rate": 4.674510085497344e-06, + "loss": 1.1071, + "step": 114498 + }, + { + "epoch": 1.38, + "grad_norm": 29.491638779460807, + "learning_rate": 4.674015646772213e-06, + "loss": 1.3991, + "step": 114501 + }, + { + "epoch": 1.38, + "grad_norm": 42.661542775770016, + "learning_rate": 4.673521226222749e-06, + "loss": 1.0121, + "step": 114504 + }, + { + "epoch": 1.38, + "grad_norm": 12.132305265407204, + "learning_rate": 4.6730268238506426e-06, + "loss": 0.9764, + "step": 114507 + }, + { + "epoch": 1.38, + "grad_norm": 2.8200755822947676, + "learning_rate": 4.672532439657572e-06, + "loss": 1.5722, + "step": 114510 + }, + { + "epoch": 1.38, + "grad_norm": 24.441544595482533, + "learning_rate": 4.672038073645234e-06, + "loss": 1.3817, + "step": 114513 + }, + { + "epoch": 1.38, + "grad_norm": 3.137081791000366, + "learning_rate": 4.6715437258153075e-06, + "loss": 1.7019, + "step": 114516 + }, + { + "epoch": 1.38, + "grad_norm": 8.558770493653208, + "learning_rate": 4.671049396169481e-06, + "loss": 1.1148, + "step": 114519 + }, + { + "epoch": 1.38, + "grad_norm": 12.335066297381285, + "learning_rate": 4.670555084709446e-06, + "loss": 0.8708, + "step": 114522 + }, + { + "epoch": 1.38, + "grad_norm": 6.521999475328913, + "learning_rate": 4.670060791436889e-06, + "loss": 1.3673, + "step": 114525 + }, + { + "epoch": 1.38, + "grad_norm": 6.65102260015258, + "learning_rate": 4.669566516353491e-06, + "loss": 1.4273, + "step": 114528 + }, + { + "epoch": 1.38, + "grad_norm": 30.448051435357325, + "learning_rate": 4.669072259460946e-06, + "loss": 1.1571, + "step": 114531 + }, + { + "epoch": 1.38, + "grad_norm": 3.215483184362096, + "learning_rate": 4.6685780207609346e-06, + "loss": 0.7519, + "step": 114534 + }, + { + "epoch": 1.38, + "grad_norm": 11.58895423554363, + "learning_rate": 4.668083800255146e-06, + "loss": 1.2963, + "step": 114537 + }, + { + "epoch": 1.38, + "grad_norm": 11.121757867729075, + "learning_rate": 4.667589597945269e-06, + "loss": 1.5296, + "step": 114540 + }, + { + "epoch": 1.38, + "grad_norm": 3.8534620529170414, + "learning_rate": 4.667095413832985e-06, + "loss": 0.656, + "step": 114543 + }, + { + "epoch": 1.38, + "grad_norm": 36.64428144219086, + "learning_rate": 4.666601247919984e-06, + "loss": 1.1423, + "step": 114546 + }, + { + "epoch": 1.38, + "grad_norm": 5.574534882318532, + "learning_rate": 4.666107100207955e-06, + "loss": 1.3604, + "step": 114549 + }, + { + "epoch": 1.38, + "grad_norm": 10.093075734129716, + "learning_rate": 4.665612970698582e-06, + "loss": 1.174, + "step": 114552 + }, + { + "epoch": 1.38, + "grad_norm": 4.465518834861815, + "learning_rate": 4.665118859393546e-06, + "loss": 0.9951, + "step": 114555 + }, + { + "epoch": 1.38, + "grad_norm": 10.85037558545015, + "learning_rate": 4.6646247662945375e-06, + "loss": 1.1074, + "step": 114558 + }, + { + "epoch": 1.38, + "grad_norm": 35.07892498075127, + "learning_rate": 4.664130691403244e-06, + "loss": 1.1253, + "step": 114561 + }, + { + "epoch": 1.38, + "grad_norm": 4.297390910266145, + "learning_rate": 4.663636634721349e-06, + "loss": 0.9379, + "step": 114564 + }, + { + "epoch": 1.38, + "grad_norm": 7.019140941514658, + "learning_rate": 4.663142596250545e-06, + "loss": 1.1326, + "step": 114567 + }, + { + "epoch": 1.38, + "grad_norm": 18.162893945867634, + "learning_rate": 4.662648575992509e-06, + "loss": 1.0084, + "step": 114570 + }, + { + "epoch": 1.38, + "grad_norm": 11.84517968065109, + "learning_rate": 4.662154573948934e-06, + "loss": 1.1732, + "step": 114573 + }, + { + "epoch": 1.38, + "grad_norm": 4.197642608470479, + "learning_rate": 4.661660590121499e-06, + "loss": 1.2829, + "step": 114576 + }, + { + "epoch": 1.38, + "grad_norm": 18.086382758767837, + "learning_rate": 4.661166624511898e-06, + "loss": 1.4658, + "step": 114579 + }, + { + "epoch": 1.38, + "grad_norm": 5.821880224901176, + "learning_rate": 4.660672677121808e-06, + "loss": 1.2319, + "step": 114582 + }, + { + "epoch": 1.38, + "grad_norm": 7.9882723428948665, + "learning_rate": 4.66017874795292e-06, + "loss": 1.0989, + "step": 114585 + }, + { + "epoch": 1.38, + "grad_norm": 5.333942323296803, + "learning_rate": 4.659684837006917e-06, + "loss": 1.425, + "step": 114588 + }, + { + "epoch": 1.38, + "grad_norm": 13.753947166155465, + "learning_rate": 4.659190944285491e-06, + "loss": 1.2808, + "step": 114591 + }, + { + "epoch": 1.38, + "grad_norm": 15.58792119979256, + "learning_rate": 4.6586970697903176e-06, + "loss": 1.1666, + "step": 114594 + }, + { + "epoch": 1.38, + "grad_norm": 8.708843173644736, + "learning_rate": 4.658203213523091e-06, + "loss": 1.0647, + "step": 114597 + }, + { + "epoch": 1.38, + "grad_norm": 11.698854167702434, + "learning_rate": 4.657709375485491e-06, + "loss": 1.7002, + "step": 114600 + }, + { + "epoch": 1.38, + "grad_norm": 5.084164349749704, + "learning_rate": 4.657215555679203e-06, + "loss": 1.2134, + "step": 114603 + }, + { + "epoch": 1.38, + "grad_norm": 4.576173092716306, + "learning_rate": 4.656721754105917e-06, + "loss": 0.7157, + "step": 114606 + }, + { + "epoch": 1.38, + "grad_norm": 6.979640432191546, + "learning_rate": 4.656227970767313e-06, + "loss": 1.1738, + "step": 114609 + }, + { + "epoch": 1.38, + "grad_norm": 5.5723684005715395, + "learning_rate": 4.655734205665076e-06, + "loss": 1.4482, + "step": 114612 + }, + { + "epoch": 1.38, + "grad_norm": 12.652015254453824, + "learning_rate": 4.655240458800899e-06, + "loss": 1.0054, + "step": 114615 + }, + { + "epoch": 1.38, + "grad_norm": 20.27951468610032, + "learning_rate": 4.654746730176457e-06, + "loss": 1.4865, + "step": 114618 + }, + { + "epoch": 1.38, + "grad_norm": 3.4052221692617857, + "learning_rate": 4.6542530197934424e-06, + "loss": 1.0677, + "step": 114621 + }, + { + "epoch": 1.38, + "grad_norm": 15.02707463594971, + "learning_rate": 4.653759327653533e-06, + "loss": 1.598, + "step": 114624 + }, + { + "epoch": 1.38, + "grad_norm": 15.699173268621601, + "learning_rate": 4.653265653758416e-06, + "loss": 0.6858, + "step": 114627 + }, + { + "epoch": 1.38, + "grad_norm": 12.11704643964705, + "learning_rate": 4.65277199810978e-06, + "loss": 1.2827, + "step": 114630 + }, + { + "epoch": 1.38, + "grad_norm": 9.854424320681527, + "learning_rate": 4.652278360709309e-06, + "loss": 0.9418, + "step": 114633 + }, + { + "epoch": 1.38, + "grad_norm": 32.57407723277591, + "learning_rate": 4.651784741558682e-06, + "loss": 1.6778, + "step": 114636 + }, + { + "epoch": 1.38, + "grad_norm": 17.37196562721042, + "learning_rate": 4.651291140659592e-06, + "loss": 1.2036, + "step": 114639 + }, + { + "epoch": 1.38, + "grad_norm": 12.694648304735967, + "learning_rate": 4.6507975580137146e-06, + "loss": 1.2246, + "step": 114642 + }, + { + "epoch": 1.38, + "grad_norm": 6.246911923507234, + "learning_rate": 4.6503039936227375e-06, + "loss": 1.2918, + "step": 114645 + }, + { + "epoch": 1.38, + "grad_norm": 20.499572474623935, + "learning_rate": 4.649810447488351e-06, + "loss": 1.4583, + "step": 114648 + }, + { + "epoch": 1.38, + "grad_norm": 7.785764198996773, + "learning_rate": 4.64931691961223e-06, + "loss": 1.4072, + "step": 114651 + }, + { + "epoch": 1.38, + "grad_norm": 4.235982941387715, + "learning_rate": 4.6488234099960624e-06, + "loss": 1.1522, + "step": 114654 + }, + { + "epoch": 1.38, + "grad_norm": 11.650522179742342, + "learning_rate": 4.648329918641537e-06, + "loss": 1.1244, + "step": 114657 + }, + { + "epoch": 1.38, + "grad_norm": 7.535751538295488, + "learning_rate": 4.647836445550336e-06, + "loss": 1.0743, + "step": 114660 + }, + { + "epoch": 1.38, + "grad_norm": 5.142134483762883, + "learning_rate": 4.647342990724135e-06, + "loss": 0.9594, + "step": 114663 + }, + { + "epoch": 1.38, + "grad_norm": 7.45580747028162, + "learning_rate": 4.646849554164625e-06, + "loss": 1.0033, + "step": 114666 + }, + { + "epoch": 1.38, + "grad_norm": 2.5988745193435876, + "learning_rate": 4.64635613587349e-06, + "loss": 1.2126, + "step": 114669 + }, + { + "epoch": 1.38, + "grad_norm": 9.659059252069392, + "learning_rate": 4.645862735852412e-06, + "loss": 1.503, + "step": 114672 + }, + { + "epoch": 1.38, + "grad_norm": 12.411449374933067, + "learning_rate": 4.6453693541030806e-06, + "loss": 1.3145, + "step": 114675 + }, + { + "epoch": 1.38, + "grad_norm": 11.26399748843219, + "learning_rate": 4.644875990627171e-06, + "loss": 1.081, + "step": 114678 + }, + { + "epoch": 1.38, + "grad_norm": 2.909726854932166, + "learning_rate": 4.644382645426375e-06, + "loss": 1.4236, + "step": 114681 + }, + { + "epoch": 1.38, + "grad_norm": 11.070437954967032, + "learning_rate": 4.643889318502367e-06, + "loss": 1.1606, + "step": 114684 + }, + { + "epoch": 1.38, + "grad_norm": 9.743430847783571, + "learning_rate": 4.64339600985684e-06, + "loss": 1.2702, + "step": 114687 + }, + { + "epoch": 1.38, + "grad_norm": 26.461602155480797, + "learning_rate": 4.642902719491469e-06, + "loss": 1.1195, + "step": 114690 + }, + { + "epoch": 1.38, + "grad_norm": 11.286843113760781, + "learning_rate": 4.642409447407942e-06, + "loss": 1.149, + "step": 114693 + }, + { + "epoch": 1.38, + "grad_norm": 33.482357315228164, + "learning_rate": 4.64191619360794e-06, + "loss": 1.0877, + "step": 114696 + }, + { + "epoch": 1.38, + "grad_norm": 5.678319912100491, + "learning_rate": 4.641422958093154e-06, + "loss": 1.2884, + "step": 114699 + }, + { + "epoch": 1.38, + "grad_norm": 10.97943400090985, + "learning_rate": 4.64092974086526e-06, + "loss": 0.9162, + "step": 114702 + }, + { + "epoch": 1.38, + "grad_norm": 15.707506025143921, + "learning_rate": 4.640436541925939e-06, + "loss": 1.2956, + "step": 114705 + }, + { + "epoch": 1.38, + "grad_norm": 21.94680952184101, + "learning_rate": 4.639943361276878e-06, + "loss": 1.0183, + "step": 114708 + }, + { + "epoch": 1.38, + "grad_norm": 6.9585832875166895, + "learning_rate": 4.639450198919759e-06, + "loss": 1.2466, + "step": 114711 + }, + { + "epoch": 1.38, + "grad_norm": 9.155554586648313, + "learning_rate": 4.638957054856269e-06, + "loss": 1.1754, + "step": 114714 + }, + { + "epoch": 1.38, + "grad_norm": 6.134112399068556, + "learning_rate": 4.638463929088085e-06, + "loss": 1.1884, + "step": 114717 + }, + { + "epoch": 1.38, + "grad_norm": 10.463142256683627, + "learning_rate": 4.637970821616892e-06, + "loss": 1.2293, + "step": 114720 + }, + { + "epoch": 1.38, + "grad_norm": 8.71425557185698, + "learning_rate": 4.6374777324443765e-06, + "loss": 1.4294, + "step": 114723 + }, + { + "epoch": 1.38, + "grad_norm": 7.333645312339833, + "learning_rate": 4.636984661572215e-06, + "loss": 1.0098, + "step": 114726 + }, + { + "epoch": 1.38, + "grad_norm": 13.072381354188389, + "learning_rate": 4.636491609002095e-06, + "loss": 1.4998, + "step": 114729 + }, + { + "epoch": 1.38, + "grad_norm": 9.096041411655753, + "learning_rate": 4.635998574735695e-06, + "loss": 1.2696, + "step": 114732 + }, + { + "epoch": 1.38, + "grad_norm": 3.309202485353315, + "learning_rate": 4.6355055587747e-06, + "loss": 1.2048, + "step": 114735 + }, + { + "epoch": 1.38, + "grad_norm": 5.54626097432598, + "learning_rate": 4.635012561120791e-06, + "loss": 1.2162, + "step": 114738 + }, + { + "epoch": 1.38, + "grad_norm": 21.352467082767244, + "learning_rate": 4.634519581775656e-06, + "loss": 1.3057, + "step": 114741 + }, + { + "epoch": 1.38, + "grad_norm": 4.720969459764015, + "learning_rate": 4.634026620740973e-06, + "loss": 1.3131, + "step": 114744 + }, + { + "epoch": 1.38, + "grad_norm": 9.916069001486541, + "learning_rate": 4.6335336780184214e-06, + "loss": 0.8444, + "step": 114747 + }, + { + "epoch": 1.38, + "grad_norm": 16.007932099725675, + "learning_rate": 4.633040753609685e-06, + "loss": 1.4048, + "step": 114750 + }, + { + "epoch": 1.38, + "grad_norm": 7.559794366491277, + "learning_rate": 4.632547847516448e-06, + "loss": 1.1397, + "step": 114753 + }, + { + "epoch": 1.38, + "grad_norm": 6.483852661006587, + "learning_rate": 4.6320549597403964e-06, + "loss": 0.982, + "step": 114756 + }, + { + "epoch": 1.38, + "grad_norm": 6.586526135573835, + "learning_rate": 4.631562090283202e-06, + "loss": 1.1819, + "step": 114759 + }, + { + "epoch": 1.38, + "grad_norm": 3.363975567106468, + "learning_rate": 4.631069239146555e-06, + "loss": 1.3998, + "step": 114762 + }, + { + "epoch": 1.38, + "grad_norm": 16.858078886047934, + "learning_rate": 4.630576406332136e-06, + "loss": 1.3929, + "step": 114765 + }, + { + "epoch": 1.38, + "grad_norm": 3.9311503511561465, + "learning_rate": 4.630083591841627e-06, + "loss": 1.0308, + "step": 114768 + }, + { + "epoch": 1.38, + "grad_norm": 46.72680817899717, + "learning_rate": 4.629590795676705e-06, + "loss": 0.709, + "step": 114771 + }, + { + "epoch": 1.38, + "grad_norm": 3.5085655310864974, + "learning_rate": 4.629098017839056e-06, + "loss": 1.139, + "step": 114774 + }, + { + "epoch": 1.38, + "grad_norm": 13.352497244358355, + "learning_rate": 4.628605258330358e-06, + "loss": 1.2015, + "step": 114777 + }, + { + "epoch": 1.38, + "grad_norm": 11.179361785572041, + "learning_rate": 4.6281125171522976e-06, + "loss": 1.409, + "step": 114780 + }, + { + "epoch": 1.38, + "grad_norm": 4.959000261834432, + "learning_rate": 4.627619794306558e-06, + "loss": 0.8339, + "step": 114783 + }, + { + "epoch": 1.38, + "grad_norm": 6.447260453966664, + "learning_rate": 4.627127089794816e-06, + "loss": 1.057, + "step": 114786 + }, + { + "epoch": 1.38, + "grad_norm": 9.907475885514593, + "learning_rate": 4.626634403618752e-06, + "loss": 1.1982, + "step": 114789 + }, + { + "epoch": 1.38, + "grad_norm": 14.158983338395027, + "learning_rate": 4.626141735780048e-06, + "loss": 1.3595, + "step": 114792 + }, + { + "epoch": 1.38, + "grad_norm": 11.23409544923301, + "learning_rate": 4.62564908628039e-06, + "loss": 1.1169, + "step": 114795 + }, + { + "epoch": 1.38, + "grad_norm": 9.948803892808904, + "learning_rate": 4.625156455121452e-06, + "loss": 0.9017, + "step": 114798 + }, + { + "epoch": 1.38, + "grad_norm": 4.197359541920443, + "learning_rate": 4.62466384230492e-06, + "loss": 1.1117, + "step": 114801 + }, + { + "epoch": 1.38, + "grad_norm": 42.0820020636159, + "learning_rate": 4.6241712478324726e-06, + "loss": 1.2153, + "step": 114804 + }, + { + "epoch": 1.38, + "grad_norm": 3.094405717873573, + "learning_rate": 4.623678671705797e-06, + "loss": 1.0124, + "step": 114807 + }, + { + "epoch": 1.38, + "grad_norm": 3.5600373337798907, + "learning_rate": 4.62318611392657e-06, + "loss": 1.1816, + "step": 114810 + }, + { + "epoch": 1.38, + "grad_norm": 5.958571823517699, + "learning_rate": 4.622693574496468e-06, + "loss": 1.0492, + "step": 114813 + }, + { + "epoch": 1.38, + "grad_norm": 15.238574365631143, + "learning_rate": 4.6222010534171755e-06, + "loss": 1.4706, + "step": 114816 + }, + { + "epoch": 1.38, + "grad_norm": 6.030287427590816, + "learning_rate": 4.621708550690373e-06, + "loss": 0.9371, + "step": 114819 + }, + { + "epoch": 1.38, + "grad_norm": 8.937272234480226, + "learning_rate": 4.621216066317748e-06, + "loss": 1.2406, + "step": 114822 + }, + { + "epoch": 1.38, + "grad_norm": 26.7032027825757, + "learning_rate": 4.62072360030097e-06, + "loss": 1.2078, + "step": 114825 + }, + { + "epoch": 1.38, + "grad_norm": 6.99725840197799, + "learning_rate": 4.620231152641729e-06, + "loss": 1.048, + "step": 114828 + }, + { + "epoch": 1.38, + "grad_norm": 16.71775300554322, + "learning_rate": 4.6197387233416964e-06, + "loss": 1.5288, + "step": 114831 + }, + { + "epoch": 1.38, + "grad_norm": 10.729331721610588, + "learning_rate": 4.619246312402559e-06, + "loss": 1.3147, + "step": 114834 + }, + { + "epoch": 1.38, + "grad_norm": 14.46218213112821, + "learning_rate": 4.618753919825999e-06, + "loss": 1.0851, + "step": 114837 + }, + { + "epoch": 1.38, + "grad_norm": 8.76281903959542, + "learning_rate": 4.618261545613689e-06, + "loss": 1.3, + "step": 114840 + }, + { + "epoch": 1.38, + "grad_norm": 3.1836147772935863, + "learning_rate": 4.617769189767315e-06, + "loss": 1.3676, + "step": 114843 + }, + { + "epoch": 1.38, + "grad_norm": 4.4239508323461445, + "learning_rate": 4.6172768522885555e-06, + "loss": 1.2475, + "step": 114846 + }, + { + "epoch": 1.38, + "grad_norm": 11.465928374070494, + "learning_rate": 4.616784533179096e-06, + "loss": 1.3124, + "step": 114849 + }, + { + "epoch": 1.38, + "grad_norm": 13.951993681422893, + "learning_rate": 4.616292232440611e-06, + "loss": 1.0274, + "step": 114852 + }, + { + "epoch": 1.38, + "grad_norm": 5.373971185853479, + "learning_rate": 4.615799950074778e-06, + "loss": 0.8523, + "step": 114855 + }, + { + "epoch": 1.38, + "grad_norm": 9.175841399984822, + "learning_rate": 4.615307686083279e-06, + "loss": 1.3749, + "step": 114858 + }, + { + "epoch": 1.38, + "grad_norm": 3.5431297290746158, + "learning_rate": 4.614815440467797e-06, + "loss": 0.9496, + "step": 114861 + }, + { + "epoch": 1.38, + "grad_norm": 13.139696058179375, + "learning_rate": 4.614323213230013e-06, + "loss": 0.8156, + "step": 114864 + }, + { + "epoch": 1.38, + "grad_norm": 12.62571339680627, + "learning_rate": 4.613831004371601e-06, + "loss": 1.6961, + "step": 114867 + }, + { + "epoch": 1.38, + "grad_norm": 14.423315844014533, + "learning_rate": 4.613338813894247e-06, + "loss": 1.7377, + "step": 114870 + }, + { + "epoch": 1.38, + "grad_norm": 7.817826487165227, + "learning_rate": 4.612846641799623e-06, + "loss": 1.0268, + "step": 114873 + }, + { + "epoch": 1.38, + "grad_norm": 4.44137131087793, + "learning_rate": 4.612354488089418e-06, + "loss": 0.9053, + "step": 114876 + }, + { + "epoch": 1.38, + "grad_norm": 8.822610865754367, + "learning_rate": 4.6118623527653015e-06, + "loss": 1.0248, + "step": 114879 + }, + { + "epoch": 1.38, + "grad_norm": 16.77443422391473, + "learning_rate": 4.611370235828959e-06, + "loss": 1.138, + "step": 114882 + }, + { + "epoch": 1.38, + "grad_norm": 7.986811241395923, + "learning_rate": 4.610878137282069e-06, + "loss": 1.2112, + "step": 114885 + }, + { + "epoch": 1.38, + "grad_norm": 5.619596034371428, + "learning_rate": 4.610386057126309e-06, + "loss": 1.0151, + "step": 114888 + }, + { + "epoch": 1.38, + "grad_norm": 3.692358508449699, + "learning_rate": 4.609893995363366e-06, + "loss": 1.142, + "step": 114891 + }, + { + "epoch": 1.38, + "grad_norm": 5.66162603347497, + "learning_rate": 4.609401951994912e-06, + "loss": 1.2955, + "step": 114894 + }, + { + "epoch": 1.38, + "grad_norm": 8.163046374230161, + "learning_rate": 4.608909927022624e-06, + "loss": 1.7947, + "step": 114897 + }, + { + "epoch": 1.38, + "grad_norm": 11.945373270389924, + "learning_rate": 4.608417920448185e-06, + "loss": 0.9894, + "step": 114900 + }, + { + "epoch": 1.38, + "grad_norm": 5.774589732851362, + "learning_rate": 4.607925932273276e-06, + "loss": 1.2205, + "step": 114903 + }, + { + "epoch": 1.38, + "grad_norm": 37.4804476286097, + "learning_rate": 4.607433962499571e-06, + "loss": 1.2772, + "step": 114906 + }, + { + "epoch": 1.38, + "grad_norm": 9.501453503195464, + "learning_rate": 4.6069420111287515e-06, + "loss": 1.0645, + "step": 114909 + }, + { + "epoch": 1.38, + "grad_norm": 10.453252019213814, + "learning_rate": 4.6064500781625e-06, + "loss": 0.9204, + "step": 114912 + }, + { + "epoch": 1.38, + "grad_norm": 9.641688735162475, + "learning_rate": 4.605958163602487e-06, + "loss": 0.908, + "step": 114915 + }, + { + "epoch": 1.38, + "grad_norm": 4.556132563937909, + "learning_rate": 4.605466267450401e-06, + "loss": 1.174, + "step": 114918 + }, + { + "epoch": 1.38, + "grad_norm": 8.110471462950908, + "learning_rate": 4.604974389707911e-06, + "loss": 1.098, + "step": 114921 + }, + { + "epoch": 1.38, + "grad_norm": 13.469217183501293, + "learning_rate": 4.604482530376701e-06, + "loss": 1.1465, + "step": 114924 + }, + { + "epoch": 1.38, + "grad_norm": 6.9522452368164185, + "learning_rate": 4.603990689458447e-06, + "loss": 1.1125, + "step": 114927 + }, + { + "epoch": 1.38, + "grad_norm": 22.486529350507126, + "learning_rate": 4.603498866954834e-06, + "loss": 1.2694, + "step": 114930 + }, + { + "epoch": 1.38, + "grad_norm": 57.328116731212205, + "learning_rate": 4.603007062867531e-06, + "loss": 1.6081, + "step": 114933 + }, + { + "epoch": 1.38, + "grad_norm": 18.74670519439516, + "learning_rate": 4.602515277198226e-06, + "loss": 1.394, + "step": 114936 + }, + { + "epoch": 1.38, + "grad_norm": 6.782498030531776, + "learning_rate": 4.602023509948588e-06, + "loss": 1.4096, + "step": 114939 + }, + { + "epoch": 1.38, + "grad_norm": 11.10737082976818, + "learning_rate": 4.601531761120299e-06, + "loss": 1.2306, + "step": 114942 + }, + { + "epoch": 1.38, + "grad_norm": 11.936837966275283, + "learning_rate": 4.601040030715042e-06, + "loss": 1.3198, + "step": 114945 + }, + { + "epoch": 1.38, + "grad_norm": 12.87384988435337, + "learning_rate": 4.6005483187344855e-06, + "loss": 1.5722, + "step": 114948 + }, + { + "epoch": 1.38, + "grad_norm": 5.245187042378497, + "learning_rate": 4.600056625180314e-06, + "loss": 1.2253, + "step": 114951 + }, + { + "epoch": 1.38, + "grad_norm": 9.506104956245105, + "learning_rate": 4.599564950054204e-06, + "loss": 1.0361, + "step": 114954 + }, + { + "epoch": 1.38, + "grad_norm": 12.464745905918392, + "learning_rate": 4.599073293357838e-06, + "loss": 1.2562, + "step": 114957 + }, + { + "epoch": 1.38, + "grad_norm": 8.66162503199053, + "learning_rate": 4.598581655092888e-06, + "loss": 1.1134, + "step": 114960 + }, + { + "epoch": 1.38, + "grad_norm": 3.3373848618488147, + "learning_rate": 4.598090035261029e-06, + "loss": 1.2437, + "step": 114963 + }, + { + "epoch": 1.38, + "grad_norm": 8.407195034850215, + "learning_rate": 4.597598433863944e-06, + "loss": 1.1529, + "step": 114966 + }, + { + "epoch": 1.38, + "grad_norm": 4.473627505392651, + "learning_rate": 4.59710685090331e-06, + "loss": 1.5421, + "step": 114969 + }, + { + "epoch": 1.38, + "grad_norm": 3.257525095916212, + "learning_rate": 4.596615286380808e-06, + "loss": 1.5291, + "step": 114972 + }, + { + "epoch": 1.38, + "grad_norm": 8.492589251064036, + "learning_rate": 4.596123740298107e-06, + "loss": 1.4391, + "step": 114975 + }, + { + "epoch": 1.38, + "grad_norm": 3.8908490035198655, + "learning_rate": 4.5956322126568944e-06, + "loss": 0.8631, + "step": 114978 + }, + { + "epoch": 1.38, + "grad_norm": 9.216461308150521, + "learning_rate": 4.595140703458837e-06, + "loss": 1.3156, + "step": 114981 + }, + { + "epoch": 1.38, + "grad_norm": 26.921337436399522, + "learning_rate": 4.594649212705623e-06, + "loss": 1.3137, + "step": 114984 + }, + { + "epoch": 1.38, + "grad_norm": 7.95420702035139, + "learning_rate": 4.594157740398919e-06, + "loss": 1.062, + "step": 114987 + }, + { + "epoch": 1.38, + "grad_norm": 24.663660662989493, + "learning_rate": 4.593666286540408e-06, + "loss": 1.3846, + "step": 114990 + }, + { + "epoch": 1.38, + "grad_norm": 10.925425039284697, + "learning_rate": 4.593174851131767e-06, + "loss": 0.9258, + "step": 114993 + }, + { + "epoch": 1.38, + "grad_norm": 4.614655892280016, + "learning_rate": 4.592683434174672e-06, + "loss": 1.1264, + "step": 114996 + }, + { + "epoch": 1.38, + "grad_norm": 4.864189400703745, + "learning_rate": 4.592192035670805e-06, + "loss": 1.1785, + "step": 114999 + }, + { + "epoch": 1.38, + "grad_norm": 2.9026166075957183, + "learning_rate": 4.591700655621839e-06, + "loss": 0.8455, + "step": 115002 + }, + { + "epoch": 1.38, + "grad_norm": 34.3848480710239, + "learning_rate": 4.5912092940294465e-06, + "loss": 0.9219, + "step": 115005 + }, + { + "epoch": 1.38, + "grad_norm": 11.843661987527152, + "learning_rate": 4.590717950895308e-06, + "loss": 1.2164, + "step": 115008 + }, + { + "epoch": 1.38, + "grad_norm": 13.556308151123819, + "learning_rate": 4.590226626221105e-06, + "loss": 1.4154, + "step": 115011 + }, + { + "epoch": 1.38, + "grad_norm": 2.91417259985369, + "learning_rate": 4.5897353200085056e-06, + "loss": 1.0827, + "step": 115014 + }, + { + "epoch": 1.38, + "grad_norm": 6.467133458748462, + "learning_rate": 4.589244032259192e-06, + "loss": 1.5221, + "step": 115017 + }, + { + "epoch": 1.38, + "grad_norm": 6.562328841689012, + "learning_rate": 4.588752762974842e-06, + "loss": 1.3915, + "step": 115020 + }, + { + "epoch": 1.38, + "grad_norm": 20.47828493596482, + "learning_rate": 4.588261512157126e-06, + "loss": 1.0425, + "step": 115023 + }, + { + "epoch": 1.38, + "grad_norm": 12.307504732252921, + "learning_rate": 4.587770279807729e-06, + "loss": 1.1825, + "step": 115026 + }, + { + "epoch": 1.38, + "grad_norm": 5.399986008935731, + "learning_rate": 4.5872790659283175e-06, + "loss": 1.1983, + "step": 115029 + }, + { + "epoch": 1.38, + "grad_norm": 21.80439543087356, + "learning_rate": 4.586787870520574e-06, + "loss": 0.9669, + "step": 115032 + }, + { + "epoch": 1.38, + "grad_norm": 10.337749279996903, + "learning_rate": 4.586296693586173e-06, + "loss": 1.1519, + "step": 115035 + }, + { + "epoch": 1.38, + "grad_norm": 16.774775331169817, + "learning_rate": 4.5858055351267945e-06, + "loss": 1.1873, + "step": 115038 + }, + { + "epoch": 1.38, + "grad_norm": 11.324229374630018, + "learning_rate": 4.585314395144109e-06, + "loss": 1.2706, + "step": 115041 + }, + { + "epoch": 1.38, + "grad_norm": 22.070519349722154, + "learning_rate": 4.584823273639799e-06, + "loss": 1.273, + "step": 115044 + }, + { + "epoch": 1.38, + "grad_norm": 10.147869947930682, + "learning_rate": 4.584332170615532e-06, + "loss": 1.1634, + "step": 115047 + }, + { + "epoch": 1.38, + "grad_norm": 4.813826484301463, + "learning_rate": 4.58384108607299e-06, + "loss": 0.7994, + "step": 115050 + }, + { + "epoch": 1.38, + "grad_norm": 3.944145060626971, + "learning_rate": 4.583350020013849e-06, + "loss": 1.0564, + "step": 115053 + }, + { + "epoch": 1.38, + "grad_norm": 10.804317757433795, + "learning_rate": 4.582858972439781e-06, + "loss": 1.3658, + "step": 115056 + }, + { + "epoch": 1.38, + "grad_norm": 4.832225759247885, + "learning_rate": 4.582367943352464e-06, + "loss": 1.3516, + "step": 115059 + }, + { + "epoch": 1.38, + "grad_norm": 3.6464135959560595, + "learning_rate": 4.581876932753577e-06, + "loss": 1.091, + "step": 115062 + }, + { + "epoch": 1.38, + "grad_norm": 22.850752645554433, + "learning_rate": 4.581385940644794e-06, + "loss": 1.048, + "step": 115065 + }, + { + "epoch": 1.38, + "grad_norm": 3.901534099956024, + "learning_rate": 4.580894967027783e-06, + "loss": 0.9558, + "step": 115068 + }, + { + "epoch": 1.38, + "grad_norm": 3.500939195095638, + "learning_rate": 4.580404011904227e-06, + "loss": 0.9445, + "step": 115071 + }, + { + "epoch": 1.38, + "grad_norm": 8.192041867834735, + "learning_rate": 4.579913075275799e-06, + "loss": 1.0307, + "step": 115074 + }, + { + "epoch": 1.38, + "grad_norm": 5.049648443337515, + "learning_rate": 4.579422157144176e-06, + "loss": 1.1608, + "step": 115077 + }, + { + "epoch": 1.38, + "grad_norm": 11.100126405722948, + "learning_rate": 4.578931257511037e-06, + "loss": 1.2385, + "step": 115080 + }, + { + "epoch": 1.38, + "grad_norm": 9.739531328532328, + "learning_rate": 4.578440376378049e-06, + "loss": 1.411, + "step": 115083 + }, + { + "epoch": 1.38, + "grad_norm": 6.891276943368445, + "learning_rate": 4.577949513746894e-06, + "loss": 0.9685, + "step": 115086 + }, + { + "epoch": 1.38, + "grad_norm": 12.700942530949362, + "learning_rate": 4.577458669619242e-06, + "loss": 1.1245, + "step": 115089 + }, + { + "epoch": 1.38, + "grad_norm": 12.580243984422365, + "learning_rate": 4.576967843996772e-06, + "loss": 1.0678, + "step": 115092 + }, + { + "epoch": 1.38, + "grad_norm": 191.92422177982613, + "learning_rate": 4.576477036881155e-06, + "loss": 1.1421, + "step": 115095 + }, + { + "epoch": 1.38, + "grad_norm": 15.525616968996161, + "learning_rate": 4.575986248274068e-06, + "loss": 1.151, + "step": 115098 + }, + { + "epoch": 1.38, + "grad_norm": 15.631501997946852, + "learning_rate": 4.575495478177186e-06, + "loss": 0.9141, + "step": 115101 + }, + { + "epoch": 1.38, + "grad_norm": 14.806725864496084, + "learning_rate": 4.5750047265921885e-06, + "loss": 1.5628, + "step": 115104 + }, + { + "epoch": 1.38, + "grad_norm": 19.601856181371904, + "learning_rate": 4.574513993520746e-06, + "loss": 1.2644, + "step": 115107 + }, + { + "epoch": 1.38, + "grad_norm": 10.910658812993667, + "learning_rate": 4.574023278964528e-06, + "loss": 1.0009, + "step": 115110 + }, + { + "epoch": 1.38, + "grad_norm": 4.309961314860788, + "learning_rate": 4.573532582925215e-06, + "loss": 1.1523, + "step": 115113 + }, + { + "epoch": 1.38, + "grad_norm": 26.800485921179334, + "learning_rate": 4.57304190540448e-06, + "loss": 1.4039, + "step": 115116 + }, + { + "epoch": 1.38, + "grad_norm": 11.708093679703143, + "learning_rate": 4.572551246404003e-06, + "loss": 1.3402, + "step": 115119 + }, + { + "epoch": 1.38, + "grad_norm": 9.384852585642475, + "learning_rate": 4.5720606059254495e-06, + "loss": 1.261, + "step": 115122 + }, + { + "epoch": 1.38, + "grad_norm": 8.668026032485791, + "learning_rate": 4.571569983970498e-06, + "loss": 1.3778, + "step": 115125 + }, + { + "epoch": 1.38, + "grad_norm": 8.85451919435501, + "learning_rate": 4.571079380540826e-06, + "loss": 1.2329, + "step": 115128 + }, + { + "epoch": 1.38, + "grad_norm": 12.195263833427806, + "learning_rate": 4.570588795638101e-06, + "loss": 1.3228, + "step": 115131 + }, + { + "epoch": 1.38, + "grad_norm": 4.384239322083028, + "learning_rate": 4.570098229264005e-06, + "loss": 1.1206, + "step": 115134 + }, + { + "epoch": 1.38, + "grad_norm": 6.370887383486978, + "learning_rate": 4.569607681420204e-06, + "loss": 1.391, + "step": 115137 + }, + { + "epoch": 1.38, + "grad_norm": 4.9064005087195195, + "learning_rate": 4.569117152108376e-06, + "loss": 1.4572, + "step": 115140 + }, + { + "epoch": 1.38, + "grad_norm": 13.69269134213113, + "learning_rate": 4.568626641330195e-06, + "loss": 1.087, + "step": 115143 + }, + { + "epoch": 1.38, + "grad_norm": 3.6056319872756477, + "learning_rate": 4.568136149087338e-06, + "loss": 1.3238, + "step": 115146 + }, + { + "epoch": 1.38, + "grad_norm": 6.6119523564471105, + "learning_rate": 4.5676456753814756e-06, + "loss": 1.2596, + "step": 115149 + }, + { + "epoch": 1.38, + "grad_norm": 3.571911223257187, + "learning_rate": 4.56715522021428e-06, + "loss": 1.2787, + "step": 115152 + }, + { + "epoch": 1.38, + "grad_norm": 10.249732314944266, + "learning_rate": 4.566664783587425e-06, + "loss": 1.0201, + "step": 115155 + }, + { + "epoch": 1.38, + "grad_norm": 25.451361653131, + "learning_rate": 4.566174365502586e-06, + "loss": 1.3961, + "step": 115158 + }, + { + "epoch": 1.38, + "grad_norm": 7.3171978656748795, + "learning_rate": 4.565683965961441e-06, + "loss": 1.0403, + "step": 115161 + }, + { + "epoch": 1.38, + "grad_norm": 6.718526397375319, + "learning_rate": 4.565193584965656e-06, + "loss": 1.1861, + "step": 115164 + }, + { + "epoch": 1.38, + "grad_norm": 12.926592930062803, + "learning_rate": 4.564703222516907e-06, + "loss": 0.9412, + "step": 115167 + }, + { + "epoch": 1.38, + "grad_norm": 17.456637308201596, + "learning_rate": 4.564212878616872e-06, + "loss": 1.4849, + "step": 115170 + }, + { + "epoch": 1.38, + "grad_norm": 5.675559086709526, + "learning_rate": 4.563722553267219e-06, + "loss": 1.1909, + "step": 115173 + }, + { + "epoch": 1.38, + "grad_norm": 17.46533222294797, + "learning_rate": 4.563232246469621e-06, + "loss": 1.6808, + "step": 115176 + }, + { + "epoch": 1.38, + "grad_norm": 6.034177542383605, + "learning_rate": 4.562741958225752e-06, + "loss": 0.8617, + "step": 115179 + }, + { + "epoch": 1.39, + "grad_norm": 14.688064047705845, + "learning_rate": 4.562251688537287e-06, + "loss": 1.1398, + "step": 115182 + }, + { + "epoch": 1.39, + "grad_norm": 6.059896919263744, + "learning_rate": 4.561761437405898e-06, + "loss": 1.1091, + "step": 115185 + }, + { + "epoch": 1.39, + "grad_norm": 11.749089377876555, + "learning_rate": 4.561271204833262e-06, + "loss": 1.4062, + "step": 115188 + }, + { + "epoch": 1.39, + "grad_norm": 8.695506285235467, + "learning_rate": 4.560780990821045e-06, + "loss": 1.0935, + "step": 115191 + }, + { + "epoch": 1.39, + "grad_norm": 6.929032121502782, + "learning_rate": 4.560290795370927e-06, + "loss": 0.9681, + "step": 115194 + }, + { + "epoch": 1.39, + "grad_norm": 20.58856458433065, + "learning_rate": 4.559800618484573e-06, + "loss": 1.1919, + "step": 115197 + }, + { + "epoch": 1.39, + "grad_norm": 11.034797675815497, + "learning_rate": 4.559310460163663e-06, + "loss": 1.3536, + "step": 115200 + }, + { + "epoch": 1.39, + "grad_norm": 10.4042012637252, + "learning_rate": 4.5588203204098635e-06, + "loss": 1.0032, + "step": 115203 + }, + { + "epoch": 1.39, + "grad_norm": 7.376090467156567, + "learning_rate": 4.558330199224851e-06, + "loss": 1.0073, + "step": 115206 + }, + { + "epoch": 1.39, + "grad_norm": 9.961621548113042, + "learning_rate": 4.557840096610296e-06, + "loss": 0.9929, + "step": 115209 + }, + { + "epoch": 1.39, + "grad_norm": 7.891640955834422, + "learning_rate": 4.557350012567878e-06, + "loss": 1.4121, + "step": 115212 + }, + { + "epoch": 1.39, + "grad_norm": 21.451918354509072, + "learning_rate": 4.556859947099262e-06, + "loss": 1.3023, + "step": 115215 + }, + { + "epoch": 1.39, + "grad_norm": 6.709922551671266, + "learning_rate": 4.556369900206121e-06, + "loss": 1.0934, + "step": 115218 + }, + { + "epoch": 1.39, + "grad_norm": 11.2251077073016, + "learning_rate": 4.555879871890128e-06, + "loss": 0.9334, + "step": 115221 + }, + { + "epoch": 1.39, + "grad_norm": 14.762481427194151, + "learning_rate": 4.555389862152955e-06, + "loss": 1.2817, + "step": 115224 + }, + { + "epoch": 1.39, + "grad_norm": 22.755294891394364, + "learning_rate": 4.55489987099628e-06, + "loss": 0.7633, + "step": 115227 + }, + { + "epoch": 1.39, + "grad_norm": 10.872994724678948, + "learning_rate": 4.554409898421766e-06, + "loss": 1.1753, + "step": 115230 + }, + { + "epoch": 1.39, + "grad_norm": 12.712948254926847, + "learning_rate": 4.55391994443109e-06, + "loss": 1.0243, + "step": 115233 + }, + { + "epoch": 1.39, + "grad_norm": 28.221038110477476, + "learning_rate": 4.553430009025926e-06, + "loss": 1.3731, + "step": 115236 + }, + { + "epoch": 1.39, + "grad_norm": 14.357236913226423, + "learning_rate": 4.552940092207942e-06, + "loss": 1.4766, + "step": 115239 + }, + { + "epoch": 1.39, + "grad_norm": 4.802202322076003, + "learning_rate": 4.552450193978814e-06, + "loss": 1.1676, + "step": 115242 + }, + { + "epoch": 1.39, + "grad_norm": 4.8881675972412175, + "learning_rate": 4.551960314340207e-06, + "loss": 1.1604, + "step": 115245 + }, + { + "epoch": 1.39, + "grad_norm": 16.71461649469769, + "learning_rate": 4.551470453293798e-06, + "loss": 1.1479, + "step": 115248 + }, + { + "epoch": 1.39, + "grad_norm": 11.715765359647602, + "learning_rate": 4.550980610841258e-06, + "loss": 1.2441, + "step": 115251 + }, + { + "epoch": 1.39, + "grad_norm": 9.421702859195149, + "learning_rate": 4.550490786984261e-06, + "loss": 1.3392, + "step": 115254 + }, + { + "epoch": 1.39, + "grad_norm": 6.632300911755239, + "learning_rate": 4.550000981724477e-06, + "loss": 1.2311, + "step": 115257 + }, + { + "epoch": 1.39, + "grad_norm": 2.650409401532458, + "learning_rate": 4.549511195063573e-06, + "loss": 0.6614, + "step": 115260 + }, + { + "epoch": 1.39, + "grad_norm": 22.63410655329442, + "learning_rate": 4.549021427003223e-06, + "loss": 1.1497, + "step": 115263 + }, + { + "epoch": 1.39, + "grad_norm": 15.399543378499617, + "learning_rate": 4.548531677545101e-06, + "loss": 1.3241, + "step": 115266 + }, + { + "epoch": 1.39, + "grad_norm": 4.609226353477644, + "learning_rate": 4.548041946690879e-06, + "loss": 1.3103, + "step": 115269 + }, + { + "epoch": 1.39, + "grad_norm": 21.453943791237954, + "learning_rate": 4.547552234442223e-06, + "loss": 1.4923, + "step": 115272 + }, + { + "epoch": 1.39, + "grad_norm": 14.586200637463493, + "learning_rate": 4.547062540800806e-06, + "loss": 1.4325, + "step": 115275 + }, + { + "epoch": 1.39, + "grad_norm": 5.523118386023868, + "learning_rate": 4.546572865768306e-06, + "loss": 1.1944, + "step": 115278 + }, + { + "epoch": 1.39, + "grad_norm": 10.17201220640533, + "learning_rate": 4.546083209346388e-06, + "loss": 0.7984, + "step": 115281 + }, + { + "epoch": 1.39, + "grad_norm": 43.13002938385763, + "learning_rate": 4.54559357153672e-06, + "loss": 1.3593, + "step": 115284 + }, + { + "epoch": 1.39, + "grad_norm": 6.248354574701386, + "learning_rate": 4.545103952340975e-06, + "loss": 1.0769, + "step": 115287 + }, + { + "epoch": 1.39, + "grad_norm": 10.461510571233003, + "learning_rate": 4.544614351760827e-06, + "loss": 0.8479, + "step": 115290 + }, + { + "epoch": 1.39, + "grad_norm": 28.462302645158964, + "learning_rate": 4.544124769797944e-06, + "loss": 0.9787, + "step": 115293 + }, + { + "epoch": 1.39, + "grad_norm": 5.669643251265956, + "learning_rate": 4.543635206454002e-06, + "loss": 1.0837, + "step": 115296 + }, + { + "epoch": 1.39, + "grad_norm": 3.3388386991895778, + "learning_rate": 4.543145661730668e-06, + "loss": 1.355, + "step": 115299 + }, + { + "epoch": 1.39, + "grad_norm": 11.098645615222269, + "learning_rate": 4.5426561356296084e-06, + "loss": 1.4385, + "step": 115302 + }, + { + "epoch": 1.39, + "grad_norm": 5.488875200647888, + "learning_rate": 4.542166628152498e-06, + "loss": 1.1187, + "step": 115305 + }, + { + "epoch": 1.39, + "grad_norm": 11.364253814018776, + "learning_rate": 4.54167713930101e-06, + "loss": 0.986, + "step": 115308 + }, + { + "epoch": 1.39, + "grad_norm": 10.498327985617598, + "learning_rate": 4.54118766907681e-06, + "loss": 1.157, + "step": 115311 + }, + { + "epoch": 1.39, + "grad_norm": 4.127461496224789, + "learning_rate": 4.540698217481568e-06, + "loss": 1.397, + "step": 115314 + }, + { + "epoch": 1.39, + "grad_norm": 13.424857422941843, + "learning_rate": 4.540208784516958e-06, + "loss": 1.4047, + "step": 115317 + }, + { + "epoch": 1.39, + "grad_norm": 23.630445090944146, + "learning_rate": 4.539719370184652e-06, + "loss": 1.588, + "step": 115320 + }, + { + "epoch": 1.39, + "grad_norm": 5.444159058555981, + "learning_rate": 4.5392299744863165e-06, + "loss": 1.013, + "step": 115323 + }, + { + "epoch": 1.39, + "grad_norm": 7.725234357891096, + "learning_rate": 4.538740597423619e-06, + "loss": 1.1609, + "step": 115326 + }, + { + "epoch": 1.39, + "grad_norm": 9.918642117719365, + "learning_rate": 4.538251238998232e-06, + "loss": 1.3839, + "step": 115329 + }, + { + "epoch": 1.39, + "grad_norm": 10.510186002549931, + "learning_rate": 4.537761899211827e-06, + "loss": 0.8492, + "step": 115332 + }, + { + "epoch": 1.39, + "grad_norm": 9.07875003119367, + "learning_rate": 4.537272578066076e-06, + "loss": 1.0746, + "step": 115335 + }, + { + "epoch": 1.39, + "grad_norm": 9.677002690485532, + "learning_rate": 4.536783275562644e-06, + "loss": 0.9851, + "step": 115338 + }, + { + "epoch": 1.39, + "grad_norm": 40.259165943086586, + "learning_rate": 4.536293991703206e-06, + "loss": 1.0303, + "step": 115341 + }, + { + "epoch": 1.39, + "grad_norm": 9.338602679397772, + "learning_rate": 4.5358047264894235e-06, + "loss": 1.2299, + "step": 115344 + }, + { + "epoch": 1.39, + "grad_norm": 23.38476111929891, + "learning_rate": 4.535315479922972e-06, + "loss": 1.3459, + "step": 115347 + }, + { + "epoch": 1.39, + "grad_norm": 7.586219679534643, + "learning_rate": 4.5348262520055245e-06, + "loss": 1.351, + "step": 115350 + }, + { + "epoch": 1.39, + "grad_norm": 7.652469140968721, + "learning_rate": 4.534337042738743e-06, + "loss": 1.3702, + "step": 115353 + }, + { + "epoch": 1.39, + "grad_norm": 3.4390545539975537, + "learning_rate": 4.533847852124299e-06, + "loss": 1.5314, + "step": 115356 + }, + { + "epoch": 1.39, + "grad_norm": 11.002396455172375, + "learning_rate": 4.533358680163865e-06, + "loss": 1.3312, + "step": 115359 + }, + { + "epoch": 1.39, + "grad_norm": 3.9575108595850623, + "learning_rate": 4.532869526859112e-06, + "loss": 1.0785, + "step": 115362 + }, + { + "epoch": 1.39, + "grad_norm": 22.20297488403117, + "learning_rate": 4.532380392211706e-06, + "loss": 1.6217, + "step": 115365 + }, + { + "epoch": 1.39, + "grad_norm": 30.063802140544816, + "learning_rate": 4.5318912762233125e-06, + "loss": 1.444, + "step": 115368 + }, + { + "epoch": 1.39, + "grad_norm": 31.50618543334302, + "learning_rate": 4.531402178895604e-06, + "loss": 1.3221, + "step": 115371 + }, + { + "epoch": 1.39, + "grad_norm": 7.038057957582748, + "learning_rate": 4.530913100230251e-06, + "loss": 1.1681, + "step": 115374 + }, + { + "epoch": 1.39, + "grad_norm": 4.88557391180678, + "learning_rate": 4.530424040228925e-06, + "loss": 1.1469, + "step": 115377 + }, + { + "epoch": 1.39, + "grad_norm": 4.348386108978138, + "learning_rate": 4.5299349988932874e-06, + "loss": 1.3585, + "step": 115380 + }, + { + "epoch": 1.39, + "grad_norm": 13.7114652454497, + "learning_rate": 4.529445976225015e-06, + "loss": 1.3032, + "step": 115383 + }, + { + "epoch": 1.39, + "grad_norm": 7.840961507604821, + "learning_rate": 4.528956972225769e-06, + "loss": 1.1334, + "step": 115386 + }, + { + "epoch": 1.39, + "grad_norm": 8.164794170497954, + "learning_rate": 4.528467986897227e-06, + "loss": 1.427, + "step": 115389 + }, + { + "epoch": 1.39, + "grad_norm": 9.429666933245183, + "learning_rate": 4.527979020241049e-06, + "loss": 1.0032, + "step": 115392 + }, + { + "epoch": 1.39, + "grad_norm": 12.07313084204833, + "learning_rate": 4.527490072258907e-06, + "loss": 0.9833, + "step": 115395 + }, + { + "epoch": 1.39, + "grad_norm": 13.878817080405275, + "learning_rate": 4.527001142952471e-06, + "loss": 1.3412, + "step": 115398 + }, + { + "epoch": 1.39, + "grad_norm": 14.08699649711067, + "learning_rate": 4.5265122323234074e-06, + "loss": 1.3587, + "step": 115401 + }, + { + "epoch": 1.39, + "grad_norm": 16.014955943565493, + "learning_rate": 4.526023340373391e-06, + "loss": 0.9174, + "step": 115404 + }, + { + "epoch": 1.39, + "grad_norm": 8.005404907702756, + "learning_rate": 4.525534467104086e-06, + "loss": 1.1726, + "step": 115407 + }, + { + "epoch": 1.39, + "grad_norm": 11.02889052597447, + "learning_rate": 4.525045612517155e-06, + "loss": 1.3592, + "step": 115410 + }, + { + "epoch": 1.39, + "grad_norm": 6.431852902376615, + "learning_rate": 4.524556776614271e-06, + "loss": 1.0502, + "step": 115413 + }, + { + "epoch": 1.39, + "grad_norm": 9.56585072947466, + "learning_rate": 4.524067959397106e-06, + "loss": 1.0891, + "step": 115416 + }, + { + "epoch": 1.39, + "grad_norm": 3.70050276500727, + "learning_rate": 4.523579160867322e-06, + "loss": 1.4455, + "step": 115419 + }, + { + "epoch": 1.39, + "grad_norm": 7.49660999293048, + "learning_rate": 4.523090381026589e-06, + "loss": 1.0851, + "step": 115422 + }, + { + "epoch": 1.39, + "grad_norm": 13.15661227646142, + "learning_rate": 4.52260161987658e-06, + "loss": 1.2249, + "step": 115425 + }, + { + "epoch": 1.39, + "grad_norm": 8.942183809885622, + "learning_rate": 4.522112877418955e-06, + "loss": 1.2538, + "step": 115428 + }, + { + "epoch": 1.39, + "grad_norm": 4.824003472087816, + "learning_rate": 4.521624153655388e-06, + "loss": 1.2487, + "step": 115431 + }, + { + "epoch": 1.39, + "grad_norm": 8.850779073851548, + "learning_rate": 4.521135448587542e-06, + "loss": 1.3055, + "step": 115434 + }, + { + "epoch": 1.39, + "grad_norm": 12.0009483881019, + "learning_rate": 4.5206467622170865e-06, + "loss": 1.3969, + "step": 115437 + }, + { + "epoch": 1.39, + "grad_norm": 7.042644474718345, + "learning_rate": 4.520158094545691e-06, + "loss": 1.0021, + "step": 115440 + }, + { + "epoch": 1.39, + "grad_norm": 22.467351802549814, + "learning_rate": 4.519669445575025e-06, + "loss": 1.6048, + "step": 115443 + }, + { + "epoch": 1.39, + "grad_norm": 24.397657638470456, + "learning_rate": 4.519180815306751e-06, + "loss": 0.8929, + "step": 115446 + }, + { + "epoch": 1.39, + "grad_norm": 8.417300525514245, + "learning_rate": 4.518692203742541e-06, + "loss": 1.2606, + "step": 115449 + }, + { + "epoch": 1.39, + "grad_norm": 6.003746964368415, + "learning_rate": 4.5182036108840555e-06, + "loss": 1.2553, + "step": 115452 + }, + { + "epoch": 1.39, + "grad_norm": 4.654592785886889, + "learning_rate": 4.517715036732968e-06, + "loss": 0.8508, + "step": 115455 + }, + { + "epoch": 1.39, + "grad_norm": 12.617051279218298, + "learning_rate": 4.5172264812909486e-06, + "loss": 1.0398, + "step": 115458 + }, + { + "epoch": 1.39, + "grad_norm": 15.611686842046485, + "learning_rate": 4.516737944559655e-06, + "loss": 1.1442, + "step": 115461 + }, + { + "epoch": 1.39, + "grad_norm": 5.210473579441572, + "learning_rate": 4.516249426540761e-06, + "loss": 1.093, + "step": 115464 + }, + { + "epoch": 1.39, + "grad_norm": 9.75679002892149, + "learning_rate": 4.5157609272359345e-06, + "loss": 1.2876, + "step": 115467 + }, + { + "epoch": 1.39, + "grad_norm": 14.348258888763345, + "learning_rate": 4.515272446646839e-06, + "loss": 1.0964, + "step": 115470 + }, + { + "epoch": 1.39, + "grad_norm": 5.278339494567097, + "learning_rate": 4.514783984775145e-06, + "loss": 1.3282, + "step": 115473 + }, + { + "epoch": 1.39, + "grad_norm": 3.8159256587916093, + "learning_rate": 4.514295541622514e-06, + "loss": 1.264, + "step": 115476 + }, + { + "epoch": 1.39, + "grad_norm": 4.64400473516229, + "learning_rate": 4.513807117190617e-06, + "loss": 1.0422, + "step": 115479 + }, + { + "epoch": 1.39, + "grad_norm": 6.816842423644025, + "learning_rate": 4.513318711481121e-06, + "loss": 1.1001, + "step": 115482 + }, + { + "epoch": 1.39, + "grad_norm": 10.505389839239998, + "learning_rate": 4.5128303244956935e-06, + "loss": 0.9605, + "step": 115485 + }, + { + "epoch": 1.39, + "grad_norm": 20.159081718986524, + "learning_rate": 4.512341956235998e-06, + "loss": 1.1928, + "step": 115488 + }, + { + "epoch": 1.39, + "grad_norm": 20.62979707672303, + "learning_rate": 4.511853606703705e-06, + "loss": 1.0319, + "step": 115491 + }, + { + "epoch": 1.39, + "grad_norm": 9.312506692609503, + "learning_rate": 4.5113652759004765e-06, + "loss": 1.1838, + "step": 115494 + }, + { + "epoch": 1.39, + "grad_norm": 11.788583536740585, + "learning_rate": 4.510876963827984e-06, + "loss": 1.2174, + "step": 115497 + }, + { + "epoch": 1.39, + "grad_norm": 68.77966353219118, + "learning_rate": 4.510388670487887e-06, + "loss": 1.0115, + "step": 115500 + }, + { + "epoch": 1.39, + "grad_norm": 7.808875485355565, + "learning_rate": 4.5099003958818575e-06, + "loss": 1.421, + "step": 115503 + }, + { + "epoch": 1.39, + "grad_norm": 16.777460304918392, + "learning_rate": 4.5094121400115594e-06, + "loss": 1.0513, + "step": 115506 + }, + { + "epoch": 1.39, + "grad_norm": 7.171234747844755, + "learning_rate": 4.508923902878664e-06, + "loss": 1.4753, + "step": 115509 + }, + { + "epoch": 1.39, + "grad_norm": 6.326855016042398, + "learning_rate": 4.508435684484831e-06, + "loss": 1.2723, + "step": 115512 + }, + { + "epoch": 1.39, + "grad_norm": 8.531900282679844, + "learning_rate": 4.507947484831732e-06, + "loss": 0.9588, + "step": 115515 + }, + { + "epoch": 1.39, + "grad_norm": 8.0731267946899, + "learning_rate": 4.5074593039210246e-06, + "loss": 1.3758, + "step": 115518 + }, + { + "epoch": 1.39, + "grad_norm": 7.080972369063509, + "learning_rate": 4.5069711417543816e-06, + "loss": 0.8435, + "step": 115521 + }, + { + "epoch": 1.39, + "grad_norm": 5.927810505127188, + "learning_rate": 4.506482998333471e-06, + "loss": 1.1221, + "step": 115524 + }, + { + "epoch": 1.39, + "grad_norm": 17.377739987080197, + "learning_rate": 4.505994873659951e-06, + "loss": 1.0761, + "step": 115527 + }, + { + "epoch": 1.39, + "grad_norm": 9.711243635502024, + "learning_rate": 4.505506767735492e-06, + "loss": 1.1979, + "step": 115530 + }, + { + "epoch": 1.39, + "grad_norm": 9.34488359616718, + "learning_rate": 4.505018680561763e-06, + "loss": 1.2824, + "step": 115533 + }, + { + "epoch": 1.39, + "grad_norm": 8.66566447062496, + "learning_rate": 4.504530612140423e-06, + "loss": 0.9403, + "step": 115536 + }, + { + "epoch": 1.39, + "grad_norm": 53.7574508730121, + "learning_rate": 4.504042562473143e-06, + "loss": 1.2043, + "step": 115539 + }, + { + "epoch": 1.39, + "grad_norm": 25.435589817358967, + "learning_rate": 4.5035545315615836e-06, + "loss": 0.7743, + "step": 115542 + }, + { + "epoch": 1.39, + "grad_norm": 9.66464441621937, + "learning_rate": 4.503066519407412e-06, + "loss": 1.5427, + "step": 115545 + }, + { + "epoch": 1.39, + "grad_norm": 3.1602405323785936, + "learning_rate": 4.502578526012295e-06, + "loss": 0.9244, + "step": 115548 + }, + { + "epoch": 1.39, + "grad_norm": 4.708299987389951, + "learning_rate": 4.5020905513779e-06, + "loss": 1.1247, + "step": 115551 + }, + { + "epoch": 1.39, + "grad_norm": 2.8108976482144628, + "learning_rate": 4.501602595505886e-06, + "loss": 0.8835, + "step": 115554 + }, + { + "epoch": 1.39, + "grad_norm": 4.22886604284542, + "learning_rate": 4.501114658397927e-06, + "loss": 1.1706, + "step": 115557 + }, + { + "epoch": 1.39, + "grad_norm": 53.51186143643345, + "learning_rate": 4.500626740055678e-06, + "loss": 1.3382, + "step": 115560 + }, + { + "epoch": 1.39, + "grad_norm": 12.313796254612452, + "learning_rate": 4.500138840480809e-06, + "loss": 0.8412, + "step": 115563 + }, + { + "epoch": 1.39, + "grad_norm": 2.846331576100091, + "learning_rate": 4.4996509596749895e-06, + "loss": 1.0546, + "step": 115566 + }, + { + "epoch": 1.39, + "grad_norm": 11.528489953819893, + "learning_rate": 4.499163097639876e-06, + "loss": 1.2266, + "step": 115569 + }, + { + "epoch": 1.39, + "grad_norm": 7.414767463135865, + "learning_rate": 4.498675254377137e-06, + "loss": 0.8957, + "step": 115572 + }, + { + "epoch": 1.39, + "grad_norm": 3.305586950048491, + "learning_rate": 4.498187429888442e-06, + "loss": 1.3905, + "step": 115575 + }, + { + "epoch": 1.39, + "grad_norm": 3.7912990873632024, + "learning_rate": 4.497699624175452e-06, + "loss": 1.524, + "step": 115578 + }, + { + "epoch": 1.39, + "grad_norm": 13.981103931802366, + "learning_rate": 4.497211837239827e-06, + "loss": 1.2233, + "step": 115581 + }, + { + "epoch": 1.39, + "grad_norm": 11.169406076891992, + "learning_rate": 4.496724069083236e-06, + "loss": 1.2753, + "step": 115584 + }, + { + "epoch": 1.39, + "grad_norm": 13.938369823359524, + "learning_rate": 4.496236319707343e-06, + "loss": 1.5889, + "step": 115587 + }, + { + "epoch": 1.39, + "grad_norm": 6.479884887099006, + "learning_rate": 4.495748589113814e-06, + "loss": 1.3102, + "step": 115590 + }, + { + "epoch": 1.39, + "grad_norm": 6.449013468902554, + "learning_rate": 4.495260877304315e-06, + "loss": 1.4063, + "step": 115593 + }, + { + "epoch": 1.39, + "grad_norm": 9.067822911910666, + "learning_rate": 4.494773184280505e-06, + "loss": 0.9975, + "step": 115596 + }, + { + "epoch": 1.39, + "grad_norm": 11.895332222676007, + "learning_rate": 4.494285510044054e-06, + "loss": 1.2229, + "step": 115599 + }, + { + "epoch": 1.39, + "grad_norm": 4.477318227255551, + "learning_rate": 4.493797854596621e-06, + "loss": 1.2142, + "step": 115602 + }, + { + "epoch": 1.39, + "grad_norm": 15.04114441901684, + "learning_rate": 4.493310217939876e-06, + "loss": 1.2862, + "step": 115605 + }, + { + "epoch": 1.39, + "grad_norm": 11.038283645245475, + "learning_rate": 4.492822600075475e-06, + "loss": 1.261, + "step": 115608 + }, + { + "epoch": 1.39, + "grad_norm": 4.589523452314161, + "learning_rate": 4.492335001005087e-06, + "loss": 1.1682, + "step": 115611 + }, + { + "epoch": 1.39, + "grad_norm": 16.46070088345127, + "learning_rate": 4.491847420730377e-06, + "loss": 1.0437, + "step": 115614 + }, + { + "epoch": 1.39, + "grad_norm": 9.001650388057984, + "learning_rate": 4.4913598592530105e-06, + "loss": 0.8863, + "step": 115617 + }, + { + "epoch": 1.39, + "grad_norm": 8.228790013792109, + "learning_rate": 4.490872316574648e-06, + "loss": 1.056, + "step": 115620 + }, + { + "epoch": 1.39, + "grad_norm": 3.040730797559596, + "learning_rate": 4.4903847926969515e-06, + "loss": 1.26, + "step": 115623 + }, + { + "epoch": 1.39, + "grad_norm": 5.286432639382933, + "learning_rate": 4.489897287621587e-06, + "loss": 1.1702, + "step": 115626 + }, + { + "epoch": 1.39, + "grad_norm": 9.08857318455186, + "learning_rate": 4.489409801350218e-06, + "loss": 1.2124, + "step": 115629 + }, + { + "epoch": 1.39, + "grad_norm": 18.747957688004252, + "learning_rate": 4.4889223338845115e-06, + "loss": 1.1167, + "step": 115632 + }, + { + "epoch": 1.39, + "grad_norm": 4.430720167211608, + "learning_rate": 4.488434885226125e-06, + "loss": 1.2892, + "step": 115635 + }, + { + "epoch": 1.39, + "grad_norm": 10.207556033295578, + "learning_rate": 4.487947455376724e-06, + "loss": 1.1948, + "step": 115638 + }, + { + "epoch": 1.39, + "grad_norm": 14.751053535617112, + "learning_rate": 4.487460044337978e-06, + "loss": 0.904, + "step": 115641 + }, + { + "epoch": 1.39, + "grad_norm": 14.150661449587277, + "learning_rate": 4.4869726521115406e-06, + "loss": 1.2747, + "step": 115644 + }, + { + "epoch": 1.39, + "grad_norm": 21.04896931487954, + "learning_rate": 4.4864852786990846e-06, + "loss": 1.319, + "step": 115647 + }, + { + "epoch": 1.39, + "grad_norm": 8.335803412731577, + "learning_rate": 4.4859979241022636e-06, + "loss": 1.3101, + "step": 115650 + }, + { + "epoch": 1.39, + "grad_norm": 20.841654826495123, + "learning_rate": 4.485510588322745e-06, + "loss": 0.9004, + "step": 115653 + }, + { + "epoch": 1.39, + "grad_norm": 10.973640945848707, + "learning_rate": 4.4850232713621935e-06, + "loss": 1.2779, + "step": 115656 + }, + { + "epoch": 1.39, + "grad_norm": 8.686686082599033, + "learning_rate": 4.484535973222275e-06, + "loss": 0.9518, + "step": 115659 + }, + { + "epoch": 1.39, + "grad_norm": 4.341346541727822, + "learning_rate": 4.484048693904648e-06, + "loss": 1.279, + "step": 115662 + }, + { + "epoch": 1.39, + "grad_norm": 2.6593247400302045, + "learning_rate": 4.483561433410973e-06, + "loss": 1.2906, + "step": 115665 + }, + { + "epoch": 1.39, + "grad_norm": 8.142095334308422, + "learning_rate": 4.483074191742914e-06, + "loss": 1.084, + "step": 115668 + }, + { + "epoch": 1.39, + "grad_norm": 10.731213069094458, + "learning_rate": 4.482586968902137e-06, + "loss": 1.1911, + "step": 115671 + }, + { + "epoch": 1.39, + "grad_norm": 4.611741046376423, + "learning_rate": 4.482099764890307e-06, + "loss": 1.4039, + "step": 115674 + }, + { + "epoch": 1.39, + "grad_norm": 9.127271076965384, + "learning_rate": 4.48161257970908e-06, + "loss": 1.3817, + "step": 115677 + }, + { + "epoch": 1.39, + "grad_norm": 7.2270276227710974, + "learning_rate": 4.48112541336012e-06, + "loss": 1.3123, + "step": 115680 + }, + { + "epoch": 1.39, + "grad_norm": 7.638203175107923, + "learning_rate": 4.480638265845095e-06, + "loss": 1.307, + "step": 115683 + }, + { + "epoch": 1.39, + "grad_norm": 7.006413384274959, + "learning_rate": 4.480151137165664e-06, + "loss": 1.5247, + "step": 115686 + }, + { + "epoch": 1.39, + "grad_norm": 3.7450337985855997, + "learning_rate": 4.479664027323486e-06, + "loss": 0.9659, + "step": 115689 + }, + { + "epoch": 1.39, + "grad_norm": 7.978361239011762, + "learning_rate": 4.4791769363202255e-06, + "loss": 1.1945, + "step": 115692 + }, + { + "epoch": 1.39, + "grad_norm": 6.75029326136213, + "learning_rate": 4.478689864157545e-06, + "loss": 1.2123, + "step": 115695 + }, + { + "epoch": 1.39, + "grad_norm": 3.855987385704957, + "learning_rate": 4.478202810837108e-06, + "loss": 1.2641, + "step": 115698 + }, + { + "epoch": 1.39, + "grad_norm": 18.477196417532895, + "learning_rate": 4.477715776360579e-06, + "loss": 1.3835, + "step": 115701 + }, + { + "epoch": 1.39, + "grad_norm": 20.854923802671408, + "learning_rate": 4.4772287607296175e-06, + "loss": 1.2618, + "step": 115704 + }, + { + "epoch": 1.39, + "grad_norm": 11.560118789456059, + "learning_rate": 4.476741763945881e-06, + "loss": 1.0578, + "step": 115707 + }, + { + "epoch": 1.39, + "grad_norm": 3.66220554369951, + "learning_rate": 4.476254786011035e-06, + "loss": 1.092, + "step": 115710 + }, + { + "epoch": 1.39, + "grad_norm": 9.479913501964035, + "learning_rate": 4.475767826926746e-06, + "loss": 1.0001, + "step": 115713 + }, + { + "epoch": 1.39, + "grad_norm": 3.7504255779907525, + "learning_rate": 4.4752808866946675e-06, + "loss": 0.8651, + "step": 115716 + }, + { + "epoch": 1.39, + "grad_norm": 8.367821080555077, + "learning_rate": 4.474793965316465e-06, + "loss": 0.9322, + "step": 115719 + }, + { + "epoch": 1.39, + "grad_norm": 22.822696371637427, + "learning_rate": 4.474307062793801e-06, + "loss": 1.1235, + "step": 115722 + }, + { + "epoch": 1.39, + "grad_norm": 3.9065704780171897, + "learning_rate": 4.473820179128341e-06, + "loss": 1.2757, + "step": 115725 + }, + { + "epoch": 1.39, + "grad_norm": 8.660238918403486, + "learning_rate": 4.473333314321742e-06, + "loss": 1.0652, + "step": 115728 + }, + { + "epoch": 1.39, + "grad_norm": 21.628801872789243, + "learning_rate": 4.472846468375661e-06, + "loss": 1.3545, + "step": 115731 + }, + { + "epoch": 1.39, + "grad_norm": 13.224187294051195, + "learning_rate": 4.472359641291765e-06, + "loss": 1.1608, + "step": 115734 + }, + { + "epoch": 1.39, + "grad_norm": 5.78841510148047, + "learning_rate": 4.471872833071714e-06, + "loss": 1.1135, + "step": 115737 + }, + { + "epoch": 1.39, + "grad_norm": 10.218487602054072, + "learning_rate": 4.471386043717174e-06, + "loss": 1.0682, + "step": 115740 + }, + { + "epoch": 1.39, + "grad_norm": 5.871357711654431, + "learning_rate": 4.470899273229798e-06, + "loss": 0.7828, + "step": 115743 + }, + { + "epoch": 1.39, + "grad_norm": 5.428132241047331, + "learning_rate": 4.470412521611255e-06, + "loss": 0.866, + "step": 115746 + }, + { + "epoch": 1.39, + "grad_norm": 9.610689099554591, + "learning_rate": 4.469925788863199e-06, + "loss": 0.7999, + "step": 115749 + }, + { + "epoch": 1.39, + "grad_norm": 2.7342217792733923, + "learning_rate": 4.469439074987294e-06, + "loss": 1.3006, + "step": 115752 + }, + { + "epoch": 1.39, + "grad_norm": 21.10103118201755, + "learning_rate": 4.468952379985204e-06, + "loss": 1.0646, + "step": 115755 + }, + { + "epoch": 1.39, + "grad_norm": 11.199602328694553, + "learning_rate": 4.468465703858585e-06, + "loss": 1.5369, + "step": 115758 + }, + { + "epoch": 1.39, + "grad_norm": 5.193149629827222, + "learning_rate": 4.4679790466091e-06, + "loss": 1.0437, + "step": 115761 + }, + { + "epoch": 1.39, + "grad_norm": 20.942372791268262, + "learning_rate": 4.46749240823841e-06, + "loss": 1.5906, + "step": 115764 + }, + { + "epoch": 1.39, + "grad_norm": 5.763806569496265, + "learning_rate": 4.467005788748179e-06, + "loss": 1.3797, + "step": 115767 + }, + { + "epoch": 1.39, + "grad_norm": 6.229585300359228, + "learning_rate": 4.466519188140064e-06, + "loss": 1.2157, + "step": 115770 + }, + { + "epoch": 1.39, + "grad_norm": 8.656067899558856, + "learning_rate": 4.466032606415722e-06, + "loss": 1.4762, + "step": 115773 + }, + { + "epoch": 1.39, + "grad_norm": 6.303523289939852, + "learning_rate": 4.465546043576817e-06, + "loss": 1.3018, + "step": 115776 + }, + { + "epoch": 1.39, + "grad_norm": 23.409987659807282, + "learning_rate": 4.465059499625011e-06, + "loss": 1.2947, + "step": 115779 + }, + { + "epoch": 1.39, + "grad_norm": 15.383108144396958, + "learning_rate": 4.464572974561966e-06, + "loss": 1.1591, + "step": 115782 + }, + { + "epoch": 1.39, + "grad_norm": 7.041697001581554, + "learning_rate": 4.464086468389337e-06, + "loss": 0.7213, + "step": 115785 + }, + { + "epoch": 1.39, + "grad_norm": 9.407156773409913, + "learning_rate": 4.463599981108786e-06, + "loss": 0.8208, + "step": 115788 + }, + { + "epoch": 1.39, + "grad_norm": 2.934961246643018, + "learning_rate": 4.463113512721978e-06, + "loss": 1.1992, + "step": 115791 + }, + { + "epoch": 1.39, + "grad_norm": 3.9944937553394526, + "learning_rate": 4.462627063230569e-06, + "loss": 1.0932, + "step": 115794 + }, + { + "epoch": 1.39, + "grad_norm": 7.480269507738649, + "learning_rate": 4.462140632636216e-06, + "loss": 1.3619, + "step": 115797 + }, + { + "epoch": 1.39, + "grad_norm": 7.557020869565816, + "learning_rate": 4.461654220940582e-06, + "loss": 0.7749, + "step": 115800 + }, + { + "epoch": 1.39, + "grad_norm": 15.491506713170804, + "learning_rate": 4.4611678281453275e-06, + "loss": 1.2918, + "step": 115803 + }, + { + "epoch": 1.39, + "grad_norm": 5.399411571629354, + "learning_rate": 4.460681454252112e-06, + "loss": 1.4089, + "step": 115806 + }, + { + "epoch": 1.39, + "grad_norm": 2.2183607082378636, + "learning_rate": 4.460195099262599e-06, + "loss": 1.1571, + "step": 115809 + }, + { + "epoch": 1.39, + "grad_norm": 9.401750886678558, + "learning_rate": 4.459708763178444e-06, + "loss": 1.3227, + "step": 115812 + }, + { + "epoch": 1.39, + "grad_norm": 7.9588011111689685, + "learning_rate": 4.459222446001305e-06, + "loss": 1.535, + "step": 115815 + }, + { + "epoch": 1.39, + "grad_norm": 4.463148214508666, + "learning_rate": 4.458736147732844e-06, + "loss": 1.1779, + "step": 115818 + }, + { + "epoch": 1.39, + "grad_norm": 32.479857381947745, + "learning_rate": 4.458249868374723e-06, + "loss": 1.0867, + "step": 115821 + }, + { + "epoch": 1.39, + "grad_norm": 7.407634341312032, + "learning_rate": 4.457763607928597e-06, + "loss": 1.5005, + "step": 115824 + }, + { + "epoch": 1.39, + "grad_norm": 2.3531350711759664, + "learning_rate": 4.457277366396127e-06, + "loss": 0.7029, + "step": 115827 + }, + { + "epoch": 1.39, + "grad_norm": 6.787146754530042, + "learning_rate": 4.456791143778971e-06, + "loss": 1.0661, + "step": 115830 + }, + { + "epoch": 1.39, + "grad_norm": 11.008602038630219, + "learning_rate": 4.456304940078796e-06, + "loss": 1.4396, + "step": 115833 + }, + { + "epoch": 1.39, + "grad_norm": 7.328592438728528, + "learning_rate": 4.4558187552972545e-06, + "loss": 0.81, + "step": 115836 + }, + { + "epoch": 1.39, + "grad_norm": 9.057451831572118, + "learning_rate": 4.455332589436003e-06, + "loss": 1.3877, + "step": 115839 + }, + { + "epoch": 1.39, + "grad_norm": 7.348076802919192, + "learning_rate": 4.454846442496704e-06, + "loss": 0.9778, + "step": 115842 + }, + { + "epoch": 1.39, + "grad_norm": 10.825029825741627, + "learning_rate": 4.454360314481018e-06, + "loss": 1.1587, + "step": 115845 + }, + { + "epoch": 1.39, + "grad_norm": 15.962783059032322, + "learning_rate": 4.453874205390605e-06, + "loss": 1.1882, + "step": 115848 + }, + { + "epoch": 1.39, + "grad_norm": 23.94879904032825, + "learning_rate": 4.453388115227119e-06, + "loss": 1.27, + "step": 115851 + }, + { + "epoch": 1.39, + "grad_norm": 10.5678164161944, + "learning_rate": 4.452902043992225e-06, + "loss": 1.048, + "step": 115854 + }, + { + "epoch": 1.39, + "grad_norm": 2.70803009669963, + "learning_rate": 4.452415991687573e-06, + "loss": 1.4401, + "step": 115857 + }, + { + "epoch": 1.39, + "grad_norm": 7.3155089254060615, + "learning_rate": 4.451929958314829e-06, + "loss": 1.1922, + "step": 115860 + }, + { + "epoch": 1.39, + "grad_norm": 4.780408992899548, + "learning_rate": 4.451443943875652e-06, + "loss": 1.2406, + "step": 115863 + }, + { + "epoch": 1.39, + "grad_norm": 10.801625883961153, + "learning_rate": 4.450957948371696e-06, + "loss": 1.09, + "step": 115866 + }, + { + "epoch": 1.39, + "grad_norm": 2.7302312443612795, + "learning_rate": 4.4504719718046195e-06, + "loss": 1.2491, + "step": 115869 + }, + { + "epoch": 1.39, + "grad_norm": 7.951400173844101, + "learning_rate": 4.4499860141760855e-06, + "loss": 1.103, + "step": 115872 + }, + { + "epoch": 1.39, + "grad_norm": 7.358907288469169, + "learning_rate": 4.449500075487753e-06, + "loss": 1.6422, + "step": 115875 + }, + { + "epoch": 1.39, + "grad_norm": 3.5305564006263563, + "learning_rate": 4.4490141557412765e-06, + "loss": 1.3562, + "step": 115878 + }, + { + "epoch": 1.39, + "grad_norm": 14.833032498823501, + "learning_rate": 4.448528254938313e-06, + "loss": 1.3006, + "step": 115881 + }, + { + "epoch": 1.39, + "grad_norm": 5.061573891129932, + "learning_rate": 4.448042373080522e-06, + "loss": 1.2976, + "step": 115884 + }, + { + "epoch": 1.39, + "grad_norm": 10.392578537531524, + "learning_rate": 4.447556510169563e-06, + "loss": 1.2957, + "step": 115887 + }, + { + "epoch": 1.39, + "grad_norm": 14.521569418672726, + "learning_rate": 4.447070666207097e-06, + "loss": 1.0828, + "step": 115890 + }, + { + "epoch": 1.39, + "grad_norm": 5.445492760733321, + "learning_rate": 4.4465848411947755e-06, + "loss": 1.2222, + "step": 115893 + }, + { + "epoch": 1.39, + "grad_norm": 6.904908716781131, + "learning_rate": 4.446099035134263e-06, + "loss": 1.0978, + "step": 115896 + }, + { + "epoch": 1.39, + "grad_norm": 7.971225740349012, + "learning_rate": 4.445613248027209e-06, + "loss": 1.1363, + "step": 115899 + }, + { + "epoch": 1.39, + "grad_norm": 5.699038215185314, + "learning_rate": 4.445127479875282e-06, + "loss": 1.254, + "step": 115902 + }, + { + "epoch": 1.39, + "grad_norm": 3.1324271222193114, + "learning_rate": 4.44464173068013e-06, + "loss": 1.3399, + "step": 115905 + }, + { + "epoch": 1.39, + "grad_norm": 30.15543149201759, + "learning_rate": 4.444156000443415e-06, + "loss": 1.1573, + "step": 115908 + }, + { + "epoch": 1.39, + "grad_norm": 9.949716828305462, + "learning_rate": 4.443670289166792e-06, + "loss": 1.3002, + "step": 115911 + }, + { + "epoch": 1.39, + "grad_norm": 5.041982051979184, + "learning_rate": 4.443184596851924e-06, + "loss": 1.2693, + "step": 115914 + }, + { + "epoch": 1.39, + "grad_norm": 6.758456701654169, + "learning_rate": 4.442698923500468e-06, + "loss": 1.5791, + "step": 115917 + }, + { + "epoch": 1.39, + "grad_norm": 5.120464279063274, + "learning_rate": 4.4422132691140786e-06, + "loss": 1.1952, + "step": 115920 + }, + { + "epoch": 1.39, + "grad_norm": 5.4503213587907515, + "learning_rate": 4.441727633694409e-06, + "loss": 0.8684, + "step": 115923 + }, + { + "epoch": 1.39, + "grad_norm": 8.016596248949286, + "learning_rate": 4.441242017243122e-06, + "loss": 1.2214, + "step": 115926 + }, + { + "epoch": 1.39, + "grad_norm": 47.60807362012387, + "learning_rate": 4.440756419761877e-06, + "loss": 1.1408, + "step": 115929 + }, + { + "epoch": 1.39, + "grad_norm": 6.02392132471102, + "learning_rate": 4.440270841252324e-06, + "loss": 1.5049, + "step": 115932 + }, + { + "epoch": 1.39, + "grad_norm": 9.293144562164583, + "learning_rate": 4.4397852817161245e-06, + "loss": 1.0565, + "step": 115935 + }, + { + "epoch": 1.39, + "grad_norm": 11.942332733175876, + "learning_rate": 4.439299741154939e-06, + "loss": 1.0343, + "step": 115938 + }, + { + "epoch": 1.39, + "grad_norm": 29.01047634385289, + "learning_rate": 4.438814219570416e-06, + "loss": 1.1563, + "step": 115941 + }, + { + "epoch": 1.39, + "grad_norm": 6.571935326750201, + "learning_rate": 4.438328716964221e-06, + "loss": 0.9824, + "step": 115944 + }, + { + "epoch": 1.39, + "grad_norm": 6.069250339324004, + "learning_rate": 4.437843233338003e-06, + "loss": 0.7553, + "step": 115947 + }, + { + "epoch": 1.39, + "grad_norm": 7.060991404630238, + "learning_rate": 4.437357768693423e-06, + "loss": 1.227, + "step": 115950 + }, + { + "epoch": 1.39, + "grad_norm": 5.792964126071385, + "learning_rate": 4.436872323032137e-06, + "loss": 1.1425, + "step": 115953 + }, + { + "epoch": 1.39, + "grad_norm": 6.1895158945557665, + "learning_rate": 4.436386896355806e-06, + "loss": 1.1734, + "step": 115956 + }, + { + "epoch": 1.39, + "grad_norm": 8.54473966836762, + "learning_rate": 4.4359014886660775e-06, + "loss": 1.1014, + "step": 115959 + }, + { + "epoch": 1.39, + "grad_norm": 5.888783310328896, + "learning_rate": 4.435416099964618e-06, + "loss": 1.0209, + "step": 115962 + }, + { + "epoch": 1.39, + "grad_norm": 3.217940494986494, + "learning_rate": 4.434930730253076e-06, + "loss": 1.6794, + "step": 115965 + }, + { + "epoch": 1.39, + "grad_norm": 23.045954363113733, + "learning_rate": 4.434445379533109e-06, + "loss": 0.6993, + "step": 115968 + }, + { + "epoch": 1.39, + "grad_norm": 8.256225819861298, + "learning_rate": 4.4339600478063794e-06, + "loss": 1.1435, + "step": 115971 + }, + { + "epoch": 1.39, + "grad_norm": 6.607182985183786, + "learning_rate": 4.433474735074537e-06, + "loss": 1.2134, + "step": 115974 + }, + { + "epoch": 1.39, + "grad_norm": 8.412648686272284, + "learning_rate": 4.432989441339238e-06, + "loss": 1.2583, + "step": 115977 + }, + { + "epoch": 1.39, + "grad_norm": 17.028321739023937, + "learning_rate": 4.432504166602146e-06, + "loss": 0.8328, + "step": 115980 + }, + { + "epoch": 1.39, + "grad_norm": 28.294764901827758, + "learning_rate": 4.4320189108649116e-06, + "loss": 1.0979, + "step": 115983 + }, + { + "epoch": 1.39, + "grad_norm": 23.291592797942805, + "learning_rate": 4.431533674129187e-06, + "loss": 1.2783, + "step": 115986 + }, + { + "epoch": 1.39, + "grad_norm": 14.439755996352684, + "learning_rate": 4.4310484563966325e-06, + "loss": 0.9382, + "step": 115989 + }, + { + "epoch": 1.39, + "grad_norm": 7.311707384407915, + "learning_rate": 4.430563257668904e-06, + "loss": 1.2622, + "step": 115992 + }, + { + "epoch": 1.39, + "grad_norm": 13.246242566720273, + "learning_rate": 4.430078077947657e-06, + "loss": 0.9484, + "step": 115995 + }, + { + "epoch": 1.39, + "grad_norm": 3.5545861415899056, + "learning_rate": 4.4295929172345506e-06, + "loss": 1.274, + "step": 115998 + }, + { + "epoch": 1.39, + "grad_norm": 8.045407193289341, + "learning_rate": 4.429107775531234e-06, + "loss": 1.1026, + "step": 116001 + }, + { + "epoch": 1.39, + "grad_norm": 10.142810374813074, + "learning_rate": 4.42862265283937e-06, + "loss": 1.3867, + "step": 116004 + }, + { + "epoch": 1.39, + "grad_norm": 13.067120129591395, + "learning_rate": 4.428137549160605e-06, + "loss": 1.234, + "step": 116007 + }, + { + "epoch": 1.39, + "grad_norm": 9.352076295722899, + "learning_rate": 4.427652464496603e-06, + "loss": 1.1485, + "step": 116010 + }, + { + "epoch": 1.4, + "grad_norm": 7.6236373910952935, + "learning_rate": 4.4271673988490136e-06, + "loss": 1.0095, + "step": 116013 + }, + { + "epoch": 1.4, + "grad_norm": 12.238797190108569, + "learning_rate": 4.426682352219493e-06, + "loss": 1.1638, + "step": 116016 + }, + { + "epoch": 1.4, + "grad_norm": 8.769345602617491, + "learning_rate": 4.4261973246097e-06, + "loss": 1.3362, + "step": 116019 + }, + { + "epoch": 1.4, + "grad_norm": 7.363709788843939, + "learning_rate": 4.425712316021291e-06, + "loss": 1.1168, + "step": 116022 + }, + { + "epoch": 1.4, + "grad_norm": 14.177068953620568, + "learning_rate": 4.425227326455913e-06, + "loss": 0.9109, + "step": 116025 + }, + { + "epoch": 1.4, + "grad_norm": 11.812071869059617, + "learning_rate": 4.4247423559152316e-06, + "loss": 1.2569, + "step": 116028 + }, + { + "epoch": 1.4, + "grad_norm": 9.29730618793427, + "learning_rate": 4.424257404400892e-06, + "loss": 0.9909, + "step": 116031 + }, + { + "epoch": 1.4, + "grad_norm": 11.188297937502206, + "learning_rate": 4.423772471914552e-06, + "loss": 0.8819, + "step": 116034 + }, + { + "epoch": 1.4, + "grad_norm": 7.246716754801469, + "learning_rate": 4.4232875584578734e-06, + "loss": 1.2455, + "step": 116037 + }, + { + "epoch": 1.4, + "grad_norm": 7.644341061936306, + "learning_rate": 4.422802664032502e-06, + "loss": 0.9006, + "step": 116040 + }, + { + "epoch": 1.4, + "grad_norm": 8.769422663706582, + "learning_rate": 4.422317788640096e-06, + "loss": 1.2495, + "step": 116043 + }, + { + "epoch": 1.4, + "grad_norm": 11.527458507310918, + "learning_rate": 4.421832932282314e-06, + "loss": 1.0408, + "step": 116046 + }, + { + "epoch": 1.4, + "grad_norm": 6.216889384802352, + "learning_rate": 4.421348094960803e-06, + "loss": 1.4469, + "step": 116049 + }, + { + "epoch": 1.4, + "grad_norm": 4.452143794229442, + "learning_rate": 4.420863276677225e-06, + "loss": 1.2902, + "step": 116052 + }, + { + "epoch": 1.4, + "grad_norm": 23.185021571758142, + "learning_rate": 4.420378477433228e-06, + "loss": 1.1259, + "step": 116055 + }, + { + "epoch": 1.4, + "grad_norm": 6.960577708000054, + "learning_rate": 4.4198936972304685e-06, + "loss": 1.0741, + "step": 116058 + }, + { + "epoch": 1.4, + "grad_norm": 6.991153870293052, + "learning_rate": 4.4194089360706025e-06, + "loss": 1.3215, + "step": 116061 + }, + { + "epoch": 1.4, + "grad_norm": 4.645034542657859, + "learning_rate": 4.418924193955287e-06, + "loss": 0.8743, + "step": 116064 + }, + { + "epoch": 1.4, + "grad_norm": 2.1574057991478934, + "learning_rate": 4.41843947088617e-06, + "loss": 1.331, + "step": 116067 + }, + { + "epoch": 1.4, + "grad_norm": 10.557527350253004, + "learning_rate": 4.4179547668649125e-06, + "loss": 0.9776, + "step": 116070 + }, + { + "epoch": 1.4, + "grad_norm": 12.057879609541956, + "learning_rate": 4.4174700818931605e-06, + "loss": 1.0379, + "step": 116073 + }, + { + "epoch": 1.4, + "grad_norm": 20.950588798357398, + "learning_rate": 4.416985415972572e-06, + "loss": 0.9513, + "step": 116076 + }, + { + "epoch": 1.4, + "grad_norm": 11.64004104504233, + "learning_rate": 4.416500769104806e-06, + "loss": 0.7972, + "step": 116079 + }, + { + "epoch": 1.4, + "grad_norm": 3.255412535637467, + "learning_rate": 4.416016141291506e-06, + "loss": 1.1911, + "step": 116082 + }, + { + "epoch": 1.4, + "grad_norm": 9.748203844898288, + "learning_rate": 4.415531532534333e-06, + "loss": 0.9981, + "step": 116085 + }, + { + "epoch": 1.4, + "grad_norm": 8.046315105793445, + "learning_rate": 4.415046942834943e-06, + "loss": 1.1357, + "step": 116088 + }, + { + "epoch": 1.4, + "grad_norm": 21.925083666864296, + "learning_rate": 4.414562372194986e-06, + "loss": 1.0127, + "step": 116091 + }, + { + "epoch": 1.4, + "grad_norm": 9.272641125895754, + "learning_rate": 4.414077820616112e-06, + "loss": 1.3043, + "step": 116094 + }, + { + "epoch": 1.4, + "grad_norm": 11.164045718939526, + "learning_rate": 4.4135932880999785e-06, + "loss": 1.3783, + "step": 116097 + }, + { + "epoch": 1.4, + "grad_norm": 7.931006984940971, + "learning_rate": 4.4131087746482386e-06, + "loss": 1.1615, + "step": 116100 + }, + { + "epoch": 1.4, + "grad_norm": 11.799952372496499, + "learning_rate": 4.412624280262546e-06, + "loss": 1.0781, + "step": 116103 + }, + { + "epoch": 1.4, + "grad_norm": 10.991622181184507, + "learning_rate": 4.412139804944558e-06, + "loss": 1.0606, + "step": 116106 + }, + { + "epoch": 1.4, + "grad_norm": 14.838538601417273, + "learning_rate": 4.411655348695921e-06, + "loss": 1.0257, + "step": 116109 + }, + { + "epoch": 1.4, + "grad_norm": 5.072180834018, + "learning_rate": 4.411170911518295e-06, + "loss": 1.0798, + "step": 116112 + }, + { + "epoch": 1.4, + "grad_norm": 4.8849337594549604, + "learning_rate": 4.410686493413325e-06, + "loss": 1.2041, + "step": 116115 + }, + { + "epoch": 1.4, + "grad_norm": 7.365905522845602, + "learning_rate": 4.410202094382673e-06, + "loss": 1.0626, + "step": 116118 + }, + { + "epoch": 1.4, + "grad_norm": 8.955413848771391, + "learning_rate": 4.409717714427984e-06, + "loss": 1.0504, + "step": 116121 + }, + { + "epoch": 1.4, + "grad_norm": 6.324456756771113, + "learning_rate": 4.409233353550915e-06, + "loss": 1.0301, + "step": 116124 + }, + { + "epoch": 1.4, + "grad_norm": 4.745788638068197, + "learning_rate": 4.408749011753118e-06, + "loss": 1.3341, + "step": 116127 + }, + { + "epoch": 1.4, + "grad_norm": 5.120026592403513, + "learning_rate": 4.408264689036251e-06, + "loss": 1.1041, + "step": 116130 + }, + { + "epoch": 1.4, + "grad_norm": 6.105994094922201, + "learning_rate": 4.407780385401963e-06, + "loss": 1.3213, + "step": 116133 + }, + { + "epoch": 1.4, + "grad_norm": 7.13315764444338, + "learning_rate": 4.4072961008519025e-06, + "loss": 1.2083, + "step": 116136 + }, + { + "epoch": 1.4, + "grad_norm": 3.0759772343819156, + "learning_rate": 4.406811835387724e-06, + "loss": 1.381, + "step": 116139 + }, + { + "epoch": 1.4, + "grad_norm": 6.504916285516635, + "learning_rate": 4.406327589011085e-06, + "loss": 1.4145, + "step": 116142 + }, + { + "epoch": 1.4, + "grad_norm": 12.856355707024417, + "learning_rate": 4.405843361723637e-06, + "loss": 1.1283, + "step": 116145 + }, + { + "epoch": 1.4, + "grad_norm": 7.605851485764049, + "learning_rate": 4.405359153527027e-06, + "loss": 1.209, + "step": 116148 + }, + { + "epoch": 1.4, + "grad_norm": 9.243770138290998, + "learning_rate": 4.40487496442291e-06, + "loss": 1.1218, + "step": 116151 + }, + { + "epoch": 1.4, + "grad_norm": 20.42487612857769, + "learning_rate": 4.404390794412945e-06, + "loss": 1.1658, + "step": 116154 + }, + { + "epoch": 1.4, + "grad_norm": 26.12255394406532, + "learning_rate": 4.4039066434987744e-06, + "loss": 1.2172, + "step": 116157 + }, + { + "epoch": 1.4, + "grad_norm": 5.978780123789421, + "learning_rate": 4.403422511682058e-06, + "loss": 1.3225, + "step": 116160 + }, + { + "epoch": 1.4, + "grad_norm": 6.893945944711307, + "learning_rate": 4.4029383989644414e-06, + "loss": 1.1471, + "step": 116163 + }, + { + "epoch": 1.4, + "grad_norm": 12.037398233184755, + "learning_rate": 4.4024543053475795e-06, + "loss": 1.0004, + "step": 116166 + }, + { + "epoch": 1.4, + "grad_norm": 2.3008912240374246, + "learning_rate": 4.401970230833126e-06, + "loss": 1.2247, + "step": 116169 + }, + { + "epoch": 1.4, + "grad_norm": 20.441508680341723, + "learning_rate": 4.401486175422734e-06, + "loss": 1.0326, + "step": 116172 + }, + { + "epoch": 1.4, + "grad_norm": 9.684487538766712, + "learning_rate": 4.4010021391180545e-06, + "loss": 0.9434, + "step": 116175 + }, + { + "epoch": 1.4, + "grad_norm": 8.324417275090308, + "learning_rate": 4.400518121920734e-06, + "loss": 0.9554, + "step": 116178 + }, + { + "epoch": 1.4, + "grad_norm": 11.073213034997552, + "learning_rate": 4.400034123832427e-06, + "loss": 1.0154, + "step": 116181 + }, + { + "epoch": 1.4, + "grad_norm": 9.038000103000147, + "learning_rate": 4.399550144854787e-06, + "loss": 1.4262, + "step": 116184 + }, + { + "epoch": 1.4, + "grad_norm": 28.01521769196345, + "learning_rate": 4.399066184989469e-06, + "loss": 1.1056, + "step": 116187 + }, + { + "epoch": 1.4, + "grad_norm": 6.082438370628198, + "learning_rate": 4.398582244238118e-06, + "loss": 1.0525, + "step": 116190 + }, + { + "epoch": 1.4, + "grad_norm": 11.553728839277333, + "learning_rate": 4.398098322602387e-06, + "loss": 1.1955, + "step": 116193 + }, + { + "epoch": 1.4, + "grad_norm": 17.313566466069204, + "learning_rate": 4.397614420083932e-06, + "loss": 1.0801, + "step": 116196 + }, + { + "epoch": 1.4, + "grad_norm": 4.547263171031508, + "learning_rate": 4.3971305366844005e-06, + "loss": 1.0599, + "step": 116199 + }, + { + "epoch": 1.4, + "grad_norm": 5.256762150913069, + "learning_rate": 4.396646672405443e-06, + "loss": 0.9256, + "step": 116202 + }, + { + "epoch": 1.4, + "grad_norm": 12.403193334085788, + "learning_rate": 4.39616282724871e-06, + "loss": 1.4199, + "step": 116205 + }, + { + "epoch": 1.4, + "grad_norm": 7.739529739459337, + "learning_rate": 4.395679001215856e-06, + "loss": 0.9465, + "step": 116208 + }, + { + "epoch": 1.4, + "grad_norm": 15.424735657085133, + "learning_rate": 4.39519519430853e-06, + "loss": 1.0412, + "step": 116211 + }, + { + "epoch": 1.4, + "grad_norm": 6.672334106261327, + "learning_rate": 4.394711406528388e-06, + "loss": 1.3969, + "step": 116214 + }, + { + "epoch": 1.4, + "grad_norm": 2.8322815015019347, + "learning_rate": 4.394227637877077e-06, + "loss": 1.1575, + "step": 116217 + }, + { + "epoch": 1.4, + "grad_norm": 6.794705392923788, + "learning_rate": 4.393743888356243e-06, + "loss": 1.0361, + "step": 116220 + }, + { + "epoch": 1.4, + "grad_norm": 3.537360075777713, + "learning_rate": 4.393260157967544e-06, + "loss": 1.121, + "step": 116223 + }, + { + "epoch": 1.4, + "grad_norm": 10.418193933712372, + "learning_rate": 4.392776446712631e-06, + "loss": 1.2709, + "step": 116226 + }, + { + "epoch": 1.4, + "grad_norm": 5.107464671013441, + "learning_rate": 4.392292754593148e-06, + "loss": 1.3542, + "step": 116229 + }, + { + "epoch": 1.4, + "grad_norm": 9.509475215450088, + "learning_rate": 4.391809081610751e-06, + "loss": 1.2986, + "step": 116232 + }, + { + "epoch": 1.4, + "grad_norm": 13.21954393138691, + "learning_rate": 4.391325427767089e-06, + "loss": 1.5351, + "step": 116235 + }, + { + "epoch": 1.4, + "grad_norm": 7.835691529343759, + "learning_rate": 4.390841793063817e-06, + "loss": 1.6361, + "step": 116238 + }, + { + "epoch": 1.4, + "grad_norm": 3.0319610599950937, + "learning_rate": 4.390358177502582e-06, + "loss": 1.4926, + "step": 116241 + }, + { + "epoch": 1.4, + "grad_norm": 8.03161419343284, + "learning_rate": 4.3898745810850295e-06, + "loss": 1.2674, + "step": 116244 + }, + { + "epoch": 1.4, + "grad_norm": 13.144833981374614, + "learning_rate": 4.389391003812815e-06, + "loss": 0.9552, + "step": 116247 + }, + { + "epoch": 1.4, + "grad_norm": 13.126830648481999, + "learning_rate": 4.388907445687587e-06, + "loss": 1.0103, + "step": 116250 + }, + { + "epoch": 1.4, + "grad_norm": 12.237968615904716, + "learning_rate": 4.3884239067110015e-06, + "loss": 0.9447, + "step": 116253 + }, + { + "epoch": 1.4, + "grad_norm": 5.355913138596646, + "learning_rate": 4.387940386884699e-06, + "loss": 1.1118, + "step": 116256 + }, + { + "epoch": 1.4, + "grad_norm": 7.049813095409487, + "learning_rate": 4.38745688621034e-06, + "loss": 1.2927, + "step": 116259 + }, + { + "epoch": 1.4, + "grad_norm": 8.044108891807982, + "learning_rate": 4.386973404689564e-06, + "loss": 1.2461, + "step": 116262 + }, + { + "epoch": 1.4, + "grad_norm": 18.11149284043552, + "learning_rate": 4.386489942324026e-06, + "loss": 1.1232, + "step": 116265 + }, + { + "epoch": 1.4, + "grad_norm": 29.039178260436763, + "learning_rate": 4.38600649911538e-06, + "loss": 1.1813, + "step": 116268 + }, + { + "epoch": 1.4, + "grad_norm": 13.472366606977305, + "learning_rate": 4.385523075065268e-06, + "loss": 1.0458, + "step": 116271 + }, + { + "epoch": 1.4, + "grad_norm": 10.506616842212697, + "learning_rate": 4.385039670175344e-06, + "loss": 1.0358, + "step": 116274 + }, + { + "epoch": 1.4, + "grad_norm": 3.1232500629297575, + "learning_rate": 4.384556284447255e-06, + "loss": 1.3437, + "step": 116277 + }, + { + "epoch": 1.4, + "grad_norm": 4.294159564682106, + "learning_rate": 4.384072917882659e-06, + "loss": 0.9903, + "step": 116280 + }, + { + "epoch": 1.4, + "grad_norm": 21.5954362581937, + "learning_rate": 4.3835895704831975e-06, + "loss": 1.4485, + "step": 116283 + }, + { + "epoch": 1.4, + "grad_norm": 2.528314821788309, + "learning_rate": 4.3831062422505185e-06, + "loss": 1.3711, + "step": 116286 + }, + { + "epoch": 1.4, + "grad_norm": 8.977602033160657, + "learning_rate": 4.3826229331862755e-06, + "loss": 0.9952, + "step": 116289 + }, + { + "epoch": 1.4, + "grad_norm": 9.432966785665743, + "learning_rate": 4.382139643292116e-06, + "loss": 1.0571, + "step": 116292 + }, + { + "epoch": 1.4, + "grad_norm": 4.715704584881669, + "learning_rate": 4.3816563725696945e-06, + "loss": 1.029, + "step": 116295 + }, + { + "epoch": 1.4, + "grad_norm": 10.959435387062069, + "learning_rate": 4.381173121020652e-06, + "loss": 1.0915, + "step": 116298 + }, + { + "epoch": 1.4, + "grad_norm": 9.749491564325067, + "learning_rate": 4.380689888646646e-06, + "loss": 1.114, + "step": 116301 + }, + { + "epoch": 1.4, + "grad_norm": 12.561020994994815, + "learning_rate": 4.380206675449318e-06, + "loss": 1.393, + "step": 116304 + }, + { + "epoch": 1.4, + "grad_norm": 49.06397994057619, + "learning_rate": 4.379723481430324e-06, + "loss": 1.1125, + "step": 116307 + }, + { + "epoch": 1.4, + "grad_norm": 9.409489459156301, + "learning_rate": 4.379240306591305e-06, + "loss": 1.1956, + "step": 116310 + }, + { + "epoch": 1.4, + "grad_norm": 4.746861823782167, + "learning_rate": 4.378757150933914e-06, + "loss": 1.435, + "step": 116313 + }, + { + "epoch": 1.4, + "grad_norm": 10.578184417533343, + "learning_rate": 4.3782740144598e-06, + "loss": 1.0175, + "step": 116316 + }, + { + "epoch": 1.4, + "grad_norm": 23.460547871176722, + "learning_rate": 4.377790897170613e-06, + "loss": 1.3684, + "step": 116319 + }, + { + "epoch": 1.4, + "grad_norm": 13.955716745868042, + "learning_rate": 4.377307799068004e-06, + "loss": 1.0282, + "step": 116322 + }, + { + "epoch": 1.4, + "grad_norm": 13.74844987680131, + "learning_rate": 4.376824720153617e-06, + "loss": 0.9193, + "step": 116325 + }, + { + "epoch": 1.4, + "grad_norm": 7.610606475115159, + "learning_rate": 4.376341660429098e-06, + "loss": 1.0529, + "step": 116328 + }, + { + "epoch": 1.4, + "grad_norm": 4.649645138926764, + "learning_rate": 4.3758586198961005e-06, + "loss": 1.2825, + "step": 116331 + }, + { + "epoch": 1.4, + "grad_norm": 11.78978521033417, + "learning_rate": 4.375375598556275e-06, + "loss": 1.2402, + "step": 116334 + }, + { + "epoch": 1.4, + "grad_norm": 9.912934955450968, + "learning_rate": 4.374892596411263e-06, + "loss": 1.049, + "step": 116337 + }, + { + "epoch": 1.4, + "grad_norm": 33.787154541465455, + "learning_rate": 4.374409613462716e-06, + "loss": 1.6689, + "step": 116340 + }, + { + "epoch": 1.4, + "grad_norm": 8.130331048476759, + "learning_rate": 4.373926649712288e-06, + "loss": 1.2325, + "step": 116343 + }, + { + "epoch": 1.4, + "grad_norm": 2.8011041919574255, + "learning_rate": 4.373443705161616e-06, + "loss": 1.3519, + "step": 116346 + }, + { + "epoch": 1.4, + "grad_norm": 3.6121437524159363, + "learning_rate": 4.372960779812359e-06, + "loss": 1.2717, + "step": 116349 + }, + { + "epoch": 1.4, + "grad_norm": 7.251798171912195, + "learning_rate": 4.372477873666156e-06, + "loss": 0.9816, + "step": 116352 + }, + { + "epoch": 1.4, + "grad_norm": 7.7433003348792475, + "learning_rate": 4.371994986724659e-06, + "loss": 0.9961, + "step": 116355 + }, + { + "epoch": 1.4, + "grad_norm": 7.408057431036591, + "learning_rate": 4.371512118989516e-06, + "loss": 1.3604, + "step": 116358 + }, + { + "epoch": 1.4, + "grad_norm": 10.939791325364409, + "learning_rate": 4.371029270462378e-06, + "loss": 1.0762, + "step": 116361 + }, + { + "epoch": 1.4, + "grad_norm": 5.155649947507877, + "learning_rate": 4.370546441144886e-06, + "loss": 1.6722, + "step": 116364 + }, + { + "epoch": 1.4, + "grad_norm": 8.725633820118265, + "learning_rate": 4.370063631038695e-06, + "loss": 1.002, + "step": 116367 + }, + { + "epoch": 1.4, + "grad_norm": 14.27540093952805, + "learning_rate": 4.369580840145445e-06, + "loss": 0.9512, + "step": 116370 + }, + { + "epoch": 1.4, + "grad_norm": 17.01782194302767, + "learning_rate": 4.369098068466788e-06, + "loss": 1.0592, + "step": 116373 + }, + { + "epoch": 1.4, + "grad_norm": 13.071653734285029, + "learning_rate": 4.368615316004374e-06, + "loss": 0.8581, + "step": 116376 + }, + { + "epoch": 1.4, + "grad_norm": 17.3716301869018, + "learning_rate": 4.368132582759846e-06, + "loss": 1.1481, + "step": 116379 + }, + { + "epoch": 1.4, + "grad_norm": 12.60679076111917, + "learning_rate": 4.36764986873485e-06, + "loss": 1.0435, + "step": 116382 + }, + { + "epoch": 1.4, + "grad_norm": 22.891195124517214, + "learning_rate": 4.367167173931038e-06, + "loss": 1.4143, + "step": 116385 + }, + { + "epoch": 1.4, + "grad_norm": 13.295783066184894, + "learning_rate": 4.366684498350059e-06, + "loss": 0.8621, + "step": 116388 + }, + { + "epoch": 1.4, + "grad_norm": 17.583982956071154, + "learning_rate": 4.366201841993556e-06, + "loss": 1.2642, + "step": 116391 + }, + { + "epoch": 1.4, + "grad_norm": 10.115478276481541, + "learning_rate": 4.365719204863173e-06, + "loss": 1.046, + "step": 116394 + }, + { + "epoch": 1.4, + "grad_norm": 6.2487847388848134, + "learning_rate": 4.365236586960562e-06, + "loss": 1.1817, + "step": 116397 + }, + { + "epoch": 1.4, + "grad_norm": 8.42394018293232, + "learning_rate": 4.364753988287369e-06, + "loss": 0.9501, + "step": 116400 + }, + { + "epoch": 1.4, + "grad_norm": 10.837602397965568, + "learning_rate": 4.3642714088452435e-06, + "loss": 1.1072, + "step": 116403 + }, + { + "epoch": 1.4, + "grad_norm": 13.695231894126191, + "learning_rate": 4.363788848635826e-06, + "loss": 1.0786, + "step": 116406 + }, + { + "epoch": 1.4, + "grad_norm": 12.553770510255152, + "learning_rate": 4.363306307660772e-06, + "loss": 0.8461, + "step": 116409 + }, + { + "epoch": 1.4, + "grad_norm": 7.266059405014778, + "learning_rate": 4.3628237859217186e-06, + "loss": 0.8237, + "step": 116412 + }, + { + "epoch": 1.4, + "grad_norm": 8.475267340640453, + "learning_rate": 4.36234128342032e-06, + "loss": 1.1972, + "step": 116415 + }, + { + "epoch": 1.4, + "grad_norm": 56.52207054070617, + "learning_rate": 4.3618588001582174e-06, + "loss": 1.069, + "step": 116418 + }, + { + "epoch": 1.4, + "grad_norm": 4.811180333858058, + "learning_rate": 4.36137633613706e-06, + "loss": 1.2933, + "step": 116421 + }, + { + "epoch": 1.4, + "grad_norm": 46.25299018967211, + "learning_rate": 4.360893891358493e-06, + "loss": 1.0056, + "step": 116424 + }, + { + "epoch": 1.4, + "grad_norm": 3.9803614451666025, + "learning_rate": 4.360411465824165e-06, + "loss": 0.7672, + "step": 116427 + }, + { + "epoch": 1.4, + "grad_norm": 8.080479720765888, + "learning_rate": 4.359929059535725e-06, + "loss": 1.3527, + "step": 116430 + }, + { + "epoch": 1.4, + "grad_norm": 11.911669505417983, + "learning_rate": 4.359446672494815e-06, + "loss": 1.0256, + "step": 116433 + }, + { + "epoch": 1.4, + "grad_norm": 12.760265515215796, + "learning_rate": 4.358964304703078e-06, + "loss": 1.0572, + "step": 116436 + }, + { + "epoch": 1.4, + "grad_norm": 16.57872630749122, + "learning_rate": 4.358481956162164e-06, + "loss": 1.2359, + "step": 116439 + }, + { + "epoch": 1.4, + "grad_norm": 5.621606715962323, + "learning_rate": 4.357999626873723e-06, + "loss": 1.4429, + "step": 116442 + }, + { + "epoch": 1.4, + "grad_norm": 7.502073420412618, + "learning_rate": 4.3575173168393935e-06, + "loss": 1.4954, + "step": 116445 + }, + { + "epoch": 1.4, + "grad_norm": 13.82101491707598, + "learning_rate": 4.357035026060824e-06, + "loss": 0.9221, + "step": 116448 + }, + { + "epoch": 1.4, + "grad_norm": 8.104243440354344, + "learning_rate": 4.356552754539666e-06, + "loss": 1.291, + "step": 116451 + }, + { + "epoch": 1.4, + "grad_norm": 11.02736535429119, + "learning_rate": 4.356070502277557e-06, + "loss": 0.9403, + "step": 116454 + }, + { + "epoch": 1.4, + "grad_norm": 2.590703019796343, + "learning_rate": 4.35558826927615e-06, + "loss": 1.3538, + "step": 116457 + }, + { + "epoch": 1.4, + "grad_norm": 7.597205026456402, + "learning_rate": 4.355106055537084e-06, + "loss": 1.3396, + "step": 116460 + }, + { + "epoch": 1.4, + "grad_norm": 25.059886854373687, + "learning_rate": 4.354623861062006e-06, + "loss": 1.1932, + "step": 116463 + }, + { + "epoch": 1.4, + "grad_norm": 15.256027837293324, + "learning_rate": 4.354141685852566e-06, + "loss": 0.9931, + "step": 116466 + }, + { + "epoch": 1.4, + "grad_norm": 5.3139300541419185, + "learning_rate": 4.3536595299104075e-06, + "loss": 1.2204, + "step": 116469 + }, + { + "epoch": 1.4, + "grad_norm": 6.356813189594574, + "learning_rate": 4.353177393237173e-06, + "loss": 1.2768, + "step": 116472 + }, + { + "epoch": 1.4, + "grad_norm": 8.286749103904897, + "learning_rate": 4.352695275834514e-06, + "loss": 0.965, + "step": 116475 + }, + { + "epoch": 1.4, + "grad_norm": 30.001127193387223, + "learning_rate": 4.352213177704068e-06, + "loss": 0.8603, + "step": 116478 + }, + { + "epoch": 1.4, + "grad_norm": 3.047118953974408, + "learning_rate": 4.351731098847484e-06, + "loss": 1.196, + "step": 116481 + }, + { + "epoch": 1.4, + "grad_norm": 7.783489088058415, + "learning_rate": 4.351249039266411e-06, + "loss": 1.0772, + "step": 116484 + }, + { + "epoch": 1.4, + "grad_norm": 14.634047039143784, + "learning_rate": 4.350766998962487e-06, + "loss": 1.3502, + "step": 116487 + }, + { + "epoch": 1.4, + "grad_norm": 9.29038409444618, + "learning_rate": 4.35028497793736e-06, + "loss": 0.8982, + "step": 116490 + }, + { + "epoch": 1.4, + "grad_norm": 2.280137810795385, + "learning_rate": 4.349802976192679e-06, + "loss": 1.1236, + "step": 116493 + }, + { + "epoch": 1.4, + "grad_norm": 23.027935118439203, + "learning_rate": 4.349320993730085e-06, + "loss": 1.2106, + "step": 116496 + }, + { + "epoch": 1.4, + "grad_norm": 6.349947220851011, + "learning_rate": 4.3488390305512195e-06, + "loss": 0.9942, + "step": 116499 + }, + { + "epoch": 1.4, + "grad_norm": 13.077882658106239, + "learning_rate": 4.348357086657732e-06, + "loss": 1.1899, + "step": 116502 + }, + { + "epoch": 1.4, + "grad_norm": 9.633626298199285, + "learning_rate": 4.347875162051265e-06, + "loss": 1.0325, + "step": 116505 + }, + { + "epoch": 1.4, + "grad_norm": 12.724546716233608, + "learning_rate": 4.347393256733465e-06, + "loss": 0.8524, + "step": 116508 + }, + { + "epoch": 1.4, + "grad_norm": 30.551255106057575, + "learning_rate": 4.346911370705978e-06, + "loss": 1.1611, + "step": 116511 + }, + { + "epoch": 1.4, + "grad_norm": 5.889164649235114, + "learning_rate": 4.346429503970443e-06, + "loss": 0.8967, + "step": 116514 + }, + { + "epoch": 1.4, + "grad_norm": 3.435192718001913, + "learning_rate": 4.3459476565285116e-06, + "loss": 1.2508, + "step": 116517 + }, + { + "epoch": 1.4, + "grad_norm": 6.177144405458656, + "learning_rate": 4.345465828381821e-06, + "loss": 1.1745, + "step": 116520 + }, + { + "epoch": 1.4, + "grad_norm": 2.334390679418153, + "learning_rate": 4.344984019532021e-06, + "loss": 1.4528, + "step": 116523 + }, + { + "epoch": 1.4, + "grad_norm": 7.36195742951049, + "learning_rate": 4.3445022299807515e-06, + "loss": 1.08, + "step": 116526 + }, + { + "epoch": 1.4, + "grad_norm": 7.820695062091904, + "learning_rate": 4.344020459729659e-06, + "loss": 0.8268, + "step": 116529 + }, + { + "epoch": 1.4, + "grad_norm": 10.371207860717456, + "learning_rate": 4.343538708780387e-06, + "loss": 1.0363, + "step": 116532 + }, + { + "epoch": 1.4, + "grad_norm": 6.539389806420962, + "learning_rate": 4.3430569771345834e-06, + "loss": 1.1004, + "step": 116535 + }, + { + "epoch": 1.4, + "grad_norm": 15.304119777385514, + "learning_rate": 4.342575264793888e-06, + "loss": 1.1176, + "step": 116538 + }, + { + "epoch": 1.4, + "grad_norm": 6.908009038683449, + "learning_rate": 4.342093571759942e-06, + "loss": 1.2578, + "step": 116541 + }, + { + "epoch": 1.4, + "grad_norm": 11.48137258650195, + "learning_rate": 4.341611898034394e-06, + "loss": 1.0549, + "step": 116544 + }, + { + "epoch": 1.4, + "grad_norm": 7.17621608439574, + "learning_rate": 4.341130243618885e-06, + "loss": 1.173, + "step": 116547 + }, + { + "epoch": 1.4, + "grad_norm": 58.22491236633673, + "learning_rate": 4.340648608515064e-06, + "loss": 1.2922, + "step": 116550 + }, + { + "epoch": 1.4, + "grad_norm": 17.002394579587754, + "learning_rate": 4.340166992724568e-06, + "loss": 1.1663, + "step": 116553 + }, + { + "epoch": 1.4, + "grad_norm": 9.486828128207799, + "learning_rate": 4.339685396249044e-06, + "loss": 0.9526, + "step": 116556 + }, + { + "epoch": 1.4, + "grad_norm": 12.706545474313018, + "learning_rate": 4.3392038190901365e-06, + "loss": 1.2497, + "step": 116559 + }, + { + "epoch": 1.4, + "grad_norm": 11.03925760534233, + "learning_rate": 4.338722261249485e-06, + "loss": 1.3891, + "step": 116562 + }, + { + "epoch": 1.4, + "grad_norm": 6.848028816731457, + "learning_rate": 4.338240722728738e-06, + "loss": 1.4659, + "step": 116565 + }, + { + "epoch": 1.4, + "grad_norm": 11.780542710689208, + "learning_rate": 4.337759203529533e-06, + "loss": 1.3731, + "step": 116568 + }, + { + "epoch": 1.4, + "grad_norm": 10.817534757234888, + "learning_rate": 4.3372777036535165e-06, + "loss": 1.2072, + "step": 116571 + }, + { + "epoch": 1.4, + "grad_norm": 18.350873114570895, + "learning_rate": 4.336796223102332e-06, + "loss": 1.2904, + "step": 116574 + }, + { + "epoch": 1.4, + "grad_norm": 10.899666033975672, + "learning_rate": 4.336314761877624e-06, + "loss": 1.1352, + "step": 116577 + }, + { + "epoch": 1.4, + "grad_norm": 34.07965310938197, + "learning_rate": 4.335833319981036e-06, + "loss": 1.1057, + "step": 116580 + }, + { + "epoch": 1.4, + "grad_norm": 20.5803165996043, + "learning_rate": 4.335351897414203e-06, + "loss": 1.1682, + "step": 116583 + }, + { + "epoch": 1.4, + "grad_norm": 18.18810038833833, + "learning_rate": 4.3348704941787745e-06, + "loss": 1.2967, + "step": 116586 + }, + { + "epoch": 1.4, + "grad_norm": 6.323497197980438, + "learning_rate": 4.334389110276393e-06, + "loss": 1.0362, + "step": 116589 + }, + { + "epoch": 1.4, + "grad_norm": 14.661962105392492, + "learning_rate": 4.333907745708703e-06, + "loss": 1.4766, + "step": 116592 + }, + { + "epoch": 1.4, + "grad_norm": 9.552108648768389, + "learning_rate": 4.333426400477342e-06, + "loss": 1.0707, + "step": 116595 + }, + { + "epoch": 1.4, + "grad_norm": 6.719818956640985, + "learning_rate": 4.332945074583955e-06, + "loss": 1.3593, + "step": 116598 + }, + { + "epoch": 1.4, + "grad_norm": 7.538028563970548, + "learning_rate": 4.332463768030189e-06, + "loss": 1.2422, + "step": 116601 + }, + { + "epoch": 1.4, + "grad_norm": 6.039333177470932, + "learning_rate": 4.3319824808176834e-06, + "loss": 1.3219, + "step": 116604 + }, + { + "epoch": 1.4, + "grad_norm": 10.905160792061848, + "learning_rate": 4.331501212948076e-06, + "loss": 1.1822, + "step": 116607 + }, + { + "epoch": 1.4, + "grad_norm": 10.05116543970255, + "learning_rate": 4.331019964423013e-06, + "loss": 1.0157, + "step": 116610 + }, + { + "epoch": 1.4, + "grad_norm": 5.915176957684321, + "learning_rate": 4.330538735244138e-06, + "loss": 1.5942, + "step": 116613 + }, + { + "epoch": 1.4, + "grad_norm": 7.405091278336427, + "learning_rate": 4.330057525413091e-06, + "loss": 1.0631, + "step": 116616 + }, + { + "epoch": 1.4, + "grad_norm": 18.49469251947772, + "learning_rate": 4.329576334931519e-06, + "loss": 1.2788, + "step": 116619 + }, + { + "epoch": 1.4, + "grad_norm": 16.510294670137615, + "learning_rate": 4.329095163801057e-06, + "loss": 1.1108, + "step": 116622 + }, + { + "epoch": 1.4, + "grad_norm": 13.55324023773574, + "learning_rate": 4.328614012023354e-06, + "loss": 1.47, + "step": 116625 + }, + { + "epoch": 1.4, + "grad_norm": 3.7995750877880217, + "learning_rate": 4.328132879600045e-06, + "loss": 0.8727, + "step": 116628 + }, + { + "epoch": 1.4, + "grad_norm": 5.924354691009084, + "learning_rate": 4.32765176653278e-06, + "loss": 1.1741, + "step": 116631 + }, + { + "epoch": 1.4, + "grad_norm": 5.227227496706439, + "learning_rate": 4.3271706728231925e-06, + "loss": 0.8219, + "step": 116634 + }, + { + "epoch": 1.4, + "grad_norm": 5.2552763134404135, + "learning_rate": 4.3266895984729275e-06, + "loss": 1.0594, + "step": 116637 + }, + { + "epoch": 1.4, + "grad_norm": 7.166013814648048, + "learning_rate": 4.326208543483627e-06, + "loss": 1.236, + "step": 116640 + }, + { + "epoch": 1.4, + "grad_norm": 59.989769034140245, + "learning_rate": 4.325727507856938e-06, + "loss": 1.2305, + "step": 116643 + }, + { + "epoch": 1.4, + "grad_norm": 3.7027514390859273, + "learning_rate": 4.325246491594496e-06, + "loss": 1.1444, + "step": 116646 + }, + { + "epoch": 1.4, + "grad_norm": 14.000685715367096, + "learning_rate": 4.324765494697941e-06, + "loss": 1.2799, + "step": 116649 + }, + { + "epoch": 1.4, + "grad_norm": 13.436966076706208, + "learning_rate": 4.324284517168916e-06, + "loss": 1.1638, + "step": 116652 + }, + { + "epoch": 1.4, + "grad_norm": 12.812589711038498, + "learning_rate": 4.323803559009064e-06, + "loss": 0.7969, + "step": 116655 + }, + { + "epoch": 1.4, + "grad_norm": 8.151184970727773, + "learning_rate": 4.3233226202200304e-06, + "loss": 1.1989, + "step": 116658 + }, + { + "epoch": 1.4, + "grad_norm": 9.296183013124864, + "learning_rate": 4.322841700803448e-06, + "loss": 1.2054, + "step": 116661 + }, + { + "epoch": 1.4, + "grad_norm": 12.748042680438228, + "learning_rate": 4.3223608007609604e-06, + "loss": 1.4439, + "step": 116664 + }, + { + "epoch": 1.4, + "grad_norm": 3.561163595364606, + "learning_rate": 4.321879920094216e-06, + "loss": 1.106, + "step": 116667 + }, + { + "epoch": 1.4, + "grad_norm": 14.586651512208935, + "learning_rate": 4.321399058804846e-06, + "loss": 1.2737, + "step": 116670 + }, + { + "epoch": 1.4, + "grad_norm": 6.694576119913704, + "learning_rate": 4.3209182168944986e-06, + "loss": 1.2618, + "step": 116673 + }, + { + "epoch": 1.4, + "grad_norm": 11.05918672569049, + "learning_rate": 4.320437394364808e-06, + "loss": 1.0806, + "step": 116676 + }, + { + "epoch": 1.4, + "grad_norm": 5.141898425816338, + "learning_rate": 4.3199565912174206e-06, + "loss": 0.9445, + "step": 116679 + }, + { + "epoch": 1.4, + "grad_norm": 2.5537370656206164, + "learning_rate": 4.319475807453973e-06, + "loss": 0.8785, + "step": 116682 + }, + { + "epoch": 1.4, + "grad_norm": 4.960162047018736, + "learning_rate": 4.318995043076113e-06, + "loss": 1.1316, + "step": 116685 + }, + { + "epoch": 1.4, + "grad_norm": 14.53204464546432, + "learning_rate": 4.318514298085477e-06, + "loss": 0.9322, + "step": 116688 + }, + { + "epoch": 1.4, + "grad_norm": 11.252773925649592, + "learning_rate": 4.318033572483702e-06, + "loss": 1.2121, + "step": 116691 + }, + { + "epoch": 1.4, + "grad_norm": 4.171908059914978, + "learning_rate": 4.317552866272431e-06, + "loss": 0.7519, + "step": 116694 + }, + { + "epoch": 1.4, + "grad_norm": 5.746174415707635, + "learning_rate": 4.317072179453305e-06, + "loss": 1.039, + "step": 116697 + }, + { + "epoch": 1.4, + "grad_norm": 25.857197336429284, + "learning_rate": 4.316591512027969e-06, + "loss": 1.2095, + "step": 116700 + }, + { + "epoch": 1.4, + "grad_norm": 9.409430442716404, + "learning_rate": 4.316110863998054e-06, + "loss": 1.3899, + "step": 116703 + }, + { + "epoch": 1.4, + "grad_norm": 6.186735973901363, + "learning_rate": 4.315630235365207e-06, + "loss": 1.1025, + "step": 116706 + }, + { + "epoch": 1.4, + "grad_norm": 12.58951757619973, + "learning_rate": 4.315149626131069e-06, + "loss": 1.0993, + "step": 116709 + }, + { + "epoch": 1.4, + "grad_norm": 17.47331669040065, + "learning_rate": 4.314669036297278e-06, + "loss": 1.2137, + "step": 116712 + }, + { + "epoch": 1.4, + "grad_norm": 16.499472080268767, + "learning_rate": 4.314188465865471e-06, + "loss": 1.2724, + "step": 116715 + }, + { + "epoch": 1.4, + "grad_norm": 8.8077667811122, + "learning_rate": 4.31370791483729e-06, + "loss": 1.2149, + "step": 116718 + }, + { + "epoch": 1.4, + "grad_norm": 20.656639107691724, + "learning_rate": 4.313227383214376e-06, + "loss": 0.8351, + "step": 116721 + }, + { + "epoch": 1.4, + "grad_norm": 24.83460824550276, + "learning_rate": 4.3127468709983675e-06, + "loss": 1.1919, + "step": 116724 + }, + { + "epoch": 1.4, + "grad_norm": 7.991468226481332, + "learning_rate": 4.31226637819091e-06, + "loss": 1.055, + "step": 116727 + }, + { + "epoch": 1.4, + "grad_norm": 9.150931633098043, + "learning_rate": 4.311785904793638e-06, + "loss": 0.8102, + "step": 116730 + }, + { + "epoch": 1.4, + "grad_norm": 37.73381595995937, + "learning_rate": 4.311305450808189e-06, + "loss": 1.1485, + "step": 116733 + }, + { + "epoch": 1.4, + "grad_norm": 19.87213749340523, + "learning_rate": 4.310825016236204e-06, + "loss": 1.0924, + "step": 116736 + }, + { + "epoch": 1.4, + "grad_norm": 16.812874865164552, + "learning_rate": 4.310344601079329e-06, + "loss": 1.2579, + "step": 116739 + }, + { + "epoch": 1.4, + "grad_norm": 10.179843744551851, + "learning_rate": 4.309864205339195e-06, + "loss": 0.921, + "step": 116742 + }, + { + "epoch": 1.4, + "grad_norm": 7.8766991744301285, + "learning_rate": 4.309383829017444e-06, + "loss": 1.3607, + "step": 116745 + }, + { + "epoch": 1.4, + "grad_norm": 9.94328088712249, + "learning_rate": 4.308903472115716e-06, + "loss": 1.2523, + "step": 116748 + }, + { + "epoch": 1.4, + "grad_norm": 3.1868616703952597, + "learning_rate": 4.308423134635655e-06, + "loss": 1.1796, + "step": 116751 + }, + { + "epoch": 1.4, + "grad_norm": 3.2581697282258735, + "learning_rate": 4.307942816578896e-06, + "loss": 1.2493, + "step": 116754 + }, + { + "epoch": 1.4, + "grad_norm": 4.04698494033305, + "learning_rate": 4.3074625179470745e-06, + "loss": 1.0516, + "step": 116757 + }, + { + "epoch": 1.4, + "grad_norm": 5.621428822447115, + "learning_rate": 4.306982238741831e-06, + "loss": 1.0194, + "step": 116760 + }, + { + "epoch": 1.4, + "grad_norm": 10.103148215447698, + "learning_rate": 4.306501978964809e-06, + "loss": 0.7998, + "step": 116763 + }, + { + "epoch": 1.4, + "grad_norm": 20.392341703807283, + "learning_rate": 4.306021738617646e-06, + "loss": 1.2717, + "step": 116766 + }, + { + "epoch": 1.4, + "grad_norm": 11.978018120282037, + "learning_rate": 4.305541517701978e-06, + "loss": 1.5697, + "step": 116769 + }, + { + "epoch": 1.4, + "grad_norm": 28.12736506733651, + "learning_rate": 4.305061316219449e-06, + "loss": 1.3827, + "step": 116772 + }, + { + "epoch": 1.4, + "grad_norm": 3.9598002820380023, + "learning_rate": 4.30458113417169e-06, + "loss": 1.5219, + "step": 116775 + }, + { + "epoch": 1.4, + "grad_norm": 8.602658531979264, + "learning_rate": 4.304100971560346e-06, + "loss": 1.1511, + "step": 116778 + }, + { + "epoch": 1.4, + "grad_norm": 13.64301231461353, + "learning_rate": 4.303620828387056e-06, + "loss": 1.1138, + "step": 116781 + }, + { + "epoch": 1.4, + "grad_norm": 10.389073645256033, + "learning_rate": 4.303140704653453e-06, + "loss": 1.0089, + "step": 116784 + }, + { + "epoch": 1.4, + "grad_norm": 6.541411866190461, + "learning_rate": 4.302660600361178e-06, + "loss": 1.2778, + "step": 116787 + }, + { + "epoch": 1.4, + "grad_norm": 12.219355590831464, + "learning_rate": 4.302180515511871e-06, + "loss": 1.4238, + "step": 116790 + }, + { + "epoch": 1.4, + "grad_norm": 7.008670397515628, + "learning_rate": 4.301700450107172e-06, + "loss": 1.1834, + "step": 116793 + }, + { + "epoch": 1.4, + "grad_norm": 15.67750849788569, + "learning_rate": 4.301220404148718e-06, + "loss": 1.053, + "step": 116796 + }, + { + "epoch": 1.4, + "grad_norm": 6.070386158217417, + "learning_rate": 4.300740377638143e-06, + "loss": 0.9804, + "step": 116799 + }, + { + "epoch": 1.4, + "grad_norm": 5.494345219671481, + "learning_rate": 4.300260370577087e-06, + "loss": 1.3716, + "step": 116802 + }, + { + "epoch": 1.4, + "grad_norm": 19.3660870812707, + "learning_rate": 4.299780382967191e-06, + "loss": 0.9297, + "step": 116805 + }, + { + "epoch": 1.4, + "grad_norm": 7.400832028456406, + "learning_rate": 4.299300414810093e-06, + "loss": 1.3374, + "step": 116808 + }, + { + "epoch": 1.4, + "grad_norm": 4.856288877563578, + "learning_rate": 4.298820466107426e-06, + "loss": 1.318, + "step": 116811 + }, + { + "epoch": 1.4, + "grad_norm": 17.925026152546103, + "learning_rate": 4.298340536860836e-06, + "loss": 1.0339, + "step": 116814 + }, + { + "epoch": 1.4, + "grad_norm": 8.657349960338506, + "learning_rate": 4.297860627071952e-06, + "loss": 1.0097, + "step": 116817 + }, + { + "epoch": 1.4, + "grad_norm": 11.877879102568043, + "learning_rate": 4.297380736742418e-06, + "loss": 1.1787, + "step": 116820 + }, + { + "epoch": 1.4, + "grad_norm": 3.7094909691006954, + "learning_rate": 4.2969008658738665e-06, + "loss": 1.1304, + "step": 116823 + }, + { + "epoch": 1.4, + "grad_norm": 6.2592059330071725, + "learning_rate": 4.2964210144679385e-06, + "loss": 1.0833, + "step": 116826 + }, + { + "epoch": 1.4, + "grad_norm": 7.061980470864134, + "learning_rate": 4.295941182526271e-06, + "loss": 1.2203, + "step": 116829 + }, + { + "epoch": 1.4, + "grad_norm": 8.12982775335914, + "learning_rate": 4.295461370050502e-06, + "loss": 1.5132, + "step": 116832 + }, + { + "epoch": 1.4, + "grad_norm": 2.457483126097991, + "learning_rate": 4.294981577042272e-06, + "loss": 1.5006, + "step": 116835 + }, + { + "epoch": 1.4, + "grad_norm": 17.490382852783036, + "learning_rate": 4.294501803503215e-06, + "loss": 1.3971, + "step": 116838 + }, + { + "epoch": 1.4, + "grad_norm": 9.474863465478496, + "learning_rate": 4.294022049434965e-06, + "loss": 1.0405, + "step": 116841 + }, + { + "epoch": 1.41, + "grad_norm": 21.55947876252036, + "learning_rate": 4.293542314839162e-06, + "loss": 1.209, + "step": 116844 + }, + { + "epoch": 1.41, + "grad_norm": 6.340039156838809, + "learning_rate": 4.293062599717447e-06, + "loss": 1.3493, + "step": 116847 + }, + { + "epoch": 1.41, + "grad_norm": 9.013079931988988, + "learning_rate": 4.292582904071451e-06, + "loss": 1.2996, + "step": 116850 + }, + { + "epoch": 1.41, + "grad_norm": 11.681140585494656, + "learning_rate": 4.292103227902812e-06, + "loss": 1.5194, + "step": 116853 + }, + { + "epoch": 1.41, + "grad_norm": 5.194538437176319, + "learning_rate": 4.291623571213173e-06, + "loss": 1.3613, + "step": 116856 + }, + { + "epoch": 1.41, + "grad_norm": 6.816373731735685, + "learning_rate": 4.291143934004164e-06, + "loss": 0.9266, + "step": 116859 + }, + { + "epoch": 1.41, + "grad_norm": 6.604026349574278, + "learning_rate": 4.290664316277428e-06, + "loss": 1.3389, + "step": 116862 + }, + { + "epoch": 1.41, + "grad_norm": 13.243397940516463, + "learning_rate": 4.2901847180345935e-06, + "loss": 1.3649, + "step": 116865 + }, + { + "epoch": 1.41, + "grad_norm": 13.814842107501786, + "learning_rate": 4.289705139277302e-06, + "loss": 1.013, + "step": 116868 + }, + { + "epoch": 1.41, + "grad_norm": 8.372540701108406, + "learning_rate": 4.289225580007192e-06, + "loss": 1.0937, + "step": 116871 + }, + { + "epoch": 1.41, + "grad_norm": 8.357903925968078, + "learning_rate": 4.2887460402259e-06, + "loss": 1.1081, + "step": 116874 + }, + { + "epoch": 1.41, + "grad_norm": 9.574881032647944, + "learning_rate": 4.288266519935056e-06, + "loss": 1.2214, + "step": 116877 + }, + { + "epoch": 1.41, + "grad_norm": 7.937360079698611, + "learning_rate": 4.287787019136306e-06, + "loss": 1.035, + "step": 116880 + }, + { + "epoch": 1.41, + "grad_norm": 46.76958430851637, + "learning_rate": 4.2873075378312774e-06, + "loss": 1.1438, + "step": 116883 + }, + { + "epoch": 1.41, + "grad_norm": 4.22732209255209, + "learning_rate": 4.286828076021611e-06, + "loss": 1.2837, + "step": 116886 + }, + { + "epoch": 1.41, + "grad_norm": 7.9081878587638865, + "learning_rate": 4.286348633708946e-06, + "loss": 1.2094, + "step": 116889 + }, + { + "epoch": 1.41, + "grad_norm": 32.003855441806124, + "learning_rate": 4.2858692108949105e-06, + "loss": 1.3626, + "step": 116892 + }, + { + "epoch": 1.41, + "grad_norm": 7.202152527149005, + "learning_rate": 4.285389807581145e-06, + "loss": 1.5625, + "step": 116895 + }, + { + "epoch": 1.41, + "grad_norm": 12.583749055045436, + "learning_rate": 4.284910423769291e-06, + "loss": 1.1347, + "step": 116898 + }, + { + "epoch": 1.41, + "grad_norm": 13.714342685848269, + "learning_rate": 4.284431059460974e-06, + "loss": 1.4614, + "step": 116901 + }, + { + "epoch": 1.41, + "grad_norm": 5.925344925474774, + "learning_rate": 4.283951714657839e-06, + "loss": 1.539, + "step": 116904 + }, + { + "epoch": 1.41, + "grad_norm": 15.275358495778391, + "learning_rate": 4.283472389361515e-06, + "loss": 1.0372, + "step": 116907 + }, + { + "epoch": 1.41, + "grad_norm": 4.2411812260126185, + "learning_rate": 4.28299308357364e-06, + "loss": 1.5065, + "step": 116910 + }, + { + "epoch": 1.41, + "grad_norm": 7.166573334606349, + "learning_rate": 4.282513797295851e-06, + "loss": 1.0513, + "step": 116913 + }, + { + "epoch": 1.41, + "grad_norm": 21.12693605206718, + "learning_rate": 4.2820345305297864e-06, + "loss": 1.14, + "step": 116916 + }, + { + "epoch": 1.41, + "grad_norm": 14.950636644819147, + "learning_rate": 4.281555283277075e-06, + "loss": 1.2547, + "step": 116919 + }, + { + "epoch": 1.41, + "grad_norm": 7.224770090651182, + "learning_rate": 4.281076055539358e-06, + "loss": 1.03, + "step": 116922 + }, + { + "epoch": 1.41, + "grad_norm": 19.37133445635075, + "learning_rate": 4.280596847318266e-06, + "loss": 1.3164, + "step": 116925 + }, + { + "epoch": 1.41, + "grad_norm": 14.781721053022386, + "learning_rate": 4.28011765861544e-06, + "loss": 1.2061, + "step": 116928 + }, + { + "epoch": 1.41, + "grad_norm": 10.337903877510348, + "learning_rate": 4.279638489432508e-06, + "loss": 1.3027, + "step": 116931 + }, + { + "epoch": 1.41, + "grad_norm": 3.0909694032494825, + "learning_rate": 4.2791593397711105e-06, + "loss": 0.9674, + "step": 116934 + }, + { + "epoch": 1.41, + "grad_norm": 10.624804057734188, + "learning_rate": 4.278680209632881e-06, + "loss": 1.0764, + "step": 116937 + }, + { + "epoch": 1.41, + "grad_norm": 19.70499243850738, + "learning_rate": 4.278201099019459e-06, + "loss": 1.5582, + "step": 116940 + }, + { + "epoch": 1.41, + "grad_norm": 4.558842401794042, + "learning_rate": 4.277722007932471e-06, + "loss": 1.2032, + "step": 116943 + }, + { + "epoch": 1.41, + "grad_norm": 10.846586195129646, + "learning_rate": 4.277242936373561e-06, + "loss": 1.2512, + "step": 116946 + }, + { + "epoch": 1.41, + "grad_norm": 9.36648023180068, + "learning_rate": 4.276763884344357e-06, + "loss": 1.3504, + "step": 116949 + }, + { + "epoch": 1.41, + "grad_norm": 8.396737720241635, + "learning_rate": 4.276284851846495e-06, + "loss": 1.0034, + "step": 116952 + }, + { + "epoch": 1.41, + "grad_norm": 9.522182860760003, + "learning_rate": 4.275805838881615e-06, + "loss": 0.6549, + "step": 116955 + }, + { + "epoch": 1.41, + "grad_norm": 10.640318757153867, + "learning_rate": 4.275326845451344e-06, + "loss": 1.1257, + "step": 116958 + }, + { + "epoch": 1.41, + "grad_norm": 6.490328351484364, + "learning_rate": 4.27484787155732e-06, + "loss": 1.3096, + "step": 116961 + }, + { + "epoch": 1.41, + "grad_norm": 6.977495088297314, + "learning_rate": 4.274368917201184e-06, + "loss": 1.3186, + "step": 116964 + }, + { + "epoch": 1.41, + "grad_norm": 4.410682952445348, + "learning_rate": 4.273889982384559e-06, + "loss": 1.2991, + "step": 116967 + }, + { + "epoch": 1.41, + "grad_norm": 30.974682725539417, + "learning_rate": 4.2734110671090886e-06, + "loss": 1.1598, + "step": 116970 + }, + { + "epoch": 1.41, + "grad_norm": 11.029235996456157, + "learning_rate": 4.2729321713764e-06, + "loss": 1.2353, + "step": 116973 + }, + { + "epoch": 1.41, + "grad_norm": 8.177382502973257, + "learning_rate": 4.272453295188132e-06, + "loss": 1.2813, + "step": 116976 + }, + { + "epoch": 1.41, + "grad_norm": 5.209814249965214, + "learning_rate": 4.271974438545916e-06, + "loss": 1.2773, + "step": 116979 + }, + { + "epoch": 1.41, + "grad_norm": 11.331974504695294, + "learning_rate": 4.271495601451393e-06, + "loss": 1.253, + "step": 116982 + }, + { + "epoch": 1.41, + "grad_norm": 10.845590551943532, + "learning_rate": 4.271016783906188e-06, + "loss": 0.7872, + "step": 116985 + }, + { + "epoch": 1.41, + "grad_norm": 11.632655973027731, + "learning_rate": 4.270537985911944e-06, + "loss": 1.3729, + "step": 116988 + }, + { + "epoch": 1.41, + "grad_norm": 5.590203690988071, + "learning_rate": 4.270059207470285e-06, + "loss": 1.1291, + "step": 116991 + }, + { + "epoch": 1.41, + "grad_norm": 16.529858865370404, + "learning_rate": 4.269580448582849e-06, + "loss": 1.0675, + "step": 116994 + }, + { + "epoch": 1.41, + "grad_norm": 8.167530018062878, + "learning_rate": 4.269101709251276e-06, + "loss": 1.1573, + "step": 116997 + }, + { + "epoch": 1.41, + "grad_norm": 3.9950702583256024, + "learning_rate": 4.268622989477192e-06, + "loss": 1.0756, + "step": 117000 + }, + { + "epoch": 1.41, + "grad_norm": 5.62432618339422, + "learning_rate": 4.268144289262232e-06, + "loss": 0.9118, + "step": 117003 + }, + { + "epoch": 1.41, + "grad_norm": 10.532727464990531, + "learning_rate": 4.2676656086080345e-06, + "loss": 1.1404, + "step": 117006 + }, + { + "epoch": 1.41, + "grad_norm": 5.079623177466842, + "learning_rate": 4.26718694751623e-06, + "loss": 0.91, + "step": 117009 + }, + { + "epoch": 1.41, + "grad_norm": 4.288954139119646, + "learning_rate": 4.266708305988447e-06, + "loss": 1.0208, + "step": 117012 + }, + { + "epoch": 1.41, + "grad_norm": 6.058674461150327, + "learning_rate": 4.266229684026325e-06, + "loss": 1.2175, + "step": 117015 + }, + { + "epoch": 1.41, + "grad_norm": 3.5551544508445034, + "learning_rate": 4.265751081631495e-06, + "loss": 0.9057, + "step": 117018 + }, + { + "epoch": 1.41, + "grad_norm": 5.281894081512686, + "learning_rate": 4.265272498805591e-06, + "loss": 1.6665, + "step": 117021 + }, + { + "epoch": 1.41, + "grad_norm": 15.217440372499324, + "learning_rate": 4.2647939355502495e-06, + "loss": 1.2148, + "step": 117024 + }, + { + "epoch": 1.41, + "grad_norm": 10.73884058919899, + "learning_rate": 4.264315391867098e-06, + "loss": 1.0984, + "step": 117027 + }, + { + "epoch": 1.41, + "grad_norm": 9.9410823540533, + "learning_rate": 4.263836867757775e-06, + "loss": 0.9889, + "step": 117030 + }, + { + "epoch": 1.41, + "grad_norm": 13.367307994324573, + "learning_rate": 4.263358363223906e-06, + "loss": 1.1455, + "step": 117033 + }, + { + "epoch": 1.41, + "grad_norm": 12.077493490579517, + "learning_rate": 4.262879878267133e-06, + "loss": 1.3884, + "step": 117036 + }, + { + "epoch": 1.41, + "grad_norm": 9.411194671670751, + "learning_rate": 4.262401412889081e-06, + "loss": 1.0178, + "step": 117039 + }, + { + "epoch": 1.41, + "grad_norm": 7.247076017621617, + "learning_rate": 4.261922967091386e-06, + "loss": 1.0672, + "step": 117042 + }, + { + "epoch": 1.41, + "grad_norm": 5.232802006670046, + "learning_rate": 4.261444540875682e-06, + "loss": 1.2513, + "step": 117045 + }, + { + "epoch": 1.41, + "grad_norm": 7.734538256549253, + "learning_rate": 4.260966134243604e-06, + "loss": 1.2186, + "step": 117048 + }, + { + "epoch": 1.41, + "grad_norm": 4.030325093140949, + "learning_rate": 4.260487747196781e-06, + "loss": 1.155, + "step": 117051 + }, + { + "epoch": 1.41, + "grad_norm": 6.881877936010947, + "learning_rate": 4.2600093797368425e-06, + "loss": 1.0246, + "step": 117054 + }, + { + "epoch": 1.41, + "grad_norm": 11.5227468991721, + "learning_rate": 4.259531031865424e-06, + "loss": 1.299, + "step": 117057 + }, + { + "epoch": 1.41, + "grad_norm": 23.715303707116572, + "learning_rate": 4.259052703584159e-06, + "loss": 1.1174, + "step": 117060 + }, + { + "epoch": 1.41, + "grad_norm": 6.250479829386889, + "learning_rate": 4.258574394894682e-06, + "loss": 1.1766, + "step": 117063 + }, + { + "epoch": 1.41, + "grad_norm": 3.7628523887278886, + "learning_rate": 4.2580961057986205e-06, + "loss": 1.2462, + "step": 117066 + }, + { + "epoch": 1.41, + "grad_norm": 10.5433820133349, + "learning_rate": 4.257617836297607e-06, + "loss": 1.338, + "step": 117069 + }, + { + "epoch": 1.41, + "grad_norm": 9.960694941520746, + "learning_rate": 4.2571395863932805e-06, + "loss": 1.3418, + "step": 117072 + }, + { + "epoch": 1.41, + "grad_norm": 14.76055599378164, + "learning_rate": 4.256661356087264e-06, + "loss": 1.0544, + "step": 117075 + }, + { + "epoch": 1.41, + "grad_norm": 6.487883138164113, + "learning_rate": 4.256183145381196e-06, + "loss": 1.0845, + "step": 117078 + }, + { + "epoch": 1.41, + "grad_norm": 21.43156445067593, + "learning_rate": 4.255704954276703e-06, + "loss": 1.3127, + "step": 117081 + }, + { + "epoch": 1.41, + "grad_norm": 8.206251413492772, + "learning_rate": 4.255226782775421e-06, + "loss": 1.1719, + "step": 117084 + }, + { + "epoch": 1.41, + "grad_norm": 33.820973602610856, + "learning_rate": 4.254748630878981e-06, + "loss": 1.0091, + "step": 117087 + }, + { + "epoch": 1.41, + "grad_norm": 10.241932480327566, + "learning_rate": 4.254270498589017e-06, + "loss": 1.5989, + "step": 117090 + }, + { + "epoch": 1.41, + "grad_norm": 8.487280133644846, + "learning_rate": 4.253792385907158e-06, + "loss": 1.0124, + "step": 117093 + }, + { + "epoch": 1.41, + "grad_norm": 8.1731693077332, + "learning_rate": 4.253314292835032e-06, + "loss": 0.8003, + "step": 117096 + }, + { + "epoch": 1.41, + "grad_norm": 3.0593341919578507, + "learning_rate": 4.2528362193742754e-06, + "loss": 1.2977, + "step": 117099 + }, + { + "epoch": 1.41, + "grad_norm": 18.993221640189393, + "learning_rate": 4.252358165526518e-06, + "loss": 1.5076, + "step": 117102 + }, + { + "epoch": 1.41, + "grad_norm": 12.217418952959695, + "learning_rate": 4.251880131293395e-06, + "loss": 1.1257, + "step": 117105 + }, + { + "epoch": 1.41, + "grad_norm": 5.138569312493319, + "learning_rate": 4.251402116676533e-06, + "loss": 1.4134, + "step": 117108 + }, + { + "epoch": 1.41, + "grad_norm": 14.366485531879873, + "learning_rate": 4.250924121677563e-06, + "loss": 1.2095, + "step": 117111 + }, + { + "epoch": 1.41, + "grad_norm": 6.464292309979438, + "learning_rate": 4.250446146298123e-06, + "loss": 0.8688, + "step": 117114 + }, + { + "epoch": 1.41, + "grad_norm": 13.612775504798957, + "learning_rate": 4.249968190539839e-06, + "loss": 1.0142, + "step": 117117 + }, + { + "epoch": 1.41, + "grad_norm": 14.47563768172904, + "learning_rate": 4.249490254404338e-06, + "loss": 1.3117, + "step": 117120 + }, + { + "epoch": 1.41, + "grad_norm": 11.347873864858263, + "learning_rate": 4.249012337893256e-06, + "loss": 1.1728, + "step": 117123 + }, + { + "epoch": 1.41, + "grad_norm": 10.558442115787662, + "learning_rate": 4.248534441008223e-06, + "loss": 1.3223, + "step": 117126 + }, + { + "epoch": 1.41, + "grad_norm": 5.619424934274034, + "learning_rate": 4.248056563750871e-06, + "loss": 0.9908, + "step": 117129 + }, + { + "epoch": 1.41, + "grad_norm": 3.811264459713433, + "learning_rate": 4.247578706122834e-06, + "loss": 1.1317, + "step": 117132 + }, + { + "epoch": 1.41, + "grad_norm": 25.038325194061027, + "learning_rate": 4.2471008681257375e-06, + "loss": 1.2993, + "step": 117135 + }, + { + "epoch": 1.41, + "grad_norm": 10.811498125857934, + "learning_rate": 4.24662304976121e-06, + "loss": 1.1566, + "step": 117138 + }, + { + "epoch": 1.41, + "grad_norm": 4.522970564807409, + "learning_rate": 4.246145251030887e-06, + "loss": 1.0893, + "step": 117141 + }, + { + "epoch": 1.41, + "grad_norm": 4.7491122053445345, + "learning_rate": 4.245667471936401e-06, + "loss": 0.8213, + "step": 117144 + }, + { + "epoch": 1.41, + "grad_norm": 48.695491215808396, + "learning_rate": 4.245189712479375e-06, + "loss": 1.3264, + "step": 117147 + }, + { + "epoch": 1.41, + "grad_norm": 8.128599697007122, + "learning_rate": 4.244711972661444e-06, + "loss": 0.8551, + "step": 117150 + }, + { + "epoch": 1.41, + "grad_norm": 7.226721865152315, + "learning_rate": 4.244234252484237e-06, + "loss": 1.0348, + "step": 117153 + }, + { + "epoch": 1.41, + "grad_norm": 26.840016461968155, + "learning_rate": 4.2437565519493905e-06, + "loss": 1.0377, + "step": 117156 + }, + { + "epoch": 1.41, + "grad_norm": 4.275472834336913, + "learning_rate": 4.243278871058528e-06, + "loss": 1.0899, + "step": 117159 + }, + { + "epoch": 1.41, + "grad_norm": 8.855506678261706, + "learning_rate": 4.242801209813279e-06, + "loss": 1.3776, + "step": 117162 + }, + { + "epoch": 1.41, + "grad_norm": 5.4942026412431035, + "learning_rate": 4.242323568215275e-06, + "loss": 1.1409, + "step": 117165 + }, + { + "epoch": 1.41, + "grad_norm": 12.312668473362555, + "learning_rate": 4.2418459462661464e-06, + "loss": 1.0655, + "step": 117168 + }, + { + "epoch": 1.41, + "grad_norm": 10.250013077998481, + "learning_rate": 4.241368343967528e-06, + "loss": 1.2713, + "step": 117171 + }, + { + "epoch": 1.41, + "grad_norm": 5.545585294728926, + "learning_rate": 4.240890761321042e-06, + "loss": 1.2016, + "step": 117174 + }, + { + "epoch": 1.41, + "grad_norm": 3.963334727839253, + "learning_rate": 4.240413198328323e-06, + "loss": 1.3503, + "step": 117177 + }, + { + "epoch": 1.41, + "grad_norm": 7.75293022351449, + "learning_rate": 4.239935654990998e-06, + "loss": 0.9483, + "step": 117180 + }, + { + "epoch": 1.41, + "grad_norm": 8.467505973807404, + "learning_rate": 4.239458131310696e-06, + "loss": 1.4569, + "step": 117183 + }, + { + "epoch": 1.41, + "grad_norm": 17.718770500178522, + "learning_rate": 4.238980627289052e-06, + "loss": 1.0178, + "step": 117186 + }, + { + "epoch": 1.41, + "grad_norm": 5.741013060372045, + "learning_rate": 4.238503142927689e-06, + "loss": 1.376, + "step": 117189 + }, + { + "epoch": 1.41, + "grad_norm": 7.899194474558246, + "learning_rate": 4.23802567822824e-06, + "loss": 1.3806, + "step": 117192 + }, + { + "epoch": 1.41, + "grad_norm": 6.273373809172994, + "learning_rate": 4.237548233192334e-06, + "loss": 1.0132, + "step": 117195 + }, + { + "epoch": 1.41, + "grad_norm": 15.33876613436015, + "learning_rate": 4.237070807821603e-06, + "loss": 1.0956, + "step": 117198 + }, + { + "epoch": 1.41, + "grad_norm": 35.76863918432574, + "learning_rate": 4.236593402117674e-06, + "loss": 0.9107, + "step": 117201 + }, + { + "epoch": 1.41, + "grad_norm": 7.225161307250649, + "learning_rate": 4.236116016082171e-06, + "loss": 1.166, + "step": 117204 + }, + { + "epoch": 1.41, + "grad_norm": 7.283895440670513, + "learning_rate": 4.235638649716729e-06, + "loss": 1.0393, + "step": 117207 + }, + { + "epoch": 1.41, + "grad_norm": 4.916375132300531, + "learning_rate": 4.235161303022976e-06, + "loss": 1.1823, + "step": 117210 + }, + { + "epoch": 1.41, + "grad_norm": 9.201731674111192, + "learning_rate": 4.234683976002545e-06, + "loss": 1.2106, + "step": 117213 + }, + { + "epoch": 1.41, + "grad_norm": 5.985740033821056, + "learning_rate": 4.234206668657056e-06, + "loss": 0.941, + "step": 117216 + }, + { + "epoch": 1.41, + "grad_norm": 7.403972621611069, + "learning_rate": 4.2337293809881445e-06, + "loss": 1.3457, + "step": 117219 + }, + { + "epoch": 1.41, + "grad_norm": 17.196378876198427, + "learning_rate": 4.23325211299744e-06, + "loss": 1.0688, + "step": 117222 + }, + { + "epoch": 1.41, + "grad_norm": 5.203053770421486, + "learning_rate": 4.232774864686568e-06, + "loss": 1.2425, + "step": 117225 + }, + { + "epoch": 1.41, + "grad_norm": 9.675439263181511, + "learning_rate": 4.2322976360571565e-06, + "loss": 1.0278, + "step": 117228 + }, + { + "epoch": 1.41, + "grad_norm": 12.57692041651665, + "learning_rate": 4.231820427110835e-06, + "loss": 1.4091, + "step": 117231 + }, + { + "epoch": 1.41, + "grad_norm": 22.86201695764336, + "learning_rate": 4.231343237849232e-06, + "loss": 1.1783, + "step": 117234 + }, + { + "epoch": 1.41, + "grad_norm": 4.977962522792161, + "learning_rate": 4.230866068273978e-06, + "loss": 1.0414, + "step": 117237 + }, + { + "epoch": 1.41, + "grad_norm": 15.700672936194989, + "learning_rate": 4.230388918386702e-06, + "loss": 0.9954, + "step": 117240 + }, + { + "epoch": 1.41, + "grad_norm": 21.61058547471906, + "learning_rate": 4.229911788189031e-06, + "loss": 1.0096, + "step": 117243 + }, + { + "epoch": 1.41, + "grad_norm": 34.824744874923596, + "learning_rate": 4.229434677682589e-06, + "loss": 1.4322, + "step": 117246 + }, + { + "epoch": 1.41, + "grad_norm": 21.376268887671998, + "learning_rate": 4.228957586869009e-06, + "loss": 1.5837, + "step": 117249 + }, + { + "epoch": 1.41, + "grad_norm": 6.332214905624728, + "learning_rate": 4.228480515749921e-06, + "loss": 1.1864, + "step": 117252 + }, + { + "epoch": 1.41, + "grad_norm": 19.03322499485144, + "learning_rate": 4.228003464326946e-06, + "loss": 1.1491, + "step": 117255 + }, + { + "epoch": 1.41, + "grad_norm": 7.904374307441071, + "learning_rate": 4.227526432601716e-06, + "loss": 1.1687, + "step": 117258 + }, + { + "epoch": 1.41, + "grad_norm": 12.525834320070272, + "learning_rate": 4.227049420575859e-06, + "loss": 0.7327, + "step": 117261 + }, + { + "epoch": 1.41, + "grad_norm": 5.858125507676759, + "learning_rate": 4.226572428251008e-06, + "loss": 1.0185, + "step": 117264 + }, + { + "epoch": 1.41, + "grad_norm": 11.553393844391815, + "learning_rate": 4.226095455628785e-06, + "loss": 1.3046, + "step": 117267 + }, + { + "epoch": 1.41, + "grad_norm": 40.010993661372034, + "learning_rate": 4.225618502710814e-06, + "loss": 1.2084, + "step": 117270 + }, + { + "epoch": 1.41, + "grad_norm": 10.172780339216208, + "learning_rate": 4.225141569498728e-06, + "loss": 0.9089, + "step": 117273 + }, + { + "epoch": 1.41, + "grad_norm": 2.822769948608871, + "learning_rate": 4.224664655994153e-06, + "loss": 1.0513, + "step": 117276 + }, + { + "epoch": 1.41, + "grad_norm": 15.693887374301609, + "learning_rate": 4.224187762198721e-06, + "loss": 1.3789, + "step": 117279 + }, + { + "epoch": 1.41, + "grad_norm": 4.639588439381242, + "learning_rate": 4.223710888114053e-06, + "loss": 0.9686, + "step": 117282 + }, + { + "epoch": 1.41, + "grad_norm": 2.6780517717438475, + "learning_rate": 4.223234033741782e-06, + "loss": 1.2079, + "step": 117285 + }, + { + "epoch": 1.41, + "grad_norm": 3.8296230623741696, + "learning_rate": 4.222757199083529e-06, + "loss": 0.9685, + "step": 117288 + }, + { + "epoch": 1.41, + "grad_norm": 4.485755703234438, + "learning_rate": 4.222280384140925e-06, + "loss": 1.2785, + "step": 117291 + }, + { + "epoch": 1.41, + "grad_norm": 7.355850059009657, + "learning_rate": 4.221803588915601e-06, + "loss": 1.1241, + "step": 117294 + }, + { + "epoch": 1.41, + "grad_norm": 3.5585868108556857, + "learning_rate": 4.221326813409176e-06, + "loss": 1.3902, + "step": 117297 + }, + { + "epoch": 1.41, + "grad_norm": 27.07860721805323, + "learning_rate": 4.22085005762328e-06, + "loss": 1.1409, + "step": 117300 + }, + { + "epoch": 1.41, + "grad_norm": 7.0823688616530065, + "learning_rate": 4.220373321559542e-06, + "loss": 1.5386, + "step": 117303 + }, + { + "epoch": 1.41, + "grad_norm": 44.98803701160576, + "learning_rate": 4.219896605219591e-06, + "loss": 1.1889, + "step": 117306 + }, + { + "epoch": 1.41, + "grad_norm": 19.99179980179403, + "learning_rate": 4.219419908605052e-06, + "loss": 0.9781, + "step": 117309 + }, + { + "epoch": 1.41, + "grad_norm": 2.5076055580515795, + "learning_rate": 4.218943231717547e-06, + "loss": 1.3358, + "step": 117312 + }, + { + "epoch": 1.41, + "grad_norm": 4.990475504498892, + "learning_rate": 4.218466574558706e-06, + "loss": 1.2985, + "step": 117315 + }, + { + "epoch": 1.41, + "grad_norm": 8.556385155733235, + "learning_rate": 4.217989937130155e-06, + "loss": 1.3932, + "step": 117318 + }, + { + "epoch": 1.41, + "grad_norm": 10.328594462407434, + "learning_rate": 4.217513319433527e-06, + "loss": 1.3883, + "step": 117321 + }, + { + "epoch": 1.41, + "grad_norm": 3.758571579739986, + "learning_rate": 4.217036721470439e-06, + "loss": 1.2213, + "step": 117324 + }, + { + "epoch": 1.41, + "grad_norm": 4.592209937499744, + "learning_rate": 4.216560143242525e-06, + "loss": 1.2632, + "step": 117327 + }, + { + "epoch": 1.41, + "grad_norm": 12.442327369552203, + "learning_rate": 4.2160835847514045e-06, + "loss": 1.1704, + "step": 117330 + }, + { + "epoch": 1.41, + "grad_norm": 11.548867833818385, + "learning_rate": 4.215607045998712e-06, + "loss": 1.2156, + "step": 117333 + }, + { + "epoch": 1.41, + "grad_norm": 14.87115242398429, + "learning_rate": 4.215130526986065e-06, + "loss": 0.9645, + "step": 117336 + }, + { + "epoch": 1.41, + "grad_norm": 3.0783626212298034, + "learning_rate": 4.214654027715093e-06, + "loss": 1.5022, + "step": 117339 + }, + { + "epoch": 1.41, + "grad_norm": 13.597209788535496, + "learning_rate": 4.2141775481874235e-06, + "loss": 1.2235, + "step": 117342 + }, + { + "epoch": 1.41, + "grad_norm": 2.425854601666949, + "learning_rate": 4.2137010884046816e-06, + "loss": 1.3351, + "step": 117345 + }, + { + "epoch": 1.41, + "grad_norm": 13.590617798960876, + "learning_rate": 4.213224648368498e-06, + "loss": 1.0042, + "step": 117348 + }, + { + "epoch": 1.41, + "grad_norm": 6.9293181274469084, + "learning_rate": 4.212748228080494e-06, + "loss": 1.2312, + "step": 117351 + }, + { + "epoch": 1.41, + "grad_norm": 5.06471499646702, + "learning_rate": 4.2122718275422915e-06, + "loss": 1.2384, + "step": 117354 + }, + { + "epoch": 1.41, + "grad_norm": 6.126471598048086, + "learning_rate": 4.211795446755521e-06, + "loss": 1.1134, + "step": 117357 + }, + { + "epoch": 1.41, + "grad_norm": 4.506092282602917, + "learning_rate": 4.211319085721812e-06, + "loss": 0.988, + "step": 117360 + }, + { + "epoch": 1.41, + "grad_norm": 12.969894435040255, + "learning_rate": 4.210842744442781e-06, + "loss": 1.0823, + "step": 117363 + }, + { + "epoch": 1.41, + "grad_norm": 21.172248821285194, + "learning_rate": 4.210366422920059e-06, + "loss": 1.0578, + "step": 117366 + }, + { + "epoch": 1.41, + "grad_norm": 5.752722223858625, + "learning_rate": 4.209890121155276e-06, + "loss": 1.25, + "step": 117369 + }, + { + "epoch": 1.41, + "grad_norm": 4.60414483800216, + "learning_rate": 4.2094138391500474e-06, + "loss": 1.1558, + "step": 117372 + }, + { + "epoch": 1.41, + "grad_norm": 13.1980083747142, + "learning_rate": 4.208937576906007e-06, + "loss": 1.0741, + "step": 117375 + }, + { + "epoch": 1.41, + "grad_norm": 6.941040657740027, + "learning_rate": 4.208461334424773e-06, + "loss": 0.9111, + "step": 117378 + }, + { + "epoch": 1.41, + "grad_norm": 9.108925485663061, + "learning_rate": 4.207985111707975e-06, + "loss": 0.907, + "step": 117381 + }, + { + "epoch": 1.41, + "grad_norm": 12.401367795311229, + "learning_rate": 4.207508908757238e-06, + "loss": 1.2041, + "step": 117384 + }, + { + "epoch": 1.41, + "grad_norm": 10.600325166237859, + "learning_rate": 4.20703272557419e-06, + "loss": 1.1791, + "step": 117387 + }, + { + "epoch": 1.41, + "grad_norm": 30.854150047164225, + "learning_rate": 4.206556562160448e-06, + "loss": 0.9507, + "step": 117390 + }, + { + "epoch": 1.41, + "grad_norm": 12.279665707519008, + "learning_rate": 4.206080418517645e-06, + "loss": 0.7194, + "step": 117393 + }, + { + "epoch": 1.41, + "grad_norm": 7.086149534719419, + "learning_rate": 4.2056042946474005e-06, + "loss": 1.0819, + "step": 117396 + }, + { + "epoch": 1.41, + "grad_norm": 10.359849478951732, + "learning_rate": 4.205128190551341e-06, + "loss": 1.2626, + "step": 117399 + }, + { + "epoch": 1.41, + "grad_norm": 5.454204082425259, + "learning_rate": 4.2046521062310954e-06, + "loss": 0.9512, + "step": 117402 + }, + { + "epoch": 1.41, + "grad_norm": 11.53376414368994, + "learning_rate": 4.2041760416882805e-06, + "loss": 1.0184, + "step": 117405 + }, + { + "epoch": 1.41, + "grad_norm": 6.064356592575427, + "learning_rate": 4.203699996924525e-06, + "loss": 0.787, + "step": 117408 + }, + { + "epoch": 1.41, + "grad_norm": 8.42744004762846, + "learning_rate": 4.203223971941458e-06, + "loss": 0.9765, + "step": 117411 + }, + { + "epoch": 1.41, + "grad_norm": 9.984942241234467, + "learning_rate": 4.202747966740699e-06, + "loss": 1.3618, + "step": 117414 + }, + { + "epoch": 1.41, + "grad_norm": 7.781738802239931, + "learning_rate": 4.20227198132387e-06, + "loss": 1.0916, + "step": 117417 + }, + { + "epoch": 1.41, + "grad_norm": 45.60834337128389, + "learning_rate": 4.201796015692598e-06, + "loss": 1.0274, + "step": 117420 + }, + { + "epoch": 1.41, + "grad_norm": 7.774241583676117, + "learning_rate": 4.201320069848508e-06, + "loss": 1.6581, + "step": 117423 + }, + { + "epoch": 1.41, + "grad_norm": 12.943473805729944, + "learning_rate": 4.200844143793224e-06, + "loss": 1.4469, + "step": 117426 + }, + { + "epoch": 1.41, + "grad_norm": 8.52112809764073, + "learning_rate": 4.200368237528373e-06, + "loss": 1.3975, + "step": 117429 + }, + { + "epoch": 1.41, + "grad_norm": 14.51089576129609, + "learning_rate": 4.199892351055572e-06, + "loss": 1.1865, + "step": 117432 + }, + { + "epoch": 1.41, + "grad_norm": 10.067953267237318, + "learning_rate": 4.199416484376455e-06, + "loss": 1.1823, + "step": 117435 + }, + { + "epoch": 1.41, + "grad_norm": 16.42344722711228, + "learning_rate": 4.198940637492635e-06, + "loss": 0.9253, + "step": 117438 + }, + { + "epoch": 1.41, + "grad_norm": 6.126027773404308, + "learning_rate": 4.198464810405745e-06, + "loss": 1.2603, + "step": 117441 + }, + { + "epoch": 1.41, + "grad_norm": 7.457836897677955, + "learning_rate": 4.197989003117401e-06, + "loss": 1.1275, + "step": 117444 + }, + { + "epoch": 1.41, + "grad_norm": 9.970668818406248, + "learning_rate": 4.1975132156292305e-06, + "loss": 1.1696, + "step": 117447 + }, + { + "epoch": 1.41, + "grad_norm": 17.839662381316643, + "learning_rate": 4.1970374479428584e-06, + "loss": 1.0708, + "step": 117450 + }, + { + "epoch": 1.41, + "grad_norm": 6.466458246704897, + "learning_rate": 4.19656170005991e-06, + "loss": 1.2174, + "step": 117453 + }, + { + "epoch": 1.41, + "grad_norm": 11.460891784252835, + "learning_rate": 4.196085971982004e-06, + "loss": 1.2125, + "step": 117456 + }, + { + "epoch": 1.41, + "grad_norm": 4.462843069530559, + "learning_rate": 4.195610263710768e-06, + "loss": 1.2352, + "step": 117459 + }, + { + "epoch": 1.41, + "grad_norm": 5.728201014775579, + "learning_rate": 4.19513457524782e-06, + "loss": 0.887, + "step": 117462 + }, + { + "epoch": 1.41, + "grad_norm": 12.884444057396353, + "learning_rate": 4.194658906594787e-06, + "loss": 0.9846, + "step": 117465 + }, + { + "epoch": 1.41, + "grad_norm": 8.8968254730281, + "learning_rate": 4.194183257753296e-06, + "loss": 0.9572, + "step": 117468 + }, + { + "epoch": 1.41, + "grad_norm": 43.63436436954991, + "learning_rate": 4.193707628724962e-06, + "loss": 1.2009, + "step": 117471 + }, + { + "epoch": 1.41, + "grad_norm": 36.20588153703684, + "learning_rate": 4.193232019511413e-06, + "loss": 1.4776, + "step": 117474 + }, + { + "epoch": 1.41, + "grad_norm": 16.86968039471847, + "learning_rate": 4.192756430114275e-06, + "loss": 1.1058, + "step": 117477 + }, + { + "epoch": 1.41, + "grad_norm": 9.652670359848681, + "learning_rate": 4.192280860535164e-06, + "loss": 0.9312, + "step": 117480 + }, + { + "epoch": 1.41, + "grad_norm": 14.491400399213719, + "learning_rate": 4.1918053107757105e-06, + "loss": 1.1672, + "step": 117483 + }, + { + "epoch": 1.41, + "grad_norm": 6.473644983767934, + "learning_rate": 4.1913297808375284e-06, + "loss": 1.0366, + "step": 117486 + }, + { + "epoch": 1.41, + "grad_norm": 6.853975679743679, + "learning_rate": 4.1908542707222475e-06, + "loss": 1.1374, + "step": 117489 + }, + { + "epoch": 1.41, + "grad_norm": 10.600863455281347, + "learning_rate": 4.1903787804314865e-06, + "loss": 0.9414, + "step": 117492 + }, + { + "epoch": 1.41, + "grad_norm": 16.695675778042144, + "learning_rate": 4.189903309966875e-06, + "loss": 1.2002, + "step": 117495 + }, + { + "epoch": 1.41, + "grad_norm": 3.300477604194229, + "learning_rate": 4.189427859330027e-06, + "loss": 0.9289, + "step": 117498 + }, + { + "epoch": 1.41, + "grad_norm": 5.285574353741002, + "learning_rate": 4.188952428522573e-06, + "loss": 1.123, + "step": 117501 + }, + { + "epoch": 1.41, + "grad_norm": 9.177627624422088, + "learning_rate": 4.188477017546127e-06, + "loss": 1.3879, + "step": 117504 + }, + { + "epoch": 1.41, + "grad_norm": 8.030587652602334, + "learning_rate": 4.1880016264023145e-06, + "loss": 1.4722, + "step": 117507 + }, + { + "epoch": 1.41, + "grad_norm": 8.206982576198346, + "learning_rate": 4.187526255092763e-06, + "loss": 1.4086, + "step": 117510 + }, + { + "epoch": 1.41, + "grad_norm": 12.744714699197708, + "learning_rate": 4.1870509036190875e-06, + "loss": 1.0401, + "step": 117513 + }, + { + "epoch": 1.41, + "grad_norm": 10.170313454358492, + "learning_rate": 4.186575571982913e-06, + "loss": 1.2872, + "step": 117516 + }, + { + "epoch": 1.41, + "grad_norm": 5.594675892449826, + "learning_rate": 4.186100260185867e-06, + "loss": 1.223, + "step": 117519 + }, + { + "epoch": 1.41, + "grad_norm": 9.997241034071463, + "learning_rate": 4.1856249682295656e-06, + "loss": 1.143, + "step": 117522 + }, + { + "epoch": 1.41, + "grad_norm": 4.966386619154646, + "learning_rate": 4.185149696115629e-06, + "loss": 0.915, + "step": 117525 + }, + { + "epoch": 1.41, + "grad_norm": 6.859833411707375, + "learning_rate": 4.1846744438456824e-06, + "loss": 1.2161, + "step": 117528 + }, + { + "epoch": 1.41, + "grad_norm": 12.467372893324928, + "learning_rate": 4.184199211421346e-06, + "loss": 0.9195, + "step": 117531 + }, + { + "epoch": 1.41, + "grad_norm": 6.602616116111078, + "learning_rate": 4.183723998844243e-06, + "loss": 0.752, + "step": 117534 + }, + { + "epoch": 1.41, + "grad_norm": 8.479811365629022, + "learning_rate": 4.183248806115999e-06, + "loss": 1.2409, + "step": 117537 + }, + { + "epoch": 1.41, + "grad_norm": 10.749487140548704, + "learning_rate": 4.1827736332382276e-06, + "loss": 1.1422, + "step": 117540 + }, + { + "epoch": 1.41, + "grad_norm": 12.538669715418484, + "learning_rate": 4.182298480212559e-06, + "loss": 1.1605, + "step": 117543 + }, + { + "epoch": 1.41, + "grad_norm": 4.440402997285332, + "learning_rate": 4.1818233470406055e-06, + "loss": 1.1287, + "step": 117546 + }, + { + "epoch": 1.41, + "grad_norm": 13.102759010926514, + "learning_rate": 4.181348233723997e-06, + "loss": 1.039, + "step": 117549 + }, + { + "epoch": 1.41, + "grad_norm": 8.866118495177778, + "learning_rate": 4.180873140264348e-06, + "loss": 1.7166, + "step": 117552 + }, + { + "epoch": 1.41, + "grad_norm": 12.738591116473728, + "learning_rate": 4.180398066663283e-06, + "loss": 1.2197, + "step": 117555 + }, + { + "epoch": 1.41, + "grad_norm": 92.49157049429692, + "learning_rate": 4.179923012922422e-06, + "loss": 1.1498, + "step": 117558 + }, + { + "epoch": 1.41, + "grad_norm": 12.076675919561653, + "learning_rate": 4.179447979043392e-06, + "loss": 1.2142, + "step": 117561 + }, + { + "epoch": 1.41, + "grad_norm": 6.785051377660194, + "learning_rate": 4.178972965027809e-06, + "loss": 1.2741, + "step": 117564 + }, + { + "epoch": 1.41, + "grad_norm": 6.046893197187678, + "learning_rate": 4.1784979708772914e-06, + "loss": 1.1004, + "step": 117567 + }, + { + "epoch": 1.41, + "grad_norm": 12.06640356294069, + "learning_rate": 4.178022996593464e-06, + "loss": 1.3768, + "step": 117570 + }, + { + "epoch": 1.41, + "grad_norm": 13.363175498927983, + "learning_rate": 4.177548042177945e-06, + "loss": 1.2829, + "step": 117573 + }, + { + "epoch": 1.41, + "grad_norm": 4.243320676394059, + "learning_rate": 4.177073107632362e-06, + "loss": 1.1701, + "step": 117576 + }, + { + "epoch": 1.41, + "grad_norm": 6.400135344107927, + "learning_rate": 4.176598192958329e-06, + "loss": 1.3945, + "step": 117579 + }, + { + "epoch": 1.41, + "grad_norm": 9.14055732259937, + "learning_rate": 4.176123298157466e-06, + "loss": 1.1842, + "step": 117582 + }, + { + "epoch": 1.41, + "grad_norm": 6.795035996756985, + "learning_rate": 4.175648423231402e-06, + "loss": 1.1297, + "step": 117585 + }, + { + "epoch": 1.41, + "grad_norm": 4.724137026463917, + "learning_rate": 4.175173568181747e-06, + "loss": 1.0984, + "step": 117588 + }, + { + "epoch": 1.41, + "grad_norm": 8.2512662421162, + "learning_rate": 4.174698733010132e-06, + "loss": 1.4667, + "step": 117591 + }, + { + "epoch": 1.41, + "grad_norm": 3.778625461290072, + "learning_rate": 4.174223917718168e-06, + "loss": 1.1478, + "step": 117594 + }, + { + "epoch": 1.41, + "grad_norm": 8.64642541176784, + "learning_rate": 4.173749122307478e-06, + "loss": 1.1949, + "step": 117597 + }, + { + "epoch": 1.41, + "grad_norm": 12.946390460333458, + "learning_rate": 4.173274346779685e-06, + "loss": 1.3906, + "step": 117600 + }, + { + "epoch": 1.41, + "grad_norm": 3.803215253414691, + "learning_rate": 4.1727995911364096e-06, + "loss": 1.1366, + "step": 117603 + }, + { + "epoch": 1.41, + "grad_norm": 10.763776371902598, + "learning_rate": 4.172324855379272e-06, + "loss": 1.2378, + "step": 117606 + }, + { + "epoch": 1.41, + "grad_norm": 13.300094090896849, + "learning_rate": 4.171850139509885e-06, + "loss": 1.3007, + "step": 117609 + }, + { + "epoch": 1.41, + "grad_norm": 15.39371129112189, + "learning_rate": 4.171375443529876e-06, + "loss": 0.7398, + "step": 117612 + }, + { + "epoch": 1.41, + "grad_norm": 8.341471320950067, + "learning_rate": 4.170900767440863e-06, + "loss": 1.124, + "step": 117615 + }, + { + "epoch": 1.41, + "grad_norm": 4.916920421645805, + "learning_rate": 4.17042611124447e-06, + "loss": 0.9788, + "step": 117618 + }, + { + "epoch": 1.41, + "grad_norm": 13.189882778591775, + "learning_rate": 4.169951474942308e-06, + "loss": 1.1838, + "step": 117621 + }, + { + "epoch": 1.41, + "grad_norm": 9.303192987581218, + "learning_rate": 4.169476858536003e-06, + "loss": 1.2668, + "step": 117624 + }, + { + "epoch": 1.41, + "grad_norm": 9.062776669947219, + "learning_rate": 4.1690022620271755e-06, + "loss": 1.0186, + "step": 117627 + }, + { + "epoch": 1.41, + "grad_norm": 17.92667192555239, + "learning_rate": 4.168527685417444e-06, + "loss": 1.5271, + "step": 117630 + }, + { + "epoch": 1.41, + "grad_norm": 4.034146276460263, + "learning_rate": 4.168053128708422e-06, + "loss": 1.3172, + "step": 117633 + }, + { + "epoch": 1.41, + "grad_norm": 10.424077961113774, + "learning_rate": 4.167578591901736e-06, + "loss": 1.3369, + "step": 117636 + }, + { + "epoch": 1.41, + "grad_norm": 9.889365079471528, + "learning_rate": 4.167104074999002e-06, + "loss": 1.4714, + "step": 117639 + }, + { + "epoch": 1.41, + "grad_norm": 19.07218072366965, + "learning_rate": 4.166629578001842e-06, + "loss": 1.0537, + "step": 117642 + }, + { + "epoch": 1.41, + "grad_norm": 7.68002934919106, + "learning_rate": 4.166155100911876e-06, + "loss": 1.4334, + "step": 117645 + }, + { + "epoch": 1.41, + "grad_norm": 26.133284907259267, + "learning_rate": 4.165680643730723e-06, + "loss": 1.1612, + "step": 117648 + }, + { + "epoch": 1.41, + "grad_norm": 13.056039555817616, + "learning_rate": 4.165206206459997e-06, + "loss": 1.0508, + "step": 117651 + }, + { + "epoch": 1.41, + "grad_norm": 7.091395797274895, + "learning_rate": 4.164731789101318e-06, + "loss": 1.094, + "step": 117654 + }, + { + "epoch": 1.41, + "grad_norm": 41.18563609382686, + "learning_rate": 4.1642573916563135e-06, + "loss": 1.5979, + "step": 117657 + }, + { + "epoch": 1.41, + "grad_norm": 8.728094786676477, + "learning_rate": 4.163783014126592e-06, + "loss": 1.2605, + "step": 117660 + }, + { + "epoch": 1.41, + "grad_norm": 5.278775964246664, + "learning_rate": 4.163308656513777e-06, + "loss": 1.4264, + "step": 117663 + }, + { + "epoch": 1.41, + "grad_norm": 10.2648824212177, + "learning_rate": 4.162834318819488e-06, + "loss": 1.0774, + "step": 117666 + }, + { + "epoch": 1.41, + "grad_norm": 19.329020451994847, + "learning_rate": 4.162360001045346e-06, + "loss": 0.9642, + "step": 117669 + }, + { + "epoch": 1.41, + "grad_norm": 8.572280786598704, + "learning_rate": 4.161885703192966e-06, + "loss": 1.2273, + "step": 117672 + }, + { + "epoch": 1.42, + "grad_norm": 14.397302207227881, + "learning_rate": 4.161411425263963e-06, + "loss": 1.0619, + "step": 117675 + }, + { + "epoch": 1.42, + "grad_norm": 6.439873659237624, + "learning_rate": 4.160937167259962e-06, + "loss": 1.4967, + "step": 117678 + }, + { + "epoch": 1.42, + "grad_norm": 9.41615095085721, + "learning_rate": 4.160462929182578e-06, + "loss": 1.3889, + "step": 117681 + }, + { + "epoch": 1.42, + "grad_norm": 13.166784360434214, + "learning_rate": 4.1599887110334335e-06, + "loss": 1.1215, + "step": 117684 + }, + { + "epoch": 1.42, + "grad_norm": 25.047326181677313, + "learning_rate": 4.1595145128141425e-06, + "loss": 1.2423, + "step": 117687 + }, + { + "epoch": 1.42, + "grad_norm": 8.771970156845368, + "learning_rate": 4.159040334526326e-06, + "loss": 1.0002, + "step": 117690 + }, + { + "epoch": 1.42, + "grad_norm": 12.51950651270966, + "learning_rate": 4.158566176171599e-06, + "loss": 0.8811, + "step": 117693 + }, + { + "epoch": 1.42, + "grad_norm": 6.574550250561398, + "learning_rate": 4.15809203775158e-06, + "loss": 1.3078, + "step": 117696 + }, + { + "epoch": 1.42, + "grad_norm": 11.944292895076291, + "learning_rate": 4.157617919267894e-06, + "loss": 1.2609, + "step": 117699 + }, + { + "epoch": 1.42, + "grad_norm": 11.006857223768126, + "learning_rate": 4.157143820722148e-06, + "loss": 1.1362, + "step": 117702 + }, + { + "epoch": 1.42, + "grad_norm": 14.80043229131184, + "learning_rate": 4.156669742115967e-06, + "loss": 1.1405, + "step": 117705 + }, + { + "epoch": 1.42, + "grad_norm": 6.843650281423018, + "learning_rate": 4.156195683450968e-06, + "loss": 1.2795, + "step": 117708 + }, + { + "epoch": 1.42, + "grad_norm": 7.006690612408672, + "learning_rate": 4.155721644728771e-06, + "loss": 1.1755, + "step": 117711 + }, + { + "epoch": 1.42, + "grad_norm": 10.4542233466289, + "learning_rate": 4.15524762595099e-06, + "loss": 0.9574, + "step": 117714 + }, + { + "epoch": 1.42, + "grad_norm": 10.637555634674516, + "learning_rate": 4.154773627119241e-06, + "loss": 0.9176, + "step": 117717 + }, + { + "epoch": 1.42, + "grad_norm": 2.953695275731847, + "learning_rate": 4.154299648235144e-06, + "loss": 1.1158, + "step": 117720 + }, + { + "epoch": 1.42, + "grad_norm": 2.3751750425433564, + "learning_rate": 4.153825689300317e-06, + "loss": 0.9844, + "step": 117723 + }, + { + "epoch": 1.42, + "grad_norm": 16.7604762095236, + "learning_rate": 4.15335175031638e-06, + "loss": 1.0551, + "step": 117726 + }, + { + "epoch": 1.42, + "grad_norm": 12.151452477382474, + "learning_rate": 4.152877831284944e-06, + "loss": 1.163, + "step": 117729 + }, + { + "epoch": 1.42, + "grad_norm": 12.766839399005608, + "learning_rate": 4.152403932207634e-06, + "loss": 1.1836, + "step": 117732 + }, + { + "epoch": 1.42, + "grad_norm": 11.289558147085309, + "learning_rate": 4.15193005308606e-06, + "loss": 1.3098, + "step": 117735 + }, + { + "epoch": 1.42, + "grad_norm": 13.854888589459172, + "learning_rate": 4.151456193921844e-06, + "loss": 0.9947, + "step": 117738 + }, + { + "epoch": 1.42, + "grad_norm": 3.2924948622560812, + "learning_rate": 4.150982354716599e-06, + "loss": 0.8881, + "step": 117741 + }, + { + "epoch": 1.42, + "grad_norm": 6.086377639540087, + "learning_rate": 4.150508535471944e-06, + "loss": 1.1926, + "step": 117744 + }, + { + "epoch": 1.42, + "grad_norm": 4.94137735388623, + "learning_rate": 4.150034736189498e-06, + "loss": 0.9547, + "step": 117747 + }, + { + "epoch": 1.42, + "grad_norm": 5.471088286630711, + "learning_rate": 4.149560956870874e-06, + "loss": 0.8312, + "step": 117750 + }, + { + "epoch": 1.42, + "grad_norm": 4.261904441344627, + "learning_rate": 4.149087197517696e-06, + "loss": 1.2338, + "step": 117753 + }, + { + "epoch": 1.42, + "grad_norm": 13.039065259803426, + "learning_rate": 4.1486134581315754e-06, + "loss": 1.083, + "step": 117756 + }, + { + "epoch": 1.42, + "grad_norm": 12.719381554581656, + "learning_rate": 4.148139738714127e-06, + "loss": 1.0361, + "step": 117759 + }, + { + "epoch": 1.42, + "grad_norm": 12.741744638659767, + "learning_rate": 4.147666039266967e-06, + "loss": 1.1011, + "step": 117762 + }, + { + "epoch": 1.42, + "grad_norm": 24.1913496766185, + "learning_rate": 4.1471923597917215e-06, + "loss": 1.1442, + "step": 117765 + }, + { + "epoch": 1.42, + "grad_norm": 7.460344464213532, + "learning_rate": 4.146718700289995e-06, + "loss": 0.899, + "step": 117768 + }, + { + "epoch": 1.42, + "grad_norm": 27.681827581884907, + "learning_rate": 4.146245060763409e-06, + "loss": 1.1681, + "step": 117771 + }, + { + "epoch": 1.42, + "grad_norm": 3.642824069011982, + "learning_rate": 4.1457714412135855e-06, + "loss": 0.996, + "step": 117774 + }, + { + "epoch": 1.42, + "grad_norm": 6.891487771188453, + "learning_rate": 4.145297841642131e-06, + "loss": 0.9933, + "step": 117777 + }, + { + "epoch": 1.42, + "grad_norm": 18.78327591135821, + "learning_rate": 4.144824262050669e-06, + "loss": 0.9466, + "step": 117780 + }, + { + "epoch": 1.42, + "grad_norm": 4.937250064654867, + "learning_rate": 4.14435070244081e-06, + "loss": 0.9711, + "step": 117783 + }, + { + "epoch": 1.42, + "grad_norm": 4.043502295523238, + "learning_rate": 4.1438771628141725e-06, + "loss": 1.404, + "step": 117786 + }, + { + "epoch": 1.42, + "grad_norm": 10.127787245595151, + "learning_rate": 4.143403643172373e-06, + "loss": 1.3646, + "step": 117789 + }, + { + "epoch": 1.42, + "grad_norm": 6.7523759778296295, + "learning_rate": 4.14293014351703e-06, + "loss": 1.3231, + "step": 117792 + }, + { + "epoch": 1.42, + "grad_norm": 5.620283381824602, + "learning_rate": 4.142456663849754e-06, + "loss": 1.262, + "step": 117795 + }, + { + "epoch": 1.42, + "grad_norm": 15.593982322746756, + "learning_rate": 4.141983204172168e-06, + "loss": 1.1127, + "step": 117798 + }, + { + "epoch": 1.42, + "grad_norm": 7.344228092027052, + "learning_rate": 4.141509764485878e-06, + "loss": 1.0102, + "step": 117801 + }, + { + "epoch": 1.42, + "grad_norm": 6.586519388541242, + "learning_rate": 4.141036344792505e-06, + "loss": 1.2018, + "step": 117804 + }, + { + "epoch": 1.42, + "grad_norm": 23.24773388701531, + "learning_rate": 4.140562945093669e-06, + "loss": 1.41, + "step": 117807 + }, + { + "epoch": 1.42, + "grad_norm": 9.698063985643042, + "learning_rate": 4.140089565390978e-06, + "loss": 1.0714, + "step": 117810 + }, + { + "epoch": 1.42, + "grad_norm": 30.301728689539768, + "learning_rate": 4.139616205686049e-06, + "loss": 0.8433, + "step": 117813 + }, + { + "epoch": 1.42, + "grad_norm": 13.456198418090663, + "learning_rate": 4.1391428659805e-06, + "loss": 1.2415, + "step": 117816 + }, + { + "epoch": 1.42, + "grad_norm": 14.737662453202221, + "learning_rate": 4.13866954627595e-06, + "loss": 1.2992, + "step": 117819 + }, + { + "epoch": 1.42, + "grad_norm": 5.514507295061303, + "learning_rate": 4.138196246574008e-06, + "loss": 1.1123, + "step": 117822 + }, + { + "epoch": 1.42, + "grad_norm": 6.645232869086663, + "learning_rate": 4.137722966876288e-06, + "loss": 1.4145, + "step": 117825 + }, + { + "epoch": 1.42, + "grad_norm": 4.22471459769218, + "learning_rate": 4.137249707184407e-06, + "loss": 1.0932, + "step": 117828 + }, + { + "epoch": 1.42, + "grad_norm": 14.210796341693726, + "learning_rate": 4.136776467499981e-06, + "loss": 1.2677, + "step": 117831 + }, + { + "epoch": 1.42, + "grad_norm": 8.748223222009162, + "learning_rate": 4.136303247824629e-06, + "loss": 1.1954, + "step": 117834 + }, + { + "epoch": 1.42, + "grad_norm": 6.685997243649807, + "learning_rate": 4.135830048159959e-06, + "loss": 0.9392, + "step": 117837 + }, + { + "epoch": 1.42, + "grad_norm": 31.152526917296342, + "learning_rate": 4.135356868507592e-06, + "loss": 1.2627, + "step": 117840 + }, + { + "epoch": 1.42, + "grad_norm": 15.967585453456074, + "learning_rate": 4.134883708869135e-06, + "loss": 1.3395, + "step": 117843 + }, + { + "epoch": 1.42, + "grad_norm": 9.56865030402692, + "learning_rate": 4.1344105692462124e-06, + "loss": 1.0797, + "step": 117846 + }, + { + "epoch": 1.42, + "grad_norm": 22.847353298011214, + "learning_rate": 4.133937449640429e-06, + "loss": 1.0084, + "step": 117849 + }, + { + "epoch": 1.42, + "grad_norm": 8.314184640354448, + "learning_rate": 4.1334643500534045e-06, + "loss": 1.139, + "step": 117852 + }, + { + "epoch": 1.42, + "grad_norm": 7.268487558569677, + "learning_rate": 4.132991270486753e-06, + "loss": 1.1056, + "step": 117855 + }, + { + "epoch": 1.42, + "grad_norm": 14.517121931369104, + "learning_rate": 4.132518210942089e-06, + "loss": 1.0638, + "step": 117858 + }, + { + "epoch": 1.42, + "grad_norm": 34.328484083582694, + "learning_rate": 4.1320451714210305e-06, + "loss": 1.0925, + "step": 117861 + }, + { + "epoch": 1.42, + "grad_norm": 13.087368841600872, + "learning_rate": 4.131572151925189e-06, + "loss": 1.3328, + "step": 117864 + }, + { + "epoch": 1.42, + "grad_norm": 14.993211337957034, + "learning_rate": 4.131099152456174e-06, + "loss": 0.8997, + "step": 117867 + }, + { + "epoch": 1.42, + "grad_norm": 18.761787775496465, + "learning_rate": 4.130626173015603e-06, + "loss": 1.1704, + "step": 117870 + }, + { + "epoch": 1.42, + "grad_norm": 3.4691510183530037, + "learning_rate": 4.130153213605095e-06, + "loss": 1.1301, + "step": 117873 + }, + { + "epoch": 1.42, + "grad_norm": 13.106465966250529, + "learning_rate": 4.129680274226255e-06, + "loss": 1.0605, + "step": 117876 + }, + { + "epoch": 1.42, + "grad_norm": 9.188644925382917, + "learning_rate": 4.129207354880703e-06, + "loss": 1.2264, + "step": 117879 + }, + { + "epoch": 1.42, + "grad_norm": 13.705614441151862, + "learning_rate": 4.128734455570055e-06, + "loss": 1.2828, + "step": 117882 + }, + { + "epoch": 1.42, + "grad_norm": 4.78309571959564, + "learning_rate": 4.128261576295918e-06, + "loss": 1.0501, + "step": 117885 + }, + { + "epoch": 1.42, + "grad_norm": 63.27515793097052, + "learning_rate": 4.127788717059912e-06, + "loss": 1.3074, + "step": 117888 + }, + { + "epoch": 1.42, + "grad_norm": 9.49393858570182, + "learning_rate": 4.127315877863644e-06, + "loss": 0.863, + "step": 117891 + }, + { + "epoch": 1.42, + "grad_norm": 9.968382636989693, + "learning_rate": 4.126843058708734e-06, + "loss": 1.3115, + "step": 117894 + }, + { + "epoch": 1.42, + "grad_norm": 5.161380433951988, + "learning_rate": 4.126370259596791e-06, + "loss": 1.2128, + "step": 117897 + }, + { + "epoch": 1.42, + "grad_norm": 13.191917053675496, + "learning_rate": 4.125897480529435e-06, + "loss": 1.3563, + "step": 117900 + }, + { + "epoch": 1.42, + "grad_norm": 14.694940060414803, + "learning_rate": 4.125424721508271e-06, + "loss": 1.4521, + "step": 117903 + }, + { + "epoch": 1.42, + "grad_norm": 8.381564068814095, + "learning_rate": 4.124951982534921e-06, + "loss": 1.2214, + "step": 117906 + }, + { + "epoch": 1.42, + "grad_norm": 15.15219826000407, + "learning_rate": 4.1244792636109885e-06, + "loss": 1.3819, + "step": 117909 + }, + { + "epoch": 1.42, + "grad_norm": 12.035271410349077, + "learning_rate": 4.124006564738093e-06, + "loss": 1.1229, + "step": 117912 + }, + { + "epoch": 1.42, + "grad_norm": 57.46225109703546, + "learning_rate": 4.1235338859178495e-06, + "loss": 1.4716, + "step": 117915 + }, + { + "epoch": 1.42, + "grad_norm": 15.822279670523168, + "learning_rate": 4.123061227151866e-06, + "loss": 1.5631, + "step": 117918 + }, + { + "epoch": 1.42, + "grad_norm": 8.395371444356396, + "learning_rate": 4.122588588441756e-06, + "loss": 1.109, + "step": 117921 + }, + { + "epoch": 1.42, + "grad_norm": 10.770742787941535, + "learning_rate": 4.122115969789139e-06, + "loss": 1.3957, + "step": 117924 + }, + { + "epoch": 1.42, + "grad_norm": 5.038205144493833, + "learning_rate": 4.121643371195623e-06, + "loss": 1.3599, + "step": 117927 + }, + { + "epoch": 1.42, + "grad_norm": 16.48764240528085, + "learning_rate": 4.121170792662816e-06, + "loss": 1.6037, + "step": 117930 + }, + { + "epoch": 1.42, + "grad_norm": 5.7577223517601785, + "learning_rate": 4.120698234192336e-06, + "loss": 1.4497, + "step": 117933 + }, + { + "epoch": 1.42, + "grad_norm": 19.526847718178466, + "learning_rate": 4.1202256957857955e-06, + "loss": 0.9404, + "step": 117936 + }, + { + "epoch": 1.42, + "grad_norm": 6.767033386912935, + "learning_rate": 4.1197531774448064e-06, + "loss": 1.2137, + "step": 117939 + }, + { + "epoch": 1.42, + "grad_norm": 15.392987190912297, + "learning_rate": 4.119280679170986e-06, + "loss": 1.2562, + "step": 117942 + }, + { + "epoch": 1.42, + "grad_norm": 5.060010997202664, + "learning_rate": 4.118808200965938e-06, + "loss": 1.0527, + "step": 117945 + }, + { + "epoch": 1.42, + "grad_norm": 8.227542521736565, + "learning_rate": 4.1183357428312835e-06, + "loss": 1.2158, + "step": 117948 + }, + { + "epoch": 1.42, + "grad_norm": 5.0978385860725215, + "learning_rate": 4.117863304768626e-06, + "loss": 1.0314, + "step": 117951 + }, + { + "epoch": 1.42, + "grad_norm": 7.917297950657107, + "learning_rate": 4.117390886779586e-06, + "loss": 1.2092, + "step": 117954 + }, + { + "epoch": 1.42, + "grad_norm": 7.469324142134115, + "learning_rate": 4.11691848886577e-06, + "loss": 1.1592, + "step": 117957 + }, + { + "epoch": 1.42, + "grad_norm": 5.045816756924752, + "learning_rate": 4.116446111028791e-06, + "loss": 1.1412, + "step": 117960 + }, + { + "epoch": 1.42, + "grad_norm": 10.754568513788355, + "learning_rate": 4.115973753270263e-06, + "loss": 1.2253, + "step": 117963 + }, + { + "epoch": 1.42, + "grad_norm": 29.08904025585277, + "learning_rate": 4.1155014155917994e-06, + "loss": 1.5605, + "step": 117966 + }, + { + "epoch": 1.42, + "grad_norm": 12.642413503296343, + "learning_rate": 4.11502909799501e-06, + "loss": 1.1306, + "step": 117969 + }, + { + "epoch": 1.42, + "grad_norm": 19.29283609727548, + "learning_rate": 4.114556800481504e-06, + "loss": 1.0921, + "step": 117972 + }, + { + "epoch": 1.42, + "grad_norm": 8.703801982902052, + "learning_rate": 4.114084523052895e-06, + "loss": 0.9855, + "step": 117975 + }, + { + "epoch": 1.42, + "grad_norm": 3.6381120489574843, + "learning_rate": 4.113612265710794e-06, + "loss": 1.2007, + "step": 117978 + }, + { + "epoch": 1.42, + "grad_norm": 4.178233805314512, + "learning_rate": 4.1131400284568205e-06, + "loss": 1.1496, + "step": 117981 + }, + { + "epoch": 1.42, + "grad_norm": 6.4276481683738105, + "learning_rate": 4.112667811292574e-06, + "loss": 1.2108, + "step": 117984 + }, + { + "epoch": 1.42, + "grad_norm": 22.49091990527476, + "learning_rate": 4.112195614219673e-06, + "loss": 1.3222, + "step": 117987 + }, + { + "epoch": 1.42, + "grad_norm": 10.180542936026555, + "learning_rate": 4.111723437239731e-06, + "loss": 0.9688, + "step": 117990 + }, + { + "epoch": 1.42, + "grad_norm": 14.284540302779163, + "learning_rate": 4.111251280354351e-06, + "loss": 0.8636, + "step": 117993 + }, + { + "epoch": 1.42, + "grad_norm": 11.414520151203321, + "learning_rate": 4.110779143565155e-06, + "loss": 1.3, + "step": 117996 + }, + { + "epoch": 1.42, + "grad_norm": 13.823065305046741, + "learning_rate": 4.110307026873743e-06, + "loss": 1.1267, + "step": 117999 + }, + { + "epoch": 1.42, + "grad_norm": 5.576096038869323, + "learning_rate": 4.109834930281732e-06, + "loss": 1.6293, + "step": 118002 + }, + { + "epoch": 1.42, + "grad_norm": 15.119054113374583, + "learning_rate": 4.109362853790734e-06, + "loss": 1.2628, + "step": 118005 + }, + { + "epoch": 1.42, + "grad_norm": 24.60625129575814, + "learning_rate": 4.10889079740236e-06, + "loss": 1.1401, + "step": 118008 + }, + { + "epoch": 1.42, + "grad_norm": 35.99724949149874, + "learning_rate": 4.108418761118222e-06, + "loss": 1.2259, + "step": 118011 + }, + { + "epoch": 1.42, + "grad_norm": 12.771591594122134, + "learning_rate": 4.107946744939925e-06, + "loss": 1.0874, + "step": 118014 + }, + { + "epoch": 1.42, + "grad_norm": 5.518585722760066, + "learning_rate": 4.1074747488690824e-06, + "loss": 1.2907, + "step": 118017 + }, + { + "epoch": 1.42, + "grad_norm": 4.577779853772821, + "learning_rate": 4.107002772907306e-06, + "loss": 1.1301, + "step": 118020 + }, + { + "epoch": 1.42, + "grad_norm": 31.15083352954939, + "learning_rate": 4.1065308170562115e-06, + "loss": 1.0741, + "step": 118023 + }, + { + "epoch": 1.42, + "grad_norm": 3.237228326262377, + "learning_rate": 4.1060588813174e-06, + "loss": 1.2797, + "step": 118026 + }, + { + "epoch": 1.42, + "grad_norm": 17.378906118113782, + "learning_rate": 4.105586965692485e-06, + "loss": 1.2496, + "step": 118029 + }, + { + "epoch": 1.42, + "grad_norm": 9.757657862027274, + "learning_rate": 4.105115070183084e-06, + "loss": 1.074, + "step": 118032 + }, + { + "epoch": 1.42, + "grad_norm": 8.668919556922575, + "learning_rate": 4.104643194790801e-06, + "loss": 1.2437, + "step": 118035 + }, + { + "epoch": 1.42, + "grad_norm": 7.1921313213496925, + "learning_rate": 4.104171339517245e-06, + "loss": 0.9359, + "step": 118038 + }, + { + "epoch": 1.42, + "grad_norm": 7.815536920077398, + "learning_rate": 4.103699504364027e-06, + "loss": 1.0198, + "step": 118041 + }, + { + "epoch": 1.42, + "grad_norm": 10.255204815554048, + "learning_rate": 4.103227689332759e-06, + "loss": 1.3076, + "step": 118044 + }, + { + "epoch": 1.42, + "grad_norm": 3.562120741930827, + "learning_rate": 4.102755894425052e-06, + "loss": 1.1359, + "step": 118047 + }, + { + "epoch": 1.42, + "grad_norm": 262.7409349542357, + "learning_rate": 4.102284119642518e-06, + "loss": 1.2872, + "step": 118050 + }, + { + "epoch": 1.42, + "grad_norm": 12.66520864643343, + "learning_rate": 4.10181236498676e-06, + "loss": 0.9933, + "step": 118053 + }, + { + "epoch": 1.42, + "grad_norm": 27.432333712366454, + "learning_rate": 4.101340630459396e-06, + "loss": 1.0029, + "step": 118056 + }, + { + "epoch": 1.42, + "grad_norm": 3.6280493982742685, + "learning_rate": 4.100868916062028e-06, + "loss": 1.4667, + "step": 118059 + }, + { + "epoch": 1.42, + "grad_norm": 14.173579529453098, + "learning_rate": 4.1003972217962725e-06, + "loss": 1.5673, + "step": 118062 + }, + { + "epoch": 1.42, + "grad_norm": 4.858447515471384, + "learning_rate": 4.099925547663733e-06, + "loss": 1.2688, + "step": 118065 + }, + { + "epoch": 1.42, + "grad_norm": 33.337139739137385, + "learning_rate": 4.0994538936660235e-06, + "loss": 1.6085, + "step": 118068 + }, + { + "epoch": 1.42, + "grad_norm": 15.248357698284872, + "learning_rate": 4.098982259804751e-06, + "loss": 1.2002, + "step": 118071 + }, + { + "epoch": 1.42, + "grad_norm": 9.599739657732982, + "learning_rate": 4.098510646081531e-06, + "loss": 1.2888, + "step": 118074 + }, + { + "epoch": 1.42, + "grad_norm": 6.563195603023058, + "learning_rate": 4.0980390524979675e-06, + "loss": 1.081, + "step": 118077 + }, + { + "epoch": 1.42, + "grad_norm": 6.8652049325482665, + "learning_rate": 4.097567479055668e-06, + "loss": 1.3585, + "step": 118080 + }, + { + "epoch": 1.42, + "grad_norm": 12.570793017407695, + "learning_rate": 4.097095925756245e-06, + "loss": 1.2808, + "step": 118083 + }, + { + "epoch": 1.42, + "grad_norm": 5.550288543508132, + "learning_rate": 4.096624392601306e-06, + "loss": 1.4276, + "step": 118086 + }, + { + "epoch": 1.42, + "grad_norm": 5.096008473024968, + "learning_rate": 4.096152879592466e-06, + "loss": 0.9288, + "step": 118089 + }, + { + "epoch": 1.42, + "grad_norm": 12.503270751896205, + "learning_rate": 4.095681386731326e-06, + "loss": 0.8675, + "step": 118092 + }, + { + "epoch": 1.42, + "grad_norm": 4.890854203373228, + "learning_rate": 4.095209914019499e-06, + "loss": 1.0968, + "step": 118095 + }, + { + "epoch": 1.42, + "grad_norm": 2.8175926619012306, + "learning_rate": 4.094738461458598e-06, + "loss": 1.0869, + "step": 118098 + }, + { + "epoch": 1.42, + "grad_norm": 14.023464395785883, + "learning_rate": 4.094267029050223e-06, + "loss": 1.2573, + "step": 118101 + }, + { + "epoch": 1.42, + "grad_norm": 4.071463069506281, + "learning_rate": 4.093795616795991e-06, + "loss": 1.3299, + "step": 118104 + }, + { + "epoch": 1.42, + "grad_norm": 8.690852040758202, + "learning_rate": 4.093324224697503e-06, + "loss": 1.4151, + "step": 118107 + }, + { + "epoch": 1.42, + "grad_norm": 10.583858686481825, + "learning_rate": 4.092852852756372e-06, + "loss": 1.0726, + "step": 118110 + }, + { + "epoch": 1.42, + "grad_norm": 9.16376302614724, + "learning_rate": 4.0923815009742075e-06, + "loss": 1.3442, + "step": 118113 + }, + { + "epoch": 1.42, + "grad_norm": 30.68282270043289, + "learning_rate": 4.0919101693526195e-06, + "loss": 1.3874, + "step": 118116 + }, + { + "epoch": 1.42, + "grad_norm": 8.81744346291702, + "learning_rate": 4.091438857893215e-06, + "loss": 1.1999, + "step": 118119 + }, + { + "epoch": 1.42, + "grad_norm": 6.401879073211803, + "learning_rate": 4.090967566597597e-06, + "loss": 1.5156, + "step": 118122 + }, + { + "epoch": 1.42, + "grad_norm": 16.400721099438783, + "learning_rate": 4.090496295467379e-06, + "loss": 1.3479, + "step": 118125 + }, + { + "epoch": 1.42, + "grad_norm": 27.791111674277015, + "learning_rate": 4.090025044504168e-06, + "loss": 1.1633, + "step": 118128 + }, + { + "epoch": 1.42, + "grad_norm": 6.872179840291149, + "learning_rate": 4.089553813709577e-06, + "loss": 1.3287, + "step": 118131 + }, + { + "epoch": 1.42, + "grad_norm": 10.072174280661864, + "learning_rate": 4.089082603085206e-06, + "loss": 1.1164, + "step": 118134 + }, + { + "epoch": 1.42, + "grad_norm": 11.275518682944092, + "learning_rate": 4.088611412632666e-06, + "loss": 1.3481, + "step": 118137 + }, + { + "epoch": 1.42, + "grad_norm": 17.129678128326084, + "learning_rate": 4.088140242353571e-06, + "loss": 0.9458, + "step": 118140 + }, + { + "epoch": 1.42, + "grad_norm": 2.941333802400441, + "learning_rate": 4.087669092249523e-06, + "loss": 0.9538, + "step": 118143 + }, + { + "epoch": 1.42, + "grad_norm": 17.698434987618917, + "learning_rate": 4.087197962322127e-06, + "loss": 0.9671, + "step": 118146 + }, + { + "epoch": 1.42, + "grad_norm": 5.396490902428713, + "learning_rate": 4.0867268525729955e-06, + "loss": 0.9613, + "step": 118149 + }, + { + "epoch": 1.42, + "grad_norm": 5.779667956775386, + "learning_rate": 4.0862557630037345e-06, + "loss": 0.8853, + "step": 118152 + }, + { + "epoch": 1.42, + "grad_norm": 4.759896511219811, + "learning_rate": 4.085784693615953e-06, + "loss": 1.0791, + "step": 118155 + }, + { + "epoch": 1.42, + "grad_norm": 20.454405595176972, + "learning_rate": 4.085313644411262e-06, + "loss": 1.4784, + "step": 118158 + }, + { + "epoch": 1.42, + "grad_norm": 3.4567868420964856, + "learning_rate": 4.084842615391264e-06, + "loss": 1.3829, + "step": 118161 + }, + { + "epoch": 1.42, + "grad_norm": 3.741332488589671, + "learning_rate": 4.0843716065575645e-06, + "loss": 0.8749, + "step": 118164 + }, + { + "epoch": 1.42, + "grad_norm": 7.256241730466172, + "learning_rate": 4.083900617911773e-06, + "loss": 1.1916, + "step": 118167 + }, + { + "epoch": 1.42, + "grad_norm": 5.617910590229179, + "learning_rate": 4.083429649455504e-06, + "loss": 0.9734, + "step": 118170 + }, + { + "epoch": 1.42, + "grad_norm": 10.167767625536737, + "learning_rate": 4.0829587011903526e-06, + "loss": 1.3656, + "step": 118173 + }, + { + "epoch": 1.42, + "grad_norm": 7.06900461315893, + "learning_rate": 4.082487773117932e-06, + "loss": 1.1861, + "step": 118176 + }, + { + "epoch": 1.42, + "grad_norm": 21.146403306070948, + "learning_rate": 4.08201686523985e-06, + "loss": 1.1764, + "step": 118179 + }, + { + "epoch": 1.42, + "grad_norm": 8.491726532754695, + "learning_rate": 4.0815459775577145e-06, + "loss": 1.4029, + "step": 118182 + }, + { + "epoch": 1.42, + "grad_norm": 19.488181858179825, + "learning_rate": 4.081075110073133e-06, + "loss": 1.3105, + "step": 118185 + }, + { + "epoch": 1.42, + "grad_norm": 8.32139519342126, + "learning_rate": 4.080604262787705e-06, + "loss": 1.2407, + "step": 118188 + }, + { + "epoch": 1.42, + "grad_norm": 3.0512904772152463, + "learning_rate": 4.0801334357030445e-06, + "loss": 1.1464, + "step": 118191 + }, + { + "epoch": 1.42, + "grad_norm": 13.271503518832022, + "learning_rate": 4.079662628820754e-06, + "loss": 1.0664, + "step": 118194 + }, + { + "epoch": 1.42, + "grad_norm": 2.312474789861249, + "learning_rate": 4.079191842142447e-06, + "loss": 1.2467, + "step": 118197 + }, + { + "epoch": 1.42, + "grad_norm": 16.407831453890285, + "learning_rate": 4.078721075669722e-06, + "loss": 1.1766, + "step": 118200 + }, + { + "epoch": 1.42, + "grad_norm": 7.901165623657392, + "learning_rate": 4.078250329404193e-06, + "loss": 0.9346, + "step": 118203 + }, + { + "epoch": 1.42, + "grad_norm": 8.106551835659928, + "learning_rate": 4.077779603347458e-06, + "loss": 0.7458, + "step": 118206 + }, + { + "epoch": 1.42, + "grad_norm": 4.169224798364886, + "learning_rate": 4.077308897501129e-06, + "loss": 1.3872, + "step": 118209 + }, + { + "epoch": 1.42, + "grad_norm": 7.569052539406386, + "learning_rate": 4.076838211866815e-06, + "loss": 1.0527, + "step": 118212 + }, + { + "epoch": 1.42, + "grad_norm": 12.59302062978899, + "learning_rate": 4.076367546446115e-06, + "loss": 0.6773, + "step": 118215 + }, + { + "epoch": 1.42, + "grad_norm": 4.231089313041205, + "learning_rate": 4.075896901240638e-06, + "loss": 1.081, + "step": 118218 + }, + { + "epoch": 1.42, + "grad_norm": 8.480048729538078, + "learning_rate": 4.075426276251992e-06, + "loss": 1.1806, + "step": 118221 + }, + { + "epoch": 1.42, + "grad_norm": 32.851220552543296, + "learning_rate": 4.074955671481785e-06, + "loss": 1.1044, + "step": 118224 + }, + { + "epoch": 1.42, + "grad_norm": 13.511674487274721, + "learning_rate": 4.074485086931621e-06, + "loss": 1.0695, + "step": 118227 + }, + { + "epoch": 1.42, + "grad_norm": 19.34444058520484, + "learning_rate": 4.074014522603101e-06, + "loss": 1.1464, + "step": 118230 + }, + { + "epoch": 1.42, + "grad_norm": 9.942344861394936, + "learning_rate": 4.073543978497834e-06, + "loss": 1.0011, + "step": 118233 + }, + { + "epoch": 1.42, + "grad_norm": 12.834960966439173, + "learning_rate": 4.073073454617429e-06, + "loss": 0.9621, + "step": 118236 + }, + { + "epoch": 1.42, + "grad_norm": 11.778806851888964, + "learning_rate": 4.072602950963491e-06, + "loss": 1.0727, + "step": 118239 + }, + { + "epoch": 1.42, + "grad_norm": 11.127305036249135, + "learning_rate": 4.072132467537621e-06, + "loss": 0.877, + "step": 118242 + }, + { + "epoch": 1.42, + "grad_norm": 7.681235259162235, + "learning_rate": 4.0716620043414315e-06, + "loss": 1.166, + "step": 118245 + }, + { + "epoch": 1.42, + "grad_norm": 15.834351097944278, + "learning_rate": 4.071191561376521e-06, + "loss": 1.1471, + "step": 118248 + }, + { + "epoch": 1.42, + "grad_norm": 8.760824559927078, + "learning_rate": 4.0707211386445e-06, + "loss": 1.1898, + "step": 118251 + }, + { + "epoch": 1.42, + "grad_norm": 6.424164870596068, + "learning_rate": 4.07025073614697e-06, + "loss": 1.1498, + "step": 118254 + }, + { + "epoch": 1.42, + "grad_norm": 298.08964154908983, + "learning_rate": 4.069780353885539e-06, + "loss": 1.0722, + "step": 118257 + }, + { + "epoch": 1.42, + "grad_norm": 10.711965108147785, + "learning_rate": 4.06930999186181e-06, + "loss": 1.2174, + "step": 118260 + }, + { + "epoch": 1.42, + "grad_norm": 13.211550367361856, + "learning_rate": 4.0688396500773895e-06, + "loss": 1.2267, + "step": 118263 + }, + { + "epoch": 1.42, + "grad_norm": 8.003851847520217, + "learning_rate": 4.068369328533888e-06, + "loss": 1.4078, + "step": 118266 + }, + { + "epoch": 1.42, + "grad_norm": 10.962268560719494, + "learning_rate": 4.067899027232904e-06, + "loss": 1.0031, + "step": 118269 + }, + { + "epoch": 1.42, + "grad_norm": 8.176726091291318, + "learning_rate": 4.067428746176042e-06, + "loss": 1.3964, + "step": 118272 + }, + { + "epoch": 1.42, + "grad_norm": 14.364753663475005, + "learning_rate": 4.066958485364908e-06, + "loss": 1.3221, + "step": 118275 + }, + { + "epoch": 1.42, + "grad_norm": 10.306733926050669, + "learning_rate": 4.066488244801111e-06, + "loss": 1.5807, + "step": 118278 + }, + { + "epoch": 1.42, + "grad_norm": 19.38112186587927, + "learning_rate": 4.066018024486248e-06, + "loss": 1.2261, + "step": 118281 + }, + { + "epoch": 1.42, + "grad_norm": 12.741639519360056, + "learning_rate": 4.065547824421929e-06, + "loss": 1.0139, + "step": 118284 + }, + { + "epoch": 1.42, + "grad_norm": 3.736114012516336, + "learning_rate": 4.065077644609762e-06, + "loss": 1.0788, + "step": 118287 + }, + { + "epoch": 1.42, + "grad_norm": 4.8754483784598, + "learning_rate": 4.064607485051343e-06, + "loss": 0.8655, + "step": 118290 + }, + { + "epoch": 1.42, + "grad_norm": 11.16480262260311, + "learning_rate": 4.064137345748283e-06, + "loss": 0.938, + "step": 118293 + }, + { + "epoch": 1.42, + "grad_norm": 8.618330139673523, + "learning_rate": 4.063667226702183e-06, + "loss": 1.0449, + "step": 118296 + }, + { + "epoch": 1.42, + "grad_norm": 23.861944949066466, + "learning_rate": 4.063197127914646e-06, + "loss": 1.0156, + "step": 118299 + }, + { + "epoch": 1.42, + "grad_norm": 117.41253737689364, + "learning_rate": 4.062727049387281e-06, + "loss": 1.3727, + "step": 118302 + }, + { + "epoch": 1.42, + "grad_norm": 14.533083089592155, + "learning_rate": 4.062256991121693e-06, + "loss": 1.2139, + "step": 118305 + }, + { + "epoch": 1.42, + "grad_norm": 12.174398383435664, + "learning_rate": 4.0617869531194785e-06, + "loss": 1.0325, + "step": 118308 + }, + { + "epoch": 1.42, + "grad_norm": 7.393533453976312, + "learning_rate": 4.06131693538225e-06, + "loss": 1.0475, + "step": 118311 + }, + { + "epoch": 1.42, + "grad_norm": 3.550549267461765, + "learning_rate": 4.0608469379116044e-06, + "loss": 0.9884, + "step": 118314 + }, + { + "epoch": 1.42, + "grad_norm": 15.637277200166126, + "learning_rate": 4.060376960709149e-06, + "loss": 1.2949, + "step": 118317 + }, + { + "epoch": 1.42, + "grad_norm": 8.99351491486615, + "learning_rate": 4.059907003776492e-06, + "loss": 1.128, + "step": 118320 + }, + { + "epoch": 1.42, + "grad_norm": 4.8015737090745, + "learning_rate": 4.059437067115228e-06, + "loss": 1.0031, + "step": 118323 + }, + { + "epoch": 1.42, + "grad_norm": 5.951691317279317, + "learning_rate": 4.058967150726966e-06, + "loss": 1.1241, + "step": 118326 + }, + { + "epoch": 1.42, + "grad_norm": 4.2878505086381296, + "learning_rate": 4.058497254613313e-06, + "loss": 1.091, + "step": 118329 + }, + { + "epoch": 1.42, + "grad_norm": 11.988625731704094, + "learning_rate": 4.058027378775865e-06, + "loss": 1.1277, + "step": 118332 + }, + { + "epoch": 1.42, + "grad_norm": 30.869274249759602, + "learning_rate": 4.057557523216232e-06, + "loss": 0.8269, + "step": 118335 + }, + { + "epoch": 1.42, + "grad_norm": 4.917640713565538, + "learning_rate": 4.0570876879360125e-06, + "loss": 1.2322, + "step": 118338 + }, + { + "epoch": 1.42, + "grad_norm": 3.808539220899061, + "learning_rate": 4.056617872936811e-06, + "loss": 1.0269, + "step": 118341 + }, + { + "epoch": 1.42, + "grad_norm": 14.552073134936602, + "learning_rate": 4.056148078220232e-06, + "loss": 0.8669, + "step": 118344 + }, + { + "epoch": 1.42, + "grad_norm": 9.996355449370174, + "learning_rate": 4.0556783037878835e-06, + "loss": 1.6449, + "step": 118347 + }, + { + "epoch": 1.42, + "grad_norm": 13.809959737819964, + "learning_rate": 4.055208549641358e-06, + "loss": 1.2012, + "step": 118350 + }, + { + "epoch": 1.42, + "grad_norm": 3.584202430649627, + "learning_rate": 4.05473881578227e-06, + "loss": 1.1503, + "step": 118353 + }, + { + "epoch": 1.42, + "grad_norm": 6.79074523846391, + "learning_rate": 4.0542691022122125e-06, + "loss": 0.8508, + "step": 118356 + }, + { + "epoch": 1.42, + "grad_norm": 11.21279958999093, + "learning_rate": 4.053799408932797e-06, + "loss": 1.4465, + "step": 118359 + }, + { + "epoch": 1.42, + "grad_norm": 5.0948887086864305, + "learning_rate": 4.053329735945618e-06, + "loss": 1.4198, + "step": 118362 + }, + { + "epoch": 1.42, + "grad_norm": 6.115511779956736, + "learning_rate": 4.052860083252284e-06, + "loss": 0.8013, + "step": 118365 + }, + { + "epoch": 1.42, + "grad_norm": 7.515961020204802, + "learning_rate": 4.052390450854394e-06, + "loss": 1.6064, + "step": 118368 + }, + { + "epoch": 1.42, + "grad_norm": 7.774116185033555, + "learning_rate": 4.051920838753558e-06, + "loss": 1.0819, + "step": 118371 + }, + { + "epoch": 1.42, + "grad_norm": 8.501991536421901, + "learning_rate": 4.051451246951371e-06, + "loss": 1.1979, + "step": 118374 + }, + { + "epoch": 1.42, + "grad_norm": 28.42420160767198, + "learning_rate": 4.05098167544944e-06, + "loss": 0.9745, + "step": 118377 + }, + { + "epoch": 1.42, + "grad_norm": 4.698472995606946, + "learning_rate": 4.050512124249364e-06, + "loss": 1.1532, + "step": 118380 + }, + { + "epoch": 1.42, + "grad_norm": 8.030457523102294, + "learning_rate": 4.050042593352745e-06, + "loss": 1.1594, + "step": 118383 + }, + { + "epoch": 1.42, + "grad_norm": 8.56607403661983, + "learning_rate": 4.0495730827611935e-06, + "loss": 0.9332, + "step": 118386 + }, + { + "epoch": 1.42, + "grad_norm": 8.557847474015947, + "learning_rate": 4.0491035924763e-06, + "loss": 1.0872, + "step": 118389 + }, + { + "epoch": 1.42, + "grad_norm": 8.19110724180093, + "learning_rate": 4.048634122499674e-06, + "loss": 1.2576, + "step": 118392 + }, + { + "epoch": 1.42, + "grad_norm": 12.456083641603728, + "learning_rate": 4.048164672832919e-06, + "loss": 1.1027, + "step": 118395 + }, + { + "epoch": 1.42, + "grad_norm": 14.362634820812598, + "learning_rate": 4.047695243477631e-06, + "loss": 1.0878, + "step": 118398 + }, + { + "epoch": 1.42, + "grad_norm": 16.058329067948357, + "learning_rate": 4.047225834435418e-06, + "loss": 1.1687, + "step": 118401 + }, + { + "epoch": 1.42, + "grad_norm": 11.480989677694525, + "learning_rate": 4.046756445707876e-06, + "loss": 0.9272, + "step": 118404 + }, + { + "epoch": 1.42, + "grad_norm": 4.000136552425547, + "learning_rate": 4.046287077296609e-06, + "loss": 1.469, + "step": 118407 + }, + { + "epoch": 1.42, + "grad_norm": 8.94903572154028, + "learning_rate": 4.045817729203221e-06, + "loss": 0.9403, + "step": 118410 + }, + { + "epoch": 1.42, + "grad_norm": 12.639068296275386, + "learning_rate": 4.045348401429314e-06, + "loss": 1.2519, + "step": 118413 + }, + { + "epoch": 1.42, + "grad_norm": 10.427232564673481, + "learning_rate": 4.044879093976486e-06, + "loss": 1.2612, + "step": 118416 + }, + { + "epoch": 1.42, + "grad_norm": 6.628956455226752, + "learning_rate": 4.044409806846344e-06, + "loss": 1.1472, + "step": 118419 + }, + { + "epoch": 1.42, + "grad_norm": 4.2898297131577, + "learning_rate": 4.043940540040483e-06, + "loss": 0.8758, + "step": 118422 + }, + { + "epoch": 1.42, + "grad_norm": 14.138436649233766, + "learning_rate": 4.0434712935605065e-06, + "loss": 1.0781, + "step": 118425 + }, + { + "epoch": 1.42, + "grad_norm": 10.000845594781403, + "learning_rate": 4.043002067408022e-06, + "loss": 1.0304, + "step": 118428 + }, + { + "epoch": 1.42, + "grad_norm": 6.523010548735008, + "learning_rate": 4.042532861584621e-06, + "loss": 1.1588, + "step": 118431 + }, + { + "epoch": 1.42, + "grad_norm": 23.574018646332007, + "learning_rate": 4.04206367609191e-06, + "loss": 1.4085, + "step": 118434 + }, + { + "epoch": 1.42, + "grad_norm": 14.52212563755334, + "learning_rate": 4.0415945109314925e-06, + "loss": 0.9753, + "step": 118437 + }, + { + "epoch": 1.42, + "grad_norm": 10.26253971937024, + "learning_rate": 4.041125366104968e-06, + "loss": 1.5599, + "step": 118440 + }, + { + "epoch": 1.42, + "grad_norm": 13.041052046944417, + "learning_rate": 4.040656241613931e-06, + "loss": 1.1444, + "step": 118443 + }, + { + "epoch": 1.42, + "grad_norm": 10.295773293714229, + "learning_rate": 4.040187137459989e-06, + "loss": 1.256, + "step": 118446 + }, + { + "epoch": 1.42, + "grad_norm": 8.861875762803532, + "learning_rate": 4.039718053644741e-06, + "loss": 1.072, + "step": 118449 + }, + { + "epoch": 1.42, + "grad_norm": 17.342118275222436, + "learning_rate": 4.039248990169788e-06, + "loss": 1.1137, + "step": 118452 + }, + { + "epoch": 1.42, + "grad_norm": 5.742181893644101, + "learning_rate": 4.038779947036736e-06, + "loss": 0.957, + "step": 118455 + }, + { + "epoch": 1.42, + "grad_norm": 12.610283630223995, + "learning_rate": 4.038310924247176e-06, + "loss": 1.2315, + "step": 118458 + }, + { + "epoch": 1.42, + "grad_norm": 12.644453489529406, + "learning_rate": 4.0378419218027175e-06, + "loss": 1.2365, + "step": 118461 + }, + { + "epoch": 1.42, + "grad_norm": 7.550841154362837, + "learning_rate": 4.037372939704954e-06, + "loss": 1.1894, + "step": 118464 + }, + { + "epoch": 1.42, + "grad_norm": 5.6364803956315646, + "learning_rate": 4.036903977955492e-06, + "loss": 1.2319, + "step": 118467 + }, + { + "epoch": 1.42, + "grad_norm": 14.274091783533212, + "learning_rate": 4.036435036555924e-06, + "loss": 0.9593, + "step": 118470 + }, + { + "epoch": 1.42, + "grad_norm": 14.428164273340629, + "learning_rate": 4.035966115507858e-06, + "loss": 0.975, + "step": 118473 + }, + { + "epoch": 1.42, + "grad_norm": 21.552884144166097, + "learning_rate": 4.035497214812889e-06, + "loss": 1.51, + "step": 118476 + }, + { + "epoch": 1.42, + "grad_norm": 9.703614394269241, + "learning_rate": 4.035028334472624e-06, + "loss": 1.1302, + "step": 118479 + }, + { + "epoch": 1.42, + "grad_norm": 12.662764796199351, + "learning_rate": 4.034559474488659e-06, + "loss": 0.9645, + "step": 118482 + }, + { + "epoch": 1.42, + "grad_norm": 7.618947658095747, + "learning_rate": 4.034090634862589e-06, + "loss": 0.9928, + "step": 118485 + }, + { + "epoch": 1.42, + "grad_norm": 7.803979203525676, + "learning_rate": 4.03362181559602e-06, + "loss": 1.0586, + "step": 118488 + }, + { + "epoch": 1.42, + "grad_norm": 2.63828516091503, + "learning_rate": 4.03315301669055e-06, + "loss": 0.9115, + "step": 118491 + }, + { + "epoch": 1.42, + "grad_norm": 8.120839825314484, + "learning_rate": 4.032684238147784e-06, + "loss": 1.1766, + "step": 118494 + }, + { + "epoch": 1.42, + "grad_norm": 10.003054426082102, + "learning_rate": 4.032215479969312e-06, + "loss": 0.8308, + "step": 118497 + }, + { + "epoch": 1.42, + "grad_norm": 9.695147131427214, + "learning_rate": 4.03174674215674e-06, + "loss": 1.1014, + "step": 118500 + }, + { + "epoch": 1.42, + "grad_norm": 6.090813356602293, + "learning_rate": 4.031278024711671e-06, + "loss": 1.1963, + "step": 118503 + }, + { + "epoch": 1.42, + "grad_norm": 6.214305530204754, + "learning_rate": 4.030809327635695e-06, + "loss": 1.1332, + "step": 118506 + }, + { + "epoch": 1.43, + "grad_norm": 5.641799191847373, + "learning_rate": 4.030340650930421e-06, + "loss": 1.3201, + "step": 118509 + }, + { + "epoch": 1.43, + "grad_norm": 6.340337919531197, + "learning_rate": 4.029871994597441e-06, + "loss": 1.4592, + "step": 118512 + }, + { + "epoch": 1.43, + "grad_norm": 5.794790114139439, + "learning_rate": 4.0294033586383575e-06, + "loss": 0.7369, + "step": 118515 + }, + { + "epoch": 1.43, + "grad_norm": 17.504014655223404, + "learning_rate": 4.028934743054768e-06, + "loss": 1.039, + "step": 118518 + }, + { + "epoch": 1.43, + "grad_norm": 24.81590479040006, + "learning_rate": 4.028466147848279e-06, + "loss": 1.3008, + "step": 118521 + }, + { + "epoch": 1.43, + "grad_norm": 16.23837928372811, + "learning_rate": 4.027997573020484e-06, + "loss": 0.9258, + "step": 118524 + }, + { + "epoch": 1.43, + "grad_norm": 12.737070682220804, + "learning_rate": 4.027529018572978e-06, + "loss": 0.935, + "step": 118527 + }, + { + "epoch": 1.43, + "grad_norm": 13.944244582331459, + "learning_rate": 4.027060484507364e-06, + "loss": 1.2042, + "step": 118530 + }, + { + "epoch": 1.43, + "grad_norm": 2.894700515128496, + "learning_rate": 4.026591970825241e-06, + "loss": 1.3336, + "step": 118533 + }, + { + "epoch": 1.43, + "grad_norm": 10.25631650608399, + "learning_rate": 4.026123477528212e-06, + "loss": 1.1096, + "step": 118536 + }, + { + "epoch": 1.43, + "grad_norm": 9.539291661473545, + "learning_rate": 4.0256550046178674e-06, + "loss": 1.1738, + "step": 118539 + }, + { + "epoch": 1.43, + "grad_norm": 5.513604197559565, + "learning_rate": 4.025186552095811e-06, + "loss": 0.9885, + "step": 118542 + }, + { + "epoch": 1.43, + "grad_norm": 8.404033454741208, + "learning_rate": 4.024718119963646e-06, + "loss": 1.2002, + "step": 118545 + }, + { + "epoch": 1.43, + "grad_norm": 14.374116404753734, + "learning_rate": 4.024249708222963e-06, + "loss": 1.0051, + "step": 118548 + }, + { + "epoch": 1.43, + "grad_norm": 16.20801582264936, + "learning_rate": 4.023781316875362e-06, + "loss": 1.428, + "step": 118551 + }, + { + "epoch": 1.43, + "grad_norm": 10.49197177387468, + "learning_rate": 4.023312945922441e-06, + "loss": 1.426, + "step": 118554 + }, + { + "epoch": 1.43, + "grad_norm": 3.251875791403854, + "learning_rate": 4.0228445953658004e-06, + "loss": 1.1554, + "step": 118557 + }, + { + "epoch": 1.43, + "grad_norm": 3.4584095425891195, + "learning_rate": 4.022376265207039e-06, + "loss": 1.521, + "step": 118560 + }, + { + "epoch": 1.43, + "grad_norm": 7.423854872071692, + "learning_rate": 4.021907955447757e-06, + "loss": 1.1023, + "step": 118563 + }, + { + "epoch": 1.43, + "grad_norm": 6.165642383902259, + "learning_rate": 4.021439666089551e-06, + "loss": 1.1229, + "step": 118566 + }, + { + "epoch": 1.43, + "grad_norm": 21.047045150478297, + "learning_rate": 4.020971397134013e-06, + "loss": 1.0447, + "step": 118569 + }, + { + "epoch": 1.43, + "grad_norm": 7.517096705780006, + "learning_rate": 4.020503148582746e-06, + "loss": 1.3229, + "step": 118572 + }, + { + "epoch": 1.43, + "grad_norm": 16.751472932690604, + "learning_rate": 4.020034920437352e-06, + "loss": 0.9903, + "step": 118575 + }, + { + "epoch": 1.43, + "grad_norm": 11.324372572426862, + "learning_rate": 4.019566712699422e-06, + "loss": 1.3544, + "step": 118578 + }, + { + "epoch": 1.43, + "grad_norm": 9.103308315410036, + "learning_rate": 4.019098525370556e-06, + "loss": 1.2956, + "step": 118581 + }, + { + "epoch": 1.43, + "grad_norm": 9.288099110775365, + "learning_rate": 4.018630358452352e-06, + "loss": 0.9539, + "step": 118584 + }, + { + "epoch": 1.43, + "grad_norm": 6.600076840267852, + "learning_rate": 4.018162211946413e-06, + "loss": 0.9407, + "step": 118587 + }, + { + "epoch": 1.43, + "grad_norm": 10.321552021596412, + "learning_rate": 4.01769408585433e-06, + "loss": 1.384, + "step": 118590 + }, + { + "epoch": 1.43, + "grad_norm": 8.357566535472394, + "learning_rate": 4.0172259801777e-06, + "loss": 1.5904, + "step": 118593 + }, + { + "epoch": 1.43, + "grad_norm": 8.175410498747812, + "learning_rate": 4.016757894918122e-06, + "loss": 1.196, + "step": 118596 + }, + { + "epoch": 1.43, + "grad_norm": 7.187141530105103, + "learning_rate": 4.016289830077195e-06, + "loss": 1.001, + "step": 118599 + }, + { + "epoch": 1.43, + "grad_norm": 11.429507192804024, + "learning_rate": 4.015821785656519e-06, + "loss": 1.1098, + "step": 118602 + }, + { + "epoch": 1.43, + "grad_norm": 9.478071564475743, + "learning_rate": 4.015353761657683e-06, + "loss": 1.4109, + "step": 118605 + }, + { + "epoch": 1.43, + "grad_norm": 9.677688872913604, + "learning_rate": 4.014885758082293e-06, + "loss": 0.7387, + "step": 118608 + }, + { + "epoch": 1.43, + "grad_norm": 5.681322359173935, + "learning_rate": 4.014417774931939e-06, + "loss": 1.2121, + "step": 118611 + }, + { + "epoch": 1.43, + "grad_norm": 14.260280961259244, + "learning_rate": 4.013949812208221e-06, + "loss": 1.465, + "step": 118614 + }, + { + "epoch": 1.43, + "grad_norm": 4.568618550467091, + "learning_rate": 4.01348186991274e-06, + "loss": 0.8826, + "step": 118617 + }, + { + "epoch": 1.43, + "grad_norm": 2.6623482201478135, + "learning_rate": 4.013013948047085e-06, + "loss": 1.0305, + "step": 118620 + }, + { + "epoch": 1.43, + "grad_norm": 15.1554367877794, + "learning_rate": 4.012546046612856e-06, + "loss": 1.3348, + "step": 118623 + }, + { + "epoch": 1.43, + "grad_norm": 11.527505783034872, + "learning_rate": 4.012078165611652e-06, + "loss": 1.0573, + "step": 118626 + }, + { + "epoch": 1.43, + "grad_norm": 9.48811822140577, + "learning_rate": 4.011610305045072e-06, + "loss": 1.0798, + "step": 118629 + }, + { + "epoch": 1.43, + "grad_norm": 7.0595039112684645, + "learning_rate": 4.011142464914708e-06, + "loss": 1.1984, + "step": 118632 + }, + { + "epoch": 1.43, + "grad_norm": 5.845276443042161, + "learning_rate": 4.0106746452221545e-06, + "loss": 0.9809, + "step": 118635 + }, + { + "epoch": 1.43, + "grad_norm": 8.683590025661317, + "learning_rate": 4.010206845969011e-06, + "loss": 1.1121, + "step": 118638 + }, + { + "epoch": 1.43, + "grad_norm": 51.709881410525824, + "learning_rate": 4.009739067156875e-06, + "loss": 1.084, + "step": 118641 + }, + { + "epoch": 1.43, + "grad_norm": 3.7098383066608824, + "learning_rate": 4.009271308787345e-06, + "loss": 1.2849, + "step": 118644 + }, + { + "epoch": 1.43, + "grad_norm": 13.438635979971599, + "learning_rate": 4.00880357086201e-06, + "loss": 1.1871, + "step": 118647 + }, + { + "epoch": 1.43, + "grad_norm": 13.994594363186067, + "learning_rate": 4.00833585338247e-06, + "loss": 1.3512, + "step": 118650 + }, + { + "epoch": 1.43, + "grad_norm": 9.26459394207318, + "learning_rate": 4.007868156350326e-06, + "loss": 1.2707, + "step": 118653 + }, + { + "epoch": 1.43, + "grad_norm": 3.369173223358538, + "learning_rate": 4.007400479767169e-06, + "loss": 0.7928, + "step": 118656 + }, + { + "epoch": 1.43, + "grad_norm": 74.03012946473716, + "learning_rate": 4.006932823634593e-06, + "loss": 1.2484, + "step": 118659 + }, + { + "epoch": 1.43, + "grad_norm": 11.08326232848187, + "learning_rate": 4.006465187954196e-06, + "loss": 0.9529, + "step": 118662 + }, + { + "epoch": 1.43, + "grad_norm": 14.401496630512995, + "learning_rate": 4.0059975727275746e-06, + "loss": 1.1742, + "step": 118665 + }, + { + "epoch": 1.43, + "grad_norm": 9.432869058749128, + "learning_rate": 4.005529977956325e-06, + "loss": 1.1997, + "step": 118668 + }, + { + "epoch": 1.43, + "grad_norm": 9.27268359807776, + "learning_rate": 4.005062403642045e-06, + "loss": 1.2765, + "step": 118671 + }, + { + "epoch": 1.43, + "grad_norm": 10.798901985779548, + "learning_rate": 4.004594849786329e-06, + "loss": 1.0767, + "step": 118674 + }, + { + "epoch": 1.43, + "grad_norm": 8.154014903635163, + "learning_rate": 4.004127316390766e-06, + "loss": 1.3072, + "step": 118677 + }, + { + "epoch": 1.43, + "grad_norm": 11.186884726158207, + "learning_rate": 4.003659803456958e-06, + "loss": 1.3124, + "step": 118680 + }, + { + "epoch": 1.43, + "grad_norm": 16.226302737110483, + "learning_rate": 4.0031923109865015e-06, + "loss": 1.1725, + "step": 118683 + }, + { + "epoch": 1.43, + "grad_norm": 8.396556427679064, + "learning_rate": 4.002724838980987e-06, + "loss": 1.1534, + "step": 118686 + }, + { + "epoch": 1.43, + "grad_norm": 13.054620094918526, + "learning_rate": 4.002257387442011e-06, + "loss": 1.2424, + "step": 118689 + }, + { + "epoch": 1.43, + "grad_norm": 5.974524795904524, + "learning_rate": 4.0017899563711715e-06, + "loss": 1.0555, + "step": 118692 + }, + { + "epoch": 1.43, + "grad_norm": 9.925808766317296, + "learning_rate": 4.0013225457700665e-06, + "loss": 1.0468, + "step": 118695 + }, + { + "epoch": 1.43, + "grad_norm": 8.242667242226693, + "learning_rate": 4.000855155640287e-06, + "loss": 1.2849, + "step": 118698 + }, + { + "epoch": 1.43, + "grad_norm": 6.992747409530098, + "learning_rate": 4.000387785983423e-06, + "loss": 1.0543, + "step": 118701 + }, + { + "epoch": 1.43, + "grad_norm": 7.745132986071257, + "learning_rate": 3.999920436801074e-06, + "loss": 1.2975, + "step": 118704 + }, + { + "epoch": 1.43, + "grad_norm": 5.011048837339833, + "learning_rate": 3.999453108094837e-06, + "loss": 0.9251, + "step": 118707 + }, + { + "epoch": 1.43, + "grad_norm": 15.50470407545458, + "learning_rate": 3.998985799866307e-06, + "loss": 1.5649, + "step": 118710 + }, + { + "epoch": 1.43, + "grad_norm": 8.912550732400918, + "learning_rate": 3.998518512117075e-06, + "loss": 1.3598, + "step": 118713 + }, + { + "epoch": 1.43, + "grad_norm": 9.777865229945961, + "learning_rate": 3.998051244848741e-06, + "loss": 1.3343, + "step": 118716 + }, + { + "epoch": 1.43, + "grad_norm": 16.753407044693063, + "learning_rate": 3.997583998062892e-06, + "loss": 1.0684, + "step": 118719 + }, + { + "epoch": 1.43, + "grad_norm": 13.18154880181597, + "learning_rate": 3.997116771761127e-06, + "loss": 1.228, + "step": 118722 + }, + { + "epoch": 1.43, + "grad_norm": 39.31146795547466, + "learning_rate": 3.996649565945043e-06, + "loss": 0.777, + "step": 118725 + }, + { + "epoch": 1.43, + "grad_norm": 14.799690728059039, + "learning_rate": 3.996182380616229e-06, + "loss": 1.1496, + "step": 118728 + }, + { + "epoch": 1.43, + "grad_norm": 3.7869661660073164, + "learning_rate": 3.995715215776282e-06, + "loss": 1.4309, + "step": 118731 + }, + { + "epoch": 1.43, + "grad_norm": 6.611711363603222, + "learning_rate": 3.995248071426796e-06, + "loss": 0.944, + "step": 118734 + }, + { + "epoch": 1.43, + "grad_norm": 19.746211390067895, + "learning_rate": 3.994780947569369e-06, + "loss": 1.5013, + "step": 118737 + }, + { + "epoch": 1.43, + "grad_norm": 10.831289039587162, + "learning_rate": 3.994313844205592e-06, + "loss": 1.2517, + "step": 118740 + }, + { + "epoch": 1.43, + "grad_norm": 12.614041308435304, + "learning_rate": 3.993846761337054e-06, + "loss": 1.1792, + "step": 118743 + }, + { + "epoch": 1.43, + "grad_norm": 6.256855895713389, + "learning_rate": 3.993379698965355e-06, + "loss": 1.1227, + "step": 118746 + }, + { + "epoch": 1.43, + "grad_norm": 6.663177654137235, + "learning_rate": 3.992912657092086e-06, + "loss": 1.1264, + "step": 118749 + }, + { + "epoch": 1.43, + "grad_norm": 14.126215608930183, + "learning_rate": 3.992445635718847e-06, + "loss": 1.4257, + "step": 118752 + }, + { + "epoch": 1.43, + "grad_norm": 13.973550456170933, + "learning_rate": 3.991978634847223e-06, + "loss": 0.9647, + "step": 118755 + }, + { + "epoch": 1.43, + "grad_norm": 13.721102121598475, + "learning_rate": 3.991511654478816e-06, + "loss": 0.9174, + "step": 118758 + }, + { + "epoch": 1.43, + "grad_norm": 3.1568716282558547, + "learning_rate": 3.991044694615211e-06, + "loss": 1.0566, + "step": 118761 + }, + { + "epoch": 1.43, + "grad_norm": 4.317451374825066, + "learning_rate": 3.990577755258011e-06, + "loss": 1.0855, + "step": 118764 + }, + { + "epoch": 1.43, + "grad_norm": 8.631327043218144, + "learning_rate": 3.990110836408799e-06, + "loss": 1.2467, + "step": 118767 + }, + { + "epoch": 1.43, + "grad_norm": 17.441335994899923, + "learning_rate": 3.989643938069175e-06, + "loss": 1.0356, + "step": 118770 + }, + { + "epoch": 1.43, + "grad_norm": 12.464327657784068, + "learning_rate": 3.989177060240732e-06, + "loss": 1.0174, + "step": 118773 + }, + { + "epoch": 1.43, + "grad_norm": 10.888876247499653, + "learning_rate": 3.9887102029250624e-06, + "loss": 1.1294, + "step": 118776 + }, + { + "epoch": 1.43, + "grad_norm": 11.936790068046443, + "learning_rate": 3.988243366123763e-06, + "loss": 0.9498, + "step": 118779 + }, + { + "epoch": 1.43, + "grad_norm": 12.361190436120403, + "learning_rate": 3.9877765498384226e-06, + "loss": 1.1688, + "step": 118782 + }, + { + "epoch": 1.43, + "grad_norm": 6.859820681910364, + "learning_rate": 3.987309754070633e-06, + "loss": 0.8855, + "step": 118785 + }, + { + "epoch": 1.43, + "grad_norm": 18.434507685367823, + "learning_rate": 3.986842978821989e-06, + "loss": 1.2678, + "step": 118788 + }, + { + "epoch": 1.43, + "grad_norm": 5.535425052037122, + "learning_rate": 3.986376224094087e-06, + "loss": 1.4396, + "step": 118791 + }, + { + "epoch": 1.43, + "grad_norm": 2.870991822354479, + "learning_rate": 3.985909489888514e-06, + "loss": 0.9318, + "step": 118794 + }, + { + "epoch": 1.43, + "grad_norm": 3.0725613558507554, + "learning_rate": 3.985442776206865e-06, + "loss": 1.4129, + "step": 118797 + }, + { + "epoch": 1.43, + "grad_norm": 6.3759201886583154, + "learning_rate": 3.984976083050738e-06, + "loss": 0.9557, + "step": 118800 + }, + { + "epoch": 1.43, + "grad_norm": 7.819747237791369, + "learning_rate": 3.984509410421716e-06, + "loss": 1.5944, + "step": 118803 + }, + { + "epoch": 1.43, + "grad_norm": 6.139920141416857, + "learning_rate": 3.984042758321401e-06, + "loss": 0.9212, + "step": 118806 + }, + { + "epoch": 1.43, + "grad_norm": 6.327164004811058, + "learning_rate": 3.983576126751377e-06, + "loss": 1.2555, + "step": 118809 + }, + { + "epoch": 1.43, + "grad_norm": 20.75757946621771, + "learning_rate": 3.9831095157132415e-06, + "loss": 0.9195, + "step": 118812 + }, + { + "epoch": 1.43, + "grad_norm": 15.901884934359337, + "learning_rate": 3.9826429252085865e-06, + "loss": 1.2467, + "step": 118815 + }, + { + "epoch": 1.43, + "grad_norm": 9.723524880996246, + "learning_rate": 3.982176355239006e-06, + "loss": 1.2685, + "step": 118818 + }, + { + "epoch": 1.43, + "grad_norm": 6.1823274551032, + "learning_rate": 3.981709805806087e-06, + "loss": 1.0281, + "step": 118821 + }, + { + "epoch": 1.43, + "grad_norm": 7.521891748692102, + "learning_rate": 3.981243276911427e-06, + "loss": 1.236, + "step": 118824 + }, + { + "epoch": 1.43, + "grad_norm": 8.56403252851389, + "learning_rate": 3.980776768556614e-06, + "loss": 1.1669, + "step": 118827 + }, + { + "epoch": 1.43, + "grad_norm": 13.301294495039697, + "learning_rate": 3.98031028074324e-06, + "loss": 0.9341, + "step": 118830 + }, + { + "epoch": 1.43, + "grad_norm": 7.294218215894897, + "learning_rate": 3.979843813472904e-06, + "loss": 1.2188, + "step": 118833 + }, + { + "epoch": 1.43, + "grad_norm": 8.685214718870826, + "learning_rate": 3.979377366747188e-06, + "loss": 1.4142, + "step": 118836 + }, + { + "epoch": 1.43, + "grad_norm": 12.022901294765536, + "learning_rate": 3.978910940567689e-06, + "loss": 1.2011, + "step": 118839 + }, + { + "epoch": 1.43, + "grad_norm": 30.692208662103184, + "learning_rate": 3.978444534936001e-06, + "loss": 1.1694, + "step": 118842 + }, + { + "epoch": 1.43, + "grad_norm": 4.14356740026596, + "learning_rate": 3.977978149853714e-06, + "loss": 1.3881, + "step": 118845 + }, + { + "epoch": 1.43, + "grad_norm": 5.828816784014151, + "learning_rate": 3.977511785322413e-06, + "loss": 1.363, + "step": 118848 + }, + { + "epoch": 1.43, + "grad_norm": 14.8551184625053, + "learning_rate": 3.9770454413436965e-06, + "loss": 1.0761, + "step": 118851 + }, + { + "epoch": 1.43, + "grad_norm": 11.63068994765033, + "learning_rate": 3.976579117919155e-06, + "loss": 1.152, + "step": 118854 + }, + { + "epoch": 1.43, + "grad_norm": 7.65579870848655, + "learning_rate": 3.976112815050378e-06, + "loss": 1.0678, + "step": 118857 + }, + { + "epoch": 1.43, + "grad_norm": 3.358451234320636, + "learning_rate": 3.975646532738963e-06, + "loss": 0.7899, + "step": 118860 + }, + { + "epoch": 1.43, + "grad_norm": 14.117081010811868, + "learning_rate": 3.975180270986492e-06, + "loss": 1.6833, + "step": 118863 + }, + { + "epoch": 1.43, + "grad_norm": 19.44487203110372, + "learning_rate": 3.974714029794563e-06, + "loss": 1.1068, + "step": 118866 + }, + { + "epoch": 1.43, + "grad_norm": 6.693597988378315, + "learning_rate": 3.974247809164763e-06, + "loss": 1.1949, + "step": 118869 + }, + { + "epoch": 1.43, + "grad_norm": 7.26382381315986, + "learning_rate": 3.973781609098688e-06, + "loss": 1.354, + "step": 118872 + }, + { + "epoch": 1.43, + "grad_norm": 9.696763733086234, + "learning_rate": 3.973315429597921e-06, + "loss": 0.9285, + "step": 118875 + }, + { + "epoch": 1.43, + "grad_norm": 8.206853722754305, + "learning_rate": 3.972849270664058e-06, + "loss": 0.9582, + "step": 118878 + }, + { + "epoch": 1.43, + "grad_norm": 22.073864841193387, + "learning_rate": 3.972383132298691e-06, + "loss": 1.4131, + "step": 118881 + }, + { + "epoch": 1.43, + "grad_norm": 9.317421374197636, + "learning_rate": 3.971917014503411e-06, + "loss": 1.1204, + "step": 118884 + }, + { + "epoch": 1.43, + "grad_norm": 5.596076710745209, + "learning_rate": 3.971450917279804e-06, + "loss": 0.8335, + "step": 118887 + }, + { + "epoch": 1.43, + "grad_norm": 7.054723881178128, + "learning_rate": 3.970984840629468e-06, + "loss": 1.2645, + "step": 118890 + }, + { + "epoch": 1.43, + "grad_norm": 6.438972855485249, + "learning_rate": 3.970518784553985e-06, + "loss": 1.1236, + "step": 118893 + }, + { + "epoch": 1.43, + "grad_norm": 6.633583761090119, + "learning_rate": 3.970052749054949e-06, + "loss": 1.0467, + "step": 118896 + }, + { + "epoch": 1.43, + "grad_norm": 6.3070691627686575, + "learning_rate": 3.969586734133955e-06, + "loss": 1.0521, + "step": 118899 + }, + { + "epoch": 1.43, + "grad_norm": 9.463304627532343, + "learning_rate": 3.9691207397925854e-06, + "loss": 1.0612, + "step": 118902 + }, + { + "epoch": 1.43, + "grad_norm": 8.791943755897703, + "learning_rate": 3.968654766032435e-06, + "loss": 0.9583, + "step": 118905 + }, + { + "epoch": 1.43, + "grad_norm": 13.882562554743771, + "learning_rate": 3.968188812855097e-06, + "loss": 1.295, + "step": 118908 + }, + { + "epoch": 1.43, + "grad_norm": 4.141294812926575, + "learning_rate": 3.9677228802621534e-06, + "loss": 0.9252, + "step": 118911 + }, + { + "epoch": 1.43, + "grad_norm": 9.902497368278496, + "learning_rate": 3.967256968255203e-06, + "loss": 0.9684, + "step": 118914 + }, + { + "epoch": 1.43, + "grad_norm": 22.36904521932664, + "learning_rate": 3.9667910768358285e-06, + "loss": 1.2702, + "step": 118917 + }, + { + "epoch": 1.43, + "grad_norm": 18.825780785062438, + "learning_rate": 3.966325206005622e-06, + "loss": 1.1529, + "step": 118920 + }, + { + "epoch": 1.43, + "grad_norm": 33.952936062071174, + "learning_rate": 3.965859355766175e-06, + "loss": 1.1285, + "step": 118923 + }, + { + "epoch": 1.43, + "grad_norm": 10.576295025851776, + "learning_rate": 3.96539352611908e-06, + "loss": 1.2669, + "step": 118926 + }, + { + "epoch": 1.43, + "grad_norm": 4.660740392452763, + "learning_rate": 3.96492771706592e-06, + "loss": 1.4148, + "step": 118929 + }, + { + "epoch": 1.43, + "grad_norm": 7.474902431839276, + "learning_rate": 3.96446192860829e-06, + "loss": 1.0322, + "step": 118932 + }, + { + "epoch": 1.43, + "grad_norm": 10.01723426022288, + "learning_rate": 3.963996160747775e-06, + "loss": 1.4823, + "step": 118935 + }, + { + "epoch": 1.43, + "grad_norm": 22.761153719726668, + "learning_rate": 3.963530413485967e-06, + "loss": 1.3218, + "step": 118938 + }, + { + "epoch": 1.43, + "grad_norm": 4.3596569237691085, + "learning_rate": 3.963064686824458e-06, + "loss": 1.0676, + "step": 118941 + }, + { + "epoch": 1.43, + "grad_norm": 16.08689826100023, + "learning_rate": 3.962598980764833e-06, + "loss": 1.4301, + "step": 118944 + }, + { + "epoch": 1.43, + "grad_norm": 10.168045782497243, + "learning_rate": 3.962133295308681e-06, + "loss": 1.2726, + "step": 118947 + }, + { + "epoch": 1.43, + "grad_norm": 12.963296987602233, + "learning_rate": 3.961667630457597e-06, + "loss": 1.0201, + "step": 118950 + }, + { + "epoch": 1.43, + "grad_norm": 26.125620750236447, + "learning_rate": 3.961201986213168e-06, + "loss": 1.2132, + "step": 118953 + }, + { + "epoch": 1.43, + "grad_norm": 34.28910856260077, + "learning_rate": 3.960736362576976e-06, + "loss": 1.4692, + "step": 118956 + }, + { + "epoch": 1.43, + "grad_norm": 3.2102429586341494, + "learning_rate": 3.960270759550617e-06, + "loss": 1.4077, + "step": 118959 + }, + { + "epoch": 1.43, + "grad_norm": 3.6090727750212688, + "learning_rate": 3.959805177135677e-06, + "loss": 1.2012, + "step": 118962 + }, + { + "epoch": 1.43, + "grad_norm": 12.001632372653825, + "learning_rate": 3.959339615333747e-06, + "loss": 1.4363, + "step": 118965 + }, + { + "epoch": 1.43, + "grad_norm": 14.050000594802158, + "learning_rate": 3.958874074146419e-06, + "loss": 1.2205, + "step": 118968 + }, + { + "epoch": 1.43, + "grad_norm": 5.797452377681104, + "learning_rate": 3.9584085535752735e-06, + "loss": 1.2722, + "step": 118971 + }, + { + "epoch": 1.43, + "grad_norm": 5.928502137480851, + "learning_rate": 3.957943053621908e-06, + "loss": 1.3635, + "step": 118974 + }, + { + "epoch": 1.43, + "grad_norm": 2.7320928666145194, + "learning_rate": 3.957477574287902e-06, + "loss": 1.1769, + "step": 118977 + }, + { + "epoch": 1.43, + "grad_norm": 34.5305813082053, + "learning_rate": 3.957012115574852e-06, + "loss": 1.3078, + "step": 118980 + }, + { + "epoch": 1.43, + "grad_norm": 4.851349041805757, + "learning_rate": 3.956546677484339e-06, + "loss": 1.1665, + "step": 118983 + }, + { + "epoch": 1.43, + "grad_norm": 18.831285907776824, + "learning_rate": 3.9560812600179574e-06, + "loss": 1.0396, + "step": 118986 + }, + { + "epoch": 1.43, + "grad_norm": 19.460032614210913, + "learning_rate": 3.955615863177292e-06, + "loss": 1.2405, + "step": 118989 + }, + { + "epoch": 1.43, + "grad_norm": 9.641938374779832, + "learning_rate": 3.955150486963937e-06, + "loss": 1.3465, + "step": 118992 + }, + { + "epoch": 1.43, + "grad_norm": 9.995685315864986, + "learning_rate": 3.954685131379474e-06, + "loss": 1.0201, + "step": 118995 + }, + { + "epoch": 1.43, + "grad_norm": 135.36488544829334, + "learning_rate": 3.954219796425491e-06, + "loss": 0.9666, + "step": 118998 + }, + { + "epoch": 1.43, + "grad_norm": 4.900395667671861, + "learning_rate": 3.953754482103579e-06, + "loss": 1.1435, + "step": 119001 + }, + { + "epoch": 1.43, + "grad_norm": 10.364813741651814, + "learning_rate": 3.953289188415323e-06, + "loss": 1.0359, + "step": 119004 + }, + { + "epoch": 1.43, + "grad_norm": 3.436224699722364, + "learning_rate": 3.9528239153623185e-06, + "loss": 1.0302, + "step": 119007 + }, + { + "epoch": 1.43, + "grad_norm": 14.20683120002901, + "learning_rate": 3.952358662946143e-06, + "loss": 1.3688, + "step": 119010 + }, + { + "epoch": 1.43, + "grad_norm": 8.527084458775317, + "learning_rate": 3.95189343116839e-06, + "loss": 1.8732, + "step": 119013 + }, + { + "epoch": 1.43, + "grad_norm": 11.779272672631295, + "learning_rate": 3.9514282200306485e-06, + "loss": 1.5974, + "step": 119016 + }, + { + "epoch": 1.43, + "grad_norm": 15.682381972958417, + "learning_rate": 3.950963029534501e-06, + "loss": 0.9602, + "step": 119019 + }, + { + "epoch": 1.43, + "grad_norm": 3.1927674786861027, + "learning_rate": 3.950497859681541e-06, + "loss": 1.215, + "step": 119022 + }, + { + "epoch": 1.43, + "grad_norm": 16.470495876794057, + "learning_rate": 3.9500327104733505e-06, + "loss": 1.4431, + "step": 119025 + }, + { + "epoch": 1.43, + "grad_norm": 6.257047755235451, + "learning_rate": 3.949567581911518e-06, + "loss": 0.9238, + "step": 119028 + }, + { + "epoch": 1.43, + "grad_norm": 11.704644409403159, + "learning_rate": 3.949102473997633e-06, + "loss": 0.9138, + "step": 119031 + }, + { + "epoch": 1.43, + "grad_norm": 11.783386987761412, + "learning_rate": 3.948637386733284e-06, + "loss": 0.9057, + "step": 119034 + }, + { + "epoch": 1.43, + "grad_norm": 11.018483594013949, + "learning_rate": 3.948172320120056e-06, + "loss": 1.2773, + "step": 119037 + }, + { + "epoch": 1.43, + "grad_norm": 11.258932958287716, + "learning_rate": 3.947707274159534e-06, + "loss": 1.2095, + "step": 119040 + }, + { + "epoch": 1.43, + "grad_norm": 8.872244133951806, + "learning_rate": 3.947242248853304e-06, + "loss": 1.1581, + "step": 119043 + }, + { + "epoch": 1.43, + "grad_norm": 8.859941872745427, + "learning_rate": 3.9467772442029585e-06, + "loss": 1.0214, + "step": 119046 + }, + { + "epoch": 1.43, + "grad_norm": 5.670828484057769, + "learning_rate": 3.946312260210086e-06, + "loss": 0.9966, + "step": 119049 + }, + { + "epoch": 1.43, + "grad_norm": 14.42817854234982, + "learning_rate": 3.945847296876264e-06, + "loss": 1.1546, + "step": 119052 + }, + { + "epoch": 1.43, + "grad_norm": 4.608164750176391, + "learning_rate": 3.945382354203084e-06, + "loss": 1.2922, + "step": 119055 + }, + { + "epoch": 1.43, + "grad_norm": 11.368217781862958, + "learning_rate": 3.944917432192139e-06, + "loss": 1.106, + "step": 119058 + }, + { + "epoch": 1.43, + "grad_norm": 13.035013821320339, + "learning_rate": 3.944452530845007e-06, + "loss": 1.5262, + "step": 119061 + }, + { + "epoch": 1.43, + "grad_norm": 6.536609494273482, + "learning_rate": 3.943987650163277e-06, + "loss": 1.1057, + "step": 119064 + }, + { + "epoch": 1.43, + "grad_norm": 15.75720349279011, + "learning_rate": 3.943522790148534e-06, + "loss": 1.0244, + "step": 119067 + }, + { + "epoch": 1.43, + "grad_norm": 8.107146445991171, + "learning_rate": 3.943057950802366e-06, + "loss": 0.8019, + "step": 119070 + }, + { + "epoch": 1.43, + "grad_norm": 16.51734813903025, + "learning_rate": 3.94259313212636e-06, + "loss": 1.2781, + "step": 119073 + }, + { + "epoch": 1.43, + "grad_norm": 5.183403374107516, + "learning_rate": 3.942128334122105e-06, + "loss": 1.2564, + "step": 119076 + }, + { + "epoch": 1.43, + "grad_norm": 3.7860721349122013, + "learning_rate": 3.941663556791183e-06, + "loss": 0.8791, + "step": 119079 + }, + { + "epoch": 1.43, + "grad_norm": 21.090108315884773, + "learning_rate": 3.94119880013518e-06, + "loss": 1.2622, + "step": 119082 + }, + { + "epoch": 1.43, + "grad_norm": 9.289516862266249, + "learning_rate": 3.940734064155681e-06, + "loss": 1.1284, + "step": 119085 + }, + { + "epoch": 1.43, + "grad_norm": 3.2437642577717543, + "learning_rate": 3.940269348854279e-06, + "loss": 1.3042, + "step": 119088 + }, + { + "epoch": 1.43, + "grad_norm": 4.092312384247463, + "learning_rate": 3.93980465423255e-06, + "loss": 1.3067, + "step": 119091 + }, + { + "epoch": 1.43, + "grad_norm": 6.392477246155261, + "learning_rate": 3.939339980292086e-06, + "loss": 0.9013, + "step": 119094 + }, + { + "epoch": 1.43, + "grad_norm": 8.511905355736925, + "learning_rate": 3.938875327034471e-06, + "loss": 1.0676, + "step": 119097 + }, + { + "epoch": 1.43, + "grad_norm": 6.313849449686541, + "learning_rate": 3.938410694461295e-06, + "loss": 1.3262, + "step": 119100 + }, + { + "epoch": 1.43, + "grad_norm": 11.003418678966026, + "learning_rate": 3.93794608257414e-06, + "loss": 1.0775, + "step": 119103 + }, + { + "epoch": 1.43, + "grad_norm": 12.1580750464121, + "learning_rate": 3.937481491374588e-06, + "loss": 1.5125, + "step": 119106 + }, + { + "epoch": 1.43, + "grad_norm": 8.065361073790541, + "learning_rate": 3.937016920864229e-06, + "loss": 1.2225, + "step": 119109 + }, + { + "epoch": 1.43, + "grad_norm": 7.131447345255282, + "learning_rate": 3.936552371044645e-06, + "loss": 1.1449, + "step": 119112 + }, + { + "epoch": 1.43, + "grad_norm": 3.1171289204828057, + "learning_rate": 3.936087841917429e-06, + "loss": 1.5158, + "step": 119115 + }, + { + "epoch": 1.43, + "grad_norm": 3.816070559136554, + "learning_rate": 3.935623333484158e-06, + "loss": 0.9645, + "step": 119118 + }, + { + "epoch": 1.43, + "grad_norm": 28.08576838940645, + "learning_rate": 3.935158845746422e-06, + "loss": 1.7278, + "step": 119121 + }, + { + "epoch": 1.43, + "grad_norm": 8.789512711149184, + "learning_rate": 3.9346943787058016e-06, + "loss": 1.0198, + "step": 119124 + }, + { + "epoch": 1.43, + "grad_norm": 8.655736306434505, + "learning_rate": 3.934229932363885e-06, + "loss": 1.2001, + "step": 119127 + }, + { + "epoch": 1.43, + "grad_norm": 55.54237757191722, + "learning_rate": 3.9337655067222594e-06, + "loss": 1.3637, + "step": 119130 + }, + { + "epoch": 1.43, + "grad_norm": 12.776528166394614, + "learning_rate": 3.933301101782504e-06, + "loss": 1.2452, + "step": 119133 + }, + { + "epoch": 1.43, + "grad_norm": 3.8811537233739775, + "learning_rate": 3.932836717546206e-06, + "loss": 1.5416, + "step": 119136 + }, + { + "epoch": 1.43, + "grad_norm": 12.266176792636386, + "learning_rate": 3.932372354014952e-06, + "loss": 1.1827, + "step": 119139 + }, + { + "epoch": 1.43, + "grad_norm": 25.71596293420756, + "learning_rate": 3.931908011190328e-06, + "loss": 1.0955, + "step": 119142 + }, + { + "epoch": 1.43, + "grad_norm": 21.03569954210968, + "learning_rate": 3.931443689073917e-06, + "loss": 1.2609, + "step": 119145 + }, + { + "epoch": 1.43, + "grad_norm": 2.338256875367844, + "learning_rate": 3.930979387667299e-06, + "loss": 1.1195, + "step": 119148 + }, + { + "epoch": 1.43, + "grad_norm": 14.590968371469337, + "learning_rate": 3.930515106972063e-06, + "loss": 1.0944, + "step": 119151 + }, + { + "epoch": 1.43, + "grad_norm": 19.281658481782117, + "learning_rate": 3.930050846989792e-06, + "loss": 1.42, + "step": 119154 + }, + { + "epoch": 1.43, + "grad_norm": 6.629300899867042, + "learning_rate": 3.929586607722074e-06, + "loss": 1.393, + "step": 119157 + }, + { + "epoch": 1.43, + "grad_norm": 8.935768898707233, + "learning_rate": 3.929122389170489e-06, + "loss": 1.1535, + "step": 119160 + }, + { + "epoch": 1.43, + "grad_norm": 8.821838485600518, + "learning_rate": 3.928658191336624e-06, + "loss": 0.9961, + "step": 119163 + }, + { + "epoch": 1.43, + "grad_norm": 6.762881267859045, + "learning_rate": 3.9281940142220595e-06, + "loss": 0.9591, + "step": 119166 + }, + { + "epoch": 1.43, + "grad_norm": 7.404213319771563, + "learning_rate": 3.927729857828384e-06, + "loss": 1.0725, + "step": 119169 + }, + { + "epoch": 1.43, + "grad_norm": 17.839205888442365, + "learning_rate": 3.927265722157178e-06, + "loss": 1.0828, + "step": 119172 + }, + { + "epoch": 1.43, + "grad_norm": 9.771906919738814, + "learning_rate": 3.926801607210024e-06, + "loss": 1.1791, + "step": 119175 + }, + { + "epoch": 1.43, + "grad_norm": 7.280877871984541, + "learning_rate": 3.926337512988509e-06, + "loss": 1.1366, + "step": 119178 + }, + { + "epoch": 1.43, + "grad_norm": 6.918320934217714, + "learning_rate": 3.925873439494217e-06, + "loss": 1.4109, + "step": 119181 + }, + { + "epoch": 1.43, + "grad_norm": 17.14390816845069, + "learning_rate": 3.925409386728736e-06, + "loss": 1.2964, + "step": 119184 + }, + { + "epoch": 1.43, + "grad_norm": 14.311026490970164, + "learning_rate": 3.924945354693643e-06, + "loss": 1.0412, + "step": 119187 + }, + { + "epoch": 1.43, + "grad_norm": 3.550321213451927, + "learning_rate": 3.92448134339052e-06, + "loss": 1.0961, + "step": 119190 + }, + { + "epoch": 1.43, + "grad_norm": 4.401256610654463, + "learning_rate": 3.924017352820954e-06, + "loss": 1.1734, + "step": 119193 + }, + { + "epoch": 1.43, + "grad_norm": 8.07722557540493, + "learning_rate": 3.923553382986531e-06, + "loss": 1.2526, + "step": 119196 + }, + { + "epoch": 1.43, + "grad_norm": 10.696715190966131, + "learning_rate": 3.92308943388883e-06, + "loss": 1.292, + "step": 119199 + }, + { + "epoch": 1.43, + "grad_norm": 8.524167988744658, + "learning_rate": 3.922625505529434e-06, + "loss": 1.0068, + "step": 119202 + }, + { + "epoch": 1.43, + "grad_norm": 5.059005005376712, + "learning_rate": 3.922161597909933e-06, + "loss": 1.2976, + "step": 119205 + }, + { + "epoch": 1.43, + "grad_norm": 13.622205281486469, + "learning_rate": 3.9216977110319e-06, + "loss": 0.8934, + "step": 119208 + }, + { + "epoch": 1.43, + "grad_norm": 8.39003423382947, + "learning_rate": 3.921233844896928e-06, + "loss": 0.8504, + "step": 119211 + }, + { + "epoch": 1.43, + "grad_norm": 8.368356271187388, + "learning_rate": 3.920769999506592e-06, + "loss": 1.1734, + "step": 119214 + }, + { + "epoch": 1.43, + "grad_norm": 2.700634654791766, + "learning_rate": 3.920306174862477e-06, + "loss": 1.0419, + "step": 119217 + }, + { + "epoch": 1.43, + "grad_norm": 13.372253919176798, + "learning_rate": 3.9198423709661684e-06, + "loss": 1.2026, + "step": 119220 + }, + { + "epoch": 1.43, + "grad_norm": 10.743287001509175, + "learning_rate": 3.91937858781925e-06, + "loss": 1.0408, + "step": 119223 + }, + { + "epoch": 1.43, + "grad_norm": 2.634660156728792, + "learning_rate": 3.9189148254233e-06, + "loss": 1.4785, + "step": 119226 + }, + { + "epoch": 1.43, + "grad_norm": 6.666197506424618, + "learning_rate": 3.918451083779906e-06, + "loss": 1.2739, + "step": 119229 + }, + { + "epoch": 1.43, + "grad_norm": 3.174705409006233, + "learning_rate": 3.917987362890645e-06, + "loss": 1.0655, + "step": 119232 + }, + { + "epoch": 1.43, + "grad_norm": 3.3800404080541955, + "learning_rate": 3.9175236627571024e-06, + "loss": 1.1302, + "step": 119235 + }, + { + "epoch": 1.43, + "grad_norm": 4.836001976193734, + "learning_rate": 3.917059983380863e-06, + "loss": 1.5651, + "step": 119238 + }, + { + "epoch": 1.43, + "grad_norm": 11.698619439383204, + "learning_rate": 3.916596324763504e-06, + "loss": 1.4636, + "step": 119241 + }, + { + "epoch": 1.43, + "grad_norm": 4.543204669242924, + "learning_rate": 3.9161326869066106e-06, + "loss": 0.9964, + "step": 119244 + }, + { + "epoch": 1.43, + "grad_norm": 5.596201399788629, + "learning_rate": 3.915669069811766e-06, + "loss": 1.3165, + "step": 119247 + }, + { + "epoch": 1.43, + "grad_norm": 16.97644846216865, + "learning_rate": 3.915205473480553e-06, + "loss": 0.8908, + "step": 119250 + }, + { + "epoch": 1.43, + "grad_norm": 8.836050703072846, + "learning_rate": 3.914741897914553e-06, + "loss": 1.2501, + "step": 119253 + }, + { + "epoch": 1.43, + "grad_norm": 6.2012407950004995, + "learning_rate": 3.914278343115343e-06, + "loss": 0.9428, + "step": 119256 + }, + { + "epoch": 1.43, + "grad_norm": 3.110698597131012, + "learning_rate": 3.91381480908451e-06, + "loss": 0.9615, + "step": 119259 + }, + { + "epoch": 1.43, + "grad_norm": 12.747605992648463, + "learning_rate": 3.913351295823634e-06, + "loss": 1.039, + "step": 119262 + }, + { + "epoch": 1.43, + "grad_norm": 10.566604487394942, + "learning_rate": 3.912887803334301e-06, + "loss": 1.1083, + "step": 119265 + }, + { + "epoch": 1.43, + "grad_norm": 14.13953578307589, + "learning_rate": 3.912424331618086e-06, + "loss": 0.9733, + "step": 119268 + }, + { + "epoch": 1.43, + "grad_norm": 7.025585225157395, + "learning_rate": 3.911960880676577e-06, + "loss": 1.3033, + "step": 119271 + }, + { + "epoch": 1.43, + "grad_norm": 8.707520753230098, + "learning_rate": 3.91149745051135e-06, + "loss": 1.3557, + "step": 119274 + }, + { + "epoch": 1.43, + "grad_norm": 27.872884999177842, + "learning_rate": 3.911034041123993e-06, + "loss": 1.1205, + "step": 119277 + }, + { + "epoch": 1.43, + "grad_norm": 9.210662312212397, + "learning_rate": 3.9105706525160795e-06, + "loss": 1.4684, + "step": 119280 + }, + { + "epoch": 1.43, + "grad_norm": 5.8935725708645155, + "learning_rate": 3.910107284689194e-06, + "loss": 1.3953, + "step": 119283 + }, + { + "epoch": 1.43, + "grad_norm": 7.427573365085582, + "learning_rate": 3.90964393764492e-06, + "loss": 1.1657, + "step": 119286 + }, + { + "epoch": 1.43, + "grad_norm": 16.678814143670465, + "learning_rate": 3.909180611384837e-06, + "loss": 1.1786, + "step": 119289 + }, + { + "epoch": 1.43, + "grad_norm": 19.561462159017296, + "learning_rate": 3.908717305910531e-06, + "loss": 1.4291, + "step": 119292 + }, + { + "epoch": 1.43, + "grad_norm": 35.79385366546476, + "learning_rate": 3.908254021223577e-06, + "loss": 1.0545, + "step": 119295 + }, + { + "epoch": 1.43, + "grad_norm": 15.116696681171817, + "learning_rate": 3.907790757325556e-06, + "loss": 1.1652, + "step": 119298 + }, + { + "epoch": 1.43, + "grad_norm": 8.443241753072417, + "learning_rate": 3.90732751421805e-06, + "loss": 1.0516, + "step": 119301 + }, + { + "epoch": 1.43, + "grad_norm": 4.872123952699431, + "learning_rate": 3.906864291902645e-06, + "loss": 1.0138, + "step": 119304 + }, + { + "epoch": 1.43, + "grad_norm": 4.36898808925251, + "learning_rate": 3.906401090380912e-06, + "loss": 0.8957, + "step": 119307 + }, + { + "epoch": 1.43, + "grad_norm": 8.38026114891033, + "learning_rate": 3.905937909654439e-06, + "loss": 1.3694, + "step": 119310 + }, + { + "epoch": 1.43, + "grad_norm": 16.822429515050224, + "learning_rate": 3.905474749724808e-06, + "loss": 1.2983, + "step": 119313 + }, + { + "epoch": 1.43, + "grad_norm": 3.925936846571618, + "learning_rate": 3.905011610593592e-06, + "loss": 1.267, + "step": 119316 + }, + { + "epoch": 1.43, + "grad_norm": 6.914590195168752, + "learning_rate": 3.90454849226238e-06, + "loss": 0.9865, + "step": 119319 + }, + { + "epoch": 1.43, + "grad_norm": 4.291116945994551, + "learning_rate": 3.904085394732745e-06, + "loss": 1.1765, + "step": 119322 + }, + { + "epoch": 1.43, + "grad_norm": 8.225738356269394, + "learning_rate": 3.9036223180062715e-06, + "loss": 1.288, + "step": 119325 + }, + { + "epoch": 1.43, + "grad_norm": 7.361394280363574, + "learning_rate": 3.903159262084538e-06, + "loss": 1.5529, + "step": 119328 + }, + { + "epoch": 1.43, + "grad_norm": 13.177257768417551, + "learning_rate": 3.902696226969129e-06, + "loss": 1.2927, + "step": 119331 + }, + { + "epoch": 1.43, + "grad_norm": 3.466123987483708, + "learning_rate": 3.902233212661619e-06, + "loss": 1.2482, + "step": 119334 + }, + { + "epoch": 1.43, + "grad_norm": 34.65933227952353, + "learning_rate": 3.901770219163594e-06, + "loss": 1.4776, + "step": 119337 + }, + { + "epoch": 1.44, + "grad_norm": 18.665104699446392, + "learning_rate": 3.901307246476628e-06, + "loss": 0.8175, + "step": 119340 + }, + { + "epoch": 1.44, + "grad_norm": 19.073013309366537, + "learning_rate": 3.900844294602303e-06, + "loss": 1.1118, + "step": 119343 + }, + { + "epoch": 1.44, + "grad_norm": 6.103414221632159, + "learning_rate": 3.900381363542203e-06, + "loss": 1.3094, + "step": 119346 + }, + { + "epoch": 1.44, + "grad_norm": 9.960484709896587, + "learning_rate": 3.899918453297899e-06, + "loss": 1.087, + "step": 119349 + }, + { + "epoch": 1.44, + "grad_norm": 10.703581837853811, + "learning_rate": 3.899455563870979e-06, + "loss": 1.3965, + "step": 119352 + }, + { + "epoch": 1.44, + "grad_norm": 7.855937509686675, + "learning_rate": 3.898992695263022e-06, + "loss": 1.0397, + "step": 119355 + }, + { + "epoch": 1.44, + "grad_norm": 8.474977349020431, + "learning_rate": 3.898529847475605e-06, + "loss": 1.2348, + "step": 119358 + }, + { + "epoch": 1.44, + "grad_norm": 22.78163895297912, + "learning_rate": 3.8980670205103045e-06, + "loss": 0.9416, + "step": 119361 + }, + { + "epoch": 1.44, + "grad_norm": 4.881572570534141, + "learning_rate": 3.897604214368704e-06, + "loss": 0.8859, + "step": 119364 + }, + { + "epoch": 1.44, + "grad_norm": 6.711897314165577, + "learning_rate": 3.897141429052381e-06, + "loss": 1.1395, + "step": 119367 + }, + { + "epoch": 1.44, + "grad_norm": 3.2989095235689385, + "learning_rate": 3.896678664562917e-06, + "loss": 1.3527, + "step": 119370 + }, + { + "epoch": 1.44, + "grad_norm": 8.17394376323354, + "learning_rate": 3.896215920901895e-06, + "loss": 1.1734, + "step": 119373 + }, + { + "epoch": 1.44, + "grad_norm": 6.942193497068605, + "learning_rate": 3.895753198070884e-06, + "loss": 1.3235, + "step": 119376 + }, + { + "epoch": 1.44, + "grad_norm": 5.475299672582665, + "learning_rate": 3.8952904960714735e-06, + "loss": 1.0241, + "step": 119379 + }, + { + "epoch": 1.44, + "grad_norm": 6.387155462818035, + "learning_rate": 3.8948278149052324e-06, + "loss": 1.2804, + "step": 119382 + }, + { + "epoch": 1.44, + "grad_norm": 11.917297034330524, + "learning_rate": 3.8943651545737496e-06, + "loss": 0.9413, + "step": 119385 + }, + { + "epoch": 1.44, + "grad_norm": 4.760657639459318, + "learning_rate": 3.893902515078596e-06, + "loss": 0.8825, + "step": 119388 + }, + { + "epoch": 1.44, + "grad_norm": 11.541838364324162, + "learning_rate": 3.893439896421353e-06, + "loss": 1.1553, + "step": 119391 + }, + { + "epoch": 1.44, + "grad_norm": 5.719368045505074, + "learning_rate": 3.892977298603601e-06, + "loss": 1.2048, + "step": 119394 + }, + { + "epoch": 1.44, + "grad_norm": 8.507859072745209, + "learning_rate": 3.8925147216269195e-06, + "loss": 1.22, + "step": 119397 + }, + { + "epoch": 1.44, + "grad_norm": 5.565826018593101, + "learning_rate": 3.892052165492886e-06, + "loss": 0.9192, + "step": 119400 + }, + { + "epoch": 1.44, + "grad_norm": 17.344494184024768, + "learning_rate": 3.891589630203076e-06, + "loss": 1.6236, + "step": 119403 + }, + { + "epoch": 1.44, + "grad_norm": 5.174515111586823, + "learning_rate": 3.89112711575907e-06, + "loss": 1.1342, + "step": 119406 + }, + { + "epoch": 1.44, + "grad_norm": 5.562554658519332, + "learning_rate": 3.890664622162447e-06, + "loss": 1.0728, + "step": 119409 + }, + { + "epoch": 1.44, + "grad_norm": 20.79583907392862, + "learning_rate": 3.890202149414787e-06, + "loss": 1.3368, + "step": 119412 + }, + { + "epoch": 1.44, + "grad_norm": 6.587813813838193, + "learning_rate": 3.889739697517664e-06, + "loss": 1.369, + "step": 119415 + }, + { + "epoch": 1.44, + "grad_norm": 14.068461475702758, + "learning_rate": 3.8892772664726585e-06, + "loss": 1.1548, + "step": 119418 + }, + { + "epoch": 1.44, + "grad_norm": 39.19653843432011, + "learning_rate": 3.888814856281352e-06, + "loss": 1.1922, + "step": 119421 + }, + { + "epoch": 1.44, + "grad_norm": 7.0305099194005205, + "learning_rate": 3.888352466945315e-06, + "loss": 1.0849, + "step": 119424 + }, + { + "epoch": 1.44, + "grad_norm": 11.31727061569418, + "learning_rate": 3.887890098466134e-06, + "loss": 0.8775, + "step": 119427 + }, + { + "epoch": 1.44, + "grad_norm": 4.385448896887718, + "learning_rate": 3.88742775084538e-06, + "loss": 1.2961, + "step": 119430 + }, + { + "epoch": 1.44, + "grad_norm": 21.098974788896733, + "learning_rate": 3.8869654240846315e-06, + "loss": 1.2235, + "step": 119433 + }, + { + "epoch": 1.44, + "grad_norm": 17.88026608674356, + "learning_rate": 3.8865031181854685e-06, + "loss": 1.3595, + "step": 119436 + }, + { + "epoch": 1.44, + "grad_norm": 3.2633134892348106, + "learning_rate": 3.886040833149472e-06, + "loss": 1.4817, + "step": 119439 + }, + { + "epoch": 1.44, + "grad_norm": 5.892687784152011, + "learning_rate": 3.885578568978215e-06, + "loss": 1.1613, + "step": 119442 + }, + { + "epoch": 1.44, + "grad_norm": 3.950192714113558, + "learning_rate": 3.8851163256732736e-06, + "loss": 1.4811, + "step": 119445 + }, + { + "epoch": 1.44, + "grad_norm": 12.385796509177018, + "learning_rate": 3.8846541032362285e-06, + "loss": 1.3325, + "step": 119448 + }, + { + "epoch": 1.44, + "grad_norm": 7.3751320828410325, + "learning_rate": 3.884191901668655e-06, + "loss": 1.2801, + "step": 119451 + }, + { + "epoch": 1.44, + "grad_norm": 14.023961964267555, + "learning_rate": 3.883729720972135e-06, + "loss": 1.0369, + "step": 119454 + }, + { + "epoch": 1.44, + "grad_norm": 17.52556048567312, + "learning_rate": 3.8832675611482385e-06, + "loss": 1.0742, + "step": 119457 + }, + { + "epoch": 1.44, + "grad_norm": 7.326621834252663, + "learning_rate": 3.882805422198548e-06, + "loss": 1.2716, + "step": 119460 + }, + { + "epoch": 1.44, + "grad_norm": 13.94025828548044, + "learning_rate": 3.882343304124641e-06, + "loss": 1.1894, + "step": 119463 + }, + { + "epoch": 1.44, + "grad_norm": 16.476290303240926, + "learning_rate": 3.881881206928094e-06, + "loss": 0.9871, + "step": 119466 + }, + { + "epoch": 1.44, + "grad_norm": 7.910989669525781, + "learning_rate": 3.881419130610479e-06, + "loss": 1.0941, + "step": 119469 + }, + { + "epoch": 1.44, + "grad_norm": 6.040712661888562, + "learning_rate": 3.880957075173376e-06, + "loss": 0.9933, + "step": 119472 + }, + { + "epoch": 1.44, + "grad_norm": 9.79323534697081, + "learning_rate": 3.880495040618363e-06, + "loss": 1.4128, + "step": 119475 + }, + { + "epoch": 1.44, + "grad_norm": 9.557802361796444, + "learning_rate": 3.880033026947015e-06, + "loss": 1.0941, + "step": 119478 + }, + { + "epoch": 1.44, + "grad_norm": 8.8423552991044, + "learning_rate": 3.879571034160915e-06, + "loss": 1.3804, + "step": 119481 + }, + { + "epoch": 1.44, + "grad_norm": 5.165404220618536, + "learning_rate": 3.87910906226163e-06, + "loss": 1.0814, + "step": 119484 + }, + { + "epoch": 1.44, + "grad_norm": 4.038779542409069, + "learning_rate": 3.878647111250745e-06, + "loss": 0.7739, + "step": 119487 + }, + { + "epoch": 1.44, + "grad_norm": 6.144112052692056, + "learning_rate": 3.87818518112983e-06, + "loss": 1.1793, + "step": 119490 + }, + { + "epoch": 1.44, + "grad_norm": 7.164756453489584, + "learning_rate": 3.877723271900465e-06, + "loss": 1.148, + "step": 119493 + }, + { + "epoch": 1.44, + "grad_norm": 6.79838682895364, + "learning_rate": 3.877261383564223e-06, + "loss": 0.9181, + "step": 119496 + }, + { + "epoch": 1.44, + "grad_norm": 19.755095155055333, + "learning_rate": 3.876799516122684e-06, + "loss": 1.1585, + "step": 119499 + }, + { + "epoch": 1.44, + "grad_norm": 5.166040193330805, + "learning_rate": 3.8763376695774205e-06, + "loss": 1.1408, + "step": 119502 + }, + { + "epoch": 1.44, + "grad_norm": 9.355674142983622, + "learning_rate": 3.875875843930014e-06, + "loss": 1.251, + "step": 119505 + }, + { + "epoch": 1.44, + "grad_norm": 8.91033498761875, + "learning_rate": 3.875414039182039e-06, + "loss": 1.2068, + "step": 119508 + }, + { + "epoch": 1.44, + "grad_norm": 13.441101976390835, + "learning_rate": 3.874952255335066e-06, + "loss": 1.1114, + "step": 119511 + }, + { + "epoch": 1.44, + "grad_norm": 14.122922989480239, + "learning_rate": 3.874490492390674e-06, + "loss": 1.4894, + "step": 119514 + }, + { + "epoch": 1.44, + "grad_norm": 20.42422958572473, + "learning_rate": 3.87402875035044e-06, + "loss": 1.2403, + "step": 119517 + }, + { + "epoch": 1.44, + "grad_norm": 6.464517610023457, + "learning_rate": 3.873567029215944e-06, + "loss": 1.2145, + "step": 119520 + }, + { + "epoch": 1.44, + "grad_norm": 8.149842996776007, + "learning_rate": 3.873105328988752e-06, + "loss": 1.2521, + "step": 119523 + }, + { + "epoch": 1.44, + "grad_norm": 8.93213805855761, + "learning_rate": 3.872643649670445e-06, + "loss": 1.0337, + "step": 119526 + }, + { + "epoch": 1.44, + "grad_norm": 4.7553705655903045, + "learning_rate": 3.872181991262601e-06, + "loss": 1.0587, + "step": 119529 + }, + { + "epoch": 1.44, + "grad_norm": 6.988752484119486, + "learning_rate": 3.87172035376679e-06, + "loss": 1.0922, + "step": 119532 + }, + { + "epoch": 1.44, + "grad_norm": 4.246445621587417, + "learning_rate": 3.871258737184593e-06, + "loss": 1.1202, + "step": 119535 + }, + { + "epoch": 1.44, + "grad_norm": 8.78960640449598, + "learning_rate": 3.8707971415175794e-06, + "loss": 1.3647, + "step": 119538 + }, + { + "epoch": 1.44, + "grad_norm": 3.7427045895934, + "learning_rate": 3.870335566767327e-06, + "loss": 1.0788, + "step": 119541 + }, + { + "epoch": 1.44, + "grad_norm": 77.55425571094499, + "learning_rate": 3.869874012935412e-06, + "loss": 1.5987, + "step": 119544 + }, + { + "epoch": 1.44, + "grad_norm": 13.461619770906642, + "learning_rate": 3.869412480023412e-06, + "loss": 1.3487, + "step": 119547 + }, + { + "epoch": 1.44, + "grad_norm": 12.710097109325948, + "learning_rate": 3.8689509680328985e-06, + "loss": 1.1995, + "step": 119550 + }, + { + "epoch": 1.44, + "grad_norm": 2.9202490725901944, + "learning_rate": 3.868489476965445e-06, + "loss": 0.8859, + "step": 119553 + }, + { + "epoch": 1.44, + "grad_norm": 9.453068643000213, + "learning_rate": 3.868028006822626e-06, + "loss": 1.1153, + "step": 119556 + }, + { + "epoch": 1.44, + "grad_norm": 8.169095148234284, + "learning_rate": 3.8675665576060205e-06, + "loss": 1.1833, + "step": 119559 + }, + { + "epoch": 1.44, + "grad_norm": 9.933680672173722, + "learning_rate": 3.867105129317205e-06, + "loss": 1.2575, + "step": 119562 + }, + { + "epoch": 1.44, + "grad_norm": 6.593818938048291, + "learning_rate": 3.866643721957746e-06, + "loss": 1.3744, + "step": 119565 + }, + { + "epoch": 1.44, + "grad_norm": 8.982205905305294, + "learning_rate": 3.866182335529223e-06, + "loss": 0.9538, + "step": 119568 + }, + { + "epoch": 1.44, + "grad_norm": 6.042523988298191, + "learning_rate": 3.8657209700332144e-06, + "loss": 1.38, + "step": 119571 + }, + { + "epoch": 1.44, + "grad_norm": 8.858075354963464, + "learning_rate": 3.8652596254712904e-06, + "loss": 1.4086, + "step": 119574 + }, + { + "epoch": 1.44, + "grad_norm": 11.73713728868652, + "learning_rate": 3.864798301845022e-06, + "loss": 1.5215, + "step": 119577 + }, + { + "epoch": 1.44, + "grad_norm": 3.2322710622436785, + "learning_rate": 3.864336999155987e-06, + "loss": 1.1472, + "step": 119580 + }, + { + "epoch": 1.44, + "grad_norm": 12.211045824534345, + "learning_rate": 3.8638757174057596e-06, + "loss": 1.5883, + "step": 119583 + }, + { + "epoch": 1.44, + "grad_norm": 5.559831838602083, + "learning_rate": 3.863414456595914e-06, + "loss": 0.913, + "step": 119586 + }, + { + "epoch": 1.44, + "grad_norm": 8.982015055122767, + "learning_rate": 3.862953216728029e-06, + "loss": 1.2898, + "step": 119589 + }, + { + "epoch": 1.44, + "grad_norm": 3.068344556481306, + "learning_rate": 3.862491997803674e-06, + "loss": 1.538, + "step": 119592 + }, + { + "epoch": 1.44, + "grad_norm": 6.8280183178584775, + "learning_rate": 3.862030799824418e-06, + "loss": 0.94, + "step": 119595 + }, + { + "epoch": 1.44, + "grad_norm": 3.478253250461295, + "learning_rate": 3.861569622791841e-06, + "loss": 0.8279, + "step": 119598 + }, + { + "epoch": 1.44, + "grad_norm": 15.372766169864695, + "learning_rate": 3.861108466707519e-06, + "loss": 0.8511, + "step": 119601 + }, + { + "epoch": 1.44, + "grad_norm": 8.377965160234831, + "learning_rate": 3.86064733157302e-06, + "loss": 1.2495, + "step": 119604 + }, + { + "epoch": 1.44, + "grad_norm": 10.952656392642087, + "learning_rate": 3.86018621738992e-06, + "loss": 1.3182, + "step": 119607 + }, + { + "epoch": 1.44, + "grad_norm": 9.765675902343474, + "learning_rate": 3.8597251241597925e-06, + "loss": 0.9307, + "step": 119610 + }, + { + "epoch": 1.44, + "grad_norm": 8.969814863432035, + "learning_rate": 3.859264051884215e-06, + "loss": 1.2408, + "step": 119613 + }, + { + "epoch": 1.44, + "grad_norm": 4.037774817837252, + "learning_rate": 3.858803000564757e-06, + "loss": 0.853, + "step": 119616 + }, + { + "epoch": 1.44, + "grad_norm": 5.859908265102701, + "learning_rate": 3.85834197020299e-06, + "loss": 1.1314, + "step": 119619 + }, + { + "epoch": 1.44, + "grad_norm": 10.706952792810704, + "learning_rate": 3.857880960800489e-06, + "loss": 1.1108, + "step": 119622 + }, + { + "epoch": 1.44, + "grad_norm": 11.362432750664558, + "learning_rate": 3.857419972358828e-06, + "loss": 1.5923, + "step": 119625 + }, + { + "epoch": 1.44, + "grad_norm": 13.899451068628107, + "learning_rate": 3.856959004879582e-06, + "loss": 1.2083, + "step": 119628 + }, + { + "epoch": 1.44, + "grad_norm": 4.541567184504155, + "learning_rate": 3.8564980583643205e-06, + "loss": 0.9868, + "step": 119631 + }, + { + "epoch": 1.44, + "grad_norm": 11.117200978926661, + "learning_rate": 3.856037132814622e-06, + "loss": 1.0005, + "step": 119634 + }, + { + "epoch": 1.44, + "grad_norm": 9.17259469992822, + "learning_rate": 3.855576228232052e-06, + "loss": 0.8055, + "step": 119637 + }, + { + "epoch": 1.44, + "grad_norm": 27.663876583234217, + "learning_rate": 3.855115344618187e-06, + "loss": 1.3081, + "step": 119640 + }, + { + "epoch": 1.44, + "grad_norm": 27.1661855614169, + "learning_rate": 3.854654481974602e-06, + "loss": 1.2655, + "step": 119643 + }, + { + "epoch": 1.44, + "grad_norm": 10.146994618050933, + "learning_rate": 3.854193640302866e-06, + "loss": 1.0605, + "step": 119646 + }, + { + "epoch": 1.44, + "grad_norm": 8.335144079007712, + "learning_rate": 3.853732819604552e-06, + "loss": 1.0184, + "step": 119649 + }, + { + "epoch": 1.44, + "grad_norm": 10.092556648152588, + "learning_rate": 3.8532720198812356e-06, + "loss": 1.5441, + "step": 119652 + }, + { + "epoch": 1.44, + "grad_norm": 13.339163626756275, + "learning_rate": 3.8528112411344896e-06, + "loss": 1.4098, + "step": 119655 + }, + { + "epoch": 1.44, + "grad_norm": 10.051558811728782, + "learning_rate": 3.852350483365885e-06, + "loss": 1.0354, + "step": 119658 + }, + { + "epoch": 1.44, + "grad_norm": 10.622756290825363, + "learning_rate": 3.851889746576991e-06, + "loss": 1.4431, + "step": 119661 + }, + { + "epoch": 1.44, + "grad_norm": 28.7932947177427, + "learning_rate": 3.8514290307693835e-06, + "loss": 1.6351, + "step": 119664 + }, + { + "epoch": 1.44, + "grad_norm": 7.1615046648872465, + "learning_rate": 3.850968335944632e-06, + "loss": 1.2065, + "step": 119667 + }, + { + "epoch": 1.44, + "grad_norm": 12.304358682035964, + "learning_rate": 3.850507662104316e-06, + "loss": 1.0824, + "step": 119670 + }, + { + "epoch": 1.44, + "grad_norm": 6.209092348395425, + "learning_rate": 3.850047009249998e-06, + "loss": 0.9693, + "step": 119673 + }, + { + "epoch": 1.44, + "grad_norm": 7.457689646772505, + "learning_rate": 3.849586377383257e-06, + "loss": 1.0891, + "step": 119676 + }, + { + "epoch": 1.44, + "grad_norm": 11.973038250303919, + "learning_rate": 3.849125766505661e-06, + "loss": 1.2551, + "step": 119679 + }, + { + "epoch": 1.44, + "grad_norm": 4.035132467828822, + "learning_rate": 3.8486651766187855e-06, + "loss": 1.186, + "step": 119682 + }, + { + "epoch": 1.44, + "grad_norm": 8.805190537834333, + "learning_rate": 3.848204607724196e-06, + "loss": 1.1541, + "step": 119685 + }, + { + "epoch": 1.44, + "grad_norm": 12.982751053548728, + "learning_rate": 3.847744059823468e-06, + "loss": 1.2022, + "step": 119688 + }, + { + "epoch": 1.44, + "grad_norm": 3.5476739922606377, + "learning_rate": 3.847283532918175e-06, + "loss": 0.7798, + "step": 119691 + }, + { + "epoch": 1.44, + "grad_norm": 8.164351842423818, + "learning_rate": 3.846823027009886e-06, + "loss": 1.3306, + "step": 119694 + }, + { + "epoch": 1.44, + "grad_norm": 5.318664932169842, + "learning_rate": 3.846362542100179e-06, + "loss": 1.2744, + "step": 119697 + }, + { + "epoch": 1.44, + "grad_norm": 11.689033002122303, + "learning_rate": 3.845902078190617e-06, + "loss": 1.2442, + "step": 119700 + }, + { + "epoch": 1.44, + "grad_norm": 12.042641116417917, + "learning_rate": 3.845441635282774e-06, + "loss": 1.4255, + "step": 119703 + }, + { + "epoch": 1.44, + "grad_norm": 2.9153770647621546, + "learning_rate": 3.84498121337822e-06, + "loss": 1.1955, + "step": 119706 + }, + { + "epoch": 1.44, + "grad_norm": 4.077730365207683, + "learning_rate": 3.844520812478532e-06, + "loss": 0.9911, + "step": 119709 + }, + { + "epoch": 1.44, + "grad_norm": 7.520488166678603, + "learning_rate": 3.844060432585274e-06, + "loss": 1.2328, + "step": 119712 + }, + { + "epoch": 1.44, + "grad_norm": 5.160985155814126, + "learning_rate": 3.84360007370002e-06, + "loss": 1.1949, + "step": 119715 + }, + { + "epoch": 1.44, + "grad_norm": 6.085449183024669, + "learning_rate": 3.843139735824345e-06, + "loss": 0.8874, + "step": 119718 + }, + { + "epoch": 1.44, + "grad_norm": 9.07388977062506, + "learning_rate": 3.842679418959814e-06, + "loss": 1.1951, + "step": 119721 + }, + { + "epoch": 1.44, + "grad_norm": 9.455078278634742, + "learning_rate": 3.842219123108002e-06, + "loss": 1.4633, + "step": 119724 + }, + { + "epoch": 1.44, + "grad_norm": 2.370286362491631, + "learning_rate": 3.841758848270476e-06, + "loss": 1.1188, + "step": 119727 + }, + { + "epoch": 1.44, + "grad_norm": 18.2708936558263, + "learning_rate": 3.841298594448808e-06, + "loss": 0.9703, + "step": 119730 + }, + { + "epoch": 1.44, + "grad_norm": 8.184713497874785, + "learning_rate": 3.84083836164457e-06, + "loss": 1.0599, + "step": 119733 + }, + { + "epoch": 1.44, + "grad_norm": 7.819987280356306, + "learning_rate": 3.840378149859335e-06, + "loss": 1.0186, + "step": 119736 + }, + { + "epoch": 1.44, + "grad_norm": 7.784446528930163, + "learning_rate": 3.839917959094668e-06, + "loss": 0.8807, + "step": 119739 + }, + { + "epoch": 1.44, + "grad_norm": 33.4886976471128, + "learning_rate": 3.839457789352146e-06, + "loss": 1.4159, + "step": 119742 + }, + { + "epoch": 1.44, + "grad_norm": 5.267996327251294, + "learning_rate": 3.838997640633331e-06, + "loss": 1.1443, + "step": 119745 + }, + { + "epoch": 1.44, + "grad_norm": 128.1064784900514, + "learning_rate": 3.838537512939799e-06, + "loss": 1.2449, + "step": 119748 + }, + { + "epoch": 1.44, + "grad_norm": 10.649683397748472, + "learning_rate": 3.838077406273121e-06, + "loss": 1.4077, + "step": 119751 + }, + { + "epoch": 1.44, + "grad_norm": 6.3889713124534575, + "learning_rate": 3.8376173206348635e-06, + "loss": 1.202, + "step": 119754 + }, + { + "epoch": 1.44, + "grad_norm": 10.397460023497144, + "learning_rate": 3.837157256026597e-06, + "loss": 1.0608, + "step": 119757 + }, + { + "epoch": 1.44, + "grad_norm": 36.252158078279265, + "learning_rate": 3.836697212449897e-06, + "loss": 1.193, + "step": 119760 + }, + { + "epoch": 1.44, + "grad_norm": 7.312130123520738, + "learning_rate": 3.836237189906325e-06, + "loss": 1.1283, + "step": 119763 + }, + { + "epoch": 1.44, + "grad_norm": 2.9193395190019875, + "learning_rate": 3.8357771883974605e-06, + "loss": 1.2497, + "step": 119766 + }, + { + "epoch": 1.44, + "grad_norm": 3.7407011057685766, + "learning_rate": 3.835317207924865e-06, + "loss": 1.2904, + "step": 119769 + }, + { + "epoch": 1.44, + "grad_norm": 8.891890842281637, + "learning_rate": 3.83485724849011e-06, + "loss": 1.0573, + "step": 119772 + }, + { + "epoch": 1.44, + "grad_norm": 17.09551642708119, + "learning_rate": 3.834397310094766e-06, + "loss": 1.2283, + "step": 119775 + }, + { + "epoch": 1.44, + "grad_norm": 9.628682185992552, + "learning_rate": 3.8339373927404075e-06, + "loss": 1.0224, + "step": 119778 + }, + { + "epoch": 1.44, + "grad_norm": 8.404129444842589, + "learning_rate": 3.833477496428596e-06, + "loss": 1.0931, + "step": 119781 + }, + { + "epoch": 1.44, + "grad_norm": 12.469401813194654, + "learning_rate": 3.833017621160908e-06, + "loss": 1.0659, + "step": 119784 + }, + { + "epoch": 1.44, + "grad_norm": 13.663379767304173, + "learning_rate": 3.832557766938907e-06, + "loss": 1.4021, + "step": 119787 + }, + { + "epoch": 1.44, + "grad_norm": 14.400516628174731, + "learning_rate": 3.832097933764167e-06, + "loss": 1.1913, + "step": 119790 + }, + { + "epoch": 1.44, + "grad_norm": 11.22475728692287, + "learning_rate": 3.8316381216382524e-06, + "loss": 1.1891, + "step": 119793 + }, + { + "epoch": 1.44, + "grad_norm": 14.2296733583269, + "learning_rate": 3.831178330562735e-06, + "loss": 1.0944, + "step": 119796 + }, + { + "epoch": 1.44, + "grad_norm": 3.8731911116875937, + "learning_rate": 3.830718560539184e-06, + "loss": 1.3335, + "step": 119799 + }, + { + "epoch": 1.44, + "grad_norm": 5.332960700331233, + "learning_rate": 3.830258811569171e-06, + "loss": 1.2644, + "step": 119802 + }, + { + "epoch": 1.44, + "grad_norm": 3.592380013535458, + "learning_rate": 3.82979908365426e-06, + "loss": 1.2231, + "step": 119805 + }, + { + "epoch": 1.44, + "grad_norm": 14.653571201136641, + "learning_rate": 3.829339376796025e-06, + "loss": 1.4712, + "step": 119808 + }, + { + "epoch": 1.44, + "grad_norm": 3.6941621932118003, + "learning_rate": 3.828879690996029e-06, + "loss": 1.3788, + "step": 119811 + }, + { + "epoch": 1.44, + "grad_norm": 5.276424587332421, + "learning_rate": 3.828420026255844e-06, + "loss": 1.2143, + "step": 119814 + }, + { + "epoch": 1.44, + "grad_norm": 7.107547429886598, + "learning_rate": 3.8279603825770404e-06, + "loss": 1.0339, + "step": 119817 + }, + { + "epoch": 1.44, + "grad_norm": 7.804413017743813, + "learning_rate": 3.827500759961183e-06, + "loss": 1.0987, + "step": 119820 + }, + { + "epoch": 1.44, + "grad_norm": 8.279663026571042, + "learning_rate": 3.827041158409841e-06, + "loss": 1.4071, + "step": 119823 + }, + { + "epoch": 1.44, + "grad_norm": 8.611685990149715, + "learning_rate": 3.8265815779245885e-06, + "loss": 1.1088, + "step": 119826 + }, + { + "epoch": 1.44, + "grad_norm": 11.732071072024645, + "learning_rate": 3.826122018506986e-06, + "loss": 1.2989, + "step": 119829 + }, + { + "epoch": 1.44, + "grad_norm": 6.5584965574862535, + "learning_rate": 3.8256624801586085e-06, + "loss": 0.9429, + "step": 119832 + }, + { + "epoch": 1.44, + "grad_norm": 9.353731472898994, + "learning_rate": 3.825202962881017e-06, + "loss": 1.3119, + "step": 119835 + }, + { + "epoch": 1.44, + "grad_norm": 9.527428503027117, + "learning_rate": 3.824743466675784e-06, + "loss": 1.0629, + "step": 119838 + }, + { + "epoch": 1.44, + "grad_norm": 3.5753034394106913, + "learning_rate": 3.824283991544477e-06, + "loss": 1.0792, + "step": 119841 + }, + { + "epoch": 1.44, + "grad_norm": 7.5593070890908045, + "learning_rate": 3.823824537488669e-06, + "loss": 1.2063, + "step": 119844 + }, + { + "epoch": 1.44, + "grad_norm": 6.565080165108024, + "learning_rate": 3.823365104509917e-06, + "loss": 1.1126, + "step": 119847 + }, + { + "epoch": 1.44, + "grad_norm": 22.676265320374917, + "learning_rate": 3.8229056926098e-06, + "loss": 1.3709, + "step": 119850 + }, + { + "epoch": 1.44, + "grad_norm": 28.632004645051055, + "learning_rate": 3.822446301789879e-06, + "loss": 0.9474, + "step": 119853 + }, + { + "epoch": 1.44, + "grad_norm": 7.549542087445432, + "learning_rate": 3.821986932051722e-06, + "loss": 1.4219, + "step": 119856 + }, + { + "epoch": 1.44, + "grad_norm": 64.7106756249801, + "learning_rate": 3.821527583396901e-06, + "loss": 1.1479, + "step": 119859 + }, + { + "epoch": 1.44, + "grad_norm": 5.538539649615424, + "learning_rate": 3.82106825582698e-06, + "loss": 0.9837, + "step": 119862 + }, + { + "epoch": 1.44, + "grad_norm": 14.33993674278066, + "learning_rate": 3.820608949343525e-06, + "loss": 1.3661, + "step": 119865 + }, + { + "epoch": 1.44, + "grad_norm": 6.716730717513354, + "learning_rate": 3.8201496639481106e-06, + "loss": 1.1722, + "step": 119868 + }, + { + "epoch": 1.44, + "grad_norm": 8.160134077065317, + "learning_rate": 3.819690399642299e-06, + "loss": 1.1464, + "step": 119871 + }, + { + "epoch": 1.44, + "grad_norm": 4.914546832010444, + "learning_rate": 3.819231156427654e-06, + "loss": 1.1369, + "step": 119874 + }, + { + "epoch": 1.44, + "grad_norm": 8.1483345768445, + "learning_rate": 3.818771934305747e-06, + "loss": 1.0579, + "step": 119877 + }, + { + "epoch": 1.44, + "grad_norm": 21.955646253910576, + "learning_rate": 3.818312733278145e-06, + "loss": 1.3597, + "step": 119880 + }, + { + "epoch": 1.44, + "grad_norm": 15.84986026788523, + "learning_rate": 3.817853553346416e-06, + "loss": 1.2442, + "step": 119883 + }, + { + "epoch": 1.44, + "grad_norm": 12.638484287102072, + "learning_rate": 3.817394394512129e-06, + "loss": 0.8917, + "step": 119886 + }, + { + "epoch": 1.44, + "grad_norm": 7.831675944472526, + "learning_rate": 3.816935256776843e-06, + "loss": 1.1663, + "step": 119889 + }, + { + "epoch": 1.44, + "grad_norm": 29.21731914161777, + "learning_rate": 3.816476140142135e-06, + "loss": 1.2453, + "step": 119892 + }, + { + "epoch": 1.44, + "grad_norm": 12.471201280285923, + "learning_rate": 3.816017044609562e-06, + "loss": 0.9044, + "step": 119895 + }, + { + "epoch": 1.44, + "grad_norm": 7.038928125729816, + "learning_rate": 3.8155579701807e-06, + "loss": 1.6378, + "step": 119898 + }, + { + "epoch": 1.44, + "grad_norm": 27.394768451941196, + "learning_rate": 3.815098916857106e-06, + "loss": 1.1772, + "step": 119901 + }, + { + "epoch": 1.44, + "grad_norm": 5.187101311891336, + "learning_rate": 3.8146398846403532e-06, + "loss": 1.3132, + "step": 119904 + }, + { + "epoch": 1.44, + "grad_norm": 8.558386049363257, + "learning_rate": 3.8141808735320064e-06, + "loss": 1.2046, + "step": 119907 + }, + { + "epoch": 1.44, + "grad_norm": 9.572731750612242, + "learning_rate": 3.8137218835336344e-06, + "loss": 1.4907, + "step": 119910 + }, + { + "epoch": 1.44, + "grad_norm": 9.57452990270355, + "learning_rate": 3.8132629146468027e-06, + "loss": 1.0867, + "step": 119913 + }, + { + "epoch": 1.44, + "grad_norm": 8.302541798542071, + "learning_rate": 3.812803966873072e-06, + "loss": 1.2199, + "step": 119916 + }, + { + "epoch": 1.44, + "grad_norm": 5.454250537984784, + "learning_rate": 3.812345040214013e-06, + "loss": 1.2199, + "step": 119919 + }, + { + "epoch": 1.44, + "grad_norm": 9.600854722437738, + "learning_rate": 3.811886134671191e-06, + "loss": 1.0876, + "step": 119922 + }, + { + "epoch": 1.44, + "grad_norm": 4.4617131149534766, + "learning_rate": 3.811427250246177e-06, + "loss": 1.434, + "step": 119925 + }, + { + "epoch": 1.44, + "grad_norm": 9.084389652289456, + "learning_rate": 3.810968386940528e-06, + "loss": 1.2373, + "step": 119928 + }, + { + "epoch": 1.44, + "grad_norm": 9.22330167275835, + "learning_rate": 3.810509544755816e-06, + "loss": 1.0444, + "step": 119931 + }, + { + "epoch": 1.44, + "grad_norm": 5.836041638320351, + "learning_rate": 3.810050723693608e-06, + "loss": 1.392, + "step": 119934 + }, + { + "epoch": 1.44, + "grad_norm": 18.563433558737877, + "learning_rate": 3.8095919237554644e-06, + "loss": 1.232, + "step": 119937 + }, + { + "epoch": 1.44, + "grad_norm": 21.47476036590935, + "learning_rate": 3.809133144942957e-06, + "loss": 1.1943, + "step": 119940 + }, + { + "epoch": 1.44, + "grad_norm": 5.655408838613776, + "learning_rate": 3.808674387257645e-06, + "loss": 1.4043, + "step": 119943 + }, + { + "epoch": 1.44, + "grad_norm": 26.247448664571863, + "learning_rate": 3.808215650701097e-06, + "loss": 1.2225, + "step": 119946 + }, + { + "epoch": 1.44, + "grad_norm": 12.681217087951978, + "learning_rate": 3.807756935274879e-06, + "loss": 1.294, + "step": 119949 + }, + { + "epoch": 1.44, + "grad_norm": 6.052216027882841, + "learning_rate": 3.807298240980559e-06, + "loss": 1.6388, + "step": 119952 + }, + { + "epoch": 1.44, + "grad_norm": 2.470411599779524, + "learning_rate": 3.8068395678197002e-06, + "loss": 1.2973, + "step": 119955 + }, + { + "epoch": 1.44, + "grad_norm": 22.198748630137274, + "learning_rate": 3.8063809157938626e-06, + "loss": 1.0538, + "step": 119958 + }, + { + "epoch": 1.44, + "grad_norm": 2.9620175831395614, + "learning_rate": 3.8059222849046174e-06, + "loss": 1.1563, + "step": 119961 + }, + { + "epoch": 1.44, + "grad_norm": 21.062792429288812, + "learning_rate": 3.8054636751535267e-06, + "loss": 0.9903, + "step": 119964 + }, + { + "epoch": 1.44, + "grad_norm": 17.27083931874457, + "learning_rate": 3.8050050865421617e-06, + "loss": 1.3748, + "step": 119967 + }, + { + "epoch": 1.44, + "grad_norm": 17.05674856166314, + "learning_rate": 3.80454651907208e-06, + "loss": 1.4689, + "step": 119970 + }, + { + "epoch": 1.44, + "grad_norm": 7.640151225384005, + "learning_rate": 3.8040879727448487e-06, + "loss": 0.9746, + "step": 119973 + }, + { + "epoch": 1.44, + "grad_norm": 11.296844736995602, + "learning_rate": 3.8036294475620374e-06, + "loss": 1.0357, + "step": 119976 + }, + { + "epoch": 1.44, + "grad_norm": 5.777445139860695, + "learning_rate": 3.803170943525206e-06, + "loss": 1.1906, + "step": 119979 + }, + { + "epoch": 1.44, + "grad_norm": 4.974765246588198, + "learning_rate": 3.802712460635918e-06, + "loss": 0.9674, + "step": 119982 + }, + { + "epoch": 1.44, + "grad_norm": 43.20230025007839, + "learning_rate": 3.8022539988957384e-06, + "loss": 1.2088, + "step": 119985 + }, + { + "epoch": 1.44, + "grad_norm": 4.733685428209127, + "learning_rate": 3.801795558306235e-06, + "loss": 0.8674, + "step": 119988 + }, + { + "epoch": 1.44, + "grad_norm": 5.521181851089348, + "learning_rate": 3.801337138868969e-06, + "loss": 1.2073, + "step": 119991 + }, + { + "epoch": 1.44, + "grad_norm": 11.853145234391853, + "learning_rate": 3.800878740585512e-06, + "loss": 0.9192, + "step": 119994 + }, + { + "epoch": 1.44, + "grad_norm": 7.645179436092351, + "learning_rate": 3.8004203634574223e-06, + "loss": 1.0643, + "step": 119997 + }, + { + "epoch": 1.44, + "grad_norm": 10.912826503586492, + "learning_rate": 3.799962007486261e-06, + "loss": 1.4175, + "step": 120000 + }, + { + "epoch": 1.44, + "grad_norm": 14.760991613908066, + "learning_rate": 3.7995036726735957e-06, + "loss": 1.1951, + "step": 120003 + }, + { + "epoch": 1.44, + "grad_norm": 5.747716661187517, + "learning_rate": 3.7990453590209953e-06, + "loss": 1.1441, + "step": 120006 + }, + { + "epoch": 1.44, + "grad_norm": 7.036865699116911, + "learning_rate": 3.7985870665300163e-06, + "loss": 0.9782, + "step": 120009 + }, + { + "epoch": 1.44, + "grad_norm": 10.13488329793364, + "learning_rate": 3.798128795202225e-06, + "loss": 1.4123, + "step": 120012 + }, + { + "epoch": 1.44, + "grad_norm": 30.29055714418123, + "learning_rate": 3.797670545039186e-06, + "loss": 1.2789, + "step": 120015 + }, + { + "epoch": 1.44, + "grad_norm": 69.33378122025331, + "learning_rate": 3.7972123160424677e-06, + "loss": 1.3593, + "step": 120018 + }, + { + "epoch": 1.44, + "grad_norm": 7.483147252107292, + "learning_rate": 3.7967541082136295e-06, + "loss": 1.3266, + "step": 120021 + }, + { + "epoch": 1.44, + "grad_norm": 6.280007913932844, + "learning_rate": 3.7962959215542318e-06, + "loss": 1.0948, + "step": 120024 + }, + { + "epoch": 1.44, + "grad_norm": 27.518070228685435, + "learning_rate": 3.7958377560658398e-06, + "loss": 1.2165, + "step": 120027 + }, + { + "epoch": 1.44, + "grad_norm": 5.492807718196421, + "learning_rate": 3.795379611750021e-06, + "loss": 1.027, + "step": 120030 + }, + { + "epoch": 1.44, + "grad_norm": 9.982031279425422, + "learning_rate": 3.79492148860834e-06, + "loss": 1.0137, + "step": 120033 + }, + { + "epoch": 1.44, + "grad_norm": 2.750060183188608, + "learning_rate": 3.794463386642352e-06, + "loss": 1.2996, + "step": 120036 + }, + { + "epoch": 1.44, + "grad_norm": 6.811264778315092, + "learning_rate": 3.7940053058536297e-06, + "loss": 1.4344, + "step": 120039 + }, + { + "epoch": 1.44, + "grad_norm": 10.183982947787248, + "learning_rate": 3.7935472462437285e-06, + "loss": 1.2662, + "step": 120042 + }, + { + "epoch": 1.44, + "grad_norm": 22.363156163281914, + "learning_rate": 3.7930892078142147e-06, + "loss": 1.2289, + "step": 120045 + }, + { + "epoch": 1.44, + "grad_norm": 9.665679674969429, + "learning_rate": 3.7926311905666546e-06, + "loss": 1.1872, + "step": 120048 + }, + { + "epoch": 1.44, + "grad_norm": 5.665983149883953, + "learning_rate": 3.792173194502605e-06, + "loss": 1.2122, + "step": 120051 + }, + { + "epoch": 1.44, + "grad_norm": 18.458994603663147, + "learning_rate": 3.791715219623633e-06, + "loss": 0.9189, + "step": 120054 + }, + { + "epoch": 1.44, + "grad_norm": 8.054120762261102, + "learning_rate": 3.7912572659313008e-06, + "loss": 0.9229, + "step": 120057 + }, + { + "epoch": 1.44, + "grad_norm": 6.756686687211479, + "learning_rate": 3.790799333427174e-06, + "loss": 1.0045, + "step": 120060 + }, + { + "epoch": 1.44, + "grad_norm": 22.82952284086881, + "learning_rate": 3.790341422112813e-06, + "loss": 1.1312, + "step": 120063 + }, + { + "epoch": 1.44, + "grad_norm": 14.605771875659576, + "learning_rate": 3.7898835319897766e-06, + "loss": 1.0368, + "step": 120066 + }, + { + "epoch": 1.44, + "grad_norm": 12.343575382651135, + "learning_rate": 3.7894256630596304e-06, + "loss": 1.1217, + "step": 120069 + }, + { + "epoch": 1.44, + "grad_norm": 11.477833027479528, + "learning_rate": 3.7889678153239375e-06, + "loss": 1.1851, + "step": 120072 + }, + { + "epoch": 1.44, + "grad_norm": 5.978509945868648, + "learning_rate": 3.7885099887842636e-06, + "loss": 0.9031, + "step": 120075 + }, + { + "epoch": 1.44, + "grad_norm": 4.587215524564964, + "learning_rate": 3.788052183442165e-06, + "loss": 1.3389, + "step": 120078 + }, + { + "epoch": 1.44, + "grad_norm": 7.099585673254614, + "learning_rate": 3.787594399299206e-06, + "loss": 1.2231, + "step": 120081 + }, + { + "epoch": 1.44, + "grad_norm": 6.1453072217986175, + "learning_rate": 3.787136636356953e-06, + "loss": 1.288, + "step": 120084 + }, + { + "epoch": 1.44, + "grad_norm": 6.191121813402138, + "learning_rate": 3.7866788946169652e-06, + "loss": 0.9405, + "step": 120087 + }, + { + "epoch": 1.44, + "grad_norm": 15.065638679583147, + "learning_rate": 3.786221174080801e-06, + "loss": 1.0784, + "step": 120090 + }, + { + "epoch": 1.44, + "grad_norm": 6.095463558766609, + "learning_rate": 3.785763474750025e-06, + "loss": 1.2558, + "step": 120093 + }, + { + "epoch": 1.44, + "grad_norm": 11.077410758852452, + "learning_rate": 3.7853057966262007e-06, + "loss": 1.2469, + "step": 120096 + }, + { + "epoch": 1.44, + "grad_norm": 6.285090631453138, + "learning_rate": 3.7848481397108885e-06, + "loss": 1.158, + "step": 120099 + }, + { + "epoch": 1.44, + "grad_norm": 9.238582465971707, + "learning_rate": 3.784390504005655e-06, + "loss": 1.2567, + "step": 120102 + }, + { + "epoch": 1.44, + "grad_norm": 5.299060982952671, + "learning_rate": 3.7839328895120573e-06, + "loss": 1.2136, + "step": 120105 + }, + { + "epoch": 1.44, + "grad_norm": 6.415993127919479, + "learning_rate": 3.7834752962316546e-06, + "loss": 0.903, + "step": 120108 + }, + { + "epoch": 1.44, + "grad_norm": 3.8970381062886075, + "learning_rate": 3.7830177241660116e-06, + "loss": 0.9101, + "step": 120111 + }, + { + "epoch": 1.44, + "grad_norm": 9.739240201159161, + "learning_rate": 3.7825601733166928e-06, + "loss": 1.1195, + "step": 120114 + }, + { + "epoch": 1.44, + "grad_norm": 10.11254586249778, + "learning_rate": 3.7821026436852536e-06, + "loss": 1.2229, + "step": 120117 + }, + { + "epoch": 1.44, + "grad_norm": 3.8138082643478963, + "learning_rate": 3.7816451352732586e-06, + "loss": 1.4635, + "step": 120120 + }, + { + "epoch": 1.44, + "grad_norm": 9.338101082797191, + "learning_rate": 3.7811876480822685e-06, + "loss": 1.2265, + "step": 120123 + }, + { + "epoch": 1.44, + "grad_norm": 12.0841390955756, + "learning_rate": 3.7807301821138477e-06, + "loss": 1.5655, + "step": 120126 + }, + { + "epoch": 1.44, + "grad_norm": 18.455943005033905, + "learning_rate": 3.780272737369556e-06, + "loss": 1.2468, + "step": 120129 + }, + { + "epoch": 1.44, + "grad_norm": 17.083410775265335, + "learning_rate": 3.7798153138509485e-06, + "loss": 0.8461, + "step": 120132 + }, + { + "epoch": 1.44, + "grad_norm": 9.356764842419409, + "learning_rate": 3.779357911559591e-06, + "loss": 0.9453, + "step": 120135 + }, + { + "epoch": 1.44, + "grad_norm": 14.995742851692414, + "learning_rate": 3.778900530497045e-06, + "loss": 1.8468, + "step": 120138 + }, + { + "epoch": 1.44, + "grad_norm": 10.932576551341398, + "learning_rate": 3.778443170664874e-06, + "loss": 1.0077, + "step": 120141 + }, + { + "epoch": 1.44, + "grad_norm": 16.438987313082574, + "learning_rate": 3.777985832064631e-06, + "loss": 1.4259, + "step": 120144 + }, + { + "epoch": 1.44, + "grad_norm": 2.7091186018228877, + "learning_rate": 3.777528514697886e-06, + "loss": 1.1475, + "step": 120147 + }, + { + "epoch": 1.44, + "grad_norm": 10.369609183423835, + "learning_rate": 3.7770712185661905e-06, + "loss": 1.0222, + "step": 120150 + }, + { + "epoch": 1.44, + "grad_norm": 8.655871904486931, + "learning_rate": 3.7766139436711104e-06, + "loss": 0.9322, + "step": 120153 + }, + { + "epoch": 1.44, + "grad_norm": 4.569219900450345, + "learning_rate": 3.776156690014208e-06, + "loss": 1.0258, + "step": 120156 + }, + { + "epoch": 1.44, + "grad_norm": 4.595602709683791, + "learning_rate": 3.7756994575970373e-06, + "loss": 1.4854, + "step": 120159 + }, + { + "epoch": 1.44, + "grad_norm": 12.700552756682717, + "learning_rate": 3.7752422464211626e-06, + "loss": 0.994, + "step": 120162 + }, + { + "epoch": 1.44, + "grad_norm": 5.391554400012865, + "learning_rate": 3.7747850564881437e-06, + "loss": 1.4004, + "step": 120165 + }, + { + "epoch": 1.44, + "grad_norm": 13.72088769093336, + "learning_rate": 3.774327887799545e-06, + "loss": 1.1534, + "step": 120168 + }, + { + "epoch": 1.45, + "grad_norm": 5.578575227099125, + "learning_rate": 3.773870740356923e-06, + "loss": 0.9086, + "step": 120171 + }, + { + "epoch": 1.45, + "grad_norm": 6.7845401461042085, + "learning_rate": 3.7734136141618326e-06, + "loss": 0.8314, + "step": 120174 + }, + { + "epoch": 1.45, + "grad_norm": 18.029593272708503, + "learning_rate": 3.7729565092158403e-06, + "loss": 0.951, + "step": 120177 + }, + { + "epoch": 1.45, + "grad_norm": 12.60713576816273, + "learning_rate": 3.7724994255205037e-06, + "loss": 1.2439, + "step": 120180 + }, + { + "epoch": 1.45, + "grad_norm": 4.432139971804335, + "learning_rate": 3.772042363077387e-06, + "loss": 1.3269, + "step": 120183 + }, + { + "epoch": 1.45, + "grad_norm": 21.373933526970784, + "learning_rate": 3.7715853218880438e-06, + "loss": 1.2602, + "step": 120186 + }, + { + "epoch": 1.45, + "grad_norm": 14.482070188883752, + "learning_rate": 3.771128301954039e-06, + "loss": 1.3693, + "step": 120189 + }, + { + "epoch": 1.45, + "grad_norm": 3.830413371710815, + "learning_rate": 3.770671303276926e-06, + "loss": 1.6494, + "step": 120192 + }, + { + "epoch": 1.45, + "grad_norm": 4.952865397005335, + "learning_rate": 3.7702143258582723e-06, + "loss": 1.2615, + "step": 120195 + }, + { + "epoch": 1.45, + "grad_norm": 7.180042979420345, + "learning_rate": 3.769757369699629e-06, + "loss": 1.1707, + "step": 120198 + }, + { + "epoch": 1.45, + "grad_norm": 9.99342420611866, + "learning_rate": 3.7693004348025596e-06, + "loss": 1.1548, + "step": 120201 + }, + { + "epoch": 1.45, + "grad_norm": 58.10681920988473, + "learning_rate": 3.768843521168625e-06, + "loss": 1.7429, + "step": 120204 + }, + { + "epoch": 1.45, + "grad_norm": 9.908576089646084, + "learning_rate": 3.768386628799381e-06, + "loss": 1.1075, + "step": 120207 + }, + { + "epoch": 1.45, + "grad_norm": 2.9416162180644094, + "learning_rate": 3.7679297576963934e-06, + "loss": 1.2237, + "step": 120210 + }, + { + "epoch": 1.45, + "grad_norm": 10.017741140887708, + "learning_rate": 3.767472907861217e-06, + "loss": 0.9257, + "step": 120213 + }, + { + "epoch": 1.45, + "grad_norm": 9.885009768876936, + "learning_rate": 3.767016079295407e-06, + "loss": 1.257, + "step": 120216 + }, + { + "epoch": 1.45, + "grad_norm": 6.717574068020059, + "learning_rate": 3.766559272000525e-06, + "loss": 0.947, + "step": 120219 + }, + { + "epoch": 1.45, + "grad_norm": 14.7923022176267, + "learning_rate": 3.766102485978135e-06, + "loss": 1.2504, + "step": 120222 + }, + { + "epoch": 1.45, + "grad_norm": 3.5597480870768097, + "learning_rate": 3.765645721229788e-06, + "loss": 1.1853, + "step": 120225 + }, + { + "epoch": 1.45, + "grad_norm": 12.206988963254794, + "learning_rate": 3.7651889777570472e-06, + "loss": 1.2563, + "step": 120228 + }, + { + "epoch": 1.45, + "grad_norm": 9.6485024760906, + "learning_rate": 3.764732255561474e-06, + "loss": 1.1439, + "step": 120231 + }, + { + "epoch": 1.45, + "grad_norm": 4.520380588365997, + "learning_rate": 3.7642755546446206e-06, + "loss": 0.852, + "step": 120234 + }, + { + "epoch": 1.45, + "grad_norm": 14.361710025520818, + "learning_rate": 3.7638188750080517e-06, + "loss": 0.7215, + "step": 120237 + }, + { + "epoch": 1.45, + "grad_norm": 3.513778656211272, + "learning_rate": 3.763362216653319e-06, + "loss": 1.1767, + "step": 120240 + }, + { + "epoch": 1.45, + "grad_norm": 14.114645584310393, + "learning_rate": 3.7629055795819845e-06, + "loss": 1.3306, + "step": 120243 + }, + { + "epoch": 1.45, + "grad_norm": 8.54528672494798, + "learning_rate": 3.7624489637956075e-06, + "loss": 0.819, + "step": 120246 + }, + { + "epoch": 1.45, + "grad_norm": 4.93169632306733, + "learning_rate": 3.7619923692957482e-06, + "loss": 1.2452, + "step": 120249 + }, + { + "epoch": 1.45, + "grad_norm": 8.107691199637657, + "learning_rate": 3.761535796083958e-06, + "loss": 1.045, + "step": 120252 + }, + { + "epoch": 1.45, + "grad_norm": 5.462283019012637, + "learning_rate": 3.761079244161804e-06, + "loss": 1.2236, + "step": 120255 + }, + { + "epoch": 1.45, + "grad_norm": 19.3201126259304, + "learning_rate": 3.760622713530835e-06, + "loss": 1.2794, + "step": 120258 + }, + { + "epoch": 1.45, + "grad_norm": 2.5976214479914477, + "learning_rate": 3.7601662041926126e-06, + "loss": 0.9735, + "step": 120261 + }, + { + "epoch": 1.45, + "grad_norm": 3.1123563416257234, + "learning_rate": 3.7597097161486995e-06, + "loss": 1.5665, + "step": 120264 + }, + { + "epoch": 1.45, + "grad_norm": 3.5849335697304583, + "learning_rate": 3.759253249400646e-06, + "loss": 1.1785, + "step": 120267 + }, + { + "epoch": 1.45, + "grad_norm": 10.285179309190267, + "learning_rate": 3.7587968039500134e-06, + "loss": 1.191, + "step": 120270 + }, + { + "epoch": 1.45, + "grad_norm": 6.442455264588281, + "learning_rate": 3.758340379798363e-06, + "loss": 1.0302, + "step": 120273 + }, + { + "epoch": 1.45, + "grad_norm": 28.022099376309075, + "learning_rate": 3.7578839769472475e-06, + "loss": 1.1027, + "step": 120276 + }, + { + "epoch": 1.45, + "grad_norm": 10.837681598946476, + "learning_rate": 3.7574275953982233e-06, + "loss": 1.2273, + "step": 120279 + }, + { + "epoch": 1.45, + "grad_norm": 21.11353793513898, + "learning_rate": 3.7569712351528498e-06, + "loss": 1.0997, + "step": 120282 + }, + { + "epoch": 1.45, + "grad_norm": 16.476539610870525, + "learning_rate": 3.7565148962126842e-06, + "loss": 1.0566, + "step": 120285 + }, + { + "epoch": 1.45, + "grad_norm": 2.791015464121363, + "learning_rate": 3.7560585785792848e-06, + "loss": 1.0769, + "step": 120288 + }, + { + "epoch": 1.45, + "grad_norm": 13.492255086525217, + "learning_rate": 3.7556022822542116e-06, + "loss": 1.2678, + "step": 120291 + }, + { + "epoch": 1.45, + "grad_norm": 4.179438559192272, + "learning_rate": 3.7551460072390157e-06, + "loss": 1.3996, + "step": 120294 + }, + { + "epoch": 1.45, + "grad_norm": 2.752032041777346, + "learning_rate": 3.75468975353526e-06, + "loss": 1.1847, + "step": 120297 + }, + { + "epoch": 1.45, + "grad_norm": 5.111808772701181, + "learning_rate": 3.7542335211444957e-06, + "loss": 1.2006, + "step": 120300 + }, + { + "epoch": 1.45, + "grad_norm": 13.500317110612164, + "learning_rate": 3.753777310068285e-06, + "loss": 1.1461, + "step": 120303 + }, + { + "epoch": 1.45, + "grad_norm": 9.220309317557914, + "learning_rate": 3.75332112030818e-06, + "loss": 0.9026, + "step": 120306 + }, + { + "epoch": 1.45, + "grad_norm": 5.640013560928536, + "learning_rate": 3.7528649518657402e-06, + "loss": 1.3368, + "step": 120309 + }, + { + "epoch": 1.45, + "grad_norm": 24.92964289357332, + "learning_rate": 3.752408804742521e-06, + "loss": 1.0026, + "step": 120312 + }, + { + "epoch": 1.45, + "grad_norm": 20.772142789817103, + "learning_rate": 3.751952678940085e-06, + "loss": 1.3952, + "step": 120315 + }, + { + "epoch": 1.45, + "grad_norm": 10.923331981549682, + "learning_rate": 3.7514965744599796e-06, + "loss": 1.0722, + "step": 120318 + }, + { + "epoch": 1.45, + "grad_norm": 6.7188736248925425, + "learning_rate": 3.751040491303769e-06, + "loss": 1.3615, + "step": 120321 + }, + { + "epoch": 1.45, + "grad_norm": 16.8065761945367, + "learning_rate": 3.750584429473003e-06, + "loss": 1.0989, + "step": 120324 + }, + { + "epoch": 1.45, + "grad_norm": 3.6164535558721007, + "learning_rate": 3.750128388969242e-06, + "loss": 1.0985, + "step": 120327 + }, + { + "epoch": 1.45, + "grad_norm": 10.499280861832183, + "learning_rate": 3.749672369794045e-06, + "loss": 1.2873, + "step": 120330 + }, + { + "epoch": 1.45, + "grad_norm": 7.898208487337774, + "learning_rate": 3.7492163719489605e-06, + "loss": 1.0044, + "step": 120333 + }, + { + "epoch": 1.45, + "grad_norm": 7.467906954698691, + "learning_rate": 3.74876039543555e-06, + "loss": 1.1106, + "step": 120336 + }, + { + "epoch": 1.45, + "grad_norm": 3.9203530206087542, + "learning_rate": 3.7483044402553713e-06, + "loss": 1.4079, + "step": 120339 + }, + { + "epoch": 1.45, + "grad_norm": 6.153582547153248, + "learning_rate": 3.7478485064099744e-06, + "loss": 1.3824, + "step": 120342 + }, + { + "epoch": 1.45, + "grad_norm": 4.89076466975447, + "learning_rate": 3.7473925939009225e-06, + "loss": 1.1206, + "step": 120345 + }, + { + "epoch": 1.45, + "grad_norm": 4.12810487575017, + "learning_rate": 3.7469367027297634e-06, + "loss": 1.0678, + "step": 120348 + }, + { + "epoch": 1.45, + "grad_norm": 8.79916569599449, + "learning_rate": 3.7464808328980573e-06, + "loss": 1.2442, + "step": 120351 + }, + { + "epoch": 1.45, + "grad_norm": 10.648108949811053, + "learning_rate": 3.746024984407359e-06, + "loss": 1.1642, + "step": 120354 + }, + { + "epoch": 1.45, + "grad_norm": 4.842317559367343, + "learning_rate": 3.745569157259229e-06, + "loss": 0.9983, + "step": 120357 + }, + { + "epoch": 1.45, + "grad_norm": 3.007573610282973, + "learning_rate": 3.745113351455215e-06, + "loss": 0.8362, + "step": 120360 + }, + { + "epoch": 1.45, + "grad_norm": 14.503356694303076, + "learning_rate": 3.7446575669968798e-06, + "loss": 1.3142, + "step": 120363 + }, + { + "epoch": 1.45, + "grad_norm": 6.38561262639748, + "learning_rate": 3.744201803885772e-06, + "loss": 1.2411, + "step": 120366 + }, + { + "epoch": 1.45, + "grad_norm": 15.039977020869172, + "learning_rate": 3.743746062123449e-06, + "loss": 1.2333, + "step": 120369 + }, + { + "epoch": 1.45, + "grad_norm": 3.919210966556853, + "learning_rate": 3.7432903417114706e-06, + "loss": 1.278, + "step": 120372 + }, + { + "epoch": 1.45, + "grad_norm": 21.21586748463177, + "learning_rate": 3.742834642651386e-06, + "loss": 1.135, + "step": 120375 + }, + { + "epoch": 1.45, + "grad_norm": 4.953606216577003, + "learning_rate": 3.742378964944753e-06, + "loss": 1.0957, + "step": 120378 + }, + { + "epoch": 1.45, + "grad_norm": 13.658171993182695, + "learning_rate": 3.7419233085931293e-06, + "loss": 1.3829, + "step": 120381 + }, + { + "epoch": 1.45, + "grad_norm": 5.474410704627487, + "learning_rate": 3.741467673598068e-06, + "loss": 1.2954, + "step": 120384 + }, + { + "epoch": 1.45, + "grad_norm": 6.112879690700059, + "learning_rate": 3.74101205996112e-06, + "loss": 1.3696, + "step": 120387 + }, + { + "epoch": 1.45, + "grad_norm": 9.419995614103001, + "learning_rate": 3.7405564676838423e-06, + "loss": 0.8624, + "step": 120390 + }, + { + "epoch": 1.45, + "grad_norm": 10.296710801060076, + "learning_rate": 3.740100896767791e-06, + "loss": 0.8334, + "step": 120393 + }, + { + "epoch": 1.45, + "grad_norm": 4.70390608314878, + "learning_rate": 3.7396453472145213e-06, + "loss": 1.1632, + "step": 120396 + }, + { + "epoch": 1.45, + "grad_norm": 11.234998091657939, + "learning_rate": 3.7391898190255894e-06, + "loss": 1.2252, + "step": 120399 + }, + { + "epoch": 1.45, + "grad_norm": 13.108346923213055, + "learning_rate": 3.7387343122025453e-06, + "loss": 1.6851, + "step": 120402 + }, + { + "epoch": 1.45, + "grad_norm": 10.007279430828435, + "learning_rate": 3.7382788267469483e-06, + "loss": 1.1816, + "step": 120405 + }, + { + "epoch": 1.45, + "grad_norm": 4.55858274488103, + "learning_rate": 3.7378233626603467e-06, + "loss": 1.2646, + "step": 120408 + }, + { + "epoch": 1.45, + "grad_norm": 3.667731201196901, + "learning_rate": 3.7373679199443026e-06, + "loss": 0.9773, + "step": 120411 + }, + { + "epoch": 1.45, + "grad_norm": 4.300982500587038, + "learning_rate": 3.7369124986003613e-06, + "loss": 1.1508, + "step": 120414 + }, + { + "epoch": 1.45, + "grad_norm": 9.537582273147134, + "learning_rate": 3.736457098630083e-06, + "loss": 0.875, + "step": 120417 + }, + { + "epoch": 1.45, + "grad_norm": 41.314542379657894, + "learning_rate": 3.7360017200350195e-06, + "loss": 1.2623, + "step": 120420 + }, + { + "epoch": 1.45, + "grad_norm": 7.877792256415644, + "learning_rate": 3.73554636281673e-06, + "loss": 1.0613, + "step": 120423 + }, + { + "epoch": 1.45, + "grad_norm": 4.556349475908281, + "learning_rate": 3.7350910269767635e-06, + "loss": 1.0661, + "step": 120426 + }, + { + "epoch": 1.45, + "grad_norm": 11.011091104057098, + "learning_rate": 3.734635712516672e-06, + "loss": 1.4343, + "step": 120429 + }, + { + "epoch": 1.45, + "grad_norm": 7.674685535773255, + "learning_rate": 3.7341804194380117e-06, + "loss": 0.9728, + "step": 120432 + }, + { + "epoch": 1.45, + "grad_norm": 9.24947495412539, + "learning_rate": 3.7337251477423365e-06, + "loss": 1.1798, + "step": 120435 + }, + { + "epoch": 1.45, + "grad_norm": 8.540377941025774, + "learning_rate": 3.7332698974312044e-06, + "loss": 1.0522, + "step": 120438 + }, + { + "epoch": 1.45, + "grad_norm": 6.5053582720026535, + "learning_rate": 3.7328146685061605e-06, + "loss": 1.1291, + "step": 120441 + }, + { + "epoch": 1.45, + "grad_norm": 6.252586717822269, + "learning_rate": 3.732359460968763e-06, + "loss": 1.4998, + "step": 120444 + }, + { + "epoch": 1.45, + "grad_norm": 16.262906673033708, + "learning_rate": 3.7319042748205694e-06, + "loss": 1.4316, + "step": 120447 + }, + { + "epoch": 1.45, + "grad_norm": 11.33864230525502, + "learning_rate": 3.7314491100631243e-06, + "loss": 1.1304, + "step": 120450 + }, + { + "epoch": 1.45, + "grad_norm": 22.522932117671477, + "learning_rate": 3.730993966697989e-06, + "loss": 1.3181, + "step": 120453 + }, + { + "epoch": 1.45, + "grad_norm": 12.852806517252906, + "learning_rate": 3.7305388447267098e-06, + "loss": 1.0214, + "step": 120456 + }, + { + "epoch": 1.45, + "grad_norm": 15.551048003819211, + "learning_rate": 3.7300837441508443e-06, + "loss": 1.4138, + "step": 120459 + }, + { + "epoch": 1.45, + "grad_norm": 26.75451376690275, + "learning_rate": 3.7296286649719437e-06, + "loss": 1.4401, + "step": 120462 + }, + { + "epoch": 1.45, + "grad_norm": 4.708087622239352, + "learning_rate": 3.7291736071915653e-06, + "loss": 1.5092, + "step": 120465 + }, + { + "epoch": 1.45, + "grad_norm": 23.91729675031523, + "learning_rate": 3.7287185708112583e-06, + "loss": 1.2268, + "step": 120468 + }, + { + "epoch": 1.45, + "grad_norm": 8.04491171763295, + "learning_rate": 3.7282635558325732e-06, + "loss": 1.2701, + "step": 120471 + }, + { + "epoch": 1.45, + "grad_norm": 6.919235275523742, + "learning_rate": 3.727808562257065e-06, + "loss": 1.1497, + "step": 120474 + }, + { + "epoch": 1.45, + "grad_norm": 15.852097824588313, + "learning_rate": 3.727353590086287e-06, + "loss": 1.1691, + "step": 120477 + }, + { + "epoch": 1.45, + "grad_norm": 5.592794914492681, + "learning_rate": 3.7268986393217955e-06, + "loss": 1.0862, + "step": 120480 + }, + { + "epoch": 1.45, + "grad_norm": 27.982575844292466, + "learning_rate": 3.726443709965136e-06, + "loss": 1.3877, + "step": 120483 + }, + { + "epoch": 1.45, + "grad_norm": 16.048315377481106, + "learning_rate": 3.7259888020178637e-06, + "loss": 1.2765, + "step": 120486 + }, + { + "epoch": 1.45, + "grad_norm": 6.062548990177597, + "learning_rate": 3.725533915481536e-06, + "loss": 1.2786, + "step": 120489 + }, + { + "epoch": 1.45, + "grad_norm": 9.889671089803247, + "learning_rate": 3.7250790503577005e-06, + "loss": 1.4023, + "step": 120492 + }, + { + "epoch": 1.45, + "grad_norm": 5.617130294449884, + "learning_rate": 3.7246242066479067e-06, + "loss": 1.3506, + "step": 120495 + }, + { + "epoch": 1.45, + "grad_norm": 7.2903460138528, + "learning_rate": 3.724169384353711e-06, + "loss": 1.3579, + "step": 120498 + }, + { + "epoch": 1.45, + "grad_norm": 9.465648135378423, + "learning_rate": 3.7237145834766632e-06, + "loss": 1.0034, + "step": 120501 + }, + { + "epoch": 1.45, + "grad_norm": 5.177056188982387, + "learning_rate": 3.723259804018318e-06, + "loss": 1.0982, + "step": 120504 + }, + { + "epoch": 1.45, + "grad_norm": 4.123257551184604, + "learning_rate": 3.722805045980229e-06, + "loss": 1.1645, + "step": 120507 + }, + { + "epoch": 1.45, + "grad_norm": 4.743027460598794, + "learning_rate": 3.7223503093639444e-06, + "loss": 1.2209, + "step": 120510 + }, + { + "epoch": 1.45, + "grad_norm": 7.932570336903949, + "learning_rate": 3.7218955941710157e-06, + "loss": 1.1025, + "step": 120513 + }, + { + "epoch": 1.45, + "grad_norm": 17.698433166000125, + "learning_rate": 3.721440900402994e-06, + "loss": 1.3009, + "step": 120516 + }, + { + "epoch": 1.45, + "grad_norm": 13.741477606774103, + "learning_rate": 3.7209862280614375e-06, + "loss": 0.9657, + "step": 120519 + }, + { + "epoch": 1.45, + "grad_norm": 38.80018228918478, + "learning_rate": 3.72053157714789e-06, + "loss": 1.2128, + "step": 120522 + }, + { + "epoch": 1.45, + "grad_norm": 4.681774382945321, + "learning_rate": 3.7200769476639054e-06, + "loss": 1.1611, + "step": 120525 + }, + { + "epoch": 1.45, + "grad_norm": 5.7477432162634745, + "learning_rate": 3.7196223396110366e-06, + "loss": 1.2827, + "step": 120528 + }, + { + "epoch": 1.45, + "grad_norm": 10.053395859119941, + "learning_rate": 3.7191677529908386e-06, + "loss": 1.3057, + "step": 120531 + }, + { + "epoch": 1.45, + "grad_norm": 16.00920342893482, + "learning_rate": 3.7187131878048576e-06, + "loss": 1.1521, + "step": 120534 + }, + { + "epoch": 1.45, + "grad_norm": 7.012220474745449, + "learning_rate": 3.718258644054644e-06, + "loss": 1.0108, + "step": 120537 + }, + { + "epoch": 1.45, + "grad_norm": 4.667303717695373, + "learning_rate": 3.7178041217417504e-06, + "loss": 1.0342, + "step": 120540 + }, + { + "epoch": 1.45, + "grad_norm": 47.513234252517016, + "learning_rate": 3.7173496208677285e-06, + "loss": 1.2462, + "step": 120543 + }, + { + "epoch": 1.45, + "grad_norm": 5.342019084048092, + "learning_rate": 3.7168951414341328e-06, + "loss": 1.063, + "step": 120546 + }, + { + "epoch": 1.45, + "grad_norm": 21.21401303610317, + "learning_rate": 3.716440683442506e-06, + "loss": 1.2684, + "step": 120549 + }, + { + "epoch": 1.45, + "grad_norm": 11.903779479604724, + "learning_rate": 3.7159862468944087e-06, + "loss": 1.1484, + "step": 120552 + }, + { + "epoch": 1.45, + "grad_norm": 10.22268886735967, + "learning_rate": 3.7155318317913835e-06, + "loss": 0.6397, + "step": 120555 + }, + { + "epoch": 1.45, + "grad_norm": 6.901804578581385, + "learning_rate": 3.715077438134984e-06, + "loss": 0.6375, + "step": 120558 + }, + { + "epoch": 1.45, + "grad_norm": 7.467508935267516, + "learning_rate": 3.7146230659267647e-06, + "loss": 1.0164, + "step": 120561 + }, + { + "epoch": 1.45, + "grad_norm": 17.77836409082238, + "learning_rate": 3.714168715168269e-06, + "loss": 1.1071, + "step": 120564 + }, + { + "epoch": 1.45, + "grad_norm": 24.21487279333338, + "learning_rate": 3.7137143858610523e-06, + "loss": 1.4395, + "step": 120567 + }, + { + "epoch": 1.45, + "grad_norm": 9.67619262582482, + "learning_rate": 3.713260078006664e-06, + "loss": 1.1659, + "step": 120570 + }, + { + "epoch": 1.45, + "grad_norm": 3.698546399349801, + "learning_rate": 3.7128057916066583e-06, + "loss": 1.267, + "step": 120573 + }, + { + "epoch": 1.45, + "grad_norm": 2.451366445538196, + "learning_rate": 3.712351526662581e-06, + "loss": 1.34, + "step": 120576 + }, + { + "epoch": 1.45, + "grad_norm": 5.36020249293064, + "learning_rate": 3.7118972831759804e-06, + "loss": 1.302, + "step": 120579 + }, + { + "epoch": 1.45, + "grad_norm": 6.604518105732538, + "learning_rate": 3.7114430611484086e-06, + "loss": 1.2713, + "step": 120582 + }, + { + "epoch": 1.45, + "grad_norm": 4.856961245724613, + "learning_rate": 3.710988860581417e-06, + "loss": 1.108, + "step": 120585 + }, + { + "epoch": 1.45, + "grad_norm": 11.551416559053157, + "learning_rate": 3.7105346814765587e-06, + "loss": 1.1749, + "step": 120588 + }, + { + "epoch": 1.45, + "grad_norm": 4.053184470986948, + "learning_rate": 3.7100805238353765e-06, + "loss": 1.1089, + "step": 120591 + }, + { + "epoch": 1.45, + "grad_norm": 3.759411977646988, + "learning_rate": 3.7096263876594276e-06, + "loss": 1.5642, + "step": 120594 + }, + { + "epoch": 1.45, + "grad_norm": 7.129189545704169, + "learning_rate": 3.7091722729502543e-06, + "loss": 1.31, + "step": 120597 + }, + { + "epoch": 1.45, + "grad_norm": 9.367604584333025, + "learning_rate": 3.708718179709413e-06, + "loss": 1.2235, + "step": 120600 + }, + { + "epoch": 1.45, + "grad_norm": 26.437167851153724, + "learning_rate": 3.7082641079384473e-06, + "loss": 1.6017, + "step": 120603 + }, + { + "epoch": 1.45, + "grad_norm": 15.908112789056664, + "learning_rate": 3.707810057638911e-06, + "loss": 1.3735, + "step": 120606 + }, + { + "epoch": 1.45, + "grad_norm": 29.703439284983737, + "learning_rate": 3.707356028812351e-06, + "loss": 1.3059, + "step": 120609 + }, + { + "epoch": 1.45, + "grad_norm": 8.764363452002206, + "learning_rate": 3.7069020214603193e-06, + "loss": 1.1105, + "step": 120612 + }, + { + "epoch": 1.45, + "grad_norm": 15.149097068786396, + "learning_rate": 3.706448035584367e-06, + "loss": 1.0743, + "step": 120615 + }, + { + "epoch": 1.45, + "grad_norm": 4.358266274726276, + "learning_rate": 3.705994071186041e-06, + "loss": 1.5434, + "step": 120618 + }, + { + "epoch": 1.45, + "grad_norm": 6.865900900906666, + "learning_rate": 3.7055401282668868e-06, + "loss": 1.1735, + "step": 120621 + }, + { + "epoch": 1.45, + "grad_norm": 10.682972089831887, + "learning_rate": 3.7050862068284567e-06, + "loss": 0.9935, + "step": 120624 + }, + { + "epoch": 1.45, + "grad_norm": 21.62098570275916, + "learning_rate": 3.7046323068723035e-06, + "loss": 0.9063, + "step": 120627 + }, + { + "epoch": 1.45, + "grad_norm": 9.256805956007588, + "learning_rate": 3.7041784283999695e-06, + "loss": 1.3024, + "step": 120630 + }, + { + "epoch": 1.45, + "grad_norm": 11.894622548192832, + "learning_rate": 3.7037245714130053e-06, + "loss": 1.2457, + "step": 120633 + }, + { + "epoch": 1.45, + "grad_norm": 13.536639975484665, + "learning_rate": 3.703270735912966e-06, + "loss": 1.093, + "step": 120636 + }, + { + "epoch": 1.45, + "grad_norm": 9.372137659831449, + "learning_rate": 3.702816921901392e-06, + "loss": 1.237, + "step": 120639 + }, + { + "epoch": 1.45, + "grad_norm": 6.633628231255884, + "learning_rate": 3.702363129379839e-06, + "loss": 1.3053, + "step": 120642 + }, + { + "epoch": 1.45, + "grad_norm": 14.304334154614434, + "learning_rate": 3.7019093583498487e-06, + "loss": 1.2764, + "step": 120645 + }, + { + "epoch": 1.45, + "grad_norm": 24.542971637397365, + "learning_rate": 3.7014556088129737e-06, + "loss": 1.1407, + "step": 120648 + }, + { + "epoch": 1.45, + "grad_norm": 3.798830430345098, + "learning_rate": 3.7010018807707617e-06, + "loss": 1.2433, + "step": 120651 + }, + { + "epoch": 1.45, + "grad_norm": 6.049489124425651, + "learning_rate": 3.700548174224765e-06, + "loss": 0.9938, + "step": 120654 + }, + { + "epoch": 1.45, + "grad_norm": 9.752146075870256, + "learning_rate": 3.7000944891765244e-06, + "loss": 1.1121, + "step": 120657 + }, + { + "epoch": 1.45, + "grad_norm": 20.841069165724143, + "learning_rate": 3.6996408256275962e-06, + "loss": 1.3935, + "step": 120660 + }, + { + "epoch": 1.45, + "grad_norm": 6.749486821273368, + "learning_rate": 3.69918718357952e-06, + "loss": 1.0206, + "step": 120663 + }, + { + "epoch": 1.45, + "grad_norm": 5.369693480387765, + "learning_rate": 3.6987335630338493e-06, + "loss": 0.9555, + "step": 120666 + }, + { + "epoch": 1.45, + "grad_norm": 5.02041981209005, + "learning_rate": 3.698279963992134e-06, + "loss": 1.1415, + "step": 120669 + }, + { + "epoch": 1.45, + "grad_norm": 6.609467184364216, + "learning_rate": 3.697826386455916e-06, + "loss": 1.3404, + "step": 120672 + }, + { + "epoch": 1.45, + "grad_norm": 28.38486746897521, + "learning_rate": 3.6973728304267473e-06, + "loss": 1.4628, + "step": 120675 + }, + { + "epoch": 1.45, + "grad_norm": 14.558698198598924, + "learning_rate": 3.696919295906174e-06, + "loss": 1.209, + "step": 120678 + }, + { + "epoch": 1.45, + "grad_norm": 6.516178889481184, + "learning_rate": 3.6964657828957495e-06, + "loss": 1.2465, + "step": 120681 + }, + { + "epoch": 1.45, + "grad_norm": 3.6778745600799274, + "learning_rate": 3.6960122913970154e-06, + "loss": 1.1013, + "step": 120684 + }, + { + "epoch": 1.45, + "grad_norm": 12.000149968321951, + "learning_rate": 3.695558821411518e-06, + "loss": 1.3102, + "step": 120687 + }, + { + "epoch": 1.45, + "grad_norm": 10.041979329561574, + "learning_rate": 3.695105372940807e-06, + "loss": 1.6402, + "step": 120690 + }, + { + "epoch": 1.45, + "grad_norm": 11.242942301493356, + "learning_rate": 3.6946519459864305e-06, + "loss": 1.1578, + "step": 120693 + }, + { + "epoch": 1.45, + "grad_norm": 51.97655213124118, + "learning_rate": 3.694198540549939e-06, + "loss": 1.5567, + "step": 120696 + }, + { + "epoch": 1.45, + "grad_norm": 2.156438747725224, + "learning_rate": 3.693745156632873e-06, + "loss": 1.3032, + "step": 120699 + }, + { + "epoch": 1.45, + "grad_norm": 7.5292172106709785, + "learning_rate": 3.6932917942367876e-06, + "loss": 1.1393, + "step": 120702 + }, + { + "epoch": 1.45, + "grad_norm": 8.107071718116691, + "learning_rate": 3.692838453363221e-06, + "loss": 1.2113, + "step": 120705 + }, + { + "epoch": 1.45, + "grad_norm": 39.90687861965825, + "learning_rate": 3.6923851340137296e-06, + "loss": 1.3829, + "step": 120708 + }, + { + "epoch": 1.45, + "grad_norm": 7.795743420166757, + "learning_rate": 3.6919318361898515e-06, + "loss": 1.2361, + "step": 120711 + }, + { + "epoch": 1.45, + "grad_norm": 10.355758626741297, + "learning_rate": 3.6914785598931378e-06, + "loss": 1.2609, + "step": 120714 + }, + { + "epoch": 1.45, + "grad_norm": 16.91794557180494, + "learning_rate": 3.6910253051251365e-06, + "loss": 1.5325, + "step": 120717 + }, + { + "epoch": 1.45, + "grad_norm": 8.17840835252812, + "learning_rate": 3.6905720718873927e-06, + "loss": 0.9679, + "step": 120720 + }, + { + "epoch": 1.45, + "grad_norm": 4.8773234770154374, + "learning_rate": 3.6901188601814563e-06, + "loss": 1.3785, + "step": 120723 + }, + { + "epoch": 1.45, + "grad_norm": 17.599171417428476, + "learning_rate": 3.689665670008873e-06, + "loss": 1.1656, + "step": 120726 + }, + { + "epoch": 1.45, + "grad_norm": 15.243881889719296, + "learning_rate": 3.689212501371183e-06, + "loss": 1.1305, + "step": 120729 + }, + { + "epoch": 1.45, + "grad_norm": 9.230353731345053, + "learning_rate": 3.6887593542699383e-06, + "loss": 1.331, + "step": 120732 + }, + { + "epoch": 1.45, + "grad_norm": 7.929399900791932, + "learning_rate": 3.6883062287066885e-06, + "loss": 1.0213, + "step": 120735 + }, + { + "epoch": 1.45, + "grad_norm": 7.198774817626154, + "learning_rate": 3.6878531246829716e-06, + "loss": 1.1348, + "step": 120738 + }, + { + "epoch": 1.45, + "grad_norm": 18.50392109879901, + "learning_rate": 3.6874000422003385e-06, + "loss": 0.8385, + "step": 120741 + }, + { + "epoch": 1.45, + "grad_norm": 14.32989692603425, + "learning_rate": 3.6869469812603397e-06, + "loss": 1.0592, + "step": 120744 + }, + { + "epoch": 1.45, + "grad_norm": 5.569510781686481, + "learning_rate": 3.686493941864513e-06, + "loss": 1.0864, + "step": 120747 + }, + { + "epoch": 1.45, + "grad_norm": 17.320794306310106, + "learning_rate": 3.686040924014411e-06, + "loss": 1.0188, + "step": 120750 + }, + { + "epoch": 1.45, + "grad_norm": 6.296616732456977, + "learning_rate": 3.685587927711575e-06, + "loss": 1.054, + "step": 120753 + }, + { + "epoch": 1.45, + "grad_norm": 4.082735623313502, + "learning_rate": 3.685134952957552e-06, + "loss": 0.9199, + "step": 120756 + }, + { + "epoch": 1.45, + "grad_norm": 8.92871563175439, + "learning_rate": 3.6846819997538886e-06, + "loss": 1.4074, + "step": 120759 + }, + { + "epoch": 1.45, + "grad_norm": 3.8298487293168404, + "learning_rate": 3.6842290681021355e-06, + "loss": 1.042, + "step": 120762 + }, + { + "epoch": 1.45, + "grad_norm": 32.13469793760924, + "learning_rate": 3.683776158003829e-06, + "loss": 1.1199, + "step": 120765 + }, + { + "epoch": 1.45, + "grad_norm": 2.95509416503622, + "learning_rate": 3.6833232694605225e-06, + "loss": 1.3323, + "step": 120768 + }, + { + "epoch": 1.45, + "grad_norm": 6.004631179878816, + "learning_rate": 3.682870402473756e-06, + "loss": 1.677, + "step": 120771 + }, + { + "epoch": 1.45, + "grad_norm": 4.581633459328723, + "learning_rate": 3.682417557045077e-06, + "loss": 1.0159, + "step": 120774 + }, + { + "epoch": 1.45, + "grad_norm": 7.956440117223084, + "learning_rate": 3.6819647331760346e-06, + "loss": 0.9754, + "step": 120777 + }, + { + "epoch": 1.45, + "grad_norm": 7.403747035480013, + "learning_rate": 3.681511930868168e-06, + "loss": 1.2151, + "step": 120780 + }, + { + "epoch": 1.45, + "grad_norm": 2.7900237045198564, + "learning_rate": 3.681059150123024e-06, + "loss": 0.9735, + "step": 120783 + }, + { + "epoch": 1.45, + "grad_norm": 23.92311598696008, + "learning_rate": 3.680606390942154e-06, + "loss": 1.1638, + "step": 120786 + }, + { + "epoch": 1.45, + "grad_norm": 11.175150482294232, + "learning_rate": 3.6801536533270977e-06, + "loss": 1.4259, + "step": 120789 + }, + { + "epoch": 1.45, + "grad_norm": 17.71345035179664, + "learning_rate": 3.679700937279397e-06, + "loss": 1.195, + "step": 120792 + }, + { + "epoch": 1.45, + "grad_norm": 8.106920321031797, + "learning_rate": 3.6792482428006005e-06, + "loss": 1.2656, + "step": 120795 + }, + { + "epoch": 1.45, + "grad_norm": 5.85830081742089, + "learning_rate": 3.6787955698922515e-06, + "loss": 0.9401, + "step": 120798 + }, + { + "epoch": 1.45, + "grad_norm": 3.512477500982333, + "learning_rate": 3.6783429185558985e-06, + "loss": 1.1882, + "step": 120801 + }, + { + "epoch": 1.45, + "grad_norm": 18.377250325739478, + "learning_rate": 3.6778902887930866e-06, + "loss": 1.1251, + "step": 120804 + }, + { + "epoch": 1.45, + "grad_norm": 3.785544479299818, + "learning_rate": 3.6774376806053546e-06, + "loss": 1.3357, + "step": 120807 + }, + { + "epoch": 1.45, + "grad_norm": 19.05277663991737, + "learning_rate": 3.676985093994254e-06, + "loss": 0.7941, + "step": 120810 + }, + { + "epoch": 1.45, + "grad_norm": 5.874263834804329, + "learning_rate": 3.676532528961322e-06, + "loss": 1.3707, + "step": 120813 + }, + { + "epoch": 1.45, + "grad_norm": 2.4758620786345795, + "learning_rate": 3.676079985508111e-06, + "loss": 1.2949, + "step": 120816 + }, + { + "epoch": 1.45, + "grad_norm": 10.57939517474191, + "learning_rate": 3.6756274636361567e-06, + "loss": 0.7284, + "step": 120819 + }, + { + "epoch": 1.45, + "grad_norm": 19.27508979815488, + "learning_rate": 3.675174963347008e-06, + "loss": 1.1957, + "step": 120822 + }, + { + "epoch": 1.45, + "grad_norm": 13.914890855781424, + "learning_rate": 3.6747224846422093e-06, + "loss": 1.2553, + "step": 120825 + }, + { + "epoch": 1.45, + "grad_norm": 48.68488584309874, + "learning_rate": 3.674270027523308e-06, + "loss": 1.0529, + "step": 120828 + }, + { + "epoch": 1.45, + "grad_norm": 10.476508263046954, + "learning_rate": 3.673817591991844e-06, + "loss": 1.0818, + "step": 120831 + }, + { + "epoch": 1.45, + "grad_norm": 10.06660759689625, + "learning_rate": 3.6733651780493583e-06, + "loss": 1.2062, + "step": 120834 + }, + { + "epoch": 1.45, + "grad_norm": 7.381323322730597, + "learning_rate": 3.6729127856973988e-06, + "loss": 1.2283, + "step": 120837 + }, + { + "epoch": 1.45, + "grad_norm": 3.39562919614458, + "learning_rate": 3.67246041493751e-06, + "loss": 1.4004, + "step": 120840 + }, + { + "epoch": 1.45, + "grad_norm": 4.364689826758445, + "learning_rate": 3.6720080657712363e-06, + "loss": 1.2832, + "step": 120843 + }, + { + "epoch": 1.45, + "grad_norm": 7.959891050054273, + "learning_rate": 3.6715557382001178e-06, + "loss": 1.2799, + "step": 120846 + }, + { + "epoch": 1.45, + "grad_norm": 21.838978233376007, + "learning_rate": 3.6711034322256988e-06, + "loss": 1.1488, + "step": 120849 + }, + { + "epoch": 1.45, + "grad_norm": 10.310647285204665, + "learning_rate": 3.6706511478495277e-06, + "loss": 0.8875, + "step": 120852 + }, + { + "epoch": 1.45, + "grad_norm": 9.874024432996114, + "learning_rate": 3.670198885073142e-06, + "loss": 1.3154, + "step": 120855 + }, + { + "epoch": 1.45, + "grad_norm": 53.02435486859742, + "learning_rate": 3.66974664389809e-06, + "loss": 0.9614, + "step": 120858 + }, + { + "epoch": 1.45, + "grad_norm": 11.893011152441925, + "learning_rate": 3.6692944243259098e-06, + "loss": 1.2819, + "step": 120861 + }, + { + "epoch": 1.45, + "grad_norm": 7.104829438021626, + "learning_rate": 3.6688422263581467e-06, + "loss": 0.8116, + "step": 120864 + }, + { + "epoch": 1.45, + "grad_norm": 15.617495280642107, + "learning_rate": 3.6683900499963455e-06, + "loss": 1.4505, + "step": 120867 + }, + { + "epoch": 1.45, + "grad_norm": 8.29560163789115, + "learning_rate": 3.6679378952420507e-06, + "loss": 1.2151, + "step": 120870 + }, + { + "epoch": 1.45, + "grad_norm": 17.622756752886957, + "learning_rate": 3.6674857620968043e-06, + "loss": 1.3696, + "step": 120873 + }, + { + "epoch": 1.45, + "grad_norm": 5.144869461329633, + "learning_rate": 3.667033650562144e-06, + "loss": 1.1696, + "step": 120876 + }, + { + "epoch": 1.45, + "grad_norm": 6.698292914613593, + "learning_rate": 3.666581560639616e-06, + "loss": 1.0391, + "step": 120879 + }, + { + "epoch": 1.45, + "grad_norm": 25.2854775646109, + "learning_rate": 3.6661294923307657e-06, + "loss": 1.5537, + "step": 120882 + }, + { + "epoch": 1.45, + "grad_norm": 7.470055444034856, + "learning_rate": 3.6656774456371358e-06, + "loss": 1.0266, + "step": 120885 + }, + { + "epoch": 1.45, + "grad_norm": 9.74859283186665, + "learning_rate": 3.6652254205602644e-06, + "loss": 1.0791, + "step": 120888 + }, + { + "epoch": 1.45, + "grad_norm": 6.699912238926274, + "learning_rate": 3.664773417101697e-06, + "loss": 0.9789, + "step": 120891 + }, + { + "epoch": 1.45, + "grad_norm": 27.3935546945619, + "learning_rate": 3.6643214352629785e-06, + "loss": 0.8688, + "step": 120894 + }, + { + "epoch": 1.45, + "grad_norm": 4.90166284552586, + "learning_rate": 3.6638694750456493e-06, + "loss": 1.1137, + "step": 120897 + }, + { + "epoch": 1.45, + "grad_norm": 6.343107721318979, + "learning_rate": 3.663417536451248e-06, + "loss": 1.0429, + "step": 120900 + }, + { + "epoch": 1.45, + "grad_norm": 7.514779750215319, + "learning_rate": 3.66296561948132e-06, + "loss": 1.2653, + "step": 120903 + }, + { + "epoch": 1.45, + "grad_norm": 17.93649742795953, + "learning_rate": 3.6625137241374085e-06, + "loss": 0.9562, + "step": 120906 + }, + { + "epoch": 1.45, + "grad_norm": 9.990020624119099, + "learning_rate": 3.662061850421055e-06, + "loss": 1.1457, + "step": 120909 + }, + { + "epoch": 1.45, + "grad_norm": 26.57921874464359, + "learning_rate": 3.6616099983338037e-06, + "loss": 1.1744, + "step": 120912 + }, + { + "epoch": 1.45, + "grad_norm": 5.766199750055062, + "learning_rate": 3.6611581678771913e-06, + "loss": 1.0048, + "step": 120915 + }, + { + "epoch": 1.45, + "grad_norm": 6.901227254897602, + "learning_rate": 3.660706359052767e-06, + "loss": 1.2142, + "step": 120918 + }, + { + "epoch": 1.45, + "grad_norm": 9.441260826414755, + "learning_rate": 3.6602545718620652e-06, + "loss": 0.8779, + "step": 120921 + }, + { + "epoch": 1.45, + "grad_norm": 3.5872068572773514, + "learning_rate": 3.659802806306634e-06, + "loss": 0.9985, + "step": 120924 + }, + { + "epoch": 1.45, + "grad_norm": 6.47753366496177, + "learning_rate": 3.659351062388008e-06, + "loss": 1.3224, + "step": 120927 + }, + { + "epoch": 1.45, + "grad_norm": 2.194137261181858, + "learning_rate": 3.6588993401077332e-06, + "loss": 1.465, + "step": 120930 + }, + { + "epoch": 1.45, + "grad_norm": 11.96451837550154, + "learning_rate": 3.658447639467352e-06, + "loss": 1.362, + "step": 120933 + }, + { + "epoch": 1.45, + "grad_norm": 14.76435967055211, + "learning_rate": 3.657995960468407e-06, + "loss": 0.8488, + "step": 120936 + }, + { + "epoch": 1.45, + "grad_norm": 6.866080744209091, + "learning_rate": 3.657544303112438e-06, + "loss": 1.0549, + "step": 120939 + }, + { + "epoch": 1.45, + "grad_norm": 4.405843837641529, + "learning_rate": 3.6570926674009823e-06, + "loss": 1.3559, + "step": 120942 + }, + { + "epoch": 1.45, + "grad_norm": 3.959660152396975, + "learning_rate": 3.6566410533355835e-06, + "loss": 0.9472, + "step": 120945 + }, + { + "epoch": 1.45, + "grad_norm": 21.5130612780739, + "learning_rate": 3.656189460917785e-06, + "loss": 1.1845, + "step": 120948 + }, + { + "epoch": 1.45, + "grad_norm": 8.779514223789118, + "learning_rate": 3.6557378901491303e-06, + "loss": 1.501, + "step": 120951 + }, + { + "epoch": 1.45, + "grad_norm": 7.680041004230299, + "learning_rate": 3.6552863410311533e-06, + "loss": 0.8819, + "step": 120954 + }, + { + "epoch": 1.45, + "grad_norm": 3.8211199827958895, + "learning_rate": 3.654834813565399e-06, + "loss": 1.2052, + "step": 120957 + }, + { + "epoch": 1.45, + "grad_norm": 8.61933611758562, + "learning_rate": 3.6543833077534106e-06, + "loss": 1.2398, + "step": 120960 + }, + { + "epoch": 1.45, + "grad_norm": 10.78302622422599, + "learning_rate": 3.6539318235967235e-06, + "loss": 1.0342, + "step": 120963 + }, + { + "epoch": 1.45, + "grad_norm": 5.957931112840804, + "learning_rate": 3.653480361096885e-06, + "loss": 1.003, + "step": 120966 + }, + { + "epoch": 1.45, + "grad_norm": 12.900190991718052, + "learning_rate": 3.653028920255428e-06, + "loss": 1.4271, + "step": 120969 + }, + { + "epoch": 1.45, + "grad_norm": 8.510951298933236, + "learning_rate": 3.6525775010738973e-06, + "loss": 1.165, + "step": 120972 + }, + { + "epoch": 1.45, + "grad_norm": 6.628382780203736, + "learning_rate": 3.6521261035538326e-06, + "loss": 0.7888, + "step": 120975 + }, + { + "epoch": 1.45, + "grad_norm": 35.05184201625047, + "learning_rate": 3.651674727696779e-06, + "loss": 0.9385, + "step": 120978 + }, + { + "epoch": 1.45, + "grad_norm": 7.403695136638743, + "learning_rate": 3.6512233735042733e-06, + "loss": 1.1613, + "step": 120981 + }, + { + "epoch": 1.45, + "grad_norm": 34.99950647911799, + "learning_rate": 3.6507720409778513e-06, + "loss": 1.0057, + "step": 120984 + }, + { + "epoch": 1.45, + "grad_norm": 10.6837334363335, + "learning_rate": 3.650320730119058e-06, + "loss": 1.1944, + "step": 120987 + }, + { + "epoch": 1.45, + "grad_norm": 8.006408367633332, + "learning_rate": 3.6498694409294333e-06, + "loss": 1.3478, + "step": 120990 + }, + { + "epoch": 1.45, + "grad_norm": 15.982228451611807, + "learning_rate": 3.6494181734105195e-06, + "loss": 1.0849, + "step": 120993 + }, + { + "epoch": 1.45, + "grad_norm": 4.566189969486304, + "learning_rate": 3.6489669275638505e-06, + "loss": 1.139, + "step": 120996 + }, + { + "epoch": 1.45, + "grad_norm": 6.104281173254457, + "learning_rate": 3.6485157033909703e-06, + "loss": 1.2469, + "step": 120999 + }, + { + "epoch": 1.46, + "grad_norm": 26.370129767282492, + "learning_rate": 3.6480645008934214e-06, + "loss": 1.2565, + "step": 121002 + }, + { + "epoch": 1.46, + "grad_norm": 4.898983039445465, + "learning_rate": 3.6476133200727394e-06, + "loss": 1.1737, + "step": 121005 + }, + { + "epoch": 1.46, + "grad_norm": 4.105057520404054, + "learning_rate": 3.647162160930463e-06, + "loss": 1.2399, + "step": 121008 + }, + { + "epoch": 1.46, + "grad_norm": 9.54371406230263, + "learning_rate": 3.6467110234681335e-06, + "loss": 0.853, + "step": 121011 + }, + { + "epoch": 1.46, + "grad_norm": 9.963956961075795, + "learning_rate": 3.6462599076872907e-06, + "loss": 1.0719, + "step": 121014 + }, + { + "epoch": 1.46, + "grad_norm": 6.703387177469119, + "learning_rate": 3.6458088135894744e-06, + "loss": 1.2725, + "step": 121017 + }, + { + "epoch": 1.46, + "grad_norm": 7.501639468370836, + "learning_rate": 3.6453577411762262e-06, + "loss": 0.9766, + "step": 121020 + }, + { + "epoch": 1.46, + "grad_norm": 8.783374723288908, + "learning_rate": 3.6449066904490837e-06, + "loss": 1.5105, + "step": 121023 + }, + { + "epoch": 1.46, + "grad_norm": 9.632073215695632, + "learning_rate": 3.6444556614095828e-06, + "loss": 1.2067, + "step": 121026 + }, + { + "epoch": 1.46, + "grad_norm": 9.025931742343579, + "learning_rate": 3.6440046540592655e-06, + "loss": 1.1636, + "step": 121029 + }, + { + "epoch": 1.46, + "grad_norm": 35.611431508620306, + "learning_rate": 3.643553668399674e-06, + "loss": 1.0847, + "step": 121032 + }, + { + "epoch": 1.46, + "grad_norm": 12.435799557181337, + "learning_rate": 3.643102704432341e-06, + "loss": 1.2825, + "step": 121035 + }, + { + "epoch": 1.46, + "grad_norm": 13.67239683453076, + "learning_rate": 3.6426517621588077e-06, + "loss": 1.7199, + "step": 121038 + }, + { + "epoch": 1.46, + "grad_norm": 52.59413442386584, + "learning_rate": 3.6422008415806153e-06, + "loss": 1.3178, + "step": 121041 + }, + { + "epoch": 1.46, + "grad_norm": 6.9350868263516245, + "learning_rate": 3.641749942699304e-06, + "loss": 1.1377, + "step": 121044 + }, + { + "epoch": 1.46, + "grad_norm": 11.474309802932877, + "learning_rate": 3.6412990655164102e-06, + "loss": 0.9858, + "step": 121047 + }, + { + "epoch": 1.46, + "grad_norm": 11.215193462078165, + "learning_rate": 3.6408482100334685e-06, + "loss": 0.9556, + "step": 121050 + }, + { + "epoch": 1.46, + "grad_norm": 21.97283633752563, + "learning_rate": 3.6403973762520217e-06, + "loss": 1.2291, + "step": 121053 + }, + { + "epoch": 1.46, + "grad_norm": 28.02292898371965, + "learning_rate": 3.6399465641736075e-06, + "loss": 1.1545, + "step": 121056 + }, + { + "epoch": 1.46, + "grad_norm": 14.152131779170524, + "learning_rate": 3.6394957737997692e-06, + "loss": 0.77, + "step": 121059 + }, + { + "epoch": 1.46, + "grad_norm": 7.417611030202076, + "learning_rate": 3.639045005132037e-06, + "loss": 1.0335, + "step": 121062 + }, + { + "epoch": 1.46, + "grad_norm": 29.80630707213254, + "learning_rate": 3.6385942581719556e-06, + "loss": 1.0771, + "step": 121065 + }, + { + "epoch": 1.46, + "grad_norm": 5.215125156052596, + "learning_rate": 3.638143532921058e-06, + "loss": 1.2581, + "step": 121068 + }, + { + "epoch": 1.46, + "grad_norm": 6.886208985651237, + "learning_rate": 3.6376928293808854e-06, + "loss": 1.2128, + "step": 121071 + }, + { + "epoch": 1.46, + "grad_norm": 7.092883580874709, + "learning_rate": 3.6372421475529785e-06, + "loss": 1.233, + "step": 121074 + }, + { + "epoch": 1.46, + "grad_norm": 14.738657389455541, + "learning_rate": 3.636791487438868e-06, + "loss": 1.0857, + "step": 121077 + }, + { + "epoch": 1.46, + "grad_norm": 6.025008816134406, + "learning_rate": 3.6363408490400965e-06, + "loss": 1.0801, + "step": 121080 + }, + { + "epoch": 1.46, + "grad_norm": 4.877046914737229, + "learning_rate": 3.6358902323582025e-06, + "loss": 0.9751, + "step": 121083 + }, + { + "epoch": 1.46, + "grad_norm": 29.25274288414278, + "learning_rate": 3.6354396373947255e-06, + "loss": 0.9661, + "step": 121086 + }, + { + "epoch": 1.46, + "grad_norm": 5.3118140499259905, + "learning_rate": 3.6349890641511997e-06, + "loss": 0.8828, + "step": 121089 + }, + { + "epoch": 1.46, + "grad_norm": 4.981226599854854, + "learning_rate": 3.634538512629161e-06, + "loss": 1.0675, + "step": 121092 + }, + { + "epoch": 1.46, + "grad_norm": 10.060608501427671, + "learning_rate": 3.63408798283015e-06, + "loss": 1.1241, + "step": 121095 + }, + { + "epoch": 1.46, + "grad_norm": 12.227929684942215, + "learning_rate": 3.6336374747557033e-06, + "loss": 0.9451, + "step": 121098 + }, + { + "epoch": 1.46, + "grad_norm": 6.422731101718373, + "learning_rate": 3.6331869884073623e-06, + "loss": 1.0522, + "step": 121101 + }, + { + "epoch": 1.46, + "grad_norm": 9.572520816749977, + "learning_rate": 3.6327365237866573e-06, + "loss": 0.9874, + "step": 121104 + }, + { + "epoch": 1.46, + "grad_norm": 3.485846428784356, + "learning_rate": 3.632286080895132e-06, + "loss": 1.0496, + "step": 121107 + }, + { + "epoch": 1.46, + "grad_norm": 12.715783092813165, + "learning_rate": 3.6318356597343174e-06, + "loss": 1.2417, + "step": 121110 + }, + { + "epoch": 1.46, + "grad_norm": 7.375877434949946, + "learning_rate": 3.6313852603057577e-06, + "loss": 1.2906, + "step": 121113 + }, + { + "epoch": 1.46, + "grad_norm": 18.666594137825754, + "learning_rate": 3.6309348826109826e-06, + "loss": 0.9395, + "step": 121116 + }, + { + "epoch": 1.46, + "grad_norm": 34.59721817632738, + "learning_rate": 3.630484526651532e-06, + "loss": 1.4848, + "step": 121119 + }, + { + "epoch": 1.46, + "grad_norm": 3.8145423531049505, + "learning_rate": 3.6300341924289438e-06, + "loss": 0.9807, + "step": 121122 + }, + { + "epoch": 1.46, + "grad_norm": 2.7787127006456105, + "learning_rate": 3.629583879944756e-06, + "loss": 0.8652, + "step": 121125 + }, + { + "epoch": 1.46, + "grad_norm": 14.78373803124408, + "learning_rate": 3.6291335892005053e-06, + "loss": 1.2065, + "step": 121128 + }, + { + "epoch": 1.46, + "grad_norm": 18.9974039320338, + "learning_rate": 3.628683320197727e-06, + "loss": 1.09, + "step": 121131 + }, + { + "epoch": 1.46, + "grad_norm": 10.865278680530652, + "learning_rate": 3.6282330729379543e-06, + "loss": 1.2319, + "step": 121134 + }, + { + "epoch": 1.46, + "grad_norm": 6.704893014710596, + "learning_rate": 3.6277828474227283e-06, + "loss": 0.9774, + "step": 121137 + }, + { + "epoch": 1.46, + "grad_norm": 7.84255793112065, + "learning_rate": 3.627332643653586e-06, + "loss": 1.0079, + "step": 121140 + }, + { + "epoch": 1.46, + "grad_norm": 3.6070364620604285, + "learning_rate": 3.6268824616320595e-06, + "loss": 1.4409, + "step": 121143 + }, + { + "epoch": 1.46, + "grad_norm": 4.280714202788768, + "learning_rate": 3.626432301359687e-06, + "loss": 0.953, + "step": 121146 + }, + { + "epoch": 1.46, + "grad_norm": 15.027964325960696, + "learning_rate": 3.6259821628380086e-06, + "loss": 1.1713, + "step": 121149 + }, + { + "epoch": 1.46, + "grad_norm": 8.196020744622095, + "learning_rate": 3.625532046068555e-06, + "loss": 0.9732, + "step": 121152 + }, + { + "epoch": 1.46, + "grad_norm": 61.32762369816195, + "learning_rate": 3.625081951052867e-06, + "loss": 1.1909, + "step": 121155 + }, + { + "epoch": 1.46, + "grad_norm": 5.879713983381353, + "learning_rate": 3.6246318777924738e-06, + "loss": 0.9173, + "step": 121158 + }, + { + "epoch": 1.46, + "grad_norm": 9.277628357536997, + "learning_rate": 3.6241818262889173e-06, + "loss": 1.0843, + "step": 121161 + }, + { + "epoch": 1.46, + "grad_norm": 3.655644395612971, + "learning_rate": 3.62373179654373e-06, + "loss": 1.447, + "step": 121164 + }, + { + "epoch": 1.46, + "grad_norm": 7.222902041822013, + "learning_rate": 3.6232817885584536e-06, + "loss": 1.4653, + "step": 121167 + }, + { + "epoch": 1.46, + "grad_norm": 17.665594232414033, + "learning_rate": 3.6228318023346164e-06, + "loss": 0.9592, + "step": 121170 + }, + { + "epoch": 1.46, + "grad_norm": 7.252498056688002, + "learning_rate": 3.6223818378737606e-06, + "loss": 0.9205, + "step": 121173 + }, + { + "epoch": 1.46, + "grad_norm": 8.242444469774764, + "learning_rate": 3.621931895177414e-06, + "loss": 0.9711, + "step": 121176 + }, + { + "epoch": 1.46, + "grad_norm": 8.694729121844388, + "learning_rate": 3.6214819742471176e-06, + "loss": 1.0859, + "step": 121179 + }, + { + "epoch": 1.46, + "grad_norm": 14.523616095741684, + "learning_rate": 3.6210320750844087e-06, + "loss": 1.0957, + "step": 121182 + }, + { + "epoch": 1.46, + "grad_norm": 18.37500935692044, + "learning_rate": 3.620582197690816e-06, + "loss": 1.1309, + "step": 121185 + }, + { + "epoch": 1.46, + "grad_norm": 12.979463299959596, + "learning_rate": 3.620132342067879e-06, + "loss": 1.3319, + "step": 121188 + }, + { + "epoch": 1.46, + "grad_norm": 6.531662909339777, + "learning_rate": 3.619682508217135e-06, + "loss": 0.941, + "step": 121191 + }, + { + "epoch": 1.46, + "grad_norm": 6.484481689129647, + "learning_rate": 3.6192326961401137e-06, + "loss": 1.3174, + "step": 121194 + }, + { + "epoch": 1.46, + "grad_norm": 8.274377512855349, + "learning_rate": 3.618782905838356e-06, + "loss": 0.9272, + "step": 121197 + }, + { + "epoch": 1.46, + "grad_norm": 24.42417699151833, + "learning_rate": 3.61833313731339e-06, + "loss": 1.1382, + "step": 121200 + }, + { + "epoch": 1.46, + "grad_norm": 13.762308249566289, + "learning_rate": 3.617883390566754e-06, + "loss": 1.1826, + "step": 121203 + }, + { + "epoch": 1.46, + "grad_norm": 4.484887797072189, + "learning_rate": 3.6174336655999843e-06, + "loss": 1.0974, + "step": 121206 + }, + { + "epoch": 1.46, + "grad_norm": 25.545995243765734, + "learning_rate": 3.6169839624146174e-06, + "loss": 1.3139, + "step": 121209 + }, + { + "epoch": 1.46, + "grad_norm": 38.6008430721989, + "learning_rate": 3.6165342810121818e-06, + "loss": 1.3508, + "step": 121212 + }, + { + "epoch": 1.46, + "grad_norm": 14.568629196081284, + "learning_rate": 3.6160846213942193e-06, + "loss": 1.391, + "step": 121215 + }, + { + "epoch": 1.46, + "grad_norm": 24.53876079095182, + "learning_rate": 3.6156349835622563e-06, + "loss": 1.0359, + "step": 121218 + }, + { + "epoch": 1.46, + "grad_norm": 5.16866126961978, + "learning_rate": 3.615185367517835e-06, + "loss": 1.2141, + "step": 121221 + }, + { + "epoch": 1.46, + "grad_norm": 1.6711824969351228, + "learning_rate": 3.6147357732624823e-06, + "loss": 1.4936, + "step": 121224 + }, + { + "epoch": 1.46, + "grad_norm": 7.777776103370413, + "learning_rate": 3.6142862007977375e-06, + "loss": 1.3125, + "step": 121227 + }, + { + "epoch": 1.46, + "grad_norm": 8.96622842480748, + "learning_rate": 3.6138366501251332e-06, + "loss": 1.1644, + "step": 121230 + }, + { + "epoch": 1.46, + "grad_norm": 5.74802513588334, + "learning_rate": 3.613387121246207e-06, + "loss": 0.9679, + "step": 121233 + }, + { + "epoch": 1.46, + "grad_norm": 6.57027627436808, + "learning_rate": 3.612937614162487e-06, + "loss": 1.2903, + "step": 121236 + }, + { + "epoch": 1.46, + "grad_norm": 11.529364933502354, + "learning_rate": 3.6124881288755143e-06, + "loss": 1.0115, + "step": 121239 + }, + { + "epoch": 1.46, + "grad_norm": 11.204625090335544, + "learning_rate": 3.6120386653868157e-06, + "loss": 1.3614, + "step": 121242 + }, + { + "epoch": 1.46, + "grad_norm": 9.497490808049118, + "learning_rate": 3.611589223697928e-06, + "loss": 1.0963, + "step": 121245 + }, + { + "epoch": 1.46, + "grad_norm": 7.158365193420945, + "learning_rate": 3.6111398038103874e-06, + "loss": 1.166, + "step": 121248 + }, + { + "epoch": 1.46, + "grad_norm": 7.815843162014584, + "learning_rate": 3.6106904057257233e-06, + "loss": 1.2457, + "step": 121251 + }, + { + "epoch": 1.46, + "grad_norm": 4.571547062050962, + "learning_rate": 3.6102410294454705e-06, + "loss": 1.3663, + "step": 121254 + }, + { + "epoch": 1.46, + "grad_norm": 5.041718994657889, + "learning_rate": 3.609791674971167e-06, + "loss": 0.9732, + "step": 121257 + }, + { + "epoch": 1.46, + "grad_norm": 9.192071087868348, + "learning_rate": 3.6093423423043406e-06, + "loss": 1.1973, + "step": 121260 + }, + { + "epoch": 1.46, + "grad_norm": 13.916706592853227, + "learning_rate": 3.6088930314465288e-06, + "loss": 1.0829, + "step": 121263 + }, + { + "epoch": 1.46, + "grad_norm": 15.08412854744935, + "learning_rate": 3.60844374239926e-06, + "loss": 1.258, + "step": 121266 + }, + { + "epoch": 1.46, + "grad_norm": 4.2789768459854844, + "learning_rate": 3.6079944751640715e-06, + "loss": 1.3643, + "step": 121269 + }, + { + "epoch": 1.46, + "grad_norm": 4.920115437283999, + "learning_rate": 3.607545229742494e-06, + "loss": 1.2517, + "step": 121272 + }, + { + "epoch": 1.46, + "grad_norm": 6.6025385987611545, + "learning_rate": 3.607096006136066e-06, + "loss": 1.1513, + "step": 121275 + }, + { + "epoch": 1.46, + "grad_norm": 9.867544905328442, + "learning_rate": 3.6066468043463144e-06, + "loss": 1.1769, + "step": 121278 + }, + { + "epoch": 1.46, + "grad_norm": 11.61234934970372, + "learning_rate": 3.6061976243747767e-06, + "loss": 1.1668, + "step": 121281 + }, + { + "epoch": 1.46, + "grad_norm": 7.672161145389659, + "learning_rate": 3.605748466222979e-06, + "loss": 1.1305, + "step": 121284 + }, + { + "epoch": 1.46, + "grad_norm": 13.724382948669932, + "learning_rate": 3.6052993298924598e-06, + "loss": 1.1603, + "step": 121287 + }, + { + "epoch": 1.46, + "grad_norm": 4.972598839429729, + "learning_rate": 3.604850215384754e-06, + "loss": 1.2779, + "step": 121290 + }, + { + "epoch": 1.46, + "grad_norm": 17.657889062579006, + "learning_rate": 3.6044011227013875e-06, + "loss": 1.1091, + "step": 121293 + }, + { + "epoch": 1.46, + "grad_norm": 6.023554658972509, + "learning_rate": 3.6039520518438963e-06, + "loss": 0.8195, + "step": 121296 + }, + { + "epoch": 1.46, + "grad_norm": 5.094395417277791, + "learning_rate": 3.603503002813816e-06, + "loss": 1.1908, + "step": 121299 + }, + { + "epoch": 1.46, + "grad_norm": 5.564558396486108, + "learning_rate": 3.603053975612677e-06, + "loss": 0.9335, + "step": 121302 + }, + { + "epoch": 1.46, + "grad_norm": 24.49675379071158, + "learning_rate": 3.6026049702420063e-06, + "loss": 1.0464, + "step": 121305 + }, + { + "epoch": 1.46, + "grad_norm": 17.279070436029436, + "learning_rate": 3.602155986703341e-06, + "loss": 1.0304, + "step": 121308 + }, + { + "epoch": 1.46, + "grad_norm": 13.203506377401105, + "learning_rate": 3.601707024998211e-06, + "loss": 0.9954, + "step": 121311 + }, + { + "epoch": 1.46, + "grad_norm": 9.893702449088538, + "learning_rate": 3.601258085128152e-06, + "loss": 1.2665, + "step": 121314 + }, + { + "epoch": 1.46, + "grad_norm": 5.3326812991527595, + "learning_rate": 3.6008091670946977e-06, + "loss": 1.1617, + "step": 121317 + }, + { + "epoch": 1.46, + "grad_norm": 27.46615220843819, + "learning_rate": 3.6003602708993736e-06, + "loss": 1.2366, + "step": 121320 + }, + { + "epoch": 1.46, + "grad_norm": 11.98647025354905, + "learning_rate": 3.599911396543717e-06, + "loss": 1.5689, + "step": 121323 + }, + { + "epoch": 1.46, + "grad_norm": 2.61592492129528, + "learning_rate": 3.5994625440292553e-06, + "loss": 1.3583, + "step": 121326 + }, + { + "epoch": 1.46, + "grad_norm": 12.176002761427625, + "learning_rate": 3.5990137133575244e-06, + "loss": 1.1112, + "step": 121329 + }, + { + "epoch": 1.46, + "grad_norm": 45.91956527847537, + "learning_rate": 3.598564904530052e-06, + "loss": 1.223, + "step": 121332 + }, + { + "epoch": 1.46, + "grad_norm": 14.945745405770488, + "learning_rate": 3.5981161175483715e-06, + "loss": 1.1076, + "step": 121335 + }, + { + "epoch": 1.46, + "grad_norm": 8.230672088816544, + "learning_rate": 3.5976673524140136e-06, + "loss": 1.1399, + "step": 121338 + }, + { + "epoch": 1.46, + "grad_norm": 6.491904164033969, + "learning_rate": 3.5972186091285153e-06, + "loss": 1.213, + "step": 121341 + }, + { + "epoch": 1.46, + "grad_norm": 4.467271686268919, + "learning_rate": 3.596769887693404e-06, + "loss": 1.1241, + "step": 121344 + }, + { + "epoch": 1.46, + "grad_norm": 30.073417900834595, + "learning_rate": 3.5963211881102066e-06, + "loss": 1.0908, + "step": 121347 + }, + { + "epoch": 1.46, + "grad_norm": 4.395089842190455, + "learning_rate": 3.5958725103804594e-06, + "loss": 1.0667, + "step": 121350 + }, + { + "epoch": 1.46, + "grad_norm": 7.9742127618267205, + "learning_rate": 3.595423854505692e-06, + "loss": 1.2317, + "step": 121353 + }, + { + "epoch": 1.46, + "grad_norm": 5.564230283390554, + "learning_rate": 3.594975220487439e-06, + "loss": 1.0109, + "step": 121356 + }, + { + "epoch": 1.46, + "grad_norm": 8.25388016071437, + "learning_rate": 3.5945266083272258e-06, + "loss": 0.6939, + "step": 121359 + }, + { + "epoch": 1.46, + "grad_norm": 4.339293085470899, + "learning_rate": 3.5940780180265855e-06, + "loss": 1.1072, + "step": 121362 + }, + { + "epoch": 1.46, + "grad_norm": 8.342435403858055, + "learning_rate": 3.593629449587053e-06, + "loss": 0.9604, + "step": 121365 + }, + { + "epoch": 1.46, + "grad_norm": 4.789809864046665, + "learning_rate": 3.5931809030101517e-06, + "loss": 1.0015, + "step": 121368 + }, + { + "epoch": 1.46, + "grad_norm": 2.9810534564237874, + "learning_rate": 3.59273237829742e-06, + "loss": 1.4316, + "step": 121371 + }, + { + "epoch": 1.46, + "grad_norm": 16.15730868309153, + "learning_rate": 3.5922838754503818e-06, + "loss": 0.9374, + "step": 121374 + }, + { + "epoch": 1.46, + "grad_norm": 20.914862686098648, + "learning_rate": 3.5918353944705708e-06, + "loss": 1.0271, + "step": 121377 + }, + { + "epoch": 1.46, + "grad_norm": 5.922101755654181, + "learning_rate": 3.5913869353595165e-06, + "loss": 1.1329, + "step": 121380 + }, + { + "epoch": 1.46, + "grad_norm": 8.716889867184607, + "learning_rate": 3.5909384981187543e-06, + "loss": 1.6535, + "step": 121383 + }, + { + "epoch": 1.46, + "grad_norm": 6.439672384336291, + "learning_rate": 3.59049008274981e-06, + "loss": 1.057, + "step": 121386 + }, + { + "epoch": 1.46, + "grad_norm": 6.0915585262788685, + "learning_rate": 3.5900416892542122e-06, + "loss": 1.3596, + "step": 121389 + }, + { + "epoch": 1.46, + "grad_norm": 12.092415522234294, + "learning_rate": 3.589593317633492e-06, + "loss": 1.3728, + "step": 121392 + }, + { + "epoch": 1.46, + "grad_norm": 11.152716179823651, + "learning_rate": 3.589144967889181e-06, + "loss": 1.3106, + "step": 121395 + }, + { + "epoch": 1.46, + "grad_norm": 8.733992419291507, + "learning_rate": 3.588696640022813e-06, + "loss": 1.3624, + "step": 121398 + }, + { + "epoch": 1.46, + "grad_norm": 11.466549020548538, + "learning_rate": 3.58824833403591e-06, + "loss": 0.9751, + "step": 121401 + }, + { + "epoch": 1.46, + "grad_norm": 14.850855021875104, + "learning_rate": 3.587800049930007e-06, + "loss": 1.0448, + "step": 121404 + }, + { + "epoch": 1.46, + "grad_norm": 10.521799343055097, + "learning_rate": 3.5873517877066346e-06, + "loss": 1.6131, + "step": 121407 + }, + { + "epoch": 1.46, + "grad_norm": 9.602421545403878, + "learning_rate": 3.586903547367322e-06, + "loss": 1.2431, + "step": 121410 + }, + { + "epoch": 1.46, + "grad_norm": 5.467832676729868, + "learning_rate": 3.586455328913593e-06, + "loss": 1.0326, + "step": 121413 + }, + { + "epoch": 1.46, + "grad_norm": 6.912642633673075, + "learning_rate": 3.5860071323469825e-06, + "loss": 1.2361, + "step": 121416 + }, + { + "epoch": 1.46, + "grad_norm": 7.832421138787823, + "learning_rate": 3.5855589576690198e-06, + "loss": 1.0821, + "step": 121419 + }, + { + "epoch": 1.46, + "grad_norm": 2.970116066014408, + "learning_rate": 3.585110804881233e-06, + "loss": 1.4729, + "step": 121422 + }, + { + "epoch": 1.46, + "grad_norm": 8.848220610328239, + "learning_rate": 3.5846626739851563e-06, + "loss": 1.3438, + "step": 121425 + }, + { + "epoch": 1.46, + "grad_norm": 4.292336545043462, + "learning_rate": 3.5842145649823158e-06, + "loss": 1.6532, + "step": 121428 + }, + { + "epoch": 1.46, + "grad_norm": 3.5651306566234737, + "learning_rate": 3.5837664778742356e-06, + "loss": 1.2452, + "step": 121431 + }, + { + "epoch": 1.46, + "grad_norm": 4.050430208520967, + "learning_rate": 3.58331841266245e-06, + "loss": 1.2234, + "step": 121434 + }, + { + "epoch": 1.46, + "grad_norm": 8.474923887461179, + "learning_rate": 3.58287036934849e-06, + "loss": 0.9276, + "step": 121437 + }, + { + "epoch": 1.46, + "grad_norm": 9.105417428499347, + "learning_rate": 3.582422347933878e-06, + "loss": 1.3142, + "step": 121440 + }, + { + "epoch": 1.46, + "grad_norm": 4.38501394375449, + "learning_rate": 3.5819743484201485e-06, + "loss": 1.2563, + "step": 121443 + }, + { + "epoch": 1.46, + "grad_norm": 60.54891670862073, + "learning_rate": 3.5815263708088276e-06, + "loss": 1.0558, + "step": 121446 + }, + { + "epoch": 1.46, + "grad_norm": 5.706289633614381, + "learning_rate": 3.58107841510145e-06, + "loss": 1.1057, + "step": 121449 + }, + { + "epoch": 1.46, + "grad_norm": 7.0578187555006755, + "learning_rate": 3.5806304812995384e-06, + "loss": 1.2141, + "step": 121452 + }, + { + "epoch": 1.46, + "grad_norm": 14.983626002081069, + "learning_rate": 3.58018256940462e-06, + "loss": 1.4181, + "step": 121455 + }, + { + "epoch": 1.46, + "grad_norm": 12.634171334969942, + "learning_rate": 3.5797346794182262e-06, + "loss": 1.0843, + "step": 121458 + }, + { + "epoch": 1.46, + "grad_norm": 12.903398254256992, + "learning_rate": 3.5792868113418844e-06, + "loss": 1.2446, + "step": 121461 + }, + { + "epoch": 1.46, + "grad_norm": 7.819786748379684, + "learning_rate": 3.5788389651771283e-06, + "loss": 1.1321, + "step": 121464 + }, + { + "epoch": 1.46, + "grad_norm": 6.427591202056743, + "learning_rate": 3.578391140925479e-06, + "loss": 1.097, + "step": 121467 + }, + { + "epoch": 1.46, + "grad_norm": 7.528101395267629, + "learning_rate": 3.5779433385884697e-06, + "loss": 1.0632, + "step": 121470 + }, + { + "epoch": 1.46, + "grad_norm": 5.376547122727342, + "learning_rate": 3.5774955581676243e-06, + "loss": 1.1065, + "step": 121473 + }, + { + "epoch": 1.46, + "grad_norm": 15.183861307010892, + "learning_rate": 3.5770477996644724e-06, + "loss": 0.9944, + "step": 121476 + }, + { + "epoch": 1.46, + "grad_norm": 21.2469745788555, + "learning_rate": 3.5766000630805466e-06, + "loss": 1.2699, + "step": 121479 + }, + { + "epoch": 1.46, + "grad_norm": 10.2995122315984, + "learning_rate": 3.5761523484173675e-06, + "loss": 1.1681, + "step": 121482 + }, + { + "epoch": 1.46, + "grad_norm": 10.091433092371744, + "learning_rate": 3.5757046556764663e-06, + "loss": 1.2071, + "step": 121485 + }, + { + "epoch": 1.46, + "grad_norm": 19.668654592341742, + "learning_rate": 3.5752569848593722e-06, + "loss": 1.2177, + "step": 121488 + }, + { + "epoch": 1.46, + "grad_norm": 3.7560468066245836, + "learning_rate": 3.5748093359676137e-06, + "loss": 1.2525, + "step": 121491 + }, + { + "epoch": 1.46, + "grad_norm": 16.612300653635234, + "learning_rate": 3.5743617090027185e-06, + "loss": 1.1021, + "step": 121494 + }, + { + "epoch": 1.46, + "grad_norm": 4.839591131400769, + "learning_rate": 3.5739141039662073e-06, + "loss": 1.2782, + "step": 121497 + }, + { + "epoch": 1.46, + "grad_norm": 4.44998099197096, + "learning_rate": 3.573466520859613e-06, + "loss": 1.1154, + "step": 121500 + }, + { + "epoch": 1.46, + "grad_norm": 6.727569012592574, + "learning_rate": 3.5730189596844634e-06, + "loss": 1.2428, + "step": 121503 + }, + { + "epoch": 1.46, + "grad_norm": 6.752991422892082, + "learning_rate": 3.5725714204422867e-06, + "loss": 1.0798, + "step": 121506 + }, + { + "epoch": 1.46, + "grad_norm": 7.987463876142386, + "learning_rate": 3.5721239031346067e-06, + "loss": 1.0688, + "step": 121509 + }, + { + "epoch": 1.46, + "grad_norm": 3.5460526051327728, + "learning_rate": 3.5716764077629528e-06, + "loss": 1.0164, + "step": 121512 + }, + { + "epoch": 1.46, + "grad_norm": 3.728613206536858, + "learning_rate": 3.571228934328854e-06, + "loss": 1.0672, + "step": 121515 + }, + { + "epoch": 1.46, + "grad_norm": 20.749271694831446, + "learning_rate": 3.570781482833836e-06, + "loss": 1.1951, + "step": 121518 + }, + { + "epoch": 1.46, + "grad_norm": 11.022597004387588, + "learning_rate": 3.570334053279422e-06, + "loss": 0.7932, + "step": 121521 + }, + { + "epoch": 1.46, + "grad_norm": 14.564351619983992, + "learning_rate": 3.569886645667141e-06, + "loss": 1.0773, + "step": 121524 + }, + { + "epoch": 1.46, + "grad_norm": 6.595774450113753, + "learning_rate": 3.5694392599985204e-06, + "loss": 1.239, + "step": 121527 + }, + { + "epoch": 1.46, + "grad_norm": 15.95544305919565, + "learning_rate": 3.5689918962750882e-06, + "loss": 1.1417, + "step": 121530 + }, + { + "epoch": 1.46, + "grad_norm": 6.183906498682293, + "learning_rate": 3.5685445544983733e-06, + "loss": 1.2958, + "step": 121533 + }, + { + "epoch": 1.46, + "grad_norm": 10.525695095601693, + "learning_rate": 3.5680972346698994e-06, + "loss": 1.1971, + "step": 121536 + }, + { + "epoch": 1.46, + "grad_norm": 4.057988099044232, + "learning_rate": 3.56764993679119e-06, + "loss": 0.9329, + "step": 121539 + }, + { + "epoch": 1.46, + "grad_norm": 8.772594799982377, + "learning_rate": 3.567202660863773e-06, + "loss": 0.7823, + "step": 121542 + }, + { + "epoch": 1.46, + "grad_norm": 11.006759616968795, + "learning_rate": 3.5667554068891806e-06, + "loss": 1.204, + "step": 121545 + }, + { + "epoch": 1.46, + "grad_norm": 7.901227635603813, + "learning_rate": 3.566308174868932e-06, + "loss": 1.1494, + "step": 121548 + }, + { + "epoch": 1.46, + "grad_norm": 6.785778768886159, + "learning_rate": 3.565860964804555e-06, + "loss": 1.0052, + "step": 121551 + }, + { + "epoch": 1.46, + "grad_norm": 9.256352960787567, + "learning_rate": 3.5654137766975784e-06, + "loss": 1.3024, + "step": 121554 + }, + { + "epoch": 1.46, + "grad_norm": 2.9146274772574947, + "learning_rate": 3.5649666105495283e-06, + "loss": 0.7931, + "step": 121557 + }, + { + "epoch": 1.46, + "grad_norm": 7.757534773002155, + "learning_rate": 3.564519466361931e-06, + "loss": 1.2701, + "step": 121560 + }, + { + "epoch": 1.46, + "grad_norm": 12.063419390993964, + "learning_rate": 3.5640723441363066e-06, + "loss": 1.0943, + "step": 121563 + }, + { + "epoch": 1.46, + "grad_norm": 7.708677695070617, + "learning_rate": 3.563625243874185e-06, + "loss": 1.1894, + "step": 121566 + }, + { + "epoch": 1.46, + "grad_norm": 3.1933704309812105, + "learning_rate": 3.5631781655770915e-06, + "loss": 1.1017, + "step": 121569 + }, + { + "epoch": 1.46, + "grad_norm": 12.39490754063702, + "learning_rate": 3.5627311092465568e-06, + "loss": 1.1054, + "step": 121572 + }, + { + "epoch": 1.46, + "grad_norm": 6.5033322456624765, + "learning_rate": 3.562284074884098e-06, + "loss": 1.0821, + "step": 121575 + }, + { + "epoch": 1.46, + "grad_norm": 7.321491746638986, + "learning_rate": 3.561837062491249e-06, + "loss": 1.4337, + "step": 121578 + }, + { + "epoch": 1.46, + "grad_norm": 9.64477299919753, + "learning_rate": 3.5613900720695273e-06, + "loss": 1.1667, + "step": 121581 + }, + { + "epoch": 1.46, + "grad_norm": 10.087550918517701, + "learning_rate": 3.560943103620462e-06, + "loss": 1.3502, + "step": 121584 + }, + { + "epoch": 1.46, + "grad_norm": 18.61674577191118, + "learning_rate": 3.560496157145582e-06, + "loss": 1.3779, + "step": 121587 + }, + { + "epoch": 1.46, + "grad_norm": 12.408029755740264, + "learning_rate": 3.5600492326464052e-06, + "loss": 1.0661, + "step": 121590 + }, + { + "epoch": 1.46, + "grad_norm": 11.630414995795567, + "learning_rate": 3.559602330124462e-06, + "loss": 1.0062, + "step": 121593 + }, + { + "epoch": 1.46, + "grad_norm": 22.525121121316722, + "learning_rate": 3.5591554495812753e-06, + "loss": 1.2913, + "step": 121596 + }, + { + "epoch": 1.46, + "grad_norm": 7.848135457661409, + "learning_rate": 3.5587085910183737e-06, + "loss": 1.3946, + "step": 121599 + }, + { + "epoch": 1.46, + "grad_norm": 7.324902478305503, + "learning_rate": 3.558261754437281e-06, + "loss": 1.1724, + "step": 121602 + }, + { + "epoch": 1.46, + "grad_norm": 8.690244643939254, + "learning_rate": 3.557814939839517e-06, + "loss": 1.1985, + "step": 121605 + }, + { + "epoch": 1.46, + "grad_norm": 13.562786405435165, + "learning_rate": 3.55736814722661e-06, + "loss": 1.0752, + "step": 121608 + }, + { + "epoch": 1.46, + "grad_norm": 9.030840101792755, + "learning_rate": 3.556921376600084e-06, + "loss": 1.48, + "step": 121611 + }, + { + "epoch": 1.46, + "grad_norm": 4.716449778894641, + "learning_rate": 3.5564746279614694e-06, + "loss": 1.2228, + "step": 121614 + }, + { + "epoch": 1.46, + "grad_norm": 10.242203316438436, + "learning_rate": 3.556027901312281e-06, + "loss": 1.2798, + "step": 121617 + }, + { + "epoch": 1.46, + "grad_norm": 13.034142173910624, + "learning_rate": 3.5555811966540533e-06, + "loss": 1.1443, + "step": 121620 + }, + { + "epoch": 1.46, + "grad_norm": 10.313204731835851, + "learning_rate": 3.555134513988301e-06, + "loss": 1.266, + "step": 121623 + }, + { + "epoch": 1.46, + "grad_norm": 27.90390170717134, + "learning_rate": 3.5546878533165573e-06, + "loss": 1.0039, + "step": 121626 + }, + { + "epoch": 1.46, + "grad_norm": 4.254963157074386, + "learning_rate": 3.5542412146403383e-06, + "loss": 1.2191, + "step": 121629 + }, + { + "epoch": 1.46, + "grad_norm": 4.69738474136415, + "learning_rate": 3.553794597961173e-06, + "loss": 1.0115, + "step": 121632 + }, + { + "epoch": 1.46, + "grad_norm": 3.123327725404752, + "learning_rate": 3.5533480032805844e-06, + "loss": 1.2154, + "step": 121635 + }, + { + "epoch": 1.46, + "grad_norm": 7.846938640105127, + "learning_rate": 3.5529014306000976e-06, + "loss": 0.9582, + "step": 121638 + }, + { + "epoch": 1.46, + "grad_norm": 20.42020870203475, + "learning_rate": 3.5524548799212387e-06, + "loss": 1.0246, + "step": 121641 + }, + { + "epoch": 1.46, + "grad_norm": 19.415015084021515, + "learning_rate": 3.5520083512455285e-06, + "loss": 1.092, + "step": 121644 + }, + { + "epoch": 1.46, + "grad_norm": 4.274737795608132, + "learning_rate": 3.551561844574489e-06, + "loss": 1.1114, + "step": 121647 + }, + { + "epoch": 1.46, + "grad_norm": 5.250909144861386, + "learning_rate": 3.551115359909646e-06, + "loss": 1.0615, + "step": 121650 + }, + { + "epoch": 1.46, + "grad_norm": 3.4824858115489903, + "learning_rate": 3.5506688972525262e-06, + "loss": 1.5129, + "step": 121653 + }, + { + "epoch": 1.46, + "grad_norm": 9.365267083329098, + "learning_rate": 3.5502224566046473e-06, + "loss": 0.9983, + "step": 121656 + }, + { + "epoch": 1.46, + "grad_norm": 10.826153867904441, + "learning_rate": 3.549776037967535e-06, + "loss": 1.1449, + "step": 121659 + }, + { + "epoch": 1.46, + "grad_norm": 3.997755568094017, + "learning_rate": 3.549329641342718e-06, + "loss": 1.1243, + "step": 121662 + }, + { + "epoch": 1.46, + "grad_norm": 9.3089644030362, + "learning_rate": 3.548883266731713e-06, + "loss": 1.279, + "step": 121665 + }, + { + "epoch": 1.46, + "grad_norm": 13.193032943787241, + "learning_rate": 3.5484369141360475e-06, + "loss": 1.0092, + "step": 121668 + }, + { + "epoch": 1.46, + "grad_norm": 2.9464888116444006, + "learning_rate": 3.5479905835572405e-06, + "loss": 0.7774, + "step": 121671 + }, + { + "epoch": 1.46, + "grad_norm": 4.578072252071887, + "learning_rate": 3.547544274996817e-06, + "loss": 1.1267, + "step": 121674 + }, + { + "epoch": 1.46, + "grad_norm": 10.608642371637695, + "learning_rate": 3.5470979884563015e-06, + "loss": 1.1864, + "step": 121677 + }, + { + "epoch": 1.46, + "grad_norm": 4.148128324589997, + "learning_rate": 3.546651723937219e-06, + "loss": 0.8245, + "step": 121680 + }, + { + "epoch": 1.46, + "grad_norm": 8.16038679434147, + "learning_rate": 3.5462054814410872e-06, + "loss": 1.0271, + "step": 121683 + }, + { + "epoch": 1.46, + "grad_norm": 13.601775773559128, + "learning_rate": 3.5457592609694335e-06, + "loss": 1.3899, + "step": 121686 + }, + { + "epoch": 1.46, + "grad_norm": 3.913073653672917, + "learning_rate": 3.5453130625237763e-06, + "loss": 1.081, + "step": 121689 + }, + { + "epoch": 1.46, + "grad_norm": 18.958744785890712, + "learning_rate": 3.54486688610564e-06, + "loss": 0.922, + "step": 121692 + }, + { + "epoch": 1.46, + "grad_norm": 8.093032889526743, + "learning_rate": 3.544420731716551e-06, + "loss": 1.2908, + "step": 121695 + }, + { + "epoch": 1.46, + "grad_norm": 5.6170502327532486, + "learning_rate": 3.5439745993580266e-06, + "loss": 0.8766, + "step": 121698 + }, + { + "epoch": 1.46, + "grad_norm": 4.065831102149761, + "learning_rate": 3.5435284890315913e-06, + "loss": 1.0488, + "step": 121701 + }, + { + "epoch": 1.46, + "grad_norm": 21.057680795448476, + "learning_rate": 3.54308240073877e-06, + "loss": 1.5122, + "step": 121704 + }, + { + "epoch": 1.46, + "grad_norm": 7.80413050589347, + "learning_rate": 3.5426363344810834e-06, + "loss": 1.0887, + "step": 121707 + }, + { + "epoch": 1.46, + "grad_norm": 2.786605125638133, + "learning_rate": 3.542190290260049e-06, + "loss": 1.1597, + "step": 121710 + }, + { + "epoch": 1.46, + "grad_norm": 8.342986858764814, + "learning_rate": 3.541744268077194e-06, + "loss": 1.0644, + "step": 121713 + }, + { + "epoch": 1.46, + "grad_norm": 3.8966653081023983, + "learning_rate": 3.541298267934039e-06, + "loss": 1.2338, + "step": 121716 + }, + { + "epoch": 1.46, + "grad_norm": 8.155443589868414, + "learning_rate": 3.5408522898321064e-06, + "loss": 1.0665, + "step": 121719 + }, + { + "epoch": 1.46, + "grad_norm": 4.713481508784864, + "learning_rate": 3.5404063337729222e-06, + "loss": 1.0524, + "step": 121722 + }, + { + "epoch": 1.46, + "grad_norm": 3.9026779473715103, + "learning_rate": 3.5399603997580013e-06, + "loss": 1.4164, + "step": 121725 + }, + { + "epoch": 1.46, + "grad_norm": 12.168716314608561, + "learning_rate": 3.539514487788871e-06, + "loss": 1.4635, + "step": 121728 + }, + { + "epoch": 1.46, + "grad_norm": 9.766431754084651, + "learning_rate": 3.539068597867048e-06, + "loss": 1.0768, + "step": 121731 + }, + { + "epoch": 1.46, + "grad_norm": 13.932720452698549, + "learning_rate": 3.53862272999406e-06, + "loss": 1.028, + "step": 121734 + }, + { + "epoch": 1.46, + "grad_norm": 10.261711081011534, + "learning_rate": 3.5381768841714224e-06, + "loss": 1.3603, + "step": 121737 + }, + { + "epoch": 1.46, + "grad_norm": 7.357584738209663, + "learning_rate": 3.537731060400659e-06, + "loss": 1.4171, + "step": 121740 + }, + { + "epoch": 1.46, + "grad_norm": 2.7454909112103265, + "learning_rate": 3.537285258683292e-06, + "loss": 1.3404, + "step": 121743 + }, + { + "epoch": 1.46, + "grad_norm": 4.774481766242744, + "learning_rate": 3.536839479020845e-06, + "loss": 1.6241, + "step": 121746 + }, + { + "epoch": 1.46, + "grad_norm": 5.078375475301029, + "learning_rate": 3.5363937214148345e-06, + "loss": 1.2852, + "step": 121749 + }, + { + "epoch": 1.46, + "grad_norm": 6.764576478308715, + "learning_rate": 3.535947985866787e-06, + "loss": 1.258, + "step": 121752 + }, + { + "epoch": 1.46, + "grad_norm": 8.99786133221569, + "learning_rate": 3.535502272378217e-06, + "loss": 1.5486, + "step": 121755 + }, + { + "epoch": 1.46, + "grad_norm": 19.93933636737781, + "learning_rate": 3.53505658095065e-06, + "loss": 1.1961, + "step": 121758 + }, + { + "epoch": 1.46, + "grad_norm": 7.894446585127609, + "learning_rate": 3.5346109115856097e-06, + "loss": 1.2691, + "step": 121761 + }, + { + "epoch": 1.46, + "grad_norm": 7.557623323712112, + "learning_rate": 3.534165264284609e-06, + "loss": 1.3948, + "step": 121764 + }, + { + "epoch": 1.46, + "grad_norm": 2.5151204175959943, + "learning_rate": 3.5337196390491744e-06, + "loss": 1.2612, + "step": 121767 + }, + { + "epoch": 1.46, + "grad_norm": 5.565821428677233, + "learning_rate": 3.5332740358808283e-06, + "loss": 1.1737, + "step": 121770 + }, + { + "epoch": 1.46, + "grad_norm": 6.364503818103838, + "learning_rate": 3.5328284547810855e-06, + "loss": 1.4602, + "step": 121773 + }, + { + "epoch": 1.46, + "grad_norm": 6.373007962362456, + "learning_rate": 3.532382895751473e-06, + "loss": 1.2441, + "step": 121776 + }, + { + "epoch": 1.46, + "grad_norm": 9.271616149162927, + "learning_rate": 3.5319373587935045e-06, + "loss": 1.3692, + "step": 121779 + }, + { + "epoch": 1.46, + "grad_norm": 12.211763222501784, + "learning_rate": 3.5314918439087043e-06, + "loss": 1.3577, + "step": 121782 + }, + { + "epoch": 1.46, + "grad_norm": 6.787127593094636, + "learning_rate": 3.5310463510985928e-06, + "loss": 1.3939, + "step": 121785 + }, + { + "epoch": 1.46, + "grad_norm": 4.934681512429948, + "learning_rate": 3.530600880364693e-06, + "loss": 1.0518, + "step": 121788 + }, + { + "epoch": 1.46, + "grad_norm": 5.410866765705177, + "learning_rate": 3.5301554317085184e-06, + "loss": 0.9627, + "step": 121791 + }, + { + "epoch": 1.46, + "grad_norm": 12.622971301230345, + "learning_rate": 3.5297100051315968e-06, + "loss": 0.968, + "step": 121794 + }, + { + "epoch": 1.46, + "grad_norm": 4.308464640713535, + "learning_rate": 3.5292646006354413e-06, + "loss": 1.3259, + "step": 121797 + }, + { + "epoch": 1.46, + "grad_norm": 31.731218572953797, + "learning_rate": 3.528819218221574e-06, + "loss": 1.2187, + "step": 121800 + }, + { + "epoch": 1.46, + "grad_norm": 8.36296201438538, + "learning_rate": 3.5283738578915194e-06, + "loss": 1.2923, + "step": 121803 + }, + { + "epoch": 1.46, + "grad_norm": 11.248364644971785, + "learning_rate": 3.5279285196467905e-06, + "loss": 1.1729, + "step": 121806 + }, + { + "epoch": 1.46, + "grad_norm": 64.92797972585687, + "learning_rate": 3.5274832034889107e-06, + "loss": 1.0491, + "step": 121809 + }, + { + "epoch": 1.46, + "grad_norm": 7.3077311360685515, + "learning_rate": 3.527037909419403e-06, + "loss": 1.2418, + "step": 121812 + }, + { + "epoch": 1.46, + "grad_norm": 8.20606160067152, + "learning_rate": 3.526592637439783e-06, + "loss": 0.9403, + "step": 121815 + }, + { + "epoch": 1.46, + "grad_norm": 29.923817054957812, + "learning_rate": 3.5261473875515663e-06, + "loss": 1.4733, + "step": 121818 + }, + { + "epoch": 1.46, + "grad_norm": 9.622437796675184, + "learning_rate": 3.525702159756278e-06, + "loss": 1.3214, + "step": 121821 + }, + { + "epoch": 1.46, + "grad_norm": 8.803291987207963, + "learning_rate": 3.5252569540554348e-06, + "loss": 1.2619, + "step": 121824 + }, + { + "epoch": 1.46, + "grad_norm": 17.95990002320967, + "learning_rate": 3.524811770450559e-06, + "loss": 1.3595, + "step": 121827 + }, + { + "epoch": 1.46, + "grad_norm": 8.367934302603354, + "learning_rate": 3.5243666089431704e-06, + "loss": 1.3691, + "step": 121830 + }, + { + "epoch": 1.47, + "grad_norm": 7.0333537880919454, + "learning_rate": 3.5239214695347833e-06, + "loss": 1.1499, + "step": 121833 + }, + { + "epoch": 1.47, + "grad_norm": 8.674897126889125, + "learning_rate": 3.5234763522269235e-06, + "loss": 1.1528, + "step": 121836 + }, + { + "epoch": 1.47, + "grad_norm": 3.7071640155051626, + "learning_rate": 3.523031257021101e-06, + "loss": 1.188, + "step": 121839 + }, + { + "epoch": 1.47, + "grad_norm": 7.023597391943829, + "learning_rate": 3.5225861839188447e-06, + "loss": 1.1824, + "step": 121842 + }, + { + "epoch": 1.47, + "grad_norm": 8.708566843149471, + "learning_rate": 3.522141132921665e-06, + "loss": 1.0417, + "step": 121845 + }, + { + "epoch": 1.47, + "grad_norm": 23.422921866681975, + "learning_rate": 3.521696104031084e-06, + "loss": 1.1596, + "step": 121848 + }, + { + "epoch": 1.47, + "grad_norm": 8.87499764190963, + "learning_rate": 3.5212510972486203e-06, + "loss": 1.1548, + "step": 121851 + }, + { + "epoch": 1.47, + "grad_norm": 5.75167319953473, + "learning_rate": 3.5208061125757977e-06, + "loss": 0.9843, + "step": 121854 + }, + { + "epoch": 1.47, + "grad_norm": 5.21724781283874, + "learning_rate": 3.5203611500141286e-06, + "loss": 1.625, + "step": 121857 + }, + { + "epoch": 1.47, + "grad_norm": 9.691887561314601, + "learning_rate": 3.519916209565131e-06, + "loss": 1.1384, + "step": 121860 + }, + { + "epoch": 1.47, + "grad_norm": 12.142106848470918, + "learning_rate": 3.519471291230324e-06, + "loss": 0.6769, + "step": 121863 + }, + { + "epoch": 1.47, + "grad_norm": 6.1778321996400996, + "learning_rate": 3.519026395011227e-06, + "loss": 1.1007, + "step": 121866 + }, + { + "epoch": 1.47, + "grad_norm": 3.1905751583787985, + "learning_rate": 3.518581520909363e-06, + "loss": 1.4435, + "step": 121869 + }, + { + "epoch": 1.47, + "grad_norm": 6.674142227933887, + "learning_rate": 3.518136668926242e-06, + "loss": 1.403, + "step": 121872 + }, + { + "epoch": 1.47, + "grad_norm": 4.142877435231946, + "learning_rate": 3.517691839063385e-06, + "loss": 1.461, + "step": 121875 + }, + { + "epoch": 1.47, + "grad_norm": 2.283642841757551, + "learning_rate": 3.517247031322315e-06, + "loss": 1.2355, + "step": 121878 + }, + { + "epoch": 1.47, + "grad_norm": 2.437998006838557, + "learning_rate": 3.5168022457045425e-06, + "loss": 1.2201, + "step": 121881 + }, + { + "epoch": 1.47, + "grad_norm": 9.19695499468234, + "learning_rate": 3.516357482211591e-06, + "loss": 1.3286, + "step": 121884 + }, + { + "epoch": 1.47, + "grad_norm": 13.119275980930006, + "learning_rate": 3.5159127408449735e-06, + "loss": 0.7968, + "step": 121887 + }, + { + "epoch": 1.47, + "grad_norm": 11.317381037654869, + "learning_rate": 3.5154680216062096e-06, + "loss": 0.8945, + "step": 121890 + }, + { + "epoch": 1.47, + "grad_norm": 5.685858447945949, + "learning_rate": 3.515023324496819e-06, + "loss": 1.102, + "step": 121893 + }, + { + "epoch": 1.47, + "grad_norm": 5.1407652081842725, + "learning_rate": 3.5145786495183208e-06, + "loss": 1.0188, + "step": 121896 + }, + { + "epoch": 1.47, + "grad_norm": 6.281575663359108, + "learning_rate": 3.5141339966722287e-06, + "loss": 1.4174, + "step": 121899 + }, + { + "epoch": 1.47, + "grad_norm": 22.49662008378551, + "learning_rate": 3.513689365960059e-06, + "loss": 1.1889, + "step": 121902 + }, + { + "epoch": 1.47, + "grad_norm": 7.030427119964649, + "learning_rate": 3.5132447573833296e-06, + "loss": 1.2133, + "step": 121905 + }, + { + "epoch": 1.47, + "grad_norm": 10.629299238980243, + "learning_rate": 3.512800170943561e-06, + "loss": 1.3165, + "step": 121908 + }, + { + "epoch": 1.47, + "grad_norm": 19.077971781263987, + "learning_rate": 3.5123556066422705e-06, + "loss": 1.0551, + "step": 121911 + }, + { + "epoch": 1.47, + "grad_norm": 17.589183464543666, + "learning_rate": 3.511911064480971e-06, + "loss": 1.4378, + "step": 121914 + }, + { + "epoch": 1.47, + "grad_norm": 9.878727851907906, + "learning_rate": 3.511466544461182e-06, + "loss": 1.3387, + "step": 121917 + }, + { + "epoch": 1.47, + "grad_norm": 5.39701962452563, + "learning_rate": 3.5110220465844236e-06, + "loss": 1.2496, + "step": 121920 + }, + { + "epoch": 1.47, + "grad_norm": 6.8403509502664175, + "learning_rate": 3.5105775708522105e-06, + "loss": 1.0773, + "step": 121923 + }, + { + "epoch": 1.47, + "grad_norm": 7.367236975523174, + "learning_rate": 3.5101331172660546e-06, + "loss": 1.0528, + "step": 121926 + }, + { + "epoch": 1.47, + "grad_norm": 17.56248164414068, + "learning_rate": 3.509688685827477e-06, + "loss": 1.4607, + "step": 121929 + }, + { + "epoch": 1.47, + "grad_norm": 7.869335839409239, + "learning_rate": 3.509244276537993e-06, + "loss": 1.2389, + "step": 121932 + }, + { + "epoch": 1.47, + "grad_norm": 5.130286844928585, + "learning_rate": 3.5087998893991216e-06, + "loss": 1.4991, + "step": 121935 + }, + { + "epoch": 1.47, + "grad_norm": 6.860167132666905, + "learning_rate": 3.508355524412381e-06, + "loss": 1.405, + "step": 121938 + }, + { + "epoch": 1.47, + "grad_norm": 5.326504702571481, + "learning_rate": 3.5079111815792843e-06, + "loss": 1.1907, + "step": 121941 + }, + { + "epoch": 1.47, + "grad_norm": 5.332939015383944, + "learning_rate": 3.5074668609013463e-06, + "loss": 1.1546, + "step": 121944 + }, + { + "epoch": 1.47, + "grad_norm": 8.315724975742292, + "learning_rate": 3.5070225623800835e-06, + "loss": 0.8982, + "step": 121947 + }, + { + "epoch": 1.47, + "grad_norm": 10.99776179650095, + "learning_rate": 3.5065782860170184e-06, + "loss": 0.7507, + "step": 121950 + }, + { + "epoch": 1.47, + "grad_norm": 8.529078013290485, + "learning_rate": 3.5061340318136595e-06, + "loss": 1.2298, + "step": 121953 + }, + { + "epoch": 1.47, + "grad_norm": 7.918108759918633, + "learning_rate": 3.5056897997715256e-06, + "loss": 1.3216, + "step": 121956 + }, + { + "epoch": 1.47, + "grad_norm": 5.512221920384325, + "learning_rate": 3.5052455898921332e-06, + "loss": 1.1728, + "step": 121959 + }, + { + "epoch": 1.47, + "grad_norm": 3.28507866602356, + "learning_rate": 3.5048014021770016e-06, + "loss": 1.301, + "step": 121962 + }, + { + "epoch": 1.47, + "grad_norm": 6.574458951214991, + "learning_rate": 3.5043572366276435e-06, + "loss": 1.0234, + "step": 121965 + }, + { + "epoch": 1.47, + "grad_norm": 10.615902795293502, + "learning_rate": 3.5039130932455714e-06, + "loss": 1.1269, + "step": 121968 + }, + { + "epoch": 1.47, + "grad_norm": 15.823829361646476, + "learning_rate": 3.5034689720323034e-06, + "loss": 1.2935, + "step": 121971 + }, + { + "epoch": 1.47, + "grad_norm": 8.005157130381573, + "learning_rate": 3.503024872989356e-06, + "loss": 1.2258, + "step": 121974 + }, + { + "epoch": 1.47, + "grad_norm": 5.597915053897661, + "learning_rate": 3.5025807961182466e-06, + "loss": 0.7804, + "step": 121977 + }, + { + "epoch": 1.47, + "grad_norm": 5.925401516778701, + "learning_rate": 3.5021367414204867e-06, + "loss": 1.2741, + "step": 121980 + }, + { + "epoch": 1.47, + "grad_norm": 19.214213649305847, + "learning_rate": 3.501692708897597e-06, + "loss": 0.8426, + "step": 121983 + }, + { + "epoch": 1.47, + "grad_norm": 9.432470215650987, + "learning_rate": 3.501248698551085e-06, + "loss": 0.9965, + "step": 121986 + }, + { + "epoch": 1.47, + "grad_norm": 5.074741565148417, + "learning_rate": 3.5008047103824717e-06, + "loss": 1.0371, + "step": 121989 + }, + { + "epoch": 1.47, + "grad_norm": 7.481421599143496, + "learning_rate": 3.500360744393273e-06, + "loss": 1.1682, + "step": 121992 + }, + { + "epoch": 1.47, + "grad_norm": 7.72429315184966, + "learning_rate": 3.499916800584999e-06, + "loss": 1.0648, + "step": 121995 + }, + { + "epoch": 1.47, + "grad_norm": 11.37886433482626, + "learning_rate": 3.499472878959168e-06, + "loss": 1.0144, + "step": 121998 + }, + { + "epoch": 1.47, + "grad_norm": 5.251009642774108, + "learning_rate": 3.499028979517295e-06, + "loss": 0.8694, + "step": 122001 + }, + { + "epoch": 1.47, + "grad_norm": 8.0267927165692, + "learning_rate": 3.498585102260896e-06, + "loss": 1.1904, + "step": 122004 + }, + { + "epoch": 1.47, + "grad_norm": 17.443941318235154, + "learning_rate": 3.4981412471914856e-06, + "loss": 1.2293, + "step": 122007 + }, + { + "epoch": 1.47, + "grad_norm": 3.791639231319478, + "learning_rate": 3.4976974143105737e-06, + "loss": 0.8469, + "step": 122010 + }, + { + "epoch": 1.47, + "grad_norm": 4.246491512490027, + "learning_rate": 3.497253603619679e-06, + "loss": 0.9773, + "step": 122013 + }, + { + "epoch": 1.47, + "grad_norm": 8.67937406969948, + "learning_rate": 3.4968098151203145e-06, + "loss": 1.153, + "step": 122016 + }, + { + "epoch": 1.47, + "grad_norm": 11.749097347058193, + "learning_rate": 3.4963660488139993e-06, + "loss": 1.1783, + "step": 122019 + }, + { + "epoch": 1.47, + "grad_norm": 34.55457201155982, + "learning_rate": 3.4959223047022406e-06, + "loss": 1.3247, + "step": 122022 + }, + { + "epoch": 1.47, + "grad_norm": 9.992056252233525, + "learning_rate": 3.49547858278656e-06, + "loss": 1.2543, + "step": 122025 + }, + { + "epoch": 1.47, + "grad_norm": 13.19976113926989, + "learning_rate": 3.4950348830684644e-06, + "loss": 0.9296, + "step": 122028 + }, + { + "epoch": 1.47, + "grad_norm": 9.12458675485595, + "learning_rate": 3.494591205549476e-06, + "loss": 0.7969, + "step": 122031 + }, + { + "epoch": 1.47, + "grad_norm": 6.922688632307757, + "learning_rate": 3.4941475502310994e-06, + "loss": 1.0851, + "step": 122034 + }, + { + "epoch": 1.47, + "grad_norm": 8.19170942503192, + "learning_rate": 3.4937039171148558e-06, + "loss": 0.9177, + "step": 122037 + }, + { + "epoch": 1.47, + "grad_norm": 4.04926171734695, + "learning_rate": 3.4932603062022565e-06, + "loss": 0.8675, + "step": 122040 + }, + { + "epoch": 1.47, + "grad_norm": 5.532767465469785, + "learning_rate": 3.4928167174948157e-06, + "loss": 1.1696, + "step": 122043 + }, + { + "epoch": 1.47, + "grad_norm": 6.526549380281087, + "learning_rate": 3.4923731509940508e-06, + "loss": 1.273, + "step": 122046 + }, + { + "epoch": 1.47, + "grad_norm": 18.82298801189867, + "learning_rate": 3.491929606701473e-06, + "loss": 1.1245, + "step": 122049 + }, + { + "epoch": 1.47, + "grad_norm": 2.9885663960561537, + "learning_rate": 3.4914860846185906e-06, + "loss": 1.073, + "step": 122052 + }, + { + "epoch": 1.47, + "grad_norm": 4.502135060285753, + "learning_rate": 3.491042584746923e-06, + "loss": 1.3538, + "step": 122055 + }, + { + "epoch": 1.47, + "grad_norm": 6.285627619702164, + "learning_rate": 3.490599107087985e-06, + "loss": 1.0645, + "step": 122058 + }, + { + "epoch": 1.47, + "grad_norm": 13.531455179232895, + "learning_rate": 3.4901556516432853e-06, + "loss": 1.2875, + "step": 122061 + }, + { + "epoch": 1.47, + "grad_norm": 13.571754420280785, + "learning_rate": 3.4897122184143396e-06, + "loss": 1.3549, + "step": 122064 + }, + { + "epoch": 1.47, + "grad_norm": 10.44404634386184, + "learning_rate": 3.4892688074026636e-06, + "loss": 1.4225, + "step": 122067 + }, + { + "epoch": 1.47, + "grad_norm": 7.28938640181462, + "learning_rate": 3.4888254186097647e-06, + "loss": 1.2534, + "step": 122070 + }, + { + "epoch": 1.47, + "grad_norm": 6.699267873383322, + "learning_rate": 3.4883820520371623e-06, + "loss": 1.3889, + "step": 122073 + }, + { + "epoch": 1.47, + "grad_norm": 19.9719319861093, + "learning_rate": 3.4879387076863648e-06, + "loss": 1.1597, + "step": 122076 + }, + { + "epoch": 1.47, + "grad_norm": 10.432328246599162, + "learning_rate": 3.4874953855588856e-06, + "loss": 1.359, + "step": 122079 + }, + { + "epoch": 1.47, + "grad_norm": 7.120452358009753, + "learning_rate": 3.4870520856562385e-06, + "loss": 1.155, + "step": 122082 + }, + { + "epoch": 1.47, + "grad_norm": 17.018135211304873, + "learning_rate": 3.486608807979941e-06, + "loss": 1.3417, + "step": 122085 + }, + { + "epoch": 1.47, + "grad_norm": 10.322432926681552, + "learning_rate": 3.4861655525314977e-06, + "loss": 1.3233, + "step": 122088 + }, + { + "epoch": 1.47, + "grad_norm": 10.734325729577618, + "learning_rate": 3.4857223193124278e-06, + "loss": 1.1769, + "step": 122091 + }, + { + "epoch": 1.47, + "grad_norm": 52.89904603145093, + "learning_rate": 3.4852791083242386e-06, + "loss": 1.1448, + "step": 122094 + }, + { + "epoch": 1.47, + "grad_norm": 16.95523415805795, + "learning_rate": 3.4848359195684455e-06, + "loss": 0.8378, + "step": 122097 + }, + { + "epoch": 1.47, + "grad_norm": 8.696765367100953, + "learning_rate": 3.484392753046564e-06, + "loss": 1.1432, + "step": 122100 + }, + { + "epoch": 1.47, + "grad_norm": 7.103805118917619, + "learning_rate": 3.4839496087601e-06, + "loss": 1.7233, + "step": 122103 + }, + { + "epoch": 1.47, + "grad_norm": 14.222443869247233, + "learning_rate": 3.4835064867105673e-06, + "loss": 1.4578, + "step": 122106 + }, + { + "epoch": 1.47, + "grad_norm": 6.594326421384379, + "learning_rate": 3.4830633868994813e-06, + "loss": 1.197, + "step": 122109 + }, + { + "epoch": 1.47, + "grad_norm": 11.466156018239916, + "learning_rate": 3.482620309328355e-06, + "loss": 1.5105, + "step": 122112 + }, + { + "epoch": 1.47, + "grad_norm": 23.20839727210822, + "learning_rate": 3.4821772539986987e-06, + "loss": 1.3073, + "step": 122115 + }, + { + "epoch": 1.47, + "grad_norm": 5.201248673024626, + "learning_rate": 3.4817342209120195e-06, + "loss": 1.4304, + "step": 122118 + }, + { + "epoch": 1.47, + "grad_norm": 11.164273693746665, + "learning_rate": 3.481291210069835e-06, + "loss": 0.8202, + "step": 122121 + }, + { + "epoch": 1.47, + "grad_norm": 6.88404266508221, + "learning_rate": 3.480848221473655e-06, + "loss": 1.0806, + "step": 122124 + }, + { + "epoch": 1.47, + "grad_norm": 5.6076725052035385, + "learning_rate": 3.4804052551249944e-06, + "loss": 0.9191, + "step": 122127 + }, + { + "epoch": 1.47, + "grad_norm": 12.725431215828545, + "learning_rate": 3.47996231102536e-06, + "loss": 1.2159, + "step": 122130 + }, + { + "epoch": 1.47, + "grad_norm": 3.380879074464803, + "learning_rate": 3.479519389176268e-06, + "loss": 1.0147, + "step": 122133 + }, + { + "epoch": 1.47, + "grad_norm": 6.364415512764908, + "learning_rate": 3.479076489579225e-06, + "loss": 1.1694, + "step": 122136 + }, + { + "epoch": 1.47, + "grad_norm": 2.5801732395075114, + "learning_rate": 3.478633612235748e-06, + "loss": 0.8472, + "step": 122139 + }, + { + "epoch": 1.47, + "grad_norm": 25.578148669359464, + "learning_rate": 3.478190757147343e-06, + "loss": 1.0651, + "step": 122142 + }, + { + "epoch": 1.47, + "grad_norm": 9.327606724670403, + "learning_rate": 3.477747924315523e-06, + "loss": 1.5777, + "step": 122145 + }, + { + "epoch": 1.47, + "grad_norm": 5.813849912431231, + "learning_rate": 3.4773051137418e-06, + "loss": 0.9135, + "step": 122148 + }, + { + "epoch": 1.47, + "grad_norm": 15.357520376107091, + "learning_rate": 3.4768623254276857e-06, + "loss": 1.261, + "step": 122151 + }, + { + "epoch": 1.47, + "grad_norm": 27.177240860351553, + "learning_rate": 3.476419559374694e-06, + "loss": 1.2785, + "step": 122154 + }, + { + "epoch": 1.47, + "grad_norm": 11.977314772319367, + "learning_rate": 3.475976815584332e-06, + "loss": 1.8151, + "step": 122157 + }, + { + "epoch": 1.47, + "grad_norm": 15.31628158772591, + "learning_rate": 3.4755340940581084e-06, + "loss": 1.2267, + "step": 122160 + }, + { + "epoch": 1.47, + "grad_norm": 2.8074892384268684, + "learning_rate": 3.475091394797536e-06, + "loss": 1.5182, + "step": 122163 + }, + { + "epoch": 1.47, + "grad_norm": 7.010078676577854, + "learning_rate": 3.474648717804131e-06, + "loss": 1.2755, + "step": 122166 + }, + { + "epoch": 1.47, + "grad_norm": 17.757893431568604, + "learning_rate": 3.474206063079396e-06, + "loss": 1.0765, + "step": 122169 + }, + { + "epoch": 1.47, + "grad_norm": 7.563576250641497, + "learning_rate": 3.4737634306248446e-06, + "loss": 1.185, + "step": 122172 + }, + { + "epoch": 1.47, + "grad_norm": 5.271683283338797, + "learning_rate": 3.473320820441991e-06, + "loss": 1.1718, + "step": 122175 + }, + { + "epoch": 1.47, + "grad_norm": 16.183201264207234, + "learning_rate": 3.4728782325323395e-06, + "loss": 0.8245, + "step": 122178 + }, + { + "epoch": 1.47, + "grad_norm": 3.886704387866875, + "learning_rate": 3.472435666897407e-06, + "loss": 0.7987, + "step": 122181 + }, + { + "epoch": 1.47, + "grad_norm": 9.07247854516374, + "learning_rate": 3.471993123538696e-06, + "loss": 1.0615, + "step": 122184 + }, + { + "epoch": 1.47, + "grad_norm": 12.269355977235202, + "learning_rate": 3.4715506024577227e-06, + "loss": 0.8988, + "step": 122187 + }, + { + "epoch": 1.47, + "grad_norm": 14.189848733248086, + "learning_rate": 3.471108103655995e-06, + "loss": 1.6644, + "step": 122190 + }, + { + "epoch": 1.47, + "grad_norm": 9.439399119350675, + "learning_rate": 3.470665627135027e-06, + "loss": 0.9509, + "step": 122193 + }, + { + "epoch": 1.47, + "grad_norm": 11.212100255151821, + "learning_rate": 3.4702231728963222e-06, + "loss": 1.2625, + "step": 122196 + }, + { + "epoch": 1.47, + "grad_norm": 10.065212059368557, + "learning_rate": 3.4697807409413965e-06, + "loss": 0.9642, + "step": 122199 + }, + { + "epoch": 1.47, + "grad_norm": 8.15011376734391, + "learning_rate": 3.4693383312717534e-06, + "loss": 1.0729, + "step": 122202 + }, + { + "epoch": 1.47, + "grad_norm": 15.734076447402229, + "learning_rate": 3.468895943888907e-06, + "loss": 1.4497, + "step": 122205 + }, + { + "epoch": 1.47, + "grad_norm": 7.332249829964473, + "learning_rate": 3.46845357879437e-06, + "loss": 0.9827, + "step": 122208 + }, + { + "epoch": 1.47, + "grad_norm": 7.378177620829482, + "learning_rate": 3.4680112359896435e-06, + "loss": 1.0753, + "step": 122211 + }, + { + "epoch": 1.47, + "grad_norm": 9.871305838185508, + "learning_rate": 3.4675689154762426e-06, + "loss": 0.8015, + "step": 122214 + }, + { + "epoch": 1.47, + "grad_norm": 14.862843988611973, + "learning_rate": 3.4671266172556796e-06, + "loss": 1.5765, + "step": 122217 + }, + { + "epoch": 1.47, + "grad_norm": 3.4385045381986394, + "learning_rate": 3.4666843413294594e-06, + "loss": 1.2298, + "step": 122220 + }, + { + "epoch": 1.47, + "grad_norm": 3.918721378246869, + "learning_rate": 3.46624208769909e-06, + "loss": 0.9753, + "step": 122223 + }, + { + "epoch": 1.47, + "grad_norm": 5.305171905432671, + "learning_rate": 3.4657998563660824e-06, + "loss": 1.3021, + "step": 122226 + }, + { + "epoch": 1.47, + "grad_norm": 8.579955736032758, + "learning_rate": 3.465357647331946e-06, + "loss": 0.785, + "step": 122229 + }, + { + "epoch": 1.47, + "grad_norm": 15.875631532750976, + "learning_rate": 3.4649154605981906e-06, + "loss": 1.396, + "step": 122232 + }, + { + "epoch": 1.47, + "grad_norm": 4.462740544408607, + "learning_rate": 3.4644732961663286e-06, + "loss": 1.1728, + "step": 122235 + }, + { + "epoch": 1.47, + "grad_norm": 4.494250991604031, + "learning_rate": 3.4640311540378615e-06, + "loss": 1.1122, + "step": 122238 + }, + { + "epoch": 1.47, + "grad_norm": 12.555120290830953, + "learning_rate": 3.463589034214304e-06, + "loss": 1.3623, + "step": 122241 + }, + { + "epoch": 1.47, + "grad_norm": 17.95824117058796, + "learning_rate": 3.4631469366971614e-06, + "loss": 0.8926, + "step": 122244 + }, + { + "epoch": 1.47, + "grad_norm": 17.882027966483427, + "learning_rate": 3.4627048614879457e-06, + "loss": 1.2936, + "step": 122247 + }, + { + "epoch": 1.47, + "grad_norm": 7.429090307886324, + "learning_rate": 3.4622628085881603e-06, + "loss": 1.1396, + "step": 122250 + }, + { + "epoch": 1.47, + "grad_norm": 2.498986619089573, + "learning_rate": 3.4618207779993175e-06, + "loss": 1.1926, + "step": 122253 + }, + { + "epoch": 1.47, + "grad_norm": 4.404876910613591, + "learning_rate": 3.4613787697229252e-06, + "loss": 1.3349, + "step": 122256 + }, + { + "epoch": 1.47, + "grad_norm": 99.12113182592043, + "learning_rate": 3.4609367837604957e-06, + "loss": 1.1416, + "step": 122259 + }, + { + "epoch": 1.47, + "grad_norm": 13.663363554574289, + "learning_rate": 3.4604948201135325e-06, + "loss": 1.1377, + "step": 122262 + }, + { + "epoch": 1.47, + "grad_norm": 10.549091439799566, + "learning_rate": 3.460052878783543e-06, + "loss": 1.4134, + "step": 122265 + }, + { + "epoch": 1.47, + "grad_norm": 9.116141975876882, + "learning_rate": 3.459610959772036e-06, + "loss": 1.1483, + "step": 122268 + }, + { + "epoch": 1.47, + "grad_norm": 25.08421478636608, + "learning_rate": 3.459169063080522e-06, + "loss": 1.5249, + "step": 122271 + }, + { + "epoch": 1.47, + "grad_norm": 9.234191985350297, + "learning_rate": 3.458727188710511e-06, + "loss": 1.1648, + "step": 122274 + }, + { + "epoch": 1.47, + "grad_norm": 7.9258420377239736, + "learning_rate": 3.4582853366635048e-06, + "loss": 1.008, + "step": 122277 + }, + { + "epoch": 1.47, + "grad_norm": 9.459927712287719, + "learning_rate": 3.4578435069410144e-06, + "loss": 1.4994, + "step": 122280 + }, + { + "epoch": 1.47, + "grad_norm": 7.339247982172074, + "learning_rate": 3.4574016995445503e-06, + "loss": 1.1149, + "step": 122283 + }, + { + "epoch": 1.47, + "grad_norm": 3.4547825595039203, + "learning_rate": 3.456959914475615e-06, + "loss": 1.1478, + "step": 122286 + }, + { + "epoch": 1.47, + "grad_norm": 33.343436848240636, + "learning_rate": 3.4565181517357215e-06, + "loss": 1.1768, + "step": 122289 + }, + { + "epoch": 1.47, + "grad_norm": 13.350953539101976, + "learning_rate": 3.456076411326371e-06, + "loss": 1.3852, + "step": 122292 + }, + { + "epoch": 1.47, + "grad_norm": 3.606542594795987, + "learning_rate": 3.455634693249076e-06, + "loss": 1.1055, + "step": 122295 + }, + { + "epoch": 1.47, + "grad_norm": 5.32624858116088, + "learning_rate": 3.455192997505341e-06, + "loss": 0.9798, + "step": 122298 + }, + { + "epoch": 1.47, + "grad_norm": 35.43478954185686, + "learning_rate": 3.454751324096679e-06, + "loss": 1.1335, + "step": 122301 + }, + { + "epoch": 1.47, + "grad_norm": 5.340301503125828, + "learning_rate": 3.4543096730245927e-06, + "loss": 1.0779, + "step": 122304 + }, + { + "epoch": 1.47, + "grad_norm": 7.61752091399124, + "learning_rate": 3.453868044290587e-06, + "loss": 1.1052, + "step": 122307 + }, + { + "epoch": 1.47, + "grad_norm": 4.2817541222549735, + "learning_rate": 3.4534264378961714e-06, + "loss": 1.1012, + "step": 122310 + }, + { + "epoch": 1.47, + "grad_norm": 37.93738682861738, + "learning_rate": 3.4529848538428535e-06, + "loss": 0.9707, + "step": 122313 + }, + { + "epoch": 1.47, + "grad_norm": 9.791330697239113, + "learning_rate": 3.4525432921321434e-06, + "loss": 1.1311, + "step": 122316 + }, + { + "epoch": 1.47, + "grad_norm": 9.545042838154073, + "learning_rate": 3.452101752765541e-06, + "loss": 0.9566, + "step": 122319 + }, + { + "epoch": 1.47, + "grad_norm": 21.409170545085345, + "learning_rate": 3.451660235744556e-06, + "loss": 1.3733, + "step": 122322 + }, + { + "epoch": 1.47, + "grad_norm": 5.2703983875910625, + "learning_rate": 3.4512187410707008e-06, + "loss": 1.1342, + "step": 122325 + }, + { + "epoch": 1.47, + "grad_norm": 10.004268127466421, + "learning_rate": 3.4507772687454764e-06, + "loss": 1.0885, + "step": 122328 + }, + { + "epoch": 1.47, + "grad_norm": 7.385388957121514, + "learning_rate": 3.4503358187703874e-06, + "loss": 1.2393, + "step": 122331 + }, + { + "epoch": 1.47, + "grad_norm": 9.191484351829576, + "learning_rate": 3.4498943911469427e-06, + "loss": 1.0195, + "step": 122334 + }, + { + "epoch": 1.47, + "grad_norm": 6.409808334728753, + "learning_rate": 3.449452985876649e-06, + "loss": 1.4927, + "step": 122337 + }, + { + "epoch": 1.47, + "grad_norm": 4.9361129024175, + "learning_rate": 3.449011602961012e-06, + "loss": 1.5117, + "step": 122340 + }, + { + "epoch": 1.47, + "grad_norm": 19.564135671518677, + "learning_rate": 3.4485702424015433e-06, + "loss": 1.2255, + "step": 122343 + }, + { + "epoch": 1.47, + "grad_norm": 6.1788139967126385, + "learning_rate": 3.4481289041997413e-06, + "loss": 1.2283, + "step": 122346 + }, + { + "epoch": 1.47, + "grad_norm": 8.152793935707974, + "learning_rate": 3.4476875883571183e-06, + "loss": 1.0025, + "step": 122349 + }, + { + "epoch": 1.47, + "grad_norm": 10.681535095286256, + "learning_rate": 3.4472462948751737e-06, + "loss": 1.2514, + "step": 122352 + }, + { + "epoch": 1.47, + "grad_norm": 7.538106189601116, + "learning_rate": 3.4468050237554207e-06, + "loss": 0.9563, + "step": 122355 + }, + { + "epoch": 1.47, + "grad_norm": 18.86943373269838, + "learning_rate": 3.4463637749993585e-06, + "loss": 1.127, + "step": 122358 + }, + { + "epoch": 1.47, + "grad_norm": 7.556380510821284, + "learning_rate": 3.445922548608497e-06, + "loss": 1.1561, + "step": 122361 + }, + { + "epoch": 1.47, + "grad_norm": 6.520321693255405, + "learning_rate": 3.445481344584339e-06, + "loss": 0.8881, + "step": 122364 + }, + { + "epoch": 1.47, + "grad_norm": 4.2883382636848495, + "learning_rate": 3.445040162928398e-06, + "loss": 1.1972, + "step": 122367 + }, + { + "epoch": 1.47, + "grad_norm": 4.173307524083925, + "learning_rate": 3.4445990036421716e-06, + "loss": 0.8868, + "step": 122370 + }, + { + "epoch": 1.47, + "grad_norm": 3.925744770578182, + "learning_rate": 3.4441578667271658e-06, + "loss": 1.2291, + "step": 122373 + }, + { + "epoch": 1.47, + "grad_norm": 18.698811419632957, + "learning_rate": 3.443716752184887e-06, + "loss": 1.507, + "step": 122376 + }, + { + "epoch": 1.47, + "grad_norm": 9.611425459428208, + "learning_rate": 3.4432756600168415e-06, + "loss": 1.2391, + "step": 122379 + }, + { + "epoch": 1.47, + "grad_norm": 7.380191590780991, + "learning_rate": 3.4428345902245385e-06, + "loss": 1.4008, + "step": 122382 + }, + { + "epoch": 1.47, + "grad_norm": 12.855674912555491, + "learning_rate": 3.442393542809476e-06, + "loss": 1.3488, + "step": 122385 + }, + { + "epoch": 1.47, + "grad_norm": 7.1363079165117735, + "learning_rate": 3.4419525177731605e-06, + "loss": 1.1933, + "step": 122388 + }, + { + "epoch": 1.47, + "grad_norm": 6.520257237824337, + "learning_rate": 3.441511515117103e-06, + "loss": 0.9856, + "step": 122391 + }, + { + "epoch": 1.47, + "grad_norm": 18.967990381987203, + "learning_rate": 3.441070534842802e-06, + "loss": 1.1172, + "step": 122394 + }, + { + "epoch": 1.47, + "grad_norm": 3.496456003290236, + "learning_rate": 3.440629576951767e-06, + "loss": 1.0907, + "step": 122397 + }, + { + "epoch": 1.47, + "grad_norm": 8.82951215639891, + "learning_rate": 3.4401886414454977e-06, + "loss": 1.5791, + "step": 122400 + }, + { + "epoch": 1.47, + "grad_norm": 28.016567153897874, + "learning_rate": 3.4397477283255023e-06, + "loss": 1.0238, + "step": 122403 + }, + { + "epoch": 1.47, + "grad_norm": 7.127201116064508, + "learning_rate": 3.439306837593284e-06, + "loss": 1.4668, + "step": 122406 + }, + { + "epoch": 1.47, + "grad_norm": 5.072555998727014, + "learning_rate": 3.438865969250351e-06, + "loss": 1.3692, + "step": 122409 + }, + { + "epoch": 1.47, + "grad_norm": 6.0548953776055905, + "learning_rate": 3.438425123298207e-06, + "loss": 1.0119, + "step": 122412 + }, + { + "epoch": 1.47, + "grad_norm": 9.37601766010502, + "learning_rate": 3.4379842997383496e-06, + "loss": 1.0044, + "step": 122415 + }, + { + "epoch": 1.47, + "grad_norm": 8.494180693703811, + "learning_rate": 3.437543498572289e-06, + "loss": 1.4832, + "step": 122418 + }, + { + "epoch": 1.47, + "grad_norm": 5.601907828837763, + "learning_rate": 3.437102719801528e-06, + "loss": 1.1865, + "step": 122421 + }, + { + "epoch": 1.47, + "grad_norm": 6.032200591198448, + "learning_rate": 3.4366619634275757e-06, + "loss": 1.0136, + "step": 122424 + }, + { + "epoch": 1.47, + "grad_norm": 7.7326336614607385, + "learning_rate": 3.436221229451928e-06, + "loss": 0.8909, + "step": 122427 + }, + { + "epoch": 1.47, + "grad_norm": 15.39070180715695, + "learning_rate": 3.435780517876094e-06, + "loss": 1.2756, + "step": 122430 + }, + { + "epoch": 1.47, + "grad_norm": 8.119462914643329, + "learning_rate": 3.435339828701579e-06, + "loss": 0.7473, + "step": 122433 + }, + { + "epoch": 1.47, + "grad_norm": 6.179104107054947, + "learning_rate": 3.4348991619298844e-06, + "loss": 1.0421, + "step": 122436 + }, + { + "epoch": 1.47, + "grad_norm": 18.937746323084422, + "learning_rate": 3.4344585175625113e-06, + "loss": 1.2727, + "step": 122439 + }, + { + "epoch": 1.47, + "grad_norm": 4.261308767880274, + "learning_rate": 3.4340178956009664e-06, + "loss": 1.2489, + "step": 122442 + }, + { + "epoch": 1.47, + "grad_norm": 17.975716629465527, + "learning_rate": 3.433577296046754e-06, + "loss": 1.489, + "step": 122445 + }, + { + "epoch": 1.47, + "grad_norm": 10.428071652086276, + "learning_rate": 3.4331367189013763e-06, + "loss": 1.166, + "step": 122448 + }, + { + "epoch": 1.47, + "grad_norm": 3.741355262001418, + "learning_rate": 3.4326961641663414e-06, + "loss": 0.8975, + "step": 122451 + }, + { + "epoch": 1.47, + "grad_norm": 6.857706223826755, + "learning_rate": 3.432255631843148e-06, + "loss": 1.3096, + "step": 122454 + }, + { + "epoch": 1.47, + "grad_norm": 12.24916433876092, + "learning_rate": 3.4318151219332973e-06, + "loss": 1.0818, + "step": 122457 + }, + { + "epoch": 1.47, + "grad_norm": 10.596020206914, + "learning_rate": 3.431374634438297e-06, + "loss": 1.2811, + "step": 122460 + }, + { + "epoch": 1.47, + "grad_norm": 8.343097977731171, + "learning_rate": 3.430934169359651e-06, + "loss": 0.8802, + "step": 122463 + }, + { + "epoch": 1.47, + "grad_norm": 8.357294793414681, + "learning_rate": 3.430493726698858e-06, + "loss": 0.9312, + "step": 122466 + }, + { + "epoch": 1.47, + "grad_norm": 9.042441518295552, + "learning_rate": 3.430053306457424e-06, + "loss": 1.0208, + "step": 122469 + }, + { + "epoch": 1.47, + "grad_norm": 10.563627091161035, + "learning_rate": 3.42961290863685e-06, + "loss": 1.1079, + "step": 122472 + }, + { + "epoch": 1.47, + "grad_norm": 13.222069934407921, + "learning_rate": 3.429172533238645e-06, + "loss": 1.1798, + "step": 122475 + }, + { + "epoch": 1.47, + "grad_norm": 10.328014930995941, + "learning_rate": 3.428732180264307e-06, + "loss": 0.971, + "step": 122478 + }, + { + "epoch": 1.47, + "grad_norm": 6.212634213268589, + "learning_rate": 3.428291849715336e-06, + "loss": 0.7444, + "step": 122481 + }, + { + "epoch": 1.47, + "grad_norm": 13.64838806650453, + "learning_rate": 3.427851541593238e-06, + "loss": 0.993, + "step": 122484 + }, + { + "epoch": 1.47, + "grad_norm": 4.499225219356824, + "learning_rate": 3.4274112558995154e-06, + "loss": 0.808, + "step": 122487 + }, + { + "epoch": 1.47, + "grad_norm": 8.264382719155103, + "learning_rate": 3.426970992635673e-06, + "loss": 1.2053, + "step": 122490 + }, + { + "epoch": 1.47, + "grad_norm": 8.7459728754098, + "learning_rate": 3.4265307518032087e-06, + "loss": 1.2976, + "step": 122493 + }, + { + "epoch": 1.47, + "grad_norm": 7.240102023007127, + "learning_rate": 3.426090533403631e-06, + "loss": 1.1543, + "step": 122496 + }, + { + "epoch": 1.47, + "grad_norm": 28.0547438275754, + "learning_rate": 3.4256503374384342e-06, + "loss": 1.2828, + "step": 122499 + }, + { + "epoch": 1.47, + "grad_norm": 10.628943559986064, + "learning_rate": 3.425210163909124e-06, + "loss": 1.1579, + "step": 122502 + }, + { + "epoch": 1.47, + "grad_norm": 8.192741695307134, + "learning_rate": 3.424770012817208e-06, + "loss": 1.2899, + "step": 122505 + }, + { + "epoch": 1.47, + "grad_norm": 11.157392851748543, + "learning_rate": 3.424329884164179e-06, + "loss": 0.9167, + "step": 122508 + }, + { + "epoch": 1.47, + "grad_norm": 3.513585189596476, + "learning_rate": 3.4238897779515446e-06, + "loss": 0.9673, + "step": 122511 + }, + { + "epoch": 1.47, + "grad_norm": 11.218443243823, + "learning_rate": 3.423449694180806e-06, + "loss": 1.3534, + "step": 122514 + }, + { + "epoch": 1.47, + "grad_norm": 11.400113206353012, + "learning_rate": 3.423009632853467e-06, + "loss": 1.066, + "step": 122517 + }, + { + "epoch": 1.47, + "grad_norm": 10.587990457142858, + "learning_rate": 3.4225695939710278e-06, + "loss": 1.0839, + "step": 122520 + }, + { + "epoch": 1.47, + "grad_norm": 8.43261734774036, + "learning_rate": 3.4221295775349863e-06, + "loss": 1.195, + "step": 122523 + }, + { + "epoch": 1.47, + "grad_norm": 7.526655687905268, + "learning_rate": 3.4216895835468455e-06, + "loss": 1.4467, + "step": 122526 + }, + { + "epoch": 1.47, + "grad_norm": 5.12395135750545, + "learning_rate": 3.4212496120081107e-06, + "loss": 1.179, + "step": 122529 + }, + { + "epoch": 1.47, + "grad_norm": 4.347030697226431, + "learning_rate": 3.4208096629202835e-06, + "loss": 1.3095, + "step": 122532 + }, + { + "epoch": 1.47, + "grad_norm": 15.450025113740184, + "learning_rate": 3.42036973628486e-06, + "loss": 1.2836, + "step": 122535 + }, + { + "epoch": 1.47, + "grad_norm": 18.086828034699984, + "learning_rate": 3.419929832103348e-06, + "loss": 1.2561, + "step": 122538 + }, + { + "epoch": 1.47, + "grad_norm": 7.4210230592727395, + "learning_rate": 3.4194899503772416e-06, + "loss": 1.0811, + "step": 122541 + }, + { + "epoch": 1.47, + "grad_norm": 9.331325817966146, + "learning_rate": 3.4190500911080504e-06, + "loss": 1.3782, + "step": 122544 + }, + { + "epoch": 1.47, + "grad_norm": 12.012107819540848, + "learning_rate": 3.4186102542972665e-06, + "loss": 1.5563, + "step": 122547 + }, + { + "epoch": 1.47, + "grad_norm": 10.086683305977735, + "learning_rate": 3.4181704399463943e-06, + "loss": 0.8464, + "step": 122550 + }, + { + "epoch": 1.47, + "grad_norm": 14.838160097460289, + "learning_rate": 3.4177306480569373e-06, + "loss": 1.1823, + "step": 122553 + }, + { + "epoch": 1.47, + "grad_norm": 8.898040534820566, + "learning_rate": 3.4172908786303937e-06, + "loss": 1.0719, + "step": 122556 + }, + { + "epoch": 1.47, + "grad_norm": 12.41274784010337, + "learning_rate": 3.4168511316682697e-06, + "loss": 2.0354, + "step": 122559 + }, + { + "epoch": 1.47, + "grad_norm": 5.713370458047631, + "learning_rate": 3.41641140717206e-06, + "loss": 1.1265, + "step": 122562 + }, + { + "epoch": 1.47, + "grad_norm": 3.193842881406483, + "learning_rate": 3.415971705143264e-06, + "loss": 1.3525, + "step": 122565 + }, + { + "epoch": 1.47, + "grad_norm": 12.201826186449738, + "learning_rate": 3.4155320255833857e-06, + "loss": 0.8254, + "step": 122568 + }, + { + "epoch": 1.47, + "grad_norm": 5.294127007375751, + "learning_rate": 3.4150923684939273e-06, + "loss": 1.0583, + "step": 122571 + }, + { + "epoch": 1.47, + "grad_norm": 3.29350095227365, + "learning_rate": 3.4146527338763835e-06, + "loss": 1.3641, + "step": 122574 + }, + { + "epoch": 1.47, + "grad_norm": 10.25530630476282, + "learning_rate": 3.414213121732258e-06, + "loss": 1.1556, + "step": 122577 + }, + { + "epoch": 1.47, + "grad_norm": 10.83023520487426, + "learning_rate": 3.4137735320630548e-06, + "loss": 1.2549, + "step": 122580 + }, + { + "epoch": 1.47, + "grad_norm": 8.69499552063155, + "learning_rate": 3.4133339648702666e-06, + "loss": 1.1839, + "step": 122583 + }, + { + "epoch": 1.47, + "grad_norm": 8.138892447180138, + "learning_rate": 3.4128944201554005e-06, + "loss": 1.0359, + "step": 122586 + }, + { + "epoch": 1.47, + "grad_norm": 2.8926237996687885, + "learning_rate": 3.412454897919949e-06, + "loss": 1.4303, + "step": 122589 + }, + { + "epoch": 1.47, + "grad_norm": 5.2354612673057925, + "learning_rate": 3.412015398165417e-06, + "loss": 1.3061, + "step": 122592 + }, + { + "epoch": 1.47, + "grad_norm": 15.133441423901123, + "learning_rate": 3.411575920893303e-06, + "loss": 1.4449, + "step": 122595 + }, + { + "epoch": 1.47, + "grad_norm": 21.933480660994267, + "learning_rate": 3.4111364661051106e-06, + "loss": 1.3609, + "step": 122598 + }, + { + "epoch": 1.47, + "grad_norm": 5.30306223996376, + "learning_rate": 3.410697033802333e-06, + "loss": 1.4095, + "step": 122601 + }, + { + "epoch": 1.47, + "grad_norm": 5.18498585955463, + "learning_rate": 3.410257623986476e-06, + "loss": 1.4112, + "step": 122604 + }, + { + "epoch": 1.47, + "grad_norm": 2.5949106512209363, + "learning_rate": 3.4098182366590327e-06, + "loss": 1.3781, + "step": 122607 + }, + { + "epoch": 1.47, + "grad_norm": 9.607715346427032, + "learning_rate": 3.4093788718215072e-06, + "loss": 1.0026, + "step": 122610 + }, + { + "epoch": 1.47, + "grad_norm": 15.863028144372333, + "learning_rate": 3.4089395294754e-06, + "loss": 1.1299, + "step": 122613 + }, + { + "epoch": 1.47, + "grad_norm": 6.648840869493946, + "learning_rate": 3.408500209622204e-06, + "loss": 1.1389, + "step": 122616 + }, + { + "epoch": 1.47, + "grad_norm": 8.432050450738638, + "learning_rate": 3.4080609122634236e-06, + "loss": 1.0079, + "step": 122619 + }, + { + "epoch": 1.47, + "grad_norm": 11.949662121178662, + "learning_rate": 3.40762163740056e-06, + "loss": 0.9567, + "step": 122622 + }, + { + "epoch": 1.47, + "grad_norm": 8.377762928808266, + "learning_rate": 3.407182385035106e-06, + "loss": 1.4063, + "step": 122625 + }, + { + "epoch": 1.47, + "grad_norm": 12.102955004901958, + "learning_rate": 3.406743155168567e-06, + "loss": 1.4449, + "step": 122628 + }, + { + "epoch": 1.47, + "grad_norm": 5.423152743732622, + "learning_rate": 3.4063039478024353e-06, + "loss": 1.0102, + "step": 122631 + }, + { + "epoch": 1.47, + "grad_norm": 7.7184428754899725, + "learning_rate": 3.4058647629382137e-06, + "loss": 0.9048, + "step": 122634 + }, + { + "epoch": 1.47, + "grad_norm": 6.9375722975224035, + "learning_rate": 3.4054256005774e-06, + "loss": 1.2172, + "step": 122637 + }, + { + "epoch": 1.47, + "grad_norm": 7.135396349403868, + "learning_rate": 3.4049864607214968e-06, + "loss": 0.7872, + "step": 122640 + }, + { + "epoch": 1.47, + "grad_norm": 9.904720798844311, + "learning_rate": 3.4045473433719953e-06, + "loss": 1.2363, + "step": 122643 + }, + { + "epoch": 1.47, + "grad_norm": 5.3262485237398485, + "learning_rate": 3.4041082485304023e-06, + "loss": 1.1065, + "step": 122646 + }, + { + "epoch": 1.47, + "grad_norm": 6.826108280251977, + "learning_rate": 3.403669176198209e-06, + "loss": 0.861, + "step": 122649 + }, + { + "epoch": 1.47, + "grad_norm": 11.58632143727786, + "learning_rate": 3.4032301263769196e-06, + "loss": 1.199, + "step": 122652 + }, + { + "epoch": 1.47, + "grad_norm": 8.876216559892935, + "learning_rate": 3.402791099068026e-06, + "loss": 1.007, + "step": 122655 + }, + { + "epoch": 1.47, + "grad_norm": 11.146639685844805, + "learning_rate": 3.4023520942730304e-06, + "loss": 1.1652, + "step": 122658 + }, + { + "epoch": 1.47, + "grad_norm": 4.685187691818235, + "learning_rate": 3.40191311199343e-06, + "loss": 1.14, + "step": 122661 + }, + { + "epoch": 1.47, + "grad_norm": 3.71682380627196, + "learning_rate": 3.401474152230728e-06, + "loss": 1.104, + "step": 122664 + }, + { + "epoch": 1.48, + "grad_norm": 25.661355744429905, + "learning_rate": 3.4010352149864132e-06, + "loss": 0.891, + "step": 122667 + }, + { + "epoch": 1.48, + "grad_norm": 16.616266948921965, + "learning_rate": 3.4005963002619935e-06, + "loss": 1.5054, + "step": 122670 + }, + { + "epoch": 1.48, + "grad_norm": 2.589557018148001, + "learning_rate": 3.400157408058956e-06, + "loss": 1.1962, + "step": 122673 + }, + { + "epoch": 1.48, + "grad_norm": 12.894582168602192, + "learning_rate": 3.3997185383788058e-06, + "loss": 1.1884, + "step": 122676 + }, + { + "epoch": 1.48, + "grad_norm": 10.07421122307959, + "learning_rate": 3.399279691223042e-06, + "loss": 1.1848, + "step": 122679 + }, + { + "epoch": 1.48, + "grad_norm": 6.079204470033093, + "learning_rate": 3.398840866593155e-06, + "loss": 1.6156, + "step": 122682 + }, + { + "epoch": 1.48, + "grad_norm": 7.378248001077721, + "learning_rate": 3.3984020644906458e-06, + "loss": 1.401, + "step": 122685 + }, + { + "epoch": 1.48, + "grad_norm": 8.170115305453646, + "learning_rate": 3.3979632849170175e-06, + "loss": 1.353, + "step": 122688 + }, + { + "epoch": 1.48, + "grad_norm": 2.8530886870947167, + "learning_rate": 3.397524527873758e-06, + "loss": 1.1175, + "step": 122691 + }, + { + "epoch": 1.48, + "grad_norm": 10.960576887712902, + "learning_rate": 3.3970857933623726e-06, + "loss": 1.0746, + "step": 122694 + }, + { + "epoch": 1.48, + "grad_norm": 11.201465080998839, + "learning_rate": 3.396647081384351e-06, + "loss": 1.2265, + "step": 122697 + }, + { + "epoch": 1.48, + "grad_norm": 9.057083015172447, + "learning_rate": 3.3962083919411957e-06, + "loss": 1.0973, + "step": 122700 + }, + { + "epoch": 1.48, + "grad_norm": 15.130292737588888, + "learning_rate": 3.3957697250344026e-06, + "loss": 1.1762, + "step": 122703 + }, + { + "epoch": 1.48, + "grad_norm": 20.443708275826783, + "learning_rate": 3.3953310806654706e-06, + "loss": 1.3911, + "step": 122706 + }, + { + "epoch": 1.48, + "grad_norm": 14.423979385378251, + "learning_rate": 3.394892458835892e-06, + "loss": 1.081, + "step": 122709 + }, + { + "epoch": 1.48, + "grad_norm": 6.967187452165944, + "learning_rate": 3.39445385954717e-06, + "loss": 1.4055, + "step": 122712 + }, + { + "epoch": 1.48, + "grad_norm": 4.461462209586088, + "learning_rate": 3.394015282800793e-06, + "loss": 0.7982, + "step": 122715 + }, + { + "epoch": 1.48, + "grad_norm": 13.739804899444982, + "learning_rate": 3.3935767285982635e-06, + "loss": 1.0076, + "step": 122718 + }, + { + "epoch": 1.48, + "grad_norm": 8.705843909518522, + "learning_rate": 3.3931381969410805e-06, + "loss": 1.1992, + "step": 122721 + }, + { + "epoch": 1.48, + "grad_norm": 7.851802173972105, + "learning_rate": 3.3926996878307326e-06, + "loss": 1.5424, + "step": 122724 + }, + { + "epoch": 1.48, + "grad_norm": 2.4725146820266666, + "learning_rate": 3.392261201268722e-06, + "loss": 1.251, + "step": 122727 + }, + { + "epoch": 1.48, + "grad_norm": 8.85219531292016, + "learning_rate": 3.3918227372565473e-06, + "loss": 0.9463, + "step": 122730 + }, + { + "epoch": 1.48, + "grad_norm": 8.98065810369493, + "learning_rate": 3.3913842957957e-06, + "loss": 0.995, + "step": 122733 + }, + { + "epoch": 1.48, + "grad_norm": 15.279943570784205, + "learning_rate": 3.3909458768876744e-06, + "loss": 1.2649, + "step": 122736 + }, + { + "epoch": 1.48, + "grad_norm": 4.942774824346156, + "learning_rate": 3.390507480533971e-06, + "loss": 1.162, + "step": 122739 + }, + { + "epoch": 1.48, + "grad_norm": 10.445554976219912, + "learning_rate": 3.3900691067360847e-06, + "loss": 0.998, + "step": 122742 + }, + { + "epoch": 1.48, + "grad_norm": 9.844686343473732, + "learning_rate": 3.389630755495511e-06, + "loss": 0.8914, + "step": 122745 + }, + { + "epoch": 1.48, + "grad_norm": 8.027275211604865, + "learning_rate": 3.389192426813751e-06, + "loss": 0.8695, + "step": 122748 + }, + { + "epoch": 1.48, + "grad_norm": 9.10390640252707, + "learning_rate": 3.3887541206922915e-06, + "loss": 1.439, + "step": 122751 + }, + { + "epoch": 1.48, + "grad_norm": 15.948670071866113, + "learning_rate": 3.3883158371326364e-06, + "loss": 1.2611, + "step": 122754 + }, + { + "epoch": 1.48, + "grad_norm": 14.26036528752079, + "learning_rate": 3.3878775761362747e-06, + "loss": 1.3039, + "step": 122757 + }, + { + "epoch": 1.48, + "grad_norm": 11.786188763515383, + "learning_rate": 3.3874393377047088e-06, + "loss": 1.2434, + "step": 122760 + }, + { + "epoch": 1.48, + "grad_norm": 4.8920757190416335, + "learning_rate": 3.387001121839427e-06, + "loss": 1.2921, + "step": 122763 + }, + { + "epoch": 1.48, + "grad_norm": 9.785027901354983, + "learning_rate": 3.3865629285419275e-06, + "loss": 1.6403, + "step": 122766 + }, + { + "epoch": 1.48, + "grad_norm": 61.63470279704222, + "learning_rate": 3.386124757813708e-06, + "loss": 1.3294, + "step": 122769 + }, + { + "epoch": 1.48, + "grad_norm": 11.658059798467903, + "learning_rate": 3.3856866096562647e-06, + "loss": 0.9683, + "step": 122772 + }, + { + "epoch": 1.48, + "grad_norm": 6.169110073024247, + "learning_rate": 3.385248484071091e-06, + "loss": 1.2901, + "step": 122775 + }, + { + "epoch": 1.48, + "grad_norm": 5.641390612577935, + "learning_rate": 3.384810381059679e-06, + "loss": 1.3394, + "step": 122778 + }, + { + "epoch": 1.48, + "grad_norm": 11.675918772465621, + "learning_rate": 3.384372300623525e-06, + "loss": 1.1303, + "step": 122781 + }, + { + "epoch": 1.48, + "grad_norm": 9.733205284663411, + "learning_rate": 3.3839342427641265e-06, + "loss": 1.2538, + "step": 122784 + }, + { + "epoch": 1.48, + "grad_norm": 7.0476157614958295, + "learning_rate": 3.38349620748298e-06, + "loss": 1.2638, + "step": 122787 + }, + { + "epoch": 1.48, + "grad_norm": 9.867686227475334, + "learning_rate": 3.3830581947815743e-06, + "loss": 1.0594, + "step": 122790 + }, + { + "epoch": 1.48, + "grad_norm": 15.352542073676542, + "learning_rate": 3.3826202046614074e-06, + "loss": 1.1655, + "step": 122793 + }, + { + "epoch": 1.48, + "grad_norm": 8.0388351026268, + "learning_rate": 3.3821822371239788e-06, + "loss": 1.1553, + "step": 122796 + }, + { + "epoch": 1.48, + "grad_norm": 7.483520234293089, + "learning_rate": 3.381744292170773e-06, + "loss": 1.0051, + "step": 122799 + }, + { + "epoch": 1.48, + "grad_norm": 8.689755913249174, + "learning_rate": 3.3813063698032954e-06, + "loss": 1.0688, + "step": 122802 + }, + { + "epoch": 1.48, + "grad_norm": 3.8438965774278095, + "learning_rate": 3.380868470023031e-06, + "loss": 1.3949, + "step": 122805 + }, + { + "epoch": 1.48, + "grad_norm": 3.162741541394708, + "learning_rate": 3.3804305928314783e-06, + "loss": 1.2256, + "step": 122808 + }, + { + "epoch": 1.48, + "grad_norm": 5.32175839333513, + "learning_rate": 3.3799927382301325e-06, + "loss": 1.1934, + "step": 122811 + }, + { + "epoch": 1.48, + "grad_norm": 8.032466852541804, + "learning_rate": 3.379554906220489e-06, + "loss": 1.5837, + "step": 122814 + }, + { + "epoch": 1.48, + "grad_norm": 16.93829972618917, + "learning_rate": 3.37911709680404e-06, + "loss": 1.02, + "step": 122817 + }, + { + "epoch": 1.48, + "grad_norm": 25.526234477918823, + "learning_rate": 3.3786793099822767e-06, + "loss": 1.0088, + "step": 122820 + }, + { + "epoch": 1.48, + "grad_norm": 8.053322557613884, + "learning_rate": 3.3782415457566964e-06, + "loss": 1.1976, + "step": 122823 + }, + { + "epoch": 1.48, + "grad_norm": 12.573114158592945, + "learning_rate": 3.377803804128792e-06, + "loss": 1.1393, + "step": 122826 + }, + { + "epoch": 1.48, + "grad_norm": 7.691253277803698, + "learning_rate": 3.377366085100061e-06, + "loss": 1.1284, + "step": 122829 + }, + { + "epoch": 1.48, + "grad_norm": 7.564982412924947, + "learning_rate": 3.3769283886719905e-06, + "loss": 0.91, + "step": 122832 + }, + { + "epoch": 1.48, + "grad_norm": 4.879759307147555, + "learning_rate": 3.376490714846078e-06, + "loss": 0.7744, + "step": 122835 + }, + { + "epoch": 1.48, + "grad_norm": 12.615327139861817, + "learning_rate": 3.376053063623821e-06, + "loss": 0.9392, + "step": 122838 + }, + { + "epoch": 1.48, + "grad_norm": 3.1588647200951114, + "learning_rate": 3.375615435006708e-06, + "loss": 1.2154, + "step": 122841 + }, + { + "epoch": 1.48, + "grad_norm": 6.95526896916517, + "learning_rate": 3.375177828996231e-06, + "loss": 1.0306, + "step": 122844 + }, + { + "epoch": 1.48, + "grad_norm": 5.681767089097374, + "learning_rate": 3.3747402455938847e-06, + "loss": 0.857, + "step": 122847 + }, + { + "epoch": 1.48, + "grad_norm": 16.334974510271405, + "learning_rate": 3.374302684801165e-06, + "loss": 1.4023, + "step": 122850 + }, + { + "epoch": 1.48, + "grad_norm": 12.249508880263605, + "learning_rate": 3.3738651466195627e-06, + "loss": 1.3372, + "step": 122853 + }, + { + "epoch": 1.48, + "grad_norm": 19.321521911627002, + "learning_rate": 3.373427631050575e-06, + "loss": 0.8775, + "step": 122856 + }, + { + "epoch": 1.48, + "grad_norm": 4.13937149868555, + "learning_rate": 3.3729901380956933e-06, + "loss": 1.0016, + "step": 122859 + }, + { + "epoch": 1.48, + "grad_norm": 6.749753644159911, + "learning_rate": 3.3725526677564047e-06, + "loss": 1.33, + "step": 122862 + }, + { + "epoch": 1.48, + "grad_norm": 3.9738217293351874, + "learning_rate": 3.3721152200342066e-06, + "loss": 1.2553, + "step": 122865 + }, + { + "epoch": 1.48, + "grad_norm": 11.65590901801321, + "learning_rate": 3.3716777949305957e-06, + "loss": 1.0255, + "step": 122868 + }, + { + "epoch": 1.48, + "grad_norm": 4.455515188747611, + "learning_rate": 3.371240392447057e-06, + "loss": 0.9521, + "step": 122871 + }, + { + "epoch": 1.48, + "grad_norm": 14.633643535900145, + "learning_rate": 3.3708030125850875e-06, + "loss": 0.8042, + "step": 122874 + }, + { + "epoch": 1.48, + "grad_norm": 22.46043208100462, + "learning_rate": 3.3703656553461807e-06, + "loss": 1.2747, + "step": 122877 + }, + { + "epoch": 1.48, + "grad_norm": 6.956805378932743, + "learning_rate": 3.36992832073183e-06, + "loss": 1.4272, + "step": 122880 + }, + { + "epoch": 1.48, + "grad_norm": 8.05153356282158, + "learning_rate": 3.369491008743526e-06, + "loss": 1.0542, + "step": 122883 + }, + { + "epoch": 1.48, + "grad_norm": 4.953628135650234, + "learning_rate": 3.3690537193827577e-06, + "loss": 1.1132, + "step": 122886 + }, + { + "epoch": 1.48, + "grad_norm": 2.8282620446174067, + "learning_rate": 3.36861645265102e-06, + "loss": 1.3051, + "step": 122889 + }, + { + "epoch": 1.48, + "grad_norm": 6.204525071679592, + "learning_rate": 3.3681792085498067e-06, + "loss": 1.2976, + "step": 122892 + }, + { + "epoch": 1.48, + "grad_norm": 10.136080750359552, + "learning_rate": 3.3677419870806116e-06, + "loss": 1.2261, + "step": 122895 + }, + { + "epoch": 1.48, + "grad_norm": 8.499919981893731, + "learning_rate": 3.3673047882449208e-06, + "loss": 1.1261, + "step": 122898 + }, + { + "epoch": 1.48, + "grad_norm": 8.230796988681792, + "learning_rate": 3.3668676120442335e-06, + "loss": 1.3155, + "step": 122901 + }, + { + "epoch": 1.48, + "grad_norm": 10.792234560594196, + "learning_rate": 3.3664304584800346e-06, + "loss": 1.3282, + "step": 122904 + }, + { + "epoch": 1.48, + "grad_norm": 6.766461812431365, + "learning_rate": 3.365993327553818e-06, + "loss": 1.0772, + "step": 122907 + }, + { + "epoch": 1.48, + "grad_norm": 8.487891604991212, + "learning_rate": 3.365556219267081e-06, + "loss": 1.4732, + "step": 122910 + }, + { + "epoch": 1.48, + "grad_norm": 3.835777048048021, + "learning_rate": 3.365119133621307e-06, + "loss": 1.2739, + "step": 122913 + }, + { + "epoch": 1.48, + "grad_norm": 2.3358053942314307, + "learning_rate": 3.3646820706179916e-06, + "loss": 1.2376, + "step": 122916 + }, + { + "epoch": 1.48, + "grad_norm": 2.9083916047508653, + "learning_rate": 3.364245030258627e-06, + "loss": 1.2116, + "step": 122919 + }, + { + "epoch": 1.48, + "grad_norm": 7.367212449234119, + "learning_rate": 3.363808012544707e-06, + "loss": 0.9932, + "step": 122922 + }, + { + "epoch": 1.48, + "grad_norm": 9.990674303866959, + "learning_rate": 3.363371017477719e-06, + "loss": 1.5331, + "step": 122925 + }, + { + "epoch": 1.48, + "grad_norm": 39.651370904676895, + "learning_rate": 3.362934045059153e-06, + "loss": 1.3645, + "step": 122928 + }, + { + "epoch": 1.48, + "grad_norm": 6.127803225334605, + "learning_rate": 3.3624970952905024e-06, + "loss": 1.0092, + "step": 122931 + }, + { + "epoch": 1.48, + "grad_norm": 3.869989670917689, + "learning_rate": 3.362060168173258e-06, + "loss": 0.9836, + "step": 122934 + }, + { + "epoch": 1.48, + "grad_norm": 3.672111807965422, + "learning_rate": 3.361623263708914e-06, + "loss": 1.2948, + "step": 122937 + }, + { + "epoch": 1.48, + "grad_norm": 7.436762786811721, + "learning_rate": 3.3611863818989566e-06, + "loss": 1.1431, + "step": 122940 + }, + { + "epoch": 1.48, + "grad_norm": 5.76771518972643, + "learning_rate": 3.360749522744877e-06, + "loss": 1.2849, + "step": 122943 + }, + { + "epoch": 1.48, + "grad_norm": 7.754371914675259, + "learning_rate": 3.360312686248173e-06, + "loss": 1.2575, + "step": 122946 + }, + { + "epoch": 1.48, + "grad_norm": 5.58989557004277, + "learning_rate": 3.35987587241033e-06, + "loss": 0.9296, + "step": 122949 + }, + { + "epoch": 1.48, + "grad_norm": 8.71500786723199, + "learning_rate": 3.3594390812328347e-06, + "loss": 1.3054, + "step": 122952 + }, + { + "epoch": 1.48, + "grad_norm": 2.8656492235535063, + "learning_rate": 3.3590023127171822e-06, + "loss": 1.0096, + "step": 122955 + }, + { + "epoch": 1.48, + "grad_norm": 3.458253969259229, + "learning_rate": 3.358565566864862e-06, + "loss": 0.865, + "step": 122958 + }, + { + "epoch": 1.48, + "grad_norm": 10.828001791262945, + "learning_rate": 3.358128843677366e-06, + "loss": 1.349, + "step": 122961 + }, + { + "epoch": 1.48, + "grad_norm": 10.89011999388024, + "learning_rate": 3.357692143156187e-06, + "loss": 1.1984, + "step": 122964 + }, + { + "epoch": 1.48, + "grad_norm": 4.5866236279774775, + "learning_rate": 3.357255465302813e-06, + "loss": 1.1506, + "step": 122967 + }, + { + "epoch": 1.48, + "grad_norm": 9.10604058421341, + "learning_rate": 3.3568188101187295e-06, + "loss": 0.7779, + "step": 122970 + }, + { + "epoch": 1.48, + "grad_norm": 8.761328770328605, + "learning_rate": 3.3563821776054306e-06, + "loss": 1.2635, + "step": 122973 + }, + { + "epoch": 1.48, + "grad_norm": 5.186367951869933, + "learning_rate": 3.3559455677644093e-06, + "loss": 1.0613, + "step": 122976 + }, + { + "epoch": 1.48, + "grad_norm": 14.23005018034249, + "learning_rate": 3.3555089805971496e-06, + "loss": 1.2006, + "step": 122979 + }, + { + "epoch": 1.48, + "grad_norm": 10.250641519605717, + "learning_rate": 3.355072416105144e-06, + "loss": 1.1574, + "step": 122982 + }, + { + "epoch": 1.48, + "grad_norm": 7.301350608086302, + "learning_rate": 3.3546358742898834e-06, + "loss": 1.5102, + "step": 122985 + }, + { + "epoch": 1.48, + "grad_norm": 10.01373611070997, + "learning_rate": 3.3541993551528597e-06, + "loss": 1.2574, + "step": 122988 + }, + { + "epoch": 1.48, + "grad_norm": 6.6991438325851, + "learning_rate": 3.3537628586955607e-06, + "loss": 0.8533, + "step": 122991 + }, + { + "epoch": 1.48, + "grad_norm": 16.845729100826926, + "learning_rate": 3.353326384919471e-06, + "loss": 1.2971, + "step": 122994 + }, + { + "epoch": 1.48, + "grad_norm": 13.828383919899938, + "learning_rate": 3.352889933826083e-06, + "loss": 1.2787, + "step": 122997 + }, + { + "epoch": 1.48, + "grad_norm": 7.578130321038824, + "learning_rate": 3.3524535054168896e-06, + "loss": 1.4554, + "step": 123000 + }, + { + "epoch": 1.48, + "grad_norm": 11.541801964525332, + "learning_rate": 3.35201709969338e-06, + "loss": 1.3679, + "step": 123003 + }, + { + "epoch": 1.48, + "grad_norm": 7.208422171391152, + "learning_rate": 3.351580716657039e-06, + "loss": 1.5614, + "step": 123006 + }, + { + "epoch": 1.48, + "grad_norm": 4.862760383388693, + "learning_rate": 3.3511443563093605e-06, + "loss": 1.0736, + "step": 123009 + }, + { + "epoch": 1.48, + "grad_norm": 11.671754315107941, + "learning_rate": 3.350708018651829e-06, + "loss": 1.2405, + "step": 123012 + }, + { + "epoch": 1.48, + "grad_norm": 11.210702087643599, + "learning_rate": 3.3502717036859356e-06, + "loss": 1.1422, + "step": 123015 + }, + { + "epoch": 1.48, + "grad_norm": 11.875853573909191, + "learning_rate": 3.3498354114131724e-06, + "loss": 1.2155, + "step": 123018 + }, + { + "epoch": 1.48, + "grad_norm": 28.411807842149354, + "learning_rate": 3.3493991418350235e-06, + "loss": 1.1709, + "step": 123021 + }, + { + "epoch": 1.48, + "grad_norm": 12.921734692551746, + "learning_rate": 3.3489628949529797e-06, + "loss": 1.2016, + "step": 123024 + }, + { + "epoch": 1.48, + "grad_norm": 7.9301169990370655, + "learning_rate": 3.3485266707685284e-06, + "loss": 0.8797, + "step": 123027 + }, + { + "epoch": 1.48, + "grad_norm": 8.547613124473388, + "learning_rate": 3.3480904692831638e-06, + "loss": 1.1124, + "step": 123030 + }, + { + "epoch": 1.48, + "grad_norm": 4.555408440839605, + "learning_rate": 3.347654290498371e-06, + "loss": 0.9522, + "step": 123033 + }, + { + "epoch": 1.48, + "grad_norm": 80.06471788363086, + "learning_rate": 3.3472181344156342e-06, + "loss": 0.9287, + "step": 123036 + }, + { + "epoch": 1.48, + "grad_norm": 6.232283408601149, + "learning_rate": 3.346782001036445e-06, + "loss": 0.9634, + "step": 123039 + }, + { + "epoch": 1.48, + "grad_norm": 10.05684437485262, + "learning_rate": 3.3463458903622936e-06, + "loss": 1.0892, + "step": 123042 + }, + { + "epoch": 1.48, + "grad_norm": 4.217090998046899, + "learning_rate": 3.345909802394669e-06, + "loss": 1.039, + "step": 123045 + }, + { + "epoch": 1.48, + "grad_norm": 8.942013659026195, + "learning_rate": 3.3454737371350555e-06, + "loss": 1.1249, + "step": 123048 + }, + { + "epoch": 1.48, + "grad_norm": 6.360577182521289, + "learning_rate": 3.345037694584945e-06, + "loss": 1.016, + "step": 123051 + }, + { + "epoch": 1.48, + "grad_norm": 29.332077350965957, + "learning_rate": 3.344601674745822e-06, + "loss": 1.1936, + "step": 123054 + }, + { + "epoch": 1.48, + "grad_norm": 4.3753925727262795, + "learning_rate": 3.344165677619179e-06, + "loss": 0.9674, + "step": 123057 + }, + { + "epoch": 1.48, + "grad_norm": 8.697134881989253, + "learning_rate": 3.3437297032064974e-06, + "loss": 1.2729, + "step": 123060 + }, + { + "epoch": 1.48, + "grad_norm": 8.078764113185413, + "learning_rate": 3.343293751509269e-06, + "loss": 1.195, + "step": 123063 + }, + { + "epoch": 1.48, + "grad_norm": 6.460456864884522, + "learning_rate": 3.342857822528981e-06, + "loss": 1.1599, + "step": 123066 + }, + { + "epoch": 1.48, + "grad_norm": 9.604125842871913, + "learning_rate": 3.342421916267121e-06, + "loss": 1.1672, + "step": 123069 + }, + { + "epoch": 1.48, + "grad_norm": 23.878516652248283, + "learning_rate": 3.3419860327251806e-06, + "loss": 1.2093, + "step": 123072 + }, + { + "epoch": 1.48, + "grad_norm": 10.978843299135166, + "learning_rate": 3.341550171904644e-06, + "loss": 1.4589, + "step": 123075 + }, + { + "epoch": 1.48, + "grad_norm": 21.758380979547766, + "learning_rate": 3.3411143338069953e-06, + "loss": 1.0761, + "step": 123078 + }, + { + "epoch": 1.48, + "grad_norm": 8.279539926293749, + "learning_rate": 3.3406785184337243e-06, + "loss": 0.801, + "step": 123081 + }, + { + "epoch": 1.48, + "grad_norm": 5.382179230202597, + "learning_rate": 3.3402427257863224e-06, + "loss": 1.13, + "step": 123084 + }, + { + "epoch": 1.48, + "grad_norm": 3.0826353769388026, + "learning_rate": 3.339806955866269e-06, + "loss": 1.2882, + "step": 123087 + }, + { + "epoch": 1.48, + "grad_norm": 10.603432645226425, + "learning_rate": 3.3393712086750572e-06, + "loss": 1.4939, + "step": 123090 + }, + { + "epoch": 1.48, + "grad_norm": 17.903694648458877, + "learning_rate": 3.3389354842141743e-06, + "loss": 1.1355, + "step": 123093 + }, + { + "epoch": 1.48, + "grad_norm": 11.041414093840327, + "learning_rate": 3.3384997824851027e-06, + "loss": 1.1409, + "step": 123096 + }, + { + "epoch": 1.48, + "grad_norm": 8.445422405770108, + "learning_rate": 3.3380641034893345e-06, + "loss": 1.2031, + "step": 123099 + }, + { + "epoch": 1.48, + "grad_norm": 3.880359621452655, + "learning_rate": 3.3376284472283516e-06, + "loss": 0.8942, + "step": 123102 + }, + { + "epoch": 1.48, + "grad_norm": 13.998382965312452, + "learning_rate": 3.3371928137036426e-06, + "loss": 1.2122, + "step": 123105 + }, + { + "epoch": 1.48, + "grad_norm": 12.294131932444722, + "learning_rate": 3.3367572029166937e-06, + "loss": 1.1276, + "step": 123108 + }, + { + "epoch": 1.48, + "grad_norm": 10.540506728698192, + "learning_rate": 3.336321614868997e-06, + "loss": 0.9622, + "step": 123111 + }, + { + "epoch": 1.48, + "grad_norm": 10.081502444081732, + "learning_rate": 3.3358860495620303e-06, + "loss": 1.3025, + "step": 123114 + }, + { + "epoch": 1.48, + "grad_norm": 3.911402612539532, + "learning_rate": 3.3354505069972876e-06, + "loss": 0.9979, + "step": 123117 + }, + { + "epoch": 1.48, + "grad_norm": 2.859341820615326, + "learning_rate": 3.335014987176248e-06, + "loss": 1.364, + "step": 123120 + }, + { + "epoch": 1.48, + "grad_norm": 7.263513601772533, + "learning_rate": 3.3345794901004024e-06, + "loss": 1.298, + "step": 123123 + }, + { + "epoch": 1.48, + "grad_norm": 11.157447326043158, + "learning_rate": 3.334144015771239e-06, + "loss": 1.5625, + "step": 123126 + }, + { + "epoch": 1.48, + "grad_norm": 7.562571893691976, + "learning_rate": 3.3337085641902378e-06, + "loss": 1.2554, + "step": 123129 + }, + { + "epoch": 1.48, + "grad_norm": 8.44001000820222, + "learning_rate": 3.333273135358888e-06, + "loss": 0.8371, + "step": 123132 + }, + { + "epoch": 1.48, + "grad_norm": 5.734882336572326, + "learning_rate": 3.3328377292786785e-06, + "loss": 1.5213, + "step": 123135 + }, + { + "epoch": 1.48, + "grad_norm": 14.124767772044422, + "learning_rate": 3.3324023459510935e-06, + "loss": 1.2581, + "step": 123138 + }, + { + "epoch": 1.48, + "grad_norm": 2.3901153213923, + "learning_rate": 3.3319669853776137e-06, + "loss": 0.9854, + "step": 123141 + }, + { + "epoch": 1.48, + "grad_norm": 7.753172246916766, + "learning_rate": 3.331531647559728e-06, + "loss": 1.2134, + "step": 123144 + }, + { + "epoch": 1.48, + "grad_norm": 9.614437400761574, + "learning_rate": 3.3310963324989244e-06, + "loss": 1.0499, + "step": 123147 + }, + { + "epoch": 1.48, + "grad_norm": 3.96062876517125, + "learning_rate": 3.3306610401966856e-06, + "loss": 0.9806, + "step": 123150 + }, + { + "epoch": 1.48, + "grad_norm": 16.032766412278576, + "learning_rate": 3.3302257706545015e-06, + "loss": 1.1799, + "step": 123153 + }, + { + "epoch": 1.48, + "grad_norm": 16.60960609042562, + "learning_rate": 3.3297905238738525e-06, + "loss": 1.1042, + "step": 123156 + }, + { + "epoch": 1.48, + "grad_norm": 15.178323536067513, + "learning_rate": 3.3293552998562283e-06, + "loss": 1.1022, + "step": 123159 + }, + { + "epoch": 1.48, + "grad_norm": 15.777993385242464, + "learning_rate": 3.3289200986031076e-06, + "loss": 1.0891, + "step": 123162 + }, + { + "epoch": 1.48, + "grad_norm": 4.01735950535518, + "learning_rate": 3.328484920115984e-06, + "loss": 1.2223, + "step": 123165 + }, + { + "epoch": 1.48, + "grad_norm": 17.388942150986885, + "learning_rate": 3.328049764396335e-06, + "loss": 0.8931, + "step": 123168 + }, + { + "epoch": 1.48, + "grad_norm": 9.240421640848007, + "learning_rate": 3.3276146314456494e-06, + "loss": 1.2702, + "step": 123171 + }, + { + "epoch": 1.48, + "grad_norm": 10.17021583839429, + "learning_rate": 3.32717952126541e-06, + "loss": 1.5359, + "step": 123174 + }, + { + "epoch": 1.48, + "grad_norm": 6.327112382244511, + "learning_rate": 3.326744433857109e-06, + "loss": 0.9482, + "step": 123177 + }, + { + "epoch": 1.48, + "grad_norm": 21.167603806562767, + "learning_rate": 3.3263093692222236e-06, + "loss": 1.0775, + "step": 123180 + }, + { + "epoch": 1.48, + "grad_norm": 11.873967062117263, + "learning_rate": 3.3258743273622374e-06, + "loss": 1.0135, + "step": 123183 + }, + { + "epoch": 1.48, + "grad_norm": 10.328736100753117, + "learning_rate": 3.3254393082786385e-06, + "loss": 0.8408, + "step": 123186 + }, + { + "epoch": 1.48, + "grad_norm": 33.61429094967718, + "learning_rate": 3.3250043119729114e-06, + "loss": 1.0097, + "step": 123189 + }, + { + "epoch": 1.48, + "grad_norm": 16.642949435341116, + "learning_rate": 3.3245693384465438e-06, + "loss": 0.8878, + "step": 123192 + }, + { + "epoch": 1.48, + "grad_norm": 6.487982037616667, + "learning_rate": 3.3241343877010125e-06, + "loss": 1.1533, + "step": 123195 + }, + { + "epoch": 1.48, + "grad_norm": 5.021512864089832, + "learning_rate": 3.323699459737806e-06, + "loss": 1.0127, + "step": 123198 + }, + { + "epoch": 1.48, + "grad_norm": 16.419026758261992, + "learning_rate": 3.323264554558412e-06, + "loss": 1.1306, + "step": 123201 + }, + { + "epoch": 1.48, + "grad_norm": 11.835803217389259, + "learning_rate": 3.3228296721643073e-06, + "loss": 1.2717, + "step": 123204 + }, + { + "epoch": 1.48, + "grad_norm": 6.3789158381369315, + "learning_rate": 3.322394812556984e-06, + "loss": 0.9853, + "step": 123207 + }, + { + "epoch": 1.48, + "grad_norm": 2.5312255055942945, + "learning_rate": 3.3219599757379185e-06, + "loss": 1.1393, + "step": 123210 + }, + { + "epoch": 1.48, + "grad_norm": 2.806618858647312, + "learning_rate": 3.3215251617085977e-06, + "loss": 1.255, + "step": 123213 + }, + { + "epoch": 1.48, + "grad_norm": 4.49244260886006, + "learning_rate": 3.3210903704705055e-06, + "loss": 1.1458, + "step": 123216 + }, + { + "epoch": 1.48, + "grad_norm": 9.015322707186836, + "learning_rate": 3.32065560202513e-06, + "loss": 1.429, + "step": 123219 + }, + { + "epoch": 1.48, + "grad_norm": 14.440600234197987, + "learning_rate": 3.320220856373948e-06, + "loss": 1.2981, + "step": 123222 + }, + { + "epoch": 1.48, + "grad_norm": 5.454705653325159, + "learning_rate": 3.319786133518449e-06, + "loss": 1.3612, + "step": 123225 + }, + { + "epoch": 1.48, + "grad_norm": 11.079473575300613, + "learning_rate": 3.3193514334601107e-06, + "loss": 1.0001, + "step": 123228 + }, + { + "epoch": 1.48, + "grad_norm": 11.77265277597578, + "learning_rate": 3.3189167562004198e-06, + "loss": 1.1609, + "step": 123231 + }, + { + "epoch": 1.48, + "grad_norm": 7.777320686627181, + "learning_rate": 3.318482101740863e-06, + "loss": 1.0979, + "step": 123234 + }, + { + "epoch": 1.48, + "grad_norm": 15.363118212476515, + "learning_rate": 3.318047470082916e-06, + "loss": 1.2402, + "step": 123237 + }, + { + "epoch": 1.48, + "grad_norm": 22.049857621273965, + "learning_rate": 3.3176128612280668e-06, + "loss": 1.3058, + "step": 123240 + }, + { + "epoch": 1.48, + "grad_norm": 6.683133073703427, + "learning_rate": 3.3171782751778005e-06, + "loss": 1.3942, + "step": 123243 + }, + { + "epoch": 1.48, + "grad_norm": 6.079015545026191, + "learning_rate": 3.3167437119335987e-06, + "loss": 1.0702, + "step": 123246 + }, + { + "epoch": 1.48, + "grad_norm": 17.608313915548216, + "learning_rate": 3.316309171496939e-06, + "loss": 1.1864, + "step": 123249 + }, + { + "epoch": 1.48, + "grad_norm": 9.189239167263764, + "learning_rate": 3.315874653869309e-06, + "loss": 1.0701, + "step": 123252 + }, + { + "epoch": 1.48, + "grad_norm": 10.568696958023123, + "learning_rate": 3.3154401590521914e-06, + "loss": 1.6283, + "step": 123255 + }, + { + "epoch": 1.48, + "grad_norm": 6.458400190884881, + "learning_rate": 3.3150056870470684e-06, + "loss": 1.4763, + "step": 123258 + }, + { + "epoch": 1.48, + "grad_norm": 11.130007684627609, + "learning_rate": 3.3145712378554264e-06, + "loss": 1.3172, + "step": 123261 + }, + { + "epoch": 1.48, + "grad_norm": 9.791226377393441, + "learning_rate": 3.314136811478742e-06, + "loss": 1.1764, + "step": 123264 + }, + { + "epoch": 1.48, + "grad_norm": 7.297164090249992, + "learning_rate": 3.313702407918503e-06, + "loss": 1.2985, + "step": 123267 + }, + { + "epoch": 1.48, + "grad_norm": 10.123405018836142, + "learning_rate": 3.3132680271761865e-06, + "loss": 1.1508, + "step": 123270 + }, + { + "epoch": 1.48, + "grad_norm": 5.565858091416479, + "learning_rate": 3.31283366925328e-06, + "loss": 1.1638, + "step": 123273 + }, + { + "epoch": 1.48, + "grad_norm": 5.944626032514469, + "learning_rate": 3.3123993341512616e-06, + "loss": 1.028, + "step": 123276 + }, + { + "epoch": 1.48, + "grad_norm": 12.722783380397418, + "learning_rate": 3.311965021871615e-06, + "loss": 0.9545, + "step": 123279 + }, + { + "epoch": 1.48, + "grad_norm": 8.401582457794767, + "learning_rate": 3.311530732415822e-06, + "loss": 1.0613, + "step": 123282 + }, + { + "epoch": 1.48, + "grad_norm": 11.621742100750977, + "learning_rate": 3.3110964657853695e-06, + "loss": 1.0969, + "step": 123285 + }, + { + "epoch": 1.48, + "grad_norm": 4.582617188666149, + "learning_rate": 3.3106622219817353e-06, + "loss": 1.331, + "step": 123288 + }, + { + "epoch": 1.48, + "grad_norm": 17.845886071866897, + "learning_rate": 3.310228001006398e-06, + "loss": 1.0741, + "step": 123291 + }, + { + "epoch": 1.48, + "grad_norm": 11.888878108699483, + "learning_rate": 3.3097938028608424e-06, + "loss": 1.0549, + "step": 123294 + }, + { + "epoch": 1.48, + "grad_norm": 2.7534882567486574, + "learning_rate": 3.309359627546551e-06, + "loss": 1.4847, + "step": 123297 + }, + { + "epoch": 1.48, + "grad_norm": 7.378218904119827, + "learning_rate": 3.308925475065009e-06, + "loss": 1.3348, + "step": 123300 + }, + { + "epoch": 1.48, + "grad_norm": 5.566960367947172, + "learning_rate": 3.30849134541769e-06, + "loss": 1.1634, + "step": 123303 + }, + { + "epoch": 1.48, + "grad_norm": 4.008818628565008, + "learning_rate": 3.3080572386060815e-06, + "loss": 1.0698, + "step": 123306 + }, + { + "epoch": 1.48, + "grad_norm": 11.846019208703245, + "learning_rate": 3.307623154631665e-06, + "loss": 0.8569, + "step": 123309 + }, + { + "epoch": 1.48, + "grad_norm": 6.024797411389135, + "learning_rate": 3.307189093495917e-06, + "loss": 1.0874, + "step": 123312 + }, + { + "epoch": 1.48, + "grad_norm": 2.7555609487768167, + "learning_rate": 3.3067550552003258e-06, + "loss": 1.0469, + "step": 123315 + }, + { + "epoch": 1.48, + "grad_norm": 19.841591222576895, + "learning_rate": 3.3063210397463654e-06, + "loss": 1.3904, + "step": 123318 + }, + { + "epoch": 1.48, + "grad_norm": 9.639389230127874, + "learning_rate": 3.30588704713552e-06, + "loss": 1.1793, + "step": 123321 + }, + { + "epoch": 1.48, + "grad_norm": 5.979630793332907, + "learning_rate": 3.3054530773692707e-06, + "loss": 0.9725, + "step": 123324 + }, + { + "epoch": 1.48, + "grad_norm": 8.747766743797197, + "learning_rate": 3.305019130449102e-06, + "loss": 1.0293, + "step": 123327 + }, + { + "epoch": 1.48, + "grad_norm": 7.849951996180448, + "learning_rate": 3.3045852063764917e-06, + "loss": 1.2818, + "step": 123330 + }, + { + "epoch": 1.48, + "grad_norm": 5.869460822339639, + "learning_rate": 3.3041513051529173e-06, + "loss": 0.9031, + "step": 123333 + }, + { + "epoch": 1.48, + "grad_norm": 4.988434909517488, + "learning_rate": 3.303717426779862e-06, + "loss": 0.9173, + "step": 123336 + }, + { + "epoch": 1.48, + "grad_norm": 5.441632287499224, + "learning_rate": 3.3032835712588075e-06, + "loss": 1.1651, + "step": 123339 + }, + { + "epoch": 1.48, + "grad_norm": 11.225342306361899, + "learning_rate": 3.3028497385912372e-06, + "loss": 1.1106, + "step": 123342 + }, + { + "epoch": 1.48, + "grad_norm": 16.67911273606592, + "learning_rate": 3.302415928778625e-06, + "loss": 1.5458, + "step": 123345 + }, + { + "epoch": 1.48, + "grad_norm": 6.039048798016071, + "learning_rate": 3.3019821418224552e-06, + "loss": 1.2022, + "step": 123348 + }, + { + "epoch": 1.48, + "grad_norm": 6.271974587145869, + "learning_rate": 3.3015483777242107e-06, + "loss": 1.0737, + "step": 123351 + }, + { + "epoch": 1.48, + "grad_norm": 10.490554432148286, + "learning_rate": 3.3011146364853696e-06, + "loss": 0.9251, + "step": 123354 + }, + { + "epoch": 1.48, + "grad_norm": 4.69592759409399, + "learning_rate": 3.300680918107407e-06, + "loss": 1.0953, + "step": 123357 + }, + { + "epoch": 1.48, + "grad_norm": 5.9150435843423255, + "learning_rate": 3.300247222591808e-06, + "loss": 1.193, + "step": 123360 + }, + { + "epoch": 1.48, + "grad_norm": 5.516526719115875, + "learning_rate": 3.299813549940052e-06, + "loss": 1.1684, + "step": 123363 + }, + { + "epoch": 1.48, + "grad_norm": 19.249057419675292, + "learning_rate": 3.299379900153619e-06, + "loss": 1.1324, + "step": 123366 + }, + { + "epoch": 1.48, + "grad_norm": 5.026834354175029, + "learning_rate": 3.2989462732339906e-06, + "loss": 0.9687, + "step": 123369 + }, + { + "epoch": 1.48, + "grad_norm": 10.522221562865017, + "learning_rate": 3.2985126691826465e-06, + "loss": 0.8513, + "step": 123372 + }, + { + "epoch": 1.48, + "grad_norm": 3.1317990669432834, + "learning_rate": 3.2980790880010615e-06, + "loss": 1.4451, + "step": 123375 + }, + { + "epoch": 1.48, + "grad_norm": 8.535101425805681, + "learning_rate": 3.297645529690717e-06, + "loss": 0.9194, + "step": 123378 + }, + { + "epoch": 1.48, + "grad_norm": 6.440954677391831, + "learning_rate": 3.297211994253099e-06, + "loss": 1.2071, + "step": 123381 + }, + { + "epoch": 1.48, + "grad_norm": 2.532032650072123, + "learning_rate": 3.2967784816896785e-06, + "loss": 1.6036, + "step": 123384 + }, + { + "epoch": 1.48, + "grad_norm": 10.58498104671882, + "learning_rate": 3.2963449920019387e-06, + "loss": 1.074, + "step": 123387 + }, + { + "epoch": 1.48, + "grad_norm": 8.606740188891248, + "learning_rate": 3.2959115251913586e-06, + "loss": 1.2034, + "step": 123390 + }, + { + "epoch": 1.48, + "grad_norm": 6.042096509313448, + "learning_rate": 3.2954780812594213e-06, + "loss": 1.2179, + "step": 123393 + }, + { + "epoch": 1.48, + "grad_norm": 4.308514931456829, + "learning_rate": 3.2950446602076014e-06, + "loss": 0.9208, + "step": 123396 + }, + { + "epoch": 1.48, + "grad_norm": 9.970716634974144, + "learning_rate": 3.294611262037376e-06, + "loss": 1.5062, + "step": 123399 + }, + { + "epoch": 1.48, + "grad_norm": 10.002144849236222, + "learning_rate": 3.294177886750227e-06, + "loss": 1.114, + "step": 123402 + }, + { + "epoch": 1.48, + "grad_norm": 13.594195935527427, + "learning_rate": 3.293744534347635e-06, + "loss": 1.0396, + "step": 123405 + }, + { + "epoch": 1.48, + "grad_norm": 4.744864139007902, + "learning_rate": 3.293311204831079e-06, + "loss": 1.3081, + "step": 123408 + }, + { + "epoch": 1.48, + "grad_norm": 9.167389934953851, + "learning_rate": 3.2928778982020325e-06, + "loss": 1.1692, + "step": 123411 + }, + { + "epoch": 1.48, + "grad_norm": 4.131342781063533, + "learning_rate": 3.2924446144619826e-06, + "loss": 1.4047, + "step": 123414 + }, + { + "epoch": 1.48, + "grad_norm": 2.321234441643173, + "learning_rate": 3.292011353612399e-06, + "loss": 1.3112, + "step": 123417 + }, + { + "epoch": 1.48, + "grad_norm": 7.766331852742458, + "learning_rate": 3.2915781156547644e-06, + "loss": 1.2324, + "step": 123420 + }, + { + "epoch": 1.48, + "grad_norm": 12.450966871971344, + "learning_rate": 3.2911449005905606e-06, + "loss": 0.8431, + "step": 123423 + }, + { + "epoch": 1.48, + "grad_norm": 10.080790780000216, + "learning_rate": 3.29071170842126e-06, + "loss": 1.3069, + "step": 123426 + }, + { + "epoch": 1.48, + "grad_norm": 4.631782065982165, + "learning_rate": 3.290278539148343e-06, + "loss": 1.1032, + "step": 123429 + }, + { + "epoch": 1.48, + "grad_norm": 5.280675922221833, + "learning_rate": 3.2898453927732887e-06, + "loss": 0.9526, + "step": 123432 + }, + { + "epoch": 1.48, + "grad_norm": 14.40418478510706, + "learning_rate": 3.2894122692975785e-06, + "loss": 0.7282, + "step": 123435 + }, + { + "epoch": 1.48, + "grad_norm": 4.155158309486822, + "learning_rate": 3.288979168722687e-06, + "loss": 1.1685, + "step": 123438 + }, + { + "epoch": 1.48, + "grad_norm": 17.981427987637506, + "learning_rate": 3.28854609105009e-06, + "loss": 1.3399, + "step": 123441 + }, + { + "epoch": 1.48, + "grad_norm": 2.9965547686670213, + "learning_rate": 3.2881130362812664e-06, + "loss": 1.1201, + "step": 123444 + }, + { + "epoch": 1.48, + "grad_norm": 16.475923884793545, + "learning_rate": 3.287680004417696e-06, + "loss": 1.1579, + "step": 123447 + }, + { + "epoch": 1.48, + "grad_norm": 12.806191471156977, + "learning_rate": 3.2872469954608587e-06, + "loss": 1.4739, + "step": 123450 + }, + { + "epoch": 1.48, + "grad_norm": 12.473687411741748, + "learning_rate": 3.286814009412227e-06, + "loss": 1.4334, + "step": 123453 + }, + { + "epoch": 1.48, + "grad_norm": 5.9162573525622975, + "learning_rate": 3.286381046273284e-06, + "loss": 0.8961, + "step": 123456 + }, + { + "epoch": 1.48, + "grad_norm": 18.04905431069571, + "learning_rate": 3.2859481060455e-06, + "loss": 1.5136, + "step": 123459 + }, + { + "epoch": 1.48, + "grad_norm": 14.242057681555528, + "learning_rate": 3.2855151887303615e-06, + "loss": 1.3713, + "step": 123462 + }, + { + "epoch": 1.48, + "grad_norm": 17.784396927141323, + "learning_rate": 3.285082294329337e-06, + "loss": 1.7065, + "step": 123465 + }, + { + "epoch": 1.48, + "grad_norm": 11.701427383976496, + "learning_rate": 3.2846494228439084e-06, + "loss": 1.2131, + "step": 123468 + }, + { + "epoch": 1.48, + "grad_norm": 7.204243320647643, + "learning_rate": 3.2842165742755517e-06, + "loss": 1.1156, + "step": 123471 + }, + { + "epoch": 1.48, + "grad_norm": 83.867058189526, + "learning_rate": 3.2837837486257453e-06, + "loss": 1.4327, + "step": 123474 + }, + { + "epoch": 1.48, + "grad_norm": 4.901105786095444, + "learning_rate": 3.283350945895969e-06, + "loss": 1.1139, + "step": 123477 + }, + { + "epoch": 1.48, + "grad_norm": 5.117344489296958, + "learning_rate": 3.2829181660876975e-06, + "loss": 1.2023, + "step": 123480 + }, + { + "epoch": 1.48, + "grad_norm": 5.047546543221613, + "learning_rate": 3.2824854092024017e-06, + "loss": 1.1974, + "step": 123483 + }, + { + "epoch": 1.48, + "grad_norm": 18.647038707969333, + "learning_rate": 3.2820526752415647e-06, + "loss": 1.0009, + "step": 123486 + }, + { + "epoch": 1.48, + "grad_norm": 3.1480138027403948, + "learning_rate": 3.281619964206666e-06, + "loss": 1.142, + "step": 123489 + }, + { + "epoch": 1.48, + "grad_norm": 4.8873021167346264, + "learning_rate": 3.2811872760991736e-06, + "loss": 0.9964, + "step": 123492 + }, + { + "epoch": 1.48, + "grad_norm": 6.310820248642136, + "learning_rate": 3.28075461092057e-06, + "loss": 1.2303, + "step": 123495 + }, + { + "epoch": 1.49, + "grad_norm": 4.008765144334665, + "learning_rate": 3.2803219686723343e-06, + "loss": 0.9511, + "step": 123498 + }, + { + "epoch": 1.49, + "grad_norm": 27.23716881937251, + "learning_rate": 3.2798893493559346e-06, + "loss": 1.3441, + "step": 123501 + }, + { + "epoch": 1.49, + "grad_norm": 7.004000171081779, + "learning_rate": 3.2794567529728562e-06, + "loss": 1.0974, + "step": 123504 + }, + { + "epoch": 1.49, + "grad_norm": 2.6777336178223794, + "learning_rate": 3.279024179524568e-06, + "loss": 1.1147, + "step": 123507 + }, + { + "epoch": 1.49, + "grad_norm": 5.952004653885897, + "learning_rate": 3.2785916290125486e-06, + "loss": 1.0534, + "step": 123510 + }, + { + "epoch": 1.49, + "grad_norm": 7.159759691718667, + "learning_rate": 3.278159101438275e-06, + "loss": 1.1594, + "step": 123513 + }, + { + "epoch": 1.49, + "grad_norm": 14.294148399173471, + "learning_rate": 3.277726596803228e-06, + "loss": 1.357, + "step": 123516 + }, + { + "epoch": 1.49, + "grad_norm": 5.337086176448787, + "learning_rate": 3.2772941151088745e-06, + "loss": 1.3103, + "step": 123519 + }, + { + "epoch": 1.49, + "grad_norm": 8.19473288637672, + "learning_rate": 3.276861656356698e-06, + "loss": 0.9524, + "step": 123522 + }, + { + "epoch": 1.49, + "grad_norm": 4.640982302343513, + "learning_rate": 3.2764292205481673e-06, + "loss": 1.2297, + "step": 123525 + }, + { + "epoch": 1.49, + "grad_norm": 8.50494201929764, + "learning_rate": 3.2759968076847614e-06, + "loss": 1.3273, + "step": 123528 + }, + { + "epoch": 1.49, + "grad_norm": 2.3910815557036975, + "learning_rate": 3.275564417767961e-06, + "loss": 1.3017, + "step": 123531 + }, + { + "epoch": 1.49, + "grad_norm": 8.042486088880748, + "learning_rate": 3.2751320507992334e-06, + "loss": 1.2712, + "step": 123534 + }, + { + "epoch": 1.49, + "grad_norm": 2.806117684553474, + "learning_rate": 3.274699706780058e-06, + "loss": 1.0404, + "step": 123537 + }, + { + "epoch": 1.49, + "grad_norm": 18.289829603579406, + "learning_rate": 3.274267385711909e-06, + "loss": 1.3127, + "step": 123540 + }, + { + "epoch": 1.49, + "grad_norm": 3.017002663357487, + "learning_rate": 3.273835087596268e-06, + "loss": 1.3602, + "step": 123543 + }, + { + "epoch": 1.49, + "grad_norm": 5.920689793860055, + "learning_rate": 3.2734028124346028e-06, + "loss": 1.1546, + "step": 123546 + }, + { + "epoch": 1.49, + "grad_norm": 4.744919264850895, + "learning_rate": 3.272970560228389e-06, + "loss": 1.1572, + "step": 123549 + }, + { + "epoch": 1.49, + "grad_norm": 4.9420680950890175, + "learning_rate": 3.2725383309791035e-06, + "loss": 1.2215, + "step": 123552 + }, + { + "epoch": 1.49, + "grad_norm": 7.89047542996444, + "learning_rate": 3.2721061246882212e-06, + "loss": 1.3429, + "step": 123555 + }, + { + "epoch": 1.49, + "grad_norm": 13.574511906605967, + "learning_rate": 3.2716739413572207e-06, + "loss": 1.2963, + "step": 123558 + }, + { + "epoch": 1.49, + "grad_norm": 5.905718294423739, + "learning_rate": 3.2712417809875706e-06, + "loss": 1.4436, + "step": 123561 + }, + { + "epoch": 1.49, + "grad_norm": 10.776978182683973, + "learning_rate": 3.2708096435807512e-06, + "loss": 1.1973, + "step": 123564 + }, + { + "epoch": 1.49, + "grad_norm": 5.510752451665643, + "learning_rate": 3.270377529138232e-06, + "loss": 1.0463, + "step": 123567 + }, + { + "epoch": 1.49, + "grad_norm": 6.035911289446425, + "learning_rate": 3.2699454376614935e-06, + "loss": 1.1817, + "step": 123570 + }, + { + "epoch": 1.49, + "grad_norm": 10.022188545262397, + "learning_rate": 3.269513369152003e-06, + "loss": 1.0448, + "step": 123573 + }, + { + "epoch": 1.49, + "grad_norm": 4.379164124611598, + "learning_rate": 3.2690813236112396e-06, + "loss": 1.0679, + "step": 123576 + }, + { + "epoch": 1.49, + "grad_norm": 12.493734370287434, + "learning_rate": 3.2686493010406774e-06, + "loss": 1.4318, + "step": 123579 + }, + { + "epoch": 1.49, + "grad_norm": 47.09146060617613, + "learning_rate": 3.2682173014417907e-06, + "loss": 1.2266, + "step": 123582 + }, + { + "epoch": 1.49, + "grad_norm": 7.264872375416329, + "learning_rate": 3.267785324816056e-06, + "loss": 1.0509, + "step": 123585 + }, + { + "epoch": 1.49, + "grad_norm": 145.5743483060201, + "learning_rate": 3.267353371164945e-06, + "loss": 1.1768, + "step": 123588 + }, + { + "epoch": 1.49, + "grad_norm": 14.676155292096311, + "learning_rate": 3.2669214404899296e-06, + "loss": 1.2133, + "step": 123591 + }, + { + "epoch": 1.49, + "grad_norm": 9.645995451901772, + "learning_rate": 3.2664895327924863e-06, + "loss": 1.0925, + "step": 123594 + }, + { + "epoch": 1.49, + "grad_norm": 4.349799751075328, + "learning_rate": 3.2660576480740913e-06, + "loss": 1.0552, + "step": 123597 + }, + { + "epoch": 1.49, + "grad_norm": 26.125413170228846, + "learning_rate": 3.265625786336213e-06, + "loss": 1.2425, + "step": 123600 + }, + { + "epoch": 1.49, + "grad_norm": 6.884379125583092, + "learning_rate": 3.2651939475803275e-06, + "loss": 1.1848, + "step": 123603 + }, + { + "epoch": 1.49, + "grad_norm": 9.199132386328287, + "learning_rate": 3.2647621318079128e-06, + "loss": 1.267, + "step": 123606 + }, + { + "epoch": 1.49, + "grad_norm": 7.8450052923394775, + "learning_rate": 3.2643303390204362e-06, + "loss": 1.4951, + "step": 123609 + }, + { + "epoch": 1.49, + "grad_norm": 30.145632132313253, + "learning_rate": 3.263898569219377e-06, + "loss": 0.9213, + "step": 123612 + }, + { + "epoch": 1.49, + "grad_norm": 5.229464274632375, + "learning_rate": 3.2634668224062017e-06, + "loss": 1.4542, + "step": 123615 + }, + { + "epoch": 1.49, + "grad_norm": 8.759605611955948, + "learning_rate": 3.2630350985823876e-06, + "loss": 0.8521, + "step": 123618 + }, + { + "epoch": 1.49, + "grad_norm": 9.391226487013329, + "learning_rate": 3.262603397749409e-06, + "loss": 1.2063, + "step": 123621 + }, + { + "epoch": 1.49, + "grad_norm": 7.069498078236721, + "learning_rate": 3.262171719908741e-06, + "loss": 1.0068, + "step": 123624 + }, + { + "epoch": 1.49, + "grad_norm": 4.3878014869601065, + "learning_rate": 3.261740065061851e-06, + "loss": 1.346, + "step": 123627 + }, + { + "epoch": 1.49, + "grad_norm": 19.319983672057656, + "learning_rate": 3.2613084332102183e-06, + "loss": 1.4314, + "step": 123630 + }, + { + "epoch": 1.49, + "grad_norm": 4.324364899625481, + "learning_rate": 3.260876824355309e-06, + "loss": 1.17, + "step": 123633 + }, + { + "epoch": 1.49, + "grad_norm": 20.520502280270296, + "learning_rate": 3.2604452384985997e-06, + "loss": 1.1102, + "step": 123636 + }, + { + "epoch": 1.49, + "grad_norm": 23.41795773647977, + "learning_rate": 3.2600136756415668e-06, + "loss": 1.1184, + "step": 123639 + }, + { + "epoch": 1.49, + "grad_norm": 10.286027295467761, + "learning_rate": 3.259582135785676e-06, + "loss": 0.9099, + "step": 123642 + }, + { + "epoch": 1.49, + "grad_norm": 16.616997804812048, + "learning_rate": 3.259150618932404e-06, + "loss": 0.9755, + "step": 123645 + }, + { + "epoch": 1.49, + "grad_norm": 3.468837923746064, + "learning_rate": 3.258719125083226e-06, + "loss": 0.9795, + "step": 123648 + }, + { + "epoch": 1.49, + "grad_norm": 6.360590388947114, + "learning_rate": 3.2582876542396113e-06, + "loss": 1.3174, + "step": 123651 + }, + { + "epoch": 1.49, + "grad_norm": 4.961120682887792, + "learning_rate": 3.2578562064030284e-06, + "loss": 0.922, + "step": 123654 + }, + { + "epoch": 1.49, + "grad_norm": 8.057427540753409, + "learning_rate": 3.257424781574955e-06, + "loss": 0.8486, + "step": 123657 + }, + { + "epoch": 1.49, + "grad_norm": 2.6297303689478078, + "learning_rate": 3.256993379756861e-06, + "loss": 0.9688, + "step": 123660 + }, + { + "epoch": 1.49, + "grad_norm": 15.508023082684057, + "learning_rate": 3.2565620009502207e-06, + "loss": 1.2806, + "step": 123663 + }, + { + "epoch": 1.49, + "grad_norm": 6.762320753942676, + "learning_rate": 3.256130645156509e-06, + "loss": 1.4064, + "step": 123666 + }, + { + "epoch": 1.49, + "grad_norm": 6.379965196568207, + "learning_rate": 3.2556993123771898e-06, + "loss": 1.2171, + "step": 123669 + }, + { + "epoch": 1.49, + "grad_norm": 13.68556165386154, + "learning_rate": 3.255268002613743e-06, + "loss": 1.5719, + "step": 123672 + }, + { + "epoch": 1.49, + "grad_norm": 5.891444122891623, + "learning_rate": 3.254836715867634e-06, + "loss": 1.3042, + "step": 123675 + }, + { + "epoch": 1.49, + "grad_norm": 9.667415541800034, + "learning_rate": 3.254405452140341e-06, + "loss": 1.0891, + "step": 123678 + }, + { + "epoch": 1.49, + "grad_norm": 7.465685427570147, + "learning_rate": 3.2539742114333296e-06, + "loss": 1.3975, + "step": 123681 + }, + { + "epoch": 1.49, + "grad_norm": 8.12204006744745, + "learning_rate": 3.2535429937480744e-06, + "loss": 1.0091, + "step": 123684 + }, + { + "epoch": 1.49, + "grad_norm": 4.461581475495618, + "learning_rate": 3.2531117990860462e-06, + "loss": 1.1019, + "step": 123687 + }, + { + "epoch": 1.49, + "grad_norm": 17.643108890303687, + "learning_rate": 3.252680627448721e-06, + "loss": 1.542, + "step": 123690 + }, + { + "epoch": 1.49, + "grad_norm": 7.1114372336691885, + "learning_rate": 3.2522494788375657e-06, + "loss": 1.2795, + "step": 123693 + }, + { + "epoch": 1.49, + "grad_norm": 4.120077308561836, + "learning_rate": 3.2518183532540492e-06, + "loss": 1.0702, + "step": 123696 + }, + { + "epoch": 1.49, + "grad_norm": 4.830445070616186, + "learning_rate": 3.2513872506996467e-06, + "loss": 1.0119, + "step": 123699 + }, + { + "epoch": 1.49, + "grad_norm": 11.223665231498597, + "learning_rate": 3.2509561711758286e-06, + "loss": 1.3801, + "step": 123702 + }, + { + "epoch": 1.49, + "grad_norm": 35.22177061204992, + "learning_rate": 3.2505251146840688e-06, + "loss": 1.0066, + "step": 123705 + }, + { + "epoch": 1.49, + "grad_norm": 19.909199662300583, + "learning_rate": 3.250094081225833e-06, + "loss": 1.1339, + "step": 123708 + }, + { + "epoch": 1.49, + "grad_norm": 3.0440821804087244, + "learning_rate": 3.2496630708025943e-06, + "loss": 1.0872, + "step": 123711 + }, + { + "epoch": 1.49, + "grad_norm": 4.664887117754921, + "learning_rate": 3.2492320834158277e-06, + "loss": 1.2425, + "step": 123714 + }, + { + "epoch": 1.49, + "grad_norm": 9.312692199509295, + "learning_rate": 3.2488011190669966e-06, + "loss": 0.9347, + "step": 123717 + }, + { + "epoch": 1.49, + "grad_norm": 4.258216289242267, + "learning_rate": 3.2483701777575793e-06, + "loss": 1.423, + "step": 123720 + }, + { + "epoch": 1.49, + "grad_norm": 13.069541432366561, + "learning_rate": 3.247939259489039e-06, + "loss": 1.2511, + "step": 123723 + }, + { + "epoch": 1.49, + "grad_norm": 13.08079422250843, + "learning_rate": 3.24750836426285e-06, + "loss": 1.1715, + "step": 123726 + }, + { + "epoch": 1.49, + "grad_norm": 16.650659802837346, + "learning_rate": 3.2470774920804837e-06, + "loss": 1.3384, + "step": 123729 + }, + { + "epoch": 1.49, + "grad_norm": 6.67345542616535, + "learning_rate": 3.2466466429434117e-06, + "loss": 0.8486, + "step": 123732 + }, + { + "epoch": 1.49, + "grad_norm": 6.975004025085822, + "learning_rate": 3.246215816853102e-06, + "loss": 1.2166, + "step": 123735 + }, + { + "epoch": 1.49, + "grad_norm": 5.937422325164951, + "learning_rate": 3.2457850138110227e-06, + "loss": 1.0224, + "step": 123738 + }, + { + "epoch": 1.49, + "grad_norm": 5.911123644948804, + "learning_rate": 3.245354233818645e-06, + "loss": 1.0848, + "step": 123741 + }, + { + "epoch": 1.49, + "grad_norm": 8.810876791183372, + "learning_rate": 3.244923476877442e-06, + "loss": 1.1348, + "step": 123744 + }, + { + "epoch": 1.49, + "grad_norm": 15.120358408002046, + "learning_rate": 3.244492742988884e-06, + "loss": 0.9518, + "step": 123747 + }, + { + "epoch": 1.49, + "grad_norm": 9.33801313617561, + "learning_rate": 3.244062032154436e-06, + "loss": 1.1188, + "step": 123750 + }, + { + "epoch": 1.49, + "grad_norm": 3.8892208678057054, + "learning_rate": 3.2436313443755705e-06, + "loss": 1.1085, + "step": 123753 + }, + { + "epoch": 1.49, + "grad_norm": 9.266540248000215, + "learning_rate": 3.2432006796537605e-06, + "loss": 0.911, + "step": 123756 + }, + { + "epoch": 1.49, + "grad_norm": 5.9113958767304124, + "learning_rate": 3.2427700379904735e-06, + "loss": 0.9877, + "step": 123759 + }, + { + "epoch": 1.49, + "grad_norm": 12.590574447895943, + "learning_rate": 3.242339419387176e-06, + "loss": 1.0267, + "step": 123762 + }, + { + "epoch": 1.49, + "grad_norm": 5.4715399039974475, + "learning_rate": 3.241908823845339e-06, + "loss": 1.2542, + "step": 123765 + }, + { + "epoch": 1.49, + "grad_norm": 18.9129909324042, + "learning_rate": 3.241478251366432e-06, + "loss": 1.5518, + "step": 123768 + }, + { + "epoch": 1.49, + "grad_norm": 7.210107287370634, + "learning_rate": 3.241047701951927e-06, + "loss": 1.2733, + "step": 123771 + }, + { + "epoch": 1.49, + "grad_norm": 7.316307218619669, + "learning_rate": 3.2406171756032946e-06, + "loss": 1.2814, + "step": 123774 + }, + { + "epoch": 1.49, + "grad_norm": 11.619161478557071, + "learning_rate": 3.240186672322e-06, + "loss": 1.3365, + "step": 123777 + }, + { + "epoch": 1.49, + "grad_norm": 13.60697392805804, + "learning_rate": 3.239756192109511e-06, + "loss": 1.3901, + "step": 123780 + }, + { + "epoch": 1.49, + "grad_norm": 11.380107034366922, + "learning_rate": 3.239325734967299e-06, + "loss": 1.0718, + "step": 123783 + }, + { + "epoch": 1.49, + "grad_norm": 10.306399181924256, + "learning_rate": 3.238895300896836e-06, + "loss": 1.3146, + "step": 123786 + }, + { + "epoch": 1.49, + "grad_norm": 7.397876921359061, + "learning_rate": 3.238464889899585e-06, + "loss": 1.3757, + "step": 123789 + }, + { + "epoch": 1.49, + "grad_norm": 6.423306705792652, + "learning_rate": 3.2380345019770176e-06, + "loss": 0.8484, + "step": 123792 + }, + { + "epoch": 1.49, + "grad_norm": 7.699291600839881, + "learning_rate": 3.2376041371306034e-06, + "loss": 0.9063, + "step": 123795 + }, + { + "epoch": 1.49, + "grad_norm": 6.61552347752324, + "learning_rate": 3.237173795361813e-06, + "loss": 1.0092, + "step": 123798 + }, + { + "epoch": 1.49, + "grad_norm": 14.287853019049832, + "learning_rate": 3.236743476672113e-06, + "loss": 1.3139, + "step": 123801 + }, + { + "epoch": 1.49, + "grad_norm": 14.46597616773028, + "learning_rate": 3.2363131810629677e-06, + "loss": 1.1082, + "step": 123804 + }, + { + "epoch": 1.49, + "grad_norm": 10.196093737275651, + "learning_rate": 3.23588290853585e-06, + "loss": 1.2622, + "step": 123807 + }, + { + "epoch": 1.49, + "grad_norm": 39.09937081658375, + "learning_rate": 3.2354526590922276e-06, + "loss": 1.1188, + "step": 123810 + }, + { + "epoch": 1.49, + "grad_norm": 3.1112019352776317, + "learning_rate": 3.235022432733571e-06, + "loss": 1.0912, + "step": 123813 + }, + { + "epoch": 1.49, + "grad_norm": 6.151207207742827, + "learning_rate": 3.234592229461343e-06, + "loss": 1.4127, + "step": 123816 + }, + { + "epoch": 1.49, + "grad_norm": 12.578688801896906, + "learning_rate": 3.2341620492770155e-06, + "loss": 1.1117, + "step": 123819 + }, + { + "epoch": 1.49, + "grad_norm": 8.629618669955503, + "learning_rate": 3.233731892182058e-06, + "loss": 1.5466, + "step": 123822 + }, + { + "epoch": 1.49, + "grad_norm": 9.909531877238804, + "learning_rate": 3.2333017581779335e-06, + "loss": 1.2297, + "step": 123825 + }, + { + "epoch": 1.49, + "grad_norm": 8.234674976876304, + "learning_rate": 3.232871647266117e-06, + "loss": 1.1204, + "step": 123828 + }, + { + "epoch": 1.49, + "grad_norm": 8.621960502098254, + "learning_rate": 3.2324415594480684e-06, + "loss": 1.1516, + "step": 123831 + }, + { + "epoch": 1.49, + "grad_norm": 9.492291612331972, + "learning_rate": 3.2320114947252577e-06, + "loss": 1.2311, + "step": 123834 + }, + { + "epoch": 1.49, + "grad_norm": 7.475337764848863, + "learning_rate": 3.2315814530991563e-06, + "loss": 0.9454, + "step": 123837 + }, + { + "epoch": 1.49, + "grad_norm": 5.776224370153845, + "learning_rate": 3.2311514345712313e-06, + "loss": 1.2519, + "step": 123840 + }, + { + "epoch": 1.49, + "grad_norm": 10.827667783808176, + "learning_rate": 3.2307214391429476e-06, + "loss": 1.2285, + "step": 123843 + }, + { + "epoch": 1.49, + "grad_norm": 3.7303261536235155, + "learning_rate": 3.2302914668157715e-06, + "loss": 0.9875, + "step": 123846 + }, + { + "epoch": 1.49, + "grad_norm": 6.327640221102408, + "learning_rate": 3.2298615175911717e-06, + "loss": 1.1485, + "step": 123849 + }, + { + "epoch": 1.49, + "grad_norm": 3.3675673354507882, + "learning_rate": 3.229431591470615e-06, + "loss": 0.9711, + "step": 123852 + }, + { + "epoch": 1.49, + "grad_norm": 6.175564259968897, + "learning_rate": 3.2290016884555742e-06, + "loss": 0.8424, + "step": 123855 + }, + { + "epoch": 1.49, + "grad_norm": 14.435154430237834, + "learning_rate": 3.2285718085475082e-06, + "loss": 1.2931, + "step": 123858 + }, + { + "epoch": 1.49, + "grad_norm": 4.878957428690346, + "learning_rate": 3.228141951747887e-06, + "loss": 1.0325, + "step": 123861 + }, + { + "epoch": 1.49, + "grad_norm": 5.903368580738163, + "learning_rate": 3.2277121180581816e-06, + "loss": 1.5177, + "step": 123864 + }, + { + "epoch": 1.49, + "grad_norm": 9.187540876801384, + "learning_rate": 3.2272823074798555e-06, + "loss": 1.1329, + "step": 123867 + }, + { + "epoch": 1.49, + "grad_norm": 5.115309663246092, + "learning_rate": 3.2268525200143717e-06, + "loss": 1.1885, + "step": 123870 + }, + { + "epoch": 1.49, + "grad_norm": 6.303355756838292, + "learning_rate": 3.226422755663201e-06, + "loss": 1.342, + "step": 123873 + }, + { + "epoch": 1.49, + "grad_norm": 8.795266107341375, + "learning_rate": 3.2259930144278096e-06, + "loss": 1.0327, + "step": 123876 + }, + { + "epoch": 1.49, + "grad_norm": 4.78820016254928, + "learning_rate": 3.2255632963096637e-06, + "loss": 1.0492, + "step": 123879 + }, + { + "epoch": 1.49, + "grad_norm": 5.327811636642716, + "learning_rate": 3.225133601310234e-06, + "loss": 1.3412, + "step": 123882 + }, + { + "epoch": 1.49, + "grad_norm": 12.065940551130543, + "learning_rate": 3.2247039294309835e-06, + "loss": 1.1701, + "step": 123885 + }, + { + "epoch": 1.49, + "grad_norm": 17.337072884274345, + "learning_rate": 3.224274280673374e-06, + "loss": 1.07, + "step": 123888 + }, + { + "epoch": 1.49, + "grad_norm": 4.853484087990551, + "learning_rate": 3.223844655038875e-06, + "loss": 1.0404, + "step": 123891 + }, + { + "epoch": 1.49, + "grad_norm": 3.9384823369221538, + "learning_rate": 3.2234150525289576e-06, + "loss": 1.1211, + "step": 123894 + }, + { + "epoch": 1.49, + "grad_norm": 7.358782256023798, + "learning_rate": 3.2229854731450807e-06, + "loss": 1.6103, + "step": 123897 + }, + { + "epoch": 1.49, + "grad_norm": 13.921883132756653, + "learning_rate": 3.222555916888712e-06, + "loss": 1.0479, + "step": 123900 + }, + { + "epoch": 1.49, + "grad_norm": 30.62098476492778, + "learning_rate": 3.2221263837613205e-06, + "loss": 0.9243, + "step": 123903 + }, + { + "epoch": 1.49, + "grad_norm": 7.921777836353609, + "learning_rate": 3.2216968737643727e-06, + "loss": 1.278, + "step": 123906 + }, + { + "epoch": 1.49, + "grad_norm": 3.331962979449083, + "learning_rate": 3.2212673868993318e-06, + "loss": 0.6681, + "step": 123909 + }, + { + "epoch": 1.49, + "grad_norm": 6.919886406374259, + "learning_rate": 3.22083792316766e-06, + "loss": 1.1296, + "step": 123912 + }, + { + "epoch": 1.49, + "grad_norm": 4.364975413189562, + "learning_rate": 3.220408482570827e-06, + "loss": 0.8463, + "step": 123915 + }, + { + "epoch": 1.49, + "grad_norm": 8.692523142868604, + "learning_rate": 3.219979065110298e-06, + "loss": 0.9292, + "step": 123918 + }, + { + "epoch": 1.49, + "grad_norm": 4.290275304252506, + "learning_rate": 3.219549670787542e-06, + "loss": 1.405, + "step": 123921 + }, + { + "epoch": 1.49, + "grad_norm": 9.862215187243944, + "learning_rate": 3.219120299604016e-06, + "loss": 1.149, + "step": 123924 + }, + { + "epoch": 1.49, + "grad_norm": 58.88187413256273, + "learning_rate": 3.218690951561194e-06, + "loss": 1.3173, + "step": 123927 + }, + { + "epoch": 1.49, + "grad_norm": 3.0999269917767096, + "learning_rate": 3.2182616266605338e-06, + "loss": 1.3368, + "step": 123930 + }, + { + "epoch": 1.49, + "grad_norm": 10.698353800679183, + "learning_rate": 3.217832324903505e-06, + "loss": 0.9172, + "step": 123933 + }, + { + "epoch": 1.49, + "grad_norm": 6.52667197157847, + "learning_rate": 3.217403046291573e-06, + "loss": 1.289, + "step": 123936 + }, + { + "epoch": 1.49, + "grad_norm": 12.633327979605982, + "learning_rate": 3.2169737908261988e-06, + "loss": 1.0152, + "step": 123939 + }, + { + "epoch": 1.49, + "grad_norm": 99.70535751096045, + "learning_rate": 3.2165445585088505e-06, + "loss": 0.9601, + "step": 123942 + }, + { + "epoch": 1.49, + "grad_norm": 3.852927260939701, + "learning_rate": 3.2161153493409914e-06, + "loss": 1.2244, + "step": 123945 + }, + { + "epoch": 1.49, + "grad_norm": 6.155924198411418, + "learning_rate": 3.21568616332409e-06, + "loss": 1.2182, + "step": 123948 + }, + { + "epoch": 1.49, + "grad_norm": 12.461603108012786, + "learning_rate": 3.2152570004596096e-06, + "loss": 1.3381, + "step": 123951 + }, + { + "epoch": 1.49, + "grad_norm": 10.518968320694704, + "learning_rate": 3.214827860749008e-06, + "loss": 1.0673, + "step": 123954 + }, + { + "epoch": 1.49, + "grad_norm": 3.9485897646894643, + "learning_rate": 3.2143987441937564e-06, + "loss": 1.449, + "step": 123957 + }, + { + "epoch": 1.49, + "grad_norm": 7.497272166430507, + "learning_rate": 3.213969650795317e-06, + "loss": 1.2398, + "step": 123960 + }, + { + "epoch": 1.49, + "grad_norm": 6.433571970231156, + "learning_rate": 3.2135405805551577e-06, + "loss": 1.1684, + "step": 123963 + }, + { + "epoch": 1.49, + "grad_norm": 3.2005803169469993, + "learning_rate": 3.2131115334747374e-06, + "loss": 1.2742, + "step": 123966 + }, + { + "epoch": 1.49, + "grad_norm": 7.956549324862572, + "learning_rate": 3.212682509555526e-06, + "loss": 1.1436, + "step": 123969 + }, + { + "epoch": 1.49, + "grad_norm": 7.187660540628917, + "learning_rate": 3.2122535087989812e-06, + "loss": 1.4993, + "step": 123972 + }, + { + "epoch": 1.49, + "grad_norm": 8.511543054427605, + "learning_rate": 3.211824531206573e-06, + "loss": 1.2321, + "step": 123975 + }, + { + "epoch": 1.49, + "grad_norm": 4.310719284883727, + "learning_rate": 3.21139557677976e-06, + "loss": 0.8711, + "step": 123978 + }, + { + "epoch": 1.49, + "grad_norm": 7.131785511970529, + "learning_rate": 3.210966645520008e-06, + "loss": 1.0873, + "step": 123981 + }, + { + "epoch": 1.49, + "grad_norm": 4.670674314732183, + "learning_rate": 3.210537737428783e-06, + "loss": 1.0138, + "step": 123984 + }, + { + "epoch": 1.49, + "grad_norm": 10.49747522359097, + "learning_rate": 3.2101088525075454e-06, + "loss": 1.3368, + "step": 123987 + }, + { + "epoch": 1.49, + "grad_norm": 3.0072891436818034, + "learning_rate": 3.2096799907577646e-06, + "loss": 0.8528, + "step": 123990 + }, + { + "epoch": 1.49, + "grad_norm": 8.878292568213887, + "learning_rate": 3.2092511521809e-06, + "loss": 1.4714, + "step": 123993 + }, + { + "epoch": 1.49, + "grad_norm": 53.55580004485235, + "learning_rate": 3.2088223367784112e-06, + "loss": 1.1553, + "step": 123996 + }, + { + "epoch": 1.49, + "grad_norm": 4.6208613911278125, + "learning_rate": 3.2083935445517665e-06, + "loss": 1.1866, + "step": 123999 + }, + { + "epoch": 1.49, + "grad_norm": 10.452784785965845, + "learning_rate": 3.2079647755024314e-06, + "loss": 1.0866, + "step": 124002 + }, + { + "epoch": 1.49, + "grad_norm": 4.001277660233817, + "learning_rate": 3.2075360296318624e-06, + "loss": 1.1604, + "step": 124005 + }, + { + "epoch": 1.49, + "grad_norm": 5.261493264947256, + "learning_rate": 3.207107306941527e-06, + "loss": 1.0477, + "step": 124008 + }, + { + "epoch": 1.49, + "grad_norm": 5.999354171459987, + "learning_rate": 3.2066786074328903e-06, + "loss": 0.9072, + "step": 124011 + }, + { + "epoch": 1.49, + "grad_norm": 6.699352956642473, + "learning_rate": 3.206249931107409e-06, + "loss": 1.3155, + "step": 124014 + }, + { + "epoch": 1.49, + "grad_norm": 7.2884814902116055, + "learning_rate": 3.2058212779665542e-06, + "loss": 1.2735, + "step": 124017 + }, + { + "epoch": 1.49, + "grad_norm": 25.77687622876632, + "learning_rate": 3.205392648011779e-06, + "loss": 1.2256, + "step": 124020 + }, + { + "epoch": 1.49, + "grad_norm": 9.920423960895276, + "learning_rate": 3.2049640412445525e-06, + "loss": 0.9757, + "step": 124023 + }, + { + "epoch": 1.49, + "grad_norm": 12.06874103640155, + "learning_rate": 3.2045354576663356e-06, + "loss": 1.2541, + "step": 124026 + }, + { + "epoch": 1.49, + "grad_norm": 16.69808661693151, + "learning_rate": 3.2041068972785962e-06, + "loss": 0.7976, + "step": 124029 + }, + { + "epoch": 1.49, + "grad_norm": 6.441275069487158, + "learning_rate": 3.203678360082787e-06, + "loss": 1.3555, + "step": 124032 + }, + { + "epoch": 1.49, + "grad_norm": 4.6726243351771934, + "learning_rate": 3.2032498460803795e-06, + "loss": 0.8409, + "step": 124035 + }, + { + "epoch": 1.49, + "grad_norm": 15.6352070083359, + "learning_rate": 3.2028213552728294e-06, + "loss": 1.1171, + "step": 124038 + }, + { + "epoch": 1.49, + "grad_norm": 3.303248831634412, + "learning_rate": 3.2023928876616007e-06, + "loss": 1.1102, + "step": 124041 + }, + { + "epoch": 1.49, + "grad_norm": 9.092242971854903, + "learning_rate": 3.20196444324816e-06, + "loss": 1.0023, + "step": 124044 + }, + { + "epoch": 1.49, + "grad_norm": 5.7407734495403115, + "learning_rate": 3.2015360220339633e-06, + "loss": 0.9345, + "step": 124047 + }, + { + "epoch": 1.49, + "grad_norm": 10.509394741865643, + "learning_rate": 3.201107624020475e-06, + "loss": 0.802, + "step": 124050 + }, + { + "epoch": 1.49, + "grad_norm": 8.57033199733389, + "learning_rate": 3.200679249209161e-06, + "loss": 1.1713, + "step": 124053 + }, + { + "epoch": 1.49, + "grad_norm": 39.52153173367775, + "learning_rate": 3.2002508976014757e-06, + "loss": 1.2843, + "step": 124056 + }, + { + "epoch": 1.49, + "grad_norm": 5.763321410795062, + "learning_rate": 3.1998225691988883e-06, + "loss": 1.3986, + "step": 124059 + }, + { + "epoch": 1.49, + "grad_norm": 10.454030667068848, + "learning_rate": 3.1993942640028543e-06, + "loss": 1.6587, + "step": 124062 + }, + { + "epoch": 1.49, + "grad_norm": 13.602938861166333, + "learning_rate": 3.1989659820148377e-06, + "loss": 1.3136, + "step": 124065 + }, + { + "epoch": 1.49, + "grad_norm": 19.428806945312935, + "learning_rate": 3.1985377232363003e-06, + "loss": 1.2887, + "step": 124068 + }, + { + "epoch": 1.49, + "grad_norm": 17.574104348587532, + "learning_rate": 3.198109487668707e-06, + "loss": 1.1093, + "step": 124071 + }, + { + "epoch": 1.49, + "grad_norm": 4.357660281045431, + "learning_rate": 3.1976812753135134e-06, + "loss": 0.7559, + "step": 124074 + }, + { + "epoch": 1.49, + "grad_norm": 22.045433517501362, + "learning_rate": 3.1972530861721863e-06, + "loss": 1.2839, + "step": 124077 + }, + { + "epoch": 1.49, + "grad_norm": 6.543813858734097, + "learning_rate": 3.196824920246181e-06, + "loss": 0.7547, + "step": 124080 + }, + { + "epoch": 1.49, + "grad_norm": 4.828899647850323, + "learning_rate": 3.1963967775369643e-06, + "loss": 1.289, + "step": 124083 + }, + { + "epoch": 1.49, + "grad_norm": 8.635985362973681, + "learning_rate": 3.1959686580459926e-06, + "loss": 1.3955, + "step": 124086 + }, + { + "epoch": 1.49, + "grad_norm": 5.224270941676222, + "learning_rate": 3.1955405617747283e-06, + "loss": 1.2661, + "step": 124089 + }, + { + "epoch": 1.49, + "grad_norm": 9.261511482890315, + "learning_rate": 3.1951124887246334e-06, + "loss": 1.2317, + "step": 124092 + }, + { + "epoch": 1.49, + "grad_norm": 3.55966467925154, + "learning_rate": 3.194684438897172e-06, + "loss": 1.0233, + "step": 124095 + }, + { + "epoch": 1.49, + "grad_norm": 9.067828137696752, + "learning_rate": 3.194256412293797e-06, + "loss": 0.9755, + "step": 124098 + }, + { + "epoch": 1.49, + "grad_norm": 6.073230235423912, + "learning_rate": 3.1938284089159776e-06, + "loss": 1.2556, + "step": 124101 + }, + { + "epoch": 1.49, + "grad_norm": 15.667330089388148, + "learning_rate": 3.1934004287651664e-06, + "loss": 1.5741, + "step": 124104 + }, + { + "epoch": 1.49, + "grad_norm": 10.639499590813125, + "learning_rate": 3.192972471842829e-06, + "loss": 1.0405, + "step": 124107 + }, + { + "epoch": 1.49, + "grad_norm": 8.835467467893498, + "learning_rate": 3.192544538150427e-06, + "loss": 1.1701, + "step": 124110 + }, + { + "epoch": 1.49, + "grad_norm": 10.841853493324267, + "learning_rate": 3.1921166276894155e-06, + "loss": 1.379, + "step": 124113 + }, + { + "epoch": 1.49, + "grad_norm": 6.964603597322812, + "learning_rate": 3.1916887404612574e-06, + "loss": 1.0177, + "step": 124116 + }, + { + "epoch": 1.49, + "grad_norm": 5.784312694359101, + "learning_rate": 3.1912608764674167e-06, + "loss": 0.9803, + "step": 124119 + }, + { + "epoch": 1.49, + "grad_norm": 35.41601402311406, + "learning_rate": 3.1908330357093474e-06, + "loss": 1.2428, + "step": 124122 + }, + { + "epoch": 1.49, + "grad_norm": 3.5304179980859662, + "learning_rate": 3.190405218188515e-06, + "loss": 1.4778, + "step": 124125 + }, + { + "epoch": 1.49, + "grad_norm": 7.152758765773485, + "learning_rate": 3.1899774239063743e-06, + "loss": 1.1892, + "step": 124128 + }, + { + "epoch": 1.49, + "grad_norm": 4.885503091764483, + "learning_rate": 3.1895496528643864e-06, + "loss": 1.2553, + "step": 124131 + }, + { + "epoch": 1.49, + "grad_norm": 3.9249598115508424, + "learning_rate": 3.189121905064013e-06, + "loss": 1.3766, + "step": 124134 + }, + { + "epoch": 1.49, + "grad_norm": 5.598488546205475, + "learning_rate": 3.188694180506716e-06, + "loss": 1.3863, + "step": 124137 + }, + { + "epoch": 1.49, + "grad_norm": 8.473487689108511, + "learning_rate": 3.18826647919395e-06, + "loss": 1.0219, + "step": 124140 + }, + { + "epoch": 1.49, + "grad_norm": 7.197173738864907, + "learning_rate": 3.18783880112718e-06, + "loss": 1.0889, + "step": 124143 + }, + { + "epoch": 1.49, + "grad_norm": 31.2125780186293, + "learning_rate": 3.1874111463078594e-06, + "loss": 1.2879, + "step": 124146 + }, + { + "epoch": 1.49, + "grad_norm": 4.945302008550479, + "learning_rate": 3.1869835147374505e-06, + "loss": 1.0784, + "step": 124149 + }, + { + "epoch": 1.49, + "grad_norm": 3.8753042772155606, + "learning_rate": 3.1865559064174158e-06, + "loss": 1.1753, + "step": 124152 + }, + { + "epoch": 1.49, + "grad_norm": 8.179833234114447, + "learning_rate": 3.186128321349208e-06, + "loss": 1.2841, + "step": 124155 + }, + { + "epoch": 1.49, + "grad_norm": 6.586650258041774, + "learning_rate": 3.1857007595342903e-06, + "loss": 1.1635, + "step": 124158 + }, + { + "epoch": 1.49, + "grad_norm": 6.81406231912987, + "learning_rate": 3.1852732209741244e-06, + "loss": 0.8459, + "step": 124161 + }, + { + "epoch": 1.49, + "grad_norm": 10.946666577119345, + "learning_rate": 3.1848457056701675e-06, + "loss": 1.1056, + "step": 124164 + }, + { + "epoch": 1.49, + "grad_norm": 22.20938147631608, + "learning_rate": 3.184418213623873e-06, + "loss": 1.2593, + "step": 124167 + }, + { + "epoch": 1.49, + "grad_norm": 10.988687591494688, + "learning_rate": 3.183990744836705e-06, + "loss": 1.2668, + "step": 124170 + }, + { + "epoch": 1.49, + "grad_norm": 6.676723560610464, + "learning_rate": 3.1835632993101206e-06, + "loss": 0.9233, + "step": 124173 + }, + { + "epoch": 1.49, + "grad_norm": 10.128198461652236, + "learning_rate": 3.18313587704558e-06, + "loss": 1.0291, + "step": 124176 + }, + { + "epoch": 1.49, + "grad_norm": 10.266446115018761, + "learning_rate": 3.182708478044544e-06, + "loss": 1.3196, + "step": 124179 + }, + { + "epoch": 1.49, + "grad_norm": 9.375652240004488, + "learning_rate": 3.1822811023084655e-06, + "loss": 1.3704, + "step": 124182 + }, + { + "epoch": 1.49, + "grad_norm": 6.893627208427032, + "learning_rate": 3.181853749838809e-06, + "loss": 1.1009, + "step": 124185 + }, + { + "epoch": 1.49, + "grad_norm": 10.020591176716342, + "learning_rate": 3.181426420637026e-06, + "loss": 1.4082, + "step": 124188 + }, + { + "epoch": 1.49, + "grad_norm": 3.9316911722129295, + "learning_rate": 3.180999114704583e-06, + "loss": 1.3323, + "step": 124191 + }, + { + "epoch": 1.49, + "grad_norm": 9.107883712897316, + "learning_rate": 3.180571832042929e-06, + "loss": 1.0476, + "step": 124194 + }, + { + "epoch": 1.49, + "grad_norm": 7.330456226686572, + "learning_rate": 3.1801445726535284e-06, + "loss": 1.017, + "step": 124197 + }, + { + "epoch": 1.49, + "grad_norm": 4.281748979200992, + "learning_rate": 3.1797173365378376e-06, + "loss": 0.9177, + "step": 124200 + }, + { + "epoch": 1.49, + "grad_norm": 5.314040121291958, + "learning_rate": 3.1792901236973174e-06, + "loss": 1.434, + "step": 124203 + }, + { + "epoch": 1.49, + "grad_norm": 7.642103408343898, + "learning_rate": 3.1788629341334244e-06, + "loss": 1.3328, + "step": 124206 + }, + { + "epoch": 1.49, + "grad_norm": 6.15965195213668, + "learning_rate": 3.178435767847611e-06, + "loss": 1.2487, + "step": 124209 + }, + { + "epoch": 1.49, + "grad_norm": 17.714900478141658, + "learning_rate": 3.178008624841339e-06, + "loss": 1.2611, + "step": 124212 + }, + { + "epoch": 1.49, + "grad_norm": 9.303979748597179, + "learning_rate": 3.1775815051160672e-06, + "loss": 1.3881, + "step": 124215 + }, + { + "epoch": 1.49, + "grad_norm": 86.92049501535395, + "learning_rate": 3.1771544086732553e-06, + "loss": 1.2624, + "step": 124218 + }, + { + "epoch": 1.49, + "grad_norm": 17.27919666088203, + "learning_rate": 3.1767273355143537e-06, + "loss": 1.265, + "step": 124221 + }, + { + "epoch": 1.49, + "grad_norm": 6.650137950865102, + "learning_rate": 3.1763002856408254e-06, + "loss": 0.9992, + "step": 124224 + }, + { + "epoch": 1.49, + "grad_norm": 5.085984449376685, + "learning_rate": 3.1758732590541284e-06, + "loss": 1.0729, + "step": 124227 + }, + { + "epoch": 1.49, + "grad_norm": 8.381705056314367, + "learning_rate": 3.1754462557557155e-06, + "loss": 0.7767, + "step": 124230 + }, + { + "epoch": 1.49, + "grad_norm": 2.166381495187462, + "learning_rate": 3.175019275747049e-06, + "loss": 1.0129, + "step": 124233 + }, + { + "epoch": 1.49, + "grad_norm": 9.36642602698872, + "learning_rate": 3.1745923190295803e-06, + "loss": 0.9461, + "step": 124236 + }, + { + "epoch": 1.49, + "grad_norm": 6.75391973340043, + "learning_rate": 3.1741653856047694e-06, + "loss": 1.157, + "step": 124239 + }, + { + "epoch": 1.49, + "grad_norm": 34.37514115873469, + "learning_rate": 3.1737384754740743e-06, + "loss": 1.2402, + "step": 124242 + }, + { + "epoch": 1.49, + "grad_norm": 13.17885376922712, + "learning_rate": 3.1733115886389532e-06, + "loss": 1.2096, + "step": 124245 + }, + { + "epoch": 1.49, + "grad_norm": 4.089843246456698, + "learning_rate": 3.172884725100861e-06, + "loss": 1.2794, + "step": 124248 + }, + { + "epoch": 1.49, + "grad_norm": 3.1357058382511074, + "learning_rate": 3.1724578848612506e-06, + "loss": 1.0423, + "step": 124251 + }, + { + "epoch": 1.49, + "grad_norm": 9.583527643850086, + "learning_rate": 3.172031067921583e-06, + "loss": 1.2208, + "step": 124254 + }, + { + "epoch": 1.49, + "grad_norm": 3.4952408694090167, + "learning_rate": 3.171604274283313e-06, + "loss": 1.0017, + "step": 124257 + }, + { + "epoch": 1.49, + "grad_norm": 3.378337989787622, + "learning_rate": 3.171177503947901e-06, + "loss": 1.0093, + "step": 124260 + }, + { + "epoch": 1.49, + "grad_norm": 12.073077785345792, + "learning_rate": 3.1707507569167983e-06, + "loss": 1.279, + "step": 124263 + }, + { + "epoch": 1.49, + "grad_norm": 19.53159673414222, + "learning_rate": 3.170324033191463e-06, + "loss": 1.0536, + "step": 124266 + }, + { + "epoch": 1.49, + "grad_norm": 25.97694796683711, + "learning_rate": 3.169897332773355e-06, + "loss": 1.0921, + "step": 124269 + }, + { + "epoch": 1.49, + "grad_norm": 5.19836111619183, + "learning_rate": 3.1694706556639267e-06, + "loss": 1.2282, + "step": 124272 + }, + { + "epoch": 1.49, + "grad_norm": 8.016179395954667, + "learning_rate": 3.1690440018646317e-06, + "loss": 1.4739, + "step": 124275 + }, + { + "epoch": 1.49, + "grad_norm": 16.279853129211276, + "learning_rate": 3.1686173713769297e-06, + "loss": 1.8319, + "step": 124278 + }, + { + "epoch": 1.49, + "grad_norm": 13.995114553065758, + "learning_rate": 3.168190764202275e-06, + "loss": 1.2905, + "step": 124281 + }, + { + "epoch": 1.49, + "grad_norm": 6.774677445538064, + "learning_rate": 3.1677641803421243e-06, + "loss": 1.1163, + "step": 124284 + }, + { + "epoch": 1.49, + "grad_norm": 18.765801788801035, + "learning_rate": 3.1673376197979376e-06, + "loss": 0.9791, + "step": 124287 + }, + { + "epoch": 1.49, + "grad_norm": 5.127960519459784, + "learning_rate": 3.166911082571167e-06, + "loss": 1.0224, + "step": 124290 + }, + { + "epoch": 1.49, + "grad_norm": 4.398239386316971, + "learning_rate": 3.166484568663263e-06, + "loss": 1.049, + "step": 124293 + }, + { + "epoch": 1.49, + "grad_norm": 9.824939470139716, + "learning_rate": 3.1660580780756864e-06, + "loss": 1.0099, + "step": 124296 + }, + { + "epoch": 1.49, + "grad_norm": 8.430650525736157, + "learning_rate": 3.165631610809895e-06, + "loss": 0.8143, + "step": 124299 + }, + { + "epoch": 1.49, + "grad_norm": 10.861380973267634, + "learning_rate": 3.1652051668673377e-06, + "loss": 1.5469, + "step": 124302 + }, + { + "epoch": 1.49, + "grad_norm": 21.462172786025484, + "learning_rate": 3.164778746249474e-06, + "loss": 1.4054, + "step": 124305 + }, + { + "epoch": 1.49, + "grad_norm": 33.47236430915024, + "learning_rate": 3.164352348957759e-06, + "loss": 1.4311, + "step": 124308 + }, + { + "epoch": 1.49, + "grad_norm": 7.152921391907401, + "learning_rate": 3.163925974993649e-06, + "loss": 0.9534, + "step": 124311 + }, + { + "epoch": 1.49, + "grad_norm": 6.877969680831133, + "learning_rate": 3.163499624358598e-06, + "loss": 0.8819, + "step": 124314 + }, + { + "epoch": 1.49, + "grad_norm": 4.088453102437002, + "learning_rate": 3.1630732970540566e-06, + "loss": 1.0557, + "step": 124317 + }, + { + "epoch": 1.49, + "grad_norm": 18.60461603698245, + "learning_rate": 3.162646993081484e-06, + "loss": 1.4379, + "step": 124320 + }, + { + "epoch": 1.49, + "grad_norm": 6.811383105406343, + "learning_rate": 3.1622207124423343e-06, + "loss": 0.946, + "step": 124323 + }, + { + "epoch": 1.49, + "grad_norm": 5.378293674426049, + "learning_rate": 3.1617944551380654e-06, + "loss": 1.2002, + "step": 124326 + }, + { + "epoch": 1.5, + "grad_norm": 6.052275676325375, + "learning_rate": 3.1613682211701246e-06, + "loss": 1.1509, + "step": 124329 + }, + { + "epoch": 1.5, + "grad_norm": 6.3390227274306845, + "learning_rate": 3.1609420105399757e-06, + "loss": 1.2732, + "step": 124332 + }, + { + "epoch": 1.5, + "grad_norm": 7.048619231701008, + "learning_rate": 3.160515823249064e-06, + "loss": 1.2371, + "step": 124335 + }, + { + "epoch": 1.5, + "grad_norm": 29.488150597044463, + "learning_rate": 3.1600896592988496e-06, + "loss": 1.3448, + "step": 124338 + }, + { + "epoch": 1.5, + "grad_norm": 8.356402759357263, + "learning_rate": 3.1596635186907876e-06, + "loss": 0.8064, + "step": 124341 + }, + { + "epoch": 1.5, + "grad_norm": 6.067102515812995, + "learning_rate": 3.159237401426327e-06, + "loss": 0.8123, + "step": 124344 + }, + { + "epoch": 1.5, + "grad_norm": 15.590637412345307, + "learning_rate": 3.158811307506925e-06, + "loss": 1.4798, + "step": 124347 + }, + { + "epoch": 1.5, + "grad_norm": 9.184051808868919, + "learning_rate": 3.1583852369340363e-06, + "loss": 1.3953, + "step": 124350 + }, + { + "epoch": 1.5, + "grad_norm": 8.921232687203194, + "learning_rate": 3.157959189709118e-06, + "loss": 1.6327, + "step": 124353 + }, + { + "epoch": 1.5, + "grad_norm": 11.023500396888926, + "learning_rate": 3.1575331658336195e-06, + "loss": 0.9046, + "step": 124356 + }, + { + "epoch": 1.5, + "grad_norm": 7.195122479733003, + "learning_rate": 3.1571071653089924e-06, + "loss": 0.9656, + "step": 124359 + }, + { + "epoch": 1.5, + "grad_norm": 19.684060858715963, + "learning_rate": 3.1566811881366943e-06, + "loss": 1.0195, + "step": 124362 + }, + { + "epoch": 1.5, + "grad_norm": 6.651430166898146, + "learning_rate": 3.1562552343181784e-06, + "loss": 1.4343, + "step": 124365 + }, + { + "epoch": 1.5, + "grad_norm": 10.57269995562739, + "learning_rate": 3.1558293038549015e-06, + "loss": 1.2752, + "step": 124368 + }, + { + "epoch": 1.5, + "grad_norm": 6.406384182830717, + "learning_rate": 3.15540339674831e-06, + "loss": 1.1534, + "step": 124371 + }, + { + "epoch": 1.5, + "grad_norm": 15.357002753835173, + "learning_rate": 3.154977512999865e-06, + "loss": 1.1368, + "step": 124374 + }, + { + "epoch": 1.5, + "grad_norm": 2.4494929610246703, + "learning_rate": 3.1545516526110134e-06, + "loss": 1.0664, + "step": 124377 + }, + { + "epoch": 1.5, + "grad_norm": 12.185306314671616, + "learning_rate": 3.1541258155832145e-06, + "loss": 0.9428, + "step": 124380 + }, + { + "epoch": 1.5, + "grad_norm": 6.112403108224245, + "learning_rate": 3.1537000019179142e-06, + "loss": 0.9139, + "step": 124383 + }, + { + "epoch": 1.5, + "grad_norm": 7.821436670556282, + "learning_rate": 3.1532742116165706e-06, + "loss": 0.9891, + "step": 124386 + }, + { + "epoch": 1.5, + "grad_norm": 11.314388858687067, + "learning_rate": 3.152848444680635e-06, + "loss": 1.4763, + "step": 124389 + }, + { + "epoch": 1.5, + "grad_norm": 10.123564433128434, + "learning_rate": 3.152422701111563e-06, + "loss": 1.0306, + "step": 124392 + }, + { + "epoch": 1.5, + "grad_norm": 6.456033601766269, + "learning_rate": 3.1519969809108074e-06, + "loss": 0.8437, + "step": 124395 + }, + { + "epoch": 1.5, + "grad_norm": 4.379530956888169, + "learning_rate": 3.1515712840798206e-06, + "loss": 1.1943, + "step": 124398 + }, + { + "epoch": 1.5, + "grad_norm": 2.789250259623198, + "learning_rate": 3.15114561062005e-06, + "loss": 1.0676, + "step": 124401 + }, + { + "epoch": 1.5, + "grad_norm": 10.147777486804456, + "learning_rate": 3.150719960532953e-06, + "loss": 1.3538, + "step": 124404 + }, + { + "epoch": 1.5, + "grad_norm": 8.209632930030084, + "learning_rate": 3.1502943338199853e-06, + "loss": 1.3047, + "step": 124407 + }, + { + "epoch": 1.5, + "grad_norm": 9.498154951508608, + "learning_rate": 3.1498687304825915e-06, + "loss": 1.1387, + "step": 124410 + }, + { + "epoch": 1.5, + "grad_norm": 25.01404293986887, + "learning_rate": 3.1494431505222287e-06, + "loss": 1.3144, + "step": 124413 + }, + { + "epoch": 1.5, + "grad_norm": 7.0414931811184855, + "learning_rate": 3.14901759394035e-06, + "loss": 0.8796, + "step": 124416 + }, + { + "epoch": 1.5, + "grad_norm": 15.764088530966074, + "learning_rate": 3.148592060738408e-06, + "loss": 1.3277, + "step": 124419 + }, + { + "epoch": 1.5, + "grad_norm": 25.320057485252754, + "learning_rate": 3.148166550917855e-06, + "loss": 1.1278, + "step": 124422 + }, + { + "epoch": 1.5, + "grad_norm": 4.3071059843220665, + "learning_rate": 3.1477410644801375e-06, + "loss": 1.2787, + "step": 124425 + }, + { + "epoch": 1.5, + "grad_norm": 7.969856706594958, + "learning_rate": 3.1473156014267116e-06, + "loss": 1.3054, + "step": 124428 + }, + { + "epoch": 1.5, + "grad_norm": 10.697598050314868, + "learning_rate": 3.1468901617590297e-06, + "loss": 1.1389, + "step": 124431 + }, + { + "epoch": 1.5, + "grad_norm": 7.153143312060054, + "learning_rate": 3.146464745478546e-06, + "loss": 1.23, + "step": 124434 + }, + { + "epoch": 1.5, + "grad_norm": 5.523344408079358, + "learning_rate": 3.1460393525867063e-06, + "loss": 1.0658, + "step": 124437 + }, + { + "epoch": 1.5, + "grad_norm": 4.6983030576199205, + "learning_rate": 3.1456139830849684e-06, + "loss": 1.0836, + "step": 124440 + }, + { + "epoch": 1.5, + "grad_norm": 13.153133271122568, + "learning_rate": 3.1451886369747785e-06, + "loss": 0.9356, + "step": 124443 + }, + { + "epoch": 1.5, + "grad_norm": 3.750605663426806, + "learning_rate": 3.144763314257591e-06, + "loss": 1.0453, + "step": 124446 + }, + { + "epoch": 1.5, + "grad_norm": 10.450273368199653, + "learning_rate": 3.144338014934859e-06, + "loss": 1.2569, + "step": 124449 + }, + { + "epoch": 1.5, + "grad_norm": 10.063605951147967, + "learning_rate": 3.1439127390080293e-06, + "loss": 1.3742, + "step": 124452 + }, + { + "epoch": 1.5, + "grad_norm": 4.201117993115842, + "learning_rate": 3.143487486478556e-06, + "loss": 1.1243, + "step": 124455 + }, + { + "epoch": 1.5, + "grad_norm": 70.44925489592515, + "learning_rate": 3.14306225734789e-06, + "loss": 1.2332, + "step": 124458 + }, + { + "epoch": 1.5, + "grad_norm": 22.373733903074868, + "learning_rate": 3.1426370516174866e-06, + "loss": 1.2, + "step": 124461 + }, + { + "epoch": 1.5, + "grad_norm": 34.82252025217721, + "learning_rate": 3.1422118692887916e-06, + "loss": 1.0294, + "step": 124464 + }, + { + "epoch": 1.5, + "grad_norm": 3.475038265308353, + "learning_rate": 3.1417867103632547e-06, + "loss": 1.5796, + "step": 124467 + }, + { + "epoch": 1.5, + "grad_norm": 4.65778010864622, + "learning_rate": 3.1413615748423296e-06, + "loss": 1.2977, + "step": 124470 + }, + { + "epoch": 1.5, + "grad_norm": 3.026195280161237, + "learning_rate": 3.1409364627274672e-06, + "loss": 1.1858, + "step": 124473 + }, + { + "epoch": 1.5, + "grad_norm": 21.48162514102496, + "learning_rate": 3.140511374020121e-06, + "loss": 1.1325, + "step": 124476 + }, + { + "epoch": 1.5, + "grad_norm": 13.34127188820059, + "learning_rate": 3.140086308721736e-06, + "loss": 1.0223, + "step": 124479 + }, + { + "epoch": 1.5, + "grad_norm": 6.835851851386314, + "learning_rate": 3.1396612668337676e-06, + "loss": 0.6951, + "step": 124482 + }, + { + "epoch": 1.5, + "grad_norm": 8.83012341195832, + "learning_rate": 3.1392362483576623e-06, + "loss": 1.4875, + "step": 124485 + }, + { + "epoch": 1.5, + "grad_norm": 4.050767331161234, + "learning_rate": 3.1388112532948746e-06, + "loss": 1.1152, + "step": 124488 + }, + { + "epoch": 1.5, + "grad_norm": 10.179947283131877, + "learning_rate": 3.1383862816468503e-06, + "loss": 1.0961, + "step": 124491 + }, + { + "epoch": 1.5, + "grad_norm": 9.303581498211761, + "learning_rate": 3.1379613334150416e-06, + "loss": 0.6765, + "step": 124494 + }, + { + "epoch": 1.5, + "grad_norm": 15.33101781833069, + "learning_rate": 3.1375364086008987e-06, + "loss": 1.2252, + "step": 124497 + }, + { + "epoch": 1.5, + "grad_norm": 16.7220625632088, + "learning_rate": 3.1371115072058734e-06, + "loss": 1.3928, + "step": 124500 + }, + { + "epoch": 1.5, + "grad_norm": 11.187677408353087, + "learning_rate": 3.1366866292314167e-06, + "loss": 1.3271, + "step": 124503 + }, + { + "epoch": 1.5, + "grad_norm": 8.868495109491406, + "learning_rate": 3.136261774678977e-06, + "loss": 1.7455, + "step": 124506 + }, + { + "epoch": 1.5, + "grad_norm": 12.922044222049903, + "learning_rate": 3.1358369435499992e-06, + "loss": 1.028, + "step": 124509 + }, + { + "epoch": 1.5, + "grad_norm": 4.374732353581172, + "learning_rate": 3.1354121358459387e-06, + "loss": 1.0588, + "step": 124512 + }, + { + "epoch": 1.5, + "grad_norm": 6.551605014267958, + "learning_rate": 3.1349873515682473e-06, + "loss": 1.0701, + "step": 124515 + }, + { + "epoch": 1.5, + "grad_norm": 8.232814509959505, + "learning_rate": 3.1345625907183674e-06, + "loss": 1.1811, + "step": 124518 + }, + { + "epoch": 1.5, + "grad_norm": 7.7867400977156365, + "learning_rate": 3.134137853297753e-06, + "loss": 0.9279, + "step": 124521 + }, + { + "epoch": 1.5, + "grad_norm": 66.29720749272398, + "learning_rate": 3.133713139307856e-06, + "loss": 1.0774, + "step": 124524 + }, + { + "epoch": 1.5, + "grad_norm": 3.5340694366712757, + "learning_rate": 3.133288448750119e-06, + "loss": 1.5249, + "step": 124527 + }, + { + "epoch": 1.5, + "grad_norm": 13.84887890308389, + "learning_rate": 3.1328637816259988e-06, + "loss": 1.1355, + "step": 124530 + }, + { + "epoch": 1.5, + "grad_norm": 18.388229752559745, + "learning_rate": 3.1324391379369377e-06, + "loss": 0.9563, + "step": 124533 + }, + { + "epoch": 1.5, + "grad_norm": 10.817447426433285, + "learning_rate": 3.1320145176843885e-06, + "loss": 1.2352, + "step": 124536 + }, + { + "epoch": 1.5, + "grad_norm": 5.075109494345833, + "learning_rate": 3.1315899208697996e-06, + "loss": 1.0514, + "step": 124539 + }, + { + "epoch": 1.5, + "grad_norm": 17.334090517338417, + "learning_rate": 3.1311653474946236e-06, + "loss": 1.3084, + "step": 124542 + }, + { + "epoch": 1.5, + "grad_norm": 8.509855710986333, + "learning_rate": 3.1307407975603034e-06, + "loss": 1.2823, + "step": 124545 + }, + { + "epoch": 1.5, + "grad_norm": 7.341693789654489, + "learning_rate": 3.1303162710682935e-06, + "loss": 0.8023, + "step": 124548 + }, + { + "epoch": 1.5, + "grad_norm": 5.915001954823644, + "learning_rate": 3.129891768020036e-06, + "loss": 1.3522, + "step": 124551 + }, + { + "epoch": 1.5, + "grad_norm": 6.949247832324896, + "learning_rate": 3.129467288416983e-06, + "loss": 1.0476, + "step": 124554 + }, + { + "epoch": 1.5, + "grad_norm": 4.64343866764124, + "learning_rate": 3.129042832260587e-06, + "loss": 1.2998, + "step": 124557 + }, + { + "epoch": 1.5, + "grad_norm": 2.4839234083465707, + "learning_rate": 3.12861839955229e-06, + "loss": 1.0676, + "step": 124560 + }, + { + "epoch": 1.5, + "grad_norm": 5.13391883925697, + "learning_rate": 3.128193990293543e-06, + "loss": 1.2355, + "step": 124563 + }, + { + "epoch": 1.5, + "grad_norm": 20.29952072183153, + "learning_rate": 3.1277696044857986e-06, + "loss": 1.2023, + "step": 124566 + }, + { + "epoch": 1.5, + "grad_norm": 26.35333637386517, + "learning_rate": 3.1273452421305008e-06, + "loss": 0.8926, + "step": 124569 + }, + { + "epoch": 1.5, + "grad_norm": 8.469558940252117, + "learning_rate": 3.1269209032290947e-06, + "loss": 1.4856, + "step": 124572 + }, + { + "epoch": 1.5, + "grad_norm": 9.459214156535616, + "learning_rate": 3.126496587783032e-06, + "loss": 1.0689, + "step": 124575 + }, + { + "epoch": 1.5, + "grad_norm": 5.727240806750495, + "learning_rate": 3.1260722957937618e-06, + "loss": 1.0841, + "step": 124578 + }, + { + "epoch": 1.5, + "grad_norm": 14.759126303011136, + "learning_rate": 3.125648027262729e-06, + "loss": 1.2706, + "step": 124581 + }, + { + "epoch": 1.5, + "grad_norm": 17.83174154350193, + "learning_rate": 3.1252237821913878e-06, + "loss": 1.1608, + "step": 124584 + }, + { + "epoch": 1.5, + "grad_norm": 9.457583558377515, + "learning_rate": 3.1247995605811775e-06, + "loss": 0.9814, + "step": 124587 + }, + { + "epoch": 1.5, + "grad_norm": 3.2756881906838053, + "learning_rate": 3.1243753624335537e-06, + "loss": 1.3875, + "step": 124590 + }, + { + "epoch": 1.5, + "grad_norm": 6.036098637907065, + "learning_rate": 3.1239511877499563e-06, + "loss": 1.1064, + "step": 124593 + }, + { + "epoch": 1.5, + "grad_norm": 11.875462927749282, + "learning_rate": 3.1235270365318406e-06, + "loss": 1.1941, + "step": 124596 + }, + { + "epoch": 1.5, + "grad_norm": 4.409898994756988, + "learning_rate": 3.1231029087806473e-06, + "loss": 1.0943, + "step": 124599 + }, + { + "epoch": 1.5, + "grad_norm": 4.305799030064326, + "learning_rate": 3.122678804497826e-06, + "loss": 0.7806, + "step": 124602 + }, + { + "epoch": 1.5, + "grad_norm": 11.301053091032504, + "learning_rate": 3.1222547236848244e-06, + "loss": 1.2678, + "step": 124605 + }, + { + "epoch": 1.5, + "grad_norm": 4.595160948040882, + "learning_rate": 3.121830666343094e-06, + "loss": 0.9943, + "step": 124608 + }, + { + "epoch": 1.5, + "grad_norm": 2.7597228136865195, + "learning_rate": 3.1214066324740786e-06, + "loss": 1.0645, + "step": 124611 + }, + { + "epoch": 1.5, + "grad_norm": 7.917483842493904, + "learning_rate": 3.1209826220792206e-06, + "loss": 1.0629, + "step": 124614 + }, + { + "epoch": 1.5, + "grad_norm": 8.79639163413378, + "learning_rate": 3.1205586351599713e-06, + "loss": 1.1353, + "step": 124617 + }, + { + "epoch": 1.5, + "grad_norm": 9.732245396074392, + "learning_rate": 3.120134671717777e-06, + "loss": 1.0807, + "step": 124620 + }, + { + "epoch": 1.5, + "grad_norm": 6.305130623245829, + "learning_rate": 3.1197107317540885e-06, + "loss": 0.9095, + "step": 124623 + }, + { + "epoch": 1.5, + "grad_norm": 4.2117688220733935, + "learning_rate": 3.119286815270346e-06, + "loss": 1.2507, + "step": 124626 + }, + { + "epoch": 1.5, + "grad_norm": 7.447854237671408, + "learning_rate": 3.118862922267999e-06, + "loss": 1.263, + "step": 124629 + }, + { + "epoch": 1.5, + "grad_norm": 6.645736455178624, + "learning_rate": 3.1184390527484975e-06, + "loss": 0.8938, + "step": 124632 + }, + { + "epoch": 1.5, + "grad_norm": 15.356003251914599, + "learning_rate": 3.1180152067132817e-06, + "loss": 1.1667, + "step": 124635 + }, + { + "epoch": 1.5, + "grad_norm": 9.043003425925878, + "learning_rate": 3.117591384163805e-06, + "loss": 1.3076, + "step": 124638 + }, + { + "epoch": 1.5, + "grad_norm": 24.830665209135407, + "learning_rate": 3.117167585101506e-06, + "loss": 1.1322, + "step": 124641 + }, + { + "epoch": 1.5, + "grad_norm": 25.68085681589536, + "learning_rate": 3.1167438095278347e-06, + "loss": 1.1939, + "step": 124644 + }, + { + "epoch": 1.5, + "grad_norm": 7.60091926730414, + "learning_rate": 3.116320057444238e-06, + "loss": 1.437, + "step": 124647 + }, + { + "epoch": 1.5, + "grad_norm": 9.500583303312848, + "learning_rate": 3.1158963288521638e-06, + "loss": 0.9696, + "step": 124650 + }, + { + "epoch": 1.5, + "grad_norm": 17.56808276640969, + "learning_rate": 3.115472623753052e-06, + "loss": 1.0425, + "step": 124653 + }, + { + "epoch": 1.5, + "grad_norm": 4.12944324545865, + "learning_rate": 3.1150489421483567e-06, + "loss": 0.8103, + "step": 124656 + }, + { + "epoch": 1.5, + "grad_norm": 4.172259366835653, + "learning_rate": 3.1146252840395162e-06, + "loss": 0.961, + "step": 124659 + }, + { + "epoch": 1.5, + "grad_norm": 3.4786372256173177, + "learning_rate": 3.114201649427979e-06, + "loss": 1.2972, + "step": 124662 + }, + { + "epoch": 1.5, + "grad_norm": 11.736382382171278, + "learning_rate": 3.113778038315195e-06, + "loss": 1.0954, + "step": 124665 + }, + { + "epoch": 1.5, + "grad_norm": 8.128122746629236, + "learning_rate": 3.1133544507026027e-06, + "loss": 0.9781, + "step": 124668 + }, + { + "epoch": 1.5, + "grad_norm": 4.2678923194706995, + "learning_rate": 3.1129308865916507e-06, + "loss": 1.218, + "step": 124671 + }, + { + "epoch": 1.5, + "grad_norm": 10.751094221866534, + "learning_rate": 3.1125073459837895e-06, + "loss": 1.4272, + "step": 124674 + }, + { + "epoch": 1.5, + "grad_norm": 6.41827630258324, + "learning_rate": 3.112083828880459e-06, + "loss": 1.3557, + "step": 124677 + }, + { + "epoch": 1.5, + "grad_norm": 8.112202530909254, + "learning_rate": 3.111660335283102e-06, + "loss": 0.8974, + "step": 124680 + }, + { + "epoch": 1.5, + "grad_norm": 9.248917836084658, + "learning_rate": 3.111236865193167e-06, + "loss": 1.2695, + "step": 124683 + }, + { + "epoch": 1.5, + "grad_norm": 11.156516382420852, + "learning_rate": 3.1108134186120996e-06, + "loss": 1.0648, + "step": 124686 + }, + { + "epoch": 1.5, + "grad_norm": 3.477740984238081, + "learning_rate": 3.110389995541344e-06, + "loss": 1.2596, + "step": 124689 + }, + { + "epoch": 1.5, + "grad_norm": 20.185272765284857, + "learning_rate": 3.109966595982349e-06, + "loss": 1.5783, + "step": 124692 + }, + { + "epoch": 1.5, + "grad_norm": 7.35081044851477, + "learning_rate": 3.109543219936554e-06, + "loss": 0.9664, + "step": 124695 + }, + { + "epoch": 1.5, + "grad_norm": 18.30512393773041, + "learning_rate": 3.109119867405408e-06, + "loss": 1.4103, + "step": 124698 + }, + { + "epoch": 1.5, + "grad_norm": 4.568359593158543, + "learning_rate": 3.1086965383903512e-06, + "loss": 1.1058, + "step": 124701 + }, + { + "epoch": 1.5, + "grad_norm": 10.774663753067149, + "learning_rate": 3.1082732328928344e-06, + "loss": 1.558, + "step": 124704 + }, + { + "epoch": 1.5, + "grad_norm": 10.540857320270044, + "learning_rate": 3.1078499509142944e-06, + "loss": 1.077, + "step": 124707 + }, + { + "epoch": 1.5, + "grad_norm": 4.437007811092837, + "learning_rate": 3.1074266924561814e-06, + "loss": 0.9202, + "step": 124710 + }, + { + "epoch": 1.5, + "grad_norm": 7.793769486163519, + "learning_rate": 3.1070034575199383e-06, + "loss": 1.301, + "step": 124713 + }, + { + "epoch": 1.5, + "grad_norm": 10.465271368312585, + "learning_rate": 3.1065802461070118e-06, + "loss": 1.3072, + "step": 124716 + }, + { + "epoch": 1.5, + "grad_norm": 11.104528480872364, + "learning_rate": 3.106157058218844e-06, + "loss": 0.956, + "step": 124719 + }, + { + "epoch": 1.5, + "grad_norm": 8.87639165066161, + "learning_rate": 3.105733893856876e-06, + "loss": 1.2762, + "step": 124722 + }, + { + "epoch": 1.5, + "grad_norm": 13.914148131169288, + "learning_rate": 3.1053107530225557e-06, + "loss": 1.2212, + "step": 124725 + }, + { + "epoch": 1.5, + "grad_norm": 7.2953942927872, + "learning_rate": 3.1048876357173254e-06, + "loss": 1.1329, + "step": 124728 + }, + { + "epoch": 1.5, + "grad_norm": 6.881865345389712, + "learning_rate": 3.1044645419426334e-06, + "loss": 1.4784, + "step": 124731 + }, + { + "epoch": 1.5, + "grad_norm": 27.876046359554042, + "learning_rate": 3.104041471699917e-06, + "loss": 1.2631, + "step": 124734 + }, + { + "epoch": 1.5, + "grad_norm": 9.040113927483535, + "learning_rate": 3.103618424990623e-06, + "loss": 1.3866, + "step": 124737 + }, + { + "epoch": 1.5, + "grad_norm": 10.915013371814577, + "learning_rate": 3.103195401816198e-06, + "loss": 1.2917, + "step": 124740 + }, + { + "epoch": 1.5, + "grad_norm": 17.518941447895557, + "learning_rate": 3.1027724021780813e-06, + "loss": 1.2829, + "step": 124743 + }, + { + "epoch": 1.5, + "grad_norm": 7.324023944853486, + "learning_rate": 3.10234942607772e-06, + "loss": 1.1874, + "step": 124746 + }, + { + "epoch": 1.5, + "grad_norm": 3.8809728433429025, + "learning_rate": 3.101926473516552e-06, + "loss": 1.1425, + "step": 124749 + }, + { + "epoch": 1.5, + "grad_norm": 7.846270621323676, + "learning_rate": 3.101503544496025e-06, + "loss": 1.3131, + "step": 124752 + }, + { + "epoch": 1.5, + "grad_norm": 6.401967164723514, + "learning_rate": 3.101080639017582e-06, + "loss": 0.8479, + "step": 124755 + }, + { + "epoch": 1.5, + "grad_norm": 12.088077267806504, + "learning_rate": 3.100657757082668e-06, + "loss": 1.6742, + "step": 124758 + }, + { + "epoch": 1.5, + "grad_norm": 8.189250802534042, + "learning_rate": 3.1002348986927244e-06, + "loss": 1.6821, + "step": 124761 + }, + { + "epoch": 1.5, + "grad_norm": 7.567647338981996, + "learning_rate": 3.09981206384919e-06, + "loss": 1.3076, + "step": 124764 + }, + { + "epoch": 1.5, + "grad_norm": 13.9219434876838, + "learning_rate": 3.099389252553513e-06, + "loss": 1.144, + "step": 124767 + }, + { + "epoch": 1.5, + "grad_norm": 9.828790199450586, + "learning_rate": 3.0989664648071337e-06, + "loss": 1.1015, + "step": 124770 + }, + { + "epoch": 1.5, + "grad_norm": 6.255430771376941, + "learning_rate": 3.0985437006115004e-06, + "loss": 1.3231, + "step": 124773 + }, + { + "epoch": 1.5, + "grad_norm": 10.801717397641665, + "learning_rate": 3.098120959968047e-06, + "loss": 1.2549, + "step": 124776 + }, + { + "epoch": 1.5, + "grad_norm": 37.51035371571725, + "learning_rate": 3.097698242878221e-06, + "loss": 1.0757, + "step": 124779 + }, + { + "epoch": 1.5, + "grad_norm": 22.048182946590114, + "learning_rate": 3.097275549343469e-06, + "loss": 1.1228, + "step": 124782 + }, + { + "epoch": 1.5, + "grad_norm": 8.15673521351848, + "learning_rate": 3.0968528793652285e-06, + "loss": 1.0497, + "step": 124785 + }, + { + "epoch": 1.5, + "grad_norm": 7.503700387867664, + "learning_rate": 3.096430232944939e-06, + "loss": 1.4184, + "step": 124788 + }, + { + "epoch": 1.5, + "grad_norm": 5.904111549865279, + "learning_rate": 3.0960076100840476e-06, + "loss": 1.1934, + "step": 124791 + }, + { + "epoch": 1.5, + "grad_norm": 7.639413883470211, + "learning_rate": 3.0955850107839935e-06, + "loss": 1.1049, + "step": 124794 + }, + { + "epoch": 1.5, + "grad_norm": 16.361114569079053, + "learning_rate": 3.095162435046223e-06, + "loss": 1.2922, + "step": 124797 + }, + { + "epoch": 1.5, + "grad_norm": 4.189688678302841, + "learning_rate": 3.0947398828721774e-06, + "loss": 1.4077, + "step": 124800 + }, + { + "epoch": 1.5, + "grad_norm": 43.820580749520445, + "learning_rate": 3.0943173542632987e-06, + "loss": 1.2795, + "step": 124803 + }, + { + "epoch": 1.5, + "grad_norm": 7.443873017717842, + "learning_rate": 3.093894849221023e-06, + "loss": 1.266, + "step": 124806 + }, + { + "epoch": 1.5, + "grad_norm": 6.597949091728006, + "learning_rate": 3.093472367746797e-06, + "loss": 1.0665, + "step": 124809 + }, + { + "epoch": 1.5, + "grad_norm": 2.944359445250163, + "learning_rate": 3.0930499098420663e-06, + "loss": 1.1899, + "step": 124812 + }, + { + "epoch": 1.5, + "grad_norm": 6.24861016167643, + "learning_rate": 3.0926274755082643e-06, + "loss": 1.0097, + "step": 124815 + }, + { + "epoch": 1.5, + "grad_norm": 9.909186552124865, + "learning_rate": 3.092205064746837e-06, + "loss": 0.767, + "step": 124818 + }, + { + "epoch": 1.5, + "grad_norm": 6.941444674015788, + "learning_rate": 3.091782677559225e-06, + "loss": 0.9948, + "step": 124821 + }, + { + "epoch": 1.5, + "grad_norm": 5.136351899793162, + "learning_rate": 3.091360313946874e-06, + "loss": 1.183, + "step": 124824 + }, + { + "epoch": 1.5, + "grad_norm": 18.909255216887672, + "learning_rate": 3.0909379739112223e-06, + "loss": 0.8535, + "step": 124827 + }, + { + "epoch": 1.5, + "grad_norm": 4.303788309567827, + "learning_rate": 3.0905156574537067e-06, + "loss": 1.4435, + "step": 124830 + }, + { + "epoch": 1.5, + "grad_norm": 10.098982970325213, + "learning_rate": 3.0900933645757723e-06, + "loss": 1.2878, + "step": 124833 + }, + { + "epoch": 1.5, + "grad_norm": 2.3585512599028684, + "learning_rate": 3.089671095278861e-06, + "loss": 1.52, + "step": 124836 + }, + { + "epoch": 1.5, + "grad_norm": 12.977855081419914, + "learning_rate": 3.089248849564416e-06, + "loss": 1.158, + "step": 124839 + }, + { + "epoch": 1.5, + "grad_norm": 12.504510214145853, + "learning_rate": 3.088826627433872e-06, + "loss": 1.4513, + "step": 124842 + }, + { + "epoch": 1.5, + "grad_norm": 8.957258714247235, + "learning_rate": 3.0884044288886772e-06, + "loss": 1.2538, + "step": 124845 + }, + { + "epoch": 1.5, + "grad_norm": 4.113698376917727, + "learning_rate": 3.087982253930265e-06, + "loss": 1.1398, + "step": 124848 + }, + { + "epoch": 1.5, + "grad_norm": 16.30123310967729, + "learning_rate": 3.08756010256008e-06, + "loss": 1.7303, + "step": 124851 + }, + { + "epoch": 1.5, + "grad_norm": 3.3106061496640438, + "learning_rate": 3.087137974779565e-06, + "loss": 1.0682, + "step": 124854 + }, + { + "epoch": 1.5, + "grad_norm": 8.506557006454754, + "learning_rate": 3.0867158705901547e-06, + "loss": 1.0063, + "step": 124857 + }, + { + "epoch": 1.5, + "grad_norm": 11.752175205312776, + "learning_rate": 3.0862937899932933e-06, + "loss": 0.8825, + "step": 124860 + }, + { + "epoch": 1.5, + "grad_norm": 6.569873911872622, + "learning_rate": 3.085871732990422e-06, + "loss": 1.0413, + "step": 124863 + }, + { + "epoch": 1.5, + "grad_norm": 6.7248072087183655, + "learning_rate": 3.085449699582982e-06, + "loss": 1.2604, + "step": 124866 + }, + { + "epoch": 1.5, + "grad_norm": 8.452442624190638, + "learning_rate": 3.085027689772412e-06, + "loss": 1.0519, + "step": 124869 + }, + { + "epoch": 1.5, + "grad_norm": 6.112079040968103, + "learning_rate": 3.0846057035601485e-06, + "loss": 1.0261, + "step": 124872 + }, + { + "epoch": 1.5, + "grad_norm": 6.788355490150008, + "learning_rate": 3.0841837409476337e-06, + "loss": 1.2227, + "step": 124875 + }, + { + "epoch": 1.5, + "grad_norm": 5.632048750268767, + "learning_rate": 3.08376180193631e-06, + "loss": 0.9861, + "step": 124878 + }, + { + "epoch": 1.5, + "grad_norm": 41.46755306821025, + "learning_rate": 3.08333988652762e-06, + "loss": 1.281, + "step": 124881 + }, + { + "epoch": 1.5, + "grad_norm": 13.373411533626577, + "learning_rate": 3.0829179947229947e-06, + "loss": 1.5601, + "step": 124884 + }, + { + "epoch": 1.5, + "grad_norm": 3.821240506054406, + "learning_rate": 3.082496126523883e-06, + "loss": 1.6849, + "step": 124887 + }, + { + "epoch": 1.5, + "grad_norm": 12.097331169591696, + "learning_rate": 3.0820742819317162e-06, + "loss": 1.3456, + "step": 124890 + }, + { + "epoch": 1.5, + "grad_norm": 14.424722936157684, + "learning_rate": 3.0816524609479425e-06, + "loss": 1.0205, + "step": 124893 + }, + { + "epoch": 1.5, + "grad_norm": 10.041035543293345, + "learning_rate": 3.081230663573993e-06, + "loss": 0.9759, + "step": 124896 + }, + { + "epoch": 1.5, + "grad_norm": 5.348980457648481, + "learning_rate": 3.0808088898113108e-06, + "loss": 1.2889, + "step": 124899 + }, + { + "epoch": 1.5, + "grad_norm": 14.147626360335117, + "learning_rate": 3.080387139661336e-06, + "loss": 1.2205, + "step": 124902 + }, + { + "epoch": 1.5, + "grad_norm": 25.037073517511732, + "learning_rate": 3.0799654131255074e-06, + "loss": 0.9991, + "step": 124905 + }, + { + "epoch": 1.5, + "grad_norm": 2.5559335886032106, + "learning_rate": 3.079543710205268e-06, + "loss": 1.1681, + "step": 124908 + }, + { + "epoch": 1.5, + "grad_norm": 10.972974205506718, + "learning_rate": 3.079122030902053e-06, + "loss": 1.433, + "step": 124911 + }, + { + "epoch": 1.5, + "grad_norm": 9.999055199468728, + "learning_rate": 3.0787003752172975e-06, + "loss": 0.9236, + "step": 124914 + }, + { + "epoch": 1.5, + "grad_norm": 7.78306668248546, + "learning_rate": 3.078278743152445e-06, + "loss": 1.1589, + "step": 124917 + }, + { + "epoch": 1.5, + "grad_norm": 19.056981337158614, + "learning_rate": 3.077857134708938e-06, + "loss": 1.5907, + "step": 124920 + }, + { + "epoch": 1.5, + "grad_norm": 14.817989073162279, + "learning_rate": 3.077435549888207e-06, + "loss": 0.9386, + "step": 124923 + }, + { + "epoch": 1.5, + "grad_norm": 2.9347082360495675, + "learning_rate": 3.0770139886916963e-06, + "loss": 0.9534, + "step": 124926 + }, + { + "epoch": 1.5, + "grad_norm": 8.025693733543266, + "learning_rate": 3.0765924511208446e-06, + "loss": 1.2704, + "step": 124929 + }, + { + "epoch": 1.5, + "grad_norm": 10.591283814029154, + "learning_rate": 3.0761709371770877e-06, + "loss": 1.0839, + "step": 124932 + }, + { + "epoch": 1.5, + "grad_norm": 18.767565040737036, + "learning_rate": 3.0757494468618675e-06, + "loss": 1.1832, + "step": 124935 + }, + { + "epoch": 1.5, + "grad_norm": 5.928549361638654, + "learning_rate": 3.075327980176617e-06, + "loss": 1.4336, + "step": 124938 + }, + { + "epoch": 1.5, + "grad_norm": 20.9060518339637, + "learning_rate": 3.074906537122778e-06, + "loss": 1.1004, + "step": 124941 + }, + { + "epoch": 1.5, + "grad_norm": 7.918531629811746, + "learning_rate": 3.074485117701789e-06, + "loss": 1.5051, + "step": 124944 + }, + { + "epoch": 1.5, + "grad_norm": 13.041847980884766, + "learning_rate": 3.0740637219150903e-06, + "loss": 1.2276, + "step": 124947 + }, + { + "epoch": 1.5, + "grad_norm": 13.322401865552573, + "learning_rate": 3.0736423497641155e-06, + "loss": 1.3018, + "step": 124950 + }, + { + "epoch": 1.5, + "grad_norm": 6.868076004473362, + "learning_rate": 3.073221001250306e-06, + "loss": 1.2559, + "step": 124953 + }, + { + "epoch": 1.5, + "grad_norm": 5.002770346779419, + "learning_rate": 3.0727996763750955e-06, + "loss": 1.5049, + "step": 124956 + }, + { + "epoch": 1.5, + "grad_norm": 4.163195738119413, + "learning_rate": 3.0723783751399248e-06, + "loss": 1.3263, + "step": 124959 + }, + { + "epoch": 1.5, + "grad_norm": 4.4610004866702155, + "learning_rate": 3.0719570975462356e-06, + "loss": 1.1773, + "step": 124962 + }, + { + "epoch": 1.5, + "grad_norm": 6.054679051786854, + "learning_rate": 3.071535843595457e-06, + "loss": 1.55, + "step": 124965 + }, + { + "epoch": 1.5, + "grad_norm": 14.723303229690783, + "learning_rate": 3.0711146132890314e-06, + "loss": 1.0493, + "step": 124968 + }, + { + "epoch": 1.5, + "grad_norm": 3.9963748511572392, + "learning_rate": 3.0706934066283956e-06, + "loss": 1.5378, + "step": 124971 + }, + { + "epoch": 1.5, + "grad_norm": 5.203730348673516, + "learning_rate": 3.0702722236149906e-06, + "loss": 0.8796, + "step": 124974 + }, + { + "epoch": 1.5, + "grad_norm": 2.5259363634321916, + "learning_rate": 3.06985106425025e-06, + "loss": 0.8058, + "step": 124977 + }, + { + "epoch": 1.5, + "grad_norm": 11.632927433673771, + "learning_rate": 3.069429928535609e-06, + "loss": 1.8042, + "step": 124980 + }, + { + "epoch": 1.5, + "grad_norm": 22.05627768961135, + "learning_rate": 3.0690088164725063e-06, + "loss": 1.1605, + "step": 124983 + }, + { + "epoch": 1.5, + "grad_norm": 2.580775129857426, + "learning_rate": 3.068587728062381e-06, + "loss": 1.288, + "step": 124986 + }, + { + "epoch": 1.5, + "grad_norm": 8.864177643435692, + "learning_rate": 3.068166663306671e-06, + "loss": 0.9754, + "step": 124989 + }, + { + "epoch": 1.5, + "grad_norm": 6.091688136002421, + "learning_rate": 3.067745622206809e-06, + "loss": 1.3179, + "step": 124992 + }, + { + "epoch": 1.5, + "grad_norm": 7.561375384858376, + "learning_rate": 3.067324604764238e-06, + "loss": 1.3037, + "step": 124995 + }, + { + "epoch": 1.5, + "grad_norm": 9.980193915696214, + "learning_rate": 3.0669036109803873e-06, + "loss": 1.2457, + "step": 124998 + }, + { + "epoch": 1.5, + "grad_norm": 9.803475690058804, + "learning_rate": 3.0664826408567005e-06, + "loss": 1.4869, + "step": 125001 + }, + { + "epoch": 1.5, + "grad_norm": 12.27844193347509, + "learning_rate": 3.0660616943946076e-06, + "loss": 1.3039, + "step": 125004 + }, + { + "epoch": 1.5, + "grad_norm": 13.67793837044137, + "learning_rate": 3.065640771595548e-06, + "loss": 1.0502, + "step": 125007 + }, + { + "epoch": 1.5, + "grad_norm": 6.372289414441587, + "learning_rate": 3.0652198724609595e-06, + "loss": 1.022, + "step": 125010 + }, + { + "epoch": 1.5, + "grad_norm": 6.05239683809043, + "learning_rate": 3.0647989969922775e-06, + "loss": 1.312, + "step": 125013 + }, + { + "epoch": 1.5, + "grad_norm": 4.819195296599644, + "learning_rate": 3.0643781451909416e-06, + "loss": 0.9879, + "step": 125016 + }, + { + "epoch": 1.5, + "grad_norm": 14.057601351732348, + "learning_rate": 3.0639573170583846e-06, + "loss": 1.3839, + "step": 125019 + }, + { + "epoch": 1.5, + "grad_norm": 24.426945105803195, + "learning_rate": 3.0635365125960404e-06, + "loss": 1.4974, + "step": 125022 + }, + { + "epoch": 1.5, + "grad_norm": 6.023397583076172, + "learning_rate": 3.0631157318053474e-06, + "loss": 1.2342, + "step": 125025 + }, + { + "epoch": 1.5, + "grad_norm": 5.096074772822005, + "learning_rate": 3.0626949746877455e-06, + "loss": 1.1214, + "step": 125028 + }, + { + "epoch": 1.5, + "grad_norm": 9.397018316878013, + "learning_rate": 3.062274241244663e-06, + "loss": 1.1322, + "step": 125031 + }, + { + "epoch": 1.5, + "grad_norm": 21.899700951181735, + "learning_rate": 3.061853531477539e-06, + "loss": 1.0926, + "step": 125034 + }, + { + "epoch": 1.5, + "grad_norm": 8.929925279852759, + "learning_rate": 3.061432845387814e-06, + "loss": 1.021, + "step": 125037 + }, + { + "epoch": 1.5, + "grad_norm": 7.891965886746465, + "learning_rate": 3.0610121829769156e-06, + "loss": 1.2871, + "step": 125040 + }, + { + "epoch": 1.5, + "grad_norm": 17.41794158742972, + "learning_rate": 3.0605915442462865e-06, + "loss": 1.1401, + "step": 125043 + }, + { + "epoch": 1.5, + "grad_norm": 19.83318669136047, + "learning_rate": 3.060170929197356e-06, + "loss": 0.8128, + "step": 125046 + }, + { + "epoch": 1.5, + "grad_norm": 13.838339648916909, + "learning_rate": 3.059750337831562e-06, + "loss": 1.4497, + "step": 125049 + }, + { + "epoch": 1.5, + "grad_norm": 7.082353220833495, + "learning_rate": 3.0593297701503397e-06, + "loss": 1.106, + "step": 125052 + }, + { + "epoch": 1.5, + "grad_norm": 5.648091102060614, + "learning_rate": 3.058909226155129e-06, + "loss": 1.1009, + "step": 125055 + }, + { + "epoch": 1.5, + "grad_norm": 38.79063316125201, + "learning_rate": 3.058488705847358e-06, + "loss": 1.1265, + "step": 125058 + }, + { + "epoch": 1.5, + "grad_norm": 28.519744605009837, + "learning_rate": 3.0580682092284674e-06, + "loss": 1.485, + "step": 125061 + }, + { + "epoch": 1.5, + "grad_norm": 9.017051840202038, + "learning_rate": 3.0576477362998858e-06, + "loss": 1.0031, + "step": 125064 + }, + { + "epoch": 1.5, + "grad_norm": 7.610832726013567, + "learning_rate": 3.0572272870630524e-06, + "loss": 1.1946, + "step": 125067 + }, + { + "epoch": 1.5, + "grad_norm": 11.715042212569978, + "learning_rate": 3.0568068615194047e-06, + "loss": 1.2661, + "step": 125070 + }, + { + "epoch": 1.5, + "grad_norm": 12.279424773522686, + "learning_rate": 3.056386459670371e-06, + "loss": 1.3054, + "step": 125073 + }, + { + "epoch": 1.5, + "grad_norm": 11.708510876403068, + "learning_rate": 3.05596608151739e-06, + "loss": 1.4025, + "step": 125076 + }, + { + "epoch": 1.5, + "grad_norm": 6.791374438327974, + "learning_rate": 3.055545727061897e-06, + "loss": 1.3705, + "step": 125079 + }, + { + "epoch": 1.5, + "grad_norm": 13.86654782115083, + "learning_rate": 3.055125396305326e-06, + "loss": 0.9426, + "step": 125082 + }, + { + "epoch": 1.5, + "grad_norm": 3.5096380446422297, + "learning_rate": 3.054705089249108e-06, + "loss": 1.2115, + "step": 125085 + }, + { + "epoch": 1.5, + "grad_norm": 2.4296700079939213, + "learning_rate": 3.0542848058946794e-06, + "loss": 1.3682, + "step": 125088 + }, + { + "epoch": 1.5, + "grad_norm": 2.33111957808936, + "learning_rate": 3.0538645462434746e-06, + "loss": 1.3401, + "step": 125091 + }, + { + "epoch": 1.5, + "grad_norm": 4.551656372451862, + "learning_rate": 3.053444310296928e-06, + "loss": 1.3081, + "step": 125094 + }, + { + "epoch": 1.5, + "grad_norm": 9.05907644572878, + "learning_rate": 3.0530240980564774e-06, + "loss": 1.1771, + "step": 125097 + }, + { + "epoch": 1.5, + "grad_norm": 12.129162175138347, + "learning_rate": 3.0526039095235505e-06, + "loss": 1.0363, + "step": 125100 + }, + { + "epoch": 1.5, + "grad_norm": 7.383406919720284, + "learning_rate": 3.052183744699586e-06, + "loss": 1.1721, + "step": 125103 + }, + { + "epoch": 1.5, + "grad_norm": 4.0502762019940635, + "learning_rate": 3.0517636035860133e-06, + "loss": 1.1229, + "step": 125106 + }, + { + "epoch": 1.5, + "grad_norm": 3.3463684176002286, + "learning_rate": 3.0513434861842715e-06, + "loss": 1.2912, + "step": 125109 + }, + { + "epoch": 1.5, + "grad_norm": 3.0945665624109306, + "learning_rate": 3.050923392495789e-06, + "loss": 1.1165, + "step": 125112 + }, + { + "epoch": 1.5, + "grad_norm": 26.125012287537615, + "learning_rate": 3.0505033225220017e-06, + "loss": 1.1385, + "step": 125115 + }, + { + "epoch": 1.5, + "grad_norm": 13.834445944078517, + "learning_rate": 3.050083276264343e-06, + "loss": 0.9839, + "step": 125118 + }, + { + "epoch": 1.5, + "grad_norm": 13.505517364352599, + "learning_rate": 3.049663253724251e-06, + "loss": 0.9444, + "step": 125121 + }, + { + "epoch": 1.5, + "grad_norm": 21.016868021998402, + "learning_rate": 3.0492432549031535e-06, + "loss": 1.1516, + "step": 125124 + }, + { + "epoch": 1.5, + "grad_norm": 9.30293705822376, + "learning_rate": 3.0488232798024818e-06, + "loss": 1.321, + "step": 125127 + }, + { + "epoch": 1.5, + "grad_norm": 8.058919450657362, + "learning_rate": 3.0484033284236737e-06, + "loss": 0.8008, + "step": 125130 + }, + { + "epoch": 1.5, + "grad_norm": 7.045790667399728, + "learning_rate": 3.0479834007681596e-06, + "loss": 1.3128, + "step": 125133 + }, + { + "epoch": 1.5, + "grad_norm": 15.977679186002838, + "learning_rate": 3.047563496837379e-06, + "loss": 1.2043, + "step": 125136 + }, + { + "epoch": 1.5, + "grad_norm": 10.906577059185333, + "learning_rate": 3.047143616632755e-06, + "loss": 1.1228, + "step": 125139 + }, + { + "epoch": 1.5, + "grad_norm": 7.978136429013389, + "learning_rate": 3.0467237601557264e-06, + "loss": 0.7382, + "step": 125142 + }, + { + "epoch": 1.5, + "grad_norm": 7.404836560623703, + "learning_rate": 3.046303927407729e-06, + "loss": 0.9873, + "step": 125145 + }, + { + "epoch": 1.5, + "grad_norm": 9.743406223149508, + "learning_rate": 3.0458841183901867e-06, + "loss": 1.2706, + "step": 125148 + }, + { + "epoch": 1.5, + "grad_norm": 4.517034786720905, + "learning_rate": 3.045464333104541e-06, + "loss": 1.1427, + "step": 125151 + }, + { + "epoch": 1.5, + "grad_norm": 2.3874289738626953, + "learning_rate": 3.045044571552217e-06, + "loss": 1.118, + "step": 125154 + }, + { + "epoch": 1.5, + "grad_norm": 8.75652848401202, + "learning_rate": 3.044624833734652e-06, + "loss": 1.5638, + "step": 125157 + }, + { + "epoch": 1.51, + "grad_norm": 8.0694334853685, + "learning_rate": 3.044205119653275e-06, + "loss": 1.1195, + "step": 125160 + }, + { + "epoch": 1.51, + "grad_norm": 10.962295402144257, + "learning_rate": 3.043785429309525e-06, + "loss": 1.0386, + "step": 125163 + }, + { + "epoch": 1.51, + "grad_norm": 4.187510543929389, + "learning_rate": 3.0433657627048297e-06, + "loss": 1.4512, + "step": 125166 + }, + { + "epoch": 1.51, + "grad_norm": 4.996076372863078, + "learning_rate": 3.0429461198406186e-06, + "loss": 1.0761, + "step": 125169 + }, + { + "epoch": 1.51, + "grad_norm": 5.729263958420403, + "learning_rate": 3.042526500718326e-06, + "loss": 1.1011, + "step": 125172 + }, + { + "epoch": 1.51, + "grad_norm": 15.107735917134034, + "learning_rate": 3.0421069053393837e-06, + "loss": 1.4775, + "step": 125175 + }, + { + "epoch": 1.51, + "grad_norm": 37.69782136497322, + "learning_rate": 3.041687333705228e-06, + "loss": 1.2005, + "step": 125178 + }, + { + "epoch": 1.51, + "grad_norm": 7.168263465723318, + "learning_rate": 3.0412677858172834e-06, + "loss": 0.9432, + "step": 125181 + }, + { + "epoch": 1.51, + "grad_norm": 4.730522655589924, + "learning_rate": 3.0408482616769862e-06, + "loss": 1.586, + "step": 125184 + }, + { + "epoch": 1.51, + "grad_norm": 5.8389815305188595, + "learning_rate": 3.0404287612857696e-06, + "loss": 1.1379, + "step": 125187 + }, + { + "epoch": 1.51, + "grad_norm": 16.74512197205993, + "learning_rate": 3.0400092846450634e-06, + "loss": 1.5128, + "step": 125190 + }, + { + "epoch": 1.51, + "grad_norm": 15.348120908189196, + "learning_rate": 3.0395898317562943e-06, + "loss": 1.09, + "step": 125193 + }, + { + "epoch": 1.51, + "grad_norm": 5.447044484822565, + "learning_rate": 3.039170402620899e-06, + "loss": 1.1151, + "step": 125196 + }, + { + "epoch": 1.51, + "grad_norm": 4.2913902886964195, + "learning_rate": 3.0387509972403084e-06, + "loss": 1.2826, + "step": 125199 + }, + { + "epoch": 1.51, + "grad_norm": 4.765779985546199, + "learning_rate": 3.0383316156159524e-06, + "loss": 1.2673, + "step": 125202 + }, + { + "epoch": 1.51, + "grad_norm": 10.598425137509434, + "learning_rate": 3.037912257749266e-06, + "loss": 1.2347, + "step": 125205 + }, + { + "epoch": 1.51, + "grad_norm": 6.256344693883062, + "learning_rate": 3.0374929236416774e-06, + "loss": 1.2266, + "step": 125208 + }, + { + "epoch": 1.51, + "grad_norm": 4.0101373440023504, + "learning_rate": 3.037073613294614e-06, + "loss": 1.1571, + "step": 125211 + }, + { + "epoch": 1.51, + "grad_norm": 9.613630476588622, + "learning_rate": 3.036654326709512e-06, + "loss": 1.1941, + "step": 125214 + }, + { + "epoch": 1.51, + "grad_norm": 20.558574446304878, + "learning_rate": 3.0362350638878024e-06, + "loss": 1.2215, + "step": 125217 + }, + { + "epoch": 1.51, + "grad_norm": 7.946959081309584, + "learning_rate": 3.035815824830911e-06, + "loss": 1.3382, + "step": 125220 + }, + { + "epoch": 1.51, + "grad_norm": 11.527084496453478, + "learning_rate": 3.035396609540272e-06, + "loss": 1.0376, + "step": 125223 + }, + { + "epoch": 1.51, + "grad_norm": 9.97244947191707, + "learning_rate": 3.0349774180173152e-06, + "loss": 1.3449, + "step": 125226 + }, + { + "epoch": 1.51, + "grad_norm": 2.6795512815497604, + "learning_rate": 3.0345582502634765e-06, + "loss": 1.0453, + "step": 125229 + }, + { + "epoch": 1.51, + "grad_norm": 11.042186301453237, + "learning_rate": 3.0341391062801795e-06, + "loss": 1.2224, + "step": 125232 + }, + { + "epoch": 1.51, + "grad_norm": 8.079263886197255, + "learning_rate": 3.033719986068855e-06, + "loss": 1.0527, + "step": 125235 + }, + { + "epoch": 1.51, + "grad_norm": 8.432718512049211, + "learning_rate": 3.0333008896309336e-06, + "loss": 1.1037, + "step": 125238 + }, + { + "epoch": 1.51, + "grad_norm": 16.3304359098687, + "learning_rate": 3.0328818169678477e-06, + "loss": 1.2391, + "step": 125241 + }, + { + "epoch": 1.51, + "grad_norm": 12.94928599826144, + "learning_rate": 3.0324627680810304e-06, + "loss": 1.5418, + "step": 125244 + }, + { + "epoch": 1.51, + "grad_norm": 29.16541139508805, + "learning_rate": 3.0320437429719042e-06, + "loss": 1.2641, + "step": 125247 + }, + { + "epoch": 1.51, + "grad_norm": 33.29530451590663, + "learning_rate": 3.031624741641902e-06, + "loss": 0.9996, + "step": 125250 + }, + { + "epoch": 1.51, + "grad_norm": 6.910684678173732, + "learning_rate": 3.0312057640924588e-06, + "loss": 1.5787, + "step": 125253 + }, + { + "epoch": 1.51, + "grad_norm": 9.515921087343523, + "learning_rate": 3.030786810324996e-06, + "loss": 0.9793, + "step": 125256 + }, + { + "epoch": 1.51, + "grad_norm": 4.051290776702418, + "learning_rate": 3.030367880340952e-06, + "loss": 1.1411, + "step": 125259 + }, + { + "epoch": 1.51, + "grad_norm": 7.1795556164808865, + "learning_rate": 3.0299489741417476e-06, + "loss": 1.2127, + "step": 125262 + }, + { + "epoch": 1.51, + "grad_norm": 32.17886759773483, + "learning_rate": 3.029530091728817e-06, + "loss": 1.0731, + "step": 125265 + }, + { + "epoch": 1.51, + "grad_norm": 10.209697505703218, + "learning_rate": 3.029111233103591e-06, + "loss": 1.396, + "step": 125268 + }, + { + "epoch": 1.51, + "grad_norm": 12.668886675723828, + "learning_rate": 3.0286923982674986e-06, + "loss": 1.2513, + "step": 125271 + }, + { + "epoch": 1.51, + "grad_norm": 9.638757411136782, + "learning_rate": 3.028273587221969e-06, + "loss": 1.1391, + "step": 125274 + }, + { + "epoch": 1.51, + "grad_norm": 12.308432482749346, + "learning_rate": 3.027854799968427e-06, + "loss": 1.1825, + "step": 125277 + }, + { + "epoch": 1.51, + "grad_norm": 9.554729857343139, + "learning_rate": 3.0274360365083056e-06, + "loss": 1.352, + "step": 125280 + }, + { + "epoch": 1.51, + "grad_norm": 8.277053314399001, + "learning_rate": 3.0270172968430333e-06, + "loss": 1.0242, + "step": 125283 + }, + { + "epoch": 1.51, + "grad_norm": 5.916231350410914, + "learning_rate": 3.0265985809740416e-06, + "loss": 0.841, + "step": 125286 + }, + { + "epoch": 1.51, + "grad_norm": 9.481723746390601, + "learning_rate": 3.026179888902755e-06, + "loss": 1.1582, + "step": 125289 + }, + { + "epoch": 1.51, + "grad_norm": 12.39765281426089, + "learning_rate": 3.025761220630604e-06, + "loss": 0.6897, + "step": 125292 + }, + { + "epoch": 1.51, + "grad_norm": 11.728247234987727, + "learning_rate": 3.0253425761590215e-06, + "loss": 1.108, + "step": 125295 + }, + { + "epoch": 1.51, + "grad_norm": 5.58346459509562, + "learning_rate": 3.024923955489433e-06, + "loss": 1.0416, + "step": 125298 + }, + { + "epoch": 1.51, + "grad_norm": 13.644714298813614, + "learning_rate": 3.024505358623262e-06, + "loss": 1.0479, + "step": 125301 + }, + { + "epoch": 1.51, + "grad_norm": 7.669640743543, + "learning_rate": 3.0240867855619426e-06, + "loss": 1.0298, + "step": 125304 + }, + { + "epoch": 1.51, + "grad_norm": 20.810351843881794, + "learning_rate": 3.0236682363069016e-06, + "loss": 1.2433, + "step": 125307 + }, + { + "epoch": 1.51, + "grad_norm": 16.80529460958677, + "learning_rate": 3.0232497108595693e-06, + "loss": 1.4607, + "step": 125310 + }, + { + "epoch": 1.51, + "grad_norm": 5.97762026278814, + "learning_rate": 3.022831209221374e-06, + "loss": 1.2859, + "step": 125313 + }, + { + "epoch": 1.51, + "grad_norm": 15.78744783921863, + "learning_rate": 3.0224127313937447e-06, + "loss": 1.1505, + "step": 125316 + }, + { + "epoch": 1.51, + "grad_norm": 15.483473236457156, + "learning_rate": 3.021994277378103e-06, + "loss": 1.3984, + "step": 125319 + }, + { + "epoch": 1.51, + "grad_norm": 8.688051455429012, + "learning_rate": 3.0215758471758815e-06, + "loss": 1.1814, + "step": 125322 + }, + { + "epoch": 1.51, + "grad_norm": 6.449542476420195, + "learning_rate": 3.021157440788511e-06, + "loss": 0.9055, + "step": 125325 + }, + { + "epoch": 1.51, + "grad_norm": 7.085080094482215, + "learning_rate": 3.0207390582174136e-06, + "loss": 0.9666, + "step": 125328 + }, + { + "epoch": 1.51, + "grad_norm": 8.484545582563056, + "learning_rate": 3.02032069946402e-06, + "loss": 1.1406, + "step": 125331 + }, + { + "epoch": 1.51, + "grad_norm": 11.182501405700902, + "learning_rate": 3.0199023645297575e-06, + "loss": 1.1542, + "step": 125334 + }, + { + "epoch": 1.51, + "grad_norm": 12.23327306271821, + "learning_rate": 3.0194840534160576e-06, + "loss": 0.9927, + "step": 125337 + }, + { + "epoch": 1.51, + "grad_norm": 4.934094835554287, + "learning_rate": 3.0190657661243437e-06, + "loss": 1.2567, + "step": 125340 + }, + { + "epoch": 1.51, + "grad_norm": 5.685285775640086, + "learning_rate": 3.0186475026560414e-06, + "loss": 1.0955, + "step": 125343 + }, + { + "epoch": 1.51, + "grad_norm": 10.372654651448203, + "learning_rate": 3.01822926301258e-06, + "loss": 0.9973, + "step": 125346 + }, + { + "epoch": 1.51, + "grad_norm": 4.284952531150079, + "learning_rate": 3.017811047195387e-06, + "loss": 0.8489, + "step": 125349 + }, + { + "epoch": 1.51, + "grad_norm": 11.950347326034013, + "learning_rate": 3.0173928552058927e-06, + "loss": 1.2868, + "step": 125352 + }, + { + "epoch": 1.51, + "grad_norm": 38.860075736102644, + "learning_rate": 3.01697468704552e-06, + "loss": 1.1276, + "step": 125355 + }, + { + "epoch": 1.51, + "grad_norm": 3.2651151719402525, + "learning_rate": 3.0165565427156985e-06, + "loss": 1.0173, + "step": 125358 + }, + { + "epoch": 1.51, + "grad_norm": 2.3632587257659656, + "learning_rate": 3.0161384222178523e-06, + "loss": 1.0503, + "step": 125361 + }, + { + "epoch": 1.51, + "grad_norm": 13.68213840496667, + "learning_rate": 3.0157203255534095e-06, + "loss": 1.142, + "step": 125364 + }, + { + "epoch": 1.51, + "grad_norm": 11.014960917495353, + "learning_rate": 3.015302252723802e-06, + "loss": 1.152, + "step": 125367 + }, + { + "epoch": 1.51, + "grad_norm": 12.642106022739116, + "learning_rate": 3.014884203730447e-06, + "loss": 1.2929, + "step": 125370 + }, + { + "epoch": 1.51, + "grad_norm": 29.496750103178535, + "learning_rate": 3.0144661785747775e-06, + "loss": 1.2782, + "step": 125373 + }, + { + "epoch": 1.51, + "grad_norm": 13.235276116006109, + "learning_rate": 3.0140481772582176e-06, + "loss": 1.1668, + "step": 125376 + }, + { + "epoch": 1.51, + "grad_norm": 7.530483688701141, + "learning_rate": 3.013630199782199e-06, + "loss": 1.2296, + "step": 125379 + }, + { + "epoch": 1.51, + "grad_norm": 15.30533520456848, + "learning_rate": 3.013212246148144e-06, + "loss": 1.1942, + "step": 125382 + }, + { + "epoch": 1.51, + "grad_norm": 6.926074810064711, + "learning_rate": 3.0127943163574758e-06, + "loss": 1.1489, + "step": 125385 + }, + { + "epoch": 1.51, + "grad_norm": 15.766273646355572, + "learning_rate": 3.0123764104116236e-06, + "loss": 1.139, + "step": 125388 + }, + { + "epoch": 1.51, + "grad_norm": 8.503836231328446, + "learning_rate": 3.0119585283120135e-06, + "loss": 1.1956, + "step": 125391 + }, + { + "epoch": 1.51, + "grad_norm": 4.562801009008202, + "learning_rate": 3.0115406700600756e-06, + "loss": 1.0589, + "step": 125394 + }, + { + "epoch": 1.51, + "grad_norm": 3.9514368397096575, + "learning_rate": 3.0111228356572287e-06, + "loss": 1.0073, + "step": 125397 + }, + { + "epoch": 1.51, + "grad_norm": 12.334577381486188, + "learning_rate": 3.010705025104905e-06, + "loss": 0.8045, + "step": 125400 + }, + { + "epoch": 1.51, + "grad_norm": 6.163719861479659, + "learning_rate": 3.0102872384045246e-06, + "loss": 0.9275, + "step": 125403 + }, + { + "epoch": 1.51, + "grad_norm": 9.266746263318906, + "learning_rate": 3.0098694755575197e-06, + "loss": 1.1922, + "step": 125406 + }, + { + "epoch": 1.51, + "grad_norm": 9.31740159913719, + "learning_rate": 3.0094517365653085e-06, + "loss": 1.155, + "step": 125409 + }, + { + "epoch": 1.51, + "grad_norm": 4.73609829212477, + "learning_rate": 3.009034021429321e-06, + "loss": 1.1076, + "step": 125412 + }, + { + "epoch": 1.51, + "grad_norm": 9.087151813741167, + "learning_rate": 3.008616330150983e-06, + "loss": 1.1285, + "step": 125415 + }, + { + "epoch": 1.51, + "grad_norm": 7.1422785970256735, + "learning_rate": 3.008198662731718e-06, + "loss": 0.8685, + "step": 125418 + }, + { + "epoch": 1.51, + "grad_norm": 2.941762820982199, + "learning_rate": 3.0077810191729575e-06, + "loss": 1.4932, + "step": 125421 + }, + { + "epoch": 1.51, + "grad_norm": 15.203366636750294, + "learning_rate": 3.00736339947612e-06, + "loss": 1.4064, + "step": 125424 + }, + { + "epoch": 1.51, + "grad_norm": 7.690268919139136, + "learning_rate": 3.0069458036426293e-06, + "loss": 1.0709, + "step": 125427 + }, + { + "epoch": 1.51, + "grad_norm": 16.37471957796239, + "learning_rate": 3.006528231673914e-06, + "loss": 0.7981, + "step": 125430 + }, + { + "epoch": 1.51, + "grad_norm": 12.512898578244892, + "learning_rate": 3.0061106835714027e-06, + "loss": 1.5494, + "step": 125433 + }, + { + "epoch": 1.51, + "grad_norm": 15.61379309666087, + "learning_rate": 3.0056931593365124e-06, + "loss": 1.4096, + "step": 125436 + }, + { + "epoch": 1.51, + "grad_norm": 23.02258405581244, + "learning_rate": 3.0052756589706733e-06, + "loss": 1.1912, + "step": 125439 + }, + { + "epoch": 1.51, + "grad_norm": 9.895695567923507, + "learning_rate": 3.004858182475311e-06, + "loss": 1.4733, + "step": 125442 + }, + { + "epoch": 1.51, + "grad_norm": 7.105493535453724, + "learning_rate": 3.004440729851845e-06, + "loss": 1.3482, + "step": 125445 + }, + { + "epoch": 1.51, + "grad_norm": 4.843229647652121, + "learning_rate": 3.0040233011017063e-06, + "loss": 1.0407, + "step": 125448 + }, + { + "epoch": 1.51, + "grad_norm": 7.119727521956153, + "learning_rate": 3.0036058962263125e-06, + "loss": 1.1776, + "step": 125451 + }, + { + "epoch": 1.51, + "grad_norm": 4.497729012540526, + "learning_rate": 3.0031885152270923e-06, + "loss": 1.3201, + "step": 125454 + }, + { + "epoch": 1.51, + "grad_norm": 5.3750491256710395, + "learning_rate": 3.002771158105469e-06, + "loss": 0.9713, + "step": 125457 + }, + { + "epoch": 1.51, + "grad_norm": 8.615187126044331, + "learning_rate": 3.0023538248628704e-06, + "loss": 1.0196, + "step": 125460 + }, + { + "epoch": 1.51, + "grad_norm": 6.661940966079256, + "learning_rate": 3.001936515500715e-06, + "loss": 1.1877, + "step": 125463 + }, + { + "epoch": 1.51, + "grad_norm": 15.895673485820838, + "learning_rate": 3.001519230020433e-06, + "loss": 1.2155, + "step": 125466 + }, + { + "epoch": 1.51, + "grad_norm": 13.495994653049785, + "learning_rate": 3.001101968423441e-06, + "loss": 1.2524, + "step": 125469 + }, + { + "epoch": 1.51, + "grad_norm": 6.386034010350204, + "learning_rate": 3.0006847307111674e-06, + "loss": 1.5599, + "step": 125472 + }, + { + "epoch": 1.51, + "grad_norm": 2.632734767866997, + "learning_rate": 3.000267516885039e-06, + "loss": 1.4807, + "step": 125475 + }, + { + "epoch": 1.51, + "grad_norm": 5.741821854937182, + "learning_rate": 2.999850326946473e-06, + "loss": 1.1947, + "step": 125478 + }, + { + "epoch": 1.51, + "grad_norm": 4.405081551301902, + "learning_rate": 2.999433160896896e-06, + "loss": 1.3474, + "step": 125481 + }, + { + "epoch": 1.51, + "grad_norm": 11.60251586899215, + "learning_rate": 2.999016018737736e-06, + "loss": 1.0072, + "step": 125484 + }, + { + "epoch": 1.51, + "grad_norm": 9.650946059863363, + "learning_rate": 2.9985989004704084e-06, + "loss": 0.8231, + "step": 125487 + }, + { + "epoch": 1.51, + "grad_norm": 16.168197033660974, + "learning_rate": 2.998181806096345e-06, + "loss": 1.0785, + "step": 125490 + }, + { + "epoch": 1.51, + "grad_norm": 8.664888731490324, + "learning_rate": 2.9977647356169614e-06, + "loss": 1.5272, + "step": 125493 + }, + { + "epoch": 1.51, + "grad_norm": 3.9974884943045246, + "learning_rate": 2.997347689033685e-06, + "loss": 1.053, + "step": 125496 + }, + { + "epoch": 1.51, + "grad_norm": 4.017286545058299, + "learning_rate": 2.996930666347938e-06, + "loss": 1.2451, + "step": 125499 + }, + { + "epoch": 1.51, + "grad_norm": 3.353405040478315, + "learning_rate": 2.996513667561147e-06, + "loss": 1.2665, + "step": 125502 + }, + { + "epoch": 1.51, + "grad_norm": 6.082526389600715, + "learning_rate": 2.99609669267473e-06, + "loss": 1.0523, + "step": 125505 + }, + { + "epoch": 1.51, + "grad_norm": 13.855430817672856, + "learning_rate": 2.995679741690115e-06, + "loss": 1.5626, + "step": 125508 + }, + { + "epoch": 1.51, + "grad_norm": 13.580467379389804, + "learning_rate": 2.9952628146087193e-06, + "loss": 1.1415, + "step": 125511 + }, + { + "epoch": 1.51, + "grad_norm": 3.2413601979361135, + "learning_rate": 2.994845911431972e-06, + "loss": 1.3041, + "step": 125514 + }, + { + "epoch": 1.51, + "grad_norm": 7.500768028330238, + "learning_rate": 2.9944290321612877e-06, + "loss": 0.8736, + "step": 125517 + }, + { + "epoch": 1.51, + "grad_norm": 7.960165383413503, + "learning_rate": 2.9940121767980945e-06, + "loss": 1.2034, + "step": 125520 + }, + { + "epoch": 1.51, + "grad_norm": 14.39377272293208, + "learning_rate": 2.9935953453438136e-06, + "loss": 1.1236, + "step": 125523 + }, + { + "epoch": 1.51, + "grad_norm": 6.382312599999661, + "learning_rate": 2.9931785377998722e-06, + "loss": 1.2011, + "step": 125526 + }, + { + "epoch": 1.51, + "grad_norm": 8.150191482701215, + "learning_rate": 2.9927617541676858e-06, + "loss": 0.9826, + "step": 125529 + }, + { + "epoch": 1.51, + "grad_norm": 6.573157481361523, + "learning_rate": 2.9923449944486815e-06, + "loss": 1.0999, + "step": 125532 + }, + { + "epoch": 1.51, + "grad_norm": 19.99903429550888, + "learning_rate": 2.9919282586442765e-06, + "loss": 1.04, + "step": 125535 + }, + { + "epoch": 1.51, + "grad_norm": 10.074409508442526, + "learning_rate": 2.9915115467558963e-06, + "loss": 1.2186, + "step": 125538 + }, + { + "epoch": 1.51, + "grad_norm": 3.8231574520829015, + "learning_rate": 2.9910948587849664e-06, + "loss": 0.8843, + "step": 125541 + }, + { + "epoch": 1.51, + "grad_norm": 20.933075933929562, + "learning_rate": 2.9906781947329024e-06, + "loss": 0.7519, + "step": 125544 + }, + { + "epoch": 1.51, + "grad_norm": 11.055360262561074, + "learning_rate": 2.9902615546011283e-06, + "loss": 1.4304, + "step": 125547 + }, + { + "epoch": 1.51, + "grad_norm": 7.3002349050945545, + "learning_rate": 2.9898449383910687e-06, + "loss": 1.1571, + "step": 125550 + }, + { + "epoch": 1.51, + "grad_norm": 8.774970631916817, + "learning_rate": 2.9894283461041417e-06, + "loss": 0.8626, + "step": 125553 + }, + { + "epoch": 1.51, + "grad_norm": 7.495534383617266, + "learning_rate": 2.9890117777417725e-06, + "loss": 0.968, + "step": 125556 + }, + { + "epoch": 1.51, + "grad_norm": 7.880408233254372, + "learning_rate": 2.988595233305379e-06, + "loss": 1.0803, + "step": 125559 + }, + { + "epoch": 1.51, + "grad_norm": 6.910171521689879, + "learning_rate": 2.9881787127963837e-06, + "loss": 1.2658, + "step": 125562 + }, + { + "epoch": 1.51, + "grad_norm": 11.138000140673947, + "learning_rate": 2.987762216216209e-06, + "loss": 1.2481, + "step": 125565 + }, + { + "epoch": 1.51, + "grad_norm": 16.31399607583457, + "learning_rate": 2.987345743566279e-06, + "loss": 1.1049, + "step": 125568 + }, + { + "epoch": 1.51, + "grad_norm": 15.064125826460042, + "learning_rate": 2.986929294848009e-06, + "loss": 0.9349, + "step": 125571 + }, + { + "epoch": 1.51, + "grad_norm": 16.51865598365528, + "learning_rate": 2.9865128700628253e-06, + "loss": 0.8809, + "step": 125574 + }, + { + "epoch": 1.51, + "grad_norm": 26.0991429636785, + "learning_rate": 2.9860964692121452e-06, + "loss": 1.1587, + "step": 125577 + }, + { + "epoch": 1.51, + "grad_norm": 8.393553639361187, + "learning_rate": 2.985680092297392e-06, + "loss": 1.2979, + "step": 125580 + }, + { + "epoch": 1.51, + "grad_norm": 8.775498426654224, + "learning_rate": 2.9852637393199878e-06, + "loss": 0.9967, + "step": 125583 + }, + { + "epoch": 1.51, + "grad_norm": 19.333855928119657, + "learning_rate": 2.9848474102813484e-06, + "loss": 0.8895, + "step": 125586 + }, + { + "epoch": 1.51, + "grad_norm": 10.236228040172705, + "learning_rate": 2.984431105182899e-06, + "loss": 1.2297, + "step": 125589 + }, + { + "epoch": 1.51, + "grad_norm": 5.056006760708269, + "learning_rate": 2.9840148240260625e-06, + "loss": 0.7901, + "step": 125592 + }, + { + "epoch": 1.51, + "grad_norm": 4.563085710139245, + "learning_rate": 2.9835985668122557e-06, + "loss": 1.1369, + "step": 125595 + }, + { + "epoch": 1.51, + "grad_norm": 10.05834757954576, + "learning_rate": 2.9831823335428967e-06, + "loss": 0.9392, + "step": 125598 + }, + { + "epoch": 1.51, + "grad_norm": 9.154833639588885, + "learning_rate": 2.982766124219408e-06, + "loss": 1.1416, + "step": 125601 + }, + { + "epoch": 1.51, + "grad_norm": 5.487309347536085, + "learning_rate": 2.982349938843212e-06, + "loss": 0.9164, + "step": 125604 + }, + { + "epoch": 1.51, + "grad_norm": 13.157986456879714, + "learning_rate": 2.9819337774157274e-06, + "loss": 1.084, + "step": 125607 + }, + { + "epoch": 1.51, + "grad_norm": 6.007809776926488, + "learning_rate": 2.9815176399383794e-06, + "loss": 1.2276, + "step": 125610 + }, + { + "epoch": 1.51, + "grad_norm": 12.377387828531564, + "learning_rate": 2.9811015264125787e-06, + "loss": 1.0459, + "step": 125613 + }, + { + "epoch": 1.51, + "grad_norm": 5.545102266264025, + "learning_rate": 2.980685436839754e-06, + "loss": 1.1931, + "step": 125616 + }, + { + "epoch": 1.51, + "grad_norm": 25.36978953656537, + "learning_rate": 2.980269371221319e-06, + "loss": 1.0071, + "step": 125619 + }, + { + "epoch": 1.51, + "grad_norm": 6.673950498030425, + "learning_rate": 2.9798533295586996e-06, + "loss": 1.6073, + "step": 125622 + }, + { + "epoch": 1.51, + "grad_norm": 4.328873597720407, + "learning_rate": 2.9794373118533083e-06, + "loss": 1.002, + "step": 125625 + }, + { + "epoch": 1.51, + "grad_norm": 9.91646026033763, + "learning_rate": 2.9790213181065684e-06, + "loss": 1.4978, + "step": 125628 + }, + { + "epoch": 1.51, + "grad_norm": 4.220704223799942, + "learning_rate": 2.9786053483198994e-06, + "loss": 1.2416, + "step": 125631 + }, + { + "epoch": 1.51, + "grad_norm": 6.87952395210706, + "learning_rate": 2.9781894024947257e-06, + "loss": 0.979, + "step": 125634 + }, + { + "epoch": 1.51, + "grad_norm": 9.259240213196883, + "learning_rate": 2.977773480632461e-06, + "loss": 1.1466, + "step": 125637 + }, + { + "epoch": 1.51, + "grad_norm": 5.925252030158929, + "learning_rate": 2.9773575827345234e-06, + "loss": 1.0606, + "step": 125640 + }, + { + "epoch": 1.51, + "grad_norm": 3.799534752162907, + "learning_rate": 2.9769417088023354e-06, + "loss": 1.1126, + "step": 125643 + }, + { + "epoch": 1.51, + "grad_norm": 2.8197797349957234, + "learning_rate": 2.976525858837316e-06, + "loss": 1.1303, + "step": 125646 + }, + { + "epoch": 1.51, + "grad_norm": 9.354683705425348, + "learning_rate": 2.9761100328408853e-06, + "loss": 1.1796, + "step": 125649 + }, + { + "epoch": 1.51, + "grad_norm": 11.313089045405224, + "learning_rate": 2.97569423081446e-06, + "loss": 1.4092, + "step": 125652 + }, + { + "epoch": 1.51, + "grad_norm": 6.166067050014828, + "learning_rate": 2.975278452759459e-06, + "loss": 1.3051, + "step": 125655 + }, + { + "epoch": 1.51, + "grad_norm": 14.123394232900973, + "learning_rate": 2.9748626986773054e-06, + "loss": 0.9566, + "step": 125658 + }, + { + "epoch": 1.51, + "grad_norm": 9.835113150945762, + "learning_rate": 2.9744469685694124e-06, + "loss": 1.3357, + "step": 125661 + }, + { + "epoch": 1.51, + "grad_norm": 10.961924321270304, + "learning_rate": 2.9740312624372036e-06, + "loss": 1.3327, + "step": 125664 + }, + { + "epoch": 1.51, + "grad_norm": 4.543460893132849, + "learning_rate": 2.9736155802820933e-06, + "loss": 1.1056, + "step": 125667 + }, + { + "epoch": 1.51, + "grad_norm": 7.977182020061638, + "learning_rate": 2.9731999221055006e-06, + "loss": 1.272, + "step": 125670 + }, + { + "epoch": 1.51, + "grad_norm": 4.6168180284695985, + "learning_rate": 2.9727842879088477e-06, + "loss": 1.3309, + "step": 125673 + }, + { + "epoch": 1.51, + "grad_norm": 17.0178561105452, + "learning_rate": 2.972368677693552e-06, + "loss": 1.1246, + "step": 125676 + }, + { + "epoch": 1.51, + "grad_norm": 10.869262906042866, + "learning_rate": 2.971953091461032e-06, + "loss": 0.952, + "step": 125679 + }, + { + "epoch": 1.51, + "grad_norm": 6.860161608201458, + "learning_rate": 2.9715375292127e-06, + "loss": 1.2419, + "step": 125682 + }, + { + "epoch": 1.51, + "grad_norm": 5.525767059092303, + "learning_rate": 2.97112199094998e-06, + "loss": 1.0979, + "step": 125685 + }, + { + "epoch": 1.51, + "grad_norm": 15.699043788514466, + "learning_rate": 2.9707064766742886e-06, + "loss": 1.0731, + "step": 125688 + }, + { + "epoch": 1.51, + "grad_norm": 13.840133465408, + "learning_rate": 2.9702909863870478e-06, + "loss": 1.3056, + "step": 125691 + }, + { + "epoch": 1.51, + "grad_norm": 4.959639882862308, + "learning_rate": 2.969875520089668e-06, + "loss": 1.3211, + "step": 125694 + }, + { + "epoch": 1.51, + "grad_norm": 13.338152304164547, + "learning_rate": 2.9694600777835703e-06, + "loss": 1.2851, + "step": 125697 + }, + { + "epoch": 1.51, + "grad_norm": 6.076655039594941, + "learning_rate": 2.969044659470176e-06, + "loss": 0.9911, + "step": 125700 + }, + { + "epoch": 1.51, + "grad_norm": 13.046607738831833, + "learning_rate": 2.9686292651509006e-06, + "loss": 1.0651, + "step": 125703 + }, + { + "epoch": 1.51, + "grad_norm": 10.68989423173765, + "learning_rate": 2.9682138948271578e-06, + "loss": 1.3194, + "step": 125706 + }, + { + "epoch": 1.51, + "grad_norm": 13.76586503514831, + "learning_rate": 2.9677985485003678e-06, + "loss": 1.2137, + "step": 125709 + }, + { + "epoch": 1.51, + "grad_norm": 4.997929504462886, + "learning_rate": 2.967383226171948e-06, + "loss": 1.1111, + "step": 125712 + }, + { + "epoch": 1.51, + "grad_norm": 26.119608542811356, + "learning_rate": 2.966967927843316e-06, + "loss": 0.723, + "step": 125715 + }, + { + "epoch": 1.51, + "grad_norm": 32.44161963345578, + "learning_rate": 2.966552653515893e-06, + "loss": 1.1907, + "step": 125718 + }, + { + "epoch": 1.51, + "grad_norm": 6.61629217253652, + "learning_rate": 2.9661374031910916e-06, + "loss": 1.4504, + "step": 125721 + }, + { + "epoch": 1.51, + "grad_norm": 12.002075656224205, + "learning_rate": 2.965722176870327e-06, + "loss": 0.7714, + "step": 125724 + }, + { + "epoch": 1.51, + "grad_norm": 15.830833616748, + "learning_rate": 2.965306974555019e-06, + "loss": 1.3649, + "step": 125727 + }, + { + "epoch": 1.51, + "grad_norm": 5.173875085545737, + "learning_rate": 2.9648917962465883e-06, + "loss": 1.2801, + "step": 125730 + }, + { + "epoch": 1.51, + "grad_norm": 6.027456639145304, + "learning_rate": 2.9644766419464433e-06, + "loss": 0.9824, + "step": 125733 + }, + { + "epoch": 1.51, + "grad_norm": 5.623669455977995, + "learning_rate": 2.964061511656006e-06, + "loss": 1.2792, + "step": 125736 + }, + { + "epoch": 1.51, + "grad_norm": 18.33142832239565, + "learning_rate": 2.963646405376692e-06, + "loss": 0.9121, + "step": 125739 + }, + { + "epoch": 1.51, + "grad_norm": 4.463151778911554, + "learning_rate": 2.963231323109922e-06, + "loss": 1.2563, + "step": 125742 + }, + { + "epoch": 1.51, + "grad_norm": 16.55852761014577, + "learning_rate": 2.962816264857109e-06, + "loss": 0.9843, + "step": 125745 + }, + { + "epoch": 1.51, + "grad_norm": 4.618141595040921, + "learning_rate": 2.962401230619666e-06, + "loss": 1.1361, + "step": 125748 + }, + { + "epoch": 1.51, + "grad_norm": 5.38175783487073, + "learning_rate": 2.9619862203990124e-06, + "loss": 1.145, + "step": 125751 + }, + { + "epoch": 1.51, + "grad_norm": 4.821728651365688, + "learning_rate": 2.961571234196564e-06, + "loss": 1.1234, + "step": 125754 + }, + { + "epoch": 1.51, + "grad_norm": 10.438811280340746, + "learning_rate": 2.961156272013742e-06, + "loss": 1.2159, + "step": 125757 + }, + { + "epoch": 1.51, + "grad_norm": 12.55195122603867, + "learning_rate": 2.9607413338519553e-06, + "loss": 1.3078, + "step": 125760 + }, + { + "epoch": 1.51, + "grad_norm": 15.88131742724269, + "learning_rate": 2.960326419712626e-06, + "loss": 1.3804, + "step": 125763 + }, + { + "epoch": 1.51, + "grad_norm": 13.016956712707158, + "learning_rate": 2.9599115295971625e-06, + "loss": 1.5845, + "step": 125766 + }, + { + "epoch": 1.51, + "grad_norm": 6.561389804005975, + "learning_rate": 2.9594966635069866e-06, + "loss": 1.2606, + "step": 125769 + }, + { + "epoch": 1.51, + "grad_norm": 7.317948719382311, + "learning_rate": 2.9590818214435147e-06, + "loss": 1.0305, + "step": 125772 + }, + { + "epoch": 1.51, + "grad_norm": 15.711581277701576, + "learning_rate": 2.9586670034081576e-06, + "loss": 1.2251, + "step": 125775 + }, + { + "epoch": 1.51, + "grad_norm": 3.9447231157778826, + "learning_rate": 2.9582522094023346e-06, + "loss": 1.1886, + "step": 125778 + }, + { + "epoch": 1.51, + "grad_norm": 11.227651029864997, + "learning_rate": 2.957837439427459e-06, + "loss": 1.207, + "step": 125781 + }, + { + "epoch": 1.51, + "grad_norm": 8.218870365636697, + "learning_rate": 2.9574226934849517e-06, + "loss": 0.8947, + "step": 125784 + }, + { + "epoch": 1.51, + "grad_norm": 8.789726433591785, + "learning_rate": 2.9570079715762234e-06, + "loss": 0.8689, + "step": 125787 + }, + { + "epoch": 1.51, + "grad_norm": 4.144792402944928, + "learning_rate": 2.956593273702688e-06, + "loss": 1.32, + "step": 125790 + }, + { + "epoch": 1.51, + "grad_norm": 7.207184275942509, + "learning_rate": 2.9561785998657623e-06, + "loss": 1.2586, + "step": 125793 + }, + { + "epoch": 1.51, + "grad_norm": 34.66658736699968, + "learning_rate": 2.955763950066861e-06, + "loss": 1.2267, + "step": 125796 + }, + { + "epoch": 1.51, + "grad_norm": 7.26460803271544, + "learning_rate": 2.955349324307404e-06, + "loss": 1.2584, + "step": 125799 + }, + { + "epoch": 1.51, + "grad_norm": 4.207068756906748, + "learning_rate": 2.9549347225887983e-06, + "loss": 1.1352, + "step": 125802 + }, + { + "epoch": 1.51, + "grad_norm": 50.52659099960947, + "learning_rate": 2.954520144912467e-06, + "loss": 1.4035, + "step": 125805 + }, + { + "epoch": 1.51, + "grad_norm": 8.583830821659909, + "learning_rate": 2.9541055912798168e-06, + "loss": 1.1768, + "step": 125808 + }, + { + "epoch": 1.51, + "grad_norm": 14.346454884407605, + "learning_rate": 2.9536910616922697e-06, + "loss": 1.2112, + "step": 125811 + }, + { + "epoch": 1.51, + "grad_norm": 12.82813581237703, + "learning_rate": 2.9532765561512333e-06, + "loss": 1.3808, + "step": 125814 + }, + { + "epoch": 1.51, + "grad_norm": 14.505166637259782, + "learning_rate": 2.9528620746581272e-06, + "loss": 1.2348, + "step": 125817 + }, + { + "epoch": 1.51, + "grad_norm": 11.369208832841911, + "learning_rate": 2.952447617214362e-06, + "loss": 1.0802, + "step": 125820 + }, + { + "epoch": 1.51, + "grad_norm": 5.438538761901347, + "learning_rate": 2.952033183821357e-06, + "loss": 1.412, + "step": 125823 + }, + { + "epoch": 1.51, + "grad_norm": 7.000609673133384, + "learning_rate": 2.951618774480526e-06, + "loss": 1.1769, + "step": 125826 + }, + { + "epoch": 1.51, + "grad_norm": 5.899774851095822, + "learning_rate": 2.951204389193282e-06, + "loss": 1.1819, + "step": 125829 + }, + { + "epoch": 1.51, + "grad_norm": 11.941649438038718, + "learning_rate": 2.950790027961035e-06, + "loss": 1.0165, + "step": 125832 + }, + { + "epoch": 1.51, + "grad_norm": 9.212058247833511, + "learning_rate": 2.950375690785202e-06, + "loss": 1.2551, + "step": 125835 + }, + { + "epoch": 1.51, + "grad_norm": 9.87590117601179, + "learning_rate": 2.9499613776672008e-06, + "loss": 1.0055, + "step": 125838 + }, + { + "epoch": 1.51, + "grad_norm": 13.878548151037322, + "learning_rate": 2.94954708860844e-06, + "loss": 1.2392, + "step": 125841 + }, + { + "epoch": 1.51, + "grad_norm": 12.40166422831352, + "learning_rate": 2.9491328236103347e-06, + "loss": 1.434, + "step": 125844 + }, + { + "epoch": 1.51, + "grad_norm": 4.897911373889541, + "learning_rate": 2.9487185826742993e-06, + "loss": 1.1657, + "step": 125847 + }, + { + "epoch": 1.51, + "grad_norm": 14.195452671392806, + "learning_rate": 2.9483043658017496e-06, + "loss": 1.2311, + "step": 125850 + }, + { + "epoch": 1.51, + "grad_norm": 9.728203984185031, + "learning_rate": 2.947890172994099e-06, + "loss": 1.1998, + "step": 125853 + }, + { + "epoch": 1.51, + "grad_norm": 19.156532816927506, + "learning_rate": 2.947476004252755e-06, + "loss": 1.0393, + "step": 125856 + }, + { + "epoch": 1.51, + "grad_norm": 9.649520806669226, + "learning_rate": 2.9470618595791346e-06, + "loss": 1.1356, + "step": 125859 + }, + { + "epoch": 1.51, + "grad_norm": 5.024011813756442, + "learning_rate": 2.9466477389746516e-06, + "loss": 1.0421, + "step": 125862 + }, + { + "epoch": 1.51, + "grad_norm": 3.0221859424261845, + "learning_rate": 2.9462336424407232e-06, + "loss": 1.246, + "step": 125865 + }, + { + "epoch": 1.51, + "grad_norm": 14.82730195358082, + "learning_rate": 2.945819569978755e-06, + "loss": 1.1741, + "step": 125868 + }, + { + "epoch": 1.51, + "grad_norm": 28.75812326004046, + "learning_rate": 2.945405521590168e-06, + "loss": 0.9557, + "step": 125871 + }, + { + "epoch": 1.51, + "grad_norm": 14.508653371266632, + "learning_rate": 2.944991497276366e-06, + "loss": 0.9789, + "step": 125874 + }, + { + "epoch": 1.51, + "grad_norm": 6.846829599897993, + "learning_rate": 2.9445774970387676e-06, + "loss": 1.2846, + "step": 125877 + }, + { + "epoch": 1.51, + "grad_norm": 7.751213436509028, + "learning_rate": 2.944163520878789e-06, + "loss": 1.2778, + "step": 125880 + }, + { + "epoch": 1.51, + "grad_norm": 3.525802343875061, + "learning_rate": 2.9437495687978344e-06, + "loss": 0.9021, + "step": 125883 + }, + { + "epoch": 1.51, + "grad_norm": 49.58552870598461, + "learning_rate": 2.9433356407973214e-06, + "loss": 1.1955, + "step": 125886 + }, + { + "epoch": 1.51, + "grad_norm": 10.114317241610662, + "learning_rate": 2.942921736878661e-06, + "loss": 0.8702, + "step": 125889 + }, + { + "epoch": 1.51, + "grad_norm": 38.53330792095202, + "learning_rate": 2.942507857043271e-06, + "loss": 1.0028, + "step": 125892 + }, + { + "epoch": 1.51, + "grad_norm": 4.9538750465354005, + "learning_rate": 2.942094001292559e-06, + "loss": 1.3751, + "step": 125895 + }, + { + "epoch": 1.51, + "grad_norm": 4.802767504751304, + "learning_rate": 2.9416801696279353e-06, + "loss": 1.1822, + "step": 125898 + }, + { + "epoch": 1.51, + "grad_norm": 11.013499168658122, + "learning_rate": 2.9412663620508155e-06, + "loss": 0.8861, + "step": 125901 + }, + { + "epoch": 1.51, + "grad_norm": 2.3273913555611303, + "learning_rate": 2.9408525785626097e-06, + "loss": 0.9592, + "step": 125904 + }, + { + "epoch": 1.51, + "grad_norm": 8.320524476555729, + "learning_rate": 2.940438819164735e-06, + "loss": 0.6151, + "step": 125907 + }, + { + "epoch": 1.51, + "grad_norm": 8.369328363981666, + "learning_rate": 2.9400250838585965e-06, + "loss": 1.2622, + "step": 125910 + }, + { + "epoch": 1.51, + "grad_norm": 2.9222626684535875, + "learning_rate": 2.9396113726456123e-06, + "loss": 1.1089, + "step": 125913 + }, + { + "epoch": 1.51, + "grad_norm": 6.750393546754095, + "learning_rate": 2.9391976855271888e-06, + "loss": 1.193, + "step": 125916 + }, + { + "epoch": 1.51, + "grad_norm": 42.80558454123384, + "learning_rate": 2.9387840225047427e-06, + "loss": 1.1154, + "step": 125919 + }, + { + "epoch": 1.51, + "grad_norm": 9.080308031872338, + "learning_rate": 2.9383703835796805e-06, + "loss": 1.2061, + "step": 125922 + }, + { + "epoch": 1.51, + "grad_norm": 4.912414878338888, + "learning_rate": 2.937956768753417e-06, + "loss": 0.9535, + "step": 125925 + }, + { + "epoch": 1.51, + "grad_norm": 6.59987131966957, + "learning_rate": 2.9375431780273634e-06, + "loss": 0.9803, + "step": 125928 + }, + { + "epoch": 1.51, + "grad_norm": 5.969412192807294, + "learning_rate": 2.9371296114029304e-06, + "loss": 1.0485, + "step": 125931 + }, + { + "epoch": 1.51, + "grad_norm": 92.14217457782598, + "learning_rate": 2.936716068881533e-06, + "loss": 1.2101, + "step": 125934 + }, + { + "epoch": 1.51, + "grad_norm": 6.313059087419438, + "learning_rate": 2.9363025504645805e-06, + "loss": 1.0, + "step": 125937 + }, + { + "epoch": 1.51, + "grad_norm": 8.250040587593904, + "learning_rate": 2.9358890561534792e-06, + "loss": 0.9974, + "step": 125940 + }, + { + "epoch": 1.51, + "grad_norm": 7.646032546343133, + "learning_rate": 2.935475585949644e-06, + "loss": 1.1439, + "step": 125943 + }, + { + "epoch": 1.51, + "grad_norm": 8.400164554044768, + "learning_rate": 2.93506213985449e-06, + "loss": 1.1051, + "step": 125946 + }, + { + "epoch": 1.51, + "grad_norm": 12.401443329113148, + "learning_rate": 2.9346487178694194e-06, + "loss": 1.2679, + "step": 125949 + }, + { + "epoch": 1.51, + "grad_norm": 9.44605621021746, + "learning_rate": 2.9342353199958483e-06, + "loss": 1.3839, + "step": 125952 + }, + { + "epoch": 1.51, + "grad_norm": 8.650878514777476, + "learning_rate": 2.9338219462351914e-06, + "loss": 1.2913, + "step": 125955 + }, + { + "epoch": 1.51, + "grad_norm": 3.4608230663784396, + "learning_rate": 2.9334085965888505e-06, + "loss": 1.1797, + "step": 125958 + }, + { + "epoch": 1.51, + "grad_norm": 9.644410208593849, + "learning_rate": 2.932995271058244e-06, + "loss": 1.1656, + "step": 125961 + }, + { + "epoch": 1.51, + "grad_norm": 16.868757734768405, + "learning_rate": 2.9325819696447765e-06, + "loss": 1.0789, + "step": 125964 + }, + { + "epoch": 1.51, + "grad_norm": 9.622348665335926, + "learning_rate": 2.9321686923498615e-06, + "loss": 1.0818, + "step": 125967 + }, + { + "epoch": 1.51, + "grad_norm": 8.955060914108403, + "learning_rate": 2.931755439174908e-06, + "loss": 0.8854, + "step": 125970 + }, + { + "epoch": 1.51, + "grad_norm": 13.211229403694755, + "learning_rate": 2.9313422101213305e-06, + "loss": 1.2813, + "step": 125973 + }, + { + "epoch": 1.51, + "grad_norm": 3.857358644503015, + "learning_rate": 2.9309290051905327e-06, + "loss": 1.4914, + "step": 125976 + }, + { + "epoch": 1.51, + "grad_norm": 9.47772475478786, + "learning_rate": 2.9305158243839316e-06, + "loss": 1.1999, + "step": 125979 + }, + { + "epoch": 1.51, + "grad_norm": 6.660492804581824, + "learning_rate": 2.93010266770293e-06, + "loss": 1.1093, + "step": 125982 + }, + { + "epoch": 1.51, + "grad_norm": 4.730389438099379, + "learning_rate": 2.9296895351489418e-06, + "loss": 0.9905, + "step": 125985 + }, + { + "epoch": 1.51, + "grad_norm": 5.519261722583012, + "learning_rate": 2.9292764267233787e-06, + "loss": 1.0779, + "step": 125988 + }, + { + "epoch": 1.52, + "grad_norm": 13.804268437526297, + "learning_rate": 2.928863342427646e-06, + "loss": 1.3061, + "step": 125991 + }, + { + "epoch": 1.52, + "grad_norm": 6.796724231132328, + "learning_rate": 2.9284502822631556e-06, + "loss": 1.0093, + "step": 125994 + }, + { + "epoch": 1.52, + "grad_norm": 2.6229024168083366, + "learning_rate": 2.92803724623132e-06, + "loss": 1.1709, + "step": 125997 + }, + { + "epoch": 1.52, + "grad_norm": 11.723671348952323, + "learning_rate": 2.927624234333547e-06, + "loss": 0.7875, + "step": 126000 + }, + { + "epoch": 1.52, + "grad_norm": 8.387277946654244, + "learning_rate": 2.927211246571241e-06, + "loss": 0.9388, + "step": 126003 + }, + { + "epoch": 1.52, + "grad_norm": 10.854678316417111, + "learning_rate": 2.926798282945815e-06, + "loss": 1.0271, + "step": 126006 + }, + { + "epoch": 1.52, + "grad_norm": 15.229844855252713, + "learning_rate": 2.9263853434586797e-06, + "loss": 1.2876, + "step": 126009 + }, + { + "epoch": 1.52, + "grad_norm": 3.5352310027407854, + "learning_rate": 2.925972428111242e-06, + "loss": 1.4717, + "step": 126012 + }, + { + "epoch": 1.52, + "grad_norm": 7.044666329255789, + "learning_rate": 2.9255595369049172e-06, + "loss": 1.3916, + "step": 126015 + }, + { + "epoch": 1.52, + "grad_norm": 5.578953139219575, + "learning_rate": 2.9251466698411058e-06, + "loss": 1.1925, + "step": 126018 + }, + { + "epoch": 1.52, + "grad_norm": 13.25930386441255, + "learning_rate": 2.9247338269212235e-06, + "loss": 1.0098, + "step": 126021 + }, + { + "epoch": 1.52, + "grad_norm": 25.679158458384844, + "learning_rate": 2.9243210081466723e-06, + "loss": 1.3705, + "step": 126024 + }, + { + "epoch": 1.52, + "grad_norm": 18.87187750259813, + "learning_rate": 2.923908213518869e-06, + "loss": 1.2194, + "step": 126027 + }, + { + "epoch": 1.52, + "grad_norm": 5.824719304201729, + "learning_rate": 2.9234954430392135e-06, + "loss": 1.1436, + "step": 126030 + }, + { + "epoch": 1.52, + "grad_norm": 6.031807386086988, + "learning_rate": 2.9230826967091207e-06, + "loss": 1.401, + "step": 126033 + }, + { + "epoch": 1.52, + "grad_norm": 3.822455080305774, + "learning_rate": 2.922669974529997e-06, + "loss": 1.209, + "step": 126036 + }, + { + "epoch": 1.52, + "grad_norm": 2.824221845783925, + "learning_rate": 2.9222572765032553e-06, + "loss": 1.0915, + "step": 126039 + }, + { + "epoch": 1.52, + "grad_norm": 8.63250299499926, + "learning_rate": 2.921844602630299e-06, + "loss": 1.0303, + "step": 126042 + }, + { + "epoch": 1.52, + "grad_norm": 16.300726403232044, + "learning_rate": 2.9214319529125344e-06, + "loss": 1.224, + "step": 126045 + }, + { + "epoch": 1.52, + "grad_norm": 3.1627739006679514, + "learning_rate": 2.9210193273513742e-06, + "loss": 1.0211, + "step": 126048 + }, + { + "epoch": 1.52, + "grad_norm": 3.4709980787803874, + "learning_rate": 2.920606725948224e-06, + "loss": 1.3284, + "step": 126051 + }, + { + "epoch": 1.52, + "grad_norm": 6.287392501784514, + "learning_rate": 2.920194148704496e-06, + "loss": 1.3305, + "step": 126054 + }, + { + "epoch": 1.52, + "grad_norm": 12.006641882013364, + "learning_rate": 2.9197815956215924e-06, + "loss": 1.0906, + "step": 126057 + }, + { + "epoch": 1.52, + "grad_norm": 10.545798604831132, + "learning_rate": 2.9193690667009246e-06, + "loss": 1.3067, + "step": 126060 + }, + { + "epoch": 1.52, + "grad_norm": 8.091534745693565, + "learning_rate": 2.918956561943902e-06, + "loss": 1.5042, + "step": 126063 + }, + { + "epoch": 1.52, + "grad_norm": 5.874050805591337, + "learning_rate": 2.918544081351927e-06, + "loss": 1.1285, + "step": 126066 + }, + { + "epoch": 1.52, + "grad_norm": 8.573696228020651, + "learning_rate": 2.9181316249264148e-06, + "loss": 0.7341, + "step": 126069 + }, + { + "epoch": 1.52, + "grad_norm": 15.940406701185983, + "learning_rate": 2.917719192668764e-06, + "loss": 0.939, + "step": 126072 + }, + { + "epoch": 1.52, + "grad_norm": 18.776418755457836, + "learning_rate": 2.917306784580387e-06, + "loss": 1.4385, + "step": 126075 + }, + { + "epoch": 1.52, + "grad_norm": 7.869145806050258, + "learning_rate": 2.916894400662691e-06, + "loss": 1.1013, + "step": 126078 + }, + { + "epoch": 1.52, + "grad_norm": 2.390271607755067, + "learning_rate": 2.9164820409170856e-06, + "loss": 1.182, + "step": 126081 + }, + { + "epoch": 1.52, + "grad_norm": 10.953070173897446, + "learning_rate": 2.916069705344973e-06, + "loss": 0.8991, + "step": 126084 + }, + { + "epoch": 1.52, + "grad_norm": 6.479056830094117, + "learning_rate": 2.915657393947765e-06, + "loss": 1.0959, + "step": 126087 + }, + { + "epoch": 1.52, + "grad_norm": 12.581881278879552, + "learning_rate": 2.915245106726865e-06, + "loss": 1.1054, + "step": 126090 + }, + { + "epoch": 1.52, + "grad_norm": 4.7945288516848965, + "learning_rate": 2.9148328436836804e-06, + "loss": 1.0467, + "step": 126093 + }, + { + "epoch": 1.52, + "grad_norm": 2.201636041255169, + "learning_rate": 2.9144206048196223e-06, + "loss": 1.0564, + "step": 126096 + }, + { + "epoch": 1.52, + "grad_norm": 14.361712451091499, + "learning_rate": 2.9140083901360916e-06, + "loss": 1.1478, + "step": 126099 + }, + { + "epoch": 1.52, + "grad_norm": 3.184918849613594, + "learning_rate": 2.9135961996344986e-06, + "loss": 1.3054, + "step": 126102 + }, + { + "epoch": 1.52, + "grad_norm": 6.35388426548856, + "learning_rate": 2.913184033316251e-06, + "loss": 1.0811, + "step": 126105 + }, + { + "epoch": 1.52, + "grad_norm": 12.086958785381594, + "learning_rate": 2.9127718911827542e-06, + "loss": 1.1512, + "step": 126108 + }, + { + "epoch": 1.52, + "grad_norm": 3.4427059674354554, + "learning_rate": 2.9123597732354116e-06, + "loss": 1.0331, + "step": 126111 + }, + { + "epoch": 1.52, + "grad_norm": 6.668753171813995, + "learning_rate": 2.9119476794756317e-06, + "loss": 1.131, + "step": 126114 + }, + { + "epoch": 1.52, + "grad_norm": 3.459040837556493, + "learning_rate": 2.911535609904821e-06, + "loss": 1.3734, + "step": 126117 + }, + { + "epoch": 1.52, + "grad_norm": 9.936066443052889, + "learning_rate": 2.911123564524386e-06, + "loss": 1.1681, + "step": 126120 + }, + { + "epoch": 1.52, + "grad_norm": 7.116336683867961, + "learning_rate": 2.9107115433357367e-06, + "loss": 1.1733, + "step": 126123 + }, + { + "epoch": 1.52, + "grad_norm": 4.581774160679051, + "learning_rate": 2.9102995463402715e-06, + "loss": 1.1214, + "step": 126126 + }, + { + "epoch": 1.52, + "grad_norm": 7.506851200014458, + "learning_rate": 2.9098875735394037e-06, + "loss": 1.2792, + "step": 126129 + }, + { + "epoch": 1.52, + "grad_norm": 9.028304144151253, + "learning_rate": 2.9094756249345335e-06, + "loss": 0.9739, + "step": 126132 + }, + { + "epoch": 1.52, + "grad_norm": 3.0545847909536845, + "learning_rate": 2.9090637005270703e-06, + "loss": 1.137, + "step": 126135 + }, + { + "epoch": 1.52, + "grad_norm": 6.9978260852011, + "learning_rate": 2.9086518003184173e-06, + "loss": 1.293, + "step": 126138 + }, + { + "epoch": 1.52, + "grad_norm": 7.447345942295537, + "learning_rate": 2.90823992430998e-06, + "loss": 1.2216, + "step": 126141 + }, + { + "epoch": 1.52, + "grad_norm": 6.459681286867237, + "learning_rate": 2.907828072503166e-06, + "loss": 1.3898, + "step": 126144 + }, + { + "epoch": 1.52, + "grad_norm": 6.407450827051758, + "learning_rate": 2.9074162448993825e-06, + "loss": 1.1636, + "step": 126147 + }, + { + "epoch": 1.52, + "grad_norm": 11.138661763828669, + "learning_rate": 2.9070044415000342e-06, + "loss": 1.2256, + "step": 126150 + }, + { + "epoch": 1.52, + "grad_norm": 9.293745937903186, + "learning_rate": 2.906592662306521e-06, + "loss": 0.9084, + "step": 126153 + }, + { + "epoch": 1.52, + "grad_norm": 9.551175636716009, + "learning_rate": 2.906180907320252e-06, + "loss": 0.846, + "step": 126156 + }, + { + "epoch": 1.52, + "grad_norm": 22.37565019922495, + "learning_rate": 2.905769176542632e-06, + "loss": 1.5587, + "step": 126159 + }, + { + "epoch": 1.52, + "grad_norm": 9.123100725420846, + "learning_rate": 2.9053574699750695e-06, + "loss": 1.0993, + "step": 126162 + }, + { + "epoch": 1.52, + "grad_norm": 5.3463506719294305, + "learning_rate": 2.9049457876189635e-06, + "loss": 1.2711, + "step": 126165 + }, + { + "epoch": 1.52, + "grad_norm": 18.51371342503698, + "learning_rate": 2.9045341294757224e-06, + "loss": 1.1288, + "step": 126168 + }, + { + "epoch": 1.52, + "grad_norm": 8.724951716099547, + "learning_rate": 2.904122495546753e-06, + "loss": 0.9749, + "step": 126171 + }, + { + "epoch": 1.52, + "grad_norm": 9.27651786971756, + "learning_rate": 2.9037108858334553e-06, + "loss": 1.1658, + "step": 126174 + }, + { + "epoch": 1.52, + "grad_norm": 3.708914301684951, + "learning_rate": 2.9032993003372388e-06, + "loss": 1.3305, + "step": 126177 + }, + { + "epoch": 1.52, + "grad_norm": 15.02428671142723, + "learning_rate": 2.9028877390595022e-06, + "loss": 1.0048, + "step": 126180 + }, + { + "epoch": 1.52, + "grad_norm": 14.153875457790107, + "learning_rate": 2.902476202001654e-06, + "loss": 1.0711, + "step": 126183 + }, + { + "epoch": 1.52, + "grad_norm": 11.99941813232212, + "learning_rate": 2.9020646891650984e-06, + "loss": 1.2966, + "step": 126186 + }, + { + "epoch": 1.52, + "grad_norm": 8.25360559340206, + "learning_rate": 2.901653200551241e-06, + "loss": 0.8793, + "step": 126189 + }, + { + "epoch": 1.52, + "grad_norm": 23.81170803540991, + "learning_rate": 2.901241736161485e-06, + "loss": 1.0391, + "step": 126192 + }, + { + "epoch": 1.52, + "grad_norm": 6.136525945650162, + "learning_rate": 2.900830295997231e-06, + "loss": 0.9227, + "step": 126195 + }, + { + "epoch": 1.52, + "grad_norm": 8.225600443683849, + "learning_rate": 2.9004188800598866e-06, + "loss": 1.2511, + "step": 126198 + }, + { + "epoch": 1.52, + "grad_norm": 5.560357594161103, + "learning_rate": 2.9000074883508557e-06, + "loss": 1.3246, + "step": 126201 + }, + { + "epoch": 1.52, + "grad_norm": 6.448762063258675, + "learning_rate": 2.899596120871544e-06, + "loss": 1.3805, + "step": 126204 + }, + { + "epoch": 1.52, + "grad_norm": 8.900202501751751, + "learning_rate": 2.8991847776233496e-06, + "loss": 0.9549, + "step": 126207 + }, + { + "epoch": 1.52, + "grad_norm": 17.791687990661057, + "learning_rate": 2.898773458607681e-06, + "loss": 1.133, + "step": 126210 + }, + { + "epoch": 1.52, + "grad_norm": 6.866036735520056, + "learning_rate": 2.898362163825943e-06, + "loss": 0.913, + "step": 126213 + }, + { + "epoch": 1.52, + "grad_norm": 3.601695672465751, + "learning_rate": 2.8979508932795375e-06, + "loss": 1.4352, + "step": 126216 + }, + { + "epoch": 1.52, + "grad_norm": 7.303930067008477, + "learning_rate": 2.897539646969865e-06, + "loss": 1.3162, + "step": 126219 + }, + { + "epoch": 1.52, + "grad_norm": 4.287713557656909, + "learning_rate": 2.89712842489833e-06, + "loss": 1.3059, + "step": 126222 + }, + { + "epoch": 1.52, + "grad_norm": 4.76307797256987, + "learning_rate": 2.8967172270663367e-06, + "loss": 1.1069, + "step": 126225 + }, + { + "epoch": 1.52, + "grad_norm": 29.392043521470622, + "learning_rate": 2.896306053475291e-06, + "loss": 1.1136, + "step": 126228 + }, + { + "epoch": 1.52, + "grad_norm": 7.728504150500644, + "learning_rate": 2.895894904126596e-06, + "loss": 1.1052, + "step": 126231 + }, + { + "epoch": 1.52, + "grad_norm": 7.745015312289313, + "learning_rate": 2.895483779021653e-06, + "loss": 0.8614, + "step": 126234 + }, + { + "epoch": 1.52, + "grad_norm": 20.502561341856513, + "learning_rate": 2.895072678161861e-06, + "loss": 1.0303, + "step": 126237 + }, + { + "epoch": 1.52, + "grad_norm": 10.38631488693547, + "learning_rate": 2.8946616015486274e-06, + "loss": 1.1519, + "step": 126240 + }, + { + "epoch": 1.52, + "grad_norm": 7.163890598643266, + "learning_rate": 2.8942505491833573e-06, + "loss": 1.354, + "step": 126243 + }, + { + "epoch": 1.52, + "grad_norm": 18.773901774145845, + "learning_rate": 2.8938395210674486e-06, + "loss": 0.6093, + "step": 126246 + }, + { + "epoch": 1.52, + "grad_norm": 6.048224817283126, + "learning_rate": 2.8934285172023046e-06, + "loss": 1.1709, + "step": 126249 + }, + { + "epoch": 1.52, + "grad_norm": 6.773263100852766, + "learning_rate": 2.89301753758933e-06, + "loss": 1.2212, + "step": 126252 + }, + { + "epoch": 1.52, + "grad_norm": 5.372540733594142, + "learning_rate": 2.892606582229929e-06, + "loss": 1.1663, + "step": 126255 + }, + { + "epoch": 1.52, + "grad_norm": 7.518603790684723, + "learning_rate": 2.8921956511255024e-06, + "loss": 1.2653, + "step": 126258 + }, + { + "epoch": 1.52, + "grad_norm": 4.27223366332276, + "learning_rate": 2.8917847442774483e-06, + "loss": 1.3761, + "step": 126261 + }, + { + "epoch": 1.52, + "grad_norm": 3.701318532578395, + "learning_rate": 2.891373861687173e-06, + "loss": 1.4712, + "step": 126264 + }, + { + "epoch": 1.52, + "grad_norm": 5.398271486963834, + "learning_rate": 2.8909630033560775e-06, + "loss": 1.126, + "step": 126267 + }, + { + "epoch": 1.52, + "grad_norm": 18.23786115602324, + "learning_rate": 2.8905521692855686e-06, + "loss": 1.1978, + "step": 126270 + }, + { + "epoch": 1.52, + "grad_norm": 6.795684767833432, + "learning_rate": 2.89014135947704e-06, + "loss": 1.2894, + "step": 126273 + }, + { + "epoch": 1.52, + "grad_norm": 6.975206850510915, + "learning_rate": 2.8897305739319026e-06, + "loss": 1.1228, + "step": 126276 + }, + { + "epoch": 1.52, + "grad_norm": 7.447246617241473, + "learning_rate": 2.88931981265155e-06, + "loss": 1.061, + "step": 126279 + }, + { + "epoch": 1.52, + "grad_norm": 31.69549334002036, + "learning_rate": 2.888909075637387e-06, + "loss": 1.0178, + "step": 126282 + }, + { + "epoch": 1.52, + "grad_norm": 7.368876017133545, + "learning_rate": 2.888498362890819e-06, + "loss": 1.4117, + "step": 126285 + }, + { + "epoch": 1.52, + "grad_norm": 8.244979645746303, + "learning_rate": 2.8880876744132423e-06, + "loss": 1.0752, + "step": 126288 + }, + { + "epoch": 1.52, + "grad_norm": 4.627744009667936, + "learning_rate": 2.8876770102060604e-06, + "loss": 0.8267, + "step": 126291 + }, + { + "epoch": 1.52, + "grad_norm": 9.039915988247, + "learning_rate": 2.887266370270675e-06, + "loss": 1.0793, + "step": 126294 + }, + { + "epoch": 1.52, + "grad_norm": 7.582621479692908, + "learning_rate": 2.8868557546084895e-06, + "loss": 1.372, + "step": 126297 + }, + { + "epoch": 1.52, + "grad_norm": 22.84402446470005, + "learning_rate": 2.8864451632209045e-06, + "loss": 1.5211, + "step": 126300 + }, + { + "epoch": 1.52, + "grad_norm": 7.5930775033248015, + "learning_rate": 2.886034596109316e-06, + "loss": 1.0323, + "step": 126303 + }, + { + "epoch": 1.52, + "grad_norm": 8.17161827645535, + "learning_rate": 2.885624053275129e-06, + "loss": 1.1374, + "step": 126306 + }, + { + "epoch": 1.52, + "grad_norm": 6.084405433192316, + "learning_rate": 2.885213534719745e-06, + "loss": 1.5161, + "step": 126309 + }, + { + "epoch": 1.52, + "grad_norm": 9.131335640793768, + "learning_rate": 2.884803040444568e-06, + "loss": 1.0881, + "step": 126312 + }, + { + "epoch": 1.52, + "grad_norm": 10.088213520860773, + "learning_rate": 2.884392570450991e-06, + "loss": 1.3891, + "step": 126315 + }, + { + "epoch": 1.52, + "grad_norm": 7.1270274773650115, + "learning_rate": 2.883982124740423e-06, + "loss": 1.3666, + "step": 126318 + }, + { + "epoch": 1.52, + "grad_norm": 6.47179654903652, + "learning_rate": 2.8835717033142574e-06, + "loss": 0.9371, + "step": 126321 + }, + { + "epoch": 1.52, + "grad_norm": 10.647269518363718, + "learning_rate": 2.8831613061739016e-06, + "loss": 1.207, + "step": 126324 + }, + { + "epoch": 1.52, + "grad_norm": 6.627434568897904, + "learning_rate": 2.8827509333207506e-06, + "loss": 1.1634, + "step": 126327 + }, + { + "epoch": 1.52, + "grad_norm": 5.027802979223777, + "learning_rate": 2.8823405847562058e-06, + "loss": 1.2954, + "step": 126330 + }, + { + "epoch": 1.52, + "grad_norm": 10.04724599054083, + "learning_rate": 2.881930260481669e-06, + "loss": 0.9328, + "step": 126333 + }, + { + "epoch": 1.52, + "grad_norm": 3.5436890356146216, + "learning_rate": 2.88151996049854e-06, + "loss": 1.3002, + "step": 126336 + }, + { + "epoch": 1.52, + "grad_norm": 13.809118140677546, + "learning_rate": 2.881109684808223e-06, + "loss": 1.1559, + "step": 126339 + }, + { + "epoch": 1.52, + "grad_norm": 9.452912282972347, + "learning_rate": 2.8806994334121143e-06, + "loss": 1.2898, + "step": 126342 + }, + { + "epoch": 1.52, + "grad_norm": 10.986287708617322, + "learning_rate": 2.8802892063116105e-06, + "loss": 1.2531, + "step": 126345 + }, + { + "epoch": 1.52, + "grad_norm": 6.778032226883163, + "learning_rate": 2.879879003508116e-06, + "loss": 1.2462, + "step": 126348 + }, + { + "epoch": 1.52, + "grad_norm": 2.921181966204782, + "learning_rate": 2.8794688250030324e-06, + "loss": 1.2991, + "step": 126351 + }, + { + "epoch": 1.52, + "grad_norm": 11.826017281734124, + "learning_rate": 2.879058670797754e-06, + "loss": 1.5662, + "step": 126354 + }, + { + "epoch": 1.52, + "grad_norm": 7.351890646205579, + "learning_rate": 2.878648540893684e-06, + "loss": 1.3383, + "step": 126357 + }, + { + "epoch": 1.52, + "grad_norm": 6.8033921129128725, + "learning_rate": 2.8782384352922234e-06, + "loss": 1.2361, + "step": 126360 + }, + { + "epoch": 1.52, + "grad_norm": 10.860012446190524, + "learning_rate": 2.877828353994767e-06, + "loss": 1.176, + "step": 126363 + }, + { + "epoch": 1.52, + "grad_norm": 5.849838144408196, + "learning_rate": 2.8774182970027198e-06, + "loss": 1.1261, + "step": 126366 + }, + { + "epoch": 1.52, + "grad_norm": 6.924283443248677, + "learning_rate": 2.8770082643174756e-06, + "loss": 1.0047, + "step": 126369 + }, + { + "epoch": 1.52, + "grad_norm": 7.506025476937872, + "learning_rate": 2.8765982559404372e-06, + "loss": 1.1056, + "step": 126372 + }, + { + "epoch": 1.52, + "grad_norm": 14.907617995613418, + "learning_rate": 2.8761882718730017e-06, + "loss": 1.2679, + "step": 126375 + }, + { + "epoch": 1.52, + "grad_norm": 3.352655400444562, + "learning_rate": 2.875778312116574e-06, + "loss": 1.0865, + "step": 126378 + }, + { + "epoch": 1.52, + "grad_norm": 11.600325051911295, + "learning_rate": 2.8753683766725447e-06, + "loss": 0.967, + "step": 126381 + }, + { + "epoch": 1.52, + "grad_norm": 6.487814582809088, + "learning_rate": 2.8749584655423213e-06, + "loss": 0.9372, + "step": 126384 + }, + { + "epoch": 1.52, + "grad_norm": 5.235544261546515, + "learning_rate": 2.8745485787272943e-06, + "loss": 1.5961, + "step": 126387 + }, + { + "epoch": 1.52, + "grad_norm": 8.103481278474096, + "learning_rate": 2.8741387162288657e-06, + "loss": 1.3604, + "step": 126390 + }, + { + "epoch": 1.52, + "grad_norm": 7.427831299436163, + "learning_rate": 2.8737288780484397e-06, + "loss": 1.2854, + "step": 126393 + }, + { + "epoch": 1.52, + "grad_norm": 5.112034143900163, + "learning_rate": 2.8733190641874054e-06, + "loss": 1.4878, + "step": 126396 + }, + { + "epoch": 1.52, + "grad_norm": 23.615203761552372, + "learning_rate": 2.8729092746471675e-06, + "loss": 1.241, + "step": 126399 + }, + { + "epoch": 1.52, + "grad_norm": 5.678690154357954, + "learning_rate": 2.8724995094291254e-06, + "loss": 0.8689, + "step": 126402 + }, + { + "epoch": 1.52, + "grad_norm": 10.055159148208562, + "learning_rate": 2.8720897685346728e-06, + "loss": 1.0995, + "step": 126405 + }, + { + "epoch": 1.52, + "grad_norm": 6.18573379193196, + "learning_rate": 2.871680051965212e-06, + "loss": 0.8791, + "step": 126408 + }, + { + "epoch": 1.52, + "grad_norm": 12.926166368795407, + "learning_rate": 2.871270359722138e-06, + "loss": 1.2654, + "step": 126411 + }, + { + "epoch": 1.52, + "grad_norm": 6.616666828479528, + "learning_rate": 2.8708606918068494e-06, + "loss": 1.0044, + "step": 126414 + }, + { + "epoch": 1.52, + "grad_norm": 5.600493640908259, + "learning_rate": 2.870451048220746e-06, + "loss": 1.0863, + "step": 126417 + }, + { + "epoch": 1.52, + "grad_norm": 13.108506620952305, + "learning_rate": 2.870041428965228e-06, + "loss": 1.0566, + "step": 126420 + }, + { + "epoch": 1.52, + "grad_norm": 9.158614002646688, + "learning_rate": 2.8696318340416872e-06, + "loss": 1.4356, + "step": 126423 + }, + { + "epoch": 1.52, + "grad_norm": 7.554118882759854, + "learning_rate": 2.869222263451528e-06, + "loss": 1.2748, + "step": 126426 + }, + { + "epoch": 1.52, + "grad_norm": 7.011152545666592, + "learning_rate": 2.868812717196141e-06, + "loss": 1.1501, + "step": 126429 + }, + { + "epoch": 1.52, + "grad_norm": 4.925947928518421, + "learning_rate": 2.868403195276932e-06, + "loss": 0.933, + "step": 126432 + }, + { + "epoch": 1.52, + "grad_norm": 11.361551412378047, + "learning_rate": 2.8679936976952893e-06, + "loss": 1.1052, + "step": 126435 + }, + { + "epoch": 1.52, + "grad_norm": 10.835189730304592, + "learning_rate": 2.8675842244526165e-06, + "loss": 1.3466, + "step": 126438 + }, + { + "epoch": 1.52, + "grad_norm": 8.3017667684866, + "learning_rate": 2.8671747755503088e-06, + "loss": 1.1847, + "step": 126441 + }, + { + "epoch": 1.52, + "grad_norm": 9.21430338705831, + "learning_rate": 2.8667653509897653e-06, + "loss": 0.9278, + "step": 126444 + }, + { + "epoch": 1.52, + "grad_norm": 3.177420804558287, + "learning_rate": 2.8663559507723846e-06, + "loss": 1.202, + "step": 126447 + }, + { + "epoch": 1.52, + "grad_norm": 4.6789777084293815, + "learning_rate": 2.865946574899562e-06, + "loss": 1.1141, + "step": 126450 + }, + { + "epoch": 1.52, + "grad_norm": 43.884812723068016, + "learning_rate": 2.8655372233726906e-06, + "loss": 1.0882, + "step": 126453 + }, + { + "epoch": 1.52, + "grad_norm": 19.216335570008766, + "learning_rate": 2.8651278961931704e-06, + "loss": 1.1616, + "step": 126456 + }, + { + "epoch": 1.52, + "grad_norm": 8.950992157834495, + "learning_rate": 2.864718593362402e-06, + "loss": 0.8399, + "step": 126459 + }, + { + "epoch": 1.52, + "grad_norm": 10.81102572531008, + "learning_rate": 2.864309314881777e-06, + "loss": 0.9311, + "step": 126462 + }, + { + "epoch": 1.52, + "grad_norm": 4.852200940390269, + "learning_rate": 2.863900060752692e-06, + "loss": 1.0363, + "step": 126465 + }, + { + "epoch": 1.52, + "grad_norm": 14.826729560443889, + "learning_rate": 2.863490830976551e-06, + "loss": 1.2651, + "step": 126468 + }, + { + "epoch": 1.52, + "grad_norm": 14.944912519026905, + "learning_rate": 2.8630816255547413e-06, + "loss": 1.2192, + "step": 126471 + }, + { + "epoch": 1.52, + "grad_norm": 4.752876184091705, + "learning_rate": 2.8626724444886657e-06, + "loss": 1.4454, + "step": 126474 + }, + { + "epoch": 1.52, + "grad_norm": 20.216788156218335, + "learning_rate": 2.8622632877797163e-06, + "loss": 0.8352, + "step": 126477 + }, + { + "epoch": 1.52, + "grad_norm": 2.172004241510008, + "learning_rate": 2.86185415542929e-06, + "loss": 1.0965, + "step": 126480 + }, + { + "epoch": 1.52, + "grad_norm": 6.925387752250429, + "learning_rate": 2.8614450474387845e-06, + "loss": 1.5222, + "step": 126483 + }, + { + "epoch": 1.52, + "grad_norm": 10.048389506535917, + "learning_rate": 2.8610359638095995e-06, + "loss": 1.5877, + "step": 126486 + }, + { + "epoch": 1.52, + "grad_norm": 9.030539985885895, + "learning_rate": 2.8606269045431247e-06, + "loss": 1.2556, + "step": 126489 + }, + { + "epoch": 1.52, + "grad_norm": 7.726666799498459, + "learning_rate": 2.860217869640761e-06, + "loss": 1.118, + "step": 126492 + }, + { + "epoch": 1.52, + "grad_norm": 9.237581046961013, + "learning_rate": 2.8598088591038985e-06, + "loss": 1.181, + "step": 126495 + }, + { + "epoch": 1.52, + "grad_norm": 27.691334771997568, + "learning_rate": 2.859399872933937e-06, + "loss": 1.1937, + "step": 126498 + }, + { + "epoch": 1.52, + "grad_norm": 10.406161883881886, + "learning_rate": 2.8589909111322746e-06, + "loss": 0.9034, + "step": 126501 + }, + { + "epoch": 1.52, + "grad_norm": 2.8175407951862517, + "learning_rate": 2.8585819737003016e-06, + "loss": 1.3124, + "step": 126504 + }, + { + "epoch": 1.52, + "grad_norm": 20.110905549400577, + "learning_rate": 2.8581730606394155e-06, + "loss": 1.3597, + "step": 126507 + }, + { + "epoch": 1.52, + "grad_norm": 10.369773076783133, + "learning_rate": 2.8577641719510153e-06, + "loss": 1.3992, + "step": 126510 + }, + { + "epoch": 1.52, + "grad_norm": 34.77509000220559, + "learning_rate": 2.8573553076364933e-06, + "loss": 1.2949, + "step": 126513 + }, + { + "epoch": 1.52, + "grad_norm": 17.392142165965755, + "learning_rate": 2.8569464676972413e-06, + "loss": 1.5481, + "step": 126516 + }, + { + "epoch": 1.52, + "grad_norm": 7.467092616306323, + "learning_rate": 2.8565376521346586e-06, + "loss": 0.9988, + "step": 126519 + }, + { + "epoch": 1.52, + "grad_norm": 10.394998672327455, + "learning_rate": 2.8561288609501403e-06, + "loss": 1.2292, + "step": 126522 + }, + { + "epoch": 1.52, + "grad_norm": 12.57367224159363, + "learning_rate": 2.85572009414508e-06, + "loss": 1.0129, + "step": 126525 + }, + { + "epoch": 1.52, + "grad_norm": 9.502201727786463, + "learning_rate": 2.8553113517208773e-06, + "loss": 1.2927, + "step": 126528 + }, + { + "epoch": 1.52, + "grad_norm": 7.9452656070017005, + "learning_rate": 2.854902633678921e-06, + "loss": 1.4101, + "step": 126531 + }, + { + "epoch": 1.52, + "grad_norm": 16.117392981861435, + "learning_rate": 2.854493940020611e-06, + "loss": 1.4683, + "step": 126534 + }, + { + "epoch": 1.52, + "grad_norm": 7.07080274928909, + "learning_rate": 2.8540852707473364e-06, + "loss": 1.0403, + "step": 126537 + }, + { + "epoch": 1.52, + "grad_norm": 6.34277924179742, + "learning_rate": 2.8536766258604975e-06, + "loss": 1.2179, + "step": 126540 + }, + { + "epoch": 1.52, + "grad_norm": 6.2291298364670835, + "learning_rate": 2.8532680053614825e-06, + "loss": 1.0605, + "step": 126543 + }, + { + "epoch": 1.52, + "grad_norm": 6.41369795508431, + "learning_rate": 2.85285940925169e-06, + "loss": 1.1121, + "step": 126546 + }, + { + "epoch": 1.52, + "grad_norm": 10.13633371363232, + "learning_rate": 2.8524508375325157e-06, + "loss": 1.1378, + "step": 126549 + }, + { + "epoch": 1.52, + "grad_norm": 11.644214176219124, + "learning_rate": 2.8520422902053537e-06, + "loss": 1.3429, + "step": 126552 + }, + { + "epoch": 1.52, + "grad_norm": 6.619309950060721, + "learning_rate": 2.8516337672715966e-06, + "loss": 0.9799, + "step": 126555 + }, + { + "epoch": 1.52, + "grad_norm": 32.442678443216586, + "learning_rate": 2.8512252687326357e-06, + "loss": 1.4008, + "step": 126558 + }, + { + "epoch": 1.52, + "grad_norm": 6.485375265935791, + "learning_rate": 2.850816794589868e-06, + "loss": 1.4418, + "step": 126561 + }, + { + "epoch": 1.52, + "grad_norm": 10.083367630960796, + "learning_rate": 2.850408344844687e-06, + "loss": 1.2102, + "step": 126564 + }, + { + "epoch": 1.52, + "grad_norm": 16.719839685715765, + "learning_rate": 2.849999919498491e-06, + "loss": 1.0337, + "step": 126567 + }, + { + "epoch": 1.52, + "grad_norm": 9.697854779352992, + "learning_rate": 2.849591518552666e-06, + "loss": 1.2597, + "step": 126570 + }, + { + "epoch": 1.52, + "grad_norm": 11.317234124972172, + "learning_rate": 2.8491831420086103e-06, + "loss": 0.8631, + "step": 126573 + }, + { + "epoch": 1.52, + "grad_norm": 10.233273726396941, + "learning_rate": 2.848774789867719e-06, + "loss": 0.9954, + "step": 126576 + }, + { + "epoch": 1.52, + "grad_norm": 7.009833903032288, + "learning_rate": 2.848366462131381e-06, + "loss": 0.9602, + "step": 126579 + }, + { + "epoch": 1.52, + "grad_norm": 6.8885800729629265, + "learning_rate": 2.847958158800995e-06, + "loss": 0.9654, + "step": 126582 + }, + { + "epoch": 1.52, + "grad_norm": 5.784634909760728, + "learning_rate": 2.847549879877949e-06, + "loss": 1.2322, + "step": 126585 + }, + { + "epoch": 1.52, + "grad_norm": 2.4785788812489935, + "learning_rate": 2.8471416253636384e-06, + "loss": 1.4381, + "step": 126588 + }, + { + "epoch": 1.52, + "grad_norm": 4.28707558764798, + "learning_rate": 2.846733395259458e-06, + "loss": 1.1164, + "step": 126591 + }, + { + "epoch": 1.52, + "grad_norm": 3.42919484828953, + "learning_rate": 2.8463251895668022e-06, + "loss": 1.0201, + "step": 126594 + }, + { + "epoch": 1.52, + "grad_norm": 2.322676257509671, + "learning_rate": 2.8459170082870624e-06, + "loss": 1.0675, + "step": 126597 + }, + { + "epoch": 1.52, + "grad_norm": 4.348844561001315, + "learning_rate": 2.8455088514216277e-06, + "loss": 1.1977, + "step": 126600 + }, + { + "epoch": 1.52, + "grad_norm": 5.813261649631427, + "learning_rate": 2.8451007189718936e-06, + "loss": 1.2243, + "step": 126603 + }, + { + "epoch": 1.52, + "grad_norm": 12.644755026648108, + "learning_rate": 2.844692610939254e-06, + "loss": 1.5136, + "step": 126606 + }, + { + "epoch": 1.52, + "grad_norm": 6.285474225203823, + "learning_rate": 2.8442845273251043e-06, + "loss": 1.4541, + "step": 126609 + }, + { + "epoch": 1.52, + "grad_norm": 8.73167022489953, + "learning_rate": 2.8438764681308308e-06, + "loss": 1.298, + "step": 126612 + }, + { + "epoch": 1.52, + "grad_norm": 12.330237757034183, + "learning_rate": 2.84346843335783e-06, + "loss": 1.2132, + "step": 126615 + }, + { + "epoch": 1.52, + "grad_norm": 7.772244700823166, + "learning_rate": 2.8430604230074954e-06, + "loss": 0.6388, + "step": 126618 + }, + { + "epoch": 1.52, + "grad_norm": 9.139774210541628, + "learning_rate": 2.842652437081219e-06, + "loss": 0.9613, + "step": 126621 + }, + { + "epoch": 1.52, + "grad_norm": 35.673185602641276, + "learning_rate": 2.842244475580388e-06, + "loss": 0.9474, + "step": 126624 + }, + { + "epoch": 1.52, + "grad_norm": 6.836873882088653, + "learning_rate": 2.8418365385063985e-06, + "loss": 1.142, + "step": 126627 + }, + { + "epoch": 1.52, + "grad_norm": 9.611528405934964, + "learning_rate": 2.8414286258606426e-06, + "loss": 1.1719, + "step": 126630 + }, + { + "epoch": 1.52, + "grad_norm": 3.446052452664877, + "learning_rate": 2.841020737644512e-06, + "loss": 0.9928, + "step": 126633 + }, + { + "epoch": 1.52, + "grad_norm": 11.324324467001201, + "learning_rate": 2.8406128738594017e-06, + "loss": 1.0424, + "step": 126636 + }, + { + "epoch": 1.52, + "grad_norm": 5.273052156319571, + "learning_rate": 2.840205034506702e-06, + "loss": 0.6202, + "step": 126639 + }, + { + "epoch": 1.52, + "grad_norm": 9.509320455356933, + "learning_rate": 2.8397972195878e-06, + "loss": 0.8813, + "step": 126642 + }, + { + "epoch": 1.52, + "grad_norm": 13.250482876373104, + "learning_rate": 2.83938942910409e-06, + "loss": 1.0143, + "step": 126645 + }, + { + "epoch": 1.52, + "grad_norm": 3.4043332196264418, + "learning_rate": 2.838981663056969e-06, + "loss": 1.0859, + "step": 126648 + }, + { + "epoch": 1.52, + "grad_norm": 8.870363648972178, + "learning_rate": 2.838573921447821e-06, + "loss": 1.382, + "step": 126651 + }, + { + "epoch": 1.52, + "grad_norm": 4.160111240797998, + "learning_rate": 2.83816620427804e-06, + "loss": 1.0209, + "step": 126654 + }, + { + "epoch": 1.52, + "grad_norm": 4.556613963652623, + "learning_rate": 2.8377585115490194e-06, + "loss": 0.9532, + "step": 126657 + }, + { + "epoch": 1.52, + "grad_norm": 8.089423193410925, + "learning_rate": 2.837350843262151e-06, + "loss": 0.9465, + "step": 126660 + }, + { + "epoch": 1.52, + "grad_norm": 10.207968872535366, + "learning_rate": 2.8369431994188248e-06, + "loss": 1.516, + "step": 126663 + }, + { + "epoch": 1.52, + "grad_norm": 21.375423031616467, + "learning_rate": 2.8365355800204276e-06, + "loss": 1.2733, + "step": 126666 + }, + { + "epoch": 1.52, + "grad_norm": 8.599881189053669, + "learning_rate": 2.8361279850683543e-06, + "loss": 1.2032, + "step": 126669 + }, + { + "epoch": 1.52, + "grad_norm": 10.485703934967674, + "learning_rate": 2.835720414563996e-06, + "loss": 1.4549, + "step": 126672 + }, + { + "epoch": 1.52, + "grad_norm": 3.9062722404279278, + "learning_rate": 2.8353128685087473e-06, + "loss": 1.0301, + "step": 126675 + }, + { + "epoch": 1.52, + "grad_norm": 9.537526023272424, + "learning_rate": 2.834905346903991e-06, + "loss": 1.3449, + "step": 126678 + }, + { + "epoch": 1.52, + "grad_norm": 6.1080306015337795, + "learning_rate": 2.834497849751122e-06, + "loss": 1.1684, + "step": 126681 + }, + { + "epoch": 1.52, + "grad_norm": 65.79113179422373, + "learning_rate": 2.8340903770515337e-06, + "loss": 0.9856, + "step": 126684 + }, + { + "epoch": 1.52, + "grad_norm": 26.035680649102908, + "learning_rate": 2.8336829288066112e-06, + "loss": 1.0223, + "step": 126687 + }, + { + "epoch": 1.52, + "grad_norm": 18.333392041951132, + "learning_rate": 2.833275505017751e-06, + "loss": 1.1773, + "step": 126690 + }, + { + "epoch": 1.52, + "grad_norm": 3.740244503896927, + "learning_rate": 2.8328681056863362e-06, + "loss": 1.3851, + "step": 126693 + }, + { + "epoch": 1.52, + "grad_norm": 6.458518043585332, + "learning_rate": 2.8324607308137607e-06, + "loss": 0.9891, + "step": 126696 + }, + { + "epoch": 1.52, + "grad_norm": 3.166785982331376, + "learning_rate": 2.832053380401416e-06, + "loss": 1.0988, + "step": 126699 + }, + { + "epoch": 1.52, + "grad_norm": 28.05962864572417, + "learning_rate": 2.831646054450694e-06, + "loss": 1.4675, + "step": 126702 + }, + { + "epoch": 1.52, + "grad_norm": 2.6497583250484977, + "learning_rate": 2.8312387529629817e-06, + "loss": 1.0406, + "step": 126705 + }, + { + "epoch": 1.52, + "grad_norm": 15.158398475853515, + "learning_rate": 2.830831475939666e-06, + "loss": 1.2522, + "step": 126708 + }, + { + "epoch": 1.52, + "grad_norm": 7.089894101341489, + "learning_rate": 2.830424223382141e-06, + "loss": 0.7399, + "step": 126711 + }, + { + "epoch": 1.52, + "grad_norm": 8.946108664750108, + "learning_rate": 2.8300169952917943e-06, + "loss": 1.0716, + "step": 126714 + }, + { + "epoch": 1.52, + "grad_norm": 11.760302573341061, + "learning_rate": 2.8296097916700217e-06, + "loss": 1.2874, + "step": 126717 + }, + { + "epoch": 1.52, + "grad_norm": 9.02648143345436, + "learning_rate": 2.829202612518204e-06, + "loss": 1.3013, + "step": 126720 + }, + { + "epoch": 1.52, + "grad_norm": 8.76466664357824, + "learning_rate": 2.8287954578377353e-06, + "loss": 1.2622, + "step": 126723 + }, + { + "epoch": 1.52, + "grad_norm": 6.942689917813277, + "learning_rate": 2.8283883276300074e-06, + "loss": 1.1732, + "step": 126726 + }, + { + "epoch": 1.52, + "grad_norm": 3.0629932724220965, + "learning_rate": 2.8279812218964076e-06, + "loss": 0.9062, + "step": 126729 + }, + { + "epoch": 1.52, + "grad_norm": 7.825540729153872, + "learning_rate": 2.8275741406383207e-06, + "loss": 0.9982, + "step": 126732 + }, + { + "epoch": 1.52, + "grad_norm": 7.849679164270888, + "learning_rate": 2.8271670838571397e-06, + "loss": 1.3198, + "step": 126735 + }, + { + "epoch": 1.52, + "grad_norm": 5.4295523707542745, + "learning_rate": 2.826760051554255e-06, + "loss": 1.2405, + "step": 126738 + }, + { + "epoch": 1.52, + "grad_norm": 7.318704258727122, + "learning_rate": 2.8263530437310527e-06, + "loss": 1.2025, + "step": 126741 + }, + { + "epoch": 1.52, + "grad_norm": 7.93603137463541, + "learning_rate": 2.8259460603889276e-06, + "loss": 1.4065, + "step": 126744 + }, + { + "epoch": 1.52, + "grad_norm": 17.846026679804282, + "learning_rate": 2.8255391015292645e-06, + "loss": 1.0881, + "step": 126747 + }, + { + "epoch": 1.52, + "grad_norm": 8.207883078436755, + "learning_rate": 2.825132167153449e-06, + "loss": 1.3448, + "step": 126750 + }, + { + "epoch": 1.52, + "grad_norm": 14.179278158943534, + "learning_rate": 2.8247252572628736e-06, + "loss": 1.3076, + "step": 126753 + }, + { + "epoch": 1.52, + "grad_norm": 8.085574885379135, + "learning_rate": 2.8243183718589283e-06, + "loss": 1.3584, + "step": 126756 + }, + { + "epoch": 1.52, + "grad_norm": 13.390861166386665, + "learning_rate": 2.823911510942998e-06, + "loss": 1.2339, + "step": 126759 + }, + { + "epoch": 1.52, + "grad_norm": 6.533076960321532, + "learning_rate": 2.823504674516472e-06, + "loss": 1.2854, + "step": 126762 + }, + { + "epoch": 1.52, + "grad_norm": 21.050535574543616, + "learning_rate": 2.823097862580739e-06, + "loss": 1.3409, + "step": 126765 + }, + { + "epoch": 1.52, + "grad_norm": 4.603925869177606, + "learning_rate": 2.8226910751371927e-06, + "loss": 1.1432, + "step": 126768 + }, + { + "epoch": 1.52, + "grad_norm": 6.609249981800333, + "learning_rate": 2.822284312187216e-06, + "loss": 0.9462, + "step": 126771 + }, + { + "epoch": 1.52, + "grad_norm": 6.721383105724603, + "learning_rate": 2.8218775737321934e-06, + "loss": 1.208, + "step": 126774 + }, + { + "epoch": 1.52, + "grad_norm": 25.299054943357774, + "learning_rate": 2.8214708597735175e-06, + "loss": 1.0451, + "step": 126777 + }, + { + "epoch": 1.52, + "grad_norm": 9.072650938098954, + "learning_rate": 2.821064170312576e-06, + "loss": 1.2747, + "step": 126780 + }, + { + "epoch": 1.52, + "grad_norm": 4.513193305517857, + "learning_rate": 2.82065750535076e-06, + "loss": 1.2806, + "step": 126783 + }, + { + "epoch": 1.52, + "grad_norm": 6.640685960956336, + "learning_rate": 2.820250864889451e-06, + "loss": 1.2027, + "step": 126786 + }, + { + "epoch": 1.52, + "grad_norm": 3.550066581672973, + "learning_rate": 2.8198442489300414e-06, + "loss": 1.0139, + "step": 126789 + }, + { + "epoch": 1.52, + "grad_norm": 23.836649147164152, + "learning_rate": 2.8194376574739155e-06, + "loss": 1.0917, + "step": 126792 + }, + { + "epoch": 1.52, + "grad_norm": 18.410184368620005, + "learning_rate": 2.8190310905224617e-06, + "loss": 1.1751, + "step": 126795 + }, + { + "epoch": 1.52, + "grad_norm": 14.518915289329618, + "learning_rate": 2.8186245480770725e-06, + "loss": 1.1252, + "step": 126798 + }, + { + "epoch": 1.52, + "grad_norm": 4.438541857201546, + "learning_rate": 2.818218030139126e-06, + "loss": 1.3948, + "step": 126801 + }, + { + "epoch": 1.52, + "grad_norm": 3.9369987408874647, + "learning_rate": 2.8178115367100155e-06, + "loss": 0.826, + "step": 126804 + }, + { + "epoch": 1.52, + "grad_norm": 4.428108771947356, + "learning_rate": 2.817405067791128e-06, + "loss": 1.1872, + "step": 126807 + }, + { + "epoch": 1.52, + "grad_norm": 3.892735019135663, + "learning_rate": 2.816998623383852e-06, + "loss": 1.3432, + "step": 126810 + }, + { + "epoch": 1.52, + "grad_norm": 9.810332772766559, + "learning_rate": 2.816592203489572e-06, + "loss": 1.3291, + "step": 126813 + }, + { + "epoch": 1.52, + "grad_norm": 2.937978043091323, + "learning_rate": 2.8161858081096726e-06, + "loss": 1.1326, + "step": 126816 + }, + { + "epoch": 1.52, + "grad_norm": 11.533829172157665, + "learning_rate": 2.815779437245544e-06, + "loss": 1.3507, + "step": 126819 + }, + { + "epoch": 1.52, + "grad_norm": 8.968194035065109, + "learning_rate": 2.815373090898572e-06, + "loss": 0.9984, + "step": 126822 + }, + { + "epoch": 1.53, + "grad_norm": 9.200848041664905, + "learning_rate": 2.8149667690701464e-06, + "loss": 1.2637, + "step": 126825 + }, + { + "epoch": 1.53, + "grad_norm": 20.91140879972474, + "learning_rate": 2.8145604717616495e-06, + "loss": 1.4625, + "step": 126828 + }, + { + "epoch": 1.53, + "grad_norm": 6.576866410940517, + "learning_rate": 2.8141541989744713e-06, + "loss": 0.9113, + "step": 126831 + }, + { + "epoch": 1.53, + "grad_norm": 8.914877409596333, + "learning_rate": 2.813747950709994e-06, + "loss": 1.035, + "step": 126834 + }, + { + "epoch": 1.53, + "grad_norm": 7.456983899703781, + "learning_rate": 2.8133417269696094e-06, + "loss": 1.3475, + "step": 126837 + }, + { + "epoch": 1.53, + "grad_norm": 12.381056037030117, + "learning_rate": 2.812935527754698e-06, + "loss": 1.1352, + "step": 126840 + }, + { + "epoch": 1.53, + "grad_norm": 10.76058131571069, + "learning_rate": 2.8125293530666486e-06, + "loss": 1.2514, + "step": 126843 + }, + { + "epoch": 1.53, + "grad_norm": 13.907078706782924, + "learning_rate": 2.812123202906847e-06, + "loss": 1.3002, + "step": 126846 + }, + { + "epoch": 1.53, + "grad_norm": 10.857055251300578, + "learning_rate": 2.811717077276681e-06, + "loss": 1.0099, + "step": 126849 + }, + { + "epoch": 1.53, + "grad_norm": 3.4930754507526784, + "learning_rate": 2.811310976177538e-06, + "loss": 1.3611, + "step": 126852 + }, + { + "epoch": 1.53, + "grad_norm": 5.536904582786443, + "learning_rate": 2.810904899610801e-06, + "loss": 1.0969, + "step": 126855 + }, + { + "epoch": 1.53, + "grad_norm": 7.6289991028288595, + "learning_rate": 2.810498847577853e-06, + "loss": 1.3348, + "step": 126858 + }, + { + "epoch": 1.53, + "grad_norm": 19.81449911664736, + "learning_rate": 2.8100928200800838e-06, + "loss": 1.2986, + "step": 126861 + }, + { + "epoch": 1.53, + "grad_norm": 10.08156553023347, + "learning_rate": 2.8096868171188797e-06, + "loss": 1.3231, + "step": 126864 + }, + { + "epoch": 1.53, + "grad_norm": 8.699576894543256, + "learning_rate": 2.809280838695623e-06, + "loss": 0.978, + "step": 126867 + }, + { + "epoch": 1.53, + "grad_norm": 21.125119751134363, + "learning_rate": 2.808874884811699e-06, + "loss": 1.1155, + "step": 126870 + }, + { + "epoch": 1.53, + "grad_norm": 6.996112344775107, + "learning_rate": 2.8084689554684986e-06, + "loss": 1.2675, + "step": 126873 + }, + { + "epoch": 1.53, + "grad_norm": 4.369465016571864, + "learning_rate": 2.8080630506674e-06, + "loss": 1.0834, + "step": 126876 + }, + { + "epoch": 1.53, + "grad_norm": 6.874212971634691, + "learning_rate": 2.8076571704097955e-06, + "loss": 1.4191, + "step": 126879 + }, + { + "epoch": 1.53, + "grad_norm": 7.550400183624802, + "learning_rate": 2.8072513146970624e-06, + "loss": 1.2119, + "step": 126882 + }, + { + "epoch": 1.53, + "grad_norm": 4.511311566614093, + "learning_rate": 2.8068454835305902e-06, + "loss": 1.126, + "step": 126885 + }, + { + "epoch": 1.53, + "grad_norm": 13.626086645943987, + "learning_rate": 2.806439676911764e-06, + "loss": 1.0354, + "step": 126888 + }, + { + "epoch": 1.53, + "grad_norm": 8.106324942950641, + "learning_rate": 2.806033894841971e-06, + "loss": 0.9685, + "step": 126891 + }, + { + "epoch": 1.53, + "grad_norm": 9.92893192050282, + "learning_rate": 2.8056281373225893e-06, + "loss": 0.9305, + "step": 126894 + }, + { + "epoch": 1.53, + "grad_norm": 10.089464889128555, + "learning_rate": 2.8052224043550104e-06, + "loss": 1.3106, + "step": 126897 + }, + { + "epoch": 1.53, + "grad_norm": 8.002306756419825, + "learning_rate": 2.804816695940613e-06, + "loss": 1.2119, + "step": 126900 + }, + { + "epoch": 1.53, + "grad_norm": 5.953094868608933, + "learning_rate": 2.8044110120807844e-06, + "loss": 1.0927, + "step": 126903 + }, + { + "epoch": 1.53, + "grad_norm": 13.071861767964748, + "learning_rate": 2.8040053527769127e-06, + "loss": 1.102, + "step": 126906 + }, + { + "epoch": 1.53, + "grad_norm": 3.740002908281323, + "learning_rate": 2.803599718030375e-06, + "loss": 1.4054, + "step": 126909 + }, + { + "epoch": 1.53, + "grad_norm": 8.728519185807148, + "learning_rate": 2.8031941078425606e-06, + "loss": 1.6099, + "step": 126912 + }, + { + "epoch": 1.53, + "grad_norm": 13.985597142342218, + "learning_rate": 2.8027885222148543e-06, + "loss": 1.3652, + "step": 126915 + }, + { + "epoch": 1.53, + "grad_norm": 8.050658238334801, + "learning_rate": 2.802382961148635e-06, + "loss": 1.4332, + "step": 126918 + }, + { + "epoch": 1.53, + "grad_norm": 11.931614254977447, + "learning_rate": 2.8019774246452946e-06, + "loss": 1.1567, + "step": 126921 + }, + { + "epoch": 1.53, + "grad_norm": 5.669526630281151, + "learning_rate": 2.801571912706208e-06, + "loss": 0.9847, + "step": 126924 + }, + { + "epoch": 1.53, + "grad_norm": 5.155078673663434, + "learning_rate": 2.8011664253327654e-06, + "loss": 1.1775, + "step": 126927 + }, + { + "epoch": 1.53, + "grad_norm": 3.99803084415977, + "learning_rate": 2.8007609625263467e-06, + "loss": 0.9813, + "step": 126930 + }, + { + "epoch": 1.53, + "grad_norm": 8.314762653848119, + "learning_rate": 2.800355524288342e-06, + "loss": 1.2262, + "step": 126933 + }, + { + "epoch": 1.53, + "grad_norm": 28.10361281558142, + "learning_rate": 2.7999501106201276e-06, + "loss": 0.9302, + "step": 126936 + }, + { + "epoch": 1.53, + "grad_norm": 4.980219928364546, + "learning_rate": 2.799544721523092e-06, + "loss": 1.1537, + "step": 126939 + }, + { + "epoch": 1.53, + "grad_norm": 6.969245397995915, + "learning_rate": 2.7991393569986146e-06, + "loss": 1.2179, + "step": 126942 + }, + { + "epoch": 1.53, + "grad_norm": 5.835004950776081, + "learning_rate": 2.7987340170480836e-06, + "loss": 1.123, + "step": 126945 + }, + { + "epoch": 1.53, + "grad_norm": 4.010954157570406, + "learning_rate": 2.798328701672877e-06, + "loss": 1.3297, + "step": 126948 + }, + { + "epoch": 1.53, + "grad_norm": 12.751099660720163, + "learning_rate": 2.79792341087438e-06, + "loss": 1.0583, + "step": 126951 + }, + { + "epoch": 1.53, + "grad_norm": 5.677264064491993, + "learning_rate": 2.7975181446539756e-06, + "loss": 1.0666, + "step": 126954 + }, + { + "epoch": 1.53, + "grad_norm": 8.226985598079592, + "learning_rate": 2.7971129030130517e-06, + "loss": 0.8624, + "step": 126957 + }, + { + "epoch": 1.53, + "grad_norm": 7.630879295274544, + "learning_rate": 2.796707685952983e-06, + "loss": 1.1527, + "step": 126960 + }, + { + "epoch": 1.53, + "grad_norm": 5.78139016939214, + "learning_rate": 2.7963024934751603e-06, + "loss": 0.9721, + "step": 126963 + }, + { + "epoch": 1.53, + "grad_norm": 10.082173104300542, + "learning_rate": 2.7958973255809584e-06, + "loss": 1.3449, + "step": 126966 + }, + { + "epoch": 1.53, + "grad_norm": 10.294452312954347, + "learning_rate": 2.795492182271765e-06, + "loss": 1.2005, + "step": 126969 + }, + { + "epoch": 1.53, + "grad_norm": 6.692880437045191, + "learning_rate": 2.7950870635489637e-06, + "loss": 1.013, + "step": 126972 + }, + { + "epoch": 1.53, + "grad_norm": 8.498410852186359, + "learning_rate": 2.7946819694139325e-06, + "loss": 1.1755, + "step": 126975 + }, + { + "epoch": 1.53, + "grad_norm": 3.500241872785369, + "learning_rate": 2.794276899868057e-06, + "loss": 1.3253, + "step": 126978 + }, + { + "epoch": 1.53, + "grad_norm": 12.28531685096776, + "learning_rate": 2.793871854912721e-06, + "loss": 1.3818, + "step": 126981 + }, + { + "epoch": 1.53, + "grad_norm": 11.174053011037081, + "learning_rate": 2.793466834549302e-06, + "loss": 0.9413, + "step": 126984 + }, + { + "epoch": 1.53, + "grad_norm": 10.700446967991839, + "learning_rate": 2.7930618387791884e-06, + "loss": 1.4438, + "step": 126987 + }, + { + "epoch": 1.53, + "grad_norm": 10.519564223427839, + "learning_rate": 2.7926568676037556e-06, + "loss": 1.0133, + "step": 126990 + }, + { + "epoch": 1.53, + "grad_norm": 6.045475385637961, + "learning_rate": 2.792251921024388e-06, + "loss": 1.2602, + "step": 126993 + }, + { + "epoch": 1.53, + "grad_norm": 21.132071618446837, + "learning_rate": 2.7918469990424703e-06, + "loss": 1.1343, + "step": 126996 + }, + { + "epoch": 1.53, + "grad_norm": 7.669936774997922, + "learning_rate": 2.791442101659384e-06, + "loss": 1.1509, + "step": 126999 + }, + { + "epoch": 1.53, + "grad_norm": 4.1900342431681405, + "learning_rate": 2.7910372288765063e-06, + "loss": 0.8911, + "step": 127002 + }, + { + "epoch": 1.53, + "grad_norm": 5.953646212855452, + "learning_rate": 2.7906323806952253e-06, + "loss": 1.5479, + "step": 127005 + }, + { + "epoch": 1.53, + "grad_norm": 20.869336807518494, + "learning_rate": 2.790227557116916e-06, + "loss": 1.2223, + "step": 127008 + }, + { + "epoch": 1.53, + "grad_norm": 7.030484981244376, + "learning_rate": 2.7898227581429628e-06, + "loss": 1.3304, + "step": 127011 + }, + { + "epoch": 1.53, + "grad_norm": 5.879207448081685, + "learning_rate": 2.78941798377475e-06, + "loss": 1.1081, + "step": 127014 + }, + { + "epoch": 1.53, + "grad_norm": 11.570290715160642, + "learning_rate": 2.7890132340136544e-06, + "loss": 0.8718, + "step": 127017 + }, + { + "epoch": 1.53, + "grad_norm": 4.811747277141867, + "learning_rate": 2.788608508861059e-06, + "loss": 1.2967, + "step": 127020 + }, + { + "epoch": 1.53, + "grad_norm": 10.373948238090268, + "learning_rate": 2.788203808318348e-06, + "loss": 1.2009, + "step": 127023 + }, + { + "epoch": 1.53, + "grad_norm": 6.692089834261898, + "learning_rate": 2.7877991323869e-06, + "loss": 1.0366, + "step": 127026 + }, + { + "epoch": 1.53, + "grad_norm": 43.91562684981637, + "learning_rate": 2.787394481068093e-06, + "loss": 1.1471, + "step": 127029 + }, + { + "epoch": 1.53, + "grad_norm": 5.392462157419317, + "learning_rate": 2.7869898543633102e-06, + "loss": 1.5251, + "step": 127032 + }, + { + "epoch": 1.53, + "grad_norm": 11.836849543597115, + "learning_rate": 2.7865852522739334e-06, + "loss": 1.4696, + "step": 127035 + }, + { + "epoch": 1.53, + "grad_norm": 3.9854841764790083, + "learning_rate": 2.786180674801342e-06, + "loss": 1.1566, + "step": 127038 + }, + { + "epoch": 1.53, + "grad_norm": 5.666522734274868, + "learning_rate": 2.7857761219469216e-06, + "loss": 1.0053, + "step": 127041 + }, + { + "epoch": 1.53, + "grad_norm": 7.398069534432389, + "learning_rate": 2.785371593712045e-06, + "loss": 1.3208, + "step": 127044 + }, + { + "epoch": 1.53, + "grad_norm": 9.56825870480031, + "learning_rate": 2.7849670900981e-06, + "loss": 1.273, + "step": 127047 + }, + { + "epoch": 1.53, + "grad_norm": 5.313344825198131, + "learning_rate": 2.7845626111064593e-06, + "loss": 1.0549, + "step": 127050 + }, + { + "epoch": 1.53, + "grad_norm": 3.0889381543758416, + "learning_rate": 2.7841581567385113e-06, + "loss": 1.2331, + "step": 127053 + }, + { + "epoch": 1.53, + "grad_norm": 8.553093434390492, + "learning_rate": 2.783753726995629e-06, + "loss": 0.9193, + "step": 127056 + }, + { + "epoch": 1.53, + "grad_norm": 4.992632515761454, + "learning_rate": 2.7833493218791962e-06, + "loss": 0.91, + "step": 127059 + }, + { + "epoch": 1.53, + "grad_norm": 8.188590613445646, + "learning_rate": 2.782944941390593e-06, + "loss": 1.3741, + "step": 127062 + }, + { + "epoch": 1.53, + "grad_norm": 18.96262939507085, + "learning_rate": 2.7825405855312025e-06, + "loss": 1.1031, + "step": 127065 + }, + { + "epoch": 1.53, + "grad_norm": 7.217026802284895, + "learning_rate": 2.782136254302401e-06, + "loss": 0.9285, + "step": 127068 + }, + { + "epoch": 1.53, + "grad_norm": 8.188423442310038, + "learning_rate": 2.781731947705565e-06, + "loss": 1.0798, + "step": 127071 + }, + { + "epoch": 1.53, + "grad_norm": 7.025102220827433, + "learning_rate": 2.7813276657420786e-06, + "loss": 1.4654, + "step": 127074 + }, + { + "epoch": 1.53, + "grad_norm": 15.032241943794137, + "learning_rate": 2.7809234084133196e-06, + "loss": 0.959, + "step": 127077 + }, + { + "epoch": 1.53, + "grad_norm": 7.514157153871101, + "learning_rate": 2.7805191757206727e-06, + "loss": 1.0433, + "step": 127080 + }, + { + "epoch": 1.53, + "grad_norm": 4.5547534852371365, + "learning_rate": 2.7801149676655104e-06, + "loss": 1.218, + "step": 127083 + }, + { + "epoch": 1.53, + "grad_norm": 17.72180164048462, + "learning_rate": 2.7797107842492144e-06, + "loss": 0.7563, + "step": 127086 + }, + { + "epoch": 1.53, + "grad_norm": 6.994579156949807, + "learning_rate": 2.7793066254731683e-06, + "loss": 1.1165, + "step": 127089 + }, + { + "epoch": 1.53, + "grad_norm": 20.264290926949553, + "learning_rate": 2.778902491338744e-06, + "loss": 1.0433, + "step": 127092 + }, + { + "epoch": 1.53, + "grad_norm": 9.288165444039096, + "learning_rate": 2.7784983818473287e-06, + "loss": 1.2709, + "step": 127095 + }, + { + "epoch": 1.53, + "grad_norm": 14.79802380893772, + "learning_rate": 2.778094297000293e-06, + "loss": 1.2299, + "step": 127098 + }, + { + "epoch": 1.53, + "grad_norm": 28.204489442321673, + "learning_rate": 2.7776902367990213e-06, + "loss": 1.2083, + "step": 127101 + }, + { + "epoch": 1.53, + "grad_norm": 10.266579892860102, + "learning_rate": 2.777286201244891e-06, + "loss": 0.7747, + "step": 127104 + }, + { + "epoch": 1.53, + "grad_norm": 8.73612936909954, + "learning_rate": 2.776882190339284e-06, + "loss": 1.3085, + "step": 127107 + }, + { + "epoch": 1.53, + "grad_norm": 4.66486423109596, + "learning_rate": 2.776478204083577e-06, + "loss": 1.4741, + "step": 127110 + }, + { + "epoch": 1.53, + "grad_norm": 4.346795008029601, + "learning_rate": 2.7760742424791444e-06, + "loss": 1.3472, + "step": 127113 + }, + { + "epoch": 1.53, + "grad_norm": 10.983051745340978, + "learning_rate": 2.7756703055273683e-06, + "loss": 1.4917, + "step": 127116 + }, + { + "epoch": 1.53, + "grad_norm": 3.1529286066345596, + "learning_rate": 2.7752663932296275e-06, + "loss": 0.9927, + "step": 127119 + }, + { + "epoch": 1.53, + "grad_norm": 9.084160804741366, + "learning_rate": 2.7748625055873036e-06, + "loss": 1.1348, + "step": 127122 + }, + { + "epoch": 1.53, + "grad_norm": 6.944465416763762, + "learning_rate": 2.7744586426017673e-06, + "loss": 1.001, + "step": 127125 + }, + { + "epoch": 1.53, + "grad_norm": 4.845425065949699, + "learning_rate": 2.774054804274402e-06, + "loss": 1.0021, + "step": 127128 + }, + { + "epoch": 1.53, + "grad_norm": 3.5055257152052417, + "learning_rate": 2.773650990606588e-06, + "loss": 1.3029, + "step": 127131 + }, + { + "epoch": 1.53, + "grad_norm": 14.278262612463049, + "learning_rate": 2.7732472015996993e-06, + "loss": 1.3804, + "step": 127134 + }, + { + "epoch": 1.53, + "grad_norm": 16.92087374513289, + "learning_rate": 2.7728434372551126e-06, + "loss": 0.953, + "step": 127137 + }, + { + "epoch": 1.53, + "grad_norm": 6.71865233136305, + "learning_rate": 2.7724396975742073e-06, + "loss": 1.3962, + "step": 127140 + }, + { + "epoch": 1.53, + "grad_norm": 17.510300735045426, + "learning_rate": 2.772035982558363e-06, + "loss": 0.8378, + "step": 127143 + }, + { + "epoch": 1.53, + "grad_norm": 10.279631183770597, + "learning_rate": 2.7716322922089555e-06, + "loss": 1.5757, + "step": 127146 + }, + { + "epoch": 1.53, + "grad_norm": 8.282298664212774, + "learning_rate": 2.771228626527367e-06, + "loss": 1.0786, + "step": 127149 + }, + { + "epoch": 1.53, + "grad_norm": 16.105860335472844, + "learning_rate": 2.7708249855149716e-06, + "loss": 1.38, + "step": 127152 + }, + { + "epoch": 1.53, + "grad_norm": 8.764285681506875, + "learning_rate": 2.770421369173143e-06, + "loss": 1.2127, + "step": 127155 + }, + { + "epoch": 1.53, + "grad_norm": 7.951591928126912, + "learning_rate": 2.7700177775032623e-06, + "loss": 1.357, + "step": 127158 + }, + { + "epoch": 1.53, + "grad_norm": 24.694544644599613, + "learning_rate": 2.7696142105067093e-06, + "loss": 1.3629, + "step": 127161 + }, + { + "epoch": 1.53, + "grad_norm": 3.5074896122189525, + "learning_rate": 2.7692106681848563e-06, + "loss": 1.0439, + "step": 127164 + }, + { + "epoch": 1.53, + "grad_norm": 17.941920358704557, + "learning_rate": 2.7688071505390822e-06, + "loss": 1.1456, + "step": 127167 + }, + { + "epoch": 1.53, + "grad_norm": 8.70956445151375, + "learning_rate": 2.768403657570765e-06, + "loss": 1.188, + "step": 127170 + }, + { + "epoch": 1.53, + "grad_norm": 9.623265510732502, + "learning_rate": 2.7680001892812846e-06, + "loss": 1.2161, + "step": 127173 + }, + { + "epoch": 1.53, + "grad_norm": 5.426678745340937, + "learning_rate": 2.7675967456720143e-06, + "loss": 1.3596, + "step": 127176 + }, + { + "epoch": 1.53, + "grad_norm": 22.158386305525305, + "learning_rate": 2.7671933267443273e-06, + "loss": 1.1191, + "step": 127179 + }, + { + "epoch": 1.53, + "grad_norm": 8.810100175241427, + "learning_rate": 2.766789932499605e-06, + "loss": 1.1685, + "step": 127182 + }, + { + "epoch": 1.53, + "grad_norm": 6.185882979901869, + "learning_rate": 2.766386562939224e-06, + "loss": 1.1085, + "step": 127185 + }, + { + "epoch": 1.53, + "grad_norm": 7.709949263367905, + "learning_rate": 2.765983218064562e-06, + "loss": 1.2359, + "step": 127188 + }, + { + "epoch": 1.53, + "grad_norm": 13.605031994465042, + "learning_rate": 2.765579897876991e-06, + "loss": 1.0551, + "step": 127191 + }, + { + "epoch": 1.53, + "grad_norm": 7.988405097996634, + "learning_rate": 2.765176602377894e-06, + "loss": 1.2656, + "step": 127194 + }, + { + "epoch": 1.53, + "grad_norm": 6.059667787972524, + "learning_rate": 2.7647733315686387e-06, + "loss": 0.964, + "step": 127197 + }, + { + "epoch": 1.53, + "grad_norm": 4.281128334426998, + "learning_rate": 2.7643700854506062e-06, + "loss": 1.1317, + "step": 127200 + }, + { + "epoch": 1.53, + "grad_norm": 6.440512254106885, + "learning_rate": 2.763966864025176e-06, + "loss": 1.0331, + "step": 127203 + }, + { + "epoch": 1.53, + "grad_norm": 7.986029311855512, + "learning_rate": 2.7635636672937173e-06, + "loss": 1.089, + "step": 127206 + }, + { + "epoch": 1.53, + "grad_norm": 9.583714908631622, + "learning_rate": 2.7631604952576086e-06, + "loss": 1.0352, + "step": 127209 + }, + { + "epoch": 1.53, + "grad_norm": 5.967246956452268, + "learning_rate": 2.762757347918227e-06, + "loss": 1.2294, + "step": 127212 + }, + { + "epoch": 1.53, + "grad_norm": 8.98805686235872, + "learning_rate": 2.762354225276951e-06, + "loss": 0.9114, + "step": 127215 + }, + { + "epoch": 1.53, + "grad_norm": 7.599814509441409, + "learning_rate": 2.761951127335153e-06, + "loss": 1.1307, + "step": 127218 + }, + { + "epoch": 1.53, + "grad_norm": 19.656735386950984, + "learning_rate": 2.761548054094205e-06, + "loss": 1.1656, + "step": 127221 + }, + { + "epoch": 1.53, + "grad_norm": 3.825412519838402, + "learning_rate": 2.7611450055554865e-06, + "loss": 1.3278, + "step": 127224 + }, + { + "epoch": 1.53, + "grad_norm": 5.562336441009726, + "learning_rate": 2.760741981720373e-06, + "loss": 1.3757, + "step": 127227 + }, + { + "epoch": 1.53, + "grad_norm": 2.4733360843755006, + "learning_rate": 2.7603389825902416e-06, + "loss": 1.1965, + "step": 127230 + }, + { + "epoch": 1.53, + "grad_norm": 8.475557862816107, + "learning_rate": 2.7599360081664638e-06, + "loss": 1.0582, + "step": 127233 + }, + { + "epoch": 1.53, + "grad_norm": 4.845689967122288, + "learning_rate": 2.759533058450419e-06, + "loss": 1.1423, + "step": 127236 + }, + { + "epoch": 1.53, + "grad_norm": 20.138977935659938, + "learning_rate": 2.7591301334434763e-06, + "loss": 1.0814, + "step": 127239 + }, + { + "epoch": 1.53, + "grad_norm": 6.188799312339889, + "learning_rate": 2.758727233147018e-06, + "loss": 1.0014, + "step": 127242 + }, + { + "epoch": 1.53, + "grad_norm": 3.8796368362759117, + "learning_rate": 2.7583243575624117e-06, + "loss": 1.1921, + "step": 127245 + }, + { + "epoch": 1.53, + "grad_norm": 8.802313581967143, + "learning_rate": 2.7579215066910368e-06, + "loss": 1.1838, + "step": 127248 + }, + { + "epoch": 1.53, + "grad_norm": 4.3516500554624615, + "learning_rate": 2.7575186805342667e-06, + "loss": 1.1887, + "step": 127251 + }, + { + "epoch": 1.53, + "grad_norm": 6.270885832363994, + "learning_rate": 2.7571158790934772e-06, + "loss": 1.2128, + "step": 127254 + }, + { + "epoch": 1.53, + "grad_norm": 5.913186078682874, + "learning_rate": 2.7567131023700442e-06, + "loss": 1.3168, + "step": 127257 + }, + { + "epoch": 1.53, + "grad_norm": 11.984404771654491, + "learning_rate": 2.756310350365342e-06, + "loss": 1.2184, + "step": 127260 + }, + { + "epoch": 1.53, + "grad_norm": 11.572445051156471, + "learning_rate": 2.755907623080739e-06, + "loss": 1.4806, + "step": 127263 + }, + { + "epoch": 1.53, + "grad_norm": 3.2533458834728277, + "learning_rate": 2.7555049205176144e-06, + "loss": 1.385, + "step": 127266 + }, + { + "epoch": 1.53, + "grad_norm": 11.948033180173587, + "learning_rate": 2.7551022426773455e-06, + "loss": 0.93, + "step": 127269 + }, + { + "epoch": 1.53, + "grad_norm": 7.424116719440985, + "learning_rate": 2.7546995895612993e-06, + "loss": 1.1149, + "step": 127272 + }, + { + "epoch": 1.53, + "grad_norm": 6.75432546661463, + "learning_rate": 2.754296961170855e-06, + "loss": 1.1417, + "step": 127275 + }, + { + "epoch": 1.53, + "grad_norm": 5.830298684743889, + "learning_rate": 2.753894357507384e-06, + "loss": 1.2045, + "step": 127278 + }, + { + "epoch": 1.53, + "grad_norm": 4.416017666349372, + "learning_rate": 2.753491778572265e-06, + "loss": 0.7564, + "step": 127281 + }, + { + "epoch": 1.53, + "grad_norm": 6.772170792947649, + "learning_rate": 2.753089224366867e-06, + "loss": 1.1968, + "step": 127284 + }, + { + "epoch": 1.53, + "grad_norm": 6.997192199191704, + "learning_rate": 2.7526866948925637e-06, + "loss": 1.2516, + "step": 127287 + }, + { + "epoch": 1.53, + "grad_norm": 9.066120541793108, + "learning_rate": 2.7522841901507303e-06, + "loss": 0.9524, + "step": 127290 + }, + { + "epoch": 1.53, + "grad_norm": 4.58907487697266, + "learning_rate": 2.7518817101427397e-06, + "loss": 1.2381, + "step": 127293 + }, + { + "epoch": 1.53, + "grad_norm": 5.367558402052218, + "learning_rate": 2.7514792548699687e-06, + "loss": 0.9746, + "step": 127296 + }, + { + "epoch": 1.53, + "grad_norm": 42.07082988483669, + "learning_rate": 2.751076824333786e-06, + "loss": 0.9306, + "step": 127299 + }, + { + "epoch": 1.53, + "grad_norm": 11.250002555435623, + "learning_rate": 2.7506744185355695e-06, + "loss": 1.0955, + "step": 127302 + }, + { + "epoch": 1.53, + "grad_norm": 4.188331219898291, + "learning_rate": 2.750272037476688e-06, + "loss": 1.1105, + "step": 127305 + }, + { + "epoch": 1.53, + "grad_norm": 62.54853408615547, + "learning_rate": 2.7498696811585156e-06, + "loss": 1.2319, + "step": 127308 + }, + { + "epoch": 1.53, + "grad_norm": 17.47276858875831, + "learning_rate": 2.7494673495824297e-06, + "loss": 1.0251, + "step": 127311 + }, + { + "epoch": 1.53, + "grad_norm": 4.9837910231060505, + "learning_rate": 2.749065042749798e-06, + "loss": 1.0122, + "step": 127314 + }, + { + "epoch": 1.53, + "grad_norm": 6.53754610871428, + "learning_rate": 2.7486627606619944e-06, + "loss": 1.3508, + "step": 127317 + }, + { + "epoch": 1.53, + "grad_norm": 6.108629273855599, + "learning_rate": 2.7482605033203923e-06, + "loss": 1.0192, + "step": 127320 + }, + { + "epoch": 1.53, + "grad_norm": 2.9820832117984435, + "learning_rate": 2.74785827072637e-06, + "loss": 1.3921, + "step": 127323 + }, + { + "epoch": 1.53, + "grad_norm": 4.260243279107602, + "learning_rate": 2.747456062881294e-06, + "loss": 1.26, + "step": 127326 + }, + { + "epoch": 1.53, + "grad_norm": 4.649243953136826, + "learning_rate": 2.7470538797865353e-06, + "loss": 1.0536, + "step": 127329 + }, + { + "epoch": 1.53, + "grad_norm": 7.643814895983346, + "learning_rate": 2.7466517214434683e-06, + "loss": 1.2446, + "step": 127332 + }, + { + "epoch": 1.53, + "grad_norm": 8.070339069552748, + "learning_rate": 2.746249587853468e-06, + "loss": 1.2091, + "step": 127335 + }, + { + "epoch": 1.53, + "grad_norm": 16.189663576793667, + "learning_rate": 2.7458474790179067e-06, + "loss": 0.8119, + "step": 127338 + }, + { + "epoch": 1.53, + "grad_norm": 5.704062770108354, + "learning_rate": 2.745445394938153e-06, + "loss": 1.2959, + "step": 127341 + }, + { + "epoch": 1.53, + "grad_norm": 6.969513892810138, + "learning_rate": 2.7450433356155835e-06, + "loss": 1.193, + "step": 127344 + }, + { + "epoch": 1.53, + "grad_norm": 15.334675922665383, + "learning_rate": 2.744641301051565e-06, + "loss": 1.0563, + "step": 127347 + }, + { + "epoch": 1.53, + "grad_norm": 30.661605158163223, + "learning_rate": 2.7442392912474757e-06, + "loss": 1.0442, + "step": 127350 + }, + { + "epoch": 1.53, + "grad_norm": 10.198772142014478, + "learning_rate": 2.7438373062046808e-06, + "loss": 0.7685, + "step": 127353 + }, + { + "epoch": 1.53, + "grad_norm": 8.741302809698258, + "learning_rate": 2.7434353459245556e-06, + "loss": 1.2444, + "step": 127356 + }, + { + "epoch": 1.53, + "grad_norm": 9.518011347660643, + "learning_rate": 2.743033410408472e-06, + "loss": 1.0102, + "step": 127359 + }, + { + "epoch": 1.53, + "grad_norm": 7.532411797360834, + "learning_rate": 2.742631499657802e-06, + "loss": 1.0198, + "step": 127362 + }, + { + "epoch": 1.53, + "grad_norm": 11.503326259496475, + "learning_rate": 2.742229613673918e-06, + "loss": 0.8952, + "step": 127365 + }, + { + "epoch": 1.53, + "grad_norm": 7.158830182352065, + "learning_rate": 2.7418277524581915e-06, + "loss": 1.1929, + "step": 127368 + }, + { + "epoch": 1.53, + "grad_norm": 9.349486580702909, + "learning_rate": 2.741425916011988e-06, + "loss": 1.2071, + "step": 127371 + }, + { + "epoch": 1.53, + "grad_norm": 29.50897186838029, + "learning_rate": 2.741024104336685e-06, + "loss": 1.0861, + "step": 127374 + }, + { + "epoch": 1.53, + "grad_norm": 8.505180474543817, + "learning_rate": 2.7406223174336542e-06, + "loss": 1.0477, + "step": 127377 + }, + { + "epoch": 1.53, + "grad_norm": 18.710773001909406, + "learning_rate": 2.7402205553042616e-06, + "loss": 0.7787, + "step": 127380 + }, + { + "epoch": 1.53, + "grad_norm": 15.200215151444912, + "learning_rate": 2.7398188179498818e-06, + "loss": 1.074, + "step": 127383 + }, + { + "epoch": 1.53, + "grad_norm": 6.53714312367695, + "learning_rate": 2.7394171053718878e-06, + "loss": 1.4372, + "step": 127386 + }, + { + "epoch": 1.53, + "grad_norm": 5.675911292106886, + "learning_rate": 2.7390154175716455e-06, + "loss": 1.1415, + "step": 127389 + }, + { + "epoch": 1.53, + "grad_norm": 12.166698755745927, + "learning_rate": 2.73861375455053e-06, + "loss": 1.07, + "step": 127392 + }, + { + "epoch": 1.53, + "grad_norm": 5.855830124209755, + "learning_rate": 2.7382121163099084e-06, + "loss": 1.1401, + "step": 127395 + }, + { + "epoch": 1.53, + "grad_norm": 7.232098714998477, + "learning_rate": 2.7378105028511525e-06, + "loss": 1.1536, + "step": 127398 + }, + { + "epoch": 1.53, + "grad_norm": 17.86318193155846, + "learning_rate": 2.7374089141756333e-06, + "loss": 1.0446, + "step": 127401 + }, + { + "epoch": 1.53, + "grad_norm": 15.551867223200436, + "learning_rate": 2.737007350284725e-06, + "loss": 0.8441, + "step": 127404 + }, + { + "epoch": 1.53, + "grad_norm": 20.204360813693256, + "learning_rate": 2.7366058111797923e-06, + "loss": 1.254, + "step": 127407 + }, + { + "epoch": 1.53, + "grad_norm": 16.830608627690893, + "learning_rate": 2.7362042968622093e-06, + "loss": 1.0889, + "step": 127410 + }, + { + "epoch": 1.53, + "grad_norm": 12.172824976352809, + "learning_rate": 2.735802807333342e-06, + "loss": 1.0859, + "step": 127413 + }, + { + "epoch": 1.53, + "grad_norm": 7.801871252117899, + "learning_rate": 2.7354013425945634e-06, + "loss": 1.1901, + "step": 127416 + }, + { + "epoch": 1.53, + "grad_norm": 6.192232530202221, + "learning_rate": 2.734999902647246e-06, + "loss": 1.0257, + "step": 127419 + }, + { + "epoch": 1.53, + "grad_norm": 4.8008746488011464, + "learning_rate": 2.7345984874927544e-06, + "loss": 1.1491, + "step": 127422 + }, + { + "epoch": 1.53, + "grad_norm": 29.66035058840413, + "learning_rate": 2.7341970971324615e-06, + "loss": 1.2163, + "step": 127425 + }, + { + "epoch": 1.53, + "grad_norm": 4.14742023056152, + "learning_rate": 2.7337957315677398e-06, + "loss": 0.9725, + "step": 127428 + }, + { + "epoch": 1.53, + "grad_norm": 31.496833901798684, + "learning_rate": 2.733394390799955e-06, + "loss": 0.9532, + "step": 127431 + }, + { + "epoch": 1.53, + "grad_norm": 8.126614058529793, + "learning_rate": 2.7329930748304756e-06, + "loss": 0.882, + "step": 127434 + }, + { + "epoch": 1.53, + "grad_norm": 7.5549287290149225, + "learning_rate": 2.732591783660673e-06, + "loss": 0.9938, + "step": 127437 + }, + { + "epoch": 1.53, + "grad_norm": 6.762142606057433, + "learning_rate": 2.7321905172919173e-06, + "loss": 1.1956, + "step": 127440 + }, + { + "epoch": 1.53, + "grad_norm": 11.132266792706003, + "learning_rate": 2.731789275725577e-06, + "loss": 1.217, + "step": 127443 + }, + { + "epoch": 1.53, + "grad_norm": 2.8481381978412186, + "learning_rate": 2.7313880589630247e-06, + "loss": 1.5162, + "step": 127446 + }, + { + "epoch": 1.53, + "grad_norm": 6.663783455053033, + "learning_rate": 2.730986867005624e-06, + "loss": 1.1787, + "step": 127449 + }, + { + "epoch": 1.53, + "grad_norm": 5.304689155683005, + "learning_rate": 2.73058569985475e-06, + "loss": 1.2977, + "step": 127452 + }, + { + "epoch": 1.53, + "grad_norm": 9.571172978283206, + "learning_rate": 2.7301845575117658e-06, + "loss": 1.1175, + "step": 127455 + }, + { + "epoch": 1.53, + "grad_norm": 8.88029641760031, + "learning_rate": 2.7297834399780455e-06, + "loss": 1.2627, + "step": 127458 + }, + { + "epoch": 1.53, + "grad_norm": 7.270661073721858, + "learning_rate": 2.729382347254953e-06, + "loss": 0.8322, + "step": 127461 + }, + { + "epoch": 1.53, + "grad_norm": 5.94768868352194, + "learning_rate": 2.72898127934386e-06, + "loss": 1.1172, + "step": 127464 + }, + { + "epoch": 1.53, + "grad_norm": 9.423322743458069, + "learning_rate": 2.7285802362461345e-06, + "loss": 1.2301, + "step": 127467 + }, + { + "epoch": 1.53, + "grad_norm": 2.977574643664746, + "learning_rate": 2.7281792179631483e-06, + "loss": 0.8619, + "step": 127470 + }, + { + "epoch": 1.53, + "grad_norm": 6.3188050812791365, + "learning_rate": 2.727778224496268e-06, + "loss": 1.2417, + "step": 127473 + }, + { + "epoch": 1.53, + "grad_norm": 8.229654106262815, + "learning_rate": 2.727377255846857e-06, + "loss": 0.9, + "step": 127476 + }, + { + "epoch": 1.53, + "grad_norm": 9.48568553762244, + "learning_rate": 2.7269763120162884e-06, + "loss": 1.2659, + "step": 127479 + }, + { + "epoch": 1.53, + "grad_norm": 20.046676148616296, + "learning_rate": 2.7265753930059304e-06, + "loss": 1.3762, + "step": 127482 + }, + { + "epoch": 1.53, + "grad_norm": 9.39286423659316, + "learning_rate": 2.7261744988171533e-06, + "loss": 1.0449, + "step": 127485 + }, + { + "epoch": 1.53, + "grad_norm": 9.409820561774591, + "learning_rate": 2.7257736294513192e-06, + "loss": 1.2147, + "step": 127488 + }, + { + "epoch": 1.53, + "grad_norm": 4.745460000384308, + "learning_rate": 2.7253727849098e-06, + "loss": 1.1088, + "step": 127491 + }, + { + "epoch": 1.53, + "grad_norm": 10.406210519677083, + "learning_rate": 2.7249719651939667e-06, + "loss": 1.1647, + "step": 127494 + }, + { + "epoch": 1.53, + "grad_norm": 8.267719490356486, + "learning_rate": 2.724571170305179e-06, + "loss": 1.2354, + "step": 127497 + }, + { + "epoch": 1.53, + "grad_norm": 5.164218558456137, + "learning_rate": 2.724170400244813e-06, + "loss": 1.0784, + "step": 127500 + }, + { + "epoch": 1.53, + "grad_norm": 8.809776664167718, + "learning_rate": 2.7237696550142312e-06, + "loss": 1.1555, + "step": 127503 + }, + { + "epoch": 1.53, + "grad_norm": 11.615871216884745, + "learning_rate": 2.723368934614801e-06, + "loss": 1.5685, + "step": 127506 + }, + { + "epoch": 1.53, + "grad_norm": 4.098077310424739, + "learning_rate": 2.7229682390478918e-06, + "loss": 1.2563, + "step": 127509 + }, + { + "epoch": 1.53, + "grad_norm": 10.265193580238241, + "learning_rate": 2.7225675683148744e-06, + "loss": 1.2073, + "step": 127512 + }, + { + "epoch": 1.53, + "grad_norm": 20.195935901992613, + "learning_rate": 2.7221669224171087e-06, + "loss": 1.1373, + "step": 127515 + }, + { + "epoch": 1.53, + "grad_norm": 6.895215906684149, + "learning_rate": 2.7217663013559702e-06, + "loss": 1.3101, + "step": 127518 + }, + { + "epoch": 1.53, + "grad_norm": 5.954574771805295, + "learning_rate": 2.721365705132818e-06, + "loss": 1.2807, + "step": 127521 + }, + { + "epoch": 1.53, + "grad_norm": 16.63678957988588, + "learning_rate": 2.7209651337490227e-06, + "loss": 1.3485, + "step": 127524 + }, + { + "epoch": 1.53, + "grad_norm": 6.918026392561655, + "learning_rate": 2.7205645872059538e-06, + "loss": 1.0566, + "step": 127527 + }, + { + "epoch": 1.53, + "grad_norm": 248.60379249465015, + "learning_rate": 2.720164065504974e-06, + "loss": 1.2857, + "step": 127530 + }, + { + "epoch": 1.53, + "grad_norm": 6.403181836207539, + "learning_rate": 2.7197635686474523e-06, + "loss": 1.2436, + "step": 127533 + }, + { + "epoch": 1.53, + "grad_norm": 5.24923054838555, + "learning_rate": 2.7193630966347573e-06, + "loss": 1.3235, + "step": 127536 + }, + { + "epoch": 1.53, + "grad_norm": 11.141743357544534, + "learning_rate": 2.7189626494682543e-06, + "loss": 1.3193, + "step": 127539 + }, + { + "epoch": 1.53, + "grad_norm": 10.092335960513354, + "learning_rate": 2.7185622271493052e-06, + "loss": 1.3751, + "step": 127542 + }, + { + "epoch": 1.53, + "grad_norm": 4.272470639967575, + "learning_rate": 2.718161829679281e-06, + "loss": 1.0093, + "step": 127545 + }, + { + "epoch": 1.53, + "grad_norm": 12.73134315366103, + "learning_rate": 2.717761457059548e-06, + "loss": 1.1177, + "step": 127548 + }, + { + "epoch": 1.53, + "grad_norm": 14.406257676084127, + "learning_rate": 2.717361109291472e-06, + "loss": 1.1528, + "step": 127551 + }, + { + "epoch": 1.53, + "grad_norm": 11.238737881119963, + "learning_rate": 2.7169607863764213e-06, + "loss": 1.2088, + "step": 127554 + }, + { + "epoch": 1.53, + "grad_norm": 9.255196126678532, + "learning_rate": 2.716560488315758e-06, + "loss": 1.1177, + "step": 127557 + }, + { + "epoch": 1.53, + "grad_norm": 14.314685243757685, + "learning_rate": 2.7161602151108535e-06, + "loss": 1.1149, + "step": 127560 + }, + { + "epoch": 1.53, + "grad_norm": 3.729352027382091, + "learning_rate": 2.7157599667630676e-06, + "loss": 1.1644, + "step": 127563 + }, + { + "epoch": 1.53, + "grad_norm": 5.9766519388824095, + "learning_rate": 2.715359743273771e-06, + "loss": 1.3111, + "step": 127566 + }, + { + "epoch": 1.53, + "grad_norm": 7.530572202035468, + "learning_rate": 2.7149595446443257e-06, + "loss": 1.0022, + "step": 127569 + }, + { + "epoch": 1.53, + "grad_norm": 10.070784370990625, + "learning_rate": 2.7145593708760998e-06, + "loss": 1.5276, + "step": 127572 + }, + { + "epoch": 1.53, + "grad_norm": 6.385774168698448, + "learning_rate": 2.714159221970457e-06, + "loss": 1.2957, + "step": 127575 + }, + { + "epoch": 1.53, + "grad_norm": 7.705725354171933, + "learning_rate": 2.7137590979287686e-06, + "loss": 1.012, + "step": 127578 + }, + { + "epoch": 1.53, + "grad_norm": 4.998248441722218, + "learning_rate": 2.7133589987523955e-06, + "loss": 1.282, + "step": 127581 + }, + { + "epoch": 1.53, + "grad_norm": 8.99888218918988, + "learning_rate": 2.712958924442701e-06, + "loss": 1.2707, + "step": 127584 + }, + { + "epoch": 1.53, + "grad_norm": 8.85538995898343, + "learning_rate": 2.712558875001052e-06, + "loss": 1.2999, + "step": 127587 + }, + { + "epoch": 1.53, + "grad_norm": 11.32047117285805, + "learning_rate": 2.7121588504288156e-06, + "loss": 1.0938, + "step": 127590 + }, + { + "epoch": 1.53, + "grad_norm": 5.586467573009876, + "learning_rate": 2.7117588507273582e-06, + "loss": 0.8966, + "step": 127593 + }, + { + "epoch": 1.53, + "grad_norm": 3.4236846418323728, + "learning_rate": 2.7113588758980404e-06, + "loss": 1.3096, + "step": 127596 + }, + { + "epoch": 1.53, + "grad_norm": 4.613060380855141, + "learning_rate": 2.7109589259422296e-06, + "loss": 1.1499, + "step": 127599 + }, + { + "epoch": 1.53, + "grad_norm": 4.947223176146201, + "learning_rate": 2.7105590008612926e-06, + "loss": 0.9378, + "step": 127602 + }, + { + "epoch": 1.53, + "grad_norm": 34.7076711281976, + "learning_rate": 2.7101591006565896e-06, + "loss": 1.4718, + "step": 127605 + }, + { + "epoch": 1.53, + "grad_norm": 11.271425409193737, + "learning_rate": 2.7097592253294904e-06, + "loss": 0.9315, + "step": 127608 + }, + { + "epoch": 1.53, + "grad_norm": 5.471174654579353, + "learning_rate": 2.709359374881354e-06, + "loss": 1.2235, + "step": 127611 + }, + { + "epoch": 1.53, + "grad_norm": 7.714351614600456, + "learning_rate": 2.708959549313549e-06, + "loss": 0.8755, + "step": 127614 + }, + { + "epoch": 1.53, + "grad_norm": 9.377633583313647, + "learning_rate": 2.7085597486274374e-06, + "loss": 1.0948, + "step": 127617 + }, + { + "epoch": 1.53, + "grad_norm": 12.297748337700773, + "learning_rate": 2.708159972824389e-06, + "loss": 1.1546, + "step": 127620 + }, + { + "epoch": 1.53, + "grad_norm": 7.118912723656173, + "learning_rate": 2.707760221905763e-06, + "loss": 1.4775, + "step": 127623 + }, + { + "epoch": 1.53, + "grad_norm": 10.081265839272469, + "learning_rate": 2.7073604958729226e-06, + "loss": 1.2698, + "step": 127626 + }, + { + "epoch": 1.53, + "grad_norm": 6.377930349160191, + "learning_rate": 2.706960794727235e-06, + "loss": 0.891, + "step": 127629 + }, + { + "epoch": 1.53, + "grad_norm": 10.076031291026268, + "learning_rate": 2.706561118470061e-06, + "loss": 1.0758, + "step": 127632 + }, + { + "epoch": 1.53, + "grad_norm": 11.556584544861055, + "learning_rate": 2.7061614671027715e-06, + "loss": 1.4309, + "step": 127635 + }, + { + "epoch": 1.53, + "grad_norm": 17.545368850384758, + "learning_rate": 2.705761840626722e-06, + "loss": 1.0822, + "step": 127638 + }, + { + "epoch": 1.53, + "grad_norm": 6.898800789979483, + "learning_rate": 2.70536223904328e-06, + "loss": 0.9292, + "step": 127641 + }, + { + "epoch": 1.53, + "grad_norm": 13.110903561631575, + "learning_rate": 2.7049626623538127e-06, + "loss": 1.2578, + "step": 127644 + }, + { + "epoch": 1.53, + "grad_norm": 5.315919809268181, + "learning_rate": 2.7045631105596793e-06, + "loss": 1.2411, + "step": 127647 + }, + { + "epoch": 1.53, + "grad_norm": 12.256552443351104, + "learning_rate": 2.7041635836622417e-06, + "loss": 1.4588, + "step": 127650 + }, + { + "epoch": 1.53, + "grad_norm": 73.30590288856625, + "learning_rate": 2.7037640816628663e-06, + "loss": 1.5711, + "step": 127653 + }, + { + "epoch": 1.54, + "grad_norm": 38.20283034918905, + "learning_rate": 2.7033646045629145e-06, + "loss": 0.9739, + "step": 127656 + }, + { + "epoch": 1.54, + "grad_norm": 8.095793341886855, + "learning_rate": 2.7029651523637524e-06, + "loss": 0.7258, + "step": 127659 + }, + { + "epoch": 1.54, + "grad_norm": 6.218192073070157, + "learning_rate": 2.7025657250667448e-06, + "loss": 1.0688, + "step": 127662 + }, + { + "epoch": 1.54, + "grad_norm": 12.561093349085683, + "learning_rate": 2.702166322673252e-06, + "loss": 1.6025, + "step": 127665 + }, + { + "epoch": 1.54, + "grad_norm": 5.303668367169403, + "learning_rate": 2.701766945184633e-06, + "loss": 1.1274, + "step": 127668 + }, + { + "epoch": 1.54, + "grad_norm": 14.67712416561465, + "learning_rate": 2.7013675926022555e-06, + "loss": 1.5037, + "step": 127671 + }, + { + "epoch": 1.54, + "grad_norm": 8.025253850188003, + "learning_rate": 2.700968264927484e-06, + "loss": 1.1587, + "step": 127674 + }, + { + "epoch": 1.54, + "grad_norm": 9.468411890116984, + "learning_rate": 2.700568962161676e-06, + "loss": 0.9198, + "step": 127677 + }, + { + "epoch": 1.54, + "grad_norm": 8.435030592349053, + "learning_rate": 2.7001696843061966e-06, + "loss": 1.0629, + "step": 127680 + }, + { + "epoch": 1.54, + "grad_norm": 10.38338706256353, + "learning_rate": 2.6997704313624085e-06, + "loss": 1.4781, + "step": 127683 + }, + { + "epoch": 1.54, + "grad_norm": 2.2785672467916145, + "learning_rate": 2.6993712033316775e-06, + "loss": 1.0507, + "step": 127686 + }, + { + "epoch": 1.54, + "grad_norm": 5.813990642785789, + "learning_rate": 2.698972000215363e-06, + "loss": 0.8118, + "step": 127689 + }, + { + "epoch": 1.54, + "grad_norm": 3.096637822750139, + "learning_rate": 2.6985728220148245e-06, + "loss": 1.3812, + "step": 127692 + }, + { + "epoch": 1.54, + "grad_norm": 12.960458374775662, + "learning_rate": 2.6981736687314265e-06, + "loss": 1.2354, + "step": 127695 + }, + { + "epoch": 1.54, + "grad_norm": 4.761659054778202, + "learning_rate": 2.6977745403665313e-06, + "loss": 1.3174, + "step": 127698 + }, + { + "epoch": 1.54, + "grad_norm": 18.661858072308682, + "learning_rate": 2.6973754369215054e-06, + "loss": 1.3255, + "step": 127701 + }, + { + "epoch": 1.54, + "grad_norm": 6.1998772163715605, + "learning_rate": 2.6969763583977025e-06, + "loss": 1.4208, + "step": 127704 + }, + { + "epoch": 1.54, + "grad_norm": 7.255666201384122, + "learning_rate": 2.6965773047964926e-06, + "loss": 1.539, + "step": 127707 + }, + { + "epoch": 1.54, + "grad_norm": 7.798716323383268, + "learning_rate": 2.6961782761192314e-06, + "loss": 1.2513, + "step": 127710 + }, + { + "epoch": 1.54, + "grad_norm": 9.780564138274274, + "learning_rate": 2.6957792723672814e-06, + "loss": 0.9858, + "step": 127713 + }, + { + "epoch": 1.54, + "grad_norm": 7.791165247535879, + "learning_rate": 2.6953802935420105e-06, + "loss": 1.0109, + "step": 127716 + }, + { + "epoch": 1.54, + "grad_norm": 9.700497106185134, + "learning_rate": 2.694981339644771e-06, + "loss": 0.7749, + "step": 127719 + }, + { + "epoch": 1.54, + "grad_norm": 14.921167859107808, + "learning_rate": 2.694582410676929e-06, + "loss": 1.266, + "step": 127722 + }, + { + "epoch": 1.54, + "grad_norm": 6.679816586830311, + "learning_rate": 2.694183506639847e-06, + "loss": 0.9232, + "step": 127725 + }, + { + "epoch": 1.54, + "grad_norm": 3.9984506432850035, + "learning_rate": 2.693784627534888e-06, + "loss": 0.9823, + "step": 127728 + }, + { + "epoch": 1.54, + "grad_norm": 13.257905447950728, + "learning_rate": 2.6933857733634096e-06, + "loss": 1.1557, + "step": 127731 + }, + { + "epoch": 1.54, + "grad_norm": 5.126965174164245, + "learning_rate": 2.6929869441267707e-06, + "loss": 1.1339, + "step": 127734 + }, + { + "epoch": 1.54, + "grad_norm": 9.274497358792322, + "learning_rate": 2.6925881398263354e-06, + "loss": 1.1648, + "step": 127737 + }, + { + "epoch": 1.54, + "grad_norm": 5.331820910466757, + "learning_rate": 2.6921893604634653e-06, + "loss": 1.0478, + "step": 127740 + }, + { + "epoch": 1.54, + "grad_norm": 11.253502871709275, + "learning_rate": 2.691790606039523e-06, + "loss": 0.9283, + "step": 127743 + }, + { + "epoch": 1.54, + "grad_norm": 7.415108385470928, + "learning_rate": 2.6913918765558645e-06, + "loss": 1.0573, + "step": 127746 + }, + { + "epoch": 1.54, + "grad_norm": 5.313820374146792, + "learning_rate": 2.690993172013855e-06, + "loss": 0.9986, + "step": 127749 + }, + { + "epoch": 1.54, + "grad_norm": 2.721115099167023, + "learning_rate": 2.690594492414851e-06, + "loss": 1.0625, + "step": 127752 + }, + { + "epoch": 1.54, + "grad_norm": 17.641974646781634, + "learning_rate": 2.6901958377602177e-06, + "loss": 1.3629, + "step": 127755 + }, + { + "epoch": 1.54, + "grad_norm": 2.632648597041651, + "learning_rate": 2.68979720805131e-06, + "loss": 1.5859, + "step": 127758 + }, + { + "epoch": 1.54, + "grad_norm": 7.904319128447005, + "learning_rate": 2.6893986032894927e-06, + "loss": 0.942, + "step": 127761 + }, + { + "epoch": 1.54, + "grad_norm": 12.652784169890962, + "learning_rate": 2.6890000234761225e-06, + "loss": 0.687, + "step": 127764 + }, + { + "epoch": 1.54, + "grad_norm": 2.8352499389092958, + "learning_rate": 2.688601468612564e-06, + "loss": 1.5485, + "step": 127767 + }, + { + "epoch": 1.54, + "grad_norm": 5.573824840089161, + "learning_rate": 2.6882029387001763e-06, + "loss": 1.113, + "step": 127770 + }, + { + "epoch": 1.54, + "grad_norm": 5.071597940593779, + "learning_rate": 2.6878044337403197e-06, + "loss": 1.0749, + "step": 127773 + }, + { + "epoch": 1.54, + "grad_norm": 10.807871799501672, + "learning_rate": 2.687405953734349e-06, + "loss": 1.1831, + "step": 127776 + }, + { + "epoch": 1.54, + "grad_norm": 5.793385752615244, + "learning_rate": 2.687007498683628e-06, + "loss": 1.2001, + "step": 127779 + }, + { + "epoch": 1.54, + "grad_norm": 11.285912569591204, + "learning_rate": 2.6866090685895185e-06, + "loss": 1.3282, + "step": 127782 + }, + { + "epoch": 1.54, + "grad_norm": 4.526871735526646, + "learning_rate": 2.6862106634533757e-06, + "loss": 1.4152, + "step": 127785 + }, + { + "epoch": 1.54, + "grad_norm": 16.58941472407141, + "learning_rate": 2.685812283276561e-06, + "loss": 1.1569, + "step": 127788 + }, + { + "epoch": 1.54, + "grad_norm": 8.165999482623526, + "learning_rate": 2.6854139280604386e-06, + "loss": 1.3993, + "step": 127791 + }, + { + "epoch": 1.54, + "grad_norm": 6.84356965624836, + "learning_rate": 2.6850155978063595e-06, + "loss": 0.884, + "step": 127794 + }, + { + "epoch": 1.54, + "grad_norm": 5.328362604465723, + "learning_rate": 2.684617292515691e-06, + "loss": 1.2495, + "step": 127797 + }, + { + "epoch": 1.54, + "grad_norm": 48.87475594525324, + "learning_rate": 2.6842190121897858e-06, + "loss": 1.3321, + "step": 127800 + }, + { + "epoch": 1.54, + "grad_norm": 11.989287574538924, + "learning_rate": 2.683820756830006e-06, + "loss": 1.0821, + "step": 127803 + }, + { + "epoch": 1.54, + "grad_norm": 4.461437841205886, + "learning_rate": 2.68342252643771e-06, + "loss": 1.0763, + "step": 127806 + }, + { + "epoch": 1.54, + "grad_norm": 3.746413096286984, + "learning_rate": 2.6830243210142602e-06, + "loss": 1.1876, + "step": 127809 + }, + { + "epoch": 1.54, + "grad_norm": 4.519103836072494, + "learning_rate": 2.682626140561011e-06, + "loss": 1.329, + "step": 127812 + }, + { + "epoch": 1.54, + "grad_norm": 6.478715083882822, + "learning_rate": 2.682227985079325e-06, + "loss": 1.1809, + "step": 127815 + }, + { + "epoch": 1.54, + "grad_norm": 13.32462568524631, + "learning_rate": 2.6818298545705567e-06, + "loss": 1.3274, + "step": 127818 + }, + { + "epoch": 1.54, + "grad_norm": 4.809392381706272, + "learning_rate": 2.6814317490360677e-06, + "loss": 1.0852, + "step": 127821 + }, + { + "epoch": 1.54, + "grad_norm": 8.019218605467467, + "learning_rate": 2.6810336684772175e-06, + "loss": 1.237, + "step": 127824 + }, + { + "epoch": 1.54, + "grad_norm": 7.031178955334786, + "learning_rate": 2.6806356128953615e-06, + "loss": 1.0053, + "step": 127827 + }, + { + "epoch": 1.54, + "grad_norm": 8.419678973502842, + "learning_rate": 2.680237582291859e-06, + "loss": 0.8543, + "step": 127830 + }, + { + "epoch": 1.54, + "grad_norm": 9.822051535499604, + "learning_rate": 2.679839576668072e-06, + "loss": 0.8351, + "step": 127833 + }, + { + "epoch": 1.54, + "grad_norm": 7.019111458588626, + "learning_rate": 2.679441596025353e-06, + "loss": 1.3142, + "step": 127836 + }, + { + "epoch": 1.54, + "grad_norm": 12.528038063350678, + "learning_rate": 2.6790436403650666e-06, + "loss": 1.0959, + "step": 127839 + }, + { + "epoch": 1.54, + "grad_norm": 12.492712990331384, + "learning_rate": 2.678645709688563e-06, + "loss": 0.8945, + "step": 127842 + }, + { + "epoch": 1.54, + "grad_norm": 7.297110537478042, + "learning_rate": 2.678247803997205e-06, + "loss": 1.1011, + "step": 127845 + }, + { + "epoch": 1.54, + "grad_norm": 74.54739790300069, + "learning_rate": 2.6778499232923504e-06, + "loss": 1.2587, + "step": 127848 + }, + { + "epoch": 1.54, + "grad_norm": 4.940562787702231, + "learning_rate": 2.677452067575359e-06, + "loss": 1.1402, + "step": 127851 + }, + { + "epoch": 1.54, + "grad_norm": 6.427617395172409, + "learning_rate": 2.6770542368475837e-06, + "loss": 1.4272, + "step": 127854 + }, + { + "epoch": 1.54, + "grad_norm": 7.155289593163248, + "learning_rate": 2.676656431110387e-06, + "loss": 1.1169, + "step": 127857 + }, + { + "epoch": 1.54, + "grad_norm": 11.376441066935003, + "learning_rate": 2.676258650365121e-06, + "loss": 1.0927, + "step": 127860 + }, + { + "epoch": 1.54, + "grad_norm": 3.624845433180271, + "learning_rate": 2.6758608946131505e-06, + "loss": 1.1566, + "step": 127863 + }, + { + "epoch": 1.54, + "grad_norm": 14.083672879454703, + "learning_rate": 2.675463163855824e-06, + "loss": 1.007, + "step": 127866 + }, + { + "epoch": 1.54, + "grad_norm": 19.68517833440138, + "learning_rate": 2.6750654580945046e-06, + "loss": 1.3028, + "step": 127869 + }, + { + "epoch": 1.54, + "grad_norm": 16.00642128170017, + "learning_rate": 2.6746677773305473e-06, + "loss": 0.6332, + "step": 127872 + }, + { + "epoch": 1.54, + "grad_norm": 8.194591638366385, + "learning_rate": 2.674270121565311e-06, + "loss": 1.263, + "step": 127875 + }, + { + "epoch": 1.54, + "grad_norm": 74.63440314499151, + "learning_rate": 2.6738724908001546e-06, + "loss": 1.2402, + "step": 127878 + }, + { + "epoch": 1.54, + "grad_norm": 10.447908028170817, + "learning_rate": 2.673474885036433e-06, + "loss": 0.8035, + "step": 127881 + }, + { + "epoch": 1.54, + "grad_norm": 12.842271658920993, + "learning_rate": 2.6730773042754997e-06, + "loss": 0.9679, + "step": 127884 + }, + { + "epoch": 1.54, + "grad_norm": 7.016368839180711, + "learning_rate": 2.6726797485187137e-06, + "loss": 1.2178, + "step": 127887 + }, + { + "epoch": 1.54, + "grad_norm": 10.281342455031716, + "learning_rate": 2.672282217767437e-06, + "loss": 1.2568, + "step": 127890 + }, + { + "epoch": 1.54, + "grad_norm": 19.17746071357452, + "learning_rate": 2.6718847120230175e-06, + "loss": 1.1166, + "step": 127893 + }, + { + "epoch": 1.54, + "grad_norm": 10.294269910690124, + "learning_rate": 2.6714872312868156e-06, + "loss": 1.4872, + "step": 127896 + }, + { + "epoch": 1.54, + "grad_norm": 11.583998472254358, + "learning_rate": 2.6710897755601916e-06, + "loss": 1.2743, + "step": 127899 + }, + { + "epoch": 1.54, + "grad_norm": 19.275949913175534, + "learning_rate": 2.6706923448444943e-06, + "loss": 1.3657, + "step": 127902 + }, + { + "epoch": 1.54, + "grad_norm": 7.44464279206111, + "learning_rate": 2.670294939141088e-06, + "loss": 1.2981, + "step": 127905 + }, + { + "epoch": 1.54, + "grad_norm": 6.80573931200749, + "learning_rate": 2.669897558451322e-06, + "loss": 1.3353, + "step": 127908 + }, + { + "epoch": 1.54, + "grad_norm": 5.223905162127579, + "learning_rate": 2.669500202776556e-06, + "loss": 0.8958, + "step": 127911 + }, + { + "epoch": 1.54, + "grad_norm": 4.5931048663249, + "learning_rate": 2.6691028721181444e-06, + "loss": 0.9926, + "step": 127914 + }, + { + "epoch": 1.54, + "grad_norm": 10.060873828710019, + "learning_rate": 2.6687055664774462e-06, + "loss": 0.885, + "step": 127917 + }, + { + "epoch": 1.54, + "grad_norm": 4.421514484115344, + "learning_rate": 2.668308285855814e-06, + "loss": 1.219, + "step": 127920 + }, + { + "epoch": 1.54, + "grad_norm": 6.805795484330239, + "learning_rate": 2.667911030254606e-06, + "loss": 1.2168, + "step": 127923 + }, + { + "epoch": 1.54, + "grad_norm": 5.766647857561133, + "learning_rate": 2.6675137996751755e-06, + "loss": 1.3396, + "step": 127926 + }, + { + "epoch": 1.54, + "grad_norm": 9.039389341386936, + "learning_rate": 2.6671165941188782e-06, + "loss": 1.1192, + "step": 127929 + }, + { + "epoch": 1.54, + "grad_norm": 12.167700699140347, + "learning_rate": 2.6667194135870745e-06, + "loss": 1.3607, + "step": 127932 + }, + { + "epoch": 1.54, + "grad_norm": 26.446228447919637, + "learning_rate": 2.666322258081112e-06, + "loss": 0.9816, + "step": 127935 + }, + { + "epoch": 1.54, + "grad_norm": 6.34660431656097, + "learning_rate": 2.66592512760235e-06, + "loss": 0.8246, + "step": 127938 + }, + { + "epoch": 1.54, + "grad_norm": 6.899077670436127, + "learning_rate": 2.6655280221521474e-06, + "loss": 1.1141, + "step": 127941 + }, + { + "epoch": 1.54, + "grad_norm": 8.88318372408682, + "learning_rate": 2.665130941731856e-06, + "loss": 0.9624, + "step": 127944 + }, + { + "epoch": 1.54, + "grad_norm": 10.56682719652196, + "learning_rate": 2.6647338863428272e-06, + "loss": 1.2984, + "step": 127947 + }, + { + "epoch": 1.54, + "grad_norm": 14.54055907585306, + "learning_rate": 2.664336855986419e-06, + "loss": 1.0261, + "step": 127950 + }, + { + "epoch": 1.54, + "grad_norm": 4.19734816333976, + "learning_rate": 2.663939850663988e-06, + "loss": 1.2462, + "step": 127953 + }, + { + "epoch": 1.54, + "grad_norm": 10.644795795706687, + "learning_rate": 2.663542870376887e-06, + "loss": 0.9547, + "step": 127956 + }, + { + "epoch": 1.54, + "grad_norm": 2.6917896295551627, + "learning_rate": 2.663145915126475e-06, + "loss": 1.2214, + "step": 127959 + }, + { + "epoch": 1.54, + "grad_norm": 7.903063564139942, + "learning_rate": 2.6627489849141008e-06, + "loss": 0.6817, + "step": 127962 + }, + { + "epoch": 1.54, + "grad_norm": 11.710028149998365, + "learning_rate": 2.662352079741123e-06, + "loss": 1.228, + "step": 127965 + }, + { + "epoch": 1.54, + "grad_norm": 4.730867284405434, + "learning_rate": 2.661955199608892e-06, + "loss": 1.0307, + "step": 127968 + }, + { + "epoch": 1.54, + "grad_norm": 6.153387823306625, + "learning_rate": 2.661558344518769e-06, + "loss": 1.2934, + "step": 127971 + }, + { + "epoch": 1.54, + "grad_norm": 9.774833044856411, + "learning_rate": 2.6611615144721006e-06, + "loss": 0.9952, + "step": 127974 + }, + { + "epoch": 1.54, + "grad_norm": 7.961064594792478, + "learning_rate": 2.6607647094702437e-06, + "loss": 1.2481, + "step": 127977 + }, + { + "epoch": 1.54, + "grad_norm": 9.738225482267831, + "learning_rate": 2.6603679295145535e-06, + "loss": 1.3589, + "step": 127980 + }, + { + "epoch": 1.54, + "grad_norm": 17.481161151222835, + "learning_rate": 2.6599711746063873e-06, + "loss": 1.0134, + "step": 127983 + }, + { + "epoch": 1.54, + "grad_norm": 4.4069938575073655, + "learning_rate": 2.659574444747095e-06, + "loss": 0.8874, + "step": 127986 + }, + { + "epoch": 1.54, + "grad_norm": 14.531151145742315, + "learning_rate": 2.659177739938028e-06, + "loss": 1.145, + "step": 127989 + }, + { + "epoch": 1.54, + "grad_norm": 6.924062407242958, + "learning_rate": 2.658781060180544e-06, + "loss": 1.0988, + "step": 127992 + }, + { + "epoch": 1.54, + "grad_norm": 7.991176755985064, + "learning_rate": 2.6583844054759956e-06, + "loss": 1.2653, + "step": 127995 + }, + { + "epoch": 1.54, + "grad_norm": 8.934507610758265, + "learning_rate": 2.65798777582574e-06, + "loss": 1.0485, + "step": 127998 + }, + { + "epoch": 1.54, + "grad_norm": 5.265331066709596, + "learning_rate": 2.6575911712311244e-06, + "loss": 1.0872, + "step": 128001 + }, + { + "epoch": 1.54, + "grad_norm": 15.8020363095473, + "learning_rate": 2.657194591693505e-06, + "loss": 1.3438, + "step": 128004 + }, + { + "epoch": 1.54, + "grad_norm": 5.003653211459311, + "learning_rate": 2.656798037214239e-06, + "loss": 1.3567, + "step": 128007 + }, + { + "epoch": 1.54, + "grad_norm": 12.61054775524789, + "learning_rate": 2.6564015077946738e-06, + "loss": 1.2805, + "step": 128010 + }, + { + "epoch": 1.54, + "grad_norm": 5.719358244384033, + "learning_rate": 2.6560050034361674e-06, + "loss": 1.1036, + "step": 128013 + }, + { + "epoch": 1.54, + "grad_norm": 4.015747219852055, + "learning_rate": 2.6556085241400675e-06, + "loss": 0.9875, + "step": 128016 + }, + { + "epoch": 1.54, + "grad_norm": 5.619611399690236, + "learning_rate": 2.65521206990773e-06, + "loss": 1.1754, + "step": 128019 + }, + { + "epoch": 1.54, + "grad_norm": 37.18203938291435, + "learning_rate": 2.654815640740509e-06, + "loss": 1.1554, + "step": 128022 + }, + { + "epoch": 1.54, + "grad_norm": 3.9183492396843755, + "learning_rate": 2.654419236639759e-06, + "loss": 1.0048, + "step": 128025 + }, + { + "epoch": 1.54, + "grad_norm": 2.636016543209282, + "learning_rate": 2.6540228576068306e-06, + "loss": 1.0472, + "step": 128028 + }, + { + "epoch": 1.54, + "grad_norm": 8.308215712093432, + "learning_rate": 2.653626503643073e-06, + "loss": 1.082, + "step": 128031 + }, + { + "epoch": 1.54, + "grad_norm": 6.31460138128612, + "learning_rate": 2.6532301747498413e-06, + "loss": 1.1838, + "step": 128034 + }, + { + "epoch": 1.54, + "grad_norm": 3.616416197072691, + "learning_rate": 2.6528338709284896e-06, + "loss": 0.9868, + "step": 128037 + }, + { + "epoch": 1.54, + "grad_norm": 5.885044174680085, + "learning_rate": 2.6524375921803725e-06, + "loss": 0.8294, + "step": 128040 + }, + { + "epoch": 1.54, + "grad_norm": 5.998966063885583, + "learning_rate": 2.6520413385068357e-06, + "loss": 1.0669, + "step": 128043 + }, + { + "epoch": 1.54, + "grad_norm": 3.71987663488003, + "learning_rate": 2.6516451099092354e-06, + "loss": 1.052, + "step": 128046 + }, + { + "epoch": 1.54, + "grad_norm": 4.0607033848000285, + "learning_rate": 2.651248906388927e-06, + "loss": 1.4891, + "step": 128049 + }, + { + "epoch": 1.54, + "grad_norm": 7.059987753225033, + "learning_rate": 2.6508527279472586e-06, + "loss": 0.9691, + "step": 128052 + }, + { + "epoch": 1.54, + "grad_norm": 11.4585060452693, + "learning_rate": 2.6504565745855794e-06, + "loss": 1.3293, + "step": 128055 + }, + { + "epoch": 1.54, + "grad_norm": 5.397729703569766, + "learning_rate": 2.650060446305245e-06, + "loss": 1.1609, + "step": 128058 + }, + { + "epoch": 1.54, + "grad_norm": 6.711065976551642, + "learning_rate": 2.6496643431076077e-06, + "loss": 1.1357, + "step": 128061 + }, + { + "epoch": 1.54, + "grad_norm": 20.177158760473798, + "learning_rate": 2.649268264994017e-06, + "loss": 1.5095, + "step": 128064 + }, + { + "epoch": 1.54, + "grad_norm": 8.205858058293375, + "learning_rate": 2.6488722119658296e-06, + "loss": 1.4047, + "step": 128067 + }, + { + "epoch": 1.54, + "grad_norm": 8.822903981671969, + "learning_rate": 2.6484761840243934e-06, + "loss": 1.3318, + "step": 128070 + }, + { + "epoch": 1.54, + "grad_norm": 8.655127046377299, + "learning_rate": 2.6480801811710567e-06, + "loss": 1.2063, + "step": 128073 + }, + { + "epoch": 1.54, + "grad_norm": 15.377527826897332, + "learning_rate": 2.6476842034071747e-06, + "loss": 1.2365, + "step": 128076 + }, + { + "epoch": 1.54, + "grad_norm": 8.219408012989849, + "learning_rate": 2.6472882507341e-06, + "loss": 1.2981, + "step": 128079 + }, + { + "epoch": 1.54, + "grad_norm": 5.855400559319712, + "learning_rate": 2.6468923231531806e-06, + "loss": 1.06, + "step": 128082 + }, + { + "epoch": 1.54, + "grad_norm": 5.869639789626695, + "learning_rate": 2.6464964206657685e-06, + "loss": 1.1684, + "step": 128085 + }, + { + "epoch": 1.54, + "grad_norm": 12.785821989615508, + "learning_rate": 2.6461005432732155e-06, + "loss": 1.5212, + "step": 128088 + }, + { + "epoch": 1.54, + "grad_norm": 7.463673300646426, + "learning_rate": 2.6457046909768757e-06, + "loss": 1.1499, + "step": 128091 + }, + { + "epoch": 1.54, + "grad_norm": 13.863694611613969, + "learning_rate": 2.6453088637780967e-06, + "loss": 0.7043, + "step": 128094 + }, + { + "epoch": 1.54, + "grad_norm": 5.271127549016515, + "learning_rate": 2.6449130616782258e-06, + "loss": 1.3331, + "step": 128097 + }, + { + "epoch": 1.54, + "grad_norm": 3.6734937058839456, + "learning_rate": 2.6445172846786172e-06, + "loss": 1.3663, + "step": 128100 + }, + { + "epoch": 1.54, + "grad_norm": 12.09926966984506, + "learning_rate": 2.6441215327806226e-06, + "loss": 1.2912, + "step": 128103 + }, + { + "epoch": 1.54, + "grad_norm": 6.640142118537132, + "learning_rate": 2.643725805985594e-06, + "loss": 1.1897, + "step": 128106 + }, + { + "epoch": 1.54, + "grad_norm": 4.681428776118225, + "learning_rate": 2.643330104294877e-06, + "loss": 1.1104, + "step": 128109 + }, + { + "epoch": 1.54, + "grad_norm": 13.057851504714593, + "learning_rate": 2.6429344277098244e-06, + "loss": 1.3634, + "step": 128112 + }, + { + "epoch": 1.54, + "grad_norm": 3.5070673411117244, + "learning_rate": 2.642538776231789e-06, + "loss": 1.5631, + "step": 128115 + }, + { + "epoch": 1.54, + "grad_norm": 17.89851546565942, + "learning_rate": 2.6421431498621163e-06, + "loss": 1.0786, + "step": 128118 + }, + { + "epoch": 1.54, + "grad_norm": 6.625939421684444, + "learning_rate": 2.6417475486021614e-06, + "loss": 1.1118, + "step": 128121 + }, + { + "epoch": 1.54, + "grad_norm": 6.274924446094201, + "learning_rate": 2.6413519724532686e-06, + "loss": 1.0915, + "step": 128124 + }, + { + "epoch": 1.54, + "grad_norm": 4.5991363181064955, + "learning_rate": 2.6409564214167914e-06, + "loss": 1.0666, + "step": 128127 + }, + { + "epoch": 1.54, + "grad_norm": 7.474665391656434, + "learning_rate": 2.640560895494079e-06, + "loss": 1.234, + "step": 128130 + }, + { + "epoch": 1.54, + "grad_norm": 7.547160448285946, + "learning_rate": 2.640165394686485e-06, + "loss": 1.4303, + "step": 128133 + }, + { + "epoch": 1.54, + "grad_norm": 14.417765653183887, + "learning_rate": 2.639769918995354e-06, + "loss": 1.5196, + "step": 128136 + }, + { + "epoch": 1.54, + "grad_norm": 14.003961771236039, + "learning_rate": 2.6393744684220357e-06, + "loss": 1.3462, + "step": 128139 + }, + { + "epoch": 1.54, + "grad_norm": 9.090764285780304, + "learning_rate": 2.6389790429678796e-06, + "loss": 1.087, + "step": 128142 + }, + { + "epoch": 1.54, + "grad_norm": 5.006145975376569, + "learning_rate": 2.638583642634238e-06, + "loss": 1.1594, + "step": 128145 + }, + { + "epoch": 1.54, + "grad_norm": 11.31520586470298, + "learning_rate": 2.6381882674224614e-06, + "loss": 0.8819, + "step": 128148 + }, + { + "epoch": 1.54, + "grad_norm": 8.210893658966825, + "learning_rate": 2.6377929173338923e-06, + "loss": 1.196, + "step": 128151 + }, + { + "epoch": 1.54, + "grad_norm": 10.162752400959596, + "learning_rate": 2.637397592369886e-06, + "loss": 1.1239, + "step": 128154 + }, + { + "epoch": 1.54, + "grad_norm": 4.140909046075565, + "learning_rate": 2.6370022925317916e-06, + "loss": 1.0306, + "step": 128157 + }, + { + "epoch": 1.54, + "grad_norm": 3.327079531329811, + "learning_rate": 2.636607017820957e-06, + "loss": 0.9031, + "step": 128160 + }, + { + "epoch": 1.54, + "grad_norm": 3.1667574148142994, + "learning_rate": 2.6362117682387267e-06, + "loss": 1.1668, + "step": 128163 + }, + { + "epoch": 1.54, + "grad_norm": 7.70644434862215, + "learning_rate": 2.6358165437864525e-06, + "loss": 1.1312, + "step": 128166 + }, + { + "epoch": 1.54, + "grad_norm": 13.128521978471033, + "learning_rate": 2.6354213444654854e-06, + "loss": 1.3336, + "step": 128169 + }, + { + "epoch": 1.54, + "grad_norm": 4.855848097266141, + "learning_rate": 2.6350261702771717e-06, + "loss": 1.1483, + "step": 128172 + }, + { + "epoch": 1.54, + "grad_norm": 17.454410504260043, + "learning_rate": 2.6346310212228645e-06, + "loss": 1.3201, + "step": 128175 + }, + { + "epoch": 1.54, + "grad_norm": 10.018617497260411, + "learning_rate": 2.634235897303907e-06, + "loss": 1.3806, + "step": 128178 + }, + { + "epoch": 1.54, + "grad_norm": 10.876159709105018, + "learning_rate": 2.6338407985216487e-06, + "loss": 0.9926, + "step": 128181 + }, + { + "epoch": 1.54, + "grad_norm": 4.849458507440614, + "learning_rate": 2.633445724877437e-06, + "loss": 0.9482, + "step": 128184 + }, + { + "epoch": 1.54, + "grad_norm": 6.732897253756935, + "learning_rate": 2.633050676372624e-06, + "loss": 0.7323, + "step": 128187 + }, + { + "epoch": 1.54, + "grad_norm": 9.881845837441581, + "learning_rate": 2.6326556530085535e-06, + "loss": 0.923, + "step": 128190 + }, + { + "epoch": 1.54, + "grad_norm": 10.752643855813673, + "learning_rate": 2.632260654786576e-06, + "loss": 1.2251, + "step": 128193 + }, + { + "epoch": 1.54, + "grad_norm": 14.751506777421843, + "learning_rate": 2.631865681708038e-06, + "loss": 0.9895, + "step": 128196 + }, + { + "epoch": 1.54, + "grad_norm": 9.923449453897708, + "learning_rate": 2.6314707337742916e-06, + "loss": 1.2356, + "step": 128199 + }, + { + "epoch": 1.54, + "grad_norm": 5.643178354868283, + "learning_rate": 2.631075810986682e-06, + "loss": 1.3426, + "step": 128202 + }, + { + "epoch": 1.54, + "grad_norm": 5.06133001795753, + "learning_rate": 2.6306809133465527e-06, + "loss": 1.2142, + "step": 128205 + }, + { + "epoch": 1.54, + "grad_norm": 6.178675432155486, + "learning_rate": 2.6302860408552553e-06, + "loss": 0.9286, + "step": 128208 + }, + { + "epoch": 1.54, + "grad_norm": 4.366197418220153, + "learning_rate": 2.629891193514137e-06, + "loss": 1.0383, + "step": 128211 + }, + { + "epoch": 1.54, + "grad_norm": 11.634032461942795, + "learning_rate": 2.629496371324548e-06, + "loss": 1.4872, + "step": 128214 + }, + { + "epoch": 1.54, + "grad_norm": 8.768652782200984, + "learning_rate": 2.629101574287831e-06, + "loss": 1.05, + "step": 128217 + }, + { + "epoch": 1.54, + "grad_norm": 11.177672708856214, + "learning_rate": 2.6287068024053377e-06, + "loss": 0.883, + "step": 128220 + }, + { + "epoch": 1.54, + "grad_norm": 8.766923117179209, + "learning_rate": 2.6283120556784104e-06, + "loss": 1.1236, + "step": 128223 + }, + { + "epoch": 1.54, + "grad_norm": 14.378974408715413, + "learning_rate": 2.6279173341083985e-06, + "loss": 1.5258, + "step": 128226 + }, + { + "epoch": 1.54, + "grad_norm": 6.525881778181981, + "learning_rate": 2.627522637696652e-06, + "loss": 1.103, + "step": 128229 + }, + { + "epoch": 1.54, + "grad_norm": 40.444889589906936, + "learning_rate": 2.6271279664445125e-06, + "loss": 1.2902, + "step": 128232 + }, + { + "epoch": 1.54, + "grad_norm": 3.7653446780225517, + "learning_rate": 2.6267333203533296e-06, + "loss": 0.9379, + "step": 128235 + }, + { + "epoch": 1.54, + "grad_norm": 6.292402052307867, + "learning_rate": 2.62633869942445e-06, + "loss": 1.2264, + "step": 128238 + }, + { + "epoch": 1.54, + "grad_norm": 10.511465414792806, + "learning_rate": 2.625944103659224e-06, + "loss": 1.1932, + "step": 128241 + }, + { + "epoch": 1.54, + "grad_norm": 24.91520875828493, + "learning_rate": 2.6255495330589943e-06, + "loss": 1.2813, + "step": 128244 + }, + { + "epoch": 1.54, + "grad_norm": 6.4486768546987, + "learning_rate": 2.625154987625105e-06, + "loss": 0.921, + "step": 128247 + }, + { + "epoch": 1.54, + "grad_norm": 7.1155883327239975, + "learning_rate": 2.6247604673589045e-06, + "loss": 1.3225, + "step": 128250 + }, + { + "epoch": 1.54, + "grad_norm": 4.480565662594096, + "learning_rate": 2.62436597226174e-06, + "loss": 0.9262, + "step": 128253 + }, + { + "epoch": 1.54, + "grad_norm": 6.7310629429722315, + "learning_rate": 2.623971502334961e-06, + "loss": 1.2474, + "step": 128256 + }, + { + "epoch": 1.54, + "grad_norm": 6.914312205942538, + "learning_rate": 2.623577057579908e-06, + "loss": 1.3505, + "step": 128259 + }, + { + "epoch": 1.54, + "grad_norm": 15.46575303327786, + "learning_rate": 2.623182637997932e-06, + "loss": 1.5105, + "step": 128262 + }, + { + "epoch": 1.54, + "grad_norm": 5.588385949592595, + "learning_rate": 2.622788243590374e-06, + "loss": 1.239, + "step": 128265 + }, + { + "epoch": 1.54, + "grad_norm": 7.087176320011314, + "learning_rate": 2.6223938743585843e-06, + "loss": 1.6896, + "step": 128268 + }, + { + "epoch": 1.54, + "grad_norm": 17.438116621140246, + "learning_rate": 2.6219995303039047e-06, + "loss": 1.5096, + "step": 128271 + }, + { + "epoch": 1.54, + "grad_norm": 3.585532384321249, + "learning_rate": 2.6216052114276834e-06, + "loss": 1.2505, + "step": 128274 + }, + { + "epoch": 1.54, + "grad_norm": 15.297146922842417, + "learning_rate": 2.621210917731266e-06, + "loss": 1.3644, + "step": 128277 + }, + { + "epoch": 1.54, + "grad_norm": 10.32361290259923, + "learning_rate": 2.620816649215997e-06, + "loss": 0.8178, + "step": 128280 + }, + { + "epoch": 1.54, + "grad_norm": 7.282683800306133, + "learning_rate": 2.620422405883226e-06, + "loss": 1.0147, + "step": 128283 + }, + { + "epoch": 1.54, + "grad_norm": 2.691570915156903, + "learning_rate": 2.620028187734296e-06, + "loss": 1.295, + "step": 128286 + }, + { + "epoch": 1.54, + "grad_norm": 13.067463657798685, + "learning_rate": 2.6196339947705486e-06, + "loss": 1.2494, + "step": 128289 + }, + { + "epoch": 1.54, + "grad_norm": 7.709532089452529, + "learning_rate": 2.6192398269933305e-06, + "loss": 0.9834, + "step": 128292 + }, + { + "epoch": 1.54, + "grad_norm": 8.070488641614338, + "learning_rate": 2.6188456844039923e-06, + "loss": 1.3173, + "step": 128295 + }, + { + "epoch": 1.54, + "grad_norm": 4.784040148967978, + "learning_rate": 2.6184515670038724e-06, + "loss": 1.2538, + "step": 128298 + }, + { + "epoch": 1.54, + "grad_norm": 5.670306149286496, + "learning_rate": 2.6180574747943188e-06, + "loss": 1.3345, + "step": 128301 + }, + { + "epoch": 1.54, + "grad_norm": 7.010225344345779, + "learning_rate": 2.6176634077766783e-06, + "loss": 1.3056, + "step": 128304 + }, + { + "epoch": 1.54, + "grad_norm": 15.577118170422464, + "learning_rate": 2.617269365952291e-06, + "loss": 1.1486, + "step": 128307 + }, + { + "epoch": 1.54, + "grad_norm": 2.5746114489842022, + "learning_rate": 2.6168753493225062e-06, + "loss": 0.7625, + "step": 128310 + }, + { + "epoch": 1.54, + "grad_norm": 17.78815033935152, + "learning_rate": 2.616481357888664e-06, + "loss": 1.1702, + "step": 128313 + }, + { + "epoch": 1.54, + "grad_norm": 4.067606111422226, + "learning_rate": 2.616087391652111e-06, + "loss": 1.2997, + "step": 128316 + }, + { + "epoch": 1.54, + "grad_norm": 6.02706507609988, + "learning_rate": 2.615693450614193e-06, + "loss": 1.2289, + "step": 128319 + }, + { + "epoch": 1.54, + "grad_norm": 3.6655581344731925, + "learning_rate": 2.6152995347762557e-06, + "loss": 1.0612, + "step": 128322 + }, + { + "epoch": 1.54, + "grad_norm": 4.592091924590322, + "learning_rate": 2.614905644139638e-06, + "loss": 1.169, + "step": 128325 + }, + { + "epoch": 1.54, + "grad_norm": 6.365619407118198, + "learning_rate": 2.6145117787056905e-06, + "loss": 0.7967, + "step": 128328 + }, + { + "epoch": 1.54, + "grad_norm": 9.208612597905994, + "learning_rate": 2.614117938475751e-06, + "loss": 0.9874, + "step": 128331 + }, + { + "epoch": 1.54, + "grad_norm": 6.344615824314197, + "learning_rate": 2.6137241234511666e-06, + "loss": 1.0249, + "step": 128334 + }, + { + "epoch": 1.54, + "grad_norm": 6.072743957767622, + "learning_rate": 2.613330333633285e-06, + "loss": 1.2223, + "step": 128337 + }, + { + "epoch": 1.54, + "grad_norm": 6.960314898176086, + "learning_rate": 2.612936569023442e-06, + "loss": 1.0732, + "step": 128340 + }, + { + "epoch": 1.54, + "grad_norm": 7.6516302635607385, + "learning_rate": 2.612542829622986e-06, + "loss": 1.1535, + "step": 128343 + }, + { + "epoch": 1.54, + "grad_norm": 5.458087592832083, + "learning_rate": 2.6121491154332645e-06, + "loss": 1.2762, + "step": 128346 + }, + { + "epoch": 1.54, + "grad_norm": 19.842081974793786, + "learning_rate": 2.611755426455613e-06, + "loss": 1.2018, + "step": 128349 + }, + { + "epoch": 1.54, + "grad_norm": 6.75791264955317, + "learning_rate": 2.6113617626913823e-06, + "loss": 1.1915, + "step": 128352 + }, + { + "epoch": 1.54, + "grad_norm": 7.715685928587227, + "learning_rate": 2.6109681241419094e-06, + "loss": 1.0272, + "step": 128355 + }, + { + "epoch": 1.54, + "grad_norm": 3.0079210813642874, + "learning_rate": 2.61057451080854e-06, + "loss": 1.1958, + "step": 128358 + }, + { + "epoch": 1.54, + "grad_norm": 5.331961384555973, + "learning_rate": 2.610180922692619e-06, + "loss": 0.9636, + "step": 128361 + }, + { + "epoch": 1.54, + "grad_norm": 17.230749236204, + "learning_rate": 2.6097873597954914e-06, + "loss": 1.2992, + "step": 128364 + }, + { + "epoch": 1.54, + "grad_norm": 10.87219091540153, + "learning_rate": 2.6093938221184955e-06, + "loss": 1.2614, + "step": 128367 + }, + { + "epoch": 1.54, + "grad_norm": 11.3117401622661, + "learning_rate": 2.609000309662979e-06, + "loss": 1.3557, + "step": 128370 + }, + { + "epoch": 1.54, + "grad_norm": 64.55898146907379, + "learning_rate": 2.6086068224302786e-06, + "loss": 1.2821, + "step": 128373 + }, + { + "epoch": 1.54, + "grad_norm": 2.1548995745028967, + "learning_rate": 2.608213360421744e-06, + "loss": 0.8333, + "step": 128376 + }, + { + "epoch": 1.54, + "grad_norm": 19.114908040704425, + "learning_rate": 2.6078199236387113e-06, + "loss": 1.1869, + "step": 128379 + }, + { + "epoch": 1.54, + "grad_norm": 8.000454068170756, + "learning_rate": 2.607426512082527e-06, + "loss": 1.089, + "step": 128382 + }, + { + "epoch": 1.54, + "grad_norm": 10.178729667552602, + "learning_rate": 2.6070331257545335e-06, + "loss": 1.4232, + "step": 128385 + }, + { + "epoch": 1.54, + "grad_norm": 14.929868924361402, + "learning_rate": 2.606639764656076e-06, + "loss": 1.1006, + "step": 128388 + }, + { + "epoch": 1.54, + "grad_norm": 8.59057412638849, + "learning_rate": 2.6062464287884903e-06, + "loss": 0.9776, + "step": 128391 + }, + { + "epoch": 1.54, + "grad_norm": 9.345893124931976, + "learning_rate": 2.6058531181531256e-06, + "loss": 1.1555, + "step": 128394 + }, + { + "epoch": 1.54, + "grad_norm": 21.27360035253614, + "learning_rate": 2.605459832751318e-06, + "loss": 1.1953, + "step": 128397 + }, + { + "epoch": 1.54, + "grad_norm": 6.020257263109079, + "learning_rate": 2.6050665725844126e-06, + "loss": 1.4975, + "step": 128400 + }, + { + "epoch": 1.54, + "grad_norm": 31.37094044352057, + "learning_rate": 2.6046733376537536e-06, + "loss": 1.0172, + "step": 128403 + }, + { + "epoch": 1.54, + "grad_norm": 4.256751252332993, + "learning_rate": 2.6042801279606787e-06, + "loss": 1.2758, + "step": 128406 + }, + { + "epoch": 1.54, + "grad_norm": 3.3024369572210155, + "learning_rate": 2.6038869435065316e-06, + "loss": 1.0912, + "step": 128409 + }, + { + "epoch": 1.54, + "grad_norm": 8.651061346495185, + "learning_rate": 2.6034937842926567e-06, + "loss": 1.5729, + "step": 128412 + }, + { + "epoch": 1.54, + "grad_norm": 16.598682071880823, + "learning_rate": 2.6031006503203904e-06, + "loss": 1.3424, + "step": 128415 + }, + { + "epoch": 1.54, + "grad_norm": 9.690820790841375, + "learning_rate": 2.6027075415910806e-06, + "loss": 1.3945, + "step": 128418 + }, + { + "epoch": 1.54, + "grad_norm": 13.704415127382374, + "learning_rate": 2.6023144581060623e-06, + "loss": 1.0522, + "step": 128421 + }, + { + "epoch": 1.54, + "grad_norm": 11.934594631190604, + "learning_rate": 2.6019213998666806e-06, + "loss": 0.9834, + "step": 128424 + }, + { + "epoch": 1.54, + "grad_norm": 6.338064584876395, + "learning_rate": 2.6015283668742763e-06, + "loss": 1.167, + "step": 128427 + }, + { + "epoch": 1.54, + "grad_norm": 13.837492775885524, + "learning_rate": 2.6011353591301936e-06, + "loss": 1.2812, + "step": 128430 + }, + { + "epoch": 1.54, + "grad_norm": 16.404230752867047, + "learning_rate": 2.6007423766357676e-06, + "loss": 1.0318, + "step": 128433 + }, + { + "epoch": 1.54, + "grad_norm": 8.864790410818891, + "learning_rate": 2.600349419392345e-06, + "loss": 0.9718, + "step": 128436 + }, + { + "epoch": 1.54, + "grad_norm": 6.104149219846217, + "learning_rate": 2.5999564874012627e-06, + "loss": 1.1385, + "step": 128439 + }, + { + "epoch": 1.54, + "grad_norm": 5.614663868004249, + "learning_rate": 2.599563580663863e-06, + "loss": 0.9313, + "step": 128442 + }, + { + "epoch": 1.54, + "grad_norm": 26.90133437889427, + "learning_rate": 2.59917069918149e-06, + "loss": 1.2721, + "step": 128445 + }, + { + "epoch": 1.54, + "grad_norm": 5.02348474865224, + "learning_rate": 2.598777842955479e-06, + "loss": 1.2371, + "step": 128448 + }, + { + "epoch": 1.54, + "grad_norm": 3.8014977918692083, + "learning_rate": 2.5983850119871736e-06, + "loss": 1.191, + "step": 128451 + }, + { + "epoch": 1.54, + "grad_norm": 19.161340020053366, + "learning_rate": 2.597992206277916e-06, + "loss": 0.869, + "step": 128454 + }, + { + "epoch": 1.54, + "grad_norm": 3.6436814473930386, + "learning_rate": 2.5975994258290448e-06, + "loss": 1.2845, + "step": 128457 + }, + { + "epoch": 1.54, + "grad_norm": 6.286075558241734, + "learning_rate": 2.5972066706418987e-06, + "loss": 1.1117, + "step": 128460 + }, + { + "epoch": 1.54, + "grad_norm": 14.069895364268385, + "learning_rate": 2.5968139407178194e-06, + "loss": 0.8113, + "step": 128463 + }, + { + "epoch": 1.54, + "grad_norm": 9.445023439870829, + "learning_rate": 2.5964212360581465e-06, + "loss": 1.0293, + "step": 128466 + }, + { + "epoch": 1.54, + "grad_norm": 9.570982121568415, + "learning_rate": 2.5960285566642217e-06, + "loss": 1.0812, + "step": 128469 + }, + { + "epoch": 1.54, + "grad_norm": 11.634427146459373, + "learning_rate": 2.595635902537388e-06, + "loss": 1.0467, + "step": 128472 + }, + { + "epoch": 1.54, + "grad_norm": 25.425196553011357, + "learning_rate": 2.595243273678979e-06, + "loss": 1.0055, + "step": 128475 + }, + { + "epoch": 1.54, + "grad_norm": 11.714579978637286, + "learning_rate": 2.59485067009034e-06, + "loss": 1.2123, + "step": 128478 + }, + { + "epoch": 1.54, + "grad_norm": 6.439267675469157, + "learning_rate": 2.5944580917728058e-06, + "loss": 1.401, + "step": 128481 + }, + { + "epoch": 1.54, + "grad_norm": 5.853084686130893, + "learning_rate": 2.5940655387277216e-06, + "loss": 1.3131, + "step": 128484 + }, + { + "epoch": 1.55, + "grad_norm": 43.85473527336014, + "learning_rate": 2.59367301095642e-06, + "loss": 1.3302, + "step": 128487 + }, + { + "epoch": 1.55, + "grad_norm": 5.241479434054251, + "learning_rate": 2.593280508460246e-06, + "loss": 1.2184, + "step": 128490 + }, + { + "epoch": 1.55, + "grad_norm": 3.312511278519996, + "learning_rate": 2.5928880312405367e-06, + "loss": 0.9579, + "step": 128493 + }, + { + "epoch": 1.55, + "grad_norm": 18.342343181369575, + "learning_rate": 2.592495579298636e-06, + "loss": 0.9629, + "step": 128496 + }, + { + "epoch": 1.55, + "grad_norm": 6.400412908739534, + "learning_rate": 2.592103152635881e-06, + "loss": 1.3836, + "step": 128499 + }, + { + "epoch": 1.55, + "grad_norm": 9.18123940983483, + "learning_rate": 2.5917107512536045e-06, + "loss": 1.0288, + "step": 128502 + }, + { + "epoch": 1.55, + "grad_norm": 4.792002955717449, + "learning_rate": 2.5913183751531524e-06, + "loss": 0.8921, + "step": 128505 + }, + { + "epoch": 1.55, + "grad_norm": 9.623014278451437, + "learning_rate": 2.5909260243358613e-06, + "loss": 1.3261, + "step": 128508 + }, + { + "epoch": 1.55, + "grad_norm": 18.122583959822492, + "learning_rate": 2.5905336988030737e-06, + "loss": 1.2804, + "step": 128511 + }, + { + "epoch": 1.55, + "grad_norm": 3.1602779293379246, + "learning_rate": 2.590141398556123e-06, + "loss": 1.3769, + "step": 128514 + }, + { + "epoch": 1.55, + "grad_norm": 3.203922787452092, + "learning_rate": 2.5897491235963514e-06, + "loss": 0.9416, + "step": 128517 + }, + { + "epoch": 1.55, + "grad_norm": 7.334550399000175, + "learning_rate": 2.5893568739250983e-06, + "loss": 1.1628, + "step": 128520 + }, + { + "epoch": 1.55, + "grad_norm": 14.42231262911039, + "learning_rate": 2.588964649543699e-06, + "loss": 0.8353, + "step": 128523 + }, + { + "epoch": 1.55, + "grad_norm": 7.367425399263466, + "learning_rate": 2.588572450453497e-06, + "loss": 1.0762, + "step": 128526 + }, + { + "epoch": 1.55, + "grad_norm": 12.768510868836358, + "learning_rate": 2.588180276655824e-06, + "loss": 0.9405, + "step": 128529 + }, + { + "epoch": 1.55, + "grad_norm": 12.65056841770047, + "learning_rate": 2.5877881281520222e-06, + "loss": 1.1368, + "step": 128532 + }, + { + "epoch": 1.55, + "grad_norm": 12.613651514225095, + "learning_rate": 2.58739600494343e-06, + "loss": 1.2023, + "step": 128535 + }, + { + "epoch": 1.55, + "grad_norm": 15.93573238756519, + "learning_rate": 2.587003907031388e-06, + "loss": 1.4273, + "step": 128538 + }, + { + "epoch": 1.55, + "grad_norm": 13.632082511952083, + "learning_rate": 2.5866118344172318e-06, + "loss": 1.3368, + "step": 128541 + }, + { + "epoch": 1.55, + "grad_norm": 10.6621982408695, + "learning_rate": 2.5862197871022955e-06, + "loss": 1.166, + "step": 128544 + }, + { + "epoch": 1.55, + "grad_norm": 11.39562593838939, + "learning_rate": 2.5858277650879217e-06, + "loss": 0.9779, + "step": 128547 + }, + { + "epoch": 1.55, + "grad_norm": 7.266456036229291, + "learning_rate": 2.585435768375446e-06, + "loss": 0.9861, + "step": 128550 + }, + { + "epoch": 1.55, + "grad_norm": 17.393019147462347, + "learning_rate": 2.585043796966211e-06, + "loss": 1.2798, + "step": 128553 + }, + { + "epoch": 1.55, + "grad_norm": 2.6986522320858555, + "learning_rate": 2.584651850861547e-06, + "loss": 1.2393, + "step": 128556 + }, + { + "epoch": 1.55, + "grad_norm": 7.6141222450265955, + "learning_rate": 2.5842599300627957e-06, + "loss": 1.022, + "step": 128559 + }, + { + "epoch": 1.55, + "grad_norm": 8.926695738540548, + "learning_rate": 2.5838680345712973e-06, + "loss": 1.0501, + "step": 128562 + }, + { + "epoch": 1.55, + "grad_norm": 4.948791587291549, + "learning_rate": 2.5834761643883856e-06, + "loss": 0.827, + "step": 128565 + }, + { + "epoch": 1.55, + "grad_norm": 3.8244202685359587, + "learning_rate": 2.5830843195153953e-06, + "loss": 0.9091, + "step": 128568 + }, + { + "epoch": 1.55, + "grad_norm": 10.992999114211557, + "learning_rate": 2.5826924999536675e-06, + "loss": 1.2174, + "step": 128571 + }, + { + "epoch": 1.55, + "grad_norm": 5.6353186655734895, + "learning_rate": 2.582300705704538e-06, + "loss": 1.2004, + "step": 128574 + }, + { + "epoch": 1.55, + "grad_norm": 4.737220243915359, + "learning_rate": 2.581908936769344e-06, + "loss": 1.2155, + "step": 128577 + }, + { + "epoch": 1.55, + "grad_norm": 15.494031499540721, + "learning_rate": 2.5815171931494253e-06, + "loss": 1.0004, + "step": 128580 + }, + { + "epoch": 1.55, + "grad_norm": 5.090347117762469, + "learning_rate": 2.5811254748461178e-06, + "loss": 1.271, + "step": 128583 + }, + { + "epoch": 1.55, + "grad_norm": 7.678668595461869, + "learning_rate": 2.5807337818607525e-06, + "loss": 0.9496, + "step": 128586 + }, + { + "epoch": 1.55, + "grad_norm": 11.333500834062422, + "learning_rate": 2.5803421141946717e-06, + "loss": 1.1392, + "step": 128589 + }, + { + "epoch": 1.55, + "grad_norm": 3.832375859255556, + "learning_rate": 2.579950471849213e-06, + "loss": 1.6953, + "step": 128592 + }, + { + "epoch": 1.55, + "grad_norm": 7.453193413584107, + "learning_rate": 2.5795588548257066e-06, + "loss": 1.3561, + "step": 128595 + }, + { + "epoch": 1.55, + "grad_norm": 2.442452221978396, + "learning_rate": 2.579167263125495e-06, + "loss": 1.1802, + "step": 128598 + }, + { + "epoch": 1.55, + "grad_norm": 8.212385440134696, + "learning_rate": 2.578775696749911e-06, + "loss": 1.0315, + "step": 128601 + }, + { + "epoch": 1.55, + "grad_norm": 6.789964790803909, + "learning_rate": 2.5783841557002963e-06, + "loss": 1.4265, + "step": 128604 + }, + { + "epoch": 1.55, + "grad_norm": 13.913888793956891, + "learning_rate": 2.577992639977983e-06, + "loss": 0.9758, + "step": 128607 + }, + { + "epoch": 1.55, + "grad_norm": 17.438850213904328, + "learning_rate": 2.5776011495843044e-06, + "loss": 1.5859, + "step": 128610 + }, + { + "epoch": 1.55, + "grad_norm": 4.36578310025149, + "learning_rate": 2.5772096845205996e-06, + "loss": 1.2838, + "step": 128613 + }, + { + "epoch": 1.55, + "grad_norm": 10.673952271002523, + "learning_rate": 2.576818244788204e-06, + "loss": 1.0867, + "step": 128616 + }, + { + "epoch": 1.55, + "grad_norm": 6.45480367549285, + "learning_rate": 2.5764268303884575e-06, + "loss": 1.1584, + "step": 128619 + }, + { + "epoch": 1.55, + "grad_norm": 13.77807556582409, + "learning_rate": 2.5760354413226883e-06, + "loss": 1.3199, + "step": 128622 + }, + { + "epoch": 1.55, + "grad_norm": 10.079164715087908, + "learning_rate": 2.5756440775922396e-06, + "loss": 1.533, + "step": 128625 + }, + { + "epoch": 1.55, + "grad_norm": 9.47335798396431, + "learning_rate": 2.5752527391984415e-06, + "loss": 0.9497, + "step": 128628 + }, + { + "epoch": 1.55, + "grad_norm": 8.716229819200455, + "learning_rate": 2.5748614261426295e-06, + "loss": 1.3154, + "step": 128631 + }, + { + "epoch": 1.55, + "grad_norm": 3.0949142348624554, + "learning_rate": 2.574470138426146e-06, + "loss": 1.1414, + "step": 128634 + }, + { + "epoch": 1.55, + "grad_norm": 30.880081384660823, + "learning_rate": 2.5740788760503164e-06, + "loss": 0.8035, + "step": 128637 + }, + { + "epoch": 1.55, + "grad_norm": 5.338299765589788, + "learning_rate": 2.573687639016482e-06, + "loss": 1.2516, + "step": 128640 + }, + { + "epoch": 1.55, + "grad_norm": 11.890608626917157, + "learning_rate": 2.5732964273259762e-06, + "loss": 1.4104, + "step": 128643 + }, + { + "epoch": 1.55, + "grad_norm": 7.945463818472721, + "learning_rate": 2.572905240980138e-06, + "loss": 0.9821, + "step": 128646 + }, + { + "epoch": 1.55, + "grad_norm": 7.910159680018286, + "learning_rate": 2.572514079980298e-06, + "loss": 1.1881, + "step": 128649 + }, + { + "epoch": 1.55, + "grad_norm": 15.168036043092705, + "learning_rate": 2.5721229443277904e-06, + "loss": 1.156, + "step": 128652 + }, + { + "epoch": 1.55, + "grad_norm": 3.8606234144720757, + "learning_rate": 2.5717318340239506e-06, + "loss": 1.1899, + "step": 128655 + }, + { + "epoch": 1.55, + "grad_norm": 16.050886216264544, + "learning_rate": 2.571340749070116e-06, + "loss": 1.2421, + "step": 128658 + }, + { + "epoch": 1.55, + "grad_norm": 6.163603663643163, + "learning_rate": 2.570949689467621e-06, + "loss": 1.2603, + "step": 128661 + }, + { + "epoch": 1.55, + "grad_norm": 3.6104306637165764, + "learning_rate": 2.5705586552177964e-06, + "loss": 1.3984, + "step": 128664 + }, + { + "epoch": 1.55, + "grad_norm": 9.598258012520958, + "learning_rate": 2.5701676463219815e-06, + "loss": 1.407, + "step": 128667 + }, + { + "epoch": 1.55, + "grad_norm": 1.9904874003258572, + "learning_rate": 2.569776662781506e-06, + "loss": 1.6793, + "step": 128670 + }, + { + "epoch": 1.55, + "grad_norm": 5.665643458188026, + "learning_rate": 2.569385704597709e-06, + "loss": 1.2242, + "step": 128673 + }, + { + "epoch": 1.55, + "grad_norm": 13.86224492480018, + "learning_rate": 2.568994771771919e-06, + "loss": 1.3858, + "step": 128676 + }, + { + "epoch": 1.55, + "grad_norm": 10.588414064151582, + "learning_rate": 2.5686038643054734e-06, + "loss": 0.9768, + "step": 128679 + }, + { + "epoch": 1.55, + "grad_norm": 18.43995016489263, + "learning_rate": 2.568212982199706e-06, + "loss": 1.3659, + "step": 128682 + }, + { + "epoch": 1.55, + "grad_norm": 5.717405761432418, + "learning_rate": 2.567822125455951e-06, + "loss": 1.4076, + "step": 128685 + }, + { + "epoch": 1.55, + "grad_norm": 4.253564614064753, + "learning_rate": 2.5674312940755443e-06, + "loss": 1.2385, + "step": 128688 + }, + { + "epoch": 1.55, + "grad_norm": 5.9828737653417665, + "learning_rate": 2.5670404880598175e-06, + "loss": 1.3239, + "step": 128691 + }, + { + "epoch": 1.55, + "grad_norm": 12.277870191199511, + "learning_rate": 2.5666497074101017e-06, + "loss": 1.2222, + "step": 128694 + }, + { + "epoch": 1.55, + "grad_norm": 5.718674311755013, + "learning_rate": 2.5662589521277335e-06, + "loss": 0.9662, + "step": 128697 + }, + { + "epoch": 1.55, + "grad_norm": 6.307377450436065, + "learning_rate": 2.5658682222140473e-06, + "loss": 0.9572, + "step": 128700 + }, + { + "epoch": 1.55, + "grad_norm": 5.231019516671098, + "learning_rate": 2.565477517670374e-06, + "loss": 1.4544, + "step": 128703 + }, + { + "epoch": 1.55, + "grad_norm": 4.339917089105303, + "learning_rate": 2.5650868384980466e-06, + "loss": 1.4607, + "step": 128706 + }, + { + "epoch": 1.55, + "grad_norm": 5.2640325410212006, + "learning_rate": 2.5646961846983997e-06, + "loss": 1.3701, + "step": 128709 + }, + { + "epoch": 1.55, + "grad_norm": 8.564613526316903, + "learning_rate": 2.5643055562727704e-06, + "loss": 1.0971, + "step": 128712 + }, + { + "epoch": 1.55, + "grad_norm": 10.387638232754647, + "learning_rate": 2.5639149532224872e-06, + "loss": 1.1237, + "step": 128715 + }, + { + "epoch": 1.55, + "grad_norm": 11.683658403014578, + "learning_rate": 2.5635243755488813e-06, + "loss": 1.1554, + "step": 128718 + }, + { + "epoch": 1.55, + "grad_norm": 6.063538387671039, + "learning_rate": 2.5631338232532887e-06, + "loss": 1.4372, + "step": 128721 + }, + { + "epoch": 1.55, + "grad_norm": 16.920003578549814, + "learning_rate": 2.5627432963370403e-06, + "loss": 1.251, + "step": 128724 + }, + { + "epoch": 1.55, + "grad_norm": 9.307628465394426, + "learning_rate": 2.562352794801474e-06, + "loss": 1.1157, + "step": 128727 + }, + { + "epoch": 1.55, + "grad_norm": 7.963500120548238, + "learning_rate": 2.5619623186479147e-06, + "loss": 1.3092, + "step": 128730 + }, + { + "epoch": 1.55, + "grad_norm": 6.27244988949413, + "learning_rate": 2.5615718678777026e-06, + "loss": 1.0395, + "step": 128733 + }, + { + "epoch": 1.55, + "grad_norm": 4.704985916558553, + "learning_rate": 2.5611814424921634e-06, + "loss": 1.573, + "step": 128736 + }, + { + "epoch": 1.55, + "grad_norm": 10.117381728469558, + "learning_rate": 2.560791042492632e-06, + "loss": 1.1564, + "step": 128739 + }, + { + "epoch": 1.55, + "grad_norm": 5.216081179217385, + "learning_rate": 2.560400667880445e-06, + "loss": 0.8073, + "step": 128742 + }, + { + "epoch": 1.55, + "grad_norm": 6.627809418219237, + "learning_rate": 2.560010318656927e-06, + "loss": 0.9956, + "step": 128745 + }, + { + "epoch": 1.55, + "grad_norm": 34.689696738065855, + "learning_rate": 2.5596199948234146e-06, + "loss": 1.1126, + "step": 128748 + }, + { + "epoch": 1.55, + "grad_norm": 6.303565422352164, + "learning_rate": 2.559229696381238e-06, + "loss": 1.2225, + "step": 128751 + }, + { + "epoch": 1.55, + "grad_norm": 7.091061323829309, + "learning_rate": 2.558839423331735e-06, + "loss": 1.1806, + "step": 128754 + }, + { + "epoch": 1.55, + "grad_norm": 7.802011201710164, + "learning_rate": 2.558449175676231e-06, + "loss": 1.2003, + "step": 128757 + }, + { + "epoch": 1.55, + "grad_norm": 7.779213139074842, + "learning_rate": 2.5580589534160573e-06, + "loss": 1.3976, + "step": 128760 + }, + { + "epoch": 1.55, + "grad_norm": 7.547813523875013, + "learning_rate": 2.5576687565525482e-06, + "loss": 1.1, + "step": 128763 + }, + { + "epoch": 1.55, + "grad_norm": 6.0921915288868425, + "learning_rate": 2.557278585087034e-06, + "loss": 0.8302, + "step": 128766 + }, + { + "epoch": 1.55, + "grad_norm": 2.697259772427829, + "learning_rate": 2.5568884390208515e-06, + "loss": 1.055, + "step": 128769 + }, + { + "epoch": 1.55, + "grad_norm": 12.090729267159917, + "learning_rate": 2.556498318355324e-06, + "loss": 1.3897, + "step": 128772 + }, + { + "epoch": 1.55, + "grad_norm": 3.9031084338085327, + "learning_rate": 2.556108223091789e-06, + "loss": 1.1913, + "step": 128775 + }, + { + "epoch": 1.55, + "grad_norm": 9.345272551136798, + "learning_rate": 2.5557181532315733e-06, + "loss": 1.2958, + "step": 128778 + }, + { + "epoch": 1.55, + "grad_norm": 6.045263289788393, + "learning_rate": 2.5553281087760116e-06, + "loss": 0.6925, + "step": 128781 + }, + { + "epoch": 1.55, + "grad_norm": 8.052304372375062, + "learning_rate": 2.554938089726432e-06, + "loss": 1.1613, + "step": 128784 + }, + { + "epoch": 1.55, + "grad_norm": 12.615046746918834, + "learning_rate": 2.5545480960841674e-06, + "loss": 0.9362, + "step": 128787 + }, + { + "epoch": 1.55, + "grad_norm": 11.49047640061469, + "learning_rate": 2.5541581278505467e-06, + "loss": 1.085, + "step": 128790 + }, + { + "epoch": 1.55, + "grad_norm": 8.377215762490941, + "learning_rate": 2.5537681850269036e-06, + "loss": 1.1835, + "step": 128793 + }, + { + "epoch": 1.55, + "grad_norm": 4.356048511566402, + "learning_rate": 2.5533782676145703e-06, + "loss": 0.7083, + "step": 128796 + }, + { + "epoch": 1.55, + "grad_norm": 8.193389701521806, + "learning_rate": 2.552988375614874e-06, + "loss": 1.0829, + "step": 128799 + }, + { + "epoch": 1.55, + "grad_norm": 6.587535814759044, + "learning_rate": 2.5525985090291438e-06, + "loss": 1.0897, + "step": 128802 + }, + { + "epoch": 1.55, + "grad_norm": 7.506769837320574, + "learning_rate": 2.552208667858712e-06, + "loss": 0.9363, + "step": 128805 + }, + { + "epoch": 1.55, + "grad_norm": 17.473018522893405, + "learning_rate": 2.5518188521049127e-06, + "loss": 1.3193, + "step": 128808 + }, + { + "epoch": 1.55, + "grad_norm": 7.370614101677119, + "learning_rate": 2.5514290617690696e-06, + "loss": 1.146, + "step": 128811 + }, + { + "epoch": 1.55, + "grad_norm": 11.978155157008205, + "learning_rate": 2.5510392968525154e-06, + "loss": 1.3345, + "step": 128814 + }, + { + "epoch": 1.55, + "grad_norm": 4.755123534758064, + "learning_rate": 2.5506495573565846e-06, + "loss": 1.3561, + "step": 128817 + }, + { + "epoch": 1.55, + "grad_norm": 5.922701686477446, + "learning_rate": 2.5502598432826008e-06, + "loss": 1.213, + "step": 128820 + }, + { + "epoch": 1.55, + "grad_norm": 7.262011259715132, + "learning_rate": 2.5498701546319004e-06, + "loss": 0.9761, + "step": 128823 + }, + { + "epoch": 1.55, + "grad_norm": 9.895182137242111, + "learning_rate": 2.549480491405806e-06, + "loss": 1.2308, + "step": 128826 + }, + { + "epoch": 1.55, + "grad_norm": 6.81577011167899, + "learning_rate": 2.54909085360565e-06, + "loss": 1.0959, + "step": 128829 + }, + { + "epoch": 1.55, + "grad_norm": 22.233241438610218, + "learning_rate": 2.548701241232765e-06, + "loss": 1.2634, + "step": 128832 + }, + { + "epoch": 1.55, + "grad_norm": 8.889903179441776, + "learning_rate": 2.54831165428848e-06, + "loss": 1.2099, + "step": 128835 + }, + { + "epoch": 1.55, + "grad_norm": 15.005625166210429, + "learning_rate": 2.547922092774121e-06, + "loss": 1.1743, + "step": 128838 + }, + { + "epoch": 1.55, + "grad_norm": 4.77782037547279, + "learning_rate": 2.5475325566910227e-06, + "loss": 1.1076, + "step": 128841 + }, + { + "epoch": 1.55, + "grad_norm": 6.487214231283537, + "learning_rate": 2.5471430460405087e-06, + "loss": 0.8678, + "step": 128844 + }, + { + "epoch": 1.55, + "grad_norm": 10.291968108781038, + "learning_rate": 2.5467535608239102e-06, + "loss": 0.8313, + "step": 128847 + }, + { + "epoch": 1.55, + "grad_norm": 12.450697260474191, + "learning_rate": 2.546364101042561e-06, + "loss": 1.2694, + "step": 128850 + }, + { + "epoch": 1.55, + "grad_norm": 12.52457845724144, + "learning_rate": 2.5459746666977824e-06, + "loss": 1.4976, + "step": 128853 + }, + { + "epoch": 1.55, + "grad_norm": 4.225909509552604, + "learning_rate": 2.5455852577909077e-06, + "loss": 0.8211, + "step": 128856 + }, + { + "epoch": 1.55, + "grad_norm": 6.410532018700682, + "learning_rate": 2.5451958743232696e-06, + "loss": 1.0442, + "step": 128859 + }, + { + "epoch": 1.55, + "grad_norm": 10.466520186528227, + "learning_rate": 2.544806516296191e-06, + "loss": 1.1781, + "step": 128862 + }, + { + "epoch": 1.55, + "grad_norm": 6.804043997556068, + "learning_rate": 2.544417183711001e-06, + "loss": 1.0699, + "step": 128865 + }, + { + "epoch": 1.55, + "grad_norm": 20.056847115775504, + "learning_rate": 2.5440278765690287e-06, + "loss": 0.9091, + "step": 128868 + }, + { + "epoch": 1.55, + "grad_norm": 8.68132801920043, + "learning_rate": 2.5436385948716034e-06, + "loss": 1.3075, + "step": 128871 + }, + { + "epoch": 1.55, + "grad_norm": 10.354950534648303, + "learning_rate": 2.543249338620054e-06, + "loss": 0.8037, + "step": 128874 + }, + { + "epoch": 1.55, + "grad_norm": 8.129202497317493, + "learning_rate": 2.5428601078157123e-06, + "loss": 1.0654, + "step": 128877 + }, + { + "epoch": 1.55, + "grad_norm": 10.863558365183758, + "learning_rate": 2.5424709024598994e-06, + "loss": 1.0093, + "step": 128880 + }, + { + "epoch": 1.55, + "grad_norm": 8.381256107459626, + "learning_rate": 2.542081722553951e-06, + "loss": 1.2606, + "step": 128883 + }, + { + "epoch": 1.55, + "grad_norm": 10.962709703170491, + "learning_rate": 2.5416925680991866e-06, + "loss": 1.1384, + "step": 128886 + }, + { + "epoch": 1.55, + "grad_norm": 35.10074486930007, + "learning_rate": 2.5413034390969438e-06, + "loss": 1.2537, + "step": 128889 + }, + { + "epoch": 1.55, + "grad_norm": 17.80707542709303, + "learning_rate": 2.540914335548542e-06, + "loss": 1.0345, + "step": 128892 + }, + { + "epoch": 1.55, + "grad_norm": 33.186870106557464, + "learning_rate": 2.540525257455313e-06, + "loss": 1.1663, + "step": 128895 + }, + { + "epoch": 1.55, + "grad_norm": 8.389240052994476, + "learning_rate": 2.540136204818584e-06, + "loss": 1.1702, + "step": 128898 + }, + { + "epoch": 1.55, + "grad_norm": 4.553984471196223, + "learning_rate": 2.5397471776396864e-06, + "loss": 1.0571, + "step": 128901 + }, + { + "epoch": 1.55, + "grad_norm": 13.921917744977508, + "learning_rate": 2.5393581759199436e-06, + "loss": 1.0791, + "step": 128904 + }, + { + "epoch": 1.55, + "grad_norm": 6.419337934196953, + "learning_rate": 2.5389691996606822e-06, + "loss": 1.3468, + "step": 128907 + }, + { + "epoch": 1.55, + "grad_norm": 8.214579453387014, + "learning_rate": 2.5385802488632317e-06, + "loss": 1.0925, + "step": 128910 + }, + { + "epoch": 1.55, + "grad_norm": 19.187442220727725, + "learning_rate": 2.538191323528918e-06, + "loss": 1.2108, + "step": 128913 + }, + { + "epoch": 1.55, + "grad_norm": 9.256948503897286, + "learning_rate": 2.537802423659074e-06, + "loss": 1.0754, + "step": 128916 + }, + { + "epoch": 1.55, + "grad_norm": 9.62148163680287, + "learning_rate": 2.537413549255018e-06, + "loss": 1.145, + "step": 128919 + }, + { + "epoch": 1.55, + "grad_norm": 13.770368363144328, + "learning_rate": 2.5370247003180827e-06, + "loss": 1.3717, + "step": 128922 + }, + { + "epoch": 1.55, + "grad_norm": 11.213476707039781, + "learning_rate": 2.536635876849597e-06, + "loss": 1.1683, + "step": 128925 + }, + { + "epoch": 1.55, + "grad_norm": 7.352826582449091, + "learning_rate": 2.536247078850881e-06, + "loss": 1.0796, + "step": 128928 + }, + { + "epoch": 1.55, + "grad_norm": 12.09857881071814, + "learning_rate": 2.535858306323268e-06, + "loss": 1.5391, + "step": 128931 + }, + { + "epoch": 1.55, + "grad_norm": 7.167831342028109, + "learning_rate": 2.53546955926808e-06, + "loss": 1.3732, + "step": 128934 + }, + { + "epoch": 1.55, + "grad_norm": 4.789286303838732, + "learning_rate": 2.5350808376866455e-06, + "loss": 1.0802, + "step": 128937 + }, + { + "epoch": 1.55, + "grad_norm": 4.450531881870845, + "learning_rate": 2.5346921415802907e-06, + "loss": 1.0923, + "step": 128940 + }, + { + "epoch": 1.55, + "grad_norm": 8.080469445315526, + "learning_rate": 2.5343034709503457e-06, + "loss": 1.0962, + "step": 128943 + }, + { + "epoch": 1.55, + "grad_norm": 10.13076986729655, + "learning_rate": 2.5339148257981315e-06, + "loss": 1.3442, + "step": 128946 + }, + { + "epoch": 1.55, + "grad_norm": 6.139231406500746, + "learning_rate": 2.533526206124979e-06, + "loss": 1.4295, + "step": 128949 + }, + { + "epoch": 1.55, + "grad_norm": 9.435739226556246, + "learning_rate": 2.5331376119322094e-06, + "loss": 1.55, + "step": 128952 + }, + { + "epoch": 1.55, + "grad_norm": 10.739762372662238, + "learning_rate": 2.5327490432211523e-06, + "loss": 1.2351, + "step": 128955 + }, + { + "epoch": 1.55, + "grad_norm": 17.1575315845139, + "learning_rate": 2.532360499993135e-06, + "loss": 0.9973, + "step": 128958 + }, + { + "epoch": 1.55, + "grad_norm": 9.209593558555435, + "learning_rate": 2.531971982249478e-06, + "loss": 1.0024, + "step": 128961 + }, + { + "epoch": 1.55, + "grad_norm": 5.930080727630287, + "learning_rate": 2.5315834899915115e-06, + "loss": 1.0685, + "step": 128964 + }, + { + "epoch": 1.55, + "grad_norm": 7.995925161494477, + "learning_rate": 2.531195023220564e-06, + "loss": 0.8948, + "step": 128967 + }, + { + "epoch": 1.55, + "grad_norm": 15.442593163203213, + "learning_rate": 2.530806581937957e-06, + "loss": 1.0076, + "step": 128970 + }, + { + "epoch": 1.55, + "grad_norm": 8.256813610584828, + "learning_rate": 2.5304181661450133e-06, + "loss": 1.2135, + "step": 128973 + }, + { + "epoch": 1.55, + "grad_norm": 7.5252383961760065, + "learning_rate": 2.5300297758430625e-06, + "loss": 0.9561, + "step": 128976 + }, + { + "epoch": 1.55, + "grad_norm": 7.530767781687492, + "learning_rate": 2.52964141103343e-06, + "loss": 1.1037, + "step": 128979 + }, + { + "epoch": 1.55, + "grad_norm": 16.980643205951562, + "learning_rate": 2.5292530717174392e-06, + "loss": 1.1435, + "step": 128982 + }, + { + "epoch": 1.55, + "grad_norm": 10.751008202410505, + "learning_rate": 2.528864757896421e-06, + "loss": 1.3819, + "step": 128985 + }, + { + "epoch": 1.55, + "grad_norm": 10.918758777142871, + "learning_rate": 2.5284764695716923e-06, + "loss": 1.0874, + "step": 128988 + }, + { + "epoch": 1.55, + "grad_norm": 3.7966379302854656, + "learning_rate": 2.5280882067445853e-06, + "loss": 0.7274, + "step": 128991 + }, + { + "epoch": 1.55, + "grad_norm": 3.372626272335032, + "learning_rate": 2.5276999694164193e-06, + "loss": 1.5233, + "step": 128994 + }, + { + "epoch": 1.55, + "grad_norm": 12.183359489024278, + "learning_rate": 2.527311757588524e-06, + "loss": 1.3492, + "step": 128997 + }, + { + "epoch": 1.55, + "grad_norm": 7.890707102098203, + "learning_rate": 2.5269235712622198e-06, + "loss": 1.2177, + "step": 129000 + }, + { + "epoch": 1.55, + "grad_norm": 10.50372374456005, + "learning_rate": 2.526535410438834e-06, + "loss": 1.2802, + "step": 129003 + }, + { + "epoch": 1.55, + "grad_norm": 9.783431165035596, + "learning_rate": 2.5261472751196903e-06, + "loss": 1.0071, + "step": 129006 + }, + { + "epoch": 1.55, + "grad_norm": 4.57332353153792, + "learning_rate": 2.5257591653061165e-06, + "loss": 1.2012, + "step": 129009 + }, + { + "epoch": 1.55, + "grad_norm": 4.298563304542367, + "learning_rate": 2.5253710809994346e-06, + "loss": 0.8037, + "step": 129012 + }, + { + "epoch": 1.55, + "grad_norm": 7.378955866764824, + "learning_rate": 2.524983022200966e-06, + "loss": 1.2196, + "step": 129015 + }, + { + "epoch": 1.55, + "grad_norm": 12.767890877542857, + "learning_rate": 2.5245949889120393e-06, + "loss": 1.1577, + "step": 129018 + }, + { + "epoch": 1.55, + "grad_norm": 11.844420393367875, + "learning_rate": 2.5242069811339753e-06, + "loss": 1.1941, + "step": 129021 + }, + { + "epoch": 1.55, + "grad_norm": 6.740868678461139, + "learning_rate": 2.5238189988681037e-06, + "loss": 0.9013, + "step": 129024 + }, + { + "epoch": 1.55, + "grad_norm": 7.15774323501869, + "learning_rate": 2.523431042115743e-06, + "loss": 0.8734, + "step": 129027 + }, + { + "epoch": 1.55, + "grad_norm": 6.866850018868926, + "learning_rate": 2.523043110878217e-06, + "loss": 1.2082, + "step": 129030 + }, + { + "epoch": 1.55, + "grad_norm": 11.533948099187297, + "learning_rate": 2.5226552051568566e-06, + "loss": 1.2997, + "step": 129033 + }, + { + "epoch": 1.55, + "grad_norm": 9.439675417152849, + "learning_rate": 2.5222673249529773e-06, + "loss": 1.0293, + "step": 129036 + }, + { + "epoch": 1.55, + "grad_norm": 5.662911867134132, + "learning_rate": 2.5218794702679083e-06, + "loss": 1.2679, + "step": 129039 + }, + { + "epoch": 1.55, + "grad_norm": 13.21771333815598, + "learning_rate": 2.5214916411029687e-06, + "loss": 1.1667, + "step": 129042 + }, + { + "epoch": 1.55, + "grad_norm": 10.298711994964014, + "learning_rate": 2.521103837459484e-06, + "loss": 0.783, + "step": 129045 + }, + { + "epoch": 1.55, + "grad_norm": 12.680097234458861, + "learning_rate": 2.5207160593387782e-06, + "loss": 1.304, + "step": 129048 + }, + { + "epoch": 1.55, + "grad_norm": 17.165895622556636, + "learning_rate": 2.520328306742177e-06, + "loss": 1.3613, + "step": 129051 + }, + { + "epoch": 1.55, + "grad_norm": 14.364886775323841, + "learning_rate": 2.519940579671002e-06, + "loss": 1.4518, + "step": 129054 + }, + { + "epoch": 1.55, + "grad_norm": 33.40977461390498, + "learning_rate": 2.519552878126572e-06, + "loss": 1.2723, + "step": 129057 + }, + { + "epoch": 1.55, + "grad_norm": 6.577707611452509, + "learning_rate": 2.5191652021102144e-06, + "loss": 1.2241, + "step": 129060 + }, + { + "epoch": 1.55, + "grad_norm": 5.411241385162379, + "learning_rate": 2.5187775516232506e-06, + "loss": 0.9437, + "step": 129063 + }, + { + "epoch": 1.55, + "grad_norm": 5.0143000893823695, + "learning_rate": 2.5183899266670062e-06, + "loss": 1.3604, + "step": 129066 + }, + { + "epoch": 1.55, + "grad_norm": 4.451930979502633, + "learning_rate": 2.518002327242801e-06, + "loss": 1.0981, + "step": 129069 + }, + { + "epoch": 1.55, + "grad_norm": 9.200989156205699, + "learning_rate": 2.5176147533519577e-06, + "loss": 1.0026, + "step": 129072 + }, + { + "epoch": 1.55, + "grad_norm": 3.0722434808579036, + "learning_rate": 2.5172272049958035e-06, + "loss": 1.3065, + "step": 129075 + }, + { + "epoch": 1.55, + "grad_norm": 17.160695658688017, + "learning_rate": 2.516839682175658e-06, + "loss": 1.1068, + "step": 129078 + }, + { + "epoch": 1.55, + "grad_norm": 10.78343783572211, + "learning_rate": 2.516452184892839e-06, + "loss": 0.8806, + "step": 129081 + }, + { + "epoch": 1.55, + "grad_norm": 5.171539449111109, + "learning_rate": 2.5160647131486748e-06, + "loss": 1.0231, + "step": 129084 + }, + { + "epoch": 1.55, + "grad_norm": 5.322997743689762, + "learning_rate": 2.515677266944485e-06, + "loss": 0.9002, + "step": 129087 + }, + { + "epoch": 1.55, + "grad_norm": 3.887049311140083, + "learning_rate": 2.5152898462815932e-06, + "loss": 1.1836, + "step": 129090 + }, + { + "epoch": 1.55, + "grad_norm": 16.907384944380986, + "learning_rate": 2.514902451161324e-06, + "loss": 1.1843, + "step": 129093 + }, + { + "epoch": 1.55, + "grad_norm": 17.586810367911344, + "learning_rate": 2.5145150815849963e-06, + "loss": 0.9343, + "step": 129096 + }, + { + "epoch": 1.55, + "grad_norm": 12.793814910876046, + "learning_rate": 2.51412773755393e-06, + "loss": 1.0745, + "step": 129099 + }, + { + "epoch": 1.55, + "grad_norm": 20.886581792164908, + "learning_rate": 2.513740419069449e-06, + "loss": 1.0425, + "step": 129102 + }, + { + "epoch": 1.55, + "grad_norm": 3.0318339755575927, + "learning_rate": 2.5133531261328782e-06, + "loss": 1.3144, + "step": 129105 + }, + { + "epoch": 1.55, + "grad_norm": 3.7521304502914483, + "learning_rate": 2.512965858745534e-06, + "loss": 1.3762, + "step": 129108 + }, + { + "epoch": 1.55, + "grad_norm": 2.576510279261674, + "learning_rate": 2.512578616908741e-06, + "loss": 1.03, + "step": 129111 + }, + { + "epoch": 1.55, + "grad_norm": 4.827415704453126, + "learning_rate": 2.512191400623819e-06, + "loss": 1.3569, + "step": 129114 + }, + { + "epoch": 1.55, + "grad_norm": 3.1363573571764762, + "learning_rate": 2.5118042098920936e-06, + "loss": 1.2732, + "step": 129117 + }, + { + "epoch": 1.55, + "grad_norm": 14.478232914858276, + "learning_rate": 2.5114170447148833e-06, + "loss": 1.005, + "step": 129120 + }, + { + "epoch": 1.55, + "grad_norm": 23.589702670430608, + "learning_rate": 2.511029905093507e-06, + "loss": 1.3733, + "step": 129123 + }, + { + "epoch": 1.55, + "grad_norm": 10.72251056694883, + "learning_rate": 2.5106427910292883e-06, + "loss": 0.9939, + "step": 129126 + }, + { + "epoch": 1.55, + "grad_norm": 12.355651032353787, + "learning_rate": 2.5102557025235476e-06, + "loss": 0.9354, + "step": 129129 + }, + { + "epoch": 1.55, + "grad_norm": 26.427167916824597, + "learning_rate": 2.5098686395776097e-06, + "loss": 1.4996, + "step": 129132 + }, + { + "epoch": 1.55, + "grad_norm": 3.1650308188137486, + "learning_rate": 2.5094816021927883e-06, + "loss": 1.3789, + "step": 129135 + }, + { + "epoch": 1.55, + "grad_norm": 16.867719161784873, + "learning_rate": 2.509094590370412e-06, + "loss": 1.0141, + "step": 129138 + }, + { + "epoch": 1.55, + "grad_norm": 12.72233985798205, + "learning_rate": 2.5087076041117942e-06, + "loss": 1.0315, + "step": 129141 + }, + { + "epoch": 1.55, + "grad_norm": 2.7991580758554226, + "learning_rate": 2.5083206434182596e-06, + "loss": 1.0433, + "step": 129144 + }, + { + "epoch": 1.55, + "grad_norm": 4.480417729727513, + "learning_rate": 2.5079337082911303e-06, + "loss": 0.9253, + "step": 129147 + }, + { + "epoch": 1.55, + "grad_norm": 19.48592128610372, + "learning_rate": 2.5075467987317215e-06, + "loss": 1.1253, + "step": 129150 + }, + { + "epoch": 1.55, + "grad_norm": 6.204405875327747, + "learning_rate": 2.507159914741357e-06, + "loss": 1.3001, + "step": 129153 + }, + { + "epoch": 1.55, + "grad_norm": 4.5677591511837905, + "learning_rate": 2.506773056321358e-06, + "loss": 1.1633, + "step": 129156 + }, + { + "epoch": 1.55, + "grad_norm": 8.009897000760226, + "learning_rate": 2.506386223473045e-06, + "loss": 1.0743, + "step": 129159 + }, + { + "epoch": 1.55, + "grad_norm": 3.44644212075601, + "learning_rate": 2.5059994161977375e-06, + "loss": 1.1769, + "step": 129162 + }, + { + "epoch": 1.55, + "grad_norm": 16.83933641452633, + "learning_rate": 2.5056126344967514e-06, + "loss": 0.8657, + "step": 129165 + }, + { + "epoch": 1.55, + "grad_norm": 21.897251880839207, + "learning_rate": 2.50522587837141e-06, + "loss": 1.217, + "step": 129168 + }, + { + "epoch": 1.55, + "grad_norm": 2.1998273587641757, + "learning_rate": 2.5048391478230326e-06, + "loss": 1.1861, + "step": 129171 + }, + { + "epoch": 1.55, + "grad_norm": 4.744788948899629, + "learning_rate": 2.5044524428529427e-06, + "loss": 1.1768, + "step": 129174 + }, + { + "epoch": 1.55, + "grad_norm": 7.036046236235774, + "learning_rate": 2.5040657634624545e-06, + "loss": 1.4392, + "step": 129177 + }, + { + "epoch": 1.55, + "grad_norm": 5.754904488914642, + "learning_rate": 2.503679109652891e-06, + "loss": 1.1594, + "step": 129180 + }, + { + "epoch": 1.55, + "grad_norm": 12.814345901382547, + "learning_rate": 2.50329248142557e-06, + "loss": 1.1974, + "step": 129183 + }, + { + "epoch": 1.55, + "grad_norm": 9.30644261401296, + "learning_rate": 2.502905878781813e-06, + "loss": 1.0107, + "step": 129186 + }, + { + "epoch": 1.55, + "grad_norm": 3.1897435729443906, + "learning_rate": 2.5025193017229344e-06, + "loss": 1.1234, + "step": 129189 + }, + { + "epoch": 1.55, + "grad_norm": 7.53407881868512, + "learning_rate": 2.5021327502502577e-06, + "loss": 1.4862, + "step": 129192 + }, + { + "epoch": 1.55, + "grad_norm": 11.039981411225233, + "learning_rate": 2.5017462243651e-06, + "loss": 1.2786, + "step": 129195 + }, + { + "epoch": 1.55, + "grad_norm": 4.533067879648548, + "learning_rate": 2.5013597240687828e-06, + "loss": 1.1041, + "step": 129198 + }, + { + "epoch": 1.55, + "grad_norm": 7.557609392289348, + "learning_rate": 2.500973249362627e-06, + "loss": 1.241, + "step": 129201 + }, + { + "epoch": 1.55, + "grad_norm": 9.346290648909147, + "learning_rate": 2.500586800247947e-06, + "loss": 0.8717, + "step": 129204 + }, + { + "epoch": 1.55, + "grad_norm": 4.249571445760628, + "learning_rate": 2.5002003767260617e-06, + "loss": 0.579, + "step": 129207 + }, + { + "epoch": 1.55, + "grad_norm": 4.627966508373041, + "learning_rate": 2.49981397879829e-06, + "loss": 1.1169, + "step": 129210 + }, + { + "epoch": 1.55, + "grad_norm": 8.982279391675457, + "learning_rate": 2.499427606465955e-06, + "loss": 1.3514, + "step": 129213 + }, + { + "epoch": 1.55, + "grad_norm": 6.918831938612347, + "learning_rate": 2.499041259730368e-06, + "loss": 1.2076, + "step": 129216 + }, + { + "epoch": 1.55, + "grad_norm": 12.903112483195468, + "learning_rate": 2.498654938592853e-06, + "loss": 1.3119, + "step": 129219 + }, + { + "epoch": 1.55, + "grad_norm": 12.689043014336429, + "learning_rate": 2.4982686430547286e-06, + "loss": 0.906, + "step": 129222 + }, + { + "epoch": 1.55, + "grad_norm": 16.383806247917576, + "learning_rate": 2.4978823731173087e-06, + "loss": 1.0907, + "step": 129225 + }, + { + "epoch": 1.55, + "grad_norm": 14.559855386551375, + "learning_rate": 2.4974961287819166e-06, + "loss": 1.0414, + "step": 129228 + }, + { + "epoch": 1.55, + "grad_norm": 9.981164974571703, + "learning_rate": 2.497109910049865e-06, + "loss": 0.9498, + "step": 129231 + }, + { + "epoch": 1.55, + "grad_norm": 3.5469488699151355, + "learning_rate": 2.4967237169224746e-06, + "loss": 0.9372, + "step": 129234 + }, + { + "epoch": 1.55, + "grad_norm": 12.087736024062092, + "learning_rate": 2.496337549401063e-06, + "loss": 0.695, + "step": 129237 + }, + { + "epoch": 1.55, + "grad_norm": 7.928458590416447, + "learning_rate": 2.495951407486953e-06, + "loss": 1.2881, + "step": 129240 + }, + { + "epoch": 1.55, + "grad_norm": 3.763317037611085, + "learning_rate": 2.4955652911814542e-06, + "loss": 1.4665, + "step": 129243 + }, + { + "epoch": 1.55, + "grad_norm": 5.7151676126219275, + "learning_rate": 2.495179200485891e-06, + "loss": 1.2442, + "step": 129246 + }, + { + "epoch": 1.55, + "grad_norm": 14.83190475408385, + "learning_rate": 2.494793135401574e-06, + "loss": 1.1151, + "step": 129249 + }, + { + "epoch": 1.55, + "grad_norm": 9.778603581895995, + "learning_rate": 2.494407095929826e-06, + "loss": 1.3364, + "step": 129252 + }, + { + "epoch": 1.55, + "grad_norm": 6.295158957791813, + "learning_rate": 2.494021082071966e-06, + "loss": 1.2951, + "step": 129255 + }, + { + "epoch": 1.55, + "grad_norm": 2.9254339516681394, + "learning_rate": 2.493635093829304e-06, + "loss": 1.1792, + "step": 129258 + }, + { + "epoch": 1.55, + "grad_norm": 4.064607744540101, + "learning_rate": 2.4932491312031627e-06, + "loss": 0.879, + "step": 129261 + }, + { + "epoch": 1.55, + "grad_norm": 20.908838207604536, + "learning_rate": 2.492863194194861e-06, + "loss": 1.1137, + "step": 129264 + }, + { + "epoch": 1.55, + "grad_norm": 8.71460849807514, + "learning_rate": 2.4924772828057107e-06, + "loss": 1.3594, + "step": 129267 + }, + { + "epoch": 1.55, + "grad_norm": 5.628571837044766, + "learning_rate": 2.4920913970370342e-06, + "loss": 0.9375, + "step": 129270 + }, + { + "epoch": 1.55, + "grad_norm": 33.270992959754565, + "learning_rate": 2.491705536890141e-06, + "loss": 1.1783, + "step": 129273 + }, + { + "epoch": 1.55, + "grad_norm": 10.369258071325717, + "learning_rate": 2.491319702366354e-06, + "loss": 1.0347, + "step": 129276 + }, + { + "epoch": 1.55, + "grad_norm": 9.322148025951492, + "learning_rate": 2.490933893466988e-06, + "loss": 1.1063, + "step": 129279 + }, + { + "epoch": 1.55, + "grad_norm": 11.579430473350357, + "learning_rate": 2.4905481101933615e-06, + "loss": 0.9353, + "step": 129282 + }, + { + "epoch": 1.55, + "grad_norm": 5.388123532508351, + "learning_rate": 2.490162352546788e-06, + "loss": 0.9943, + "step": 129285 + }, + { + "epoch": 1.55, + "grad_norm": 13.392989786474999, + "learning_rate": 2.4897766205285877e-06, + "loss": 1.3661, + "step": 129288 + }, + { + "epoch": 1.55, + "grad_norm": 5.097321194165783, + "learning_rate": 2.489390914140072e-06, + "loss": 0.6843, + "step": 129291 + }, + { + "epoch": 1.55, + "grad_norm": 15.793912329589148, + "learning_rate": 2.489005233382562e-06, + "loss": 1.07, + "step": 129294 + }, + { + "epoch": 1.55, + "grad_norm": 41.010243700262, + "learning_rate": 2.4886195782573687e-06, + "loss": 1.5232, + "step": 129297 + }, + { + "epoch": 1.55, + "grad_norm": 2.625342400361915, + "learning_rate": 2.4882339487658114e-06, + "loss": 1.1449, + "step": 129300 + }, + { + "epoch": 1.55, + "grad_norm": 10.083436344909192, + "learning_rate": 2.487848344909206e-06, + "loss": 1.0255, + "step": 129303 + }, + { + "epoch": 1.55, + "grad_norm": 4.241470273990786, + "learning_rate": 2.4874627666888696e-06, + "loss": 1.0049, + "step": 129306 + }, + { + "epoch": 1.55, + "grad_norm": 6.527980231293348, + "learning_rate": 2.4870772141061184e-06, + "loss": 0.8841, + "step": 129309 + }, + { + "epoch": 1.55, + "grad_norm": 6.022412513940675, + "learning_rate": 2.486691687162266e-06, + "loss": 1.1611, + "step": 129312 + }, + { + "epoch": 1.55, + "grad_norm": 3.1738154406213512, + "learning_rate": 2.486306185858627e-06, + "loss": 0.9416, + "step": 129315 + }, + { + "epoch": 1.56, + "grad_norm": 14.203254708617283, + "learning_rate": 2.4859207101965177e-06, + "loss": 1.2157, + "step": 129318 + }, + { + "epoch": 1.56, + "grad_norm": 15.47300312402375, + "learning_rate": 2.4855352601772575e-06, + "loss": 1.2897, + "step": 129321 + }, + { + "epoch": 1.56, + "grad_norm": 9.10319308287255, + "learning_rate": 2.4851498358021565e-06, + "loss": 0.9498, + "step": 129324 + }, + { + "epoch": 1.56, + "grad_norm": 12.425002590648113, + "learning_rate": 2.484764437072531e-06, + "loss": 1.0614, + "step": 129327 + }, + { + "epoch": 1.56, + "grad_norm": 6.479794532843309, + "learning_rate": 2.4843790639897015e-06, + "loss": 1.112, + "step": 129330 + }, + { + "epoch": 1.56, + "grad_norm": 4.8322819888698385, + "learning_rate": 2.4839937165549768e-06, + "loss": 1.5586, + "step": 129333 + }, + { + "epoch": 1.56, + "grad_norm": 6.737951904561765, + "learning_rate": 2.4836083947696755e-06, + "loss": 1.0322, + "step": 129336 + }, + { + "epoch": 1.56, + "grad_norm": 4.881964472002189, + "learning_rate": 2.483223098635109e-06, + "loss": 1.1159, + "step": 129339 + }, + { + "epoch": 1.56, + "grad_norm": 7.369288445654801, + "learning_rate": 2.4828378281525957e-06, + "loss": 1.3661, + "step": 129342 + }, + { + "epoch": 1.56, + "grad_norm": 5.922535758525078, + "learning_rate": 2.482452583323448e-06, + "loss": 0.9102, + "step": 129345 + }, + { + "epoch": 1.56, + "grad_norm": 3.272279190936055, + "learning_rate": 2.4820673641489843e-06, + "loss": 1.2119, + "step": 129348 + }, + { + "epoch": 1.56, + "grad_norm": 9.89210674412509, + "learning_rate": 2.481682170630515e-06, + "loss": 1.2417, + "step": 129351 + }, + { + "epoch": 1.56, + "grad_norm": 40.85383243664704, + "learning_rate": 2.4812970027693585e-06, + "loss": 1.3555, + "step": 129354 + }, + { + "epoch": 1.56, + "grad_norm": 90.1184197496623, + "learning_rate": 2.4809118605668248e-06, + "loss": 1.1738, + "step": 129357 + }, + { + "epoch": 1.56, + "grad_norm": 9.604867670013086, + "learning_rate": 2.4805267440242296e-06, + "loss": 1.1415, + "step": 129360 + }, + { + "epoch": 1.56, + "grad_norm": 3.083805925501848, + "learning_rate": 2.4801416531428923e-06, + "loss": 1.2347, + "step": 129363 + }, + { + "epoch": 1.56, + "grad_norm": 10.760747628828616, + "learning_rate": 2.4797565879241194e-06, + "loss": 1.059, + "step": 129366 + }, + { + "epoch": 1.56, + "grad_norm": 8.114338708867688, + "learning_rate": 2.479371548369228e-06, + "loss": 1.3218, + "step": 129369 + }, + { + "epoch": 1.56, + "grad_norm": 37.31040732788575, + "learning_rate": 2.4789865344795362e-06, + "loss": 1.2257, + "step": 129372 + }, + { + "epoch": 1.56, + "grad_norm": 18.5615940569564, + "learning_rate": 2.478601546256355e-06, + "loss": 1.2752, + "step": 129375 + }, + { + "epoch": 1.56, + "grad_norm": 13.001088255226906, + "learning_rate": 2.478216583700993e-06, + "loss": 1.167, + "step": 129378 + }, + { + "epoch": 1.56, + "grad_norm": 3.4449863843135535, + "learning_rate": 2.477831646814769e-06, + "loss": 1.0218, + "step": 129381 + }, + { + "epoch": 1.56, + "grad_norm": 5.087576140525629, + "learning_rate": 2.4774467355989963e-06, + "loss": 0.988, + "step": 129384 + }, + { + "epoch": 1.56, + "grad_norm": 6.5450158189509855, + "learning_rate": 2.477061850054988e-06, + "loss": 1.1992, + "step": 129387 + }, + { + "epoch": 1.56, + "grad_norm": 5.775221881103291, + "learning_rate": 2.4766769901840616e-06, + "loss": 1.0286, + "step": 129390 + }, + { + "epoch": 1.56, + "grad_norm": 18.601411344633128, + "learning_rate": 2.476292155987523e-06, + "loss": 0.9046, + "step": 129393 + }, + { + "epoch": 1.56, + "grad_norm": 4.895501675874739, + "learning_rate": 2.4759073474666927e-06, + "loss": 1.4178, + "step": 129396 + }, + { + "epoch": 1.56, + "grad_norm": 9.768009519709981, + "learning_rate": 2.4755225646228775e-06, + "loss": 1.1313, + "step": 129399 + }, + { + "epoch": 1.56, + "grad_norm": 5.732876817168943, + "learning_rate": 2.4751378074573964e-06, + "loss": 0.8476, + "step": 129402 + }, + { + "epoch": 1.56, + "grad_norm": 13.439810060782426, + "learning_rate": 2.4747530759715567e-06, + "loss": 1.1995, + "step": 129405 + }, + { + "epoch": 1.56, + "grad_norm": 15.353481596398812, + "learning_rate": 2.474368370166673e-06, + "loss": 0.992, + "step": 129408 + }, + { + "epoch": 1.56, + "grad_norm": 6.981447436127376, + "learning_rate": 2.4739836900440604e-06, + "loss": 0.8957, + "step": 129411 + }, + { + "epoch": 1.56, + "grad_norm": 11.890071061951824, + "learning_rate": 2.4735990356050334e-06, + "loss": 1.1997, + "step": 129414 + }, + { + "epoch": 1.56, + "grad_norm": 8.711829188417962, + "learning_rate": 2.473214406850901e-06, + "loss": 1.7038, + "step": 129417 + }, + { + "epoch": 1.56, + "grad_norm": 10.093602344535455, + "learning_rate": 2.4728298037829757e-06, + "loss": 1.1601, + "step": 129420 + }, + { + "epoch": 1.56, + "grad_norm": 13.647035274017115, + "learning_rate": 2.4724452264025687e-06, + "loss": 1.1327, + "step": 129423 + }, + { + "epoch": 1.56, + "grad_norm": 59.420463580061124, + "learning_rate": 2.4720606747109964e-06, + "loss": 1.0431, + "step": 129426 + }, + { + "epoch": 1.56, + "grad_norm": 4.806894738402482, + "learning_rate": 2.471676148709572e-06, + "loss": 1.0255, + "step": 129429 + }, + { + "epoch": 1.56, + "grad_norm": 7.013097734187805, + "learning_rate": 2.471291648399602e-06, + "loss": 0.7628, + "step": 129432 + }, + { + "epoch": 1.56, + "grad_norm": 7.499676669544947, + "learning_rate": 2.470907173782402e-06, + "loss": 0.9725, + "step": 129435 + }, + { + "epoch": 1.56, + "grad_norm": 6.527490689753478, + "learning_rate": 2.4705227248592867e-06, + "loss": 1.0293, + "step": 129438 + }, + { + "epoch": 1.56, + "grad_norm": 8.61317603706474, + "learning_rate": 2.470138301631563e-06, + "loss": 1.1483, + "step": 129441 + }, + { + "epoch": 1.56, + "grad_norm": 9.628376020756901, + "learning_rate": 2.4697539041005468e-06, + "loss": 1.1097, + "step": 129444 + }, + { + "epoch": 1.56, + "grad_norm": 13.55836258409125, + "learning_rate": 2.469369532267546e-06, + "loss": 1.3203, + "step": 129447 + }, + { + "epoch": 1.56, + "grad_norm": 4.335378299092915, + "learning_rate": 2.4689851861338742e-06, + "loss": 0.9781, + "step": 129450 + }, + { + "epoch": 1.56, + "grad_norm": 6.5084542058747505, + "learning_rate": 2.468600865700843e-06, + "loss": 0.7193, + "step": 129453 + }, + { + "epoch": 1.56, + "grad_norm": 17.417599203717955, + "learning_rate": 2.468216570969768e-06, + "loss": 1.0969, + "step": 129456 + }, + { + "epoch": 1.56, + "grad_norm": 2.8732781853109373, + "learning_rate": 2.4678323019419558e-06, + "loss": 0.8695, + "step": 129459 + }, + { + "epoch": 1.56, + "grad_norm": 5.126289296687839, + "learning_rate": 2.4674480586187164e-06, + "loss": 1.0761, + "step": 129462 + }, + { + "epoch": 1.56, + "grad_norm": 4.980266434112935, + "learning_rate": 2.467063841001364e-06, + "loss": 0.9142, + "step": 129465 + }, + { + "epoch": 1.56, + "grad_norm": 13.240324393885503, + "learning_rate": 2.466679649091209e-06, + "loss": 1.128, + "step": 129468 + }, + { + "epoch": 1.56, + "grad_norm": 8.570799332282752, + "learning_rate": 2.4662954828895658e-06, + "loss": 1.0413, + "step": 129471 + }, + { + "epoch": 1.56, + "grad_norm": 8.29272720943238, + "learning_rate": 2.46591134239774e-06, + "loss": 1.0171, + "step": 129474 + }, + { + "epoch": 1.56, + "grad_norm": 7.8161311737274595, + "learning_rate": 2.4655272276170437e-06, + "loss": 1.2355, + "step": 129477 + }, + { + "epoch": 1.56, + "grad_norm": 6.759409083261152, + "learning_rate": 2.4651431385487934e-06, + "loss": 1.3093, + "step": 129480 + }, + { + "epoch": 1.56, + "grad_norm": 14.017807721818405, + "learning_rate": 2.464759075194295e-06, + "loss": 0.8417, + "step": 129483 + }, + { + "epoch": 1.56, + "grad_norm": 5.220272385826285, + "learning_rate": 2.464375037554857e-06, + "loss": 1.5338, + "step": 129486 + }, + { + "epoch": 1.56, + "grad_norm": 3.7904988516322575, + "learning_rate": 2.463991025631792e-06, + "loss": 1.4013, + "step": 129489 + }, + { + "epoch": 1.56, + "grad_norm": 11.504315776886822, + "learning_rate": 2.4636070394264123e-06, + "loss": 0.9092, + "step": 129492 + }, + { + "epoch": 1.56, + "grad_norm": 6.213412733680572, + "learning_rate": 2.463223078940027e-06, + "loss": 1.0714, + "step": 129495 + }, + { + "epoch": 1.56, + "grad_norm": 12.869622873026614, + "learning_rate": 2.4628391441739484e-06, + "loss": 1.1038, + "step": 129498 + }, + { + "epoch": 1.56, + "grad_norm": 17.111898563989577, + "learning_rate": 2.462455235129485e-06, + "loss": 1.1485, + "step": 129501 + }, + { + "epoch": 1.56, + "grad_norm": 10.249465340445909, + "learning_rate": 2.4620713518079454e-06, + "loss": 1.0884, + "step": 129504 + }, + { + "epoch": 1.56, + "grad_norm": 9.614412134886027, + "learning_rate": 2.4616874942106395e-06, + "loss": 1.0701, + "step": 129507 + }, + { + "epoch": 1.56, + "grad_norm": 8.600366100256052, + "learning_rate": 2.461303662338883e-06, + "loss": 1.3502, + "step": 129510 + }, + { + "epoch": 1.56, + "grad_norm": 11.808131266766427, + "learning_rate": 2.4609198561939786e-06, + "loss": 1.2672, + "step": 129513 + }, + { + "epoch": 1.56, + "grad_norm": 5.397085947561094, + "learning_rate": 2.460536075777239e-06, + "loss": 1.0096, + "step": 129516 + }, + { + "epoch": 1.56, + "grad_norm": 11.455701814982799, + "learning_rate": 2.4601523210899734e-06, + "loss": 1.4401, + "step": 129519 + }, + { + "epoch": 1.56, + "grad_norm": 2.6423412863115106, + "learning_rate": 2.459768592133496e-06, + "loss": 1.3479, + "step": 129522 + }, + { + "epoch": 1.56, + "grad_norm": 18.33575999822377, + "learning_rate": 2.459384888909112e-06, + "loss": 1.3593, + "step": 129525 + }, + { + "epoch": 1.56, + "grad_norm": 21.83214926280636, + "learning_rate": 2.4590012114181284e-06, + "loss": 1.1633, + "step": 129528 + }, + { + "epoch": 1.56, + "grad_norm": 13.465995839163627, + "learning_rate": 2.4586175596618566e-06, + "loss": 1.0729, + "step": 129531 + }, + { + "epoch": 1.56, + "grad_norm": 5.939977319934769, + "learning_rate": 2.458233933641608e-06, + "loss": 1.4787, + "step": 129534 + }, + { + "epoch": 1.56, + "grad_norm": 4.302415194170119, + "learning_rate": 2.457850333358692e-06, + "loss": 1.0678, + "step": 129537 + }, + { + "epoch": 1.56, + "grad_norm": 2.1588991381428975, + "learning_rate": 2.457466758814415e-06, + "loss": 1.3875, + "step": 129540 + }, + { + "epoch": 1.56, + "grad_norm": 3.385101206387035, + "learning_rate": 2.457083210010086e-06, + "loss": 1.2275, + "step": 129543 + }, + { + "epoch": 1.56, + "grad_norm": 13.434985930816248, + "learning_rate": 2.4566996869470185e-06, + "loss": 1.5539, + "step": 129546 + }, + { + "epoch": 1.56, + "grad_norm": 15.604430771328975, + "learning_rate": 2.456316189626514e-06, + "loss": 1.4373, + "step": 129549 + }, + { + "epoch": 1.56, + "grad_norm": 8.562558856676878, + "learning_rate": 2.4559327180498893e-06, + "loss": 1.0781, + "step": 129552 + }, + { + "epoch": 1.56, + "grad_norm": 3.829819701086757, + "learning_rate": 2.4555492722184448e-06, + "loss": 1.1861, + "step": 129555 + }, + { + "epoch": 1.56, + "grad_norm": 7.805842807621926, + "learning_rate": 2.4551658521334933e-06, + "loss": 0.7834, + "step": 129558 + }, + { + "epoch": 1.56, + "grad_norm": 21.84716710858813, + "learning_rate": 2.4547824577963444e-06, + "loss": 1.576, + "step": 129561 + }, + { + "epoch": 1.56, + "grad_norm": 16.666666393603116, + "learning_rate": 2.454399089208307e-06, + "loss": 1.0134, + "step": 129564 + }, + { + "epoch": 1.56, + "grad_norm": 12.996969434731126, + "learning_rate": 2.454015746370687e-06, + "loss": 1.4396, + "step": 129567 + }, + { + "epoch": 1.56, + "grad_norm": 4.269781519819733, + "learning_rate": 2.4536324292847914e-06, + "loss": 1.0653, + "step": 129570 + }, + { + "epoch": 1.56, + "grad_norm": 7.165617691580581, + "learning_rate": 2.45324913795193e-06, + "loss": 0.9589, + "step": 129573 + }, + { + "epoch": 1.56, + "grad_norm": 6.756598736399541, + "learning_rate": 2.45286587237341e-06, + "loss": 1.0218, + "step": 129576 + }, + { + "epoch": 1.56, + "grad_norm": 5.031040513251573, + "learning_rate": 2.4524826325505446e-06, + "loss": 0.9258, + "step": 129579 + }, + { + "epoch": 1.56, + "grad_norm": 7.966482294750966, + "learning_rate": 2.4520994184846337e-06, + "loss": 1.0506, + "step": 129582 + }, + { + "epoch": 1.56, + "grad_norm": 3.326884187704288, + "learning_rate": 2.451716230176989e-06, + "loss": 1.3037, + "step": 129585 + }, + { + "epoch": 1.56, + "grad_norm": 23.49492902045753, + "learning_rate": 2.4513330676289206e-06, + "loss": 1.3641, + "step": 129588 + }, + { + "epoch": 1.56, + "grad_norm": 8.854464453839215, + "learning_rate": 2.450949930841733e-06, + "loss": 1.5019, + "step": 129591 + }, + { + "epoch": 1.56, + "grad_norm": 10.854124936525286, + "learning_rate": 2.450566819816732e-06, + "loss": 1.2446, + "step": 129594 + }, + { + "epoch": 1.56, + "grad_norm": 6.5142080981523005, + "learning_rate": 2.450183734555227e-06, + "loss": 1.1097, + "step": 129597 + }, + { + "epoch": 1.56, + "grad_norm": 8.292414850124016, + "learning_rate": 2.4498006750585256e-06, + "loss": 0.7273, + "step": 129600 + }, + { + "epoch": 1.56, + "grad_norm": 13.558229587944759, + "learning_rate": 2.449417641327935e-06, + "loss": 1.1117, + "step": 129603 + }, + { + "epoch": 1.56, + "grad_norm": 3.7878867186258547, + "learning_rate": 2.449034633364764e-06, + "loss": 0.961, + "step": 129606 + }, + { + "epoch": 1.56, + "grad_norm": 11.475190064740117, + "learning_rate": 2.4486516511703197e-06, + "loss": 1.1902, + "step": 129609 + }, + { + "epoch": 1.56, + "grad_norm": 6.104932132390533, + "learning_rate": 2.4482686947459032e-06, + "loss": 1.0748, + "step": 129612 + }, + { + "epoch": 1.56, + "grad_norm": 28.329265508869348, + "learning_rate": 2.4478857640928254e-06, + "loss": 1.2007, + "step": 129615 + }, + { + "epoch": 1.56, + "grad_norm": 10.0270209556953, + "learning_rate": 2.447502859212397e-06, + "loss": 1.1891, + "step": 129618 + }, + { + "epoch": 1.56, + "grad_norm": 9.324410573743675, + "learning_rate": 2.4471199801059175e-06, + "loss": 1.1714, + "step": 129621 + }, + { + "epoch": 1.56, + "grad_norm": 6.5658303549330475, + "learning_rate": 2.4467371267746976e-06, + "loss": 1.1653, + "step": 129624 + }, + { + "epoch": 1.56, + "grad_norm": 3.2522758840843577, + "learning_rate": 2.446354299220043e-06, + "loss": 1.1656, + "step": 129627 + }, + { + "epoch": 1.56, + "grad_norm": 2.950878788755361, + "learning_rate": 2.4459714974432623e-06, + "loss": 1.0212, + "step": 129630 + }, + { + "epoch": 1.56, + "grad_norm": 30.90043399113967, + "learning_rate": 2.445588721445661e-06, + "loss": 0.9286, + "step": 129633 + }, + { + "epoch": 1.56, + "grad_norm": 14.01865861411029, + "learning_rate": 2.4452059712285414e-06, + "loss": 1.0974, + "step": 129636 + }, + { + "epoch": 1.56, + "grad_norm": 6.467765489954133, + "learning_rate": 2.444823246793212e-06, + "loss": 1.0345, + "step": 129639 + }, + { + "epoch": 1.56, + "grad_norm": 8.544201933182666, + "learning_rate": 2.4444405481409793e-06, + "loss": 1.0278, + "step": 129642 + }, + { + "epoch": 1.56, + "grad_norm": 3.0731250927221807, + "learning_rate": 2.4440578752731527e-06, + "loss": 0.9829, + "step": 129645 + }, + { + "epoch": 1.56, + "grad_norm": 4.527884615584758, + "learning_rate": 2.4436752281910325e-06, + "loss": 1.123, + "step": 129648 + }, + { + "epoch": 1.56, + "grad_norm": 13.065409663868577, + "learning_rate": 2.4432926068959297e-06, + "loss": 1.0278, + "step": 129651 + }, + { + "epoch": 1.56, + "grad_norm": 5.399155651429399, + "learning_rate": 2.4429100113891447e-06, + "loss": 1.0797, + "step": 129654 + }, + { + "epoch": 1.56, + "grad_norm": 5.260946550807522, + "learning_rate": 2.4425274416719845e-06, + "loss": 1.1004, + "step": 129657 + }, + { + "epoch": 1.56, + "grad_norm": 10.758880126646735, + "learning_rate": 2.4421448977457596e-06, + "loss": 0.9919, + "step": 129660 + }, + { + "epoch": 1.56, + "grad_norm": 8.5771218028388, + "learning_rate": 2.4417623796117696e-06, + "loss": 1.2479, + "step": 129663 + }, + { + "epoch": 1.56, + "grad_norm": 7.720854511624412, + "learning_rate": 2.441379887271321e-06, + "loss": 1.0254, + "step": 129666 + }, + { + "epoch": 1.56, + "grad_norm": 10.516580679712845, + "learning_rate": 2.4409974207257213e-06, + "loss": 1.1339, + "step": 129669 + }, + { + "epoch": 1.56, + "grad_norm": 9.693425296705442, + "learning_rate": 2.440614979976277e-06, + "loss": 1.01, + "step": 129672 + }, + { + "epoch": 1.56, + "grad_norm": 9.069442476419113, + "learning_rate": 2.440232565024291e-06, + "loss": 0.7002, + "step": 129675 + }, + { + "epoch": 1.56, + "grad_norm": 8.90543621058528, + "learning_rate": 2.4398501758710657e-06, + "loss": 1.1654, + "step": 129678 + }, + { + "epoch": 1.56, + "grad_norm": 3.620761569649641, + "learning_rate": 2.4394678125179084e-06, + "loss": 1.0923, + "step": 129681 + }, + { + "epoch": 1.56, + "grad_norm": 5.01305008761121, + "learning_rate": 2.439085474966124e-06, + "loss": 0.5843, + "step": 129684 + }, + { + "epoch": 1.56, + "grad_norm": 8.067805503090426, + "learning_rate": 2.4387031632170198e-06, + "loss": 1.3481, + "step": 129687 + }, + { + "epoch": 1.56, + "grad_norm": 10.164082835720682, + "learning_rate": 2.4383208772718957e-06, + "loss": 1.2321, + "step": 129690 + }, + { + "epoch": 1.56, + "grad_norm": 6.465905716409935, + "learning_rate": 2.437938617132062e-06, + "loss": 0.9713, + "step": 129693 + }, + { + "epoch": 1.56, + "grad_norm": 60.28179294444347, + "learning_rate": 2.437556382798817e-06, + "loss": 0.8514, + "step": 129696 + }, + { + "epoch": 1.56, + "grad_norm": 2.6972380656563604, + "learning_rate": 2.43717417427347e-06, + "loss": 1.289, + "step": 129699 + }, + { + "epoch": 1.56, + "grad_norm": 28.347371688908165, + "learning_rate": 2.4367919915573214e-06, + "loss": 1.4607, + "step": 129702 + }, + { + "epoch": 1.56, + "grad_norm": 10.520556773162273, + "learning_rate": 2.4364098346516775e-06, + "loss": 1.0096, + "step": 129705 + }, + { + "epoch": 1.56, + "grad_norm": 4.469458329392578, + "learning_rate": 2.4360277035578426e-06, + "loss": 0.846, + "step": 129708 + }, + { + "epoch": 1.56, + "grad_norm": 15.563099103667971, + "learning_rate": 2.43564559827712e-06, + "loss": 1.6995, + "step": 129711 + }, + { + "epoch": 1.56, + "grad_norm": 3.406801232743299, + "learning_rate": 2.4352635188108174e-06, + "loss": 0.8711, + "step": 129714 + }, + { + "epoch": 1.56, + "grad_norm": 8.78226335188386, + "learning_rate": 2.434881465160236e-06, + "loss": 1.2406, + "step": 129717 + }, + { + "epoch": 1.56, + "grad_norm": 7.415310914899701, + "learning_rate": 2.434499437326676e-06, + "loss": 1.098, + "step": 129720 + }, + { + "epoch": 1.56, + "grad_norm": 12.842733829682878, + "learning_rate": 2.434117435311445e-06, + "loss": 1.3138, + "step": 129723 + }, + { + "epoch": 1.56, + "grad_norm": 6.538153164248715, + "learning_rate": 2.433735459115848e-06, + "loss": 1.0388, + "step": 129726 + }, + { + "epoch": 1.56, + "grad_norm": 8.603214767616594, + "learning_rate": 2.433353508741184e-06, + "loss": 1.178, + "step": 129729 + }, + { + "epoch": 1.56, + "grad_norm": 189.65500541847177, + "learning_rate": 2.432971584188758e-06, + "loss": 1.2703, + "step": 129732 + }, + { + "epoch": 1.56, + "grad_norm": 7.129869372963055, + "learning_rate": 2.4325896854598783e-06, + "loss": 1.1524, + "step": 129735 + }, + { + "epoch": 1.56, + "grad_norm": 8.681212039875629, + "learning_rate": 2.4322078125558414e-06, + "loss": 1.3442, + "step": 129738 + }, + { + "epoch": 1.56, + "grad_norm": 4.170017029505835, + "learning_rate": 2.4318259654779564e-06, + "loss": 1.1141, + "step": 129741 + }, + { + "epoch": 1.56, + "grad_norm": 6.548709772621812, + "learning_rate": 2.4314441442275196e-06, + "loss": 1.2437, + "step": 129744 + }, + { + "epoch": 1.56, + "grad_norm": 5.798222883682795, + "learning_rate": 2.431062348805838e-06, + "loss": 1.2884, + "step": 129747 + }, + { + "epoch": 1.56, + "grad_norm": 9.360153359835346, + "learning_rate": 2.4306805792142153e-06, + "loss": 1.0389, + "step": 129750 + }, + { + "epoch": 1.56, + "grad_norm": 4.347320775745135, + "learning_rate": 2.4302988354539546e-06, + "loss": 1.107, + "step": 129753 + }, + { + "epoch": 1.56, + "grad_norm": 4.890865371899369, + "learning_rate": 2.429917117526356e-06, + "loss": 1.0952, + "step": 129756 + }, + { + "epoch": 1.56, + "grad_norm": 13.762395711865677, + "learning_rate": 2.4295354254327253e-06, + "loss": 1.5875, + "step": 129759 + }, + { + "epoch": 1.56, + "grad_norm": 8.90124498533367, + "learning_rate": 2.429153759174361e-06, + "loss": 0.9011, + "step": 129762 + }, + { + "epoch": 1.56, + "grad_norm": 13.730061461418536, + "learning_rate": 2.428772118752567e-06, + "loss": 1.145, + "step": 129765 + }, + { + "epoch": 1.56, + "grad_norm": 11.425802969966874, + "learning_rate": 2.428390504168651e-06, + "loss": 1.4439, + "step": 129768 + }, + { + "epoch": 1.56, + "grad_norm": 12.10363326857238, + "learning_rate": 2.428008915423907e-06, + "loss": 1.2749, + "step": 129771 + }, + { + "epoch": 1.56, + "grad_norm": 11.376647115795432, + "learning_rate": 2.4276273525196415e-06, + "loss": 1.3476, + "step": 129774 + }, + { + "epoch": 1.56, + "grad_norm": 15.693219928696735, + "learning_rate": 2.4272458154571586e-06, + "loss": 1.1443, + "step": 129777 + }, + { + "epoch": 1.56, + "grad_norm": 7.764984527590465, + "learning_rate": 2.426864304237756e-06, + "loss": 0.9764, + "step": 129780 + }, + { + "epoch": 1.56, + "grad_norm": 3.8092382666932756, + "learning_rate": 2.426482818862741e-06, + "loss": 1.2777, + "step": 129783 + }, + { + "epoch": 1.56, + "grad_norm": 10.908809028647479, + "learning_rate": 2.426101359333408e-06, + "loss": 1.2102, + "step": 129786 + }, + { + "epoch": 1.56, + "grad_norm": 19.281249339495186, + "learning_rate": 2.4257199256510635e-06, + "loss": 1.0706, + "step": 129789 + }, + { + "epoch": 1.56, + "grad_norm": 4.595362974992167, + "learning_rate": 2.4253385178170088e-06, + "loss": 1.1742, + "step": 129792 + }, + { + "epoch": 1.56, + "grad_norm": 7.2076087556728865, + "learning_rate": 2.4249571358325475e-06, + "loss": 1.346, + "step": 129795 + }, + { + "epoch": 1.56, + "grad_norm": 8.657383158936229, + "learning_rate": 2.4245757796989766e-06, + "loss": 1.0024, + "step": 129798 + }, + { + "epoch": 1.56, + "grad_norm": 7.114485073868588, + "learning_rate": 2.424194449417603e-06, + "loss": 1.195, + "step": 129801 + }, + { + "epoch": 1.56, + "grad_norm": 10.856570386085169, + "learning_rate": 2.4238131449897227e-06, + "loss": 0.9017, + "step": 129804 + }, + { + "epoch": 1.56, + "grad_norm": 3.014197211013554, + "learning_rate": 2.4234318664166412e-06, + "loss": 1.1589, + "step": 129807 + }, + { + "epoch": 1.56, + "grad_norm": 43.57195771627516, + "learning_rate": 2.4230506136996548e-06, + "loss": 1.6012, + "step": 129810 + }, + { + "epoch": 1.56, + "grad_norm": 10.255715579129141, + "learning_rate": 2.4226693868400676e-06, + "loss": 1.3064, + "step": 129813 + }, + { + "epoch": 1.56, + "grad_norm": 4.354220675413332, + "learning_rate": 2.42228818583918e-06, + "loss": 1.7309, + "step": 129816 + }, + { + "epoch": 1.56, + "grad_norm": 6.747255573715302, + "learning_rate": 2.4219070106982968e-06, + "loss": 1.1925, + "step": 129819 + }, + { + "epoch": 1.56, + "grad_norm": 12.237486456137983, + "learning_rate": 2.4215258614187122e-06, + "loss": 1.2679, + "step": 129822 + }, + { + "epoch": 1.56, + "grad_norm": 2.5243970373606044, + "learning_rate": 2.4211447380017337e-06, + "loss": 0.8761, + "step": 129825 + }, + { + "epoch": 1.56, + "grad_norm": 6.659006135443897, + "learning_rate": 2.4207636404486547e-06, + "loss": 1.2537, + "step": 129828 + }, + { + "epoch": 1.56, + "grad_norm": 8.796130892157002, + "learning_rate": 2.4203825687607796e-06, + "loss": 1.1305, + "step": 129831 + }, + { + "epoch": 1.56, + "grad_norm": 10.645406655527683, + "learning_rate": 2.4200015229394116e-06, + "loss": 0.9206, + "step": 129834 + }, + { + "epoch": 1.56, + "grad_norm": 5.130994193876138, + "learning_rate": 2.4196205029858444e-06, + "loss": 1.0204, + "step": 129837 + }, + { + "epoch": 1.56, + "grad_norm": 5.874248219079666, + "learning_rate": 2.419239508901383e-06, + "loss": 1.0293, + "step": 129840 + }, + { + "epoch": 1.56, + "grad_norm": 20.775770010453343, + "learning_rate": 2.418858540687329e-06, + "loss": 1.1384, + "step": 129843 + }, + { + "epoch": 1.56, + "grad_norm": 7.236874726131363, + "learning_rate": 2.418477598344977e-06, + "loss": 1.1504, + "step": 129846 + }, + { + "epoch": 1.56, + "grad_norm": 12.40805948486663, + "learning_rate": 2.4180966818756333e-06, + "loss": 1.1447, + "step": 129849 + }, + { + "epoch": 1.56, + "grad_norm": 9.514166815872704, + "learning_rate": 2.4177157912805915e-06, + "loss": 0.8734, + "step": 129852 + }, + { + "epoch": 1.56, + "grad_norm": 15.430146177792976, + "learning_rate": 2.4173349265611547e-06, + "loss": 1.3501, + "step": 129855 + }, + { + "epoch": 1.56, + "grad_norm": 10.298330069975949, + "learning_rate": 2.4169540877186226e-06, + "loss": 1.3825, + "step": 129858 + }, + { + "epoch": 1.56, + "grad_norm": 6.549234320959972, + "learning_rate": 2.416573274754298e-06, + "loss": 1.0858, + "step": 129861 + }, + { + "epoch": 1.56, + "grad_norm": 11.969685169336374, + "learning_rate": 2.4161924876694743e-06, + "loss": 1.5, + "step": 129864 + }, + { + "epoch": 1.56, + "grad_norm": 9.211898846131705, + "learning_rate": 2.4158117264654567e-06, + "loss": 1.1557, + "step": 129867 + }, + { + "epoch": 1.56, + "grad_norm": 18.980316109443894, + "learning_rate": 2.4154309911435383e-06, + "loss": 1.5065, + "step": 129870 + }, + { + "epoch": 1.56, + "grad_norm": 7.238949212437561, + "learning_rate": 2.4150502817050227e-06, + "loss": 1.2264, + "step": 129873 + }, + { + "epoch": 1.56, + "grad_norm": 2.694916005611616, + "learning_rate": 2.4146695981512115e-06, + "loss": 1.3054, + "step": 129876 + }, + { + "epoch": 1.56, + "grad_norm": 5.870056453209397, + "learning_rate": 2.414288940483397e-06, + "loss": 0.8973, + "step": 129879 + }, + { + "epoch": 1.56, + "grad_norm": 10.640123046734999, + "learning_rate": 2.4139083087028827e-06, + "loss": 1.0025, + "step": 129882 + }, + { + "epoch": 1.56, + "grad_norm": 3.183826406537649, + "learning_rate": 2.4135277028109694e-06, + "loss": 1.3182, + "step": 129885 + }, + { + "epoch": 1.56, + "grad_norm": 5.4609484200618335, + "learning_rate": 2.4131471228089544e-06, + "loss": 1.373, + "step": 129888 + }, + { + "epoch": 1.56, + "grad_norm": 8.4817942000703, + "learning_rate": 2.4127665686981316e-06, + "loss": 1.1368, + "step": 129891 + }, + { + "epoch": 1.56, + "grad_norm": 3.9831152665623994, + "learning_rate": 2.4123860404798037e-06, + "loss": 1.283, + "step": 129894 + }, + { + "epoch": 1.56, + "grad_norm": 3.713328968124083, + "learning_rate": 2.4120055381552687e-06, + "loss": 0.841, + "step": 129897 + }, + { + "epoch": 1.56, + "grad_norm": 13.926209904702782, + "learning_rate": 2.4116250617258264e-06, + "loss": 1.2739, + "step": 129900 + }, + { + "epoch": 1.56, + "grad_norm": 2.3520699961901945, + "learning_rate": 2.4112446111927767e-06, + "loss": 1.2395, + "step": 129903 + }, + { + "epoch": 1.56, + "grad_norm": 3.1352580059559276, + "learning_rate": 2.4108641865574133e-06, + "loss": 1.2021, + "step": 129906 + }, + { + "epoch": 1.56, + "grad_norm": 8.578562750455491, + "learning_rate": 2.41048378782104e-06, + "loss": 1.5871, + "step": 129909 + }, + { + "epoch": 1.56, + "grad_norm": 4.127749913707638, + "learning_rate": 2.410103414984949e-06, + "loss": 0.9745, + "step": 129912 + }, + { + "epoch": 1.56, + "grad_norm": 4.720378793995802, + "learning_rate": 2.4097230680504436e-06, + "loss": 1.2054, + "step": 129915 + }, + { + "epoch": 1.56, + "grad_norm": 13.721871209614095, + "learning_rate": 2.4093427470188157e-06, + "loss": 1.3151, + "step": 129918 + }, + { + "epoch": 1.56, + "grad_norm": 4.047325944928842, + "learning_rate": 2.408962451891368e-06, + "loss": 1.1055, + "step": 129921 + }, + { + "epoch": 1.56, + "grad_norm": 4.565594819769639, + "learning_rate": 2.4085821826693967e-06, + "loss": 1.2861, + "step": 129924 + }, + { + "epoch": 1.56, + "grad_norm": 8.495294700837226, + "learning_rate": 2.4082019393542034e-06, + "loss": 1.1821, + "step": 129927 + }, + { + "epoch": 1.56, + "grad_norm": 5.251125074016888, + "learning_rate": 2.407821721947081e-06, + "loss": 1.4516, + "step": 129930 + }, + { + "epoch": 1.56, + "grad_norm": 8.795086479434017, + "learning_rate": 2.407441530449327e-06, + "loss": 0.9941, + "step": 129933 + }, + { + "epoch": 1.56, + "grad_norm": 5.919979328226943, + "learning_rate": 2.40706136486224e-06, + "loss": 1.2716, + "step": 129936 + }, + { + "epoch": 1.56, + "grad_norm": 10.93128734163112, + "learning_rate": 2.4066812251871174e-06, + "loss": 1.0496, + "step": 129939 + }, + { + "epoch": 1.56, + "grad_norm": 6.671598827297166, + "learning_rate": 2.406301111425259e-06, + "loss": 1.2207, + "step": 129942 + }, + { + "epoch": 1.56, + "grad_norm": 9.870706835794827, + "learning_rate": 2.4059210235779575e-06, + "loss": 1.392, + "step": 129945 + }, + { + "epoch": 1.56, + "grad_norm": 11.811214541351973, + "learning_rate": 2.405540961646512e-06, + "loss": 1.2035, + "step": 129948 + }, + { + "epoch": 1.56, + "grad_norm": 3.929678765525049, + "learning_rate": 2.405160925632222e-06, + "loss": 1.2265, + "step": 129951 + }, + { + "epoch": 1.56, + "grad_norm": 21.16336183497222, + "learning_rate": 2.40478091553638e-06, + "loss": 1.2515, + "step": 129954 + }, + { + "epoch": 1.56, + "grad_norm": 9.39062050037588, + "learning_rate": 2.4044009313602866e-06, + "loss": 1.3643, + "step": 129957 + }, + { + "epoch": 1.56, + "grad_norm": 5.769025163571126, + "learning_rate": 2.4040209731052357e-06, + "loss": 1.0484, + "step": 129960 + }, + { + "epoch": 1.56, + "grad_norm": 3.105269601418131, + "learning_rate": 2.403641040772524e-06, + "loss": 1.0809, + "step": 129963 + }, + { + "epoch": 1.56, + "grad_norm": 11.766212810703253, + "learning_rate": 2.4032611343634493e-06, + "loss": 1.4459, + "step": 129966 + }, + { + "epoch": 1.56, + "grad_norm": 16.261999288954552, + "learning_rate": 2.4028812538793112e-06, + "loss": 0.9602, + "step": 129969 + }, + { + "epoch": 1.56, + "grad_norm": 15.947429744172274, + "learning_rate": 2.4025013993214032e-06, + "loss": 1.0134, + "step": 129972 + }, + { + "epoch": 1.56, + "grad_norm": 8.294616318173262, + "learning_rate": 2.4021215706910183e-06, + "loss": 1.2289, + "step": 129975 + }, + { + "epoch": 1.56, + "grad_norm": 8.428469686816749, + "learning_rate": 2.401741767989455e-06, + "loss": 1.304, + "step": 129978 + }, + { + "epoch": 1.56, + "grad_norm": 7.891657103093278, + "learning_rate": 2.401361991218011e-06, + "loss": 1.2311, + "step": 129981 + }, + { + "epoch": 1.56, + "grad_norm": 8.795455243183982, + "learning_rate": 2.400982240377985e-06, + "loss": 0.9744, + "step": 129984 + }, + { + "epoch": 1.56, + "grad_norm": 3.929560279407683, + "learning_rate": 2.4006025154706647e-06, + "loss": 1.1707, + "step": 129987 + }, + { + "epoch": 1.56, + "grad_norm": 4.779285417692911, + "learning_rate": 2.4002228164973516e-06, + "loss": 1.1614, + "step": 129990 + }, + { + "epoch": 1.56, + "grad_norm": 10.074927292868521, + "learning_rate": 2.399843143459344e-06, + "loss": 1.1113, + "step": 129993 + }, + { + "epoch": 1.56, + "grad_norm": 5.5356742462751685, + "learning_rate": 2.3994634963579343e-06, + "loss": 1.1144, + "step": 129996 + }, + { + "epoch": 1.56, + "grad_norm": 16.03702321469427, + "learning_rate": 2.3990838751944145e-06, + "loss": 1.0745, + "step": 129999 + }, + { + "epoch": 1.56, + "grad_norm": 4.884008280210114, + "learning_rate": 2.398704279970083e-06, + "loss": 1.078, + "step": 130002 + }, + { + "epoch": 1.56, + "grad_norm": 6.056499304403608, + "learning_rate": 2.3983247106862373e-06, + "loss": 1.113, + "step": 130005 + }, + { + "epoch": 1.56, + "grad_norm": 9.008773412729678, + "learning_rate": 2.3979451673441702e-06, + "loss": 1.3529, + "step": 130008 + }, + { + "epoch": 1.56, + "grad_norm": 13.256866145363054, + "learning_rate": 2.3975656499451816e-06, + "loss": 1.0518, + "step": 130011 + }, + { + "epoch": 1.56, + "grad_norm": 6.423576644666147, + "learning_rate": 2.397186158490562e-06, + "loss": 1.2963, + "step": 130014 + }, + { + "epoch": 1.56, + "grad_norm": 19.15607120932296, + "learning_rate": 2.396806692981606e-06, + "loss": 1.1806, + "step": 130017 + }, + { + "epoch": 1.56, + "grad_norm": 9.250285899629153, + "learning_rate": 2.396427253419609e-06, + "loss": 1.1431, + "step": 130020 + }, + { + "epoch": 1.56, + "grad_norm": 5.34048733363818, + "learning_rate": 2.3960478398058697e-06, + "loss": 1.2434, + "step": 130023 + }, + { + "epoch": 1.56, + "grad_norm": 2.799681724561057, + "learning_rate": 2.3956684521416782e-06, + "loss": 0.9821, + "step": 130026 + }, + { + "epoch": 1.56, + "grad_norm": 5.06556175218168, + "learning_rate": 2.395289090428331e-06, + "loss": 1.2185, + "step": 130029 + }, + { + "epoch": 1.56, + "grad_norm": 8.420751444092168, + "learning_rate": 2.394909754667123e-06, + "loss": 1.1702, + "step": 130032 + }, + { + "epoch": 1.56, + "grad_norm": 9.835388116646845, + "learning_rate": 2.394530444859351e-06, + "loss": 1.3809, + "step": 130035 + }, + { + "epoch": 1.56, + "grad_norm": 4.253266816547001, + "learning_rate": 2.3941511610063075e-06, + "loss": 0.954, + "step": 130038 + }, + { + "epoch": 1.56, + "grad_norm": 6.516508406342661, + "learning_rate": 2.393771903109283e-06, + "loss": 1.3045, + "step": 130041 + }, + { + "epoch": 1.56, + "grad_norm": 4.462759404978927, + "learning_rate": 2.3933926711695764e-06, + "loss": 0.908, + "step": 130044 + }, + { + "epoch": 1.56, + "grad_norm": 8.717294186143237, + "learning_rate": 2.3930134651884795e-06, + "loss": 1.3713, + "step": 130047 + }, + { + "epoch": 1.56, + "grad_norm": 7.899577970359049, + "learning_rate": 2.392634285167291e-06, + "loss": 1.0908, + "step": 130050 + }, + { + "epoch": 1.56, + "grad_norm": 6.281906858200176, + "learning_rate": 2.3922551311072995e-06, + "loss": 1.0479, + "step": 130053 + }, + { + "epoch": 1.56, + "grad_norm": 14.024000448515931, + "learning_rate": 2.391876003009802e-06, + "loss": 0.9407, + "step": 130056 + }, + { + "epoch": 1.56, + "grad_norm": 4.255846101777375, + "learning_rate": 2.39149690087609e-06, + "loss": 1.6555, + "step": 130059 + }, + { + "epoch": 1.56, + "grad_norm": 11.831298799564532, + "learning_rate": 2.3911178247074586e-06, + "loss": 1.046, + "step": 130062 + }, + { + "epoch": 1.56, + "grad_norm": 5.909847869855878, + "learning_rate": 2.3907387745052023e-06, + "loss": 1.2967, + "step": 130065 + }, + { + "epoch": 1.56, + "grad_norm": 9.727002488520096, + "learning_rate": 2.3903597502706134e-06, + "loss": 0.9941, + "step": 130068 + }, + { + "epoch": 1.56, + "grad_norm": 7.789775287458804, + "learning_rate": 2.3899807520049836e-06, + "loss": 1.037, + "step": 130071 + }, + { + "epoch": 1.56, + "grad_norm": 19.32998000912213, + "learning_rate": 2.3896017797096092e-06, + "loss": 1.4284, + "step": 130074 + }, + { + "epoch": 1.56, + "grad_norm": 5.557520418007665, + "learning_rate": 2.389222833385786e-06, + "loss": 1.3546, + "step": 130077 + }, + { + "epoch": 1.56, + "grad_norm": 11.733453267209766, + "learning_rate": 2.3888439130348027e-06, + "loss": 1.1204, + "step": 130080 + }, + { + "epoch": 1.56, + "grad_norm": 9.460425079930946, + "learning_rate": 2.3884650186579515e-06, + "loss": 1.6338, + "step": 130083 + }, + { + "epoch": 1.56, + "grad_norm": 14.859178998118665, + "learning_rate": 2.3880861502565277e-06, + "loss": 0.9845, + "step": 130086 + }, + { + "epoch": 1.56, + "grad_norm": 10.908746098009772, + "learning_rate": 2.3877073078318236e-06, + "loss": 0.6281, + "step": 130089 + }, + { + "epoch": 1.56, + "grad_norm": 4.155016410545577, + "learning_rate": 2.387328491385136e-06, + "loss": 1.2834, + "step": 130092 + }, + { + "epoch": 1.56, + "grad_norm": 6.29850127714837, + "learning_rate": 2.386949700917751e-06, + "loss": 1.1758, + "step": 130095 + }, + { + "epoch": 1.56, + "grad_norm": 18.728528012070164, + "learning_rate": 2.3865709364309676e-06, + "loss": 1.1081, + "step": 130098 + }, + { + "epoch": 1.56, + "grad_norm": 10.993596724127784, + "learning_rate": 2.386192197926072e-06, + "loss": 0.9144, + "step": 130101 + }, + { + "epoch": 1.56, + "grad_norm": 7.422951804336342, + "learning_rate": 2.3858134854043623e-06, + "loss": 1.1761, + "step": 130104 + }, + { + "epoch": 1.56, + "grad_norm": 14.620753869873841, + "learning_rate": 2.3854347988671266e-06, + "loss": 1.2363, + "step": 130107 + }, + { + "epoch": 1.56, + "grad_norm": 7.541983498934667, + "learning_rate": 2.385056138315659e-06, + "loss": 1.1196, + "step": 130110 + }, + { + "epoch": 1.56, + "grad_norm": 19.268176757481857, + "learning_rate": 2.3846775037512513e-06, + "loss": 1.0844, + "step": 130113 + }, + { + "epoch": 1.56, + "grad_norm": 16.720086362691333, + "learning_rate": 2.3842988951751966e-06, + "loss": 0.7366, + "step": 130116 + }, + { + "epoch": 1.56, + "grad_norm": 3.4575431316891367, + "learning_rate": 2.38392031258879e-06, + "loss": 1.2604, + "step": 130119 + }, + { + "epoch": 1.56, + "grad_norm": 4.257899027684432, + "learning_rate": 2.3835417559933194e-06, + "loss": 1.6351, + "step": 130122 + }, + { + "epoch": 1.56, + "grad_norm": 3.2436699789504306, + "learning_rate": 2.383163225390075e-06, + "loss": 1.0338, + "step": 130125 + }, + { + "epoch": 1.56, + "grad_norm": 4.500459907525996, + "learning_rate": 2.382784720780351e-06, + "loss": 0.9565, + "step": 130128 + }, + { + "epoch": 1.56, + "grad_norm": 6.9649971025883035, + "learning_rate": 2.3824062421654415e-06, + "loss": 1.4345, + "step": 130131 + }, + { + "epoch": 1.56, + "grad_norm": 2.975892921500738, + "learning_rate": 2.3820277895466324e-06, + "loss": 1.2322, + "step": 130134 + }, + { + "epoch": 1.56, + "grad_norm": 10.308920784769606, + "learning_rate": 2.381649362925219e-06, + "loss": 1.2875, + "step": 130137 + }, + { + "epoch": 1.56, + "grad_norm": 13.314477432351586, + "learning_rate": 2.3812709623024923e-06, + "loss": 0.9806, + "step": 130140 + }, + { + "epoch": 1.56, + "grad_norm": 5.4739211873451, + "learning_rate": 2.3808925876797463e-06, + "loss": 1.7262, + "step": 130143 + }, + { + "epoch": 1.56, + "grad_norm": 7.818194776498279, + "learning_rate": 2.3805142390582704e-06, + "loss": 1.2241, + "step": 130146 + }, + { + "epoch": 1.57, + "grad_norm": 6.11717745494895, + "learning_rate": 2.38013591643935e-06, + "loss": 1.055, + "step": 130149 + }, + { + "epoch": 1.57, + "grad_norm": 3.281554286980917, + "learning_rate": 2.3797576198242822e-06, + "loss": 1.3721, + "step": 130152 + }, + { + "epoch": 1.57, + "grad_norm": 5.7392883425040395, + "learning_rate": 2.379379349214357e-06, + "loss": 1.4836, + "step": 130155 + }, + { + "epoch": 1.57, + "grad_norm": 6.3975553608052245, + "learning_rate": 2.3790011046108685e-06, + "loss": 0.9977, + "step": 130158 + }, + { + "epoch": 1.57, + "grad_norm": 5.699671912599384, + "learning_rate": 2.3786228860151007e-06, + "loss": 1.3501, + "step": 130161 + }, + { + "epoch": 1.57, + "grad_norm": 14.794258717458089, + "learning_rate": 2.37824469342835e-06, + "loss": 1.4213, + "step": 130164 + }, + { + "epoch": 1.57, + "grad_norm": 23.0508107002813, + "learning_rate": 2.377866526851903e-06, + "loss": 1.174, + "step": 130167 + }, + { + "epoch": 1.57, + "grad_norm": 80.97997231278711, + "learning_rate": 2.377488386287051e-06, + "loss": 1.0442, + "step": 130170 + }, + { + "epoch": 1.57, + "grad_norm": 13.530054014544799, + "learning_rate": 2.3771102717350892e-06, + "loss": 1.2109, + "step": 130173 + }, + { + "epoch": 1.57, + "grad_norm": 30.130638649520236, + "learning_rate": 2.376732183197301e-06, + "loss": 0.7094, + "step": 130176 + }, + { + "epoch": 1.57, + "grad_norm": 4.250843499365804, + "learning_rate": 2.3763541206749796e-06, + "loss": 1.5614, + "step": 130179 + }, + { + "epoch": 1.57, + "grad_norm": 2.784852681932783, + "learning_rate": 2.375976084169417e-06, + "loss": 1.0145, + "step": 130182 + }, + { + "epoch": 1.57, + "grad_norm": 5.924013775581904, + "learning_rate": 2.375598073681904e-06, + "loss": 1.2006, + "step": 130185 + }, + { + "epoch": 1.57, + "grad_norm": 9.921209179219668, + "learning_rate": 2.3752200892137277e-06, + "loss": 1.1495, + "step": 130188 + }, + { + "epoch": 1.57, + "grad_norm": 6.596265452624734, + "learning_rate": 2.3748421307661763e-06, + "loss": 1.2498, + "step": 130191 + }, + { + "epoch": 1.57, + "grad_norm": 9.400941574170451, + "learning_rate": 2.3744641983405426e-06, + "loss": 1.1204, + "step": 130194 + }, + { + "epoch": 1.57, + "grad_norm": 5.939914102162877, + "learning_rate": 2.374086291938116e-06, + "loss": 0.7949, + "step": 130197 + }, + { + "epoch": 1.57, + "grad_norm": 4.009772204492149, + "learning_rate": 2.373708411560188e-06, + "loss": 1.5253, + "step": 130200 + }, + { + "epoch": 1.57, + "grad_norm": 7.496077135672148, + "learning_rate": 2.3733305572080444e-06, + "loss": 0.9061, + "step": 130203 + }, + { + "epoch": 1.57, + "grad_norm": 10.238870801057324, + "learning_rate": 2.372952728882979e-06, + "loss": 1.5603, + "step": 130206 + }, + { + "epoch": 1.57, + "grad_norm": 3.55178722567338, + "learning_rate": 2.3725749265862754e-06, + "loss": 1.2658, + "step": 130209 + }, + { + "epoch": 1.57, + "grad_norm": 6.794076240978465, + "learning_rate": 2.3721971503192297e-06, + "loss": 1.1157, + "step": 130212 + }, + { + "epoch": 1.57, + "grad_norm": 10.874317552168735, + "learning_rate": 2.371819400083124e-06, + "loss": 1.1562, + "step": 130215 + }, + { + "epoch": 1.57, + "grad_norm": 3.3553701475354623, + "learning_rate": 2.3714416758792515e-06, + "loss": 1.2076, + "step": 130218 + }, + { + "epoch": 1.57, + "grad_norm": 9.363497082330152, + "learning_rate": 2.3710639777089006e-06, + "loss": 0.9381, + "step": 130221 + }, + { + "epoch": 1.57, + "grad_norm": 18.40882258426103, + "learning_rate": 2.370686305573361e-06, + "loss": 1.0186, + "step": 130224 + }, + { + "epoch": 1.57, + "grad_norm": 2.830709346616814, + "learning_rate": 2.370308659473923e-06, + "loss": 1.3049, + "step": 130227 + }, + { + "epoch": 1.57, + "grad_norm": 7.78793762978752, + "learning_rate": 2.3699310394118736e-06, + "loss": 1.0203, + "step": 130230 + }, + { + "epoch": 1.57, + "grad_norm": 15.150157786001568, + "learning_rate": 2.369553445388498e-06, + "loss": 1.4485, + "step": 130233 + }, + { + "epoch": 1.57, + "grad_norm": 10.375172685022003, + "learning_rate": 2.3691758774050886e-06, + "loss": 0.9385, + "step": 130236 + }, + { + "epoch": 1.57, + "grad_norm": 11.810451400683169, + "learning_rate": 2.368798335462936e-06, + "loss": 1.2568, + "step": 130239 + }, + { + "epoch": 1.57, + "grad_norm": 14.676416278653281, + "learning_rate": 2.3684208195633242e-06, + "loss": 1.0225, + "step": 130242 + }, + { + "epoch": 1.57, + "grad_norm": 25.120863106159153, + "learning_rate": 2.3680433297075423e-06, + "loss": 1.1246, + "step": 130245 + }, + { + "epoch": 1.57, + "grad_norm": 14.985185655097995, + "learning_rate": 2.367665865896882e-06, + "loss": 1.0075, + "step": 130248 + }, + { + "epoch": 1.57, + "grad_norm": 6.528373755419505, + "learning_rate": 2.367288428132626e-06, + "loss": 1.1915, + "step": 130251 + }, + { + "epoch": 1.57, + "grad_norm": 13.406422693316783, + "learning_rate": 2.3669110164160692e-06, + "loss": 1.138, + "step": 130254 + }, + { + "epoch": 1.57, + "grad_norm": 7.441209898092595, + "learning_rate": 2.366533630748492e-06, + "loss": 0.9788, + "step": 130257 + }, + { + "epoch": 1.57, + "grad_norm": 6.13709170806449, + "learning_rate": 2.3661562711311857e-06, + "loss": 1.1172, + "step": 130260 + }, + { + "epoch": 1.57, + "grad_norm": 6.398760826456472, + "learning_rate": 2.365778937565438e-06, + "loss": 1.4257, + "step": 130263 + }, + { + "epoch": 1.57, + "grad_norm": 16.520201746446915, + "learning_rate": 2.3654016300525406e-06, + "loss": 0.9381, + "step": 130266 + }, + { + "epoch": 1.57, + "grad_norm": 6.237371847181262, + "learning_rate": 2.3650243485937752e-06, + "loss": 1.3313, + "step": 130269 + }, + { + "epoch": 1.57, + "grad_norm": 10.385142242051467, + "learning_rate": 2.364647093190434e-06, + "loss": 1.2433, + "step": 130272 + }, + { + "epoch": 1.57, + "grad_norm": 6.3651441258910735, + "learning_rate": 2.364269863843799e-06, + "loss": 1.105, + "step": 130275 + }, + { + "epoch": 1.57, + "grad_norm": 43.524238215849685, + "learning_rate": 2.3638926605551606e-06, + "loss": 1.1364, + "step": 130278 + }, + { + "epoch": 1.57, + "grad_norm": 3.12188628584567, + "learning_rate": 2.363515483325809e-06, + "loss": 1.3239, + "step": 130281 + }, + { + "epoch": 1.57, + "grad_norm": 10.752992872048436, + "learning_rate": 2.3631383321570247e-06, + "loss": 0.9332, + "step": 130284 + }, + { + "epoch": 1.57, + "grad_norm": 6.558982822791525, + "learning_rate": 2.3627612070500995e-06, + "loss": 1.1883, + "step": 130287 + }, + { + "epoch": 1.57, + "grad_norm": 3.0599376019782225, + "learning_rate": 2.3623841080063214e-06, + "loss": 1.5595, + "step": 130290 + }, + { + "epoch": 1.57, + "grad_norm": 10.669363707225228, + "learning_rate": 2.3620070350269765e-06, + "loss": 1.2766, + "step": 130293 + }, + { + "epoch": 1.57, + "grad_norm": 11.435817496968683, + "learning_rate": 2.3616299881133465e-06, + "loss": 1.1731, + "step": 130296 + }, + { + "epoch": 1.57, + "grad_norm": 24.12084783699379, + "learning_rate": 2.361252967266723e-06, + "loss": 1.2603, + "step": 130299 + }, + { + "epoch": 1.57, + "grad_norm": 11.874979448430635, + "learning_rate": 2.36087597248839e-06, + "loss": 1.1206, + "step": 130302 + }, + { + "epoch": 1.57, + "grad_norm": 9.28936670466834, + "learning_rate": 2.360499003779637e-06, + "loss": 1.2377, + "step": 130305 + }, + { + "epoch": 1.57, + "grad_norm": 19.961388786284136, + "learning_rate": 2.3601220611417518e-06, + "loss": 0.7371, + "step": 130308 + }, + { + "epoch": 1.57, + "grad_norm": 6.770259617767532, + "learning_rate": 2.3597451445760158e-06, + "loss": 1.0509, + "step": 130311 + }, + { + "epoch": 1.57, + "grad_norm": 2.734736325728452, + "learning_rate": 2.359368254083719e-06, + "loss": 1.2434, + "step": 130314 + }, + { + "epoch": 1.57, + "grad_norm": 8.210602556300714, + "learning_rate": 2.358991389666144e-06, + "loss": 0.9654, + "step": 130317 + }, + { + "epoch": 1.57, + "grad_norm": 5.388641370554944, + "learning_rate": 2.3586145513245827e-06, + "loss": 1.1603, + "step": 130320 + }, + { + "epoch": 1.57, + "grad_norm": 14.87426294394471, + "learning_rate": 2.3582377390603138e-06, + "loss": 1.2784, + "step": 130323 + }, + { + "epoch": 1.57, + "grad_norm": 8.233227598286534, + "learning_rate": 2.3578609528746277e-06, + "loss": 1.5319, + "step": 130326 + }, + { + "epoch": 1.57, + "grad_norm": 14.30490611555741, + "learning_rate": 2.3574841927688096e-06, + "loss": 1.1185, + "step": 130329 + }, + { + "epoch": 1.57, + "grad_norm": 11.26693427463612, + "learning_rate": 2.357107458744147e-06, + "loss": 0.7175, + "step": 130332 + }, + { + "epoch": 1.57, + "grad_norm": 9.408377562408578, + "learning_rate": 2.3567307508019244e-06, + "loss": 1.1317, + "step": 130335 + }, + { + "epoch": 1.57, + "grad_norm": 3.0622266946260126, + "learning_rate": 2.356354068943424e-06, + "loss": 1.3536, + "step": 130338 + }, + { + "epoch": 1.57, + "grad_norm": 8.471629682346114, + "learning_rate": 2.355977413169933e-06, + "loss": 1.0494, + "step": 130341 + }, + { + "epoch": 1.57, + "grad_norm": 10.69251092514436, + "learning_rate": 2.3556007834827376e-06, + "loss": 1.226, + "step": 130344 + }, + { + "epoch": 1.57, + "grad_norm": 17.47672604122555, + "learning_rate": 2.3552241798831276e-06, + "loss": 1.3749, + "step": 130347 + }, + { + "epoch": 1.57, + "grad_norm": 11.617197210182919, + "learning_rate": 2.35484760237238e-06, + "loss": 1.2186, + "step": 130350 + }, + { + "epoch": 1.57, + "grad_norm": 9.358858560860854, + "learning_rate": 2.3544710509517833e-06, + "loss": 1.0018, + "step": 130353 + }, + { + "epoch": 1.57, + "grad_norm": 4.576791619483039, + "learning_rate": 2.354094525622627e-06, + "loss": 1.0266, + "step": 130356 + }, + { + "epoch": 1.57, + "grad_norm": 16.27773523364328, + "learning_rate": 2.353718026386188e-06, + "loss": 1.4385, + "step": 130359 + }, + { + "epoch": 1.57, + "grad_norm": 8.204103270922706, + "learning_rate": 2.3533415532437586e-06, + "loss": 1.3188, + "step": 130362 + }, + { + "epoch": 1.57, + "grad_norm": 5.478485876560352, + "learning_rate": 2.3529651061966175e-06, + "loss": 1.1263, + "step": 130365 + }, + { + "epoch": 1.57, + "grad_norm": 11.901925378024403, + "learning_rate": 2.352588685246052e-06, + "loss": 0.8868, + "step": 130368 + }, + { + "epoch": 1.57, + "grad_norm": 5.339758112794817, + "learning_rate": 2.3522122903933464e-06, + "loss": 1.2656, + "step": 130371 + }, + { + "epoch": 1.57, + "grad_norm": 3.274963610370036, + "learning_rate": 2.3518359216397892e-06, + "loss": 0.7997, + "step": 130374 + }, + { + "epoch": 1.57, + "grad_norm": 4.2501902100651225, + "learning_rate": 2.351459578986658e-06, + "loss": 1.2695, + "step": 130377 + }, + { + "epoch": 1.57, + "grad_norm": 6.662866371750222, + "learning_rate": 2.351083262435242e-06, + "loss": 1.1481, + "step": 130380 + }, + { + "epoch": 1.57, + "grad_norm": 5.970136639380673, + "learning_rate": 2.3507069719868215e-06, + "loss": 1.2215, + "step": 130383 + }, + { + "epoch": 1.57, + "grad_norm": 7.934369722405701, + "learning_rate": 2.3503307076426827e-06, + "loss": 1.22, + "step": 130386 + }, + { + "epoch": 1.57, + "grad_norm": 6.985778147052223, + "learning_rate": 2.3499544694041133e-06, + "loss": 1.1082, + "step": 130389 + }, + { + "epoch": 1.57, + "grad_norm": 14.733186274938314, + "learning_rate": 2.349578257272391e-06, + "loss": 1.3472, + "step": 130392 + }, + { + "epoch": 1.57, + "grad_norm": 4.986264851990157, + "learning_rate": 2.349202071248802e-06, + "loss": 1.3897, + "step": 130395 + }, + { + "epoch": 1.57, + "grad_norm": 8.452645078942629, + "learning_rate": 2.3488259113346334e-06, + "loss": 1.2224, + "step": 130398 + }, + { + "epoch": 1.57, + "grad_norm": 3.651520676051515, + "learning_rate": 2.3484497775311653e-06, + "loss": 1.3486, + "step": 130401 + }, + { + "epoch": 1.57, + "grad_norm": 3.7447535949034743, + "learning_rate": 2.3480736698396812e-06, + "loss": 1.3046, + "step": 130404 + }, + { + "epoch": 1.57, + "grad_norm": 5.562289408665197, + "learning_rate": 2.347697588261464e-06, + "loss": 1.2236, + "step": 130407 + }, + { + "epoch": 1.57, + "grad_norm": 15.58116931373789, + "learning_rate": 2.3473215327977984e-06, + "loss": 1.2824, + "step": 130410 + }, + { + "epoch": 1.57, + "grad_norm": 9.174352823883098, + "learning_rate": 2.3469455034499675e-06, + "loss": 1.3317, + "step": 130413 + }, + { + "epoch": 1.57, + "grad_norm": 5.637849441905476, + "learning_rate": 2.346569500219259e-06, + "loss": 1.0399, + "step": 130416 + }, + { + "epoch": 1.57, + "grad_norm": 30.663635429099305, + "learning_rate": 2.346193523106949e-06, + "loss": 1.3435, + "step": 130419 + }, + { + "epoch": 1.57, + "grad_norm": 7.103755314048524, + "learning_rate": 2.345817572114326e-06, + "loss": 1.3127, + "step": 130422 + }, + { + "epoch": 1.57, + "grad_norm": 9.045435260240088, + "learning_rate": 2.3454416472426677e-06, + "loss": 0.9564, + "step": 130425 + }, + { + "epoch": 1.57, + "grad_norm": 11.079921179629936, + "learning_rate": 2.3450657484932638e-06, + "loss": 1.3546, + "step": 130428 + }, + { + "epoch": 1.57, + "grad_norm": 8.23791924053102, + "learning_rate": 2.3446898758673897e-06, + "loss": 0.9966, + "step": 130431 + }, + { + "epoch": 1.57, + "grad_norm": 3.6077295648712315, + "learning_rate": 2.3443140293663313e-06, + "loss": 1.0882, + "step": 130434 + }, + { + "epoch": 1.57, + "grad_norm": 12.37178632404583, + "learning_rate": 2.343938208991372e-06, + "loss": 1.1679, + "step": 130437 + }, + { + "epoch": 1.57, + "grad_norm": 5.79295262920604, + "learning_rate": 2.3435624147437963e-06, + "loss": 1.337, + "step": 130440 + }, + { + "epoch": 1.57, + "grad_norm": 13.809445150596995, + "learning_rate": 2.343186646624884e-06, + "loss": 1.1902, + "step": 130443 + }, + { + "epoch": 1.57, + "grad_norm": 7.881422827726973, + "learning_rate": 2.342810904635916e-06, + "loss": 1.0888, + "step": 130446 + }, + { + "epoch": 1.57, + "grad_norm": 17.70374522461208, + "learning_rate": 2.342435188778175e-06, + "loss": 1.5172, + "step": 130449 + }, + { + "epoch": 1.57, + "grad_norm": 11.223786207136849, + "learning_rate": 2.3420594990529456e-06, + "loss": 1.0796, + "step": 130452 + }, + { + "epoch": 1.57, + "grad_norm": 10.104015686041878, + "learning_rate": 2.3416838354615113e-06, + "loss": 0.7807, + "step": 130455 + }, + { + "epoch": 1.57, + "grad_norm": 23.708091292478258, + "learning_rate": 2.3413081980051477e-06, + "loss": 1.2385, + "step": 130458 + }, + { + "epoch": 1.57, + "grad_norm": 10.145840768805526, + "learning_rate": 2.340932586685142e-06, + "loss": 1.2586, + "step": 130461 + }, + { + "epoch": 1.57, + "grad_norm": 12.987883691276428, + "learning_rate": 2.3405570015027766e-06, + "loss": 1.0217, + "step": 130464 + }, + { + "epoch": 1.57, + "grad_norm": 5.5662669716540885, + "learning_rate": 2.3401814424593285e-06, + "loss": 0.9867, + "step": 130467 + }, + { + "epoch": 1.57, + "grad_norm": 4.890763914626895, + "learning_rate": 2.3398059095560845e-06, + "loss": 1.1651, + "step": 130470 + }, + { + "epoch": 1.57, + "grad_norm": 13.869244591304323, + "learning_rate": 2.3394304027943216e-06, + "loss": 1.4761, + "step": 130473 + }, + { + "epoch": 1.57, + "grad_norm": 13.874578902889338, + "learning_rate": 2.3390549221753235e-06, + "loss": 1.5531, + "step": 130476 + }, + { + "epoch": 1.57, + "grad_norm": 13.167027553220155, + "learning_rate": 2.338679467700371e-06, + "loss": 0.8036, + "step": 130479 + }, + { + "epoch": 1.57, + "grad_norm": 4.7401775058791245, + "learning_rate": 2.338304039370749e-06, + "loss": 1.3928, + "step": 130482 + }, + { + "epoch": 1.57, + "grad_norm": 78.47859645003619, + "learning_rate": 2.3379286371877352e-06, + "loss": 1.1563, + "step": 130485 + }, + { + "epoch": 1.57, + "grad_norm": 13.579052521698381, + "learning_rate": 2.3375532611526087e-06, + "loss": 1.1712, + "step": 130488 + }, + { + "epoch": 1.57, + "grad_norm": 21.636168417559826, + "learning_rate": 2.3371779112666526e-06, + "loss": 0.9596, + "step": 130491 + }, + { + "epoch": 1.57, + "grad_norm": 25.3418474115094, + "learning_rate": 2.3368025875311485e-06, + "loss": 1.0239, + "step": 130494 + }, + { + "epoch": 1.57, + "grad_norm": 6.735151546022636, + "learning_rate": 2.3364272899473796e-06, + "loss": 1.3049, + "step": 130497 + }, + { + "epoch": 1.57, + "grad_norm": 10.04222984189478, + "learning_rate": 2.3360520185166212e-06, + "loss": 0.9705, + "step": 130500 + }, + { + "epoch": 1.57, + "grad_norm": 9.11565652968066, + "learning_rate": 2.3356767732401566e-06, + "loss": 1.3236, + "step": 130503 + }, + { + "epoch": 1.57, + "grad_norm": 7.821416817364308, + "learning_rate": 2.3353015541192693e-06, + "loss": 1.2054, + "step": 130506 + }, + { + "epoch": 1.57, + "grad_norm": 4.49752306979466, + "learning_rate": 2.3349263611552375e-06, + "loss": 0.8158, + "step": 130509 + }, + { + "epoch": 1.57, + "grad_norm": 11.760270722930429, + "learning_rate": 2.334551194349337e-06, + "loss": 1.3483, + "step": 130512 + }, + { + "epoch": 1.57, + "grad_norm": 17.757922320412327, + "learning_rate": 2.3341760537028534e-06, + "loss": 1.2659, + "step": 130515 + }, + { + "epoch": 1.57, + "grad_norm": 3.9507612178317544, + "learning_rate": 2.3338009392170646e-06, + "loss": 1.2276, + "step": 130518 + }, + { + "epoch": 1.57, + "grad_norm": 20.380713120785014, + "learning_rate": 2.333425850893253e-06, + "loss": 1.1597, + "step": 130521 + }, + { + "epoch": 1.57, + "grad_norm": 9.853199962700257, + "learning_rate": 2.3330507887327004e-06, + "loss": 1.438, + "step": 130524 + }, + { + "epoch": 1.57, + "grad_norm": 12.559995495620845, + "learning_rate": 2.3326757527366826e-06, + "loss": 1.0697, + "step": 130527 + }, + { + "epoch": 1.57, + "grad_norm": 7.969620356659039, + "learning_rate": 2.332300742906478e-06, + "loss": 1.1926, + "step": 130530 + }, + { + "epoch": 1.57, + "grad_norm": 4.185923328490878, + "learning_rate": 2.3319257592433695e-06, + "loss": 1.26, + "step": 130533 + }, + { + "epoch": 1.57, + "grad_norm": 8.534838499534132, + "learning_rate": 2.33155080174864e-06, + "loss": 1.3855, + "step": 130536 + }, + { + "epoch": 1.57, + "grad_norm": 16.299878802667298, + "learning_rate": 2.331175870423562e-06, + "loss": 1.2576, + "step": 130539 + }, + { + "epoch": 1.57, + "grad_norm": 4.079083570366963, + "learning_rate": 2.3308009652694185e-06, + "loss": 1.147, + "step": 130542 + }, + { + "epoch": 1.57, + "grad_norm": 8.115362611383558, + "learning_rate": 2.330426086287488e-06, + "loss": 0.9643, + "step": 130545 + }, + { + "epoch": 1.57, + "grad_norm": 5.328600595276996, + "learning_rate": 2.3300512334790547e-06, + "loss": 1.1548, + "step": 130548 + }, + { + "epoch": 1.57, + "grad_norm": 4.10367968118941, + "learning_rate": 2.3296764068453936e-06, + "loss": 1.0394, + "step": 130551 + }, + { + "epoch": 1.57, + "grad_norm": 12.55566188911777, + "learning_rate": 2.3293016063877806e-06, + "loss": 1.2018, + "step": 130554 + }, + { + "epoch": 1.57, + "grad_norm": 4.825645393885823, + "learning_rate": 2.328926832107499e-06, + "loss": 0.9689, + "step": 130557 + }, + { + "epoch": 1.57, + "grad_norm": 13.141373491805988, + "learning_rate": 2.3285520840058274e-06, + "loss": 1.4098, + "step": 130560 + }, + { + "epoch": 1.57, + "grad_norm": 10.506678879512775, + "learning_rate": 2.3281773620840463e-06, + "loss": 1.2342, + "step": 130563 + }, + { + "epoch": 1.57, + "grad_norm": 28.414047620701382, + "learning_rate": 2.3278026663434296e-06, + "loss": 1.3225, + "step": 130566 + }, + { + "epoch": 1.57, + "grad_norm": 4.457596710358362, + "learning_rate": 2.327427996785262e-06, + "loss": 1.1367, + "step": 130569 + }, + { + "epoch": 1.57, + "grad_norm": 7.105724304244702, + "learning_rate": 2.3270533534108154e-06, + "loss": 1.2498, + "step": 130572 + }, + { + "epoch": 1.57, + "grad_norm": 26.105349295460922, + "learning_rate": 2.326678736221373e-06, + "loss": 1.1379, + "step": 130575 + }, + { + "epoch": 1.57, + "grad_norm": 3.661897302125466, + "learning_rate": 2.3263041452182145e-06, + "loss": 1.3927, + "step": 130578 + }, + { + "epoch": 1.57, + "grad_norm": 6.4572657405122715, + "learning_rate": 2.3259295804026127e-06, + "loss": 1.3008, + "step": 130581 + }, + { + "epoch": 1.57, + "grad_norm": 3.2710492780908758, + "learning_rate": 2.32555504177585e-06, + "loss": 1.2049, + "step": 130584 + }, + { + "epoch": 1.57, + "grad_norm": 7.5798553778387046, + "learning_rate": 2.3251805293392026e-06, + "loss": 0.8292, + "step": 130587 + }, + { + "epoch": 1.57, + "grad_norm": 6.491955478015876, + "learning_rate": 2.3248060430939535e-06, + "loss": 1.1162, + "step": 130590 + }, + { + "epoch": 1.57, + "grad_norm": 49.507046130824435, + "learning_rate": 2.3244315830413768e-06, + "loss": 1.398, + "step": 130593 + }, + { + "epoch": 1.57, + "grad_norm": 17.978367881869374, + "learning_rate": 2.3240571491827467e-06, + "loss": 0.9734, + "step": 130596 + }, + { + "epoch": 1.57, + "grad_norm": 8.611075942495992, + "learning_rate": 2.3236827415193454e-06, + "loss": 1.184, + "step": 130599 + }, + { + "epoch": 1.57, + "grad_norm": 3.2071819510229727, + "learning_rate": 2.323308360052449e-06, + "loss": 1.1839, + "step": 130602 + }, + { + "epoch": 1.57, + "grad_norm": 15.455988084663247, + "learning_rate": 2.3229340047833394e-06, + "loss": 1.1503, + "step": 130605 + }, + { + "epoch": 1.57, + "grad_norm": 34.76808107525184, + "learning_rate": 2.3225596757132872e-06, + "loss": 1.2577, + "step": 130608 + }, + { + "epoch": 1.57, + "grad_norm": 6.766958954477765, + "learning_rate": 2.3221853728435773e-06, + "loss": 1.382, + "step": 130611 + }, + { + "epoch": 1.57, + "grad_norm": 5.4625821892555235, + "learning_rate": 2.32181109617548e-06, + "loss": 1.0907, + "step": 130614 + }, + { + "epoch": 1.57, + "grad_norm": 3.6236002309205078, + "learning_rate": 2.3214368457102787e-06, + "loss": 1.0085, + "step": 130617 + }, + { + "epoch": 1.57, + "grad_norm": 9.463140993296319, + "learning_rate": 2.321062621449245e-06, + "loss": 1.3352, + "step": 130620 + }, + { + "epoch": 1.57, + "grad_norm": 11.25262281601724, + "learning_rate": 2.320688423393658e-06, + "loss": 1.0111, + "step": 130623 + }, + { + "epoch": 1.57, + "grad_norm": 10.969407546231409, + "learning_rate": 2.320314251544796e-06, + "loss": 1.053, + "step": 130626 + }, + { + "epoch": 1.57, + "grad_norm": 19.347299920635386, + "learning_rate": 2.319940105903935e-06, + "loss": 1.1033, + "step": 130629 + }, + { + "epoch": 1.57, + "grad_norm": 16.614648183263128, + "learning_rate": 2.319565986472354e-06, + "loss": 1.1159, + "step": 130632 + }, + { + "epoch": 1.57, + "grad_norm": 6.878263405113354, + "learning_rate": 2.319191893251329e-06, + "loss": 1.2089, + "step": 130635 + }, + { + "epoch": 1.57, + "grad_norm": 19.51409704330283, + "learning_rate": 2.3188178262421322e-06, + "loss": 1.3206, + "step": 130638 + }, + { + "epoch": 1.57, + "grad_norm": 3.3823676450368554, + "learning_rate": 2.318443785446044e-06, + "loss": 1.1334, + "step": 130641 + }, + { + "epoch": 1.57, + "grad_norm": 10.571483711662257, + "learning_rate": 2.318069770864343e-06, + "loss": 1.1834, + "step": 130644 + }, + { + "epoch": 1.57, + "grad_norm": 6.239023429289051, + "learning_rate": 2.3176957824982993e-06, + "loss": 0.8177, + "step": 130647 + }, + { + "epoch": 1.57, + "grad_norm": 24.187377343624313, + "learning_rate": 2.3173218203491933e-06, + "loss": 1.5318, + "step": 130650 + }, + { + "epoch": 1.57, + "grad_norm": 7.863068896422593, + "learning_rate": 2.3169478844183024e-06, + "loss": 0.9924, + "step": 130653 + }, + { + "epoch": 1.57, + "grad_norm": 7.3041943906790685, + "learning_rate": 2.316573974706899e-06, + "loss": 1.0362, + "step": 130656 + }, + { + "epoch": 1.57, + "grad_norm": 8.468070173783516, + "learning_rate": 2.316200091216264e-06, + "loss": 1.0504, + "step": 130659 + }, + { + "epoch": 1.57, + "grad_norm": 5.488709056454857, + "learning_rate": 2.3158262339476678e-06, + "loss": 1.5075, + "step": 130662 + }, + { + "epoch": 1.57, + "grad_norm": 3.753683342619331, + "learning_rate": 2.3154524029023883e-06, + "loss": 0.9739, + "step": 130665 + }, + { + "epoch": 1.57, + "grad_norm": 11.6256908476475, + "learning_rate": 2.3150785980817016e-06, + "loss": 1.018, + "step": 130668 + }, + { + "epoch": 1.57, + "grad_norm": 12.372288928610569, + "learning_rate": 2.314704819486887e-06, + "loss": 1.2473, + "step": 130671 + }, + { + "epoch": 1.57, + "grad_norm": 5.623795597171455, + "learning_rate": 2.3143310671192133e-06, + "loss": 1.1117, + "step": 130674 + }, + { + "epoch": 1.57, + "grad_norm": 4.1725568332949114, + "learning_rate": 2.313957340979962e-06, + "loss": 1.1503, + "step": 130677 + }, + { + "epoch": 1.57, + "grad_norm": 11.153662908364277, + "learning_rate": 2.3135836410704037e-06, + "loss": 1.4015, + "step": 130680 + }, + { + "epoch": 1.57, + "grad_norm": 2.4278039073383555, + "learning_rate": 2.313209967391815e-06, + "loss": 0.8464, + "step": 130683 + }, + { + "epoch": 1.57, + "grad_norm": 8.91827920615677, + "learning_rate": 2.312836319945475e-06, + "loss": 1.0763, + "step": 130686 + }, + { + "epoch": 1.57, + "grad_norm": 15.262926659033827, + "learning_rate": 2.3124626987326537e-06, + "loss": 1.2286, + "step": 130689 + }, + { + "epoch": 1.57, + "grad_norm": 11.28631411989731, + "learning_rate": 2.3120891037546266e-06, + "loss": 1.1157, + "step": 130692 + }, + { + "epoch": 1.57, + "grad_norm": 9.305254343335617, + "learning_rate": 2.311715535012674e-06, + "loss": 1.117, + "step": 130695 + }, + { + "epoch": 1.57, + "grad_norm": 16.264842494133784, + "learning_rate": 2.311341992508065e-06, + "loss": 1.0532, + "step": 130698 + }, + { + "epoch": 1.57, + "grad_norm": 7.896943172103524, + "learning_rate": 2.310968476242078e-06, + "loss": 1.1479, + "step": 130701 + }, + { + "epoch": 1.57, + "grad_norm": 9.88719827937877, + "learning_rate": 2.310594986215984e-06, + "loss": 1.252, + "step": 130704 + }, + { + "epoch": 1.57, + "grad_norm": 2.950131683231507, + "learning_rate": 2.3102215224310587e-06, + "loss": 0.9276, + "step": 130707 + }, + { + "epoch": 1.57, + "grad_norm": 6.376555088988525, + "learning_rate": 2.309848084888577e-06, + "loss": 0.865, + "step": 130710 + }, + { + "epoch": 1.57, + "grad_norm": 9.830733754663335, + "learning_rate": 2.309474673589818e-06, + "loss": 1.096, + "step": 130713 + }, + { + "epoch": 1.57, + "grad_norm": 5.818421846963293, + "learning_rate": 2.3091012885360487e-06, + "loss": 1.191, + "step": 130716 + }, + { + "epoch": 1.57, + "grad_norm": 11.380886730134165, + "learning_rate": 2.308727929728549e-06, + "loss": 1.1396, + "step": 130719 + }, + { + "epoch": 1.57, + "grad_norm": 5.483999696755417, + "learning_rate": 2.3083545971685872e-06, + "loss": 0.9273, + "step": 130722 + }, + { + "epoch": 1.57, + "grad_norm": 9.84968411745353, + "learning_rate": 2.307981290857444e-06, + "loss": 1.2722, + "step": 130725 + }, + { + "epoch": 1.57, + "grad_norm": 3.4786577343621157, + "learning_rate": 2.3076080107963873e-06, + "loss": 1.2287, + "step": 130728 + }, + { + "epoch": 1.57, + "grad_norm": 6.562026705856318, + "learning_rate": 2.307234756986694e-06, + "loss": 0.9518, + "step": 130731 + }, + { + "epoch": 1.57, + "grad_norm": 32.4039923914429, + "learning_rate": 2.306861529429637e-06, + "loss": 1.1337, + "step": 130734 + }, + { + "epoch": 1.57, + "grad_norm": 17.586284941691332, + "learning_rate": 2.3064883281264907e-06, + "loss": 1.1926, + "step": 130737 + }, + { + "epoch": 1.57, + "grad_norm": 9.958892096423561, + "learning_rate": 2.306115153078531e-06, + "loss": 1.0008, + "step": 130740 + }, + { + "epoch": 1.57, + "grad_norm": 10.66086515240797, + "learning_rate": 2.3057420042870295e-06, + "loss": 0.9703, + "step": 130743 + }, + { + "epoch": 1.57, + "grad_norm": 11.330114997288952, + "learning_rate": 2.305368881753256e-06, + "loss": 1.1899, + "step": 130746 + }, + { + "epoch": 1.57, + "grad_norm": 15.235651939434113, + "learning_rate": 2.3049957854784866e-06, + "loss": 1.1061, + "step": 130749 + }, + { + "epoch": 1.57, + "grad_norm": 3.327308667339515, + "learning_rate": 2.304622715463999e-06, + "loss": 1.0754, + "step": 130752 + }, + { + "epoch": 1.57, + "grad_norm": 2.1837815812297756, + "learning_rate": 2.3042496717110572e-06, + "loss": 1.1911, + "step": 130755 + }, + { + "epoch": 1.57, + "grad_norm": 5.24543154370592, + "learning_rate": 2.3038766542209412e-06, + "loss": 1.0241, + "step": 130758 + }, + { + "epoch": 1.57, + "grad_norm": 20.767423565343176, + "learning_rate": 2.3035036629949236e-06, + "loss": 1.1793, + "step": 130761 + }, + { + "epoch": 1.57, + "grad_norm": 7.189878004270787, + "learning_rate": 2.3031306980342737e-06, + "loss": 1.0148, + "step": 130764 + }, + { + "epoch": 1.57, + "grad_norm": 3.664742678382673, + "learning_rate": 2.3027577593402696e-06, + "loss": 1.1337, + "step": 130767 + }, + { + "epoch": 1.57, + "grad_norm": 8.683858077716016, + "learning_rate": 2.3023848469141765e-06, + "loss": 1.5459, + "step": 130770 + }, + { + "epoch": 1.57, + "grad_norm": 4.293510587832604, + "learning_rate": 2.3020119607572724e-06, + "loss": 1.1885, + "step": 130773 + }, + { + "epoch": 1.57, + "grad_norm": 6.689617971284073, + "learning_rate": 2.3016391008708283e-06, + "loss": 0.9718, + "step": 130776 + }, + { + "epoch": 1.57, + "grad_norm": 13.724767611211586, + "learning_rate": 2.301266267256119e-06, + "loss": 1.1378, + "step": 130779 + }, + { + "epoch": 1.57, + "grad_norm": 27.320181844309232, + "learning_rate": 2.3008934599144127e-06, + "loss": 1.4012, + "step": 130782 + }, + { + "epoch": 1.57, + "grad_norm": 11.33752503465414, + "learning_rate": 2.300520678846987e-06, + "loss": 1.0958, + "step": 130785 + }, + { + "epoch": 1.57, + "grad_norm": 24.292510368054337, + "learning_rate": 2.3001479240551084e-06, + "loss": 1.3602, + "step": 130788 + }, + { + "epoch": 1.57, + "grad_norm": 7.423100442489399, + "learning_rate": 2.2997751955400503e-06, + "loss": 1.2577, + "step": 130791 + }, + { + "epoch": 1.57, + "grad_norm": 11.956957605339008, + "learning_rate": 2.29940249330309e-06, + "loss": 1.0652, + "step": 130794 + }, + { + "epoch": 1.57, + "grad_norm": 9.644153376606729, + "learning_rate": 2.2990298173454916e-06, + "loss": 0.9582, + "step": 130797 + }, + { + "epoch": 1.57, + "grad_norm": 7.9267206782192625, + "learning_rate": 2.29865716766853e-06, + "loss": 1.3362, + "step": 130800 + }, + { + "epoch": 1.57, + "grad_norm": 7.723894945490687, + "learning_rate": 2.2982845442734815e-06, + "loss": 1.2163, + "step": 130803 + }, + { + "epoch": 1.57, + "grad_norm": 8.85305009795569, + "learning_rate": 2.297911947161613e-06, + "loss": 0.9483, + "step": 130806 + }, + { + "epoch": 1.57, + "grad_norm": 5.633364347221268, + "learning_rate": 2.2975393763341936e-06, + "loss": 0.8331, + "step": 130809 + }, + { + "epoch": 1.57, + "grad_norm": 8.943548615164067, + "learning_rate": 2.2971668317924987e-06, + "loss": 1.0023, + "step": 130812 + }, + { + "epoch": 1.57, + "grad_norm": 20.73963712766816, + "learning_rate": 2.296794313537798e-06, + "loss": 1.373, + "step": 130815 + }, + { + "epoch": 1.57, + "grad_norm": 16.747690508236644, + "learning_rate": 2.296421821571364e-06, + "loss": 1.3155, + "step": 130818 + }, + { + "epoch": 1.57, + "grad_norm": 8.334581380864014, + "learning_rate": 2.2960493558944707e-06, + "loss": 0.9333, + "step": 130821 + }, + { + "epoch": 1.57, + "grad_norm": 5.4482553034473735, + "learning_rate": 2.2956769165083837e-06, + "loss": 1.3739, + "step": 130824 + }, + { + "epoch": 1.57, + "grad_norm": 5.0484704135563145, + "learning_rate": 2.2953045034143785e-06, + "loss": 1.227, + "step": 130827 + }, + { + "epoch": 1.57, + "grad_norm": 8.407783878630124, + "learning_rate": 2.2949321166137206e-06, + "loss": 1.3501, + "step": 130830 + }, + { + "epoch": 1.57, + "grad_norm": 6.9334784856665985, + "learning_rate": 2.2945597561076873e-06, + "loss": 1.0602, + "step": 130833 + }, + { + "epoch": 1.57, + "grad_norm": 6.049617024542823, + "learning_rate": 2.2941874218975424e-06, + "loss": 0.8822, + "step": 130836 + }, + { + "epoch": 1.57, + "grad_norm": 6.1013653165880894, + "learning_rate": 2.293815113984561e-06, + "loss": 0.9166, + "step": 130839 + }, + { + "epoch": 1.57, + "grad_norm": 14.068128185284131, + "learning_rate": 2.293442832370013e-06, + "loss": 1.3342, + "step": 130842 + }, + { + "epoch": 1.57, + "grad_norm": 5.517173887569022, + "learning_rate": 2.293070577055171e-06, + "loss": 1.0302, + "step": 130845 + }, + { + "epoch": 1.57, + "grad_norm": 12.557229795152299, + "learning_rate": 2.2926983480413035e-06, + "loss": 1.1872, + "step": 130848 + }, + { + "epoch": 1.57, + "grad_norm": 5.914979202818906, + "learning_rate": 2.2923261453296774e-06, + "loss": 1.6056, + "step": 130851 + }, + { + "epoch": 1.57, + "grad_norm": 25.513153923632196, + "learning_rate": 2.291953968921565e-06, + "loss": 1.0304, + "step": 130854 + }, + { + "epoch": 1.57, + "grad_norm": 12.311381383151117, + "learning_rate": 2.2915818188182395e-06, + "loss": 1.1759, + "step": 130857 + }, + { + "epoch": 1.57, + "grad_norm": 12.199181085229702, + "learning_rate": 2.2912096950209695e-06, + "loss": 1.2969, + "step": 130860 + }, + { + "epoch": 1.57, + "grad_norm": 9.00634039507679, + "learning_rate": 2.2908375975310215e-06, + "loss": 1.205, + "step": 130863 + }, + { + "epoch": 1.57, + "grad_norm": 8.188552031364452, + "learning_rate": 2.2904655263496688e-06, + "loss": 0.977, + "step": 130866 + }, + { + "epoch": 1.57, + "grad_norm": 4.4464999636863025, + "learning_rate": 2.290093481478183e-06, + "loss": 0.8306, + "step": 130869 + }, + { + "epoch": 1.57, + "grad_norm": 9.007488017992356, + "learning_rate": 2.2897214629178275e-06, + "loss": 1.5108, + "step": 130872 + }, + { + "epoch": 1.57, + "grad_norm": 6.844039216829814, + "learning_rate": 2.2893494706698793e-06, + "loss": 0.9438, + "step": 130875 + }, + { + "epoch": 1.57, + "grad_norm": 8.257643988869376, + "learning_rate": 2.2889775047356e-06, + "loss": 1.2968, + "step": 130878 + }, + { + "epoch": 1.57, + "grad_norm": 34.94556251868524, + "learning_rate": 2.2886055651162643e-06, + "loss": 1.2609, + "step": 130881 + }, + { + "epoch": 1.57, + "grad_norm": 10.138082023292075, + "learning_rate": 2.2882336518131387e-06, + "loss": 1.1638, + "step": 130884 + }, + { + "epoch": 1.57, + "grad_norm": 5.312853316118756, + "learning_rate": 2.287861764827498e-06, + "loss": 1.4538, + "step": 130887 + }, + { + "epoch": 1.57, + "grad_norm": 4.994884659105225, + "learning_rate": 2.287489904160607e-06, + "loss": 1.2357, + "step": 130890 + }, + { + "epoch": 1.57, + "grad_norm": 8.741731113091854, + "learning_rate": 2.287118069813731e-06, + "loss": 1.1354, + "step": 130893 + }, + { + "epoch": 1.57, + "grad_norm": 8.075565033914307, + "learning_rate": 2.2867462617881443e-06, + "loss": 1.0495, + "step": 130896 + }, + { + "epoch": 1.57, + "grad_norm": 22.77433897010923, + "learning_rate": 2.286374480085113e-06, + "loss": 1.2407, + "step": 130899 + }, + { + "epoch": 1.57, + "grad_norm": 10.38149343611917, + "learning_rate": 2.2860027247059113e-06, + "loss": 1.0582, + "step": 130902 + }, + { + "epoch": 1.57, + "grad_norm": 21.252599295582304, + "learning_rate": 2.2856309956517996e-06, + "loss": 1.202, + "step": 130905 + }, + { + "epoch": 1.57, + "grad_norm": 6.498902612886196, + "learning_rate": 2.285259292924051e-06, + "loss": 1.029, + "step": 130908 + }, + { + "epoch": 1.57, + "grad_norm": 6.519455466916996, + "learning_rate": 2.2848876165239376e-06, + "loss": 1.1343, + "step": 130911 + }, + { + "epoch": 1.57, + "grad_norm": 5.439240891381784, + "learning_rate": 2.2845159664527226e-06, + "loss": 1.0257, + "step": 130914 + }, + { + "epoch": 1.57, + "grad_norm": 7.2663347344821005, + "learning_rate": 2.284144342711673e-06, + "loss": 1.3673, + "step": 130917 + }, + { + "epoch": 1.57, + "grad_norm": 3.0959914988922157, + "learning_rate": 2.2837727453020585e-06, + "loss": 1.1018, + "step": 130920 + }, + { + "epoch": 1.57, + "grad_norm": 8.941997772985786, + "learning_rate": 2.2834011742251492e-06, + "loss": 1.1657, + "step": 130923 + }, + { + "epoch": 1.57, + "grad_norm": 4.909587886789971, + "learning_rate": 2.283029629482213e-06, + "loss": 1.1581, + "step": 130926 + }, + { + "epoch": 1.57, + "grad_norm": 8.54717261515588, + "learning_rate": 2.282658111074518e-06, + "loss": 1.5346, + "step": 130929 + }, + { + "epoch": 1.57, + "grad_norm": 11.543160008432482, + "learning_rate": 2.282286619003333e-06, + "loss": 1.1688, + "step": 130932 + }, + { + "epoch": 1.57, + "grad_norm": 7.670796613325228, + "learning_rate": 2.28191515326992e-06, + "loss": 1.2922, + "step": 130935 + }, + { + "epoch": 1.57, + "grad_norm": 8.433855936324225, + "learning_rate": 2.28154371387555e-06, + "loss": 1.178, + "step": 130938 + }, + { + "epoch": 1.57, + "grad_norm": 11.082704901576424, + "learning_rate": 2.2811723008214947e-06, + "loss": 1.0804, + "step": 130941 + }, + { + "epoch": 1.57, + "grad_norm": 7.59824156117811, + "learning_rate": 2.280800914109016e-06, + "loss": 0.968, + "step": 130944 + }, + { + "epoch": 1.57, + "grad_norm": 7.012163464744219, + "learning_rate": 2.2804295537393816e-06, + "loss": 1.5422, + "step": 130947 + }, + { + "epoch": 1.57, + "grad_norm": 3.36064809152693, + "learning_rate": 2.280058219713862e-06, + "loss": 1.0753, + "step": 130950 + }, + { + "epoch": 1.57, + "grad_norm": 3.4037312596698457, + "learning_rate": 2.279686912033725e-06, + "loss": 1.1007, + "step": 130953 + }, + { + "epoch": 1.57, + "grad_norm": 9.005792020439586, + "learning_rate": 2.2793156307002363e-06, + "loss": 1.1465, + "step": 130956 + }, + { + "epoch": 1.57, + "grad_norm": 8.301140112533998, + "learning_rate": 2.278944375714659e-06, + "loss": 1.3486, + "step": 130959 + }, + { + "epoch": 1.57, + "grad_norm": 3.6697860828312856, + "learning_rate": 2.2785731470782635e-06, + "loss": 0.9821, + "step": 130962 + }, + { + "epoch": 1.57, + "grad_norm": 6.901757478928832, + "learning_rate": 2.2782019447923175e-06, + "loss": 1.2419, + "step": 130965 + }, + { + "epoch": 1.57, + "grad_norm": 8.910898950669688, + "learning_rate": 2.277830768858089e-06, + "loss": 1.2233, + "step": 130968 + }, + { + "epoch": 1.57, + "grad_norm": 3.325516699099691, + "learning_rate": 2.2774596192768395e-06, + "loss": 1.3126, + "step": 130971 + }, + { + "epoch": 1.57, + "grad_norm": 24.089518414710273, + "learning_rate": 2.2770884960498385e-06, + "loss": 0.9972, + "step": 130974 + }, + { + "epoch": 1.57, + "grad_norm": 21.647798918923883, + "learning_rate": 2.2767173991783566e-06, + "loss": 1.1804, + "step": 130977 + }, + { + "epoch": 1.57, + "grad_norm": 6.746295881100823, + "learning_rate": 2.276346328663652e-06, + "loss": 0.8718, + "step": 130980 + }, + { + "epoch": 1.58, + "grad_norm": 7.022371843544351, + "learning_rate": 2.2759752845069992e-06, + "loss": 1.1742, + "step": 130983 + }, + { + "epoch": 1.58, + "grad_norm": 4.791384345822645, + "learning_rate": 2.2756042667096576e-06, + "loss": 1.042, + "step": 130986 + }, + { + "epoch": 1.58, + "grad_norm": 12.792071124457074, + "learning_rate": 2.2752332752728966e-06, + "loss": 1.1468, + "step": 130989 + }, + { + "epoch": 1.58, + "grad_norm": 4.565627889611562, + "learning_rate": 2.2748623101979816e-06, + "loss": 1.0998, + "step": 130992 + }, + { + "epoch": 1.58, + "grad_norm": 8.364909417018707, + "learning_rate": 2.274491371486182e-06, + "loss": 1.2129, + "step": 130995 + }, + { + "epoch": 1.58, + "grad_norm": 21.666432354349023, + "learning_rate": 2.27412045913876e-06, + "loss": 1.0619, + "step": 130998 + }, + { + "epoch": 1.58, + "grad_norm": 11.520791648412144, + "learning_rate": 2.273749573156979e-06, + "loss": 1.3112, + "step": 131001 + }, + { + "epoch": 1.58, + "grad_norm": 5.892132114190552, + "learning_rate": 2.2733787135421094e-06, + "loss": 1.1731, + "step": 131004 + }, + { + "epoch": 1.58, + "grad_norm": 7.330544352444618, + "learning_rate": 2.2730078802954136e-06, + "loss": 1.1438, + "step": 131007 + }, + { + "epoch": 1.58, + "grad_norm": 22.26704442198377, + "learning_rate": 2.2726370734181613e-06, + "loss": 1.3527, + "step": 131010 + }, + { + "epoch": 1.58, + "grad_norm": 6.702999158594045, + "learning_rate": 2.2722662929116137e-06, + "loss": 0.9933, + "step": 131013 + }, + { + "epoch": 1.58, + "grad_norm": 4.929543279505278, + "learning_rate": 2.271895538777037e-06, + "loss": 1.3453, + "step": 131016 + }, + { + "epoch": 1.58, + "grad_norm": 21.98279436705405, + "learning_rate": 2.2715248110157007e-06, + "loss": 1.0583, + "step": 131019 + }, + { + "epoch": 1.58, + "grad_norm": 12.795524649749026, + "learning_rate": 2.271154109628866e-06, + "loss": 1.0967, + "step": 131022 + }, + { + "epoch": 1.58, + "grad_norm": 4.350669731673895, + "learning_rate": 2.2707834346177947e-06, + "loss": 1.0287, + "step": 131025 + }, + { + "epoch": 1.58, + "grad_norm": 3.2783497792150555, + "learning_rate": 2.2704127859837568e-06, + "loss": 1.0717, + "step": 131028 + }, + { + "epoch": 1.58, + "grad_norm": 5.011414321214715, + "learning_rate": 2.2700421637280147e-06, + "loss": 1.2345, + "step": 131031 + }, + { + "epoch": 1.58, + "grad_norm": 13.592394204088121, + "learning_rate": 2.2696715678518356e-06, + "loss": 1.4562, + "step": 131034 + }, + { + "epoch": 1.58, + "grad_norm": 7.970070040652197, + "learning_rate": 2.2693009983564853e-06, + "loss": 1.5132, + "step": 131037 + }, + { + "epoch": 1.58, + "grad_norm": 4.162177778024983, + "learning_rate": 2.2689304552432255e-06, + "loss": 1.1258, + "step": 131040 + }, + { + "epoch": 1.58, + "grad_norm": 5.824124384164806, + "learning_rate": 2.2685599385133197e-06, + "loss": 1.002, + "step": 131043 + }, + { + "epoch": 1.58, + "grad_norm": 8.647416505010332, + "learning_rate": 2.2681894481680323e-06, + "loss": 1.0496, + "step": 131046 + }, + { + "epoch": 1.58, + "grad_norm": 10.975456532186833, + "learning_rate": 2.267818984208634e-06, + "loss": 1.3024, + "step": 131049 + }, + { + "epoch": 1.58, + "grad_norm": 8.490100296931194, + "learning_rate": 2.2674485466363803e-06, + "loss": 1.035, + "step": 131052 + }, + { + "epoch": 1.58, + "grad_norm": 4.6073250137970305, + "learning_rate": 2.2670781354525395e-06, + "loss": 1.2378, + "step": 131055 + }, + { + "epoch": 1.58, + "grad_norm": 6.676847955570782, + "learning_rate": 2.266707750658377e-06, + "loss": 1.5769, + "step": 131058 + }, + { + "epoch": 1.58, + "grad_norm": 12.214758766986044, + "learning_rate": 2.2663373922551567e-06, + "loss": 1.304, + "step": 131061 + }, + { + "epoch": 1.58, + "grad_norm": 6.413534882342049, + "learning_rate": 2.265967060244142e-06, + "loss": 1.0722, + "step": 131064 + }, + { + "epoch": 1.58, + "grad_norm": 13.6948578694802, + "learning_rate": 2.265596754626592e-06, + "loss": 1.4629, + "step": 131067 + }, + { + "epoch": 1.58, + "grad_norm": 3.2144604564240584, + "learning_rate": 2.2652264754037755e-06, + "loss": 0.8207, + "step": 131070 + }, + { + "epoch": 1.58, + "grad_norm": 5.5745272931923076, + "learning_rate": 2.2648562225769556e-06, + "loss": 1.406, + "step": 131073 + }, + { + "epoch": 1.58, + "grad_norm": 2.9831094689964814, + "learning_rate": 2.2644859961473965e-06, + "loss": 1.1122, + "step": 131076 + }, + { + "epoch": 1.58, + "grad_norm": 5.367268663899614, + "learning_rate": 2.2641157961163584e-06, + "loss": 1.253, + "step": 131079 + }, + { + "epoch": 1.58, + "grad_norm": 13.185625572040701, + "learning_rate": 2.2637456224851095e-06, + "loss": 1.1633, + "step": 131082 + }, + { + "epoch": 1.58, + "grad_norm": 6.140365265314672, + "learning_rate": 2.2633754752549077e-06, + "loss": 1.1288, + "step": 131085 + }, + { + "epoch": 1.58, + "grad_norm": 69.81584127910138, + "learning_rate": 2.263005354427018e-06, + "loss": 1.3071, + "step": 131088 + }, + { + "epoch": 1.58, + "grad_norm": 5.757664553789093, + "learning_rate": 2.262635260002707e-06, + "loss": 0.829, + "step": 131091 + }, + { + "epoch": 1.58, + "grad_norm": 9.297616532049172, + "learning_rate": 2.2622651919832316e-06, + "loss": 1.0437, + "step": 131094 + }, + { + "epoch": 1.58, + "grad_norm": 9.912379923592068, + "learning_rate": 2.2618951503698594e-06, + "loss": 1.2767, + "step": 131097 + }, + { + "epoch": 1.58, + "grad_norm": 5.365339841971999, + "learning_rate": 2.26152513516385e-06, + "loss": 1.1715, + "step": 131100 + }, + { + "epoch": 1.58, + "grad_norm": 8.408631671794415, + "learning_rate": 2.261155146366473e-06, + "loss": 1.1029, + "step": 131103 + }, + { + "epoch": 1.58, + "grad_norm": 8.017204497695847, + "learning_rate": 2.260785183978984e-06, + "loss": 1.1582, + "step": 131106 + }, + { + "epoch": 1.58, + "grad_norm": 8.226954312042517, + "learning_rate": 2.2604152480026455e-06, + "loss": 1.4881, + "step": 131109 + }, + { + "epoch": 1.58, + "grad_norm": 10.451813343649436, + "learning_rate": 2.2600453384387223e-06, + "loss": 0.9759, + "step": 131112 + }, + { + "epoch": 1.58, + "grad_norm": 29.945466497845626, + "learning_rate": 2.259675455288476e-06, + "loss": 1.0136, + "step": 131115 + }, + { + "epoch": 1.58, + "grad_norm": 4.020942431739841, + "learning_rate": 2.259305598553172e-06, + "loss": 1.0324, + "step": 131118 + }, + { + "epoch": 1.58, + "grad_norm": 6.2381619713186165, + "learning_rate": 2.258935768234067e-06, + "loss": 1.3293, + "step": 131121 + }, + { + "epoch": 1.58, + "grad_norm": 11.436990759462159, + "learning_rate": 2.2585659643324287e-06, + "loss": 1.2152, + "step": 131124 + }, + { + "epoch": 1.58, + "grad_norm": 4.761133648962387, + "learning_rate": 2.258196186849514e-06, + "loss": 1.1965, + "step": 131127 + }, + { + "epoch": 1.58, + "grad_norm": 4.966287060624918, + "learning_rate": 2.2578264357865896e-06, + "loss": 1.3149, + "step": 131130 + }, + { + "epoch": 1.58, + "grad_norm": 7.78976612244119, + "learning_rate": 2.257456711144912e-06, + "loss": 1.0515, + "step": 131133 + }, + { + "epoch": 1.58, + "grad_norm": 11.493547647527853, + "learning_rate": 2.257087012925746e-06, + "loss": 1.3713, + "step": 131136 + }, + { + "epoch": 1.58, + "grad_norm": 11.89483497682273, + "learning_rate": 2.256717341130352e-06, + "loss": 1.156, + "step": 131139 + }, + { + "epoch": 1.58, + "grad_norm": 4.206041874032367, + "learning_rate": 2.2563476957599938e-06, + "loss": 1.0851, + "step": 131142 + }, + { + "epoch": 1.58, + "grad_norm": 5.631781473253101, + "learning_rate": 2.2559780768159343e-06, + "loss": 0.9595, + "step": 131145 + }, + { + "epoch": 1.58, + "grad_norm": 4.549671134911657, + "learning_rate": 2.255608484299432e-06, + "loss": 1.4949, + "step": 131148 + }, + { + "epoch": 1.58, + "grad_norm": 5.480316038396399, + "learning_rate": 2.2552389182117463e-06, + "loss": 1.4334, + "step": 131151 + }, + { + "epoch": 1.58, + "grad_norm": 4.28795707890127, + "learning_rate": 2.25486937855414e-06, + "loss": 1.2758, + "step": 131154 + }, + { + "epoch": 1.58, + "grad_norm": 4.1129795888063185, + "learning_rate": 2.254499865327877e-06, + "loss": 0.7527, + "step": 131157 + }, + { + "epoch": 1.58, + "grad_norm": 3.097264369110812, + "learning_rate": 2.254130378534214e-06, + "loss": 1.1896, + "step": 131160 + }, + { + "epoch": 1.58, + "grad_norm": 8.156413840698827, + "learning_rate": 2.2537609181744136e-06, + "loss": 1.4202, + "step": 131163 + }, + { + "epoch": 1.58, + "grad_norm": 5.736449966201774, + "learning_rate": 2.2533914842497395e-06, + "loss": 1.3568, + "step": 131166 + }, + { + "epoch": 1.58, + "grad_norm": 9.528446994905444, + "learning_rate": 2.2530220767614474e-06, + "loss": 1.2424, + "step": 131169 + }, + { + "epoch": 1.58, + "grad_norm": 5.031610466643817, + "learning_rate": 2.2526526957108043e-06, + "loss": 1.4075, + "step": 131172 + }, + { + "epoch": 1.58, + "grad_norm": 16.466038291690776, + "learning_rate": 2.252283341099063e-06, + "loss": 1.3586, + "step": 131175 + }, + { + "epoch": 1.58, + "grad_norm": 12.460898976007606, + "learning_rate": 2.2519140129274884e-06, + "loss": 1.2068, + "step": 131178 + }, + { + "epoch": 1.58, + "grad_norm": 16.943900064190817, + "learning_rate": 2.2515447111973397e-06, + "loss": 1.1951, + "step": 131181 + }, + { + "epoch": 1.58, + "grad_norm": 10.498149657450071, + "learning_rate": 2.2511754359098816e-06, + "loss": 1.1743, + "step": 131184 + }, + { + "epoch": 1.58, + "grad_norm": 17.66551398858783, + "learning_rate": 2.250806187066368e-06, + "loss": 1.0677, + "step": 131187 + }, + { + "epoch": 1.58, + "grad_norm": 3.367517444813139, + "learning_rate": 2.2504369646680637e-06, + "loss": 1.1153, + "step": 131190 + }, + { + "epoch": 1.58, + "grad_norm": 5.84524078162976, + "learning_rate": 2.250067768716224e-06, + "loss": 1.0546, + "step": 131193 + }, + { + "epoch": 1.58, + "grad_norm": 5.907997985596729, + "learning_rate": 2.2496985992121114e-06, + "loss": 1.1003, + "step": 131196 + }, + { + "epoch": 1.58, + "grad_norm": 7.611800311470944, + "learning_rate": 2.249329456156989e-06, + "loss": 1.3339, + "step": 131199 + }, + { + "epoch": 1.58, + "grad_norm": 16.79836155518018, + "learning_rate": 2.248960339552111e-06, + "loss": 1.151, + "step": 131202 + }, + { + "epoch": 1.58, + "grad_norm": 28.740918891990432, + "learning_rate": 2.2485912493987385e-06, + "loss": 1.1576, + "step": 131205 + }, + { + "epoch": 1.58, + "grad_norm": 11.063197719356765, + "learning_rate": 2.2482221856981345e-06, + "loss": 1.124, + "step": 131208 + }, + { + "epoch": 1.58, + "grad_norm": 13.824154231000993, + "learning_rate": 2.247853148451554e-06, + "loss": 1.1233, + "step": 131211 + }, + { + "epoch": 1.58, + "grad_norm": 6.045242649796878, + "learning_rate": 2.247484137660262e-06, + "loss": 1.2456, + "step": 131214 + }, + { + "epoch": 1.58, + "grad_norm": 8.023172242461586, + "learning_rate": 2.24711515332551e-06, + "loss": 1.4719, + "step": 131217 + }, + { + "epoch": 1.58, + "grad_norm": 30.49749462236228, + "learning_rate": 2.2467461954485615e-06, + "loss": 1.3184, + "step": 131220 + }, + { + "epoch": 1.58, + "grad_norm": 4.736567960747404, + "learning_rate": 2.2463772640306756e-06, + "loss": 1.2499, + "step": 131223 + }, + { + "epoch": 1.58, + "grad_norm": 7.857236478976035, + "learning_rate": 2.246008359073115e-06, + "loss": 1.0793, + "step": 131226 + }, + { + "epoch": 1.58, + "grad_norm": 13.158767447530655, + "learning_rate": 2.245639480577132e-06, + "loss": 1.3264, + "step": 131229 + }, + { + "epoch": 1.58, + "grad_norm": 5.009361131762346, + "learning_rate": 2.2452706285439905e-06, + "loss": 1.1355, + "step": 131232 + }, + { + "epoch": 1.58, + "grad_norm": 11.123817603488423, + "learning_rate": 2.2449018029749437e-06, + "loss": 0.9127, + "step": 131235 + }, + { + "epoch": 1.58, + "grad_norm": 8.625747359889568, + "learning_rate": 2.2445330038712575e-06, + "loss": 1.165, + "step": 131238 + }, + { + "epoch": 1.58, + "grad_norm": 7.286908448927595, + "learning_rate": 2.244164231234184e-06, + "loss": 0.929, + "step": 131241 + }, + { + "epoch": 1.58, + "grad_norm": 8.105991497987944, + "learning_rate": 2.2437954850649836e-06, + "loss": 1.0882, + "step": 131244 + }, + { + "epoch": 1.58, + "grad_norm": 6.5085279288412705, + "learning_rate": 2.2434267653649158e-06, + "loss": 1.1591, + "step": 131247 + }, + { + "epoch": 1.58, + "grad_norm": 4.969985531272507, + "learning_rate": 2.2430580721352412e-06, + "loss": 1.2054, + "step": 131250 + }, + { + "epoch": 1.58, + "grad_norm": 3.8000970012120234, + "learning_rate": 2.242689405377213e-06, + "loss": 0.907, + "step": 131253 + }, + { + "epoch": 1.58, + "grad_norm": 3.0836800753207045, + "learning_rate": 2.242320765092094e-06, + "loss": 1.3866, + "step": 131256 + }, + { + "epoch": 1.58, + "grad_norm": 2.647543670881349, + "learning_rate": 2.241952151281137e-06, + "loss": 1.1344, + "step": 131259 + }, + { + "epoch": 1.58, + "grad_norm": 10.822705051865892, + "learning_rate": 2.241583563945603e-06, + "loss": 1.3406, + "step": 131262 + }, + { + "epoch": 1.58, + "grad_norm": 22.916401219950952, + "learning_rate": 2.241215003086753e-06, + "loss": 1.2621, + "step": 131265 + }, + { + "epoch": 1.58, + "grad_norm": 18.209421255573325, + "learning_rate": 2.2408464687058383e-06, + "loss": 1.2706, + "step": 131268 + }, + { + "epoch": 1.58, + "grad_norm": 3.7252862825287782, + "learning_rate": 2.2404779608041194e-06, + "loss": 0.8437, + "step": 131271 + }, + { + "epoch": 1.58, + "grad_norm": 9.898255587995779, + "learning_rate": 2.240109479382858e-06, + "loss": 1.3191, + "step": 131274 + }, + { + "epoch": 1.58, + "grad_norm": 3.8952653050325883, + "learning_rate": 2.2397410244433037e-06, + "loss": 1.3323, + "step": 131277 + }, + { + "epoch": 1.58, + "grad_norm": 5.035029344243368, + "learning_rate": 2.2393725959867217e-06, + "loss": 1.2139, + "step": 131280 + }, + { + "epoch": 1.58, + "grad_norm": 8.50156298369336, + "learning_rate": 2.239004194014364e-06, + "loss": 1.1076, + "step": 131283 + }, + { + "epoch": 1.58, + "grad_norm": 5.44376792088971, + "learning_rate": 2.2386358185274882e-06, + "loss": 1.2426, + "step": 131286 + }, + { + "epoch": 1.58, + "grad_norm": 8.13305559932983, + "learning_rate": 2.238267469527352e-06, + "loss": 1.2659, + "step": 131289 + }, + { + "epoch": 1.58, + "grad_norm": 7.620707872159677, + "learning_rate": 2.2378991470152167e-06, + "loss": 1.0615, + "step": 131292 + }, + { + "epoch": 1.58, + "grad_norm": 6.203430012747048, + "learning_rate": 2.237530850992333e-06, + "loss": 1.2712, + "step": 131295 + }, + { + "epoch": 1.58, + "grad_norm": 7.453006384439681, + "learning_rate": 2.237162581459964e-06, + "loss": 1.1048, + "step": 131298 + }, + { + "epoch": 1.58, + "grad_norm": 17.43339641140068, + "learning_rate": 2.236794338419359e-06, + "loss": 0.9761, + "step": 131301 + }, + { + "epoch": 1.58, + "grad_norm": 3.624620900661523, + "learning_rate": 2.2364261218717787e-06, + "loss": 1.2579, + "step": 131304 + }, + { + "epoch": 1.58, + "grad_norm": 20.061515528731647, + "learning_rate": 2.236057931818483e-06, + "loss": 1.2388, + "step": 131307 + }, + { + "epoch": 1.58, + "grad_norm": 8.614051197981885, + "learning_rate": 2.2356897682607224e-06, + "loss": 1.7262, + "step": 131310 + }, + { + "epoch": 1.58, + "grad_norm": 14.830711417424666, + "learning_rate": 2.235321631199755e-06, + "loss": 1.1597, + "step": 131313 + }, + { + "epoch": 1.58, + "grad_norm": 8.668356529613586, + "learning_rate": 2.2349535206368423e-06, + "loss": 1.1026, + "step": 131316 + }, + { + "epoch": 1.58, + "grad_norm": 7.402751916442897, + "learning_rate": 2.2345854365732345e-06, + "loss": 1.3826, + "step": 131319 + }, + { + "epoch": 1.58, + "grad_norm": 6.662459277554052, + "learning_rate": 2.2342173790101883e-06, + "loss": 1.2128, + "step": 131322 + }, + { + "epoch": 1.58, + "grad_norm": 3.639719418865041, + "learning_rate": 2.23384934794896e-06, + "loss": 1.4106, + "step": 131325 + }, + { + "epoch": 1.58, + "grad_norm": 10.952672402874274, + "learning_rate": 2.2334813433908067e-06, + "loss": 1.1537, + "step": 131328 + }, + { + "epoch": 1.58, + "grad_norm": 12.891963406261958, + "learning_rate": 2.233113365336984e-06, + "loss": 1.4094, + "step": 131331 + }, + { + "epoch": 1.58, + "grad_norm": 7.305424846461685, + "learning_rate": 2.2327454137887504e-06, + "loss": 1.3625, + "step": 131334 + }, + { + "epoch": 1.58, + "grad_norm": 10.98327543413559, + "learning_rate": 2.232377488747357e-06, + "loss": 1.5556, + "step": 131337 + }, + { + "epoch": 1.58, + "grad_norm": 8.425513471431664, + "learning_rate": 2.2320095902140637e-06, + "loss": 1.1021, + "step": 131340 + }, + { + "epoch": 1.58, + "grad_norm": 7.395637772623722, + "learning_rate": 2.2316417181901206e-06, + "loss": 1.3253, + "step": 131343 + }, + { + "epoch": 1.58, + "grad_norm": 9.112330351676935, + "learning_rate": 2.231273872676789e-06, + "loss": 1.3115, + "step": 131346 + }, + { + "epoch": 1.58, + "grad_norm": 9.860587745011523, + "learning_rate": 2.230906053675319e-06, + "loss": 1.3127, + "step": 131349 + }, + { + "epoch": 1.58, + "grad_norm": 8.07744644493926, + "learning_rate": 2.2305382611869675e-06, + "loss": 1.1318, + "step": 131352 + }, + { + "epoch": 1.58, + "grad_norm": 9.16313083965081, + "learning_rate": 2.230170495212991e-06, + "loss": 1.4094, + "step": 131355 + }, + { + "epoch": 1.58, + "grad_norm": 6.957869227189294, + "learning_rate": 2.2298027557546464e-06, + "loss": 1.2219, + "step": 131358 + }, + { + "epoch": 1.58, + "grad_norm": 22.63214245793912, + "learning_rate": 2.229435042813186e-06, + "loss": 1.1077, + "step": 131361 + }, + { + "epoch": 1.58, + "grad_norm": 6.435163335754694, + "learning_rate": 2.2290673563898623e-06, + "loss": 1.2701, + "step": 131364 + }, + { + "epoch": 1.58, + "grad_norm": 13.33046742615121, + "learning_rate": 2.228699696485933e-06, + "loss": 1.3359, + "step": 131367 + }, + { + "epoch": 1.58, + "grad_norm": 17.95512511986947, + "learning_rate": 2.2283320631026526e-06, + "loss": 1.0759, + "step": 131370 + }, + { + "epoch": 1.58, + "grad_norm": 7.107876831810766, + "learning_rate": 2.2279644562412772e-06, + "loss": 1.2985, + "step": 131373 + }, + { + "epoch": 1.58, + "grad_norm": 4.64549100601276, + "learning_rate": 2.2275968759030577e-06, + "loss": 1.2531, + "step": 131376 + }, + { + "epoch": 1.58, + "grad_norm": 8.158627325992674, + "learning_rate": 2.2272293220892505e-06, + "loss": 0.8805, + "step": 131379 + }, + { + "epoch": 1.58, + "grad_norm": 8.623409638712916, + "learning_rate": 2.2268617948011127e-06, + "loss": 1.0975, + "step": 131382 + }, + { + "epoch": 1.58, + "grad_norm": 9.086621672909505, + "learning_rate": 2.2264942940398928e-06, + "loss": 1.0236, + "step": 131385 + }, + { + "epoch": 1.58, + "grad_norm": 7.169036958076254, + "learning_rate": 2.22612681980685e-06, + "loss": 0.9689, + "step": 131388 + }, + { + "epoch": 1.58, + "grad_norm": 12.04617627612488, + "learning_rate": 2.2257593721032346e-06, + "loss": 1.0756, + "step": 131391 + }, + { + "epoch": 1.58, + "grad_norm": 10.995129137681882, + "learning_rate": 2.225391950930301e-06, + "loss": 1.1301, + "step": 131394 + }, + { + "epoch": 1.58, + "grad_norm": 14.077783506168453, + "learning_rate": 2.2250245562893045e-06, + "loss": 0.9588, + "step": 131397 + }, + { + "epoch": 1.58, + "grad_norm": 7.47092242965753, + "learning_rate": 2.2246571881815016e-06, + "loss": 0.9978, + "step": 131400 + }, + { + "epoch": 1.58, + "grad_norm": 3.677230167830394, + "learning_rate": 2.224289846608144e-06, + "loss": 1.1754, + "step": 131403 + }, + { + "epoch": 1.58, + "grad_norm": 15.556492678369521, + "learning_rate": 2.2239225315704806e-06, + "loss": 1.437, + "step": 131406 + }, + { + "epoch": 1.58, + "grad_norm": 5.794522268889748, + "learning_rate": 2.223555243069768e-06, + "loss": 1.0998, + "step": 131409 + }, + { + "epoch": 1.58, + "grad_norm": 3.5902263944693686, + "learning_rate": 2.223187981107262e-06, + "loss": 1.4428, + "step": 131412 + }, + { + "epoch": 1.58, + "grad_norm": 7.316400145486057, + "learning_rate": 2.2228207456842166e-06, + "loss": 1.009, + "step": 131415 + }, + { + "epoch": 1.58, + "grad_norm": 5.831768789300539, + "learning_rate": 2.2224535368018784e-06, + "loss": 0.8947, + "step": 131418 + }, + { + "epoch": 1.58, + "grad_norm": 7.141588187342893, + "learning_rate": 2.222086354461507e-06, + "loss": 0.7764, + "step": 131421 + }, + { + "epoch": 1.58, + "grad_norm": 31.861648724783237, + "learning_rate": 2.221719198664354e-06, + "loss": 1.6073, + "step": 131424 + }, + { + "epoch": 1.58, + "grad_norm": 4.4436487049096725, + "learning_rate": 2.221352069411673e-06, + "loss": 1.1993, + "step": 131427 + }, + { + "epoch": 1.58, + "grad_norm": 8.153195776913295, + "learning_rate": 2.2209849667047122e-06, + "loss": 1.0686, + "step": 131430 + }, + { + "epoch": 1.58, + "grad_norm": 20.83045909129658, + "learning_rate": 2.220617890544727e-06, + "loss": 1.2477, + "step": 131433 + }, + { + "epoch": 1.58, + "grad_norm": 6.501744191606187, + "learning_rate": 2.2202508409329726e-06, + "loss": 1.1654, + "step": 131436 + }, + { + "epoch": 1.58, + "grad_norm": 15.881129963917312, + "learning_rate": 2.2198838178706984e-06, + "loss": 1.057, + "step": 131439 + }, + { + "epoch": 1.58, + "grad_norm": 3.0867046360591446, + "learning_rate": 2.2195168213591623e-06, + "loss": 1.4858, + "step": 131442 + }, + { + "epoch": 1.58, + "grad_norm": 9.696739114404753, + "learning_rate": 2.2191498513996112e-06, + "loss": 1.203, + "step": 131445 + }, + { + "epoch": 1.58, + "grad_norm": 15.802436598680906, + "learning_rate": 2.2187829079932975e-06, + "loss": 1.3752, + "step": 131448 + }, + { + "epoch": 1.58, + "grad_norm": 4.4043733382410455, + "learning_rate": 2.2184159911414747e-06, + "loss": 1.1268, + "step": 131451 + }, + { + "epoch": 1.58, + "grad_norm": 13.392302772093785, + "learning_rate": 2.2180491008453973e-06, + "loss": 1.13, + "step": 131454 + }, + { + "epoch": 1.58, + "grad_norm": 10.193743381897965, + "learning_rate": 2.2176822371063134e-06, + "loss": 0.7591, + "step": 131457 + }, + { + "epoch": 1.58, + "grad_norm": 2.9917675915482618, + "learning_rate": 2.217315399925477e-06, + "loss": 1.1532, + "step": 131460 + }, + { + "epoch": 1.58, + "grad_norm": 8.306224179458905, + "learning_rate": 2.216948589304139e-06, + "loss": 0.883, + "step": 131463 + }, + { + "epoch": 1.58, + "grad_norm": 12.414606147344347, + "learning_rate": 2.216581805243555e-06, + "loss": 1.2143, + "step": 131466 + }, + { + "epoch": 1.58, + "grad_norm": 2.7574906051447376, + "learning_rate": 2.216215047744974e-06, + "loss": 1.0426, + "step": 131469 + }, + { + "epoch": 1.58, + "grad_norm": 9.472103870719186, + "learning_rate": 2.2158483168096444e-06, + "loss": 0.8266, + "step": 131472 + }, + { + "epoch": 1.58, + "grad_norm": 6.3057901224530895, + "learning_rate": 2.215481612438821e-06, + "loss": 1.153, + "step": 131475 + }, + { + "epoch": 1.58, + "grad_norm": 5.8951226614069965, + "learning_rate": 2.2151149346337542e-06, + "loss": 1.13, + "step": 131478 + }, + { + "epoch": 1.58, + "grad_norm": 12.14366675907831, + "learning_rate": 2.214748283395699e-06, + "loss": 0.957, + "step": 131481 + }, + { + "epoch": 1.58, + "grad_norm": 8.026352271694666, + "learning_rate": 2.2143816587259014e-06, + "loss": 1.0278, + "step": 131484 + }, + { + "epoch": 1.58, + "grad_norm": 45.26336042829651, + "learning_rate": 2.214015060625617e-06, + "loss": 1.2557, + "step": 131487 + }, + { + "epoch": 1.58, + "grad_norm": 9.991010132639701, + "learning_rate": 2.213648489096092e-06, + "loss": 1.306, + "step": 131490 + }, + { + "epoch": 1.58, + "grad_norm": 13.095781768560597, + "learning_rate": 2.213281944138581e-06, + "loss": 1.0323, + "step": 131493 + }, + { + "epoch": 1.58, + "grad_norm": 5.416935624208965, + "learning_rate": 2.212915425754336e-06, + "loss": 1.0224, + "step": 131496 + }, + { + "epoch": 1.58, + "grad_norm": 9.494317887460326, + "learning_rate": 2.2125489339446026e-06, + "loss": 1.2234, + "step": 131499 + }, + { + "epoch": 1.58, + "grad_norm": 21.984944262358013, + "learning_rate": 2.2121824687106353e-06, + "loss": 1.3362, + "step": 131502 + }, + { + "epoch": 1.58, + "grad_norm": 12.142012136057787, + "learning_rate": 2.2118160300536837e-06, + "loss": 1.3491, + "step": 131505 + }, + { + "epoch": 1.58, + "grad_norm": 14.247065924863623, + "learning_rate": 2.211449617975001e-06, + "loss": 1.0233, + "step": 131508 + }, + { + "epoch": 1.58, + "grad_norm": 6.125157624433182, + "learning_rate": 2.211083232475837e-06, + "loss": 1.1203, + "step": 131511 + }, + { + "epoch": 1.58, + "grad_norm": 13.838237499534667, + "learning_rate": 2.2107168735574357e-06, + "loss": 1.4915, + "step": 131514 + }, + { + "epoch": 1.58, + "grad_norm": 6.652673445248515, + "learning_rate": 2.2103505412210537e-06, + "loss": 1.0041, + "step": 131517 + }, + { + "epoch": 1.58, + "grad_norm": 6.5739129247367645, + "learning_rate": 2.209984235467938e-06, + "loss": 1.0243, + "step": 131520 + }, + { + "epoch": 1.58, + "grad_norm": 27.02465729215946, + "learning_rate": 2.2096179562993435e-06, + "loss": 1.0678, + "step": 131523 + }, + { + "epoch": 1.58, + "grad_norm": 8.736485175606676, + "learning_rate": 2.209251703716514e-06, + "loss": 1.0246, + "step": 131526 + }, + { + "epoch": 1.58, + "grad_norm": 15.172842100158945, + "learning_rate": 2.208885477720706e-06, + "loss": 1.1418, + "step": 131529 + }, + { + "epoch": 1.58, + "grad_norm": 13.114884457372241, + "learning_rate": 2.2085192783131616e-06, + "loss": 1.2651, + "step": 131532 + }, + { + "epoch": 1.58, + "grad_norm": 5.978202638742073, + "learning_rate": 2.2081531054951364e-06, + "loss": 0.8236, + "step": 131535 + }, + { + "epoch": 1.58, + "grad_norm": 16.124423524785726, + "learning_rate": 2.207786959267877e-06, + "loss": 1.2976, + "step": 131538 + }, + { + "epoch": 1.58, + "grad_norm": 5.619293969092966, + "learning_rate": 2.2074208396326334e-06, + "loss": 1.0995, + "step": 131541 + }, + { + "epoch": 1.58, + "grad_norm": 13.363697062371402, + "learning_rate": 2.2070547465906555e-06, + "loss": 1.2185, + "step": 131544 + }, + { + "epoch": 1.58, + "grad_norm": 5.967851859332634, + "learning_rate": 2.206688680143193e-06, + "loss": 1.4408, + "step": 131547 + }, + { + "epoch": 1.58, + "grad_norm": 16.865622173323946, + "learning_rate": 2.2063226402914974e-06, + "loss": 1.1525, + "step": 131550 + }, + { + "epoch": 1.58, + "grad_norm": 15.066617714472569, + "learning_rate": 2.205956627036815e-06, + "loss": 1.2924, + "step": 131553 + }, + { + "epoch": 1.58, + "grad_norm": 194.25781890644456, + "learning_rate": 2.205590640380393e-06, + "loss": 1.2978, + "step": 131556 + }, + { + "epoch": 1.58, + "grad_norm": 8.726076687613219, + "learning_rate": 2.205224680323482e-06, + "loss": 0.8792, + "step": 131559 + }, + { + "epoch": 1.58, + "grad_norm": 9.563607228411744, + "learning_rate": 2.2048587468673345e-06, + "loss": 1.0729, + "step": 131562 + }, + { + "epoch": 1.58, + "grad_norm": 8.345552677197269, + "learning_rate": 2.204492840013194e-06, + "loss": 1.333, + "step": 131565 + }, + { + "epoch": 1.58, + "grad_norm": 9.453093260704861, + "learning_rate": 2.2041269597623117e-06, + "loss": 1.2588, + "step": 131568 + }, + { + "epoch": 1.58, + "grad_norm": 8.511872394791352, + "learning_rate": 2.203761106115935e-06, + "loss": 1.0038, + "step": 131571 + }, + { + "epoch": 1.58, + "grad_norm": 11.663723350787635, + "learning_rate": 2.2033952790753168e-06, + "loss": 1.1672, + "step": 131574 + }, + { + "epoch": 1.58, + "grad_norm": 4.056446566366769, + "learning_rate": 2.2030294786417028e-06, + "loss": 0.9321, + "step": 131577 + }, + { + "epoch": 1.58, + "grad_norm": 9.366609885572611, + "learning_rate": 2.202663704816337e-06, + "loss": 1.5991, + "step": 131580 + }, + { + "epoch": 1.58, + "grad_norm": 7.103011150779494, + "learning_rate": 2.202297957600471e-06, + "loss": 1.0504, + "step": 131583 + }, + { + "epoch": 1.58, + "grad_norm": 5.751903067449317, + "learning_rate": 2.2019322369953544e-06, + "loss": 1.5315, + "step": 131586 + }, + { + "epoch": 1.58, + "grad_norm": 30.65178907806576, + "learning_rate": 2.201566543002237e-06, + "loss": 1.5375, + "step": 131589 + }, + { + "epoch": 1.58, + "grad_norm": 8.07805333997473, + "learning_rate": 2.2012008756223614e-06, + "loss": 1.0979, + "step": 131592 + }, + { + "epoch": 1.58, + "grad_norm": 8.979551729644667, + "learning_rate": 2.2008352348569795e-06, + "loss": 1.338, + "step": 131595 + }, + { + "epoch": 1.58, + "grad_norm": 19.972041877618718, + "learning_rate": 2.2004696207073363e-06, + "loss": 1.1273, + "step": 131598 + }, + { + "epoch": 1.58, + "grad_norm": 3.8880028265645303, + "learning_rate": 2.2001040331746802e-06, + "loss": 1.1971, + "step": 131601 + }, + { + "epoch": 1.58, + "grad_norm": 26.75491723913716, + "learning_rate": 2.199738472260262e-06, + "loss": 1.1672, + "step": 131604 + }, + { + "epoch": 1.58, + "grad_norm": 6.834225272544544, + "learning_rate": 2.199372937965325e-06, + "loss": 0.9571, + "step": 131607 + }, + { + "epoch": 1.58, + "grad_norm": 4.815365846447307, + "learning_rate": 2.199007430291118e-06, + "loss": 1.2112, + "step": 131610 + }, + { + "epoch": 1.58, + "grad_norm": 19.805987069971522, + "learning_rate": 2.1986419492388887e-06, + "loss": 1.1059, + "step": 131613 + }, + { + "epoch": 1.58, + "grad_norm": 7.159031436189517, + "learning_rate": 2.1982764948098866e-06, + "loss": 1.3064, + "step": 131616 + }, + { + "epoch": 1.58, + "grad_norm": 17.470616022436072, + "learning_rate": 2.1979110670053572e-06, + "loss": 1.5963, + "step": 131619 + }, + { + "epoch": 1.58, + "grad_norm": 16.816685796997366, + "learning_rate": 2.197545665826545e-06, + "loss": 1.0321, + "step": 131622 + }, + { + "epoch": 1.58, + "grad_norm": 13.889004302985956, + "learning_rate": 2.1971802912746975e-06, + "loss": 1.2569, + "step": 131625 + }, + { + "epoch": 1.58, + "grad_norm": 10.257713056445692, + "learning_rate": 2.1968149433510634e-06, + "loss": 1.243, + "step": 131628 + }, + { + "epoch": 1.58, + "grad_norm": 11.063301320257299, + "learning_rate": 2.196449622056893e-06, + "loss": 1.2332, + "step": 131631 + }, + { + "epoch": 1.58, + "grad_norm": 8.868231110967212, + "learning_rate": 2.1960843273934252e-06, + "loss": 1.012, + "step": 131634 + }, + { + "epoch": 1.58, + "grad_norm": 2.9459497516347626, + "learning_rate": 2.195719059361915e-06, + "loss": 1.2305, + "step": 131637 + }, + { + "epoch": 1.58, + "grad_norm": 12.3677027097979, + "learning_rate": 2.1953538179636e-06, + "loss": 1.5817, + "step": 131640 + }, + { + "epoch": 1.58, + "grad_norm": 40.94725487978253, + "learning_rate": 2.1949886031997346e-06, + "loss": 1.0363, + "step": 131643 + }, + { + "epoch": 1.58, + "grad_norm": 31.25623399822329, + "learning_rate": 2.1946234150715595e-06, + "loss": 1.3393, + "step": 131646 + }, + { + "epoch": 1.58, + "grad_norm": 3.6279078258117985, + "learning_rate": 2.1942582535803226e-06, + "loss": 1.5686, + "step": 131649 + }, + { + "epoch": 1.58, + "grad_norm": 5.1225228493578445, + "learning_rate": 2.1938931187272715e-06, + "loss": 1.3745, + "step": 131652 + }, + { + "epoch": 1.58, + "grad_norm": 12.242183299313817, + "learning_rate": 2.19352801051365e-06, + "loss": 0.8255, + "step": 131655 + }, + { + "epoch": 1.58, + "grad_norm": 12.141992794036852, + "learning_rate": 2.1931629289407095e-06, + "loss": 0.9382, + "step": 131658 + }, + { + "epoch": 1.58, + "grad_norm": 5.93323565443455, + "learning_rate": 2.1927978740096923e-06, + "loss": 1.2781, + "step": 131661 + }, + { + "epoch": 1.58, + "grad_norm": 7.945773058504595, + "learning_rate": 2.19243284572184e-06, + "loss": 1.1939, + "step": 131664 + }, + { + "epoch": 1.58, + "grad_norm": 13.77470952093845, + "learning_rate": 2.1920678440784026e-06, + "loss": 0.9931, + "step": 131667 + }, + { + "epoch": 1.58, + "grad_norm": 20.25808230248619, + "learning_rate": 2.1917028690806276e-06, + "loss": 1.5065, + "step": 131670 + }, + { + "epoch": 1.58, + "grad_norm": 6.794331535455164, + "learning_rate": 2.1913379207297557e-06, + "loss": 1.1243, + "step": 131673 + }, + { + "epoch": 1.58, + "grad_norm": 11.076428818014941, + "learning_rate": 2.1909729990270346e-06, + "loss": 1.6702, + "step": 131676 + }, + { + "epoch": 1.58, + "grad_norm": 6.630787634674847, + "learning_rate": 2.1906081039737128e-06, + "loss": 1.1596, + "step": 131679 + }, + { + "epoch": 1.58, + "grad_norm": 6.401355202463949, + "learning_rate": 2.1902432355710303e-06, + "loss": 1.2159, + "step": 131682 + }, + { + "epoch": 1.58, + "grad_norm": 14.807991915856208, + "learning_rate": 2.189878393820236e-06, + "loss": 0.8446, + "step": 131685 + }, + { + "epoch": 1.58, + "grad_norm": 11.227916845489974, + "learning_rate": 2.189513578722572e-06, + "loss": 1.2424, + "step": 131688 + }, + { + "epoch": 1.58, + "grad_norm": 7.043305107205175, + "learning_rate": 2.1891487902792842e-06, + "loss": 1.1766, + "step": 131691 + }, + { + "epoch": 1.58, + "grad_norm": 3.517576940070518, + "learning_rate": 2.1887840284916185e-06, + "loss": 1.155, + "step": 131694 + }, + { + "epoch": 1.58, + "grad_norm": 15.653267029456122, + "learning_rate": 2.1884192933608217e-06, + "loss": 1.262, + "step": 131697 + }, + { + "epoch": 1.58, + "grad_norm": 37.83107291580495, + "learning_rate": 2.1880545848881342e-06, + "loss": 1.48, + "step": 131700 + }, + { + "epoch": 1.58, + "grad_norm": 11.53655574785883, + "learning_rate": 2.1876899030748046e-06, + "loss": 0.8137, + "step": 131703 + }, + { + "epoch": 1.58, + "grad_norm": 13.83743782576401, + "learning_rate": 2.187325247922073e-06, + "loss": 1.3215, + "step": 131706 + }, + { + "epoch": 1.58, + "grad_norm": 10.975325752634292, + "learning_rate": 2.1869606194311855e-06, + "loss": 0.6922, + "step": 131709 + }, + { + "epoch": 1.58, + "grad_norm": 9.055935784767584, + "learning_rate": 2.1865960176033905e-06, + "loss": 1.3623, + "step": 131712 + }, + { + "epoch": 1.58, + "grad_norm": 8.1996081896766, + "learning_rate": 2.1862314424399267e-06, + "loss": 1.2661, + "step": 131715 + }, + { + "epoch": 1.58, + "grad_norm": 4.750146154067971, + "learning_rate": 2.18586689394204e-06, + "loss": 1.3483, + "step": 131718 + }, + { + "epoch": 1.58, + "grad_norm": 10.885498066353039, + "learning_rate": 2.1855023721109768e-06, + "loss": 0.8363, + "step": 131721 + }, + { + "epoch": 1.58, + "grad_norm": 9.987254186814928, + "learning_rate": 2.1851378769479804e-06, + "loss": 0.7853, + "step": 131724 + }, + { + "epoch": 1.58, + "grad_norm": 8.225655820823162, + "learning_rate": 2.1847734084542904e-06, + "loss": 0.8418, + "step": 131727 + }, + { + "epoch": 1.58, + "grad_norm": 2.4303797746744253, + "learning_rate": 2.1844089666311542e-06, + "loss": 1.1583, + "step": 131730 + }, + { + "epoch": 1.58, + "grad_norm": 5.708055208780272, + "learning_rate": 2.1840445514798138e-06, + "loss": 1.4377, + "step": 131733 + }, + { + "epoch": 1.58, + "grad_norm": 7.664963994661696, + "learning_rate": 2.1836801630015146e-06, + "loss": 0.9502, + "step": 131736 + }, + { + "epoch": 1.58, + "grad_norm": 4.380213298086848, + "learning_rate": 2.183315801197503e-06, + "loss": 1.1867, + "step": 131739 + }, + { + "epoch": 1.58, + "grad_norm": 12.250288615569916, + "learning_rate": 2.1829514660690154e-06, + "loss": 0.8662, + "step": 131742 + }, + { + "epoch": 1.58, + "grad_norm": 4.644685867131076, + "learning_rate": 2.182587157617302e-06, + "loss": 1.4191, + "step": 131745 + }, + { + "epoch": 1.58, + "grad_norm": 25.967373843112394, + "learning_rate": 2.1822228758436005e-06, + "loss": 0.8952, + "step": 131748 + }, + { + "epoch": 1.58, + "grad_norm": 6.440530603899381, + "learning_rate": 2.1818586207491588e-06, + "loss": 0.8848, + "step": 131751 + }, + { + "epoch": 1.58, + "grad_norm": 9.870156381414878, + "learning_rate": 2.181494392335214e-06, + "loss": 1.4578, + "step": 131754 + }, + { + "epoch": 1.58, + "grad_norm": 9.843968136471306, + "learning_rate": 2.1811301906030136e-06, + "loss": 1.2557, + "step": 131757 + }, + { + "epoch": 1.58, + "grad_norm": 22.148316745464857, + "learning_rate": 2.180766015553799e-06, + "loss": 1.5199, + "step": 131760 + }, + { + "epoch": 1.58, + "grad_norm": 8.711292422747286, + "learning_rate": 2.180401867188816e-06, + "loss": 0.8697, + "step": 131763 + }, + { + "epoch": 1.58, + "grad_norm": 6.972384229147611, + "learning_rate": 2.180037745509306e-06, + "loss": 1.5803, + "step": 131766 + }, + { + "epoch": 1.58, + "grad_norm": 8.850325124326575, + "learning_rate": 2.1796736505165063e-06, + "loss": 1.2338, + "step": 131769 + }, + { + "epoch": 1.58, + "grad_norm": 6.699675924927072, + "learning_rate": 2.179309582211664e-06, + "loss": 1.1829, + "step": 131772 + }, + { + "epoch": 1.58, + "grad_norm": 7.118049359218352, + "learning_rate": 2.1789455405960215e-06, + "loss": 1.3757, + "step": 131775 + }, + { + "epoch": 1.58, + "grad_norm": 10.083463980135086, + "learning_rate": 2.178581525670823e-06, + "loss": 1.0005, + "step": 131778 + }, + { + "epoch": 1.58, + "grad_norm": 13.638287843067372, + "learning_rate": 2.178217537437306e-06, + "loss": 1.1799, + "step": 131781 + }, + { + "epoch": 1.58, + "grad_norm": 13.4070085732412, + "learning_rate": 2.1778535758967154e-06, + "loss": 1.3747, + "step": 131784 + }, + { + "epoch": 1.58, + "grad_norm": 3.234345095701425, + "learning_rate": 2.1774896410502955e-06, + "loss": 1.5208, + "step": 131787 + }, + { + "epoch": 1.58, + "grad_norm": 8.430719235232493, + "learning_rate": 2.1771257328992835e-06, + "loss": 0.8535, + "step": 131790 + }, + { + "epoch": 1.58, + "grad_norm": 9.595983003862354, + "learning_rate": 2.176761851444925e-06, + "loss": 0.6879, + "step": 131793 + }, + { + "epoch": 1.58, + "grad_norm": 7.311253911871832, + "learning_rate": 2.1763979966884586e-06, + "loss": 1.3081, + "step": 131796 + }, + { + "epoch": 1.58, + "grad_norm": 6.107828426859291, + "learning_rate": 2.1760341686311283e-06, + "loss": 0.9504, + "step": 131799 + }, + { + "epoch": 1.58, + "grad_norm": 21.437063133316137, + "learning_rate": 2.175670367274174e-06, + "loss": 1.0329, + "step": 131802 + }, + { + "epoch": 1.58, + "grad_norm": 4.748628630723075, + "learning_rate": 2.175306592618842e-06, + "loss": 1.0779, + "step": 131805 + }, + { + "epoch": 1.58, + "grad_norm": 6.490055406419651, + "learning_rate": 2.1749428446663677e-06, + "loss": 1.1352, + "step": 131808 + }, + { + "epoch": 1.58, + "grad_norm": 35.31671032839423, + "learning_rate": 2.174579123417997e-06, + "loss": 1.1559, + "step": 131811 + }, + { + "epoch": 1.59, + "grad_norm": 5.597119744843763, + "learning_rate": 2.1742154288749663e-06, + "loss": 0.9027, + "step": 131814 + }, + { + "epoch": 1.59, + "grad_norm": 10.597825790027935, + "learning_rate": 2.17385176103852e-06, + "loss": 1.1569, + "step": 131817 + }, + { + "epoch": 1.59, + "grad_norm": 5.066604669621459, + "learning_rate": 2.1734881199099012e-06, + "loss": 1.3025, + "step": 131820 + }, + { + "epoch": 1.59, + "grad_norm": 6.27007711428732, + "learning_rate": 2.173124505490345e-06, + "loss": 1.3618, + "step": 131823 + }, + { + "epoch": 1.59, + "grad_norm": 14.292377036901614, + "learning_rate": 2.172760917781096e-06, + "loss": 1.0135, + "step": 131826 + }, + { + "epoch": 1.59, + "grad_norm": 11.237872351313417, + "learning_rate": 2.1723973567833978e-06, + "loss": 1.1781, + "step": 131829 + }, + { + "epoch": 1.59, + "grad_norm": 3.5951035504553968, + "learning_rate": 2.172033822498487e-06, + "loss": 1.1055, + "step": 131832 + }, + { + "epoch": 1.59, + "grad_norm": 15.814448797127096, + "learning_rate": 2.171670314927603e-06, + "loss": 0.9096, + "step": 131835 + }, + { + "epoch": 1.59, + "grad_norm": 6.519988587433396, + "learning_rate": 2.1713068340719888e-06, + "loss": 0.9461, + "step": 131838 + }, + { + "epoch": 1.59, + "grad_norm": 9.363057918719303, + "learning_rate": 2.1709433799328837e-06, + "loss": 1.5859, + "step": 131841 + }, + { + "epoch": 1.59, + "grad_norm": 8.399956940277125, + "learning_rate": 2.17057995251153e-06, + "loss": 0.9931, + "step": 131844 + }, + { + "epoch": 1.59, + "grad_norm": 8.408598571499132, + "learning_rate": 2.170216551809169e-06, + "loss": 1.1346, + "step": 131847 + }, + { + "epoch": 1.59, + "grad_norm": 3.9976857188284414, + "learning_rate": 2.169853177827036e-06, + "loss": 1.1896, + "step": 131850 + }, + { + "epoch": 1.59, + "grad_norm": 7.116251657337362, + "learning_rate": 2.1694898305663757e-06, + "loss": 1.2889, + "step": 131853 + }, + { + "epoch": 1.59, + "grad_norm": 3.827321747668377, + "learning_rate": 2.1691265100284242e-06, + "loss": 1.0425, + "step": 131856 + }, + { + "epoch": 1.59, + "grad_norm": 11.52579336095356, + "learning_rate": 2.168763216214426e-06, + "loss": 0.8899, + "step": 131859 + }, + { + "epoch": 1.59, + "grad_norm": 9.58804137236006, + "learning_rate": 2.1683999491256146e-06, + "loss": 1.3537, + "step": 131862 + }, + { + "epoch": 1.59, + "grad_norm": 3.8083126516870425, + "learning_rate": 2.168036708763234e-06, + "loss": 1.1841, + "step": 131865 + }, + { + "epoch": 1.59, + "grad_norm": 10.750192607899951, + "learning_rate": 2.1676734951285217e-06, + "loss": 1.0785, + "step": 131868 + }, + { + "epoch": 1.59, + "grad_norm": 2.82997503978793, + "learning_rate": 2.167310308222722e-06, + "loss": 1.1745, + "step": 131871 + }, + { + "epoch": 1.59, + "grad_norm": 6.154017418518814, + "learning_rate": 2.1669471480470716e-06, + "loss": 1.392, + "step": 131874 + }, + { + "epoch": 1.59, + "grad_norm": 6.5211420918161656, + "learning_rate": 2.166584014602806e-06, + "loss": 1.2931, + "step": 131877 + }, + { + "epoch": 1.59, + "grad_norm": 11.524029002167126, + "learning_rate": 2.166220907891168e-06, + "loss": 1.837, + "step": 131880 + }, + { + "epoch": 1.59, + "grad_norm": 7.104331588120623, + "learning_rate": 2.1658578279133958e-06, + "loss": 1.4141, + "step": 131883 + }, + { + "epoch": 1.59, + "grad_norm": 5.776275316852792, + "learning_rate": 2.1654947746707312e-06, + "loss": 1.0723, + "step": 131886 + }, + { + "epoch": 1.59, + "grad_norm": 2.900533377593908, + "learning_rate": 2.1651317481644097e-06, + "loss": 1.0789, + "step": 131889 + }, + { + "epoch": 1.59, + "grad_norm": 5.661144886347911, + "learning_rate": 2.1647687483956704e-06, + "loss": 1.0379, + "step": 131892 + }, + { + "epoch": 1.59, + "grad_norm": 5.181784294988761, + "learning_rate": 2.1644057753657555e-06, + "loss": 1.298, + "step": 131895 + }, + { + "epoch": 1.59, + "grad_norm": 22.238998960412044, + "learning_rate": 2.1640428290759e-06, + "loss": 1.439, + "step": 131898 + }, + { + "epoch": 1.59, + "grad_norm": 5.923799770666564, + "learning_rate": 2.1636799095273453e-06, + "loss": 1.2616, + "step": 131901 + }, + { + "epoch": 1.59, + "grad_norm": 10.159204908735246, + "learning_rate": 2.1633170167213256e-06, + "loss": 1.4916, + "step": 131904 + }, + { + "epoch": 1.59, + "grad_norm": 2.953862881788544, + "learning_rate": 2.1629541506590825e-06, + "loss": 1.2208, + "step": 131907 + }, + { + "epoch": 1.59, + "grad_norm": 15.987678666423651, + "learning_rate": 2.162591311341854e-06, + "loss": 1.1706, + "step": 131910 + }, + { + "epoch": 1.59, + "grad_norm": 4.972666808439027, + "learning_rate": 2.1622284987708818e-06, + "loss": 1.2303, + "step": 131913 + }, + { + "epoch": 1.59, + "grad_norm": 12.449525667764918, + "learning_rate": 2.1618657129473997e-06, + "loss": 1.019, + "step": 131916 + }, + { + "epoch": 1.59, + "grad_norm": 10.864855203980014, + "learning_rate": 2.161502953872644e-06, + "loss": 0.8698, + "step": 131919 + }, + { + "epoch": 1.59, + "grad_norm": 12.182199477427398, + "learning_rate": 2.161140221547856e-06, + "loss": 1.2209, + "step": 131922 + }, + { + "epoch": 1.59, + "grad_norm": 8.366840167964133, + "learning_rate": 2.160777515974273e-06, + "loss": 0.964, + "step": 131925 + }, + { + "epoch": 1.59, + "grad_norm": 6.221015210175003, + "learning_rate": 2.1604148371531343e-06, + "loss": 1.3955, + "step": 131928 + }, + { + "epoch": 1.59, + "grad_norm": 7.223866085557626, + "learning_rate": 2.1600521850856735e-06, + "loss": 1.4197, + "step": 131931 + }, + { + "epoch": 1.59, + "grad_norm": 10.974441381154106, + "learning_rate": 2.1596895597731304e-06, + "loss": 1.3559, + "step": 131934 + }, + { + "epoch": 1.59, + "grad_norm": 7.808945629342844, + "learning_rate": 2.1593269612167465e-06, + "loss": 1.3122, + "step": 131937 + }, + { + "epoch": 1.59, + "grad_norm": 42.07780204109383, + "learning_rate": 2.158964389417755e-06, + "loss": 1.2091, + "step": 131940 + }, + { + "epoch": 1.59, + "grad_norm": 21.761779151708293, + "learning_rate": 2.1586018443773903e-06, + "loss": 1.1889, + "step": 131943 + }, + { + "epoch": 1.59, + "grad_norm": 7.8738044217165895, + "learning_rate": 2.158239326096894e-06, + "loss": 1.2607, + "step": 131946 + }, + { + "epoch": 1.59, + "grad_norm": 2.391397920249825, + "learning_rate": 2.1578768345775013e-06, + "loss": 1.2281, + "step": 131949 + }, + { + "epoch": 1.59, + "grad_norm": 14.015110125381414, + "learning_rate": 2.1575143698204514e-06, + "loss": 1.3402, + "step": 131952 + }, + { + "epoch": 1.59, + "grad_norm": 2.2976771268704725, + "learning_rate": 2.157151931826983e-06, + "loss": 0.8776, + "step": 131955 + }, + { + "epoch": 1.59, + "grad_norm": 4.222569070863029, + "learning_rate": 2.156789520598329e-06, + "loss": 1.1097, + "step": 131958 + }, + { + "epoch": 1.59, + "grad_norm": 4.322033605455628, + "learning_rate": 2.1564271361357247e-06, + "loss": 1.2075, + "step": 131961 + }, + { + "epoch": 1.59, + "grad_norm": 6.795963216814492, + "learning_rate": 2.15606477844041e-06, + "loss": 0.9987, + "step": 131964 + }, + { + "epoch": 1.59, + "grad_norm": 8.304903801762201, + "learning_rate": 2.155702447513622e-06, + "loss": 1.0839, + "step": 131967 + }, + { + "epoch": 1.59, + "grad_norm": 6.606119512381178, + "learning_rate": 2.1553401433565958e-06, + "loss": 1.2299, + "step": 131970 + }, + { + "epoch": 1.59, + "grad_norm": 16.10275659235663, + "learning_rate": 2.1549778659705665e-06, + "loss": 1.0689, + "step": 131973 + }, + { + "epoch": 1.59, + "grad_norm": 15.186316809241704, + "learning_rate": 2.154615615356772e-06, + "loss": 1.2048, + "step": 131976 + }, + { + "epoch": 1.59, + "grad_norm": 12.400559783059808, + "learning_rate": 2.154253391516451e-06, + "loss": 1.239, + "step": 131979 + }, + { + "epoch": 1.59, + "grad_norm": 8.851107417259271, + "learning_rate": 2.153891194450838e-06, + "loss": 1.2204, + "step": 131982 + }, + { + "epoch": 1.59, + "grad_norm": 16.2096285537806, + "learning_rate": 2.153529024161165e-06, + "loss": 1.3751, + "step": 131985 + }, + { + "epoch": 1.59, + "grad_norm": 9.168937819905436, + "learning_rate": 2.1531668806486715e-06, + "loss": 1.2539, + "step": 131988 + }, + { + "epoch": 1.59, + "grad_norm": 15.337949368455503, + "learning_rate": 2.1528047639145923e-06, + "loss": 1.5708, + "step": 131991 + }, + { + "epoch": 1.59, + "grad_norm": 8.567169789537772, + "learning_rate": 2.1524426739601665e-06, + "loss": 1.2559, + "step": 131994 + }, + { + "epoch": 1.59, + "grad_norm": 16.375845647072907, + "learning_rate": 2.1520806107866254e-06, + "loss": 1.0065, + "step": 131997 + }, + { + "epoch": 1.59, + "grad_norm": 3.8623875787861457, + "learning_rate": 2.1517185743952085e-06, + "loss": 1.0457, + "step": 132000 + }, + { + "epoch": 1.59, + "grad_norm": 6.3859424277231875, + "learning_rate": 2.151356564787147e-06, + "loss": 0.8588, + "step": 132003 + }, + { + "epoch": 1.59, + "grad_norm": 4.683819650596424, + "learning_rate": 2.150994581963678e-06, + "loss": 0.9509, + "step": 132006 + }, + { + "epoch": 1.59, + "grad_norm": 15.2114419952005, + "learning_rate": 2.1506326259260402e-06, + "loss": 1.314, + "step": 132009 + }, + { + "epoch": 1.59, + "grad_norm": 7.607677470233464, + "learning_rate": 2.1502706966754626e-06, + "loss": 0.8665, + "step": 132012 + }, + { + "epoch": 1.59, + "grad_norm": 4.390198251992179, + "learning_rate": 2.1499087942131845e-06, + "loss": 1.1592, + "step": 132015 + }, + { + "epoch": 1.59, + "grad_norm": 9.538048862233376, + "learning_rate": 2.1495469185404405e-06, + "loss": 1.4016, + "step": 132018 + }, + { + "epoch": 1.59, + "grad_norm": 28.55656545632589, + "learning_rate": 2.149185069658467e-06, + "loss": 1.3125, + "step": 132021 + }, + { + "epoch": 1.59, + "grad_norm": 7.97648029447131, + "learning_rate": 2.1488232475684966e-06, + "loss": 1.1665, + "step": 132024 + }, + { + "epoch": 1.59, + "grad_norm": 8.664761701603945, + "learning_rate": 2.1484614522717627e-06, + "loss": 1.0571, + "step": 132027 + }, + { + "epoch": 1.59, + "grad_norm": 4.448779061860021, + "learning_rate": 2.148099683769501e-06, + "loss": 1.1525, + "step": 132030 + }, + { + "epoch": 1.59, + "grad_norm": 9.227799351637355, + "learning_rate": 2.1477379420629472e-06, + "loss": 0.9401, + "step": 132033 + }, + { + "epoch": 1.59, + "grad_norm": 4.441514873119534, + "learning_rate": 2.147376227153338e-06, + "loss": 1.138, + "step": 132036 + }, + { + "epoch": 1.59, + "grad_norm": 7.3824950107850125, + "learning_rate": 2.147014539041903e-06, + "loss": 1.1058, + "step": 132039 + }, + { + "epoch": 1.59, + "grad_norm": 7.117993687556246, + "learning_rate": 2.1466528777298813e-06, + "loss": 1.2495, + "step": 132042 + }, + { + "epoch": 1.59, + "grad_norm": 7.9093234553299645, + "learning_rate": 2.1462912432185012e-06, + "loss": 1.0451, + "step": 132045 + }, + { + "epoch": 1.59, + "grad_norm": 15.138305450186264, + "learning_rate": 2.145929635509003e-06, + "loss": 1.2543, + "step": 132048 + }, + { + "epoch": 1.59, + "grad_norm": 10.14600216794786, + "learning_rate": 2.145568054602616e-06, + "loss": 1.0728, + "step": 132051 + }, + { + "epoch": 1.59, + "grad_norm": 6.800925351831036, + "learning_rate": 2.1452065005005762e-06, + "loss": 1.3855, + "step": 132054 + }, + { + "epoch": 1.59, + "grad_norm": 5.984809314229882, + "learning_rate": 2.1448449732041165e-06, + "loss": 1.5887, + "step": 132057 + }, + { + "epoch": 1.59, + "grad_norm": 13.903636413858447, + "learning_rate": 2.1444834727144716e-06, + "loss": 1.0425, + "step": 132060 + }, + { + "epoch": 1.59, + "grad_norm": 8.03614717399496, + "learning_rate": 2.144121999032878e-06, + "loss": 1.5026, + "step": 132063 + }, + { + "epoch": 1.59, + "grad_norm": 11.337792157272895, + "learning_rate": 2.1437605521605663e-06, + "loss": 1.0459, + "step": 132066 + }, + { + "epoch": 1.59, + "grad_norm": 9.574957147455901, + "learning_rate": 2.1433991320987678e-06, + "loss": 1.4822, + "step": 132069 + }, + { + "epoch": 1.59, + "grad_norm": 9.946746869984, + "learning_rate": 2.1430377388487176e-06, + "loss": 1.3098, + "step": 132072 + }, + { + "epoch": 1.59, + "grad_norm": 8.997813213006626, + "learning_rate": 2.1426763724116518e-06, + "loss": 1.2838, + "step": 132075 + }, + { + "epoch": 1.59, + "grad_norm": 12.063647185777471, + "learning_rate": 2.1423150327888e-06, + "loss": 0.8099, + "step": 132078 + }, + { + "epoch": 1.59, + "grad_norm": 57.48236534530074, + "learning_rate": 2.1419537199813956e-06, + "loss": 1.3189, + "step": 132081 + }, + { + "epoch": 1.59, + "grad_norm": 8.622845154287344, + "learning_rate": 2.1415924339906757e-06, + "loss": 0.765, + "step": 132084 + }, + { + "epoch": 1.59, + "grad_norm": 6.7068743137510145, + "learning_rate": 2.141231174817867e-06, + "loss": 1.0811, + "step": 132087 + }, + { + "epoch": 1.59, + "grad_norm": 16.058193840272846, + "learning_rate": 2.140869942464209e-06, + "loss": 0.9576, + "step": 132090 + }, + { + "epoch": 1.59, + "grad_norm": 8.689471597035258, + "learning_rate": 2.1405087369309273e-06, + "loss": 0.9744, + "step": 132093 + }, + { + "epoch": 1.59, + "grad_norm": 44.300534905475836, + "learning_rate": 2.14014755821926e-06, + "loss": 1.1692, + "step": 132096 + }, + { + "epoch": 1.59, + "grad_norm": 3.466496854536595, + "learning_rate": 2.1397864063304363e-06, + "loss": 0.9819, + "step": 132099 + }, + { + "epoch": 1.59, + "grad_norm": 9.35432559960039, + "learning_rate": 2.1394252812656947e-06, + "loss": 1.3631, + "step": 132102 + }, + { + "epoch": 1.59, + "grad_norm": 15.732650675951193, + "learning_rate": 2.1390641830262595e-06, + "loss": 0.8293, + "step": 132105 + }, + { + "epoch": 1.59, + "grad_norm": 5.734148216432328, + "learning_rate": 2.1387031116133695e-06, + "loss": 1.2322, + "step": 132108 + }, + { + "epoch": 1.59, + "grad_norm": 12.85811797793337, + "learning_rate": 2.138342067028251e-06, + "loss": 1.186, + "step": 132111 + }, + { + "epoch": 1.59, + "grad_norm": 2.528213213247054, + "learning_rate": 2.1379810492721397e-06, + "loss": 1.121, + "step": 132114 + }, + { + "epoch": 1.59, + "grad_norm": 6.246386897899505, + "learning_rate": 2.1376200583462702e-06, + "loss": 0.8617, + "step": 132117 + }, + { + "epoch": 1.59, + "grad_norm": 3.631623447030434, + "learning_rate": 2.137259094251869e-06, + "loss": 0.9234, + "step": 132120 + }, + { + "epoch": 1.59, + "grad_norm": 8.978928404756568, + "learning_rate": 2.136898156990169e-06, + "loss": 0.9061, + "step": 132123 + }, + { + "epoch": 1.59, + "grad_norm": 12.12034587065485, + "learning_rate": 2.136537246562408e-06, + "loss": 1.2302, + "step": 132126 + }, + { + "epoch": 1.59, + "grad_norm": 14.092997060202741, + "learning_rate": 2.1361763629698082e-06, + "loss": 1.3311, + "step": 132129 + }, + { + "epoch": 1.59, + "grad_norm": 10.837768544986996, + "learning_rate": 2.13581550621361e-06, + "loss": 1.1832, + "step": 132132 + }, + { + "epoch": 1.59, + "grad_norm": 4.183994609297479, + "learning_rate": 2.1354546762950377e-06, + "loss": 1.6016, + "step": 132135 + }, + { + "epoch": 1.59, + "grad_norm": 8.112490023647277, + "learning_rate": 2.1350938732153257e-06, + "loss": 1.4209, + "step": 132138 + }, + { + "epoch": 1.59, + "grad_norm": 11.083183497603123, + "learning_rate": 2.1347330969757064e-06, + "loss": 0.9579, + "step": 132141 + }, + { + "epoch": 1.59, + "grad_norm": 14.496656073629046, + "learning_rate": 2.1343723475774114e-06, + "loss": 1.3891, + "step": 132144 + }, + { + "epoch": 1.59, + "grad_norm": 7.060365896792925, + "learning_rate": 2.134011625021668e-06, + "loss": 1.2079, + "step": 132147 + }, + { + "epoch": 1.59, + "grad_norm": 3.939552430899102, + "learning_rate": 2.133650929309713e-06, + "loss": 1.1394, + "step": 132150 + }, + { + "epoch": 1.59, + "grad_norm": 31.727479753755702, + "learning_rate": 2.1332902604427697e-06, + "loss": 1.1399, + "step": 132153 + }, + { + "epoch": 1.59, + "grad_norm": 21.97937704220099, + "learning_rate": 2.132929618422077e-06, + "loss": 0.9569, + "step": 132156 + }, + { + "epoch": 1.59, + "grad_norm": 2.685115870729145, + "learning_rate": 2.1325690032488587e-06, + "loss": 1.3611, + "step": 132159 + }, + { + "epoch": 1.59, + "grad_norm": 11.025039061107885, + "learning_rate": 2.132208414924349e-06, + "loss": 1.5019, + "step": 132162 + }, + { + "epoch": 1.59, + "grad_norm": 7.246936046966995, + "learning_rate": 2.131847853449778e-06, + "loss": 1.4032, + "step": 132165 + }, + { + "epoch": 1.59, + "grad_norm": 3.0604386986540555, + "learning_rate": 2.131487318826376e-06, + "loss": 0.9068, + "step": 132168 + }, + { + "epoch": 1.59, + "grad_norm": 4.724050175865566, + "learning_rate": 2.1311268110553762e-06, + "loss": 1.0053, + "step": 132171 + }, + { + "epoch": 1.59, + "grad_norm": 11.127111064507728, + "learning_rate": 2.1307663301380067e-06, + "loss": 1.2062, + "step": 132174 + }, + { + "epoch": 1.59, + "grad_norm": 3.98672909818393, + "learning_rate": 2.130405876075493e-06, + "loss": 1.5247, + "step": 132177 + }, + { + "epoch": 1.59, + "grad_norm": 5.0358102484624325, + "learning_rate": 2.1300454488690715e-06, + "loss": 0.9781, + "step": 132180 + }, + { + "epoch": 1.59, + "grad_norm": 10.914977389146127, + "learning_rate": 2.1296850485199716e-06, + "loss": 1.1637, + "step": 132183 + }, + { + "epoch": 1.59, + "grad_norm": 10.377835312400478, + "learning_rate": 2.1293246750294195e-06, + "loss": 1.5205, + "step": 132186 + }, + { + "epoch": 1.59, + "grad_norm": 6.518890054966795, + "learning_rate": 2.1289643283986472e-06, + "loss": 1.0745, + "step": 132189 + }, + { + "epoch": 1.59, + "grad_norm": 9.761622595954302, + "learning_rate": 2.1286040086288864e-06, + "loss": 1.338, + "step": 132192 + }, + { + "epoch": 1.59, + "grad_norm": 35.682627951406864, + "learning_rate": 2.128243715721363e-06, + "loss": 0.9888, + "step": 132195 + }, + { + "epoch": 1.59, + "grad_norm": 15.07260831154339, + "learning_rate": 2.1278834496773117e-06, + "loss": 1.013, + "step": 132198 + }, + { + "epoch": 1.59, + "grad_norm": 4.378734615792379, + "learning_rate": 2.1275232104979547e-06, + "loss": 1.2444, + "step": 132201 + }, + { + "epoch": 1.59, + "grad_norm": 10.356621144510314, + "learning_rate": 2.1271629981845252e-06, + "loss": 1.1017, + "step": 132204 + }, + { + "epoch": 1.59, + "grad_norm": 11.145001230129283, + "learning_rate": 2.1268028127382533e-06, + "loss": 1.1064, + "step": 132207 + }, + { + "epoch": 1.59, + "grad_norm": 25.83652897928529, + "learning_rate": 2.1264426541603713e-06, + "loss": 1.3011, + "step": 132210 + }, + { + "epoch": 1.59, + "grad_norm": 9.960114849988791, + "learning_rate": 2.1260825224521e-06, + "loss": 1.3432, + "step": 132213 + }, + { + "epoch": 1.59, + "grad_norm": 7.9944348050829, + "learning_rate": 2.1257224176146763e-06, + "loss": 1.3737, + "step": 132216 + }, + { + "epoch": 1.59, + "grad_norm": 45.41248977969237, + "learning_rate": 2.1253623396493226e-06, + "loss": 1.0217, + "step": 132219 + }, + { + "epoch": 1.59, + "grad_norm": 12.15863951400498, + "learning_rate": 2.125002288557271e-06, + "loss": 0.9548, + "step": 132222 + }, + { + "epoch": 1.59, + "grad_norm": 6.408269536457899, + "learning_rate": 2.1246422643397533e-06, + "loss": 0.9432, + "step": 132225 + }, + { + "epoch": 1.59, + "grad_norm": 18.121862410123178, + "learning_rate": 2.124282266997991e-06, + "loss": 1.2355, + "step": 132228 + }, + { + "epoch": 1.59, + "grad_norm": 4.725836652839961, + "learning_rate": 2.1239222965332175e-06, + "loss": 1.0074, + "step": 132231 + }, + { + "epoch": 1.59, + "grad_norm": 7.599415955978467, + "learning_rate": 2.123562352946663e-06, + "loss": 1.1426, + "step": 132234 + }, + { + "epoch": 1.59, + "grad_norm": 9.638931345946709, + "learning_rate": 2.123202436239553e-06, + "loss": 0.872, + "step": 132237 + }, + { + "epoch": 1.59, + "grad_norm": 7.083668472614742, + "learning_rate": 2.122842546413113e-06, + "loss": 0.9687, + "step": 132240 + }, + { + "epoch": 1.59, + "grad_norm": 15.435615376083412, + "learning_rate": 2.122482683468573e-06, + "loss": 1.4942, + "step": 132243 + }, + { + "epoch": 1.59, + "grad_norm": 6.817896957489018, + "learning_rate": 2.122122847407163e-06, + "loss": 1.0069, + "step": 132246 + }, + { + "epoch": 1.59, + "grad_norm": 10.398658174253528, + "learning_rate": 2.1217630382301115e-06, + "loss": 1.2102, + "step": 132249 + }, + { + "epoch": 1.59, + "grad_norm": 25.128984128531187, + "learning_rate": 2.1214032559386457e-06, + "loss": 1.2475, + "step": 132252 + }, + { + "epoch": 1.59, + "grad_norm": 8.642104433993985, + "learning_rate": 2.1210435005339915e-06, + "loss": 0.9122, + "step": 132255 + }, + { + "epoch": 1.59, + "grad_norm": 7.028898997230683, + "learning_rate": 2.120683772017379e-06, + "loss": 1.314, + "step": 132258 + }, + { + "epoch": 1.59, + "grad_norm": 13.45996496330238, + "learning_rate": 2.120324070390033e-06, + "loss": 1.3582, + "step": 132261 + }, + { + "epoch": 1.59, + "grad_norm": 21.96065308688333, + "learning_rate": 2.119964395653186e-06, + "loss": 1.0988, + "step": 132264 + }, + { + "epoch": 1.59, + "grad_norm": 3.776384899835254, + "learning_rate": 2.1196047478080584e-06, + "loss": 1.2734, + "step": 132267 + }, + { + "epoch": 1.59, + "grad_norm": 7.227521255946223, + "learning_rate": 2.119245126855881e-06, + "loss": 0.9583, + "step": 132270 + }, + { + "epoch": 1.59, + "grad_norm": 7.537779698809244, + "learning_rate": 2.118885532797882e-06, + "loss": 1.5893, + "step": 132273 + }, + { + "epoch": 1.59, + "grad_norm": 10.516504607681844, + "learning_rate": 2.1185259656352906e-06, + "loss": 1.2757, + "step": 132276 + }, + { + "epoch": 1.59, + "grad_norm": 12.055803629563616, + "learning_rate": 2.1181664253693313e-06, + "loss": 1.3708, + "step": 132279 + }, + { + "epoch": 1.59, + "grad_norm": 11.306647439534474, + "learning_rate": 2.1178069120012277e-06, + "loss": 0.8873, + "step": 132282 + }, + { + "epoch": 1.59, + "grad_norm": 4.849446772970223, + "learning_rate": 2.1174474255322097e-06, + "loss": 0.9568, + "step": 132285 + }, + { + "epoch": 1.59, + "grad_norm": 15.996215743718123, + "learning_rate": 2.117087965963506e-06, + "loss": 1.27, + "step": 132288 + }, + { + "epoch": 1.59, + "grad_norm": 7.269429254691355, + "learning_rate": 2.1167285332963426e-06, + "loss": 1.3962, + "step": 132291 + }, + { + "epoch": 1.59, + "grad_norm": 30.750870658614147, + "learning_rate": 2.116369127531943e-06, + "loss": 1.1912, + "step": 132294 + }, + { + "epoch": 1.59, + "grad_norm": 3.48409105265796, + "learning_rate": 2.1160097486715357e-06, + "loss": 1.0739, + "step": 132297 + }, + { + "epoch": 1.59, + "grad_norm": 11.424996787691436, + "learning_rate": 2.115650396716351e-06, + "loss": 1.1335, + "step": 132300 + }, + { + "epoch": 1.59, + "grad_norm": 8.001585996829453, + "learning_rate": 2.115291071667609e-06, + "loss": 1.283, + "step": 132303 + }, + { + "epoch": 1.59, + "grad_norm": 7.1930034145988575, + "learning_rate": 2.1149317735265396e-06, + "loss": 0.8272, + "step": 132306 + }, + { + "epoch": 1.59, + "grad_norm": 7.260143733380941, + "learning_rate": 2.1145725022943664e-06, + "loss": 0.8459, + "step": 132309 + }, + { + "epoch": 1.59, + "grad_norm": 5.240348149634891, + "learning_rate": 2.114213257972316e-06, + "loss": 1.091, + "step": 132312 + }, + { + "epoch": 1.59, + "grad_norm": 24.71074119843307, + "learning_rate": 2.113854040561616e-06, + "loss": 1.2672, + "step": 132315 + }, + { + "epoch": 1.59, + "grad_norm": 27.160061024476796, + "learning_rate": 2.1134948500634944e-06, + "loss": 1.3267, + "step": 132318 + }, + { + "epoch": 1.59, + "grad_norm": 5.727939324487121, + "learning_rate": 2.113135686479174e-06, + "loss": 0.7811, + "step": 132321 + }, + { + "epoch": 1.59, + "grad_norm": 20.376399299021234, + "learning_rate": 2.1127765498098786e-06, + "loss": 1.4179, + "step": 132324 + }, + { + "epoch": 1.59, + "grad_norm": 7.229665265406828, + "learning_rate": 2.1124174400568343e-06, + "loss": 1.2062, + "step": 132327 + }, + { + "epoch": 1.59, + "grad_norm": 3.1499623506737233, + "learning_rate": 2.1120583572212692e-06, + "loss": 1.2153, + "step": 132330 + }, + { + "epoch": 1.59, + "grad_norm": 5.55179029559569, + "learning_rate": 2.111699301304411e-06, + "loss": 0.7957, + "step": 132333 + }, + { + "epoch": 1.59, + "grad_norm": 5.349215223759575, + "learning_rate": 2.1113402723074785e-06, + "loss": 1.0856, + "step": 132336 + }, + { + "epoch": 1.59, + "grad_norm": 3.8749991702365505, + "learning_rate": 2.110981270231699e-06, + "loss": 1.2935, + "step": 132339 + }, + { + "epoch": 1.59, + "grad_norm": 2.772295191409305, + "learning_rate": 2.110622295078303e-06, + "loss": 1.1107, + "step": 132342 + }, + { + "epoch": 1.59, + "grad_norm": 4.294733539622024, + "learning_rate": 2.110263346848511e-06, + "loss": 1.0956, + "step": 132345 + }, + { + "epoch": 1.59, + "grad_norm": 18.0142006104109, + "learning_rate": 2.109904425543545e-06, + "loss": 1.119, + "step": 132348 + }, + { + "epoch": 1.59, + "grad_norm": 7.59191663872178, + "learning_rate": 2.109545531164634e-06, + "loss": 1.0951, + "step": 132351 + }, + { + "epoch": 1.59, + "grad_norm": 11.360550517358254, + "learning_rate": 2.109186663713001e-06, + "loss": 1.3634, + "step": 132354 + }, + { + "epoch": 1.59, + "grad_norm": 8.492984040527057, + "learning_rate": 2.108827823189873e-06, + "loss": 1.1254, + "step": 132357 + }, + { + "epoch": 1.59, + "grad_norm": 9.049436086918712, + "learning_rate": 2.1084690095964755e-06, + "loss": 1.1435, + "step": 132360 + }, + { + "epoch": 1.59, + "grad_norm": 9.4706065742443, + "learning_rate": 2.10811022293403e-06, + "loss": 1.1147, + "step": 132363 + }, + { + "epoch": 1.59, + "grad_norm": 5.897902075040881, + "learning_rate": 2.10775146320376e-06, + "loss": 1.2884, + "step": 132366 + }, + { + "epoch": 1.59, + "grad_norm": 7.456828370631809, + "learning_rate": 2.107392730406891e-06, + "loss": 1.2185, + "step": 132369 + }, + { + "epoch": 1.59, + "grad_norm": 3.698911140874414, + "learning_rate": 2.1070340245446495e-06, + "loss": 1.1089, + "step": 132372 + }, + { + "epoch": 1.59, + "grad_norm": 9.56623173904483, + "learning_rate": 2.1066753456182565e-06, + "loss": 1.3249, + "step": 132375 + }, + { + "epoch": 1.59, + "grad_norm": 9.222000446544548, + "learning_rate": 2.106316693628937e-06, + "loss": 1.1324, + "step": 132378 + }, + { + "epoch": 1.59, + "grad_norm": 12.802346917645611, + "learning_rate": 2.1059580685779147e-06, + "loss": 1.1823, + "step": 132381 + }, + { + "epoch": 1.59, + "grad_norm": 10.966640108118543, + "learning_rate": 2.105599470466417e-06, + "loss": 1.0588, + "step": 132384 + }, + { + "epoch": 1.59, + "grad_norm": 14.316207705883835, + "learning_rate": 2.1052408992956653e-06, + "loss": 1.1891, + "step": 132387 + }, + { + "epoch": 1.59, + "grad_norm": 4.448263685544064, + "learning_rate": 2.1048823550668805e-06, + "loss": 1.242, + "step": 132390 + }, + { + "epoch": 1.59, + "grad_norm": 3.5605523670509838, + "learning_rate": 2.104523837781287e-06, + "loss": 1.2094, + "step": 132393 + }, + { + "epoch": 1.59, + "grad_norm": 6.445353555176576, + "learning_rate": 2.1041653474401114e-06, + "loss": 1.1432, + "step": 132396 + }, + { + "epoch": 1.59, + "grad_norm": 7.620951441791065, + "learning_rate": 2.103806884044577e-06, + "loss": 1.0021, + "step": 132399 + }, + { + "epoch": 1.59, + "grad_norm": 5.181100476908705, + "learning_rate": 2.1034484475959026e-06, + "loss": 1.2805, + "step": 132402 + }, + { + "epoch": 1.59, + "grad_norm": 7.046038459147066, + "learning_rate": 2.1030900380953144e-06, + "loss": 1.1758, + "step": 132405 + }, + { + "epoch": 1.59, + "grad_norm": 5.703457542388592, + "learning_rate": 2.102731655544038e-06, + "loss": 0.8199, + "step": 132408 + }, + { + "epoch": 1.59, + "grad_norm": 14.439258189661492, + "learning_rate": 2.102373299943292e-06, + "loss": 0.8855, + "step": 132411 + }, + { + "epoch": 1.59, + "grad_norm": 6.297878860505921, + "learning_rate": 2.102014971294304e-06, + "loss": 1.1063, + "step": 132414 + }, + { + "epoch": 1.59, + "grad_norm": 6.050713295708535, + "learning_rate": 2.101656669598291e-06, + "loss": 1.4803, + "step": 132417 + }, + { + "epoch": 1.59, + "grad_norm": 12.77625011441416, + "learning_rate": 2.1012983948564782e-06, + "loss": 1.2861, + "step": 132420 + }, + { + "epoch": 1.59, + "grad_norm": 9.226885965408345, + "learning_rate": 2.1009401470700896e-06, + "loss": 0.9592, + "step": 132423 + }, + { + "epoch": 1.59, + "grad_norm": 4.982575799097421, + "learning_rate": 2.100581926240349e-06, + "loss": 0.9004, + "step": 132426 + }, + { + "epoch": 1.59, + "grad_norm": 3.8853249252311226, + "learning_rate": 2.100223732368477e-06, + "loss": 1.1569, + "step": 132429 + }, + { + "epoch": 1.59, + "grad_norm": 13.338993114260205, + "learning_rate": 2.0998655654556943e-06, + "loss": 0.9671, + "step": 132432 + }, + { + "epoch": 1.59, + "grad_norm": 26.24662853935998, + "learning_rate": 2.0995074255032245e-06, + "loss": 0.9463, + "step": 132435 + }, + { + "epoch": 1.59, + "grad_norm": 17.640775825052003, + "learning_rate": 2.09914931251229e-06, + "loss": 1.0428, + "step": 132438 + }, + { + "epoch": 1.59, + "grad_norm": 8.422758431310564, + "learning_rate": 2.0987912264841148e-06, + "loss": 1.0652, + "step": 132441 + }, + { + "epoch": 1.59, + "grad_norm": 10.087308833849118, + "learning_rate": 2.0984331674199177e-06, + "loss": 1.1229, + "step": 132444 + }, + { + "epoch": 1.59, + "grad_norm": 3.919594598497942, + "learning_rate": 2.0980751353209204e-06, + "loss": 1.2085, + "step": 132447 + }, + { + "epoch": 1.59, + "grad_norm": 10.340526247727492, + "learning_rate": 2.0977171301883504e-06, + "loss": 1.27, + "step": 132450 + }, + { + "epoch": 1.59, + "grad_norm": 11.617672365442573, + "learning_rate": 2.097359152023426e-06, + "loss": 1.2755, + "step": 132453 + }, + { + "epoch": 1.59, + "grad_norm": 12.601393924147436, + "learning_rate": 2.0970012008273645e-06, + "loss": 1.2919, + "step": 132456 + }, + { + "epoch": 1.59, + "grad_norm": 11.771022905843955, + "learning_rate": 2.096643276601391e-06, + "loss": 1.2725, + "step": 132459 + }, + { + "epoch": 1.59, + "grad_norm": 16.376987722143284, + "learning_rate": 2.0962853793467284e-06, + "loss": 1.074, + "step": 132462 + }, + { + "epoch": 1.59, + "grad_norm": 8.36048804171487, + "learning_rate": 2.095927509064596e-06, + "loss": 1.2103, + "step": 132465 + }, + { + "epoch": 1.59, + "grad_norm": 2.514302951410048, + "learning_rate": 2.09556966575622e-06, + "loss": 0.9869, + "step": 132468 + }, + { + "epoch": 1.59, + "grad_norm": 11.098927406960406, + "learning_rate": 2.095211849422816e-06, + "loss": 1.3736, + "step": 132471 + }, + { + "epoch": 1.59, + "grad_norm": 14.001370254575788, + "learning_rate": 2.0948540600656042e-06, + "loss": 0.7797, + "step": 132474 + }, + { + "epoch": 1.59, + "grad_norm": 6.923341161399251, + "learning_rate": 2.0944962976858087e-06, + "loss": 0.9327, + "step": 132477 + }, + { + "epoch": 1.59, + "grad_norm": 10.559531213759703, + "learning_rate": 2.094138562284652e-06, + "loss": 1.0888, + "step": 132480 + }, + { + "epoch": 1.59, + "grad_norm": 4.850299137578375, + "learning_rate": 2.0937808538633507e-06, + "loss": 1.0258, + "step": 132483 + }, + { + "epoch": 1.59, + "grad_norm": 4.196759777801448, + "learning_rate": 2.0934231724231268e-06, + "loss": 1.1431, + "step": 132486 + }, + { + "epoch": 1.59, + "grad_norm": 18.438078711299706, + "learning_rate": 2.0930655179652013e-06, + "loss": 1.1815, + "step": 132489 + }, + { + "epoch": 1.59, + "grad_norm": 54.77805007358991, + "learning_rate": 2.092707890490799e-06, + "loss": 1.1972, + "step": 132492 + }, + { + "epoch": 1.59, + "grad_norm": 2.7752048051443174, + "learning_rate": 2.0923502900011363e-06, + "loss": 0.9442, + "step": 132495 + }, + { + "epoch": 1.59, + "grad_norm": 14.697085464597427, + "learning_rate": 2.0919927164974306e-06, + "loss": 1.2391, + "step": 132498 + }, + { + "epoch": 1.59, + "grad_norm": 7.937741449810873, + "learning_rate": 2.0916351699809044e-06, + "loss": 1.102, + "step": 132501 + }, + { + "epoch": 1.59, + "grad_norm": 5.703087592230756, + "learning_rate": 2.09127765045278e-06, + "loss": 1.4932, + "step": 132504 + }, + { + "epoch": 1.59, + "grad_norm": 6.963507769197948, + "learning_rate": 2.0909201579142792e-06, + "loss": 0.9623, + "step": 132507 + }, + { + "epoch": 1.59, + "grad_norm": 7.9215023239373465, + "learning_rate": 2.0905626923666155e-06, + "loss": 1.1516, + "step": 132510 + }, + { + "epoch": 1.59, + "grad_norm": 8.841909909681437, + "learning_rate": 2.0902052538110152e-06, + "loss": 1.0923, + "step": 132513 + }, + { + "epoch": 1.59, + "grad_norm": 7.3876517325531665, + "learning_rate": 2.0898478422486935e-06, + "loss": 1.1335, + "step": 132516 + }, + { + "epoch": 1.59, + "grad_norm": 6.26986133186687, + "learning_rate": 2.0894904576808706e-06, + "loss": 0.9642, + "step": 132519 + }, + { + "epoch": 1.59, + "grad_norm": 4.786925490696502, + "learning_rate": 2.089133100108771e-06, + "loss": 1.028, + "step": 132522 + }, + { + "epoch": 1.59, + "grad_norm": 13.936236105697757, + "learning_rate": 2.0887757695336074e-06, + "loss": 1.1101, + "step": 132525 + }, + { + "epoch": 1.59, + "grad_norm": 8.708692972657229, + "learning_rate": 2.088418465956603e-06, + "loss": 1.1534, + "step": 132528 + }, + { + "epoch": 1.59, + "grad_norm": 14.173193098517622, + "learning_rate": 2.0880611893789758e-06, + "loss": 1.3982, + "step": 132531 + }, + { + "epoch": 1.59, + "grad_norm": 9.894371870935542, + "learning_rate": 2.0877039398019495e-06, + "loss": 0.9734, + "step": 132534 + }, + { + "epoch": 1.59, + "grad_norm": 15.369366287877876, + "learning_rate": 2.087346717226739e-06, + "loss": 1.2764, + "step": 132537 + }, + { + "epoch": 1.59, + "grad_norm": 8.082042794848467, + "learning_rate": 2.086989521654562e-06, + "loss": 1.1346, + "step": 132540 + }, + { + "epoch": 1.59, + "grad_norm": 4.50089063951043, + "learning_rate": 2.0866323530866396e-06, + "loss": 1.1242, + "step": 132543 + }, + { + "epoch": 1.59, + "grad_norm": 5.064169875857526, + "learning_rate": 2.0862752115241904e-06, + "loss": 0.7316, + "step": 132546 + }, + { + "epoch": 1.59, + "grad_norm": 11.311022015828607, + "learning_rate": 2.0859180969684354e-06, + "loss": 1.0967, + "step": 132549 + }, + { + "epoch": 1.59, + "grad_norm": 6.595808708803062, + "learning_rate": 2.085561009420589e-06, + "loss": 1.0121, + "step": 132552 + }, + { + "epoch": 1.59, + "grad_norm": 6.1786233755483435, + "learning_rate": 2.0852039488818755e-06, + "loss": 1.2168, + "step": 132555 + }, + { + "epoch": 1.59, + "grad_norm": 6.664234536707639, + "learning_rate": 2.0848469153535066e-06, + "loss": 0.8607, + "step": 132558 + }, + { + "epoch": 1.59, + "grad_norm": 8.20199043434813, + "learning_rate": 2.0844899088367067e-06, + "loss": 1.2623, + "step": 132561 + }, + { + "epoch": 1.59, + "grad_norm": 9.394519776511762, + "learning_rate": 2.0841329293326896e-06, + "loss": 1.0632, + "step": 132564 + }, + { + "epoch": 1.59, + "grad_norm": 4.117297855598867, + "learning_rate": 2.0837759768426745e-06, + "loss": 1.3032, + "step": 132567 + }, + { + "epoch": 1.59, + "grad_norm": 3.3263099474938, + "learning_rate": 2.083419051367881e-06, + "loss": 0.8725, + "step": 132570 + }, + { + "epoch": 1.59, + "grad_norm": 2.819567235536333, + "learning_rate": 2.0830621529095274e-06, + "loss": 1.1557, + "step": 132573 + }, + { + "epoch": 1.59, + "grad_norm": 9.10082520857912, + "learning_rate": 2.082705281468833e-06, + "loss": 1.1567, + "step": 132576 + }, + { + "epoch": 1.59, + "grad_norm": 8.275768398679338, + "learning_rate": 2.082348437047014e-06, + "loss": 0.8503, + "step": 132579 + }, + { + "epoch": 1.59, + "grad_norm": 4.6572397141429684, + "learning_rate": 2.081991619645285e-06, + "loss": 1.0144, + "step": 132582 + }, + { + "epoch": 1.59, + "grad_norm": 8.521762973947713, + "learning_rate": 2.081634829264867e-06, + "loss": 1.1761, + "step": 132585 + }, + { + "epoch": 1.59, + "grad_norm": 27.8855385987168, + "learning_rate": 2.0812780659069786e-06, + "loss": 0.9739, + "step": 132588 + }, + { + "epoch": 1.59, + "grad_norm": 10.235837046584647, + "learning_rate": 2.080921329572835e-06, + "loss": 1.5516, + "step": 132591 + }, + { + "epoch": 1.59, + "grad_norm": 2.786679066299725, + "learning_rate": 2.080564620263653e-06, + "loss": 0.8813, + "step": 132594 + }, + { + "epoch": 1.59, + "grad_norm": 13.612698429391216, + "learning_rate": 2.080207937980654e-06, + "loss": 1.0337, + "step": 132597 + }, + { + "epoch": 1.59, + "grad_norm": 19.982949252569135, + "learning_rate": 2.0798512827250508e-06, + "loss": 1.5809, + "step": 132600 + }, + { + "epoch": 1.59, + "grad_norm": 9.197781241287272, + "learning_rate": 2.0794946544980643e-06, + "loss": 0.8074, + "step": 132603 + }, + { + "epoch": 1.59, + "grad_norm": 13.195189920800308, + "learning_rate": 2.0791380533009076e-06, + "loss": 1.2367, + "step": 132606 + }, + { + "epoch": 1.59, + "grad_norm": 14.428612755560417, + "learning_rate": 2.0787814791347995e-06, + "loss": 1.2699, + "step": 132609 + }, + { + "epoch": 1.59, + "grad_norm": 10.359109338566402, + "learning_rate": 2.0784249320009564e-06, + "loss": 1.1201, + "step": 132612 + }, + { + "epoch": 1.59, + "grad_norm": 7.60490512086175, + "learning_rate": 2.078068411900599e-06, + "loss": 1.0487, + "step": 132615 + }, + { + "epoch": 1.59, + "grad_norm": 5.023780536766093, + "learning_rate": 2.077711918834938e-06, + "loss": 0.8801, + "step": 132618 + }, + { + "epoch": 1.59, + "grad_norm": 12.423691745944847, + "learning_rate": 2.077355452805194e-06, + "loss": 0.9563, + "step": 132621 + }, + { + "epoch": 1.59, + "grad_norm": 17.923214484908886, + "learning_rate": 2.076999013812582e-06, + "loss": 1.5434, + "step": 132624 + }, + { + "epoch": 1.59, + "grad_norm": 4.521283750253884, + "learning_rate": 2.0766426018583163e-06, + "loss": 1.1098, + "step": 132627 + }, + { + "epoch": 1.59, + "grad_norm": 10.960334424485765, + "learning_rate": 2.0762862169436183e-06, + "loss": 1.2499, + "step": 132630 + }, + { + "epoch": 1.59, + "grad_norm": 8.55930937536461, + "learning_rate": 2.0759298590696997e-06, + "loss": 1.3384, + "step": 132633 + }, + { + "epoch": 1.59, + "grad_norm": 10.047899774966668, + "learning_rate": 2.0755735282377776e-06, + "loss": 1.0265, + "step": 132636 + }, + { + "epoch": 1.59, + "grad_norm": 8.012021771838612, + "learning_rate": 2.075217224449071e-06, + "loss": 1.452, + "step": 132639 + }, + { + "epoch": 1.59, + "grad_norm": 7.219693012076156, + "learning_rate": 2.074860947704792e-06, + "loss": 1.5694, + "step": 132642 + }, + { + "epoch": 1.6, + "grad_norm": 7.173902902516346, + "learning_rate": 2.07450469800616e-06, + "loss": 0.9512, + "step": 132645 + }, + { + "epoch": 1.6, + "grad_norm": 12.664769572352885, + "learning_rate": 2.0741484753543863e-06, + "loss": 1.2693, + "step": 132648 + }, + { + "epoch": 1.6, + "grad_norm": 30.616498767574846, + "learning_rate": 2.073792279750688e-06, + "loss": 1.1329, + "step": 132651 + }, + { + "epoch": 1.6, + "grad_norm": 6.68258284809017, + "learning_rate": 2.073436111196282e-06, + "loss": 1.1217, + "step": 132654 + }, + { + "epoch": 1.6, + "grad_norm": 9.42215595139948, + "learning_rate": 2.0730799696923865e-06, + "loss": 1.0141, + "step": 132657 + }, + { + "epoch": 1.6, + "grad_norm": 8.327774934203449, + "learning_rate": 2.072723855240211e-06, + "loss": 0.9563, + "step": 132660 + }, + { + "epoch": 1.6, + "grad_norm": 7.958036439262862, + "learning_rate": 2.072367767840977e-06, + "loss": 0.6453, + "step": 132663 + }, + { + "epoch": 1.6, + "grad_norm": 14.343215823268041, + "learning_rate": 2.0720117074958925e-06, + "loss": 1.1181, + "step": 132666 + }, + { + "epoch": 1.6, + "grad_norm": 12.035227540963458, + "learning_rate": 2.0716556742061788e-06, + "loss": 1.1033, + "step": 132669 + }, + { + "epoch": 1.6, + "grad_norm": 15.567735173104994, + "learning_rate": 2.071299667973047e-06, + "loss": 1.3298, + "step": 132672 + }, + { + "epoch": 1.6, + "grad_norm": 6.972030801247311, + "learning_rate": 2.070943688797713e-06, + "loss": 0.7757, + "step": 132675 + }, + { + "epoch": 1.6, + "grad_norm": 33.252339012274525, + "learning_rate": 2.070587736681392e-06, + "loss": 1.2988, + "step": 132678 + }, + { + "epoch": 1.6, + "grad_norm": 6.152347632007175, + "learning_rate": 2.0702318116253017e-06, + "loss": 0.7932, + "step": 132681 + }, + { + "epoch": 1.6, + "grad_norm": 4.704725386867631, + "learning_rate": 2.0698759136306512e-06, + "loss": 1.0698, + "step": 132684 + }, + { + "epoch": 1.6, + "grad_norm": 8.24444958233512, + "learning_rate": 2.0695200426986606e-06, + "loss": 0.7692, + "step": 132687 + }, + { + "epoch": 1.6, + "grad_norm": 17.539139927717393, + "learning_rate": 2.069164198830539e-06, + "loss": 1.1247, + "step": 132690 + }, + { + "epoch": 1.6, + "grad_norm": 13.173926255796191, + "learning_rate": 2.068808382027503e-06, + "loss": 0.7208, + "step": 132693 + }, + { + "epoch": 1.6, + "grad_norm": 10.587688899134676, + "learning_rate": 2.0684525922907705e-06, + "loss": 1.5477, + "step": 132696 + }, + { + "epoch": 1.6, + "grad_norm": 5.959248118252994, + "learning_rate": 2.0680968296215485e-06, + "loss": 0.8391, + "step": 132699 + }, + { + "epoch": 1.6, + "grad_norm": 8.88681599758216, + "learning_rate": 2.0677410940210563e-06, + "loss": 1.3903, + "step": 132702 + }, + { + "epoch": 1.6, + "grad_norm": 5.129950471438249, + "learning_rate": 2.067385385490509e-06, + "loss": 1.088, + "step": 132705 + }, + { + "epoch": 1.6, + "grad_norm": 9.170023832524125, + "learning_rate": 2.067029704031115e-06, + "loss": 1.1105, + "step": 132708 + }, + { + "epoch": 1.6, + "grad_norm": 8.273758576898322, + "learning_rate": 2.066674049644094e-06, + "loss": 1.5748, + "step": 132711 + }, + { + "epoch": 1.6, + "grad_norm": 30.012255698594547, + "learning_rate": 2.066318422330653e-06, + "loss": 1.1973, + "step": 132714 + }, + { + "epoch": 1.6, + "grad_norm": 3.6131843456224053, + "learning_rate": 2.0659628220920113e-06, + "loss": 1.0663, + "step": 132717 + }, + { + "epoch": 1.6, + "grad_norm": 4.440831929753348, + "learning_rate": 2.06560724892938e-06, + "loss": 1.189, + "step": 132720 + }, + { + "epoch": 1.6, + "grad_norm": 3.1392513948385385, + "learning_rate": 2.0652517028439755e-06, + "loss": 1.2271, + "step": 132723 + }, + { + "epoch": 1.6, + "grad_norm": 3.139517370100267, + "learning_rate": 2.0648961838370076e-06, + "loss": 1.285, + "step": 132726 + }, + { + "epoch": 1.6, + "grad_norm": 6.501408483220118, + "learning_rate": 2.064540691909692e-06, + "loss": 1.1455, + "step": 132729 + }, + { + "epoch": 1.6, + "grad_norm": 7.048817206455327, + "learning_rate": 2.0641852270632377e-06, + "loss": 1.0309, + "step": 132732 + }, + { + "epoch": 1.6, + "grad_norm": 4.350149916221294, + "learning_rate": 2.0638297892988624e-06, + "loss": 1.3829, + "step": 132735 + }, + { + "epoch": 1.6, + "grad_norm": 9.015303796271613, + "learning_rate": 2.063474378617779e-06, + "loss": 1.1906, + "step": 132738 + }, + { + "epoch": 1.6, + "grad_norm": 10.47624343719899, + "learning_rate": 2.063118995021196e-06, + "loss": 0.864, + "step": 132741 + }, + { + "epoch": 1.6, + "grad_norm": 7.299824649024606, + "learning_rate": 2.062763638510329e-06, + "loss": 0.9068, + "step": 132744 + }, + { + "epoch": 1.6, + "grad_norm": 12.209388176421736, + "learning_rate": 2.0624083090863943e-06, + "loss": 1.4312, + "step": 132747 + }, + { + "epoch": 1.6, + "grad_norm": 17.412262931183964, + "learning_rate": 2.0620530067506005e-06, + "loss": 1.408, + "step": 132750 + }, + { + "epoch": 1.6, + "grad_norm": 8.306650200215119, + "learning_rate": 2.0616977315041574e-06, + "loss": 1.3231, + "step": 132753 + }, + { + "epoch": 1.6, + "grad_norm": 3.2762984290632025, + "learning_rate": 2.061342483348281e-06, + "loss": 1.1747, + "step": 132756 + }, + { + "epoch": 1.6, + "grad_norm": 7.443992858864963, + "learning_rate": 2.060987262284183e-06, + "loss": 1.2426, + "step": 132759 + }, + { + "epoch": 1.6, + "grad_norm": 6.639050407503613, + "learning_rate": 2.0606320683130766e-06, + "loss": 0.9068, + "step": 132762 + }, + { + "epoch": 1.6, + "grad_norm": 5.197019972393154, + "learning_rate": 2.0602769014361745e-06, + "loss": 1.3942, + "step": 132765 + }, + { + "epoch": 1.6, + "grad_norm": 4.866049285067704, + "learning_rate": 2.059921761654686e-06, + "loss": 1.3868, + "step": 132768 + }, + { + "epoch": 1.6, + "grad_norm": 2.744097023836747, + "learning_rate": 2.0595666489698263e-06, + "loss": 1.0493, + "step": 132771 + }, + { + "epoch": 1.6, + "grad_norm": 11.914377301520712, + "learning_rate": 2.0592115633828037e-06, + "loss": 1.1452, + "step": 132774 + }, + { + "epoch": 1.6, + "grad_norm": 13.40169813679553, + "learning_rate": 2.058856504894834e-06, + "loss": 1.0878, + "step": 132777 + }, + { + "epoch": 1.6, + "grad_norm": 8.017250810341597, + "learning_rate": 2.058501473507124e-06, + "loss": 1.1026, + "step": 132780 + }, + { + "epoch": 1.6, + "grad_norm": 38.07250205640567, + "learning_rate": 2.0581464692208886e-06, + "loss": 1.2225, + "step": 132783 + }, + { + "epoch": 1.6, + "grad_norm": 10.193673764743806, + "learning_rate": 2.0577914920373377e-06, + "loss": 1.042, + "step": 132786 + }, + { + "epoch": 1.6, + "grad_norm": 8.999437814721793, + "learning_rate": 2.057436541957687e-06, + "loss": 1.623, + "step": 132789 + }, + { + "epoch": 1.6, + "grad_norm": 6.0533586763873375, + "learning_rate": 2.0570816189831433e-06, + "loss": 0.7919, + "step": 132792 + }, + { + "epoch": 1.6, + "grad_norm": 16.77201862770257, + "learning_rate": 2.056726723114918e-06, + "loss": 1.3496, + "step": 132795 + }, + { + "epoch": 1.6, + "grad_norm": 8.044211001575272, + "learning_rate": 2.056371854354222e-06, + "loss": 1.2242, + "step": 132798 + }, + { + "epoch": 1.6, + "grad_norm": 11.830822432283483, + "learning_rate": 2.0560170127022684e-06, + "loss": 1.3955, + "step": 132801 + }, + { + "epoch": 1.6, + "grad_norm": 5.4774436297518, + "learning_rate": 2.0556621981602696e-06, + "loss": 1.1651, + "step": 132804 + }, + { + "epoch": 1.6, + "grad_norm": 8.818652050650705, + "learning_rate": 2.0553074107294313e-06, + "loss": 1.2988, + "step": 132807 + }, + { + "epoch": 1.6, + "grad_norm": 10.8656447369973, + "learning_rate": 2.0549526504109675e-06, + "loss": 0.915, + "step": 132810 + }, + { + "epoch": 1.6, + "grad_norm": 9.712719942389832, + "learning_rate": 2.0545979172060913e-06, + "loss": 1.2839, + "step": 132813 + }, + { + "epoch": 1.6, + "grad_norm": 10.697651779003372, + "learning_rate": 2.054243211116007e-06, + "loss": 1.6443, + "step": 132816 + }, + { + "epoch": 1.6, + "grad_norm": 5.472241956282301, + "learning_rate": 2.053888532141932e-06, + "loss": 1.2095, + "step": 132819 + }, + { + "epoch": 1.6, + "grad_norm": 7.800724800730954, + "learning_rate": 2.05353388028507e-06, + "loss": 1.4026, + "step": 132822 + }, + { + "epoch": 1.6, + "grad_norm": 7.044522172730518, + "learning_rate": 2.0531792555466356e-06, + "loss": 1.0792, + "step": 132825 + }, + { + "epoch": 1.6, + "grad_norm": 79.30899946532709, + "learning_rate": 2.052824657927838e-06, + "loss": 1.1569, + "step": 132828 + }, + { + "epoch": 1.6, + "grad_norm": 5.032349645973188, + "learning_rate": 2.0524700874298885e-06, + "loss": 1.2613, + "step": 132831 + }, + { + "epoch": 1.6, + "grad_norm": 10.74453667251137, + "learning_rate": 2.0521155440539965e-06, + "loss": 0.82, + "step": 132834 + }, + { + "epoch": 1.6, + "grad_norm": 15.120969510467178, + "learning_rate": 2.0517610278013687e-06, + "loss": 1.2739, + "step": 132837 + }, + { + "epoch": 1.6, + "grad_norm": 11.45356482788408, + "learning_rate": 2.0514065386732186e-06, + "loss": 1.1729, + "step": 132840 + }, + { + "epoch": 1.6, + "grad_norm": 5.481638980970678, + "learning_rate": 2.051052076670753e-06, + "loss": 1.1448, + "step": 132843 + }, + { + "epoch": 1.6, + "grad_norm": 8.687118114452982, + "learning_rate": 2.0506976417951873e-06, + "loss": 1.3127, + "step": 132846 + }, + { + "epoch": 1.6, + "grad_norm": 5.053461465464166, + "learning_rate": 2.050343234047725e-06, + "loss": 1.2415, + "step": 132849 + }, + { + "epoch": 1.6, + "grad_norm": 5.814212728954002, + "learning_rate": 2.0499888534295774e-06, + "loss": 1.2542, + "step": 132852 + }, + { + "epoch": 1.6, + "grad_norm": 3.344047896376448, + "learning_rate": 2.049634499941957e-06, + "loss": 1.4849, + "step": 132855 + }, + { + "epoch": 1.6, + "grad_norm": 10.696112850033941, + "learning_rate": 2.04928017358607e-06, + "loss": 1.0101, + "step": 132858 + }, + { + "epoch": 1.6, + "grad_norm": 12.930646439652456, + "learning_rate": 2.0489258743631235e-06, + "loss": 1.0238, + "step": 132861 + }, + { + "epoch": 1.6, + "grad_norm": 10.785318692096912, + "learning_rate": 2.0485716022743284e-06, + "loss": 1.5175, + "step": 132864 + }, + { + "epoch": 1.6, + "grad_norm": 5.49094497378659, + "learning_rate": 2.0482173573208954e-06, + "loss": 1.2544, + "step": 132867 + }, + { + "epoch": 1.6, + "grad_norm": 8.275796777418034, + "learning_rate": 2.0478631395040317e-06, + "loss": 0.9466, + "step": 132870 + }, + { + "epoch": 1.6, + "grad_norm": 10.749681516595535, + "learning_rate": 2.047508948824949e-06, + "loss": 1.4998, + "step": 132873 + }, + { + "epoch": 1.6, + "grad_norm": 7.189607907257212, + "learning_rate": 2.047154785284854e-06, + "loss": 1.1272, + "step": 132876 + }, + { + "epoch": 1.6, + "grad_norm": 2.973789635222321, + "learning_rate": 2.0468006488849536e-06, + "loss": 1.1838, + "step": 132879 + }, + { + "epoch": 1.6, + "grad_norm": 7.572853306006838, + "learning_rate": 2.0464465396264566e-06, + "loss": 1.1782, + "step": 132882 + }, + { + "epoch": 1.6, + "grad_norm": 9.457660246306366, + "learning_rate": 2.0460924575105756e-06, + "loss": 1.0166, + "step": 132885 + }, + { + "epoch": 1.6, + "grad_norm": 4.7161513326344835, + "learning_rate": 2.045738402538513e-06, + "loss": 1.1143, + "step": 132888 + }, + { + "epoch": 1.6, + "grad_norm": 5.654696513937409, + "learning_rate": 2.0453843747114808e-06, + "loss": 0.8619, + "step": 132891 + }, + { + "epoch": 1.6, + "grad_norm": 2.698236907266222, + "learning_rate": 2.0450303740306865e-06, + "loss": 1.0701, + "step": 132894 + }, + { + "epoch": 1.6, + "grad_norm": 5.4830137572850335, + "learning_rate": 2.044676400497341e-06, + "loss": 1.0149, + "step": 132897 + }, + { + "epoch": 1.6, + "grad_norm": 10.606290774627883, + "learning_rate": 2.0443224541126494e-06, + "loss": 1.1597, + "step": 132900 + }, + { + "epoch": 1.6, + "grad_norm": 6.6552979842408, + "learning_rate": 2.043968534877816e-06, + "loss": 0.9074, + "step": 132903 + }, + { + "epoch": 1.6, + "grad_norm": 34.61329959355539, + "learning_rate": 2.043614642794053e-06, + "loss": 1.1453, + "step": 132906 + }, + { + "epoch": 1.6, + "grad_norm": 5.715807251589293, + "learning_rate": 2.043260777862568e-06, + "loss": 1.2217, + "step": 132909 + }, + { + "epoch": 1.6, + "grad_norm": 14.945653409003949, + "learning_rate": 2.042906940084569e-06, + "loss": 1.1209, + "step": 132912 + }, + { + "epoch": 1.6, + "grad_norm": 5.140964298192863, + "learning_rate": 2.042553129461261e-06, + "loss": 0.9961, + "step": 132915 + }, + { + "epoch": 1.6, + "grad_norm": 5.063989900084224, + "learning_rate": 2.042199345993855e-06, + "loss": 1.1164, + "step": 132918 + }, + { + "epoch": 1.6, + "grad_norm": 5.021230030410377, + "learning_rate": 2.041845589683554e-06, + "loss": 1.1372, + "step": 132921 + }, + { + "epoch": 1.6, + "grad_norm": 35.42357435688295, + "learning_rate": 2.041491860531568e-06, + "loss": 1.2201, + "step": 132924 + }, + { + "epoch": 1.6, + "grad_norm": 16.55653700491809, + "learning_rate": 2.0411381585391055e-06, + "loss": 1.2556, + "step": 132927 + }, + { + "epoch": 1.6, + "grad_norm": 6.825753965809596, + "learning_rate": 2.040784483707369e-06, + "loss": 1.252, + "step": 132930 + }, + { + "epoch": 1.6, + "grad_norm": 6.6427032983432, + "learning_rate": 2.040430836037568e-06, + "loss": 1.3549, + "step": 132933 + }, + { + "epoch": 1.6, + "grad_norm": 8.27124941905832, + "learning_rate": 2.0400772155309103e-06, + "loss": 1.143, + "step": 132936 + }, + { + "epoch": 1.6, + "grad_norm": 7.486177009566469, + "learning_rate": 2.0397236221886042e-06, + "loss": 0.937, + "step": 132939 + }, + { + "epoch": 1.6, + "grad_norm": 18.0898912672242, + "learning_rate": 2.0393700560118544e-06, + "loss": 1.4238, + "step": 132942 + }, + { + "epoch": 1.6, + "grad_norm": 16.31054879310484, + "learning_rate": 2.039016517001864e-06, + "loss": 1.4136, + "step": 132945 + }, + { + "epoch": 1.6, + "grad_norm": 14.121692427409164, + "learning_rate": 2.038663005159843e-06, + "loss": 1.2276, + "step": 132948 + }, + { + "epoch": 1.6, + "grad_norm": 16.62423502426481, + "learning_rate": 2.038309520486997e-06, + "loss": 1.2442, + "step": 132951 + }, + { + "epoch": 1.6, + "grad_norm": 2.782810375660142, + "learning_rate": 2.0379560629845363e-06, + "loss": 1.0288, + "step": 132954 + }, + { + "epoch": 1.6, + "grad_norm": 12.413036896658245, + "learning_rate": 2.0376026326536604e-06, + "loss": 1.3478, + "step": 132957 + }, + { + "epoch": 1.6, + "grad_norm": 6.28862426728812, + "learning_rate": 2.0372492294955815e-06, + "loss": 1.4739, + "step": 132960 + }, + { + "epoch": 1.6, + "grad_norm": 5.548247216316895, + "learning_rate": 2.0368958535115004e-06, + "loss": 0.922, + "step": 132963 + }, + { + "epoch": 1.6, + "grad_norm": 8.664089584890855, + "learning_rate": 2.0365425047026277e-06, + "loss": 1.4772, + "step": 132966 + }, + { + "epoch": 1.6, + "grad_norm": 10.75632236264615, + "learning_rate": 2.0361891830701643e-06, + "loss": 0.9025, + "step": 132969 + }, + { + "epoch": 1.6, + "grad_norm": 11.084516084867975, + "learning_rate": 2.0358358886153197e-06, + "loss": 0.7405, + "step": 132972 + }, + { + "epoch": 1.6, + "grad_norm": 7.896417491389359, + "learning_rate": 2.0354826213392975e-06, + "loss": 0.88, + "step": 132975 + }, + { + "epoch": 1.6, + "grad_norm": 13.258577736356834, + "learning_rate": 2.035129381243304e-06, + "loss": 0.9412, + "step": 132978 + }, + { + "epoch": 1.6, + "grad_norm": 11.833460805466173, + "learning_rate": 2.0347761683285483e-06, + "loss": 1.259, + "step": 132981 + }, + { + "epoch": 1.6, + "grad_norm": 7.192415089435716, + "learning_rate": 2.0344229825962325e-06, + "loss": 0.9868, + "step": 132984 + }, + { + "epoch": 1.6, + "grad_norm": 8.066483172223283, + "learning_rate": 2.0340698240475587e-06, + "loss": 1.2666, + "step": 132987 + }, + { + "epoch": 1.6, + "grad_norm": 8.084668051474202, + "learning_rate": 2.033716692683736e-06, + "loss": 1.1938, + "step": 132990 + }, + { + "epoch": 1.6, + "grad_norm": 4.522944913106374, + "learning_rate": 2.0333635885059712e-06, + "loss": 1.0071, + "step": 132993 + }, + { + "epoch": 1.6, + "grad_norm": 6.360944742175292, + "learning_rate": 2.033010511515464e-06, + "loss": 1.0745, + "step": 132996 + }, + { + "epoch": 1.6, + "grad_norm": 2.6778021938127843, + "learning_rate": 2.032657461713422e-06, + "loss": 1.3639, + "step": 132999 + }, + { + "epoch": 1.6, + "grad_norm": 8.400729467967352, + "learning_rate": 2.0323044391010503e-06, + "loss": 0.9133, + "step": 133002 + }, + { + "epoch": 1.6, + "grad_norm": 9.598023659213577, + "learning_rate": 2.0319514436795575e-06, + "loss": 1.8856, + "step": 133005 + }, + { + "epoch": 1.6, + "grad_norm": 16.69657196879037, + "learning_rate": 2.0315984754501428e-06, + "loss": 1.1065, + "step": 133008 + }, + { + "epoch": 1.6, + "grad_norm": 24.326605558097505, + "learning_rate": 2.03124553441401e-06, + "loss": 1.2167, + "step": 133011 + }, + { + "epoch": 1.6, + "grad_norm": 5.658557482508612, + "learning_rate": 2.0308926205723667e-06, + "loss": 0.9631, + "step": 133014 + }, + { + "epoch": 1.6, + "grad_norm": 4.741563582473306, + "learning_rate": 2.0305397339264155e-06, + "loss": 0.9633, + "step": 133017 + }, + { + "epoch": 1.6, + "grad_norm": 6.779608435412792, + "learning_rate": 2.0301868744773646e-06, + "loss": 1.3616, + "step": 133020 + }, + { + "epoch": 1.6, + "grad_norm": 8.735340985915723, + "learning_rate": 2.029834042226413e-06, + "loss": 1.0161, + "step": 133023 + }, + { + "epoch": 1.6, + "grad_norm": 12.10639423967299, + "learning_rate": 2.02948123717477e-06, + "loss": 0.9689, + "step": 133026 + }, + { + "epoch": 1.6, + "grad_norm": 5.16154030975538, + "learning_rate": 2.029128459323633e-06, + "loss": 1.1532, + "step": 133029 + }, + { + "epoch": 1.6, + "grad_norm": 7.782648301533556, + "learning_rate": 2.028775708674209e-06, + "loss": 1.2314, + "step": 133032 + }, + { + "epoch": 1.6, + "grad_norm": 7.517422946819747, + "learning_rate": 2.0284229852277047e-06, + "loss": 1.1556, + "step": 133035 + }, + { + "epoch": 1.6, + "grad_norm": 9.204359790575248, + "learning_rate": 2.02807028898532e-06, + "loss": 1.2793, + "step": 133038 + }, + { + "epoch": 1.6, + "grad_norm": 13.311338029960854, + "learning_rate": 2.0277176199482595e-06, + "loss": 0.9901, + "step": 133041 + }, + { + "epoch": 1.6, + "grad_norm": 32.8243806137632, + "learning_rate": 2.0273649781177275e-06, + "loss": 1.2857, + "step": 133044 + }, + { + "epoch": 1.6, + "grad_norm": 4.790747439014817, + "learning_rate": 2.027012363494929e-06, + "loss": 1.2703, + "step": 133047 + }, + { + "epoch": 1.6, + "grad_norm": 41.717901244979316, + "learning_rate": 2.026659776081066e-06, + "loss": 1.229, + "step": 133050 + }, + { + "epoch": 1.6, + "grad_norm": 3.793322160827761, + "learning_rate": 2.0263072158773377e-06, + "loss": 1.2678, + "step": 133053 + }, + { + "epoch": 1.6, + "grad_norm": 9.390550116802244, + "learning_rate": 2.025954682884952e-06, + "loss": 1.0178, + "step": 133056 + }, + { + "epoch": 1.6, + "grad_norm": 10.467143029955851, + "learning_rate": 2.0256021771051103e-06, + "loss": 0.7663, + "step": 133059 + }, + { + "epoch": 1.6, + "grad_norm": 6.3750568290738405, + "learning_rate": 2.025249698539018e-06, + "loss": 1.1677, + "step": 133062 + }, + { + "epoch": 1.6, + "grad_norm": 8.750646805079368, + "learning_rate": 2.0248972471878746e-06, + "loss": 1.0373, + "step": 133065 + }, + { + "epoch": 1.6, + "grad_norm": 11.984115304517752, + "learning_rate": 2.0245448230528853e-06, + "loss": 1.157, + "step": 133068 + }, + { + "epoch": 1.6, + "grad_norm": 6.438955127913131, + "learning_rate": 2.0241924261352507e-06, + "loss": 1.0727, + "step": 133071 + }, + { + "epoch": 1.6, + "grad_norm": 49.560070213112894, + "learning_rate": 2.023840056436176e-06, + "loss": 1.1433, + "step": 133074 + }, + { + "epoch": 1.6, + "grad_norm": 9.19293069057895, + "learning_rate": 2.0234877139568596e-06, + "loss": 1.2787, + "step": 133077 + }, + { + "epoch": 1.6, + "grad_norm": 11.80882746602734, + "learning_rate": 2.023135398698507e-06, + "loss": 1.1197, + "step": 133080 + }, + { + "epoch": 1.6, + "grad_norm": 3.794414234548882, + "learning_rate": 2.0227831106623208e-06, + "loss": 0.9394, + "step": 133083 + }, + { + "epoch": 1.6, + "grad_norm": 8.323175912615115, + "learning_rate": 2.0224308498495015e-06, + "loss": 1.2646, + "step": 133086 + }, + { + "epoch": 1.6, + "grad_norm": 6.103507400201401, + "learning_rate": 2.022078616261255e-06, + "loss": 1.1545, + "step": 133089 + }, + { + "epoch": 1.6, + "grad_norm": 8.808619022042624, + "learning_rate": 2.02172640989878e-06, + "loss": 1.3644, + "step": 133092 + }, + { + "epoch": 1.6, + "grad_norm": 4.00076834171701, + "learning_rate": 2.0213742307632768e-06, + "loss": 1.1893, + "step": 133095 + }, + { + "epoch": 1.6, + "grad_norm": 3.846598303061401, + "learning_rate": 2.0210220788559497e-06, + "loss": 1.3733, + "step": 133098 + }, + { + "epoch": 1.6, + "grad_norm": 4.307141837579849, + "learning_rate": 2.0206699541780027e-06, + "loss": 1.4277, + "step": 133101 + }, + { + "epoch": 1.6, + "grad_norm": 6.976641465958028, + "learning_rate": 2.0203178567306323e-06, + "loss": 1.1337, + "step": 133104 + }, + { + "epoch": 1.6, + "grad_norm": 7.310854570493572, + "learning_rate": 2.0199657865150425e-06, + "loss": 1.4964, + "step": 133107 + }, + { + "epoch": 1.6, + "grad_norm": 16.92795567929289, + "learning_rate": 2.0196137435324382e-06, + "loss": 1.2816, + "step": 133110 + }, + { + "epoch": 1.6, + "grad_norm": 6.519954862548834, + "learning_rate": 2.019261727784014e-06, + "loss": 1.0521, + "step": 133113 + }, + { + "epoch": 1.6, + "grad_norm": 4.176618332593998, + "learning_rate": 2.0189097392709787e-06, + "loss": 1.0795, + "step": 133116 + }, + { + "epoch": 1.6, + "grad_norm": 11.212956734291428, + "learning_rate": 2.0185577779945277e-06, + "loss": 0.9217, + "step": 133119 + }, + { + "epoch": 1.6, + "grad_norm": 7.518785163137481, + "learning_rate": 2.018205843955863e-06, + "loss": 1.1875, + "step": 133122 + }, + { + "epoch": 1.6, + "grad_norm": 18.765024238725054, + "learning_rate": 2.017853937156187e-06, + "loss": 1.1608, + "step": 133125 + }, + { + "epoch": 1.6, + "grad_norm": 11.771992070409949, + "learning_rate": 2.017502057596702e-06, + "loss": 1.5525, + "step": 133128 + }, + { + "epoch": 1.6, + "grad_norm": 2.488221936127392, + "learning_rate": 2.0171502052786053e-06, + "loss": 1.0128, + "step": 133131 + }, + { + "epoch": 1.6, + "grad_norm": 2.146945873776532, + "learning_rate": 2.016798380203102e-06, + "loss": 0.9733, + "step": 133134 + }, + { + "epoch": 1.6, + "grad_norm": 8.529930098226316, + "learning_rate": 2.0164465823713884e-06, + "loss": 1.2743, + "step": 133137 + }, + { + "epoch": 1.6, + "grad_norm": 18.8814697331474, + "learning_rate": 2.0160948117846667e-06, + "loss": 0.9085, + "step": 133140 + }, + { + "epoch": 1.6, + "grad_norm": 9.441856535807096, + "learning_rate": 2.0157430684441404e-06, + "loss": 1.2094, + "step": 133143 + }, + { + "epoch": 1.6, + "grad_norm": 3.952057834196022, + "learning_rate": 2.015391352351004e-06, + "loss": 1.0228, + "step": 133146 + }, + { + "epoch": 1.6, + "grad_norm": 15.604278905971551, + "learning_rate": 2.0150396635064607e-06, + "loss": 0.8908, + "step": 133149 + }, + { + "epoch": 1.6, + "grad_norm": 6.262660649158606, + "learning_rate": 2.014688001911713e-06, + "loss": 1.0331, + "step": 133152 + }, + { + "epoch": 1.6, + "grad_norm": 5.867458056613561, + "learning_rate": 2.0143363675679604e-06, + "loss": 1.2955, + "step": 133155 + }, + { + "epoch": 1.6, + "grad_norm": 2.1184254554774165, + "learning_rate": 2.0139847604763975e-06, + "loss": 1.1016, + "step": 133158 + }, + { + "epoch": 1.6, + "grad_norm": 13.621966321572351, + "learning_rate": 2.0136331806382293e-06, + "loss": 1.549, + "step": 133161 + }, + { + "epoch": 1.6, + "grad_norm": 7.69519073469823, + "learning_rate": 2.013281628054653e-06, + "loss": 1.2218, + "step": 133164 + }, + { + "epoch": 1.6, + "grad_norm": 6.1653450359440845, + "learning_rate": 2.01293010272687e-06, + "loss": 1.0148, + "step": 133167 + }, + { + "epoch": 1.6, + "grad_norm": 9.827841987813093, + "learning_rate": 2.0125786046560824e-06, + "loss": 1.1405, + "step": 133170 + }, + { + "epoch": 1.6, + "grad_norm": 5.901724154571863, + "learning_rate": 2.012227133843485e-06, + "loss": 1.1901, + "step": 133173 + }, + { + "epoch": 1.6, + "grad_norm": 14.704162282112533, + "learning_rate": 2.0118756902902814e-06, + "loss": 1.5105, + "step": 133176 + }, + { + "epoch": 1.6, + "grad_norm": 7.2816604980050785, + "learning_rate": 2.011524273997666e-06, + "loss": 1.2903, + "step": 133179 + }, + { + "epoch": 1.6, + "grad_norm": 7.8350172441265, + "learning_rate": 2.0111728849668434e-06, + "loss": 1.0353, + "step": 133182 + }, + { + "epoch": 1.6, + "grad_norm": 7.098217739676411, + "learning_rate": 2.010821523199008e-06, + "loss": 1.2577, + "step": 133185 + }, + { + "epoch": 1.6, + "grad_norm": 9.388116619851871, + "learning_rate": 2.01047018869536e-06, + "loss": 1.1104, + "step": 133188 + }, + { + "epoch": 1.6, + "grad_norm": 12.174544148016968, + "learning_rate": 2.0101188814571003e-06, + "loss": 1.0405, + "step": 133191 + }, + { + "epoch": 1.6, + "grad_norm": 11.211052121361762, + "learning_rate": 2.0097676014854294e-06, + "loss": 0.8928, + "step": 133194 + }, + { + "epoch": 1.6, + "grad_norm": 7.135861100391267, + "learning_rate": 2.009416348781543e-06, + "loss": 1.2802, + "step": 133197 + }, + { + "epoch": 1.6, + "grad_norm": 20.863852566182242, + "learning_rate": 2.0090651233466395e-06, + "loss": 1.0358, + "step": 133200 + }, + { + "epoch": 1.6, + "grad_norm": 2.788715578419174, + "learning_rate": 2.0087139251819164e-06, + "loss": 1.1934, + "step": 133203 + }, + { + "epoch": 1.6, + "grad_norm": 12.715677766306795, + "learning_rate": 2.008362754288575e-06, + "loss": 1.4338, + "step": 133206 + }, + { + "epoch": 1.6, + "grad_norm": 12.386872502283351, + "learning_rate": 2.008011610667816e-06, + "loss": 1.1945, + "step": 133209 + }, + { + "epoch": 1.6, + "grad_norm": 4.87520573753951, + "learning_rate": 2.0076604943208323e-06, + "loss": 1.0862, + "step": 133212 + }, + { + "epoch": 1.6, + "grad_norm": 8.404482054233682, + "learning_rate": 2.007309405248824e-06, + "loss": 1.367, + "step": 133215 + }, + { + "epoch": 1.6, + "grad_norm": 4.159153095570674, + "learning_rate": 2.0069583434529917e-06, + "loss": 1.3366, + "step": 133218 + }, + { + "epoch": 1.6, + "grad_norm": 2.3994669683652745, + "learning_rate": 2.0066073089345296e-06, + "loss": 1.1389, + "step": 133221 + }, + { + "epoch": 1.6, + "grad_norm": 2.496648333836193, + "learning_rate": 2.0062563016946402e-06, + "loss": 1.0064, + "step": 133224 + }, + { + "epoch": 1.6, + "grad_norm": 6.265878121230122, + "learning_rate": 2.005905321734516e-06, + "loss": 0.946, + "step": 133227 + }, + { + "epoch": 1.6, + "grad_norm": 5.90866934702476, + "learning_rate": 2.005554369055357e-06, + "loss": 0.9669, + "step": 133230 + }, + { + "epoch": 1.6, + "grad_norm": 3.043963817265878, + "learning_rate": 2.0052034436583625e-06, + "loss": 0.7817, + "step": 133233 + }, + { + "epoch": 1.6, + "grad_norm": 10.510563645004218, + "learning_rate": 2.00485254554473e-06, + "loss": 1.0452, + "step": 133236 + }, + { + "epoch": 1.6, + "grad_norm": 3.8163520441405567, + "learning_rate": 2.0045016747156543e-06, + "loss": 1.3965, + "step": 133239 + }, + { + "epoch": 1.6, + "grad_norm": 16.553076036168502, + "learning_rate": 2.0041508311723367e-06, + "loss": 0.687, + "step": 133242 + }, + { + "epoch": 1.6, + "grad_norm": 9.275757569735577, + "learning_rate": 2.0038000149159696e-06, + "loss": 0.992, + "step": 133245 + }, + { + "epoch": 1.6, + "grad_norm": 16.256059440750093, + "learning_rate": 2.0034492259477535e-06, + "loss": 1.1042, + "step": 133248 + }, + { + "epoch": 1.6, + "grad_norm": 12.115863156605695, + "learning_rate": 2.0030984642688867e-06, + "loss": 1.1455, + "step": 133251 + }, + { + "epoch": 1.6, + "grad_norm": 4.243177019647389, + "learning_rate": 2.002747729880562e-06, + "loss": 1.0067, + "step": 133254 + }, + { + "epoch": 1.6, + "grad_norm": 7.72648601543602, + "learning_rate": 2.002397022783978e-06, + "loss": 1.0416, + "step": 133257 + }, + { + "epoch": 1.6, + "grad_norm": 9.250834597041964, + "learning_rate": 2.0020463429803363e-06, + "loss": 1.3103, + "step": 133260 + }, + { + "epoch": 1.6, + "grad_norm": 6.467882461394953, + "learning_rate": 2.0016956904708284e-06, + "loss": 1.2716, + "step": 133263 + }, + { + "epoch": 1.6, + "grad_norm": 6.804564446800145, + "learning_rate": 2.0013450652566502e-06, + "loss": 1.3459, + "step": 133266 + }, + { + "epoch": 1.6, + "grad_norm": 4.15158742942203, + "learning_rate": 2.0009944673390003e-06, + "loss": 1.5023, + "step": 133269 + }, + { + "epoch": 1.6, + "grad_norm": 2.757053949290991, + "learning_rate": 2.0006438967190746e-06, + "loss": 1.0687, + "step": 133272 + }, + { + "epoch": 1.6, + "grad_norm": 12.317478792313134, + "learning_rate": 2.0002933533980696e-06, + "loss": 1.0033, + "step": 133275 + }, + { + "epoch": 1.6, + "grad_norm": 10.079810344589637, + "learning_rate": 1.9999428373771846e-06, + "loss": 1.1241, + "step": 133278 + }, + { + "epoch": 1.6, + "grad_norm": 7.862570444663516, + "learning_rate": 1.9995923486576106e-06, + "loss": 1.3179, + "step": 133281 + }, + { + "epoch": 1.6, + "grad_norm": 14.854143490601327, + "learning_rate": 1.9992418872405496e-06, + "loss": 1.0297, + "step": 133284 + }, + { + "epoch": 1.6, + "grad_norm": 6.123193006554074, + "learning_rate": 1.9988914531271907e-06, + "loss": 1.1763, + "step": 133287 + }, + { + "epoch": 1.6, + "grad_norm": 30.549960562365307, + "learning_rate": 1.998541046318736e-06, + "loss": 1.0041, + "step": 133290 + }, + { + "epoch": 1.6, + "grad_norm": 7.083017292102984, + "learning_rate": 1.9981906668163763e-06, + "loss": 1.4065, + "step": 133293 + }, + { + "epoch": 1.6, + "grad_norm": 3.64379463191471, + "learning_rate": 1.9978403146213098e-06, + "loss": 1.2204, + "step": 133296 + }, + { + "epoch": 1.6, + "grad_norm": 15.581089264906385, + "learning_rate": 1.997489989734731e-06, + "loss": 1.2229, + "step": 133299 + }, + { + "epoch": 1.6, + "grad_norm": 2.9841251578164454, + "learning_rate": 1.9971396921578402e-06, + "loss": 1.0777, + "step": 133302 + }, + { + "epoch": 1.6, + "grad_norm": 11.189460143490434, + "learning_rate": 1.9967894218918284e-06, + "loss": 1.3439, + "step": 133305 + }, + { + "epoch": 1.6, + "grad_norm": 6.427199053219384, + "learning_rate": 1.9964391789378888e-06, + "loss": 1.1117, + "step": 133308 + }, + { + "epoch": 1.6, + "grad_norm": 3.2483378751589287, + "learning_rate": 1.9960889632972193e-06, + "loss": 1.0791, + "step": 133311 + }, + { + "epoch": 1.6, + "grad_norm": 11.464030662791581, + "learning_rate": 1.9957387749710153e-06, + "loss": 0.955, + "step": 133314 + }, + { + "epoch": 1.6, + "grad_norm": 3.067433857364063, + "learning_rate": 1.995388613960475e-06, + "loss": 1.2431, + "step": 133317 + }, + { + "epoch": 1.6, + "grad_norm": 6.384316194660792, + "learning_rate": 1.9950384802667868e-06, + "loss": 0.9638, + "step": 133320 + }, + { + "epoch": 1.6, + "grad_norm": 9.228351980909707, + "learning_rate": 1.994688373891149e-06, + "loss": 1.143, + "step": 133323 + }, + { + "epoch": 1.6, + "grad_norm": 8.5500727803492, + "learning_rate": 1.994338294834759e-06, + "loss": 1.1999, + "step": 133326 + }, + { + "epoch": 1.6, + "grad_norm": 9.345963091066242, + "learning_rate": 1.9939882430988065e-06, + "loss": 1.1318, + "step": 133329 + }, + { + "epoch": 1.6, + "grad_norm": 30.28451929309338, + "learning_rate": 1.9936382186844905e-06, + "loss": 1.2964, + "step": 133332 + }, + { + "epoch": 1.6, + "grad_norm": 8.730262264187402, + "learning_rate": 1.9932882215930005e-06, + "loss": 1.2006, + "step": 133335 + }, + { + "epoch": 1.6, + "grad_norm": 5.078755715514663, + "learning_rate": 1.9929382518255348e-06, + "loss": 1.1958, + "step": 133338 + }, + { + "epoch": 1.6, + "grad_norm": 15.816261483969845, + "learning_rate": 1.9925883093832854e-06, + "loss": 0.9164, + "step": 133341 + }, + { + "epoch": 1.6, + "grad_norm": 3.480441341801314, + "learning_rate": 1.992238394267452e-06, + "loss": 1.1556, + "step": 133344 + }, + { + "epoch": 1.6, + "grad_norm": 30.45722731814819, + "learning_rate": 1.9918885064792237e-06, + "loss": 1.0762, + "step": 133347 + }, + { + "epoch": 1.6, + "grad_norm": 3.5935269062059265, + "learning_rate": 1.9915386460197927e-06, + "loss": 0.9915, + "step": 133350 + }, + { + "epoch": 1.6, + "grad_norm": 8.503477450670648, + "learning_rate": 1.9911888128903555e-06, + "loss": 1.4324, + "step": 133353 + }, + { + "epoch": 1.6, + "grad_norm": 9.783444546804112, + "learning_rate": 1.990839007092107e-06, + "loss": 1.1404, + "step": 133356 + }, + { + "epoch": 1.6, + "grad_norm": 23.695709106792577, + "learning_rate": 1.9904892286262412e-06, + "loss": 1.0665, + "step": 133359 + }, + { + "epoch": 1.6, + "grad_norm": 30.033041968117214, + "learning_rate": 1.9901394774939496e-06, + "loss": 1.1419, + "step": 133362 + }, + { + "epoch": 1.6, + "grad_norm": 8.735802578476894, + "learning_rate": 1.9897897536964262e-06, + "loss": 0.9823, + "step": 133365 + }, + { + "epoch": 1.6, + "grad_norm": 8.550517962170147, + "learning_rate": 1.9894400572348673e-06, + "loss": 1.3962, + "step": 133368 + }, + { + "epoch": 1.6, + "grad_norm": 6.152518080968745, + "learning_rate": 1.989090388110465e-06, + "loss": 0.9254, + "step": 133371 + }, + { + "epoch": 1.6, + "grad_norm": 7.852638803286538, + "learning_rate": 1.988740746324408e-06, + "loss": 1.0399, + "step": 133374 + }, + { + "epoch": 1.6, + "grad_norm": 8.708784338523593, + "learning_rate": 1.9883911318778938e-06, + "loss": 0.8724, + "step": 133377 + }, + { + "epoch": 1.6, + "grad_norm": 15.893072109616385, + "learning_rate": 1.9880415447721148e-06, + "loss": 1.3283, + "step": 133380 + }, + { + "epoch": 1.6, + "grad_norm": 5.04082708125543, + "learning_rate": 1.987691985008264e-06, + "loss": 1.1305, + "step": 133383 + }, + { + "epoch": 1.6, + "grad_norm": 7.663351729992027, + "learning_rate": 1.9873424525875375e-06, + "loss": 1.0174, + "step": 133386 + }, + { + "epoch": 1.6, + "grad_norm": 14.416913634324448, + "learning_rate": 1.986992947511125e-06, + "loss": 1.1474, + "step": 133389 + }, + { + "epoch": 1.6, + "grad_norm": 7.4350783723053855, + "learning_rate": 1.9866434697802162e-06, + "loss": 0.8408, + "step": 133392 + }, + { + "epoch": 1.6, + "grad_norm": 7.592264928647656, + "learning_rate": 1.9862940193960078e-06, + "loss": 1.092, + "step": 133395 + }, + { + "epoch": 1.6, + "grad_norm": 10.09571467652177, + "learning_rate": 1.9859445963596937e-06, + "loss": 1.1327, + "step": 133398 + }, + { + "epoch": 1.6, + "grad_norm": 4.98883274208997, + "learning_rate": 1.98559520067246e-06, + "loss": 0.8907, + "step": 133401 + }, + { + "epoch": 1.6, + "grad_norm": 3.5824423807439434, + "learning_rate": 1.985245832335504e-06, + "loss": 1.2011, + "step": 133404 + }, + { + "epoch": 1.6, + "grad_norm": 9.672101762418308, + "learning_rate": 1.984896491350018e-06, + "loss": 0.7728, + "step": 133407 + }, + { + "epoch": 1.6, + "grad_norm": 7.366179587824243, + "learning_rate": 1.9845471777171944e-06, + "loss": 1.0906, + "step": 133410 + }, + { + "epoch": 1.6, + "grad_norm": 4.980479641205898, + "learning_rate": 1.9841978914382253e-06, + "loss": 1.1581, + "step": 133413 + }, + { + "epoch": 1.6, + "grad_norm": 7.9825128707649355, + "learning_rate": 1.983848632514298e-06, + "loss": 0.8705, + "step": 133416 + }, + { + "epoch": 1.6, + "grad_norm": 6.178536089922712, + "learning_rate": 1.983499400946609e-06, + "loss": 1.2789, + "step": 133419 + }, + { + "epoch": 1.6, + "grad_norm": 4.405038237601058, + "learning_rate": 1.9831501967363474e-06, + "loss": 1.2003, + "step": 133422 + }, + { + "epoch": 1.6, + "grad_norm": 6.658203886105889, + "learning_rate": 1.98280101988471e-06, + "loss": 1.0233, + "step": 133425 + }, + { + "epoch": 1.6, + "grad_norm": 8.562068397127609, + "learning_rate": 1.982451870392883e-06, + "loss": 1.0107, + "step": 133428 + }, + { + "epoch": 1.6, + "grad_norm": 11.60376026166159, + "learning_rate": 1.9821027482620625e-06, + "loss": 1.1118, + "step": 133431 + }, + { + "epoch": 1.6, + "grad_norm": 2.9378645878822196, + "learning_rate": 1.9817536534934333e-06, + "loss": 1.3005, + "step": 133434 + }, + { + "epoch": 1.6, + "grad_norm": 15.43577937116103, + "learning_rate": 1.9814045860881915e-06, + "loss": 1.3604, + "step": 133437 + }, + { + "epoch": 1.6, + "grad_norm": 57.28987698871164, + "learning_rate": 1.98105554604753e-06, + "loss": 1.5002, + "step": 133440 + }, + { + "epoch": 1.6, + "grad_norm": 4.897915762261577, + "learning_rate": 1.980706533372635e-06, + "loss": 1.0775, + "step": 133443 + }, + { + "epoch": 1.6, + "grad_norm": 8.218824466681724, + "learning_rate": 1.980357548064701e-06, + "loss": 1.2829, + "step": 133446 + }, + { + "epoch": 1.6, + "grad_norm": 6.3117969878852955, + "learning_rate": 1.9800085901249167e-06, + "loss": 1.1008, + "step": 133449 + }, + { + "epoch": 1.6, + "grad_norm": 5.818415086354127, + "learning_rate": 1.979659659554477e-06, + "loss": 1.1867, + "step": 133452 + }, + { + "epoch": 1.6, + "grad_norm": 18.6773250026128, + "learning_rate": 1.979310756354571e-06, + "loss": 1.4503, + "step": 133455 + }, + { + "epoch": 1.6, + "grad_norm": 15.886217059351393, + "learning_rate": 1.978961880526384e-06, + "loss": 1.1232, + "step": 133458 + }, + { + "epoch": 1.6, + "grad_norm": 6.1822620945714295, + "learning_rate": 1.978613032071113e-06, + "loss": 0.8615, + "step": 133461 + }, + { + "epoch": 1.6, + "grad_norm": 3.970758026637048, + "learning_rate": 1.9782642109899454e-06, + "loss": 0.8834, + "step": 133464 + }, + { + "epoch": 1.6, + "grad_norm": 15.645824584565563, + "learning_rate": 1.9779154172840754e-06, + "loss": 1.167, + "step": 133467 + }, + { + "epoch": 1.6, + "grad_norm": 10.818652834473662, + "learning_rate": 1.9775666509546877e-06, + "loss": 1.3423, + "step": 133470 + }, + { + "epoch": 1.6, + "grad_norm": 3.6844095506246166, + "learning_rate": 1.977217912002978e-06, + "loss": 1.1916, + "step": 133473 + }, + { + "epoch": 1.61, + "grad_norm": 5.548979894451897, + "learning_rate": 1.9768692004301303e-06, + "loss": 1.3042, + "step": 133476 + }, + { + "epoch": 1.61, + "grad_norm": 5.211210598814929, + "learning_rate": 1.9765205162373424e-06, + "loss": 1.1719, + "step": 133479 + }, + { + "epoch": 1.61, + "grad_norm": 8.207892361655139, + "learning_rate": 1.976171859425796e-06, + "loss": 1.0269, + "step": 133482 + }, + { + "epoch": 1.61, + "grad_norm": 6.167098184408019, + "learning_rate": 1.9758232299966864e-06, + "loss": 1.4381, + "step": 133485 + }, + { + "epoch": 1.61, + "grad_norm": 3.443583659463797, + "learning_rate": 1.9754746279512006e-06, + "loss": 1.3132, + "step": 133488 + }, + { + "epoch": 1.61, + "grad_norm": 4.952473918894116, + "learning_rate": 1.9751260532905292e-06, + "loss": 1.0436, + "step": 133491 + }, + { + "epoch": 1.61, + "grad_norm": 41.980975149219816, + "learning_rate": 1.974777506015866e-06, + "loss": 1.1851, + "step": 133494 + }, + { + "epoch": 1.61, + "grad_norm": 8.718874496461725, + "learning_rate": 1.974428986128395e-06, + "loss": 1.5847, + "step": 133497 + }, + { + "epoch": 1.61, + "grad_norm": 18.18554448400728, + "learning_rate": 1.974080493629306e-06, + "loss": 1.1136, + "step": 133500 + }, + { + "epoch": 1.61, + "grad_norm": 4.122478302766309, + "learning_rate": 1.973732028519788e-06, + "loss": 1.1465, + "step": 133503 + }, + { + "epoch": 1.61, + "grad_norm": 6.326572403319872, + "learning_rate": 1.9733835908010344e-06, + "loss": 1.383, + "step": 133506 + }, + { + "epoch": 1.61, + "grad_norm": 11.564983763620866, + "learning_rate": 1.973035180474229e-06, + "loss": 1.2246, + "step": 133509 + }, + { + "epoch": 1.61, + "grad_norm": 5.228390835629916, + "learning_rate": 1.972686797540563e-06, + "loss": 1.2751, + "step": 133512 + }, + { + "epoch": 1.61, + "grad_norm": 6.28506892215344, + "learning_rate": 1.9723384420012283e-06, + "loss": 1.7196, + "step": 133515 + }, + { + "epoch": 1.61, + "grad_norm": 8.79968333390483, + "learning_rate": 1.971990113857408e-06, + "loss": 1.0449, + "step": 133518 + }, + { + "epoch": 1.61, + "grad_norm": 10.411117284622984, + "learning_rate": 1.9716418131102975e-06, + "loss": 1.4739, + "step": 133521 + }, + { + "epoch": 1.61, + "grad_norm": 15.4682522011305, + "learning_rate": 1.971293539761079e-06, + "loss": 0.9815, + "step": 133524 + }, + { + "epoch": 1.61, + "grad_norm": 5.62587792210024, + "learning_rate": 1.970945293810943e-06, + "loss": 1.151, + "step": 133527 + }, + { + "epoch": 1.61, + "grad_norm": 3.306249738915893, + "learning_rate": 1.9705970752610783e-06, + "loss": 1.0064, + "step": 133530 + }, + { + "epoch": 1.61, + "grad_norm": 3.1541197590001158, + "learning_rate": 1.9702488841126766e-06, + "loss": 1.3711, + "step": 133533 + }, + { + "epoch": 1.61, + "grad_norm": 7.576890766997979, + "learning_rate": 1.9699007203669207e-06, + "loss": 0.9378, + "step": 133536 + }, + { + "epoch": 1.61, + "grad_norm": 5.726601699028789, + "learning_rate": 1.969552584025004e-06, + "loss": 1.2321, + "step": 133539 + }, + { + "epoch": 1.61, + "grad_norm": 18.32250797235228, + "learning_rate": 1.969204475088109e-06, + "loss": 1.4883, + "step": 133542 + }, + { + "epoch": 1.61, + "grad_norm": 3.663341476948225, + "learning_rate": 1.968856393557427e-06, + "loss": 0.952, + "step": 133545 + }, + { + "epoch": 1.61, + "grad_norm": 6.383656990423913, + "learning_rate": 1.9685083394341465e-06, + "loss": 1.3278, + "step": 133548 + }, + { + "epoch": 1.61, + "grad_norm": 6.75007406252369, + "learning_rate": 1.9681603127194525e-06, + "loss": 1.0358, + "step": 133551 + }, + { + "epoch": 1.61, + "grad_norm": 21.296792187759443, + "learning_rate": 1.9678123134145342e-06, + "loss": 0.9521, + "step": 133554 + }, + { + "epoch": 1.61, + "grad_norm": 17.94171804107844, + "learning_rate": 1.9674643415205817e-06, + "loss": 1.4324, + "step": 133557 + }, + { + "epoch": 1.61, + "grad_norm": 10.112013440121704, + "learning_rate": 1.9671163970387774e-06, + "loss": 0.6852, + "step": 133560 + }, + { + "epoch": 1.61, + "grad_norm": 13.02968451276547, + "learning_rate": 1.9667684799703144e-06, + "loss": 1.2006, + "step": 133563 + }, + { + "epoch": 1.61, + "grad_norm": 3.6389006729028606, + "learning_rate": 1.9664205903163747e-06, + "loss": 1.2082, + "step": 133566 + }, + { + "epoch": 1.61, + "grad_norm": 21.272716173532153, + "learning_rate": 1.9660727280781467e-06, + "loss": 1.1347, + "step": 133569 + }, + { + "epoch": 1.61, + "grad_norm": 9.413619615860386, + "learning_rate": 1.9657248932568196e-06, + "loss": 0.963, + "step": 133572 + }, + { + "epoch": 1.61, + "grad_norm": 7.580874678226245, + "learning_rate": 1.9653770858535827e-06, + "loss": 1.4295, + "step": 133575 + }, + { + "epoch": 1.61, + "grad_norm": 4.35165857032433, + "learning_rate": 1.965029305869616e-06, + "loss": 1.1726, + "step": 133578 + }, + { + "epoch": 1.61, + "grad_norm": 3.0742974263367153, + "learning_rate": 1.9646815533061125e-06, + "loss": 1.0359, + "step": 133581 + }, + { + "epoch": 1.61, + "grad_norm": 4.944493979235434, + "learning_rate": 1.9643338281642544e-06, + "loss": 0.9928, + "step": 133584 + }, + { + "epoch": 1.61, + "grad_norm": 14.538986806719924, + "learning_rate": 1.9639861304452336e-06, + "loss": 0.8569, + "step": 133587 + }, + { + "epoch": 1.61, + "grad_norm": 20.090401254281165, + "learning_rate": 1.96363846015023e-06, + "loss": 1.2235, + "step": 133590 + }, + { + "epoch": 1.61, + "grad_norm": 15.847079399527257, + "learning_rate": 1.9632908172804343e-06, + "loss": 1.5166, + "step": 133593 + }, + { + "epoch": 1.61, + "grad_norm": 10.96431479433423, + "learning_rate": 1.962943201837032e-06, + "loss": 1.0375, + "step": 133596 + }, + { + "epoch": 1.61, + "grad_norm": 6.100148686776028, + "learning_rate": 1.96259561382121e-06, + "loss": 1.2384, + "step": 133599 + }, + { + "epoch": 1.61, + "grad_norm": 7.313712243234498, + "learning_rate": 1.962248053234157e-06, + "loss": 0.8761, + "step": 133602 + }, + { + "epoch": 1.61, + "grad_norm": 6.659312141910954, + "learning_rate": 1.961900520077056e-06, + "loss": 1.2952, + "step": 133605 + }, + { + "epoch": 1.61, + "grad_norm": 4.6015596247214345, + "learning_rate": 1.9615530143510897e-06, + "loss": 0.8974, + "step": 133608 + }, + { + "epoch": 1.61, + "grad_norm": 19.095874006856075, + "learning_rate": 1.9612055360574476e-06, + "loss": 0.9889, + "step": 133611 + }, + { + "epoch": 1.61, + "grad_norm": 6.959295161423098, + "learning_rate": 1.9608580851973193e-06, + "loss": 1.0037, + "step": 133614 + }, + { + "epoch": 1.61, + "grad_norm": 12.304214749258868, + "learning_rate": 1.9605106617718826e-06, + "loss": 1.1015, + "step": 133617 + }, + { + "epoch": 1.61, + "grad_norm": 4.354522126769187, + "learning_rate": 1.9601632657823276e-06, + "loss": 1.1188, + "step": 133620 + }, + { + "epoch": 1.61, + "grad_norm": 4.880801301259423, + "learning_rate": 1.959815897229843e-06, + "loss": 0.9013, + "step": 133623 + }, + { + "epoch": 1.61, + "grad_norm": 7.306395341245758, + "learning_rate": 1.959468556115608e-06, + "loss": 1.3118, + "step": 133626 + }, + { + "epoch": 1.61, + "grad_norm": 8.45943166885459, + "learning_rate": 1.9591212424408122e-06, + "loss": 1.1124, + "step": 133629 + }, + { + "epoch": 1.61, + "grad_norm": 6.234769785312264, + "learning_rate": 1.9587739562066377e-06, + "loss": 1.4572, + "step": 133632 + }, + { + "epoch": 1.61, + "grad_norm": 7.5725967742693205, + "learning_rate": 1.9584266974142698e-06, + "loss": 1.3664, + "step": 133635 + }, + { + "epoch": 1.61, + "grad_norm": 29.08411973279983, + "learning_rate": 1.958079466064896e-06, + "loss": 1.2463, + "step": 133638 + }, + { + "epoch": 1.61, + "grad_norm": 6.322068707049658, + "learning_rate": 1.957732262159703e-06, + "loss": 1.0276, + "step": 133641 + }, + { + "epoch": 1.61, + "grad_norm": 9.396917102780145, + "learning_rate": 1.95738508569987e-06, + "loss": 1.0713, + "step": 133644 + }, + { + "epoch": 1.61, + "grad_norm": 10.787521422835436, + "learning_rate": 1.9570379366865876e-06, + "loss": 1.1223, + "step": 133647 + }, + { + "epoch": 1.61, + "grad_norm": 6.613319640128508, + "learning_rate": 1.956690815121035e-06, + "loss": 1.4298, + "step": 133650 + }, + { + "epoch": 1.61, + "grad_norm": 12.878107256257538, + "learning_rate": 1.9563437210044002e-06, + "loss": 1.094, + "step": 133653 + }, + { + "epoch": 1.61, + "grad_norm": 16.239990392153814, + "learning_rate": 1.9559966543378682e-06, + "loss": 1.4465, + "step": 133656 + }, + { + "epoch": 1.61, + "grad_norm": 119.4120070837948, + "learning_rate": 1.95564961512262e-06, + "loss": 1.0427, + "step": 133659 + }, + { + "epoch": 1.61, + "grad_norm": 3.4515587186070427, + "learning_rate": 1.9553026033598433e-06, + "loss": 0.8142, + "step": 133662 + }, + { + "epoch": 1.61, + "grad_norm": 7.970618460524147, + "learning_rate": 1.9549556190507224e-06, + "loss": 1.2616, + "step": 133665 + }, + { + "epoch": 1.61, + "grad_norm": 5.040251618072762, + "learning_rate": 1.954608662196441e-06, + "loss": 0.7981, + "step": 133668 + }, + { + "epoch": 1.61, + "grad_norm": 27.45260910569837, + "learning_rate": 1.9542617327981784e-06, + "loss": 1.1466, + "step": 133671 + }, + { + "epoch": 1.61, + "grad_norm": 7.37483064679981, + "learning_rate": 1.953914830857123e-06, + "loss": 1.1517, + "step": 133674 + }, + { + "epoch": 1.61, + "grad_norm": 5.335427186369394, + "learning_rate": 1.9535679563744593e-06, + "loss": 1.0399, + "step": 133677 + }, + { + "epoch": 1.61, + "grad_norm": 5.421048830690791, + "learning_rate": 1.9532211093513688e-06, + "loss": 1.0907, + "step": 133680 + }, + { + "epoch": 1.61, + "grad_norm": 17.560588760519757, + "learning_rate": 1.9528742897890385e-06, + "loss": 0.877, + "step": 133683 + }, + { + "epoch": 1.61, + "grad_norm": 10.176698359705407, + "learning_rate": 1.9525274976886476e-06, + "loss": 1.1743, + "step": 133686 + }, + { + "epoch": 1.61, + "grad_norm": 8.902271771878125, + "learning_rate": 1.9521807330513844e-06, + "loss": 1.2381, + "step": 133689 + }, + { + "epoch": 1.61, + "grad_norm": 10.153294992765815, + "learning_rate": 1.9518339958784272e-06, + "loss": 1.2537, + "step": 133692 + }, + { + "epoch": 1.61, + "grad_norm": 8.872405725366983, + "learning_rate": 1.951487286170963e-06, + "loss": 0.819, + "step": 133695 + }, + { + "epoch": 1.61, + "grad_norm": 10.511979679369217, + "learning_rate": 1.951140603930173e-06, + "loss": 1.3321, + "step": 133698 + }, + { + "epoch": 1.61, + "grad_norm": 11.350405039054795, + "learning_rate": 1.9507939491572394e-06, + "loss": 1.0149, + "step": 133701 + }, + { + "epoch": 1.61, + "grad_norm": 2.9541419454976063, + "learning_rate": 1.950447321853347e-06, + "loss": 0.8223, + "step": 133704 + }, + { + "epoch": 1.61, + "grad_norm": 6.309760137205153, + "learning_rate": 1.950100722019681e-06, + "loss": 1.4607, + "step": 133707 + }, + { + "epoch": 1.61, + "grad_norm": 21.77760477004583, + "learning_rate": 1.949754149657421e-06, + "loss": 0.8463, + "step": 133710 + }, + { + "epoch": 1.61, + "grad_norm": 6.351024503707005, + "learning_rate": 1.949407604767749e-06, + "loss": 1.4458, + "step": 133713 + }, + { + "epoch": 1.61, + "grad_norm": 10.797302031132235, + "learning_rate": 1.949061087351849e-06, + "loss": 1.1591, + "step": 133716 + }, + { + "epoch": 1.61, + "grad_norm": 10.244454414045903, + "learning_rate": 1.9487145974109033e-06, + "loss": 1.0765, + "step": 133719 + }, + { + "epoch": 1.61, + "grad_norm": 5.3182945462911295, + "learning_rate": 1.948368134946097e-06, + "loss": 1.1934, + "step": 133722 + }, + { + "epoch": 1.61, + "grad_norm": 11.881361183465318, + "learning_rate": 1.948021699958608e-06, + "loss": 1.0371, + "step": 133725 + }, + { + "epoch": 1.61, + "grad_norm": 30.090491472927283, + "learning_rate": 1.94767529244962e-06, + "loss": 0.8766, + "step": 133728 + }, + { + "epoch": 1.61, + "grad_norm": 19.364751816898128, + "learning_rate": 1.9473289124203187e-06, + "loss": 0.9622, + "step": 133731 + }, + { + "epoch": 1.61, + "grad_norm": 20.54966481289724, + "learning_rate": 1.9469825598718808e-06, + "loss": 1.2066, + "step": 133734 + }, + { + "epoch": 1.61, + "grad_norm": 8.518874550810317, + "learning_rate": 1.9466362348054924e-06, + "loss": 1.1756, + "step": 133737 + }, + { + "epoch": 1.61, + "grad_norm": 11.198664960345846, + "learning_rate": 1.946289937222332e-06, + "loss": 0.8754, + "step": 133740 + }, + { + "epoch": 1.61, + "grad_norm": 3.139237479518549, + "learning_rate": 1.9459436671235834e-06, + "loss": 0.8502, + "step": 133743 + }, + { + "epoch": 1.61, + "grad_norm": 6.862863279774093, + "learning_rate": 1.945597424510427e-06, + "loss": 0.9143, + "step": 133746 + }, + { + "epoch": 1.61, + "grad_norm": 8.06153486861407, + "learning_rate": 1.945251209384049e-06, + "loss": 1.0304, + "step": 133749 + }, + { + "epoch": 1.61, + "grad_norm": 3.724212734182797, + "learning_rate": 1.9449050217456267e-06, + "loss": 1.0385, + "step": 133752 + }, + { + "epoch": 1.61, + "grad_norm": 5.086495270798718, + "learning_rate": 1.9445588615963397e-06, + "loss": 1.1258, + "step": 133755 + }, + { + "epoch": 1.61, + "grad_norm": 21.574773323050856, + "learning_rate": 1.9442127289373724e-06, + "loss": 1.5523, + "step": 133758 + }, + { + "epoch": 1.61, + "grad_norm": 4.517528771633388, + "learning_rate": 1.9438666237699045e-06, + "loss": 0.9337, + "step": 133761 + }, + { + "epoch": 1.61, + "grad_norm": 3.337490565691938, + "learning_rate": 1.9435205460951213e-06, + "loss": 1.5649, + "step": 133764 + }, + { + "epoch": 1.61, + "grad_norm": 8.026696002098321, + "learning_rate": 1.9431744959141974e-06, + "loss": 1.1923, + "step": 133767 + }, + { + "epoch": 1.61, + "grad_norm": 17.190843967921385, + "learning_rate": 1.9428284732283166e-06, + "loss": 1.1409, + "step": 133770 + }, + { + "epoch": 1.61, + "grad_norm": 2.004435961576781, + "learning_rate": 1.942482478038663e-06, + "loss": 1.0425, + "step": 133773 + }, + { + "epoch": 1.61, + "grad_norm": 11.988525896558244, + "learning_rate": 1.942136510346415e-06, + "loss": 1.4204, + "step": 133776 + }, + { + "epoch": 1.61, + "grad_norm": 9.798566054153754, + "learning_rate": 1.9417905701527493e-06, + "loss": 1.4024, + "step": 133779 + }, + { + "epoch": 1.61, + "grad_norm": 2.8767273084940506, + "learning_rate": 1.94144465745885e-06, + "loss": 1.2253, + "step": 133782 + }, + { + "epoch": 1.61, + "grad_norm": 3.8715615076674013, + "learning_rate": 1.941098772265897e-06, + "loss": 1.5193, + "step": 133785 + }, + { + "epoch": 1.61, + "grad_norm": 7.147066437687653, + "learning_rate": 1.9407529145750716e-06, + "loss": 1.3914, + "step": 133788 + }, + { + "epoch": 1.61, + "grad_norm": 6.723273493736217, + "learning_rate": 1.9404070843875565e-06, + "loss": 1.0059, + "step": 133791 + }, + { + "epoch": 1.61, + "grad_norm": 2.808594093156537, + "learning_rate": 1.9400612817045285e-06, + "loss": 1.1705, + "step": 133794 + }, + { + "epoch": 1.61, + "grad_norm": 17.922376081586926, + "learning_rate": 1.9397155065271657e-06, + "loss": 1.1381, + "step": 133797 + }, + { + "epoch": 1.61, + "grad_norm": 5.786945389594246, + "learning_rate": 1.939369758856651e-06, + "loss": 1.2089, + "step": 133800 + }, + { + "epoch": 1.61, + "grad_norm": 7.791338756883671, + "learning_rate": 1.939024038694165e-06, + "loss": 1.2086, + "step": 133803 + }, + { + "epoch": 1.61, + "grad_norm": 4.194236846353263, + "learning_rate": 1.938678346040885e-06, + "loss": 1.1905, + "step": 133806 + }, + { + "epoch": 1.61, + "grad_norm": 9.674430944653317, + "learning_rate": 1.938332680897992e-06, + "loss": 1.2439, + "step": 133809 + }, + { + "epoch": 1.61, + "grad_norm": 3.0587326190560984, + "learning_rate": 1.937987043266666e-06, + "loss": 1.479, + "step": 133812 + }, + { + "epoch": 1.61, + "grad_norm": 3.090796904232621, + "learning_rate": 1.9376414331480886e-06, + "loss": 0.9585, + "step": 133815 + }, + { + "epoch": 1.61, + "grad_norm": 7.342345815960225, + "learning_rate": 1.937295850543437e-06, + "loss": 1.1758, + "step": 133818 + }, + { + "epoch": 1.61, + "grad_norm": 12.351606633595374, + "learning_rate": 1.9369502954538877e-06, + "loss": 1.0072, + "step": 133821 + }, + { + "epoch": 1.61, + "grad_norm": 13.311274699891666, + "learning_rate": 1.936604767880623e-06, + "loss": 1.2867, + "step": 133824 + }, + { + "epoch": 1.61, + "grad_norm": 13.001293093695235, + "learning_rate": 1.9362592678248214e-06, + "loss": 1.2688, + "step": 133827 + }, + { + "epoch": 1.61, + "grad_norm": 19.712520584341807, + "learning_rate": 1.935913795287665e-06, + "loss": 1.751, + "step": 133830 + }, + { + "epoch": 1.61, + "grad_norm": 7.380927978808181, + "learning_rate": 1.9355683502703284e-06, + "loss": 0.9876, + "step": 133833 + }, + { + "epoch": 1.61, + "grad_norm": 13.117891927932838, + "learning_rate": 1.9352229327739915e-06, + "loss": 0.8804, + "step": 133836 + }, + { + "epoch": 1.61, + "grad_norm": 6.904822326102696, + "learning_rate": 1.9348775427998366e-06, + "loss": 1.2317, + "step": 133839 + }, + { + "epoch": 1.61, + "grad_norm": 8.21140995840362, + "learning_rate": 1.934532180349037e-06, + "loss": 0.9557, + "step": 133842 + }, + { + "epoch": 1.61, + "grad_norm": 4.448012104970231, + "learning_rate": 1.9341868454227762e-06, + "loss": 0.9598, + "step": 133845 + }, + { + "epoch": 1.61, + "grad_norm": 30.932150384606665, + "learning_rate": 1.9338415380222276e-06, + "loss": 1.0602, + "step": 133848 + }, + { + "epoch": 1.61, + "grad_norm": 8.604562017556903, + "learning_rate": 1.933496258148573e-06, + "loss": 1.0325, + "step": 133851 + }, + { + "epoch": 1.61, + "grad_norm": 4.241311328636016, + "learning_rate": 1.9331510058029902e-06, + "loss": 1.0024, + "step": 133854 + }, + { + "epoch": 1.61, + "grad_norm": 5.534749442671691, + "learning_rate": 1.9328057809866597e-06, + "loss": 1.3107, + "step": 133857 + }, + { + "epoch": 1.61, + "grad_norm": 3.4207227281509813, + "learning_rate": 1.932460583700757e-06, + "loss": 1.1878, + "step": 133860 + }, + { + "epoch": 1.61, + "grad_norm": 8.358218660562262, + "learning_rate": 1.932115413946458e-06, + "loss": 1.2551, + "step": 133863 + }, + { + "epoch": 1.61, + "grad_norm": 13.098243970772886, + "learning_rate": 1.931770271724943e-06, + "loss": 1.3925, + "step": 133866 + }, + { + "epoch": 1.61, + "grad_norm": 3.9718589461197116, + "learning_rate": 1.931425157037391e-06, + "loss": 0.9111, + "step": 133869 + }, + { + "epoch": 1.61, + "grad_norm": 5.3746488155663235, + "learning_rate": 1.9310800698849796e-06, + "loss": 1.2941, + "step": 133872 + }, + { + "epoch": 1.61, + "grad_norm": 12.50242399544975, + "learning_rate": 1.9307350102688836e-06, + "loss": 0.9974, + "step": 133875 + }, + { + "epoch": 1.61, + "grad_norm": 11.749975291946397, + "learning_rate": 1.9303899781902835e-06, + "loss": 1.1359, + "step": 133878 + }, + { + "epoch": 1.61, + "grad_norm": 5.4188272164308335, + "learning_rate": 1.9300449736503567e-06, + "loss": 1.4814, + "step": 133881 + }, + { + "epoch": 1.61, + "grad_norm": 4.62529555613298, + "learning_rate": 1.9296999966502807e-06, + "loss": 1.284, + "step": 133884 + }, + { + "epoch": 1.61, + "grad_norm": 6.2080853995712, + "learning_rate": 1.929355047191228e-06, + "loss": 1.0038, + "step": 133887 + }, + { + "epoch": 1.61, + "grad_norm": 3.2547922821612456, + "learning_rate": 1.929010125274381e-06, + "loss": 1.0978, + "step": 133890 + }, + { + "epoch": 1.61, + "grad_norm": 4.935704627265033, + "learning_rate": 1.9286652309009145e-06, + "loss": 1.0331, + "step": 133893 + }, + { + "epoch": 1.61, + "grad_norm": 3.7907284557380807, + "learning_rate": 1.9283203640720063e-06, + "loss": 0.9379, + "step": 133896 + }, + { + "epoch": 1.61, + "grad_norm": 10.024287512484197, + "learning_rate": 1.9279755247888356e-06, + "loss": 1.1377, + "step": 133899 + }, + { + "epoch": 1.61, + "grad_norm": 12.432409098214853, + "learning_rate": 1.9276307130525773e-06, + "loss": 0.9131, + "step": 133902 + }, + { + "epoch": 1.61, + "grad_norm": 4.143555978711819, + "learning_rate": 1.9272859288644043e-06, + "loss": 1.0534, + "step": 133905 + }, + { + "epoch": 1.61, + "grad_norm": 6.280180194372998, + "learning_rate": 1.926941172225497e-06, + "loss": 0.8202, + "step": 133908 + }, + { + "epoch": 1.61, + "grad_norm": 3.2300806051950803, + "learning_rate": 1.9265964431370353e-06, + "loss": 0.9789, + "step": 133911 + }, + { + "epoch": 1.61, + "grad_norm": 12.965659966987994, + "learning_rate": 1.926251741600188e-06, + "loss": 1.2559, + "step": 133914 + }, + { + "epoch": 1.61, + "grad_norm": 7.150895134370684, + "learning_rate": 1.9259070676161364e-06, + "loss": 1.243, + "step": 133917 + }, + { + "epoch": 1.61, + "grad_norm": 6.479786241621005, + "learning_rate": 1.925562421186055e-06, + "loss": 1.4754, + "step": 133920 + }, + { + "epoch": 1.61, + "grad_norm": 12.530755376639968, + "learning_rate": 1.9252178023111235e-06, + "loss": 1.2734, + "step": 133923 + }, + { + "epoch": 1.61, + "grad_norm": 5.372212649513914, + "learning_rate": 1.924873210992515e-06, + "loss": 0.7676, + "step": 133926 + }, + { + "epoch": 1.61, + "grad_norm": 3.19207922275424, + "learning_rate": 1.924528647231403e-06, + "loss": 1.3312, + "step": 133929 + }, + { + "epoch": 1.61, + "grad_norm": 7.631851931491385, + "learning_rate": 1.924184111028966e-06, + "loss": 0.923, + "step": 133932 + }, + { + "epoch": 1.61, + "grad_norm": 9.466558790883159, + "learning_rate": 1.923839602386379e-06, + "loss": 0.9147, + "step": 133935 + }, + { + "epoch": 1.61, + "grad_norm": 6.261394152132817, + "learning_rate": 1.9234951213048225e-06, + "loss": 0.7581, + "step": 133938 + }, + { + "epoch": 1.61, + "grad_norm": 5.051325123250819, + "learning_rate": 1.923150667785465e-06, + "loss": 1.0098, + "step": 133941 + }, + { + "epoch": 1.61, + "grad_norm": 13.689078088677746, + "learning_rate": 1.9228062418294868e-06, + "loss": 1.1986, + "step": 133944 + }, + { + "epoch": 1.61, + "grad_norm": 12.732429164310563, + "learning_rate": 1.922461843438059e-06, + "loss": 1.11, + "step": 133947 + }, + { + "epoch": 1.61, + "grad_norm": 9.601803874821629, + "learning_rate": 1.9221174726123604e-06, + "loss": 1.1466, + "step": 133950 + }, + { + "epoch": 1.61, + "grad_norm": 15.034812751865037, + "learning_rate": 1.921773129353567e-06, + "loss": 1.114, + "step": 133953 + }, + { + "epoch": 1.61, + "grad_norm": 5.396142775786449, + "learning_rate": 1.92142881366285e-06, + "loss": 1.1213, + "step": 133956 + }, + { + "epoch": 1.61, + "grad_norm": 5.410766101170977, + "learning_rate": 1.9210845255413856e-06, + "loss": 0.9268, + "step": 133959 + }, + { + "epoch": 1.61, + "grad_norm": 12.865611510132402, + "learning_rate": 1.9207402649903505e-06, + "loss": 1.1415, + "step": 133962 + }, + { + "epoch": 1.61, + "grad_norm": 4.968431350433262, + "learning_rate": 1.9203960320109215e-06, + "loss": 1.1066, + "step": 133965 + }, + { + "epoch": 1.61, + "grad_norm": 6.304404729960707, + "learning_rate": 1.92005182660427e-06, + "loss": 1.0493, + "step": 133968 + }, + { + "epoch": 1.61, + "grad_norm": 8.223317926853062, + "learning_rate": 1.9197076487715693e-06, + "loss": 1.0236, + "step": 133971 + }, + { + "epoch": 1.61, + "grad_norm": 5.486514206287583, + "learning_rate": 1.9193634985139954e-06, + "loss": 0.6478, + "step": 133974 + }, + { + "epoch": 1.61, + "grad_norm": 7.149874808278781, + "learning_rate": 1.9190193758327234e-06, + "loss": 1.3051, + "step": 133977 + }, + { + "epoch": 1.61, + "grad_norm": 8.356695161951903, + "learning_rate": 1.91867528072893e-06, + "loss": 0.8704, + "step": 133980 + }, + { + "epoch": 1.61, + "grad_norm": 4.695182853913596, + "learning_rate": 1.9183312132037846e-06, + "loss": 0.8516, + "step": 133983 + }, + { + "epoch": 1.61, + "grad_norm": 4.600095620541702, + "learning_rate": 1.9179871732584667e-06, + "loss": 1.0018, + "step": 133986 + }, + { + "epoch": 1.61, + "grad_norm": 6.029011922665919, + "learning_rate": 1.9176431608941436e-06, + "loss": 1.1539, + "step": 133989 + }, + { + "epoch": 1.61, + "grad_norm": 6.399484181946855, + "learning_rate": 1.917299176111996e-06, + "loss": 1.1749, + "step": 133992 + }, + { + "epoch": 1.61, + "grad_norm": 3.0383663002908254, + "learning_rate": 1.9169552189131934e-06, + "loss": 1.5329, + "step": 133995 + }, + { + "epoch": 1.61, + "grad_norm": 5.342754718142053, + "learning_rate": 1.916611289298911e-06, + "loss": 1.191, + "step": 133998 + }, + { + "epoch": 1.61, + "grad_norm": 13.949835972036755, + "learning_rate": 1.916267387270322e-06, + "loss": 1.3794, + "step": 134001 + }, + { + "epoch": 1.61, + "grad_norm": 4.593604634343178, + "learning_rate": 1.9159235128286014e-06, + "loss": 1.1972, + "step": 134004 + }, + { + "epoch": 1.61, + "grad_norm": 10.686051691854846, + "learning_rate": 1.9155796659749237e-06, + "loss": 1.2464, + "step": 134007 + }, + { + "epoch": 1.61, + "grad_norm": 10.579296399405468, + "learning_rate": 1.9152358467104613e-06, + "loss": 1.1926, + "step": 134010 + }, + { + "epoch": 1.61, + "grad_norm": 6.471059370464622, + "learning_rate": 1.9148920550363836e-06, + "loss": 1.1377, + "step": 134013 + }, + { + "epoch": 1.61, + "grad_norm": 10.956740419902482, + "learning_rate": 1.914548290953868e-06, + "loss": 1.3745, + "step": 134016 + }, + { + "epoch": 1.61, + "grad_norm": 5.054559699112066, + "learning_rate": 1.9142045544640885e-06, + "loss": 0.9888, + "step": 134019 + }, + { + "epoch": 1.61, + "grad_norm": 6.827952522575569, + "learning_rate": 1.913860845568214e-06, + "loss": 1.148, + "step": 134022 + }, + { + "epoch": 1.61, + "grad_norm": 6.898613328790683, + "learning_rate": 1.91351716426742e-06, + "loss": 1.2063, + "step": 134025 + }, + { + "epoch": 1.61, + "grad_norm": 4.287194033647468, + "learning_rate": 1.9131735105628813e-06, + "loss": 1.185, + "step": 134028 + }, + { + "epoch": 1.61, + "grad_norm": 5.515315838840035, + "learning_rate": 1.9128298844557657e-06, + "loss": 1.1942, + "step": 134031 + }, + { + "epoch": 1.61, + "grad_norm": 12.525271076639564, + "learning_rate": 1.9124862859472526e-06, + "loss": 1.3255, + "step": 134034 + }, + { + "epoch": 1.61, + "grad_norm": 6.2273075054125355, + "learning_rate": 1.9121427150385073e-06, + "loss": 0.6447, + "step": 134037 + }, + { + "epoch": 1.61, + "grad_norm": 7.961550994525485, + "learning_rate": 1.9117991717307062e-06, + "loss": 1.2129, + "step": 134040 + }, + { + "epoch": 1.61, + "grad_norm": 11.294336167204584, + "learning_rate": 1.9114556560250196e-06, + "loss": 1.0827, + "step": 134043 + }, + { + "epoch": 1.61, + "grad_norm": 39.03441766709188, + "learning_rate": 1.911112167922625e-06, + "loss": 1.2346, + "step": 134046 + }, + { + "epoch": 1.61, + "grad_norm": 10.937713844311553, + "learning_rate": 1.9107687074246893e-06, + "loss": 1.4078, + "step": 134049 + }, + { + "epoch": 1.61, + "grad_norm": 8.237864222595375, + "learning_rate": 1.910425274532388e-06, + "loss": 1.1436, + "step": 134052 + }, + { + "epoch": 1.61, + "grad_norm": 4.463610999892652, + "learning_rate": 1.9100818692468893e-06, + "loss": 0.9676, + "step": 134055 + }, + { + "epoch": 1.61, + "grad_norm": 5.415804979867735, + "learning_rate": 1.9097384915693663e-06, + "loss": 0.7927, + "step": 134058 + }, + { + "epoch": 1.61, + "grad_norm": 9.422501072061356, + "learning_rate": 1.909395141500995e-06, + "loss": 0.8888, + "step": 134061 + }, + { + "epoch": 1.61, + "grad_norm": 13.189632362631912, + "learning_rate": 1.9090518190429406e-06, + "loss": 1.152, + "step": 134064 + }, + { + "epoch": 1.61, + "grad_norm": 7.2891887081563596, + "learning_rate": 1.9087085241963787e-06, + "loss": 1.1095, + "step": 134067 + }, + { + "epoch": 1.61, + "grad_norm": 7.390190786135993, + "learning_rate": 1.9083652569624822e-06, + "loss": 1.1577, + "step": 134070 + }, + { + "epoch": 1.61, + "grad_norm": 7.322507371537955, + "learning_rate": 1.908022017342418e-06, + "loss": 1.4813, + "step": 134073 + }, + { + "epoch": 1.61, + "grad_norm": 4.925178242217265, + "learning_rate": 1.907678805337363e-06, + "loss": 1.0908, + "step": 134076 + }, + { + "epoch": 1.61, + "grad_norm": 6.524210748136093, + "learning_rate": 1.9073356209484816e-06, + "loss": 1.1713, + "step": 134079 + }, + { + "epoch": 1.61, + "grad_norm": 4.044811469062106, + "learning_rate": 1.90699246417695e-06, + "loss": 1.2389, + "step": 134082 + }, + { + "epoch": 1.61, + "grad_norm": 9.563583280161334, + "learning_rate": 1.9066493350239378e-06, + "loss": 0.9569, + "step": 134085 + }, + { + "epoch": 1.61, + "grad_norm": 8.570533228842235, + "learning_rate": 1.906306233490619e-06, + "loss": 1.3015, + "step": 134088 + }, + { + "epoch": 1.61, + "grad_norm": 4.3454067087831065, + "learning_rate": 1.905963159578158e-06, + "loss": 1.2238, + "step": 134091 + }, + { + "epoch": 1.61, + "grad_norm": 23.45851016096567, + "learning_rate": 1.9056201132877327e-06, + "loss": 1.0407, + "step": 134094 + }, + { + "epoch": 1.61, + "grad_norm": 15.451065080705767, + "learning_rate": 1.9052770946205079e-06, + "loss": 1.2399, + "step": 134097 + }, + { + "epoch": 1.61, + "grad_norm": 19.677435324022824, + "learning_rate": 1.9049341035776592e-06, + "loss": 1.4194, + "step": 134100 + }, + { + "epoch": 1.61, + "grad_norm": 13.951000855050612, + "learning_rate": 1.9045911401603522e-06, + "loss": 1.3495, + "step": 134103 + }, + { + "epoch": 1.61, + "grad_norm": 9.664354309126473, + "learning_rate": 1.9042482043697585e-06, + "loss": 1.2403, + "step": 134106 + }, + { + "epoch": 1.61, + "grad_norm": 17.845643706707833, + "learning_rate": 1.9039052962070515e-06, + "loss": 1.1784, + "step": 134109 + }, + { + "epoch": 1.61, + "grad_norm": 7.493350098969207, + "learning_rate": 1.9035624156734e-06, + "loss": 1.185, + "step": 134112 + }, + { + "epoch": 1.61, + "grad_norm": 9.425031919259958, + "learning_rate": 1.9032195627699723e-06, + "loss": 1.0354, + "step": 134115 + }, + { + "epoch": 1.61, + "grad_norm": 14.53619143318471, + "learning_rate": 1.902876737497943e-06, + "loss": 1.3621, + "step": 134118 + }, + { + "epoch": 1.61, + "grad_norm": 9.319036044979182, + "learning_rate": 1.9025339398584753e-06, + "loss": 0.8986, + "step": 134121 + }, + { + "epoch": 1.61, + "grad_norm": 5.107377079229161, + "learning_rate": 1.9021911698527418e-06, + "loss": 1.2055, + "step": 134124 + }, + { + "epoch": 1.61, + "grad_norm": 43.75880844296994, + "learning_rate": 1.901848427481917e-06, + "loss": 1.0044, + "step": 134127 + }, + { + "epoch": 1.61, + "grad_norm": 4.7999569630373955, + "learning_rate": 1.9015057127471636e-06, + "loss": 0.7832, + "step": 134130 + }, + { + "epoch": 1.61, + "grad_norm": 18.355632509738204, + "learning_rate": 1.9011630256496539e-06, + "loss": 0.8793, + "step": 134133 + }, + { + "epoch": 1.61, + "grad_norm": 7.617987633586907, + "learning_rate": 1.90082036619056e-06, + "loss": 1.0829, + "step": 134136 + }, + { + "epoch": 1.61, + "grad_norm": 6.004146775243496, + "learning_rate": 1.9004777343710468e-06, + "loss": 1.2065, + "step": 134139 + }, + { + "epoch": 1.61, + "grad_norm": 4.035502952238614, + "learning_rate": 1.9001351301922877e-06, + "loss": 1.1779, + "step": 134142 + }, + { + "epoch": 1.61, + "grad_norm": 12.899699425417728, + "learning_rate": 1.8997925536554485e-06, + "loss": 1.425, + "step": 134145 + }, + { + "epoch": 1.61, + "grad_norm": 11.57412830028018, + "learning_rate": 1.899450004761698e-06, + "loss": 1.3726, + "step": 134148 + }, + { + "epoch": 1.61, + "grad_norm": 7.3009917136364475, + "learning_rate": 1.8991074835122081e-06, + "loss": 1.0522, + "step": 134151 + }, + { + "epoch": 1.61, + "grad_norm": 96.47218908940118, + "learning_rate": 1.8987649899081484e-06, + "loss": 0.6697, + "step": 134154 + }, + { + "epoch": 1.61, + "grad_norm": 13.07864464538959, + "learning_rate": 1.8984225239506826e-06, + "loss": 1.0627, + "step": 134157 + }, + { + "epoch": 1.61, + "grad_norm": 4.6990336076706125, + "learning_rate": 1.8980800856409864e-06, + "loss": 0.906, + "step": 134160 + }, + { + "epoch": 1.61, + "grad_norm": 6.306722516066984, + "learning_rate": 1.8977376749802212e-06, + "loss": 1.5997, + "step": 134163 + }, + { + "epoch": 1.61, + "grad_norm": 9.188264563056963, + "learning_rate": 1.8973952919695592e-06, + "loss": 1.1933, + "step": 134166 + }, + { + "epoch": 1.61, + "grad_norm": 7.024899756488157, + "learning_rate": 1.897052936610171e-06, + "loss": 0.8386, + "step": 134169 + }, + { + "epoch": 1.61, + "grad_norm": 23.768529939395087, + "learning_rate": 1.8967106089032195e-06, + "loss": 1.1787, + "step": 134172 + }, + { + "epoch": 1.61, + "grad_norm": 11.595694211913694, + "learning_rate": 1.8963683088498764e-06, + "loss": 1.0715, + "step": 134175 + }, + { + "epoch": 1.61, + "grad_norm": 7.873098919831951, + "learning_rate": 1.8960260364513116e-06, + "loss": 1.2791, + "step": 134178 + }, + { + "epoch": 1.61, + "grad_norm": 28.586161757198685, + "learning_rate": 1.8956837917086912e-06, + "loss": 1.069, + "step": 134181 + }, + { + "epoch": 1.61, + "grad_norm": 7.227078263070293, + "learning_rate": 1.8953415746231807e-06, + "loss": 1.4407, + "step": 134184 + }, + { + "epoch": 1.61, + "grad_norm": 3.9477651605383914, + "learning_rate": 1.8949993851959493e-06, + "loss": 0.9727, + "step": 134187 + }, + { + "epoch": 1.61, + "grad_norm": 7.669577006104089, + "learning_rate": 1.8946572234281656e-06, + "loss": 1.3137, + "step": 134190 + }, + { + "epoch": 1.61, + "grad_norm": 23.422641924719063, + "learning_rate": 1.8943150893209971e-06, + "loss": 1.1553, + "step": 134193 + }, + { + "epoch": 1.61, + "grad_norm": 5.492260034776842, + "learning_rate": 1.8939729828756148e-06, + "loss": 1.0667, + "step": 134196 + }, + { + "epoch": 1.61, + "grad_norm": 22.225922254195186, + "learning_rate": 1.89363090409318e-06, + "loss": 1.0849, + "step": 134199 + }, + { + "epoch": 1.61, + "grad_norm": 6.828685271798774, + "learning_rate": 1.893288852974866e-06, + "loss": 1.0765, + "step": 134202 + }, + { + "epoch": 1.61, + "grad_norm": 5.723787289267952, + "learning_rate": 1.8929468295218346e-06, + "loss": 0.9961, + "step": 134205 + }, + { + "epoch": 1.61, + "grad_norm": 4.803574363483198, + "learning_rate": 1.8926048337352576e-06, + "loss": 1.3246, + "step": 134208 + }, + { + "epoch": 1.61, + "grad_norm": 7.131739446052405, + "learning_rate": 1.8922628656162977e-06, + "loss": 1.2721, + "step": 134211 + }, + { + "epoch": 1.61, + "grad_norm": 3.4166758388704705, + "learning_rate": 1.8919209251661242e-06, + "loss": 1.0733, + "step": 134214 + }, + { + "epoch": 1.61, + "grad_norm": 8.964000105914137, + "learning_rate": 1.8915790123859035e-06, + "loss": 0.9548, + "step": 134217 + }, + { + "epoch": 1.61, + "grad_norm": 15.933313532159566, + "learning_rate": 1.891237127276806e-06, + "loss": 1.281, + "step": 134220 + }, + { + "epoch": 1.61, + "grad_norm": 4.93994398396986, + "learning_rate": 1.8908952698399963e-06, + "loss": 1.1295, + "step": 134223 + }, + { + "epoch": 1.61, + "grad_norm": 4.621506190528557, + "learning_rate": 1.8905534400766368e-06, + "loss": 1.4811, + "step": 134226 + }, + { + "epoch": 1.61, + "grad_norm": 20.123738007500844, + "learning_rate": 1.8902116379878965e-06, + "loss": 1.4475, + "step": 134229 + }, + { + "epoch": 1.61, + "grad_norm": 4.53848555379102, + "learning_rate": 1.8898698635749435e-06, + "loss": 1.0937, + "step": 134232 + }, + { + "epoch": 1.61, + "grad_norm": 8.271673062100412, + "learning_rate": 1.8895281168389467e-06, + "loss": 1.5423, + "step": 134235 + }, + { + "epoch": 1.61, + "grad_norm": 6.030913525782421, + "learning_rate": 1.8891863977810653e-06, + "loss": 1.2854, + "step": 134238 + }, + { + "epoch": 1.61, + "grad_norm": 2.516021812080281, + "learning_rate": 1.8888447064024707e-06, + "loss": 1.1137, + "step": 134241 + }, + { + "epoch": 1.61, + "grad_norm": 4.821345495325345, + "learning_rate": 1.8885030427043293e-06, + "loss": 1.1744, + "step": 134244 + }, + { + "epoch": 1.61, + "grad_norm": 8.581672016810293, + "learning_rate": 1.8881614066878017e-06, + "loss": 1.2383, + "step": 134247 + }, + { + "epoch": 1.61, + "grad_norm": 12.414845327174335, + "learning_rate": 1.8878197983540614e-06, + "loss": 1.17, + "step": 134250 + }, + { + "epoch": 1.61, + "grad_norm": 11.733429039184868, + "learning_rate": 1.8874782177042672e-06, + "loss": 1.3014, + "step": 134253 + }, + { + "epoch": 1.61, + "grad_norm": 8.78579679500331, + "learning_rate": 1.8871366647395872e-06, + "loss": 0.8909, + "step": 134256 + }, + { + "epoch": 1.61, + "grad_norm": 12.950329585882441, + "learning_rate": 1.8867951394611871e-06, + "loss": 1.2183, + "step": 134259 + }, + { + "epoch": 1.61, + "grad_norm": 5.940158515468027, + "learning_rate": 1.886453641870236e-06, + "loss": 1.0873, + "step": 134262 + }, + { + "epoch": 1.61, + "grad_norm": 17.61265808672854, + "learning_rate": 1.8861121719678965e-06, + "loss": 1.2315, + "step": 134265 + }, + { + "epoch": 1.61, + "grad_norm": 8.766912032063065, + "learning_rate": 1.8857707297553306e-06, + "loss": 0.969, + "step": 134268 + }, + { + "epoch": 1.61, + "grad_norm": 2.2325514090537366, + "learning_rate": 1.8854293152337067e-06, + "loss": 1.0451, + "step": 134271 + }, + { + "epoch": 1.61, + "grad_norm": 19.74117535910879, + "learning_rate": 1.8850879284041901e-06, + "loss": 1.2998, + "step": 134274 + }, + { + "epoch": 1.61, + "grad_norm": 7.810934746275608, + "learning_rate": 1.8847465692679478e-06, + "loss": 1.0834, + "step": 134277 + }, + { + "epoch": 1.61, + "grad_norm": 12.503265598633579, + "learning_rate": 1.8844052378261391e-06, + "loss": 1.2737, + "step": 134280 + }, + { + "epoch": 1.61, + "grad_norm": 4.517877213450816, + "learning_rate": 1.8840639340799327e-06, + "loss": 1.2185, + "step": 134283 + }, + { + "epoch": 1.61, + "grad_norm": 9.378426002991217, + "learning_rate": 1.8837226580304946e-06, + "loss": 1.4248, + "step": 134286 + }, + { + "epoch": 1.61, + "grad_norm": 2.8999282184721924, + "learning_rate": 1.8833814096789882e-06, + "loss": 1.0362, + "step": 134289 + }, + { + "epoch": 1.61, + "grad_norm": 9.900826124952365, + "learning_rate": 1.8830401890265748e-06, + "loss": 1.4749, + "step": 134292 + }, + { + "epoch": 1.61, + "grad_norm": 8.870336282837423, + "learning_rate": 1.8826989960744213e-06, + "loss": 1.3289, + "step": 134295 + }, + { + "epoch": 1.61, + "grad_norm": 13.9639238409677, + "learning_rate": 1.8823578308236922e-06, + "loss": 0.8479, + "step": 134298 + }, + { + "epoch": 1.61, + "grad_norm": 6.248209838942104, + "learning_rate": 1.8820166932755523e-06, + "loss": 0.9909, + "step": 134301 + }, + { + "epoch": 1.61, + "grad_norm": 5.90255823915705, + "learning_rate": 1.8816755834311683e-06, + "loss": 1.1465, + "step": 134304 + }, + { + "epoch": 1.62, + "grad_norm": 20.10576319728546, + "learning_rate": 1.8813345012917006e-06, + "loss": 1.2025, + "step": 134307 + }, + { + "epoch": 1.62, + "grad_norm": 3.128886482301115, + "learning_rate": 1.8809934468583112e-06, + "loss": 1.1142, + "step": 134310 + }, + { + "epoch": 1.62, + "grad_norm": 3.5321746092644095, + "learning_rate": 1.8806524201321675e-06, + "loss": 1.197, + "step": 134313 + }, + { + "epoch": 1.62, + "grad_norm": 3.5723702945201126, + "learning_rate": 1.8803114211144346e-06, + "loss": 1.1508, + "step": 134316 + }, + { + "epoch": 1.62, + "grad_norm": 7.131154609677785, + "learning_rate": 1.879970449806271e-06, + "loss": 1.0602, + "step": 134319 + }, + { + "epoch": 1.62, + "grad_norm": 7.006903639603385, + "learning_rate": 1.8796295062088443e-06, + "loss": 1.1589, + "step": 134322 + }, + { + "epoch": 1.62, + "grad_norm": 2.1789127396697276, + "learning_rate": 1.8792885903233172e-06, + "loss": 1.3794, + "step": 134325 + }, + { + "epoch": 1.62, + "grad_norm": 10.15246629080127, + "learning_rate": 1.878947702150855e-06, + "loss": 1.2745, + "step": 134328 + }, + { + "epoch": 1.62, + "grad_norm": 15.246744028925248, + "learning_rate": 1.8786068416926195e-06, + "loss": 1.2542, + "step": 134331 + }, + { + "epoch": 1.62, + "grad_norm": 6.646381062873849, + "learning_rate": 1.8782660089497707e-06, + "loss": 1.075, + "step": 134334 + }, + { + "epoch": 1.62, + "grad_norm": 13.508674593674042, + "learning_rate": 1.8779252039234752e-06, + "loss": 1.0999, + "step": 134337 + }, + { + "epoch": 1.62, + "grad_norm": 12.76236714834507, + "learning_rate": 1.8775844266148945e-06, + "loss": 1.1554, + "step": 134340 + }, + { + "epoch": 1.62, + "grad_norm": 13.837950022482046, + "learning_rate": 1.8772436770251956e-06, + "loss": 1.2987, + "step": 134343 + }, + { + "epoch": 1.62, + "grad_norm": 4.382963713325671, + "learning_rate": 1.8769029551555352e-06, + "loss": 1.1477, + "step": 134346 + }, + { + "epoch": 1.62, + "grad_norm": 10.967798612484234, + "learning_rate": 1.8765622610070822e-06, + "loss": 1.347, + "step": 134349 + }, + { + "epoch": 1.62, + "grad_norm": 11.069428431819931, + "learning_rate": 1.8762215945809936e-06, + "loss": 0.7598, + "step": 134352 + }, + { + "epoch": 1.62, + "grad_norm": 14.44818314890246, + "learning_rate": 1.8758809558784341e-06, + "loss": 1.273, + "step": 134355 + }, + { + "epoch": 1.62, + "grad_norm": 11.3700397869199, + "learning_rate": 1.8755403449005693e-06, + "loss": 0.9159, + "step": 134358 + }, + { + "epoch": 1.62, + "grad_norm": 12.036786666650155, + "learning_rate": 1.8751997616485561e-06, + "loss": 1.1405, + "step": 134361 + }, + { + "epoch": 1.62, + "grad_norm": 17.742620880064422, + "learning_rate": 1.8748592061235604e-06, + "loss": 1.0401, + "step": 134364 + }, + { + "epoch": 1.62, + "grad_norm": 10.144017748933404, + "learning_rate": 1.8745186783267434e-06, + "loss": 1.4166, + "step": 134367 + }, + { + "epoch": 1.62, + "grad_norm": 6.651141797397926, + "learning_rate": 1.8741781782592694e-06, + "loss": 1.1621, + "step": 134370 + }, + { + "epoch": 1.62, + "grad_norm": 4.361708411914469, + "learning_rate": 1.8738377059222978e-06, + "loss": 0.6954, + "step": 134373 + }, + { + "epoch": 1.62, + "grad_norm": 13.313609313299724, + "learning_rate": 1.8734972613169899e-06, + "loss": 1.1206, + "step": 134376 + }, + { + "epoch": 1.62, + "grad_norm": 5.739629229530834, + "learning_rate": 1.8731568444445081e-06, + "loss": 1.014, + "step": 134379 + }, + { + "epoch": 1.62, + "grad_norm": 18.84753646047985, + "learning_rate": 1.8728164553060145e-06, + "loss": 1.0207, + "step": 134382 + }, + { + "epoch": 1.62, + "grad_norm": 3.5002940396628834, + "learning_rate": 1.8724760939026742e-06, + "loss": 0.9269, + "step": 134385 + }, + { + "epoch": 1.62, + "grad_norm": 8.477125449244062, + "learning_rate": 1.8721357602356416e-06, + "loss": 1.1586, + "step": 134388 + }, + { + "epoch": 1.62, + "grad_norm": 8.5921061636526, + "learning_rate": 1.8717954543060856e-06, + "loss": 1.1217, + "step": 134391 + }, + { + "epoch": 1.62, + "grad_norm": 16.681190749312478, + "learning_rate": 1.8714551761151611e-06, + "loss": 1.3124, + "step": 134394 + }, + { + "epoch": 1.62, + "grad_norm": 7.211272449181661, + "learning_rate": 1.8711149256640349e-06, + "loss": 1.101, + "step": 134397 + }, + { + "epoch": 1.62, + "grad_norm": 10.72851248448962, + "learning_rate": 1.8707747029538625e-06, + "loss": 0.8265, + "step": 134400 + }, + { + "epoch": 1.62, + "grad_norm": 6.902448480496145, + "learning_rate": 1.8704345079858088e-06, + "loss": 1.4036, + "step": 134403 + }, + { + "epoch": 1.62, + "grad_norm": 14.486548555064614, + "learning_rate": 1.8700943407610327e-06, + "loss": 1.0383, + "step": 134406 + }, + { + "epoch": 1.62, + "grad_norm": 9.497142967153506, + "learning_rate": 1.8697542012806968e-06, + "loss": 1.314, + "step": 134409 + }, + { + "epoch": 1.62, + "grad_norm": 13.376641714450997, + "learning_rate": 1.8694140895459644e-06, + "loss": 1.2925, + "step": 134412 + }, + { + "epoch": 1.62, + "grad_norm": 12.492557479964972, + "learning_rate": 1.8690740055579926e-06, + "loss": 1.1377, + "step": 134415 + }, + { + "epoch": 1.62, + "grad_norm": 5.326236265299724, + "learning_rate": 1.8687339493179402e-06, + "loss": 1.1971, + "step": 134418 + }, + { + "epoch": 1.62, + "grad_norm": 5.491396460625393, + "learning_rate": 1.8683939208269686e-06, + "loss": 1.1764, + "step": 134421 + }, + { + "epoch": 1.62, + "grad_norm": 5.284796942228881, + "learning_rate": 1.8680539200862435e-06, + "loss": 0.966, + "step": 134424 + }, + { + "epoch": 1.62, + "grad_norm": 84.90625222518548, + "learning_rate": 1.8677139470969186e-06, + "loss": 0.9234, + "step": 134427 + }, + { + "epoch": 1.62, + "grad_norm": 5.639518123952837, + "learning_rate": 1.867374001860156e-06, + "loss": 1.1906, + "step": 134430 + }, + { + "epoch": 1.62, + "grad_norm": 9.64996048599639, + "learning_rate": 1.8670340843771173e-06, + "loss": 1.3455, + "step": 134433 + }, + { + "epoch": 1.62, + "grad_norm": 12.162098125681252, + "learning_rate": 1.8666941946489646e-06, + "loss": 1.1094, + "step": 134436 + }, + { + "epoch": 1.62, + "grad_norm": 4.101964666827522, + "learning_rate": 1.866354332676854e-06, + "loss": 1.0893, + "step": 134439 + }, + { + "epoch": 1.62, + "grad_norm": 3.577816273902302, + "learning_rate": 1.8660144984619443e-06, + "loss": 0.8391, + "step": 134442 + }, + { + "epoch": 1.62, + "grad_norm": 8.716212586300546, + "learning_rate": 1.865674692005398e-06, + "loss": 0.9938, + "step": 134445 + }, + { + "epoch": 1.62, + "grad_norm": 7.356506912608834, + "learning_rate": 1.8653349133083732e-06, + "loss": 1.0144, + "step": 134448 + }, + { + "epoch": 1.62, + "grad_norm": 6.095414304094796, + "learning_rate": 1.8649951623720331e-06, + "loss": 1.1413, + "step": 134451 + }, + { + "epoch": 1.62, + "grad_norm": 4.877926427954057, + "learning_rate": 1.8646554391975314e-06, + "loss": 1.2544, + "step": 134454 + }, + { + "epoch": 1.62, + "grad_norm": 9.247982857351197, + "learning_rate": 1.8643157437860327e-06, + "loss": 1.32, + "step": 134457 + }, + { + "epoch": 1.62, + "grad_norm": 4.972954069461967, + "learning_rate": 1.8639760761386916e-06, + "loss": 1.04, + "step": 134460 + }, + { + "epoch": 1.62, + "grad_norm": 4.816277678852262, + "learning_rate": 1.8636364362566695e-06, + "loss": 1.1561, + "step": 134463 + }, + { + "epoch": 1.62, + "grad_norm": 14.132807174168729, + "learning_rate": 1.8632968241411287e-06, + "loss": 1.4174, + "step": 134466 + }, + { + "epoch": 1.62, + "grad_norm": 6.705974505373543, + "learning_rate": 1.8629572397932216e-06, + "loss": 1.2451, + "step": 134469 + }, + { + "epoch": 1.62, + "grad_norm": 3.89775813013914, + "learning_rate": 1.8626176832141107e-06, + "loss": 1.3115, + "step": 134472 + }, + { + "epoch": 1.62, + "grad_norm": 16.27793277165322, + "learning_rate": 1.8622781544049551e-06, + "loss": 0.9223, + "step": 134475 + }, + { + "epoch": 1.62, + "grad_norm": 9.283941189577192, + "learning_rate": 1.8619386533669148e-06, + "loss": 0.8397, + "step": 134478 + }, + { + "epoch": 1.62, + "grad_norm": 3.0772290331142247, + "learning_rate": 1.8615991801011479e-06, + "loss": 1.0596, + "step": 134481 + }, + { + "epoch": 1.62, + "grad_norm": 18.588325673177113, + "learning_rate": 1.861259734608808e-06, + "loss": 1.2553, + "step": 134484 + }, + { + "epoch": 1.62, + "grad_norm": 16.110847203145234, + "learning_rate": 1.8609203168910573e-06, + "loss": 0.9967, + "step": 134487 + }, + { + "epoch": 1.62, + "grad_norm": 2.6054564811263385, + "learning_rate": 1.8605809269490526e-06, + "loss": 0.995, + "step": 134490 + }, + { + "epoch": 1.62, + "grad_norm": 4.6189713185558015, + "learning_rate": 1.8602415647839578e-06, + "loss": 1.2833, + "step": 134493 + }, + { + "epoch": 1.62, + "grad_norm": 7.3558522162344575, + "learning_rate": 1.8599022303969228e-06, + "loss": 1.2559, + "step": 134496 + }, + { + "epoch": 1.62, + "grad_norm": 8.53463768492524, + "learning_rate": 1.8595629237891133e-06, + "loss": 1.1712, + "step": 134499 + }, + { + "epoch": 1.62, + "grad_norm": 4.442950277062082, + "learning_rate": 1.8592236449616796e-06, + "loss": 0.8356, + "step": 134502 + }, + { + "epoch": 1.62, + "grad_norm": 8.263321279260166, + "learning_rate": 1.8588843939157863e-06, + "loss": 1.1133, + "step": 134505 + }, + { + "epoch": 1.62, + "grad_norm": 18.05514634472291, + "learning_rate": 1.858545170652586e-06, + "loss": 1.1335, + "step": 134508 + }, + { + "epoch": 1.62, + "grad_norm": 15.19574552941939, + "learning_rate": 1.8582059751732373e-06, + "loss": 1.1813, + "step": 134511 + }, + { + "epoch": 1.62, + "grad_norm": 9.42686271390269, + "learning_rate": 1.8578668074788997e-06, + "loss": 1.2299, + "step": 134514 + }, + { + "epoch": 1.62, + "grad_norm": 16.50834176340879, + "learning_rate": 1.8575276675707298e-06, + "loss": 1.5063, + "step": 134517 + }, + { + "epoch": 1.62, + "grad_norm": 6.1334959729220495, + "learning_rate": 1.857188555449888e-06, + "loss": 1.2279, + "step": 134520 + }, + { + "epoch": 1.62, + "grad_norm": 3.0471000794280547, + "learning_rate": 1.856849471117529e-06, + "loss": 1.1947, + "step": 134523 + }, + { + "epoch": 1.62, + "grad_norm": 10.512470742749612, + "learning_rate": 1.8565104145748059e-06, + "loss": 1.3182, + "step": 134526 + }, + { + "epoch": 1.62, + "grad_norm": 24.463185842180284, + "learning_rate": 1.8561713858228802e-06, + "loss": 1.0687, + "step": 134529 + }, + { + "epoch": 1.62, + "grad_norm": 9.23601242888885, + "learning_rate": 1.85583238486291e-06, + "loss": 0.9627, + "step": 134532 + }, + { + "epoch": 1.62, + "grad_norm": 15.827522448219517, + "learning_rate": 1.8554934116960489e-06, + "loss": 0.9571, + "step": 134535 + }, + { + "epoch": 1.62, + "grad_norm": 6.574416329936707, + "learning_rate": 1.8551544663234544e-06, + "loss": 0.9967, + "step": 134538 + }, + { + "epoch": 1.62, + "grad_norm": 13.493307820929754, + "learning_rate": 1.8548155487462871e-06, + "loss": 1.3523, + "step": 134541 + }, + { + "epoch": 1.62, + "grad_norm": 11.345875665877582, + "learning_rate": 1.854476658965697e-06, + "loss": 0.9152, + "step": 134544 + }, + { + "epoch": 1.62, + "grad_norm": 5.860287658999547, + "learning_rate": 1.8541377969828467e-06, + "loss": 1.199, + "step": 134547 + }, + { + "epoch": 1.62, + "grad_norm": 3.8530188100929768, + "learning_rate": 1.8537989627988884e-06, + "loss": 0.8681, + "step": 134550 + }, + { + "epoch": 1.62, + "grad_norm": 5.104528872868245, + "learning_rate": 1.853460156414979e-06, + "loss": 1.3868, + "step": 134553 + }, + { + "epoch": 1.62, + "grad_norm": 4.553026400486196, + "learning_rate": 1.8531213778322766e-06, + "loss": 1.1285, + "step": 134556 + }, + { + "epoch": 1.62, + "grad_norm": 11.453851747761778, + "learning_rate": 1.8527826270519378e-06, + "loss": 1.1976, + "step": 134559 + }, + { + "epoch": 1.62, + "grad_norm": 14.788072692643816, + "learning_rate": 1.8524439040751162e-06, + "loss": 0.9764, + "step": 134562 + }, + { + "epoch": 1.62, + "grad_norm": 10.881526270293335, + "learning_rate": 1.85210520890297e-06, + "loss": 1.335, + "step": 134565 + }, + { + "epoch": 1.62, + "grad_norm": 3.081994669448431, + "learning_rate": 1.8517665415366526e-06, + "loss": 1.1047, + "step": 134568 + }, + { + "epoch": 1.62, + "grad_norm": 4.522003976322484, + "learning_rate": 1.8514279019773196e-06, + "loss": 1.1767, + "step": 134571 + }, + { + "epoch": 1.62, + "grad_norm": 7.977841997099655, + "learning_rate": 1.8510892902261312e-06, + "loss": 0.742, + "step": 134574 + }, + { + "epoch": 1.62, + "grad_norm": 24.754028017888594, + "learning_rate": 1.8507507062842378e-06, + "loss": 1.0906, + "step": 134577 + }, + { + "epoch": 1.62, + "grad_norm": 8.547627236335453, + "learning_rate": 1.8504121501527973e-06, + "loss": 1.1723, + "step": 134580 + }, + { + "epoch": 1.62, + "grad_norm": 9.814373319825167, + "learning_rate": 1.8500736218329663e-06, + "loss": 1.2577, + "step": 134583 + }, + { + "epoch": 1.62, + "grad_norm": 10.233988235888637, + "learning_rate": 1.8497351213258985e-06, + "loss": 1.2981, + "step": 134586 + }, + { + "epoch": 1.62, + "grad_norm": 7.9323490154158485, + "learning_rate": 1.8493966486327465e-06, + "loss": 1.1015, + "step": 134589 + }, + { + "epoch": 1.62, + "grad_norm": 12.209398029088762, + "learning_rate": 1.849058203754668e-06, + "loss": 1.1245, + "step": 134592 + }, + { + "epoch": 1.62, + "grad_norm": 4.171102423151147, + "learning_rate": 1.8487197866928174e-06, + "loss": 1.3039, + "step": 134595 + }, + { + "epoch": 1.62, + "grad_norm": 3.606468521072072, + "learning_rate": 1.8483813974483512e-06, + "loss": 1.0358, + "step": 134598 + }, + { + "epoch": 1.62, + "grad_norm": 7.9770391426588665, + "learning_rate": 1.8480430360224243e-06, + "loss": 1.2559, + "step": 134601 + }, + { + "epoch": 1.62, + "grad_norm": 4.657514430619577, + "learning_rate": 1.8477047024161888e-06, + "loss": 1.1864, + "step": 134604 + }, + { + "epoch": 1.62, + "grad_norm": 4.630354643180356, + "learning_rate": 1.847366396630802e-06, + "loss": 1.1013, + "step": 134607 + }, + { + "epoch": 1.62, + "grad_norm": 6.170105950217842, + "learning_rate": 1.8470281186674156e-06, + "loss": 1.027, + "step": 134610 + }, + { + "epoch": 1.62, + "grad_norm": 5.419163908833492, + "learning_rate": 1.8466898685271884e-06, + "loss": 0.9792, + "step": 134613 + }, + { + "epoch": 1.62, + "grad_norm": 6.766605265854869, + "learning_rate": 1.8463516462112686e-06, + "loss": 1.0879, + "step": 134616 + }, + { + "epoch": 1.62, + "grad_norm": 4.754100204802335, + "learning_rate": 1.846013451720814e-06, + "loss": 1.0268, + "step": 134619 + }, + { + "epoch": 1.62, + "grad_norm": 9.19377594612203, + "learning_rate": 1.8456752850569782e-06, + "loss": 0.9966, + "step": 134622 + }, + { + "epoch": 1.62, + "grad_norm": 6.201630622860915, + "learning_rate": 1.845337146220918e-06, + "loss": 1.4886, + "step": 134625 + }, + { + "epoch": 1.62, + "grad_norm": 16.646566511711345, + "learning_rate": 1.8449990352137847e-06, + "loss": 1.2782, + "step": 134628 + }, + { + "epoch": 1.62, + "grad_norm": 2.8864401636434978, + "learning_rate": 1.8446609520367298e-06, + "loss": 1.4059, + "step": 134631 + }, + { + "epoch": 1.62, + "grad_norm": 11.185983691906726, + "learning_rate": 1.8443228966909099e-06, + "loss": 1.12, + "step": 134634 + }, + { + "epoch": 1.62, + "grad_norm": 13.259588666658322, + "learning_rate": 1.8439848691774787e-06, + "loss": 1.4356, + "step": 134637 + }, + { + "epoch": 1.62, + "grad_norm": 18.58104233479351, + "learning_rate": 1.8436468694975907e-06, + "loss": 0.9118, + "step": 134640 + }, + { + "epoch": 1.62, + "grad_norm": 5.8284240717858244, + "learning_rate": 1.843308897652396e-06, + "loss": 1.1826, + "step": 134643 + }, + { + "epoch": 1.62, + "grad_norm": 4.513911682625891, + "learning_rate": 1.8429709536430506e-06, + "loss": 1.0743, + "step": 134646 + }, + { + "epoch": 1.62, + "grad_norm": 13.043751307662129, + "learning_rate": 1.8426330374707092e-06, + "loss": 1.5509, + "step": 134649 + }, + { + "epoch": 1.62, + "grad_norm": 8.346211014758481, + "learning_rate": 1.8422951491365203e-06, + "loss": 1.089, + "step": 134652 + }, + { + "epoch": 1.62, + "grad_norm": 2.5146874571185545, + "learning_rate": 1.8419572886416427e-06, + "loss": 1.1578, + "step": 134655 + }, + { + "epoch": 1.62, + "grad_norm": 12.725245795243818, + "learning_rate": 1.8416194559872224e-06, + "loss": 1.216, + "step": 134658 + }, + { + "epoch": 1.62, + "grad_norm": 9.172952069634677, + "learning_rate": 1.841281651174418e-06, + "loss": 1.2337, + "step": 134661 + }, + { + "epoch": 1.62, + "grad_norm": 20.698521975532948, + "learning_rate": 1.8409438742043795e-06, + "loss": 1.2369, + "step": 134664 + }, + { + "epoch": 1.62, + "grad_norm": 10.264938551837913, + "learning_rate": 1.8406061250782636e-06, + "loss": 1.2086, + "step": 134667 + }, + { + "epoch": 1.62, + "grad_norm": 7.8153898363038845, + "learning_rate": 1.8402684037972175e-06, + "loss": 1.1513, + "step": 134670 + }, + { + "epoch": 1.62, + "grad_norm": 8.969006100017811, + "learning_rate": 1.8399307103623988e-06, + "loss": 1.1891, + "step": 134673 + }, + { + "epoch": 1.62, + "grad_norm": 7.754840124849009, + "learning_rate": 1.8395930447749554e-06, + "loss": 1.2859, + "step": 134676 + }, + { + "epoch": 1.62, + "grad_norm": 11.734806847304366, + "learning_rate": 1.8392554070360402e-06, + "loss": 1.1405, + "step": 134679 + }, + { + "epoch": 1.62, + "grad_norm": 7.670617170641458, + "learning_rate": 1.8389177971468109e-06, + "loss": 1.2774, + "step": 134682 + }, + { + "epoch": 1.62, + "grad_norm": 14.85082039325013, + "learning_rate": 1.838580215108412e-06, + "loss": 1.0664, + "step": 134685 + }, + { + "epoch": 1.62, + "grad_norm": 6.911649950732215, + "learning_rate": 1.8382426609219994e-06, + "loss": 0.8334, + "step": 134688 + }, + { + "epoch": 1.62, + "grad_norm": 2.623834625762494, + "learning_rate": 1.8379051345887267e-06, + "loss": 1.0406, + "step": 134691 + }, + { + "epoch": 1.62, + "grad_norm": 33.04109689834506, + "learning_rate": 1.8375676361097438e-06, + "loss": 1.1701, + "step": 134694 + }, + { + "epoch": 1.62, + "grad_norm": 8.251551548596888, + "learning_rate": 1.8372301654862012e-06, + "loss": 1.1102, + "step": 134697 + }, + { + "epoch": 1.62, + "grad_norm": 12.91394682602057, + "learning_rate": 1.8368927227192513e-06, + "loss": 1.1241, + "step": 134700 + }, + { + "epoch": 1.62, + "grad_norm": 3.543954486154535, + "learning_rate": 1.836555307810045e-06, + "loss": 1.2013, + "step": 134703 + }, + { + "epoch": 1.62, + "grad_norm": 10.685783856101999, + "learning_rate": 1.8362179207597363e-06, + "loss": 1.2186, + "step": 134706 + }, + { + "epoch": 1.62, + "grad_norm": 9.127851020748645, + "learning_rate": 1.8358805615694774e-06, + "loss": 1.3139, + "step": 134709 + }, + { + "epoch": 1.62, + "grad_norm": 5.432672326587707, + "learning_rate": 1.8355432302404152e-06, + "loss": 0.9954, + "step": 134712 + }, + { + "epoch": 1.62, + "grad_norm": 14.815215555611362, + "learning_rate": 1.8352059267737054e-06, + "loss": 1.0642, + "step": 134715 + }, + { + "epoch": 1.62, + "grad_norm": 8.494203902039475, + "learning_rate": 1.8348686511704939e-06, + "loss": 1.3317, + "step": 134718 + }, + { + "epoch": 1.62, + "grad_norm": 8.80574831195405, + "learning_rate": 1.8345314034319373e-06, + "loss": 1.0815, + "step": 134721 + }, + { + "epoch": 1.62, + "grad_norm": 18.6518721937327, + "learning_rate": 1.8341941835591814e-06, + "loss": 1.0712, + "step": 134724 + }, + { + "epoch": 1.62, + "grad_norm": 10.292380825284841, + "learning_rate": 1.8338569915533798e-06, + "loss": 0.7872, + "step": 134727 + }, + { + "epoch": 1.62, + "grad_norm": 11.061115880978623, + "learning_rate": 1.8335198274156828e-06, + "loss": 1.3775, + "step": 134730 + }, + { + "epoch": 1.62, + "grad_norm": 6.179174312034958, + "learning_rate": 1.8331826911472428e-06, + "loss": 0.9473, + "step": 134733 + }, + { + "epoch": 1.62, + "grad_norm": 8.462078335595363, + "learning_rate": 1.8328455827492098e-06, + "loss": 1.2044, + "step": 134736 + }, + { + "epoch": 1.62, + "grad_norm": 11.506049622623534, + "learning_rate": 1.8325085022227296e-06, + "loss": 1.2052, + "step": 134739 + }, + { + "epoch": 1.62, + "grad_norm": 5.960482795861025, + "learning_rate": 1.8321714495689568e-06, + "loss": 1.0836, + "step": 134742 + }, + { + "epoch": 1.62, + "grad_norm": 9.502976132921686, + "learning_rate": 1.8318344247890397e-06, + "loss": 1.2291, + "step": 134745 + }, + { + "epoch": 1.62, + "grad_norm": 12.371150226661173, + "learning_rate": 1.8314974278841324e-06, + "loss": 1.207, + "step": 134748 + }, + { + "epoch": 1.62, + "grad_norm": 25.139864022157752, + "learning_rate": 1.8311604588553799e-06, + "loss": 0.8137, + "step": 134751 + }, + { + "epoch": 1.62, + "grad_norm": 19.98645037426142, + "learning_rate": 1.8308235177039345e-06, + "loss": 1.0134, + "step": 134754 + }, + { + "epoch": 1.62, + "grad_norm": 2.6641374824289796, + "learning_rate": 1.8304866044309488e-06, + "loss": 0.811, + "step": 134757 + }, + { + "epoch": 1.62, + "grad_norm": 9.889318208863124, + "learning_rate": 1.830149719037566e-06, + "loss": 1.0749, + "step": 134760 + }, + { + "epoch": 1.62, + "grad_norm": 4.029038693238149, + "learning_rate": 1.8298128615249434e-06, + "loss": 1.1884, + "step": 134763 + }, + { + "epoch": 1.62, + "grad_norm": 17.11539783267254, + "learning_rate": 1.8294760318942228e-06, + "loss": 1.2186, + "step": 134766 + }, + { + "epoch": 1.62, + "grad_norm": 6.8038199192633995, + "learning_rate": 1.8291392301465572e-06, + "loss": 1.0391, + "step": 134769 + }, + { + "epoch": 1.62, + "grad_norm": 14.594486617840609, + "learning_rate": 1.8288024562830974e-06, + "loss": 0.9405, + "step": 134772 + }, + { + "epoch": 1.62, + "grad_norm": 3.908044124338808, + "learning_rate": 1.8284657103049942e-06, + "loss": 1.0458, + "step": 134775 + }, + { + "epoch": 1.62, + "grad_norm": 6.098247651016899, + "learning_rate": 1.8281289922133926e-06, + "loss": 1.2017, + "step": 134778 + }, + { + "epoch": 1.62, + "grad_norm": 12.562173586372058, + "learning_rate": 1.8277923020094424e-06, + "loss": 0.8836, + "step": 134781 + }, + { + "epoch": 1.62, + "grad_norm": 14.164225641226952, + "learning_rate": 1.8274556396942932e-06, + "loss": 0.9714, + "step": 134784 + }, + { + "epoch": 1.62, + "grad_norm": 6.49838011131572, + "learning_rate": 1.8271190052690935e-06, + "loss": 1.3721, + "step": 134787 + }, + { + "epoch": 1.62, + "grad_norm": 3.4654171323587857, + "learning_rate": 1.8267823987349954e-06, + "loss": 1.1685, + "step": 134790 + }, + { + "epoch": 1.62, + "grad_norm": 8.068633668556519, + "learning_rate": 1.8264458200931423e-06, + "loss": 1.1162, + "step": 134793 + }, + { + "epoch": 1.62, + "grad_norm": 8.404018430706111, + "learning_rate": 1.826109269344686e-06, + "loss": 1.2368, + "step": 134796 + }, + { + "epoch": 1.62, + "grad_norm": 17.84652338747354, + "learning_rate": 1.825772746490777e-06, + "loss": 1.2271, + "step": 134799 + }, + { + "epoch": 1.62, + "grad_norm": 14.440232076858402, + "learning_rate": 1.8254362515325618e-06, + "loss": 1.2451, + "step": 134802 + }, + { + "epoch": 1.62, + "grad_norm": 4.326751270094513, + "learning_rate": 1.825099784471185e-06, + "loss": 1.0636, + "step": 134805 + }, + { + "epoch": 1.62, + "grad_norm": 12.229745328377515, + "learning_rate": 1.8247633453077984e-06, + "loss": 1.0118, + "step": 134808 + }, + { + "epoch": 1.62, + "grad_norm": 7.561057692317673, + "learning_rate": 1.8244269340435495e-06, + "loss": 1.329, + "step": 134811 + }, + { + "epoch": 1.62, + "grad_norm": 14.810974626159148, + "learning_rate": 1.8240905506795858e-06, + "loss": 1.2611, + "step": 134814 + }, + { + "epoch": 1.62, + "grad_norm": 5.164289840927365, + "learning_rate": 1.82375419521706e-06, + "loss": 1.1373, + "step": 134817 + }, + { + "epoch": 1.62, + "grad_norm": 41.31974239824893, + "learning_rate": 1.8234178676571158e-06, + "loss": 1.0864, + "step": 134820 + }, + { + "epoch": 1.62, + "grad_norm": 8.793895942816713, + "learning_rate": 1.8230815680008985e-06, + "loss": 1.2607, + "step": 134823 + }, + { + "epoch": 1.62, + "grad_norm": 13.050752571919734, + "learning_rate": 1.8227452962495585e-06, + "loss": 1.4528, + "step": 134826 + }, + { + "epoch": 1.62, + "grad_norm": 13.2726323208144, + "learning_rate": 1.822409052404246e-06, + "loss": 0.9882, + "step": 134829 + }, + { + "epoch": 1.62, + "grad_norm": 9.549004483381916, + "learning_rate": 1.8220728364661034e-06, + "loss": 0.7399, + "step": 134832 + }, + { + "epoch": 1.62, + "grad_norm": 6.88342718099099, + "learning_rate": 1.8217366484362798e-06, + "loss": 1.1938, + "step": 134835 + }, + { + "epoch": 1.62, + "grad_norm": 9.761003337444128, + "learning_rate": 1.8214004883159241e-06, + "loss": 1.1846, + "step": 134838 + }, + { + "epoch": 1.62, + "grad_norm": 38.19815448454844, + "learning_rate": 1.8210643561061848e-06, + "loss": 1.2171, + "step": 134841 + }, + { + "epoch": 1.62, + "grad_norm": 29.910787919498564, + "learning_rate": 1.820728251808207e-06, + "loss": 1.3776, + "step": 134844 + }, + { + "epoch": 1.62, + "grad_norm": 2.52546372650051, + "learning_rate": 1.8203921754231345e-06, + "loss": 1.1851, + "step": 134847 + }, + { + "epoch": 1.62, + "grad_norm": 10.410498195954624, + "learning_rate": 1.8200561269521178e-06, + "loss": 1.2224, + "step": 134850 + }, + { + "epoch": 1.62, + "grad_norm": 38.0830371367886, + "learning_rate": 1.8197201063963022e-06, + "loss": 1.2366, + "step": 134853 + }, + { + "epoch": 1.62, + "grad_norm": 13.360351765556654, + "learning_rate": 1.8193841137568392e-06, + "loss": 1.3293, + "step": 134856 + }, + { + "epoch": 1.62, + "grad_norm": 5.16784757952927, + "learning_rate": 1.8190481490348676e-06, + "loss": 0.9261, + "step": 134859 + }, + { + "epoch": 1.62, + "grad_norm": 15.68454296079664, + "learning_rate": 1.8187122122315414e-06, + "loss": 1.0838, + "step": 134862 + }, + { + "epoch": 1.62, + "grad_norm": 17.93302568484313, + "learning_rate": 1.8183763033480007e-06, + "loss": 1.0189, + "step": 134865 + }, + { + "epoch": 1.62, + "grad_norm": 4.46856862820173, + "learning_rate": 1.8180404223853954e-06, + "loss": 1.1152, + "step": 134868 + }, + { + "epoch": 1.62, + "grad_norm": 4.002808228067488, + "learning_rate": 1.8177045693448726e-06, + "loss": 1.3466, + "step": 134871 + }, + { + "epoch": 1.62, + "grad_norm": 12.234879251537958, + "learning_rate": 1.8173687442275745e-06, + "loss": 1.2951, + "step": 134874 + }, + { + "epoch": 1.62, + "grad_norm": 6.684105043891391, + "learning_rate": 1.8170329470346503e-06, + "loss": 1.2379, + "step": 134877 + }, + { + "epoch": 1.62, + "grad_norm": 6.816327611116408, + "learning_rate": 1.8166971777672436e-06, + "loss": 0.9531, + "step": 134880 + }, + { + "epoch": 1.62, + "grad_norm": 7.151155079843602, + "learning_rate": 1.8163614364265058e-06, + "loss": 0.8957, + "step": 134883 + }, + { + "epoch": 1.62, + "grad_norm": 12.545584807661589, + "learning_rate": 1.816025723013578e-06, + "loss": 1.0537, + "step": 134886 + }, + { + "epoch": 1.62, + "grad_norm": 9.643657417724054, + "learning_rate": 1.8156900375296038e-06, + "loss": 1.078, + "step": 134889 + }, + { + "epoch": 1.62, + "grad_norm": 9.313840779873706, + "learning_rate": 1.8153543799757322e-06, + "loss": 1.1068, + "step": 134892 + }, + { + "epoch": 1.62, + "grad_norm": 6.854049083685953, + "learning_rate": 1.815018750353108e-06, + "loss": 0.948, + "step": 134895 + }, + { + "epoch": 1.62, + "grad_norm": 10.529916559989736, + "learning_rate": 1.8146831486628791e-06, + "loss": 1.3754, + "step": 134898 + }, + { + "epoch": 1.62, + "grad_norm": 8.208133684528136, + "learning_rate": 1.8143475749061856e-06, + "loss": 1.0944, + "step": 134901 + }, + { + "epoch": 1.62, + "grad_norm": 19.056805905591542, + "learning_rate": 1.8140120290841777e-06, + "loss": 1.1788, + "step": 134904 + }, + { + "epoch": 1.62, + "grad_norm": 14.933541905860768, + "learning_rate": 1.813676511197996e-06, + "loss": 1.2898, + "step": 134907 + }, + { + "epoch": 1.62, + "grad_norm": 9.94315768344935, + "learning_rate": 1.813341021248789e-06, + "loss": 1.2963, + "step": 134910 + }, + { + "epoch": 1.62, + "grad_norm": 3.039281455555611, + "learning_rate": 1.8130055592376995e-06, + "loss": 0.8905, + "step": 134913 + }, + { + "epoch": 1.62, + "grad_norm": 6.282720641834748, + "learning_rate": 1.8126701251658719e-06, + "loss": 1.2263, + "step": 134916 + }, + { + "epoch": 1.62, + "grad_norm": 3.288676457068737, + "learning_rate": 1.812334719034452e-06, + "loss": 1.7533, + "step": 134919 + }, + { + "epoch": 1.62, + "grad_norm": 12.549967234564713, + "learning_rate": 1.8119993408445858e-06, + "loss": 1.2283, + "step": 134922 + }, + { + "epoch": 1.62, + "grad_norm": 5.505227449447862, + "learning_rate": 1.8116639905974176e-06, + "loss": 1.2359, + "step": 134925 + }, + { + "epoch": 1.62, + "grad_norm": 12.083398592578199, + "learning_rate": 1.811328668294091e-06, + "loss": 1.2502, + "step": 134928 + }, + { + "epoch": 1.62, + "grad_norm": 4.102163579824403, + "learning_rate": 1.8109933739357487e-06, + "loss": 1.3179, + "step": 134931 + }, + { + "epoch": 1.62, + "grad_norm": 5.804994913649743, + "learning_rate": 1.8106581075235352e-06, + "loss": 1.104, + "step": 134934 + }, + { + "epoch": 1.62, + "grad_norm": 11.355097514439198, + "learning_rate": 1.8103228690585995e-06, + "loss": 1.103, + "step": 134937 + }, + { + "epoch": 1.62, + "grad_norm": 21.76130759755441, + "learning_rate": 1.8099876585420784e-06, + "loss": 1.1342, + "step": 134940 + }, + { + "epoch": 1.62, + "grad_norm": 14.029848800632742, + "learning_rate": 1.8096524759751188e-06, + "loss": 1.3418, + "step": 134943 + }, + { + "epoch": 1.62, + "grad_norm": 3.3145019789890857, + "learning_rate": 1.8093173213588688e-06, + "loss": 1.1449, + "step": 134946 + }, + { + "epoch": 1.62, + "grad_norm": 11.707757404839155, + "learning_rate": 1.8089821946944664e-06, + "loss": 0.8944, + "step": 134949 + }, + { + "epoch": 1.62, + "grad_norm": 27.101253758065646, + "learning_rate": 1.8086470959830592e-06, + "loss": 1.6359, + "step": 134952 + }, + { + "epoch": 1.62, + "grad_norm": 5.619728906277217, + "learning_rate": 1.8083120252257857e-06, + "loss": 1.1733, + "step": 134955 + }, + { + "epoch": 1.62, + "grad_norm": 7.483245018662911, + "learning_rate": 1.8079769824237937e-06, + "loss": 1.2078, + "step": 134958 + }, + { + "epoch": 1.62, + "grad_norm": 8.80755573596974, + "learning_rate": 1.8076419675782253e-06, + "loss": 1.003, + "step": 134961 + }, + { + "epoch": 1.62, + "grad_norm": 7.192855183125918, + "learning_rate": 1.8073069806902244e-06, + "loss": 1.304, + "step": 134964 + }, + { + "epoch": 1.62, + "grad_norm": 2.9638702654504527, + "learning_rate": 1.8069720217609355e-06, + "loss": 1.085, + "step": 134967 + }, + { + "epoch": 1.62, + "grad_norm": 16.91473826567034, + "learning_rate": 1.8066370907914998e-06, + "loss": 1.5587, + "step": 134970 + }, + { + "epoch": 1.62, + "grad_norm": 6.265689819586929, + "learning_rate": 1.8063021877830588e-06, + "loss": 1.3405, + "step": 134973 + }, + { + "epoch": 1.62, + "grad_norm": 5.935340171383529, + "learning_rate": 1.805967312736756e-06, + "loss": 1.1234, + "step": 134976 + }, + { + "epoch": 1.62, + "grad_norm": 5.513064659627592, + "learning_rate": 1.8056324656537394e-06, + "loss": 1.0146, + "step": 134979 + }, + { + "epoch": 1.62, + "grad_norm": 12.208241074164727, + "learning_rate": 1.8052976465351436e-06, + "loss": 1.3164, + "step": 134982 + }, + { + "epoch": 1.62, + "grad_norm": 4.935032962564714, + "learning_rate": 1.8049628553821164e-06, + "loss": 0.6501, + "step": 134985 + }, + { + "epoch": 1.62, + "grad_norm": 5.565911708607775, + "learning_rate": 1.8046280921958004e-06, + "loss": 1.0055, + "step": 134988 + }, + { + "epoch": 1.62, + "grad_norm": 29.77607756897388, + "learning_rate": 1.8042933569773346e-06, + "loss": 1.1463, + "step": 134991 + }, + { + "epoch": 1.62, + "grad_norm": 12.330932344204339, + "learning_rate": 1.803958649727867e-06, + "loss": 0.938, + "step": 134994 + }, + { + "epoch": 1.62, + "grad_norm": 12.017231250258966, + "learning_rate": 1.8036239704485326e-06, + "loss": 1.7474, + "step": 134997 + }, + { + "epoch": 1.62, + "grad_norm": 3.4903892977767828, + "learning_rate": 1.8032893191404777e-06, + "loss": 0.9434, + "step": 135000 + }, + { + "epoch": 1.62, + "grad_norm": 6.455086990558025, + "learning_rate": 1.8029546958048437e-06, + "loss": 1.3015, + "step": 135003 + }, + { + "epoch": 1.62, + "grad_norm": 7.091336190225324, + "learning_rate": 1.8026201004427756e-06, + "loss": 1.0189, + "step": 135006 + }, + { + "epoch": 1.62, + "grad_norm": 19.553985482319828, + "learning_rate": 1.8022855330554102e-06, + "loss": 1.2862, + "step": 135009 + }, + { + "epoch": 1.62, + "grad_norm": 32.297074942368326, + "learning_rate": 1.801950993643894e-06, + "loss": 1.2024, + "step": 135012 + }, + { + "epoch": 1.62, + "grad_norm": 12.913199925432702, + "learning_rate": 1.801616482209363e-06, + "loss": 1.2131, + "step": 135015 + }, + { + "epoch": 1.62, + "grad_norm": 10.312239515189615, + "learning_rate": 1.8012819987529629e-06, + "loss": 1.0494, + "step": 135018 + }, + { + "epoch": 1.62, + "grad_norm": 4.792986648647456, + "learning_rate": 1.800947543275835e-06, + "loss": 1.1588, + "step": 135021 + }, + { + "epoch": 1.62, + "grad_norm": 11.511187618141989, + "learning_rate": 1.8006131157791185e-06, + "loss": 1.3186, + "step": 135024 + }, + { + "epoch": 1.62, + "grad_norm": 30.31569400772328, + "learning_rate": 1.8002787162639556e-06, + "loss": 0.9257, + "step": 135027 + }, + { + "epoch": 1.62, + "grad_norm": 7.785782956530179, + "learning_rate": 1.799944344731489e-06, + "loss": 1.1151, + "step": 135030 + }, + { + "epoch": 1.62, + "grad_norm": 6.8724958280691615, + "learning_rate": 1.7996100011828599e-06, + "loss": 1.1822, + "step": 135033 + }, + { + "epoch": 1.62, + "grad_norm": 11.76203531681596, + "learning_rate": 1.7992756856192085e-06, + "loss": 1.4713, + "step": 135036 + }, + { + "epoch": 1.62, + "grad_norm": 7.798849120061235, + "learning_rate": 1.7989413980416726e-06, + "loss": 0.9961, + "step": 135039 + }, + { + "epoch": 1.62, + "grad_norm": 10.021422267343182, + "learning_rate": 1.7986071384513958e-06, + "loss": 1.0832, + "step": 135042 + }, + { + "epoch": 1.62, + "grad_norm": 159.43929568051092, + "learning_rate": 1.7982729068495198e-06, + "loss": 0.9334, + "step": 135045 + }, + { + "epoch": 1.62, + "grad_norm": 5.92486333635185, + "learning_rate": 1.7979387032371854e-06, + "loss": 1.1846, + "step": 135048 + }, + { + "epoch": 1.62, + "grad_norm": 8.85157390616528, + "learning_rate": 1.7976045276155297e-06, + "loss": 0.9855, + "step": 135051 + }, + { + "epoch": 1.62, + "grad_norm": 12.144536058536621, + "learning_rate": 1.7972703799856973e-06, + "loss": 1.1498, + "step": 135054 + }, + { + "epoch": 1.62, + "grad_norm": 12.425917140943922, + "learning_rate": 1.7969362603488238e-06, + "loss": 1.0057, + "step": 135057 + }, + { + "epoch": 1.62, + "grad_norm": 3.657068422451476, + "learning_rate": 1.7966021687060552e-06, + "loss": 1.0431, + "step": 135060 + }, + { + "epoch": 1.62, + "grad_norm": 13.598197759890922, + "learning_rate": 1.7962681050585263e-06, + "loss": 1.1766, + "step": 135063 + }, + { + "epoch": 1.62, + "grad_norm": 72.06901267507209, + "learning_rate": 1.795934069407379e-06, + "loss": 1.1952, + "step": 135066 + }, + { + "epoch": 1.62, + "grad_norm": 19.608651975606016, + "learning_rate": 1.795600061753754e-06, + "loss": 1.1745, + "step": 135069 + }, + { + "epoch": 1.62, + "grad_norm": 13.612033937137044, + "learning_rate": 1.7952660820987911e-06, + "loss": 1.059, + "step": 135072 + }, + { + "epoch": 1.62, + "grad_norm": 7.365922559064734, + "learning_rate": 1.7949321304436306e-06, + "loss": 0.9421, + "step": 135075 + }, + { + "epoch": 1.62, + "grad_norm": 7.297628089652273, + "learning_rate": 1.7945982067894129e-06, + "loss": 1.0544, + "step": 135078 + }, + { + "epoch": 1.62, + "grad_norm": 4.275578210693003, + "learning_rate": 1.7942643111372726e-06, + "loss": 1.3392, + "step": 135081 + }, + { + "epoch": 1.62, + "grad_norm": 10.420519169387411, + "learning_rate": 1.7939304434883531e-06, + "loss": 1.46, + "step": 135084 + }, + { + "epoch": 1.62, + "grad_norm": 4.120315446153206, + "learning_rate": 1.7935966038437957e-06, + "loss": 1.1641, + "step": 135087 + }, + { + "epoch": 1.62, + "grad_norm": 8.642958841155409, + "learning_rate": 1.7932627922047342e-06, + "loss": 1.2127, + "step": 135090 + }, + { + "epoch": 1.62, + "grad_norm": 29.286304289238636, + "learning_rate": 1.7929290085723105e-06, + "loss": 0.9398, + "step": 135093 + }, + { + "epoch": 1.62, + "grad_norm": 11.641585032493381, + "learning_rate": 1.7925952529476665e-06, + "loss": 1.185, + "step": 135096 + }, + { + "epoch": 1.62, + "grad_norm": 14.784895582441717, + "learning_rate": 1.7922615253319364e-06, + "loss": 1.2462, + "step": 135099 + }, + { + "epoch": 1.62, + "grad_norm": 4.068876642351183, + "learning_rate": 1.791927825726264e-06, + "loss": 1.1993, + "step": 135102 + }, + { + "epoch": 1.62, + "grad_norm": 31.645026237269196, + "learning_rate": 1.7915941541317828e-06, + "loss": 1.2148, + "step": 135105 + }, + { + "epoch": 1.62, + "grad_norm": 4.647575427755978, + "learning_rate": 1.791260510549634e-06, + "loss": 1.1019, + "step": 135108 + }, + { + "epoch": 1.62, + "grad_norm": 3.5238235566062297, + "learning_rate": 1.7909268949809567e-06, + "loss": 1.2712, + "step": 135111 + }, + { + "epoch": 1.62, + "grad_norm": 4.279984883371433, + "learning_rate": 1.790593307426891e-06, + "loss": 1.2145, + "step": 135114 + }, + { + "epoch": 1.62, + "grad_norm": 3.4727864409655753, + "learning_rate": 1.7902597478885708e-06, + "loss": 1.023, + "step": 135117 + }, + { + "epoch": 1.62, + "grad_norm": 6.8819663202162875, + "learning_rate": 1.7899262163671395e-06, + "loss": 1.1215, + "step": 135120 + }, + { + "epoch": 1.62, + "grad_norm": 14.470932915477862, + "learning_rate": 1.7895927128637303e-06, + "loss": 1.1768, + "step": 135123 + }, + { + "epoch": 1.62, + "grad_norm": 9.36717225966384, + "learning_rate": 1.7892592373794837e-06, + "loss": 1.1606, + "step": 135126 + }, + { + "epoch": 1.62, + "grad_norm": 5.714415466106291, + "learning_rate": 1.788925789915541e-06, + "loss": 0.6724, + "step": 135129 + }, + { + "epoch": 1.62, + "grad_norm": 3.5023398807967996, + "learning_rate": 1.7885923704730336e-06, + "loss": 1.1611, + "step": 135132 + }, + { + "epoch": 1.62, + "grad_norm": 7.7512601862214145, + "learning_rate": 1.7882589790531025e-06, + "loss": 1.1196, + "step": 135135 + }, + { + "epoch": 1.62, + "grad_norm": 19.49396556657535, + "learning_rate": 1.7879256156568892e-06, + "loss": 1.2306, + "step": 135138 + }, + { + "epoch": 1.63, + "grad_norm": 7.117069276629924, + "learning_rate": 1.7875922802855272e-06, + "loss": 1.1971, + "step": 135141 + }, + { + "epoch": 1.63, + "grad_norm": 44.722682924460905, + "learning_rate": 1.7872589729401524e-06, + "loss": 1.1377, + "step": 135144 + }, + { + "epoch": 1.63, + "grad_norm": 9.498561240652686, + "learning_rate": 1.7869256936219038e-06, + "loss": 1.373, + "step": 135147 + }, + { + "epoch": 1.63, + "grad_norm": 8.20272304112194, + "learning_rate": 1.7865924423319203e-06, + "loss": 0.925, + "step": 135150 + }, + { + "epoch": 1.63, + "grad_norm": 8.778012213759142, + "learning_rate": 1.7862592190713379e-06, + "loss": 0.9877, + "step": 135153 + }, + { + "epoch": 1.63, + "grad_norm": 8.59875654935533, + "learning_rate": 1.7859260238412968e-06, + "loss": 1.2632, + "step": 135156 + }, + { + "epoch": 1.63, + "grad_norm": 12.591987857477084, + "learning_rate": 1.7855928566429292e-06, + "loss": 1.0314, + "step": 135159 + }, + { + "epoch": 1.63, + "grad_norm": 18.316002445413485, + "learning_rate": 1.7852597174773767e-06, + "loss": 0.7747, + "step": 135162 + }, + { + "epoch": 1.63, + "grad_norm": 6.4493112273433315, + "learning_rate": 1.7849266063457716e-06, + "loss": 1.4195, + "step": 135165 + }, + { + "epoch": 1.63, + "grad_norm": 4.3864275306296125, + "learning_rate": 1.784593523249255e-06, + "loss": 1.337, + "step": 135168 + }, + { + "epoch": 1.63, + "grad_norm": 8.144366163217994, + "learning_rate": 1.7842604681889597e-06, + "loss": 1.1704, + "step": 135171 + }, + { + "epoch": 1.63, + "grad_norm": 6.590957811936463, + "learning_rate": 1.7839274411660234e-06, + "loss": 1.5964, + "step": 135174 + }, + { + "epoch": 1.63, + "grad_norm": 26.46893326175418, + "learning_rate": 1.7835944421815832e-06, + "loss": 1.0118, + "step": 135177 + }, + { + "epoch": 1.63, + "grad_norm": 2.8751195581375164, + "learning_rate": 1.783261471236779e-06, + "loss": 1.0389, + "step": 135180 + }, + { + "epoch": 1.63, + "grad_norm": 7.280664018642588, + "learning_rate": 1.7829285283327436e-06, + "loss": 1.3444, + "step": 135183 + }, + { + "epoch": 1.63, + "grad_norm": 8.282510883586616, + "learning_rate": 1.7825956134706102e-06, + "loss": 0.9433, + "step": 135186 + }, + { + "epoch": 1.63, + "grad_norm": 8.63508907092263, + "learning_rate": 1.782262726651518e-06, + "loss": 0.8824, + "step": 135189 + }, + { + "epoch": 1.63, + "grad_norm": 20.287620046783452, + "learning_rate": 1.781929867876604e-06, + "loss": 1.2088, + "step": 135192 + }, + { + "epoch": 1.63, + "grad_norm": 7.48197332634516, + "learning_rate": 1.7815970371470049e-06, + "loss": 1.184, + "step": 135195 + }, + { + "epoch": 1.63, + "grad_norm": 19.81973376764658, + "learning_rate": 1.781264234463853e-06, + "loss": 1.5402, + "step": 135198 + }, + { + "epoch": 1.63, + "grad_norm": 4.289552640612359, + "learning_rate": 1.7809314598282856e-06, + "loss": 1.1934, + "step": 135201 + }, + { + "epoch": 1.63, + "grad_norm": 10.697150985477908, + "learning_rate": 1.7805987132414404e-06, + "loss": 0.7513, + "step": 135204 + }, + { + "epoch": 1.63, + "grad_norm": 17.82693683855526, + "learning_rate": 1.7802659947044498e-06, + "loss": 1.0613, + "step": 135207 + }, + { + "epoch": 1.63, + "grad_norm": 12.396975259782927, + "learning_rate": 1.779933304218453e-06, + "loss": 1.0918, + "step": 135210 + }, + { + "epoch": 1.63, + "grad_norm": 8.007605979678598, + "learning_rate": 1.7796006417845801e-06, + "loss": 0.819, + "step": 135213 + }, + { + "epoch": 1.63, + "grad_norm": 14.363367779885886, + "learning_rate": 1.7792680074039692e-06, + "loss": 1.4104, + "step": 135216 + }, + { + "epoch": 1.63, + "grad_norm": 4.676027645345535, + "learning_rate": 1.7789354010777549e-06, + "loss": 1.2063, + "step": 135219 + }, + { + "epoch": 1.63, + "grad_norm": 5.86104389354449, + "learning_rate": 1.7786028228070762e-06, + "loss": 1.1795, + "step": 135222 + }, + { + "epoch": 1.63, + "grad_norm": 5.056533421106683, + "learning_rate": 1.7782702725930644e-06, + "loss": 1.0858, + "step": 135225 + }, + { + "epoch": 1.63, + "grad_norm": 22.719479060049895, + "learning_rate": 1.7779377504368533e-06, + "loss": 0.9122, + "step": 135228 + }, + { + "epoch": 1.63, + "grad_norm": 4.7941059517077305, + "learning_rate": 1.7776052563395774e-06, + "loss": 1.3629, + "step": 135231 + }, + { + "epoch": 1.63, + "grad_norm": 4.112521850555485, + "learning_rate": 1.7772727903023746e-06, + "loss": 1.4103, + "step": 135234 + }, + { + "epoch": 1.63, + "grad_norm": 5.844643733779569, + "learning_rate": 1.7769403523263795e-06, + "loss": 1.2212, + "step": 135237 + }, + { + "epoch": 1.63, + "grad_norm": 6.015395945490255, + "learning_rate": 1.7766079424127226e-06, + "loss": 1.2251, + "step": 135240 + }, + { + "epoch": 1.63, + "grad_norm": 4.427246933952454, + "learning_rate": 1.7762755605625414e-06, + "loss": 0.8737, + "step": 135243 + }, + { + "epoch": 1.63, + "grad_norm": 19.869334171351877, + "learning_rate": 1.775943206776971e-06, + "loss": 1.2506, + "step": 135246 + }, + { + "epoch": 1.63, + "grad_norm": 10.040486427044181, + "learning_rate": 1.7756108810571448e-06, + "loss": 1.0767, + "step": 135249 + }, + { + "epoch": 1.63, + "grad_norm": 7.582344274770864, + "learning_rate": 1.775278583404194e-06, + "loss": 1.1966, + "step": 135252 + }, + { + "epoch": 1.63, + "grad_norm": 12.6421774994632, + "learning_rate": 1.7749463138192546e-06, + "loss": 1.2048, + "step": 135255 + }, + { + "epoch": 1.63, + "grad_norm": 13.06377590577309, + "learning_rate": 1.7746140723034599e-06, + "loss": 1.315, + "step": 135258 + }, + { + "epoch": 1.63, + "grad_norm": 7.1907690890510105, + "learning_rate": 1.7742818588579458e-06, + "loss": 1.1953, + "step": 135261 + }, + { + "epoch": 1.63, + "grad_norm": 9.459103930032628, + "learning_rate": 1.7739496734838468e-06, + "loss": 1.1487, + "step": 135264 + }, + { + "epoch": 1.63, + "grad_norm": 10.122112968417426, + "learning_rate": 1.7736175161822921e-06, + "loss": 1.0673, + "step": 135267 + }, + { + "epoch": 1.63, + "grad_norm": 22.681950163972086, + "learning_rate": 1.7732853869544208e-06, + "loss": 1.2922, + "step": 135270 + }, + { + "epoch": 1.63, + "grad_norm": 9.226581236514397, + "learning_rate": 1.77295328580136e-06, + "loss": 1.0804, + "step": 135273 + }, + { + "epoch": 1.63, + "grad_norm": 9.82679276542157, + "learning_rate": 1.7726212127242492e-06, + "loss": 1.1581, + "step": 135276 + }, + { + "epoch": 1.63, + "grad_norm": 16.853863312622188, + "learning_rate": 1.772289167724216e-06, + "loss": 1.3123, + "step": 135279 + }, + { + "epoch": 1.63, + "grad_norm": 6.277232752653457, + "learning_rate": 1.771957150802396e-06, + "loss": 0.8847, + "step": 135282 + }, + { + "epoch": 1.63, + "grad_norm": 15.335986666447212, + "learning_rate": 1.7716251619599223e-06, + "loss": 1.1129, + "step": 135285 + }, + { + "epoch": 1.63, + "grad_norm": 8.085306337954986, + "learning_rate": 1.7712932011979312e-06, + "loss": 1.0614, + "step": 135288 + }, + { + "epoch": 1.63, + "grad_norm": 14.693243549699428, + "learning_rate": 1.7709612685175526e-06, + "loss": 1.3143, + "step": 135291 + }, + { + "epoch": 1.63, + "grad_norm": 8.63643048048031, + "learning_rate": 1.7706293639199156e-06, + "loss": 1.5685, + "step": 135294 + }, + { + "epoch": 1.63, + "grad_norm": 3.6780614260861806, + "learning_rate": 1.7702974874061573e-06, + "loss": 1.7542, + "step": 135297 + }, + { + "epoch": 1.63, + "grad_norm": 6.274146696305951, + "learning_rate": 1.769965638977409e-06, + "loss": 1.0599, + "step": 135300 + }, + { + "epoch": 1.63, + "grad_norm": 5.743894924678591, + "learning_rate": 1.7696338186348061e-06, + "loss": 1.3498, + "step": 135303 + }, + { + "epoch": 1.63, + "grad_norm": 9.313776291793447, + "learning_rate": 1.7693020263794757e-06, + "loss": 1.354, + "step": 135306 + }, + { + "epoch": 1.63, + "grad_norm": 17.325446389755527, + "learning_rate": 1.7689702622125536e-06, + "loss": 1.1512, + "step": 135309 + }, + { + "epoch": 1.63, + "grad_norm": 14.801515841809953, + "learning_rate": 1.7686385261351724e-06, + "loss": 1.1998, + "step": 135312 + }, + { + "epoch": 1.63, + "grad_norm": 5.942451944060324, + "learning_rate": 1.768306818148462e-06, + "loss": 1.0815, + "step": 135315 + }, + { + "epoch": 1.63, + "grad_norm": 11.490110405169263, + "learning_rate": 1.767975138253557e-06, + "loss": 1.1455, + "step": 135318 + }, + { + "epoch": 1.63, + "grad_norm": 1.951741001154141, + "learning_rate": 1.7676434864515846e-06, + "loss": 1.1883, + "step": 135321 + }, + { + "epoch": 1.63, + "grad_norm": 5.3964397243423585, + "learning_rate": 1.7673118627436804e-06, + "loss": 0.866, + "step": 135324 + }, + { + "epoch": 1.63, + "grad_norm": 4.210131455619498, + "learning_rate": 1.7669802671309755e-06, + "loss": 1.2938, + "step": 135327 + }, + { + "epoch": 1.63, + "grad_norm": 16.89608620452104, + "learning_rate": 1.7666486996146038e-06, + "loss": 1.3304, + "step": 135330 + }, + { + "epoch": 1.63, + "grad_norm": 21.300054756788743, + "learning_rate": 1.7663171601956952e-06, + "loss": 1.0747, + "step": 135333 + }, + { + "epoch": 1.63, + "grad_norm": 8.698853759483113, + "learning_rate": 1.7659856488753767e-06, + "loss": 0.9322, + "step": 135336 + }, + { + "epoch": 1.63, + "grad_norm": 12.53397103431952, + "learning_rate": 1.765654165654783e-06, + "loss": 0.9691, + "step": 135339 + }, + { + "epoch": 1.63, + "grad_norm": 13.569228592335415, + "learning_rate": 1.7653227105350469e-06, + "loss": 0.9303, + "step": 135342 + }, + { + "epoch": 1.63, + "grad_norm": 4.255625258830988, + "learning_rate": 1.7649912835173e-06, + "loss": 0.9187, + "step": 135345 + }, + { + "epoch": 1.63, + "grad_norm": 5.432588713826978, + "learning_rate": 1.7646598846026696e-06, + "loss": 1.324, + "step": 135348 + }, + { + "epoch": 1.63, + "grad_norm": 3.7652318330731482, + "learning_rate": 1.7643285137922883e-06, + "loss": 1.1373, + "step": 135351 + }, + { + "epoch": 1.63, + "grad_norm": 4.126029908546224, + "learning_rate": 1.7639971710872905e-06, + "loss": 0.9555, + "step": 135354 + }, + { + "epoch": 1.63, + "grad_norm": 8.337356467597278, + "learning_rate": 1.7636658564888032e-06, + "loss": 1.3673, + "step": 135357 + }, + { + "epoch": 1.63, + "grad_norm": 8.362974012338334, + "learning_rate": 1.7633345699979554e-06, + "loss": 1.0843, + "step": 135360 + }, + { + "epoch": 1.63, + "grad_norm": 6.449085098012915, + "learning_rate": 1.7630033116158795e-06, + "loss": 1.2407, + "step": 135363 + }, + { + "epoch": 1.63, + "grad_norm": 18.890087250121642, + "learning_rate": 1.762672081343706e-06, + "loss": 1.2228, + "step": 135366 + }, + { + "epoch": 1.63, + "grad_norm": 11.821448796148228, + "learning_rate": 1.7623408791825659e-06, + "loss": 1.2365, + "step": 135369 + }, + { + "epoch": 1.63, + "grad_norm": 11.379330720648452, + "learning_rate": 1.7620097051335917e-06, + "loss": 1.216, + "step": 135372 + }, + { + "epoch": 1.63, + "grad_norm": 20.218865679580663, + "learning_rate": 1.7616785591979112e-06, + "loss": 0.9386, + "step": 135375 + }, + { + "epoch": 1.63, + "grad_norm": 7.486996629727675, + "learning_rate": 1.7613474413766518e-06, + "loss": 1.1285, + "step": 135378 + }, + { + "epoch": 1.63, + "grad_norm": 3.9833756633636184, + "learning_rate": 1.7610163516709467e-06, + "loss": 1.248, + "step": 135381 + }, + { + "epoch": 1.63, + "grad_norm": 4.983894973026639, + "learning_rate": 1.760685290081927e-06, + "loss": 1.0717, + "step": 135384 + }, + { + "epoch": 1.63, + "grad_norm": 19.708327157689858, + "learning_rate": 1.760354256610718e-06, + "loss": 1.2746, + "step": 135387 + }, + { + "epoch": 1.63, + "grad_norm": 3.8251269445430944, + "learning_rate": 1.7600232512584514e-06, + "loss": 1.3134, + "step": 135390 + }, + { + "epoch": 1.63, + "grad_norm": 4.305586747820265, + "learning_rate": 1.759692274026259e-06, + "loss": 0.891, + "step": 135393 + }, + { + "epoch": 1.63, + "grad_norm": 15.153846640852509, + "learning_rate": 1.7593613249152696e-06, + "loss": 1.1567, + "step": 135396 + }, + { + "epoch": 1.63, + "grad_norm": 3.5608259665888107, + "learning_rate": 1.7590304039266126e-06, + "loss": 1.1887, + "step": 135399 + }, + { + "epoch": 1.63, + "grad_norm": 6.299720639931536, + "learning_rate": 1.7586995110614148e-06, + "loss": 1.2522, + "step": 135402 + }, + { + "epoch": 1.63, + "grad_norm": 10.07119889023141, + "learning_rate": 1.758368646320806e-06, + "loss": 1.1925, + "step": 135405 + }, + { + "epoch": 1.63, + "grad_norm": 17.034525243132922, + "learning_rate": 1.758037809705916e-06, + "loss": 1.3405, + "step": 135408 + }, + { + "epoch": 1.63, + "grad_norm": 8.741974547453765, + "learning_rate": 1.7577070012178776e-06, + "loss": 1.1582, + "step": 135411 + }, + { + "epoch": 1.63, + "grad_norm": 9.499800958209255, + "learning_rate": 1.7573762208578127e-06, + "loss": 1.1028, + "step": 135414 + }, + { + "epoch": 1.63, + "grad_norm": 10.024291609812224, + "learning_rate": 1.7570454686268578e-06, + "loss": 0.9621, + "step": 135417 + }, + { + "epoch": 1.63, + "grad_norm": 11.721767351375666, + "learning_rate": 1.7567147445261345e-06, + "loss": 1.1925, + "step": 135420 + }, + { + "epoch": 1.63, + "grad_norm": 23.540100821628194, + "learning_rate": 1.756384048556774e-06, + "loss": 1.3436, + "step": 135423 + }, + { + "epoch": 1.63, + "grad_norm": 5.704816605823765, + "learning_rate": 1.7560533807199086e-06, + "loss": 0.9216, + "step": 135426 + }, + { + "epoch": 1.63, + "grad_norm": 10.8933030304893, + "learning_rate": 1.7557227410166599e-06, + "loss": 1.116, + "step": 135429 + }, + { + "epoch": 1.63, + "grad_norm": 5.780492897108638, + "learning_rate": 1.7553921294481612e-06, + "loss": 1.0349, + "step": 135432 + }, + { + "epoch": 1.63, + "grad_norm": 18.82763799224296, + "learning_rate": 1.7550615460155397e-06, + "loss": 1.1212, + "step": 135435 + }, + { + "epoch": 1.63, + "grad_norm": 6.094249448799251, + "learning_rate": 1.754730990719925e-06, + "loss": 0.9373, + "step": 135438 + }, + { + "epoch": 1.63, + "grad_norm": 4.311246400313564, + "learning_rate": 1.7544004635624434e-06, + "loss": 1.1457, + "step": 135441 + }, + { + "epoch": 1.63, + "grad_norm": 7.230262593055353, + "learning_rate": 1.7540699645442205e-06, + "loss": 1.1062, + "step": 135444 + }, + { + "epoch": 1.63, + "grad_norm": 7.736271055534162, + "learning_rate": 1.7537394936663875e-06, + "loss": 1.2376, + "step": 135447 + }, + { + "epoch": 1.63, + "grad_norm": 6.967145113290165, + "learning_rate": 1.7534090509300706e-06, + "loss": 1.0627, + "step": 135450 + }, + { + "epoch": 1.63, + "grad_norm": 10.329684940175936, + "learning_rate": 1.7530786363364006e-06, + "loss": 1.0672, + "step": 135453 + }, + { + "epoch": 1.63, + "grad_norm": 6.657230135533803, + "learning_rate": 1.7527482498865e-06, + "loss": 1.1646, + "step": 135456 + }, + { + "epoch": 1.63, + "grad_norm": 7.0662979761224, + "learning_rate": 1.7524178915815027e-06, + "loss": 1.1952, + "step": 135459 + }, + { + "epoch": 1.63, + "grad_norm": 3.841148538557246, + "learning_rate": 1.7520875614225286e-06, + "loss": 1.0715, + "step": 135462 + }, + { + "epoch": 1.63, + "grad_norm": 2.9208003021004583, + "learning_rate": 1.7517572594107112e-06, + "loss": 1.3921, + "step": 135465 + }, + { + "epoch": 1.63, + "grad_norm": 4.9495791934271285, + "learning_rate": 1.7514269855471744e-06, + "loss": 1.1703, + "step": 135468 + }, + { + "epoch": 1.63, + "grad_norm": 3.283670018264965, + "learning_rate": 1.7510967398330448e-06, + "loss": 1.3634, + "step": 135471 + }, + { + "epoch": 1.63, + "grad_norm": 9.398411089831912, + "learning_rate": 1.7507665222694502e-06, + "loss": 1.2014, + "step": 135474 + }, + { + "epoch": 1.63, + "grad_norm": 17.298031660622975, + "learning_rate": 1.750436332857519e-06, + "loss": 0.942, + "step": 135477 + }, + { + "epoch": 1.63, + "grad_norm": 4.587264145208165, + "learning_rate": 1.7501061715983803e-06, + "loss": 1.1209, + "step": 135480 + }, + { + "epoch": 1.63, + "grad_norm": 6.734077306714425, + "learning_rate": 1.7497760384931573e-06, + "loss": 1.179, + "step": 135483 + }, + { + "epoch": 1.63, + "grad_norm": 1.8700242570231087, + "learning_rate": 1.7494459335429737e-06, + "loss": 1.0458, + "step": 135486 + }, + { + "epoch": 1.63, + "grad_norm": 4.443649754584434, + "learning_rate": 1.7491158567489597e-06, + "loss": 0.9845, + "step": 135489 + }, + { + "epoch": 1.63, + "grad_norm": 14.200506495535324, + "learning_rate": 1.7487858081122434e-06, + "loss": 1.154, + "step": 135492 + }, + { + "epoch": 1.63, + "grad_norm": 3.1429903712828176, + "learning_rate": 1.7484557876339469e-06, + "loss": 1.0692, + "step": 135495 + }, + { + "epoch": 1.63, + "grad_norm": 2.2258454640290677, + "learning_rate": 1.7481257953151987e-06, + "loss": 1.516, + "step": 135498 + }, + { + "epoch": 1.63, + "grad_norm": 4.3089745683918865, + "learning_rate": 1.7477958311571274e-06, + "loss": 1.3769, + "step": 135501 + }, + { + "epoch": 1.63, + "grad_norm": 6.858098779350817, + "learning_rate": 1.7474658951608537e-06, + "loss": 1.1139, + "step": 135504 + }, + { + "epoch": 1.63, + "grad_norm": 4.202001567374497, + "learning_rate": 1.7471359873275095e-06, + "loss": 1.1764, + "step": 135507 + }, + { + "epoch": 1.63, + "grad_norm": 8.156696526408252, + "learning_rate": 1.7468061076582144e-06, + "loss": 1.4655, + "step": 135510 + }, + { + "epoch": 1.63, + "grad_norm": 22.90752053594858, + "learning_rate": 1.746476256154097e-06, + "loss": 0.9339, + "step": 135513 + }, + { + "epoch": 1.63, + "grad_norm": 15.074585974167775, + "learning_rate": 1.7461464328162835e-06, + "loss": 1.2855, + "step": 135516 + }, + { + "epoch": 1.63, + "grad_norm": 18.16786671342861, + "learning_rate": 1.7458166376459028e-06, + "loss": 0.7315, + "step": 135519 + }, + { + "epoch": 1.63, + "grad_norm": 10.036787530471885, + "learning_rate": 1.7454868706440731e-06, + "loss": 1.047, + "step": 135522 + }, + { + "epoch": 1.63, + "grad_norm": 10.83110229852332, + "learning_rate": 1.7451571318119265e-06, + "loss": 1.1893, + "step": 135525 + }, + { + "epoch": 1.63, + "grad_norm": 7.242315319082689, + "learning_rate": 1.7448274211505822e-06, + "loss": 1.388, + "step": 135528 + }, + { + "epoch": 1.63, + "grad_norm": 10.373119583583339, + "learning_rate": 1.7444977386611683e-06, + "loss": 1.0719, + "step": 135531 + }, + { + "epoch": 1.63, + "grad_norm": 10.139950128252886, + "learning_rate": 1.7441680843448127e-06, + "loss": 1.3609, + "step": 135534 + }, + { + "epoch": 1.63, + "grad_norm": 14.927154107971619, + "learning_rate": 1.7438384582026358e-06, + "loss": 1.072, + "step": 135537 + }, + { + "epoch": 1.63, + "grad_norm": 2.9020604466659075, + "learning_rate": 1.743508860235764e-06, + "loss": 1.341, + "step": 135540 + }, + { + "epoch": 1.63, + "grad_norm": 4.846820042728317, + "learning_rate": 1.7431792904453248e-06, + "loss": 0.7616, + "step": 135543 + }, + { + "epoch": 1.63, + "grad_norm": 4.6196859059931965, + "learning_rate": 1.742849748832438e-06, + "loss": 0.9543, + "step": 135546 + }, + { + "epoch": 1.63, + "grad_norm": 9.083603826615446, + "learning_rate": 1.7425202353982328e-06, + "loss": 1.157, + "step": 135549 + }, + { + "epoch": 1.63, + "grad_norm": 3.285024576206145, + "learning_rate": 1.7421907501438306e-06, + "loss": 1.3378, + "step": 135552 + }, + { + "epoch": 1.63, + "grad_norm": 7.7638972088871006, + "learning_rate": 1.741861293070356e-06, + "loss": 1.2334, + "step": 135555 + }, + { + "epoch": 1.63, + "grad_norm": 30.135368905762036, + "learning_rate": 1.7415318641789348e-06, + "loss": 1.186, + "step": 135558 + }, + { + "epoch": 1.63, + "grad_norm": 19.061941298852084, + "learning_rate": 1.7412024634706925e-06, + "loss": 1.2201, + "step": 135561 + }, + { + "epoch": 1.63, + "grad_norm": 7.125346065670425, + "learning_rate": 1.7408730909467496e-06, + "loss": 1.1878, + "step": 135564 + }, + { + "epoch": 1.63, + "grad_norm": 9.006436137109258, + "learning_rate": 1.7405437466082332e-06, + "loss": 1.5624, + "step": 135567 + }, + { + "epoch": 1.63, + "grad_norm": 22.869882482588377, + "learning_rate": 1.7402144304562652e-06, + "loss": 0.9076, + "step": 135570 + }, + { + "epoch": 1.63, + "grad_norm": 11.399495093097205, + "learning_rate": 1.7398851424919715e-06, + "loss": 1.2011, + "step": 135573 + }, + { + "epoch": 1.63, + "grad_norm": 22.94119724945913, + "learning_rate": 1.7395558827164716e-06, + "loss": 1.0543, + "step": 135576 + }, + { + "epoch": 1.63, + "grad_norm": 8.33570718613112, + "learning_rate": 1.739226651130893e-06, + "loss": 1.2388, + "step": 135579 + }, + { + "epoch": 1.63, + "grad_norm": 4.525172138247337, + "learning_rate": 1.7388974477363585e-06, + "loss": 1.3877, + "step": 135582 + }, + { + "epoch": 1.63, + "grad_norm": 11.705395097156337, + "learning_rate": 1.7385682725339936e-06, + "loss": 0.9674, + "step": 135585 + }, + { + "epoch": 1.63, + "grad_norm": 16.835358427737475, + "learning_rate": 1.7382391255249166e-06, + "loss": 1.4632, + "step": 135588 + }, + { + "epoch": 1.63, + "grad_norm": 10.337716581546285, + "learning_rate": 1.7379100067102562e-06, + "loss": 1.0707, + "step": 135591 + }, + { + "epoch": 1.63, + "grad_norm": 5.88362835710576, + "learning_rate": 1.7375809160911305e-06, + "loss": 0.9988, + "step": 135594 + }, + { + "epoch": 1.63, + "grad_norm": 6.276759097629576, + "learning_rate": 1.7372518536686643e-06, + "loss": 0.6245, + "step": 135597 + }, + { + "epoch": 1.63, + "grad_norm": 8.697987284783194, + "learning_rate": 1.7369228194439847e-06, + "loss": 1.071, + "step": 135600 + }, + { + "epoch": 1.63, + "grad_norm": 9.572422871161573, + "learning_rate": 1.7365938134182093e-06, + "loss": 0.9532, + "step": 135603 + }, + { + "epoch": 1.63, + "grad_norm": 3.638831006565116, + "learning_rate": 1.7362648355924617e-06, + "loss": 1.1487, + "step": 135606 + }, + { + "epoch": 1.63, + "grad_norm": 3.910506699094489, + "learning_rate": 1.7359358859678688e-06, + "loss": 1.3086, + "step": 135609 + }, + { + "epoch": 1.63, + "grad_norm": 14.415023274365566, + "learning_rate": 1.7356069645455464e-06, + "loss": 1.0591, + "step": 135612 + }, + { + "epoch": 1.63, + "grad_norm": 18.56094108221927, + "learning_rate": 1.7352780713266236e-06, + "loss": 1.256, + "step": 135615 + }, + { + "epoch": 1.63, + "grad_norm": 6.581040445893116, + "learning_rate": 1.7349492063122175e-06, + "loss": 0.9997, + "step": 135618 + }, + { + "epoch": 1.63, + "grad_norm": 7.863293570986444, + "learning_rate": 1.7346203695034535e-06, + "loss": 1.0564, + "step": 135621 + }, + { + "epoch": 1.63, + "grad_norm": 14.474080282374377, + "learning_rate": 1.7342915609014522e-06, + "loss": 1.1197, + "step": 135624 + }, + { + "epoch": 1.63, + "grad_norm": 32.587826204217656, + "learning_rate": 1.733962780507339e-06, + "loss": 1.2593, + "step": 135627 + }, + { + "epoch": 1.63, + "grad_norm": 6.482583493570996, + "learning_rate": 1.7336340283222308e-06, + "loss": 1.3877, + "step": 135630 + }, + { + "epoch": 1.63, + "grad_norm": 25.0029170689266, + "learning_rate": 1.7333053043472547e-06, + "loss": 1.1767, + "step": 135633 + }, + { + "epoch": 1.63, + "grad_norm": 10.30239360942243, + "learning_rate": 1.7329766085835276e-06, + "loss": 1.077, + "step": 135636 + }, + { + "epoch": 1.63, + "grad_norm": 11.447749262261421, + "learning_rate": 1.7326479410321727e-06, + "loss": 1.1484, + "step": 135639 + }, + { + "epoch": 1.63, + "grad_norm": 10.143479896440464, + "learning_rate": 1.7323193016943162e-06, + "loss": 0.9535, + "step": 135642 + }, + { + "epoch": 1.63, + "grad_norm": 4.735509953314044, + "learning_rate": 1.7319906905710736e-06, + "loss": 1.1422, + "step": 135645 + }, + { + "epoch": 1.63, + "grad_norm": 3.9262170573220656, + "learning_rate": 1.7316621076635675e-06, + "loss": 0.9431, + "step": 135648 + }, + { + "epoch": 1.63, + "grad_norm": 7.603392078206762, + "learning_rate": 1.7313335529729236e-06, + "loss": 1.4334, + "step": 135651 + }, + { + "epoch": 1.63, + "grad_norm": 13.345444924691362, + "learning_rate": 1.7310050265002598e-06, + "loss": 1.1202, + "step": 135654 + }, + { + "epoch": 1.63, + "grad_norm": 6.810744143531476, + "learning_rate": 1.7306765282466954e-06, + "loss": 1.199, + "step": 135657 + }, + { + "epoch": 1.63, + "grad_norm": 12.518933878813188, + "learning_rate": 1.7303480582133526e-06, + "loss": 1.2757, + "step": 135660 + }, + { + "epoch": 1.63, + "grad_norm": 20.114758713946802, + "learning_rate": 1.7300196164013528e-06, + "loss": 1.3691, + "step": 135663 + }, + { + "epoch": 1.63, + "grad_norm": 7.860610158152159, + "learning_rate": 1.7296912028118184e-06, + "loss": 0.9717, + "step": 135666 + }, + { + "epoch": 1.63, + "grad_norm": 9.645225141308247, + "learning_rate": 1.729362817445871e-06, + "loss": 1.3092, + "step": 135669 + }, + { + "epoch": 1.63, + "grad_norm": 7.870212141668707, + "learning_rate": 1.729034460304626e-06, + "loss": 1.1592, + "step": 135672 + }, + { + "epoch": 1.63, + "grad_norm": 16.33194361767591, + "learning_rate": 1.7287061313892105e-06, + "loss": 0.9593, + "step": 135675 + }, + { + "epoch": 1.63, + "grad_norm": 6.090107594416918, + "learning_rate": 1.7283778307007392e-06, + "loss": 1.2017, + "step": 135678 + }, + { + "epoch": 1.63, + "grad_norm": 3.5971989742076187, + "learning_rate": 1.7280495582403367e-06, + "loss": 1.2213, + "step": 135681 + }, + { + "epoch": 1.63, + "grad_norm": 7.840622991678673, + "learning_rate": 1.7277213140091198e-06, + "loss": 0.8922, + "step": 135684 + }, + { + "epoch": 1.63, + "grad_norm": 14.95852829021582, + "learning_rate": 1.7273930980082098e-06, + "loss": 0.8307, + "step": 135687 + }, + { + "epoch": 1.63, + "grad_norm": 5.575397037497094, + "learning_rate": 1.7270649102387283e-06, + "loss": 0.8787, + "step": 135690 + }, + { + "epoch": 1.63, + "grad_norm": 7.058647388007072, + "learning_rate": 1.7267367507017963e-06, + "loss": 1.3193, + "step": 135693 + }, + { + "epoch": 1.63, + "grad_norm": 29.850636531635086, + "learning_rate": 1.7264086193985319e-06, + "loss": 1.0651, + "step": 135696 + }, + { + "epoch": 1.63, + "grad_norm": 5.558590344329038, + "learning_rate": 1.726080516330052e-06, + "loss": 1.1608, + "step": 135699 + }, + { + "epoch": 1.63, + "grad_norm": 4.606033040832419, + "learning_rate": 1.7257524414974792e-06, + "loss": 1.4612, + "step": 135702 + }, + { + "epoch": 1.63, + "grad_norm": 2.7904776049244755, + "learning_rate": 1.7254243949019323e-06, + "loss": 1.4133, + "step": 135705 + }, + { + "epoch": 1.63, + "grad_norm": 4.311718433427658, + "learning_rate": 1.725096376544535e-06, + "loss": 1.219, + "step": 135708 + }, + { + "epoch": 1.63, + "grad_norm": 6.1835424508716015, + "learning_rate": 1.7247683864263999e-06, + "loss": 1.1751, + "step": 135711 + }, + { + "epoch": 1.63, + "grad_norm": 5.031888543114464, + "learning_rate": 1.7244404245486502e-06, + "loss": 1.3103, + "step": 135714 + }, + { + "epoch": 1.63, + "grad_norm": 3.3652463002076676, + "learning_rate": 1.7241124909124062e-06, + "loss": 1.4321, + "step": 135717 + }, + { + "epoch": 1.63, + "grad_norm": 5.908258941694471, + "learning_rate": 1.723784585518784e-06, + "loss": 1.0325, + "step": 135720 + }, + { + "epoch": 1.63, + "grad_norm": 39.23325652111082, + "learning_rate": 1.7234567083689046e-06, + "loss": 1.258, + "step": 135723 + }, + { + "epoch": 1.63, + "grad_norm": 16.346520233099504, + "learning_rate": 1.723128859463885e-06, + "loss": 1.3171, + "step": 135726 + }, + { + "epoch": 1.63, + "grad_norm": 5.6379814333217535, + "learning_rate": 1.7228010388048454e-06, + "loss": 1.3766, + "step": 135729 + }, + { + "epoch": 1.63, + "grad_norm": 9.121605937878916, + "learning_rate": 1.7224732463929038e-06, + "loss": 1.0922, + "step": 135732 + }, + { + "epoch": 1.63, + "grad_norm": 5.547042466263551, + "learning_rate": 1.7221454822291828e-06, + "loss": 0.8849, + "step": 135735 + }, + { + "epoch": 1.63, + "grad_norm": 4.306341400627167, + "learning_rate": 1.7218177463147968e-06, + "loss": 1.0977, + "step": 135738 + }, + { + "epoch": 1.63, + "grad_norm": 14.812726869591419, + "learning_rate": 1.7214900386508637e-06, + "loss": 1.1866, + "step": 135741 + }, + { + "epoch": 1.63, + "grad_norm": 5.8759922514711675, + "learning_rate": 1.721162359238502e-06, + "loss": 0.9597, + "step": 135744 + }, + { + "epoch": 1.63, + "grad_norm": 11.689830436487997, + "learning_rate": 1.7208347080788313e-06, + "loss": 1.0721, + "step": 135747 + }, + { + "epoch": 1.63, + "grad_norm": 22.727954154880056, + "learning_rate": 1.7205070851729722e-06, + "loss": 1.2378, + "step": 135750 + }, + { + "epoch": 1.63, + "grad_norm": 15.123732347535546, + "learning_rate": 1.720179490522038e-06, + "loss": 1.1206, + "step": 135753 + }, + { + "epoch": 1.63, + "grad_norm": 4.511067547561787, + "learning_rate": 1.7198519241271483e-06, + "loss": 1.2688, + "step": 135756 + }, + { + "epoch": 1.63, + "grad_norm": 23.88702214991555, + "learning_rate": 1.719524385989424e-06, + "loss": 1.2995, + "step": 135759 + }, + { + "epoch": 1.63, + "grad_norm": 9.335698757333533, + "learning_rate": 1.719196876109981e-06, + "loss": 1.366, + "step": 135762 + }, + { + "epoch": 1.63, + "grad_norm": 18.490909706711943, + "learning_rate": 1.7188693944899337e-06, + "loss": 1.3455, + "step": 135765 + }, + { + "epoch": 1.63, + "grad_norm": 4.801647479332336, + "learning_rate": 1.7185419411304017e-06, + "loss": 1.3254, + "step": 135768 + }, + { + "epoch": 1.63, + "grad_norm": 11.236213646448011, + "learning_rate": 1.7182145160325037e-06, + "loss": 1.0593, + "step": 135771 + }, + { + "epoch": 1.63, + "grad_norm": 6.151801328596754, + "learning_rate": 1.7178871191973556e-06, + "loss": 1.1563, + "step": 135774 + }, + { + "epoch": 1.63, + "grad_norm": 12.818936995824583, + "learning_rate": 1.7175597506260789e-06, + "loss": 1.4635, + "step": 135777 + }, + { + "epoch": 1.63, + "grad_norm": 9.555823745909576, + "learning_rate": 1.7172324103197879e-06, + "loss": 1.4253, + "step": 135780 + }, + { + "epoch": 1.63, + "grad_norm": 12.351403268887838, + "learning_rate": 1.7169050982795953e-06, + "loss": 1.0367, + "step": 135783 + }, + { + "epoch": 1.63, + "grad_norm": 23.91167074734178, + "learning_rate": 1.7165778145066237e-06, + "loss": 1.178, + "step": 135786 + }, + { + "epoch": 1.63, + "grad_norm": 4.049959413639572, + "learning_rate": 1.7162505590019896e-06, + "loss": 0.9882, + "step": 135789 + }, + { + "epoch": 1.63, + "grad_norm": 6.795419336421022, + "learning_rate": 1.7159233317668066e-06, + "loss": 1.255, + "step": 135792 + }, + { + "epoch": 1.63, + "grad_norm": 6.630631043262901, + "learning_rate": 1.715596132802193e-06, + "loss": 1.0181, + "step": 135795 + }, + { + "epoch": 1.63, + "grad_norm": 8.763287003054547, + "learning_rate": 1.7152689621092655e-06, + "loss": 1.1826, + "step": 135798 + }, + { + "epoch": 1.63, + "grad_norm": 58.938872718194915, + "learning_rate": 1.7149418196891444e-06, + "loss": 1.1232, + "step": 135801 + }, + { + "epoch": 1.63, + "grad_norm": 15.174944982638205, + "learning_rate": 1.714614705542942e-06, + "loss": 1.1235, + "step": 135804 + }, + { + "epoch": 1.63, + "grad_norm": 8.519399473184178, + "learning_rate": 1.7142876196717729e-06, + "loss": 0.9922, + "step": 135807 + }, + { + "epoch": 1.63, + "grad_norm": 8.259630520899695, + "learning_rate": 1.7139605620767553e-06, + "loss": 0.8003, + "step": 135810 + }, + { + "epoch": 1.63, + "grad_norm": 4.7096405021247305, + "learning_rate": 1.713633532759006e-06, + "loss": 0.799, + "step": 135813 + }, + { + "epoch": 1.63, + "grad_norm": 6.921971619026741, + "learning_rate": 1.7133065317196428e-06, + "loss": 0.9458, + "step": 135816 + }, + { + "epoch": 1.63, + "grad_norm": 11.828043538010187, + "learning_rate": 1.7129795589597776e-06, + "loss": 0.8753, + "step": 135819 + }, + { + "epoch": 1.63, + "grad_norm": 7.970827973054307, + "learning_rate": 1.71265261448053e-06, + "loss": 1.5387, + "step": 135822 + }, + { + "epoch": 1.63, + "grad_norm": 3.6371702735934632, + "learning_rate": 1.7123256982830116e-06, + "loss": 1.0571, + "step": 135825 + }, + { + "epoch": 1.63, + "grad_norm": 10.767439080483209, + "learning_rate": 1.7119988103683416e-06, + "loss": 1.3194, + "step": 135828 + }, + { + "epoch": 1.63, + "grad_norm": 11.107041187906203, + "learning_rate": 1.7116719507376356e-06, + "loss": 1.4556, + "step": 135831 + }, + { + "epoch": 1.63, + "grad_norm": 7.767301469729522, + "learning_rate": 1.711345119392005e-06, + "loss": 1.0582, + "step": 135834 + }, + { + "epoch": 1.63, + "grad_norm": 7.672021388428932, + "learning_rate": 1.7110183163325688e-06, + "loss": 1.3633, + "step": 135837 + }, + { + "epoch": 1.63, + "grad_norm": 3.4206020216399757, + "learning_rate": 1.7106915415604407e-06, + "loss": 1.2591, + "step": 135840 + }, + { + "epoch": 1.63, + "grad_norm": 5.829782305657542, + "learning_rate": 1.71036479507674e-06, + "loss": 1.0652, + "step": 135843 + }, + { + "epoch": 1.63, + "grad_norm": 5.775068483673386, + "learning_rate": 1.7100380768825774e-06, + "loss": 1.028, + "step": 135846 + }, + { + "epoch": 1.63, + "grad_norm": 11.196104730076964, + "learning_rate": 1.709711386979067e-06, + "loss": 1.0242, + "step": 135849 + }, + { + "epoch": 1.63, + "grad_norm": 11.32504076370065, + "learning_rate": 1.7093847253673256e-06, + "loss": 0.987, + "step": 135852 + }, + { + "epoch": 1.63, + "grad_norm": 8.332066971129278, + "learning_rate": 1.7090580920484677e-06, + "loss": 1.0839, + "step": 135855 + }, + { + "epoch": 1.63, + "grad_norm": 10.445255492222001, + "learning_rate": 1.7087314870236104e-06, + "loss": 1.4203, + "step": 135858 + }, + { + "epoch": 1.63, + "grad_norm": 20.203930320301208, + "learning_rate": 1.7084049102938637e-06, + "loss": 1.0601, + "step": 135861 + }, + { + "epoch": 1.63, + "grad_norm": 21.932043462414264, + "learning_rate": 1.7080783618603447e-06, + "loss": 1.4124, + "step": 135864 + }, + { + "epoch": 1.63, + "grad_norm": 5.279184927174845, + "learning_rate": 1.707751841724169e-06, + "loss": 1.2324, + "step": 135867 + }, + { + "epoch": 1.63, + "grad_norm": 5.932545617744469, + "learning_rate": 1.7074253498864502e-06, + "loss": 1.1071, + "step": 135870 + }, + { + "epoch": 1.63, + "grad_norm": 3.713230950497141, + "learning_rate": 1.7070988863482996e-06, + "loss": 1.3428, + "step": 135873 + }, + { + "epoch": 1.63, + "grad_norm": 12.158282801434503, + "learning_rate": 1.706772451110833e-06, + "loss": 1.1228, + "step": 135876 + }, + { + "epoch": 1.63, + "grad_norm": 10.92612344830759, + "learning_rate": 1.7064460441751641e-06, + "loss": 1.3017, + "step": 135879 + }, + { + "epoch": 1.63, + "grad_norm": 14.281067201117816, + "learning_rate": 1.7061196655424083e-06, + "loss": 1.0641, + "step": 135882 + }, + { + "epoch": 1.63, + "grad_norm": 4.677416264958794, + "learning_rate": 1.7057933152136808e-06, + "loss": 1.471, + "step": 135885 + }, + { + "epoch": 1.63, + "grad_norm": 2.4519746172575565, + "learning_rate": 1.7054669931900935e-06, + "loss": 1.0721, + "step": 135888 + }, + { + "epoch": 1.63, + "grad_norm": 9.189814433320226, + "learning_rate": 1.705140699472757e-06, + "loss": 1.3214, + "step": 135891 + }, + { + "epoch": 1.63, + "grad_norm": 5.365596936789993, + "learning_rate": 1.704814434062787e-06, + "loss": 1.3565, + "step": 135894 + }, + { + "epoch": 1.63, + "grad_norm": 10.614210483847328, + "learning_rate": 1.7044881969613014e-06, + "loss": 1.6464, + "step": 135897 + }, + { + "epoch": 1.63, + "grad_norm": 4.127813201954437, + "learning_rate": 1.704161988169406e-06, + "loss": 1.3407, + "step": 135900 + }, + { + "epoch": 1.63, + "grad_norm": 7.526032243460351, + "learning_rate": 1.7038358076882177e-06, + "loss": 0.9051, + "step": 135903 + }, + { + "epoch": 1.63, + "grad_norm": 5.1665887050147985, + "learning_rate": 1.703509655518849e-06, + "loss": 1.0553, + "step": 135906 + }, + { + "epoch": 1.63, + "grad_norm": 8.820740270145807, + "learning_rate": 1.7031835316624167e-06, + "loss": 1.2169, + "step": 135909 + }, + { + "epoch": 1.63, + "grad_norm": 10.755673142111352, + "learning_rate": 1.70285743612003e-06, + "loss": 1.0112, + "step": 135912 + }, + { + "epoch": 1.63, + "grad_norm": 12.371284555843857, + "learning_rate": 1.7025313688928003e-06, + "loss": 1.1438, + "step": 135915 + }, + { + "epoch": 1.63, + "grad_norm": 3.584258143914882, + "learning_rate": 1.702205329981842e-06, + "loss": 1.3173, + "step": 135918 + }, + { + "epoch": 1.63, + "grad_norm": 8.12552357308758, + "learning_rate": 1.7018793193882676e-06, + "loss": 1.1444, + "step": 135921 + }, + { + "epoch": 1.63, + "grad_norm": 3.3675212034667994, + "learning_rate": 1.7015533371131932e-06, + "loss": 1.3827, + "step": 135924 + }, + { + "epoch": 1.63, + "grad_norm": 8.093759200172702, + "learning_rate": 1.7012273831577252e-06, + "loss": 1.2622, + "step": 135927 + }, + { + "epoch": 1.63, + "grad_norm": 20.92442276160707, + "learning_rate": 1.700901457522982e-06, + "loss": 1.217, + "step": 135930 + }, + { + "epoch": 1.63, + "grad_norm": 40.08077967020594, + "learning_rate": 1.7005755602100705e-06, + "loss": 0.9947, + "step": 135933 + }, + { + "epoch": 1.63, + "grad_norm": 1.9479712864582286, + "learning_rate": 1.7002496912201049e-06, + "loss": 0.7667, + "step": 135936 + }, + { + "epoch": 1.63, + "grad_norm": 4.077472918659005, + "learning_rate": 1.6999238505541992e-06, + "loss": 1.3489, + "step": 135939 + }, + { + "epoch": 1.63, + "grad_norm": 9.176893719476402, + "learning_rate": 1.6995980382134625e-06, + "loss": 1.1097, + "step": 135942 + }, + { + "epoch": 1.63, + "grad_norm": 9.656739270323852, + "learning_rate": 1.699272254199007e-06, + "loss": 1.4612, + "step": 135945 + }, + { + "epoch": 1.63, + "grad_norm": 3.1510637800919596, + "learning_rate": 1.6989464985119463e-06, + "loss": 1.0155, + "step": 135948 + }, + { + "epoch": 1.63, + "grad_norm": 4.11824661392281, + "learning_rate": 1.698620771153393e-06, + "loss": 1.1237, + "step": 135951 + }, + { + "epoch": 1.63, + "grad_norm": 10.474977703860528, + "learning_rate": 1.6982950721244573e-06, + "loss": 1.2632, + "step": 135954 + }, + { + "epoch": 1.63, + "grad_norm": 26.25878760773201, + "learning_rate": 1.697969401426247e-06, + "loss": 1.0648, + "step": 135957 + }, + { + "epoch": 1.63, + "grad_norm": 12.411950417424363, + "learning_rate": 1.697643759059877e-06, + "loss": 1.3588, + "step": 135960 + }, + { + "epoch": 1.63, + "grad_norm": 7.354496688807129, + "learning_rate": 1.6973181450264587e-06, + "loss": 0.9593, + "step": 135963 + }, + { + "epoch": 1.63, + "grad_norm": 10.570733096141929, + "learning_rate": 1.6969925593271053e-06, + "loss": 0.8491, + "step": 135966 + }, + { + "epoch": 1.63, + "grad_norm": 20.606444200838965, + "learning_rate": 1.6966670019629228e-06, + "loss": 1.2644, + "step": 135969 + }, + { + "epoch": 1.64, + "grad_norm": 8.304730374766548, + "learning_rate": 1.696341472935028e-06, + "loss": 1.1528, + "step": 135972 + }, + { + "epoch": 1.64, + "grad_norm": 16.257133249127165, + "learning_rate": 1.6960159722445258e-06, + "loss": 1.0523, + "step": 135975 + }, + { + "epoch": 1.64, + "grad_norm": 7.844557938787737, + "learning_rate": 1.6956904998925327e-06, + "loss": 1.073, + "step": 135978 + }, + { + "epoch": 1.64, + "grad_norm": 4.4435216394501955, + "learning_rate": 1.6953650558801548e-06, + "loss": 1.0709, + "step": 135981 + }, + { + "epoch": 1.64, + "grad_norm": 2.804984443034418, + "learning_rate": 1.6950396402085044e-06, + "loss": 0.8095, + "step": 135984 + }, + { + "epoch": 1.64, + "grad_norm": 7.253601918948004, + "learning_rate": 1.694714252878692e-06, + "loss": 0.9794, + "step": 135987 + }, + { + "epoch": 1.64, + "grad_norm": 10.731196069993336, + "learning_rate": 1.6943888938918286e-06, + "loss": 1.0941, + "step": 135990 + }, + { + "epoch": 1.64, + "grad_norm": 4.64850440231825, + "learning_rate": 1.6940635632490254e-06, + "loss": 1.0323, + "step": 135993 + }, + { + "epoch": 1.64, + "grad_norm": 3.7138460818467003, + "learning_rate": 1.693738260951393e-06, + "loss": 1.2206, + "step": 135996 + }, + { + "epoch": 1.64, + "grad_norm": 9.61251938436583, + "learning_rate": 1.693412987000037e-06, + "loss": 1.6614, + "step": 135999 + }, + { + "epoch": 1.64, + "grad_norm": 15.933435244715012, + "learning_rate": 1.69308774139607e-06, + "loss": 1.0502, + "step": 136002 + }, + { + "epoch": 1.64, + "grad_norm": 7.5604450272366215, + "learning_rate": 1.6927625241406055e-06, + "loss": 0.8935, + "step": 136005 + }, + { + "epoch": 1.64, + "grad_norm": 6.8374072886730115, + "learning_rate": 1.6924373352347479e-06, + "loss": 1.206, + "step": 136008 + }, + { + "epoch": 1.64, + "grad_norm": 3.883667299384546, + "learning_rate": 1.6921121746796088e-06, + "loss": 1.3885, + "step": 136011 + }, + { + "epoch": 1.64, + "grad_norm": 7.0382355314300025, + "learning_rate": 1.6917870424763017e-06, + "loss": 0.992, + "step": 136014 + }, + { + "epoch": 1.64, + "grad_norm": 11.414173240609353, + "learning_rate": 1.691461938625929e-06, + "loss": 1.1882, + "step": 136017 + }, + { + "epoch": 1.64, + "grad_norm": 6.034575756981684, + "learning_rate": 1.6911368631296077e-06, + "loss": 1.5622, + "step": 136020 + }, + { + "epoch": 1.64, + "grad_norm": 5.753406501409452, + "learning_rate": 1.6908118159884402e-06, + "loss": 1.0179, + "step": 136023 + }, + { + "epoch": 1.64, + "grad_norm": 5.000771799508796, + "learning_rate": 1.69048679720354e-06, + "loss": 1.3287, + "step": 136026 + }, + { + "epoch": 1.64, + "grad_norm": 3.809806171873657, + "learning_rate": 1.690161806776014e-06, + "loss": 1.0851, + "step": 136029 + }, + { + "epoch": 1.64, + "grad_norm": 10.895860857101454, + "learning_rate": 1.6898368447069758e-06, + "loss": 0.9574, + "step": 136032 + }, + { + "epoch": 1.64, + "grad_norm": 5.691801617576879, + "learning_rate": 1.6895119109975288e-06, + "loss": 1.2832, + "step": 136035 + }, + { + "epoch": 1.64, + "grad_norm": 4.480665525664626, + "learning_rate": 1.6891870056487858e-06, + "loss": 0.7182, + "step": 136038 + }, + { + "epoch": 1.64, + "grad_norm": 29.283830719224877, + "learning_rate": 1.6888621286618524e-06, + "loss": 1.2271, + "step": 136041 + }, + { + "epoch": 1.64, + "grad_norm": 15.145478322607959, + "learning_rate": 1.6885372800378386e-06, + "loss": 1.1907, + "step": 136044 + }, + { + "epoch": 1.64, + "grad_norm": 3.499013743728696, + "learning_rate": 1.688212459777856e-06, + "loss": 1.1545, + "step": 136047 + }, + { + "epoch": 1.64, + "grad_norm": 8.767176439708914, + "learning_rate": 1.6878876678830069e-06, + "loss": 0.732, + "step": 136050 + }, + { + "epoch": 1.64, + "grad_norm": 7.947022653644193, + "learning_rate": 1.687562904354404e-06, + "loss": 1.3055, + "step": 136053 + }, + { + "epoch": 1.64, + "grad_norm": 4.831308045448086, + "learning_rate": 1.6872381691931572e-06, + "loss": 1.3066, + "step": 136056 + }, + { + "epoch": 1.64, + "grad_norm": 3.9504252033099627, + "learning_rate": 1.6869134624003724e-06, + "loss": 1.2612, + "step": 136059 + }, + { + "epoch": 1.64, + "grad_norm": 4.48095608913355, + "learning_rate": 1.6865887839771556e-06, + "loss": 1.0832, + "step": 136062 + }, + { + "epoch": 1.64, + "grad_norm": 9.895575163076016, + "learning_rate": 1.6862641339246155e-06, + "loss": 1.3015, + "step": 136065 + }, + { + "epoch": 1.64, + "grad_norm": 3.1985522721321393, + "learning_rate": 1.6859395122438627e-06, + "loss": 1.2168, + "step": 136068 + }, + { + "epoch": 1.64, + "grad_norm": 12.219426893284018, + "learning_rate": 1.6856149189360027e-06, + "loss": 1.3946, + "step": 136071 + }, + { + "epoch": 1.64, + "grad_norm": 4.378089239328074, + "learning_rate": 1.685290354002147e-06, + "loss": 0.9101, + "step": 136074 + }, + { + "epoch": 1.64, + "grad_norm": 3.9172651722775136, + "learning_rate": 1.684965817443398e-06, + "loss": 0.6911, + "step": 136077 + }, + { + "epoch": 1.64, + "grad_norm": 5.050888564012705, + "learning_rate": 1.684641309260867e-06, + "loss": 0.8061, + "step": 136080 + }, + { + "epoch": 1.64, + "grad_norm": 6.163565853030352, + "learning_rate": 1.684316829455659e-06, + "loss": 1.0222, + "step": 136083 + }, + { + "epoch": 1.64, + "grad_norm": 3.367316651903237, + "learning_rate": 1.6839923780288846e-06, + "loss": 1.2784, + "step": 136086 + }, + { + "epoch": 1.64, + "grad_norm": 6.130296950649438, + "learning_rate": 1.6836679549816458e-06, + "loss": 1.3993, + "step": 136089 + }, + { + "epoch": 1.64, + "grad_norm": 14.329522260107016, + "learning_rate": 1.6833435603150527e-06, + "loss": 0.9123, + "step": 136092 + }, + { + "epoch": 1.64, + "grad_norm": 11.031714632373275, + "learning_rate": 1.6830191940302132e-06, + "loss": 1.1593, + "step": 136095 + }, + { + "epoch": 1.64, + "grad_norm": 5.139069358016853, + "learning_rate": 1.6826948561282353e-06, + "loss": 0.8708, + "step": 136098 + }, + { + "epoch": 1.64, + "grad_norm": 3.8710575289361993, + "learning_rate": 1.6823705466102236e-06, + "loss": 1.1018, + "step": 136101 + }, + { + "epoch": 1.64, + "grad_norm": 8.836852030047515, + "learning_rate": 1.682046265477283e-06, + "loss": 1.1898, + "step": 136104 + }, + { + "epoch": 1.64, + "grad_norm": 8.607743892936464, + "learning_rate": 1.6817220127305222e-06, + "loss": 1.0456, + "step": 136107 + }, + { + "epoch": 1.64, + "grad_norm": 10.787736157783531, + "learning_rate": 1.6813977883710486e-06, + "loss": 1.2722, + "step": 136110 + }, + { + "epoch": 1.64, + "grad_norm": 49.088268106116345, + "learning_rate": 1.68107359239997e-06, + "loss": 1.1783, + "step": 136113 + }, + { + "epoch": 1.64, + "grad_norm": 10.822718756179189, + "learning_rate": 1.6807494248183886e-06, + "loss": 1.4186, + "step": 136116 + }, + { + "epoch": 1.64, + "grad_norm": 15.59967022088374, + "learning_rate": 1.6804252856274127e-06, + "loss": 1.07, + "step": 136119 + }, + { + "epoch": 1.64, + "grad_norm": 3.5453268769077773, + "learning_rate": 1.6801011748281516e-06, + "loss": 1.2792, + "step": 136122 + }, + { + "epoch": 1.64, + "grad_norm": 5.04621250075811, + "learning_rate": 1.679777092421705e-06, + "loss": 1.3394, + "step": 136125 + }, + { + "epoch": 1.64, + "grad_norm": 3.6722493848559346, + "learning_rate": 1.6794530384091845e-06, + "loss": 1.2473, + "step": 136128 + }, + { + "epoch": 1.64, + "grad_norm": 11.613324686259968, + "learning_rate": 1.6791290127916926e-06, + "loss": 1.2207, + "step": 136131 + }, + { + "epoch": 1.64, + "grad_norm": 9.243986443164253, + "learning_rate": 1.678805015570335e-06, + "loss": 0.9489, + "step": 136134 + }, + { + "epoch": 1.64, + "grad_norm": 8.225567771148707, + "learning_rate": 1.6784810467462186e-06, + "loss": 1.0616, + "step": 136137 + }, + { + "epoch": 1.64, + "grad_norm": 4.997333405834317, + "learning_rate": 1.6781571063204526e-06, + "loss": 1.0764, + "step": 136140 + }, + { + "epoch": 1.64, + "grad_norm": 8.003462411649132, + "learning_rate": 1.6778331942941362e-06, + "loss": 1.0301, + "step": 136143 + }, + { + "epoch": 1.64, + "grad_norm": 7.210878047428302, + "learning_rate": 1.6775093106683792e-06, + "loss": 1.1817, + "step": 136146 + }, + { + "epoch": 1.64, + "grad_norm": 7.144015278437872, + "learning_rate": 1.6771854554442824e-06, + "loss": 1.1492, + "step": 136149 + }, + { + "epoch": 1.64, + "grad_norm": 21.506900740429202, + "learning_rate": 1.6768616286229545e-06, + "loss": 1.0648, + "step": 136152 + }, + { + "epoch": 1.64, + "grad_norm": 7.7733136363897755, + "learning_rate": 1.6765378302055024e-06, + "loss": 1.0731, + "step": 136155 + }, + { + "epoch": 1.64, + "grad_norm": 4.767772784963192, + "learning_rate": 1.6762140601930254e-06, + "loss": 0.963, + "step": 136158 + }, + { + "epoch": 1.64, + "grad_norm": 3.745036627685678, + "learning_rate": 1.6758903185866327e-06, + "loss": 1.3443, + "step": 136161 + }, + { + "epoch": 1.64, + "grad_norm": 8.80791194856758, + "learning_rate": 1.6755666053874297e-06, + "loss": 1.1694, + "step": 136164 + }, + { + "epoch": 1.64, + "grad_norm": 7.3243131683356495, + "learning_rate": 1.6752429205965203e-06, + "loss": 1.1346, + "step": 136167 + }, + { + "epoch": 1.64, + "grad_norm": 7.729252578577223, + "learning_rate": 1.6749192642150048e-06, + "loss": 1.2477, + "step": 136170 + }, + { + "epoch": 1.64, + "grad_norm": 11.596120706315546, + "learning_rate": 1.674595636243992e-06, + "loss": 1.3795, + "step": 136173 + }, + { + "epoch": 1.64, + "grad_norm": 7.5512799128645955, + "learning_rate": 1.6742720366845843e-06, + "loss": 0.9885, + "step": 136176 + }, + { + "epoch": 1.64, + "grad_norm": 8.192214834369677, + "learning_rate": 1.6739484655378879e-06, + "loss": 1.2412, + "step": 136179 + }, + { + "epoch": 1.64, + "grad_norm": 8.091981667682179, + "learning_rate": 1.6736249228050095e-06, + "loss": 1.14, + "step": 136182 + }, + { + "epoch": 1.64, + "grad_norm": 7.160956159404612, + "learning_rate": 1.6733014084870469e-06, + "loss": 1.1992, + "step": 136185 + }, + { + "epoch": 1.64, + "grad_norm": 5.81879994685157, + "learning_rate": 1.6729779225851095e-06, + "loss": 0.9181, + "step": 136188 + }, + { + "epoch": 1.64, + "grad_norm": 18.704028435907173, + "learning_rate": 1.6726544651002963e-06, + "loss": 1.1045, + "step": 136191 + }, + { + "epoch": 1.64, + "grad_norm": 8.849538407748113, + "learning_rate": 1.6723310360337174e-06, + "loss": 1.2764, + "step": 136194 + }, + { + "epoch": 1.64, + "grad_norm": 12.308154819252438, + "learning_rate": 1.6720076353864701e-06, + "loss": 1.0556, + "step": 136197 + }, + { + "epoch": 1.64, + "grad_norm": 3.818039300829228, + "learning_rate": 1.671684263159662e-06, + "loss": 1.0111, + "step": 136200 + }, + { + "epoch": 1.64, + "grad_norm": 3.0257097131658566, + "learning_rate": 1.6713609193543946e-06, + "loss": 1.4453, + "step": 136203 + }, + { + "epoch": 1.64, + "grad_norm": 10.02594510557768, + "learning_rate": 1.6710376039717745e-06, + "loss": 1.2632, + "step": 136206 + }, + { + "epoch": 1.64, + "grad_norm": 8.123448091419167, + "learning_rate": 1.6707143170129036e-06, + "loss": 1.2435, + "step": 136209 + }, + { + "epoch": 1.64, + "grad_norm": 6.975522114364922, + "learning_rate": 1.6703910584788818e-06, + "loss": 1.1617, + "step": 136212 + }, + { + "epoch": 1.64, + "grad_norm": 8.124442903122112, + "learning_rate": 1.670067828370816e-06, + "loss": 1.3357, + "step": 136215 + }, + { + "epoch": 1.64, + "grad_norm": 8.11169301251545, + "learning_rate": 1.6697446266898077e-06, + "loss": 0.9731, + "step": 136218 + }, + { + "epoch": 1.64, + "grad_norm": 8.477935163195426, + "learning_rate": 1.6694214534369623e-06, + "loss": 1.0103, + "step": 136221 + }, + { + "epoch": 1.64, + "grad_norm": 2.52281208120034, + "learning_rate": 1.6690983086133794e-06, + "loss": 1.2803, + "step": 136224 + }, + { + "epoch": 1.64, + "grad_norm": 9.562274793975924, + "learning_rate": 1.6687751922201623e-06, + "loss": 1.3249, + "step": 136227 + }, + { + "epoch": 1.64, + "grad_norm": 12.974706140329028, + "learning_rate": 1.6684521042584167e-06, + "loss": 1.2116, + "step": 136230 + }, + { + "epoch": 1.64, + "grad_norm": 19.306498252362317, + "learning_rate": 1.668129044729242e-06, + "loss": 0.7144, + "step": 136233 + }, + { + "epoch": 1.64, + "grad_norm": 18.335958885524747, + "learning_rate": 1.6678060136337427e-06, + "loss": 0.9099, + "step": 136236 + }, + { + "epoch": 1.64, + "grad_norm": 7.540621754175781, + "learning_rate": 1.6674830109730188e-06, + "loss": 1.417, + "step": 136239 + }, + { + "epoch": 1.64, + "grad_norm": 10.646844110618638, + "learning_rate": 1.6671600367481734e-06, + "loss": 1.0895, + "step": 136242 + }, + { + "epoch": 1.64, + "grad_norm": 6.965096854696989, + "learning_rate": 1.6668370909603104e-06, + "loss": 1.166, + "step": 136245 + }, + { + "epoch": 1.64, + "grad_norm": 4.963722839994008, + "learning_rate": 1.6665141736105317e-06, + "loss": 1.5158, + "step": 136248 + }, + { + "epoch": 1.64, + "grad_norm": 7.019245387289972, + "learning_rate": 1.6661912846999395e-06, + "loss": 0.9297, + "step": 136251 + }, + { + "epoch": 1.64, + "grad_norm": 4.951318622574467, + "learning_rate": 1.665868424229632e-06, + "loss": 1.0437, + "step": 136254 + }, + { + "epoch": 1.64, + "grad_norm": 7.195238051265554, + "learning_rate": 1.6655455922007135e-06, + "loss": 1.3997, + "step": 136257 + }, + { + "epoch": 1.64, + "grad_norm": 7.406235020450521, + "learning_rate": 1.6652227886142857e-06, + "loss": 0.7712, + "step": 136260 + }, + { + "epoch": 1.64, + "grad_norm": 3.0627698042160745, + "learning_rate": 1.6649000134714533e-06, + "loss": 1.2496, + "step": 136263 + }, + { + "epoch": 1.64, + "grad_norm": 3.8217171257889166, + "learning_rate": 1.6645772667733117e-06, + "loss": 1.355, + "step": 136266 + }, + { + "epoch": 1.64, + "grad_norm": 3.3137059494339494, + "learning_rate": 1.664254548520966e-06, + "loss": 1.5988, + "step": 136269 + }, + { + "epoch": 1.64, + "grad_norm": 3.8234531629783075, + "learning_rate": 1.6639318587155194e-06, + "loss": 1.1945, + "step": 136272 + }, + { + "epoch": 1.64, + "grad_norm": 8.371597441853991, + "learning_rate": 1.6636091973580714e-06, + "loss": 0.7317, + "step": 136275 + }, + { + "epoch": 1.64, + "grad_norm": 13.999559963951517, + "learning_rate": 1.6632865644497188e-06, + "loss": 1.3041, + "step": 136278 + }, + { + "epoch": 1.64, + "grad_norm": 14.210439807336837, + "learning_rate": 1.662963959991567e-06, + "loss": 1.4685, + "step": 136281 + }, + { + "epoch": 1.64, + "grad_norm": 8.036638223400468, + "learning_rate": 1.6626413839847155e-06, + "loss": 0.9653, + "step": 136284 + }, + { + "epoch": 1.64, + "grad_norm": 8.717961158816298, + "learning_rate": 1.6623188364302668e-06, + "loss": 0.7253, + "step": 136287 + }, + { + "epoch": 1.64, + "grad_norm": 7.107340788414147, + "learning_rate": 1.661996317329322e-06, + "loss": 1.2998, + "step": 136290 + }, + { + "epoch": 1.64, + "grad_norm": 11.179484875729527, + "learning_rate": 1.6616738266829813e-06, + "loss": 1.381, + "step": 136293 + }, + { + "epoch": 1.64, + "grad_norm": 7.00912803319647, + "learning_rate": 1.6613513644923418e-06, + "loss": 1.3588, + "step": 136296 + }, + { + "epoch": 1.64, + "grad_norm": 12.805295495714947, + "learning_rate": 1.661028930758507e-06, + "loss": 1.3212, + "step": 136299 + }, + { + "epoch": 1.64, + "grad_norm": 14.203508813175764, + "learning_rate": 1.6607065254825783e-06, + "loss": 1.0084, + "step": 136302 + }, + { + "epoch": 1.64, + "grad_norm": 4.878034934041134, + "learning_rate": 1.6603841486656524e-06, + "loss": 1.0846, + "step": 136305 + }, + { + "epoch": 1.64, + "grad_norm": 7.714112124736956, + "learning_rate": 1.6600618003088309e-06, + "loss": 1.3478, + "step": 136308 + }, + { + "epoch": 1.64, + "grad_norm": 9.5559611366441, + "learning_rate": 1.6597394804132149e-06, + "loss": 1.0808, + "step": 136311 + }, + { + "epoch": 1.64, + "grad_norm": 2.702513316630069, + "learning_rate": 1.659417188979907e-06, + "loss": 0.9877, + "step": 136314 + }, + { + "epoch": 1.64, + "grad_norm": 5.038054551408463, + "learning_rate": 1.6590949260100032e-06, + "loss": 1.124, + "step": 136317 + }, + { + "epoch": 1.64, + "grad_norm": 9.906917976697844, + "learning_rate": 1.6587726915046021e-06, + "loss": 1.3416, + "step": 136320 + }, + { + "epoch": 1.64, + "grad_norm": 5.638099303510467, + "learning_rate": 1.6584504854648055e-06, + "loss": 0.9797, + "step": 136323 + }, + { + "epoch": 1.64, + "grad_norm": 22.08413732333809, + "learning_rate": 1.6581283078917122e-06, + "loss": 1.404, + "step": 136326 + }, + { + "epoch": 1.64, + "grad_norm": 8.663890208682673, + "learning_rate": 1.6578061587864247e-06, + "loss": 1.2349, + "step": 136329 + }, + { + "epoch": 1.64, + "grad_norm": 8.238257696815355, + "learning_rate": 1.6574840381500368e-06, + "loss": 1.2992, + "step": 136332 + }, + { + "epoch": 1.64, + "grad_norm": 8.707555599095011, + "learning_rate": 1.6571619459836542e-06, + "loss": 1.3739, + "step": 136335 + }, + { + "epoch": 1.64, + "grad_norm": 3.3012599248883374, + "learning_rate": 1.6568398822883702e-06, + "loss": 1.287, + "step": 136338 + }, + { + "epoch": 1.64, + "grad_norm": 19.726595681980175, + "learning_rate": 1.6565178470652865e-06, + "loss": 1.3469, + "step": 136341 + }, + { + "epoch": 1.64, + "grad_norm": 3.114650089437686, + "learning_rate": 1.6561958403155054e-06, + "loss": 1.1321, + "step": 136344 + }, + { + "epoch": 1.64, + "grad_norm": 6.919570231866648, + "learning_rate": 1.6558738620401194e-06, + "loss": 0.7975, + "step": 136347 + }, + { + "epoch": 1.64, + "grad_norm": 4.546372389647879, + "learning_rate": 1.6555519122402297e-06, + "loss": 1.1305, + "step": 136350 + }, + { + "epoch": 1.64, + "grad_norm": 4.694112145205035, + "learning_rate": 1.6552299909169367e-06, + "loss": 1.1761, + "step": 136353 + }, + { + "epoch": 1.64, + "grad_norm": 8.958183927693605, + "learning_rate": 1.6549080980713394e-06, + "loss": 1.1121, + "step": 136356 + }, + { + "epoch": 1.64, + "grad_norm": 9.40359979528644, + "learning_rate": 1.6545862337045348e-06, + "loss": 1.6333, + "step": 136359 + }, + { + "epoch": 1.64, + "grad_norm": 12.76707116523085, + "learning_rate": 1.6542643978176198e-06, + "loss": 1.2896, + "step": 136362 + }, + { + "epoch": 1.64, + "grad_norm": 4.814721706003844, + "learning_rate": 1.6539425904116945e-06, + "loss": 1.0356, + "step": 136365 + }, + { + "epoch": 1.64, + "grad_norm": 11.660379858633922, + "learning_rate": 1.653620811487856e-06, + "loss": 0.9237, + "step": 136368 + }, + { + "epoch": 1.64, + "grad_norm": 3.7527802893102584, + "learning_rate": 1.6532990610472066e-06, + "loss": 1.0529, + "step": 136371 + }, + { + "epoch": 1.64, + "grad_norm": 7.023418576969332, + "learning_rate": 1.6529773390908377e-06, + "loss": 1.125, + "step": 136374 + }, + { + "epoch": 1.64, + "grad_norm": 12.92372559030671, + "learning_rate": 1.6526556456198538e-06, + "loss": 1.0867, + "step": 136377 + }, + { + "epoch": 1.64, + "grad_norm": 7.533401490290345, + "learning_rate": 1.6523339806353468e-06, + "loss": 0.9008, + "step": 136380 + }, + { + "epoch": 1.64, + "grad_norm": 7.701750738427169, + "learning_rate": 1.6520123441384195e-06, + "loss": 1.5112, + "step": 136383 + }, + { + "epoch": 1.64, + "grad_norm": 31.317771427052474, + "learning_rate": 1.6516907361301638e-06, + "loss": 1.0289, + "step": 136386 + }, + { + "epoch": 1.64, + "grad_norm": 5.916056208936798, + "learning_rate": 1.6513691566116818e-06, + "loss": 1.1211, + "step": 136389 + }, + { + "epoch": 1.64, + "grad_norm": 9.026820563792867, + "learning_rate": 1.6510476055840696e-06, + "loss": 1.3396, + "step": 136392 + }, + { + "epoch": 1.64, + "grad_norm": 43.34097154989153, + "learning_rate": 1.650726083048424e-06, + "loss": 0.9665, + "step": 136395 + }, + { + "epoch": 1.64, + "grad_norm": 8.312432014186985, + "learning_rate": 1.6504045890058461e-06, + "loss": 1.1253, + "step": 136398 + }, + { + "epoch": 1.64, + "grad_norm": 10.470830264627855, + "learning_rate": 1.6500831234574289e-06, + "loss": 1.0064, + "step": 136401 + }, + { + "epoch": 1.64, + "grad_norm": 11.314751180743492, + "learning_rate": 1.6497616864042687e-06, + "loss": 1.1735, + "step": 136404 + }, + { + "epoch": 1.64, + "grad_norm": 7.772358318672001, + "learning_rate": 1.6494402778474638e-06, + "loss": 1.1015, + "step": 136407 + }, + { + "epoch": 1.64, + "grad_norm": 4.534639010908117, + "learning_rate": 1.6491188977881134e-06, + "loss": 1.1727, + "step": 136410 + }, + { + "epoch": 1.64, + "grad_norm": 18.40824545350899, + "learning_rate": 1.648797546227311e-06, + "loss": 1.2888, + "step": 136413 + }, + { + "epoch": 1.64, + "grad_norm": 2.741866802174085, + "learning_rate": 1.6484762231661534e-06, + "loss": 1.0048, + "step": 136416 + }, + { + "epoch": 1.64, + "grad_norm": 3.9344462904218376, + "learning_rate": 1.6481549286057407e-06, + "loss": 0.7158, + "step": 136419 + }, + { + "epoch": 1.64, + "grad_norm": 18.008751935944055, + "learning_rate": 1.6478336625471647e-06, + "loss": 1.2377, + "step": 136422 + }, + { + "epoch": 1.64, + "grad_norm": 13.271748795805149, + "learning_rate": 1.6475124249915275e-06, + "loss": 1.1047, + "step": 136425 + }, + { + "epoch": 1.64, + "grad_norm": 7.203214037080292, + "learning_rate": 1.6471912159399185e-06, + "loss": 1.5615, + "step": 136428 + }, + { + "epoch": 1.64, + "grad_norm": 5.476987053862735, + "learning_rate": 1.6468700353934363e-06, + "loss": 1.4399, + "step": 136431 + }, + { + "epoch": 1.64, + "grad_norm": 10.489160979819452, + "learning_rate": 1.6465488833531784e-06, + "loss": 1.4319, + "step": 136434 + }, + { + "epoch": 1.64, + "grad_norm": 3.072676878960914, + "learning_rate": 1.6462277598202436e-06, + "loss": 1.2786, + "step": 136437 + }, + { + "epoch": 1.64, + "grad_norm": 6.809756431502988, + "learning_rate": 1.6459066647957212e-06, + "loss": 0.9625, + "step": 136440 + }, + { + "epoch": 1.64, + "grad_norm": 3.93428627500523, + "learning_rate": 1.6455855982807133e-06, + "loss": 1.034, + "step": 136443 + }, + { + "epoch": 1.64, + "grad_norm": 8.575763859763692, + "learning_rate": 1.6452645602763095e-06, + "loss": 0.8432, + "step": 136446 + }, + { + "epoch": 1.64, + "grad_norm": 6.483819767286415, + "learning_rate": 1.6449435507836087e-06, + "loss": 1.3429, + "step": 136449 + }, + { + "epoch": 1.64, + "grad_norm": 10.7524561267358, + "learning_rate": 1.6446225698037078e-06, + "loss": 1.3084, + "step": 136452 + }, + { + "epoch": 1.64, + "grad_norm": 25.224422638803354, + "learning_rate": 1.644301617337699e-06, + "loss": 1.0918, + "step": 136455 + }, + { + "epoch": 1.64, + "grad_norm": 3.3256724526157475, + "learning_rate": 1.6439806933866786e-06, + "loss": 1.4279, + "step": 136458 + }, + { + "epoch": 1.64, + "grad_norm": 7.96526017489189, + "learning_rate": 1.643659797951742e-06, + "loss": 1.0272, + "step": 136461 + }, + { + "epoch": 1.64, + "grad_norm": 2.078051078830089, + "learning_rate": 1.6433389310339875e-06, + "loss": 1.0461, + "step": 136464 + }, + { + "epoch": 1.64, + "grad_norm": 4.457274008356676, + "learning_rate": 1.6430180926345075e-06, + "loss": 0.8681, + "step": 136467 + }, + { + "epoch": 1.64, + "grad_norm": 6.515547898206678, + "learning_rate": 1.642697282754393e-06, + "loss": 1.0446, + "step": 136470 + }, + { + "epoch": 1.64, + "grad_norm": 3.899465831470205, + "learning_rate": 1.6423765013947435e-06, + "loss": 1.237, + "step": 136473 + }, + { + "epoch": 1.64, + "grad_norm": 8.06670088531261, + "learning_rate": 1.6420557485566524e-06, + "loss": 1.1297, + "step": 136476 + }, + { + "epoch": 1.64, + "grad_norm": 22.284510959100363, + "learning_rate": 1.6417350242412167e-06, + "loss": 1.3507, + "step": 136479 + }, + { + "epoch": 1.64, + "grad_norm": 13.921671322937408, + "learning_rate": 1.6414143284495254e-06, + "loss": 1.3448, + "step": 136482 + }, + { + "epoch": 1.64, + "grad_norm": 6.068125865385674, + "learning_rate": 1.64109366118268e-06, + "loss": 0.8962, + "step": 136485 + }, + { + "epoch": 1.64, + "grad_norm": 5.505835599321254, + "learning_rate": 1.6407730224417683e-06, + "loss": 1.1659, + "step": 136488 + }, + { + "epoch": 1.64, + "grad_norm": 4.01610394094458, + "learning_rate": 1.6404524122278897e-06, + "loss": 0.9879, + "step": 136491 + }, + { + "epoch": 1.64, + "grad_norm": 3.941567661701542, + "learning_rate": 1.6401318305421333e-06, + "loss": 1.3921, + "step": 136494 + }, + { + "epoch": 1.64, + "grad_norm": 10.326877999578768, + "learning_rate": 1.6398112773855955e-06, + "loss": 1.3894, + "step": 136497 + }, + { + "epoch": 1.64, + "grad_norm": 12.924740680872736, + "learning_rate": 1.6394907527593717e-06, + "loss": 1.215, + "step": 136500 + }, + { + "epoch": 1.64, + "grad_norm": 5.192006285283165, + "learning_rate": 1.6391702566645528e-06, + "loss": 0.8466, + "step": 136503 + }, + { + "epoch": 1.64, + "grad_norm": 5.89416791937461, + "learning_rate": 1.6388497891022381e-06, + "loss": 1.0733, + "step": 136506 + }, + { + "epoch": 1.64, + "grad_norm": 30.40408823624093, + "learning_rate": 1.6385293500735166e-06, + "loss": 1.0223, + "step": 136509 + }, + { + "epoch": 1.64, + "grad_norm": 6.638993726122751, + "learning_rate": 1.6382089395794808e-06, + "loss": 1.1193, + "step": 136512 + }, + { + "epoch": 1.64, + "grad_norm": 22.243985754326427, + "learning_rate": 1.6378885576212255e-06, + "loss": 1.0622, + "step": 136515 + }, + { + "epoch": 1.64, + "grad_norm": 5.58251585598149, + "learning_rate": 1.6375682041998475e-06, + "loss": 1.3989, + "step": 136518 + }, + { + "epoch": 1.64, + "grad_norm": 3.1700094575080726, + "learning_rate": 1.6372478793164336e-06, + "loss": 1.099, + "step": 136521 + }, + { + "epoch": 1.64, + "grad_norm": 9.20728378948085, + "learning_rate": 1.636927582972081e-06, + "loss": 1.261, + "step": 136524 + }, + { + "epoch": 1.64, + "grad_norm": 8.576462582700653, + "learning_rate": 1.6366073151678852e-06, + "loss": 1.2839, + "step": 136527 + }, + { + "epoch": 1.64, + "grad_norm": 9.485922816412861, + "learning_rate": 1.6362870759049333e-06, + "loss": 1.1971, + "step": 136530 + }, + { + "epoch": 1.64, + "grad_norm": 6.338312403679177, + "learning_rate": 1.6359668651843231e-06, + "loss": 0.9749, + "step": 136533 + }, + { + "epoch": 1.64, + "grad_norm": 11.41767334691883, + "learning_rate": 1.6356466830071438e-06, + "loss": 1.0611, + "step": 136536 + }, + { + "epoch": 1.64, + "grad_norm": 11.785851592769895, + "learning_rate": 1.6353265293744892e-06, + "loss": 1.1658, + "step": 136539 + }, + { + "epoch": 1.64, + "grad_norm": 7.888871075299168, + "learning_rate": 1.6350064042874514e-06, + "loss": 1.0351, + "step": 136542 + }, + { + "epoch": 1.64, + "grad_norm": 5.6338338980644025, + "learning_rate": 1.6346863077471264e-06, + "loss": 1.3398, + "step": 136545 + }, + { + "epoch": 1.64, + "grad_norm": 101.25056304358368, + "learning_rate": 1.6343662397546024e-06, + "loss": 1.2299, + "step": 136548 + }, + { + "epoch": 1.64, + "grad_norm": 10.67673540460253, + "learning_rate": 1.6340462003109746e-06, + "loss": 1.3613, + "step": 136551 + }, + { + "epoch": 1.64, + "grad_norm": 9.006762256362668, + "learning_rate": 1.6337261894173305e-06, + "loss": 1.2333, + "step": 136554 + }, + { + "epoch": 1.64, + "grad_norm": 12.964888380907169, + "learning_rate": 1.633406207074767e-06, + "loss": 1.1253, + "step": 136557 + }, + { + "epoch": 1.64, + "grad_norm": 23.00179989786643, + "learning_rate": 1.6330862532843762e-06, + "loss": 0.9054, + "step": 136560 + }, + { + "epoch": 1.64, + "grad_norm": 14.584517851399081, + "learning_rate": 1.6327663280472461e-06, + "loss": 1.2216, + "step": 136563 + }, + { + "epoch": 1.64, + "grad_norm": 8.44064551350532, + "learning_rate": 1.6324464313644707e-06, + "loss": 0.9381, + "step": 136566 + }, + { + "epoch": 1.64, + "grad_norm": 10.279073236464777, + "learning_rate": 1.6321265632371441e-06, + "loss": 1.008, + "step": 136569 + }, + { + "epoch": 1.64, + "grad_norm": 6.632872769615387, + "learning_rate": 1.6318067236663548e-06, + "loss": 0.9905, + "step": 136572 + }, + { + "epoch": 1.64, + "grad_norm": 8.375285065981664, + "learning_rate": 1.631486912653193e-06, + "loss": 0.7342, + "step": 136575 + }, + { + "epoch": 1.64, + "grad_norm": 13.741175416127422, + "learning_rate": 1.631167130198752e-06, + "loss": 1.4078, + "step": 136578 + }, + { + "epoch": 1.64, + "grad_norm": 5.31971066240072, + "learning_rate": 1.6308473763041232e-06, + "loss": 0.865, + "step": 136581 + }, + { + "epoch": 1.64, + "grad_norm": 9.95017313448202, + "learning_rate": 1.6305276509703982e-06, + "loss": 1.3904, + "step": 136584 + }, + { + "epoch": 1.64, + "grad_norm": 3.621194519608393, + "learning_rate": 1.6302079541986694e-06, + "loss": 1.3747, + "step": 136587 + }, + { + "epoch": 1.64, + "grad_norm": 5.380766014955879, + "learning_rate": 1.6298882859900233e-06, + "loss": 1.5591, + "step": 136590 + }, + { + "epoch": 1.64, + "grad_norm": 6.584762729176321, + "learning_rate": 1.6295686463455562e-06, + "loss": 1.3216, + "step": 136593 + }, + { + "epoch": 1.64, + "grad_norm": 15.116228965875043, + "learning_rate": 1.629249035266355e-06, + "loss": 0.9806, + "step": 136596 + }, + { + "epoch": 1.64, + "grad_norm": 7.606324950263313, + "learning_rate": 1.6289294527535128e-06, + "loss": 1.0518, + "step": 136599 + }, + { + "epoch": 1.64, + "grad_norm": 9.239604557993983, + "learning_rate": 1.6286098988081178e-06, + "loss": 1.0686, + "step": 136602 + }, + { + "epoch": 1.64, + "grad_norm": 4.053508227038332, + "learning_rate": 1.6282903734312605e-06, + "loss": 1.3071, + "step": 136605 + }, + { + "epoch": 1.64, + "grad_norm": 7.672244670221974, + "learning_rate": 1.6279708766240343e-06, + "loss": 1.0628, + "step": 136608 + }, + { + "epoch": 1.64, + "grad_norm": 19.873087862848582, + "learning_rate": 1.6276514083875295e-06, + "loss": 1.5536, + "step": 136611 + }, + { + "epoch": 1.64, + "grad_norm": 6.318537030371663, + "learning_rate": 1.627331968722834e-06, + "loss": 0.8191, + "step": 136614 + }, + { + "epoch": 1.64, + "grad_norm": 3.1774765003743575, + "learning_rate": 1.627012557631037e-06, + "loss": 1.0997, + "step": 136617 + }, + { + "epoch": 1.64, + "grad_norm": 3.3142247405149425, + "learning_rate": 1.6266931751132309e-06, + "loss": 1.2689, + "step": 136620 + }, + { + "epoch": 1.64, + "grad_norm": 6.654394640196581, + "learning_rate": 1.6263738211705037e-06, + "loss": 1.0449, + "step": 136623 + }, + { + "epoch": 1.64, + "grad_norm": 7.526652819143981, + "learning_rate": 1.6260544958039492e-06, + "loss": 1.0546, + "step": 136626 + }, + { + "epoch": 1.64, + "grad_norm": 11.113963495550333, + "learning_rate": 1.625735199014653e-06, + "loss": 1.4404, + "step": 136629 + }, + { + "epoch": 1.64, + "grad_norm": 9.321001867642844, + "learning_rate": 1.6254159308037054e-06, + "loss": 1.1481, + "step": 136632 + }, + { + "epoch": 1.64, + "grad_norm": 5.291199201798209, + "learning_rate": 1.625096691172199e-06, + "loss": 0.9793, + "step": 136635 + }, + { + "epoch": 1.64, + "grad_norm": 8.79280325328525, + "learning_rate": 1.624777480121219e-06, + "loss": 1.0199, + "step": 136638 + }, + { + "epoch": 1.64, + "grad_norm": 3.8160278108073955, + "learning_rate": 1.6244582976518587e-06, + "loss": 1.3688, + "step": 136641 + }, + { + "epoch": 1.64, + "grad_norm": 99.37069822714221, + "learning_rate": 1.6241391437652033e-06, + "loss": 1.2204, + "step": 136644 + }, + { + "epoch": 1.64, + "grad_norm": 4.123590872102604, + "learning_rate": 1.6238200184623432e-06, + "loss": 1.1406, + "step": 136647 + }, + { + "epoch": 1.64, + "grad_norm": 7.307691667784181, + "learning_rate": 1.6235009217443686e-06, + "loss": 1.2738, + "step": 136650 + }, + { + "epoch": 1.64, + "grad_norm": 7.884288090668991, + "learning_rate": 1.623181853612371e-06, + "loss": 1.0244, + "step": 136653 + }, + { + "epoch": 1.64, + "grad_norm": 16.149854230500534, + "learning_rate": 1.6228628140674363e-06, + "loss": 0.9814, + "step": 136656 + }, + { + "epoch": 1.64, + "grad_norm": 7.517941356098114, + "learning_rate": 1.622543803110651e-06, + "loss": 1.2178, + "step": 136659 + }, + { + "epoch": 1.64, + "grad_norm": 6.902704596256301, + "learning_rate": 1.6222248207431068e-06, + "loss": 1.2003, + "step": 136662 + }, + { + "epoch": 1.64, + "grad_norm": 6.66663870803204, + "learning_rate": 1.6219058669658905e-06, + "loss": 0.842, + "step": 136665 + }, + { + "epoch": 1.64, + "grad_norm": 7.585442678480191, + "learning_rate": 1.6215869417800945e-06, + "loss": 1.5069, + "step": 136668 + }, + { + "epoch": 1.64, + "grad_norm": 16.839535506705033, + "learning_rate": 1.6212680451868024e-06, + "loss": 1.2143, + "step": 136671 + }, + { + "epoch": 1.64, + "grad_norm": 8.864091377623478, + "learning_rate": 1.6209491771871045e-06, + "loss": 1.1994, + "step": 136674 + }, + { + "epoch": 1.64, + "grad_norm": 17.321606452925618, + "learning_rate": 1.620630337782091e-06, + "loss": 1.3224, + "step": 136677 + }, + { + "epoch": 1.64, + "grad_norm": 3.0608657885113684, + "learning_rate": 1.6203115269728475e-06, + "loss": 1.1268, + "step": 136680 + }, + { + "epoch": 1.64, + "grad_norm": 5.255825722878392, + "learning_rate": 1.6199927447604601e-06, + "loss": 1.159, + "step": 136683 + }, + { + "epoch": 1.64, + "grad_norm": 3.537677002162449, + "learning_rate": 1.619673991146019e-06, + "loss": 1.2757, + "step": 136686 + }, + { + "epoch": 1.64, + "grad_norm": 22.549603948992782, + "learning_rate": 1.6193552661306123e-06, + "loss": 1.482, + "step": 136689 + }, + { + "epoch": 1.64, + "grad_norm": 6.402646976383599, + "learning_rate": 1.6190365697153277e-06, + "loss": 1.2129, + "step": 136692 + }, + { + "epoch": 1.64, + "grad_norm": 9.60071423756909, + "learning_rate": 1.6187179019012544e-06, + "loss": 1.3539, + "step": 136695 + }, + { + "epoch": 1.64, + "grad_norm": 6.145804172502323, + "learning_rate": 1.6183992626894784e-06, + "loss": 1.1309, + "step": 136698 + }, + { + "epoch": 1.64, + "grad_norm": 5.2741452935864315, + "learning_rate": 1.6180806520810832e-06, + "loss": 1.1503, + "step": 136701 + }, + { + "epoch": 1.64, + "grad_norm": 7.926680793311975, + "learning_rate": 1.6177620700771611e-06, + "loss": 0.7838, + "step": 136704 + }, + { + "epoch": 1.64, + "grad_norm": 13.410873611284515, + "learning_rate": 1.6174435166787993e-06, + "loss": 1.158, + "step": 136707 + }, + { + "epoch": 1.64, + "grad_norm": 6.5475112409229155, + "learning_rate": 1.6171249918870814e-06, + "loss": 1.0102, + "step": 136710 + }, + { + "epoch": 1.64, + "grad_norm": 7.3975549474085875, + "learning_rate": 1.616806495703096e-06, + "loss": 1.0868, + "step": 136713 + }, + { + "epoch": 1.64, + "grad_norm": 3.0372370438104053, + "learning_rate": 1.6164880281279304e-06, + "loss": 1.1263, + "step": 136716 + }, + { + "epoch": 1.64, + "grad_norm": 9.451677442856568, + "learning_rate": 1.616169589162674e-06, + "loss": 0.736, + "step": 136719 + }, + { + "epoch": 1.64, + "grad_norm": 3.810741639263831, + "learning_rate": 1.6158511788084108e-06, + "loss": 1.0167, + "step": 136722 + }, + { + "epoch": 1.64, + "grad_norm": 8.008515781143979, + "learning_rate": 1.615532797066225e-06, + "loss": 1.3753, + "step": 136725 + }, + { + "epoch": 1.64, + "grad_norm": 5.663663730240642, + "learning_rate": 1.6152144439372064e-06, + "loss": 1.126, + "step": 136728 + }, + { + "epoch": 1.64, + "grad_norm": 4.678947921850428, + "learning_rate": 1.6148961194224399e-06, + "loss": 1.2748, + "step": 136731 + }, + { + "epoch": 1.64, + "grad_norm": 13.813044926503933, + "learning_rate": 1.6145778235230158e-06, + "loss": 1.1186, + "step": 136734 + }, + { + "epoch": 1.64, + "grad_norm": 9.97856679538431, + "learning_rate": 1.6142595562400144e-06, + "loss": 0.9753, + "step": 136737 + }, + { + "epoch": 1.64, + "grad_norm": 10.299321246132704, + "learning_rate": 1.6139413175745244e-06, + "loss": 1.3102, + "step": 136740 + }, + { + "epoch": 1.64, + "grad_norm": 9.829635206090536, + "learning_rate": 1.6136231075276344e-06, + "loss": 1.2365, + "step": 136743 + }, + { + "epoch": 1.64, + "grad_norm": 7.810768642844885, + "learning_rate": 1.6133049261004263e-06, + "loss": 1.2881, + "step": 136746 + }, + { + "epoch": 1.64, + "grad_norm": 7.258350572455024, + "learning_rate": 1.6129867732939897e-06, + "loss": 1.2453, + "step": 136749 + }, + { + "epoch": 1.64, + "grad_norm": 3.954171672692846, + "learning_rate": 1.6126686491094057e-06, + "loss": 1.4938, + "step": 136752 + }, + { + "epoch": 1.64, + "grad_norm": 12.42109498534425, + "learning_rate": 1.6123505535477623e-06, + "loss": 1.3554, + "step": 136755 + }, + { + "epoch": 1.64, + "grad_norm": 11.008895426658498, + "learning_rate": 1.6120324866101454e-06, + "loss": 1.4837, + "step": 136758 + }, + { + "epoch": 1.64, + "grad_norm": 6.3060441454177365, + "learning_rate": 1.6117144482976432e-06, + "loss": 1.0433, + "step": 136761 + }, + { + "epoch": 1.64, + "grad_norm": 4.922798456407004, + "learning_rate": 1.6113964386113369e-06, + "loss": 1.1313, + "step": 136764 + }, + { + "epoch": 1.64, + "grad_norm": 7.35330722307059, + "learning_rate": 1.6110784575523119e-06, + "loss": 1.117, + "step": 136767 + }, + { + "epoch": 1.64, + "grad_norm": 9.97148989695811, + "learning_rate": 1.6107605051216535e-06, + "loss": 0.6209, + "step": 136770 + }, + { + "epoch": 1.64, + "grad_norm": 3.2642851932861014, + "learning_rate": 1.6104425813204483e-06, + "loss": 1.0185, + "step": 136773 + }, + { + "epoch": 1.64, + "grad_norm": 10.941352917235973, + "learning_rate": 1.6101246861497832e-06, + "loss": 0.974, + "step": 136776 + }, + { + "epoch": 1.64, + "grad_norm": 8.657242134712511, + "learning_rate": 1.6098068196107375e-06, + "loss": 1.1105, + "step": 136779 + }, + { + "epoch": 1.64, + "grad_norm": 6.479355899945491, + "learning_rate": 1.6094889817043991e-06, + "loss": 0.8558, + "step": 136782 + }, + { + "epoch": 1.64, + "grad_norm": 10.967622745840293, + "learning_rate": 1.609171172431856e-06, + "loss": 1.1005, + "step": 136785 + }, + { + "epoch": 1.64, + "grad_norm": 6.944394591710087, + "learning_rate": 1.6088533917941884e-06, + "loss": 0.6901, + "step": 136788 + }, + { + "epoch": 1.64, + "grad_norm": 2.5575846693119506, + "learning_rate": 1.6085356397924789e-06, + "loss": 1.1312, + "step": 136791 + }, + { + "epoch": 1.64, + "grad_norm": 20.189884142149644, + "learning_rate": 1.6082179164278155e-06, + "loss": 1.4263, + "step": 136794 + }, + { + "epoch": 1.64, + "grad_norm": 9.224471942395583, + "learning_rate": 1.6079002217012808e-06, + "loss": 1.0722, + "step": 136797 + }, + { + "epoch": 1.64, + "grad_norm": 5.339725991749115, + "learning_rate": 1.6075825556139602e-06, + "loss": 1.2418, + "step": 136800 + }, + { + "epoch": 1.65, + "grad_norm": 25.86316762507229, + "learning_rate": 1.6072649181669397e-06, + "loss": 1.1381, + "step": 136803 + }, + { + "epoch": 1.65, + "grad_norm": 7.289437725361447, + "learning_rate": 1.606947309361302e-06, + "loss": 1.1779, + "step": 136806 + }, + { + "epoch": 1.65, + "grad_norm": 4.611967883461547, + "learning_rate": 1.6066297291981269e-06, + "loss": 0.6526, + "step": 136809 + }, + { + "epoch": 1.65, + "grad_norm": 6.986672142081223, + "learning_rate": 1.6063121776785007e-06, + "loss": 1.1811, + "step": 136812 + }, + { + "epoch": 1.65, + "grad_norm": 9.128383184893979, + "learning_rate": 1.605994654803511e-06, + "loss": 1.2665, + "step": 136815 + }, + { + "epoch": 1.65, + "grad_norm": 5.913766392501365, + "learning_rate": 1.6056771605742349e-06, + "loss": 1.2532, + "step": 136818 + }, + { + "epoch": 1.65, + "grad_norm": 4.861938345733319, + "learning_rate": 1.6053596949917594e-06, + "loss": 1.0151, + "step": 136821 + }, + { + "epoch": 1.65, + "grad_norm": 20.290833967953937, + "learning_rate": 1.6050422580571667e-06, + "loss": 1.2558, + "step": 136824 + }, + { + "epoch": 1.65, + "grad_norm": 6.92628411919176, + "learning_rate": 1.604724849771544e-06, + "loss": 1.1578, + "step": 136827 + }, + { + "epoch": 1.65, + "grad_norm": 11.664193438179772, + "learning_rate": 1.6044074701359713e-06, + "loss": 1.1852, + "step": 136830 + }, + { + "epoch": 1.65, + "grad_norm": 5.2735472107550745, + "learning_rate": 1.60409011915153e-06, + "loss": 1.0916, + "step": 136833 + }, + { + "epoch": 1.65, + "grad_norm": 8.942840376613859, + "learning_rate": 1.6037727968193051e-06, + "loss": 1.1988, + "step": 136836 + }, + { + "epoch": 1.65, + "grad_norm": 7.466561063193361, + "learning_rate": 1.6034555031403786e-06, + "loss": 0.7932, + "step": 136839 + }, + { + "epoch": 1.65, + "grad_norm": 7.628209061351233, + "learning_rate": 1.6031382381158366e-06, + "loss": 1.1903, + "step": 136842 + }, + { + "epoch": 1.65, + "grad_norm": 8.729226431421976, + "learning_rate": 1.602821001746756e-06, + "loss": 1.3425, + "step": 136845 + }, + { + "epoch": 1.65, + "grad_norm": 10.03995302493939, + "learning_rate": 1.6025037940342258e-06, + "loss": 1.1262, + "step": 136848 + }, + { + "epoch": 1.65, + "grad_norm": 9.950826570545653, + "learning_rate": 1.602186614979324e-06, + "loss": 1.2186, + "step": 136851 + }, + { + "epoch": 1.65, + "grad_norm": 27.18810015555827, + "learning_rate": 1.6018694645831333e-06, + "loss": 0.8299, + "step": 136854 + }, + { + "epoch": 1.65, + "grad_norm": 7.304414577694989, + "learning_rate": 1.6015523428467394e-06, + "loss": 0.8623, + "step": 136857 + }, + { + "epoch": 1.65, + "grad_norm": 4.98548232943987, + "learning_rate": 1.6012352497712202e-06, + "loss": 0.9653, + "step": 136860 + }, + { + "epoch": 1.65, + "grad_norm": 12.693870489080112, + "learning_rate": 1.6009181853576594e-06, + "loss": 1.0432, + "step": 136863 + }, + { + "epoch": 1.65, + "grad_norm": 10.50618763954585, + "learning_rate": 1.6006011496071405e-06, + "loss": 1.0709, + "step": 136866 + }, + { + "epoch": 1.65, + "grad_norm": 7.9801678619849055, + "learning_rate": 1.6002841425207461e-06, + "loss": 1.1705, + "step": 136869 + }, + { + "epoch": 1.65, + "grad_norm": 13.224353585001557, + "learning_rate": 1.5999671640995563e-06, + "loss": 0.783, + "step": 136872 + }, + { + "epoch": 1.65, + "grad_norm": 6.258203705636315, + "learning_rate": 1.5996502143446513e-06, + "loss": 1.1218, + "step": 136875 + }, + { + "epoch": 1.65, + "grad_norm": 9.345979305215296, + "learning_rate": 1.5993332932571137e-06, + "loss": 1.1308, + "step": 136878 + }, + { + "epoch": 1.65, + "grad_norm": 9.654406716145742, + "learning_rate": 1.5990164008380248e-06, + "loss": 1.1873, + "step": 136881 + }, + { + "epoch": 1.65, + "grad_norm": 7.05270797953168, + "learning_rate": 1.5986995370884706e-06, + "loss": 1.1164, + "step": 136884 + }, + { + "epoch": 1.65, + "grad_norm": 35.51814441349551, + "learning_rate": 1.5983827020095265e-06, + "loss": 0.796, + "step": 136887 + }, + { + "epoch": 1.65, + "grad_norm": 7.792755994841581, + "learning_rate": 1.5980658956022777e-06, + "loss": 1.5881, + "step": 136890 + }, + { + "epoch": 1.65, + "grad_norm": 30.94846445955304, + "learning_rate": 1.5977491178678017e-06, + "loss": 1.0506, + "step": 136893 + }, + { + "epoch": 1.65, + "grad_norm": 16.311877648597186, + "learning_rate": 1.5974323688071835e-06, + "loss": 1.4552, + "step": 136896 + }, + { + "epoch": 1.65, + "grad_norm": 42.16370595420093, + "learning_rate": 1.597115648421499e-06, + "loss": 0.969, + "step": 136899 + }, + { + "epoch": 1.65, + "grad_norm": 12.259095317238668, + "learning_rate": 1.5967989567118324e-06, + "loss": 1.0405, + "step": 136902 + }, + { + "epoch": 1.65, + "grad_norm": 5.963009424908359, + "learning_rate": 1.5964822936792657e-06, + "loss": 1.1402, + "step": 136905 + }, + { + "epoch": 1.65, + "grad_norm": 11.747549452725387, + "learning_rate": 1.5961656593248764e-06, + "loss": 1.1012, + "step": 136908 + }, + { + "epoch": 1.65, + "grad_norm": 3.7591972676122727, + "learning_rate": 1.5958490536497494e-06, + "loss": 1.045, + "step": 136911 + }, + { + "epoch": 1.65, + "grad_norm": 6.764887952630601, + "learning_rate": 1.595532476654963e-06, + "loss": 1.0231, + "step": 136914 + }, + { + "epoch": 1.65, + "grad_norm": 10.047871343139635, + "learning_rate": 1.5952159283415935e-06, + "loss": 1.0372, + "step": 136917 + }, + { + "epoch": 1.65, + "grad_norm": 14.004418330552864, + "learning_rate": 1.594899408710725e-06, + "loss": 1.3261, + "step": 136920 + }, + { + "epoch": 1.65, + "grad_norm": 13.566830991995023, + "learning_rate": 1.5945829177634398e-06, + "loss": 1.0956, + "step": 136923 + }, + { + "epoch": 1.65, + "grad_norm": 4.3162155318630235, + "learning_rate": 1.5942664555008136e-06, + "loss": 0.838, + "step": 136926 + }, + { + "epoch": 1.65, + "grad_norm": 10.17602804322973, + "learning_rate": 1.5939500219239278e-06, + "loss": 1.0991, + "step": 136929 + }, + { + "epoch": 1.65, + "grad_norm": 13.562604140327805, + "learning_rate": 1.5936336170338652e-06, + "loss": 1.1605, + "step": 136932 + }, + { + "epoch": 1.65, + "grad_norm": 15.231808303913844, + "learning_rate": 1.593317240831701e-06, + "loss": 1.1901, + "step": 136935 + }, + { + "epoch": 1.65, + "grad_norm": 5.505552701009972, + "learning_rate": 1.5930008933185193e-06, + "loss": 1.3786, + "step": 136938 + }, + { + "epoch": 1.65, + "grad_norm": 5.755273809532694, + "learning_rate": 1.5926845744953946e-06, + "loss": 1.1798, + "step": 136941 + }, + { + "epoch": 1.65, + "grad_norm": 4.5837802531546386, + "learning_rate": 1.5923682843634092e-06, + "loss": 1.1563, + "step": 136944 + }, + { + "epoch": 1.65, + "grad_norm": 6.999037061395615, + "learning_rate": 1.5920520229236424e-06, + "loss": 1.5012, + "step": 136947 + }, + { + "epoch": 1.65, + "grad_norm": 15.783660259665748, + "learning_rate": 1.5917357901771757e-06, + "loss": 0.9451, + "step": 136950 + }, + { + "epoch": 1.65, + "grad_norm": 5.26405777216493, + "learning_rate": 1.5914195861250848e-06, + "loss": 0.7598, + "step": 136953 + }, + { + "epoch": 1.65, + "grad_norm": 7.098964687319998, + "learning_rate": 1.591103410768451e-06, + "loss": 1.024, + "step": 136956 + }, + { + "epoch": 1.65, + "grad_norm": 11.97648095268011, + "learning_rate": 1.590787264108351e-06, + "loss": 1.1706, + "step": 136959 + }, + { + "epoch": 1.65, + "grad_norm": 4.4886287808350644, + "learning_rate": 1.5904711461458644e-06, + "loss": 1.4634, + "step": 136962 + }, + { + "epoch": 1.65, + "grad_norm": 6.1291153517053445, + "learning_rate": 1.5901550568820745e-06, + "loss": 1.1445, + "step": 136965 + }, + { + "epoch": 1.65, + "grad_norm": 7.166194547529066, + "learning_rate": 1.5898389963180526e-06, + "loss": 1.1465, + "step": 136968 + }, + { + "epoch": 1.65, + "grad_norm": 7.512469044876262, + "learning_rate": 1.5895229644548805e-06, + "loss": 1.0486, + "step": 136971 + }, + { + "epoch": 1.65, + "grad_norm": 10.742381712071033, + "learning_rate": 1.58920696129364e-06, + "loss": 1.5095, + "step": 136974 + }, + { + "epoch": 1.65, + "grad_norm": 7.633303035653224, + "learning_rate": 1.588890986835404e-06, + "loss": 1.2883, + "step": 136977 + }, + { + "epoch": 1.65, + "grad_norm": 6.04763117250109, + "learning_rate": 1.5885750410812562e-06, + "loss": 1.0495, + "step": 136980 + }, + { + "epoch": 1.65, + "grad_norm": 3.696894206643192, + "learning_rate": 1.5882591240322697e-06, + "loss": 1.2691, + "step": 136983 + }, + { + "epoch": 1.65, + "grad_norm": 5.887062040342169, + "learning_rate": 1.5879432356895252e-06, + "loss": 0.9898, + "step": 136986 + }, + { + "epoch": 1.65, + "grad_norm": 2.703045804174762, + "learning_rate": 1.5876273760540994e-06, + "loss": 0.8945, + "step": 136989 + }, + { + "epoch": 1.65, + "grad_norm": 10.97235387684784, + "learning_rate": 1.5873115451270737e-06, + "loss": 1.3166, + "step": 136992 + }, + { + "epoch": 1.65, + "grad_norm": 43.65815308256559, + "learning_rate": 1.5869957429095218e-06, + "loss": 1.0121, + "step": 136995 + }, + { + "epoch": 1.65, + "grad_norm": 8.894526447214291, + "learning_rate": 1.5866799694025247e-06, + "loss": 0.7733, + "step": 136998 + }, + { + "epoch": 1.65, + "grad_norm": 7.9338569281762945, + "learning_rate": 1.5863642246071577e-06, + "loss": 0.7903, + "step": 137001 + }, + { + "epoch": 1.65, + "grad_norm": 3.040463973965132, + "learning_rate": 1.5860485085244992e-06, + "loss": 1.1434, + "step": 137004 + }, + { + "epoch": 1.65, + "grad_norm": 13.99108135067155, + "learning_rate": 1.5857328211556256e-06, + "loss": 1.1564, + "step": 137007 + }, + { + "epoch": 1.65, + "grad_norm": 6.447662494414108, + "learning_rate": 1.5854171625016145e-06, + "loss": 1.0264, + "step": 137010 + }, + { + "epoch": 1.65, + "grad_norm": 6.635875209656143, + "learning_rate": 1.5851015325635432e-06, + "loss": 1.317, + "step": 137013 + }, + { + "epoch": 1.65, + "grad_norm": 6.667316560570329, + "learning_rate": 1.5847859313424929e-06, + "loss": 0.7392, + "step": 137016 + }, + { + "epoch": 1.65, + "grad_norm": 5.210436907564726, + "learning_rate": 1.5844703588395338e-06, + "loss": 1.1128, + "step": 137019 + }, + { + "epoch": 1.65, + "grad_norm": 2.6003680137564613, + "learning_rate": 1.5841548150557484e-06, + "loss": 1.1048, + "step": 137022 + }, + { + "epoch": 1.65, + "grad_norm": 3.9652549621433377, + "learning_rate": 1.5838392999922092e-06, + "loss": 1.1875, + "step": 137025 + }, + { + "epoch": 1.65, + "grad_norm": 2.509001072932688, + "learning_rate": 1.5835238136499953e-06, + "loss": 1.0494, + "step": 137028 + }, + { + "epoch": 1.65, + "grad_norm": 5.275460317822548, + "learning_rate": 1.5832083560301848e-06, + "loss": 0.9743, + "step": 137031 + }, + { + "epoch": 1.65, + "grad_norm": 3.526814730626665, + "learning_rate": 1.58289292713385e-06, + "loss": 1.198, + "step": 137034 + }, + { + "epoch": 1.65, + "grad_norm": 4.36504897275077, + "learning_rate": 1.5825775269620703e-06, + "loss": 1.2011, + "step": 137037 + }, + { + "epoch": 1.65, + "grad_norm": 12.95517783649213, + "learning_rate": 1.5822621555159245e-06, + "loss": 0.792, + "step": 137040 + }, + { + "epoch": 1.65, + "grad_norm": 18.889951636377752, + "learning_rate": 1.581946812796482e-06, + "loss": 1.2094, + "step": 137043 + }, + { + "epoch": 1.65, + "grad_norm": 13.010083344820973, + "learning_rate": 1.5816314988048265e-06, + "loss": 0.9368, + "step": 137046 + }, + { + "epoch": 1.65, + "grad_norm": 8.836011380478455, + "learning_rate": 1.5813162135420279e-06, + "loss": 0.9525, + "step": 137049 + }, + { + "epoch": 1.65, + "grad_norm": 6.1550118130498195, + "learning_rate": 1.5810009570091644e-06, + "loss": 1.0436, + "step": 137052 + }, + { + "epoch": 1.65, + "grad_norm": 21.8042691813983, + "learning_rate": 1.580685729207312e-06, + "loss": 1.2232, + "step": 137055 + }, + { + "epoch": 1.65, + "grad_norm": 4.3803309847516285, + "learning_rate": 1.5803705301375482e-06, + "loss": 1.3002, + "step": 137058 + }, + { + "epoch": 1.65, + "grad_norm": 4.656088564461926, + "learning_rate": 1.580055359800946e-06, + "loss": 1.3678, + "step": 137061 + }, + { + "epoch": 1.65, + "grad_norm": 4.356112548938109, + "learning_rate": 1.5797402181985844e-06, + "loss": 0.9431, + "step": 137064 + }, + { + "epoch": 1.65, + "grad_norm": 9.831418494675747, + "learning_rate": 1.5794251053315335e-06, + "loss": 1.4823, + "step": 137067 + }, + { + "epoch": 1.65, + "grad_norm": 4.042338018873589, + "learning_rate": 1.5791100212008714e-06, + "loss": 1.0979, + "step": 137070 + }, + { + "epoch": 1.65, + "grad_norm": 11.338096391076272, + "learning_rate": 1.5787949658076763e-06, + "loss": 1.0397, + "step": 137073 + }, + { + "epoch": 1.65, + "grad_norm": 6.133088647229589, + "learning_rate": 1.5784799391530182e-06, + "loss": 1.2175, + "step": 137076 + }, + { + "epoch": 1.65, + "grad_norm": 6.5361567694552365, + "learning_rate": 1.5781649412379751e-06, + "loss": 1.4976, + "step": 137079 + }, + { + "epoch": 1.65, + "grad_norm": 8.239551940237213, + "learning_rate": 1.5778499720636231e-06, + "loss": 1.09, + "step": 137082 + }, + { + "epoch": 1.65, + "grad_norm": 18.035244266031114, + "learning_rate": 1.5775350316310366e-06, + "loss": 1.0496, + "step": 137085 + }, + { + "epoch": 1.65, + "grad_norm": 6.885576748015897, + "learning_rate": 1.5772201199412863e-06, + "loss": 1.302, + "step": 137088 + }, + { + "epoch": 1.65, + "grad_norm": 4.690770290367911, + "learning_rate": 1.5769052369954497e-06, + "loss": 0.9678, + "step": 137091 + }, + { + "epoch": 1.65, + "grad_norm": 15.005620264761074, + "learning_rate": 1.5765903827946017e-06, + "loss": 1.2859, + "step": 137094 + }, + { + "epoch": 1.65, + "grad_norm": 8.51456533263312, + "learning_rate": 1.5762755573398159e-06, + "loss": 1.1692, + "step": 137097 + }, + { + "epoch": 1.65, + "grad_norm": 5.666443502006163, + "learning_rate": 1.5759607606321714e-06, + "loss": 0.9398, + "step": 137100 + }, + { + "epoch": 1.65, + "grad_norm": 15.43506162235845, + "learning_rate": 1.5756459926727351e-06, + "loss": 1.0203, + "step": 137103 + }, + { + "epoch": 1.65, + "grad_norm": 13.855313511974318, + "learning_rate": 1.5753312534625865e-06, + "loss": 1.29, + "step": 137106 + }, + { + "epoch": 1.65, + "grad_norm": 9.495928417842187, + "learning_rate": 1.5750165430027964e-06, + "loss": 1.0073, + "step": 137109 + }, + { + "epoch": 1.65, + "grad_norm": 20.56335513530673, + "learning_rate": 1.574701861294442e-06, + "loss": 1.068, + "step": 137112 + }, + { + "epoch": 1.65, + "grad_norm": 6.697826275078429, + "learning_rate": 1.5743872083385924e-06, + "loss": 1.0738, + "step": 137115 + }, + { + "epoch": 1.65, + "grad_norm": 7.867477122912214, + "learning_rate": 1.5740725841363259e-06, + "loss": 1.4127, + "step": 137118 + }, + { + "epoch": 1.65, + "grad_norm": 4.500629987608338, + "learning_rate": 1.5737579886887133e-06, + "loss": 1.2279, + "step": 137121 + }, + { + "epoch": 1.65, + "grad_norm": 7.0666276723251125, + "learning_rate": 1.5734434219968332e-06, + "loss": 1.2176, + "step": 137124 + }, + { + "epoch": 1.65, + "grad_norm": 15.70467859914864, + "learning_rate": 1.5731288840617542e-06, + "loss": 1.1055, + "step": 137127 + }, + { + "epoch": 1.65, + "grad_norm": 37.22212889213255, + "learning_rate": 1.5728143748845491e-06, + "loss": 0.9825, + "step": 137130 + }, + { + "epoch": 1.65, + "grad_norm": 12.403524593181077, + "learning_rate": 1.572499894466294e-06, + "loss": 1.3009, + "step": 137133 + }, + { + "epoch": 1.65, + "grad_norm": 21.550356942537853, + "learning_rate": 1.5721854428080596e-06, + "loss": 1.5195, + "step": 137136 + }, + { + "epoch": 1.65, + "grad_norm": 10.250004834413819, + "learning_rate": 1.5718710199109233e-06, + "loss": 0.7323, + "step": 137139 + }, + { + "epoch": 1.65, + "grad_norm": 116.75362746602104, + "learning_rate": 1.571556625775954e-06, + "loss": 0.9061, + "step": 137142 + }, + { + "epoch": 1.65, + "grad_norm": 9.76998953769428, + "learning_rate": 1.5712422604042254e-06, + "loss": 0.9829, + "step": 137145 + }, + { + "epoch": 1.65, + "grad_norm": 8.411103099924807, + "learning_rate": 1.5709279237968135e-06, + "loss": 1.1615, + "step": 137148 + }, + { + "epoch": 1.65, + "grad_norm": 11.655818219610206, + "learning_rate": 1.5706136159547857e-06, + "loss": 1.2242, + "step": 137151 + }, + { + "epoch": 1.65, + "grad_norm": 17.3435117159719, + "learning_rate": 1.5702993368792197e-06, + "loss": 0.9183, + "step": 137154 + }, + { + "epoch": 1.65, + "grad_norm": 6.836454265086713, + "learning_rate": 1.569985086571184e-06, + "loss": 1.2337, + "step": 137157 + }, + { + "epoch": 1.65, + "grad_norm": 21.47090330022325, + "learning_rate": 1.5696708650317528e-06, + "loss": 1.2497, + "step": 137160 + }, + { + "epoch": 1.65, + "grad_norm": 9.802270820393101, + "learning_rate": 1.569356672261998e-06, + "loss": 1.3354, + "step": 137163 + }, + { + "epoch": 1.65, + "grad_norm": 9.657023511873534, + "learning_rate": 1.5690425082629946e-06, + "loss": 1.2391, + "step": 137166 + }, + { + "epoch": 1.65, + "grad_norm": 6.592724581995989, + "learning_rate": 1.5687283730358115e-06, + "loss": 1.4652, + "step": 137169 + }, + { + "epoch": 1.65, + "grad_norm": 19.093051516693638, + "learning_rate": 1.5684142665815194e-06, + "loss": 1.287, + "step": 137172 + }, + { + "epoch": 1.65, + "grad_norm": 7.814390752744286, + "learning_rate": 1.5681001889011938e-06, + "loss": 1.0636, + "step": 137175 + }, + { + "epoch": 1.65, + "grad_norm": 5.208552344798499, + "learning_rate": 1.5677861399959037e-06, + "loss": 1.2244, + "step": 137178 + }, + { + "epoch": 1.65, + "grad_norm": 5.006858977714779, + "learning_rate": 1.5674721198667254e-06, + "loss": 1.0423, + "step": 137181 + }, + { + "epoch": 1.65, + "grad_norm": 11.36054334238104, + "learning_rate": 1.567158128514724e-06, + "loss": 1.204, + "step": 137184 + }, + { + "epoch": 1.65, + "grad_norm": 15.339540400245424, + "learning_rate": 1.566844165940975e-06, + "loss": 1.2046, + "step": 137187 + }, + { + "epoch": 1.65, + "grad_norm": 7.25773365117081, + "learning_rate": 1.5665302321465515e-06, + "loss": 1.398, + "step": 137190 + }, + { + "epoch": 1.65, + "grad_norm": 7.259676837332279, + "learning_rate": 1.5662163271325225e-06, + "loss": 1.2873, + "step": 137193 + }, + { + "epoch": 1.65, + "grad_norm": 5.323392238981017, + "learning_rate": 1.5659024508999576e-06, + "loss": 1.2694, + "step": 137196 + }, + { + "epoch": 1.65, + "grad_norm": 7.589790186457611, + "learning_rate": 1.565588603449929e-06, + "loss": 1.0735, + "step": 137199 + }, + { + "epoch": 1.65, + "grad_norm": 29.767505773093227, + "learning_rate": 1.5652747847835092e-06, + "loss": 1.1857, + "step": 137202 + }, + { + "epoch": 1.65, + "grad_norm": 7.176434550979767, + "learning_rate": 1.5649609949017674e-06, + "loss": 1.0246, + "step": 137205 + }, + { + "epoch": 1.65, + "grad_norm": 9.56156236284358, + "learning_rate": 1.5646472338057784e-06, + "loss": 0.9557, + "step": 137208 + }, + { + "epoch": 1.65, + "grad_norm": 4.059806442537471, + "learning_rate": 1.56433350149661e-06, + "loss": 1.3106, + "step": 137211 + }, + { + "epoch": 1.65, + "grad_norm": 7.706995366594296, + "learning_rate": 1.5640197979753302e-06, + "loss": 1.1103, + "step": 137214 + }, + { + "epoch": 1.65, + "grad_norm": 5.429257423091925, + "learning_rate": 1.563706123243013e-06, + "loss": 1.3194, + "step": 137217 + }, + { + "epoch": 1.65, + "grad_norm": 6.353246745693993, + "learning_rate": 1.5633924773007292e-06, + "loss": 1.133, + "step": 137220 + }, + { + "epoch": 1.65, + "grad_norm": 7.532695636371085, + "learning_rate": 1.563078860149546e-06, + "loss": 0.9643, + "step": 137223 + }, + { + "epoch": 1.65, + "grad_norm": 46.66474859740785, + "learning_rate": 1.562765271790536e-06, + "loss": 1.2273, + "step": 137226 + }, + { + "epoch": 1.65, + "grad_norm": 10.822488090021677, + "learning_rate": 1.5624517122247695e-06, + "loss": 1.4961, + "step": 137229 + }, + { + "epoch": 1.65, + "grad_norm": 12.500364657244416, + "learning_rate": 1.5621381814533176e-06, + "loss": 0.8095, + "step": 137232 + }, + { + "epoch": 1.65, + "grad_norm": 15.658794454231916, + "learning_rate": 1.5618246794772496e-06, + "loss": 1.2674, + "step": 137235 + }, + { + "epoch": 1.65, + "grad_norm": 15.990685144190207, + "learning_rate": 1.5615112062976313e-06, + "loss": 1.3631, + "step": 137238 + }, + { + "epoch": 1.65, + "grad_norm": 8.223410231397885, + "learning_rate": 1.5611977619155361e-06, + "loss": 1.1316, + "step": 137241 + }, + { + "epoch": 1.65, + "grad_norm": 2.587075592336652, + "learning_rate": 1.5608843463320332e-06, + "loss": 1.4625, + "step": 137244 + }, + { + "epoch": 1.65, + "grad_norm": 9.34078860246948, + "learning_rate": 1.5605709595481954e-06, + "loss": 1.1953, + "step": 137247 + }, + { + "epoch": 1.65, + "grad_norm": 5.231964681224518, + "learning_rate": 1.5602576015650861e-06, + "loss": 1.4233, + "step": 137250 + }, + { + "epoch": 1.65, + "grad_norm": 9.40743345110506, + "learning_rate": 1.5599442723837798e-06, + "loss": 1.2499, + "step": 137253 + }, + { + "epoch": 1.65, + "grad_norm": 6.458856275906712, + "learning_rate": 1.5596309720053416e-06, + "loss": 1.122, + "step": 137256 + }, + { + "epoch": 1.65, + "grad_norm": 27.388950931633133, + "learning_rate": 1.5593177004308423e-06, + "loss": 1.2802, + "step": 137259 + }, + { + "epoch": 1.65, + "grad_norm": 8.317772920811885, + "learning_rate": 1.5590044576613538e-06, + "loss": 0.9952, + "step": 137262 + }, + { + "epoch": 1.65, + "grad_norm": 15.00524487435879, + "learning_rate": 1.5586912436979406e-06, + "loss": 1.0532, + "step": 137265 + }, + { + "epoch": 1.65, + "grad_norm": 8.33883294894172, + "learning_rate": 1.5583780585416742e-06, + "loss": 0.9567, + "step": 137268 + }, + { + "epoch": 1.65, + "grad_norm": 5.251022052395151, + "learning_rate": 1.5580649021936213e-06, + "loss": 0.7839, + "step": 137271 + }, + { + "epoch": 1.65, + "grad_norm": 5.768269193539477, + "learning_rate": 1.5577517746548555e-06, + "loss": 1.4402, + "step": 137274 + }, + { + "epoch": 1.65, + "grad_norm": 5.38908600878878, + "learning_rate": 1.5574386759264416e-06, + "loss": 0.978, + "step": 137277 + }, + { + "epoch": 1.65, + "grad_norm": 10.365336616769092, + "learning_rate": 1.5571256060094463e-06, + "loss": 1.3343, + "step": 137280 + }, + { + "epoch": 1.65, + "grad_norm": 4.393687178329557, + "learning_rate": 1.5568125649049405e-06, + "loss": 1.2776, + "step": 137283 + }, + { + "epoch": 1.65, + "grad_norm": 3.9249080112664276, + "learning_rate": 1.5564995526139914e-06, + "loss": 0.8518, + "step": 137286 + }, + { + "epoch": 1.65, + "grad_norm": 2.1697690695458847, + "learning_rate": 1.556186569137671e-06, + "loss": 1.3926, + "step": 137289 + }, + { + "epoch": 1.65, + "grad_norm": 8.00928224949516, + "learning_rate": 1.5558736144770426e-06, + "loss": 1.1584, + "step": 137292 + }, + { + "epoch": 1.65, + "grad_norm": 16.316812755862454, + "learning_rate": 1.5555606886331776e-06, + "loss": 0.9271, + "step": 137295 + }, + { + "epoch": 1.65, + "grad_norm": 2.543458444616181, + "learning_rate": 1.5552477916071397e-06, + "loss": 1.2225, + "step": 137298 + }, + { + "epoch": 1.65, + "grad_norm": 5.547628080272942, + "learning_rate": 1.5549349234000023e-06, + "loss": 0.8803, + "step": 137301 + }, + { + "epoch": 1.65, + "grad_norm": 3.4988918208221347, + "learning_rate": 1.5546220840128268e-06, + "loss": 1.1064, + "step": 137304 + }, + { + "epoch": 1.65, + "grad_norm": 21.816635552707194, + "learning_rate": 1.5543092734466847e-06, + "loss": 1.2446, + "step": 137307 + }, + { + "epoch": 1.65, + "grad_norm": 9.68364311696007, + "learning_rate": 1.5539964917026428e-06, + "loss": 1.1111, + "step": 137310 + }, + { + "epoch": 1.65, + "grad_norm": 6.45984248210005, + "learning_rate": 1.5536837387817694e-06, + "loss": 0.9708, + "step": 137313 + }, + { + "epoch": 1.65, + "grad_norm": 5.945383445203444, + "learning_rate": 1.5533710146851323e-06, + "loss": 0.9546, + "step": 137316 + }, + { + "epoch": 1.65, + "grad_norm": 5.502901296636721, + "learning_rate": 1.5530583194137981e-06, + "loss": 1.3899, + "step": 137319 + }, + { + "epoch": 1.65, + "grad_norm": 6.38091368773282, + "learning_rate": 1.5527456529688312e-06, + "loss": 1.2471, + "step": 137322 + }, + { + "epoch": 1.65, + "grad_norm": 10.812138590631436, + "learning_rate": 1.5524330153513e-06, + "loss": 1.0725, + "step": 137325 + }, + { + "epoch": 1.65, + "grad_norm": 11.348166957045372, + "learning_rate": 1.552120406562274e-06, + "loss": 1.0121, + "step": 137328 + }, + { + "epoch": 1.65, + "grad_norm": 7.625764085750003, + "learning_rate": 1.551807826602817e-06, + "loss": 1.1663, + "step": 137331 + }, + { + "epoch": 1.65, + "grad_norm": 7.377717412682732, + "learning_rate": 1.5514952754739965e-06, + "loss": 0.988, + "step": 137334 + }, + { + "epoch": 1.65, + "grad_norm": 16.825520511700798, + "learning_rate": 1.5511827531768796e-06, + "loss": 0.9923, + "step": 137337 + }, + { + "epoch": 1.65, + "grad_norm": 7.799418495936662, + "learning_rate": 1.5508702597125348e-06, + "loss": 1.2218, + "step": 137340 + }, + { + "epoch": 1.65, + "grad_norm": 19.10076464830653, + "learning_rate": 1.5505577950820262e-06, + "loss": 1.1644, + "step": 137343 + }, + { + "epoch": 1.65, + "grad_norm": 6.089212405038978, + "learning_rate": 1.5502453592864186e-06, + "loss": 1.24, + "step": 137346 + }, + { + "epoch": 1.65, + "grad_norm": 17.439686566718596, + "learning_rate": 1.5499329523267792e-06, + "loss": 0.8522, + "step": 137349 + }, + { + "epoch": 1.65, + "grad_norm": 4.471188936922116, + "learning_rate": 1.5496205742041759e-06, + "loss": 1.3083, + "step": 137352 + }, + { + "epoch": 1.65, + "grad_norm": 5.1825144494414515, + "learning_rate": 1.5493082249196757e-06, + "loss": 1.0563, + "step": 137355 + }, + { + "epoch": 1.65, + "grad_norm": 7.134035835515663, + "learning_rate": 1.5489959044743396e-06, + "loss": 1.2403, + "step": 137358 + }, + { + "epoch": 1.65, + "grad_norm": 10.429319446778438, + "learning_rate": 1.5486836128692396e-06, + "loss": 1.4237, + "step": 137361 + }, + { + "epoch": 1.65, + "grad_norm": 12.201962270883552, + "learning_rate": 1.5483713501054365e-06, + "loss": 1.4984, + "step": 137364 + }, + { + "epoch": 1.65, + "grad_norm": 5.366411114429943, + "learning_rate": 1.5480591161839965e-06, + "loss": 1.469, + "step": 137367 + }, + { + "epoch": 1.65, + "grad_norm": 13.591335688659886, + "learning_rate": 1.5477469111059895e-06, + "loss": 1.4093, + "step": 137370 + }, + { + "epoch": 1.65, + "grad_norm": 9.660854690201317, + "learning_rate": 1.547434734872475e-06, + "loss": 0.7217, + "step": 137373 + }, + { + "epoch": 1.65, + "grad_norm": 6.300595030999252, + "learning_rate": 1.547122587484521e-06, + "loss": 1.3002, + "step": 137376 + }, + { + "epoch": 1.65, + "grad_norm": 4.880450199912699, + "learning_rate": 1.5468104689431928e-06, + "loss": 1.2978, + "step": 137379 + }, + { + "epoch": 1.65, + "grad_norm": 7.8604324178588385, + "learning_rate": 1.546498379249559e-06, + "loss": 1.1835, + "step": 137382 + }, + { + "epoch": 1.65, + "grad_norm": 8.052813109744543, + "learning_rate": 1.54618631840468e-06, + "loss": 1.2469, + "step": 137385 + }, + { + "epoch": 1.65, + "grad_norm": 9.573302909887518, + "learning_rate": 1.545874286409621e-06, + "loss": 1.2511, + "step": 137388 + }, + { + "epoch": 1.65, + "grad_norm": 11.454218185834028, + "learning_rate": 1.5455622832654472e-06, + "loss": 1.2188, + "step": 137391 + }, + { + "epoch": 1.65, + "grad_norm": 13.956305517710279, + "learning_rate": 1.5452503089732241e-06, + "loss": 1.06, + "step": 137394 + }, + { + "epoch": 1.65, + "grad_norm": 5.060683791669944, + "learning_rate": 1.5449383635340187e-06, + "loss": 1.0849, + "step": 137397 + }, + { + "epoch": 1.65, + "grad_norm": 10.140598487128932, + "learning_rate": 1.5446264469488904e-06, + "loss": 1.0504, + "step": 137400 + }, + { + "epoch": 1.65, + "grad_norm": 10.705515256297158, + "learning_rate": 1.544314559218909e-06, + "loss": 1.3773, + "step": 137403 + }, + { + "epoch": 1.65, + "grad_norm": 5.255122298247942, + "learning_rate": 1.5440027003451342e-06, + "loss": 1.2496, + "step": 137406 + }, + { + "epoch": 1.65, + "grad_norm": 16.33614275115458, + "learning_rate": 1.5436908703286347e-06, + "loss": 0.9209, + "step": 137409 + }, + { + "epoch": 1.65, + "grad_norm": 7.559529181795622, + "learning_rate": 1.5433790691704697e-06, + "loss": 1.0256, + "step": 137412 + }, + { + "epoch": 1.65, + "grad_norm": 7.663669205753986, + "learning_rate": 1.5430672968717064e-06, + "loss": 1.1004, + "step": 137415 + }, + { + "epoch": 1.65, + "grad_norm": 45.26780475665654, + "learning_rate": 1.5427555534334072e-06, + "loss": 1.5711, + "step": 137418 + }, + { + "epoch": 1.65, + "grad_norm": 13.15178889385097, + "learning_rate": 1.5424438388566377e-06, + "loss": 1.352, + "step": 137421 + }, + { + "epoch": 1.65, + "grad_norm": 4.876984774804774, + "learning_rate": 1.5421321531424638e-06, + "loss": 1.015, + "step": 137424 + }, + { + "epoch": 1.65, + "grad_norm": 6.876296247914973, + "learning_rate": 1.5418204962919458e-06, + "loss": 1.3094, + "step": 137427 + }, + { + "epoch": 1.65, + "grad_norm": 6.144347732867757, + "learning_rate": 1.5415088683061452e-06, + "loss": 1.2951, + "step": 137430 + }, + { + "epoch": 1.65, + "grad_norm": 3.91288688777854, + "learning_rate": 1.5411972691861288e-06, + "loss": 1.1724, + "step": 137433 + }, + { + "epoch": 1.65, + "grad_norm": 19.16832429486678, + "learning_rate": 1.5408856989329612e-06, + "loss": 0.9436, + "step": 137436 + }, + { + "epoch": 1.65, + "grad_norm": 8.146910296397087, + "learning_rate": 1.5405741575477017e-06, + "loss": 1.023, + "step": 137439 + }, + { + "epoch": 1.65, + "grad_norm": 11.631636386441281, + "learning_rate": 1.5402626450314152e-06, + "loss": 1.1911, + "step": 137442 + }, + { + "epoch": 1.65, + "grad_norm": 2.904228187484674, + "learning_rate": 1.539951161385167e-06, + "loss": 1.11, + "step": 137445 + }, + { + "epoch": 1.65, + "grad_norm": 13.12250175206982, + "learning_rate": 1.5396397066100167e-06, + "loss": 1.1615, + "step": 137448 + }, + { + "epoch": 1.65, + "grad_norm": 5.992555016817983, + "learning_rate": 1.5393282807070308e-06, + "loss": 1.5087, + "step": 137451 + }, + { + "epoch": 1.65, + "grad_norm": 7.602579011446611, + "learning_rate": 1.5390168836772678e-06, + "loss": 1.0987, + "step": 137454 + }, + { + "epoch": 1.65, + "grad_norm": 13.583522831969251, + "learning_rate": 1.5387055155217922e-06, + "loss": 1.1864, + "step": 137457 + }, + { + "epoch": 1.65, + "grad_norm": 3.974497537297372, + "learning_rate": 1.5383941762416666e-06, + "loss": 0.9833, + "step": 137460 + }, + { + "epoch": 1.65, + "grad_norm": 6.084603628237862, + "learning_rate": 1.5380828658379565e-06, + "loss": 1.4788, + "step": 137463 + }, + { + "epoch": 1.65, + "grad_norm": 4.8817639448880685, + "learning_rate": 1.537771584311719e-06, + "loss": 1.1557, + "step": 137466 + }, + { + "epoch": 1.65, + "grad_norm": 5.615124915936367, + "learning_rate": 1.5374603316640225e-06, + "loss": 1.5557, + "step": 137469 + }, + { + "epoch": 1.65, + "grad_norm": 17.85703338216646, + "learning_rate": 1.5371491078959222e-06, + "loss": 1.1692, + "step": 137472 + }, + { + "epoch": 1.65, + "grad_norm": 6.1569070467303835, + "learning_rate": 1.5368379130084843e-06, + "loss": 1.0607, + "step": 137475 + }, + { + "epoch": 1.65, + "grad_norm": 10.220545148593004, + "learning_rate": 1.5365267470027723e-06, + "loss": 1.374, + "step": 137478 + }, + { + "epoch": 1.65, + "grad_norm": 6.556341276853669, + "learning_rate": 1.5362156098798432e-06, + "loss": 1.0369, + "step": 137481 + }, + { + "epoch": 1.65, + "grad_norm": 6.852677140394734, + "learning_rate": 1.5359045016407615e-06, + "loss": 1.2338, + "step": 137484 + }, + { + "epoch": 1.65, + "grad_norm": 4.756120364867284, + "learning_rate": 1.535593422286592e-06, + "loss": 1.0899, + "step": 137487 + }, + { + "epoch": 1.65, + "grad_norm": 6.797220594921697, + "learning_rate": 1.535282371818393e-06, + "loss": 1.3403, + "step": 137490 + }, + { + "epoch": 1.65, + "grad_norm": 8.553488797749683, + "learning_rate": 1.5349713502372242e-06, + "loss": 1.3904, + "step": 137493 + }, + { + "epoch": 1.65, + "grad_norm": 7.500835938261904, + "learning_rate": 1.5346603575441488e-06, + "loss": 1.3346, + "step": 137496 + }, + { + "epoch": 1.65, + "grad_norm": 16.93407945035098, + "learning_rate": 1.5343493937402287e-06, + "loss": 1.0358, + "step": 137499 + }, + { + "epoch": 1.65, + "grad_norm": 10.443254456908912, + "learning_rate": 1.5340384588265244e-06, + "loss": 1.1823, + "step": 137502 + }, + { + "epoch": 1.65, + "grad_norm": 10.347033603366606, + "learning_rate": 1.5337275528041007e-06, + "loss": 1.3678, + "step": 137505 + }, + { + "epoch": 1.65, + "grad_norm": 8.63811217752659, + "learning_rate": 1.5334166756740121e-06, + "loss": 1.2309, + "step": 137508 + }, + { + "epoch": 1.65, + "grad_norm": 9.774372746493674, + "learning_rate": 1.5331058274373244e-06, + "loss": 1.202, + "step": 137511 + }, + { + "epoch": 1.65, + "grad_norm": 3.8620248441499987, + "learning_rate": 1.532795008095096e-06, + "loss": 1.2251, + "step": 137514 + }, + { + "epoch": 1.65, + "grad_norm": 6.3501290645809085, + "learning_rate": 1.5324842176483901e-06, + "loss": 1.0373, + "step": 137517 + }, + { + "epoch": 1.65, + "grad_norm": 15.755592219238274, + "learning_rate": 1.5321734560982626e-06, + "loss": 1.1088, + "step": 137520 + }, + { + "epoch": 1.65, + "grad_norm": 6.576254711882636, + "learning_rate": 1.5318627234457772e-06, + "loss": 1.2313, + "step": 137523 + }, + { + "epoch": 1.65, + "grad_norm": 6.3009317797191855, + "learning_rate": 1.5315520196919941e-06, + "loss": 1.2043, + "step": 137526 + }, + { + "epoch": 1.65, + "grad_norm": 3.56364027184232, + "learning_rate": 1.531241344837977e-06, + "loss": 1.2241, + "step": 137529 + }, + { + "epoch": 1.65, + "grad_norm": 6.899377695578818, + "learning_rate": 1.5309306988847816e-06, + "loss": 1.1986, + "step": 137532 + }, + { + "epoch": 1.65, + "grad_norm": 7.1246335751098675, + "learning_rate": 1.5306200818334672e-06, + "loss": 1.0421, + "step": 137535 + }, + { + "epoch": 1.65, + "grad_norm": 20.493618786665447, + "learning_rate": 1.530309493685096e-06, + "loss": 1.3359, + "step": 137538 + }, + { + "epoch": 1.65, + "grad_norm": 5.263493512763238, + "learning_rate": 1.5299989344407272e-06, + "loss": 1.1028, + "step": 137541 + }, + { + "epoch": 1.65, + "grad_norm": 7.506644020345023, + "learning_rate": 1.5296884041014248e-06, + "loss": 1.4231, + "step": 137544 + }, + { + "epoch": 1.65, + "grad_norm": 6.280117749623435, + "learning_rate": 1.529377902668241e-06, + "loss": 1.2067, + "step": 137547 + }, + { + "epoch": 1.65, + "grad_norm": 27.876415168224256, + "learning_rate": 1.5290674301422402e-06, + "loss": 0.9504, + "step": 137550 + }, + { + "epoch": 1.65, + "grad_norm": 10.659435841417283, + "learning_rate": 1.5287569865244833e-06, + "loss": 1.2976, + "step": 137553 + }, + { + "epoch": 1.65, + "grad_norm": 3.257034500771301, + "learning_rate": 1.5284465718160247e-06, + "loss": 1.0332, + "step": 137556 + }, + { + "epoch": 1.65, + "grad_norm": 5.319118493795041, + "learning_rate": 1.528136186017929e-06, + "loss": 0.8303, + "step": 137559 + }, + { + "epoch": 1.65, + "grad_norm": 14.517501783702295, + "learning_rate": 1.527825829131251e-06, + "loss": 1.2166, + "step": 137562 + }, + { + "epoch": 1.65, + "grad_norm": 4.348114425064953, + "learning_rate": 1.527515501157052e-06, + "loss": 0.9489, + "step": 137565 + }, + { + "epoch": 1.65, + "grad_norm": 10.398010414989894, + "learning_rate": 1.5272052020963902e-06, + "loss": 1.3507, + "step": 137568 + }, + { + "epoch": 1.65, + "grad_norm": 4.416833205395922, + "learning_rate": 1.5268949319503279e-06, + "loss": 0.9895, + "step": 137571 + }, + { + "epoch": 1.65, + "grad_norm": 22.8115148442782, + "learning_rate": 1.5265846907199177e-06, + "loss": 0.9198, + "step": 137574 + }, + { + "epoch": 1.65, + "grad_norm": 18.522687208527262, + "learning_rate": 1.5262744784062256e-06, + "loss": 1.3755, + "step": 137577 + }, + { + "epoch": 1.65, + "grad_norm": 7.056254790568845, + "learning_rate": 1.5259642950103038e-06, + "loss": 1.0074, + "step": 137580 + }, + { + "epoch": 1.65, + "grad_norm": 11.906064389650409, + "learning_rate": 1.5256541405332127e-06, + "loss": 1.0141, + "step": 137583 + }, + { + "epoch": 1.65, + "grad_norm": 17.95684765400385, + "learning_rate": 1.5253440149760145e-06, + "loss": 1.1412, + "step": 137586 + }, + { + "epoch": 1.65, + "grad_norm": 4.26753049443665, + "learning_rate": 1.5250339183397634e-06, + "loss": 1.3266, + "step": 137589 + }, + { + "epoch": 1.65, + "grad_norm": 7.526371843928337, + "learning_rate": 1.5247238506255168e-06, + "loss": 1.2948, + "step": 137592 + }, + { + "epoch": 1.65, + "grad_norm": 4.8139711607035025, + "learning_rate": 1.5244138118343389e-06, + "loss": 1.0329, + "step": 137595 + }, + { + "epoch": 1.65, + "grad_norm": 46.64516248553189, + "learning_rate": 1.5241038019672827e-06, + "loss": 1.2533, + "step": 137598 + }, + { + "epoch": 1.65, + "grad_norm": 7.9661735126549145, + "learning_rate": 1.5237938210254055e-06, + "loss": 1.1886, + "step": 137601 + }, + { + "epoch": 1.65, + "grad_norm": 4.609279704167124, + "learning_rate": 1.5234838690097664e-06, + "loss": 1.2068, + "step": 137604 + }, + { + "epoch": 1.65, + "grad_norm": 14.420109131371518, + "learning_rate": 1.5231739459214246e-06, + "loss": 1.1962, + "step": 137607 + }, + { + "epoch": 1.65, + "grad_norm": 9.370273445094298, + "learning_rate": 1.5228640517614356e-06, + "loss": 0.753, + "step": 137610 + }, + { + "epoch": 1.65, + "grad_norm": 5.318366689823363, + "learning_rate": 1.5225541865308613e-06, + "loss": 1.3993, + "step": 137613 + }, + { + "epoch": 1.65, + "grad_norm": 5.2564748678758955, + "learning_rate": 1.5222443502307527e-06, + "loss": 0.8993, + "step": 137616 + }, + { + "epoch": 1.65, + "grad_norm": 3.259232485612226, + "learning_rate": 1.5219345428621734e-06, + "loss": 1.1757, + "step": 137619 + }, + { + "epoch": 1.65, + "grad_norm": 5.695749376250289, + "learning_rate": 1.5216247644261762e-06, + "loss": 1.0307, + "step": 137622 + }, + { + "epoch": 1.65, + "grad_norm": 100.3476044884247, + "learning_rate": 1.521315014923821e-06, + "loss": 1.4515, + "step": 137625 + }, + { + "epoch": 1.65, + "grad_norm": 53.03999125633026, + "learning_rate": 1.5210052943561615e-06, + "loss": 1.3007, + "step": 137628 + }, + { + "epoch": 1.65, + "grad_norm": 6.045002601646611, + "learning_rate": 1.520695602724257e-06, + "loss": 1.2478, + "step": 137631 + }, + { + "epoch": 1.66, + "grad_norm": 6.4041712111332645, + "learning_rate": 1.5203859400291643e-06, + "loss": 0.9951, + "step": 137634 + }, + { + "epoch": 1.66, + "grad_norm": 7.269774359578231, + "learning_rate": 1.5200763062719426e-06, + "loss": 1.0552, + "step": 137637 + }, + { + "epoch": 1.66, + "grad_norm": 13.393525246427666, + "learning_rate": 1.5197667014536455e-06, + "loss": 1.2058, + "step": 137640 + }, + { + "epoch": 1.66, + "grad_norm": 4.357757416676742, + "learning_rate": 1.5194571255753277e-06, + "loss": 1.0165, + "step": 137643 + }, + { + "epoch": 1.66, + "grad_norm": 11.043091092961964, + "learning_rate": 1.5191475786380483e-06, + "loss": 0.9558, + "step": 137646 + }, + { + "epoch": 1.66, + "grad_norm": 7.003363424760552, + "learning_rate": 1.5188380606428632e-06, + "loss": 1.1973, + "step": 137649 + }, + { + "epoch": 1.66, + "grad_norm": 9.384374563409073, + "learning_rate": 1.5185285715908315e-06, + "loss": 0.9878, + "step": 137652 + }, + { + "epoch": 1.66, + "grad_norm": 13.882341594682991, + "learning_rate": 1.5182191114830037e-06, + "loss": 1.1611, + "step": 137655 + }, + { + "epoch": 1.66, + "grad_norm": 13.048560934047996, + "learning_rate": 1.5179096803204396e-06, + "loss": 1.211, + "step": 137658 + }, + { + "epoch": 1.66, + "grad_norm": 7.620931900274906, + "learning_rate": 1.5176002781041976e-06, + "loss": 1.1941, + "step": 137661 + }, + { + "epoch": 1.66, + "grad_norm": 11.692409122328632, + "learning_rate": 1.5172909048353268e-06, + "loss": 1.1551, + "step": 137664 + }, + { + "epoch": 1.66, + "grad_norm": 8.37214431439323, + "learning_rate": 1.5169815605148896e-06, + "loss": 1.1579, + "step": 137667 + }, + { + "epoch": 1.66, + "grad_norm": 5.630568216564398, + "learning_rate": 1.5166722451439363e-06, + "loss": 1.3217, + "step": 137670 + }, + { + "epoch": 1.66, + "grad_norm": 2.9794191013357434, + "learning_rate": 1.516362958723525e-06, + "loss": 1.0545, + "step": 137673 + }, + { + "epoch": 1.66, + "grad_norm": 19.461854040128426, + "learning_rate": 1.5160537012547115e-06, + "loss": 1.0947, + "step": 137676 + }, + { + "epoch": 1.66, + "grad_norm": 11.162618826898465, + "learning_rate": 1.515744472738553e-06, + "loss": 1.2655, + "step": 137679 + }, + { + "epoch": 1.66, + "grad_norm": 7.633809320377823, + "learning_rate": 1.5154352731761025e-06, + "loss": 1.1689, + "step": 137682 + }, + { + "epoch": 1.66, + "grad_norm": 9.061446684334854, + "learning_rate": 1.5151261025684128e-06, + "loss": 0.9856, + "step": 137685 + }, + { + "epoch": 1.66, + "grad_norm": 4.686875017769911, + "learning_rate": 1.514816960916542e-06, + "loss": 1.0777, + "step": 137688 + }, + { + "epoch": 1.66, + "grad_norm": 29.600138777303886, + "learning_rate": 1.5145078482215435e-06, + "loss": 1.3471, + "step": 137691 + }, + { + "epoch": 1.66, + "grad_norm": 8.691596903008097, + "learning_rate": 1.5141987644844768e-06, + "loss": 1.1057, + "step": 137694 + }, + { + "epoch": 1.66, + "grad_norm": 10.917238975982485, + "learning_rate": 1.5138897097063898e-06, + "loss": 1.2933, + "step": 137697 + }, + { + "epoch": 1.66, + "grad_norm": 10.957657798705212, + "learning_rate": 1.5135806838883416e-06, + "loss": 1.0252, + "step": 137700 + }, + { + "epoch": 1.66, + "grad_norm": 11.449833069571335, + "learning_rate": 1.5132716870313869e-06, + "loss": 1.4543, + "step": 137703 + }, + { + "epoch": 1.66, + "grad_norm": 5.135206607348477, + "learning_rate": 1.5129627191365804e-06, + "loss": 1.415, + "step": 137706 + }, + { + "epoch": 1.66, + "grad_norm": 4.0324461570340855, + "learning_rate": 1.5126537802049712e-06, + "loss": 1.0437, + "step": 137709 + }, + { + "epoch": 1.66, + "grad_norm": 7.989363274081006, + "learning_rate": 1.5123448702376186e-06, + "loss": 0.9467, + "step": 137712 + }, + { + "epoch": 1.66, + "grad_norm": 6.99631966076695, + "learning_rate": 1.5120359892355752e-06, + "loss": 0.9251, + "step": 137715 + }, + { + "epoch": 1.66, + "grad_norm": 8.251776851451275, + "learning_rate": 1.5117271371998965e-06, + "loss": 1.0, + "step": 137718 + }, + { + "epoch": 1.66, + "grad_norm": 8.200212682919533, + "learning_rate": 1.5114183141316374e-06, + "loss": 1.3223, + "step": 137721 + }, + { + "epoch": 1.66, + "grad_norm": 6.5870609822769035, + "learning_rate": 1.5111095200318493e-06, + "loss": 1.2859, + "step": 137724 + }, + { + "epoch": 1.66, + "grad_norm": 12.339673816783206, + "learning_rate": 1.5108007549015846e-06, + "loss": 1.1747, + "step": 137727 + }, + { + "epoch": 1.66, + "grad_norm": 8.498188241922373, + "learning_rate": 1.5104920187419003e-06, + "loss": 1.034, + "step": 137730 + }, + { + "epoch": 1.66, + "grad_norm": 12.784829545252823, + "learning_rate": 1.51018331155385e-06, + "loss": 1.3208, + "step": 137733 + }, + { + "epoch": 1.66, + "grad_norm": 5.397118222464555, + "learning_rate": 1.509874633338484e-06, + "loss": 1.296, + "step": 137736 + }, + { + "epoch": 1.66, + "grad_norm": 6.299270933834643, + "learning_rate": 1.509565984096858e-06, + "loss": 1.2384, + "step": 137739 + }, + { + "epoch": 1.66, + "grad_norm": 8.289025831875533, + "learning_rate": 1.5092573638300244e-06, + "loss": 1.1916, + "step": 137742 + }, + { + "epoch": 1.66, + "grad_norm": 10.512656998949874, + "learning_rate": 1.5089487725390405e-06, + "loss": 1.2612, + "step": 137745 + }, + { + "epoch": 1.66, + "grad_norm": 3.162935631240012, + "learning_rate": 1.5086402102249552e-06, + "loss": 1.1504, + "step": 137748 + }, + { + "epoch": 1.66, + "grad_norm": 16.253531496777153, + "learning_rate": 1.50833167688882e-06, + "loss": 1.3835, + "step": 137751 + }, + { + "epoch": 1.66, + "grad_norm": 12.892718392678157, + "learning_rate": 1.5080231725316908e-06, + "loss": 1.1962, + "step": 137754 + }, + { + "epoch": 1.66, + "grad_norm": 13.454479383688094, + "learning_rate": 1.5077146971546187e-06, + "loss": 1.3434, + "step": 137757 + }, + { + "epoch": 1.66, + "grad_norm": 16.94376830350298, + "learning_rate": 1.5074062507586607e-06, + "loss": 1.0493, + "step": 137760 + }, + { + "epoch": 1.66, + "grad_norm": 18.61260079929812, + "learning_rate": 1.5070978333448627e-06, + "loss": 1.1128, + "step": 137763 + }, + { + "epoch": 1.66, + "grad_norm": 21.4086805091428, + "learning_rate": 1.506789444914284e-06, + "loss": 0.8656, + "step": 137766 + }, + { + "epoch": 1.66, + "grad_norm": 9.438487532896023, + "learning_rate": 1.5064810854679713e-06, + "loss": 1.3695, + "step": 137769 + }, + { + "epoch": 1.66, + "grad_norm": 5.428959577064298, + "learning_rate": 1.5061727550069783e-06, + "loss": 1.2395, + "step": 137772 + }, + { + "epoch": 1.66, + "grad_norm": 4.396996094325124, + "learning_rate": 1.5058644535323619e-06, + "loss": 1.2701, + "step": 137775 + }, + { + "epoch": 1.66, + "grad_norm": 2.7186048268661853, + "learning_rate": 1.505556181045167e-06, + "loss": 1.1083, + "step": 137778 + }, + { + "epoch": 1.66, + "grad_norm": 5.894287493707841, + "learning_rate": 1.5052479375464501e-06, + "loss": 1.1008, + "step": 137781 + }, + { + "epoch": 1.66, + "grad_norm": 31.107974918727137, + "learning_rate": 1.504939723037262e-06, + "loss": 1.1708, + "step": 137784 + }, + { + "epoch": 1.66, + "grad_norm": 7.247368589557125, + "learning_rate": 1.504631537518657e-06, + "loss": 0.909, + "step": 137787 + }, + { + "epoch": 1.66, + "grad_norm": 14.112076836045334, + "learning_rate": 1.5043233809916846e-06, + "loss": 1.1813, + "step": 137790 + }, + { + "epoch": 1.66, + "grad_norm": 3.4281995341419487, + "learning_rate": 1.5040152534573936e-06, + "loss": 1.057, + "step": 137793 + }, + { + "epoch": 1.66, + "grad_norm": 5.444230987573665, + "learning_rate": 1.503707154916838e-06, + "loss": 1.0177, + "step": 137796 + }, + { + "epoch": 1.66, + "grad_norm": 10.489560511069515, + "learning_rate": 1.5033990853710701e-06, + "loss": 0.92, + "step": 137799 + }, + { + "epoch": 1.66, + "grad_norm": 10.46061433043992, + "learning_rate": 1.5030910448211434e-06, + "loss": 1.0254, + "step": 137802 + }, + { + "epoch": 1.66, + "grad_norm": 9.646722059966711, + "learning_rate": 1.5027830332681037e-06, + "loss": 1.4188, + "step": 137805 + }, + { + "epoch": 1.66, + "grad_norm": 13.229947899769707, + "learning_rate": 1.502475050713007e-06, + "loss": 1.086, + "step": 137808 + }, + { + "epoch": 1.66, + "grad_norm": 10.8072243724104, + "learning_rate": 1.5021670971569003e-06, + "loss": 1.0915, + "step": 137811 + }, + { + "epoch": 1.66, + "grad_norm": 10.52464539998244, + "learning_rate": 1.501859172600838e-06, + "loss": 0.9961, + "step": 137814 + }, + { + "epoch": 1.66, + "grad_norm": 5.644943202322072, + "learning_rate": 1.5015512770458674e-06, + "loss": 1.2819, + "step": 137817 + }, + { + "epoch": 1.66, + "grad_norm": 5.888911677378409, + "learning_rate": 1.5012434104930408e-06, + "loss": 1.2949, + "step": 137820 + }, + { + "epoch": 1.66, + "grad_norm": 5.653686188862738, + "learning_rate": 1.5009355729434094e-06, + "loss": 1.0116, + "step": 137823 + }, + { + "epoch": 1.66, + "grad_norm": 4.186864967584106, + "learning_rate": 1.5006277643980237e-06, + "loss": 1.0433, + "step": 137826 + }, + { + "epoch": 1.66, + "grad_norm": 4.9583773730668526, + "learning_rate": 1.5003199848579363e-06, + "loss": 1.0208, + "step": 137829 + }, + { + "epoch": 1.66, + "grad_norm": 13.347678208995385, + "learning_rate": 1.5000122343241952e-06, + "loss": 1.0422, + "step": 137832 + }, + { + "epoch": 1.66, + "grad_norm": 7.979648751282972, + "learning_rate": 1.4997045127978482e-06, + "loss": 1.1726, + "step": 137835 + }, + { + "epoch": 1.66, + "grad_norm": 16.03973923282976, + "learning_rate": 1.499396820279948e-06, + "loss": 1.055, + "step": 137838 + }, + { + "epoch": 1.66, + "grad_norm": 10.959650502543594, + "learning_rate": 1.499089156771547e-06, + "loss": 1.1683, + "step": 137841 + }, + { + "epoch": 1.66, + "grad_norm": 7.044316454413311, + "learning_rate": 1.4987815222736902e-06, + "loss": 1.0125, + "step": 137844 + }, + { + "epoch": 1.66, + "grad_norm": 5.483057221135289, + "learning_rate": 1.4984739167874308e-06, + "loss": 1.336, + "step": 137847 + }, + { + "epoch": 1.66, + "grad_norm": 8.492989529215109, + "learning_rate": 1.4981663403138191e-06, + "loss": 1.5617, + "step": 137850 + }, + { + "epoch": 1.66, + "grad_norm": 5.205428640454887, + "learning_rate": 1.4978587928539012e-06, + "loss": 1.2926, + "step": 137853 + }, + { + "epoch": 1.66, + "grad_norm": 3.6209599412467064, + "learning_rate": 1.4975512744087306e-06, + "loss": 1.1436, + "step": 137856 + }, + { + "epoch": 1.66, + "grad_norm": 8.194086377429599, + "learning_rate": 1.497243784979353e-06, + "loss": 0.71, + "step": 137859 + }, + { + "epoch": 1.66, + "grad_norm": 8.220159729662921, + "learning_rate": 1.4969363245668201e-06, + "loss": 0.9432, + "step": 137862 + }, + { + "epoch": 1.66, + "grad_norm": 2.2071824291270383, + "learning_rate": 1.4966288931721794e-06, + "loss": 1.0724, + "step": 137865 + }, + { + "epoch": 1.66, + "grad_norm": 8.831138390553669, + "learning_rate": 1.4963214907964852e-06, + "loss": 0.8966, + "step": 137868 + }, + { + "epoch": 1.66, + "grad_norm": 3.8882331402702546, + "learning_rate": 1.4960141174407794e-06, + "loss": 0.9777, + "step": 137871 + }, + { + "epoch": 1.66, + "grad_norm": 24.16818636619254, + "learning_rate": 1.495706773106118e-06, + "loss": 1.0269, + "step": 137874 + }, + { + "epoch": 1.66, + "grad_norm": 4.906914442866948, + "learning_rate": 1.4953994577935428e-06, + "loss": 1.1544, + "step": 137877 + }, + { + "epoch": 1.66, + "grad_norm": 8.211739052407998, + "learning_rate": 1.4950921715041055e-06, + "loss": 1.2021, + "step": 137880 + }, + { + "epoch": 1.66, + "grad_norm": 15.7015603029942, + "learning_rate": 1.4947849142388582e-06, + "loss": 0.94, + "step": 137883 + }, + { + "epoch": 1.66, + "grad_norm": 3.480758504010541, + "learning_rate": 1.4944776859988442e-06, + "loss": 1.344, + "step": 137886 + }, + { + "epoch": 1.66, + "grad_norm": 7.4769204574150026, + "learning_rate": 1.4941704867851137e-06, + "loss": 0.9023, + "step": 137889 + }, + { + "epoch": 1.66, + "grad_norm": 10.490275482415196, + "learning_rate": 1.4938633165987182e-06, + "loss": 1.3805, + "step": 137892 + }, + { + "epoch": 1.66, + "grad_norm": 4.3108259012768455, + "learning_rate": 1.4935561754407012e-06, + "loss": 1.2061, + "step": 137895 + }, + { + "epoch": 1.66, + "grad_norm": 17.446566974391924, + "learning_rate": 1.4932490633121144e-06, + "loss": 1.2432, + "step": 137898 + }, + { + "epoch": 1.66, + "grad_norm": 23.318145104191128, + "learning_rate": 1.4929419802140033e-06, + "loss": 1.2628, + "step": 137901 + }, + { + "epoch": 1.66, + "grad_norm": 6.087779594066368, + "learning_rate": 1.492634926147416e-06, + "loss": 1.2286, + "step": 137904 + }, + { + "epoch": 1.66, + "grad_norm": 5.23771741780897, + "learning_rate": 1.4923279011134018e-06, + "loss": 0.9594, + "step": 137907 + }, + { + "epoch": 1.66, + "grad_norm": 10.888102029793759, + "learning_rate": 1.4920209051130096e-06, + "loss": 1.0147, + "step": 137910 + }, + { + "epoch": 1.66, + "grad_norm": 4.467545072064109, + "learning_rate": 1.4917139381472844e-06, + "loss": 1.1957, + "step": 137913 + }, + { + "epoch": 1.66, + "grad_norm": 14.330636308639882, + "learning_rate": 1.4914070002172765e-06, + "loss": 0.922, + "step": 137916 + }, + { + "epoch": 1.66, + "grad_norm": 14.123779293996767, + "learning_rate": 1.4911000913240293e-06, + "loss": 0.9665, + "step": 137919 + }, + { + "epoch": 1.66, + "grad_norm": 10.209928533027746, + "learning_rate": 1.4907932114685952e-06, + "loss": 1.3868, + "step": 137922 + }, + { + "epoch": 1.66, + "grad_norm": 7.230482663767222, + "learning_rate": 1.490486360652017e-06, + "loss": 1.0482, + "step": 137925 + }, + { + "epoch": 1.66, + "grad_norm": 13.674302792949472, + "learning_rate": 1.4901795388753425e-06, + "loss": 0.7324, + "step": 137928 + }, + { + "epoch": 1.66, + "grad_norm": 6.735882922163487, + "learning_rate": 1.4898727461396213e-06, + "loss": 1.2041, + "step": 137931 + }, + { + "epoch": 1.66, + "grad_norm": 5.839629348192552, + "learning_rate": 1.4895659824458986e-06, + "loss": 1.1656, + "step": 137934 + }, + { + "epoch": 1.66, + "grad_norm": 9.946321918095885, + "learning_rate": 1.489259247795224e-06, + "loss": 1.0946, + "step": 137937 + }, + { + "epoch": 1.66, + "grad_norm": 8.832619783774973, + "learning_rate": 1.488952542188642e-06, + "loss": 1.0466, + "step": 137940 + }, + { + "epoch": 1.66, + "grad_norm": 11.11541793308664, + "learning_rate": 1.4886458656271962e-06, + "loss": 1.2559, + "step": 137943 + }, + { + "epoch": 1.66, + "grad_norm": 9.51222499652301, + "learning_rate": 1.488339218111937e-06, + "loss": 1.3418, + "step": 137946 + }, + { + "epoch": 1.66, + "grad_norm": 3.8831746261887212, + "learning_rate": 1.4880325996439127e-06, + "loss": 1.0656, + "step": 137949 + }, + { + "epoch": 1.66, + "grad_norm": 9.870693876327282, + "learning_rate": 1.4877260102241652e-06, + "loss": 1.0149, + "step": 137952 + }, + { + "epoch": 1.66, + "grad_norm": 4.874930291849717, + "learning_rate": 1.487419449853742e-06, + "loss": 1.5662, + "step": 137955 + }, + { + "epoch": 1.66, + "grad_norm": 5.719162424327086, + "learning_rate": 1.487112918533693e-06, + "loss": 0.9718, + "step": 137958 + }, + { + "epoch": 1.66, + "grad_norm": 6.806154800648891, + "learning_rate": 1.4868064162650587e-06, + "loss": 1.2412, + "step": 137961 + }, + { + "epoch": 1.66, + "grad_norm": 13.822624438704576, + "learning_rate": 1.486499943048889e-06, + "loss": 1.0593, + "step": 137964 + }, + { + "epoch": 1.66, + "grad_norm": 4.175726320461019, + "learning_rate": 1.486193498886227e-06, + "loss": 0.6119, + "step": 137967 + }, + { + "epoch": 1.66, + "grad_norm": 22.27615951907949, + "learning_rate": 1.4858870837781203e-06, + "loss": 0.921, + "step": 137970 + }, + { + "epoch": 1.66, + "grad_norm": 5.694534649294355, + "learning_rate": 1.4855806977256137e-06, + "loss": 1.129, + "step": 137973 + }, + { + "epoch": 1.66, + "grad_norm": 9.854143465767464, + "learning_rate": 1.4852743407297554e-06, + "loss": 1.1069, + "step": 137976 + }, + { + "epoch": 1.66, + "grad_norm": 13.528068601472471, + "learning_rate": 1.4849680127915866e-06, + "loss": 0.866, + "step": 137979 + }, + { + "epoch": 1.66, + "grad_norm": 7.712057461638438, + "learning_rate": 1.4846617139121578e-06, + "loss": 1.1152, + "step": 137982 + }, + { + "epoch": 1.66, + "grad_norm": 8.239735853477391, + "learning_rate": 1.484355444092508e-06, + "loss": 1.3867, + "step": 137985 + }, + { + "epoch": 1.66, + "grad_norm": 10.562858884967758, + "learning_rate": 1.4840492033336863e-06, + "loss": 1.3132, + "step": 137988 + }, + { + "epoch": 1.66, + "grad_norm": 10.775709413238472, + "learning_rate": 1.4837429916367397e-06, + "loss": 1.2252, + "step": 137991 + }, + { + "epoch": 1.66, + "grad_norm": 4.20388022679942, + "learning_rate": 1.4834368090027085e-06, + "loss": 1.1411, + "step": 137994 + }, + { + "epoch": 1.66, + "grad_norm": 5.736503312233906, + "learning_rate": 1.4831306554326396e-06, + "loss": 1.0506, + "step": 137997 + }, + { + "epoch": 1.66, + "grad_norm": 17.24701582955023, + "learning_rate": 1.4828245309275801e-06, + "loss": 1.4015, + "step": 138000 + }, + { + "epoch": 1.66, + "grad_norm": 18.133275869301777, + "learning_rate": 1.4825184354885725e-06, + "loss": 1.0723, + "step": 138003 + }, + { + "epoch": 1.66, + "grad_norm": 10.887874993024422, + "learning_rate": 1.4822123691166602e-06, + "loss": 1.3062, + "step": 138006 + }, + { + "epoch": 1.66, + "grad_norm": 20.570617829087528, + "learning_rate": 1.481906331812888e-06, + "loss": 1.5688, + "step": 138009 + }, + { + "epoch": 1.66, + "grad_norm": 6.191055677100562, + "learning_rate": 1.4816003235783017e-06, + "loss": 1.3924, + "step": 138012 + }, + { + "epoch": 1.66, + "grad_norm": 7.637277504218111, + "learning_rate": 1.481294344413945e-06, + "loss": 1.6532, + "step": 138015 + }, + { + "epoch": 1.66, + "grad_norm": 9.485947378401454, + "learning_rate": 1.4809883943208647e-06, + "loss": 1.2819, + "step": 138018 + }, + { + "epoch": 1.66, + "grad_norm": 8.607067770010074, + "learning_rate": 1.4806824733001e-06, + "loss": 1.0448, + "step": 138021 + }, + { + "epoch": 1.66, + "grad_norm": 3.202578563388945, + "learning_rate": 1.4803765813526993e-06, + "loss": 1.3487, + "step": 138024 + }, + { + "epoch": 1.66, + "grad_norm": 30.87764251211728, + "learning_rate": 1.4800707184797037e-06, + "loss": 1.5718, + "step": 138027 + }, + { + "epoch": 1.66, + "grad_norm": 10.065075186215065, + "learning_rate": 1.479764884682159e-06, + "loss": 1.3432, + "step": 138030 + }, + { + "epoch": 1.66, + "grad_norm": 3.5302723764296786, + "learning_rate": 1.4794590799611053e-06, + "loss": 1.2075, + "step": 138033 + }, + { + "epoch": 1.66, + "grad_norm": 4.679754418245427, + "learning_rate": 1.47915330431759e-06, + "loss": 0.976, + "step": 138036 + }, + { + "epoch": 1.66, + "grad_norm": 6.693119810663954, + "learning_rate": 1.478847557752654e-06, + "loss": 1.1435, + "step": 138039 + }, + { + "epoch": 1.66, + "grad_norm": 7.425586986263699, + "learning_rate": 1.4785418402673446e-06, + "loss": 1.0019, + "step": 138042 + }, + { + "epoch": 1.66, + "grad_norm": 11.98407777209244, + "learning_rate": 1.478236151862702e-06, + "loss": 0.9778, + "step": 138045 + }, + { + "epoch": 1.66, + "grad_norm": 13.2176705299175, + "learning_rate": 1.4779304925397687e-06, + "loss": 1.0376, + "step": 138048 + }, + { + "epoch": 1.66, + "grad_norm": 3.6674921964899063, + "learning_rate": 1.4776248622995882e-06, + "loss": 1.3218, + "step": 138051 + }, + { + "epoch": 1.66, + "grad_norm": 5.171352537074439, + "learning_rate": 1.4773192611432042e-06, + "loss": 1.0179, + "step": 138054 + }, + { + "epoch": 1.66, + "grad_norm": 6.624247500018126, + "learning_rate": 1.4770136890716624e-06, + "loss": 1.1621, + "step": 138057 + }, + { + "epoch": 1.66, + "grad_norm": 9.970598934626627, + "learning_rate": 1.476708146086001e-06, + "loss": 1.2444, + "step": 138060 + }, + { + "epoch": 1.66, + "grad_norm": 5.7175520663180155, + "learning_rate": 1.4764026321872637e-06, + "loss": 1.262, + "step": 138063 + }, + { + "epoch": 1.66, + "grad_norm": 7.278826753950513, + "learning_rate": 1.4760971473764973e-06, + "loss": 0.9564, + "step": 138066 + }, + { + "epoch": 1.66, + "grad_norm": 11.700788418256444, + "learning_rate": 1.4757916916547378e-06, + "loss": 1.1011, + "step": 138069 + }, + { + "epoch": 1.66, + "grad_norm": 8.434240634442247, + "learning_rate": 1.475486265023034e-06, + "loss": 1.0906, + "step": 138072 + }, + { + "epoch": 1.66, + "grad_norm": 7.400143236455772, + "learning_rate": 1.475180867482422e-06, + "loss": 1.0024, + "step": 138075 + }, + { + "epoch": 1.66, + "grad_norm": 2.870936625981717, + "learning_rate": 1.4748754990339476e-06, + "loss": 1.0752, + "step": 138078 + }, + { + "epoch": 1.66, + "grad_norm": 4.327519507891236, + "learning_rate": 1.4745701596786522e-06, + "loss": 1.1505, + "step": 138081 + }, + { + "epoch": 1.66, + "grad_norm": 7.109663374618925, + "learning_rate": 1.4742648494175803e-06, + "loss": 0.932, + "step": 138084 + }, + { + "epoch": 1.66, + "grad_norm": 9.419982089872999, + "learning_rate": 1.4739595682517715e-06, + "loss": 1.0278, + "step": 138087 + }, + { + "epoch": 1.66, + "grad_norm": 13.340643491500595, + "learning_rate": 1.4736543161822648e-06, + "loss": 1.0181, + "step": 138090 + }, + { + "epoch": 1.66, + "grad_norm": 3.6046056140365064, + "learning_rate": 1.4733490932101057e-06, + "loss": 1.041, + "step": 138093 + }, + { + "epoch": 1.66, + "grad_norm": 28.09259543608558, + "learning_rate": 1.473043899336335e-06, + "loss": 1.2658, + "step": 138096 + }, + { + "epoch": 1.66, + "grad_norm": 5.2876562570199175, + "learning_rate": 1.4727387345619947e-06, + "loss": 0.9556, + "step": 138099 + }, + { + "epoch": 1.66, + "grad_norm": 3.571248621906214, + "learning_rate": 1.4724335988881255e-06, + "loss": 1.2992, + "step": 138102 + }, + { + "epoch": 1.66, + "grad_norm": 8.318498606504305, + "learning_rate": 1.4721284923157674e-06, + "loss": 1.4919, + "step": 138105 + }, + { + "epoch": 1.66, + "grad_norm": 13.656840220394074, + "learning_rate": 1.4718234148459653e-06, + "loss": 1.3418, + "step": 138108 + }, + { + "epoch": 1.66, + "grad_norm": 9.648235498980267, + "learning_rate": 1.4715183664797583e-06, + "loss": 0.911, + "step": 138111 + }, + { + "epoch": 1.66, + "grad_norm": 6.936022080054969, + "learning_rate": 1.4712133472181855e-06, + "loss": 1.1947, + "step": 138114 + }, + { + "epoch": 1.66, + "grad_norm": 19.347364085810753, + "learning_rate": 1.4709083570622884e-06, + "loss": 1.0594, + "step": 138117 + }, + { + "epoch": 1.66, + "grad_norm": 10.293429603394472, + "learning_rate": 1.4706033960131095e-06, + "loss": 1.2608, + "step": 138120 + }, + { + "epoch": 1.66, + "grad_norm": 21.47009162420678, + "learning_rate": 1.470298464071689e-06, + "loss": 1.2145, + "step": 138123 + }, + { + "epoch": 1.66, + "grad_norm": 12.719868505696791, + "learning_rate": 1.4699935612390703e-06, + "loss": 1.0248, + "step": 138126 + }, + { + "epoch": 1.66, + "grad_norm": 2.5925520447538224, + "learning_rate": 1.4696886875162897e-06, + "loss": 1.3254, + "step": 138129 + }, + { + "epoch": 1.66, + "grad_norm": 7.984729781163204, + "learning_rate": 1.4693838429043882e-06, + "loss": 1.187, + "step": 138132 + }, + { + "epoch": 1.66, + "grad_norm": 2.6170205636849384, + "learning_rate": 1.4690790274044064e-06, + "loss": 1.312, + "step": 138135 + }, + { + "epoch": 1.66, + "grad_norm": 3.492152340819684, + "learning_rate": 1.4687742410173867e-06, + "loss": 1.0143, + "step": 138138 + }, + { + "epoch": 1.66, + "grad_norm": 3.77356794709814, + "learning_rate": 1.4684694837443657e-06, + "loss": 1.1295, + "step": 138141 + }, + { + "epoch": 1.66, + "grad_norm": 8.269500748989058, + "learning_rate": 1.4681647555863854e-06, + "loss": 0.816, + "step": 138144 + }, + { + "epoch": 1.66, + "grad_norm": 14.886730501768856, + "learning_rate": 1.4678600565444856e-06, + "loss": 1.1217, + "step": 138147 + }, + { + "epoch": 1.66, + "grad_norm": 12.394277433011673, + "learning_rate": 1.4675553866197078e-06, + "loss": 0.9821, + "step": 138150 + }, + { + "epoch": 1.66, + "grad_norm": 10.50793415519029, + "learning_rate": 1.4672507458130913e-06, + "loss": 1.2967, + "step": 138153 + }, + { + "epoch": 1.66, + "grad_norm": 12.612249993638354, + "learning_rate": 1.4669461341256719e-06, + "loss": 1.1139, + "step": 138156 + }, + { + "epoch": 1.66, + "grad_norm": 25.938125822131312, + "learning_rate": 1.4666415515584909e-06, + "loss": 1.1901, + "step": 138159 + }, + { + "epoch": 1.66, + "grad_norm": 2.8644404562931154, + "learning_rate": 1.4663369981125897e-06, + "loss": 1.2184, + "step": 138162 + }, + { + "epoch": 1.66, + "grad_norm": 8.951885062597773, + "learning_rate": 1.4660324737890086e-06, + "loss": 1.1313, + "step": 138165 + }, + { + "epoch": 1.66, + "grad_norm": 11.065973032226436, + "learning_rate": 1.4657279785887813e-06, + "loss": 1.1485, + "step": 138168 + }, + { + "epoch": 1.66, + "grad_norm": 16.632657495222706, + "learning_rate": 1.4654235125129512e-06, + "loss": 1.2881, + "step": 138171 + }, + { + "epoch": 1.66, + "grad_norm": 14.197771999166296, + "learning_rate": 1.4651190755625589e-06, + "loss": 1.6425, + "step": 138174 + }, + { + "epoch": 1.66, + "grad_norm": 9.135231401934496, + "learning_rate": 1.4648146677386389e-06, + "loss": 1.1714, + "step": 138177 + }, + { + "epoch": 1.66, + "grad_norm": 25.123087808410485, + "learning_rate": 1.4645102890422336e-06, + "loss": 1.4764, + "step": 138180 + }, + { + "epoch": 1.66, + "grad_norm": 8.213022801920433, + "learning_rate": 1.464205939474379e-06, + "loss": 1.3592, + "step": 138183 + }, + { + "epoch": 1.66, + "grad_norm": 31.323881729806047, + "learning_rate": 1.4639016190361144e-06, + "loss": 1.1468, + "step": 138186 + }, + { + "epoch": 1.66, + "grad_norm": 3.907029873271665, + "learning_rate": 1.4635973277284788e-06, + "loss": 1.5801, + "step": 138189 + }, + { + "epoch": 1.66, + "grad_norm": 4.689293956614949, + "learning_rate": 1.4632930655525124e-06, + "loss": 1.1384, + "step": 138192 + }, + { + "epoch": 1.66, + "grad_norm": 7.5602559514397925, + "learning_rate": 1.4629888325092533e-06, + "loss": 1.088, + "step": 138195 + }, + { + "epoch": 1.66, + "grad_norm": 14.837449245465612, + "learning_rate": 1.4626846285997355e-06, + "loss": 1.347, + "step": 138198 + }, + { + "epoch": 1.66, + "grad_norm": 39.13343196379838, + "learning_rate": 1.4623804538249985e-06, + "loss": 1.191, + "step": 138201 + }, + { + "epoch": 1.66, + "grad_norm": 11.073532024840045, + "learning_rate": 1.4620763081860834e-06, + "loss": 1.1105, + "step": 138204 + }, + { + "epoch": 1.66, + "grad_norm": 15.893967684372141, + "learning_rate": 1.4617721916840288e-06, + "loss": 1.1859, + "step": 138207 + }, + { + "epoch": 1.66, + "grad_norm": 8.959858755159342, + "learning_rate": 1.4614681043198676e-06, + "loss": 1.1402, + "step": 138210 + }, + { + "epoch": 1.66, + "grad_norm": 24.564522018067088, + "learning_rate": 1.4611640460946397e-06, + "loss": 1.1248, + "step": 138213 + }, + { + "epoch": 1.66, + "grad_norm": 4.464998217797466, + "learning_rate": 1.4608600170093855e-06, + "loss": 0.8596, + "step": 138216 + }, + { + "epoch": 1.66, + "grad_norm": 10.114908332637636, + "learning_rate": 1.4605560170651411e-06, + "loss": 1.1125, + "step": 138219 + }, + { + "epoch": 1.66, + "grad_norm": 2.9280553023174067, + "learning_rate": 1.46025204626294e-06, + "loss": 0.8474, + "step": 138222 + }, + { + "epoch": 1.66, + "grad_norm": 19.849216445996852, + "learning_rate": 1.4599481046038233e-06, + "loss": 1.0996, + "step": 138225 + }, + { + "epoch": 1.66, + "grad_norm": 10.759914665220709, + "learning_rate": 1.459644192088827e-06, + "loss": 1.6791, + "step": 138228 + }, + { + "epoch": 1.66, + "grad_norm": 6.761126553821037, + "learning_rate": 1.4593403087189896e-06, + "loss": 1.1471, + "step": 138231 + }, + { + "epoch": 1.66, + "grad_norm": 4.138394092485794, + "learning_rate": 1.4590364544953495e-06, + "loss": 1.0861, + "step": 138234 + }, + { + "epoch": 1.66, + "grad_norm": 27.18610603950227, + "learning_rate": 1.4587326294189408e-06, + "loss": 1.0989, + "step": 138237 + }, + { + "epoch": 1.66, + "grad_norm": 29.400051699784687, + "learning_rate": 1.4584288334907992e-06, + "loss": 1.1158, + "step": 138240 + }, + { + "epoch": 1.66, + "grad_norm": 22.930557228106245, + "learning_rate": 1.458125066711964e-06, + "loss": 1.8243, + "step": 138243 + }, + { + "epoch": 1.66, + "grad_norm": 6.761622374089928, + "learning_rate": 1.457821329083473e-06, + "loss": 1.0902, + "step": 138246 + }, + { + "epoch": 1.66, + "grad_norm": 3.5227745052415114, + "learning_rate": 1.4575176206063578e-06, + "loss": 1.3673, + "step": 138249 + }, + { + "epoch": 1.66, + "grad_norm": 8.373999811809101, + "learning_rate": 1.45721394128166e-06, + "loss": 1.3812, + "step": 138252 + }, + { + "epoch": 1.66, + "grad_norm": 11.456145319190078, + "learning_rate": 1.4569102911104115e-06, + "loss": 1.106, + "step": 138255 + }, + { + "epoch": 1.66, + "grad_norm": 5.3216671442486145, + "learning_rate": 1.4566066700936554e-06, + "loss": 1.2194, + "step": 138258 + }, + { + "epoch": 1.66, + "grad_norm": 4.933639358766706, + "learning_rate": 1.4563030782324217e-06, + "loss": 1.1614, + "step": 138261 + }, + { + "epoch": 1.66, + "grad_norm": 13.022800557470555, + "learning_rate": 1.4559995155277472e-06, + "loss": 1.3447, + "step": 138264 + }, + { + "epoch": 1.66, + "grad_norm": 15.156914632224934, + "learning_rate": 1.455695981980668e-06, + "loss": 1.1969, + "step": 138267 + }, + { + "epoch": 1.66, + "grad_norm": 7.897033580687256, + "learning_rate": 1.4553924775922213e-06, + "loss": 1.3018, + "step": 138270 + }, + { + "epoch": 1.66, + "grad_norm": 9.858781142446952, + "learning_rate": 1.4550890023634444e-06, + "loss": 1.2819, + "step": 138273 + }, + { + "epoch": 1.66, + "grad_norm": 9.355630432174397, + "learning_rate": 1.4547855562953684e-06, + "loss": 1.0805, + "step": 138276 + }, + { + "epoch": 1.66, + "grad_norm": 10.268183231469203, + "learning_rate": 1.454482139389034e-06, + "loss": 0.9861, + "step": 138279 + }, + { + "epoch": 1.66, + "grad_norm": 3.891549498863543, + "learning_rate": 1.4541787516454707e-06, + "loss": 0.8882, + "step": 138282 + }, + { + "epoch": 1.66, + "grad_norm": 19.587655844248374, + "learning_rate": 1.4538753930657179e-06, + "loss": 1.2779, + "step": 138285 + }, + { + "epoch": 1.66, + "grad_norm": 11.242744654411835, + "learning_rate": 1.4535720636508122e-06, + "loss": 0.9728, + "step": 138288 + }, + { + "epoch": 1.66, + "grad_norm": 8.478049442081263, + "learning_rate": 1.453268763401785e-06, + "loss": 1.0789, + "step": 138291 + }, + { + "epoch": 1.66, + "grad_norm": 15.44715382238874, + "learning_rate": 1.4529654923196722e-06, + "loss": 1.0653, + "step": 138294 + }, + { + "epoch": 1.66, + "grad_norm": 8.394782896909568, + "learning_rate": 1.45266225040551e-06, + "loss": 1.2357, + "step": 138297 + }, + { + "epoch": 1.66, + "grad_norm": 11.277147927035209, + "learning_rate": 1.4523590376603346e-06, + "loss": 0.7504, + "step": 138300 + }, + { + "epoch": 1.66, + "grad_norm": 10.084770604058306, + "learning_rate": 1.4520558540851793e-06, + "loss": 0.7939, + "step": 138303 + }, + { + "epoch": 1.66, + "grad_norm": 8.779129934782762, + "learning_rate": 1.451752699681076e-06, + "loss": 0.8721, + "step": 138306 + }, + { + "epoch": 1.66, + "grad_norm": 32.768597676967694, + "learning_rate": 1.451449574449062e-06, + "loss": 1.2233, + "step": 138309 + }, + { + "epoch": 1.66, + "grad_norm": 4.4370232371785, + "learning_rate": 1.4511464783901707e-06, + "loss": 1.3219, + "step": 138312 + }, + { + "epoch": 1.66, + "grad_norm": 12.982203975198741, + "learning_rate": 1.4508434115054404e-06, + "loss": 1.0882, + "step": 138315 + }, + { + "epoch": 1.66, + "grad_norm": 22.022334042416453, + "learning_rate": 1.450540373795899e-06, + "loss": 0.968, + "step": 138318 + }, + { + "epoch": 1.66, + "grad_norm": 6.850746247404804, + "learning_rate": 1.4502373652625868e-06, + "loss": 1.5767, + "step": 138321 + }, + { + "epoch": 1.66, + "grad_norm": 5.052088267231571, + "learning_rate": 1.449934385906533e-06, + "loss": 0.9092, + "step": 138324 + }, + { + "epoch": 1.66, + "grad_norm": 5.237109143854085, + "learning_rate": 1.4496314357287745e-06, + "loss": 1.2041, + "step": 138327 + }, + { + "epoch": 1.66, + "grad_norm": 9.698725559378076, + "learning_rate": 1.4493285147303426e-06, + "loss": 1.0536, + "step": 138330 + }, + { + "epoch": 1.66, + "grad_norm": 4.778087040438156, + "learning_rate": 1.4490256229122723e-06, + "loss": 0.7054, + "step": 138333 + }, + { + "epoch": 1.66, + "grad_norm": 8.91614542796963, + "learning_rate": 1.4487227602755972e-06, + "loss": 1.2869, + "step": 138336 + }, + { + "epoch": 1.66, + "grad_norm": 15.456209262066015, + "learning_rate": 1.4484199268213505e-06, + "loss": 1.0429, + "step": 138339 + }, + { + "epoch": 1.66, + "grad_norm": 5.5770516203108995, + "learning_rate": 1.4481171225505697e-06, + "loss": 1.0506, + "step": 138342 + }, + { + "epoch": 1.66, + "grad_norm": 5.610974191805647, + "learning_rate": 1.4478143474642848e-06, + "loss": 1.6057, + "step": 138345 + }, + { + "epoch": 1.66, + "grad_norm": 20.2192026905047, + "learning_rate": 1.4475116015635272e-06, + "loss": 1.1117, + "step": 138348 + }, + { + "epoch": 1.66, + "grad_norm": 31.467243756077302, + "learning_rate": 1.4472088848493304e-06, + "loss": 0.9772, + "step": 138351 + }, + { + "epoch": 1.66, + "grad_norm": 14.636273301040223, + "learning_rate": 1.4469061973227327e-06, + "loss": 1.0858, + "step": 138354 + }, + { + "epoch": 1.66, + "grad_norm": 12.66017267923168, + "learning_rate": 1.4466035389847598e-06, + "loss": 1.1144, + "step": 138357 + }, + { + "epoch": 1.66, + "grad_norm": 6.548685335690182, + "learning_rate": 1.4463009098364489e-06, + "loss": 0.7766, + "step": 138360 + }, + { + "epoch": 1.66, + "grad_norm": 8.171517807736743, + "learning_rate": 1.4459983098788343e-06, + "loss": 1.0608, + "step": 138363 + }, + { + "epoch": 1.66, + "grad_norm": 4.2457095354621375, + "learning_rate": 1.4456957391129445e-06, + "loss": 1.1066, + "step": 138366 + }, + { + "epoch": 1.66, + "grad_norm": 2.985317486667084, + "learning_rate": 1.445393197539815e-06, + "loss": 1.0057, + "step": 138369 + }, + { + "epoch": 1.66, + "grad_norm": 42.35707939067654, + "learning_rate": 1.4450906851604752e-06, + "loss": 0.9046, + "step": 138372 + }, + { + "epoch": 1.66, + "grad_norm": 9.860975958352629, + "learning_rate": 1.4447882019759596e-06, + "loss": 1.4083, + "step": 138375 + }, + { + "epoch": 1.66, + "grad_norm": 6.367792544577512, + "learning_rate": 1.444485747987301e-06, + "loss": 1.1973, + "step": 138378 + }, + { + "epoch": 1.66, + "grad_norm": 20.169454078269652, + "learning_rate": 1.4441833231955327e-06, + "loss": 0.7771, + "step": 138381 + }, + { + "epoch": 1.66, + "grad_norm": 6.044581420201897, + "learning_rate": 1.4438809276016818e-06, + "loss": 0.8876, + "step": 138384 + }, + { + "epoch": 1.66, + "grad_norm": 10.164032692818779, + "learning_rate": 1.4435785612067865e-06, + "loss": 1.1499, + "step": 138387 + }, + { + "epoch": 1.66, + "grad_norm": 10.502726617570298, + "learning_rate": 1.4432762240118737e-06, + "loss": 1.3655, + "step": 138390 + }, + { + "epoch": 1.66, + "grad_norm": 9.514246105002519, + "learning_rate": 1.4429739160179755e-06, + "loss": 1.1186, + "step": 138393 + }, + { + "epoch": 1.66, + "grad_norm": 6.905372103653212, + "learning_rate": 1.4426716372261285e-06, + "loss": 1.1644, + "step": 138396 + }, + { + "epoch": 1.66, + "grad_norm": 2.519984265732225, + "learning_rate": 1.442369387637359e-06, + "loss": 1.5219, + "step": 138399 + }, + { + "epoch": 1.66, + "grad_norm": 15.34478658862007, + "learning_rate": 1.4420671672526997e-06, + "loss": 0.9675, + "step": 138402 + }, + { + "epoch": 1.66, + "grad_norm": 3.591256806742727, + "learning_rate": 1.4417649760731844e-06, + "loss": 1.7094, + "step": 138405 + }, + { + "epoch": 1.66, + "grad_norm": 12.837169119633735, + "learning_rate": 1.441462814099841e-06, + "loss": 1.013, + "step": 138408 + }, + { + "epoch": 1.66, + "grad_norm": 14.212222615170772, + "learning_rate": 1.4411606813337041e-06, + "loss": 0.8714, + "step": 138411 + }, + { + "epoch": 1.66, + "grad_norm": 6.11902358984064, + "learning_rate": 1.440858577775801e-06, + "loss": 1.1921, + "step": 138414 + }, + { + "epoch": 1.66, + "grad_norm": 16.853644238043408, + "learning_rate": 1.4405565034271641e-06, + "loss": 1.4701, + "step": 138417 + }, + { + "epoch": 1.66, + "grad_norm": 27.089325424118893, + "learning_rate": 1.4402544582888255e-06, + "loss": 1.1828, + "step": 138420 + }, + { + "epoch": 1.66, + "grad_norm": 6.038347962784832, + "learning_rate": 1.4399524423618172e-06, + "loss": 1.0609, + "step": 138423 + }, + { + "epoch": 1.66, + "grad_norm": 9.490214276901598, + "learning_rate": 1.4396504556471657e-06, + "loss": 1.1947, + "step": 138426 + }, + { + "epoch": 1.66, + "grad_norm": 4.748899204115316, + "learning_rate": 1.439348498145906e-06, + "loss": 1.3663, + "step": 138429 + }, + { + "epoch": 1.66, + "grad_norm": 10.308277530923554, + "learning_rate": 1.439046569859064e-06, + "loss": 1.3018, + "step": 138432 + }, + { + "epoch": 1.66, + "grad_norm": 2.4678090601325358, + "learning_rate": 1.438744670787674e-06, + "loss": 0.848, + "step": 138435 + }, + { + "epoch": 1.66, + "grad_norm": 4.5354128342474995, + "learning_rate": 1.4384428009327634e-06, + "loss": 1.0785, + "step": 138438 + }, + { + "epoch": 1.66, + "grad_norm": 4.203175188746765, + "learning_rate": 1.4381409602953633e-06, + "loss": 1.2362, + "step": 138441 + }, + { + "epoch": 1.66, + "grad_norm": 3.7390557367669874, + "learning_rate": 1.4378391488765041e-06, + "loss": 1.0829, + "step": 138444 + }, + { + "epoch": 1.66, + "grad_norm": 49.443042888151716, + "learning_rate": 1.4375373666772185e-06, + "loss": 1.045, + "step": 138447 + }, + { + "epoch": 1.66, + "grad_norm": 11.107719806048244, + "learning_rate": 1.437235613698531e-06, + "loss": 1.4984, + "step": 138450 + }, + { + "epoch": 1.66, + "grad_norm": 4.803471560282525, + "learning_rate": 1.4369338899414764e-06, + "loss": 1.1381, + "step": 138453 + }, + { + "epoch": 1.66, + "grad_norm": 11.170392719599214, + "learning_rate": 1.4366321954070795e-06, + "loss": 1.0875, + "step": 138456 + }, + { + "epoch": 1.66, + "grad_norm": 6.365427725677036, + "learning_rate": 1.4363305300963726e-06, + "loss": 0.9546, + "step": 138459 + }, + { + "epoch": 1.66, + "grad_norm": 4.669680135082925, + "learning_rate": 1.4360288940103873e-06, + "loss": 1.1352, + "step": 138462 + }, + { + "epoch": 1.67, + "grad_norm": 12.686107469535527, + "learning_rate": 1.4357272871501483e-06, + "loss": 1.4579, + "step": 138465 + }, + { + "epoch": 1.67, + "grad_norm": 5.421583780003657, + "learning_rate": 1.4354257095166867e-06, + "loss": 0.9185, + "step": 138468 + }, + { + "epoch": 1.67, + "grad_norm": 12.839462578179711, + "learning_rate": 1.4351241611110355e-06, + "loss": 1.2579, + "step": 138471 + }, + { + "epoch": 1.67, + "grad_norm": 5.522305830670909, + "learning_rate": 1.4348226419342181e-06, + "loss": 1.0563, + "step": 138474 + }, + { + "epoch": 1.67, + "grad_norm": 10.33157674810088, + "learning_rate": 1.434521151987268e-06, + "loss": 1.2153, + "step": 138477 + }, + { + "epoch": 1.67, + "grad_norm": 25.92305645379299, + "learning_rate": 1.4342196912712093e-06, + "loss": 1.3635, + "step": 138480 + }, + { + "epoch": 1.67, + "grad_norm": 13.508470129539017, + "learning_rate": 1.4339182597870738e-06, + "loss": 1.2292, + "step": 138483 + }, + { + "epoch": 1.67, + "grad_norm": 6.674873829728451, + "learning_rate": 1.433616857535889e-06, + "loss": 1.0533, + "step": 138486 + }, + { + "epoch": 1.67, + "grad_norm": 5.78377503937847, + "learning_rate": 1.433315484518687e-06, + "loss": 1.1213, + "step": 138489 + }, + { + "epoch": 1.67, + "grad_norm": 14.939988637155757, + "learning_rate": 1.4330141407364917e-06, + "loss": 1.4437, + "step": 138492 + }, + { + "epoch": 1.67, + "grad_norm": 3.6594896534527996, + "learning_rate": 1.4327128261903355e-06, + "loss": 1.047, + "step": 138495 + }, + { + "epoch": 1.67, + "grad_norm": 5.534236069155918, + "learning_rate": 1.4324115408812423e-06, + "loss": 1.0707, + "step": 138498 + }, + { + "epoch": 1.67, + "grad_norm": 14.450982323076774, + "learning_rate": 1.432110284810242e-06, + "loss": 0.9807, + "step": 138501 + }, + { + "epoch": 1.67, + "grad_norm": 9.020301886467232, + "learning_rate": 1.431809057978365e-06, + "loss": 1.5226, + "step": 138504 + }, + { + "epoch": 1.67, + "grad_norm": 14.849569113066435, + "learning_rate": 1.4315078603866361e-06, + "loss": 1.016, + "step": 138507 + }, + { + "epoch": 1.67, + "grad_norm": 5.715464758761573, + "learning_rate": 1.4312066920360845e-06, + "loss": 1.0671, + "step": 138510 + }, + { + "epoch": 1.67, + "grad_norm": 10.734377503176887, + "learning_rate": 1.4309055529277405e-06, + "loss": 1.1036, + "step": 138513 + }, + { + "epoch": 1.67, + "grad_norm": 6.937160161150601, + "learning_rate": 1.4306044430626286e-06, + "loss": 1.1117, + "step": 138516 + }, + { + "epoch": 1.67, + "grad_norm": 3.2110964119591703, + "learning_rate": 1.4303033624417751e-06, + "loss": 1.0114, + "step": 138519 + }, + { + "epoch": 1.67, + "grad_norm": 6.049163294148413, + "learning_rate": 1.4300023110662086e-06, + "loss": 1.2619, + "step": 138522 + }, + { + "epoch": 1.67, + "grad_norm": 15.652290749606562, + "learning_rate": 1.4297012889369576e-06, + "loss": 1.3069, + "step": 138525 + }, + { + "epoch": 1.67, + "grad_norm": 12.522616141574781, + "learning_rate": 1.4294002960550502e-06, + "loss": 1.253, + "step": 138528 + }, + { + "epoch": 1.67, + "grad_norm": 11.725290080682235, + "learning_rate": 1.4290993324215142e-06, + "loss": 0.9243, + "step": 138531 + }, + { + "epoch": 1.67, + "grad_norm": 26.047967854365282, + "learning_rate": 1.428798398037372e-06, + "loss": 1.39, + "step": 138534 + }, + { + "epoch": 1.67, + "grad_norm": 10.317460943089019, + "learning_rate": 1.4284974929036566e-06, + "loss": 1.1287, + "step": 138537 + }, + { + "epoch": 1.67, + "grad_norm": 6.100327730714519, + "learning_rate": 1.428196617021389e-06, + "loss": 1.1537, + "step": 138540 + }, + { + "epoch": 1.67, + "grad_norm": 5.252209184066744, + "learning_rate": 1.4278957703916018e-06, + "loss": 1.0663, + "step": 138543 + }, + { + "epoch": 1.67, + "grad_norm": 5.76741856200195, + "learning_rate": 1.4275949530153155e-06, + "loss": 1.3699, + "step": 138546 + }, + { + "epoch": 1.67, + "grad_norm": 23.012932121581528, + "learning_rate": 1.4272941648935601e-06, + "loss": 1.023, + "step": 138549 + }, + { + "epoch": 1.67, + "grad_norm": 7.8952905573556365, + "learning_rate": 1.4269934060273626e-06, + "loss": 1.0782, + "step": 138552 + }, + { + "epoch": 1.67, + "grad_norm": 5.425244730508689, + "learning_rate": 1.4266926764177502e-06, + "loss": 0.8967, + "step": 138555 + }, + { + "epoch": 1.67, + "grad_norm": 8.276348043814137, + "learning_rate": 1.4263919760657475e-06, + "loss": 1.4083, + "step": 138558 + }, + { + "epoch": 1.67, + "grad_norm": 5.186929917474631, + "learning_rate": 1.426091304972379e-06, + "loss": 0.9749, + "step": 138561 + }, + { + "epoch": 1.67, + "grad_norm": 6.9562927492079885, + "learning_rate": 1.425790663138672e-06, + "loss": 1.2999, + "step": 138564 + }, + { + "epoch": 1.67, + "grad_norm": 9.121693055257108, + "learning_rate": 1.4254900505656533e-06, + "loss": 1.3869, + "step": 138567 + }, + { + "epoch": 1.67, + "grad_norm": 16.821672690878145, + "learning_rate": 1.425189467254351e-06, + "loss": 1.0164, + "step": 138570 + }, + { + "epoch": 1.67, + "grad_norm": 7.85989058204174, + "learning_rate": 1.4248889132057865e-06, + "loss": 1.0839, + "step": 138573 + }, + { + "epoch": 1.67, + "grad_norm": 8.176170752034826, + "learning_rate": 1.4245883884209865e-06, + "loss": 0.9929, + "step": 138576 + }, + { + "epoch": 1.67, + "grad_norm": 6.220529501589131, + "learning_rate": 1.4242878929009807e-06, + "loss": 0.9851, + "step": 138579 + }, + { + "epoch": 1.67, + "grad_norm": 3.2379256348339345, + "learning_rate": 1.423987426646788e-06, + "loss": 1.2004, + "step": 138582 + }, + { + "epoch": 1.67, + "grad_norm": 7.440599117774575, + "learning_rate": 1.4236869896594407e-06, + "loss": 1.1555, + "step": 138585 + }, + { + "epoch": 1.67, + "grad_norm": 6.395540824295277, + "learning_rate": 1.4233865819399573e-06, + "loss": 1.2524, + "step": 138588 + }, + { + "epoch": 1.67, + "grad_norm": 5.025917359780725, + "learning_rate": 1.4230862034893667e-06, + "loss": 0.9241, + "step": 138591 + }, + { + "epoch": 1.67, + "grad_norm": 4.3685519645045545, + "learning_rate": 1.4227858543086926e-06, + "loss": 1.3808, + "step": 138594 + }, + { + "epoch": 1.67, + "grad_norm": 9.03563916013827, + "learning_rate": 1.4224855343989641e-06, + "loss": 1.3086, + "step": 138597 + }, + { + "epoch": 1.67, + "grad_norm": 17.989797761708544, + "learning_rate": 1.4221852437612027e-06, + "loss": 0.9441, + "step": 138600 + }, + { + "epoch": 1.67, + "grad_norm": 2.842101387834752, + "learning_rate": 1.421884982396431e-06, + "loss": 1.1804, + "step": 138603 + }, + { + "epoch": 1.67, + "grad_norm": 3.292011199858254, + "learning_rate": 1.4215847503056756e-06, + "loss": 0.9572, + "step": 138606 + }, + { + "epoch": 1.67, + "grad_norm": 8.089002187663205, + "learning_rate": 1.4212845474899618e-06, + "loss": 1.1353, + "step": 138609 + }, + { + "epoch": 1.67, + "grad_norm": 7.212774382253507, + "learning_rate": 1.4209843739503172e-06, + "loss": 1.4767, + "step": 138612 + }, + { + "epoch": 1.67, + "grad_norm": 20.39200466866282, + "learning_rate": 1.4206842296877589e-06, + "loss": 1.0705, + "step": 138615 + }, + { + "epoch": 1.67, + "grad_norm": 11.82647678131866, + "learning_rate": 1.420384114703316e-06, + "loss": 1.1128, + "step": 138618 + }, + { + "epoch": 1.67, + "grad_norm": 15.951516693112294, + "learning_rate": 1.4200840289980134e-06, + "loss": 1.299, + "step": 138621 + }, + { + "epoch": 1.67, + "grad_norm": 8.518614681850318, + "learning_rate": 1.4197839725728734e-06, + "loss": 1.1059, + "step": 138624 + }, + { + "epoch": 1.67, + "grad_norm": 7.516917824676139, + "learning_rate": 1.4194839454289189e-06, + "loss": 1.0851, + "step": 138627 + }, + { + "epoch": 1.67, + "grad_norm": 12.65636192578084, + "learning_rate": 1.4191839475671743e-06, + "loss": 1.1935, + "step": 138630 + }, + { + "epoch": 1.67, + "grad_norm": 8.371173492920711, + "learning_rate": 1.4188839789886645e-06, + "loss": 1.1024, + "step": 138633 + }, + { + "epoch": 1.67, + "grad_norm": 4.19859166035375, + "learning_rate": 1.4185840396944117e-06, + "loss": 1.2306, + "step": 138636 + }, + { + "epoch": 1.67, + "grad_norm": 5.3322086500276304, + "learning_rate": 1.4182841296854432e-06, + "loss": 1.0366, + "step": 138639 + }, + { + "epoch": 1.67, + "grad_norm": 6.4840896368289505, + "learning_rate": 1.4179842489627805e-06, + "loss": 1.0671, + "step": 138642 + }, + { + "epoch": 1.67, + "grad_norm": 9.214084564858132, + "learning_rate": 1.4176843975274445e-06, + "loss": 1.1333, + "step": 138645 + }, + { + "epoch": 1.67, + "grad_norm": 6.980019938572111, + "learning_rate": 1.4173845753804593e-06, + "loss": 1.0588, + "step": 138648 + }, + { + "epoch": 1.67, + "grad_norm": 4.560425224425799, + "learning_rate": 1.4170847825228519e-06, + "loss": 1.3535, + "step": 138651 + }, + { + "epoch": 1.67, + "grad_norm": 6.306999529430261, + "learning_rate": 1.4167850189556397e-06, + "loss": 1.376, + "step": 138654 + }, + { + "epoch": 1.67, + "grad_norm": 4.416724273241346, + "learning_rate": 1.4164852846798493e-06, + "loss": 1.2131, + "step": 138657 + }, + { + "epoch": 1.67, + "grad_norm": 12.975168293081948, + "learning_rate": 1.4161855796965018e-06, + "loss": 1.2232, + "step": 138660 + }, + { + "epoch": 1.67, + "grad_norm": 3.4159213037798812, + "learning_rate": 1.4158859040066242e-06, + "loss": 1.7203, + "step": 138663 + }, + { + "epoch": 1.67, + "grad_norm": 11.904834335591987, + "learning_rate": 1.4155862576112356e-06, + "loss": 1.3988, + "step": 138666 + }, + { + "epoch": 1.67, + "grad_norm": 9.478699884691505, + "learning_rate": 1.4152866405113574e-06, + "loss": 1.031, + "step": 138669 + }, + { + "epoch": 1.67, + "grad_norm": 8.027316759368212, + "learning_rate": 1.4149870527080135e-06, + "loss": 0.8485, + "step": 138672 + }, + { + "epoch": 1.67, + "grad_norm": 22.910178411517876, + "learning_rate": 1.4146874942022259e-06, + "loss": 1.1071, + "step": 138675 + }, + { + "epoch": 1.67, + "grad_norm": 9.76895099395068, + "learning_rate": 1.41438796499502e-06, + "loss": 1.3734, + "step": 138678 + }, + { + "epoch": 1.67, + "grad_norm": 22.42081013324653, + "learning_rate": 1.414088465087413e-06, + "loss": 1.2152, + "step": 138681 + }, + { + "epoch": 1.67, + "grad_norm": 14.031811670247919, + "learning_rate": 1.4137889944804329e-06, + "loss": 1.42, + "step": 138684 + }, + { + "epoch": 1.67, + "grad_norm": 5.1074445271058675, + "learning_rate": 1.4134895531750946e-06, + "loss": 1.1073, + "step": 138687 + }, + { + "epoch": 1.67, + "grad_norm": 3.089202848606273, + "learning_rate": 1.4131901411724235e-06, + "loss": 1.3609, + "step": 138690 + }, + { + "epoch": 1.67, + "grad_norm": 5.338896599133534, + "learning_rate": 1.4128907584734452e-06, + "loss": 1.123, + "step": 138693 + }, + { + "epoch": 1.67, + "grad_norm": 4.017741903726115, + "learning_rate": 1.4125914050791744e-06, + "loss": 1.3392, + "step": 138696 + }, + { + "epoch": 1.67, + "grad_norm": 13.23195154007653, + "learning_rate": 1.412292080990636e-06, + "loss": 1.2583, + "step": 138699 + }, + { + "epoch": 1.67, + "grad_norm": 7.830337036182941, + "learning_rate": 1.411992786208851e-06, + "loss": 0.9841, + "step": 138702 + }, + { + "epoch": 1.67, + "grad_norm": 8.752890832928532, + "learning_rate": 1.4116935207348449e-06, + "loss": 0.9113, + "step": 138705 + }, + { + "epoch": 1.67, + "grad_norm": 18.193397645980482, + "learning_rate": 1.4113942845696339e-06, + "loss": 1.2117, + "step": 138708 + }, + { + "epoch": 1.67, + "grad_norm": 5.42161157131546, + "learning_rate": 1.4110950777142395e-06, + "loss": 1.1246, + "step": 138711 + }, + { + "epoch": 1.67, + "grad_norm": 4.538237298794859, + "learning_rate": 1.4107959001696826e-06, + "loss": 1.0475, + "step": 138714 + }, + { + "epoch": 1.67, + "grad_norm": 12.064949992832428, + "learning_rate": 1.410496751936986e-06, + "loss": 1.1897, + "step": 138717 + }, + { + "epoch": 1.67, + "grad_norm": 13.638900632861828, + "learning_rate": 1.410197633017173e-06, + "loss": 1.3902, + "step": 138720 + }, + { + "epoch": 1.67, + "grad_norm": 5.00995431304804, + "learning_rate": 1.4098985434112588e-06, + "loss": 0.8752, + "step": 138723 + }, + { + "epoch": 1.67, + "grad_norm": 8.590395830292547, + "learning_rate": 1.4095994831202687e-06, + "loss": 1.0712, + "step": 138726 + }, + { + "epoch": 1.67, + "grad_norm": 6.8504815674882655, + "learning_rate": 1.409300452145219e-06, + "loss": 1.6395, + "step": 138729 + }, + { + "epoch": 1.67, + "grad_norm": 15.661705524992744, + "learning_rate": 1.4090014504871352e-06, + "loss": 1.5568, + "step": 138732 + }, + { + "epoch": 1.67, + "grad_norm": 5.697941045500996, + "learning_rate": 1.4087024781470327e-06, + "loss": 1.051, + "step": 138735 + }, + { + "epoch": 1.67, + "grad_norm": 10.034193279624049, + "learning_rate": 1.408403535125934e-06, + "loss": 1.1339, + "step": 138738 + }, + { + "epoch": 1.67, + "grad_norm": 4.709307492341191, + "learning_rate": 1.4081046214248595e-06, + "loss": 1.0009, + "step": 138741 + }, + { + "epoch": 1.67, + "grad_norm": 4.795105302642943, + "learning_rate": 1.4078057370448284e-06, + "loss": 1.321, + "step": 138744 + }, + { + "epoch": 1.67, + "grad_norm": 25.77842072049588, + "learning_rate": 1.4075068819868643e-06, + "loss": 1.3344, + "step": 138747 + }, + { + "epoch": 1.67, + "grad_norm": 4.929937893700371, + "learning_rate": 1.407208056251984e-06, + "loss": 1.0838, + "step": 138750 + }, + { + "epoch": 1.67, + "grad_norm": 5.057724636367618, + "learning_rate": 1.4069092598412048e-06, + "loss": 1.3005, + "step": 138753 + }, + { + "epoch": 1.67, + "grad_norm": 8.244874603399166, + "learning_rate": 1.4066104927555502e-06, + "loss": 0.9641, + "step": 138756 + }, + { + "epoch": 1.67, + "grad_norm": 5.673392709891793, + "learning_rate": 1.4063117549960403e-06, + "loss": 0.8436, + "step": 138759 + }, + { + "epoch": 1.67, + "grad_norm": 8.714000067645994, + "learning_rate": 1.4060130465636901e-06, + "loss": 1.3126, + "step": 138762 + }, + { + "epoch": 1.67, + "grad_norm": 9.109972954836708, + "learning_rate": 1.405714367459523e-06, + "loss": 1.0981, + "step": 138765 + }, + { + "epoch": 1.67, + "grad_norm": 6.702948391749225, + "learning_rate": 1.4054157176845562e-06, + "loss": 1.0558, + "step": 138768 + }, + { + "epoch": 1.67, + "grad_norm": 12.961764211763514, + "learning_rate": 1.405117097239812e-06, + "loss": 1.0239, + "step": 138771 + }, + { + "epoch": 1.67, + "grad_norm": 13.330912405773567, + "learning_rate": 1.4048185061263086e-06, + "loss": 1.1351, + "step": 138774 + }, + { + "epoch": 1.67, + "grad_norm": 6.218825320346301, + "learning_rate": 1.4045199443450596e-06, + "loss": 1.0479, + "step": 138777 + }, + { + "epoch": 1.67, + "grad_norm": 7.997318254713016, + "learning_rate": 1.4042214118970897e-06, + "loss": 0.8421, + "step": 138780 + }, + { + "epoch": 1.67, + "grad_norm": 12.28246542623975, + "learning_rate": 1.403922908783415e-06, + "loss": 0.966, + "step": 138783 + }, + { + "epoch": 1.67, + "grad_norm": 7.936993355042414, + "learning_rate": 1.4036244350050576e-06, + "loss": 1.1892, + "step": 138786 + }, + { + "epoch": 1.67, + "grad_norm": 10.8114697472817, + "learning_rate": 1.4033259905630302e-06, + "loss": 1.1522, + "step": 138789 + }, + { + "epoch": 1.67, + "grad_norm": 15.049726749229873, + "learning_rate": 1.4030275754583589e-06, + "loss": 1.1455, + "step": 138792 + }, + { + "epoch": 1.67, + "grad_norm": 8.304759130134709, + "learning_rate": 1.4027291896920548e-06, + "loss": 0.6359, + "step": 138795 + }, + { + "epoch": 1.67, + "grad_norm": 4.963855571692161, + "learning_rate": 1.402430833265138e-06, + "loss": 1.0859, + "step": 138798 + }, + { + "epoch": 1.67, + "grad_norm": 7.436310740858866, + "learning_rate": 1.4021325061786317e-06, + "loss": 1.0, + "step": 138801 + }, + { + "epoch": 1.67, + "grad_norm": 12.701417152869977, + "learning_rate": 1.401834208433548e-06, + "loss": 1.3668, + "step": 138804 + }, + { + "epoch": 1.67, + "grad_norm": 8.825804402223781, + "learning_rate": 1.401535940030907e-06, + "loss": 0.9238, + "step": 138807 + }, + { + "epoch": 1.67, + "grad_norm": 10.92125372394084, + "learning_rate": 1.401237700971726e-06, + "loss": 1.2893, + "step": 138810 + }, + { + "epoch": 1.67, + "grad_norm": 3.4295719536736846, + "learning_rate": 1.4009394912570262e-06, + "loss": 1.0356, + "step": 138813 + }, + { + "epoch": 1.67, + "grad_norm": 7.151698985008399, + "learning_rate": 1.4006413108878226e-06, + "loss": 0.995, + "step": 138816 + }, + { + "epoch": 1.67, + "grad_norm": 6.879898967828653, + "learning_rate": 1.4003431598651295e-06, + "loss": 1.0648, + "step": 138819 + }, + { + "epoch": 1.67, + "grad_norm": 6.387139677158958, + "learning_rate": 1.4000450381899688e-06, + "loss": 0.8837, + "step": 138822 + }, + { + "epoch": 1.67, + "grad_norm": 9.337823973592391, + "learning_rate": 1.3997469458633562e-06, + "loss": 1.0264, + "step": 138825 + }, + { + "epoch": 1.67, + "grad_norm": 8.868253840906469, + "learning_rate": 1.3994488828863117e-06, + "loss": 0.968, + "step": 138828 + }, + { + "epoch": 1.67, + "grad_norm": 3.449677430227821, + "learning_rate": 1.3991508492598483e-06, + "loss": 0.8963, + "step": 138831 + }, + { + "epoch": 1.67, + "grad_norm": 4.357848335534531, + "learning_rate": 1.398852844984987e-06, + "loss": 1.1336, + "step": 138834 + }, + { + "epoch": 1.67, + "grad_norm": 9.937737828552327, + "learning_rate": 1.3985548700627404e-06, + "loss": 1.2906, + "step": 138837 + }, + { + "epoch": 1.67, + "grad_norm": 6.225543057087392, + "learning_rate": 1.3982569244941313e-06, + "loss": 1.0544, + "step": 138840 + }, + { + "epoch": 1.67, + "grad_norm": 18.930840482879894, + "learning_rate": 1.3979590082801697e-06, + "loss": 1.0935, + "step": 138843 + }, + { + "epoch": 1.67, + "grad_norm": 7.9853043022280445, + "learning_rate": 1.397661121421875e-06, + "loss": 1.4295, + "step": 138846 + }, + { + "epoch": 1.67, + "grad_norm": 6.891866260676853, + "learning_rate": 1.397363263920265e-06, + "loss": 1.0634, + "step": 138849 + }, + { + "epoch": 1.67, + "grad_norm": 2.996343591695364, + "learning_rate": 1.3970654357763557e-06, + "loss": 1.0511, + "step": 138852 + }, + { + "epoch": 1.67, + "grad_norm": 9.086515808022659, + "learning_rate": 1.3967676369911654e-06, + "loss": 1.6287, + "step": 138855 + }, + { + "epoch": 1.67, + "grad_norm": 6.786012999849774, + "learning_rate": 1.3964698675657085e-06, + "loss": 0.9326, + "step": 138858 + }, + { + "epoch": 1.67, + "grad_norm": 20.266277409340102, + "learning_rate": 1.3961721275009987e-06, + "loss": 1.0966, + "step": 138861 + }, + { + "epoch": 1.67, + "grad_norm": 22.36745295226558, + "learning_rate": 1.3958744167980543e-06, + "loss": 1.1353, + "step": 138864 + }, + { + "epoch": 1.67, + "grad_norm": 15.266809717774306, + "learning_rate": 1.3955767354578931e-06, + "loss": 0.9024, + "step": 138867 + }, + { + "epoch": 1.67, + "grad_norm": 13.720870681691094, + "learning_rate": 1.3952790834815277e-06, + "loss": 1.077, + "step": 138870 + }, + { + "epoch": 1.67, + "grad_norm": 5.571955241231085, + "learning_rate": 1.394981460869974e-06, + "loss": 1.1277, + "step": 138873 + }, + { + "epoch": 1.67, + "grad_norm": 13.019445175664677, + "learning_rate": 1.3946838676242525e-06, + "loss": 1.199, + "step": 138876 + }, + { + "epoch": 1.67, + "grad_norm": 6.487866529127775, + "learning_rate": 1.394386303745373e-06, + "loss": 0.7943, + "step": 138879 + }, + { + "epoch": 1.67, + "grad_norm": 8.161441238267972, + "learning_rate": 1.3940887692343551e-06, + "loss": 1.218, + "step": 138882 + }, + { + "epoch": 1.67, + "grad_norm": 5.07140865403507, + "learning_rate": 1.3937912640922114e-06, + "loss": 0.9094, + "step": 138885 + }, + { + "epoch": 1.67, + "grad_norm": 2.3918924664705368, + "learning_rate": 1.393493788319956e-06, + "loss": 1.0796, + "step": 138888 + }, + { + "epoch": 1.67, + "grad_norm": 4.4424466620283045, + "learning_rate": 1.3931963419186079e-06, + "loss": 0.9582, + "step": 138891 + }, + { + "epoch": 1.67, + "grad_norm": 5.143696745999965, + "learning_rate": 1.3928989248891823e-06, + "loss": 1.2562, + "step": 138894 + }, + { + "epoch": 1.67, + "grad_norm": 5.242382296514341, + "learning_rate": 1.3926015372326907e-06, + "loss": 0.9834, + "step": 138897 + }, + { + "epoch": 1.67, + "grad_norm": 12.017863232680643, + "learning_rate": 1.3923041789501513e-06, + "loss": 1.16, + "step": 138900 + }, + { + "epoch": 1.67, + "grad_norm": 8.43903193100455, + "learning_rate": 1.3920068500425753e-06, + "loss": 1.2175, + "step": 138903 + }, + { + "epoch": 1.67, + "grad_norm": 6.093742528299149, + "learning_rate": 1.391709550510979e-06, + "loss": 1.0129, + "step": 138906 + }, + { + "epoch": 1.67, + "grad_norm": 9.626591682642344, + "learning_rate": 1.39141228035638e-06, + "loss": 0.8695, + "step": 138909 + }, + { + "epoch": 1.67, + "grad_norm": 10.66536581098206, + "learning_rate": 1.3911150395797868e-06, + "loss": 1.1126, + "step": 138912 + }, + { + "epoch": 1.67, + "grad_norm": 7.935282391182021, + "learning_rate": 1.3908178281822181e-06, + "loss": 1.3562, + "step": 138915 + }, + { + "epoch": 1.67, + "grad_norm": 13.012368730599194, + "learning_rate": 1.3905206461646893e-06, + "loss": 1.0407, + "step": 138918 + }, + { + "epoch": 1.67, + "grad_norm": 10.796610861123469, + "learning_rate": 1.3902234935282121e-06, + "loss": 1.0449, + "step": 138921 + }, + { + "epoch": 1.67, + "grad_norm": 7.281623125830305, + "learning_rate": 1.3899263702737987e-06, + "loss": 1.2474, + "step": 138924 + }, + { + "epoch": 1.67, + "grad_norm": 17.320068699874472, + "learning_rate": 1.3896292764024655e-06, + "loss": 1.2656, + "step": 138927 + }, + { + "epoch": 1.67, + "grad_norm": 9.607107148182335, + "learning_rate": 1.3893322119152263e-06, + "loss": 1.1664, + "step": 138930 + }, + { + "epoch": 1.67, + "grad_norm": 8.358039177564475, + "learning_rate": 1.389035176813094e-06, + "loss": 1.0137, + "step": 138933 + }, + { + "epoch": 1.67, + "grad_norm": 21.422962050351103, + "learning_rate": 1.3887381710970859e-06, + "loss": 1.2028, + "step": 138936 + }, + { + "epoch": 1.67, + "grad_norm": 9.906948284339391, + "learning_rate": 1.3884411947682098e-06, + "loss": 1.1377, + "step": 138939 + }, + { + "epoch": 1.67, + "grad_norm": 15.356509154397312, + "learning_rate": 1.3881442478274853e-06, + "loss": 1.3209, + "step": 138942 + }, + { + "epoch": 1.67, + "grad_norm": 3.8442892863037184, + "learning_rate": 1.38784733027592e-06, + "loss": 1.2909, + "step": 138945 + }, + { + "epoch": 1.67, + "grad_norm": 8.102414091409509, + "learning_rate": 1.3875504421145314e-06, + "loss": 1.3297, + "step": 138948 + }, + { + "epoch": 1.67, + "grad_norm": 5.2526052516346065, + "learning_rate": 1.387253583344328e-06, + "loss": 0.8826, + "step": 138951 + }, + { + "epoch": 1.67, + "grad_norm": 11.875795117215473, + "learning_rate": 1.3869567539663265e-06, + "loss": 1.1901, + "step": 138954 + }, + { + "epoch": 1.67, + "grad_norm": 21.571560442719186, + "learning_rate": 1.3866599539815395e-06, + "loss": 1.1895, + "step": 138957 + }, + { + "epoch": 1.67, + "grad_norm": 2.868905002095567, + "learning_rate": 1.3863631833909819e-06, + "loss": 1.0482, + "step": 138960 + }, + { + "epoch": 1.67, + "grad_norm": 10.714651703140838, + "learning_rate": 1.3860664421956637e-06, + "loss": 1.2586, + "step": 138963 + }, + { + "epoch": 1.67, + "grad_norm": 8.708218395314269, + "learning_rate": 1.3857697303965967e-06, + "loss": 1.066, + "step": 138966 + }, + { + "epoch": 1.67, + "grad_norm": 4.246095237236626, + "learning_rate": 1.385473047994793e-06, + "loss": 1.2485, + "step": 138969 + }, + { + "epoch": 1.67, + "grad_norm": 11.754991334116042, + "learning_rate": 1.385176394991269e-06, + "loss": 1.301, + "step": 138972 + }, + { + "epoch": 1.67, + "grad_norm": 8.017832279043533, + "learning_rate": 1.3848797713870355e-06, + "loss": 1.1536, + "step": 138975 + }, + { + "epoch": 1.67, + "grad_norm": 9.453024536139251, + "learning_rate": 1.384583177183102e-06, + "loss": 1.1878, + "step": 138978 + }, + { + "epoch": 1.67, + "grad_norm": 3.9931923085521293, + "learning_rate": 1.3842866123804844e-06, + "loss": 1.0103, + "step": 138981 + }, + { + "epoch": 1.67, + "grad_norm": 6.050391465903514, + "learning_rate": 1.383990076980194e-06, + "loss": 1.0378, + "step": 138984 + }, + { + "epoch": 1.67, + "grad_norm": 5.048123653672845, + "learning_rate": 1.38369357098324e-06, + "loss": 1.3074, + "step": 138987 + }, + { + "epoch": 1.67, + "grad_norm": 2.8476638039564652, + "learning_rate": 1.3833970943906383e-06, + "loss": 1.2011, + "step": 138990 + }, + { + "epoch": 1.67, + "grad_norm": 5.1324684168648504, + "learning_rate": 1.383100647203397e-06, + "loss": 1.1617, + "step": 138993 + }, + { + "epoch": 1.67, + "grad_norm": 91.21576149914169, + "learning_rate": 1.3828042294225296e-06, + "loss": 1.2427, + "step": 138996 + }, + { + "epoch": 1.67, + "grad_norm": 14.134005751915513, + "learning_rate": 1.3825078410490467e-06, + "loss": 1.2746, + "step": 138999 + }, + { + "epoch": 1.67, + "grad_norm": 12.856424220204012, + "learning_rate": 1.3822114820839628e-06, + "loss": 1.0157, + "step": 139002 + }, + { + "epoch": 1.67, + "grad_norm": 7.061835723016264, + "learning_rate": 1.381915152528286e-06, + "loss": 1.0746, + "step": 139005 + }, + { + "epoch": 1.67, + "grad_norm": 5.743469713873088, + "learning_rate": 1.3816188523830298e-06, + "loss": 1.2281, + "step": 139008 + }, + { + "epoch": 1.67, + "grad_norm": 5.991096085442799, + "learning_rate": 1.3813225816492015e-06, + "loss": 1.2162, + "step": 139011 + }, + { + "epoch": 1.67, + "grad_norm": 2.797983402322061, + "learning_rate": 1.3810263403278158e-06, + "loss": 1.1853, + "step": 139014 + }, + { + "epoch": 1.67, + "grad_norm": 9.843719416974107, + "learning_rate": 1.3807301284198838e-06, + "loss": 1.1479, + "step": 139017 + }, + { + "epoch": 1.67, + "grad_norm": 10.100617210553478, + "learning_rate": 1.3804339459264139e-06, + "loss": 1.2921, + "step": 139020 + }, + { + "epoch": 1.67, + "grad_norm": 3.329759142873113, + "learning_rate": 1.3801377928484172e-06, + "loss": 0.9107, + "step": 139023 + }, + { + "epoch": 1.67, + "grad_norm": 5.988733829851039, + "learning_rate": 1.3798416691869088e-06, + "loss": 1.2557, + "step": 139026 + }, + { + "epoch": 1.67, + "grad_norm": 12.055276390291304, + "learning_rate": 1.3795455749428955e-06, + "loss": 0.9827, + "step": 139029 + }, + { + "epoch": 1.67, + "grad_norm": 6.591815415990407, + "learning_rate": 1.3792495101173853e-06, + "loss": 0.7765, + "step": 139032 + }, + { + "epoch": 1.67, + "grad_norm": 5.266600367409218, + "learning_rate": 1.3789534747113908e-06, + "loss": 0.9586, + "step": 139035 + }, + { + "epoch": 1.67, + "grad_norm": 16.486487615422384, + "learning_rate": 1.3786574687259234e-06, + "loss": 1.3598, + "step": 139038 + }, + { + "epoch": 1.67, + "grad_norm": 4.748135191494622, + "learning_rate": 1.3783614921619936e-06, + "loss": 1.0547, + "step": 139041 + }, + { + "epoch": 1.67, + "grad_norm": 7.44397702821259, + "learning_rate": 1.3780655450206115e-06, + "loss": 1.1715, + "step": 139044 + }, + { + "epoch": 1.67, + "grad_norm": 14.904545185783661, + "learning_rate": 1.3777696273027853e-06, + "loss": 0.9936, + "step": 139047 + }, + { + "epoch": 1.67, + "grad_norm": 5.516711832323266, + "learning_rate": 1.377473739009526e-06, + "loss": 1.0008, + "step": 139050 + }, + { + "epoch": 1.67, + "grad_norm": 8.853839832996886, + "learning_rate": 1.3771778801418424e-06, + "loss": 0.9033, + "step": 139053 + }, + { + "epoch": 1.67, + "grad_norm": 3.402787356902268, + "learning_rate": 1.3768820507007463e-06, + "loss": 1.087, + "step": 139056 + }, + { + "epoch": 1.67, + "grad_norm": 4.34554132975773, + "learning_rate": 1.376586250687244e-06, + "loss": 1.1853, + "step": 139059 + }, + { + "epoch": 1.67, + "grad_norm": 6.730464074867836, + "learning_rate": 1.3762904801023458e-06, + "loss": 1.0842, + "step": 139062 + }, + { + "epoch": 1.67, + "grad_norm": 7.567046736945827, + "learning_rate": 1.3759947389470629e-06, + "loss": 1.1884, + "step": 139065 + }, + { + "epoch": 1.67, + "grad_norm": 17.376763964377982, + "learning_rate": 1.3756990272224046e-06, + "loss": 1.1397, + "step": 139068 + }, + { + "epoch": 1.67, + "grad_norm": 4.573607888055965, + "learning_rate": 1.3754033449293803e-06, + "loss": 1.2374, + "step": 139071 + }, + { + "epoch": 1.67, + "grad_norm": 3.8894111831246034, + "learning_rate": 1.3751076920689944e-06, + "loss": 1.2956, + "step": 139074 + }, + { + "epoch": 1.67, + "grad_norm": 5.744796268551988, + "learning_rate": 1.3748120686422605e-06, + "loss": 0.9085, + "step": 139077 + }, + { + "epoch": 1.67, + "grad_norm": 52.8209265227232, + "learning_rate": 1.374516474650186e-06, + "loss": 1.0975, + "step": 139080 + }, + { + "epoch": 1.67, + "grad_norm": 9.987490556656107, + "learning_rate": 1.3742209100937809e-06, + "loss": 1.1781, + "step": 139083 + }, + { + "epoch": 1.67, + "grad_norm": 7.003623063063513, + "learning_rate": 1.3739253749740521e-06, + "loss": 1.0331, + "step": 139086 + }, + { + "epoch": 1.67, + "grad_norm": 5.998321801391853, + "learning_rate": 1.373629869292008e-06, + "loss": 1.3798, + "step": 139089 + }, + { + "epoch": 1.67, + "grad_norm": 7.20045558204527, + "learning_rate": 1.373334393048661e-06, + "loss": 1.1943, + "step": 139092 + }, + { + "epoch": 1.67, + "grad_norm": 6.160897764317783, + "learning_rate": 1.3730389462450132e-06, + "loss": 0.9539, + "step": 139095 + }, + { + "epoch": 1.67, + "grad_norm": 2.458402427413296, + "learning_rate": 1.3727435288820789e-06, + "loss": 1.1742, + "step": 139098 + }, + { + "epoch": 1.67, + "grad_norm": 29.445507289588242, + "learning_rate": 1.3724481409608614e-06, + "loss": 0.9455, + "step": 139101 + }, + { + "epoch": 1.67, + "grad_norm": 6.898739700473208, + "learning_rate": 1.37215278248237e-06, + "loss": 1.1019, + "step": 139104 + }, + { + "epoch": 1.67, + "grad_norm": 11.502323529258307, + "learning_rate": 1.3718574534476136e-06, + "loss": 1.175, + "step": 139107 + }, + { + "epoch": 1.67, + "grad_norm": 8.674845755645357, + "learning_rate": 1.371562153857603e-06, + "loss": 0.9503, + "step": 139110 + }, + { + "epoch": 1.67, + "grad_norm": 7.154490958787751, + "learning_rate": 1.3712668837133414e-06, + "loss": 0.8799, + "step": 139113 + }, + { + "epoch": 1.67, + "grad_norm": 8.654660704459694, + "learning_rate": 1.370971643015837e-06, + "loss": 1.3142, + "step": 139116 + }, + { + "epoch": 1.67, + "grad_norm": 10.324125848391649, + "learning_rate": 1.370676431766097e-06, + "loss": 1.136, + "step": 139119 + }, + { + "epoch": 1.67, + "grad_norm": 9.300414563383715, + "learning_rate": 1.3703812499651315e-06, + "loss": 1.3608, + "step": 139122 + }, + { + "epoch": 1.67, + "grad_norm": 2.6763220086992234, + "learning_rate": 1.3700860976139474e-06, + "loss": 1.0875, + "step": 139125 + }, + { + "epoch": 1.67, + "grad_norm": 4.749632968371167, + "learning_rate": 1.3697909747135485e-06, + "loss": 0.8459, + "step": 139128 + }, + { + "epoch": 1.67, + "grad_norm": 13.276644582055251, + "learning_rate": 1.3694958812649462e-06, + "loss": 1.5566, + "step": 139131 + }, + { + "epoch": 1.67, + "grad_norm": 4.267046026759512, + "learning_rate": 1.3692008172691463e-06, + "loss": 1.2968, + "step": 139134 + }, + { + "epoch": 1.67, + "grad_norm": 15.99994562390607, + "learning_rate": 1.3689057827271557e-06, + "loss": 1.3332, + "step": 139137 + }, + { + "epoch": 1.67, + "grad_norm": 7.170512308157691, + "learning_rate": 1.3686107776399782e-06, + "loss": 1.1884, + "step": 139140 + }, + { + "epoch": 1.67, + "grad_norm": 20.4952202019918, + "learning_rate": 1.368315802008624e-06, + "loss": 1.0246, + "step": 139143 + }, + { + "epoch": 1.67, + "grad_norm": 3.2151554344243554, + "learning_rate": 1.368020855834098e-06, + "loss": 1.6377, + "step": 139146 + }, + { + "epoch": 1.67, + "grad_norm": 8.660150926318137, + "learning_rate": 1.367725939117408e-06, + "loss": 1.0068, + "step": 139149 + }, + { + "epoch": 1.67, + "grad_norm": 10.247675690992452, + "learning_rate": 1.3674310518595624e-06, + "loss": 1.0649, + "step": 139152 + }, + { + "epoch": 1.67, + "grad_norm": 9.061033255508017, + "learning_rate": 1.3671361940615647e-06, + "loss": 1.1759, + "step": 139155 + }, + { + "epoch": 1.67, + "grad_norm": 9.081136625657466, + "learning_rate": 1.3668413657244207e-06, + "loss": 1.0961, + "step": 139158 + }, + { + "epoch": 1.67, + "grad_norm": 14.036073665741394, + "learning_rate": 1.3665465668491362e-06, + "loss": 0.8841, + "step": 139161 + }, + { + "epoch": 1.67, + "grad_norm": 7.2136752306779455, + "learning_rate": 1.3662517974367206e-06, + "loss": 1.0849, + "step": 139164 + }, + { + "epoch": 1.67, + "grad_norm": 3.0117864705951165, + "learning_rate": 1.3659570574881763e-06, + "loss": 0.7884, + "step": 139167 + }, + { + "epoch": 1.67, + "grad_norm": 19.00082379776488, + "learning_rate": 1.3656623470045105e-06, + "loss": 1.1896, + "step": 139170 + }, + { + "epoch": 1.67, + "grad_norm": 7.842130296656788, + "learning_rate": 1.3653676659867288e-06, + "loss": 1.2733, + "step": 139173 + }, + { + "epoch": 1.67, + "grad_norm": 12.327952954971286, + "learning_rate": 1.3650730144358383e-06, + "loss": 1.0838, + "step": 139176 + }, + { + "epoch": 1.67, + "grad_norm": 8.348848053669071, + "learning_rate": 1.3647783923528445e-06, + "loss": 0.6917, + "step": 139179 + }, + { + "epoch": 1.67, + "grad_norm": 6.3744087424488605, + "learning_rate": 1.3644837997387495e-06, + "loss": 0.9964, + "step": 139182 + }, + { + "epoch": 1.67, + "grad_norm": 2.467728038563083, + "learning_rate": 1.3641892365945607e-06, + "loss": 1.3512, + "step": 139185 + }, + { + "epoch": 1.67, + "grad_norm": 11.434235225703615, + "learning_rate": 1.3638947029212835e-06, + "loss": 1.0555, + "step": 139188 + }, + { + "epoch": 1.67, + "grad_norm": 30.634871417204113, + "learning_rate": 1.3636001987199243e-06, + "loss": 0.919, + "step": 139191 + }, + { + "epoch": 1.67, + "grad_norm": 8.78754313676424, + "learning_rate": 1.363305723991485e-06, + "loss": 1.1468, + "step": 139194 + }, + { + "epoch": 1.67, + "grad_norm": 9.84233031773906, + "learning_rate": 1.3630112787369754e-06, + "loss": 0.8073, + "step": 139197 + }, + { + "epoch": 1.67, + "grad_norm": 28.80564771427954, + "learning_rate": 1.362716862957394e-06, + "loss": 1.1857, + "step": 139200 + }, + { + "epoch": 1.67, + "grad_norm": 14.713470090848828, + "learning_rate": 1.3624224766537496e-06, + "loss": 1.5312, + "step": 139203 + }, + { + "epoch": 1.67, + "grad_norm": 20.75278982246746, + "learning_rate": 1.3621281198270475e-06, + "loss": 1.089, + "step": 139206 + }, + { + "epoch": 1.67, + "grad_norm": 12.03861201286466, + "learning_rate": 1.3618337924782888e-06, + "loss": 1.157, + "step": 139209 + }, + { + "epoch": 1.67, + "grad_norm": 12.622539954895192, + "learning_rate": 1.3615394946084804e-06, + "loss": 1.0528, + "step": 139212 + }, + { + "epoch": 1.67, + "grad_norm": 14.852674780554409, + "learning_rate": 1.361245226218626e-06, + "loss": 1.2512, + "step": 139215 + }, + { + "epoch": 1.67, + "grad_norm": 12.88784852295336, + "learning_rate": 1.3609509873097315e-06, + "loss": 1.1433, + "step": 139218 + }, + { + "epoch": 1.67, + "grad_norm": 6.653274471733799, + "learning_rate": 1.3606567778828006e-06, + "loss": 1.2813, + "step": 139221 + }, + { + "epoch": 1.67, + "grad_norm": 5.6689273325176055, + "learning_rate": 1.3603625979388336e-06, + "loss": 1.259, + "step": 139224 + }, + { + "epoch": 1.67, + "grad_norm": 3.016814888758593, + "learning_rate": 1.360068447478836e-06, + "loss": 1.2377, + "step": 139227 + }, + { + "epoch": 1.67, + "grad_norm": 7.211993442318626, + "learning_rate": 1.3597743265038143e-06, + "loss": 1.2293, + "step": 139230 + }, + { + "epoch": 1.67, + "grad_norm": 7.69908085757851, + "learning_rate": 1.3594802350147717e-06, + "loss": 0.8838, + "step": 139233 + }, + { + "epoch": 1.67, + "grad_norm": 5.19983768065731, + "learning_rate": 1.35918617301271e-06, + "loss": 0.8767, + "step": 139236 + }, + { + "epoch": 1.67, + "grad_norm": 4.546626217548222, + "learning_rate": 1.3588921404986344e-06, + "loss": 1.0875, + "step": 139239 + }, + { + "epoch": 1.67, + "grad_norm": 20.618373760172766, + "learning_rate": 1.3585981374735458e-06, + "loss": 1.4924, + "step": 139242 + }, + { + "epoch": 1.67, + "grad_norm": 7.087926922775512, + "learning_rate": 1.3583041639384508e-06, + "loss": 1.1358, + "step": 139245 + }, + { + "epoch": 1.67, + "grad_norm": 3.2670544289195704, + "learning_rate": 1.358010219894349e-06, + "loss": 1.0894, + "step": 139248 + }, + { + "epoch": 1.67, + "grad_norm": 16.007903733465536, + "learning_rate": 1.3577163053422448e-06, + "loss": 1.0785, + "step": 139251 + }, + { + "epoch": 1.67, + "grad_norm": 9.347244104041813, + "learning_rate": 1.3574224202831431e-06, + "loss": 1.4815, + "step": 139254 + }, + { + "epoch": 1.67, + "grad_norm": 23.63238367274366, + "learning_rate": 1.3571285647180455e-06, + "loss": 1.1326, + "step": 139257 + }, + { + "epoch": 1.67, + "grad_norm": 3.993075118006587, + "learning_rate": 1.3568347386479564e-06, + "loss": 1.0786, + "step": 139260 + }, + { + "epoch": 1.67, + "grad_norm": 17.381362376945813, + "learning_rate": 1.3565409420738773e-06, + "loss": 1.2348, + "step": 139263 + }, + { + "epoch": 1.67, + "grad_norm": 5.603901220132684, + "learning_rate": 1.3562471749968088e-06, + "loss": 1.2071, + "step": 139266 + }, + { + "epoch": 1.67, + "grad_norm": 4.021180578495898, + "learning_rate": 1.3559534374177552e-06, + "loss": 1.0938, + "step": 139269 + }, + { + "epoch": 1.67, + "grad_norm": 15.165750557487014, + "learning_rate": 1.3556597293377204e-06, + "loss": 0.8602, + "step": 139272 + }, + { + "epoch": 1.67, + "grad_norm": 17.348091009824905, + "learning_rate": 1.3553660507577039e-06, + "loss": 1.2511, + "step": 139275 + }, + { + "epoch": 1.67, + "grad_norm": 3.022127208450428, + "learning_rate": 1.3550724016787088e-06, + "loss": 1.2248, + "step": 139278 + }, + { + "epoch": 1.67, + "grad_norm": 7.635315412009474, + "learning_rate": 1.35477878210174e-06, + "loss": 0.8626, + "step": 139281 + }, + { + "epoch": 1.67, + "grad_norm": 18.13285526078935, + "learning_rate": 1.3544851920277958e-06, + "loss": 1.2291, + "step": 139284 + }, + { + "epoch": 1.67, + "grad_norm": 14.511358955159286, + "learning_rate": 1.3541916314578807e-06, + "loss": 1.2133, + "step": 139287 + }, + { + "epoch": 1.67, + "grad_norm": 5.579803870728381, + "learning_rate": 1.353898100392994e-06, + "loss": 0.9635, + "step": 139290 + }, + { + "epoch": 1.67, + "grad_norm": 14.18550100924952, + "learning_rate": 1.3536045988341384e-06, + "loss": 1.2597, + "step": 139293 + }, + { + "epoch": 1.67, + "grad_norm": 6.674785028424569, + "learning_rate": 1.3533111267823151e-06, + "loss": 0.9038, + "step": 139296 + }, + { + "epoch": 1.68, + "grad_norm": 8.48835100203481, + "learning_rate": 1.3530176842385302e-06, + "loss": 0.79, + "step": 139299 + }, + { + "epoch": 1.68, + "grad_norm": 10.498186427196796, + "learning_rate": 1.352724271203778e-06, + "loss": 1.2753, + "step": 139302 + }, + { + "epoch": 1.68, + "grad_norm": 19.992017048242918, + "learning_rate": 1.352430887679065e-06, + "loss": 1.3729, + "step": 139305 + }, + { + "epoch": 1.68, + "grad_norm": 4.819448064087348, + "learning_rate": 1.3521375336653897e-06, + "loss": 1.2386, + "step": 139308 + }, + { + "epoch": 1.68, + "grad_norm": 4.959891631470168, + "learning_rate": 1.351844209163753e-06, + "loss": 0.874, + "step": 139311 + }, + { + "epoch": 1.68, + "grad_norm": 8.424924109724127, + "learning_rate": 1.3515509141751594e-06, + "loss": 1.0606, + "step": 139314 + }, + { + "epoch": 1.68, + "grad_norm": 6.001000284931279, + "learning_rate": 1.3512576487006047e-06, + "loss": 1.2767, + "step": 139317 + }, + { + "epoch": 1.68, + "grad_norm": 14.782721846385591, + "learning_rate": 1.3509644127410926e-06, + "loss": 0.9314, + "step": 139320 + }, + { + "epoch": 1.68, + "grad_norm": 22.035459700949133, + "learning_rate": 1.3506712062976268e-06, + "loss": 1.3085, + "step": 139323 + }, + { + "epoch": 1.68, + "grad_norm": 9.421749599574214, + "learning_rate": 1.350378029371201e-06, + "loss": 1.3169, + "step": 139326 + }, + { + "epoch": 1.68, + "grad_norm": 8.448932184968804, + "learning_rate": 1.350084881962823e-06, + "loss": 1.2264, + "step": 139329 + }, + { + "epoch": 1.68, + "grad_norm": 8.370880040634267, + "learning_rate": 1.349791764073487e-06, + "loss": 1.0417, + "step": 139332 + }, + { + "epoch": 1.68, + "grad_norm": 4.6838745224082885, + "learning_rate": 1.349498675704195e-06, + "loss": 1.0114, + "step": 139335 + }, + { + "epoch": 1.68, + "grad_norm": 4.1362008108650326, + "learning_rate": 1.3492056168559486e-06, + "loss": 1.188, + "step": 139338 + }, + { + "epoch": 1.68, + "grad_norm": 3.1270774643973462, + "learning_rate": 1.3489125875297504e-06, + "loss": 1.2155, + "step": 139341 + }, + { + "epoch": 1.68, + "grad_norm": 5.780173455491257, + "learning_rate": 1.3486195877265951e-06, + "loss": 0.9359, + "step": 139344 + }, + { + "epoch": 1.68, + "grad_norm": 6.576936795307581, + "learning_rate": 1.3483266174474862e-06, + "loss": 1.2784, + "step": 139347 + }, + { + "epoch": 1.68, + "grad_norm": 8.000556984802218, + "learning_rate": 1.348033676693421e-06, + "loss": 1.1638, + "step": 139350 + }, + { + "epoch": 1.68, + "grad_norm": 11.123048641294174, + "learning_rate": 1.3477407654654018e-06, + "loss": 1.148, + "step": 139353 + }, + { + "epoch": 1.68, + "grad_norm": 13.669655605763104, + "learning_rate": 1.3474478837644256e-06, + "loss": 1.0457, + "step": 139356 + }, + { + "epoch": 1.68, + "grad_norm": 6.868610524578278, + "learning_rate": 1.3471550315914916e-06, + "loss": 1.3398, + "step": 139359 + }, + { + "epoch": 1.68, + "grad_norm": 9.831338660388816, + "learning_rate": 1.3468622089476025e-06, + "loss": 1.1731, + "step": 139362 + }, + { + "epoch": 1.68, + "grad_norm": 2.5034216668613443, + "learning_rate": 1.346569415833755e-06, + "loss": 1.0867, + "step": 139365 + }, + { + "epoch": 1.68, + "grad_norm": 7.569610541172496, + "learning_rate": 1.3462766522509508e-06, + "loss": 1.1928, + "step": 139368 + }, + { + "epoch": 1.68, + "grad_norm": 8.965395498992212, + "learning_rate": 1.3459839182001878e-06, + "loss": 1.1521, + "step": 139371 + }, + { + "epoch": 1.68, + "grad_norm": 9.52093963832718, + "learning_rate": 1.345691213682463e-06, + "loss": 0.9829, + "step": 139374 + }, + { + "epoch": 1.68, + "grad_norm": 6.36303513210259, + "learning_rate": 1.3453985386987756e-06, + "loss": 1.1676, + "step": 139377 + }, + { + "epoch": 1.68, + "grad_norm": 3.882599304255719, + "learning_rate": 1.3451058932501294e-06, + "loss": 1.1414, + "step": 139380 + }, + { + "epoch": 1.68, + "grad_norm": 7.46737754487039, + "learning_rate": 1.3448132773375155e-06, + "loss": 1.1605, + "step": 139383 + }, + { + "epoch": 1.68, + "grad_norm": 20.50485964316703, + "learning_rate": 1.3445206909619369e-06, + "loss": 1.5585, + "step": 139386 + }, + { + "epoch": 1.68, + "grad_norm": 13.955462772285703, + "learning_rate": 1.3442281341243935e-06, + "loss": 0.858, + "step": 139389 + }, + { + "epoch": 1.68, + "grad_norm": 11.055253712137791, + "learning_rate": 1.3439356068258802e-06, + "loss": 0.9252, + "step": 139392 + }, + { + "epoch": 1.68, + "grad_norm": 7.061889507244373, + "learning_rate": 1.3436431090673986e-06, + "loss": 1.2857, + "step": 139395 + }, + { + "epoch": 1.68, + "grad_norm": 13.615918731065031, + "learning_rate": 1.343350640849943e-06, + "loss": 1.3871, + "step": 139398 + }, + { + "epoch": 1.68, + "grad_norm": 14.57337229303527, + "learning_rate": 1.343058202174513e-06, + "loss": 1.1546, + "step": 139401 + }, + { + "epoch": 1.68, + "grad_norm": 5.602245749465483, + "learning_rate": 1.342765793042108e-06, + "loss": 1.066, + "step": 139404 + }, + { + "epoch": 1.68, + "grad_norm": 14.478803023275299, + "learning_rate": 1.3424734134537264e-06, + "loss": 1.2641, + "step": 139407 + }, + { + "epoch": 1.68, + "grad_norm": 3.57259935456373, + "learning_rate": 1.3421810634103628e-06, + "loss": 1.2822, + "step": 139410 + }, + { + "epoch": 1.68, + "grad_norm": 13.9007415608458, + "learning_rate": 1.341888742913019e-06, + "loss": 1.3141, + "step": 139413 + }, + { + "epoch": 1.68, + "grad_norm": 8.143967231592507, + "learning_rate": 1.3415964519626878e-06, + "loss": 1.0259, + "step": 139416 + }, + { + "epoch": 1.68, + "grad_norm": 9.732748211817107, + "learning_rate": 1.3413041905603685e-06, + "loss": 1.089, + "step": 139419 + }, + { + "epoch": 1.68, + "grad_norm": 3.858728198339331, + "learning_rate": 1.3410119587070624e-06, + "loss": 0.9948, + "step": 139422 + }, + { + "epoch": 1.68, + "grad_norm": 11.624227009299789, + "learning_rate": 1.3407197564037611e-06, + "loss": 1.0185, + "step": 139425 + }, + { + "epoch": 1.68, + "grad_norm": 8.179267955829657, + "learning_rate": 1.3404275836514635e-06, + "loss": 0.8379, + "step": 139428 + }, + { + "epoch": 1.68, + "grad_norm": 11.289501825968614, + "learning_rate": 1.3401354404511714e-06, + "loss": 1.3107, + "step": 139431 + }, + { + "epoch": 1.68, + "grad_norm": 4.803604917621086, + "learning_rate": 1.3398433268038757e-06, + "loss": 1.319, + "step": 139434 + }, + { + "epoch": 1.68, + "grad_norm": 3.557239331135131, + "learning_rate": 1.3395512427105738e-06, + "loss": 1.0994, + "step": 139437 + }, + { + "epoch": 1.68, + "grad_norm": 8.554242322919455, + "learning_rate": 1.339259188172265e-06, + "loss": 0.9598, + "step": 139440 + }, + { + "epoch": 1.68, + "grad_norm": 24.09122536048157, + "learning_rate": 1.3389671631899438e-06, + "loss": 1.0862, + "step": 139443 + }, + { + "epoch": 1.68, + "grad_norm": 6.135966710861845, + "learning_rate": 1.3386751677646092e-06, + "loss": 0.6991, + "step": 139446 + }, + { + "epoch": 1.68, + "grad_norm": 7.237374864990069, + "learning_rate": 1.3383832018972575e-06, + "loss": 0.7929, + "step": 139449 + }, + { + "epoch": 1.68, + "grad_norm": 8.175687641584368, + "learning_rate": 1.3380912655888822e-06, + "loss": 1.5059, + "step": 139452 + }, + { + "epoch": 1.68, + "grad_norm": 16.8152071366617, + "learning_rate": 1.3377993588404836e-06, + "loss": 1.2741, + "step": 139455 + }, + { + "epoch": 1.68, + "grad_norm": 8.57352727065179, + "learning_rate": 1.3375074816530542e-06, + "loss": 1.22, + "step": 139458 + }, + { + "epoch": 1.68, + "grad_norm": 4.813982847475774, + "learning_rate": 1.3372156340275932e-06, + "loss": 0.9684, + "step": 139461 + }, + { + "epoch": 1.68, + "grad_norm": 7.92809521860983, + "learning_rate": 1.3369238159650921e-06, + "loss": 1.1572, + "step": 139464 + }, + { + "epoch": 1.68, + "grad_norm": 5.679107823471239, + "learning_rate": 1.33663202746655e-06, + "loss": 1.02, + "step": 139467 + }, + { + "epoch": 1.68, + "grad_norm": 9.804074090973034, + "learning_rate": 1.3363402685329629e-06, + "loss": 1.0573, + "step": 139470 + }, + { + "epoch": 1.68, + "grad_norm": 2.8923069068364846, + "learning_rate": 1.3360485391653266e-06, + "loss": 1.4983, + "step": 139473 + }, + { + "epoch": 1.68, + "grad_norm": 9.266442729525046, + "learning_rate": 1.335756839364637e-06, + "loss": 1.029, + "step": 139476 + }, + { + "epoch": 1.68, + "grad_norm": 5.351661677303244, + "learning_rate": 1.3354651691318866e-06, + "loss": 0.8695, + "step": 139479 + }, + { + "epoch": 1.68, + "grad_norm": 9.07283176538654, + "learning_rate": 1.3351735284680722e-06, + "loss": 1.0735, + "step": 139482 + }, + { + "epoch": 1.68, + "grad_norm": 14.30734996278563, + "learning_rate": 1.334881917374189e-06, + "loss": 1.2691, + "step": 139485 + }, + { + "epoch": 1.68, + "grad_norm": 12.275443397899057, + "learning_rate": 1.3345903358512358e-06, + "loss": 0.9994, + "step": 139488 + }, + { + "epoch": 1.68, + "grad_norm": 14.719514445260014, + "learning_rate": 1.3342987839002008e-06, + "loss": 0.9956, + "step": 139491 + }, + { + "epoch": 1.68, + "grad_norm": 18.033897972889505, + "learning_rate": 1.3340072615220845e-06, + "loss": 1.5061, + "step": 139494 + }, + { + "epoch": 1.68, + "grad_norm": 12.36695620411743, + "learning_rate": 1.3337157687178804e-06, + "loss": 1.1786, + "step": 139497 + }, + { + "epoch": 1.68, + "grad_norm": 7.527111540948845, + "learning_rate": 1.3334243054885821e-06, + "loss": 1.3727, + "step": 139500 + }, + { + "epoch": 1.68, + "grad_norm": 9.443106547526513, + "learning_rate": 1.3331328718351867e-06, + "loss": 1.0096, + "step": 139503 + }, + { + "epoch": 1.68, + "grad_norm": 9.885751358489669, + "learning_rate": 1.3328414677586844e-06, + "loss": 1.3799, + "step": 139506 + }, + { + "epoch": 1.68, + "grad_norm": 13.134146753944858, + "learning_rate": 1.332550093260072e-06, + "loss": 0.931, + "step": 139509 + }, + { + "epoch": 1.68, + "grad_norm": 7.453776367300811, + "learning_rate": 1.3322587483403448e-06, + "loss": 1.1824, + "step": 139512 + }, + { + "epoch": 1.68, + "grad_norm": 5.720206618259823, + "learning_rate": 1.3319674330004983e-06, + "loss": 1.2389, + "step": 139515 + }, + { + "epoch": 1.68, + "grad_norm": 6.18227350312147, + "learning_rate": 1.3316761472415252e-06, + "loss": 1.0516, + "step": 139518 + }, + { + "epoch": 1.68, + "grad_norm": 3.515860512722389, + "learning_rate": 1.3313848910644167e-06, + "loss": 1.4325, + "step": 139521 + }, + { + "epoch": 1.68, + "grad_norm": 2.6529698740589605, + "learning_rate": 1.3310936644701689e-06, + "loss": 0.8699, + "step": 139524 + }, + { + "epoch": 1.68, + "grad_norm": 17.13609793422027, + "learning_rate": 1.3308024674597765e-06, + "loss": 1.2822, + "step": 139527 + }, + { + "epoch": 1.68, + "grad_norm": 4.995540711746565, + "learning_rate": 1.330511300034234e-06, + "loss": 1.0577, + "step": 139530 + }, + { + "epoch": 1.68, + "grad_norm": 7.718681430668422, + "learning_rate": 1.3302201621945322e-06, + "loss": 1.4299, + "step": 139533 + }, + { + "epoch": 1.68, + "grad_norm": 9.220259221278578, + "learning_rate": 1.3299290539416654e-06, + "loss": 1.2381, + "step": 139536 + }, + { + "epoch": 1.68, + "grad_norm": 10.168738942241774, + "learning_rate": 1.329637975276631e-06, + "loss": 1.3212, + "step": 139539 + }, + { + "epoch": 1.68, + "grad_norm": 11.849813764641448, + "learning_rate": 1.3293469262004176e-06, + "loss": 1.1182, + "step": 139542 + }, + { + "epoch": 1.68, + "grad_norm": 8.932654917515586, + "learning_rate": 1.3290559067140185e-06, + "loss": 0.8412, + "step": 139545 + }, + { + "epoch": 1.68, + "grad_norm": 21.592157831762663, + "learning_rate": 1.3287649168184292e-06, + "loss": 1.2754, + "step": 139548 + }, + { + "epoch": 1.68, + "grad_norm": 5.975796974601363, + "learning_rate": 1.328473956514641e-06, + "loss": 1.2086, + "step": 139551 + }, + { + "epoch": 1.68, + "grad_norm": 4.906118290689662, + "learning_rate": 1.3281830258036477e-06, + "loss": 1.1757, + "step": 139554 + }, + { + "epoch": 1.68, + "grad_norm": 9.28418616515614, + "learning_rate": 1.327892124686445e-06, + "loss": 1.0733, + "step": 139557 + }, + { + "epoch": 1.68, + "grad_norm": 7.370555688783083, + "learning_rate": 1.3276012531640226e-06, + "loss": 1.1551, + "step": 139560 + }, + { + "epoch": 1.68, + "grad_norm": 2.720565181846776, + "learning_rate": 1.3273104112373703e-06, + "loss": 0.8422, + "step": 139563 + }, + { + "epoch": 1.68, + "grad_norm": 11.18526985576036, + "learning_rate": 1.3270195989074852e-06, + "loss": 0.8571, + "step": 139566 + }, + { + "epoch": 1.68, + "grad_norm": 4.965463785354255, + "learning_rate": 1.326728816175359e-06, + "loss": 1.1339, + "step": 139569 + }, + { + "epoch": 1.68, + "grad_norm": 7.323728657292627, + "learning_rate": 1.3264380630419815e-06, + "loss": 1.0464, + "step": 139572 + }, + { + "epoch": 1.68, + "grad_norm": 10.041709856676402, + "learning_rate": 1.3261473395083468e-06, + "loss": 1.2874, + "step": 139575 + }, + { + "epoch": 1.68, + "grad_norm": 5.462670476093111, + "learning_rate": 1.3258566455754473e-06, + "loss": 0.8476, + "step": 139578 + }, + { + "epoch": 1.68, + "grad_norm": 8.064517790500917, + "learning_rate": 1.3255659812442767e-06, + "loss": 1.276, + "step": 139581 + }, + { + "epoch": 1.68, + "grad_norm": 6.880223049974404, + "learning_rate": 1.3252753465158242e-06, + "loss": 0.9573, + "step": 139584 + }, + { + "epoch": 1.68, + "grad_norm": 11.560829157926758, + "learning_rate": 1.324984741391081e-06, + "loss": 1.1525, + "step": 139587 + }, + { + "epoch": 1.68, + "grad_norm": 14.53364120511734, + "learning_rate": 1.3246941658710388e-06, + "loss": 1.3631, + "step": 139590 + }, + { + "epoch": 1.68, + "grad_norm": 10.386485442122156, + "learning_rate": 1.3244036199566922e-06, + "loss": 1.1871, + "step": 139593 + }, + { + "epoch": 1.68, + "grad_norm": 18.71529547517738, + "learning_rate": 1.3241131036490328e-06, + "loss": 1.1896, + "step": 139596 + }, + { + "epoch": 1.68, + "grad_norm": 10.364286281341009, + "learning_rate": 1.3238226169490475e-06, + "loss": 0.9395, + "step": 139599 + }, + { + "epoch": 1.68, + "grad_norm": 13.354431688700513, + "learning_rate": 1.3235321598577312e-06, + "loss": 1.2034, + "step": 139602 + }, + { + "epoch": 1.68, + "grad_norm": 5.204909318794653, + "learning_rate": 1.323241732376076e-06, + "loss": 1.5139, + "step": 139605 + }, + { + "epoch": 1.68, + "grad_norm": 19.199017594261516, + "learning_rate": 1.3229513345050694e-06, + "loss": 1.9305, + "step": 139608 + }, + { + "epoch": 1.68, + "grad_norm": 14.802096987478453, + "learning_rate": 1.3226609662457068e-06, + "loss": 0.9124, + "step": 139611 + }, + { + "epoch": 1.68, + "grad_norm": 5.847224908025029, + "learning_rate": 1.3223706275989746e-06, + "loss": 1.3387, + "step": 139614 + }, + { + "epoch": 1.68, + "grad_norm": 4.527066987006509, + "learning_rate": 1.3220803185658658e-06, + "loss": 1.1456, + "step": 139617 + }, + { + "epoch": 1.68, + "grad_norm": 19.244594835364666, + "learning_rate": 1.3217900391473704e-06, + "loss": 1.1483, + "step": 139620 + }, + { + "epoch": 1.68, + "grad_norm": 9.743661942373613, + "learning_rate": 1.3214997893444825e-06, + "loss": 1.1895, + "step": 139623 + }, + { + "epoch": 1.68, + "grad_norm": 7.911235773569769, + "learning_rate": 1.3212095691581906e-06, + "loss": 1.1958, + "step": 139626 + }, + { + "epoch": 1.68, + "grad_norm": 7.267352794267263, + "learning_rate": 1.3209193785894815e-06, + "loss": 1.2883, + "step": 139629 + }, + { + "epoch": 1.68, + "grad_norm": 24.2670554297637, + "learning_rate": 1.3206292176393476e-06, + "loss": 1.1156, + "step": 139632 + }, + { + "epoch": 1.68, + "grad_norm": 12.790821687751611, + "learning_rate": 1.3203390863087806e-06, + "loss": 1.001, + "step": 139635 + }, + { + "epoch": 1.68, + "grad_norm": 7.834144657593644, + "learning_rate": 1.320048984598772e-06, + "loss": 1.3479, + "step": 139638 + }, + { + "epoch": 1.68, + "grad_norm": 2.8900907084138687, + "learning_rate": 1.3197589125103082e-06, + "loss": 0.976, + "step": 139641 + }, + { + "epoch": 1.68, + "grad_norm": 11.360747010430105, + "learning_rate": 1.3194688700443792e-06, + "loss": 1.3546, + "step": 139644 + }, + { + "epoch": 1.68, + "grad_norm": 6.060483620523744, + "learning_rate": 1.3191788572019793e-06, + "loss": 1.2932, + "step": 139647 + }, + { + "epoch": 1.68, + "grad_norm": 10.386338649337947, + "learning_rate": 1.3188888739840955e-06, + "loss": 1.0194, + "step": 139650 + }, + { + "epoch": 1.68, + "grad_norm": 8.759319420340859, + "learning_rate": 1.318598920391715e-06, + "loss": 1.1345, + "step": 139653 + }, + { + "epoch": 1.68, + "grad_norm": 8.23032218948027, + "learning_rate": 1.3183089964258278e-06, + "loss": 1.1603, + "step": 139656 + }, + { + "epoch": 1.68, + "grad_norm": 13.299617787608335, + "learning_rate": 1.3180191020874267e-06, + "loss": 1.2673, + "step": 139659 + }, + { + "epoch": 1.68, + "grad_norm": 7.993508168209914, + "learning_rate": 1.3177292373774976e-06, + "loss": 0.9604, + "step": 139662 + }, + { + "epoch": 1.68, + "grad_norm": 15.573719456218354, + "learning_rate": 1.3174394022970348e-06, + "loss": 1.158, + "step": 139665 + }, + { + "epoch": 1.68, + "grad_norm": 14.6175552543224, + "learning_rate": 1.3171495968470228e-06, + "loss": 0.6784, + "step": 139668 + }, + { + "epoch": 1.68, + "grad_norm": 20.851002233781536, + "learning_rate": 1.31685982102845e-06, + "loss": 1.3433, + "step": 139671 + }, + { + "epoch": 1.68, + "grad_norm": 9.764816016514917, + "learning_rate": 1.3165700748423072e-06, + "loss": 1.425, + "step": 139674 + }, + { + "epoch": 1.68, + "grad_norm": 75.83970095551543, + "learning_rate": 1.3162803582895844e-06, + "loss": 1.7872, + "step": 139677 + }, + { + "epoch": 1.68, + "grad_norm": 12.173714990204266, + "learning_rate": 1.3159906713712678e-06, + "loss": 1.0318, + "step": 139680 + }, + { + "epoch": 1.68, + "grad_norm": 9.408217781176882, + "learning_rate": 1.3157010140883464e-06, + "loss": 0.982, + "step": 139683 + }, + { + "epoch": 1.68, + "grad_norm": 46.41951290862347, + "learning_rate": 1.3154113864418094e-06, + "loss": 1.3413, + "step": 139686 + }, + { + "epoch": 1.68, + "grad_norm": 5.432231421660473, + "learning_rate": 1.3151217884326472e-06, + "loss": 1.1477, + "step": 139689 + }, + { + "epoch": 1.68, + "grad_norm": 16.965550743020444, + "learning_rate": 1.3148322200618458e-06, + "loss": 0.8706, + "step": 139692 + }, + { + "epoch": 1.68, + "grad_norm": 3.0067136295192123, + "learning_rate": 1.3145426813303918e-06, + "loss": 1.099, + "step": 139695 + }, + { + "epoch": 1.68, + "grad_norm": 7.275876822688863, + "learning_rate": 1.314253172239276e-06, + "loss": 1.4219, + "step": 139698 + }, + { + "epoch": 1.68, + "grad_norm": 9.363521385793929, + "learning_rate": 1.3139636927894839e-06, + "loss": 0.9436, + "step": 139701 + }, + { + "epoch": 1.68, + "grad_norm": 8.808985936850535, + "learning_rate": 1.3136742429820082e-06, + "loss": 1.06, + "step": 139704 + }, + { + "epoch": 1.68, + "grad_norm": 13.012371369270268, + "learning_rate": 1.3133848228178315e-06, + "loss": 1.158, + "step": 139707 + }, + { + "epoch": 1.68, + "grad_norm": 5.072436362391431, + "learning_rate": 1.313095432297945e-06, + "loss": 1.127, + "step": 139710 + }, + { + "epoch": 1.68, + "grad_norm": 9.26526759665893, + "learning_rate": 1.3128060714233338e-06, + "loss": 1.0451, + "step": 139713 + }, + { + "epoch": 1.68, + "grad_norm": 19.971101732022163, + "learning_rate": 1.3125167401949856e-06, + "loss": 1.4016, + "step": 139716 + }, + { + "epoch": 1.68, + "grad_norm": 13.47318402258214, + "learning_rate": 1.31222743861389e-06, + "loss": 1.32, + "step": 139719 + }, + { + "epoch": 1.68, + "grad_norm": 7.66415604347768, + "learning_rate": 1.3119381666810315e-06, + "loss": 1.3284, + "step": 139722 + }, + { + "epoch": 1.68, + "grad_norm": 2.842431748304692, + "learning_rate": 1.3116489243973985e-06, + "loss": 1.077, + "step": 139725 + }, + { + "epoch": 1.68, + "grad_norm": 2.5709652284137614, + "learning_rate": 1.3113597117639777e-06, + "loss": 1.1818, + "step": 139728 + }, + { + "epoch": 1.68, + "grad_norm": 10.35312457994962, + "learning_rate": 1.3110705287817594e-06, + "loss": 1.0059, + "step": 139731 + }, + { + "epoch": 1.68, + "grad_norm": 3.9084521133174865, + "learning_rate": 1.3107813754517274e-06, + "loss": 1.2974, + "step": 139734 + }, + { + "epoch": 1.68, + "grad_norm": 3.99453937901252, + "learning_rate": 1.3104922517748663e-06, + "loss": 1.1445, + "step": 139737 + }, + { + "epoch": 1.68, + "grad_norm": 2.08944525086227, + "learning_rate": 1.3102031577521658e-06, + "loss": 0.9229, + "step": 139740 + }, + { + "epoch": 1.68, + "grad_norm": 8.112642990955415, + "learning_rate": 1.3099140933846111e-06, + "loss": 1.1655, + "step": 139743 + }, + { + "epoch": 1.68, + "grad_norm": 14.046192823391767, + "learning_rate": 1.309625058673193e-06, + "loss": 1.3528, + "step": 139746 + }, + { + "epoch": 1.68, + "grad_norm": 5.323236920577199, + "learning_rate": 1.3093360536188905e-06, + "loss": 1.1815, + "step": 139749 + }, + { + "epoch": 1.68, + "grad_norm": 2.910927056114616, + "learning_rate": 1.3090470782226972e-06, + "loss": 1.1565, + "step": 139752 + }, + { + "epoch": 1.68, + "grad_norm": 3.544805663407326, + "learning_rate": 1.3087581324855924e-06, + "loss": 1.0795, + "step": 139755 + }, + { + "epoch": 1.68, + "grad_norm": 9.677658630564768, + "learning_rate": 1.3084692164085689e-06, + "loss": 0.9729, + "step": 139758 + }, + { + "epoch": 1.68, + "grad_norm": 4.01843272371781, + "learning_rate": 1.3081803299926054e-06, + "loss": 1.1557, + "step": 139761 + }, + { + "epoch": 1.68, + "grad_norm": 10.328902103224832, + "learning_rate": 1.3078914732386927e-06, + "loss": 1.6497, + "step": 139764 + }, + { + "epoch": 1.68, + "grad_norm": 8.766067849852167, + "learning_rate": 1.3076026461478164e-06, + "loss": 0.7404, + "step": 139767 + }, + { + "epoch": 1.68, + "grad_norm": 4.633032431137065, + "learning_rate": 1.3073138487209601e-06, + "loss": 0.8174, + "step": 139770 + }, + { + "epoch": 1.68, + "grad_norm": 10.1401444500999, + "learning_rate": 1.3070250809591135e-06, + "loss": 1.2568, + "step": 139773 + }, + { + "epoch": 1.68, + "grad_norm": 59.5104679051288, + "learning_rate": 1.3067363428632585e-06, + "loss": 0.9601, + "step": 139776 + }, + { + "epoch": 1.68, + "grad_norm": 22.816489247037886, + "learning_rate": 1.3064476344343791e-06, + "loss": 1.1583, + "step": 139779 + }, + { + "epoch": 1.68, + "grad_norm": 14.4020200804201, + "learning_rate": 1.306158955673462e-06, + "loss": 1.0901, + "step": 139782 + }, + { + "epoch": 1.68, + "grad_norm": 8.620461145801508, + "learning_rate": 1.3058703065814948e-06, + "loss": 1.0429, + "step": 139785 + }, + { + "epoch": 1.68, + "grad_norm": 11.484198094879416, + "learning_rate": 1.3055816871594595e-06, + "loss": 1.2883, + "step": 139788 + }, + { + "epoch": 1.68, + "grad_norm": 7.726094745069652, + "learning_rate": 1.3052930974083422e-06, + "loss": 1.1734, + "step": 139791 + }, + { + "epoch": 1.68, + "grad_norm": 3.1716439974310466, + "learning_rate": 1.3050045373291297e-06, + "loss": 1.0654, + "step": 139794 + }, + { + "epoch": 1.68, + "grad_norm": 13.426696541002322, + "learning_rate": 1.3047160069228015e-06, + "loss": 1.2027, + "step": 139797 + }, + { + "epoch": 1.68, + "grad_norm": 9.213519808741324, + "learning_rate": 1.3044275061903488e-06, + "loss": 1.0565, + "step": 139800 + }, + { + "epoch": 1.68, + "grad_norm": 4.066152481361087, + "learning_rate": 1.304139035132751e-06, + "loss": 1.27, + "step": 139803 + }, + { + "epoch": 1.68, + "grad_norm": 2.949254273315313, + "learning_rate": 1.3038505937509938e-06, + "loss": 1.4725, + "step": 139806 + }, + { + "epoch": 1.68, + "grad_norm": 13.94986912520336, + "learning_rate": 1.303562182046063e-06, + "loss": 1.1365, + "step": 139809 + }, + { + "epoch": 1.68, + "grad_norm": 9.66002537835292, + "learning_rate": 1.3032738000189438e-06, + "loss": 0.9824, + "step": 139812 + }, + { + "epoch": 1.68, + "grad_norm": 10.205905520128674, + "learning_rate": 1.302985447670616e-06, + "loss": 1.2404, + "step": 139815 + }, + { + "epoch": 1.68, + "grad_norm": 9.48937093479787, + "learning_rate": 1.3026971250020681e-06, + "loss": 1.2055, + "step": 139818 + }, + { + "epoch": 1.68, + "grad_norm": 7.949177007684527, + "learning_rate": 1.3024088320142802e-06, + "loss": 1.4896, + "step": 139821 + }, + { + "epoch": 1.68, + "grad_norm": 9.875234787876902, + "learning_rate": 1.3021205687082395e-06, + "loss": 1.138, + "step": 139824 + }, + { + "epoch": 1.68, + "grad_norm": 3.1411639959465356, + "learning_rate": 1.3018323350849293e-06, + "loss": 0.9667, + "step": 139827 + }, + { + "epoch": 1.68, + "grad_norm": 17.704451548421783, + "learning_rate": 1.301544131145329e-06, + "loss": 1.2497, + "step": 139830 + }, + { + "epoch": 1.68, + "grad_norm": 13.246056054734447, + "learning_rate": 1.3012559568904271e-06, + "loss": 1.2426, + "step": 139833 + }, + { + "epoch": 1.68, + "grad_norm": 7.3118633768471835, + "learning_rate": 1.3009678123212076e-06, + "loss": 1.0005, + "step": 139836 + }, + { + "epoch": 1.68, + "grad_norm": 3.5852297120981773, + "learning_rate": 1.3006796974386481e-06, + "loss": 0.9308, + "step": 139839 + }, + { + "epoch": 1.68, + "grad_norm": 7.9330648007003255, + "learning_rate": 1.3003916122437387e-06, + "loss": 1.5232, + "step": 139842 + }, + { + "epoch": 1.68, + "grad_norm": 12.888361689003421, + "learning_rate": 1.3001035567374566e-06, + "loss": 0.8893, + "step": 139845 + }, + { + "epoch": 1.68, + "grad_norm": 5.399256387690008, + "learning_rate": 1.2998155309207871e-06, + "loss": 1.186, + "step": 139848 + }, + { + "epoch": 1.68, + "grad_norm": 11.767625512349731, + "learning_rate": 1.2995275347947134e-06, + "loss": 1.3106, + "step": 139851 + }, + { + "epoch": 1.68, + "grad_norm": 5.266043877438186, + "learning_rate": 1.299239568360221e-06, + "loss": 1.0575, + "step": 139854 + }, + { + "epoch": 1.68, + "grad_norm": 9.468650453543937, + "learning_rate": 1.298951631618287e-06, + "loss": 1.3171, + "step": 139857 + }, + { + "epoch": 1.68, + "grad_norm": 4.343623277452967, + "learning_rate": 1.2986637245698997e-06, + "loss": 1.1756, + "step": 139860 + }, + { + "epoch": 1.68, + "grad_norm": 3.2040634131858545, + "learning_rate": 1.298375847216037e-06, + "loss": 1.0867, + "step": 139863 + }, + { + "epoch": 1.68, + "grad_norm": 5.2897019547414095, + "learning_rate": 1.2980879995576845e-06, + "loss": 1.2665, + "step": 139866 + }, + { + "epoch": 1.68, + "grad_norm": 9.362879301572363, + "learning_rate": 1.2978001815958218e-06, + "loss": 1.1255, + "step": 139869 + }, + { + "epoch": 1.68, + "grad_norm": 43.6727080392476, + "learning_rate": 1.2975123933314326e-06, + "loss": 1.3047, + "step": 139872 + }, + { + "epoch": 1.68, + "grad_norm": 3.679005648304148, + "learning_rate": 1.2972246347654982e-06, + "loss": 1.0261, + "step": 139875 + }, + { + "epoch": 1.68, + "grad_norm": 35.638509356857504, + "learning_rate": 1.2969369058990045e-06, + "loss": 0.9998, + "step": 139878 + }, + { + "epoch": 1.68, + "grad_norm": 8.248216216449435, + "learning_rate": 1.2966492067329272e-06, + "loss": 1.154, + "step": 139881 + }, + { + "epoch": 1.68, + "grad_norm": 7.274886822222085, + "learning_rate": 1.2963615372682548e-06, + "loss": 0.9771, + "step": 139884 + }, + { + "epoch": 1.68, + "grad_norm": 6.241067647811795, + "learning_rate": 1.2960738975059616e-06, + "loss": 0.9204, + "step": 139887 + }, + { + "epoch": 1.68, + "grad_norm": 9.965962106235148, + "learning_rate": 1.295786287447034e-06, + "loss": 1.0033, + "step": 139890 + }, + { + "epoch": 1.68, + "grad_norm": 15.625057341584633, + "learning_rate": 1.2954987070924551e-06, + "loss": 1.3343, + "step": 139893 + }, + { + "epoch": 1.68, + "grad_norm": 7.863878198341739, + "learning_rate": 1.2952111564432012e-06, + "loss": 0.9745, + "step": 139896 + }, + { + "epoch": 1.68, + "grad_norm": 6.786411340361074, + "learning_rate": 1.294923635500256e-06, + "loss": 1.2841, + "step": 139899 + }, + { + "epoch": 1.68, + "grad_norm": 3.5068092702053764, + "learning_rate": 1.2946361442646028e-06, + "loss": 1.4661, + "step": 139902 + }, + { + "epoch": 1.68, + "grad_norm": 5.758271831329018, + "learning_rate": 1.2943486827372188e-06, + "loss": 0.9376, + "step": 139905 + }, + { + "epoch": 1.68, + "grad_norm": 4.36071846519066, + "learning_rate": 1.2940612509190897e-06, + "loss": 0.9994, + "step": 139908 + }, + { + "epoch": 1.68, + "grad_norm": 5.94914949229433, + "learning_rate": 1.2937738488111905e-06, + "loss": 1.0059, + "step": 139911 + }, + { + "epoch": 1.68, + "grad_norm": 18.26359730410203, + "learning_rate": 1.293486476414506e-06, + "loss": 1.2182, + "step": 139914 + }, + { + "epoch": 1.68, + "grad_norm": 5.188903516732757, + "learning_rate": 1.2931991337300154e-06, + "loss": 0.8452, + "step": 139917 + }, + { + "epoch": 1.68, + "grad_norm": 5.135849223513436, + "learning_rate": 1.2929118207587033e-06, + "loss": 1.0397, + "step": 139920 + }, + { + "epoch": 1.68, + "grad_norm": 16.307835552126313, + "learning_rate": 1.2926245375015433e-06, + "loss": 1.2054, + "step": 139923 + }, + { + "epoch": 1.68, + "grad_norm": 4.77977040526336, + "learning_rate": 1.2923372839595227e-06, + "loss": 0.8737, + "step": 139926 + }, + { + "epoch": 1.68, + "grad_norm": 4.835456890342518, + "learning_rate": 1.292050060133616e-06, + "loss": 1.1732, + "step": 139929 + }, + { + "epoch": 1.68, + "grad_norm": 17.874979483873727, + "learning_rate": 1.291762866024806e-06, + "loss": 1.3093, + "step": 139932 + }, + { + "epoch": 1.68, + "grad_norm": 6.534052042454962, + "learning_rate": 1.291475701634074e-06, + "loss": 1.0791, + "step": 139935 + }, + { + "epoch": 1.68, + "grad_norm": 6.989282793640998, + "learning_rate": 1.291188566962397e-06, + "loss": 1.1095, + "step": 139938 + }, + { + "epoch": 1.68, + "grad_norm": 8.365925539632544, + "learning_rate": 1.2909014620107563e-06, + "loss": 1.314, + "step": 139941 + }, + { + "epoch": 1.68, + "grad_norm": 3.6012772781539266, + "learning_rate": 1.2906143867801346e-06, + "loss": 1.4002, + "step": 139944 + }, + { + "epoch": 1.68, + "grad_norm": 4.4629337701775755, + "learning_rate": 1.290327341271509e-06, + "loss": 1.3672, + "step": 139947 + }, + { + "epoch": 1.68, + "grad_norm": 8.038229854129627, + "learning_rate": 1.2900403254858562e-06, + "loss": 0.969, + "step": 139950 + }, + { + "epoch": 1.68, + "grad_norm": 7.370463669280711, + "learning_rate": 1.2897533394241578e-06, + "loss": 1.0873, + "step": 139953 + }, + { + "epoch": 1.68, + "grad_norm": 8.135633224139424, + "learning_rate": 1.2894663830873954e-06, + "loss": 0.9212, + "step": 139956 + }, + { + "epoch": 1.68, + "grad_norm": 28.707489325501637, + "learning_rate": 1.289179456476546e-06, + "loss": 0.8524, + "step": 139959 + }, + { + "epoch": 1.68, + "grad_norm": 10.45886075470483, + "learning_rate": 1.2888925595925917e-06, + "loss": 1.3425, + "step": 139962 + }, + { + "epoch": 1.68, + "grad_norm": 6.884061072715826, + "learning_rate": 1.2886056924365065e-06, + "loss": 1.2026, + "step": 139965 + }, + { + "epoch": 1.68, + "grad_norm": 9.449000433169179, + "learning_rate": 1.2883188550092751e-06, + "loss": 0.951, + "step": 139968 + }, + { + "epoch": 1.68, + "grad_norm": 3.3412206031904956, + "learning_rate": 1.2880320473118713e-06, + "loss": 1.2777, + "step": 139971 + }, + { + "epoch": 1.68, + "grad_norm": 4.540306791067584, + "learning_rate": 1.2877452693452785e-06, + "loss": 1.3429, + "step": 139974 + }, + { + "epoch": 1.68, + "grad_norm": 6.777991932078167, + "learning_rate": 1.2874585211104706e-06, + "loss": 1.3491, + "step": 139977 + }, + { + "epoch": 1.68, + "grad_norm": 9.837815837451256, + "learning_rate": 1.287171802608429e-06, + "loss": 1.0237, + "step": 139980 + }, + { + "epoch": 1.68, + "grad_norm": 11.6574966129412, + "learning_rate": 1.2868851138401307e-06, + "loss": 0.9904, + "step": 139983 + }, + { + "epoch": 1.68, + "grad_norm": 9.43378032738044, + "learning_rate": 1.286598454806558e-06, + "loss": 1.1161, + "step": 139986 + }, + { + "epoch": 1.68, + "grad_norm": 3.426884966240876, + "learning_rate": 1.286311825508686e-06, + "loss": 1.4852, + "step": 139989 + }, + { + "epoch": 1.68, + "grad_norm": 11.543116340445387, + "learning_rate": 1.2860252259474904e-06, + "loss": 1.0842, + "step": 139992 + }, + { + "epoch": 1.68, + "grad_norm": 9.175827285305303, + "learning_rate": 1.2857386561239528e-06, + "loss": 0.9941, + "step": 139995 + }, + { + "epoch": 1.68, + "grad_norm": 21.23066449309731, + "learning_rate": 1.2854521160390499e-06, + "loss": 1.3779, + "step": 139998 + }, + { + "epoch": 1.68, + "grad_norm": 18.4415501560477, + "learning_rate": 1.2851656056937611e-06, + "loss": 1.2726, + "step": 140001 + }, + { + "epoch": 1.68, + "grad_norm": 7.034652466362882, + "learning_rate": 1.2848791250890625e-06, + "loss": 1.1057, + "step": 140004 + }, + { + "epoch": 1.68, + "grad_norm": 9.589993455610065, + "learning_rate": 1.284592674225932e-06, + "loss": 1.1764, + "step": 140007 + }, + { + "epoch": 1.68, + "grad_norm": 13.003963469570117, + "learning_rate": 1.2843062531053485e-06, + "loss": 1.2346, + "step": 140010 + }, + { + "epoch": 1.68, + "grad_norm": 10.903067740452956, + "learning_rate": 1.2840198617282872e-06, + "loss": 1.2143, + "step": 140013 + }, + { + "epoch": 1.68, + "grad_norm": 3.8260545470948157, + "learning_rate": 1.2837335000957286e-06, + "loss": 1.4091, + "step": 140016 + }, + { + "epoch": 1.68, + "grad_norm": 4.946482554514653, + "learning_rate": 1.2834471682086459e-06, + "loss": 1.0376, + "step": 140019 + }, + { + "epoch": 1.68, + "grad_norm": 9.857570850522006, + "learning_rate": 1.2831608660680184e-06, + "loss": 0.9725, + "step": 140022 + }, + { + "epoch": 1.68, + "grad_norm": 12.42848144893216, + "learning_rate": 1.2828745936748221e-06, + "loss": 0.8983, + "step": 140025 + }, + { + "epoch": 1.68, + "grad_norm": 14.815413578113601, + "learning_rate": 1.2825883510300385e-06, + "loss": 0.9377, + "step": 140028 + }, + { + "epoch": 1.68, + "grad_norm": 5.842876021522767, + "learning_rate": 1.28230213813464e-06, + "loss": 1.0535, + "step": 140031 + }, + { + "epoch": 1.68, + "grad_norm": 4.44420066213241, + "learning_rate": 1.2820159549896015e-06, + "loss": 1.1522, + "step": 140034 + }, + { + "epoch": 1.68, + "grad_norm": 3.326316082277267, + "learning_rate": 1.2817298015959035e-06, + "loss": 1.1418, + "step": 140037 + }, + { + "epoch": 1.68, + "grad_norm": 4.059336889299164, + "learning_rate": 1.2814436779545215e-06, + "loss": 1.0413, + "step": 140040 + }, + { + "epoch": 1.68, + "grad_norm": 3.9612614191639164, + "learning_rate": 1.2811575840664326e-06, + "loss": 1.0499, + "step": 140043 + }, + { + "epoch": 1.68, + "grad_norm": 19.83031336352904, + "learning_rate": 1.2808715199326117e-06, + "loss": 0.9568, + "step": 140046 + }, + { + "epoch": 1.68, + "grad_norm": 8.005556450043116, + "learning_rate": 1.2805854855540346e-06, + "loss": 1.063, + "step": 140049 + }, + { + "epoch": 1.68, + "grad_norm": 6.668210004483128, + "learning_rate": 1.2802994809316815e-06, + "loss": 0.9048, + "step": 140052 + }, + { + "epoch": 1.68, + "grad_norm": 13.222659229196065, + "learning_rate": 1.2800135060665242e-06, + "loss": 0.9694, + "step": 140055 + }, + { + "epoch": 1.68, + "grad_norm": 17.0928493884857, + "learning_rate": 1.2797275609595395e-06, + "loss": 1.142, + "step": 140058 + }, + { + "epoch": 1.68, + "grad_norm": 17.595518450319116, + "learning_rate": 1.279441645611702e-06, + "loss": 1.5301, + "step": 140061 + }, + { + "epoch": 1.68, + "grad_norm": 3.141680494761098, + "learning_rate": 1.27915576002399e-06, + "loss": 1.4076, + "step": 140064 + }, + { + "epoch": 1.68, + "grad_norm": 12.956343278650802, + "learning_rate": 1.2788699041973773e-06, + "loss": 1.3713, + "step": 140067 + }, + { + "epoch": 1.68, + "grad_norm": 9.236638797165657, + "learning_rate": 1.278584078132843e-06, + "loss": 1.0905, + "step": 140070 + }, + { + "epoch": 1.68, + "grad_norm": 7.105967433452802, + "learning_rate": 1.2782982818313604e-06, + "loss": 0.8935, + "step": 140073 + }, + { + "epoch": 1.68, + "grad_norm": 23.611598041399517, + "learning_rate": 1.2780125152939017e-06, + "loss": 0.9275, + "step": 140076 + }, + { + "epoch": 1.68, + "grad_norm": 5.5367063396136, + "learning_rate": 1.2777267785214443e-06, + "loss": 1.2181, + "step": 140079 + }, + { + "epoch": 1.68, + "grad_norm": 7.567388010886178, + "learning_rate": 1.2774410715149654e-06, + "loss": 1.3734, + "step": 140082 + }, + { + "epoch": 1.68, + "grad_norm": 5.032708085059177, + "learning_rate": 1.2771553942754367e-06, + "loss": 1.1526, + "step": 140085 + }, + { + "epoch": 1.68, + "grad_norm": 10.197997009679074, + "learning_rate": 1.2768697468038349e-06, + "loss": 1.2897, + "step": 140088 + }, + { + "epoch": 1.68, + "grad_norm": 6.710377741046894, + "learning_rate": 1.2765841291011337e-06, + "loss": 1.1017, + "step": 140091 + }, + { + "epoch": 1.68, + "grad_norm": 7.146156171256337, + "learning_rate": 1.2762985411683114e-06, + "loss": 1.1041, + "step": 140094 + }, + { + "epoch": 1.68, + "grad_norm": 7.154690322942663, + "learning_rate": 1.2760129830063406e-06, + "loss": 0.799, + "step": 140097 + }, + { + "epoch": 1.68, + "grad_norm": 7.486515997094461, + "learning_rate": 1.2757274546161923e-06, + "loss": 1.0556, + "step": 140100 + }, + { + "epoch": 1.68, + "grad_norm": 7.323650634839989, + "learning_rate": 1.275441955998844e-06, + "loss": 1.5108, + "step": 140103 + }, + { + "epoch": 1.68, + "grad_norm": 5.689259774435476, + "learning_rate": 1.275156487155269e-06, + "loss": 0.9666, + "step": 140106 + }, + { + "epoch": 1.68, + "grad_norm": 5.715588786537033, + "learning_rate": 1.2748710480864447e-06, + "loss": 1.0493, + "step": 140109 + }, + { + "epoch": 1.68, + "grad_norm": 6.189382967131646, + "learning_rate": 1.2745856387933408e-06, + "loss": 1.1309, + "step": 140112 + }, + { + "epoch": 1.68, + "grad_norm": 10.316751008556801, + "learning_rate": 1.274300259276936e-06, + "loss": 1.2245, + "step": 140115 + }, + { + "epoch": 1.68, + "grad_norm": 9.658507322785344, + "learning_rate": 1.2740149095381981e-06, + "loss": 1.279, + "step": 140118 + }, + { + "epoch": 1.68, + "grad_norm": 4.468409839150674, + "learning_rate": 1.2737295895781053e-06, + "loss": 1.4346, + "step": 140121 + }, + { + "epoch": 1.68, + "grad_norm": 4.034388187548656, + "learning_rate": 1.2734442993976314e-06, + "loss": 1.3058, + "step": 140124 + }, + { + "epoch": 1.68, + "grad_norm": 13.284518652393576, + "learning_rate": 1.2731590389977476e-06, + "loss": 1.2645, + "step": 140127 + }, + { + "epoch": 1.69, + "grad_norm": 11.37217938127246, + "learning_rate": 1.272873808379428e-06, + "loss": 1.2265, + "step": 140130 + }, + { + "epoch": 1.69, + "grad_norm": 4.8483371433894575, + "learning_rate": 1.2725886075436467e-06, + "loss": 0.7684, + "step": 140133 + }, + { + "epoch": 1.69, + "grad_norm": 11.139501239261213, + "learning_rate": 1.272303436491379e-06, + "loss": 1.1845, + "step": 140136 + }, + { + "epoch": 1.69, + "grad_norm": 5.787946755672937, + "learning_rate": 1.272018295223596e-06, + "loss": 0.8496, + "step": 140139 + }, + { + "epoch": 1.69, + "grad_norm": 19.649454098898413, + "learning_rate": 1.2717331837412682e-06, + "loss": 1.1571, + "step": 140142 + }, + { + "epoch": 1.69, + "grad_norm": 2.40130168049574, + "learning_rate": 1.2714481020453718e-06, + "loss": 1.4435, + "step": 140145 + }, + { + "epoch": 1.69, + "grad_norm": 10.364790450413564, + "learning_rate": 1.2711630501368788e-06, + "loss": 0.9588, + "step": 140148 + }, + { + "epoch": 1.69, + "grad_norm": 6.262979623100093, + "learning_rate": 1.2708780280167644e-06, + "loss": 1.6328, + "step": 140151 + }, + { + "epoch": 1.69, + "grad_norm": 6.5665879254390855, + "learning_rate": 1.2705930356859963e-06, + "loss": 0.8304, + "step": 140154 + }, + { + "epoch": 1.69, + "grad_norm": 11.410702117345243, + "learning_rate": 1.270308073145553e-06, + "loss": 1.1566, + "step": 140157 + }, + { + "epoch": 1.69, + "grad_norm": 15.299272531334676, + "learning_rate": 1.2700231403964015e-06, + "loss": 1.5432, + "step": 140160 + }, + { + "epoch": 1.69, + "grad_norm": 6.059147933520309, + "learning_rate": 1.2697382374395184e-06, + "loss": 0.9576, + "step": 140163 + }, + { + "epoch": 1.69, + "grad_norm": 16.385173918400255, + "learning_rate": 1.2694533642758711e-06, + "loss": 1.0343, + "step": 140166 + }, + { + "epoch": 1.69, + "grad_norm": 14.309510807473217, + "learning_rate": 1.2691685209064364e-06, + "loss": 1.2294, + "step": 140169 + }, + { + "epoch": 1.69, + "grad_norm": 6.283633339177465, + "learning_rate": 1.2688837073321836e-06, + "loss": 1.1573, + "step": 140172 + }, + { + "epoch": 1.69, + "grad_norm": 5.669216139741768, + "learning_rate": 1.2685989235540864e-06, + "loss": 0.9849, + "step": 140175 + }, + { + "epoch": 1.69, + "grad_norm": 31.998501517487988, + "learning_rate": 1.2683141695731171e-06, + "loss": 1.1812, + "step": 140178 + }, + { + "epoch": 1.69, + "grad_norm": 5.99331234840013, + "learning_rate": 1.2680294453902475e-06, + "loss": 1.3084, + "step": 140181 + }, + { + "epoch": 1.69, + "grad_norm": 14.504298601226408, + "learning_rate": 1.2677447510064455e-06, + "loss": 0.8831, + "step": 140184 + }, + { + "epoch": 1.69, + "grad_norm": 12.146961760730731, + "learning_rate": 1.2674600864226849e-06, + "loss": 1.0678, + "step": 140187 + }, + { + "epoch": 1.69, + "grad_norm": 8.49641124178921, + "learning_rate": 1.2671754516399403e-06, + "loss": 1.0939, + "step": 140190 + }, + { + "epoch": 1.69, + "grad_norm": 2.4528538183722612, + "learning_rate": 1.266890846659179e-06, + "loss": 0.9561, + "step": 140193 + }, + { + "epoch": 1.69, + "grad_norm": 11.952160469779347, + "learning_rate": 1.266606271481372e-06, + "loss": 0.9112, + "step": 140196 + }, + { + "epoch": 1.69, + "grad_norm": 13.134029621068711, + "learning_rate": 1.2663217261074934e-06, + "loss": 1.3531, + "step": 140199 + }, + { + "epoch": 1.69, + "grad_norm": 9.313309434678711, + "learning_rate": 1.2660372105385144e-06, + "loss": 1.2371, + "step": 140202 + }, + { + "epoch": 1.69, + "grad_norm": 9.976376037531569, + "learning_rate": 1.2657527247754043e-06, + "loss": 0.8721, + "step": 140205 + }, + { + "epoch": 1.69, + "grad_norm": 5.233428676244871, + "learning_rate": 1.2654682688191322e-06, + "loss": 1.2332, + "step": 140208 + }, + { + "epoch": 1.69, + "grad_norm": 9.119148233463184, + "learning_rate": 1.2651838426706708e-06, + "loss": 1.1459, + "step": 140211 + }, + { + "epoch": 1.69, + "grad_norm": 10.234129503922121, + "learning_rate": 1.2648994463309905e-06, + "loss": 1.146, + "step": 140214 + }, + { + "epoch": 1.69, + "grad_norm": 11.871657946951016, + "learning_rate": 1.2646150798010637e-06, + "loss": 0.8668, + "step": 140217 + }, + { + "epoch": 1.69, + "grad_norm": 5.1146419047039835, + "learning_rate": 1.2643307430818574e-06, + "loss": 1.2706, + "step": 140220 + }, + { + "epoch": 1.69, + "grad_norm": 16.636519494181616, + "learning_rate": 1.2640464361743465e-06, + "loss": 0.8463, + "step": 140223 + }, + { + "epoch": 1.69, + "grad_norm": 4.891434616753064, + "learning_rate": 1.2637621590794957e-06, + "loss": 1.4296, + "step": 140226 + }, + { + "epoch": 1.69, + "grad_norm": 9.890864937246933, + "learning_rate": 1.2634779117982776e-06, + "loss": 1.2416, + "step": 140229 + }, + { + "epoch": 1.69, + "grad_norm": 8.19746851958664, + "learning_rate": 1.2631936943316647e-06, + "loss": 1.2134, + "step": 140232 + }, + { + "epoch": 1.69, + "grad_norm": 10.827606708264218, + "learning_rate": 1.262909506680623e-06, + "loss": 1.2344, + "step": 140235 + }, + { + "epoch": 1.69, + "grad_norm": 8.697231584273194, + "learning_rate": 1.2626253488461237e-06, + "loss": 0.9783, + "step": 140238 + }, + { + "epoch": 1.69, + "grad_norm": 7.543222569304322, + "learning_rate": 1.2623412208291375e-06, + "loss": 0.8399, + "step": 140241 + }, + { + "epoch": 1.69, + "grad_norm": 5.8386479091770225, + "learning_rate": 1.2620571226306356e-06, + "loss": 1.1178, + "step": 140244 + }, + { + "epoch": 1.69, + "grad_norm": 6.379296760761012, + "learning_rate": 1.261773054251585e-06, + "loss": 1.2321, + "step": 140247 + }, + { + "epoch": 1.69, + "grad_norm": 8.056432771483088, + "learning_rate": 1.2614890156929528e-06, + "loss": 1.0756, + "step": 140250 + }, + { + "epoch": 1.69, + "grad_norm": 8.192145264190613, + "learning_rate": 1.2612050069557113e-06, + "loss": 1.13, + "step": 140253 + }, + { + "epoch": 1.69, + "grad_norm": 19.73796134659469, + "learning_rate": 1.26092102804083e-06, + "loss": 1.0236, + "step": 140256 + }, + { + "epoch": 1.69, + "grad_norm": 6.938777546060458, + "learning_rate": 1.2606370789492794e-06, + "loss": 1.2059, + "step": 140259 + }, + { + "epoch": 1.69, + "grad_norm": 41.37804427430313, + "learning_rate": 1.260353159682024e-06, + "loss": 1.2666, + "step": 140262 + }, + { + "epoch": 1.69, + "grad_norm": 16.511379180381535, + "learning_rate": 1.2600692702400374e-06, + "loss": 1.2972, + "step": 140265 + }, + { + "epoch": 1.69, + "grad_norm": 9.881805195006397, + "learning_rate": 1.2597854106242847e-06, + "loss": 1.1196, + "step": 140268 + }, + { + "epoch": 1.69, + "grad_norm": 9.495308649613325, + "learning_rate": 1.259501580835737e-06, + "loss": 1.3006, + "step": 140271 + }, + { + "epoch": 1.69, + "grad_norm": 11.929132668897108, + "learning_rate": 1.2592177808753614e-06, + "loss": 1.0271, + "step": 140274 + }, + { + "epoch": 1.69, + "grad_norm": 8.704518716524424, + "learning_rate": 1.2589340107441261e-06, + "loss": 1.0015, + "step": 140277 + }, + { + "epoch": 1.69, + "grad_norm": 14.630743759991399, + "learning_rate": 1.2586502704430016e-06, + "loss": 1.0005, + "step": 140280 + }, + { + "epoch": 1.69, + "grad_norm": 4.353571079261437, + "learning_rate": 1.2583665599729544e-06, + "loss": 1.1649, + "step": 140283 + }, + { + "epoch": 1.69, + "grad_norm": 10.860616631810446, + "learning_rate": 1.2580828793349553e-06, + "loss": 0.9174, + "step": 140286 + }, + { + "epoch": 1.69, + "grad_norm": 7.183242273951372, + "learning_rate": 1.2577992285299712e-06, + "loss": 1.1298, + "step": 140289 + }, + { + "epoch": 1.69, + "grad_norm": 2.670999369377744, + "learning_rate": 1.2575156075589668e-06, + "loss": 1.0939, + "step": 140292 + }, + { + "epoch": 1.69, + "grad_norm": 15.935925139683103, + "learning_rate": 1.2572320164229124e-06, + "loss": 1.1523, + "step": 140295 + }, + { + "epoch": 1.69, + "grad_norm": 10.41067802715153, + "learning_rate": 1.2569484551227784e-06, + "loss": 1.0026, + "step": 140298 + }, + { + "epoch": 1.69, + "grad_norm": 9.733103989289086, + "learning_rate": 1.2566649236595275e-06, + "loss": 1.3308, + "step": 140301 + }, + { + "epoch": 1.69, + "grad_norm": 5.533498839058174, + "learning_rate": 1.2563814220341309e-06, + "loss": 0.6946, + "step": 140304 + }, + { + "epoch": 1.69, + "grad_norm": 5.541063612866598, + "learning_rate": 1.2560979502475567e-06, + "loss": 1.3377, + "step": 140307 + }, + { + "epoch": 1.69, + "grad_norm": 7.080923856056347, + "learning_rate": 1.2558145083007679e-06, + "loss": 1.0459, + "step": 140310 + }, + { + "epoch": 1.69, + "grad_norm": 12.281895142136996, + "learning_rate": 1.2555310961947376e-06, + "loss": 1.0634, + "step": 140313 + }, + { + "epoch": 1.69, + "grad_norm": 2.7589086249313746, + "learning_rate": 1.2552477139304276e-06, + "loss": 1.051, + "step": 140316 + }, + { + "epoch": 1.69, + "grad_norm": 6.5700559146983215, + "learning_rate": 1.254964361508807e-06, + "loss": 1.1785, + "step": 140319 + }, + { + "epoch": 1.69, + "grad_norm": 9.73713534465738, + "learning_rate": 1.254681038930843e-06, + "loss": 1.3977, + "step": 140322 + }, + { + "epoch": 1.69, + "grad_norm": 5.641889928536944, + "learning_rate": 1.2543977461975055e-06, + "loss": 1.1406, + "step": 140325 + }, + { + "epoch": 1.69, + "grad_norm": 9.04667546978336, + "learning_rate": 1.2541144833097552e-06, + "loss": 1.4292, + "step": 140328 + }, + { + "epoch": 1.69, + "grad_norm": 5.8421581417770465, + "learning_rate": 1.2538312502685645e-06, + "loss": 1.2779, + "step": 140331 + }, + { + "epoch": 1.69, + "grad_norm": 9.9126430418035, + "learning_rate": 1.2535480470748951e-06, + "loss": 1.2247, + "step": 140334 + }, + { + "epoch": 1.69, + "grad_norm": 8.942405318720725, + "learning_rate": 1.253264873729716e-06, + "loss": 0.9594, + "step": 140337 + }, + { + "epoch": 1.69, + "grad_norm": 10.51376837794292, + "learning_rate": 1.2529817302339953e-06, + "loss": 1.2829, + "step": 140340 + }, + { + "epoch": 1.69, + "grad_norm": 4.1159406855166445, + "learning_rate": 1.2526986165886957e-06, + "loss": 1.2978, + "step": 140343 + }, + { + "epoch": 1.69, + "grad_norm": 10.239242285672397, + "learning_rate": 1.252415532794784e-06, + "loss": 1.176, + "step": 140346 + }, + { + "epoch": 1.69, + "grad_norm": 3.192459016553549, + "learning_rate": 1.2521324788532308e-06, + "loss": 1.1742, + "step": 140349 + }, + { + "epoch": 1.69, + "grad_norm": 17.866030065634803, + "learning_rate": 1.2518494547649972e-06, + "loss": 1.3736, + "step": 140352 + }, + { + "epoch": 1.69, + "grad_norm": 6.675128052006559, + "learning_rate": 1.2515664605310485e-06, + "loss": 1.2202, + "step": 140355 + }, + { + "epoch": 1.69, + "grad_norm": 19.05394382992732, + "learning_rate": 1.2512834961523523e-06, + "loss": 1.4457, + "step": 140358 + }, + { + "epoch": 1.69, + "grad_norm": 6.600653083655596, + "learning_rate": 1.2510005616298738e-06, + "loss": 1.147, + "step": 140361 + }, + { + "epoch": 1.69, + "grad_norm": 10.664907285160675, + "learning_rate": 1.2507176569645796e-06, + "loss": 1.0246, + "step": 140364 + }, + { + "epoch": 1.69, + "grad_norm": 5.914161710583063, + "learning_rate": 1.250434782157437e-06, + "loss": 1.1539, + "step": 140367 + }, + { + "epoch": 1.69, + "grad_norm": 10.664613106064259, + "learning_rate": 1.2501519372094062e-06, + "loss": 0.999, + "step": 140370 + }, + { + "epoch": 1.69, + "grad_norm": 4.368378026101666, + "learning_rate": 1.2498691221214576e-06, + "loss": 1.1801, + "step": 140373 + }, + { + "epoch": 1.69, + "grad_norm": 6.10256049266119, + "learning_rate": 1.2495863368945516e-06, + "loss": 1.2986, + "step": 140376 + }, + { + "epoch": 1.69, + "grad_norm": 32.6474845146908, + "learning_rate": 1.2493035815296573e-06, + "loss": 1.3747, + "step": 140379 + }, + { + "epoch": 1.69, + "grad_norm": 11.112064107871566, + "learning_rate": 1.2490208560277362e-06, + "loss": 1.3982, + "step": 140382 + }, + { + "epoch": 1.69, + "grad_norm": 30.65207558024702, + "learning_rate": 1.2487381603897552e-06, + "loss": 1.0603, + "step": 140385 + }, + { + "epoch": 1.69, + "grad_norm": 2.9869428124248762, + "learning_rate": 1.2484554946166782e-06, + "loss": 1.0571, + "step": 140388 + }, + { + "epoch": 1.69, + "grad_norm": 6.10695440812186, + "learning_rate": 1.2481728587094722e-06, + "loss": 1.1351, + "step": 140391 + }, + { + "epoch": 1.69, + "grad_norm": 7.967012622224283, + "learning_rate": 1.2478902526691005e-06, + "loss": 0.9736, + "step": 140394 + }, + { + "epoch": 1.69, + "grad_norm": 7.121587782194878, + "learning_rate": 1.247607676496524e-06, + "loss": 1.2717, + "step": 140397 + }, + { + "epoch": 1.69, + "grad_norm": 9.875248065758166, + "learning_rate": 1.2473251301927104e-06, + "loss": 1.3124, + "step": 140400 + }, + { + "epoch": 1.69, + "grad_norm": 6.10419122948267, + "learning_rate": 1.2470426137586234e-06, + "loss": 1.166, + "step": 140403 + }, + { + "epoch": 1.69, + "grad_norm": 6.004008576620991, + "learning_rate": 1.246760127195229e-06, + "loss": 1.2152, + "step": 140406 + }, + { + "epoch": 1.69, + "grad_norm": 3.5350332854378888, + "learning_rate": 1.2464776705034875e-06, + "loss": 1.1097, + "step": 140409 + }, + { + "epoch": 1.69, + "grad_norm": 7.131950843257388, + "learning_rate": 1.246195243684365e-06, + "loss": 1.0875, + "step": 140412 + }, + { + "epoch": 1.69, + "grad_norm": 11.263456602120009, + "learning_rate": 1.245912846738827e-06, + "loss": 1.1183, + "step": 140415 + }, + { + "epoch": 1.69, + "grad_norm": 10.28230303435971, + "learning_rate": 1.245630479667832e-06, + "loss": 1.3406, + "step": 140418 + }, + { + "epoch": 1.69, + "grad_norm": 6.577878999816881, + "learning_rate": 1.2453481424723501e-06, + "loss": 1.146, + "step": 140421 + }, + { + "epoch": 1.69, + "grad_norm": 22.340212071346254, + "learning_rate": 1.2450658351533396e-06, + "loss": 1.204, + "step": 140424 + }, + { + "epoch": 1.69, + "grad_norm": 12.110620814443834, + "learning_rate": 1.244783557711765e-06, + "loss": 0.9929, + "step": 140427 + }, + { + "epoch": 1.69, + "grad_norm": 11.817580182509216, + "learning_rate": 1.2445013101485914e-06, + "loss": 1.5789, + "step": 140430 + }, + { + "epoch": 1.69, + "grad_norm": 4.035833192248811, + "learning_rate": 1.2442190924647834e-06, + "loss": 0.9864, + "step": 140433 + }, + { + "epoch": 1.69, + "grad_norm": 2.7333123626583173, + "learning_rate": 1.2439369046612993e-06, + "loss": 1.1575, + "step": 140436 + }, + { + "epoch": 1.69, + "grad_norm": 9.557458850619852, + "learning_rate": 1.243654746739107e-06, + "loss": 1.2439, + "step": 140439 + }, + { + "epoch": 1.69, + "grad_norm": 8.176158687715347, + "learning_rate": 1.2433726186991635e-06, + "loss": 1.3116, + "step": 140442 + }, + { + "epoch": 1.69, + "grad_norm": 3.811238292624423, + "learning_rate": 1.243090520542437e-06, + "loss": 1.2585, + "step": 140445 + }, + { + "epoch": 1.69, + "grad_norm": 4.988645982913936, + "learning_rate": 1.2428084522698892e-06, + "loss": 0.9192, + "step": 140448 + }, + { + "epoch": 1.69, + "grad_norm": 9.082678240319224, + "learning_rate": 1.24252641388248e-06, + "loss": 1.31, + "step": 140451 + }, + { + "epoch": 1.69, + "grad_norm": 7.185301378409821, + "learning_rate": 1.2422444053811743e-06, + "loss": 1.0987, + "step": 140454 + }, + { + "epoch": 1.69, + "grad_norm": 2.7786253294633685, + "learning_rate": 1.241962426766935e-06, + "loss": 1.2665, + "step": 140457 + }, + { + "epoch": 1.69, + "grad_norm": 3.7660835788395053, + "learning_rate": 1.2416804780407243e-06, + "loss": 1.0205, + "step": 140460 + }, + { + "epoch": 1.69, + "grad_norm": 8.115733757849338, + "learning_rate": 1.2413985592035006e-06, + "loss": 0.8867, + "step": 140463 + }, + { + "epoch": 1.69, + "grad_norm": 10.118726879666609, + "learning_rate": 1.2411166702562293e-06, + "loss": 0.9467, + "step": 140466 + }, + { + "epoch": 1.69, + "grad_norm": 3.2243620075273696, + "learning_rate": 1.2408348111998714e-06, + "loss": 1.292, + "step": 140469 + }, + { + "epoch": 1.69, + "grad_norm": 11.41543964441869, + "learning_rate": 1.24055298203539e-06, + "loss": 1.3312, + "step": 140472 + }, + { + "epoch": 1.69, + "grad_norm": 7.755403392717695, + "learning_rate": 1.240271182763747e-06, + "loss": 1.2075, + "step": 140475 + }, + { + "epoch": 1.69, + "grad_norm": 6.201845164472456, + "learning_rate": 1.2399894133859025e-06, + "loss": 1.3123, + "step": 140478 + }, + { + "epoch": 1.69, + "grad_norm": 9.152158332915217, + "learning_rate": 1.2397076739028202e-06, + "loss": 1.0361, + "step": 140481 + }, + { + "epoch": 1.69, + "grad_norm": 12.19577876985585, + "learning_rate": 1.2394259643154583e-06, + "loss": 1.2811, + "step": 140484 + }, + { + "epoch": 1.69, + "grad_norm": 6.687471170997802, + "learning_rate": 1.2391442846247815e-06, + "loss": 1.3755, + "step": 140487 + }, + { + "epoch": 1.69, + "grad_norm": 6.154987204330064, + "learning_rate": 1.238862634831748e-06, + "loss": 1.243, + "step": 140490 + }, + { + "epoch": 1.69, + "grad_norm": 16.59705243774548, + "learning_rate": 1.2385810149373212e-06, + "loss": 1.065, + "step": 140493 + }, + { + "epoch": 1.69, + "grad_norm": 7.718791375370488, + "learning_rate": 1.2382994249424606e-06, + "loss": 1.108, + "step": 140496 + }, + { + "epoch": 1.69, + "grad_norm": 11.047174360826622, + "learning_rate": 1.2380178648481301e-06, + "loss": 1.1642, + "step": 140499 + }, + { + "epoch": 1.69, + "grad_norm": 6.11409458597697, + "learning_rate": 1.2377363346552896e-06, + "loss": 1.0552, + "step": 140502 + }, + { + "epoch": 1.69, + "grad_norm": 4.664282597395939, + "learning_rate": 1.237454834364895e-06, + "loss": 0.9389, + "step": 140505 + }, + { + "epoch": 1.69, + "grad_norm": 5.884577748579393, + "learning_rate": 1.2371733639779127e-06, + "loss": 1.5186, + "step": 140508 + }, + { + "epoch": 1.69, + "grad_norm": 11.177423864985535, + "learning_rate": 1.2368919234953004e-06, + "loss": 1.2071, + "step": 140511 + }, + { + "epoch": 1.69, + "grad_norm": 11.441584263412816, + "learning_rate": 1.2366105129180217e-06, + "loss": 1.3414, + "step": 140514 + }, + { + "epoch": 1.69, + "grad_norm": 13.775925573179666, + "learning_rate": 1.2363291322470329e-06, + "loss": 1.3237, + "step": 140517 + }, + { + "epoch": 1.69, + "grad_norm": 10.08971517205547, + "learning_rate": 1.2360477814832961e-06, + "loss": 1.14, + "step": 140520 + }, + { + "epoch": 1.69, + "grad_norm": 8.79279404426314, + "learning_rate": 1.2357664606277732e-06, + "loss": 1.1867, + "step": 140523 + }, + { + "epoch": 1.69, + "grad_norm": 5.358736480532192, + "learning_rate": 1.2354851696814208e-06, + "loss": 1.1483, + "step": 140526 + }, + { + "epoch": 1.69, + "grad_norm": 3.841513987313002, + "learning_rate": 1.2352039086452018e-06, + "loss": 0.9599, + "step": 140529 + }, + { + "epoch": 1.69, + "grad_norm": 7.698177426529498, + "learning_rate": 1.234922677520074e-06, + "loss": 0.9603, + "step": 140532 + }, + { + "epoch": 1.69, + "grad_norm": 21.405277709412267, + "learning_rate": 1.234641476306997e-06, + "loss": 0.9524, + "step": 140535 + }, + { + "epoch": 1.69, + "grad_norm": 10.105782351903109, + "learning_rate": 1.234360305006932e-06, + "loss": 1.3977, + "step": 140538 + }, + { + "epoch": 1.69, + "grad_norm": 5.67312297583303, + "learning_rate": 1.2340791636208394e-06, + "loss": 1.0837, + "step": 140541 + }, + { + "epoch": 1.69, + "grad_norm": 7.445931940513591, + "learning_rate": 1.2337980521496774e-06, + "loss": 0.9675, + "step": 140544 + }, + { + "epoch": 1.69, + "grad_norm": 9.62842086027086, + "learning_rate": 1.2335169705944027e-06, + "loss": 1.1748, + "step": 140547 + }, + { + "epoch": 1.69, + "grad_norm": 10.510388942269811, + "learning_rate": 1.2332359189559772e-06, + "loss": 1.3018, + "step": 140550 + }, + { + "epoch": 1.69, + "grad_norm": 16.314872284932186, + "learning_rate": 1.23295489723536e-06, + "loss": 0.7096, + "step": 140553 + }, + { + "epoch": 1.69, + "grad_norm": 8.027040493889485, + "learning_rate": 1.2326739054335112e-06, + "loss": 1.0217, + "step": 140556 + }, + { + "epoch": 1.69, + "grad_norm": 9.593115054622881, + "learning_rate": 1.232392943551387e-06, + "loss": 1.2411, + "step": 140559 + }, + { + "epoch": 1.69, + "grad_norm": 9.306464507280817, + "learning_rate": 1.2321120115899477e-06, + "loss": 1.1751, + "step": 140562 + }, + { + "epoch": 1.69, + "grad_norm": 8.803318952218701, + "learning_rate": 1.2318311095501535e-06, + "loss": 1.0286, + "step": 140565 + }, + { + "epoch": 1.69, + "grad_norm": 8.582499847842671, + "learning_rate": 1.2315502374329624e-06, + "loss": 1.222, + "step": 140568 + }, + { + "epoch": 1.69, + "grad_norm": 6.129595042806144, + "learning_rate": 1.2312693952393283e-06, + "loss": 1.1744, + "step": 140571 + }, + { + "epoch": 1.69, + "grad_norm": 2.1293848017631816, + "learning_rate": 1.230988582970214e-06, + "loss": 1.3862, + "step": 140574 + }, + { + "epoch": 1.69, + "grad_norm": 4.364539608004499, + "learning_rate": 1.2307078006265772e-06, + "loss": 0.9288, + "step": 140577 + }, + { + "epoch": 1.69, + "grad_norm": 7.629830243514353, + "learning_rate": 1.2304270482093761e-06, + "loss": 0.8365, + "step": 140580 + }, + { + "epoch": 1.69, + "grad_norm": 4.422914850586074, + "learning_rate": 1.2301463257195711e-06, + "loss": 0.9363, + "step": 140583 + }, + { + "epoch": 1.69, + "grad_norm": 3.08090003795706, + "learning_rate": 1.2298656331581172e-06, + "loss": 1.0883, + "step": 140586 + }, + { + "epoch": 1.69, + "grad_norm": 6.298155325866548, + "learning_rate": 1.229584970525971e-06, + "loss": 0.7831, + "step": 140589 + }, + { + "epoch": 1.69, + "grad_norm": 12.989401501806286, + "learning_rate": 1.229304337824092e-06, + "loss": 1.6152, + "step": 140592 + }, + { + "epoch": 1.69, + "grad_norm": 23.420431215261946, + "learning_rate": 1.2290237350534406e-06, + "loss": 1.0201, + "step": 140595 + }, + { + "epoch": 1.69, + "grad_norm": 24.633623508253287, + "learning_rate": 1.2287431622149692e-06, + "loss": 1.5284, + "step": 140598 + }, + { + "epoch": 1.69, + "grad_norm": 10.523674978944893, + "learning_rate": 1.2284626193096382e-06, + "loss": 1.1106, + "step": 140601 + }, + { + "epoch": 1.69, + "grad_norm": 11.104423984721082, + "learning_rate": 1.2281821063384058e-06, + "loss": 1.1345, + "step": 140604 + }, + { + "epoch": 1.69, + "grad_norm": 5.449306554801534, + "learning_rate": 1.227901623302229e-06, + "loss": 0.8433, + "step": 140607 + }, + { + "epoch": 1.69, + "grad_norm": 4.092979757505914, + "learning_rate": 1.2276211702020646e-06, + "loss": 0.8463, + "step": 140610 + }, + { + "epoch": 1.69, + "grad_norm": 5.012537181077803, + "learning_rate": 1.2273407470388676e-06, + "loss": 1.0153, + "step": 140613 + }, + { + "epoch": 1.69, + "grad_norm": 6.993149709287107, + "learning_rate": 1.2270603538135973e-06, + "loss": 1.3573, + "step": 140616 + }, + { + "epoch": 1.69, + "grad_norm": 5.686664227388053, + "learning_rate": 1.2267799905272082e-06, + "loss": 1.534, + "step": 140619 + }, + { + "epoch": 1.69, + "grad_norm": 6.702509704974074, + "learning_rate": 1.2264996571806632e-06, + "loss": 1.2775, + "step": 140622 + }, + { + "epoch": 1.69, + "grad_norm": 13.2265749543822, + "learning_rate": 1.2262193537749113e-06, + "loss": 0.9778, + "step": 140625 + }, + { + "epoch": 1.69, + "grad_norm": 13.952042338246997, + "learning_rate": 1.225939080310914e-06, + "loss": 1.0345, + "step": 140628 + }, + { + "epoch": 1.69, + "grad_norm": 9.386050025099058, + "learning_rate": 1.225658836789625e-06, + "loss": 0.9874, + "step": 140631 + }, + { + "epoch": 1.69, + "grad_norm": 5.028705269715817, + "learning_rate": 1.2253786232120013e-06, + "loss": 1.2485, + "step": 140634 + }, + { + "epoch": 1.69, + "grad_norm": 9.066568314092159, + "learning_rate": 1.2250984395790032e-06, + "loss": 1.1449, + "step": 140637 + }, + { + "epoch": 1.69, + "grad_norm": 6.8705773384198645, + "learning_rate": 1.2248182858915802e-06, + "loss": 1.1124, + "step": 140640 + }, + { + "epoch": 1.69, + "grad_norm": 13.886993240746786, + "learning_rate": 1.2245381621506912e-06, + "loss": 1.4148, + "step": 140643 + }, + { + "epoch": 1.69, + "grad_norm": 11.960982271225932, + "learning_rate": 1.2242580683572935e-06, + "loss": 1.1899, + "step": 140646 + }, + { + "epoch": 1.69, + "grad_norm": 10.70036418037518, + "learning_rate": 1.2239780045123439e-06, + "loss": 0.823, + "step": 140649 + }, + { + "epoch": 1.69, + "grad_norm": 6.821984493104057, + "learning_rate": 1.2236979706167961e-06, + "loss": 1.062, + "step": 140652 + }, + { + "epoch": 1.69, + "grad_norm": 5.081088830445164, + "learning_rate": 1.223417966671604e-06, + "loss": 0.8512, + "step": 140655 + }, + { + "epoch": 1.69, + "grad_norm": 5.121049025093294, + "learning_rate": 1.2231379926777244e-06, + "loss": 0.9018, + "step": 140658 + }, + { + "epoch": 1.69, + "grad_norm": 22.83755410238314, + "learning_rate": 1.2228580486361142e-06, + "loss": 1.1382, + "step": 140661 + }, + { + "epoch": 1.69, + "grad_norm": 7.326276176628166, + "learning_rate": 1.2225781345477294e-06, + "loss": 1.4432, + "step": 140664 + }, + { + "epoch": 1.69, + "grad_norm": 21.27794671639241, + "learning_rate": 1.2222982504135216e-06, + "loss": 1.5271, + "step": 140667 + }, + { + "epoch": 1.69, + "grad_norm": 18.045947702701746, + "learning_rate": 1.2220183962344512e-06, + "loss": 1.0216, + "step": 140670 + }, + { + "epoch": 1.69, + "grad_norm": 5.298671148553975, + "learning_rate": 1.2217385720114682e-06, + "loss": 1.1201, + "step": 140673 + }, + { + "epoch": 1.69, + "grad_norm": 8.666750825090087, + "learning_rate": 1.221458777745531e-06, + "loss": 1.3884, + "step": 140676 + }, + { + "epoch": 1.69, + "grad_norm": 3.9772506450532985, + "learning_rate": 1.22117901343759e-06, + "loss": 0.9284, + "step": 140679 + }, + { + "epoch": 1.69, + "grad_norm": 11.085110875868487, + "learning_rate": 1.2208992790886044e-06, + "loss": 1.0917, + "step": 140682 + }, + { + "epoch": 1.69, + "grad_norm": 6.0745264039655735, + "learning_rate": 1.2206195746995265e-06, + "loss": 0.7464, + "step": 140685 + }, + { + "epoch": 1.69, + "grad_norm": 10.892882426363595, + "learning_rate": 1.2203399002713124e-06, + "loss": 0.7683, + "step": 140688 + }, + { + "epoch": 1.69, + "grad_norm": 14.360582304906307, + "learning_rate": 1.2200602558049169e-06, + "loss": 1.3548, + "step": 140691 + }, + { + "epoch": 1.69, + "grad_norm": 8.792064784834754, + "learning_rate": 1.2197806413012946e-06, + "loss": 1.2379, + "step": 140694 + }, + { + "epoch": 1.69, + "grad_norm": 16.174109568442685, + "learning_rate": 1.2195010567613951e-06, + "loss": 0.9427, + "step": 140697 + }, + { + "epoch": 1.69, + "grad_norm": 4.6021633161388475, + "learning_rate": 1.2192215021861763e-06, + "loss": 0.9007, + "step": 140700 + }, + { + "epoch": 1.69, + "grad_norm": 18.356821785325558, + "learning_rate": 1.2189419775765931e-06, + "loss": 1.6517, + "step": 140703 + }, + { + "epoch": 1.69, + "grad_norm": 30.895976346843398, + "learning_rate": 1.218662482933597e-06, + "loss": 0.7959, + "step": 140706 + }, + { + "epoch": 1.69, + "grad_norm": 14.94677733872941, + "learning_rate": 1.2183830182581425e-06, + "loss": 1.0414, + "step": 140709 + }, + { + "epoch": 1.69, + "grad_norm": 10.664382996562036, + "learning_rate": 1.2181035835511845e-06, + "loss": 1.384, + "step": 140712 + }, + { + "epoch": 1.69, + "grad_norm": 7.685997721672596, + "learning_rate": 1.2178241788136747e-06, + "loss": 1.1718, + "step": 140715 + }, + { + "epoch": 1.69, + "grad_norm": 3.903368938184881, + "learning_rate": 1.2175448040465698e-06, + "loss": 0.9976, + "step": 140718 + }, + { + "epoch": 1.69, + "grad_norm": 8.413343401023438, + "learning_rate": 1.2172654592508193e-06, + "loss": 1.155, + "step": 140721 + }, + { + "epoch": 1.69, + "grad_norm": 27.571651467526316, + "learning_rate": 1.2169861444273778e-06, + "loss": 1.1534, + "step": 140724 + }, + { + "epoch": 1.69, + "grad_norm": 12.178902623998406, + "learning_rate": 1.216706859577199e-06, + "loss": 1.0601, + "step": 140727 + }, + { + "epoch": 1.69, + "grad_norm": 4.9391762380435065, + "learning_rate": 1.216427604701238e-06, + "loss": 1.1834, + "step": 140730 + }, + { + "epoch": 1.69, + "grad_norm": 2.9020261537660463, + "learning_rate": 1.2161483798004437e-06, + "loss": 1.2289, + "step": 140733 + }, + { + "epoch": 1.69, + "grad_norm": 7.7164233918193235, + "learning_rate": 1.215869184875773e-06, + "loss": 1.0162, + "step": 140736 + }, + { + "epoch": 1.69, + "grad_norm": 7.271006359499666, + "learning_rate": 1.2155900199281744e-06, + "loss": 1.1633, + "step": 140739 + }, + { + "epoch": 1.69, + "grad_norm": 11.39823599776837, + "learning_rate": 1.2153108849586036e-06, + "loss": 0.5896, + "step": 140742 + }, + { + "epoch": 1.69, + "grad_norm": 8.863265418508972, + "learning_rate": 1.2150317799680144e-06, + "loss": 0.8007, + "step": 140745 + }, + { + "epoch": 1.69, + "grad_norm": 39.388759863919006, + "learning_rate": 1.214752704957355e-06, + "loss": 1.4426, + "step": 140748 + }, + { + "epoch": 1.69, + "grad_norm": 7.493526735772191, + "learning_rate": 1.2144736599275807e-06, + "loss": 1.3658, + "step": 140751 + }, + { + "epoch": 1.69, + "grad_norm": 5.302822432453328, + "learning_rate": 1.214194644879645e-06, + "loss": 1.188, + "step": 140754 + }, + { + "epoch": 1.69, + "grad_norm": 9.103440814269339, + "learning_rate": 1.2139156598144964e-06, + "loss": 0.8117, + "step": 140757 + }, + { + "epoch": 1.69, + "grad_norm": 10.546452510235008, + "learning_rate": 1.213636704733091e-06, + "loss": 1.3681, + "step": 140760 + }, + { + "epoch": 1.69, + "grad_norm": 6.237198799654382, + "learning_rate": 1.213357779636376e-06, + "loss": 0.8656, + "step": 140763 + }, + { + "epoch": 1.69, + "grad_norm": 13.698694371099775, + "learning_rate": 1.213078884525307e-06, + "loss": 1.0026, + "step": 140766 + }, + { + "epoch": 1.69, + "grad_norm": 7.377899978727173, + "learning_rate": 1.2128000194008338e-06, + "loss": 0.9644, + "step": 140769 + }, + { + "epoch": 1.69, + "grad_norm": 7.496152398479383, + "learning_rate": 1.2125211842639118e-06, + "loss": 1.0352, + "step": 140772 + }, + { + "epoch": 1.69, + "grad_norm": 9.614220209976835, + "learning_rate": 1.212242379115487e-06, + "loss": 1.3043, + "step": 140775 + }, + { + "epoch": 1.69, + "grad_norm": 14.87034464343693, + "learning_rate": 1.2119636039565164e-06, + "loss": 1.1716, + "step": 140778 + }, + { + "epoch": 1.69, + "grad_norm": 15.730759545803576, + "learning_rate": 1.211684858787946e-06, + "loss": 0.9722, + "step": 140781 + }, + { + "epoch": 1.69, + "grad_norm": 16.16139957474897, + "learning_rate": 1.2114061436107327e-06, + "loss": 1.1317, + "step": 140784 + }, + { + "epoch": 1.69, + "grad_norm": 22.74872250649271, + "learning_rate": 1.2111274584258215e-06, + "loss": 1.1906, + "step": 140787 + }, + { + "epoch": 1.69, + "grad_norm": 11.60760218169863, + "learning_rate": 1.210848803234167e-06, + "loss": 0.8632, + "step": 140790 + }, + { + "epoch": 1.69, + "grad_norm": 15.689012911948257, + "learning_rate": 1.2105701780367186e-06, + "loss": 1.074, + "step": 140793 + }, + { + "epoch": 1.69, + "grad_norm": 3.2503968335078453, + "learning_rate": 1.2102915828344298e-06, + "loss": 1.5147, + "step": 140796 + }, + { + "epoch": 1.69, + "grad_norm": 4.106321855691868, + "learning_rate": 1.210013017628251e-06, + "loss": 1.2931, + "step": 140799 + }, + { + "epoch": 1.69, + "grad_norm": 4.683524253003522, + "learning_rate": 1.2097344824191316e-06, + "loss": 0.9678, + "step": 140802 + }, + { + "epoch": 1.69, + "grad_norm": 7.535976030868249, + "learning_rate": 1.2094559772080206e-06, + "loss": 1.004, + "step": 140805 + }, + { + "epoch": 1.69, + "grad_norm": 6.631160235416338, + "learning_rate": 1.2091775019958696e-06, + "loss": 1.2444, + "step": 140808 + }, + { + "epoch": 1.69, + "grad_norm": 13.87125707652107, + "learning_rate": 1.2088990567836312e-06, + "loss": 1.3998, + "step": 140811 + }, + { + "epoch": 1.69, + "grad_norm": 2.446259233924365, + "learning_rate": 1.2086206415722523e-06, + "loss": 0.8564, + "step": 140814 + }, + { + "epoch": 1.69, + "grad_norm": 6.709333880655516, + "learning_rate": 1.2083422563626846e-06, + "loss": 1.402, + "step": 140817 + }, + { + "epoch": 1.69, + "grad_norm": 7.4211790396112765, + "learning_rate": 1.2080639011558792e-06, + "loss": 1.012, + "step": 140820 + }, + { + "epoch": 1.69, + "grad_norm": 8.162133522040037, + "learning_rate": 1.2077855759527835e-06, + "loss": 1.1252, + "step": 140823 + }, + { + "epoch": 1.69, + "grad_norm": 10.091317612094448, + "learning_rate": 1.2075072807543508e-06, + "loss": 1.0744, + "step": 140826 + }, + { + "epoch": 1.69, + "grad_norm": 7.936622602530607, + "learning_rate": 1.2072290155615263e-06, + "loss": 1.0647, + "step": 140829 + }, + { + "epoch": 1.69, + "grad_norm": 7.054656783682192, + "learning_rate": 1.2069507803752623e-06, + "loss": 0.8633, + "step": 140832 + }, + { + "epoch": 1.69, + "grad_norm": 7.212948746433861, + "learning_rate": 1.206672575196508e-06, + "loss": 1.2209, + "step": 140835 + }, + { + "epoch": 1.69, + "grad_norm": 8.064893713505997, + "learning_rate": 1.206394400026215e-06, + "loss": 1.4827, + "step": 140838 + }, + { + "epoch": 1.69, + "grad_norm": 23.23363500333634, + "learning_rate": 1.2061162548653283e-06, + "loss": 1.2232, + "step": 140841 + }, + { + "epoch": 1.69, + "grad_norm": 6.875955466317195, + "learning_rate": 1.205838139714801e-06, + "loss": 0.8155, + "step": 140844 + }, + { + "epoch": 1.69, + "grad_norm": 81.52517586249566, + "learning_rate": 1.2055600545755785e-06, + "loss": 1.1243, + "step": 140847 + }, + { + "epoch": 1.69, + "grad_norm": 15.274285416437385, + "learning_rate": 1.2052819994486132e-06, + "loss": 1.0874, + "step": 140850 + }, + { + "epoch": 1.69, + "grad_norm": 8.6729028301153, + "learning_rate": 1.205003974334853e-06, + "loss": 1.4852, + "step": 140853 + }, + { + "epoch": 1.69, + "grad_norm": 10.208085612271649, + "learning_rate": 1.204725979235245e-06, + "loss": 1.3703, + "step": 140856 + }, + { + "epoch": 1.69, + "grad_norm": 21.854883325471253, + "learning_rate": 1.2044480141507386e-06, + "loss": 1.0854, + "step": 140859 + }, + { + "epoch": 1.69, + "grad_norm": 4.728957560212513, + "learning_rate": 1.2041700790822853e-06, + "loss": 1.4513, + "step": 140862 + }, + { + "epoch": 1.69, + "grad_norm": 35.16483565071492, + "learning_rate": 1.2038921740308318e-06, + "loss": 0.9349, + "step": 140865 + }, + { + "epoch": 1.69, + "grad_norm": 6.9528829935940655, + "learning_rate": 1.2036142989973233e-06, + "loss": 1.0521, + "step": 140868 + }, + { + "epoch": 1.69, + "grad_norm": 7.496265040699472, + "learning_rate": 1.2033364539827109e-06, + "loss": 1.108, + "step": 140871 + }, + { + "epoch": 1.69, + "grad_norm": 2.7392229646162343, + "learning_rate": 1.2030586389879429e-06, + "loss": 1.3026, + "step": 140874 + }, + { + "epoch": 1.69, + "grad_norm": 4.026729237614524, + "learning_rate": 1.2027808540139663e-06, + "loss": 0.9041, + "step": 140877 + }, + { + "epoch": 1.69, + "grad_norm": 6.7172714257897095, + "learning_rate": 1.2025030990617327e-06, + "loss": 1.2134, + "step": 140880 + }, + { + "epoch": 1.69, + "grad_norm": 9.11690883524461, + "learning_rate": 1.2022253741321844e-06, + "loss": 0.9559, + "step": 140883 + }, + { + "epoch": 1.69, + "grad_norm": 15.540140788811264, + "learning_rate": 1.2019476792262753e-06, + "loss": 1.304, + "step": 140886 + }, + { + "epoch": 1.69, + "grad_norm": 5.6570762964287935, + "learning_rate": 1.2016700143449467e-06, + "loss": 1.3109, + "step": 140889 + }, + { + "epoch": 1.69, + "grad_norm": 3.682053434289012, + "learning_rate": 1.2013923794891514e-06, + "loss": 1.301, + "step": 140892 + }, + { + "epoch": 1.69, + "grad_norm": 9.883305919211063, + "learning_rate": 1.2011147746598328e-06, + "loss": 1.1727, + "step": 140895 + }, + { + "epoch": 1.69, + "grad_norm": 6.608210885514603, + "learning_rate": 1.2008371998579393e-06, + "loss": 1.4468, + "step": 140898 + }, + { + "epoch": 1.69, + "grad_norm": 9.569074454983635, + "learning_rate": 1.20055965508442e-06, + "loss": 1.2851, + "step": 140901 + }, + { + "epoch": 1.69, + "grad_norm": 12.967213896099338, + "learning_rate": 1.2002821403402233e-06, + "loss": 1.1734, + "step": 140904 + }, + { + "epoch": 1.69, + "grad_norm": 4.03021841910167, + "learning_rate": 1.2000046556262934e-06, + "loss": 1.4373, + "step": 140907 + }, + { + "epoch": 1.69, + "grad_norm": 3.2510022393451337, + "learning_rate": 1.1997272009435756e-06, + "loss": 0.9479, + "step": 140910 + }, + { + "epoch": 1.69, + "grad_norm": 8.132741822226274, + "learning_rate": 1.199449776293019e-06, + "loss": 1.37, + "step": 140913 + }, + { + "epoch": 1.69, + "grad_norm": 11.681238717575662, + "learning_rate": 1.1991723816755708e-06, + "loss": 1.0326, + "step": 140916 + }, + { + "epoch": 1.69, + "grad_norm": 3.039086983702509, + "learning_rate": 1.1988950170921797e-06, + "loss": 0.9854, + "step": 140919 + }, + { + "epoch": 1.69, + "grad_norm": 2.972844531792882, + "learning_rate": 1.1986176825437867e-06, + "loss": 1.1452, + "step": 140922 + }, + { + "epoch": 1.69, + "grad_norm": 4.856627526274234, + "learning_rate": 1.1983403780313419e-06, + "loss": 1.0478, + "step": 140925 + }, + { + "epoch": 1.69, + "grad_norm": 8.971570810926437, + "learning_rate": 1.1980631035557933e-06, + "loss": 1.202, + "step": 140928 + }, + { + "epoch": 1.69, + "grad_norm": 5.627526949345553, + "learning_rate": 1.1977858591180824e-06, + "loss": 1.0159, + "step": 140931 + }, + { + "epoch": 1.69, + "grad_norm": 7.227591678450201, + "learning_rate": 1.1975086447191598e-06, + "loss": 1.2597, + "step": 140934 + }, + { + "epoch": 1.69, + "grad_norm": 11.76731829275689, + "learning_rate": 1.1972314603599678e-06, + "loss": 1.1868, + "step": 140937 + }, + { + "epoch": 1.69, + "grad_norm": 5.875255860854663, + "learning_rate": 1.1969543060414546e-06, + "loss": 1.0065, + "step": 140940 + }, + { + "epoch": 1.69, + "grad_norm": 22.230481028633363, + "learning_rate": 1.196677181764565e-06, + "loss": 1.1238, + "step": 140943 + }, + { + "epoch": 1.69, + "grad_norm": 8.030437821883647, + "learning_rate": 1.1964000875302474e-06, + "loss": 1.4003, + "step": 140946 + }, + { + "epoch": 1.69, + "grad_norm": 7.281603207607906, + "learning_rate": 1.196123023339445e-06, + "loss": 1.2481, + "step": 140949 + }, + { + "epoch": 1.69, + "grad_norm": 7.120342579601738, + "learning_rate": 1.1958459891931008e-06, + "loss": 0.828, + "step": 140952 + }, + { + "epoch": 1.69, + "grad_norm": 5.831831921882859, + "learning_rate": 1.1955689850921636e-06, + "loss": 1.1172, + "step": 140955 + }, + { + "epoch": 1.69, + "grad_norm": 23.628585800777866, + "learning_rate": 1.1952920110375788e-06, + "loss": 0.9708, + "step": 140958 + }, + { + "epoch": 1.7, + "grad_norm": 6.617290083878489, + "learning_rate": 1.195015067030293e-06, + "loss": 1.4865, + "step": 140961 + }, + { + "epoch": 1.7, + "grad_norm": 7.42233651263699, + "learning_rate": 1.1947381530712465e-06, + "loss": 1.2953, + "step": 140964 + }, + { + "epoch": 1.7, + "grad_norm": 11.663314440009017, + "learning_rate": 1.1944612691613866e-06, + "loss": 1.0103, + "step": 140967 + }, + { + "epoch": 1.7, + "grad_norm": 6.55852183148078, + "learning_rate": 1.1941844153016613e-06, + "loss": 0.9537, + "step": 140970 + }, + { + "epoch": 1.7, + "grad_norm": 5.433436163287954, + "learning_rate": 1.1939075914930132e-06, + "loss": 1.198, + "step": 140973 + }, + { + "epoch": 1.7, + "grad_norm": 26.72713778428631, + "learning_rate": 1.1936307977363837e-06, + "loss": 1.0041, + "step": 140976 + }, + { + "epoch": 1.7, + "grad_norm": 10.499627692148186, + "learning_rate": 1.193354034032721e-06, + "loss": 1.2005, + "step": 140979 + }, + { + "epoch": 1.7, + "grad_norm": 4.123925453438979, + "learning_rate": 1.1930773003829676e-06, + "loss": 1.2361, + "step": 140982 + }, + { + "epoch": 1.7, + "grad_norm": 5.473233677237571, + "learning_rate": 1.1928005967880706e-06, + "loss": 0.8454, + "step": 140985 + }, + { + "epoch": 1.7, + "grad_norm": 9.222310430138423, + "learning_rate": 1.1925239232489738e-06, + "loss": 1.2167, + "step": 140988 + }, + { + "epoch": 1.7, + "grad_norm": 23.86861403441895, + "learning_rate": 1.1922472797666206e-06, + "loss": 1.2134, + "step": 140991 + }, + { + "epoch": 1.7, + "grad_norm": 2.6053473037277746, + "learning_rate": 1.1919706663419539e-06, + "loss": 0.9235, + "step": 140994 + }, + { + "epoch": 1.7, + "grad_norm": 9.676074378244888, + "learning_rate": 1.1916940829759171e-06, + "loss": 1.0948, + "step": 140997 + }, + { + "epoch": 1.7, + "grad_norm": 4.384566906166921, + "learning_rate": 1.1914175296694585e-06, + "loss": 1.3336, + "step": 141000 + }, + { + "epoch": 1.7, + "grad_norm": 5.344297167902529, + "learning_rate": 1.1911410064235173e-06, + "loss": 1.1823, + "step": 141003 + }, + { + "epoch": 1.7, + "grad_norm": 2.6586061461640274, + "learning_rate": 1.1908645132390384e-06, + "loss": 0.8926, + "step": 141006 + }, + { + "epoch": 1.7, + "grad_norm": 4.697250669811998, + "learning_rate": 1.1905880501169665e-06, + "loss": 1.0299, + "step": 141009 + }, + { + "epoch": 1.7, + "grad_norm": 20.372111847021316, + "learning_rate": 1.1903116170582462e-06, + "loss": 1.2607, + "step": 141012 + }, + { + "epoch": 1.7, + "grad_norm": 3.1356806548172296, + "learning_rate": 1.1900352140638194e-06, + "loss": 1.1015, + "step": 141015 + }, + { + "epoch": 1.7, + "grad_norm": 8.366394620487545, + "learning_rate": 1.189758841134626e-06, + "loss": 1.1348, + "step": 141018 + }, + { + "epoch": 1.7, + "grad_norm": 13.505773777423187, + "learning_rate": 1.1894824982716124e-06, + "loss": 1.0923, + "step": 141021 + }, + { + "epoch": 1.7, + "grad_norm": 13.443639086253707, + "learning_rate": 1.189206185475722e-06, + "loss": 1.0717, + "step": 141024 + }, + { + "epoch": 1.7, + "grad_norm": 5.728111093336886, + "learning_rate": 1.1889299027478996e-06, + "loss": 1.2818, + "step": 141027 + }, + { + "epoch": 1.7, + "grad_norm": 3.436014437635759, + "learning_rate": 1.1886536500890832e-06, + "loss": 1.0272, + "step": 141030 + }, + { + "epoch": 1.7, + "grad_norm": 12.106753991638007, + "learning_rate": 1.188377427500218e-06, + "loss": 1.2608, + "step": 141033 + }, + { + "epoch": 1.7, + "grad_norm": 7.923996237514944, + "learning_rate": 1.1881012349822496e-06, + "loss": 1.0462, + "step": 141036 + }, + { + "epoch": 1.7, + "grad_norm": 7.063999607218855, + "learning_rate": 1.187825072536115e-06, + "loss": 1.0692, + "step": 141039 + }, + { + "epoch": 1.7, + "grad_norm": 14.306463650837761, + "learning_rate": 1.1875489401627605e-06, + "loss": 1.4115, + "step": 141042 + }, + { + "epoch": 1.7, + "grad_norm": 11.441989511621724, + "learning_rate": 1.187272837863126e-06, + "loss": 0.7921, + "step": 141045 + }, + { + "epoch": 1.7, + "grad_norm": 4.028936747628304, + "learning_rate": 1.1869967656381553e-06, + "loss": 1.0166, + "step": 141048 + }, + { + "epoch": 1.7, + "grad_norm": 9.384174421303973, + "learning_rate": 1.18672072348879e-06, + "loss": 1.0734, + "step": 141051 + }, + { + "epoch": 1.7, + "grad_norm": 7.429733891808093, + "learning_rate": 1.186444711415974e-06, + "loss": 1.341, + "step": 141054 + }, + { + "epoch": 1.7, + "grad_norm": 13.17261264030133, + "learning_rate": 1.1861687294206481e-06, + "loss": 0.9019, + "step": 141057 + }, + { + "epoch": 1.7, + "grad_norm": 5.693796103519512, + "learning_rate": 1.185892777503751e-06, + "loss": 1.0614, + "step": 141060 + }, + { + "epoch": 1.7, + "grad_norm": 15.655214788232453, + "learning_rate": 1.1856168556662274e-06, + "loss": 1.1945, + "step": 141063 + }, + { + "epoch": 1.7, + "grad_norm": 3.5176621334482414, + "learning_rate": 1.1853409639090186e-06, + "loss": 1.3023, + "step": 141066 + }, + { + "epoch": 1.7, + "grad_norm": 9.63829016859995, + "learning_rate": 1.1850651022330685e-06, + "loss": 1.4386, + "step": 141069 + }, + { + "epoch": 1.7, + "grad_norm": 17.26282928598196, + "learning_rate": 1.1847892706393126e-06, + "loss": 0.9199, + "step": 141072 + }, + { + "epoch": 1.7, + "grad_norm": 3.9705645062387886, + "learning_rate": 1.1845134691286964e-06, + "loss": 1.252, + "step": 141075 + }, + { + "epoch": 1.7, + "grad_norm": 15.27299653550023, + "learning_rate": 1.184237697702163e-06, + "loss": 1.2668, + "step": 141078 + }, + { + "epoch": 1.7, + "grad_norm": 13.557285202832148, + "learning_rate": 1.1839619563606508e-06, + "loss": 1.5071, + "step": 141081 + }, + { + "epoch": 1.7, + "grad_norm": 5.889829934191435, + "learning_rate": 1.183686245105098e-06, + "loss": 1.1217, + "step": 141084 + }, + { + "epoch": 1.7, + "grad_norm": 9.125259423802268, + "learning_rate": 1.183410563936449e-06, + "loss": 1.0302, + "step": 141087 + }, + { + "epoch": 1.7, + "grad_norm": 9.801802230921664, + "learning_rate": 1.1831349128556447e-06, + "loss": 1.0692, + "step": 141090 + }, + { + "epoch": 1.7, + "grad_norm": 10.556101540739881, + "learning_rate": 1.182859291863624e-06, + "loss": 1.3026, + "step": 141093 + }, + { + "epoch": 1.7, + "grad_norm": 5.812084777737055, + "learning_rate": 1.1825837009613316e-06, + "loss": 1.013, + "step": 141096 + }, + { + "epoch": 1.7, + "grad_norm": 11.018697858602351, + "learning_rate": 1.182308140149705e-06, + "loss": 1.3372, + "step": 141099 + }, + { + "epoch": 1.7, + "grad_norm": 18.131885704577105, + "learning_rate": 1.182032609429682e-06, + "loss": 1.2792, + "step": 141102 + }, + { + "epoch": 1.7, + "grad_norm": 3.57892849821615, + "learning_rate": 1.1817571088022051e-06, + "loss": 0.9416, + "step": 141105 + }, + { + "epoch": 1.7, + "grad_norm": 5.396103474604792, + "learning_rate": 1.1814816382682171e-06, + "loss": 1.0087, + "step": 141108 + }, + { + "epoch": 1.7, + "grad_norm": 6.356318222875228, + "learning_rate": 1.1812061978286548e-06, + "loss": 0.7041, + "step": 141111 + }, + { + "epoch": 1.7, + "grad_norm": 39.28042675569743, + "learning_rate": 1.1809307874844577e-06, + "loss": 1.0771, + "step": 141114 + }, + { + "epoch": 1.7, + "grad_norm": 18.239358566754756, + "learning_rate": 1.1806554072365684e-06, + "loss": 1.3067, + "step": 141117 + }, + { + "epoch": 1.7, + "grad_norm": 4.97111097317095, + "learning_rate": 1.180380057085927e-06, + "loss": 1.0682, + "step": 141120 + }, + { + "epoch": 1.7, + "grad_norm": 6.695814264856436, + "learning_rate": 1.1801047370334716e-06, + "loss": 1.1456, + "step": 141123 + }, + { + "epoch": 1.7, + "grad_norm": 10.654507442646057, + "learning_rate": 1.1798294470801409e-06, + "loss": 1.1126, + "step": 141126 + }, + { + "epoch": 1.7, + "grad_norm": 5.847071825842921, + "learning_rate": 1.1795541872268744e-06, + "loss": 1.1906, + "step": 141129 + }, + { + "epoch": 1.7, + "grad_norm": 19.62996439526743, + "learning_rate": 1.1792789574746121e-06, + "loss": 1.2407, + "step": 141132 + }, + { + "epoch": 1.7, + "grad_norm": 8.342576518141144, + "learning_rate": 1.1790037578242963e-06, + "loss": 1.1868, + "step": 141135 + }, + { + "epoch": 1.7, + "grad_norm": 15.13378678618873, + "learning_rate": 1.1787285882768607e-06, + "loss": 1.365, + "step": 141138 + }, + { + "epoch": 1.7, + "grad_norm": 5.71211552513402, + "learning_rate": 1.178453448833249e-06, + "loss": 1.446, + "step": 141141 + }, + { + "epoch": 1.7, + "grad_norm": 4.006506442904819, + "learning_rate": 1.1781783394943957e-06, + "loss": 1.2795, + "step": 141144 + }, + { + "epoch": 1.7, + "grad_norm": 6.501030609260981, + "learning_rate": 1.1779032602612427e-06, + "loss": 1.2743, + "step": 141147 + }, + { + "epoch": 1.7, + "grad_norm": 6.29081218019903, + "learning_rate": 1.1776282111347304e-06, + "loss": 1.1179, + "step": 141150 + }, + { + "epoch": 1.7, + "grad_norm": 14.286084866682558, + "learning_rate": 1.1773531921157932e-06, + "loss": 1.0962, + "step": 141153 + }, + { + "epoch": 1.7, + "grad_norm": 12.109879652323897, + "learning_rate": 1.1770782032053718e-06, + "loss": 1.0577, + "step": 141156 + }, + { + "epoch": 1.7, + "grad_norm": 8.924314545627498, + "learning_rate": 1.1768032444044043e-06, + "loss": 1.0416, + "step": 141159 + }, + { + "epoch": 1.7, + "grad_norm": 8.56173702273519, + "learning_rate": 1.1765283157138307e-06, + "loss": 1.0752, + "step": 141162 + }, + { + "epoch": 1.7, + "grad_norm": 6.533995727289304, + "learning_rate": 1.1762534171345884e-06, + "loss": 1.4424, + "step": 141165 + }, + { + "epoch": 1.7, + "grad_norm": 14.217575810409933, + "learning_rate": 1.1759785486676123e-06, + "loss": 1.168, + "step": 141168 + }, + { + "epoch": 1.7, + "grad_norm": 4.517296128658535, + "learning_rate": 1.1757037103138436e-06, + "loss": 1.1233, + "step": 141171 + }, + { + "epoch": 1.7, + "grad_norm": 10.538469363329439, + "learning_rate": 1.1754289020742194e-06, + "loss": 1.1632, + "step": 141174 + }, + { + "epoch": 1.7, + "grad_norm": 7.619932454344117, + "learning_rate": 1.17515412394968e-06, + "loss": 0.8047, + "step": 141177 + }, + { + "epoch": 1.7, + "grad_norm": 22.950875153298682, + "learning_rate": 1.174879375941158e-06, + "loss": 0.9994, + "step": 141180 + }, + { + "epoch": 1.7, + "grad_norm": 15.584384303531293, + "learning_rate": 1.1746046580495973e-06, + "loss": 1.1477, + "step": 141183 + }, + { + "epoch": 1.7, + "grad_norm": 13.591516536895293, + "learning_rate": 1.1743299702759292e-06, + "loss": 1.6043, + "step": 141186 + }, + { + "epoch": 1.7, + "grad_norm": 6.149047850897475, + "learning_rate": 1.1740553126210962e-06, + "loss": 1.1567, + "step": 141189 + }, + { + "epoch": 1.7, + "grad_norm": 4.7634339454530945, + "learning_rate": 1.1737806850860312e-06, + "loss": 0.8009, + "step": 141192 + }, + { + "epoch": 1.7, + "grad_norm": 8.755635163693427, + "learning_rate": 1.173506087671673e-06, + "loss": 0.9564, + "step": 141195 + }, + { + "epoch": 1.7, + "grad_norm": 19.411441748168116, + "learning_rate": 1.1732315203789603e-06, + "loss": 1.0055, + "step": 141198 + }, + { + "epoch": 1.7, + "grad_norm": 13.697512908157274, + "learning_rate": 1.1729569832088283e-06, + "loss": 1.0608, + "step": 141201 + }, + { + "epoch": 1.7, + "grad_norm": 9.312918665028793, + "learning_rate": 1.1726824761622157e-06, + "loss": 1.0037, + "step": 141204 + }, + { + "epoch": 1.7, + "grad_norm": 19.57755953954007, + "learning_rate": 1.1724079992400594e-06, + "loss": 1.4739, + "step": 141207 + }, + { + "epoch": 1.7, + "grad_norm": 6.875097598114782, + "learning_rate": 1.1721335524432931e-06, + "loss": 1.0472, + "step": 141210 + }, + { + "epoch": 1.7, + "grad_norm": 6.750517298005507, + "learning_rate": 1.1718591357728547e-06, + "loss": 1.1974, + "step": 141213 + }, + { + "epoch": 1.7, + "grad_norm": 3.2810896709464887, + "learning_rate": 1.171584749229684e-06, + "loss": 1.0869, + "step": 141216 + }, + { + "epoch": 1.7, + "grad_norm": 18.45192678897252, + "learning_rate": 1.1713103928147118e-06, + "loss": 1.0834, + "step": 141219 + }, + { + "epoch": 1.7, + "grad_norm": 3.0825796131119416, + "learning_rate": 1.1710360665288766e-06, + "loss": 1.2456, + "step": 141222 + }, + { + "epoch": 1.7, + "grad_norm": 8.436655048575556, + "learning_rate": 1.170761770373119e-06, + "loss": 1.2628, + "step": 141225 + }, + { + "epoch": 1.7, + "grad_norm": 10.21428658582607, + "learning_rate": 1.1704875043483678e-06, + "loss": 1.0743, + "step": 141228 + }, + { + "epoch": 1.7, + "grad_norm": 57.36233930082773, + "learning_rate": 1.1702132684555645e-06, + "loss": 1.1307, + "step": 141231 + }, + { + "epoch": 1.7, + "grad_norm": 12.022205490294175, + "learning_rate": 1.1699390626956409e-06, + "loss": 1.2716, + "step": 141234 + }, + { + "epoch": 1.7, + "grad_norm": 7.950404184146605, + "learning_rate": 1.1696648870695348e-06, + "loss": 0.9619, + "step": 141237 + }, + { + "epoch": 1.7, + "grad_norm": 8.479291358752011, + "learning_rate": 1.1693907415781813e-06, + "loss": 1.2598, + "step": 141240 + }, + { + "epoch": 1.7, + "grad_norm": 6.801479301798621, + "learning_rate": 1.1691166262225184e-06, + "loss": 0.908, + "step": 141243 + }, + { + "epoch": 1.7, + "grad_norm": 3.9640982255211825, + "learning_rate": 1.1688425410034787e-06, + "loss": 1.072, + "step": 141246 + }, + { + "epoch": 1.7, + "grad_norm": 4.153256667101733, + "learning_rate": 1.1685684859219992e-06, + "loss": 1.474, + "step": 141249 + }, + { + "epoch": 1.7, + "grad_norm": 8.360914814403607, + "learning_rate": 1.1682944609790137e-06, + "loss": 1.4644, + "step": 141252 + }, + { + "epoch": 1.7, + "grad_norm": 6.655727413935912, + "learning_rate": 1.1680204661754567e-06, + "loss": 1.0793, + "step": 141255 + }, + { + "epoch": 1.7, + "grad_norm": 11.194662465092716, + "learning_rate": 1.167746501512267e-06, + "loss": 0.8812, + "step": 141258 + }, + { + "epoch": 1.7, + "grad_norm": 12.606022465040263, + "learning_rate": 1.1674725669903753e-06, + "loss": 1.2236, + "step": 141261 + }, + { + "epoch": 1.7, + "grad_norm": 8.240452073745278, + "learning_rate": 1.167198662610719e-06, + "loss": 1.1389, + "step": 141264 + }, + { + "epoch": 1.7, + "grad_norm": 14.818602322682116, + "learning_rate": 1.166924788374233e-06, + "loss": 0.944, + "step": 141267 + }, + { + "epoch": 1.7, + "grad_norm": 10.9500705185344, + "learning_rate": 1.1666509442818497e-06, + "loss": 1.0581, + "step": 141270 + }, + { + "epoch": 1.7, + "grad_norm": 12.039817753041241, + "learning_rate": 1.1663771303345062e-06, + "loss": 1.1569, + "step": 141273 + }, + { + "epoch": 1.7, + "grad_norm": 17.008610680532115, + "learning_rate": 1.166103346533134e-06, + "loss": 0.764, + "step": 141276 + }, + { + "epoch": 1.7, + "grad_norm": 14.707264830070846, + "learning_rate": 1.16582959287867e-06, + "loss": 1.1213, + "step": 141279 + }, + { + "epoch": 1.7, + "grad_norm": 7.414244588691393, + "learning_rate": 1.1655558693720459e-06, + "loss": 1.1437, + "step": 141282 + }, + { + "epoch": 1.7, + "grad_norm": 12.174163817183128, + "learning_rate": 1.1652821760142007e-06, + "loss": 1.2608, + "step": 141285 + }, + { + "epoch": 1.7, + "grad_norm": 7.4181052965568615, + "learning_rate": 1.1650085128060628e-06, + "loss": 1.1617, + "step": 141288 + }, + { + "epoch": 1.7, + "grad_norm": 7.496535131520981, + "learning_rate": 1.1647348797485713e-06, + "loss": 1.1088, + "step": 141291 + }, + { + "epoch": 1.7, + "grad_norm": 15.280098679474413, + "learning_rate": 1.1644612768426544e-06, + "loss": 1.0279, + "step": 141294 + }, + { + "epoch": 1.7, + "grad_norm": 3.4658059905615066, + "learning_rate": 1.1641877040892503e-06, + "loss": 1.177, + "step": 141297 + }, + { + "epoch": 1.7, + "grad_norm": 7.391896823128356, + "learning_rate": 1.163914161489289e-06, + "loss": 0.9437, + "step": 141300 + }, + { + "epoch": 1.7, + "grad_norm": 2.9903219408835633, + "learning_rate": 1.1636406490437068e-06, + "loss": 0.9562, + "step": 141303 + }, + { + "epoch": 1.7, + "grad_norm": 6.106812066372851, + "learning_rate": 1.1633671667534352e-06, + "loss": 1.3025, + "step": 141306 + }, + { + "epoch": 1.7, + "grad_norm": 8.485630039855224, + "learning_rate": 1.163093714619411e-06, + "loss": 1.1302, + "step": 141309 + }, + { + "epoch": 1.7, + "grad_norm": 7.132359843158643, + "learning_rate": 1.1628202926425637e-06, + "loss": 1.1221, + "step": 141312 + }, + { + "epoch": 1.7, + "grad_norm": 15.923462959542414, + "learning_rate": 1.1625469008238289e-06, + "loss": 1.1368, + "step": 141315 + }, + { + "epoch": 1.7, + "grad_norm": 2.7022986726812013, + "learning_rate": 1.162273539164136e-06, + "loss": 0.9709, + "step": 141318 + }, + { + "epoch": 1.7, + "grad_norm": 44.21505313623406, + "learning_rate": 1.16200020766442e-06, + "loss": 1.0696, + "step": 141321 + }, + { + "epoch": 1.7, + "grad_norm": 5.096657346059417, + "learning_rate": 1.1617269063256164e-06, + "loss": 1.1443, + "step": 141324 + }, + { + "epoch": 1.7, + "grad_norm": 14.291182840739276, + "learning_rate": 1.1614536351486527e-06, + "loss": 1.1525, + "step": 141327 + }, + { + "epoch": 1.7, + "grad_norm": 5.836994439850618, + "learning_rate": 1.1611803941344646e-06, + "loss": 0.9333, + "step": 141330 + }, + { + "epoch": 1.7, + "grad_norm": 9.609565861770017, + "learning_rate": 1.160907183283987e-06, + "loss": 1.0136, + "step": 141333 + }, + { + "epoch": 1.7, + "grad_norm": 4.932305588341418, + "learning_rate": 1.1606340025981455e-06, + "loss": 1.0518, + "step": 141336 + }, + { + "epoch": 1.7, + "grad_norm": 7.642655644680781, + "learning_rate": 1.1603608520778798e-06, + "loss": 0.8699, + "step": 141339 + }, + { + "epoch": 1.7, + "grad_norm": 5.278755018603237, + "learning_rate": 1.1600877317241155e-06, + "loss": 0.9698, + "step": 141342 + }, + { + "epoch": 1.7, + "grad_norm": 7.237805075472418, + "learning_rate": 1.1598146415377886e-06, + "loss": 1.5552, + "step": 141345 + }, + { + "epoch": 1.7, + "grad_norm": 11.323029652530993, + "learning_rate": 1.1595415815198286e-06, + "loss": 1.2502, + "step": 141348 + }, + { + "epoch": 1.7, + "grad_norm": 14.680068036193472, + "learning_rate": 1.1592685516711722e-06, + "loss": 0.9641, + "step": 141351 + }, + { + "epoch": 1.7, + "grad_norm": 3.696883394921545, + "learning_rate": 1.1589955519927454e-06, + "loss": 1.0282, + "step": 141354 + }, + { + "epoch": 1.7, + "grad_norm": 25.556421361976128, + "learning_rate": 1.1587225824854841e-06, + "loss": 1.265, + "step": 141357 + }, + { + "epoch": 1.7, + "grad_norm": 4.271510195834048, + "learning_rate": 1.1584496431503168e-06, + "loss": 1.3726, + "step": 141360 + }, + { + "epoch": 1.7, + "grad_norm": 8.258483368603018, + "learning_rate": 1.1581767339881755e-06, + "loss": 1.3938, + "step": 141363 + }, + { + "epoch": 1.7, + "grad_norm": 6.167527114184326, + "learning_rate": 1.1579038549999944e-06, + "loss": 1.0885, + "step": 141366 + }, + { + "epoch": 1.7, + "grad_norm": 4.68097946605703, + "learning_rate": 1.1576310061867002e-06, + "loss": 1.2485, + "step": 141369 + }, + { + "epoch": 1.7, + "grad_norm": 8.314101849622674, + "learning_rate": 1.1573581875492257e-06, + "loss": 1.3309, + "step": 141372 + }, + { + "epoch": 1.7, + "grad_norm": 12.8757753843437, + "learning_rate": 1.1570853990885056e-06, + "loss": 1.0766, + "step": 141375 + }, + { + "epoch": 1.7, + "grad_norm": 5.117858904536164, + "learning_rate": 1.156812640805468e-06, + "loss": 1.2081, + "step": 141378 + }, + { + "epoch": 1.7, + "grad_norm": 20.732779172741964, + "learning_rate": 1.1565399127010413e-06, + "loss": 1.1716, + "step": 141381 + }, + { + "epoch": 1.7, + "grad_norm": 11.532120848444267, + "learning_rate": 1.1562672147761578e-06, + "loss": 0.9161, + "step": 141384 + }, + { + "epoch": 1.7, + "grad_norm": 6.71971216730434, + "learning_rate": 1.1559945470317502e-06, + "loss": 1.3415, + "step": 141387 + }, + { + "epoch": 1.7, + "grad_norm": 4.361037346028529, + "learning_rate": 1.1557219094687467e-06, + "loss": 1.1411, + "step": 141390 + }, + { + "epoch": 1.7, + "grad_norm": 7.1670331037150605, + "learning_rate": 1.1554493020880807e-06, + "loss": 0.8917, + "step": 141393 + }, + { + "epoch": 1.7, + "grad_norm": 6.292776856156366, + "learning_rate": 1.1551767248906776e-06, + "loss": 0.9835, + "step": 141396 + }, + { + "epoch": 1.7, + "grad_norm": 5.155253766695385, + "learning_rate": 1.1549041778774738e-06, + "loss": 1.2072, + "step": 141399 + }, + { + "epoch": 1.7, + "grad_norm": 7.5916340550337225, + "learning_rate": 1.1546316610493936e-06, + "loss": 1.2182, + "step": 141402 + }, + { + "epoch": 1.7, + "grad_norm": 15.300518567123842, + "learning_rate": 1.1543591744073712e-06, + "loss": 1.2363, + "step": 141405 + }, + { + "epoch": 1.7, + "grad_norm": 12.085191408601297, + "learning_rate": 1.154086717952333e-06, + "loss": 1.0557, + "step": 141408 + }, + { + "epoch": 1.7, + "grad_norm": 12.77169415305625, + "learning_rate": 1.1538142916852102e-06, + "loss": 0.9232, + "step": 141411 + }, + { + "epoch": 1.7, + "grad_norm": 14.135925945273907, + "learning_rate": 1.1535418956069333e-06, + "loss": 1.0682, + "step": 141414 + }, + { + "epoch": 1.7, + "grad_norm": 10.022698203795285, + "learning_rate": 1.1532695297184338e-06, + "loss": 0.8208, + "step": 141417 + }, + { + "epoch": 1.7, + "grad_norm": 6.096011897819047, + "learning_rate": 1.1529971940206386e-06, + "loss": 1.5066, + "step": 141420 + }, + { + "epoch": 1.7, + "grad_norm": 5.676266730500826, + "learning_rate": 1.1527248885144747e-06, + "loss": 0.9949, + "step": 141423 + }, + { + "epoch": 1.7, + "grad_norm": 11.50375399812828, + "learning_rate": 1.152452613200874e-06, + "loss": 0.9226, + "step": 141426 + }, + { + "epoch": 1.7, + "grad_norm": 5.0177363043069825, + "learning_rate": 1.1521803680807664e-06, + "loss": 0.9456, + "step": 141429 + }, + { + "epoch": 1.7, + "grad_norm": 13.240280714777438, + "learning_rate": 1.1519081531550813e-06, + "loss": 1.2942, + "step": 141432 + }, + { + "epoch": 1.7, + "grad_norm": 14.716893654494179, + "learning_rate": 1.1516359684247459e-06, + "loss": 1.0928, + "step": 141435 + }, + { + "epoch": 1.7, + "grad_norm": 3.271636941830561, + "learning_rate": 1.151363813890688e-06, + "loss": 1.2272, + "step": 141438 + }, + { + "epoch": 1.7, + "grad_norm": 5.083808933118687, + "learning_rate": 1.1510916895538415e-06, + "loss": 1.0002, + "step": 141441 + }, + { + "epoch": 1.7, + "grad_norm": 11.741110087417821, + "learning_rate": 1.150819595415129e-06, + "loss": 1.2215, + "step": 141444 + }, + { + "epoch": 1.7, + "grad_norm": 17.306961681996683, + "learning_rate": 1.1505475314754845e-06, + "loss": 1.0596, + "step": 141447 + }, + { + "epoch": 1.7, + "grad_norm": 2.770584542254314, + "learning_rate": 1.150275497735831e-06, + "loss": 1.2671, + "step": 141450 + }, + { + "epoch": 1.7, + "grad_norm": 4.88578726804761, + "learning_rate": 1.1500034941970995e-06, + "loss": 0.9248, + "step": 141453 + }, + { + "epoch": 1.7, + "grad_norm": 6.11839159368035, + "learning_rate": 1.1497315208602189e-06, + "loss": 1.2166, + "step": 141456 + }, + { + "epoch": 1.7, + "grad_norm": 10.576013787867398, + "learning_rate": 1.1494595777261185e-06, + "loss": 1.1784, + "step": 141459 + }, + { + "epoch": 1.7, + "grad_norm": 4.715956058885488, + "learning_rate": 1.1491876647957244e-06, + "loss": 0.9931, + "step": 141462 + }, + { + "epoch": 1.7, + "grad_norm": 5.209038274838015, + "learning_rate": 1.1489157820699636e-06, + "loss": 1.0712, + "step": 141465 + }, + { + "epoch": 1.7, + "grad_norm": 11.251072079951447, + "learning_rate": 1.1486439295497643e-06, + "loss": 1.4414, + "step": 141468 + }, + { + "epoch": 1.7, + "grad_norm": 9.03144187742117, + "learning_rate": 1.1483721072360554e-06, + "loss": 1.3643, + "step": 141471 + }, + { + "epoch": 1.7, + "grad_norm": 6.794612149559943, + "learning_rate": 1.1481003151297653e-06, + "loss": 1.1751, + "step": 141474 + }, + { + "epoch": 1.7, + "grad_norm": 9.838376876099503, + "learning_rate": 1.1478285532318202e-06, + "loss": 1.1604, + "step": 141477 + }, + { + "epoch": 1.7, + "grad_norm": 10.294225283641621, + "learning_rate": 1.1475568215431465e-06, + "loss": 0.9161, + "step": 141480 + }, + { + "epoch": 1.7, + "grad_norm": 6.4561110591543995, + "learning_rate": 1.147285120064674e-06, + "loss": 1.0792, + "step": 141483 + }, + { + "epoch": 1.7, + "grad_norm": 7.794314372262866, + "learning_rate": 1.1470134487973295e-06, + "loss": 1.1037, + "step": 141486 + }, + { + "epoch": 1.7, + "grad_norm": 6.843607085312923, + "learning_rate": 1.1467418077420378e-06, + "loss": 1.0522, + "step": 141489 + }, + { + "epoch": 1.7, + "grad_norm": 16.01575885813488, + "learning_rate": 1.146470196899726e-06, + "loss": 1.3168, + "step": 141492 + }, + { + "epoch": 1.7, + "grad_norm": 14.373993422589725, + "learning_rate": 1.1461986162713235e-06, + "loss": 0.9967, + "step": 141495 + }, + { + "epoch": 1.7, + "grad_norm": 16.307281864688218, + "learning_rate": 1.1459270658577559e-06, + "loss": 0.8705, + "step": 141498 + }, + { + "epoch": 1.7, + "grad_norm": 5.260204598883092, + "learning_rate": 1.1456555456599516e-06, + "loss": 1.0195, + "step": 141501 + }, + { + "epoch": 1.7, + "grad_norm": 6.467034170417715, + "learning_rate": 1.1453840556788365e-06, + "loss": 0.9134, + "step": 141504 + }, + { + "epoch": 1.7, + "grad_norm": 10.229539965221159, + "learning_rate": 1.1451125959153331e-06, + "loss": 1.1069, + "step": 141507 + }, + { + "epoch": 1.7, + "grad_norm": 11.692530187984309, + "learning_rate": 1.1448411663703707e-06, + "loss": 1.6253, + "step": 141510 + }, + { + "epoch": 1.7, + "grad_norm": 15.618443829725111, + "learning_rate": 1.1445697670448785e-06, + "loss": 0.843, + "step": 141513 + }, + { + "epoch": 1.7, + "grad_norm": 5.454789046671502, + "learning_rate": 1.1442983979397782e-06, + "loss": 0.9313, + "step": 141516 + }, + { + "epoch": 1.7, + "grad_norm": 22.428502390189383, + "learning_rate": 1.1440270590559978e-06, + "loss": 1.0494, + "step": 141519 + }, + { + "epoch": 1.7, + "grad_norm": 3.5562951860656997, + "learning_rate": 1.143755750394462e-06, + "loss": 1.1932, + "step": 141522 + }, + { + "epoch": 1.7, + "grad_norm": 8.395462376064334, + "learning_rate": 1.1434844719561012e-06, + "loss": 1.0573, + "step": 141525 + }, + { + "epoch": 1.7, + "grad_norm": 7.247235628497148, + "learning_rate": 1.1432132237418369e-06, + "loss": 1.2225, + "step": 141528 + }, + { + "epoch": 1.7, + "grad_norm": 15.357286804786746, + "learning_rate": 1.1429420057525931e-06, + "loss": 0.8526, + "step": 141531 + }, + { + "epoch": 1.7, + "grad_norm": 35.34117071658682, + "learning_rate": 1.1426708179892987e-06, + "loss": 1.1095, + "step": 141534 + }, + { + "epoch": 1.7, + "grad_norm": 10.425173916612748, + "learning_rate": 1.1423996604528785e-06, + "loss": 0.8912, + "step": 141537 + }, + { + "epoch": 1.7, + "grad_norm": 7.42818040399294, + "learning_rate": 1.14212853314426e-06, + "loss": 1.0944, + "step": 141540 + }, + { + "epoch": 1.7, + "grad_norm": 6.544928643584821, + "learning_rate": 1.1418574360643641e-06, + "loss": 1.1305, + "step": 141543 + }, + { + "epoch": 1.7, + "grad_norm": 9.458910232690563, + "learning_rate": 1.1415863692141193e-06, + "loss": 1.6641, + "step": 141546 + }, + { + "epoch": 1.7, + "grad_norm": 3.096595523270775, + "learning_rate": 1.141315332594448e-06, + "loss": 1.164, + "step": 141549 + }, + { + "epoch": 1.7, + "grad_norm": 10.482170777188827, + "learning_rate": 1.1410443262062765e-06, + "loss": 1.2634, + "step": 141552 + }, + { + "epoch": 1.7, + "grad_norm": 29.735197497065617, + "learning_rate": 1.1407733500505325e-06, + "loss": 1.197, + "step": 141555 + }, + { + "epoch": 1.7, + "grad_norm": 7.9313893021920165, + "learning_rate": 1.1405024041281342e-06, + "loss": 0.8179, + "step": 141558 + }, + { + "epoch": 1.7, + "grad_norm": 17.787586797846092, + "learning_rate": 1.1402314884400112e-06, + "loss": 1.4615, + "step": 141561 + }, + { + "epoch": 1.7, + "grad_norm": 5.956803667025877, + "learning_rate": 1.1399606029870868e-06, + "loss": 1.2407, + "step": 141564 + }, + { + "epoch": 1.7, + "grad_norm": 9.81311174208428, + "learning_rate": 1.1396897477702874e-06, + "loss": 1.1365, + "step": 141567 + }, + { + "epoch": 1.7, + "grad_norm": 7.015268919049455, + "learning_rate": 1.1394189227905361e-06, + "loss": 1.6666, + "step": 141570 + }, + { + "epoch": 1.7, + "grad_norm": 5.48175854943253, + "learning_rate": 1.1391481280487536e-06, + "loss": 1.2121, + "step": 141573 + }, + { + "epoch": 1.7, + "grad_norm": 13.995392145463116, + "learning_rate": 1.1388773635458672e-06, + "loss": 1.0173, + "step": 141576 + }, + { + "epoch": 1.7, + "grad_norm": 6.470719070324238, + "learning_rate": 1.1386066292828002e-06, + "loss": 0.8665, + "step": 141579 + }, + { + "epoch": 1.7, + "grad_norm": 2.336125573044673, + "learning_rate": 1.1383359252604797e-06, + "loss": 1.2563, + "step": 141582 + }, + { + "epoch": 1.7, + "grad_norm": 4.946459841533299, + "learning_rate": 1.138065251479824e-06, + "loss": 1.3981, + "step": 141585 + }, + { + "epoch": 1.7, + "grad_norm": 16.183389130290703, + "learning_rate": 1.1377946079417613e-06, + "loss": 1.117, + "step": 141588 + }, + { + "epoch": 1.7, + "grad_norm": 16.344885055800187, + "learning_rate": 1.1375239946472127e-06, + "loss": 0.9675, + "step": 141591 + }, + { + "epoch": 1.7, + "grad_norm": 24.25783464454022, + "learning_rate": 1.1372534115971034e-06, + "loss": 0.9524, + "step": 141594 + }, + { + "epoch": 1.7, + "grad_norm": 10.784646811354607, + "learning_rate": 1.1369828587923537e-06, + "loss": 0.9798, + "step": 141597 + }, + { + "epoch": 1.7, + "grad_norm": 8.165060772670378, + "learning_rate": 1.1367123362338894e-06, + "loss": 0.9487, + "step": 141600 + }, + { + "epoch": 1.7, + "grad_norm": 15.506028484602032, + "learning_rate": 1.1364418439226343e-06, + "loss": 1.2761, + "step": 141603 + }, + { + "epoch": 1.7, + "grad_norm": 6.641906339962583, + "learning_rate": 1.1361713818595088e-06, + "loss": 1.3518, + "step": 141606 + }, + { + "epoch": 1.7, + "grad_norm": 14.911732879739557, + "learning_rate": 1.135900950045441e-06, + "loss": 1.1631, + "step": 141609 + }, + { + "epoch": 1.7, + "grad_norm": 11.490193224814282, + "learning_rate": 1.1356305484813502e-06, + "loss": 1.1856, + "step": 141612 + }, + { + "epoch": 1.7, + "grad_norm": 15.860478128571541, + "learning_rate": 1.1353601771681566e-06, + "loss": 0.9188, + "step": 141615 + }, + { + "epoch": 1.7, + "grad_norm": 8.303871966559091, + "learning_rate": 1.1350898361067864e-06, + "loss": 1.2289, + "step": 141618 + }, + { + "epoch": 1.7, + "grad_norm": 3.5585359901207907, + "learning_rate": 1.1348195252981631e-06, + "loss": 1.1773, + "step": 141621 + }, + { + "epoch": 1.7, + "grad_norm": 7.989507290542982, + "learning_rate": 1.134549244743206e-06, + "loss": 1.2496, + "step": 141624 + }, + { + "epoch": 1.7, + "grad_norm": 21.331038495823755, + "learning_rate": 1.134278994442839e-06, + "loss": 1.3946, + "step": 141627 + }, + { + "epoch": 1.7, + "grad_norm": 24.070620653753075, + "learning_rate": 1.1340087743979845e-06, + "loss": 1.1275, + "step": 141630 + }, + { + "epoch": 1.7, + "grad_norm": 14.065433056334896, + "learning_rate": 1.133738584609566e-06, + "loss": 1.1705, + "step": 141633 + }, + { + "epoch": 1.7, + "grad_norm": 71.48459113649537, + "learning_rate": 1.1334684250785044e-06, + "loss": 1.0129, + "step": 141636 + }, + { + "epoch": 1.7, + "grad_norm": 2.305862965121198, + "learning_rate": 1.1331982958057186e-06, + "loss": 1.5964, + "step": 141639 + }, + { + "epoch": 1.7, + "grad_norm": 5.855559079561672, + "learning_rate": 1.1329281967921346e-06, + "loss": 0.9882, + "step": 141642 + }, + { + "epoch": 1.7, + "grad_norm": 8.79931967745037, + "learning_rate": 1.1326581280386716e-06, + "loss": 0.9804, + "step": 141645 + }, + { + "epoch": 1.7, + "grad_norm": 3.707625589403714, + "learning_rate": 1.1323880895462548e-06, + "loss": 1.1193, + "step": 141648 + }, + { + "epoch": 1.7, + "grad_norm": 3.183787503374067, + "learning_rate": 1.1321180813158016e-06, + "loss": 1.2377, + "step": 141651 + }, + { + "epoch": 1.7, + "grad_norm": 4.5428856465801655, + "learning_rate": 1.1318481033482364e-06, + "loss": 1.3251, + "step": 141654 + }, + { + "epoch": 1.7, + "grad_norm": 14.360750919364584, + "learning_rate": 1.1315781556444783e-06, + "loss": 0.8457, + "step": 141657 + }, + { + "epoch": 1.7, + "grad_norm": 3.16927575121037, + "learning_rate": 1.1313082382054486e-06, + "loss": 1.02, + "step": 141660 + }, + { + "epoch": 1.7, + "grad_norm": 3.00674009439608, + "learning_rate": 1.1310383510320722e-06, + "loss": 1.1376, + "step": 141663 + }, + { + "epoch": 1.7, + "grad_norm": 6.434080066037922, + "learning_rate": 1.130768494125265e-06, + "loss": 1.3303, + "step": 141666 + }, + { + "epoch": 1.7, + "grad_norm": 4.868588838136668, + "learning_rate": 1.1304986674859508e-06, + "loss": 1.2872, + "step": 141669 + }, + { + "epoch": 1.7, + "grad_norm": 3.7062437150763388, + "learning_rate": 1.130228871115049e-06, + "loss": 1.1958, + "step": 141672 + }, + { + "epoch": 1.7, + "grad_norm": 2.3129623824888967, + "learning_rate": 1.1299591050134839e-06, + "loss": 1.0577, + "step": 141675 + }, + { + "epoch": 1.7, + "grad_norm": 1.8535589489826234, + "learning_rate": 1.1296893691821731e-06, + "loss": 1.1948, + "step": 141678 + }, + { + "epoch": 1.7, + "grad_norm": 8.291335477997123, + "learning_rate": 1.1294196636220356e-06, + "loss": 1.1628, + "step": 141681 + }, + { + "epoch": 1.7, + "grad_norm": 15.245774944757565, + "learning_rate": 1.129149988333993e-06, + "loss": 1.0895, + "step": 141684 + }, + { + "epoch": 1.7, + "grad_norm": 6.7952464549188365, + "learning_rate": 1.1288803433189666e-06, + "loss": 1.1975, + "step": 141687 + }, + { + "epoch": 1.7, + "grad_norm": 7.4958289704602885, + "learning_rate": 1.128610728577878e-06, + "loss": 1.0874, + "step": 141690 + }, + { + "epoch": 1.7, + "grad_norm": 13.890950174953922, + "learning_rate": 1.1283411441116443e-06, + "loss": 1.5811, + "step": 141693 + }, + { + "epoch": 1.7, + "grad_norm": 11.266974267560887, + "learning_rate": 1.128071589921188e-06, + "loss": 1.402, + "step": 141696 + }, + { + "epoch": 1.7, + "grad_norm": 9.278450092801455, + "learning_rate": 1.1278020660074252e-06, + "loss": 1.1321, + "step": 141699 + }, + { + "epoch": 1.7, + "grad_norm": 16.146399852269994, + "learning_rate": 1.1275325723712816e-06, + "loss": 1.0083, + "step": 141702 + }, + { + "epoch": 1.7, + "grad_norm": 25.501745042631423, + "learning_rate": 1.127263109013671e-06, + "loss": 1.2424, + "step": 141705 + }, + { + "epoch": 1.7, + "grad_norm": 5.6740833847642325, + "learning_rate": 1.1269936759355149e-06, + "loss": 1.1105, + "step": 141708 + }, + { + "epoch": 1.7, + "grad_norm": 4.100622510831035, + "learning_rate": 1.1267242731377337e-06, + "loss": 1.2475, + "step": 141711 + }, + { + "epoch": 1.7, + "grad_norm": 5.651798341531607, + "learning_rate": 1.1264549006212466e-06, + "loss": 1.1328, + "step": 141714 + }, + { + "epoch": 1.7, + "grad_norm": 5.034351289681017, + "learning_rate": 1.126185558386974e-06, + "loss": 1.497, + "step": 141717 + }, + { + "epoch": 1.7, + "grad_norm": 4.185033039327751, + "learning_rate": 1.1259162464358342e-06, + "loss": 1.0681, + "step": 141720 + }, + { + "epoch": 1.7, + "grad_norm": 8.398332270378928, + "learning_rate": 1.125646964768744e-06, + "loss": 1.0069, + "step": 141723 + }, + { + "epoch": 1.7, + "grad_norm": 2.3117930286990642, + "learning_rate": 1.125377713386624e-06, + "loss": 0.9722, + "step": 141726 + }, + { + "epoch": 1.7, + "grad_norm": 5.100169563469532, + "learning_rate": 1.1251084922903954e-06, + "loss": 1.425, + "step": 141729 + }, + { + "epoch": 1.7, + "grad_norm": 5.4728254040119975, + "learning_rate": 1.1248393014809722e-06, + "loss": 1.0389, + "step": 141732 + }, + { + "epoch": 1.7, + "grad_norm": 5.061101330887574, + "learning_rate": 1.1245701409592768e-06, + "loss": 0.8694, + "step": 141735 + }, + { + "epoch": 1.7, + "grad_norm": 8.054403414796543, + "learning_rate": 1.1243010107262275e-06, + "loss": 1.0587, + "step": 141738 + }, + { + "epoch": 1.7, + "grad_norm": 11.714743256314497, + "learning_rate": 1.1240319107827403e-06, + "loss": 1.2865, + "step": 141741 + }, + { + "epoch": 1.7, + "grad_norm": 9.752506619272268, + "learning_rate": 1.1237628411297374e-06, + "loss": 1.2643, + "step": 141744 + }, + { + "epoch": 1.7, + "grad_norm": 25.61333875808873, + "learning_rate": 1.1234938017681318e-06, + "loss": 1.4041, + "step": 141747 + }, + { + "epoch": 1.7, + "grad_norm": 6.708959527504319, + "learning_rate": 1.123224792698846e-06, + "loss": 1.2786, + "step": 141750 + }, + { + "epoch": 1.7, + "grad_norm": 5.047115946231151, + "learning_rate": 1.1229558139227947e-06, + "loss": 1.0517, + "step": 141753 + }, + { + "epoch": 1.7, + "grad_norm": 6.08784536542987, + "learning_rate": 1.1226868654409007e-06, + "loss": 1.2015, + "step": 141756 + }, + { + "epoch": 1.7, + "grad_norm": 12.882492998479627, + "learning_rate": 1.1224179472540775e-06, + "loss": 1.194, + "step": 141759 + }, + { + "epoch": 1.7, + "grad_norm": 3.4332682245517914, + "learning_rate": 1.1221490593632456e-06, + "loss": 1.2035, + "step": 141762 + }, + { + "epoch": 1.7, + "grad_norm": 8.694091746508755, + "learning_rate": 1.1218802017693186e-06, + "loss": 1.1742, + "step": 141765 + }, + { + "epoch": 1.7, + "grad_norm": 9.62370655420778, + "learning_rate": 1.1216113744732171e-06, + "loss": 1.2933, + "step": 141768 + }, + { + "epoch": 1.7, + "grad_norm": 7.772378253081004, + "learning_rate": 1.12134257747586e-06, + "loss": 0.8565, + "step": 141771 + }, + { + "epoch": 1.7, + "grad_norm": 5.066829963321706, + "learning_rate": 1.1210738107781605e-06, + "loss": 1.0897, + "step": 141774 + }, + { + "epoch": 1.7, + "grad_norm": 10.528849846943377, + "learning_rate": 1.1208050743810383e-06, + "loss": 0.9612, + "step": 141777 + }, + { + "epoch": 1.7, + "grad_norm": 14.731270148552069, + "learning_rate": 1.120536368285412e-06, + "loss": 0.6995, + "step": 141780 + }, + { + "epoch": 1.7, + "grad_norm": 13.76374213160221, + "learning_rate": 1.1202676924921962e-06, + "loss": 1.1057, + "step": 141783 + }, + { + "epoch": 1.7, + "grad_norm": 9.353061623059153, + "learning_rate": 1.119999047002306e-06, + "loss": 1.2992, + "step": 141786 + }, + { + "epoch": 1.7, + "grad_norm": 6.118661475029135, + "learning_rate": 1.1197304318166613e-06, + "loss": 1.4242, + "step": 141789 + }, + { + "epoch": 1.71, + "grad_norm": 9.308937530911846, + "learning_rate": 1.1194618469361784e-06, + "loss": 1.1877, + "step": 141792 + }, + { + "epoch": 1.71, + "grad_norm": 9.260814329975462, + "learning_rate": 1.119193292361772e-06, + "loss": 1.2407, + "step": 141795 + }, + { + "epoch": 1.71, + "grad_norm": 3.5288705857117812, + "learning_rate": 1.1189247680943627e-06, + "loss": 1.014, + "step": 141798 + }, + { + "epoch": 1.71, + "grad_norm": 5.898596139960817, + "learning_rate": 1.1186562741348629e-06, + "loss": 1.4139, + "step": 141801 + }, + { + "epoch": 1.71, + "grad_norm": 3.626098181310465, + "learning_rate": 1.1183878104841916e-06, + "loss": 1.3483, + "step": 141804 + }, + { + "epoch": 1.71, + "grad_norm": 12.607850756918578, + "learning_rate": 1.1181193771432619e-06, + "loss": 1.3026, + "step": 141807 + }, + { + "epoch": 1.71, + "grad_norm": 4.7669325219159, + "learning_rate": 1.1178509741129929e-06, + "loss": 0.9775, + "step": 141810 + }, + { + "epoch": 1.71, + "grad_norm": 10.241812401343399, + "learning_rate": 1.117582601394298e-06, + "loss": 0.8578, + "step": 141813 + }, + { + "epoch": 1.71, + "grad_norm": 14.638018803855887, + "learning_rate": 1.1173142589880937e-06, + "loss": 0.9115, + "step": 141816 + }, + { + "epoch": 1.71, + "grad_norm": 6.831182169740074, + "learning_rate": 1.1170459468952965e-06, + "loss": 1.1013, + "step": 141819 + }, + { + "epoch": 1.71, + "grad_norm": 3.6846786794941613, + "learning_rate": 1.1167776651168239e-06, + "loss": 1.1882, + "step": 141822 + }, + { + "epoch": 1.71, + "grad_norm": 6.030870948562179, + "learning_rate": 1.1165094136535892e-06, + "loss": 1.0009, + "step": 141825 + }, + { + "epoch": 1.71, + "grad_norm": 13.288215396589475, + "learning_rate": 1.1162411925065053e-06, + "loss": 1.1596, + "step": 141828 + }, + { + "epoch": 1.71, + "grad_norm": 9.937835886646639, + "learning_rate": 1.1159730016764913e-06, + "loss": 1.164, + "step": 141831 + }, + { + "epoch": 1.71, + "grad_norm": 6.289947931665385, + "learning_rate": 1.115704841164461e-06, + "loss": 1.0363, + "step": 141834 + }, + { + "epoch": 1.71, + "grad_norm": 10.320292562890554, + "learning_rate": 1.1154367109713326e-06, + "loss": 1.2863, + "step": 141837 + }, + { + "epoch": 1.71, + "grad_norm": 10.486717949695908, + "learning_rate": 1.1151686110980164e-06, + "loss": 1.2618, + "step": 141840 + }, + { + "epoch": 1.71, + "grad_norm": 8.687516516231828, + "learning_rate": 1.1149005415454283e-06, + "loss": 1.2995, + "step": 141843 + }, + { + "epoch": 1.71, + "grad_norm": 6.279732272951534, + "learning_rate": 1.1146325023144877e-06, + "loss": 1.0117, + "step": 141846 + }, + { + "epoch": 1.71, + "grad_norm": 6.026663027663815, + "learning_rate": 1.1143644934061027e-06, + "loss": 1.078, + "step": 141849 + }, + { + "epoch": 1.71, + "grad_norm": 11.06123496703247, + "learning_rate": 1.1140965148211945e-06, + "loss": 0.8693, + "step": 141852 + }, + { + "epoch": 1.71, + "grad_norm": 7.700799533348587, + "learning_rate": 1.1138285665606707e-06, + "loss": 1.1049, + "step": 141855 + }, + { + "epoch": 1.71, + "grad_norm": 8.177766333648362, + "learning_rate": 1.1135606486254502e-06, + "loss": 1.0145, + "step": 141858 + }, + { + "epoch": 1.71, + "grad_norm": 13.514320974884185, + "learning_rate": 1.1132927610164457e-06, + "loss": 1.2739, + "step": 141861 + }, + { + "epoch": 1.71, + "grad_norm": 15.406583251968673, + "learning_rate": 1.113024903734574e-06, + "loss": 1.4066, + "step": 141864 + }, + { + "epoch": 1.71, + "grad_norm": 12.717282453738166, + "learning_rate": 1.1127570767807462e-06, + "loss": 1.3154, + "step": 141867 + }, + { + "epoch": 1.71, + "grad_norm": 5.4482384272737105, + "learning_rate": 1.1124892801558784e-06, + "loss": 1.3753, + "step": 141870 + }, + { + "epoch": 1.71, + "grad_norm": 24.921277723433434, + "learning_rate": 1.1122215138608816e-06, + "loss": 1.0448, + "step": 141873 + }, + { + "epoch": 1.71, + "grad_norm": 13.444476346334007, + "learning_rate": 1.1119537778966717e-06, + "loss": 1.1574, + "step": 141876 + }, + { + "epoch": 1.71, + "grad_norm": 4.398567424499244, + "learning_rate": 1.1116860722641643e-06, + "loss": 1.1464, + "step": 141879 + }, + { + "epoch": 1.71, + "grad_norm": 23.688071333493333, + "learning_rate": 1.1114183969642677e-06, + "loss": 0.943, + "step": 141882 + }, + { + "epoch": 1.71, + "grad_norm": 21.05428638829303, + "learning_rate": 1.1111507519978993e-06, + "loss": 1.365, + "step": 141885 + }, + { + "epoch": 1.71, + "grad_norm": 3.330356292334608, + "learning_rate": 1.1108831373659735e-06, + "loss": 1.3814, + "step": 141888 + }, + { + "epoch": 1.71, + "grad_norm": 11.313754996238897, + "learning_rate": 1.1106155530694018e-06, + "loss": 1.2025, + "step": 141891 + }, + { + "epoch": 1.71, + "grad_norm": 7.096079843733277, + "learning_rate": 1.1103479991090959e-06, + "loss": 1.2367, + "step": 141894 + }, + { + "epoch": 1.71, + "grad_norm": 11.134720854321815, + "learning_rate": 1.1100804754859695e-06, + "loss": 1.2885, + "step": 141897 + }, + { + "epoch": 1.71, + "grad_norm": 8.430516353005842, + "learning_rate": 1.1098129822009361e-06, + "loss": 0.8717, + "step": 141900 + }, + { + "epoch": 1.71, + "grad_norm": 5.6418340864735335, + "learning_rate": 1.1095455192549088e-06, + "loss": 0.8822, + "step": 141903 + }, + { + "epoch": 1.71, + "grad_norm": 3.0556707392395692, + "learning_rate": 1.109278086648803e-06, + "loss": 1.0525, + "step": 141906 + }, + { + "epoch": 1.71, + "grad_norm": 18.44599207908168, + "learning_rate": 1.1090106843835258e-06, + "loss": 1.1995, + "step": 141909 + }, + { + "epoch": 1.71, + "grad_norm": 6.774876957479651, + "learning_rate": 1.1087433124599944e-06, + "loss": 1.025, + "step": 141912 + }, + { + "epoch": 1.71, + "grad_norm": 61.691207596357614, + "learning_rate": 1.1084759708791183e-06, + "loss": 1.044, + "step": 141915 + }, + { + "epoch": 1.71, + "grad_norm": 20.174951089380293, + "learning_rate": 1.108208659641813e-06, + "loss": 0.7051, + "step": 141918 + }, + { + "epoch": 1.71, + "grad_norm": 12.950994324845324, + "learning_rate": 1.1079413787489867e-06, + "loss": 1.2241, + "step": 141921 + }, + { + "epoch": 1.71, + "grad_norm": 15.094302622276917, + "learning_rate": 1.1076741282015536e-06, + "loss": 0.9966, + "step": 141924 + }, + { + "epoch": 1.71, + "grad_norm": 8.47140836185496, + "learning_rate": 1.1074069080004246e-06, + "loss": 1.1659, + "step": 141927 + }, + { + "epoch": 1.71, + "grad_norm": 6.745407454409479, + "learning_rate": 1.107139718146516e-06, + "loss": 1.1915, + "step": 141930 + }, + { + "epoch": 1.71, + "grad_norm": 8.487493062184324, + "learning_rate": 1.1068725586407359e-06, + "loss": 1.1748, + "step": 141933 + }, + { + "epoch": 1.71, + "grad_norm": 8.031012050783751, + "learning_rate": 1.1066054294839944e-06, + "loss": 1.4486, + "step": 141936 + }, + { + "epoch": 1.71, + "grad_norm": 9.342593067364295, + "learning_rate": 1.1063383306772046e-06, + "loss": 1.3857, + "step": 141939 + }, + { + "epoch": 1.71, + "grad_norm": 4.277911494744856, + "learning_rate": 1.10607126222128e-06, + "loss": 1.0319, + "step": 141942 + }, + { + "epoch": 1.71, + "grad_norm": 4.055308792703492, + "learning_rate": 1.1058042241171319e-06, + "loss": 1.3076, + "step": 141945 + }, + { + "epoch": 1.71, + "grad_norm": 11.337303022511286, + "learning_rate": 1.1055372163656685e-06, + "loss": 1.2148, + "step": 141948 + }, + { + "epoch": 1.71, + "grad_norm": 8.030261312924859, + "learning_rate": 1.1052702389678017e-06, + "loss": 0.9613, + "step": 141951 + }, + { + "epoch": 1.71, + "grad_norm": 7.404893370130653, + "learning_rate": 1.1050032919244469e-06, + "loss": 1.2817, + "step": 141954 + }, + { + "epoch": 1.71, + "grad_norm": 10.314458511333783, + "learning_rate": 1.1047363752365093e-06, + "loss": 1.0008, + "step": 141957 + }, + { + "epoch": 1.71, + "grad_norm": 4.090670566438053, + "learning_rate": 1.1044694889049057e-06, + "loss": 0.9835, + "step": 141960 + }, + { + "epoch": 1.71, + "grad_norm": 2.3039992541088496, + "learning_rate": 1.10420263293054e-06, + "loss": 1.5242, + "step": 141963 + }, + { + "epoch": 1.71, + "grad_norm": 12.51929213015918, + "learning_rate": 1.1039358073143268e-06, + "loss": 1.5447, + "step": 141966 + }, + { + "epoch": 1.71, + "grad_norm": 3.6469464298887337, + "learning_rate": 1.103669012057177e-06, + "loss": 1.2495, + "step": 141969 + }, + { + "epoch": 1.71, + "grad_norm": 8.963681500192633, + "learning_rate": 1.1034022471600014e-06, + "loss": 1.1071, + "step": 141972 + }, + { + "epoch": 1.71, + "grad_norm": 4.030512610905058, + "learning_rate": 1.103135512623711e-06, + "loss": 1.0, + "step": 141975 + }, + { + "epoch": 1.71, + "grad_norm": 10.427497051761767, + "learning_rate": 1.1028688084492112e-06, + "loss": 1.0732, + "step": 141978 + }, + { + "epoch": 1.71, + "grad_norm": 4.127115483793656, + "learning_rate": 1.102602134637415e-06, + "loss": 1.6887, + "step": 141981 + }, + { + "epoch": 1.71, + "grad_norm": 9.342201676371413, + "learning_rate": 1.1023354911892336e-06, + "loss": 1.089, + "step": 141984 + }, + { + "epoch": 1.71, + "grad_norm": 3.225200807124814, + "learning_rate": 1.1020688781055788e-06, + "loss": 1.1374, + "step": 141987 + }, + { + "epoch": 1.71, + "grad_norm": 13.961231620328356, + "learning_rate": 1.1018022953873553e-06, + "loss": 1.1037, + "step": 141990 + }, + { + "epoch": 1.71, + "grad_norm": 8.855903112396657, + "learning_rate": 1.1015357430354745e-06, + "loss": 1.1752, + "step": 141993 + }, + { + "epoch": 1.71, + "grad_norm": 7.910066571780538, + "learning_rate": 1.1012692210508501e-06, + "loss": 1.5009, + "step": 141996 + }, + { + "epoch": 1.71, + "grad_norm": 6.904624153346932, + "learning_rate": 1.1010027294343883e-06, + "loss": 1.1583, + "step": 141999 + }, + { + "epoch": 1.71, + "grad_norm": 6.408181400889133, + "learning_rate": 1.100736268186997e-06, + "loss": 0.952, + "step": 142002 + }, + { + "epoch": 1.71, + "grad_norm": 6.697766280533172, + "learning_rate": 1.1004698373095878e-06, + "loss": 1.3795, + "step": 142005 + }, + { + "epoch": 1.71, + "grad_norm": 13.114651936261605, + "learning_rate": 1.100203436803069e-06, + "loss": 1.0222, + "step": 142008 + }, + { + "epoch": 1.71, + "grad_norm": 4.713194345260701, + "learning_rate": 1.0999370666683495e-06, + "loss": 1.0523, + "step": 142011 + }, + { + "epoch": 1.71, + "grad_norm": 5.362890308084627, + "learning_rate": 1.099670726906341e-06, + "loss": 1.1869, + "step": 142014 + }, + { + "epoch": 1.71, + "grad_norm": 235.66665777797604, + "learning_rate": 1.0994044175179518e-06, + "loss": 1.0622, + "step": 142017 + }, + { + "epoch": 1.71, + "grad_norm": 7.766536949863386, + "learning_rate": 1.0991381385040867e-06, + "loss": 1.0969, + "step": 142020 + }, + { + "epoch": 1.71, + "grad_norm": 9.839817486285003, + "learning_rate": 1.0988718898656558e-06, + "loss": 1.2301, + "step": 142023 + }, + { + "epoch": 1.71, + "grad_norm": 8.039529337294908, + "learning_rate": 1.098605671603572e-06, + "loss": 1.1748, + "step": 142026 + }, + { + "epoch": 1.71, + "grad_norm": 4.162920915698096, + "learning_rate": 1.098339483718739e-06, + "loss": 0.9748, + "step": 142029 + }, + { + "epoch": 1.71, + "grad_norm": 11.135026528254166, + "learning_rate": 1.098073326212067e-06, + "loss": 1.4246, + "step": 142032 + }, + { + "epoch": 1.71, + "grad_norm": 41.65400729616598, + "learning_rate": 1.097807199084464e-06, + "loss": 1.3793, + "step": 142035 + }, + { + "epoch": 1.71, + "grad_norm": 15.639839682469356, + "learning_rate": 1.097541102336841e-06, + "loss": 1.0072, + "step": 142038 + }, + { + "epoch": 1.71, + "grad_norm": 6.948601814780806, + "learning_rate": 1.0972750359701024e-06, + "loss": 0.7572, + "step": 142041 + }, + { + "epoch": 1.71, + "grad_norm": 4.046233943588356, + "learning_rate": 1.0970089999851563e-06, + "loss": 1.0625, + "step": 142044 + }, + { + "epoch": 1.71, + "grad_norm": 2.4590291119829453, + "learning_rate": 1.0967429943829111e-06, + "loss": 1.1427, + "step": 142047 + }, + { + "epoch": 1.71, + "grad_norm": 11.468702073186575, + "learning_rate": 1.0964770191642759e-06, + "loss": 1.178, + "step": 142050 + }, + { + "epoch": 1.71, + "grad_norm": 6.3767413234684795, + "learning_rate": 1.096211074330159e-06, + "loss": 0.9059, + "step": 142053 + }, + { + "epoch": 1.71, + "grad_norm": 6.0176704175707325, + "learning_rate": 1.0959451598814642e-06, + "loss": 0.9758, + "step": 142056 + }, + { + "epoch": 1.71, + "grad_norm": 11.887141690285773, + "learning_rate": 1.0956792758191038e-06, + "loss": 0.9723, + "step": 142059 + }, + { + "epoch": 1.71, + "grad_norm": 8.544977268065935, + "learning_rate": 1.0954134221439805e-06, + "loss": 1.1198, + "step": 142062 + }, + { + "epoch": 1.71, + "grad_norm": 39.14418699255173, + "learning_rate": 1.0951475988570048e-06, + "loss": 1.2169, + "step": 142065 + }, + { + "epoch": 1.71, + "grad_norm": 7.473245003787088, + "learning_rate": 1.0948818059590838e-06, + "loss": 1.1103, + "step": 142068 + }, + { + "epoch": 1.71, + "grad_norm": 10.181648858207184, + "learning_rate": 1.094616043451122e-06, + "loss": 1.0723, + "step": 142071 + }, + { + "epoch": 1.71, + "grad_norm": 10.680696162625834, + "learning_rate": 1.094350311334027e-06, + "loss": 1.0522, + "step": 142074 + }, + { + "epoch": 1.71, + "grad_norm": 5.466148812467794, + "learning_rate": 1.0940846096087077e-06, + "loss": 1.3291, + "step": 142077 + }, + { + "epoch": 1.71, + "grad_norm": 5.169668398796424, + "learning_rate": 1.093818938276071e-06, + "loss": 0.9781, + "step": 142080 + }, + { + "epoch": 1.71, + "grad_norm": 6.2608345019353635, + "learning_rate": 1.0935532973370233e-06, + "loss": 1.1344, + "step": 142083 + }, + { + "epoch": 1.71, + "grad_norm": 11.976263164911783, + "learning_rate": 1.0932876867924669e-06, + "loss": 0.9145, + "step": 142086 + }, + { + "epoch": 1.71, + "grad_norm": 15.343636637846613, + "learning_rate": 1.0930221066433123e-06, + "loss": 0.8473, + "step": 142089 + }, + { + "epoch": 1.71, + "grad_norm": 36.453502906239855, + "learning_rate": 1.0927565568904653e-06, + "loss": 1.2604, + "step": 142092 + }, + { + "epoch": 1.71, + "grad_norm": 9.36205595104988, + "learning_rate": 1.092491037534833e-06, + "loss": 0.9316, + "step": 142095 + }, + { + "epoch": 1.71, + "grad_norm": 10.940313120381031, + "learning_rate": 1.0922255485773182e-06, + "loss": 1.0919, + "step": 142098 + }, + { + "epoch": 1.71, + "grad_norm": 6.775557779476942, + "learning_rate": 1.091960090018831e-06, + "loss": 1.1332, + "step": 142101 + }, + { + "epoch": 1.71, + "grad_norm": 7.71325914987952, + "learning_rate": 1.0916946618602743e-06, + "loss": 1.2483, + "step": 142104 + }, + { + "epoch": 1.71, + "grad_norm": 5.9435183367322, + "learning_rate": 1.0914292641025559e-06, + "loss": 0.8175, + "step": 142107 + }, + { + "epoch": 1.71, + "grad_norm": 17.774632766846505, + "learning_rate": 1.0911638967465788e-06, + "loss": 1.061, + "step": 142110 + }, + { + "epoch": 1.71, + "grad_norm": 11.622417703894879, + "learning_rate": 1.090898559793251e-06, + "loss": 1.1083, + "step": 142113 + }, + { + "epoch": 1.71, + "grad_norm": 15.324486794845853, + "learning_rate": 1.090633253243477e-06, + "loss": 1.1213, + "step": 142116 + }, + { + "epoch": 1.71, + "grad_norm": 6.702338727834006, + "learning_rate": 1.0903679770981624e-06, + "loss": 1.0007, + "step": 142119 + }, + { + "epoch": 1.71, + "grad_norm": 6.761674516034915, + "learning_rate": 1.0901027313582147e-06, + "loss": 0.9842, + "step": 142122 + }, + { + "epoch": 1.71, + "grad_norm": 12.320267797496413, + "learning_rate": 1.0898375160245367e-06, + "loss": 1.2359, + "step": 142125 + }, + { + "epoch": 1.71, + "grad_norm": 13.159378174615794, + "learning_rate": 1.0895723310980322e-06, + "loss": 1.4017, + "step": 142128 + }, + { + "epoch": 1.71, + "grad_norm": 9.557288481961242, + "learning_rate": 1.0893071765796082e-06, + "loss": 0.9913, + "step": 142131 + }, + { + "epoch": 1.71, + "grad_norm": 10.210802223222128, + "learning_rate": 1.0890420524701706e-06, + "loss": 1.1287, + "step": 142134 + }, + { + "epoch": 1.71, + "grad_norm": 4.719754659501419, + "learning_rate": 1.088776958770621e-06, + "loss": 1.0683, + "step": 142137 + }, + { + "epoch": 1.71, + "grad_norm": 3.4186816088991288, + "learning_rate": 1.0885118954818663e-06, + "loss": 1.0801, + "step": 142140 + }, + { + "epoch": 1.71, + "grad_norm": 2.767975835081519, + "learning_rate": 1.0882468626048116e-06, + "loss": 1.2374, + "step": 142143 + }, + { + "epoch": 1.71, + "grad_norm": 3.739395360881488, + "learning_rate": 1.0879818601403591e-06, + "loss": 1.1408, + "step": 142146 + }, + { + "epoch": 1.71, + "grad_norm": 6.85692768345659, + "learning_rate": 1.0877168880894162e-06, + "loss": 1.3248, + "step": 142149 + }, + { + "epoch": 1.71, + "grad_norm": 2.980583844001951, + "learning_rate": 1.0874519464528832e-06, + "loss": 1.19, + "step": 142152 + }, + { + "epoch": 1.71, + "grad_norm": 10.239026677863079, + "learning_rate": 1.0871870352316671e-06, + "loss": 1.0571, + "step": 142155 + }, + { + "epoch": 1.71, + "grad_norm": 6.992077880684723, + "learning_rate": 1.0869221544266706e-06, + "loss": 1.581, + "step": 142158 + }, + { + "epoch": 1.71, + "grad_norm": 3.4346434458053325, + "learning_rate": 1.0866573040388007e-06, + "loss": 1.3202, + "step": 142161 + }, + { + "epoch": 1.71, + "grad_norm": 11.018367298600921, + "learning_rate": 1.0863924840689567e-06, + "loss": 1.4167, + "step": 142164 + }, + { + "epoch": 1.71, + "grad_norm": 6.351131882681143, + "learning_rate": 1.0861276945180466e-06, + "loss": 1.2129, + "step": 142167 + }, + { + "epoch": 1.71, + "grad_norm": 6.417934367538696, + "learning_rate": 1.08586293538697e-06, + "loss": 1.0437, + "step": 142170 + }, + { + "epoch": 1.71, + "grad_norm": 6.555254819765438, + "learning_rate": 1.0855982066766314e-06, + "loss": 1.176, + "step": 142173 + }, + { + "epoch": 1.71, + "grad_norm": 10.150419301345732, + "learning_rate": 1.085333508387938e-06, + "loss": 1.3232, + "step": 142176 + }, + { + "epoch": 1.71, + "grad_norm": 7.5091299358345935, + "learning_rate": 1.085068840521788e-06, + "loss": 1.0141, + "step": 142179 + }, + { + "epoch": 1.71, + "grad_norm": 3.5512646287830254, + "learning_rate": 1.0848042030790873e-06, + "loss": 0.9179, + "step": 142182 + }, + { + "epoch": 1.71, + "grad_norm": 9.124741817924034, + "learning_rate": 1.0845395960607397e-06, + "loss": 0.889, + "step": 142185 + }, + { + "epoch": 1.71, + "grad_norm": 5.9824617997920155, + "learning_rate": 1.0842750194676465e-06, + "loss": 1.287, + "step": 142188 + }, + { + "epoch": 1.71, + "grad_norm": 3.680033289669779, + "learning_rate": 1.0840104733007118e-06, + "loss": 1.0842, + "step": 142191 + }, + { + "epoch": 1.71, + "grad_norm": 10.74059734283529, + "learning_rate": 1.0837459575608367e-06, + "loss": 0.9002, + "step": 142194 + }, + { + "epoch": 1.71, + "grad_norm": 7.272403601066777, + "learning_rate": 1.083481472248925e-06, + "loss": 1.0776, + "step": 142197 + }, + { + "epoch": 1.71, + "grad_norm": 10.495806334533022, + "learning_rate": 1.0832170173658795e-06, + "loss": 1.2409, + "step": 142200 + }, + { + "epoch": 1.71, + "grad_norm": 4.738429516492516, + "learning_rate": 1.082952592912604e-06, + "loss": 1.0013, + "step": 142203 + }, + { + "epoch": 1.71, + "grad_norm": 5.250868547855101, + "learning_rate": 1.0826881988899974e-06, + "loss": 1.4159, + "step": 142206 + }, + { + "epoch": 1.71, + "grad_norm": 4.022288927444707, + "learning_rate": 1.0824238352989668e-06, + "loss": 1.1937, + "step": 142209 + }, + { + "epoch": 1.71, + "grad_norm": 6.013896209580126, + "learning_rate": 1.0821595021404085e-06, + "loss": 1.1675, + "step": 142212 + }, + { + "epoch": 1.71, + "grad_norm": 3.3474474921764723, + "learning_rate": 1.0818951994152304e-06, + "loss": 1.0709, + "step": 142215 + }, + { + "epoch": 1.71, + "grad_norm": 5.866398670894667, + "learning_rate": 1.0816309271243286e-06, + "loss": 0.9711, + "step": 142218 + }, + { + "epoch": 1.71, + "grad_norm": 9.095988968756195, + "learning_rate": 1.0813666852686089e-06, + "loss": 1.4235, + "step": 142221 + }, + { + "epoch": 1.71, + "grad_norm": 11.389028733136007, + "learning_rate": 1.0811024738489727e-06, + "loss": 1.2863, + "step": 142224 + }, + { + "epoch": 1.71, + "grad_norm": 11.935070140331872, + "learning_rate": 1.0808382928663208e-06, + "loss": 1.4463, + "step": 142227 + }, + { + "epoch": 1.71, + "grad_norm": 8.872354952837425, + "learning_rate": 1.0805741423215565e-06, + "loss": 0.9244, + "step": 142230 + }, + { + "epoch": 1.71, + "grad_norm": 4.105426425973379, + "learning_rate": 1.0803100222155805e-06, + "loss": 1.1339, + "step": 142233 + }, + { + "epoch": 1.71, + "grad_norm": 5.405446180306933, + "learning_rate": 1.0800459325492918e-06, + "loss": 1.0743, + "step": 142236 + }, + { + "epoch": 1.71, + "grad_norm": 3.8756292743588663, + "learning_rate": 1.079781873323592e-06, + "loss": 1.1203, + "step": 142239 + }, + { + "epoch": 1.71, + "grad_norm": 11.269447240757808, + "learning_rate": 1.0795178445393873e-06, + "loss": 1.4873, + "step": 142242 + }, + { + "epoch": 1.71, + "grad_norm": 12.225014329172167, + "learning_rate": 1.0792538461975722e-06, + "loss": 1.1938, + "step": 142245 + }, + { + "epoch": 1.71, + "grad_norm": 3.0452618920059575, + "learning_rate": 1.0789898782990504e-06, + "loss": 1.2262, + "step": 142248 + }, + { + "epoch": 1.71, + "grad_norm": 6.960022190914084, + "learning_rate": 1.0787259408447248e-06, + "loss": 1.1144, + "step": 142251 + }, + { + "epoch": 1.71, + "grad_norm": 3.4766368248311026, + "learning_rate": 1.0784620338354933e-06, + "loss": 1.1684, + "step": 142254 + }, + { + "epoch": 1.71, + "grad_norm": 5.238675112836802, + "learning_rate": 1.0781981572722577e-06, + "loss": 0.9981, + "step": 142257 + }, + { + "epoch": 1.71, + "grad_norm": 20.520437161965894, + "learning_rate": 1.077934311155917e-06, + "loss": 0.7925, + "step": 142260 + }, + { + "epoch": 1.71, + "grad_norm": 6.0885476713758315, + "learning_rate": 1.077670495487373e-06, + "loss": 1.3225, + "step": 142263 + }, + { + "epoch": 1.71, + "grad_norm": 6.502141189431116, + "learning_rate": 1.0774067102675257e-06, + "loss": 1.4073, + "step": 142266 + }, + { + "epoch": 1.71, + "grad_norm": 7.401583817345182, + "learning_rate": 1.0771429554972768e-06, + "loss": 0.8951, + "step": 142269 + }, + { + "epoch": 1.71, + "grad_norm": 10.477756739452092, + "learning_rate": 1.0768792311775234e-06, + "loss": 1.2896, + "step": 142272 + }, + { + "epoch": 1.71, + "grad_norm": 11.955553375089034, + "learning_rate": 1.076615537309169e-06, + "loss": 0.8739, + "step": 142275 + }, + { + "epoch": 1.71, + "grad_norm": 10.88473418575121, + "learning_rate": 1.07635187389311e-06, + "loss": 1.2907, + "step": 142278 + }, + { + "epoch": 1.71, + "grad_norm": 7.150487513466324, + "learning_rate": 1.0760882409302477e-06, + "loss": 1.1286, + "step": 142281 + }, + { + "epoch": 1.71, + "grad_norm": 2.6071470573582634, + "learning_rate": 1.0758246384214831e-06, + "loss": 1.1977, + "step": 142284 + }, + { + "epoch": 1.71, + "grad_norm": 6.891258868772973, + "learning_rate": 1.075561066367713e-06, + "loss": 1.2373, + "step": 142287 + }, + { + "epoch": 1.71, + "grad_norm": 8.647374900548652, + "learning_rate": 1.0752975247698382e-06, + "loss": 1.2673, + "step": 142290 + }, + { + "epoch": 1.71, + "grad_norm": 3.8186195640647744, + "learning_rate": 1.0750340136287607e-06, + "loss": 1.1154, + "step": 142293 + }, + { + "epoch": 1.71, + "grad_norm": 14.11228920806774, + "learning_rate": 1.0747705329453772e-06, + "loss": 0.8521, + "step": 142296 + }, + { + "epoch": 1.71, + "grad_norm": 2.192452232967078, + "learning_rate": 1.074507082720585e-06, + "loss": 1.1269, + "step": 142299 + }, + { + "epoch": 1.71, + "grad_norm": 4.36871203609118, + "learning_rate": 1.0742436629552855e-06, + "loss": 1.2101, + "step": 142302 + }, + { + "epoch": 1.71, + "grad_norm": 10.89166055192239, + "learning_rate": 1.0739802736503768e-06, + "loss": 0.9231, + "step": 142305 + }, + { + "epoch": 1.71, + "grad_norm": 13.280546871663594, + "learning_rate": 1.0737169148067584e-06, + "loss": 1.314, + "step": 142308 + }, + { + "epoch": 1.71, + "grad_norm": 19.875145071670065, + "learning_rate": 1.0734535864253303e-06, + "loss": 0.9425, + "step": 142311 + }, + { + "epoch": 1.71, + "grad_norm": 6.790438409955343, + "learning_rate": 1.073190288506989e-06, + "loss": 1.2145, + "step": 142314 + }, + { + "epoch": 1.71, + "grad_norm": 6.248327427351505, + "learning_rate": 1.0729270210526355e-06, + "loss": 1.0064, + "step": 142317 + }, + { + "epoch": 1.71, + "grad_norm": 18.91340308070346, + "learning_rate": 1.0726637840631637e-06, + "loss": 0.8779, + "step": 142320 + }, + { + "epoch": 1.71, + "grad_norm": 2.767087297698141, + "learning_rate": 1.0724005775394774e-06, + "loss": 0.9377, + "step": 142323 + }, + { + "epoch": 1.71, + "grad_norm": 11.079229409135626, + "learning_rate": 1.0721374014824703e-06, + "loss": 1.1798, + "step": 142326 + }, + { + "epoch": 1.71, + "grad_norm": 32.50980311809415, + "learning_rate": 1.0718742558930429e-06, + "loss": 1.0214, + "step": 142329 + }, + { + "epoch": 1.71, + "grad_norm": 7.317498013217436, + "learning_rate": 1.071611140772092e-06, + "loss": 1.1287, + "step": 142332 + }, + { + "epoch": 1.71, + "grad_norm": 6.0067878346527595, + "learning_rate": 1.071348056120519e-06, + "loss": 1.1546, + "step": 142335 + }, + { + "epoch": 1.71, + "grad_norm": 15.778033960261965, + "learning_rate": 1.0710850019392182e-06, + "loss": 0.9735, + "step": 142338 + }, + { + "epoch": 1.71, + "grad_norm": 7.346171456181735, + "learning_rate": 1.0708219782290873e-06, + "loss": 1.0704, + "step": 142341 + }, + { + "epoch": 1.71, + "grad_norm": 16.466196619555763, + "learning_rate": 1.0705589849910237e-06, + "loss": 1.2067, + "step": 142344 + }, + { + "epoch": 1.71, + "grad_norm": 4.735341471590399, + "learning_rate": 1.0702960222259263e-06, + "loss": 1.2277, + "step": 142347 + }, + { + "epoch": 1.71, + "grad_norm": 4.662265246485806, + "learning_rate": 1.0700330899346934e-06, + "loss": 1.3419, + "step": 142350 + }, + { + "epoch": 1.71, + "grad_norm": 12.923686151304137, + "learning_rate": 1.06977018811822e-06, + "loss": 1.0414, + "step": 142353 + }, + { + "epoch": 1.71, + "grad_norm": 5.199577879273834, + "learning_rate": 1.069507316777404e-06, + "loss": 0.9603, + "step": 142356 + }, + { + "epoch": 1.71, + "grad_norm": 8.356768334156103, + "learning_rate": 1.0692444759131438e-06, + "loss": 1.065, + "step": 142359 + }, + { + "epoch": 1.71, + "grad_norm": 2.805544422855191, + "learning_rate": 1.0689816655263341e-06, + "loss": 1.2829, + "step": 142362 + }, + { + "epoch": 1.71, + "grad_norm": 4.261254856111948, + "learning_rate": 1.0687188856178754e-06, + "loss": 1.3225, + "step": 142365 + }, + { + "epoch": 1.71, + "grad_norm": 5.508560806702165, + "learning_rate": 1.0684561361886602e-06, + "loss": 1.4698, + "step": 142368 + }, + { + "epoch": 1.71, + "grad_norm": 17.28932871321623, + "learning_rate": 1.068193417239587e-06, + "loss": 1.3447, + "step": 142371 + }, + { + "epoch": 1.71, + "grad_norm": 9.733968197110977, + "learning_rate": 1.0679307287715511e-06, + "loss": 1.3055, + "step": 142374 + }, + { + "epoch": 1.71, + "grad_norm": 6.236829293491983, + "learning_rate": 1.0676680707854536e-06, + "loss": 1.1778, + "step": 142377 + }, + { + "epoch": 1.71, + "grad_norm": 13.796014107671564, + "learning_rate": 1.0674054432821878e-06, + "loss": 1.0715, + "step": 142380 + }, + { + "epoch": 1.71, + "grad_norm": 12.745405740577558, + "learning_rate": 1.0671428462626477e-06, + "loss": 0.9982, + "step": 142383 + }, + { + "epoch": 1.71, + "grad_norm": 6.954342130623501, + "learning_rate": 1.06688027972773e-06, + "loss": 1.2833, + "step": 142386 + }, + { + "epoch": 1.71, + "grad_norm": 3.0971073586071856, + "learning_rate": 1.0666177436783332e-06, + "loss": 0.8167, + "step": 142389 + }, + { + "epoch": 1.71, + "grad_norm": 12.676613572533363, + "learning_rate": 1.0663552381153541e-06, + "loss": 1.2303, + "step": 142392 + }, + { + "epoch": 1.71, + "grad_norm": 13.547764958927912, + "learning_rate": 1.0660927630396856e-06, + "loss": 0.938, + "step": 142395 + }, + { + "epoch": 1.71, + "grad_norm": 6.121356828253428, + "learning_rate": 1.0658303184522234e-06, + "loss": 1.0341, + "step": 142398 + }, + { + "epoch": 1.71, + "grad_norm": 7.092627729815282, + "learning_rate": 1.065567904353867e-06, + "loss": 1.0455, + "step": 142401 + }, + { + "epoch": 1.71, + "grad_norm": 8.771420164913113, + "learning_rate": 1.0653055207455087e-06, + "loss": 1.0236, + "step": 142404 + }, + { + "epoch": 1.71, + "grad_norm": 7.384750238510406, + "learning_rate": 1.0650431676280427e-06, + "loss": 1.0352, + "step": 142407 + }, + { + "epoch": 1.71, + "grad_norm": 9.759321848677745, + "learning_rate": 1.0647808450023656e-06, + "loss": 1.3788, + "step": 142410 + }, + { + "epoch": 1.71, + "grad_norm": 15.70544148211185, + "learning_rate": 1.0645185528693735e-06, + "loss": 0.9858, + "step": 142413 + }, + { + "epoch": 1.71, + "grad_norm": 12.320338195643815, + "learning_rate": 1.0642562912299604e-06, + "loss": 1.0756, + "step": 142416 + }, + { + "epoch": 1.71, + "grad_norm": 7.861871115701565, + "learning_rate": 1.063994060085024e-06, + "loss": 1.1768, + "step": 142419 + }, + { + "epoch": 1.71, + "grad_norm": 11.653691425424013, + "learning_rate": 1.0637318594354572e-06, + "loss": 1.2735, + "step": 142422 + }, + { + "epoch": 1.71, + "grad_norm": 13.16307774161117, + "learning_rate": 1.0634696892821538e-06, + "loss": 1.1047, + "step": 142425 + }, + { + "epoch": 1.71, + "grad_norm": 8.477199919106486, + "learning_rate": 1.0632075496260086e-06, + "loss": 1.2288, + "step": 142428 + }, + { + "epoch": 1.71, + "grad_norm": 4.547382586946474, + "learning_rate": 1.0629454404679185e-06, + "loss": 1.2573, + "step": 142431 + }, + { + "epoch": 1.71, + "grad_norm": 17.060191492041398, + "learning_rate": 1.0626833618087751e-06, + "loss": 1.4136, + "step": 142434 + }, + { + "epoch": 1.71, + "grad_norm": 4.65919463120569, + "learning_rate": 1.0624213136494743e-06, + "loss": 0.8957, + "step": 142437 + }, + { + "epoch": 1.71, + "grad_norm": 4.999852979731785, + "learning_rate": 1.06215929599091e-06, + "loss": 0.7863, + "step": 142440 + }, + { + "epoch": 1.71, + "grad_norm": 3.058586224856548, + "learning_rate": 1.061897308833979e-06, + "loss": 1.0025, + "step": 142443 + }, + { + "epoch": 1.71, + "grad_norm": 7.247042505975484, + "learning_rate": 1.0616353521795719e-06, + "loss": 1.2687, + "step": 142446 + }, + { + "epoch": 1.71, + "grad_norm": 3.916887632706012, + "learning_rate": 1.0613734260285834e-06, + "loss": 1.3365, + "step": 142449 + }, + { + "epoch": 1.71, + "grad_norm": 5.476408792655497, + "learning_rate": 1.0611115303819064e-06, + "loss": 1.1653, + "step": 142452 + }, + { + "epoch": 1.71, + "grad_norm": 8.059275906369447, + "learning_rate": 1.0608496652404366e-06, + "loss": 1.295, + "step": 142455 + }, + { + "epoch": 1.71, + "grad_norm": 5.073375617990044, + "learning_rate": 1.0605878306050688e-06, + "loss": 1.1401, + "step": 142458 + }, + { + "epoch": 1.71, + "grad_norm": 22.55782961628531, + "learning_rate": 1.0603260264766924e-06, + "loss": 1.3923, + "step": 142461 + }, + { + "epoch": 1.71, + "grad_norm": 11.858498984846614, + "learning_rate": 1.0600642528562044e-06, + "loss": 1.0996, + "step": 142464 + }, + { + "epoch": 1.71, + "grad_norm": 6.967258296016386, + "learning_rate": 1.0598025097444976e-06, + "loss": 1.1526, + "step": 142467 + }, + { + "epoch": 1.71, + "grad_norm": 2.7719746694173306, + "learning_rate": 1.0595407971424632e-06, + "loss": 1.1338, + "step": 142470 + }, + { + "epoch": 1.71, + "grad_norm": 8.573526803185562, + "learning_rate": 1.0592791150509985e-06, + "loss": 0.8497, + "step": 142473 + }, + { + "epoch": 1.71, + "grad_norm": 7.663486426137061, + "learning_rate": 1.0590174634709904e-06, + "loss": 1.0774, + "step": 142476 + }, + { + "epoch": 1.71, + "grad_norm": 9.654544417747003, + "learning_rate": 1.0587558424033362e-06, + "loss": 1.181, + "step": 142479 + }, + { + "epoch": 1.71, + "grad_norm": 9.62263069661206, + "learning_rate": 1.0584942518489272e-06, + "loss": 0.9927, + "step": 142482 + }, + { + "epoch": 1.71, + "grad_norm": 9.064145966177906, + "learning_rate": 1.0582326918086583e-06, + "loss": 1.2599, + "step": 142485 + }, + { + "epoch": 1.71, + "grad_norm": 17.480957524860536, + "learning_rate": 1.057971162283421e-06, + "loss": 1.3555, + "step": 142488 + }, + { + "epoch": 1.71, + "grad_norm": 9.233284609514275, + "learning_rate": 1.0577096632741046e-06, + "loss": 1.463, + "step": 142491 + }, + { + "epoch": 1.71, + "grad_norm": 5.1327896612011905, + "learning_rate": 1.057448194781604e-06, + "loss": 1.4892, + "step": 142494 + }, + { + "epoch": 1.71, + "grad_norm": 5.885642169158067, + "learning_rate": 1.057186756806813e-06, + "loss": 1.2568, + "step": 142497 + }, + { + "epoch": 1.71, + "grad_norm": 4.358943338728342, + "learning_rate": 1.0569253493506227e-06, + "loss": 1.2487, + "step": 142500 + }, + { + "epoch": 1.71, + "grad_norm": 10.67445822493405, + "learning_rate": 1.0566639724139238e-06, + "loss": 1.0977, + "step": 142503 + }, + { + "epoch": 1.71, + "grad_norm": 8.465943832145529, + "learning_rate": 1.056402625997609e-06, + "loss": 1.0687, + "step": 142506 + }, + { + "epoch": 1.71, + "grad_norm": 6.930787127203364, + "learning_rate": 1.056141310102573e-06, + "loss": 1.4091, + "step": 142509 + }, + { + "epoch": 1.71, + "grad_norm": 7.3542787989397835, + "learning_rate": 1.0558800247297052e-06, + "loss": 1.0145, + "step": 142512 + }, + { + "epoch": 1.71, + "grad_norm": 4.834536225566339, + "learning_rate": 1.0556187698798948e-06, + "loss": 1.2626, + "step": 142515 + }, + { + "epoch": 1.71, + "grad_norm": 13.319479948139175, + "learning_rate": 1.0553575455540366e-06, + "loss": 1.1886, + "step": 142518 + }, + { + "epoch": 1.71, + "grad_norm": 21.175757872209658, + "learning_rate": 1.05509635175302e-06, + "loss": 0.9603, + "step": 142521 + }, + { + "epoch": 1.71, + "grad_norm": 6.077879972906252, + "learning_rate": 1.0548351884777387e-06, + "loss": 1.0168, + "step": 142524 + }, + { + "epoch": 1.71, + "grad_norm": 12.340652973082594, + "learning_rate": 1.054574055729084e-06, + "loss": 0.9257, + "step": 142527 + }, + { + "epoch": 1.71, + "grad_norm": 18.039720873557393, + "learning_rate": 1.0543129535079467e-06, + "loss": 1.1866, + "step": 142530 + }, + { + "epoch": 1.71, + "grad_norm": 6.5310983598806125, + "learning_rate": 1.0540518818152146e-06, + "loss": 1.1616, + "step": 142533 + }, + { + "epoch": 1.71, + "grad_norm": 5.341416998085567, + "learning_rate": 1.0537908406517817e-06, + "loss": 0.6676, + "step": 142536 + }, + { + "epoch": 1.71, + "grad_norm": 9.032409510836866, + "learning_rate": 1.0535298300185392e-06, + "loss": 1.1788, + "step": 142539 + }, + { + "epoch": 1.71, + "grad_norm": 8.270984159285067, + "learning_rate": 1.0532688499163756e-06, + "loss": 1.2942, + "step": 142542 + }, + { + "epoch": 1.71, + "grad_norm": 10.45171360210304, + "learning_rate": 1.0530079003461834e-06, + "loss": 1.1647, + "step": 142545 + }, + { + "epoch": 1.71, + "grad_norm": 6.423940715801142, + "learning_rate": 1.0527469813088508e-06, + "loss": 1.2175, + "step": 142548 + }, + { + "epoch": 1.71, + "grad_norm": 9.47032276835942, + "learning_rate": 1.0524860928052738e-06, + "loss": 0.8447, + "step": 142551 + }, + { + "epoch": 1.71, + "grad_norm": 8.133309436385481, + "learning_rate": 1.0522252348363382e-06, + "loss": 1.1466, + "step": 142554 + }, + { + "epoch": 1.71, + "grad_norm": 5.372720734514316, + "learning_rate": 1.0519644074029323e-06, + "loss": 1.3515, + "step": 142557 + }, + { + "epoch": 1.71, + "grad_norm": 9.16659174438536, + "learning_rate": 1.0517036105059497e-06, + "loss": 1.2303, + "step": 142560 + }, + { + "epoch": 1.71, + "grad_norm": 12.593357202824379, + "learning_rate": 1.0514428441462788e-06, + "loss": 1.1985, + "step": 142563 + }, + { + "epoch": 1.71, + "grad_norm": 18.398329032437346, + "learning_rate": 1.051182108324813e-06, + "loss": 0.9114, + "step": 142566 + }, + { + "epoch": 1.71, + "grad_norm": 24.33785234413509, + "learning_rate": 1.0509214030424363e-06, + "loss": 0.9372, + "step": 142569 + }, + { + "epoch": 1.71, + "grad_norm": 8.883955668639151, + "learning_rate": 1.0506607283000436e-06, + "loss": 1.1978, + "step": 142572 + }, + { + "epoch": 1.71, + "grad_norm": 9.926687708898104, + "learning_rate": 1.0504000840985208e-06, + "loss": 1.5149, + "step": 142575 + }, + { + "epoch": 1.71, + "grad_norm": 6.9952091565142664, + "learning_rate": 1.050139470438759e-06, + "loss": 1.2201, + "step": 142578 + }, + { + "epoch": 1.71, + "grad_norm": 11.045620365778117, + "learning_rate": 1.0498788873216492e-06, + "loss": 1.1909, + "step": 142581 + }, + { + "epoch": 1.71, + "grad_norm": 15.477255690764661, + "learning_rate": 1.0496183347480771e-06, + "loss": 1.4031, + "step": 142584 + }, + { + "epoch": 1.71, + "grad_norm": 6.29330513865263, + "learning_rate": 1.049357812718933e-06, + "loss": 1.0263, + "step": 142587 + }, + { + "epoch": 1.71, + "grad_norm": 3.2043420243027647, + "learning_rate": 1.0490973212351075e-06, + "loss": 1.1701, + "step": 142590 + }, + { + "epoch": 1.71, + "grad_norm": 5.964446131919075, + "learning_rate": 1.0488368602974908e-06, + "loss": 0.9489, + "step": 142593 + }, + { + "epoch": 1.71, + "grad_norm": 7.113622601191302, + "learning_rate": 1.048576429906969e-06, + "loss": 1.2452, + "step": 142596 + }, + { + "epoch": 1.71, + "grad_norm": 11.420005515825958, + "learning_rate": 1.04831603006443e-06, + "loss": 0.8803, + "step": 142599 + }, + { + "epoch": 1.71, + "grad_norm": 6.6020812434255385, + "learning_rate": 1.0480556607707636e-06, + "loss": 0.8671, + "step": 142602 + }, + { + "epoch": 1.71, + "grad_norm": 15.376900322866959, + "learning_rate": 1.04779532202686e-06, + "loss": 1.3075, + "step": 142605 + }, + { + "epoch": 1.71, + "grad_norm": 20.67229271204964, + "learning_rate": 1.047535013833607e-06, + "loss": 1.1354, + "step": 142608 + }, + { + "epoch": 1.71, + "grad_norm": 12.12005528837193, + "learning_rate": 1.047274736191891e-06, + "loss": 0.8284, + "step": 142611 + }, + { + "epoch": 1.71, + "grad_norm": 12.339390422444728, + "learning_rate": 1.0470144891026035e-06, + "loss": 1.2525, + "step": 142614 + }, + { + "epoch": 1.71, + "grad_norm": 19.12343961903513, + "learning_rate": 1.046754272566628e-06, + "loss": 1.2439, + "step": 142617 + }, + { + "epoch": 1.71, + "grad_norm": 4.492182256240708, + "learning_rate": 1.0464940865848582e-06, + "loss": 1.0911, + "step": 142620 + }, + { + "epoch": 1.71, + "grad_norm": 4.33280415684765, + "learning_rate": 1.046233931158177e-06, + "loss": 1.1666, + "step": 142623 + }, + { + "epoch": 1.72, + "grad_norm": 6.828255587879628, + "learning_rate": 1.0459738062874735e-06, + "loss": 0.9202, + "step": 142626 + }, + { + "epoch": 1.72, + "grad_norm": 3.0797923292030127, + "learning_rate": 1.0457137119736371e-06, + "loss": 0.9825, + "step": 142629 + }, + { + "epoch": 1.72, + "grad_norm": 5.81640199853754, + "learning_rate": 1.0454536482175536e-06, + "loss": 1.257, + "step": 142632 + }, + { + "epoch": 1.72, + "grad_norm": 2.0881841697877235, + "learning_rate": 1.0451936150201136e-06, + "loss": 1.2208, + "step": 142635 + }, + { + "epoch": 1.72, + "grad_norm": 15.466541854141823, + "learning_rate": 1.044933612382203e-06, + "loss": 1.2883, + "step": 142638 + }, + { + "epoch": 1.72, + "grad_norm": 1.8920530299934315, + "learning_rate": 1.0446736403047053e-06, + "loss": 1.1595, + "step": 142641 + }, + { + "epoch": 1.72, + "grad_norm": 5.09582186667148, + "learning_rate": 1.0444136987885113e-06, + "loss": 1.1377, + "step": 142644 + }, + { + "epoch": 1.72, + "grad_norm": 6.432038253312755, + "learning_rate": 1.0441537878345098e-06, + "loss": 1.0922, + "step": 142647 + }, + { + "epoch": 1.72, + "grad_norm": 8.058968284844854, + "learning_rate": 1.0438939074435828e-06, + "loss": 1.0054, + "step": 142650 + }, + { + "epoch": 1.72, + "grad_norm": 7.707367901680853, + "learning_rate": 1.0436340576166203e-06, + "loss": 1.0495, + "step": 142653 + }, + { + "epoch": 1.72, + "grad_norm": 14.909167898735015, + "learning_rate": 1.043374238354511e-06, + "loss": 1.3456, + "step": 142656 + }, + { + "epoch": 1.72, + "grad_norm": 15.828520550059457, + "learning_rate": 1.043114449658137e-06, + "loss": 1.0859, + "step": 142659 + }, + { + "epoch": 1.72, + "grad_norm": 3.707316546418789, + "learning_rate": 1.0428546915283898e-06, + "loss": 0.8275, + "step": 142662 + }, + { + "epoch": 1.72, + "grad_norm": 11.237484897138511, + "learning_rate": 1.0425949639661504e-06, + "loss": 1.0563, + "step": 142665 + }, + { + "epoch": 1.72, + "grad_norm": 5.0536134953901355, + "learning_rate": 1.0423352669723087e-06, + "loss": 1.419, + "step": 142668 + }, + { + "epoch": 1.72, + "grad_norm": 16.447009302476584, + "learning_rate": 1.0420756005477495e-06, + "loss": 1.2337, + "step": 142671 + }, + { + "epoch": 1.72, + "grad_norm": 12.278113312730166, + "learning_rate": 1.0418159646933613e-06, + "loss": 0.9597, + "step": 142674 + }, + { + "epoch": 1.72, + "grad_norm": 6.751492323914496, + "learning_rate": 1.0415563594100276e-06, + "loss": 1.2677, + "step": 142677 + }, + { + "epoch": 1.72, + "grad_norm": 5.233775056159611, + "learning_rate": 1.0412967846986354e-06, + "loss": 1.0605, + "step": 142680 + }, + { + "epoch": 1.72, + "grad_norm": 6.53937849157461, + "learning_rate": 1.0410372405600699e-06, + "loss": 0.9794, + "step": 142683 + }, + { + "epoch": 1.72, + "grad_norm": 6.390340116307514, + "learning_rate": 1.0407777269952169e-06, + "loss": 1.1947, + "step": 142686 + }, + { + "epoch": 1.72, + "grad_norm": 3.4141717598965937, + "learning_rate": 1.0405182440049644e-06, + "loss": 1.4443, + "step": 142689 + }, + { + "epoch": 1.72, + "grad_norm": 5.4813041381794365, + "learning_rate": 1.040258791590194e-06, + "loss": 0.9922, + "step": 142692 + }, + { + "epoch": 1.72, + "grad_norm": 3.956584818517037, + "learning_rate": 1.039999369751794e-06, + "loss": 1.0816, + "step": 142695 + }, + { + "epoch": 1.72, + "grad_norm": 3.8769754804254384, + "learning_rate": 1.0397399784906493e-06, + "loss": 1.2923, + "step": 142698 + }, + { + "epoch": 1.72, + "grad_norm": 8.347901949922539, + "learning_rate": 1.0394806178076432e-06, + "loss": 1.0328, + "step": 142701 + }, + { + "epoch": 1.72, + "grad_norm": 18.275899230253835, + "learning_rate": 1.0392212877036645e-06, + "loss": 1.2776, + "step": 142704 + }, + { + "epoch": 1.72, + "grad_norm": 7.456927425147499, + "learning_rate": 1.0389619881795931e-06, + "loss": 0.9298, + "step": 142707 + }, + { + "epoch": 1.72, + "grad_norm": 11.953239581852689, + "learning_rate": 1.0387027192363174e-06, + "loss": 0.9165, + "step": 142710 + }, + { + "epoch": 1.72, + "grad_norm": 4.773531544760364, + "learning_rate": 1.0384434808747212e-06, + "loss": 1.1916, + "step": 142713 + }, + { + "epoch": 1.72, + "grad_norm": 14.250883594990404, + "learning_rate": 1.0381842730956915e-06, + "loss": 1.4771, + "step": 142716 + }, + { + "epoch": 1.72, + "grad_norm": 4.976404278658403, + "learning_rate": 1.0379250959001086e-06, + "loss": 1.0751, + "step": 142719 + }, + { + "epoch": 1.72, + "grad_norm": 35.124718992354396, + "learning_rate": 1.0376659492888608e-06, + "loss": 1.2627, + "step": 142722 + }, + { + "epoch": 1.72, + "grad_norm": 6.508878511444723, + "learning_rate": 1.0374068332628296e-06, + "loss": 0.8467, + "step": 142725 + }, + { + "epoch": 1.72, + "grad_norm": 9.562735341951264, + "learning_rate": 1.037147747822903e-06, + "loss": 1.4595, + "step": 142728 + }, + { + "epoch": 1.72, + "grad_norm": 10.47090118004231, + "learning_rate": 1.0368886929699595e-06, + "loss": 1.0936, + "step": 142731 + }, + { + "epoch": 1.72, + "grad_norm": 26.01971605136574, + "learning_rate": 1.036629668704887e-06, + "loss": 1.2397, + "step": 142734 + }, + { + "epoch": 1.72, + "grad_norm": 10.58223298400066, + "learning_rate": 1.0363706750285695e-06, + "loss": 1.2359, + "step": 142737 + }, + { + "epoch": 1.72, + "grad_norm": 6.273317401886545, + "learning_rate": 1.0361117119418917e-06, + "loss": 1.3484, + "step": 142740 + }, + { + "epoch": 1.72, + "grad_norm": 6.997276310446084, + "learning_rate": 1.035852779445734e-06, + "loss": 1.2236, + "step": 142743 + }, + { + "epoch": 1.72, + "grad_norm": 6.202184621890471, + "learning_rate": 1.0355938775409834e-06, + "loss": 1.4586, + "step": 142746 + }, + { + "epoch": 1.72, + "grad_norm": 11.024006892118202, + "learning_rate": 1.0353350062285206e-06, + "loss": 1.4579, + "step": 142749 + }, + { + "epoch": 1.72, + "grad_norm": 8.456926154609704, + "learning_rate": 1.0350761655092302e-06, + "loss": 1.158, + "step": 142752 + }, + { + "epoch": 1.72, + "grad_norm": 5.179379401472454, + "learning_rate": 1.0348173553839969e-06, + "loss": 0.984, + "step": 142755 + }, + { + "epoch": 1.72, + "grad_norm": 12.906680596032189, + "learning_rate": 1.0345585758537014e-06, + "loss": 1.0209, + "step": 142758 + }, + { + "epoch": 1.72, + "grad_norm": 8.39556404164535, + "learning_rate": 1.0342998269192283e-06, + "loss": 1.1315, + "step": 142761 + }, + { + "epoch": 1.72, + "grad_norm": 4.018542312766379, + "learning_rate": 1.0340411085814617e-06, + "loss": 1.2091, + "step": 142764 + }, + { + "epoch": 1.72, + "grad_norm": 16.848118498574458, + "learning_rate": 1.0337824208412817e-06, + "loss": 1.2971, + "step": 142767 + }, + { + "epoch": 1.72, + "grad_norm": 5.224093118684005, + "learning_rate": 1.0335237636995754e-06, + "loss": 1.1036, + "step": 142770 + }, + { + "epoch": 1.72, + "grad_norm": 10.944784602930117, + "learning_rate": 1.03326513715722e-06, + "loss": 1.0935, + "step": 142773 + }, + { + "epoch": 1.72, + "grad_norm": 12.1938339532499, + "learning_rate": 1.0330065412151002e-06, + "loss": 1.0517, + "step": 142776 + }, + { + "epoch": 1.72, + "grad_norm": 12.77922762898226, + "learning_rate": 1.032747975874101e-06, + "loss": 1.1711, + "step": 142779 + }, + { + "epoch": 1.72, + "grad_norm": 4.7788832126116665, + "learning_rate": 1.0324894411351039e-06, + "loss": 0.7566, + "step": 142782 + }, + { + "epoch": 1.72, + "grad_norm": 5.2041206147475005, + "learning_rate": 1.032230936998988e-06, + "loss": 1.1673, + "step": 142785 + }, + { + "epoch": 1.72, + "grad_norm": 10.196790638604488, + "learning_rate": 1.0319724634666395e-06, + "loss": 1.0628, + "step": 142788 + }, + { + "epoch": 1.72, + "grad_norm": 6.2951409926338835, + "learning_rate": 1.0317140205389375e-06, + "loss": 1.2006, + "step": 142791 + }, + { + "epoch": 1.72, + "grad_norm": 2.5498796917340663, + "learning_rate": 1.0314556082167647e-06, + "loss": 1.151, + "step": 142794 + }, + { + "epoch": 1.72, + "grad_norm": 3.2789002492703943, + "learning_rate": 1.031197226501006e-06, + "loss": 1.2679, + "step": 142797 + }, + { + "epoch": 1.72, + "grad_norm": 16.658797385007833, + "learning_rate": 1.0309388753925386e-06, + "loss": 1.166, + "step": 142800 + }, + { + "epoch": 1.72, + "grad_norm": 8.091045438275273, + "learning_rate": 1.030680554892246e-06, + "loss": 1.2317, + "step": 142803 + }, + { + "epoch": 1.72, + "grad_norm": 2.637165980921978, + "learning_rate": 1.030422265001012e-06, + "loss": 1.361, + "step": 142806 + }, + { + "epoch": 1.72, + "grad_norm": 9.46880071726691, + "learning_rate": 1.030164005719716e-06, + "loss": 1.1612, + "step": 142809 + }, + { + "epoch": 1.72, + "grad_norm": 13.77646542768648, + "learning_rate": 1.0299057770492371e-06, + "loss": 0.9909, + "step": 142812 + }, + { + "epoch": 1.72, + "grad_norm": 4.296204270326382, + "learning_rate": 1.0296475789904581e-06, + "loss": 0.7928, + "step": 142815 + }, + { + "epoch": 1.72, + "grad_norm": 5.417897162950054, + "learning_rate": 1.0293894115442615e-06, + "loss": 1.2411, + "step": 142818 + }, + { + "epoch": 1.72, + "grad_norm": 5.0267497135062955, + "learning_rate": 1.0291312747115278e-06, + "loss": 1.1834, + "step": 142821 + }, + { + "epoch": 1.72, + "grad_norm": 2.5629699048090213, + "learning_rate": 1.0288731684931397e-06, + "loss": 1.0561, + "step": 142824 + }, + { + "epoch": 1.72, + "grad_norm": 9.115342089629207, + "learning_rate": 1.028615092889974e-06, + "loss": 1.0094, + "step": 142827 + }, + { + "epoch": 1.72, + "grad_norm": 9.239522339743427, + "learning_rate": 1.0283570479029159e-06, + "loss": 1.2311, + "step": 142830 + }, + { + "epoch": 1.72, + "grad_norm": 15.103271957830032, + "learning_rate": 1.028099033532841e-06, + "loss": 1.1282, + "step": 142833 + }, + { + "epoch": 1.72, + "grad_norm": 4.946295579092038, + "learning_rate": 1.0278410497806345e-06, + "loss": 1.0092, + "step": 142836 + }, + { + "epoch": 1.72, + "grad_norm": 5.122025373283758, + "learning_rate": 1.0275830966471734e-06, + "loss": 1.0684, + "step": 142839 + }, + { + "epoch": 1.72, + "grad_norm": 5.182266852903458, + "learning_rate": 1.027325174133339e-06, + "loss": 0.6795, + "step": 142842 + }, + { + "epoch": 1.72, + "grad_norm": 7.533001328466121, + "learning_rate": 1.0270672822400119e-06, + "loss": 0.9335, + "step": 142845 + }, + { + "epoch": 1.72, + "grad_norm": 5.4146720958789265, + "learning_rate": 1.0268094209680735e-06, + "loss": 1.0719, + "step": 142848 + }, + { + "epoch": 1.72, + "grad_norm": 3.818560220518968, + "learning_rate": 1.026551590318403e-06, + "loss": 0.7898, + "step": 142851 + }, + { + "epoch": 1.72, + "grad_norm": 13.326439692453889, + "learning_rate": 1.026293790291878e-06, + "loss": 1.1933, + "step": 142854 + }, + { + "epoch": 1.72, + "grad_norm": 28.812229798081095, + "learning_rate": 1.0260360208893793e-06, + "loss": 0.9606, + "step": 142857 + }, + { + "epoch": 1.72, + "grad_norm": 10.0663036571651, + "learning_rate": 1.0257782821117867e-06, + "loss": 1.3738, + "step": 142860 + }, + { + "epoch": 1.72, + "grad_norm": 17.06926776488264, + "learning_rate": 1.0255205739599839e-06, + "loss": 1.0929, + "step": 142863 + }, + { + "epoch": 1.72, + "grad_norm": 3.1652574745019626, + "learning_rate": 1.0252628964348432e-06, + "loss": 1.0043, + "step": 142866 + }, + { + "epoch": 1.72, + "grad_norm": 9.31690896255678, + "learning_rate": 1.0250052495372487e-06, + "loss": 1.024, + "step": 142869 + }, + { + "epoch": 1.72, + "grad_norm": 8.37988117136656, + "learning_rate": 1.0247476332680806e-06, + "loss": 1.1443, + "step": 142872 + }, + { + "epoch": 1.72, + "grad_norm": 5.826492590078027, + "learning_rate": 1.0244900476282137e-06, + "loss": 0.8389, + "step": 142875 + }, + { + "epoch": 1.72, + "grad_norm": 12.65919775972796, + "learning_rate": 1.024232492618531e-06, + "loss": 1.1997, + "step": 142878 + }, + { + "epoch": 1.72, + "grad_norm": 19.66469921354167, + "learning_rate": 1.023974968239908e-06, + "loss": 1.3336, + "step": 142881 + }, + { + "epoch": 1.72, + "grad_norm": 5.680718594934223, + "learning_rate": 1.0237174744932244e-06, + "loss": 1.0523, + "step": 142884 + }, + { + "epoch": 1.72, + "grad_norm": 9.180716976787592, + "learning_rate": 1.0234600113793614e-06, + "loss": 1.2028, + "step": 142887 + }, + { + "epoch": 1.72, + "grad_norm": 11.714972680678851, + "learning_rate": 1.0232025788991962e-06, + "loss": 1.3862, + "step": 142890 + }, + { + "epoch": 1.72, + "grad_norm": 14.604152079193605, + "learning_rate": 1.0229451770536092e-06, + "loss": 1.0069, + "step": 142893 + }, + { + "epoch": 1.72, + "grad_norm": 20.60067892725167, + "learning_rate": 1.022687805843473e-06, + "loss": 0.9201, + "step": 142896 + }, + { + "epoch": 1.72, + "grad_norm": 32.45846306635753, + "learning_rate": 1.0224304652696703e-06, + "loss": 1.269, + "step": 142899 + }, + { + "epoch": 1.72, + "grad_norm": 5.3558932134328785, + "learning_rate": 1.0221731553330795e-06, + "loss": 1.3631, + "step": 142902 + }, + { + "epoch": 1.72, + "grad_norm": 14.516585778452322, + "learning_rate": 1.0219158760345794e-06, + "loss": 1.4072, + "step": 142905 + }, + { + "epoch": 1.72, + "grad_norm": 7.504724452901522, + "learning_rate": 1.021658627375044e-06, + "loss": 1.2017, + "step": 142908 + }, + { + "epoch": 1.72, + "grad_norm": 11.253978918089333, + "learning_rate": 1.0214014093553549e-06, + "loss": 1.4026, + "step": 142911 + }, + { + "epoch": 1.72, + "grad_norm": 8.583870547676156, + "learning_rate": 1.021144221976389e-06, + "loss": 1.3207, + "step": 142914 + }, + { + "epoch": 1.72, + "grad_norm": 4.990753275459907, + "learning_rate": 1.0208870652390257e-06, + "loss": 1.3068, + "step": 142917 + }, + { + "epoch": 1.72, + "grad_norm": 10.89998097665754, + "learning_rate": 1.0206299391441365e-06, + "loss": 1.1641, + "step": 142920 + }, + { + "epoch": 1.72, + "grad_norm": 4.895414022285203, + "learning_rate": 1.020372843692604e-06, + "loss": 1.2195, + "step": 142923 + }, + { + "epoch": 1.72, + "grad_norm": 7.478948087269171, + "learning_rate": 1.0201157788853055e-06, + "loss": 1.1258, + "step": 142926 + }, + { + "epoch": 1.72, + "grad_norm": 15.414835603288989, + "learning_rate": 1.0198587447231157e-06, + "loss": 1.3177, + "step": 142929 + }, + { + "epoch": 1.72, + "grad_norm": 8.329974113416148, + "learning_rate": 1.019601741206916e-06, + "loss": 1.1757, + "step": 142932 + }, + { + "epoch": 1.72, + "grad_norm": 2.8822978604053913, + "learning_rate": 1.0193447683375813e-06, + "loss": 1.3948, + "step": 142935 + }, + { + "epoch": 1.72, + "grad_norm": 10.443875991415949, + "learning_rate": 1.0190878261159853e-06, + "loss": 1.0613, + "step": 142938 + }, + { + "epoch": 1.72, + "grad_norm": 8.467178154869845, + "learning_rate": 1.0188309145430087e-06, + "loss": 1.3049, + "step": 142941 + }, + { + "epoch": 1.72, + "grad_norm": 3.479696615717162, + "learning_rate": 1.0185740336195282e-06, + "loss": 0.9324, + "step": 142944 + }, + { + "epoch": 1.72, + "grad_norm": 11.900571925092029, + "learning_rate": 1.0183171833464179e-06, + "loss": 0.9603, + "step": 142947 + }, + { + "epoch": 1.72, + "grad_norm": 8.607835594037653, + "learning_rate": 1.0180603637245557e-06, + "loss": 1.0002, + "step": 142950 + }, + { + "epoch": 1.72, + "grad_norm": 5.803330919305382, + "learning_rate": 1.0178035747548176e-06, + "loss": 1.708, + "step": 142953 + }, + { + "epoch": 1.72, + "grad_norm": 5.936921378675493, + "learning_rate": 1.017546816438083e-06, + "loss": 1.2924, + "step": 142956 + }, + { + "epoch": 1.72, + "grad_norm": 15.279758954919311, + "learning_rate": 1.0172900887752258e-06, + "loss": 1.2025, + "step": 142959 + }, + { + "epoch": 1.72, + "grad_norm": 6.250635242165758, + "learning_rate": 1.0170333917671205e-06, + "loss": 1.0552, + "step": 142962 + }, + { + "epoch": 1.72, + "grad_norm": 3.678995618991057, + "learning_rate": 1.0167767254146443e-06, + "loss": 0.9138, + "step": 142965 + }, + { + "epoch": 1.72, + "grad_norm": 8.915681348220913, + "learning_rate": 1.0165200897186734e-06, + "loss": 1.0583, + "step": 142968 + }, + { + "epoch": 1.72, + "grad_norm": 5.826046480053032, + "learning_rate": 1.0162634846800845e-06, + "loss": 0.9881, + "step": 142971 + }, + { + "epoch": 1.72, + "grad_norm": 3.3566352330876135, + "learning_rate": 1.0160069102997516e-06, + "loss": 1.2975, + "step": 142974 + }, + { + "epoch": 1.72, + "grad_norm": 10.649633762295872, + "learning_rate": 1.0157503665785528e-06, + "loss": 1.3273, + "step": 142977 + }, + { + "epoch": 1.72, + "grad_norm": 12.587185802502017, + "learning_rate": 1.0154938535173598e-06, + "loss": 1.0875, + "step": 142980 + }, + { + "epoch": 1.72, + "grad_norm": 8.559870337873123, + "learning_rate": 1.0152373711170493e-06, + "loss": 1.1971, + "step": 142983 + }, + { + "epoch": 1.72, + "grad_norm": 8.746303454132102, + "learning_rate": 1.014980919378501e-06, + "loss": 1.2318, + "step": 142986 + }, + { + "epoch": 1.72, + "grad_norm": 5.21596014265145, + "learning_rate": 1.0147244983025828e-06, + "loss": 1.3198, + "step": 142989 + }, + { + "epoch": 1.72, + "grad_norm": 21.549368989850134, + "learning_rate": 1.014468107890174e-06, + "loss": 1.1311, + "step": 142992 + }, + { + "epoch": 1.72, + "grad_norm": 2.9414642464090766, + "learning_rate": 1.01421174814215e-06, + "loss": 1.5281, + "step": 142995 + }, + { + "epoch": 1.72, + "grad_norm": 12.627661822400658, + "learning_rate": 1.0139554190593858e-06, + "loss": 1.0435, + "step": 142998 + }, + { + "epoch": 1.72, + "grad_norm": 16.980744865914875, + "learning_rate": 1.013699120642755e-06, + "loss": 0.907, + "step": 143001 + }, + { + "epoch": 1.72, + "grad_norm": 4.213761770393976, + "learning_rate": 1.0134428528931295e-06, + "loss": 0.9647, + "step": 143004 + }, + { + "epoch": 1.72, + "grad_norm": 3.4362057591932715, + "learning_rate": 1.0131866158113878e-06, + "loss": 0.988, + "step": 143007 + }, + { + "epoch": 1.72, + "grad_norm": 17.62906604574075, + "learning_rate": 1.0129304093984027e-06, + "loss": 1.4535, + "step": 143010 + }, + { + "epoch": 1.72, + "grad_norm": 3.876079511527054, + "learning_rate": 1.0126742336550511e-06, + "loss": 1.0145, + "step": 143013 + }, + { + "epoch": 1.72, + "grad_norm": 4.077732822487764, + "learning_rate": 1.0124180885822032e-06, + "loss": 1.1901, + "step": 143016 + }, + { + "epoch": 1.72, + "grad_norm": 6.470938899116876, + "learning_rate": 1.0121619741807364e-06, + "loss": 1.3211, + "step": 143019 + }, + { + "epoch": 1.72, + "grad_norm": 14.750566896889554, + "learning_rate": 1.0119058904515221e-06, + "loss": 1.1413, + "step": 143022 + }, + { + "epoch": 1.72, + "grad_norm": 6.76507072519506, + "learning_rate": 1.0116498373954365e-06, + "loss": 0.9858, + "step": 143025 + }, + { + "epoch": 1.72, + "grad_norm": 16.74694078697029, + "learning_rate": 1.0113938150133519e-06, + "loss": 0.9003, + "step": 143028 + }, + { + "epoch": 1.72, + "grad_norm": 11.426591037532775, + "learning_rate": 1.0111378233061408e-06, + "loss": 1.4832, + "step": 143031 + }, + { + "epoch": 1.72, + "grad_norm": 35.03123957053686, + "learning_rate": 1.0108818622746796e-06, + "loss": 1.2823, + "step": 143034 + }, + { + "epoch": 1.72, + "grad_norm": 7.863925288270231, + "learning_rate": 1.0106259319198408e-06, + "loss": 1.2094, + "step": 143037 + }, + { + "epoch": 1.72, + "grad_norm": 10.96248942719337, + "learning_rate": 1.0103700322425003e-06, + "loss": 0.9956, + "step": 143040 + }, + { + "epoch": 1.72, + "grad_norm": 6.692836313805314, + "learning_rate": 1.0101141632435274e-06, + "loss": 0.8929, + "step": 143043 + }, + { + "epoch": 1.72, + "grad_norm": 6.558076076316745, + "learning_rate": 1.0098583249237958e-06, + "loss": 1.0772, + "step": 143046 + }, + { + "epoch": 1.72, + "grad_norm": 24.512670952116512, + "learning_rate": 1.0096025172841795e-06, + "loss": 1.3011, + "step": 143049 + }, + { + "epoch": 1.72, + "grad_norm": 16.251686066064956, + "learning_rate": 1.009346740325553e-06, + "loss": 1.3715, + "step": 143052 + }, + { + "epoch": 1.72, + "grad_norm": 3.8084579753049006, + "learning_rate": 1.0090909940487848e-06, + "loss": 1.2351, + "step": 143055 + }, + { + "epoch": 1.72, + "grad_norm": 18.259070341713013, + "learning_rate": 1.0088352784547517e-06, + "loss": 1.2377, + "step": 143058 + }, + { + "epoch": 1.72, + "grad_norm": 2.9247127106804323, + "learning_rate": 1.0085795935443253e-06, + "loss": 1.1223, + "step": 143061 + }, + { + "epoch": 1.72, + "grad_norm": 4.1916599882020185, + "learning_rate": 1.0083239393183796e-06, + "loss": 1.105, + "step": 143064 + }, + { + "epoch": 1.72, + "grad_norm": 5.316671461704682, + "learning_rate": 1.008068315777786e-06, + "loss": 1.0595, + "step": 143067 + }, + { + "epoch": 1.72, + "grad_norm": 13.216317841800167, + "learning_rate": 1.0078127229234136e-06, + "loss": 1.3356, + "step": 143070 + }, + { + "epoch": 1.72, + "grad_norm": 8.006122885905187, + "learning_rate": 1.0075571607561373e-06, + "loss": 1.3866, + "step": 143073 + }, + { + "epoch": 1.72, + "grad_norm": 9.23485725403275, + "learning_rate": 1.00730162927683e-06, + "loss": 1.2995, + "step": 143076 + }, + { + "epoch": 1.72, + "grad_norm": 6.657097512442646, + "learning_rate": 1.0070461284863654e-06, + "loss": 1.0343, + "step": 143079 + }, + { + "epoch": 1.72, + "grad_norm": 9.670737532627156, + "learning_rate": 1.00679065838561e-06, + "loss": 1.2277, + "step": 143082 + }, + { + "epoch": 1.72, + "grad_norm": 3.4747985880986105, + "learning_rate": 1.0065352189754417e-06, + "loss": 0.8988, + "step": 143085 + }, + { + "epoch": 1.72, + "grad_norm": 10.383616508631981, + "learning_rate": 1.0062798102567272e-06, + "loss": 1.1067, + "step": 143088 + }, + { + "epoch": 1.72, + "grad_norm": 34.34117882900197, + "learning_rate": 1.0060244322303403e-06, + "loss": 1.0192, + "step": 143091 + }, + { + "epoch": 1.72, + "grad_norm": 6.651533333913538, + "learning_rate": 1.0057690848971534e-06, + "loss": 1.114, + "step": 143094 + }, + { + "epoch": 1.72, + "grad_norm": 7.934775979639501, + "learning_rate": 1.0055137682580362e-06, + "loss": 1.0078, + "step": 143097 + }, + { + "epoch": 1.72, + "grad_norm": 4.015671920581601, + "learning_rate": 1.0052584823138601e-06, + "loss": 0.8937, + "step": 143100 + }, + { + "epoch": 1.72, + "grad_norm": 4.938203432626349, + "learning_rate": 1.0050032270654963e-06, + "loss": 0.9547, + "step": 143103 + }, + { + "epoch": 1.72, + "grad_norm": 4.288513168054765, + "learning_rate": 1.004748002513819e-06, + "loss": 1.0597, + "step": 143106 + }, + { + "epoch": 1.72, + "grad_norm": 8.131513848850576, + "learning_rate": 1.0044928086596972e-06, + "loss": 0.9922, + "step": 143109 + }, + { + "epoch": 1.72, + "grad_norm": 4.855814908277267, + "learning_rate": 1.004237645503998e-06, + "loss": 1.1238, + "step": 143112 + }, + { + "epoch": 1.72, + "grad_norm": 4.6002754311281135, + "learning_rate": 1.0039825130475967e-06, + "loss": 0.7476, + "step": 143115 + }, + { + "epoch": 1.72, + "grad_norm": 27.570170930680316, + "learning_rate": 1.003727411291363e-06, + "loss": 1.4319, + "step": 143118 + }, + { + "epoch": 1.72, + "grad_norm": 9.84686063829757, + "learning_rate": 1.0034723402361679e-06, + "loss": 1.0534, + "step": 143121 + }, + { + "epoch": 1.72, + "grad_norm": 5.472030809471774, + "learning_rate": 1.0032172998828804e-06, + "loss": 0.9812, + "step": 143124 + }, + { + "epoch": 1.72, + "grad_norm": 11.235058888940369, + "learning_rate": 1.002962290232372e-06, + "loss": 1.0482, + "step": 143127 + }, + { + "epoch": 1.72, + "grad_norm": 16.36509159252241, + "learning_rate": 1.0027073112855123e-06, + "loss": 1.4224, + "step": 143130 + }, + { + "epoch": 1.72, + "grad_norm": 23.288209947688163, + "learning_rate": 1.0024523630431727e-06, + "loss": 1.4179, + "step": 143133 + }, + { + "epoch": 1.72, + "grad_norm": 10.822311645995695, + "learning_rate": 1.002197445506221e-06, + "loss": 1.4229, + "step": 143136 + }, + { + "epoch": 1.72, + "grad_norm": 9.363190476350422, + "learning_rate": 1.0019425586755282e-06, + "loss": 0.9825, + "step": 143139 + }, + { + "epoch": 1.72, + "grad_norm": 11.467546593857657, + "learning_rate": 1.0016877025519645e-06, + "loss": 1.1494, + "step": 143142 + }, + { + "epoch": 1.72, + "grad_norm": 7.805576750638717, + "learning_rate": 1.001432877136399e-06, + "loss": 1.169, + "step": 143145 + }, + { + "epoch": 1.72, + "grad_norm": 6.868210484440424, + "learning_rate": 1.0011780824297036e-06, + "loss": 1.0306, + "step": 143148 + }, + { + "epoch": 1.72, + "grad_norm": 5.576635561463068, + "learning_rate": 1.000923318432746e-06, + "loss": 0.9677, + "step": 143151 + }, + { + "epoch": 1.72, + "grad_norm": 11.134806719993653, + "learning_rate": 1.000668585146395e-06, + "loss": 0.9759, + "step": 143154 + }, + { + "epoch": 1.72, + "grad_norm": 2.9531251501627023, + "learning_rate": 1.0004138825715193e-06, + "loss": 0.8529, + "step": 143157 + }, + { + "epoch": 1.72, + "grad_norm": 8.363167567019422, + "learning_rate": 1.000159210708992e-06, + "loss": 0.6867, + "step": 143160 + }, + { + "epoch": 1.72, + "grad_norm": 5.110361992909055, + "learning_rate": 9.999045695596776e-07, + "loss": 0.8906, + "step": 143163 + }, + { + "epoch": 1.72, + "grad_norm": 10.028363839503207, + "learning_rate": 9.99649959124447e-07, + "loss": 0.9497, + "step": 143166 + }, + { + "epoch": 1.72, + "grad_norm": 7.986156571447616, + "learning_rate": 9.99395379404171e-07, + "loss": 0.8667, + "step": 143169 + }, + { + "epoch": 1.72, + "grad_norm": 4.8657875597462015, + "learning_rate": 9.991408303997142e-07, + "loss": 1.1155, + "step": 143172 + }, + { + "epoch": 1.72, + "grad_norm": 11.80436308131269, + "learning_rate": 9.988863121119496e-07, + "loss": 0.9173, + "step": 143175 + }, + { + "epoch": 1.72, + "grad_norm": 5.609214865282781, + "learning_rate": 9.986318245417425e-07, + "loss": 0.9417, + "step": 143178 + }, + { + "epoch": 1.72, + "grad_norm": 4.993289131829607, + "learning_rate": 9.983773676899634e-07, + "loss": 0.9531, + "step": 143181 + }, + { + "epoch": 1.72, + "grad_norm": 42.81660581681615, + "learning_rate": 9.981229415574789e-07, + "loss": 1.1328, + "step": 143184 + }, + { + "epoch": 1.72, + "grad_norm": 25.101743961257892, + "learning_rate": 9.9786854614516e-07, + "loss": 1.1589, + "step": 143187 + }, + { + "epoch": 1.72, + "grad_norm": 8.903322518902977, + "learning_rate": 9.97614181453872e-07, + "loss": 1.0004, + "step": 143190 + }, + { + "epoch": 1.72, + "grad_norm": 8.997113300777034, + "learning_rate": 9.973598474844858e-07, + "loss": 1.3283, + "step": 143193 + }, + { + "epoch": 1.72, + "grad_norm": 3.356240904790925, + "learning_rate": 9.971055442378652e-07, + "loss": 1.1037, + "step": 143196 + }, + { + "epoch": 1.72, + "grad_norm": 9.140799811962419, + "learning_rate": 9.968512717148815e-07, + "loss": 1.1277, + "step": 143199 + }, + { + "epoch": 1.72, + "grad_norm": 20.293346226565347, + "learning_rate": 9.965970299164029e-07, + "loss": 1.6048, + "step": 143202 + }, + { + "epoch": 1.72, + "grad_norm": 3.1273060826027286, + "learning_rate": 9.963428188432943e-07, + "loss": 0.8434, + "step": 143205 + }, + { + "epoch": 1.72, + "grad_norm": 9.728329959982961, + "learning_rate": 9.960886384964242e-07, + "loss": 1.1934, + "step": 143208 + }, + { + "epoch": 1.72, + "grad_norm": 5.160913610703731, + "learning_rate": 9.958344888766624e-07, + "loss": 0.9638, + "step": 143211 + }, + { + "epoch": 1.72, + "grad_norm": 18.167405390198322, + "learning_rate": 9.955803699848733e-07, + "loss": 1.2966, + "step": 143214 + }, + { + "epoch": 1.72, + "grad_norm": 6.774769772222499, + "learning_rate": 9.953262818219245e-07, + "loss": 1.2581, + "step": 143217 + }, + { + "epoch": 1.72, + "grad_norm": 8.071221319028552, + "learning_rate": 9.950722243886823e-07, + "loss": 1.5816, + "step": 143220 + }, + { + "epoch": 1.72, + "grad_norm": 7.1688937519105815, + "learning_rate": 9.94818197686016e-07, + "loss": 1.4594, + "step": 143223 + }, + { + "epoch": 1.72, + "grad_norm": 23.18425623728069, + "learning_rate": 9.945642017147916e-07, + "loss": 1.2123, + "step": 143226 + }, + { + "epoch": 1.72, + "grad_norm": 10.097558769640537, + "learning_rate": 9.943102364758771e-07, + "loss": 0.8049, + "step": 143229 + }, + { + "epoch": 1.72, + "grad_norm": 4.66781282857108, + "learning_rate": 9.940563019701366e-07, + "loss": 0.9664, + "step": 143232 + }, + { + "epoch": 1.72, + "grad_norm": 9.819343212594523, + "learning_rate": 9.93802398198439e-07, + "loss": 1.1292, + "step": 143235 + }, + { + "epoch": 1.72, + "grad_norm": 4.104817771262112, + "learning_rate": 9.935485251616485e-07, + "loss": 1.1993, + "step": 143238 + }, + { + "epoch": 1.72, + "grad_norm": 8.443771579023542, + "learning_rate": 9.932946828606349e-07, + "loss": 1.0338, + "step": 143241 + }, + { + "epoch": 1.72, + "grad_norm": 7.126766535230922, + "learning_rate": 9.930408712962603e-07, + "loss": 1.2635, + "step": 143244 + }, + { + "epoch": 1.72, + "grad_norm": 6.44658605355152, + "learning_rate": 9.927870904693926e-07, + "loss": 1.3202, + "step": 143247 + }, + { + "epoch": 1.72, + "grad_norm": 24.552077814210968, + "learning_rate": 9.92533340380899e-07, + "loss": 1.0066, + "step": 143250 + }, + { + "epoch": 1.72, + "grad_norm": 16.484621295954252, + "learning_rate": 9.922796210316455e-07, + "loss": 0.972, + "step": 143253 + }, + { + "epoch": 1.72, + "grad_norm": 9.180776580773115, + "learning_rate": 9.92025932422498e-07, + "loss": 1.0891, + "step": 143256 + }, + { + "epoch": 1.72, + "grad_norm": 6.626506229655475, + "learning_rate": 9.91772274554319e-07, + "loss": 1.1541, + "step": 143259 + }, + { + "epoch": 1.72, + "grad_norm": 8.470510893536098, + "learning_rate": 9.915186474279758e-07, + "loss": 1.1959, + "step": 143262 + }, + { + "epoch": 1.72, + "grad_norm": 11.04277508583223, + "learning_rate": 9.912650510443355e-07, + "loss": 1.229, + "step": 143265 + }, + { + "epoch": 1.72, + "grad_norm": 26.716416730239946, + "learning_rate": 9.91011485404264e-07, + "loss": 1.1175, + "step": 143268 + }, + { + "epoch": 1.72, + "grad_norm": 32.78693593752164, + "learning_rate": 9.90757950508624e-07, + "loss": 1.6699, + "step": 143271 + }, + { + "epoch": 1.72, + "grad_norm": 6.6344583728731745, + "learning_rate": 9.905044463582826e-07, + "loss": 1.1064, + "step": 143274 + }, + { + "epoch": 1.72, + "grad_norm": 6.253898375370752, + "learning_rate": 9.902509729541044e-07, + "loss": 0.9212, + "step": 143277 + }, + { + "epoch": 1.72, + "grad_norm": 13.421837256405864, + "learning_rate": 9.899975302969534e-07, + "loss": 1.2437, + "step": 143280 + }, + { + "epoch": 1.72, + "grad_norm": 33.17496568401581, + "learning_rate": 9.897441183876978e-07, + "loss": 1.2832, + "step": 143283 + }, + { + "epoch": 1.72, + "grad_norm": 10.903252908728183, + "learning_rate": 9.894907372271977e-07, + "loss": 1.3952, + "step": 143286 + }, + { + "epoch": 1.72, + "grad_norm": 4.147269240917953, + "learning_rate": 9.892373868163196e-07, + "loss": 1.1248, + "step": 143289 + }, + { + "epoch": 1.72, + "grad_norm": 11.972538968981427, + "learning_rate": 9.889840671559291e-07, + "loss": 1.1219, + "step": 143292 + }, + { + "epoch": 1.72, + "grad_norm": 16.291613252345694, + "learning_rate": 9.887307782468925e-07, + "loss": 1.3267, + "step": 143295 + }, + { + "epoch": 1.72, + "grad_norm": 4.696669680495721, + "learning_rate": 9.884775200900698e-07, + "loss": 1.1178, + "step": 143298 + }, + { + "epoch": 1.72, + "grad_norm": 8.051397161158928, + "learning_rate": 9.882242926863296e-07, + "loss": 1.1972, + "step": 143301 + }, + { + "epoch": 1.72, + "grad_norm": 17.622936332907436, + "learning_rate": 9.879710960365307e-07, + "loss": 0.9458, + "step": 143304 + }, + { + "epoch": 1.72, + "grad_norm": 12.399286220686548, + "learning_rate": 9.877179301415419e-07, + "loss": 1.5544, + "step": 143307 + }, + { + "epoch": 1.72, + "grad_norm": 5.51980580607326, + "learning_rate": 9.874647950022264e-07, + "loss": 0.895, + "step": 143310 + }, + { + "epoch": 1.72, + "grad_norm": 9.275730267341736, + "learning_rate": 9.87211690619445e-07, + "loss": 1.1563, + "step": 143313 + }, + { + "epoch": 1.72, + "grad_norm": 13.041292330039875, + "learning_rate": 9.869586169940648e-07, + "loss": 1.6361, + "step": 143316 + }, + { + "epoch": 1.72, + "grad_norm": 7.866681915697574, + "learning_rate": 9.867055741269505e-07, + "loss": 0.8913, + "step": 143319 + }, + { + "epoch": 1.72, + "grad_norm": 11.183562887236505, + "learning_rate": 9.864525620189624e-07, + "loss": 0.9558, + "step": 143322 + }, + { + "epoch": 1.72, + "grad_norm": 5.903534223034828, + "learning_rate": 9.861995806709635e-07, + "loss": 1.0976, + "step": 143325 + }, + { + "epoch": 1.72, + "grad_norm": 6.362510311907619, + "learning_rate": 9.859466300838183e-07, + "loss": 1.3389, + "step": 143328 + }, + { + "epoch": 1.72, + "grad_norm": 4.941547798372154, + "learning_rate": 9.856937102583918e-07, + "loss": 1.0293, + "step": 143331 + }, + { + "epoch": 1.72, + "grad_norm": 4.618755668888506, + "learning_rate": 9.854408211955446e-07, + "loss": 1.1851, + "step": 143334 + }, + { + "epoch": 1.72, + "grad_norm": 3.2663052518076747, + "learning_rate": 9.851879628961425e-07, + "loss": 1.3725, + "step": 143337 + }, + { + "epoch": 1.72, + "grad_norm": 4.921230090232551, + "learning_rate": 9.849351353610458e-07, + "loss": 1.0571, + "step": 143340 + }, + { + "epoch": 1.72, + "grad_norm": 7.8025050230398065, + "learning_rate": 9.846823385911197e-07, + "loss": 1.0684, + "step": 143343 + }, + { + "epoch": 1.72, + "grad_norm": 5.212255218935212, + "learning_rate": 9.844295725872233e-07, + "loss": 1.0283, + "step": 143346 + }, + { + "epoch": 1.72, + "grad_norm": 2.8283049718195756, + "learning_rate": 9.841768373502235e-07, + "loss": 1.0642, + "step": 143349 + }, + { + "epoch": 1.72, + "grad_norm": 8.377514919719141, + "learning_rate": 9.83924132880979e-07, + "loss": 1.1996, + "step": 143352 + }, + { + "epoch": 1.72, + "grad_norm": 7.027317314840753, + "learning_rate": 9.836714591803531e-07, + "loss": 1.1472, + "step": 143355 + }, + { + "epoch": 1.72, + "grad_norm": 5.291700720461051, + "learning_rate": 9.834188162492098e-07, + "loss": 1.4515, + "step": 143358 + }, + { + "epoch": 1.72, + "grad_norm": 7.0058903716688, + "learning_rate": 9.831662040884116e-07, + "loss": 0.9315, + "step": 143361 + }, + { + "epoch": 1.72, + "grad_norm": 4.082508860154551, + "learning_rate": 9.8291362269882e-07, + "loss": 1.0449, + "step": 143364 + }, + { + "epoch": 1.72, + "grad_norm": 16.536590695475418, + "learning_rate": 9.826610720812935e-07, + "loss": 1.3515, + "step": 143367 + }, + { + "epoch": 1.72, + "grad_norm": 12.269977673739614, + "learning_rate": 9.824085522366976e-07, + "loss": 1.1134, + "step": 143370 + }, + { + "epoch": 1.72, + "grad_norm": 3.2619380069665436, + "learning_rate": 9.82156063165892e-07, + "loss": 1.1083, + "step": 143373 + }, + { + "epoch": 1.72, + "grad_norm": 6.39480985844224, + "learning_rate": 9.819036048697428e-07, + "loss": 1.0368, + "step": 143376 + }, + { + "epoch": 1.72, + "grad_norm": 17.92757846182627, + "learning_rate": 9.816511773491056e-07, + "loss": 1.1191, + "step": 143379 + }, + { + "epoch": 1.72, + "grad_norm": 4.429276510082408, + "learning_rate": 9.813987806048453e-07, + "loss": 0.9712, + "step": 143382 + }, + { + "epoch": 1.72, + "grad_norm": 8.9548388202283, + "learning_rate": 9.811464146378235e-07, + "loss": 1.3117, + "step": 143385 + }, + { + "epoch": 1.72, + "grad_norm": 12.167159627445265, + "learning_rate": 9.808940794488987e-07, + "loss": 1.3762, + "step": 143388 + }, + { + "epoch": 1.72, + "grad_norm": 7.338896854214389, + "learning_rate": 9.806417750389351e-07, + "loss": 1.5656, + "step": 143391 + }, + { + "epoch": 1.72, + "grad_norm": 3.9079439019056994, + "learning_rate": 9.803895014087915e-07, + "loss": 1.1944, + "step": 143394 + }, + { + "epoch": 1.72, + "grad_norm": 3.8126833049823268, + "learning_rate": 9.801372585593293e-07, + "loss": 0.9316, + "step": 143397 + }, + { + "epoch": 1.72, + "grad_norm": 14.762012089241379, + "learning_rate": 9.7988504649141e-07, + "loss": 1.3196, + "step": 143400 + }, + { + "epoch": 1.72, + "grad_norm": 5.119181704058211, + "learning_rate": 9.79632865205895e-07, + "loss": 1.2291, + "step": 143403 + }, + { + "epoch": 1.72, + "grad_norm": 10.460551508337785, + "learning_rate": 9.79380714703645e-07, + "loss": 0.9611, + "step": 143406 + }, + { + "epoch": 1.72, + "grad_norm": 10.687994134449067, + "learning_rate": 9.791285949855167e-07, + "loss": 1.0898, + "step": 143409 + }, + { + "epoch": 1.72, + "grad_norm": 10.804831488653901, + "learning_rate": 9.788765060523741e-07, + "loss": 1.1921, + "step": 143412 + }, + { + "epoch": 1.72, + "grad_norm": 4.1071210508415845, + "learning_rate": 9.786244479050756e-07, + "loss": 1.0376, + "step": 143415 + }, + { + "epoch": 1.72, + "grad_norm": 6.12047427724564, + "learning_rate": 9.783724205444844e-07, + "loss": 1.0734, + "step": 143418 + }, + { + "epoch": 1.72, + "grad_norm": 4.706074320020825, + "learning_rate": 9.78120423971457e-07, + "loss": 1.2507, + "step": 143421 + }, + { + "epoch": 1.72, + "grad_norm": 9.725014417155167, + "learning_rate": 9.778684581868548e-07, + "loss": 1.1166, + "step": 143424 + }, + { + "epoch": 1.72, + "grad_norm": 7.148261367476068, + "learning_rate": 9.776165231915403e-07, + "loss": 1.225, + "step": 143427 + }, + { + "epoch": 1.72, + "grad_norm": 3.5449127310720816, + "learning_rate": 9.773646189863694e-07, + "loss": 0.7312, + "step": 143430 + }, + { + "epoch": 1.72, + "grad_norm": 6.616335400838111, + "learning_rate": 9.771127455722028e-07, + "loss": 1.0475, + "step": 143433 + }, + { + "epoch": 1.72, + "grad_norm": 7.212197149532881, + "learning_rate": 9.768609029498987e-07, + "loss": 1.3179, + "step": 143436 + }, + { + "epoch": 1.72, + "grad_norm": 5.004278324004896, + "learning_rate": 9.766090911203197e-07, + "loss": 1.0775, + "step": 143439 + }, + { + "epoch": 1.72, + "grad_norm": 16.21071784456766, + "learning_rate": 9.763573100843226e-07, + "loss": 0.9116, + "step": 143442 + }, + { + "epoch": 1.72, + "grad_norm": 6.567049953680866, + "learning_rate": 9.761055598427705e-07, + "loss": 0.9323, + "step": 143445 + }, + { + "epoch": 1.72, + "grad_norm": 10.594680370068561, + "learning_rate": 9.758538403965189e-07, + "loss": 1.3233, + "step": 143448 + }, + { + "epoch": 1.72, + "grad_norm": 16.044730258786974, + "learning_rate": 9.756021517464265e-07, + "loss": 0.871, + "step": 143451 + }, + { + "epoch": 1.72, + "grad_norm": 27.783953044090005, + "learning_rate": 9.753504938933533e-07, + "loss": 1.0251, + "step": 143454 + }, + { + "epoch": 1.73, + "grad_norm": 5.670955519745777, + "learning_rate": 9.7509886683816e-07, + "loss": 1.3456, + "step": 143457 + }, + { + "epoch": 1.73, + "grad_norm": 13.641085626515661, + "learning_rate": 9.748472705817013e-07, + "loss": 1.2473, + "step": 143460 + }, + { + "epoch": 1.73, + "grad_norm": 14.362738393833077, + "learning_rate": 9.745957051248378e-07, + "loss": 1.1499, + "step": 143463 + }, + { + "epoch": 1.73, + "grad_norm": 9.208344419198847, + "learning_rate": 9.743441704684288e-07, + "loss": 1.1177, + "step": 143466 + }, + { + "epoch": 1.73, + "grad_norm": 10.492596343659827, + "learning_rate": 9.740926666133344e-07, + "loss": 1.1747, + "step": 143469 + }, + { + "epoch": 1.73, + "grad_norm": 12.625788871341788, + "learning_rate": 9.738411935604098e-07, + "loss": 1.1071, + "step": 143472 + }, + { + "epoch": 1.73, + "grad_norm": 6.872271991786877, + "learning_rate": 9.73589751310512e-07, + "loss": 1.2236, + "step": 143475 + }, + { + "epoch": 1.73, + "grad_norm": 9.685648571081447, + "learning_rate": 9.733383398645014e-07, + "loss": 1.3631, + "step": 143478 + }, + { + "epoch": 1.73, + "grad_norm": 4.001934995623698, + "learning_rate": 9.730869592232362e-07, + "loss": 1.0866, + "step": 143481 + }, + { + "epoch": 1.73, + "grad_norm": 6.051731811278555, + "learning_rate": 9.728356093875747e-07, + "loss": 1.0622, + "step": 143484 + }, + { + "epoch": 1.73, + "grad_norm": 14.910893962740484, + "learning_rate": 9.725842903583716e-07, + "loss": 1.2503, + "step": 143487 + }, + { + "epoch": 1.73, + "grad_norm": 6.3391432596298705, + "learning_rate": 9.723330021364897e-07, + "loss": 1.0875, + "step": 143490 + }, + { + "epoch": 1.73, + "grad_norm": 6.036295784405317, + "learning_rate": 9.720817447227815e-07, + "loss": 1.4256, + "step": 143493 + }, + { + "epoch": 1.73, + "grad_norm": 13.657615989625299, + "learning_rate": 9.718305181181053e-07, + "loss": 0.9943, + "step": 143496 + }, + { + "epoch": 1.73, + "grad_norm": 6.605143005755722, + "learning_rate": 9.715793223233227e-07, + "loss": 0.945, + "step": 143499 + }, + { + "epoch": 1.73, + "grad_norm": 11.605415638488267, + "learning_rate": 9.713281573392853e-07, + "loss": 0.846, + "step": 143502 + }, + { + "epoch": 1.73, + "grad_norm": 4.392220079394844, + "learning_rate": 9.71077023166853e-07, + "loss": 1.2426, + "step": 143505 + }, + { + "epoch": 1.73, + "grad_norm": 2.68084598163636, + "learning_rate": 9.708259198068825e-07, + "loss": 0.8619, + "step": 143508 + }, + { + "epoch": 1.73, + "grad_norm": 8.96718965925836, + "learning_rate": 9.705748472602329e-07, + "loss": 1.2357, + "step": 143511 + }, + { + "epoch": 1.73, + "grad_norm": 8.595621829863305, + "learning_rate": 9.703238055277586e-07, + "loss": 0.9464, + "step": 143514 + }, + { + "epoch": 1.73, + "grad_norm": 5.306698271171341, + "learning_rate": 9.70072794610315e-07, + "loss": 1.0049, + "step": 143517 + }, + { + "epoch": 1.73, + "grad_norm": 13.036859814247851, + "learning_rate": 9.698218145087612e-07, + "loss": 1.0322, + "step": 143520 + }, + { + "epoch": 1.73, + "grad_norm": 7.036955538537563, + "learning_rate": 9.695708652239522e-07, + "loss": 0.8511, + "step": 143523 + }, + { + "epoch": 1.73, + "grad_norm": 10.374087970407919, + "learning_rate": 9.69319946756746e-07, + "loss": 0.8069, + "step": 143526 + }, + { + "epoch": 1.73, + "grad_norm": 8.871759379021873, + "learning_rate": 9.690690591079976e-07, + "loss": 1.2927, + "step": 143529 + }, + { + "epoch": 1.73, + "grad_norm": 21.95497698279286, + "learning_rate": 9.688182022785653e-07, + "loss": 1.1758, + "step": 143532 + }, + { + "epoch": 1.73, + "grad_norm": 17.90383070719225, + "learning_rate": 9.685673762693003e-07, + "loss": 1.1705, + "step": 143535 + }, + { + "epoch": 1.73, + "grad_norm": 7.449592366358713, + "learning_rate": 9.683165810810647e-07, + "loss": 1.2856, + "step": 143538 + }, + { + "epoch": 1.73, + "grad_norm": 17.15982560830913, + "learning_rate": 9.680658167147094e-07, + "loss": 0.9872, + "step": 143541 + }, + { + "epoch": 1.73, + "grad_norm": 8.481336087418773, + "learning_rate": 9.67815083171092e-07, + "loss": 1.5429, + "step": 143544 + }, + { + "epoch": 1.73, + "grad_norm": 6.228279762611069, + "learning_rate": 9.675643804510681e-07, + "loss": 0.9418, + "step": 143547 + }, + { + "epoch": 1.73, + "grad_norm": 3.94599244066465, + "learning_rate": 9.67313708555494e-07, + "loss": 0.9969, + "step": 143550 + }, + { + "epoch": 1.73, + "grad_norm": 7.472466644698902, + "learning_rate": 9.670630674852266e-07, + "loss": 0.9756, + "step": 143553 + }, + { + "epoch": 1.73, + "grad_norm": 8.810682934416008, + "learning_rate": 9.668124572411198e-07, + "loss": 1.0694, + "step": 143556 + }, + { + "epoch": 1.73, + "grad_norm": 25.673065298938965, + "learning_rate": 9.665618778240249e-07, + "loss": 1.2758, + "step": 143559 + }, + { + "epoch": 1.73, + "grad_norm": 13.694350969871987, + "learning_rate": 9.663113292348014e-07, + "loss": 1.3159, + "step": 143562 + }, + { + "epoch": 1.73, + "grad_norm": 19.479656509967334, + "learning_rate": 9.660608114743054e-07, + "loss": 1.1201, + "step": 143565 + }, + { + "epoch": 1.73, + "grad_norm": 5.038938017253828, + "learning_rate": 9.65810324543388e-07, + "loss": 1.2049, + "step": 143568 + }, + { + "epoch": 1.73, + "grad_norm": 9.327811071305216, + "learning_rate": 9.655598684429057e-07, + "loss": 1.0906, + "step": 143571 + }, + { + "epoch": 1.73, + "grad_norm": 6.883324060512418, + "learning_rate": 9.653094431737154e-07, + "loss": 1.0524, + "step": 143574 + }, + { + "epoch": 1.73, + "grad_norm": 12.03449086001208, + "learning_rate": 9.650590487366673e-07, + "loss": 1.3845, + "step": 143577 + }, + { + "epoch": 1.73, + "grad_norm": 6.920965132598412, + "learning_rate": 9.648086851326199e-07, + "loss": 1.2656, + "step": 143580 + }, + { + "epoch": 1.73, + "grad_norm": 5.047270592859453, + "learning_rate": 9.645583523624246e-07, + "loss": 0.9405, + "step": 143583 + }, + { + "epoch": 1.73, + "grad_norm": 4.1008255066637345, + "learning_rate": 9.643080504269374e-07, + "loss": 1.2954, + "step": 143586 + }, + { + "epoch": 1.73, + "grad_norm": 8.763337090361283, + "learning_rate": 9.640577793270124e-07, + "loss": 0.9397, + "step": 143589 + }, + { + "epoch": 1.73, + "grad_norm": 10.479118026013811, + "learning_rate": 9.63807539063505e-07, + "loss": 0.9876, + "step": 143592 + }, + { + "epoch": 1.73, + "grad_norm": 6.6985301788129705, + "learning_rate": 9.635573296372657e-07, + "loss": 1.0655, + "step": 143595 + }, + { + "epoch": 1.73, + "grad_norm": 11.71329069765757, + "learning_rate": 9.63307151049152e-07, + "loss": 0.9967, + "step": 143598 + }, + { + "epoch": 1.73, + "grad_norm": 17.183502869985592, + "learning_rate": 9.630570033000141e-07, + "loss": 1.394, + "step": 143601 + }, + { + "epoch": 1.73, + "grad_norm": 18.022136326752456, + "learning_rate": 9.62806886390708e-07, + "loss": 1.0891, + "step": 143604 + }, + { + "epoch": 1.73, + "grad_norm": 9.105368197792085, + "learning_rate": 9.625568003220886e-07, + "loss": 1.2477, + "step": 143607 + }, + { + "epoch": 1.73, + "grad_norm": 4.988369247043284, + "learning_rate": 9.623067450950052e-07, + "loss": 0.8543, + "step": 143610 + }, + { + "epoch": 1.73, + "grad_norm": 15.448289221997229, + "learning_rate": 9.620567207103138e-07, + "loss": 1.0439, + "step": 143613 + }, + { + "epoch": 1.73, + "grad_norm": 6.288979677484678, + "learning_rate": 9.618067271688691e-07, + "loss": 1.0156, + "step": 143616 + }, + { + "epoch": 1.73, + "grad_norm": 7.735720313977525, + "learning_rate": 9.615567644715218e-07, + "loss": 1.4961, + "step": 143619 + }, + { + "epoch": 1.73, + "grad_norm": 8.962192885116629, + "learning_rate": 9.613068326191254e-07, + "loss": 1.1304, + "step": 143622 + }, + { + "epoch": 1.73, + "grad_norm": 3.479248359432423, + "learning_rate": 9.610569316125329e-07, + "loss": 1.136, + "step": 143625 + }, + { + "epoch": 1.73, + "grad_norm": 42.29140318346941, + "learning_rate": 9.608070614525967e-07, + "loss": 1.5149, + "step": 143628 + }, + { + "epoch": 1.73, + "grad_norm": 15.253791362098701, + "learning_rate": 9.605572221401704e-07, + "loss": 1.0235, + "step": 143631 + }, + { + "epoch": 1.73, + "grad_norm": 20.362259562174877, + "learning_rate": 9.60307413676107e-07, + "loss": 1.1399, + "step": 143634 + }, + { + "epoch": 1.73, + "grad_norm": 12.029391109339466, + "learning_rate": 9.600576360612578e-07, + "loss": 0.7803, + "step": 143637 + }, + { + "epoch": 1.73, + "grad_norm": 8.339670808573985, + "learning_rate": 9.598078892964768e-07, + "loss": 0.9986, + "step": 143640 + }, + { + "epoch": 1.73, + "grad_norm": 18.906730026886834, + "learning_rate": 9.595581733826142e-07, + "loss": 1.3584, + "step": 143643 + }, + { + "epoch": 1.73, + "grad_norm": 3.6732724850165814, + "learning_rate": 9.59308488320524e-07, + "loss": 1.2695, + "step": 143646 + }, + { + "epoch": 1.73, + "grad_norm": 20.402976032232623, + "learning_rate": 9.590588341110562e-07, + "loss": 0.9909, + "step": 143649 + }, + { + "epoch": 1.73, + "grad_norm": 5.537504915608707, + "learning_rate": 9.58809210755064e-07, + "loss": 1.341, + "step": 143652 + }, + { + "epoch": 1.73, + "grad_norm": 5.66932912890821, + "learning_rate": 9.585596182533995e-07, + "loss": 1.2941, + "step": 143655 + }, + { + "epoch": 1.73, + "grad_norm": 12.562220379687558, + "learning_rate": 9.583100566069138e-07, + "loss": 0.9957, + "step": 143658 + }, + { + "epoch": 1.73, + "grad_norm": 53.675157192228504, + "learning_rate": 9.58060525816462e-07, + "loss": 1.3555, + "step": 143661 + }, + { + "epoch": 1.73, + "grad_norm": 6.100882766206927, + "learning_rate": 9.578110258828921e-07, + "loss": 1.0933, + "step": 143664 + }, + { + "epoch": 1.73, + "grad_norm": 27.42443610981965, + "learning_rate": 9.575615568070539e-07, + "loss": 0.9897, + "step": 143667 + }, + { + "epoch": 1.73, + "grad_norm": 11.504381409546113, + "learning_rate": 9.573121185898016e-07, + "loss": 0.7724, + "step": 143670 + }, + { + "epoch": 1.73, + "grad_norm": 3.2685581550987415, + "learning_rate": 9.570627112319876e-07, + "loss": 1.1761, + "step": 143673 + }, + { + "epoch": 1.73, + "grad_norm": 6.506728649875216, + "learning_rate": 9.568133347344589e-07, + "loss": 1.4448, + "step": 143676 + }, + { + "epoch": 1.73, + "grad_norm": 9.211193477250928, + "learning_rate": 9.565639890980693e-07, + "loss": 1.1104, + "step": 143679 + }, + { + "epoch": 1.73, + "grad_norm": 4.092452644745198, + "learning_rate": 9.563146743236718e-07, + "loss": 1.4916, + "step": 143682 + }, + { + "epoch": 1.73, + "grad_norm": 10.888446965752374, + "learning_rate": 9.56065390412112e-07, + "loss": 1.0887, + "step": 143685 + }, + { + "epoch": 1.73, + "grad_norm": 11.589682849386712, + "learning_rate": 9.55816137364246e-07, + "loss": 1.2091, + "step": 143688 + }, + { + "epoch": 1.73, + "grad_norm": 10.949214165249327, + "learning_rate": 9.555669151809198e-07, + "loss": 1.005, + "step": 143691 + }, + { + "epoch": 1.73, + "grad_norm": 9.630162552832552, + "learning_rate": 9.553177238629862e-07, + "loss": 1.4321, + "step": 143694 + }, + { + "epoch": 1.73, + "grad_norm": 11.441133569681753, + "learning_rate": 9.550685634112944e-07, + "loss": 1.0437, + "step": 143697 + }, + { + "epoch": 1.73, + "grad_norm": 3.1625944632864886, + "learning_rate": 9.54819433826698e-07, + "loss": 1.2633, + "step": 143700 + }, + { + "epoch": 1.73, + "grad_norm": 8.392382524685722, + "learning_rate": 9.545703351100433e-07, + "loss": 1.0607, + "step": 143703 + }, + { + "epoch": 1.73, + "grad_norm": 12.293210865748787, + "learning_rate": 9.543212672621837e-07, + "loss": 1.37, + "step": 143706 + }, + { + "epoch": 1.73, + "grad_norm": 32.43498093199285, + "learning_rate": 9.540722302839656e-07, + "loss": 1.1956, + "step": 143709 + }, + { + "epoch": 1.73, + "grad_norm": 8.869788990390658, + "learning_rate": 9.5382322417624e-07, + "loss": 1.248, + "step": 143712 + }, + { + "epoch": 1.73, + "grad_norm": 4.9981050899542545, + "learning_rate": 9.535742489398592e-07, + "loss": 1.2641, + "step": 143715 + }, + { + "epoch": 1.73, + "grad_norm": 8.77454652793095, + "learning_rate": 9.533253045756697e-07, + "loss": 1.347, + "step": 143718 + }, + { + "epoch": 1.73, + "grad_norm": 6.4980203103555105, + "learning_rate": 9.53076391084522e-07, + "loss": 1.1893, + "step": 143721 + }, + { + "epoch": 1.73, + "grad_norm": 5.440973503893193, + "learning_rate": 9.528275084672678e-07, + "loss": 1.1716, + "step": 143724 + }, + { + "epoch": 1.73, + "grad_norm": 2.501489083453975, + "learning_rate": 9.525786567247552e-07, + "loss": 1.0302, + "step": 143727 + }, + { + "epoch": 1.73, + "grad_norm": 3.787429861293459, + "learning_rate": 9.523298358578304e-07, + "loss": 1.0811, + "step": 143730 + }, + { + "epoch": 1.73, + "grad_norm": 13.533224665879464, + "learning_rate": 9.520810458673446e-07, + "loss": 0.8495, + "step": 143733 + }, + { + "epoch": 1.73, + "grad_norm": 5.177653684663286, + "learning_rate": 9.518322867541474e-07, + "loss": 0.9486, + "step": 143736 + }, + { + "epoch": 1.73, + "grad_norm": 6.724967895773907, + "learning_rate": 9.515835585190869e-07, + "loss": 1.1712, + "step": 143739 + }, + { + "epoch": 1.73, + "grad_norm": 4.62403259533822, + "learning_rate": 9.513348611630147e-07, + "loss": 0.9445, + "step": 143742 + }, + { + "epoch": 1.73, + "grad_norm": 9.744287288118247, + "learning_rate": 9.510861946867756e-07, + "loss": 1.218, + "step": 143745 + }, + { + "epoch": 1.73, + "grad_norm": 15.354596005014141, + "learning_rate": 9.508375590912222e-07, + "loss": 1.1152, + "step": 143748 + }, + { + "epoch": 1.73, + "grad_norm": 5.3131396208424295, + "learning_rate": 9.505889543771985e-07, + "loss": 0.772, + "step": 143751 + }, + { + "epoch": 1.73, + "grad_norm": 15.899348269139843, + "learning_rate": 9.503403805455568e-07, + "loss": 1.0346, + "step": 143754 + }, + { + "epoch": 1.73, + "grad_norm": 10.347978056957183, + "learning_rate": 9.500918375971424e-07, + "loss": 1.3195, + "step": 143757 + }, + { + "epoch": 1.73, + "grad_norm": 7.8016949800044095, + "learning_rate": 9.498433255328043e-07, + "loss": 1.0391, + "step": 143760 + }, + { + "epoch": 1.73, + "grad_norm": 4.246413792967876, + "learning_rate": 9.495948443533908e-07, + "loss": 1.3489, + "step": 143763 + }, + { + "epoch": 1.73, + "grad_norm": 6.316525217402418, + "learning_rate": 9.493463940597525e-07, + "loss": 1.2016, + "step": 143766 + }, + { + "epoch": 1.73, + "grad_norm": 19.110770432825817, + "learning_rate": 9.490979746527351e-07, + "loss": 0.959, + "step": 143769 + }, + { + "epoch": 1.73, + "grad_norm": 4.783176851569399, + "learning_rate": 9.488495861331837e-07, + "loss": 1.1147, + "step": 143772 + }, + { + "epoch": 1.73, + "grad_norm": 4.27864035675298, + "learning_rate": 9.486012285019486e-07, + "loss": 1.0962, + "step": 143775 + }, + { + "epoch": 1.73, + "grad_norm": 11.480033969310478, + "learning_rate": 9.483529017598769e-07, + "loss": 1.4961, + "step": 143778 + }, + { + "epoch": 1.73, + "grad_norm": 23.90837185433739, + "learning_rate": 9.481046059078192e-07, + "loss": 1.4991, + "step": 143781 + }, + { + "epoch": 1.73, + "grad_norm": 11.698024635300204, + "learning_rate": 9.47856340946618e-07, + "loss": 0.8687, + "step": 143784 + }, + { + "epoch": 1.73, + "grad_norm": 4.1113445158587965, + "learning_rate": 9.476081068771215e-07, + "loss": 1.3048, + "step": 143787 + }, + { + "epoch": 1.73, + "grad_norm": 10.4230154407675, + "learning_rate": 9.473599037001813e-07, + "loss": 1.3479, + "step": 143790 + }, + { + "epoch": 1.73, + "grad_norm": 18.015954200457458, + "learning_rate": 9.471117314166378e-07, + "loss": 1.0549, + "step": 143793 + }, + { + "epoch": 1.73, + "grad_norm": 5.824508188582079, + "learning_rate": 9.468635900273438e-07, + "loss": 0.8447, + "step": 143796 + }, + { + "epoch": 1.73, + "grad_norm": 3.713603093927358, + "learning_rate": 9.466154795331406e-07, + "loss": 1.0619, + "step": 143799 + }, + { + "epoch": 1.73, + "grad_norm": 10.44060390126355, + "learning_rate": 9.463673999348788e-07, + "loss": 1.055, + "step": 143802 + }, + { + "epoch": 1.73, + "grad_norm": 18.61689981221174, + "learning_rate": 9.461193512334032e-07, + "loss": 1.169, + "step": 143805 + }, + { + "epoch": 1.73, + "grad_norm": 14.593432454034252, + "learning_rate": 9.458713334295632e-07, + "loss": 1.1838, + "step": 143808 + }, + { + "epoch": 1.73, + "grad_norm": 13.000122235890263, + "learning_rate": 9.456233465242037e-07, + "loss": 1.3916, + "step": 143811 + }, + { + "epoch": 1.73, + "grad_norm": 6.446282296002108, + "learning_rate": 9.453753905181673e-07, + "loss": 1.1663, + "step": 143814 + }, + { + "epoch": 1.73, + "grad_norm": 9.785405273320242, + "learning_rate": 9.451274654123044e-07, + "loss": 1.4008, + "step": 143817 + }, + { + "epoch": 1.73, + "grad_norm": 5.302317778754347, + "learning_rate": 9.448795712074588e-07, + "loss": 1.5194, + "step": 143820 + }, + { + "epoch": 1.73, + "grad_norm": 7.6188995611898, + "learning_rate": 9.446317079044797e-07, + "loss": 1.3301, + "step": 143823 + }, + { + "epoch": 1.73, + "grad_norm": 18.14647411972912, + "learning_rate": 9.443838755042101e-07, + "loss": 0.9606, + "step": 143826 + }, + { + "epoch": 1.73, + "grad_norm": 17.488915535185736, + "learning_rate": 9.441360740074956e-07, + "loss": 1.1448, + "step": 143829 + }, + { + "epoch": 1.73, + "grad_norm": 5.650620123701824, + "learning_rate": 9.438883034151846e-07, + "loss": 1.5007, + "step": 143832 + }, + { + "epoch": 1.73, + "grad_norm": 7.398689020517913, + "learning_rate": 9.43640563728121e-07, + "loss": 1.1226, + "step": 143835 + }, + { + "epoch": 1.73, + "grad_norm": 7.570170365369952, + "learning_rate": 9.433928549471483e-07, + "loss": 1.1288, + "step": 143838 + }, + { + "epoch": 1.73, + "grad_norm": 7.1839711061644955, + "learning_rate": 9.431451770731137e-07, + "loss": 0.9531, + "step": 143841 + }, + { + "epoch": 1.73, + "grad_norm": 17.848760248288844, + "learning_rate": 9.428975301068621e-07, + "loss": 1.0837, + "step": 143844 + }, + { + "epoch": 1.73, + "grad_norm": 4.870753298516808, + "learning_rate": 9.426499140492384e-07, + "loss": 1.0789, + "step": 143847 + }, + { + "epoch": 1.73, + "grad_norm": 9.057070102599686, + "learning_rate": 9.424023289010909e-07, + "loss": 0.7692, + "step": 143850 + }, + { + "epoch": 1.73, + "grad_norm": 7.862590821033699, + "learning_rate": 9.421547746632609e-07, + "loss": 1.3106, + "step": 143853 + }, + { + "epoch": 1.73, + "grad_norm": 4.167138462856185, + "learning_rate": 9.419072513365923e-07, + "loss": 1.4771, + "step": 143856 + }, + { + "epoch": 1.73, + "grad_norm": 9.352489113530112, + "learning_rate": 9.416597589219323e-07, + "loss": 0.8065, + "step": 143859 + }, + { + "epoch": 1.73, + "grad_norm": 48.55413130458045, + "learning_rate": 9.414122974201256e-07, + "loss": 1.161, + "step": 143862 + }, + { + "epoch": 1.73, + "grad_norm": 7.956100765639471, + "learning_rate": 9.411648668320151e-07, + "loss": 0.8695, + "step": 143865 + }, + { + "epoch": 1.73, + "grad_norm": 5.391401209089552, + "learning_rate": 9.409174671584454e-07, + "loss": 1.0307, + "step": 143868 + }, + { + "epoch": 1.73, + "grad_norm": 12.336926656580586, + "learning_rate": 9.406700984002603e-07, + "loss": 1.2609, + "step": 143871 + }, + { + "epoch": 1.73, + "grad_norm": 5.811990336273128, + "learning_rate": 9.404227605583083e-07, + "loss": 1.3256, + "step": 143874 + }, + { + "epoch": 1.73, + "grad_norm": 6.86006991948568, + "learning_rate": 9.401754536334296e-07, + "loss": 1.237, + "step": 143877 + }, + { + "epoch": 1.73, + "grad_norm": 13.53409425247213, + "learning_rate": 9.399281776264669e-07, + "loss": 0.8054, + "step": 143880 + }, + { + "epoch": 1.73, + "grad_norm": 5.764080348466478, + "learning_rate": 9.396809325382661e-07, + "loss": 1.2438, + "step": 143883 + }, + { + "epoch": 1.73, + "grad_norm": 55.46423826421649, + "learning_rate": 9.394337183696711e-07, + "loss": 1.2784, + "step": 143886 + }, + { + "epoch": 1.73, + "grad_norm": 7.065291235322957, + "learning_rate": 9.391865351215257e-07, + "loss": 1.1741, + "step": 143889 + }, + { + "epoch": 1.73, + "grad_norm": 37.3918742340984, + "learning_rate": 9.389393827946725e-07, + "loss": 1.1628, + "step": 143892 + }, + { + "epoch": 1.73, + "grad_norm": 13.082721501021688, + "learning_rate": 9.386922613899552e-07, + "loss": 1.4692, + "step": 143895 + }, + { + "epoch": 1.73, + "grad_norm": 25.422898114331904, + "learning_rate": 9.384451709082187e-07, + "loss": 1.1151, + "step": 143898 + }, + { + "epoch": 1.73, + "grad_norm": 6.99905099765415, + "learning_rate": 9.381981113503036e-07, + "loss": 1.4403, + "step": 143901 + }, + { + "epoch": 1.73, + "grad_norm": 32.41254622454663, + "learning_rate": 9.379510827170557e-07, + "loss": 1.3718, + "step": 143904 + }, + { + "epoch": 1.73, + "grad_norm": 17.433006306832112, + "learning_rate": 9.377040850093144e-07, + "loss": 0.9529, + "step": 143907 + }, + { + "epoch": 1.73, + "grad_norm": 4.949675680527812, + "learning_rate": 9.374571182279258e-07, + "loss": 1.0642, + "step": 143910 + }, + { + "epoch": 1.73, + "grad_norm": 8.273314311305999, + "learning_rate": 9.372101823737311e-07, + "loss": 0.8419, + "step": 143913 + }, + { + "epoch": 1.73, + "grad_norm": 8.016244756261434, + "learning_rate": 9.369632774475757e-07, + "loss": 1.0129, + "step": 143916 + }, + { + "epoch": 1.73, + "grad_norm": 11.84695586752071, + "learning_rate": 9.367164034502996e-07, + "loss": 1.3434, + "step": 143919 + }, + { + "epoch": 1.73, + "grad_norm": 5.698615063979243, + "learning_rate": 9.364695603827445e-07, + "loss": 1.0175, + "step": 143922 + }, + { + "epoch": 1.73, + "grad_norm": 5.711107496579186, + "learning_rate": 9.362227482457542e-07, + "loss": 1.4195, + "step": 143925 + }, + { + "epoch": 1.73, + "grad_norm": 9.476895517773103, + "learning_rate": 9.359759670401702e-07, + "loss": 1.1522, + "step": 143928 + }, + { + "epoch": 1.73, + "grad_norm": 6.376173345195358, + "learning_rate": 9.357292167668386e-07, + "loss": 1.0074, + "step": 143931 + }, + { + "epoch": 1.73, + "grad_norm": 23.19964321928439, + "learning_rate": 9.354824974265952e-07, + "loss": 1.2396, + "step": 143934 + }, + { + "epoch": 1.73, + "grad_norm": 4.55440086365135, + "learning_rate": 9.352358090202851e-07, + "loss": 1.0554, + "step": 143937 + }, + { + "epoch": 1.73, + "grad_norm": 5.513756071271861, + "learning_rate": 9.34989151548753e-07, + "loss": 1.2097, + "step": 143940 + }, + { + "epoch": 1.73, + "grad_norm": 7.766862702224855, + "learning_rate": 9.347425250128362e-07, + "loss": 1.2058, + "step": 143943 + }, + { + "epoch": 1.73, + "grad_norm": 6.755687571371902, + "learning_rate": 9.344959294133771e-07, + "loss": 0.7974, + "step": 143946 + }, + { + "epoch": 1.73, + "grad_norm": 18.110931633619977, + "learning_rate": 9.342493647512174e-07, + "loss": 0.7965, + "step": 143949 + }, + { + "epoch": 1.73, + "grad_norm": 7.636714464378433, + "learning_rate": 9.340028310271998e-07, + "loss": 1.0673, + "step": 143952 + }, + { + "epoch": 1.73, + "grad_norm": 10.027840634837123, + "learning_rate": 9.337563282421646e-07, + "loss": 0.9676, + "step": 143955 + }, + { + "epoch": 1.73, + "grad_norm": 7.938252690334583, + "learning_rate": 9.335098563969558e-07, + "loss": 0.7869, + "step": 143958 + }, + { + "epoch": 1.73, + "grad_norm": 36.98240826057073, + "learning_rate": 9.332634154924114e-07, + "loss": 0.8362, + "step": 143961 + }, + { + "epoch": 1.73, + "grad_norm": 11.636373282063884, + "learning_rate": 9.330170055293708e-07, + "loss": 1.3187, + "step": 143964 + }, + { + "epoch": 1.73, + "grad_norm": 7.489283045737375, + "learning_rate": 9.327706265086778e-07, + "loss": 0.8578, + "step": 143967 + }, + { + "epoch": 1.73, + "grad_norm": 4.681089637065223, + "learning_rate": 9.32524278431175e-07, + "loss": 1.1202, + "step": 143970 + }, + { + "epoch": 1.73, + "grad_norm": 5.560965635755312, + "learning_rate": 9.322779612976985e-07, + "loss": 1.1514, + "step": 143973 + }, + { + "epoch": 1.73, + "grad_norm": 5.38614831301319, + "learning_rate": 9.320316751090919e-07, + "loss": 1.5085, + "step": 143976 + }, + { + "epoch": 1.73, + "grad_norm": 24.228894021706292, + "learning_rate": 9.317854198661935e-07, + "loss": 1.4362, + "step": 143979 + }, + { + "epoch": 1.73, + "grad_norm": 10.086844920720518, + "learning_rate": 9.315391955698483e-07, + "loss": 0.8716, + "step": 143982 + }, + { + "epoch": 1.73, + "grad_norm": 4.957287311219949, + "learning_rate": 9.312930022208921e-07, + "loss": 1.1825, + "step": 143985 + }, + { + "epoch": 1.73, + "grad_norm": 14.642329775988262, + "learning_rate": 9.310468398201655e-07, + "loss": 1.21, + "step": 143988 + }, + { + "epoch": 1.73, + "grad_norm": 3.429861473932247, + "learning_rate": 9.308007083685089e-07, + "loss": 1.066, + "step": 143991 + }, + { + "epoch": 1.73, + "grad_norm": 12.319119931789043, + "learning_rate": 9.305546078667627e-07, + "loss": 1.3752, + "step": 143994 + }, + { + "epoch": 1.73, + "grad_norm": 6.045829626235708, + "learning_rate": 9.303085383157695e-07, + "loss": 1.1336, + "step": 143997 + }, + { + "epoch": 1.73, + "grad_norm": 11.353666438406602, + "learning_rate": 9.300624997163632e-07, + "loss": 1.0977, + "step": 144000 + }, + { + "epoch": 1.73, + "grad_norm": 4.077731859558813, + "learning_rate": 9.298164920693898e-07, + "loss": 1.1544, + "step": 144003 + }, + { + "epoch": 1.73, + "grad_norm": 4.79823311238196, + "learning_rate": 9.295705153756829e-07, + "loss": 1.6737, + "step": 144006 + }, + { + "epoch": 1.73, + "grad_norm": 8.55423628631843, + "learning_rate": 9.293245696360853e-07, + "loss": 1.132, + "step": 144009 + }, + { + "epoch": 1.73, + "grad_norm": 9.443022432332004, + "learning_rate": 9.290786548514374e-07, + "loss": 1.143, + "step": 144012 + }, + { + "epoch": 1.73, + "grad_norm": 6.8101679437273495, + "learning_rate": 9.288327710225741e-07, + "loss": 1.4778, + "step": 144015 + }, + { + "epoch": 1.73, + "grad_norm": 31.01954350058156, + "learning_rate": 9.285869181503382e-07, + "loss": 1.1857, + "step": 144018 + }, + { + "epoch": 1.73, + "grad_norm": 17.293214001556617, + "learning_rate": 9.283410962355666e-07, + "loss": 1.1206, + "step": 144021 + }, + { + "epoch": 1.73, + "grad_norm": 11.60974150303671, + "learning_rate": 9.28095305279102e-07, + "loss": 1.1749, + "step": 144024 + }, + { + "epoch": 1.73, + "grad_norm": 10.681032444241646, + "learning_rate": 9.278495452817804e-07, + "loss": 1.2076, + "step": 144027 + }, + { + "epoch": 1.73, + "grad_norm": 10.22695638014278, + "learning_rate": 9.276038162444378e-07, + "loss": 1.3923, + "step": 144030 + }, + { + "epoch": 1.73, + "grad_norm": 24.242402183401897, + "learning_rate": 9.273581181679158e-07, + "loss": 0.8661, + "step": 144033 + }, + { + "epoch": 1.73, + "grad_norm": 3.492684866660224, + "learning_rate": 9.271124510530527e-07, + "loss": 1.169, + "step": 144036 + }, + { + "epoch": 1.73, + "grad_norm": 26.291043280313406, + "learning_rate": 9.268668149006898e-07, + "loss": 0.9223, + "step": 144039 + }, + { + "epoch": 1.73, + "grad_norm": 6.060645100566005, + "learning_rate": 9.266212097116589e-07, + "loss": 1.3917, + "step": 144042 + }, + { + "epoch": 1.73, + "grad_norm": 10.153956823291916, + "learning_rate": 9.263756354868037e-07, + "loss": 1.0673, + "step": 144045 + }, + { + "epoch": 1.73, + "grad_norm": 5.764446118066874, + "learning_rate": 9.261300922269589e-07, + "loss": 1.0672, + "step": 144048 + }, + { + "epoch": 1.73, + "grad_norm": 13.53475437982918, + "learning_rate": 9.258845799329653e-07, + "loss": 1.2705, + "step": 144051 + }, + { + "epoch": 1.73, + "grad_norm": 5.694287192865772, + "learning_rate": 9.256390986056574e-07, + "loss": 0.9827, + "step": 144054 + }, + { + "epoch": 1.73, + "grad_norm": 11.301887288600893, + "learning_rate": 9.253936482458747e-07, + "loss": 1.1282, + "step": 144057 + }, + { + "epoch": 1.73, + "grad_norm": 4.127828399594874, + "learning_rate": 9.251482288544544e-07, + "loss": 1.2438, + "step": 144060 + }, + { + "epoch": 1.73, + "grad_norm": 20.23153029359111, + "learning_rate": 9.249028404322346e-07, + "loss": 1.1942, + "step": 144063 + }, + { + "epoch": 1.73, + "grad_norm": 2.977902938014594, + "learning_rate": 9.246574829800548e-07, + "loss": 1.382, + "step": 144066 + }, + { + "epoch": 1.73, + "grad_norm": 7.0707787381231295, + "learning_rate": 9.244121564987496e-07, + "loss": 1.1742, + "step": 144069 + }, + { + "epoch": 1.73, + "grad_norm": 8.893214840674956, + "learning_rate": 9.241668609891552e-07, + "loss": 1.2734, + "step": 144072 + }, + { + "epoch": 1.73, + "grad_norm": 11.889249414599123, + "learning_rate": 9.239215964521098e-07, + "loss": 1.2372, + "step": 144075 + }, + { + "epoch": 1.73, + "grad_norm": 2.840644043961793, + "learning_rate": 9.236763628884538e-07, + "loss": 1.1008, + "step": 144078 + }, + { + "epoch": 1.73, + "grad_norm": 4.270059135043809, + "learning_rate": 9.234311602990175e-07, + "loss": 1.4194, + "step": 144081 + }, + { + "epoch": 1.73, + "grad_norm": 57.909794173840204, + "learning_rate": 9.231859886846429e-07, + "loss": 1.1555, + "step": 144084 + }, + { + "epoch": 1.73, + "grad_norm": 20.815302190437812, + "learning_rate": 9.229408480461654e-07, + "loss": 1.0888, + "step": 144087 + }, + { + "epoch": 1.73, + "grad_norm": 16.329224968459215, + "learning_rate": 9.226957383844204e-07, + "loss": 1.2977, + "step": 144090 + }, + { + "epoch": 1.73, + "grad_norm": 6.525002617123372, + "learning_rate": 9.224506597002469e-07, + "loss": 1.3595, + "step": 144093 + }, + { + "epoch": 1.73, + "grad_norm": 11.89507458889535, + "learning_rate": 9.222056119944777e-07, + "loss": 1.2074, + "step": 144096 + }, + { + "epoch": 1.73, + "grad_norm": 2.91802327769629, + "learning_rate": 9.219605952679512e-07, + "loss": 1.2362, + "step": 144099 + }, + { + "epoch": 1.73, + "grad_norm": 6.388441331441318, + "learning_rate": 9.217156095215029e-07, + "loss": 1.0197, + "step": 144102 + }, + { + "epoch": 1.73, + "grad_norm": 9.042530182820366, + "learning_rate": 9.214706547559716e-07, + "loss": 1.5657, + "step": 144105 + }, + { + "epoch": 1.73, + "grad_norm": 7.424356050095329, + "learning_rate": 9.212257309721883e-07, + "loss": 1.183, + "step": 144108 + }, + { + "epoch": 1.73, + "grad_norm": 11.548835368250536, + "learning_rate": 9.209808381709939e-07, + "loss": 1.0214, + "step": 144111 + }, + { + "epoch": 1.73, + "grad_norm": 10.776162381553718, + "learning_rate": 9.207359763532187e-07, + "loss": 1.1268, + "step": 144114 + }, + { + "epoch": 1.73, + "grad_norm": 11.627979432980638, + "learning_rate": 9.20491145519703e-07, + "loss": 1.1393, + "step": 144117 + }, + { + "epoch": 1.73, + "grad_norm": 5.984088567647337, + "learning_rate": 9.202463456712807e-07, + "loss": 1.6309, + "step": 144120 + }, + { + "epoch": 1.73, + "grad_norm": 4.625343063981797, + "learning_rate": 9.200015768087866e-07, + "loss": 1.4012, + "step": 144123 + }, + { + "epoch": 1.73, + "grad_norm": 13.353410059465313, + "learning_rate": 9.197568389330558e-07, + "loss": 1.3293, + "step": 144126 + }, + { + "epoch": 1.73, + "grad_norm": 4.373586930376939, + "learning_rate": 9.195121320449263e-07, + "loss": 1.3496, + "step": 144129 + }, + { + "epoch": 1.73, + "grad_norm": 19.369945002033063, + "learning_rate": 9.192674561452298e-07, + "loss": 0.7678, + "step": 144132 + }, + { + "epoch": 1.73, + "grad_norm": 11.337295884761371, + "learning_rate": 9.190228112348032e-07, + "loss": 1.2223, + "step": 144135 + }, + { + "epoch": 1.73, + "grad_norm": 5.158245775571246, + "learning_rate": 9.187781973144794e-07, + "loss": 0.8884, + "step": 144138 + }, + { + "epoch": 1.73, + "grad_norm": 9.163884667509002, + "learning_rate": 9.185336143850953e-07, + "loss": 1.1885, + "step": 144141 + }, + { + "epoch": 1.73, + "grad_norm": 14.123066419134265, + "learning_rate": 9.182890624474838e-07, + "loss": 1.0783, + "step": 144144 + }, + { + "epoch": 1.73, + "grad_norm": 7.712520095403407, + "learning_rate": 9.18044541502483e-07, + "loss": 1.4174, + "step": 144147 + }, + { + "epoch": 1.73, + "grad_norm": 4.712664360850224, + "learning_rate": 9.178000515509234e-07, + "loss": 1.2364, + "step": 144150 + }, + { + "epoch": 1.73, + "grad_norm": 4.696006550384128, + "learning_rate": 9.175555925936419e-07, + "loss": 1.2011, + "step": 144153 + }, + { + "epoch": 1.73, + "grad_norm": 8.723225911217861, + "learning_rate": 9.173111646314703e-07, + "loss": 1.1349, + "step": 144156 + }, + { + "epoch": 1.73, + "grad_norm": 2.30239537451551, + "learning_rate": 9.170667676652456e-07, + "loss": 1.6175, + "step": 144159 + }, + { + "epoch": 1.73, + "grad_norm": 10.546992781346917, + "learning_rate": 9.168224016957993e-07, + "loss": 1.1831, + "step": 144162 + }, + { + "epoch": 1.73, + "grad_norm": 11.488905258378846, + "learning_rate": 9.165780667239654e-07, + "loss": 1.0727, + "step": 144165 + }, + { + "epoch": 1.73, + "grad_norm": 6.185544769744871, + "learning_rate": 9.163337627505797e-07, + "loss": 1.3048, + "step": 144168 + }, + { + "epoch": 1.73, + "grad_norm": 10.998208599737394, + "learning_rate": 9.16089489776476e-07, + "loss": 1.3496, + "step": 144171 + }, + { + "epoch": 1.73, + "grad_norm": 8.798882943592043, + "learning_rate": 9.158452478024848e-07, + "loss": 1.6009, + "step": 144174 + }, + { + "epoch": 1.73, + "grad_norm": 3.694220889994374, + "learning_rate": 9.156010368294443e-07, + "loss": 1.0089, + "step": 144177 + }, + { + "epoch": 1.73, + "grad_norm": 16.553595748774864, + "learning_rate": 9.153568568581828e-07, + "loss": 1.2323, + "step": 144180 + }, + { + "epoch": 1.73, + "grad_norm": 13.542513113892147, + "learning_rate": 9.151127078895372e-07, + "loss": 1.015, + "step": 144183 + }, + { + "epoch": 1.73, + "grad_norm": 5.356268773232648, + "learning_rate": 9.148685899243404e-07, + "loss": 1.3014, + "step": 144186 + }, + { + "epoch": 1.73, + "grad_norm": 7.4893216810497965, + "learning_rate": 9.146245029634226e-07, + "loss": 1.1849, + "step": 144189 + }, + { + "epoch": 1.73, + "grad_norm": 6.787329931931033, + "learning_rate": 9.14380447007619e-07, + "loss": 1.2128, + "step": 144192 + }, + { + "epoch": 1.73, + "grad_norm": 8.85610113107307, + "learning_rate": 9.141364220577654e-07, + "loss": 1.2305, + "step": 144195 + }, + { + "epoch": 1.73, + "grad_norm": 6.761492483969977, + "learning_rate": 9.138924281146877e-07, + "loss": 1.1654, + "step": 144198 + }, + { + "epoch": 1.73, + "grad_norm": 8.734282613463208, + "learning_rate": 9.136484651792255e-07, + "loss": 1.1733, + "step": 144201 + }, + { + "epoch": 1.73, + "grad_norm": 8.91208911830453, + "learning_rate": 9.134045332522057e-07, + "loss": 1.3088, + "step": 144204 + }, + { + "epoch": 1.73, + "grad_norm": 3.1829915150847667, + "learning_rate": 9.131606323344644e-07, + "loss": 1.59, + "step": 144207 + }, + { + "epoch": 1.73, + "grad_norm": 6.77745684630659, + "learning_rate": 9.129167624268309e-07, + "loss": 0.9765, + "step": 144210 + }, + { + "epoch": 1.73, + "grad_norm": 4.258464432025584, + "learning_rate": 9.126729235301424e-07, + "loss": 1.3126, + "step": 144213 + }, + { + "epoch": 1.73, + "grad_norm": 63.55303878020131, + "learning_rate": 9.124291156452259e-07, + "loss": 0.9007, + "step": 144216 + }, + { + "epoch": 1.73, + "grad_norm": 7.140542658735817, + "learning_rate": 9.121853387729173e-07, + "loss": 1.1617, + "step": 144219 + }, + { + "epoch": 1.73, + "grad_norm": 12.296090052770493, + "learning_rate": 9.119415929140451e-07, + "loss": 0.9671, + "step": 144222 + }, + { + "epoch": 1.73, + "grad_norm": 4.654933771626021, + "learning_rate": 9.11697878069443e-07, + "loss": 0.7756, + "step": 144225 + }, + { + "epoch": 1.73, + "grad_norm": 13.302581352266312, + "learning_rate": 9.114541942399435e-07, + "loss": 1.2197, + "step": 144228 + }, + { + "epoch": 1.73, + "grad_norm": 5.1199803749629735, + "learning_rate": 9.112105414263761e-07, + "loss": 1.0476, + "step": 144231 + }, + { + "epoch": 1.73, + "grad_norm": 3.1679202258382624, + "learning_rate": 9.109669196295734e-07, + "loss": 1.0607, + "step": 144234 + }, + { + "epoch": 1.73, + "grad_norm": 6.639557521521809, + "learning_rate": 9.107233288503681e-07, + "loss": 0.9288, + "step": 144237 + }, + { + "epoch": 1.73, + "grad_norm": 5.949387431457892, + "learning_rate": 9.104797690895895e-07, + "loss": 0.9045, + "step": 144240 + }, + { + "epoch": 1.73, + "grad_norm": 5.755211283516846, + "learning_rate": 9.102362403480691e-07, + "loss": 1.2347, + "step": 144243 + }, + { + "epoch": 1.73, + "grad_norm": 5.770616995878255, + "learning_rate": 9.099927426266364e-07, + "loss": 1.1345, + "step": 144246 + }, + { + "epoch": 1.73, + "grad_norm": 5.36771127849975, + "learning_rate": 9.097492759261262e-07, + "loss": 1.1436, + "step": 144249 + }, + { + "epoch": 1.73, + "grad_norm": 4.400577286188381, + "learning_rate": 9.095058402473667e-07, + "loss": 1.0844, + "step": 144252 + }, + { + "epoch": 1.73, + "grad_norm": 6.005211233605908, + "learning_rate": 9.092624355911906e-07, + "loss": 0.9716, + "step": 144255 + }, + { + "epoch": 1.73, + "grad_norm": 11.851895358877787, + "learning_rate": 9.09019061958426e-07, + "loss": 1.2023, + "step": 144258 + }, + { + "epoch": 1.73, + "grad_norm": 8.698894812304033, + "learning_rate": 9.08775719349907e-07, + "loss": 1.0412, + "step": 144261 + }, + { + "epoch": 1.73, + "grad_norm": 8.497046543836568, + "learning_rate": 9.085324077664592e-07, + "loss": 0.7917, + "step": 144264 + }, + { + "epoch": 1.73, + "grad_norm": 11.32470972110694, + "learning_rate": 9.082891272089178e-07, + "loss": 1.2726, + "step": 144267 + }, + { + "epoch": 1.73, + "grad_norm": 6.950187673596294, + "learning_rate": 9.080458776781088e-07, + "loss": 0.9641, + "step": 144270 + }, + { + "epoch": 1.73, + "grad_norm": 7.055334110841669, + "learning_rate": 9.078026591748646e-07, + "loss": 1.1176, + "step": 144273 + }, + { + "epoch": 1.73, + "grad_norm": 15.174213402839577, + "learning_rate": 9.075594717000147e-07, + "loss": 1.2744, + "step": 144276 + }, + { + "epoch": 1.73, + "grad_norm": 6.426024577065394, + "learning_rate": 9.073163152543918e-07, + "loss": 1.6942, + "step": 144279 + }, + { + "epoch": 1.73, + "grad_norm": 7.212282558396954, + "learning_rate": 9.070731898388218e-07, + "loss": 1.0987, + "step": 144282 + }, + { + "epoch": 1.73, + "grad_norm": 8.119072925113741, + "learning_rate": 9.068300954541342e-07, + "loss": 0.9222, + "step": 144285 + }, + { + "epoch": 1.74, + "grad_norm": 16.45072001379478, + "learning_rate": 9.065870321011616e-07, + "loss": 1.1085, + "step": 144288 + }, + { + "epoch": 1.74, + "grad_norm": 5.448737942013896, + "learning_rate": 9.063439997807311e-07, + "loss": 1.0843, + "step": 144291 + }, + { + "epoch": 1.74, + "grad_norm": 27.35892455692324, + "learning_rate": 9.061009984936742e-07, + "loss": 0.8921, + "step": 144294 + }, + { + "epoch": 1.74, + "grad_norm": 4.929176490409781, + "learning_rate": 9.05858028240818e-07, + "loss": 0.9041, + "step": 144297 + }, + { + "epoch": 1.74, + "grad_norm": 7.816522304140831, + "learning_rate": 9.05615089022992e-07, + "loss": 1.5576, + "step": 144300 + }, + { + "epoch": 1.74, + "grad_norm": 3.54388951060872, + "learning_rate": 9.053721808410276e-07, + "loss": 1.2146, + "step": 144303 + }, + { + "epoch": 1.74, + "grad_norm": 16.106430632234932, + "learning_rate": 9.051293036957509e-07, + "loss": 1.2673, + "step": 144306 + }, + { + "epoch": 1.74, + "grad_norm": 8.252808281272884, + "learning_rate": 9.048864575879934e-07, + "loss": 1.0379, + "step": 144309 + }, + { + "epoch": 1.74, + "grad_norm": 10.987055983049991, + "learning_rate": 9.046436425185812e-07, + "loss": 1.2405, + "step": 144312 + }, + { + "epoch": 1.74, + "grad_norm": 5.128272662258971, + "learning_rate": 9.044008584883435e-07, + "loss": 1.1203, + "step": 144315 + }, + { + "epoch": 1.74, + "grad_norm": 7.79808396030774, + "learning_rate": 9.041581054981097e-07, + "loss": 1.1409, + "step": 144318 + }, + { + "epoch": 1.74, + "grad_norm": 9.099548083619755, + "learning_rate": 9.039153835487102e-07, + "loss": 1.4714, + "step": 144321 + }, + { + "epoch": 1.74, + "grad_norm": 3.7577895128308167, + "learning_rate": 9.0367269264097e-07, + "loss": 0.9755, + "step": 144324 + }, + { + "epoch": 1.74, + "grad_norm": 7.234068402285579, + "learning_rate": 9.034300327757173e-07, + "loss": 0.9121, + "step": 144327 + }, + { + "epoch": 1.74, + "grad_norm": 3.2162652797550546, + "learning_rate": 9.031874039537814e-07, + "loss": 1.1147, + "step": 144330 + }, + { + "epoch": 1.74, + "grad_norm": 10.30112338040667, + "learning_rate": 9.029448061759904e-07, + "loss": 1.0008, + "step": 144333 + }, + { + "epoch": 1.74, + "grad_norm": 20.40180482300805, + "learning_rate": 9.027022394431729e-07, + "loss": 1.6003, + "step": 144336 + }, + { + "epoch": 1.74, + "grad_norm": 7.553047205193598, + "learning_rate": 9.024597037561544e-07, + "loss": 0.9152, + "step": 144339 + }, + { + "epoch": 1.74, + "grad_norm": 22.400245485935276, + "learning_rate": 9.022171991157647e-07, + "loss": 1.1947, + "step": 144342 + }, + { + "epoch": 1.74, + "grad_norm": 4.411218535122435, + "learning_rate": 9.019747255228317e-07, + "loss": 0.8291, + "step": 144345 + }, + { + "epoch": 1.74, + "grad_norm": 98.8066800874347, + "learning_rate": 9.017322829781827e-07, + "loss": 1.4992, + "step": 144348 + }, + { + "epoch": 1.74, + "grad_norm": 7.941378279423795, + "learning_rate": 9.014898714826415e-07, + "loss": 0.9333, + "step": 144351 + }, + { + "epoch": 1.74, + "grad_norm": 10.981937082982123, + "learning_rate": 9.012474910370384e-07, + "loss": 1.1041, + "step": 144354 + }, + { + "epoch": 1.74, + "grad_norm": 16.35829265160795, + "learning_rate": 9.010051416422005e-07, + "loss": 1.2533, + "step": 144357 + }, + { + "epoch": 1.74, + "grad_norm": 10.036147724376509, + "learning_rate": 9.007628232989551e-07, + "loss": 1.1392, + "step": 144360 + }, + { + "epoch": 1.74, + "grad_norm": 4.86371934820643, + "learning_rate": 9.005205360081292e-07, + "loss": 1.2514, + "step": 144363 + }, + { + "epoch": 1.74, + "grad_norm": 14.197321535988412, + "learning_rate": 9.002782797705501e-07, + "loss": 1.0547, + "step": 144366 + }, + { + "epoch": 1.74, + "grad_norm": 16.440381432160184, + "learning_rate": 9.000360545870413e-07, + "loss": 1.1235, + "step": 144369 + }, + { + "epoch": 1.74, + "grad_norm": 5.026002986458862, + "learning_rate": 8.997938604584322e-07, + "loss": 1.0945, + "step": 144372 + }, + { + "epoch": 1.74, + "grad_norm": 28.29208535496436, + "learning_rate": 8.995516973855501e-07, + "loss": 1.4453, + "step": 144375 + }, + { + "epoch": 1.74, + "grad_norm": 8.566909022053704, + "learning_rate": 8.993095653692186e-07, + "loss": 1.1509, + "step": 144378 + }, + { + "epoch": 1.74, + "grad_norm": 15.475445376927627, + "learning_rate": 8.990674644102648e-07, + "loss": 0.8503, + "step": 144381 + }, + { + "epoch": 1.74, + "grad_norm": 6.019868494357208, + "learning_rate": 8.98825394509516e-07, + "loss": 1.0882, + "step": 144384 + }, + { + "epoch": 1.74, + "grad_norm": 3.2000801962965943, + "learning_rate": 8.985833556677992e-07, + "loss": 1.2452, + "step": 144387 + }, + { + "epoch": 1.74, + "grad_norm": 9.27933996582545, + "learning_rate": 8.983413478859405e-07, + "loss": 0.8198, + "step": 144390 + }, + { + "epoch": 1.74, + "grad_norm": 6.736808010993552, + "learning_rate": 8.980993711647612e-07, + "loss": 1.0305, + "step": 144393 + }, + { + "epoch": 1.74, + "grad_norm": 12.310295754639982, + "learning_rate": 8.978574255050909e-07, + "loss": 1.0524, + "step": 144396 + }, + { + "epoch": 1.74, + "grad_norm": 6.124337838006462, + "learning_rate": 8.976155109077544e-07, + "loss": 1.3069, + "step": 144399 + }, + { + "epoch": 1.74, + "grad_norm": 4.650535639802575, + "learning_rate": 8.973736273735789e-07, + "loss": 1.0451, + "step": 144402 + }, + { + "epoch": 1.74, + "grad_norm": 3.5988460632878665, + "learning_rate": 8.97131774903387e-07, + "loss": 1.4388, + "step": 144405 + }, + { + "epoch": 1.74, + "grad_norm": 10.349452303856907, + "learning_rate": 8.96889953498008e-07, + "loss": 1.0842, + "step": 144408 + }, + { + "epoch": 1.74, + "grad_norm": 5.325524159945386, + "learning_rate": 8.966481631582624e-07, + "loss": 0.8404, + "step": 144411 + }, + { + "epoch": 1.74, + "grad_norm": 9.256507836737951, + "learning_rate": 8.964064038849773e-07, + "loss": 1.1336, + "step": 144414 + }, + { + "epoch": 1.74, + "grad_norm": 2.3227204116445446, + "learning_rate": 8.961646756789799e-07, + "loss": 1.2553, + "step": 144417 + }, + { + "epoch": 1.74, + "grad_norm": 14.863036683334897, + "learning_rate": 8.959229785410916e-07, + "loss": 1.1304, + "step": 144420 + }, + { + "epoch": 1.74, + "grad_norm": 9.491545500482166, + "learning_rate": 8.956813124721386e-07, + "loss": 0.9933, + "step": 144423 + }, + { + "epoch": 1.74, + "grad_norm": 4.652776405300994, + "learning_rate": 8.954396774729469e-07, + "loss": 1.4151, + "step": 144426 + }, + { + "epoch": 1.74, + "grad_norm": 10.017417183366094, + "learning_rate": 8.951980735443411e-07, + "loss": 1.3191, + "step": 144429 + }, + { + "epoch": 1.74, + "grad_norm": 3.0207772309910235, + "learning_rate": 8.949565006871453e-07, + "loss": 0.9161, + "step": 144432 + }, + { + "epoch": 1.74, + "grad_norm": 16.566572511089138, + "learning_rate": 8.947149589021809e-07, + "loss": 0.8947, + "step": 144435 + }, + { + "epoch": 1.74, + "grad_norm": 7.5210159910899845, + "learning_rate": 8.94473448190275e-07, + "loss": 1.0129, + "step": 144438 + }, + { + "epoch": 1.74, + "grad_norm": 16.903849425309048, + "learning_rate": 8.942319685522505e-07, + "loss": 1.593, + "step": 144441 + }, + { + "epoch": 1.74, + "grad_norm": 3.834854530355207, + "learning_rate": 8.939905199889354e-07, + "loss": 0.9989, + "step": 144444 + }, + { + "epoch": 1.74, + "grad_norm": 9.18131056465561, + "learning_rate": 8.937491025011491e-07, + "loss": 1.2024, + "step": 144447 + }, + { + "epoch": 1.74, + "grad_norm": 2.6058786939867593, + "learning_rate": 8.935077160897177e-07, + "loss": 0.9856, + "step": 144450 + }, + { + "epoch": 1.74, + "grad_norm": 3.7108605559188685, + "learning_rate": 8.932663607554636e-07, + "loss": 1.1381, + "step": 144453 + }, + { + "epoch": 1.74, + "grad_norm": 18.597549434331203, + "learning_rate": 8.930250364992121e-07, + "loss": 1.0283, + "step": 144456 + }, + { + "epoch": 1.74, + "grad_norm": 13.278468475595474, + "learning_rate": 8.927837433217845e-07, + "loss": 1.4187, + "step": 144459 + }, + { + "epoch": 1.74, + "grad_norm": 9.260379739503867, + "learning_rate": 8.925424812240058e-07, + "loss": 1.2764, + "step": 144462 + }, + { + "epoch": 1.74, + "grad_norm": 6.00306021440757, + "learning_rate": 8.923012502066986e-07, + "loss": 1.0674, + "step": 144465 + }, + { + "epoch": 1.74, + "grad_norm": 3.4454796546524986, + "learning_rate": 8.920600502706867e-07, + "loss": 0.9833, + "step": 144468 + }, + { + "epoch": 1.74, + "grad_norm": 20.752338587485742, + "learning_rate": 8.918188814167949e-07, + "loss": 1.5471, + "step": 144471 + }, + { + "epoch": 1.74, + "grad_norm": 4.226190990108083, + "learning_rate": 8.915777436458451e-07, + "loss": 1.0031, + "step": 144474 + }, + { + "epoch": 1.74, + "grad_norm": 8.9172230074554, + "learning_rate": 8.913366369586573e-07, + "loss": 1.0206, + "step": 144477 + }, + { + "epoch": 1.74, + "grad_norm": 6.2526060969887665, + "learning_rate": 8.910955613560568e-07, + "loss": 1.0734, + "step": 144480 + }, + { + "epoch": 1.74, + "grad_norm": 10.502780228904403, + "learning_rate": 8.908545168388683e-07, + "loss": 1.3937, + "step": 144483 + }, + { + "epoch": 1.74, + "grad_norm": 3.095638515771637, + "learning_rate": 8.9061350340791e-07, + "loss": 0.967, + "step": 144486 + }, + { + "epoch": 1.74, + "grad_norm": 5.371385146813747, + "learning_rate": 8.903725210640068e-07, + "loss": 1.2172, + "step": 144489 + }, + { + "epoch": 1.74, + "grad_norm": 6.538547668606211, + "learning_rate": 8.901315698079804e-07, + "loss": 1.223, + "step": 144492 + }, + { + "epoch": 1.74, + "grad_norm": 9.318814626542116, + "learning_rate": 8.898906496406556e-07, + "loss": 1.1288, + "step": 144495 + }, + { + "epoch": 1.74, + "grad_norm": 3.7985552196923837, + "learning_rate": 8.896497605628529e-07, + "loss": 1.1808, + "step": 144498 + }, + { + "epoch": 1.74, + "grad_norm": 4.970545270018742, + "learning_rate": 8.894089025753927e-07, + "loss": 1.0765, + "step": 144501 + }, + { + "epoch": 1.74, + "grad_norm": 6.993201027809316, + "learning_rate": 8.891680756790977e-07, + "loss": 1.0434, + "step": 144504 + }, + { + "epoch": 1.74, + "grad_norm": 23.985881272551442, + "learning_rate": 8.889272798747894e-07, + "loss": 1.2836, + "step": 144507 + }, + { + "epoch": 1.74, + "grad_norm": 5.63988624230235, + "learning_rate": 8.886865151632939e-07, + "loss": 1.4272, + "step": 144510 + }, + { + "epoch": 1.74, + "grad_norm": 14.203759538841624, + "learning_rate": 8.884457815454273e-07, + "loss": 1.0962, + "step": 144513 + }, + { + "epoch": 1.74, + "grad_norm": 14.202382491639163, + "learning_rate": 8.882050790220142e-07, + "loss": 1.2846, + "step": 144516 + }, + { + "epoch": 1.74, + "grad_norm": 22.809915294578413, + "learning_rate": 8.879644075938742e-07, + "loss": 0.9064, + "step": 144519 + }, + { + "epoch": 1.74, + "grad_norm": 5.76451227667239, + "learning_rate": 8.877237672618288e-07, + "loss": 0.9026, + "step": 144522 + }, + { + "epoch": 1.74, + "grad_norm": 6.068752169431228, + "learning_rate": 8.874831580267029e-07, + "loss": 1.3182, + "step": 144525 + }, + { + "epoch": 1.74, + "grad_norm": 16.551811229631305, + "learning_rate": 8.872425798893114e-07, + "loss": 0.9822, + "step": 144528 + }, + { + "epoch": 1.74, + "grad_norm": 6.509986648111176, + "learning_rate": 8.870020328504803e-07, + "loss": 1.3415, + "step": 144531 + }, + { + "epoch": 1.74, + "grad_norm": 11.004327651012028, + "learning_rate": 8.867615169110277e-07, + "loss": 1.0504, + "step": 144534 + }, + { + "epoch": 1.74, + "grad_norm": 5.442994993017244, + "learning_rate": 8.865210320717766e-07, + "loss": 1.0029, + "step": 144537 + }, + { + "epoch": 1.74, + "grad_norm": 11.795688530000437, + "learning_rate": 8.862805783335482e-07, + "loss": 1.3826, + "step": 144540 + }, + { + "epoch": 1.74, + "grad_norm": 4.895573064140265, + "learning_rate": 8.860401556971588e-07, + "loss": 0.8276, + "step": 144543 + }, + { + "epoch": 1.74, + "grad_norm": 23.029153918040034, + "learning_rate": 8.857997641634308e-07, + "loss": 1.104, + "step": 144546 + }, + { + "epoch": 1.74, + "grad_norm": 13.852242160865794, + "learning_rate": 8.855594037331861e-07, + "loss": 1.1187, + "step": 144549 + }, + { + "epoch": 1.74, + "grad_norm": 9.01749241769601, + "learning_rate": 8.85319074407246e-07, + "loss": 1.1329, + "step": 144552 + }, + { + "epoch": 1.74, + "grad_norm": 6.286146941993855, + "learning_rate": 8.850787761864265e-07, + "loss": 1.5268, + "step": 144555 + }, + { + "epoch": 1.74, + "grad_norm": 6.542805052061282, + "learning_rate": 8.848385090715516e-07, + "loss": 1.2527, + "step": 144558 + }, + { + "epoch": 1.74, + "grad_norm": 8.145383414736605, + "learning_rate": 8.845982730634384e-07, + "loss": 1.0069, + "step": 144561 + }, + { + "epoch": 1.74, + "grad_norm": 3.319660328088195, + "learning_rate": 8.843580681629105e-07, + "loss": 1.4308, + "step": 144564 + }, + { + "epoch": 1.74, + "grad_norm": 7.176158261458836, + "learning_rate": 8.841178943707817e-07, + "loss": 1.1498, + "step": 144567 + }, + { + "epoch": 1.74, + "grad_norm": 9.501697734661736, + "learning_rate": 8.838777516878761e-07, + "loss": 0.8976, + "step": 144570 + }, + { + "epoch": 1.74, + "grad_norm": 4.32186887664663, + "learning_rate": 8.836376401150116e-07, + "loss": 0.9996, + "step": 144573 + }, + { + "epoch": 1.74, + "grad_norm": 7.633882222160038, + "learning_rate": 8.833975596530086e-07, + "loss": 1.0411, + "step": 144576 + }, + { + "epoch": 1.74, + "grad_norm": 9.105494713514272, + "learning_rate": 8.831575103026868e-07, + "loss": 0.9819, + "step": 144579 + }, + { + "epoch": 1.74, + "grad_norm": 18.055491025269692, + "learning_rate": 8.829174920648653e-07, + "loss": 1.2746, + "step": 144582 + }, + { + "epoch": 1.74, + "grad_norm": 9.751501473276528, + "learning_rate": 8.826775049403602e-07, + "loss": 1.4074, + "step": 144585 + }, + { + "epoch": 1.74, + "grad_norm": 4.8194036320438185, + "learning_rate": 8.82437548929993e-07, + "loss": 1.1695, + "step": 144588 + }, + { + "epoch": 1.74, + "grad_norm": 5.382443352877838, + "learning_rate": 8.821976240345842e-07, + "loss": 0.7613, + "step": 144591 + }, + { + "epoch": 1.74, + "grad_norm": 9.481418568634258, + "learning_rate": 8.819577302549498e-07, + "loss": 1.1476, + "step": 144594 + }, + { + "epoch": 1.74, + "grad_norm": 4.633972654108193, + "learning_rate": 8.81717867591908e-07, + "loss": 0.9374, + "step": 144597 + }, + { + "epoch": 1.74, + "grad_norm": 6.9930370457985465, + "learning_rate": 8.814780360462805e-07, + "loss": 1.17, + "step": 144600 + }, + { + "epoch": 1.74, + "grad_norm": 6.359598935265381, + "learning_rate": 8.812382356188831e-07, + "loss": 1.1176, + "step": 144603 + }, + { + "epoch": 1.74, + "grad_norm": 5.036598783773036, + "learning_rate": 8.809984663105364e-07, + "loss": 1.2763, + "step": 144606 + }, + { + "epoch": 1.74, + "grad_norm": 3.637258987567308, + "learning_rate": 8.807587281220553e-07, + "loss": 0.9304, + "step": 144609 + }, + { + "epoch": 1.74, + "grad_norm": 8.168404099987297, + "learning_rate": 8.805190210542602e-07, + "loss": 0.9425, + "step": 144612 + }, + { + "epoch": 1.74, + "grad_norm": 4.9350924856882745, + "learning_rate": 8.802793451079683e-07, + "loss": 1.1212, + "step": 144615 + }, + { + "epoch": 1.74, + "grad_norm": 5.794891015268402, + "learning_rate": 8.800397002839999e-07, + "loss": 0.8934, + "step": 144618 + }, + { + "epoch": 1.74, + "grad_norm": 5.627802537405934, + "learning_rate": 8.798000865831701e-07, + "loss": 1.0759, + "step": 144621 + }, + { + "epoch": 1.74, + "grad_norm": 9.75501436847896, + "learning_rate": 8.795605040062994e-07, + "loss": 1.1967, + "step": 144624 + }, + { + "epoch": 1.74, + "grad_norm": 11.50956855196544, + "learning_rate": 8.793209525542002e-07, + "loss": 1.1299, + "step": 144627 + }, + { + "epoch": 1.74, + "grad_norm": 16.264555815499904, + "learning_rate": 8.790814322276952e-07, + "loss": 1.0355, + "step": 144630 + }, + { + "epoch": 1.74, + "grad_norm": 7.252126340375588, + "learning_rate": 8.788419430276007e-07, + "loss": 0.9957, + "step": 144633 + }, + { + "epoch": 1.74, + "grad_norm": 17.685624148078194, + "learning_rate": 8.786024849547314e-07, + "loss": 1.1862, + "step": 144636 + }, + { + "epoch": 1.74, + "grad_norm": 24.892486687296927, + "learning_rate": 8.783630580099068e-07, + "loss": 1.4566, + "step": 144639 + }, + { + "epoch": 1.74, + "grad_norm": 9.13748936033522, + "learning_rate": 8.781236621939449e-07, + "loss": 0.904, + "step": 144642 + }, + { + "epoch": 1.74, + "grad_norm": 13.890389499653478, + "learning_rate": 8.778842975076607e-07, + "loss": 1.1635, + "step": 144645 + }, + { + "epoch": 1.74, + "grad_norm": 13.014632065713085, + "learning_rate": 8.776449639518702e-07, + "loss": 1.1967, + "step": 144648 + }, + { + "epoch": 1.74, + "grad_norm": 11.041838270668865, + "learning_rate": 8.774056615273918e-07, + "loss": 0.8439, + "step": 144651 + }, + { + "epoch": 1.74, + "grad_norm": 10.947584133851876, + "learning_rate": 8.771663902350424e-07, + "loss": 0.9104, + "step": 144654 + }, + { + "epoch": 1.74, + "grad_norm": 10.206111503227632, + "learning_rate": 8.769271500756371e-07, + "loss": 1.2713, + "step": 144657 + }, + { + "epoch": 1.74, + "grad_norm": 9.131366234702654, + "learning_rate": 8.766879410499962e-07, + "loss": 1.5036, + "step": 144660 + }, + { + "epoch": 1.74, + "grad_norm": 5.657765997830887, + "learning_rate": 8.764487631589302e-07, + "loss": 1.0411, + "step": 144663 + }, + { + "epoch": 1.74, + "grad_norm": 3.226232791654971, + "learning_rate": 8.762096164032608e-07, + "loss": 1.0601, + "step": 144666 + }, + { + "epoch": 1.74, + "grad_norm": 5.526167775386101, + "learning_rate": 8.759705007838004e-07, + "loss": 1.0022, + "step": 144669 + }, + { + "epoch": 1.74, + "grad_norm": 14.065791591573078, + "learning_rate": 8.757314163013675e-07, + "loss": 1.2115, + "step": 144672 + }, + { + "epoch": 1.74, + "grad_norm": 6.41768475028862, + "learning_rate": 8.754923629567746e-07, + "loss": 1.1692, + "step": 144675 + }, + { + "epoch": 1.74, + "grad_norm": 3.9330248588151524, + "learning_rate": 8.75253340750839e-07, + "loss": 1.2529, + "step": 144678 + }, + { + "epoch": 1.74, + "grad_norm": 6.382077291477704, + "learning_rate": 8.750143496843777e-07, + "loss": 1.121, + "step": 144681 + }, + { + "epoch": 1.74, + "grad_norm": 7.025877283383198, + "learning_rate": 8.747753897582078e-07, + "loss": 1.0272, + "step": 144684 + }, + { + "epoch": 1.74, + "grad_norm": 5.9212700618578635, + "learning_rate": 8.745364609731421e-07, + "loss": 1.279, + "step": 144687 + }, + { + "epoch": 1.74, + "grad_norm": 11.409910963475506, + "learning_rate": 8.742975633299954e-07, + "loss": 1.334, + "step": 144690 + }, + { + "epoch": 1.74, + "grad_norm": 5.513173254410011, + "learning_rate": 8.740586968295839e-07, + "loss": 1.0188, + "step": 144693 + }, + { + "epoch": 1.74, + "grad_norm": 6.695881558040675, + "learning_rate": 8.738198614727223e-07, + "loss": 0.9237, + "step": 144696 + }, + { + "epoch": 1.74, + "grad_norm": 8.714290623163468, + "learning_rate": 8.735810572602277e-07, + "loss": 1.279, + "step": 144699 + }, + { + "epoch": 1.74, + "grad_norm": 7.170144193432368, + "learning_rate": 8.733422841929128e-07, + "loss": 1.1427, + "step": 144702 + }, + { + "epoch": 1.74, + "grad_norm": 3.5242655311345863, + "learning_rate": 8.731035422715927e-07, + "loss": 0.8583, + "step": 144705 + }, + { + "epoch": 1.74, + "grad_norm": 7.668446072202388, + "learning_rate": 8.728648314970856e-07, + "loss": 1.2314, + "step": 144708 + }, + { + "epoch": 1.74, + "grad_norm": 6.431559041134347, + "learning_rate": 8.726261518702006e-07, + "loss": 1.045, + "step": 144711 + }, + { + "epoch": 1.74, + "grad_norm": 6.152456437677529, + "learning_rate": 8.723875033917561e-07, + "loss": 1.3799, + "step": 144714 + }, + { + "epoch": 1.74, + "grad_norm": 9.366914010433863, + "learning_rate": 8.721488860625649e-07, + "loss": 1.2199, + "step": 144717 + }, + { + "epoch": 1.74, + "grad_norm": 3.965787425636685, + "learning_rate": 8.719102998834406e-07, + "loss": 1.1459, + "step": 144720 + }, + { + "epoch": 1.74, + "grad_norm": 5.182619264519911, + "learning_rate": 8.716717448551981e-07, + "loss": 1.2322, + "step": 144723 + }, + { + "epoch": 1.74, + "grad_norm": 6.087384335014262, + "learning_rate": 8.714332209786547e-07, + "loss": 1.3294, + "step": 144726 + }, + { + "epoch": 1.74, + "grad_norm": 7.172226741695406, + "learning_rate": 8.711947282546196e-07, + "loss": 0.7043, + "step": 144729 + }, + { + "epoch": 1.74, + "grad_norm": 9.280827235049957, + "learning_rate": 8.709562666839111e-07, + "loss": 1.0968, + "step": 144732 + }, + { + "epoch": 1.74, + "grad_norm": 20.383932996621205, + "learning_rate": 8.707178362673375e-07, + "loss": 1.3104, + "step": 144735 + }, + { + "epoch": 1.74, + "grad_norm": 23.560594074141395, + "learning_rate": 8.704794370057168e-07, + "loss": 0.782, + "step": 144738 + }, + { + "epoch": 1.74, + "grad_norm": 6.206211732059376, + "learning_rate": 8.702410688998631e-07, + "loss": 1.1438, + "step": 144741 + }, + { + "epoch": 1.74, + "grad_norm": 49.61354081504589, + "learning_rate": 8.700027319505855e-07, + "loss": 1.3704, + "step": 144744 + }, + { + "epoch": 1.74, + "grad_norm": 7.72612375914883, + "learning_rate": 8.697644261587001e-07, + "loss": 0.9699, + "step": 144747 + }, + { + "epoch": 1.74, + "grad_norm": 9.272217341831823, + "learning_rate": 8.695261515250219e-07, + "loss": 1.4078, + "step": 144750 + }, + { + "epoch": 1.74, + "grad_norm": 20.34396025180107, + "learning_rate": 8.692879080503624e-07, + "loss": 1.0409, + "step": 144753 + }, + { + "epoch": 1.74, + "grad_norm": 27.731809283485987, + "learning_rate": 8.690496957355321e-07, + "loss": 1.1595, + "step": 144756 + }, + { + "epoch": 1.74, + "grad_norm": 3.1529274461162, + "learning_rate": 8.68811514581347e-07, + "loss": 1.1304, + "step": 144759 + }, + { + "epoch": 1.74, + "grad_norm": 8.287258537311233, + "learning_rate": 8.685733645886185e-07, + "loss": 1.0803, + "step": 144762 + }, + { + "epoch": 1.74, + "grad_norm": 9.90505057871112, + "learning_rate": 8.683352457581606e-07, + "loss": 1.0483, + "step": 144765 + }, + { + "epoch": 1.74, + "grad_norm": 22.712659280558068, + "learning_rate": 8.680971580907871e-07, + "loss": 1.2335, + "step": 144768 + }, + { + "epoch": 1.74, + "grad_norm": 3.7920605007614134, + "learning_rate": 8.678591015873061e-07, + "loss": 1.4318, + "step": 144771 + }, + { + "epoch": 1.74, + "grad_norm": 18.80499579981995, + "learning_rate": 8.676210762485349e-07, + "loss": 1.206, + "step": 144774 + }, + { + "epoch": 1.74, + "grad_norm": 9.479098695053647, + "learning_rate": 8.673830820752815e-07, + "loss": 1.0712, + "step": 144777 + }, + { + "epoch": 1.74, + "grad_norm": 10.254970529160326, + "learning_rate": 8.671451190683621e-07, + "loss": 1.2605, + "step": 144780 + }, + { + "epoch": 1.74, + "grad_norm": 27.278254343881965, + "learning_rate": 8.66907187228585e-07, + "loss": 1.1924, + "step": 144783 + }, + { + "epoch": 1.74, + "grad_norm": 2.4257902975538626, + "learning_rate": 8.666692865567638e-07, + "loss": 0.9546, + "step": 144786 + }, + { + "epoch": 1.74, + "grad_norm": 5.154925652887402, + "learning_rate": 8.664314170537103e-07, + "loss": 1.0795, + "step": 144789 + }, + { + "epoch": 1.74, + "grad_norm": 12.285691594632077, + "learning_rate": 8.661935787202392e-07, + "loss": 1.0251, + "step": 144792 + }, + { + "epoch": 1.74, + "grad_norm": 15.542904249690537, + "learning_rate": 8.659557715571587e-07, + "loss": 0.7788, + "step": 144795 + }, + { + "epoch": 1.74, + "grad_norm": 7.84692163244336, + "learning_rate": 8.657179955652795e-07, + "loss": 0.833, + "step": 144798 + }, + { + "epoch": 1.74, + "grad_norm": 11.607614233626967, + "learning_rate": 8.654802507454141e-07, + "loss": 1.2666, + "step": 144801 + }, + { + "epoch": 1.74, + "grad_norm": 7.732731340991543, + "learning_rate": 8.652425370983741e-07, + "loss": 1.1209, + "step": 144804 + }, + { + "epoch": 1.74, + "grad_norm": 13.40459076662638, + "learning_rate": 8.650048546249745e-07, + "loss": 1.5612, + "step": 144807 + }, + { + "epoch": 1.74, + "grad_norm": 10.884658872119466, + "learning_rate": 8.647672033260201e-07, + "loss": 1.3242, + "step": 144810 + }, + { + "epoch": 1.74, + "grad_norm": 3.0389818163398994, + "learning_rate": 8.645295832023238e-07, + "loss": 1.04, + "step": 144813 + }, + { + "epoch": 1.74, + "grad_norm": 16.862848904513626, + "learning_rate": 8.642919942547002e-07, + "loss": 1.3751, + "step": 144816 + }, + { + "epoch": 1.74, + "grad_norm": 27.54169838345596, + "learning_rate": 8.640544364839554e-07, + "loss": 0.871, + "step": 144819 + }, + { + "epoch": 1.74, + "grad_norm": 14.434204712240655, + "learning_rate": 8.638169098909044e-07, + "loss": 1.4767, + "step": 144822 + }, + { + "epoch": 1.74, + "grad_norm": 5.456953666985366, + "learning_rate": 8.635794144763532e-07, + "loss": 0.9851, + "step": 144825 + }, + { + "epoch": 1.74, + "grad_norm": 9.77358714342892, + "learning_rate": 8.633419502411145e-07, + "loss": 0.8086, + "step": 144828 + }, + { + "epoch": 1.74, + "grad_norm": 8.718453377446568, + "learning_rate": 8.631045171859986e-07, + "loss": 1.4138, + "step": 144831 + }, + { + "epoch": 1.74, + "grad_norm": 4.193276668173227, + "learning_rate": 8.628671153118184e-07, + "loss": 1.079, + "step": 144834 + }, + { + "epoch": 1.74, + "grad_norm": 5.3476611056460275, + "learning_rate": 8.62629744619381e-07, + "loss": 1.2303, + "step": 144837 + }, + { + "epoch": 1.74, + "grad_norm": 9.058081452628153, + "learning_rate": 8.623924051094956e-07, + "loss": 1.147, + "step": 144840 + }, + { + "epoch": 1.74, + "grad_norm": 10.102990492624256, + "learning_rate": 8.621550967829728e-07, + "loss": 1.1809, + "step": 144843 + }, + { + "epoch": 1.74, + "grad_norm": 3.958290885610814, + "learning_rate": 8.619178196406231e-07, + "loss": 0.8957, + "step": 144846 + }, + { + "epoch": 1.74, + "grad_norm": 23.65716787379137, + "learning_rate": 8.61680573683259e-07, + "loss": 1.6611, + "step": 144849 + }, + { + "epoch": 1.74, + "grad_norm": 11.423175580796299, + "learning_rate": 8.614433589116856e-07, + "loss": 1.2239, + "step": 144852 + }, + { + "epoch": 1.74, + "grad_norm": 10.601817304196919, + "learning_rate": 8.612061753267142e-07, + "loss": 1.1365, + "step": 144855 + }, + { + "epoch": 1.74, + "grad_norm": 4.280862155114026, + "learning_rate": 8.609690229291556e-07, + "loss": 1.5254, + "step": 144858 + }, + { + "epoch": 1.74, + "grad_norm": 7.388379001748259, + "learning_rate": 8.60731901719819e-07, + "loss": 1.0391, + "step": 144861 + }, + { + "epoch": 1.74, + "grad_norm": 6.144604157739292, + "learning_rate": 8.604948116995093e-07, + "loss": 1.0935, + "step": 144864 + }, + { + "epoch": 1.74, + "grad_norm": 5.276770390440507, + "learning_rate": 8.602577528690392e-07, + "loss": 1.448, + "step": 144867 + }, + { + "epoch": 1.74, + "grad_norm": 13.417961768374967, + "learning_rate": 8.60020725229218e-07, + "loss": 1.0415, + "step": 144870 + }, + { + "epoch": 1.74, + "grad_norm": 8.783033958280054, + "learning_rate": 8.597837287808531e-07, + "loss": 1.0346, + "step": 144873 + }, + { + "epoch": 1.74, + "grad_norm": 7.970575962333919, + "learning_rate": 8.595467635247567e-07, + "loss": 1.0448, + "step": 144876 + }, + { + "epoch": 1.74, + "grad_norm": 2.8086196662983896, + "learning_rate": 8.593098294617341e-07, + "loss": 1.0781, + "step": 144879 + }, + { + "epoch": 1.74, + "grad_norm": 6.104022241101789, + "learning_rate": 8.590729265925923e-07, + "loss": 1.0819, + "step": 144882 + }, + { + "epoch": 1.74, + "grad_norm": 3.063227725680059, + "learning_rate": 8.588360549181429e-07, + "loss": 1.022, + "step": 144885 + }, + { + "epoch": 1.74, + "grad_norm": 7.676611679484166, + "learning_rate": 8.585992144391953e-07, + "loss": 0.9373, + "step": 144888 + }, + { + "epoch": 1.74, + "grad_norm": 7.097204365547773, + "learning_rate": 8.583624051565542e-07, + "loss": 1.2987, + "step": 144891 + }, + { + "epoch": 1.74, + "grad_norm": 7.546255376185184, + "learning_rate": 8.58125627071028e-07, + "loss": 1.262, + "step": 144894 + }, + { + "epoch": 1.74, + "grad_norm": 3.044414004616098, + "learning_rate": 8.578888801834273e-07, + "loss": 1.2023, + "step": 144897 + }, + { + "epoch": 1.74, + "grad_norm": 19.431088483140343, + "learning_rate": 8.576521644945612e-07, + "loss": 1.1843, + "step": 144900 + }, + { + "epoch": 1.74, + "grad_norm": 8.257480433632672, + "learning_rate": 8.574154800052337e-07, + "loss": 0.9953, + "step": 144903 + }, + { + "epoch": 1.74, + "grad_norm": 6.1929214106860915, + "learning_rate": 8.57178826716254e-07, + "loss": 1.1562, + "step": 144906 + }, + { + "epoch": 1.74, + "grad_norm": 7.084061039481633, + "learning_rate": 8.569422046284281e-07, + "loss": 1.3835, + "step": 144909 + }, + { + "epoch": 1.74, + "grad_norm": 5.613862745584242, + "learning_rate": 8.567056137425655e-07, + "loss": 1.1613, + "step": 144912 + }, + { + "epoch": 1.74, + "grad_norm": 5.689578119230023, + "learning_rate": 8.564690540594755e-07, + "loss": 1.3086, + "step": 144915 + }, + { + "epoch": 1.74, + "grad_norm": 3.955170857965499, + "learning_rate": 8.56232525579962e-07, + "loss": 1.0033, + "step": 144918 + }, + { + "epoch": 1.74, + "grad_norm": 11.008472397688571, + "learning_rate": 8.559960283048341e-07, + "loss": 1.1683, + "step": 144921 + }, + { + "epoch": 1.74, + "grad_norm": 7.8513644455087945, + "learning_rate": 8.557595622348958e-07, + "loss": 1.3568, + "step": 144924 + }, + { + "epoch": 1.74, + "grad_norm": 5.165803489832066, + "learning_rate": 8.555231273709575e-07, + "loss": 1.0856, + "step": 144927 + }, + { + "epoch": 1.74, + "grad_norm": 9.50161548684054, + "learning_rate": 8.552867237138262e-07, + "loss": 1.2155, + "step": 144930 + }, + { + "epoch": 1.74, + "grad_norm": 11.958707369043257, + "learning_rate": 8.55050351264306e-07, + "loss": 1.1247, + "step": 144933 + }, + { + "epoch": 1.74, + "grad_norm": 19.643948468563448, + "learning_rate": 8.54814010023205e-07, + "loss": 1.4819, + "step": 144936 + }, + { + "epoch": 1.74, + "grad_norm": 16.565345930812793, + "learning_rate": 8.545776999913291e-07, + "loss": 0.8864, + "step": 144939 + }, + { + "epoch": 1.74, + "grad_norm": 2.859976759524016, + "learning_rate": 8.543414211694867e-07, + "loss": 1.1352, + "step": 144942 + }, + { + "epoch": 1.74, + "grad_norm": 5.927739303389906, + "learning_rate": 8.541051735584838e-07, + "loss": 1.4255, + "step": 144945 + }, + { + "epoch": 1.74, + "grad_norm": 4.887607498736609, + "learning_rate": 8.538689571591241e-07, + "loss": 1.0717, + "step": 144948 + }, + { + "epoch": 1.74, + "grad_norm": 8.952240185381354, + "learning_rate": 8.536327719722138e-07, + "loss": 0.75, + "step": 144951 + }, + { + "epoch": 1.74, + "grad_norm": 9.152849420421234, + "learning_rate": 8.53396617998562e-07, + "loss": 1.1231, + "step": 144954 + }, + { + "epoch": 1.74, + "grad_norm": 14.401275699174295, + "learning_rate": 8.531604952389738e-07, + "loss": 1.164, + "step": 144957 + }, + { + "epoch": 1.74, + "grad_norm": 12.164762016823632, + "learning_rate": 8.529244036942518e-07, + "loss": 0.984, + "step": 144960 + }, + { + "epoch": 1.74, + "grad_norm": 8.794115590480448, + "learning_rate": 8.526883433652067e-07, + "loss": 1.2461, + "step": 144963 + }, + { + "epoch": 1.74, + "grad_norm": 6.722087456373076, + "learning_rate": 8.524523142526398e-07, + "loss": 1.0828, + "step": 144966 + }, + { + "epoch": 1.74, + "grad_norm": 6.778410660062832, + "learning_rate": 8.522163163573594e-07, + "loss": 1.3243, + "step": 144969 + }, + { + "epoch": 1.74, + "grad_norm": 10.294433595334942, + "learning_rate": 8.519803496801682e-07, + "loss": 1.0974, + "step": 144972 + }, + { + "epoch": 1.74, + "grad_norm": 8.881117264112767, + "learning_rate": 8.517444142218734e-07, + "loss": 0.9562, + "step": 144975 + }, + { + "epoch": 1.74, + "grad_norm": 7.290872190695396, + "learning_rate": 8.515085099832787e-07, + "loss": 1.0318, + "step": 144978 + }, + { + "epoch": 1.74, + "grad_norm": 13.066866750260717, + "learning_rate": 8.512726369651913e-07, + "loss": 1.1624, + "step": 144981 + }, + { + "epoch": 1.74, + "grad_norm": 12.317684888014732, + "learning_rate": 8.510367951684162e-07, + "loss": 1.3843, + "step": 144984 + }, + { + "epoch": 1.74, + "grad_norm": 9.648891050028585, + "learning_rate": 8.50800984593757e-07, + "loss": 1.2009, + "step": 144987 + }, + { + "epoch": 1.74, + "grad_norm": 6.450390767589879, + "learning_rate": 8.505652052420165e-07, + "loss": 1.2382, + "step": 144990 + }, + { + "epoch": 1.74, + "grad_norm": 4.268586293723573, + "learning_rate": 8.503294571140019e-07, + "loss": 1.5751, + "step": 144993 + }, + { + "epoch": 1.74, + "grad_norm": 7.663528049272472, + "learning_rate": 8.500937402105191e-07, + "loss": 1.0479, + "step": 144996 + }, + { + "epoch": 1.74, + "grad_norm": 9.785685386598775, + "learning_rate": 8.498580545323687e-07, + "loss": 0.9854, + "step": 144999 + }, + { + "epoch": 1.74, + "grad_norm": 7.882803776964357, + "learning_rate": 8.496224000803566e-07, + "loss": 1.1949, + "step": 145002 + }, + { + "epoch": 1.74, + "grad_norm": 8.262795737945657, + "learning_rate": 8.493867768552888e-07, + "loss": 1.2448, + "step": 145005 + }, + { + "epoch": 1.74, + "grad_norm": 6.822277456979815, + "learning_rate": 8.49151184857967e-07, + "loss": 1.3058, + "step": 145008 + }, + { + "epoch": 1.74, + "grad_norm": 3.443669914939563, + "learning_rate": 8.489156240891972e-07, + "loss": 1.2367, + "step": 145011 + }, + { + "epoch": 1.74, + "grad_norm": 7.997027370787581, + "learning_rate": 8.486800945497798e-07, + "loss": 1.0738, + "step": 145014 + }, + { + "epoch": 1.74, + "grad_norm": 7.018559494564558, + "learning_rate": 8.484445962405219e-07, + "loss": 1.147, + "step": 145017 + }, + { + "epoch": 1.74, + "grad_norm": 9.508067942385505, + "learning_rate": 8.482091291622264e-07, + "loss": 0.9579, + "step": 145020 + }, + { + "epoch": 1.74, + "grad_norm": 3.529607314516887, + "learning_rate": 8.479736933156979e-07, + "loss": 1.493, + "step": 145023 + }, + { + "epoch": 1.74, + "grad_norm": 11.094697715914993, + "learning_rate": 8.477382887017372e-07, + "loss": 1.5363, + "step": 145026 + }, + { + "epoch": 1.74, + "grad_norm": 11.15393806596775, + "learning_rate": 8.4750291532115e-07, + "loss": 0.9332, + "step": 145029 + }, + { + "epoch": 1.74, + "grad_norm": 4.340707144683746, + "learning_rate": 8.472675731747382e-07, + "loss": 1.3834, + "step": 145032 + }, + { + "epoch": 1.74, + "grad_norm": 16.259304722236784, + "learning_rate": 8.470322622633042e-07, + "loss": 1.2424, + "step": 145035 + }, + { + "epoch": 1.74, + "grad_norm": 2.655601750255612, + "learning_rate": 8.467969825876544e-07, + "loss": 1.4596, + "step": 145038 + }, + { + "epoch": 1.74, + "grad_norm": 11.649396458878376, + "learning_rate": 8.465617341485877e-07, + "loss": 1.3235, + "step": 145041 + }, + { + "epoch": 1.74, + "grad_norm": 8.719656401132777, + "learning_rate": 8.463265169469092e-07, + "loss": 1.3465, + "step": 145044 + }, + { + "epoch": 1.74, + "grad_norm": 16.66162552529699, + "learning_rate": 8.460913309834229e-07, + "loss": 1.0797, + "step": 145047 + }, + { + "epoch": 1.74, + "grad_norm": 7.886082699419319, + "learning_rate": 8.458561762589279e-07, + "loss": 1.1833, + "step": 145050 + }, + { + "epoch": 1.74, + "grad_norm": 5.784929291203681, + "learning_rate": 8.456210527742292e-07, + "loss": 0.8655, + "step": 145053 + }, + { + "epoch": 1.74, + "grad_norm": 9.45650427354486, + "learning_rate": 8.453859605301273e-07, + "loss": 1.1245, + "step": 145056 + }, + { + "epoch": 1.74, + "grad_norm": 9.34981289715371, + "learning_rate": 8.45150899527426e-07, + "loss": 1.0048, + "step": 145059 + }, + { + "epoch": 1.74, + "grad_norm": 6.900443884704529, + "learning_rate": 8.449158697669268e-07, + "loss": 1.1631, + "step": 145062 + }, + { + "epoch": 1.74, + "grad_norm": 7.824410988338876, + "learning_rate": 8.446808712494348e-07, + "loss": 1.0724, + "step": 145065 + }, + { + "epoch": 1.74, + "grad_norm": 5.366500397701739, + "learning_rate": 8.444459039757457e-07, + "loss": 1.3782, + "step": 145068 + }, + { + "epoch": 1.74, + "grad_norm": 5.012672167250902, + "learning_rate": 8.442109679466681e-07, + "loss": 0.9864, + "step": 145071 + }, + { + "epoch": 1.74, + "grad_norm": 14.344194611676466, + "learning_rate": 8.439760631629978e-07, + "loss": 0.9523, + "step": 145074 + }, + { + "epoch": 1.74, + "grad_norm": 11.040220359701914, + "learning_rate": 8.437411896255421e-07, + "loss": 1.1356, + "step": 145077 + }, + { + "epoch": 1.74, + "grad_norm": 13.231751829437755, + "learning_rate": 8.435063473350968e-07, + "loss": 0.9614, + "step": 145080 + }, + { + "epoch": 1.74, + "grad_norm": 4.469593959133991, + "learning_rate": 8.43271536292467e-07, + "loss": 0.9381, + "step": 145083 + }, + { + "epoch": 1.74, + "grad_norm": 4.125222781917048, + "learning_rate": 8.430367564984532e-07, + "loss": 1.1922, + "step": 145086 + }, + { + "epoch": 1.74, + "grad_norm": 8.406560747399157, + "learning_rate": 8.428020079538557e-07, + "loss": 1.0455, + "step": 145089 + }, + { + "epoch": 1.74, + "grad_norm": 4.318950197677713, + "learning_rate": 8.425672906594795e-07, + "loss": 1.169, + "step": 145092 + }, + { + "epoch": 1.74, + "grad_norm": 16.17411887490062, + "learning_rate": 8.423326046161218e-07, + "loss": 1.3866, + "step": 145095 + }, + { + "epoch": 1.74, + "grad_norm": 9.673419148750217, + "learning_rate": 8.420979498245829e-07, + "loss": 1.1855, + "step": 145098 + }, + { + "epoch": 1.74, + "grad_norm": 6.99349077252839, + "learning_rate": 8.418633262856646e-07, + "loss": 0.8888, + "step": 145101 + }, + { + "epoch": 1.74, + "grad_norm": 6.962709068936784, + "learning_rate": 8.416287340001705e-07, + "loss": 1.1739, + "step": 145104 + }, + { + "epoch": 1.74, + "grad_norm": 7.075093315507878, + "learning_rate": 8.413941729688968e-07, + "loss": 0.8612, + "step": 145107 + }, + { + "epoch": 1.74, + "grad_norm": 3.8342085278961737, + "learning_rate": 8.41159643192645e-07, + "loss": 1.3602, + "step": 145110 + }, + { + "epoch": 1.74, + "grad_norm": 15.97698223519022, + "learning_rate": 8.409251446722189e-07, + "loss": 0.9171, + "step": 145113 + }, + { + "epoch": 1.74, + "grad_norm": 33.503936909119226, + "learning_rate": 8.406906774084145e-07, + "loss": 1.0213, + "step": 145116 + }, + { + "epoch": 1.75, + "grad_norm": 5.4135386374839465, + "learning_rate": 8.404562414020356e-07, + "loss": 0.8875, + "step": 145119 + }, + { + "epoch": 1.75, + "grad_norm": 7.8434675175789, + "learning_rate": 8.402218366538783e-07, + "loss": 1.1687, + "step": 145122 + }, + { + "epoch": 1.75, + "grad_norm": 4.985908454216455, + "learning_rate": 8.399874631647443e-07, + "loss": 1.1912, + "step": 145125 + }, + { + "epoch": 1.75, + "grad_norm": 6.534962007698587, + "learning_rate": 8.397531209354337e-07, + "loss": 1.1516, + "step": 145128 + }, + { + "epoch": 1.75, + "grad_norm": 2.5555767835251926, + "learning_rate": 8.395188099667473e-07, + "loss": 1.1834, + "step": 145131 + }, + { + "epoch": 1.75, + "grad_norm": 5.90125449072098, + "learning_rate": 8.392845302594832e-07, + "loss": 0.8913, + "step": 145134 + }, + { + "epoch": 1.75, + "grad_norm": 9.313965551842486, + "learning_rate": 8.39050281814442e-07, + "loss": 0.9016, + "step": 145137 + }, + { + "epoch": 1.75, + "grad_norm": 2.1255055791636464, + "learning_rate": 8.388160646324218e-07, + "loss": 1.0478, + "step": 145140 + }, + { + "epoch": 1.75, + "grad_norm": 3.030863179085097, + "learning_rate": 8.385818787142219e-07, + "loss": 1.1374, + "step": 145143 + }, + { + "epoch": 1.75, + "grad_norm": 8.764743532975203, + "learning_rate": 8.38347724060643e-07, + "loss": 1.2053, + "step": 145146 + }, + { + "epoch": 1.75, + "grad_norm": 20.988130282820208, + "learning_rate": 8.381136006724822e-07, + "loss": 1.2141, + "step": 145149 + }, + { + "epoch": 1.75, + "grad_norm": 11.005321627719894, + "learning_rate": 8.378795085505398e-07, + "loss": 1.1262, + "step": 145152 + }, + { + "epoch": 1.75, + "grad_norm": 12.988422816329939, + "learning_rate": 8.376454476956164e-07, + "loss": 0.9887, + "step": 145155 + }, + { + "epoch": 1.75, + "grad_norm": 10.97314105109181, + "learning_rate": 8.374114181085091e-07, + "loss": 1.1145, + "step": 145158 + }, + { + "epoch": 1.75, + "grad_norm": 11.15018895472179, + "learning_rate": 8.37177419790014e-07, + "loss": 1.0227, + "step": 145161 + }, + { + "epoch": 1.75, + "grad_norm": 8.469619252905053, + "learning_rate": 8.369434527409314e-07, + "loss": 1.4568, + "step": 145164 + }, + { + "epoch": 1.75, + "grad_norm": 9.897635462187896, + "learning_rate": 8.367095169620609e-07, + "loss": 1.4167, + "step": 145167 + }, + { + "epoch": 1.75, + "grad_norm": 10.458019102930699, + "learning_rate": 8.364756124542007e-07, + "loss": 1.2687, + "step": 145170 + }, + { + "epoch": 1.75, + "grad_norm": 5.505275553922362, + "learning_rate": 8.362417392181488e-07, + "loss": 0.8427, + "step": 145173 + }, + { + "epoch": 1.75, + "grad_norm": 6.3525089935002415, + "learning_rate": 8.360078972547026e-07, + "loss": 0.953, + "step": 145176 + }, + { + "epoch": 1.75, + "grad_norm": 13.85454782809939, + "learning_rate": 8.357740865646624e-07, + "loss": 1.1544, + "step": 145179 + }, + { + "epoch": 1.75, + "grad_norm": 12.299240564235614, + "learning_rate": 8.355403071488222e-07, + "loss": 1.2695, + "step": 145182 + }, + { + "epoch": 1.75, + "grad_norm": 6.5086175453356265, + "learning_rate": 8.353065590079834e-07, + "loss": 0.9116, + "step": 145185 + }, + { + "epoch": 1.75, + "grad_norm": 2.367828404919183, + "learning_rate": 8.350728421429411e-07, + "loss": 0.8447, + "step": 145188 + }, + { + "epoch": 1.75, + "grad_norm": 13.406187323724705, + "learning_rate": 8.348391565544934e-07, + "loss": 1.3011, + "step": 145191 + }, + { + "epoch": 1.75, + "grad_norm": 5.046207309652794, + "learning_rate": 8.346055022434385e-07, + "loss": 1.2157, + "step": 145194 + }, + { + "epoch": 1.75, + "grad_norm": 8.147508433687314, + "learning_rate": 8.343718792105748e-07, + "loss": 1.023, + "step": 145197 + }, + { + "epoch": 1.75, + "grad_norm": 9.541573889613476, + "learning_rate": 8.341382874566995e-07, + "loss": 1.054, + "step": 145200 + }, + { + "epoch": 1.75, + "grad_norm": 13.423688797033048, + "learning_rate": 8.339047269826061e-07, + "loss": 1.3935, + "step": 145203 + }, + { + "epoch": 1.75, + "grad_norm": 9.148540547009892, + "learning_rate": 8.336711977890954e-07, + "loss": 1.1647, + "step": 145206 + }, + { + "epoch": 1.75, + "grad_norm": 20.113397881969142, + "learning_rate": 8.334376998769622e-07, + "loss": 0.9552, + "step": 145209 + }, + { + "epoch": 1.75, + "grad_norm": 19.130925520457907, + "learning_rate": 8.332042332470069e-07, + "loss": 0.9818, + "step": 145212 + }, + { + "epoch": 1.75, + "grad_norm": 5.5909080745348945, + "learning_rate": 8.329707979000213e-07, + "loss": 1.0075, + "step": 145215 + }, + { + "epoch": 1.75, + "grad_norm": 5.483814499469264, + "learning_rate": 8.327373938368044e-07, + "loss": 1.3463, + "step": 145218 + }, + { + "epoch": 1.75, + "grad_norm": 14.461380238115503, + "learning_rate": 8.325040210581548e-07, + "loss": 1.5963, + "step": 145221 + }, + { + "epoch": 1.75, + "grad_norm": 14.323650190114016, + "learning_rate": 8.32270679564865e-07, + "loss": 1.3727, + "step": 145224 + }, + { + "epoch": 1.75, + "grad_norm": 13.117480139130778, + "learning_rate": 8.320373693577344e-07, + "loss": 1.4301, + "step": 145227 + }, + { + "epoch": 1.75, + "grad_norm": 5.483812912273168, + "learning_rate": 8.318040904375568e-07, + "loss": 0.9258, + "step": 145230 + }, + { + "epoch": 1.75, + "grad_norm": 2.670274839971141, + "learning_rate": 8.315708428051295e-07, + "loss": 1.1585, + "step": 145233 + }, + { + "epoch": 1.75, + "grad_norm": 4.373924651855529, + "learning_rate": 8.313376264612483e-07, + "loss": 1.1923, + "step": 145236 + }, + { + "epoch": 1.75, + "grad_norm": 6.4565980986687554, + "learning_rate": 8.311044414067105e-07, + "loss": 1.6381, + "step": 145239 + }, + { + "epoch": 1.75, + "grad_norm": 6.4128062758925575, + "learning_rate": 8.30871287642312e-07, + "loss": 1.0446, + "step": 145242 + }, + { + "epoch": 1.75, + "grad_norm": 4.967591368231452, + "learning_rate": 8.306381651688445e-07, + "loss": 1.1499, + "step": 145245 + }, + { + "epoch": 1.75, + "grad_norm": 9.673368197537075, + "learning_rate": 8.304050739871062e-07, + "loss": 1.1215, + "step": 145248 + }, + { + "epoch": 1.75, + "grad_norm": 19.02515438904923, + "learning_rate": 8.301720140978919e-07, + "loss": 1.1692, + "step": 145251 + }, + { + "epoch": 1.75, + "grad_norm": 4.906709258207339, + "learning_rate": 8.299389855020002e-07, + "loss": 1.3248, + "step": 145254 + }, + { + "epoch": 1.75, + "grad_norm": 3.7076386450849523, + "learning_rate": 8.297059882002212e-07, + "loss": 1.1053, + "step": 145257 + }, + { + "epoch": 1.75, + "grad_norm": 6.599634644834541, + "learning_rate": 8.294730221933533e-07, + "loss": 1.4506, + "step": 145260 + }, + { + "epoch": 1.75, + "grad_norm": 5.107559401292961, + "learning_rate": 8.292400874821916e-07, + "loss": 1.3145, + "step": 145263 + }, + { + "epoch": 1.75, + "grad_norm": 6.805108820986587, + "learning_rate": 8.290071840675307e-07, + "loss": 1.1777, + "step": 145266 + }, + { + "epoch": 1.75, + "grad_norm": 9.003196724054684, + "learning_rate": 8.287743119501634e-07, + "loss": 1.1808, + "step": 145269 + }, + { + "epoch": 1.75, + "grad_norm": 8.14066279479882, + "learning_rate": 8.285414711308859e-07, + "loss": 1.1409, + "step": 145272 + }, + { + "epoch": 1.75, + "grad_norm": 5.98863918792537, + "learning_rate": 8.283086616104929e-07, + "loss": 1.0881, + "step": 145275 + }, + { + "epoch": 1.75, + "grad_norm": 5.277540678900457, + "learning_rate": 8.280758833897784e-07, + "loss": 1.0028, + "step": 145278 + }, + { + "epoch": 1.75, + "grad_norm": 12.804512528815033, + "learning_rate": 8.278431364695394e-07, + "loss": 1.2667, + "step": 145281 + }, + { + "epoch": 1.75, + "grad_norm": 20.26521052672551, + "learning_rate": 8.276104208505675e-07, + "loss": 1.1579, + "step": 145284 + }, + { + "epoch": 1.75, + "grad_norm": 11.082262376513262, + "learning_rate": 8.273777365336555e-07, + "loss": 1.2967, + "step": 145287 + }, + { + "epoch": 1.75, + "grad_norm": 6.530838068121132, + "learning_rate": 8.271450835195993e-07, + "loss": 1.3325, + "step": 145290 + }, + { + "epoch": 1.75, + "grad_norm": 6.466440692489018, + "learning_rate": 8.26912461809195e-07, + "loss": 0.9983, + "step": 145293 + }, + { + "epoch": 1.75, + "grad_norm": 6.586718714287842, + "learning_rate": 8.266798714032331e-07, + "loss": 1.1451, + "step": 145296 + }, + { + "epoch": 1.75, + "grad_norm": 5.128654252562434, + "learning_rate": 8.264473123025074e-07, + "loss": 0.9694, + "step": 145299 + }, + { + "epoch": 1.75, + "grad_norm": 6.703744740863344, + "learning_rate": 8.262147845078139e-07, + "loss": 1.2306, + "step": 145302 + }, + { + "epoch": 1.75, + "grad_norm": 5.801031989544798, + "learning_rate": 8.259822880199453e-07, + "loss": 1.449, + "step": 145305 + }, + { + "epoch": 1.75, + "grad_norm": 4.5731311285087095, + "learning_rate": 8.257498228396966e-07, + "loss": 1.1102, + "step": 145308 + }, + { + "epoch": 1.75, + "grad_norm": 28.66340736524118, + "learning_rate": 8.255173889678558e-07, + "loss": 1.6863, + "step": 145311 + }, + { + "epoch": 1.75, + "grad_norm": 13.873689488517456, + "learning_rate": 8.252849864052204e-07, + "loss": 1.0028, + "step": 145314 + }, + { + "epoch": 1.75, + "grad_norm": 4.425173838834585, + "learning_rate": 8.250526151525818e-07, + "loss": 0.8209, + "step": 145317 + }, + { + "epoch": 1.75, + "grad_norm": 3.056690263201865, + "learning_rate": 8.24820275210736e-07, + "loss": 1.2533, + "step": 145320 + }, + { + "epoch": 1.75, + "grad_norm": 19.58538100628592, + "learning_rate": 8.245879665804724e-07, + "loss": 1.1895, + "step": 145323 + }, + { + "epoch": 1.75, + "grad_norm": 3.2998677776377097, + "learning_rate": 8.243556892625848e-07, + "loss": 1.3653, + "step": 145326 + }, + { + "epoch": 1.75, + "grad_norm": 5.714863039414816, + "learning_rate": 8.241234432578682e-07, + "loss": 0.9813, + "step": 145329 + }, + { + "epoch": 1.75, + "grad_norm": 8.370164674336605, + "learning_rate": 8.238912285671108e-07, + "loss": 1.2683, + "step": 145332 + }, + { + "epoch": 1.75, + "grad_norm": 5.703804037673536, + "learning_rate": 8.236590451911097e-07, + "loss": 0.9848, + "step": 145335 + }, + { + "epoch": 1.75, + "grad_norm": 6.630947976682848, + "learning_rate": 8.234268931306521e-07, + "loss": 1.1066, + "step": 145338 + }, + { + "epoch": 1.75, + "grad_norm": 6.811167305023567, + "learning_rate": 8.231947723865341e-07, + "loss": 1.0547, + "step": 145341 + }, + { + "epoch": 1.75, + "grad_norm": 17.228638881543798, + "learning_rate": 8.229626829595471e-07, + "loss": 1.2445, + "step": 145344 + }, + { + "epoch": 1.75, + "grad_norm": 6.917657779589351, + "learning_rate": 8.227306248504841e-07, + "loss": 1.2482, + "step": 145347 + }, + { + "epoch": 1.75, + "grad_norm": 5.928921158018677, + "learning_rate": 8.224985980601363e-07, + "loss": 1.2403, + "step": 145350 + }, + { + "epoch": 1.75, + "grad_norm": 9.743296260399847, + "learning_rate": 8.222666025892933e-07, + "loss": 1.0745, + "step": 145353 + }, + { + "epoch": 1.75, + "grad_norm": 3.2745350019434487, + "learning_rate": 8.220346384387478e-07, + "loss": 1.3517, + "step": 145356 + }, + { + "epoch": 1.75, + "grad_norm": 9.779221479179535, + "learning_rate": 8.218027056092936e-07, + "loss": 1.0634, + "step": 145359 + }, + { + "epoch": 1.75, + "grad_norm": 10.515620357814152, + "learning_rate": 8.215708041017212e-07, + "loss": 0.9935, + "step": 145362 + }, + { + "epoch": 1.75, + "grad_norm": 6.349003243942532, + "learning_rate": 8.213389339168209e-07, + "loss": 0.6895, + "step": 145365 + }, + { + "epoch": 1.75, + "grad_norm": 9.817452855856052, + "learning_rate": 8.211070950553834e-07, + "loss": 1.193, + "step": 145368 + }, + { + "epoch": 1.75, + "grad_norm": 7.206616123055335, + "learning_rate": 8.208752875182035e-07, + "loss": 1.2619, + "step": 145371 + }, + { + "epoch": 1.75, + "grad_norm": 8.141038751181878, + "learning_rate": 8.206435113060707e-07, + "loss": 1.0419, + "step": 145374 + }, + { + "epoch": 1.75, + "grad_norm": 7.25372133076813, + "learning_rate": 8.20411766419773e-07, + "loss": 1.3515, + "step": 145377 + }, + { + "epoch": 1.75, + "grad_norm": 11.778988742034183, + "learning_rate": 8.201800528601034e-07, + "loss": 1.0133, + "step": 145380 + }, + { + "epoch": 1.75, + "grad_norm": 3.2801946167863023, + "learning_rate": 8.199483706278522e-07, + "loss": 1.1563, + "step": 145383 + }, + { + "epoch": 1.75, + "grad_norm": 11.806846756840178, + "learning_rate": 8.19716719723811e-07, + "loss": 1.0459, + "step": 145386 + }, + { + "epoch": 1.75, + "grad_norm": 7.993123070415638, + "learning_rate": 8.194851001487714e-07, + "loss": 0.7865, + "step": 145389 + }, + { + "epoch": 1.75, + "grad_norm": 7.519261302552229, + "learning_rate": 8.192535119035228e-07, + "loss": 1.252, + "step": 145392 + }, + { + "epoch": 1.75, + "grad_norm": 12.132582765927587, + "learning_rate": 8.190219549888534e-07, + "loss": 1.1036, + "step": 145395 + }, + { + "epoch": 1.75, + "grad_norm": 9.0280685377729, + "learning_rate": 8.187904294055548e-07, + "loss": 1.2429, + "step": 145398 + }, + { + "epoch": 1.75, + "grad_norm": 10.720711837660172, + "learning_rate": 8.185589351544188e-07, + "loss": 1.2632, + "step": 145401 + }, + { + "epoch": 1.75, + "grad_norm": 9.403047750370238, + "learning_rate": 8.183274722362333e-07, + "loss": 1.3954, + "step": 145404 + }, + { + "epoch": 1.75, + "grad_norm": 8.38266158128831, + "learning_rate": 8.180960406517879e-07, + "loss": 1.0366, + "step": 145407 + }, + { + "epoch": 1.75, + "grad_norm": 7.157202996750785, + "learning_rate": 8.178646404018741e-07, + "loss": 1.2157, + "step": 145410 + }, + { + "epoch": 1.75, + "grad_norm": 7.978975369479599, + "learning_rate": 8.176332714872826e-07, + "loss": 1.4309, + "step": 145413 + }, + { + "epoch": 1.75, + "grad_norm": 6.00460418187815, + "learning_rate": 8.174019339088002e-07, + "loss": 1.2789, + "step": 145416 + }, + { + "epoch": 1.75, + "grad_norm": 5.238139905641992, + "learning_rate": 8.171706276672165e-07, + "loss": 0.9334, + "step": 145419 + }, + { + "epoch": 1.75, + "grad_norm": 3.0733749814390126, + "learning_rate": 8.169393527633218e-07, + "loss": 1.1458, + "step": 145422 + }, + { + "epoch": 1.75, + "grad_norm": 294.1995507509604, + "learning_rate": 8.167081091979057e-07, + "loss": 1.0431, + "step": 145425 + }, + { + "epoch": 1.75, + "grad_norm": 6.253355134454825, + "learning_rate": 8.164768969717584e-07, + "loss": 1.2988, + "step": 145428 + }, + { + "epoch": 1.75, + "grad_norm": 22.52809254106126, + "learning_rate": 8.162457160856663e-07, + "loss": 1.0842, + "step": 145431 + }, + { + "epoch": 1.75, + "grad_norm": 14.247024862420808, + "learning_rate": 8.160145665404207e-07, + "loss": 1.0555, + "step": 145434 + }, + { + "epoch": 1.75, + "grad_norm": 4.909235642413861, + "learning_rate": 8.157834483368077e-07, + "loss": 1.0617, + "step": 145437 + }, + { + "epoch": 1.75, + "grad_norm": 7.389882661046198, + "learning_rate": 8.155523614756177e-07, + "loss": 1.3145, + "step": 145440 + }, + { + "epoch": 1.75, + "grad_norm": 6.878366100209819, + "learning_rate": 8.153213059576404e-07, + "loss": 1.0281, + "step": 145443 + }, + { + "epoch": 1.75, + "grad_norm": 8.802951964741014, + "learning_rate": 8.150902817836625e-07, + "loss": 1.2522, + "step": 145446 + }, + { + "epoch": 1.75, + "grad_norm": 10.786517572913988, + "learning_rate": 8.148592889544727e-07, + "loss": 1.1176, + "step": 145449 + }, + { + "epoch": 1.75, + "grad_norm": 9.524090040170046, + "learning_rate": 8.1462832747086e-07, + "loss": 1.0307, + "step": 145452 + }, + { + "epoch": 1.75, + "grad_norm": 8.489546785251889, + "learning_rate": 8.143973973336128e-07, + "loss": 1.0391, + "step": 145455 + }, + { + "epoch": 1.75, + "grad_norm": 5.870012494552505, + "learning_rate": 8.141664985435193e-07, + "loss": 1.1275, + "step": 145458 + }, + { + "epoch": 1.75, + "grad_norm": 12.550002007226245, + "learning_rate": 8.139356311013657e-07, + "loss": 1.0923, + "step": 145461 + }, + { + "epoch": 1.75, + "grad_norm": 14.06046705794441, + "learning_rate": 8.137047950079402e-07, + "loss": 1.0092, + "step": 145464 + }, + { + "epoch": 1.75, + "grad_norm": 6.59488989276596, + "learning_rate": 8.13473990264032e-07, + "loss": 1.4128, + "step": 145467 + }, + { + "epoch": 1.75, + "grad_norm": 10.235551862258005, + "learning_rate": 8.132432168704297e-07, + "loss": 0.9267, + "step": 145470 + }, + { + "epoch": 1.75, + "grad_norm": 20.710667047032125, + "learning_rate": 8.130124748279167e-07, + "loss": 1.0242, + "step": 145473 + }, + { + "epoch": 1.75, + "grad_norm": 4.252690975056007, + "learning_rate": 8.127817641372859e-07, + "loss": 1.5505, + "step": 145476 + }, + { + "epoch": 1.75, + "grad_norm": 19.03524916934119, + "learning_rate": 8.12551084799319e-07, + "loss": 1.0814, + "step": 145479 + }, + { + "epoch": 1.75, + "grad_norm": 10.867070452033687, + "learning_rate": 8.123204368148086e-07, + "loss": 1.1154, + "step": 145482 + }, + { + "epoch": 1.75, + "grad_norm": 7.458603229758501, + "learning_rate": 8.120898201845362e-07, + "loss": 0.8717, + "step": 145485 + }, + { + "epoch": 1.75, + "grad_norm": 11.711357882398085, + "learning_rate": 8.118592349092935e-07, + "loss": 1.3226, + "step": 145488 + }, + { + "epoch": 1.75, + "grad_norm": 5.6500155809266435, + "learning_rate": 8.116286809898644e-07, + "loss": 1.07, + "step": 145491 + }, + { + "epoch": 1.75, + "grad_norm": 1.8473151450876868, + "learning_rate": 8.113981584270369e-07, + "loss": 1.1141, + "step": 145494 + }, + { + "epoch": 1.75, + "grad_norm": 5.761376168829309, + "learning_rate": 8.111676672215996e-07, + "loss": 1.1629, + "step": 145497 + }, + { + "epoch": 1.75, + "grad_norm": 5.125483464033536, + "learning_rate": 8.109372073743383e-07, + "loss": 1.1999, + "step": 145500 + }, + { + "epoch": 1.75, + "grad_norm": 8.07420134604349, + "learning_rate": 8.107067788860357e-07, + "loss": 1.0311, + "step": 145503 + }, + { + "epoch": 1.75, + "grad_norm": 11.71339492478558, + "learning_rate": 8.104763817574812e-07, + "loss": 1.5197, + "step": 145506 + }, + { + "epoch": 1.75, + "grad_norm": 3.5776078627978873, + "learning_rate": 8.102460159894621e-07, + "loss": 0.7703, + "step": 145509 + }, + { + "epoch": 1.75, + "grad_norm": 15.034694583544052, + "learning_rate": 8.100156815827621e-07, + "loss": 1.3114, + "step": 145512 + }, + { + "epoch": 1.75, + "grad_norm": 4.712031695102119, + "learning_rate": 8.097853785381682e-07, + "loss": 1.3034, + "step": 145515 + }, + { + "epoch": 1.75, + "grad_norm": 9.780184269749622, + "learning_rate": 8.095551068564678e-07, + "loss": 1.4923, + "step": 145518 + }, + { + "epoch": 1.75, + "grad_norm": 5.008433258825072, + "learning_rate": 8.093248665384434e-07, + "loss": 1.1277, + "step": 145521 + }, + { + "epoch": 1.75, + "grad_norm": 7.885423743333472, + "learning_rate": 8.090946575848857e-07, + "loss": 1.0078, + "step": 145524 + }, + { + "epoch": 1.75, + "grad_norm": 7.77627751874533, + "learning_rate": 8.08864479996575e-07, + "loss": 1.2905, + "step": 145527 + }, + { + "epoch": 1.75, + "grad_norm": 10.436545808322387, + "learning_rate": 8.086343337742985e-07, + "loss": 1.0857, + "step": 145530 + }, + { + "epoch": 1.75, + "grad_norm": 3.962262540202491, + "learning_rate": 8.084042189188435e-07, + "loss": 1.1903, + "step": 145533 + }, + { + "epoch": 1.75, + "grad_norm": 10.893635637493839, + "learning_rate": 8.081741354309958e-07, + "loss": 1.4417, + "step": 145536 + }, + { + "epoch": 1.75, + "grad_norm": 9.43191666943879, + "learning_rate": 8.079440833115359e-07, + "loss": 1.2247, + "step": 145539 + }, + { + "epoch": 1.75, + "grad_norm": 7.5041345520237, + "learning_rate": 8.077140625612545e-07, + "loss": 1.4157, + "step": 145542 + }, + { + "epoch": 1.75, + "grad_norm": 10.394813587316783, + "learning_rate": 8.074840731809319e-07, + "loss": 1.296, + "step": 145545 + }, + { + "epoch": 1.75, + "grad_norm": 6.669844205957331, + "learning_rate": 8.072541151713553e-07, + "loss": 1.3054, + "step": 145548 + }, + { + "epoch": 1.75, + "grad_norm": 10.70604653114573, + "learning_rate": 8.070241885333119e-07, + "loss": 1.2931, + "step": 145551 + }, + { + "epoch": 1.75, + "grad_norm": 17.217777251216898, + "learning_rate": 8.067942932675798e-07, + "loss": 0.9173, + "step": 145554 + }, + { + "epoch": 1.75, + "grad_norm": 7.531261845087354, + "learning_rate": 8.065644293749486e-07, + "loss": 1.2119, + "step": 145557 + }, + { + "epoch": 1.75, + "grad_norm": 6.737424421948355, + "learning_rate": 8.063345968562042e-07, + "loss": 1.1012, + "step": 145560 + }, + { + "epoch": 1.75, + "grad_norm": 7.384526575024277, + "learning_rate": 8.061047957121249e-07, + "loss": 1.0429, + "step": 145563 + }, + { + "epoch": 1.75, + "grad_norm": 2.415452165840892, + "learning_rate": 8.058750259435011e-07, + "loss": 1.1844, + "step": 145566 + }, + { + "epoch": 1.75, + "grad_norm": 5.8681101817978965, + "learning_rate": 8.056452875511111e-07, + "loss": 0.8723, + "step": 145569 + }, + { + "epoch": 1.75, + "grad_norm": 7.294198039638928, + "learning_rate": 8.054155805357433e-07, + "loss": 1.3654, + "step": 145572 + }, + { + "epoch": 1.75, + "grad_norm": 9.86764069936762, + "learning_rate": 8.051859048981802e-07, + "loss": 1.2686, + "step": 145575 + }, + { + "epoch": 1.75, + "grad_norm": 6.475860001817898, + "learning_rate": 8.049562606392069e-07, + "loss": 1.0241, + "step": 145578 + }, + { + "epoch": 1.75, + "grad_norm": 6.3021469681357605, + "learning_rate": 8.047266477596039e-07, + "loss": 1.1874, + "step": 145581 + }, + { + "epoch": 1.75, + "grad_norm": 12.52516364919431, + "learning_rate": 8.044970662601592e-07, + "loss": 1.2126, + "step": 145584 + }, + { + "epoch": 1.75, + "grad_norm": 6.179292629576344, + "learning_rate": 8.042675161416513e-07, + "loss": 0.9475, + "step": 145587 + }, + { + "epoch": 1.75, + "grad_norm": 7.378655895185137, + "learning_rate": 8.040379974048684e-07, + "loss": 1.0353, + "step": 145590 + }, + { + "epoch": 1.75, + "grad_norm": 5.960142550499464, + "learning_rate": 8.038085100505888e-07, + "loss": 1.0934, + "step": 145593 + }, + { + "epoch": 1.75, + "grad_norm": 38.754593278844176, + "learning_rate": 8.035790540795996e-07, + "loss": 1.4387, + "step": 145596 + }, + { + "epoch": 1.75, + "grad_norm": 8.372359502303642, + "learning_rate": 8.033496294926813e-07, + "loss": 1.1036, + "step": 145599 + }, + { + "epoch": 1.75, + "grad_norm": 4.859087503112742, + "learning_rate": 8.031202362906209e-07, + "loss": 0.8674, + "step": 145602 + }, + { + "epoch": 1.75, + "grad_norm": 4.826525142136752, + "learning_rate": 8.028908744741959e-07, + "loss": 1.36, + "step": 145605 + }, + { + "epoch": 1.75, + "grad_norm": 19.167919898684527, + "learning_rate": 8.026615440441932e-07, + "loss": 1.3527, + "step": 145608 + }, + { + "epoch": 1.75, + "grad_norm": 7.282127936277859, + "learning_rate": 8.024322450013933e-07, + "loss": 1.3537, + "step": 145611 + }, + { + "epoch": 1.75, + "grad_norm": 4.718606403589277, + "learning_rate": 8.022029773465778e-07, + "loss": 1.0975, + "step": 145614 + }, + { + "epoch": 1.75, + "grad_norm": 7.854155203658186, + "learning_rate": 8.019737410805329e-07, + "loss": 1.0491, + "step": 145617 + }, + { + "epoch": 1.75, + "grad_norm": 17.00855168942047, + "learning_rate": 8.017445362040365e-07, + "loss": 1.1231, + "step": 145620 + }, + { + "epoch": 1.75, + "grad_norm": 9.268418327967668, + "learning_rate": 8.015153627178729e-07, + "loss": 1.1632, + "step": 145623 + }, + { + "epoch": 1.75, + "grad_norm": 11.747016014677282, + "learning_rate": 8.012862206228256e-07, + "loss": 1.1519, + "step": 145626 + }, + { + "epoch": 1.75, + "grad_norm": 5.236610390383667, + "learning_rate": 8.01057109919674e-07, + "loss": 1.5092, + "step": 145629 + }, + { + "epoch": 1.75, + "grad_norm": 13.330020766904658, + "learning_rate": 8.00828030609202e-07, + "loss": 1.1152, + "step": 145632 + }, + { + "epoch": 1.75, + "grad_norm": 9.19468432040362, + "learning_rate": 8.0059898269219e-07, + "loss": 1.219, + "step": 145635 + }, + { + "epoch": 1.75, + "grad_norm": 9.030195181373928, + "learning_rate": 8.003699661694187e-07, + "loss": 1.3818, + "step": 145638 + }, + { + "epoch": 1.75, + "grad_norm": 4.835804301642583, + "learning_rate": 8.001409810416716e-07, + "loss": 1.2771, + "step": 145641 + }, + { + "epoch": 1.75, + "grad_norm": 2.5449834837645287, + "learning_rate": 7.999120273097305e-07, + "loss": 1.1212, + "step": 145644 + }, + { + "epoch": 1.75, + "grad_norm": 19.22084609950152, + "learning_rate": 7.996831049743748e-07, + "loss": 1.2999, + "step": 145647 + }, + { + "epoch": 1.75, + "grad_norm": 5.814055868998965, + "learning_rate": 7.994542140363892e-07, + "loss": 0.922, + "step": 145650 + }, + { + "epoch": 1.75, + "grad_norm": 5.416139618106888, + "learning_rate": 7.992253544965489e-07, + "loss": 1.2331, + "step": 145653 + }, + { + "epoch": 1.75, + "grad_norm": 16.80910939129376, + "learning_rate": 7.989965263556398e-07, + "loss": 1.5154, + "step": 145656 + }, + { + "epoch": 1.75, + "grad_norm": 13.882284388582153, + "learning_rate": 7.987677296144425e-07, + "loss": 1.3147, + "step": 145659 + }, + { + "epoch": 1.75, + "grad_norm": 7.910335865882245, + "learning_rate": 7.985389642737352e-07, + "loss": 1.0055, + "step": 145662 + }, + { + "epoch": 1.75, + "grad_norm": 5.557362086140544, + "learning_rate": 7.983102303342993e-07, + "loss": 1.2945, + "step": 145665 + }, + { + "epoch": 1.75, + "grad_norm": 7.3693119145923305, + "learning_rate": 7.980815277969189e-07, + "loss": 1.0331, + "step": 145668 + }, + { + "epoch": 1.75, + "grad_norm": 12.94095955993772, + "learning_rate": 7.978528566623711e-07, + "loss": 0.9933, + "step": 145671 + }, + { + "epoch": 1.75, + "grad_norm": 11.197965591738162, + "learning_rate": 7.976242169314352e-07, + "loss": 1.1118, + "step": 145674 + }, + { + "epoch": 1.75, + "grad_norm": 12.063409964281082, + "learning_rate": 7.973956086048928e-07, + "loss": 1.3116, + "step": 145677 + }, + { + "epoch": 1.75, + "grad_norm": 5.630037597260285, + "learning_rate": 7.971670316835256e-07, + "loss": 1.47, + "step": 145680 + }, + { + "epoch": 1.75, + "grad_norm": 11.027402789565183, + "learning_rate": 7.969384861681118e-07, + "loss": 1.1624, + "step": 145683 + }, + { + "epoch": 1.75, + "grad_norm": 9.304419484613037, + "learning_rate": 7.96709972059434e-07, + "loss": 0.9131, + "step": 145686 + }, + { + "epoch": 1.75, + "grad_norm": 7.183646367016917, + "learning_rate": 7.964814893582673e-07, + "loss": 0.9143, + "step": 145689 + }, + { + "epoch": 1.75, + "grad_norm": 11.32373516574017, + "learning_rate": 7.962530380653966e-07, + "loss": 1.1107, + "step": 145692 + }, + { + "epoch": 1.75, + "grad_norm": 12.970623746553448, + "learning_rate": 7.960246181815979e-07, + "loss": 1.1439, + "step": 145695 + }, + { + "epoch": 1.75, + "grad_norm": 6.001471268217084, + "learning_rate": 7.957962297076527e-07, + "loss": 0.8857, + "step": 145698 + }, + { + "epoch": 1.75, + "grad_norm": 4.3561567562331875, + "learning_rate": 7.955678726443384e-07, + "loss": 1.2582, + "step": 145701 + }, + { + "epoch": 1.75, + "grad_norm": 14.065241686207749, + "learning_rate": 7.953395469924363e-07, + "loss": 1.0725, + "step": 145704 + }, + { + "epoch": 1.75, + "grad_norm": 5.8801067397149795, + "learning_rate": 7.951112527527238e-07, + "loss": 1.0762, + "step": 145707 + }, + { + "epoch": 1.75, + "grad_norm": 9.122726355514466, + "learning_rate": 7.948829899259825e-07, + "loss": 0.7295, + "step": 145710 + }, + { + "epoch": 1.75, + "grad_norm": 6.1832536073855815, + "learning_rate": 7.946547585129905e-07, + "loss": 1.378, + "step": 145713 + }, + { + "epoch": 1.75, + "grad_norm": 9.147529578785186, + "learning_rate": 7.94426558514525e-07, + "loss": 1.1306, + "step": 145716 + }, + { + "epoch": 1.75, + "grad_norm": 5.261542039938387, + "learning_rate": 7.941983899313643e-07, + "loss": 1.1651, + "step": 145719 + }, + { + "epoch": 1.75, + "grad_norm": 4.693851614636027, + "learning_rate": 7.9397025276429e-07, + "loss": 1.0467, + "step": 145722 + }, + { + "epoch": 1.75, + "grad_norm": 11.206560374656883, + "learning_rate": 7.937421470140794e-07, + "loss": 1.0562, + "step": 145725 + }, + { + "epoch": 1.75, + "grad_norm": 7.0566067353957145, + "learning_rate": 7.935140726815104e-07, + "loss": 0.9949, + "step": 145728 + }, + { + "epoch": 1.75, + "grad_norm": 5.864376848549005, + "learning_rate": 7.932860297673617e-07, + "loss": 1.2255, + "step": 145731 + }, + { + "epoch": 1.75, + "grad_norm": 17.562253467671248, + "learning_rate": 7.930580182724124e-07, + "loss": 1.3044, + "step": 145734 + }, + { + "epoch": 1.75, + "grad_norm": 14.025921974732055, + "learning_rate": 7.928300381974386e-07, + "loss": 1.144, + "step": 145737 + }, + { + "epoch": 1.75, + "grad_norm": 8.897954965154385, + "learning_rate": 7.926020895432208e-07, + "loss": 1.2398, + "step": 145740 + }, + { + "epoch": 1.75, + "grad_norm": 7.923659057175723, + "learning_rate": 7.923741723105339e-07, + "loss": 1.1693, + "step": 145743 + }, + { + "epoch": 1.75, + "grad_norm": 8.209085074015034, + "learning_rate": 7.921462865001573e-07, + "loss": 1.1651, + "step": 145746 + }, + { + "epoch": 1.75, + "grad_norm": 16.03393024127414, + "learning_rate": 7.919184321128692e-07, + "loss": 1.3325, + "step": 145749 + }, + { + "epoch": 1.75, + "grad_norm": 7.857496213162539, + "learning_rate": 7.916906091494481e-07, + "loss": 1.4508, + "step": 145752 + }, + { + "epoch": 1.75, + "grad_norm": 7.742719111632428, + "learning_rate": 7.914628176106709e-07, + "loss": 1.2516, + "step": 145755 + }, + { + "epoch": 1.75, + "grad_norm": 6.7378644042681115, + "learning_rate": 7.912350574973115e-07, + "loss": 1.2436, + "step": 145758 + }, + { + "epoch": 1.75, + "grad_norm": 9.069070481017967, + "learning_rate": 7.910073288101505e-07, + "loss": 0.9641, + "step": 145761 + }, + { + "epoch": 1.75, + "grad_norm": 9.828073613851306, + "learning_rate": 7.907796315499638e-07, + "loss": 1.5944, + "step": 145764 + }, + { + "epoch": 1.75, + "grad_norm": 18.286863670801285, + "learning_rate": 7.90551965717532e-07, + "loss": 0.6929, + "step": 145767 + }, + { + "epoch": 1.75, + "grad_norm": 2.7291303864089116, + "learning_rate": 7.903243313136266e-07, + "loss": 1.108, + "step": 145770 + }, + { + "epoch": 1.75, + "grad_norm": 10.810569579478386, + "learning_rate": 7.900967283390271e-07, + "loss": 1.3488, + "step": 145773 + }, + { + "epoch": 1.75, + "grad_norm": 4.694125803006303, + "learning_rate": 7.898691567945116e-07, + "loss": 1.0641, + "step": 145776 + }, + { + "epoch": 1.75, + "grad_norm": 5.8748330705544305, + "learning_rate": 7.896416166808551e-07, + "loss": 1.1454, + "step": 145779 + }, + { + "epoch": 1.75, + "grad_norm": 3.2850876917833496, + "learning_rate": 7.894141079988326e-07, + "loss": 1.2597, + "step": 145782 + }, + { + "epoch": 1.75, + "grad_norm": 4.281005357598105, + "learning_rate": 7.891866307492213e-07, + "loss": 1.3938, + "step": 145785 + }, + { + "epoch": 1.75, + "grad_norm": 7.3323241643035075, + "learning_rate": 7.889591849327993e-07, + "loss": 1.1706, + "step": 145788 + }, + { + "epoch": 1.75, + "grad_norm": 10.884387582510175, + "learning_rate": 7.887317705503417e-07, + "loss": 1.0548, + "step": 145791 + }, + { + "epoch": 1.75, + "grad_norm": 332.5194811189915, + "learning_rate": 7.885043876026265e-07, + "loss": 1.1619, + "step": 145794 + }, + { + "epoch": 1.75, + "grad_norm": 24.419802365185507, + "learning_rate": 7.882770360904279e-07, + "loss": 1.1613, + "step": 145797 + }, + { + "epoch": 1.75, + "grad_norm": 3.4564361646474637, + "learning_rate": 7.880497160145196e-07, + "loss": 1.0846, + "step": 145800 + }, + { + "epoch": 1.75, + "grad_norm": 3.4243596425523206, + "learning_rate": 7.878224273756796e-07, + "loss": 1.2683, + "step": 145803 + }, + { + "epoch": 1.75, + "grad_norm": 5.900943735981587, + "learning_rate": 7.875951701746853e-07, + "loss": 1.5313, + "step": 145806 + }, + { + "epoch": 1.75, + "grad_norm": 2.3577869139455023, + "learning_rate": 7.873679444123084e-07, + "loss": 1.1485, + "step": 145809 + }, + { + "epoch": 1.75, + "grad_norm": 8.64784325823836, + "learning_rate": 7.871407500893269e-07, + "loss": 1.1132, + "step": 145812 + }, + { + "epoch": 1.75, + "grad_norm": 20.761183629473383, + "learning_rate": 7.869135872065148e-07, + "loss": 1.0249, + "step": 145815 + }, + { + "epoch": 1.75, + "grad_norm": 4.565481898823783, + "learning_rate": 7.866864557646503e-07, + "loss": 1.3883, + "step": 145818 + }, + { + "epoch": 1.75, + "grad_norm": 5.287102925181321, + "learning_rate": 7.864593557645072e-07, + "loss": 1.3158, + "step": 145821 + }, + { + "epoch": 1.75, + "grad_norm": 8.022512284791931, + "learning_rate": 7.862322872068561e-07, + "loss": 1.025, + "step": 145824 + }, + { + "epoch": 1.75, + "grad_norm": 12.41298062635595, + "learning_rate": 7.860052500924764e-07, + "loss": 1.3419, + "step": 145827 + }, + { + "epoch": 1.75, + "grad_norm": 9.126814095223116, + "learning_rate": 7.857782444221418e-07, + "loss": 1.3794, + "step": 145830 + }, + { + "epoch": 1.75, + "grad_norm": 11.365629723821469, + "learning_rate": 7.855512701966295e-07, + "loss": 1.3557, + "step": 145833 + }, + { + "epoch": 1.75, + "grad_norm": 4.373911748972006, + "learning_rate": 7.853243274167088e-07, + "loss": 0.9532, + "step": 145836 + }, + { + "epoch": 1.75, + "grad_norm": 10.842238680389064, + "learning_rate": 7.850974160831592e-07, + "loss": 0.9266, + "step": 145839 + }, + { + "epoch": 1.75, + "grad_norm": 6.4064307998727275, + "learning_rate": 7.848705361967512e-07, + "loss": 1.3568, + "step": 145842 + }, + { + "epoch": 1.75, + "grad_norm": 9.945768145583388, + "learning_rate": 7.846436877582609e-07, + "loss": 1.2239, + "step": 145845 + }, + { + "epoch": 1.75, + "grad_norm": 11.102395921255754, + "learning_rate": 7.84416870768464e-07, + "loss": 1.6355, + "step": 145848 + }, + { + "epoch": 1.75, + "grad_norm": 7.959426914053327, + "learning_rate": 7.841900852281315e-07, + "loss": 1.24, + "step": 145851 + }, + { + "epoch": 1.75, + "grad_norm": 7.686452531001917, + "learning_rate": 7.839633311380379e-07, + "loss": 0.9346, + "step": 145854 + }, + { + "epoch": 1.75, + "grad_norm": 4.055330613600626, + "learning_rate": 7.837366084989583e-07, + "loss": 0.9803, + "step": 145857 + }, + { + "epoch": 1.75, + "grad_norm": 20.224721065662678, + "learning_rate": 7.835099173116679e-07, + "loss": 0.8796, + "step": 145860 + }, + { + "epoch": 1.75, + "grad_norm": 6.227240690408308, + "learning_rate": 7.832832575769378e-07, + "loss": 1.2012, + "step": 145863 + }, + { + "epoch": 1.75, + "grad_norm": 13.093225246811492, + "learning_rate": 7.830566292955411e-07, + "loss": 1.3075, + "step": 145866 + }, + { + "epoch": 1.75, + "grad_norm": 3.2586134429844744, + "learning_rate": 7.828300324682514e-07, + "loss": 1.168, + "step": 145869 + }, + { + "epoch": 1.75, + "grad_norm": 2.384221774585704, + "learning_rate": 7.826034670958438e-07, + "loss": 0.8855, + "step": 145872 + }, + { + "epoch": 1.75, + "grad_norm": 10.060449125927894, + "learning_rate": 7.82376933179092e-07, + "loss": 1.0243, + "step": 145875 + }, + { + "epoch": 1.75, + "grad_norm": 5.664769051224324, + "learning_rate": 7.821504307187656e-07, + "loss": 1.1049, + "step": 145878 + }, + { + "epoch": 1.75, + "grad_norm": 21.411780933880074, + "learning_rate": 7.819239597156403e-07, + "loss": 1.2841, + "step": 145881 + }, + { + "epoch": 1.75, + "grad_norm": 8.176975675948936, + "learning_rate": 7.816975201704879e-07, + "loss": 1.3873, + "step": 145884 + }, + { + "epoch": 1.75, + "grad_norm": 8.623195064383228, + "learning_rate": 7.814711120840823e-07, + "loss": 1.0145, + "step": 145887 + }, + { + "epoch": 1.75, + "grad_norm": 8.888020465633709, + "learning_rate": 7.81244735457194e-07, + "loss": 1.0871, + "step": 145890 + }, + { + "epoch": 1.75, + "grad_norm": 7.248794619465601, + "learning_rate": 7.810183902905966e-07, + "loss": 1.1844, + "step": 145893 + }, + { + "epoch": 1.75, + "grad_norm": 10.999270066333432, + "learning_rate": 7.80792076585063e-07, + "loss": 0.9135, + "step": 145896 + }, + { + "epoch": 1.75, + "grad_norm": 7.431642659492536, + "learning_rate": 7.805657943413647e-07, + "loss": 1.2457, + "step": 145899 + }, + { + "epoch": 1.75, + "grad_norm": 10.069284791625053, + "learning_rate": 7.803395435602768e-07, + "loss": 1.1671, + "step": 145902 + }, + { + "epoch": 1.75, + "grad_norm": 11.796414147316693, + "learning_rate": 7.801133242425696e-07, + "loss": 1.5876, + "step": 145905 + }, + { + "epoch": 1.75, + "grad_norm": 15.161997416827862, + "learning_rate": 7.798871363890126e-07, + "loss": 1.3664, + "step": 145908 + }, + { + "epoch": 1.75, + "grad_norm": 5.248771584119559, + "learning_rate": 7.796609800003785e-07, + "loss": 1.0161, + "step": 145911 + }, + { + "epoch": 1.75, + "grad_norm": 7.140257719577291, + "learning_rate": 7.794348550774433e-07, + "loss": 1.1245, + "step": 145914 + }, + { + "epoch": 1.75, + "grad_norm": 6.720479249729638, + "learning_rate": 7.792087616209743e-07, + "loss": 1.1207, + "step": 145917 + }, + { + "epoch": 1.75, + "grad_norm": 11.183572967999375, + "learning_rate": 7.78982699631744e-07, + "loss": 1.2128, + "step": 145920 + }, + { + "epoch": 1.75, + "grad_norm": 9.370681000469974, + "learning_rate": 7.787566691105241e-07, + "loss": 1.4111, + "step": 145923 + }, + { + "epoch": 1.75, + "grad_norm": 6.985939101404378, + "learning_rate": 7.785306700580874e-07, + "loss": 0.8594, + "step": 145926 + }, + { + "epoch": 1.75, + "grad_norm": 7.033784059032984, + "learning_rate": 7.783047024752055e-07, + "loss": 1.0935, + "step": 145929 + }, + { + "epoch": 1.75, + "grad_norm": 3.621342575684559, + "learning_rate": 7.780787663626455e-07, + "loss": 1.4866, + "step": 145932 + }, + { + "epoch": 1.75, + "grad_norm": 7.7925156250462715, + "learning_rate": 7.7785286172118e-07, + "loss": 1.1963, + "step": 145935 + }, + { + "epoch": 1.75, + "grad_norm": 8.747665134605265, + "learning_rate": 7.77626988551582e-07, + "loss": 1.3916, + "step": 145938 + }, + { + "epoch": 1.75, + "grad_norm": 15.401931617385175, + "learning_rate": 7.774011468546228e-07, + "loss": 1.1787, + "step": 145941 + }, + { + "epoch": 1.75, + "grad_norm": 16.23581795202451, + "learning_rate": 7.771753366310697e-07, + "loss": 1.062, + "step": 145944 + }, + { + "epoch": 1.75, + "grad_norm": 8.295979066055539, + "learning_rate": 7.769495578816966e-07, + "loss": 0.845, + "step": 145947 + }, + { + "epoch": 1.76, + "grad_norm": 4.774427264662618, + "learning_rate": 7.767238106072717e-07, + "loss": 1.1649, + "step": 145950 + }, + { + "epoch": 1.76, + "grad_norm": 2.6647388089181074, + "learning_rate": 7.764980948085654e-07, + "loss": 1.2524, + "step": 145953 + }, + { + "epoch": 1.76, + "grad_norm": 6.096973761495382, + "learning_rate": 7.762724104863506e-07, + "loss": 1.307, + "step": 145956 + }, + { + "epoch": 1.76, + "grad_norm": 11.971739413492783, + "learning_rate": 7.760467576413944e-07, + "loss": 0.9044, + "step": 145959 + }, + { + "epoch": 1.76, + "grad_norm": 14.577191946681577, + "learning_rate": 7.758211362744672e-07, + "loss": 1.5539, + "step": 145962 + }, + { + "epoch": 1.76, + "grad_norm": 7.761500750137083, + "learning_rate": 7.755955463863407e-07, + "loss": 0.947, + "step": 145965 + }, + { + "epoch": 1.76, + "grad_norm": 7.168172556099627, + "learning_rate": 7.753699879777854e-07, + "loss": 1.5349, + "step": 145968 + }, + { + "epoch": 1.76, + "grad_norm": 18.98871560021639, + "learning_rate": 7.751444610495706e-07, + "loss": 1.1357, + "step": 145971 + }, + { + "epoch": 1.76, + "grad_norm": 7.173043298691941, + "learning_rate": 7.749189656024625e-07, + "loss": 1.1869, + "step": 145974 + }, + { + "epoch": 1.76, + "grad_norm": 5.823952790068276, + "learning_rate": 7.746935016372325e-07, + "loss": 0.8816, + "step": 145977 + }, + { + "epoch": 1.76, + "grad_norm": 9.514616554659186, + "learning_rate": 7.744680691546513e-07, + "loss": 1.428, + "step": 145980 + }, + { + "epoch": 1.76, + "grad_norm": 17.81680390133354, + "learning_rate": 7.742426681554893e-07, + "loss": 0.838, + "step": 145983 + }, + { + "epoch": 1.76, + "grad_norm": 8.062561706711438, + "learning_rate": 7.740172986405125e-07, + "loss": 1.1617, + "step": 145986 + }, + { + "epoch": 1.76, + "grad_norm": 11.321793582139, + "learning_rate": 7.737919606104927e-07, + "loss": 1.395, + "step": 145989 + }, + { + "epoch": 1.76, + "grad_norm": 13.680240920522062, + "learning_rate": 7.735666540661957e-07, + "loss": 0.8586, + "step": 145992 + }, + { + "epoch": 1.76, + "grad_norm": 29.23541546933832, + "learning_rate": 7.733413790083943e-07, + "loss": 1.32, + "step": 145995 + }, + { + "epoch": 1.76, + "grad_norm": 6.163259951963625, + "learning_rate": 7.731161354378547e-07, + "loss": 1.276, + "step": 145998 + }, + { + "epoch": 1.76, + "grad_norm": 6.207177114568795, + "learning_rate": 7.72890923355345e-07, + "loss": 1.5986, + "step": 146001 + }, + { + "epoch": 1.76, + "grad_norm": 7.2287146489895475, + "learning_rate": 7.726657427616346e-07, + "loss": 1.1033, + "step": 146004 + }, + { + "epoch": 1.76, + "grad_norm": 6.996643358184323, + "learning_rate": 7.724405936574942e-07, + "loss": 1.2323, + "step": 146007 + }, + { + "epoch": 1.76, + "grad_norm": 13.080947312539351, + "learning_rate": 7.722154760436906e-07, + "loss": 1.3929, + "step": 146010 + }, + { + "epoch": 1.76, + "grad_norm": 39.60329605358598, + "learning_rate": 7.719903899209913e-07, + "loss": 1.3239, + "step": 146013 + }, + { + "epoch": 1.76, + "grad_norm": 3.8641009642938298, + "learning_rate": 7.717653352901644e-07, + "loss": 1.1358, + "step": 146016 + }, + { + "epoch": 1.76, + "grad_norm": 7.117708990476589, + "learning_rate": 7.715403121519782e-07, + "loss": 1.0535, + "step": 146019 + }, + { + "epoch": 1.76, + "grad_norm": 12.124210362972878, + "learning_rate": 7.713153205072022e-07, + "loss": 1.183, + "step": 146022 + }, + { + "epoch": 1.76, + "grad_norm": 10.365911963712673, + "learning_rate": 7.710903603566011e-07, + "loss": 1.0074, + "step": 146025 + }, + { + "epoch": 1.76, + "grad_norm": 8.818851007424653, + "learning_rate": 7.708654317009445e-07, + "loss": 1.0603, + "step": 146028 + }, + { + "epoch": 1.76, + "grad_norm": 6.3814746821061465, + "learning_rate": 7.706405345410017e-07, + "loss": 0.9009, + "step": 146031 + }, + { + "epoch": 1.76, + "grad_norm": 18.9127014494778, + "learning_rate": 7.704156688775366e-07, + "loss": 1.351, + "step": 146034 + }, + { + "epoch": 1.76, + "grad_norm": 10.90868973734268, + "learning_rate": 7.701908347113196e-07, + "loss": 0.9613, + "step": 146037 + }, + { + "epoch": 1.76, + "grad_norm": 6.869865775295659, + "learning_rate": 7.699660320431157e-07, + "loss": 1.1226, + "step": 146040 + }, + { + "epoch": 1.76, + "grad_norm": 5.603994821108916, + "learning_rate": 7.69741260873692e-07, + "loss": 0.9785, + "step": 146043 + }, + { + "epoch": 1.76, + "grad_norm": 19.79344883601513, + "learning_rate": 7.69516521203818e-07, + "loss": 1.2473, + "step": 146046 + }, + { + "epoch": 1.76, + "grad_norm": 8.057875527858839, + "learning_rate": 7.692918130342597e-07, + "loss": 0.9249, + "step": 146049 + }, + { + "epoch": 1.76, + "grad_norm": 10.920397223347694, + "learning_rate": 7.690671363657831e-07, + "loss": 1.424, + "step": 146052 + }, + { + "epoch": 1.76, + "grad_norm": 13.44403721776221, + "learning_rate": 7.688424911991565e-07, + "loss": 1.0512, + "step": 146055 + }, + { + "epoch": 1.76, + "grad_norm": 14.569796438183742, + "learning_rate": 7.686178775351439e-07, + "loss": 1.0249, + "step": 146058 + }, + { + "epoch": 1.76, + "grad_norm": 3.907753253031345, + "learning_rate": 7.683932953745132e-07, + "loss": 0.9585, + "step": 146061 + }, + { + "epoch": 1.76, + "grad_norm": 3.990468056474875, + "learning_rate": 7.68168744718033e-07, + "loss": 1.0373, + "step": 146064 + }, + { + "epoch": 1.76, + "grad_norm": 16.759668959450845, + "learning_rate": 7.679442255664648e-07, + "loss": 1.3065, + "step": 146067 + }, + { + "epoch": 1.76, + "grad_norm": 4.878858937161862, + "learning_rate": 7.677197379205792e-07, + "loss": 1.2247, + "step": 146070 + }, + { + "epoch": 1.76, + "grad_norm": 18.353706096011184, + "learning_rate": 7.67495281781141e-07, + "loss": 0.8278, + "step": 146073 + }, + { + "epoch": 1.76, + "grad_norm": 11.109521613806827, + "learning_rate": 7.672708571489162e-07, + "loss": 0.7565, + "step": 146076 + }, + { + "epoch": 1.76, + "grad_norm": 5.450806322199749, + "learning_rate": 7.6704646402467e-07, + "loss": 1.2388, + "step": 146079 + }, + { + "epoch": 1.76, + "grad_norm": 14.422144865133298, + "learning_rate": 7.668221024091671e-07, + "loss": 0.9478, + "step": 146082 + }, + { + "epoch": 1.76, + "grad_norm": 5.919977575102594, + "learning_rate": 7.665977723031748e-07, + "loss": 0.9652, + "step": 146085 + }, + { + "epoch": 1.76, + "grad_norm": 4.976808092496846, + "learning_rate": 7.66373473707458e-07, + "loss": 1.1122, + "step": 146088 + }, + { + "epoch": 1.76, + "grad_norm": 10.926057047007298, + "learning_rate": 7.661492066227849e-07, + "loss": 0.9635, + "step": 146091 + }, + { + "epoch": 1.76, + "grad_norm": 3.9506358963138073, + "learning_rate": 7.659249710499172e-07, + "loss": 0.8259, + "step": 146094 + }, + { + "epoch": 1.76, + "grad_norm": 4.043605518872544, + "learning_rate": 7.657007669896233e-07, + "loss": 1.0257, + "step": 146097 + }, + { + "epoch": 1.76, + "grad_norm": 5.462560295877171, + "learning_rate": 7.654765944426635e-07, + "loss": 0.8498, + "step": 146100 + }, + { + "epoch": 1.76, + "grad_norm": 3.4366050016592697, + "learning_rate": 7.652524534098082e-07, + "loss": 1.0717, + "step": 146103 + }, + { + "epoch": 1.76, + "grad_norm": 14.40440852119545, + "learning_rate": 7.650283438918182e-07, + "loss": 1.3438, + "step": 146106 + }, + { + "epoch": 1.76, + "grad_norm": 8.247529904358473, + "learning_rate": 7.648042658894606e-07, + "loss": 0.9713, + "step": 146109 + }, + { + "epoch": 1.76, + "grad_norm": 10.986617928271064, + "learning_rate": 7.645802194034991e-07, + "loss": 1.4216, + "step": 146112 + }, + { + "epoch": 1.76, + "grad_norm": 9.958144736111437, + "learning_rate": 7.643562044347008e-07, + "loss": 1.3374, + "step": 146115 + }, + { + "epoch": 1.76, + "grad_norm": 25.078856619831978, + "learning_rate": 7.641322209838275e-07, + "loss": 1.1785, + "step": 146118 + }, + { + "epoch": 1.76, + "grad_norm": 7.662250745676925, + "learning_rate": 7.639082690516431e-07, + "loss": 1.4356, + "step": 146121 + }, + { + "epoch": 1.76, + "grad_norm": 4.268423549454019, + "learning_rate": 7.636843486389123e-07, + "loss": 1.123, + "step": 146124 + }, + { + "epoch": 1.76, + "grad_norm": 9.441603923844262, + "learning_rate": 7.634604597464001e-07, + "loss": 1.275, + "step": 146127 + }, + { + "epoch": 1.76, + "grad_norm": 2.8271019414273324, + "learning_rate": 7.632366023748727e-07, + "loss": 1.1567, + "step": 146130 + }, + { + "epoch": 1.76, + "grad_norm": 6.9338647824237345, + "learning_rate": 7.630127765250894e-07, + "loss": 1.665, + "step": 146133 + }, + { + "epoch": 1.76, + "grad_norm": 6.761714973575131, + "learning_rate": 7.627889821978163e-07, + "loss": 0.8219, + "step": 146136 + }, + { + "epoch": 1.76, + "grad_norm": 4.2738453093826845, + "learning_rate": 7.625652193938182e-07, + "loss": 1.3367, + "step": 146139 + }, + { + "epoch": 1.76, + "grad_norm": 2.6763698353257506, + "learning_rate": 7.623414881138569e-07, + "loss": 1.2937, + "step": 146142 + }, + { + "epoch": 1.76, + "grad_norm": 10.120466392104406, + "learning_rate": 7.621177883586984e-07, + "loss": 1.1229, + "step": 146145 + }, + { + "epoch": 1.76, + "grad_norm": 10.749639220490968, + "learning_rate": 7.61894120129103e-07, + "loss": 1.029, + "step": 146148 + }, + { + "epoch": 1.76, + "grad_norm": 9.668913679515413, + "learning_rate": 7.616704834258348e-07, + "loss": 1.3235, + "step": 146151 + }, + { + "epoch": 1.76, + "grad_norm": 15.803720028067053, + "learning_rate": 7.614468782496576e-07, + "loss": 1.4145, + "step": 146154 + }, + { + "epoch": 1.76, + "grad_norm": 4.858291809096745, + "learning_rate": 7.612233046013373e-07, + "loss": 0.9992, + "step": 146157 + }, + { + "epoch": 1.76, + "grad_norm": 25.752482994033254, + "learning_rate": 7.609997624816312e-07, + "loss": 1.782, + "step": 146160 + }, + { + "epoch": 1.76, + "grad_norm": 7.449065687556668, + "learning_rate": 7.607762518913076e-07, + "loss": 1.3335, + "step": 146163 + }, + { + "epoch": 1.76, + "grad_norm": 19.366204756516535, + "learning_rate": 7.605527728311235e-07, + "loss": 1.3316, + "step": 146166 + }, + { + "epoch": 1.76, + "grad_norm": 9.045678888027753, + "learning_rate": 7.603293253018462e-07, + "loss": 1.0117, + "step": 146169 + }, + { + "epoch": 1.76, + "grad_norm": 13.024400365049054, + "learning_rate": 7.601059093042374e-07, + "loss": 1.3419, + "step": 146172 + }, + { + "epoch": 1.76, + "grad_norm": 3.591248166451949, + "learning_rate": 7.598825248390584e-07, + "loss": 1.4199, + "step": 146175 + }, + { + "epoch": 1.76, + "grad_norm": 8.503555638585476, + "learning_rate": 7.596591719070712e-07, + "loss": 1.0075, + "step": 146178 + }, + { + "epoch": 1.76, + "grad_norm": 12.222085305468577, + "learning_rate": 7.594358505090416e-07, + "loss": 1.0301, + "step": 146181 + }, + { + "epoch": 1.76, + "grad_norm": 15.338172067518554, + "learning_rate": 7.592125606457279e-07, + "loss": 1.261, + "step": 146184 + }, + { + "epoch": 1.76, + "grad_norm": 9.590873479974693, + "learning_rate": 7.589893023178918e-07, + "loss": 1.096, + "step": 146187 + }, + { + "epoch": 1.76, + "grad_norm": 17.694039227505854, + "learning_rate": 7.587660755262972e-07, + "loss": 1.3342, + "step": 146190 + }, + { + "epoch": 1.76, + "grad_norm": 12.719526428880306, + "learning_rate": 7.585428802717043e-07, + "loss": 1.5434, + "step": 146193 + }, + { + "epoch": 1.76, + "grad_norm": 10.784733400910646, + "learning_rate": 7.583197165548761e-07, + "loss": 1.0303, + "step": 146196 + }, + { + "epoch": 1.76, + "grad_norm": 8.065127157872437, + "learning_rate": 7.580965843765764e-07, + "loss": 1.4103, + "step": 146199 + }, + { + "epoch": 1.76, + "grad_norm": 17.803163431490876, + "learning_rate": 7.578734837375612e-07, + "loss": 1.2617, + "step": 146202 + }, + { + "epoch": 1.76, + "grad_norm": 2.7431127307268235, + "learning_rate": 7.576504146385977e-07, + "loss": 1.3255, + "step": 146205 + }, + { + "epoch": 1.76, + "grad_norm": 8.635446537716192, + "learning_rate": 7.574273770804419e-07, + "loss": 1.0464, + "step": 146208 + }, + { + "epoch": 1.76, + "grad_norm": 10.1902036714349, + "learning_rate": 7.572043710638588e-07, + "loss": 1.3435, + "step": 146211 + }, + { + "epoch": 1.76, + "grad_norm": 8.208869425706244, + "learning_rate": 7.569813965896067e-07, + "loss": 1.1253, + "step": 146214 + }, + { + "epoch": 1.76, + "grad_norm": 9.27430569307264, + "learning_rate": 7.567584536584482e-07, + "loss": 1.2341, + "step": 146217 + }, + { + "epoch": 1.76, + "grad_norm": 7.954764005974827, + "learning_rate": 7.565355422711429e-07, + "loss": 1.173, + "step": 146220 + }, + { + "epoch": 1.76, + "grad_norm": 7.7239944646524235, + "learning_rate": 7.563126624284545e-07, + "loss": 1.3768, + "step": 146223 + }, + { + "epoch": 1.76, + "grad_norm": 4.237290715432912, + "learning_rate": 7.560898141311401e-07, + "loss": 1.3117, + "step": 146226 + }, + { + "epoch": 1.76, + "grad_norm": 5.200385194934003, + "learning_rate": 7.558669973799615e-07, + "loss": 1.0764, + "step": 146229 + }, + { + "epoch": 1.76, + "grad_norm": 12.023077847326558, + "learning_rate": 7.55644212175678e-07, + "loss": 0.9256, + "step": 146232 + }, + { + "epoch": 1.76, + "grad_norm": 10.22711270735388, + "learning_rate": 7.554214585190523e-07, + "loss": 1.1088, + "step": 146235 + }, + { + "epoch": 1.76, + "grad_norm": 11.878545864093642, + "learning_rate": 7.551987364108438e-07, + "loss": 1.2486, + "step": 146238 + }, + { + "epoch": 1.76, + "grad_norm": 3.359918052664656, + "learning_rate": 7.549760458518107e-07, + "loss": 1.103, + "step": 146241 + }, + { + "epoch": 1.76, + "grad_norm": 8.383837440711332, + "learning_rate": 7.547533868427148e-07, + "loss": 1.0363, + "step": 146244 + }, + { + "epoch": 1.76, + "grad_norm": 2.6504798362288113, + "learning_rate": 7.545307593843166e-07, + "loss": 1.3111, + "step": 146247 + }, + { + "epoch": 1.76, + "grad_norm": 6.2635773933068455, + "learning_rate": 7.54308163477373e-07, + "loss": 1.1608, + "step": 146250 + }, + { + "epoch": 1.76, + "grad_norm": 5.704780077428185, + "learning_rate": 7.540855991226471e-07, + "loss": 1.2405, + "step": 146253 + }, + { + "epoch": 1.76, + "grad_norm": 14.749260732625336, + "learning_rate": 7.538630663208946e-07, + "loss": 0.7827, + "step": 146256 + }, + { + "epoch": 1.76, + "grad_norm": 17.28250562886618, + "learning_rate": 7.536405650728773e-07, + "loss": 1.4783, + "step": 146259 + }, + { + "epoch": 1.76, + "grad_norm": 21.75098548536934, + "learning_rate": 7.534180953793547e-07, + "loss": 1.1115, + "step": 146262 + }, + { + "epoch": 1.76, + "grad_norm": 14.166990583250783, + "learning_rate": 7.53195657241087e-07, + "loss": 1.2249, + "step": 146265 + }, + { + "epoch": 1.76, + "grad_norm": 6.4839820603888265, + "learning_rate": 7.529732506588316e-07, + "loss": 0.944, + "step": 146268 + }, + { + "epoch": 1.76, + "grad_norm": 11.724405676152152, + "learning_rate": 7.527508756333468e-07, + "loss": 1.2071, + "step": 146271 + }, + { + "epoch": 1.76, + "grad_norm": 3.825049117703495, + "learning_rate": 7.525285321653919e-07, + "loss": 1.0383, + "step": 146274 + }, + { + "epoch": 1.76, + "grad_norm": 16.704955902310147, + "learning_rate": 7.523062202557263e-07, + "loss": 1.1009, + "step": 146277 + }, + { + "epoch": 1.76, + "grad_norm": 5.561885505430931, + "learning_rate": 7.520839399051094e-07, + "loss": 1.3576, + "step": 146280 + }, + { + "epoch": 1.76, + "grad_norm": 14.440934542692931, + "learning_rate": 7.518616911142984e-07, + "loss": 0.8407, + "step": 146283 + }, + { + "epoch": 1.76, + "grad_norm": 7.29173464656712, + "learning_rate": 7.516394738840515e-07, + "loss": 1.393, + "step": 146286 + }, + { + "epoch": 1.76, + "grad_norm": 11.031001026826209, + "learning_rate": 7.514172882151294e-07, + "loss": 1.4578, + "step": 146289 + }, + { + "epoch": 1.76, + "grad_norm": 12.305109178427317, + "learning_rate": 7.511951341082901e-07, + "loss": 1.246, + "step": 146292 + }, + { + "epoch": 1.76, + "grad_norm": 13.601332658209332, + "learning_rate": 7.509730115642877e-07, + "loss": 1.4017, + "step": 146295 + }, + { + "epoch": 1.76, + "grad_norm": 13.069221815164438, + "learning_rate": 7.507509205838826e-07, + "loss": 1.0594, + "step": 146298 + }, + { + "epoch": 1.76, + "grad_norm": 4.488965841011806, + "learning_rate": 7.505288611678341e-07, + "loss": 1.2528, + "step": 146301 + }, + { + "epoch": 1.76, + "grad_norm": 5.614843667788787, + "learning_rate": 7.503068333168984e-07, + "loss": 1.0351, + "step": 146304 + }, + { + "epoch": 1.76, + "grad_norm": 12.565076054168058, + "learning_rate": 7.500848370318348e-07, + "loss": 0.7551, + "step": 146307 + }, + { + "epoch": 1.76, + "grad_norm": 3.455413568192998, + "learning_rate": 7.498628723134005e-07, + "loss": 1.1041, + "step": 146310 + }, + { + "epoch": 1.76, + "grad_norm": 6.275367478445375, + "learning_rate": 7.496409391623504e-07, + "loss": 1.1542, + "step": 146313 + }, + { + "epoch": 1.76, + "grad_norm": 2.8303291952023493, + "learning_rate": 7.494190375794441e-07, + "loss": 1.1677, + "step": 146316 + }, + { + "epoch": 1.76, + "grad_norm": 10.12049573891884, + "learning_rate": 7.491971675654397e-07, + "loss": 1.1303, + "step": 146319 + }, + { + "epoch": 1.76, + "grad_norm": 10.056582061424326, + "learning_rate": 7.489753291210922e-07, + "loss": 1.4171, + "step": 146322 + }, + { + "epoch": 1.76, + "grad_norm": 6.985395688023839, + "learning_rate": 7.487535222471598e-07, + "loss": 1.1641, + "step": 146325 + }, + { + "epoch": 1.76, + "grad_norm": 3.9685977797107057, + "learning_rate": 7.485317469443987e-07, + "loss": 1.177, + "step": 146328 + }, + { + "epoch": 1.76, + "grad_norm": 7.458737596059209, + "learning_rate": 7.483100032135693e-07, + "loss": 1.4698, + "step": 146331 + }, + { + "epoch": 1.76, + "grad_norm": 35.60494494427201, + "learning_rate": 7.480882910554244e-07, + "loss": 1.2178, + "step": 146334 + }, + { + "epoch": 1.76, + "grad_norm": 5.332804112931488, + "learning_rate": 7.4786661047072e-07, + "loss": 1.1713, + "step": 146337 + }, + { + "epoch": 1.76, + "grad_norm": 2.551591348948536, + "learning_rate": 7.476449614602144e-07, + "loss": 1.0055, + "step": 146340 + }, + { + "epoch": 1.76, + "grad_norm": 20.070310051952895, + "learning_rate": 7.474233440246637e-07, + "loss": 1.1474, + "step": 146343 + }, + { + "epoch": 1.76, + "grad_norm": 5.09214334055831, + "learning_rate": 7.472017581648261e-07, + "loss": 1.1002, + "step": 146346 + }, + { + "epoch": 1.76, + "grad_norm": 6.40880289150006, + "learning_rate": 7.469802038814544e-07, + "loss": 1.2363, + "step": 146349 + }, + { + "epoch": 1.76, + "grad_norm": 7.428874023087985, + "learning_rate": 7.46758681175308e-07, + "loss": 1.1166, + "step": 146352 + }, + { + "epoch": 1.76, + "grad_norm": 3.648637100606, + "learning_rate": 7.465371900471397e-07, + "loss": 0.81, + "step": 146355 + }, + { + "epoch": 1.76, + "grad_norm": 12.104573182444357, + "learning_rate": 7.463157304977064e-07, + "loss": 1.2647, + "step": 146358 + }, + { + "epoch": 1.76, + "grad_norm": 7.81118163219708, + "learning_rate": 7.460943025277667e-07, + "loss": 1.0209, + "step": 146361 + }, + { + "epoch": 1.76, + "grad_norm": 10.095419061767872, + "learning_rate": 7.458729061380721e-07, + "loss": 1.1642, + "step": 146364 + }, + { + "epoch": 1.76, + "grad_norm": 10.780788589879265, + "learning_rate": 7.456515413293797e-07, + "loss": 1.0951, + "step": 146367 + }, + { + "epoch": 1.76, + "grad_norm": 10.832058505737884, + "learning_rate": 7.454302081024445e-07, + "loss": 0.9855, + "step": 146370 + }, + { + "epoch": 1.76, + "grad_norm": 7.051980487769915, + "learning_rate": 7.452089064580259e-07, + "loss": 1.2394, + "step": 146373 + }, + { + "epoch": 1.76, + "grad_norm": 5.246882779497011, + "learning_rate": 7.449876363968744e-07, + "loss": 1.5968, + "step": 146376 + }, + { + "epoch": 1.76, + "grad_norm": 4.383497663717321, + "learning_rate": 7.44766397919745e-07, + "loss": 1.0281, + "step": 146379 + }, + { + "epoch": 1.76, + "grad_norm": 7.591260596605852, + "learning_rate": 7.445451910273949e-07, + "loss": 1.1655, + "step": 146382 + }, + { + "epoch": 1.76, + "grad_norm": 19.361346210992064, + "learning_rate": 7.443240157205778e-07, + "loss": 1.1381, + "step": 146385 + }, + { + "epoch": 1.76, + "grad_norm": 8.011287893617876, + "learning_rate": 7.441028720000498e-07, + "loss": 1.2602, + "step": 146388 + }, + { + "epoch": 1.76, + "grad_norm": 11.675712549623348, + "learning_rate": 7.438817598665637e-07, + "loss": 1.1044, + "step": 146391 + }, + { + "epoch": 1.76, + "grad_norm": 3.2975656609884663, + "learning_rate": 7.436606793208778e-07, + "loss": 1.2426, + "step": 146394 + }, + { + "epoch": 1.76, + "grad_norm": 3.291150555135252, + "learning_rate": 7.434396303637403e-07, + "loss": 1.428, + "step": 146397 + }, + { + "epoch": 1.76, + "grad_norm": 15.783394252744854, + "learning_rate": 7.43218612995913e-07, + "loss": 1.4635, + "step": 146400 + }, + { + "epoch": 1.76, + "grad_norm": 5.161858505439046, + "learning_rate": 7.429976272181427e-07, + "loss": 1.1243, + "step": 146403 + }, + { + "epoch": 1.76, + "grad_norm": 7.36342098992505, + "learning_rate": 7.427766730311881e-07, + "loss": 1.4326, + "step": 146406 + }, + { + "epoch": 1.76, + "grad_norm": 44.03866541697634, + "learning_rate": 7.425557504358016e-07, + "loss": 1.3864, + "step": 146409 + }, + { + "epoch": 1.76, + "grad_norm": 7.036968632109544, + "learning_rate": 7.423348594327384e-07, + "loss": 1.2897, + "step": 146412 + }, + { + "epoch": 1.76, + "grad_norm": 86.6843800051833, + "learning_rate": 7.421140000227533e-07, + "loss": 0.8439, + "step": 146415 + }, + { + "epoch": 1.76, + "grad_norm": 12.751403923748406, + "learning_rate": 7.418931722065992e-07, + "loss": 1.0377, + "step": 146418 + }, + { + "epoch": 1.76, + "grad_norm": 8.59867052693863, + "learning_rate": 7.416723759850263e-07, + "loss": 1.5552, + "step": 146421 + }, + { + "epoch": 1.76, + "grad_norm": 6.727309213784369, + "learning_rate": 7.414516113587911e-07, + "loss": 1.3456, + "step": 146424 + }, + { + "epoch": 1.76, + "grad_norm": 5.975930760316601, + "learning_rate": 7.412308783286481e-07, + "loss": 0.9766, + "step": 146427 + }, + { + "epoch": 1.76, + "grad_norm": 10.725295696532152, + "learning_rate": 7.41010176895347e-07, + "loss": 1.0178, + "step": 146430 + }, + { + "epoch": 1.76, + "grad_norm": 8.611727213942949, + "learning_rate": 7.407895070596438e-07, + "loss": 1.5448, + "step": 146433 + }, + { + "epoch": 1.76, + "grad_norm": 2.296734060718976, + "learning_rate": 7.405688688222923e-07, + "loss": 1.0991, + "step": 146436 + }, + { + "epoch": 1.76, + "grad_norm": 21.735876677448125, + "learning_rate": 7.403482621840419e-07, + "loss": 1.7483, + "step": 146439 + }, + { + "epoch": 1.76, + "grad_norm": 18.35528019379547, + "learning_rate": 7.401276871456497e-07, + "loss": 0.9593, + "step": 146442 + }, + { + "epoch": 1.76, + "grad_norm": 9.31082869409195, + "learning_rate": 7.399071437078653e-07, + "loss": 1.5044, + "step": 146445 + }, + { + "epoch": 1.76, + "grad_norm": 51.47373983835668, + "learning_rate": 7.396866318714412e-07, + "loss": 1.6032, + "step": 146448 + }, + { + "epoch": 1.76, + "grad_norm": 6.388892580350619, + "learning_rate": 7.394661516371315e-07, + "loss": 1.1209, + "step": 146451 + }, + { + "epoch": 1.76, + "grad_norm": 5.666413568776691, + "learning_rate": 7.392457030056899e-07, + "loss": 1.2126, + "step": 146454 + }, + { + "epoch": 1.76, + "grad_norm": 2.586619060738181, + "learning_rate": 7.390252859778657e-07, + "loss": 1.336, + "step": 146457 + }, + { + "epoch": 1.76, + "grad_norm": 7.0811705130915925, + "learning_rate": 7.38804900554414e-07, + "loss": 1.2275, + "step": 146460 + }, + { + "epoch": 1.76, + "grad_norm": 14.082489832734153, + "learning_rate": 7.38584546736083e-07, + "loss": 1.2435, + "step": 146463 + }, + { + "epoch": 1.76, + "grad_norm": 4.871969560910932, + "learning_rate": 7.383642245236277e-07, + "loss": 0.9847, + "step": 146466 + }, + { + "epoch": 1.76, + "grad_norm": 6.233074431788082, + "learning_rate": 7.38143933917801e-07, + "loss": 1.1087, + "step": 146469 + }, + { + "epoch": 1.76, + "grad_norm": 7.845155094652615, + "learning_rate": 7.379236749193508e-07, + "loss": 0.9051, + "step": 146472 + }, + { + "epoch": 1.76, + "grad_norm": 6.0148845987785275, + "learning_rate": 7.377034475290312e-07, + "loss": 1.1541, + "step": 146475 + }, + { + "epoch": 1.76, + "grad_norm": 5.373845582940353, + "learning_rate": 7.374832517475949e-07, + "loss": 0.9371, + "step": 146478 + }, + { + "epoch": 1.76, + "grad_norm": 8.829580866601027, + "learning_rate": 7.372630875757903e-07, + "loss": 1.1663, + "step": 146481 + }, + { + "epoch": 1.76, + "grad_norm": 7.649749187801037, + "learning_rate": 7.370429550143721e-07, + "loss": 0.8069, + "step": 146484 + }, + { + "epoch": 1.76, + "grad_norm": 8.291626200532459, + "learning_rate": 7.368228540640876e-07, + "loss": 1.1544, + "step": 146487 + }, + { + "epoch": 1.76, + "grad_norm": 8.979957570064435, + "learning_rate": 7.366027847256907e-07, + "loss": 0.92, + "step": 146490 + }, + { + "epoch": 1.76, + "grad_norm": 3.4478462018199645, + "learning_rate": 7.363827469999319e-07, + "loss": 1.2982, + "step": 146493 + }, + { + "epoch": 1.76, + "grad_norm": 12.389320278815152, + "learning_rate": 7.361627408875638e-07, + "loss": 1.216, + "step": 146496 + }, + { + "epoch": 1.76, + "grad_norm": 9.526758549232813, + "learning_rate": 7.359427663893326e-07, + "loss": 1.0869, + "step": 146499 + }, + { + "epoch": 1.76, + "grad_norm": 8.260934246755133, + "learning_rate": 7.357228235059955e-07, + "loss": 1.2542, + "step": 146502 + }, + { + "epoch": 1.76, + "grad_norm": 10.271607271316487, + "learning_rate": 7.355029122382962e-07, + "loss": 0.9818, + "step": 146505 + }, + { + "epoch": 1.76, + "grad_norm": 9.842404829521266, + "learning_rate": 7.352830325869908e-07, + "loss": 1.5001, + "step": 146508 + }, + { + "epoch": 1.76, + "grad_norm": 9.158746913340066, + "learning_rate": 7.350631845528255e-07, + "loss": 0.9248, + "step": 146511 + }, + { + "epoch": 1.76, + "grad_norm": 4.498630906356309, + "learning_rate": 7.348433681365519e-07, + "loss": 1.4894, + "step": 146514 + }, + { + "epoch": 1.76, + "grad_norm": 7.570827247032129, + "learning_rate": 7.346235833389204e-07, + "loss": 1.1189, + "step": 146517 + }, + { + "epoch": 1.76, + "grad_norm": 17.145737095284098, + "learning_rate": 7.344038301606826e-07, + "loss": 1.2624, + "step": 146520 + }, + { + "epoch": 1.76, + "grad_norm": 5.176007565129672, + "learning_rate": 7.341841086025869e-07, + "loss": 1.3876, + "step": 146523 + }, + { + "epoch": 1.76, + "grad_norm": 10.649425581287234, + "learning_rate": 7.339644186653849e-07, + "loss": 0.8154, + "step": 146526 + }, + { + "epoch": 1.76, + "grad_norm": 2.850797990231984, + "learning_rate": 7.337447603498227e-07, + "loss": 1.0288, + "step": 146529 + }, + { + "epoch": 1.76, + "grad_norm": 4.942543712988733, + "learning_rate": 7.335251336566518e-07, + "loss": 1.3729, + "step": 146532 + }, + { + "epoch": 1.76, + "grad_norm": 8.660665604370323, + "learning_rate": 7.333055385866228e-07, + "loss": 1.2224, + "step": 146535 + }, + { + "epoch": 1.76, + "grad_norm": 9.466623261119564, + "learning_rate": 7.330859751404828e-07, + "loss": 1.0513, + "step": 146538 + }, + { + "epoch": 1.76, + "grad_norm": 11.019328543904004, + "learning_rate": 7.328664433189836e-07, + "loss": 0.9144, + "step": 146541 + }, + { + "epoch": 1.76, + "grad_norm": 9.551901758753136, + "learning_rate": 7.326469431228734e-07, + "loss": 1.1729, + "step": 146544 + }, + { + "epoch": 1.76, + "grad_norm": 2.87954023173615, + "learning_rate": 7.324274745529003e-07, + "loss": 1.2776, + "step": 146547 + }, + { + "epoch": 1.76, + "grad_norm": 9.27975996509537, + "learning_rate": 7.322080376098151e-07, + "loss": 1.0699, + "step": 146550 + }, + { + "epoch": 1.76, + "grad_norm": 9.224084948046057, + "learning_rate": 7.319886322943648e-07, + "loss": 1.1541, + "step": 146553 + }, + { + "epoch": 1.76, + "grad_norm": 3.6903897343968364, + "learning_rate": 7.317692586072989e-07, + "loss": 0.737, + "step": 146556 + }, + { + "epoch": 1.76, + "grad_norm": 2.571941214973709, + "learning_rate": 7.315499165493656e-07, + "loss": 1.146, + "step": 146559 + }, + { + "epoch": 1.76, + "grad_norm": 12.208308140774047, + "learning_rate": 7.313306061213155e-07, + "loss": 1.2295, + "step": 146562 + }, + { + "epoch": 1.76, + "grad_norm": 9.871150189423032, + "learning_rate": 7.311113273238934e-07, + "loss": 1.6096, + "step": 146565 + }, + { + "epoch": 1.76, + "grad_norm": 5.4742874998949995, + "learning_rate": 7.308920801578522e-07, + "loss": 0.902, + "step": 146568 + }, + { + "epoch": 1.76, + "grad_norm": 7.655452312502587, + "learning_rate": 7.306728646239359e-07, + "loss": 1.1025, + "step": 146571 + }, + { + "epoch": 1.76, + "grad_norm": 4.889049566044703, + "learning_rate": 7.304536807228946e-07, + "loss": 0.9886, + "step": 146574 + }, + { + "epoch": 1.76, + "grad_norm": 11.562795036493817, + "learning_rate": 7.302345284554768e-07, + "loss": 1.3469, + "step": 146577 + }, + { + "epoch": 1.76, + "grad_norm": 6.5126546001014916, + "learning_rate": 7.300154078224286e-07, + "loss": 1.1683, + "step": 146580 + }, + { + "epoch": 1.76, + "grad_norm": 11.030086282561903, + "learning_rate": 7.297963188244983e-07, + "loss": 1.0988, + "step": 146583 + }, + { + "epoch": 1.76, + "grad_norm": 8.69846435585646, + "learning_rate": 7.295772614624363e-07, + "loss": 1.0226, + "step": 146586 + }, + { + "epoch": 1.76, + "grad_norm": 8.161508633558311, + "learning_rate": 7.293582357369878e-07, + "loss": 0.8356, + "step": 146589 + }, + { + "epoch": 1.76, + "grad_norm": 7.802382489319176, + "learning_rate": 7.291392416488985e-07, + "loss": 1.0211, + "step": 146592 + }, + { + "epoch": 1.76, + "grad_norm": 8.226808477952186, + "learning_rate": 7.289202791989181e-07, + "loss": 1.2929, + "step": 146595 + }, + { + "epoch": 1.76, + "grad_norm": 20.36314948160526, + "learning_rate": 7.287013483877925e-07, + "loss": 1.2708, + "step": 146598 + }, + { + "epoch": 1.76, + "grad_norm": 5.150432628401734, + "learning_rate": 7.284824492162701e-07, + "loss": 0.9824, + "step": 146601 + }, + { + "epoch": 1.76, + "grad_norm": 11.103469909342465, + "learning_rate": 7.282635816850991e-07, + "loss": 0.7865, + "step": 146604 + }, + { + "epoch": 1.76, + "grad_norm": 12.930712454295726, + "learning_rate": 7.280447457950235e-07, + "loss": 0.9749, + "step": 146607 + }, + { + "epoch": 1.76, + "grad_norm": 10.425082887977185, + "learning_rate": 7.278259415467925e-07, + "loss": 1.1101, + "step": 146610 + }, + { + "epoch": 1.76, + "grad_norm": 13.627026321431504, + "learning_rate": 7.276071689411502e-07, + "loss": 1.3523, + "step": 146613 + }, + { + "epoch": 1.76, + "grad_norm": 4.8397430581850545, + "learning_rate": 7.273884279788468e-07, + "loss": 1.1638, + "step": 146616 + }, + { + "epoch": 1.76, + "grad_norm": 8.067127995485842, + "learning_rate": 7.271697186606253e-07, + "loss": 0.8347, + "step": 146619 + }, + { + "epoch": 1.76, + "grad_norm": 36.237943247571074, + "learning_rate": 7.269510409872327e-07, + "loss": 1.1272, + "step": 146622 + }, + { + "epoch": 1.76, + "grad_norm": 9.065345523003712, + "learning_rate": 7.267323949594163e-07, + "loss": 1.5321, + "step": 146625 + }, + { + "epoch": 1.76, + "grad_norm": 8.796187994959052, + "learning_rate": 7.265137805779243e-07, + "loss": 1.3351, + "step": 146628 + }, + { + "epoch": 1.76, + "grad_norm": 7.86962008823181, + "learning_rate": 7.262951978434995e-07, + "loss": 1.4008, + "step": 146631 + }, + { + "epoch": 1.76, + "grad_norm": 4.8522542756431974, + "learning_rate": 7.26076646756888e-07, + "loss": 1.1292, + "step": 146634 + }, + { + "epoch": 1.76, + "grad_norm": 12.707407787688474, + "learning_rate": 7.258581273188359e-07, + "loss": 1.5307, + "step": 146637 + }, + { + "epoch": 1.76, + "grad_norm": 4.295461550969567, + "learning_rate": 7.256396395300891e-07, + "loss": 1.0743, + "step": 146640 + }, + { + "epoch": 1.76, + "grad_norm": 3.736434148933622, + "learning_rate": 7.25421183391396e-07, + "loss": 1.1617, + "step": 146643 + }, + { + "epoch": 1.76, + "grad_norm": 16.51317511331752, + "learning_rate": 7.252027589034982e-07, + "loss": 1.1769, + "step": 146646 + }, + { + "epoch": 1.76, + "grad_norm": 7.55203632149561, + "learning_rate": 7.249843660671418e-07, + "loss": 0.9128, + "step": 146649 + }, + { + "epoch": 1.76, + "grad_norm": 3.4657705934395198, + "learning_rate": 7.247660048830751e-07, + "loss": 1.3695, + "step": 146652 + }, + { + "epoch": 1.76, + "grad_norm": 10.144550953937806, + "learning_rate": 7.245476753520398e-07, + "loss": 1.1531, + "step": 146655 + }, + { + "epoch": 1.76, + "grad_norm": 10.288520943370656, + "learning_rate": 7.243293774747839e-07, + "loss": 0.9695, + "step": 146658 + }, + { + "epoch": 1.76, + "grad_norm": 8.653838196547166, + "learning_rate": 7.241111112520483e-07, + "loss": 1.0654, + "step": 146661 + }, + { + "epoch": 1.76, + "grad_norm": 15.924322939928745, + "learning_rate": 7.23892876684581e-07, + "loss": 1.017, + "step": 146664 + }, + { + "epoch": 1.76, + "grad_norm": 3.929250177293461, + "learning_rate": 7.236746737731259e-07, + "loss": 0.9609, + "step": 146667 + }, + { + "epoch": 1.76, + "grad_norm": 6.992188447626803, + "learning_rate": 7.234565025184293e-07, + "loss": 1.0685, + "step": 146670 + }, + { + "epoch": 1.76, + "grad_norm": 10.639474632372274, + "learning_rate": 7.232383629212348e-07, + "loss": 0.8748, + "step": 146673 + }, + { + "epoch": 1.76, + "grad_norm": 3.1759928942159075, + "learning_rate": 7.23020254982284e-07, + "loss": 1.1594, + "step": 146676 + }, + { + "epoch": 1.76, + "grad_norm": 3.7892665941657078, + "learning_rate": 7.228021787023231e-07, + "loss": 1.2222, + "step": 146679 + }, + { + "epoch": 1.76, + "grad_norm": 9.624647840712539, + "learning_rate": 7.225841340820983e-07, + "loss": 0.8889, + "step": 146682 + }, + { + "epoch": 1.76, + "grad_norm": 9.648375287104615, + "learning_rate": 7.223661211223521e-07, + "loss": 1.2275, + "step": 146685 + }, + { + "epoch": 1.76, + "grad_norm": 6.290706443129766, + "learning_rate": 7.221481398238283e-07, + "loss": 0.9894, + "step": 146688 + }, + { + "epoch": 1.76, + "grad_norm": 8.186435861760517, + "learning_rate": 7.219301901872699e-07, + "loss": 1.5245, + "step": 146691 + }, + { + "epoch": 1.76, + "grad_norm": 3.897091112048582, + "learning_rate": 7.21712272213424e-07, + "loss": 1.0609, + "step": 146694 + }, + { + "epoch": 1.76, + "grad_norm": 7.443719213902566, + "learning_rate": 7.214943859030322e-07, + "loss": 1.1695, + "step": 146697 + }, + { + "epoch": 1.76, + "grad_norm": 32.475488023619654, + "learning_rate": 7.212765312568359e-07, + "loss": 1.1577, + "step": 146700 + }, + { + "epoch": 1.76, + "grad_norm": 9.092505418511895, + "learning_rate": 7.210587082755815e-07, + "loss": 1.395, + "step": 146703 + }, + { + "epoch": 1.76, + "grad_norm": 15.28289649166688, + "learning_rate": 7.208409169600105e-07, + "loss": 0.9251, + "step": 146706 + }, + { + "epoch": 1.76, + "grad_norm": 5.119348311096061, + "learning_rate": 7.206231573108669e-07, + "loss": 1.0935, + "step": 146709 + }, + { + "epoch": 1.76, + "grad_norm": 4.6690283668880825, + "learning_rate": 7.204054293288965e-07, + "loss": 1.0472, + "step": 146712 + }, + { + "epoch": 1.76, + "grad_norm": 8.79227341100773, + "learning_rate": 7.201877330148399e-07, + "loss": 0.8977, + "step": 146715 + }, + { + "epoch": 1.76, + "grad_norm": 9.397665077904541, + "learning_rate": 7.199700683694377e-07, + "loss": 1.4394, + "step": 146718 + }, + { + "epoch": 1.76, + "grad_norm": 8.87786211523274, + "learning_rate": 7.19752435393436e-07, + "loss": 1.2555, + "step": 146721 + }, + { + "epoch": 1.76, + "grad_norm": 3.580456507255543, + "learning_rate": 7.195348340875774e-07, + "loss": 1.1043, + "step": 146724 + }, + { + "epoch": 1.76, + "grad_norm": 12.619725715338433, + "learning_rate": 7.193172644526015e-07, + "loss": 1.296, + "step": 146727 + }, + { + "epoch": 1.76, + "grad_norm": 18.292034535974132, + "learning_rate": 7.19099726489254e-07, + "loss": 0.8918, + "step": 146730 + }, + { + "epoch": 1.76, + "grad_norm": 8.498932953181537, + "learning_rate": 7.188822201982759e-07, + "loss": 1.0298, + "step": 146733 + }, + { + "epoch": 1.76, + "grad_norm": 15.140856580688524, + "learning_rate": 7.186647455804108e-07, + "loss": 1.0419, + "step": 146736 + }, + { + "epoch": 1.76, + "grad_norm": 6.91395333341787, + "learning_rate": 7.184473026364002e-07, + "loss": 1.327, + "step": 146739 + }, + { + "epoch": 1.76, + "grad_norm": 9.902603584329613, + "learning_rate": 7.182298913669839e-07, + "loss": 1.2076, + "step": 146742 + }, + { + "epoch": 1.76, + "grad_norm": 24.22009048461041, + "learning_rate": 7.180125117729065e-07, + "loss": 1.2529, + "step": 146745 + }, + { + "epoch": 1.76, + "grad_norm": 8.78600426512469, + "learning_rate": 7.177951638549075e-07, + "loss": 0.9913, + "step": 146748 + }, + { + "epoch": 1.76, + "grad_norm": 4.138298777283315, + "learning_rate": 7.17577847613733e-07, + "loss": 1.0231, + "step": 146751 + }, + { + "epoch": 1.76, + "grad_norm": 7.252928318850562, + "learning_rate": 7.173605630501201e-07, + "loss": 1.1616, + "step": 146754 + }, + { + "epoch": 1.76, + "grad_norm": 7.132844027395908, + "learning_rate": 7.171433101648117e-07, + "loss": 0.9273, + "step": 146757 + }, + { + "epoch": 1.76, + "grad_norm": 22.135777442946303, + "learning_rate": 7.169260889585505e-07, + "loss": 1.087, + "step": 146760 + }, + { + "epoch": 1.76, + "grad_norm": 7.0200885924761325, + "learning_rate": 7.167088994320759e-07, + "loss": 1.2967, + "step": 146763 + }, + { + "epoch": 1.76, + "grad_norm": 6.691870203735259, + "learning_rate": 7.164917415861317e-07, + "loss": 1.0316, + "step": 146766 + }, + { + "epoch": 1.76, + "grad_norm": 9.535153397688632, + "learning_rate": 7.162746154214561e-07, + "loss": 1.4513, + "step": 146769 + }, + { + "epoch": 1.76, + "grad_norm": 6.90271169475037, + "learning_rate": 7.16057520938791e-07, + "loss": 0.9608, + "step": 146772 + }, + { + "epoch": 1.76, + "grad_norm": 26.265893648944967, + "learning_rate": 7.158404581388767e-07, + "loss": 1.0765, + "step": 146775 + }, + { + "epoch": 1.76, + "grad_norm": 17.515901674103567, + "learning_rate": 7.156234270224571e-07, + "loss": 1.2081, + "step": 146778 + }, + { + "epoch": 1.76, + "grad_norm": 14.474804578425127, + "learning_rate": 7.154064275902706e-07, + "loss": 0.9497, + "step": 146781 + }, + { + "epoch": 1.77, + "grad_norm": 8.32170175961171, + "learning_rate": 7.151894598430553e-07, + "loss": 1.0976, + "step": 146784 + }, + { + "epoch": 1.77, + "grad_norm": 6.04153764443041, + "learning_rate": 7.149725237815553e-07, + "loss": 1.0259, + "step": 146787 + }, + { + "epoch": 1.77, + "grad_norm": 7.363780381557726, + "learning_rate": 7.147556194065087e-07, + "loss": 0.9443, + "step": 146790 + }, + { + "epoch": 1.77, + "grad_norm": 7.391811791008054, + "learning_rate": 7.145387467186582e-07, + "loss": 1.0774, + "step": 146793 + }, + { + "epoch": 1.77, + "grad_norm": 2.5158741904171142, + "learning_rate": 7.143219057187412e-07, + "loss": 1.4014, + "step": 146796 + }, + { + "epoch": 1.77, + "grad_norm": 25.855031615100188, + "learning_rate": 7.141050964074981e-07, + "loss": 1.1682, + "step": 146799 + }, + { + "epoch": 1.77, + "grad_norm": 9.1682166602326, + "learning_rate": 7.138883187856716e-07, + "loss": 1.389, + "step": 146802 + }, + { + "epoch": 1.77, + "grad_norm": 6.761284904489475, + "learning_rate": 7.136715728540001e-07, + "loss": 1.0895, + "step": 146805 + }, + { + "epoch": 1.77, + "grad_norm": 7.479516252195646, + "learning_rate": 7.134548586132195e-07, + "loss": 0.9347, + "step": 146808 + }, + { + "epoch": 1.77, + "grad_norm": 21.39956463296161, + "learning_rate": 7.132381760640739e-07, + "loss": 0.86, + "step": 146811 + }, + { + "epoch": 1.77, + "grad_norm": 2.823444816212416, + "learning_rate": 7.130215252073002e-07, + "loss": 1.3837, + "step": 146814 + }, + { + "epoch": 1.77, + "grad_norm": 3.4044550253553307, + "learning_rate": 7.128049060436392e-07, + "loss": 1.1934, + "step": 146817 + }, + { + "epoch": 1.77, + "grad_norm": 23.99879185162705, + "learning_rate": 7.125883185738325e-07, + "loss": 1.0169, + "step": 146820 + }, + { + "epoch": 1.77, + "grad_norm": 11.101589379814572, + "learning_rate": 7.12371762798616e-07, + "loss": 1.2749, + "step": 146823 + }, + { + "epoch": 1.77, + "grad_norm": 7.97189656190641, + "learning_rate": 7.121552387187269e-07, + "loss": 1.2956, + "step": 146826 + }, + { + "epoch": 1.77, + "grad_norm": 7.448117440273817, + "learning_rate": 7.11938746334907e-07, + "loss": 1.3991, + "step": 146829 + }, + { + "epoch": 1.77, + "grad_norm": 3.854014902611095, + "learning_rate": 7.117222856478967e-07, + "loss": 0.8763, + "step": 146832 + }, + { + "epoch": 1.77, + "grad_norm": 4.872262924541139, + "learning_rate": 7.115058566584299e-07, + "loss": 1.1277, + "step": 146835 + }, + { + "epoch": 1.77, + "grad_norm": 12.879417412093463, + "learning_rate": 7.112894593672481e-07, + "loss": 1.1024, + "step": 146838 + }, + { + "epoch": 1.77, + "grad_norm": 3.4618688777221336, + "learning_rate": 7.110730937750909e-07, + "loss": 0.8943, + "step": 146841 + }, + { + "epoch": 1.77, + "grad_norm": 3.817855594364563, + "learning_rate": 7.108567598826955e-07, + "loss": 0.8836, + "step": 146844 + }, + { + "epoch": 1.77, + "grad_norm": 9.112094897893758, + "learning_rate": 7.10640457690801e-07, + "loss": 1.0496, + "step": 146847 + }, + { + "epoch": 1.77, + "grad_norm": 3.584449907863325, + "learning_rate": 7.104241872001427e-07, + "loss": 1.0469, + "step": 146850 + }, + { + "epoch": 1.77, + "grad_norm": 2.159211342371433, + "learning_rate": 7.10207948411461e-07, + "loss": 1.241, + "step": 146853 + }, + { + "epoch": 1.77, + "grad_norm": 4.7089532358877735, + "learning_rate": 7.09991741325492e-07, + "loss": 1.035, + "step": 146856 + }, + { + "epoch": 1.77, + "grad_norm": 13.065081792522287, + "learning_rate": 7.097755659429783e-07, + "loss": 1.2828, + "step": 146859 + }, + { + "epoch": 1.77, + "grad_norm": 9.099957498550264, + "learning_rate": 7.095594222646518e-07, + "loss": 0.91, + "step": 146862 + }, + { + "epoch": 1.77, + "grad_norm": 20.724434689247953, + "learning_rate": 7.093433102912539e-07, + "loss": 1.5818, + "step": 146865 + }, + { + "epoch": 1.77, + "grad_norm": 5.61337496572461, + "learning_rate": 7.091272300235197e-07, + "loss": 1.0697, + "step": 146868 + }, + { + "epoch": 1.77, + "grad_norm": 9.247038216391879, + "learning_rate": 7.089111814621875e-07, + "loss": 1.1937, + "step": 146871 + }, + { + "epoch": 1.77, + "grad_norm": 11.992208245852998, + "learning_rate": 7.086951646079965e-07, + "loss": 1.3679, + "step": 146874 + }, + { + "epoch": 1.77, + "grad_norm": 10.087340335372946, + "learning_rate": 7.084791794616807e-07, + "loss": 1.2975, + "step": 146877 + }, + { + "epoch": 1.77, + "grad_norm": 5.253808063643685, + "learning_rate": 7.082632260239785e-07, + "loss": 1.0292, + "step": 146880 + }, + { + "epoch": 1.77, + "grad_norm": 7.945065179389851, + "learning_rate": 7.080473042956271e-07, + "loss": 1.2333, + "step": 146883 + }, + { + "epoch": 1.77, + "grad_norm": 6.2147115283723044, + "learning_rate": 7.078314142773646e-07, + "loss": 1.0837, + "step": 146886 + }, + { + "epoch": 1.77, + "grad_norm": 7.800812367432161, + "learning_rate": 7.076155559699272e-07, + "loss": 1.026, + "step": 146889 + }, + { + "epoch": 1.77, + "grad_norm": 13.794939288920167, + "learning_rate": 7.073997293740487e-07, + "loss": 1.3954, + "step": 146892 + }, + { + "epoch": 1.77, + "grad_norm": 3.3292391503322625, + "learning_rate": 7.071839344904674e-07, + "loss": 1.0994, + "step": 146895 + }, + { + "epoch": 1.77, + "grad_norm": 6.72977112087601, + "learning_rate": 7.069681713199206e-07, + "loss": 1.1688, + "step": 146898 + }, + { + "epoch": 1.77, + "grad_norm": 7.118400728564789, + "learning_rate": 7.067524398631453e-07, + "loss": 1.4267, + "step": 146901 + }, + { + "epoch": 1.77, + "grad_norm": 10.940142845643585, + "learning_rate": 7.065367401208745e-07, + "loss": 1.1099, + "step": 146904 + }, + { + "epoch": 1.77, + "grad_norm": 5.8410487083579685, + "learning_rate": 7.063210720938496e-07, + "loss": 0.9842, + "step": 146907 + }, + { + "epoch": 1.77, + "grad_norm": 7.369468348032746, + "learning_rate": 7.061054357828001e-07, + "loss": 1.0435, + "step": 146910 + }, + { + "epoch": 1.77, + "grad_norm": 8.827324274168351, + "learning_rate": 7.058898311884676e-07, + "loss": 1.4337, + "step": 146913 + }, + { + "epoch": 1.77, + "grad_norm": 4.017498047548976, + "learning_rate": 7.056742583115827e-07, + "loss": 1.1865, + "step": 146916 + }, + { + "epoch": 1.77, + "grad_norm": 11.938051487647916, + "learning_rate": 7.054587171528848e-07, + "loss": 1.3024, + "step": 146919 + }, + { + "epoch": 1.77, + "grad_norm": 7.649331759265002, + "learning_rate": 7.052432077131089e-07, + "loss": 0.9286, + "step": 146922 + }, + { + "epoch": 1.77, + "grad_norm": 8.26230395462489, + "learning_rate": 7.050277299929886e-07, + "loss": 1.0371, + "step": 146925 + }, + { + "epoch": 1.77, + "grad_norm": 7.079603887896847, + "learning_rate": 7.048122839932625e-07, + "loss": 0.9724, + "step": 146928 + }, + { + "epoch": 1.77, + "grad_norm": 6.9804163309685965, + "learning_rate": 7.045968697146643e-07, + "loss": 1.0877, + "step": 146931 + }, + { + "epoch": 1.77, + "grad_norm": 7.613553161768961, + "learning_rate": 7.04381487157928e-07, + "loss": 0.9733, + "step": 146934 + }, + { + "epoch": 1.77, + "grad_norm": 4.807930953832188, + "learning_rate": 7.041661363237883e-07, + "loss": 0.8828, + "step": 146937 + }, + { + "epoch": 1.77, + "grad_norm": 12.563484180702071, + "learning_rate": 7.039508172129828e-07, + "loss": 0.9054, + "step": 146940 + }, + { + "epoch": 1.77, + "grad_norm": 12.973676719862999, + "learning_rate": 7.037355298262439e-07, + "loss": 1.3711, + "step": 146943 + }, + { + "epoch": 1.77, + "grad_norm": 8.981374487476733, + "learning_rate": 7.035202741643066e-07, + "loss": 1.355, + "step": 146946 + }, + { + "epoch": 1.77, + "grad_norm": 3.626917354489697, + "learning_rate": 7.033050502279082e-07, + "loss": 1.2276, + "step": 146949 + }, + { + "epoch": 1.77, + "grad_norm": 10.394457026028327, + "learning_rate": 7.030898580177781e-07, + "loss": 0.9963, + "step": 146952 + }, + { + "epoch": 1.77, + "grad_norm": 9.4096985327683, + "learning_rate": 7.028746975346567e-07, + "loss": 1.0068, + "step": 146955 + }, + { + "epoch": 1.77, + "grad_norm": 11.136552612228554, + "learning_rate": 7.026595687792726e-07, + "loss": 1.4193, + "step": 146958 + }, + { + "epoch": 1.77, + "grad_norm": 4.783418264566159, + "learning_rate": 7.024444717523626e-07, + "loss": 1.4126, + "step": 146961 + }, + { + "epoch": 1.77, + "grad_norm": 8.914807614153606, + "learning_rate": 7.022294064546609e-07, + "loss": 1.3552, + "step": 146964 + }, + { + "epoch": 1.77, + "grad_norm": 14.095877841973946, + "learning_rate": 7.020143728869034e-07, + "loss": 1.3874, + "step": 146967 + }, + { + "epoch": 1.77, + "grad_norm": 5.330974244496433, + "learning_rate": 7.017993710498195e-07, + "loss": 1.0668, + "step": 146970 + }, + { + "epoch": 1.77, + "grad_norm": 21.222082023875004, + "learning_rate": 7.015844009441464e-07, + "loss": 0.9243, + "step": 146973 + }, + { + "epoch": 1.77, + "grad_norm": 10.16060031120711, + "learning_rate": 7.013694625706147e-07, + "loss": 1.1268, + "step": 146976 + }, + { + "epoch": 1.77, + "grad_norm": 11.628598061527153, + "learning_rate": 7.011545559299604e-07, + "loss": 1.3015, + "step": 146979 + }, + { + "epoch": 1.77, + "grad_norm": 7.376528429423419, + "learning_rate": 7.009396810229174e-07, + "loss": 1.2341, + "step": 146982 + }, + { + "epoch": 1.77, + "grad_norm": 2.960369264917379, + "learning_rate": 7.007248378502163e-07, + "loss": 1.3539, + "step": 146985 + }, + { + "epoch": 1.77, + "grad_norm": 6.560117719162346, + "learning_rate": 7.005100264125919e-07, + "loss": 1.2672, + "step": 146988 + }, + { + "epoch": 1.77, + "grad_norm": 6.755829173997637, + "learning_rate": 7.002952467107782e-07, + "loss": 1.3818, + "step": 146991 + }, + { + "epoch": 1.77, + "grad_norm": 19.915960939192896, + "learning_rate": 7.000804987455079e-07, + "loss": 1.0881, + "step": 146994 + }, + { + "epoch": 1.77, + "grad_norm": 4.772699890556492, + "learning_rate": 6.998657825175114e-07, + "loss": 1.6799, + "step": 146997 + }, + { + "epoch": 1.77, + "grad_norm": 3.070223271362713, + "learning_rate": 6.996510980275217e-07, + "loss": 1.0735, + "step": 147000 + }, + { + "epoch": 1.77, + "grad_norm": 7.89374438235954, + "learning_rate": 6.994364452762747e-07, + "loss": 1.0355, + "step": 147003 + }, + { + "epoch": 1.77, + "grad_norm": 5.470943160020151, + "learning_rate": 6.992218242644999e-07, + "loss": 0.9887, + "step": 147006 + }, + { + "epoch": 1.77, + "grad_norm": 3.990764140200622, + "learning_rate": 6.990072349929322e-07, + "loss": 1.315, + "step": 147009 + }, + { + "epoch": 1.77, + "grad_norm": 7.619208627969656, + "learning_rate": 6.987926774623022e-07, + "loss": 1.2205, + "step": 147012 + }, + { + "epoch": 1.77, + "grad_norm": 3.9581949907227316, + "learning_rate": 6.985781516733437e-07, + "loss": 1.3442, + "step": 147015 + }, + { + "epoch": 1.77, + "grad_norm": 3.831720311129863, + "learning_rate": 6.983636576267849e-07, + "loss": 1.1068, + "step": 147018 + }, + { + "epoch": 1.77, + "grad_norm": 5.074840990750613, + "learning_rate": 6.981491953233633e-07, + "loss": 1.1302, + "step": 147021 + }, + { + "epoch": 1.77, + "grad_norm": 12.569573532953266, + "learning_rate": 6.979347647638057e-07, + "loss": 0.9039, + "step": 147024 + }, + { + "epoch": 1.77, + "grad_norm": 13.047541537114569, + "learning_rate": 6.977203659488474e-07, + "loss": 1.6148, + "step": 147027 + }, + { + "epoch": 1.77, + "grad_norm": 6.260847745119375, + "learning_rate": 6.975059988792177e-07, + "loss": 1.0505, + "step": 147030 + }, + { + "epoch": 1.77, + "grad_norm": 4.7210365648307135, + "learning_rate": 6.972916635556515e-07, + "loss": 1.0505, + "step": 147033 + }, + { + "epoch": 1.77, + "grad_norm": 9.461135823673036, + "learning_rate": 6.970773599788761e-07, + "loss": 1.2133, + "step": 147036 + }, + { + "epoch": 1.77, + "grad_norm": 4.36256766978962, + "learning_rate": 6.968630881496263e-07, + "loss": 1.0562, + "step": 147039 + }, + { + "epoch": 1.77, + "grad_norm": 10.380544912612782, + "learning_rate": 6.966488480686296e-07, + "loss": 1.2167, + "step": 147042 + }, + { + "epoch": 1.77, + "grad_norm": 10.534836761037425, + "learning_rate": 6.964346397366206e-07, + "loss": 1.5021, + "step": 147045 + }, + { + "epoch": 1.77, + "grad_norm": 5.956891986594077, + "learning_rate": 6.962204631543302e-07, + "loss": 1.1488, + "step": 147048 + }, + { + "epoch": 1.77, + "grad_norm": 3.4041931789546744, + "learning_rate": 6.960063183224863e-07, + "loss": 1.2414, + "step": 147051 + }, + { + "epoch": 1.77, + "grad_norm": 12.037884516315914, + "learning_rate": 6.957922052418209e-07, + "loss": 1.0844, + "step": 147054 + }, + { + "epoch": 1.77, + "grad_norm": 9.411846235076828, + "learning_rate": 6.955781239130676e-07, + "loss": 1.1234, + "step": 147057 + }, + { + "epoch": 1.77, + "grad_norm": 5.582279056393673, + "learning_rate": 6.953640743369539e-07, + "loss": 1.2865, + "step": 147060 + }, + { + "epoch": 1.77, + "grad_norm": 17.406684939513404, + "learning_rate": 6.951500565142122e-07, + "loss": 1.1567, + "step": 147063 + }, + { + "epoch": 1.77, + "grad_norm": 8.593365343136641, + "learning_rate": 6.949360704455699e-07, + "loss": 1.338, + "step": 147066 + }, + { + "epoch": 1.77, + "grad_norm": 21.91576059432326, + "learning_rate": 6.947221161317586e-07, + "loss": 1.1844, + "step": 147069 + }, + { + "epoch": 1.77, + "grad_norm": 6.369366132980592, + "learning_rate": 6.945081935735099e-07, + "loss": 1.1652, + "step": 147072 + }, + { + "epoch": 1.77, + "grad_norm": 9.06300204322508, + "learning_rate": 6.942943027715543e-07, + "loss": 0.6533, + "step": 147075 + }, + { + "epoch": 1.77, + "grad_norm": 7.369365935578342, + "learning_rate": 6.940804437266179e-07, + "loss": 1.4335, + "step": 147078 + }, + { + "epoch": 1.77, + "grad_norm": 12.58419438667421, + "learning_rate": 6.938666164394359e-07, + "loss": 1.0131, + "step": 147081 + }, + { + "epoch": 1.77, + "grad_norm": 5.075855381898473, + "learning_rate": 6.93652820910733e-07, + "loss": 1.1199, + "step": 147084 + }, + { + "epoch": 1.77, + "grad_norm": 9.8447123879475, + "learning_rate": 6.934390571412409e-07, + "loss": 1.0636, + "step": 147087 + }, + { + "epoch": 1.77, + "grad_norm": 10.181694999985027, + "learning_rate": 6.932253251316901e-07, + "loss": 1.2795, + "step": 147090 + }, + { + "epoch": 1.77, + "grad_norm": 6.646229830960446, + "learning_rate": 6.930116248828078e-07, + "loss": 1.0813, + "step": 147093 + }, + { + "epoch": 1.77, + "grad_norm": 6.543844704494521, + "learning_rate": 6.927979563953246e-07, + "loss": 1.136, + "step": 147096 + }, + { + "epoch": 1.77, + "grad_norm": 7.157555633908409, + "learning_rate": 6.925843196699711e-07, + "loss": 0.9683, + "step": 147099 + }, + { + "epoch": 1.77, + "grad_norm": 6.397359713988653, + "learning_rate": 6.923707147074743e-07, + "loss": 0.9446, + "step": 147102 + }, + { + "epoch": 1.77, + "grad_norm": 5.938221215096167, + "learning_rate": 6.921571415085626e-07, + "loss": 1.2356, + "step": 147105 + }, + { + "epoch": 1.77, + "grad_norm": 7.158277183446151, + "learning_rate": 6.919436000739666e-07, + "loss": 1.2214, + "step": 147108 + }, + { + "epoch": 1.77, + "grad_norm": 3.789643446271499, + "learning_rate": 6.917300904044133e-07, + "loss": 1.1823, + "step": 147111 + }, + { + "epoch": 1.77, + "grad_norm": 4.844124934771568, + "learning_rate": 6.915166125006322e-07, + "loss": 0.9438, + "step": 147114 + }, + { + "epoch": 1.77, + "grad_norm": 10.773456405914851, + "learning_rate": 6.91303166363354e-07, + "loss": 0.9497, + "step": 147117 + }, + { + "epoch": 1.77, + "grad_norm": 12.13925727294965, + "learning_rate": 6.910897519933035e-07, + "loss": 1.0749, + "step": 147120 + }, + { + "epoch": 1.77, + "grad_norm": 7.552680652408561, + "learning_rate": 6.908763693912124e-07, + "loss": 1.113, + "step": 147123 + }, + { + "epoch": 1.77, + "grad_norm": 5.814795873622879, + "learning_rate": 6.906630185578056e-07, + "loss": 1.0289, + "step": 147126 + }, + { + "epoch": 1.77, + "grad_norm": 5.243423898787986, + "learning_rate": 6.904496994938137e-07, + "loss": 1.0628, + "step": 147129 + }, + { + "epoch": 1.77, + "grad_norm": 12.024468719070407, + "learning_rate": 6.902364121999627e-07, + "loss": 1.0739, + "step": 147132 + }, + { + "epoch": 1.77, + "grad_norm": 3.9653392098051787, + "learning_rate": 6.90023156676981e-07, + "loss": 1.2684, + "step": 147135 + }, + { + "epoch": 1.77, + "grad_norm": 5.957220272095708, + "learning_rate": 6.898099329255969e-07, + "loss": 1.2121, + "step": 147138 + }, + { + "epoch": 1.77, + "grad_norm": 4.022453877877015, + "learning_rate": 6.895967409465398e-07, + "loss": 1.2808, + "step": 147141 + }, + { + "epoch": 1.77, + "grad_norm": 9.351029816976759, + "learning_rate": 6.893835807405358e-07, + "loss": 1.0358, + "step": 147144 + }, + { + "epoch": 1.77, + "grad_norm": 7.055948251091637, + "learning_rate": 6.891704523083098e-07, + "loss": 1.2777, + "step": 147147 + }, + { + "epoch": 1.77, + "grad_norm": 2.9323416785910683, + "learning_rate": 6.889573556505913e-07, + "loss": 1.0991, + "step": 147150 + }, + { + "epoch": 1.77, + "grad_norm": 5.466835001774864, + "learning_rate": 6.887442907681085e-07, + "loss": 0.835, + "step": 147153 + }, + { + "epoch": 1.77, + "grad_norm": 6.458050458155115, + "learning_rate": 6.885312576615877e-07, + "loss": 0.8018, + "step": 147156 + }, + { + "epoch": 1.77, + "grad_norm": 18.585370522372692, + "learning_rate": 6.883182563317547e-07, + "loss": 1.3952, + "step": 147159 + }, + { + "epoch": 1.77, + "grad_norm": 6.634743743912077, + "learning_rate": 6.881052867793381e-07, + "loss": 1.4795, + "step": 147162 + }, + { + "epoch": 1.77, + "grad_norm": 8.770950640929598, + "learning_rate": 6.878923490050648e-07, + "loss": 0.946, + "step": 147165 + }, + { + "epoch": 1.77, + "grad_norm": 10.203204906999268, + "learning_rate": 6.876794430096589e-07, + "loss": 1.4396, + "step": 147168 + }, + { + "epoch": 1.77, + "grad_norm": 5.905973860787874, + "learning_rate": 6.874665687938509e-07, + "loss": 0.9581, + "step": 147171 + }, + { + "epoch": 1.77, + "grad_norm": 4.331918132600442, + "learning_rate": 6.872537263583634e-07, + "loss": 1.23, + "step": 147174 + }, + { + "epoch": 1.77, + "grad_norm": 15.684228943894915, + "learning_rate": 6.870409157039249e-07, + "loss": 0.9181, + "step": 147177 + }, + { + "epoch": 1.77, + "grad_norm": 11.251873095866566, + "learning_rate": 6.868281368312612e-07, + "loss": 1.1235, + "step": 147180 + }, + { + "epoch": 1.77, + "grad_norm": 14.483514521003032, + "learning_rate": 6.866153897410999e-07, + "loss": 0.9546, + "step": 147183 + }, + { + "epoch": 1.77, + "grad_norm": 8.284379403457462, + "learning_rate": 6.864026744341657e-07, + "loss": 0.9923, + "step": 147186 + }, + { + "epoch": 1.77, + "grad_norm": 6.480306630972082, + "learning_rate": 6.861899909111825e-07, + "loss": 1.064, + "step": 147189 + }, + { + "epoch": 1.77, + "grad_norm": 4.913532604854846, + "learning_rate": 6.859773391728775e-07, + "loss": 1.0512, + "step": 147192 + }, + { + "epoch": 1.77, + "grad_norm": 3.945014774737653, + "learning_rate": 6.857647192199779e-07, + "loss": 1.1155, + "step": 147195 + }, + { + "epoch": 1.77, + "grad_norm": 7.586516665341786, + "learning_rate": 6.855521310532098e-07, + "loss": 1.2078, + "step": 147198 + }, + { + "epoch": 1.77, + "grad_norm": 8.232502392207087, + "learning_rate": 6.85339574673295e-07, + "loss": 1.188, + "step": 147201 + }, + { + "epoch": 1.77, + "grad_norm": 3.7333629640911026, + "learning_rate": 6.851270500809615e-07, + "loss": 1.0463, + "step": 147204 + }, + { + "epoch": 1.77, + "grad_norm": 9.117438478408816, + "learning_rate": 6.849145572769367e-07, + "loss": 0.9055, + "step": 147207 + }, + { + "epoch": 1.77, + "grad_norm": 17.429770169676786, + "learning_rate": 6.847020962619421e-07, + "loss": 1.0029, + "step": 147210 + }, + { + "epoch": 1.77, + "grad_norm": 21.559252624743973, + "learning_rate": 6.84489667036703e-07, + "loss": 0.9004, + "step": 147213 + }, + { + "epoch": 1.77, + "grad_norm": 6.896227720202131, + "learning_rate": 6.842772696019439e-07, + "loss": 1.5912, + "step": 147216 + }, + { + "epoch": 1.77, + "grad_norm": 5.16435694061433, + "learning_rate": 6.840649039583925e-07, + "loss": 1.1826, + "step": 147219 + }, + { + "epoch": 1.77, + "grad_norm": 3.6338249160742264, + "learning_rate": 6.838525701067711e-07, + "loss": 1.1489, + "step": 147222 + }, + { + "epoch": 1.77, + "grad_norm": 12.737873370656082, + "learning_rate": 6.836402680478071e-07, + "loss": 1.2908, + "step": 147225 + }, + { + "epoch": 1.77, + "grad_norm": 29.727130059699668, + "learning_rate": 6.834279977822234e-07, + "loss": 1.155, + "step": 147228 + }, + { + "epoch": 1.77, + "grad_norm": 15.684299237114413, + "learning_rate": 6.832157593107425e-07, + "loss": 1.1799, + "step": 147231 + }, + { + "epoch": 1.77, + "grad_norm": 23.27721608649514, + "learning_rate": 6.830035526340894e-07, + "loss": 1.7346, + "step": 147234 + }, + { + "epoch": 1.77, + "grad_norm": 11.90246492515721, + "learning_rate": 6.827913777529915e-07, + "loss": 1.1973, + "step": 147237 + }, + { + "epoch": 1.77, + "grad_norm": 4.217863005746227, + "learning_rate": 6.825792346681692e-07, + "loss": 0.8306, + "step": 147240 + }, + { + "epoch": 1.77, + "grad_norm": 23.677034668848737, + "learning_rate": 6.823671233803475e-07, + "loss": 1.2133, + "step": 147243 + }, + { + "epoch": 1.77, + "grad_norm": 57.057005689767145, + "learning_rate": 6.821550438902513e-07, + "loss": 1.0722, + "step": 147246 + }, + { + "epoch": 1.77, + "grad_norm": 8.71905810292201, + "learning_rate": 6.819429961986045e-07, + "loss": 1.0997, + "step": 147249 + }, + { + "epoch": 1.77, + "grad_norm": 5.599342977833132, + "learning_rate": 6.8173098030613e-07, + "loss": 1.0708, + "step": 147252 + }, + { + "epoch": 1.77, + "grad_norm": 6.924132601462022, + "learning_rate": 6.815189962135493e-07, + "loss": 1.0286, + "step": 147255 + }, + { + "epoch": 1.77, + "grad_norm": 11.199919534529075, + "learning_rate": 6.813070439215885e-07, + "loss": 1.1327, + "step": 147258 + }, + { + "epoch": 1.77, + "grad_norm": 16.050819065872375, + "learning_rate": 6.810951234309704e-07, + "loss": 1.2071, + "step": 147261 + }, + { + "epoch": 1.77, + "grad_norm": 10.400511400702518, + "learning_rate": 6.808832347424188e-07, + "loss": 1.2261, + "step": 147264 + }, + { + "epoch": 1.77, + "grad_norm": 9.793702116589833, + "learning_rate": 6.806713778566542e-07, + "loss": 1.2105, + "step": 147267 + }, + { + "epoch": 1.77, + "grad_norm": 8.89444624371977, + "learning_rate": 6.80459552774404e-07, + "loss": 1.1784, + "step": 147270 + }, + { + "epoch": 1.77, + "grad_norm": 15.732031238336585, + "learning_rate": 6.802477594963863e-07, + "loss": 1.0513, + "step": 147273 + }, + { + "epoch": 1.77, + "grad_norm": 8.181411350710313, + "learning_rate": 6.800359980233262e-07, + "loss": 1.151, + "step": 147276 + }, + { + "epoch": 1.77, + "grad_norm": 5.800702847642282, + "learning_rate": 6.798242683559475e-07, + "loss": 1.161, + "step": 147279 + }, + { + "epoch": 1.77, + "grad_norm": 2.8571371598187785, + "learning_rate": 6.796125704949696e-07, + "loss": 1.0322, + "step": 147282 + }, + { + "epoch": 1.77, + "grad_norm": 3.640982503338875, + "learning_rate": 6.794009044411176e-07, + "loss": 1.173, + "step": 147285 + }, + { + "epoch": 1.77, + "grad_norm": 6.752949046505073, + "learning_rate": 6.79189270195113e-07, + "loss": 0.9474, + "step": 147288 + }, + { + "epoch": 1.77, + "grad_norm": 5.330432738382007, + "learning_rate": 6.789776677576787e-07, + "loss": 1.2924, + "step": 147291 + }, + { + "epoch": 1.77, + "grad_norm": 14.519715369189074, + "learning_rate": 6.787660971295373e-07, + "loss": 1.4717, + "step": 147294 + }, + { + "epoch": 1.77, + "grad_norm": 3.3434867639737655, + "learning_rate": 6.785545583114073e-07, + "loss": 1.118, + "step": 147297 + }, + { + "epoch": 1.77, + "grad_norm": 3.1921229495018744, + "learning_rate": 6.783430513040135e-07, + "loss": 1.1469, + "step": 147300 + }, + { + "epoch": 1.77, + "grad_norm": 7.369319329503709, + "learning_rate": 6.781315761080776e-07, + "loss": 0.8634, + "step": 147303 + }, + { + "epoch": 1.77, + "grad_norm": 7.776936371788031, + "learning_rate": 6.779201327243212e-07, + "loss": 1.2239, + "step": 147306 + }, + { + "epoch": 1.77, + "grad_norm": 7.923386809635589, + "learning_rate": 6.77708721153465e-07, + "loss": 1.2811, + "step": 147309 + }, + { + "epoch": 1.77, + "grad_norm": 39.63899980183759, + "learning_rate": 6.774973413962327e-07, + "loss": 1.3376, + "step": 147312 + }, + { + "epoch": 1.77, + "grad_norm": 9.929482282438174, + "learning_rate": 6.772859934533427e-07, + "loss": 1.0086, + "step": 147315 + }, + { + "epoch": 1.77, + "grad_norm": 23.44797071185186, + "learning_rate": 6.770746773255188e-07, + "loss": 1.0689, + "step": 147318 + }, + { + "epoch": 1.77, + "grad_norm": 5.510342152451849, + "learning_rate": 6.768633930134782e-07, + "loss": 1.0488, + "step": 147321 + }, + { + "epoch": 1.77, + "grad_norm": 3.9808863644330588, + "learning_rate": 6.766521405179461e-07, + "loss": 1.1751, + "step": 147324 + }, + { + "epoch": 1.77, + "grad_norm": 16.733523256705116, + "learning_rate": 6.764409198396415e-07, + "loss": 1.3097, + "step": 147327 + }, + { + "epoch": 1.77, + "grad_norm": 5.3272582690895245, + "learning_rate": 6.762297309792865e-07, + "loss": 1.2471, + "step": 147330 + }, + { + "epoch": 1.77, + "grad_norm": 10.115347840603697, + "learning_rate": 6.760185739376013e-07, + "loss": 1.2882, + "step": 147333 + }, + { + "epoch": 1.77, + "grad_norm": 5.526521226091923, + "learning_rate": 6.758074487153066e-07, + "loss": 1.0188, + "step": 147336 + }, + { + "epoch": 1.77, + "grad_norm": 13.239478100514187, + "learning_rate": 6.755963553131218e-07, + "loss": 1.3858, + "step": 147339 + }, + { + "epoch": 1.77, + "grad_norm": 3.018328675032008, + "learning_rate": 6.753852937317673e-07, + "loss": 0.9505, + "step": 147342 + }, + { + "epoch": 1.77, + "grad_norm": 7.020015424336016, + "learning_rate": 6.751742639719649e-07, + "loss": 0.9094, + "step": 147345 + }, + { + "epoch": 1.77, + "grad_norm": 4.508864123452526, + "learning_rate": 6.74963266034434e-07, + "loss": 1.1231, + "step": 147348 + }, + { + "epoch": 1.77, + "grad_norm": 6.018590834672975, + "learning_rate": 6.74752299919893e-07, + "loss": 0.9481, + "step": 147351 + }, + { + "epoch": 1.77, + "grad_norm": 4.568488791552865, + "learning_rate": 6.745413656290645e-07, + "loss": 1.1437, + "step": 147354 + }, + { + "epoch": 1.77, + "grad_norm": 4.406950980878586, + "learning_rate": 6.74330463162669e-07, + "loss": 1.1882, + "step": 147357 + }, + { + "epoch": 1.77, + "grad_norm": 22.981290451088572, + "learning_rate": 6.741195925214239e-07, + "loss": 1.2799, + "step": 147360 + }, + { + "epoch": 1.77, + "grad_norm": 8.139765606579344, + "learning_rate": 6.739087537060484e-07, + "loss": 1.4169, + "step": 147363 + }, + { + "epoch": 1.77, + "grad_norm": 6.628104407226051, + "learning_rate": 6.736979467172633e-07, + "loss": 1.0093, + "step": 147366 + }, + { + "epoch": 1.77, + "grad_norm": 9.457752013288296, + "learning_rate": 6.734871715557867e-07, + "loss": 0.9987, + "step": 147369 + }, + { + "epoch": 1.77, + "grad_norm": 4.50312109705974, + "learning_rate": 6.732764282223414e-07, + "loss": 1.2954, + "step": 147372 + }, + { + "epoch": 1.77, + "grad_norm": 8.289454447975663, + "learning_rate": 6.730657167176424e-07, + "loss": 0.8515, + "step": 147375 + }, + { + "epoch": 1.77, + "grad_norm": 9.874671453593866, + "learning_rate": 6.728550370424125e-07, + "loss": 1.0684, + "step": 147378 + }, + { + "epoch": 1.77, + "grad_norm": 13.865996000001525, + "learning_rate": 6.726443891973666e-07, + "loss": 1.1375, + "step": 147381 + }, + { + "epoch": 1.77, + "grad_norm": 11.381545562297577, + "learning_rate": 6.724337731832253e-07, + "loss": 0.9864, + "step": 147384 + }, + { + "epoch": 1.77, + "grad_norm": 7.859425491872924, + "learning_rate": 6.722231890007103e-07, + "loss": 1.4293, + "step": 147387 + }, + { + "epoch": 1.77, + "grad_norm": 9.622567970871971, + "learning_rate": 6.720126366505353e-07, + "loss": 1.047, + "step": 147390 + }, + { + "epoch": 1.77, + "grad_norm": 2.8277563541121857, + "learning_rate": 6.71802116133422e-07, + "loss": 1.4725, + "step": 147393 + }, + { + "epoch": 1.77, + "grad_norm": 7.843402414412124, + "learning_rate": 6.715916274500878e-07, + "loss": 1.0143, + "step": 147396 + }, + { + "epoch": 1.77, + "grad_norm": 10.292872475280364, + "learning_rate": 6.713811706012529e-07, + "loss": 0.8523, + "step": 147399 + }, + { + "epoch": 1.77, + "grad_norm": 3.8553362760181016, + "learning_rate": 6.711707455876338e-07, + "loss": 1.0866, + "step": 147402 + }, + { + "epoch": 1.77, + "grad_norm": 14.81596276436279, + "learning_rate": 6.709603524099473e-07, + "loss": 1.1032, + "step": 147405 + }, + { + "epoch": 1.77, + "grad_norm": 2.6126732997247526, + "learning_rate": 6.707499910689131e-07, + "loss": 1.2754, + "step": 147408 + }, + { + "epoch": 1.77, + "grad_norm": 7.5538456017797975, + "learning_rate": 6.705396615652482e-07, + "loss": 1.0896, + "step": 147411 + }, + { + "epoch": 1.77, + "grad_norm": 10.600761812683283, + "learning_rate": 6.703293638996733e-07, + "loss": 1.3747, + "step": 147414 + }, + { + "epoch": 1.77, + "grad_norm": 6.5778828733964465, + "learning_rate": 6.701190980729011e-07, + "loss": 1.1535, + "step": 147417 + }, + { + "epoch": 1.77, + "grad_norm": 7.032213854350746, + "learning_rate": 6.699088640856544e-07, + "loss": 1.1459, + "step": 147420 + }, + { + "epoch": 1.77, + "grad_norm": 6.363732424413227, + "learning_rate": 6.696986619386458e-07, + "loss": 1.2529, + "step": 147423 + }, + { + "epoch": 1.77, + "grad_norm": 5.630354702096461, + "learning_rate": 6.694884916325962e-07, + "loss": 1.0762, + "step": 147426 + }, + { + "epoch": 1.77, + "grad_norm": 4.605502766087236, + "learning_rate": 6.692783531682212e-07, + "loss": 1.1736, + "step": 147429 + }, + { + "epoch": 1.77, + "grad_norm": 5.562415483624961, + "learning_rate": 6.690682465462372e-07, + "loss": 1.0167, + "step": 147432 + }, + { + "epoch": 1.77, + "grad_norm": 29.46529272254647, + "learning_rate": 6.688581717673626e-07, + "loss": 1.1993, + "step": 147435 + }, + { + "epoch": 1.77, + "grad_norm": 5.002956674158244, + "learning_rate": 6.686481288323143e-07, + "loss": 0.7095, + "step": 147438 + }, + { + "epoch": 1.77, + "grad_norm": 8.074930986993127, + "learning_rate": 6.684381177418098e-07, + "loss": 0.9525, + "step": 147441 + }, + { + "epoch": 1.77, + "grad_norm": 15.328111834123634, + "learning_rate": 6.682281384965649e-07, + "loss": 1.2663, + "step": 147444 + }, + { + "epoch": 1.77, + "grad_norm": 11.648220177728893, + "learning_rate": 6.680181910972949e-07, + "loss": 1.2022, + "step": 147447 + }, + { + "epoch": 1.77, + "grad_norm": 6.27546639039373, + "learning_rate": 6.678082755447169e-07, + "loss": 1.4276, + "step": 147450 + }, + { + "epoch": 1.77, + "grad_norm": 8.552542373126522, + "learning_rate": 6.675983918395501e-07, + "loss": 1.3505, + "step": 147453 + }, + { + "epoch": 1.77, + "grad_norm": 8.939916416415901, + "learning_rate": 6.673885399825064e-07, + "loss": 0.8407, + "step": 147456 + }, + { + "epoch": 1.77, + "grad_norm": 6.64463984089072, + "learning_rate": 6.67178719974304e-07, + "loss": 0.998, + "step": 147459 + }, + { + "epoch": 1.77, + "grad_norm": 3.9447685808838737, + "learning_rate": 6.669689318156614e-07, + "loss": 1.169, + "step": 147462 + }, + { + "epoch": 1.77, + "grad_norm": 8.822501726835123, + "learning_rate": 6.6675917550729e-07, + "loss": 1.3573, + "step": 147465 + }, + { + "epoch": 1.77, + "grad_norm": 2.946873460781395, + "learning_rate": 6.665494510499082e-07, + "loss": 1.03, + "step": 147468 + }, + { + "epoch": 1.77, + "grad_norm": 8.034484489912774, + "learning_rate": 6.663397584442311e-07, + "loss": 1.0962, + "step": 147471 + }, + { + "epoch": 1.77, + "grad_norm": 6.284374578484733, + "learning_rate": 6.661300976909735e-07, + "loss": 1.2967, + "step": 147474 + }, + { + "epoch": 1.77, + "grad_norm": 5.312351675048516, + "learning_rate": 6.659204687908527e-07, + "loss": 0.9451, + "step": 147477 + }, + { + "epoch": 1.77, + "grad_norm": 22.485370273435283, + "learning_rate": 6.657108717445837e-07, + "loss": 1.2424, + "step": 147480 + }, + { + "epoch": 1.77, + "grad_norm": 11.000962067642146, + "learning_rate": 6.655013065528803e-07, + "loss": 1.527, + "step": 147483 + }, + { + "epoch": 1.77, + "grad_norm": 5.737902647337267, + "learning_rate": 6.65291773216461e-07, + "loss": 0.8994, + "step": 147486 + }, + { + "epoch": 1.77, + "grad_norm": 4.048856363657171, + "learning_rate": 6.650822717360361e-07, + "loss": 0.9974, + "step": 147489 + }, + { + "epoch": 1.77, + "grad_norm": 7.933097236560042, + "learning_rate": 6.648728021123229e-07, + "loss": 1.474, + "step": 147492 + }, + { + "epoch": 1.77, + "grad_norm": 8.561882003741356, + "learning_rate": 6.646633643460387e-07, + "loss": 1.0837, + "step": 147495 + }, + { + "epoch": 1.77, + "grad_norm": 2.9374906959310505, + "learning_rate": 6.644539584378929e-07, + "loss": 1.2015, + "step": 147498 + }, + { + "epoch": 1.77, + "grad_norm": 17.779258354154628, + "learning_rate": 6.642445843886047e-07, + "loss": 1.2501, + "step": 147501 + }, + { + "epoch": 1.77, + "grad_norm": 8.604960093967732, + "learning_rate": 6.640352421988871e-07, + "loss": 1.1709, + "step": 147504 + }, + { + "epoch": 1.77, + "grad_norm": 14.16102629960495, + "learning_rate": 6.638259318694551e-07, + "loss": 1.183, + "step": 147507 + }, + { + "epoch": 1.77, + "grad_norm": 5.092160492713124, + "learning_rate": 6.636166534010213e-07, + "loss": 1.1908, + "step": 147510 + }, + { + "epoch": 1.77, + "grad_norm": 6.55657513011354, + "learning_rate": 6.634074067942997e-07, + "loss": 0.978, + "step": 147513 + }, + { + "epoch": 1.77, + "grad_norm": 12.664659258359533, + "learning_rate": 6.631981920500064e-07, + "loss": 0.9453, + "step": 147516 + }, + { + "epoch": 1.77, + "grad_norm": 10.033147844504585, + "learning_rate": 6.629890091688552e-07, + "loss": 1.0774, + "step": 147519 + }, + { + "epoch": 1.77, + "grad_norm": 2.9581554775460313, + "learning_rate": 6.627798581515598e-07, + "loss": 1.1586, + "step": 147522 + }, + { + "epoch": 1.77, + "grad_norm": 7.597613371117732, + "learning_rate": 6.625707389988323e-07, + "loss": 1.4843, + "step": 147525 + }, + { + "epoch": 1.77, + "grad_norm": 8.840907804998231, + "learning_rate": 6.623616517113895e-07, + "loss": 0.9175, + "step": 147528 + }, + { + "epoch": 1.77, + "grad_norm": 26.596102998977845, + "learning_rate": 6.6215259628994e-07, + "loss": 1.0695, + "step": 147531 + }, + { + "epoch": 1.77, + "grad_norm": 9.794066786846136, + "learning_rate": 6.61943572735203e-07, + "loss": 1.1801, + "step": 147534 + }, + { + "epoch": 1.77, + "grad_norm": 8.672011279262437, + "learning_rate": 6.61734581047887e-07, + "loss": 0.9216, + "step": 147537 + }, + { + "epoch": 1.77, + "grad_norm": 7.054980860678877, + "learning_rate": 6.61525621228708e-07, + "loss": 1.0544, + "step": 147540 + }, + { + "epoch": 1.77, + "grad_norm": 12.107197604622689, + "learning_rate": 6.613166932783777e-07, + "loss": 1.2212, + "step": 147543 + }, + { + "epoch": 1.77, + "grad_norm": 3.5542525209046762, + "learning_rate": 6.611077971976121e-07, + "loss": 1.2204, + "step": 147546 + }, + { + "epoch": 1.77, + "grad_norm": 9.47298186814096, + "learning_rate": 6.608989329871207e-07, + "loss": 1.3128, + "step": 147549 + }, + { + "epoch": 1.77, + "grad_norm": 9.888846007998389, + "learning_rate": 6.606901006476163e-07, + "loss": 1.1931, + "step": 147552 + }, + { + "epoch": 1.77, + "grad_norm": 13.172033646058903, + "learning_rate": 6.604813001798127e-07, + "loss": 1.3107, + "step": 147555 + }, + { + "epoch": 1.77, + "grad_norm": 8.52416678424826, + "learning_rate": 6.602725315844216e-07, + "loss": 1.2008, + "step": 147558 + }, + { + "epoch": 1.77, + "grad_norm": 8.814134896424369, + "learning_rate": 6.60063794862158e-07, + "loss": 1.4095, + "step": 147561 + }, + { + "epoch": 1.77, + "grad_norm": 10.950663960437394, + "learning_rate": 6.598550900137313e-07, + "loss": 1.4134, + "step": 147564 + }, + { + "epoch": 1.77, + "grad_norm": 9.379686600651619, + "learning_rate": 6.596464170398542e-07, + "loss": 1.1059, + "step": 147567 + }, + { + "epoch": 1.77, + "grad_norm": 14.198242042605635, + "learning_rate": 6.594377759412418e-07, + "loss": 1.1307, + "step": 147570 + }, + { + "epoch": 1.77, + "grad_norm": 8.571170650195954, + "learning_rate": 6.592291667186024e-07, + "loss": 0.8013, + "step": 147573 + }, + { + "epoch": 1.77, + "grad_norm": 5.944857942995137, + "learning_rate": 6.590205893726498e-07, + "loss": 1.0707, + "step": 147576 + }, + { + "epoch": 1.77, + "grad_norm": 4.045531833306365, + "learning_rate": 6.588120439040946e-07, + "loss": 1.0841, + "step": 147579 + }, + { + "epoch": 1.77, + "grad_norm": 7.1935050758174555, + "learning_rate": 6.586035303136496e-07, + "loss": 1.2033, + "step": 147582 + }, + { + "epoch": 1.77, + "grad_norm": 13.452084118860496, + "learning_rate": 6.583950486020252e-07, + "loss": 1.2052, + "step": 147585 + }, + { + "epoch": 1.77, + "grad_norm": 6.526447216304986, + "learning_rate": 6.581865987699354e-07, + "loss": 1.626, + "step": 147588 + }, + { + "epoch": 1.77, + "grad_norm": 8.078370124595095, + "learning_rate": 6.579781808180897e-07, + "loss": 0.9442, + "step": 147591 + }, + { + "epoch": 1.77, + "grad_norm": 3.9350254525000783, + "learning_rate": 6.577697947471983e-07, + "loss": 1.1342, + "step": 147594 + }, + { + "epoch": 1.77, + "grad_norm": 5.123096401273676, + "learning_rate": 6.575614405579734e-07, + "loss": 0.9793, + "step": 147597 + }, + { + "epoch": 1.77, + "grad_norm": 8.731389367158595, + "learning_rate": 6.573531182511261e-07, + "loss": 1.006, + "step": 147600 + }, + { + "epoch": 1.77, + "grad_norm": 14.372929384439411, + "learning_rate": 6.571448278273695e-07, + "loss": 1.11, + "step": 147603 + }, + { + "epoch": 1.77, + "grad_norm": 6.905495518789725, + "learning_rate": 6.569365692874097e-07, + "loss": 1.0349, + "step": 147606 + }, + { + "epoch": 1.77, + "grad_norm": 10.557418561445346, + "learning_rate": 6.567283426319604e-07, + "loss": 1.2974, + "step": 147609 + }, + { + "epoch": 1.77, + "grad_norm": 6.324696827549392, + "learning_rate": 6.565201478617345e-07, + "loss": 1.0427, + "step": 147612 + }, + { + "epoch": 1.78, + "grad_norm": 32.03417434100141, + "learning_rate": 6.563119849774379e-07, + "loss": 1.1013, + "step": 147615 + }, + { + "epoch": 1.78, + "grad_norm": 13.345578559784471, + "learning_rate": 6.561038539797826e-07, + "loss": 1.1744, + "step": 147618 + }, + { + "epoch": 1.78, + "grad_norm": 3.352745108289226, + "learning_rate": 6.558957548694789e-07, + "loss": 1.2144, + "step": 147621 + }, + { + "epoch": 1.78, + "grad_norm": 14.594729360972947, + "learning_rate": 6.556876876472373e-07, + "loss": 1.0623, + "step": 147624 + }, + { + "epoch": 1.78, + "grad_norm": 7.034588418316552, + "learning_rate": 6.554796523137685e-07, + "loss": 0.9815, + "step": 147627 + }, + { + "epoch": 1.78, + "grad_norm": 6.43833049634475, + "learning_rate": 6.55271648869783e-07, + "loss": 0.9377, + "step": 147630 + }, + { + "epoch": 1.78, + "grad_norm": 11.78256953303893, + "learning_rate": 6.550636773159868e-07, + "loss": 1.3354, + "step": 147633 + }, + { + "epoch": 1.78, + "grad_norm": 14.941075929233678, + "learning_rate": 6.54855737653095e-07, + "loss": 1.0172, + "step": 147636 + }, + { + "epoch": 1.78, + "grad_norm": 8.708365967058537, + "learning_rate": 6.546478298818127e-07, + "loss": 1.2754, + "step": 147639 + }, + { + "epoch": 1.78, + "grad_norm": 10.368647158843142, + "learning_rate": 6.544399540028523e-07, + "loss": 1.3356, + "step": 147642 + }, + { + "epoch": 1.78, + "grad_norm": 7.162903737096487, + "learning_rate": 6.542321100169203e-07, + "loss": 1.3211, + "step": 147645 + }, + { + "epoch": 1.78, + "grad_norm": 14.919909047377567, + "learning_rate": 6.540242979247279e-07, + "loss": 1.1964, + "step": 147648 + }, + { + "epoch": 1.78, + "grad_norm": 5.50578040951766, + "learning_rate": 6.538165177269851e-07, + "loss": 1.1175, + "step": 147651 + }, + { + "epoch": 1.78, + "grad_norm": 4.756021845779159, + "learning_rate": 6.536087694244009e-07, + "loss": 0.9642, + "step": 147654 + }, + { + "epoch": 1.78, + "grad_norm": 7.9993122194672015, + "learning_rate": 6.534010530176826e-07, + "loss": 1.35, + "step": 147657 + }, + { + "epoch": 1.78, + "grad_norm": 4.117531762842902, + "learning_rate": 6.531933685075397e-07, + "loss": 1.2319, + "step": 147660 + }, + { + "epoch": 1.78, + "grad_norm": 3.6968148791151574, + "learning_rate": 6.529857158946807e-07, + "loss": 1.3467, + "step": 147663 + }, + { + "epoch": 1.78, + "grad_norm": 4.171978040993445, + "learning_rate": 6.527780951798146e-07, + "loss": 1.4105, + "step": 147666 + }, + { + "epoch": 1.78, + "grad_norm": 4.304172852915358, + "learning_rate": 6.525705063636511e-07, + "loss": 1.0346, + "step": 147669 + }, + { + "epoch": 1.78, + "grad_norm": 8.014150563674114, + "learning_rate": 6.523629494468963e-07, + "loss": 1.0367, + "step": 147672 + }, + { + "epoch": 1.78, + "grad_norm": 4.551799431640442, + "learning_rate": 6.521554244302597e-07, + "loss": 0.9777, + "step": 147675 + }, + { + "epoch": 1.78, + "grad_norm": 17.838491624660488, + "learning_rate": 6.519479313144516e-07, + "loss": 1.3549, + "step": 147678 + }, + { + "epoch": 1.78, + "grad_norm": 8.254804135469387, + "learning_rate": 6.517404701001761e-07, + "loss": 1.1809, + "step": 147681 + }, + { + "epoch": 1.78, + "grad_norm": 173.11308158680205, + "learning_rate": 6.515330407881448e-07, + "loss": 1.4194, + "step": 147684 + }, + { + "epoch": 1.78, + "grad_norm": 6.0101499992228025, + "learning_rate": 6.513256433790627e-07, + "loss": 1.1198, + "step": 147687 + }, + { + "epoch": 1.78, + "grad_norm": 8.924341954690227, + "learning_rate": 6.511182778736381e-07, + "loss": 1.2163, + "step": 147690 + }, + { + "epoch": 1.78, + "grad_norm": 11.49432536777827, + "learning_rate": 6.509109442725803e-07, + "loss": 1.2047, + "step": 147693 + }, + { + "epoch": 1.78, + "grad_norm": 9.20972881515066, + "learning_rate": 6.507036425765967e-07, + "loss": 1.2514, + "step": 147696 + }, + { + "epoch": 1.78, + "grad_norm": 11.728941041556734, + "learning_rate": 6.504963727863934e-07, + "loss": 1.0913, + "step": 147699 + }, + { + "epoch": 1.78, + "grad_norm": 12.581643922606288, + "learning_rate": 6.502891349026774e-07, + "loss": 1.0449, + "step": 147702 + }, + { + "epoch": 1.78, + "grad_norm": 11.220905161322332, + "learning_rate": 6.500819289261573e-07, + "loss": 1.1945, + "step": 147705 + }, + { + "epoch": 1.78, + "grad_norm": 7.467831471061496, + "learning_rate": 6.49874754857539e-07, + "loss": 0.7827, + "step": 147708 + }, + { + "epoch": 1.78, + "grad_norm": 4.943806009853379, + "learning_rate": 6.496676126975332e-07, + "loss": 1.3893, + "step": 147711 + }, + { + "epoch": 1.78, + "grad_norm": 17.492985814297615, + "learning_rate": 6.494605024468403e-07, + "loss": 1.4554, + "step": 147714 + }, + { + "epoch": 1.78, + "grad_norm": 10.541805145188047, + "learning_rate": 6.492534241061721e-07, + "loss": 1.0744, + "step": 147717 + }, + { + "epoch": 1.78, + "grad_norm": 18.07610245907577, + "learning_rate": 6.490463776762346e-07, + "loss": 1.1566, + "step": 147720 + }, + { + "epoch": 1.78, + "grad_norm": 15.07868252718367, + "learning_rate": 6.48839363157735e-07, + "loss": 1.1831, + "step": 147723 + }, + { + "epoch": 1.78, + "grad_norm": 6.770351224162774, + "learning_rate": 6.48632380551375e-07, + "loss": 1.0998, + "step": 147726 + }, + { + "epoch": 1.78, + "grad_norm": 10.997376664270021, + "learning_rate": 6.484254298578652e-07, + "loss": 1.3132, + "step": 147729 + }, + { + "epoch": 1.78, + "grad_norm": 8.894627401524827, + "learning_rate": 6.482185110779116e-07, + "loss": 1.0317, + "step": 147732 + }, + { + "epoch": 1.78, + "grad_norm": 6.3260896454839335, + "learning_rate": 6.480116242122181e-07, + "loss": 0.9997, + "step": 147735 + }, + { + "epoch": 1.78, + "grad_norm": 9.24142509369523, + "learning_rate": 6.478047692614953e-07, + "loss": 1.2718, + "step": 147738 + }, + { + "epoch": 1.78, + "grad_norm": 5.257498020214463, + "learning_rate": 6.475979462264459e-07, + "loss": 1.2817, + "step": 147741 + }, + { + "epoch": 1.78, + "grad_norm": 4.634934675917346, + "learning_rate": 6.473911551077739e-07, + "loss": 1.485, + "step": 147744 + }, + { + "epoch": 1.78, + "grad_norm": 6.3763907758513545, + "learning_rate": 6.471843959061875e-07, + "loss": 1.2551, + "step": 147747 + }, + { + "epoch": 1.78, + "grad_norm": 7.845811453591781, + "learning_rate": 6.469776686223939e-07, + "loss": 0.7671, + "step": 147750 + }, + { + "epoch": 1.78, + "grad_norm": 6.702498819660354, + "learning_rate": 6.467709732570948e-07, + "loss": 1.4856, + "step": 147753 + }, + { + "epoch": 1.78, + "grad_norm": 7.473126589685094, + "learning_rate": 6.465643098109964e-07, + "loss": 1.3123, + "step": 147756 + }, + { + "epoch": 1.78, + "grad_norm": 4.091291366791242, + "learning_rate": 6.463576782848058e-07, + "loss": 1.3068, + "step": 147759 + }, + { + "epoch": 1.78, + "grad_norm": 5.4757973880583295, + "learning_rate": 6.461510786792291e-07, + "loss": 1.1654, + "step": 147762 + }, + { + "epoch": 1.78, + "grad_norm": 3.986442614685737, + "learning_rate": 6.459445109949692e-07, + "loss": 0.8843, + "step": 147765 + }, + { + "epoch": 1.78, + "grad_norm": 4.082962969968998, + "learning_rate": 6.457379752327287e-07, + "loss": 0.968, + "step": 147768 + }, + { + "epoch": 1.78, + "grad_norm": 5.567823827570626, + "learning_rate": 6.455314713932159e-07, + "loss": 1.6105, + "step": 147771 + }, + { + "epoch": 1.78, + "grad_norm": 5.921067961597147, + "learning_rate": 6.453249994771349e-07, + "loss": 1.0945, + "step": 147774 + }, + { + "epoch": 1.78, + "grad_norm": 2.4810820529380115, + "learning_rate": 6.451185594851927e-07, + "loss": 0.9148, + "step": 147777 + }, + { + "epoch": 1.78, + "grad_norm": 11.195713134042796, + "learning_rate": 6.449121514180878e-07, + "loss": 1.3963, + "step": 147780 + }, + { + "epoch": 1.78, + "grad_norm": 7.638570065635655, + "learning_rate": 6.447057752765307e-07, + "loss": 1.0546, + "step": 147783 + }, + { + "epoch": 1.78, + "grad_norm": 10.969622752307442, + "learning_rate": 6.444994310612218e-07, + "loss": 1.2246, + "step": 147786 + }, + { + "epoch": 1.78, + "grad_norm": 9.357438910736375, + "learning_rate": 6.442931187728652e-07, + "loss": 0.8877, + "step": 147789 + }, + { + "epoch": 1.78, + "grad_norm": 6.712682608934672, + "learning_rate": 6.44086838412169e-07, + "loss": 1.0832, + "step": 147792 + }, + { + "epoch": 1.78, + "grad_norm": 10.490439231720607, + "learning_rate": 6.438805899798329e-07, + "loss": 1.1326, + "step": 147795 + }, + { + "epoch": 1.78, + "grad_norm": 4.3353434395153805, + "learning_rate": 6.436743734765616e-07, + "loss": 1.0433, + "step": 147798 + }, + { + "epoch": 1.78, + "grad_norm": 6.89713594216702, + "learning_rate": 6.434681889030592e-07, + "loss": 1.1828, + "step": 147801 + }, + { + "epoch": 1.78, + "grad_norm": 2.9348719085243156, + "learning_rate": 6.432620362600317e-07, + "loss": 1.2026, + "step": 147804 + }, + { + "epoch": 1.78, + "grad_norm": 2.3374066592134555, + "learning_rate": 6.430559155481808e-07, + "loss": 1.1257, + "step": 147807 + }, + { + "epoch": 1.78, + "grad_norm": 8.928170818444222, + "learning_rate": 6.428498267682082e-07, + "loss": 1.205, + "step": 147810 + }, + { + "epoch": 1.78, + "grad_norm": 5.97962589179662, + "learning_rate": 6.426437699208188e-07, + "loss": 1.0775, + "step": 147813 + }, + { + "epoch": 1.78, + "grad_norm": 14.487426045420605, + "learning_rate": 6.424377450067154e-07, + "loss": 1.4247, + "step": 147816 + }, + { + "epoch": 1.78, + "grad_norm": 17.699780412243847, + "learning_rate": 6.42231752026603e-07, + "loss": 1.1868, + "step": 147819 + }, + { + "epoch": 1.78, + "grad_norm": 5.137805859424005, + "learning_rate": 6.420257909811812e-07, + "loss": 1.668, + "step": 147822 + }, + { + "epoch": 1.78, + "grad_norm": 5.730747642940294, + "learning_rate": 6.418198618711569e-07, + "loss": 1.291, + "step": 147825 + }, + { + "epoch": 1.78, + "grad_norm": 15.372102294504058, + "learning_rate": 6.416139646972286e-07, + "loss": 1.3567, + "step": 147828 + }, + { + "epoch": 1.78, + "grad_norm": 5.966802219686272, + "learning_rate": 6.414080994601024e-07, + "loss": 1.45, + "step": 147831 + }, + { + "epoch": 1.78, + "grad_norm": 9.687713377280328, + "learning_rate": 6.412022661604778e-07, + "loss": 1.2125, + "step": 147834 + }, + { + "epoch": 1.78, + "grad_norm": 8.891593896848383, + "learning_rate": 6.409964647990596e-07, + "loss": 1.4932, + "step": 147837 + }, + { + "epoch": 1.78, + "grad_norm": 2.916925951626066, + "learning_rate": 6.407906953765486e-07, + "loss": 0.9399, + "step": 147840 + }, + { + "epoch": 1.78, + "grad_norm": 19.961316588721143, + "learning_rate": 6.405849578936474e-07, + "loss": 1.2008, + "step": 147843 + }, + { + "epoch": 1.78, + "grad_norm": 7.0630274259625105, + "learning_rate": 6.40379252351061e-07, + "loss": 0.8921, + "step": 147846 + }, + { + "epoch": 1.78, + "grad_norm": 6.258000640064167, + "learning_rate": 6.40173578749489e-07, + "loss": 1.07, + "step": 147849 + }, + { + "epoch": 1.78, + "grad_norm": 16.811956281047564, + "learning_rate": 6.399679370896306e-07, + "loss": 1.1481, + "step": 147852 + }, + { + "epoch": 1.78, + "grad_norm": 4.2893060130501155, + "learning_rate": 6.397623273721909e-07, + "loss": 1.2472, + "step": 147855 + }, + { + "epoch": 1.78, + "grad_norm": 3.8699660997106617, + "learning_rate": 6.395567495978716e-07, + "loss": 1.0709, + "step": 147858 + }, + { + "epoch": 1.78, + "grad_norm": 27.543794513003697, + "learning_rate": 6.393512037673733e-07, + "loss": 0.877, + "step": 147861 + }, + { + "epoch": 1.78, + "grad_norm": 6.26666972355265, + "learning_rate": 6.391456898813963e-07, + "loss": 1.344, + "step": 147864 + }, + { + "epoch": 1.78, + "grad_norm": 4.251901086857094, + "learning_rate": 6.38940207940646e-07, + "loss": 1.4559, + "step": 147867 + }, + { + "epoch": 1.78, + "grad_norm": 8.804952214152427, + "learning_rate": 6.387347579458181e-07, + "loss": 1.1522, + "step": 147870 + }, + { + "epoch": 1.78, + "grad_norm": 3.864921977114711, + "learning_rate": 6.385293398976189e-07, + "loss": 1.1383, + "step": 147873 + }, + { + "epoch": 1.78, + "grad_norm": 14.663079928994803, + "learning_rate": 6.383239537967456e-07, + "loss": 1.1379, + "step": 147876 + }, + { + "epoch": 1.78, + "grad_norm": 4.2622929834196315, + "learning_rate": 6.381185996438998e-07, + "loss": 1.1292, + "step": 147879 + }, + { + "epoch": 1.78, + "grad_norm": 6.027187277978264, + "learning_rate": 6.379132774397844e-07, + "loss": 1.1548, + "step": 147882 + }, + { + "epoch": 1.78, + "grad_norm": 4.981178305601079, + "learning_rate": 6.377079871851e-07, + "loss": 0.8885, + "step": 147885 + }, + { + "epoch": 1.78, + "grad_norm": 6.381743799174352, + "learning_rate": 6.375027288805436e-07, + "loss": 1.2661, + "step": 147888 + }, + { + "epoch": 1.78, + "grad_norm": 7.826760061541736, + "learning_rate": 6.372975025268203e-07, + "loss": 1.3822, + "step": 147891 + }, + { + "epoch": 1.78, + "grad_norm": 6.619421492652584, + "learning_rate": 6.370923081246261e-07, + "loss": 0.8021, + "step": 147894 + }, + { + "epoch": 1.78, + "grad_norm": 3.4564754685830095, + "learning_rate": 6.368871456746639e-07, + "loss": 0.989, + "step": 147897 + }, + { + "epoch": 1.78, + "grad_norm": 8.882652227281659, + "learning_rate": 6.366820151776343e-07, + "loss": 1.4009, + "step": 147900 + }, + { + "epoch": 1.78, + "grad_norm": 15.168295635162938, + "learning_rate": 6.364769166342355e-07, + "loss": 1.1989, + "step": 147903 + }, + { + "epoch": 1.78, + "grad_norm": 7.636203043204253, + "learning_rate": 6.36271850045167e-07, + "loss": 0.8776, + "step": 147906 + }, + { + "epoch": 1.78, + "grad_norm": 4.195985810070443, + "learning_rate": 6.360668154111327e-07, + "loss": 0.9993, + "step": 147909 + }, + { + "epoch": 1.78, + "grad_norm": 5.1756778774325864, + "learning_rate": 6.358618127328276e-07, + "loss": 1.166, + "step": 147912 + }, + { + "epoch": 1.78, + "grad_norm": 12.79728180421875, + "learning_rate": 6.356568420109543e-07, + "loss": 1.2838, + "step": 147915 + }, + { + "epoch": 1.78, + "grad_norm": 9.729551009794433, + "learning_rate": 6.354519032462104e-07, + "loss": 1.1868, + "step": 147918 + }, + { + "epoch": 1.78, + "grad_norm": 7.055190449842256, + "learning_rate": 6.352469964392949e-07, + "loss": 1.1571, + "step": 147921 + }, + { + "epoch": 1.78, + "grad_norm": 13.078600235438959, + "learning_rate": 6.350421215909097e-07, + "loss": 1.1095, + "step": 147924 + }, + { + "epoch": 1.78, + "grad_norm": 12.62873004765156, + "learning_rate": 6.348372787017532e-07, + "loss": 1.2738, + "step": 147927 + }, + { + "epoch": 1.78, + "grad_norm": 6.611962708949566, + "learning_rate": 6.346324677725213e-07, + "loss": 0.7706, + "step": 147930 + }, + { + "epoch": 1.78, + "grad_norm": 8.178738985251206, + "learning_rate": 6.34427688803918e-07, + "loss": 1.026, + "step": 147933 + }, + { + "epoch": 1.78, + "grad_norm": 6.880674944980236, + "learning_rate": 6.342229417966383e-07, + "loss": 1.0329, + "step": 147936 + }, + { + "epoch": 1.78, + "grad_norm": 6.422110647031064, + "learning_rate": 6.340182267513828e-07, + "loss": 0.9551, + "step": 147939 + }, + { + "epoch": 1.78, + "grad_norm": 14.278810295031263, + "learning_rate": 6.338135436688486e-07, + "loss": 0.738, + "step": 147942 + }, + { + "epoch": 1.78, + "grad_norm": 14.232458335826635, + "learning_rate": 6.336088925497352e-07, + "loss": 1.3172, + "step": 147945 + }, + { + "epoch": 1.78, + "grad_norm": 7.612488219226793, + "learning_rate": 6.33404273394741e-07, + "loss": 1.1382, + "step": 147948 + }, + { + "epoch": 1.78, + "grad_norm": 7.162162736951217, + "learning_rate": 6.331996862045631e-07, + "loss": 1.4781, + "step": 147951 + }, + { + "epoch": 1.78, + "grad_norm": 10.046909853345015, + "learning_rate": 6.329951309799031e-07, + "loss": 1.165, + "step": 147954 + }, + { + "epoch": 1.78, + "grad_norm": 20.240456504950597, + "learning_rate": 6.327906077214574e-07, + "loss": 0.8282, + "step": 147957 + }, + { + "epoch": 1.78, + "grad_norm": 2.5470274758394713, + "learning_rate": 6.325861164299207e-07, + "loss": 0.9503, + "step": 147960 + }, + { + "epoch": 1.78, + "grad_norm": 15.04300957212905, + "learning_rate": 6.323816571059937e-07, + "loss": 1.3223, + "step": 147963 + }, + { + "epoch": 1.78, + "grad_norm": 9.731092718561325, + "learning_rate": 6.321772297503758e-07, + "loss": 1.1542, + "step": 147966 + }, + { + "epoch": 1.78, + "grad_norm": 3.8070008144013325, + "learning_rate": 6.31972834363761e-07, + "loss": 1.0171, + "step": 147969 + }, + { + "epoch": 1.78, + "grad_norm": 3.2015698255531873, + "learning_rate": 6.317684709468486e-07, + "loss": 1.0276, + "step": 147972 + }, + { + "epoch": 1.78, + "grad_norm": 7.10379621629183, + "learning_rate": 6.315641395003369e-07, + "loss": 0.9426, + "step": 147975 + }, + { + "epoch": 1.78, + "grad_norm": 3.5017979367491208, + "learning_rate": 6.31359840024921e-07, + "loss": 0.8977, + "step": 147978 + }, + { + "epoch": 1.78, + "grad_norm": 4.958875751955894, + "learning_rate": 6.311555725213014e-07, + "loss": 1.2128, + "step": 147981 + }, + { + "epoch": 1.78, + "grad_norm": 5.709402728733853, + "learning_rate": 6.30951336990172e-07, + "loss": 0.9345, + "step": 147984 + }, + { + "epoch": 1.78, + "grad_norm": 6.163264813569186, + "learning_rate": 6.307471334322301e-07, + "loss": 1.0933, + "step": 147987 + }, + { + "epoch": 1.78, + "grad_norm": 5.262768531916946, + "learning_rate": 6.30542961848174e-07, + "loss": 0.8252, + "step": 147990 + }, + { + "epoch": 1.78, + "grad_norm": 8.160408269804698, + "learning_rate": 6.303388222387008e-07, + "loss": 1.481, + "step": 147993 + }, + { + "epoch": 1.78, + "grad_norm": 7.760240028932031, + "learning_rate": 6.301347146045055e-07, + "loss": 0.884, + "step": 147996 + }, + { + "epoch": 1.78, + "grad_norm": 12.990900760771504, + "learning_rate": 6.299306389462867e-07, + "loss": 1.2109, + "step": 147999 + }, + { + "epoch": 1.78, + "grad_norm": 5.6332449134258855, + "learning_rate": 6.29726595264738e-07, + "loss": 0.7859, + "step": 148002 + }, + { + "epoch": 1.78, + "grad_norm": 8.287795944895002, + "learning_rate": 6.295225835605567e-07, + "loss": 1.1919, + "step": 148005 + }, + { + "epoch": 1.78, + "grad_norm": 4.768097894441171, + "learning_rate": 6.293186038344413e-07, + "loss": 1.0427, + "step": 148008 + }, + { + "epoch": 1.78, + "grad_norm": 6.70321458458075, + "learning_rate": 6.291146560870853e-07, + "loss": 1.1691, + "step": 148011 + }, + { + "epoch": 1.78, + "grad_norm": 24.913927883268546, + "learning_rate": 6.289107403191852e-07, + "loss": 1.2172, + "step": 148014 + }, + { + "epoch": 1.78, + "grad_norm": 4.661844928443566, + "learning_rate": 6.287068565314392e-07, + "loss": 1.358, + "step": 148017 + }, + { + "epoch": 1.78, + "grad_norm": 5.19517657504656, + "learning_rate": 6.285030047245411e-07, + "loss": 0.9719, + "step": 148020 + }, + { + "epoch": 1.78, + "grad_norm": 20.18746185091759, + "learning_rate": 6.282991848991849e-07, + "loss": 1.0058, + "step": 148023 + }, + { + "epoch": 1.78, + "grad_norm": 307.1340601637862, + "learning_rate": 6.280953970560676e-07, + "loss": 1.4347, + "step": 148026 + }, + { + "epoch": 1.78, + "grad_norm": 10.39364619434046, + "learning_rate": 6.278916411958846e-07, + "loss": 1.319, + "step": 148029 + }, + { + "epoch": 1.78, + "grad_norm": 11.115932816102548, + "learning_rate": 6.276879173193328e-07, + "loss": 1.2886, + "step": 148032 + }, + { + "epoch": 1.78, + "grad_norm": 9.87934746012296, + "learning_rate": 6.274842254271074e-07, + "loss": 1.084, + "step": 148035 + }, + { + "epoch": 1.78, + "grad_norm": 6.984821600676067, + "learning_rate": 6.27280565519901e-07, + "loss": 1.0869, + "step": 148038 + }, + { + "epoch": 1.78, + "grad_norm": 5.168349105775809, + "learning_rate": 6.27076937598411e-07, + "loss": 1.0203, + "step": 148041 + }, + { + "epoch": 1.78, + "grad_norm": 8.952291779898907, + "learning_rate": 6.268733416633299e-07, + "loss": 0.882, + "step": 148044 + }, + { + "epoch": 1.78, + "grad_norm": 16.016858744131124, + "learning_rate": 6.266697777153552e-07, + "loss": 1.5197, + "step": 148047 + }, + { + "epoch": 1.78, + "grad_norm": 12.708221372086465, + "learning_rate": 6.264662457551795e-07, + "loss": 1.8838, + "step": 148050 + }, + { + "epoch": 1.78, + "grad_norm": 31.078656328898262, + "learning_rate": 6.262627457834969e-07, + "loss": 1.0961, + "step": 148053 + }, + { + "epoch": 1.78, + "grad_norm": 9.163811239867838, + "learning_rate": 6.260592778010044e-07, + "loss": 1.2135, + "step": 148056 + }, + { + "epoch": 1.78, + "grad_norm": 8.40183137921938, + "learning_rate": 6.25855841808396e-07, + "loss": 0.9658, + "step": 148059 + }, + { + "epoch": 1.78, + "grad_norm": 10.705145267053448, + "learning_rate": 6.256524378063656e-07, + "loss": 1.435, + "step": 148062 + }, + { + "epoch": 1.78, + "grad_norm": 8.830471766027316, + "learning_rate": 6.254490657956047e-07, + "loss": 0.9362, + "step": 148065 + }, + { + "epoch": 1.78, + "grad_norm": 11.645889536244026, + "learning_rate": 6.252457257768096e-07, + "loss": 1.2107, + "step": 148068 + }, + { + "epoch": 1.78, + "grad_norm": 5.713223054500375, + "learning_rate": 6.250424177506742e-07, + "loss": 1.1133, + "step": 148071 + }, + { + "epoch": 1.78, + "grad_norm": 8.013735121190127, + "learning_rate": 6.248391417178946e-07, + "loss": 1.2549, + "step": 148074 + }, + { + "epoch": 1.78, + "grad_norm": 7.22052632903702, + "learning_rate": 6.246358976791589e-07, + "loss": 1.1611, + "step": 148077 + }, + { + "epoch": 1.78, + "grad_norm": 10.504613658617425, + "learning_rate": 6.244326856351646e-07, + "loss": 1.056, + "step": 148080 + }, + { + "epoch": 1.78, + "grad_norm": 55.91476240961025, + "learning_rate": 6.242295055866065e-07, + "loss": 0.9023, + "step": 148083 + }, + { + "epoch": 1.78, + "grad_norm": 12.556078622999653, + "learning_rate": 6.240263575341743e-07, + "loss": 0.901, + "step": 148086 + }, + { + "epoch": 1.78, + "grad_norm": 10.968386385915057, + "learning_rate": 6.238232414785639e-07, + "loss": 0.7696, + "step": 148089 + }, + { + "epoch": 1.78, + "grad_norm": 15.284094212418383, + "learning_rate": 6.236201574204659e-07, + "loss": 1.2694, + "step": 148092 + }, + { + "epoch": 1.78, + "grad_norm": 9.43360478276379, + "learning_rate": 6.234171053605753e-07, + "loss": 1.2683, + "step": 148095 + }, + { + "epoch": 1.78, + "grad_norm": 4.061047046405929, + "learning_rate": 6.232140852995849e-07, + "loss": 0.8879, + "step": 148098 + }, + { + "epoch": 1.78, + "grad_norm": 2.5066402581646408, + "learning_rate": 6.230110972381887e-07, + "loss": 1.1732, + "step": 148101 + }, + { + "epoch": 1.78, + "grad_norm": 5.757103664219391, + "learning_rate": 6.228081411770781e-07, + "loss": 0.9229, + "step": 148104 + }, + { + "epoch": 1.78, + "grad_norm": 15.127758132654478, + "learning_rate": 6.226052171169439e-07, + "loss": 1.2622, + "step": 148107 + }, + { + "epoch": 1.78, + "grad_norm": 13.030951712835893, + "learning_rate": 6.224023250584798e-07, + "loss": 1.2999, + "step": 148110 + }, + { + "epoch": 1.78, + "grad_norm": 6.699024839800211, + "learning_rate": 6.2219946500238e-07, + "loss": 1.4579, + "step": 148113 + }, + { + "epoch": 1.78, + "grad_norm": 9.8619178000602, + "learning_rate": 6.219966369493358e-07, + "loss": 1.0095, + "step": 148116 + }, + { + "epoch": 1.78, + "grad_norm": 4.367510346402942, + "learning_rate": 6.21793840900039e-07, + "loss": 1.6147, + "step": 148119 + }, + { + "epoch": 1.78, + "grad_norm": 8.526285695668102, + "learning_rate": 6.215910768551803e-07, + "loss": 0.9872, + "step": 148122 + }, + { + "epoch": 1.78, + "grad_norm": 6.713164585796707, + "learning_rate": 6.213883448154556e-07, + "loss": 1.1593, + "step": 148125 + }, + { + "epoch": 1.78, + "grad_norm": 6.263867994494417, + "learning_rate": 6.211856447815545e-07, + "loss": 0.8492, + "step": 148128 + }, + { + "epoch": 1.78, + "grad_norm": 5.338533473558901, + "learning_rate": 6.209829767541664e-07, + "loss": 1.1491, + "step": 148131 + }, + { + "epoch": 1.78, + "grad_norm": 7.913546151065944, + "learning_rate": 6.207803407339863e-07, + "loss": 1.1093, + "step": 148134 + }, + { + "epoch": 1.78, + "grad_norm": 18.724893364150997, + "learning_rate": 6.205777367217036e-07, + "loss": 1.1834, + "step": 148137 + }, + { + "epoch": 1.78, + "grad_norm": 4.422737999357182, + "learning_rate": 6.203751647180101e-07, + "loss": 1.1334, + "step": 148140 + }, + { + "epoch": 1.78, + "grad_norm": 8.694863875368709, + "learning_rate": 6.201726247236007e-07, + "loss": 1.4685, + "step": 148143 + }, + { + "epoch": 1.78, + "grad_norm": 12.665538607775895, + "learning_rate": 6.199701167391626e-07, + "loss": 0.9345, + "step": 148146 + }, + { + "epoch": 1.78, + "grad_norm": 23.17019593249882, + "learning_rate": 6.197676407653852e-07, + "loss": 0.9901, + "step": 148149 + }, + { + "epoch": 1.78, + "grad_norm": 6.7557318580634815, + "learning_rate": 6.195651968029636e-07, + "loss": 1.0678, + "step": 148152 + }, + { + "epoch": 1.78, + "grad_norm": 6.411510832089677, + "learning_rate": 6.193627848525885e-07, + "loss": 1.0859, + "step": 148155 + }, + { + "epoch": 1.78, + "grad_norm": 15.032415695907536, + "learning_rate": 6.19160404914948e-07, + "loss": 1.151, + "step": 148158 + }, + { + "epoch": 1.78, + "grad_norm": 8.652441995760032, + "learning_rate": 6.189580569907327e-07, + "loss": 1.1307, + "step": 148161 + }, + { + "epoch": 1.78, + "grad_norm": 2.771142368087123, + "learning_rate": 6.187557410806355e-07, + "loss": 1.3395, + "step": 148164 + }, + { + "epoch": 1.78, + "grad_norm": 9.850397394660977, + "learning_rate": 6.18553457185348e-07, + "loss": 0.9542, + "step": 148167 + }, + { + "epoch": 1.78, + "grad_norm": 4.653935605310589, + "learning_rate": 6.183512053055574e-07, + "loss": 0.9213, + "step": 148170 + }, + { + "epoch": 1.78, + "grad_norm": 7.254844842107154, + "learning_rate": 6.181489854419542e-07, + "loss": 1.1765, + "step": 148173 + }, + { + "epoch": 1.78, + "grad_norm": 5.325499794199849, + "learning_rate": 6.17946797595228e-07, + "loss": 1.2619, + "step": 148176 + }, + { + "epoch": 1.78, + "grad_norm": 5.052156642782501, + "learning_rate": 6.177446417660704e-07, + "loss": 1.4525, + "step": 148179 + }, + { + "epoch": 1.78, + "grad_norm": 12.415258143962093, + "learning_rate": 6.175425179551731e-07, + "loss": 1.284, + "step": 148182 + }, + { + "epoch": 1.78, + "grad_norm": 8.045437873411696, + "learning_rate": 6.173404261632221e-07, + "loss": 0.9364, + "step": 148185 + }, + { + "epoch": 1.78, + "grad_norm": 12.285816576997423, + "learning_rate": 6.171383663909092e-07, + "loss": 1.0735, + "step": 148188 + }, + { + "epoch": 1.78, + "grad_norm": 8.844587860970046, + "learning_rate": 6.169363386389226e-07, + "loss": 1.1224, + "step": 148191 + }, + { + "epoch": 1.78, + "grad_norm": 6.2732967509796635, + "learning_rate": 6.16734342907952e-07, + "loss": 0.9003, + "step": 148194 + }, + { + "epoch": 1.78, + "grad_norm": 14.960798249245801, + "learning_rate": 6.165323791986888e-07, + "loss": 1.3793, + "step": 148197 + }, + { + "epoch": 1.78, + "grad_norm": 6.764633789479847, + "learning_rate": 6.163304475118193e-07, + "loss": 0.9298, + "step": 148200 + }, + { + "epoch": 1.78, + "grad_norm": 6.116877538931651, + "learning_rate": 6.161285478480338e-07, + "loss": 1.2722, + "step": 148203 + }, + { + "epoch": 1.78, + "grad_norm": 14.71229280742266, + "learning_rate": 6.159266802080222e-07, + "loss": 0.9716, + "step": 148206 + }, + { + "epoch": 1.78, + "grad_norm": 5.303973698997577, + "learning_rate": 6.157248445924735e-07, + "loss": 1.0651, + "step": 148209 + }, + { + "epoch": 1.78, + "grad_norm": 6.223712270646152, + "learning_rate": 6.155230410020763e-07, + "loss": 0.8304, + "step": 148212 + }, + { + "epoch": 1.78, + "grad_norm": 5.75202141098439, + "learning_rate": 6.153212694375176e-07, + "loss": 1.0812, + "step": 148215 + }, + { + "epoch": 1.78, + "grad_norm": 13.708099926501308, + "learning_rate": 6.15119529899486e-07, + "loss": 1.092, + "step": 148218 + }, + { + "epoch": 1.78, + "grad_norm": 4.467782007192738, + "learning_rate": 6.149178223886709e-07, + "loss": 1.447, + "step": 148221 + }, + { + "epoch": 1.78, + "grad_norm": 4.5328474981261495, + "learning_rate": 6.147161469057628e-07, + "loss": 0.9376, + "step": 148224 + }, + { + "epoch": 1.78, + "grad_norm": 7.065642994653467, + "learning_rate": 6.145145034514466e-07, + "loss": 1.0101, + "step": 148227 + }, + { + "epoch": 1.78, + "grad_norm": 9.551300780852012, + "learning_rate": 6.143128920264118e-07, + "loss": 1.2205, + "step": 148230 + }, + { + "epoch": 1.78, + "grad_norm": 14.090486051446053, + "learning_rate": 6.14111312631348e-07, + "loss": 0.8882, + "step": 148233 + }, + { + "epoch": 1.78, + "grad_norm": 14.269140293724005, + "learning_rate": 6.139097652669413e-07, + "loss": 0.9642, + "step": 148236 + }, + { + "epoch": 1.78, + "grad_norm": 6.8887898669385725, + "learning_rate": 6.137082499338775e-07, + "loss": 1.0667, + "step": 148239 + }, + { + "epoch": 1.78, + "grad_norm": 4.975408871218054, + "learning_rate": 6.135067666328476e-07, + "loss": 1.568, + "step": 148242 + }, + { + "epoch": 1.78, + "grad_norm": 14.846444304313064, + "learning_rate": 6.133053153645374e-07, + "loss": 0.8888, + "step": 148245 + }, + { + "epoch": 1.78, + "grad_norm": 5.610973841598496, + "learning_rate": 6.131038961296354e-07, + "loss": 0.9391, + "step": 148248 + }, + { + "epoch": 1.78, + "grad_norm": 118.19813000014992, + "learning_rate": 6.129025089288298e-07, + "loss": 1.3119, + "step": 148251 + }, + { + "epoch": 1.78, + "grad_norm": 4.179592341929775, + "learning_rate": 6.127011537628069e-07, + "loss": 0.9898, + "step": 148254 + }, + { + "epoch": 1.78, + "grad_norm": 3.4349094685404586, + "learning_rate": 6.124998306322516e-07, + "loss": 1.1067, + "step": 148257 + }, + { + "epoch": 1.78, + "grad_norm": 7.015063180578128, + "learning_rate": 6.122985395378544e-07, + "loss": 1.1085, + "step": 148260 + }, + { + "epoch": 1.78, + "grad_norm": 5.8758209084247275, + "learning_rate": 6.120972804803016e-07, + "loss": 1.2075, + "step": 148263 + }, + { + "epoch": 1.78, + "grad_norm": 2.664319073907162, + "learning_rate": 6.11896053460277e-07, + "loss": 1.0609, + "step": 148266 + }, + { + "epoch": 1.78, + "grad_norm": 3.2753757076280237, + "learning_rate": 6.11694858478471e-07, + "loss": 1.3964, + "step": 148269 + }, + { + "epoch": 1.78, + "grad_norm": 23.162220517132614, + "learning_rate": 6.114936955355677e-07, + "loss": 0.7414, + "step": 148272 + }, + { + "epoch": 1.78, + "grad_norm": 10.641036462301708, + "learning_rate": 6.112925646322565e-07, + "loss": 1.0893, + "step": 148275 + }, + { + "epoch": 1.78, + "grad_norm": 6.785597897264467, + "learning_rate": 6.110914657692224e-07, + "loss": 1.4969, + "step": 148278 + }, + { + "epoch": 1.78, + "grad_norm": 2.267254567076991, + "learning_rate": 6.108903989471493e-07, + "loss": 1.3015, + "step": 148281 + }, + { + "epoch": 1.78, + "grad_norm": 10.649043221618365, + "learning_rate": 6.106893641667255e-07, + "loss": 0.937, + "step": 148284 + }, + { + "epoch": 1.78, + "grad_norm": 7.335877398501951, + "learning_rate": 6.104883614286373e-07, + "loss": 1.1835, + "step": 148287 + }, + { + "epoch": 1.78, + "grad_norm": 15.014935535812304, + "learning_rate": 6.102873907335716e-07, + "loss": 0.9073, + "step": 148290 + }, + { + "epoch": 1.78, + "grad_norm": 10.931624218768746, + "learning_rate": 6.100864520822102e-07, + "loss": 1.1494, + "step": 148293 + }, + { + "epoch": 1.78, + "grad_norm": 11.380533226391455, + "learning_rate": 6.09885545475245e-07, + "loss": 1.3117, + "step": 148296 + }, + { + "epoch": 1.78, + "grad_norm": 5.149802861617022, + "learning_rate": 6.096846709133552e-07, + "loss": 1.1253, + "step": 148299 + }, + { + "epoch": 1.78, + "grad_norm": 3.828881737023422, + "learning_rate": 6.094838283972293e-07, + "loss": 0.9706, + "step": 148302 + }, + { + "epoch": 1.78, + "grad_norm": 12.44663126557745, + "learning_rate": 6.092830179275544e-07, + "loss": 1.0479, + "step": 148305 + }, + { + "epoch": 1.78, + "grad_norm": 8.505153603530758, + "learning_rate": 6.090822395050133e-07, + "loss": 1.3194, + "step": 148308 + }, + { + "epoch": 1.78, + "grad_norm": 8.761747403578797, + "learning_rate": 6.088814931302911e-07, + "loss": 0.9438, + "step": 148311 + }, + { + "epoch": 1.78, + "grad_norm": 8.173237007499772, + "learning_rate": 6.086807788040738e-07, + "loss": 1.1508, + "step": 148314 + }, + { + "epoch": 1.78, + "grad_norm": 12.02193967543828, + "learning_rate": 6.084800965270476e-07, + "loss": 0.9071, + "step": 148317 + }, + { + "epoch": 1.78, + "grad_norm": 5.946713576948068, + "learning_rate": 6.082794462998964e-07, + "loss": 0.9304, + "step": 148320 + }, + { + "epoch": 1.78, + "grad_norm": 4.855003811240741, + "learning_rate": 6.080788281233019e-07, + "loss": 1.1329, + "step": 148323 + }, + { + "epoch": 1.78, + "grad_norm": 7.895143354534653, + "learning_rate": 6.078782419979523e-07, + "loss": 0.9509, + "step": 148326 + }, + { + "epoch": 1.78, + "grad_norm": 5.659724333494531, + "learning_rate": 6.076776879245317e-07, + "loss": 1.0012, + "step": 148329 + }, + { + "epoch": 1.78, + "grad_norm": 4.196855864586545, + "learning_rate": 6.074771659037248e-07, + "loss": 1.2862, + "step": 148332 + }, + { + "epoch": 1.78, + "grad_norm": 16.3339911821148, + "learning_rate": 6.072766759362148e-07, + "loss": 1.1099, + "step": 148335 + }, + { + "epoch": 1.78, + "grad_norm": 5.264640520565788, + "learning_rate": 6.070762180226864e-07, + "loss": 1.3935, + "step": 148338 + }, + { + "epoch": 1.78, + "grad_norm": 6.778873406823685, + "learning_rate": 6.068757921638224e-07, + "loss": 1.0989, + "step": 148341 + }, + { + "epoch": 1.78, + "grad_norm": 5.910153290993891, + "learning_rate": 6.066753983603102e-07, + "loss": 1.3385, + "step": 148344 + }, + { + "epoch": 1.78, + "grad_norm": 6.941951388707666, + "learning_rate": 6.064750366128291e-07, + "loss": 0.9559, + "step": 148347 + }, + { + "epoch": 1.78, + "grad_norm": 15.385122796414516, + "learning_rate": 6.062747069220653e-07, + "loss": 1.2055, + "step": 148350 + }, + { + "epoch": 1.78, + "grad_norm": 11.960918826075709, + "learning_rate": 6.060744092887027e-07, + "loss": 1.4439, + "step": 148353 + }, + { + "epoch": 1.78, + "grad_norm": 12.039940885533962, + "learning_rate": 6.058741437134241e-07, + "loss": 1.2493, + "step": 148356 + }, + { + "epoch": 1.78, + "grad_norm": 24.94673833555495, + "learning_rate": 6.056739101969156e-07, + "loss": 1.2853, + "step": 148359 + }, + { + "epoch": 1.78, + "grad_norm": 17.301383867905198, + "learning_rate": 6.054737087398566e-07, + "loss": 1.1799, + "step": 148362 + }, + { + "epoch": 1.78, + "grad_norm": 9.934688292530504, + "learning_rate": 6.052735393429321e-07, + "loss": 1.3871, + "step": 148365 + }, + { + "epoch": 1.78, + "grad_norm": 14.823669568869224, + "learning_rate": 6.050734020068238e-07, + "loss": 1.0055, + "step": 148368 + }, + { + "epoch": 1.78, + "grad_norm": 9.237990990143032, + "learning_rate": 6.04873296732218e-07, + "loss": 1.3733, + "step": 148371 + }, + { + "epoch": 1.78, + "grad_norm": 6.618464731043893, + "learning_rate": 6.046732235197938e-07, + "loss": 0.9646, + "step": 148374 + }, + { + "epoch": 1.78, + "grad_norm": 16.75836653067647, + "learning_rate": 6.044731823702365e-07, + "loss": 1.243, + "step": 148377 + }, + { + "epoch": 1.78, + "grad_norm": 10.20348034821249, + "learning_rate": 6.042731732842289e-07, + "loss": 1.2406, + "step": 148380 + }, + { + "epoch": 1.78, + "grad_norm": 8.845009156611995, + "learning_rate": 6.040731962624513e-07, + "loss": 0.9367, + "step": 148383 + }, + { + "epoch": 1.78, + "grad_norm": 5.661418764844572, + "learning_rate": 6.038732513055878e-07, + "loss": 1.3146, + "step": 148386 + }, + { + "epoch": 1.78, + "grad_norm": 25.121115784582848, + "learning_rate": 6.036733384143201e-07, + "loss": 1.4144, + "step": 148389 + }, + { + "epoch": 1.78, + "grad_norm": 9.040678976470712, + "learning_rate": 6.034734575893309e-07, + "loss": 1.5164, + "step": 148392 + }, + { + "epoch": 1.78, + "grad_norm": 13.169654990344682, + "learning_rate": 6.032736088313018e-07, + "loss": 1.0544, + "step": 148395 + }, + { + "epoch": 1.78, + "grad_norm": 4.441051499132571, + "learning_rate": 6.030737921409169e-07, + "loss": 1.2786, + "step": 148398 + }, + { + "epoch": 1.78, + "grad_norm": 3.2867258257438543, + "learning_rate": 6.028740075188544e-07, + "loss": 1.0392, + "step": 148401 + }, + { + "epoch": 1.78, + "grad_norm": 8.633856004463386, + "learning_rate": 6.026742549658005e-07, + "loss": 1.067, + "step": 148404 + }, + { + "epoch": 1.78, + "grad_norm": 7.821350462693459, + "learning_rate": 6.024745344824323e-07, + "loss": 1.4178, + "step": 148407 + }, + { + "epoch": 1.78, + "grad_norm": 7.359885268503396, + "learning_rate": 6.022748460694327e-07, + "loss": 1.4078, + "step": 148410 + }, + { + "epoch": 1.78, + "grad_norm": 10.749536526939934, + "learning_rate": 6.020751897274868e-07, + "loss": 1.0193, + "step": 148413 + }, + { + "epoch": 1.78, + "grad_norm": 7.526421388551332, + "learning_rate": 6.018755654572717e-07, + "loss": 1.0991, + "step": 148416 + }, + { + "epoch": 1.78, + "grad_norm": 7.382759502804376, + "learning_rate": 6.01675973259469e-07, + "loss": 1.514, + "step": 148419 + }, + { + "epoch": 1.78, + "grad_norm": 10.34762415204222, + "learning_rate": 6.014764131347639e-07, + "loss": 1.4148, + "step": 148422 + }, + { + "epoch": 1.78, + "grad_norm": 12.766522279797165, + "learning_rate": 6.012768850838336e-07, + "loss": 1.0341, + "step": 148425 + }, + { + "epoch": 1.78, + "grad_norm": 21.19749259520846, + "learning_rate": 6.010773891073584e-07, + "loss": 1.1178, + "step": 148428 + }, + { + "epoch": 1.78, + "grad_norm": 5.186075851966466, + "learning_rate": 6.008779252060204e-07, + "loss": 1.0998, + "step": 148431 + }, + { + "epoch": 1.78, + "grad_norm": 4.590460357188047, + "learning_rate": 6.00678493380501e-07, + "loss": 1.4795, + "step": 148434 + }, + { + "epoch": 1.78, + "grad_norm": 6.126322717159572, + "learning_rate": 6.004790936314797e-07, + "loss": 1.2507, + "step": 148437 + }, + { + "epoch": 1.78, + "grad_norm": 4.917604593971837, + "learning_rate": 6.002797259596393e-07, + "loss": 1.2057, + "step": 148440 + }, + { + "epoch": 1.78, + "grad_norm": 4.411556501261763, + "learning_rate": 6.000803903656571e-07, + "loss": 0.9058, + "step": 148443 + }, + { + "epoch": 1.79, + "grad_norm": 9.87437081754265, + "learning_rate": 5.998810868502169e-07, + "loss": 1.1115, + "step": 148446 + }, + { + "epoch": 1.79, + "grad_norm": 3.7639283552969713, + "learning_rate": 5.996818154139939e-07, + "loss": 1.4373, + "step": 148449 + }, + { + "epoch": 1.79, + "grad_norm": 17.19579141408469, + "learning_rate": 5.994825760576728e-07, + "loss": 0.9836, + "step": 148452 + }, + { + "epoch": 1.79, + "grad_norm": 11.202617581092861, + "learning_rate": 5.992833687819299e-07, + "loss": 1.398, + "step": 148455 + }, + { + "epoch": 1.79, + "grad_norm": 7.880122798566587, + "learning_rate": 5.990841935874469e-07, + "loss": 1.1151, + "step": 148458 + }, + { + "epoch": 1.79, + "grad_norm": 6.285090290441432, + "learning_rate": 5.988850504749045e-07, + "loss": 0.8988, + "step": 148461 + }, + { + "epoch": 1.79, + "grad_norm": 22.13677276053734, + "learning_rate": 5.986859394449806e-07, + "loss": 0.9026, + "step": 148464 + }, + { + "epoch": 1.79, + "grad_norm": 29.515112686085423, + "learning_rate": 5.984868604983552e-07, + "loss": 1.2004, + "step": 148467 + }, + { + "epoch": 1.79, + "grad_norm": 22.807879167982083, + "learning_rate": 5.982878136357084e-07, + "loss": 0.9817, + "step": 148470 + }, + { + "epoch": 1.79, + "grad_norm": 2.0664224591820433, + "learning_rate": 5.980887988577178e-07, + "loss": 1.3767, + "step": 148473 + }, + { + "epoch": 1.79, + "grad_norm": 10.330205319183392, + "learning_rate": 5.978898161650637e-07, + "loss": 1.0412, + "step": 148476 + }, + { + "epoch": 1.79, + "grad_norm": 8.16808676104649, + "learning_rate": 5.976908655584268e-07, + "loss": 1.0105, + "step": 148479 + }, + { + "epoch": 1.79, + "grad_norm": 5.850389458361891, + "learning_rate": 5.974919470384821e-07, + "loss": 1.0172, + "step": 148482 + }, + { + "epoch": 1.79, + "grad_norm": 11.856920880265344, + "learning_rate": 5.972930606059113e-07, + "loss": 1.1414, + "step": 148485 + }, + { + "epoch": 1.79, + "grad_norm": 3.01602025185943, + "learning_rate": 5.970942062613927e-07, + "loss": 1.4055, + "step": 148488 + }, + { + "epoch": 1.79, + "grad_norm": 14.523376142580918, + "learning_rate": 5.968953840056046e-07, + "loss": 1.2845, + "step": 148491 + }, + { + "epoch": 1.79, + "grad_norm": 3.7049954710701183, + "learning_rate": 5.966965938392267e-07, + "loss": 1.4468, + "step": 148494 + }, + { + "epoch": 1.79, + "grad_norm": 3.983213769238862, + "learning_rate": 5.964978357629336e-07, + "loss": 1.2509, + "step": 148497 + }, + { + "epoch": 1.79, + "grad_norm": 3.8277656098734965, + "learning_rate": 5.962991097774073e-07, + "loss": 1.6746, + "step": 148500 + }, + { + "epoch": 1.79, + "grad_norm": 16.865818067270002, + "learning_rate": 5.96100415883325e-07, + "loss": 1.2909, + "step": 148503 + }, + { + "epoch": 1.79, + "grad_norm": 6.98022308406983, + "learning_rate": 5.959017540813661e-07, + "loss": 1.2587, + "step": 148506 + }, + { + "epoch": 1.79, + "grad_norm": 4.09644825867654, + "learning_rate": 5.957031243722055e-07, + "loss": 1.4619, + "step": 148509 + }, + { + "epoch": 1.79, + "grad_norm": 15.158723507519618, + "learning_rate": 5.95504526756524e-07, + "loss": 1.0923, + "step": 148512 + }, + { + "epoch": 1.79, + "grad_norm": 3.8295608660265334, + "learning_rate": 5.953059612349965e-07, + "loss": 1.4862, + "step": 148515 + }, + { + "epoch": 1.79, + "grad_norm": 10.52281780015147, + "learning_rate": 5.951074278083035e-07, + "loss": 1.2502, + "step": 148518 + }, + { + "epoch": 1.79, + "grad_norm": 9.417703643224023, + "learning_rate": 5.949089264771213e-07, + "loss": 1.3156, + "step": 148521 + }, + { + "epoch": 1.79, + "grad_norm": 4.372142606684401, + "learning_rate": 5.947104572421259e-07, + "loss": 1.3839, + "step": 148524 + }, + { + "epoch": 1.79, + "grad_norm": 5.30526212883124, + "learning_rate": 5.945120201039967e-07, + "loss": 0.8552, + "step": 148527 + }, + { + "epoch": 1.79, + "grad_norm": 7.818027059649123, + "learning_rate": 5.94313615063411e-07, + "loss": 1.0662, + "step": 148530 + }, + { + "epoch": 1.79, + "grad_norm": 6.800259151850808, + "learning_rate": 5.94115242121045e-07, + "loss": 0.9287, + "step": 148533 + }, + { + "epoch": 1.79, + "grad_norm": 17.72926303997828, + "learning_rate": 5.939169012775748e-07, + "loss": 1.1917, + "step": 148536 + }, + { + "epoch": 1.79, + "grad_norm": 5.352009255909585, + "learning_rate": 5.937185925336775e-07, + "loss": 0.83, + "step": 148539 + }, + { + "epoch": 1.79, + "grad_norm": 4.506191191252782, + "learning_rate": 5.935203158900315e-07, + "loss": 1.2369, + "step": 148542 + }, + { + "epoch": 1.79, + "grad_norm": 11.429762923034172, + "learning_rate": 5.93322071347311e-07, + "loss": 1.0487, + "step": 148545 + }, + { + "epoch": 1.79, + "grad_norm": 5.002047022782273, + "learning_rate": 5.931238589061961e-07, + "loss": 1.1913, + "step": 148548 + }, + { + "epoch": 1.79, + "grad_norm": 4.954452817405064, + "learning_rate": 5.929256785673599e-07, + "loss": 1.1212, + "step": 148551 + }, + { + "epoch": 1.79, + "grad_norm": 12.552873032028899, + "learning_rate": 5.927275303314817e-07, + "loss": 1.1797, + "step": 148554 + }, + { + "epoch": 1.79, + "grad_norm": 7.380139488079509, + "learning_rate": 5.925294141992343e-07, + "loss": 0.795, + "step": 148557 + }, + { + "epoch": 1.79, + "grad_norm": 10.459038327995483, + "learning_rate": 5.923313301712963e-07, + "loss": 1.2443, + "step": 148560 + }, + { + "epoch": 1.79, + "grad_norm": 9.776818473447427, + "learning_rate": 5.921332782483425e-07, + "loss": 0.9184, + "step": 148563 + }, + { + "epoch": 1.79, + "grad_norm": 12.447281091746085, + "learning_rate": 5.919352584310479e-07, + "loss": 1.1828, + "step": 148566 + }, + { + "epoch": 1.79, + "grad_norm": 6.47382164248713, + "learning_rate": 5.917372707200897e-07, + "loss": 1.0229, + "step": 148569 + }, + { + "epoch": 1.79, + "grad_norm": 13.530263137251673, + "learning_rate": 5.915393151161452e-07, + "loss": 1.2752, + "step": 148572 + }, + { + "epoch": 1.79, + "grad_norm": 16.85047430032312, + "learning_rate": 5.913413916198884e-07, + "loss": 1.105, + "step": 148575 + }, + { + "epoch": 1.79, + "grad_norm": 6.975139661403966, + "learning_rate": 5.91143500231992e-07, + "loss": 0.8582, + "step": 148578 + }, + { + "epoch": 1.79, + "grad_norm": 3.1885188057390743, + "learning_rate": 5.909456409531344e-07, + "loss": 1.0831, + "step": 148581 + }, + { + "epoch": 1.79, + "grad_norm": 3.291245140556388, + "learning_rate": 5.907478137839906e-07, + "loss": 1.204, + "step": 148584 + }, + { + "epoch": 1.79, + "grad_norm": 12.144659280276567, + "learning_rate": 5.905500187252367e-07, + "loss": 1.2312, + "step": 148587 + }, + { + "epoch": 1.79, + "grad_norm": 7.438002973363617, + "learning_rate": 5.903522557775443e-07, + "loss": 1.3735, + "step": 148590 + }, + { + "epoch": 1.79, + "grad_norm": 10.43259916479552, + "learning_rate": 5.901545249415908e-07, + "loss": 1.1125, + "step": 148593 + }, + { + "epoch": 1.79, + "grad_norm": 5.294141340749488, + "learning_rate": 5.899568262180511e-07, + "loss": 1.032, + "step": 148596 + }, + { + "epoch": 1.79, + "grad_norm": 8.648071729907281, + "learning_rate": 5.897591596075991e-07, + "loss": 1.1078, + "step": 148599 + }, + { + "epoch": 1.79, + "grad_norm": 2.1702734300440025, + "learning_rate": 5.895615251109099e-07, + "loss": 1.1037, + "step": 148602 + }, + { + "epoch": 1.79, + "grad_norm": 20.436354884032976, + "learning_rate": 5.893639227286563e-07, + "loss": 1.0899, + "step": 148605 + }, + { + "epoch": 1.79, + "grad_norm": 12.92899485241211, + "learning_rate": 5.891663524615143e-07, + "loss": 1.0664, + "step": 148608 + }, + { + "epoch": 1.79, + "grad_norm": 7.6346883440673246, + "learning_rate": 5.88968814310158e-07, + "loss": 1.1703, + "step": 148611 + }, + { + "epoch": 1.79, + "grad_norm": 5.865032952722396, + "learning_rate": 5.887713082752622e-07, + "loss": 1.1272, + "step": 148614 + }, + { + "epoch": 1.79, + "grad_norm": 6.199956478086367, + "learning_rate": 5.885738343574998e-07, + "loss": 1.0905, + "step": 148617 + }, + { + "epoch": 1.79, + "grad_norm": 3.208257329704085, + "learning_rate": 5.883763925575436e-07, + "loss": 1.102, + "step": 148620 + }, + { + "epoch": 1.79, + "grad_norm": 4.573106689643201, + "learning_rate": 5.881789828760698e-07, + "loss": 1.3234, + "step": 148623 + }, + { + "epoch": 1.79, + "grad_norm": 7.099009274527704, + "learning_rate": 5.879816053137499e-07, + "loss": 1.0299, + "step": 148626 + }, + { + "epoch": 1.79, + "grad_norm": 14.177910177428265, + "learning_rate": 5.877842598712602e-07, + "loss": 1.2652, + "step": 148629 + }, + { + "epoch": 1.79, + "grad_norm": 3.9054350795668724, + "learning_rate": 5.875869465492712e-07, + "loss": 0.7763, + "step": 148632 + }, + { + "epoch": 1.79, + "grad_norm": 11.188956539159895, + "learning_rate": 5.873896653484579e-07, + "loss": 1.3257, + "step": 148635 + }, + { + "epoch": 1.79, + "grad_norm": 4.77264235688451, + "learning_rate": 5.871924162694942e-07, + "loss": 1.2129, + "step": 148638 + }, + { + "epoch": 1.79, + "grad_norm": 9.532197686647086, + "learning_rate": 5.869951993130529e-07, + "loss": 1.2532, + "step": 148641 + }, + { + "epoch": 1.79, + "grad_norm": 9.197208304183876, + "learning_rate": 5.867980144798047e-07, + "loss": 1.1271, + "step": 148644 + }, + { + "epoch": 1.79, + "grad_norm": 3.0902724857405537, + "learning_rate": 5.866008617704244e-07, + "loss": 1.0805, + "step": 148647 + }, + { + "epoch": 1.79, + "grad_norm": 15.385368175319442, + "learning_rate": 5.864037411855839e-07, + "loss": 0.9329, + "step": 148650 + }, + { + "epoch": 1.79, + "grad_norm": 15.255670941712062, + "learning_rate": 5.86206652725958e-07, + "loss": 1.0443, + "step": 148653 + }, + { + "epoch": 1.79, + "grad_norm": 6.402630528316573, + "learning_rate": 5.860095963922186e-07, + "loss": 1.2679, + "step": 148656 + }, + { + "epoch": 1.79, + "grad_norm": 4.044662600828459, + "learning_rate": 5.858125721850371e-07, + "loss": 1.1502, + "step": 148659 + }, + { + "epoch": 1.79, + "grad_norm": 3.2475348714257133, + "learning_rate": 5.856155801050856e-07, + "loss": 0.9693, + "step": 148662 + }, + { + "epoch": 1.79, + "grad_norm": 8.429685830226187, + "learning_rate": 5.854186201530365e-07, + "loss": 1.4657, + "step": 148665 + }, + { + "epoch": 1.79, + "grad_norm": 3.401041985783541, + "learning_rate": 5.85221692329565e-07, + "loss": 1.3545, + "step": 148668 + }, + { + "epoch": 1.79, + "grad_norm": 3.0082733723597537, + "learning_rate": 5.850247966353384e-07, + "loss": 1.3641, + "step": 148671 + }, + { + "epoch": 1.79, + "grad_norm": 13.971789352990685, + "learning_rate": 5.848279330710315e-07, + "loss": 1.3395, + "step": 148674 + }, + { + "epoch": 1.79, + "grad_norm": 2.9341974209613078, + "learning_rate": 5.846311016373151e-07, + "loss": 1.0971, + "step": 148677 + }, + { + "epoch": 1.79, + "grad_norm": 12.879008827910111, + "learning_rate": 5.84434302334862e-07, + "loss": 1.6565, + "step": 148680 + }, + { + "epoch": 1.79, + "grad_norm": 4.556412124181943, + "learning_rate": 5.842375351643448e-07, + "loss": 0.8832, + "step": 148683 + }, + { + "epoch": 1.79, + "grad_norm": 7.430974350289206, + "learning_rate": 5.840408001264308e-07, + "loss": 0.8209, + "step": 148686 + }, + { + "epoch": 1.79, + "grad_norm": 7.63128565389161, + "learning_rate": 5.83844097221794e-07, + "loss": 1.3459, + "step": 148689 + }, + { + "epoch": 1.79, + "grad_norm": 5.478846886464487, + "learning_rate": 5.83647426451106e-07, + "loss": 0.9447, + "step": 148692 + }, + { + "epoch": 1.79, + "grad_norm": 1.9903172288271616, + "learning_rate": 5.834507878150386e-07, + "loss": 0.9324, + "step": 148695 + }, + { + "epoch": 1.79, + "grad_norm": 12.83793950628557, + "learning_rate": 5.832541813142612e-07, + "loss": 1.1387, + "step": 148698 + }, + { + "epoch": 1.79, + "grad_norm": 6.936690090827359, + "learning_rate": 5.830576069494465e-07, + "loss": 0.7583, + "step": 148701 + }, + { + "epoch": 1.79, + "grad_norm": 24.751298745725155, + "learning_rate": 5.828610647212619e-07, + "loss": 1.2659, + "step": 148704 + }, + { + "epoch": 1.79, + "grad_norm": 6.872729164778975, + "learning_rate": 5.826645546303811e-07, + "loss": 0.9817, + "step": 148707 + }, + { + "epoch": 1.79, + "grad_norm": 11.508741063318297, + "learning_rate": 5.82468076677476e-07, + "loss": 1.4641, + "step": 148710 + }, + { + "epoch": 1.79, + "grad_norm": 15.916180836755109, + "learning_rate": 5.822716308632126e-07, + "loss": 1.192, + "step": 148713 + }, + { + "epoch": 1.79, + "grad_norm": 9.381534488464355, + "learning_rate": 5.820752171882649e-07, + "loss": 1.2243, + "step": 148716 + }, + { + "epoch": 1.79, + "grad_norm": 2.7824252092304707, + "learning_rate": 5.818788356533011e-07, + "loss": 1.1927, + "step": 148719 + }, + { + "epoch": 1.79, + "grad_norm": 5.018841020222243, + "learning_rate": 5.816824862589953e-07, + "loss": 1.0582, + "step": 148722 + }, + { + "epoch": 1.79, + "grad_norm": 7.339759026710867, + "learning_rate": 5.814861690060136e-07, + "loss": 1.2493, + "step": 148725 + }, + { + "epoch": 1.79, + "grad_norm": 10.23016944452534, + "learning_rate": 5.812898838950265e-07, + "loss": 1.2373, + "step": 148728 + }, + { + "epoch": 1.79, + "grad_norm": 11.986836175377393, + "learning_rate": 5.810936309267035e-07, + "loss": 1.2658, + "step": 148731 + }, + { + "epoch": 1.79, + "grad_norm": 9.478526786188286, + "learning_rate": 5.808974101017162e-07, + "loss": 0.9561, + "step": 148734 + }, + { + "epoch": 1.79, + "grad_norm": 5.700069045112621, + "learning_rate": 5.807012214207342e-07, + "loss": 1.2061, + "step": 148737 + }, + { + "epoch": 1.79, + "grad_norm": 3.02724880306772, + "learning_rate": 5.805050648844246e-07, + "loss": 0.7948, + "step": 148740 + }, + { + "epoch": 1.79, + "grad_norm": 8.03491710221246, + "learning_rate": 5.803089404934592e-07, + "loss": 1.5443, + "step": 148743 + }, + { + "epoch": 1.79, + "grad_norm": 5.065812432013789, + "learning_rate": 5.801128482485063e-07, + "loss": 1.1551, + "step": 148746 + }, + { + "epoch": 1.79, + "grad_norm": 7.004562059811615, + "learning_rate": 5.799167881502354e-07, + "loss": 1.0937, + "step": 148749 + }, + { + "epoch": 1.79, + "grad_norm": 2.7159468562122258, + "learning_rate": 5.797207601993149e-07, + "loss": 1.1957, + "step": 148752 + }, + { + "epoch": 1.79, + "grad_norm": 16.388557295568056, + "learning_rate": 5.795247643964141e-07, + "loss": 1.2218, + "step": 148755 + }, + { + "epoch": 1.79, + "grad_norm": 3.8997212851877197, + "learning_rate": 5.793288007422016e-07, + "loss": 0.8805, + "step": 148758 + }, + { + "epoch": 1.79, + "grad_norm": 5.792212413200385, + "learning_rate": 5.791328692373466e-07, + "loss": 1.044, + "step": 148761 + }, + { + "epoch": 1.79, + "grad_norm": 8.266120137346574, + "learning_rate": 5.789369698825197e-07, + "loss": 1.0295, + "step": 148764 + }, + { + "epoch": 1.79, + "grad_norm": 18.03160696586772, + "learning_rate": 5.787411026783874e-07, + "loss": 1.2004, + "step": 148767 + }, + { + "epoch": 1.79, + "grad_norm": 10.590298751125856, + "learning_rate": 5.785452676256165e-07, + "loss": 1.107, + "step": 148770 + }, + { + "epoch": 1.79, + "grad_norm": 8.489614475446079, + "learning_rate": 5.783494647248778e-07, + "loss": 1.1893, + "step": 148773 + }, + { + "epoch": 1.79, + "grad_norm": 4.553599618387934, + "learning_rate": 5.781536939768396e-07, + "loss": 1.1665, + "step": 148776 + }, + { + "epoch": 1.79, + "grad_norm": 23.788665999000262, + "learning_rate": 5.779579553821668e-07, + "loss": 1.229, + "step": 148779 + }, + { + "epoch": 1.79, + "grad_norm": 3.4985481069690922, + "learning_rate": 5.777622489415302e-07, + "loss": 0.9204, + "step": 148782 + }, + { + "epoch": 1.79, + "grad_norm": 11.287502243375851, + "learning_rate": 5.775665746555992e-07, + "loss": 1.0562, + "step": 148785 + }, + { + "epoch": 1.79, + "grad_norm": 8.715586336658598, + "learning_rate": 5.773709325250376e-07, + "loss": 0.8224, + "step": 148788 + }, + { + "epoch": 1.79, + "grad_norm": 4.318453166533004, + "learning_rate": 5.771753225505162e-07, + "loss": 0.9595, + "step": 148791 + }, + { + "epoch": 1.79, + "grad_norm": 3.3588917721186426, + "learning_rate": 5.769797447327008e-07, + "loss": 1.1587, + "step": 148794 + }, + { + "epoch": 1.79, + "grad_norm": 8.738843584169922, + "learning_rate": 5.767841990722589e-07, + "loss": 1.1028, + "step": 148797 + }, + { + "epoch": 1.79, + "grad_norm": 7.331403705166693, + "learning_rate": 5.765886855698577e-07, + "loss": 1.2375, + "step": 148800 + }, + { + "epoch": 1.79, + "grad_norm": 11.604904506276956, + "learning_rate": 5.763932042261678e-07, + "loss": 1.013, + "step": 148803 + }, + { + "epoch": 1.79, + "grad_norm": 6.754239250395466, + "learning_rate": 5.761977550418507e-07, + "loss": 1.4212, + "step": 148806 + }, + { + "epoch": 1.79, + "grad_norm": 5.741169112357375, + "learning_rate": 5.760023380175783e-07, + "loss": 1.6659, + "step": 148809 + }, + { + "epoch": 1.79, + "grad_norm": 9.71297896659618, + "learning_rate": 5.758069531540145e-07, + "loss": 1.1855, + "step": 148812 + }, + { + "epoch": 1.79, + "grad_norm": 3.923540119806008, + "learning_rate": 5.756116004518253e-07, + "loss": 1.3094, + "step": 148815 + }, + { + "epoch": 1.79, + "grad_norm": 6.3265349265154445, + "learning_rate": 5.754162799116824e-07, + "loss": 1.0031, + "step": 148818 + }, + { + "epoch": 1.79, + "grad_norm": 4.236390296815597, + "learning_rate": 5.752209915342466e-07, + "loss": 0.8238, + "step": 148821 + }, + { + "epoch": 1.79, + "grad_norm": 5.927248557932242, + "learning_rate": 5.750257353201871e-07, + "loss": 1.2071, + "step": 148824 + }, + { + "epoch": 1.79, + "grad_norm": 3.7550701722067377, + "learning_rate": 5.748305112701691e-07, + "loss": 0.9147, + "step": 148827 + }, + { + "epoch": 1.79, + "grad_norm": 8.15438224860585, + "learning_rate": 5.74635319384862e-07, + "loss": 1.0348, + "step": 148830 + }, + { + "epoch": 1.79, + "grad_norm": 10.04184861379161, + "learning_rate": 5.744401596649296e-07, + "loss": 1.6708, + "step": 148833 + }, + { + "epoch": 1.79, + "grad_norm": 7.761924433580044, + "learning_rate": 5.74245032111036e-07, + "loss": 1.3638, + "step": 148836 + }, + { + "epoch": 1.79, + "grad_norm": 11.015426926778714, + "learning_rate": 5.740499367238495e-07, + "loss": 1.1483, + "step": 148839 + }, + { + "epoch": 1.79, + "grad_norm": 6.465265407731487, + "learning_rate": 5.738548735040339e-07, + "loss": 1.1431, + "step": 148842 + }, + { + "epoch": 1.79, + "grad_norm": 14.769912441844651, + "learning_rate": 5.736598424522588e-07, + "loss": 1.137, + "step": 148845 + }, + { + "epoch": 1.79, + "grad_norm": 2.510297029989517, + "learning_rate": 5.734648435691859e-07, + "loss": 1.0992, + "step": 148848 + }, + { + "epoch": 1.79, + "grad_norm": 8.20066612926376, + "learning_rate": 5.732698768554834e-07, + "loss": 1.3198, + "step": 148851 + }, + { + "epoch": 1.79, + "grad_norm": 6.8667013498766, + "learning_rate": 5.730749423118142e-07, + "loss": 1.2367, + "step": 148854 + }, + { + "epoch": 1.79, + "grad_norm": 14.934587011251171, + "learning_rate": 5.728800399388457e-07, + "loss": 0.8702, + "step": 148857 + }, + { + "epoch": 1.79, + "grad_norm": 9.10265595364905, + "learning_rate": 5.726851697372415e-07, + "loss": 1.0808, + "step": 148860 + }, + { + "epoch": 1.79, + "grad_norm": 5.728479381346559, + "learning_rate": 5.724903317076657e-07, + "loss": 1.2702, + "step": 148863 + }, + { + "epoch": 1.79, + "grad_norm": 12.707692896591563, + "learning_rate": 5.722955258507856e-07, + "loss": 1.4065, + "step": 148866 + }, + { + "epoch": 1.79, + "grad_norm": 8.161090636446612, + "learning_rate": 5.72100752167265e-07, + "loss": 1.0298, + "step": 148869 + }, + { + "epoch": 1.79, + "grad_norm": 9.269657033406828, + "learning_rate": 5.719060106577689e-07, + "loss": 1.1523, + "step": 148872 + }, + { + "epoch": 1.79, + "grad_norm": 9.487494294332887, + "learning_rate": 5.717113013229625e-07, + "loss": 1.1694, + "step": 148875 + }, + { + "epoch": 1.79, + "grad_norm": 7.585921849851568, + "learning_rate": 5.715166241635084e-07, + "loss": 0.8696, + "step": 148878 + }, + { + "epoch": 1.79, + "grad_norm": 11.405667958730303, + "learning_rate": 5.713219791800706e-07, + "loss": 1.0315, + "step": 148881 + }, + { + "epoch": 1.79, + "grad_norm": 14.840228300082167, + "learning_rate": 5.711273663733175e-07, + "loss": 1.2975, + "step": 148884 + }, + { + "epoch": 1.79, + "grad_norm": 5.996467883863775, + "learning_rate": 5.709327857439084e-07, + "loss": 1.1354, + "step": 148887 + }, + { + "epoch": 1.79, + "grad_norm": 3.744036949745273, + "learning_rate": 5.707382372925097e-07, + "loss": 1.0447, + "step": 148890 + }, + { + "epoch": 1.79, + "grad_norm": 5.942700123122853, + "learning_rate": 5.705437210197861e-07, + "loss": 1.0962, + "step": 148893 + }, + { + "epoch": 1.79, + "grad_norm": 4.933623651947718, + "learning_rate": 5.703492369263985e-07, + "loss": 1.8597, + "step": 148896 + }, + { + "epoch": 1.79, + "grad_norm": 7.249658987375176, + "learning_rate": 5.70154785013014e-07, + "loss": 1.293, + "step": 148899 + }, + { + "epoch": 1.79, + "grad_norm": 17.426976801243143, + "learning_rate": 5.699603652802932e-07, + "loss": 1.1138, + "step": 148902 + }, + { + "epoch": 1.79, + "grad_norm": 6.128769740142131, + "learning_rate": 5.697659777289022e-07, + "loss": 1.2718, + "step": 148905 + }, + { + "epoch": 1.79, + "grad_norm": 6.017807459327553, + "learning_rate": 5.695716223595016e-07, + "loss": 1.1488, + "step": 148908 + }, + { + "epoch": 1.79, + "grad_norm": 6.400628970539116, + "learning_rate": 5.693772991727586e-07, + "loss": 1.0094, + "step": 148911 + }, + { + "epoch": 1.79, + "grad_norm": 13.822349245892616, + "learning_rate": 5.691830081693317e-07, + "loss": 0.9832, + "step": 148914 + }, + { + "epoch": 1.79, + "grad_norm": 7.582744099045812, + "learning_rate": 5.689887493498881e-07, + "loss": 1.2115, + "step": 148917 + }, + { + "epoch": 1.79, + "grad_norm": 11.255200162140085, + "learning_rate": 5.68794522715087e-07, + "loss": 1.3501, + "step": 148920 + }, + { + "epoch": 1.79, + "grad_norm": 8.24977105636111, + "learning_rate": 5.686003282655938e-07, + "loss": 0.995, + "step": 148923 + }, + { + "epoch": 1.79, + "grad_norm": 3.354758733585135, + "learning_rate": 5.684061660020723e-07, + "loss": 0.8944, + "step": 148926 + }, + { + "epoch": 1.79, + "grad_norm": 3.3256914938794004, + "learning_rate": 5.682120359251808e-07, + "loss": 1.3026, + "step": 148929 + }, + { + "epoch": 1.79, + "grad_norm": 16.302331598341695, + "learning_rate": 5.680179380355843e-07, + "loss": 1.1444, + "step": 148932 + }, + { + "epoch": 1.79, + "grad_norm": 4.763394554708063, + "learning_rate": 5.67823872333948e-07, + "loss": 1.2137, + "step": 148935 + }, + { + "epoch": 1.79, + "grad_norm": 2.942949612193414, + "learning_rate": 5.6762983882093e-07, + "loss": 0.9142, + "step": 148938 + }, + { + "epoch": 1.79, + "grad_norm": 5.787380179049399, + "learning_rate": 5.674358374971933e-07, + "loss": 1.1518, + "step": 148941 + }, + { + "epoch": 1.79, + "grad_norm": 17.417593384808338, + "learning_rate": 5.672418683633996e-07, + "loss": 1.5714, + "step": 148944 + }, + { + "epoch": 1.79, + "grad_norm": 5.1161731734778275, + "learning_rate": 5.670479314202127e-07, + "loss": 1.3402, + "step": 148947 + }, + { + "epoch": 1.79, + "grad_norm": 23.821946137290634, + "learning_rate": 5.668540266682931e-07, + "loss": 1.1782, + "step": 148950 + }, + { + "epoch": 1.79, + "grad_norm": 8.985643994450664, + "learning_rate": 5.666601541083039e-07, + "loss": 0.9882, + "step": 148953 + }, + { + "epoch": 1.79, + "grad_norm": 13.045923702622561, + "learning_rate": 5.664663137409044e-07, + "loss": 1.0052, + "step": 148956 + }, + { + "epoch": 1.79, + "grad_norm": 4.944564306098655, + "learning_rate": 5.662725055667595e-07, + "loss": 1.2409, + "step": 148959 + }, + { + "epoch": 1.79, + "grad_norm": 10.103896583063918, + "learning_rate": 5.660787295865267e-07, + "loss": 1.0237, + "step": 148962 + }, + { + "epoch": 1.79, + "grad_norm": 7.528267971488326, + "learning_rate": 5.658849858008697e-07, + "loss": 1.0739, + "step": 148965 + }, + { + "epoch": 1.79, + "grad_norm": 7.380595449271456, + "learning_rate": 5.656912742104492e-07, + "loss": 1.3039, + "step": 148968 + }, + { + "epoch": 1.79, + "grad_norm": 5.596041851425391, + "learning_rate": 5.654975948159247e-07, + "loss": 0.8334, + "step": 148971 + }, + { + "epoch": 1.79, + "grad_norm": 7.161877777964584, + "learning_rate": 5.6530394761796e-07, + "loss": 0.9626, + "step": 148974 + }, + { + "epoch": 1.79, + "grad_norm": 2.7800028424648047, + "learning_rate": 5.651103326172147e-07, + "loss": 1.3565, + "step": 148977 + }, + { + "epoch": 1.79, + "grad_norm": 7.246295516094604, + "learning_rate": 5.649167498143504e-07, + "loss": 1.1029, + "step": 148980 + }, + { + "epoch": 1.79, + "grad_norm": 15.84430226944019, + "learning_rate": 5.647231992100244e-07, + "loss": 0.833, + "step": 148983 + }, + { + "epoch": 1.79, + "grad_norm": 6.44185850173089, + "learning_rate": 5.645296808049005e-07, + "loss": 1.2118, + "step": 148986 + }, + { + "epoch": 1.79, + "grad_norm": 2.8438718507105967, + "learning_rate": 5.643361945996373e-07, + "loss": 1.1235, + "step": 148989 + }, + { + "epoch": 1.79, + "grad_norm": 7.067665984049735, + "learning_rate": 5.641427405948985e-07, + "loss": 1.0337, + "step": 148992 + }, + { + "epoch": 1.79, + "grad_norm": 5.484730646378707, + "learning_rate": 5.639493187913391e-07, + "loss": 1.2318, + "step": 148995 + }, + { + "epoch": 1.79, + "grad_norm": 6.064853190928157, + "learning_rate": 5.637559291896233e-07, + "loss": 1.2546, + "step": 148998 + }, + { + "epoch": 1.79, + "grad_norm": 6.091956327367467, + "learning_rate": 5.635625717904092e-07, + "loss": 1.169, + "step": 149001 + }, + { + "epoch": 1.79, + "grad_norm": 5.558936219476097, + "learning_rate": 5.633692465943564e-07, + "loss": 1.2276, + "step": 149004 + }, + { + "epoch": 1.79, + "grad_norm": 10.477182790552742, + "learning_rate": 5.631759536021275e-07, + "loss": 1.0877, + "step": 149007 + }, + { + "epoch": 1.79, + "grad_norm": 3.958753887521042, + "learning_rate": 5.629826928143778e-07, + "loss": 0.6696, + "step": 149010 + }, + { + "epoch": 1.79, + "grad_norm": 5.262897132070371, + "learning_rate": 5.627894642317688e-07, + "loss": 1.0826, + "step": 149013 + }, + { + "epoch": 1.79, + "grad_norm": 3.001295756883179, + "learning_rate": 5.625962678549601e-07, + "loss": 1.4431, + "step": 149016 + }, + { + "epoch": 1.79, + "grad_norm": 6.71901491824168, + "learning_rate": 5.624031036846134e-07, + "loss": 0.8192, + "step": 149019 + }, + { + "epoch": 1.79, + "grad_norm": 8.670378492191132, + "learning_rate": 5.622099717213847e-07, + "loss": 1.1012, + "step": 149022 + }, + { + "epoch": 1.79, + "grad_norm": 12.660910068274893, + "learning_rate": 5.620168719659324e-07, + "loss": 1.072, + "step": 149025 + }, + { + "epoch": 1.79, + "grad_norm": 8.155484286542322, + "learning_rate": 5.618238044189172e-07, + "loss": 1.4475, + "step": 149028 + }, + { + "epoch": 1.79, + "grad_norm": 7.322946459529473, + "learning_rate": 5.616307690809975e-07, + "loss": 1.369, + "step": 149031 + }, + { + "epoch": 1.79, + "grad_norm": 7.22340580553501, + "learning_rate": 5.614377659528336e-07, + "loss": 1.1113, + "step": 149034 + }, + { + "epoch": 1.79, + "grad_norm": 3.0647837447792066, + "learning_rate": 5.612447950350819e-07, + "loss": 0.918, + "step": 149037 + }, + { + "epoch": 1.79, + "grad_norm": 9.683825906190417, + "learning_rate": 5.610518563284017e-07, + "loss": 1.0503, + "step": 149040 + }, + { + "epoch": 1.79, + "grad_norm": 11.73437186295426, + "learning_rate": 5.608589498334526e-07, + "loss": 1.3934, + "step": 149043 + }, + { + "epoch": 1.79, + "grad_norm": 5.763598301225669, + "learning_rate": 5.60666075550893e-07, + "loss": 1.5153, + "step": 149046 + }, + { + "epoch": 1.79, + "grad_norm": 5.154890664620463, + "learning_rate": 5.604732334813778e-07, + "loss": 1.1697, + "step": 149049 + }, + { + "epoch": 1.79, + "grad_norm": 9.623900856135965, + "learning_rate": 5.602804236255677e-07, + "loss": 1.4869, + "step": 149052 + }, + { + "epoch": 1.79, + "grad_norm": 8.157187226295347, + "learning_rate": 5.600876459841198e-07, + "loss": 0.81, + "step": 149055 + }, + { + "epoch": 1.79, + "grad_norm": 5.277066552455361, + "learning_rate": 5.598949005576926e-07, + "loss": 1.3242, + "step": 149058 + }, + { + "epoch": 1.79, + "grad_norm": 7.6778075384267215, + "learning_rate": 5.597021873469454e-07, + "loss": 1.1317, + "step": 149061 + }, + { + "epoch": 1.79, + "grad_norm": 8.909225720093305, + "learning_rate": 5.595095063525335e-07, + "loss": 1.4156, + "step": 149064 + }, + { + "epoch": 1.79, + "grad_norm": 5.791714772588131, + "learning_rate": 5.593168575751162e-07, + "loss": 1.1315, + "step": 149067 + }, + { + "epoch": 1.79, + "grad_norm": 7.09809890570969, + "learning_rate": 5.591242410153486e-07, + "loss": 0.863, + "step": 149070 + }, + { + "epoch": 1.79, + "grad_norm": 7.213737111756933, + "learning_rate": 5.5893165667389e-07, + "loss": 1.0739, + "step": 149073 + }, + { + "epoch": 1.79, + "grad_norm": 11.533652090057645, + "learning_rate": 5.587391045513968e-07, + "loss": 1.178, + "step": 149076 + }, + { + "epoch": 1.79, + "grad_norm": 36.985699786789716, + "learning_rate": 5.58546584648525e-07, + "loss": 1.0473, + "step": 149079 + }, + { + "epoch": 1.79, + "grad_norm": 18.619636071461674, + "learning_rate": 5.58354096965934e-07, + "loss": 1.6454, + "step": 149082 + }, + { + "epoch": 1.79, + "grad_norm": 7.227707270872747, + "learning_rate": 5.581616415042801e-07, + "loss": 1.02, + "step": 149085 + }, + { + "epoch": 1.79, + "grad_norm": 5.706517631606003, + "learning_rate": 5.579692182642204e-07, + "loss": 1.0834, + "step": 149088 + }, + { + "epoch": 1.79, + "grad_norm": 16.007889069203447, + "learning_rate": 5.577768272464091e-07, + "loss": 0.8833, + "step": 149091 + }, + { + "epoch": 1.79, + "grad_norm": 2.5300514362174638, + "learning_rate": 5.575844684515042e-07, + "loss": 1.2323, + "step": 149094 + }, + { + "epoch": 1.79, + "grad_norm": 7.299799674844353, + "learning_rate": 5.573921418801631e-07, + "loss": 0.7814, + "step": 149097 + }, + { + "epoch": 1.79, + "grad_norm": 8.024474130487791, + "learning_rate": 5.57199847533042e-07, + "loss": 1.1, + "step": 149100 + }, + { + "epoch": 1.79, + "grad_norm": 9.723412431726182, + "learning_rate": 5.570075854107948e-07, + "loss": 1.0686, + "step": 149103 + }, + { + "epoch": 1.79, + "grad_norm": 14.077016324690796, + "learning_rate": 5.568153555140799e-07, + "loss": 1.2458, + "step": 149106 + }, + { + "epoch": 1.79, + "grad_norm": 17.025906075424206, + "learning_rate": 5.566231578435543e-07, + "loss": 1.0276, + "step": 149109 + }, + { + "epoch": 1.79, + "grad_norm": 5.010677006098038, + "learning_rate": 5.564309923998712e-07, + "loss": 1.2996, + "step": 149112 + }, + { + "epoch": 1.79, + "grad_norm": 3.005554214063664, + "learning_rate": 5.562388591836887e-07, + "loss": 1.1564, + "step": 149115 + }, + { + "epoch": 1.79, + "grad_norm": 3.976898513068265, + "learning_rate": 5.560467581956597e-07, + "loss": 1.3399, + "step": 149118 + }, + { + "epoch": 1.79, + "grad_norm": 13.676495616372357, + "learning_rate": 5.558546894364414e-07, + "loss": 1.0062, + "step": 149121 + }, + { + "epoch": 1.79, + "grad_norm": 5.100508977710207, + "learning_rate": 5.5566265290669e-07, + "loss": 1.0172, + "step": 149124 + }, + { + "epoch": 1.79, + "grad_norm": 15.197368032756339, + "learning_rate": 5.554706486070604e-07, + "loss": 1.1545, + "step": 149127 + }, + { + "epoch": 1.79, + "grad_norm": 1.9313124268850999, + "learning_rate": 5.55278676538209e-07, + "loss": 0.9634, + "step": 149130 + }, + { + "epoch": 1.79, + "grad_norm": 4.239370024348915, + "learning_rate": 5.550867367007873e-07, + "loss": 1.1678, + "step": 149133 + }, + { + "epoch": 1.79, + "grad_norm": 3.101749156730343, + "learning_rate": 5.548948290954525e-07, + "loss": 1.3608, + "step": 149136 + }, + { + "epoch": 1.79, + "grad_norm": 2.743044970365898, + "learning_rate": 5.547029537228587e-07, + "loss": 1.1807, + "step": 149139 + }, + { + "epoch": 1.79, + "grad_norm": 4.321857818515648, + "learning_rate": 5.545111105836643e-07, + "loss": 1.0555, + "step": 149142 + }, + { + "epoch": 1.79, + "grad_norm": 6.559212169132704, + "learning_rate": 5.543192996785196e-07, + "loss": 1.1105, + "step": 149145 + }, + { + "epoch": 1.79, + "grad_norm": 8.494351256515692, + "learning_rate": 5.541275210080799e-07, + "loss": 1.2521, + "step": 149148 + }, + { + "epoch": 1.79, + "grad_norm": 7.805598210941945, + "learning_rate": 5.539357745730034e-07, + "loss": 0.9147, + "step": 149151 + }, + { + "epoch": 1.79, + "grad_norm": 6.434522029839423, + "learning_rate": 5.537440603739408e-07, + "loss": 1.13, + "step": 149154 + }, + { + "epoch": 1.79, + "grad_norm": 8.313554683807377, + "learning_rate": 5.535523784115459e-07, + "loss": 1.3297, + "step": 149157 + }, + { + "epoch": 1.79, + "grad_norm": 3.9962450045611573, + "learning_rate": 5.53360728686474e-07, + "loss": 0.9482, + "step": 149160 + }, + { + "epoch": 1.79, + "grad_norm": 11.628922577881829, + "learning_rate": 5.531691111993787e-07, + "loss": 0.9794, + "step": 149163 + }, + { + "epoch": 1.79, + "grad_norm": 25.37419357693185, + "learning_rate": 5.529775259509151e-07, + "loss": 1.0269, + "step": 149166 + }, + { + "epoch": 1.79, + "grad_norm": 5.201557029312615, + "learning_rate": 5.527859729417384e-07, + "loss": 1.2971, + "step": 149169 + }, + { + "epoch": 1.79, + "grad_norm": 1.9357005861704748, + "learning_rate": 5.525944521725002e-07, + "loss": 1.0089, + "step": 149172 + }, + { + "epoch": 1.79, + "grad_norm": 6.243810896652789, + "learning_rate": 5.524029636438521e-07, + "loss": 1.1249, + "step": 149175 + }, + { + "epoch": 1.79, + "grad_norm": 11.508340423822533, + "learning_rate": 5.522115073564494e-07, + "loss": 1.3275, + "step": 149178 + }, + { + "epoch": 1.79, + "grad_norm": 14.286805038186385, + "learning_rate": 5.520200833109479e-07, + "loss": 1.2954, + "step": 149181 + }, + { + "epoch": 1.79, + "grad_norm": 5.621137195542997, + "learning_rate": 5.518286915079974e-07, + "loss": 0.9745, + "step": 149184 + }, + { + "epoch": 1.79, + "grad_norm": 3.93681940744493, + "learning_rate": 5.516373319482515e-07, + "loss": 0.9944, + "step": 149187 + }, + { + "epoch": 1.79, + "grad_norm": 10.22665272076017, + "learning_rate": 5.514460046323645e-07, + "loss": 1.1098, + "step": 149190 + }, + { + "epoch": 1.79, + "grad_norm": 12.26292099640461, + "learning_rate": 5.51254709560991e-07, + "loss": 0.8893, + "step": 149193 + }, + { + "epoch": 1.79, + "grad_norm": 15.434004116894332, + "learning_rate": 5.510634467347808e-07, + "loss": 1.0413, + "step": 149196 + }, + { + "epoch": 1.79, + "grad_norm": 11.523017190417741, + "learning_rate": 5.508722161543867e-07, + "loss": 1.1094, + "step": 149199 + }, + { + "epoch": 1.79, + "grad_norm": 15.214779671660743, + "learning_rate": 5.506810178204624e-07, + "loss": 1.4566, + "step": 149202 + }, + { + "epoch": 1.79, + "grad_norm": 3.8575195946489447, + "learning_rate": 5.504898517336599e-07, + "loss": 1.0545, + "step": 149205 + }, + { + "epoch": 1.79, + "grad_norm": 7.135494746551682, + "learning_rate": 5.502987178946329e-07, + "loss": 1.0315, + "step": 149208 + }, + { + "epoch": 1.79, + "grad_norm": 7.64739783988851, + "learning_rate": 5.50107616304032e-07, + "loss": 1.1045, + "step": 149211 + }, + { + "epoch": 1.79, + "grad_norm": 6.373263691428895, + "learning_rate": 5.4991654696251e-07, + "loss": 1.2561, + "step": 149214 + }, + { + "epoch": 1.79, + "grad_norm": 5.634965183075895, + "learning_rate": 5.497255098707188e-07, + "loss": 1.0324, + "step": 149217 + }, + { + "epoch": 1.79, + "grad_norm": 8.525021219173086, + "learning_rate": 5.495345050293099e-07, + "loss": 0.8626, + "step": 149220 + }, + { + "epoch": 1.79, + "grad_norm": 4.896437669292727, + "learning_rate": 5.493435324389374e-07, + "loss": 1.1283, + "step": 149223 + }, + { + "epoch": 1.79, + "grad_norm": 6.260090779450039, + "learning_rate": 5.491525921002494e-07, + "loss": 1.3285, + "step": 149226 + }, + { + "epoch": 1.79, + "grad_norm": 8.278892959826749, + "learning_rate": 5.489616840139e-07, + "loss": 1.263, + "step": 149229 + }, + { + "epoch": 1.79, + "grad_norm": 8.668309033145528, + "learning_rate": 5.487708081805398e-07, + "loss": 1.1142, + "step": 149232 + }, + { + "epoch": 1.79, + "grad_norm": 3.28043598719344, + "learning_rate": 5.485799646008228e-07, + "loss": 1.167, + "step": 149235 + }, + { + "epoch": 1.79, + "grad_norm": 22.74418177491112, + "learning_rate": 5.483891532753971e-07, + "loss": 1.3648, + "step": 149238 + }, + { + "epoch": 1.79, + "grad_norm": 7.55984292218246, + "learning_rate": 5.481983742049135e-07, + "loss": 0.8332, + "step": 149241 + }, + { + "epoch": 1.79, + "grad_norm": 6.85576407369546, + "learning_rate": 5.480076273900236e-07, + "loss": 1.2225, + "step": 149244 + }, + { + "epoch": 1.79, + "grad_norm": 15.08640170675204, + "learning_rate": 5.478169128313804e-07, + "loss": 1.573, + "step": 149247 + }, + { + "epoch": 1.79, + "grad_norm": 6.486804633677713, + "learning_rate": 5.476262305296343e-07, + "loss": 0.9813, + "step": 149250 + }, + { + "epoch": 1.79, + "grad_norm": 12.14940135866001, + "learning_rate": 5.474355804854325e-07, + "loss": 1.0604, + "step": 149253 + }, + { + "epoch": 1.79, + "grad_norm": 7.7222620211354425, + "learning_rate": 5.472449626994314e-07, + "loss": 1.224, + "step": 149256 + }, + { + "epoch": 1.79, + "grad_norm": 7.502420545647733, + "learning_rate": 5.470543771722758e-07, + "loss": 1.2319, + "step": 149259 + }, + { + "epoch": 1.79, + "grad_norm": 16.910168143873097, + "learning_rate": 5.468638239046209e-07, + "loss": 1.4941, + "step": 149262 + }, + { + "epoch": 1.79, + "grad_norm": 5.291145241400401, + "learning_rate": 5.466733028971138e-07, + "loss": 0.9935, + "step": 149265 + }, + { + "epoch": 1.79, + "grad_norm": 7.207232940565769, + "learning_rate": 5.464828141504041e-07, + "loss": 0.9342, + "step": 149268 + }, + { + "epoch": 1.79, + "grad_norm": 4.130154169672305, + "learning_rate": 5.462923576651447e-07, + "loss": 1.1204, + "step": 149271 + }, + { + "epoch": 1.79, + "grad_norm": 10.111765282007877, + "learning_rate": 5.461019334419837e-07, + "loss": 1.3724, + "step": 149274 + }, + { + "epoch": 1.8, + "grad_norm": 5.195695020150686, + "learning_rate": 5.45911541481573e-07, + "loss": 0.9945, + "step": 149277 + }, + { + "epoch": 1.8, + "grad_norm": 6.319449519589602, + "learning_rate": 5.457211817845609e-07, + "loss": 1.117, + "step": 149280 + }, + { + "epoch": 1.8, + "grad_norm": 6.286411090613463, + "learning_rate": 5.455308543515958e-07, + "loss": 1.0506, + "step": 149283 + }, + { + "epoch": 1.8, + "grad_norm": 8.024335708463441, + "learning_rate": 5.453405591833283e-07, + "loss": 0.9695, + "step": 149286 + }, + { + "epoch": 1.8, + "grad_norm": 7.788442869850646, + "learning_rate": 5.45150296280409e-07, + "loss": 1.2569, + "step": 149289 + }, + { + "epoch": 1.8, + "grad_norm": 10.485214077796837, + "learning_rate": 5.449600656434851e-07, + "loss": 0.9665, + "step": 149292 + }, + { + "epoch": 1.8, + "grad_norm": 6.282794703172435, + "learning_rate": 5.447698672732071e-07, + "loss": 1.1836, + "step": 149295 + }, + { + "epoch": 1.8, + "grad_norm": 6.953771338238284, + "learning_rate": 5.445797011702247e-07, + "loss": 1.3024, + "step": 149298 + }, + { + "epoch": 1.8, + "grad_norm": 9.100171064507164, + "learning_rate": 5.44389567335184e-07, + "loss": 1.4049, + "step": 149301 + }, + { + "epoch": 1.8, + "grad_norm": 14.462384779551863, + "learning_rate": 5.441994657687378e-07, + "loss": 0.9664, + "step": 149304 + }, + { + "epoch": 1.8, + "grad_norm": 3.007233965445117, + "learning_rate": 5.44009396471532e-07, + "loss": 1.0075, + "step": 149307 + }, + { + "epoch": 1.8, + "grad_norm": 3.4598608376121347, + "learning_rate": 5.438193594442154e-07, + "loss": 1.09, + "step": 149310 + }, + { + "epoch": 1.8, + "grad_norm": 7.499581403665374, + "learning_rate": 5.436293546874372e-07, + "loss": 0.8138, + "step": 149313 + }, + { + "epoch": 1.8, + "grad_norm": 2.8418378461453404, + "learning_rate": 5.43439382201848e-07, + "loss": 1.6856, + "step": 149316 + }, + { + "epoch": 1.8, + "grad_norm": 9.306668303932797, + "learning_rate": 5.432494419880918e-07, + "loss": 1.1993, + "step": 149319 + }, + { + "epoch": 1.8, + "grad_norm": 11.297805709104802, + "learning_rate": 5.430595340468203e-07, + "loss": 1.2363, + "step": 149322 + }, + { + "epoch": 1.8, + "grad_norm": 11.16453681933763, + "learning_rate": 5.428696583786785e-07, + "loss": 1.3056, + "step": 149325 + }, + { + "epoch": 1.8, + "grad_norm": 6.253126677661659, + "learning_rate": 5.426798149843171e-07, + "loss": 1.5509, + "step": 149328 + }, + { + "epoch": 1.8, + "grad_norm": 3.717884316721603, + "learning_rate": 5.424900038643844e-07, + "loss": 1.1151, + "step": 149331 + }, + { + "epoch": 1.8, + "grad_norm": 7.7744759539069825, + "learning_rate": 5.423002250195242e-07, + "loss": 1.1338, + "step": 149334 + }, + { + "epoch": 1.8, + "grad_norm": 3.429074918670247, + "learning_rate": 5.421104784503883e-07, + "loss": 0.8246, + "step": 149337 + }, + { + "epoch": 1.8, + "grad_norm": 12.025121874240835, + "learning_rate": 5.41920764157623e-07, + "loss": 1.1119, + "step": 149340 + }, + { + "epoch": 1.8, + "grad_norm": 7.468523609505138, + "learning_rate": 5.417310821418742e-07, + "loss": 1.011, + "step": 149343 + }, + { + "epoch": 1.8, + "grad_norm": 2.867718254461346, + "learning_rate": 5.415414324037915e-07, + "loss": 1.1231, + "step": 149346 + }, + { + "epoch": 1.8, + "grad_norm": 4.6076902611145725, + "learning_rate": 5.413518149440189e-07, + "loss": 1.1039, + "step": 149349 + }, + { + "epoch": 1.8, + "grad_norm": 9.554176875137767, + "learning_rate": 5.411622297632069e-07, + "loss": 1.0947, + "step": 149352 + }, + { + "epoch": 1.8, + "grad_norm": 14.065557595582447, + "learning_rate": 5.409726768620005e-07, + "loss": 1.098, + "step": 149355 + }, + { + "epoch": 1.8, + "grad_norm": 13.127037020057134, + "learning_rate": 5.407831562410492e-07, + "loss": 1.031, + "step": 149358 + }, + { + "epoch": 1.8, + "grad_norm": 6.445614362024931, + "learning_rate": 5.40593667900996e-07, + "loss": 1.2087, + "step": 149361 + }, + { + "epoch": 1.8, + "grad_norm": 7.153210914889151, + "learning_rate": 5.404042118424902e-07, + "loss": 1.0384, + "step": 149364 + }, + { + "epoch": 1.8, + "grad_norm": 2.915114737455082, + "learning_rate": 5.402147880661768e-07, + "loss": 1.1934, + "step": 149367 + }, + { + "epoch": 1.8, + "grad_norm": 18.70842481873548, + "learning_rate": 5.400253965727042e-07, + "loss": 1.2309, + "step": 149370 + }, + { + "epoch": 1.8, + "grad_norm": 4.080978407868278, + "learning_rate": 5.398360373627154e-07, + "loss": 1.0456, + "step": 149373 + }, + { + "epoch": 1.8, + "grad_norm": 4.792915201675169, + "learning_rate": 5.396467104368597e-07, + "loss": 1.2364, + "step": 149376 + }, + { + "epoch": 1.8, + "grad_norm": 7.085335627946993, + "learning_rate": 5.394574157957811e-07, + "loss": 1.3668, + "step": 149379 + }, + { + "epoch": 1.8, + "grad_norm": 3.8658493704316665, + "learning_rate": 5.39268153440129e-07, + "loss": 0.9684, + "step": 149382 + }, + { + "epoch": 1.8, + "grad_norm": 5.415899948778862, + "learning_rate": 5.390789233705451e-07, + "loss": 1.0499, + "step": 149385 + }, + { + "epoch": 1.8, + "grad_norm": 32.53670162742194, + "learning_rate": 5.38889725587679e-07, + "loss": 1.2919, + "step": 149388 + }, + { + "epoch": 1.8, + "grad_norm": 8.694392842881413, + "learning_rate": 5.387005600921724e-07, + "loss": 0.926, + "step": 149391 + }, + { + "epoch": 1.8, + "grad_norm": 4.635976765067402, + "learning_rate": 5.385114268846725e-07, + "loss": 0.7669, + "step": 149394 + }, + { + "epoch": 1.8, + "grad_norm": 4.4255932603281884, + "learning_rate": 5.383223259658277e-07, + "loss": 1.3509, + "step": 149397 + }, + { + "epoch": 1.8, + "grad_norm": 14.21204959796743, + "learning_rate": 5.381332573362786e-07, + "loss": 1.2024, + "step": 149400 + }, + { + "epoch": 1.8, + "grad_norm": 5.7723059042284, + "learning_rate": 5.379442209966723e-07, + "loss": 1.0779, + "step": 149403 + }, + { + "epoch": 1.8, + "grad_norm": 25.9110589913513, + "learning_rate": 5.377552169476552e-07, + "loss": 0.8259, + "step": 149406 + }, + { + "epoch": 1.8, + "grad_norm": 6.357033110768984, + "learning_rate": 5.375662451898711e-07, + "loss": 1.3317, + "step": 149409 + }, + { + "epoch": 1.8, + "grad_norm": 3.2184132330693433, + "learning_rate": 5.373773057239651e-07, + "loss": 1.0562, + "step": 149412 + }, + { + "epoch": 1.8, + "grad_norm": 3.939260830143382, + "learning_rate": 5.371883985505822e-07, + "loss": 1.2013, + "step": 149415 + }, + { + "epoch": 1.8, + "grad_norm": 2.0208300877673193, + "learning_rate": 5.369995236703651e-07, + "loss": 1.0803, + "step": 149418 + }, + { + "epoch": 1.8, + "grad_norm": 9.014493098805675, + "learning_rate": 5.368106810839613e-07, + "loss": 1.1178, + "step": 149421 + }, + { + "epoch": 1.8, + "grad_norm": 5.142318347614005, + "learning_rate": 5.366218707920146e-07, + "loss": 1.1882, + "step": 149424 + }, + { + "epoch": 1.8, + "grad_norm": 5.185520280819336, + "learning_rate": 5.364330927951678e-07, + "loss": 1.0264, + "step": 149427 + }, + { + "epoch": 1.8, + "grad_norm": 8.476197460149056, + "learning_rate": 5.36244347094067e-07, + "loss": 1.1402, + "step": 149430 + }, + { + "epoch": 1.8, + "grad_norm": 4.688614280211338, + "learning_rate": 5.360556336893552e-07, + "loss": 1.0284, + "step": 149433 + }, + { + "epoch": 1.8, + "grad_norm": 5.310625377324089, + "learning_rate": 5.358669525816751e-07, + "loss": 0.9416, + "step": 149436 + }, + { + "epoch": 1.8, + "grad_norm": 62.44758015232718, + "learning_rate": 5.35678303771675e-07, + "loss": 0.9018, + "step": 149439 + }, + { + "epoch": 1.8, + "grad_norm": 3.566970036578211, + "learning_rate": 5.354896872599935e-07, + "loss": 0.9505, + "step": 149442 + }, + { + "epoch": 1.8, + "grad_norm": 25.197868081210046, + "learning_rate": 5.353011030472766e-07, + "loss": 0.9792, + "step": 149445 + }, + { + "epoch": 1.8, + "grad_norm": 5.278497266722394, + "learning_rate": 5.351125511341692e-07, + "loss": 0.9797, + "step": 149448 + }, + { + "epoch": 1.8, + "grad_norm": 17.442930273955277, + "learning_rate": 5.349240315213133e-07, + "loss": 1.2567, + "step": 149451 + }, + { + "epoch": 1.8, + "grad_norm": 12.410398841506906, + "learning_rate": 5.347355442093516e-07, + "loss": 1.1161, + "step": 149454 + }, + { + "epoch": 1.8, + "grad_norm": 5.428118057833583, + "learning_rate": 5.34547089198928e-07, + "loss": 1.0618, + "step": 149457 + }, + { + "epoch": 1.8, + "grad_norm": 7.124917378784285, + "learning_rate": 5.343586664906852e-07, + "loss": 1.0381, + "step": 149460 + }, + { + "epoch": 1.8, + "grad_norm": 3.730342245615437, + "learning_rate": 5.341702760852674e-07, + "loss": 0.7747, + "step": 149463 + }, + { + "epoch": 1.8, + "grad_norm": 10.93504121798631, + "learning_rate": 5.339819179833184e-07, + "loss": 1.0714, + "step": 149466 + }, + { + "epoch": 1.8, + "grad_norm": 8.775126042505955, + "learning_rate": 5.337935921854776e-07, + "loss": 1.0173, + "step": 149469 + }, + { + "epoch": 1.8, + "grad_norm": 7.517377021576441, + "learning_rate": 5.336052986923912e-07, + "loss": 1.1542, + "step": 149472 + }, + { + "epoch": 1.8, + "grad_norm": 2.99883366496192, + "learning_rate": 5.334170375046988e-07, + "loss": 0.9792, + "step": 149475 + }, + { + "epoch": 1.8, + "grad_norm": 11.146908828825156, + "learning_rate": 5.332288086230453e-07, + "loss": 1.2174, + "step": 149478 + }, + { + "epoch": 1.8, + "grad_norm": 19.725740705459426, + "learning_rate": 5.330406120480702e-07, + "loss": 0.9831, + "step": 149481 + }, + { + "epoch": 1.8, + "grad_norm": 5.33379292835166, + "learning_rate": 5.328524477804187e-07, + "loss": 1.3428, + "step": 149484 + }, + { + "epoch": 1.8, + "grad_norm": 4.163100427934573, + "learning_rate": 5.326643158207301e-07, + "loss": 0.867, + "step": 149487 + }, + { + "epoch": 1.8, + "grad_norm": 6.561715932463492, + "learning_rate": 5.324762161696506e-07, + "loss": 1.3552, + "step": 149490 + }, + { + "epoch": 1.8, + "grad_norm": 71.82298869892645, + "learning_rate": 5.322881488278198e-07, + "loss": 1.2473, + "step": 149493 + }, + { + "epoch": 1.8, + "grad_norm": 8.526213291809137, + "learning_rate": 5.32100113795877e-07, + "loss": 1.0136, + "step": 149496 + }, + { + "epoch": 1.8, + "grad_norm": 5.579451680201454, + "learning_rate": 5.319121110744663e-07, + "loss": 1.2739, + "step": 149499 + }, + { + "epoch": 1.8, + "grad_norm": 2.3454062629764216, + "learning_rate": 5.317241406642304e-07, + "loss": 1.3837, + "step": 149502 + }, + { + "epoch": 1.8, + "grad_norm": 9.632160092325314, + "learning_rate": 5.315362025658099e-07, + "loss": 1.1571, + "step": 149505 + }, + { + "epoch": 1.8, + "grad_norm": 4.3445954804533695, + "learning_rate": 5.313482967798445e-07, + "loss": 1.0544, + "step": 149508 + }, + { + "epoch": 1.8, + "grad_norm": 8.485291497306736, + "learning_rate": 5.311604233069767e-07, + "loss": 0.935, + "step": 149511 + }, + { + "epoch": 1.8, + "grad_norm": 8.959068771383588, + "learning_rate": 5.309725821478495e-07, + "loss": 1.0323, + "step": 149514 + }, + { + "epoch": 1.8, + "grad_norm": 9.081685416021566, + "learning_rate": 5.307847733031001e-07, + "loss": 1.126, + "step": 149517 + }, + { + "epoch": 1.8, + "grad_norm": 4.622583066262292, + "learning_rate": 5.305969967733738e-07, + "loss": 1.3356, + "step": 149520 + }, + { + "epoch": 1.8, + "grad_norm": 9.813616095916998, + "learning_rate": 5.304092525593074e-07, + "loss": 1.4575, + "step": 149523 + }, + { + "epoch": 1.8, + "grad_norm": 2.8586816182339927, + "learning_rate": 5.302215406615429e-07, + "loss": 1.2762, + "step": 149526 + }, + { + "epoch": 1.8, + "grad_norm": 8.270494357173765, + "learning_rate": 5.30033861080721e-07, + "loss": 1.1844, + "step": 149529 + }, + { + "epoch": 1.8, + "grad_norm": 6.032687507030941, + "learning_rate": 5.298462138174842e-07, + "loss": 1.3313, + "step": 149532 + }, + { + "epoch": 1.8, + "grad_norm": 9.229057099957322, + "learning_rate": 5.296585988724712e-07, + "loss": 1.0482, + "step": 149535 + }, + { + "epoch": 1.8, + "grad_norm": 10.26274745072267, + "learning_rate": 5.294710162463213e-07, + "loss": 0.8687, + "step": 149538 + }, + { + "epoch": 1.8, + "grad_norm": 6.822195846353157, + "learning_rate": 5.292834659396751e-07, + "loss": 1.3771, + "step": 149541 + }, + { + "epoch": 1.8, + "grad_norm": 4.564661142803067, + "learning_rate": 5.290959479531732e-07, + "loss": 1.0964, + "step": 149544 + }, + { + "epoch": 1.8, + "grad_norm": 4.1303289854061465, + "learning_rate": 5.289084622874563e-07, + "loss": 1.3884, + "step": 149547 + }, + { + "epoch": 1.8, + "grad_norm": 17.92824557344043, + "learning_rate": 5.287210089431627e-07, + "loss": 1.1356, + "step": 149550 + }, + { + "epoch": 1.8, + "grad_norm": 3.11960460204217, + "learning_rate": 5.28533587920933e-07, + "loss": 1.4451, + "step": 149553 + }, + { + "epoch": 1.8, + "grad_norm": 5.03416561359633, + "learning_rate": 5.283461992214068e-07, + "loss": 1.4726, + "step": 149556 + }, + { + "epoch": 1.8, + "grad_norm": 9.009364751328064, + "learning_rate": 5.281588428452245e-07, + "loss": 1.0767, + "step": 149559 + }, + { + "epoch": 1.8, + "grad_norm": 8.077224080453869, + "learning_rate": 5.279715187930223e-07, + "loss": 1.3837, + "step": 149562 + }, + { + "epoch": 1.8, + "grad_norm": 18.752706178316238, + "learning_rate": 5.277842270654409e-07, + "loss": 1.0527, + "step": 149565 + }, + { + "epoch": 1.8, + "grad_norm": 4.124270997270785, + "learning_rate": 5.27596967663121e-07, + "loss": 1.1562, + "step": 149568 + }, + { + "epoch": 1.8, + "grad_norm": 4.881325205472943, + "learning_rate": 5.274097405867006e-07, + "loss": 1.2504, + "step": 149571 + }, + { + "epoch": 1.8, + "grad_norm": 5.758501420156783, + "learning_rate": 5.272225458368197e-07, + "loss": 0.9953, + "step": 149574 + }, + { + "epoch": 1.8, + "grad_norm": 3.7470162126819235, + "learning_rate": 5.270353834141162e-07, + "loss": 1.2758, + "step": 149577 + }, + { + "epoch": 1.8, + "grad_norm": 6.939272965249106, + "learning_rate": 5.268482533192277e-07, + "loss": 1.1108, + "step": 149580 + }, + { + "epoch": 1.8, + "grad_norm": 6.225102879910148, + "learning_rate": 5.266611555527945e-07, + "loss": 0.7383, + "step": 149583 + }, + { + "epoch": 1.8, + "grad_norm": 5.041476666722467, + "learning_rate": 5.264740901154552e-07, + "loss": 1.1371, + "step": 149586 + }, + { + "epoch": 1.8, + "grad_norm": 4.152115612757306, + "learning_rate": 5.26287057007846e-07, + "loss": 0.8616, + "step": 149589 + }, + { + "epoch": 1.8, + "grad_norm": 3.76949148462269, + "learning_rate": 5.261000562306062e-07, + "loss": 1.2593, + "step": 149592 + }, + { + "epoch": 1.8, + "grad_norm": 3.009671022499888, + "learning_rate": 5.259130877843754e-07, + "loss": 1.2378, + "step": 149595 + }, + { + "epoch": 1.8, + "grad_norm": 2.8015929423064168, + "learning_rate": 5.25726151669792e-07, + "loss": 1.1169, + "step": 149598 + }, + { + "epoch": 1.8, + "grad_norm": 3.2486748140825177, + "learning_rate": 5.255392478874921e-07, + "loss": 1.2308, + "step": 149601 + }, + { + "epoch": 1.8, + "grad_norm": 3.331565602178867, + "learning_rate": 5.25352376438113e-07, + "loss": 1.1069, + "step": 149604 + }, + { + "epoch": 1.8, + "grad_norm": 2.902048941087206, + "learning_rate": 5.251655373222931e-07, + "loss": 1.0952, + "step": 149607 + }, + { + "epoch": 1.8, + "grad_norm": 12.242335171934315, + "learning_rate": 5.249787305406706e-07, + "loss": 1.1395, + "step": 149610 + }, + { + "epoch": 1.8, + "grad_norm": 19.58876940920844, + "learning_rate": 5.247919560938842e-07, + "loss": 1.2371, + "step": 149613 + }, + { + "epoch": 1.8, + "grad_norm": 17.692302129043835, + "learning_rate": 5.246052139825686e-07, + "loss": 1.2917, + "step": 149616 + }, + { + "epoch": 1.8, + "grad_norm": 24.892200103932534, + "learning_rate": 5.244185042073635e-07, + "loss": 1.2433, + "step": 149619 + }, + { + "epoch": 1.8, + "grad_norm": 4.156059527653062, + "learning_rate": 5.242318267689039e-07, + "loss": 1.1023, + "step": 149622 + }, + { + "epoch": 1.8, + "grad_norm": 4.373061276323695, + "learning_rate": 5.240451816678283e-07, + "loss": 0.9853, + "step": 149625 + }, + { + "epoch": 1.8, + "grad_norm": 11.024708536214979, + "learning_rate": 5.238585689047737e-07, + "loss": 1.095, + "step": 149628 + }, + { + "epoch": 1.8, + "grad_norm": 5.9028397556889685, + "learning_rate": 5.236719884803765e-07, + "loss": 0.8279, + "step": 149631 + }, + { + "epoch": 1.8, + "grad_norm": 11.315704829563094, + "learning_rate": 5.234854403952716e-07, + "loss": 1.1458, + "step": 149634 + }, + { + "epoch": 1.8, + "grad_norm": 9.756414551178183, + "learning_rate": 5.232989246500986e-07, + "loss": 1.1192, + "step": 149637 + }, + { + "epoch": 1.8, + "grad_norm": 13.920088828924117, + "learning_rate": 5.231124412454947e-07, + "loss": 1.0086, + "step": 149640 + }, + { + "epoch": 1.8, + "grad_norm": 12.728944780966323, + "learning_rate": 5.229259901820938e-07, + "loss": 1.0759, + "step": 149643 + }, + { + "epoch": 1.8, + "grad_norm": 11.194362075904076, + "learning_rate": 5.22739571460531e-07, + "loss": 1.2341, + "step": 149646 + }, + { + "epoch": 1.8, + "grad_norm": 5.644244177892509, + "learning_rate": 5.225531850814447e-07, + "loss": 1.2649, + "step": 149649 + }, + { + "epoch": 1.8, + "grad_norm": 3.7854014186658294, + "learning_rate": 5.22366831045471e-07, + "loss": 1.2958, + "step": 149652 + }, + { + "epoch": 1.8, + "grad_norm": 3.700391323886963, + "learning_rate": 5.221805093532472e-07, + "loss": 0.9367, + "step": 149655 + }, + { + "epoch": 1.8, + "grad_norm": 4.4916564809640915, + "learning_rate": 5.219942200054052e-07, + "loss": 1.2369, + "step": 149658 + }, + { + "epoch": 1.8, + "grad_norm": 8.278356628905128, + "learning_rate": 5.218079630025829e-07, + "loss": 1.0797, + "step": 149661 + }, + { + "epoch": 1.8, + "grad_norm": 10.351266109975862, + "learning_rate": 5.216217383454181e-07, + "loss": 1.4177, + "step": 149664 + }, + { + "epoch": 1.8, + "grad_norm": 9.118310765205658, + "learning_rate": 5.214355460345444e-07, + "loss": 1.1245, + "step": 149667 + }, + { + "epoch": 1.8, + "grad_norm": 18.3432189553259, + "learning_rate": 5.212493860705947e-07, + "loss": 1.0632, + "step": 149670 + }, + { + "epoch": 1.8, + "grad_norm": 4.338895351254602, + "learning_rate": 5.210632584542074e-07, + "loss": 1.2987, + "step": 149673 + }, + { + "epoch": 1.8, + "grad_norm": 16.420322167991117, + "learning_rate": 5.208771631860165e-07, + "loss": 1.2471, + "step": 149676 + }, + { + "epoch": 1.8, + "grad_norm": 3.136990275097592, + "learning_rate": 5.20691100266657e-07, + "loss": 1.1344, + "step": 149679 + }, + { + "epoch": 1.8, + "grad_norm": 10.800305173903352, + "learning_rate": 5.205050696967661e-07, + "loss": 1.1631, + "step": 149682 + }, + { + "epoch": 1.8, + "grad_norm": 4.810813024057362, + "learning_rate": 5.203190714769779e-07, + "loss": 1.3105, + "step": 149685 + }, + { + "epoch": 1.8, + "grad_norm": 4.775153818160358, + "learning_rate": 5.201331056079239e-07, + "loss": 1.2254, + "step": 149688 + }, + { + "epoch": 1.8, + "grad_norm": 9.054286274388785, + "learning_rate": 5.199471720902405e-07, + "loss": 0.948, + "step": 149691 + }, + { + "epoch": 1.8, + "grad_norm": 5.618730311225961, + "learning_rate": 5.197612709245647e-07, + "loss": 1.2754, + "step": 149694 + }, + { + "epoch": 1.8, + "grad_norm": 7.151605225378547, + "learning_rate": 5.195754021115273e-07, + "loss": 0.8352, + "step": 149697 + }, + { + "epoch": 1.8, + "grad_norm": 12.85505690910634, + "learning_rate": 5.193895656517644e-07, + "loss": 0.8512, + "step": 149700 + }, + { + "epoch": 1.8, + "grad_norm": 8.176893386042325, + "learning_rate": 5.1920376154591e-07, + "loss": 1.0296, + "step": 149703 + }, + { + "epoch": 1.8, + "grad_norm": 6.182918309645099, + "learning_rate": 5.19017989794599e-07, + "loss": 1.129, + "step": 149706 + }, + { + "epoch": 1.8, + "grad_norm": 18.29456652296626, + "learning_rate": 5.188322503984655e-07, + "loss": 1.3678, + "step": 149709 + }, + { + "epoch": 1.8, + "grad_norm": 12.002221289509404, + "learning_rate": 5.1864654335814e-07, + "loss": 1.1447, + "step": 149712 + }, + { + "epoch": 1.8, + "grad_norm": 6.730240423630302, + "learning_rate": 5.184608686742598e-07, + "loss": 1.0631, + "step": 149715 + }, + { + "epoch": 1.8, + "grad_norm": 10.641121607154025, + "learning_rate": 5.182752263474566e-07, + "loss": 1.137, + "step": 149718 + }, + { + "epoch": 1.8, + "grad_norm": 8.494905231109714, + "learning_rate": 5.180896163783666e-07, + "loss": 1.2232, + "step": 149721 + }, + { + "epoch": 1.8, + "grad_norm": 8.704281527216901, + "learning_rate": 5.179040387676204e-07, + "loss": 1.0656, + "step": 149724 + }, + { + "epoch": 1.8, + "grad_norm": 12.724983842778949, + "learning_rate": 5.17718493515853e-07, + "loss": 1.0939, + "step": 149727 + }, + { + "epoch": 1.8, + "grad_norm": 8.969442653690495, + "learning_rate": 5.17532980623695e-07, + "loss": 1.1196, + "step": 149730 + }, + { + "epoch": 1.8, + "grad_norm": 11.746829196320473, + "learning_rate": 5.173475000917827e-07, + "loss": 0.8541, + "step": 149733 + }, + { + "epoch": 1.8, + "grad_norm": 6.240814341333194, + "learning_rate": 5.171620519207477e-07, + "loss": 1.0319, + "step": 149736 + }, + { + "epoch": 1.8, + "grad_norm": 7.793543530828361, + "learning_rate": 5.169766361112228e-07, + "loss": 1.182, + "step": 149739 + }, + { + "epoch": 1.8, + "grad_norm": 14.439631287448636, + "learning_rate": 5.167912526638396e-07, + "loss": 1.0666, + "step": 149742 + }, + { + "epoch": 1.8, + "grad_norm": 10.036326021972714, + "learning_rate": 5.166059015792335e-07, + "loss": 0.8011, + "step": 149745 + }, + { + "epoch": 1.8, + "grad_norm": 9.794855010401548, + "learning_rate": 5.16420582858036e-07, + "loss": 1.3092, + "step": 149748 + }, + { + "epoch": 1.8, + "grad_norm": 7.157379412983201, + "learning_rate": 5.162352965008788e-07, + "loss": 1.07, + "step": 149751 + }, + { + "epoch": 1.8, + "grad_norm": 4.021470741658343, + "learning_rate": 5.160500425083936e-07, + "loss": 1.2296, + "step": 149754 + }, + { + "epoch": 1.8, + "grad_norm": 12.110134331909455, + "learning_rate": 5.158648208812134e-07, + "loss": 1.1906, + "step": 149757 + }, + { + "epoch": 1.8, + "grad_norm": 8.619779602881056, + "learning_rate": 5.156796316199708e-07, + "loss": 0.9278, + "step": 149760 + }, + { + "epoch": 1.8, + "grad_norm": 8.882221522237472, + "learning_rate": 5.154944747252988e-07, + "loss": 1.0556, + "step": 149763 + }, + { + "epoch": 1.8, + "grad_norm": 8.854199297502527, + "learning_rate": 5.153093501978257e-07, + "loss": 0.976, + "step": 149766 + }, + { + "epoch": 1.8, + "grad_norm": 6.306676955523623, + "learning_rate": 5.151242580381865e-07, + "loss": 1.1556, + "step": 149769 + }, + { + "epoch": 1.8, + "grad_norm": 7.516569197156718, + "learning_rate": 5.14939198247012e-07, + "loss": 1.0444, + "step": 149772 + }, + { + "epoch": 1.8, + "grad_norm": 7.450305742931158, + "learning_rate": 5.147541708249338e-07, + "loss": 1.1805, + "step": 149775 + }, + { + "epoch": 1.8, + "grad_norm": 5.8421632449003305, + "learning_rate": 5.145691757725813e-07, + "loss": 1.2737, + "step": 149778 + }, + { + "epoch": 1.8, + "grad_norm": 9.313817964477101, + "learning_rate": 5.143842130905885e-07, + "loss": 1.1027, + "step": 149781 + }, + { + "epoch": 1.8, + "grad_norm": 3.5492753508151513, + "learning_rate": 5.14199282779585e-07, + "loss": 0.9556, + "step": 149784 + }, + { + "epoch": 1.8, + "grad_norm": 8.86310134447863, + "learning_rate": 5.140143848402035e-07, + "loss": 1.0105, + "step": 149787 + }, + { + "epoch": 1.8, + "grad_norm": 14.906349997134416, + "learning_rate": 5.138295192730746e-07, + "loss": 0.9737, + "step": 149790 + }, + { + "epoch": 1.8, + "grad_norm": 7.2451602972519265, + "learning_rate": 5.13644686078828e-07, + "loss": 0.8227, + "step": 149793 + }, + { + "epoch": 1.8, + "grad_norm": 2.0174989587929137, + "learning_rate": 5.134598852580952e-07, + "loss": 1.3864, + "step": 149796 + }, + { + "epoch": 1.8, + "grad_norm": 5.714173424743706, + "learning_rate": 5.132751168115058e-07, + "loss": 1.0809, + "step": 149799 + }, + { + "epoch": 1.8, + "grad_norm": 11.535256572644661, + "learning_rate": 5.130903807396936e-07, + "loss": 1.0957, + "step": 149802 + }, + { + "epoch": 1.8, + "grad_norm": 15.007818212983016, + "learning_rate": 5.12905677043285e-07, + "loss": 1.1051, + "step": 149805 + }, + { + "epoch": 1.8, + "grad_norm": 6.254464727534969, + "learning_rate": 5.127210057229115e-07, + "loss": 1.2368, + "step": 149808 + }, + { + "epoch": 1.8, + "grad_norm": 7.7968740967817425, + "learning_rate": 5.12536366779206e-07, + "loss": 0.9222, + "step": 149811 + }, + { + "epoch": 1.8, + "grad_norm": 16.667714741151084, + "learning_rate": 5.123517602127948e-07, + "loss": 0.8553, + "step": 149814 + }, + { + "epoch": 1.8, + "grad_norm": 4.128613859257463, + "learning_rate": 5.121671860243105e-07, + "loss": 1.3573, + "step": 149817 + }, + { + "epoch": 1.8, + "grad_norm": 11.277645979565955, + "learning_rate": 5.119826442143816e-07, + "loss": 0.8688, + "step": 149820 + }, + { + "epoch": 1.8, + "grad_norm": 6.48130888293755, + "learning_rate": 5.117981347836376e-07, + "loss": 1.1892, + "step": 149823 + }, + { + "epoch": 1.8, + "grad_norm": 20.46541644625208, + "learning_rate": 5.116136577327102e-07, + "loss": 1.1594, + "step": 149826 + }, + { + "epoch": 1.8, + "grad_norm": 2.586834257752769, + "learning_rate": 5.114292130622278e-07, + "loss": 1.1844, + "step": 149829 + }, + { + "epoch": 1.8, + "grad_norm": 5.179585575618601, + "learning_rate": 5.112448007728188e-07, + "loss": 1.077, + "step": 149832 + }, + { + "epoch": 1.8, + "grad_norm": 7.956980351942868, + "learning_rate": 5.110604208651148e-07, + "loss": 0.8955, + "step": 149835 + }, + { + "epoch": 1.8, + "grad_norm": 13.496682542925441, + "learning_rate": 5.108760733397422e-07, + "loss": 1.2535, + "step": 149838 + }, + { + "epoch": 1.8, + "grad_norm": 2.65205536559535, + "learning_rate": 5.106917581973314e-07, + "loss": 1.104, + "step": 149841 + }, + { + "epoch": 1.8, + "grad_norm": 54.8643570606439, + "learning_rate": 5.105074754385131e-07, + "loss": 0.9055, + "step": 149844 + }, + { + "epoch": 1.8, + "grad_norm": 3.6737687607907454, + "learning_rate": 5.103232250639145e-07, + "loss": 1.1857, + "step": 149847 + }, + { + "epoch": 1.8, + "grad_norm": 6.295540968660517, + "learning_rate": 5.10139007074163e-07, + "loss": 0.9636, + "step": 149850 + }, + { + "epoch": 1.8, + "grad_norm": 9.938699227851227, + "learning_rate": 5.099548214698913e-07, + "loss": 0.9424, + "step": 149853 + }, + { + "epoch": 1.8, + "grad_norm": 6.794051839545092, + "learning_rate": 5.097706682517245e-07, + "loss": 0.8597, + "step": 149856 + }, + { + "epoch": 1.8, + "grad_norm": 13.334190253074878, + "learning_rate": 5.09586547420291e-07, + "loss": 1.5489, + "step": 149859 + }, + { + "epoch": 1.8, + "grad_norm": 13.64766546110682, + "learning_rate": 5.094024589762204e-07, + "loss": 1.3366, + "step": 149862 + }, + { + "epoch": 1.8, + "grad_norm": 15.039059008767696, + "learning_rate": 5.092184029201408e-07, + "loss": 1.248, + "step": 149865 + }, + { + "epoch": 1.8, + "grad_norm": 16.528066515669995, + "learning_rate": 5.090343792526798e-07, + "loss": 1.671, + "step": 149868 + }, + { + "epoch": 1.8, + "grad_norm": 20.180845824956496, + "learning_rate": 5.088503879744677e-07, + "loss": 1.2639, + "step": 149871 + }, + { + "epoch": 1.8, + "grad_norm": 11.861582607667245, + "learning_rate": 5.086664290861287e-07, + "loss": 0.9327, + "step": 149874 + }, + { + "epoch": 1.8, + "grad_norm": 6.901584449052375, + "learning_rate": 5.084825025882945e-07, + "loss": 1.3471, + "step": 149877 + }, + { + "epoch": 1.8, + "grad_norm": 5.481126067507602, + "learning_rate": 5.082986084815888e-07, + "loss": 1.2141, + "step": 149880 + }, + { + "epoch": 1.8, + "grad_norm": 15.185682321908706, + "learning_rate": 5.081147467666425e-07, + "loss": 1.2747, + "step": 149883 + }, + { + "epoch": 1.8, + "grad_norm": 4.748377277101751, + "learning_rate": 5.079309174440794e-07, + "loss": 1.1327, + "step": 149886 + }, + { + "epoch": 1.8, + "grad_norm": 6.882429153406541, + "learning_rate": 5.077471205145301e-07, + "loss": 0.9941, + "step": 149889 + }, + { + "epoch": 1.8, + "grad_norm": 9.480256381255268, + "learning_rate": 5.075633559786209e-07, + "loss": 1.0093, + "step": 149892 + }, + { + "epoch": 1.8, + "grad_norm": 9.554329203429557, + "learning_rate": 5.073796238369799e-07, + "loss": 1.2975, + "step": 149895 + }, + { + "epoch": 1.8, + "grad_norm": 6.6194293716477794, + "learning_rate": 5.071959240902314e-07, + "loss": 1.362, + "step": 149898 + }, + { + "epoch": 1.8, + "grad_norm": 2.862482085473814, + "learning_rate": 5.070122567390057e-07, + "loss": 1.0189, + "step": 149901 + }, + { + "epoch": 1.8, + "grad_norm": 5.952402381862805, + "learning_rate": 5.068286217839257e-07, + "loss": 1.3947, + "step": 149904 + }, + { + "epoch": 1.8, + "grad_norm": 8.50493581176082, + "learning_rate": 5.0664501922562e-07, + "loss": 1.1524, + "step": 149907 + }, + { + "epoch": 1.8, + "grad_norm": 8.79517226785271, + "learning_rate": 5.064614490647179e-07, + "loss": 1.1554, + "step": 149910 + }, + { + "epoch": 1.8, + "grad_norm": 3.4262009732365213, + "learning_rate": 5.062779113018412e-07, + "loss": 1.1277, + "step": 149913 + }, + { + "epoch": 1.8, + "grad_norm": 10.454830223454906, + "learning_rate": 5.060944059376194e-07, + "loss": 1.2124, + "step": 149916 + }, + { + "epoch": 1.8, + "grad_norm": 2.3182028500066667, + "learning_rate": 5.059109329726774e-07, + "loss": 1.6925, + "step": 149919 + }, + { + "epoch": 1.8, + "grad_norm": 5.789650835832118, + "learning_rate": 5.057274924076417e-07, + "loss": 1.1839, + "step": 149922 + }, + { + "epoch": 1.8, + "grad_norm": 10.020980658277008, + "learning_rate": 5.055440842431391e-07, + "loss": 0.9335, + "step": 149925 + }, + { + "epoch": 1.8, + "grad_norm": 4.8063050679452495, + "learning_rate": 5.053607084797929e-07, + "loss": 1.2071, + "step": 149928 + }, + { + "epoch": 1.8, + "grad_norm": 4.2532726755222825, + "learning_rate": 5.051773651182312e-07, + "loss": 1.1431, + "step": 149931 + }, + { + "epoch": 1.8, + "grad_norm": 6.939056635624671, + "learning_rate": 5.049940541590792e-07, + "loss": 1.0514, + "step": 149934 + }, + { + "epoch": 1.8, + "grad_norm": 9.67156697003182, + "learning_rate": 5.04810775602963e-07, + "loss": 0.9301, + "step": 149937 + }, + { + "epoch": 1.8, + "grad_norm": 6.3075378045043395, + "learning_rate": 5.046275294505066e-07, + "loss": 1.0157, + "step": 149940 + }, + { + "epoch": 1.8, + "grad_norm": 14.115133719931254, + "learning_rate": 5.044443157023382e-07, + "loss": 0.9002, + "step": 149943 + }, + { + "epoch": 1.8, + "grad_norm": 32.81217295849192, + "learning_rate": 5.042611343590787e-07, + "loss": 1.1106, + "step": 149946 + }, + { + "epoch": 1.8, + "grad_norm": 5.391643366045409, + "learning_rate": 5.040779854213563e-07, + "loss": 0.8253, + "step": 149949 + }, + { + "epoch": 1.8, + "grad_norm": 3.452491549671207, + "learning_rate": 5.03894868889796e-07, + "loss": 1.1035, + "step": 149952 + }, + { + "epoch": 1.8, + "grad_norm": 3.4211162433257063, + "learning_rate": 5.037117847650208e-07, + "loss": 1.3592, + "step": 149955 + }, + { + "epoch": 1.8, + "grad_norm": 19.712235480364527, + "learning_rate": 5.035287330476568e-07, + "loss": 1.1541, + "step": 149958 + }, + { + "epoch": 1.8, + "grad_norm": 5.322208951348901, + "learning_rate": 5.033457137383302e-07, + "loss": 1.2338, + "step": 149961 + }, + { + "epoch": 1.8, + "grad_norm": 7.125572739424562, + "learning_rate": 5.031627268376638e-07, + "loss": 1.1275, + "step": 149964 + }, + { + "epoch": 1.8, + "grad_norm": 2.478654409962901, + "learning_rate": 5.029797723462815e-07, + "loss": 1.0251, + "step": 149967 + }, + { + "epoch": 1.8, + "grad_norm": 8.854491458286402, + "learning_rate": 5.027968502648084e-07, + "loss": 0.9959, + "step": 149970 + }, + { + "epoch": 1.8, + "grad_norm": 10.483506798784184, + "learning_rate": 5.026139605938684e-07, + "loss": 0.7546, + "step": 149973 + }, + { + "epoch": 1.8, + "grad_norm": 5.554769069841225, + "learning_rate": 5.024311033340856e-07, + "loss": 1.3838, + "step": 149976 + }, + { + "epoch": 1.8, + "grad_norm": 2.8445594745908056, + "learning_rate": 5.02248278486086e-07, + "loss": 1.1337, + "step": 149979 + }, + { + "epoch": 1.8, + "grad_norm": 23.8063133782733, + "learning_rate": 5.020654860504903e-07, + "loss": 1.1357, + "step": 149982 + }, + { + "epoch": 1.8, + "grad_norm": 3.2188590719155536, + "learning_rate": 5.018827260279269e-07, + "loss": 0.8624, + "step": 149985 + }, + { + "epoch": 1.8, + "grad_norm": 4.4132017537646835, + "learning_rate": 5.01699998419013e-07, + "loss": 1.1152, + "step": 149988 + }, + { + "epoch": 1.8, + "grad_norm": 2.6671863485424456, + "learning_rate": 5.015173032243792e-07, + "loss": 1.1583, + "step": 149991 + }, + { + "epoch": 1.8, + "grad_norm": 5.072659607769379, + "learning_rate": 5.013346404446429e-07, + "loss": 1.0652, + "step": 149994 + }, + { + "epoch": 1.8, + "grad_norm": 2.653503379286679, + "learning_rate": 5.011520100804302e-07, + "loss": 1.2653, + "step": 149997 + }, + { + "epoch": 1.8, + "grad_norm": 12.117302557423832, + "learning_rate": 5.009694121323639e-07, + "loss": 0.9934, + "step": 150000 + }, + { + "epoch": 1.8, + "grad_norm": 17.148760519047528, + "learning_rate": 5.00786846601069e-07, + "loss": 1.1857, + "step": 150003 + }, + { + "epoch": 1.8, + "grad_norm": 9.051623014020096, + "learning_rate": 5.006043134871674e-07, + "loss": 1.0467, + "step": 150006 + }, + { + "epoch": 1.8, + "grad_norm": 5.67197617953427, + "learning_rate": 5.004218127912797e-07, + "loss": 0.9884, + "step": 150009 + }, + { + "epoch": 1.8, + "grad_norm": 6.16462054353144, + "learning_rate": 5.002393445140319e-07, + "loss": 1.1598, + "step": 150012 + }, + { + "epoch": 1.8, + "grad_norm": 6.810947831179117, + "learning_rate": 5.000569086560436e-07, + "loss": 1.0831, + "step": 150015 + }, + { + "epoch": 1.8, + "grad_norm": 14.989443361110052, + "learning_rate": 4.998745052179421e-07, + "loss": 1.4562, + "step": 150018 + }, + { + "epoch": 1.8, + "grad_norm": 7.542372451992233, + "learning_rate": 4.996921342003447e-07, + "loss": 0.8768, + "step": 150021 + }, + { + "epoch": 1.8, + "grad_norm": 8.53872367525104, + "learning_rate": 4.995097956038764e-07, + "loss": 1.2684, + "step": 150024 + }, + { + "epoch": 1.8, + "grad_norm": 24.257734909391367, + "learning_rate": 4.993274894291599e-07, + "loss": 1.0617, + "step": 150027 + }, + { + "epoch": 1.8, + "grad_norm": 10.065511415084627, + "learning_rate": 4.991452156768162e-07, + "loss": 1.3996, + "step": 150030 + }, + { + "epoch": 1.8, + "grad_norm": 10.946155585165204, + "learning_rate": 4.989629743474689e-07, + "loss": 0.8383, + "step": 150033 + }, + { + "epoch": 1.8, + "grad_norm": 3.794715850378545, + "learning_rate": 4.987807654417365e-07, + "loss": 1.0088, + "step": 150036 + }, + { + "epoch": 1.8, + "grad_norm": 7.333809155618583, + "learning_rate": 4.98598588960244e-07, + "loss": 1.0552, + "step": 150039 + }, + { + "epoch": 1.8, + "grad_norm": 11.613775745918742, + "learning_rate": 4.98416444903611e-07, + "loss": 0.9436, + "step": 150042 + }, + { + "epoch": 1.8, + "grad_norm": 7.210300398709181, + "learning_rate": 4.982343332724626e-07, + "loss": 1.4068, + "step": 150045 + }, + { + "epoch": 1.8, + "grad_norm": 7.786869890601897, + "learning_rate": 4.980522540674171e-07, + "loss": 1.0282, + "step": 150048 + }, + { + "epoch": 1.8, + "grad_norm": 6.169887434619392, + "learning_rate": 4.978702072890962e-07, + "loss": 1.2881, + "step": 150051 + }, + { + "epoch": 1.8, + "grad_norm": 7.865825766210841, + "learning_rate": 4.976881929381205e-07, + "loss": 0.8551, + "step": 150054 + }, + { + "epoch": 1.8, + "grad_norm": 37.54157044949909, + "learning_rate": 4.97506211015113e-07, + "loss": 1.4167, + "step": 150057 + }, + { + "epoch": 1.8, + "grad_norm": 9.043219403424834, + "learning_rate": 4.973242615206964e-07, + "loss": 1.3516, + "step": 150060 + }, + { + "epoch": 1.8, + "grad_norm": 5.252971668266796, + "learning_rate": 4.971423444554868e-07, + "loss": 1.1597, + "step": 150063 + }, + { + "epoch": 1.8, + "grad_norm": 6.9034602814643025, + "learning_rate": 4.969604598201072e-07, + "loss": 1.5846, + "step": 150066 + }, + { + "epoch": 1.8, + "grad_norm": 1.9117162940585797, + "learning_rate": 4.967786076151804e-07, + "loss": 0.8468, + "step": 150069 + }, + { + "epoch": 1.8, + "grad_norm": 11.359884660006372, + "learning_rate": 4.965967878413247e-07, + "loss": 1.1587, + "step": 150072 + }, + { + "epoch": 1.8, + "grad_norm": 2.5992673539780498, + "learning_rate": 4.964150004991597e-07, + "loss": 0.9937, + "step": 150075 + }, + { + "epoch": 1.8, + "grad_norm": 2.878954244591613, + "learning_rate": 4.962332455893071e-07, + "loss": 1.1881, + "step": 150078 + }, + { + "epoch": 1.8, + "grad_norm": 7.577282181434865, + "learning_rate": 4.960515231123874e-07, + "loss": 1.2839, + "step": 150081 + }, + { + "epoch": 1.8, + "grad_norm": 4.043347709372046, + "learning_rate": 4.958698330690215e-07, + "loss": 0.9858, + "step": 150084 + }, + { + "epoch": 1.8, + "grad_norm": 17.004188943064676, + "learning_rate": 4.956881754598286e-07, + "loss": 1.5152, + "step": 150087 + }, + { + "epoch": 1.8, + "grad_norm": 13.253039295144555, + "learning_rate": 4.955065502854295e-07, + "loss": 1.0309, + "step": 150090 + }, + { + "epoch": 1.8, + "grad_norm": 11.755109376704285, + "learning_rate": 4.953249575464403e-07, + "loss": 1.0585, + "step": 150093 + }, + { + "epoch": 1.8, + "grad_norm": 5.7561225551246, + "learning_rate": 4.951433972434849e-07, + "loss": 1.1537, + "step": 150096 + }, + { + "epoch": 1.8, + "grad_norm": 6.827173818992972, + "learning_rate": 4.949618693771818e-07, + "loss": 1.0299, + "step": 150099 + }, + { + "epoch": 1.8, + "grad_norm": 22.36863983644753, + "learning_rate": 4.947803739481494e-07, + "loss": 1.2654, + "step": 150102 + }, + { + "epoch": 1.8, + "grad_norm": 4.649392993942739, + "learning_rate": 4.945989109570071e-07, + "loss": 1.173, + "step": 150105 + }, + { + "epoch": 1.81, + "grad_norm": 12.232646614552674, + "learning_rate": 4.944174804043755e-07, + "loss": 0.9292, + "step": 150108 + }, + { + "epoch": 1.81, + "grad_norm": 6.967796084612433, + "learning_rate": 4.942360822908743e-07, + "loss": 1.1052, + "step": 150111 + }, + { + "epoch": 1.81, + "grad_norm": 3.1473862683694507, + "learning_rate": 4.940547166171216e-07, + "loss": 1.2723, + "step": 150114 + }, + { + "epoch": 1.81, + "grad_norm": 7.886406646330866, + "learning_rate": 4.938733833837339e-07, + "loss": 1.0818, + "step": 150117 + }, + { + "epoch": 1.81, + "grad_norm": 7.929956654984795, + "learning_rate": 4.936920825913338e-07, + "loss": 1.5813, + "step": 150120 + }, + { + "epoch": 1.81, + "grad_norm": 8.473386188803657, + "learning_rate": 4.935108142405376e-07, + "loss": 1.1122, + "step": 150123 + }, + { + "epoch": 1.81, + "grad_norm": 7.805234450366908, + "learning_rate": 4.933295783319658e-07, + "loss": 1.2176, + "step": 150126 + }, + { + "epoch": 1.81, + "grad_norm": 17.924970785318934, + "learning_rate": 4.931483748662347e-07, + "loss": 1.3952, + "step": 150129 + }, + { + "epoch": 1.81, + "grad_norm": 12.143698486298149, + "learning_rate": 4.929672038439659e-07, + "loss": 1.1418, + "step": 150132 + }, + { + "epoch": 1.81, + "grad_norm": 3.3385216697351217, + "learning_rate": 4.927860652657734e-07, + "loss": 1.0806, + "step": 150135 + }, + { + "epoch": 1.81, + "grad_norm": 6.2429751850740045, + "learning_rate": 4.926049591322779e-07, + "loss": 1.5445, + "step": 150138 + }, + { + "epoch": 1.81, + "grad_norm": 8.013258778441822, + "learning_rate": 4.924238854440988e-07, + "loss": 1.0982, + "step": 150141 + }, + { + "epoch": 1.81, + "grad_norm": 5.240595358212867, + "learning_rate": 4.922428442018501e-07, + "loss": 0.9744, + "step": 150144 + }, + { + "epoch": 1.81, + "grad_norm": 10.757778286895416, + "learning_rate": 4.920618354061524e-07, + "loss": 1.4837, + "step": 150147 + }, + { + "epoch": 1.81, + "grad_norm": 11.532600499029321, + "learning_rate": 4.918808590576229e-07, + "loss": 1.2921, + "step": 150150 + }, + { + "epoch": 1.81, + "grad_norm": 6.066741384637286, + "learning_rate": 4.916999151568813e-07, + "loss": 1.2147, + "step": 150153 + }, + { + "epoch": 1.81, + "grad_norm": 15.021234377077068, + "learning_rate": 4.915190037045425e-07, + "loss": 1.1434, + "step": 150156 + }, + { + "epoch": 1.81, + "grad_norm": 11.773228638785781, + "learning_rate": 4.913381247012228e-07, + "loss": 1.2007, + "step": 150159 + }, + { + "epoch": 1.81, + "grad_norm": 21.20490464178315, + "learning_rate": 4.911572781475404e-07, + "loss": 1.1062, + "step": 150162 + }, + { + "epoch": 1.81, + "grad_norm": 4.60240779080237, + "learning_rate": 4.909764640441139e-07, + "loss": 0.9673, + "step": 150165 + }, + { + "epoch": 1.81, + "grad_norm": 4.853737224363958, + "learning_rate": 4.907956823915617e-07, + "loss": 0.8982, + "step": 150168 + }, + { + "epoch": 1.81, + "grad_norm": 8.027753306222957, + "learning_rate": 4.906149331904963e-07, + "loss": 1.0136, + "step": 150171 + }, + { + "epoch": 1.81, + "grad_norm": 9.942154684480816, + "learning_rate": 4.904342164415388e-07, + "loss": 1.1722, + "step": 150174 + }, + { + "epoch": 1.81, + "grad_norm": 11.648355973632539, + "learning_rate": 4.902535321453017e-07, + "loss": 0.83, + "step": 150177 + }, + { + "epoch": 1.81, + "grad_norm": 7.481317023296222, + "learning_rate": 4.900728803024058e-07, + "loss": 0.8089, + "step": 150180 + }, + { + "epoch": 1.81, + "grad_norm": 6.164405288750049, + "learning_rate": 4.898922609134648e-07, + "loss": 1.1053, + "step": 150183 + }, + { + "epoch": 1.81, + "grad_norm": 4.785005998806191, + "learning_rate": 4.897116739790953e-07, + "loss": 0.9917, + "step": 150186 + }, + { + "epoch": 1.81, + "grad_norm": 6.973835019182687, + "learning_rate": 4.895311194999153e-07, + "loss": 0.8391, + "step": 150189 + }, + { + "epoch": 1.81, + "grad_norm": 19.872860801782757, + "learning_rate": 4.893505974765389e-07, + "loss": 1.3809, + "step": 150192 + }, + { + "epoch": 1.81, + "grad_norm": 18.253197900110415, + "learning_rate": 4.891701079095845e-07, + "loss": 1.2214, + "step": 150195 + }, + { + "epoch": 1.81, + "grad_norm": 9.126460046908154, + "learning_rate": 4.889896507996672e-07, + "loss": 1.043, + "step": 150198 + }, + { + "epoch": 1.81, + "grad_norm": 29.686415433052208, + "learning_rate": 4.888092261474009e-07, + "loss": 1.61, + "step": 150201 + }, + { + "epoch": 1.81, + "grad_norm": 8.480034449295095, + "learning_rate": 4.886288339534029e-07, + "loss": 0.9441, + "step": 150204 + }, + { + "epoch": 1.81, + "grad_norm": 3.240851978560698, + "learning_rate": 4.884484742182905e-07, + "loss": 1.2937, + "step": 150207 + }, + { + "epoch": 1.81, + "grad_norm": 15.07125946980686, + "learning_rate": 4.882681469426754e-07, + "loss": 0.97, + "step": 150210 + }, + { + "epoch": 1.81, + "grad_norm": 11.478242730112518, + "learning_rate": 4.880878521271748e-07, + "loss": 1.1843, + "step": 150213 + }, + { + "epoch": 1.81, + "grad_norm": 10.792515935048447, + "learning_rate": 4.879075897724062e-07, + "loss": 1.4047, + "step": 150216 + }, + { + "epoch": 1.81, + "grad_norm": 6.773253465627761, + "learning_rate": 4.877273598789812e-07, + "loss": 1.1648, + "step": 150219 + }, + { + "epoch": 1.81, + "grad_norm": 6.340270102948373, + "learning_rate": 4.87547162447517e-07, + "loss": 1.2111, + "step": 150222 + }, + { + "epoch": 1.81, + "grad_norm": 4.368524326079433, + "learning_rate": 4.873669974786277e-07, + "loss": 1.0393, + "step": 150225 + }, + { + "epoch": 1.81, + "grad_norm": 17.148398935576818, + "learning_rate": 4.871868649729272e-07, + "loss": 0.8295, + "step": 150228 + }, + { + "epoch": 1.81, + "grad_norm": 19.46051822329464, + "learning_rate": 4.870067649310328e-07, + "loss": 1.287, + "step": 150231 + }, + { + "epoch": 1.81, + "grad_norm": 5.4900529119738115, + "learning_rate": 4.868266973535584e-07, + "loss": 1.0371, + "step": 150234 + }, + { + "epoch": 1.81, + "grad_norm": 5.059660403153995, + "learning_rate": 4.86646662241117e-07, + "loss": 1.1531, + "step": 150237 + }, + { + "epoch": 1.81, + "grad_norm": 8.441372318138512, + "learning_rate": 4.864666595943246e-07, + "loss": 1.1055, + "step": 150240 + }, + { + "epoch": 1.81, + "grad_norm": 9.336128889358141, + "learning_rate": 4.86286689413793e-07, + "loss": 1.127, + "step": 150243 + }, + { + "epoch": 1.81, + "grad_norm": 3.811829404217955, + "learning_rate": 4.861067517001395e-07, + "loss": 1.2016, + "step": 150246 + }, + { + "epoch": 1.81, + "grad_norm": 30.094492677530578, + "learning_rate": 4.859268464539768e-07, + "loss": 1.3019, + "step": 150249 + }, + { + "epoch": 1.81, + "grad_norm": 4.762434637840347, + "learning_rate": 4.85746973675918e-07, + "loss": 1.4385, + "step": 150252 + }, + { + "epoch": 1.81, + "grad_norm": 4.431903172401864, + "learning_rate": 4.855671333665779e-07, + "loss": 1.237, + "step": 150255 + }, + { + "epoch": 1.81, + "grad_norm": 1.9443583883314057, + "learning_rate": 4.853873255265695e-07, + "loss": 1.3957, + "step": 150258 + }, + { + "epoch": 1.81, + "grad_norm": 4.657295086461866, + "learning_rate": 4.85207550156509e-07, + "loss": 1.0352, + "step": 150261 + }, + { + "epoch": 1.81, + "grad_norm": 15.659815853879076, + "learning_rate": 4.850278072570081e-07, + "loss": 1.1065, + "step": 150264 + }, + { + "epoch": 1.81, + "grad_norm": 7.385883951510109, + "learning_rate": 4.848480968286773e-07, + "loss": 1.1193, + "step": 150267 + }, + { + "epoch": 1.81, + "grad_norm": 12.75924512434934, + "learning_rate": 4.84668418872134e-07, + "loss": 1.3322, + "step": 150270 + }, + { + "epoch": 1.81, + "grad_norm": 8.785566697456359, + "learning_rate": 4.844887733879899e-07, + "loss": 1.1029, + "step": 150273 + }, + { + "epoch": 1.81, + "grad_norm": 4.970987591216932, + "learning_rate": 4.843091603768591e-07, + "loss": 1.164, + "step": 150276 + }, + { + "epoch": 1.81, + "grad_norm": 8.142004458804225, + "learning_rate": 4.841295798393519e-07, + "loss": 1.439, + "step": 150279 + }, + { + "epoch": 1.81, + "grad_norm": 12.561076939070302, + "learning_rate": 4.839500317760848e-07, + "loss": 1.3514, + "step": 150282 + }, + { + "epoch": 1.81, + "grad_norm": 8.321997839499227, + "learning_rate": 4.837705161876672e-07, + "loss": 1.5921, + "step": 150285 + }, + { + "epoch": 1.81, + "grad_norm": 8.377772813587162, + "learning_rate": 4.83591033074714e-07, + "loss": 1.0137, + "step": 150288 + }, + { + "epoch": 1.81, + "grad_norm": 5.905625558681631, + "learning_rate": 4.834115824378361e-07, + "loss": 1.023, + "step": 150291 + }, + { + "epoch": 1.81, + "grad_norm": 4.860135729561862, + "learning_rate": 4.83232164277646e-07, + "loss": 0.7431, + "step": 150294 + }, + { + "epoch": 1.81, + "grad_norm": 13.398771175748156, + "learning_rate": 4.83052778594757e-07, + "loss": 0.8266, + "step": 150297 + }, + { + "epoch": 1.81, + "grad_norm": 10.722616025821775, + "learning_rate": 4.828734253897804e-07, + "loss": 1.0906, + "step": 150300 + }, + { + "epoch": 1.81, + "grad_norm": 14.03035197303701, + "learning_rate": 4.826941046633304e-07, + "loss": 1.128, + "step": 150303 + }, + { + "epoch": 1.81, + "grad_norm": 11.346441595494685, + "learning_rate": 4.825148164160176e-07, + "loss": 0.9527, + "step": 150306 + }, + { + "epoch": 1.81, + "grad_norm": 7.626209427743879, + "learning_rate": 4.823355606484514e-07, + "loss": 0.7487, + "step": 150309 + }, + { + "epoch": 1.81, + "grad_norm": 4.021808943911726, + "learning_rate": 4.821563373612459e-07, + "loss": 1.0798, + "step": 150312 + }, + { + "epoch": 1.81, + "grad_norm": 6.884389582283835, + "learning_rate": 4.81977146555015e-07, + "loss": 1.0457, + "step": 150315 + }, + { + "epoch": 1.81, + "grad_norm": 17.37691286229115, + "learning_rate": 4.817979882303647e-07, + "loss": 0.868, + "step": 150318 + }, + { + "epoch": 1.81, + "grad_norm": 11.491341920324587, + "learning_rate": 4.816188623879104e-07, + "loss": 1.1969, + "step": 150321 + }, + { + "epoch": 1.81, + "grad_norm": 6.221078906862955, + "learning_rate": 4.814397690282635e-07, + "loss": 0.9991, + "step": 150324 + }, + { + "epoch": 1.81, + "grad_norm": 5.500261384660351, + "learning_rate": 4.812607081520326e-07, + "loss": 0.8444, + "step": 150327 + }, + { + "epoch": 1.81, + "grad_norm": 7.693468359391576, + "learning_rate": 4.810816797598328e-07, + "loss": 1.2203, + "step": 150330 + }, + { + "epoch": 1.81, + "grad_norm": 5.326030960308402, + "learning_rate": 4.809026838522702e-07, + "loss": 0.8228, + "step": 150333 + }, + { + "epoch": 1.81, + "grad_norm": 4.469451509622822, + "learning_rate": 4.807237204299575e-07, + "loss": 1.2893, + "step": 150336 + }, + { + "epoch": 1.81, + "grad_norm": 14.785700281715535, + "learning_rate": 4.805447894935066e-07, + "loss": 1.0954, + "step": 150339 + }, + { + "epoch": 1.81, + "grad_norm": 7.266515522167178, + "learning_rate": 4.803658910435283e-07, + "loss": 1.2312, + "step": 150342 + }, + { + "epoch": 1.81, + "grad_norm": 8.217345088126562, + "learning_rate": 4.801870250806317e-07, + "loss": 1.0649, + "step": 150345 + }, + { + "epoch": 1.81, + "grad_norm": 9.412176631137225, + "learning_rate": 4.800081916054289e-07, + "loss": 1.3458, + "step": 150348 + }, + { + "epoch": 1.81, + "grad_norm": 11.920522520867642, + "learning_rate": 4.79829390618527e-07, + "loss": 1.0103, + "step": 150351 + }, + { + "epoch": 1.81, + "grad_norm": 5.708928871559846, + "learning_rate": 4.796506221205388e-07, + "loss": 0.742, + "step": 150354 + }, + { + "epoch": 1.81, + "grad_norm": 4.334627357828538, + "learning_rate": 4.794718861120751e-07, + "loss": 1.0331, + "step": 150357 + }, + { + "epoch": 1.81, + "grad_norm": 6.424958081044103, + "learning_rate": 4.79293182593743e-07, + "loss": 1.191, + "step": 150360 + }, + { + "epoch": 1.81, + "grad_norm": 17.916288036551176, + "learning_rate": 4.791145115661544e-07, + "loss": 1.1126, + "step": 150363 + }, + { + "epoch": 1.81, + "grad_norm": 6.596039719692755, + "learning_rate": 4.789358730299198e-07, + "loss": 0.9981, + "step": 150366 + }, + { + "epoch": 1.81, + "grad_norm": 14.220906880510979, + "learning_rate": 4.787572669856466e-07, + "loss": 0.9205, + "step": 150369 + }, + { + "epoch": 1.81, + "grad_norm": 7.449504854488995, + "learning_rate": 4.785786934339453e-07, + "loss": 1.0341, + "step": 150372 + }, + { + "epoch": 1.81, + "grad_norm": 23.30954060217813, + "learning_rate": 4.784001523754244e-07, + "loss": 1.6375, + "step": 150375 + }, + { + "epoch": 1.81, + "grad_norm": 6.966270719088305, + "learning_rate": 4.782216438106946e-07, + "loss": 0.9498, + "step": 150378 + }, + { + "epoch": 1.81, + "grad_norm": 17.643089094195794, + "learning_rate": 4.780431677403651e-07, + "loss": 1.009, + "step": 150381 + }, + { + "epoch": 1.81, + "grad_norm": 8.835199343053205, + "learning_rate": 4.778647241650447e-07, + "loss": 1.1395, + "step": 150384 + }, + { + "epoch": 1.81, + "grad_norm": 4.768945307721347, + "learning_rate": 4.776863130853415e-07, + "loss": 1.2224, + "step": 150387 + }, + { + "epoch": 1.81, + "grad_norm": 24.568611014485565, + "learning_rate": 4.775079345018662e-07, + "loss": 1.1747, + "step": 150390 + }, + { + "epoch": 1.81, + "grad_norm": 20.302181177778376, + "learning_rate": 4.773295884152251e-07, + "loss": 1.4091, + "step": 150393 + }, + { + "epoch": 1.81, + "grad_norm": 4.409860809657391, + "learning_rate": 4.771512748260288e-07, + "loss": 1.3301, + "step": 150396 + }, + { + "epoch": 1.81, + "grad_norm": 6.838594968990121, + "learning_rate": 4.769729937348843e-07, + "loss": 1.2465, + "step": 150399 + }, + { + "epoch": 1.81, + "grad_norm": 5.70716112673759, + "learning_rate": 4.767947451424015e-07, + "loss": 1.1249, + "step": 150402 + }, + { + "epoch": 1.81, + "grad_norm": 12.52369548745719, + "learning_rate": 4.766165290491864e-07, + "loss": 1.2923, + "step": 150405 + }, + { + "epoch": 1.81, + "grad_norm": 10.028802879936524, + "learning_rate": 4.7643834545585077e-07, + "loss": 0.9563, + "step": 150408 + }, + { + "epoch": 1.81, + "grad_norm": 7.652044050882534, + "learning_rate": 4.7626019436300187e-07, + "loss": 1.0613, + "step": 150411 + }, + { + "epoch": 1.81, + "grad_norm": 7.52374492221113, + "learning_rate": 4.7608207577124364e-07, + "loss": 1.4818, + "step": 150414 + }, + { + "epoch": 1.81, + "grad_norm": 10.356397695409001, + "learning_rate": 4.759039896811868e-07, + "loss": 1.1356, + "step": 150417 + }, + { + "epoch": 1.81, + "grad_norm": 4.173751651298151, + "learning_rate": 4.7572593609343966e-07, + "loss": 1.0198, + "step": 150420 + }, + { + "epoch": 1.81, + "grad_norm": 15.30384491932734, + "learning_rate": 4.755479150086106e-07, + "loss": 1.4385, + "step": 150423 + }, + { + "epoch": 1.81, + "grad_norm": 2.6737310996640478, + "learning_rate": 4.753699264273048e-07, + "loss": 0.9152, + "step": 150426 + }, + { + "epoch": 1.81, + "grad_norm": 4.684429445232525, + "learning_rate": 4.7519197035013066e-07, + "loss": 1.3667, + "step": 150429 + }, + { + "epoch": 1.81, + "grad_norm": 6.507092467903496, + "learning_rate": 4.7501404677769534e-07, + "loss": 0.805, + "step": 150432 + }, + { + "epoch": 1.81, + "grad_norm": 10.321647279865802, + "learning_rate": 4.748361557106063e-07, + "loss": 1.0672, + "step": 150435 + }, + { + "epoch": 1.81, + "grad_norm": 3.89823091733202, + "learning_rate": 4.746582971494718e-07, + "loss": 1.3178, + "step": 150438 + }, + { + "epoch": 1.81, + "grad_norm": 22.464202844346797, + "learning_rate": 4.744804710948958e-07, + "loss": 1.3651, + "step": 150441 + }, + { + "epoch": 1.81, + "grad_norm": 32.92555092840474, + "learning_rate": 4.743026775474868e-07, + "loss": 0.9555, + "step": 150444 + }, + { + "epoch": 1.81, + "grad_norm": 7.739825459861759, + "learning_rate": 4.741249165078521e-07, + "loss": 1.5153, + "step": 150447 + }, + { + "epoch": 1.81, + "grad_norm": 1.9839988721741728, + "learning_rate": 4.739471879765989e-07, + "loss": 1.062, + "step": 150450 + }, + { + "epoch": 1.81, + "grad_norm": 2.9148817368644515, + "learning_rate": 4.737694919543323e-07, + "loss": 1.1431, + "step": 150453 + }, + { + "epoch": 1.81, + "grad_norm": 6.341312527332917, + "learning_rate": 4.735918284416585e-07, + "loss": 0.9693, + "step": 150456 + }, + { + "epoch": 1.81, + "grad_norm": 11.488352736858674, + "learning_rate": 4.734141974391837e-07, + "loss": 0.9289, + "step": 150459 + }, + { + "epoch": 1.81, + "grad_norm": 9.498133332320029, + "learning_rate": 4.7323659894751516e-07, + "loss": 0.9815, + "step": 150462 + }, + { + "epoch": 1.81, + "grad_norm": 13.117824982950228, + "learning_rate": 4.730590329672602e-07, + "loss": 0.7254, + "step": 150465 + }, + { + "epoch": 1.81, + "grad_norm": 14.226069287346451, + "learning_rate": 4.7288149949902165e-07, + "loss": 0.8634, + "step": 150468 + }, + { + "epoch": 1.81, + "grad_norm": 9.039500657311637, + "learning_rate": 4.727039985434068e-07, + "loss": 1.3001, + "step": 150471 + }, + { + "epoch": 1.81, + "grad_norm": 5.108558679314607, + "learning_rate": 4.725265301010218e-07, + "loss": 1.1887, + "step": 150474 + }, + { + "epoch": 1.81, + "grad_norm": 3.471856860071242, + "learning_rate": 4.7234909417247286e-07, + "loss": 1.2412, + "step": 150477 + }, + { + "epoch": 1.81, + "grad_norm": 10.929847298916723, + "learning_rate": 4.72171690758364e-07, + "loss": 0.9519, + "step": 150480 + }, + { + "epoch": 1.81, + "grad_norm": 10.16381337392625, + "learning_rate": 4.7199431985930024e-07, + "loss": 1.2222, + "step": 150483 + }, + { + "epoch": 1.81, + "grad_norm": 21.352104328508567, + "learning_rate": 4.7181698147588884e-07, + "loss": 1.0101, + "step": 150486 + }, + { + "epoch": 1.81, + "grad_norm": 7.935595741686411, + "learning_rate": 4.716396756087327e-07, + "loss": 1.1642, + "step": 150489 + }, + { + "epoch": 1.81, + "grad_norm": 8.208092039915178, + "learning_rate": 4.714624022584413e-07, + "loss": 1.0963, + "step": 150492 + }, + { + "epoch": 1.81, + "grad_norm": 102.18856440888727, + "learning_rate": 4.712851614256142e-07, + "loss": 0.9549, + "step": 150495 + }, + { + "epoch": 1.81, + "grad_norm": 10.330970342323472, + "learning_rate": 4.7110795311085975e-07, + "loss": 1.2726, + "step": 150498 + }, + { + "epoch": 1.81, + "grad_norm": 4.019452833506536, + "learning_rate": 4.7093077731477974e-07, + "loss": 0.8689, + "step": 150501 + }, + { + "epoch": 1.81, + "grad_norm": 7.209308140912071, + "learning_rate": 4.7075363403798366e-07, + "loss": 1.0753, + "step": 150504 + }, + { + "epoch": 1.81, + "grad_norm": 6.301715920860859, + "learning_rate": 4.7057652328106995e-07, + "loss": 1.3585, + "step": 150507 + }, + { + "epoch": 1.81, + "grad_norm": 11.436892993412032, + "learning_rate": 4.70399445044647e-07, + "loss": 1.0174, + "step": 150510 + }, + { + "epoch": 1.81, + "grad_norm": 13.423803377867026, + "learning_rate": 4.702223993293187e-07, + "loss": 0.8872, + "step": 150513 + }, + { + "epoch": 1.81, + "grad_norm": 3.867368882518373, + "learning_rate": 4.7004538613568906e-07, + "loss": 1.0657, + "step": 150516 + }, + { + "epoch": 1.81, + "grad_norm": 7.906402502615275, + "learning_rate": 4.698684054643621e-07, + "loss": 0.6551, + "step": 150519 + }, + { + "epoch": 1.81, + "grad_norm": 7.972234571364403, + "learning_rate": 4.6969145731594055e-07, + "loss": 1.0184, + "step": 150522 + }, + { + "epoch": 1.81, + "grad_norm": 7.654246377432187, + "learning_rate": 4.695145416910285e-07, + "loss": 1.1389, + "step": 150525 + }, + { + "epoch": 1.81, + "grad_norm": 11.75894829081654, + "learning_rate": 4.6933765859023095e-07, + "loss": 1.4061, + "step": 150528 + }, + { + "epoch": 1.81, + "grad_norm": 17.453449878046, + "learning_rate": 4.69160808014153e-07, + "loss": 1.0611, + "step": 150531 + }, + { + "epoch": 1.81, + "grad_norm": 2.657278176223867, + "learning_rate": 4.689839899633941e-07, + "loss": 1.1046, + "step": 150534 + }, + { + "epoch": 1.81, + "grad_norm": 5.958736706453753, + "learning_rate": 4.688072044385605e-07, + "loss": 1.1053, + "step": 150537 + }, + { + "epoch": 1.81, + "grad_norm": 9.84373577679734, + "learning_rate": 4.686304514402551e-07, + "loss": 1.2153, + "step": 150540 + }, + { + "epoch": 1.81, + "grad_norm": 7.215764982266497, + "learning_rate": 4.684537309690806e-07, + "loss": 0.9859, + "step": 150543 + }, + { + "epoch": 1.81, + "grad_norm": 6.349329475694053, + "learning_rate": 4.6827704302564003e-07, + "loss": 1.3911, + "step": 150546 + }, + { + "epoch": 1.81, + "grad_norm": 8.544556495063057, + "learning_rate": 4.681003876105361e-07, + "loss": 1.5404, + "step": 150549 + }, + { + "epoch": 1.81, + "grad_norm": 6.993833874545053, + "learning_rate": 4.6792376472437285e-07, + "loss": 1.0715, + "step": 150552 + }, + { + "epoch": 1.81, + "grad_norm": 7.069181119585926, + "learning_rate": 4.6774717436775196e-07, + "loss": 1.4383, + "step": 150555 + }, + { + "epoch": 1.81, + "grad_norm": 5.972207634081218, + "learning_rate": 4.6757061654127746e-07, + "loss": 0.9972, + "step": 150558 + }, + { + "epoch": 1.81, + "grad_norm": 14.014331764834616, + "learning_rate": 4.67394091245551e-07, + "loss": 1.258, + "step": 150561 + }, + { + "epoch": 1.81, + "grad_norm": 7.053976764150226, + "learning_rate": 4.672175984811733e-07, + "loss": 1.0593, + "step": 150564 + }, + { + "epoch": 1.81, + "grad_norm": 12.765617139807349, + "learning_rate": 4.670411382487483e-07, + "loss": 1.5793, + "step": 150567 + }, + { + "epoch": 1.81, + "grad_norm": 15.896122064625484, + "learning_rate": 4.668647105488788e-07, + "loss": 1.3559, + "step": 150570 + }, + { + "epoch": 1.81, + "grad_norm": 11.088703131216716, + "learning_rate": 4.6668831538216664e-07, + "loss": 1.5961, + "step": 150573 + }, + { + "epoch": 1.81, + "grad_norm": 3.4004570608856346, + "learning_rate": 4.6651195274921345e-07, + "loss": 1.1705, + "step": 150576 + }, + { + "epoch": 1.81, + "grad_norm": 4.2702375199365, + "learning_rate": 4.663356226506199e-07, + "loss": 0.9496, + "step": 150579 + }, + { + "epoch": 1.81, + "grad_norm": 12.071843623987975, + "learning_rate": 4.6615932508699005e-07, + "loss": 1.1024, + "step": 150582 + }, + { + "epoch": 1.81, + "grad_norm": 10.31034760433699, + "learning_rate": 4.6598306005892435e-07, + "loss": 0.9723, + "step": 150585 + }, + { + "epoch": 1.81, + "grad_norm": 7.600397525828076, + "learning_rate": 4.6580682756702354e-07, + "loss": 1.2571, + "step": 150588 + }, + { + "epoch": 1.81, + "grad_norm": 9.382359204304933, + "learning_rate": 4.6563062761188937e-07, + "loss": 0.808, + "step": 150591 + }, + { + "epoch": 1.81, + "grad_norm": 12.115352716163025, + "learning_rate": 4.6545446019412466e-07, + "loss": 1.165, + "step": 150594 + }, + { + "epoch": 1.81, + "grad_norm": 10.030679167185234, + "learning_rate": 4.652783253143289e-07, + "loss": 1.4101, + "step": 150597 + }, + { + "epoch": 1.81, + "grad_norm": 5.056239507402975, + "learning_rate": 4.6510222297310503e-07, + "loss": 1.5504, + "step": 150600 + }, + { + "epoch": 1.81, + "grad_norm": 2.8411872027680745, + "learning_rate": 4.649261531710525e-07, + "loss": 1.2706, + "step": 150603 + }, + { + "epoch": 1.81, + "grad_norm": 12.474186753385533, + "learning_rate": 4.647501159087708e-07, + "loss": 1.0415, + "step": 150606 + }, + { + "epoch": 1.81, + "grad_norm": 8.773708500805185, + "learning_rate": 4.645741111868629e-07, + "loss": 1.4873, + "step": 150609 + }, + { + "epoch": 1.81, + "grad_norm": 4.3298693422167505, + "learning_rate": 4.643981390059304e-07, + "loss": 1.1759, + "step": 150612 + }, + { + "epoch": 1.81, + "grad_norm": 9.823985070879736, + "learning_rate": 4.6422219936657187e-07, + "loss": 1.0959, + "step": 150615 + }, + { + "epoch": 1.81, + "grad_norm": 5.908629714582404, + "learning_rate": 4.640462922693867e-07, + "loss": 1.5663, + "step": 150618 + }, + { + "epoch": 1.81, + "grad_norm": 29.002016190197565, + "learning_rate": 4.6387041771497777e-07, + "loss": 1.1199, + "step": 150621 + }, + { + "epoch": 1.81, + "grad_norm": 16.825748751136242, + "learning_rate": 4.636945757039446e-07, + "loss": 1.0676, + "step": 150624 + }, + { + "epoch": 1.81, + "grad_norm": 17.153035718859492, + "learning_rate": 4.6351876623688783e-07, + "loss": 0.9333, + "step": 150627 + }, + { + "epoch": 1.81, + "grad_norm": 5.272732541542239, + "learning_rate": 4.633429893144048e-07, + "loss": 1.3017, + "step": 150630 + }, + { + "epoch": 1.81, + "grad_norm": 10.543629258371439, + "learning_rate": 4.631672449370961e-07, + "loss": 1.1088, + "step": 150633 + }, + { + "epoch": 1.81, + "grad_norm": 7.2690393624038, + "learning_rate": 4.6299153310556343e-07, + "loss": 1.026, + "step": 150636 + }, + { + "epoch": 1.81, + "grad_norm": 5.834298372882021, + "learning_rate": 4.628158538204064e-07, + "loss": 1.2387, + "step": 150639 + }, + { + "epoch": 1.81, + "grad_norm": 8.17333353833305, + "learning_rate": 4.6264020708222337e-07, + "loss": 0.9638, + "step": 150642 + }, + { + "epoch": 1.81, + "grad_norm": 8.937111409590413, + "learning_rate": 4.6246459289161383e-07, + "loss": 1.6801, + "step": 150645 + }, + { + "epoch": 1.81, + "grad_norm": 3.698629333807327, + "learning_rate": 4.6228901124917624e-07, + "loss": 1.018, + "step": 150648 + }, + { + "epoch": 1.81, + "grad_norm": 6.022139912119526, + "learning_rate": 4.6211346215551124e-07, + "loss": 1.1742, + "step": 150651 + }, + { + "epoch": 1.81, + "grad_norm": 6.651163876655542, + "learning_rate": 4.619379456112183e-07, + "loss": 1.0143, + "step": 150654 + }, + { + "epoch": 1.81, + "grad_norm": 5.852903998628466, + "learning_rate": 4.6176246161689474e-07, + "loss": 0.9206, + "step": 150657 + }, + { + "epoch": 1.81, + "grad_norm": 3.0523196224347524, + "learning_rate": 4.615870101731401e-07, + "loss": 1.1455, + "step": 150660 + }, + { + "epoch": 1.81, + "grad_norm": 5.464209253358728, + "learning_rate": 4.614115912805539e-07, + "loss": 1.131, + "step": 150663 + }, + { + "epoch": 1.81, + "grad_norm": 11.271076619814266, + "learning_rate": 4.612362049397345e-07, + "loss": 1.2881, + "step": 150666 + }, + { + "epoch": 1.81, + "grad_norm": 54.73384271425255, + "learning_rate": 4.610608511512804e-07, + "loss": 1.468, + "step": 150669 + }, + { + "epoch": 1.81, + "grad_norm": 14.789368168228977, + "learning_rate": 4.608855299157877e-07, + "loss": 1.6881, + "step": 150672 + }, + { + "epoch": 1.81, + "grad_norm": 15.794835577066243, + "learning_rate": 4.607102412338582e-07, + "loss": 1.1346, + "step": 150675 + }, + { + "epoch": 1.81, + "grad_norm": 4.032159177293494, + "learning_rate": 4.6053498510608695e-07, + "loss": 1.0901, + "step": 150678 + }, + { + "epoch": 1.81, + "grad_norm": 2.7082454479028657, + "learning_rate": 4.603597615330757e-07, + "loss": 1.3462, + "step": 150681 + }, + { + "epoch": 1.81, + "grad_norm": 20.66962843785207, + "learning_rate": 4.6018457051541954e-07, + "loss": 0.7661, + "step": 150684 + }, + { + "epoch": 1.81, + "grad_norm": 12.145884177991382, + "learning_rate": 4.6000941205371797e-07, + "loss": 1.3205, + "step": 150687 + }, + { + "epoch": 1.81, + "grad_norm": 20.64552908478527, + "learning_rate": 4.598342861485672e-07, + "loss": 1.066, + "step": 150690 + }, + { + "epoch": 1.81, + "grad_norm": 22.494576088967605, + "learning_rate": 4.596591928005667e-07, + "loss": 1.001, + "step": 150693 + }, + { + "epoch": 1.81, + "grad_norm": 10.285229573517515, + "learning_rate": 4.594841320103116e-07, + "loss": 1.3265, + "step": 150696 + }, + { + "epoch": 1.81, + "grad_norm": 14.363379542139837, + "learning_rate": 4.5930910377840033e-07, + "loss": 0.9521, + "step": 150699 + }, + { + "epoch": 1.81, + "grad_norm": 10.49476058667244, + "learning_rate": 4.5913410810543123e-07, + "loss": 1.0087, + "step": 150702 + }, + { + "epoch": 1.81, + "grad_norm": 10.959175499069147, + "learning_rate": 4.5895914499200056e-07, + "loss": 1.6493, + "step": 150705 + }, + { + "epoch": 1.81, + "grad_norm": 9.065624718244226, + "learning_rate": 4.587842144387067e-07, + "loss": 0.9972, + "step": 150708 + }, + { + "epoch": 1.81, + "grad_norm": 3.2879788888813417, + "learning_rate": 4.586093164461458e-07, + "loss": 1.1727, + "step": 150711 + }, + { + "epoch": 1.81, + "grad_norm": 2.4886640335963044, + "learning_rate": 4.5843445101491414e-07, + "loss": 1.078, + "step": 150714 + }, + { + "epoch": 1.81, + "grad_norm": 7.567579441897281, + "learning_rate": 4.5825961814560784e-07, + "loss": 1.1348, + "step": 150717 + }, + { + "epoch": 1.81, + "grad_norm": 4.928878097456064, + "learning_rate": 4.580848178388253e-07, + "loss": 1.1786, + "step": 150720 + }, + { + "epoch": 1.81, + "grad_norm": 5.329949901078119, + "learning_rate": 4.5791005009516164e-07, + "loss": 1.0081, + "step": 150723 + }, + { + "epoch": 1.81, + "grad_norm": 5.922544486492711, + "learning_rate": 4.577353149152142e-07, + "loss": 1.4115, + "step": 150726 + }, + { + "epoch": 1.81, + "grad_norm": 5.5109004373998145, + "learning_rate": 4.5756061229958017e-07, + "loss": 1.0958, + "step": 150729 + }, + { + "epoch": 1.81, + "grad_norm": 4.498014488334978, + "learning_rate": 4.5738594224885245e-07, + "loss": 0.7538, + "step": 150732 + }, + { + "epoch": 1.81, + "grad_norm": 5.649349603467025, + "learning_rate": 4.572113047636317e-07, + "loss": 1.3852, + "step": 150735 + }, + { + "epoch": 1.81, + "grad_norm": 9.450595816655921, + "learning_rate": 4.570366998445097e-07, + "loss": 0.8522, + "step": 150738 + }, + { + "epoch": 1.81, + "grad_norm": 15.884569108307412, + "learning_rate": 4.568621274920848e-07, + "loss": 1.1777, + "step": 150741 + }, + { + "epoch": 1.81, + "grad_norm": 6.742045743675224, + "learning_rate": 4.56687587706951e-07, + "loss": 1.4472, + "step": 150744 + }, + { + "epoch": 1.81, + "grad_norm": 14.169343551866582, + "learning_rate": 4.5651308048970665e-07, + "loss": 0.905, + "step": 150747 + }, + { + "epoch": 1.81, + "grad_norm": 2.800249610489671, + "learning_rate": 4.563386058409447e-07, + "loss": 1.0606, + "step": 150750 + }, + { + "epoch": 1.81, + "grad_norm": 6.58543627677062, + "learning_rate": 4.561641637612624e-07, + "loss": 1.0985, + "step": 150753 + }, + { + "epoch": 1.81, + "grad_norm": 12.13623359091182, + "learning_rate": 4.559897542512526e-07, + "loss": 1.2301, + "step": 150756 + }, + { + "epoch": 1.81, + "grad_norm": 7.09593636141303, + "learning_rate": 4.558153773115115e-07, + "loss": 1.1498, + "step": 150759 + }, + { + "epoch": 1.81, + "grad_norm": 3.920546303899985, + "learning_rate": 4.556410329426375e-07, + "loss": 1.2015, + "step": 150762 + }, + { + "epoch": 1.81, + "grad_norm": 5.215917989818129, + "learning_rate": 4.5546672114522015e-07, + "loss": 0.9674, + "step": 150765 + }, + { + "epoch": 1.81, + "grad_norm": 6.116632779093203, + "learning_rate": 4.552924419198579e-07, + "loss": 1.2228, + "step": 150768 + }, + { + "epoch": 1.81, + "grad_norm": 20.343914604124294, + "learning_rate": 4.5511819526714575e-07, + "loss": 1.0686, + "step": 150771 + }, + { + "epoch": 1.81, + "grad_norm": 14.220938046761775, + "learning_rate": 4.5494398118767546e-07, + "loss": 0.9939, + "step": 150774 + }, + { + "epoch": 1.81, + "grad_norm": 10.294331587961004, + "learning_rate": 4.547697996820444e-07, + "loss": 1.1398, + "step": 150777 + }, + { + "epoch": 1.81, + "grad_norm": 8.81872304208035, + "learning_rate": 4.545956507508453e-07, + "loss": 1.0928, + "step": 150780 + }, + { + "epoch": 1.81, + "grad_norm": 15.804744476743586, + "learning_rate": 4.5442153439467227e-07, + "loss": 0.8224, + "step": 150783 + }, + { + "epoch": 1.81, + "grad_norm": 10.160403651479571, + "learning_rate": 4.542474506141203e-07, + "loss": 1.1268, + "step": 150786 + }, + { + "epoch": 1.81, + "grad_norm": 12.403129669630529, + "learning_rate": 4.540733994097857e-07, + "loss": 1.2588, + "step": 150789 + }, + { + "epoch": 1.81, + "grad_norm": 7.730485969410019, + "learning_rate": 4.5389938078225783e-07, + "loss": 1.1597, + "step": 150792 + }, + { + "epoch": 1.81, + "grad_norm": 9.563423640886725, + "learning_rate": 4.537253947321341e-07, + "loss": 1.0936, + "step": 150795 + }, + { + "epoch": 1.81, + "grad_norm": 8.917508809101122, + "learning_rate": 4.5355144126000615e-07, + "loss": 1.1629, + "step": 150798 + }, + { + "epoch": 1.81, + "grad_norm": 7.57803770893056, + "learning_rate": 4.533775203664703e-07, + "loss": 1.1845, + "step": 150801 + }, + { + "epoch": 1.81, + "grad_norm": 7.986056920095494, + "learning_rate": 4.5320363205211603e-07, + "loss": 0.9618, + "step": 150804 + }, + { + "epoch": 1.81, + "grad_norm": 12.846557539987597, + "learning_rate": 4.5302977631753953e-07, + "loss": 0.9956, + "step": 150807 + }, + { + "epoch": 1.81, + "grad_norm": 15.930499992101007, + "learning_rate": 4.5285595316333366e-07, + "loss": 1.3756, + "step": 150810 + }, + { + "epoch": 1.81, + "grad_norm": 6.708363318549427, + "learning_rate": 4.5268216259009234e-07, + "loss": 0.8637, + "step": 150813 + }, + { + "epoch": 1.81, + "grad_norm": 10.120683886330756, + "learning_rate": 4.525084045984074e-07, + "loss": 1.1083, + "step": 150816 + }, + { + "epoch": 1.81, + "grad_norm": 2.2951467421182694, + "learning_rate": 4.5233467918887274e-07, + "loss": 1.0383, + "step": 150819 + }, + { + "epoch": 1.81, + "grad_norm": 7.980023708413909, + "learning_rate": 4.521609863620802e-07, + "loss": 1.0559, + "step": 150822 + }, + { + "epoch": 1.81, + "grad_norm": 4.364188251207119, + "learning_rate": 4.519873261186225e-07, + "loss": 1.3983, + "step": 150825 + }, + { + "epoch": 1.81, + "grad_norm": 8.978654710504705, + "learning_rate": 4.5181369845909486e-07, + "loss": 1.0055, + "step": 150828 + }, + { + "epoch": 1.81, + "grad_norm": 7.833014672856197, + "learning_rate": 4.516401033840867e-07, + "loss": 1.2164, + "step": 150831 + }, + { + "epoch": 1.81, + "grad_norm": 4.4136281025169035, + "learning_rate": 4.51466540894191e-07, + "loss": 0.9149, + "step": 150834 + }, + { + "epoch": 1.81, + "grad_norm": 14.84461290978191, + "learning_rate": 4.5129301099000155e-07, + "loss": 1.1473, + "step": 150837 + }, + { + "epoch": 1.81, + "grad_norm": 12.399067156195303, + "learning_rate": 4.5111951367210806e-07, + "loss": 1.0114, + "step": 150840 + }, + { + "epoch": 1.81, + "grad_norm": 4.339761986365535, + "learning_rate": 4.509460489411066e-07, + "loss": 0.9322, + "step": 150843 + }, + { + "epoch": 1.81, + "grad_norm": 14.913734784272638, + "learning_rate": 4.5077261679758456e-07, + "loss": 1.5099, + "step": 150846 + }, + { + "epoch": 1.81, + "grad_norm": 6.251945679055333, + "learning_rate": 4.5059921724213696e-07, + "loss": 1.0624, + "step": 150849 + }, + { + "epoch": 1.81, + "grad_norm": 17.21237157151385, + "learning_rate": 4.5042585027535334e-07, + "loss": 1.2918, + "step": 150852 + }, + { + "epoch": 1.81, + "grad_norm": 18.308802798801725, + "learning_rate": 4.502525158978288e-07, + "loss": 1.2279, + "step": 150855 + }, + { + "epoch": 1.81, + "grad_norm": 7.010014223036043, + "learning_rate": 4.500792141101507e-07, + "loss": 1.1157, + "step": 150858 + }, + { + "epoch": 1.81, + "grad_norm": 10.613814881351491, + "learning_rate": 4.4990594491291394e-07, + "loss": 1.0736, + "step": 150861 + }, + { + "epoch": 1.81, + "grad_norm": 5.304721721835674, + "learning_rate": 4.497327083067071e-07, + "loss": 1.1783, + "step": 150864 + }, + { + "epoch": 1.81, + "grad_norm": 10.983606745811162, + "learning_rate": 4.4955950429212304e-07, + "loss": 1.4767, + "step": 150867 + }, + { + "epoch": 1.81, + "grad_norm": 2.238838807872086, + "learning_rate": 4.493863328697523e-07, + "loss": 1.34, + "step": 150870 + }, + { + "epoch": 1.81, + "grad_norm": 10.898339812009665, + "learning_rate": 4.492131940401856e-07, + "loss": 1.1053, + "step": 150873 + }, + { + "epoch": 1.81, + "grad_norm": 7.044923187570345, + "learning_rate": 4.490400878040135e-07, + "loss": 1.2046, + "step": 150876 + }, + { + "epoch": 1.81, + "grad_norm": 4.348978276158294, + "learning_rate": 4.4886701416182897e-07, + "loss": 1.0158, + "step": 150879 + }, + { + "epoch": 1.81, + "grad_norm": 2.833570587368739, + "learning_rate": 4.486939731142215e-07, + "loss": 0.8175, + "step": 150882 + }, + { + "epoch": 1.81, + "grad_norm": 5.172137153577967, + "learning_rate": 4.485209646617783e-07, + "loss": 1.0348, + "step": 150885 + }, + { + "epoch": 1.81, + "grad_norm": 12.69030993580916, + "learning_rate": 4.4834798880509457e-07, + "loss": 0.8954, + "step": 150888 + }, + { + "epoch": 1.81, + "grad_norm": 10.016415985052213, + "learning_rate": 4.4817504554475755e-07, + "loss": 1.0139, + "step": 150891 + }, + { + "epoch": 1.81, + "grad_norm": 13.246799128952318, + "learning_rate": 4.480021348813579e-07, + "loss": 1.1382, + "step": 150894 + }, + { + "epoch": 1.81, + "grad_norm": 5.392999968213272, + "learning_rate": 4.478292568154885e-07, + "loss": 1.2386, + "step": 150897 + }, + { + "epoch": 1.81, + "grad_norm": 4.77777909295737, + "learning_rate": 4.4765641134773665e-07, + "loss": 1.1617, + "step": 150900 + }, + { + "epoch": 1.81, + "grad_norm": 7.058812834823542, + "learning_rate": 4.4748359847869295e-07, + "loss": 1.1614, + "step": 150903 + }, + { + "epoch": 1.81, + "grad_norm": 7.3400135193814755, + "learning_rate": 4.473108182089458e-07, + "loss": 1.143, + "step": 150906 + }, + { + "epoch": 1.81, + "grad_norm": 5.305278015110055, + "learning_rate": 4.471380705390871e-07, + "loss": 1.307, + "step": 150909 + }, + { + "epoch": 1.81, + "grad_norm": 5.8382973503629945, + "learning_rate": 4.46965355469704e-07, + "loss": 1.001, + "step": 150912 + }, + { + "epoch": 1.81, + "grad_norm": 10.850413313816127, + "learning_rate": 4.467926730013883e-07, + "loss": 1.1556, + "step": 150915 + }, + { + "epoch": 1.81, + "grad_norm": 9.336814511822656, + "learning_rate": 4.466200231347273e-07, + "loss": 1.1995, + "step": 150918 + }, + { + "epoch": 1.81, + "grad_norm": 18.24877784568919, + "learning_rate": 4.464474058703117e-07, + "loss": 1.2105, + "step": 150921 + }, + { + "epoch": 1.81, + "grad_norm": 5.8912770044929, + "learning_rate": 4.462748212087309e-07, + "loss": 1.1759, + "step": 150924 + }, + { + "epoch": 1.81, + "grad_norm": 10.98867847567608, + "learning_rate": 4.461022691505712e-07, + "loss": 1.2922, + "step": 150927 + }, + { + "epoch": 1.81, + "grad_norm": 5.635669085404078, + "learning_rate": 4.459297496964232e-07, + "loss": 1.3425, + "step": 150930 + }, + { + "epoch": 1.81, + "grad_norm": 5.501396107443103, + "learning_rate": 4.4575726284687536e-07, + "loss": 1.0779, + "step": 150933 + }, + { + "epoch": 1.81, + "grad_norm": 7.176089523881173, + "learning_rate": 4.4558480860251827e-07, + "loss": 1.17, + "step": 150936 + }, + { + "epoch": 1.81, + "grad_norm": 3.328938273851354, + "learning_rate": 4.454123869639371e-07, + "loss": 1.1845, + "step": 150939 + }, + { + "epoch": 1.82, + "grad_norm": 10.302121117716597, + "learning_rate": 4.4523999793172123e-07, + "loss": 0.8474, + "step": 150942 + }, + { + "epoch": 1.82, + "grad_norm": 11.900392924205013, + "learning_rate": 4.450676415064625e-07, + "loss": 1.1045, + "step": 150945 + }, + { + "epoch": 1.82, + "grad_norm": 7.746969066074524, + "learning_rate": 4.4489531768874385e-07, + "loss": 0.9075, + "step": 150948 + }, + { + "epoch": 1.82, + "grad_norm": 11.372903611817952, + "learning_rate": 4.447230264791569e-07, + "loss": 1.2091, + "step": 150951 + }, + { + "epoch": 1.82, + "grad_norm": 17.956662411113136, + "learning_rate": 4.445507678782879e-07, + "loss": 1.0913, + "step": 150954 + }, + { + "epoch": 1.82, + "grad_norm": 15.178664908367876, + "learning_rate": 4.4437854188672524e-07, + "loss": 1.3708, + "step": 150957 + }, + { + "epoch": 1.82, + "grad_norm": 4.234836232133232, + "learning_rate": 4.442063485050563e-07, + "loss": 1.0753, + "step": 150960 + }, + { + "epoch": 1.82, + "grad_norm": 32.003900296991496, + "learning_rate": 4.4403418773387054e-07, + "loss": 1.1313, + "step": 150963 + }, + { + "epoch": 1.82, + "grad_norm": 4.434482233440406, + "learning_rate": 4.438620595737542e-07, + "loss": 1.0765, + "step": 150966 + }, + { + "epoch": 1.82, + "grad_norm": 5.464230742163984, + "learning_rate": 4.436899640252934e-07, + "loss": 1.1807, + "step": 150969 + }, + { + "epoch": 1.82, + "grad_norm": 3.1029226680831776, + "learning_rate": 4.435179010890767e-07, + "loss": 1.2839, + "step": 150972 + }, + { + "epoch": 1.82, + "grad_norm": 5.147462428522865, + "learning_rate": 4.4334587076569016e-07, + "loss": 1.0443, + "step": 150975 + }, + { + "epoch": 1.82, + "grad_norm": 4.974661494141094, + "learning_rate": 4.4317387305572337e-07, + "loss": 1.0105, + "step": 150978 + }, + { + "epoch": 1.82, + "grad_norm": 6.126038699949621, + "learning_rate": 4.430019079597614e-07, + "loss": 1.2207, + "step": 150981 + }, + { + "epoch": 1.82, + "grad_norm": 5.96593269835436, + "learning_rate": 4.4282997547839157e-07, + "loss": 1.0245, + "step": 150984 + }, + { + "epoch": 1.82, + "grad_norm": 14.912533142948002, + "learning_rate": 4.426580756122012e-07, + "loss": 1.2696, + "step": 150987 + }, + { + "epoch": 1.82, + "grad_norm": 13.310247038640492, + "learning_rate": 4.4248620836177645e-07, + "loss": 1.042, + "step": 150990 + }, + { + "epoch": 1.82, + "grad_norm": 7.429439956716792, + "learning_rate": 4.423143737277025e-07, + "loss": 1.1603, + "step": 150993 + }, + { + "epoch": 1.82, + "grad_norm": 2.626685047287884, + "learning_rate": 4.4214257171056654e-07, + "loss": 1.3722, + "step": 150996 + }, + { + "epoch": 1.82, + "grad_norm": 6.3932338445733565, + "learning_rate": 4.4197080231095477e-07, + "loss": 1.3493, + "step": 150999 + }, + { + "epoch": 1.82, + "grad_norm": 9.043901632307794, + "learning_rate": 4.417990655294546e-07, + "loss": 1.0475, + "step": 151002 + }, + { + "epoch": 1.82, + "grad_norm": 3.0001715188705607, + "learning_rate": 4.4162736136665327e-07, + "loss": 1.0119, + "step": 151005 + }, + { + "epoch": 1.82, + "grad_norm": 19.438692104423936, + "learning_rate": 4.414556898231337e-07, + "loss": 1.4569, + "step": 151008 + }, + { + "epoch": 1.82, + "grad_norm": 5.748193034907505, + "learning_rate": 4.41284050899482e-07, + "loss": 1.009, + "step": 151011 + }, + { + "epoch": 1.82, + "grad_norm": 7.037806941479022, + "learning_rate": 4.4111244459628555e-07, + "loss": 1.4787, + "step": 151014 + }, + { + "epoch": 1.82, + "grad_norm": 6.347819695508325, + "learning_rate": 4.409408709141294e-07, + "loss": 0.9638, + "step": 151017 + }, + { + "epoch": 1.82, + "grad_norm": 5.456037987430992, + "learning_rate": 4.4076932985359867e-07, + "loss": 0.8321, + "step": 151020 + }, + { + "epoch": 1.82, + "grad_norm": 5.60576602141803, + "learning_rate": 4.405978214152784e-07, + "loss": 1.3128, + "step": 151023 + }, + { + "epoch": 1.82, + "grad_norm": 2.841310339625104, + "learning_rate": 4.404263455997537e-07, + "loss": 1.0346, + "step": 151026 + }, + { + "epoch": 1.82, + "grad_norm": 11.30084205238357, + "learning_rate": 4.4025490240761304e-07, + "loss": 1.1042, + "step": 151029 + }, + { + "epoch": 1.82, + "grad_norm": 4.521176218976542, + "learning_rate": 4.400834918394381e-07, + "loss": 0.9221, + "step": 151032 + }, + { + "epoch": 1.82, + "grad_norm": 6.878694238466584, + "learning_rate": 4.39912113895814e-07, + "loss": 0.9116, + "step": 151035 + }, + { + "epoch": 1.82, + "grad_norm": 6.3059414839950065, + "learning_rate": 4.3974076857732694e-07, + "loss": 1.3538, + "step": 151038 + }, + { + "epoch": 1.82, + "grad_norm": 14.541137751431195, + "learning_rate": 4.395694558845598e-07, + "loss": 0.9181, + "step": 151041 + }, + { + "epoch": 1.82, + "grad_norm": 11.849689349125367, + "learning_rate": 4.39398175818101e-07, + "loss": 1.0818, + "step": 151044 + }, + { + "epoch": 1.82, + "grad_norm": 9.81155331218927, + "learning_rate": 4.3922692837853e-07, + "loss": 1.2748, + "step": 151047 + }, + { + "epoch": 1.82, + "grad_norm": 10.686655454372314, + "learning_rate": 4.3905571356643526e-07, + "loss": 1.1397, + "step": 151050 + }, + { + "epoch": 1.82, + "grad_norm": 4.031407398504679, + "learning_rate": 4.3888453138239863e-07, + "loss": 1.2981, + "step": 151053 + }, + { + "epoch": 1.82, + "grad_norm": 7.328374683941797, + "learning_rate": 4.387133818270051e-07, + "loss": 1.2788, + "step": 151056 + }, + { + "epoch": 1.82, + "grad_norm": 8.244849977986174, + "learning_rate": 4.3854226490084084e-07, + "loss": 0.9985, + "step": 151059 + }, + { + "epoch": 1.82, + "grad_norm": 8.70761171655128, + "learning_rate": 4.3837118060448545e-07, + "loss": 1.3071, + "step": 151062 + }, + { + "epoch": 1.82, + "grad_norm": 7.60286261523073, + "learning_rate": 4.3820012893852514e-07, + "loss": 1.5152, + "step": 151065 + }, + { + "epoch": 1.82, + "grad_norm": 4.925044290600644, + "learning_rate": 4.38029109903545e-07, + "loss": 1.3433, + "step": 151068 + }, + { + "epoch": 1.82, + "grad_norm": 5.071513831262699, + "learning_rate": 4.378581235001267e-07, + "loss": 1.1749, + "step": 151071 + }, + { + "epoch": 1.82, + "grad_norm": 6.094764849473625, + "learning_rate": 4.376871697288554e-07, + "loss": 1.0135, + "step": 151074 + }, + { + "epoch": 1.82, + "grad_norm": 8.217841629186353, + "learning_rate": 4.375162485903117e-07, + "loss": 0.9514, + "step": 151077 + }, + { + "epoch": 1.82, + "grad_norm": 3.4809864810465148, + "learning_rate": 4.373453600850819e-07, + "loss": 0.9992, + "step": 151080 + }, + { + "epoch": 1.82, + "grad_norm": 6.465296472565697, + "learning_rate": 4.371745042137465e-07, + "loss": 1.1521, + "step": 151083 + }, + { + "epoch": 1.82, + "grad_norm": 5.668867832438216, + "learning_rate": 4.3700368097689185e-07, + "loss": 1.1563, + "step": 151086 + }, + { + "epoch": 1.82, + "grad_norm": 6.135769028522673, + "learning_rate": 4.3683289037509735e-07, + "loss": 1.07, + "step": 151089 + }, + { + "epoch": 1.82, + "grad_norm": 8.093017328969593, + "learning_rate": 4.366621324089482e-07, + "loss": 0.9434, + "step": 151092 + }, + { + "epoch": 1.82, + "grad_norm": 9.315543873798754, + "learning_rate": 4.3649140707902715e-07, + "loss": 1.076, + "step": 151095 + }, + { + "epoch": 1.82, + "grad_norm": 10.417023967594451, + "learning_rate": 4.363207143859172e-07, + "loss": 1.2804, + "step": 151098 + }, + { + "epoch": 1.82, + "grad_norm": 4.938070315604431, + "learning_rate": 4.361500543301977e-07, + "loss": 1.1557, + "step": 151101 + }, + { + "epoch": 1.82, + "grad_norm": 5.917565204307676, + "learning_rate": 4.3597942691245285e-07, + "loss": 1.2695, + "step": 151104 + }, + { + "epoch": 1.82, + "grad_norm": 10.961832459698973, + "learning_rate": 4.3580883213326653e-07, + "loss": 1.5786, + "step": 151107 + }, + { + "epoch": 1.82, + "grad_norm": 2.61999430321914, + "learning_rate": 4.3563826999321826e-07, + "loss": 0.9797, + "step": 151110 + }, + { + "epoch": 1.82, + "grad_norm": 9.601982335440267, + "learning_rate": 4.354677404928942e-07, + "loss": 1.078, + "step": 151113 + }, + { + "epoch": 1.82, + "grad_norm": 5.379341005948606, + "learning_rate": 4.3529724363287283e-07, + "loss": 0.8351, + "step": 151116 + }, + { + "epoch": 1.82, + "grad_norm": 33.61891230298151, + "learning_rate": 4.351267794137348e-07, + "loss": 1.2836, + "step": 151119 + }, + { + "epoch": 1.82, + "grad_norm": 5.649175835197161, + "learning_rate": 4.349563478360652e-07, + "loss": 1.4607, + "step": 151122 + }, + { + "epoch": 1.82, + "grad_norm": 5.940685792991346, + "learning_rate": 4.347859489004447e-07, + "loss": 1.0375, + "step": 151125 + }, + { + "epoch": 1.82, + "grad_norm": 3.8577762805015796, + "learning_rate": 4.346155826074527e-07, + "loss": 1.2894, + "step": 151128 + }, + { + "epoch": 1.82, + "grad_norm": 6.197100295550628, + "learning_rate": 4.344452489576734e-07, + "loss": 1.425, + "step": 151131 + }, + { + "epoch": 1.82, + "grad_norm": 9.824308223396539, + "learning_rate": 4.3427494795168613e-07, + "loss": 0.9322, + "step": 151134 + }, + { + "epoch": 1.82, + "grad_norm": 5.998191609945587, + "learning_rate": 4.3410467959007495e-07, + "loss": 1.1575, + "step": 151137 + }, + { + "epoch": 1.82, + "grad_norm": 4.462281092928937, + "learning_rate": 4.3393444387341833e-07, + "loss": 0.9721, + "step": 151140 + }, + { + "epoch": 1.82, + "grad_norm": 5.780841638928286, + "learning_rate": 4.3376424080229687e-07, + "loss": 0.7896, + "step": 151143 + }, + { + "epoch": 1.82, + "grad_norm": 10.180717993715778, + "learning_rate": 4.335940703772912e-07, + "loss": 1.2878, + "step": 151146 + }, + { + "epoch": 1.82, + "grad_norm": 9.285226457804939, + "learning_rate": 4.3342393259898416e-07, + "loss": 1.3765, + "step": 151149 + }, + { + "epoch": 1.82, + "grad_norm": 7.096834831156826, + "learning_rate": 4.3325382746795654e-07, + "loss": 0.9569, + "step": 151152 + }, + { + "epoch": 1.82, + "grad_norm": 2.1355744690260456, + "learning_rate": 4.3308375498478663e-07, + "loss": 0.6909, + "step": 151155 + }, + { + "epoch": 1.82, + "grad_norm": 3.773667597469311, + "learning_rate": 4.329137151500562e-07, + "loss": 1.205, + "step": 151158 + }, + { + "epoch": 1.82, + "grad_norm": 8.640244940843413, + "learning_rate": 4.3274370796434484e-07, + "loss": 1.1091, + "step": 151161 + }, + { + "epoch": 1.82, + "grad_norm": 7.002694624187747, + "learning_rate": 4.325737334282332e-07, + "loss": 1.0085, + "step": 151164 + }, + { + "epoch": 1.82, + "grad_norm": 4.897184030253855, + "learning_rate": 4.3240379154230185e-07, + "loss": 1.102, + "step": 151167 + }, + { + "epoch": 1.82, + "grad_norm": 6.870962714669128, + "learning_rate": 4.322338823071293e-07, + "loss": 1.0322, + "step": 151170 + }, + { + "epoch": 1.82, + "grad_norm": 28.385418287245507, + "learning_rate": 4.320640057232961e-07, + "loss": 1.2552, + "step": 151173 + }, + { + "epoch": 1.82, + "grad_norm": 10.65917204127831, + "learning_rate": 4.3189416179138187e-07, + "loss": 1.067, + "step": 151176 + }, + { + "epoch": 1.82, + "grad_norm": 6.704606322078721, + "learning_rate": 4.3172435051196837e-07, + "loss": 0.8549, + "step": 151179 + }, + { + "epoch": 1.82, + "grad_norm": 10.634795271372456, + "learning_rate": 4.3155457188563286e-07, + "loss": 1.3524, + "step": 151182 + }, + { + "epoch": 1.82, + "grad_norm": 7.996938687004658, + "learning_rate": 4.3138482591295384e-07, + "loss": 1.1518, + "step": 151185 + }, + { + "epoch": 1.82, + "grad_norm": 11.599550387319539, + "learning_rate": 4.3121511259451074e-07, + "loss": 1.1963, + "step": 151188 + }, + { + "epoch": 1.82, + "grad_norm": 6.856584209202385, + "learning_rate": 4.3104543193088543e-07, + "loss": 1.1115, + "step": 151191 + }, + { + "epoch": 1.82, + "grad_norm": 3.3724256163149633, + "learning_rate": 4.3087578392265627e-07, + "loss": 1.1384, + "step": 151194 + }, + { + "epoch": 1.82, + "grad_norm": 15.005406010564894, + "learning_rate": 4.307061685703995e-07, + "loss": 1.0906, + "step": 151197 + }, + { + "epoch": 1.82, + "grad_norm": 21.67827244544552, + "learning_rate": 4.305365858746968e-07, + "loss": 1.2569, + "step": 151200 + }, + { + "epoch": 1.82, + "grad_norm": 4.896598889456922, + "learning_rate": 4.303670358361245e-07, + "loss": 1.209, + "step": 151203 + }, + { + "epoch": 1.82, + "grad_norm": 4.049732433749402, + "learning_rate": 4.3019751845526425e-07, + "loss": 1.0771, + "step": 151206 + }, + { + "epoch": 1.82, + "grad_norm": 2.7529970688836536, + "learning_rate": 4.300280337326923e-07, + "loss": 1.0981, + "step": 151209 + }, + { + "epoch": 1.82, + "grad_norm": 10.222821377937194, + "learning_rate": 4.29858581668986e-07, + "loss": 0.8781, + "step": 151212 + }, + { + "epoch": 1.82, + "grad_norm": 4.88461914865831, + "learning_rate": 4.296891622647259e-07, + "loss": 0.8152, + "step": 151215 + }, + { + "epoch": 1.82, + "grad_norm": 9.857534236935622, + "learning_rate": 4.295197755204905e-07, + "loss": 1.0419, + "step": 151218 + }, + { + "epoch": 1.82, + "grad_norm": 11.313869615339813, + "learning_rate": 4.2935042143685715e-07, + "loss": 1.2669, + "step": 151221 + }, + { + "epoch": 1.82, + "grad_norm": 9.222751345086671, + "learning_rate": 4.2918110001440306e-07, + "loss": 1.0903, + "step": 151224 + }, + { + "epoch": 1.82, + "grad_norm": 7.195459657111738, + "learning_rate": 4.290118112537056e-07, + "loss": 1.0057, + "step": 151227 + }, + { + "epoch": 1.82, + "grad_norm": 3.36503887161743, + "learning_rate": 4.288425551553432e-07, + "loss": 0.9883, + "step": 151230 + }, + { + "epoch": 1.82, + "grad_norm": 3.6996373552490955, + "learning_rate": 4.2867333171989547e-07, + "loss": 1.3982, + "step": 151233 + }, + { + "epoch": 1.82, + "grad_norm": 4.825082494026228, + "learning_rate": 4.2850414094793626e-07, + "loss": 1.1527, + "step": 151236 + }, + { + "epoch": 1.82, + "grad_norm": 13.92238614505654, + "learning_rate": 4.2833498284004513e-07, + "loss": 0.9404, + "step": 151239 + }, + { + "epoch": 1.82, + "grad_norm": 5.2950646758093844, + "learning_rate": 4.281658573968006e-07, + "loss": 1.0934, + "step": 151242 + }, + { + "epoch": 1.82, + "grad_norm": 7.7082861585359375, + "learning_rate": 4.279967646187766e-07, + "loss": 1.1145, + "step": 151245 + }, + { + "epoch": 1.82, + "grad_norm": 3.022802595519115, + "learning_rate": 4.278277045065526e-07, + "loss": 0.8357, + "step": 151248 + }, + { + "epoch": 1.82, + "grad_norm": 6.616252999017657, + "learning_rate": 4.2765867706070386e-07, + "loss": 0.875, + "step": 151251 + }, + { + "epoch": 1.82, + "grad_norm": 10.88569239698454, + "learning_rate": 4.2748968228180864e-07, + "loss": 0.8369, + "step": 151254 + }, + { + "epoch": 1.82, + "grad_norm": 12.480800603533044, + "learning_rate": 4.2732072017044215e-07, + "loss": 1.4091, + "step": 151257 + }, + { + "epoch": 1.82, + "grad_norm": 2.847186138373955, + "learning_rate": 4.271517907271838e-07, + "loss": 0.9644, + "step": 151260 + }, + { + "epoch": 1.82, + "grad_norm": 7.1870169945228435, + "learning_rate": 4.269828939526077e-07, + "loss": 1.4846, + "step": 151263 + }, + { + "epoch": 1.82, + "grad_norm": 12.221014865735942, + "learning_rate": 4.2681402984729113e-07, + "loss": 1.1204, + "step": 151266 + }, + { + "epoch": 1.82, + "grad_norm": 4.908329724080943, + "learning_rate": 4.266451984118092e-07, + "loss": 1.3788, + "step": 151269 + }, + { + "epoch": 1.82, + "grad_norm": 2.32553780377978, + "learning_rate": 4.2647639964673804e-07, + "loss": 1.1137, + "step": 151272 + }, + { + "epoch": 1.82, + "grad_norm": 9.476541187519034, + "learning_rate": 4.263076335526561e-07, + "loss": 1.5362, + "step": 151275 + }, + { + "epoch": 1.82, + "grad_norm": 11.783324583771584, + "learning_rate": 4.2613890013013746e-07, + "loss": 1.0868, + "step": 151278 + }, + { + "epoch": 1.82, + "grad_norm": 5.85389016859363, + "learning_rate": 4.2597019937975714e-07, + "loss": 1.4425, + "step": 151281 + }, + { + "epoch": 1.82, + "grad_norm": 8.610758261108115, + "learning_rate": 4.258015313020947e-07, + "loss": 1.0032, + "step": 151284 + }, + { + "epoch": 1.82, + "grad_norm": 8.024294653053747, + "learning_rate": 4.2563289589772183e-07, + "loss": 1.0143, + "step": 151287 + }, + { + "epoch": 1.82, + "grad_norm": 8.015202690121688, + "learning_rate": 4.2546429316721374e-07, + "loss": 1.1072, + "step": 151290 + }, + { + "epoch": 1.82, + "grad_norm": 5.920606681197902, + "learning_rate": 4.2529572311114764e-07, + "loss": 1.1595, + "step": 151293 + }, + { + "epoch": 1.82, + "grad_norm": 4.793363187722343, + "learning_rate": 4.2512718573009873e-07, + "loss": 1.0842, + "step": 151296 + }, + { + "epoch": 1.82, + "grad_norm": 11.39532306895024, + "learning_rate": 4.24958681024642e-07, + "loss": 1.1103, + "step": 151299 + }, + { + "epoch": 1.82, + "grad_norm": 12.065065040411334, + "learning_rate": 4.247902089953537e-07, + "loss": 1.1062, + "step": 151302 + }, + { + "epoch": 1.82, + "grad_norm": 5.950600075480017, + "learning_rate": 4.2462176964280564e-07, + "loss": 1.4316, + "step": 151305 + }, + { + "epoch": 1.82, + "grad_norm": 3.595582586962943, + "learning_rate": 4.244533629675762e-07, + "loss": 1.572, + "step": 151308 + }, + { + "epoch": 1.82, + "grad_norm": 8.585397082339751, + "learning_rate": 4.242849889702361e-07, + "loss": 1.0508, + "step": 151311 + }, + { + "epoch": 1.82, + "grad_norm": 4.218589965918334, + "learning_rate": 4.241166476513647e-07, + "loss": 1.011, + "step": 151314 + }, + { + "epoch": 1.82, + "grad_norm": 3.6502049961415692, + "learning_rate": 4.2394833901153176e-07, + "loss": 1.1613, + "step": 151317 + }, + { + "epoch": 1.82, + "grad_norm": 15.161881401223656, + "learning_rate": 4.237800630513145e-07, + "loss": 1.3327, + "step": 151320 + }, + { + "epoch": 1.82, + "grad_norm": 5.713530367872131, + "learning_rate": 4.2361181977128576e-07, + "loss": 1.0177, + "step": 151323 + }, + { + "epoch": 1.82, + "grad_norm": 10.698073070151793, + "learning_rate": 4.2344360917202286e-07, + "loss": 1.4119, + "step": 151326 + }, + { + "epoch": 1.82, + "grad_norm": 5.436990848963434, + "learning_rate": 4.2327543125409544e-07, + "loss": 0.7358, + "step": 151329 + }, + { + "epoch": 1.82, + "grad_norm": 4.6096856809388695, + "learning_rate": 4.231072860180807e-07, + "loss": 0.8797, + "step": 151332 + }, + { + "epoch": 1.82, + "grad_norm": 7.6384067602032975, + "learning_rate": 4.229391734645505e-07, + "loss": 0.9045, + "step": 151335 + }, + { + "epoch": 1.82, + "grad_norm": 9.623567750073907, + "learning_rate": 4.2277109359407875e-07, + "loss": 1.5876, + "step": 151338 + }, + { + "epoch": 1.82, + "grad_norm": 11.674747529485325, + "learning_rate": 4.226030464072406e-07, + "loss": 1.0181, + "step": 151341 + }, + { + "epoch": 1.82, + "grad_norm": 9.326013694061812, + "learning_rate": 4.224350319046078e-07, + "loss": 1.3696, + "step": 151344 + }, + { + "epoch": 1.82, + "grad_norm": 5.0361786514438025, + "learning_rate": 4.222670500867543e-07, + "loss": 0.926, + "step": 151347 + }, + { + "epoch": 1.82, + "grad_norm": 3.3368068006932634, + "learning_rate": 4.220991009542541e-07, + "loss": 1.2505, + "step": 151350 + }, + { + "epoch": 1.82, + "grad_norm": 8.626994285794463, + "learning_rate": 4.21931184507679e-07, + "loss": 1.0154, + "step": 151353 + }, + { + "epoch": 1.82, + "grad_norm": 3.673691320033625, + "learning_rate": 4.217633007476041e-07, + "loss": 1.0027, + "step": 151356 + }, + { + "epoch": 1.82, + "grad_norm": 8.659170541132251, + "learning_rate": 4.215954496746e-07, + "loss": 0.9026, + "step": 151359 + }, + { + "epoch": 1.82, + "grad_norm": 6.615606303730207, + "learning_rate": 4.214276312892396e-07, + "loss": 1.4723, + "step": 151362 + }, + { + "epoch": 1.82, + "grad_norm": 3.4169081933254226, + "learning_rate": 4.2125984559209797e-07, + "loss": 1.1913, + "step": 151365 + }, + { + "epoch": 1.82, + "grad_norm": 9.563099802779826, + "learning_rate": 4.210920925837458e-07, + "loss": 1.3785, + "step": 151368 + }, + { + "epoch": 1.82, + "grad_norm": 3.677787302243002, + "learning_rate": 4.2092437226475605e-07, + "loss": 1.1198, + "step": 151371 + }, + { + "epoch": 1.82, + "grad_norm": 5.011641581416303, + "learning_rate": 4.207566846357014e-07, + "loss": 1.0199, + "step": 151374 + }, + { + "epoch": 1.82, + "grad_norm": 47.693290573700594, + "learning_rate": 4.2058902969715265e-07, + "loss": 1.1094, + "step": 151377 + }, + { + "epoch": 1.82, + "grad_norm": 14.053033583941692, + "learning_rate": 4.2042140744968373e-07, + "loss": 1.2792, + "step": 151380 + }, + { + "epoch": 1.82, + "grad_norm": 7.231561228880759, + "learning_rate": 4.2025381789386645e-07, + "loss": 1.1619, + "step": 151383 + }, + { + "epoch": 1.82, + "grad_norm": 5.790314378657757, + "learning_rate": 4.200862610302714e-07, + "loss": 1.2453, + "step": 151386 + }, + { + "epoch": 1.82, + "grad_norm": 14.47198117103374, + "learning_rate": 4.199187368594715e-07, + "loss": 1.1712, + "step": 151389 + }, + { + "epoch": 1.82, + "grad_norm": 6.921448691922812, + "learning_rate": 4.197512453820396e-07, + "loss": 1.3192, + "step": 151392 + }, + { + "epoch": 1.82, + "grad_norm": 9.02967917027582, + "learning_rate": 4.1958378659854527e-07, + "loss": 1.4675, + "step": 151395 + }, + { + "epoch": 1.82, + "grad_norm": 5.169742906891048, + "learning_rate": 4.1941636050956026e-07, + "loss": 1.2926, + "step": 151398 + }, + { + "epoch": 1.82, + "grad_norm": 21.464094255964408, + "learning_rate": 4.192489671156552e-07, + "loss": 1.252, + "step": 151401 + }, + { + "epoch": 1.82, + "grad_norm": 3.743411994271618, + "learning_rate": 4.1908160641740303e-07, + "loss": 0.8626, + "step": 151404 + }, + { + "epoch": 1.82, + "grad_norm": 1.9533967551812237, + "learning_rate": 4.1891427841537436e-07, + "loss": 1.1449, + "step": 151407 + }, + { + "epoch": 1.82, + "grad_norm": 11.716648986782197, + "learning_rate": 4.1874698311014094e-07, + "loss": 0.9915, + "step": 151410 + }, + { + "epoch": 1.82, + "grad_norm": 6.514968703173968, + "learning_rate": 4.1857972050227235e-07, + "loss": 1.0181, + "step": 151413 + }, + { + "epoch": 1.82, + "grad_norm": 12.851326210405075, + "learning_rate": 4.1841249059234034e-07, + "loss": 1.0517, + "step": 151416 + }, + { + "epoch": 1.82, + "grad_norm": 9.641657327471952, + "learning_rate": 4.182452933809145e-07, + "loss": 1.2112, + "step": 151419 + }, + { + "epoch": 1.82, + "grad_norm": 16.341854369038344, + "learning_rate": 4.180781288685676e-07, + "loss": 1.2768, + "step": 151422 + }, + { + "epoch": 1.82, + "grad_norm": 7.240292030187008, + "learning_rate": 4.1791099705586704e-07, + "loss": 0.8724, + "step": 151425 + }, + { + "epoch": 1.82, + "grad_norm": 23.84947548928566, + "learning_rate": 4.177438979433857e-07, + "loss": 0.8528, + "step": 151428 + }, + { + "epoch": 1.82, + "grad_norm": 6.546806646427359, + "learning_rate": 4.1757683153169195e-07, + "loss": 1.2029, + "step": 151431 + }, + { + "epoch": 1.82, + "grad_norm": 3.683665901179426, + "learning_rate": 4.174097978213576e-07, + "loss": 0.9584, + "step": 151434 + }, + { + "epoch": 1.82, + "grad_norm": 3.3152893301127246, + "learning_rate": 4.1724279681295334e-07, + "loss": 0.9143, + "step": 151437 + }, + { + "epoch": 1.82, + "grad_norm": 16.541086208348716, + "learning_rate": 4.170758285070453e-07, + "loss": 1.1535, + "step": 151440 + }, + { + "epoch": 1.82, + "grad_norm": 7.40155926811817, + "learning_rate": 4.1690889290420645e-07, + "loss": 0.8953, + "step": 151443 + }, + { + "epoch": 1.82, + "grad_norm": 9.448851288331443, + "learning_rate": 4.1674199000500626e-07, + "loss": 0.9553, + "step": 151446 + }, + { + "epoch": 1.82, + "grad_norm": 3.4915465506591037, + "learning_rate": 4.165751198100143e-07, + "loss": 1.171, + "step": 151449 + }, + { + "epoch": 1.82, + "grad_norm": 4.291049498927383, + "learning_rate": 4.164082823197979e-07, + "loss": 1.1302, + "step": 151452 + }, + { + "epoch": 1.82, + "grad_norm": 10.456633608372307, + "learning_rate": 4.162414775349288e-07, + "loss": 1.3801, + "step": 151455 + }, + { + "epoch": 1.82, + "grad_norm": 7.033770722391139, + "learning_rate": 4.160747054559766e-07, + "loss": 0.864, + "step": 151458 + }, + { + "epoch": 1.82, + "grad_norm": 5.847176595647557, + "learning_rate": 4.159079660835075e-07, + "loss": 1.0156, + "step": 151461 + }, + { + "epoch": 1.82, + "grad_norm": 7.36645087026123, + "learning_rate": 4.157412594180943e-07, + "loss": 0.9413, + "step": 151464 + }, + { + "epoch": 1.82, + "grad_norm": 14.253736417996672, + "learning_rate": 4.155745854603022e-07, + "loss": 0.941, + "step": 151467 + }, + { + "epoch": 1.82, + "grad_norm": 8.332846731116424, + "learning_rate": 4.1540794421070177e-07, + "loss": 1.2321, + "step": 151470 + }, + { + "epoch": 1.82, + "grad_norm": 3.987975979715006, + "learning_rate": 4.152413356698615e-07, + "loss": 0.9673, + "step": 151473 + }, + { + "epoch": 1.82, + "grad_norm": 12.09663105073857, + "learning_rate": 4.15074759838352e-07, + "loss": 1.2673, + "step": 151476 + }, + { + "epoch": 1.82, + "grad_norm": 13.334980277933996, + "learning_rate": 4.149082167167395e-07, + "loss": 1.0893, + "step": 151479 + }, + { + "epoch": 1.82, + "grad_norm": 30.381327050132864, + "learning_rate": 4.147417063055914e-07, + "loss": 1.1035, + "step": 151482 + }, + { + "epoch": 1.82, + "grad_norm": 57.77711285493121, + "learning_rate": 4.1457522860547715e-07, + "loss": 1.1616, + "step": 151485 + }, + { + "epoch": 1.82, + "grad_norm": 5.151490619160406, + "learning_rate": 4.1440878361696524e-07, + "loss": 0.7498, + "step": 151488 + }, + { + "epoch": 1.82, + "grad_norm": 6.06680305090137, + "learning_rate": 4.1424237134062515e-07, + "loss": 0.9757, + "step": 151491 + }, + { + "epoch": 1.82, + "grad_norm": 5.106562801126452, + "learning_rate": 4.1407599177702094e-07, + "loss": 1.4273, + "step": 151494 + }, + { + "epoch": 1.82, + "grad_norm": 4.804050561228231, + "learning_rate": 4.1390964492672325e-07, + "loss": 1.5882, + "step": 151497 + }, + { + "epoch": 1.82, + "grad_norm": 5.4876661472626695, + "learning_rate": 4.137433307902994e-07, + "loss": 1.1809, + "step": 151500 + }, + { + "epoch": 1.82, + "grad_norm": 5.3103191357244155, + "learning_rate": 4.135770493683178e-07, + "loss": 1.5593, + "step": 151503 + }, + { + "epoch": 1.82, + "grad_norm": 16.3487725292613, + "learning_rate": 4.1341080066134245e-07, + "loss": 1.1538, + "step": 151506 + }, + { + "epoch": 1.82, + "grad_norm": 4.670400248266501, + "learning_rate": 4.13244584669944e-07, + "loss": 1.0504, + "step": 151509 + }, + { + "epoch": 1.82, + "grad_norm": 7.975452416291426, + "learning_rate": 4.1307840139468756e-07, + "loss": 1.3581, + "step": 151512 + }, + { + "epoch": 1.82, + "grad_norm": 14.006823495408026, + "learning_rate": 4.129122508361416e-07, + "loss": 1.0858, + "step": 151515 + }, + { + "epoch": 1.82, + "grad_norm": 9.914213115903296, + "learning_rate": 4.1274613299487454e-07, + "loss": 1.0823, + "step": 151518 + }, + { + "epoch": 1.82, + "grad_norm": 7.748709586148677, + "learning_rate": 4.1258004787145144e-07, + "loss": 1.0655, + "step": 151521 + }, + { + "epoch": 1.82, + "grad_norm": 6.861844029397832, + "learning_rate": 4.1241399546643856e-07, + "loss": 0.9511, + "step": 151524 + }, + { + "epoch": 1.82, + "grad_norm": 2.999480691362457, + "learning_rate": 4.122479757804021e-07, + "loss": 1.1152, + "step": 151527 + }, + { + "epoch": 1.82, + "grad_norm": 6.082507430893651, + "learning_rate": 4.120819888139116e-07, + "loss": 0.9211, + "step": 151530 + }, + { + "epoch": 1.82, + "grad_norm": 8.200725542568751, + "learning_rate": 4.1191603456753104e-07, + "loss": 1.1471, + "step": 151533 + }, + { + "epoch": 1.82, + "grad_norm": 11.850705567827433, + "learning_rate": 4.1175011304182665e-07, + "loss": 1.037, + "step": 151536 + }, + { + "epoch": 1.82, + "grad_norm": 3.5324396866364456, + "learning_rate": 4.115842242373658e-07, + "loss": 1.1321, + "step": 151539 + }, + { + "epoch": 1.82, + "grad_norm": 7.001634323684826, + "learning_rate": 4.1141836815471567e-07, + "loss": 1.3073, + "step": 151542 + }, + { + "epoch": 1.82, + "grad_norm": 10.785072639522426, + "learning_rate": 4.1125254479444046e-07, + "loss": 1.0382, + "step": 151545 + }, + { + "epoch": 1.82, + "grad_norm": 28.459717806908788, + "learning_rate": 4.1108675415710507e-07, + "loss": 0.898, + "step": 151548 + }, + { + "epoch": 1.82, + "grad_norm": 9.087257325689553, + "learning_rate": 4.1092099624327696e-07, + "loss": 0.9592, + "step": 151551 + }, + { + "epoch": 1.82, + "grad_norm": 2.520330412387026, + "learning_rate": 4.107552710535212e-07, + "loss": 1.1667, + "step": 151554 + }, + { + "epoch": 1.82, + "grad_norm": 5.946526579880331, + "learning_rate": 4.105895785884062e-07, + "loss": 1.0495, + "step": 151557 + }, + { + "epoch": 1.82, + "grad_norm": 4.608350317530322, + "learning_rate": 4.1042391884849267e-07, + "loss": 1.3328, + "step": 151560 + }, + { + "epoch": 1.82, + "grad_norm": 6.343586188238253, + "learning_rate": 4.1025829183434896e-07, + "loss": 0.688, + "step": 151563 + }, + { + "epoch": 1.82, + "grad_norm": 14.367756879234191, + "learning_rate": 4.1009269754653915e-07, + "loss": 1.2551, + "step": 151566 + }, + { + "epoch": 1.82, + "grad_norm": 8.106051813209632, + "learning_rate": 4.099271359856283e-07, + "loss": 1.1634, + "step": 151569 + }, + { + "epoch": 1.82, + "grad_norm": 9.592504451364574, + "learning_rate": 4.097616071521826e-07, + "loss": 1.0263, + "step": 151572 + }, + { + "epoch": 1.82, + "grad_norm": 7.184228137772405, + "learning_rate": 4.09596111046765e-07, + "loss": 0.9966, + "step": 151575 + }, + { + "epoch": 1.82, + "grad_norm": 3.622358099713615, + "learning_rate": 4.0943064766994165e-07, + "loss": 1.0101, + "step": 151578 + }, + { + "epoch": 1.82, + "grad_norm": 4.722332708781385, + "learning_rate": 4.0926521702227773e-07, + "loss": 1.3896, + "step": 151581 + }, + { + "epoch": 1.82, + "grad_norm": 16.63388613929151, + "learning_rate": 4.0909981910433716e-07, + "loss": 1.1015, + "step": 151584 + }, + { + "epoch": 1.82, + "grad_norm": 8.43402467468358, + "learning_rate": 4.0893445391668505e-07, + "loss": 1.1522, + "step": 151587 + }, + { + "epoch": 1.82, + "grad_norm": 2.2078719749037132, + "learning_rate": 4.087691214598832e-07, + "loss": 1.1054, + "step": 151590 + }, + { + "epoch": 1.82, + "grad_norm": 3.725967799664331, + "learning_rate": 4.086038217344979e-07, + "loss": 1.3762, + "step": 151593 + }, + { + "epoch": 1.82, + "grad_norm": 7.259709385239046, + "learning_rate": 4.0843855474109297e-07, + "loss": 1.343, + "step": 151596 + }, + { + "epoch": 1.82, + "grad_norm": 7.777839101408525, + "learning_rate": 4.082733204802336e-07, + "loss": 1.24, + "step": 151599 + }, + { + "epoch": 1.82, + "grad_norm": 8.33449882049068, + "learning_rate": 4.081081189524816e-07, + "loss": 1.2482, + "step": 151602 + }, + { + "epoch": 1.82, + "grad_norm": 9.126268523389976, + "learning_rate": 4.0794295015840204e-07, + "loss": 1.0758, + "step": 151605 + }, + { + "epoch": 1.82, + "grad_norm": 12.621073022354107, + "learning_rate": 4.0777781409855776e-07, + "loss": 0.967, + "step": 151608 + }, + { + "epoch": 1.82, + "grad_norm": 6.21585381123823, + "learning_rate": 4.0761271077351284e-07, + "loss": 1.3384, + "step": 151611 + }, + { + "epoch": 1.82, + "grad_norm": 16.368990847677885, + "learning_rate": 4.074476401838301e-07, + "loss": 1.2553, + "step": 151614 + }, + { + "epoch": 1.82, + "grad_norm": 7.915426446008946, + "learning_rate": 4.0728260233007354e-07, + "loss": 0.9616, + "step": 151617 + }, + { + "epoch": 1.82, + "grad_norm": 6.335425983534771, + "learning_rate": 4.071175972128061e-07, + "loss": 1.2589, + "step": 151620 + }, + { + "epoch": 1.82, + "grad_norm": 2.968540362298304, + "learning_rate": 4.069526248325906e-07, + "loss": 1.3576, + "step": 151623 + }, + { + "epoch": 1.82, + "grad_norm": 6.17352104825977, + "learning_rate": 4.06787685189991e-07, + "loss": 1.3604, + "step": 151626 + }, + { + "epoch": 1.82, + "grad_norm": 11.101573503913913, + "learning_rate": 4.0662277828557027e-07, + "loss": 1.2842, + "step": 151629 + }, + { + "epoch": 1.82, + "grad_norm": 2.494811308355341, + "learning_rate": 4.064579041198902e-07, + "loss": 1.3087, + "step": 151632 + }, + { + "epoch": 1.82, + "grad_norm": 5.001892595507749, + "learning_rate": 4.062930626935124e-07, + "loss": 0.9263, + "step": 151635 + }, + { + "epoch": 1.82, + "grad_norm": 6.3114408054726185, + "learning_rate": 4.061282540070022e-07, + "loss": 1.0733, + "step": 151638 + }, + { + "epoch": 1.82, + "grad_norm": 3.903151227481215, + "learning_rate": 4.0596347806092007e-07, + "loss": 0.9437, + "step": 151641 + }, + { + "epoch": 1.82, + "grad_norm": 8.841501944073027, + "learning_rate": 4.0579873485582786e-07, + "loss": 1.3661, + "step": 151644 + }, + { + "epoch": 1.82, + "grad_norm": 5.020968439575385, + "learning_rate": 4.056340243922907e-07, + "loss": 0.9492, + "step": 151647 + }, + { + "epoch": 1.82, + "grad_norm": 9.618923241935143, + "learning_rate": 4.0546934667086704e-07, + "loss": 1.3628, + "step": 151650 + }, + { + "epoch": 1.82, + "grad_norm": 14.159034026403496, + "learning_rate": 4.0530470169212186e-07, + "loss": 1.1733, + "step": 151653 + }, + { + "epoch": 1.82, + "grad_norm": 11.975226873947715, + "learning_rate": 4.0514008945661486e-07, + "loss": 0.9755, + "step": 151656 + }, + { + "epoch": 1.82, + "grad_norm": 6.270720442533961, + "learning_rate": 4.049755099649078e-07, + "loss": 1.0946, + "step": 151659 + }, + { + "epoch": 1.82, + "grad_norm": 14.068922980518495, + "learning_rate": 4.048109632175645e-07, + "loss": 1.1986, + "step": 151662 + }, + { + "epoch": 1.82, + "grad_norm": 9.576588596734535, + "learning_rate": 4.0464644921514584e-07, + "loss": 1.2035, + "step": 151665 + }, + { + "epoch": 1.82, + "grad_norm": 8.731063278967593, + "learning_rate": 4.044819679582113e-07, + "loss": 1.2925, + "step": 151668 + }, + { + "epoch": 1.82, + "grad_norm": 8.338512808872784, + "learning_rate": 4.043175194473248e-07, + "loss": 0.938, + "step": 151671 + }, + { + "epoch": 1.82, + "grad_norm": 7.711815903599811, + "learning_rate": 4.0415310368304484e-07, + "loss": 0.8373, + "step": 151674 + }, + { + "epoch": 1.82, + "grad_norm": 17.80211018654459, + "learning_rate": 4.039887206659343e-07, + "loss": 0.88, + "step": 151677 + }, + { + "epoch": 1.82, + "grad_norm": 9.930773242733215, + "learning_rate": 4.03824370396555e-07, + "loss": 1.0914, + "step": 151680 + }, + { + "epoch": 1.82, + "grad_norm": 6.008025534812151, + "learning_rate": 4.0366005287546417e-07, + "loss": 1.0038, + "step": 151683 + }, + { + "epoch": 1.82, + "grad_norm": 9.102842976445197, + "learning_rate": 4.03495768103227e-07, + "loss": 1.3694, + "step": 151686 + }, + { + "epoch": 1.82, + "grad_norm": 13.919330185506338, + "learning_rate": 4.033315160804008e-07, + "loss": 0.927, + "step": 151689 + }, + { + "epoch": 1.82, + "grad_norm": 5.395845081685401, + "learning_rate": 4.031672968075484e-07, + "loss": 1.0375, + "step": 151692 + }, + { + "epoch": 1.82, + "grad_norm": 8.054103017088748, + "learning_rate": 4.030031102852305e-07, + "loss": 0.9551, + "step": 151695 + }, + { + "epoch": 1.82, + "grad_norm": 13.34908562791311, + "learning_rate": 4.028389565140045e-07, + "loss": 1.2022, + "step": 151698 + }, + { + "epoch": 1.82, + "grad_norm": 8.764853493364786, + "learning_rate": 4.026748354944321e-07, + "loss": 1.1785, + "step": 151701 + }, + { + "epoch": 1.82, + "grad_norm": 13.736936961232816, + "learning_rate": 4.0251074722707284e-07, + "loss": 0.9942, + "step": 151704 + }, + { + "epoch": 1.82, + "grad_norm": 6.606933426933652, + "learning_rate": 4.023466917124896e-07, + "loss": 0.9116, + "step": 151707 + }, + { + "epoch": 1.82, + "grad_norm": 18.76418915933838, + "learning_rate": 4.021826689512387e-07, + "loss": 0.9619, + "step": 151710 + }, + { + "epoch": 1.82, + "grad_norm": 18.44262159243285, + "learning_rate": 4.020186789438818e-07, + "loss": 1.1196, + "step": 151713 + }, + { + "epoch": 1.82, + "grad_norm": 3.6319366885988456, + "learning_rate": 4.018547216909774e-07, + "loss": 1.1251, + "step": 151716 + }, + { + "epoch": 1.82, + "grad_norm": 4.6337159355892785, + "learning_rate": 4.016907971930861e-07, + "loss": 1.1431, + "step": 151719 + }, + { + "epoch": 1.82, + "grad_norm": 4.825651015037728, + "learning_rate": 4.0152690545076644e-07, + "loss": 1.349, + "step": 151722 + }, + { + "epoch": 1.82, + "grad_norm": 11.152516931477878, + "learning_rate": 4.013630464645779e-07, + "loss": 1.0351, + "step": 151725 + }, + { + "epoch": 1.82, + "grad_norm": 10.07129367530079, + "learning_rate": 4.0119922023508007e-07, + "loss": 1.0153, + "step": 151728 + }, + { + "epoch": 1.82, + "grad_norm": 12.130543259861746, + "learning_rate": 4.0103542676283134e-07, + "loss": 1.1695, + "step": 151731 + }, + { + "epoch": 1.82, + "grad_norm": 18.116210572170964, + "learning_rate": 4.0087166604839245e-07, + "loss": 1.0003, + "step": 151734 + }, + { + "epoch": 1.82, + "grad_norm": 3.277830016782482, + "learning_rate": 4.007079380923207e-07, + "loss": 1.0508, + "step": 151737 + }, + { + "epoch": 1.82, + "grad_norm": 9.111639304940214, + "learning_rate": 4.005442428951745e-07, + "loss": 1.0654, + "step": 151740 + }, + { + "epoch": 1.82, + "grad_norm": 4.820411994291042, + "learning_rate": 4.003805804575134e-07, + "loss": 0.9592, + "step": 151743 + }, + { + "epoch": 1.82, + "grad_norm": 8.461353633750091, + "learning_rate": 4.0021695077989587e-07, + "loss": 1.062, + "step": 151746 + }, + { + "epoch": 1.82, + "grad_norm": 6.984116787709967, + "learning_rate": 4.0005335386287927e-07, + "loss": 1.4731, + "step": 151749 + }, + { + "epoch": 1.82, + "grad_norm": 12.291336105273801, + "learning_rate": 3.998897897070231e-07, + "loss": 1.1535, + "step": 151752 + }, + { + "epoch": 1.82, + "grad_norm": 7.764109855217703, + "learning_rate": 3.997262583128858e-07, + "loss": 1.2878, + "step": 151755 + }, + { + "epoch": 1.82, + "grad_norm": 3.480951984255534, + "learning_rate": 3.9956275968102366e-07, + "loss": 0.9869, + "step": 151758 + }, + { + "epoch": 1.82, + "grad_norm": 33.07914658127511, + "learning_rate": 3.9939929381199725e-07, + "loss": 1.1689, + "step": 151761 + }, + { + "epoch": 1.82, + "grad_norm": 6.693753223046088, + "learning_rate": 3.9923586070636066e-07, + "loss": 0.8699, + "step": 151764 + }, + { + "epoch": 1.82, + "grad_norm": 7.613741899916914, + "learning_rate": 3.990724603646745e-07, + "loss": 1.1036, + "step": 151767 + }, + { + "epoch": 1.82, + "grad_norm": 8.758667731801632, + "learning_rate": 3.989090927874961e-07, + "loss": 1.0683, + "step": 151770 + }, + { + "epoch": 1.83, + "grad_norm": 24.88598530824879, + "learning_rate": 3.9874575797538396e-07, + "loss": 0.996, + "step": 151773 + }, + { + "epoch": 1.83, + "grad_norm": 3.9967595499434276, + "learning_rate": 3.985824559288931e-07, + "loss": 1.1279, + "step": 151776 + }, + { + "epoch": 1.83, + "grad_norm": 18.61432385841914, + "learning_rate": 3.9841918664858314e-07, + "loss": 1.1699, + "step": 151779 + }, + { + "epoch": 1.83, + "grad_norm": 11.634881065756792, + "learning_rate": 3.9825595013500805e-07, + "loss": 1.1312, + "step": 151782 + }, + { + "epoch": 1.83, + "grad_norm": 7.379643711246896, + "learning_rate": 3.980927463887274e-07, + "loss": 1.4288, + "step": 151785 + }, + { + "epoch": 1.83, + "grad_norm": 8.497261463824596, + "learning_rate": 3.9792957541029854e-07, + "loss": 0.9576, + "step": 151788 + }, + { + "epoch": 1.83, + "grad_norm": 15.221210894258455, + "learning_rate": 3.9776643720027653e-07, + "loss": 1.5095, + "step": 151791 + }, + { + "epoch": 1.83, + "grad_norm": 4.3458791977680304, + "learning_rate": 3.9760333175921874e-07, + "loss": 0.713, + "step": 151794 + }, + { + "epoch": 1.83, + "grad_norm": 3.7555194595297356, + "learning_rate": 3.9744025908768356e-07, + "loss": 0.7917, + "step": 151797 + }, + { + "epoch": 1.83, + "grad_norm": 5.889955722350358, + "learning_rate": 3.972772191862251e-07, + "loss": 1.1731, + "step": 151800 + }, + { + "epoch": 1.83, + "grad_norm": 5.6706371062132925, + "learning_rate": 3.971142120554006e-07, + "loss": 1.1971, + "step": 151803 + }, + { + "epoch": 1.83, + "grad_norm": 6.177625310642944, + "learning_rate": 3.9695123769576515e-07, + "loss": 0.8662, + "step": 151806 + }, + { + "epoch": 1.83, + "grad_norm": 10.200670096829109, + "learning_rate": 3.967882961078773e-07, + "loss": 1.091, + "step": 151809 + }, + { + "epoch": 1.83, + "grad_norm": 8.354251760867802, + "learning_rate": 3.966253872922909e-07, + "loss": 1.1635, + "step": 151812 + }, + { + "epoch": 1.83, + "grad_norm": 9.432683509687172, + "learning_rate": 3.964625112495646e-07, + "loss": 1.1377, + "step": 151815 + }, + { + "epoch": 1.83, + "grad_norm": 9.380510092604402, + "learning_rate": 3.962996679802511e-07, + "loss": 1.5225, + "step": 151818 + }, + { + "epoch": 1.83, + "grad_norm": 3.000589027644492, + "learning_rate": 3.961368574849089e-07, + "loss": 0.7488, + "step": 151821 + }, + { + "epoch": 1.83, + "grad_norm": 4.392238901046059, + "learning_rate": 3.9597407976409205e-07, + "loss": 0.8738, + "step": 151824 + }, + { + "epoch": 1.83, + "grad_norm": 7.73153860788867, + "learning_rate": 3.9581133481835677e-07, + "loss": 1.0281, + "step": 151827 + }, + { + "epoch": 1.83, + "grad_norm": 5.978949281631559, + "learning_rate": 3.9564862264825586e-07, + "loss": 1.3563, + "step": 151830 + }, + { + "epoch": 1.83, + "grad_norm": 9.09700125850743, + "learning_rate": 3.9548594325434785e-07, + "loss": 1.0747, + "step": 151833 + }, + { + "epoch": 1.83, + "grad_norm": 22.86168788952744, + "learning_rate": 3.953232966371867e-07, + "loss": 1.1298, + "step": 151836 + }, + { + "epoch": 1.83, + "grad_norm": 22.679206387606943, + "learning_rate": 3.9516068279732867e-07, + "loss": 1.1309, + "step": 151839 + }, + { + "epoch": 1.83, + "grad_norm": 6.36042742771848, + "learning_rate": 3.949981017353277e-07, + "loss": 1.0445, + "step": 151842 + }, + { + "epoch": 1.83, + "grad_norm": 7.867726091626684, + "learning_rate": 3.948355534517367e-07, + "loss": 1.1914, + "step": 151845 + }, + { + "epoch": 1.83, + "grad_norm": 3.5692477129915448, + "learning_rate": 3.946730379471131e-07, + "loss": 1.1393, + "step": 151848 + }, + { + "epoch": 1.83, + "grad_norm": 15.022858113495884, + "learning_rate": 3.9451055522200967e-07, + "loss": 0.8481, + "step": 151851 + }, + { + "epoch": 1.83, + "grad_norm": 7.479581267245945, + "learning_rate": 3.9434810527698373e-07, + "loss": 0.9107, + "step": 151854 + }, + { + "epoch": 1.83, + "grad_norm": 6.0160376594622695, + "learning_rate": 3.9418568811258607e-07, + "loss": 1.2463, + "step": 151857 + }, + { + "epoch": 1.83, + "grad_norm": 4.92457793191751, + "learning_rate": 3.940233037293728e-07, + "loss": 1.2653, + "step": 151860 + }, + { + "epoch": 1.83, + "grad_norm": 10.678869130925875, + "learning_rate": 3.938609521278991e-07, + "loss": 1.2178, + "step": 151863 + }, + { + "epoch": 1.83, + "grad_norm": 3.3509059703604955, + "learning_rate": 3.936986333087167e-07, + "loss": 1.1533, + "step": 151866 + }, + { + "epoch": 1.83, + "grad_norm": 13.133883910858447, + "learning_rate": 3.9353634727238187e-07, + "loss": 1.0843, + "step": 151869 + }, + { + "epoch": 1.83, + "grad_norm": 11.926535641303104, + "learning_rate": 3.9337409401944635e-07, + "loss": 1.0493, + "step": 151872 + }, + { + "epoch": 1.83, + "grad_norm": 11.35628510756548, + "learning_rate": 3.932118735504642e-07, + "loss": 1.1922, + "step": 151875 + }, + { + "epoch": 1.83, + "grad_norm": 7.3456057057753155, + "learning_rate": 3.9304968586599046e-07, + "loss": 1.2206, + "step": 151878 + }, + { + "epoch": 1.83, + "grad_norm": 10.581398909489055, + "learning_rate": 3.9288753096657805e-07, + "loss": 0.9018, + "step": 151881 + }, + { + "epoch": 1.83, + "grad_norm": 4.0376643786499935, + "learning_rate": 3.92725408852781e-07, + "loss": 1.0801, + "step": 151884 + }, + { + "epoch": 1.83, + "grad_norm": 4.742552918288727, + "learning_rate": 3.9256331952514993e-07, + "loss": 1.0911, + "step": 151887 + }, + { + "epoch": 1.83, + "grad_norm": 16.2962083972325, + "learning_rate": 3.9240126298423886e-07, + "loss": 0.7392, + "step": 151890 + }, + { + "epoch": 1.83, + "grad_norm": 8.59654667278227, + "learning_rate": 3.9223923923060294e-07, + "loss": 1.1819, + "step": 151893 + }, + { + "epoch": 1.83, + "grad_norm": 21.67612983022945, + "learning_rate": 3.9207724826479385e-07, + "loss": 0.972, + "step": 151896 + }, + { + "epoch": 1.83, + "grad_norm": 5.744415614854698, + "learning_rate": 3.919152900873635e-07, + "loss": 1.1211, + "step": 151899 + }, + { + "epoch": 1.83, + "grad_norm": 5.350981574850648, + "learning_rate": 3.9175336469886584e-07, + "loss": 0.9981, + "step": 151902 + }, + { + "epoch": 1.83, + "grad_norm": 5.949974745761527, + "learning_rate": 3.915914720998537e-07, + "loss": 1.1498, + "step": 151905 + }, + { + "epoch": 1.83, + "grad_norm": 2.575207378586916, + "learning_rate": 3.9142961229088003e-07, + "loss": 1.2487, + "step": 151908 + }, + { + "epoch": 1.83, + "grad_norm": 5.4933754923929, + "learning_rate": 3.912677852724933e-07, + "loss": 1.0134, + "step": 151911 + }, + { + "epoch": 1.83, + "grad_norm": 17.189952718871126, + "learning_rate": 3.911059910452497e-07, + "loss": 1.4424, + "step": 151914 + }, + { + "epoch": 1.83, + "grad_norm": 3.3501195265203796, + "learning_rate": 3.909442296096999e-07, + "loss": 1.013, + "step": 151917 + }, + { + "epoch": 1.83, + "grad_norm": 7.978731672871183, + "learning_rate": 3.907825009663968e-07, + "loss": 1.0886, + "step": 151920 + }, + { + "epoch": 1.83, + "grad_norm": 10.605567198022941, + "learning_rate": 3.9062080511589215e-07, + "loss": 1.3821, + "step": 151923 + }, + { + "epoch": 1.83, + "grad_norm": 4.505706989602469, + "learning_rate": 3.904591420587367e-07, + "loss": 0.992, + "step": 151926 + }, + { + "epoch": 1.83, + "grad_norm": 11.750826360063213, + "learning_rate": 3.9029751179548434e-07, + "loss": 0.928, + "step": 151929 + }, + { + "epoch": 1.83, + "grad_norm": 6.494410899148451, + "learning_rate": 3.9013591432668364e-07, + "loss": 1.1244, + "step": 151932 + }, + { + "epoch": 1.83, + "grad_norm": 2.9996980130084343, + "learning_rate": 3.899743496528885e-07, + "loss": 0.929, + "step": 151935 + }, + { + "epoch": 1.83, + "grad_norm": 3.1833893886795748, + "learning_rate": 3.898128177746485e-07, + "loss": 1.0168, + "step": 151938 + }, + { + "epoch": 1.83, + "grad_norm": 12.508109330443682, + "learning_rate": 3.8965131869251546e-07, + "loss": 1.2198, + "step": 151941 + }, + { + "epoch": 1.83, + "grad_norm": 14.067460053649738, + "learning_rate": 3.894898524070412e-07, + "loss": 1.1646, + "step": 151944 + }, + { + "epoch": 1.83, + "grad_norm": 7.249593738298386, + "learning_rate": 3.8932841891877736e-07, + "loss": 1.1896, + "step": 151947 + }, + { + "epoch": 1.83, + "grad_norm": 10.445714551723482, + "learning_rate": 3.8916701822827476e-07, + "loss": 1.1036, + "step": 151950 + }, + { + "epoch": 1.83, + "grad_norm": 4.442732301023803, + "learning_rate": 3.890056503360806e-07, + "loss": 1.1952, + "step": 151953 + }, + { + "epoch": 1.83, + "grad_norm": 11.707164410904895, + "learning_rate": 3.88844315242749e-07, + "loss": 1.0089, + "step": 151956 + }, + { + "epoch": 1.83, + "grad_norm": 8.81136797437221, + "learning_rate": 3.886830129488306e-07, + "loss": 1.6877, + "step": 151959 + }, + { + "epoch": 1.83, + "grad_norm": 21.14153934437616, + "learning_rate": 3.885217434548749e-07, + "loss": 1.5073, + "step": 151962 + }, + { + "epoch": 1.83, + "grad_norm": 8.670628343163566, + "learning_rate": 3.8836050676143265e-07, + "loss": 1.1131, + "step": 151965 + }, + { + "epoch": 1.83, + "grad_norm": 8.175930497457209, + "learning_rate": 3.8819930286905226e-07, + "loss": 1.1683, + "step": 151968 + }, + { + "epoch": 1.83, + "grad_norm": 3.6552955265339007, + "learning_rate": 3.880381317782878e-07, + "loss": 1.2369, + "step": 151971 + }, + { + "epoch": 1.83, + "grad_norm": 3.1251957642866595, + "learning_rate": 3.878769934896864e-07, + "loss": 1.3161, + "step": 151974 + }, + { + "epoch": 1.83, + "grad_norm": 3.354150183149319, + "learning_rate": 3.87715888003799e-07, + "loss": 0.9372, + "step": 151977 + }, + { + "epoch": 1.83, + "grad_norm": 3.480876038460662, + "learning_rate": 3.8755481532117276e-07, + "loss": 0.7777, + "step": 151980 + }, + { + "epoch": 1.83, + "grad_norm": 7.884625858808369, + "learning_rate": 3.873937754423607e-07, + "loss": 1.0738, + "step": 151983 + }, + { + "epoch": 1.83, + "grad_norm": 6.48122534041056, + "learning_rate": 3.8723276836791113e-07, + "loss": 1.094, + "step": 151986 + }, + { + "epoch": 1.83, + "grad_norm": 13.670886848238021, + "learning_rate": 3.870717940983748e-07, + "loss": 1.2022, + "step": 151989 + }, + { + "epoch": 1.83, + "grad_norm": 9.400792964748366, + "learning_rate": 3.869108526343002e-07, + "loss": 1.0778, + "step": 151992 + }, + { + "epoch": 1.83, + "grad_norm": 10.846111208540389, + "learning_rate": 3.8674994397623453e-07, + "loss": 1.3185, + "step": 151995 + }, + { + "epoch": 1.83, + "grad_norm": 4.490139411914516, + "learning_rate": 3.8658906812472865e-07, + "loss": 1.1406, + "step": 151998 + }, + { + "epoch": 1.83, + "grad_norm": 7.603802735967208, + "learning_rate": 3.864282250803308e-07, + "loss": 0.936, + "step": 152001 + }, + { + "epoch": 1.83, + "grad_norm": 7.88912188400595, + "learning_rate": 3.8626741484359295e-07, + "loss": 0.9144, + "step": 152004 + }, + { + "epoch": 1.83, + "grad_norm": 21.673160338166717, + "learning_rate": 3.8610663741506016e-07, + "loss": 1.0204, + "step": 152007 + }, + { + "epoch": 1.83, + "grad_norm": 11.6754341459311, + "learning_rate": 3.8594589279528306e-07, + "loss": 1.1375, + "step": 152010 + }, + { + "epoch": 1.83, + "grad_norm": 3.808741838185832, + "learning_rate": 3.857851809848101e-07, + "loss": 0.8674, + "step": 152013 + }, + { + "epoch": 1.83, + "grad_norm": 7.22797220199278, + "learning_rate": 3.8562450198418865e-07, + "loss": 1.1361, + "step": 152016 + }, + { + "epoch": 1.83, + "grad_norm": 2.764526687686946, + "learning_rate": 3.8546385579396715e-07, + "loss": 1.1224, + "step": 152019 + }, + { + "epoch": 1.83, + "grad_norm": 34.31646871757433, + "learning_rate": 3.853032424146952e-07, + "loss": 1.3341, + "step": 152022 + }, + { + "epoch": 1.83, + "grad_norm": 4.304721108457724, + "learning_rate": 3.8514266184691895e-07, + "loss": 0.9977, + "step": 152025 + }, + { + "epoch": 1.83, + "grad_norm": 8.798172808458222, + "learning_rate": 3.84982114091188e-07, + "loss": 1.1724, + "step": 152028 + }, + { + "epoch": 1.83, + "grad_norm": 11.322714327082922, + "learning_rate": 3.8482159914805083e-07, + "loss": 1.424, + "step": 152031 + }, + { + "epoch": 1.83, + "grad_norm": 7.239845176778081, + "learning_rate": 3.8466111701805475e-07, + "loss": 0.9627, + "step": 152034 + }, + { + "epoch": 1.83, + "grad_norm": 4.837012073586074, + "learning_rate": 3.845006677017449e-07, + "loss": 1.0746, + "step": 152037 + }, + { + "epoch": 1.83, + "grad_norm": 4.895846846832489, + "learning_rate": 3.843402511996708e-07, + "loss": 1.1092, + "step": 152040 + }, + { + "epoch": 1.83, + "grad_norm": 7.013262334454206, + "learning_rate": 3.84179867512382e-07, + "loss": 1.0887, + "step": 152043 + }, + { + "epoch": 1.83, + "grad_norm": 3.7622884217708097, + "learning_rate": 3.8401951664042145e-07, + "loss": 1.1486, + "step": 152046 + }, + { + "epoch": 1.83, + "grad_norm": 7.7580028182514225, + "learning_rate": 3.8385919858433875e-07, + "loss": 0.915, + "step": 152049 + }, + { + "epoch": 1.83, + "grad_norm": 134.7459634952896, + "learning_rate": 3.836989133446811e-07, + "loss": 1.1677, + "step": 152052 + }, + { + "epoch": 1.83, + "grad_norm": 7.93760394290279, + "learning_rate": 3.8353866092199707e-07, + "loss": 1.1022, + "step": 152055 + }, + { + "epoch": 1.83, + "grad_norm": 10.19026950187817, + "learning_rate": 3.8337844131683065e-07, + "loss": 1.0662, + "step": 152058 + }, + { + "epoch": 1.83, + "grad_norm": 3.945192944537919, + "learning_rate": 3.8321825452972914e-07, + "loss": 0.9336, + "step": 152061 + }, + { + "epoch": 1.83, + "grad_norm": 9.241655265643267, + "learning_rate": 3.8305810056123993e-07, + "loss": 0.9592, + "step": 152064 + }, + { + "epoch": 1.83, + "grad_norm": 5.813164083168371, + "learning_rate": 3.828979794119092e-07, + "loss": 1.059, + "step": 152067 + }, + { + "epoch": 1.83, + "grad_norm": 2.895496342454249, + "learning_rate": 3.827378910822843e-07, + "loss": 1.0409, + "step": 152070 + }, + { + "epoch": 1.83, + "grad_norm": 3.772068868799327, + "learning_rate": 3.825778355729104e-07, + "loss": 1.1572, + "step": 152073 + }, + { + "epoch": 1.83, + "grad_norm": 5.813764254699001, + "learning_rate": 3.8241781288433586e-07, + "loss": 1.4862, + "step": 152076 + }, + { + "epoch": 1.83, + "grad_norm": 7.226440295612532, + "learning_rate": 3.822578230171026e-07, + "loss": 1.1119, + "step": 152079 + }, + { + "epoch": 1.83, + "grad_norm": 7.60874737679638, + "learning_rate": 3.8209786597176003e-07, + "loss": 1.1866, + "step": 152082 + }, + { + "epoch": 1.83, + "grad_norm": 12.103971019134683, + "learning_rate": 3.8193794174885334e-07, + "loss": 1.0062, + "step": 152085 + }, + { + "epoch": 1.83, + "grad_norm": 7.476003062331611, + "learning_rate": 3.817780503489277e-07, + "loss": 0.9602, + "step": 152088 + }, + { + "epoch": 1.83, + "grad_norm": 4.482479545119755, + "learning_rate": 3.8161819177252925e-07, + "loss": 1.1725, + "step": 152091 + }, + { + "epoch": 1.83, + "grad_norm": 14.861001522390916, + "learning_rate": 3.8145836602020203e-07, + "loss": 1.1386, + "step": 152094 + }, + { + "epoch": 1.83, + "grad_norm": 10.271258502683025, + "learning_rate": 3.8129857309249565e-07, + "loss": 1.4914, + "step": 152097 + }, + { + "epoch": 1.83, + "grad_norm": 3.126574987206591, + "learning_rate": 3.811388129899518e-07, + "loss": 1.3384, + "step": 152100 + }, + { + "epoch": 1.83, + "grad_norm": 9.930431635308837, + "learning_rate": 3.809790857131157e-07, + "loss": 1.0394, + "step": 152103 + }, + { + "epoch": 1.83, + "grad_norm": 6.794004181533018, + "learning_rate": 3.808193912625324e-07, + "loss": 1.2514, + "step": 152106 + }, + { + "epoch": 1.83, + "grad_norm": 13.85889533057543, + "learning_rate": 3.8065972963874817e-07, + "loss": 1.1925, + "step": 152109 + }, + { + "epoch": 1.83, + "grad_norm": 7.271033103749909, + "learning_rate": 3.8050010084230925e-07, + "loss": 1.2073, + "step": 152112 + }, + { + "epoch": 1.83, + "grad_norm": 11.105334676503729, + "learning_rate": 3.803405048737563e-07, + "loss": 0.8495, + "step": 152115 + }, + { + "epoch": 1.83, + "grad_norm": 2.3466197316337967, + "learning_rate": 3.801809417336377e-07, + "loss": 1.2146, + "step": 152118 + }, + { + "epoch": 1.83, + "grad_norm": 4.601138320798807, + "learning_rate": 3.800214114224954e-07, + "loss": 1.1007, + "step": 152121 + }, + { + "epoch": 1.83, + "grad_norm": 5.683989532523916, + "learning_rate": 3.798619139408766e-07, + "loss": 1.0366, + "step": 152124 + }, + { + "epoch": 1.83, + "grad_norm": 3.7435651867084396, + "learning_rate": 3.79702449289322e-07, + "loss": 1.1226, + "step": 152127 + }, + { + "epoch": 1.83, + "grad_norm": 4.203729190550702, + "learning_rate": 3.7954301746837895e-07, + "loss": 0.9843, + "step": 152130 + }, + { + "epoch": 1.83, + "grad_norm": 14.456184858540462, + "learning_rate": 3.793836184785893e-07, + "loss": 0.7812, + "step": 152133 + }, + { + "epoch": 1.83, + "grad_norm": 26.071191637511646, + "learning_rate": 3.792242523204981e-07, + "loss": 1.103, + "step": 152136 + }, + { + "epoch": 1.83, + "grad_norm": 35.89813211185806, + "learning_rate": 3.7906491899465047e-07, + "loss": 1.266, + "step": 152139 + }, + { + "epoch": 1.83, + "grad_norm": 11.09203254978656, + "learning_rate": 3.789056185015882e-07, + "loss": 1.2405, + "step": 152142 + }, + { + "epoch": 1.83, + "grad_norm": 5.898741075398975, + "learning_rate": 3.7874635084185543e-07, + "loss": 0.8537, + "step": 152145 + }, + { + "epoch": 1.83, + "grad_norm": 5.941814010661917, + "learning_rate": 3.7858711601599487e-07, + "loss": 1.3007, + "step": 152148 + }, + { + "epoch": 1.83, + "grad_norm": 8.81333867008339, + "learning_rate": 3.784279140245528e-07, + "loss": 1.1732, + "step": 152151 + }, + { + "epoch": 1.83, + "grad_norm": 10.621224796118431, + "learning_rate": 3.7826874486806886e-07, + "loss": 1.461, + "step": 152154 + }, + { + "epoch": 1.83, + "grad_norm": 11.94748544472933, + "learning_rate": 3.781096085470881e-07, + "loss": 1.2447, + "step": 152157 + }, + { + "epoch": 1.83, + "grad_norm": 8.363798316034238, + "learning_rate": 3.779505050621546e-07, + "loss": 1.2621, + "step": 152160 + }, + { + "epoch": 1.83, + "grad_norm": 6.716649081104071, + "learning_rate": 3.7779143441380896e-07, + "loss": 1.2939, + "step": 152163 + }, + { + "epoch": 1.83, + "grad_norm": 5.031566323895693, + "learning_rate": 3.7763239660259743e-07, + "loss": 1.1778, + "step": 152166 + }, + { + "epoch": 1.83, + "grad_norm": 4.615359017468872, + "learning_rate": 3.774733916290585e-07, + "loss": 1.0616, + "step": 152169 + }, + { + "epoch": 1.83, + "grad_norm": 9.27609616792418, + "learning_rate": 3.7731441949373616e-07, + "loss": 1.1753, + "step": 152172 + }, + { + "epoch": 1.83, + "grad_norm": 4.614827231180257, + "learning_rate": 3.7715548019717443e-07, + "loss": 1.3185, + "step": 152175 + }, + { + "epoch": 1.83, + "grad_norm": 20.303470925434905, + "learning_rate": 3.7699657373991617e-07, + "loss": 1.247, + "step": 152178 + }, + { + "epoch": 1.83, + "grad_norm": 9.139464212916478, + "learning_rate": 3.76837700122501e-07, + "loss": 0.8959, + "step": 152181 + }, + { + "epoch": 1.83, + "grad_norm": 8.622317154089423, + "learning_rate": 3.76678859345474e-07, + "loss": 0.871, + "step": 152184 + }, + { + "epoch": 1.83, + "grad_norm": 37.675833929489386, + "learning_rate": 3.7652005140937474e-07, + "loss": 1.2665, + "step": 152187 + }, + { + "epoch": 1.83, + "grad_norm": 6.531719473746722, + "learning_rate": 3.7636127631474504e-07, + "loss": 1.4981, + "step": 152190 + }, + { + "epoch": 1.83, + "grad_norm": 3.377968542970307, + "learning_rate": 3.7620253406213005e-07, + "loss": 0.8191, + "step": 152193 + }, + { + "epoch": 1.83, + "grad_norm": 3.073942988892744, + "learning_rate": 3.760438246520681e-07, + "loss": 0.9175, + "step": 152196 + }, + { + "epoch": 1.83, + "grad_norm": 12.64154489861842, + "learning_rate": 3.7588514808510224e-07, + "loss": 1.1563, + "step": 152199 + }, + { + "epoch": 1.83, + "grad_norm": 15.049607875959028, + "learning_rate": 3.7572650436177414e-07, + "loss": 1.1096, + "step": 152202 + }, + { + "epoch": 1.83, + "grad_norm": 8.455147091642054, + "learning_rate": 3.7556789348262455e-07, + "loss": 1.1965, + "step": 152205 + }, + { + "epoch": 1.83, + "grad_norm": 18.6372710424822, + "learning_rate": 3.7540931544819526e-07, + "loss": 1.2901, + "step": 152208 + }, + { + "epoch": 1.83, + "grad_norm": 5.014093885932477, + "learning_rate": 3.7525077025902687e-07, + "loss": 1.2361, + "step": 152211 + }, + { + "epoch": 1.83, + "grad_norm": 4.024449991300355, + "learning_rate": 3.750922579156602e-07, + "loss": 1.0241, + "step": 152214 + }, + { + "epoch": 1.83, + "grad_norm": 10.459860929243568, + "learning_rate": 3.7493377841863685e-07, + "loss": 1.3792, + "step": 152217 + }, + { + "epoch": 1.83, + "grad_norm": 17.86394985374729, + "learning_rate": 3.747753317684999e-07, + "loss": 1.1752, + "step": 152220 + }, + { + "epoch": 1.83, + "grad_norm": 3.5463414441390255, + "learning_rate": 3.7461691796578547e-07, + "loss": 1.2855, + "step": 152223 + }, + { + "epoch": 1.83, + "grad_norm": 11.645227334463184, + "learning_rate": 3.744585370110376e-07, + "loss": 1.3644, + "step": 152226 + }, + { + "epoch": 1.83, + "grad_norm": 11.031474083788005, + "learning_rate": 3.7430018890479483e-07, + "loss": 1.1905, + "step": 152229 + }, + { + "epoch": 1.83, + "grad_norm": 9.645714184756493, + "learning_rate": 3.741418736475988e-07, + "loss": 1.4004, + "step": 152232 + }, + { + "epoch": 1.83, + "grad_norm": 9.960251523512104, + "learning_rate": 3.739835912399892e-07, + "loss": 1.3958, + "step": 152235 + }, + { + "epoch": 1.83, + "grad_norm": 11.489230826367164, + "learning_rate": 3.738253416825055e-07, + "loss": 0.9636, + "step": 152238 + }, + { + "epoch": 1.83, + "grad_norm": 8.005425386178665, + "learning_rate": 3.7366712497568956e-07, + "loss": 1.1971, + "step": 152241 + }, + { + "epoch": 1.83, + "grad_norm": 9.258205200629916, + "learning_rate": 3.735089411200799e-07, + "loss": 1.1535, + "step": 152244 + }, + { + "epoch": 1.83, + "grad_norm": 7.555027751334288, + "learning_rate": 3.73350790116217e-07, + "loss": 1.3412, + "step": 152247 + }, + { + "epoch": 1.83, + "grad_norm": 7.706417492226146, + "learning_rate": 3.731926719646417e-07, + "loss": 1.503, + "step": 152250 + }, + { + "epoch": 1.83, + "grad_norm": 8.131785860635597, + "learning_rate": 3.730345866658902e-07, + "loss": 1.0425, + "step": 152253 + }, + { + "epoch": 1.83, + "grad_norm": 6.735012765421532, + "learning_rate": 3.728765342205043e-07, + "loss": 1.1896, + "step": 152256 + }, + { + "epoch": 1.83, + "grad_norm": 10.121854492774128, + "learning_rate": 3.7271851462902353e-07, + "loss": 1.0267, + "step": 152259 + }, + { + "epoch": 1.83, + "grad_norm": 8.546842241379718, + "learning_rate": 3.7256052789198636e-07, + "loss": 1.2555, + "step": 152262 + }, + { + "epoch": 1.83, + "grad_norm": 6.797760286363765, + "learning_rate": 3.724025740099324e-07, + "loss": 1.2682, + "step": 152265 + }, + { + "epoch": 1.83, + "grad_norm": 5.650747195118485, + "learning_rate": 3.722446529834023e-07, + "loss": 0.9707, + "step": 152268 + }, + { + "epoch": 1.83, + "grad_norm": 39.171656568441584, + "learning_rate": 3.7208676481293114e-07, + "loss": 0.8256, + "step": 152271 + }, + { + "epoch": 1.83, + "grad_norm": 2.0924078770219188, + "learning_rate": 3.719289094990619e-07, + "loss": 0.8574, + "step": 152274 + }, + { + "epoch": 1.83, + "grad_norm": 8.061163039941135, + "learning_rate": 3.7177108704232965e-07, + "loss": 0.7483, + "step": 152277 + }, + { + "epoch": 1.83, + "grad_norm": 9.844602028075013, + "learning_rate": 3.71613297443274e-07, + "loss": 1.421, + "step": 152280 + }, + { + "epoch": 1.83, + "grad_norm": 7.017529851538736, + "learning_rate": 3.714555407024345e-07, + "loss": 1.2948, + "step": 152283 + }, + { + "epoch": 1.83, + "grad_norm": 4.734587386920664, + "learning_rate": 3.712978168203507e-07, + "loss": 1.1121, + "step": 152286 + }, + { + "epoch": 1.83, + "grad_norm": 6.7490365280429625, + "learning_rate": 3.7114012579755776e-07, + "loss": 0.9883, + "step": 152289 + }, + { + "epoch": 1.83, + "grad_norm": 10.417079498630008, + "learning_rate": 3.7098246763459635e-07, + "loss": 1.1347, + "step": 152292 + }, + { + "epoch": 1.83, + "grad_norm": 7.179443293841506, + "learning_rate": 3.708248423320027e-07, + "loss": 0.757, + "step": 152295 + }, + { + "epoch": 1.83, + "grad_norm": 7.429381993812574, + "learning_rate": 3.706672498903141e-07, + "loss": 1.2515, + "step": 152298 + }, + { + "epoch": 1.83, + "grad_norm": 9.597727615916982, + "learning_rate": 3.7050969031007244e-07, + "loss": 0.9517, + "step": 152301 + }, + { + "epoch": 1.83, + "grad_norm": 12.796023928348994, + "learning_rate": 3.703521635918106e-07, + "loss": 0.989, + "step": 152304 + }, + { + "epoch": 1.83, + "grad_norm": 4.894431502037182, + "learning_rate": 3.701946697360681e-07, + "loss": 1.0862, + "step": 152307 + }, + { + "epoch": 1.83, + "grad_norm": 6.045589575717013, + "learning_rate": 3.700372087433846e-07, + "loss": 1.2895, + "step": 152310 + }, + { + "epoch": 1.83, + "grad_norm": 7.957111428443915, + "learning_rate": 3.698797806142951e-07, + "loss": 1.4177, + "step": 152313 + }, + { + "epoch": 1.83, + "grad_norm": 4.533546470874552, + "learning_rate": 3.697223853493359e-07, + "loss": 1.014, + "step": 152316 + }, + { + "epoch": 1.83, + "grad_norm": 9.260828909376288, + "learning_rate": 3.695650229490455e-07, + "loss": 1.1509, + "step": 152319 + }, + { + "epoch": 1.83, + "grad_norm": 11.437777598968177, + "learning_rate": 3.694076934139601e-07, + "loss": 1.4033, + "step": 152322 + }, + { + "epoch": 1.83, + "grad_norm": 7.6702092559455535, + "learning_rate": 3.6925039674461815e-07, + "loss": 0.9399, + "step": 152325 + }, + { + "epoch": 1.83, + "grad_norm": 14.555820832349058, + "learning_rate": 3.690931329415559e-07, + "loss": 0.9367, + "step": 152328 + }, + { + "epoch": 1.83, + "grad_norm": 7.324207986344279, + "learning_rate": 3.689359020053096e-07, + "loss": 1.1333, + "step": 152331 + }, + { + "epoch": 1.83, + "grad_norm": 11.23862829656529, + "learning_rate": 3.687787039364166e-07, + "loss": 0.9912, + "step": 152334 + }, + { + "epoch": 1.83, + "grad_norm": 6.926951285223372, + "learning_rate": 3.6862153873541195e-07, + "loss": 0.8533, + "step": 152337 + }, + { + "epoch": 1.83, + "grad_norm": 9.14574007932873, + "learning_rate": 3.684644064028331e-07, + "loss": 1.2131, + "step": 152340 + }, + { + "epoch": 1.83, + "grad_norm": 5.689129634881211, + "learning_rate": 3.6830730693921625e-07, + "loss": 0.86, + "step": 152343 + }, + { + "epoch": 1.83, + "grad_norm": 12.545350681969978, + "learning_rate": 3.681502403450965e-07, + "loss": 1.2268, + "step": 152346 + }, + { + "epoch": 1.83, + "grad_norm": 5.1473705515439185, + "learning_rate": 3.6799320662101013e-07, + "loss": 0.9789, + "step": 152349 + }, + { + "epoch": 1.83, + "grad_norm": 6.555778348384402, + "learning_rate": 3.678362057674945e-07, + "loss": 1.4322, + "step": 152352 + }, + { + "epoch": 1.83, + "grad_norm": 14.876761795956208, + "learning_rate": 3.6767923778508573e-07, + "loss": 1.4919, + "step": 152355 + }, + { + "epoch": 1.83, + "grad_norm": 3.8193546642379785, + "learning_rate": 3.6752230267431577e-07, + "loss": 0.9597, + "step": 152358 + }, + { + "epoch": 1.83, + "grad_norm": 26.461431622997477, + "learning_rate": 3.67365400435723e-07, + "loss": 1.186, + "step": 152361 + }, + { + "epoch": 1.83, + "grad_norm": 6.487756937076496, + "learning_rate": 3.6720853106984256e-07, + "loss": 1.1317, + "step": 152364 + }, + { + "epoch": 1.83, + "grad_norm": 4.4280595500414375, + "learning_rate": 3.670516945772107e-07, + "loss": 1.0567, + "step": 152367 + }, + { + "epoch": 1.83, + "grad_norm": 14.608832432978488, + "learning_rate": 3.6689489095836026e-07, + "loss": 1.3322, + "step": 152370 + }, + { + "epoch": 1.83, + "grad_norm": 10.214438026002322, + "learning_rate": 3.667381202138287e-07, + "loss": 1.2199, + "step": 152373 + }, + { + "epoch": 1.83, + "grad_norm": 6.375599805799224, + "learning_rate": 3.6658138234415e-07, + "loss": 1.1132, + "step": 152376 + }, + { + "epoch": 1.83, + "grad_norm": 10.899141040573951, + "learning_rate": 3.664246773498581e-07, + "loss": 1.1103, + "step": 152379 + }, + { + "epoch": 1.83, + "grad_norm": 12.688575140268085, + "learning_rate": 3.662680052314904e-07, + "loss": 1.3523, + "step": 152382 + }, + { + "epoch": 1.83, + "grad_norm": 5.271320415745343, + "learning_rate": 3.6611136598957876e-07, + "loss": 1.3675, + "step": 152385 + }, + { + "epoch": 1.83, + "grad_norm": 3.4246196458483538, + "learning_rate": 3.659547596246593e-07, + "loss": 1.2244, + "step": 152388 + }, + { + "epoch": 1.83, + "grad_norm": 4.351668337378759, + "learning_rate": 3.65798186137265e-07, + "loss": 1.2178, + "step": 152391 + }, + { + "epoch": 1.83, + "grad_norm": 16.323386998097924, + "learning_rate": 3.6564164552793326e-07, + "loss": 0.767, + "step": 152394 + }, + { + "epoch": 1.83, + "grad_norm": 7.46092176196583, + "learning_rate": 3.654851377971969e-07, + "loss": 0.7164, + "step": 152397 + }, + { + "epoch": 1.83, + "grad_norm": 7.249016469581785, + "learning_rate": 3.653286629455866e-07, + "loss": 1.1227, + "step": 152400 + }, + { + "epoch": 1.83, + "grad_norm": 5.45068609209958, + "learning_rate": 3.651722209736408e-07, + "loss": 1.381, + "step": 152403 + }, + { + "epoch": 1.83, + "grad_norm": 4.071532878834005, + "learning_rate": 3.650158118818903e-07, + "loss": 1.2165, + "step": 152406 + }, + { + "epoch": 1.83, + "grad_norm": 4.546003966055573, + "learning_rate": 3.6485943567087234e-07, + "loss": 0.7823, + "step": 152409 + }, + { + "epoch": 1.83, + "grad_norm": 9.390944696856648, + "learning_rate": 3.6470309234111766e-07, + "loss": 1.2027, + "step": 152412 + }, + { + "epoch": 1.83, + "grad_norm": 33.37116015921411, + "learning_rate": 3.645467818931603e-07, + "loss": 1.0117, + "step": 152415 + }, + { + "epoch": 1.83, + "grad_norm": 5.021266401997423, + "learning_rate": 3.6439050432753533e-07, + "loss": 1.1767, + "step": 152418 + }, + { + "epoch": 1.83, + "grad_norm": 9.63225263667835, + "learning_rate": 3.642342596447745e-07, + "loss": 0.9328, + "step": 152421 + }, + { + "epoch": 1.83, + "grad_norm": 3.069945916575803, + "learning_rate": 3.640780478454109e-07, + "loss": 0.8806, + "step": 152424 + }, + { + "epoch": 1.83, + "grad_norm": 8.5172698446161, + "learning_rate": 3.6392186892997726e-07, + "loss": 0.779, + "step": 152427 + }, + { + "epoch": 1.83, + "grad_norm": 3.8853676884130373, + "learning_rate": 3.637657228990077e-07, + "loss": 0.9149, + "step": 152430 + }, + { + "epoch": 1.83, + "grad_norm": 8.77094778054159, + "learning_rate": 3.636096097530351e-07, + "loss": 1.1733, + "step": 152433 + }, + { + "epoch": 1.83, + "grad_norm": 25.63355200677388, + "learning_rate": 3.6345352949259225e-07, + "loss": 1.0067, + "step": 152436 + }, + { + "epoch": 1.83, + "grad_norm": 12.444326850492002, + "learning_rate": 3.6329748211821224e-07, + "loss": 1.5462, + "step": 152439 + }, + { + "epoch": 1.83, + "grad_norm": 7.199835302294032, + "learning_rate": 3.631414676304257e-07, + "loss": 0.8955, + "step": 152442 + }, + { + "epoch": 1.83, + "grad_norm": 7.083817959640689, + "learning_rate": 3.6298548602976546e-07, + "loss": 1.1755, + "step": 152445 + }, + { + "epoch": 1.83, + "grad_norm": 17.144260324597337, + "learning_rate": 3.6282953731676675e-07, + "loss": 1.1943, + "step": 152448 + }, + { + "epoch": 1.83, + "grad_norm": 7.665286509539325, + "learning_rate": 3.6267362149195685e-07, + "loss": 1.2875, + "step": 152451 + }, + { + "epoch": 1.83, + "grad_norm": 7.814637631666704, + "learning_rate": 3.6251773855587204e-07, + "loss": 1.0053, + "step": 152454 + }, + { + "epoch": 1.83, + "grad_norm": 17.81121524712978, + "learning_rate": 3.6236188850904186e-07, + "loss": 1.1491, + "step": 152457 + }, + { + "epoch": 1.83, + "grad_norm": 15.425291948587002, + "learning_rate": 3.6220607135200037e-07, + "loss": 1.4811, + "step": 152460 + }, + { + "epoch": 1.83, + "grad_norm": 10.51880370004308, + "learning_rate": 3.6205028708527824e-07, + "loss": 1.0262, + "step": 152463 + }, + { + "epoch": 1.83, + "grad_norm": 9.105005686267553, + "learning_rate": 3.6189453570940614e-07, + "loss": 1.3836, + "step": 152466 + }, + { + "epoch": 1.83, + "grad_norm": 7.76111847191329, + "learning_rate": 3.617388172249159e-07, + "loss": 1.0274, + "step": 152469 + }, + { + "epoch": 1.83, + "grad_norm": 9.689253774301887, + "learning_rate": 3.615831316323393e-07, + "loss": 1.0177, + "step": 152472 + }, + { + "epoch": 1.83, + "grad_norm": 10.24388694794272, + "learning_rate": 3.614274789322081e-07, + "loss": 1.2446, + "step": 152475 + }, + { + "epoch": 1.83, + "grad_norm": 4.242946465492326, + "learning_rate": 3.61271859125053e-07, + "loss": 0.925, + "step": 152478 + }, + { + "epoch": 1.83, + "grad_norm": 13.618233387862553, + "learning_rate": 3.61116272211407e-07, + "loss": 0.974, + "step": 152481 + }, + { + "epoch": 1.83, + "grad_norm": 11.78783160894816, + "learning_rate": 3.6096071819179625e-07, + "loss": 1.1115, + "step": 152484 + }, + { + "epoch": 1.83, + "grad_norm": 7.933842201280031, + "learning_rate": 3.608051970667559e-07, + "loss": 1.0896, + "step": 152487 + }, + { + "epoch": 1.83, + "grad_norm": 5.839141070246167, + "learning_rate": 3.606497088368166e-07, + "loss": 0.7923, + "step": 152490 + }, + { + "epoch": 1.83, + "grad_norm": 8.159897786310479, + "learning_rate": 3.604942535025058e-07, + "loss": 1.1084, + "step": 152493 + }, + { + "epoch": 1.83, + "grad_norm": 4.660427355163591, + "learning_rate": 3.603388310643563e-07, + "loss": 1.217, + "step": 152496 + }, + { + "epoch": 1.83, + "grad_norm": 5.095385377446529, + "learning_rate": 3.601834415228989e-07, + "loss": 1.1132, + "step": 152499 + }, + { + "epoch": 1.83, + "grad_norm": 2.1288200819897938, + "learning_rate": 3.6002808487866414e-07, + "loss": 1.1942, + "step": 152502 + }, + { + "epoch": 1.83, + "grad_norm": 9.933366924673013, + "learning_rate": 3.598727611321806e-07, + "loss": 1.4232, + "step": 152505 + }, + { + "epoch": 1.83, + "grad_norm": 8.467668107990784, + "learning_rate": 3.5971747028397783e-07, + "loss": 1.246, + "step": 152508 + }, + { + "epoch": 1.83, + "grad_norm": 4.0084570827358466, + "learning_rate": 3.5956221233458765e-07, + "loss": 0.9259, + "step": 152511 + }, + { + "epoch": 1.83, + "grad_norm": 5.711040465702504, + "learning_rate": 3.5940698728453847e-07, + "loss": 1.1725, + "step": 152514 + }, + { + "epoch": 1.83, + "grad_norm": 9.721163863966856, + "learning_rate": 3.5925179513436213e-07, + "loss": 1.1527, + "step": 152517 + }, + { + "epoch": 1.83, + "grad_norm": 46.2472895838923, + "learning_rate": 3.590966358845849e-07, + "loss": 1.0899, + "step": 152520 + }, + { + "epoch": 1.83, + "grad_norm": 8.14796961612614, + "learning_rate": 3.589415095357396e-07, + "loss": 1.1288, + "step": 152523 + }, + { + "epoch": 1.83, + "grad_norm": 3.4652999026844657, + "learning_rate": 3.587864160883547e-07, + "loss": 1.5215, + "step": 152526 + }, + { + "epoch": 1.83, + "grad_norm": 5.84602110120825, + "learning_rate": 3.586313555429588e-07, + "loss": 0.7799, + "step": 152529 + }, + { + "epoch": 1.83, + "grad_norm": 9.778457622065579, + "learning_rate": 3.584763279000802e-07, + "loss": 1.1859, + "step": 152532 + }, + { + "epoch": 1.83, + "grad_norm": 29.467949121619473, + "learning_rate": 3.5832133316024975e-07, + "loss": 1.1646, + "step": 152535 + }, + { + "epoch": 1.83, + "grad_norm": 5.137120673653064, + "learning_rate": 3.5816637132399466e-07, + "loss": 1.1353, + "step": 152538 + }, + { + "epoch": 1.83, + "grad_norm": 6.10330747154792, + "learning_rate": 3.580114423918457e-07, + "loss": 1.0682, + "step": 152541 + }, + { + "epoch": 1.83, + "grad_norm": 10.902425422340553, + "learning_rate": 3.5785654636433134e-07, + "loss": 1.0902, + "step": 152544 + }, + { + "epoch": 1.83, + "grad_norm": 7.394349011665255, + "learning_rate": 3.5770168324197883e-07, + "loss": 1.1621, + "step": 152547 + }, + { + "epoch": 1.83, + "grad_norm": 9.240558943214246, + "learning_rate": 3.575468530253168e-07, + "loss": 1.151, + "step": 152550 + }, + { + "epoch": 1.83, + "grad_norm": 15.977157537282887, + "learning_rate": 3.573920557148747e-07, + "loss": 1.0516, + "step": 152553 + }, + { + "epoch": 1.83, + "grad_norm": 6.518587285084826, + "learning_rate": 3.57237291311181e-07, + "loss": 1.2193, + "step": 152556 + }, + { + "epoch": 1.83, + "grad_norm": 2.4796288791993315, + "learning_rate": 3.5708255981476203e-07, + "loss": 0.8297, + "step": 152559 + }, + { + "epoch": 1.83, + "grad_norm": 4.489499638160882, + "learning_rate": 3.569278612261462e-07, + "loss": 1.0999, + "step": 152562 + }, + { + "epoch": 1.83, + "grad_norm": 17.03340269351242, + "learning_rate": 3.567731955458631e-07, + "loss": 1.2761, + "step": 152565 + }, + { + "epoch": 1.83, + "grad_norm": 13.060732038475885, + "learning_rate": 3.5661856277444005e-07, + "loss": 1.3988, + "step": 152568 + }, + { + "epoch": 1.83, + "grad_norm": 8.102737287623862, + "learning_rate": 3.564639629124056e-07, + "loss": 0.6876, + "step": 152571 + }, + { + "epoch": 1.83, + "grad_norm": 5.142159200721183, + "learning_rate": 3.563093959602837e-07, + "loss": 0.9676, + "step": 152574 + }, + { + "epoch": 1.83, + "grad_norm": 6.501224758283012, + "learning_rate": 3.561548619186039e-07, + "loss": 0.9965, + "step": 152577 + }, + { + "epoch": 1.83, + "grad_norm": 7.400674775901954, + "learning_rate": 3.560003607878948e-07, + "loss": 1.0396, + "step": 152580 + }, + { + "epoch": 1.83, + "grad_norm": 8.141072772595043, + "learning_rate": 3.558458925686836e-07, + "loss": 1.1624, + "step": 152583 + }, + { + "epoch": 1.83, + "grad_norm": 6.087840092498977, + "learning_rate": 3.5569145726149556e-07, + "loss": 1.0531, + "step": 152586 + }, + { + "epoch": 1.83, + "grad_norm": 11.279361125121582, + "learning_rate": 3.555370548668591e-07, + "loss": 1.5625, + "step": 152589 + }, + { + "epoch": 1.83, + "grad_norm": 8.114769890661202, + "learning_rate": 3.553826853853004e-07, + "loss": 0.9072, + "step": 152592 + }, + { + "epoch": 1.83, + "grad_norm": 3.4923302963057674, + "learning_rate": 3.552283488173458e-07, + "loss": 0.9746, + "step": 152595 + }, + { + "epoch": 1.83, + "grad_norm": 4.221010047771811, + "learning_rate": 3.550740451635248e-07, + "loss": 0.9066, + "step": 152598 + }, + { + "epoch": 1.83, + "grad_norm": 5.775874161401976, + "learning_rate": 3.549197744243604e-07, + "loss": 1.1504, + "step": 152601 + }, + { + "epoch": 1.84, + "grad_norm": 14.754980589180335, + "learning_rate": 3.5476553660038103e-07, + "loss": 1.4068, + "step": 152604 + }, + { + "epoch": 1.84, + "grad_norm": 5.185363056205955, + "learning_rate": 3.5461133169211293e-07, + "loss": 1.3149, + "step": 152607 + }, + { + "epoch": 1.84, + "grad_norm": 4.375536357161303, + "learning_rate": 3.5445715970008234e-07, + "loss": 1.1398, + "step": 152610 + }, + { + "epoch": 1.84, + "grad_norm": 6.363011159672373, + "learning_rate": 3.543030206248155e-07, + "loss": 1.4384, + "step": 152613 + }, + { + "epoch": 1.84, + "grad_norm": 9.742223319201797, + "learning_rate": 3.541489144668375e-07, + "loss": 1.1467, + "step": 152616 + }, + { + "epoch": 1.84, + "grad_norm": 8.858009462605093, + "learning_rate": 3.539948412266736e-07, + "loss": 1.1664, + "step": 152619 + }, + { + "epoch": 1.84, + "grad_norm": 9.635821927352364, + "learning_rate": 3.538408009048522e-07, + "loss": 0.974, + "step": 152622 + }, + { + "epoch": 1.84, + "grad_norm": 5.999364580800111, + "learning_rate": 3.5368679350189836e-07, + "loss": 0.9453, + "step": 152625 + }, + { + "epoch": 1.84, + "grad_norm": 8.425795958689786, + "learning_rate": 3.535328190183351e-07, + "loss": 0.9554, + "step": 152628 + }, + { + "epoch": 1.84, + "grad_norm": 4.216160765666697, + "learning_rate": 3.53378877454692e-07, + "loss": 1.0769, + "step": 152631 + }, + { + "epoch": 1.84, + "grad_norm": 7.456772857015053, + "learning_rate": 3.532249688114897e-07, + "loss": 0.9612, + "step": 152634 + }, + { + "epoch": 1.84, + "grad_norm": 10.975294951386152, + "learning_rate": 3.530710930892578e-07, + "loss": 0.9182, + "step": 152637 + }, + { + "epoch": 1.84, + "grad_norm": 11.505352021079176, + "learning_rate": 3.529172502885181e-07, + "loss": 1.2116, + "step": 152640 + }, + { + "epoch": 1.84, + "grad_norm": 13.631313103067153, + "learning_rate": 3.5276344040979684e-07, + "loss": 0.9549, + "step": 152643 + }, + { + "epoch": 1.84, + "grad_norm": 22.735015041836828, + "learning_rate": 3.5260966345361915e-07, + "loss": 1.5709, + "step": 152646 + }, + { + "epoch": 1.84, + "grad_norm": 13.563465916086423, + "learning_rate": 3.5245591942050907e-07, + "loss": 1.6448, + "step": 152649 + }, + { + "epoch": 1.84, + "grad_norm": 4.141930821137015, + "learning_rate": 3.5230220831099393e-07, + "loss": 1.2827, + "step": 152652 + }, + { + "epoch": 1.84, + "grad_norm": 5.66676450974988, + "learning_rate": 3.5214853012559556e-07, + "loss": 1.1681, + "step": 152655 + }, + { + "epoch": 1.84, + "grad_norm": 12.644382282015428, + "learning_rate": 3.519948848648391e-07, + "loss": 1.0617, + "step": 152658 + }, + { + "epoch": 1.84, + "grad_norm": 13.431985624022099, + "learning_rate": 3.518412725292475e-07, + "loss": 1.4539, + "step": 152661 + }, + { + "epoch": 1.84, + "grad_norm": 4.775250908959037, + "learning_rate": 3.5168769311934916e-07, + "loss": 1.3012, + "step": 152664 + }, + { + "epoch": 1.84, + "grad_norm": 17.374745499265007, + "learning_rate": 3.515341466356625e-07, + "loss": 1.261, + "step": 152667 + }, + { + "epoch": 1.84, + "grad_norm": 6.104685143763509, + "learning_rate": 3.513806330787162e-07, + "loss": 1.1228, + "step": 152670 + }, + { + "epoch": 1.84, + "grad_norm": 6.842819880382362, + "learning_rate": 3.5122715244903185e-07, + "loss": 1.0676, + "step": 152673 + }, + { + "epoch": 1.84, + "grad_norm": 5.480125314633506, + "learning_rate": 3.5107370474713353e-07, + "loss": 1.0355, + "step": 152676 + }, + { + "epoch": 1.84, + "grad_norm": 4.925721322249981, + "learning_rate": 3.5092028997354643e-07, + "loss": 1.324, + "step": 152679 + }, + { + "epoch": 1.84, + "grad_norm": 20.370094501213398, + "learning_rate": 3.507669081287912e-07, + "loss": 1.2105, + "step": 152682 + }, + { + "epoch": 1.84, + "grad_norm": 15.38336770497058, + "learning_rate": 3.5061355921339304e-07, + "loss": 1.4516, + "step": 152685 + }, + { + "epoch": 1.84, + "grad_norm": 10.009979355511682, + "learning_rate": 3.5046024322787474e-07, + "loss": 0.8753, + "step": 152688 + }, + { + "epoch": 1.84, + "grad_norm": 6.618884200008002, + "learning_rate": 3.503069601727616e-07, + "loss": 1.1121, + "step": 152691 + }, + { + "epoch": 1.84, + "grad_norm": 5.205649253316692, + "learning_rate": 3.50153710048573e-07, + "loss": 1.2144, + "step": 152694 + }, + { + "epoch": 1.84, + "grad_norm": 10.512548735566115, + "learning_rate": 3.500004928558365e-07, + "loss": 0.9193, + "step": 152697 + }, + { + "epoch": 1.84, + "grad_norm": 8.811212832249968, + "learning_rate": 3.4984730859507045e-07, + "loss": 1.0766, + "step": 152700 + }, + { + "epoch": 1.84, + "grad_norm": 12.001979933864105, + "learning_rate": 3.496941572667989e-07, + "loss": 1.2467, + "step": 152703 + }, + { + "epoch": 1.84, + "grad_norm": 5.015092749156921, + "learning_rate": 3.49541038871547e-07, + "loss": 1.1416, + "step": 152706 + }, + { + "epoch": 1.84, + "grad_norm": 5.445841724984627, + "learning_rate": 3.493879534098343e-07, + "loss": 1.1705, + "step": 152709 + }, + { + "epoch": 1.84, + "grad_norm": 64.35484557717957, + "learning_rate": 3.492349008821849e-07, + "loss": 1.6103, + "step": 152712 + }, + { + "epoch": 1.84, + "grad_norm": 11.978801337099856, + "learning_rate": 3.4908188128912056e-07, + "loss": 1.2125, + "step": 152715 + }, + { + "epoch": 1.84, + "grad_norm": 5.748585368723985, + "learning_rate": 3.489288946311642e-07, + "loss": 1.3091, + "step": 152718 + }, + { + "epoch": 1.84, + "grad_norm": 5.655950225140383, + "learning_rate": 3.4877594090883646e-07, + "loss": 1.2422, + "step": 152721 + }, + { + "epoch": 1.84, + "grad_norm": 32.31827639987921, + "learning_rate": 3.4862302012265927e-07, + "loss": 1.1219, + "step": 152724 + }, + { + "epoch": 1.84, + "grad_norm": 24.541469612641983, + "learning_rate": 3.4847013227315653e-07, + "loss": 1.6194, + "step": 152727 + }, + { + "epoch": 1.84, + "grad_norm": 10.935159093341868, + "learning_rate": 3.4831727736084785e-07, + "loss": 1.0606, + "step": 152730 + }, + { + "epoch": 1.84, + "grad_norm": 2.999929160253105, + "learning_rate": 3.481644553862562e-07, + "loss": 1.021, + "step": 152733 + }, + { + "epoch": 1.84, + "grad_norm": 9.365341691833365, + "learning_rate": 3.4801166634990223e-07, + "loss": 1.0675, + "step": 152736 + }, + { + "epoch": 1.84, + "grad_norm": 9.09858581441844, + "learning_rate": 3.4785891025231e-07, + "loss": 1.1123, + "step": 152739 + }, + { + "epoch": 1.84, + "grad_norm": 7.10036059996807, + "learning_rate": 3.4770618709399574e-07, + "loss": 1.1913, + "step": 152742 + }, + { + "epoch": 1.84, + "grad_norm": 7.838884425034119, + "learning_rate": 3.4755349687548567e-07, + "loss": 1.3084, + "step": 152745 + }, + { + "epoch": 1.84, + "grad_norm": 8.364111865889138, + "learning_rate": 3.474008395972972e-07, + "loss": 0.9163, + "step": 152748 + }, + { + "epoch": 1.84, + "grad_norm": 10.270520730468318, + "learning_rate": 3.472482152599532e-07, + "loss": 1.1291, + "step": 152751 + }, + { + "epoch": 1.84, + "grad_norm": 5.210455883633293, + "learning_rate": 3.4709562386397444e-07, + "loss": 1.286, + "step": 152754 + }, + { + "epoch": 1.84, + "grad_norm": 6.314199799493825, + "learning_rate": 3.4694306540988153e-07, + "loss": 1.0823, + "step": 152757 + }, + { + "epoch": 1.84, + "grad_norm": 12.75828028326943, + "learning_rate": 3.467905398981941e-07, + "loss": 1.0413, + "step": 152760 + }, + { + "epoch": 1.84, + "grad_norm": 5.648869946998999, + "learning_rate": 3.466380473294351e-07, + "loss": 1.3011, + "step": 152763 + }, + { + "epoch": 1.84, + "grad_norm": 17.690700337176654, + "learning_rate": 3.4648558770412177e-07, + "loss": 1.3932, + "step": 152766 + }, + { + "epoch": 1.84, + "grad_norm": 9.999527721625464, + "learning_rate": 3.46333161022776e-07, + "loss": 1.1843, + "step": 152769 + }, + { + "epoch": 1.84, + "grad_norm": 42.0299069689446, + "learning_rate": 3.4618076728591965e-07, + "loss": 1.0686, + "step": 152772 + }, + { + "epoch": 1.84, + "grad_norm": 8.070499010684893, + "learning_rate": 3.460284064940689e-07, + "loss": 1.0652, + "step": 152775 + }, + { + "epoch": 1.84, + "grad_norm": 8.511884148432145, + "learning_rate": 3.458760786477466e-07, + "loss": 1.3526, + "step": 152778 + }, + { + "epoch": 1.84, + "grad_norm": 8.117058361721663, + "learning_rate": 3.457237837474725e-07, + "loss": 1.7739, + "step": 152781 + }, + { + "epoch": 1.84, + "grad_norm": 14.551261559857688, + "learning_rate": 3.4557152179376495e-07, + "loss": 1.2122, + "step": 152784 + }, + { + "epoch": 1.84, + "grad_norm": 5.086537487181337, + "learning_rate": 3.454192927871447e-07, + "loss": 0.8992, + "step": 152787 + }, + { + "epoch": 1.84, + "grad_norm": 7.348312490782603, + "learning_rate": 3.452670967281302e-07, + "loss": 1.3586, + "step": 152790 + }, + { + "epoch": 1.84, + "grad_norm": 9.76446000644955, + "learning_rate": 3.451149336172421e-07, + "loss": 1.1218, + "step": 152793 + }, + { + "epoch": 1.84, + "grad_norm": 27.780162396579865, + "learning_rate": 3.4496280345499787e-07, + "loss": 1.0447, + "step": 152796 + }, + { + "epoch": 1.84, + "grad_norm": 8.13916983005485, + "learning_rate": 3.448107062419193e-07, + "loss": 1.0244, + "step": 152799 + }, + { + "epoch": 1.84, + "grad_norm": 12.432903535447968, + "learning_rate": 3.446586419785236e-07, + "loss": 1.3993, + "step": 152802 + }, + { + "epoch": 1.84, + "grad_norm": 7.234529768886146, + "learning_rate": 3.4450661066533055e-07, + "loss": 1.3844, + "step": 152805 + }, + { + "epoch": 1.84, + "grad_norm": 2.709915285992045, + "learning_rate": 3.443546123028574e-07, + "loss": 1.3496, + "step": 152808 + }, + { + "epoch": 1.84, + "grad_norm": 7.4773417458691736, + "learning_rate": 3.442026468916249e-07, + "loss": 1.1561, + "step": 152811 + }, + { + "epoch": 1.84, + "grad_norm": 16.259471922136598, + "learning_rate": 3.4405071443215145e-07, + "loss": 1.169, + "step": 152814 + }, + { + "epoch": 1.84, + "grad_norm": 4.997080115274884, + "learning_rate": 3.4389881492495335e-07, + "loss": 1.0282, + "step": 152817 + }, + { + "epoch": 1.84, + "grad_norm": 6.6283755538917175, + "learning_rate": 3.437469483705502e-07, + "loss": 1.056, + "step": 152820 + }, + { + "epoch": 1.84, + "grad_norm": 8.36057841609935, + "learning_rate": 3.435951147694627e-07, + "loss": 1.2144, + "step": 152823 + }, + { + "epoch": 1.84, + "grad_norm": 7.595276963599101, + "learning_rate": 3.43443314122206e-07, + "loss": 1.3479, + "step": 152826 + }, + { + "epoch": 1.84, + "grad_norm": 10.748441868242294, + "learning_rate": 3.4329154642929853e-07, + "loss": 1.3286, + "step": 152829 + }, + { + "epoch": 1.84, + "grad_norm": 13.236651007212075, + "learning_rate": 3.4313981169125765e-07, + "loss": 1.1576, + "step": 152832 + }, + { + "epoch": 1.84, + "grad_norm": 5.499070966411779, + "learning_rate": 3.42988109908603e-07, + "loss": 0.9659, + "step": 152835 + }, + { + "epoch": 1.84, + "grad_norm": 3.573313122446861, + "learning_rate": 3.428364410818519e-07, + "loss": 1.2367, + "step": 152838 + }, + { + "epoch": 1.84, + "grad_norm": 4.094326493812139, + "learning_rate": 3.426848052115217e-07, + "loss": 0.9938, + "step": 152841 + }, + { + "epoch": 1.84, + "grad_norm": 6.948603077474219, + "learning_rate": 3.425332022981287e-07, + "loss": 0.9925, + "step": 152844 + }, + { + "epoch": 1.84, + "grad_norm": 8.660475150122448, + "learning_rate": 3.423816323421925e-07, + "loss": 0.8706, + "step": 152847 + }, + { + "epoch": 1.84, + "grad_norm": 4.977223903462475, + "learning_rate": 3.4223009534422814e-07, + "loss": 1.3014, + "step": 152850 + }, + { + "epoch": 1.84, + "grad_norm": 10.611170601279902, + "learning_rate": 3.420785913047553e-07, + "loss": 1.2024, + "step": 152853 + }, + { + "epoch": 1.84, + "grad_norm": 14.910455340474266, + "learning_rate": 3.41927120224288e-07, + "loss": 1.4133, + "step": 152856 + }, + { + "epoch": 1.84, + "grad_norm": 5.537594165540178, + "learning_rate": 3.417756821033447e-07, + "loss": 1.1445, + "step": 152859 + }, + { + "epoch": 1.84, + "grad_norm": 6.116719633945813, + "learning_rate": 3.416242769424416e-07, + "loss": 1.2838, + "step": 152862 + }, + { + "epoch": 1.84, + "grad_norm": 10.969508885963021, + "learning_rate": 3.414729047420984e-07, + "loss": 1.5163, + "step": 152865 + }, + { + "epoch": 1.84, + "grad_norm": 9.953414708924237, + "learning_rate": 3.4132156550282793e-07, + "loss": 1.2869, + "step": 152868 + }, + { + "epoch": 1.84, + "grad_norm": 10.941247038311271, + "learning_rate": 3.4117025922514646e-07, + "loss": 1.2278, + "step": 152871 + }, + { + "epoch": 1.84, + "grad_norm": 5.082413052275688, + "learning_rate": 3.410189859095725e-07, + "loss": 1.4265, + "step": 152874 + }, + { + "epoch": 1.84, + "grad_norm": 8.039303500912666, + "learning_rate": 3.408677455566223e-07, + "loss": 0.8986, + "step": 152877 + }, + { + "epoch": 1.84, + "grad_norm": 6.401784033554938, + "learning_rate": 3.4071653816681095e-07, + "loss": 0.9259, + "step": 152880 + }, + { + "epoch": 1.84, + "grad_norm": 16.888187244754405, + "learning_rate": 3.4056536374065477e-07, + "loss": 1.3951, + "step": 152883 + }, + { + "epoch": 1.84, + "grad_norm": 7.645872675667589, + "learning_rate": 3.4041422227866884e-07, + "loss": 0.9625, + "step": 152886 + }, + { + "epoch": 1.84, + "grad_norm": 7.679369762470972, + "learning_rate": 3.4026311378137166e-07, + "loss": 1.0582, + "step": 152889 + }, + { + "epoch": 1.84, + "grad_norm": 5.052875778433748, + "learning_rate": 3.401120382492762e-07, + "loss": 1.024, + "step": 152892 + }, + { + "epoch": 1.84, + "grad_norm": 13.282380816755785, + "learning_rate": 3.3996099568289865e-07, + "loss": 1.3095, + "step": 152895 + }, + { + "epoch": 1.84, + "grad_norm": 7.7610943843937115, + "learning_rate": 3.398099860827553e-07, + "loss": 0.841, + "step": 152898 + }, + { + "epoch": 1.84, + "grad_norm": 8.26482323550369, + "learning_rate": 3.3965900944935904e-07, + "loss": 1.4285, + "step": 152901 + }, + { + "epoch": 1.84, + "grad_norm": 6.188059425037421, + "learning_rate": 3.3950806578322836e-07, + "loss": 1.5115, + "step": 152904 + }, + { + "epoch": 1.84, + "grad_norm": 10.234419399862322, + "learning_rate": 3.393571550848773e-07, + "loss": 0.9758, + "step": 152907 + }, + { + "epoch": 1.84, + "grad_norm": 7.7201830124548865, + "learning_rate": 3.39206277354821e-07, + "loss": 1.3306, + "step": 152910 + }, + { + "epoch": 1.84, + "grad_norm": 18.55516866604104, + "learning_rate": 3.3905543259357244e-07, + "loss": 1.2366, + "step": 152913 + }, + { + "epoch": 1.84, + "grad_norm": 4.8648870117936305, + "learning_rate": 3.389046208016478e-07, + "loss": 1.1418, + "step": 152916 + }, + { + "epoch": 1.84, + "grad_norm": 6.629345410090871, + "learning_rate": 3.387538419795622e-07, + "loss": 1.1321, + "step": 152919 + }, + { + "epoch": 1.84, + "grad_norm": 15.648712296784332, + "learning_rate": 3.386030961278308e-07, + "loss": 1.3126, + "step": 152922 + }, + { + "epoch": 1.84, + "grad_norm": 7.138579902373872, + "learning_rate": 3.384523832469655e-07, + "loss": 0.9467, + "step": 152925 + }, + { + "epoch": 1.84, + "grad_norm": 3.855627373571413, + "learning_rate": 3.383017033374825e-07, + "loss": 1.2508, + "step": 152928 + }, + { + "epoch": 1.84, + "grad_norm": 6.1524959455924435, + "learning_rate": 3.3815105639989687e-07, + "loss": 1.5682, + "step": 152931 + }, + { + "epoch": 1.84, + "grad_norm": 3.9673973972209455, + "learning_rate": 3.3800044243472055e-07, + "loss": 1.2448, + "step": 152934 + }, + { + "epoch": 1.84, + "grad_norm": 3.4158119674857828, + "learning_rate": 3.3784986144246857e-07, + "loss": 1.0078, + "step": 152937 + }, + { + "epoch": 1.84, + "grad_norm": 7.114859890331824, + "learning_rate": 3.376993134236539e-07, + "loss": 1.0273, + "step": 152940 + }, + { + "epoch": 1.84, + "grad_norm": 22.053421404895193, + "learning_rate": 3.3754879837879174e-07, + "loss": 1.1891, + "step": 152943 + }, + { + "epoch": 1.84, + "grad_norm": 6.553776953207355, + "learning_rate": 3.373983163083949e-07, + "loss": 1.3062, + "step": 152946 + }, + { + "epoch": 1.84, + "grad_norm": 7.765566880190122, + "learning_rate": 3.372478672129775e-07, + "loss": 1.2687, + "step": 152949 + }, + { + "epoch": 1.84, + "grad_norm": 38.922110199470175, + "learning_rate": 3.370974510930536e-07, + "loss": 0.9744, + "step": 152952 + }, + { + "epoch": 1.84, + "grad_norm": 14.580338592848683, + "learning_rate": 3.369470679491338e-07, + "loss": 1.1973, + "step": 152955 + }, + { + "epoch": 1.84, + "grad_norm": 5.095516991285608, + "learning_rate": 3.367967177817344e-07, + "loss": 1.1315, + "step": 152958 + }, + { + "epoch": 1.84, + "grad_norm": 5.163370852855524, + "learning_rate": 3.3664640059136723e-07, + "loss": 1.0798, + "step": 152961 + }, + { + "epoch": 1.84, + "grad_norm": 4.585542478504935, + "learning_rate": 3.364961163785441e-07, + "loss": 1.0914, + "step": 152964 + }, + { + "epoch": 1.84, + "grad_norm": 3.8809649498217866, + "learning_rate": 3.3634586514377786e-07, + "loss": 1.0995, + "step": 152967 + }, + { + "epoch": 1.84, + "grad_norm": 9.756346019079576, + "learning_rate": 3.361956468875838e-07, + "loss": 1.5767, + "step": 152970 + }, + { + "epoch": 1.84, + "grad_norm": 15.558766796802333, + "learning_rate": 3.360454616104736e-07, + "loss": 1.1704, + "step": 152973 + }, + { + "epoch": 1.84, + "grad_norm": 5.256477914611018, + "learning_rate": 3.358953093129602e-07, + "loss": 1.1171, + "step": 152976 + }, + { + "epoch": 1.84, + "grad_norm": 4.888965548171675, + "learning_rate": 3.357451899955533e-07, + "loss": 1.2437, + "step": 152979 + }, + { + "epoch": 1.84, + "grad_norm": 16.919443862035433, + "learning_rate": 3.355951036587668e-07, + "loss": 1.4752, + "step": 152982 + }, + { + "epoch": 1.84, + "grad_norm": 9.712069635276212, + "learning_rate": 3.354450503031137e-07, + "loss": 1.1658, + "step": 152985 + }, + { + "epoch": 1.84, + "grad_norm": 9.471997812328985, + "learning_rate": 3.352950299291058e-07, + "loss": 1.1413, + "step": 152988 + }, + { + "epoch": 1.84, + "grad_norm": 10.690947296529098, + "learning_rate": 3.351450425372538e-07, + "loss": 1.0445, + "step": 152991 + }, + { + "epoch": 1.84, + "grad_norm": 3.8728615601141816, + "learning_rate": 3.349950881280728e-07, + "loss": 1.1848, + "step": 152994 + }, + { + "epoch": 1.84, + "grad_norm": 13.61307380190564, + "learning_rate": 3.3484516670207024e-07, + "loss": 1.2307, + "step": 152997 + }, + { + "epoch": 1.84, + "grad_norm": 15.657955473001042, + "learning_rate": 3.34695278259759e-07, + "loss": 1.2117, + "step": 153000 + }, + { + "epoch": 1.84, + "grad_norm": 2.9748531724910543, + "learning_rate": 3.3454542280165316e-07, + "loss": 1.1297, + "step": 153003 + }, + { + "epoch": 1.84, + "grad_norm": 6.684557269521972, + "learning_rate": 3.343956003282611e-07, + "loss": 1.3047, + "step": 153006 + }, + { + "epoch": 1.84, + "grad_norm": 12.897587093465093, + "learning_rate": 3.3424581084009587e-07, + "loss": 1.1716, + "step": 153009 + }, + { + "epoch": 1.84, + "grad_norm": 5.122112821070076, + "learning_rate": 3.3409605433766814e-07, + "loss": 0.9734, + "step": 153012 + }, + { + "epoch": 1.84, + "grad_norm": 8.026512716336967, + "learning_rate": 3.3394633082148963e-07, + "loss": 1.2586, + "step": 153015 + }, + { + "epoch": 1.84, + "grad_norm": 4.636917621623288, + "learning_rate": 3.3379664029207006e-07, + "loss": 0.9097, + "step": 153018 + }, + { + "epoch": 1.84, + "grad_norm": 9.263045353531078, + "learning_rate": 3.3364698274992e-07, + "loss": 1.3485, + "step": 153021 + }, + { + "epoch": 1.84, + "grad_norm": 7.963781777860558, + "learning_rate": 3.334973581955514e-07, + "loss": 1.3154, + "step": 153024 + }, + { + "epoch": 1.84, + "grad_norm": 11.528284050854358, + "learning_rate": 3.3334776662947486e-07, + "loss": 1.1283, + "step": 153027 + }, + { + "epoch": 1.84, + "grad_norm": 8.893282026883291, + "learning_rate": 3.3319820805220116e-07, + "loss": 1.2313, + "step": 153030 + }, + { + "epoch": 1.84, + "grad_norm": 9.115072723598269, + "learning_rate": 3.330486824642387e-07, + "loss": 1.1565, + "step": 153033 + }, + { + "epoch": 1.84, + "grad_norm": 19.04846632658979, + "learning_rate": 3.328991898660994e-07, + "loss": 0.8026, + "step": 153036 + }, + { + "epoch": 1.84, + "grad_norm": 4.921666028757864, + "learning_rate": 3.3274973025829273e-07, + "loss": 1.1653, + "step": 153039 + }, + { + "epoch": 1.84, + "grad_norm": 10.166644418787197, + "learning_rate": 3.3260030364133056e-07, + "loss": 1.4573, + "step": 153042 + }, + { + "epoch": 1.84, + "grad_norm": 2.2558401851910537, + "learning_rate": 3.3245091001572025e-07, + "loss": 0.9428, + "step": 153045 + }, + { + "epoch": 1.84, + "grad_norm": 4.957353261935523, + "learning_rate": 3.3230154938197256e-07, + "loss": 1.1552, + "step": 153048 + }, + { + "epoch": 1.84, + "grad_norm": 3.571059112210854, + "learning_rate": 3.3215222174059704e-07, + "loss": 0.8874, + "step": 153051 + }, + { + "epoch": 1.84, + "grad_norm": 18.634146979381203, + "learning_rate": 3.3200292709210434e-07, + "loss": 1.2673, + "step": 153054 + }, + { + "epoch": 1.84, + "grad_norm": 6.885540084136989, + "learning_rate": 3.3185366543700304e-07, + "loss": 1.2492, + "step": 153057 + }, + { + "epoch": 1.84, + "grad_norm": 19.122965821272587, + "learning_rate": 3.3170443677580376e-07, + "loss": 1.0163, + "step": 153060 + }, + { + "epoch": 1.84, + "grad_norm": 4.341051128464669, + "learning_rate": 3.315552411090139e-07, + "loss": 1.2889, + "step": 153063 + }, + { + "epoch": 1.84, + "grad_norm": 12.149949169700857, + "learning_rate": 3.314060784371431e-07, + "loss": 1.0621, + "step": 153066 + }, + { + "epoch": 1.84, + "grad_norm": 11.976185234321562, + "learning_rate": 3.3125694876070204e-07, + "loss": 1.2505, + "step": 153069 + }, + { + "epoch": 1.84, + "grad_norm": 29.285307650223373, + "learning_rate": 3.311078520801969e-07, + "loss": 1.2068, + "step": 153072 + }, + { + "epoch": 1.84, + "grad_norm": 4.754681880287149, + "learning_rate": 3.309587883961374e-07, + "loss": 1.1987, + "step": 153075 + }, + { + "epoch": 1.84, + "grad_norm": 5.872395587636922, + "learning_rate": 3.3080975770903524e-07, + "loss": 1.2207, + "step": 153078 + }, + { + "epoch": 1.84, + "grad_norm": 9.70058082951476, + "learning_rate": 3.306607600193945e-07, + "loss": 1.144, + "step": 153081 + }, + { + "epoch": 1.84, + "grad_norm": 7.1056510556146275, + "learning_rate": 3.30511795327727e-07, + "loss": 0.9641, + "step": 153084 + }, + { + "epoch": 1.84, + "grad_norm": 7.744903664295219, + "learning_rate": 3.3036286363453907e-07, + "loss": 1.0526, + "step": 153087 + }, + { + "epoch": 1.84, + "grad_norm": 8.78076833053955, + "learning_rate": 3.302139649403391e-07, + "loss": 0.9818, + "step": 153090 + }, + { + "epoch": 1.84, + "grad_norm": 6.286269480749307, + "learning_rate": 3.300650992456356e-07, + "loss": 1.0142, + "step": 153093 + }, + { + "epoch": 1.84, + "grad_norm": 11.531412178546653, + "learning_rate": 3.299162665509392e-07, + "loss": 1.0163, + "step": 153096 + }, + { + "epoch": 1.84, + "grad_norm": 3.702880003476796, + "learning_rate": 3.29767466856753e-07, + "loss": 0.9228, + "step": 153099 + }, + { + "epoch": 1.84, + "grad_norm": 8.235479182733064, + "learning_rate": 3.2961870016358864e-07, + "loss": 0.9174, + "step": 153102 + }, + { + "epoch": 1.84, + "grad_norm": 4.593146759978933, + "learning_rate": 3.294699664719514e-07, + "loss": 1.3433, + "step": 153105 + }, + { + "epoch": 1.84, + "grad_norm": 7.672842031285046, + "learning_rate": 3.293212657823508e-07, + "loss": 1.3131, + "step": 153108 + }, + { + "epoch": 1.84, + "grad_norm": 4.256295149390927, + "learning_rate": 3.2917259809529314e-07, + "loss": 0.6143, + "step": 153111 + }, + { + "epoch": 1.84, + "grad_norm": 15.276721529148007, + "learning_rate": 3.2902396341128573e-07, + "loss": 0.9988, + "step": 153114 + }, + { + "epoch": 1.84, + "grad_norm": 18.21111996737638, + "learning_rate": 3.28875361730836e-07, + "loss": 1.0013, + "step": 153117 + }, + { + "epoch": 1.84, + "grad_norm": 13.271006776047908, + "learning_rate": 3.287267930544502e-07, + "loss": 0.9958, + "step": 153120 + }, + { + "epoch": 1.84, + "grad_norm": 7.901153131379097, + "learning_rate": 3.2857825738263795e-07, + "loss": 1.3944, + "step": 153123 + }, + { + "epoch": 1.84, + "grad_norm": 7.765225323755715, + "learning_rate": 3.2842975471590546e-07, + "loss": 1.2164, + "step": 153126 + }, + { + "epoch": 1.84, + "grad_norm": 5.811911565062642, + "learning_rate": 3.282812850547556e-07, + "loss": 1.2186, + "step": 153129 + }, + { + "epoch": 1.84, + "grad_norm": 8.088968577127583, + "learning_rate": 3.2813284839969925e-07, + "loss": 1.4014, + "step": 153132 + }, + { + "epoch": 1.84, + "grad_norm": 4.627276045218825, + "learning_rate": 3.279844447512415e-07, + "loss": 1.1036, + "step": 153135 + }, + { + "epoch": 1.84, + "grad_norm": 7.173595760562855, + "learning_rate": 3.278360741098896e-07, + "loss": 1.0432, + "step": 153138 + }, + { + "epoch": 1.84, + "grad_norm": 13.184632520582742, + "learning_rate": 3.276877364761488e-07, + "loss": 1.5058, + "step": 153141 + }, + { + "epoch": 1.84, + "grad_norm": 8.314397894441628, + "learning_rate": 3.275394318505265e-07, + "loss": 1.0491, + "step": 153144 + }, + { + "epoch": 1.84, + "grad_norm": 5.442176257472627, + "learning_rate": 3.2739116023352777e-07, + "loss": 0.9397, + "step": 153147 + }, + { + "epoch": 1.84, + "grad_norm": 5.92805161215578, + "learning_rate": 3.2724292162565895e-07, + "loss": 1.2159, + "step": 153150 + }, + { + "epoch": 1.84, + "grad_norm": 7.489139506281434, + "learning_rate": 3.2709471602742513e-07, + "loss": 1.132, + "step": 153153 + }, + { + "epoch": 1.84, + "grad_norm": 21.935228902622075, + "learning_rate": 3.2694654343933376e-07, + "loss": 1.0027, + "step": 153156 + }, + { + "epoch": 1.84, + "grad_norm": 5.526011065917963, + "learning_rate": 3.2679840386188876e-07, + "loss": 1.129, + "step": 153159 + }, + { + "epoch": 1.84, + "grad_norm": 8.782053506548996, + "learning_rate": 3.266502972955965e-07, + "loss": 1.2042, + "step": 153162 + }, + { + "epoch": 1.84, + "grad_norm": 6.478904471277893, + "learning_rate": 3.265022237409643e-07, + "loss": 1.233, + "step": 153165 + }, + { + "epoch": 1.84, + "grad_norm": 9.232722213140763, + "learning_rate": 3.263541831984951e-07, + "loss": 1.0881, + "step": 153168 + }, + { + "epoch": 1.84, + "grad_norm": 3.151669529368868, + "learning_rate": 3.2620617566869297e-07, + "loss": 0.8393, + "step": 153171 + }, + { + "epoch": 1.84, + "grad_norm": 29.30095998359202, + "learning_rate": 3.260582011520652e-07, + "loss": 0.9475, + "step": 153174 + }, + { + "epoch": 1.84, + "grad_norm": 10.509275772070831, + "learning_rate": 3.2591025964911816e-07, + "loss": 1.2916, + "step": 153177 + }, + { + "epoch": 1.84, + "grad_norm": 13.139955752453412, + "learning_rate": 3.257623511603525e-07, + "loss": 1.2205, + "step": 153180 + }, + { + "epoch": 1.84, + "grad_norm": 8.817406634323865, + "learning_rate": 3.2561447568627557e-07, + "loss": 0.7725, + "step": 153183 + }, + { + "epoch": 1.84, + "grad_norm": 7.807607813768707, + "learning_rate": 3.254666332273926e-07, + "loss": 1.3125, + "step": 153186 + }, + { + "epoch": 1.84, + "grad_norm": 9.643017969245507, + "learning_rate": 3.2531882378420753e-07, + "loss": 1.1958, + "step": 153189 + }, + { + "epoch": 1.84, + "grad_norm": 5.869219149441835, + "learning_rate": 3.251710473572245e-07, + "loss": 0.9738, + "step": 153192 + }, + { + "epoch": 1.84, + "grad_norm": 9.40190277157162, + "learning_rate": 3.2502330394694636e-07, + "loss": 0.8176, + "step": 153195 + }, + { + "epoch": 1.84, + "grad_norm": 6.74529683665636, + "learning_rate": 3.2487559355387943e-07, + "loss": 1.1691, + "step": 153198 + }, + { + "epoch": 1.84, + "grad_norm": 17.058051468609197, + "learning_rate": 3.2472791617852663e-07, + "loss": 1.3027, + "step": 153201 + }, + { + "epoch": 1.84, + "grad_norm": 7.105848237741312, + "learning_rate": 3.245802718213942e-07, + "loss": 0.9789, + "step": 153204 + }, + { + "epoch": 1.84, + "grad_norm": 15.918556383352195, + "learning_rate": 3.244326604829828e-07, + "loss": 1.2936, + "step": 153207 + }, + { + "epoch": 1.84, + "grad_norm": 1.9443882634954344, + "learning_rate": 3.2428508216379886e-07, + "loss": 1.2761, + "step": 153210 + }, + { + "epoch": 1.84, + "grad_norm": 14.422144622482158, + "learning_rate": 3.24137536864344e-07, + "loss": 1.2001, + "step": 153213 + }, + { + "epoch": 1.84, + "grad_norm": 3.4559861695208127, + "learning_rate": 3.239900245851213e-07, + "loss": 1.1016, + "step": 153216 + }, + { + "epoch": 1.84, + "grad_norm": 15.634716239640465, + "learning_rate": 3.238425453266381e-07, + "loss": 0.9192, + "step": 153219 + }, + { + "epoch": 1.84, + "grad_norm": 2.6246009728695894, + "learning_rate": 3.2369509908939276e-07, + "loss": 1.1211, + "step": 153222 + }, + { + "epoch": 1.84, + "grad_norm": 19.46438726029292, + "learning_rate": 3.2354768587389064e-07, + "loss": 1.5659, + "step": 153225 + }, + { + "epoch": 1.84, + "grad_norm": 17.671820437352252, + "learning_rate": 3.2340030568063565e-07, + "loss": 1.0981, + "step": 153228 + }, + { + "epoch": 1.84, + "grad_norm": 8.866615458326766, + "learning_rate": 3.232529585101307e-07, + "loss": 0.8148, + "step": 153231 + }, + { + "epoch": 1.84, + "grad_norm": 4.5318771754758105, + "learning_rate": 3.231056443628766e-07, + "loss": 1.0743, + "step": 153234 + }, + { + "epoch": 1.84, + "grad_norm": 15.538492730016973, + "learning_rate": 3.2295836323937735e-07, + "loss": 1.2129, + "step": 153237 + }, + { + "epoch": 1.84, + "grad_norm": 12.910937474141999, + "learning_rate": 3.2281111514013476e-07, + "loss": 1.5157, + "step": 153240 + }, + { + "epoch": 1.84, + "grad_norm": 6.435507368053836, + "learning_rate": 3.2266390006565285e-07, + "loss": 1.1887, + "step": 153243 + }, + { + "epoch": 1.84, + "grad_norm": 16.439178180232943, + "learning_rate": 3.2251671801643457e-07, + "loss": 1.0713, + "step": 153246 + }, + { + "epoch": 1.84, + "grad_norm": 3.034336546056424, + "learning_rate": 3.2236956899297955e-07, + "loss": 1.4855, + "step": 153249 + }, + { + "epoch": 1.84, + "grad_norm": 5.502477074101266, + "learning_rate": 3.2222245299579293e-07, + "loss": 0.8908, + "step": 153252 + }, + { + "epoch": 1.84, + "grad_norm": 8.995093783788203, + "learning_rate": 3.2207537002537316e-07, + "loss": 1.4513, + "step": 153255 + }, + { + "epoch": 1.84, + "grad_norm": 5.214779537231933, + "learning_rate": 3.219283200822254e-07, + "loss": 0.7484, + "step": 153258 + }, + { + "epoch": 1.84, + "grad_norm": 7.679910811527319, + "learning_rate": 3.2178130316685043e-07, + "loss": 1.0973, + "step": 153261 + }, + { + "epoch": 1.84, + "grad_norm": 7.682560684866161, + "learning_rate": 3.2163431927974777e-07, + "loss": 0.8933, + "step": 153264 + }, + { + "epoch": 1.84, + "grad_norm": 4.940801988488031, + "learning_rate": 3.214873684214226e-07, + "loss": 1.028, + "step": 153267 + }, + { + "epoch": 1.84, + "grad_norm": 8.171625362948616, + "learning_rate": 3.213404505923756e-07, + "loss": 1.4805, + "step": 153270 + }, + { + "epoch": 1.84, + "grad_norm": 5.138655649699558, + "learning_rate": 3.2119356579310754e-07, + "loss": 1.1934, + "step": 153273 + }, + { + "epoch": 1.84, + "grad_norm": 12.622638712524397, + "learning_rate": 3.21046714024118e-07, + "loss": 1.2436, + "step": 153276 + }, + { + "epoch": 1.84, + "grad_norm": 7.101286087113294, + "learning_rate": 3.2089989528590995e-07, + "loss": 1.3332, + "step": 153279 + }, + { + "epoch": 1.84, + "grad_norm": 3.2274492469619185, + "learning_rate": 3.20753109578984e-07, + "loss": 0.7045, + "step": 153282 + }, + { + "epoch": 1.84, + "grad_norm": 8.814062314821692, + "learning_rate": 3.20606356903842e-07, + "loss": 1.3522, + "step": 153285 + }, + { + "epoch": 1.84, + "grad_norm": 9.782147515488932, + "learning_rate": 3.204596372609836e-07, + "loss": 1.193, + "step": 153288 + }, + { + "epoch": 1.84, + "grad_norm": 5.271531767992137, + "learning_rate": 3.203129506509095e-07, + "loss": 1.7124, + "step": 153291 + }, + { + "epoch": 1.84, + "grad_norm": 3.4325214835962288, + "learning_rate": 3.201662970741215e-07, + "loss": 1.0926, + "step": 153294 + }, + { + "epoch": 1.84, + "grad_norm": 3.907614757959233, + "learning_rate": 3.200196765311192e-07, + "loss": 1.3412, + "step": 153297 + }, + { + "epoch": 1.84, + "grad_norm": 18.989127626390548, + "learning_rate": 3.1987308902240333e-07, + "loss": 1.1083, + "step": 153300 + }, + { + "epoch": 1.84, + "grad_norm": 9.665952497809267, + "learning_rate": 3.1972653454847235e-07, + "loss": 1.1987, + "step": 153303 + }, + { + "epoch": 1.84, + "grad_norm": 9.652762760789981, + "learning_rate": 3.195800131098281e-07, + "loss": 1.4793, + "step": 153306 + }, + { + "epoch": 1.84, + "grad_norm": 5.260253086152705, + "learning_rate": 3.194335247069713e-07, + "loss": 1.0504, + "step": 153309 + }, + { + "epoch": 1.84, + "grad_norm": 6.349053868007204, + "learning_rate": 3.1928706934040153e-07, + "loss": 1.433, + "step": 153312 + }, + { + "epoch": 1.84, + "grad_norm": 13.367903991014266, + "learning_rate": 3.191406470106184e-07, + "loss": 1.3322, + "step": 153315 + }, + { + "epoch": 1.84, + "grad_norm": 5.6392950414424865, + "learning_rate": 3.189942577181193e-07, + "loss": 1.1174, + "step": 153318 + }, + { + "epoch": 1.84, + "grad_norm": 11.209891725747715, + "learning_rate": 3.1884790146340604e-07, + "loss": 1.1425, + "step": 153321 + }, + { + "epoch": 1.84, + "grad_norm": 21.043565602911517, + "learning_rate": 3.187015782469782e-07, + "loss": 1.1512, + "step": 153324 + }, + { + "epoch": 1.84, + "grad_norm": 3.775877236840046, + "learning_rate": 3.1855528806933544e-07, + "loss": 0.9226, + "step": 153327 + }, + { + "epoch": 1.84, + "grad_norm": 6.6984283261347715, + "learning_rate": 3.184090309309751e-07, + "loss": 1.0248, + "step": 153330 + }, + { + "epoch": 1.84, + "grad_norm": 7.694963294469379, + "learning_rate": 3.1826280683239675e-07, + "loss": 1.4126, + "step": 153333 + }, + { + "epoch": 1.84, + "grad_norm": 3.9528804088042895, + "learning_rate": 3.1811661577410224e-07, + "loss": 1.1188, + "step": 153336 + }, + { + "epoch": 1.84, + "grad_norm": 16.599260279036802, + "learning_rate": 3.179704577565879e-07, + "loss": 1.3383, + "step": 153339 + }, + { + "epoch": 1.84, + "grad_norm": 3.0430745814706737, + "learning_rate": 3.178243327803521e-07, + "loss": 1.1323, + "step": 153342 + }, + { + "epoch": 1.84, + "grad_norm": 5.091626356712803, + "learning_rate": 3.1767824084589336e-07, + "loss": 1.2759, + "step": 153345 + }, + { + "epoch": 1.84, + "grad_norm": 6.823193047892545, + "learning_rate": 3.175321819537125e-07, + "loss": 1.1063, + "step": 153348 + }, + { + "epoch": 1.84, + "grad_norm": 17.63287240653666, + "learning_rate": 3.173861561043068e-07, + "loss": 0.9268, + "step": 153351 + }, + { + "epoch": 1.84, + "grad_norm": 23.09173156936503, + "learning_rate": 3.1724016329817477e-07, + "loss": 1.0874, + "step": 153354 + }, + { + "epoch": 1.84, + "grad_norm": 6.039979573963724, + "learning_rate": 3.1709420353581376e-07, + "loss": 1.2755, + "step": 153357 + }, + { + "epoch": 1.84, + "grad_norm": 5.733434451364932, + "learning_rate": 3.169482768177223e-07, + "loss": 1.3103, + "step": 153360 + }, + { + "epoch": 1.84, + "grad_norm": 6.251682329808087, + "learning_rate": 3.1680238314439894e-07, + "loss": 0.9205, + "step": 153363 + }, + { + "epoch": 1.84, + "grad_norm": 3.615596506458395, + "learning_rate": 3.1665652251634203e-07, + "loss": 0.9161, + "step": 153366 + }, + { + "epoch": 1.84, + "grad_norm": 12.238549011768697, + "learning_rate": 3.1651069493404684e-07, + "loss": 1.1022, + "step": 153369 + }, + { + "epoch": 1.84, + "grad_norm": 11.750855159395492, + "learning_rate": 3.1636490039801294e-07, + "loss": 1.3725, + "step": 153372 + }, + { + "epoch": 1.84, + "grad_norm": 4.331733730547348, + "learning_rate": 3.1621913890873766e-07, + "loss": 1.0626, + "step": 153375 + }, + { + "epoch": 1.84, + "grad_norm": 10.134517606477528, + "learning_rate": 3.160734104667196e-07, + "loss": 1.0592, + "step": 153378 + }, + { + "epoch": 1.84, + "grad_norm": 6.9459038883714825, + "learning_rate": 3.159277150724549e-07, + "loss": 0.9019, + "step": 153381 + }, + { + "epoch": 1.84, + "grad_norm": 10.131835721259966, + "learning_rate": 3.157820527264388e-07, + "loss": 1.1057, + "step": 153384 + }, + { + "epoch": 1.84, + "grad_norm": 12.06185108594067, + "learning_rate": 3.156364234291709e-07, + "loss": 1.4111, + "step": 153387 + }, + { + "epoch": 1.84, + "grad_norm": 11.44807224604805, + "learning_rate": 3.1549082718114854e-07, + "loss": 1.3255, + "step": 153390 + }, + { + "epoch": 1.84, + "grad_norm": 9.476164361276627, + "learning_rate": 3.153452639828669e-07, + "loss": 1.1353, + "step": 153393 + }, + { + "epoch": 1.84, + "grad_norm": 7.394699304346027, + "learning_rate": 3.151997338348234e-07, + "loss": 1.1138, + "step": 153396 + }, + { + "epoch": 1.84, + "grad_norm": 7.256330248309889, + "learning_rate": 3.1505423673751536e-07, + "loss": 0.868, + "step": 153399 + }, + { + "epoch": 1.84, + "grad_norm": 5.591644578186641, + "learning_rate": 3.1490877269143903e-07, + "loss": 0.9141, + "step": 153402 + }, + { + "epoch": 1.84, + "grad_norm": 8.940179875295346, + "learning_rate": 3.147633416970897e-07, + "loss": 1.2085, + "step": 153405 + }, + { + "epoch": 1.84, + "grad_norm": 5.796682174812059, + "learning_rate": 3.146179437549646e-07, + "loss": 1.0413, + "step": 153408 + }, + { + "epoch": 1.84, + "grad_norm": 3.6891267672921595, + "learning_rate": 3.1447257886556006e-07, + "loss": 1.0388, + "step": 153411 + }, + { + "epoch": 1.84, + "grad_norm": 90.49054042562797, + "learning_rate": 3.1432724702937124e-07, + "loss": 1.2644, + "step": 153414 + }, + { + "epoch": 1.84, + "grad_norm": 15.193984333661163, + "learning_rate": 3.1418194824689443e-07, + "loss": 1.2742, + "step": 153417 + }, + { + "epoch": 1.84, + "grad_norm": 9.580815664219504, + "learning_rate": 3.1403668251862806e-07, + "loss": 1.3055, + "step": 153420 + }, + { + "epoch": 1.84, + "grad_norm": 9.503611251624303, + "learning_rate": 3.1389144984506405e-07, + "loss": 1.2864, + "step": 153423 + }, + { + "epoch": 1.84, + "grad_norm": 4.226081169549703, + "learning_rate": 3.137462502266997e-07, + "loss": 1.2712, + "step": 153426 + }, + { + "epoch": 1.84, + "grad_norm": 3.950146897912107, + "learning_rate": 3.1360108366403017e-07, + "loss": 1.2057, + "step": 153429 + }, + { + "epoch": 1.84, + "grad_norm": 8.364432192251483, + "learning_rate": 3.134559501575507e-07, + "loss": 1.146, + "step": 153432 + }, + { + "epoch": 1.85, + "grad_norm": 3.7361506354130234, + "learning_rate": 3.133108497077586e-07, + "loss": 0.7728, + "step": 153435 + }, + { + "epoch": 1.85, + "grad_norm": 4.679479393697982, + "learning_rate": 3.131657823151457e-07, + "loss": 1.4232, + "step": 153438 + }, + { + "epoch": 1.85, + "grad_norm": 24.93086854022638, + "learning_rate": 3.1302074798020944e-07, + "loss": 1.0752, + "step": 153441 + }, + { + "epoch": 1.85, + "grad_norm": 9.64496817340079, + "learning_rate": 3.128757467034449e-07, + "loss": 0.9919, + "step": 153444 + }, + { + "epoch": 1.85, + "grad_norm": 15.573819317036293, + "learning_rate": 3.1273077848534616e-07, + "loss": 1.021, + "step": 153447 + }, + { + "epoch": 1.85, + "grad_norm": 5.171452445213976, + "learning_rate": 3.125858433264073e-07, + "loss": 1.2794, + "step": 153450 + }, + { + "epoch": 1.85, + "grad_norm": 10.257590980016158, + "learning_rate": 3.1244094122712345e-07, + "loss": 0.9427, + "step": 153453 + }, + { + "epoch": 1.85, + "grad_norm": 11.892723342677407, + "learning_rate": 3.1229607218798864e-07, + "loss": 1.1821, + "step": 153456 + }, + { + "epoch": 1.85, + "grad_norm": 5.154693130342435, + "learning_rate": 3.1215123620949807e-07, + "loss": 1.3868, + "step": 153459 + }, + { + "epoch": 1.85, + "grad_norm": 5.601792425984573, + "learning_rate": 3.12006433292148e-07, + "loss": 1.7399, + "step": 153462 + }, + { + "epoch": 1.85, + "grad_norm": 4.530837153613994, + "learning_rate": 3.118616634364291e-07, + "loss": 1.3761, + "step": 153465 + }, + { + "epoch": 1.85, + "grad_norm": 12.26990201344517, + "learning_rate": 3.117169266428355e-07, + "loss": 1.1334, + "step": 153468 + }, + { + "epoch": 1.85, + "grad_norm": 4.013530639713066, + "learning_rate": 3.115722229118634e-07, + "loss": 0.9408, + "step": 153471 + }, + { + "epoch": 1.85, + "grad_norm": 3.8654370111656906, + "learning_rate": 3.114275522440058e-07, + "loss": 1.481, + "step": 153474 + }, + { + "epoch": 1.85, + "grad_norm": 3.952284876888735, + "learning_rate": 3.1128291463975446e-07, + "loss": 1.4375, + "step": 153477 + }, + { + "epoch": 1.85, + "grad_norm": 4.949724948069902, + "learning_rate": 3.111383100996057e-07, + "loss": 1.2213, + "step": 153480 + }, + { + "epoch": 1.85, + "grad_norm": 11.375624973411977, + "learning_rate": 3.109937386240514e-07, + "loss": 0.9044, + "step": 153483 + }, + { + "epoch": 1.85, + "grad_norm": 2.877883502003064, + "learning_rate": 3.108492002135877e-07, + "loss": 1.0604, + "step": 153486 + }, + { + "epoch": 1.85, + "grad_norm": 2.790402933232955, + "learning_rate": 3.1070469486870426e-07, + "loss": 1.1851, + "step": 153489 + }, + { + "epoch": 1.85, + "grad_norm": 15.265713249780775, + "learning_rate": 3.1056022258989514e-07, + "loss": 1.1839, + "step": 153492 + }, + { + "epoch": 1.85, + "grad_norm": 8.864978347952565, + "learning_rate": 3.1041578337765446e-07, + "loss": 1.3025, + "step": 153495 + }, + { + "epoch": 1.85, + "grad_norm": 8.982823762646765, + "learning_rate": 3.102713772324739e-07, + "loss": 1.4097, + "step": 153498 + }, + { + "epoch": 1.85, + "grad_norm": 4.894023886364965, + "learning_rate": 3.1012700415484765e-07, + "loss": 0.8678, + "step": 153501 + }, + { + "epoch": 1.85, + "grad_norm": 8.342429929971273, + "learning_rate": 3.099826641452675e-07, + "loss": 0.9355, + "step": 153504 + }, + { + "epoch": 1.85, + "grad_norm": 5.636491277949286, + "learning_rate": 3.0983835720422626e-07, + "loss": 1.0039, + "step": 153507 + }, + { + "epoch": 1.85, + "grad_norm": 5.031485122120822, + "learning_rate": 3.0969408333221596e-07, + "loss": 1.2819, + "step": 153510 + }, + { + "epoch": 1.85, + "grad_norm": 15.617116121546555, + "learning_rate": 3.095498425297294e-07, + "loss": 1.1821, + "step": 153513 + }, + { + "epoch": 1.85, + "grad_norm": 8.823659109538498, + "learning_rate": 3.094056347972596e-07, + "loss": 1.1258, + "step": 153516 + }, + { + "epoch": 1.85, + "grad_norm": 8.750974440154405, + "learning_rate": 3.092614601352961e-07, + "loss": 1.4045, + "step": 153519 + }, + { + "epoch": 1.85, + "grad_norm": 6.815760012426249, + "learning_rate": 3.0911731854433303e-07, + "loss": 1.2479, + "step": 153522 + }, + { + "epoch": 1.85, + "grad_norm": 12.703712934058595, + "learning_rate": 3.0897321002486215e-07, + "loss": 1.0431, + "step": 153525 + }, + { + "epoch": 1.85, + "grad_norm": 4.305285224890566, + "learning_rate": 3.088291345773764e-07, + "loss": 1.229, + "step": 153528 + }, + { + "epoch": 1.85, + "grad_norm": 11.89629559287242, + "learning_rate": 3.086850922023654e-07, + "loss": 1.2625, + "step": 153531 + }, + { + "epoch": 1.85, + "grad_norm": 5.604483995558511, + "learning_rate": 3.0854108290032105e-07, + "loss": 1.3134, + "step": 153534 + }, + { + "epoch": 1.85, + "grad_norm": 16.639511745532126, + "learning_rate": 3.0839710667173394e-07, + "loss": 1.1837, + "step": 153537 + }, + { + "epoch": 1.85, + "grad_norm": 11.048182005879443, + "learning_rate": 3.082531635170982e-07, + "loss": 1.2253, + "step": 153540 + }, + { + "epoch": 1.85, + "grad_norm": 6.278893915868971, + "learning_rate": 3.0810925343690347e-07, + "loss": 1.4427, + "step": 153543 + }, + { + "epoch": 1.85, + "grad_norm": 10.9976071714131, + "learning_rate": 3.079653764316393e-07, + "loss": 1.3399, + "step": 153546 + }, + { + "epoch": 1.85, + "grad_norm": 24.173039817453798, + "learning_rate": 3.078215325018008e-07, + "loss": 1.2723, + "step": 153549 + }, + { + "epoch": 1.85, + "grad_norm": 15.622268833142916, + "learning_rate": 3.0767772164787433e-07, + "loss": 1.1304, + "step": 153552 + }, + { + "epoch": 1.85, + "grad_norm": 6.511135314002444, + "learning_rate": 3.0753394387035395e-07, + "loss": 1.3873, + "step": 153555 + }, + { + "epoch": 1.85, + "grad_norm": 10.751958600795342, + "learning_rate": 3.07390199169727e-07, + "loss": 1.1608, + "step": 153558 + }, + { + "epoch": 1.85, + "grad_norm": 6.840686975373581, + "learning_rate": 3.0724648754648757e-07, + "loss": 1.1204, + "step": 153561 + }, + { + "epoch": 1.85, + "grad_norm": 11.089166756249323, + "learning_rate": 3.071028090011241e-07, + "loss": 1.1125, + "step": 153564 + }, + { + "epoch": 1.85, + "grad_norm": 8.21342110484505, + "learning_rate": 3.0695916353412624e-07, + "loss": 1.2024, + "step": 153567 + }, + { + "epoch": 1.85, + "grad_norm": 7.250444524753364, + "learning_rate": 3.0681555114598803e-07, + "loss": 1.1555, + "step": 153570 + }, + { + "epoch": 1.85, + "grad_norm": 4.9565925556977035, + "learning_rate": 3.0667197183719575e-07, + "loss": 0.958, + "step": 153573 + }, + { + "epoch": 1.85, + "grad_norm": 12.887155214932637, + "learning_rate": 3.0652842560824016e-07, + "loss": 1.1683, + "step": 153576 + }, + { + "epoch": 1.85, + "grad_norm": 3.0301810741036173, + "learning_rate": 3.0638491245961077e-07, + "loss": 1.038, + "step": 153579 + }, + { + "epoch": 1.85, + "grad_norm": 5.883024835139578, + "learning_rate": 3.062414323917995e-07, + "loss": 1.1942, + "step": 153582 + }, + { + "epoch": 1.85, + "grad_norm": 4.404688806747708, + "learning_rate": 3.060979854052937e-07, + "loss": 1.1252, + "step": 153585 + }, + { + "epoch": 1.85, + "grad_norm": 7.261516533972571, + "learning_rate": 3.0595457150058405e-07, + "loss": 1.3323, + "step": 153588 + }, + { + "epoch": 1.85, + "grad_norm": 5.539514051802179, + "learning_rate": 3.0581119067816024e-07, + "loss": 0.905, + "step": 153591 + }, + { + "epoch": 1.85, + "grad_norm": 5.616697263415932, + "learning_rate": 3.056678429385096e-07, + "loss": 1.4154, + "step": 153594 + }, + { + "epoch": 1.85, + "grad_norm": 12.156137223471468, + "learning_rate": 3.05524528282124e-07, + "loss": 1.1877, + "step": 153597 + }, + { + "epoch": 1.85, + "grad_norm": 9.497521219512153, + "learning_rate": 3.0538124670948964e-07, + "loss": 1.2746, + "step": 153600 + }, + { + "epoch": 1.85, + "grad_norm": 5.009737507963441, + "learning_rate": 3.0523799822109846e-07, + "loss": 1.0395, + "step": 153603 + }, + { + "epoch": 1.85, + "grad_norm": 28.60292269499385, + "learning_rate": 3.0509478281743663e-07, + "loss": 1.5489, + "step": 153606 + }, + { + "epoch": 1.85, + "grad_norm": 5.502280765755966, + "learning_rate": 3.049516004989961e-07, + "loss": 1.3048, + "step": 153609 + }, + { + "epoch": 1.85, + "grad_norm": 5.782637075291299, + "learning_rate": 3.0480845126626304e-07, + "loss": 0.7896, + "step": 153612 + }, + { + "epoch": 1.85, + "grad_norm": 3.5215231312144333, + "learning_rate": 3.0466533511972594e-07, + "loss": 1.3404, + "step": 153615 + }, + { + "epoch": 1.85, + "grad_norm": 12.18536083562203, + "learning_rate": 3.0452225205987453e-07, + "loss": 1.107, + "step": 153618 + }, + { + "epoch": 1.85, + "grad_norm": 13.115939680399917, + "learning_rate": 3.0437920208719494e-07, + "loss": 1.2109, + "step": 153621 + }, + { + "epoch": 1.85, + "grad_norm": 4.283172850628653, + "learning_rate": 3.0423618520217915e-07, + "loss": 0.954, + "step": 153624 + }, + { + "epoch": 1.85, + "grad_norm": 4.7602932589962, + "learning_rate": 3.0409320140531107e-07, + "loss": 1.1407, + "step": 153627 + }, + { + "epoch": 1.85, + "grad_norm": 14.765347166169047, + "learning_rate": 3.039502506970804e-07, + "loss": 1.0008, + "step": 153630 + }, + { + "epoch": 1.85, + "grad_norm": 6.01760123190846, + "learning_rate": 3.038073330779756e-07, + "loss": 1.1259, + "step": 153633 + }, + { + "epoch": 1.85, + "grad_norm": 11.591068712782263, + "learning_rate": 3.036644485484841e-07, + "loss": 0.9626, + "step": 153636 + }, + { + "epoch": 1.85, + "grad_norm": 8.654201214661885, + "learning_rate": 3.035215971090932e-07, + "loss": 1.3681, + "step": 153639 + }, + { + "epoch": 1.85, + "grad_norm": 27.15880473859314, + "learning_rate": 3.0337877876028933e-07, + "loss": 0.939, + "step": 153642 + }, + { + "epoch": 1.85, + "grad_norm": 13.585477067515605, + "learning_rate": 3.0323599350256194e-07, + "loss": 1.122, + "step": 153645 + }, + { + "epoch": 1.85, + "grad_norm": 5.114100257685984, + "learning_rate": 3.030932413363963e-07, + "loss": 1.0029, + "step": 153648 + }, + { + "epoch": 1.85, + "grad_norm": 7.180012160295954, + "learning_rate": 3.0295052226228305e-07, + "loss": 1.0815, + "step": 153651 + }, + { + "epoch": 1.85, + "grad_norm": 3.519631076474245, + "learning_rate": 3.028078362807041e-07, + "loss": 1.3595, + "step": 153654 + }, + { + "epoch": 1.85, + "grad_norm": 16.586119296179273, + "learning_rate": 3.026651833921512e-07, + "loss": 1.304, + "step": 153657 + }, + { + "epoch": 1.85, + "grad_norm": 27.458791433110996, + "learning_rate": 3.025225635971074e-07, + "loss": 1.0698, + "step": 153660 + }, + { + "epoch": 1.85, + "grad_norm": 11.344874253001002, + "learning_rate": 3.0237997689606224e-07, + "loss": 1.0004, + "step": 153663 + }, + { + "epoch": 1.85, + "grad_norm": 10.71519603638212, + "learning_rate": 3.022374232895009e-07, + "loss": 1.0899, + "step": 153666 + }, + { + "epoch": 1.85, + "grad_norm": 6.404628713849228, + "learning_rate": 3.0209490277790963e-07, + "loss": 1.0941, + "step": 153669 + }, + { + "epoch": 1.85, + "grad_norm": 10.912544412768163, + "learning_rate": 3.0195241536177476e-07, + "loss": 1.1153, + "step": 153672 + }, + { + "epoch": 1.85, + "grad_norm": 3.7715929160327653, + "learning_rate": 3.0180996104158476e-07, + "loss": 1.0393, + "step": 153675 + }, + { + "epoch": 1.85, + "grad_norm": 2.532089341559007, + "learning_rate": 3.016675398178226e-07, + "loss": 1.2854, + "step": 153678 + }, + { + "epoch": 1.85, + "grad_norm": 10.225531909729838, + "learning_rate": 3.015251516909767e-07, + "loss": 1.1665, + "step": 153681 + }, + { + "epoch": 1.85, + "grad_norm": 12.553578457664084, + "learning_rate": 3.0138279666153236e-07, + "loss": 1.377, + "step": 153684 + }, + { + "epoch": 1.85, + "grad_norm": 11.99679939930491, + "learning_rate": 3.012404747299735e-07, + "loss": 1.034, + "step": 153687 + }, + { + "epoch": 1.85, + "grad_norm": 9.444034846904417, + "learning_rate": 3.0109818589678983e-07, + "loss": 0.9431, + "step": 153690 + }, + { + "epoch": 1.85, + "grad_norm": 21.549031525693238, + "learning_rate": 3.0095593016246205e-07, + "loss": 1.0818, + "step": 153693 + }, + { + "epoch": 1.85, + "grad_norm": 21.730486999166892, + "learning_rate": 3.008137075274786e-07, + "loss": 1.2899, + "step": 153696 + }, + { + "epoch": 1.85, + "grad_norm": 14.088046755655482, + "learning_rate": 3.0067151799232583e-07, + "loss": 1.4077, + "step": 153699 + }, + { + "epoch": 1.85, + "grad_norm": 10.216935442215895, + "learning_rate": 3.0052936155748666e-07, + "loss": 0.9029, + "step": 153702 + }, + { + "epoch": 1.85, + "grad_norm": 8.583564695978897, + "learning_rate": 3.003872382234474e-07, + "loss": 1.1725, + "step": 153705 + }, + { + "epoch": 1.85, + "grad_norm": 10.511301809705525, + "learning_rate": 3.0024514799069204e-07, + "loss": 1.253, + "step": 153708 + }, + { + "epoch": 1.85, + "grad_norm": 3.799707394374147, + "learning_rate": 3.001030908597058e-07, + "loss": 1.1653, + "step": 153711 + }, + { + "epoch": 1.85, + "grad_norm": 6.945314272705592, + "learning_rate": 2.9996106683097383e-07, + "loss": 1.24, + "step": 153714 + }, + { + "epoch": 1.85, + "grad_norm": 6.610299816140675, + "learning_rate": 2.9981907590498236e-07, + "loss": 1.4096, + "step": 153717 + }, + { + "epoch": 1.85, + "grad_norm": 8.536524199596021, + "learning_rate": 2.9967711808221336e-07, + "loss": 1.3171, + "step": 153720 + }, + { + "epoch": 1.85, + "grad_norm": 16.13613014701247, + "learning_rate": 2.9953519336315296e-07, + "loss": 1.1831, + "step": 153723 + }, + { + "epoch": 1.85, + "grad_norm": 6.361224321906297, + "learning_rate": 2.993933017482842e-07, + "loss": 1.2562, + "step": 153726 + }, + { + "epoch": 1.85, + "grad_norm": 4.183865814006821, + "learning_rate": 2.992514432380911e-07, + "loss": 0.8955, + "step": 153729 + }, + { + "epoch": 1.85, + "grad_norm": 6.235938474861154, + "learning_rate": 2.991096178330599e-07, + "loss": 1.2952, + "step": 153732 + }, + { + "epoch": 1.85, + "grad_norm": 3.137064380766402, + "learning_rate": 2.9896782553367256e-07, + "loss": 1.2425, + "step": 153735 + }, + { + "epoch": 1.85, + "grad_norm": 3.054490617748204, + "learning_rate": 2.9882606634041413e-07, + "loss": 1.15, + "step": 153738 + }, + { + "epoch": 1.85, + "grad_norm": 4.942242090736688, + "learning_rate": 2.986843402537687e-07, + "loss": 0.8928, + "step": 153741 + }, + { + "epoch": 1.85, + "grad_norm": 11.61950047290567, + "learning_rate": 2.9854264727421923e-07, + "loss": 1.4819, + "step": 153744 + }, + { + "epoch": 1.85, + "grad_norm": 4.985448668056827, + "learning_rate": 2.9840098740224865e-07, + "loss": 1.1068, + "step": 153747 + }, + { + "epoch": 1.85, + "grad_norm": 10.943845863018154, + "learning_rate": 2.9825936063833994e-07, + "loss": 1.3805, + "step": 153750 + }, + { + "epoch": 1.85, + "grad_norm": 4.273168783270213, + "learning_rate": 2.981177669829782e-07, + "loss": 1.1329, + "step": 153753 + }, + { + "epoch": 1.85, + "grad_norm": 7.8393636941674245, + "learning_rate": 2.9797620643664646e-07, + "loss": 1.1365, + "step": 153756 + }, + { + "epoch": 1.85, + "grad_norm": 3.6481122912708512, + "learning_rate": 2.978346789998277e-07, + "loss": 0.855, + "step": 153759 + }, + { + "epoch": 1.85, + "grad_norm": 9.47475991597656, + "learning_rate": 2.976931846730025e-07, + "loss": 1.1392, + "step": 153762 + }, + { + "epoch": 1.85, + "grad_norm": 3.915162513586386, + "learning_rate": 2.975517234566583e-07, + "loss": 1.2317, + "step": 153765 + }, + { + "epoch": 1.85, + "grad_norm": 17.57987952747573, + "learning_rate": 2.9741029535127364e-07, + "loss": 0.8237, + "step": 153768 + }, + { + "epoch": 1.85, + "grad_norm": 5.608200876004089, + "learning_rate": 2.972689003573337e-07, + "loss": 1.0387, + "step": 153771 + }, + { + "epoch": 1.85, + "grad_norm": 5.891034855680676, + "learning_rate": 2.971275384753192e-07, + "loss": 0.942, + "step": 153774 + }, + { + "epoch": 1.85, + "grad_norm": 17.734336685302406, + "learning_rate": 2.96986209705713e-07, + "loss": 1.0724, + "step": 153777 + }, + { + "epoch": 1.85, + "grad_norm": 9.74884134450532, + "learning_rate": 2.968449140489982e-07, + "loss": 0.9593, + "step": 153780 + }, + { + "epoch": 1.85, + "grad_norm": 5.128097745930947, + "learning_rate": 2.967036515056576e-07, + "loss": 1.2194, + "step": 153783 + }, + { + "epoch": 1.85, + "grad_norm": 9.332972086819304, + "learning_rate": 2.9656242207617314e-07, + "loss": 1.2367, + "step": 153786 + }, + { + "epoch": 1.85, + "grad_norm": 19.249437922200094, + "learning_rate": 2.9642122576102326e-07, + "loss": 1.2276, + "step": 153789 + }, + { + "epoch": 1.85, + "grad_norm": 11.67723429708129, + "learning_rate": 2.962800625606943e-07, + "loss": 1.1993, + "step": 153792 + }, + { + "epoch": 1.85, + "grad_norm": 34.319953581492484, + "learning_rate": 2.9613893247566474e-07, + "loss": 0.7589, + "step": 153795 + }, + { + "epoch": 1.85, + "grad_norm": 12.146999477794918, + "learning_rate": 2.959978355064197e-07, + "loss": 0.9791, + "step": 153798 + }, + { + "epoch": 1.85, + "grad_norm": 7.199809515446912, + "learning_rate": 2.9585677165343773e-07, + "loss": 1.0388, + "step": 153801 + }, + { + "epoch": 1.85, + "grad_norm": 3.8531642057906836, + "learning_rate": 2.957157409172007e-07, + "loss": 0.7413, + "step": 153804 + }, + { + "epoch": 1.85, + "grad_norm": 7.221571315396892, + "learning_rate": 2.955747432981915e-07, + "loss": 1.372, + "step": 153807 + }, + { + "epoch": 1.85, + "grad_norm": 6.856390178248215, + "learning_rate": 2.9543377879688864e-07, + "loss": 0.9927, + "step": 153810 + }, + { + "epoch": 1.85, + "grad_norm": 6.486755048955321, + "learning_rate": 2.952928474137762e-07, + "loss": 0.9803, + "step": 153813 + }, + { + "epoch": 1.85, + "grad_norm": 10.901559883641067, + "learning_rate": 2.9515194914933267e-07, + "loss": 1.6419, + "step": 153816 + }, + { + "epoch": 1.85, + "grad_norm": 3.7424888570007586, + "learning_rate": 2.950110840040399e-07, + "loss": 0.9274, + "step": 153819 + }, + { + "epoch": 1.85, + "grad_norm": 17.751420597080735, + "learning_rate": 2.948702519783786e-07, + "loss": 1.0474, + "step": 153822 + }, + { + "epoch": 1.85, + "grad_norm": 7.229872054149474, + "learning_rate": 2.947294530728306e-07, + "loss": 1.1482, + "step": 153825 + }, + { + "epoch": 1.85, + "grad_norm": 24.064718688369005, + "learning_rate": 2.945886872878756e-07, + "loss": 0.8675, + "step": 153828 + }, + { + "epoch": 1.85, + "grad_norm": 6.508908284553648, + "learning_rate": 2.944479546239909e-07, + "loss": 1.1884, + "step": 153831 + }, + { + "epoch": 1.85, + "grad_norm": 10.652929019612918, + "learning_rate": 2.943072550816606e-07, + "loss": 1.1644, + "step": 153834 + }, + { + "epoch": 1.85, + "grad_norm": 16.932130348955106, + "learning_rate": 2.9416658866136317e-07, + "loss": 1.3746, + "step": 153837 + }, + { + "epoch": 1.85, + "grad_norm": 10.525017924899906, + "learning_rate": 2.9402595536358046e-07, + "loss": 1.2812, + "step": 153840 + }, + { + "epoch": 1.85, + "grad_norm": 5.703432399816099, + "learning_rate": 2.938853551887899e-07, + "loss": 1.3189, + "step": 153843 + }, + { + "epoch": 1.85, + "grad_norm": 8.805565371390694, + "learning_rate": 2.9374478813747333e-07, + "loss": 1.212, + "step": 153846 + }, + { + "epoch": 1.85, + "grad_norm": 10.986596849539737, + "learning_rate": 2.936042542101092e-07, + "loss": 1.086, + "step": 153849 + }, + { + "epoch": 1.85, + "grad_norm": 9.993778525830077, + "learning_rate": 2.9346375340717826e-07, + "loss": 1.1318, + "step": 153852 + }, + { + "epoch": 1.85, + "grad_norm": 17.088349364335862, + "learning_rate": 2.9332328572915904e-07, + "loss": 1.0612, + "step": 153855 + }, + { + "epoch": 1.85, + "grad_norm": 11.115822993368688, + "learning_rate": 2.9318285117653e-07, + "loss": 0.9405, + "step": 153858 + }, + { + "epoch": 1.85, + "grad_norm": 5.192353483388538, + "learning_rate": 2.9304244974977194e-07, + "loss": 1.3366, + "step": 153861 + }, + { + "epoch": 1.85, + "grad_norm": 3.1265721959093487, + "learning_rate": 2.929020814493633e-07, + "loss": 1.0567, + "step": 153864 + }, + { + "epoch": 1.85, + "grad_norm": 11.15451258519724, + "learning_rate": 2.927617462757848e-07, + "loss": 1.4694, + "step": 153867 + }, + { + "epoch": 1.85, + "grad_norm": 6.630250636026753, + "learning_rate": 2.926214442295139e-07, + "loss": 1.2901, + "step": 153870 + }, + { + "epoch": 1.85, + "grad_norm": 11.292497319630089, + "learning_rate": 2.924811753110279e-07, + "loss": 0.9427, + "step": 153873 + }, + { + "epoch": 1.85, + "grad_norm": 4.998293393960346, + "learning_rate": 2.9234093952080657e-07, + "loss": 1.335, + "step": 153876 + }, + { + "epoch": 1.85, + "grad_norm": 11.314969537650784, + "learning_rate": 2.922007368593316e-07, + "loss": 1.0651, + "step": 153879 + }, + { + "epoch": 1.85, + "grad_norm": 12.303983002374101, + "learning_rate": 2.92060567327076e-07, + "loss": 1.0231, + "step": 153882 + }, + { + "epoch": 1.85, + "grad_norm": 6.401494414198358, + "learning_rate": 2.919204309245216e-07, + "loss": 0.9541, + "step": 153885 + }, + { + "epoch": 1.85, + "grad_norm": 10.909927676644992, + "learning_rate": 2.917803276521469e-07, + "loss": 1.2621, + "step": 153888 + }, + { + "epoch": 1.85, + "grad_norm": 9.620660957168818, + "learning_rate": 2.9164025751042826e-07, + "loss": 1.0456, + "step": 153891 + }, + { + "epoch": 1.85, + "grad_norm": 6.95462325152237, + "learning_rate": 2.9150022049984625e-07, + "loss": 1.1353, + "step": 153894 + }, + { + "epoch": 1.85, + "grad_norm": 10.824149807734466, + "learning_rate": 2.913602166208751e-07, + "loss": 1.4111, + "step": 153897 + }, + { + "epoch": 1.85, + "grad_norm": 9.44694620984778, + "learning_rate": 2.9122024587399435e-07, + "loss": 1.0897, + "step": 153900 + }, + { + "epoch": 1.85, + "grad_norm": 14.448646481398722, + "learning_rate": 2.9108030825968245e-07, + "loss": 1.2136, + "step": 153903 + }, + { + "epoch": 1.85, + "grad_norm": 3.731614030960319, + "learning_rate": 2.9094040377841694e-07, + "loss": 1.2566, + "step": 153906 + }, + { + "epoch": 1.85, + "grad_norm": 8.719506686231954, + "learning_rate": 2.9080053243067285e-07, + "loss": 1.089, + "step": 153909 + }, + { + "epoch": 1.85, + "grad_norm": 16.82099025220015, + "learning_rate": 2.90660694216931e-07, + "loss": 1.0553, + "step": 153912 + }, + { + "epoch": 1.85, + "grad_norm": 3.440723858945451, + "learning_rate": 2.905208891376665e-07, + "loss": 1.3093, + "step": 153915 + }, + { + "epoch": 1.85, + "grad_norm": 5.116991764252016, + "learning_rate": 2.9038111719335683e-07, + "loss": 0.9697, + "step": 153918 + }, + { + "epoch": 1.85, + "grad_norm": 16.3302873166697, + "learning_rate": 2.902413783844793e-07, + "loss": 0.7873, + "step": 153921 + }, + { + "epoch": 1.85, + "grad_norm": 6.003809702502172, + "learning_rate": 2.901016727115091e-07, + "loss": 1.0092, + "step": 153924 + }, + { + "epoch": 1.85, + "grad_norm": 12.464690423127127, + "learning_rate": 2.8996200017492595e-07, + "loss": 0.8557, + "step": 153927 + }, + { + "epoch": 1.85, + "grad_norm": 13.000440922889252, + "learning_rate": 2.898223607752038e-07, + "loss": 1.082, + "step": 153930 + }, + { + "epoch": 1.85, + "grad_norm": 7.639333101451494, + "learning_rate": 2.8968275451282114e-07, + "loss": 1.0124, + "step": 153933 + }, + { + "epoch": 1.85, + "grad_norm": 27.849069444464106, + "learning_rate": 2.895431813882543e-07, + "loss": 1.0044, + "step": 153936 + }, + { + "epoch": 1.85, + "grad_norm": 10.371027023765215, + "learning_rate": 2.894036414019774e-07, + "loss": 1.4678, + "step": 153939 + }, + { + "epoch": 1.85, + "grad_norm": 11.452968557399277, + "learning_rate": 2.892641345544689e-07, + "loss": 1.5673, + "step": 153942 + }, + { + "epoch": 1.85, + "grad_norm": 18.290055237996096, + "learning_rate": 2.8912466084620397e-07, + "loss": 1.364, + "step": 153945 + }, + { + "epoch": 1.85, + "grad_norm": 8.300509199455691, + "learning_rate": 2.8898522027765885e-07, + "loss": 1.2508, + "step": 153948 + }, + { + "epoch": 1.85, + "grad_norm": 3.1056303825754275, + "learning_rate": 2.888458128493088e-07, + "loss": 0.8623, + "step": 153951 + }, + { + "epoch": 1.85, + "grad_norm": 6.939735974849131, + "learning_rate": 2.8870643856163006e-07, + "loss": 0.9531, + "step": 153954 + }, + { + "epoch": 1.85, + "grad_norm": 9.95492613290988, + "learning_rate": 2.885670974150989e-07, + "loss": 1.0133, + "step": 153957 + }, + { + "epoch": 1.85, + "grad_norm": 7.5213993599870514, + "learning_rate": 2.8842778941019055e-07, + "loss": 1.1615, + "step": 153960 + }, + { + "epoch": 1.85, + "grad_norm": 22.316515437946194, + "learning_rate": 2.8828851454737906e-07, + "loss": 1.1759, + "step": 153963 + }, + { + "epoch": 1.85, + "grad_norm": 4.950555803867461, + "learning_rate": 2.8814927282714065e-07, + "loss": 1.3505, + "step": 153966 + }, + { + "epoch": 1.85, + "grad_norm": 5.03102949954481, + "learning_rate": 2.880100642499506e-07, + "loss": 1.0566, + "step": 153969 + }, + { + "epoch": 1.85, + "grad_norm": 12.689953583454697, + "learning_rate": 2.87870888816284e-07, + "loss": 1.139, + "step": 153972 + }, + { + "epoch": 1.85, + "grad_norm": 5.09263454282479, + "learning_rate": 2.877317465266172e-07, + "loss": 1.1031, + "step": 153975 + }, + { + "epoch": 1.85, + "grad_norm": 10.02817705749414, + "learning_rate": 2.8759263738142305e-07, + "loss": 1.1136, + "step": 153978 + }, + { + "epoch": 1.85, + "grad_norm": 7.9615142759012905, + "learning_rate": 2.8745356138117573e-07, + "loss": 1.4265, + "step": 153981 + }, + { + "epoch": 1.85, + "grad_norm": 4.273800714451941, + "learning_rate": 2.873145185263515e-07, + "loss": 1.2135, + "step": 153984 + }, + { + "epoch": 1.85, + "grad_norm": 14.483267168793148, + "learning_rate": 2.871755088174255e-07, + "loss": 1.3938, + "step": 153987 + }, + { + "epoch": 1.85, + "grad_norm": 4.781356357708957, + "learning_rate": 2.870365322548685e-07, + "loss": 0.824, + "step": 153990 + }, + { + "epoch": 1.85, + "grad_norm": 11.830546055797218, + "learning_rate": 2.86897588839159e-07, + "loss": 1.3617, + "step": 153993 + }, + { + "epoch": 1.85, + "grad_norm": 3.8232129386149905, + "learning_rate": 2.867586785707688e-07, + "loss": 0.8988, + "step": 153996 + }, + { + "epoch": 1.85, + "grad_norm": 11.025762919815989, + "learning_rate": 2.866198014501731e-07, + "loss": 1.0421, + "step": 153999 + }, + { + "epoch": 1.85, + "grad_norm": 9.054539280126653, + "learning_rate": 2.864809574778449e-07, + "loss": 1.3214, + "step": 154002 + }, + { + "epoch": 1.85, + "grad_norm": 6.690108231905673, + "learning_rate": 2.863421466542582e-07, + "loss": 0.9402, + "step": 154005 + }, + { + "epoch": 1.85, + "grad_norm": 7.134869981237318, + "learning_rate": 2.862033689798871e-07, + "loss": 0.9566, + "step": 154008 + }, + { + "epoch": 1.85, + "grad_norm": 9.864479076955872, + "learning_rate": 2.8606462445520456e-07, + "loss": 0.9143, + "step": 154011 + }, + { + "epoch": 1.85, + "grad_norm": 4.190288317653029, + "learning_rate": 2.8592591308068573e-07, + "loss": 0.9201, + "step": 154014 + }, + { + "epoch": 1.85, + "grad_norm": 9.482989537770782, + "learning_rate": 2.857872348568014e-07, + "loss": 0.9669, + "step": 154017 + }, + { + "epoch": 1.85, + "grad_norm": 4.337268169177068, + "learning_rate": 2.8564858978402665e-07, + "loss": 1.1195, + "step": 154020 + }, + { + "epoch": 1.85, + "grad_norm": 12.279939683804951, + "learning_rate": 2.855099778628345e-07, + "loss": 0.8922, + "step": 154023 + }, + { + "epoch": 1.85, + "grad_norm": 6.969739173122346, + "learning_rate": 2.8537139909369684e-07, + "loss": 1.1114, + "step": 154026 + }, + { + "epoch": 1.85, + "grad_norm": 5.236412358484368, + "learning_rate": 2.8523285347708876e-07, + "loss": 1.074, + "step": 154029 + }, + { + "epoch": 1.85, + "grad_norm": 26.61853011560346, + "learning_rate": 2.85094341013481e-07, + "loss": 1.0296, + "step": 154032 + }, + { + "epoch": 1.85, + "grad_norm": 9.840010042205114, + "learning_rate": 2.8495586170334545e-07, + "loss": 1.0506, + "step": 154035 + }, + { + "epoch": 1.85, + "grad_norm": 8.65406563462984, + "learning_rate": 2.8481741554715726e-07, + "loss": 1.0657, + "step": 154038 + }, + { + "epoch": 1.85, + "grad_norm": 12.109383840226078, + "learning_rate": 2.846790025453894e-07, + "loss": 1.2062, + "step": 154041 + }, + { + "epoch": 1.85, + "grad_norm": 8.203727755663849, + "learning_rate": 2.8454062269851256e-07, + "loss": 1.1198, + "step": 154044 + }, + { + "epoch": 1.85, + "grad_norm": 6.603258651477641, + "learning_rate": 2.8440227600699753e-07, + "loss": 1.1321, + "step": 154047 + }, + { + "epoch": 1.85, + "grad_norm": 10.141349364765349, + "learning_rate": 2.8426396247131725e-07, + "loss": 1.2399, + "step": 154050 + }, + { + "epoch": 1.85, + "grad_norm": 2.5488099696022366, + "learning_rate": 2.841256820919458e-07, + "loss": 1.0393, + "step": 154053 + }, + { + "epoch": 1.85, + "grad_norm": 7.138418253671092, + "learning_rate": 2.8398743486935385e-07, + "loss": 1.0493, + "step": 154056 + }, + { + "epoch": 1.85, + "grad_norm": 11.337775089078693, + "learning_rate": 2.8384922080401333e-07, + "loss": 0.9109, + "step": 154059 + }, + { + "epoch": 1.85, + "grad_norm": 2.3417348424385565, + "learning_rate": 2.83711039896396e-07, + "loss": 1.2221, + "step": 154062 + }, + { + "epoch": 1.85, + "grad_norm": 5.956428449796756, + "learning_rate": 2.835728921469716e-07, + "loss": 1.2157, + "step": 154065 + }, + { + "epoch": 1.85, + "grad_norm": 15.230857362128383, + "learning_rate": 2.834347775562141e-07, + "loss": 1.202, + "step": 154068 + }, + { + "epoch": 1.85, + "grad_norm": 13.044870844635941, + "learning_rate": 2.832966961245931e-07, + "loss": 1.6657, + "step": 154071 + }, + { + "epoch": 1.85, + "grad_norm": 8.160068814011062, + "learning_rate": 2.831586478525805e-07, + "loss": 0.9402, + "step": 154074 + }, + { + "epoch": 1.85, + "grad_norm": 8.960622122590204, + "learning_rate": 2.8302063274064817e-07, + "loss": 1.4889, + "step": 154077 + }, + { + "epoch": 1.85, + "grad_norm": 7.46485112176267, + "learning_rate": 2.828826507892657e-07, + "loss": 1.2418, + "step": 154080 + }, + { + "epoch": 1.85, + "grad_norm": 7.466160507922695, + "learning_rate": 2.82744701998906e-07, + "loss": 1.0537, + "step": 154083 + }, + { + "epoch": 1.85, + "grad_norm": 7.035432079311896, + "learning_rate": 2.826067863700377e-07, + "loss": 1.1907, + "step": 154086 + }, + { + "epoch": 1.85, + "grad_norm": 22.309666434277638, + "learning_rate": 2.824689039031314e-07, + "loss": 0.9502, + "step": 154089 + }, + { + "epoch": 1.85, + "grad_norm": 6.860591812683106, + "learning_rate": 2.8233105459865904e-07, + "loss": 0.9803, + "step": 154092 + }, + { + "epoch": 1.85, + "grad_norm": 19.635646136351095, + "learning_rate": 2.8219323845709133e-07, + "loss": 0.8483, + "step": 154095 + }, + { + "epoch": 1.85, + "grad_norm": 9.252460422595279, + "learning_rate": 2.820554554788968e-07, + "loss": 1.1454, + "step": 154098 + }, + { + "epoch": 1.85, + "grad_norm": 15.767860169527014, + "learning_rate": 2.819177056645461e-07, + "loss": 0.9538, + "step": 154101 + }, + { + "epoch": 1.85, + "grad_norm": 17.82332624574739, + "learning_rate": 2.817799890145123e-07, + "loss": 1.4759, + "step": 154104 + }, + { + "epoch": 1.85, + "grad_norm": 7.112967914353596, + "learning_rate": 2.816423055292605e-07, + "loss": 1.66, + "step": 154107 + }, + { + "epoch": 1.85, + "grad_norm": 10.107712068437635, + "learning_rate": 2.8150465520926375e-07, + "loss": 1.1193, + "step": 154110 + }, + { + "epoch": 1.85, + "grad_norm": 16.647868642985983, + "learning_rate": 2.8136703805499154e-07, + "loss": 1.1419, + "step": 154113 + }, + { + "epoch": 1.85, + "grad_norm": 3.3628293006395014, + "learning_rate": 2.812294540669114e-07, + "loss": 1.212, + "step": 154116 + }, + { + "epoch": 1.85, + "grad_norm": 9.330285199084043, + "learning_rate": 2.810919032454951e-07, + "loss": 1.1293, + "step": 154119 + }, + { + "epoch": 1.85, + "grad_norm": 5.188209400189324, + "learning_rate": 2.809543855912122e-07, + "loss": 0.8754, + "step": 154122 + }, + { + "epoch": 1.85, + "grad_norm": 3.2118018464113427, + "learning_rate": 2.808169011045314e-07, + "loss": 1.4005, + "step": 154125 + }, + { + "epoch": 1.85, + "grad_norm": 7.876337534694272, + "learning_rate": 2.806794497859211e-07, + "loss": 1.2326, + "step": 154128 + }, + { + "epoch": 1.85, + "grad_norm": 16.7946883963331, + "learning_rate": 2.805420316358509e-07, + "loss": 1.5548, + "step": 154131 + }, + { + "epoch": 1.85, + "grad_norm": 13.41717167530112, + "learning_rate": 2.804046466547894e-07, + "loss": 1.1707, + "step": 154134 + }, + { + "epoch": 1.85, + "grad_norm": 14.140092396970296, + "learning_rate": 2.8026729484320726e-07, + "loss": 1.149, + "step": 154137 + }, + { + "epoch": 1.85, + "grad_norm": 5.389689150919821, + "learning_rate": 2.801299762015697e-07, + "loss": 1.0553, + "step": 154140 + }, + { + "epoch": 1.85, + "grad_norm": 11.412300784670059, + "learning_rate": 2.799926907303485e-07, + "loss": 1.129, + "step": 154143 + }, + { + "epoch": 1.85, + "grad_norm": 4.517764241541057, + "learning_rate": 2.798554384300123e-07, + "loss": 1.0887, + "step": 154146 + }, + { + "epoch": 1.85, + "grad_norm": 10.153491028586023, + "learning_rate": 2.7971821930102727e-07, + "loss": 1.4781, + "step": 154149 + }, + { + "epoch": 1.85, + "grad_norm": 12.499579298803894, + "learning_rate": 2.795810333438631e-07, + "loss": 1.1371, + "step": 154152 + }, + { + "epoch": 1.85, + "grad_norm": 2.4748549848708064, + "learning_rate": 2.7944388055898606e-07, + "loss": 1.177, + "step": 154155 + }, + { + "epoch": 1.85, + "grad_norm": 11.285359460982852, + "learning_rate": 2.7930676094686693e-07, + "loss": 1.4004, + "step": 154158 + }, + { + "epoch": 1.85, + "grad_norm": 27.76197966968994, + "learning_rate": 2.791696745079719e-07, + "loss": 1.3929, + "step": 154161 + }, + { + "epoch": 1.85, + "grad_norm": 4.378447363149908, + "learning_rate": 2.7903262124277077e-07, + "loss": 1.14, + "step": 154164 + }, + { + "epoch": 1.85, + "grad_norm": 10.573731026006776, + "learning_rate": 2.7889560115172856e-07, + "loss": 1.1087, + "step": 154167 + }, + { + "epoch": 1.85, + "grad_norm": 8.760574348428266, + "learning_rate": 2.787586142353149e-07, + "loss": 1.0894, + "step": 154170 + }, + { + "epoch": 1.85, + "grad_norm": 18.20246794300408, + "learning_rate": 2.786216604939951e-07, + "loss": 1.2112, + "step": 154173 + }, + { + "epoch": 1.85, + "grad_norm": 4.024010950136449, + "learning_rate": 2.7848473992823977e-07, + "loss": 1.2357, + "step": 154176 + }, + { + "epoch": 1.85, + "grad_norm": 6.088745475237928, + "learning_rate": 2.7834785253851305e-07, + "loss": 0.9834, + "step": 154179 + }, + { + "epoch": 1.85, + "grad_norm": 4.653564096394064, + "learning_rate": 2.7821099832528344e-07, + "loss": 1.211, + "step": 154182 + }, + { + "epoch": 1.85, + "grad_norm": 6.117017637755089, + "learning_rate": 2.780741772890183e-07, + "loss": 1.4179, + "step": 154185 + }, + { + "epoch": 1.85, + "grad_norm": 10.769074039528558, + "learning_rate": 2.7793738943018513e-07, + "loss": 1.2857, + "step": 154188 + }, + { + "epoch": 1.85, + "grad_norm": 7.555002812869202, + "learning_rate": 2.778006347492479e-07, + "loss": 1.318, + "step": 154191 + }, + { + "epoch": 1.85, + "grad_norm": 6.628195574717576, + "learning_rate": 2.776639132466774e-07, + "loss": 1.3298, + "step": 154194 + }, + { + "epoch": 1.85, + "grad_norm": 4.276408573355655, + "learning_rate": 2.775272249229355e-07, + "loss": 1.1891, + "step": 154197 + }, + { + "epoch": 1.85, + "grad_norm": 7.180270072750819, + "learning_rate": 2.7739056977849175e-07, + "loss": 1.0211, + "step": 154200 + }, + { + "epoch": 1.85, + "grad_norm": 10.109398134286065, + "learning_rate": 2.7725394781381363e-07, + "loss": 1.1134, + "step": 154203 + }, + { + "epoch": 1.85, + "grad_norm": 8.915224161604312, + "learning_rate": 2.771173590293641e-07, + "loss": 1.2371, + "step": 154206 + }, + { + "epoch": 1.85, + "grad_norm": 6.001359654221555, + "learning_rate": 2.7698080342561165e-07, + "loss": 1.3025, + "step": 154209 + }, + { + "epoch": 1.85, + "grad_norm": 14.105831186074317, + "learning_rate": 2.768442810030214e-07, + "loss": 1.15, + "step": 154212 + }, + { + "epoch": 1.85, + "grad_norm": 8.863570293379656, + "learning_rate": 2.7670779176205863e-07, + "loss": 1.0549, + "step": 154215 + }, + { + "epoch": 1.85, + "grad_norm": 9.194311128427149, + "learning_rate": 2.7657133570319073e-07, + "loss": 1.0155, + "step": 154218 + }, + { + "epoch": 1.85, + "grad_norm": 6.646717123129783, + "learning_rate": 2.7643491282688174e-07, + "loss": 1.3488, + "step": 154221 + }, + { + "epoch": 1.85, + "grad_norm": 8.996419319930638, + "learning_rate": 2.7629852313359795e-07, + "loss": 1.2131, + "step": 154224 + }, + { + "epoch": 1.85, + "grad_norm": 13.5060622231488, + "learning_rate": 2.761621666238057e-07, + "loss": 0.9635, + "step": 154227 + }, + { + "epoch": 1.85, + "grad_norm": 14.060725482319983, + "learning_rate": 2.76025843297969e-07, + "loss": 1.4507, + "step": 154230 + }, + { + "epoch": 1.85, + "grad_norm": 7.215569948251899, + "learning_rate": 2.7588955315655306e-07, + "loss": 1.1573, + "step": 154233 + }, + { + "epoch": 1.85, + "grad_norm": 7.737397752731337, + "learning_rate": 2.7575329620002536e-07, + "loss": 1.1112, + "step": 154236 + }, + { + "epoch": 1.85, + "grad_norm": 7.546030849430432, + "learning_rate": 2.756170724288465e-07, + "loss": 1.3036, + "step": 154239 + }, + { + "epoch": 1.85, + "grad_norm": 6.388993982605898, + "learning_rate": 2.754808818434851e-07, + "loss": 1.0091, + "step": 154242 + }, + { + "epoch": 1.85, + "grad_norm": 24.122235351718103, + "learning_rate": 2.7534472444440517e-07, + "loss": 1.3972, + "step": 154245 + }, + { + "epoch": 1.85, + "grad_norm": 4.981478951444586, + "learning_rate": 2.7520860023206976e-07, + "loss": 1.0564, + "step": 154248 + }, + { + "epoch": 1.85, + "grad_norm": 8.86320437105675, + "learning_rate": 2.7507250920694506e-07, + "loss": 1.1146, + "step": 154251 + }, + { + "epoch": 1.85, + "grad_norm": 15.88309402003526, + "learning_rate": 2.749364513694952e-07, + "loss": 1.2004, + "step": 154254 + }, + { + "epoch": 1.85, + "grad_norm": 34.48906259838524, + "learning_rate": 2.748004267201854e-07, + "loss": 1.3436, + "step": 154257 + }, + { + "epoch": 1.85, + "grad_norm": 16.85880758287207, + "learning_rate": 2.7466443525947625e-07, + "loss": 1.106, + "step": 154260 + }, + { + "epoch": 1.85, + "grad_norm": 6.832966292998911, + "learning_rate": 2.745284769878354e-07, + "loss": 1.0629, + "step": 154263 + }, + { + "epoch": 1.86, + "grad_norm": 7.561482073411565, + "learning_rate": 2.7439255190572555e-07, + "loss": 1.2591, + "step": 154266 + }, + { + "epoch": 1.86, + "grad_norm": 8.691667970527938, + "learning_rate": 2.7425666001361095e-07, + "loss": 1.3748, + "step": 154269 + }, + { + "epoch": 1.86, + "grad_norm": 14.557285853171022, + "learning_rate": 2.7412080131195563e-07, + "loss": 1.3791, + "step": 154272 + }, + { + "epoch": 1.86, + "grad_norm": 8.099698582681112, + "learning_rate": 2.739849758012225e-07, + "loss": 1.1052, + "step": 154275 + }, + { + "epoch": 1.86, + "grad_norm": 9.539126093925105, + "learning_rate": 2.738491834818757e-07, + "loss": 0.998, + "step": 154278 + }, + { + "epoch": 1.86, + "grad_norm": 6.129161800207049, + "learning_rate": 2.737134243543771e-07, + "loss": 1.226, + "step": 154281 + }, + { + "epoch": 1.86, + "grad_norm": 24.929955112244734, + "learning_rate": 2.735776984191929e-07, + "loss": 1.2187, + "step": 154284 + }, + { + "epoch": 1.86, + "grad_norm": 35.82725977145913, + "learning_rate": 2.7344200567678283e-07, + "loss": 1.3424, + "step": 154287 + }, + { + "epoch": 1.86, + "grad_norm": 6.251594588844322, + "learning_rate": 2.7330634612761196e-07, + "loss": 1.2325, + "step": 154290 + }, + { + "epoch": 1.86, + "grad_norm": 7.522497305880794, + "learning_rate": 2.731707197721434e-07, + "loss": 0.9747, + "step": 154293 + }, + { + "epoch": 1.86, + "grad_norm": 4.222117804521269, + "learning_rate": 2.730351266108411e-07, + "loss": 0.9559, + "step": 154296 + }, + { + "epoch": 1.86, + "grad_norm": 19.359795230881385, + "learning_rate": 2.728995666441647e-07, + "loss": 1.4339, + "step": 154299 + }, + { + "epoch": 1.86, + "grad_norm": 16.725453501174943, + "learning_rate": 2.727640398725784e-07, + "loss": 1.5045, + "step": 154302 + }, + { + "epoch": 1.86, + "grad_norm": 9.313585094313014, + "learning_rate": 2.72628546296545e-07, + "loss": 1.2009, + "step": 154305 + }, + { + "epoch": 1.86, + "grad_norm": 7.356651358759515, + "learning_rate": 2.7249308591652644e-07, + "loss": 0.8365, + "step": 154308 + }, + { + "epoch": 1.86, + "grad_norm": 5.109302725643676, + "learning_rate": 2.723576587329857e-07, + "loss": 0.6918, + "step": 154311 + }, + { + "epoch": 1.86, + "grad_norm": 3.7579473084446837, + "learning_rate": 2.7222226474638346e-07, + "loss": 1.3051, + "step": 154314 + }, + { + "epoch": 1.86, + "grad_norm": 8.37723671470653, + "learning_rate": 2.7208690395718274e-07, + "loss": 1.5269, + "step": 154317 + }, + { + "epoch": 1.86, + "grad_norm": 5.92095687848171, + "learning_rate": 2.7195157636584644e-07, + "loss": 0.924, + "step": 154320 + }, + { + "epoch": 1.86, + "grad_norm": 3.4761950260953496, + "learning_rate": 2.718162819728343e-07, + "loss": 0.8476, + "step": 154323 + }, + { + "epoch": 1.86, + "grad_norm": 3.34211843636438, + "learning_rate": 2.7168102077861026e-07, + "loss": 1.1513, + "step": 154326 + }, + { + "epoch": 1.86, + "grad_norm": 7.110641963369298, + "learning_rate": 2.71545792783634e-07, + "loss": 1.0666, + "step": 154329 + }, + { + "epoch": 1.86, + "grad_norm": 2.0096673322651593, + "learning_rate": 2.7141059798836745e-07, + "loss": 1.3426, + "step": 154332 + }, + { + "epoch": 1.86, + "grad_norm": 8.47355679284111, + "learning_rate": 2.7127543639327236e-07, + "loss": 1.0036, + "step": 154335 + }, + { + "epoch": 1.86, + "grad_norm": 14.138535640692329, + "learning_rate": 2.7114030799881064e-07, + "loss": 0.8913, + "step": 154338 + }, + { + "epoch": 1.86, + "grad_norm": 11.715345290102773, + "learning_rate": 2.7100521280544303e-07, + "loss": 1.2115, + "step": 154341 + }, + { + "epoch": 1.86, + "grad_norm": 13.370336551681046, + "learning_rate": 2.708701508136291e-07, + "loss": 1.2489, + "step": 154344 + }, + { + "epoch": 1.86, + "grad_norm": 11.920516080684093, + "learning_rate": 2.707351220238319e-07, + "loss": 0.9978, + "step": 154347 + }, + { + "epoch": 1.86, + "grad_norm": 68.8535899539293, + "learning_rate": 2.706001264365099e-07, + "loss": 0.9815, + "step": 154350 + }, + { + "epoch": 1.86, + "grad_norm": 10.710036475078512, + "learning_rate": 2.704651640521272e-07, + "loss": 0.9844, + "step": 154353 + }, + { + "epoch": 1.86, + "grad_norm": 3.2676959646828303, + "learning_rate": 2.7033023487114005e-07, + "loss": 1.3394, + "step": 154356 + }, + { + "epoch": 1.86, + "grad_norm": 10.408886690691327, + "learning_rate": 2.701953388940126e-07, + "loss": 1.2212, + "step": 154359 + }, + { + "epoch": 1.86, + "grad_norm": 7.975787570015095, + "learning_rate": 2.7006047612120444e-07, + "loss": 1.0496, + "step": 154362 + }, + { + "epoch": 1.86, + "grad_norm": 30.91139762490214, + "learning_rate": 2.699256465531741e-07, + "loss": 1.2588, + "step": 154365 + }, + { + "epoch": 1.86, + "grad_norm": 11.01648838599326, + "learning_rate": 2.697908501903834e-07, + "loss": 0.988, + "step": 154368 + }, + { + "epoch": 1.86, + "grad_norm": 17.179398297164127, + "learning_rate": 2.696560870332909e-07, + "loss": 1.2715, + "step": 154371 + }, + { + "epoch": 1.86, + "grad_norm": 3.6776546601660556, + "learning_rate": 2.695213570823574e-07, + "loss": 1.2427, + "step": 154374 + }, + { + "epoch": 1.86, + "grad_norm": 5.844707436976752, + "learning_rate": 2.693866603380424e-07, + "loss": 0.775, + "step": 154377 + }, + { + "epoch": 1.86, + "grad_norm": 12.57530682689547, + "learning_rate": 2.692519968008067e-07, + "loss": 1.2808, + "step": 154380 + }, + { + "epoch": 1.86, + "grad_norm": 4.9980068726366165, + "learning_rate": 2.6911736647110886e-07, + "loss": 1.3796, + "step": 154383 + }, + { + "epoch": 1.86, + "grad_norm": 10.968141156921437, + "learning_rate": 2.689827693494074e-07, + "loss": 1.1699, + "step": 154386 + }, + { + "epoch": 1.86, + "grad_norm": 7.7053139113661615, + "learning_rate": 2.6884820543616297e-07, + "loss": 1.0748, + "step": 154389 + }, + { + "epoch": 1.86, + "grad_norm": 8.650777946174985, + "learning_rate": 2.6871367473183643e-07, + "loss": 0.8516, + "step": 154392 + }, + { + "epoch": 1.86, + "grad_norm": 2.8002735146074573, + "learning_rate": 2.685791772368829e-07, + "loss": 1.1254, + "step": 154395 + }, + { + "epoch": 1.86, + "grad_norm": 6.2981759392390275, + "learning_rate": 2.684447129517631e-07, + "loss": 1.1896, + "step": 154398 + }, + { + "epoch": 1.86, + "grad_norm": 5.368186130188209, + "learning_rate": 2.6831028187693677e-07, + "loss": 0.9855, + "step": 154401 + }, + { + "epoch": 1.86, + "grad_norm": 8.661527362103342, + "learning_rate": 2.681758840128623e-07, + "loss": 0.9364, + "step": 154404 + }, + { + "epoch": 1.86, + "grad_norm": 12.567789998842112, + "learning_rate": 2.6804151935999945e-07, + "loss": 1.2227, + "step": 154407 + }, + { + "epoch": 1.86, + "grad_norm": 20.319312635677772, + "learning_rate": 2.679071879188033e-07, + "loss": 1.1971, + "step": 154410 + }, + { + "epoch": 1.86, + "grad_norm": 7.979152567431399, + "learning_rate": 2.677728896897347e-07, + "loss": 1.1765, + "step": 154413 + }, + { + "epoch": 1.86, + "grad_norm": 9.364295779769483, + "learning_rate": 2.676386246732521e-07, + "loss": 0.9705, + "step": 154416 + }, + { + "epoch": 1.86, + "grad_norm": 4.808860524485694, + "learning_rate": 2.67504392869814e-07, + "loss": 0.9121, + "step": 154419 + }, + { + "epoch": 1.86, + "grad_norm": 7.0868555482192805, + "learning_rate": 2.6737019427987674e-07, + "loss": 1.0496, + "step": 154422 + }, + { + "epoch": 1.86, + "grad_norm": 11.56930639782292, + "learning_rate": 2.672360289039e-07, + "loss": 0.8769, + "step": 154425 + }, + { + "epoch": 1.86, + "grad_norm": 11.264348315458333, + "learning_rate": 2.671018967423411e-07, + "loss": 1.1653, + "step": 154428 + }, + { + "epoch": 1.86, + "grad_norm": 9.969526118955846, + "learning_rate": 2.669677977956564e-07, + "loss": 0.8379, + "step": 154431 + }, + { + "epoch": 1.86, + "grad_norm": 3.352542536783351, + "learning_rate": 2.6683373206430664e-07, + "loss": 1.2465, + "step": 154434 + }, + { + "epoch": 1.86, + "grad_norm": 8.549740093798011, + "learning_rate": 2.6669969954874584e-07, + "loss": 0.8151, + "step": 154437 + }, + { + "epoch": 1.86, + "grad_norm": 17.11613377884321, + "learning_rate": 2.6656570024943374e-07, + "loss": 1.2886, + "step": 154440 + }, + { + "epoch": 1.86, + "grad_norm": 23.556862492123493, + "learning_rate": 2.6643173416682655e-07, + "loss": 0.889, + "step": 154443 + }, + { + "epoch": 1.86, + "grad_norm": 15.765477456806098, + "learning_rate": 2.662978013013828e-07, + "loss": 1.057, + "step": 154446 + }, + { + "epoch": 1.86, + "grad_norm": 13.065463736100408, + "learning_rate": 2.661639016535589e-07, + "loss": 1.1014, + "step": 154449 + }, + { + "epoch": 1.86, + "grad_norm": 8.090633959531383, + "learning_rate": 2.660300352238099e-07, + "loss": 1.3497, + "step": 154452 + }, + { + "epoch": 1.86, + "grad_norm": 8.814423880905812, + "learning_rate": 2.658962020125944e-07, + "loss": 1.0978, + "step": 154455 + }, + { + "epoch": 1.86, + "grad_norm": 8.908578948872783, + "learning_rate": 2.6576240202036976e-07, + "loss": 1.0442, + "step": 154458 + }, + { + "epoch": 1.86, + "grad_norm": 3.2865792928285495, + "learning_rate": 2.656286352475923e-07, + "loss": 1.0713, + "step": 154461 + }, + { + "epoch": 1.86, + "grad_norm": 10.745613273212099, + "learning_rate": 2.6549490169471617e-07, + "loss": 1.2521, + "step": 154464 + }, + { + "epoch": 1.86, + "grad_norm": 5.051837482834276, + "learning_rate": 2.65361201362202e-07, + "loss": 1.5132, + "step": 154467 + }, + { + "epoch": 1.86, + "grad_norm": 10.681470306887089, + "learning_rate": 2.6522753425050174e-07, + "loss": 1.0584, + "step": 154470 + }, + { + "epoch": 1.86, + "grad_norm": 3.4498298683910535, + "learning_rate": 2.65093900360075e-07, + "loss": 1.3016, + "step": 154473 + }, + { + "epoch": 1.86, + "grad_norm": 7.213261435362597, + "learning_rate": 2.649602996913747e-07, + "loss": 1.0159, + "step": 154476 + }, + { + "epoch": 1.86, + "grad_norm": 3.5256060509969447, + "learning_rate": 2.6482673224485835e-07, + "loss": 1.1386, + "step": 154479 + }, + { + "epoch": 1.86, + "grad_norm": 7.741480948764738, + "learning_rate": 2.6469319802098215e-07, + "loss": 1.155, + "step": 154482 + }, + { + "epoch": 1.86, + "grad_norm": 11.32169778040946, + "learning_rate": 2.6455969702020136e-07, + "loss": 1.064, + "step": 154485 + }, + { + "epoch": 1.86, + "grad_norm": 11.624200358297148, + "learning_rate": 2.644262292429722e-07, + "loss": 0.9612, + "step": 154488 + }, + { + "epoch": 1.86, + "grad_norm": 9.643845768885477, + "learning_rate": 2.6429279468975e-07, + "loss": 0.9583, + "step": 154491 + }, + { + "epoch": 1.86, + "grad_norm": 13.353358043384356, + "learning_rate": 2.6415939336098874e-07, + "loss": 0.891, + "step": 154494 + }, + { + "epoch": 1.86, + "grad_norm": 9.458574366403006, + "learning_rate": 2.640260252571447e-07, + "loss": 1.087, + "step": 154497 + }, + { + "epoch": 1.86, + "grad_norm": 6.969389860413994, + "learning_rate": 2.6389269037867315e-07, + "loss": 0.9403, + "step": 154500 + }, + { + "epoch": 1.86, + "grad_norm": 6.665681259292317, + "learning_rate": 2.6375938872602924e-07, + "loss": 1.0141, + "step": 154503 + }, + { + "epoch": 1.86, + "grad_norm": 5.871185771531451, + "learning_rate": 2.6362612029966707e-07, + "loss": 0.966, + "step": 154506 + }, + { + "epoch": 1.86, + "grad_norm": 8.555547148562766, + "learning_rate": 2.6349288510004287e-07, + "loss": 1.1094, + "step": 154509 + }, + { + "epoch": 1.86, + "grad_norm": 10.085878370269612, + "learning_rate": 2.6335968312760865e-07, + "loss": 1.3274, + "step": 154512 + }, + { + "epoch": 1.86, + "grad_norm": 15.266589698407383, + "learning_rate": 2.6322651438282275e-07, + "loss": 1.5345, + "step": 154515 + }, + { + "epoch": 1.86, + "grad_norm": 10.321521484914854, + "learning_rate": 2.6309337886613606e-07, + "loss": 1.2879, + "step": 154518 + }, + { + "epoch": 1.86, + "grad_norm": 8.932622907432261, + "learning_rate": 2.629602765780037e-07, + "loss": 1.4845, + "step": 154521 + }, + { + "epoch": 1.86, + "grad_norm": 79.49391976874797, + "learning_rate": 2.62827207518882e-07, + "loss": 1.0283, + "step": 154524 + }, + { + "epoch": 1.86, + "grad_norm": 16.014188948670263, + "learning_rate": 2.6269417168922395e-07, + "loss": 1.1509, + "step": 154527 + }, + { + "epoch": 1.86, + "grad_norm": 2.5124503619801097, + "learning_rate": 2.6256116908948246e-07, + "loss": 1.0321, + "step": 154530 + }, + { + "epoch": 1.86, + "grad_norm": 15.280344923104488, + "learning_rate": 2.6242819972011393e-07, + "loss": 0.9934, + "step": 154533 + }, + { + "epoch": 1.86, + "grad_norm": 6.357445034742909, + "learning_rate": 2.62295263581569e-07, + "loss": 1.3148, + "step": 154536 + }, + { + "epoch": 1.86, + "grad_norm": 6.902369117852517, + "learning_rate": 2.621623606743029e-07, + "loss": 1.0167, + "step": 154539 + }, + { + "epoch": 1.86, + "grad_norm": 5.584000977663923, + "learning_rate": 2.620294909987697e-07, + "loss": 1.0913, + "step": 154542 + }, + { + "epoch": 1.86, + "grad_norm": 5.700796771185291, + "learning_rate": 2.6189665455542134e-07, + "loss": 1.0277, + "step": 154545 + }, + { + "epoch": 1.86, + "grad_norm": 4.465006072847516, + "learning_rate": 2.6176385134471295e-07, + "loss": 1.223, + "step": 154548 + }, + { + "epoch": 1.86, + "grad_norm": 4.461874381964473, + "learning_rate": 2.6163108136709635e-07, + "loss": 1.1349, + "step": 154551 + }, + { + "epoch": 1.86, + "grad_norm": 7.889961457859977, + "learning_rate": 2.614983446230257e-07, + "loss": 1.186, + "step": 154554 + }, + { + "epoch": 1.86, + "grad_norm": 8.695944191711563, + "learning_rate": 2.6136564111295394e-07, + "loss": 1.1968, + "step": 154557 + }, + { + "epoch": 1.86, + "grad_norm": 4.791247254236123, + "learning_rate": 2.6123297083733176e-07, + "loss": 0.9261, + "step": 154560 + }, + { + "epoch": 1.86, + "grad_norm": 5.760413801808187, + "learning_rate": 2.611003337966134e-07, + "loss": 0.8639, + "step": 154563 + }, + { + "epoch": 1.86, + "grad_norm": 6.3649890658307795, + "learning_rate": 2.6096772999125275e-07, + "loss": 1.1561, + "step": 154566 + }, + { + "epoch": 1.86, + "grad_norm": 6.1281449443086204, + "learning_rate": 2.6083515942170177e-07, + "loss": 1.1127, + "step": 154569 + }, + { + "epoch": 1.86, + "grad_norm": 11.912515708567936, + "learning_rate": 2.607026220884112e-07, + "loss": 1.1381, + "step": 154572 + }, + { + "epoch": 1.86, + "grad_norm": 20.039581168265403, + "learning_rate": 2.605701179918352e-07, + "loss": 1.5525, + "step": 154575 + }, + { + "epoch": 1.86, + "grad_norm": 8.375966300679906, + "learning_rate": 2.6043764713242435e-07, + "loss": 1.5065, + "step": 154578 + }, + { + "epoch": 1.86, + "grad_norm": 9.22562950016264, + "learning_rate": 2.60305209510634e-07, + "loss": 1.0912, + "step": 154581 + }, + { + "epoch": 1.86, + "grad_norm": 6.968149388176371, + "learning_rate": 2.6017280512691147e-07, + "loss": 1.1559, + "step": 154584 + }, + { + "epoch": 1.86, + "grad_norm": 4.504928070733582, + "learning_rate": 2.6004043398171084e-07, + "loss": 1.1431, + "step": 154587 + }, + { + "epoch": 1.86, + "grad_norm": 8.537318967657157, + "learning_rate": 2.5990809607548406e-07, + "loss": 1.0445, + "step": 154590 + }, + { + "epoch": 1.86, + "grad_norm": 6.29131049593641, + "learning_rate": 2.597757914086829e-07, + "loss": 1.2057, + "step": 154593 + }, + { + "epoch": 1.86, + "grad_norm": 5.121026816782174, + "learning_rate": 2.596435199817593e-07, + "loss": 0.9994, + "step": 154596 + }, + { + "epoch": 1.86, + "grad_norm": 4.0812912330359, + "learning_rate": 2.5951128179516395e-07, + "loss": 1.1014, + "step": 154599 + }, + { + "epoch": 1.86, + "grad_norm": 4.039396854357149, + "learning_rate": 2.593790768493465e-07, + "loss": 1.2516, + "step": 154602 + }, + { + "epoch": 1.86, + "grad_norm": 16.450066494571953, + "learning_rate": 2.5924690514476105e-07, + "loss": 1.2541, + "step": 154605 + }, + { + "epoch": 1.86, + "grad_norm": 5.918369826032424, + "learning_rate": 2.591147666818572e-07, + "loss": 1.3126, + "step": 154608 + }, + { + "epoch": 1.86, + "grad_norm": 13.232857351459327, + "learning_rate": 2.5898266146108574e-07, + "loss": 1.3118, + "step": 154611 + }, + { + "epoch": 1.86, + "grad_norm": 6.385714939756183, + "learning_rate": 2.588505894828963e-07, + "loss": 0.9403, + "step": 154614 + }, + { + "epoch": 1.86, + "grad_norm": 5.94366136716483, + "learning_rate": 2.5871855074774413e-07, + "loss": 1.2904, + "step": 154617 + }, + { + "epoch": 1.86, + "grad_norm": 3.698863249001991, + "learning_rate": 2.585865452560743e-07, + "loss": 1.1188, + "step": 154620 + }, + { + "epoch": 1.86, + "grad_norm": 12.587522928304953, + "learning_rate": 2.58454573008341e-07, + "loss": 1.1438, + "step": 154623 + }, + { + "epoch": 1.86, + "grad_norm": 4.83634763293788, + "learning_rate": 2.583226340049927e-07, + "loss": 1.285, + "step": 154626 + }, + { + "epoch": 1.86, + "grad_norm": 6.830228307488936, + "learning_rate": 2.581907282464802e-07, + "loss": 1.1201, + "step": 154629 + }, + { + "epoch": 1.86, + "grad_norm": 10.014310940859199, + "learning_rate": 2.580588557332531e-07, + "loss": 1.322, + "step": 154632 + }, + { + "epoch": 1.86, + "grad_norm": 12.798715977724951, + "learning_rate": 2.5792701646576437e-07, + "loss": 1.3376, + "step": 154635 + }, + { + "epoch": 1.86, + "grad_norm": 5.9522633143495165, + "learning_rate": 2.577952104444603e-07, + "loss": 1.2086, + "step": 154638 + }, + { + "epoch": 1.86, + "grad_norm": 4.984464119085215, + "learning_rate": 2.5766343766979175e-07, + "loss": 1.3079, + "step": 154641 + }, + { + "epoch": 1.86, + "grad_norm": 17.667165782351955, + "learning_rate": 2.5753169814220934e-07, + "loss": 1.1553, + "step": 154644 + }, + { + "epoch": 1.86, + "grad_norm": 10.987313636470535, + "learning_rate": 2.573999918621617e-07, + "loss": 1.1585, + "step": 154647 + }, + { + "epoch": 1.86, + "grad_norm": 7.159420558392308, + "learning_rate": 2.572683188300984e-07, + "loss": 0.951, + "step": 154650 + }, + { + "epoch": 1.86, + "grad_norm": 9.7609388419417, + "learning_rate": 2.571366790464691e-07, + "loss": 0.9368, + "step": 154653 + }, + { + "epoch": 1.86, + "grad_norm": 5.885155155349934, + "learning_rate": 2.570050725117235e-07, + "loss": 1.084, + "step": 154656 + }, + { + "epoch": 1.86, + "grad_norm": 7.979035440684298, + "learning_rate": 2.5687349922631e-07, + "loss": 0.9081, + "step": 154659 + }, + { + "epoch": 1.86, + "grad_norm": 11.427434069907974, + "learning_rate": 2.5674195919067836e-07, + "loss": 0.7132, + "step": 154662 + }, + { + "epoch": 1.86, + "grad_norm": 5.513811689595738, + "learning_rate": 2.5661045240527595e-07, + "loss": 1.3632, + "step": 154665 + }, + { + "epoch": 1.86, + "grad_norm": 4.55726455420145, + "learning_rate": 2.564789788705524e-07, + "loss": 0.9527, + "step": 154668 + }, + { + "epoch": 1.86, + "grad_norm": 13.194664366154717, + "learning_rate": 2.5634753858695625e-07, + "loss": 0.7788, + "step": 154671 + }, + { + "epoch": 1.86, + "grad_norm": 9.691926725718796, + "learning_rate": 2.5621613155493607e-07, + "loss": 0.9972, + "step": 154674 + }, + { + "epoch": 1.86, + "grad_norm": 7.3477279255168115, + "learning_rate": 2.5608475777494255e-07, + "loss": 0.9161, + "step": 154677 + }, + { + "epoch": 1.86, + "grad_norm": 5.869511906886095, + "learning_rate": 2.559534172474198e-07, + "loss": 1.2918, + "step": 154680 + }, + { + "epoch": 1.86, + "grad_norm": 7.118794454883832, + "learning_rate": 2.5582210997281977e-07, + "loss": 1.0773, + "step": 154683 + }, + { + "epoch": 1.86, + "grad_norm": 4.206678356556464, + "learning_rate": 2.556908359515886e-07, + "loss": 1.1443, + "step": 154686 + }, + { + "epoch": 1.86, + "grad_norm": 8.395275774742634, + "learning_rate": 2.5555959518417493e-07, + "loss": 1.2036, + "step": 154689 + }, + { + "epoch": 1.86, + "grad_norm": 4.909077212288298, + "learning_rate": 2.554283876710262e-07, + "loss": 1.5273, + "step": 154692 + }, + { + "epoch": 1.86, + "grad_norm": 10.366249127464336, + "learning_rate": 2.552972134125897e-07, + "loss": 1.4946, + "step": 154695 + }, + { + "epoch": 1.86, + "grad_norm": 8.19611502670473, + "learning_rate": 2.5516607240931413e-07, + "loss": 1.0402, + "step": 154698 + }, + { + "epoch": 1.86, + "grad_norm": 8.242507863669502, + "learning_rate": 2.550349646616479e-07, + "loss": 1.0883, + "step": 154701 + }, + { + "epoch": 1.86, + "grad_norm": 12.310914757851275, + "learning_rate": 2.5490389017003627e-07, + "loss": 0.9135, + "step": 154704 + }, + { + "epoch": 1.86, + "grad_norm": 9.08444939497187, + "learning_rate": 2.547728489349277e-07, + "loss": 1.2444, + "step": 154707 + }, + { + "epoch": 1.86, + "grad_norm": 35.8444956825597, + "learning_rate": 2.5464184095676857e-07, + "loss": 1.0283, + "step": 154710 + }, + { + "epoch": 1.86, + "grad_norm": 62.88579350160528, + "learning_rate": 2.545108662360063e-07, + "loss": 1.4508, + "step": 154713 + }, + { + "epoch": 1.86, + "grad_norm": 3.9464621911276008, + "learning_rate": 2.543799247730894e-07, + "loss": 0.9382, + "step": 154716 + }, + { + "epoch": 1.86, + "grad_norm": 7.777325846234105, + "learning_rate": 2.54249016568463e-07, + "loss": 0.9546, + "step": 154719 + }, + { + "epoch": 1.86, + "grad_norm": 8.63145544458781, + "learning_rate": 2.5411814162257354e-07, + "loss": 1.0639, + "step": 154722 + }, + { + "epoch": 1.86, + "grad_norm": 5.905192767543665, + "learning_rate": 2.5398729993586945e-07, + "loss": 0.938, + "step": 154725 + }, + { + "epoch": 1.86, + "grad_norm": 4.666537155480344, + "learning_rate": 2.5385649150879596e-07, + "loss": 1.2583, + "step": 154728 + }, + { + "epoch": 1.86, + "grad_norm": 18.027714492387027, + "learning_rate": 2.5372571634179945e-07, + "loss": 1.3037, + "step": 154731 + }, + { + "epoch": 1.86, + "grad_norm": 5.610155548796278, + "learning_rate": 2.535949744353272e-07, + "loss": 0.9956, + "step": 154734 + }, + { + "epoch": 1.86, + "grad_norm": 4.838214304613735, + "learning_rate": 2.534642657898234e-07, + "loss": 0.9644, + "step": 154737 + }, + { + "epoch": 1.86, + "grad_norm": 3.7654656511351985, + "learning_rate": 2.5333359040573546e-07, + "loss": 1.4348, + "step": 154740 + }, + { + "epoch": 1.86, + "grad_norm": 11.348286375088014, + "learning_rate": 2.532029482835108e-07, + "loss": 1.108, + "step": 154743 + }, + { + "epoch": 1.86, + "grad_norm": 4.546931541865539, + "learning_rate": 2.5307233942359344e-07, + "loss": 1.1718, + "step": 154746 + }, + { + "epoch": 1.86, + "grad_norm": 14.489744038272026, + "learning_rate": 2.529417638264275e-07, + "loss": 1.2186, + "step": 154749 + }, + { + "epoch": 1.86, + "grad_norm": 17.844009239924404, + "learning_rate": 2.528112214924616e-07, + "loss": 1.3768, + "step": 154752 + }, + { + "epoch": 1.86, + "grad_norm": 7.454779062167974, + "learning_rate": 2.526807124221398e-07, + "loss": 1.4233, + "step": 154755 + }, + { + "epoch": 1.86, + "grad_norm": 5.812332506131946, + "learning_rate": 2.525502366159083e-07, + "loss": 1.1322, + "step": 154758 + }, + { + "epoch": 1.86, + "grad_norm": 2.6517175213080795, + "learning_rate": 2.5241979407421123e-07, + "loss": 1.4067, + "step": 154761 + }, + { + "epoch": 1.86, + "grad_norm": 11.09749572769837, + "learning_rate": 2.52289384797495e-07, + "loss": 1.0426, + "step": 154764 + }, + { + "epoch": 1.86, + "grad_norm": 9.455271126795058, + "learning_rate": 2.521590087862036e-07, + "loss": 0.9184, + "step": 154767 + }, + { + "epoch": 1.86, + "grad_norm": 6.0509376599695495, + "learning_rate": 2.5202866604078336e-07, + "loss": 1.0481, + "step": 154770 + }, + { + "epoch": 1.86, + "grad_norm": 3.288829144044344, + "learning_rate": 2.5189835656167617e-07, + "loss": 1.1415, + "step": 154773 + }, + { + "epoch": 1.86, + "grad_norm": 8.968015374434598, + "learning_rate": 2.517680803493294e-07, + "loss": 1.1297, + "step": 154776 + }, + { + "epoch": 1.86, + "grad_norm": 14.481115905348735, + "learning_rate": 2.516378374041872e-07, + "loss": 1.1689, + "step": 154779 + }, + { + "epoch": 1.86, + "grad_norm": 4.194033845356954, + "learning_rate": 2.5150762772669257e-07, + "loss": 1.1471, + "step": 154782 + }, + { + "epoch": 1.86, + "grad_norm": 6.802403262267501, + "learning_rate": 2.5137745131729286e-07, + "loss": 1.0336, + "step": 154785 + }, + { + "epoch": 1.86, + "grad_norm": 6.88073969812324, + "learning_rate": 2.5124730817642993e-07, + "loss": 0.9441, + "step": 154788 + }, + { + "epoch": 1.86, + "grad_norm": 8.627665071923786, + "learning_rate": 2.5111719830454793e-07, + "loss": 0.9245, + "step": 154791 + }, + { + "epoch": 1.86, + "grad_norm": 7.863243602646083, + "learning_rate": 2.5098712170209204e-07, + "loss": 0.7413, + "step": 154794 + }, + { + "epoch": 1.86, + "grad_norm": 9.191640033626458, + "learning_rate": 2.5085707836950526e-07, + "loss": 1.1671, + "step": 154797 + }, + { + "epoch": 1.86, + "grad_norm": 9.390155413176217, + "learning_rate": 2.507270683072316e-07, + "loss": 1.283, + "step": 154800 + }, + { + "epoch": 1.86, + "grad_norm": 12.38818209148652, + "learning_rate": 2.505970915157141e-07, + "loss": 1.3857, + "step": 154803 + }, + { + "epoch": 1.86, + "grad_norm": 3.697066474170116, + "learning_rate": 2.504671479953968e-07, + "loss": 1.1721, + "step": 154806 + }, + { + "epoch": 1.86, + "grad_norm": 18.149033823857383, + "learning_rate": 2.50337237746725e-07, + "loss": 1.0572, + "step": 154809 + }, + { + "epoch": 1.86, + "grad_norm": 21.94204524853079, + "learning_rate": 2.5020736077014053e-07, + "loss": 1.1145, + "step": 154812 + }, + { + "epoch": 1.86, + "grad_norm": 23.665887463373924, + "learning_rate": 2.5007751706608516e-07, + "loss": 1.1716, + "step": 154815 + }, + { + "epoch": 1.86, + "grad_norm": 6.488501357037192, + "learning_rate": 2.499477066350031e-07, + "loss": 1.0552, + "step": 154818 + }, + { + "epoch": 1.86, + "grad_norm": 10.40617490267411, + "learning_rate": 2.4981792947733843e-07, + "loss": 1.5428, + "step": 154821 + }, + { + "epoch": 1.86, + "grad_norm": 11.51515267284711, + "learning_rate": 2.496881855935329e-07, + "loss": 1.029, + "step": 154824 + }, + { + "epoch": 1.86, + "grad_norm": 48.322674219135955, + "learning_rate": 2.495584749840296e-07, + "loss": 1.0444, + "step": 154827 + }, + { + "epoch": 1.86, + "grad_norm": 3.321308143429676, + "learning_rate": 2.494287976492704e-07, + "loss": 1.4442, + "step": 154830 + }, + { + "epoch": 1.86, + "grad_norm": 15.143055952127483, + "learning_rate": 2.4929915358969935e-07, + "loss": 1.0107, + "step": 154833 + }, + { + "epoch": 1.86, + "grad_norm": 12.718362630523968, + "learning_rate": 2.4916954280575834e-07, + "loss": 0.9915, + "step": 154836 + }, + { + "epoch": 1.86, + "grad_norm": 16.15215982176721, + "learning_rate": 2.490399652978892e-07, + "loss": 1.0375, + "step": 154839 + }, + { + "epoch": 1.86, + "grad_norm": 10.557627839925198, + "learning_rate": 2.4891042106653387e-07, + "loss": 1.2839, + "step": 154842 + }, + { + "epoch": 1.86, + "grad_norm": 8.339942157401339, + "learning_rate": 2.4878091011213525e-07, + "loss": 1.323, + "step": 154845 + }, + { + "epoch": 1.86, + "grad_norm": 7.6913913668337095, + "learning_rate": 2.486514324351341e-07, + "loss": 1.3419, + "step": 154848 + }, + { + "epoch": 1.86, + "grad_norm": 31.73537948834735, + "learning_rate": 2.4852198803597573e-07, + "loss": 1.3371, + "step": 154851 + }, + { + "epoch": 1.86, + "grad_norm": 24.397340530030963, + "learning_rate": 2.4839257691509743e-07, + "loss": 1.6468, + "step": 154854 + }, + { + "epoch": 1.86, + "grad_norm": 14.183413526317333, + "learning_rate": 2.482631990729434e-07, + "loss": 1.0503, + "step": 154857 + }, + { + "epoch": 1.86, + "grad_norm": 10.055321151033121, + "learning_rate": 2.481338545099543e-07, + "loss": 1.2156, + "step": 154860 + }, + { + "epoch": 1.86, + "grad_norm": 13.054510837226935, + "learning_rate": 2.4800454322657095e-07, + "loss": 1.4833, + "step": 154863 + }, + { + "epoch": 1.86, + "grad_norm": 8.865408799100646, + "learning_rate": 2.478752652232363e-07, + "loss": 0.9811, + "step": 154866 + }, + { + "epoch": 1.86, + "grad_norm": 4.733435079136309, + "learning_rate": 2.477460205003901e-07, + "loss": 0.9642, + "step": 154869 + }, + { + "epoch": 1.86, + "grad_norm": 14.311846267458609, + "learning_rate": 2.476168090584741e-07, + "loss": 1.091, + "step": 154872 + }, + { + "epoch": 1.86, + "grad_norm": 1.9930585217602559, + "learning_rate": 2.4748763089793013e-07, + "loss": 1.1477, + "step": 154875 + }, + { + "epoch": 1.86, + "grad_norm": 15.973824180994681, + "learning_rate": 2.47358486019198e-07, + "loss": 0.9165, + "step": 154878 + }, + { + "epoch": 1.86, + "grad_norm": 7.385428871917022, + "learning_rate": 2.4722937442271833e-07, + "loss": 0.8161, + "step": 154881 + }, + { + "epoch": 1.86, + "grad_norm": 6.39099898924791, + "learning_rate": 2.471002961089308e-07, + "loss": 0.9544, + "step": 154884 + }, + { + "epoch": 1.86, + "grad_norm": 9.027544131813222, + "learning_rate": 2.4697125107827736e-07, + "loss": 1.2717, + "step": 154887 + }, + { + "epoch": 1.86, + "grad_norm": 7.482375141620308, + "learning_rate": 2.4684223933119753e-07, + "loss": 1.188, + "step": 154890 + }, + { + "epoch": 1.86, + "grad_norm": 5.467668278563615, + "learning_rate": 2.467132608681333e-07, + "loss": 0.9421, + "step": 154893 + }, + { + "epoch": 1.86, + "grad_norm": 2.2849411370698527, + "learning_rate": 2.465843156895242e-07, + "loss": 1.2575, + "step": 154896 + }, + { + "epoch": 1.86, + "grad_norm": 6.392694760314528, + "learning_rate": 2.464554037958078e-07, + "loss": 0.7642, + "step": 154899 + }, + { + "epoch": 1.86, + "grad_norm": 9.370215377364698, + "learning_rate": 2.4632652518742584e-07, + "loss": 1.4634, + "step": 154902 + }, + { + "epoch": 1.86, + "grad_norm": 2.8695835689194595, + "learning_rate": 2.4619767986482023e-07, + "loss": 1.1267, + "step": 154905 + }, + { + "epoch": 1.86, + "grad_norm": 5.6061659024155075, + "learning_rate": 2.460688678284262e-07, + "loss": 1.0039, + "step": 154908 + }, + { + "epoch": 1.86, + "grad_norm": 6.7749417750101255, + "learning_rate": 2.459400890786867e-07, + "loss": 1.1534, + "step": 154911 + }, + { + "epoch": 1.86, + "grad_norm": 12.011614839034376, + "learning_rate": 2.458113436160403e-07, + "loss": 1.1796, + "step": 154914 + }, + { + "epoch": 1.86, + "grad_norm": 4.930928035099665, + "learning_rate": 2.4568263144092774e-07, + "loss": 0.977, + "step": 154917 + }, + { + "epoch": 1.86, + "grad_norm": 2.3420975321848743, + "learning_rate": 2.4555395255378645e-07, + "loss": 1.5601, + "step": 154920 + }, + { + "epoch": 1.86, + "grad_norm": 7.62021664982918, + "learning_rate": 2.4542530695505497e-07, + "loss": 0.9977, + "step": 154923 + }, + { + "epoch": 1.86, + "grad_norm": 2.5372641259610287, + "learning_rate": 2.452966946451729e-07, + "loss": 1.3123, + "step": 154926 + }, + { + "epoch": 1.86, + "grad_norm": 8.010454897985232, + "learning_rate": 2.4516811562458e-07, + "loss": 0.9277, + "step": 154929 + }, + { + "epoch": 1.86, + "grad_norm": 14.315608722080468, + "learning_rate": 2.450395698937158e-07, + "loss": 1.3463, + "step": 154932 + }, + { + "epoch": 1.86, + "grad_norm": 17.85611388748797, + "learning_rate": 2.449110574530167e-07, + "loss": 0.8948, + "step": 154935 + }, + { + "epoch": 1.86, + "grad_norm": 7.145958108533273, + "learning_rate": 2.447825783029223e-07, + "loss": 0.9753, + "step": 154938 + }, + { + "epoch": 1.86, + "grad_norm": 3.4822696152675774, + "learning_rate": 2.4465413244387117e-07, + "loss": 1.1576, + "step": 154941 + }, + { + "epoch": 1.86, + "grad_norm": 4.859348956280418, + "learning_rate": 2.445257198763018e-07, + "loss": 1.1862, + "step": 154944 + }, + { + "epoch": 1.86, + "grad_norm": 12.830296843999731, + "learning_rate": 2.443973406006528e-07, + "loss": 1.2298, + "step": 154947 + }, + { + "epoch": 1.86, + "grad_norm": 19.089411068744642, + "learning_rate": 2.4426899461736156e-07, + "loss": 1.4642, + "step": 154950 + }, + { + "epoch": 1.86, + "grad_norm": 7.38743584521637, + "learning_rate": 2.441406819268655e-07, + "loss": 1.1944, + "step": 154953 + }, + { + "epoch": 1.86, + "grad_norm": 5.953311438704557, + "learning_rate": 2.440124025296031e-07, + "loss": 1.2536, + "step": 154956 + }, + { + "epoch": 1.86, + "grad_norm": 11.783025527276246, + "learning_rate": 2.4388415642601413e-07, + "loss": 0.765, + "step": 154959 + }, + { + "epoch": 1.86, + "grad_norm": 10.443169647854942, + "learning_rate": 2.4375594361653374e-07, + "loss": 1.0889, + "step": 154962 + }, + { + "epoch": 1.86, + "grad_norm": 6.196206622551867, + "learning_rate": 2.436277641015994e-07, + "loss": 0.8322, + "step": 154965 + }, + { + "epoch": 1.86, + "grad_norm": 5.537093761743001, + "learning_rate": 2.4349961788164845e-07, + "loss": 1.2721, + "step": 154968 + }, + { + "epoch": 1.86, + "grad_norm": 5.501467213317579, + "learning_rate": 2.433715049571195e-07, + "loss": 0.9952, + "step": 154971 + }, + { + "epoch": 1.86, + "grad_norm": 2.8199571492806013, + "learning_rate": 2.4324342532845104e-07, + "loss": 0.8019, + "step": 154974 + }, + { + "epoch": 1.86, + "grad_norm": 5.653132584798203, + "learning_rate": 2.4311537899607717e-07, + "loss": 1.2548, + "step": 154977 + }, + { + "epoch": 1.86, + "grad_norm": 8.777966222324642, + "learning_rate": 2.429873659604365e-07, + "loss": 1.021, + "step": 154980 + }, + { + "epoch": 1.86, + "grad_norm": 3.5347736479171967, + "learning_rate": 2.428593862219641e-07, + "loss": 0.9513, + "step": 154983 + }, + { + "epoch": 1.86, + "grad_norm": 14.122241478581213, + "learning_rate": 2.4273143978109983e-07, + "loss": 1.1851, + "step": 154986 + }, + { + "epoch": 1.86, + "grad_norm": 5.796343704787648, + "learning_rate": 2.426035266382776e-07, + "loss": 1.195, + "step": 154989 + }, + { + "epoch": 1.86, + "grad_norm": 8.060153788098633, + "learning_rate": 2.4247564679393485e-07, + "loss": 1.2014, + "step": 154992 + }, + { + "epoch": 1.86, + "grad_norm": 3.0884352677752718, + "learning_rate": 2.42347800248508e-07, + "loss": 0.9969, + "step": 154995 + }, + { + "epoch": 1.86, + "grad_norm": 2.6415005678578782, + "learning_rate": 2.4221998700243444e-07, + "loss": 1.1525, + "step": 154998 + }, + { + "epoch": 1.86, + "grad_norm": 14.723302149524185, + "learning_rate": 2.420922070561493e-07, + "loss": 1.416, + "step": 155001 + }, + { + "epoch": 1.86, + "grad_norm": 21.782725439552713, + "learning_rate": 2.4196446041008794e-07, + "loss": 1.126, + "step": 155004 + }, + { + "epoch": 1.86, + "grad_norm": 9.544387968241688, + "learning_rate": 2.4183674706468765e-07, + "loss": 0.5821, + "step": 155007 + }, + { + "epoch": 1.86, + "grad_norm": 5.870503238013984, + "learning_rate": 2.4170906702038254e-07, + "loss": 0.8487, + "step": 155010 + }, + { + "epoch": 1.86, + "grad_norm": 3.5615459202610453, + "learning_rate": 2.415814202776112e-07, + "loss": 1.3411, + "step": 155013 + }, + { + "epoch": 1.86, + "grad_norm": 7.1473466244393595, + "learning_rate": 2.414538068368055e-07, + "loss": 1.038, + "step": 155016 + }, + { + "epoch": 1.86, + "grad_norm": 8.468674787329075, + "learning_rate": 2.41326226698404e-07, + "loss": 1.2644, + "step": 155019 + }, + { + "epoch": 1.86, + "grad_norm": 6.24329087264109, + "learning_rate": 2.4119867986284185e-07, + "loss": 1.1494, + "step": 155022 + }, + { + "epoch": 1.86, + "grad_norm": 4.14268316027121, + "learning_rate": 2.4107116633055204e-07, + "loss": 1.1002, + "step": 155025 + }, + { + "epoch": 1.86, + "grad_norm": 5.752725963333926, + "learning_rate": 2.4094368610197204e-07, + "loss": 1.1646, + "step": 155028 + }, + { + "epoch": 1.86, + "grad_norm": 3.1915250782935294, + "learning_rate": 2.4081623917753596e-07, + "loss": 0.9878, + "step": 155031 + }, + { + "epoch": 1.86, + "grad_norm": 21.158568687603758, + "learning_rate": 2.406888255576778e-07, + "loss": 0.879, + "step": 155034 + }, + { + "epoch": 1.86, + "grad_norm": 11.273930257195937, + "learning_rate": 2.40561445242834e-07, + "loss": 1.4857, + "step": 155037 + }, + { + "epoch": 1.86, + "grad_norm": 2.848765328591371, + "learning_rate": 2.4043409823343967e-07, + "loss": 1.1579, + "step": 155040 + }, + { + "epoch": 1.86, + "grad_norm": 3.7289957425769473, + "learning_rate": 2.4030678452992673e-07, + "loss": 0.8863, + "step": 155043 + }, + { + "epoch": 1.86, + "grad_norm": 9.299693540889521, + "learning_rate": 2.4017950413273263e-07, + "loss": 1.0131, + "step": 155046 + }, + { + "epoch": 1.86, + "grad_norm": 3.3951329019165595, + "learning_rate": 2.400522570422903e-07, + "loss": 1.0556, + "step": 155049 + }, + { + "epoch": 1.86, + "grad_norm": 17.92431678565349, + "learning_rate": 2.3992504325903276e-07, + "loss": 0.8911, + "step": 155052 + }, + { + "epoch": 1.86, + "grad_norm": 17.334377188082136, + "learning_rate": 2.3979786278339747e-07, + "loss": 1.5195, + "step": 155055 + }, + { + "epoch": 1.86, + "grad_norm": 13.828902039311608, + "learning_rate": 2.396707156158151e-07, + "loss": 1.1662, + "step": 155058 + }, + { + "epoch": 1.86, + "grad_norm": 4.148105697947841, + "learning_rate": 2.39543601756721e-07, + "loss": 0.9793, + "step": 155061 + }, + { + "epoch": 1.86, + "grad_norm": 13.131750427646766, + "learning_rate": 2.3941652120654913e-07, + "loss": 0.8783, + "step": 155064 + }, + { + "epoch": 1.86, + "grad_norm": 13.991176527783482, + "learning_rate": 2.3928947396573256e-07, + "loss": 1.1526, + "step": 155067 + }, + { + "epoch": 1.86, + "grad_norm": 4.253914087579176, + "learning_rate": 2.3916246003470644e-07, + "loss": 1.3171, + "step": 155070 + }, + { + "epoch": 1.86, + "grad_norm": 10.75613599629242, + "learning_rate": 2.390354794139016e-07, + "loss": 0.9526, + "step": 155073 + }, + { + "epoch": 1.86, + "grad_norm": 3.7566581216972814, + "learning_rate": 2.389085321037532e-07, + "loss": 0.8952, + "step": 155076 + }, + { + "epoch": 1.86, + "grad_norm": 7.665597665432337, + "learning_rate": 2.3878161810469424e-07, + "loss": 0.9174, + "step": 155079 + }, + { + "epoch": 1.86, + "grad_norm": 3.585027540939523, + "learning_rate": 2.386547374171577e-07, + "loss": 1.3099, + "step": 155082 + }, + { + "epoch": 1.86, + "grad_norm": 6.194957477471732, + "learning_rate": 2.3852789004157664e-07, + "loss": 1.5199, + "step": 155085 + }, + { + "epoch": 1.86, + "grad_norm": 6.546112992744429, + "learning_rate": 2.384010759783839e-07, + "loss": 1.4221, + "step": 155088 + }, + { + "epoch": 1.86, + "grad_norm": 5.098958158556486, + "learning_rate": 2.382742952280115e-07, + "loss": 1.1284, + "step": 155091 + }, + { + "epoch": 1.86, + "grad_norm": 17.59519590940137, + "learning_rate": 2.3814754779089456e-07, + "loss": 1.0106, + "step": 155094 + }, + { + "epoch": 1.86, + "grad_norm": 8.128218788442831, + "learning_rate": 2.3802083366746166e-07, + "loss": 1.0946, + "step": 155097 + }, + { + "epoch": 1.87, + "grad_norm": 6.487918482229355, + "learning_rate": 2.37894152858148e-07, + "loss": 1.1223, + "step": 155100 + }, + { + "epoch": 1.87, + "grad_norm": 12.151259072843873, + "learning_rate": 2.3776750536338544e-07, + "loss": 0.6811, + "step": 155103 + }, + { + "epoch": 1.87, + "grad_norm": 6.110976102304538, + "learning_rate": 2.3764089118360588e-07, + "loss": 1.1785, + "step": 155106 + }, + { + "epoch": 1.87, + "grad_norm": 3.114859063473541, + "learning_rate": 2.3751431031924233e-07, + "loss": 1.2271, + "step": 155109 + }, + { + "epoch": 1.87, + "grad_norm": 8.226771298231496, + "learning_rate": 2.3738776277072551e-07, + "loss": 0.839, + "step": 155112 + }, + { + "epoch": 1.87, + "grad_norm": 8.707568400573333, + "learning_rate": 2.3726124853848732e-07, + "loss": 1.0559, + "step": 155115 + }, + { + "epoch": 1.87, + "grad_norm": 13.439788292361023, + "learning_rate": 2.3713476762296072e-07, + "loss": 1.2923, + "step": 155118 + }, + { + "epoch": 1.87, + "grad_norm": 28.19620590664481, + "learning_rate": 2.3700832002457653e-07, + "loss": 1.3014, + "step": 155121 + }, + { + "epoch": 1.87, + "grad_norm": 4.788157848163899, + "learning_rate": 2.3688190574376546e-07, + "loss": 1.3721, + "step": 155124 + }, + { + "epoch": 1.87, + "grad_norm": 7.484040159856317, + "learning_rate": 2.3675552478096054e-07, + "loss": 0.9922, + "step": 155127 + }, + { + "epoch": 1.87, + "grad_norm": 8.17421665866214, + "learning_rate": 2.366291771365925e-07, + "loss": 0.9184, + "step": 155130 + }, + { + "epoch": 1.87, + "grad_norm": 10.287055655501769, + "learning_rate": 2.3650286281109213e-07, + "loss": 1.0154, + "step": 155133 + }, + { + "epoch": 1.87, + "grad_norm": 6.689993736574083, + "learning_rate": 2.3637658180489021e-07, + "loss": 1.0632, + "step": 155136 + }, + { + "epoch": 1.87, + "grad_norm": 7.31011678930576, + "learning_rate": 2.3625033411841858e-07, + "loss": 1.3563, + "step": 155139 + }, + { + "epoch": 1.87, + "grad_norm": 5.6867169205591, + "learning_rate": 2.3612411975210693e-07, + "loss": 1.1477, + "step": 155142 + }, + { + "epoch": 1.87, + "grad_norm": 16.20161166732987, + "learning_rate": 2.359979387063871e-07, + "loss": 1.0593, + "step": 155145 + }, + { + "epoch": 1.87, + "grad_norm": 21.49829351974798, + "learning_rate": 2.35871790981691e-07, + "loss": 1.1992, + "step": 155148 + }, + { + "epoch": 1.87, + "grad_norm": 5.983207206455991, + "learning_rate": 2.3574567657844493e-07, + "loss": 1.4415, + "step": 155151 + }, + { + "epoch": 1.87, + "grad_norm": 4.249138011984313, + "learning_rate": 2.3561959549708412e-07, + "loss": 1.0, + "step": 155154 + }, + { + "epoch": 1.87, + "grad_norm": 8.588428416952244, + "learning_rate": 2.354935477380349e-07, + "loss": 1.1301, + "step": 155157 + }, + { + "epoch": 1.87, + "grad_norm": 10.451466504330616, + "learning_rate": 2.3536753330172912e-07, + "loss": 1.5886, + "step": 155160 + }, + { + "epoch": 1.87, + "grad_norm": 8.258632340632623, + "learning_rate": 2.3524155218859758e-07, + "loss": 1.148, + "step": 155163 + }, + { + "epoch": 1.87, + "grad_norm": 6.821653636855424, + "learning_rate": 2.3511560439906877e-07, + "loss": 1.1398, + "step": 155166 + }, + { + "epoch": 1.87, + "grad_norm": 18.65066524855391, + "learning_rate": 2.349896899335735e-07, + "loss": 1.2161, + "step": 155169 + }, + { + "epoch": 1.87, + "grad_norm": 6.588193985194428, + "learning_rate": 2.3486380879254143e-07, + "loss": 0.9837, + "step": 155172 + }, + { + "epoch": 1.87, + "grad_norm": 10.479730240462018, + "learning_rate": 2.3473796097640112e-07, + "loss": 1.2824, + "step": 155175 + }, + { + "epoch": 1.87, + "grad_norm": 11.864602214124613, + "learning_rate": 2.3461214648558216e-07, + "loss": 1.2148, + "step": 155178 + }, + { + "epoch": 1.87, + "grad_norm": 4.355385156301517, + "learning_rate": 2.344863653205154e-07, + "loss": 0.9845, + "step": 155181 + }, + { + "epoch": 1.87, + "grad_norm": 6.410448217413819, + "learning_rate": 2.343606174816282e-07, + "loss": 1.0503, + "step": 155184 + }, + { + "epoch": 1.87, + "grad_norm": 6.000928450118888, + "learning_rate": 2.3423490296935136e-07, + "loss": 1.3323, + "step": 155187 + }, + { + "epoch": 1.87, + "grad_norm": 7.9303358291666255, + "learning_rate": 2.3410922178411344e-07, + "loss": 0.6704, + "step": 155190 + }, + { + "epoch": 1.87, + "grad_norm": 13.542750143212878, + "learning_rate": 2.3398357392634297e-07, + "loss": 1.1191, + "step": 155193 + }, + { + "epoch": 1.87, + "grad_norm": 8.845069924393528, + "learning_rate": 2.338579593964685e-07, + "loss": 1.05, + "step": 155196 + }, + { + "epoch": 1.87, + "grad_norm": 4.597432125169919, + "learning_rate": 2.3373237819491968e-07, + "loss": 1.1403, + "step": 155199 + }, + { + "epoch": 1.87, + "grad_norm": 15.520300157407728, + "learning_rate": 2.3360683032212396e-07, + "loss": 1.4345, + "step": 155202 + }, + { + "epoch": 1.87, + "grad_norm": 3.067570297868989, + "learning_rate": 2.3348131577850986e-07, + "loss": 1.1473, + "step": 155205 + }, + { + "epoch": 1.87, + "grad_norm": 7.811431864966912, + "learning_rate": 2.3335583456450706e-07, + "loss": 1.0528, + "step": 155208 + }, + { + "epoch": 1.87, + "grad_norm": 7.74485059421929, + "learning_rate": 2.3323038668054187e-07, + "loss": 1.4645, + "step": 155211 + }, + { + "epoch": 1.87, + "grad_norm": 7.124506933910542, + "learning_rate": 2.3310497212704398e-07, + "loss": 1.0215, + "step": 155214 + }, + { + "epoch": 1.87, + "grad_norm": 9.439941364057265, + "learning_rate": 2.3297959090444188e-07, + "loss": 1.0217, + "step": 155217 + }, + { + "epoch": 1.87, + "grad_norm": 8.872744274160745, + "learning_rate": 2.3285424301316084e-07, + "loss": 1.1573, + "step": 155220 + }, + { + "epoch": 1.87, + "grad_norm": 5.571673072681039, + "learning_rate": 2.3272892845363048e-07, + "loss": 1.2473, + "step": 155223 + }, + { + "epoch": 1.87, + "grad_norm": 11.197619602794, + "learning_rate": 2.3260364722627715e-07, + "loss": 0.9097, + "step": 155226 + }, + { + "epoch": 1.87, + "grad_norm": 4.074764120870227, + "learning_rate": 2.324783993315316e-07, + "loss": 0.9099, + "step": 155229 + }, + { + "epoch": 1.87, + "grad_norm": 82.40023844823473, + "learning_rate": 2.3235318476981795e-07, + "loss": 1.126, + "step": 155232 + }, + { + "epoch": 1.87, + "grad_norm": 13.430510977088693, + "learning_rate": 2.322280035415636e-07, + "loss": 1.1999, + "step": 155235 + }, + { + "epoch": 1.87, + "grad_norm": 10.937575910075996, + "learning_rate": 2.3210285564719826e-07, + "loss": 1.1206, + "step": 155238 + }, + { + "epoch": 1.87, + "grad_norm": 3.5920229709983453, + "learning_rate": 2.319777410871471e-07, + "loss": 1.2228, + "step": 155241 + }, + { + "epoch": 1.87, + "grad_norm": 21.614129084739826, + "learning_rate": 2.3185265986183759e-07, + "loss": 1.0851, + "step": 155244 + }, + { + "epoch": 1.87, + "grad_norm": 3.977091530622355, + "learning_rate": 2.3172761197169601e-07, + "loss": 1.1634, + "step": 155247 + }, + { + "epoch": 1.87, + "grad_norm": 4.149359361969991, + "learning_rate": 2.3160259741714875e-07, + "loss": 1.0265, + "step": 155250 + }, + { + "epoch": 1.87, + "grad_norm": 9.701154765433007, + "learning_rate": 2.314776161986243e-07, + "loss": 1.2436, + "step": 155253 + }, + { + "epoch": 1.87, + "grad_norm": 13.322031933186777, + "learning_rate": 2.31352668316549e-07, + "loss": 1.1724, + "step": 155256 + }, + { + "epoch": 1.87, + "grad_norm": 7.323064535249267, + "learning_rate": 2.312277537713481e-07, + "loss": 1.1579, + "step": 155259 + }, + { + "epoch": 1.87, + "grad_norm": 15.275435475036957, + "learning_rate": 2.3110287256344678e-07, + "loss": 1.327, + "step": 155262 + }, + { + "epoch": 1.87, + "grad_norm": 2.8193119754821545, + "learning_rate": 2.3097802469327357e-07, + "loss": 0.9678, + "step": 155265 + }, + { + "epoch": 1.87, + "grad_norm": 3.8959333112603, + "learning_rate": 2.308532101612526e-07, + "loss": 1.1855, + "step": 155268 + }, + { + "epoch": 1.87, + "grad_norm": 8.148102738989223, + "learning_rate": 2.3072842896781245e-07, + "loss": 1.254, + "step": 155271 + }, + { + "epoch": 1.87, + "grad_norm": 12.181004000017118, + "learning_rate": 2.3060368111337605e-07, + "loss": 1.2643, + "step": 155274 + }, + { + "epoch": 1.87, + "grad_norm": 3.1556148289996924, + "learning_rate": 2.3047896659837088e-07, + "loss": 0.935, + "step": 155277 + }, + { + "epoch": 1.87, + "grad_norm": 7.768561946598099, + "learning_rate": 2.3035428542322213e-07, + "loss": 1.0271, + "step": 155280 + }, + { + "epoch": 1.87, + "grad_norm": 7.632155062624196, + "learning_rate": 2.3022963758835504e-07, + "loss": 1.0114, + "step": 155283 + }, + { + "epoch": 1.87, + "grad_norm": 9.65483706316855, + "learning_rate": 2.3010502309419481e-07, + "loss": 1.108, + "step": 155286 + }, + { + "epoch": 1.87, + "grad_norm": 8.535095169878986, + "learning_rate": 2.2998044194116775e-07, + "loss": 1.0451, + "step": 155289 + }, + { + "epoch": 1.87, + "grad_norm": 14.425868005069074, + "learning_rate": 2.298558941296969e-07, + "loss": 1.0792, + "step": 155292 + }, + { + "epoch": 1.87, + "grad_norm": 9.497502009959405, + "learning_rate": 2.2973137966020965e-07, + "loss": 0.7759, + "step": 155295 + }, + { + "epoch": 1.87, + "grad_norm": 11.516032866900204, + "learning_rate": 2.296068985331301e-07, + "loss": 1.532, + "step": 155298 + }, + { + "epoch": 1.87, + "grad_norm": 2.9063113784561456, + "learning_rate": 2.2948245074888354e-07, + "loss": 1.1712, + "step": 155301 + }, + { + "epoch": 1.87, + "grad_norm": 19.02528491053684, + "learning_rate": 2.2935803630789288e-07, + "loss": 0.8987, + "step": 155304 + }, + { + "epoch": 1.87, + "grad_norm": 6.2944103443757005, + "learning_rate": 2.292336552105845e-07, + "loss": 1.0153, + "step": 155307 + }, + { + "epoch": 1.87, + "grad_norm": 6.46767073231926, + "learning_rate": 2.2910930745738246e-07, + "loss": 1.0967, + "step": 155310 + }, + { + "epoch": 1.87, + "grad_norm": 11.18136692296259, + "learning_rate": 2.289849930487098e-07, + "loss": 1.109, + "step": 155313 + }, + { + "epoch": 1.87, + "grad_norm": 7.154190403509105, + "learning_rate": 2.2886071198499281e-07, + "loss": 1.3304, + "step": 155316 + }, + { + "epoch": 1.87, + "grad_norm": 13.640084340221122, + "learning_rate": 2.2873646426665342e-07, + "loss": 0.8598, + "step": 155319 + }, + { + "epoch": 1.87, + "grad_norm": 10.00028219963883, + "learning_rate": 2.2861224989411902e-07, + "loss": 1.1036, + "step": 155322 + }, + { + "epoch": 1.87, + "grad_norm": 2.8359992057402064, + "learning_rate": 2.2848806886781038e-07, + "loss": 0.9603, + "step": 155325 + }, + { + "epoch": 1.87, + "grad_norm": 12.30565519721494, + "learning_rate": 2.2836392118815164e-07, + "loss": 1.1616, + "step": 155328 + }, + { + "epoch": 1.87, + "grad_norm": 15.232914410222902, + "learning_rate": 2.2823980685556802e-07, + "loss": 0.883, + "step": 155331 + }, + { + "epoch": 1.87, + "grad_norm": 5.239839081315516, + "learning_rate": 2.2811572587048137e-07, + "loss": 0.8085, + "step": 155334 + }, + { + "epoch": 1.87, + "grad_norm": 7.431209923249173, + "learning_rate": 2.279916782333169e-07, + "loss": 0.9972, + "step": 155337 + }, + { + "epoch": 1.87, + "grad_norm": 8.416339231868562, + "learning_rate": 2.2786766394449545e-07, + "loss": 1.1114, + "step": 155340 + }, + { + "epoch": 1.87, + "grad_norm": 21.343558573337067, + "learning_rate": 2.2774368300444326e-07, + "loss": 1.1216, + "step": 155343 + }, + { + "epoch": 1.87, + "grad_norm": 16.009176314394086, + "learning_rate": 2.276197354135812e-07, + "loss": 1.2717, + "step": 155346 + }, + { + "epoch": 1.87, + "grad_norm": 3.366245675489469, + "learning_rate": 2.2749582117233216e-07, + "loss": 1.41, + "step": 155349 + }, + { + "epoch": 1.87, + "grad_norm": 8.584318465859793, + "learning_rate": 2.2737194028112142e-07, + "loss": 1.2708, + "step": 155352 + }, + { + "epoch": 1.87, + "grad_norm": 9.411567865367017, + "learning_rate": 2.2724809274036973e-07, + "loss": 1.5769, + "step": 155355 + }, + { + "epoch": 1.87, + "grad_norm": 8.66550961860751, + "learning_rate": 2.2712427855050012e-07, + "loss": 1.5196, + "step": 155358 + }, + { + "epoch": 1.87, + "grad_norm": 12.81978444395044, + "learning_rate": 2.2700049771193443e-07, + "loss": 1.0759, + "step": 155361 + }, + { + "epoch": 1.87, + "grad_norm": 5.831639375229266, + "learning_rate": 2.268767502250968e-07, + "loss": 1.012, + "step": 155364 + }, + { + "epoch": 1.87, + "grad_norm": 2.8681611149662944, + "learning_rate": 2.267530360904091e-07, + "loss": 1.0167, + "step": 155367 + }, + { + "epoch": 1.87, + "grad_norm": 3.841751266675508, + "learning_rate": 2.26629355308291e-07, + "loss": 1.3502, + "step": 155370 + }, + { + "epoch": 1.87, + "grad_norm": 7.824449222436608, + "learning_rate": 2.265057078791677e-07, + "loss": 1.3201, + "step": 155373 + }, + { + "epoch": 1.87, + "grad_norm": 17.225468195792125, + "learning_rate": 2.2638209380346e-07, + "loss": 1.336, + "step": 155376 + }, + { + "epoch": 1.87, + "grad_norm": 5.959786596669249, + "learning_rate": 2.2625851308158976e-07, + "loss": 1.2525, + "step": 155379 + }, + { + "epoch": 1.87, + "grad_norm": 4.9021511212215145, + "learning_rate": 2.2613496571397885e-07, + "loss": 1.2803, + "step": 155382 + }, + { + "epoch": 1.87, + "grad_norm": 3.404761880665409, + "learning_rate": 2.260114517010481e-07, + "loss": 0.9748, + "step": 155385 + }, + { + "epoch": 1.87, + "grad_norm": 4.605446834607317, + "learning_rate": 2.2588797104322046e-07, + "loss": 1.1293, + "step": 155388 + }, + { + "epoch": 1.87, + "grad_norm": 9.484302356844314, + "learning_rate": 2.2576452374091674e-07, + "loss": 1.2711, + "step": 155391 + }, + { + "epoch": 1.87, + "grad_norm": 5.688992825495813, + "learning_rate": 2.2564110979455767e-07, + "loss": 1.4068, + "step": 155394 + }, + { + "epoch": 1.87, + "grad_norm": 10.499300672550055, + "learning_rate": 2.2551772920456406e-07, + "loss": 0.7325, + "step": 155397 + }, + { + "epoch": 1.87, + "grad_norm": 35.892874997416456, + "learning_rate": 2.2539438197135776e-07, + "loss": 0.9123, + "step": 155400 + }, + { + "epoch": 1.87, + "grad_norm": 5.4625681804032356, + "learning_rate": 2.252710680953607e-07, + "loss": 1.0385, + "step": 155403 + }, + { + "epoch": 1.87, + "grad_norm": 10.57558271594562, + "learning_rate": 2.251477875769925e-07, + "loss": 1.3937, + "step": 155406 + }, + { + "epoch": 1.87, + "grad_norm": 7.291464388220039, + "learning_rate": 2.25024540416674e-07, + "loss": 0.8896, + "step": 155409 + }, + { + "epoch": 1.87, + "grad_norm": 14.419156541969238, + "learning_rate": 2.2490132661482588e-07, + "loss": 0.8672, + "step": 155412 + }, + { + "epoch": 1.87, + "grad_norm": 15.979985163855462, + "learning_rate": 2.2477814617186677e-07, + "loss": 0.8955, + "step": 155415 + }, + { + "epoch": 1.87, + "grad_norm": 12.894962303970921, + "learning_rate": 2.2465499908822186e-07, + "loss": 1.0837, + "step": 155418 + }, + { + "epoch": 1.87, + "grad_norm": 4.407765478720328, + "learning_rate": 2.2453188536430638e-07, + "loss": 1.3966, + "step": 155421 + }, + { + "epoch": 1.87, + "grad_norm": 7.393586356827288, + "learning_rate": 2.244088050005422e-07, + "loss": 1.1408, + "step": 155424 + }, + { + "epoch": 1.87, + "grad_norm": 7.150433878093008, + "learning_rate": 2.2428575799735008e-07, + "loss": 1.1027, + "step": 155427 + }, + { + "epoch": 1.87, + "grad_norm": 6.5892098112768105, + "learning_rate": 2.2416274435514974e-07, + "loss": 0.9841, + "step": 155430 + }, + { + "epoch": 1.87, + "grad_norm": 4.0530085300782375, + "learning_rate": 2.240397640743608e-07, + "loss": 1.3206, + "step": 155433 + }, + { + "epoch": 1.87, + "grad_norm": 13.341853329748316, + "learning_rate": 2.2391681715540292e-07, + "loss": 0.9493, + "step": 155436 + }, + { + "epoch": 1.87, + "grad_norm": 8.976605408920257, + "learning_rate": 2.2379390359869468e-07, + "loss": 1.091, + "step": 155439 + }, + { + "epoch": 1.87, + "grad_norm": 14.661289898740755, + "learning_rate": 2.2367102340465686e-07, + "loss": 1.1885, + "step": 155442 + }, + { + "epoch": 1.87, + "grad_norm": 3.209818742887091, + "learning_rate": 2.235481765737091e-07, + "loss": 1.4289, + "step": 155445 + }, + { + "epoch": 1.87, + "grad_norm": 13.00105275686534, + "learning_rate": 2.2342536310626994e-07, + "loss": 1.1001, + "step": 155448 + }, + { + "epoch": 1.87, + "grad_norm": 9.256818160889539, + "learning_rate": 2.2330258300275799e-07, + "loss": 1.1462, + "step": 155451 + }, + { + "epoch": 1.87, + "grad_norm": 8.665368062848382, + "learning_rate": 2.2317983626359285e-07, + "loss": 1.4108, + "step": 155454 + }, + { + "epoch": 1.87, + "grad_norm": 13.321760766321121, + "learning_rate": 2.2305712288919311e-07, + "loss": 1.1901, + "step": 155457 + }, + { + "epoch": 1.87, + "grad_norm": 8.34125896602844, + "learning_rate": 2.2293444287997844e-07, + "loss": 1.0821, + "step": 155460 + }, + { + "epoch": 1.87, + "grad_norm": 12.669947413407375, + "learning_rate": 2.228117962363663e-07, + "loss": 1.055, + "step": 155463 + }, + { + "epoch": 1.87, + "grad_norm": 10.792153457978483, + "learning_rate": 2.2268918295877627e-07, + "loss": 1.1319, + "step": 155466 + }, + { + "epoch": 1.87, + "grad_norm": 5.38881198844878, + "learning_rate": 2.225666030476248e-07, + "loss": 0.8945, + "step": 155469 + }, + { + "epoch": 1.87, + "grad_norm": 11.26271777355231, + "learning_rate": 2.2244405650333368e-07, + "loss": 1.1835, + "step": 155472 + }, + { + "epoch": 1.87, + "grad_norm": 18.540972032614523, + "learning_rate": 2.223215433263193e-07, + "loss": 1.0127, + "step": 155475 + }, + { + "epoch": 1.87, + "grad_norm": 9.68284790735783, + "learning_rate": 2.2219906351699795e-07, + "loss": 0.9554, + "step": 155478 + }, + { + "epoch": 1.87, + "grad_norm": 6.902936613582595, + "learning_rate": 2.220766170757893e-07, + "loss": 1.143, + "step": 155481 + }, + { + "epoch": 1.87, + "grad_norm": 8.041314791502357, + "learning_rate": 2.2195420400311195e-07, + "loss": 1.2007, + "step": 155484 + }, + { + "epoch": 1.87, + "grad_norm": 17.63622714240204, + "learning_rate": 2.218318242993833e-07, + "loss": 0.7786, + "step": 155487 + }, + { + "epoch": 1.87, + "grad_norm": 9.327293446534478, + "learning_rate": 2.2170947796501974e-07, + "loss": 1.5947, + "step": 155490 + }, + { + "epoch": 1.87, + "grad_norm": 15.766842923281573, + "learning_rate": 2.2158716500043974e-07, + "loss": 1.4041, + "step": 155493 + }, + { + "epoch": 1.87, + "grad_norm": 7.108899694918902, + "learning_rate": 2.214648854060608e-07, + "loss": 1.0717, + "step": 155496 + }, + { + "epoch": 1.87, + "grad_norm": 3.649568751509724, + "learning_rate": 2.2134263918230038e-07, + "loss": 0.8104, + "step": 155499 + }, + { + "epoch": 1.87, + "grad_norm": 5.575609227649775, + "learning_rate": 2.2122042632957364e-07, + "loss": 0.921, + "step": 155502 + }, + { + "epoch": 1.87, + "grad_norm": 5.317513850059778, + "learning_rate": 2.2109824684830027e-07, + "loss": 1.0734, + "step": 155505 + }, + { + "epoch": 1.87, + "grad_norm": 9.02338474429413, + "learning_rate": 2.2097610073889664e-07, + "loss": 1.4121, + "step": 155508 + }, + { + "epoch": 1.87, + "grad_norm": 10.289572131838831, + "learning_rate": 2.2085398800177793e-07, + "loss": 0.8726, + "step": 155511 + }, + { + "epoch": 1.87, + "grad_norm": 8.438938178261473, + "learning_rate": 2.2073190863736382e-07, + "loss": 1.2154, + "step": 155514 + }, + { + "epoch": 1.87, + "grad_norm": 6.344431957601093, + "learning_rate": 2.2060986264606953e-07, + "loss": 0.9717, + "step": 155517 + }, + { + "epoch": 1.87, + "grad_norm": 6.316534063922597, + "learning_rate": 2.2048785002830919e-07, + "loss": 0.9448, + "step": 155520 + }, + { + "epoch": 1.87, + "grad_norm": 22.06393019102802, + "learning_rate": 2.2036587078450244e-07, + "loss": 1.3192, + "step": 155523 + }, + { + "epoch": 1.87, + "grad_norm": 3.0851197587584136, + "learning_rate": 2.2024392491506453e-07, + "loss": 0.9032, + "step": 155526 + }, + { + "epoch": 1.87, + "grad_norm": 9.06499086630661, + "learning_rate": 2.2012201242041065e-07, + "loss": 0.938, + "step": 155529 + }, + { + "epoch": 1.87, + "grad_norm": 8.167576579591959, + "learning_rate": 2.2000013330095826e-07, + "loss": 1.2673, + "step": 155532 + }, + { + "epoch": 1.87, + "grad_norm": 6.605608244756009, + "learning_rate": 2.198782875571226e-07, + "loss": 1.1687, + "step": 155535 + }, + { + "epoch": 1.87, + "grad_norm": 16.43449623942458, + "learning_rate": 2.1975647518931997e-07, + "loss": 1.4663, + "step": 155538 + }, + { + "epoch": 1.87, + "grad_norm": 3.8377309742795656, + "learning_rate": 2.1963469619796563e-07, + "loss": 1.2684, + "step": 155541 + }, + { + "epoch": 1.87, + "grad_norm": 5.1653328544980965, + "learning_rate": 2.1951295058347366e-07, + "loss": 1.4458, + "step": 155544 + }, + { + "epoch": 1.87, + "grad_norm": 4.745132020400628, + "learning_rate": 2.1939123834626265e-07, + "loss": 1.0271, + "step": 155547 + }, + { + "epoch": 1.87, + "grad_norm": 8.505258169213587, + "learning_rate": 2.1926955948674554e-07, + "loss": 1.0678, + "step": 155550 + }, + { + "epoch": 1.87, + "grad_norm": 12.9660387632564, + "learning_rate": 2.1914791400533875e-07, + "loss": 1.097, + "step": 155553 + }, + { + "epoch": 1.87, + "grad_norm": 5.292877408296015, + "learning_rate": 2.1902630190245744e-07, + "loss": 0.9729, + "step": 155556 + }, + { + "epoch": 1.87, + "grad_norm": 5.141300840220213, + "learning_rate": 2.1890472317851684e-07, + "loss": 1.1145, + "step": 155559 + }, + { + "epoch": 1.87, + "grad_norm": 5.325963902087894, + "learning_rate": 2.1878317783393e-07, + "loss": 1.1792, + "step": 155562 + }, + { + "epoch": 1.87, + "grad_norm": 4.757538639841392, + "learning_rate": 2.1866166586911208e-07, + "loss": 0.9113, + "step": 155565 + }, + { + "epoch": 1.87, + "grad_norm": 2.8909754386532382, + "learning_rate": 2.1854018728448056e-07, + "loss": 1.3545, + "step": 155568 + }, + { + "epoch": 1.87, + "grad_norm": 4.8003561191828386, + "learning_rate": 2.1841874208044733e-07, + "loss": 1.1314, + "step": 155571 + }, + { + "epoch": 1.87, + "grad_norm": 5.29211792563773, + "learning_rate": 2.182973302574276e-07, + "loss": 1.1908, + "step": 155574 + }, + { + "epoch": 1.87, + "grad_norm": 16.14777035108098, + "learning_rate": 2.181759518158366e-07, + "loss": 1.0022, + "step": 155577 + }, + { + "epoch": 1.87, + "grad_norm": 3.1456784140360257, + "learning_rate": 2.1805460675608848e-07, + "loss": 1.4273, + "step": 155580 + }, + { + "epoch": 1.87, + "grad_norm": 3.7986645964787935, + "learning_rate": 2.1793329507859395e-07, + "loss": 1.206, + "step": 155583 + }, + { + "epoch": 1.87, + "grad_norm": 11.144081834290947, + "learning_rate": 2.1781201678377052e-07, + "loss": 1.0082, + "step": 155586 + }, + { + "epoch": 1.87, + "grad_norm": 6.316852782883952, + "learning_rate": 2.1769077187203114e-07, + "loss": 1.3639, + "step": 155589 + }, + { + "epoch": 1.87, + "grad_norm": 12.391768251122116, + "learning_rate": 2.1756956034378995e-07, + "loss": 1.1134, + "step": 155592 + }, + { + "epoch": 1.87, + "grad_norm": 4.8948987501085925, + "learning_rate": 2.1744838219945997e-07, + "loss": 1.1279, + "step": 155595 + }, + { + "epoch": 1.87, + "grad_norm": 6.3564789328521, + "learning_rate": 2.1732723743945526e-07, + "loss": 0.9419, + "step": 155598 + }, + { + "epoch": 1.87, + "grad_norm": 8.529831625271969, + "learning_rate": 2.1720612606419e-07, + "loss": 1.347, + "step": 155601 + }, + { + "epoch": 1.87, + "grad_norm": 20.187153609941014, + "learning_rate": 2.1708504807407494e-07, + "loss": 1.1404, + "step": 155604 + }, + { + "epoch": 1.87, + "grad_norm": 10.19814125331818, + "learning_rate": 2.1696400346952529e-07, + "loss": 1.4235, + "step": 155607 + }, + { + "epoch": 1.87, + "grad_norm": 10.577783992183324, + "learning_rate": 2.1684299225095297e-07, + "loss": 1.5024, + "step": 155610 + }, + { + "epoch": 1.87, + "grad_norm": 5.080047563823032, + "learning_rate": 2.1672201441877205e-07, + "loss": 0.9958, + "step": 155613 + }, + { + "epoch": 1.87, + "grad_norm": 11.469574572419008, + "learning_rate": 2.1660106997339558e-07, + "loss": 1.1813, + "step": 155616 + }, + { + "epoch": 1.87, + "grad_norm": 6.2026960659769985, + "learning_rate": 2.1648015891523544e-07, + "loss": 1.093, + "step": 155619 + }, + { + "epoch": 1.87, + "grad_norm": 14.720082795916126, + "learning_rate": 2.1635928124470461e-07, + "loss": 1.1511, + "step": 155622 + }, + { + "epoch": 1.87, + "grad_norm": 4.914784994784708, + "learning_rate": 2.1623843696221503e-07, + "loss": 0.953, + "step": 155625 + }, + { + "epoch": 1.87, + "grad_norm": 5.864732160171003, + "learning_rate": 2.1611762606817966e-07, + "loss": 0.7877, + "step": 155628 + }, + { + "epoch": 1.87, + "grad_norm": 5.709755428715045, + "learning_rate": 2.159968485630104e-07, + "loss": 1.1161, + "step": 155631 + }, + { + "epoch": 1.87, + "grad_norm": 8.991179279909472, + "learning_rate": 2.1587610444712138e-07, + "loss": 1.2199, + "step": 155634 + }, + { + "epoch": 1.87, + "grad_norm": 6.847042901358121, + "learning_rate": 2.1575539372092113e-07, + "loss": 1.0102, + "step": 155637 + }, + { + "epoch": 1.87, + "grad_norm": 7.097613340613565, + "learning_rate": 2.156347163848238e-07, + "loss": 1.2207, + "step": 155640 + }, + { + "epoch": 1.87, + "grad_norm": 5.693435920232058, + "learning_rate": 2.1551407243924238e-07, + "loss": 0.929, + "step": 155643 + }, + { + "epoch": 1.87, + "grad_norm": 12.457379444857995, + "learning_rate": 2.1539346188458542e-07, + "loss": 1.0853, + "step": 155646 + }, + { + "epoch": 1.87, + "grad_norm": 6.245385641868823, + "learning_rate": 2.1527288472126706e-07, + "loss": 1.2757, + "step": 155649 + }, + { + "epoch": 1.87, + "grad_norm": 8.498928982666083, + "learning_rate": 2.1515234094969695e-07, + "loss": 1.0855, + "step": 155652 + }, + { + "epoch": 1.87, + "grad_norm": 8.130827889869076, + "learning_rate": 2.150318305702881e-07, + "loss": 1.0759, + "step": 155655 + }, + { + "epoch": 1.87, + "grad_norm": 6.569904835617294, + "learning_rate": 2.1491135358345016e-07, + "loss": 1.2525, + "step": 155658 + }, + { + "epoch": 1.87, + "grad_norm": 6.830785156582932, + "learning_rate": 2.1479090998959618e-07, + "loss": 1.2904, + "step": 155661 + }, + { + "epoch": 1.87, + "grad_norm": 6.048819899014926, + "learning_rate": 2.146704997891358e-07, + "loss": 1.0305, + "step": 155664 + }, + { + "epoch": 1.87, + "grad_norm": 17.10969981965096, + "learning_rate": 2.145501229824809e-07, + "loss": 0.9113, + "step": 155667 + }, + { + "epoch": 1.87, + "grad_norm": 5.062308457766515, + "learning_rate": 2.144297795700412e-07, + "loss": 1.126, + "step": 155670 + }, + { + "epoch": 1.87, + "grad_norm": 6.427746226731954, + "learning_rate": 2.1430946955222853e-07, + "loss": 1.1125, + "step": 155673 + }, + { + "epoch": 1.87, + "grad_norm": 7.047581200954721, + "learning_rate": 2.141891929294526e-07, + "loss": 1.3347, + "step": 155676 + }, + { + "epoch": 1.87, + "grad_norm": 7.766595392982889, + "learning_rate": 2.1406894970212312e-07, + "loss": 1.2369, + "step": 155679 + }, + { + "epoch": 1.87, + "grad_norm": 8.704370661525276, + "learning_rate": 2.139487398706519e-07, + "loss": 0.9605, + "step": 155682 + }, + { + "epoch": 1.87, + "grad_norm": 6.611611148637061, + "learning_rate": 2.1382856343544977e-07, + "loss": 1.1622, + "step": 155685 + }, + { + "epoch": 1.87, + "grad_norm": 10.762916716205593, + "learning_rate": 2.137084203969264e-07, + "loss": 0.7601, + "step": 155688 + }, + { + "epoch": 1.87, + "grad_norm": 2.994155935175424, + "learning_rate": 2.1358831075548926e-07, + "loss": 1.118, + "step": 155691 + }, + { + "epoch": 1.87, + "grad_norm": 6.201047533394678, + "learning_rate": 2.134682345115502e-07, + "loss": 1.1019, + "step": 155694 + }, + { + "epoch": 1.87, + "grad_norm": 4.144198255159643, + "learning_rate": 2.133481916655189e-07, + "loss": 0.8492, + "step": 155697 + }, + { + "epoch": 1.87, + "grad_norm": 5.958516586565646, + "learning_rate": 2.1322818221780616e-07, + "loss": 1.1203, + "step": 155700 + }, + { + "epoch": 1.87, + "grad_norm": 10.147937831108221, + "learning_rate": 2.1310820616882056e-07, + "loss": 1.1142, + "step": 155703 + }, + { + "epoch": 1.87, + "grad_norm": 14.01079465158505, + "learning_rate": 2.129882635189706e-07, + "loss": 1.4657, + "step": 155706 + }, + { + "epoch": 1.87, + "grad_norm": 7.179950219007943, + "learning_rate": 2.1286835426866714e-07, + "loss": 1.0086, + "step": 155709 + }, + { + "epoch": 1.87, + "grad_norm": 4.27460156075838, + "learning_rate": 2.1274847841831758e-07, + "loss": 0.8934, + "step": 155712 + }, + { + "epoch": 1.87, + "grad_norm": 12.34572051232869, + "learning_rate": 2.1262863596833383e-07, + "loss": 1.2426, + "step": 155715 + }, + { + "epoch": 1.87, + "grad_norm": 8.544104871598845, + "learning_rate": 2.125088269191211e-07, + "loss": 1.083, + "step": 155718 + }, + { + "epoch": 1.87, + "grad_norm": 3.612899882644056, + "learning_rate": 2.1238905127109132e-07, + "loss": 1.0088, + "step": 155721 + }, + { + "epoch": 1.87, + "grad_norm": 5.193841674921917, + "learning_rate": 2.1226930902465193e-07, + "loss": 1.0436, + "step": 155724 + }, + { + "epoch": 1.87, + "grad_norm": 12.501459375396621, + "learning_rate": 2.1214960018021257e-07, + "loss": 1.0699, + "step": 155727 + }, + { + "epoch": 1.87, + "grad_norm": 16.311072831561795, + "learning_rate": 2.1202992473818073e-07, + "loss": 1.0444, + "step": 155730 + }, + { + "epoch": 1.87, + "grad_norm": 3.6667448964160587, + "learning_rate": 2.1191028269896496e-07, + "loss": 1.4551, + "step": 155733 + }, + { + "epoch": 1.87, + "grad_norm": 13.476281331215896, + "learning_rate": 2.1179067406297383e-07, + "loss": 1.0389, + "step": 155736 + }, + { + "epoch": 1.87, + "grad_norm": 3.958855912639201, + "learning_rate": 2.1167109883061475e-07, + "loss": 0.796, + "step": 155739 + }, + { + "epoch": 1.87, + "grad_norm": 3.363129981233811, + "learning_rate": 2.1155155700229747e-07, + "loss": 1.0071, + "step": 155742 + }, + { + "epoch": 1.87, + "grad_norm": 29.382773944682008, + "learning_rate": 2.1143204857842935e-07, + "loss": 1.2344, + "step": 155745 + }, + { + "epoch": 1.87, + "grad_norm": 4.926340752899908, + "learning_rate": 2.1131257355941682e-07, + "loss": 1.2578, + "step": 155748 + }, + { + "epoch": 1.87, + "grad_norm": 6.199322003394606, + "learning_rate": 2.1119313194566837e-07, + "loss": 0.9062, + "step": 155751 + }, + { + "epoch": 1.87, + "grad_norm": 3.5520672990796176, + "learning_rate": 2.1107372373759261e-07, + "loss": 0.9356, + "step": 155754 + }, + { + "epoch": 1.87, + "grad_norm": 8.232433541124879, + "learning_rate": 2.1095434893559695e-07, + "loss": 1.1695, + "step": 155757 + }, + { + "epoch": 1.87, + "grad_norm": 8.494294955199166, + "learning_rate": 2.1083500754008668e-07, + "loss": 1.376, + "step": 155760 + }, + { + "epoch": 1.87, + "grad_norm": 5.175376891857644, + "learning_rate": 2.1071569955147032e-07, + "loss": 1.0445, + "step": 155763 + }, + { + "epoch": 1.87, + "grad_norm": 3.590299640783466, + "learning_rate": 2.1059642497015532e-07, + "loss": 1.0531, + "step": 155766 + }, + { + "epoch": 1.87, + "grad_norm": 9.381575083625957, + "learning_rate": 2.1047718379655025e-07, + "loss": 0.9059, + "step": 155769 + }, + { + "epoch": 1.87, + "grad_norm": 14.877424283942194, + "learning_rate": 2.1035797603105922e-07, + "loss": 1.0451, + "step": 155772 + }, + { + "epoch": 1.87, + "grad_norm": 2.5113471364587165, + "learning_rate": 2.1023880167409084e-07, + "loss": 1.2628, + "step": 155775 + }, + { + "epoch": 1.87, + "grad_norm": 5.328327826165654, + "learning_rate": 2.1011966072605027e-07, + "loss": 0.9759, + "step": 155778 + }, + { + "epoch": 1.87, + "grad_norm": 16.991567712794176, + "learning_rate": 2.1000055318734502e-07, + "loss": 1.0172, + "step": 155781 + }, + { + "epoch": 1.87, + "grad_norm": 12.905328518263003, + "learning_rate": 2.0988147905838142e-07, + "loss": 1.1217, + "step": 155784 + }, + { + "epoch": 1.87, + "grad_norm": 14.93385555670143, + "learning_rate": 2.0976243833956688e-07, + "loss": 1.2123, + "step": 155787 + }, + { + "epoch": 1.87, + "grad_norm": 4.281965759119486, + "learning_rate": 2.0964343103130557e-07, + "loss": 1.2644, + "step": 155790 + }, + { + "epoch": 1.87, + "grad_norm": 5.195721975571218, + "learning_rate": 2.0952445713400605e-07, + "loss": 0.9838, + "step": 155793 + }, + { + "epoch": 1.87, + "grad_norm": 11.01767545852708, + "learning_rate": 2.094055166480724e-07, + "loss": 1.1887, + "step": 155796 + }, + { + "epoch": 1.87, + "grad_norm": 33.087082774969474, + "learning_rate": 2.09286609573911e-07, + "loss": 1.3707, + "step": 155799 + }, + { + "epoch": 1.87, + "grad_norm": 6.921060707639857, + "learning_rate": 2.091677359119282e-07, + "loss": 1.1643, + "step": 155802 + }, + { + "epoch": 1.87, + "grad_norm": 3.862740356519323, + "learning_rate": 2.0904889566252807e-07, + "loss": 1.1434, + "step": 155805 + }, + { + "epoch": 1.87, + "grad_norm": 4.114718225550445, + "learning_rate": 2.089300888261181e-07, + "loss": 0.9568, + "step": 155808 + }, + { + "epoch": 1.87, + "grad_norm": 3.3546811730438337, + "learning_rate": 2.0881131540310352e-07, + "loss": 1.117, + "step": 155811 + }, + { + "epoch": 1.87, + "grad_norm": 13.276028760656091, + "learning_rate": 2.0869257539388953e-07, + "loss": 1.2783, + "step": 155814 + }, + { + "epoch": 1.87, + "grad_norm": 3.0927828998958393, + "learning_rate": 2.085738687988803e-07, + "loss": 1.2183, + "step": 155817 + }, + { + "epoch": 1.87, + "grad_norm": 9.258784245822428, + "learning_rate": 2.0845519561848105e-07, + "loss": 1.1528, + "step": 155820 + }, + { + "epoch": 1.87, + "grad_norm": 7.807704571079985, + "learning_rate": 2.083365558530992e-07, + "loss": 1.2333, + "step": 155823 + }, + { + "epoch": 1.87, + "grad_norm": 6.159606930998399, + "learning_rate": 2.0821794950313555e-07, + "loss": 1.0313, + "step": 155826 + }, + { + "epoch": 1.87, + "grad_norm": 14.49706734878759, + "learning_rate": 2.080993765689976e-07, + "loss": 1.4177, + "step": 155829 + }, + { + "epoch": 1.87, + "grad_norm": 8.430073934568025, + "learning_rate": 2.079808370510894e-07, + "loss": 1.1492, + "step": 155832 + }, + { + "epoch": 1.87, + "grad_norm": 1.9177215997661725, + "learning_rate": 2.0786233094981623e-07, + "loss": 1.0832, + "step": 155835 + }, + { + "epoch": 1.87, + "grad_norm": 11.520266350958092, + "learning_rate": 2.0774385826558218e-07, + "loss": 1.266, + "step": 155838 + }, + { + "epoch": 1.87, + "grad_norm": 5.856898506218772, + "learning_rate": 2.076254189987903e-07, + "loss": 1.1958, + "step": 155841 + }, + { + "epoch": 1.87, + "grad_norm": 21.247503545792284, + "learning_rate": 2.075070131498458e-07, + "loss": 1.2055, + "step": 155844 + }, + { + "epoch": 1.87, + "grad_norm": 4.432490595849617, + "learning_rate": 2.0738864071915278e-07, + "loss": 1.2702, + "step": 155847 + }, + { + "epoch": 1.87, + "grad_norm": 12.196648427697587, + "learning_rate": 2.0727030170711537e-07, + "loss": 1.0782, + "step": 155850 + }, + { + "epoch": 1.87, + "grad_norm": 14.971221968300163, + "learning_rate": 2.071519961141355e-07, + "loss": 1.378, + "step": 155853 + }, + { + "epoch": 1.87, + "grad_norm": 10.718821386512074, + "learning_rate": 2.070337239406206e-07, + "loss": 1.1193, + "step": 155856 + }, + { + "epoch": 1.87, + "grad_norm": 8.116267997122648, + "learning_rate": 2.0691548518697147e-07, + "loss": 1.4453, + "step": 155859 + }, + { + "epoch": 1.87, + "grad_norm": 8.094885312870895, + "learning_rate": 2.067972798535911e-07, + "loss": 1.3098, + "step": 155862 + }, + { + "epoch": 1.87, + "grad_norm": 3.1139006570579126, + "learning_rate": 2.0667910794088585e-07, + "loss": 1.0364, + "step": 155865 + }, + { + "epoch": 1.87, + "grad_norm": 4.309586263094729, + "learning_rate": 2.0656096944925652e-07, + "loss": 1.1145, + "step": 155868 + }, + { + "epoch": 1.87, + "grad_norm": 9.333900142460093, + "learning_rate": 2.064428643791072e-07, + "loss": 1.1325, + "step": 155871 + }, + { + "epoch": 1.87, + "grad_norm": 7.124186915155119, + "learning_rate": 2.0632479273083983e-07, + "loss": 1.0877, + "step": 155874 + }, + { + "epoch": 1.87, + "grad_norm": 4.9735902572215425, + "learning_rate": 2.062067545048596e-07, + "loss": 1.1312, + "step": 155877 + }, + { + "epoch": 1.87, + "grad_norm": 15.992252162549333, + "learning_rate": 2.0608874970156843e-07, + "loss": 1.1341, + "step": 155880 + }, + { + "epoch": 1.87, + "grad_norm": 2.602638530297216, + "learning_rate": 2.0597077832136713e-07, + "loss": 1.1975, + "step": 155883 + }, + { + "epoch": 1.87, + "grad_norm": 5.127945822598333, + "learning_rate": 2.058528403646598e-07, + "loss": 0.9426, + "step": 155886 + }, + { + "epoch": 1.87, + "grad_norm": 9.750457991761161, + "learning_rate": 2.0573493583184944e-07, + "loss": 1.0242, + "step": 155889 + }, + { + "epoch": 1.87, + "grad_norm": 12.790901016666545, + "learning_rate": 2.0561706472333797e-07, + "loss": 1.122, + "step": 155892 + }, + { + "epoch": 1.87, + "grad_norm": 2.9808303469364965, + "learning_rate": 2.054992270395273e-07, + "loss": 0.9647, + "step": 155895 + }, + { + "epoch": 1.87, + "grad_norm": 8.225525377250733, + "learning_rate": 2.053814227808204e-07, + "loss": 1.0284, + "step": 155898 + }, + { + "epoch": 1.87, + "grad_norm": 5.553026763853721, + "learning_rate": 2.052636519476181e-07, + "loss": 1.2555, + "step": 155901 + }, + { + "epoch": 1.87, + "grad_norm": 5.448673531062877, + "learning_rate": 2.051459145403234e-07, + "loss": 0.8536, + "step": 155904 + }, + { + "epoch": 1.87, + "grad_norm": 5.85070962600679, + "learning_rate": 2.050282105593371e-07, + "loss": 0.9803, + "step": 155907 + }, + { + "epoch": 1.87, + "grad_norm": 4.642535854540399, + "learning_rate": 2.0491054000506105e-07, + "loss": 1.158, + "step": 155910 + }, + { + "epoch": 1.87, + "grad_norm": 19.0632784621589, + "learning_rate": 2.0479290287789723e-07, + "loss": 1.0125, + "step": 155913 + }, + { + "epoch": 1.87, + "grad_norm": 3.7398695543724516, + "learning_rate": 2.0467529917824747e-07, + "loss": 1.0393, + "step": 155916 + }, + { + "epoch": 1.87, + "grad_norm": 9.323916687884216, + "learning_rate": 2.045577289065126e-07, + "loss": 1.0014, + "step": 155919 + }, + { + "epoch": 1.87, + "grad_norm": 7.569691433583323, + "learning_rate": 2.044401920630945e-07, + "loss": 1.1138, + "step": 155922 + }, + { + "epoch": 1.87, + "grad_norm": 5.94268237611035, + "learning_rate": 2.043226886483929e-07, + "loss": 0.9533, + "step": 155925 + }, + { + "epoch": 1.87, + "grad_norm": 7.761266517805422, + "learning_rate": 2.0420521866280963e-07, + "loss": 1.3197, + "step": 155928 + }, + { + "epoch": 1.88, + "grad_norm": 4.881095139526509, + "learning_rate": 2.0408778210674552e-07, + "loss": 1.0446, + "step": 155931 + }, + { + "epoch": 1.88, + "grad_norm": 11.149783091075262, + "learning_rate": 2.0397037898060135e-07, + "loss": 1.0066, + "step": 155934 + }, + { + "epoch": 1.88, + "grad_norm": 9.863558169927446, + "learning_rate": 2.038530092847779e-07, + "loss": 1.3411, + "step": 155937 + }, + { + "epoch": 1.88, + "grad_norm": 12.855499625313517, + "learning_rate": 2.0373567301967602e-07, + "loss": 1.375, + "step": 155940 + }, + { + "epoch": 1.88, + "grad_norm": 37.000519683724576, + "learning_rate": 2.036183701856942e-07, + "loss": 1.2345, + "step": 155943 + }, + { + "epoch": 1.88, + "grad_norm": 4.385629756172715, + "learning_rate": 2.035011007832366e-07, + "loss": 1.495, + "step": 155946 + }, + { + "epoch": 1.88, + "grad_norm": 8.716602175211413, + "learning_rate": 2.0338386481269844e-07, + "loss": 1.0635, + "step": 155949 + }, + { + "epoch": 1.88, + "grad_norm": 20.611769586125487, + "learning_rate": 2.032666622744839e-07, + "loss": 1.2679, + "step": 155952 + }, + { + "epoch": 1.88, + "grad_norm": 19.994639642536924, + "learning_rate": 2.0314949316899036e-07, + "loss": 1.0965, + "step": 155955 + }, + { + "epoch": 1.88, + "grad_norm": 4.128784343217303, + "learning_rate": 2.0303235749662086e-07, + "loss": 1.0633, + "step": 155958 + }, + { + "epoch": 1.88, + "grad_norm": 5.772329832093843, + "learning_rate": 2.0291525525777178e-07, + "loss": 1.0675, + "step": 155961 + }, + { + "epoch": 1.88, + "grad_norm": 4.496607168652798, + "learning_rate": 2.0279818645284388e-07, + "loss": 1.3545, + "step": 155964 + }, + { + "epoch": 1.88, + "grad_norm": 2.6403085829274477, + "learning_rate": 2.0268115108223685e-07, + "loss": 1.1974, + "step": 155967 + }, + { + "epoch": 1.88, + "grad_norm": 4.458229265499238, + "learning_rate": 2.0256414914635038e-07, + "loss": 1.3467, + "step": 155970 + }, + { + "epoch": 1.88, + "grad_norm": 9.303299815943662, + "learning_rate": 2.024471806455841e-07, + "loss": 1.2971, + "step": 155973 + }, + { + "epoch": 1.88, + "grad_norm": 7.56282413909888, + "learning_rate": 2.0233024558033555e-07, + "loss": 0.9861, + "step": 155976 + }, + { + "epoch": 1.88, + "grad_norm": 4.0374129847507945, + "learning_rate": 2.0221334395100435e-07, + "loss": 1.161, + "step": 155979 + }, + { + "epoch": 1.88, + "grad_norm": 2.591930283767931, + "learning_rate": 2.0209647575799018e-07, + "loss": 0.9292, + "step": 155982 + }, + { + "epoch": 1.88, + "grad_norm": 2.905679794234568, + "learning_rate": 2.0197964100169275e-07, + "loss": 1.4185, + "step": 155985 + }, + { + "epoch": 1.88, + "grad_norm": 29.672957156930497, + "learning_rate": 2.0186283968250951e-07, + "loss": 1.3665, + "step": 155988 + }, + { + "epoch": 1.88, + "grad_norm": 13.304907992265976, + "learning_rate": 2.0174607180083795e-07, + "loss": 1.2276, + "step": 155991 + }, + { + "epoch": 1.88, + "grad_norm": 20.5425423020331, + "learning_rate": 2.016293373570777e-07, + "loss": 0.9108, + "step": 155994 + }, + { + "epoch": 1.88, + "grad_norm": 12.89754249406464, + "learning_rate": 2.0151263635162732e-07, + "loss": 1.1268, + "step": 155997 + }, + { + "epoch": 1.88, + "grad_norm": 6.525898822261879, + "learning_rate": 2.0139596878488543e-07, + "loss": 0.9764, + "step": 156000 + }, + { + "epoch": 1.88, + "grad_norm": 11.40685696503697, + "learning_rate": 2.0127933465724948e-07, + "loss": 1.4029, + "step": 156003 + }, + { + "epoch": 1.88, + "grad_norm": 12.65344435846513, + "learning_rate": 2.01162733969118e-07, + "loss": 1.0112, + "step": 156006 + }, + { + "epoch": 1.88, + "grad_norm": 8.787879366993467, + "learning_rate": 2.010461667208885e-07, + "loss": 1.092, + "step": 156009 + }, + { + "epoch": 1.88, + "grad_norm": 8.598212581578151, + "learning_rate": 2.0092963291295842e-07, + "loss": 1.3428, + "step": 156012 + }, + { + "epoch": 1.88, + "grad_norm": 10.662729115308537, + "learning_rate": 2.0081313254572632e-07, + "loss": 0.9686, + "step": 156015 + }, + { + "epoch": 1.88, + "grad_norm": 4.0443033331700065, + "learning_rate": 2.0069666561958855e-07, + "loss": 1.0322, + "step": 156018 + }, + { + "epoch": 1.88, + "grad_norm": 9.954803254788766, + "learning_rate": 2.005802321349437e-07, + "loss": 1.1309, + "step": 156021 + }, + { + "epoch": 1.88, + "grad_norm": 6.595753155399611, + "learning_rate": 2.0046383209218811e-07, + "loss": 1.3347, + "step": 156024 + }, + { + "epoch": 1.88, + "grad_norm": 11.284275266985846, + "learning_rate": 2.0034746549172145e-07, + "loss": 0.9583, + "step": 156027 + }, + { + "epoch": 1.88, + "grad_norm": 2.6653479082191356, + "learning_rate": 2.0023113233393897e-07, + "loss": 1.3914, + "step": 156030 + }, + { + "epoch": 1.88, + "grad_norm": 4.134993672586738, + "learning_rate": 2.0011483261923592e-07, + "loss": 0.879, + "step": 156033 + }, + { + "epoch": 1.88, + "grad_norm": 8.063383049795446, + "learning_rate": 1.9999856634801196e-07, + "loss": 1.053, + "step": 156036 + }, + { + "epoch": 1.88, + "grad_norm": 4.054189496537673, + "learning_rate": 1.9988233352066343e-07, + "loss": 1.4897, + "step": 156039 + }, + { + "epoch": 1.88, + "grad_norm": 10.616903314129924, + "learning_rate": 1.997661341375867e-07, + "loss": 1.453, + "step": 156042 + }, + { + "epoch": 1.88, + "grad_norm": 6.665151128562802, + "learning_rate": 1.9964996819917703e-07, + "loss": 1.0986, + "step": 156045 + }, + { + "epoch": 1.88, + "grad_norm": 9.04168538706934, + "learning_rate": 1.9953383570583295e-07, + "loss": 0.9814, + "step": 156048 + }, + { + "epoch": 1.88, + "grad_norm": 7.208965532139582, + "learning_rate": 1.994177366579486e-07, + "loss": 1.0109, + "step": 156051 + }, + { + "epoch": 1.88, + "grad_norm": 3.140588075445672, + "learning_rate": 1.9930167105592367e-07, + "loss": 1.0811, + "step": 156054 + }, + { + "epoch": 1.88, + "grad_norm": 5.232535968110121, + "learning_rate": 1.9918563890015007e-07, + "loss": 1.4554, + "step": 156057 + }, + { + "epoch": 1.88, + "grad_norm": 3.5042859236891446, + "learning_rate": 1.9906964019102527e-07, + "loss": 1.2308, + "step": 156060 + }, + { + "epoch": 1.88, + "grad_norm": 19.25033795073883, + "learning_rate": 1.9895367492894556e-07, + "loss": 1.1376, + "step": 156063 + }, + { + "epoch": 1.88, + "grad_norm": 6.155377078286642, + "learning_rate": 1.988377431143085e-07, + "loss": 1.1081, + "step": 156066 + }, + { + "epoch": 1.88, + "grad_norm": 3.4429273926962263, + "learning_rate": 1.9872184474750588e-07, + "loss": 0.9609, + "step": 156069 + }, + { + "epoch": 1.88, + "grad_norm": 7.680026995692202, + "learning_rate": 1.9860597982893638e-07, + "loss": 1.0124, + "step": 156072 + }, + { + "epoch": 1.88, + "grad_norm": 10.74038246378838, + "learning_rate": 1.9849014835899406e-07, + "loss": 1.1831, + "step": 156075 + }, + { + "epoch": 1.88, + "grad_norm": 3.334173283543421, + "learning_rate": 1.9837435033807306e-07, + "loss": 0.8254, + "step": 156078 + }, + { + "epoch": 1.88, + "grad_norm": 8.246280423745446, + "learning_rate": 1.9825858576657197e-07, + "loss": 0.976, + "step": 156081 + }, + { + "epoch": 1.88, + "grad_norm": 5.563961465859852, + "learning_rate": 1.9814285464488158e-07, + "loss": 1.2235, + "step": 156084 + }, + { + "epoch": 1.88, + "grad_norm": 9.741298443287368, + "learning_rate": 1.9802715697340046e-07, + "loss": 1.352, + "step": 156087 + }, + { + "epoch": 1.88, + "grad_norm": 2.953750035238312, + "learning_rate": 1.9791149275252164e-07, + "loss": 1.0228, + "step": 156090 + }, + { + "epoch": 1.88, + "grad_norm": 6.236463617319725, + "learning_rate": 1.9779586198264144e-07, + "loss": 1.145, + "step": 156093 + }, + { + "epoch": 1.88, + "grad_norm": 7.810315981133315, + "learning_rate": 1.976802646641518e-07, + "loss": 1.2112, + "step": 156096 + }, + { + "epoch": 1.88, + "grad_norm": 13.302654498331881, + "learning_rate": 1.9756470079744794e-07, + "loss": 1.1432, + "step": 156099 + }, + { + "epoch": 1.88, + "grad_norm": 10.34219402771059, + "learning_rate": 1.9744917038292622e-07, + "loss": 0.8281, + "step": 156102 + }, + { + "epoch": 1.88, + "grad_norm": 8.833350118954035, + "learning_rate": 1.9733367342097854e-07, + "loss": 1.1951, + "step": 156105 + }, + { + "epoch": 1.88, + "grad_norm": 4.151518841029893, + "learning_rate": 1.9721820991200125e-07, + "loss": 1.0759, + "step": 156108 + }, + { + "epoch": 1.88, + "grad_norm": 4.175658915073653, + "learning_rate": 1.9710277985638737e-07, + "loss": 1.0035, + "step": 156111 + }, + { + "epoch": 1.88, + "grad_norm": 5.53294483290782, + "learning_rate": 1.9698738325452994e-07, + "loss": 1.3352, + "step": 156114 + }, + { + "epoch": 1.88, + "grad_norm": 7.181108625791518, + "learning_rate": 1.9687202010682415e-07, + "loss": 1.1175, + "step": 156117 + }, + { + "epoch": 1.88, + "grad_norm": 7.4727309455525095, + "learning_rate": 1.9675669041366308e-07, + "loss": 0.911, + "step": 156120 + }, + { + "epoch": 1.88, + "grad_norm": 8.527079031625231, + "learning_rate": 1.966413941754397e-07, + "loss": 1.0788, + "step": 156123 + }, + { + "epoch": 1.88, + "grad_norm": 12.87342677147367, + "learning_rate": 1.9652613139254818e-07, + "loss": 0.9816, + "step": 156126 + }, + { + "epoch": 1.88, + "grad_norm": 8.819943748212076, + "learning_rate": 1.9641090206538148e-07, + "loss": 1.1096, + "step": 156129 + }, + { + "epoch": 1.88, + "grad_norm": 8.115508995665353, + "learning_rate": 1.962957061943338e-07, + "loss": 1.1535, + "step": 156132 + }, + { + "epoch": 1.88, + "grad_norm": 8.131204549847688, + "learning_rate": 1.961805437797981e-07, + "loss": 1.0972, + "step": 156135 + }, + { + "epoch": 1.88, + "grad_norm": 1.9675577249018146, + "learning_rate": 1.9606541482216633e-07, + "loss": 1.1178, + "step": 156138 + }, + { + "epoch": 1.88, + "grad_norm": 5.131995678874539, + "learning_rate": 1.9595031932183152e-07, + "loss": 1.3331, + "step": 156141 + }, + { + "epoch": 1.88, + "grad_norm": 10.897417130002662, + "learning_rate": 1.9583525727918662e-07, + "loss": 0.8319, + "step": 156144 + }, + { + "epoch": 1.88, + "grad_norm": 12.026959966543146, + "learning_rate": 1.9572022869462582e-07, + "loss": 1.337, + "step": 156147 + }, + { + "epoch": 1.88, + "grad_norm": 7.712452883845494, + "learning_rate": 1.9560523356853877e-07, + "loss": 1.189, + "step": 156150 + }, + { + "epoch": 1.88, + "grad_norm": 9.923538370818838, + "learning_rate": 1.9549027190132076e-07, + "loss": 0.9606, + "step": 156153 + }, + { + "epoch": 1.88, + "grad_norm": 10.378871778289454, + "learning_rate": 1.9537534369336252e-07, + "loss": 1.3888, + "step": 156156 + }, + { + "epoch": 1.88, + "grad_norm": 12.20258057620817, + "learning_rate": 1.9526044894505602e-07, + "loss": 1.0061, + "step": 156159 + }, + { + "epoch": 1.88, + "grad_norm": 9.027972384143503, + "learning_rate": 1.9514558765679536e-07, + "loss": 1.4652, + "step": 156162 + }, + { + "epoch": 1.88, + "grad_norm": 6.3881518617573, + "learning_rate": 1.9503075982897023e-07, + "loss": 1.2347, + "step": 156165 + }, + { + "epoch": 1.88, + "grad_norm": 6.213905380364043, + "learning_rate": 1.9491596546197368e-07, + "loss": 0.923, + "step": 156168 + }, + { + "epoch": 1.88, + "grad_norm": 5.750469669926085, + "learning_rate": 1.9480120455619645e-07, + "loss": 0.8967, + "step": 156171 + }, + { + "epoch": 1.88, + "grad_norm": 8.814157124576967, + "learning_rate": 1.9468647711203158e-07, + "loss": 0.9373, + "step": 156174 + }, + { + "epoch": 1.88, + "grad_norm": 5.48319067528175, + "learning_rate": 1.945717831298699e-07, + "loss": 1.2213, + "step": 156177 + }, + { + "epoch": 1.88, + "grad_norm": 3.4255065141773233, + "learning_rate": 1.944571226101033e-07, + "loss": 1.5627, + "step": 156180 + }, + { + "epoch": 1.88, + "grad_norm": 6.968243232216597, + "learning_rate": 1.9434249555312146e-07, + "loss": 1.0311, + "step": 156183 + }, + { + "epoch": 1.88, + "grad_norm": 9.414971809038828, + "learning_rate": 1.942279019593163e-07, + "loss": 1.1909, + "step": 156186 + }, + { + "epoch": 1.88, + "grad_norm": 5.979245039908437, + "learning_rate": 1.941133418290808e-07, + "loss": 1.4359, + "step": 156189 + }, + { + "epoch": 1.88, + "grad_norm": 8.934951845742798, + "learning_rate": 1.939988151628036e-07, + "loss": 0.9077, + "step": 156192 + }, + { + "epoch": 1.88, + "grad_norm": 3.3088303783951267, + "learning_rate": 1.9388432196087657e-07, + "loss": 1.1195, + "step": 156195 + }, + { + "epoch": 1.88, + "grad_norm": 6.320652324981846, + "learning_rate": 1.9376986222369055e-07, + "loss": 1.0609, + "step": 156198 + }, + { + "epoch": 1.88, + "grad_norm": 15.434267961978595, + "learning_rate": 1.9365543595163626e-07, + "loss": 0.8126, + "step": 156201 + }, + { + "epoch": 1.88, + "grad_norm": 7.45784953859216, + "learning_rate": 1.9354104314510235e-07, + "loss": 1.1595, + "step": 156204 + }, + { + "epoch": 1.88, + "grad_norm": 14.34260783043316, + "learning_rate": 1.934266838044807e-07, + "loss": 1.2665, + "step": 156207 + }, + { + "epoch": 1.88, + "grad_norm": 2.788121885444818, + "learning_rate": 1.933123579301621e-07, + "loss": 1.145, + "step": 156210 + }, + { + "epoch": 1.88, + "grad_norm": 17.558222284633597, + "learning_rate": 1.9319806552253516e-07, + "loss": 0.9188, + "step": 156213 + }, + { + "epoch": 1.88, + "grad_norm": 23.447401695748024, + "learning_rate": 1.9308380658199178e-07, + "loss": 1.2897, + "step": 156216 + }, + { + "epoch": 1.88, + "grad_norm": 3.7552031407862314, + "learning_rate": 1.929695811089205e-07, + "loss": 1.1501, + "step": 156219 + }, + { + "epoch": 1.88, + "grad_norm": 4.321388225617809, + "learning_rate": 1.9285538910371217e-07, + "loss": 1.0354, + "step": 156222 + }, + { + "epoch": 1.88, + "grad_norm": 11.145268319443625, + "learning_rate": 1.927412305667553e-07, + "loss": 1.2579, + "step": 156225 + }, + { + "epoch": 1.88, + "grad_norm": 7.9271996727219625, + "learning_rate": 1.9262710549844078e-07, + "loss": 1.0224, + "step": 156228 + }, + { + "epoch": 1.88, + "grad_norm": 3.9799014902325753, + "learning_rate": 1.925130138991571e-07, + "loss": 1.0253, + "step": 156231 + }, + { + "epoch": 1.88, + "grad_norm": 7.099558002329974, + "learning_rate": 1.923989557692929e-07, + "loss": 1.2618, + "step": 156234 + }, + { + "epoch": 1.88, + "grad_norm": 13.765865836523595, + "learning_rate": 1.9228493110923895e-07, + "loss": 1.0839, + "step": 156237 + }, + { + "epoch": 1.88, + "grad_norm": 4.260416095229906, + "learning_rate": 1.9217093991938385e-07, + "loss": 1.1559, + "step": 156240 + }, + { + "epoch": 1.88, + "grad_norm": 6.5332171861516, + "learning_rate": 1.9205698220011725e-07, + "loss": 1.1271, + "step": 156243 + }, + { + "epoch": 1.88, + "grad_norm": 5.776693163955926, + "learning_rate": 1.9194305795182665e-07, + "loss": 1.0959, + "step": 156246 + }, + { + "epoch": 1.88, + "grad_norm": 6.2834458193589615, + "learning_rate": 1.9182916717490176e-07, + "loss": 1.0878, + "step": 156249 + }, + { + "epoch": 1.88, + "grad_norm": 7.493240841771006, + "learning_rate": 1.9171530986972997e-07, + "loss": 0.9733, + "step": 156252 + }, + { + "epoch": 1.88, + "grad_norm": 15.203380983321864, + "learning_rate": 1.9160148603670214e-07, + "loss": 1.385, + "step": 156255 + }, + { + "epoch": 1.88, + "grad_norm": 7.013141335013614, + "learning_rate": 1.9148769567620462e-07, + "loss": 1.2174, + "step": 156258 + }, + { + "epoch": 1.88, + "grad_norm": 5.860927950241259, + "learning_rate": 1.9137393878862708e-07, + "loss": 0.9771, + "step": 156261 + }, + { + "epoch": 1.88, + "grad_norm": 5.401949832666954, + "learning_rate": 1.9126021537435811e-07, + "loss": 1.1143, + "step": 156264 + }, + { + "epoch": 1.88, + "grad_norm": 3.0438408917522732, + "learning_rate": 1.9114652543378408e-07, + "loss": 1.2124, + "step": 156267 + }, + { + "epoch": 1.88, + "grad_norm": 15.929227620288625, + "learning_rate": 1.9103286896729356e-07, + "loss": 0.9532, + "step": 156270 + }, + { + "epoch": 1.88, + "grad_norm": 7.766481118919031, + "learning_rate": 1.9091924597527512e-07, + "loss": 1.2082, + "step": 156273 + }, + { + "epoch": 1.88, + "grad_norm": 4.943592267988989, + "learning_rate": 1.9080565645811511e-07, + "loss": 1.3002, + "step": 156276 + }, + { + "epoch": 1.88, + "grad_norm": 4.9302215550510455, + "learning_rate": 1.9069210041620323e-07, + "loss": 1.2131, + "step": 156279 + }, + { + "epoch": 1.88, + "grad_norm": 11.349067865958434, + "learning_rate": 1.9057857784992583e-07, + "loss": 1.0082, + "step": 156282 + }, + { + "epoch": 1.88, + "grad_norm": 5.71395361977391, + "learning_rate": 1.904650887596715e-07, + "loss": 0.7174, + "step": 156285 + }, + { + "epoch": 1.88, + "grad_norm": 5.985725135127095, + "learning_rate": 1.903516331458244e-07, + "loss": 1.0161, + "step": 156288 + }, + { + "epoch": 1.88, + "grad_norm": 7.040120744843482, + "learning_rate": 1.9023821100877416e-07, + "loss": 1.1316, + "step": 156291 + }, + { + "epoch": 1.88, + "grad_norm": 10.729812042942022, + "learning_rate": 1.9012482234890717e-07, + "loss": 1.2446, + "step": 156294 + }, + { + "epoch": 1.88, + "grad_norm": 8.000817814682124, + "learning_rate": 1.90011467166612e-07, + "loss": 0.9389, + "step": 156297 + }, + { + "epoch": 1.88, + "grad_norm": 6.024317436205644, + "learning_rate": 1.8989814546227392e-07, + "loss": 0.8529, + "step": 156300 + }, + { + "epoch": 1.88, + "grad_norm": 5.451211547167924, + "learning_rate": 1.8978485723627816e-07, + "loss": 1.1652, + "step": 156303 + }, + { + "epoch": 1.88, + "grad_norm": 19.334215539172625, + "learning_rate": 1.896716024890155e-07, + "loss": 1.222, + "step": 156306 + }, + { + "epoch": 1.88, + "grad_norm": 6.225410418893396, + "learning_rate": 1.89558381220869e-07, + "loss": 0.7812, + "step": 156309 + }, + { + "epoch": 1.88, + "grad_norm": 2.1869873220343354, + "learning_rate": 1.8944519343222612e-07, + "loss": 1.2844, + "step": 156312 + }, + { + "epoch": 1.88, + "grad_norm": 3.2505888098093867, + "learning_rate": 1.893320391234721e-07, + "loss": 1.0552, + "step": 156315 + }, + { + "epoch": 1.88, + "grad_norm": 7.485649296429021, + "learning_rate": 1.8921891829499438e-07, + "loss": 1.3676, + "step": 156318 + }, + { + "epoch": 1.88, + "grad_norm": 3.9064270132457253, + "learning_rate": 1.8910583094717828e-07, + "loss": 1.6318, + "step": 156321 + }, + { + "epoch": 1.88, + "grad_norm": 7.2677300859697205, + "learning_rate": 1.889927770804123e-07, + "loss": 1.1218, + "step": 156324 + }, + { + "epoch": 1.88, + "grad_norm": 4.512138271997244, + "learning_rate": 1.888797566950784e-07, + "loss": 0.8719, + "step": 156327 + }, + { + "epoch": 1.88, + "grad_norm": 10.838482513371575, + "learning_rate": 1.8876676979156406e-07, + "loss": 1.3382, + "step": 156330 + }, + { + "epoch": 1.88, + "grad_norm": 7.992619053994215, + "learning_rate": 1.8865381637025447e-07, + "loss": 1.4091, + "step": 156333 + }, + { + "epoch": 1.88, + "grad_norm": 5.249568598096033, + "learning_rate": 1.8854089643153606e-07, + "loss": 1.2193, + "step": 156336 + }, + { + "epoch": 1.88, + "grad_norm": 7.464712643859105, + "learning_rate": 1.8842800997579292e-07, + "loss": 1.3983, + "step": 156339 + }, + { + "epoch": 1.88, + "grad_norm": 4.604389672000767, + "learning_rate": 1.8831515700341141e-07, + "loss": 1.0493, + "step": 156342 + }, + { + "epoch": 1.88, + "grad_norm": 14.73911476308538, + "learning_rate": 1.8820233751477568e-07, + "loss": 1.167, + "step": 156345 + }, + { + "epoch": 1.88, + "grad_norm": 10.439621729602688, + "learning_rate": 1.8808955151027096e-07, + "loss": 1.2809, + "step": 156348 + }, + { + "epoch": 1.88, + "grad_norm": 4.271047892561919, + "learning_rate": 1.8797679899028366e-07, + "loss": 1.2353, + "step": 156351 + }, + { + "epoch": 1.88, + "grad_norm": 4.691024159316906, + "learning_rate": 1.8786407995519563e-07, + "loss": 1.3, + "step": 156354 + }, + { + "epoch": 1.88, + "grad_norm": 15.550454220188842, + "learning_rate": 1.8775139440539436e-07, + "loss": 1.0469, + "step": 156357 + }, + { + "epoch": 1.88, + "grad_norm": 9.39752524022576, + "learning_rate": 1.8763874234126179e-07, + "loss": 0.8653, + "step": 156360 + }, + { + "epoch": 1.88, + "grad_norm": 31.677055948544172, + "learning_rate": 1.8752612376318535e-07, + "loss": 1.0567, + "step": 156363 + }, + { + "epoch": 1.88, + "grad_norm": 9.217586301377544, + "learning_rate": 1.87413538671547e-07, + "loss": 1.1498, + "step": 156366 + }, + { + "epoch": 1.88, + "grad_norm": 3.5254547883936924, + "learning_rate": 1.8730098706673306e-07, + "loss": 1.4336, + "step": 156369 + }, + { + "epoch": 1.88, + "grad_norm": 7.267409893005929, + "learning_rate": 1.8718846894912434e-07, + "loss": 1.2964, + "step": 156372 + }, + { + "epoch": 1.88, + "grad_norm": 10.265034295324257, + "learning_rate": 1.8707598431910722e-07, + "loss": 1.3989, + "step": 156375 + }, + { + "epoch": 1.88, + "grad_norm": 14.058688115054787, + "learning_rate": 1.8696353317706695e-07, + "loss": 1.065, + "step": 156378 + }, + { + "epoch": 1.88, + "grad_norm": 16.77332110480771, + "learning_rate": 1.868511155233832e-07, + "loss": 1.0019, + "step": 156381 + }, + { + "epoch": 1.88, + "grad_norm": 5.34708231125437, + "learning_rate": 1.8673873135844345e-07, + "loss": 1.1196, + "step": 156384 + }, + { + "epoch": 1.88, + "grad_norm": 13.81371481743135, + "learning_rate": 1.866263806826285e-07, + "loss": 1.2039, + "step": 156387 + }, + { + "epoch": 1.88, + "grad_norm": 12.464059392863136, + "learning_rate": 1.8651406349632471e-07, + "loss": 1.3389, + "step": 156390 + }, + { + "epoch": 1.88, + "grad_norm": 9.630158231618143, + "learning_rate": 1.8640177979991292e-07, + "loss": 1.1333, + "step": 156393 + }, + { + "epoch": 1.88, + "grad_norm": 6.093017677375486, + "learning_rate": 1.862895295937761e-07, + "loss": 1.175, + "step": 156396 + }, + { + "epoch": 1.88, + "grad_norm": 14.433047978263419, + "learning_rate": 1.8617731287829842e-07, + "loss": 0.9677, + "step": 156399 + }, + { + "epoch": 1.88, + "grad_norm": 11.660854498025534, + "learning_rate": 1.860651296538629e-07, + "loss": 0.928, + "step": 156402 + }, + { + "epoch": 1.88, + "grad_norm": 6.062111367947888, + "learning_rate": 1.8595297992085258e-07, + "loss": 1.1223, + "step": 156405 + }, + { + "epoch": 1.88, + "grad_norm": 3.3244460991053257, + "learning_rate": 1.8584086367964937e-07, + "loss": 1.1144, + "step": 156408 + }, + { + "epoch": 1.88, + "grad_norm": 17.252420092050084, + "learning_rate": 1.857287809306363e-07, + "loss": 1.0409, + "step": 156411 + }, + { + "epoch": 1.88, + "grad_norm": 10.498333364140679, + "learning_rate": 1.8561673167419526e-07, + "loss": 0.9466, + "step": 156414 + }, + { + "epoch": 1.88, + "grad_norm": 4.524492895961957, + "learning_rate": 1.855047159107104e-07, + "loss": 1.1748, + "step": 156417 + }, + { + "epoch": 1.88, + "grad_norm": 26.49105586869575, + "learning_rate": 1.8539273364056253e-07, + "loss": 0.9671, + "step": 156420 + }, + { + "epoch": 1.88, + "grad_norm": 10.928336757560578, + "learning_rate": 1.852807848641325e-07, + "loss": 0.8359, + "step": 156423 + }, + { + "epoch": 1.88, + "grad_norm": 7.446090856788252, + "learning_rate": 1.8516886958180547e-07, + "loss": 1.4087, + "step": 156426 + }, + { + "epoch": 1.88, + "grad_norm": 18.24252018648007, + "learning_rate": 1.850569877939612e-07, + "loss": 1.4468, + "step": 156429 + }, + { + "epoch": 1.88, + "grad_norm": 12.245431922895117, + "learning_rate": 1.8494513950098269e-07, + "loss": 1.0349, + "step": 156432 + }, + { + "epoch": 1.88, + "grad_norm": 19.508600054899354, + "learning_rate": 1.8483332470325078e-07, + "loss": 1.1569, + "step": 156435 + }, + { + "epoch": 1.88, + "grad_norm": 12.844645013873272, + "learning_rate": 1.8472154340114735e-07, + "loss": 1.1906, + "step": 156438 + }, + { + "epoch": 1.88, + "grad_norm": 5.123158292043922, + "learning_rate": 1.846097955950532e-07, + "loss": 1.1621, + "step": 156441 + }, + { + "epoch": 1.88, + "grad_norm": 26.80993873056943, + "learning_rate": 1.8449808128535142e-07, + "loss": 1.2336, + "step": 156444 + }, + { + "epoch": 1.88, + "grad_norm": 5.865173063235579, + "learning_rate": 1.8438640047242052e-07, + "loss": 0.9912, + "step": 156447 + }, + { + "epoch": 1.88, + "grad_norm": 13.59166691327848, + "learning_rate": 1.8427475315664467e-07, + "loss": 1.1415, + "step": 156450 + }, + { + "epoch": 1.88, + "grad_norm": 5.027333069625066, + "learning_rate": 1.8416313933840357e-07, + "loss": 1.2922, + "step": 156453 + }, + { + "epoch": 1.88, + "grad_norm": 10.321220102552127, + "learning_rate": 1.8405155901807693e-07, + "loss": 1.0978, + "step": 156456 + }, + { + "epoch": 1.88, + "grad_norm": 2.1164364278245635, + "learning_rate": 1.8394001219604774e-07, + "loss": 0.8201, + "step": 156459 + }, + { + "epoch": 1.88, + "grad_norm": 3.6441936456676602, + "learning_rate": 1.838284988726946e-07, + "loss": 1.206, + "step": 156462 + }, + { + "epoch": 1.88, + "grad_norm": 7.302792473477041, + "learning_rate": 1.8371701904839946e-07, + "loss": 1.0512, + "step": 156465 + }, + { + "epoch": 1.88, + "grad_norm": 4.124989570827549, + "learning_rate": 1.8360557272354197e-07, + "loss": 1.1165, + "step": 156468 + }, + { + "epoch": 1.88, + "grad_norm": 11.630751693618159, + "learning_rate": 1.8349415989850405e-07, + "loss": 1.2859, + "step": 156471 + }, + { + "epoch": 1.88, + "grad_norm": 5.620035853136002, + "learning_rate": 1.8338278057366322e-07, + "loss": 1.2858, + "step": 156474 + }, + { + "epoch": 1.88, + "grad_norm": 2.7666077620208154, + "learning_rate": 1.8327143474940246e-07, + "loss": 1.2899, + "step": 156477 + }, + { + "epoch": 1.88, + "grad_norm": 3.062472962156823, + "learning_rate": 1.8316012242609926e-07, + "loss": 1.163, + "step": 156480 + }, + { + "epoch": 1.88, + "grad_norm": 11.650685350081437, + "learning_rate": 1.8304884360413444e-07, + "loss": 1.125, + "step": 156483 + }, + { + "epoch": 1.88, + "grad_norm": 4.267834602185572, + "learning_rate": 1.829375982838888e-07, + "loss": 0.8179, + "step": 156486 + }, + { + "epoch": 1.88, + "grad_norm": 2.5835222557584845, + "learning_rate": 1.828263864657398e-07, + "loss": 0.8666, + "step": 156489 + }, + { + "epoch": 1.88, + "grad_norm": 12.007755539657929, + "learning_rate": 1.8271520815006826e-07, + "loss": 1.2737, + "step": 156492 + }, + { + "epoch": 1.88, + "grad_norm": 6.325197105486165, + "learning_rate": 1.82604063337255e-07, + "loss": 1.0161, + "step": 156495 + }, + { + "epoch": 1.88, + "grad_norm": 4.34074057757581, + "learning_rate": 1.8249295202767747e-07, + "loss": 1.353, + "step": 156498 + }, + { + "epoch": 1.88, + "grad_norm": 5.7206935535419525, + "learning_rate": 1.823818742217154e-07, + "loss": 1.1695, + "step": 156501 + }, + { + "epoch": 1.88, + "grad_norm": 2.310901841844998, + "learning_rate": 1.8227082991974622e-07, + "loss": 0.8663, + "step": 156504 + }, + { + "epoch": 1.88, + "grad_norm": 2.8790635699095577, + "learning_rate": 1.8215981912215074e-07, + "loss": 1.0845, + "step": 156507 + }, + { + "epoch": 1.88, + "grad_norm": 11.028310599736677, + "learning_rate": 1.820488418293076e-07, + "loss": 1.1445, + "step": 156510 + }, + { + "epoch": 1.88, + "grad_norm": 9.659130562918463, + "learning_rate": 1.8193789804159646e-07, + "loss": 1.3225, + "step": 156513 + }, + { + "epoch": 1.88, + "grad_norm": 15.125861794506125, + "learning_rate": 1.8182698775939366e-07, + "loss": 1.0155, + "step": 156516 + }, + { + "epoch": 1.88, + "grad_norm": 6.3273303209968725, + "learning_rate": 1.8171611098308006e-07, + "loss": 1.4387, + "step": 156519 + }, + { + "epoch": 1.88, + "grad_norm": 6.542500834896053, + "learning_rate": 1.81605267713032e-07, + "loss": 0.9471, + "step": 156522 + }, + { + "epoch": 1.88, + "grad_norm": 2.576343637020241, + "learning_rate": 1.8149445794962917e-07, + "loss": 1.295, + "step": 156525 + }, + { + "epoch": 1.88, + "grad_norm": 5.7074286955519895, + "learning_rate": 1.8138368169324793e-07, + "loss": 1.1631, + "step": 156528 + }, + { + "epoch": 1.88, + "grad_norm": 6.968637020007148, + "learning_rate": 1.81272938944268e-07, + "loss": 0.9995, + "step": 156531 + }, + { + "epoch": 1.88, + "grad_norm": 10.004742619325008, + "learning_rate": 1.8116222970306686e-07, + "loss": 1.0409, + "step": 156534 + }, + { + "epoch": 1.88, + "grad_norm": 16.083212267950977, + "learning_rate": 1.8105155397002305e-07, + "loss": 1.1141, + "step": 156537 + }, + { + "epoch": 1.88, + "grad_norm": 8.954059247196815, + "learning_rate": 1.8094091174551298e-07, + "loss": 1.2725, + "step": 156540 + }, + { + "epoch": 1.88, + "grad_norm": 7.303944321044767, + "learning_rate": 1.8083030302991522e-07, + "loss": 1.2088, + "step": 156543 + }, + { + "epoch": 1.88, + "grad_norm": 7.624802145398811, + "learning_rate": 1.8071972782360614e-07, + "loss": 1.4845, + "step": 156546 + }, + { + "epoch": 1.88, + "grad_norm": 7.814853629319218, + "learning_rate": 1.806091861269632e-07, + "loss": 1.2059, + "step": 156549 + }, + { + "epoch": 1.88, + "grad_norm": 12.926682806627912, + "learning_rate": 1.8049867794036502e-07, + "loss": 1.2194, + "step": 156552 + }, + { + "epoch": 1.88, + "grad_norm": 6.154240911405442, + "learning_rate": 1.8038820326418794e-07, + "loss": 1.3721, + "step": 156555 + }, + { + "epoch": 1.88, + "grad_norm": 3.0754325489567242, + "learning_rate": 1.8027776209880833e-07, + "loss": 1.0501, + "step": 156558 + }, + { + "epoch": 1.88, + "grad_norm": 5.168404167882823, + "learning_rate": 1.8016735444460475e-07, + "loss": 1.1432, + "step": 156561 + }, + { + "epoch": 1.88, + "grad_norm": 15.203538054084316, + "learning_rate": 1.800569803019514e-07, + "loss": 1.5147, + "step": 156564 + }, + { + "epoch": 1.88, + "grad_norm": 7.4042266059063335, + "learning_rate": 1.7994663967122793e-07, + "loss": 0.756, + "step": 156567 + }, + { + "epoch": 1.88, + "grad_norm": 3.5851128718620635, + "learning_rate": 1.7983633255280852e-07, + "loss": 1.0264, + "step": 156570 + }, + { + "epoch": 1.88, + "grad_norm": 10.685857659417767, + "learning_rate": 1.7972605894707064e-07, + "loss": 1.1038, + "step": 156573 + }, + { + "epoch": 1.88, + "grad_norm": 10.05578264034682, + "learning_rate": 1.796158188543906e-07, + "loss": 0.8538, + "step": 156576 + }, + { + "epoch": 1.88, + "grad_norm": 7.452599209866291, + "learning_rate": 1.7950561227514485e-07, + "loss": 1.0717, + "step": 156579 + }, + { + "epoch": 1.88, + "grad_norm": 7.18347991869209, + "learning_rate": 1.7939543920970858e-07, + "loss": 1.1479, + "step": 156582 + }, + { + "epoch": 1.88, + "grad_norm": 15.862187020507996, + "learning_rate": 1.792852996584582e-07, + "loss": 1.3475, + "step": 156585 + }, + { + "epoch": 1.88, + "grad_norm": 3.1036442145211143, + "learning_rate": 1.7917519362177004e-07, + "loss": 0.9302, + "step": 156588 + }, + { + "epoch": 1.88, + "grad_norm": 4.408567072894184, + "learning_rate": 1.7906512110001938e-07, + "loss": 1.0712, + "step": 156591 + }, + { + "epoch": 1.88, + "grad_norm": 10.840408139448183, + "learning_rate": 1.789550820935826e-07, + "loss": 0.9857, + "step": 156594 + }, + { + "epoch": 1.88, + "grad_norm": 7.409002615954557, + "learning_rate": 1.788450766028338e-07, + "loss": 1.244, + "step": 156597 + }, + { + "epoch": 1.88, + "grad_norm": 18.775904894094687, + "learning_rate": 1.7873510462814936e-07, + "loss": 1.478, + "step": 156600 + }, + { + "epoch": 1.88, + "grad_norm": 10.11962491481721, + "learning_rate": 1.7862516616990456e-07, + "loss": 1.1645, + "step": 156603 + }, + { + "epoch": 1.88, + "grad_norm": 8.693359425202017, + "learning_rate": 1.7851526122847462e-07, + "loss": 1.1953, + "step": 156606 + }, + { + "epoch": 1.88, + "grad_norm": 3.0185699081144275, + "learning_rate": 1.7840538980423372e-07, + "loss": 1.3401, + "step": 156609 + }, + { + "epoch": 1.88, + "grad_norm": 13.839707638002734, + "learning_rate": 1.782955518975582e-07, + "loss": 1.2694, + "step": 156612 + }, + { + "epoch": 1.88, + "grad_norm": 10.863039447616995, + "learning_rate": 1.7818574750882112e-07, + "loss": 1.2195, + "step": 156615 + }, + { + "epoch": 1.88, + "grad_norm": 9.545433705719983, + "learning_rate": 1.780759766383988e-07, + "loss": 0.9387, + "step": 156618 + }, + { + "epoch": 1.88, + "grad_norm": 10.822190354120151, + "learning_rate": 1.7796623928666656e-07, + "loss": 0.8559, + "step": 156621 + }, + { + "epoch": 1.88, + "grad_norm": 11.204177079973075, + "learning_rate": 1.7785653545399628e-07, + "loss": 1.1547, + "step": 156624 + }, + { + "epoch": 1.88, + "grad_norm": 7.471522863911473, + "learning_rate": 1.7774686514076433e-07, + "loss": 0.9811, + "step": 156627 + }, + { + "epoch": 1.88, + "grad_norm": 12.598210367662988, + "learning_rate": 1.7763722834734375e-07, + "loss": 1.1621, + "step": 156630 + }, + { + "epoch": 1.88, + "grad_norm": 14.094089590300953, + "learning_rate": 1.7752762507410981e-07, + "loss": 1.3058, + "step": 156633 + }, + { + "epoch": 1.88, + "grad_norm": 4.753891701319104, + "learning_rate": 1.7741805532143664e-07, + "loss": 0.9473, + "step": 156636 + }, + { + "epoch": 1.88, + "grad_norm": 5.269631866454131, + "learning_rate": 1.7730851908969615e-07, + "loss": 0.9644, + "step": 156639 + }, + { + "epoch": 1.88, + "grad_norm": 25.479022594197087, + "learning_rate": 1.771990163792636e-07, + "loss": 1.1676, + "step": 156642 + }, + { + "epoch": 1.88, + "grad_norm": 5.3964762173278125, + "learning_rate": 1.770895471905143e-07, + "loss": 1.2548, + "step": 156645 + }, + { + "epoch": 1.88, + "grad_norm": 13.02777800859745, + "learning_rate": 1.769801115238201e-07, + "loss": 1.3749, + "step": 156648 + }, + { + "epoch": 1.88, + "grad_norm": 8.020436613006616, + "learning_rate": 1.7687070937955297e-07, + "loss": 1.032, + "step": 156651 + }, + { + "epoch": 1.88, + "grad_norm": 12.34208102246137, + "learning_rate": 1.7676134075808815e-07, + "loss": 1.061, + "step": 156654 + }, + { + "epoch": 1.88, + "grad_norm": 4.685083626682091, + "learning_rate": 1.7665200565979867e-07, + "loss": 1.3018, + "step": 156657 + }, + { + "epoch": 1.88, + "grad_norm": 4.688375040944994, + "learning_rate": 1.7654270408505868e-07, + "loss": 1.38, + "step": 156660 + }, + { + "epoch": 1.88, + "grad_norm": 11.435490385273841, + "learning_rate": 1.7643343603423903e-07, + "loss": 1.2928, + "step": 156663 + }, + { + "epoch": 1.88, + "grad_norm": 11.753749782394879, + "learning_rate": 1.7632420150771269e-07, + "loss": 1.337, + "step": 156666 + }, + { + "epoch": 1.88, + "grad_norm": 5.75118673319674, + "learning_rate": 1.7621500050585604e-07, + "loss": 1.1121, + "step": 156669 + }, + { + "epoch": 1.88, + "grad_norm": 3.187189887433902, + "learning_rate": 1.761058330290366e-07, + "loss": 0.8151, + "step": 156672 + }, + { + "epoch": 1.88, + "grad_norm": 21.420721362380444, + "learning_rate": 1.7599669907763073e-07, + "loss": 1.1847, + "step": 156675 + }, + { + "epoch": 1.88, + "grad_norm": 7.176065557222463, + "learning_rate": 1.7588759865200921e-07, + "loss": 1.1189, + "step": 156678 + }, + { + "epoch": 1.88, + "grad_norm": 15.222693235770366, + "learning_rate": 1.75778531752544e-07, + "loss": 1.1684, + "step": 156681 + }, + { + "epoch": 1.88, + "grad_norm": 6.953217629381369, + "learning_rate": 1.7566949837960813e-07, + "loss": 0.9912, + "step": 156684 + }, + { + "epoch": 1.88, + "grad_norm": 9.017827983082125, + "learning_rate": 1.7556049853357459e-07, + "loss": 1.2002, + "step": 156687 + }, + { + "epoch": 1.88, + "grad_norm": 3.622564750794787, + "learning_rate": 1.7545153221481425e-07, + "loss": 0.6666, + "step": 156690 + }, + { + "epoch": 1.88, + "grad_norm": 2.435175977517067, + "learning_rate": 1.7534259942369902e-07, + "loss": 0.7558, + "step": 156693 + }, + { + "epoch": 1.88, + "grad_norm": 10.509311800170948, + "learning_rate": 1.752337001605997e-07, + "loss": 1.3708, + "step": 156696 + }, + { + "epoch": 1.88, + "grad_norm": 4.802874497830299, + "learning_rate": 1.7512483442588934e-07, + "loss": 1.0202, + "step": 156699 + }, + { + "epoch": 1.88, + "grad_norm": 10.387799484880741, + "learning_rate": 1.7501600221993874e-07, + "loss": 1.1331, + "step": 156702 + }, + { + "epoch": 1.88, + "grad_norm": 5.6945769793588115, + "learning_rate": 1.7490720354311984e-07, + "loss": 1.0331, + "step": 156705 + }, + { + "epoch": 1.88, + "grad_norm": 4.70488071432949, + "learning_rate": 1.7479843839580346e-07, + "loss": 1.2723, + "step": 156708 + }, + { + "epoch": 1.88, + "grad_norm": 8.60201130996598, + "learning_rate": 1.7468970677836261e-07, + "loss": 0.8684, + "step": 156711 + }, + { + "epoch": 1.88, + "grad_norm": 24.101728159795623, + "learning_rate": 1.745810086911659e-07, + "loss": 0.8837, + "step": 156714 + }, + { + "epoch": 1.88, + "grad_norm": 7.122488132717556, + "learning_rate": 1.7447234413458414e-07, + "loss": 1.107, + "step": 156717 + }, + { + "epoch": 1.88, + "grad_norm": 4.622024549920502, + "learning_rate": 1.7436371310898924e-07, + "loss": 0.9432, + "step": 156720 + }, + { + "epoch": 1.88, + "grad_norm": 27.005187748760587, + "learning_rate": 1.7425511561475207e-07, + "loss": 1.1239, + "step": 156723 + }, + { + "epoch": 1.88, + "grad_norm": 10.817326916542944, + "learning_rate": 1.7414655165224225e-07, + "loss": 1.2397, + "step": 156726 + }, + { + "epoch": 1.88, + "grad_norm": 10.735274507840058, + "learning_rate": 1.7403802122183288e-07, + "loss": 1.0956, + "step": 156729 + }, + { + "epoch": 1.88, + "grad_norm": 28.50978098298293, + "learning_rate": 1.7392952432389144e-07, + "loss": 0.8455, + "step": 156732 + }, + { + "epoch": 1.88, + "grad_norm": 7.385992593441824, + "learning_rate": 1.738210609587887e-07, + "loss": 1.1884, + "step": 156735 + }, + { + "epoch": 1.88, + "grad_norm": 6.607434820063665, + "learning_rate": 1.7371263112689551e-07, + "loss": 0.7092, + "step": 156738 + }, + { + "epoch": 1.88, + "grad_norm": 10.033070213863429, + "learning_rate": 1.736042348285827e-07, + "loss": 1.3745, + "step": 156741 + }, + { + "epoch": 1.88, + "grad_norm": 4.6996980306719935, + "learning_rate": 1.734958720642177e-07, + "loss": 1.1192, + "step": 156744 + }, + { + "epoch": 1.88, + "grad_norm": 13.759536872450612, + "learning_rate": 1.7338754283417247e-07, + "loss": 1.3842, + "step": 156747 + }, + { + "epoch": 1.88, + "grad_norm": 7.562855175223886, + "learning_rate": 1.7327924713881562e-07, + "loss": 1.119, + "step": 156750 + }, + { + "epoch": 1.88, + "grad_norm": 16.977296112310622, + "learning_rate": 1.7317098497851793e-07, + "loss": 1.3079, + "step": 156753 + }, + { + "epoch": 1.88, + "grad_norm": 8.73390934705394, + "learning_rate": 1.730627563536491e-07, + "loss": 1.1311, + "step": 156756 + }, + { + "epoch": 1.88, + "grad_norm": 11.37750583812605, + "learning_rate": 1.7295456126457556e-07, + "loss": 1.0399, + "step": 156759 + }, + { + "epoch": 1.89, + "grad_norm": 8.69244704446874, + "learning_rate": 1.7284639971166806e-07, + "loss": 1.2651, + "step": 156762 + }, + { + "epoch": 1.89, + "grad_norm": 3.470992610472553, + "learning_rate": 1.7273827169529745e-07, + "loss": 1.0841, + "step": 156765 + }, + { + "epoch": 1.89, + "grad_norm": 8.246397594203343, + "learning_rate": 1.726301772158312e-07, + "loss": 0.9327, + "step": 156768 + }, + { + "epoch": 1.89, + "grad_norm": 24.452480377234103, + "learning_rate": 1.7252211627363678e-07, + "loss": 1.2069, + "step": 156771 + }, + { + "epoch": 1.89, + "grad_norm": 11.991379228269142, + "learning_rate": 1.7241408886908728e-07, + "loss": 0.8947, + "step": 156774 + }, + { + "epoch": 1.89, + "grad_norm": 5.620838377719791, + "learning_rate": 1.723060950025468e-07, + "loss": 1.0418, + "step": 156777 + }, + { + "epoch": 1.89, + "grad_norm": 12.898851411870464, + "learning_rate": 1.7219813467438505e-07, + "loss": 1.0282, + "step": 156780 + }, + { + "epoch": 1.89, + "grad_norm": 6.515754714899293, + "learning_rate": 1.720902078849729e-07, + "loss": 1.3118, + "step": 156783 + }, + { + "epoch": 1.89, + "grad_norm": 13.945510236680787, + "learning_rate": 1.7198231463467553e-07, + "loss": 1.0445, + "step": 156786 + }, + { + "epoch": 1.89, + "grad_norm": 4.647323790636441, + "learning_rate": 1.7187445492386267e-07, + "loss": 1.2955, + "step": 156789 + }, + { + "epoch": 1.89, + "grad_norm": 31.2592083332379, + "learning_rate": 1.7176662875290184e-07, + "loss": 1.2492, + "step": 156792 + }, + { + "epoch": 1.89, + "grad_norm": 2.7093951369842664, + "learning_rate": 1.7165883612216272e-07, + "loss": 0.7822, + "step": 156795 + }, + { + "epoch": 1.89, + "grad_norm": 12.072397592831715, + "learning_rate": 1.715510770320117e-07, + "loss": 1.5301, + "step": 156798 + }, + { + "epoch": 1.89, + "grad_norm": 11.12914309253561, + "learning_rate": 1.714433514828162e-07, + "loss": 1.2944, + "step": 156801 + }, + { + "epoch": 1.89, + "grad_norm": 15.872778056539673, + "learning_rate": 1.713356594749438e-07, + "loss": 0.9482, + "step": 156804 + }, + { + "epoch": 1.89, + "grad_norm": 15.532188352557435, + "learning_rate": 1.71228001008763e-07, + "loss": 1.1191, + "step": 156807 + }, + { + "epoch": 1.89, + "grad_norm": 8.898296197279578, + "learning_rate": 1.7112037608464138e-07, + "loss": 1.0365, + "step": 156810 + }, + { + "epoch": 1.89, + "grad_norm": 10.119523968078036, + "learning_rate": 1.7101278470294413e-07, + "loss": 1.1656, + "step": 156813 + }, + { + "epoch": 1.89, + "grad_norm": 3.5514536973659303, + "learning_rate": 1.7090522686403987e-07, + "loss": 1.1659, + "step": 156816 + }, + { + "epoch": 1.89, + "grad_norm": 2.5949493008650375, + "learning_rate": 1.707977025682972e-07, + "loss": 1.3323, + "step": 156819 + }, + { + "epoch": 1.89, + "grad_norm": 5.0742841739749975, + "learning_rate": 1.7069021181608136e-07, + "loss": 1.0846, + "step": 156822 + }, + { + "epoch": 1.89, + "grad_norm": 6.9124743650040195, + "learning_rate": 1.7058275460775876e-07, + "loss": 1.3975, + "step": 156825 + }, + { + "epoch": 1.89, + "grad_norm": 3.13967432996453, + "learning_rate": 1.7047533094369571e-07, + "loss": 1.1046, + "step": 156828 + }, + { + "epoch": 1.89, + "grad_norm": 3.425894103975308, + "learning_rate": 1.703679408242609e-07, + "loss": 1.2242, + "step": 156831 + }, + { + "epoch": 1.89, + "grad_norm": 11.644431504151443, + "learning_rate": 1.702605842498195e-07, + "loss": 1.2038, + "step": 156834 + }, + { + "epoch": 1.89, + "grad_norm": 5.8696668834157, + "learning_rate": 1.7015326122073795e-07, + "loss": 0.8291, + "step": 156837 + }, + { + "epoch": 1.89, + "grad_norm": 14.359606635391167, + "learning_rate": 1.700459717373837e-07, + "loss": 1.2009, + "step": 156840 + }, + { + "epoch": 1.89, + "grad_norm": 6.868477100973976, + "learning_rate": 1.6993871580011978e-07, + "loss": 1.4211, + "step": 156843 + }, + { + "epoch": 1.89, + "grad_norm": 6.9340897153704715, + "learning_rate": 1.6983149340931482e-07, + "loss": 1.0065, + "step": 156846 + }, + { + "epoch": 1.89, + "grad_norm": 4.787349370800129, + "learning_rate": 1.6972430456533518e-07, + "loss": 1.1813, + "step": 156849 + }, + { + "epoch": 1.89, + "grad_norm": 11.7547265097365, + "learning_rate": 1.69617149268545e-07, + "loss": 1.6305, + "step": 156852 + }, + { + "epoch": 1.89, + "grad_norm": 4.489644935491111, + "learning_rate": 1.6951002751931066e-07, + "loss": 1.1172, + "step": 156855 + }, + { + "epoch": 1.89, + "grad_norm": 55.56450507016144, + "learning_rate": 1.6940293931799744e-07, + "loss": 1.0033, + "step": 156858 + }, + { + "epoch": 1.89, + "grad_norm": 8.23708956575349, + "learning_rate": 1.692958846649717e-07, + "loss": 1.0278, + "step": 156861 + }, + { + "epoch": 1.89, + "grad_norm": 5.694710038656128, + "learning_rate": 1.6918886356059872e-07, + "loss": 1.3771, + "step": 156864 + }, + { + "epoch": 1.89, + "grad_norm": 10.244358502898846, + "learning_rate": 1.690818760052415e-07, + "loss": 1.4684, + "step": 156867 + }, + { + "epoch": 1.89, + "grad_norm": 14.25602706034607, + "learning_rate": 1.6897492199926647e-07, + "loss": 1.2585, + "step": 156870 + }, + { + "epoch": 1.89, + "grad_norm": 21.926217426336944, + "learning_rate": 1.6886800154303995e-07, + "loss": 1.1579, + "step": 156873 + }, + { + "epoch": 1.89, + "grad_norm": 8.162573640662897, + "learning_rate": 1.6876111463692613e-07, + "loss": 1.1239, + "step": 156876 + }, + { + "epoch": 1.89, + "grad_norm": 6.235859577881862, + "learning_rate": 1.6865426128128915e-07, + "loss": 0.9558, + "step": 156879 + }, + { + "epoch": 1.89, + "grad_norm": 14.202609007255857, + "learning_rate": 1.6854744147649316e-07, + "loss": 1.1798, + "step": 156882 + }, + { + "epoch": 1.89, + "grad_norm": 15.138684868202352, + "learning_rate": 1.6844065522290454e-07, + "loss": 1.1372, + "step": 156885 + }, + { + "epoch": 1.89, + "grad_norm": 5.189353748140563, + "learning_rate": 1.683339025208852e-07, + "loss": 0.9648, + "step": 156888 + }, + { + "epoch": 1.89, + "grad_norm": 13.48363135385231, + "learning_rate": 1.682271833708027e-07, + "loss": 1.1, + "step": 156891 + }, + { + "epoch": 1.89, + "grad_norm": 2.9385818459703574, + "learning_rate": 1.6812049777301775e-07, + "loss": 1.0019, + "step": 156894 + }, + { + "epoch": 1.89, + "grad_norm": 7.337392712675141, + "learning_rate": 1.6801384572789682e-07, + "loss": 1.0264, + "step": 156897 + }, + { + "epoch": 1.89, + "grad_norm": 4.80311677772381, + "learning_rate": 1.6790722723580288e-07, + "loss": 0.9158, + "step": 156900 + }, + { + "epoch": 1.89, + "grad_norm": 10.532695093941804, + "learning_rate": 1.6780064229710123e-07, + "loss": 1.0585, + "step": 156903 + }, + { + "epoch": 1.89, + "grad_norm": 12.504848505463139, + "learning_rate": 1.6769409091215493e-07, + "loss": 1.5249, + "step": 156906 + }, + { + "epoch": 1.89, + "grad_norm": 7.443213329478901, + "learning_rate": 1.675875730813248e-07, + "loss": 1.4314, + "step": 156909 + }, + { + "epoch": 1.89, + "grad_norm": 8.425582065589294, + "learning_rate": 1.6748108880497826e-07, + "loss": 1.0716, + "step": 156912 + }, + { + "epoch": 1.89, + "grad_norm": 3.302919088303956, + "learning_rate": 1.673746380834762e-07, + "loss": 1.0452, + "step": 156915 + }, + { + "epoch": 1.89, + "grad_norm": 10.390628815984298, + "learning_rate": 1.6726822091718275e-07, + "loss": 0.8012, + "step": 156918 + }, + { + "epoch": 1.89, + "grad_norm": 17.93492592675033, + "learning_rate": 1.6716183730646207e-07, + "loss": 0.9496, + "step": 156921 + }, + { + "epoch": 1.89, + "grad_norm": 5.938871866924537, + "learning_rate": 1.670554872516761e-07, + "loss": 1.1679, + "step": 156924 + }, + { + "epoch": 1.89, + "grad_norm": 8.786194625173504, + "learning_rate": 1.6694917075318673e-07, + "loss": 0.8916, + "step": 156927 + }, + { + "epoch": 1.89, + "grad_norm": 5.271414707965248, + "learning_rate": 1.668428878113604e-07, + "loss": 1.4573, + "step": 156930 + }, + { + "epoch": 1.89, + "grad_norm": 3.911431543253527, + "learning_rate": 1.6673663842655452e-07, + "loss": 1.4008, + "step": 156933 + }, + { + "epoch": 1.89, + "grad_norm": 11.15590232222701, + "learning_rate": 1.6663042259913663e-07, + "loss": 1.4479, + "step": 156936 + }, + { + "epoch": 1.89, + "grad_norm": 2.8887095835041516, + "learning_rate": 1.6652424032946534e-07, + "loss": 0.9166, + "step": 156939 + }, + { + "epoch": 1.89, + "grad_norm": 18.82602616212718, + "learning_rate": 1.664180916179059e-07, + "loss": 1.0909, + "step": 156942 + }, + { + "epoch": 1.89, + "grad_norm": 5.440213790279339, + "learning_rate": 1.6631197646482022e-07, + "loss": 1.103, + "step": 156945 + }, + { + "epoch": 1.89, + "grad_norm": 5.561057141095911, + "learning_rate": 1.6620589487056915e-07, + "loss": 1.2617, + "step": 156948 + }, + { + "epoch": 1.89, + "grad_norm": 3.6770147493857515, + "learning_rate": 1.660998468355146e-07, + "loss": 1.111, + "step": 156951 + }, + { + "epoch": 1.89, + "grad_norm": 17.145478569695168, + "learning_rate": 1.6599383236001965e-07, + "loss": 1.1738, + "step": 156954 + }, + { + "epoch": 1.89, + "grad_norm": 5.180546269088373, + "learning_rate": 1.6588785144444618e-07, + "loss": 1.0209, + "step": 156957 + }, + { + "epoch": 1.89, + "grad_norm": 30.973501835931845, + "learning_rate": 1.6578190408915397e-07, + "loss": 1.4131, + "step": 156960 + }, + { + "epoch": 1.89, + "grad_norm": 9.76351808170856, + "learning_rate": 1.65675990294506e-07, + "loss": 1.0304, + "step": 156963 + }, + { + "epoch": 1.89, + "grad_norm": 6.24178867672744, + "learning_rate": 1.655701100608642e-07, + "loss": 1.017, + "step": 156966 + }, + { + "epoch": 1.89, + "grad_norm": 8.03855859779246, + "learning_rate": 1.6546426338858945e-07, + "loss": 0.7768, + "step": 156969 + }, + { + "epoch": 1.89, + "grad_norm": 4.611279918486334, + "learning_rate": 1.6535845027804254e-07, + "loss": 0.9597, + "step": 156972 + }, + { + "epoch": 1.89, + "grad_norm": 42.81836552039451, + "learning_rate": 1.6525267072958428e-07, + "loss": 1.1479, + "step": 156975 + }, + { + "epoch": 1.89, + "grad_norm": 4.237375431794297, + "learning_rate": 1.6514692474357664e-07, + "loss": 0.856, + "step": 156978 + }, + { + "epoch": 1.89, + "grad_norm": 8.039053395640572, + "learning_rate": 1.6504121232037928e-07, + "loss": 1.1062, + "step": 156981 + }, + { + "epoch": 1.89, + "grad_norm": 7.424418308588644, + "learning_rate": 1.649355334603553e-07, + "loss": 0.9698, + "step": 156984 + }, + { + "epoch": 1.89, + "grad_norm": 4.1772572774768, + "learning_rate": 1.6482988816386214e-07, + "loss": 0.9114, + "step": 156987 + }, + { + "epoch": 1.89, + "grad_norm": 8.609260506371768, + "learning_rate": 1.6472427643126397e-07, + "loss": 1.3699, + "step": 156990 + }, + { + "epoch": 1.89, + "grad_norm": 4.444538919555159, + "learning_rate": 1.6461869826291722e-07, + "loss": 1.2614, + "step": 156993 + }, + { + "epoch": 1.89, + "grad_norm": 11.00457980753397, + "learning_rate": 1.6451315365918485e-07, + "loss": 1.3923, + "step": 156996 + }, + { + "epoch": 1.89, + "grad_norm": 6.4116063957522895, + "learning_rate": 1.6440764262042664e-07, + "loss": 1.4275, + "step": 156999 + }, + { + "epoch": 1.89, + "grad_norm": 1.9901484491784496, + "learning_rate": 1.6430216514700225e-07, + "loss": 1.1899, + "step": 157002 + }, + { + "epoch": 1.89, + "grad_norm": 14.62101456243912, + "learning_rate": 1.6419672123927145e-07, + "loss": 0.9661, + "step": 157005 + }, + { + "epoch": 1.89, + "grad_norm": 8.085830503793428, + "learning_rate": 1.6409131089759501e-07, + "loss": 1.1821, + "step": 157008 + }, + { + "epoch": 1.89, + "grad_norm": 6.9496662408044285, + "learning_rate": 1.6398593412233154e-07, + "loss": 1.3323, + "step": 157011 + }, + { + "epoch": 1.89, + "grad_norm": 11.830431052259076, + "learning_rate": 1.638805909138419e-07, + "loss": 1.5039, + "step": 157014 + }, + { + "epoch": 1.89, + "grad_norm": 13.198880112186204, + "learning_rate": 1.6377528127248355e-07, + "loss": 1.1703, + "step": 157017 + }, + { + "epoch": 1.89, + "grad_norm": 6.713642226041019, + "learning_rate": 1.6367000519861732e-07, + "loss": 1.3311, + "step": 157020 + }, + { + "epoch": 1.89, + "grad_norm": 13.439622275490695, + "learning_rate": 1.635647626926029e-07, + "loss": 1.0403, + "step": 157023 + }, + { + "epoch": 1.89, + "grad_norm": 9.856742130773059, + "learning_rate": 1.6345955375479895e-07, + "loss": 1.61, + "step": 157026 + }, + { + "epoch": 1.89, + "grad_norm": 10.63227519811547, + "learning_rate": 1.6335437838556401e-07, + "loss": 1.2235, + "step": 157029 + }, + { + "epoch": 1.89, + "grad_norm": 6.557645986540946, + "learning_rate": 1.6324923658525894e-07, + "loss": 1.3165, + "step": 157032 + }, + { + "epoch": 1.89, + "grad_norm": 9.084197188296514, + "learning_rate": 1.6314412835423898e-07, + "loss": 1.3302, + "step": 157035 + }, + { + "epoch": 1.89, + "grad_norm": 1.9995159915791303, + "learning_rate": 1.630390536928661e-07, + "loss": 1.499, + "step": 157038 + }, + { + "epoch": 1.89, + "grad_norm": 15.804979952942318, + "learning_rate": 1.6293401260149778e-07, + "loss": 1.4799, + "step": 157041 + }, + { + "epoch": 1.89, + "grad_norm": 9.249447434324832, + "learning_rate": 1.6282900508049148e-07, + "loss": 1.1819, + "step": 157044 + }, + { + "epoch": 1.89, + "grad_norm": 8.127178096302302, + "learning_rate": 1.6272403113020697e-07, + "loss": 0.9027, + "step": 157047 + }, + { + "epoch": 1.89, + "grad_norm": 6.588016193003239, + "learning_rate": 1.626190907510017e-07, + "loss": 1.1396, + "step": 157050 + }, + { + "epoch": 1.89, + "grad_norm": 15.693965593564043, + "learning_rate": 1.6251418394323426e-07, + "loss": 1.8083, + "step": 157053 + }, + { + "epoch": 1.89, + "grad_norm": 10.515928310735372, + "learning_rate": 1.624093107072633e-07, + "loss": 0.8459, + "step": 157056 + }, + { + "epoch": 1.89, + "grad_norm": 7.30091161795151, + "learning_rate": 1.6230447104344404e-07, + "loss": 0.9255, + "step": 157059 + }, + { + "epoch": 1.89, + "grad_norm": 2.4788969040362736, + "learning_rate": 1.6219966495213736e-07, + "loss": 1.1391, + "step": 157062 + }, + { + "epoch": 1.89, + "grad_norm": 7.820781410841539, + "learning_rate": 1.6209489243369959e-07, + "loss": 1.2112, + "step": 157065 + }, + { + "epoch": 1.89, + "grad_norm": 4.748909492764112, + "learning_rate": 1.6199015348848822e-07, + "loss": 1.2309, + "step": 157068 + }, + { + "epoch": 1.89, + "grad_norm": 6.019543923068348, + "learning_rate": 1.618854481168608e-07, + "loss": 1.0787, + "step": 157071 + }, + { + "epoch": 1.89, + "grad_norm": 6.220398240802697, + "learning_rate": 1.6178077631917477e-07, + "loss": 1.1725, + "step": 157074 + }, + { + "epoch": 1.89, + "grad_norm": 4.622852561364742, + "learning_rate": 1.6167613809578653e-07, + "loss": 1.1362, + "step": 157077 + }, + { + "epoch": 1.89, + "grad_norm": 2.092140740423799, + "learning_rate": 1.615715334470558e-07, + "loss": 1.1431, + "step": 157080 + }, + { + "epoch": 1.89, + "grad_norm": 5.140858091132564, + "learning_rate": 1.614669623733356e-07, + "loss": 1.4318, + "step": 157083 + }, + { + "epoch": 1.89, + "grad_norm": 8.38046773361321, + "learning_rate": 1.6136242487498566e-07, + "loss": 1.405, + "step": 157086 + }, + { + "epoch": 1.89, + "grad_norm": 5.762499423741147, + "learning_rate": 1.6125792095236126e-07, + "loss": 0.9753, + "step": 157089 + }, + { + "epoch": 1.89, + "grad_norm": 2.1619909818400753, + "learning_rate": 1.61153450605821e-07, + "loss": 1.1301, + "step": 157092 + }, + { + "epoch": 1.89, + "grad_norm": 4.785554738966359, + "learning_rate": 1.6104901383571902e-07, + "loss": 0.8598, + "step": 157095 + }, + { + "epoch": 1.89, + "grad_norm": 2.5391596860208296, + "learning_rate": 1.6094461064241285e-07, + "loss": 1.3476, + "step": 157098 + }, + { + "epoch": 1.89, + "grad_norm": 2.969122260676909, + "learning_rate": 1.6084024102625885e-07, + "loss": 1.247, + "step": 157101 + }, + { + "epoch": 1.89, + "grad_norm": 10.185269082257213, + "learning_rate": 1.6073590498761338e-07, + "loss": 0.9751, + "step": 157104 + }, + { + "epoch": 1.89, + "grad_norm": 12.092911715010185, + "learning_rate": 1.6063160252683175e-07, + "loss": 0.9784, + "step": 157107 + }, + { + "epoch": 1.89, + "grad_norm": 13.704231662381606, + "learning_rate": 1.6052733364427032e-07, + "loss": 1.4159, + "step": 157110 + }, + { + "epoch": 1.89, + "grad_norm": 6.402150438763626, + "learning_rate": 1.6042309834028436e-07, + "loss": 1.4894, + "step": 157113 + }, + { + "epoch": 1.89, + "grad_norm": 5.442971700764048, + "learning_rate": 1.6031889661523247e-07, + "loss": 0.8464, + "step": 157116 + }, + { + "epoch": 1.89, + "grad_norm": 10.727344007558214, + "learning_rate": 1.602147284694666e-07, + "loss": 1.0598, + "step": 157119 + }, + { + "epoch": 1.89, + "grad_norm": 7.794042880116482, + "learning_rate": 1.6011059390334317e-07, + "loss": 1.1057, + "step": 157122 + }, + { + "epoch": 1.89, + "grad_norm": 9.755239506810584, + "learning_rate": 1.6000649291721738e-07, + "loss": 0.8079, + "step": 157125 + }, + { + "epoch": 1.89, + "grad_norm": 4.8422896947570075, + "learning_rate": 1.5990242551144564e-07, + "loss": 1.053, + "step": 157128 + }, + { + "epoch": 1.89, + "grad_norm": 8.574588926495618, + "learning_rate": 1.5979839168638322e-07, + "loss": 1.2838, + "step": 157131 + }, + { + "epoch": 1.89, + "grad_norm": 40.100655371520034, + "learning_rate": 1.596943914423843e-07, + "loss": 0.9255, + "step": 157134 + }, + { + "epoch": 1.89, + "grad_norm": 13.304180703606333, + "learning_rate": 1.5959042477980303e-07, + "loss": 1.2739, + "step": 157137 + }, + { + "epoch": 1.89, + "grad_norm": 4.848519464426644, + "learning_rate": 1.5948649169899688e-07, + "loss": 1.0259, + "step": 157140 + }, + { + "epoch": 1.89, + "grad_norm": 7.2061115260556825, + "learning_rate": 1.5938259220031672e-07, + "loss": 1.0056, + "step": 157143 + }, + { + "epoch": 1.89, + "grad_norm": 5.502206765131517, + "learning_rate": 1.592787262841211e-07, + "loss": 1.0485, + "step": 157146 + }, + { + "epoch": 1.89, + "grad_norm": 11.615424099892532, + "learning_rate": 1.5917489395076092e-07, + "loss": 1.0276, + "step": 157149 + }, + { + "epoch": 1.89, + "grad_norm": 6.8315073908037744, + "learning_rate": 1.590710952005936e-07, + "loss": 0.6934, + "step": 157152 + }, + { + "epoch": 1.89, + "grad_norm": 12.175813226416823, + "learning_rate": 1.5896733003397113e-07, + "loss": 1.075, + "step": 157155 + }, + { + "epoch": 1.89, + "grad_norm": 22.911713475819617, + "learning_rate": 1.5886359845124877e-07, + "loss": 1.2355, + "step": 157158 + }, + { + "epoch": 1.89, + "grad_norm": 19.667731019232214, + "learning_rate": 1.5875990045278068e-07, + "loss": 1.2791, + "step": 157161 + }, + { + "epoch": 1.89, + "grad_norm": 11.949334269982653, + "learning_rate": 1.5865623603891878e-07, + "loss": 1.1589, + "step": 157164 + }, + { + "epoch": 1.89, + "grad_norm": 7.207110420389606, + "learning_rate": 1.5855260521001946e-07, + "loss": 1.1065, + "step": 157167 + }, + { + "epoch": 1.89, + "grad_norm": 21.764327459801674, + "learning_rate": 1.584490079664347e-07, + "loss": 1.118, + "step": 157170 + }, + { + "epoch": 1.89, + "grad_norm": 2.5769321161498375, + "learning_rate": 1.5834544430851862e-07, + "loss": 1.1292, + "step": 157173 + }, + { + "epoch": 1.89, + "grad_norm": 4.571514863872288, + "learning_rate": 1.5824191423662427e-07, + "loss": 1.0929, + "step": 157176 + }, + { + "epoch": 1.89, + "grad_norm": 3.7464787424425756, + "learning_rate": 1.5813841775110584e-07, + "loss": 0.9569, + "step": 157179 + }, + { + "epoch": 1.89, + "grad_norm": 6.510193762217163, + "learning_rate": 1.5803495485231636e-07, + "loss": 1.1419, + "step": 157182 + }, + { + "epoch": 1.89, + "grad_norm": 6.781885013014299, + "learning_rate": 1.5793152554060775e-07, + "loss": 1.0469, + "step": 157185 + }, + { + "epoch": 1.89, + "grad_norm": 8.078835419482514, + "learning_rate": 1.5782812981633422e-07, + "loss": 0.7336, + "step": 157188 + }, + { + "epoch": 1.89, + "grad_norm": 7.325893095974399, + "learning_rate": 1.5772476767984767e-07, + "loss": 1.2289, + "step": 157191 + }, + { + "epoch": 1.89, + "grad_norm": 5.848074294999959, + "learning_rate": 1.576214391315012e-07, + "loss": 1.1331, + "step": 157194 + }, + { + "epoch": 1.89, + "grad_norm": 3.475731147155091, + "learning_rate": 1.575181441716478e-07, + "loss": 1.33, + "step": 157197 + }, + { + "epoch": 1.89, + "grad_norm": 6.719440603414878, + "learning_rate": 1.5741488280064053e-07, + "loss": 1.2305, + "step": 157200 + }, + { + "epoch": 1.89, + "grad_norm": 5.215710539119194, + "learning_rate": 1.5731165501883028e-07, + "loss": 1.1773, + "step": 157203 + }, + { + "epoch": 1.89, + "grad_norm": 8.926700246604275, + "learning_rate": 1.5720846082657005e-07, + "loss": 1.0912, + "step": 157206 + }, + { + "epoch": 1.89, + "grad_norm": 4.288788631685259, + "learning_rate": 1.5710530022421177e-07, + "loss": 1.3508, + "step": 157209 + }, + { + "epoch": 1.89, + "grad_norm": 8.554195258869704, + "learning_rate": 1.570021732121074e-07, + "loss": 1.2701, + "step": 157212 + }, + { + "epoch": 1.89, + "grad_norm": 33.05196007222618, + "learning_rate": 1.5689907979061113e-07, + "loss": 1.1136, + "step": 157215 + }, + { + "epoch": 1.89, + "grad_norm": 8.232087555106828, + "learning_rate": 1.5679601996007043e-07, + "loss": 0.7456, + "step": 157218 + }, + { + "epoch": 1.89, + "grad_norm": 13.37824142557412, + "learning_rate": 1.5669299372084056e-07, + "loss": 1.0874, + "step": 157221 + }, + { + "epoch": 1.89, + "grad_norm": 4.287535526132827, + "learning_rate": 1.5659000107327238e-07, + "loss": 0.9424, + "step": 157224 + }, + { + "epoch": 1.89, + "grad_norm": 5.811139252463054, + "learning_rate": 1.5648704201771668e-07, + "loss": 1.1267, + "step": 157227 + }, + { + "epoch": 1.89, + "grad_norm": 8.363707082815473, + "learning_rate": 1.5638411655452434e-07, + "loss": 1.1129, + "step": 157230 + }, + { + "epoch": 1.89, + "grad_norm": 8.717594406864222, + "learning_rate": 1.5628122468404728e-07, + "loss": 1.0787, + "step": 157233 + }, + { + "epoch": 1.89, + "grad_norm": 17.032442363489782, + "learning_rate": 1.5617836640663742e-07, + "loss": 1.0511, + "step": 157236 + }, + { + "epoch": 1.89, + "grad_norm": 7.604188616442802, + "learning_rate": 1.560755417226445e-07, + "loss": 1.0518, + "step": 157239 + }, + { + "epoch": 1.89, + "grad_norm": 8.237945799381153, + "learning_rate": 1.5597275063242046e-07, + "loss": 1.1118, + "step": 157242 + }, + { + "epoch": 1.89, + "grad_norm": 3.236084000979692, + "learning_rate": 1.5586999313631612e-07, + "loss": 1.1705, + "step": 157245 + }, + { + "epoch": 1.89, + "grad_norm": 12.895161248010258, + "learning_rate": 1.5576726923468122e-07, + "loss": 1.2056, + "step": 157248 + }, + { + "epoch": 1.89, + "grad_norm": 520.9857382590595, + "learning_rate": 1.5566457892786656e-07, + "loss": 1.0109, + "step": 157251 + }, + { + "epoch": 1.89, + "grad_norm": 12.461821657066782, + "learning_rate": 1.55561922216223e-07, + "loss": 1.0969, + "step": 157254 + }, + { + "epoch": 1.89, + "grad_norm": 4.804190790056466, + "learning_rate": 1.5545929910010028e-07, + "loss": 1.0934, + "step": 157257 + }, + { + "epoch": 1.89, + "grad_norm": 12.571821453887216, + "learning_rate": 1.5535670957984917e-07, + "loss": 1.2057, + "step": 157260 + }, + { + "epoch": 1.89, + "grad_norm": 6.655828171126127, + "learning_rate": 1.552541536558194e-07, + "loss": 0.9445, + "step": 157263 + }, + { + "epoch": 1.89, + "grad_norm": 29.662685124253137, + "learning_rate": 1.5515163132836187e-07, + "loss": 1.4464, + "step": 157266 + }, + { + "epoch": 1.89, + "grad_norm": 10.992234930485946, + "learning_rate": 1.5504914259782512e-07, + "loss": 1.2887, + "step": 157269 + }, + { + "epoch": 1.89, + "grad_norm": 6.688050579547339, + "learning_rate": 1.5494668746455887e-07, + "loss": 1.3161, + "step": 157272 + }, + { + "epoch": 1.89, + "grad_norm": 3.7250512133941904, + "learning_rate": 1.54844265928914e-07, + "loss": 1.203, + "step": 157275 + }, + { + "epoch": 1.89, + "grad_norm": 4.993611723645716, + "learning_rate": 1.5474187799123906e-07, + "loss": 0.958, + "step": 157278 + }, + { + "epoch": 1.89, + "grad_norm": 9.368409876211302, + "learning_rate": 1.5463952365188495e-07, + "loss": 1.2807, + "step": 157281 + }, + { + "epoch": 1.89, + "grad_norm": 9.195106100730191, + "learning_rate": 1.545372029111991e-07, + "loss": 1.0568, + "step": 157284 + }, + { + "epoch": 1.89, + "grad_norm": 7.80422451745301, + "learning_rate": 1.544349157695324e-07, + "loss": 1.2519, + "step": 157287 + }, + { + "epoch": 1.89, + "grad_norm": 7.479452857737679, + "learning_rate": 1.543326622272323e-07, + "loss": 1.3113, + "step": 157290 + }, + { + "epoch": 1.89, + "grad_norm": 13.266841257142609, + "learning_rate": 1.5423044228464745e-07, + "loss": 1.1567, + "step": 157293 + }, + { + "epoch": 1.89, + "grad_norm": 12.356346098668034, + "learning_rate": 1.5412825594212866e-07, + "loss": 1.3286, + "step": 157296 + }, + { + "epoch": 1.89, + "grad_norm": 8.806621919939706, + "learning_rate": 1.5402610320002342e-07, + "loss": 0.7936, + "step": 157299 + }, + { + "epoch": 1.89, + "grad_norm": 51.15998401711077, + "learning_rate": 1.5392398405868037e-07, + "loss": 1.4657, + "step": 157302 + }, + { + "epoch": 1.89, + "grad_norm": 9.158173371680897, + "learning_rate": 1.5382189851844807e-07, + "loss": 1.1033, + "step": 157305 + }, + { + "epoch": 1.89, + "grad_norm": 5.691757597445477, + "learning_rate": 1.5371984657967631e-07, + "loss": 1.2699, + "step": 157308 + }, + { + "epoch": 1.89, + "grad_norm": 7.69121836665116, + "learning_rate": 1.5361782824271253e-07, + "loss": 1.2669, + "step": 157311 + }, + { + "epoch": 1.89, + "grad_norm": 6.741025952847145, + "learning_rate": 1.5351584350790205e-07, + "loss": 1.2324, + "step": 157314 + }, + { + "epoch": 1.89, + "grad_norm": 31.573676351400444, + "learning_rate": 1.5341389237559678e-07, + "loss": 1.4969, + "step": 157317 + }, + { + "epoch": 1.89, + "grad_norm": 11.811369482311981, + "learning_rate": 1.5331197484614203e-07, + "loss": 1.0276, + "step": 157320 + }, + { + "epoch": 1.89, + "grad_norm": 8.26703168450361, + "learning_rate": 1.532100909198886e-07, + "loss": 1.0153, + "step": 157323 + }, + { + "epoch": 1.89, + "grad_norm": 6.096494729836321, + "learning_rate": 1.5310824059718067e-07, + "loss": 1.1143, + "step": 157326 + }, + { + "epoch": 1.89, + "grad_norm": 6.559735541565797, + "learning_rate": 1.5300642387836905e-07, + "loss": 1.3205, + "step": 157329 + }, + { + "epoch": 1.89, + "grad_norm": 16.017501883486965, + "learning_rate": 1.5290464076379795e-07, + "loss": 1.1735, + "step": 157332 + }, + { + "epoch": 1.89, + "grad_norm": 3.668456396580792, + "learning_rate": 1.528028912538182e-07, + "loss": 1.371, + "step": 157335 + }, + { + "epoch": 1.89, + "grad_norm": 6.253601847398482, + "learning_rate": 1.5270117534877393e-07, + "loss": 1.1958, + "step": 157338 + }, + { + "epoch": 1.89, + "grad_norm": 28.01420546397255, + "learning_rate": 1.525994930490138e-07, + "loss": 1.2118, + "step": 157341 + }, + { + "epoch": 1.89, + "grad_norm": 6.614181612571931, + "learning_rate": 1.5249784435488413e-07, + "loss": 1.3306, + "step": 157344 + }, + { + "epoch": 1.89, + "grad_norm": 12.448652059124088, + "learning_rate": 1.5239622926673247e-07, + "loss": 1.5552, + "step": 157347 + }, + { + "epoch": 1.89, + "grad_norm": 7.4282437481680015, + "learning_rate": 1.5229464778490633e-07, + "loss": 1.0029, + "step": 157350 + }, + { + "epoch": 1.89, + "grad_norm": 9.541209728364162, + "learning_rate": 1.5219309990975095e-07, + "loss": 1.2145, + "step": 157353 + }, + { + "epoch": 1.89, + "grad_norm": 11.20332458242087, + "learning_rate": 1.5209158564161387e-07, + "loss": 1.0335, + "step": 157356 + }, + { + "epoch": 1.89, + "grad_norm": 15.762896203203692, + "learning_rate": 1.5199010498083922e-07, + "loss": 1.0462, + "step": 157359 + }, + { + "epoch": 1.89, + "grad_norm": 8.923612126110546, + "learning_rate": 1.5188865792777675e-07, + "loss": 1.3189, + "step": 157362 + }, + { + "epoch": 1.89, + "grad_norm": 13.566567568638963, + "learning_rate": 1.517872444827706e-07, + "loss": 1.2236, + "step": 157365 + }, + { + "epoch": 1.89, + "grad_norm": 7.09779064229559, + "learning_rate": 1.516858646461672e-07, + "loss": 1.2334, + "step": 157368 + }, + { + "epoch": 1.89, + "grad_norm": 7.5690172954078, + "learning_rate": 1.515845184183129e-07, + "loss": 1.0528, + "step": 157371 + }, + { + "epoch": 1.89, + "grad_norm": 2.0890348952629947, + "learning_rate": 1.51483205799553e-07, + "loss": 1.2098, + "step": 157374 + }, + { + "epoch": 1.89, + "grad_norm": 2.9639191650456853, + "learning_rate": 1.513819267902339e-07, + "loss": 1.1437, + "step": 157377 + }, + { + "epoch": 1.89, + "grad_norm": 9.463596245821034, + "learning_rate": 1.5128068139069974e-07, + "loss": 1.124, + "step": 157380 + }, + { + "epoch": 1.89, + "grad_norm": 4.952848702800997, + "learning_rate": 1.5117946960129692e-07, + "loss": 1.1331, + "step": 157383 + }, + { + "epoch": 1.89, + "grad_norm": 5.712656250819749, + "learning_rate": 1.5107829142237184e-07, + "loss": 1.2312, + "step": 157386 + }, + { + "epoch": 1.89, + "grad_norm": 15.544409345618968, + "learning_rate": 1.5097714685426977e-07, + "loss": 1.1412, + "step": 157389 + }, + { + "epoch": 1.89, + "grad_norm": 9.253053483449257, + "learning_rate": 1.508760358973338e-07, + "loss": 0.6951, + "step": 157392 + }, + { + "epoch": 1.89, + "grad_norm": 10.823120187203758, + "learning_rate": 1.5077495855191137e-07, + "loss": 1.223, + "step": 157395 + }, + { + "epoch": 1.89, + "grad_norm": 9.285994174813395, + "learning_rate": 1.5067391481834558e-07, + "loss": 1.1528, + "step": 157398 + }, + { + "epoch": 1.89, + "grad_norm": 8.549106602343718, + "learning_rate": 1.5057290469698173e-07, + "loss": 1.1358, + "step": 157401 + }, + { + "epoch": 1.89, + "grad_norm": 4.389950108464563, + "learning_rate": 1.5047192818816615e-07, + "loss": 1.1216, + "step": 157404 + }, + { + "epoch": 1.89, + "grad_norm": 11.533867861338532, + "learning_rate": 1.503709852922408e-07, + "loss": 1.2291, + "step": 157407 + }, + { + "epoch": 1.89, + "grad_norm": 10.54945423782734, + "learning_rate": 1.5027007600955212e-07, + "loss": 1.0579, + "step": 157410 + }, + { + "epoch": 1.89, + "grad_norm": 4.62660750333003, + "learning_rate": 1.5016920034044315e-07, + "loss": 1.0824, + "step": 157413 + }, + { + "epoch": 1.89, + "grad_norm": 4.472664245066342, + "learning_rate": 1.5006835828525912e-07, + "loss": 1.0819, + "step": 157416 + }, + { + "epoch": 1.89, + "grad_norm": 2.255198959273073, + "learning_rate": 1.4996754984434424e-07, + "loss": 0.938, + "step": 157419 + }, + { + "epoch": 1.89, + "grad_norm": 4.362147459804284, + "learning_rate": 1.498667750180416e-07, + "loss": 0.9202, + "step": 157422 + }, + { + "epoch": 1.89, + "grad_norm": 24.620218606275884, + "learning_rate": 1.497660338066964e-07, + "loss": 1.0447, + "step": 157425 + }, + { + "epoch": 1.89, + "grad_norm": 10.22250910849042, + "learning_rate": 1.4966532621065066e-07, + "loss": 1.4658, + "step": 157428 + }, + { + "epoch": 1.89, + "grad_norm": 8.638867440376766, + "learning_rate": 1.4956465223024964e-07, + "loss": 0.528, + "step": 157431 + }, + { + "epoch": 1.89, + "grad_norm": 19.50900359703358, + "learning_rate": 1.4946401186583637e-07, + "loss": 1.1737, + "step": 157434 + }, + { + "epoch": 1.89, + "grad_norm": 5.096992651522748, + "learning_rate": 1.4936340511775505e-07, + "loss": 1.039, + "step": 157437 + }, + { + "epoch": 1.89, + "grad_norm": 4.754385901885195, + "learning_rate": 1.492628319863476e-07, + "loss": 1.1278, + "step": 157440 + }, + { + "epoch": 1.89, + "grad_norm": 5.755964729044822, + "learning_rate": 1.4916229247195823e-07, + "loss": 1.1506, + "step": 157443 + }, + { + "epoch": 1.89, + "grad_norm": 7.388904029003356, + "learning_rate": 1.4906178657492886e-07, + "loss": 1.4656, + "step": 157446 + }, + { + "epoch": 1.89, + "grad_norm": 6.5065633739882776, + "learning_rate": 1.4896131429560257e-07, + "loss": 1.1047, + "step": 157449 + }, + { + "epoch": 1.89, + "grad_norm": 4.757465251339356, + "learning_rate": 1.488608756343246e-07, + "loss": 0.9731, + "step": 157452 + }, + { + "epoch": 1.89, + "grad_norm": 5.74618108407974, + "learning_rate": 1.4876047059143472e-07, + "loss": 0.8872, + "step": 157455 + }, + { + "epoch": 1.89, + "grad_norm": 10.104447734913897, + "learning_rate": 1.486600991672782e-07, + "loss": 0.7846, + "step": 157458 + }, + { + "epoch": 1.89, + "grad_norm": 2.9510932353692017, + "learning_rate": 1.4855976136219696e-07, + "loss": 0.9438, + "step": 157461 + }, + { + "epoch": 1.89, + "grad_norm": 10.311377335888126, + "learning_rate": 1.4845945717653187e-07, + "loss": 1.5676, + "step": 157464 + }, + { + "epoch": 1.89, + "grad_norm": 7.220447919033616, + "learning_rate": 1.4835918661062489e-07, + "loss": 1.2275, + "step": 157467 + }, + { + "epoch": 1.89, + "grad_norm": 12.958115482391602, + "learning_rate": 1.4825894966482124e-07, + "loss": 1.3899, + "step": 157470 + }, + { + "epoch": 1.89, + "grad_norm": 11.261036605983728, + "learning_rate": 1.481587463394607e-07, + "loss": 0.8221, + "step": 157473 + }, + { + "epoch": 1.89, + "grad_norm": 6.59079708349671, + "learning_rate": 1.4805857663488522e-07, + "loss": 1.0774, + "step": 157476 + }, + { + "epoch": 1.89, + "grad_norm": 6.608866963326155, + "learning_rate": 1.479584405514378e-07, + "loss": 0.7457, + "step": 157479 + }, + { + "epoch": 1.89, + "grad_norm": 6.230317701362966, + "learning_rate": 1.4785833808945937e-07, + "loss": 1.1976, + "step": 157482 + }, + { + "epoch": 1.89, + "grad_norm": 3.0976095826089494, + "learning_rate": 1.4775826924929182e-07, + "loss": 1.0258, + "step": 157485 + }, + { + "epoch": 1.89, + "grad_norm": 16.568739952752605, + "learning_rate": 1.47658234031276e-07, + "loss": 1.0955, + "step": 157488 + }, + { + "epoch": 1.89, + "grad_norm": 5.644350982409302, + "learning_rate": 1.4755823243575385e-07, + "loss": 1.0711, + "step": 157491 + }, + { + "epoch": 1.89, + "grad_norm": 2.9233584359788938, + "learning_rate": 1.4745826446306733e-07, + "loss": 1.1886, + "step": 157494 + }, + { + "epoch": 1.89, + "grad_norm": 12.450588710649221, + "learning_rate": 1.4735833011355728e-07, + "loss": 1.2417, + "step": 157497 + }, + { + "epoch": 1.89, + "grad_norm": 6.365101620378763, + "learning_rate": 1.472584293875634e-07, + "loss": 0.9953, + "step": 157500 + }, + { + "epoch": 1.89, + "grad_norm": 12.620482357743319, + "learning_rate": 1.4715856228542768e-07, + "loss": 1.2426, + "step": 157503 + }, + { + "epoch": 1.89, + "grad_norm": 8.227123916372413, + "learning_rate": 1.4705872880749095e-07, + "loss": 1.0296, + "step": 157506 + }, + { + "epoch": 1.89, + "grad_norm": 5.331381809025125, + "learning_rate": 1.4695892895409293e-07, + "loss": 1.1527, + "step": 157509 + }, + { + "epoch": 1.89, + "grad_norm": 4.890281921465585, + "learning_rate": 1.4685916272557667e-07, + "loss": 0.9112, + "step": 157512 + }, + { + "epoch": 1.89, + "grad_norm": 2.7529204567218515, + "learning_rate": 1.4675943012227966e-07, + "loss": 1.0012, + "step": 157515 + }, + { + "epoch": 1.89, + "grad_norm": 9.455606741578, + "learning_rate": 1.4665973114454392e-07, + "loss": 1.2082, + "step": 157518 + }, + { + "epoch": 1.89, + "grad_norm": 6.5880639903557086, + "learning_rate": 1.4656006579271022e-07, + "loss": 1.3908, + "step": 157521 + }, + { + "epoch": 1.89, + "grad_norm": 5.863026487528732, + "learning_rate": 1.4646043406711719e-07, + "loss": 1.3488, + "step": 157524 + }, + { + "epoch": 1.89, + "grad_norm": 8.101828592352495, + "learning_rate": 1.4636083596810567e-07, + "loss": 1.2335, + "step": 157527 + }, + { + "epoch": 1.89, + "grad_norm": 11.288905564971845, + "learning_rate": 1.4626127149601433e-07, + "loss": 1.4066, + "step": 157530 + }, + { + "epoch": 1.89, + "grad_norm": 10.434116536966595, + "learning_rate": 1.4616174065118394e-07, + "loss": 1.2579, + "step": 157533 + }, + { + "epoch": 1.89, + "grad_norm": 8.160721095351656, + "learning_rate": 1.4606224343395537e-07, + "loss": 1.463, + "step": 157536 + }, + { + "epoch": 1.89, + "grad_norm": 7.255048493899484, + "learning_rate": 1.4596277984466722e-07, + "loss": 0.7295, + "step": 157539 + }, + { + "epoch": 1.89, + "grad_norm": 12.032947176194797, + "learning_rate": 1.4586334988365702e-07, + "loss": 1.1259, + "step": 157542 + }, + { + "epoch": 1.89, + "grad_norm": 6.06449712804586, + "learning_rate": 1.4576395355126783e-07, + "loss": 1.0633, + "step": 157545 + }, + { + "epoch": 1.89, + "grad_norm": 9.63292932997822, + "learning_rate": 1.456645908478349e-07, + "loss": 1.2059, + "step": 157548 + }, + { + "epoch": 1.89, + "grad_norm": 9.79589643680144, + "learning_rate": 1.4556526177370135e-07, + "loss": 1.1465, + "step": 157551 + }, + { + "epoch": 1.89, + "grad_norm": 6.947422901971048, + "learning_rate": 1.454659663292024e-07, + "loss": 1.3829, + "step": 157554 + }, + { + "epoch": 1.89, + "grad_norm": 8.319861307106171, + "learning_rate": 1.4536670451467894e-07, + "loss": 1.0348, + "step": 157557 + }, + { + "epoch": 1.89, + "grad_norm": 6.477100746284553, + "learning_rate": 1.4526747633046845e-07, + "loss": 1.162, + "step": 157560 + }, + { + "epoch": 1.89, + "grad_norm": 13.827943395777433, + "learning_rate": 1.4516828177691178e-07, + "loss": 1.2185, + "step": 157563 + }, + { + "epoch": 1.89, + "grad_norm": 13.863537291941743, + "learning_rate": 1.4506912085434533e-07, + "loss": 1.1683, + "step": 157566 + }, + { + "epoch": 1.89, + "grad_norm": 11.091119496241772, + "learning_rate": 1.449699935631088e-07, + "loss": 1.3944, + "step": 157569 + }, + { + "epoch": 1.89, + "grad_norm": 4.296658300828039, + "learning_rate": 1.4487089990353865e-07, + "loss": 0.9734, + "step": 157572 + }, + { + "epoch": 1.89, + "grad_norm": 12.330032338798427, + "learning_rate": 1.4477183987597566e-07, + "loss": 1.2479, + "step": 157575 + }, + { + "epoch": 1.89, + "grad_norm": 2.66246629554704, + "learning_rate": 1.4467281348075623e-07, + "loss": 1.01, + "step": 157578 + }, + { + "epoch": 1.89, + "grad_norm": 4.883126132384208, + "learning_rate": 1.4457382071821791e-07, + "loss": 1.229, + "step": 157581 + }, + { + "epoch": 1.89, + "grad_norm": 7.531587812722584, + "learning_rate": 1.4447486158869928e-07, + "loss": 1.3615, + "step": 157584 + }, + { + "epoch": 1.89, + "grad_norm": 12.853283176486984, + "learning_rate": 1.4437593609253896e-07, + "loss": 1.0934, + "step": 157587 + }, + { + "epoch": 1.89, + "grad_norm": 9.628938503907172, + "learning_rate": 1.4427704423007338e-07, + "loss": 1.2583, + "step": 157590 + }, + { + "epoch": 1.9, + "grad_norm": 2.9289040817800767, + "learning_rate": 1.4417818600164e-07, + "loss": 0.8507, + "step": 157593 + }, + { + "epoch": 1.9, + "grad_norm": 2.780847719100988, + "learning_rate": 1.440793614075764e-07, + "loss": 1.0768, + "step": 157596 + }, + { + "epoch": 1.9, + "grad_norm": 5.848115774692517, + "learning_rate": 1.439805704482189e-07, + "loss": 1.1676, + "step": 157599 + }, + { + "epoch": 1.9, + "grad_norm": 5.638613397917343, + "learning_rate": 1.4388181312390615e-07, + "loss": 1.3507, + "step": 157602 + }, + { + "epoch": 1.9, + "grad_norm": 9.059154637408403, + "learning_rate": 1.4378308943497565e-07, + "loss": 1.1076, + "step": 157605 + }, + { + "epoch": 1.9, + "grad_norm": 10.02379176437362, + "learning_rate": 1.436843993817638e-07, + "loss": 1.2509, + "step": 157608 + }, + { + "epoch": 1.9, + "grad_norm": 2.7069748970991965, + "learning_rate": 1.4358574296460482e-07, + "loss": 1.4712, + "step": 157611 + }, + { + "epoch": 1.9, + "grad_norm": 12.207988765171987, + "learning_rate": 1.4348712018383837e-07, + "loss": 1.518, + "step": 157614 + }, + { + "epoch": 1.9, + "grad_norm": 32.47824539912865, + "learning_rate": 1.4338853103979978e-07, + "loss": 1.257, + "step": 157617 + }, + { + "epoch": 1.9, + "grad_norm": 3.903385802701725, + "learning_rate": 1.4328997553282652e-07, + "loss": 1.3682, + "step": 157620 + }, + { + "epoch": 1.9, + "grad_norm": 3.068730208690329, + "learning_rate": 1.4319145366325282e-07, + "loss": 1.2762, + "step": 157623 + }, + { + "epoch": 1.9, + "grad_norm": 5.178296821651667, + "learning_rate": 1.4309296543141725e-07, + "loss": 1.0682, + "step": 157626 + }, + { + "epoch": 1.9, + "grad_norm": 6.054310949322331, + "learning_rate": 1.4299451083765514e-07, + "loss": 1.1078, + "step": 157629 + }, + { + "epoch": 1.9, + "grad_norm": 10.041281118274435, + "learning_rate": 1.4289608988230286e-07, + "loss": 1.15, + "step": 157632 + }, + { + "epoch": 1.9, + "grad_norm": 13.568519437995525, + "learning_rate": 1.427977025656946e-07, + "loss": 1.3876, + "step": 157635 + }, + { + "epoch": 1.9, + "grad_norm": 5.036312791709115, + "learning_rate": 1.426993488881667e-07, + "loss": 1.0032, + "step": 157638 + }, + { + "epoch": 1.9, + "grad_norm": 5.480134299012644, + "learning_rate": 1.4260102885005568e-07, + "loss": 1.1441, + "step": 157641 + }, + { + "epoch": 1.9, + "grad_norm": 8.602725468950444, + "learning_rate": 1.425027424516967e-07, + "loss": 1.0894, + "step": 157644 + }, + { + "epoch": 1.9, + "grad_norm": 3.2908656041143556, + "learning_rate": 1.4240448969342623e-07, + "loss": 0.9299, + "step": 157647 + }, + { + "epoch": 1.9, + "grad_norm": 16.22028308076078, + "learning_rate": 1.4230627057557845e-07, + "loss": 0.9398, + "step": 157650 + }, + { + "epoch": 1.9, + "grad_norm": 12.392081252631744, + "learning_rate": 1.422080850984886e-07, + "loss": 0.6345, + "step": 157653 + }, + { + "epoch": 1.9, + "grad_norm": 6.757688316753001, + "learning_rate": 1.421099332624909e-07, + "loss": 1.0658, + "step": 157656 + }, + { + "epoch": 1.9, + "grad_norm": 8.362141420292778, + "learning_rate": 1.4201181506792172e-07, + "loss": 1.1562, + "step": 157659 + }, + { + "epoch": 1.9, + "grad_norm": 9.65556434521594, + "learning_rate": 1.4191373051511526e-07, + "loss": 1.1242, + "step": 157662 + }, + { + "epoch": 1.9, + "grad_norm": 6.997555470009573, + "learning_rate": 1.418156796044068e-07, + "loss": 0.9763, + "step": 157665 + }, + { + "epoch": 1.9, + "grad_norm": 6.3702418350108525, + "learning_rate": 1.4171766233612937e-07, + "loss": 0.7698, + "step": 157668 + }, + { + "epoch": 1.9, + "grad_norm": 9.972966276395539, + "learning_rate": 1.4161967871062056e-07, + "loss": 1.1303, + "step": 157671 + }, + { + "epoch": 1.9, + "grad_norm": 4.535822371345623, + "learning_rate": 1.4152172872821112e-07, + "loss": 1.1748, + "step": 157674 + }, + { + "epoch": 1.9, + "grad_norm": 9.328508350377726, + "learning_rate": 1.4142381238923752e-07, + "loss": 1.1059, + "step": 157677 + }, + { + "epoch": 1.9, + "grad_norm": 9.771153223502413, + "learning_rate": 1.4132592969403393e-07, + "loss": 1.7379, + "step": 157680 + }, + { + "epoch": 1.9, + "grad_norm": 6.350168517505797, + "learning_rate": 1.4122808064293226e-07, + "loss": 1.3675, + "step": 157683 + }, + { + "epoch": 1.9, + "grad_norm": 18.341520532901832, + "learning_rate": 1.4113026523627005e-07, + "loss": 0.9002, + "step": 157686 + }, + { + "epoch": 1.9, + "grad_norm": 6.978058569023581, + "learning_rate": 1.4103248347437815e-07, + "loss": 0.9699, + "step": 157689 + }, + { + "epoch": 1.9, + "grad_norm": 3.6821951113389506, + "learning_rate": 1.409347353575907e-07, + "loss": 1.0544, + "step": 157692 + }, + { + "epoch": 1.9, + "grad_norm": 8.066396270045, + "learning_rate": 1.4083702088624308e-07, + "loss": 1.0778, + "step": 157695 + }, + { + "epoch": 1.9, + "grad_norm": 8.303211049827018, + "learning_rate": 1.4073934006066602e-07, + "loss": 1.2281, + "step": 157698 + }, + { + "epoch": 1.9, + "grad_norm": 15.56241236520583, + "learning_rate": 1.40641692881196e-07, + "loss": 1.4765, + "step": 157701 + }, + { + "epoch": 1.9, + "grad_norm": 8.013872276915771, + "learning_rate": 1.4054407934816272e-07, + "loss": 1.2088, + "step": 157704 + }, + { + "epoch": 1.9, + "grad_norm": 1.780128871899721, + "learning_rate": 1.4044649946190257e-07, + "loss": 0.9661, + "step": 157707 + }, + { + "epoch": 1.9, + "grad_norm": 5.601049923635489, + "learning_rate": 1.4034895322274534e-07, + "loss": 1.529, + "step": 157710 + }, + { + "epoch": 1.9, + "grad_norm": 8.11847900926097, + "learning_rate": 1.4025144063102735e-07, + "loss": 0.99, + "step": 157713 + }, + { + "epoch": 1.9, + "grad_norm": 10.251119168191067, + "learning_rate": 1.401539616870806e-07, + "loss": 1.3118, + "step": 157716 + }, + { + "epoch": 1.9, + "grad_norm": 9.685028086117335, + "learning_rate": 1.4005651639123484e-07, + "loss": 1.4703, + "step": 157719 + }, + { + "epoch": 1.9, + "grad_norm": 2.1435965833224686, + "learning_rate": 1.3995910474382534e-07, + "loss": 1.5256, + "step": 157722 + }, + { + "epoch": 1.9, + "grad_norm": 5.240207218008527, + "learning_rate": 1.3986172674518295e-07, + "loss": 1.0672, + "step": 157725 + }, + { + "epoch": 1.9, + "grad_norm": 6.2767775915362884, + "learning_rate": 1.3976438239564294e-07, + "loss": 1.0857, + "step": 157728 + }, + { + "epoch": 1.9, + "grad_norm": 5.579163428762912, + "learning_rate": 1.3966707169553395e-07, + "loss": 0.8428, + "step": 157731 + }, + { + "epoch": 1.9, + "grad_norm": 3.065656437241013, + "learning_rate": 1.3956979464518904e-07, + "loss": 1.4783, + "step": 157734 + }, + { + "epoch": 1.9, + "grad_norm": 15.769123653183678, + "learning_rate": 1.394725512449424e-07, + "loss": 1.4324, + "step": 157737 + }, + { + "epoch": 1.9, + "grad_norm": 8.98416209397553, + "learning_rate": 1.3937534149512377e-07, + "loss": 0.9832, + "step": 157740 + }, + { + "epoch": 1.9, + "grad_norm": 2.4989852796614143, + "learning_rate": 1.392781653960651e-07, + "loss": 0.9087, + "step": 157743 + }, + { + "epoch": 1.9, + "grad_norm": 14.75387258406047, + "learning_rate": 1.3918102294809722e-07, + "loss": 1.102, + "step": 157746 + }, + { + "epoch": 1.9, + "grad_norm": 3.6033026921999287, + "learning_rate": 1.3908391415155432e-07, + "loss": 1.1149, + "step": 157749 + }, + { + "epoch": 1.9, + "grad_norm": 3.5835329038659305, + "learning_rate": 1.38986839006765e-07, + "loss": 1.4806, + "step": 157752 + }, + { + "epoch": 1.9, + "grad_norm": 9.698960864568354, + "learning_rate": 1.388897975140624e-07, + "loss": 1.349, + "step": 157755 + }, + { + "epoch": 1.9, + "grad_norm": 10.853004558937632, + "learning_rate": 1.387927896737762e-07, + "loss": 1.2974, + "step": 157758 + }, + { + "epoch": 1.9, + "grad_norm": 5.408647106862227, + "learning_rate": 1.3869581548623834e-07, + "loss": 1.3389, + "step": 157761 + }, + { + "epoch": 1.9, + "grad_norm": 8.45297235553829, + "learning_rate": 1.3859887495177972e-07, + "loss": 1.146, + "step": 157764 + }, + { + "epoch": 1.9, + "grad_norm": 6.196001081867043, + "learning_rate": 1.3850196807073225e-07, + "loss": 1.0524, + "step": 157767 + }, + { + "epoch": 1.9, + "grad_norm": 5.200505234152907, + "learning_rate": 1.3840509484342456e-07, + "loss": 1.193, + "step": 157770 + }, + { + "epoch": 1.9, + "grad_norm": 15.140370014368433, + "learning_rate": 1.3830825527018754e-07, + "loss": 1.2665, + "step": 157773 + }, + { + "epoch": 1.9, + "grad_norm": 4.318853939921945, + "learning_rate": 1.38211449351352e-07, + "loss": 1.301, + "step": 157776 + }, + { + "epoch": 1.9, + "grad_norm": 8.93633504010739, + "learning_rate": 1.381146770872499e-07, + "loss": 0.987, + "step": 157779 + }, + { + "epoch": 1.9, + "grad_norm": 8.892412895202494, + "learning_rate": 1.3801793847820987e-07, + "loss": 1.291, + "step": 157782 + }, + { + "epoch": 1.9, + "grad_norm": 10.667790952036137, + "learning_rate": 1.3792123352456277e-07, + "loss": 1.2844, + "step": 157785 + }, + { + "epoch": 1.9, + "grad_norm": 6.563857156799222, + "learning_rate": 1.378245622266372e-07, + "loss": 1.123, + "step": 157788 + }, + { + "epoch": 1.9, + "grad_norm": 6.463735079834021, + "learning_rate": 1.3772792458476403e-07, + "loss": 1.2768, + "step": 157791 + }, + { + "epoch": 1.9, + "grad_norm": 6.050445242828697, + "learning_rate": 1.3763132059927408e-07, + "loss": 1.217, + "step": 157794 + }, + { + "epoch": 1.9, + "grad_norm": 4.60914002447541, + "learning_rate": 1.375347502704949e-07, + "loss": 1.211, + "step": 157797 + }, + { + "epoch": 1.9, + "grad_norm": 8.748795903318731, + "learning_rate": 1.3743821359875843e-07, + "loss": 0.9324, + "step": 157800 + }, + { + "epoch": 1.9, + "grad_norm": 13.603169758886684, + "learning_rate": 1.3734171058439105e-07, + "loss": 1.2989, + "step": 157803 + }, + { + "epoch": 1.9, + "grad_norm": 17.451119039640247, + "learning_rate": 1.3724524122772476e-07, + "loss": 0.8825, + "step": 157806 + }, + { + "epoch": 1.9, + "grad_norm": 5.341553475782617, + "learning_rate": 1.3714880552908927e-07, + "loss": 1.0764, + "step": 157809 + }, + { + "epoch": 1.9, + "grad_norm": 11.94740676309703, + "learning_rate": 1.37052403488811e-07, + "loss": 1.3896, + "step": 157812 + }, + { + "epoch": 1.9, + "grad_norm": 6.686178759831806, + "learning_rate": 1.3695603510721967e-07, + "loss": 1.1407, + "step": 157815 + }, + { + "epoch": 1.9, + "grad_norm": 5.985454768299984, + "learning_rate": 1.36859700384645e-07, + "loss": 1.0974, + "step": 157818 + }, + { + "epoch": 1.9, + "grad_norm": 5.606159070413969, + "learning_rate": 1.367633993214168e-07, + "loss": 1.0742, + "step": 157821 + }, + { + "epoch": 1.9, + "grad_norm": 8.328206715809092, + "learning_rate": 1.3666713191786252e-07, + "loss": 1.3132, + "step": 157824 + }, + { + "epoch": 1.9, + "grad_norm": 17.804387000359807, + "learning_rate": 1.3657089817430857e-07, + "loss": 0.9062, + "step": 157827 + }, + { + "epoch": 1.9, + "grad_norm": 16.989501411597214, + "learning_rate": 1.3647469809108693e-07, + "loss": 1.1735, + "step": 157830 + }, + { + "epoch": 1.9, + "grad_norm": 6.5822427996754564, + "learning_rate": 1.363785316685229e-07, + "loss": 0.8763, + "step": 157833 + }, + { + "epoch": 1.9, + "grad_norm": 6.295482015546303, + "learning_rate": 1.3628239890694728e-07, + "loss": 0.8359, + "step": 157836 + }, + { + "epoch": 1.9, + "grad_norm": 12.228970755999452, + "learning_rate": 1.3618629980668652e-07, + "loss": 1.3295, + "step": 157839 + }, + { + "epoch": 1.9, + "grad_norm": 29.35178095991868, + "learning_rate": 1.3609023436807033e-07, + "loss": 1.1043, + "step": 157842 + }, + { + "epoch": 1.9, + "grad_norm": 19.591134709730394, + "learning_rate": 1.3599420259142293e-07, + "loss": 1.505, + "step": 157845 + }, + { + "epoch": 1.9, + "grad_norm": 7.247077637387629, + "learning_rate": 1.3589820447707624e-07, + "loss": 1.1329, + "step": 157848 + }, + { + "epoch": 1.9, + "grad_norm": 2.2023317328373087, + "learning_rate": 1.3580224002535446e-07, + "loss": 1.0491, + "step": 157851 + }, + { + "epoch": 1.9, + "grad_norm": 10.561588852960332, + "learning_rate": 1.3570630923658733e-07, + "loss": 0.9588, + "step": 157854 + }, + { + "epoch": 1.9, + "grad_norm": 3.165775997995763, + "learning_rate": 1.3561041211110126e-07, + "loss": 0.9911, + "step": 157857 + }, + { + "epoch": 1.9, + "grad_norm": 10.294143772781975, + "learning_rate": 1.3551454864922376e-07, + "loss": 0.9175, + "step": 157860 + }, + { + "epoch": 1.9, + "grad_norm": 9.453342523642496, + "learning_rate": 1.3541871885128234e-07, + "loss": 1.274, + "step": 157863 + }, + { + "epoch": 1.9, + "grad_norm": 4.498842052762529, + "learning_rate": 1.3532292271760338e-07, + "loss": 1.123, + "step": 157866 + }, + { + "epoch": 1.9, + "grad_norm": 9.299920769779852, + "learning_rate": 1.3522716024851335e-07, + "loss": 1.0889, + "step": 157869 + }, + { + "epoch": 1.9, + "grad_norm": 3.28846884017111, + "learning_rate": 1.3513143144433972e-07, + "loss": 0.7855, + "step": 157872 + }, + { + "epoch": 1.9, + "grad_norm": 6.291939588359812, + "learning_rate": 1.350357363054111e-07, + "loss": 0.8255, + "step": 157875 + }, + { + "epoch": 1.9, + "grad_norm": 7.181319734359247, + "learning_rate": 1.3494007483205062e-07, + "loss": 0.8599, + "step": 157878 + }, + { + "epoch": 1.9, + "grad_norm": 24.87697811526338, + "learning_rate": 1.3484444702458576e-07, + "loss": 1.0445, + "step": 157881 + }, + { + "epoch": 1.9, + "grad_norm": 5.194703580237702, + "learning_rate": 1.3474885288334515e-07, + "loss": 1.0179, + "step": 157884 + }, + { + "epoch": 1.9, + "grad_norm": 3.4408205945224126, + "learning_rate": 1.3465329240865187e-07, + "loss": 1.0821, + "step": 157887 + }, + { + "epoch": 1.9, + "grad_norm": 3.5577139546852017, + "learning_rate": 1.3455776560083456e-07, + "loss": 0.8344, + "step": 157890 + }, + { + "epoch": 1.9, + "grad_norm": 9.209690953918413, + "learning_rate": 1.3446227246021625e-07, + "loss": 1.1079, + "step": 157893 + }, + { + "epoch": 1.9, + "grad_norm": 8.020827280601901, + "learning_rate": 1.3436681298712561e-07, + "loss": 1.0553, + "step": 157896 + }, + { + "epoch": 1.9, + "grad_norm": 13.429481508867845, + "learning_rate": 1.3427138718188794e-07, + "loss": 1.2507, + "step": 157899 + }, + { + "epoch": 1.9, + "grad_norm": 7.434759862885775, + "learning_rate": 1.3417599504482848e-07, + "loss": 1.1748, + "step": 157902 + }, + { + "epoch": 1.9, + "grad_norm": 3.7962539101951136, + "learning_rate": 1.3408063657627145e-07, + "loss": 1.1876, + "step": 157905 + }, + { + "epoch": 1.9, + "grad_norm": 5.4991400478929835, + "learning_rate": 1.3398531177654438e-07, + "loss": 1.0998, + "step": 157908 + }, + { + "epoch": 1.9, + "grad_norm": 13.96091824656244, + "learning_rate": 1.3389002064597145e-07, + "loss": 1.2254, + "step": 157911 + }, + { + "epoch": 1.9, + "grad_norm": 10.376140053086685, + "learning_rate": 1.3379476318487905e-07, + "loss": 1.4144, + "step": 157914 + }, + { + "epoch": 1.9, + "grad_norm": 8.075778990124212, + "learning_rate": 1.3369953939359025e-07, + "loss": 1.2847, + "step": 157917 + }, + { + "epoch": 1.9, + "grad_norm": 13.047449554093008, + "learning_rate": 1.3360434927243148e-07, + "loss": 1.0553, + "step": 157920 + }, + { + "epoch": 1.9, + "grad_norm": 7.9404756415489315, + "learning_rate": 1.3350919282172693e-07, + "loss": 1.1061, + "step": 157923 + }, + { + "epoch": 1.9, + "grad_norm": 11.345797527222256, + "learning_rate": 1.3341407004180295e-07, + "loss": 1.1458, + "step": 157926 + }, + { + "epoch": 1.9, + "grad_norm": 4.820279643160674, + "learning_rate": 1.3331898093298047e-07, + "loss": 1.1074, + "step": 157929 + }, + { + "epoch": 1.9, + "grad_norm": 9.199591654212252, + "learning_rate": 1.3322392549558804e-07, + "loss": 0.8783, + "step": 157932 + }, + { + "epoch": 1.9, + "grad_norm": 17.147850728550715, + "learning_rate": 1.3312890372994768e-07, + "loss": 1.3105, + "step": 157935 + }, + { + "epoch": 1.9, + "grad_norm": 12.574533100355566, + "learning_rate": 1.3303391563638468e-07, + "loss": 1.2386, + "step": 157938 + }, + { + "epoch": 1.9, + "grad_norm": 21.485498439515492, + "learning_rate": 1.329389612152232e-07, + "loss": 1.0748, + "step": 157941 + }, + { + "epoch": 1.9, + "grad_norm": 2.4412959220597488, + "learning_rate": 1.3284404046678634e-07, + "loss": 0.8657, + "step": 157944 + }, + { + "epoch": 1.9, + "grad_norm": 14.317139013036593, + "learning_rate": 1.3274915339139827e-07, + "loss": 1.2331, + "step": 157947 + }, + { + "epoch": 1.9, + "grad_norm": 7.669475431628444, + "learning_rate": 1.326542999893843e-07, + "loss": 1.2069, + "step": 157950 + }, + { + "epoch": 1.9, + "grad_norm": 12.377192029779486, + "learning_rate": 1.3255948026106636e-07, + "loss": 0.8732, + "step": 157953 + }, + { + "epoch": 1.9, + "grad_norm": 9.375399534245853, + "learning_rate": 1.3246469420676979e-07, + "loss": 1.0369, + "step": 157956 + }, + { + "epoch": 1.9, + "grad_norm": 17.265236013990027, + "learning_rate": 1.323699418268154e-07, + "loss": 1.1198, + "step": 157959 + }, + { + "epoch": 1.9, + "grad_norm": 11.919876027015235, + "learning_rate": 1.3227522312152853e-07, + "loss": 1.0277, + "step": 157962 + }, + { + "epoch": 1.9, + "grad_norm": 11.262218141369564, + "learning_rate": 1.3218053809123112e-07, + "loss": 0.8108, + "step": 157965 + }, + { + "epoch": 1.9, + "grad_norm": 8.988918530826158, + "learning_rate": 1.3208588673624844e-07, + "loss": 1.1983, + "step": 157968 + }, + { + "epoch": 1.9, + "grad_norm": 11.433559848315952, + "learning_rate": 1.319912690569025e-07, + "loss": 0.7428, + "step": 157971 + }, + { + "epoch": 1.9, + "grad_norm": 4.940017968921783, + "learning_rate": 1.3189668505351528e-07, + "loss": 1.1721, + "step": 157974 + }, + { + "epoch": 1.9, + "grad_norm": 13.322341767829815, + "learning_rate": 1.3180213472640978e-07, + "loss": 1.0577, + "step": 157977 + }, + { + "epoch": 1.9, + "grad_norm": 5.968233832221864, + "learning_rate": 1.3170761807590916e-07, + "loss": 1.2263, + "step": 157980 + }, + { + "epoch": 1.9, + "grad_norm": 5.752984170311468, + "learning_rate": 1.3161313510233753e-07, + "loss": 1.1083, + "step": 157983 + }, + { + "epoch": 1.9, + "grad_norm": 12.058553029165571, + "learning_rate": 1.3151868580601357e-07, + "loss": 0.9917, + "step": 157986 + }, + { + "epoch": 1.9, + "grad_norm": 5.594756761222701, + "learning_rate": 1.3142427018726255e-07, + "loss": 1.0471, + "step": 157989 + }, + { + "epoch": 1.9, + "grad_norm": 3.8066786976903098, + "learning_rate": 1.3132988824640648e-07, + "loss": 0.9729, + "step": 157992 + }, + { + "epoch": 1.9, + "grad_norm": 7.244287695007548, + "learning_rate": 1.3123553998376613e-07, + "loss": 0.8935, + "step": 157995 + }, + { + "epoch": 1.9, + "grad_norm": 3.001239655596676, + "learning_rate": 1.3114122539966468e-07, + "loss": 1.2093, + "step": 157998 + }, + { + "epoch": 1.9, + "grad_norm": 11.100401033785168, + "learning_rate": 1.310469444944229e-07, + "loss": 1.4532, + "step": 158001 + }, + { + "epoch": 1.9, + "grad_norm": 4.9376109070329495, + "learning_rate": 1.3095269726836278e-07, + "loss": 1.1459, + "step": 158004 + }, + { + "epoch": 1.9, + "grad_norm": 4.422773363458128, + "learning_rate": 1.3085848372180632e-07, + "loss": 1.3159, + "step": 158007 + }, + { + "epoch": 1.9, + "grad_norm": 11.258404747587667, + "learning_rate": 1.3076430385507543e-07, + "loss": 0.9552, + "step": 158010 + }, + { + "epoch": 1.9, + "grad_norm": 17.466387270029532, + "learning_rate": 1.30670157668491e-07, + "loss": 1.178, + "step": 158013 + }, + { + "epoch": 1.9, + "grad_norm": 4.752073933641815, + "learning_rate": 1.30576045162375e-07, + "loss": 1.3226, + "step": 158016 + }, + { + "epoch": 1.9, + "grad_norm": 10.187027564495244, + "learning_rate": 1.3048196633704714e-07, + "loss": 1.1992, + "step": 158019 + }, + { + "epoch": 1.9, + "grad_norm": 10.943693853967803, + "learning_rate": 1.303879211928305e-07, + "loss": 1.0671, + "step": 158022 + }, + { + "epoch": 1.9, + "grad_norm": 5.087630190702625, + "learning_rate": 1.3029390973004375e-07, + "loss": 1.4692, + "step": 158025 + }, + { + "epoch": 1.9, + "grad_norm": 7.1551446552998925, + "learning_rate": 1.301999319490088e-07, + "loss": 1.2366, + "step": 158028 + }, + { + "epoch": 1.9, + "grad_norm": 10.383395512195504, + "learning_rate": 1.3010598785004658e-07, + "loss": 1.2378, + "step": 158031 + }, + { + "epoch": 1.9, + "grad_norm": 10.548972160450884, + "learning_rate": 1.3001207743347899e-07, + "loss": 0.8816, + "step": 158034 + }, + { + "epoch": 1.9, + "grad_norm": 13.664078881687512, + "learning_rate": 1.299182006996236e-07, + "loss": 1.0067, + "step": 158037 + }, + { + "epoch": 1.9, + "grad_norm": 4.061154035811435, + "learning_rate": 1.298243576488023e-07, + "loss": 1.3113, + "step": 158040 + }, + { + "epoch": 1.9, + "grad_norm": 14.641663917160827, + "learning_rate": 1.297305482813349e-07, + "loss": 1.3683, + "step": 158043 + }, + { + "epoch": 1.9, + "grad_norm": 10.154663730297461, + "learning_rate": 1.2963677259754226e-07, + "loss": 1.3179, + "step": 158046 + }, + { + "epoch": 1.9, + "grad_norm": 5.166098482701807, + "learning_rate": 1.295430305977441e-07, + "loss": 0.999, + "step": 158049 + }, + { + "epoch": 1.9, + "grad_norm": 10.359276537145195, + "learning_rate": 1.2944932228226014e-07, + "loss": 1.0456, + "step": 158052 + }, + { + "epoch": 1.9, + "grad_norm": 3.9326249198677004, + "learning_rate": 1.2935564765141019e-07, + "loss": 1.1097, + "step": 158055 + }, + { + "epoch": 1.9, + "grad_norm": 6.580324620329167, + "learning_rate": 1.2926200670551503e-07, + "loss": 1.1184, + "step": 158058 + }, + { + "epoch": 1.9, + "grad_norm": 4.1710022746978, + "learning_rate": 1.2916839944489222e-07, + "loss": 0.9599, + "step": 158061 + }, + { + "epoch": 1.9, + "grad_norm": 17.77225611404024, + "learning_rate": 1.2907482586986264e-07, + "loss": 1.0681, + "step": 158064 + }, + { + "epoch": 1.9, + "grad_norm": 7.2616084897504045, + "learning_rate": 1.289812859807449e-07, + "loss": 1.1863, + "step": 158067 + }, + { + "epoch": 1.9, + "grad_norm": 12.339993221100535, + "learning_rate": 1.288877797778576e-07, + "loss": 0.7967, + "step": 158070 + }, + { + "epoch": 1.9, + "grad_norm": 6.908651384212009, + "learning_rate": 1.2879430726152164e-07, + "loss": 1.4385, + "step": 158073 + }, + { + "epoch": 1.9, + "grad_norm": 4.6020906800661185, + "learning_rate": 1.2870086843205566e-07, + "loss": 1.1655, + "step": 158076 + }, + { + "epoch": 1.9, + "grad_norm": 5.363293800650858, + "learning_rate": 1.2860746328977825e-07, + "loss": 0.8902, + "step": 158079 + }, + { + "epoch": 1.9, + "grad_norm": 20.97445847541102, + "learning_rate": 1.2851409183500695e-07, + "loss": 0.7843, + "step": 158082 + }, + { + "epoch": 1.9, + "grad_norm": 7.655679326353793, + "learning_rate": 1.2842075406806155e-07, + "loss": 1.0214, + "step": 158085 + }, + { + "epoch": 1.9, + "grad_norm": 11.491586103430485, + "learning_rate": 1.283274499892595e-07, + "loss": 1.2155, + "step": 158088 + }, + { + "epoch": 1.9, + "grad_norm": 8.071571724481661, + "learning_rate": 1.282341795989217e-07, + "loss": 0.8298, + "step": 158091 + }, + { + "epoch": 1.9, + "grad_norm": 5.707945308576712, + "learning_rate": 1.2814094289736344e-07, + "loss": 0.9901, + "step": 158094 + }, + { + "epoch": 1.9, + "grad_norm": 7.483266758847486, + "learning_rate": 1.280477398849056e-07, + "loss": 1.1827, + "step": 158097 + }, + { + "epoch": 1.9, + "grad_norm": 27.550151624221407, + "learning_rate": 1.2795457056186455e-07, + "loss": 1.2793, + "step": 158100 + }, + { + "epoch": 1.9, + "grad_norm": 22.625420438193224, + "learning_rate": 1.2786143492855785e-07, + "loss": 0.9033, + "step": 158103 + }, + { + "epoch": 1.9, + "grad_norm": 5.567522731616147, + "learning_rate": 1.2776833298530522e-07, + "loss": 0.9351, + "step": 158106 + }, + { + "epoch": 1.9, + "grad_norm": 5.034585793666787, + "learning_rate": 1.2767526473242308e-07, + "loss": 1.317, + "step": 158109 + }, + { + "epoch": 1.9, + "grad_norm": 5.036649998357804, + "learning_rate": 1.2758223017022898e-07, + "loss": 1.2564, + "step": 158112 + }, + { + "epoch": 1.9, + "grad_norm": 11.264344963650021, + "learning_rate": 1.274892292990404e-07, + "loss": 0.9454, + "step": 158115 + }, + { + "epoch": 1.9, + "grad_norm": 29.81093104653839, + "learning_rate": 1.2739626211917598e-07, + "loss": 0.6911, + "step": 158118 + }, + { + "epoch": 1.9, + "grad_norm": 2.8517814708211535, + "learning_rate": 1.2730332863095107e-07, + "loss": 1.4954, + "step": 158121 + }, + { + "epoch": 1.9, + "grad_norm": 6.964549930154368, + "learning_rate": 1.2721042883468427e-07, + "loss": 0.7715, + "step": 158124 + }, + { + "epoch": 1.9, + "grad_norm": 4.994142769172289, + "learning_rate": 1.2711756273069198e-07, + "loss": 0.8998, + "step": 158127 + }, + { + "epoch": 1.9, + "grad_norm": 4.939932542701184, + "learning_rate": 1.2702473031929065e-07, + "loss": 0.9776, + "step": 158130 + }, + { + "epoch": 1.9, + "grad_norm": 12.034713879476337, + "learning_rate": 1.2693193160079886e-07, + "loss": 1.2876, + "step": 158133 + }, + { + "epoch": 1.9, + "grad_norm": 12.549371178618417, + "learning_rate": 1.2683916657553086e-07, + "loss": 0.9936, + "step": 158136 + }, + { + "epoch": 1.9, + "grad_norm": 6.848801143645585, + "learning_rate": 1.2674643524380526e-07, + "loss": 1.4422, + "step": 158139 + }, + { + "epoch": 1.9, + "grad_norm": 11.014364549197337, + "learning_rate": 1.2665373760593845e-07, + "loss": 1.1715, + "step": 158142 + }, + { + "epoch": 1.9, + "grad_norm": 4.2691710959414, + "learning_rate": 1.265610736622458e-07, + "loss": 1.2392, + "step": 158145 + }, + { + "epoch": 1.9, + "grad_norm": 10.072577438187974, + "learning_rate": 1.2646844341304255e-07, + "loss": 1.3856, + "step": 158148 + }, + { + "epoch": 1.9, + "grad_norm": 6.139348379608723, + "learning_rate": 1.2637584685864735e-07, + "loss": 1.5566, + "step": 158151 + }, + { + "epoch": 1.9, + "grad_norm": 6.2585797852658365, + "learning_rate": 1.262832839993733e-07, + "loss": 0.9803, + "step": 158154 + }, + { + "epoch": 1.9, + "grad_norm": 38.702721544880326, + "learning_rate": 1.26190754835539e-07, + "loss": 1.0194, + "step": 158157 + }, + { + "epoch": 1.9, + "grad_norm": 11.010278645500204, + "learning_rate": 1.260982593674598e-07, + "loss": 1.2966, + "step": 158160 + }, + { + "epoch": 1.9, + "grad_norm": 6.8695396693049435, + "learning_rate": 1.2600579759544984e-07, + "loss": 1.3882, + "step": 158163 + }, + { + "epoch": 1.9, + "grad_norm": 15.003563224870202, + "learning_rate": 1.259133695198256e-07, + "loss": 1.3738, + "step": 158166 + }, + { + "epoch": 1.9, + "grad_norm": 7.883265407669087, + "learning_rate": 1.258209751409023e-07, + "loss": 1.2299, + "step": 158169 + }, + { + "epoch": 1.9, + "grad_norm": 4.94495176799931, + "learning_rate": 1.257286144589964e-07, + "loss": 0.9553, + "step": 158172 + }, + { + "epoch": 1.9, + "grad_norm": 17.53236990608565, + "learning_rate": 1.2563628747442102e-07, + "loss": 0.8643, + "step": 158175 + }, + { + "epoch": 1.9, + "grad_norm": 6.494961141795608, + "learning_rate": 1.255439941874914e-07, + "loss": 1.3008, + "step": 158178 + }, + { + "epoch": 1.9, + "grad_norm": 4.91718089765229, + "learning_rate": 1.25451734598524e-07, + "loss": 1.3414, + "step": 158181 + }, + { + "epoch": 1.9, + "grad_norm": 17.5613494540546, + "learning_rate": 1.253595087078341e-07, + "loss": 1.0414, + "step": 158184 + }, + { + "epoch": 1.9, + "grad_norm": 10.781471245372783, + "learning_rate": 1.2526731651573476e-07, + "loss": 1.4766, + "step": 158187 + }, + { + "epoch": 1.9, + "grad_norm": 3.618559812451067, + "learning_rate": 1.2517515802254132e-07, + "loss": 1.2172, + "step": 158190 + }, + { + "epoch": 1.9, + "grad_norm": 10.82393620024962, + "learning_rate": 1.2508303322856797e-07, + "loss": 1.0129, + "step": 158193 + }, + { + "epoch": 1.9, + "grad_norm": 6.5178009899954406, + "learning_rate": 1.2499094213412887e-07, + "loss": 1.1168, + "step": 158196 + }, + { + "epoch": 1.9, + "grad_norm": 7.030978512728685, + "learning_rate": 1.2489888473953937e-07, + "loss": 1.0904, + "step": 158199 + }, + { + "epoch": 1.9, + "grad_norm": 5.955544293372086, + "learning_rate": 1.2480686104511254e-07, + "loss": 1.0156, + "step": 158202 + }, + { + "epoch": 1.9, + "grad_norm": 6.244278398745711, + "learning_rate": 1.2471487105116365e-07, + "loss": 0.9017, + "step": 158205 + }, + { + "epoch": 1.9, + "grad_norm": 5.176687856722847, + "learning_rate": 1.246229147580047e-07, + "loss": 1.1323, + "step": 158208 + }, + { + "epoch": 1.9, + "grad_norm": 3.420470474836031, + "learning_rate": 1.24530992165951e-07, + "loss": 1.5721, + "step": 158211 + }, + { + "epoch": 1.9, + "grad_norm": 4.646669338089229, + "learning_rate": 1.2443910327531673e-07, + "loss": 1.0234, + "step": 158214 + }, + { + "epoch": 1.9, + "grad_norm": 12.460348549232068, + "learning_rate": 1.2434724808641385e-07, + "loss": 1.34, + "step": 158217 + }, + { + "epoch": 1.9, + "grad_norm": 9.032284740340215, + "learning_rate": 1.2425542659955658e-07, + "loss": 1.0665, + "step": 158220 + }, + { + "epoch": 1.9, + "grad_norm": 3.9207257943564238, + "learning_rate": 1.24163638815058e-07, + "loss": 1.538, + "step": 158223 + }, + { + "epoch": 1.9, + "grad_norm": 2.3652363086021655, + "learning_rate": 1.2407188473323339e-07, + "loss": 0.8373, + "step": 158226 + }, + { + "epoch": 1.9, + "grad_norm": 5.3551616900018715, + "learning_rate": 1.2398016435439254e-07, + "loss": 1.0932, + "step": 158229 + }, + { + "epoch": 1.9, + "grad_norm": 7.136812694188008, + "learning_rate": 1.238884776788507e-07, + "loss": 1.082, + "step": 158232 + }, + { + "epoch": 1.9, + "grad_norm": 13.337787601246399, + "learning_rate": 1.2379682470691988e-07, + "loss": 1.1656, + "step": 158235 + }, + { + "epoch": 1.9, + "grad_norm": 10.5257621515865, + "learning_rate": 1.2370520543891208e-07, + "loss": 1.009, + "step": 158238 + }, + { + "epoch": 1.9, + "grad_norm": 6.918097684252344, + "learning_rate": 1.2361361987514253e-07, + "loss": 1.1636, + "step": 158241 + }, + { + "epoch": 1.9, + "grad_norm": 10.604505609603969, + "learning_rate": 1.2352206801592215e-07, + "loss": 1.0815, + "step": 158244 + }, + { + "epoch": 1.9, + "grad_norm": 6.339114304818838, + "learning_rate": 1.234305498615629e-07, + "loss": 1.077, + "step": 158247 + }, + { + "epoch": 1.9, + "grad_norm": 5.338682083119085, + "learning_rate": 1.2333906541237785e-07, + "loss": 0.9189, + "step": 158250 + }, + { + "epoch": 1.9, + "grad_norm": 6.711399659720703, + "learning_rate": 1.2324761466867896e-07, + "loss": 1.4798, + "step": 158253 + }, + { + "epoch": 1.9, + "grad_norm": 4.028814965978182, + "learning_rate": 1.2315619763077823e-07, + "loss": 1.5109, + "step": 158256 + }, + { + "epoch": 1.9, + "grad_norm": 5.277575327513662, + "learning_rate": 1.2306481429898763e-07, + "loss": 1.1987, + "step": 158259 + }, + { + "epoch": 1.9, + "grad_norm": 5.290798483504826, + "learning_rate": 1.2297346467361914e-07, + "loss": 0.7497, + "step": 158262 + }, + { + "epoch": 1.9, + "grad_norm": 9.629435301523019, + "learning_rate": 1.2288214875498472e-07, + "loss": 1.5714, + "step": 158265 + }, + { + "epoch": 1.9, + "grad_norm": 9.52016929488956, + "learning_rate": 1.2279086654339633e-07, + "loss": 1.1925, + "step": 158268 + }, + { + "epoch": 1.9, + "grad_norm": 7.442367099746511, + "learning_rate": 1.2269961803916487e-07, + "loss": 1.2992, + "step": 158271 + }, + { + "epoch": 1.9, + "grad_norm": 4.785011283169662, + "learning_rate": 1.2260840324260227e-07, + "loss": 1.4499, + "step": 158274 + }, + { + "epoch": 1.9, + "grad_norm": 2.81583421051261, + "learning_rate": 1.225172221540183e-07, + "loss": 1.1957, + "step": 158277 + }, + { + "epoch": 1.9, + "grad_norm": 7.203176592876756, + "learning_rate": 1.2242607477372602e-07, + "loss": 1.0995, + "step": 158280 + }, + { + "epoch": 1.9, + "grad_norm": 4.332939812973321, + "learning_rate": 1.2233496110203413e-07, + "loss": 1.0629, + "step": 158283 + }, + { + "epoch": 1.9, + "grad_norm": 44.055231447699285, + "learning_rate": 1.2224388113925567e-07, + "loss": 0.9504, + "step": 158286 + }, + { + "epoch": 1.9, + "grad_norm": 3.175698307869193, + "learning_rate": 1.221528348857015e-07, + "loss": 0.9861, + "step": 158289 + }, + { + "epoch": 1.9, + "grad_norm": 22.187199548723644, + "learning_rate": 1.2206182234168252e-07, + "loss": 1.0882, + "step": 158292 + }, + { + "epoch": 1.9, + "grad_norm": 4.515526964475144, + "learning_rate": 1.2197084350750732e-07, + "loss": 1.0047, + "step": 158295 + }, + { + "epoch": 1.9, + "grad_norm": 32.57446507105863, + "learning_rate": 1.218798983834879e-07, + "loss": 1.1196, + "step": 158298 + }, + { + "epoch": 1.9, + "grad_norm": 12.346269647913376, + "learning_rate": 1.2178898696993402e-07, + "loss": 1.4226, + "step": 158301 + }, + { + "epoch": 1.9, + "grad_norm": 18.862739298678765, + "learning_rate": 1.2169810926715653e-07, + "loss": 0.843, + "step": 158304 + }, + { + "epoch": 1.9, + "grad_norm": 9.114559968738238, + "learning_rate": 1.216072652754652e-07, + "loss": 1.1544, + "step": 158307 + }, + { + "epoch": 1.9, + "grad_norm": 23.82777741158469, + "learning_rate": 1.2151645499516974e-07, + "loss": 1.5233, + "step": 158310 + }, + { + "epoch": 1.9, + "grad_norm": 4.221666364314088, + "learning_rate": 1.2142567842658103e-07, + "loss": 0.7378, + "step": 158313 + }, + { + "epoch": 1.9, + "grad_norm": 21.111604248987007, + "learning_rate": 1.2133493557000775e-07, + "loss": 1.2982, + "step": 158316 + }, + { + "epoch": 1.9, + "grad_norm": 7.569781582104022, + "learning_rate": 1.2124422642575962e-07, + "loss": 1.2586, + "step": 158319 + }, + { + "epoch": 1.9, + "grad_norm": 15.043334520252058, + "learning_rate": 1.211535509941475e-07, + "loss": 1.3287, + "step": 158322 + }, + { + "epoch": 1.9, + "grad_norm": 6.279098836251472, + "learning_rate": 1.2106290927548004e-07, + "loss": 1.284, + "step": 158325 + }, + { + "epoch": 1.9, + "grad_norm": 6.924535394270279, + "learning_rate": 1.2097230127006588e-07, + "loss": 1.4413, + "step": 158328 + }, + { + "epoch": 1.9, + "grad_norm": 6.192735983882807, + "learning_rate": 1.2088172697821476e-07, + "loss": 1.3965, + "step": 158331 + }, + { + "epoch": 1.9, + "grad_norm": 16.23728788196575, + "learning_rate": 1.2079118640023646e-07, + "loss": 1.0825, + "step": 158334 + }, + { + "epoch": 1.9, + "grad_norm": 2.638230961608223, + "learning_rate": 1.2070067953643961e-07, + "loss": 1.1328, + "step": 158337 + }, + { + "epoch": 1.9, + "grad_norm": 6.964888606365074, + "learning_rate": 1.2061020638713172e-07, + "loss": 0.9446, + "step": 158340 + }, + { + "epoch": 1.9, + "grad_norm": 6.573578392283627, + "learning_rate": 1.2051976695262367e-07, + "loss": 0.7369, + "step": 158343 + }, + { + "epoch": 1.9, + "grad_norm": 17.31275375462294, + "learning_rate": 1.204293612332219e-07, + "loss": 1.7656, + "step": 158346 + }, + { + "epoch": 1.9, + "grad_norm": 6.5204972326567825, + "learning_rate": 1.2033898922923725e-07, + "loss": 1.0972, + "step": 158349 + }, + { + "epoch": 1.9, + "grad_norm": 2.9175826267780147, + "learning_rate": 1.2024865094097727e-07, + "loss": 0.8224, + "step": 158352 + }, + { + "epoch": 1.9, + "grad_norm": 3.1708249808685967, + "learning_rate": 1.2015834636874946e-07, + "loss": 1.1475, + "step": 158355 + }, + { + "epoch": 1.9, + "grad_norm": 5.386966809028905, + "learning_rate": 1.2006807551286248e-07, + "loss": 0.8372, + "step": 158358 + }, + { + "epoch": 1.9, + "grad_norm": 3.530293715849605, + "learning_rate": 1.1997783837362386e-07, + "loss": 0.8804, + "step": 158361 + }, + { + "epoch": 1.9, + "grad_norm": 6.553552195226092, + "learning_rate": 1.1988763495134227e-07, + "loss": 0.8192, + "step": 158364 + }, + { + "epoch": 1.9, + "grad_norm": 3.8503002428982533, + "learning_rate": 1.197974652463263e-07, + "loss": 0.7676, + "step": 158367 + }, + { + "epoch": 1.9, + "grad_norm": 1.6074920223754987, + "learning_rate": 1.1970732925888128e-07, + "loss": 1.4478, + "step": 158370 + }, + { + "epoch": 1.9, + "grad_norm": 8.300709627236213, + "learning_rate": 1.19617226989317e-07, + "loss": 1.3127, + "step": 158373 + }, + { + "epoch": 1.9, + "grad_norm": 6.779798155284102, + "learning_rate": 1.1952715843794094e-07, + "loss": 1.0665, + "step": 158376 + }, + { + "epoch": 1.9, + "grad_norm": 13.413067153450244, + "learning_rate": 1.1943712360505955e-07, + "loss": 1.4125, + "step": 158379 + }, + { + "epoch": 1.9, + "grad_norm": 11.353864107822075, + "learning_rate": 1.1934712249098035e-07, + "loss": 0.9538, + "step": 158382 + }, + { + "epoch": 1.9, + "grad_norm": 9.272964687644441, + "learning_rate": 1.1925715509600978e-07, + "loss": 1.0052, + "step": 158385 + }, + { + "epoch": 1.9, + "grad_norm": 14.295413035546455, + "learning_rate": 1.1916722142045534e-07, + "loss": 1.0308, + "step": 158388 + }, + { + "epoch": 1.9, + "grad_norm": 3.8622818075744516, + "learning_rate": 1.1907732146462459e-07, + "loss": 1.4594, + "step": 158391 + }, + { + "epoch": 1.9, + "grad_norm": 14.651528799242259, + "learning_rate": 1.1898745522882393e-07, + "loss": 1.1442, + "step": 158394 + }, + { + "epoch": 1.9, + "grad_norm": 6.405305806240826, + "learning_rate": 1.1889762271335981e-07, + "loss": 0.9228, + "step": 158397 + }, + { + "epoch": 1.9, + "grad_norm": 7.4592432763504375, + "learning_rate": 1.1880782391853863e-07, + "loss": 0.9768, + "step": 158400 + }, + { + "epoch": 1.9, + "grad_norm": 7.4380011069783345, + "learning_rate": 1.1871805884466792e-07, + "loss": 1.036, + "step": 158403 + }, + { + "epoch": 1.9, + "grad_norm": 7.985578953988302, + "learning_rate": 1.18628327492053e-07, + "loss": 0.9948, + "step": 158406 + }, + { + "epoch": 1.9, + "grad_norm": 19.29135320068501, + "learning_rate": 1.1853862986099917e-07, + "loss": 1.1224, + "step": 158409 + }, + { + "epoch": 1.9, + "grad_norm": 6.425318582204941, + "learning_rate": 1.184489659518151e-07, + "loss": 0.912, + "step": 158412 + }, + { + "epoch": 1.9, + "grad_norm": 8.715408014503822, + "learning_rate": 1.1835933576480497e-07, + "loss": 1.0884, + "step": 158415 + }, + { + "epoch": 1.9, + "grad_norm": 4.02766836221462, + "learning_rate": 1.1826973930027518e-07, + "loss": 0.9824, + "step": 158418 + }, + { + "epoch": 1.9, + "grad_norm": 10.493500936097279, + "learning_rate": 1.181801765585322e-07, + "loss": 1.2293, + "step": 158421 + }, + { + "epoch": 1.91, + "grad_norm": 7.505020904339044, + "learning_rate": 1.1809064753987909e-07, + "loss": 0.9927, + "step": 158424 + }, + { + "epoch": 1.91, + "grad_norm": 2.720860170521027, + "learning_rate": 1.1800115224462339e-07, + "loss": 0.9849, + "step": 158427 + }, + { + "epoch": 1.91, + "grad_norm": 11.713289322589565, + "learning_rate": 1.1791169067307151e-07, + "loss": 1.1673, + "step": 158430 + }, + { + "epoch": 1.91, + "grad_norm": 12.15861317860339, + "learning_rate": 1.1782226282552656e-07, + "loss": 1.0249, + "step": 158433 + }, + { + "epoch": 1.91, + "grad_norm": 15.14785658833668, + "learning_rate": 1.1773286870229495e-07, + "loss": 1.1577, + "step": 158436 + }, + { + "epoch": 1.91, + "grad_norm": 11.28456409402294, + "learning_rate": 1.1764350830368199e-07, + "loss": 0.7389, + "step": 158439 + }, + { + "epoch": 1.91, + "grad_norm": 8.161790734562361, + "learning_rate": 1.1755418162999299e-07, + "loss": 1.0711, + "step": 158442 + }, + { + "epoch": 1.91, + "grad_norm": 20.727276180559606, + "learning_rate": 1.1746488868152994e-07, + "loss": 1.0791, + "step": 158445 + }, + { + "epoch": 1.91, + "grad_norm": 2.754377478035081, + "learning_rate": 1.1737562945860038e-07, + "loss": 1.0163, + "step": 158448 + }, + { + "epoch": 1.91, + "grad_norm": 8.638052785940106, + "learning_rate": 1.1728640396150736e-07, + "loss": 1.5164, + "step": 158451 + }, + { + "epoch": 1.91, + "grad_norm": 7.852573826220316, + "learning_rate": 1.1719721219055624e-07, + "loss": 1.0272, + "step": 158454 + }, + { + "epoch": 1.91, + "grad_norm": 3.4095601687394224, + "learning_rate": 1.171080541460523e-07, + "loss": 1.2433, + "step": 158457 + }, + { + "epoch": 1.91, + "grad_norm": 4.390462347505653, + "learning_rate": 1.1701892982829755e-07, + "loss": 1.0404, + "step": 158460 + }, + { + "epoch": 1.91, + "grad_norm": 4.222892808632444, + "learning_rate": 1.1692983923759948e-07, + "loss": 1.1658, + "step": 158463 + }, + { + "epoch": 1.91, + "grad_norm": 9.375643206431512, + "learning_rate": 1.1684078237425789e-07, + "loss": 0.9775, + "step": 158466 + }, + { + "epoch": 1.91, + "grad_norm": 4.724900536362365, + "learning_rate": 1.1675175923858029e-07, + "loss": 0.8727, + "step": 158469 + }, + { + "epoch": 1.91, + "grad_norm": 7.341886611939048, + "learning_rate": 1.1666276983086755e-07, + "loss": 1.3197, + "step": 158472 + }, + { + "epoch": 1.91, + "grad_norm": 4.068045525404852, + "learning_rate": 1.1657381415142499e-07, + "loss": 0.956, + "step": 158475 + }, + { + "epoch": 1.91, + "grad_norm": 10.275583241447974, + "learning_rate": 1.1648489220055681e-07, + "loss": 1.0888, + "step": 158478 + }, + { + "epoch": 1.91, + "grad_norm": 14.649608329700124, + "learning_rate": 1.1639600397856499e-07, + "loss": 1.4511, + "step": 158481 + }, + { + "epoch": 1.91, + "grad_norm": 4.960632845777964, + "learning_rate": 1.1630714948575483e-07, + "loss": 0.9902, + "step": 158484 + }, + { + "epoch": 1.91, + "grad_norm": 6.829374327827272, + "learning_rate": 1.1621832872242721e-07, + "loss": 1.3412, + "step": 158487 + }, + { + "epoch": 1.91, + "grad_norm": 4.977224076607091, + "learning_rate": 1.1612954168888635e-07, + "loss": 0.9711, + "step": 158490 + }, + { + "epoch": 1.91, + "grad_norm": 27.95368909978014, + "learning_rate": 1.160407883854342e-07, + "loss": 1.2362, + "step": 158493 + }, + { + "epoch": 1.91, + "grad_norm": 8.416692634260224, + "learning_rate": 1.1595206881237719e-07, + "loss": 1.0841, + "step": 158496 + }, + { + "epoch": 1.91, + "grad_norm": 4.23962367609325, + "learning_rate": 1.1586338297001287e-07, + "loss": 0.9291, + "step": 158499 + }, + { + "epoch": 1.91, + "grad_norm": 8.581925667233058, + "learning_rate": 1.1577473085864765e-07, + "loss": 1.2682, + "step": 158502 + }, + { + "epoch": 1.91, + "grad_norm": 11.201509394442962, + "learning_rate": 1.1568611247858352e-07, + "loss": 0.7623, + "step": 158505 + }, + { + "epoch": 1.91, + "grad_norm": 9.672862638704169, + "learning_rate": 1.1559752783012246e-07, + "loss": 0.9806, + "step": 158508 + }, + { + "epoch": 1.91, + "grad_norm": 8.493004267297088, + "learning_rate": 1.1550897691356644e-07, + "loss": 1.0377, + "step": 158511 + }, + { + "epoch": 1.91, + "grad_norm": 9.357900066434938, + "learning_rate": 1.1542045972921745e-07, + "loss": 1.2598, + "step": 158514 + }, + { + "epoch": 1.91, + "grad_norm": 2.62712823725524, + "learning_rate": 1.1533197627737858e-07, + "loss": 1.1282, + "step": 158517 + }, + { + "epoch": 1.91, + "grad_norm": 9.057784230644137, + "learning_rate": 1.152435265583507e-07, + "loss": 1.1804, + "step": 158520 + }, + { + "epoch": 1.91, + "grad_norm": 13.32959647064444, + "learning_rate": 1.1515511057243689e-07, + "loss": 1.0464, + "step": 158523 + }, + { + "epoch": 1.91, + "grad_norm": 8.626745295492025, + "learning_rate": 1.1506672831993693e-07, + "loss": 0.9536, + "step": 158526 + }, + { + "epoch": 1.91, + "grad_norm": 5.381040872376095, + "learning_rate": 1.14978379801155e-07, + "loss": 0.7671, + "step": 158529 + }, + { + "epoch": 1.91, + "grad_norm": 2.5341776779088945, + "learning_rate": 1.1489006501639088e-07, + "loss": 0.7825, + "step": 158532 + }, + { + "epoch": 1.91, + "grad_norm": 10.032251971270888, + "learning_rate": 1.1480178396594543e-07, + "loss": 1.0724, + "step": 158535 + }, + { + "epoch": 1.91, + "grad_norm": 9.391549847785274, + "learning_rate": 1.1471353665012286e-07, + "loss": 0.7172, + "step": 158538 + }, + { + "epoch": 1.91, + "grad_norm": 6.03087236792439, + "learning_rate": 1.146253230692207e-07, + "loss": 1.2258, + "step": 158541 + }, + { + "epoch": 1.91, + "grad_norm": 5.740491115358925, + "learning_rate": 1.1453714322354203e-07, + "loss": 0.8906, + "step": 158544 + }, + { + "epoch": 1.91, + "grad_norm": 11.298830534207223, + "learning_rate": 1.1444899711338774e-07, + "loss": 1.1052, + "step": 158547 + }, + { + "epoch": 1.91, + "grad_norm": 8.339359809123962, + "learning_rate": 1.143608847390576e-07, + "loss": 1.1843, + "step": 158550 + }, + { + "epoch": 1.91, + "grad_norm": 31.592263674988065, + "learning_rate": 1.1427280610085356e-07, + "loss": 1.1624, + "step": 158553 + }, + { + "epoch": 1.91, + "grad_norm": 11.409870170934793, + "learning_rate": 1.141847611990754e-07, + "loss": 0.9611, + "step": 158556 + }, + { + "epoch": 1.91, + "grad_norm": 8.228628387465422, + "learning_rate": 1.1409675003402288e-07, + "loss": 0.8258, + "step": 158559 + }, + { + "epoch": 1.91, + "grad_norm": 8.351079041571777, + "learning_rate": 1.1400877260599796e-07, + "loss": 1.2741, + "step": 158562 + }, + { + "epoch": 1.91, + "grad_norm": 44.309132374751904, + "learning_rate": 1.1392082891530043e-07, + "loss": 1.0663, + "step": 158565 + }, + { + "epoch": 1.91, + "grad_norm": 15.791461383228185, + "learning_rate": 1.1383291896223003e-07, + "loss": 1.0962, + "step": 158568 + }, + { + "epoch": 1.91, + "grad_norm": 6.283800151838814, + "learning_rate": 1.1374504274708764e-07, + "loss": 1.1234, + "step": 158571 + }, + { + "epoch": 1.91, + "grad_norm": 4.7221495390244765, + "learning_rate": 1.1365720027017191e-07, + "loss": 1.2823, + "step": 158574 + }, + { + "epoch": 1.91, + "grad_norm": 7.512191815070999, + "learning_rate": 1.1356939153178259e-07, + "loss": 0.8708, + "step": 158577 + }, + { + "epoch": 1.91, + "grad_norm": 8.178105187011633, + "learning_rate": 1.1348161653222056e-07, + "loss": 1.3107, + "step": 158580 + }, + { + "epoch": 1.91, + "grad_norm": 4.900563670988698, + "learning_rate": 1.1339387527178336e-07, + "loss": 1.2411, + "step": 158583 + }, + { + "epoch": 1.91, + "grad_norm": 2.7873705622329186, + "learning_rate": 1.1330616775077297e-07, + "loss": 1.3062, + "step": 158586 + }, + { + "epoch": 1.91, + "grad_norm": 8.768967330087303, + "learning_rate": 1.1321849396948803e-07, + "loss": 1.1135, + "step": 158589 + }, + { + "epoch": 1.91, + "grad_norm": 27.41558114351067, + "learning_rate": 1.1313085392822609e-07, + "loss": 1.1903, + "step": 158592 + }, + { + "epoch": 1.91, + "grad_norm": 5.775425985397212, + "learning_rate": 1.1304324762728691e-07, + "loss": 1.1004, + "step": 158595 + }, + { + "epoch": 1.91, + "grad_norm": 5.1172159235272066, + "learning_rate": 1.1295567506697024e-07, + "loss": 0.9464, + "step": 158598 + }, + { + "epoch": 1.91, + "grad_norm": 17.257776738917393, + "learning_rate": 1.1286813624757475e-07, + "loss": 1.3472, + "step": 158601 + }, + { + "epoch": 1.91, + "grad_norm": 7.739736644906658, + "learning_rate": 1.1278063116940019e-07, + "loss": 1.405, + "step": 158604 + }, + { + "epoch": 1.91, + "grad_norm": 12.697783193597578, + "learning_rate": 1.1269315983274187e-07, + "loss": 1.1146, + "step": 158607 + }, + { + "epoch": 1.91, + "grad_norm": 3.8985148670369707, + "learning_rate": 1.1260572223790067e-07, + "loss": 1.0907, + "step": 158610 + }, + { + "epoch": 1.91, + "grad_norm": 27.642096825461437, + "learning_rate": 1.1251831838517635e-07, + "loss": 1.2178, + "step": 158613 + }, + { + "epoch": 1.91, + "grad_norm": 8.263449676772405, + "learning_rate": 1.1243094827486422e-07, + "loss": 1.0536, + "step": 158616 + }, + { + "epoch": 1.91, + "grad_norm": 6.60883372894007, + "learning_rate": 1.1234361190726518e-07, + "loss": 1.3599, + "step": 158619 + }, + { + "epoch": 1.91, + "grad_norm": 8.328370203844557, + "learning_rate": 1.122563092826745e-07, + "loss": 1.3386, + "step": 158622 + }, + { + "epoch": 1.91, + "grad_norm": 4.970539286481001, + "learning_rate": 1.1216904040139088e-07, + "loss": 1.4268, + "step": 158625 + }, + { + "epoch": 1.91, + "grad_norm": 8.232623854489706, + "learning_rate": 1.1208180526371404e-07, + "loss": 0.9091, + "step": 158628 + }, + { + "epoch": 1.91, + "grad_norm": 15.297720426233315, + "learning_rate": 1.1199460386994043e-07, + "loss": 1.3635, + "step": 158631 + }, + { + "epoch": 1.91, + "grad_norm": 5.18430056616973, + "learning_rate": 1.1190743622036759e-07, + "loss": 1.0746, + "step": 158634 + }, + { + "epoch": 1.91, + "grad_norm": 6.243789386580685, + "learning_rate": 1.1182030231529195e-07, + "loss": 1.0078, + "step": 158637 + }, + { + "epoch": 1.91, + "grad_norm": 5.366212635089327, + "learning_rate": 1.1173320215501215e-07, + "loss": 1.3132, + "step": 158640 + }, + { + "epoch": 1.91, + "grad_norm": 9.305164941982927, + "learning_rate": 1.1164613573982574e-07, + "loss": 1.0138, + "step": 158643 + }, + { + "epoch": 1.91, + "grad_norm": 4.88528563457542, + "learning_rate": 1.1155910307002915e-07, + "loss": 1.2736, + "step": 158646 + }, + { + "epoch": 1.91, + "grad_norm": 2.6639635536163766, + "learning_rate": 1.1147210414591991e-07, + "loss": 1.2241, + "step": 158649 + }, + { + "epoch": 1.91, + "grad_norm": 6.519715524283139, + "learning_rate": 1.1138513896779335e-07, + "loss": 0.7574, + "step": 158652 + }, + { + "epoch": 1.91, + "grad_norm": 3.873597824347506, + "learning_rate": 1.1129820753594922e-07, + "loss": 1.1817, + "step": 158655 + }, + { + "epoch": 1.91, + "grad_norm": 13.689441335692113, + "learning_rate": 1.1121130985068174e-07, + "loss": 1.0216, + "step": 158658 + }, + { + "epoch": 1.91, + "grad_norm": 12.45000946894699, + "learning_rate": 1.1112444591228733e-07, + "loss": 1.1901, + "step": 158661 + }, + { + "epoch": 1.91, + "grad_norm": 33.85654829737153, + "learning_rate": 1.1103761572106353e-07, + "loss": 1.2261, + "step": 158664 + }, + { + "epoch": 1.91, + "grad_norm": 4.8904369572479585, + "learning_rate": 1.1095081927730677e-07, + "loss": 1.2106, + "step": 158667 + }, + { + "epoch": 1.91, + "grad_norm": 6.725042703005547, + "learning_rate": 1.1086405658131238e-07, + "loss": 1.5706, + "step": 158670 + }, + { + "epoch": 1.91, + "grad_norm": 7.599688086356152, + "learning_rate": 1.107773276333779e-07, + "loss": 1.051, + "step": 158673 + }, + { + "epoch": 1.91, + "grad_norm": 8.622617408494348, + "learning_rate": 1.1069063243379752e-07, + "loss": 1.257, + "step": 158676 + }, + { + "epoch": 1.91, + "grad_norm": 5.701443292139172, + "learning_rate": 1.1060397098286768e-07, + "loss": 1.1105, + "step": 158679 + }, + { + "epoch": 1.91, + "grad_norm": 5.590761119982152, + "learning_rate": 1.1051734328088482e-07, + "loss": 1.0264, + "step": 158682 + }, + { + "epoch": 1.91, + "grad_norm": 14.631950996404054, + "learning_rate": 1.1043074932814424e-07, + "loss": 0.9334, + "step": 158685 + }, + { + "epoch": 1.91, + "grad_norm": 4.786100305782575, + "learning_rate": 1.1034418912494127e-07, + "loss": 0.8789, + "step": 158688 + }, + { + "epoch": 1.91, + "grad_norm": 7.588928686967559, + "learning_rate": 1.1025766267157012e-07, + "loss": 1.2606, + "step": 158691 + }, + { + "epoch": 1.91, + "grad_norm": 9.83548390629049, + "learning_rate": 1.1017116996832834e-07, + "loss": 1.4188, + "step": 158694 + }, + { + "epoch": 1.91, + "grad_norm": 3.870456294821036, + "learning_rate": 1.100847110155101e-07, + "loss": 1.4954, + "step": 158697 + }, + { + "epoch": 1.91, + "grad_norm": 8.398526640538547, + "learning_rate": 1.0999828581341076e-07, + "loss": 0.9713, + "step": 158700 + }, + { + "epoch": 1.91, + "grad_norm": 7.5885275214281025, + "learning_rate": 1.0991189436232452e-07, + "loss": 1.2599, + "step": 158703 + }, + { + "epoch": 1.91, + "grad_norm": 5.3349529995470775, + "learning_rate": 1.098255366625467e-07, + "loss": 0.9576, + "step": 158706 + }, + { + "epoch": 1.91, + "grad_norm": 4.426066966473574, + "learning_rate": 1.097392127143715e-07, + "loss": 0.961, + "step": 158709 + }, + { + "epoch": 1.91, + "grad_norm": 15.655433866315187, + "learning_rate": 1.0965292251809423e-07, + "loss": 1.0951, + "step": 158712 + }, + { + "epoch": 1.91, + "grad_norm": 7.214698238989301, + "learning_rate": 1.0956666607400912e-07, + "loss": 1.1132, + "step": 158715 + }, + { + "epoch": 1.91, + "grad_norm": 7.612521083636674, + "learning_rate": 1.0948044338241038e-07, + "loss": 1.076, + "step": 158718 + }, + { + "epoch": 1.91, + "grad_norm": 6.583771635698712, + "learning_rate": 1.093942544435922e-07, + "loss": 1.0541, + "step": 158721 + }, + { + "epoch": 1.91, + "grad_norm": 5.276254750512953, + "learning_rate": 1.093080992578488e-07, + "loss": 1.0004, + "step": 158724 + }, + { + "epoch": 1.91, + "grad_norm": 15.54476134980138, + "learning_rate": 1.0922197782547439e-07, + "loss": 1.1628, + "step": 158727 + }, + { + "epoch": 1.91, + "grad_norm": 8.222973261415904, + "learning_rate": 1.0913589014676207e-07, + "loss": 1.0204, + "step": 158730 + }, + { + "epoch": 1.91, + "grad_norm": 50.9830665050702, + "learning_rate": 1.0904983622200715e-07, + "loss": 0.9906, + "step": 158733 + }, + { + "epoch": 1.91, + "grad_norm": 29.095420186695105, + "learning_rate": 1.0896381605150163e-07, + "loss": 1.2979, + "step": 158736 + }, + { + "epoch": 1.91, + "grad_norm": 13.813359260668273, + "learning_rate": 1.0887782963554083e-07, + "loss": 0.9986, + "step": 158739 + }, + { + "epoch": 1.91, + "grad_norm": 5.264488326878939, + "learning_rate": 1.0879187697441785e-07, + "loss": 1.2887, + "step": 158742 + }, + { + "epoch": 1.91, + "grad_norm": 6.255211495594911, + "learning_rate": 1.0870595806842355e-07, + "loss": 1.0232, + "step": 158745 + }, + { + "epoch": 1.91, + "grad_norm": 11.437742718731732, + "learning_rate": 1.0862007291785327e-07, + "loss": 1.0513, + "step": 158748 + }, + { + "epoch": 1.91, + "grad_norm": 15.991499225404482, + "learning_rate": 1.0853422152300009e-07, + "loss": 0.9961, + "step": 158751 + }, + { + "epoch": 1.91, + "grad_norm": 11.234388371905641, + "learning_rate": 1.0844840388415823e-07, + "loss": 1.3748, + "step": 158754 + }, + { + "epoch": 1.91, + "grad_norm": 2.654157603609523, + "learning_rate": 1.0836262000161746e-07, + "loss": 0.8268, + "step": 158757 + }, + { + "epoch": 1.91, + "grad_norm": 3.0420091750494445, + "learning_rate": 1.082768698756731e-07, + "loss": 1.0955, + "step": 158760 + }, + { + "epoch": 1.91, + "grad_norm": 10.658840198230145, + "learning_rate": 1.0819115350661491e-07, + "loss": 1.219, + "step": 158763 + }, + { + "epoch": 1.91, + "grad_norm": 3.098032681433067, + "learning_rate": 1.0810547089473932e-07, + "loss": 1.1377, + "step": 158766 + }, + { + "epoch": 1.91, + "grad_norm": 7.604521972827848, + "learning_rate": 1.0801982204033612e-07, + "loss": 1.4541, + "step": 158769 + }, + { + "epoch": 1.91, + "grad_norm": 15.913868016245415, + "learning_rate": 1.0793420694369727e-07, + "loss": 1.0129, + "step": 158772 + }, + { + "epoch": 1.91, + "grad_norm": 5.792889653030156, + "learning_rate": 1.07848625605117e-07, + "loss": 1.1033, + "step": 158775 + }, + { + "epoch": 1.91, + "grad_norm": 9.409943791774982, + "learning_rate": 1.0776307802488506e-07, + "loss": 0.868, + "step": 158778 + }, + { + "epoch": 1.91, + "grad_norm": 6.242943748237597, + "learning_rate": 1.0767756420329568e-07, + "loss": 1.1585, + "step": 158781 + }, + { + "epoch": 1.91, + "grad_norm": 13.497063794522454, + "learning_rate": 1.0759208414063971e-07, + "loss": 0.9387, + "step": 158784 + }, + { + "epoch": 1.91, + "grad_norm": 3.134840674776052, + "learning_rate": 1.0750663783720804e-07, + "loss": 1.371, + "step": 158787 + }, + { + "epoch": 1.91, + "grad_norm": 3.425030573370812, + "learning_rate": 1.0742122529329269e-07, + "loss": 0.9508, + "step": 158790 + }, + { + "epoch": 1.91, + "grad_norm": 8.71092594802539, + "learning_rate": 1.0733584650918671e-07, + "loss": 1.3973, + "step": 158793 + }, + { + "epoch": 1.91, + "grad_norm": 3.862597546933633, + "learning_rate": 1.0725050148517879e-07, + "loss": 1.0409, + "step": 158796 + }, + { + "epoch": 1.91, + "grad_norm": 12.502044042260438, + "learning_rate": 1.0716519022156202e-07, + "loss": 0.9824, + "step": 158799 + }, + { + "epoch": 1.91, + "grad_norm": 6.196198077661725, + "learning_rate": 1.0707991271862839e-07, + "loss": 1.2965, + "step": 158802 + }, + { + "epoch": 1.91, + "grad_norm": 23.86631046982734, + "learning_rate": 1.0699466897666544e-07, + "loss": 1.3172, + "step": 158805 + }, + { + "epoch": 1.91, + "grad_norm": 11.294891234473782, + "learning_rate": 1.0690945899596739e-07, + "loss": 1.2332, + "step": 158808 + }, + { + "epoch": 1.91, + "grad_norm": 4.315786726485672, + "learning_rate": 1.06824282776824e-07, + "loss": 0.7847, + "step": 158811 + }, + { + "epoch": 1.91, + "grad_norm": 5.683426464183485, + "learning_rate": 1.0673914031952504e-07, + "loss": 1.0734, + "step": 158814 + }, + { + "epoch": 1.91, + "grad_norm": 11.792044029113667, + "learning_rate": 1.066540316243625e-07, + "loss": 0.67, + "step": 158817 + }, + { + "epoch": 1.91, + "grad_norm": 5.005845316104971, + "learning_rate": 1.0656895669162615e-07, + "loss": 1.2206, + "step": 158820 + }, + { + "epoch": 1.91, + "grad_norm": 6.556501392579242, + "learning_rate": 1.0648391552160575e-07, + "loss": 1.4071, + "step": 158823 + }, + { + "epoch": 1.91, + "grad_norm": 28.15509210010751, + "learning_rate": 1.063989081145933e-07, + "loss": 0.8346, + "step": 158826 + }, + { + "epoch": 1.91, + "grad_norm": 15.200196923561744, + "learning_rate": 1.0631393447087634e-07, + "loss": 1.1641, + "step": 158829 + }, + { + "epoch": 1.91, + "grad_norm": 11.107896532524526, + "learning_rate": 1.0622899459074687e-07, + "loss": 1.1538, + "step": 158832 + }, + { + "epoch": 1.91, + "grad_norm": 3.8007689026885885, + "learning_rate": 1.0614408847449464e-07, + "loss": 0.9746, + "step": 158835 + }, + { + "epoch": 1.91, + "grad_norm": 10.489868829757256, + "learning_rate": 1.0605921612240833e-07, + "loss": 0.9161, + "step": 158838 + }, + { + "epoch": 1.91, + "grad_norm": 3.830807074231819, + "learning_rate": 1.0597437753477769e-07, + "loss": 1.1137, + "step": 158841 + }, + { + "epoch": 1.91, + "grad_norm": 6.894422725108204, + "learning_rate": 1.0588957271189359e-07, + "loss": 1.0449, + "step": 158844 + }, + { + "epoch": 1.91, + "grad_norm": 7.657970986573623, + "learning_rate": 1.058048016540436e-07, + "loss": 1.2887, + "step": 158847 + }, + { + "epoch": 1.91, + "grad_norm": 8.557705437069007, + "learning_rate": 1.0572006436151971e-07, + "loss": 1.1572, + "step": 158850 + }, + { + "epoch": 1.91, + "grad_norm": 1.7888891946780996, + "learning_rate": 1.0563536083460724e-07, + "loss": 1.2621, + "step": 158853 + }, + { + "epoch": 1.91, + "grad_norm": 5.192697632756005, + "learning_rate": 1.0555069107359817e-07, + "loss": 1.2929, + "step": 158856 + }, + { + "epoch": 1.91, + "grad_norm": 9.42798701205289, + "learning_rate": 1.0546605507878116e-07, + "loss": 1.0245, + "step": 158859 + }, + { + "epoch": 1.91, + "grad_norm": 9.670847936776216, + "learning_rate": 1.0538145285044377e-07, + "loss": 1.3, + "step": 158862 + }, + { + "epoch": 1.91, + "grad_norm": 4.831330445684541, + "learning_rate": 1.0529688438887575e-07, + "loss": 1.39, + "step": 158865 + }, + { + "epoch": 1.91, + "grad_norm": 5.844155081036212, + "learning_rate": 1.0521234969436578e-07, + "loss": 1.3226, + "step": 158868 + }, + { + "epoch": 1.91, + "grad_norm": 4.8259019739362055, + "learning_rate": 1.051278487672014e-07, + "loss": 1.2455, + "step": 158871 + }, + { + "epoch": 1.91, + "grad_norm": 23.969363711551942, + "learning_rate": 1.0504338160767236e-07, + "loss": 1.0392, + "step": 158874 + }, + { + "epoch": 1.91, + "grad_norm": 6.625251237557184, + "learning_rate": 1.0495894821606511e-07, + "loss": 1.2684, + "step": 158877 + }, + { + "epoch": 1.91, + "grad_norm": 11.36735067503679, + "learning_rate": 1.0487454859266832e-07, + "loss": 0.8568, + "step": 158880 + }, + { + "epoch": 1.91, + "grad_norm": 13.914788584362336, + "learning_rate": 1.0479018273777174e-07, + "loss": 1.2129, + "step": 158883 + }, + { + "epoch": 1.91, + "grad_norm": 8.358322057777091, + "learning_rate": 1.0470585065166072e-07, + "loss": 1.1234, + "step": 158886 + }, + { + "epoch": 1.91, + "grad_norm": 9.418831631694134, + "learning_rate": 1.0462155233462612e-07, + "loss": 1.2118, + "step": 158889 + }, + { + "epoch": 1.91, + "grad_norm": 4.4251030522034736, + "learning_rate": 1.0453728778695327e-07, + "loss": 0.9759, + "step": 158892 + }, + { + "epoch": 1.91, + "grad_norm": 9.11775903965153, + "learning_rate": 1.0445305700892971e-07, + "loss": 0.8754, + "step": 158895 + }, + { + "epoch": 1.91, + "grad_norm": 6.653100700218861, + "learning_rate": 1.0436886000084412e-07, + "loss": 1.3292, + "step": 158898 + }, + { + "epoch": 1.91, + "grad_norm": 5.65586372001258, + "learning_rate": 1.0428469676298292e-07, + "loss": 1.3847, + "step": 158901 + }, + { + "epoch": 1.91, + "grad_norm": 9.827652587311418, + "learning_rate": 1.0420056729563366e-07, + "loss": 1.2432, + "step": 158904 + }, + { + "epoch": 1.91, + "grad_norm": 4.382075929795181, + "learning_rate": 1.0411647159908278e-07, + "loss": 1.1437, + "step": 158907 + }, + { + "epoch": 1.91, + "grad_norm": 5.610630729867393, + "learning_rate": 1.0403240967361894e-07, + "loss": 0.8398, + "step": 158910 + }, + { + "epoch": 1.91, + "grad_norm": 3.044509154144492, + "learning_rate": 1.0394838151952746e-07, + "loss": 1.1898, + "step": 158913 + }, + { + "epoch": 1.91, + "grad_norm": 5.114671081137507, + "learning_rate": 1.038643871370959e-07, + "loss": 0.9176, + "step": 158916 + }, + { + "epoch": 1.91, + "grad_norm": 3.348880412979466, + "learning_rate": 1.0378042652661068e-07, + "loss": 1.2471, + "step": 158919 + }, + { + "epoch": 1.91, + "grad_norm": 5.734458315547328, + "learning_rate": 1.0369649968835827e-07, + "loss": 0.9315, + "step": 158922 + }, + { + "epoch": 1.91, + "grad_norm": 5.843901292128432, + "learning_rate": 1.0361260662262396e-07, + "loss": 1.0108, + "step": 158925 + }, + { + "epoch": 1.91, + "grad_norm": 21.663556428859724, + "learning_rate": 1.0352874732969643e-07, + "loss": 0.9813, + "step": 158928 + }, + { + "epoch": 1.91, + "grad_norm": 13.018073630567752, + "learning_rate": 1.0344492180985988e-07, + "loss": 1.1985, + "step": 158931 + }, + { + "epoch": 1.91, + "grad_norm": 5.17068534096613, + "learning_rate": 1.0336113006340188e-07, + "loss": 1.0213, + "step": 158934 + }, + { + "epoch": 1.91, + "grad_norm": 3.286306938035475, + "learning_rate": 1.0327737209060662e-07, + "loss": 0.9552, + "step": 158937 + }, + { + "epoch": 1.91, + "grad_norm": 5.965054588158707, + "learning_rate": 1.0319364789176167e-07, + "loss": 1.3896, + "step": 158940 + }, + { + "epoch": 1.91, + "grad_norm": 7.552778457978577, + "learning_rate": 1.0310995746715236e-07, + "loss": 1.3261, + "step": 158943 + }, + { + "epoch": 1.91, + "grad_norm": 11.042899818822098, + "learning_rate": 1.0302630081706289e-07, + "loss": 1.4269, + "step": 158946 + }, + { + "epoch": 1.91, + "grad_norm": 4.735735046091425, + "learning_rate": 1.029426779417797e-07, + "loss": 1.1475, + "step": 158949 + }, + { + "epoch": 1.91, + "grad_norm": 7.2033928422579265, + "learning_rate": 1.0285908884158923e-07, + "loss": 1.0261, + "step": 158952 + }, + { + "epoch": 1.91, + "grad_norm": 7.438293444833026, + "learning_rate": 1.0277553351677572e-07, + "loss": 1.0247, + "step": 158955 + }, + { + "epoch": 1.91, + "grad_norm": 2.9351475635195534, + "learning_rate": 1.0269201196762335e-07, + "loss": 1.1006, + "step": 158958 + }, + { + "epoch": 1.91, + "grad_norm": 11.709988282318644, + "learning_rate": 1.0260852419441858e-07, + "loss": 1.194, + "step": 158961 + }, + { + "epoch": 1.91, + "grad_norm": 5.645752694007973, + "learning_rate": 1.0252507019744562e-07, + "loss": 0.9077, + "step": 158964 + }, + { + "epoch": 1.91, + "grad_norm": 5.536382006344584, + "learning_rate": 1.024416499769898e-07, + "loss": 0.9214, + "step": 158967 + }, + { + "epoch": 1.91, + "grad_norm": 10.67575042546209, + "learning_rate": 1.0235826353333645e-07, + "loss": 1.0631, + "step": 158970 + }, + { + "epoch": 1.91, + "grad_norm": 6.034066380980929, + "learning_rate": 1.0227491086676756e-07, + "loss": 1.1147, + "step": 158973 + }, + { + "epoch": 1.91, + "grad_norm": 8.168979164954308, + "learning_rate": 1.0219159197757067e-07, + "loss": 1.0005, + "step": 158976 + }, + { + "epoch": 1.91, + "grad_norm": 3.9659678776819867, + "learning_rate": 1.021083068660278e-07, + "loss": 1.0917, + "step": 158979 + }, + { + "epoch": 1.91, + "grad_norm": 5.9410584259559736, + "learning_rate": 1.0202505553242537e-07, + "loss": 1.2508, + "step": 158982 + }, + { + "epoch": 1.91, + "grad_norm": 3.34825038834487, + "learning_rate": 1.0194183797704538e-07, + "loss": 1.102, + "step": 158985 + }, + { + "epoch": 1.91, + "grad_norm": 11.623516263770725, + "learning_rate": 1.0185865420017204e-07, + "loss": 1.2101, + "step": 158988 + }, + { + "epoch": 1.91, + "grad_norm": 12.536531671843518, + "learning_rate": 1.0177550420208959e-07, + "loss": 1.2309, + "step": 158991 + }, + { + "epoch": 1.91, + "grad_norm": 23.672624228088058, + "learning_rate": 1.0169238798308334e-07, + "loss": 1.2358, + "step": 158994 + }, + { + "epoch": 1.91, + "grad_norm": 3.147675181673732, + "learning_rate": 1.0160930554343528e-07, + "loss": 1.0245, + "step": 158997 + }, + { + "epoch": 1.91, + "grad_norm": 94.57891352810796, + "learning_rate": 1.0152625688342854e-07, + "loss": 1.1889, + "step": 159000 + }, + { + "epoch": 1.91, + "grad_norm": 4.83413135423071, + "learning_rate": 1.0144324200334732e-07, + "loss": 1.0884, + "step": 159003 + }, + { + "epoch": 1.91, + "grad_norm": 10.705829514092816, + "learning_rate": 1.0136026090347584e-07, + "loss": 1.2785, + "step": 159006 + }, + { + "epoch": 1.91, + "grad_norm": 2.3529026082603504, + "learning_rate": 1.0127731358409609e-07, + "loss": 1.0746, + "step": 159009 + }, + { + "epoch": 1.91, + "grad_norm": 6.876051718517306, + "learning_rate": 1.0119440004549008e-07, + "loss": 1.1181, + "step": 159012 + }, + { + "epoch": 1.91, + "grad_norm": 9.481112266377394, + "learning_rate": 1.0111152028794314e-07, + "loss": 0.9498, + "step": 159015 + }, + { + "epoch": 1.91, + "grad_norm": 6.6114560085961225, + "learning_rate": 1.0102867431173723e-07, + "loss": 1.0973, + "step": 159018 + }, + { + "epoch": 1.91, + "grad_norm": 7.6635110282675205, + "learning_rate": 1.0094586211715441e-07, + "loss": 1.303, + "step": 159021 + }, + { + "epoch": 1.91, + "grad_norm": 4.988667698970338, + "learning_rate": 1.0086308370447773e-07, + "loss": 1.4904, + "step": 159024 + }, + { + "epoch": 1.91, + "grad_norm": 16.810339652028983, + "learning_rate": 1.0078033907399032e-07, + "loss": 1.0763, + "step": 159027 + }, + { + "epoch": 1.91, + "grad_norm": 7.940983622225663, + "learning_rate": 1.0069762822597307e-07, + "loss": 1.1024, + "step": 159030 + }, + { + "epoch": 1.91, + "grad_norm": 3.418562187579235, + "learning_rate": 1.0061495116070907e-07, + "loss": 1.281, + "step": 159033 + }, + { + "epoch": 1.91, + "grad_norm": 7.375972518400104, + "learning_rate": 1.0053230787848145e-07, + "loss": 1.0157, + "step": 159036 + }, + { + "epoch": 1.91, + "grad_norm": 4.04765650669674, + "learning_rate": 1.0044969837957109e-07, + "loss": 1.2925, + "step": 159039 + }, + { + "epoch": 1.91, + "grad_norm": 5.775169720099893, + "learning_rate": 1.0036712266425885e-07, + "loss": 1.1432, + "step": 159042 + }, + { + "epoch": 1.91, + "grad_norm": 14.207788929708206, + "learning_rate": 1.0028458073282899e-07, + "loss": 1.1374, + "step": 159045 + }, + { + "epoch": 1.91, + "grad_norm": 8.231390163394796, + "learning_rate": 1.0020207258556124e-07, + "loss": 1.2572, + "step": 159048 + }, + { + "epoch": 1.91, + "grad_norm": 12.769224638195327, + "learning_rate": 1.0011959822273876e-07, + "loss": 1.1152, + "step": 159051 + }, + { + "epoch": 1.91, + "grad_norm": 6.00247818396336, + "learning_rate": 1.0003715764464128e-07, + "loss": 1.3259, + "step": 159054 + }, + { + "epoch": 1.91, + "grad_norm": 13.05592771097405, + "learning_rate": 9.995475085155192e-08, + "loss": 0.8718, + "step": 159057 + }, + { + "epoch": 1.91, + "grad_norm": 6.6985985268477215, + "learning_rate": 9.987237784375048e-08, + "loss": 1.0558, + "step": 159060 + }, + { + "epoch": 1.91, + "grad_norm": 4.836000334941869, + "learning_rate": 9.979003862152003e-08, + "loss": 0.6343, + "step": 159063 + }, + { + "epoch": 1.91, + "grad_norm": 6.433091344835007, + "learning_rate": 9.970773318513815e-08, + "loss": 0.8218, + "step": 159066 + }, + { + "epoch": 1.91, + "grad_norm": 12.68349773713257, + "learning_rate": 9.962546153488795e-08, + "loss": 1.222, + "step": 159069 + }, + { + "epoch": 1.91, + "grad_norm": 6.2004840961760195, + "learning_rate": 9.95432236710503e-08, + "loss": 1.2727, + "step": 159072 + }, + { + "epoch": 1.91, + "grad_norm": 32.31682420963991, + "learning_rate": 9.946101959390497e-08, + "loss": 1.1292, + "step": 159075 + }, + { + "epoch": 1.91, + "grad_norm": 9.940762309664631, + "learning_rate": 9.937884930373399e-08, + "loss": 1.2783, + "step": 159078 + }, + { + "epoch": 1.91, + "grad_norm": 6.658034958705501, + "learning_rate": 9.92967128008171e-08, + "loss": 1.2795, + "step": 159081 + }, + { + "epoch": 1.91, + "grad_norm": 34.06322712047362, + "learning_rate": 9.921461008543409e-08, + "loss": 1.3257, + "step": 159084 + }, + { + "epoch": 1.91, + "grad_norm": 7.936669043893951, + "learning_rate": 9.913254115786475e-08, + "loss": 1.3211, + "step": 159087 + }, + { + "epoch": 1.91, + "grad_norm": 12.961310680235767, + "learning_rate": 9.905050601838994e-08, + "loss": 1.3956, + "step": 159090 + }, + { + "epoch": 1.91, + "grad_norm": 7.520901202981631, + "learning_rate": 9.896850466728946e-08, + "loss": 1.3521, + "step": 159093 + }, + { + "epoch": 1.91, + "grad_norm": 5.483458095632405, + "learning_rate": 9.888653710484308e-08, + "loss": 0.8008, + "step": 159096 + }, + { + "epoch": 1.91, + "grad_norm": 5.497633963319837, + "learning_rate": 9.880460333133058e-08, + "loss": 0.9341, + "step": 159099 + }, + { + "epoch": 1.91, + "grad_norm": 3.493192165159339, + "learning_rate": 9.872270334703171e-08, + "loss": 1.2814, + "step": 159102 + }, + { + "epoch": 1.91, + "grad_norm": 5.732185767886457, + "learning_rate": 9.864083715222628e-08, + "loss": 1.3587, + "step": 159105 + }, + { + "epoch": 1.91, + "grad_norm": 6.328728930616302, + "learning_rate": 9.855900474719293e-08, + "loss": 1.1357, + "step": 159108 + }, + { + "epoch": 1.91, + "grad_norm": 11.71001798197099, + "learning_rate": 9.847720613221035e-08, + "loss": 1.0926, + "step": 159111 + }, + { + "epoch": 1.91, + "grad_norm": 4.39999927235101, + "learning_rate": 9.83954413075594e-08, + "loss": 1.0377, + "step": 159114 + }, + { + "epoch": 1.91, + "grad_norm": 3.1657758667062352, + "learning_rate": 9.831371027351766e-08, + "loss": 1.1274, + "step": 159117 + }, + { + "epoch": 1.91, + "grad_norm": 22.180448805478047, + "learning_rate": 9.82320130303649e-08, + "loss": 1.1475, + "step": 159120 + }, + { + "epoch": 1.91, + "grad_norm": 8.385572144046881, + "learning_rate": 9.815034957837977e-08, + "loss": 1.1928, + "step": 159123 + }, + { + "epoch": 1.91, + "grad_norm": 13.084787469794307, + "learning_rate": 9.806871991784206e-08, + "loss": 1.2716, + "step": 159126 + }, + { + "epoch": 1.91, + "grad_norm": 6.597282871931351, + "learning_rate": 9.79871240490271e-08, + "loss": 1.3418, + "step": 159129 + }, + { + "epoch": 1.91, + "grad_norm": 13.645898725202887, + "learning_rate": 9.79055619722169e-08, + "loss": 0.7855, + "step": 159132 + }, + { + "epoch": 1.91, + "grad_norm": 27.849649213554567, + "learning_rate": 9.78240336876879e-08, + "loss": 1.093, + "step": 159135 + }, + { + "epoch": 1.91, + "grad_norm": 12.29136471268009, + "learning_rate": 9.774253919571763e-08, + "loss": 1.3256, + "step": 159138 + }, + { + "epoch": 1.91, + "grad_norm": 30.287180182476355, + "learning_rate": 9.766107849658701e-08, + "loss": 0.9643, + "step": 159141 + }, + { + "epoch": 1.91, + "grad_norm": 15.315613325980092, + "learning_rate": 9.757965159057136e-08, + "loss": 1.141, + "step": 159144 + }, + { + "epoch": 1.91, + "grad_norm": 5.509033426845727, + "learning_rate": 9.749825847795046e-08, + "loss": 1.1541, + "step": 159147 + }, + { + "epoch": 1.91, + "grad_norm": 6.204075291369046, + "learning_rate": 9.741689915899966e-08, + "loss": 1.0899, + "step": 159150 + }, + { + "epoch": 1.91, + "grad_norm": 16.379701715573106, + "learning_rate": 9.73355736339987e-08, + "loss": 1.0004, + "step": 159153 + }, + { + "epoch": 1.91, + "grad_norm": 38.699232722491374, + "learning_rate": 9.725428190322406e-08, + "loss": 0.8398, + "step": 159156 + }, + { + "epoch": 1.91, + "grad_norm": 3.0816102719781346, + "learning_rate": 9.71730239669555e-08, + "loss": 0.9839, + "step": 159159 + }, + { + "epoch": 1.91, + "grad_norm": 12.26601892759375, + "learning_rate": 9.709179982546613e-08, + "loss": 1.081, + "step": 159162 + }, + { + "epoch": 1.91, + "grad_norm": 100.04245659328711, + "learning_rate": 9.701060947903684e-08, + "loss": 0.8839, + "step": 159165 + }, + { + "epoch": 1.91, + "grad_norm": 12.662077865172883, + "learning_rate": 9.692945292794298e-08, + "loss": 0.8523, + "step": 159168 + }, + { + "epoch": 1.91, + "grad_norm": 4.827179231678747, + "learning_rate": 9.684833017246209e-08, + "loss": 1.3459, + "step": 159171 + }, + { + "epoch": 1.91, + "grad_norm": 3.5267138087836396, + "learning_rate": 9.676724121286951e-08, + "loss": 0.9856, + "step": 159174 + }, + { + "epoch": 1.91, + "grad_norm": 4.300916693999206, + "learning_rate": 9.66861860494439e-08, + "loss": 1.3517, + "step": 159177 + }, + { + "epoch": 1.91, + "grad_norm": 6.31652703192624, + "learning_rate": 9.660516468246172e-08, + "loss": 1.0802, + "step": 159180 + }, + { + "epoch": 1.91, + "grad_norm": 22.97921234952733, + "learning_rate": 9.65241771121983e-08, + "loss": 1.3319, + "step": 159183 + }, + { + "epoch": 1.91, + "grad_norm": 7.816954338019812, + "learning_rate": 9.64432233389312e-08, + "loss": 0.857, + "step": 159186 + }, + { + "epoch": 1.91, + "grad_norm": 4.552381307949055, + "learning_rate": 9.636230336293572e-08, + "loss": 1.1739, + "step": 159189 + }, + { + "epoch": 1.91, + "grad_norm": 9.46322572592719, + "learning_rate": 9.628141718448836e-08, + "loss": 1.1512, + "step": 159192 + }, + { + "epoch": 1.91, + "grad_norm": 8.368809155954693, + "learning_rate": 9.620056480386552e-08, + "loss": 1.4062, + "step": 159195 + }, + { + "epoch": 1.91, + "grad_norm": 7.732920827685346, + "learning_rate": 9.611974622134368e-08, + "loss": 1.0587, + "step": 159198 + }, + { + "epoch": 1.91, + "grad_norm": 4.594142570157808, + "learning_rate": 9.603896143719593e-08, + "loss": 1.2313, + "step": 159201 + }, + { + "epoch": 1.91, + "grad_norm": 8.481406159037773, + "learning_rate": 9.595821045170095e-08, + "loss": 1.0982, + "step": 159204 + }, + { + "epoch": 1.91, + "grad_norm": 4.588706321437868, + "learning_rate": 9.587749326513295e-08, + "loss": 1.1064, + "step": 159207 + }, + { + "epoch": 1.91, + "grad_norm": 7.060452449829924, + "learning_rate": 9.579680987776729e-08, + "loss": 1.0377, + "step": 159210 + }, + { + "epoch": 1.91, + "grad_norm": 6.39504305265594, + "learning_rate": 9.57161602898804e-08, + "loss": 0.9708, + "step": 159213 + }, + { + "epoch": 1.91, + "grad_norm": 7.2096428562877914, + "learning_rate": 9.56355445017465e-08, + "loss": 1.1609, + "step": 159216 + }, + { + "epoch": 1.91, + "grad_norm": 12.83161311544185, + "learning_rate": 9.555496251363983e-08, + "loss": 1.2282, + "step": 159219 + }, + { + "epoch": 1.91, + "grad_norm": 9.533843991508963, + "learning_rate": 9.547441432583792e-08, + "loss": 0.8729, + "step": 159222 + }, + { + "epoch": 1.91, + "grad_norm": 3.2299373881028317, + "learning_rate": 9.539389993861392e-08, + "loss": 1.4619, + "step": 159225 + }, + { + "epoch": 1.91, + "grad_norm": 14.66428521433555, + "learning_rate": 9.531341935224203e-08, + "loss": 1.0006, + "step": 159228 + }, + { + "epoch": 1.91, + "grad_norm": 8.475027271617437, + "learning_rate": 9.52329725669987e-08, + "loss": 1.0822, + "step": 159231 + }, + { + "epoch": 1.91, + "grad_norm": 25.622588016977257, + "learning_rate": 9.515255958315817e-08, + "loss": 0.9279, + "step": 159234 + }, + { + "epoch": 1.91, + "grad_norm": 5.284563456263655, + "learning_rate": 9.507218040099242e-08, + "loss": 1.0537, + "step": 159237 + }, + { + "epoch": 1.91, + "grad_norm": 4.899746045356039, + "learning_rate": 9.499183502077902e-08, + "loss": 1.1856, + "step": 159240 + }, + { + "epoch": 1.91, + "grad_norm": 10.277342107456443, + "learning_rate": 9.491152344278998e-08, + "loss": 1.0967, + "step": 159243 + }, + { + "epoch": 1.91, + "grad_norm": 8.41603708204374, + "learning_rate": 9.483124566730062e-08, + "loss": 1.0316, + "step": 159246 + }, + { + "epoch": 1.91, + "grad_norm": 7.986126100021337, + "learning_rate": 9.475100169458407e-08, + "loss": 1.3689, + "step": 159249 + }, + { + "epoch": 1.91, + "grad_norm": 6.442449582264574, + "learning_rate": 9.467079152491455e-08, + "loss": 0.8974, + "step": 159252 + }, + { + "epoch": 1.91, + "grad_norm": 29.55359596525808, + "learning_rate": 9.459061515856627e-08, + "loss": 1.0166, + "step": 159255 + }, + { + "epoch": 1.92, + "grad_norm": 5.348120843722759, + "learning_rate": 9.451047259581126e-08, + "loss": 1.1737, + "step": 159258 + }, + { + "epoch": 1.92, + "grad_norm": 3.276382472640892, + "learning_rate": 9.443036383692483e-08, + "loss": 0.8104, + "step": 159261 + }, + { + "epoch": 1.92, + "grad_norm": 11.154486609935228, + "learning_rate": 9.435028888217901e-08, + "loss": 1.447, + "step": 159264 + }, + { + "epoch": 1.92, + "grad_norm": 10.292073598892868, + "learning_rate": 9.427024773184912e-08, + "loss": 1.1519, + "step": 159267 + }, + { + "epoch": 1.92, + "grad_norm": 5.0272484547760685, + "learning_rate": 9.419024038620495e-08, + "loss": 1.1632, + "step": 159270 + }, + { + "epoch": 1.92, + "grad_norm": 7.332307207217129, + "learning_rate": 9.411026684552294e-08, + "loss": 0.9123, + "step": 159273 + }, + { + "epoch": 1.92, + "grad_norm": 7.332425119092591, + "learning_rate": 9.403032711007398e-08, + "loss": 1.1464, + "step": 159276 + }, + { + "epoch": 1.92, + "grad_norm": 5.084962441930581, + "learning_rate": 9.39504211801312e-08, + "loss": 1.0643, + "step": 159279 + }, + { + "epoch": 1.92, + "grad_norm": 18.722630891135765, + "learning_rate": 9.387054905596771e-08, + "loss": 0.9578, + "step": 159282 + }, + { + "epoch": 1.92, + "grad_norm": 2.696072783213441, + "learning_rate": 9.379071073785551e-08, + "loss": 1.513, + "step": 159285 + }, + { + "epoch": 1.92, + "grad_norm": 7.20037705006589, + "learning_rate": 9.37109062260666e-08, + "loss": 1.01, + "step": 159288 + }, + { + "epoch": 1.92, + "grad_norm": 10.361215554661367, + "learning_rate": 9.363113552087522e-08, + "loss": 1.5098, + "step": 159291 + }, + { + "epoch": 1.92, + "grad_norm": 5.323620057814644, + "learning_rate": 9.355139862255336e-08, + "loss": 0.8042, + "step": 159294 + }, + { + "epoch": 1.92, + "grad_norm": 6.15484836708318, + "learning_rate": 9.34716955313708e-08, + "loss": 1.1446, + "step": 159297 + }, + { + "epoch": 1.92, + "grad_norm": 2.381094564405083, + "learning_rate": 9.33920262476018e-08, + "loss": 1.3136, + "step": 159300 + }, + { + "epoch": 1.92, + "grad_norm": 6.705043311438326, + "learning_rate": 9.331239077151721e-08, + "loss": 1.6947, + "step": 159303 + }, + { + "epoch": 1.92, + "grad_norm": 7.617146066854583, + "learning_rate": 9.323278910338907e-08, + "loss": 0.9498, + "step": 159306 + }, + { + "epoch": 1.92, + "grad_norm": 19.45601275068104, + "learning_rate": 9.315322124348936e-08, + "loss": 1.193, + "step": 159309 + }, + { + "epoch": 1.92, + "grad_norm": 9.320700010912635, + "learning_rate": 9.307368719208897e-08, + "loss": 1.1709, + "step": 159312 + }, + { + "epoch": 1.92, + "grad_norm": 6.630584748033557, + "learning_rate": 9.299418694945994e-08, + "loss": 0.8237, + "step": 159315 + }, + { + "epoch": 1.92, + "grad_norm": 15.270280388585174, + "learning_rate": 9.291472051587202e-08, + "loss": 0.6693, + "step": 159318 + }, + { + "epoch": 1.92, + "grad_norm": 2.405347233396295, + "learning_rate": 9.283528789159946e-08, + "loss": 1.1989, + "step": 159321 + }, + { + "epoch": 1.92, + "grad_norm": 7.70074390891165, + "learning_rate": 9.27558890769098e-08, + "loss": 1.283, + "step": 159324 + }, + { + "epoch": 1.92, + "grad_norm": 22.819355978118306, + "learning_rate": 9.267652407207727e-08, + "loss": 1.0033, + "step": 159327 + }, + { + "epoch": 1.92, + "grad_norm": 6.238830122449867, + "learning_rate": 9.259719287736946e-08, + "loss": 1.2504, + "step": 159330 + }, + { + "epoch": 1.92, + "grad_norm": 9.515678316978576, + "learning_rate": 9.251789549306056e-08, + "loss": 1.3944, + "step": 159333 + }, + { + "epoch": 1.92, + "grad_norm": 10.252088645341624, + "learning_rate": 9.243863191941816e-08, + "loss": 1.251, + "step": 159336 + }, + { + "epoch": 1.92, + "grad_norm": 17.79048484751915, + "learning_rate": 9.235940215671535e-08, + "loss": 1.3043, + "step": 159339 + }, + { + "epoch": 1.92, + "grad_norm": 7.093530342105359, + "learning_rate": 9.228020620521971e-08, + "loss": 1.1772, + "step": 159342 + }, + { + "epoch": 1.92, + "grad_norm": 5.443759893828876, + "learning_rate": 9.220104406520325e-08, + "loss": 1.0938, + "step": 159345 + }, + { + "epoch": 1.92, + "grad_norm": 5.723780231110597, + "learning_rate": 9.212191573693685e-08, + "loss": 1.1571, + "step": 159348 + }, + { + "epoch": 1.92, + "grad_norm": 5.492694752668772, + "learning_rate": 9.204282122068919e-08, + "loss": 1.1519, + "step": 159351 + }, + { + "epoch": 1.92, + "grad_norm": 8.875681870645812, + "learning_rate": 9.196376051673006e-08, + "loss": 0.9107, + "step": 159354 + }, + { + "epoch": 1.92, + "grad_norm": 6.182030520853877, + "learning_rate": 9.188473362533035e-08, + "loss": 1.2553, + "step": 159357 + }, + { + "epoch": 1.92, + "grad_norm": 4.643956658884228, + "learning_rate": 9.180574054675761e-08, + "loss": 0.8098, + "step": 159360 + }, + { + "epoch": 1.92, + "grad_norm": 7.245186745880599, + "learning_rate": 9.172678128128499e-08, + "loss": 1.4833, + "step": 159363 + }, + { + "epoch": 1.92, + "grad_norm": 11.938843203628442, + "learning_rate": 9.16478558291789e-08, + "loss": 1.2741, + "step": 159366 + }, + { + "epoch": 1.92, + "grad_norm": 7.890407047330639, + "learning_rate": 9.156896419070916e-08, + "loss": 1.0294, + "step": 159369 + }, + { + "epoch": 1.92, + "grad_norm": 7.841731544217397, + "learning_rate": 9.149010636614664e-08, + "loss": 1.0375, + "step": 159372 + }, + { + "epoch": 1.92, + "grad_norm": 8.207214193491781, + "learning_rate": 9.141128235575891e-08, + "loss": 1.0314, + "step": 159375 + }, + { + "epoch": 1.92, + "grad_norm": 353.7620958979107, + "learning_rate": 9.133249215981577e-08, + "loss": 1.2792, + "step": 159378 + }, + { + "epoch": 1.92, + "grad_norm": 4.23016684054862, + "learning_rate": 9.125373577858588e-08, + "loss": 0.9771, + "step": 159381 + }, + { + "epoch": 1.92, + "grad_norm": 5.669907509840606, + "learning_rate": 9.117501321233679e-08, + "loss": 1.1098, + "step": 159384 + }, + { + "epoch": 1.92, + "grad_norm": 4.813250082973174, + "learning_rate": 9.109632446133943e-08, + "loss": 1.2581, + "step": 159387 + }, + { + "epoch": 1.92, + "grad_norm": 9.52627253501717, + "learning_rate": 9.101766952586021e-08, + "loss": 1.2105, + "step": 159390 + }, + { + "epoch": 1.92, + "grad_norm": 18.296433684842775, + "learning_rate": 9.093904840616897e-08, + "loss": 1.1292, + "step": 159393 + }, + { + "epoch": 1.92, + "grad_norm": 6.002046381230099, + "learning_rate": 9.086046110253322e-08, + "loss": 1.1996, + "step": 159396 + }, + { + "epoch": 1.92, + "grad_norm": 6.073315755318579, + "learning_rate": 9.078190761522276e-08, + "loss": 1.4506, + "step": 159399 + }, + { + "epoch": 1.92, + "grad_norm": 5.296259477740893, + "learning_rate": 9.070338794450295e-08, + "loss": 0.9126, + "step": 159402 + }, + { + "epoch": 1.92, + "grad_norm": 9.287534647532674, + "learning_rate": 9.062490209064356e-08, + "loss": 1.3284, + "step": 159405 + }, + { + "epoch": 1.92, + "grad_norm": 4.351758434284611, + "learning_rate": 9.054645005391216e-08, + "loss": 1.1247, + "step": 159408 + }, + { + "epoch": 1.92, + "grad_norm": 8.973874246218838, + "learning_rate": 9.04680318345752e-08, + "loss": 1.4448, + "step": 159411 + }, + { + "epoch": 1.92, + "grad_norm": 8.05132792991959, + "learning_rate": 9.038964743290356e-08, + "loss": 1.2073, + "step": 159414 + }, + { + "epoch": 1.92, + "grad_norm": 8.536646346786148, + "learning_rate": 9.03112968491604e-08, + "loss": 1.2158, + "step": 159417 + }, + { + "epoch": 1.92, + "grad_norm": 4.483136798119524, + "learning_rate": 9.023298008361658e-08, + "loss": 1.0595, + "step": 159420 + }, + { + "epoch": 1.92, + "grad_norm": 4.2708542825281475, + "learning_rate": 9.015469713653858e-08, + "loss": 0.7557, + "step": 159423 + }, + { + "epoch": 1.92, + "grad_norm": 9.922966614726782, + "learning_rate": 9.007644800819171e-08, + "loss": 1.1775, + "step": 159426 + }, + { + "epoch": 1.92, + "grad_norm": 3.6096816561604066, + "learning_rate": 8.999823269884578e-08, + "loss": 1.0555, + "step": 159429 + }, + { + "epoch": 1.92, + "grad_norm": 6.999142520494551, + "learning_rate": 8.992005120876612e-08, + "loss": 1.2926, + "step": 159432 + }, + { + "epoch": 1.92, + "grad_norm": 4.634572300435628, + "learning_rate": 8.98419035382192e-08, + "loss": 1.0723, + "step": 159435 + }, + { + "epoch": 1.92, + "grad_norm": 4.4936440700079965, + "learning_rate": 8.976378968747146e-08, + "loss": 1.5547, + "step": 159438 + }, + { + "epoch": 1.92, + "grad_norm": 10.527680986717767, + "learning_rate": 8.968570965679157e-08, + "loss": 1.0142, + "step": 159441 + }, + { + "epoch": 1.92, + "grad_norm": 8.6040530974233, + "learning_rate": 8.960766344644379e-08, + "loss": 1.0193, + "step": 159444 + }, + { + "epoch": 1.92, + "grad_norm": 6.953908074083915, + "learning_rate": 8.952965105669676e-08, + "loss": 0.9963, + "step": 159447 + }, + { + "epoch": 1.92, + "grad_norm": 10.810870008455936, + "learning_rate": 8.945167248781361e-08, + "loss": 1.4444, + "step": 159450 + }, + { + "epoch": 1.92, + "grad_norm": 2.91882004671563, + "learning_rate": 8.937372774006303e-08, + "loss": 1.4847, + "step": 159453 + }, + { + "epoch": 1.92, + "grad_norm": 19.60323328949573, + "learning_rate": 8.929581681371036e-08, + "loss": 1.5479, + "step": 159456 + }, + { + "epoch": 1.92, + "grad_norm": 5.712010929208291, + "learning_rate": 8.921793970902093e-08, + "loss": 1.0347, + "step": 159459 + }, + { + "epoch": 1.92, + "grad_norm": 8.656560722592156, + "learning_rate": 8.914009642626009e-08, + "loss": 1.3949, + "step": 159462 + }, + { + "epoch": 1.92, + "grad_norm": 6.827084830282062, + "learning_rate": 8.90622869656954e-08, + "loss": 1.3026, + "step": 159465 + }, + { + "epoch": 1.92, + "grad_norm": 7.056225228931876, + "learning_rate": 8.898451132759112e-08, + "loss": 1.2873, + "step": 159468 + }, + { + "epoch": 1.92, + "grad_norm": 12.157405453063506, + "learning_rate": 8.890676951221255e-08, + "loss": 1.2109, + "step": 159471 + }, + { + "epoch": 1.92, + "grad_norm": 12.83660413879801, + "learning_rate": 8.882906151982395e-08, + "loss": 1.1886, + "step": 159474 + }, + { + "epoch": 1.92, + "grad_norm": 7.533106725557024, + "learning_rate": 8.875138735069288e-08, + "loss": 1.0906, + "step": 159477 + }, + { + "epoch": 1.92, + "grad_norm": 7.997685751603082, + "learning_rate": 8.867374700508357e-08, + "loss": 1.4758, + "step": 159480 + }, + { + "epoch": 1.92, + "grad_norm": 4.63401748989642, + "learning_rate": 8.859614048326026e-08, + "loss": 0.835, + "step": 159483 + }, + { + "epoch": 1.92, + "grad_norm": 5.68700749101157, + "learning_rate": 8.851856778548828e-08, + "loss": 1.0286, + "step": 159486 + }, + { + "epoch": 1.92, + "grad_norm": 9.27921215325983, + "learning_rate": 8.844102891203299e-08, + "loss": 1.2919, + "step": 159489 + }, + { + "epoch": 1.92, + "grad_norm": 18.80366507424031, + "learning_rate": 8.83635238631575e-08, + "loss": 1.1833, + "step": 159492 + }, + { + "epoch": 1.92, + "grad_norm": 4.464588344870302, + "learning_rate": 8.828605263912826e-08, + "loss": 1.2072, + "step": 159495 + }, + { + "epoch": 1.92, + "grad_norm": 4.374458747558503, + "learning_rate": 8.820861524020729e-08, + "loss": 0.8103, + "step": 159498 + }, + { + "epoch": 1.92, + "grad_norm": 9.912223750548995, + "learning_rate": 8.813121166665995e-08, + "loss": 1.046, + "step": 159501 + }, + { + "epoch": 1.92, + "grad_norm": 9.762201307538149, + "learning_rate": 8.805384191875155e-08, + "loss": 1.2349, + "step": 159504 + }, + { + "epoch": 1.92, + "grad_norm": 5.5876179869093425, + "learning_rate": 8.797650599674523e-08, + "loss": 1.179, + "step": 159507 + }, + { + "epoch": 1.92, + "grad_norm": 6.36021825109868, + "learning_rate": 8.789920390090412e-08, + "loss": 0.9762, + "step": 159510 + }, + { + "epoch": 1.92, + "grad_norm": 33.87967028001811, + "learning_rate": 8.782193563149244e-08, + "loss": 1.0503, + "step": 159513 + }, + { + "epoch": 1.92, + "grad_norm": 4.3693831348800245, + "learning_rate": 8.774470118877443e-08, + "loss": 1.2664, + "step": 159516 + }, + { + "epoch": 1.92, + "grad_norm": 6.233233231678821, + "learning_rate": 8.766750057301432e-08, + "loss": 1.0439, + "step": 159519 + }, + { + "epoch": 1.92, + "grad_norm": 8.244088332594801, + "learning_rate": 8.759033378447412e-08, + "loss": 1.2553, + "step": 159522 + }, + { + "epoch": 1.92, + "grad_norm": 15.145205233625868, + "learning_rate": 8.751320082341696e-08, + "loss": 1.1596, + "step": 159525 + }, + { + "epoch": 1.92, + "grad_norm": 6.604245609167578, + "learning_rate": 8.743610169010708e-08, + "loss": 1.2436, + "step": 159528 + }, + { + "epoch": 1.92, + "grad_norm": 13.035210187256666, + "learning_rate": 8.735903638480758e-08, + "loss": 1.3296, + "step": 159531 + }, + { + "epoch": 1.92, + "grad_norm": 12.317245098647353, + "learning_rate": 8.72820049077805e-08, + "loss": 1.0377, + "step": 159534 + }, + { + "epoch": 1.92, + "grad_norm": 5.00974580272073, + "learning_rate": 8.720500725929004e-08, + "loss": 0.9141, + "step": 159537 + }, + { + "epoch": 1.92, + "grad_norm": 12.288626818478106, + "learning_rate": 8.712804343959824e-08, + "loss": 1.0118, + "step": 159540 + }, + { + "epoch": 1.92, + "grad_norm": 3.637900815732901, + "learning_rate": 8.705111344896821e-08, + "loss": 1.2258, + "step": 159543 + }, + { + "epoch": 1.92, + "grad_norm": 4.1197311782782995, + "learning_rate": 8.697421728766086e-08, + "loss": 0.9104, + "step": 159546 + }, + { + "epoch": 1.92, + "grad_norm": 3.357208762441356, + "learning_rate": 8.689735495594153e-08, + "loss": 1.1223, + "step": 159549 + }, + { + "epoch": 1.92, + "grad_norm": 6.198472377168314, + "learning_rate": 8.682052645407002e-08, + "loss": 0.9305, + "step": 159552 + }, + { + "epoch": 1.92, + "grad_norm": 8.546164238945613, + "learning_rate": 8.674373178230944e-08, + "loss": 0.8325, + "step": 159555 + }, + { + "epoch": 1.92, + "grad_norm": 19.271442874066413, + "learning_rate": 8.666697094092069e-08, + "loss": 1.1414, + "step": 159558 + }, + { + "epoch": 1.92, + "grad_norm": 2.860618507380786, + "learning_rate": 8.659024393016802e-08, + "loss": 1.1679, + "step": 159561 + }, + { + "epoch": 1.92, + "grad_norm": 8.680342625445865, + "learning_rate": 8.651355075031232e-08, + "loss": 1.0638, + "step": 159564 + }, + { + "epoch": 1.92, + "grad_norm": 21.323683008629956, + "learning_rate": 8.643689140161449e-08, + "loss": 0.895, + "step": 159567 + }, + { + "epoch": 1.92, + "grad_norm": 7.051434993002684, + "learning_rate": 8.636026588433655e-08, + "loss": 1.1396, + "step": 159570 + }, + { + "epoch": 1.92, + "grad_norm": 10.127911105985438, + "learning_rate": 8.628367419874051e-08, + "loss": 0.7547, + "step": 159573 + }, + { + "epoch": 1.92, + "grad_norm": 6.387684341504535, + "learning_rate": 8.620711634508727e-08, + "loss": 1.0684, + "step": 159576 + }, + { + "epoch": 1.92, + "grad_norm": 6.921933192444171, + "learning_rate": 8.613059232363774e-08, + "loss": 1.0275, + "step": 159579 + }, + { + "epoch": 1.92, + "grad_norm": 10.902978767999286, + "learning_rate": 8.605410213465392e-08, + "loss": 1.6416, + "step": 159582 + }, + { + "epoch": 1.92, + "grad_norm": 10.442501033738965, + "learning_rate": 8.597764577839673e-08, + "loss": 1.2763, + "step": 159585 + }, + { + "epoch": 1.92, + "grad_norm": 6.186696650252113, + "learning_rate": 8.590122325512595e-08, + "loss": 1.2291, + "step": 159588 + }, + { + "epoch": 1.92, + "grad_norm": 5.41297194777981, + "learning_rate": 8.582483456510471e-08, + "loss": 0.8066, + "step": 159591 + }, + { + "epoch": 1.92, + "grad_norm": 9.00098253407102, + "learning_rate": 8.574847970859168e-08, + "loss": 1.0982, + "step": 159594 + }, + { + "epoch": 1.92, + "grad_norm": 9.528628849763498, + "learning_rate": 8.567215868584666e-08, + "loss": 1.2261, + "step": 159597 + }, + { + "epoch": 1.92, + "grad_norm": 3.2930165335220347, + "learning_rate": 8.559587149713277e-08, + "loss": 0.9123, + "step": 159600 + }, + { + "epoch": 1.92, + "grad_norm": 3.851981584518941, + "learning_rate": 8.55196181427087e-08, + "loss": 1.0502, + "step": 159603 + }, + { + "epoch": 1.92, + "grad_norm": 7.945517205539276, + "learning_rate": 8.544339862283535e-08, + "loss": 1.0329, + "step": 159606 + }, + { + "epoch": 1.92, + "grad_norm": 5.162989765303179, + "learning_rate": 8.536721293777139e-08, + "loss": 1.07, + "step": 159609 + }, + { + "epoch": 1.92, + "grad_norm": 6.059384924403898, + "learning_rate": 8.529106108777885e-08, + "loss": 0.9028, + "step": 159612 + }, + { + "epoch": 1.92, + "grad_norm": 11.539428271321144, + "learning_rate": 8.521494307311639e-08, + "loss": 1.1232, + "step": 159615 + }, + { + "epoch": 1.92, + "grad_norm": 9.177175385649848, + "learning_rate": 8.513885889404494e-08, + "loss": 1.0265, + "step": 159618 + }, + { + "epoch": 1.92, + "grad_norm": 14.766901449213824, + "learning_rate": 8.506280855082205e-08, + "loss": 0.8581, + "step": 159621 + }, + { + "epoch": 1.92, + "grad_norm": 6.307931869720631, + "learning_rate": 8.498679204370864e-08, + "loss": 0.9344, + "step": 159624 + }, + { + "epoch": 1.92, + "grad_norm": 5.6884786888229995, + "learning_rate": 8.491080937296447e-08, + "loss": 1.1277, + "step": 159627 + }, + { + "epoch": 1.92, + "grad_norm": 4.457680114053058, + "learning_rate": 8.483486053884827e-08, + "loss": 1.138, + "step": 159630 + }, + { + "epoch": 1.92, + "grad_norm": 4.038678990414237, + "learning_rate": 8.475894554161867e-08, + "loss": 1.2154, + "step": 159633 + }, + { + "epoch": 1.92, + "grad_norm": 10.895816974716524, + "learning_rate": 8.468306438153551e-08, + "loss": 0.9981, + "step": 159636 + }, + { + "epoch": 1.92, + "grad_norm": 3.6881578197037714, + "learning_rate": 8.460721705885744e-08, + "loss": 1.5434, + "step": 159639 + }, + { + "epoch": 1.92, + "grad_norm": 5.560034788343364, + "learning_rate": 8.453140357384427e-08, + "loss": 1.3807, + "step": 159642 + }, + { + "epoch": 1.92, + "grad_norm": 7.092136798794356, + "learning_rate": 8.445562392675355e-08, + "loss": 1.1663, + "step": 159645 + }, + { + "epoch": 1.92, + "grad_norm": 8.07047574640578, + "learning_rate": 8.437987811784288e-08, + "loss": 1.2768, + "step": 159648 + }, + { + "epoch": 1.92, + "grad_norm": 6.146554534400827, + "learning_rate": 8.430416614737314e-08, + "loss": 1.0439, + "step": 159651 + }, + { + "epoch": 1.92, + "grad_norm": 12.632043262377906, + "learning_rate": 8.422848801560191e-08, + "loss": 1.0698, + "step": 159654 + }, + { + "epoch": 1.92, + "grad_norm": 5.269660433682919, + "learning_rate": 8.415284372278677e-08, + "loss": 1.1425, + "step": 159657 + }, + { + "epoch": 1.92, + "grad_norm": 19.18603045799132, + "learning_rate": 8.407723326918748e-08, + "loss": 1.0079, + "step": 159660 + }, + { + "epoch": 1.92, + "grad_norm": 5.147377449754335, + "learning_rate": 8.400165665505944e-08, + "loss": 0.9697, + "step": 159663 + }, + { + "epoch": 1.92, + "grad_norm": 5.545386779495382, + "learning_rate": 8.392611388066241e-08, + "loss": 1.0717, + "step": 159666 + }, + { + "epoch": 1.92, + "grad_norm": 3.3417487970953004, + "learning_rate": 8.385060494625397e-08, + "loss": 1.0267, + "step": 159669 + }, + { + "epoch": 1.92, + "grad_norm": 5.793302675339526, + "learning_rate": 8.377512985209169e-08, + "loss": 1.3623, + "step": 159672 + }, + { + "epoch": 1.92, + "grad_norm": 12.732052025557397, + "learning_rate": 8.369968859843314e-08, + "loss": 0.9682, + "step": 159675 + }, + { + "epoch": 1.92, + "grad_norm": 13.250958548162934, + "learning_rate": 8.362428118553589e-08, + "loss": 1.4621, + "step": 159678 + }, + { + "epoch": 1.92, + "grad_norm": 19.861677383218574, + "learning_rate": 8.354890761365641e-08, + "loss": 1.0524, + "step": 159681 + }, + { + "epoch": 1.92, + "grad_norm": 2.2815797117342234, + "learning_rate": 8.347356788305338e-08, + "loss": 1.1598, + "step": 159684 + }, + { + "epoch": 1.92, + "grad_norm": 5.893719661738879, + "learning_rate": 8.339826199398215e-08, + "loss": 0.9176, + "step": 159687 + }, + { + "epoch": 1.92, + "grad_norm": 4.7465064896159594, + "learning_rate": 8.33229899467014e-08, + "loss": 0.9918, + "step": 159690 + }, + { + "epoch": 1.92, + "grad_norm": 29.798387280006327, + "learning_rate": 8.324775174146649e-08, + "loss": 0.9114, + "step": 159693 + }, + { + "epoch": 1.92, + "grad_norm": 7.378414091011865, + "learning_rate": 8.317254737853608e-08, + "loss": 1.1496, + "step": 159696 + }, + { + "epoch": 1.92, + "grad_norm": 12.526527636946964, + "learning_rate": 8.309737685816555e-08, + "loss": 0.5369, + "step": 159699 + }, + { + "epoch": 1.92, + "grad_norm": 7.318156235869127, + "learning_rate": 8.302224018061245e-08, + "loss": 0.9868, + "step": 159702 + }, + { + "epoch": 1.92, + "grad_norm": 7.834206699123179, + "learning_rate": 8.294713734613103e-08, + "loss": 0.9162, + "step": 159705 + }, + { + "epoch": 1.92, + "grad_norm": 12.476592201640415, + "learning_rate": 8.287206835497885e-08, + "loss": 1.4224, + "step": 159708 + }, + { + "epoch": 1.92, + "grad_norm": 6.367097863035872, + "learning_rate": 8.279703320741239e-08, + "loss": 1.321, + "step": 159711 + }, + { + "epoch": 1.92, + "grad_norm": 9.23014397804226, + "learning_rate": 8.272203190368811e-08, + "loss": 1.2592, + "step": 159714 + }, + { + "epoch": 1.92, + "grad_norm": 15.117051845152407, + "learning_rate": 8.264706444406023e-08, + "loss": 0.8518, + "step": 159717 + }, + { + "epoch": 1.92, + "grad_norm": 5.347833022586777, + "learning_rate": 8.257213082878635e-08, + "loss": 1.1457, + "step": 159720 + }, + { + "epoch": 1.92, + "grad_norm": 10.244068494190632, + "learning_rate": 8.249723105812179e-08, + "loss": 0.9417, + "step": 159723 + }, + { + "epoch": 1.92, + "grad_norm": 6.716284553363318, + "learning_rate": 8.242236513232193e-08, + "loss": 1.1435, + "step": 159726 + }, + { + "epoch": 1.92, + "grad_norm": 4.1203224951940935, + "learning_rate": 8.234753305164212e-08, + "loss": 0.9375, + "step": 159729 + }, + { + "epoch": 1.92, + "grad_norm": 4.8794371839729305, + "learning_rate": 8.227273481633769e-08, + "loss": 0.913, + "step": 159732 + }, + { + "epoch": 1.92, + "grad_norm": 6.192536872033529, + "learning_rate": 8.2197970426664e-08, + "loss": 1.3546, + "step": 159735 + }, + { + "epoch": 1.92, + "grad_norm": 69.30483261297404, + "learning_rate": 8.212323988287641e-08, + "loss": 1.0762, + "step": 159738 + }, + { + "epoch": 1.92, + "grad_norm": 6.621689038800391, + "learning_rate": 8.204854318523026e-08, + "loss": 1.0908, + "step": 159741 + }, + { + "epoch": 1.92, + "grad_norm": 4.477461937815994, + "learning_rate": 8.197388033397979e-08, + "loss": 0.9398, + "step": 159744 + }, + { + "epoch": 1.92, + "grad_norm": 3.0546930215422363, + "learning_rate": 8.189925132937927e-08, + "loss": 1.1347, + "step": 159747 + }, + { + "epoch": 1.92, + "grad_norm": 7.448023582213549, + "learning_rate": 8.182465617168511e-08, + "loss": 1.7169, + "step": 159750 + }, + { + "epoch": 1.92, + "grad_norm": 9.161346393723726, + "learning_rate": 8.175009486115048e-08, + "loss": 1.1339, + "step": 159753 + }, + { + "epoch": 1.92, + "grad_norm": 5.577770815922495, + "learning_rate": 8.167556739802962e-08, + "loss": 1.1293, + "step": 159756 + }, + { + "epoch": 1.92, + "grad_norm": 6.921646308508176, + "learning_rate": 8.160107378257786e-08, + "loss": 0.9016, + "step": 159759 + }, + { + "epoch": 1.92, + "grad_norm": 6.916705960250601, + "learning_rate": 8.152661401504946e-08, + "loss": 1.3474, + "step": 159762 + }, + { + "epoch": 1.92, + "grad_norm": 13.57236320501822, + "learning_rate": 8.145218809569754e-08, + "loss": 1.0389, + "step": 159765 + }, + { + "epoch": 1.92, + "grad_norm": 6.815627642281531, + "learning_rate": 8.137779602477746e-08, + "loss": 1.2124, + "step": 159768 + }, + { + "epoch": 1.92, + "grad_norm": 6.197175539156689, + "learning_rate": 8.130343780254124e-08, + "loss": 1.1127, + "step": 159771 + }, + { + "epoch": 1.92, + "grad_norm": 16.337673760121866, + "learning_rate": 8.122911342924422e-08, + "loss": 1.0323, + "step": 159774 + }, + { + "epoch": 1.92, + "grad_norm": 4.901978307151309, + "learning_rate": 8.115482290513954e-08, + "loss": 1.2779, + "step": 159777 + }, + { + "epoch": 1.92, + "grad_norm": 12.923230294010576, + "learning_rate": 8.108056623048143e-08, + "loss": 1.425, + "step": 159780 + }, + { + "epoch": 1.92, + "grad_norm": 6.3523046009782265, + "learning_rate": 8.100634340552083e-08, + "loss": 1.3579, + "step": 159783 + }, + { + "epoch": 1.92, + "grad_norm": 4.25995250232421, + "learning_rate": 8.093215443051416e-08, + "loss": 1.1076, + "step": 159786 + }, + { + "epoch": 1.92, + "grad_norm": 9.858322187077508, + "learning_rate": 8.085799930571348e-08, + "loss": 1.0702, + "step": 159789 + }, + { + "epoch": 1.92, + "grad_norm": 3.448151605393057, + "learning_rate": 8.078387803137078e-08, + "loss": 0.9658, + "step": 159792 + }, + { + "epoch": 1.92, + "grad_norm": 24.840168968535973, + "learning_rate": 8.070979060774031e-08, + "loss": 1.1045, + "step": 159795 + }, + { + "epoch": 1.92, + "grad_norm": 7.946094915107136, + "learning_rate": 8.06357370350741e-08, + "loss": 1.0249, + "step": 159798 + }, + { + "epoch": 1.92, + "grad_norm": 8.1254065013011, + "learning_rate": 8.056171731362416e-08, + "loss": 0.9508, + "step": 159801 + }, + { + "epoch": 1.92, + "grad_norm": 12.970864699885727, + "learning_rate": 8.048773144364475e-08, + "loss": 1.104, + "step": 159804 + }, + { + "epoch": 1.92, + "grad_norm": 15.72146216454091, + "learning_rate": 8.041377942538897e-08, + "loss": 1.1665, + "step": 159807 + }, + { + "epoch": 1.92, + "grad_norm": 6.166911511375649, + "learning_rate": 8.033986125910775e-08, + "loss": 1.1554, + "step": 159810 + }, + { + "epoch": 1.92, + "grad_norm": 47.820854504021696, + "learning_rate": 8.0265976945052e-08, + "loss": 1.2381, + "step": 159813 + }, + { + "epoch": 1.92, + "grad_norm": 5.155389878144179, + "learning_rate": 8.019212648347597e-08, + "loss": 1.3605, + "step": 159816 + }, + { + "epoch": 1.92, + "grad_norm": 5.372541091694528, + "learning_rate": 8.011830987463165e-08, + "loss": 0.8633, + "step": 159819 + }, + { + "epoch": 1.92, + "grad_norm": 12.128956172770721, + "learning_rate": 8.004452711876997e-08, + "loss": 1.3284, + "step": 159822 + }, + { + "epoch": 1.92, + "grad_norm": 12.305745425730558, + "learning_rate": 7.997077821614408e-08, + "loss": 1.1978, + "step": 159825 + }, + { + "epoch": 1.92, + "grad_norm": 3.2178661016121346, + "learning_rate": 7.989706316700374e-08, + "loss": 1.205, + "step": 159828 + }, + { + "epoch": 1.92, + "grad_norm": 3.787481694917142, + "learning_rate": 7.98233819716021e-08, + "loss": 1.0524, + "step": 159831 + }, + { + "epoch": 1.92, + "grad_norm": 5.021798718801709, + "learning_rate": 7.974973463019009e-08, + "loss": 1.013, + "step": 159834 + }, + { + "epoch": 1.92, + "grad_norm": 5.452441053632843, + "learning_rate": 7.96761211430186e-08, + "loss": 0.9686, + "step": 159837 + }, + { + "epoch": 1.92, + "grad_norm": 8.123148341707491, + "learning_rate": 7.960254151033853e-08, + "loss": 1.3562, + "step": 159840 + }, + { + "epoch": 1.92, + "grad_norm": 7.943098455192093, + "learning_rate": 7.952899573240192e-08, + "loss": 1.0073, + "step": 159843 + }, + { + "epoch": 1.92, + "grad_norm": 4.747669059410532, + "learning_rate": 7.945548380946077e-08, + "loss": 1.3469, + "step": 159846 + }, + { + "epoch": 1.92, + "grad_norm": 6.524485277513253, + "learning_rate": 7.93820057417638e-08, + "loss": 1.1752, + "step": 159849 + }, + { + "epoch": 1.92, + "grad_norm": 10.316257599845361, + "learning_rate": 7.930856152956301e-08, + "loss": 1.1681, + "step": 159852 + }, + { + "epoch": 1.92, + "grad_norm": 6.763962054722759, + "learning_rate": 7.92351511731082e-08, + "loss": 0.8936, + "step": 159855 + }, + { + "epoch": 1.92, + "grad_norm": 2.248916628065958, + "learning_rate": 7.91617746726503e-08, + "loss": 1.0255, + "step": 159858 + }, + { + "epoch": 1.92, + "grad_norm": 12.49792190417335, + "learning_rate": 7.908843202844018e-08, + "loss": 1.3653, + "step": 159861 + }, + { + "epoch": 1.92, + "grad_norm": 21.89072862828375, + "learning_rate": 7.901512324072768e-08, + "loss": 0.8147, + "step": 159864 + }, + { + "epoch": 1.92, + "grad_norm": 3.158759814566061, + "learning_rate": 7.89418483097626e-08, + "loss": 1.0079, + "step": 159867 + }, + { + "epoch": 1.92, + "grad_norm": 5.373349112327629, + "learning_rate": 7.886860723579692e-08, + "loss": 1.0375, + "step": 159870 + }, + { + "epoch": 1.92, + "grad_norm": 9.859318973702651, + "learning_rate": 7.879540001907826e-08, + "loss": 1.1939, + "step": 159873 + }, + { + "epoch": 1.92, + "grad_norm": 5.1420683399750065, + "learning_rate": 7.872222665985641e-08, + "loss": 1.2002, + "step": 159876 + }, + { + "epoch": 1.92, + "grad_norm": 2.7768999535763337, + "learning_rate": 7.864908715838226e-08, + "loss": 1.0546, + "step": 159879 + }, + { + "epoch": 1.92, + "grad_norm": 8.20715314471064, + "learning_rate": 7.857598151490564e-08, + "loss": 1.291, + "step": 159882 + }, + { + "epoch": 1.92, + "grad_norm": 6.672705974870373, + "learning_rate": 7.850290972967522e-08, + "loss": 1.0016, + "step": 159885 + }, + { + "epoch": 1.92, + "grad_norm": 7.164900638849439, + "learning_rate": 7.842987180294081e-08, + "loss": 0.9495, + "step": 159888 + }, + { + "epoch": 1.92, + "grad_norm": 2.9852343242996207, + "learning_rate": 7.835686773495221e-08, + "loss": 1.0209, + "step": 159891 + }, + { + "epoch": 1.92, + "grad_norm": 7.378403572939697, + "learning_rate": 7.8283897525957e-08, + "loss": 1.1737, + "step": 159894 + }, + { + "epoch": 1.92, + "grad_norm": 10.911707180398167, + "learning_rate": 7.821096117620497e-08, + "loss": 1.1633, + "step": 159897 + }, + { + "epoch": 1.92, + "grad_norm": 4.144298213162627, + "learning_rate": 7.813805868594592e-08, + "loss": 0.876, + "step": 159900 + }, + { + "epoch": 1.92, + "grad_norm": 4.265126404505209, + "learning_rate": 7.806519005542746e-08, + "loss": 1.1454, + "step": 159903 + }, + { + "epoch": 1.92, + "grad_norm": 16.20935569881411, + "learning_rate": 7.799235528489824e-08, + "loss": 1.2071, + "step": 159906 + }, + { + "epoch": 1.92, + "grad_norm": 9.140032501946784, + "learning_rate": 7.791955437460696e-08, + "loss": 1.6281, + "step": 159909 + }, + { + "epoch": 1.92, + "grad_norm": 18.258427607936287, + "learning_rate": 7.784678732480344e-08, + "loss": 1.1408, + "step": 159912 + }, + { + "epoch": 1.92, + "grad_norm": 8.781064397357595, + "learning_rate": 7.777405413573413e-08, + "loss": 0.9177, + "step": 159915 + }, + { + "epoch": 1.92, + "grad_norm": 5.776942425439649, + "learning_rate": 7.770135480764773e-08, + "loss": 1.2688, + "step": 159918 + }, + { + "epoch": 1.92, + "grad_norm": 7.57222270085092, + "learning_rate": 7.762868934079293e-08, + "loss": 1.1401, + "step": 159921 + }, + { + "epoch": 1.92, + "grad_norm": 9.26375487813573, + "learning_rate": 7.75560577354173e-08, + "loss": 1.2359, + "step": 159924 + }, + { + "epoch": 1.92, + "grad_norm": 5.860405558376019, + "learning_rate": 7.748345999176953e-08, + "loss": 1.1192, + "step": 159927 + }, + { + "epoch": 1.92, + "grad_norm": 8.992077020286319, + "learning_rate": 7.741089611009611e-08, + "loss": 1.2635, + "step": 159930 + }, + { + "epoch": 1.92, + "grad_norm": 3.9021644282157606, + "learning_rate": 7.73383660906446e-08, + "loss": 1.085, + "step": 159933 + }, + { + "epoch": 1.92, + "grad_norm": 5.886027924445803, + "learning_rate": 7.72658699336648e-08, + "loss": 1.2107, + "step": 159936 + }, + { + "epoch": 1.92, + "grad_norm": 6.734536187240818, + "learning_rate": 7.719340763940098e-08, + "loss": 0.848, + "step": 159939 + }, + { + "epoch": 1.92, + "grad_norm": 8.782893612373728, + "learning_rate": 7.71209792081029e-08, + "loss": 1.0268, + "step": 159942 + }, + { + "epoch": 1.92, + "grad_norm": 10.069020712977114, + "learning_rate": 7.704858464001597e-08, + "loss": 0.9449, + "step": 159945 + }, + { + "epoch": 1.92, + "grad_norm": 2.96083854852821, + "learning_rate": 7.697622393538772e-08, + "loss": 1.4027, + "step": 159948 + }, + { + "epoch": 1.92, + "grad_norm": 4.941343626608607, + "learning_rate": 7.690389709446577e-08, + "loss": 0.8943, + "step": 159951 + }, + { + "epoch": 1.92, + "grad_norm": 11.885884004040987, + "learning_rate": 7.683160411749657e-08, + "loss": 0.9069, + "step": 159954 + }, + { + "epoch": 1.92, + "grad_norm": 6.343974906958987, + "learning_rate": 7.675934500472659e-08, + "loss": 1.1602, + "step": 159957 + }, + { + "epoch": 1.92, + "grad_norm": 3.9964323703525566, + "learning_rate": 7.66871197564023e-08, + "loss": 0.9337, + "step": 159960 + }, + { + "epoch": 1.92, + "grad_norm": 15.136285286454607, + "learning_rate": 7.661492837277129e-08, + "loss": 1.1587, + "step": 159963 + }, + { + "epoch": 1.92, + "grad_norm": 3.1327793962748234, + "learning_rate": 7.654277085407891e-08, + "loss": 0.9273, + "step": 159966 + }, + { + "epoch": 1.92, + "grad_norm": 6.2759683504207295, + "learning_rate": 7.647064720057162e-08, + "loss": 1.2727, + "step": 159969 + }, + { + "epoch": 1.92, + "grad_norm": 2.6970952935030694, + "learning_rate": 7.63985574124948e-08, + "loss": 1.0927, + "step": 159972 + }, + { + "epoch": 1.92, + "grad_norm": 7.8489140510368856, + "learning_rate": 7.632650149009601e-08, + "loss": 1.1251, + "step": 159975 + }, + { + "epoch": 1.92, + "grad_norm": 5.185632791813056, + "learning_rate": 7.625447943362063e-08, + "loss": 1.3139, + "step": 159978 + }, + { + "epoch": 1.92, + "grad_norm": 10.237850723308206, + "learning_rate": 7.618249124331401e-08, + "loss": 1.3972, + "step": 159981 + }, + { + "epoch": 1.92, + "grad_norm": 9.417851686019182, + "learning_rate": 7.61105369194226e-08, + "loss": 1.3388, + "step": 159984 + }, + { + "epoch": 1.92, + "grad_norm": 7.756281995330963, + "learning_rate": 7.603861646218957e-08, + "loss": 1.3039, + "step": 159987 + }, + { + "epoch": 1.92, + "grad_norm": 12.666250409967803, + "learning_rate": 7.59667298718636e-08, + "loss": 1.38, + "step": 159990 + }, + { + "epoch": 1.92, + "grad_norm": 6.585941590572209, + "learning_rate": 7.589487714868782e-08, + "loss": 0.9809, + "step": 159993 + }, + { + "epoch": 1.92, + "grad_norm": 7.2046470990675076, + "learning_rate": 7.582305829290871e-08, + "loss": 0.7828, + "step": 159996 + }, + { + "epoch": 1.92, + "grad_norm": 21.134832651577632, + "learning_rate": 7.57512733047705e-08, + "loss": 1.3452, + "step": 159999 + }, + { + "epoch": 1.92, + "grad_norm": 6.093844151491588, + "learning_rate": 7.567952218451968e-08, + "loss": 1.5517, + "step": 160002 + }, + { + "epoch": 1.92, + "grad_norm": 9.112427750973836, + "learning_rate": 7.560780493239828e-08, + "loss": 1.1286, + "step": 160005 + }, + { + "epoch": 1.92, + "grad_norm": 7.840056165041462, + "learning_rate": 7.553612154865386e-08, + "loss": 0.9488, + "step": 160008 + }, + { + "epoch": 1.92, + "grad_norm": 3.8894659490433803, + "learning_rate": 7.546447203352847e-08, + "loss": 1.1759, + "step": 160011 + }, + { + "epoch": 1.92, + "grad_norm": 6.034938592389585, + "learning_rate": 7.539285638726856e-08, + "loss": 1.2694, + "step": 160014 + }, + { + "epoch": 1.92, + "grad_norm": 15.245300226819143, + "learning_rate": 7.53212746101184e-08, + "loss": 0.9655, + "step": 160017 + }, + { + "epoch": 1.92, + "grad_norm": 4.717631163656365, + "learning_rate": 7.524972670232223e-08, + "loss": 0.9204, + "step": 160020 + }, + { + "epoch": 1.92, + "grad_norm": 17.701362102983925, + "learning_rate": 7.517821266412428e-08, + "loss": 1.0133, + "step": 160023 + }, + { + "epoch": 1.92, + "grad_norm": 7.959953154010081, + "learning_rate": 7.51067324957666e-08, + "loss": 1.4015, + "step": 160026 + }, + { + "epoch": 1.92, + "grad_norm": 9.305248950545623, + "learning_rate": 7.503528619749567e-08, + "loss": 1.0603, + "step": 160029 + }, + { + "epoch": 1.92, + "grad_norm": 12.89476907257058, + "learning_rate": 7.496387376955349e-08, + "loss": 1.1694, + "step": 160032 + }, + { + "epoch": 1.92, + "grad_norm": 3.2757090188007445, + "learning_rate": 7.489249521218656e-08, + "loss": 1.2638, + "step": 160035 + }, + { + "epoch": 1.92, + "grad_norm": 5.211884476005584, + "learning_rate": 7.482115052563465e-08, + "loss": 1.0826, + "step": 160038 + }, + { + "epoch": 1.92, + "grad_norm": 9.354940187051351, + "learning_rate": 7.474983971014316e-08, + "loss": 0.9761, + "step": 160041 + }, + { + "epoch": 1.92, + "grad_norm": 7.446193095325819, + "learning_rate": 7.46785627659563e-08, + "loss": 1.3935, + "step": 160044 + }, + { + "epoch": 1.92, + "grad_norm": 4.504084900774843, + "learning_rate": 7.460731969331614e-08, + "loss": 1.2558, + "step": 160047 + }, + { + "epoch": 1.92, + "grad_norm": 7.496661464997162, + "learning_rate": 7.453611049246689e-08, + "loss": 0.8952, + "step": 160050 + }, + { + "epoch": 1.92, + "grad_norm": 7.25513745644646, + "learning_rate": 7.446493516364949e-08, + "loss": 1.2802, + "step": 160053 + }, + { + "epoch": 1.92, + "grad_norm": 7.10951362139735, + "learning_rate": 7.43937937071082e-08, + "loss": 1.1108, + "step": 160056 + }, + { + "epoch": 1.92, + "grad_norm": 8.517761214169838, + "learning_rate": 7.432268612308613e-08, + "loss": 0.8862, + "step": 160059 + }, + { + "epoch": 1.92, + "grad_norm": 9.783547097493448, + "learning_rate": 7.425161241182532e-08, + "loss": 1.6792, + "step": 160062 + }, + { + "epoch": 1.92, + "grad_norm": 14.12428125897343, + "learning_rate": 7.418057257356892e-08, + "loss": 0.8688, + "step": 160065 + }, + { + "epoch": 1.92, + "grad_norm": 8.593638470389637, + "learning_rate": 7.410956660855894e-08, + "loss": 1.3153, + "step": 160068 + }, + { + "epoch": 1.92, + "grad_norm": 9.468000384892285, + "learning_rate": 7.403859451703744e-08, + "loss": 1.0381, + "step": 160071 + }, + { + "epoch": 1.92, + "grad_norm": 13.438916170082704, + "learning_rate": 7.39676562992464e-08, + "loss": 0.9693, + "step": 160074 + }, + { + "epoch": 1.92, + "grad_norm": 2.5727880719116154, + "learning_rate": 7.3896751955429e-08, + "loss": 1.3008, + "step": 160077 + }, + { + "epoch": 1.92, + "grad_norm": 12.953454485350667, + "learning_rate": 7.382588148582614e-08, + "loss": 1.1555, + "step": 160080 + }, + { + "epoch": 1.92, + "grad_norm": 2.9557442665086056, + "learning_rate": 7.375504489068097e-08, + "loss": 1.2715, + "step": 160083 + }, + { + "epoch": 1.92, + "grad_norm": 4.389396898439936, + "learning_rate": 7.36842421702344e-08, + "loss": 1.0608, + "step": 160086 + }, + { + "epoch": 1.93, + "grad_norm": 8.483577835801835, + "learning_rate": 7.361347332472735e-08, + "loss": 1.29, + "step": 160089 + }, + { + "epoch": 1.93, + "grad_norm": 2.7494299528430037, + "learning_rate": 7.354273835440295e-08, + "loss": 0.9974, + "step": 160092 + }, + { + "epoch": 1.93, + "grad_norm": 2.842195985919518, + "learning_rate": 7.347203725950102e-08, + "loss": 1.076, + "step": 160095 + }, + { + "epoch": 1.93, + "grad_norm": 4.853467132306401, + "learning_rate": 7.340137004026248e-08, + "loss": 1.112, + "step": 160098 + }, + { + "epoch": 1.93, + "grad_norm": 5.329349729636906, + "learning_rate": 7.333073669693047e-08, + "loss": 1.0271, + "step": 160101 + }, + { + "epoch": 1.93, + "grad_norm": 4.461001788309449, + "learning_rate": 7.32601372297459e-08, + "loss": 1.3861, + "step": 160104 + }, + { + "epoch": 1.93, + "grad_norm": 5.630243789592337, + "learning_rate": 7.318957163894857e-08, + "loss": 1.1037, + "step": 160107 + }, + { + "epoch": 1.93, + "grad_norm": 4.939145753039511, + "learning_rate": 7.311903992477942e-08, + "loss": 1.6634, + "step": 160110 + }, + { + "epoch": 1.93, + "grad_norm": 8.316297810814742, + "learning_rate": 7.304854208747824e-08, + "loss": 0.9628, + "step": 160113 + }, + { + "epoch": 1.93, + "grad_norm": 5.003052202810548, + "learning_rate": 7.297807812728819e-08, + "loss": 0.8106, + "step": 160116 + }, + { + "epoch": 1.93, + "grad_norm": 6.361615643270181, + "learning_rate": 7.290764804444795e-08, + "loss": 1.1022, + "step": 160119 + }, + { + "epoch": 1.93, + "grad_norm": 3.6923365864440716, + "learning_rate": 7.283725183919733e-08, + "loss": 1.1989, + "step": 160122 + }, + { + "epoch": 1.93, + "grad_norm": 4.249425382968182, + "learning_rate": 7.276688951177835e-08, + "loss": 1.3298, + "step": 160125 + }, + { + "epoch": 1.93, + "grad_norm": 12.261298189229151, + "learning_rate": 7.269656106242973e-08, + "loss": 1.2686, + "step": 160128 + }, + { + "epoch": 1.93, + "grad_norm": 10.83645090953951, + "learning_rate": 7.262626649139349e-08, + "loss": 1.0433, + "step": 160131 + }, + { + "epoch": 1.93, + "grad_norm": 17.18366585951135, + "learning_rate": 7.255600579890609e-08, + "loss": 0.9983, + "step": 160134 + }, + { + "epoch": 1.93, + "grad_norm": 4.3685227493640415, + "learning_rate": 7.248577898520958e-08, + "loss": 1.1689, + "step": 160137 + }, + { + "epoch": 1.93, + "grad_norm": 5.033617040615263, + "learning_rate": 7.241558605054267e-08, + "loss": 1.4412, + "step": 160140 + }, + { + "epoch": 1.93, + "grad_norm": 10.86211823678324, + "learning_rate": 7.234542699514735e-08, + "loss": 1.381, + "step": 160143 + }, + { + "epoch": 1.93, + "grad_norm": 4.534660804655616, + "learning_rate": 7.227530181925901e-08, + "loss": 1.0331, + "step": 160146 + }, + { + "epoch": 1.93, + "grad_norm": 5.76711272014331, + "learning_rate": 7.22052105231208e-08, + "loss": 1.1952, + "step": 160149 + }, + { + "epoch": 1.93, + "grad_norm": 9.756249430848717, + "learning_rate": 7.213515310696917e-08, + "loss": 0.898, + "step": 160152 + }, + { + "epoch": 1.93, + "grad_norm": 8.669246856279289, + "learning_rate": 7.206512957104506e-08, + "loss": 1.187, + "step": 160155 + }, + { + "epoch": 1.93, + "grad_norm": 12.349823416338033, + "learning_rate": 7.199513991558604e-08, + "loss": 1.1213, + "step": 160158 + }, + { + "epoch": 1.93, + "grad_norm": 6.122669128922696, + "learning_rate": 7.192518414083194e-08, + "loss": 1.043, + "step": 160161 + }, + { + "epoch": 1.93, + "grad_norm": 15.847412682822373, + "learning_rate": 7.185526224702033e-08, + "loss": 0.9318, + "step": 160164 + }, + { + "epoch": 1.93, + "grad_norm": 1.9696401189994779, + "learning_rate": 7.178537423439102e-08, + "loss": 1.2728, + "step": 160167 + }, + { + "epoch": 1.93, + "grad_norm": 5.6762788135661815, + "learning_rate": 7.171552010318272e-08, + "loss": 1.1772, + "step": 160170 + }, + { + "epoch": 1.93, + "grad_norm": 7.555940982537875, + "learning_rate": 7.164569985363301e-08, + "loss": 1.1957, + "step": 160173 + }, + { + "epoch": 1.93, + "grad_norm": 9.277008515617352, + "learning_rate": 7.157591348597947e-08, + "loss": 1.2353, + "step": 160176 + }, + { + "epoch": 1.93, + "grad_norm": 14.532724353315682, + "learning_rate": 7.150616100046193e-08, + "loss": 1.292, + "step": 160179 + }, + { + "epoch": 1.93, + "grad_norm": 8.387006716930042, + "learning_rate": 7.143644239731684e-08, + "loss": 1.4326, + "step": 160182 + }, + { + "epoch": 1.93, + "grad_norm": 4.987636798459708, + "learning_rate": 7.136675767678402e-08, + "loss": 1.4543, + "step": 160185 + }, + { + "epoch": 1.93, + "grad_norm": 3.663093491116347, + "learning_rate": 7.129710683909996e-08, + "loss": 1.088, + "step": 160188 + }, + { + "epoch": 1.93, + "grad_norm": 6.178604937805939, + "learning_rate": 7.122748988450224e-08, + "loss": 1.2075, + "step": 160191 + }, + { + "epoch": 1.93, + "grad_norm": 6.919686943592891, + "learning_rate": 7.115790681322843e-08, + "loss": 0.9634, + "step": 160194 + }, + { + "epoch": 1.93, + "grad_norm": 38.674180528181985, + "learning_rate": 7.108835762551724e-08, + "loss": 1.0492, + "step": 160197 + }, + { + "epoch": 1.93, + "grad_norm": 4.481537256086077, + "learning_rate": 7.101884232160405e-08, + "loss": 1.1073, + "step": 160200 + }, + { + "epoch": 1.93, + "grad_norm": 14.671547143756197, + "learning_rate": 7.094936090172755e-08, + "loss": 1.0151, + "step": 160203 + }, + { + "epoch": 1.93, + "grad_norm": 13.218588509175522, + "learning_rate": 7.087991336612532e-08, + "loss": 1.3152, + "step": 160206 + }, + { + "epoch": 1.93, + "grad_norm": 9.946785855147924, + "learning_rate": 7.081049971503162e-08, + "loss": 1.1756, + "step": 160209 + }, + { + "epoch": 1.93, + "grad_norm": 12.586863309125837, + "learning_rate": 7.074111994868738e-08, + "loss": 1.1007, + "step": 160212 + }, + { + "epoch": 1.93, + "grad_norm": 3.3026224962802417, + "learning_rate": 7.067177406732573e-08, + "loss": 1.0502, + "step": 160215 + }, + { + "epoch": 1.93, + "grad_norm": 7.811534610042216, + "learning_rate": 7.060246207118537e-08, + "loss": 1.3888, + "step": 160218 + }, + { + "epoch": 1.93, + "grad_norm": 10.006708749531052, + "learning_rate": 7.053318396050168e-08, + "loss": 1.0083, + "step": 160221 + }, + { + "epoch": 1.93, + "grad_norm": 4.98438080731789, + "learning_rate": 7.046393973551335e-08, + "loss": 1.1693, + "step": 160224 + }, + { + "epoch": 1.93, + "grad_norm": 5.639365108393308, + "learning_rate": 7.039472939645353e-08, + "loss": 1.1989, + "step": 160227 + }, + { + "epoch": 1.93, + "grad_norm": 11.092374324915173, + "learning_rate": 7.032555294355981e-08, + "loss": 1.2183, + "step": 160230 + }, + { + "epoch": 1.93, + "grad_norm": 3.0101672636374044, + "learning_rate": 7.025641037706865e-08, + "loss": 0.9274, + "step": 160233 + }, + { + "epoch": 1.93, + "grad_norm": 3.1345426513995727, + "learning_rate": 7.018730169721544e-08, + "loss": 0.9177, + "step": 160236 + }, + { + "epoch": 1.93, + "grad_norm": 6.597054071263705, + "learning_rate": 7.011822690423665e-08, + "loss": 1.1354, + "step": 160239 + }, + { + "epoch": 1.93, + "grad_norm": 11.459018965086916, + "learning_rate": 7.004918599836763e-08, + "loss": 0.8878, + "step": 160242 + }, + { + "epoch": 1.93, + "grad_norm": 5.738008144566214, + "learning_rate": 6.998017897984266e-08, + "loss": 1.2926, + "step": 160245 + }, + { + "epoch": 1.93, + "grad_norm": 10.959849374759608, + "learning_rate": 6.991120584890043e-08, + "loss": 1.43, + "step": 160248 + }, + { + "epoch": 1.93, + "grad_norm": 37.83700828532735, + "learning_rate": 6.984226660577408e-08, + "loss": 1.2948, + "step": 160251 + }, + { + "epoch": 1.93, + "grad_norm": 8.184464998316031, + "learning_rate": 6.977336125069789e-08, + "loss": 1.2057, + "step": 160254 + }, + { + "epoch": 1.93, + "grad_norm": 4.457757308020321, + "learning_rate": 6.970448978391053e-08, + "loss": 1.163, + "step": 160257 + }, + { + "epoch": 1.93, + "grad_norm": 8.158165919960812, + "learning_rate": 6.963565220564295e-08, + "loss": 1.2777, + "step": 160260 + }, + { + "epoch": 1.93, + "grad_norm": 1.8496310485360028, + "learning_rate": 6.956684851613272e-08, + "loss": 0.8906, + "step": 160263 + }, + { + "epoch": 1.93, + "grad_norm": 11.384927930254932, + "learning_rate": 6.949807871561409e-08, + "loss": 1.4176, + "step": 160266 + }, + { + "epoch": 1.93, + "grad_norm": 6.024476358829977, + "learning_rate": 6.942934280432134e-08, + "loss": 1.1934, + "step": 160269 + }, + { + "epoch": 1.93, + "grad_norm": 11.725466199925322, + "learning_rate": 6.936064078248872e-08, + "loss": 0.9252, + "step": 160272 + }, + { + "epoch": 1.93, + "grad_norm": 12.125015370908493, + "learning_rate": 6.92919726503527e-08, + "loss": 1.2193, + "step": 160275 + }, + { + "epoch": 1.93, + "grad_norm": 15.84550167221618, + "learning_rate": 6.922333840814532e-08, + "loss": 1.1788, + "step": 160278 + }, + { + "epoch": 1.93, + "grad_norm": 10.923421884242313, + "learning_rate": 6.915473805610196e-08, + "loss": 1.4438, + "step": 160281 + }, + { + "epoch": 1.93, + "grad_norm": 3.411343506316352, + "learning_rate": 6.908617159445574e-08, + "loss": 1.1077, + "step": 160284 + }, + { + "epoch": 1.93, + "grad_norm": 10.755247463956058, + "learning_rate": 6.901763902344094e-08, + "loss": 1.1729, + "step": 160287 + }, + { + "epoch": 1.93, + "grad_norm": 8.20172544562508, + "learning_rate": 6.894914034329292e-08, + "loss": 1.1917, + "step": 160290 + }, + { + "epoch": 1.93, + "grad_norm": 17.185518602254707, + "learning_rate": 6.888067555424372e-08, + "loss": 1.3624, + "step": 160293 + }, + { + "epoch": 1.93, + "grad_norm": 4.60435851079455, + "learning_rate": 6.881224465652758e-08, + "loss": 1.0982, + "step": 160296 + }, + { + "epoch": 1.93, + "grad_norm": 11.799396453502158, + "learning_rate": 6.874384765037877e-08, + "loss": 0.8616, + "step": 160299 + }, + { + "epoch": 1.93, + "grad_norm": 7.384088950580797, + "learning_rate": 6.867548453602935e-08, + "loss": 0.8774, + "step": 160302 + }, + { + "epoch": 1.93, + "grad_norm": 7.0918744888933425, + "learning_rate": 6.860715531371353e-08, + "loss": 1.1998, + "step": 160305 + }, + { + "epoch": 1.93, + "grad_norm": 4.266302963697691, + "learning_rate": 6.853885998366339e-08, + "loss": 1.1914, + "step": 160308 + }, + { + "epoch": 1.93, + "grad_norm": 6.951683694338671, + "learning_rate": 6.847059854611426e-08, + "loss": 0.9836, + "step": 160311 + }, + { + "epoch": 1.93, + "grad_norm": 17.935152866166018, + "learning_rate": 6.840237100129599e-08, + "loss": 1.1881, + "step": 160314 + }, + { + "epoch": 1.93, + "grad_norm": 4.473926756264237, + "learning_rate": 6.833417734944392e-08, + "loss": 1.1678, + "step": 160317 + }, + { + "epoch": 1.93, + "grad_norm": 12.039675935200313, + "learning_rate": 6.826601759079011e-08, + "loss": 1.2456, + "step": 160320 + }, + { + "epoch": 1.93, + "grad_norm": 4.0805297824480204, + "learning_rate": 6.819789172556768e-08, + "loss": 1.2246, + "step": 160323 + }, + { + "epoch": 1.93, + "grad_norm": 4.761512058414162, + "learning_rate": 6.812979975400758e-08, + "loss": 1.125, + "step": 160326 + }, + { + "epoch": 1.93, + "grad_norm": 7.588104274863771, + "learning_rate": 6.806174167634294e-08, + "loss": 1.0344, + "step": 160329 + }, + { + "epoch": 1.93, + "grad_norm": 12.083846073067413, + "learning_rate": 6.799371749280692e-08, + "loss": 0.9995, + "step": 160332 + }, + { + "epoch": 1.93, + "grad_norm": 11.466385157007664, + "learning_rate": 6.792572720362934e-08, + "loss": 1.0882, + "step": 160335 + }, + { + "epoch": 1.93, + "grad_norm": 5.43556299626151, + "learning_rate": 6.785777080904443e-08, + "loss": 0.667, + "step": 160338 + }, + { + "epoch": 1.93, + "grad_norm": 8.338178616210348, + "learning_rate": 6.778984830928425e-08, + "loss": 1.3822, + "step": 160341 + }, + { + "epoch": 1.93, + "grad_norm": 16.878964188590835, + "learning_rate": 6.772195970457863e-08, + "loss": 1.2673, + "step": 160344 + }, + { + "epoch": 1.93, + "grad_norm": 10.58496245990209, + "learning_rate": 6.765410499516068e-08, + "loss": 1.3054, + "step": 160347 + }, + { + "epoch": 1.93, + "grad_norm": 4.341714714988037, + "learning_rate": 6.758628418126245e-08, + "loss": 1.0748, + "step": 160350 + }, + { + "epoch": 1.93, + "grad_norm": 17.04873121747206, + "learning_rate": 6.751849726311376e-08, + "loss": 1.1178, + "step": 160353 + }, + { + "epoch": 1.93, + "grad_norm": 8.990061690240033, + "learning_rate": 6.745074424094666e-08, + "loss": 1.2343, + "step": 160356 + }, + { + "epoch": 1.93, + "grad_norm": 9.300161454110532, + "learning_rate": 6.738302511499317e-08, + "loss": 1.1331, + "step": 160359 + }, + { + "epoch": 1.93, + "grad_norm": 8.477627018627308, + "learning_rate": 6.731533988548311e-08, + "loss": 1.2214, + "step": 160362 + }, + { + "epoch": 1.93, + "grad_norm": 7.494811132127635, + "learning_rate": 6.724768855264851e-08, + "loss": 1.3225, + "step": 160365 + }, + { + "epoch": 1.93, + "grad_norm": 8.34373188456236, + "learning_rate": 6.71800711167192e-08, + "loss": 1.0505, + "step": 160368 + }, + { + "epoch": 1.93, + "grad_norm": 13.099186848354675, + "learning_rate": 6.71124875779261e-08, + "loss": 1.2138, + "step": 160371 + }, + { + "epoch": 1.93, + "grad_norm": 12.198827603880716, + "learning_rate": 6.704493793650124e-08, + "loss": 1.221, + "step": 160374 + }, + { + "epoch": 1.93, + "grad_norm": 15.323781429534925, + "learning_rate": 6.697742219267445e-08, + "loss": 1.5807, + "step": 160377 + }, + { + "epoch": 1.93, + "grad_norm": 8.205769529264566, + "learning_rate": 6.690994034667441e-08, + "loss": 1.1971, + "step": 160380 + }, + { + "epoch": 1.93, + "grad_norm": 4.059956725374345, + "learning_rate": 6.684249239873431e-08, + "loss": 0.9935, + "step": 160383 + }, + { + "epoch": 1.93, + "grad_norm": 14.879243885485291, + "learning_rate": 6.677507834908281e-08, + "loss": 1.3545, + "step": 160386 + }, + { + "epoch": 1.93, + "grad_norm": 10.018797912855607, + "learning_rate": 6.670769819794975e-08, + "loss": 1.1775, + "step": 160389 + }, + { + "epoch": 1.93, + "grad_norm": 5.038597438836667, + "learning_rate": 6.664035194556495e-08, + "loss": 1.4546, + "step": 160392 + }, + { + "epoch": 1.93, + "grad_norm": 5.768705287080889, + "learning_rate": 6.657303959215821e-08, + "loss": 1.2948, + "step": 160395 + }, + { + "epoch": 1.93, + "grad_norm": 36.76041883623121, + "learning_rate": 6.650576113796048e-08, + "loss": 1.2122, + "step": 160398 + }, + { + "epoch": 1.93, + "grad_norm": 2.8186567460638767, + "learning_rate": 6.643851658320155e-08, + "loss": 0.9823, + "step": 160401 + }, + { + "epoch": 1.93, + "grad_norm": 8.317135508543872, + "learning_rate": 6.637130592810792e-08, + "loss": 1.1849, + "step": 160404 + }, + { + "epoch": 1.93, + "grad_norm": 14.18980344332237, + "learning_rate": 6.630412917291274e-08, + "loss": 1.2474, + "step": 160407 + }, + { + "epoch": 1.93, + "grad_norm": 8.696384647807433, + "learning_rate": 6.623698631784247e-08, + "loss": 0.9748, + "step": 160410 + }, + { + "epoch": 1.93, + "grad_norm": 11.780035175901368, + "learning_rate": 6.616987736312696e-08, + "loss": 1.1913, + "step": 160413 + }, + { + "epoch": 1.93, + "grad_norm": 16.089401338273877, + "learning_rate": 6.610280230899602e-08, + "loss": 1.4388, + "step": 160416 + }, + { + "epoch": 1.93, + "grad_norm": 16.7389860722557, + "learning_rate": 6.603576115567722e-08, + "loss": 1.3015, + "step": 160419 + }, + { + "epoch": 1.93, + "grad_norm": 5.927892694002109, + "learning_rate": 6.596875390340151e-08, + "loss": 1.316, + "step": 160422 + }, + { + "epoch": 1.93, + "grad_norm": 7.112381906467731, + "learning_rate": 6.590178055239538e-08, + "loss": 1.5883, + "step": 160425 + }, + { + "epoch": 1.93, + "grad_norm": 7.939241519183172, + "learning_rate": 6.583484110288863e-08, + "loss": 0.8893, + "step": 160428 + }, + { + "epoch": 1.93, + "grad_norm": 9.325682314599625, + "learning_rate": 6.576793555510885e-08, + "loss": 1.1079, + "step": 160431 + }, + { + "epoch": 1.93, + "grad_norm": 15.376446467183953, + "learning_rate": 6.570106390928476e-08, + "loss": 1.1238, + "step": 160434 + }, + { + "epoch": 1.93, + "grad_norm": 5.813260036030154, + "learning_rate": 6.563422616564508e-08, + "loss": 1.3103, + "step": 160437 + }, + { + "epoch": 1.93, + "grad_norm": 11.413547841881106, + "learning_rate": 6.556742232441737e-08, + "loss": 1.1155, + "step": 160440 + }, + { + "epoch": 1.93, + "grad_norm": 5.334613144625738, + "learning_rate": 6.550065238582926e-08, + "loss": 1.0305, + "step": 160443 + }, + { + "epoch": 1.93, + "grad_norm": 5.176415198199126, + "learning_rate": 6.543391635010942e-08, + "loss": 1.0214, + "step": 160446 + }, + { + "epoch": 1.93, + "grad_norm": 27.966640242010968, + "learning_rate": 6.536721421748549e-08, + "loss": 1.0836, + "step": 160449 + }, + { + "epoch": 1.93, + "grad_norm": 15.007322828890828, + "learning_rate": 6.530054598818503e-08, + "loss": 1.0246, + "step": 160452 + }, + { + "epoch": 1.93, + "grad_norm": 23.82239573337659, + "learning_rate": 6.523391166243454e-08, + "loss": 0.7929, + "step": 160455 + }, + { + "epoch": 1.93, + "grad_norm": 11.002795783019346, + "learning_rate": 6.516731124046272e-08, + "loss": 1.1898, + "step": 160458 + }, + { + "epoch": 1.93, + "grad_norm": 4.768822565769126, + "learning_rate": 6.510074472249605e-08, + "loss": 1.4597, + "step": 160461 + }, + { + "epoch": 1.93, + "grad_norm": 9.768012844206774, + "learning_rate": 6.503421210876104e-08, + "loss": 0.9714, + "step": 160464 + }, + { + "epoch": 1.93, + "grad_norm": 4.207806615411879, + "learning_rate": 6.496771339948748e-08, + "loss": 1.5606, + "step": 160467 + }, + { + "epoch": 1.93, + "grad_norm": 9.09123909321273, + "learning_rate": 6.490124859489966e-08, + "loss": 1.1115, + "step": 160470 + }, + { + "epoch": 1.93, + "grad_norm": 8.331104480603171, + "learning_rate": 6.483481769522514e-08, + "loss": 1.1674, + "step": 160473 + }, + { + "epoch": 1.93, + "grad_norm": 7.789198621686757, + "learning_rate": 6.476842070069156e-08, + "loss": 1.2502, + "step": 160476 + }, + { + "epoch": 1.93, + "grad_norm": 6.717343398837638, + "learning_rate": 6.470205761152426e-08, + "loss": 0.9172, + "step": 160479 + }, + { + "epoch": 1.93, + "grad_norm": 9.812838239878287, + "learning_rate": 6.463572842794974e-08, + "loss": 1.0237, + "step": 160482 + }, + { + "epoch": 1.93, + "grad_norm": 15.41228425358874, + "learning_rate": 6.456943315019449e-08, + "loss": 1.371, + "step": 160485 + }, + { + "epoch": 1.93, + "grad_norm": 5.12539284532339, + "learning_rate": 6.450317177848608e-08, + "loss": 0.8718, + "step": 160488 + }, + { + "epoch": 1.93, + "grad_norm": 9.444440176028966, + "learning_rate": 6.443694431304992e-08, + "loss": 1.245, + "step": 160491 + }, + { + "epoch": 1.93, + "grad_norm": 18.64373922104518, + "learning_rate": 6.437075075411025e-08, + "loss": 1.1529, + "step": 160494 + }, + { + "epoch": 1.93, + "grad_norm": 7.80246099230681, + "learning_rate": 6.430459110189579e-08, + "loss": 1.1825, + "step": 160497 + }, + { + "epoch": 1.93, + "grad_norm": 2.5285434766038537, + "learning_rate": 6.42384653566297e-08, + "loss": 1.0798, + "step": 160500 + }, + { + "epoch": 1.93, + "grad_norm": 14.213683320157633, + "learning_rate": 6.417237351853956e-08, + "loss": 0.9853, + "step": 160503 + }, + { + "epoch": 1.93, + "grad_norm": 5.353057421234521, + "learning_rate": 6.410631558784964e-08, + "loss": 0.9685, + "step": 160506 + }, + { + "epoch": 1.93, + "grad_norm": 9.53228303154219, + "learning_rate": 6.404029156478642e-08, + "loss": 1.4349, + "step": 160509 + }, + { + "epoch": 1.93, + "grad_norm": 7.158233150132141, + "learning_rate": 6.397430144957528e-08, + "loss": 1.2702, + "step": 160512 + }, + { + "epoch": 1.93, + "grad_norm": 16.471786101576708, + "learning_rate": 6.39083452424405e-08, + "loss": 1.093, + "step": 160515 + }, + { + "epoch": 1.93, + "grad_norm": 4.849086649881218, + "learning_rate": 6.384242294360743e-08, + "loss": 1.0918, + "step": 160518 + }, + { + "epoch": 1.93, + "grad_norm": 6.751911543062769, + "learning_rate": 6.377653455330146e-08, + "loss": 1.4856, + "step": 160521 + }, + { + "epoch": 1.93, + "grad_norm": 11.588382548274131, + "learning_rate": 6.371068007174685e-08, + "loss": 1.1518, + "step": 160524 + }, + { + "epoch": 1.93, + "grad_norm": 1.8281161509184802, + "learning_rate": 6.364485949916787e-08, + "loss": 0.8396, + "step": 160527 + }, + { + "epoch": 1.93, + "grad_norm": 2.3894877391034894, + "learning_rate": 6.3579072835791e-08, + "loss": 1.3883, + "step": 160530 + }, + { + "epoch": 1.93, + "grad_norm": 5.882064732482286, + "learning_rate": 6.351332008183941e-08, + "loss": 1.4161, + "step": 160533 + }, + { + "epoch": 1.93, + "grad_norm": 6.907705250113026, + "learning_rate": 6.344760123753846e-08, + "loss": 1.1275, + "step": 160536 + }, + { + "epoch": 1.93, + "grad_norm": 9.464543684591407, + "learning_rate": 6.33819163031113e-08, + "loss": 1.2715, + "step": 160539 + }, + { + "epoch": 1.93, + "grad_norm": 8.771433915114503, + "learning_rate": 6.331626527878221e-08, + "loss": 0.9213, + "step": 160542 + }, + { + "epoch": 1.93, + "grad_norm": 7.351396829684625, + "learning_rate": 6.325064816477545e-08, + "loss": 1.0513, + "step": 160545 + }, + { + "epoch": 1.93, + "grad_norm": 11.852353449365221, + "learning_rate": 6.318506496131527e-08, + "loss": 1.1648, + "step": 160548 + }, + { + "epoch": 1.93, + "grad_norm": 10.934018534968681, + "learning_rate": 6.311951566862485e-08, + "loss": 0.8431, + "step": 160551 + }, + { + "epoch": 1.93, + "grad_norm": 7.1452914594135075, + "learning_rate": 6.305400028692843e-08, + "loss": 1.1002, + "step": 160554 + }, + { + "epoch": 1.93, + "grad_norm": 8.1119614383343, + "learning_rate": 6.29885188164503e-08, + "loss": 1.0152, + "step": 160557 + }, + { + "epoch": 1.93, + "grad_norm": 4.5070020292605095, + "learning_rate": 6.292307125741137e-08, + "loss": 1.1952, + "step": 160560 + }, + { + "epoch": 1.93, + "grad_norm": 12.371187538215485, + "learning_rate": 6.285765761003815e-08, + "loss": 1.0755, + "step": 160563 + }, + { + "epoch": 1.93, + "grad_norm": 9.859321986143003, + "learning_rate": 6.279227787455155e-08, + "loss": 1.0478, + "step": 160566 + }, + { + "epoch": 1.93, + "grad_norm": 9.43802401727055, + "learning_rate": 6.272693205117586e-08, + "loss": 1.0759, + "step": 160569 + }, + { + "epoch": 1.93, + "grad_norm": 6.699745827591927, + "learning_rate": 6.266162014013421e-08, + "loss": 1.0871, + "step": 160572 + }, + { + "epoch": 1.93, + "grad_norm": 9.237947124073617, + "learning_rate": 6.259634214164867e-08, + "loss": 0.9743, + "step": 160575 + }, + { + "epoch": 1.93, + "grad_norm": 6.528757381082502, + "learning_rate": 6.253109805594237e-08, + "loss": 1.2211, + "step": 160578 + }, + { + "epoch": 1.93, + "grad_norm": 6.094999987618676, + "learning_rate": 6.246588788323848e-08, + "loss": 1.0393, + "step": 160581 + }, + { + "epoch": 1.93, + "grad_norm": 3.9648686174869874, + "learning_rate": 6.240071162375794e-08, + "loss": 1.0192, + "step": 160584 + }, + { + "epoch": 1.93, + "grad_norm": 5.222811345564137, + "learning_rate": 6.2335569277725e-08, + "loss": 1.1169, + "step": 160587 + }, + { + "epoch": 1.93, + "grad_norm": 16.394192425421004, + "learning_rate": 6.227046084536059e-08, + "loss": 1.2413, + "step": 160590 + }, + { + "epoch": 1.93, + "grad_norm": 6.87803711822694, + "learning_rate": 6.220538632688789e-08, + "loss": 0.9134, + "step": 160593 + }, + { + "epoch": 1.93, + "grad_norm": 8.27550662502701, + "learning_rate": 6.214034572252892e-08, + "loss": 0.8254, + "step": 160596 + }, + { + "epoch": 1.93, + "grad_norm": 4.457282999102754, + "learning_rate": 6.207533903250573e-08, + "loss": 1.0229, + "step": 160599 + }, + { + "epoch": 1.93, + "grad_norm": 10.774591436419167, + "learning_rate": 6.201036625703927e-08, + "loss": 1.1013, + "step": 160602 + }, + { + "epoch": 1.93, + "grad_norm": 6.957008868798939, + "learning_rate": 6.194542739635156e-08, + "loss": 1.1923, + "step": 160605 + }, + { + "epoch": 1.93, + "grad_norm": 14.238122088271247, + "learning_rate": 6.188052245066467e-08, + "loss": 1.4698, + "step": 160608 + }, + { + "epoch": 1.93, + "grad_norm": 5.164581297863062, + "learning_rate": 6.181565142019951e-08, + "loss": 1.583, + "step": 160611 + }, + { + "epoch": 1.93, + "grad_norm": 6.771351689517675, + "learning_rate": 6.175081430517816e-08, + "loss": 1.1638, + "step": 160614 + }, + { + "epoch": 1.93, + "grad_norm": 30.295956449833806, + "learning_rate": 6.168601110582151e-08, + "loss": 1.1242, + "step": 160617 + }, + { + "epoch": 1.93, + "grad_norm": 2.922329475362274, + "learning_rate": 6.162124182235052e-08, + "loss": 1.1625, + "step": 160620 + }, + { + "epoch": 1.93, + "grad_norm": 13.028351973866986, + "learning_rate": 6.155650645498724e-08, + "loss": 1.0748, + "step": 160623 + }, + { + "epoch": 1.93, + "grad_norm": 10.435011355738235, + "learning_rate": 6.149180500395035e-08, + "loss": 0.9192, + "step": 160626 + }, + { + "epoch": 1.93, + "grad_norm": 13.659874665558695, + "learning_rate": 6.142713746946417e-08, + "loss": 0.8953, + "step": 160629 + }, + { + "epoch": 1.93, + "grad_norm": 5.68250385786433, + "learning_rate": 6.136250385174514e-08, + "loss": 0.8246, + "step": 160632 + }, + { + "epoch": 1.93, + "grad_norm": 13.82248533288292, + "learning_rate": 6.129790415101755e-08, + "loss": 1.0369, + "step": 160635 + }, + { + "epoch": 1.93, + "grad_norm": 5.0195826190875605, + "learning_rate": 6.123333836749901e-08, + "loss": 1.2964, + "step": 160638 + }, + { + "epoch": 1.93, + "grad_norm": 12.805449782426463, + "learning_rate": 6.116880650141266e-08, + "loss": 1.3346, + "step": 160641 + }, + { + "epoch": 1.93, + "grad_norm": 10.711346079212854, + "learning_rate": 6.110430855297611e-08, + "loss": 1.3773, + "step": 160644 + }, + { + "epoch": 1.93, + "grad_norm": 4.982457329979819, + "learning_rate": 6.10398445224103e-08, + "loss": 1.1121, + "step": 160647 + }, + { + "epoch": 1.93, + "grad_norm": 10.024518357406475, + "learning_rate": 6.097541440993615e-08, + "loss": 1.183, + "step": 160650 + }, + { + "epoch": 1.93, + "grad_norm": 14.900208303909137, + "learning_rate": 6.091101821577239e-08, + "loss": 1.2881, + "step": 160653 + }, + { + "epoch": 1.93, + "grad_norm": 6.266662147790123, + "learning_rate": 6.084665594014105e-08, + "loss": 1.0735, + "step": 160656 + }, + { + "epoch": 1.93, + "grad_norm": 12.802818681711436, + "learning_rate": 6.078232758325753e-08, + "loss": 1.1176, + "step": 160659 + }, + { + "epoch": 1.93, + "grad_norm": 4.344867819474356, + "learning_rate": 6.071803314534608e-08, + "loss": 1.3227, + "step": 160662 + }, + { + "epoch": 1.93, + "grad_norm": 6.756884524553612, + "learning_rate": 6.065377262662209e-08, + "loss": 0.9854, + "step": 160665 + }, + { + "epoch": 1.93, + "grad_norm": 3.3534422732010074, + "learning_rate": 6.05895460273076e-08, + "loss": 0.8214, + "step": 160668 + }, + { + "epoch": 1.93, + "grad_norm": 13.410806279597919, + "learning_rate": 6.052535334762022e-08, + "loss": 1.333, + "step": 160671 + }, + { + "epoch": 1.93, + "grad_norm": 3.3028756863291777, + "learning_rate": 6.046119458777978e-08, + "loss": 0.9957, + "step": 160674 + }, + { + "epoch": 1.93, + "grad_norm": 12.131232715641444, + "learning_rate": 6.039706974800608e-08, + "loss": 0.9784, + "step": 160677 + }, + { + "epoch": 1.93, + "grad_norm": 9.038137022734874, + "learning_rate": 6.033297882851563e-08, + "loss": 1.2441, + "step": 160680 + }, + { + "epoch": 1.93, + "grad_norm": 10.746102540157722, + "learning_rate": 6.026892182952937e-08, + "loss": 1.0187, + "step": 160683 + }, + { + "epoch": 1.93, + "grad_norm": 4.318052557974446, + "learning_rate": 6.020489875126489e-08, + "loss": 1.3174, + "step": 160686 + }, + { + "epoch": 1.93, + "grad_norm": 11.687523631745439, + "learning_rate": 6.014090959393982e-08, + "loss": 1.1887, + "step": 160689 + }, + { + "epoch": 1.93, + "grad_norm": 8.654916110028875, + "learning_rate": 6.007695435777394e-08, + "loss": 1.2258, + "step": 160692 + }, + { + "epoch": 1.93, + "grad_norm": 4.0223951006382155, + "learning_rate": 6.0013033042986e-08, + "loss": 1.141, + "step": 160695 + }, + { + "epoch": 1.93, + "grad_norm": 5.446510764140058, + "learning_rate": 5.994914564979248e-08, + "loss": 1.2412, + "step": 160698 + }, + { + "epoch": 1.93, + "grad_norm": 6.510805358637153, + "learning_rate": 5.988529217841211e-08, + "loss": 1.3421, + "step": 160701 + }, + { + "epoch": 1.93, + "grad_norm": 9.094340107792062, + "learning_rate": 5.982147262906246e-08, + "loss": 1.1034, + "step": 160704 + }, + { + "epoch": 1.93, + "grad_norm": 52.93568578632741, + "learning_rate": 5.975768700196227e-08, + "loss": 0.872, + "step": 160707 + }, + { + "epoch": 1.93, + "grad_norm": 2.6672025868571754, + "learning_rate": 5.969393529732914e-08, + "loss": 1.1741, + "step": 160710 + }, + { + "epoch": 1.93, + "grad_norm": 10.423197843573107, + "learning_rate": 5.963021751537956e-08, + "loss": 1.3976, + "step": 160713 + }, + { + "epoch": 1.93, + "grad_norm": 34.085535710807825, + "learning_rate": 5.9566533656331136e-08, + "loss": 1.155, + "step": 160716 + }, + { + "epoch": 1.93, + "grad_norm": 27.868649347979524, + "learning_rate": 5.9502883720401475e-08, + "loss": 1.0117, + "step": 160719 + }, + { + "epoch": 1.93, + "grad_norm": 7.770495338058433, + "learning_rate": 5.943926770780817e-08, + "loss": 1.0299, + "step": 160722 + }, + { + "epoch": 1.93, + "grad_norm": 22.729245570233836, + "learning_rate": 5.937568561876883e-08, + "loss": 0.8776, + "step": 160725 + }, + { + "epoch": 1.93, + "grad_norm": 6.89471063862301, + "learning_rate": 5.931213745349884e-08, + "loss": 1.0882, + "step": 160728 + }, + { + "epoch": 1.93, + "grad_norm": 3.432499154560755, + "learning_rate": 5.924862321221692e-08, + "loss": 1.0102, + "step": 160731 + }, + { + "epoch": 1.93, + "grad_norm": 10.44223871830409, + "learning_rate": 5.918514289513733e-08, + "loss": 1.1256, + "step": 160734 + }, + { + "epoch": 1.93, + "grad_norm": 20.417567529693805, + "learning_rate": 5.9121696502479897e-08, + "loss": 1.2067, + "step": 160737 + }, + { + "epoch": 1.93, + "grad_norm": 7.745726950168749, + "learning_rate": 5.905828403445779e-08, + "loss": 1.1244, + "step": 160740 + }, + { + "epoch": 1.93, + "grad_norm": 15.205052732581072, + "learning_rate": 5.899490549128972e-08, + "loss": 1.0072, + "step": 160743 + }, + { + "epoch": 1.93, + "grad_norm": 6.577012737727226, + "learning_rate": 5.893156087319107e-08, + "loss": 0.8596, + "step": 160746 + }, + { + "epoch": 1.93, + "grad_norm": 8.660234488243233, + "learning_rate": 5.886825018037834e-08, + "loss": 1.1643, + "step": 160749 + }, + { + "epoch": 1.93, + "grad_norm": 10.845779959390654, + "learning_rate": 5.880497341306801e-08, + "loss": 1.2124, + "step": 160752 + }, + { + "epoch": 1.93, + "grad_norm": 9.993215370967471, + "learning_rate": 5.874173057147547e-08, + "loss": 1.4448, + "step": 160755 + }, + { + "epoch": 1.93, + "grad_norm": 7.130120154209553, + "learning_rate": 5.8678521655816115e-08, + "loss": 1.4019, + "step": 160758 + }, + { + "epoch": 1.93, + "grad_norm": 10.201677921087507, + "learning_rate": 5.8615346666306414e-08, + "loss": 0.9242, + "step": 160761 + }, + { + "epoch": 1.93, + "grad_norm": 5.735434630197606, + "learning_rate": 5.855220560316177e-08, + "loss": 1.2509, + "step": 160764 + }, + { + "epoch": 1.93, + "grad_norm": 5.136502068579489, + "learning_rate": 5.848909846659645e-08, + "loss": 1.1617, + "step": 160767 + }, + { + "epoch": 1.93, + "grad_norm": 3.9565033745807177, + "learning_rate": 5.8426025256828054e-08, + "loss": 1.379, + "step": 160770 + }, + { + "epoch": 1.93, + "grad_norm": 2.3993699440557115, + "learning_rate": 5.8362985974070866e-08, + "loss": 1.2107, + "step": 160773 + }, + { + "epoch": 1.93, + "grad_norm": 8.139625301765603, + "learning_rate": 5.8299980618539145e-08, + "loss": 0.8803, + "step": 160776 + }, + { + "epoch": 1.93, + "grad_norm": 7.018396197439242, + "learning_rate": 5.823700919044828e-08, + "loss": 1.3196, + "step": 160779 + }, + { + "epoch": 1.93, + "grad_norm": 2.541432848685442, + "learning_rate": 5.8174071690014765e-08, + "loss": 1.1102, + "step": 160782 + }, + { + "epoch": 1.93, + "grad_norm": 3.520624815141327, + "learning_rate": 5.811116811745066e-08, + "loss": 0.9082, + "step": 160785 + }, + { + "epoch": 1.93, + "grad_norm": 12.003941363686566, + "learning_rate": 5.804829847297355e-08, + "loss": 1.0513, + "step": 160788 + }, + { + "epoch": 1.93, + "grad_norm": 11.373412135898135, + "learning_rate": 5.7985462756795504e-08, + "loss": 1.3055, + "step": 160791 + }, + { + "epoch": 1.93, + "grad_norm": 31.139279260584985, + "learning_rate": 5.792266096913302e-08, + "loss": 1.3313, + "step": 160794 + }, + { + "epoch": 1.93, + "grad_norm": 7.314860251310442, + "learning_rate": 5.785989311019813e-08, + "loss": 1.1782, + "step": 160797 + }, + { + "epoch": 1.93, + "grad_norm": 17.262135752992894, + "learning_rate": 5.779715918020623e-08, + "loss": 1.4537, + "step": 160800 + }, + { + "epoch": 1.93, + "grad_norm": 5.7778515967267285, + "learning_rate": 5.773445917937159e-08, + "loss": 1.0021, + "step": 160803 + }, + { + "epoch": 1.93, + "grad_norm": 2.3199390798628223, + "learning_rate": 5.76717931079096e-08, + "loss": 1.1239, + "step": 160806 + }, + { + "epoch": 1.93, + "grad_norm": 5.4711107513556945, + "learning_rate": 5.76091609660312e-08, + "loss": 1.0622, + "step": 160809 + }, + { + "epoch": 1.93, + "grad_norm": 11.055213059514411, + "learning_rate": 5.754656275395176e-08, + "loss": 1.026, + "step": 160812 + }, + { + "epoch": 1.93, + "grad_norm": 11.935446634902366, + "learning_rate": 5.7483998471884463e-08, + "loss": 1.1593, + "step": 160815 + }, + { + "epoch": 1.93, + "grad_norm": 4.534160588134483, + "learning_rate": 5.7421468120043566e-08, + "loss": 0.8623, + "step": 160818 + }, + { + "epoch": 1.93, + "grad_norm": 6.742698306723189, + "learning_rate": 5.735897169864113e-08, + "loss": 1.2325, + "step": 160821 + }, + { + "epoch": 1.93, + "grad_norm": 2.835703075953832, + "learning_rate": 5.7296509207891425e-08, + "loss": 1.2353, + "step": 160824 + }, + { + "epoch": 1.93, + "grad_norm": 5.376612476876809, + "learning_rate": 5.7234080648007615e-08, + "loss": 1.1349, + "step": 160827 + }, + { + "epoch": 1.93, + "grad_norm": 8.856440576024763, + "learning_rate": 5.717168601920287e-08, + "loss": 1.2268, + "step": 160830 + }, + { + "epoch": 1.93, + "grad_norm": 9.361906431118157, + "learning_rate": 5.710932532168922e-08, + "loss": 1.0671, + "step": 160833 + }, + { + "epoch": 1.93, + "grad_norm": 7.014522132432206, + "learning_rate": 5.704699855567986e-08, + "loss": 1.5101, + "step": 160836 + }, + { + "epoch": 1.93, + "grad_norm": 5.424398109731168, + "learning_rate": 5.6984705721387924e-08, + "loss": 1.5221, + "step": 160839 + }, + { + "epoch": 1.93, + "grad_norm": 4.828602482817569, + "learning_rate": 5.692244681902548e-08, + "loss": 1.196, + "step": 160842 + }, + { + "epoch": 1.93, + "grad_norm": 11.506509622480584, + "learning_rate": 5.686022184880568e-08, + "loss": 1.2781, + "step": 160845 + }, + { + "epoch": 1.93, + "grad_norm": 7.222124029781691, + "learning_rate": 5.6798030810940595e-08, + "loss": 1.1058, + "step": 160848 + }, + { + "epoch": 1.93, + "grad_norm": 4.208163092702776, + "learning_rate": 5.6735873705641154e-08, + "loss": 1.1087, + "step": 160851 + }, + { + "epoch": 1.93, + "grad_norm": 4.395709490699958, + "learning_rate": 5.667375053312274e-08, + "loss": 0.9093, + "step": 160854 + }, + { + "epoch": 1.93, + "grad_norm": 7.549941586590902, + "learning_rate": 5.661166129359297e-08, + "loss": 1.1044, + "step": 160857 + }, + { + "epoch": 1.93, + "grad_norm": 9.273538873161177, + "learning_rate": 5.654960598726722e-08, + "loss": 1.1649, + "step": 160860 + }, + { + "epoch": 1.93, + "grad_norm": 12.44585218638559, + "learning_rate": 5.648758461435644e-08, + "loss": 1.3283, + "step": 160863 + }, + { + "epoch": 1.93, + "grad_norm": 5.5337851532489974, + "learning_rate": 5.642559717507046e-08, + "loss": 1.0189, + "step": 160866 + }, + { + "epoch": 1.93, + "grad_norm": 2.4256816084665496, + "learning_rate": 5.636364366962355e-08, + "loss": 1.1705, + "step": 160869 + }, + { + "epoch": 1.93, + "grad_norm": 6.393483409670859, + "learning_rate": 5.630172409822554e-08, + "loss": 0.907, + "step": 160872 + }, + { + "epoch": 1.93, + "grad_norm": 5.358939951054728, + "learning_rate": 5.623983846108738e-08, + "loss": 1.4508, + "step": 160875 + }, + { + "epoch": 1.93, + "grad_norm": 2.1990636203377396, + "learning_rate": 5.6177986758422234e-08, + "loss": 1.4831, + "step": 160878 + }, + { + "epoch": 1.93, + "grad_norm": 5.683943769406089, + "learning_rate": 5.611616899043881e-08, + "loss": 0.9648, + "step": 160881 + }, + { + "epoch": 1.93, + "grad_norm": 18.529361167257377, + "learning_rate": 5.6054385157349175e-08, + "loss": 1.2225, + "step": 160884 + }, + { + "epoch": 1.93, + "grad_norm": 5.187438760389947, + "learning_rate": 5.599263525936427e-08, + "loss": 1.0531, + "step": 160887 + }, + { + "epoch": 1.93, + "grad_norm": 78.23784799832094, + "learning_rate": 5.593091929669503e-08, + "loss": 1.3552, + "step": 160890 + }, + { + "epoch": 1.93, + "grad_norm": 13.891884191273796, + "learning_rate": 5.586923726955129e-08, + "loss": 1.1997, + "step": 160893 + }, + { + "epoch": 1.93, + "grad_norm": 8.6220355172526, + "learning_rate": 5.5807589178143996e-08, + "loss": 1.1461, + "step": 160896 + }, + { + "epoch": 1.93, + "grad_norm": 7.567354887496073, + "learning_rate": 5.574597502268408e-08, + "loss": 1.2407, + "step": 160899 + }, + { + "epoch": 1.93, + "grad_norm": 7.251763191882903, + "learning_rate": 5.568439480338139e-08, + "loss": 1.0921, + "step": 160902 + }, + { + "epoch": 1.93, + "grad_norm": 12.231462240496695, + "learning_rate": 5.5622848520444637e-08, + "loss": 1.281, + "step": 160905 + }, + { + "epoch": 1.93, + "grad_norm": 9.684220933896372, + "learning_rate": 5.5561336174085876e-08, + "loss": 1.0191, + "step": 160908 + }, + { + "epoch": 1.93, + "grad_norm": 9.840201384380459, + "learning_rate": 5.549985776451383e-08, + "loss": 1.0373, + "step": 160911 + }, + { + "epoch": 1.93, + "grad_norm": 11.053153967590898, + "learning_rate": 5.543841329193944e-08, + "loss": 1.2716, + "step": 160914 + }, + { + "epoch": 1.93, + "grad_norm": 9.93796220800936, + "learning_rate": 5.537700275657143e-08, + "loss": 1.1656, + "step": 160917 + }, + { + "epoch": 1.94, + "grad_norm": 9.957925741065262, + "learning_rate": 5.531562615862074e-08, + "loss": 1.3221, + "step": 160920 + }, + { + "epoch": 1.94, + "grad_norm": 4.952266611832197, + "learning_rate": 5.525428349829387e-08, + "loss": 1.0625, + "step": 160923 + }, + { + "epoch": 1.94, + "grad_norm": 16.518959439402586, + "learning_rate": 5.519297477580399e-08, + "loss": 1.5429, + "step": 160926 + }, + { + "epoch": 1.94, + "grad_norm": 7.773425004678281, + "learning_rate": 5.513169999135759e-08, + "loss": 0.7839, + "step": 160929 + }, + { + "epoch": 1.94, + "grad_norm": 7.531973372631437, + "learning_rate": 5.50704591451634e-08, + "loss": 1.2735, + "step": 160932 + }, + { + "epoch": 1.94, + "grad_norm": 7.402840808652716, + "learning_rate": 5.500925223743236e-08, + "loss": 1.0998, + "step": 160935 + }, + { + "epoch": 1.94, + "grad_norm": 3.8825330177267956, + "learning_rate": 5.49480792683732e-08, + "loss": 1.0033, + "step": 160938 + }, + { + "epoch": 1.94, + "grad_norm": 12.890907925935172, + "learning_rate": 5.488694023819463e-08, + "loss": 1.2163, + "step": 160941 + }, + { + "epoch": 1.94, + "grad_norm": 7.661251252924834, + "learning_rate": 5.4825835147103154e-08, + "loss": 1.1458, + "step": 160944 + }, + { + "epoch": 1.94, + "grad_norm": 2.6841480278736807, + "learning_rate": 5.476476399530972e-08, + "loss": 1.0985, + "step": 160947 + }, + { + "epoch": 1.94, + "grad_norm": 4.854321969901911, + "learning_rate": 5.4703726783020825e-08, + "loss": 1.0857, + "step": 160950 + }, + { + "epoch": 1.94, + "grad_norm": 14.736396200789164, + "learning_rate": 5.4642723510447415e-08, + "loss": 1.4982, + "step": 160953 + }, + { + "epoch": 1.94, + "grad_norm": 6.238405418273441, + "learning_rate": 5.4581754177794875e-08, + "loss": 1.524, + "step": 160956 + }, + { + "epoch": 1.94, + "grad_norm": 8.920626935534022, + "learning_rate": 5.4520818785273046e-08, + "loss": 1.132, + "step": 160959 + }, + { + "epoch": 1.94, + "grad_norm": 8.378703337657251, + "learning_rate": 5.445991733308953e-08, + "loss": 0.96, + "step": 160962 + }, + { + "epoch": 1.94, + "grad_norm": 3.0469528648965984, + "learning_rate": 5.4399049821450834e-08, + "loss": 1.5814, + "step": 160965 + }, + { + "epoch": 1.94, + "grad_norm": 12.252776059198002, + "learning_rate": 5.433821625056679e-08, + "loss": 1.0336, + "step": 160968 + }, + { + "epoch": 1.94, + "grad_norm": 9.48192291126622, + "learning_rate": 5.4277416620643894e-08, + "loss": 1.0695, + "step": 160971 + }, + { + "epoch": 1.94, + "grad_norm": 25.595722196393176, + "learning_rate": 5.421665093188977e-08, + "loss": 1.1816, + "step": 160974 + }, + { + "epoch": 1.94, + "grad_norm": 6.417098922554813, + "learning_rate": 5.415591918451091e-08, + "loss": 1.2805, + "step": 160977 + }, + { + "epoch": 1.94, + "grad_norm": 8.922765727397836, + "learning_rate": 5.409522137871603e-08, + "loss": 1.2026, + "step": 160980 + }, + { + "epoch": 1.94, + "grad_norm": 2.594815072175979, + "learning_rate": 5.403455751471276e-08, + "loss": 1.4197, + "step": 160983 + }, + { + "epoch": 1.94, + "grad_norm": 4.545680561320521, + "learning_rate": 5.3973927592705366e-08, + "loss": 0.9548, + "step": 160986 + }, + { + "epoch": 1.94, + "grad_norm": 12.209404769220226, + "learning_rate": 5.391333161290369e-08, + "loss": 1.3271, + "step": 160989 + }, + { + "epoch": 1.94, + "grad_norm": 8.073126550778642, + "learning_rate": 5.3852769575512e-08, + "loss": 1.0369, + "step": 160992 + }, + { + "epoch": 1.94, + "grad_norm": 6.714182679867927, + "learning_rate": 5.3792241480739025e-08, + "loss": 1.2972, + "step": 160995 + }, + { + "epoch": 1.94, + "grad_norm": 3.9773830626412194, + "learning_rate": 5.373174732879016e-08, + "loss": 1.0148, + "step": 160998 + }, + { + "epoch": 1.94, + "grad_norm": 7.797101703769475, + "learning_rate": 5.3671287119871904e-08, + "loss": 1.458, + "step": 161001 + }, + { + "epoch": 1.94, + "grad_norm": 4.718319956679717, + "learning_rate": 5.361086085419187e-08, + "loss": 1.1884, + "step": 161004 + }, + { + "epoch": 1.94, + "grad_norm": 6.317385563641371, + "learning_rate": 5.355046853195545e-08, + "loss": 1.2824, + "step": 161007 + }, + { + "epoch": 1.94, + "grad_norm": 7.627163604958972, + "learning_rate": 5.349011015336803e-08, + "loss": 1.1726, + "step": 161010 + }, + { + "epoch": 1.94, + "grad_norm": 3.8723617353274937, + "learning_rate": 5.3429785718635e-08, + "loss": 1.2263, + "step": 161013 + }, + { + "epoch": 1.94, + "grad_norm": 4.119944024427981, + "learning_rate": 5.336949522796508e-08, + "loss": 0.6929, + "step": 161016 + }, + { + "epoch": 1.94, + "grad_norm": 15.817374529437421, + "learning_rate": 5.330923868156146e-08, + "loss": 1.2616, + "step": 161019 + }, + { + "epoch": 1.94, + "grad_norm": 4.825217815744139, + "learning_rate": 5.324901607963062e-08, + "loss": 1.11, + "step": 161022 + }, + { + "epoch": 1.94, + "grad_norm": 2.4492891226577806, + "learning_rate": 5.3188827422379076e-08, + "loss": 1.218, + "step": 161025 + }, + { + "epoch": 1.94, + "grad_norm": 7.549191999819996, + "learning_rate": 5.312867271000999e-08, + "loss": 1.2695, + "step": 161028 + }, + { + "epoch": 1.94, + "grad_norm": 17.178607566696495, + "learning_rate": 5.3068551942729865e-08, + "loss": 1.0799, + "step": 161031 + }, + { + "epoch": 1.94, + "grad_norm": 12.717118211099162, + "learning_rate": 5.30084651207452e-08, + "loss": 1.174, + "step": 161034 + }, + { + "epoch": 1.94, + "grad_norm": 4.029751514740891, + "learning_rate": 5.294841224425917e-08, + "loss": 1.1557, + "step": 161037 + }, + { + "epoch": 1.94, + "grad_norm": 2.5049788944912357, + "learning_rate": 5.288839331347606e-08, + "loss": 0.9237, + "step": 161040 + }, + { + "epoch": 1.94, + "grad_norm": 7.2140973628710805, + "learning_rate": 5.282840832860347e-08, + "loss": 1.3118, + "step": 161043 + }, + { + "epoch": 1.94, + "grad_norm": 7.8058312458846215, + "learning_rate": 5.2768457289844586e-08, + "loss": 1.329, + "step": 161046 + }, + { + "epoch": 1.94, + "grad_norm": 8.300322269347266, + "learning_rate": 5.270854019740368e-08, + "loss": 1.2789, + "step": 161049 + }, + { + "epoch": 1.94, + "grad_norm": 5.200058198786432, + "learning_rate": 5.264865705148503e-08, + "loss": 1.2716, + "step": 161052 + }, + { + "epoch": 1.94, + "grad_norm": 9.803869372738413, + "learning_rate": 5.258880785229292e-08, + "loss": 1.278, + "step": 161055 + }, + { + "epoch": 1.94, + "grad_norm": 10.321572282054442, + "learning_rate": 5.2528992600032744e-08, + "loss": 1.2543, + "step": 161058 + }, + { + "epoch": 1.94, + "grad_norm": 13.372709288506357, + "learning_rate": 5.2469211294908784e-08, + "loss": 1.3174, + "step": 161061 + }, + { + "epoch": 1.94, + "grad_norm": 7.92999751444977, + "learning_rate": 5.240946393712421e-08, + "loss": 0.9156, + "step": 161064 + }, + { + "epoch": 1.94, + "grad_norm": 11.171965365895074, + "learning_rate": 5.2349750526883294e-08, + "loss": 1.3502, + "step": 161067 + }, + { + "epoch": 1.94, + "grad_norm": 8.229099801253293, + "learning_rate": 5.22900710643881e-08, + "loss": 1.0679, + "step": 161070 + }, + { + "epoch": 1.94, + "grad_norm": 6.6751963606638505, + "learning_rate": 5.223042554984403e-08, + "loss": 1.2944, + "step": 161073 + }, + { + "epoch": 1.94, + "grad_norm": 7.068746068872736, + "learning_rate": 5.217081398345536e-08, + "loss": 1.15, + "step": 161076 + }, + { + "epoch": 1.94, + "grad_norm": 3.4604066723445235, + "learning_rate": 5.2111236365424147e-08, + "loss": 0.9788, + "step": 161079 + }, + { + "epoch": 1.94, + "grad_norm": 13.505903576591388, + "learning_rate": 5.205169269595356e-08, + "loss": 1.2382, + "step": 161082 + }, + { + "epoch": 1.94, + "grad_norm": 7.000851709853805, + "learning_rate": 5.1992182975247886e-08, + "loss": 1.0815, + "step": 161085 + }, + { + "epoch": 1.94, + "grad_norm": 10.12617437889563, + "learning_rate": 5.193270720351029e-08, + "loss": 1.2668, + "step": 161088 + }, + { + "epoch": 1.94, + "grad_norm": 11.38721099730476, + "learning_rate": 5.187326538094173e-08, + "loss": 0.9333, + "step": 161091 + }, + { + "epoch": 1.94, + "grad_norm": 10.59266887149649, + "learning_rate": 5.181385750774759e-08, + "loss": 1.0584, + "step": 161094 + }, + { + "epoch": 1.94, + "grad_norm": 5.592362995797709, + "learning_rate": 5.175448358412771e-08, + "loss": 1.0316, + "step": 161097 + }, + { + "epoch": 1.94, + "grad_norm": 4.316091788124594, + "learning_rate": 5.169514361028749e-08, + "loss": 1.108, + "step": 161100 + }, + { + "epoch": 1.94, + "grad_norm": 14.44365987451126, + "learning_rate": 5.1635837586428985e-08, + "loss": 1.0883, + "step": 161103 + }, + { + "epoch": 1.94, + "grad_norm": 8.527523769296774, + "learning_rate": 5.157656551275314e-08, + "loss": 1.2719, + "step": 161106 + }, + { + "epoch": 1.94, + "grad_norm": 4.180242005631702, + "learning_rate": 5.1517327389464246e-08, + "loss": 1.1269, + "step": 161109 + }, + { + "epoch": 1.94, + "grad_norm": 4.5581131452525465, + "learning_rate": 5.1458123216762136e-08, + "loss": 1.4396, + "step": 161112 + }, + { + "epoch": 1.94, + "grad_norm": 3.2600275228241102, + "learning_rate": 5.1398952994849984e-08, + "loss": 1.18, + "step": 161115 + }, + { + "epoch": 1.94, + "grad_norm": 6.8725367046145065, + "learning_rate": 5.133981672392985e-08, + "loss": 0.9557, + "step": 161118 + }, + { + "epoch": 1.94, + "grad_norm": 4.133468014026863, + "learning_rate": 5.128071440420379e-08, + "loss": 1.0632, + "step": 161121 + }, + { + "epoch": 1.94, + "grad_norm": 7.895527821743936, + "learning_rate": 5.1221646035872765e-08, + "loss": 0.7513, + "step": 161124 + }, + { + "epoch": 1.94, + "grad_norm": 9.882551245961382, + "learning_rate": 5.1162611619138823e-08, + "loss": 0.9499, + "step": 161127 + }, + { + "epoch": 1.94, + "grad_norm": 13.77322827023837, + "learning_rate": 5.110361115420403e-08, + "loss": 1.073, + "step": 161130 + }, + { + "epoch": 1.94, + "grad_norm": 4.946310779061244, + "learning_rate": 5.104464464126824e-08, + "loss": 1.5295, + "step": 161133 + }, + { + "epoch": 1.94, + "grad_norm": 6.789556619685345, + "learning_rate": 5.098571208053349e-08, + "loss": 0.7558, + "step": 161136 + }, + { + "epoch": 1.94, + "grad_norm": 11.092374364707759, + "learning_rate": 5.0926813472200744e-08, + "loss": 1.1908, + "step": 161139 + }, + { + "epoch": 1.94, + "grad_norm": 8.105322656237934, + "learning_rate": 5.086794881647206e-08, + "loss": 0.9217, + "step": 161142 + }, + { + "epoch": 1.94, + "grad_norm": 19.016462070376846, + "learning_rate": 5.080911811354616e-08, + "loss": 1.1043, + "step": 161145 + }, + { + "epoch": 1.94, + "grad_norm": 6.616713170861764, + "learning_rate": 5.075032136362623e-08, + "loss": 1.1544, + "step": 161148 + }, + { + "epoch": 1.94, + "grad_norm": 6.965239754471692, + "learning_rate": 5.069155856691099e-08, + "loss": 1.2054, + "step": 161151 + }, + { + "epoch": 1.94, + "grad_norm": 7.510863648378629, + "learning_rate": 5.063282972360251e-08, + "loss": 1.0801, + "step": 161154 + }, + { + "epoch": 1.94, + "grad_norm": 8.630890011414598, + "learning_rate": 5.05741348338995e-08, + "loss": 0.6219, + "step": 161157 + }, + { + "epoch": 1.94, + "grad_norm": 23.160120141820983, + "learning_rate": 5.051547389800404e-08, + "loss": 1.2546, + "step": 161160 + }, + { + "epoch": 1.94, + "grad_norm": 13.952636332737148, + "learning_rate": 5.0456846916114854e-08, + "loss": 1.2888, + "step": 161163 + }, + { + "epoch": 1.94, + "grad_norm": 8.48408111041878, + "learning_rate": 5.039825388843178e-08, + "loss": 1.022, + "step": 161166 + }, + { + "epoch": 1.94, + "grad_norm": 9.631733725004082, + "learning_rate": 5.033969481515688e-08, + "loss": 1.0283, + "step": 161169 + }, + { + "epoch": 1.94, + "grad_norm": 8.615836586546543, + "learning_rate": 5.0281169696488885e-08, + "loss": 1.0172, + "step": 161172 + }, + { + "epoch": 1.94, + "grad_norm": 8.246360861866346, + "learning_rate": 5.0222678532627634e-08, + "loss": 0.8572, + "step": 161175 + }, + { + "epoch": 1.94, + "grad_norm": 74.0667758580066, + "learning_rate": 5.016422132377186e-08, + "loss": 1.026, + "step": 161178 + }, + { + "epoch": 1.94, + "grad_norm": 7.3884681915929855, + "learning_rate": 5.010579807012139e-08, + "loss": 0.9913, + "step": 161181 + }, + { + "epoch": 1.94, + "grad_norm": 2.754365923238484, + "learning_rate": 5.004740877187608e-08, + "loss": 1.5538, + "step": 161184 + }, + { + "epoch": 1.94, + "grad_norm": 3.578955367350747, + "learning_rate": 4.998905342923466e-08, + "loss": 1.1896, + "step": 161187 + }, + { + "epoch": 1.94, + "grad_norm": 3.963751180777308, + "learning_rate": 4.9930732042396957e-08, + "loss": 1.1335, + "step": 161190 + }, + { + "epoch": 1.94, + "grad_norm": 2.6477983208280644, + "learning_rate": 4.987244461156171e-08, + "loss": 0.8882, + "step": 161193 + }, + { + "epoch": 1.94, + "grad_norm": 4.365514273195866, + "learning_rate": 4.9814191136928755e-08, + "loss": 1.107, + "step": 161196 + }, + { + "epoch": 1.94, + "grad_norm": 5.996898092557522, + "learning_rate": 4.975597161869572e-08, + "loss": 1.1098, + "step": 161199 + }, + { + "epoch": 1.94, + "grad_norm": 3.1555779384903517, + "learning_rate": 4.969778605706021e-08, + "loss": 1.1456, + "step": 161202 + }, + { + "epoch": 1.94, + "grad_norm": 10.903340287746595, + "learning_rate": 4.9639634452222085e-08, + "loss": 1.2211, + "step": 161205 + }, + { + "epoch": 1.94, + "grad_norm": 12.057050092318779, + "learning_rate": 4.958151680438117e-08, + "loss": 1.3195, + "step": 161208 + }, + { + "epoch": 1.94, + "grad_norm": 5.335173804905756, + "learning_rate": 4.9523433113733974e-08, + "loss": 1.1078, + "step": 161211 + }, + { + "epoch": 1.94, + "grad_norm": 10.120030766809341, + "learning_rate": 4.946538338047813e-08, + "loss": 1.0723, + "step": 161214 + }, + { + "epoch": 1.94, + "grad_norm": 22.397146797367803, + "learning_rate": 4.940736760481346e-08, + "loss": 1.1344, + "step": 161217 + }, + { + "epoch": 1.94, + "grad_norm": 3.9192342026316744, + "learning_rate": 4.9349385786937595e-08, + "loss": 1.1916, + "step": 161220 + }, + { + "epoch": 1.94, + "grad_norm": 8.145381985485482, + "learning_rate": 4.929143792704816e-08, + "loss": 1.2113, + "step": 161223 + }, + { + "epoch": 1.94, + "grad_norm": 6.90675377496609, + "learning_rate": 4.923352402534165e-08, + "loss": 0.9179, + "step": 161226 + }, + { + "epoch": 1.94, + "grad_norm": 9.3300537849187, + "learning_rate": 4.917564408201792e-08, + "loss": 1.0041, + "step": 161229 + }, + { + "epoch": 1.94, + "grad_norm": 7.464193430606085, + "learning_rate": 4.911779809727346e-08, + "loss": 0.9995, + "step": 161232 + }, + { + "epoch": 1.94, + "grad_norm": 9.914770136540621, + "learning_rate": 4.90599860713048e-08, + "loss": 1.2948, + "step": 161235 + }, + { + "epoch": 1.94, + "grad_norm": 4.392908482931777, + "learning_rate": 4.900220800431066e-08, + "loss": 1.1007, + "step": 161238 + }, + { + "epoch": 1.94, + "grad_norm": 4.11953520247033, + "learning_rate": 4.894446389648866e-08, + "loss": 0.8132, + "step": 161241 + }, + { + "epoch": 1.94, + "grad_norm": 4.8709295448780665, + "learning_rate": 4.888675374803309e-08, + "loss": 1.2159, + "step": 161244 + }, + { + "epoch": 1.94, + "grad_norm": 4.222410416183237, + "learning_rate": 4.882907755914379e-08, + "loss": 1.2559, + "step": 161247 + }, + { + "epoch": 1.94, + "grad_norm": 16.62363522925843, + "learning_rate": 4.8771435330016156e-08, + "loss": 1.0631, + "step": 161250 + }, + { + "epoch": 1.94, + "grad_norm": 8.099973722126732, + "learning_rate": 4.871382706084671e-08, + "loss": 1.1246, + "step": 161253 + }, + { + "epoch": 1.94, + "grad_norm": 15.299321868173138, + "learning_rate": 4.865625275183306e-08, + "loss": 1.3361, + "step": 161256 + }, + { + "epoch": 1.94, + "grad_norm": 7.695797290496709, + "learning_rate": 4.85987124031706e-08, + "loss": 1.1664, + "step": 161259 + }, + { + "epoch": 1.94, + "grad_norm": 6.8231453727480735, + "learning_rate": 4.854120601505696e-08, + "loss": 0.9621, + "step": 161262 + }, + { + "epoch": 1.94, + "grad_norm": 15.145401998234133, + "learning_rate": 4.848373358768754e-08, + "loss": 1.1914, + "step": 161265 + }, + { + "epoch": 1.94, + "grad_norm": 3.631528439681342, + "learning_rate": 4.8426295121258846e-08, + "loss": 1.1964, + "step": 161268 + }, + { + "epoch": 1.94, + "grad_norm": 12.007508048137048, + "learning_rate": 4.836889061596628e-08, + "loss": 0.9904, + "step": 161271 + }, + { + "epoch": 1.94, + "grad_norm": 5.042527311175954, + "learning_rate": 4.8311520072005234e-08, + "loss": 0.8762, + "step": 161274 + }, + { + "epoch": 1.94, + "grad_norm": 6.3274943185102694, + "learning_rate": 4.8254183489573336e-08, + "loss": 1.0248, + "step": 161277 + }, + { + "epoch": 1.94, + "grad_norm": 36.674810499530174, + "learning_rate": 4.819688086886598e-08, + "loss": 1.5353, + "step": 161280 + }, + { + "epoch": 1.94, + "grad_norm": 7.492880149142296, + "learning_rate": 4.813961221007746e-08, + "loss": 1.0154, + "step": 161283 + }, + { + "epoch": 1.94, + "grad_norm": 10.620767480179595, + "learning_rate": 4.8082377513403164e-08, + "loss": 0.9862, + "step": 161286 + }, + { + "epoch": 1.94, + "grad_norm": 8.321267109440967, + "learning_rate": 4.802517677903962e-08, + "loss": 0.919, + "step": 161289 + }, + { + "epoch": 1.94, + "grad_norm": 11.905224203448391, + "learning_rate": 4.796801000718221e-08, + "loss": 0.8961, + "step": 161292 + }, + { + "epoch": 1.94, + "grad_norm": 9.601342547328148, + "learning_rate": 4.791087719802412e-08, + "loss": 0.9765, + "step": 161295 + }, + { + "epoch": 1.94, + "grad_norm": 5.873728050011218, + "learning_rate": 4.785377835176186e-08, + "loss": 1.1695, + "step": 161298 + }, + { + "epoch": 1.94, + "grad_norm": 5.107773399479743, + "learning_rate": 4.7796713468589716e-08, + "loss": 1.3548, + "step": 161301 + }, + { + "epoch": 1.94, + "grad_norm": 7.359759393712969, + "learning_rate": 4.773968254870309e-08, + "loss": 1.0162, + "step": 161304 + }, + { + "epoch": 1.94, + "grad_norm": 3.9715505736662307, + "learning_rate": 4.768268559229627e-08, + "loss": 1.0166, + "step": 161307 + }, + { + "epoch": 1.94, + "grad_norm": 4.887338378395138, + "learning_rate": 4.762572259956244e-08, + "loss": 1.094, + "step": 161310 + }, + { + "epoch": 1.94, + "grad_norm": 8.74580369964723, + "learning_rate": 4.75687935706981e-08, + "loss": 0.8485, + "step": 161313 + }, + { + "epoch": 1.94, + "grad_norm": 3.1089822445118136, + "learning_rate": 4.751189850589644e-08, + "loss": 1.319, + "step": 161316 + }, + { + "epoch": 1.94, + "grad_norm": 19.04321944736817, + "learning_rate": 4.745503740535173e-08, + "loss": 1.0109, + "step": 161319 + }, + { + "epoch": 1.94, + "grad_norm": 10.796868228640445, + "learning_rate": 4.739821026925828e-08, + "loss": 0.787, + "step": 161322 + }, + { + "epoch": 1.94, + "grad_norm": 5.652498634516951, + "learning_rate": 4.734141709781037e-08, + "loss": 1.0042, + "step": 161325 + }, + { + "epoch": 1.94, + "grad_norm": 8.273318394117695, + "learning_rate": 4.728465789120007e-08, + "loss": 1.3294, + "step": 161328 + }, + { + "epoch": 1.94, + "grad_norm": 6.816884392308412, + "learning_rate": 4.7227932649622776e-08, + "loss": 1.5418, + "step": 161331 + }, + { + "epoch": 1.94, + "grad_norm": 7.047207739321129, + "learning_rate": 4.717124137327167e-08, + "loss": 1.0666, + "step": 161334 + }, + { + "epoch": 1.94, + "grad_norm": 11.37422611585697, + "learning_rate": 4.711458406233993e-08, + "loss": 1.2258, + "step": 161337 + }, + { + "epoch": 1.94, + "grad_norm": 3.434270137006337, + "learning_rate": 4.7057960717021844e-08, + "loss": 1.3041, + "step": 161340 + }, + { + "epoch": 1.94, + "grad_norm": 16.74206811936849, + "learning_rate": 4.7001371337509484e-08, + "loss": 0.8881, + "step": 161343 + }, + { + "epoch": 1.94, + "grad_norm": 7.124200163837667, + "learning_rate": 4.694481592399602e-08, + "loss": 0.8995, + "step": 161346 + }, + { + "epoch": 1.94, + "grad_norm": 7.396116347997978, + "learning_rate": 4.6888294476674644e-08, + "loss": 0.9632, + "step": 161349 + }, + { + "epoch": 1.94, + "grad_norm": 5.631903909935566, + "learning_rate": 4.6831806995739636e-08, + "loss": 1.252, + "step": 161352 + }, + { + "epoch": 1.94, + "grad_norm": 12.25249739030419, + "learning_rate": 4.6775353481380845e-08, + "loss": 1.1205, + "step": 161355 + }, + { + "epoch": 1.94, + "grad_norm": 17.173483237601495, + "learning_rate": 4.6718933933793677e-08, + "loss": 1.4509, + "step": 161358 + }, + { + "epoch": 1.94, + "grad_norm": 11.172781912436827, + "learning_rate": 4.666254835317019e-08, + "loss": 1.073, + "step": 161361 + }, + { + "epoch": 1.94, + "grad_norm": 5.769785738881637, + "learning_rate": 4.660619673970135e-08, + "loss": 0.8963, + "step": 161364 + }, + { + "epoch": 1.94, + "grad_norm": 6.730775389256148, + "learning_rate": 4.6549879093580333e-08, + "loss": 1.1912, + "step": 161367 + }, + { + "epoch": 1.94, + "grad_norm": 7.624271950164154, + "learning_rate": 4.6493595414999206e-08, + "loss": 1.1259, + "step": 161370 + }, + { + "epoch": 1.94, + "grad_norm": 9.355918826230782, + "learning_rate": 4.643734570415115e-08, + "loss": 1.0648, + "step": 161373 + }, + { + "epoch": 1.94, + "grad_norm": 18.997405662418863, + "learning_rate": 4.6381129961227125e-08, + "loss": 1.3665, + "step": 161376 + }, + { + "epoch": 1.94, + "grad_norm": 3.5469901514242923, + "learning_rate": 4.6324948186418084e-08, + "loss": 1.1502, + "step": 161379 + }, + { + "epoch": 1.94, + "grad_norm": 10.183498293412187, + "learning_rate": 4.626880037991721e-08, + "loss": 1.4364, + "step": 161382 + }, + { + "epoch": 1.94, + "grad_norm": 3.500113286083383, + "learning_rate": 4.6212686541915464e-08, + "loss": 1.1068, + "step": 161385 + }, + { + "epoch": 1.94, + "grad_norm": 2.332451716344684, + "learning_rate": 4.615660667260491e-08, + "loss": 0.9612, + "step": 161388 + }, + { + "epoch": 1.94, + "grad_norm": 3.4026282183666567, + "learning_rate": 4.6100560772175394e-08, + "loss": 0.7965, + "step": 161391 + }, + { + "epoch": 1.94, + "grad_norm": 4.2830270665751025, + "learning_rate": 4.60445488408201e-08, + "loss": 1.1604, + "step": 161394 + }, + { + "epoch": 1.94, + "grad_norm": 5.103983232722384, + "learning_rate": 4.598857087872888e-08, + "loss": 0.8642, + "step": 161397 + }, + { + "epoch": 1.94, + "grad_norm": 10.457746274268587, + "learning_rate": 4.59326268860949e-08, + "loss": 1.3491, + "step": 161400 + }, + { + "epoch": 1.94, + "grad_norm": 9.258575444846764, + "learning_rate": 4.58767168631058e-08, + "loss": 1.0419, + "step": 161403 + }, + { + "epoch": 1.94, + "grad_norm": 24.006380944944578, + "learning_rate": 4.5820840809953636e-08, + "loss": 0.8453, + "step": 161406 + }, + { + "epoch": 1.94, + "grad_norm": 14.888143618975555, + "learning_rate": 4.576499872683049e-08, + "loss": 1.0812, + "step": 161409 + }, + { + "epoch": 1.94, + "grad_norm": 13.089759293041121, + "learning_rate": 4.57091906139262e-08, + "loss": 1.1234, + "step": 161412 + }, + { + "epoch": 1.94, + "grad_norm": 59.17486661287662, + "learning_rate": 4.5653416471430625e-08, + "loss": 0.8547, + "step": 161415 + }, + { + "epoch": 1.94, + "grad_norm": 2.0396152712094415, + "learning_rate": 4.55976762995336e-08, + "loss": 1.3455, + "step": 161418 + }, + { + "epoch": 1.94, + "grad_norm": 3.908392692283358, + "learning_rate": 4.55419700984272e-08, + "loss": 1.0489, + "step": 161421 + }, + { + "epoch": 1.94, + "grad_norm": 5.067243181050884, + "learning_rate": 4.548629786829906e-08, + "loss": 0.9434, + "step": 161424 + }, + { + "epoch": 1.94, + "grad_norm": 8.052445735796027, + "learning_rate": 4.5430659609342346e-08, + "loss": 1.0965, + "step": 161427 + }, + { + "epoch": 1.94, + "grad_norm": 13.13683140311777, + "learning_rate": 4.5375055321743574e-08, + "loss": 1.237, + "step": 161430 + }, + { + "epoch": 1.94, + "grad_norm": 8.747950201852532, + "learning_rate": 4.531948500569594e-08, + "loss": 1.306, + "step": 161433 + }, + { + "epoch": 1.94, + "grad_norm": 10.791076518009366, + "learning_rate": 4.526394866138595e-08, + "loss": 1.101, + "step": 161436 + }, + { + "epoch": 1.94, + "grad_norm": 5.741177549632389, + "learning_rate": 4.520844628900456e-08, + "loss": 0.9204, + "step": 161439 + }, + { + "epoch": 1.94, + "grad_norm": 8.852691526052224, + "learning_rate": 4.5152977888740515e-08, + "loss": 1.1449, + "step": 161442 + }, + { + "epoch": 1.94, + "grad_norm": 10.123338913751713, + "learning_rate": 4.5097543460784766e-08, + "loss": 1.0251, + "step": 161445 + }, + { + "epoch": 1.94, + "grad_norm": 4.881891256189, + "learning_rate": 4.504214300532384e-08, + "loss": 1.2031, + "step": 161448 + }, + { + "epoch": 1.94, + "grad_norm": 17.944093012242604, + "learning_rate": 4.498677652254979e-08, + "loss": 1.0121, + "step": 161451 + }, + { + "epoch": 1.94, + "grad_norm": 10.279210900635755, + "learning_rate": 4.493144401265026e-08, + "loss": 1.1901, + "step": 161454 + }, + { + "epoch": 1.94, + "grad_norm": 5.100713228797177, + "learning_rate": 4.487614547581287e-08, + "loss": 1.2037, + "step": 161457 + }, + { + "epoch": 1.94, + "grad_norm": 7.9559250208232, + "learning_rate": 4.482088091222747e-08, + "loss": 1.1991, + "step": 161460 + }, + { + "epoch": 1.94, + "grad_norm": 5.740974598775965, + "learning_rate": 4.4765650322082795e-08, + "loss": 1.3097, + "step": 161463 + }, + { + "epoch": 1.94, + "grad_norm": 13.693184246245135, + "learning_rate": 4.471045370556759e-08, + "loss": 1.2375, + "step": 161466 + }, + { + "epoch": 1.94, + "grad_norm": 6.252699914429775, + "learning_rate": 4.4655291062869474e-08, + "loss": 1.0721, + "step": 161469 + }, + { + "epoch": 1.94, + "grad_norm": 3.5263192816873183, + "learning_rate": 4.460016239417608e-08, + "loss": 1.0278, + "step": 161472 + }, + { + "epoch": 1.94, + "grad_norm": 6.8377741844004865, + "learning_rate": 4.454506769967726e-08, + "loss": 1.028, + "step": 161475 + }, + { + "epoch": 1.94, + "grad_norm": 11.130960461329812, + "learning_rate": 4.449000697956063e-08, + "loss": 1.1589, + "step": 161478 + }, + { + "epoch": 1.94, + "grad_norm": 11.19688611270997, + "learning_rate": 4.443498023401271e-08, + "loss": 1.0895, + "step": 161481 + }, + { + "epoch": 1.94, + "grad_norm": 16.436337078493995, + "learning_rate": 4.437998746322225e-08, + "loss": 1.2138, + "step": 161484 + }, + { + "epoch": 1.94, + "grad_norm": 3.23736241700567, + "learning_rate": 4.432502866737798e-08, + "loss": 1.2118, + "step": 161487 + }, + { + "epoch": 1.94, + "grad_norm": 8.007468972502496, + "learning_rate": 4.4270103846665304e-08, + "loss": 1.2644, + "step": 161490 + }, + { + "epoch": 1.94, + "grad_norm": 8.459630435808997, + "learning_rate": 4.421521300127296e-08, + "loss": 1.1239, + "step": 161493 + }, + { + "epoch": 1.94, + "grad_norm": 7.2620939772456605, + "learning_rate": 4.416035613138858e-08, + "loss": 1.3362, + "step": 161496 + }, + { + "epoch": 1.94, + "grad_norm": 4.256552090568064, + "learning_rate": 4.410553323719757e-08, + "loss": 1.126, + "step": 161499 + }, + { + "epoch": 1.94, + "grad_norm": 4.895740528156027, + "learning_rate": 4.405074431888978e-08, + "loss": 1.2571, + "step": 161502 + }, + { + "epoch": 1.94, + "grad_norm": 6.751422455390592, + "learning_rate": 4.3995989376649504e-08, + "loss": 1.1144, + "step": 161505 + }, + { + "epoch": 1.94, + "grad_norm": 14.21025826998477, + "learning_rate": 4.3941268410665484e-08, + "loss": 1.287, + "step": 161508 + }, + { + "epoch": 1.94, + "grad_norm": 2.892204039153196, + "learning_rate": 4.388658142112312e-08, + "loss": 1.1213, + "step": 161511 + }, + { + "epoch": 1.94, + "grad_norm": 2.3903322215202905, + "learning_rate": 4.383192840821005e-08, + "loss": 1.1839, + "step": 161514 + }, + { + "epoch": 1.94, + "grad_norm": 5.500909237246177, + "learning_rate": 4.3777309372112775e-08, + "loss": 1.2258, + "step": 161517 + }, + { + "epoch": 1.94, + "grad_norm": 4.102845781611743, + "learning_rate": 4.3722724313016716e-08, + "loss": 1.1824, + "step": 161520 + }, + { + "epoch": 1.94, + "grad_norm": 6.509293635503806, + "learning_rate": 4.36681732311095e-08, + "loss": 1.8368, + "step": 161523 + }, + { + "epoch": 1.94, + "grad_norm": 4.714750112836073, + "learning_rate": 4.361365612657542e-08, + "loss": 1.1198, + "step": 161526 + }, + { + "epoch": 1.94, + "grad_norm": 9.108272399125735, + "learning_rate": 4.3559172999602105e-08, + "loss": 1.2278, + "step": 161529 + }, + { + "epoch": 1.94, + "grad_norm": 15.156014421893907, + "learning_rate": 4.350472385037607e-08, + "loss": 1.1858, + "step": 161532 + }, + { + "epoch": 1.94, + "grad_norm": 5.651292508780818, + "learning_rate": 4.345030867908162e-08, + "loss": 0.89, + "step": 161535 + }, + { + "epoch": 1.94, + "grad_norm": 11.822882291097224, + "learning_rate": 4.3395927485904156e-08, + "loss": 0.8228, + "step": 161538 + }, + { + "epoch": 1.94, + "grad_norm": 7.569419106158403, + "learning_rate": 4.334158027103019e-08, + "loss": 1.3769, + "step": 161541 + }, + { + "epoch": 1.94, + "grad_norm": 4.512455687732527, + "learning_rate": 4.328726703464514e-08, + "loss": 1.1688, + "step": 161544 + }, + { + "epoch": 1.94, + "grad_norm": 6.476326031825047, + "learning_rate": 4.323298777693552e-08, + "loss": 1.1356, + "step": 161547 + }, + { + "epoch": 1.94, + "grad_norm": 9.242874826999492, + "learning_rate": 4.31787424980834e-08, + "loss": 1.0967, + "step": 161550 + }, + { + "epoch": 1.94, + "grad_norm": 10.327323328376856, + "learning_rate": 4.312453119827642e-08, + "loss": 1.1038, + "step": 161553 + }, + { + "epoch": 1.94, + "grad_norm": 11.281032138434782, + "learning_rate": 4.3070353877698866e-08, + "loss": 1.4019, + "step": 161556 + }, + { + "epoch": 1.94, + "grad_norm": 23.56327804420825, + "learning_rate": 4.3016210536537265e-08, + "loss": 1.1283, + "step": 161559 + }, + { + "epoch": 1.94, + "grad_norm": 9.222529839176117, + "learning_rate": 4.296210117497368e-08, + "loss": 1.2099, + "step": 161562 + }, + { + "epoch": 1.94, + "grad_norm": 7.383982329135927, + "learning_rate": 4.290802579319353e-08, + "loss": 1.3762, + "step": 161565 + }, + { + "epoch": 1.94, + "grad_norm": 7.140828480242411, + "learning_rate": 4.285398439138222e-08, + "loss": 1.0465, + "step": 161568 + }, + { + "epoch": 1.94, + "grad_norm": 21.412236297849585, + "learning_rate": 4.2799976969722935e-08, + "loss": 1.1224, + "step": 161571 + }, + { + "epoch": 1.94, + "grad_norm": 9.749920362699047, + "learning_rate": 4.274600352840219e-08, + "loss": 1.1974, + "step": 161574 + }, + { + "epoch": 1.94, + "grad_norm": 7.0614032784379175, + "learning_rate": 4.269206406760207e-08, + "loss": 1.1055, + "step": 161577 + }, + { + "epoch": 1.94, + "grad_norm": 7.447223046488329, + "learning_rate": 4.263815858750686e-08, + "loss": 0.9992, + "step": 161580 + }, + { + "epoch": 1.94, + "grad_norm": 8.613174675529637, + "learning_rate": 4.258428708830087e-08, + "loss": 1.4677, + "step": 161583 + }, + { + "epoch": 1.94, + "grad_norm": 7.116800785230491, + "learning_rate": 4.2530449570169493e-08, + "loss": 1.3669, + "step": 161586 + }, + { + "epoch": 1.94, + "grad_norm": 12.815940448804305, + "learning_rate": 4.247664603329371e-08, + "loss": 1.1597, + "step": 161589 + }, + { + "epoch": 1.94, + "grad_norm": 6.0574213369727214, + "learning_rate": 4.242287647785892e-08, + "loss": 1.0764, + "step": 161592 + }, + { + "epoch": 1.94, + "grad_norm": 6.444567387093241, + "learning_rate": 4.236914090404831e-08, + "loss": 1.472, + "step": 161595 + }, + { + "epoch": 1.94, + "grad_norm": 14.575546694423755, + "learning_rate": 4.231543931204396e-08, + "loss": 1.08, + "step": 161598 + }, + { + "epoch": 1.94, + "grad_norm": 104.41599736305648, + "learning_rate": 4.226177170203127e-08, + "loss": 0.9185, + "step": 161601 + }, + { + "epoch": 1.94, + "grad_norm": 7.86736815084568, + "learning_rate": 4.2208138074193436e-08, + "loss": 1.26, + "step": 161604 + }, + { + "epoch": 1.94, + "grad_norm": 11.93878317546297, + "learning_rate": 4.2154538428710315e-08, + "loss": 1.4109, + "step": 161607 + }, + { + "epoch": 1.94, + "grad_norm": 4.930620262073017, + "learning_rate": 4.210097276576841e-08, + "loss": 1.1509, + "step": 161610 + }, + { + "epoch": 1.94, + "grad_norm": 6.814766460329632, + "learning_rate": 4.2047441085548704e-08, + "loss": 1.3875, + "step": 161613 + }, + { + "epoch": 1.94, + "grad_norm": 11.723377066823131, + "learning_rate": 4.199394338823437e-08, + "loss": 0.9807, + "step": 161616 + }, + { + "epoch": 1.94, + "grad_norm": 7.103053087000414, + "learning_rate": 4.1940479674007496e-08, + "loss": 1.1479, + "step": 161619 + }, + { + "epoch": 1.94, + "grad_norm": 4.602463387519612, + "learning_rate": 4.188704994305126e-08, + "loss": 0.7077, + "step": 161622 + }, + { + "epoch": 1.94, + "grad_norm": 3.9463617877242365, + "learning_rate": 4.183365419554663e-08, + "loss": 1.1145, + "step": 161625 + }, + { + "epoch": 1.94, + "grad_norm": 14.836451990952565, + "learning_rate": 4.1780292431677915e-08, + "loss": 1.589, + "step": 161628 + }, + { + "epoch": 1.94, + "grad_norm": 4.786948353126285, + "learning_rate": 4.172696465162607e-08, + "loss": 0.9052, + "step": 161631 + }, + { + "epoch": 1.94, + "grad_norm": 5.1778971746317835, + "learning_rate": 4.167367085557206e-08, + "loss": 0.8801, + "step": 161634 + }, + { + "epoch": 1.94, + "grad_norm": 7.571105791764505, + "learning_rate": 4.1620411043700183e-08, + "loss": 1.2776, + "step": 161637 + }, + { + "epoch": 1.94, + "grad_norm": 14.16336577673948, + "learning_rate": 4.15671852161903e-08, + "loss": 1.1846, + "step": 161640 + }, + { + "epoch": 1.94, + "grad_norm": 2.3911975297761234, + "learning_rate": 4.151399337322448e-08, + "loss": 1.4344, + "step": 161643 + }, + { + "epoch": 1.94, + "grad_norm": 12.392959159685624, + "learning_rate": 4.146083551498481e-08, + "loss": 1.263, + "step": 161646 + }, + { + "epoch": 1.94, + "grad_norm": 4.802879914138643, + "learning_rate": 4.1407711641652246e-08, + "loss": 1.0345, + "step": 161649 + }, + { + "epoch": 1.94, + "grad_norm": 12.64411075656977, + "learning_rate": 4.1354621753406654e-08, + "loss": 1.0902, + "step": 161652 + }, + { + "epoch": 1.94, + "grad_norm": 7.046202478882791, + "learning_rate": 4.130156585043232e-08, + "loss": 0.879, + "step": 161655 + }, + { + "epoch": 1.94, + "grad_norm": 4.709376318359687, + "learning_rate": 4.1248543932908004e-08, + "loss": 0.9572, + "step": 161658 + }, + { + "epoch": 1.94, + "grad_norm": 7.004588431455008, + "learning_rate": 4.119555600101577e-08, + "loss": 1.1391, + "step": 161661 + }, + { + "epoch": 1.94, + "grad_norm": 4.243055190067181, + "learning_rate": 4.114260205493548e-08, + "loss": 0.7207, + "step": 161664 + }, + { + "epoch": 1.94, + "grad_norm": 2.7332012799942635, + "learning_rate": 4.108968209484809e-08, + "loss": 1.3579, + "step": 161667 + }, + { + "epoch": 1.94, + "grad_norm": 6.1526523933428106, + "learning_rate": 4.10367961209357e-08, + "loss": 1.4185, + "step": 161670 + }, + { + "epoch": 1.94, + "grad_norm": 10.7118812226228, + "learning_rate": 4.098394413337592e-08, + "loss": 1.3314, + "step": 161673 + }, + { + "epoch": 1.94, + "grad_norm": 15.786817865873852, + "learning_rate": 4.093112613235084e-08, + "loss": 1.3126, + "step": 161676 + }, + { + "epoch": 1.94, + "grad_norm": 7.2568204060085355, + "learning_rate": 4.0878342118041427e-08, + "loss": 1.2647, + "step": 161679 + }, + { + "epoch": 1.94, + "grad_norm": 8.100908533596021, + "learning_rate": 4.082559209062753e-08, + "loss": 1.6522, + "step": 161682 + }, + { + "epoch": 1.94, + "grad_norm": 8.905923588356854, + "learning_rate": 4.077287605028679e-08, + "loss": 0.7431, + "step": 161685 + }, + { + "epoch": 1.94, + "grad_norm": 6.9211265685511245, + "learning_rate": 4.0720193997202395e-08, + "loss": 1.1997, + "step": 161688 + }, + { + "epoch": 1.94, + "grad_norm": 8.921870301032607, + "learning_rate": 4.066754593155198e-08, + "loss": 0.8428, + "step": 161691 + }, + { + "epoch": 1.94, + "grad_norm": 11.586385094362978, + "learning_rate": 4.0614931853516506e-08, + "loss": 1.4071, + "step": 161694 + }, + { + "epoch": 1.94, + "grad_norm": 2.67024433099491, + "learning_rate": 4.056235176327472e-08, + "loss": 1.2037, + "step": 161697 + }, + { + "epoch": 1.94, + "grad_norm": 12.730590856044621, + "learning_rate": 4.050980566100537e-08, + "loss": 1.2475, + "step": 161700 + }, + { + "epoch": 1.94, + "grad_norm": 8.479405668202563, + "learning_rate": 4.045729354688943e-08, + "loss": 1.1492, + "step": 161703 + }, + { + "epoch": 1.94, + "grad_norm": 3.2686788643241482, + "learning_rate": 4.0404815421105633e-08, + "loss": 1.2035, + "step": 161706 + }, + { + "epoch": 1.94, + "grad_norm": 7.908416624550667, + "learning_rate": 4.035237128383163e-08, + "loss": 1.2745, + "step": 161709 + }, + { + "epoch": 1.94, + "grad_norm": 3.096942398115286, + "learning_rate": 4.0299961135248366e-08, + "loss": 1.4307, + "step": 161712 + }, + { + "epoch": 1.94, + "grad_norm": 12.26870403186975, + "learning_rate": 4.0247584975533494e-08, + "loss": 1.2253, + "step": 161715 + }, + { + "epoch": 1.94, + "grad_norm": 8.051616314470182, + "learning_rate": 4.019524280486686e-08, + "loss": 1.2613, + "step": 161718 + }, + { + "epoch": 1.94, + "grad_norm": 5.660733513683349, + "learning_rate": 4.014293462342611e-08, + "loss": 1.3347, + "step": 161721 + }, + { + "epoch": 1.94, + "grad_norm": 9.38885759764767, + "learning_rate": 4.0090660431389985e-08, + "loss": 1.1716, + "step": 161724 + }, + { + "epoch": 1.94, + "grad_norm": 16.81458524877288, + "learning_rate": 4.003842022893612e-08, + "loss": 0.9647, + "step": 161727 + }, + { + "epoch": 1.94, + "grad_norm": 4.401746865488036, + "learning_rate": 3.9986214016244365e-08, + "loss": 1.1774, + "step": 161730 + }, + { + "epoch": 1.94, + "grad_norm": 12.660923826402367, + "learning_rate": 3.993404179349236e-08, + "loss": 1.3165, + "step": 161733 + }, + { + "epoch": 1.94, + "grad_norm": 5.283709307597888, + "learning_rate": 3.988190356085664e-08, + "loss": 1.33, + "step": 161736 + }, + { + "epoch": 1.94, + "grad_norm": 20.539316834398484, + "learning_rate": 3.982979931851705e-08, + "loss": 1.1516, + "step": 161739 + }, + { + "epoch": 1.94, + "grad_norm": 4.533866637707624, + "learning_rate": 3.9777729066651226e-08, + "loss": 1.0614, + "step": 161742 + }, + { + "epoch": 1.94, + "grad_norm": 6.011840629419244, + "learning_rate": 3.97256928054357e-08, + "loss": 0.96, + "step": 161745 + }, + { + "epoch": 1.94, + "grad_norm": 2.578101552644387, + "learning_rate": 3.96736905350481e-08, + "loss": 0.9913, + "step": 161748 + }, + { + "epoch": 1.95, + "grad_norm": 10.07071791144687, + "learning_rate": 3.962172225566829e-08, + "loss": 1.0647, + "step": 161751 + }, + { + "epoch": 1.95, + "grad_norm": 4.445639258636841, + "learning_rate": 3.9569787967470575e-08, + "loss": 1.3239, + "step": 161754 + }, + { + "epoch": 1.95, + "grad_norm": 7.071606813554834, + "learning_rate": 3.9517887670633694e-08, + "loss": 1.0742, + "step": 161757 + }, + { + "epoch": 1.95, + "grad_norm": 4.571452025227748, + "learning_rate": 3.946602136533417e-08, + "loss": 0.901, + "step": 161760 + }, + { + "epoch": 1.95, + "grad_norm": 4.8578730512574415, + "learning_rate": 3.9414189051750764e-08, + "loss": 0.9847, + "step": 161763 + }, + { + "epoch": 1.95, + "grad_norm": 41.83565621080237, + "learning_rate": 3.936239073005777e-08, + "loss": 1.3842, + "step": 161766 + }, + { + "epoch": 1.95, + "grad_norm": 8.634806344941666, + "learning_rate": 3.9310626400432814e-08, + "loss": 1.2087, + "step": 161769 + }, + { + "epoch": 1.95, + "grad_norm": 2.608649126507105, + "learning_rate": 3.925889606305355e-08, + "loss": 1.0429, + "step": 161772 + }, + { + "epoch": 1.95, + "grad_norm": 7.553582400503377, + "learning_rate": 3.920719971809539e-08, + "loss": 1.0246, + "step": 161775 + }, + { + "epoch": 1.95, + "grad_norm": 12.750617396623525, + "learning_rate": 3.915553736573596e-08, + "loss": 1.4387, + "step": 161778 + }, + { + "epoch": 1.95, + "grad_norm": 11.475741119040716, + "learning_rate": 3.9103909006149576e-08, + "loss": 0.8155, + "step": 161781 + }, + { + "epoch": 1.95, + "grad_norm": 4.362586351198533, + "learning_rate": 3.9052314639513864e-08, + "loss": 0.73, + "step": 161784 + }, + { + "epoch": 1.95, + "grad_norm": 6.599768754090012, + "learning_rate": 3.900075426600536e-08, + "loss": 1.1645, + "step": 161787 + }, + { + "epoch": 1.95, + "grad_norm": 6.487603978489621, + "learning_rate": 3.8949227885799465e-08, + "loss": 1.0741, + "step": 161790 + }, + { + "epoch": 1.95, + "grad_norm": 9.053930380349216, + "learning_rate": 3.889773549907161e-08, + "loss": 1.0582, + "step": 161793 + }, + { + "epoch": 1.95, + "grad_norm": 10.121657822221565, + "learning_rate": 3.88462771059972e-08, + "loss": 1.2322, + "step": 161796 + }, + { + "epoch": 1.95, + "grad_norm": 8.791736930596715, + "learning_rate": 3.879485270675387e-08, + "loss": 1.5246, + "step": 161799 + }, + { + "epoch": 1.95, + "grad_norm": 2.57181938459742, + "learning_rate": 3.874346230151371e-08, + "loss": 1.6206, + "step": 161802 + }, + { + "epoch": 1.95, + "grad_norm": 7.5742959092272235, + "learning_rate": 3.8692105890455466e-08, + "loss": 1.3204, + "step": 161805 + }, + { + "epoch": 1.95, + "grad_norm": 6.938266680133396, + "learning_rate": 3.864078347375344e-08, + "loss": 1.0609, + "step": 161808 + }, + { + "epoch": 1.95, + "grad_norm": 7.013846994539424, + "learning_rate": 3.858949505158083e-08, + "loss": 0.6817, + "step": 161811 + }, + { + "epoch": 1.95, + "grad_norm": 10.99921101079863, + "learning_rate": 3.8538240624115264e-08, + "loss": 1.1883, + "step": 161814 + }, + { + "epoch": 1.95, + "grad_norm": 3.6717184901607642, + "learning_rate": 3.8487020191531056e-08, + "loss": 0.9504, + "step": 161817 + }, + { + "epoch": 1.95, + "grad_norm": 4.836211512675878, + "learning_rate": 3.84358337540014e-08, + "loss": 1.0957, + "step": 161820 + }, + { + "epoch": 1.95, + "grad_norm": 26.823497619265844, + "learning_rate": 3.838468131170281e-08, + "loss": 0.9521, + "step": 161823 + }, + { + "epoch": 1.95, + "grad_norm": 12.818331712663648, + "learning_rate": 3.833356286480849e-08, + "loss": 1.0948, + "step": 161826 + }, + { + "epoch": 1.95, + "grad_norm": 4.352968521496324, + "learning_rate": 3.828247841349386e-08, + "loss": 1.1322, + "step": 161829 + }, + { + "epoch": 1.95, + "grad_norm": 5.719537921698794, + "learning_rate": 3.823142795793322e-08, + "loss": 1.0405, + "step": 161832 + }, + { + "epoch": 1.95, + "grad_norm": 8.430375651538492, + "learning_rate": 3.8180411498299766e-08, + "loss": 1.2206, + "step": 161835 + }, + { + "epoch": 1.95, + "grad_norm": 11.434138227411992, + "learning_rate": 3.812942903476891e-08, + "loss": 0.9813, + "step": 161838 + }, + { + "epoch": 1.95, + "grad_norm": 3.252687941721118, + "learning_rate": 3.807848056751384e-08, + "loss": 1.2051, + "step": 161841 + }, + { + "epoch": 1.95, + "grad_norm": 3.851145165323766, + "learning_rate": 3.802756609670888e-08, + "loss": 1.1949, + "step": 161844 + }, + { + "epoch": 1.95, + "grad_norm": 5.894746310492615, + "learning_rate": 3.797668562252832e-08, + "loss": 1.1182, + "step": 161847 + }, + { + "epoch": 1.95, + "grad_norm": 8.00838006196467, + "learning_rate": 3.792583914514425e-08, + "loss": 1.3677, + "step": 161850 + }, + { + "epoch": 1.95, + "grad_norm": 7.332550215637075, + "learning_rate": 3.7875026664729866e-08, + "loss": 1.1578, + "step": 161853 + }, + { + "epoch": 1.95, + "grad_norm": 8.079138212408676, + "learning_rate": 3.782424818146169e-08, + "loss": 0.8441, + "step": 161856 + }, + { + "epoch": 1.95, + "grad_norm": 5.898384786805676, + "learning_rate": 3.777350369550958e-08, + "loss": 0.8144, + "step": 161859 + }, + { + "epoch": 1.95, + "grad_norm": 3.06366278534106, + "learning_rate": 3.772279320704897e-08, + "loss": 0.7549, + "step": 161862 + }, + { + "epoch": 1.95, + "grad_norm": 8.89881199007054, + "learning_rate": 3.7672116716251926e-08, + "loss": 1.0429, + "step": 161865 + }, + { + "epoch": 1.95, + "grad_norm": 4.473545421822322, + "learning_rate": 3.7621474223290546e-08, + "loss": 1.0132, + "step": 161868 + }, + { + "epoch": 1.95, + "grad_norm": 2.703524398929512, + "learning_rate": 3.7570865728339126e-08, + "loss": 1.1819, + "step": 161871 + }, + { + "epoch": 1.95, + "grad_norm": 10.605332762800371, + "learning_rate": 3.752029123157086e-08, + "loss": 1.1857, + "step": 161874 + }, + { + "epoch": 1.95, + "grad_norm": 2.949338453198147, + "learning_rate": 3.746975073315673e-08, + "loss": 1.2121, + "step": 161877 + }, + { + "epoch": 1.95, + "grad_norm": 4.9839223957510015, + "learning_rate": 3.7419244233269926e-08, + "loss": 0.9462, + "step": 161880 + }, + { + "epoch": 1.95, + "grad_norm": 17.057946279258957, + "learning_rate": 3.7368771732082535e-08, + "loss": 1.4555, + "step": 161883 + }, + { + "epoch": 1.95, + "grad_norm": 7.040857522296932, + "learning_rate": 3.7318333229766634e-08, + "loss": 1.4242, + "step": 161886 + }, + { + "epoch": 1.95, + "grad_norm": 4.79934701760202, + "learning_rate": 3.726792872649543e-08, + "loss": 1.1676, + "step": 161889 + }, + { + "epoch": 1.95, + "grad_norm": 4.543518540713016, + "learning_rate": 3.7217558222439886e-08, + "loss": 1.289, + "step": 161892 + }, + { + "epoch": 1.95, + "grad_norm": 13.185909351737614, + "learning_rate": 3.716722171777209e-08, + "loss": 0.8833, + "step": 161895 + }, + { + "epoch": 1.95, + "grad_norm": 4.488881698085809, + "learning_rate": 3.7116919212664137e-08, + "loss": 1.2496, + "step": 161898 + }, + { + "epoch": 1.95, + "grad_norm": 6.754798555467777, + "learning_rate": 3.70666507072881e-08, + "loss": 1.1246, + "step": 161901 + }, + { + "epoch": 1.95, + "grad_norm": 4.152804139469241, + "learning_rate": 3.7016416201813844e-08, + "loss": 1.2441, + "step": 161904 + }, + { + "epoch": 1.95, + "grad_norm": 6.675112958889628, + "learning_rate": 3.696621569641456e-08, + "loss": 1.3057, + "step": 161907 + }, + { + "epoch": 1.95, + "grad_norm": 17.88573081074698, + "learning_rate": 3.691604919126013e-08, + "loss": 0.9714, + "step": 161910 + }, + { + "epoch": 1.95, + "grad_norm": 9.379723937857237, + "learning_rate": 3.686591668652373e-08, + "loss": 1.5392, + "step": 161913 + }, + { + "epoch": 1.95, + "grad_norm": 12.318292047021389, + "learning_rate": 3.681581818237523e-08, + "loss": 1.0146, + "step": 161916 + }, + { + "epoch": 1.95, + "grad_norm": 15.667217285500781, + "learning_rate": 3.676575367898449e-08, + "loss": 1.1383, + "step": 161919 + }, + { + "epoch": 1.95, + "grad_norm": 5.350436335520123, + "learning_rate": 3.671572317652472e-08, + "loss": 1.3465, + "step": 161922 + }, + { + "epoch": 1.95, + "grad_norm": 11.112421462322967, + "learning_rate": 3.666572667516466e-08, + "loss": 0.869, + "step": 161925 + }, + { + "epoch": 1.95, + "grad_norm": 5.4335875490067105, + "learning_rate": 3.6615764175076396e-08, + "loss": 0.8095, + "step": 161928 + }, + { + "epoch": 1.95, + "grad_norm": 7.9851169606921255, + "learning_rate": 3.656583567642869e-08, + "loss": 1.0038, + "step": 161931 + }, + { + "epoch": 1.95, + "grad_norm": 3.7391822200740688, + "learning_rate": 3.651594117939361e-08, + "loss": 1.2249, + "step": 161934 + }, + { + "epoch": 1.95, + "grad_norm": 4.225867072101279, + "learning_rate": 3.6466080684139925e-08, + "loss": 1.1791, + "step": 161937 + }, + { + "epoch": 1.95, + "grad_norm": 3.775809596633827, + "learning_rate": 3.6416254190839714e-08, + "loss": 1.2284, + "step": 161940 + }, + { + "epoch": 1.95, + "grad_norm": 4.791750419466182, + "learning_rate": 3.6366461699661736e-08, + "loss": 0.8268, + "step": 161943 + }, + { + "epoch": 1.95, + "grad_norm": 3.1596105023938055, + "learning_rate": 3.631670321077696e-08, + "loss": 1.0402, + "step": 161946 + }, + { + "epoch": 1.95, + "grad_norm": 3.0743945382477356, + "learning_rate": 3.626697872435303e-08, + "loss": 1.5213, + "step": 161949 + }, + { + "epoch": 1.95, + "grad_norm": 6.09662348320093, + "learning_rate": 3.621728824056092e-08, + "loss": 1.2094, + "step": 161952 + }, + { + "epoch": 1.95, + "grad_norm": 9.863550825419917, + "learning_rate": 3.616763175957161e-08, + "loss": 1.1384, + "step": 161955 + }, + { + "epoch": 1.95, + "grad_norm": 16.42477524181772, + "learning_rate": 3.6118009281551627e-08, + "loss": 1.1968, + "step": 161958 + }, + { + "epoch": 1.95, + "grad_norm": 6.957476575342809, + "learning_rate": 3.6068420806673056e-08, + "loss": 1.0346, + "step": 161961 + }, + { + "epoch": 1.95, + "grad_norm": 9.301607148820013, + "learning_rate": 3.601886633510354e-08, + "loss": 0.757, + "step": 161964 + }, + { + "epoch": 1.95, + "grad_norm": 23.493675972621926, + "learning_rate": 3.596934586701295e-08, + "loss": 1.0163, + "step": 161967 + }, + { + "epoch": 1.95, + "grad_norm": 18.809351528542777, + "learning_rate": 3.5919859402570035e-08, + "loss": 1.3183, + "step": 161970 + }, + { + "epoch": 1.95, + "grad_norm": 22.798370023427644, + "learning_rate": 3.587040694194244e-08, + "loss": 0.8172, + "step": 161973 + }, + { + "epoch": 1.95, + "grad_norm": 8.59709379945681, + "learning_rate": 3.582098848530113e-08, + "loss": 1.5976, + "step": 161976 + }, + { + "epoch": 1.95, + "grad_norm": 8.22687566851884, + "learning_rate": 3.577160403281266e-08, + "loss": 1.2794, + "step": 161979 + }, + { + "epoch": 1.95, + "grad_norm": 6.373930789219315, + "learning_rate": 3.572225358464798e-08, + "loss": 1.6724, + "step": 161982 + }, + { + "epoch": 1.95, + "grad_norm": 3.310924261372673, + "learning_rate": 3.567293714097364e-08, + "loss": 1.1274, + "step": 161985 + }, + { + "epoch": 1.95, + "grad_norm": 6.395432170924568, + "learning_rate": 3.562365470195839e-08, + "loss": 0.9673, + "step": 161988 + }, + { + "epoch": 1.95, + "grad_norm": 2.918334586526666, + "learning_rate": 3.557440626777098e-08, + "loss": 1.0252, + "step": 161991 + }, + { + "epoch": 1.95, + "grad_norm": 6.901781976714358, + "learning_rate": 3.5525191838577944e-08, + "loss": 1.7997, + "step": 161994 + }, + { + "epoch": 1.95, + "grad_norm": 15.783586449786101, + "learning_rate": 3.547601141454915e-08, + "loss": 1.1867, + "step": 161997 + }, + { + "epoch": 1.95, + "grad_norm": 5.94585202670539, + "learning_rate": 3.542686499585002e-08, + "loss": 1.3262, + "step": 162000 + }, + { + "epoch": 1.95, + "grad_norm": 7.910635510934859, + "learning_rate": 3.5377752582651526e-08, + "loss": 1.5593, + "step": 162003 + }, + { + "epoch": 1.95, + "grad_norm": 12.75601140793258, + "learning_rate": 3.532867417511909e-08, + "loss": 1.2243, + "step": 162006 + }, + { + "epoch": 1.95, + "grad_norm": 7.034012623982368, + "learning_rate": 3.5279629773420364e-08, + "loss": 1.1269, + "step": 162009 + }, + { + "epoch": 1.95, + "grad_norm": 7.289284813164186, + "learning_rate": 3.523061937772299e-08, + "loss": 1.1086, + "step": 162012 + }, + { + "epoch": 1.95, + "grad_norm": 9.809444810649012, + "learning_rate": 3.5181642988193485e-08, + "loss": 1.0592, + "step": 162015 + }, + { + "epoch": 1.95, + "grad_norm": 3.748345295860223, + "learning_rate": 3.5132700605000626e-08, + "loss": 1.3256, + "step": 162018 + }, + { + "epoch": 1.95, + "grad_norm": 6.1026267152563936, + "learning_rate": 3.508379222831093e-08, + "loss": 1.1212, + "step": 162021 + }, + { + "epoch": 1.95, + "grad_norm": 26.16303692529422, + "learning_rate": 3.503491785828983e-08, + "loss": 1.0215, + "step": 162024 + }, + { + "epoch": 1.95, + "grad_norm": 15.13309320424205, + "learning_rate": 3.4986077495104966e-08, + "loss": 1.2775, + "step": 162027 + }, + { + "epoch": 1.95, + "grad_norm": 10.465611655467407, + "learning_rate": 3.493727113892509e-08, + "loss": 1.0162, + "step": 162030 + }, + { + "epoch": 1.95, + "grad_norm": 11.866774392994502, + "learning_rate": 3.488849878991341e-08, + "loss": 1.5954, + "step": 162033 + }, + { + "epoch": 1.95, + "grad_norm": 42.590884663040065, + "learning_rate": 3.483976044823867e-08, + "loss": 1.0764, + "step": 162036 + }, + { + "epoch": 1.95, + "grad_norm": 5.177046184946306, + "learning_rate": 3.479105611406519e-08, + "loss": 1.3795, + "step": 162039 + }, + { + "epoch": 1.95, + "grad_norm": 5.298192301885314, + "learning_rate": 3.4742385787561726e-08, + "loss": 1.1779, + "step": 162042 + }, + { + "epoch": 1.95, + "grad_norm": 5.027843812011338, + "learning_rate": 3.469374946889148e-08, + "loss": 0.981, + "step": 162045 + }, + { + "epoch": 1.95, + "grad_norm": 11.684450408771143, + "learning_rate": 3.46451471582232e-08, + "loss": 0.9468, + "step": 162048 + }, + { + "epoch": 1.95, + "grad_norm": 2.9852822607491367, + "learning_rate": 3.459657885572232e-08, + "loss": 1.2338, + "step": 162051 + }, + { + "epoch": 1.95, + "grad_norm": 13.102248408816502, + "learning_rate": 3.454804456155203e-08, + "loss": 1.1764, + "step": 162054 + }, + { + "epoch": 1.95, + "grad_norm": 8.18209856446102, + "learning_rate": 3.449954427587998e-08, + "loss": 1.3215, + "step": 162057 + }, + { + "epoch": 1.95, + "grad_norm": 7.159499868416469, + "learning_rate": 3.4451077998872705e-08, + "loss": 0.8761, + "step": 162060 + }, + { + "epoch": 1.95, + "grad_norm": 11.8661858146755, + "learning_rate": 3.4402645730693405e-08, + "loss": 1.0913, + "step": 162063 + }, + { + "epoch": 1.95, + "grad_norm": 10.3503964759088, + "learning_rate": 3.435424747150862e-08, + "loss": 1.1179, + "step": 162066 + }, + { + "epoch": 1.95, + "grad_norm": 25.352020045897763, + "learning_rate": 3.430588322148265e-08, + "loss": 1.0211, + "step": 162069 + }, + { + "epoch": 1.95, + "grad_norm": 5.9768420955765675, + "learning_rate": 3.425755298078093e-08, + "loss": 1.5556, + "step": 162072 + }, + { + "epoch": 1.95, + "grad_norm": 9.542330109178664, + "learning_rate": 3.420925674956999e-08, + "loss": 1.3586, + "step": 162075 + }, + { + "epoch": 1.95, + "grad_norm": 10.040236405410797, + "learning_rate": 3.416099452801191e-08, + "loss": 1.2363, + "step": 162078 + }, + { + "epoch": 1.95, + "grad_norm": 10.154434471828655, + "learning_rate": 3.411276631627214e-08, + "loss": 1.1519, + "step": 162081 + }, + { + "epoch": 1.95, + "grad_norm": 5.192703550507716, + "learning_rate": 3.406457211451719e-08, + "loss": 1.1542, + "step": 162084 + }, + { + "epoch": 1.95, + "grad_norm": 4.230376346856109, + "learning_rate": 3.4016411922909166e-08, + "loss": 1.1038, + "step": 162087 + }, + { + "epoch": 1.95, + "grad_norm": 6.485221187505473, + "learning_rate": 3.396828574161459e-08, + "loss": 0.9903, + "step": 162090 + }, + { + "epoch": 1.95, + "grad_norm": 3.6865709589911706, + "learning_rate": 3.3920193570795565e-08, + "loss": 1.4296, + "step": 162093 + }, + { + "epoch": 1.95, + "grad_norm": 9.306989833073187, + "learning_rate": 3.387213541061751e-08, + "loss": 1.0256, + "step": 162096 + }, + { + "epoch": 1.95, + "grad_norm": 4.2296962223910155, + "learning_rate": 3.3824111261243633e-08, + "loss": 1.0081, + "step": 162099 + }, + { + "epoch": 1.95, + "grad_norm": 9.713945335753655, + "learning_rate": 3.377612112283823e-08, + "loss": 1.0077, + "step": 162102 + }, + { + "epoch": 1.95, + "grad_norm": 9.970754335979452, + "learning_rate": 3.3728164995565636e-08, + "loss": 1.3612, + "step": 162105 + }, + { + "epoch": 1.95, + "grad_norm": 14.563114253543072, + "learning_rate": 3.368024287958793e-08, + "loss": 1.0447, + "step": 162108 + }, + { + "epoch": 1.95, + "grad_norm": 8.52228274915659, + "learning_rate": 3.3632354775070543e-08, + "loss": 1.4122, + "step": 162111 + }, + { + "epoch": 1.95, + "grad_norm": 12.562459728785512, + "learning_rate": 3.358450068217667e-08, + "loss": 0.8486, + "step": 162114 + }, + { + "epoch": 1.95, + "grad_norm": 5.910855467540084, + "learning_rate": 3.3536680601068405e-08, + "loss": 0.9145, + "step": 162117 + }, + { + "epoch": 1.95, + "grad_norm": 9.703489299333965, + "learning_rate": 3.3488894531910065e-08, + "loss": 0.9185, + "step": 162120 + }, + { + "epoch": 1.95, + "grad_norm": 16.455590010965313, + "learning_rate": 3.344114247486263e-08, + "loss": 1.2498, + "step": 162123 + }, + { + "epoch": 1.95, + "grad_norm": 19.544079210175877, + "learning_rate": 3.3393424430091523e-08, + "loss": 1.3754, + "step": 162126 + }, + { + "epoch": 1.95, + "grad_norm": 98.56268589196743, + "learning_rate": 3.334574039775995e-08, + "loss": 1.3393, + "step": 162129 + }, + { + "epoch": 1.95, + "grad_norm": 3.342115116591819, + "learning_rate": 3.329809037802778e-08, + "loss": 1.0047, + "step": 162132 + }, + { + "epoch": 1.95, + "grad_norm": 7.348778113219228, + "learning_rate": 3.325047437105933e-08, + "loss": 1.0295, + "step": 162135 + }, + { + "epoch": 1.95, + "grad_norm": 12.162779711488572, + "learning_rate": 3.3202892377017794e-08, + "loss": 1.0127, + "step": 162138 + }, + { + "epoch": 1.95, + "grad_norm": 8.070321689203174, + "learning_rate": 3.315534439606416e-08, + "loss": 1.2447, + "step": 162141 + }, + { + "epoch": 1.95, + "grad_norm": 7.249971150255534, + "learning_rate": 3.3107830428361634e-08, + "loss": 1.1552, + "step": 162144 + }, + { + "epoch": 1.95, + "grad_norm": 7.081978632700992, + "learning_rate": 3.30603504740723e-08, + "loss": 1.2317, + "step": 162147 + }, + { + "epoch": 1.95, + "grad_norm": 6.193783075690187, + "learning_rate": 3.301290453335715e-08, + "loss": 0.641, + "step": 162150 + }, + { + "epoch": 1.95, + "grad_norm": 21.922480828179076, + "learning_rate": 3.296549260637938e-08, + "loss": 1.0778, + "step": 162153 + }, + { + "epoch": 1.95, + "grad_norm": 11.406097500239689, + "learning_rate": 3.2918114693299975e-08, + "loss": 1.2447, + "step": 162156 + }, + { + "epoch": 1.95, + "grad_norm": 3.7027767957016033, + "learning_rate": 3.287077079428103e-08, + "loss": 1.066, + "step": 162159 + }, + { + "epoch": 1.95, + "grad_norm": 11.338151277763284, + "learning_rate": 3.282346090948463e-08, + "loss": 1.1315, + "step": 162162 + }, + { + "epoch": 1.95, + "grad_norm": 11.874112433657896, + "learning_rate": 3.277618503907065e-08, + "loss": 0.8677, + "step": 162165 + }, + { + "epoch": 1.95, + "grad_norm": 5.512900377208181, + "learning_rate": 3.272894318320119e-08, + "loss": 1.2471, + "step": 162168 + }, + { + "epoch": 1.95, + "grad_norm": 14.810225637640276, + "learning_rate": 3.2681735342038336e-08, + "loss": 0.9051, + "step": 162171 + }, + { + "epoch": 1.95, + "grad_norm": 4.989499850290997, + "learning_rate": 3.263456151574196e-08, + "loss": 0.9451, + "step": 162174 + }, + { + "epoch": 1.95, + "grad_norm": 14.901130310160228, + "learning_rate": 3.258742170447416e-08, + "loss": 1.4229, + "step": 162177 + }, + { + "epoch": 1.95, + "grad_norm": 11.202236045032343, + "learning_rate": 3.25403159083959e-08, + "loss": 1.1328, + "step": 162180 + }, + { + "epoch": 1.95, + "grad_norm": 2.403088548351854, + "learning_rate": 3.249324412766597e-08, + "loss": 1.171, + "step": 162183 + }, + { + "epoch": 1.95, + "grad_norm": 6.170485433776127, + "learning_rate": 3.244620636244755e-08, + "loss": 0.715, + "step": 162186 + }, + { + "epoch": 1.95, + "grad_norm": 9.761220939900511, + "learning_rate": 3.2399202612898306e-08, + "loss": 0.9862, + "step": 162189 + }, + { + "epoch": 1.95, + "grad_norm": 6.416069444804057, + "learning_rate": 3.235223287918143e-08, + "loss": 0.8337, + "step": 162192 + }, + { + "epoch": 1.95, + "grad_norm": 5.774479843067191, + "learning_rate": 3.230529716145569e-08, + "loss": 1.0452, + "step": 162195 + }, + { + "epoch": 1.95, + "grad_norm": 12.130246401966506, + "learning_rate": 3.225839545988207e-08, + "loss": 0.8812, + "step": 162198 + }, + { + "epoch": 1.95, + "grad_norm": 25.92471428190662, + "learning_rate": 3.2211527774619335e-08, + "loss": 1.0736, + "step": 162201 + }, + { + "epoch": 1.95, + "grad_norm": 33.04410467651422, + "learning_rate": 3.2164694105828454e-08, + "loss": 1.0016, + "step": 162204 + }, + { + "epoch": 1.95, + "grad_norm": 5.371812493346003, + "learning_rate": 3.211789445366931e-08, + "loss": 1.3236, + "step": 162207 + }, + { + "epoch": 1.95, + "grad_norm": 9.62688219093606, + "learning_rate": 3.2071128818301766e-08, + "loss": 1.2269, + "step": 162210 + }, + { + "epoch": 1.95, + "grad_norm": 11.089138595529956, + "learning_rate": 3.2024397199884595e-08, + "loss": 0.9232, + "step": 162213 + }, + { + "epoch": 1.95, + "grad_norm": 11.544741383862226, + "learning_rate": 3.197769959857766e-08, + "loss": 1.159, + "step": 162216 + }, + { + "epoch": 1.95, + "grad_norm": 8.317085003955027, + "learning_rate": 3.193103601454084e-08, + "loss": 0.9025, + "step": 162219 + }, + { + "epoch": 1.95, + "grad_norm": 11.050735078812718, + "learning_rate": 3.188440644793289e-08, + "loss": 1.2289, + "step": 162222 + }, + { + "epoch": 1.95, + "grad_norm": 14.270005959195913, + "learning_rate": 3.183781089891258e-08, + "loss": 1.0695, + "step": 162225 + }, + { + "epoch": 1.95, + "grad_norm": 10.840783289129243, + "learning_rate": 3.1791249367639775e-08, + "loss": 1.2089, + "step": 162228 + }, + { + "epoch": 1.95, + "grad_norm": 5.978126184688374, + "learning_rate": 3.174472185427213e-08, + "loss": 1.0045, + "step": 162231 + }, + { + "epoch": 1.95, + "grad_norm": 20.685157735235478, + "learning_rate": 3.1698228358969516e-08, + "loss": 1.5211, + "step": 162234 + }, + { + "epoch": 1.95, + "grad_norm": 7.004141463874106, + "learning_rate": 3.16517688818907e-08, + "loss": 1.1815, + "step": 162237 + }, + { + "epoch": 1.95, + "grad_norm": 7.478587152428991, + "learning_rate": 3.160534342319332e-08, + "loss": 1.0171, + "step": 162240 + }, + { + "epoch": 1.95, + "grad_norm": 5.5962931803748175, + "learning_rate": 3.1558951983036156e-08, + "loss": 0.789, + "step": 162243 + }, + { + "epoch": 1.95, + "grad_norm": 16.65600924500951, + "learning_rate": 3.1512594561577956e-08, + "loss": 1.1875, + "step": 162246 + }, + { + "epoch": 1.95, + "grad_norm": 7.635806960245081, + "learning_rate": 3.146627115897749e-08, + "loss": 1.1063, + "step": 162249 + }, + { + "epoch": 1.95, + "grad_norm": 4.0198309608475755, + "learning_rate": 3.1419981775391294e-08, + "loss": 1.2745, + "step": 162252 + }, + { + "epoch": 1.95, + "grad_norm": 7.223007875105366, + "learning_rate": 3.137372641097702e-08, + "loss": 1.1019, + "step": 162255 + }, + { + "epoch": 1.95, + "grad_norm": 13.032207042052075, + "learning_rate": 3.132750506589455e-08, + "loss": 1.0216, + "step": 162258 + }, + { + "epoch": 1.95, + "grad_norm": 13.026501099842365, + "learning_rate": 3.128131774030041e-08, + "loss": 1.2099, + "step": 162261 + }, + { + "epoch": 1.95, + "grad_norm": 12.684272427419444, + "learning_rate": 3.123516443435226e-08, + "loss": 1.1795, + "step": 162264 + }, + { + "epoch": 1.95, + "grad_norm": 15.682980162898657, + "learning_rate": 3.118904514820886e-08, + "loss": 1.293, + "step": 162267 + }, + { + "epoch": 1.95, + "grad_norm": 11.38421014277755, + "learning_rate": 3.1142959882025645e-08, + "loss": 1.1102, + "step": 162270 + }, + { + "epoch": 1.95, + "grad_norm": 6.408627875915694, + "learning_rate": 3.1096908635960265e-08, + "loss": 1.0315, + "step": 162273 + }, + { + "epoch": 1.95, + "grad_norm": 5.095450665792581, + "learning_rate": 3.105089141017037e-08, + "loss": 1.3931, + "step": 162276 + }, + { + "epoch": 1.95, + "grad_norm": 7.101668698713703, + "learning_rate": 3.100490820481361e-08, + "loss": 1.5081, + "step": 162279 + }, + { + "epoch": 1.95, + "grad_norm": 3.5107573480279997, + "learning_rate": 3.095895902004653e-08, + "loss": 1.1344, + "step": 162282 + }, + { + "epoch": 1.95, + "grad_norm": 5.29770351955539, + "learning_rate": 3.091304385602567e-08, + "loss": 1.2252, + "step": 162285 + }, + { + "epoch": 1.95, + "grad_norm": 7.105888460876652, + "learning_rate": 3.0867162712906464e-08, + "loss": 0.9678, + "step": 162288 + }, + { + "epoch": 1.95, + "grad_norm": 3.1766772134599566, + "learning_rate": 3.082131559084878e-08, + "loss": 0.9407, + "step": 162291 + }, + { + "epoch": 1.95, + "grad_norm": 4.892637170338247, + "learning_rate": 3.0775502490005824e-08, + "loss": 1.1038, + "step": 162294 + }, + { + "epoch": 1.95, + "grad_norm": 10.969165761149702, + "learning_rate": 3.072972341053415e-08, + "loss": 1.1099, + "step": 162297 + }, + { + "epoch": 1.95, + "grad_norm": 13.025015697223283, + "learning_rate": 3.068397835259251e-08, + "loss": 0.9341, + "step": 162300 + }, + { + "epoch": 1.95, + "grad_norm": 8.076023232949703, + "learning_rate": 3.0638267316335236e-08, + "loss": 0.9954, + "step": 162303 + }, + { + "epoch": 1.95, + "grad_norm": 21.522726782284877, + "learning_rate": 3.0592590301918855e-08, + "loss": 1.3824, + "step": 162306 + }, + { + "epoch": 1.95, + "grad_norm": 3.7275169328487534, + "learning_rate": 3.0546947309498806e-08, + "loss": 0.8529, + "step": 162309 + }, + { + "epoch": 1.95, + "grad_norm": 6.665311311301467, + "learning_rate": 3.0501338339230526e-08, + "loss": 1.184, + "step": 162312 + }, + { + "epoch": 1.95, + "grad_norm": 3.747312318762318, + "learning_rate": 3.0455763391270544e-08, + "loss": 1.1378, + "step": 162315 + }, + { + "epoch": 1.95, + "grad_norm": 17.238683127348967, + "learning_rate": 3.0410222465774296e-08, + "loss": 1.1576, + "step": 162318 + }, + { + "epoch": 1.95, + "grad_norm": 8.425554306395941, + "learning_rate": 3.036471556289611e-08, + "loss": 1.0534, + "step": 162321 + }, + { + "epoch": 1.95, + "grad_norm": 5.171601517696776, + "learning_rate": 3.031924268279252e-08, + "loss": 1.1921, + "step": 162324 + }, + { + "epoch": 1.95, + "grad_norm": 12.000108539038305, + "learning_rate": 3.027380382561895e-08, + "loss": 1.0857, + "step": 162327 + }, + { + "epoch": 1.95, + "grad_norm": 9.237312933756842, + "learning_rate": 3.022839899152974e-08, + "loss": 1.1812, + "step": 162330 + }, + { + "epoch": 1.95, + "grad_norm": 14.915144831544847, + "learning_rate": 3.01830281806792e-08, + "loss": 1.2205, + "step": 162333 + }, + { + "epoch": 1.95, + "grad_norm": 2.9363735590634534, + "learning_rate": 3.0137691393222756e-08, + "loss": 1.15, + "step": 162336 + }, + { + "epoch": 1.95, + "grad_norm": 5.7995835131793125, + "learning_rate": 3.009238862931585e-08, + "loss": 1.236, + "step": 162339 + }, + { + "epoch": 1.95, + "grad_norm": 16.034207755943726, + "learning_rate": 3.0047119889111684e-08, + "loss": 1.2746, + "step": 162342 + }, + { + "epoch": 1.95, + "grad_norm": 4.957650404983211, + "learning_rate": 3.000188517276681e-08, + "loss": 1.1941, + "step": 162345 + }, + { + "epoch": 1.95, + "grad_norm": 8.157744997906368, + "learning_rate": 2.9956684480433316e-08, + "loss": 1.0908, + "step": 162348 + }, + { + "epoch": 1.95, + "grad_norm": 9.321386543418418, + "learning_rate": 2.9911517812267755e-08, + "loss": 0.8262, + "step": 162351 + }, + { + "epoch": 1.95, + "grad_norm": 9.922599533672365, + "learning_rate": 2.986638516842111e-08, + "loss": 1.2504, + "step": 162354 + }, + { + "epoch": 1.95, + "grad_norm": 9.643887827069888, + "learning_rate": 2.9821286549049924e-08, + "loss": 1.1874, + "step": 162357 + }, + { + "epoch": 1.95, + "grad_norm": 9.239523805573537, + "learning_rate": 2.9776221954308514e-08, + "loss": 1.2556, + "step": 162360 + }, + { + "epoch": 1.95, + "grad_norm": 8.953015092327702, + "learning_rate": 2.9731191384347878e-08, + "loss": 0.97, + "step": 162363 + }, + { + "epoch": 1.95, + "grad_norm": 4.722803138943358, + "learning_rate": 2.9686194839324555e-08, + "loss": 1.1735, + "step": 162366 + }, + { + "epoch": 1.95, + "grad_norm": 4.0619928681751505, + "learning_rate": 2.9641232319390646e-08, + "loss": 1.1566, + "step": 162369 + }, + { + "epoch": 1.95, + "grad_norm": 14.593532491980548, + "learning_rate": 2.9596303824700467e-08, + "loss": 1.0778, + "step": 162372 + }, + { + "epoch": 1.95, + "grad_norm": 4.17529960329806, + "learning_rate": 2.9551409355406123e-08, + "loss": 1.0496, + "step": 162375 + }, + { + "epoch": 1.95, + "grad_norm": 11.787019413630734, + "learning_rate": 2.9506548911661937e-08, + "loss": 1.0721, + "step": 162378 + }, + { + "epoch": 1.95, + "grad_norm": 14.566808404913559, + "learning_rate": 2.9461722493620004e-08, + "loss": 1.2352, + "step": 162381 + }, + { + "epoch": 1.95, + "grad_norm": 14.81248354314127, + "learning_rate": 2.9416930101433538e-08, + "loss": 1.2607, + "step": 162384 + }, + { + "epoch": 1.95, + "grad_norm": 4.142507946777476, + "learning_rate": 2.937217173525686e-08, + "loss": 1.0308, + "step": 162387 + }, + { + "epoch": 1.95, + "grad_norm": 7.066052539321256, + "learning_rate": 2.932744739524096e-08, + "loss": 1.3815, + "step": 162390 + }, + { + "epoch": 1.95, + "grad_norm": 3.663403116693869, + "learning_rate": 2.9282757081539048e-08, + "loss": 1.0301, + "step": 162393 + }, + { + "epoch": 1.95, + "grad_norm": 11.405847559066386, + "learning_rate": 2.9238100794303226e-08, + "loss": 1.2575, + "step": 162396 + }, + { + "epoch": 1.95, + "grad_norm": 9.797051462047731, + "learning_rate": 2.9193478533686703e-08, + "loss": 1.2352, + "step": 162399 + }, + { + "epoch": 1.95, + "grad_norm": 10.531187880331771, + "learning_rate": 2.914889029984158e-08, + "loss": 0.6359, + "step": 162402 + }, + { + "epoch": 1.95, + "grad_norm": 20.12196167029544, + "learning_rate": 2.910433609291996e-08, + "loss": 1.2576, + "step": 162405 + }, + { + "epoch": 1.95, + "grad_norm": 18.783381410717432, + "learning_rate": 2.905981591307283e-08, + "loss": 1.1983, + "step": 162408 + }, + { + "epoch": 1.95, + "grad_norm": 3.797090356525749, + "learning_rate": 2.9015329760453403e-08, + "loss": 1.268, + "step": 162411 + }, + { + "epoch": 1.95, + "grad_norm": 4.072874825888156, + "learning_rate": 2.897087763521378e-08, + "loss": 0.8232, + "step": 162414 + }, + { + "epoch": 1.95, + "grad_norm": 7.990982188276861, + "learning_rate": 2.8926459537504948e-08, + "loss": 1.2204, + "step": 162417 + }, + { + "epoch": 1.95, + "grad_norm": 9.32165646257519, + "learning_rate": 2.8882075467477898e-08, + "loss": 0.9351, + "step": 162420 + }, + { + "epoch": 1.95, + "grad_norm": 20.44652218493163, + "learning_rate": 2.8837725425284736e-08, + "loss": 1.3706, + "step": 162423 + }, + { + "epoch": 1.95, + "grad_norm": 6.364671285360402, + "learning_rate": 2.8793409411077556e-08, + "loss": 1.2132, + "step": 162426 + }, + { + "epoch": 1.95, + "grad_norm": 17.979620431069215, + "learning_rate": 2.874912742500624e-08, + "loss": 1.4094, + "step": 162429 + }, + { + "epoch": 1.95, + "grad_norm": 8.847985183410977, + "learning_rate": 2.870487946722289e-08, + "loss": 1.4286, + "step": 162432 + }, + { + "epoch": 1.95, + "grad_norm": 18.247634548571394, + "learning_rate": 2.8660665537877386e-08, + "loss": 1.5768, + "step": 162435 + }, + { + "epoch": 1.95, + "grad_norm": 10.118363173349465, + "learning_rate": 2.8616485637121827e-08, + "loss": 1.3175, + "step": 162438 + }, + { + "epoch": 1.95, + "grad_norm": 6.930851188622646, + "learning_rate": 2.8572339765107204e-08, + "loss": 1.678, + "step": 162441 + }, + { + "epoch": 1.95, + "grad_norm": 5.999996577806388, + "learning_rate": 2.852822792198229e-08, + "loss": 0.8217, + "step": 162444 + }, + { + "epoch": 1.95, + "grad_norm": 9.932111773949527, + "learning_rate": 2.8484150107899177e-08, + "loss": 1.2235, + "step": 162447 + }, + { + "epoch": 1.95, + "grad_norm": 4.69899160932474, + "learning_rate": 2.8440106323008864e-08, + "loss": 0.9787, + "step": 162450 + }, + { + "epoch": 1.95, + "grad_norm": 15.254304180816979, + "learning_rate": 2.8396096567460117e-08, + "loss": 1.0956, + "step": 162453 + }, + { + "epoch": 1.95, + "grad_norm": 2.3269104023100438, + "learning_rate": 2.835212084140393e-08, + "loss": 1.4004, + "step": 162456 + }, + { + "epoch": 1.95, + "grad_norm": 4.865123802621117, + "learning_rate": 2.8308179144989067e-08, + "loss": 1.2249, + "step": 162459 + }, + { + "epoch": 1.95, + "grad_norm": 9.050043423338034, + "learning_rate": 2.8264271478367632e-08, + "loss": 1.3148, + "step": 162462 + }, + { + "epoch": 1.95, + "grad_norm": 5.052898800988368, + "learning_rate": 2.8220397841688396e-08, + "loss": 1.2438, + "step": 162465 + }, + { + "epoch": 1.95, + "grad_norm": 8.718763009928239, + "learning_rate": 2.8176558235101237e-08, + "loss": 1.4407, + "step": 162468 + }, + { + "epoch": 1.95, + "grad_norm": 12.72109134497895, + "learning_rate": 2.8132752658754925e-08, + "loss": 1.1941, + "step": 162471 + }, + { + "epoch": 1.95, + "grad_norm": 6.140859060021564, + "learning_rate": 2.808898111280045e-08, + "loss": 1.3671, + "step": 162474 + }, + { + "epoch": 1.95, + "grad_norm": 11.565593533460568, + "learning_rate": 2.8045243597386585e-08, + "loss": 1.4554, + "step": 162477 + }, + { + "epoch": 1.95, + "grad_norm": 4.310957404226544, + "learning_rate": 2.8001540112662096e-08, + "loss": 0.9339, + "step": 162480 + }, + { + "epoch": 1.95, + "grad_norm": 8.81264215662234, + "learning_rate": 2.7957870658775755e-08, + "loss": 1.1266, + "step": 162483 + }, + { + "epoch": 1.95, + "grad_norm": 12.465648052851394, + "learning_rate": 2.791423523587744e-08, + "loss": 1.3749, + "step": 162486 + }, + { + "epoch": 1.95, + "grad_norm": 8.815517096445458, + "learning_rate": 2.7870633844117034e-08, + "loss": 1.2559, + "step": 162489 + }, + { + "epoch": 1.95, + "grad_norm": 7.116717421838327, + "learning_rate": 2.7827066483641085e-08, + "loss": 0.8797, + "step": 162492 + }, + { + "epoch": 1.95, + "grad_norm": 14.141516365631599, + "learning_rate": 2.7783533154599473e-08, + "loss": 0.9581, + "step": 162495 + }, + { + "epoch": 1.95, + "grad_norm": 7.52307479208469, + "learning_rate": 2.774003385714208e-08, + "loss": 1.1554, + "step": 162498 + }, + { + "epoch": 1.95, + "grad_norm": 9.825284468813702, + "learning_rate": 2.769656859141434e-08, + "loss": 1.1149, + "step": 162501 + }, + { + "epoch": 1.95, + "grad_norm": 5.863078062400952, + "learning_rate": 2.765313735756725e-08, + "loss": 1.1674, + "step": 162504 + }, + { + "epoch": 1.95, + "grad_norm": 10.97545346521437, + "learning_rate": 2.7609740155747356e-08, + "loss": 1.2121, + "step": 162507 + }, + { + "epoch": 1.95, + "grad_norm": 2.8451985986846196, + "learning_rate": 2.7566376986103426e-08, + "loss": 1.2384, + "step": 162510 + }, + { + "epoch": 1.95, + "grad_norm": 6.647175252456804, + "learning_rate": 2.7523047848783125e-08, + "loss": 1.429, + "step": 162513 + }, + { + "epoch": 1.95, + "grad_norm": 10.099171929406786, + "learning_rate": 2.7479752743935216e-08, + "loss": 1.1411, + "step": 162516 + }, + { + "epoch": 1.95, + "grad_norm": 14.081574021196946, + "learning_rate": 2.7436491671706255e-08, + "loss": 1.0726, + "step": 162519 + }, + { + "epoch": 1.95, + "grad_norm": 5.767561805825861, + "learning_rate": 2.739326463224501e-08, + "loss": 0.7677, + "step": 162522 + }, + { + "epoch": 1.95, + "grad_norm": 6.414482504002964, + "learning_rate": 2.7350071625698026e-08, + "loss": 1.0043, + "step": 162525 + }, + { + "epoch": 1.95, + "grad_norm": 8.600198022750648, + "learning_rate": 2.730691265221297e-08, + "loss": 1.4887, + "step": 162528 + }, + { + "epoch": 1.95, + "grad_norm": 2.985788341042316, + "learning_rate": 2.7263787711937495e-08, + "loss": 1.0381, + "step": 162531 + }, + { + "epoch": 1.95, + "grad_norm": 4.456479586809847, + "learning_rate": 2.7220696805019264e-08, + "loss": 1.2206, + "step": 162534 + }, + { + "epoch": 1.95, + "grad_norm": 8.064745581389069, + "learning_rate": 2.7177639931603715e-08, + "loss": 1.3235, + "step": 162537 + }, + { + "epoch": 1.95, + "grad_norm": 5.732574873047992, + "learning_rate": 2.713461709183962e-08, + "loss": 1.1193, + "step": 162540 + }, + { + "epoch": 1.95, + "grad_norm": 3.5173638123724222, + "learning_rate": 2.7091628285871306e-08, + "loss": 1.4255, + "step": 162543 + }, + { + "epoch": 1.95, + "grad_norm": 8.857224662867688, + "learning_rate": 2.7048673513848654e-08, + "loss": 0.8437, + "step": 162546 + }, + { + "epoch": 1.95, + "grad_norm": 3.9314002070865284, + "learning_rate": 2.7005752775915996e-08, + "loss": 1.0629, + "step": 162549 + }, + { + "epoch": 1.95, + "grad_norm": 10.446195817628858, + "learning_rate": 2.6962866072219874e-08, + "loss": 1.2038, + "step": 162552 + }, + { + "epoch": 1.95, + "grad_norm": 6.837017373291452, + "learning_rate": 2.6920013402907953e-08, + "loss": 1.1649, + "step": 162555 + }, + { + "epoch": 1.95, + "grad_norm": 3.1123881973043477, + "learning_rate": 2.687719476812567e-08, + "loss": 1.0496, + "step": 162558 + }, + { + "epoch": 1.95, + "grad_norm": 24.183907542795513, + "learning_rate": 2.6834410168018467e-08, + "loss": 0.8701, + "step": 162561 + }, + { + "epoch": 1.95, + "grad_norm": 21.57701781306655, + "learning_rate": 2.6791659602734e-08, + "loss": 1.128, + "step": 162564 + }, + { + "epoch": 1.95, + "grad_norm": 10.251560339892354, + "learning_rate": 2.67489430724166e-08, + "loss": 1.1149, + "step": 162567 + }, + { + "epoch": 1.95, + "grad_norm": 12.950896001967886, + "learning_rate": 2.670626057721393e-08, + "loss": 1.0391, + "step": 162570 + }, + { + "epoch": 1.95, + "grad_norm": 15.469215159358857, + "learning_rate": 2.66636121172692e-08, + "loss": 1.2754, + "step": 162573 + }, + { + "epoch": 1.95, + "grad_norm": 6.565977226292335, + "learning_rate": 2.6620997692730077e-08, + "loss": 0.8116, + "step": 162576 + }, + { + "epoch": 1.95, + "grad_norm": 6.372692858065774, + "learning_rate": 2.657841730373978e-08, + "loss": 1.0811, + "step": 162579 + }, + { + "epoch": 1.96, + "grad_norm": 7.268206439876293, + "learning_rate": 2.6535870950445964e-08, + "loss": 1.2562, + "step": 162582 + }, + { + "epoch": 1.96, + "grad_norm": 2.755846995035989, + "learning_rate": 2.6493358632992962e-08, + "loss": 1.1916, + "step": 162585 + }, + { + "epoch": 1.96, + "grad_norm": 10.313337656300948, + "learning_rate": 2.64508803515251e-08, + "loss": 0.8641, + "step": 162588 + }, + { + "epoch": 1.96, + "grad_norm": 10.423187481746014, + "learning_rate": 2.640843610618782e-08, + "loss": 1.018, + "step": 162591 + }, + { + "epoch": 1.96, + "grad_norm": 6.76207387371494, + "learning_rate": 2.6366025897126556e-08, + "loss": 1.0393, + "step": 162594 + }, + { + "epoch": 1.96, + "grad_norm": 18.95766035490388, + "learning_rate": 2.6323649724485645e-08, + "loss": 1.2395, + "step": 162597 + }, + { + "epoch": 1.96, + "grad_norm": 6.787468577682049, + "learning_rate": 2.62813075884083e-08, + "loss": 1.4371, + "step": 162600 + }, + { + "epoch": 1.96, + "grad_norm": 22.577703196140572, + "learning_rate": 2.6238999489042182e-08, + "loss": 1.3526, + "step": 162603 + }, + { + "epoch": 1.96, + "grad_norm": 3.2918742760564355, + "learning_rate": 2.61967254265294e-08, + "loss": 1.0646, + "step": 162606 + }, + { + "epoch": 1.96, + "grad_norm": 10.607652016025204, + "learning_rate": 2.6154485401014285e-08, + "loss": 1.2775, + "step": 162609 + }, + { + "epoch": 1.96, + "grad_norm": 7.752001873847775, + "learning_rate": 2.611227941264116e-08, + "loss": 0.9217, + "step": 162612 + }, + { + "epoch": 1.96, + "grad_norm": 9.17859624161968, + "learning_rate": 2.6070107461554363e-08, + "loss": 1.1722, + "step": 162615 + }, + { + "epoch": 1.96, + "grad_norm": 10.148096914829706, + "learning_rate": 2.6027969547897104e-08, + "loss": 1.1244, + "step": 162618 + }, + { + "epoch": 1.96, + "grad_norm": 10.453753523551036, + "learning_rate": 2.5985865671814824e-08, + "loss": 1.3838, + "step": 162621 + }, + { + "epoch": 1.96, + "grad_norm": 6.712664387070919, + "learning_rate": 2.5943795833449637e-08, + "loss": 1.1511, + "step": 162624 + }, + { + "epoch": 1.96, + "grad_norm": 3.031226195715848, + "learning_rate": 2.5901760032945865e-08, + "loss": 1.1371, + "step": 162627 + }, + { + "epoch": 1.96, + "grad_norm": 4.705901406414515, + "learning_rate": 2.5859758270446735e-08, + "loss": 0.9713, + "step": 162630 + }, + { + "epoch": 1.96, + "grad_norm": 9.895835647172872, + "learning_rate": 2.5817790546095455e-08, + "loss": 1.1537, + "step": 162633 + }, + { + "epoch": 1.96, + "grad_norm": 6.375692995767187, + "learning_rate": 2.5775856860035253e-08, + "loss": 0.7244, + "step": 162636 + }, + { + "epoch": 1.96, + "grad_norm": 10.698512714264886, + "learning_rate": 2.5733957212409344e-08, + "loss": 1.2978, + "step": 162639 + }, + { + "epoch": 1.96, + "grad_norm": 8.399761394335554, + "learning_rate": 2.5692091603360946e-08, + "loss": 1.1695, + "step": 162642 + }, + { + "epoch": 1.96, + "grad_norm": 10.86128131777445, + "learning_rate": 2.565026003303217e-08, + "loss": 1.1548, + "step": 162645 + }, + { + "epoch": 1.96, + "grad_norm": 3.6039330214185292, + "learning_rate": 2.5608462501567345e-08, + "loss": 0.6429, + "step": 162648 + }, + { + "epoch": 1.96, + "grad_norm": 7.1931966380254195, + "learning_rate": 2.5566699009107464e-08, + "loss": 0.8443, + "step": 162651 + }, + { + "epoch": 1.96, + "grad_norm": 7.106713067913888, + "learning_rate": 2.5524969555795755e-08, + "loss": 1.4107, + "step": 162654 + }, + { + "epoch": 1.96, + "grad_norm": 10.960997783817747, + "learning_rate": 2.5483274141774315e-08, + "loss": 1.2018, + "step": 162657 + }, + { + "epoch": 1.96, + "grad_norm": 4.411928615112748, + "learning_rate": 2.5441612767186373e-08, + "loss": 0.6651, + "step": 162660 + }, + { + "epoch": 1.96, + "grad_norm": 3.178768644397967, + "learning_rate": 2.5399985432171815e-08, + "loss": 1.2134, + "step": 162663 + }, + { + "epoch": 1.96, + "grad_norm": 9.0061084654284, + "learning_rate": 2.5358392136876074e-08, + "loss": 1.0222, + "step": 162666 + }, + { + "epoch": 1.96, + "grad_norm": 7.3148339887356375, + "learning_rate": 2.5316832881437937e-08, + "loss": 0.655, + "step": 162669 + }, + { + "epoch": 1.96, + "grad_norm": 3.8717394849144076, + "learning_rate": 2.5275307666001726e-08, + "loss": 1.0079, + "step": 162672 + }, + { + "epoch": 1.96, + "grad_norm": 12.675516697321429, + "learning_rate": 2.523381649070733e-08, + "loss": 1.3811, + "step": 162675 + }, + { + "epoch": 1.96, + "grad_norm": 9.613908558936897, + "learning_rate": 2.5192359355696862e-08, + "loss": 1.049, + "step": 162678 + }, + { + "epoch": 1.96, + "grad_norm": 7.433094376415474, + "learning_rate": 2.5150936261112425e-08, + "loss": 1.0792, + "step": 162681 + }, + { + "epoch": 1.96, + "grad_norm": 13.304167071678535, + "learning_rate": 2.510954720709502e-08, + "loss": 1.315, + "step": 162684 + }, + { + "epoch": 1.96, + "grad_norm": 5.666287122856744, + "learning_rate": 2.5068192193785645e-08, + "loss": 1.3338, + "step": 162687 + }, + { + "epoch": 1.96, + "grad_norm": 10.452621928786733, + "learning_rate": 2.50268712213253e-08, + "loss": 1.1577, + "step": 162690 + }, + { + "epoch": 1.96, + "grad_norm": 16.844072589610533, + "learning_rate": 2.498558428985498e-08, + "loss": 1.0601, + "step": 162693 + }, + { + "epoch": 1.96, + "grad_norm": 7.416774549535004, + "learning_rate": 2.4944331399516795e-08, + "loss": 1.1017, + "step": 162696 + }, + { + "epoch": 1.96, + "grad_norm": 5.262038151218511, + "learning_rate": 2.4903112550449526e-08, + "loss": 1.35, + "step": 162699 + }, + { + "epoch": 1.96, + "grad_norm": 7.987233056173591, + "learning_rate": 2.486192774279639e-08, + "loss": 1.6019, + "step": 162702 + }, + { + "epoch": 1.96, + "grad_norm": 3.0296464606161235, + "learning_rate": 2.482077697669505e-08, + "loss": 1.3378, + "step": 162705 + }, + { + "epoch": 1.96, + "grad_norm": 5.709518641868937, + "learning_rate": 2.4779660252287617e-08, + "loss": 1.1152, + "step": 162708 + }, + { + "epoch": 1.96, + "grad_norm": 33.49850341315587, + "learning_rate": 2.4738577569715093e-08, + "loss": 1.4277, + "step": 162711 + }, + { + "epoch": 1.96, + "grad_norm": 20.768589985116108, + "learning_rate": 2.4697528929116256e-08, + "loss": 1.1536, + "step": 162714 + }, + { + "epoch": 1.96, + "grad_norm": 10.509931433075517, + "learning_rate": 2.4656514330630987e-08, + "loss": 0.9896, + "step": 162717 + }, + { + "epoch": 1.96, + "grad_norm": 4.871172443472298, + "learning_rate": 2.4615533774400292e-08, + "loss": 1.1597, + "step": 162720 + }, + { + "epoch": 1.96, + "grad_norm": 11.483618775814664, + "learning_rate": 2.4574587260564054e-08, + "loss": 1.0204, + "step": 162723 + }, + { + "epoch": 1.96, + "grad_norm": 11.778877660270359, + "learning_rate": 2.4533674789261054e-08, + "loss": 1.0038, + "step": 162726 + }, + { + "epoch": 1.96, + "grad_norm": 9.232265615141966, + "learning_rate": 2.449279636063229e-08, + "loss": 0.8157, + "step": 162729 + }, + { + "epoch": 1.96, + "grad_norm": 3.506020869657341, + "learning_rate": 2.4451951974815425e-08, + "loss": 1.1105, + "step": 162732 + }, + { + "epoch": 1.96, + "grad_norm": 7.62473677763329, + "learning_rate": 2.4411141631951464e-08, + "loss": 0.9117, + "step": 162735 + }, + { + "epoch": 1.96, + "grad_norm": 6.314868918814684, + "learning_rate": 2.437036533217918e-08, + "loss": 1.0187, + "step": 162738 + }, + { + "epoch": 1.96, + "grad_norm": 9.633061384421989, + "learning_rate": 2.4329623075637355e-08, + "loss": 0.9708, + "step": 162741 + }, + { + "epoch": 1.96, + "grad_norm": 9.473613366120173, + "learning_rate": 2.4288914862465873e-08, + "loss": 0.8312, + "step": 162744 + }, + { + "epoch": 1.96, + "grad_norm": 6.212848422764722, + "learning_rate": 2.4248240692802406e-08, + "loss": 1.1036, + "step": 162747 + }, + { + "epoch": 1.96, + "grad_norm": 7.880413062605306, + "learning_rate": 2.4207600566786838e-08, + "loss": 1.3296, + "step": 162750 + }, + { + "epoch": 1.96, + "grad_norm": 8.019975045273544, + "learning_rate": 2.416699448455684e-08, + "loss": 1.3371, + "step": 162753 + }, + { + "epoch": 1.96, + "grad_norm": 10.530277244753066, + "learning_rate": 2.4126422446252296e-08, + "loss": 1.2152, + "step": 162756 + }, + { + "epoch": 1.96, + "grad_norm": 6.809273344921871, + "learning_rate": 2.4085884452010878e-08, + "loss": 1.0295, + "step": 162759 + }, + { + "epoch": 1.96, + "grad_norm": 8.460901377937466, + "learning_rate": 2.404538050197025e-08, + "loss": 0.9692, + "step": 162762 + }, + { + "epoch": 1.96, + "grad_norm": 6.317374555542628, + "learning_rate": 2.4004910596270302e-08, + "loss": 1.1761, + "step": 162765 + }, + { + "epoch": 1.96, + "grad_norm": 6.171440160840911, + "learning_rate": 2.3964474735048704e-08, + "loss": 1.241, + "step": 162768 + }, + { + "epoch": 1.96, + "grad_norm": 3.27627714832572, + "learning_rate": 2.392407291844201e-08, + "loss": 1.0622, + "step": 162771 + }, + { + "epoch": 1.96, + "grad_norm": 15.176714433257477, + "learning_rate": 2.3883705146588997e-08, + "loss": 1.3356, + "step": 162774 + }, + { + "epoch": 1.96, + "grad_norm": 5.524560224987888, + "learning_rate": 2.3843371419628446e-08, + "loss": 1.2496, + "step": 162777 + }, + { + "epoch": 1.96, + "grad_norm": 5.389722820821496, + "learning_rate": 2.3803071737696913e-08, + "loss": 1.4655, + "step": 162780 + }, + { + "epoch": 1.96, + "grad_norm": 13.40141584412853, + "learning_rate": 2.3762806100930957e-08, + "loss": 1.1177, + "step": 162783 + }, + { + "epoch": 1.96, + "grad_norm": 16.690835894431913, + "learning_rate": 2.3722574509470465e-08, + "loss": 1.4007, + "step": 162786 + }, + { + "epoch": 1.96, + "grad_norm": 11.926864634213132, + "learning_rate": 2.368237696345088e-08, + "loss": 0.9776, + "step": 162789 + }, + { + "epoch": 1.96, + "grad_norm": 4.146379642134824, + "learning_rate": 2.3642213463009877e-08, + "loss": 0.8125, + "step": 162792 + }, + { + "epoch": 1.96, + "grad_norm": 4.465221163975901, + "learning_rate": 2.360208400828401e-08, + "loss": 0.9933, + "step": 162795 + }, + { + "epoch": 1.96, + "grad_norm": 3.705491977636196, + "learning_rate": 2.3561988599410944e-08, + "loss": 1.4747, + "step": 162798 + }, + { + "epoch": 1.96, + "grad_norm": 9.691176295749326, + "learning_rate": 2.3521927236528353e-08, + "loss": 1.1484, + "step": 162801 + }, + { + "epoch": 1.96, + "grad_norm": 5.86881059708022, + "learning_rate": 2.3481899919770567e-08, + "loss": 1.1691, + "step": 162804 + }, + { + "epoch": 1.96, + "grad_norm": 4.768547646522635, + "learning_rate": 2.344190664927637e-08, + "loss": 0.9212, + "step": 162807 + }, + { + "epoch": 1.96, + "grad_norm": 11.233582839495707, + "learning_rate": 2.3401947425181204e-08, + "loss": 1.6421, + "step": 162810 + }, + { + "epoch": 1.96, + "grad_norm": 8.785394033278118, + "learning_rate": 2.336202224762163e-08, + "loss": 0.8872, + "step": 162813 + }, + { + "epoch": 1.96, + "grad_norm": 12.721437449266597, + "learning_rate": 2.3322131116734204e-08, + "loss": 1.0493, + "step": 162816 + }, + { + "epoch": 1.96, + "grad_norm": 4.345896476168218, + "learning_rate": 2.3282274032653263e-08, + "loss": 0.8283, + "step": 162819 + }, + { + "epoch": 1.96, + "grad_norm": 7.713084993250154, + "learning_rate": 2.3242450995518695e-08, + "loss": 1.1238, + "step": 162822 + }, + { + "epoch": 1.96, + "grad_norm": 3.57961152551799, + "learning_rate": 2.320266200546262e-08, + "loss": 1.2636, + "step": 162825 + }, + { + "epoch": 1.96, + "grad_norm": 10.437371629588064, + "learning_rate": 2.3162907062621586e-08, + "loss": 0.8134, + "step": 162828 + }, + { + "epoch": 1.96, + "grad_norm": 8.43197816490943, + "learning_rate": 2.312318616713327e-08, + "loss": 1.2302, + "step": 162831 + }, + { + "epoch": 1.96, + "grad_norm": 8.133821669595582, + "learning_rate": 2.30834993191309e-08, + "loss": 1.2294, + "step": 162834 + }, + { + "epoch": 1.96, + "grad_norm": 8.171455188468933, + "learning_rate": 2.3043846518751023e-08, + "loss": 1.08, + "step": 162837 + }, + { + "epoch": 1.96, + "grad_norm": 14.416656393523294, + "learning_rate": 2.3004227766130205e-08, + "loss": 0.9806, + "step": 162840 + }, + { + "epoch": 1.96, + "grad_norm": 3.1929024594919952, + "learning_rate": 2.296464306140056e-08, + "loss": 1.512, + "step": 162843 + }, + { + "epoch": 1.96, + "grad_norm": 8.332765408048566, + "learning_rate": 2.2925092404699757e-08, + "loss": 1.2538, + "step": 162846 + }, + { + "epoch": 1.96, + "grad_norm": 3.325130123241271, + "learning_rate": 2.288557579616102e-08, + "loss": 1.0072, + "step": 162849 + }, + { + "epoch": 1.96, + "grad_norm": 7.99831248493135, + "learning_rate": 2.284609323592091e-08, + "loss": 1.0453, + "step": 162852 + }, + { + "epoch": 1.96, + "grad_norm": 3.3646178259315658, + "learning_rate": 2.2806644724112647e-08, + "loss": 1.1994, + "step": 162855 + }, + { + "epoch": 1.96, + "grad_norm": 3.6512150204135447, + "learning_rate": 2.2767230260872796e-08, + "loss": 1.2873, + "step": 162858 + }, + { + "epoch": 1.96, + "grad_norm": 6.126888084648812, + "learning_rate": 2.2727849846333472e-08, + "loss": 0.961, + "step": 162861 + }, + { + "epoch": 1.96, + "grad_norm": 10.841848087182122, + "learning_rate": 2.268850348063123e-08, + "loss": 1.1431, + "step": 162864 + }, + { + "epoch": 1.96, + "grad_norm": 5.875205663375041, + "learning_rate": 2.2649191163898186e-08, + "loss": 1.1894, + "step": 162867 + }, + { + "epoch": 1.96, + "grad_norm": 16.77643073930227, + "learning_rate": 2.260991289626979e-08, + "loss": 1.1825, + "step": 162870 + }, + { + "epoch": 1.96, + "grad_norm": 8.97173006754162, + "learning_rate": 2.257066867788038e-08, + "loss": 1.1715, + "step": 162873 + }, + { + "epoch": 1.96, + "grad_norm": 6.461347447866875, + "learning_rate": 2.253145850886318e-08, + "loss": 0.712, + "step": 162876 + }, + { + "epoch": 1.96, + "grad_norm": 7.816104955551594, + "learning_rate": 2.2492282389352526e-08, + "loss": 1.36, + "step": 162879 + }, + { + "epoch": 1.96, + "grad_norm": 7.250898071421558, + "learning_rate": 2.2453140319481648e-08, + "loss": 0.9094, + "step": 162882 + }, + { + "epoch": 1.96, + "grad_norm": 8.137307385498545, + "learning_rate": 2.241403229938377e-08, + "loss": 1.0085, + "step": 162885 + }, + { + "epoch": 1.96, + "grad_norm": 11.31303279671278, + "learning_rate": 2.237495832919323e-08, + "loss": 1.0463, + "step": 162888 + }, + { + "epoch": 1.96, + "grad_norm": 5.874168720617136, + "learning_rate": 2.2335918409043257e-08, + "loss": 1.4251, + "step": 162891 + }, + { + "epoch": 1.96, + "grad_norm": 4.931400239295889, + "learning_rate": 2.2296912539067073e-08, + "loss": 1.1955, + "step": 162894 + }, + { + "epoch": 1.96, + "grad_norm": 4.333797351313113, + "learning_rate": 2.22579407193968e-08, + "loss": 1.0002, + "step": 162897 + }, + { + "epoch": 1.96, + "grad_norm": 6.781727141620923, + "learning_rate": 2.2219002950166768e-08, + "loss": 0.8091, + "step": 162900 + }, + { + "epoch": 1.96, + "grad_norm": 19.29848830473013, + "learning_rate": 2.21800992315091e-08, + "loss": 1.222, + "step": 162903 + }, + { + "epoch": 1.96, + "grad_norm": 9.197187571163436, + "learning_rate": 2.214122956355702e-08, + "loss": 1.3792, + "step": 162906 + }, + { + "epoch": 1.96, + "grad_norm": 3.4247273290884492, + "learning_rate": 2.2102393946442648e-08, + "loss": 1.0267, + "step": 162909 + }, + { + "epoch": 1.96, + "grad_norm": 8.259237957313074, + "learning_rate": 2.2063592380299202e-08, + "loss": 1.2526, + "step": 162912 + }, + { + "epoch": 1.96, + "grad_norm": 7.089508969381554, + "learning_rate": 2.202482486525881e-08, + "loss": 1.2339, + "step": 162915 + }, + { + "epoch": 1.96, + "grad_norm": 11.189506296176848, + "learning_rate": 2.1986091401452468e-08, + "loss": 1.2545, + "step": 162918 + }, + { + "epoch": 1.96, + "grad_norm": 10.209482085687059, + "learning_rate": 2.1947391989014522e-08, + "loss": 0.9235, + "step": 162921 + }, + { + "epoch": 1.96, + "grad_norm": 18.68394566491937, + "learning_rate": 2.1908726628075972e-08, + "loss": 1.2116, + "step": 162924 + }, + { + "epoch": 1.96, + "grad_norm": 9.97422859791835, + "learning_rate": 2.1870095318768935e-08, + "loss": 1.3655, + "step": 162927 + }, + { + "epoch": 1.96, + "grad_norm": 5.030151029074989, + "learning_rate": 2.183149806122442e-08, + "loss": 1.1607, + "step": 162930 + }, + { + "epoch": 1.96, + "grad_norm": 2.690794980074191, + "learning_rate": 2.1792934855575653e-08, + "loss": 1.0875, + "step": 162933 + }, + { + "epoch": 1.96, + "grad_norm": 6.9203559303162585, + "learning_rate": 2.175440570195253e-08, + "loss": 1.078, + "step": 162936 + }, + { + "epoch": 1.96, + "grad_norm": 6.830057084707299, + "learning_rate": 2.1715910600488277e-08, + "loss": 1.2883, + "step": 162939 + }, + { + "epoch": 1.96, + "grad_norm": 5.241329043020774, + "learning_rate": 2.16774495513139e-08, + "loss": 1.4131, + "step": 162942 + }, + { + "epoch": 1.96, + "grad_norm": 5.23762431386463, + "learning_rate": 2.1639022554559296e-08, + "loss": 1.2009, + "step": 162945 + }, + { + "epoch": 1.96, + "grad_norm": 6.571356905573772, + "learning_rate": 2.1600629610356583e-08, + "loss": 1.5244, + "step": 162948 + }, + { + "epoch": 1.96, + "grad_norm": 6.009959086023635, + "learning_rate": 2.1562270718836765e-08, + "loss": 1.1024, + "step": 162951 + }, + { + "epoch": 1.96, + "grad_norm": 5.7094731586718295, + "learning_rate": 2.1523945880130846e-08, + "loss": 1.5539, + "step": 162954 + }, + { + "epoch": 1.96, + "grad_norm": 7.509646290679186, + "learning_rate": 2.148565509436984e-08, + "loss": 1.1874, + "step": 162957 + }, + { + "epoch": 1.96, + "grad_norm": 11.794870690655726, + "learning_rate": 2.1447398361683635e-08, + "loss": 1.2611, + "step": 162960 + }, + { + "epoch": 1.96, + "grad_norm": 8.360938608222135, + "learning_rate": 2.1409175682203244e-08, + "loss": 1.1795, + "step": 162963 + }, + { + "epoch": 1.96, + "grad_norm": 10.50399247742215, + "learning_rate": 2.1370987056059667e-08, + "loss": 0.8449, + "step": 162966 + }, + { + "epoch": 1.96, + "grad_norm": 12.64808281101631, + "learning_rate": 2.1332832483381694e-08, + "loss": 1.6025, + "step": 162969 + }, + { + "epoch": 1.96, + "grad_norm": 9.058162546341213, + "learning_rate": 2.129471196430144e-08, + "loss": 0.8411, + "step": 162972 + }, + { + "epoch": 1.96, + "grad_norm": 6.256526142253385, + "learning_rate": 2.125662549894658e-08, + "loss": 1.1684, + "step": 162975 + }, + { + "epoch": 1.96, + "grad_norm": 6.245622078690468, + "learning_rate": 2.1218573087449236e-08, + "loss": 1.3318, + "step": 162978 + }, + { + "epoch": 1.96, + "grad_norm": 3.2650860934205927, + "learning_rate": 2.1180554729939294e-08, + "loss": 0.8569, + "step": 162981 + }, + { + "epoch": 1.96, + "grad_norm": 20.60252742643115, + "learning_rate": 2.114257042654444e-08, + "loss": 1.2694, + "step": 162984 + }, + { + "epoch": 1.96, + "grad_norm": 9.179039048945853, + "learning_rate": 2.1104620177396784e-08, + "loss": 1.1862, + "step": 162987 + }, + { + "epoch": 1.96, + "grad_norm": 14.00858178702258, + "learning_rate": 2.1066703982624003e-08, + "loss": 1.1937, + "step": 162990 + }, + { + "epoch": 1.96, + "grad_norm": 12.107157408873396, + "learning_rate": 2.1028821842357105e-08, + "loss": 0.9933, + "step": 162993 + }, + { + "epoch": 1.96, + "grad_norm": 7.64970033625758, + "learning_rate": 2.0990973756723764e-08, + "loss": 0.8388, + "step": 162996 + }, + { + "epoch": 1.96, + "grad_norm": 5.787392612373224, + "learning_rate": 2.0953159725853876e-08, + "loss": 1.1606, + "step": 162999 + }, + { + "epoch": 1.96, + "grad_norm": 8.532522556804647, + "learning_rate": 2.091537974987623e-08, + "loss": 1.1482, + "step": 163002 + }, + { + "epoch": 1.96, + "grad_norm": 7.742063169820301, + "learning_rate": 2.0877633828919606e-08, + "loss": 1.4688, + "step": 163005 + }, + { + "epoch": 1.96, + "grad_norm": 3.69269923551459, + "learning_rate": 2.0839921963113907e-08, + "loss": 0.9556, + "step": 163008 + }, + { + "epoch": 1.96, + "grad_norm": 4.02814394575937, + "learning_rate": 2.0802244152586803e-08, + "loss": 1.1544, + "step": 163011 + }, + { + "epoch": 1.96, + "grad_norm": 9.13752128536602, + "learning_rate": 2.0764600397467083e-08, + "loss": 1.2553, + "step": 163014 + }, + { + "epoch": 1.96, + "grad_norm": 6.696934719156367, + "learning_rate": 2.072699069788242e-08, + "loss": 1.206, + "step": 163017 + }, + { + "epoch": 1.96, + "grad_norm": 8.918803804697331, + "learning_rate": 2.0689415053963825e-08, + "loss": 1.1729, + "step": 163020 + }, + { + "epoch": 1.96, + "grad_norm": 9.733373902310566, + "learning_rate": 2.0651873465836745e-08, + "loss": 1.2574, + "step": 163023 + }, + { + "epoch": 1.96, + "grad_norm": 11.664793849951785, + "learning_rate": 2.0614365933629975e-08, + "loss": 1.0207, + "step": 163026 + }, + { + "epoch": 1.96, + "grad_norm": 27.62851615182796, + "learning_rate": 2.05768924574723e-08, + "loss": 0.9512, + "step": 163029 + }, + { + "epoch": 1.96, + "grad_norm": 7.52450362898534, + "learning_rate": 2.0539453037491387e-08, + "loss": 1.2898, + "step": 163032 + }, + { + "epoch": 1.96, + "grad_norm": 15.617216724330923, + "learning_rate": 2.050204767381381e-08, + "loss": 0.8527, + "step": 163035 + }, + { + "epoch": 1.96, + "grad_norm": 15.293280909431532, + "learning_rate": 2.046467636656835e-08, + "loss": 1.3663, + "step": 163038 + }, + { + "epoch": 1.96, + "grad_norm": 6.592340717683845, + "learning_rate": 2.0427339115882684e-08, + "loss": 1.0754, + "step": 163041 + }, + { + "epoch": 1.96, + "grad_norm": 3.153415794186155, + "learning_rate": 2.0390035921884488e-08, + "loss": 1.2227, + "step": 163044 + }, + { + "epoch": 1.96, + "grad_norm": 9.941550373570553, + "learning_rate": 2.035276678469922e-08, + "loss": 1.2494, + "step": 163047 + }, + { + "epoch": 1.96, + "grad_norm": 2.8121544639466336, + "learning_rate": 2.031553170445566e-08, + "loss": 1.1307, + "step": 163050 + }, + { + "epoch": 1.96, + "grad_norm": 8.202532535307284, + "learning_rate": 2.0278330681281488e-08, + "loss": 1.3716, + "step": 163053 + }, + { + "epoch": 1.96, + "grad_norm": 3.7174339035717763, + "learning_rate": 2.024116371530216e-08, + "loss": 1.1382, + "step": 163056 + }, + { + "epoch": 1.96, + "grad_norm": 6.954920286352653, + "learning_rate": 2.0204030806644236e-08, + "loss": 1.4721, + "step": 163059 + }, + { + "epoch": 1.96, + "grad_norm": 8.122444681739399, + "learning_rate": 2.0166931955436507e-08, + "loss": 0.9191, + "step": 163062 + }, + { + "epoch": 1.96, + "grad_norm": 5.246191081455839, + "learning_rate": 2.0129867161803317e-08, + "loss": 1.0799, + "step": 163065 + }, + { + "epoch": 1.96, + "grad_norm": 11.675312688506976, + "learning_rate": 2.009283642587234e-08, + "loss": 1.1452, + "step": 163068 + }, + { + "epoch": 1.96, + "grad_norm": 4.42614480428936, + "learning_rate": 2.0055839747770145e-08, + "loss": 1.1741, + "step": 163071 + }, + { + "epoch": 1.96, + "grad_norm": 6.931892201685514, + "learning_rate": 2.0018877127622182e-08, + "loss": 0.7263, + "step": 163074 + }, + { + "epoch": 1.96, + "grad_norm": 6.087316288196983, + "learning_rate": 1.998194856555613e-08, + "loss": 0.7837, + "step": 163077 + }, + { + "epoch": 1.96, + "grad_norm": 5.638603267768349, + "learning_rate": 1.9945054061695225e-08, + "loss": 1.0962, + "step": 163080 + }, + { + "epoch": 1.96, + "grad_norm": 9.991367610201507, + "learning_rate": 1.990819361616825e-08, + "loss": 0.8485, + "step": 163083 + }, + { + "epoch": 1.96, + "grad_norm": 9.987453430368253, + "learning_rate": 1.9871367229099547e-08, + "loss": 1.115, + "step": 163086 + }, + { + "epoch": 1.96, + "grad_norm": 10.430010530092282, + "learning_rate": 1.983457490061458e-08, + "loss": 1.2242, + "step": 163089 + }, + { + "epoch": 1.96, + "grad_norm": 11.069976283465698, + "learning_rate": 1.9797816630839905e-08, + "loss": 0.6709, + "step": 163092 + }, + { + "epoch": 1.96, + "grad_norm": 7.152380179781666, + "learning_rate": 1.9761092419899875e-08, + "loss": 0.8719, + "step": 163095 + }, + { + "epoch": 1.96, + "grad_norm": 6.90434948292702, + "learning_rate": 1.972440226792105e-08, + "loss": 1.581, + "step": 163098 + }, + { + "epoch": 1.96, + "grad_norm": 9.494185677402815, + "learning_rate": 1.9687746175027777e-08, + "loss": 1.0368, + "step": 163101 + }, + { + "epoch": 1.96, + "grad_norm": 8.089361237552156, + "learning_rate": 1.965112414134551e-08, + "loss": 1.0729, + "step": 163104 + }, + { + "epoch": 1.96, + "grad_norm": 8.834060018535908, + "learning_rate": 1.96145361669986e-08, + "loss": 0.8199, + "step": 163107 + }, + { + "epoch": 1.96, + "grad_norm": 6.778010271438105, + "learning_rate": 1.9577982252112494e-08, + "loss": 1.4318, + "step": 163110 + }, + { + "epoch": 1.96, + "grad_norm": 6.733363235388396, + "learning_rate": 1.9541462396811538e-08, + "loss": 1.2025, + "step": 163113 + }, + { + "epoch": 1.96, + "grad_norm": 9.94015155571326, + "learning_rate": 1.9504976601221194e-08, + "loss": 1.4478, + "step": 163116 + }, + { + "epoch": 1.96, + "grad_norm": 13.912808702971432, + "learning_rate": 1.946852486546469e-08, + "loss": 1.232, + "step": 163119 + }, + { + "epoch": 1.96, + "grad_norm": 12.89928656029275, + "learning_rate": 1.943210718966748e-08, + "loss": 0.7812, + "step": 163122 + }, + { + "epoch": 1.96, + "grad_norm": 4.714623322883283, + "learning_rate": 1.9395723573953917e-08, + "loss": 1.0112, + "step": 163125 + }, + { + "epoch": 1.96, + "grad_norm": 4.464852437306779, + "learning_rate": 1.935937401844723e-08, + "loss": 0.9265, + "step": 163128 + }, + { + "epoch": 1.96, + "grad_norm": 6.065308719004935, + "learning_rate": 1.9323058523271765e-08, + "loss": 1.0603, + "step": 163131 + }, + { + "epoch": 1.96, + "grad_norm": 6.316198608871104, + "learning_rate": 1.9286777088551866e-08, + "loss": 1.3813, + "step": 163134 + }, + { + "epoch": 1.96, + "grad_norm": 9.044730618322237, + "learning_rate": 1.925052971441188e-08, + "loss": 1.3215, + "step": 163137 + }, + { + "epoch": 1.96, + "grad_norm": 7.05389328933295, + "learning_rate": 1.921431640097393e-08, + "loss": 1.129, + "step": 163140 + }, + { + "epoch": 1.96, + "grad_norm": 3.1398400786313774, + "learning_rate": 1.9178137148363474e-08, + "loss": 1.1104, + "step": 163143 + }, + { + "epoch": 1.96, + "grad_norm": 4.850740187508774, + "learning_rate": 1.9141991956701523e-08, + "loss": 1.3473, + "step": 163146 + }, + { + "epoch": 1.96, + "grad_norm": 5.104218050407865, + "learning_rate": 1.9105880826113534e-08, + "loss": 1.2057, + "step": 163149 + }, + { + "epoch": 1.96, + "grad_norm": 2.988697641298687, + "learning_rate": 1.906980375672274e-08, + "loss": 1.1668, + "step": 163152 + }, + { + "epoch": 1.96, + "grad_norm": 2.7073908469688135, + "learning_rate": 1.9033760748651265e-08, + "loss": 1.0146, + "step": 163155 + }, + { + "epoch": 1.96, + "grad_norm": 8.993550631225597, + "learning_rate": 1.8997751802022347e-08, + "loss": 1.4074, + "step": 163158 + }, + { + "epoch": 1.96, + "grad_norm": 7.631057812957002, + "learning_rate": 1.8961776916958107e-08, + "loss": 1.1878, + "step": 163161 + }, + { + "epoch": 1.96, + "grad_norm": 5.885695269562691, + "learning_rate": 1.8925836093582894e-08, + "loss": 0.8519, + "step": 163164 + }, + { + "epoch": 1.96, + "grad_norm": 8.75144211128064, + "learning_rate": 1.888992933201883e-08, + "loss": 1.4021, + "step": 163167 + }, + { + "epoch": 1.96, + "grad_norm": 5.8151716845609345, + "learning_rate": 1.885405663238804e-08, + "loss": 1.2104, + "step": 163170 + }, + { + "epoch": 1.96, + "grad_norm": 9.584090034568648, + "learning_rate": 1.881821799481376e-08, + "loss": 1.3876, + "step": 163173 + }, + { + "epoch": 1.96, + "grad_norm": 8.11566393845199, + "learning_rate": 1.8782413419417e-08, + "loss": 1.1868, + "step": 163176 + }, + { + "epoch": 1.96, + "grad_norm": 3.016340858524786, + "learning_rate": 1.874664290631989e-08, + "loss": 0.9911, + "step": 163179 + }, + { + "epoch": 1.96, + "grad_norm": 8.723572700337696, + "learning_rate": 1.871090645564677e-08, + "loss": 0.8837, + "step": 163182 + }, + { + "epoch": 1.96, + "grad_norm": 7.979047914744239, + "learning_rate": 1.867520406751755e-08, + "loss": 1.0801, + "step": 163185 + }, + { + "epoch": 1.96, + "grad_norm": 3.9540268560360903, + "learning_rate": 1.8639535742054348e-08, + "loss": 1.1839, + "step": 163188 + }, + { + "epoch": 1.96, + "grad_norm": 11.660152554342368, + "learning_rate": 1.8603901479379293e-08, + "loss": 1.4622, + "step": 163191 + }, + { + "epoch": 1.96, + "grad_norm": 3.2574539912983873, + "learning_rate": 1.85683012796134e-08, + "loss": 1.1739, + "step": 163194 + }, + { + "epoch": 1.96, + "grad_norm": 7.257873038482243, + "learning_rate": 1.8532735142878788e-08, + "loss": 1.1667, + "step": 163197 + }, + { + "epoch": 1.96, + "grad_norm": 3.779121778786073, + "learning_rate": 1.8497203069297587e-08, + "loss": 0.9934, + "step": 163200 + }, + { + "epoch": 1.96, + "grad_norm": 7.360600822287748, + "learning_rate": 1.84617050589897e-08, + "loss": 1.0378, + "step": 163203 + }, + { + "epoch": 1.96, + "grad_norm": 3.7228735536655337, + "learning_rate": 1.8426241112076137e-08, + "loss": 1.2872, + "step": 163206 + }, + { + "epoch": 1.96, + "grad_norm": 7.262672994525509, + "learning_rate": 1.839081122867903e-08, + "loss": 1.3497, + "step": 163209 + }, + { + "epoch": 1.96, + "grad_norm": 27.28046485351587, + "learning_rate": 1.8355415408918276e-08, + "loss": 1.0454, + "step": 163212 + }, + { + "epoch": 1.96, + "grad_norm": 8.856168279287823, + "learning_rate": 1.8320053652914894e-08, + "loss": 1.1326, + "step": 163215 + }, + { + "epoch": 1.96, + "grad_norm": 13.284647444335565, + "learning_rate": 1.8284725960789895e-08, + "loss": 1.1771, + "step": 163218 + }, + { + "epoch": 1.96, + "grad_norm": 9.975526556001524, + "learning_rate": 1.8249432332664298e-08, + "loss": 1.1613, + "step": 163221 + }, + { + "epoch": 1.96, + "grad_norm": 4.504341405476826, + "learning_rate": 1.821417276865689e-08, + "loss": 1.2305, + "step": 163224 + }, + { + "epoch": 1.96, + "grad_norm": 6.4829738555664855, + "learning_rate": 1.8178947268890913e-08, + "loss": 0.755, + "step": 163227 + }, + { + "epoch": 1.96, + "grad_norm": 8.508287218166345, + "learning_rate": 1.8143755833482933e-08, + "loss": 0.6741, + "step": 163230 + }, + { + "epoch": 1.96, + "grad_norm": 8.334380849865976, + "learning_rate": 1.8108598462556194e-08, + "loss": 1.1684, + "step": 163233 + }, + { + "epoch": 1.96, + "grad_norm": 15.369498644114094, + "learning_rate": 1.807347515622837e-08, + "loss": 1.0075, + "step": 163236 + }, + { + "epoch": 1.96, + "grad_norm": 12.882175937165783, + "learning_rate": 1.803838591462048e-08, + "loss": 1.482, + "step": 163239 + }, + { + "epoch": 1.96, + "grad_norm": 25.031436970790875, + "learning_rate": 1.800333073785243e-08, + "loss": 1.0571, + "step": 163242 + }, + { + "epoch": 1.96, + "grad_norm": 5.33869654698444, + "learning_rate": 1.796830962604412e-08, + "loss": 1.3769, + "step": 163245 + }, + { + "epoch": 1.96, + "grad_norm": 7.63257574509442, + "learning_rate": 1.7933322579313238e-08, + "loss": 1.1171, + "step": 163248 + }, + { + "epoch": 1.96, + "grad_norm": 6.294421802786234, + "learning_rate": 1.7898369597780795e-08, + "loss": 0.9739, + "step": 163251 + }, + { + "epoch": 1.96, + "grad_norm": 5.380390628042527, + "learning_rate": 1.7863450681565587e-08, + "loss": 1.2352, + "step": 163254 + }, + { + "epoch": 1.96, + "grad_norm": 8.259384431715034, + "learning_rate": 1.7828565830787514e-08, + "loss": 0.9653, + "step": 163257 + }, + { + "epoch": 1.96, + "grad_norm": 7.6798929993948155, + "learning_rate": 1.7793715045564264e-08, + "loss": 1.2948, + "step": 163260 + }, + { + "epoch": 1.96, + "grad_norm": 5.264101541101287, + "learning_rate": 1.775889832601574e-08, + "loss": 1.0522, + "step": 163263 + }, + { + "epoch": 1.96, + "grad_norm": 7.97061344599039, + "learning_rate": 1.7724115672260732e-08, + "loss": 1.2727, + "step": 163266 + }, + { + "epoch": 1.96, + "grad_norm": 5.987403989129248, + "learning_rate": 1.768936708441804e-08, + "loss": 1.2006, + "step": 163269 + }, + { + "epoch": 1.96, + "grad_norm": 2.784348476266324, + "learning_rate": 1.7654652562605345e-08, + "loss": 1.2269, + "step": 163272 + }, + { + "epoch": 1.96, + "grad_norm": 17.6761093377676, + "learning_rate": 1.761997210694144e-08, + "loss": 1.3107, + "step": 163275 + }, + { + "epoch": 1.96, + "grad_norm": 12.519545448820876, + "learning_rate": 1.758532571754512e-08, + "loss": 1.2783, + "step": 163278 + }, + { + "epoch": 1.96, + "grad_norm": 6.393632481317287, + "learning_rate": 1.755071339453407e-08, + "loss": 0.9388, + "step": 163281 + }, + { + "epoch": 1.96, + "grad_norm": 12.416751980602223, + "learning_rate": 1.751613513802708e-08, + "loss": 1.2328, + "step": 163284 + }, + { + "epoch": 1.96, + "grad_norm": 4.552683797251322, + "learning_rate": 1.7481590948142945e-08, + "loss": 1.1146, + "step": 163287 + }, + { + "epoch": 1.96, + "grad_norm": 8.62727745365484, + "learning_rate": 1.744708082499713e-08, + "loss": 0.9252, + "step": 163290 + }, + { + "epoch": 1.96, + "grad_norm": 7.971501283894553, + "learning_rate": 1.741260476870954e-08, + "loss": 1.0263, + "step": 163293 + }, + { + "epoch": 1.96, + "grad_norm": 13.04118576179883, + "learning_rate": 1.7378162779396745e-08, + "loss": 1.0901, + "step": 163296 + }, + { + "epoch": 1.96, + "grad_norm": 3.5296537235882472, + "learning_rate": 1.7343754857176432e-08, + "loss": 0.8363, + "step": 163299 + }, + { + "epoch": 1.96, + "grad_norm": 9.461516260559874, + "learning_rate": 1.730938100216517e-08, + "loss": 0.9838, + "step": 163302 + }, + { + "epoch": 1.96, + "grad_norm": 9.438259865145968, + "learning_rate": 1.7275041214482868e-08, + "loss": 1.2085, + "step": 163305 + }, + { + "epoch": 1.96, + "grad_norm": 24.247672513415864, + "learning_rate": 1.7240735494243877e-08, + "loss": 1.2156, + "step": 163308 + }, + { + "epoch": 1.96, + "grad_norm": 6.703226691728573, + "learning_rate": 1.72064638415681e-08, + "loss": 0.9821, + "step": 163311 + }, + { + "epoch": 1.96, + "grad_norm": 6.857440081846892, + "learning_rate": 1.7172226256569892e-08, + "loss": 1.3804, + "step": 163314 + }, + { + "epoch": 1.96, + "grad_norm": 12.808971440579505, + "learning_rate": 1.713802273936693e-08, + "loss": 1.065, + "step": 163317 + }, + { + "epoch": 1.96, + "grad_norm": 9.342380055851441, + "learning_rate": 1.71038532900758e-08, + "loss": 1.2621, + "step": 163320 + }, + { + "epoch": 1.96, + "grad_norm": 4.190574420010616, + "learning_rate": 1.7069717908814176e-08, + "loss": 0.9008, + "step": 163323 + }, + { + "epoch": 1.96, + "grad_norm": 8.270833557659138, + "learning_rate": 1.7035616595696412e-08, + "loss": 1.3559, + "step": 163326 + }, + { + "epoch": 1.96, + "grad_norm": 9.791581303285723, + "learning_rate": 1.7001549350841306e-08, + "loss": 1.106, + "step": 163329 + }, + { + "epoch": 1.96, + "grad_norm": 4.6840618951687025, + "learning_rate": 1.6967516174364318e-08, + "loss": 0.9321, + "step": 163332 + }, + { + "epoch": 1.96, + "grad_norm": 5.138143843848427, + "learning_rate": 1.6933517066380913e-08, + "loss": 1.0953, + "step": 163335 + }, + { + "epoch": 1.96, + "grad_norm": 17.368919838977366, + "learning_rate": 1.689955202700766e-08, + "loss": 1.2569, + "step": 163338 + }, + { + "epoch": 1.96, + "grad_norm": 14.52556105989712, + "learning_rate": 1.6865621056360026e-08, + "loss": 1.1507, + "step": 163341 + }, + { + "epoch": 1.96, + "grad_norm": 15.110822293244786, + "learning_rate": 1.6831724154554584e-08, + "loss": 1.4823, + "step": 163344 + }, + { + "epoch": 1.96, + "grad_norm": 2.7827125905403873, + "learning_rate": 1.679786132170569e-08, + "loss": 1.2178, + "step": 163347 + }, + { + "epoch": 1.96, + "grad_norm": 6.503482422048106, + "learning_rate": 1.676403255793102e-08, + "loss": 1.0178, + "step": 163350 + }, + { + "epoch": 1.96, + "grad_norm": 5.28386332221206, + "learning_rate": 1.6730237863343825e-08, + "loss": 1.0299, + "step": 163353 + }, + { + "epoch": 1.96, + "grad_norm": 10.817327241591506, + "learning_rate": 1.669647723806067e-08, + "loss": 1.2302, + "step": 163356 + }, + { + "epoch": 1.96, + "grad_norm": 8.240278826535933, + "learning_rate": 1.6662750682197025e-08, + "loss": 1.1601, + "step": 163359 + }, + { + "epoch": 1.96, + "grad_norm": 6.059286413550945, + "learning_rate": 1.6629058195866133e-08, + "loss": 1.1223, + "step": 163362 + }, + { + "epoch": 1.96, + "grad_norm": 6.219270366475119, + "learning_rate": 1.6595399779185673e-08, + "loss": 1.0583, + "step": 163365 + }, + { + "epoch": 1.96, + "grad_norm": 20.186769285607884, + "learning_rate": 1.656177543226778e-08, + "loss": 1.319, + "step": 163368 + }, + { + "epoch": 1.96, + "grad_norm": 8.551050722522058, + "learning_rate": 1.6528185155229025e-08, + "loss": 1.3903, + "step": 163371 + }, + { + "epoch": 1.96, + "grad_norm": 15.588042941257111, + "learning_rate": 1.6494628948183766e-08, + "loss": 1.4235, + "step": 163374 + }, + { + "epoch": 1.96, + "grad_norm": 11.29717812722664, + "learning_rate": 1.6461106811246353e-08, + "loss": 1.6023, + "step": 163377 + }, + { + "epoch": 1.96, + "grad_norm": 14.529548188366567, + "learning_rate": 1.642761874453114e-08, + "loss": 0.9881, + "step": 163380 + }, + { + "epoch": 1.96, + "grad_norm": 8.960660903985485, + "learning_rate": 1.6394164748152475e-08, + "loss": 1.1143, + "step": 163383 + }, + { + "epoch": 1.96, + "grad_norm": 6.861955829356995, + "learning_rate": 1.6360744822224717e-08, + "loss": 1.1375, + "step": 163386 + }, + { + "epoch": 1.96, + "grad_norm": 8.575814963411057, + "learning_rate": 1.6327358966861106e-08, + "loss": 1.0593, + "step": 163389 + }, + { + "epoch": 1.96, + "grad_norm": 9.916407817219367, + "learning_rate": 1.6294007182175997e-08, + "loss": 0.9467, + "step": 163392 + }, + { + "epoch": 1.96, + "grad_norm": 5.066134892541655, + "learning_rate": 1.6260689468283742e-08, + "loss": 1.297, + "step": 163395 + }, + { + "epoch": 1.96, + "grad_norm": 21.267042885886447, + "learning_rate": 1.6227405825297583e-08, + "loss": 0.9713, + "step": 163398 + }, + { + "epoch": 1.96, + "grad_norm": 9.574416187642186, + "learning_rate": 1.6194156253330762e-08, + "loss": 0.919, + "step": 163401 + }, + { + "epoch": 1.96, + "grad_norm": 19.657940402768347, + "learning_rate": 1.6160940752496522e-08, + "loss": 1.0951, + "step": 163404 + }, + { + "epoch": 1.96, + "grad_norm": 12.417324208068372, + "learning_rate": 1.612775932291033e-08, + "loss": 1.1641, + "step": 163407 + }, + { + "epoch": 1.96, + "grad_norm": 5.50928183833278, + "learning_rate": 1.6094611964683204e-08, + "loss": 0.924, + "step": 163410 + }, + { + "epoch": 1.96, + "grad_norm": 7.253571437548919, + "learning_rate": 1.606149867792839e-08, + "loss": 1.0562, + "step": 163413 + }, + { + "epoch": 1.97, + "grad_norm": 7.81885190684086, + "learning_rate": 1.6028419462760236e-08, + "loss": 0.9193, + "step": 163416 + }, + { + "epoch": 1.97, + "grad_norm": 6.5129978288953145, + "learning_rate": 1.5995374319290878e-08, + "loss": 0.9985, + "step": 163419 + }, + { + "epoch": 1.97, + "grad_norm": 9.749552205286447, + "learning_rate": 1.5962363247632452e-08, + "loss": 1.2552, + "step": 163422 + }, + { + "epoch": 1.97, + "grad_norm": 15.762593734521314, + "learning_rate": 1.5929386247899303e-08, + "loss": 0.864, + "step": 163425 + }, + { + "epoch": 1.97, + "grad_norm": 8.23490375077399, + "learning_rate": 1.589644332020246e-08, + "loss": 1.064, + "step": 163428 + }, + { + "epoch": 1.97, + "grad_norm": 7.3988013678030695, + "learning_rate": 1.586353446465405e-08, + "loss": 1.1861, + "step": 163431 + }, + { + "epoch": 1.97, + "grad_norm": 9.937178087069517, + "learning_rate": 1.5830659681368432e-08, + "loss": 1.1483, + "step": 163434 + }, + { + "epoch": 1.97, + "grad_norm": 8.032023766856764, + "learning_rate": 1.5797818970455513e-08, + "loss": 1.2858, + "step": 163437 + }, + { + "epoch": 1.97, + "grad_norm": 15.320915826247912, + "learning_rate": 1.576501233202854e-08, + "loss": 0.9854, + "step": 163440 + }, + { + "epoch": 1.97, + "grad_norm": 2.292001367724669, + "learning_rate": 1.5732239766199642e-08, + "loss": 1.2213, + "step": 163443 + }, + { + "epoch": 1.97, + "grad_norm": 13.398646600637191, + "learning_rate": 1.5699501273079844e-08, + "loss": 1.12, + "step": 163446 + }, + { + "epoch": 1.97, + "grad_norm": 10.49664685913298, + "learning_rate": 1.5666796852781275e-08, + "loss": 1.1125, + "step": 163449 + }, + { + "epoch": 1.97, + "grad_norm": 10.450191314756772, + "learning_rate": 1.563412650541607e-08, + "loss": 1.234, + "step": 163452 + }, + { + "epoch": 1.97, + "grad_norm": 3.055059083379772, + "learning_rate": 1.560149023109525e-08, + "loss": 1.1307, + "step": 163455 + }, + { + "epoch": 1.97, + "grad_norm": 13.275223019154378, + "learning_rate": 1.556888802992984e-08, + "loss": 1.0782, + "step": 163458 + }, + { + "epoch": 1.97, + "grad_norm": 4.555818224041046, + "learning_rate": 1.553631990203197e-08, + "loss": 1.3171, + "step": 163461 + }, + { + "epoch": 1.97, + "grad_norm": 5.577549947831694, + "learning_rate": 1.5503785847511556e-08, + "loss": 0.9271, + "step": 163464 + }, + { + "epoch": 1.97, + "grad_norm": 8.455324664097349, + "learning_rate": 1.5471285866480723e-08, + "loss": 1.36, + "step": 163467 + }, + { + "epoch": 1.97, + "grad_norm": 8.916762904022413, + "learning_rate": 1.543881995904939e-08, + "loss": 1.0059, + "step": 163470 + }, + { + "epoch": 1.97, + "grad_norm": 11.919835943240967, + "learning_rate": 1.5406388125329685e-08, + "loss": 1.4138, + "step": 163473 + }, + { + "epoch": 1.97, + "grad_norm": 3.8754458995676253, + "learning_rate": 1.5373990365431523e-08, + "loss": 1.2487, + "step": 163476 + }, + { + "epoch": 1.97, + "grad_norm": 5.972810673299341, + "learning_rate": 1.5341626679464816e-08, + "loss": 1.1916, + "step": 163479 + }, + { + "epoch": 1.97, + "grad_norm": 10.120352071199516, + "learning_rate": 1.5309297067541696e-08, + "loss": 0.9808, + "step": 163482 + }, + { + "epoch": 1.97, + "grad_norm": 13.398386740235361, + "learning_rate": 1.5277001529769854e-08, + "loss": 0.7977, + "step": 163485 + }, + { + "epoch": 1.97, + "grad_norm": 5.20126015475948, + "learning_rate": 1.5244740066262532e-08, + "loss": 1.0954, + "step": 163488 + }, + { + "epoch": 1.97, + "grad_norm": 10.267686903279422, + "learning_rate": 1.5212512677128533e-08, + "loss": 0.831, + "step": 163491 + }, + { + "epoch": 1.97, + "grad_norm": 7.495976006053253, + "learning_rate": 1.518031936247666e-08, + "loss": 1.2433, + "step": 163494 + }, + { + "epoch": 1.97, + "grad_norm": 13.983817928275975, + "learning_rate": 1.5148160122417932e-08, + "loss": 1.1202, + "step": 163497 + }, + { + "epoch": 1.97, + "grad_norm": 11.016669065678972, + "learning_rate": 1.5116034957063376e-08, + "loss": 1.3082, + "step": 163500 + }, + { + "epoch": 1.97, + "grad_norm": 2.1858631037832117, + "learning_rate": 1.508394386651957e-08, + "loss": 0.6825, + "step": 163503 + }, + { + "epoch": 1.97, + "grad_norm": 10.745537258190987, + "learning_rate": 1.5051886850898645e-08, + "loss": 1.1157, + "step": 163506 + }, + { + "epoch": 1.97, + "grad_norm": 5.4050792570005965, + "learning_rate": 1.5019863910309406e-08, + "loss": 1.0396, + "step": 163509 + }, + { + "epoch": 1.97, + "grad_norm": 6.085640190326654, + "learning_rate": 1.4987875044860656e-08, + "loss": 0.6943, + "step": 163512 + }, + { + "epoch": 1.97, + "grad_norm": 4.297837186896029, + "learning_rate": 1.4955920254661193e-08, + "loss": 0.9772, + "step": 163515 + }, + { + "epoch": 1.97, + "grad_norm": 6.995825674379554, + "learning_rate": 1.4923999539819823e-08, + "loss": 0.9501, + "step": 163518 + }, + { + "epoch": 1.97, + "grad_norm": 7.496698379383206, + "learning_rate": 1.4892112900447564e-08, + "loss": 1.1668, + "step": 163521 + }, + { + "epoch": 1.97, + "grad_norm": 4.206924007653664, + "learning_rate": 1.486026033665211e-08, + "loss": 1.2706, + "step": 163524 + }, + { + "epoch": 1.97, + "grad_norm": 3.4881123817607302, + "learning_rate": 1.4828441848541153e-08, + "loss": 1.1742, + "step": 163527 + }, + { + "epoch": 1.97, + "grad_norm": 4.918099805383284, + "learning_rate": 1.4796657436223493e-08, + "loss": 0.9286, + "step": 163530 + }, + { + "epoch": 1.97, + "grad_norm": 6.819421577580535, + "learning_rate": 1.4764907099809046e-08, + "loss": 1.1833, + "step": 163533 + }, + { + "epoch": 1.97, + "grad_norm": 8.069509400651244, + "learning_rate": 1.4733190839405498e-08, + "loss": 1.0973, + "step": 163536 + }, + { + "epoch": 1.97, + "grad_norm": 11.526169377609822, + "learning_rate": 1.4701508655120544e-08, + "loss": 1.0859, + "step": 163539 + }, + { + "epoch": 1.97, + "grad_norm": 3.8650973495033782, + "learning_rate": 1.4669860547062985e-08, + "loss": 0.956, + "step": 163542 + }, + { + "epoch": 1.97, + "grad_norm": 5.529378635605561, + "learning_rate": 1.4638246515340515e-08, + "loss": 1.1291, + "step": 163545 + }, + { + "epoch": 1.97, + "grad_norm": 14.920426973003341, + "learning_rate": 1.4606666560060823e-08, + "loss": 1.3551, + "step": 163548 + }, + { + "epoch": 1.97, + "grad_norm": 24.28172437075538, + "learning_rate": 1.4575120681331601e-08, + "loss": 1.0892, + "step": 163551 + }, + { + "epoch": 1.97, + "grad_norm": 4.138922867599157, + "learning_rate": 1.454360887926054e-08, + "loss": 0.8184, + "step": 163554 + }, + { + "epoch": 1.97, + "grad_norm": 6.7797996606429045, + "learning_rate": 1.4512131153955334e-08, + "loss": 1.3455, + "step": 163557 + }, + { + "epoch": 1.97, + "grad_norm": 3.88858659778122, + "learning_rate": 1.4480687505524783e-08, + "loss": 1.0255, + "step": 163560 + }, + { + "epoch": 1.97, + "grad_norm": 6.742400709809512, + "learning_rate": 1.4449277934073247e-08, + "loss": 1.1621, + "step": 163563 + }, + { + "epoch": 1.97, + "grad_norm": 27.352331087191196, + "learning_rate": 1.441790243971064e-08, + "loss": 1.5193, + "step": 163566 + }, + { + "epoch": 1.97, + "grad_norm": 6.854666395286727, + "learning_rate": 1.4386561022542433e-08, + "loss": 1.1426, + "step": 163569 + }, + { + "epoch": 1.97, + "grad_norm": 11.883991468871065, + "learning_rate": 1.4355253682676318e-08, + "loss": 1.2169, + "step": 163572 + }, + { + "epoch": 1.97, + "grad_norm": 5.178168229337368, + "learning_rate": 1.4323980420218875e-08, + "loss": 0.8984, + "step": 163575 + }, + { + "epoch": 1.97, + "grad_norm": 16.407458979629507, + "learning_rate": 1.4292741235276685e-08, + "loss": 1.1307, + "step": 163578 + }, + { + "epoch": 1.97, + "grad_norm": 8.84002052122632, + "learning_rate": 1.4261536127956332e-08, + "loss": 1.0564, + "step": 163581 + }, + { + "epoch": 1.97, + "grad_norm": 4.080057112554027, + "learning_rate": 1.4230365098365506e-08, + "loss": 1.2147, + "step": 163584 + }, + { + "epoch": 1.97, + "grad_norm": 6.707028153344718, + "learning_rate": 1.4199228146608567e-08, + "loss": 0.9919, + "step": 163587 + }, + { + "epoch": 1.97, + "grad_norm": 5.114118273630269, + "learning_rate": 1.416812527279321e-08, + "loss": 1.0032, + "step": 163590 + }, + { + "epoch": 1.97, + "grad_norm": 2.980785165998518, + "learning_rate": 1.4137056477024902e-08, + "loss": 1.1008, + "step": 163593 + }, + { + "epoch": 1.97, + "grad_norm": 5.819434838830609, + "learning_rate": 1.4106021759410227e-08, + "loss": 1.2669, + "step": 163596 + }, + { + "epoch": 1.97, + "grad_norm": 3.2051323710682595, + "learning_rate": 1.4075021120054655e-08, + "loss": 1.1286, + "step": 163599 + }, + { + "epoch": 1.97, + "grad_norm": 7.326507830746774, + "learning_rate": 1.4044054559064768e-08, + "loss": 1.3521, + "step": 163602 + }, + { + "epoch": 1.97, + "grad_norm": 2.49907030905356, + "learning_rate": 1.4013122076544927e-08, + "loss": 0.9069, + "step": 163605 + }, + { + "epoch": 1.97, + "grad_norm": 3.48939155340104, + "learning_rate": 1.3982223672601713e-08, + "loss": 1.5868, + "step": 163608 + }, + { + "epoch": 1.97, + "grad_norm": 12.10205728034205, + "learning_rate": 1.3951359347339488e-08, + "loss": 1.1344, + "step": 163611 + }, + { + "epoch": 1.97, + "grad_norm": 16.82765180373771, + "learning_rate": 1.3920529100864832e-08, + "loss": 1.3271, + "step": 163614 + }, + { + "epoch": 1.97, + "grad_norm": 8.583453906497608, + "learning_rate": 1.3889732933282108e-08, + "loss": 1.4671, + "step": 163617 + }, + { + "epoch": 1.97, + "grad_norm": 13.772005894690535, + "learning_rate": 1.3858970844696785e-08, + "loss": 1.2552, + "step": 163620 + }, + { + "epoch": 1.97, + "grad_norm": 6.141767376786131, + "learning_rate": 1.3828242835214334e-08, + "loss": 1.0638, + "step": 163623 + }, + { + "epoch": 1.97, + "grad_norm": 13.995847894250964, + "learning_rate": 1.3797548904939118e-08, + "loss": 1.2647, + "step": 163626 + }, + { + "epoch": 1.97, + "grad_norm": 6.166187580522416, + "learning_rate": 1.3766889053975497e-08, + "loss": 0.8992, + "step": 163629 + }, + { + "epoch": 1.97, + "grad_norm": 4.519246563396702, + "learning_rate": 1.3736263282428941e-08, + "loss": 1.0159, + "step": 163632 + }, + { + "epoch": 1.97, + "grad_norm": 8.11473159775769, + "learning_rate": 1.3705671590402703e-08, + "loss": 1.1454, + "step": 163635 + }, + { + "epoch": 1.97, + "grad_norm": 6.26051951683304, + "learning_rate": 1.3675113978002252e-08, + "loss": 0.895, + "step": 163638 + }, + { + "epoch": 1.97, + "grad_norm": 5.656045205949327, + "learning_rate": 1.3644590445331952e-08, + "loss": 1.0267, + "step": 163641 + }, + { + "epoch": 1.97, + "grad_norm": 6.632490357919265, + "learning_rate": 1.361410099249505e-08, + "loss": 1.0498, + "step": 163644 + }, + { + "epoch": 1.97, + "grad_norm": 8.808030888363845, + "learning_rate": 1.358364561959591e-08, + "loss": 1.3768, + "step": 163647 + }, + { + "epoch": 1.97, + "grad_norm": 3.201090810778774, + "learning_rate": 1.355322432674e-08, + "loss": 1.1097, + "step": 163650 + }, + { + "epoch": 1.97, + "grad_norm": 5.204247919603694, + "learning_rate": 1.3522837114028354e-08, + "loss": 1.0484, + "step": 163653 + }, + { + "epoch": 1.97, + "grad_norm": 5.056015912614686, + "learning_rate": 1.349248398156644e-08, + "loss": 1.0038, + "step": 163656 + }, + { + "epoch": 1.97, + "grad_norm": 8.354083625217962, + "learning_rate": 1.3462164929458621e-08, + "loss": 1.2587, + "step": 163659 + }, + { + "epoch": 1.97, + "grad_norm": 14.821493461511997, + "learning_rate": 1.3431879957805926e-08, + "loss": 1.2646, + "step": 163662 + }, + { + "epoch": 1.97, + "grad_norm": 10.017030500469968, + "learning_rate": 1.3401629066713828e-08, + "loss": 1.4242, + "step": 163665 + }, + { + "epoch": 1.97, + "grad_norm": 10.776664660658012, + "learning_rate": 1.3371412256284466e-08, + "loss": 1.0134, + "step": 163668 + }, + { + "epoch": 1.97, + "grad_norm": 9.214319405052793, + "learning_rate": 1.3341229526622201e-08, + "loss": 0.9667, + "step": 163671 + }, + { + "epoch": 1.97, + "grad_norm": 10.315552992448083, + "learning_rate": 1.3311080877828064e-08, + "loss": 1.4888, + "step": 163674 + }, + { + "epoch": 1.97, + "grad_norm": 4.212851134614896, + "learning_rate": 1.3280966310006416e-08, + "loss": 1.1608, + "step": 163677 + }, + { + "epoch": 1.97, + "grad_norm": 6.776626144299723, + "learning_rate": 1.3250885823259396e-08, + "loss": 1.2548, + "step": 163680 + }, + { + "epoch": 1.97, + "grad_norm": 14.20477892622411, + "learning_rate": 1.3220839417690256e-08, + "loss": 1.2378, + "step": 163683 + }, + { + "epoch": 1.97, + "grad_norm": 10.599294476162116, + "learning_rate": 1.3190827093401137e-08, + "loss": 1.1826, + "step": 163686 + }, + { + "epoch": 1.97, + "grad_norm": 8.508437891427448, + "learning_rate": 1.316084885049418e-08, + "loss": 1.0103, + "step": 163689 + }, + { + "epoch": 1.97, + "grad_norm": 8.92004332044503, + "learning_rate": 1.3130904689072633e-08, + "loss": 1.2627, + "step": 163692 + }, + { + "epoch": 1.97, + "grad_norm": 5.473163183617532, + "learning_rate": 1.3100994609237527e-08, + "loss": 1.0434, + "step": 163695 + }, + { + "epoch": 1.97, + "grad_norm": 4.941210199313545, + "learning_rate": 1.3071118611092115e-08, + "loss": 1.4391, + "step": 163698 + }, + { + "epoch": 1.97, + "grad_norm": 7.368837641565376, + "learning_rate": 1.3041276694737425e-08, + "loss": 0.9522, + "step": 163701 + }, + { + "epoch": 1.97, + "grad_norm": 4.34446722506204, + "learning_rate": 1.301146886027449e-08, + "loss": 1.1656, + "step": 163704 + }, + { + "epoch": 1.97, + "grad_norm": 7.232004325065957, + "learning_rate": 1.2981695107807667e-08, + "loss": 1.3134, + "step": 163707 + }, + { + "epoch": 1.97, + "grad_norm": 8.344185924711722, + "learning_rate": 1.295195543743577e-08, + "loss": 1.094, + "step": 163710 + }, + { + "epoch": 1.97, + "grad_norm": 10.204540480907736, + "learning_rate": 1.2922249849262047e-08, + "loss": 0.9122, + "step": 163713 + }, + { + "epoch": 1.97, + "grad_norm": 8.967214945967513, + "learning_rate": 1.289257834338753e-08, + "loss": 1.0854, + "step": 163716 + }, + { + "epoch": 1.97, + "grad_norm": 5.413709456091174, + "learning_rate": 1.2862940919913247e-08, + "loss": 1.1217, + "step": 163719 + }, + { + "epoch": 1.97, + "grad_norm": 7.625388408267747, + "learning_rate": 1.283333757894023e-08, + "loss": 1.1518, + "step": 163722 + }, + { + "epoch": 1.97, + "grad_norm": 5.704631970292463, + "learning_rate": 1.280376832056951e-08, + "loss": 1.0515, + "step": 163725 + }, + { + "epoch": 1.97, + "grad_norm": 4.4399053422262895, + "learning_rate": 1.2774233144903226e-08, + "loss": 1.4367, + "step": 163728 + }, + { + "epoch": 1.97, + "grad_norm": 16.67376683069502, + "learning_rate": 1.2744732052040187e-08, + "loss": 1.0325, + "step": 163731 + }, + { + "epoch": 1.97, + "grad_norm": 10.392436616402499, + "learning_rate": 1.2715265042081426e-08, + "loss": 1.0163, + "step": 163734 + }, + { + "epoch": 1.97, + "grad_norm": 4.407086281671872, + "learning_rate": 1.2685832115129082e-08, + "loss": 0.8352, + "step": 163737 + }, + { + "epoch": 1.97, + "grad_norm": 5.019629943154911, + "learning_rate": 1.2656433271281965e-08, + "loss": 1.1123, + "step": 163740 + }, + { + "epoch": 1.97, + "grad_norm": 4.668593018544076, + "learning_rate": 1.2627068510641104e-08, + "loss": 1.0763, + "step": 163743 + }, + { + "epoch": 1.97, + "grad_norm": 23.200582891091017, + "learning_rate": 1.2597737833307533e-08, + "loss": 0.984, + "step": 163746 + }, + { + "epoch": 1.97, + "grad_norm": 12.419972238380803, + "learning_rate": 1.2568441239378948e-08, + "loss": 1.0142, + "step": 163749 + }, + { + "epoch": 1.97, + "grad_norm": 16.939854649353272, + "learning_rate": 1.253917872895749e-08, + "loss": 1.0973, + "step": 163752 + }, + { + "epoch": 1.97, + "grad_norm": 2.7439792616677012, + "learning_rate": 1.2509950302143081e-08, + "loss": 1.2918, + "step": 163755 + }, + { + "epoch": 1.97, + "grad_norm": 5.4863738204263495, + "learning_rate": 1.248075595903342e-08, + "loss": 1.1051, + "step": 163758 + }, + { + "epoch": 1.97, + "grad_norm": 12.268276835257645, + "learning_rate": 1.2451595699730645e-08, + "loss": 1.5852, + "step": 163761 + }, + { + "epoch": 1.97, + "grad_norm": 32.67556270870694, + "learning_rate": 1.2422469524332458e-08, + "loss": 0.7876, + "step": 163764 + }, + { + "epoch": 1.97, + "grad_norm": 7.137437294983688, + "learning_rate": 1.239337743293878e-08, + "loss": 0.9929, + "step": 163767 + }, + { + "epoch": 1.97, + "grad_norm": 3.6355704562831503, + "learning_rate": 1.2364319425649529e-08, + "loss": 1.1878, + "step": 163770 + }, + { + "epoch": 1.97, + "grad_norm": 2.8551507761645425, + "learning_rate": 1.2335295502562406e-08, + "loss": 0.939, + "step": 163773 + }, + { + "epoch": 1.97, + "grad_norm": 35.51933947976831, + "learning_rate": 1.230630566377844e-08, + "loss": 0.947, + "step": 163776 + }, + { + "epoch": 1.97, + "grad_norm": 8.004284159882262, + "learning_rate": 1.2277349909394221e-08, + "loss": 0.9991, + "step": 163779 + }, + { + "epoch": 1.97, + "grad_norm": 10.526889805023865, + "learning_rate": 1.2248428239510779e-08, + "loss": 0.6429, + "step": 163782 + }, + { + "epoch": 1.97, + "grad_norm": 14.53890034479207, + "learning_rate": 1.2219540654225814e-08, + "loss": 1.1108, + "step": 163785 + }, + { + "epoch": 1.97, + "grad_norm": 8.875311903495064, + "learning_rate": 1.2190687153638137e-08, + "loss": 1.1542, + "step": 163788 + }, + { + "epoch": 1.97, + "grad_norm": 15.244759575191472, + "learning_rate": 1.2161867737845444e-08, + "loss": 1.4423, + "step": 163791 + }, + { + "epoch": 1.97, + "grad_norm": 8.59335869688658, + "learning_rate": 1.213308240694655e-08, + "loss": 1.2246, + "step": 163794 + }, + { + "epoch": 1.97, + "grad_norm": 12.222525758761519, + "learning_rate": 1.2104331161040261e-08, + "loss": 1.0921, + "step": 163797 + }, + { + "epoch": 1.97, + "grad_norm": 8.717063920167147, + "learning_rate": 1.2075614000224278e-08, + "loss": 1.4498, + "step": 163800 + }, + { + "epoch": 1.97, + "grad_norm": 3.171316995365131, + "learning_rate": 1.2046930924597411e-08, + "loss": 1.1355, + "step": 163803 + }, + { + "epoch": 1.97, + "grad_norm": 20.172091663154674, + "learning_rate": 1.2018281934256248e-08, + "loss": 1.4924, + "step": 163806 + }, + { + "epoch": 1.97, + "grad_norm": 3.334050957035273, + "learning_rate": 1.198966702929849e-08, + "loss": 0.9537, + "step": 163809 + }, + { + "epoch": 1.97, + "grad_norm": 8.62376118052694, + "learning_rate": 1.1961086209821836e-08, + "loss": 0.9398, + "step": 163812 + }, + { + "epoch": 1.97, + "grad_norm": 8.81094214320646, + "learning_rate": 1.1932539475925097e-08, + "loss": 1.3005, + "step": 163815 + }, + { + "epoch": 1.97, + "grad_norm": 5.209009812773551, + "learning_rate": 1.1904026827704862e-08, + "loss": 1.0301, + "step": 163818 + }, + { + "epoch": 1.97, + "grad_norm": 30.131747045942447, + "learning_rate": 1.187554826525883e-08, + "loss": 1.1434, + "step": 163821 + }, + { + "epoch": 1.97, + "grad_norm": 10.75433544234797, + "learning_rate": 1.1847103788683589e-08, + "loss": 0.9684, + "step": 163824 + }, + { + "epoch": 1.97, + "grad_norm": 12.457353588368202, + "learning_rate": 1.1818693398075731e-08, + "loss": 0.9741, + "step": 163827 + }, + { + "epoch": 1.97, + "grad_norm": 10.145065798736459, + "learning_rate": 1.1790317093534065e-08, + "loss": 1.3852, + "step": 163830 + }, + { + "epoch": 1.97, + "grad_norm": 19.213552002831292, + "learning_rate": 1.176197487515407e-08, + "loss": 1.0654, + "step": 163833 + }, + { + "epoch": 1.97, + "grad_norm": 8.122159460073894, + "learning_rate": 1.1733666743032335e-08, + "loss": 0.6141, + "step": 163836 + }, + { + "epoch": 1.97, + "grad_norm": 7.472119979241465, + "learning_rate": 1.1705392697265449e-08, + "loss": 1.0236, + "step": 163839 + }, + { + "epoch": 1.97, + "grad_norm": 6.290460848717365, + "learning_rate": 1.1677152737951114e-08, + "loss": 1.2665, + "step": 163842 + }, + { + "epoch": 1.97, + "grad_norm": 4.657655963801806, + "learning_rate": 1.1648946865184807e-08, + "loss": 0.9109, + "step": 163845 + }, + { + "epoch": 1.97, + "grad_norm": 13.240403915231585, + "learning_rate": 1.1620775079063119e-08, + "loss": 1.2042, + "step": 163848 + }, + { + "epoch": 1.97, + "grad_norm": 8.313110188938667, + "learning_rate": 1.1592637379681526e-08, + "loss": 1.1667, + "step": 163851 + }, + { + "epoch": 1.97, + "grad_norm": 6.833641321402, + "learning_rate": 1.156453376713551e-08, + "loss": 0.7641, + "step": 163854 + }, + { + "epoch": 1.97, + "grad_norm": 11.967106411679344, + "learning_rate": 1.1536464241522772e-08, + "loss": 1.4583, + "step": 163857 + }, + { + "epoch": 1.97, + "grad_norm": 17.23201991421346, + "learning_rate": 1.1508428802938786e-08, + "loss": 1.0789, + "step": 163860 + }, + { + "epoch": 1.97, + "grad_norm": 5.49878800290236, + "learning_rate": 1.1480427451477928e-08, + "loss": 1.1914, + "step": 163863 + }, + { + "epoch": 1.97, + "grad_norm": 10.69499204798723, + "learning_rate": 1.145246018723678e-08, + "loss": 1.1719, + "step": 163866 + }, + { + "epoch": 1.97, + "grad_norm": 11.94450761879365, + "learning_rate": 1.1424527010310826e-08, + "loss": 0.8965, + "step": 163869 + }, + { + "epoch": 1.97, + "grad_norm": 4.876112247448234, + "learning_rate": 1.1396627920794434e-08, + "loss": 0.8448, + "step": 163872 + }, + { + "epoch": 1.97, + "grad_norm": 8.641205719140132, + "learning_rate": 1.1368762918784193e-08, + "loss": 1.2438, + "step": 163875 + }, + { + "epoch": 1.97, + "grad_norm": 9.569275511162578, + "learning_rate": 1.1340932004373362e-08, + "loss": 1.2445, + "step": 163878 + }, + { + "epoch": 1.97, + "grad_norm": 7.29647633066273, + "learning_rate": 1.131313517765964e-08, + "loss": 0.9435, + "step": 163881 + }, + { + "epoch": 1.97, + "grad_norm": 2.9807719759874436, + "learning_rate": 1.1285372438735177e-08, + "loss": 1.2914, + "step": 163884 + }, + { + "epoch": 1.97, + "grad_norm": 11.322159420614218, + "learning_rate": 1.125764378769656e-08, + "loss": 1.2329, + "step": 163887 + }, + { + "epoch": 1.97, + "grad_norm": 6.045991164611622, + "learning_rate": 1.1229949224637049e-08, + "loss": 1.3139, + "step": 163890 + }, + { + "epoch": 1.97, + "grad_norm": 7.587273745140899, + "learning_rate": 1.1202288749651013e-08, + "loss": 1.1416, + "step": 163893 + }, + { + "epoch": 1.97, + "grad_norm": 31.610777973232704, + "learning_rate": 1.1174662362835042e-08, + "loss": 1.4327, + "step": 163896 + }, + { + "epoch": 1.97, + "grad_norm": 11.716108090001484, + "learning_rate": 1.1147070064281285e-08, + "loss": 0.9384, + "step": 163899 + }, + { + "epoch": 1.97, + "grad_norm": 9.931559147883707, + "learning_rate": 1.1119511854085219e-08, + "loss": 1.2791, + "step": 163902 + }, + { + "epoch": 1.97, + "grad_norm": 7.055700487189928, + "learning_rate": 1.1091987732338994e-08, + "loss": 0.8523, + "step": 163905 + }, + { + "epoch": 1.97, + "grad_norm": 3.0159896746482358, + "learning_rate": 1.1064497699139198e-08, + "loss": 0.8122, + "step": 163908 + }, + { + "epoch": 1.97, + "grad_norm": 9.981350187357032, + "learning_rate": 1.1037041754577982e-08, + "loss": 1.1009, + "step": 163911 + }, + { + "epoch": 1.97, + "grad_norm": 5.0601562272458755, + "learning_rate": 1.1009619898748602e-08, + "loss": 1.1959, + "step": 163914 + }, + { + "epoch": 1.97, + "grad_norm": 8.280952337216293, + "learning_rate": 1.098223213174654e-08, + "loss": 0.657, + "step": 163917 + }, + { + "epoch": 1.97, + "grad_norm": 4.4232440934128086, + "learning_rate": 1.0954878453662831e-08, + "loss": 0.8977, + "step": 163920 + }, + { + "epoch": 1.97, + "grad_norm": 4.19822013751201, + "learning_rate": 1.0927558864592958e-08, + "loss": 1.066, + "step": 163923 + }, + { + "epoch": 1.97, + "grad_norm": 5.1625272330772365, + "learning_rate": 1.0900273364629067e-08, + "loss": 1.2924, + "step": 163926 + }, + { + "epoch": 1.97, + "grad_norm": 6.426097077059184, + "learning_rate": 1.0873021953864416e-08, + "loss": 1.1094, + "step": 163929 + }, + { + "epoch": 1.97, + "grad_norm": 2.6142402530938775, + "learning_rate": 1.0845804632392264e-08, + "loss": 0.8082, + "step": 163932 + }, + { + "epoch": 1.97, + "grad_norm": 4.282323891893017, + "learning_rate": 1.0818621400304762e-08, + "loss": 1.1453, + "step": 163935 + }, + { + "epoch": 1.97, + "grad_norm": 7.318539390531844, + "learning_rate": 1.0791472257696278e-08, + "loss": 1.365, + "step": 163938 + }, + { + "epoch": 1.97, + "grad_norm": 19.537270004089955, + "learning_rate": 1.0764357204656738e-08, + "loss": 1.0944, + "step": 163941 + }, + { + "epoch": 1.97, + "grad_norm": 4.71736706849676, + "learning_rate": 1.0737276241281624e-08, + "loss": 0.8349, + "step": 163944 + }, + { + "epoch": 1.97, + "grad_norm": 15.432997693848364, + "learning_rate": 1.0710229367661973e-08, + "loss": 0.7704, + "step": 163947 + }, + { + "epoch": 1.97, + "grad_norm": 34.31049887100974, + "learning_rate": 1.0683216583889933e-08, + "loss": 1.1201, + "step": 163950 + }, + { + "epoch": 1.97, + "grad_norm": 5.5330133910669845, + "learning_rate": 1.0656237890057652e-08, + "loss": 1.3513, + "step": 163953 + }, + { + "epoch": 1.97, + "grad_norm": 8.267781570174247, + "learning_rate": 1.062929328625839e-08, + "loss": 1.3039, + "step": 163956 + }, + { + "epoch": 1.97, + "grad_norm": 6.2211230930636106, + "learning_rate": 1.0602382772582075e-08, + "loss": 1.3309, + "step": 163959 + }, + { + "epoch": 1.97, + "grad_norm": 5.857790227121377, + "learning_rate": 1.0575506349121966e-08, + "loss": 1.2748, + "step": 163962 + }, + { + "epoch": 1.97, + "grad_norm": 17.31921219116392, + "learning_rate": 1.0548664015970212e-08, + "loss": 1.1645, + "step": 163965 + }, + { + "epoch": 1.97, + "grad_norm": 6.644072753848115, + "learning_rate": 1.0521855773216737e-08, + "loss": 1.0265, + "step": 163968 + }, + { + "epoch": 1.97, + "grad_norm": 5.477366926536793, + "learning_rate": 1.0495081620954806e-08, + "loss": 1.1368, + "step": 163971 + }, + { + "epoch": 1.97, + "grad_norm": 10.159318416150342, + "learning_rate": 1.0468341559274343e-08, + "loss": 1.151, + "step": 163974 + }, + { + "epoch": 1.97, + "grad_norm": 5.339475785443737, + "learning_rate": 1.0441635588267497e-08, + "loss": 1.4665, + "step": 163977 + }, + { + "epoch": 1.97, + "grad_norm": 8.078635171714375, + "learning_rate": 1.0414963708025306e-08, + "loss": 1.1071, + "step": 163980 + }, + { + "epoch": 1.97, + "grad_norm": 5.994394680104698, + "learning_rate": 1.038832591863881e-08, + "loss": 1.0045, + "step": 163983 + }, + { + "epoch": 1.97, + "grad_norm": 13.853188754874507, + "learning_rate": 1.0361722220197934e-08, + "loss": 1.5555, + "step": 163986 + }, + { + "epoch": 1.97, + "grad_norm": 4.298759813912048, + "learning_rate": 1.033515261279483e-08, + "loss": 1.0795, + "step": 163989 + }, + { + "epoch": 1.97, + "grad_norm": 3.5354677579209675, + "learning_rate": 1.0308617096519424e-08, + "loss": 1.2265, + "step": 163992 + }, + { + "epoch": 1.97, + "grad_norm": 14.544615996489659, + "learning_rate": 1.0282115671461645e-08, + "loss": 0.8134, + "step": 163995 + }, + { + "epoch": 1.97, + "grad_norm": 67.86285065934868, + "learning_rate": 1.0255648337713642e-08, + "loss": 0.9374, + "step": 163998 + }, + { + "epoch": 1.97, + "grad_norm": 6.727634270598872, + "learning_rate": 1.0229215095364231e-08, + "loss": 0.7821, + "step": 164001 + }, + { + "epoch": 1.97, + "grad_norm": 7.506351872799547, + "learning_rate": 1.0202815944505562e-08, + "loss": 0.973, + "step": 164004 + }, + { + "epoch": 1.97, + "grad_norm": 3.334526094247894, + "learning_rate": 1.0176450885225343e-08, + "loss": 1.013, + "step": 164007 + }, + { + "epoch": 1.97, + "grad_norm": 11.333554508072405, + "learning_rate": 1.015011991761461e-08, + "loss": 1.4211, + "step": 164010 + }, + { + "epoch": 1.97, + "grad_norm": 6.9250206788909505, + "learning_rate": 1.0123823041763292e-08, + "loss": 1.1408, + "step": 164013 + }, + { + "epoch": 1.97, + "grad_norm": 12.76316711360394, + "learning_rate": 1.0097560257760208e-08, + "loss": 1.1076, + "step": 164016 + }, + { + "epoch": 1.97, + "grad_norm": 14.743289594560519, + "learning_rate": 1.0071331565697506e-08, + "loss": 0.9328, + "step": 164019 + }, + { + "epoch": 1.97, + "grad_norm": 6.058771353454095, + "learning_rate": 1.0045136965661783e-08, + "loss": 1.0126, + "step": 164022 + }, + { + "epoch": 1.97, + "grad_norm": 7.312865331075118, + "learning_rate": 1.0018976457744078e-08, + "loss": 1.1269, + "step": 164025 + }, + { + "epoch": 1.97, + "grad_norm": 8.12737668258756, + "learning_rate": 9.992850042033208e-09, + "loss": 0.936, + "step": 164028 + }, + { + "epoch": 1.97, + "grad_norm": 5.999205332492955, + "learning_rate": 9.966757718617992e-09, + "loss": 1.0698, + "step": 164031 + }, + { + "epoch": 1.97, + "grad_norm": 9.82869065209681, + "learning_rate": 9.940699487588356e-09, + "loss": 1.0735, + "step": 164034 + }, + { + "epoch": 1.97, + "grad_norm": 31.67700126732081, + "learning_rate": 9.91467534903201e-09, + "loss": 1.1079, + "step": 164037 + }, + { + "epoch": 1.97, + "grad_norm": 2.3506831328672746, + "learning_rate": 9.88868530303888e-09, + "loss": 1.1482, + "step": 164040 + }, + { + "epoch": 1.97, + "grad_norm": 6.489565534115159, + "learning_rate": 9.862729349696675e-09, + "loss": 1.1543, + "step": 164043 + }, + { + "epoch": 1.97, + "grad_norm": 8.549861186688096, + "learning_rate": 9.836807489095323e-09, + "loss": 1.0816, + "step": 164046 + }, + { + "epoch": 1.97, + "grad_norm": 5.0072179140622985, + "learning_rate": 9.810919721322531e-09, + "loss": 1.0986, + "step": 164049 + }, + { + "epoch": 1.97, + "grad_norm": 3.4817154055215362, + "learning_rate": 9.785066046466008e-09, + "loss": 1.1154, + "step": 164052 + }, + { + "epoch": 1.97, + "grad_norm": 11.256140611629638, + "learning_rate": 9.759246464615679e-09, + "loss": 1.17, + "step": 164055 + }, + { + "epoch": 1.97, + "grad_norm": 6.007860688309986, + "learning_rate": 9.733460975858145e-09, + "loss": 1.1253, + "step": 164058 + }, + { + "epoch": 1.97, + "grad_norm": 4.304594097960546, + "learning_rate": 9.707709580281111e-09, + "loss": 1.3727, + "step": 164061 + }, + { + "epoch": 1.97, + "grad_norm": 3.2203542405149905, + "learning_rate": 9.681992277973396e-09, + "loss": 1.2882, + "step": 164064 + }, + { + "epoch": 1.97, + "grad_norm": 7.327833891157311, + "learning_rate": 9.656309069022706e-09, + "loss": 1.1676, + "step": 164067 + }, + { + "epoch": 1.97, + "grad_norm": 10.69756181183193, + "learning_rate": 9.630659953516752e-09, + "loss": 1.1531, + "step": 164070 + }, + { + "epoch": 1.97, + "grad_norm": 8.7796781208892, + "learning_rate": 9.605044931543239e-09, + "loss": 1.3745, + "step": 164073 + }, + { + "epoch": 1.97, + "grad_norm": 4.194357938738615, + "learning_rate": 9.579464003187655e-09, + "loss": 1.1999, + "step": 164076 + }, + { + "epoch": 1.97, + "grad_norm": 14.186326342496134, + "learning_rate": 9.553917168539928e-09, + "loss": 0.9752, + "step": 164079 + }, + { + "epoch": 1.97, + "grad_norm": 9.260091144690419, + "learning_rate": 9.528404427686654e-09, + "loss": 1.3047, + "step": 164082 + }, + { + "epoch": 1.97, + "grad_norm": 3.3486433118200147, + "learning_rate": 9.502925780713323e-09, + "loss": 0.8188, + "step": 164085 + }, + { + "epoch": 1.97, + "grad_norm": 4.7835319051332705, + "learning_rate": 9.477481227708751e-09, + "loss": 1.0792, + "step": 164088 + }, + { + "epoch": 1.97, + "grad_norm": 4.85397761170533, + "learning_rate": 9.452070768758426e-09, + "loss": 1.2048, + "step": 164091 + }, + { + "epoch": 1.97, + "grad_norm": 18.59348379478449, + "learning_rate": 9.426694403950055e-09, + "loss": 1.5113, + "step": 164094 + }, + { + "epoch": 1.97, + "grad_norm": 12.623953033894953, + "learning_rate": 9.401352133369123e-09, + "loss": 0.8856, + "step": 164097 + }, + { + "epoch": 1.97, + "grad_norm": 10.764472805607259, + "learning_rate": 9.376043957103343e-09, + "loss": 1.2841, + "step": 164100 + }, + { + "epoch": 1.97, + "grad_norm": 11.528544026877265, + "learning_rate": 9.350769875238198e-09, + "loss": 1.1307, + "step": 164103 + }, + { + "epoch": 1.97, + "grad_norm": 9.751204801751513, + "learning_rate": 9.325529887860285e-09, + "loss": 1.3192, + "step": 164106 + }, + { + "epoch": 1.97, + "grad_norm": 9.443608760918703, + "learning_rate": 9.300323995056204e-09, + "loss": 1.1154, + "step": 164109 + }, + { + "epoch": 1.97, + "grad_norm": 3.5911659556106317, + "learning_rate": 9.27515219691033e-09, + "loss": 1.2093, + "step": 164112 + }, + { + "epoch": 1.97, + "grad_norm": 12.18400254727114, + "learning_rate": 9.250014493511483e-09, + "loss": 1.2913, + "step": 164115 + }, + { + "epoch": 1.97, + "grad_norm": 10.355829592102292, + "learning_rate": 9.224910884941818e-09, + "loss": 1.1291, + "step": 164118 + }, + { + "epoch": 1.97, + "grad_norm": 2.5423489300712463, + "learning_rate": 9.199841371290153e-09, + "loss": 1.0071, + "step": 164121 + }, + { + "epoch": 1.97, + "grad_norm": 12.650339057253255, + "learning_rate": 9.174805952639753e-09, + "loss": 1.0539, + "step": 164124 + }, + { + "epoch": 1.97, + "grad_norm": 10.237915666697965, + "learning_rate": 9.14980462907833e-09, + "loss": 0.8515, + "step": 164127 + }, + { + "epoch": 1.97, + "grad_norm": 27.37344665714715, + "learning_rate": 9.124837400689146e-09, + "loss": 1.1646, + "step": 164130 + }, + { + "epoch": 1.97, + "grad_norm": 4.164431205332201, + "learning_rate": 9.099904267558802e-09, + "loss": 1.052, + "step": 164133 + }, + { + "epoch": 1.97, + "grad_norm": 15.042089248438593, + "learning_rate": 9.075005229770561e-09, + "loss": 1.0194, + "step": 164136 + }, + { + "epoch": 1.97, + "grad_norm": 14.466477776742412, + "learning_rate": 9.050140287411024e-09, + "loss": 1.1679, + "step": 164139 + }, + { + "epoch": 1.97, + "grad_norm": 6.251639585353737, + "learning_rate": 9.025309440565678e-09, + "loss": 0.8965, + "step": 164142 + }, + { + "epoch": 1.97, + "grad_norm": 5.22887078709942, + "learning_rate": 9.000512689317787e-09, + "loss": 1.2454, + "step": 164145 + }, + { + "epoch": 1.97, + "grad_norm": 5.415229144076469, + "learning_rate": 8.975750033751729e-09, + "loss": 1.2287, + "step": 164148 + }, + { + "epoch": 1.97, + "grad_norm": 10.71431745490266, + "learning_rate": 8.951021473952993e-09, + "loss": 1.1708, + "step": 164151 + }, + { + "epoch": 1.97, + "grad_norm": 8.363095673403121, + "learning_rate": 8.926327010005952e-09, + "loss": 1.0995, + "step": 164154 + }, + { + "epoch": 1.97, + "grad_norm": 5.096242430598192, + "learning_rate": 8.901666641993877e-09, + "loss": 0.88, + "step": 164157 + }, + { + "epoch": 1.97, + "grad_norm": 10.058912414986676, + "learning_rate": 8.877040370002254e-09, + "loss": 1.1247, + "step": 164160 + }, + { + "epoch": 1.97, + "grad_norm": 18.177110582359298, + "learning_rate": 8.85244819411435e-09, + "loss": 1.2199, + "step": 164163 + }, + { + "epoch": 1.97, + "grad_norm": 4.587748984697064, + "learning_rate": 8.82789011441454e-09, + "loss": 1.3617, + "step": 164166 + }, + { + "epoch": 1.97, + "grad_norm": 13.825385835408033, + "learning_rate": 8.80336613098609e-09, + "loss": 1.0135, + "step": 164169 + }, + { + "epoch": 1.97, + "grad_norm": 4.288160088452942, + "learning_rate": 8.778876243913382e-09, + "loss": 1.2773, + "step": 164172 + }, + { + "epoch": 1.97, + "grad_norm": 9.055422945483269, + "learning_rate": 8.754420453278568e-09, + "loss": 1.5504, + "step": 164175 + }, + { + "epoch": 1.97, + "grad_norm": 5.698187293094463, + "learning_rate": 8.729998759166025e-09, + "loss": 0.9442, + "step": 164178 + }, + { + "epoch": 1.97, + "grad_norm": 8.53766499286843, + "learning_rate": 8.705611161660133e-09, + "loss": 1.0311, + "step": 164181 + }, + { + "epoch": 1.97, + "grad_norm": 15.78962176068015, + "learning_rate": 8.681257660843046e-09, + "loss": 1.1459, + "step": 164184 + }, + { + "epoch": 1.97, + "grad_norm": 9.672722522634482, + "learning_rate": 8.65693825679803e-09, + "loss": 1.5451, + "step": 164187 + }, + { + "epoch": 1.97, + "grad_norm": 8.236921344898155, + "learning_rate": 8.632652949607246e-09, + "loss": 1.0196, + "step": 164190 + }, + { + "epoch": 1.97, + "grad_norm": 6.959833955369196, + "learning_rate": 8.608401739355066e-09, + "loss": 1.1442, + "step": 164193 + }, + { + "epoch": 1.97, + "grad_norm": 55.722606755804485, + "learning_rate": 8.584184626122538e-09, + "loss": 1.0906, + "step": 164196 + }, + { + "epoch": 1.97, + "grad_norm": 5.055079593350613, + "learning_rate": 8.56000160999404e-09, + "loss": 0.9782, + "step": 164199 + }, + { + "epoch": 1.97, + "grad_norm": 5.5663657718870505, + "learning_rate": 8.535852691051728e-09, + "loss": 0.9533, + "step": 164202 + }, + { + "epoch": 1.97, + "grad_norm": 6.14071159042621, + "learning_rate": 8.511737869376647e-09, + "loss": 1.0987, + "step": 164205 + }, + { + "epoch": 1.97, + "grad_norm": 9.456296564124264, + "learning_rate": 8.487657145053174e-09, + "loss": 1.0233, + "step": 164208 + }, + { + "epoch": 1.97, + "grad_norm": 11.44924053951165, + "learning_rate": 8.463610518162357e-09, + "loss": 1.1201, + "step": 164211 + }, + { + "epoch": 1.97, + "grad_norm": 4.81756505757632, + "learning_rate": 8.439597988785241e-09, + "loss": 1.2025, + "step": 164214 + }, + { + "epoch": 1.97, + "grad_norm": 4.9999191486409496, + "learning_rate": 8.415619557005094e-09, + "loss": 1.1802, + "step": 164217 + }, + { + "epoch": 1.97, + "grad_norm": 3.949740636852939, + "learning_rate": 8.391675222904073e-09, + "loss": 0.9738, + "step": 164220 + }, + { + "epoch": 1.97, + "grad_norm": 8.173401974181575, + "learning_rate": 8.36776498656322e-09, + "loss": 1.3978, + "step": 164223 + }, + { + "epoch": 1.97, + "grad_norm": 9.451284097532376, + "learning_rate": 8.343888848063585e-09, + "loss": 0.8675, + "step": 164226 + }, + { + "epoch": 1.97, + "grad_norm": 30.665809184647873, + "learning_rate": 8.320046807488436e-09, + "loss": 1.204, + "step": 164229 + }, + { + "epoch": 1.97, + "grad_norm": 17.958055531108446, + "learning_rate": 8.296238864916595e-09, + "loss": 0.9445, + "step": 164232 + }, + { + "epoch": 1.97, + "grad_norm": 7.073922549583243, + "learning_rate": 8.27246502043133e-09, + "loss": 1.0186, + "step": 164235 + }, + { + "epoch": 1.97, + "grad_norm": 3.656742852888795, + "learning_rate": 8.248725274113689e-09, + "loss": 1.3973, + "step": 164238 + }, + { + "epoch": 1.97, + "grad_norm": 7.960439142956375, + "learning_rate": 8.225019626043607e-09, + "loss": 0.841, + "step": 164241 + }, + { + "epoch": 1.97, + "grad_norm": 5.756802139029417, + "learning_rate": 8.20134807630213e-09, + "loss": 1.0471, + "step": 164244 + }, + { + "epoch": 1.98, + "grad_norm": 4.257419221576363, + "learning_rate": 8.177710624971414e-09, + "loss": 1.0575, + "step": 164247 + }, + { + "epoch": 1.98, + "grad_norm": 4.437530243632749, + "learning_rate": 8.154107272130284e-09, + "loss": 1.4058, + "step": 164250 + }, + { + "epoch": 1.98, + "grad_norm": 7.29845609277169, + "learning_rate": 8.1305380178609e-09, + "loss": 1.0708, + "step": 164253 + }, + { + "epoch": 1.98, + "grad_norm": 29.195969340814667, + "learning_rate": 8.107002862242086e-09, + "loss": 1.101, + "step": 164256 + }, + { + "epoch": 1.98, + "grad_norm": 9.770281579466664, + "learning_rate": 8.083501805354888e-09, + "loss": 1.0031, + "step": 164259 + }, + { + "epoch": 1.98, + "grad_norm": 3.3674098281298663, + "learning_rate": 8.060034847280352e-09, + "loss": 1.1158, + "step": 164262 + }, + { + "epoch": 1.98, + "grad_norm": 11.041623467774285, + "learning_rate": 8.036601988097303e-09, + "loss": 1.3086, + "step": 164265 + }, + { + "epoch": 1.98, + "grad_norm": 7.8937860283445, + "learning_rate": 8.01320322788679e-09, + "loss": 1.0466, + "step": 164268 + }, + { + "epoch": 1.98, + "grad_norm": 2.891579959060804, + "learning_rate": 7.989838566727637e-09, + "loss": 1.2497, + "step": 164271 + }, + { + "epoch": 1.98, + "grad_norm": 5.0677086430587135, + "learning_rate": 7.96650800470089e-09, + "loss": 0.7629, + "step": 164274 + }, + { + "epoch": 1.98, + "grad_norm": 9.412602257121284, + "learning_rate": 7.943211541884265e-09, + "loss": 0.9894, + "step": 164277 + }, + { + "epoch": 1.98, + "grad_norm": 16.042458602725812, + "learning_rate": 7.91994917835881e-09, + "loss": 1.2645, + "step": 164280 + }, + { + "epoch": 1.98, + "grad_norm": 6.787442379357126, + "learning_rate": 7.896720914203348e-09, + "loss": 0.9705, + "step": 164283 + }, + { + "epoch": 1.98, + "grad_norm": 11.531128307182444, + "learning_rate": 7.873526749497817e-09, + "loss": 1.1777, + "step": 164286 + }, + { + "epoch": 1.98, + "grad_norm": 10.681801020998664, + "learning_rate": 7.850366684319931e-09, + "loss": 0.938, + "step": 164289 + }, + { + "epoch": 1.98, + "grad_norm": 12.661799923357597, + "learning_rate": 7.827240718749629e-09, + "loss": 1.0599, + "step": 164292 + }, + { + "epoch": 1.98, + "grad_norm": 12.447535652309103, + "learning_rate": 7.804148852866844e-09, + "loss": 1.1738, + "step": 164295 + }, + { + "epoch": 1.98, + "grad_norm": 6.069725067797555, + "learning_rate": 7.781091086748182e-09, + "loss": 1.0807, + "step": 164298 + }, + { + "epoch": 1.98, + "grad_norm": 7.4967357315154, + "learning_rate": 7.758067420473581e-09, + "loss": 1.0704, + "step": 164301 + }, + { + "epoch": 1.98, + "grad_norm": 9.670624213258574, + "learning_rate": 7.735077854121864e-09, + "loss": 1.0664, + "step": 164304 + }, + { + "epoch": 1.98, + "grad_norm": 3.4336293496022647, + "learning_rate": 7.71212238777075e-09, + "loss": 1.3087, + "step": 164307 + }, + { + "epoch": 1.98, + "grad_norm": 4.987012030805534, + "learning_rate": 7.68920102150017e-09, + "loss": 1.0, + "step": 164310 + }, + { + "epoch": 1.98, + "grad_norm": 10.130177370359322, + "learning_rate": 7.666313755385623e-09, + "loss": 1.1975, + "step": 164313 + }, + { + "epoch": 1.98, + "grad_norm": 3.528743476162045, + "learning_rate": 7.643460589508156e-09, + "loss": 1.4544, + "step": 164316 + }, + { + "epoch": 1.98, + "grad_norm": 7.011614926085648, + "learning_rate": 7.620641523943261e-09, + "loss": 1.3829, + "step": 164319 + }, + { + "epoch": 1.98, + "grad_norm": 9.793937809447964, + "learning_rate": 7.597856558769767e-09, + "loss": 0.8598, + "step": 164322 + }, + { + "epoch": 1.98, + "grad_norm": 12.402070284473018, + "learning_rate": 7.575105694066498e-09, + "loss": 1.0817, + "step": 164325 + }, + { + "epoch": 1.98, + "grad_norm": 11.070603723339447, + "learning_rate": 7.552388929908949e-09, + "loss": 1.1318, + "step": 164328 + }, + { + "epoch": 1.98, + "grad_norm": 7.672062071378269, + "learning_rate": 7.529706266377056e-09, + "loss": 1.1941, + "step": 164331 + }, + { + "epoch": 1.98, + "grad_norm": 14.308993965881397, + "learning_rate": 7.507057703546316e-09, + "loss": 1.2627, + "step": 164334 + }, + { + "epoch": 1.98, + "grad_norm": 9.729932265505713, + "learning_rate": 7.484443241494443e-09, + "loss": 0.8664, + "step": 164337 + }, + { + "epoch": 1.98, + "grad_norm": 13.748260341426793, + "learning_rate": 7.461862880299154e-09, + "loss": 1.1616, + "step": 164340 + }, + { + "epoch": 1.98, + "grad_norm": 24.22380499182084, + "learning_rate": 7.439316620037051e-09, + "loss": 1.1514, + "step": 164343 + }, + { + "epoch": 1.98, + "grad_norm": 15.386895295581159, + "learning_rate": 7.416804460785854e-09, + "loss": 1.1323, + "step": 164346 + }, + { + "epoch": 1.98, + "grad_norm": 7.030460151976095, + "learning_rate": 7.394326402621055e-09, + "loss": 1.3891, + "step": 164349 + }, + { + "epoch": 1.98, + "grad_norm": 2.6824402777578524, + "learning_rate": 7.371882445620371e-09, + "loss": 1.0468, + "step": 164352 + }, + { + "epoch": 1.98, + "grad_norm": 6.868056398535088, + "learning_rate": 7.349472589860407e-09, + "loss": 1.2085, + "step": 164355 + }, + { + "epoch": 1.98, + "grad_norm": 11.15938264448075, + "learning_rate": 7.327096835417768e-09, + "loss": 0.8323, + "step": 164358 + }, + { + "epoch": 1.98, + "grad_norm": 10.023936227248571, + "learning_rate": 7.304755182366841e-09, + "loss": 1.2519, + "step": 164361 + }, + { + "epoch": 1.98, + "grad_norm": 3.1052838284303816, + "learning_rate": 7.282447630786449e-09, + "loss": 1.0529, + "step": 164364 + }, + { + "epoch": 1.98, + "grad_norm": 7.945533372597947, + "learning_rate": 7.260174180752089e-09, + "loss": 1.1806, + "step": 164367 + }, + { + "epoch": 1.98, + "grad_norm": 9.293786810886628, + "learning_rate": 7.2379348323392555e-09, + "loss": 0.953, + "step": 164370 + }, + { + "epoch": 1.98, + "grad_norm": 5.199304261009283, + "learning_rate": 7.2157295856234435e-09, + "loss": 1.3848, + "step": 164373 + }, + { + "epoch": 1.98, + "grad_norm": 18.14087514062005, + "learning_rate": 7.193558440681258e-09, + "loss": 1.2064, + "step": 164376 + }, + { + "epoch": 1.98, + "grad_norm": 13.938309892194624, + "learning_rate": 7.171421397588197e-09, + "loss": 1.3042, + "step": 164379 + }, + { + "epoch": 1.98, + "grad_norm": 5.216423673922157, + "learning_rate": 7.149318456418641e-09, + "loss": 1.4157, + "step": 164382 + }, + { + "epoch": 1.98, + "grad_norm": 10.893284855448659, + "learning_rate": 7.127249617250309e-09, + "loss": 0.9055, + "step": 164385 + }, + { + "epoch": 1.98, + "grad_norm": 7.000100744433709, + "learning_rate": 7.105214880156474e-09, + "loss": 1.1335, + "step": 164388 + }, + { + "epoch": 1.98, + "grad_norm": 6.5221610297659955, + "learning_rate": 7.083214245213743e-09, + "loss": 0.9392, + "step": 164391 + }, + { + "epoch": 1.98, + "grad_norm": 7.693329494654299, + "learning_rate": 7.0612477124964995e-09, + "loss": 1.5053, + "step": 164394 + }, + { + "epoch": 1.98, + "grad_norm": 9.269139706125072, + "learning_rate": 7.0393152820791285e-09, + "loss": 1.2811, + "step": 164397 + }, + { + "epoch": 1.98, + "grad_norm": 7.937048380298026, + "learning_rate": 7.017416954038237e-09, + "loss": 1.1309, + "step": 164400 + }, + { + "epoch": 1.98, + "grad_norm": 11.251408927286157, + "learning_rate": 6.995552728447097e-09, + "loss": 0.9681, + "step": 164403 + }, + { + "epoch": 1.98, + "grad_norm": 9.491834604477244, + "learning_rate": 6.973722605380095e-09, + "loss": 1.1466, + "step": 164406 + }, + { + "epoch": 1.98, + "grad_norm": 8.871967975351554, + "learning_rate": 6.9519265849127275e-09, + "loss": 1.5632, + "step": 164409 + }, + { + "epoch": 1.98, + "grad_norm": 6.9080735704075344, + "learning_rate": 6.9301646671182664e-09, + "loss": 1.3046, + "step": 164412 + }, + { + "epoch": 1.98, + "grad_norm": 7.024765190796671, + "learning_rate": 6.908436852072209e-09, + "loss": 1.1764, + "step": 164415 + }, + { + "epoch": 1.98, + "grad_norm": 11.224658120153324, + "learning_rate": 6.8867431398478294e-09, + "loss": 1.2942, + "step": 164418 + }, + { + "epoch": 1.98, + "grad_norm": 8.089050212488184, + "learning_rate": 6.865083530520622e-09, + "loss": 1.1513, + "step": 164421 + }, + { + "epoch": 1.98, + "grad_norm": 12.658090049538789, + "learning_rate": 6.843458024162752e-09, + "loss": 1.2677, + "step": 164424 + }, + { + "epoch": 1.98, + "grad_norm": 6.202674126721157, + "learning_rate": 6.8218666208474945e-09, + "loss": 0.9337, + "step": 164427 + }, + { + "epoch": 1.98, + "grad_norm": 3.379198496450421, + "learning_rate": 6.800309320651455e-09, + "loss": 1.1738, + "step": 164430 + }, + { + "epoch": 1.98, + "grad_norm": 6.242120372550142, + "learning_rate": 6.778786123645686e-09, + "loss": 1.0485, + "step": 164433 + }, + { + "epoch": 1.98, + "grad_norm": 4.721079228361114, + "learning_rate": 6.757297029903464e-09, + "loss": 1.1652, + "step": 164436 + }, + { + "epoch": 1.98, + "grad_norm": 6.929628353838583, + "learning_rate": 6.735842039500285e-09, + "loss": 1.0652, + "step": 164439 + }, + { + "epoch": 1.98, + "grad_norm": 11.9940895439972, + "learning_rate": 6.71442115250831e-09, + "loss": 1.2199, + "step": 164442 + }, + { + "epoch": 1.98, + "grad_norm": 5.045741961627339, + "learning_rate": 6.693034368999707e-09, + "loss": 1.0254, + "step": 164445 + }, + { + "epoch": 1.98, + "grad_norm": 2.594543446794421, + "learning_rate": 6.6716816890477486e-09, + "loss": 1.2249, + "step": 164448 + }, + { + "epoch": 1.98, + "grad_norm": 7.0441101481888655, + "learning_rate": 6.6503631127268205e-09, + "loss": 1.1426, + "step": 164451 + }, + { + "epoch": 1.98, + "grad_norm": 9.71325260361882, + "learning_rate": 6.6290786401079774e-09, + "loss": 1.405, + "step": 164454 + }, + { + "epoch": 1.98, + "grad_norm": 4.615118892072228, + "learning_rate": 6.6078282712644935e-09, + "loss": 1.1556, + "step": 164457 + }, + { + "epoch": 1.98, + "grad_norm": 7.850128991138552, + "learning_rate": 6.5866120062685336e-09, + "loss": 1.0369, + "step": 164460 + }, + { + "epoch": 1.98, + "grad_norm": 12.209852954263926, + "learning_rate": 6.565429845194482e-09, + "loss": 1.1447, + "step": 164463 + }, + { + "epoch": 1.98, + "grad_norm": 6.363962976754, + "learning_rate": 6.544281788111173e-09, + "loss": 1.0909, + "step": 164466 + }, + { + "epoch": 1.98, + "grad_norm": 13.180779265829736, + "learning_rate": 6.523167835094102e-09, + "loss": 1.228, + "step": 164469 + }, + { + "epoch": 1.98, + "grad_norm": 13.359778814466596, + "learning_rate": 6.502087986212102e-09, + "loss": 1.3695, + "step": 164472 + }, + { + "epoch": 1.98, + "grad_norm": 15.734645852431491, + "learning_rate": 6.481042241540669e-09, + "loss": 0.9006, + "step": 164475 + }, + { + "epoch": 1.98, + "grad_norm": 4.195066732269422, + "learning_rate": 6.4600306011486365e-09, + "loss": 1.1983, + "step": 164478 + }, + { + "epoch": 1.98, + "grad_norm": 14.17560190041806, + "learning_rate": 6.4390530651092796e-09, + "loss": 0.9987, + "step": 164481 + }, + { + "epoch": 1.98, + "grad_norm": 10.663706818013326, + "learning_rate": 6.418109633493652e-09, + "loss": 1.124, + "step": 164484 + }, + { + "epoch": 1.98, + "grad_norm": 11.377016111448633, + "learning_rate": 6.397200306372808e-09, + "loss": 1.0008, + "step": 164487 + }, + { + "epoch": 1.98, + "grad_norm": 9.905399022405634, + "learning_rate": 6.376325083818913e-09, + "loss": 1.054, + "step": 164490 + }, + { + "epoch": 1.98, + "grad_norm": 11.013644022055777, + "learning_rate": 6.35548396590413e-09, + "loss": 1.6071, + "step": 164493 + }, + { + "epoch": 1.98, + "grad_norm": 8.836289957361368, + "learning_rate": 6.334676952697294e-09, + "loss": 1.2425, + "step": 164496 + }, + { + "epoch": 1.98, + "grad_norm": 10.597996753796668, + "learning_rate": 6.313904044270569e-09, + "loss": 0.7809, + "step": 164499 + }, + { + "epoch": 1.98, + "grad_norm": 11.038351087129884, + "learning_rate": 6.293165240695009e-09, + "loss": 1.3212, + "step": 164502 + }, + { + "epoch": 1.98, + "grad_norm": 4.257177194158107, + "learning_rate": 6.272460542040559e-09, + "loss": 1.3075, + "step": 164505 + }, + { + "epoch": 1.98, + "grad_norm": 3.490569371033757, + "learning_rate": 6.251789948379383e-09, + "loss": 1.137, + "step": 164508 + }, + { + "epoch": 1.98, + "grad_norm": 6.708729020426655, + "learning_rate": 6.2311534597803145e-09, + "loss": 0.9533, + "step": 164511 + }, + { + "epoch": 1.98, + "grad_norm": 7.819072315136085, + "learning_rate": 6.210551076314408e-09, + "loss": 0.9484, + "step": 164514 + }, + { + "epoch": 1.98, + "grad_norm": 14.408196518986687, + "learning_rate": 6.189982798052718e-09, + "loss": 1.0438, + "step": 164517 + }, + { + "epoch": 1.98, + "grad_norm": 11.160466158455023, + "learning_rate": 6.1694486250651885e-09, + "loss": 1.3506, + "step": 164520 + }, + { + "epoch": 1.98, + "grad_norm": 5.0153289251918665, + "learning_rate": 6.148948557421763e-09, + "loss": 0.9939, + "step": 164523 + }, + { + "epoch": 1.98, + "grad_norm": 2.6956044350470374, + "learning_rate": 6.128482595191276e-09, + "loss": 1.0846, + "step": 164526 + }, + { + "epoch": 1.98, + "grad_norm": 12.938423967853081, + "learning_rate": 6.108050738444782e-09, + "loss": 1.3314, + "step": 164529 + }, + { + "epoch": 1.98, + "grad_norm": 4.668732181905049, + "learning_rate": 6.0876529872522235e-09, + "loss": 1.0763, + "step": 164532 + }, + { + "epoch": 1.98, + "grad_norm": 9.958987688433082, + "learning_rate": 6.067289341682436e-09, + "loss": 1.0801, + "step": 164535 + }, + { + "epoch": 1.98, + "grad_norm": 16.204920259002964, + "learning_rate": 6.046959801804253e-09, + "loss": 1.0967, + "step": 164538 + }, + { + "epoch": 1.98, + "grad_norm": 7.938105512688347, + "learning_rate": 6.0266643676887285e-09, + "loss": 1.2333, + "step": 164541 + }, + { + "epoch": 1.98, + "grad_norm": 13.129979252983105, + "learning_rate": 6.006403039404696e-09, + "loss": 1.153, + "step": 164544 + }, + { + "epoch": 1.98, + "grad_norm": 7.599474625146869, + "learning_rate": 5.986175817020989e-09, + "loss": 0.9991, + "step": 164547 + }, + { + "epoch": 1.98, + "grad_norm": 6.331084775514543, + "learning_rate": 5.965982700606443e-09, + "loss": 0.9805, + "step": 164550 + }, + { + "epoch": 1.98, + "grad_norm": 3.585236643221663, + "learning_rate": 5.94582369022989e-09, + "loss": 1.2304, + "step": 164553 + }, + { + "epoch": 1.98, + "grad_norm": 15.096311638732507, + "learning_rate": 5.925698785960165e-09, + "loss": 1.2901, + "step": 164556 + }, + { + "epoch": 1.98, + "grad_norm": 11.861097098422526, + "learning_rate": 5.9056079878672126e-09, + "loss": 0.9185, + "step": 164559 + }, + { + "epoch": 1.98, + "grad_norm": 9.234376749367478, + "learning_rate": 5.885551296017644e-09, + "loss": 1.0541, + "step": 164562 + }, + { + "epoch": 1.98, + "grad_norm": 6.860446146776235, + "learning_rate": 5.8655287104814054e-09, + "loss": 1.2784, + "step": 164565 + }, + { + "epoch": 1.98, + "grad_norm": 9.520210233371113, + "learning_rate": 5.845540231326219e-09, + "loss": 1.1781, + "step": 164568 + }, + { + "epoch": 1.98, + "grad_norm": 5.108994956619466, + "learning_rate": 5.825585858619809e-09, + "loss": 0.8684, + "step": 164571 + }, + { + "epoch": 1.98, + "grad_norm": 6.5432348479910125, + "learning_rate": 5.805665592431009e-09, + "loss": 1.0813, + "step": 164574 + }, + { + "epoch": 1.98, + "grad_norm": 4.924856411382128, + "learning_rate": 5.785779432827543e-09, + "loss": 1.2437, + "step": 164577 + }, + { + "epoch": 1.98, + "grad_norm": 13.258980898329469, + "learning_rate": 5.765927379877134e-09, + "loss": 1.511, + "step": 164580 + }, + { + "epoch": 1.98, + "grad_norm": 10.340720221112377, + "learning_rate": 5.746109433648617e-09, + "loss": 1.5141, + "step": 164583 + }, + { + "epoch": 1.98, + "grad_norm": 6.941666012132171, + "learning_rate": 5.726325594208604e-09, + "loss": 0.916, + "step": 164586 + }, + { + "epoch": 1.98, + "grad_norm": 4.417551142943087, + "learning_rate": 5.706575861623709e-09, + "loss": 1.3143, + "step": 164589 + }, + { + "epoch": 1.98, + "grad_norm": 3.749316475068183, + "learning_rate": 5.686860235962766e-09, + "loss": 1.0302, + "step": 164592 + }, + { + "epoch": 1.98, + "grad_norm": 9.74926671559057, + "learning_rate": 5.667178717292387e-09, + "loss": 1.2635, + "step": 164595 + }, + { + "epoch": 1.98, + "grad_norm": 7.257356529341189, + "learning_rate": 5.647531305680298e-09, + "loss": 1.1972, + "step": 164598 + }, + { + "epoch": 1.98, + "grad_norm": 6.863080966291415, + "learning_rate": 5.627918001194222e-09, + "loss": 0.9867, + "step": 164601 + }, + { + "epoch": 1.98, + "grad_norm": 8.476037536765123, + "learning_rate": 5.6083388038985495e-09, + "loss": 1.2765, + "step": 164604 + }, + { + "epoch": 1.98, + "grad_norm": 12.258909746614595, + "learning_rate": 5.588793713862118e-09, + "loss": 1.1536, + "step": 164607 + }, + { + "epoch": 1.98, + "grad_norm": 5.410351773123276, + "learning_rate": 5.5692827311515376e-09, + "loss": 1.1689, + "step": 164610 + }, + { + "epoch": 1.98, + "grad_norm": 4.686227091442491, + "learning_rate": 5.549805855832313e-09, + "loss": 0.9506, + "step": 164613 + }, + { + "epoch": 1.98, + "grad_norm": 7.360744698326374, + "learning_rate": 5.530363087972168e-09, + "loss": 0.9796, + "step": 164616 + }, + { + "epoch": 1.98, + "grad_norm": 6.968451555003643, + "learning_rate": 5.510954427637716e-09, + "loss": 1.078, + "step": 164619 + }, + { + "epoch": 1.98, + "grad_norm": 6.398020659641112, + "learning_rate": 5.491579874893349e-09, + "loss": 1.4772, + "step": 164622 + }, + { + "epoch": 1.98, + "grad_norm": 10.092804149233803, + "learning_rate": 5.472239429805681e-09, + "loss": 1.4109, + "step": 164625 + }, + { + "epoch": 1.98, + "grad_norm": 9.351907480712127, + "learning_rate": 5.4529330924424365e-09, + "loss": 0.9556, + "step": 164628 + }, + { + "epoch": 1.98, + "grad_norm": 17.78951322846282, + "learning_rate": 5.4336608628668965e-09, + "loss": 0.8672, + "step": 164631 + }, + { + "epoch": 1.98, + "grad_norm": 4.021056248256744, + "learning_rate": 5.414422741147896e-09, + "loss": 1.4152, + "step": 164634 + }, + { + "epoch": 1.98, + "grad_norm": 8.914908385342464, + "learning_rate": 5.395218727348717e-09, + "loss": 1.1805, + "step": 164637 + }, + { + "epoch": 1.98, + "grad_norm": 6.47845841433205, + "learning_rate": 5.376048821535973e-09, + "loss": 1.2851, + "step": 164640 + }, + { + "epoch": 1.98, + "grad_norm": 6.883751385373356, + "learning_rate": 5.3569130237740575e-09, + "loss": 1.4336, + "step": 164643 + }, + { + "epoch": 1.98, + "grad_norm": 4.617313753201361, + "learning_rate": 5.3378113341284735e-09, + "loss": 1.0595, + "step": 164646 + }, + { + "epoch": 1.98, + "grad_norm": 9.930163030640346, + "learning_rate": 5.318743752665833e-09, + "loss": 0.8169, + "step": 164649 + }, + { + "epoch": 1.98, + "grad_norm": 7.0397530549147875, + "learning_rate": 5.299710279450532e-09, + "loss": 0.9277, + "step": 164652 + }, + { + "epoch": 1.98, + "grad_norm": 7.076186191860718, + "learning_rate": 5.28071091454696e-09, + "loss": 1.3141, + "step": 164655 + }, + { + "epoch": 1.98, + "grad_norm": 5.139172647685417, + "learning_rate": 5.261745658019513e-09, + "loss": 1.4506, + "step": 164658 + }, + { + "epoch": 1.98, + "grad_norm": 66.11899268024585, + "learning_rate": 5.242814509933691e-09, + "loss": 1.2146, + "step": 164661 + }, + { + "epoch": 1.98, + "grad_norm": 7.06354792079341, + "learning_rate": 5.223917470355e-09, + "loss": 1.3973, + "step": 164664 + }, + { + "epoch": 1.98, + "grad_norm": 20.253198447578328, + "learning_rate": 5.205054539346721e-09, + "loss": 1.0511, + "step": 164667 + }, + { + "epoch": 1.98, + "grad_norm": 29.14998190074247, + "learning_rate": 5.1862257169732474e-09, + "loss": 1.1188, + "step": 164670 + }, + { + "epoch": 1.98, + "grad_norm": 8.427187187662604, + "learning_rate": 5.167431003298973e-09, + "loss": 1.4693, + "step": 164673 + }, + { + "epoch": 1.98, + "grad_norm": 4.619826160026878, + "learning_rate": 5.148670398388289e-09, + "loss": 1.5036, + "step": 164676 + }, + { + "epoch": 1.98, + "grad_norm": 16.045725707763648, + "learning_rate": 5.1299439023044795e-09, + "loss": 1.3168, + "step": 164679 + }, + { + "epoch": 1.98, + "grad_norm": 9.070723739513456, + "learning_rate": 5.111251515113047e-09, + "loss": 1.2099, + "step": 164682 + }, + { + "epoch": 1.98, + "grad_norm": 5.823757242459219, + "learning_rate": 5.092593236876164e-09, + "loss": 1.015, + "step": 164685 + }, + { + "epoch": 1.98, + "grad_norm": 4.0074301136843635, + "learning_rate": 5.073969067658225e-09, + "loss": 1.001, + "step": 164688 + }, + { + "epoch": 1.98, + "grad_norm": 6.198341163210504, + "learning_rate": 5.055379007522509e-09, + "loss": 1.2253, + "step": 164691 + }, + { + "epoch": 1.98, + "grad_norm": 6.620921388012717, + "learning_rate": 5.036823056533413e-09, + "loss": 0.9093, + "step": 164694 + }, + { + "epoch": 1.98, + "grad_norm": 9.904603419507803, + "learning_rate": 5.018301214751997e-09, + "loss": 1.1897, + "step": 164697 + }, + { + "epoch": 1.98, + "grad_norm": 6.4708002130322075, + "learning_rate": 4.999813482244875e-09, + "loss": 1.4526, + "step": 164700 + }, + { + "epoch": 1.98, + "grad_norm": 11.88237605752013, + "learning_rate": 4.981359859071999e-09, + "loss": 1.2568, + "step": 164703 + }, + { + "epoch": 1.98, + "grad_norm": 9.464641805926911, + "learning_rate": 4.962940345296652e-09, + "loss": 1.2917, + "step": 164706 + }, + { + "epoch": 1.98, + "grad_norm": 4.82632432262057, + "learning_rate": 4.944554940984336e-09, + "loss": 1.2004, + "step": 164709 + }, + { + "epoch": 1.98, + "grad_norm": 2.2650143843457675, + "learning_rate": 4.926203646195005e-09, + "loss": 0.9696, + "step": 164712 + }, + { + "epoch": 1.98, + "grad_norm": 11.954435801921939, + "learning_rate": 4.90788646099305e-09, + "loss": 1.0171, + "step": 164715 + }, + { + "epoch": 1.98, + "grad_norm": 6.4929771507029495, + "learning_rate": 4.889603385439534e-09, + "loss": 1.2832, + "step": 164718 + }, + { + "epoch": 1.98, + "grad_norm": 4.532451664991929, + "learning_rate": 4.87135441959774e-09, + "loss": 0.6961, + "step": 164721 + }, + { + "epoch": 1.98, + "grad_norm": 9.188502440337313, + "learning_rate": 4.8531395635309505e-09, + "loss": 0.8294, + "step": 164724 + }, + { + "epoch": 1.98, + "grad_norm": 7.642613628970154, + "learning_rate": 4.834958817299118e-09, + "loss": 0.8841, + "step": 164727 + }, + { + "epoch": 1.98, + "grad_norm": 10.014620815399162, + "learning_rate": 4.816812180964414e-09, + "loss": 0.8935, + "step": 164730 + }, + { + "epoch": 1.98, + "grad_norm": 7.154337460121756, + "learning_rate": 4.798699654591232e-09, + "loss": 1.1083, + "step": 164733 + }, + { + "epoch": 1.98, + "grad_norm": 5.350037798406564, + "learning_rate": 4.780621238239525e-09, + "loss": 1.1202, + "step": 164736 + }, + { + "epoch": 1.98, + "grad_norm": 8.627702306183288, + "learning_rate": 4.762576931970353e-09, + "loss": 0.8039, + "step": 164739 + }, + { + "epoch": 1.98, + "grad_norm": 8.134196923424406, + "learning_rate": 4.744566735847001e-09, + "loss": 1.0896, + "step": 164742 + }, + { + "epoch": 1.98, + "grad_norm": 15.169341185598414, + "learning_rate": 4.726590649930529e-09, + "loss": 1.2753, + "step": 164745 + }, + { + "epoch": 1.98, + "grad_norm": 3.849565156567659, + "learning_rate": 4.708648674280892e-09, + "loss": 1.0761, + "step": 164748 + }, + { + "epoch": 1.98, + "grad_norm": 8.219146327606401, + "learning_rate": 4.690740808961369e-09, + "loss": 0.918, + "step": 164751 + }, + { + "epoch": 1.98, + "grad_norm": 13.084328235046861, + "learning_rate": 4.672867054031915e-09, + "loss": 0.9235, + "step": 164754 + }, + { + "epoch": 1.98, + "grad_norm": 4.409997594663703, + "learning_rate": 4.655027409553592e-09, + "loss": 1.0137, + "step": 164757 + }, + { + "epoch": 1.98, + "grad_norm": 5.877083776663059, + "learning_rate": 4.637221875586351e-09, + "loss": 1.1588, + "step": 164760 + }, + { + "epoch": 1.98, + "grad_norm": 5.636865004414861, + "learning_rate": 4.619450452193475e-09, + "loss": 1.1977, + "step": 164763 + }, + { + "epoch": 1.98, + "grad_norm": 5.66936609476622, + "learning_rate": 4.601713139432695e-09, + "loss": 1.1093, + "step": 164766 + }, + { + "epoch": 1.98, + "grad_norm": 13.587900739077007, + "learning_rate": 4.584009937367295e-09, + "loss": 1.2397, + "step": 164769 + }, + { + "epoch": 1.98, + "grad_norm": 12.435835485589134, + "learning_rate": 4.5663408460550065e-09, + "loss": 0.8975, + "step": 164772 + }, + { + "epoch": 1.98, + "grad_norm": 7.453588383594757, + "learning_rate": 4.5487058655580005e-09, + "loss": 1.4111, + "step": 164775 + }, + { + "epoch": 1.98, + "grad_norm": 7.184698502005331, + "learning_rate": 4.531104995936231e-09, + "loss": 1.3918, + "step": 164778 + }, + { + "epoch": 1.98, + "grad_norm": 5.087839950489996, + "learning_rate": 4.5135382372496486e-09, + "loss": 0.9814, + "step": 164781 + }, + { + "epoch": 1.98, + "grad_norm": 9.06013251636972, + "learning_rate": 4.496005589557096e-09, + "loss": 0.8806, + "step": 164784 + }, + { + "epoch": 1.98, + "grad_norm": 8.338033903639579, + "learning_rate": 4.478507052919634e-09, + "loss": 1.4292, + "step": 164787 + }, + { + "epoch": 1.98, + "grad_norm": 6.0181202309238735, + "learning_rate": 4.461042627397216e-09, + "loss": 0.8146, + "step": 164790 + }, + { + "epoch": 1.98, + "grad_norm": 12.994970508470843, + "learning_rate": 4.4436123130486844e-09, + "loss": 0.9811, + "step": 164793 + }, + { + "epoch": 1.98, + "grad_norm": 8.078905738725304, + "learning_rate": 4.4262161099328796e-09, + "loss": 0.9076, + "step": 164796 + }, + { + "epoch": 1.98, + "grad_norm": 5.797587383590253, + "learning_rate": 4.4088540181108645e-09, + "loss": 1.0644, + "step": 164799 + }, + { + "epoch": 1.98, + "grad_norm": 6.763255255095117, + "learning_rate": 4.39152603764148e-09, + "loss": 1.1456, + "step": 164802 + }, + { + "epoch": 1.98, + "grad_norm": 5.505360545669578, + "learning_rate": 4.37423216858246e-09, + "loss": 1.2241, + "step": 164805 + }, + { + "epoch": 1.98, + "grad_norm": 10.510371016843209, + "learning_rate": 4.356972410993754e-09, + "loss": 1.2814, + "step": 164808 + }, + { + "epoch": 1.98, + "grad_norm": 7.00050797168967, + "learning_rate": 4.339746764935316e-09, + "loss": 1.2102, + "step": 164811 + }, + { + "epoch": 1.98, + "grad_norm": 4.981148001919492, + "learning_rate": 4.3225552304637655e-09, + "loss": 0.9818, + "step": 164814 + }, + { + "epoch": 1.98, + "grad_norm": 5.103226379572035, + "learning_rate": 4.305397807639056e-09, + "loss": 1.022, + "step": 164817 + }, + { + "epoch": 1.98, + "grad_norm": 3.890326178732281, + "learning_rate": 4.288274496520029e-09, + "loss": 1.4028, + "step": 164820 + }, + { + "epoch": 1.98, + "grad_norm": 9.339952037473298, + "learning_rate": 4.271185297164415e-09, + "loss": 1.4593, + "step": 164823 + }, + { + "epoch": 1.98, + "grad_norm": 15.926379673506695, + "learning_rate": 4.254130209631058e-09, + "loss": 1.19, + "step": 164826 + }, + { + "epoch": 1.98, + "grad_norm": 10.226681087244412, + "learning_rate": 4.237109233977688e-09, + "loss": 1.1771, + "step": 164829 + }, + { + "epoch": 1.98, + "grad_norm": 3.4008471938461695, + "learning_rate": 4.2201223702631465e-09, + "loss": 1.2991, + "step": 164832 + }, + { + "epoch": 1.98, + "grad_norm": 6.1437800451907725, + "learning_rate": 4.203169618544056e-09, + "loss": 1.2145, + "step": 164835 + }, + { + "epoch": 1.98, + "grad_norm": 4.303494298647111, + "learning_rate": 4.186250978879258e-09, + "loss": 0.859, + "step": 164838 + }, + { + "epoch": 1.98, + "grad_norm": 7.7890283027440725, + "learning_rate": 4.169366451326484e-09, + "loss": 0.9657, + "step": 164841 + }, + { + "epoch": 1.98, + "grad_norm": 2.4842527896524196, + "learning_rate": 4.1525160359434655e-09, + "loss": 1.2115, + "step": 164844 + }, + { + "epoch": 1.98, + "grad_norm": 13.053144982853235, + "learning_rate": 4.135699732786824e-09, + "loss": 1.3994, + "step": 164847 + }, + { + "epoch": 1.98, + "grad_norm": 14.732003540767966, + "learning_rate": 4.118917541915401e-09, + "loss": 0.9237, + "step": 164850 + }, + { + "epoch": 1.98, + "grad_norm": 6.669259151883572, + "learning_rate": 4.102169463384708e-09, + "loss": 1.1097, + "step": 164853 + }, + { + "epoch": 1.98, + "grad_norm": 6.829657916560443, + "learning_rate": 4.085455497253588e-09, + "loss": 1.1499, + "step": 164856 + }, + { + "epoch": 1.98, + "grad_norm": 6.040991236589424, + "learning_rate": 4.068775643577549e-09, + "loss": 1.0889, + "step": 164859 + }, + { + "epoch": 1.98, + "grad_norm": 6.845658054486448, + "learning_rate": 4.052129902414326e-09, + "loss": 1.1685, + "step": 164862 + }, + { + "epoch": 1.98, + "grad_norm": 12.647076376825181, + "learning_rate": 4.035518273821648e-09, + "loss": 1.5372, + "step": 164865 + }, + { + "epoch": 1.98, + "grad_norm": 6.2392015214546515, + "learning_rate": 4.018940757853917e-09, + "loss": 1.0726, + "step": 164868 + }, + { + "epoch": 1.98, + "grad_norm": 12.644356152063555, + "learning_rate": 4.002397354569975e-09, + "loss": 1.3329, + "step": 164871 + }, + { + "epoch": 1.98, + "grad_norm": 6.60694215039192, + "learning_rate": 3.985888064025334e-09, + "loss": 1.0671, + "step": 164874 + }, + { + "epoch": 1.98, + "grad_norm": 7.136477808762646, + "learning_rate": 3.969412886275504e-09, + "loss": 1.238, + "step": 164877 + }, + { + "epoch": 1.98, + "grad_norm": 4.702608117709482, + "learning_rate": 3.952971821378215e-09, + "loss": 0.9089, + "step": 164880 + }, + { + "epoch": 1.98, + "grad_norm": 3.886734200910909, + "learning_rate": 3.936564869387871e-09, + "loss": 1.1375, + "step": 164883 + }, + { + "epoch": 1.98, + "grad_norm": 5.757712141124382, + "learning_rate": 3.920192030362202e-09, + "loss": 1.2653, + "step": 164886 + }, + { + "epoch": 1.98, + "grad_norm": 6.417437951634979, + "learning_rate": 3.90385330435672e-09, + "loss": 0.965, + "step": 164889 + }, + { + "epoch": 1.98, + "grad_norm": 4.062898505867808, + "learning_rate": 3.8875486914258245e-09, + "loss": 1.19, + "step": 164892 + }, + { + "epoch": 1.98, + "grad_norm": 4.254920303929538, + "learning_rate": 3.871278191627248e-09, + "loss": 1.1947, + "step": 164895 + }, + { + "epoch": 1.98, + "grad_norm": 3.1762344030163274, + "learning_rate": 3.855041805014281e-09, + "loss": 1.3557, + "step": 164898 + }, + { + "epoch": 1.98, + "grad_norm": 14.702419651887817, + "learning_rate": 3.838839531644656e-09, + "loss": 0.8865, + "step": 164901 + }, + { + "epoch": 1.98, + "grad_norm": 9.854785861800796, + "learning_rate": 3.822671371571663e-09, + "loss": 1.415, + "step": 164904 + }, + { + "epoch": 1.98, + "grad_norm": 12.371744788711315, + "learning_rate": 3.806537324851922e-09, + "loss": 1.319, + "step": 164907 + }, + { + "epoch": 1.98, + "grad_norm": 6.07175546724469, + "learning_rate": 3.790437391538726e-09, + "loss": 1.1884, + "step": 164910 + }, + { + "epoch": 1.98, + "grad_norm": 6.469371994500492, + "learning_rate": 3.774371571688695e-09, + "loss": 0.8517, + "step": 164913 + }, + { + "epoch": 1.98, + "grad_norm": 7.619375281811784, + "learning_rate": 3.7583398653562305e-09, + "loss": 1.1793, + "step": 164916 + }, + { + "epoch": 1.98, + "grad_norm": 10.826447889062296, + "learning_rate": 3.742342272596844e-09, + "loss": 1.1882, + "step": 164919 + }, + { + "epoch": 1.98, + "grad_norm": 11.22977667265462, + "learning_rate": 3.726378793462715e-09, + "loss": 1.2729, + "step": 164922 + }, + { + "epoch": 1.98, + "grad_norm": 18.394180407596448, + "learning_rate": 3.710449428010465e-09, + "loss": 1.1644, + "step": 164925 + }, + { + "epoch": 1.98, + "grad_norm": 4.726131311598323, + "learning_rate": 3.6945541762933857e-09, + "loss": 1.0299, + "step": 164928 + }, + { + "epoch": 1.98, + "grad_norm": 5.270946163876209, + "learning_rate": 3.6786930383669872e-09, + "loss": 1.1613, + "step": 164931 + }, + { + "epoch": 1.98, + "grad_norm": 5.3663461426609, + "learning_rate": 3.6628660142845607e-09, + "loss": 1.151, + "step": 164934 + }, + { + "epoch": 1.98, + "grad_norm": 10.408538385643409, + "learning_rate": 3.6470731040993967e-09, + "loss": 1.018, + "step": 164937 + }, + { + "epoch": 1.98, + "grad_norm": 7.71566219089587, + "learning_rate": 3.6313143078658963e-09, + "loss": 0.8544, + "step": 164940 + }, + { + "epoch": 1.98, + "grad_norm": 9.21353118343953, + "learning_rate": 3.6155896256384604e-09, + "loss": 1.1522, + "step": 164943 + }, + { + "epoch": 1.98, + "grad_norm": 4.8907008223332245, + "learning_rate": 3.5998990574703794e-09, + "loss": 1.0771, + "step": 164946 + }, + { + "epoch": 1.98, + "grad_norm": 10.407445007382139, + "learning_rate": 3.584242603414945e-09, + "loss": 1.4005, + "step": 164949 + }, + { + "epoch": 1.98, + "grad_norm": 21.6332880400154, + "learning_rate": 3.5686202635265567e-09, + "loss": 1.4277, + "step": 164952 + }, + { + "epoch": 1.98, + "grad_norm": 6.087297559459484, + "learning_rate": 3.553032037857396e-09, + "loss": 0.789, + "step": 164955 + }, + { + "epoch": 1.98, + "grad_norm": 9.51210782730179, + "learning_rate": 3.537477926460753e-09, + "loss": 0.9248, + "step": 164958 + }, + { + "epoch": 1.98, + "grad_norm": 5.788028380109575, + "learning_rate": 3.5219579293899185e-09, + "loss": 1.2931, + "step": 164961 + }, + { + "epoch": 1.98, + "grad_norm": 12.683534300750766, + "learning_rate": 3.5064720466981837e-09, + "loss": 1.1359, + "step": 164964 + }, + { + "epoch": 1.98, + "grad_norm": 8.49676643700873, + "learning_rate": 3.491020278437729e-09, + "loss": 1.1152, + "step": 164967 + }, + { + "epoch": 1.98, + "grad_norm": 6.732723385091458, + "learning_rate": 3.4756026246618447e-09, + "loss": 1.3024, + "step": 164970 + }, + { + "epoch": 1.98, + "grad_norm": 21.40280595143473, + "learning_rate": 3.4602190854227114e-09, + "loss": 1.0567, + "step": 164973 + }, + { + "epoch": 1.98, + "grad_norm": 5.560713241222909, + "learning_rate": 3.44486966077362e-09, + "loss": 1.207, + "step": 164976 + }, + { + "epoch": 1.98, + "grad_norm": 3.894782462088572, + "learning_rate": 3.4295543507667507e-09, + "loss": 1.2375, + "step": 164979 + }, + { + "epoch": 1.98, + "grad_norm": 4.9991983250747065, + "learning_rate": 3.414273155453174e-09, + "loss": 1.1716, + "step": 164982 + }, + { + "epoch": 1.98, + "grad_norm": 7.612956680359363, + "learning_rate": 3.3990260748872904e-09, + "loss": 0.7234, + "step": 164985 + }, + { + "epoch": 1.98, + "grad_norm": 9.49847340155936, + "learning_rate": 3.3838131091190606e-09, + "loss": 1.2015, + "step": 164988 + }, + { + "epoch": 1.98, + "grad_norm": 10.611604124638523, + "learning_rate": 3.3686342582006648e-09, + "loss": 1.0953, + "step": 164991 + }, + { + "epoch": 1.98, + "grad_norm": 10.523516863965694, + "learning_rate": 3.3534895221842834e-09, + "loss": 1.3401, + "step": 164994 + }, + { + "epoch": 1.98, + "grad_norm": 14.797834609019397, + "learning_rate": 3.338378901122097e-09, + "loss": 1.0532, + "step": 164997 + }, + { + "epoch": 1.98, + "grad_norm": 5.0955243027422235, + "learning_rate": 3.3233023950651755e-09, + "loss": 1.3291, + "step": 165000 + }, + { + "epoch": 1.98, + "grad_norm": 5.914045203081643, + "learning_rate": 3.3082600040656997e-09, + "loss": 1.0622, + "step": 165003 + }, + { + "epoch": 1.98, + "grad_norm": 27.353234249098385, + "learning_rate": 3.29325172817363e-09, + "loss": 1.4029, + "step": 165006 + }, + { + "epoch": 1.98, + "grad_norm": 8.409636911807432, + "learning_rate": 3.2782775674400358e-09, + "loss": 1.2644, + "step": 165009 + }, + { + "epoch": 1.98, + "grad_norm": 4.59974805030323, + "learning_rate": 3.2633375219182084e-09, + "loss": 1.4957, + "step": 165012 + }, + { + "epoch": 1.98, + "grad_norm": 5.3488823082304355, + "learning_rate": 3.2484315916569974e-09, + "loss": 1.1703, + "step": 165015 + }, + { + "epoch": 1.98, + "grad_norm": 20.235728014764703, + "learning_rate": 3.2335597767085834e-09, + "loss": 0.972, + "step": 165018 + }, + { + "epoch": 1.98, + "grad_norm": 2.76058755152013, + "learning_rate": 3.218722077124037e-09, + "loss": 1.1619, + "step": 165021 + }, + { + "epoch": 1.98, + "grad_norm": 7.028616452864417, + "learning_rate": 3.203918492952207e-09, + "loss": 1.2097, + "step": 165024 + }, + { + "epoch": 1.98, + "grad_norm": 7.690472880354873, + "learning_rate": 3.1891490242441645e-09, + "loss": 0.9121, + "step": 165027 + }, + { + "epoch": 1.98, + "grad_norm": 10.873523134579857, + "learning_rate": 3.1744136710509797e-09, + "loss": 1.3301, + "step": 165030 + }, + { + "epoch": 1.98, + "grad_norm": 21.096510861415286, + "learning_rate": 3.1597124334226127e-09, + "loss": 1.6718, + "step": 165033 + }, + { + "epoch": 1.98, + "grad_norm": 4.349044276582962, + "learning_rate": 3.1450453114101333e-09, + "loss": 0.7726, + "step": 165036 + }, + { + "epoch": 1.98, + "grad_norm": 8.076616553974645, + "learning_rate": 3.1304123050623913e-09, + "loss": 1.1113, + "step": 165039 + }, + { + "epoch": 1.98, + "grad_norm": 2.6401994476720376, + "learning_rate": 3.1158134144293474e-09, + "loss": 1.4161, + "step": 165042 + }, + { + "epoch": 1.98, + "grad_norm": 2.938545234968771, + "learning_rate": 3.101248639560961e-09, + "loss": 0.9921, + "step": 165045 + }, + { + "epoch": 1.98, + "grad_norm": 7.027479163040911, + "learning_rate": 3.0867179805071924e-09, + "loss": 1.0498, + "step": 165048 + }, + { + "epoch": 1.98, + "grad_norm": 3.373246752900935, + "learning_rate": 3.0722214373180015e-09, + "loss": 0.9544, + "step": 165051 + }, + { + "epoch": 1.98, + "grad_norm": 17.280625625446707, + "learning_rate": 3.0577590100422383e-09, + "loss": 1.3247, + "step": 165054 + }, + { + "epoch": 1.98, + "grad_norm": 8.94864257804418, + "learning_rate": 3.043330698729863e-09, + "loss": 1.2237, + "step": 165057 + }, + { + "epoch": 1.98, + "grad_norm": 9.492020193918247, + "learning_rate": 3.0289365034297247e-09, + "loss": 1.2, + "step": 165060 + }, + { + "epoch": 1.98, + "grad_norm": 3.5429030031340836, + "learning_rate": 3.014576424190674e-09, + "loss": 1.1434, + "step": 165063 + }, + { + "epoch": 1.98, + "grad_norm": 4.17253362127507, + "learning_rate": 3.00025046106156e-09, + "loss": 1.4519, + "step": 165066 + }, + { + "epoch": 1.98, + "grad_norm": 11.000822198827313, + "learning_rate": 2.9859586140923434e-09, + "loss": 1.338, + "step": 165069 + }, + { + "epoch": 1.98, + "grad_norm": 6.07270253106812, + "learning_rate": 2.9717008833307637e-09, + "loss": 1.2865, + "step": 165072 + }, + { + "epoch": 1.98, + "grad_norm": 3.1691456111227385, + "learning_rate": 2.9574772688256703e-09, + "loss": 1.1214, + "step": 165075 + }, + { + "epoch": 1.99, + "grad_norm": 1.9931272086099103, + "learning_rate": 2.9432877706259134e-09, + "loss": 1.3044, + "step": 165078 + }, + { + "epoch": 1.99, + "grad_norm": 5.084314743637772, + "learning_rate": 2.9291323887803423e-09, + "loss": 0.8853, + "step": 165081 + }, + { + "epoch": 1.99, + "grad_norm": 10.472049681352313, + "learning_rate": 2.915011123335587e-09, + "loss": 0.9944, + "step": 165084 + }, + { + "epoch": 1.99, + "grad_norm": 6.9825802154649095, + "learning_rate": 2.9009239743416072e-09, + "loss": 1.0231, + "step": 165087 + }, + { + "epoch": 1.99, + "grad_norm": 21.83841206667351, + "learning_rate": 2.8868709418461427e-09, + "loss": 1.2461, + "step": 165090 + }, + { + "epoch": 1.99, + "grad_norm": 9.660402899388707, + "learning_rate": 2.872852025896933e-09, + "loss": 1.4497, + "step": 165093 + }, + { + "epoch": 1.99, + "grad_norm": 2.5647896719943812, + "learning_rate": 2.858867226540607e-09, + "loss": 0.8683, + "step": 165096 + }, + { + "epoch": 1.99, + "grad_norm": 8.267472195635802, + "learning_rate": 2.8449165438271253e-09, + "loss": 1.0848, + "step": 165099 + }, + { + "epoch": 1.99, + "grad_norm": 8.113780743426862, + "learning_rate": 2.8309999778031172e-09, + "loss": 1.0125, + "step": 165102 + }, + { + "epoch": 1.99, + "grad_norm": 2.4311833970790304, + "learning_rate": 2.817117528515212e-09, + "loss": 0.8893, + "step": 165105 + }, + { + "epoch": 1.99, + "grad_norm": 6.731010080870731, + "learning_rate": 2.803269196012259e-09, + "loss": 0.9832, + "step": 165108 + }, + { + "epoch": 1.99, + "grad_norm": 12.81846300504399, + "learning_rate": 2.7894549803397785e-09, + "loss": 1.6434, + "step": 165111 + }, + { + "epoch": 1.99, + "grad_norm": 9.540607265253234, + "learning_rate": 2.7756748815466193e-09, + "loss": 1.2362, + "step": 165114 + }, + { + "epoch": 1.99, + "grad_norm": 15.482230545345942, + "learning_rate": 2.761928899678301e-09, + "loss": 1.1953, + "step": 165117 + }, + { + "epoch": 1.99, + "grad_norm": 4.532850167259408, + "learning_rate": 2.7482170347836732e-09, + "loss": 1.2443, + "step": 165120 + }, + { + "epoch": 1.99, + "grad_norm": 5.118580666768435, + "learning_rate": 2.7345392869082556e-09, + "loss": 0.8183, + "step": 165123 + }, + { + "epoch": 1.99, + "grad_norm": 7.481264777659594, + "learning_rate": 2.7208956560975663e-09, + "loss": 0.997, + "step": 165126 + }, + { + "epoch": 1.99, + "grad_norm": 10.377547789243259, + "learning_rate": 2.707286142400456e-09, + "loss": 1.3062, + "step": 165129 + }, + { + "epoch": 1.99, + "grad_norm": 12.109338323293292, + "learning_rate": 2.6937107458624434e-09, + "loss": 0.7712, + "step": 165132 + }, + { + "epoch": 1.99, + "grad_norm": 10.348309600021901, + "learning_rate": 2.6801694665301583e-09, + "loss": 1.195, + "step": 165135 + }, + { + "epoch": 1.99, + "grad_norm": 11.213519881778792, + "learning_rate": 2.666662304448009e-09, + "loss": 1.1944, + "step": 165138 + }, + { + "epoch": 1.99, + "grad_norm": 4.5543486940659115, + "learning_rate": 2.6531892596648455e-09, + "loss": 1.2916, + "step": 165141 + }, + { + "epoch": 1.99, + "grad_norm": 6.736680204591054, + "learning_rate": 2.639750332225077e-09, + "loss": 0.8749, + "step": 165144 + }, + { + "epoch": 1.99, + "grad_norm": 12.23529830868982, + "learning_rate": 2.6263455221742228e-09, + "loss": 0.9358, + "step": 165147 + }, + { + "epoch": 1.99, + "grad_norm": 8.279134614376995, + "learning_rate": 2.612974829558912e-09, + "loss": 1.2094, + "step": 165150 + }, + { + "epoch": 1.99, + "grad_norm": 26.96230795645412, + "learning_rate": 2.5996382544246633e-09, + "loss": 1.0176, + "step": 165153 + }, + { + "epoch": 1.99, + "grad_norm": 5.550659590011416, + "learning_rate": 2.5863357968169965e-09, + "loss": 1.0256, + "step": 165156 + }, + { + "epoch": 1.99, + "grad_norm": 15.567378859855578, + "learning_rate": 2.5730674567803206e-09, + "loss": 1.1226, + "step": 165159 + }, + { + "epoch": 1.99, + "grad_norm": 4.6711822651812245, + "learning_rate": 2.5598332343612642e-09, + "loss": 1.056, + "step": 165162 + }, + { + "epoch": 1.99, + "grad_norm": 7.278058985866454, + "learning_rate": 2.5466331296042367e-09, + "loss": 1.1659, + "step": 165165 + }, + { + "epoch": 1.99, + "grad_norm": 5.008843293074304, + "learning_rate": 2.5334671425547573e-09, + "loss": 1.5043, + "step": 165168 + }, + { + "epoch": 1.99, + "grad_norm": 10.06242799809673, + "learning_rate": 2.5203352732572352e-09, + "loss": 1.1507, + "step": 165171 + }, + { + "epoch": 1.99, + "grad_norm": 8.2432113491753, + "learning_rate": 2.5072375217571887e-09, + "loss": 0.9869, + "step": 165174 + }, + { + "epoch": 1.99, + "grad_norm": 5.411905577720293, + "learning_rate": 2.4941738880990273e-09, + "loss": 1.3787, + "step": 165177 + }, + { + "epoch": 1.99, + "grad_norm": 4.464201555196556, + "learning_rate": 2.48114437232605e-09, + "loss": 0.6966, + "step": 165180 + }, + { + "epoch": 1.99, + "grad_norm": 4.9086963147323, + "learning_rate": 2.4681489744848854e-09, + "loss": 1.0342, + "step": 165183 + }, + { + "epoch": 1.99, + "grad_norm": 4.083131001197836, + "learning_rate": 2.455187694617722e-09, + "loss": 1.3665, + "step": 165186 + }, + { + "epoch": 1.99, + "grad_norm": 18.783289213963382, + "learning_rate": 2.44226053277119e-09, + "loss": 1.2278, + "step": 165189 + }, + { + "epoch": 1.99, + "grad_norm": 5.561176904022753, + "learning_rate": 2.4293674889874776e-09, + "loss": 1.3487, + "step": 165192 + }, + { + "epoch": 1.99, + "grad_norm": 7.648810183372933, + "learning_rate": 2.4165085633109932e-09, + "loss": 1.1583, + "step": 165195 + }, + { + "epoch": 1.99, + "grad_norm": 10.917029785005633, + "learning_rate": 2.403683755786146e-09, + "loss": 1.2585, + "step": 165198 + }, + { + "epoch": 1.99, + "grad_norm": 12.22869057674484, + "learning_rate": 2.3908930664562346e-09, + "loss": 1.0232, + "step": 165201 + }, + { + "epoch": 1.99, + "grad_norm": 2.343576411329178, + "learning_rate": 2.3781364953656684e-09, + "loss": 1.0657, + "step": 165204 + }, + { + "epoch": 1.99, + "grad_norm": 8.455996335116199, + "learning_rate": 2.365414042556635e-09, + "loss": 1.2582, + "step": 165207 + }, + { + "epoch": 1.99, + "grad_norm": 4.144751266599883, + "learning_rate": 2.3527257080735445e-09, + "loss": 1.0092, + "step": 165210 + }, + { + "epoch": 1.99, + "grad_norm": 7.311176094728808, + "learning_rate": 2.3400714919596947e-09, + "loss": 0.9943, + "step": 165213 + }, + { + "epoch": 1.99, + "grad_norm": 9.98700490680862, + "learning_rate": 2.327451394258384e-09, + "loss": 0.9251, + "step": 165216 + }, + { + "epoch": 1.99, + "grad_norm": 5.758007015301837, + "learning_rate": 2.314865415011802e-09, + "loss": 1.0256, + "step": 165219 + }, + { + "epoch": 1.99, + "grad_norm": 10.392201693633309, + "learning_rate": 2.3023135542632467e-09, + "loss": 1.13, + "step": 165222 + }, + { + "epoch": 1.99, + "grad_norm": 8.461808253810629, + "learning_rate": 2.2897958120560172e-09, + "loss": 1.2597, + "step": 165225 + }, + { + "epoch": 1.99, + "grad_norm": 6.651141258146056, + "learning_rate": 2.277312188432301e-09, + "loss": 1.1686, + "step": 165228 + }, + { + "epoch": 1.99, + "grad_norm": 4.18622853907148, + "learning_rate": 2.264862683435398e-09, + "loss": 0.9868, + "step": 165231 + }, + { + "epoch": 1.99, + "grad_norm": 26.18269949300854, + "learning_rate": 2.252447297106386e-09, + "loss": 1.6625, + "step": 165234 + }, + { + "epoch": 1.99, + "grad_norm": 6.2727907392013975, + "learning_rate": 2.240066029489674e-09, + "loss": 0.7946, + "step": 165237 + }, + { + "epoch": 1.99, + "grad_norm": 7.010517675164404, + "learning_rate": 2.2277188806263393e-09, + "loss": 1.1062, + "step": 165240 + }, + { + "epoch": 1.99, + "grad_norm": 5.625013045803621, + "learning_rate": 2.2154058505585717e-09, + "loss": 1.2349, + "step": 165243 + }, + { + "epoch": 1.99, + "grad_norm": 8.90447652061194, + "learning_rate": 2.203126939327449e-09, + "loss": 0.962, + "step": 165246 + }, + { + "epoch": 1.99, + "grad_norm": 3.649512025285381, + "learning_rate": 2.19088214697738e-09, + "loss": 1.1625, + "step": 165249 + }, + { + "epoch": 1.99, + "grad_norm": 6.635605036718701, + "learning_rate": 2.1786714735472226e-09, + "loss": 1.4166, + "step": 165252 + }, + { + "epoch": 1.99, + "grad_norm": 2.2223763672335854, + "learning_rate": 2.1664949190813854e-09, + "loss": 1.1638, + "step": 165255 + }, + { + "epoch": 1.99, + "grad_norm": 5.826237632932934, + "learning_rate": 2.154352483618727e-09, + "loss": 0.8666, + "step": 165258 + }, + { + "epoch": 1.99, + "grad_norm": 5.369015749761337, + "learning_rate": 2.1422441672036556e-09, + "loss": 1.3612, + "step": 165261 + }, + { + "epoch": 1.99, + "grad_norm": 29.95361056292962, + "learning_rate": 2.1301699698750287e-09, + "loss": 1.4104, + "step": 165264 + }, + { + "epoch": 1.99, + "grad_norm": 9.555547253419263, + "learning_rate": 2.118129891673926e-09, + "loss": 1.0783, + "step": 165267 + }, + { + "epoch": 1.99, + "grad_norm": 5.107005789920368, + "learning_rate": 2.1061239326436445e-09, + "loss": 1.0474, + "step": 165270 + }, + { + "epoch": 1.99, + "grad_norm": 8.510910540750972, + "learning_rate": 2.0941520928241533e-09, + "loss": 1.3819, + "step": 165273 + }, + { + "epoch": 1.99, + "grad_norm": 12.37636338358089, + "learning_rate": 2.08221437225542e-09, + "loss": 0.8994, + "step": 165276 + }, + { + "epoch": 1.99, + "grad_norm": 8.23758160600334, + "learning_rate": 2.0703107709796333e-09, + "loss": 0.9647, + "step": 165279 + }, + { + "epoch": 1.99, + "grad_norm": 11.653670352319393, + "learning_rate": 2.058441289035651e-09, + "loss": 1.1184, + "step": 165282 + }, + { + "epoch": 1.99, + "grad_norm": 19.173893840868747, + "learning_rate": 2.0466059264656612e-09, + "loss": 1.2949, + "step": 165285 + }, + { + "epoch": 1.99, + "grad_norm": 11.933343291301973, + "learning_rate": 2.034804683308522e-09, + "loss": 1.239, + "step": 165288 + }, + { + "epoch": 1.99, + "grad_norm": 7.783481808066995, + "learning_rate": 2.023037559606422e-09, + "loss": 1.0581, + "step": 165291 + }, + { + "epoch": 1.99, + "grad_norm": 8.348721557016557, + "learning_rate": 2.0113045553971087e-09, + "loss": 1.1994, + "step": 165294 + }, + { + "epoch": 1.99, + "grad_norm": 8.981710486457178, + "learning_rate": 1.999605670723881e-09, + "loss": 1.0402, + "step": 165297 + }, + { + "epoch": 1.99, + "grad_norm": 17.511867465947912, + "learning_rate": 1.9879409056233756e-09, + "loss": 1.2552, + "step": 165300 + }, + { + "epoch": 1.99, + "grad_norm": 6.31600273599219, + "learning_rate": 1.9763102601366714e-09, + "loss": 1.2826, + "step": 165303 + }, + { + "epoch": 1.99, + "grad_norm": 13.346218302806065, + "learning_rate": 1.9647137343037357e-09, + "loss": 1.2308, + "step": 165306 + }, + { + "epoch": 1.99, + "grad_norm": 7.98694782315424, + "learning_rate": 1.9531513281645375e-09, + "loss": 1.0569, + "step": 165309 + }, + { + "epoch": 1.99, + "grad_norm": 7.057166326828594, + "learning_rate": 1.9416230417568237e-09, + "loss": 1.1969, + "step": 165312 + }, + { + "epoch": 1.99, + "grad_norm": 9.997592453337433, + "learning_rate": 1.9301288751227833e-09, + "loss": 1.0857, + "step": 165315 + }, + { + "epoch": 1.99, + "grad_norm": 3.2462691287895304, + "learning_rate": 1.9186688282990527e-09, + "loss": 1.2051, + "step": 165318 + }, + { + "epoch": 1.99, + "grad_norm": 7.434216532599183, + "learning_rate": 1.907242901324491e-09, + "loss": 0.8752, + "step": 165321 + }, + { + "epoch": 1.99, + "grad_norm": 6.520630572661874, + "learning_rate": 1.8958510942401753e-09, + "loss": 1.1991, + "step": 165324 + }, + { + "epoch": 1.99, + "grad_norm": 4.364621910351546, + "learning_rate": 1.884493407084964e-09, + "loss": 1.3728, + "step": 165327 + }, + { + "epoch": 1.99, + "grad_norm": 6.6612812739313005, + "learning_rate": 1.8731698398954946e-09, + "loss": 1.0643, + "step": 165330 + }, + { + "epoch": 1.99, + "grad_norm": 3.672275327341172, + "learning_rate": 1.8618803927117347e-09, + "loss": 0.9074, + "step": 165333 + }, + { + "epoch": 1.99, + "grad_norm": 9.532360406412819, + "learning_rate": 1.8506250655725422e-09, + "loss": 1.4492, + "step": 165336 + }, + { + "epoch": 1.99, + "grad_norm": 15.11947417532171, + "learning_rate": 1.8394038585145545e-09, + "loss": 1.0578, + "step": 165339 + }, + { + "epoch": 1.99, + "grad_norm": 11.03230468875278, + "learning_rate": 1.8282167715788502e-09, + "loss": 1.1452, + "step": 165342 + }, + { + "epoch": 1.99, + "grad_norm": 5.996480310421575, + "learning_rate": 1.817063804800956e-09, + "loss": 1.0826, + "step": 165345 + }, + { + "epoch": 1.99, + "grad_norm": 7.691317014685606, + "learning_rate": 1.80594495822084e-09, + "loss": 1.2184, + "step": 165348 + }, + { + "epoch": 1.99, + "grad_norm": 5.271137595721283, + "learning_rate": 1.7948602318762499e-09, + "loss": 1.1303, + "step": 165351 + }, + { + "epoch": 1.99, + "grad_norm": 24.72047318041621, + "learning_rate": 1.7838096258038228e-09, + "loss": 0.9586, + "step": 165354 + }, + { + "epoch": 1.99, + "grad_norm": 4.348511771435698, + "learning_rate": 1.7727931400413067e-09, + "loss": 0.7651, + "step": 165357 + }, + { + "epoch": 1.99, + "grad_norm": 10.032351307172068, + "learning_rate": 1.7618107746275592e-09, + "loss": 1.1648, + "step": 165360 + }, + { + "epoch": 1.99, + "grad_norm": 4.6506261099068436, + "learning_rate": 1.7508625295992177e-09, + "loss": 0.9187, + "step": 165363 + }, + { + "epoch": 1.99, + "grad_norm": 13.723509787072155, + "learning_rate": 1.7399484049940296e-09, + "loss": 1.1611, + "step": 165366 + }, + { + "epoch": 1.99, + "grad_norm": 17.53231910484794, + "learning_rate": 1.7290684008486325e-09, + "loss": 1.2442, + "step": 165369 + }, + { + "epoch": 1.99, + "grad_norm": 6.080853313250188, + "learning_rate": 1.7182225172007738e-09, + "loss": 1.0795, + "step": 165372 + }, + { + "epoch": 1.99, + "grad_norm": 17.112362696601583, + "learning_rate": 1.7074107540882012e-09, + "loss": 1.4294, + "step": 165375 + }, + { + "epoch": 1.99, + "grad_norm": 7.328082506519674, + "learning_rate": 1.6966331115453315e-09, + "loss": 1.0755, + "step": 165378 + }, + { + "epoch": 1.99, + "grad_norm": 4.130900209745215, + "learning_rate": 1.6858895896110228e-09, + "loss": 0.9314, + "step": 165381 + }, + { + "epoch": 1.99, + "grad_norm": 10.814085438803453, + "learning_rate": 1.6751801883219122e-09, + "loss": 1.0125, + "step": 165384 + }, + { + "epoch": 1.99, + "grad_norm": 23.418635136036, + "learning_rate": 1.6645049077135267e-09, + "loss": 1.1745, + "step": 165387 + }, + { + "epoch": 1.99, + "grad_norm": 11.832323885265698, + "learning_rate": 1.653863747822504e-09, + "loss": 1.0598, + "step": 165390 + }, + { + "epoch": 1.99, + "grad_norm": 11.434912588402797, + "learning_rate": 1.6432567086865915e-09, + "loss": 1.1102, + "step": 165393 + }, + { + "epoch": 1.99, + "grad_norm": 4.708596514791066, + "learning_rate": 1.632683790340206e-09, + "loss": 1.5838, + "step": 165396 + }, + { + "epoch": 1.99, + "grad_norm": 9.88407982112306, + "learning_rate": 1.6221449928199851e-09, + "loss": 1.1607, + "step": 165399 + }, + { + "epoch": 1.99, + "grad_norm": 16.38985235704996, + "learning_rate": 1.6116403161614558e-09, + "loss": 1.125, + "step": 165402 + }, + { + "epoch": 1.99, + "grad_norm": 3.9087527006443965, + "learning_rate": 1.6011697604023658e-09, + "loss": 1.2577, + "step": 165405 + }, + { + "epoch": 1.99, + "grad_norm": 6.553183856125641, + "learning_rate": 1.5907333255760215e-09, + "loss": 1.0626, + "step": 165408 + }, + { + "epoch": 1.99, + "grad_norm": 4.853820250440417, + "learning_rate": 1.5803310117190606e-09, + "loss": 1.1763, + "step": 165411 + }, + { + "epoch": 1.99, + "grad_norm": 3.5740337155118325, + "learning_rate": 1.5699628188670103e-09, + "loss": 1.4377, + "step": 165414 + }, + { + "epoch": 1.99, + "grad_norm": 7.877851036018996, + "learning_rate": 1.5596287470553972e-09, + "loss": 1.3082, + "step": 165417 + }, + { + "epoch": 1.99, + "grad_norm": 8.391675832521406, + "learning_rate": 1.549328796319749e-09, + "loss": 1.1825, + "step": 165420 + }, + { + "epoch": 1.99, + "grad_norm": 9.803451830779629, + "learning_rate": 1.5390629666944823e-09, + "loss": 0.8713, + "step": 165423 + }, + { + "epoch": 1.99, + "grad_norm": 15.97512706501751, + "learning_rate": 1.5288312582151244e-09, + "loss": 0.907, + "step": 165426 + }, + { + "epoch": 1.99, + "grad_norm": 3.65202051170248, + "learning_rate": 1.518633670916092e-09, + "loss": 1.2044, + "step": 165429 + }, + { + "epoch": 1.99, + "grad_norm": 10.389242750451878, + "learning_rate": 1.5084702048329125e-09, + "loss": 1.246, + "step": 165432 + }, + { + "epoch": 1.99, + "grad_norm": 5.2992780202262, + "learning_rate": 1.4983408600000026e-09, + "loss": 0.9484, + "step": 165435 + }, + { + "epoch": 1.99, + "grad_norm": 4.305650260148476, + "learning_rate": 1.4882456364517795e-09, + "loss": 0.9008, + "step": 165438 + }, + { + "epoch": 1.99, + "grad_norm": 3.053258374471301, + "learning_rate": 1.4781845342226597e-09, + "loss": 1.1425, + "step": 165441 + }, + { + "epoch": 1.99, + "grad_norm": 6.141431826711294, + "learning_rate": 1.4681575533459502e-09, + "loss": 1.3108, + "step": 165444 + }, + { + "epoch": 1.99, + "grad_norm": 10.510549251692334, + "learning_rate": 1.4581646938582883e-09, + "loss": 1.34, + "step": 165447 + }, + { + "epoch": 1.99, + "grad_norm": 7.262352963975163, + "learning_rate": 1.4482059557918703e-09, + "loss": 1.2304, + "step": 165450 + }, + { + "epoch": 1.99, + "grad_norm": 6.294297381976257, + "learning_rate": 1.4382813391822237e-09, + "loss": 0.9704, + "step": 165453 + }, + { + "epoch": 1.99, + "grad_norm": 9.333731886708549, + "learning_rate": 1.4283908440615446e-09, + "loss": 1.1052, + "step": 165456 + }, + { + "epoch": 1.99, + "grad_norm": 6.046353321379973, + "learning_rate": 1.41853447046425e-09, + "loss": 0.9411, + "step": 165459 + }, + { + "epoch": 1.99, + "grad_norm": 14.115118668445877, + "learning_rate": 1.4087122184236468e-09, + "loss": 1.4327, + "step": 165462 + }, + { + "epoch": 1.99, + "grad_norm": 192.67884070790257, + "learning_rate": 1.3989240879741517e-09, + "loss": 1.492, + "step": 165465 + }, + { + "epoch": 1.99, + "grad_norm": 9.094183669637525, + "learning_rate": 1.3891700791479612e-09, + "loss": 0.9219, + "step": 165468 + }, + { + "epoch": 1.99, + "grad_norm": 5.707842872150145, + "learning_rate": 1.3794501919794923e-09, + "loss": 1.4086, + "step": 165471 + }, + { + "epoch": 1.99, + "grad_norm": 5.387045365153989, + "learning_rate": 1.3697644265020516e-09, + "loss": 1.1708, + "step": 165474 + }, + { + "epoch": 1.99, + "grad_norm": 3.6957866686110528, + "learning_rate": 1.3601127827478356e-09, + "loss": 1.0299, + "step": 165477 + }, + { + "epoch": 1.99, + "grad_norm": 3.0983292075389466, + "learning_rate": 1.3504952607501509e-09, + "loss": 1.3436, + "step": 165480 + }, + { + "epoch": 1.99, + "grad_norm": 3.048258623230545, + "learning_rate": 1.3409118605411942e-09, + "loss": 1.3242, + "step": 165483 + }, + { + "epoch": 1.99, + "grad_norm": 6.784526341482409, + "learning_rate": 1.331362582154272e-09, + "loss": 1.1004, + "step": 165486 + }, + { + "epoch": 1.99, + "grad_norm": 7.444042403690823, + "learning_rate": 1.3218474256215807e-09, + "loss": 0.867, + "step": 165489 + }, + { + "epoch": 1.99, + "grad_norm": 23.44937945352984, + "learning_rate": 1.3123663909764273e-09, + "loss": 1.6763, + "step": 165492 + }, + { + "epoch": 1.99, + "grad_norm": 6.046052605741503, + "learning_rate": 1.302919478251008e-09, + "loss": 1.0078, + "step": 165495 + }, + { + "epoch": 1.99, + "grad_norm": 6.950785105585009, + "learning_rate": 1.293506687476409e-09, + "loss": 1.0877, + "step": 165498 + }, + { + "epoch": 1.99, + "grad_norm": 2.892980476077221, + "learning_rate": 1.2841280186859372e-09, + "loss": 1.4925, + "step": 165501 + }, + { + "epoch": 1.99, + "grad_norm": 5.4676656618694945, + "learning_rate": 1.274783471911789e-09, + "loss": 1.1451, + "step": 165504 + }, + { + "epoch": 1.99, + "grad_norm": 3.1792367283095246, + "learning_rate": 1.2654730471850507e-09, + "loss": 1.3394, + "step": 165507 + }, + { + "epoch": 1.99, + "grad_norm": 3.5434726127556466, + "learning_rate": 1.2561967445368084e-09, + "loss": 0.9115, + "step": 165510 + }, + { + "epoch": 1.99, + "grad_norm": 4.59940419344918, + "learning_rate": 1.2469545640003689e-09, + "loss": 1.0644, + "step": 165513 + }, + { + "epoch": 1.99, + "grad_norm": 10.374402016182739, + "learning_rate": 1.2377465056068183e-09, + "loss": 0.8798, + "step": 165516 + }, + { + "epoch": 1.99, + "grad_norm": 4.3932128869596925, + "learning_rate": 1.2285725693872431e-09, + "loss": 1.0747, + "step": 165519 + }, + { + "epoch": 1.99, + "grad_norm": 5.7034739343097955, + "learning_rate": 1.2194327553727292e-09, + "loss": 1.024, + "step": 165522 + }, + { + "epoch": 1.99, + "grad_norm": 11.106158674363023, + "learning_rate": 1.210327063594363e-09, + "loss": 1.1161, + "step": 165525 + }, + { + "epoch": 1.99, + "grad_norm": 9.054839035199008, + "learning_rate": 1.2012554940843412e-09, + "loss": 1.2987, + "step": 165528 + }, + { + "epoch": 1.99, + "grad_norm": 6.643070390875734, + "learning_rate": 1.1922180468715293e-09, + "loss": 0.99, + "step": 165531 + }, + { + "epoch": 1.99, + "grad_norm": 6.547508796264788, + "learning_rate": 1.1832147219892344e-09, + "loss": 1.0068, + "step": 165534 + }, + { + "epoch": 1.99, + "grad_norm": 9.380257411161283, + "learning_rate": 1.1742455194674318e-09, + "loss": 1.0198, + "step": 165537 + }, + { + "epoch": 1.99, + "grad_norm": 6.160582561480985, + "learning_rate": 1.165310439336098e-09, + "loss": 1.443, + "step": 165540 + }, + { + "epoch": 1.99, + "grad_norm": 3.495367154987696, + "learning_rate": 1.1564094816252092e-09, + "loss": 1.416, + "step": 165543 + }, + { + "epoch": 1.99, + "grad_norm": 11.389486040296237, + "learning_rate": 1.1475426463669615e-09, + "loss": 1.4176, + "step": 165546 + }, + { + "epoch": 1.99, + "grad_norm": 10.04059615063372, + "learning_rate": 1.1387099335902207e-09, + "loss": 0.9927, + "step": 165549 + }, + { + "epoch": 1.99, + "grad_norm": 6.463682898545088, + "learning_rate": 1.1299113433249632e-09, + "loss": 1.0389, + "step": 165552 + }, + { + "epoch": 1.99, + "grad_norm": 18.405757324608885, + "learning_rate": 1.1211468756022748e-09, + "loss": 0.8557, + "step": 165555 + }, + { + "epoch": 1.99, + "grad_norm": 7.054069590964545, + "learning_rate": 1.1124165304510215e-09, + "loss": 1.3783, + "step": 165558 + }, + { + "epoch": 1.99, + "grad_norm": 23.721637051483, + "learning_rate": 1.1037203079022895e-09, + "loss": 1.2149, + "step": 165561 + }, + { + "epoch": 1.99, + "grad_norm": 8.925568241568596, + "learning_rate": 1.0950582079838346e-09, + "loss": 1.092, + "step": 165564 + }, + { + "epoch": 1.99, + "grad_norm": 19.743884200988926, + "learning_rate": 1.086430230727853e-09, + "loss": 1.2472, + "step": 165567 + }, + { + "epoch": 1.99, + "grad_norm": 6.193415463281334, + "learning_rate": 1.07783637616099e-09, + "loss": 0.9453, + "step": 165570 + }, + { + "epoch": 1.99, + "grad_norm": 7.997567789842433, + "learning_rate": 1.069276644314332e-09, + "loss": 1.271, + "step": 165573 + }, + { + "epoch": 1.99, + "grad_norm": 3.8819595527619137, + "learning_rate": 1.060751035216745e-09, + "loss": 1.2875, + "step": 165576 + }, + { + "epoch": 1.99, + "grad_norm": 4.439437213579783, + "learning_rate": 1.0522595488970943e-09, + "loss": 0.9977, + "step": 165579 + }, + { + "epoch": 1.99, + "grad_norm": 5.519741106868459, + "learning_rate": 1.0438021853853563e-09, + "loss": 1.0268, + "step": 165582 + }, + { + "epoch": 1.99, + "grad_norm": 12.494237405043718, + "learning_rate": 1.0353789447081764e-09, + "loss": 1.3193, + "step": 165585 + }, + { + "epoch": 1.99, + "grad_norm": 6.872918474905082, + "learning_rate": 1.0269898268966405e-09, + "loss": 1.5418, + "step": 165588 + }, + { + "epoch": 1.99, + "grad_norm": 6.297076381220885, + "learning_rate": 1.0186348319773941e-09, + "loss": 1.0044, + "step": 165591 + }, + { + "epoch": 1.99, + "grad_norm": 9.747854930960825, + "learning_rate": 1.0103139599804135e-09, + "loss": 1.187, + "step": 165594 + }, + { + "epoch": 1.99, + "grad_norm": 4.858820923489368, + "learning_rate": 1.002027210934564e-09, + "loss": 0.9066, + "step": 165597 + }, + { + "epoch": 1.99, + "grad_norm": 8.962096869237941, + "learning_rate": 9.937745848653813e-10, + "loss": 1.053, + "step": 165600 + }, + { + "epoch": 1.99, + "grad_norm": 14.788761962793945, + "learning_rate": 9.855560818039512e-10, + "loss": 1.0094, + "step": 165603 + }, + { + "epoch": 1.99, + "grad_norm": 4.995774598446293, + "learning_rate": 9.77371701776919e-10, + "loss": 1.1608, + "step": 165606 + }, + { + "epoch": 1.99, + "grad_norm": 5.60803284482286, + "learning_rate": 9.692214448131509e-10, + "loss": 0.9684, + "step": 165609 + }, + { + "epoch": 1.99, + "grad_norm": 18.87118397829137, + "learning_rate": 9.611053109381818e-10, + "loss": 1.0953, + "step": 165612 + }, + { + "epoch": 1.99, + "grad_norm": 12.512464154942428, + "learning_rate": 9.53023300183098e-10, + "loss": 1.4367, + "step": 165615 + }, + { + "epoch": 1.99, + "grad_norm": 45.736136805317244, + "learning_rate": 9.449754125723243e-10, + "loss": 1.245, + "step": 165618 + }, + { + "epoch": 1.99, + "grad_norm": 4.777298218331848, + "learning_rate": 9.369616481358368e-10, + "loss": 1.0059, + "step": 165621 + }, + { + "epoch": 1.99, + "grad_norm": 22.298503078479996, + "learning_rate": 9.289820068991706e-10, + "loss": 0.8275, + "step": 165624 + }, + { + "epoch": 1.99, + "grad_norm": 17.58433868344696, + "learning_rate": 9.210364888900813e-10, + "loss": 0.93, + "step": 165627 + }, + { + "epoch": 1.99, + "grad_norm": 17.4789706637286, + "learning_rate": 9.131250941363246e-10, + "loss": 1.1266, + "step": 165630 + }, + { + "epoch": 1.99, + "grad_norm": 4.741899910295651, + "learning_rate": 9.052478226645456e-10, + "loss": 1.2063, + "step": 165633 + }, + { + "epoch": 1.99, + "grad_norm": 11.878523043172255, + "learning_rate": 8.974046745013898e-10, + "loss": 1.0031, + "step": 165636 + }, + { + "epoch": 1.99, + "grad_norm": 5.195377328528228, + "learning_rate": 8.895956496735026e-10, + "loss": 1.1915, + "step": 165639 + }, + { + "epoch": 1.99, + "grad_norm": 3.006511824207334, + "learning_rate": 8.818207482075292e-10, + "loss": 1.1344, + "step": 165642 + }, + { + "epoch": 1.99, + "grad_norm": 4.80433619285379, + "learning_rate": 8.74079970130115e-10, + "loss": 0.9462, + "step": 165645 + }, + { + "epoch": 1.99, + "grad_norm": 3.643082234848975, + "learning_rate": 8.663733154679055e-10, + "loss": 1.3019, + "step": 165648 + }, + { + "epoch": 1.99, + "grad_norm": 16.617833994713074, + "learning_rate": 8.587007842475459e-10, + "loss": 0.849, + "step": 165651 + }, + { + "epoch": 1.99, + "grad_norm": 8.542127352118445, + "learning_rate": 8.510623764945713e-10, + "loss": 1.1139, + "step": 165654 + }, + { + "epoch": 1.99, + "grad_norm": 4.111451490893884, + "learning_rate": 8.434580922356272e-10, + "loss": 1.4069, + "step": 165657 + }, + { + "epoch": 1.99, + "grad_norm": 4.889847137385552, + "learning_rate": 8.358879314962487e-10, + "loss": 1.137, + "step": 165660 + }, + { + "epoch": 1.99, + "grad_norm": 10.542774001530356, + "learning_rate": 8.283518943019708e-10, + "loss": 0.9729, + "step": 165663 + }, + { + "epoch": 1.99, + "grad_norm": 2.4800571376313187, + "learning_rate": 8.208499806783288e-10, + "loss": 1.0982, + "step": 165666 + }, + { + "epoch": 1.99, + "grad_norm": 4.271030578982744, + "learning_rate": 8.133821906519679e-10, + "loss": 0.8208, + "step": 165669 + }, + { + "epoch": 1.99, + "grad_norm": 11.496321612415965, + "learning_rate": 8.05948524247313e-10, + "loss": 0.935, + "step": 165672 + }, + { + "epoch": 1.99, + "grad_norm": 15.460376709626374, + "learning_rate": 7.985489814910097e-10, + "loss": 0.8755, + "step": 165675 + }, + { + "epoch": 1.99, + "grad_norm": 6.741367425910089, + "learning_rate": 7.911835624074826e-10, + "loss": 1.3209, + "step": 165678 + }, + { + "epoch": 1.99, + "grad_norm": 4.086143709466382, + "learning_rate": 7.838522670222671e-10, + "loss": 1.2834, + "step": 165681 + }, + { + "epoch": 1.99, + "grad_norm": 23.399526493134715, + "learning_rate": 7.765550953586775e-10, + "loss": 1.2924, + "step": 165684 + }, + { + "epoch": 1.99, + "grad_norm": 10.620742996214894, + "learning_rate": 7.692920474444698e-10, + "loss": 0.7822, + "step": 165687 + }, + { + "epoch": 1.99, + "grad_norm": 9.390694110369562, + "learning_rate": 7.620631233018483e-10, + "loss": 0.8557, + "step": 165690 + }, + { + "epoch": 1.99, + "grad_norm": 10.248784058025027, + "learning_rate": 7.548683229574583e-10, + "loss": 1.1004, + "step": 165693 + }, + { + "epoch": 1.99, + "grad_norm": 3.7345741755606574, + "learning_rate": 7.477076464346144e-10, + "loss": 1.1043, + "step": 165696 + }, + { + "epoch": 1.99, + "grad_norm": 17.789796198011658, + "learning_rate": 7.40581093758852e-10, + "loss": 1.6868, + "step": 165699 + }, + { + "epoch": 1.99, + "grad_norm": 11.938573540700412, + "learning_rate": 7.334886649523754e-10, + "loss": 1.1376, + "step": 165702 + }, + { + "epoch": 1.99, + "grad_norm": 16.210273349022778, + "learning_rate": 7.264303600418299e-10, + "loss": 0.9991, + "step": 165705 + }, + { + "epoch": 1.99, + "grad_norm": 6.397244438472284, + "learning_rate": 7.1940617904942e-10, + "loss": 1.0714, + "step": 165708 + }, + { + "epoch": 1.99, + "grad_norm": 3.5251204852922218, + "learning_rate": 7.124161220006809e-10, + "loss": 1.4879, + "step": 165711 + }, + { + "epoch": 1.99, + "grad_norm": 7.416108691106437, + "learning_rate": 7.054601889189272e-10, + "loss": 1.1544, + "step": 165714 + }, + { + "epoch": 1.99, + "grad_norm": 7.029391310055548, + "learning_rate": 6.985383798274737e-10, + "loss": 1.0294, + "step": 165717 + }, + { + "epoch": 1.99, + "grad_norm": 17.01878891818136, + "learning_rate": 6.916506947496348e-10, + "loss": 1.0726, + "step": 165720 + }, + { + "epoch": 1.99, + "grad_norm": 12.166573191505497, + "learning_rate": 6.847971337098358e-10, + "loss": 1.0551, + "step": 165723 + }, + { + "epoch": 1.99, + "grad_norm": 7.153762412571172, + "learning_rate": 6.779776967313911e-10, + "loss": 1.2613, + "step": 165726 + }, + { + "epoch": 1.99, + "grad_norm": 8.260079360470781, + "learning_rate": 6.711923838365053e-10, + "loss": 1.3328, + "step": 165729 + }, + { + "epoch": 1.99, + "grad_norm": 21.032201946497267, + "learning_rate": 6.644411950496032e-10, + "loss": 1.2818, + "step": 165732 + }, + { + "epoch": 1.99, + "grad_norm": 5.277574505234286, + "learning_rate": 6.577241303939996e-10, + "loss": 1.6259, + "step": 165735 + }, + { + "epoch": 1.99, + "grad_norm": 3.112569631693287, + "learning_rate": 6.510411898907887e-10, + "loss": 1.2133, + "step": 165738 + }, + { + "epoch": 1.99, + "grad_norm": 3.0835330716935725, + "learning_rate": 6.443923735632851e-10, + "loss": 1.1089, + "step": 165741 + }, + { + "epoch": 1.99, + "grad_norm": 6.532514913015756, + "learning_rate": 6.377776814359138e-10, + "loss": 1.2518, + "step": 165744 + }, + { + "epoch": 1.99, + "grad_norm": 5.951909561657369, + "learning_rate": 6.31197113529769e-10, + "loss": 0.9067, + "step": 165747 + }, + { + "epoch": 1.99, + "grad_norm": 9.952332944192841, + "learning_rate": 6.246506698670551e-10, + "loss": 1.3003, + "step": 165750 + }, + { + "epoch": 1.99, + "grad_norm": 9.747588077626594, + "learning_rate": 6.181383504710869e-10, + "loss": 1.2166, + "step": 165753 + }, + { + "epoch": 1.99, + "grad_norm": 12.717351498520877, + "learning_rate": 6.116601553640689e-10, + "loss": 0.9673, + "step": 165756 + }, + { + "epoch": 1.99, + "grad_norm": 9.834722451843303, + "learning_rate": 6.052160845670952e-10, + "loss": 1.1109, + "step": 165759 + }, + { + "epoch": 1.99, + "grad_norm": 17.23714534828793, + "learning_rate": 5.988061381023702e-10, + "loss": 0.9547, + "step": 165762 + }, + { + "epoch": 1.99, + "grad_norm": 9.28613224483376, + "learning_rate": 5.924303159932087e-10, + "loss": 1.2247, + "step": 165765 + }, + { + "epoch": 1.99, + "grad_norm": 6.593972262169804, + "learning_rate": 5.860886182595949e-10, + "loss": 1.0286, + "step": 165768 + }, + { + "epoch": 1.99, + "grad_norm": 12.49429806837342, + "learning_rate": 5.797810449237329e-10, + "loss": 0.893, + "step": 165771 + }, + { + "epoch": 1.99, + "grad_norm": 3.9303638924718087, + "learning_rate": 5.735075960078273e-10, + "loss": 1.1055, + "step": 165774 + }, + { + "epoch": 1.99, + "grad_norm": 6.536891125586988, + "learning_rate": 5.672682715329725e-10, + "loss": 1.2725, + "step": 165777 + }, + { + "epoch": 1.99, + "grad_norm": 5.640646744012455, + "learning_rate": 5.610630715202625e-10, + "loss": 1.0567, + "step": 165780 + }, + { + "epoch": 1.99, + "grad_norm": 6.718347220995694, + "learning_rate": 5.548919959896815e-10, + "loss": 1.3619, + "step": 165783 + }, + { + "epoch": 1.99, + "grad_norm": 5.233100154912584, + "learning_rate": 5.48755044964544e-10, + "loss": 0.9065, + "step": 165786 + }, + { + "epoch": 1.99, + "grad_norm": 6.246234886028746, + "learning_rate": 5.426522184648342e-10, + "loss": 1.424, + "step": 165789 + }, + { + "epoch": 1.99, + "grad_norm": 18.10256045628706, + "learning_rate": 5.36583516510536e-10, + "loss": 1.3827, + "step": 165792 + }, + { + "epoch": 1.99, + "grad_norm": 6.543191354719195, + "learning_rate": 5.305489391227437e-10, + "loss": 0.9192, + "step": 165795 + }, + { + "epoch": 1.99, + "grad_norm": 4.1991876851370025, + "learning_rate": 5.245484863236616e-10, + "loss": 1.1823, + "step": 165798 + }, + { + "epoch": 1.99, + "grad_norm": 5.312613936598389, + "learning_rate": 5.185821581321637e-10, + "loss": 1.2242, + "step": 165801 + }, + { + "epoch": 1.99, + "grad_norm": 6.99487246024987, + "learning_rate": 5.126499545682339e-10, + "loss": 0.8194, + "step": 165804 + }, + { + "epoch": 1.99, + "grad_norm": 13.187812671345416, + "learning_rate": 5.067518756529666e-10, + "loss": 0.8762, + "step": 165807 + }, + { + "epoch": 1.99, + "grad_norm": 5.114116219704783, + "learning_rate": 5.008879214074558e-10, + "loss": 1.2378, + "step": 165810 + }, + { + "epoch": 1.99, + "grad_norm": 4.81226487176565, + "learning_rate": 4.950580918494652e-10, + "loss": 1.1895, + "step": 165813 + }, + { + "epoch": 1.99, + "grad_norm": 6.752886720582987, + "learning_rate": 4.89262387000089e-10, + "loss": 1.3479, + "step": 165816 + }, + { + "epoch": 1.99, + "grad_norm": 20.506613264096526, + "learning_rate": 4.835008068793112e-10, + "loss": 1.1348, + "step": 165819 + }, + { + "epoch": 1.99, + "grad_norm": 10.84846559194176, + "learning_rate": 4.777733515071159e-10, + "loss": 1.1157, + "step": 165822 + }, + { + "epoch": 1.99, + "grad_norm": 2.467008542940263, + "learning_rate": 4.720800209012666e-10, + "loss": 1.2595, + "step": 165825 + }, + { + "epoch": 1.99, + "grad_norm": 36.14529730910697, + "learning_rate": 4.664208150839678e-10, + "loss": 1.0402, + "step": 165828 + }, + { + "epoch": 1.99, + "grad_norm": 14.122468644878623, + "learning_rate": 4.607957340718727e-10, + "loss": 1.5563, + "step": 165831 + }, + { + "epoch": 1.99, + "grad_norm": 6.627846795864377, + "learning_rate": 4.5520477788496554e-10, + "loss": 0.9652, + "step": 165834 + }, + { + "epoch": 1.99, + "grad_norm": 6.3325257027815365, + "learning_rate": 4.4964794654323017e-10, + "loss": 0.9795, + "step": 165837 + }, + { + "epoch": 1.99, + "grad_norm": 6.308226836372097, + "learning_rate": 4.441252400644303e-10, + "loss": 1.0857, + "step": 165840 + }, + { + "epoch": 1.99, + "grad_norm": 7.271457929717253, + "learning_rate": 4.3863665846854976e-10, + "loss": 1.1487, + "step": 165843 + }, + { + "epoch": 1.99, + "grad_norm": 3.903651206295865, + "learning_rate": 4.3318220177446245e-10, + "loss": 0.8282, + "step": 165846 + }, + { + "epoch": 1.99, + "grad_norm": 6.693328954797315, + "learning_rate": 4.277618699988217e-10, + "loss": 1.1772, + "step": 165849 + }, + { + "epoch": 1.99, + "grad_norm": 9.454921507682164, + "learning_rate": 4.223756631627218e-10, + "loss": 1.1278, + "step": 165852 + }, + { + "epoch": 1.99, + "grad_norm": 13.546752023579659, + "learning_rate": 4.1702358128170586e-10, + "loss": 1.1004, + "step": 165855 + }, + { + "epoch": 1.99, + "grad_norm": 13.651438362283335, + "learning_rate": 4.11705624376868e-10, + "loss": 1.1, + "step": 165858 + }, + { + "epoch": 1.99, + "grad_norm": 8.074051676177247, + "learning_rate": 4.0642179246375145e-10, + "loss": 1.2407, + "step": 165861 + }, + { + "epoch": 1.99, + "grad_norm": 6.712925297604557, + "learning_rate": 4.0117208556234024e-10, + "loss": 0.8957, + "step": 165864 + }, + { + "epoch": 1.99, + "grad_norm": 8.9377800611876, + "learning_rate": 3.959565036903978e-10, + "loss": 1.413, + "step": 165867 + }, + { + "epoch": 1.99, + "grad_norm": 9.892372386006652, + "learning_rate": 3.9077504686457767e-10, + "loss": 1.1574, + "step": 165870 + }, + { + "epoch": 1.99, + "grad_norm": 8.530318293523605, + "learning_rate": 3.856277151037535e-10, + "loss": 1.6357, + "step": 165873 + }, + { + "epoch": 1.99, + "grad_norm": 27.38577884910568, + "learning_rate": 3.8051450842457873e-10, + "loss": 1.3864, + "step": 165876 + }, + { + "epoch": 1.99, + "grad_norm": 3.1841486547654814, + "learning_rate": 3.754354268448168e-10, + "loss": 1.5483, + "step": 165879 + }, + { + "epoch": 1.99, + "grad_norm": 5.662981717678727, + "learning_rate": 3.7039047038112117e-10, + "loss": 1.3596, + "step": 165882 + }, + { + "epoch": 1.99, + "grad_norm": 17.2495895874438, + "learning_rate": 3.6537963905236564e-10, + "loss": 1.3343, + "step": 165885 + }, + { + "epoch": 1.99, + "grad_norm": 8.128801562872157, + "learning_rate": 3.604029328740932e-10, + "loss": 1.3453, + "step": 165888 + }, + { + "epoch": 1.99, + "grad_norm": 8.781293088615593, + "learning_rate": 3.554603518651778e-10, + "loss": 1.1031, + "step": 165891 + }, + { + "epoch": 1.99, + "grad_norm": 2.626699850257633, + "learning_rate": 3.5055189604005224e-10, + "loss": 1.293, + "step": 165894 + }, + { + "epoch": 1.99, + "grad_norm": 10.91264709581983, + "learning_rate": 3.456775654164801e-10, + "loss": 1.1423, + "step": 165897 + }, + { + "epoch": 1.99, + "grad_norm": 5.894896233606268, + "learning_rate": 3.40837360012225e-10, + "loss": 1.1473, + "step": 165900 + }, + { + "epoch": 1.99, + "grad_norm": 4.6675000270472085, + "learning_rate": 3.3603127984283004e-10, + "loss": 1.0985, + "step": 165903 + }, + { + "epoch": 1.99, + "grad_norm": 6.048114862819818, + "learning_rate": 3.3125932492383827e-10, + "loss": 0.9416, + "step": 165906 + }, + { + "epoch": 2.0, + "grad_norm": 5.463982103113514, + "learning_rate": 3.2652149527301336e-10, + "loss": 0.9649, + "step": 165909 + }, + { + "epoch": 2.0, + "grad_norm": 20.06455695250381, + "learning_rate": 3.2181779090478816e-10, + "loss": 1.0454, + "step": 165912 + }, + { + "epoch": 2.0, + "grad_norm": 6.054727187911471, + "learning_rate": 3.1714821183803645e-10, + "loss": 1.4194, + "step": 165915 + }, + { + "epoch": 2.0, + "grad_norm": 6.369168063266475, + "learning_rate": 3.1251275808608096e-10, + "loss": 1.5478, + "step": 165918 + }, + { + "epoch": 2.0, + "grad_norm": 5.037475491136842, + "learning_rate": 3.0791142966557495e-10, + "loss": 1.2571, + "step": 165921 + }, + { + "epoch": 2.0, + "grad_norm": 5.432266078942652, + "learning_rate": 3.0334422659206166e-10, + "loss": 1.2529, + "step": 165924 + }, + { + "epoch": 2.0, + "grad_norm": 8.261198162262131, + "learning_rate": 2.988111488821943e-10, + "loss": 1.2601, + "step": 165927 + }, + { + "epoch": 2.0, + "grad_norm": 4.126335510441949, + "learning_rate": 2.9431219655040587e-10, + "loss": 1.0111, + "step": 165930 + }, + { + "epoch": 2.0, + "grad_norm": 8.73657651983556, + "learning_rate": 2.8984736961112927e-10, + "loss": 1.2902, + "step": 165933 + }, + { + "epoch": 2.0, + "grad_norm": 14.525109179254443, + "learning_rate": 2.8541666808212797e-10, + "loss": 1.0674, + "step": 165936 + }, + { + "epoch": 2.0, + "grad_norm": 3.5005241276835273, + "learning_rate": 2.8102009197672474e-10, + "loss": 1.0705, + "step": 165939 + }, + { + "epoch": 2.0, + "grad_norm": 6.733682413242982, + "learning_rate": 2.766576413104627e-10, + "loss": 1.6579, + "step": 165942 + }, + { + "epoch": 2.0, + "grad_norm": 4.550274781709276, + "learning_rate": 2.723293160977747e-10, + "loss": 1.0895, + "step": 165945 + }, + { + "epoch": 2.0, + "grad_norm": 5.500814040057987, + "learning_rate": 2.6803511635309366e-10, + "loss": 1.0246, + "step": 165948 + }, + { + "epoch": 2.0, + "grad_norm": 20.04827444813065, + "learning_rate": 2.6377504209307293e-10, + "loss": 0.8309, + "step": 165951 + }, + { + "epoch": 2.0, + "grad_norm": 5.206039133288929, + "learning_rate": 2.5954909332992493e-10, + "loss": 0.8894, + "step": 165954 + }, + { + "epoch": 2.0, + "grad_norm": 6.793520643013661, + "learning_rate": 2.5535727007919285e-10, + "loss": 1.3185, + "step": 165957 + }, + { + "epoch": 2.0, + "grad_norm": 5.506014246666233, + "learning_rate": 2.5119957235530955e-10, + "loss": 0.8961, + "step": 165960 + }, + { + "epoch": 2.0, + "grad_norm": 9.593003686224357, + "learning_rate": 2.470760001715977e-10, + "loss": 1.0038, + "step": 165963 + }, + { + "epoch": 2.0, + "grad_norm": 7.117545807791402, + "learning_rate": 2.429865535436005e-10, + "loss": 1.2691, + "step": 165966 + }, + { + "epoch": 2.0, + "grad_norm": 8.31740278406151, + "learning_rate": 2.3893123248353025e-10, + "loss": 1.2497, + "step": 165969 + }, + { + "epoch": 2.0, + "grad_norm": 9.081712206742198, + "learning_rate": 2.349100370069302e-10, + "loss": 1.2726, + "step": 165972 + }, + { + "epoch": 2.0, + "grad_norm": 9.257329443344354, + "learning_rate": 2.309229671260127e-10, + "loss": 1.0191, + "step": 165975 + }, + { + "epoch": 2.0, + "grad_norm": 14.896584725998641, + "learning_rate": 2.2697002285521076e-10, + "loss": 1.1067, + "step": 165978 + }, + { + "epoch": 2.0, + "grad_norm": 10.60313651946, + "learning_rate": 2.2305120420784698e-10, + "loss": 1.1719, + "step": 165981 + }, + { + "epoch": 2.0, + "grad_norm": 4.32140607290296, + "learning_rate": 2.191665111983543e-10, + "loss": 1.2158, + "step": 165984 + }, + { + "epoch": 2.0, + "grad_norm": 2.436933453914618, + "learning_rate": 2.1531594383783493e-10, + "loss": 1.0829, + "step": 165987 + }, + { + "epoch": 2.0, + "grad_norm": 2.9432059253129528, + "learning_rate": 2.1149950214072178e-10, + "loss": 1.0542, + "step": 165990 + }, + { + "epoch": 2.0, + "grad_norm": 8.742154686087595, + "learning_rate": 2.077171861192273e-10, + "loss": 1.332, + "step": 165993 + }, + { + "epoch": 2.0, + "grad_norm": 5.478452704202831, + "learning_rate": 2.039689957877844e-10, + "loss": 1.1232, + "step": 165996 + }, + { + "epoch": 2.0, + "grad_norm": 9.473509252784146, + "learning_rate": 2.0025493115860551e-10, + "loss": 1.3319, + "step": 165999 + }, + { + "epoch": 2.0, + "grad_norm": 20.427921206613444, + "learning_rate": 1.9657499224279285e-10, + "loss": 1.2193, + "step": 166002 + }, + { + "epoch": 2.0, + "grad_norm": 10.867211365036434, + "learning_rate": 1.9292917905477936e-10, + "loss": 1.2751, + "step": 166005 + }, + { + "epoch": 2.0, + "grad_norm": 6.301951244671933, + "learning_rate": 1.893174916067775e-10, + "loss": 0.796, + "step": 166008 + }, + { + "epoch": 2.0, + "grad_norm": 16.554105768887023, + "learning_rate": 1.8573992990988942e-10, + "loss": 1.2845, + "step": 166011 + }, + { + "epoch": 2.0, + "grad_norm": 11.904208814974137, + "learning_rate": 1.821964939785481e-10, + "loss": 0.9771, + "step": 166014 + }, + { + "epoch": 2.0, + "grad_norm": 2.6102266054792382, + "learning_rate": 1.786871838216353e-10, + "loss": 1.3865, + "step": 166017 + }, + { + "epoch": 2.0, + "grad_norm": 18.86195056406079, + "learning_rate": 1.7521199945469413e-10, + "loss": 1.2503, + "step": 166020 + }, + { + "epoch": 2.0, + "grad_norm": 11.150851964271435, + "learning_rate": 1.717709408866064e-10, + "loss": 1.1812, + "step": 166023 + }, + { + "epoch": 2.0, + "grad_norm": 12.774092220254376, + "learning_rate": 1.6836400813069476e-10, + "loss": 1.0454, + "step": 166026 + }, + { + "epoch": 2.0, + "grad_norm": 3.7111216474346107, + "learning_rate": 1.6499120119806145e-10, + "loss": 1.22, + "step": 166029 + }, + { + "epoch": 2.0, + "grad_norm": 3.9643982744571558, + "learning_rate": 1.6165252010091892e-10, + "loss": 1.303, + "step": 166032 + }, + { + "epoch": 2.0, + "grad_norm": 7.113197185811801, + "learning_rate": 1.583479648492592e-10, + "loss": 1.4195, + "step": 166035 + }, + { + "epoch": 2.0, + "grad_norm": 17.660359442883983, + "learning_rate": 1.5507753545640492e-10, + "loss": 1.2042, + "step": 166038 + }, + { + "epoch": 2.0, + "grad_norm": 6.736040642770229, + "learning_rate": 1.518412319312379e-10, + "loss": 1.3584, + "step": 166041 + }, + { + "epoch": 2.0, + "grad_norm": 8.10689065934339, + "learning_rate": 1.486390542870808e-10, + "loss": 1.1136, + "step": 166044 + }, + { + "epoch": 2.0, + "grad_norm": 5.425737831498984, + "learning_rate": 1.4547100253281543e-10, + "loss": 1.1077, + "step": 166047 + }, + { + "epoch": 2.0, + "grad_norm": 12.89310717428737, + "learning_rate": 1.4233707668065422e-10, + "loss": 1.358, + "step": 166050 + }, + { + "epoch": 2.0, + "grad_norm": 14.281375135309858, + "learning_rate": 1.3923727673947895e-10, + "loss": 1.2252, + "step": 166053 + }, + { + "epoch": 2.0, + "grad_norm": 7.331358051994751, + "learning_rate": 1.361716027226123e-10, + "loss": 1.1943, + "step": 166056 + }, + { + "epoch": 2.0, + "grad_norm": 29.3970804896375, + "learning_rate": 1.3314005463893608e-10, + "loss": 1.4533, + "step": 166059 + }, + { + "epoch": 2.0, + "grad_norm": 23.64723950853665, + "learning_rate": 1.3014263249844228e-10, + "loss": 0.7529, + "step": 166062 + }, + { + "epoch": 2.0, + "grad_norm": 8.835211478118882, + "learning_rate": 1.271793363111229e-10, + "loss": 1.1732, + "step": 166065 + }, + { + "epoch": 2.0, + "grad_norm": 4.97468518533733, + "learning_rate": 1.2425016608919038e-10, + "loss": 1.1975, + "step": 166068 + }, + { + "epoch": 2.0, + "grad_norm": 6.169875025996475, + "learning_rate": 1.213551218404163e-10, + "loss": 1.1197, + "step": 166071 + }, + { + "epoch": 2.0, + "grad_norm": 13.351496644853787, + "learning_rate": 1.1849420357590292e-10, + "loss": 1.2421, + "step": 166074 + }, + { + "epoch": 2.0, + "grad_norm": 3.9871063600418837, + "learning_rate": 1.1566741130453197e-10, + "loss": 0.799, + "step": 166077 + }, + { + "epoch": 2.0, + "grad_norm": 8.635903960892115, + "learning_rate": 1.128747450374057e-10, + "loss": 0.9487, + "step": 166080 + }, + { + "epoch": 2.0, + "grad_norm": 3.9932294276077918, + "learning_rate": 1.1011620478229568e-10, + "loss": 0.8253, + "step": 166083 + }, + { + "epoch": 2.0, + "grad_norm": 3.912939608014992, + "learning_rate": 1.073917905491939e-10, + "loss": 0.8979, + "step": 166086 + }, + { + "epoch": 2.0, + "grad_norm": 6.194246935805657, + "learning_rate": 1.047015023480924e-10, + "loss": 0.9488, + "step": 166089 + }, + { + "epoch": 2.0, + "grad_norm": 17.196811876246, + "learning_rate": 1.0204534018787293e-10, + "loss": 1.0713, + "step": 166092 + }, + { + "epoch": 2.0, + "grad_norm": 11.995237542928546, + "learning_rate": 9.942330407741729e-11, + "loss": 1.0468, + "step": 166095 + }, + { + "epoch": 2.0, + "grad_norm": 12.027945676281263, + "learning_rate": 9.683539402560727e-11, + "loss": 1.288, + "step": 166098 + }, + { + "epoch": 2.0, + "grad_norm": 9.728255917457515, + "learning_rate": 9.428161004132463e-11, + "loss": 1.6345, + "step": 166101 + }, + { + "epoch": 2.0, + "grad_norm": 14.418167679668972, + "learning_rate": 9.176195213234095e-11, + "loss": 1.2524, + "step": 166104 + }, + { + "epoch": 2.0, + "grad_norm": 4.972088541088474, + "learning_rate": 8.927642030975848e-11, + "loss": 1.073, + "step": 166107 + }, + { + "epoch": 2.0, + "grad_norm": 3.3306493197000195, + "learning_rate": 8.682501457912829e-11, + "loss": 1.1155, + "step": 166110 + }, + { + "epoch": 2.0, + "grad_norm": 5.929431522196795, + "learning_rate": 8.440773495155264e-11, + "loss": 1.3635, + "step": 166113 + }, + { + "epoch": 2.0, + "grad_norm": 8.76244465068547, + "learning_rate": 8.202458143258262e-11, + "loss": 1.2449, + "step": 166116 + }, + { + "epoch": 2.0, + "grad_norm": 5.441159292510426, + "learning_rate": 7.96755540333205e-11, + "loss": 0.883, + "step": 166119 + }, + { + "epoch": 2.0, + "grad_norm": 8.088282745149327, + "learning_rate": 7.736065275931736e-11, + "loss": 1.3305, + "step": 166122 + }, + { + "epoch": 2.0, + "grad_norm": 5.9544139025598195, + "learning_rate": 7.507987761834479e-11, + "loss": 1.2142, + "step": 166125 + }, + { + "epoch": 2.0, + "grad_norm": 4.37843334564529, + "learning_rate": 7.283322862039476e-11, + "loss": 1.157, + "step": 166128 + }, + { + "epoch": 2.0, + "grad_norm": 12.372266786723834, + "learning_rate": 7.062070577212865e-11, + "loss": 0.9911, + "step": 166131 + }, + { + "epoch": 2.0, + "grad_norm": 9.72256171895406, + "learning_rate": 6.844230908020777e-11, + "loss": 1.2492, + "step": 166134 + }, + { + "epoch": 2.0, + "grad_norm": 6.4011018243516915, + "learning_rate": 6.629803855240368e-11, + "loss": 1.0849, + "step": 166137 + }, + { + "epoch": 2.0, + "grad_norm": 9.987033478008488, + "learning_rate": 6.418789419759818e-11, + "loss": 0.8429, + "step": 166140 + }, + { + "epoch": 2.0, + "grad_norm": 3.5477935549575683, + "learning_rate": 6.211187602023217e-11, + "loss": 0.9926, + "step": 166143 + }, + { + "epoch": 2.0, + "grad_norm": 12.6361106676346, + "learning_rate": 6.006998403029762e-11, + "loss": 1.4065, + "step": 166146 + }, + { + "epoch": 2.0, + "grad_norm": 7.9469103200067925, + "learning_rate": 5.806221823223546e-11, + "loss": 1.3648, + "step": 166149 + }, + { + "epoch": 2.0, + "grad_norm": 13.164809511173395, + "learning_rate": 5.6088578634927447e-11, + "loss": 1.0738, + "step": 166152 + }, + { + "epoch": 2.0, + "grad_norm": 7.715566718830254, + "learning_rate": 5.414906524392472e-11, + "loss": 1.0694, + "step": 166155 + }, + { + "epoch": 2.0, + "grad_norm": 10.571374541510103, + "learning_rate": 5.224367806588859e-11, + "loss": 0.9137, + "step": 166158 + }, + { + "epoch": 2.0, + "grad_norm": 8.452077418215795, + "learning_rate": 5.037241710748042e-11, + "loss": 1.1721, + "step": 166161 + }, + { + "epoch": 2.0, + "grad_norm": 4.85407660529662, + "learning_rate": 4.853528237647176e-11, + "loss": 1.134, + "step": 166164 + }, + { + "epoch": 2.0, + "grad_norm": 26.545855304457675, + "learning_rate": 4.673227387619328e-11, + "loss": 0.899, + "step": 166167 + }, + { + "epoch": 2.0, + "grad_norm": 2.7924603884141095, + "learning_rate": 4.496339161552676e-11, + "loss": 1.3373, + "step": 166170 + }, + { + "epoch": 2.0, + "grad_norm": 9.25864406439717, + "learning_rate": 4.322863559891311e-11, + "loss": 1.121, + "step": 166173 + }, + { + "epoch": 2.0, + "grad_norm": 5.012949893244944, + "learning_rate": 4.152800583301364e-11, + "loss": 1.0327, + "step": 166176 + }, + { + "epoch": 2.0, + "grad_norm": 4.923343095706256, + "learning_rate": 3.986150232337949e-11, + "loss": 0.9414, + "step": 166179 + }, + { + "epoch": 2.0, + "grad_norm": 10.966923885989079, + "learning_rate": 3.822912507556176e-11, + "loss": 1.017, + "step": 166182 + }, + { + "epoch": 2.0, + "grad_norm": 8.753934603440959, + "learning_rate": 3.663087409511157e-11, + "loss": 1.0681, + "step": 166185 + }, + { + "epoch": 2.0, + "grad_norm": 7.662899364874918, + "learning_rate": 3.5066749388690256e-11, + "loss": 1.2416, + "step": 166188 + }, + { + "epoch": 2.0, + "grad_norm": 8.912388989699831, + "learning_rate": 3.3536750959628494e-11, + "loss": 0.8797, + "step": 166191 + }, + { + "epoch": 2.0, + "grad_norm": 6.647679722046128, + "learning_rate": 3.204087881458762e-11, + "loss": 1.4643, + "step": 166194 + }, + { + "epoch": 2.0, + "grad_norm": 4.534276356950776, + "learning_rate": 3.0579132958008515e-11, + "loss": 1.3411, + "step": 166197 + }, + { + "epoch": 2.0, + "grad_norm": 9.108398919654515, + "learning_rate": 2.915151339544231e-11, + "loss": 0.8673, + "step": 166200 + }, + { + "epoch": 2.0, + "grad_norm": 16.289388500064888, + "learning_rate": 2.7758020130219665e-11, + "loss": 1.2686, + "step": 166203 + }, + { + "epoch": 2.0, + "grad_norm": 15.14970398588172, + "learning_rate": 2.639865316900192e-11, + "loss": 1.4154, + "step": 166206 + }, + { + "epoch": 2.0, + "grad_norm": 12.856738586927381, + "learning_rate": 2.5073412516229968e-11, + "loss": 1.2664, + "step": 166209 + }, + { + "epoch": 2.0, + "grad_norm": 5.568468276884333, + "learning_rate": 2.3782298175234474e-11, + "loss": 1.0955, + "step": 166212 + }, + { + "epoch": 2.0, + "grad_norm": 6.447040811513387, + "learning_rate": 2.2525310151566558e-11, + "loss": 1.3024, + "step": 166215 + }, + { + "epoch": 2.0, + "grad_norm": 6.284269669066962, + "learning_rate": 2.130244844966711e-11, + "loss": 1.0236, + "step": 166218 + }, + { + "epoch": 2.0, + "grad_norm": 12.269080518339551, + "learning_rate": 2.011371307175658e-11, + "loss": 0.8061, + "step": 166221 + }, + { + "epoch": 2.0, + "grad_norm": 6.272288381035373, + "learning_rate": 1.8959104023386077e-11, + "loss": 1.0474, + "step": 166224 + }, + { + "epoch": 2.0, + "grad_norm": 5.401050930633306, + "learning_rate": 1.7838621308996496e-11, + "loss": 0.9266, + "step": 166227 + }, + { + "epoch": 2.0, + "grad_norm": 8.75974061192764, + "learning_rate": 1.6752264931918505e-11, + "loss": 1.4124, + "step": 166230 + }, + { + "epoch": 2.0, + "grad_norm": 7.624592575411569, + "learning_rate": 1.570003489437255e-11, + "loss": 1.1411, + "step": 166233 + }, + { + "epoch": 2.0, + "grad_norm": 6.365836960885278, + "learning_rate": 1.4681931203019973e-11, + "loss": 0.8518, + "step": 166236 + }, + { + "epoch": 2.0, + "grad_norm": 4.387396037672182, + "learning_rate": 1.3697953857860768e-11, + "loss": 1.017, + "step": 166239 + }, + { + "epoch": 2.0, + "grad_norm": 9.512644490893699, + "learning_rate": 1.2748102865556278e-11, + "loss": 0.8383, + "step": 166242 + }, + { + "epoch": 2.0, + "grad_norm": 18.602611067551326, + "learning_rate": 1.1832378227216723e-11, + "loss": 1.0446, + "step": 166245 + }, + { + "epoch": 2.0, + "grad_norm": 9.71769883785937, + "learning_rate": 1.0950779946172774e-11, + "loss": 0.8796, + "step": 166248 + }, + { + "epoch": 2.0, + "grad_norm": 7.0961564449503625, + "learning_rate": 1.0103308025755098e-11, + "loss": 1.3124, + "step": 166251 + }, + { + "epoch": 2.0, + "grad_norm": 7.403447207546265, + "learning_rate": 9.289962469294366e-12, + "loss": 1.2507, + "step": 166254 + }, + { + "epoch": 2.0, + "grad_norm": 13.093875875585919, + "learning_rate": 8.510743277900801e-12, + "loss": 0.9251, + "step": 166257 + }, + { + "epoch": 2.0, + "grad_norm": 4.066888115376651, + "learning_rate": 7.765650457125517e-12, + "loss": 1.0766, + "step": 166260 + }, + { + "epoch": 2.0, + "grad_norm": 10.786800238215145, + "learning_rate": 7.0546840069685155e-12, + "loss": 1.1248, + "step": 166263 + }, + { + "epoch": 2.0, + "grad_norm": 9.76784818968593, + "learning_rate": 6.377843930760463e-12, + "loss": 1.1134, + "step": 166266 + }, + { + "epoch": 2.0, + "grad_norm": 11.523883545335934, + "learning_rate": 5.735130230721808e-12, + "loss": 0.9865, + "step": 166269 + }, + { + "epoch": 2.0, + "grad_norm": 7.325734004524464, + "learning_rate": 5.126542910183218e-12, + "loss": 1.2753, + "step": 166272 + }, + { + "epoch": 2.0, + "grad_norm": 6.534037079816, + "learning_rate": 4.552081969144695e-12, + "loss": 0.836, + "step": 166275 + }, + { + "epoch": 2.0, + "grad_norm": 7.628420264275446, + "learning_rate": 4.011747410936906e-12, + "loss": 1.5174, + "step": 166278 + }, + { + "epoch": 2.0, + "grad_norm": 8.416101806057155, + "learning_rate": 3.5055392366700745e-12, + "loss": 0.8702, + "step": 166281 + }, + { + "epoch": 2.0, + "grad_norm": 6.704389599734034, + "learning_rate": 3.0334574485646474e-12, + "loss": 1.0965, + "step": 166284 + }, + { + "epoch": 2.0, + "grad_norm": 7.471235738765127, + "learning_rate": 2.59550204884107e-12, + "loss": 1.161, + "step": 166287 + }, + { + "epoch": 2.0, + "grad_norm": 11.678655733456369, + "learning_rate": 2.1916730374993423e-12, + "loss": 1.2324, + "step": 166290 + }, + { + "epoch": 2.0, + "grad_norm": 10.109839160153122, + "learning_rate": 1.8219704167599107e-12, + "loss": 1.1988, + "step": 166293 + }, + { + "epoch": 2.0, + "grad_norm": 7.790205571441623, + "learning_rate": 1.486394188843221e-12, + "loss": 1.1834, + "step": 166296 + }, + { + "epoch": 2.0, + "grad_norm": 8.475849373379152, + "learning_rate": 1.1849443526390504e-12, + "loss": 1.2497, + "step": 166299 + }, + { + "epoch": 2.0, + "grad_norm": 6.659280742466733, + "learning_rate": 9.176209103678446e-13, + "loss": 0.9888, + "step": 166302 + }, + { + "epoch": 2.0, + "grad_norm": 3.3869877232952725, + "learning_rate": 6.844238642500501e-13, + "loss": 0.8597, + "step": 166305 + }, + { + "epoch": 2.0, + "grad_norm": 6.851505201125735, + "learning_rate": 4.853532120652204e-13, + "loss": 1.0355, + "step": 166308 + }, + { + "epoch": 2.0, + "grad_norm": 2.8426153155306433, + "learning_rate": 3.2040895714402497e-13, + "loss": 1.0515, + "step": 166311 + }, + { + "epoch": 2.0, + "grad_norm": 16.681487696748572, + "learning_rate": 1.895910994864636e-13, + "loss": 1.042, + "step": 166314 + }, + { + "epoch": 2.0, + "grad_norm": 4.978930689609736, + "learning_rate": 9.28996390925363e-14, + "loss": 0.8432, + "step": 166317 + }, + { + "epoch": 2.0, + "grad_norm": 2.663281527801612, + "learning_rate": 3.0334575962243096e-14, + "loss": 1.4709, + "step": 166320 + }, + { + "epoch": 2.0, + "grad_norm": 10.111679702636001, + "learning_rate": 1.8959112058070105e-15, + "loss": 0.9805, + "step": 166323 + }, + { + "epoch": 2.0, + "step": 166324, + "total_flos": 2339362819236352.0, + "train_loss": 0.2048133761826705, + "train_runtime": 203478.8121, + "train_samples_per_second": 6.539, + "train_steps_per_second": 0.817 + } + ], + "logging_steps": 3, + "max_steps": 166324, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 700, + "total_flos": 2339362819236352.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}